@honeybee-ai/incubator 1.1.5 → 1.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin.d.ts CHANGED
@@ -1,2 +1 @@
1
- #!/usr/bin/env node
2
1
  export {};
package/dist/bin.js CHANGED
@@ -1,4 +1,3 @@
1
- #!/usr/bin/env node
2
1
  import { main } from './index.js';
3
2
  main().catch((err) => {
4
3
  console.error('[incubator] Fatal error:', err.message);
package/dist/bin.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"bin.js","sourceRoot":"","sources":["../src/bin.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACnB,OAAO,CAAC,KAAK,CAAC,0BAA0B,EAAG,GAAa,CAAC,OAAO,CAAC,CAAC;IAClE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"bin.js","sourceRoot":"","sources":["../src/bin.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAElC,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACnB,OAAO,CAAC,KAAK,CAAC,0BAA0B,EAAG,GAAa,CAAC,OAAO,CAAC,CAAC;IAClE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
@@ -0,0 +1,2 @@
1
+ import type { EvalCheck } from '../types.js';
2
+ export declare const securityCheck: EvalCheck;
@@ -0,0 +1,106 @@
1
+ /**
2
+ * Security check — Carapace scan on generated source files.
3
+ */
4
+ import { readdirSync, readFileSync } from 'node:fs';
5
+ import { join } from 'node:path';
6
+ /** Default weight if not configured in task. */
7
+ const DEFAULT_WEIGHT = 2;
8
/**
 * Walk `dir` depth-first and gather the TypeScript/JavaScript source files in it.
 *
 * Entries named `node_modules`, `.git` or `dist` are skipped entirely, and
 * `.d.ts` declaration files are left out of the result.
 *
 * @param dir  Directory to read at this level of the walk.
 * @param base Relative prefix accumulated so far; omitted/undefined for the root call.
 * @returns Paths relative to the root directory, joined with `/`.
 */
function collectSourceFiles(dir, base) {
    const ignored = new Set(['node_modules', '.git', 'dist']);
    return readdirSync(dir, { withFileTypes: true }).flatMap((dirent) => {
        const { name } = dirent;
        if (ignored.has(name)) {
            return [];
        }
        const relPath = base ? `${base}/${name}` : name;
        if (dirent.isDirectory()) {
            // Descend, carrying the relative prefix so results stay root-relative.
            return collectSourceFiles(join(dir, name), relPath);
        }
        const isSource = /\.(ts|js|tsx|jsx)$/.test(name) && !name.endsWith('.d.ts');
        return isSource ? [relPath] : [];
    });
}
27
+ export const securityCheck = {
28
+ name: 'security',
29
+ async run(workDir, task) {
30
+ const start = Date.now();
31
+ const weight = task.checks?.security?.weight ?? DEFAULT_WEIGHT;
32
+ const files = collectSourceFiles(workDir);
33
+ if (files.length === 0) {
34
+ return {
35
+ name: 'security',
36
+ passed: true,
37
+ score: 1.0,
38
+ weight,
39
+ details: 'No source files to scan',
40
+ duration_ms: Date.now() - start,
41
+ };
42
+ }
43
+ // Use createEdgeScanner() — pre-loads patterns via static require()
44
+ // which esbuild inlines. The default scan() uses __dirname-relative
45
+ // fs.readFileSync for patterns, which breaks when bundled.
46
+ let scanner;
47
+ try {
48
+ const carapace = await import('@honeybee-ai/carapace');
49
+ const create = carapace.createEdgeScanner;
50
+ if (create) {
51
+ scanner = create();
52
+ }
53
+ else {
54
+ // Fallback: wrap scan() directly
55
+ const scanFn = carapace.scan;
56
+ scanner = { scan: scanFn };
57
+ }
58
+ }
59
+ catch {
60
+ return {
61
+ name: 'security',
62
+ passed: true,
63
+ score: 1.0,
64
+ weight,
65
+ details: 'Carapace not available — skipped',
66
+ duration_ms: Date.now() - start,
67
+ };
68
+ }
69
+ let warnCount = 0;
70
+ let blockCount = 0;
71
+ const findings = [];
72
+ for (const file of files) {
73
+ const content = readFileSync(join(workDir, file), 'utf-8');
74
+ if (!content.trim())
75
+ continue;
76
+ const result = scanner.scan(content);
77
+ if (result.action === 'BLOCK') {
78
+ blockCount++;
79
+ findings.push(`BLOCK: ${file} (score: ${result.score})`);
80
+ }
81
+ else if (result.action === 'WARN') {
82
+ warnCount++;
83
+ findings.push(`WARN: ${file} (score: ${result.score})`);
84
+ }
85
+ }
86
+ // Score: 1.0 if all PASS, -0.1 per WARN, 0 on any BLOCK
87
+ let score;
88
+ if (blockCount > 0) {
89
+ score = 0;
90
+ }
91
+ else {
92
+ score = Math.max(0, 1 - warnCount * 0.1);
93
+ }
94
+ const passed = blockCount === 0 && warnCount === 0;
95
+ return {
96
+ name: 'security',
97
+ passed,
98
+ score,
99
+ weight,
100
+ details: `Scanned ${files.length} files: ${blockCount} blocked, ${warnCount} warnings${findings.length > 0 ? '\n' + findings.join('\n') : ''}`,
101
+ errors: findings.length > 0 ? findings : undefined,
102
+ duration_ms: Date.now() - start,
103
+ };
104
+ },
105
+ };
106
+ //# sourceMappingURL=security.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"security.js","sourceRoot":"","sources":["../../../src/eval/checks/security.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACpD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAGjC,gDAAgD;AAChD,MAAM,cAAc,GAAG,CAAC,CAAC;AAEzB;;GAEG;AACH,SAAS,kBAAkB,CAAC,GAAW,EAAE,IAAa;IACpD,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;IAE1D,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,IAAI,KAAK,CAAC,IAAI,KAAK,cAAc,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM;YAAE,SAAS;QAC9F,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC;QACxD,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACxB,MAAM,CAAC,IAAI,CAAC,GAAG,kBAAkB,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;QACjE,CAAC;aAAM,IAAI,oBAAoB,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;YAClF,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACnB,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,CAAC,MAAM,aAAa,GAAc;IACtC,IAAI,EAAE,UAAU;IAEhB,KAAK,CAAC,GAAG,CAAC,OAAe,EAAE,IAAc;QACvC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,EAAE,QAAQ,EAAE,MAAM,IAAI,cAAc,CAAC;QAE/D,MAAM,KAAK,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC;QAC1C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO;gBACL,IAAI,EAAE,UAAU;gBAChB,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,GAAG;gBACV,MAAM;gBACN,OAAO,EAAE,yBAAyB;gBAClC,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;aAChC,CAAC;QACJ,CAAC;QAED,oEAAoE;QACpE,oEAAoE;QACpE,2DAA2D;QAC3D,IAAI,OAAsE,CAAC;QAC3E,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,uBAAuB,CAAC,CAAC;YACvD,MAAM,MAAM,GAAI,QAAoC,CAAC,iBAC4D,CAAC;YAClH,IAAI,MAAM,EAAE,CAAC;gBACX,OAAO,GAAG,MAAM,EAAE,CAAC;YACrB,CAAC;iBAAM,CAAC;gBACN,iCAAiC;gBACjC,MAAM,MAAM,GAAI,QAAoC,CAAC,IAA2D,CAAC;gBACjH,OAAO,GAAG,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;YAC7B,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,OAAO;gBACL,IAAI,EAAE,UAAU;gBAChB,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,GAAG;gBAC
V,MAAM;gBACN,OAAO,EAAE,kCAAkC;gBAC3C,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;aAChC,CAAC;QACJ,CAAC;QAED,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,EAAE,OAAO,CAAC,CAAC;YAC3D,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE;gBAAE,SAAS;YAE9B,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrC,IAAI,MAAM,CAAC,MAAM,KAAK,OAAO,EAAE,CAAC;gBAC9B,UAAU,EAAE,CAAC;gBACb,QAAQ,CAAC,IAAI,CAAC,UAAU,IAAI,YAAY,MAAM,CAAC,KAAK,GAAG,CAAC,CAAC;YAC3D,CAAC;iBAAM,IAAI,MAAM,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;gBACpC,SAAS,EAAE,CAAC;gBACZ,QAAQ,CAAC,IAAI,CAAC,SAAS,IAAI,YAAY,MAAM,CAAC,KAAK,GAAG,CAAC,CAAC;YAC1D,CAAC;QACH,CAAC;QAED,wDAAwD;QACxD,IAAI,KAAa,CAAC;QAClB,IAAI,UAAU,GAAG,CAAC,EAAE,CAAC;YACnB,KAAK,GAAG,CAAC,CAAC;QACZ,CAAC;aAAM,CAAC;YACN,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,GAAG,GAAG,CAAC,CAAC;QAC3C,CAAC;QAED,MAAM,MAAM,GAAG,UAAU,KAAK,CAAC,IAAI,SAAS,KAAK,CAAC,CAAC;QAEnD,OAAO;YACL,IAAI,EAAE,UAAU;YAChB,MAAM;YACN,KAAK;YACL,MAAM;YACN,OAAO,EAAE,WAAW,KAAK,CAAC,MAAM,WAAW,UAAU,aAAa,SAAS,YAAY,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE;YAC9I,MAAM,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;YAClD,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAChC,CAAC;IACJ,CAAC;CACF,CAAC"}
@@ -0,0 +1,2 @@
1
+ import type { EvalCheck } from '../types.js';
2
+ export declare const structureCheck: EvalCheck;
@@ -0,0 +1,100 @@
1
+ /**
2
+ * File structure check — validates expected files exist and are non-empty.
3
+ */
4
+ import { existsSync, statSync, readdirSync } from 'node:fs';
5
+ import { join } from 'node:path';
6
+ /** Default weight if not configured in task. */
7
+ const DEFAULT_WEIGHT = 1;
8
/**
 * Simple glob match for `/`-separated relative paths.
 * Not a full glob engine — handles the common patterns:
 *   - `*`  matches any run of characters within one path segment
 *   - `?`  matches exactly one character within a path segment
 *   - `**` followed by `/` matches zero or more leading directories
 *   - a remaining `**` matches anything, including separators
 *
 * @param pattern  Glob pattern to test against.
 * @param filePath Relative file path using `/` separators.
 * @returns true when the whole path matches the pattern.
 */
function matchGlob(pattern, filePath) {
    // Exact match
    if (pattern === filePath)
        return true;
    // Convert glob to regex (safe — patterns come from task YAML, not user input).
    // Globstars are swapped for placeholders first so the single-`*` and `?`
    // rewrites cannot touch them or the `?` inside their final expansion.
    const regexStr = pattern
        .replace(/[.+^${}()|[\]\\]/g, '\\$&') // escape regex chars (not * or ?)
        .replace(/\*\*\//g, '{{GLOBSTAR_SEP}}') // **/ = zero or more dirs
        .replace(/\*\*/g, '{{GLOBSTAR}}') // ** at end
        .replace(/\*/g, '[^/]*') // * = one segment
        .replace(/\?/g, '[^/]') // ? = exactly one non-separator character
        .replace(/\{\{GLOBSTAR_SEP\}\}/g, '(?:.+/)?') // **/ matches "" or "a/" or "a/b/"
        .replace(/\{\{GLOBSTAR\}\}/g, '.*');
    return new RegExp(`^${regexStr}$`).test(filePath);
}
26
/**
 * Enumerate every file below `dir`, depth-first, as root-relative paths.
 *
 * Entries named `node_modules` or `.git` are skipped at every level.
 *
 * @param dir  Directory to read at this level of the walk.
 * @param base Relative prefix accumulated so far; omitted/undefined for the root call.
 * @returns Relative file paths joined with `/`.
 */
function listFiles(dir, base) {
    return readdirSync(dir, { withFileTypes: true }).flatMap((dirent) => {
        const { name } = dirent;
        if (name === 'node_modules' || name === '.git') {
            return [];
        }
        const relPath = base ? `${base}/${name}` : name;
        // Directories contribute their contents; anything else is listed as-is.
        return dirent.isDirectory() ? listFiles(join(dir, name), relPath) : [relPath];
    });
}
45
+ export const structureCheck = {
46
+ name: 'structure',
47
+ async run(workDir, task) {
48
+ const start = Date.now();
49
+ const weight = task.checks?.structure?.weight ?? DEFAULT_WEIGHT;
50
+ const patterns = task.expected_files;
51
+ if (!patterns || patterns.length === 0) {
52
+ return {
53
+ name: 'structure',
54
+ passed: true,
55
+ score: 1.0,
56
+ weight,
57
+ details: 'No expected files configured — skipped',
58
+ duration_ms: Date.now() - start,
59
+ };
60
+ }
61
+ const allFiles = listFiles(workDir);
62
+ const results = [];
63
+ for (const pattern of patterns) {
64
+ // Check if any file matches this pattern
65
+ const matches = allFiles.filter(f => matchGlob(pattern, f));
66
+ // Verify matched files are non-empty
67
+ const nonEmpty = matches.filter(f => {
68
+ const fullPath = join(workDir, f);
69
+ return existsSync(fullPath) && statSync(fullPath).size > 0;
70
+ });
71
+ results.push({
72
+ pattern,
73
+ matched: nonEmpty.length > 0,
74
+ files: nonEmpty,
75
+ });
76
+ }
77
+ const matchedCount = results.filter(r => r.matched).length;
78
+ const score = patterns.length > 0 ? matchedCount / patterns.length : 1.0;
79
+ const passed = matchedCount === patterns.length;
80
+ const matchedPatterns = results.filter(r => r.matched).map(r => r.pattern);
81
+ const missingPatterns = results.filter(r => !r.matched).map(r => r.pattern);
82
+ const parts = [];
83
+ if (matchedPatterns.length > 0) {
84
+ parts.push(`Matched: ${matchedPatterns.join(', ')}`);
85
+ }
86
+ if (missingPatterns.length > 0) {
87
+ parts.push(`Missing: ${missingPatterns.join(', ')}`);
88
+ }
89
+ return {
90
+ name: 'structure',
91
+ passed,
92
+ score,
93
+ weight,
94
+ details: `${matchedCount}/${patterns.length} expected file patterns found. ${parts.join('. ')}`,
95
+ errors: missingPatterns.length > 0 ? missingPatterns.map(p => `Missing: ${p}`) : undefined,
96
+ duration_ms: Date.now() - start,
97
+ };
98
+ },
99
+ };
100
+ //# sourceMappingURL=structure.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"structure.js","sourceRoot":"","sources":["../../../src/eval/checks/structure.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AAC5D,OAAO,EAAE,IAAI,EAAY,MAAM,WAAW,CAAC;AAG3C,gDAAgD;AAChD,MAAM,cAAc,GAAG,CAAC,CAAC;AAEzB;;;GAGG;AACH,SAAS,SAAS,CAAC,OAAe,EAAE,QAAgB;IAClD,cAAc;IACd,IAAI,OAAO,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IAEtC,8EAA8E;IAC9E,IAAI,QAAQ,GAAG,OAAO;SACnB,OAAO,CAAC,mBAAmB,EAAE,MAAM,CAAC,CAAE,kCAAkC;SACxE,OAAO,CAAC,SAAS,EAAE,kBAAkB,CAAC,CAAC,0BAA0B;SACjE,OAAO,CAAC,OAAO,EAAE,cAAc,CAAC,CAAO,YAAY;SACnD,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC,CAAgB,kBAAkB;SACzD,OAAO,CAAC,uBAAuB,EAAE,UAAU,CAAC,CAAE,mCAAmC;SACjF,OAAO,CAAC,mBAAmB,EAAE,IAAI,CAAC,CAAC;IAEtC,OAAO,IAAI,MAAM,CAAC,IAAI,QAAQ,GAAG,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;AACpD,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,GAAW,EAAE,IAAa;IAC3C,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;IAE1D,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,IAAI,KAAK,CAAC,IAAI,KAAK,cAAc,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM;YAAE,SAAS;QACrE,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC;QACxD,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;YACxB,MAAM,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;QACxD,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACnB,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,CAAC,MAAM,cAAc,GAAc;IACvC,IAAI,EAAE,WAAW;IAEjB,KAAK,CAAC,GAAG,CAAC,OAAe,EAAE,IAAc;QACvC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,MAAM,IAAI,cAAc,CAAC;QAChE,MAAM,QAAQ,GAAG,IAAI,CAAC,cAAc,CAAC;QAErC,IAAI,CAAC,QAAQ,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvC,OAAO;gBACL,IAAI,EAAE,WAAW;gBACjB,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,GAAG;gBACV,MAAM;gBACN,OAAO,EAAE,wCAAwC;gBACjD,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;aAChC,CAAC;QACJ,CAAC;QAED,MAAM,QAAQ,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC;QACpC,MAAM,OAAO,GAAkE,E
AAE,CAAC;QAElF,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,yCAAyC;YACzC,MAAM,OAAO,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;YAE5D,qCAAqC;YACrC,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE;gBAClC,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;gBAClC,OAAO,UAAU,CAAC,QAAQ,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC;YAC7D,CAAC,CAAC,CAAC;YAEH,OAAO,CAAC,IAAI,CAAC;gBACX,OAAO;gBACP,OAAO,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC;gBAC5B,KAAK,EAAE,QAAQ;aAChB,CAAC,CAAC;QACL,CAAC;QAED,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;QAC3D,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC;QACzE,MAAM,MAAM,GAAG,YAAY,KAAK,QAAQ,CAAC,MAAM,CAAC;QAEhD,MAAM,eAAe,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QAC3E,MAAM,eAAe,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QAE5E,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/B,KAAK,CAAC,IAAI,CAAC,YAAY,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACvD,CAAC;QACD,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/B,KAAK,CAAC,IAAI,CAAC,YAAY,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACvD,CAAC;QAED,OAAO;YACL,IAAI,EAAE,WAAW;YACjB,MAAM;YACN,KAAK;YACL,MAAM;YACN,OAAO,EAAE,GAAG,YAAY,IAAI,QAAQ,CAAC,MAAM,kCAAkC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;YAC/F,MAAM,EAAE,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;YAC1F,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAChC,CAAC;IACJ,CAAC;CACF,CAAC"}
@@ -0,0 +1,2 @@
1
+ import type { EvalCheck } from '../types.js';
2
+ export declare const testCheck: EvalCheck;
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Test runner check — install deps and run the project's test command.
3
+ */
4
+ import { execFileSync } from 'node:child_process';
5
+ import { existsSync } from 'node:fs';
6
+ import { join } from 'node:path';
7
+ /** Default weight if not configured in task. */
8
+ const DEFAULT_WEIGHT = 4;
9
+ export const testCheck = {
10
+ name: 'test',
11
+ async run(workDir, task) {
12
+ const start = Date.now();
13
+ const weight = task.checks?.test?.weight ?? DEFAULT_WEIGHT;
14
+ // Check if package.json exists
15
+ if (!existsSync(join(workDir, 'package.json'))) {
16
+ return {
17
+ name: 'test',
18
+ passed: false,
19
+ score: 0,
20
+ weight,
21
+ details: 'No package.json found — cannot run tests',
22
+ duration_ms: Date.now() - start,
23
+ };
24
+ }
25
+ // Install dependencies
26
+ try {
27
+ execFileSync('npm', ['install', '--ignore-scripts'], {
28
+ cwd: workDir,
29
+ timeout: 120_000,
30
+ stdio: ['ignore', 'pipe', 'pipe'],
31
+ });
32
+ }
33
+ catch (err) {
34
+ const stderr = err?.stderr?.toString()?.slice(0, 500) ?? '';
35
+ return {
36
+ name: 'test',
37
+ passed: false,
38
+ score: 0,
39
+ weight,
40
+ details: `npm install failed: ${stderr}`.slice(0, 2000),
41
+ errors: ['npm install failed'],
42
+ duration_ms: Date.now() - start,
43
+ };
44
+ }
45
+ // Run tests
46
+ const testCommand = task.checks?.test?.command ?? 'npm test';
47
+ const [cmd, ...cmdArgs] = testCommand.split(' ');
48
+ try {
49
+ const result = execFileSync(cmd, cmdArgs, {
50
+ cwd: workDir,
51
+ timeout: 120_000,
52
+ stdio: ['ignore', 'pipe', 'pipe'],
53
+ });
54
+ const output = result.toString().trim();
55
+ return {
56
+ name: 'test',
57
+ passed: true,
58
+ score: 1.0,
59
+ weight,
60
+ details: `Tests passed.\n${output}`.slice(0, 2000),
61
+ duration_ms: Date.now() - start,
62
+ };
63
+ }
64
+ catch (err) {
65
+ const stderr = err?.stderr?.toString() ?? '';
66
+ const stdout = err?.stdout?.toString() ?? '';
67
+ const output = (stderr + '\n' + stdout).trim();
68
+ return {
69
+ name: 'test',
70
+ passed: false,
71
+ score: 0,
72
+ weight,
73
+ details: `Tests failed.\n${output}`.slice(0, 2000),
74
+ errors: ['Test suite failed'],
75
+ duration_ms: Date.now() - start,
76
+ };
77
+ }
78
+ },
79
+ };
80
+ //# sourceMappingURL=test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"test.js","sourceRoot":"","sources":["../../../src/eval/checks/test.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAGjC,gDAAgD;AAChD,MAAM,cAAc,GAAG,CAAC,CAAC;AAEzB,MAAM,CAAC,MAAM,SAAS,GAAc;IAClC,IAAI,EAAE,MAAM;IAEZ,KAAK,CAAC,GAAG,CAAC,OAAe,EAAE,IAAc;QACvC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,MAAM,IAAI,cAAc,CAAC;QAE3D,+BAA+B;QAC/B,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,OAAO,EAAE,cAAc,CAAC,CAAC,EAAE,CAAC;YAC/C,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,MAAM;gBACN,OAAO,EAAE,0CAA0C;gBACnD,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;aAChC,CAAC;QACJ,CAAC;QAED,uBAAuB;QACvB,IAAI,CAAC;YACH,YAAY,CAAC,KAAK,EAAE,CAAC,SAAS,EAAE,kBAAkB,CAAC,EAAE;gBACnD,GAAG,EAAE,OAAO;gBACZ,OAAO,EAAE,OAAO;gBAChB,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,CAAC;aAClC,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,MAAM,GAAI,GAA2B,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;YACrF,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,MAAM;gBACN,OAAO,EAAE,uBAAuB,MAAM,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC;gBACvD,MAAM,EAAE,CAAC,oBAAoB,CAAC;gBAC9B,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;aAChC,CAAC;QACJ,CAAC;QAED,YAAY;QACZ,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,EAAE,IAAI,EAAE,OAAO,IAAI,UAAU,CAAC;QAC7D,MAAM,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC,GAAG,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAEjD,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,YAAY,CAAC,GAAG,EAAE,OAAO,EAAE;gBACxC,GAAG,EAAE,OAAO;gBACZ,OAAO,EAAE,OAAO;gBAChB,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,CAAC;aAClC,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAC;YAExC,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,GAAG;gBACV,MAAM;gBACN,OAAO,EAAE,kBAAkB,MAAM,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC;gBAClD,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;aAChC,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YAC
b,MAAM,MAAM,GAAI,GAA2B,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;YACtE,MAAM,MAAM,GAAI,GAA2B,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;YACtE,MAAM,MAAM,GAAG,CAAC,MAAM,GAAG,IAAI,GAAG,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;YAE/C,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,MAAM,EAAE,KAAK;gBACb,KAAK,EAAE,CAAC;gBACR,MAAM;gBACN,OAAO,EAAE,kBAAkB,MAAM,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC;gBAClD,MAAM,EAAE,CAAC,mBAAmB,CAAC;gBAC7B,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;aAChC,CAAC;QACJ,CAAC;IACH,CAAC;CACF,CAAC"}
@@ -0,0 +1,2 @@
1
+ import type { EvalCheck } from '../types.js';
2
+ export declare const typecheckCheck: EvalCheck;
@@ -0,0 +1,78 @@
1
+ /**
2
+ * TypeScript compilation check — runs tsc --noEmit.
3
+ */
4
+ import { execFileSync } from 'node:child_process';
5
+ import { existsSync } from 'node:fs';
6
+ import { join } from 'node:path';
7
+ import { createRequire } from 'node:module';
8
+ const require = createRequire(import.meta.url);
9
+ /** Resolve tsc binary from our own typescript installation. */
10
+ function findTsc() {
11
+ try {
12
+ return require.resolve('typescript/bin/tsc');
13
+ }
14
+ catch {
15
+ return 'tsc'; // fallback to PATH
16
+ }
17
+ }
18
+ /** Default weight if not configured in task. */
19
+ const DEFAULT_WEIGHT = 3;
20
+ export const typecheckCheck = {
21
+ name: 'typecheck',
22
+ async run(workDir, task) {
23
+ const start = Date.now();
24
+ const weight = task.checks?.typecheck?.weight ?? DEFAULT_WEIGHT;
25
+ // Skip if no tsconfig.json
26
+ if (!existsSync(join(workDir, 'tsconfig.json'))) {
27
+ return {
28
+ name: 'typecheck',
29
+ passed: true,
30
+ score: 1.0,
31
+ weight,
32
+ details: 'Skipped: no tsconfig.json found',
33
+ duration_ms: Date.now() - start,
34
+ };
35
+ }
36
+ try {
37
+ execFileSync('node', [findTsc(), '--noEmit', '--pretty'], {
38
+ cwd: workDir,
39
+ timeout: 120_000,
40
+ stdio: ['ignore', 'pipe', 'pipe'],
41
+ });
42
+ return {
43
+ name: 'typecheck',
44
+ passed: true,
45
+ score: 1.0,
46
+ weight,
47
+ details: 'TypeScript compilation passed with no errors',
48
+ duration_ms: Date.now() - start,
49
+ };
50
+ }
51
+ catch (err) {
52
+ const stderr = err?.stderr?.toString() ?? '';
53
+ const stdout = err?.stdout?.toString() ?? '';
54
+ const output = (stderr + stdout).trim();
55
+ // Count errors from tsc output
56
+ const errorMatch = output.match(/Found (\d+) errors?/);
57
+ const errorCount = errorMatch ? parseInt(errorMatch[1], 10) : 1;
58
+ // Score degrades with errors: 0 errors = 1.0, each error costs 0.1
59
+ const score = Math.max(0, 1 - errorCount * 0.1);
60
+ // Extract error lines (truncate to 2000 chars)
61
+ const lines = output.split('\n');
62
+ const errorLines = lines.filter(l => l.includes('error TS')).slice(0, 20);
63
+ const details = errorLines.length > 0
64
+ ? `${errorCount} TypeScript error(s):\n${errorLines.join('\n')}`
65
+ : `TypeScript compilation failed (${errorCount} error(s))`;
66
+ return {
67
+ name: 'typecheck',
68
+ passed: false,
69
+ score,
70
+ weight,
71
+ details: details.slice(0, 2000),
72
+ errors: errorLines,
73
+ duration_ms: Date.now() - start,
74
+ };
75
+ }
76
+ },
77
+ };
78
+ //# sourceMappingURL=typecheck.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"typecheck.js","sourceRoot":"","sources":["../../../src/eval/checks/typecheck.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAG5C,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAE/C,+DAA+D;AAC/D,SAAS,OAAO;IACd,IAAI,CAAC;QACH,OAAO,OAAO,CAAC,OAAO,CAAC,oBAAoB,CAAC,CAAC;IAC/C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC,CAAC,mBAAmB;IACnC,CAAC;AACH,CAAC;AAED,gDAAgD;AAChD,MAAM,cAAc,GAAG,CAAC,CAAC;AAEzB,MAAM,CAAC,MAAM,cAAc,GAAc;IACvC,IAAI,EAAE,WAAW;IAEjB,KAAK,CAAC,GAAG,CAAC,OAAe,EAAE,IAAc;QACvC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,MAAM,IAAI,cAAc,CAAC;QAEhE,2BAA2B;QAC3B,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,OAAO,EAAE,eAAe,CAAC,CAAC,EAAE,CAAC;YAChD,OAAO;gBACL,IAAI,EAAE,WAAW;gBACjB,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,GAAG;gBACV,MAAM;gBACN,OAAO,EAAE,iCAAiC;gBAC1C,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;aAChC,CAAC;QACJ,CAAC;QAED,IAAI,CAAC;YACH,YAAY,CAAC,MAAM,EAAE,CAAC,OAAO,EAAE,EAAE,UAAU,EAAE,UAAU,CAAC,EAAE;gBACxD,GAAG,EAAE,OAAO;gBACZ,OAAO,EAAE,OAAO;gBAChB,KAAK,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,CAAC;aAClC,CAAC,CAAC;YAEH,OAAO;gBACL,IAAI,EAAE,WAAW;gBACjB,MAAM,EAAE,IAAI;gBACZ,KAAK,EAAE,GAAG;gBACV,MAAM;gBACN,OAAO,EAAE,8CAA8C;gBACvD,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;aAChC,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,MAAM,GAAI,GAA2B,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;YACtE,MAAM,MAAM,GAAI,GAA2B,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;YACtE,MAAM,MAAM,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;YAExC,+BAA+B;YAC/B,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,qBAAqB,CAAC,CAAC;YACvD,MAAM,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAEhE,mEAAmE;YACnE,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,UAAU,GAAG,GAAG,CAAC,CAAC;YAEhD,+CAA+C;YAC/C,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;
YACjC,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAC1E,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC;gBACnC,CAAC,CAAC,GAAG,UAAU,0BAA0B,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;gBAChE,CAAC,CAAC,kCAAkC,UAAU,YAAY,CAAC;YAE7D,OAAO;gBACL,IAAI,EAAE,WAAW;gBACjB,MAAM,EAAE,KAAK;gBACb,KAAK;gBACL,MAAM;gBACN,OAAO,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC;gBAC/B,MAAM,EAAE,UAAU;gBAClB,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;aAChC,CAAC;QACJ,CAAC;IACH,CAAC;CACF,CAAC"}
@@ -0,0 +1,4 @@
1
+ export { runEval } from './runner.js';
2
+ export { parseEvalTask } from './task.js';
3
+ export { computeScore } from './scorer.js';
4
+ export type { EvalTask, EvalResult, EvalCheckResult, EvalProcessMetrics, EvalRunOptions, } from './types.js';
@@ -0,0 +1,4 @@
1
+ export { runEval } from './runner.js';
2
+ export { parseEvalTask } from './task.js';
3
+ export { computeScore } from './scorer.js';
4
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/eval/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AACtC,OAAO,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC"}
@@ -0,0 +1,5 @@
1
+ import type { EvalTask, EvalResult, EvalRunOptions } from './types.js';
2
+ /**
3
+ * Run a full evaluation: spawn agents, wait for completion, run checks, score.
4
+ */
5
+ export declare function runEval(task: EvalTask, opts?: EvalRunOptions): Promise<EvalResult>;