cognitive-core 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +363 -2
- package/SKILL.md +193 -0
- package/dist/agents/index.d.ts +3 -0
- package/dist/agents/index.d.ts.map +1 -0
- package/dist/agents/index.js +5 -0
- package/dist/agents/index.js.map +1 -0
- package/dist/agents/mock-provider.d.ts +23 -0
- package/dist/agents/mock-provider.d.ts.map +1 -0
- package/dist/agents/mock-provider.js +71 -0
- package/dist/agents/mock-provider.js.map +1 -0
- package/dist/agents/types.d.ts +98 -0
- package/dist/agents/types.d.ts.map +1 -0
- package/dist/agents/types.js +44 -0
- package/dist/agents/types.js.map +1 -0
- package/dist/atlas.d.ts +196 -0
- package/dist/atlas.d.ts.map +1 -0
- package/dist/atlas.js +373 -0
- package/dist/atlas.js.map +1 -0
- package/dist/bin/cognitive-core.d.ts +18 -0
- package/dist/bin/cognitive-core.d.ts.map +1 -0
- package/dist/bin/cognitive-core.js +419 -0
- package/dist/bin/cognitive-core.js.map +1 -0
- package/dist/embeddings/bm25.d.ts +104 -0
- package/dist/embeddings/bm25.d.ts.map +1 -0
- package/dist/embeddings/bm25.js +264 -0
- package/dist/embeddings/bm25.js.map +1 -0
- package/dist/embeddings/index.d.ts +12 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +16 -0
- package/dist/embeddings/index.js.map +1 -0
- package/dist/embeddings/manager.d.ts +112 -0
- package/dist/embeddings/manager.d.ts.map +1 -0
- package/dist/embeddings/manager.js +215 -0
- package/dist/embeddings/manager.js.map +1 -0
- package/dist/embeddings/provider.d.ts +101 -0
- package/dist/embeddings/provider.d.ts.map +1 -0
- package/dist/embeddings/provider.js +232 -0
- package/dist/embeddings/provider.js.map +1 -0
- package/dist/embeddings/vector-store.d.ts +101 -0
- package/dist/embeddings/vector-store.d.ts.map +1 -0
- package/dist/embeddings/vector-store.js +256 -0
- package/dist/embeddings/vector-store.js.map +1 -0
- package/dist/factory.d.ts +193 -0
- package/dist/factory.d.ts.map +1 -0
- package/dist/factory.js +109 -0
- package/dist/factory.js.map +1 -0
- package/dist/index.d.ts +43 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +84 -0
- package/dist/index.js.map +1 -0
- package/dist/learning/analyzer.d.ts +110 -0
- package/dist/learning/analyzer.d.ts.map +1 -0
- package/dist/learning/analyzer.js +213 -0
- package/dist/learning/analyzer.js.map +1 -0
- package/dist/learning/effectiveness.d.ts +158 -0
- package/dist/learning/effectiveness.d.ts.map +1 -0
- package/dist/learning/effectiveness.js +251 -0
- package/dist/learning/effectiveness.js.map +1 -0
- package/dist/learning/index.d.ts +8 -0
- package/dist/learning/index.d.ts.map +1 -0
- package/dist/learning/index.js +11 -0
- package/dist/learning/index.js.map +1 -0
- package/dist/learning/llm-extractor.d.ts +88 -0
- package/dist/learning/llm-extractor.d.ts.map +1 -0
- package/dist/learning/llm-extractor.js +372 -0
- package/dist/learning/llm-extractor.js.map +1 -0
- package/dist/learning/meta-learner.d.ts +80 -0
- package/dist/learning/meta-learner.d.ts.map +1 -0
- package/dist/learning/meta-learner.js +355 -0
- package/dist/learning/meta-learner.js.map +1 -0
- package/dist/learning/pipeline.d.ts +65 -0
- package/dist/learning/pipeline.d.ts.map +1 -0
- package/dist/learning/pipeline.js +170 -0
- package/dist/learning/pipeline.js.map +1 -0
- package/dist/learning/playbook-extractor.d.ts +113 -0
- package/dist/learning/playbook-extractor.d.ts.map +1 -0
- package/dist/learning/playbook-extractor.js +523 -0
- package/dist/learning/playbook-extractor.js.map +1 -0
- package/dist/learning/usage-inference.d.ts +82 -0
- package/dist/learning/usage-inference.d.ts.map +1 -0
- package/dist/learning/usage-inference.js +261 -0
- package/dist/learning/usage-inference.js.map +1 -0
- package/dist/mcp/index.d.ts +6 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +6 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/mcp/playbook-server.d.ts +120 -0
- package/dist/mcp/playbook-server.d.ts.map +1 -0
- package/dist/mcp/playbook-server.js +427 -0
- package/dist/mcp/playbook-server.js.map +1 -0
- package/dist/memory/curated-loader.d.ts +62 -0
- package/dist/memory/curated-loader.d.ts.map +1 -0
- package/dist/memory/curated-loader.js +106 -0
- package/dist/memory/curated-loader.js.map +1 -0
- package/dist/memory/experience.d.ts +122 -0
- package/dist/memory/experience.d.ts.map +1 -0
- package/dist/memory/experience.js +392 -0
- package/dist/memory/experience.js.map +1 -0
- package/dist/memory/index.d.ts +6 -0
- package/dist/memory/index.d.ts.map +1 -0
- package/dist/memory/index.js +9 -0
- package/dist/memory/index.js.map +1 -0
- package/dist/memory/meta.d.ts +90 -0
- package/dist/memory/meta.d.ts.map +1 -0
- package/dist/memory/meta.js +362 -0
- package/dist/memory/meta.js.map +1 -0
- package/dist/memory/playbook.d.ts +133 -0
- package/dist/memory/playbook.d.ts.map +1 -0
- package/dist/memory/playbook.js +357 -0
- package/dist/memory/playbook.js.map +1 -0
- package/dist/memory/system.d.ts +167 -0
- package/dist/memory/system.d.ts.map +1 -0
- package/dist/memory/system.js +383 -0
- package/dist/memory/system.js.map +1 -0
- package/dist/runtime/backends/acp.d.ts +67 -0
- package/dist/runtime/backends/acp.d.ts.map +1 -0
- package/dist/runtime/backends/acp.js +290 -0
- package/dist/runtime/backends/acp.js.map +1 -0
- package/dist/runtime/backends/index.d.ts +5 -0
- package/dist/runtime/backends/index.d.ts.map +1 -0
- package/dist/runtime/backends/index.js +6 -0
- package/dist/runtime/backends/index.js.map +1 -0
- package/dist/runtime/backends/mock.d.ts +67 -0
- package/dist/runtime/backends/mock.d.ts.map +1 -0
- package/dist/runtime/backends/mock.js +153 -0
- package/dist/runtime/backends/mock.js.map +1 -0
- package/dist/runtime/backends/subprocess.d.ts +56 -0
- package/dist/runtime/backends/subprocess.d.ts.map +1 -0
- package/dist/runtime/backends/subprocess.js +260 -0
- package/dist/runtime/backends/subprocess.js.map +1 -0
- package/dist/runtime/flows/learning.d.ts +73 -0
- package/dist/runtime/flows/learning.d.ts.map +1 -0
- package/dist/runtime/flows/learning.js +116 -0
- package/dist/runtime/flows/learning.js.map +1 -0
- package/dist/runtime/flows/validation.d.ts +122 -0
- package/dist/runtime/flows/validation.d.ts.map +1 -0
- package/dist/runtime/flows/validation.js +223 -0
- package/dist/runtime/flows/validation.js.map +1 -0
- package/dist/runtime/index.d.ts +6 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +8 -0
- package/dist/runtime/index.js.map +1 -0
- package/dist/runtime/manager.d.ts +116 -0
- package/dist/runtime/manager.d.ts.map +1 -0
- package/dist/runtime/manager.js +416 -0
- package/dist/runtime/manager.js.map +1 -0
- package/dist/runtime/types.d.ts +138 -0
- package/dist/runtime/types.d.ts.map +1 -0
- package/dist/runtime/types.js +2 -0
- package/dist/runtime/types.js.map +1 -0
- package/dist/search/evaluator.d.ts +102 -0
- package/dist/search/evaluator.d.ts.map +1 -0
- package/dist/search/evaluator.js +352 -0
- package/dist/search/evaluator.js.map +1 -0
- package/dist/search/index.d.ts +7 -0
- package/dist/search/index.d.ts.map +1 -0
- package/dist/search/index.js +11 -0
- package/dist/search/index.js.map +1 -0
- package/dist/search/refinement-loop.d.ts +73 -0
- package/dist/search/refinement-loop.d.ts.map +1 -0
- package/dist/search/refinement-loop.js +245 -0
- package/dist/search/refinement-loop.js.map +1 -0
- package/dist/search/refinement-types.d.ts +154 -0
- package/dist/search/refinement-types.d.ts.map +1 -0
- package/dist/search/refinement-types.js +99 -0
- package/dist/search/refinement-types.js.map +1 -0
- package/dist/search/router.d.ts +61 -0
- package/dist/search/router.d.ts.map +1 -0
- package/dist/search/router.js +197 -0
- package/dist/search/router.js.map +1 -0
- package/dist/search/solver.d.ts +75 -0
- package/dist/search/solver.d.ts.map +1 -0
- package/dist/search/solver.js +216 -0
- package/dist/search/solver.js.map +1 -0
- package/dist/search/verification-runner.d.ts +125 -0
- package/dist/search/verification-runner.d.ts.map +1 -0
- package/dist/search/verification-runner.js +440 -0
- package/dist/search/verification-runner.js.map +1 -0
- package/dist/surfacing/index.d.ts +2 -0
- package/dist/surfacing/index.d.ts.map +1 -0
- package/dist/surfacing/index.js +2 -0
- package/dist/surfacing/index.js.map +1 -0
- package/dist/surfacing/skill-library.d.ts +158 -0
- package/dist/surfacing/skill-library.d.ts.map +1 -0
- package/dist/surfacing/skill-library.js +429 -0
- package/dist/surfacing/skill-library.js.map +1 -0
- package/dist/types/config.d.ts +1113 -0
- package/dist/types/config.d.ts.map +1 -0
- package/dist/types/config.js +274 -0
- package/dist/types/config.js.map +1 -0
- package/dist/types/index.d.ts +9 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +14 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/memory.d.ts +339 -0
- package/dist/types/memory.d.ts.map +1 -0
- package/dist/types/memory.js +207 -0
- package/dist/types/memory.js.map +1 -0
- package/dist/types/meta.d.ts +146 -0
- package/dist/types/meta.d.ts.map +1 -0
- package/dist/types/meta.js +51 -0
- package/dist/types/meta.js.map +1 -0
- package/dist/types/outcome.d.ts +42 -0
- package/dist/types/outcome.d.ts.map +1 -0
- package/dist/types/outcome.js +50 -0
- package/dist/types/outcome.js.map +1 -0
- package/dist/types/playbook.d.ts +119 -0
- package/dist/types/playbook.d.ts.map +1 -0
- package/dist/types/playbook.js +71 -0
- package/dist/types/playbook.js.map +1 -0
- package/dist/types/step.d.ts +44 -0
- package/dist/types/step.d.ts.map +1 -0
- package/dist/types/step.js +32 -0
- package/dist/types/step.js.map +1 -0
- package/dist/types/task.d.ts +91 -0
- package/dist/types/task.d.ts.map +1 -0
- package/dist/types/task.js +39 -0
- package/dist/types/task.js.map +1 -0
- package/dist/types/trajectory.d.ts +221 -0
- package/dist/types/trajectory.d.ts.map +1 -0
- package/dist/types/trajectory.js +60 -0
- package/dist/types/trajectory.js.map +1 -0
- package/dist/utils/index.d.ts +4 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +4 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/similarity.d.ts +31 -0
- package/dist/utils/similarity.d.ts.map +1 -0
- package/dist/utils/similarity.js +107 -0
- package/dist/utils/similarity.js.map +1 -0
- package/dist/utils/storage.d.ts +106 -0
- package/dist/utils/storage.d.ts.map +1 -0
- package/dist/utils/storage.js +203 -0
- package/dist/utils/storage.js.map +1 -0
- package/dist/utils/validation.d.ts +129 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +171 -0
- package/dist/utils/validation.js.map +1 -0
- package/package.json +61 -9
- package/scripts/migrate-to-playbooks.ts +307 -0
- package/src/agents/index.ts +14 -0
- package/src/agents/mock-provider.ts +93 -0
- package/src/agents/types.ts +137 -0
- package/src/atlas.ts +560 -0
- package/src/bin/cognitive-core.ts +470 -0
- package/src/embeddings/bm25.ts +337 -0
- package/src/embeddings/index.ts +39 -0
- package/src/embeddings/manager.ts +288 -0
- package/src/embeddings/provider.ts +311 -0
- package/src/embeddings/vector-store.ts +353 -0
- package/src/factory.ts +263 -0
- package/src/index.ts +246 -0
- package/src/learning/analyzer.ts +335 -0
- package/src/learning/effectiveness.ts +428 -0
- package/src/learning/index.ts +58 -0
- package/src/learning/llm-extractor.ts +542 -0
- package/src/learning/meta-learner.ts +516 -0
- package/src/learning/pipeline.ts +244 -0
- package/src/learning/playbook-extractor.ts +702 -0
- package/src/learning/usage-inference.ts +372 -0
- package/src/mcp/index.ts +12 -0
- package/src/mcp/playbook-server.ts +565 -0
- package/src/memory/curated-loader.ts +160 -0
- package/src/memory/experience.ts +515 -0
- package/src/memory/index.ts +27 -0
- package/src/memory/meta.ts +506 -0
- package/src/memory/playbook.ts +493 -0
- package/src/memory/system.ts +551 -0
- package/src/runtime/backends/acp.ts +378 -0
- package/src/runtime/backends/index.ts +24 -0
- package/src/runtime/backends/mock.ts +218 -0
- package/src/runtime/backends/subprocess.ts +356 -0
- package/src/runtime/flows/learning.ts +183 -0
- package/src/runtime/flows/validation.ts +381 -0
- package/src/runtime/index.ts +53 -0
- package/src/runtime/manager.ts +541 -0
- package/src/runtime/types.ts +157 -0
- package/src/search/evaluator.ts +474 -0
- package/src/search/index.ts +59 -0
- package/src/search/refinement-loop.ts +363 -0
- package/src/search/refinement-types.ts +159 -0
- package/src/search/router.ts +261 -0
- package/src/search/solver.ts +303 -0
- package/src/search/verification-runner.ts +570 -0
- package/src/surfacing/index.ts +6 -0
- package/src/surfacing/skill-library.ts +594 -0
- package/src/types/config.ts +333 -0
- package/src/types/index.ts +130 -0
- package/src/types/memory.ts +270 -0
- package/src/types/meta.ts +218 -0
- package/src/types/outcome.ts +66 -0
- package/src/types/playbook.ts +196 -0
- package/src/types/step.ts +40 -0
- package/src/types/task.ts +52 -0
- package/src/types/trajectory.ts +80 -0
- package/src/utils/index.ts +38 -0
- package/src/utils/similarity.ts +139 -0
- package/src/utils/storage.ts +249 -0
- package/src/utils/validation.ts +286 -0
- package/tests/embeddings/bm25.test.ts +130 -0
- package/tests/embeddings/manager.test.ts +205 -0
- package/tests/integration/atlas.test.ts +266 -0
- package/tests/integration/e2e.test.ts +929 -0
- package/tests/learning/analyzer.test.ts +426 -0
- package/tests/learning/effectiveness.test.ts +542 -0
- package/tests/learning/pipeline.test.ts +176 -0
- package/tests/learning/playbook-extractor-provenance.test.ts +114 -0
- package/tests/learning/usage-inference.test.ts +254 -0
- package/tests/mcp/playbook-server.test.ts +252 -0
- package/tests/memory/experience.test.ts +198 -0
- package/tests/memory/playbook.test.ts +338 -0
- package/tests/memory/provenance.test.ts +639 -0
- package/tests/memory/system.test.ts +325 -0
- package/tests/runtime/agent-manager.test.ts +512 -0
- package/tests/runtime/mock-backend.test.ts +248 -0
- package/tests/search/refinement-loop.test.ts +468 -0
- package/tests/search/refinement.test.ts +267 -0
- package/tests/search/router.test.ts +427 -0
- package/tests/surfacing/skill-library.test.ts +292 -0
- package/tests/types/outcome.test.ts +147 -0
- package/tests/types/step.test.ts +133 -0
- package/tests/types/task.test.ts +158 -0
- package/tests/types/trajectory.test.ts +253 -0
- package/tests/utils/similarity.test.ts +188 -0
- package/tests/utils/validation.test.ts +252 -0
- package/tsconfig.json +25 -0
- package/vitest.config.ts +22 -0
- package/index.d.ts +0 -4
- package/index.js +0 -4
|
@@ -0,0 +1,570 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Verification Runner
|
|
3
|
+
*
|
|
4
|
+
* Provides common verification patterns for evaluating solutions:
|
|
5
|
+
* - Command execution (bash/shell)
|
|
6
|
+
* - Test runner integration
|
|
7
|
+
* - Code linting/type checking
|
|
8
|
+
* - Custom verification functions
|
|
9
|
+
*
|
|
10
|
+
* Used with SolutionEvaluator to verify trajectory outcomes.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import type { Trajectory } from '../types/trajectory.js';
|
|
14
|
+
import type { Task } from '../types/task.js';
|
|
15
|
+
import type { VerificationFunction, VerificationResult } from './evaluator.js';
|
|
16
|
+
import { execSync, spawn } from 'child_process';
|
|
17
|
+
|
|
18
|
+
/**
 * Describes one shell command used to verify a solution, together with the
 * rules for interpreting its exit status and output.
 */
export interface CommandVerificationConfig {
  /** Shell command line; may contain placeholders such as {{solution}} or {{taskId}}. */
  command: string;

  /** Directory in which the command is executed. */
  cwd?: string;

  /** Additional environment variables for the command. */
  env?: Record<string, string>;

  /** Maximum run time in milliseconds (default: 30000). */
  timeout?: number;

  /** Treat exit code 0 as success (default: true). */
  successOnZeroExit?: boolean;

  /** Output pattern whose presence signals success. */
  successPattern?: RegExp;

  /** Output pattern whose presence signals failure. */
  failurePattern?: RegExp;

  /** Converts the command output and exit code into structured issues. */
  issueExtractor?: (output: string, exitCode: number) => VerificationIssue[];
}
|
|
39
|
+
|
|
40
|
+
/**
 * A single problem reported by a verification step.
 */
export interface VerificationIssue {
  /** Category of the problem. */
  type: 'incomplete' | 'incorrect' | 'error';

  /** Human-readable explanation of the problem. */
  description: string;

  /** How serious the problem is, when known. */
  severity?: 'critical' | 'major' | 'minor';

  /** Line number the problem refers to, when known. */
  line?: number;

  /** File the problem refers to, when known. */
  file?: string;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Captured outcome of one command execution.
 */
export interface CommandResult {
  /** Everything the command wrote to standard output. */
  stdout: string;

  /** Everything the command wrote to standard error. */
  stderr: string;

  /** Exit status reported for the command. */
  exitCode: number;

  /** True when the command was stopped because it exceeded its timeout. */
  timedOut: boolean;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Ready-made verification configurations for common test runners, the
 * TypeScript compiler and ESLint. Every factory returns a fresh
 * CommandVerificationConfig.
 */
export const TestRunners = {
  /** Vitest, using its JSON reporter. */
  vitest(testPath?: string): CommandVerificationConfig {
    const target = testPath ?? '';
    return {
      command: `npx vitest run ${target} --reporter=json`,
      timeout: 60000,
      successOnZeroExit: true,
      issueExtractor: extractVitestIssues,
    };
  },

  /** Jest, using its JSON output. */
  jest(testPath?: string): CommandVerificationConfig {
    const target = testPath ?? '';
    return {
      command: `npx jest ${target} --json`,
      timeout: 60000,
      successOnZeroExit: true,
      issueExtractor: extractJestIssues,
    };
  },

  /** Mocha, using its JSON reporter. */
  mocha(testPath?: string): CommandVerificationConfig {
    const target = testPath ?? '';
    return {
      command: `npx mocha ${target} --reporter json`,
      timeout: 60000,
      successOnZeroExit: true,
    };
  },

  /** pytest with short tracebacks and quiet output. */
  pytest(testPath?: string): CommandVerificationConfig {
    const target = testPath ?? '';
    return {
      command: `python -m pytest ${target} --tb=short -q`,
      timeout: 60000,
      successOnZeroExit: true,
      failurePattern: /FAILED|ERROR/,
      issueExtractor: extractPytestIssues,
    };
  },

  /** `go test`; runs every package (./...) when no path is given. */
  goTest(testPath?: string): CommandVerificationConfig {
    const target = testPath ?? './...';
    return {
      command: `go test ${target} -v`,
      timeout: 60000,
      successOnZeroExit: true,
      failurePattern: /--- FAIL:|FAIL\s+\w+/,
    };
  },

  /** `cargo test` with test output shown; uses a longer timeout than the others. */
  cargoTest(testPath?: string): CommandVerificationConfig {
    const target = testPath ?? '';
    return {
      command: `cargo test ${target} -- --nocapture`,
      timeout: 120000,
      successOnZeroExit: true,
      failurePattern: /test .+ \.\.\. FAILED/,
    };
  },

  /** TypeScript type checking without emitting files. */
  typescript(): CommandVerificationConfig {
    return {
      command: 'npx tsc --noEmit',
      timeout: 60000,
      successOnZeroExit: true,
      issueExtractor: extractTypescriptIssues,
    };
  },

  /** ESLint with JSON output; lints the current directory when no path is given. */
  eslint(path?: string): CommandVerificationConfig {
    const target = path ?? '.';
    return {
      command: `npx eslint ${target} --format json`,
      timeout: 30000,
      successOnZeroExit: true,
      issueExtractor: extractEslintIssues,
    };
  },
};
|
|
127
|
+
|
|
128
|
+
/**
 * VerificationRunner - executes verification commands and converts their exit
 * status and output into VerificationResult objects.
 *
 * Commands are run through a shell, so command templates and every value
 * interpolated into them must come from a trusted source or be quoted by the
 * caller.
 */
export class VerificationRunner {
  private defaultCwd: string;
  private defaultTimeout: number;
  private defaultEnv: Record<string, string>;

  /**
   * @param options.cwd Default working directory (falls back to process.cwd()).
   * @param options.timeout Default timeout in milliseconds (falls back to 30000).
   * @param options.env Variables layered over process.env for every command.
   */
  constructor(options?: {
    cwd?: string;
    timeout?: number;
    env?: Record<string, string>;
  }) {
    this.defaultCwd = options?.cwd ?? process.cwd();
    this.defaultTimeout = options?.timeout ?? 30000;
    this.defaultEnv = options?.env ?? {};
  }

  /**
   * Create a verification function from a command configuration.
   *
   * The returned function interpolates the command, runs it, decides
   * pass/fail, extracts issues and reports a confidence of 0.5 on timeout,
   * 0.9 when a pattern or issue extractor is configured, and 0.7 otherwise.
   */
  createVerifier(config: CommandVerificationConfig): VerificationFunction {
    return async (trajectory: Trajectory, task: Task): Promise<VerificationResult> => {
      const command = this.interpolateCommand(config.command, trajectory, task);

      const result = await this.runCommand(command, {
        cwd: config.cwd ?? this.defaultCwd,
        env: { ...this.defaultEnv, ...config.env },
        timeout: config.timeout ?? this.defaultTimeout,
      });

      const combinedOutput = result.stdout + '\n' + result.stderr;
      const passed = this.didPass(config, result, combinedOutput);

      // A configured extractor always runs; the default one only on failure.
      let issues: VerificationIssue[] = [];
      if (config.issueExtractor) {
        issues = config.issueExtractor(combinedOutput, result.exitCode);
      } else if (!passed) {
        issues = this.extractDefaultIssues(combinedOutput, result.exitCode);
      }

      let confidence: number;
      if (result.timedOut) {
        confidence = 0.5;
      } else if (config.successPattern || config.failurePattern || config.issueExtractor) {
        confidence = 0.9;
      } else {
        confidence = 0.7;
      }

      return {
        passed,
        confidence,
        issues: issues.map((issue) => ({
          type: issue.type,
          description: issue.description,
          severity: issue.severity,
        })),
        details: result.timedOut
          ? 'Verification timed out'
          : `Exit code: ${result.exitCode}\n${combinedOutput.slice(0, 2000)}`,
      };
    };
  }

  /**
   * Decide whether a finished command counts as a pass.
   *
   * - A timeout always fails.
   * - When the exit code is significant (successOnZeroExit is not false), a
   *   non-zero exit always fails; a zero exit passes unless a successPattern
   *   is configured and missing from the output.
   * - When the exit code is ignored, a matching failurePattern fails, a
   *   configured successPattern must match, and with neither pattern
   *   configured nothing can establish success, so the result is a failure.
   */
  private didPass(
    config: CommandVerificationConfig,
    result: CommandResult,
    output: string
  ): boolean {
    if (result.timedOut) {
      return false;
    }

    const matches = (pattern: RegExp): boolean => {
      // Global/sticky patterns remember lastIndex between test() calls.
      pattern.lastIndex = 0;
      return pattern.test(output);
    };

    if (config.successOnZeroExit !== false) {
      if (result.exitCode !== 0) {
        // e.g. "command not found" must not pass merely because the failure
        // pattern is absent from the output.
        return false;
      }
      return config.successPattern ? matches(config.successPattern) : true;
    }

    if (config.failurePattern && matches(config.failurePattern)) {
      return false;
    }
    if (config.successPattern) {
      return matches(config.successPattern);
    }
    return config.failurePattern !== undefined;
  }

  /**
   * Run a command through a shell and capture its output.
   *
   * Never rejects: spawn errors resolve with exit code 1 and the error
   * message appended to stderr. On timeout the child receives SIGTERM, then
   * SIGKILL five seconds later if it has still not closed; the result then
   * has timedOut set and exit code 124 when no exit code is available.
   */
  async runCommand(
    command: string,
    options: {
      cwd?: string;
      env?: Record<string, string>;
      timeout?: number;
    }
  ): Promise<CommandResult> {
    return new Promise((resolve) => {
      const cwd = options.cwd ?? this.defaultCwd;
      const timeout = options.timeout ?? this.defaultTimeout;
      const env = { ...process.env, ...this.defaultEnv, ...options.env };

      let stdout = '';
      let stderr = '';
      let timedOut = false;
      let settled = false;
      let forceKillTimer: ReturnType<typeof setTimeout> | undefined;

      const child = spawn(command, {
        shell: true,
        cwd,
        env,
      });

      const timer = setTimeout(() => {
        timedOut = true;
        child.kill('SIGTERM');
        // Force kill after 5 seconds if still running
        forceKillTimer = setTimeout(() => child.kill('SIGKILL'), 5000);
      }, timeout);

      const settle = (result: CommandResult): void => {
        if (settled) {
          return;
        }
        settled = true;
        resolve(result);
      };

      // Decode as a stream so multi-byte characters split across chunks survive.
      child.stdout.setEncoding('utf8');
      child.stderr.setEncoding('utf8');

      child.stdout.on('data', (data) => {
        stdout += data.toString();
      });

      child.stderr.on('data', (data) => {
        stderr += data.toString();
      });

      child.on('close', (code) => {
        clearTimeout(timer);
        // The process is gone: drop the pending SIGKILL so it cannot keep the
        // event loop alive for another five seconds.
        if (forceKillTimer !== undefined) {
          clearTimeout(forceKillTimer);
        }
        settle({
          stdout,
          stderr,
          exitCode: code ?? (timedOut ? 124 : 1),
          timedOut,
        });
      });

      child.on('error', (error) => {
        clearTimeout(timer);
        settle({
          stdout,
          stderr: stderr + '\n' + error.message,
          exitCode: 1,
          timedOut: false,
        });
      });
    });
  }

  /**
   * Run a command synchronously (for simple checks).
   *
   * Never throws: failures are reported through the returned CommandResult.
   * A timeout is reported with timedOut set and exit code 124 when the
   * process produced no exit status.
   */
  runCommandSync(
    command: string,
    options?: {
      cwd?: string;
      env?: Record<string, string>;
      timeout?: number;
    }
  ): CommandResult {
    const cwd = options?.cwd ?? this.defaultCwd;
    const timeout = options?.timeout ?? this.defaultTimeout;
    const env = { ...process.env, ...this.defaultEnv, ...options?.env };

    try {
      const output = execSync(command, {
        cwd,
        env,
        timeout,
        encoding: 'utf-8',
        stdio: ['pipe', 'pipe', 'pipe'],
      });

      return {
        stdout: output,
        stderr: '',
        exitCode: 0,
        timedOut: false,
      };
    } catch (error: unknown) {
      // execSync throws an Error carrying the spawnSync result fields.
      const failure = (error ?? {}) as {
        stdout?: { toString(): string } | null;
        stderr?: { toString(): string } | null;
        status?: number | null;
        signal?: string | null;
        code?: string | number;
        killed?: boolean;
        message?: string;
      };

      // A timeout surfaces as code 'ETIMEDOUT'; the killed/SIGTERM form is
      // kept for error objects that report it that way.
      const timedOut =
        failure.code === 'ETIMEDOUT' ||
        (failure.killed === true && failure.signal === 'SIGTERM');

      return {
        stdout: failure.stdout?.toString() ?? '',
        stderr: failure.stderr?.toString() ?? failure.message ?? String(error),
        exitCode: failure.status ?? (timedOut ? 124 : 1),
        timedOut,
      };
    }
  }

  /**
   * Interpolate command with trajectory/task values.
   *
   * Supported placeholders: {{taskId}}, {{domain}}, {{trajectoryId}} and
   * {{solution}} (empty unless the solution is a string). Values are inserted
   * literally in a single pass, so `$` sequences in a value are not treated
   * as replacement patterns and a value containing a placeholder is not
   * expanded again.
   *
   * NOTE(security): values are NOT shell-escaped. The command is later run
   * through a shell, so templates must quote placeholders appropriately.
   */
  private interpolateCommand(
    command: string,
    trajectory: Trajectory,
    task: Task
  ): string {
    const solution =
      typeof trajectory.outcome.solution === 'string'
        ? trajectory.outcome.solution
        : '';

    const values: Record<string, string> = {
      taskId: task.id,
      domain: task.domain ?? '',
      trajectoryId: trajectory.id,
      solution,
    };

    return command.replace(
      /\{\{(taskId|domain|trajectoryId|solution)\}\}/g,
      (_placeholder: string, key: string) => values[key] ?? ''
    );
  }

  /**
   * Extract default issues from output.
   *
   * Produces at most one issue, and only for a non-zero exit code: the first
   * "error" line, otherwise the first "fail" line, otherwise a generic
   * message naming the exit code.
   */
  private extractDefaultIssues(
    output: string,
    exitCode: number
  ): VerificationIssue[] {
    if (exitCode === 0) {
      return [];
    }

    const errorMatch = output.match(/(?:error|Error|ERROR)[:\s]+(.+?)(?:\n|$)/);
    if (errorMatch) {
      return [
        {
          type: 'error',
          description: errorMatch[1].slice(0, 200),
          severity: 'critical',
        },
      ];
    }

    const failMatch = output.match(/(?:fail|Fail|FAIL)[:\s]+(.+?)(?:\n|$)/);
    if (failMatch) {
      return [
        {
          type: 'incorrect',
          description: failMatch[1].slice(0, 200),
          severity: 'major',
        },
      ];
    }

    return [
      {
        type: 'error',
        description: `Command failed with exit code ${exitCode}`,
        severity: 'major',
      },
    ];
  }

  /**
   * Create a composite verifier that runs multiple verifications.
   *
   * Verifications run sequentially and stop early after a failed step that
   * reports a critical issue. The result passes only if every executed step
   * passed; confidence is the mean over executed steps, or 0 when no step
   * ran (empty config list).
   */
  createCompositeVerifier(
    configs: CommandVerificationConfig[]
  ): VerificationFunction {
    const verifiers = configs.map((config) => this.createVerifier(config));

    return async (trajectory: Trajectory, task: Task): Promise<VerificationResult> => {
      const results: VerificationResult[] = [];

      for (const verifier of verifiers) {
        const result = await verifier(trajectory, task);
        results.push(result);

        // Fail fast on critical failure
        if (!result.passed && result.issues?.some((i) => i.severity === 'critical')) {
          break;
        }
      }

      const allPassed = results.every((r) => r.passed);
      // Guard the empty case: 0 / 0 would yield NaN.
      const avgConfidence =
        results.length > 0
          ? results.reduce((sum, r) => sum + r.confidence, 0) / results.length
          : 0;
      const allIssues = results.flatMap((r) => r.issues ?? []);
      const details = results.map((r) => r.details).filter(Boolean).join('\n---\n');

      return {
        passed: allPassed,
        confidence: avgConfidence,
        issues: allIssues,
        details,
      };
    };
  }
}
|
|
408
|
+
|
|
409
|
+
/**
 * Factory helper: builds a VerificationRunner from the optional default
 * working directory, timeout and environment.
 */
export function createVerificationRunner(options?: {
  cwd?: string;
  timeout?: number;
  env?: Record<string, string>;
}): VerificationRunner {
  const runner = new VerificationRunner(options);
  return runner;
}
|
|
419
|
+
|
|
420
|
+
// Issue extractors for common test runners
|
|
421
|
+
|
|
422
|
+
/**
 * Extract failed assertions from Vitest output.
 *
 * Looks for a JSON report containing "testResults" and emits one issue per
 * assertion whose status is "failed". When no such report is present, or it
 * cannot be parsed or walked, falls back to scanning the text for
 * "FAIL <suite>" lines followed by an "<n> failed" line.
 *
 * @param output Combined command output.
 * @param _exitCode Unused; present to satisfy the issueExtractor signature.
 * @returns The failures found (empty when none are recognised).
 */
function extractVitestIssues(output: string, _exitCode: number): VerificationIssue[] {
  const jsonMatch = output.match(/\{[\s\S]*"testResults"[\s\S]*\}/);

  if (jsonMatch) {
    try {
      const data = JSON.parse(jsonMatch[0]);
      const parsedIssues: VerificationIssue[] = [];

      for (const testResult of data.testResults ?? []) {
        for (const assertion of testResult.assertionResults ?? []) {
          if (assertion.status === 'failed') {
            parsedIssues.push({
              type: 'incorrect',
              description: `${assertion.fullName}: ${assertion.failureMessages?.[0]?.slice(0, 200) ?? 'Failed'}`,
              severity: 'major',
              file: testResult.name,
            });
          }
        }
      }

      return parsedIssues;
    } catch {
      // Not a usable JSON report; fall through to pattern matching.
    }
  }

  // Fall back to pattern matching. This also covers output with no JSON at all.
  const fallbackIssues: VerificationIssue[] = [];
  const failureMatches = output.matchAll(/FAIL\s+(.+?)\n.*?(\d+)\s+failed/g);
  for (const match of failureMatches) {
    fallbackIssues.push({
      type: 'incorrect',
      description: `Test suite failed: ${match[1]}`,
      severity: 'major',
      file: match[1],
    });
  }

  return fallbackIssues;
}
|
|
458
|
+
|
|
459
|
+
/**
 * Extract failed assertions from Jest output.
 *
 * Looks for a JSON report containing "testResults" and emits one issue per
 * assertion whose status is "failed". When no such report is present, or it
 * cannot be parsed or walked, falls back to collecting the "● <title>" lines
 * from the text output.
 *
 * @param output Combined command output.
 * @param _exitCode Unused; present to satisfy the issueExtractor signature.
 * @returns The failures found (empty when none are recognised).
 */
function extractJestIssues(output: string, _exitCode: number): VerificationIssue[] {
  const jsonMatch = output.match(/\{[\s\S]*"testResults"[\s\S]*\}/);

  if (jsonMatch) {
    try {
      const data = JSON.parse(jsonMatch[0]);
      const parsedIssues: VerificationIssue[] = [];

      for (const testResult of data.testResults ?? []) {
        for (const assertion of testResult.assertionResults ?? []) {
          if (assertion.status === 'failed') {
            parsedIssues.push({
              type: 'incorrect',
              description: `${assertion.fullName}: ${assertion.failureMessages?.[0]?.slice(0, 200) ?? 'Failed'}`,
              severity: 'major',
              file: testResult.name,
            });
          }
        }
      }

      return parsedIssues;
    } catch {
      // Not a usable JSON report; fall through to pattern matching.
    }
  }

  // Text fallback. This also covers output with no JSON at all.
  const fallbackIssues: VerificationIssue[] = [];
  const failureMatches = output.matchAll(/● (.+)/g);
  for (const match of failureMatches) {
    fallbackIssues.push({
      type: 'incorrect',
      description: match[1].slice(0, 200),
      severity: 'major',
    });
  }

  return fallbackIssues;
}
|
|
492
|
+
|
|
493
|
+
/**
 * Extract failures and errors from pytest output.
 *
 * - Lines of the form "FAILED <node id> - <reason>" become 'incorrect'
 *   issues. The full node id (path::test) is kept in the description so that
 *   several failures in one file stay distinguishable; the path part before
 *   the first "::" is reported as the file.
 * - Any "ERROR <text>" occurrence becomes a critical 'error' issue carrying
 *   the rest of that line (truncated to 200 characters).
 *
 * @param output Combined command output.
 * @param _exitCode Unused; present to satisfy the issueExtractor signature.
 */
function extractPytestIssues(output: string, _exitCode: number): VerificationIssue[] {
  const issues: VerificationIssue[] = [];

  // Anchored per line (m flag) so each summary line yields exactly one issue.
  const failureMatches = output.matchAll(/^FAILED\s+(.+?)(?:\s+-\s.*)?$/gm);
  for (const match of failureMatches) {
    const nodeId = match[1] ?? '';
    issues.push({
      type: 'incorrect',
      description: `Test failed: ${nodeId}`,
      severity: 'major',
      file: nodeId.split('::', 1)[0] ?? nodeId,
    });
  }

  // Match pytest errors
  const errorMatches = output.matchAll(/ERROR\s+(.+?)(?:\n|$)/g);
  for (const match of errorMatches) {
    issues.push({
      type: 'error',
      description: match[1].slice(0, 200),
      severity: 'critical',
    });
  }

  return issues;
}
|
|
518
|
+
|
|
519
|
+
/**
 * Turn `tsc` diagnostics into issues.
 *
 * Recognises lines shaped like `file(line,col): error TS####: message` and
 * reports each as a critical 'error' issue with the file, the line number and
 * the message truncated to 200 characters.
 *
 * @param output Combined command output.
 * @param _exitCode Unused; present to satisfy the issueExtractor signature.
 */
function extractTypescriptIssues(output: string, _exitCode: number): VerificationIssue[] {
  const diagnosticPattern = /(.+?)\((\d+),\d+\):\s+error\s+TS\d+:\s+(.+?)(?:\n|$)/g;

  return Array.from(
    output.matchAll(diagnosticPattern),
    (diagnostic): VerificationIssue => {
      const [, sourceFile, lineText, message] = diagnostic;
      return {
        type: 'error',
        description: message.slice(0, 200),
        severity: 'critical',
        file: sourceFile,
        line: parseInt(lineText, 10),
      };
    }
  );
}
|
|
536
|
+
|
|
537
|
+
/**
 * Extract issues from ESLint output.
 *
 * Expects the JSON formatter's array of per-file results. Severity 2 maps to
 * 'major' and anything else to 'minor'; fatal messages are typed 'error'.
 * Messages without a rule id (e.g. fatal parse errors) are described by
 * their message text alone.
 *
 * If the output is not valid JSON, or is not iterable as a result list,
 * falls back to scanning stylish-style lines of the form
 * "<line>:<col>  error  <message>  <rule-id>".
 *
 * @param output Combined command output.
 * @param _exitCode Unused; present to satisfy the issueExtractor signature.
 */
function extractEslintIssues(output: string, _exitCode: number): VerificationIssue[] {
  const issues: VerificationIssue[] = [];

  try {
    const data = JSON.parse(output);
    for (const file of data) {
      for (const message of file.messages ?? []) {
        const severity: 'critical' | 'major' | 'minor' =
          message.severity === 2 ? 'major' : 'minor';

        issues.push({
          type: message.fatal ? 'error' : 'incorrect',
          // ruleId is null for fatal errors; avoid rendering "null: ...".
          description: message.ruleId
            ? `${message.ruleId}: ${message.message}`
            : `${message.message}`,
          severity,
          file: file.filePath,
          line: message.line,
        });
      }
    }
  } catch {
    // ESLint output wasn't JSON.
    // Anchoring at end of line makes the lazy message group run up to the
    // final token (the rule id) instead of stopping after the first word.
    const errorMatches = output.matchAll(/(\d+):\d+\s+error\s+(.+?)\s+(\S+)\s*$/gm);
    for (const match of errorMatches) {
      issues.push({
        type: 'error',
        description: `${match[3]}: ${match[2]}`,
        severity: 'major',
        line: parseInt(match[1], 10),
      });
    }
  }

  return issues;
}
|