cognitive-core 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329) hide show
  1. package/README.md +302 -116
  2. package/SKILL.md +193 -0
  3. package/dist/agents/index.d.ts +3 -0
  4. package/dist/agents/index.d.ts.map +1 -0
  5. package/dist/agents/index.js +5 -0
  6. package/dist/agents/index.js.map +1 -0
  7. package/dist/agents/mock-provider.d.ts +23 -0
  8. package/dist/agents/mock-provider.d.ts.map +1 -0
  9. package/dist/agents/mock-provider.js +71 -0
  10. package/dist/agents/mock-provider.js.map +1 -0
  11. package/dist/agents/types.d.ts +98 -0
  12. package/dist/agents/types.d.ts.map +1 -0
  13. package/dist/agents/types.js +44 -0
  14. package/dist/agents/types.js.map +1 -0
  15. package/dist/atlas.d.ts +196 -0
  16. package/dist/atlas.d.ts.map +1 -0
  17. package/dist/atlas.js +373 -0
  18. package/dist/atlas.js.map +1 -0
  19. package/dist/bin/cognitive-core.d.ts +18 -0
  20. package/dist/bin/cognitive-core.d.ts.map +1 -0
  21. package/dist/bin/cognitive-core.js +419 -0
  22. package/dist/bin/cognitive-core.js.map +1 -0
  23. package/dist/embeddings/bm25.d.ts +104 -0
  24. package/dist/embeddings/bm25.d.ts.map +1 -0
  25. package/dist/embeddings/bm25.js +264 -0
  26. package/dist/embeddings/bm25.js.map +1 -0
  27. package/dist/embeddings/index.d.ts +12 -0
  28. package/dist/embeddings/index.d.ts.map +1 -0
  29. package/dist/embeddings/index.js +16 -0
  30. package/dist/embeddings/index.js.map +1 -0
  31. package/dist/embeddings/manager.d.ts +112 -0
  32. package/dist/embeddings/manager.d.ts.map +1 -0
  33. package/dist/embeddings/manager.js +215 -0
  34. package/dist/embeddings/manager.js.map +1 -0
  35. package/dist/embeddings/provider.d.ts +101 -0
  36. package/dist/embeddings/provider.d.ts.map +1 -0
  37. package/dist/embeddings/provider.js +232 -0
  38. package/dist/embeddings/provider.js.map +1 -0
  39. package/dist/embeddings/vector-store.d.ts +101 -0
  40. package/dist/embeddings/vector-store.d.ts.map +1 -0
  41. package/dist/embeddings/vector-store.js +256 -0
  42. package/dist/embeddings/vector-store.js.map +1 -0
  43. package/dist/factory.d.ts +193 -0
  44. package/dist/factory.d.ts.map +1 -0
  45. package/dist/factory.js +109 -0
  46. package/dist/factory.js.map +1 -0
  47. package/dist/index.d.ts +30 -453
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +84 -509
  50. package/dist/index.js.map +1 -0
  51. package/dist/learning/analyzer.d.ts +110 -0
  52. package/dist/learning/analyzer.d.ts.map +1 -0
  53. package/dist/learning/analyzer.js +213 -0
  54. package/dist/learning/analyzer.js.map +1 -0
  55. package/dist/learning/effectiveness.d.ts +158 -0
  56. package/dist/learning/effectiveness.d.ts.map +1 -0
  57. package/dist/learning/effectiveness.js +251 -0
  58. package/dist/learning/effectiveness.js.map +1 -0
  59. package/dist/learning/index.d.ts +8 -0
  60. package/dist/learning/index.d.ts.map +1 -0
  61. package/dist/learning/index.js +11 -0
  62. package/dist/learning/index.js.map +1 -0
  63. package/dist/learning/llm-extractor.d.ts +88 -0
  64. package/dist/learning/llm-extractor.d.ts.map +1 -0
  65. package/dist/learning/llm-extractor.js +372 -0
  66. package/dist/learning/llm-extractor.js.map +1 -0
  67. package/dist/learning/meta-learner.d.ts +80 -0
  68. package/dist/learning/meta-learner.d.ts.map +1 -0
  69. package/dist/learning/meta-learner.js +355 -0
  70. package/dist/learning/meta-learner.js.map +1 -0
  71. package/dist/learning/pipeline.d.ts +65 -0
  72. package/dist/learning/pipeline.d.ts.map +1 -0
  73. package/dist/learning/pipeline.js +170 -0
  74. package/dist/learning/pipeline.js.map +1 -0
  75. package/dist/learning/playbook-extractor.d.ts +113 -0
  76. package/dist/learning/playbook-extractor.d.ts.map +1 -0
  77. package/dist/learning/playbook-extractor.js +523 -0
  78. package/dist/learning/playbook-extractor.js.map +1 -0
  79. package/dist/learning/usage-inference.d.ts +82 -0
  80. package/dist/learning/usage-inference.d.ts.map +1 -0
  81. package/dist/learning/usage-inference.js +261 -0
  82. package/dist/learning/usage-inference.js.map +1 -0
  83. package/dist/mcp/index.d.ts +6 -0
  84. package/dist/mcp/index.d.ts.map +1 -0
  85. package/dist/mcp/index.js +6 -0
  86. package/dist/mcp/index.js.map +1 -0
  87. package/dist/mcp/playbook-server.d.ts +120 -0
  88. package/dist/mcp/playbook-server.d.ts.map +1 -0
  89. package/dist/mcp/playbook-server.js +427 -0
  90. package/dist/mcp/playbook-server.js.map +1 -0
  91. package/dist/memory/curated-loader.d.ts +62 -0
  92. package/dist/memory/curated-loader.d.ts.map +1 -0
  93. package/dist/memory/curated-loader.js +106 -0
  94. package/dist/memory/curated-loader.js.map +1 -0
  95. package/dist/memory/experience.d.ts +122 -0
  96. package/dist/memory/experience.d.ts.map +1 -0
  97. package/dist/memory/experience.js +392 -0
  98. package/dist/memory/experience.js.map +1 -0
  99. package/dist/memory/index.d.ts +6 -0
  100. package/dist/memory/index.d.ts.map +1 -0
  101. package/dist/memory/index.js +9 -0
  102. package/dist/memory/index.js.map +1 -0
  103. package/dist/memory/meta.d.ts +90 -0
  104. package/dist/memory/meta.d.ts.map +1 -0
  105. package/dist/memory/meta.js +362 -0
  106. package/dist/memory/meta.js.map +1 -0
  107. package/dist/memory/playbook.d.ts +133 -0
  108. package/dist/memory/playbook.d.ts.map +1 -0
  109. package/dist/memory/playbook.js +357 -0
  110. package/dist/memory/playbook.js.map +1 -0
  111. package/dist/memory/system.d.ts +167 -0
  112. package/dist/memory/system.d.ts.map +1 -0
  113. package/dist/memory/system.js +383 -0
  114. package/dist/memory/system.js.map +1 -0
  115. package/dist/runtime/backends/acp.d.ts +67 -0
  116. package/dist/runtime/backends/acp.d.ts.map +1 -0
  117. package/dist/runtime/backends/acp.js +290 -0
  118. package/dist/runtime/backends/acp.js.map +1 -0
  119. package/dist/runtime/backends/index.d.ts +5 -0
  120. package/dist/runtime/backends/index.d.ts.map +1 -0
  121. package/dist/runtime/backends/index.js +6 -0
  122. package/dist/runtime/backends/index.js.map +1 -0
  123. package/dist/runtime/backends/mock.d.ts +67 -0
  124. package/dist/runtime/backends/mock.d.ts.map +1 -0
  125. package/dist/runtime/backends/mock.js +153 -0
  126. package/dist/runtime/backends/mock.js.map +1 -0
  127. package/dist/runtime/backends/subprocess.d.ts +56 -0
  128. package/dist/runtime/backends/subprocess.d.ts.map +1 -0
  129. package/dist/runtime/backends/subprocess.js +260 -0
  130. package/dist/runtime/backends/subprocess.js.map +1 -0
  131. package/dist/runtime/flows/learning.d.ts +73 -0
  132. package/dist/runtime/flows/learning.d.ts.map +1 -0
  133. package/dist/runtime/flows/learning.js +116 -0
  134. package/dist/runtime/flows/learning.js.map +1 -0
  135. package/dist/runtime/flows/validation.d.ts +122 -0
  136. package/dist/runtime/flows/validation.d.ts.map +1 -0
  137. package/dist/runtime/flows/validation.js +223 -0
  138. package/dist/runtime/flows/validation.js.map +1 -0
  139. package/dist/runtime/index.d.ts +6 -0
  140. package/dist/runtime/index.d.ts.map +1 -0
  141. package/dist/runtime/index.js +8 -0
  142. package/dist/runtime/index.js.map +1 -0
  143. package/dist/runtime/manager.d.ts +116 -0
  144. package/dist/runtime/manager.d.ts.map +1 -0
  145. package/dist/runtime/manager.js +416 -0
  146. package/dist/runtime/manager.js.map +1 -0
  147. package/dist/runtime/types.d.ts +138 -0
  148. package/dist/runtime/types.d.ts.map +1 -0
  149. package/dist/runtime/types.js +2 -0
  150. package/dist/runtime/types.js.map +1 -0
  151. package/dist/search/evaluator.d.ts +102 -0
  152. package/dist/search/evaluator.d.ts.map +1 -0
  153. package/dist/search/evaluator.js +352 -0
  154. package/dist/search/evaluator.js.map +1 -0
  155. package/dist/search/index.d.ts +7 -0
  156. package/dist/search/index.d.ts.map +1 -0
  157. package/dist/search/index.js +11 -0
  158. package/dist/search/index.js.map +1 -0
  159. package/dist/search/refinement-loop.d.ts +73 -0
  160. package/dist/search/refinement-loop.d.ts.map +1 -0
  161. package/dist/search/refinement-loop.js +245 -0
  162. package/dist/search/refinement-loop.js.map +1 -0
  163. package/dist/search/refinement-types.d.ts +154 -0
  164. package/dist/search/refinement-types.d.ts.map +1 -0
  165. package/dist/search/refinement-types.js +99 -0
  166. package/dist/search/refinement-types.js.map +1 -0
  167. package/dist/search/router.d.ts +61 -0
  168. package/dist/search/router.d.ts.map +1 -0
  169. package/dist/search/router.js +197 -0
  170. package/dist/search/router.js.map +1 -0
  171. package/dist/search/solver.d.ts +75 -0
  172. package/dist/search/solver.d.ts.map +1 -0
  173. package/dist/search/solver.js +216 -0
  174. package/dist/search/solver.js.map +1 -0
  175. package/dist/search/verification-runner.d.ts +125 -0
  176. package/dist/search/verification-runner.d.ts.map +1 -0
  177. package/dist/search/verification-runner.js +440 -0
  178. package/dist/search/verification-runner.js.map +1 -0
  179. package/dist/surfacing/index.d.ts +2 -0
  180. package/dist/surfacing/index.d.ts.map +1 -0
  181. package/dist/surfacing/index.js +2 -0
  182. package/dist/surfacing/index.js.map +1 -0
  183. package/dist/surfacing/skill-library.d.ts +158 -0
  184. package/dist/surfacing/skill-library.d.ts.map +1 -0
  185. package/dist/surfacing/skill-library.js +429 -0
  186. package/dist/surfacing/skill-library.js.map +1 -0
  187. package/dist/types/config.d.ts +1113 -0
  188. package/dist/types/config.d.ts.map +1 -0
  189. package/dist/types/config.js +274 -0
  190. package/dist/types/config.js.map +1 -0
  191. package/dist/types/index.d.ts +9 -0
  192. package/dist/types/index.d.ts.map +1 -0
  193. package/dist/types/index.js +14 -0
  194. package/dist/types/index.js.map +1 -0
  195. package/dist/types/memory.d.ts +339 -0
  196. package/dist/types/memory.d.ts.map +1 -0
  197. package/dist/types/memory.js +207 -0
  198. package/dist/types/memory.js.map +1 -0
  199. package/dist/types/meta.d.ts +146 -0
  200. package/dist/types/meta.d.ts.map +1 -0
  201. package/dist/types/meta.js +51 -0
  202. package/dist/types/meta.js.map +1 -0
  203. package/dist/types/outcome.d.ts +42 -0
  204. package/dist/types/outcome.d.ts.map +1 -0
  205. package/dist/types/outcome.js +50 -0
  206. package/dist/types/outcome.js.map +1 -0
  207. package/dist/types/playbook.d.ts +119 -0
  208. package/dist/types/playbook.d.ts.map +1 -0
  209. package/dist/types/playbook.js +71 -0
  210. package/dist/types/playbook.js.map +1 -0
  211. package/dist/types/step.d.ts +44 -0
  212. package/dist/types/step.d.ts.map +1 -0
  213. package/dist/types/step.js +32 -0
  214. package/dist/types/step.js.map +1 -0
  215. package/dist/types/task.d.ts +91 -0
  216. package/dist/types/task.d.ts.map +1 -0
  217. package/dist/types/task.js +39 -0
  218. package/dist/types/task.js.map +1 -0
  219. package/dist/types/trajectory.d.ts +221 -0
  220. package/dist/types/trajectory.d.ts.map +1 -0
  221. package/dist/types/trajectory.js +60 -0
  222. package/dist/types/trajectory.js.map +1 -0
  223. package/dist/utils/index.d.ts +4 -0
  224. package/dist/utils/index.d.ts.map +1 -0
  225. package/dist/utils/index.js +4 -0
  226. package/dist/utils/index.js.map +1 -0
  227. package/dist/utils/similarity.d.ts +31 -0
  228. package/dist/utils/similarity.d.ts.map +1 -0
  229. package/dist/utils/similarity.js +107 -0
  230. package/dist/utils/similarity.js.map +1 -0
  231. package/dist/utils/storage.d.ts +106 -0
  232. package/dist/utils/storage.d.ts.map +1 -0
  233. package/dist/utils/storage.js +203 -0
  234. package/dist/utils/storage.js.map +1 -0
  235. package/dist/utils/validation.d.ts +129 -0
  236. package/dist/utils/validation.d.ts.map +1 -0
  237. package/dist/utils/validation.js +171 -0
  238. package/dist/utils/validation.js.map +1 -0
  239. package/package.json +50 -34
  240. package/scripts/migrate-to-playbooks.ts +307 -0
  241. package/src/agents/index.ts +14 -0
  242. package/src/agents/mock-provider.ts +93 -0
  243. package/src/agents/types.ts +137 -0
  244. package/src/atlas.ts +560 -0
  245. package/src/bin/cognitive-core.ts +470 -0
  246. package/src/embeddings/bm25.ts +337 -0
  247. package/src/embeddings/index.ts +39 -0
  248. package/src/embeddings/manager.ts +288 -0
  249. package/src/embeddings/provider.ts +311 -0
  250. package/src/embeddings/vector-store.ts +353 -0
  251. package/src/factory.ts +263 -0
  252. package/src/index.ts +246 -0
  253. package/src/learning/analyzer.ts +335 -0
  254. package/src/learning/effectiveness.ts +428 -0
  255. package/src/learning/index.ts +58 -0
  256. package/src/learning/llm-extractor.ts +542 -0
  257. package/src/learning/meta-learner.ts +516 -0
  258. package/src/learning/pipeline.ts +244 -0
  259. package/src/learning/playbook-extractor.ts +702 -0
  260. package/src/learning/usage-inference.ts +372 -0
  261. package/src/mcp/index.ts +12 -0
  262. package/src/mcp/playbook-server.ts +565 -0
  263. package/src/memory/curated-loader.ts +160 -0
  264. package/src/memory/experience.ts +515 -0
  265. package/src/memory/index.ts +27 -0
  266. package/src/memory/meta.ts +506 -0
  267. package/src/memory/playbook.ts +493 -0
  268. package/src/memory/system.ts +551 -0
  269. package/src/runtime/backends/acp.ts +378 -0
  270. package/src/runtime/backends/index.ts +24 -0
  271. package/src/runtime/backends/mock.ts +218 -0
  272. package/src/runtime/backends/subprocess.ts +356 -0
  273. package/src/runtime/flows/learning.ts +183 -0
  274. package/src/runtime/flows/validation.ts +381 -0
  275. package/src/runtime/index.ts +53 -0
  276. package/src/runtime/manager.ts +541 -0
  277. package/src/runtime/types.ts +157 -0
  278. package/src/search/evaluator.ts +474 -0
  279. package/src/search/index.ts +59 -0
  280. package/src/search/refinement-loop.ts +363 -0
  281. package/src/search/refinement-types.ts +159 -0
  282. package/src/search/router.ts +261 -0
  283. package/src/search/solver.ts +303 -0
  284. package/src/search/verification-runner.ts +570 -0
  285. package/src/surfacing/index.ts +6 -0
  286. package/src/surfacing/skill-library.ts +594 -0
  287. package/src/types/config.ts +333 -0
  288. package/src/types/index.ts +130 -0
  289. package/src/types/memory.ts +270 -0
  290. package/src/types/meta.ts +218 -0
  291. package/src/types/outcome.ts +66 -0
  292. package/src/types/playbook.ts +196 -0
  293. package/src/types/step.ts +40 -0
  294. package/src/types/task.ts +52 -0
  295. package/src/types/trajectory.ts +80 -0
  296. package/src/utils/index.ts +38 -0
  297. package/src/utils/similarity.ts +139 -0
  298. package/src/utils/storage.ts +249 -0
  299. package/src/utils/validation.ts +286 -0
  300. package/tests/embeddings/bm25.test.ts +130 -0
  301. package/tests/embeddings/manager.test.ts +205 -0
  302. package/tests/integration/atlas.test.ts +266 -0
  303. package/tests/integration/e2e.test.ts +929 -0
  304. package/tests/learning/analyzer.test.ts +426 -0
  305. package/tests/learning/effectiveness.test.ts +542 -0
  306. package/tests/learning/pipeline.test.ts +176 -0
  307. package/tests/learning/playbook-extractor-provenance.test.ts +114 -0
  308. package/tests/learning/usage-inference.test.ts +254 -0
  309. package/tests/mcp/playbook-server.test.ts +252 -0
  310. package/tests/memory/experience.test.ts +198 -0
  311. package/tests/memory/playbook.test.ts +338 -0
  312. package/tests/memory/provenance.test.ts +639 -0
  313. package/tests/memory/system.test.ts +325 -0
  314. package/tests/runtime/agent-manager.test.ts +512 -0
  315. package/tests/runtime/mock-backend.test.ts +248 -0
  316. package/tests/search/refinement-loop.test.ts +468 -0
  317. package/tests/search/refinement.test.ts +267 -0
  318. package/tests/search/router.test.ts +427 -0
  319. package/tests/surfacing/skill-library.test.ts +292 -0
  320. package/tests/types/outcome.test.ts +147 -0
  321. package/tests/types/step.test.ts +133 -0
  322. package/tests/types/task.test.ts +158 -0
  323. package/tests/types/trajectory.test.ts +253 -0
  324. package/tests/utils/similarity.test.ts +188 -0
  325. package/tests/utils/validation.test.ts +252 -0
  326. package/tsconfig.json +25 -0
  327. package/vitest.config.ts +22 -0
  328. package/dist/index.d.mts +0 -466
  329. package/dist/index.mjs +0 -478
@@ -0,0 +1,570 @@
1
+ /**
2
+ * Verification Runner
3
+ *
4
+ * Provides common verification patterns for evaluating solutions:
5
+ * - Command execution (bash/shell)
6
+ * - Test runner integration
7
+ * - Code linting/type checking
8
+ * - Custom verification functions
9
+ *
10
+ * Used with SolutionEvaluator to verify trajectory outcomes.
11
+ */
12
+
13
+ import type { Trajectory } from '../types/trajectory.js';
14
+ import type { Task } from '../types/task.js';
15
+ import type { VerificationFunction, VerificationResult } from './evaluator.js';
16
+ import { execSync, spawn } from 'child_process';
17
+
18
/**
 * Describes one shell command used to verify a solution, together with the
 * rules for interpreting its outcome.
 */
export interface CommandVerificationConfig {
  /**
   * Shell command line to execute. May contain `{{...}}` placeholders such as
   * `{{solution}}` or `{{taskId}}`.
   */
  command: string;

  /** Directory the command is run in. */
  cwd?: string;

  /** Extra environment variables for the command. */
  env?: Record<string, string>;

  /** Maximum run time in milliseconds (default: 30000). */
  timeout?: number;

  /** Treat exit code 0 as success (default: true). */
  successOnZeroExit?: boolean;

  /** Output pattern that signals success. */
  successPattern?: RegExp;

  /** Output pattern that signals failure. */
  failurePattern?: RegExp;

  /** Turns the command output and exit code into a list of issues. */
  issueExtractor?: (output: string, exitCode: number) => VerificationIssue[];
}
39
+
40
/**
 * A single problem reported by a verification step.
 */
export interface VerificationIssue {
  /** Category of the problem. */
  type: 'incomplete' | 'incorrect' | 'error';
  /** Human-readable explanation. */
  description: string;
  /** How serious the problem is, when known. */
  severity?: 'critical' | 'major' | 'minor';
  /** Line number the problem refers to, when known. */
  line?: number;
  /** File the problem refers to, when known. */
  file?: string;
}
50
+
51
/**
 * Captured outcome of one executed command.
 */
export interface CommandResult {
  /** Everything the command wrote to standard output. */
  stdout: string;
  /** Everything the command wrote to standard error. */
  stderr: string;
  /** Exit status of the command. */
  exitCode: number;
  /** True when the command was stopped because it exceeded its timeout. */
  timedOut: boolean;
}
60
+
61
/**
 * Ready-made command configurations for common test runners, type checkers
 * and linters. Each entry is a factory returning a fresh
 * `CommandVerificationConfig`.
 */
export const TestRunners = {
  /** Vitest (Node.js) with the JSON reporter. */
  vitest(testPath?: string): CommandVerificationConfig {
    const target = testPath ?? '';
    return {
      command: `npx vitest run ${target} --reporter=json`,
      timeout: 60000,
      successOnZeroExit: true,
      issueExtractor: extractVitestIssues,
    };
  },

  /** Jest (Node.js) with JSON output. */
  jest(testPath?: string): CommandVerificationConfig {
    const target = testPath ?? '';
    return {
      command: `npx jest ${target} --json`,
      timeout: 60000,
      successOnZeroExit: true,
      issueExtractor: extractJestIssues,
    };
  },

  /** Mocha (Node.js) with the JSON reporter. */
  mocha(testPath?: string): CommandVerificationConfig {
    const target = testPath ?? '';
    return {
      command: `npx mocha ${target} --reporter json`,
      timeout: 60000,
      successOnZeroExit: true,
    };
  },

  /** pytest (Python), short tracebacks, quiet mode. */
  pytest(testPath?: string): CommandVerificationConfig {
    const target = testPath ?? '';
    return {
      command: `python -m pytest ${target} --tb=short -q`,
      timeout: 60000,
      successOnZeroExit: true,
      failurePattern: /FAILED|ERROR/,
      issueExtractor: extractPytestIssues,
    };
  },

  /** `go test`, verbose; runs every package when no path is given. */
  goTest(testPath?: string): CommandVerificationConfig {
    const target = testPath ?? './...';
    return {
      command: `go test ${target} -v`,
      timeout: 60000,
      successOnZeroExit: true,
      failurePattern: /--- FAIL:|FAIL\s+\w+/,
    };
  },

  /** `cargo test` (Rust) without output capturing. */
  cargoTest(testPath?: string): CommandVerificationConfig {
    const target = testPath ?? '';
    return {
      command: `cargo test ${target} -- --nocapture`,
      timeout: 120000,
      successOnZeroExit: true,
      failurePattern: /test .+ \.\.\. FAILED/,
    };
  },

  /** TypeScript type checking without emitting files. */
  typescript(): CommandVerificationConfig {
    return {
      command: 'npx tsc --noEmit',
      timeout: 60000,
      successOnZeroExit: true,
      issueExtractor: extractTypescriptIssues,
    };
  },

  /** ESLint with JSON output; lints the current directory by default. */
  eslint(path?: string): CommandVerificationConfig {
    const target = path ?? '.';
    return {
      command: `npx eslint ${target} --format json`,
      timeout: 30000,
      successOnZeroExit: true,
      issueExtractor: extractEslintIssues,
    };
  },
};
127
+
128
/**
 * VerificationRunner - executes verification commands and parses results.
 *
 * The working directory, timeout and environment variables passed to the
 * constructor are used for every command unless a call overrides them.
 */
export class VerificationRunner {
  private readonly defaultCwd: string;
  private readonly defaultTimeout: number;
  private readonly defaultEnv: Record<string, string>;

  /**
   * @param options Optional defaults: `cwd` (falls back to `process.cwd()`),
   *   `timeout` in milliseconds (falls back to 30000) and extra `env` variables.
   */
  constructor(options?: {
    cwd?: string;
    timeout?: number;
    env?: Record<string, string>;
  }) {
    this.defaultCwd = options?.cwd ?? process.cwd();
    this.defaultTimeout = options?.timeout ?? 30000;
    this.defaultEnv = options?.env ?? {};
  }

  /**
   * Create a verification function from a command configuration.
   *
   * The returned function interpolates the command, runs it, decides whether
   * it passed (see `determinePassed`), extracts issues and reports a
   * confidence: 0.5 on timeout, 0.9 when a pattern or issue extractor is
   * configured, 0.7 otherwise.
   */
  createVerifier(config: CommandVerificationConfig): VerificationFunction {
    return async (trajectory: Trajectory, task: Task): Promise<VerificationResult> => {
      // Interpolate command with trajectory/task values
      const command = this.interpolateCommand(config.command, trajectory, task);

      // Run the command
      const result = await this.runCommand(command, {
        cwd: config.cwd ?? this.defaultCwd,
        env: { ...this.defaultEnv, ...config.env },
        timeout: config.timeout ?? this.defaultTimeout,
      });

      const combinedOutput = result.stdout + '\n' + result.stderr;
      const passed = this.determinePassed(config, result, combinedOutput);

      // A configured extractor always runs; the generic one only on failure.
      let issues: VerificationIssue[] = [];
      if (config.issueExtractor) {
        issues = config.issueExtractor(combinedOutput, result.exitCode);
      } else if (!passed) {
        issues = this.extractDefaultIssues(combinedOutput, result.exitCode);
      }

      // Calculate confidence based on clarity of result
      let confidence = 0.7;
      if (result.timedOut) {
        confidence = 0.5;
      } else if (config.successPattern || config.failurePattern || config.issueExtractor) {
        confidence = 0.9;
      }

      return {
        passed,
        confidence,
        issues: issues.map(({ type, description, severity }) => ({
          type,
          description,
          severity,
        })),
        details: result.timedOut
          ? 'Verification timed out'
          : `Exit code: ${result.exitCode}\n${combinedOutput.slice(0, 2000)}`,
      };
    };
  }

  /**
   * Decide whether a finished command counts as passed.
   *
   * Rules, in order:
   * 1. a timeout always fails;
   * 2. a matching `failurePattern` always fails, even with exit code 0;
   * 3. unless `successOnZeroExit` is `false`, a non-zero exit code fails;
   * 4. a configured `successPattern` must match;
   * 5. otherwise the run passes only if some criterion was configured at all
   *    (exit-code check or failure pattern).
   */
  private determinePassed(
    config: CommandVerificationConfig,
    result: CommandResult,
    combinedOutput: string
  ): boolean {
    if (result.timedOut) {
      return false;
    }
    if (
      config.failurePattern &&
      VerificationRunner.patternMatches(config.failurePattern, combinedOutput)
    ) {
      return false;
    }

    const requireZeroExit = config.successOnZeroExit !== false;
    if (requireZeroExit && result.exitCode !== 0) {
      return false;
    }
    if (config.successPattern) {
      return VerificationRunner.patternMatches(config.successPattern, combinedOutput);
    }
    return requireZeroExit || config.failurePattern !== undefined;
  }

  /**
   * Test `pattern` against `text` from the start of the text.
   *
   * Regexes with the `g` or `y` flag remember `lastIndex` between calls, which
   * would make a reused config give different answers on successive runs.
   */
  private static patternMatches(pattern: RegExp, text: string): boolean {
    pattern.lastIndex = 0;
    return pattern.test(text);
  }

  /**
   * Run a command through the shell and capture its output.
   *
   * The returned promise never rejects: spawn errors are reported as exit
   * code 1 with the error message appended to `stderr`. On timeout the child
   * receives SIGTERM, then SIGKILL five seconds later if it is still running;
   * `timedOut` is set and the exit code falls back to 124 when none is
   * reported.
   */
  async runCommand(
    command: string,
    options: {
      cwd?: string;
      env?: Record<string, string>;
      timeout?: number;
    }
  ): Promise<CommandResult> {
    return new Promise((resolve) => {
      const cwd = options.cwd ?? this.defaultCwd;
      const timeout = options.timeout ?? this.defaultTimeout;
      const env = { ...process.env, ...this.defaultEnv, ...options.env };

      let stdout = '';
      let stderr = '';
      let timedOut = false;
      let settled = false;
      let forceKillTimer: ReturnType<typeof setTimeout> | undefined;

      const child = spawn(command, {
        shell: true,
        cwd,
        env,
      });

      const timer = setTimeout(() => {
        timedOut = true;
        child.kill('SIGTERM');
        // Force kill after 5 seconds if still running
        forceKillTimer = setTimeout(() => child.kill('SIGKILL'), 5000);
      }, timeout);

      // Both 'error' and 'close' may fire; settle once and always release the
      // timers so they neither keep the event loop alive nor signal a dead child.
      const finish = (result: CommandResult): void => {
        clearTimeout(timer);
        if (forceKillTimer !== undefined) {
          clearTimeout(forceKillTimer);
        }
        if (settled) {
          return;
        }
        settled = true;
        resolve(result);
      };

      child.stdout.on('data', (chunk: Buffer | string) => {
        stdout += chunk.toString();
      });

      child.stderr.on('data', (chunk: Buffer | string) => {
        stderr += chunk.toString();
      });

      child.on('close', (code) => {
        finish({
          stdout,
          stderr,
          exitCode: code ?? (timedOut ? 124 : 1),
          timedOut,
        });
      });

      child.on('error', (error) => {
        finish({
          stdout,
          stderr: stderr + '\n' + error.message,
          exitCode: 1,
          timedOut: false,
        });
      });
    });
  }

  /**
   * Run a command synchronously (for simple checks).
   *
   * Never throws: a failing or timed-out command is reported through the
   * returned `CommandResult`. As in `runCommand`, a timeout without a reported
   * exit status yields exit code 124.
   */
  runCommandSync(
    command: string,
    options?: {
      cwd?: string;
      env?: Record<string, string>;
      timeout?: number;
    }
  ): CommandResult {
    const cwd = options?.cwd ?? this.defaultCwd;
    const timeout = options?.timeout ?? this.defaultTimeout;
    const env = { ...process.env, ...this.defaultEnv, ...options?.env };

    try {
      const output = execSync(command, {
        cwd,
        env,
        timeout,
        encoding: 'utf-8',
        stdio: ['pipe', 'pipe', 'pipe'],
      });

      return {
        stdout: output,
        stderr: '',
        exitCode: 0,
        timedOut: false,
      };
    } catch (error: unknown) {
      const failure = (error ?? {}) as {
        stdout?: string | Buffer | null;
        stderr?: string | Buffer | null;
        status?: number | null;
        signal?: string | null;
        code?: string;
        killed?: boolean;
        message?: string;
      };

      // execSync reports a timeout as an error with code 'ETIMEDOUT'; its
      // errors do not carry the `killed` flag found on exec() callback errors.
      // The `killed` check is kept only as a fallback.
      const timedOut =
        failure.code === 'ETIMEDOUT' ||
        (failure.killed === true && failure.signal === 'SIGTERM');

      return {
        stdout: failure.stdout?.toString() ?? '',
        stderr: failure.stderr?.toString() ?? failure.message ?? '',
        exitCode: failure.status ?? (timedOut ? 124 : 1),
        timedOut,
      };
    }
  }

  /**
   * Interpolate command with trajectory/task values.
   *
   * Supported placeholders: `{{taskId}}`, `{{domain}}`, `{{trajectoryId}}` and
   * `{{solution}}` (empty unless the solution is a string).
   *
   * NOTE(security): values are inserted into the shell command line verbatim,
   * without quoting or escaping. Do not interpolate untrusted solutions unless
   * the command template itself guards against shell metacharacters.
   */
  private interpolateCommand(
    command: string,
    trajectory: Trajectory,
    task: Task
  ): string {
    const solution =
      typeof trajectory.outcome.solution === 'string'
        ? trajectory.outcome.solution
        : '';

    // Replacer functions keep `$&`, `$1`, `$$` etc. in the values literal;
    // a plain replacement string would expand them.
    return command
      .replace(/\{\{taskId\}\}/g, () => task.id)
      .replace(/\{\{domain\}\}/g, () => task.domain ?? '')
      .replace(/\{\{trajectoryId\}\}/g, () => trajectory.id)
      .replace(/\{\{solution\}\}/g, () => solution);
  }

  /**
   * Extract default issues from output.
   *
   * Produces at most one issue, and only for a non-zero exit code: the first
   * "error: ..." line (critical), else the first "fail: ..." line (major),
   * else a generic exit-code message (major).
   */
  private extractDefaultIssues(
    output: string,
    exitCode: number
  ): VerificationIssue[] {
    if (exitCode === 0) {
      return [];
    }

    // Look for common error patterns
    const errorMatch = output.match(/(?:error|Error|ERROR)[:\s]+(.+?)(?:\n|$)/);
    if (errorMatch) {
      return [
        {
          type: 'error',
          description: errorMatch[1].slice(0, 200),
          severity: 'critical',
        },
      ];
    }

    const failMatch = output.match(/(?:fail|Fail|FAIL)[:\s]+(.+?)(?:\n|$)/);
    if (failMatch) {
      return [
        {
          type: 'incorrect',
          description: failMatch[1].slice(0, 200),
          severity: 'major',
        },
      ];
    }

    return [
      {
        type: 'error',
        description: `Command failed with exit code ${exitCode}`,
        severity: 'major',
      },
    ];
  }

  /**
   * Create a composite verifier that runs multiple verifications.
   *
   * Verifiers run sequentially and stop after the first failed result that
   * contains a critical issue. The composite passes only if every executed
   * verifier passed; confidence is the mean over executed verifiers, or 0
   * when `configs` is empty (nothing was checked).
   */
  createCompositeVerifier(
    configs: CommandVerificationConfig[]
  ): VerificationFunction {
    const verifiers = configs.map((config) => this.createVerifier(config));

    return async (trajectory: Trajectory, task: Task): Promise<VerificationResult> => {
      const results: VerificationResult[] = [];

      for (const verifier of verifiers) {
        const result = await verifier(trajectory, task);
        results.push(result);

        // Fail fast on critical failure
        if (!result.passed && result.issues?.some((i) => i.severity === 'critical')) {
          break;
        }
      }

      // Aggregate results; guard the mean against 0 / 0 = NaN.
      const allPassed = results.every((r) => r.passed);
      const avgConfidence =
        results.length === 0
          ? 0
          : results.reduce((sum, r) => sum + r.confidence, 0) / results.length;
      const allIssues = results.flatMap((r) => r.issues ?? []);
      const details = results.map((r) => r.details).filter(Boolean).join('\n---\n');

      return {
        passed: allPassed,
        confidence: avgConfidence,
        issues: allIssues,
        details,
      };
    };
  }
}
408
+
409
/**
 * Factory for `VerificationRunner`.
 *
 * @param options Optional defaults (`cwd`, `timeout`, `env`) forwarded
 *   unchanged to the `VerificationRunner` constructor.
 * @returns A new runner instance.
 */
export function createVerificationRunner(options?: {
  cwd?: string;
  timeout?: number;
  env?: Record<string, string>;
}): VerificationRunner {
  const runner = new VerificationRunner(options);
  return runner;
}
419
+
420
+ // Issue extractors for common test runners
421
+
422
/**
 * Extract failed tests from Vitest output.
 *
 * Looks first for a JSON report (an object containing `"testResults"`) and
 * emits one issue per assertion whose status is `failed`. When the output
 * holds no JSON report, or the report cannot be parsed, it falls back to
 * matching `FAIL <file> ... <n> failed` in the plain-text output.
 *
 * @param output Combined stdout/stderr of the Vitest run.
 * @param _exitCode Unused.
 * @returns One `incorrect`/`major` issue per detected failure.
 */
function extractVitestIssues(output: string, _exitCode: number): VerificationIssue[] {
  interface JsonReport {
    testResults?: Array<{
      name?: string;
      assertionResults?: Array<{
        status?: string;
        fullName?: string;
        failureMessages?: string[];
      }>;
    }>;
  }

  const issues: VerificationIssue[] = [];
  let parsedReport = false;

  try {
    // Try to parse JSON output
    const jsonMatch = output.match(/\{[\s\S]*"testResults"[\s\S]*\}/);
    if (jsonMatch) {
      const data = JSON.parse(jsonMatch[0]) as JsonReport;
      for (const testResult of data.testResults ?? []) {
        for (const assertion of testResult.assertionResults ?? []) {
          if (assertion.status === 'failed') {
            issues.push({
              type: 'incorrect',
              description: `${assertion.fullName}: ${assertion.failureMessages?.[0]?.slice(0, 200) ?? 'Failed'}`,
              severity: 'major',
              file: testResult.name,
            });
          }
        }
      }
      parsedReport = true;
    }
  } catch {
    // Malformed report: handled by the text fallback below.
    parsedReport = false;
  }

  // The fallback must also run when no JSON was present at all, not only when
  // parsing threw.
  if (!parsedReport) {
    const failureMatches = output.matchAll(/FAIL\s+(.+?)\n.*?(\d+)\s+failed/g);
    for (const match of failureMatches) {
      issues.push({
        type: 'incorrect',
        description: `Test suite failed: ${match[1]}`,
        severity: 'major',
        file: match[1],
      });
    }
  }

  return issues;
}
458
+
459
/**
 * Extract failed tests from Jest output.
 *
 * Looks first for a JSON report (an object containing `"testResults"`) and
 * emits one issue per assertion whose status is `failed`. When the output
 * holds no JSON report, or the report cannot be parsed, it falls back to the
 * `● <test name>` lines of Jest's plain-text output.
 *
 * @param output Combined stdout/stderr of the Jest run.
 * @param _exitCode Unused.
 * @returns One `incorrect`/`major` issue per detected failure.
 */
function extractJestIssues(output: string, _exitCode: number): VerificationIssue[] {
  interface JsonReport {
    testResults?: Array<{
      name?: string;
      assertionResults?: Array<{
        status?: string;
        fullName?: string;
        failureMessages?: string[];
      }>;
    }>;
  }

  const issues: VerificationIssue[] = [];
  let parsedReport = false;

  try {
    const jsonMatch = output.match(/\{[\s\S]*"testResults"[\s\S]*\}/);
    if (jsonMatch) {
      const data = JSON.parse(jsonMatch[0]) as JsonReport;
      for (const testResult of data.testResults ?? []) {
        for (const assertion of testResult.assertionResults ?? []) {
          if (assertion.status === 'failed') {
            issues.push({
              type: 'incorrect',
              description: `${assertion.fullName}: ${assertion.failureMessages?.[0]?.slice(0, 200) ?? 'Failed'}`,
              severity: 'major',
              file: testResult.name,
            });
          }
        }
      }
      parsedReport = true;
    }
  } catch {
    // Malformed report: handled by the text fallback below.
    parsedReport = false;
  }

  // The fallback must also run when no JSON was present at all, not only when
  // parsing threw.
  if (!parsedReport) {
    const failureMatches = output.matchAll(/● (.+)/g);
    for (const match of failureMatches) {
      issues.push({
        type: 'incorrect',
        description: match[1].slice(0, 200),
        severity: 'major',
      });
    }
  }

  return issues;
}
492
+
493
/**
 * Extract issues from pytest output.
 *
 * Every `FAILED <id>` occurrence becomes an `incorrect`/`major` issue (the id
 * is cut at the first `::`), followed by one `error`/`critical` issue for
 * every `ERROR <text>` occurrence.
 *
 * @param output Combined stdout/stderr of the pytest run.
 * @param _exitCode Unused.
 */
function extractPytestIssues(output: string, _exitCode: number): VerificationIssue[] {
  // Failure lines first ...
  const failed = Array.from(
    output.matchAll(/FAILED\s+(.+?)(?:::|$)/g),
    (hit): VerificationIssue => ({
      type: 'incorrect',
      description: `Test failed: ${hit[1]}`,
      severity: 'major',
    })
  );

  // ... then error lines, truncated to 200 characters.
  const errored = Array.from(
    output.matchAll(/ERROR\s+(.+?)(?:\n|$)/g),
    (hit): VerificationIssue => ({
      type: 'error',
      description: hit[1].slice(0, 200),
      severity: 'critical',
    })
  );

  return [...failed, ...errored];
}
518
+
519
/**
 * Extract compiler errors from `tsc` output.
 *
 * Recognizes lines of the form `file(line,col): error TS####: message` and
 * reports each as an `error`/`critical` issue carrying the file name and line
 * number; the message is truncated to 200 characters.
 *
 * @param output Combined stdout/stderr of the `tsc` run.
 * @param _exitCode Unused.
 */
function extractTypescriptIssues(output: string, _exitCode: number): VerificationIssue[] {
  const diagnostics = output.matchAll(
    /(.+?)\((\d+),\d+\):\s+error\s+TS\d+:\s+(.+?)(?:\n|$)/g
  );

  return Array.from(diagnostics, (hit): VerificationIssue => {
    const [, fileName, lineText, message] = hit;
    return {
      type: 'error',
      description: message.slice(0, 200),
      severity: 'critical',
      file: fileName,
      line: parseInt(lineText, 10),
    };
  });
}
536
+
537
/**
 * Extract lint problems from ESLint output.
 *
 * Expects the `--format json` report (an array of per-file results). Because
 * callers may pass stdout and stderr concatenated, a report surrounded by
 * other text is recovered by parsing the span from the first `[` to the last
 * `]`. If no JSON array can be parsed, falls back to matching
 * `line:col  error  message  rule` lines of text output.
 *
 * Severity 2 maps to `major`, anything else to `minor`; fatal messages are
 * typed `error`, all others `incorrect`. Messages without a rule id (for
 * example parse errors) are reported without a rule prefix.
 *
 * @param output Combined stdout/stderr of the ESLint run.
 * @param _exitCode Unused.
 */
function extractEslintIssues(output: string, _exitCode: number): VerificationIssue[] {
  interface LintMessage {
    ruleId?: string | null;
    severity?: number;
    message?: string;
    line?: number;
    fatal?: boolean;
  }
  interface LintFileResult {
    filePath?: string;
    messages?: LintMessage[];
  }

  // Parse the whole text, or failing that the outermost [...] span in it.
  const parseReport = (text: string): unknown => {
    try {
      return JSON.parse(text);
    } catch (wholeTextError) {
      const start = text.indexOf('[');
      const end = text.lastIndexOf(']');
      if (start === -1 || end <= start) {
        throw wholeTextError;
      }
      return JSON.parse(text.slice(start, end + 1));
    }
  };

  const issues: VerificationIssue[] = [];

  try {
    const report = parseReport(output);
    if (!Array.isArray(report)) {
      throw new TypeError('ESLint JSON report is not an array');
    }

    for (const file of report as Array<LintFileResult | null>) {
      for (const message of file?.messages ?? []) {
        const severity: 'critical' | 'major' | 'minor' =
          message.severity === 2 ? 'major' : 'minor';

        issues.push({
          type: message.fatal ? 'error' : 'incorrect',
          // ruleId is null for fatal/parse errors; avoid printing "null: ...".
          description: message.ruleId
            ? `${message.ruleId}: ${message.message}`
            : `${message.message}`,
          severity,
          file: file?.filePath,
          line: message.line,
        });
      }
    }
  } catch {
    // ESLint output wasn't JSON
    const errorMatches = output.matchAll(/(\d+):\d+\s+error\s+(.+?)\s+(\S+)/g);
    for (const match of errorMatches) {
      issues.push({
        type: 'error',
        description: `${match[3]}: ${match[2]}`,
        severity: 'major',
        line: parseInt(match[1], 10),
      });
    }
  }

  return issues;
}
@@ -0,0 +1,6 @@
1
+ export {
2
+ SkillLibrary,
3
+ createSkillLibrary,
4
+ type AgentSkills,
5
+ type TierRefreshResult,
6
+ } from './skill-library.js';