@monoes/monomindcli 1.11.11 → 1.11.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316) hide show
  1. package/.claude/commands/mastermind/idea.md +1 -1
  2. package/.claude/commands/mastermind/master.md +1 -1
  3. package/.claude/skills/mastermind/_protocol.md +4 -4
  4. package/.claude/skills/mastermind/autodev.md +1 -1
  5. package/.claude/skills/mastermind/build.md +3 -3
  6. package/.claude/skills/mastermind/content.md +3 -3
  7. package/.claude/skills/mastermind/createorg.md +2 -2
  8. package/.claude/skills/mastermind/finance.md +3 -3
  9. package/.claude/skills/mastermind/marketing.md +3 -3
  10. package/.claude/skills/mastermind/ops.md +3 -3
  11. package/.claude/skills/mastermind/release.md +3 -3
  12. package/.claude/skills/mastermind/research.md +3 -3
  13. package/.claude/skills/mastermind/review.md +3 -3
  14. package/.claude/skills/mastermind/sales.md +3 -3
  15. package/dist/src/init/statusline-generator.js +3 -3
  16. package/dist/src/observability/replay-reader.d.ts +1 -1
  17. package/dist/src/observability/replay-reader.d.ts.map +1 -1
  18. package/dist/src/update/checker.d.ts.map +1 -1
  19. package/dist/src/update/checker.js +24 -7
  20. package/dist/src/update/checker.js.map +1 -1
  21. package/dist/src/update/index.d.ts.map +1 -1
  22. package/dist/src/update/index.js +3 -6
  23. package/dist/src/update/index.js.map +1 -1
  24. package/dist/tsconfig.tsbuildinfo +1 -1
  25. package/package.json +1 -1
  26. package/dist/src/agents/halt-signal.d.ts +0 -25
  27. package/dist/src/agents/halt-signal.d.ts.map +0 -1
  28. package/dist/src/agents/halt-signal.js +0 -76
  29. package/dist/src/agents/halt-signal.js.map +0 -1
  30. package/dist/src/agents/index.d.ts +0 -18
  31. package/dist/src/agents/index.d.ts.map +0 -1
  32. package/dist/src/agents/index.js +0 -13
  33. package/dist/src/agents/index.js.map +0 -1
  34. package/dist/src/agents/managed-agent.d.ts +0 -41
  35. package/dist/src/agents/managed-agent.d.ts.map +0 -1
  36. package/dist/src/agents/managed-agent.js +0 -69
  37. package/dist/src/agents/managed-agent.js.map +0 -1
  38. package/dist/src/agents/prompt-experiment.d.ts +0 -23
  39. package/dist/src/agents/prompt-experiment.d.ts.map +0 -1
  40. package/dist/src/agents/prompt-experiment.js +0 -49
  41. package/dist/src/agents/prompt-experiment.js.map +0 -1
  42. package/dist/src/agents/prompt-version-manager.d.ts +0 -22
  43. package/dist/src/agents/prompt-version-manager.d.ts.map +0 -1
  44. package/dist/src/agents/prompt-version-manager.js +0 -80
  45. package/dist/src/agents/prompt-version-manager.js.map +0 -1
  46. package/dist/src/agents/registry-query.d.ts +0 -71
  47. package/dist/src/agents/registry-query.d.ts.map +0 -1
  48. package/dist/src/agents/registry-query.js +0 -125
  49. package/dist/src/agents/registry-query.js.map +0 -1
  50. package/dist/src/agents/score-decay.d.ts +0 -19
  51. package/dist/src/agents/score-decay.d.ts.map +0 -1
  52. package/dist/src/agents/score-decay.js +0 -22
  53. package/dist/src/agents/score-decay.js.map +0 -1
  54. package/dist/src/agents/shared-instructions-loader.d.ts +0 -13
  55. package/dist/src/agents/shared-instructions-loader.d.ts.map +0 -1
  56. package/dist/src/agents/shared-instructions-loader.js +0 -40
  57. package/dist/src/agents/shared-instructions-loader.js.map +0 -1
  58. package/dist/src/agents/specialization-scorer.d.ts +0 -54
  59. package/dist/src/agents/specialization-scorer.d.ts.map +0 -1
  60. package/dist/src/agents/specialization-scorer.js +0 -212
  61. package/dist/src/agents/specialization-scorer.js.map +0 -1
  62. package/dist/src/agents/termination-watcher.d.ts +0 -30
  63. package/dist/src/agents/termination-watcher.d.ts.map +0 -1
  64. package/dist/src/agents/termination-watcher.js +0 -84
  65. package/dist/src/agents/termination-watcher.js.map +0 -1
  66. package/dist/src/agents/trigger-index.d.ts +0 -20
  67. package/dist/src/agents/trigger-index.d.ts.map +0 -1
  68. package/dist/src/agents/trigger-index.js +0 -38
  69. package/dist/src/agents/trigger-index.js.map +0 -1
  70. package/dist/src/agents/trigger-scanner.d.ts +0 -64
  71. package/dist/src/agents/trigger-scanner.d.ts.map +0 -1
  72. package/dist/src/agents/trigger-scanner.js +0 -308
  73. package/dist/src/agents/trigger-scanner.js.map +0 -1
  74. package/dist/src/agents/version-diff.d.ts +0 -18
  75. package/dist/src/agents/version-diff.d.ts.map +0 -1
  76. package/dist/src/agents/version-diff.js +0 -64
  77. package/dist/src/agents/version-diff.js.map +0 -1
  78. package/dist/src/agents/version-store.d.ts +0 -60
  79. package/dist/src/agents/version-store.d.ts.map +0 -1
  80. package/dist/src/agents/version-store.js +0 -235
  81. package/dist/src/agents/version-store.js.map +0 -1
  82. package/dist/src/benchmarks/pretrain/index.d.ts +0 -45
  83. package/dist/src/benchmarks/pretrain/index.d.ts.map +0 -1
  84. package/dist/src/benchmarks/pretrain/index.js +0 -404
  85. package/dist/src/benchmarks/pretrain/index.js.map +0 -1
  86. package/dist/src/commands/agent-wasm.d.ts +0 -14
  87. package/dist/src/commands/agent-wasm.d.ts.map +0 -1
  88. package/dist/src/commands/agent-wasm.js +0 -333
  89. package/dist/src/commands/agent-wasm.js.map +0 -1
  90. package/dist/src/commands/embeddings.d.ts.map +0 -1
  91. package/dist/src/commands/embeddings.js.map +0 -1
  92. package/dist/src/commands/ui.js +0 -68
  93. package/dist/src/consensus/index.d.ts +0 -7
  94. package/dist/src/consensus/index.d.ts.map +0 -1
  95. package/dist/src/consensus/index.js +0 -6
  96. package/dist/src/consensus/index.js.map +0 -1
  97. package/dist/src/context/context-provider.d.ts +0 -44
  98. package/dist/src/context/context-provider.d.ts.map +0 -1
  99. package/dist/src/context/context-provider.js +0 -25
  100. package/dist/src/context/context-provider.js.map +0 -1
  101. package/dist/src/context/git-state-provider.d.ts +0 -12
  102. package/dist/src/context/git-state-provider.d.ts.map +0 -1
  103. package/dist/src/context/git-state-provider.js +0 -34
  104. package/dist/src/context/git-state-provider.js.map +0 -1
  105. package/dist/src/context/index.d.ts +0 -12
  106. package/dist/src/context/index.d.ts.map +0 -1
  107. package/dist/src/context/index.js +0 -12
  108. package/dist/src/context/index.js.map +0 -1
  109. package/dist/src/context/project-conventions-provider.d.ts +0 -15
  110. package/dist/src/context/project-conventions-provider.d.ts.map +0 -1
  111. package/dist/src/context/project-conventions-provider.js +0 -19
  112. package/dist/src/context/project-conventions-provider.js.map +0 -1
  113. package/dist/src/context/prompt-assembler.d.ts +0 -26
  114. package/dist/src/context/prompt-assembler.d.ts.map +0 -1
  115. package/dist/src/context/prompt-assembler.js +0 -93
  116. package/dist/src/context/prompt-assembler.js.map +0 -1
  117. package/dist/src/context/task-history-provider.d.ts +0 -24
  118. package/dist/src/context/task-history-provider.d.ts.map +0 -1
  119. package/dist/src/context/task-history-provider.js +0 -32
  120. package/dist/src/context/task-history-provider.js.map +0 -1
  121. package/dist/src/context/user-preferences-provider.d.ts +0 -14
  122. package/dist/src/context/user-preferences-provider.d.ts.map +0 -1
  123. package/dist/src/context/user-preferences-provider.js +0 -27
  124. package/dist/src/context/user-preferences-provider.js.map +0 -1
  125. package/dist/src/dlq/dlq-reader.d.ts +0 -31
  126. package/dist/src/dlq/dlq-reader.d.ts.map +0 -1
  127. package/dist/src/dlq/dlq-reader.js +0 -81
  128. package/dist/src/dlq/dlq-reader.js.map +0 -1
  129. package/dist/src/dlq/dlq-writer.d.ts +0 -24
  130. package/dist/src/dlq/dlq-writer.d.ts.map +0 -1
  131. package/dist/src/dlq/dlq-writer.js +0 -65
  132. package/dist/src/dlq/dlq-writer.js.map +0 -1
  133. package/dist/src/dlq/index.d.ts +0 -10
  134. package/dist/src/dlq/index.d.ts.map +0 -1
  135. package/dist/src/dlq/index.js +0 -7
  136. package/dist/src/dlq/index.js.map +0 -1
  137. package/dist/src/eval/dataset-manager.d.ts +0 -33
  138. package/dist/src/eval/dataset-manager.d.ts.map +0 -1
  139. package/dist/src/eval/dataset-manager.js +0 -107
  140. package/dist/src/eval/dataset-manager.js.map +0 -1
  141. package/dist/src/eval/dataset-runner.d.ts +0 -23
  142. package/dist/src/eval/dataset-runner.d.ts.map +0 -1
  143. package/dist/src/eval/dataset-runner.js +0 -59
  144. package/dist/src/eval/dataset-runner.js.map +0 -1
  145. package/dist/src/eval/index.d.ts +0 -10
  146. package/dist/src/eval/index.d.ts.map +0 -1
  147. package/dist/src/eval/index.js +0 -7
  148. package/dist/src/eval/index.js.map +0 -1
  149. package/dist/src/eval/trace-collector.d.ts +0 -40
  150. package/dist/src/eval/trace-collector.d.ts.map +0 -1
  151. package/dist/src/eval/trace-collector.js +0 -102
  152. package/dist/src/eval/trace-collector.js.map +0 -1
  153. package/dist/src/graph/enrich.mjs +0 -362
  154. package/dist/src/infrastructure/in-memory-repositories.d.ts +0 -68
  155. package/dist/src/infrastructure/in-memory-repositories.d.ts.map +0 -1
  156. package/dist/src/infrastructure/in-memory-repositories.js +0 -264
  157. package/dist/src/infrastructure/in-memory-repositories.js.map +0 -1
  158. package/dist/src/interactive/interrupt.d.ts +0 -22
  159. package/dist/src/interactive/interrupt.d.ts.map +0 -1
  160. package/dist/src/interactive/interrupt.js +0 -71
  161. package/dist/src/interactive/interrupt.js.map +0 -1
  162. package/dist/src/mcp/deprecation-injector.d.ts +0 -25
  163. package/dist/src/mcp/deprecation-injector.d.ts.map +0 -1
  164. package/dist/src/mcp/deprecation-injector.js +0 -48
  165. package/dist/src/mcp/deprecation-injector.js.map +0 -1
  166. package/dist/src/mcp/tool-registry.d.ts +0 -61
  167. package/dist/src/mcp/tool-registry.d.ts.map +0 -1
  168. package/dist/src/mcp/tool-registry.js +0 -246
  169. package/dist/src/mcp/tool-registry.js.map +0 -1
  170. package/dist/src/mcp-tools/wasm-agent-tools.d.ts +0 -9
  171. package/dist/src/mcp-tools/wasm-agent-tools.d.ts.map +0 -1
  172. package/dist/src/mcp-tools/wasm-agent-tools.js +0 -230
  173. package/dist/src/mcp-tools/wasm-agent-tools.js.map +0 -1
  174. package/dist/src/model/complexity-scorer.d.ts +0 -21
  175. package/dist/src/model/complexity-scorer.d.ts.map +0 -1
  176. package/dist/src/model/complexity-scorer.js +0 -106
  177. package/dist/src/model/complexity-scorer.js.map +0 -1
  178. package/dist/src/model/index.d.ts +0 -4
  179. package/dist/src/model/index.d.ts.map +0 -1
  180. package/dist/src/model/index.js +0 -4
  181. package/dist/src/model/index.js.map +0 -1
  182. package/dist/src/model/model-settings.d.ts +0 -22
  183. package/dist/src/model/model-settings.d.ts.map +0 -1
  184. package/dist/src/model/model-settings.js +0 -33
  185. package/dist/src/model/model-settings.js.map +0 -1
  186. package/dist/src/model/model-tier-resolver.d.ts +0 -24
  187. package/dist/src/model/model-tier-resolver.d.ts.map +0 -1
  188. package/dist/src/model/model-tier-resolver.js +0 -65
  189. package/dist/src/model/model-tier-resolver.js.map +0 -1
  190. package/dist/src/monovector/capabilities.d.ts +0 -34
  191. package/dist/src/monovector/capabilities.d.ts.map +0 -1
  192. package/dist/src/monovector/capabilities.js +0 -37
  193. package/dist/src/monovector/capabilities.js.map +0 -1
  194. package/dist/src/orchestration/index.d.ts +0 -7
  195. package/dist/src/orchestration/index.d.ts.map +0 -1
  196. package/dist/src/orchestration/index.js +0 -6
  197. package/dist/src/orchestration/index.js.map +0 -1
  198. package/dist/src/orchestration/mode-dispatcher.d.ts +0 -11
  199. package/dist/src/orchestration/mode-dispatcher.d.ts.map +0 -1
  200. package/dist/src/orchestration/mode-dispatcher.js +0 -31
  201. package/dist/src/orchestration/mode-dispatcher.js.map +0 -1
  202. package/dist/src/orchestration/routing-modes.d.ts +0 -68
  203. package/dist/src/orchestration/routing-modes.d.ts.map +0 -1
  204. package/dist/src/orchestration/routing-modes.js +0 -180
  205. package/dist/src/orchestration/routing-modes.js.map +0 -1
  206. package/dist/src/plugins/tests/demo-plugin-store.d.ts +0 -7
  207. package/dist/src/plugins/tests/demo-plugin-store.d.ts.map +0 -1
  208. package/dist/src/plugins/tests/demo-plugin-store.js +0 -126
  209. package/dist/src/plugins/tests/demo-plugin-store.js.map +0 -1
  210. package/dist/src/plugins/tests/standalone-test.d.ts +0 -12
  211. package/dist/src/plugins/tests/standalone-test.d.ts.map +0 -1
  212. package/dist/src/plugins/tests/standalone-test.js +0 -188
  213. package/dist/src/plugins/tests/standalone-test.js.map +0 -1
  214. package/dist/src/plugins/tests/test-plugin-store.d.ts +0 -7
  215. package/dist/src/plugins/tests/test-plugin-store.d.ts.map +0 -1
  216. package/dist/src/plugins/tests/test-plugin-store.js +0 -206
  217. package/dist/src/plugins/tests/test-plugin-store.js.map +0 -1
  218. package/dist/src/runtime/headless.d.ts +0 -60
  219. package/dist/src/runtime/headless.d.ts.map +0 -1
  220. package/dist/src/runtime/headless.js +0 -284
  221. package/dist/src/runtime/headless.js.map +0 -1
  222. package/dist/src/services/agentic-flow-bridge.d.ts +0 -50
  223. package/dist/src/services/agentic-flow-bridge.d.ts.map +0 -1
  224. package/dist/src/services/agentic-flow-bridge.js +0 -95
  225. package/dist/src/services/agentic-flow-bridge.js.map +0 -1
  226. package/dist/src/services/container-worker-pool.d.ts +0 -197
  227. package/dist/src/services/container-worker-pool.d.ts.map +0 -1
  228. package/dist/src/services/container-worker-pool.js +0 -623
  229. package/dist/src/services/container-worker-pool.js.map +0 -1
  230. package/dist/src/services/index.d.ts +0 -13
  231. package/dist/src/services/index.d.ts.map +0 -1
  232. package/dist/src/services/index.js +0 -11
  233. package/dist/src/services/index.js.map +0 -1
  234. package/dist/src/services/worker-queue.d.ts +0 -201
  235. package/dist/src/services/worker-queue.d.ts.map +0 -1
  236. package/dist/src/services/worker-queue.js +0 -594
  237. package/dist/src/services/worker-queue.js.map +0 -1
  238. package/dist/src/swarm/communication-graph.d.ts +0 -25
  239. package/dist/src/swarm/communication-graph.d.ts.map +0 -1
  240. package/dist/src/swarm/communication-graph.js +0 -77
  241. package/dist/src/swarm/communication-graph.js.map +0 -1
  242. package/dist/src/swarm/flow-enforcer.d.ts +0 -31
  243. package/dist/src/swarm/flow-enforcer.d.ts.map +0 -1
  244. package/dist/src/swarm/flow-enforcer.js +0 -61
  245. package/dist/src/swarm/flow-enforcer.js.map +0 -1
  246. package/dist/src/swarm/flow-visualizer.d.ts +0 -19
  247. package/dist/src/swarm/flow-visualizer.d.ts.map +0 -1
  248. package/dist/src/swarm/flow-visualizer.js +0 -68
  249. package/dist/src/swarm/flow-visualizer.js.map +0 -1
  250. package/dist/src/transfer/deploy-seraphine.d.ts +0 -13
  251. package/dist/src/transfer/deploy-seraphine.d.ts.map +0 -1
  252. package/dist/src/transfer/deploy-seraphine.js +0 -205
  253. package/dist/src/transfer/deploy-seraphine.js.map +0 -1
  254. package/dist/src/transfer/store/tests/standalone-test.d.ts +0 -12
  255. package/dist/src/transfer/store/tests/standalone-test.d.ts.map +0 -1
  256. package/dist/src/transfer/store/tests/standalone-test.js +0 -190
  257. package/dist/src/transfer/store/tests/standalone-test.js.map +0 -1
  258. package/dist/src/transfer/test-seraphine.d.ts +0 -6
  259. package/dist/src/transfer/test-seraphine.d.ts.map +0 -1
  260. package/dist/src/transfer/test-seraphine.js +0 -105
  261. package/dist/src/transfer/test-seraphine.js.map +0 -1
  262. package/dist/src/transfer/tests/test-store.d.ts +0 -7
  263. package/dist/src/transfer/tests/test-store.d.ts.map +0 -1
  264. package/dist/src/transfer/tests/test-store.js +0 -214
  265. package/dist/src/transfer/tests/test-store.js.map +0 -1
  266. package/dist/src/ui/.monomind/data/pending-insights.jsonl +0 -0
  267. package/dist/src/ui/.monomind/data/ranked-context.json +0 -5
  268. package/dist/src/ui/.monomind/loops/mastermind-review-1778664132789.json +0 -16
  269. package/dist/src/ui/.monomind/sessions/current.json +0 -13
  270. package/dist/src/ui/.monomind/sessions/session-1776778451399.json +0 -15
  271. package/dist/src/ui/collector.mjs +0 -646
  272. package/dist/src/ui/dashboard.html +0 -9694
  273. package/dist/src/ui/data/mastermind-events.jsonl +0 -59
  274. package/dist/src/ui/data/mastermind-sessions.json +0 -1
  275. package/dist/src/ui/orgs.html +0 -1360
  276. package/dist/src/ui/server.mjs +0 -4334
  277. package/dist/src/workflow/condition-evaluator.d.ts +0 -10
  278. package/dist/src/workflow/condition-evaluator.d.ts.map +0 -1
  279. package/dist/src/workflow/condition-evaluator.js +0 -82
  280. package/dist/src/workflow/condition-evaluator.js.map +0 -1
  281. package/dist/src/workflow/context-resolver.d.ts +0 -12
  282. package/dist/src/workflow/context-resolver.d.ts.map +0 -1
  283. package/dist/src/workflow/context-resolver.js +0 -23
  284. package/dist/src/workflow/context-resolver.js.map +0 -1
  285. package/dist/src/workflow/dag-builder.d.ts +0 -17
  286. package/dist/src/workflow/dag-builder.d.ts.map +0 -1
  287. package/dist/src/workflow/dag-builder.js +0 -129
  288. package/dist/src/workflow/dag-builder.js.map +0 -1
  289. package/dist/src/workflow/dag-executor.d.ts +0 -9
  290. package/dist/src/workflow/dag-executor.d.ts.map +0 -1
  291. package/dist/src/workflow/dag-executor.js +0 -116
  292. package/dist/src/workflow/dag-executor.js.map +0 -1
  293. package/dist/src/workflow/dag-types.d.ts +0 -41
  294. package/dist/src/workflow/dag-types.d.ts.map +0 -1
  295. package/dist/src/workflow/dag-types.js +0 -8
  296. package/dist/src/workflow/dag-types.js.map +0 -1
  297. package/dist/src/workflow/dsl-parser.d.ts +0 -12
  298. package/dist/src/workflow/dsl-parser.d.ts.map +0 -1
  299. package/dist/src/workflow/dsl-parser.js +0 -20
  300. package/dist/src/workflow/dsl-parser.js.map +0 -1
  301. package/dist/src/workflow/dsl-schema.d.ts +0 -165
  302. package/dist/src/workflow/dsl-schema.d.ts.map +0 -1
  303. package/dist/src/workflow/dsl-schema.js +0 -82
  304. package/dist/src/workflow/dsl-schema.js.map +0 -1
  305. package/dist/src/workflow/index.d.ts +0 -13
  306. package/dist/src/workflow/index.d.ts.map +0 -1
  307. package/dist/src/workflow/index.js +0 -11
  308. package/dist/src/workflow/index.js.map +0 -1
  309. package/dist/src/workflow/template-engine.d.ts +0 -11
  310. package/dist/src/workflow/template-engine.d.ts.map +0 -1
  311. package/dist/src/workflow/template-engine.js +0 -40
  312. package/dist/src/workflow/template-engine.js.map +0 -1
  313. package/dist/src/workflow/workflow-executor.d.ts +0 -29
  314. package/dist/src/workflow/workflow-executor.d.ts.map +0 -1
  315. package/dist/src/workflow/workflow-executor.js +0 -227
  316. package/dist/src/workflow/workflow-executor.js.map +0 -1
@@ -1,65 +0,0 @@
1
- /**
2
- * DLQ Writer (Task 37)
3
- *
4
- * JSONL append-only storage for dead-letter queue entries.
5
- */
6
- import { randomUUID } from 'crypto';
7
- import { existsSync, mkdirSync, appendFileSync } from 'fs';
8
- import { join, resolve, sep } from 'path';
9
- export class DLQWriter {
10
- filePath;
11
- constructor(dataDir) {
12
- const resolvedDataDir = resolve(dataDir);
13
- const allowedRoot = resolve(process.env.MONOMIND_DATA_DIR ?? process.cwd());
14
- if (resolvedDataDir !== allowedRoot && !resolvedDataDir.startsWith(allowedRoot + sep)) {
15
- throw new Error(`DLQ dataDir escapes allowed root: ${resolvedDataDir}`);
16
- }
17
- if (!existsSync(resolvedDataDir)) {
18
- mkdirSync(resolvedDataDir, { recursive: true });
19
- }
20
- this.filePath = join(resolvedDataDir, 'dead-letter-queue.jsonl');
21
- }
22
- /** Enqueue a failed message into the DLQ */
23
- enqueue(input) {
24
- const lastAttempt = input.deliveryAttempts[input.deliveryAttempts.length - 1];
25
- const firstAttempt = input.deliveryAttempts[0];
26
- const entry = {
27
- messageId: randomUUID(),
28
- toolName: input.toolName,
29
- originalPayload: input.originalPayload,
30
- deliveryAttempts: input.deliveryAttempts,
31
- finalError: lastAttempt?.errorMessage ?? 'unknown',
32
- finalErrorType: lastAttempt?.errorType ?? 'unknown',
33
- agentId: input.agentId,
34
- swarmId: input.swarmId,
35
- createdAt: firstAttempt?.attemptedAt ?? new Date().toISOString(),
36
- archivedAt: new Date().toISOString(),
37
- status: 'pending',
38
- tags: input.tags ?? [],
39
- };
40
- // JSON.stringify can throw on circular references, BigInt, and non-serializable
41
- // values. originalPayload is `unknown` (caller-controlled), so a malicious or
42
- // malformed input could otherwise crash the writer mid-flight. Fall back to
43
- // a sanitized record so the audit trail is preserved.
44
- let serialized;
45
- try {
46
- serialized = JSON.stringify(entry);
47
- }
48
- catch {
49
- serialized = JSON.stringify({
50
- messageId: entry.messageId,
51
- toolName: entry.toolName,
52
- archivedAt: entry.archivedAt,
53
- status: 'pending',
54
- finalError: 'serialize_failed',
55
- });
56
- }
57
- appendFileSync(this.filePath, serialized + '\n', 'utf-8');
58
- return entry;
59
- }
60
- /** Get the file path (for reader/replayer) */
61
- getFilePath() {
62
- return this.filePath;
63
- }
64
- }
65
- //# sourceMappingURL=dlq-writer.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"dlq-writer.js","sourceRoot":"","sources":["../../../src/dlq/dlq-writer.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC;AAC3D,OAAO,EAAE,IAAI,EAAW,OAAO,EAAE,GAAG,EAAE,MAAM,MAAM,CAAC;AAanD,MAAM,OAAO,SAAS;IACH,QAAQ,CAAS;IAElC,YAAY,OAAe;QACzB,MAAM,eAAe,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;QACzC,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;QAC5E,IAAI,eAAe,KAAK,WAAW,IAAI,CAAC,eAAe,CAAC,UAAU,CAAC,WAAW,GAAG,GAAG,CAAC,EAAE,CAAC;YACtF,MAAM,IAAI,KAAK,CAAC,qCAAqC,eAAe,EAAE,CAAC,CAAC;QAC1E,CAAC;QACD,IAAI,CAAC,UAAU,CAAC,eAAe,CAAC,EAAE,CAAC;YACjC,SAAS,CAAC,eAAe,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAClD,CAAC;QACD,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,eAAe,EAAE,yBAAyB,CAAC,CAAC;IACnE,CAAC;IAED,4CAA4C;IAC5C,OAAO,CAAC,KAAmB;QACzB,MAAM,WAAW,GAAG,KAAK,CAAC,gBAAgB,CAAC,KAAK,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC9E,MAAM,YAAY,GAAG,KAAK,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC;QAE/C,MAAM,KAAK,GAAa;YACtB,SAAS,EAAE,UAAU,EAAE;YACvB,QAAQ,EAAE,KAAK,CAAC,QAAQ;YACxB,eAAe,EAAE,KAAK,CAAC,eAAe;YACtC,gBAAgB,EAAE,KAAK,CAAC,gBAAgB;YACxC,UAAU,EAAE,WAAW,EAAE,YAAY,IAAI,SAAS;YAClD,cAAc,EAAE,WAAW,EAAE,SAAS,IAAI,SAAS;YACnD,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,SAAS,EAAE,YAAY,EAAE,WAAW,IAAI,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YAChE,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACpC,MAAM,EAAE,SAAS;YACjB,IAAI,EAAE,KAAK,CAAC,IAAI,IAAI,EAAE;SACvB,CAAC;QAEF,gFAAgF;QAChF,8EAA8E;QAC9E,4EAA4E;QAC5E,sDAAsD;QACtD,IAAI,UAAkB,CAAC;QACvB,IAAI,CAAC;YACH,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QACrC,CAAC;QAAC,MAAM,CAAC;YACP,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC;gBAC1B,SAAS,EAAE,KAAK,CAAC,SAAS;gBAC1B,QAAQ,EAAE,KAAK,CAAC,QAAQ;gBACxB,UAAU,EAAE,KAAK,CAAC,UAAU;gBAC5B,MAAM,EAAE,SAAS;gBACjB,UAAU,EAAE,kBAAkB;aAC/B,CAAC,CAAC;QACL,CAAC;QACD,cAAc,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;QAC1D,OAAO,KAAK,CAAC;IACf,CAAC;IAED,8CAA8C;IAC9C,WAAW;QACT,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;CACF"}
@@ -1,10 +0,0 @@
1
- /**
2
- * DLQ Module Barrel (Task 37)
3
- */
4
- export { DLQWriter } from './dlq-writer.js';
5
- export type { EnqueueInput } from './dlq-writer.js';
6
- export { DLQReader } from './dlq-reader.js';
7
- export type { DLQListOptions } from './dlq-reader.js';
8
- export { DLQReplayer } from './dlq-replayer.js';
9
- export type { ToolCaller } from './dlq-replayer.js';
10
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/dlq/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAC5C,YAAY,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AACpD,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAC5C,YAAY,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAChD,YAAY,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC"}
@@ -1,7 +0,0 @@
1
- /**
2
- * DLQ Module Barrel (Task 37)
3
- */
4
- export { DLQWriter } from './dlq-writer.js';
5
- export { DLQReader } from './dlq-reader.js';
6
- export { DLQReplayer } from './dlq-replayer.js';
7
- //# sourceMappingURL=index.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/dlq/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAE5C,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAE5C,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC"}
@@ -1,33 +0,0 @@
1
- import type { EvalDataset, EvalDatasetEntry, EvalTrace } from '../../../shared/src/types/eval.js';
2
- export interface CreateFromTracesOpts {
3
- name: string;
4
- description: string;
5
- traces: EvalTrace[];
6
- agentSlugs?: string[];
7
- }
8
- export declare class DatasetManager {
9
- private datasetsPath;
10
- private entriesPath;
11
- constructor(datasetsPath: string, entriesPath: string);
12
- /**
13
- * Create a dataset from a set of filtered traces.
14
- */
15
- createFromTraces(opts: CreateFromTracesOpts): EvalDataset;
16
- /**
17
- * List all datasets.
18
- */
19
- listDatasets(): EvalDataset[];
20
- /**
21
- * Get entries for a specific dataset.
22
- */
23
- getEntries(datasetId: string): EvalDatasetEntry[];
24
- /**
25
- * Add a single trace to an existing dataset.
26
- */
27
- addTraceToDataset(datasetId: string, traceId: string): EvalDatasetEntry;
28
- /**
29
- * Export a dataset to a JSON file. Output path must be within `allowedRoot`.
30
- */
31
- exportToFile(datasetId: string, outputPath: string, allowedRoot?: string): void;
32
- }
33
- //# sourceMappingURL=dataset-manager.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"dataset-manager.d.ts","sourceRoot":"","sources":["../../../src/eval/dataset-manager.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAE,WAAW,EAAE,gBAAgB,EAAE,SAAS,EAAE,MAAM,mCAAmC,CAAC;AAGlG,MAAM,WAAW,oBAAoB;IACnC,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,SAAS,EAAE,CAAC;IACpB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;CACvB;AAED,qBAAa,cAAc;IACzB,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,WAAW,CAAS;gBAEhB,YAAY,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM;IAKrD;;OAEG;IACH,gBAAgB,CAAC,IAAI,EAAE,oBAAoB,GAAG,WAAW;IA4BzD;;OAEG;IACH,YAAY,IAAI,WAAW,EAAE;IAS7B;;OAEG;IACH,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,gBAAgB,EAAE;IASjD;;OAEG;IACH,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,gBAAgB;IAwBvE;;OAEG;IACH,YAAY,CAAC,SAAS,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI;CAahF"}
@@ -1,107 +0,0 @@
1
- /**
2
- * DatasetManager - JSONL-based eval dataset management (Task 33)
3
- */
4
- import { randomUUID } from 'crypto';
5
- import { appendFileSync, readFileSync, writeFileSync, renameSync, existsSync, statSync } from 'fs';
6
- import { resolve, sep } from 'path';
7
- import { parseJsonl } from '../utils/parse-jsonl.js';
8
- export class DatasetManager {
9
- datasetsPath;
10
- entriesPath;
11
- constructor(datasetsPath, entriesPath) {
12
- this.datasetsPath = datasetsPath;
13
- this.entriesPath = entriesPath;
14
- }
15
- /**
16
- * Create a dataset from a set of filtered traces.
17
- */
18
- createFromTraces(opts) {
19
- const now = new Date().toISOString();
20
- const agentSlugs = opts.agentSlugs ?? [...new Set(opts.traces.map((t) => t.agentSlug))];
21
- const dataset = {
22
- datasetId: randomUUID(),
23
- name: opts.name,
24
- description: opts.description,
25
- agentSlugs,
26
- createdAt: now,
27
- updatedAt: now,
28
- entryCount: opts.traces.length,
29
- };
30
- appendFileSync(this.datasetsPath, JSON.stringify(dataset) + '\n', 'utf-8');
31
- for (const trace of opts.traces) {
32
- const entry = {
33
- entryId: randomUUID(),
34
- datasetId: dataset.datasetId,
35
- traceId: trace.traceId,
36
- addedAt: now,
37
- };
38
- appendFileSync(this.entriesPath, JSON.stringify(entry) + '\n', 'utf-8');
39
- }
40
- return dataset;
41
- }
42
- /**
43
- * List all datasets.
44
- */
45
- listDatasets() {
46
- if (!existsSync(this.datasetsPath))
47
- return [];
48
- if (statSync(this.datasetsPath).size > 50 * 1024 * 1024) {
49
- throw new Error('Dataset file exceeds 50MB — run cleanup');
50
- }
51
- const content = readFileSync(this.datasetsPath, 'utf-8');
52
- return parseJsonl(content);
53
- }
54
- /**
55
- * Get entries for a specific dataset.
56
- */
57
- getEntries(datasetId) {
58
- if (!existsSync(this.entriesPath))
59
- return [];
60
- if (statSync(this.entriesPath).size > 50 * 1024 * 1024) {
61
- throw new Error('Entries file exceeds 50MB — run cleanup');
62
- }
63
- const content = readFileSync(this.entriesPath, 'utf-8');
64
- return parseJsonl(content).filter((e) => e.datasetId === datasetId);
65
- }
66
- /**
67
- * Add a single trace to an existing dataset.
68
- */
69
- addTraceToDataset(datasetId, traceId) {
70
- const entry = {
71
- entryId: randomUUID(),
72
- datasetId,
73
- traceId,
74
- addedAt: new Date().toISOString(),
75
- };
76
- appendFileSync(this.entriesPath, JSON.stringify(entry) + '\n', 'utf-8');
77
- // Update dataset entryCount via atomic write
78
- const datasets = this.listDatasets();
79
- const updated = datasets.map((d) => {
80
- if (d.datasetId === datasetId) {
81
- return { ...d, entryCount: d.entryCount + 1, updatedAt: new Date().toISOString() };
82
- }
83
- return d;
84
- });
85
- const tmp = `${this.datasetsPath}.${randomUUID()}.tmp`;
86
- writeFileSync(tmp, updated.map((d) => JSON.stringify(d)).join('\n') + '\n', 'utf-8');
87
- renameSync(tmp, this.datasetsPath);
88
- return entry;
89
- }
90
- /**
91
- * Export a dataset to a JSON file. Output path must be within `allowedRoot`.
92
- */
93
- exportToFile(datasetId, outputPath, allowedRoot) {
94
- if (allowedRoot) {
95
- const resolvedOut = resolve(outputPath);
96
- const resolvedRoot = resolve(allowedRoot);
97
- if (!resolvedOut.startsWith(resolvedRoot + sep) && resolvedOut !== resolvedRoot) {
98
- throw new Error(`Export path escapes allowed root: ${resolvedOut}`);
99
- }
100
- }
101
- const datasets = this.listDatasets();
102
- const dataset = datasets.find((d) => d.datasetId === datasetId);
103
- const entries = this.getEntries(datasetId);
104
- writeFileSync(outputPath, JSON.stringify({ dataset, entries }, null, 2), 'utf-8');
105
- }
106
- }
107
- //# sourceMappingURL=dataset-manager.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"dataset-manager.js","sourceRoot":"","sources":["../../../src/eval/dataset-manager.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AACnG,OAAO,EAAE,OAAO,EAAE,GAAG,EAAE,MAAM,MAAM,CAAC;AAEpC,OAAO,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AASrD,MAAM,OAAO,cAAc;IACjB,YAAY,CAAS;IACrB,WAAW,CAAS;IAE5B,YAAY,YAAoB,EAAE,WAAmB;QACnD,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACjC,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;IACjC,CAAC;IAED;;OAEG;IACH,gBAAgB,CAAC,IAA0B;QACzC,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACrC,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;QACxF,MAAM,OAAO,GAAgB;YAC3B,SAAS,EAAE,UAAU,EAAE;YACvB,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,UAAU;YACV,SAAS,EAAE,GAAG;YACd,SAAS,EAAE,GAAG;YACd,UAAU,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;SAC/B,CAAC;QAEF,cAAc,CAAC,IAAI,CAAC,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;QAE3E,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,MAAM,KAAK,GAAqB;gBAC9B,OAAO,EAAE,UAAU,EAAE;gBACrB,SAAS,EAAE,OAAO,CAAC,SAAS;gBAC5B,OAAO,EAAE,KAAK,CAAC,OAAO;gBACtB,OAAO,EAAE,GAAG;aACb,CAAC;YACF,cAAc,CAAC,IAAI,CAAC,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;QAC1E,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,YAAY;QACV,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,YAAY,CAAC;YAAE,OAAO,EAAE,CAAC;QAC9C,IAAI,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,IAAI,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC;YACxD,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QAC7D,CAAC;QACD,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;QACzD,OAAO,UAAU,CAAc,OAAO,CAAC,CAAC;IAC1C,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,SAAiB;QAC1B,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,WAAW,CAAC;YAAE,OAAO,EAAE,CAAC;QAC7C,IAAI,QAAQ,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,IAAI,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC;YACvD,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QAC7D,CAAC;QACD,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;QACxD,OAAO,UAAU,CAAmB,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC;IACxF,CAAC;IAED;;OAEG;IACH,iBAAiB,CAAC,SAAiB,EAAE,OAAe;QAClD,MAAM,KAAK,GAAqB;YAC9B,OAAO,EAAE,UAAU,EAAE;YACrB,SAAS;YACT,OAAO;YACP,OAAO,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SAClC,CAAC;QACF,cAAc,CAAC,IAAI,CAAC,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;QAExE,6CAA6C;QAC7C,MAAM,QAAQ,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QACrC,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;YACjC,IAAI,CAAC,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;gBAC9B,OAAO,EAAE,GAAG,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC,UAAU,GAAG,CAAC,EAAE,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC;YACrF,CAAC;YACD,OAAO,CAAC,CAAC;QACX,CAAC,CAAC,CAAC;QACH,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,YAAY,IAAI,UAAU,EAAE,MAAM,CAAC;QACvD,aAAa,CAAC,GAAG,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;QACrF,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC,YAAY,CAAC,CAAC;QAEnC,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;OAEG;IACH,YAAY,CAAC,SAAiB,EAAE,UAAkB,EAAE,WAAoB;QACtE,IAAI,WAAW,EAAE,CAAC;YAChB,MAAM,WAAW,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;YACxC,MAAM,YAAY,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;YAC1C,IAAI,CAAC,WAAW,CAAC,UAAU,CAAC,YAAY,GAAG,GAAG,CAAC,IAAI,WAAW,KAAK,YAAY,EAAE,CAAC;gBAChF,MAAM,IAAI,KAAK,CAAC,qCAAqC,WAAW,EAAE,CAAC,CAAC;YACtE,CAAC;QACH,CAAC;QACD,MAAM,QAAQ,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QACrC,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,SAAS,CAAC,CAAC;QAChE,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;QAC3C,aAAa,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;IACpF,CAAC;CACF"}
@@ -1,23 +0,0 @@
1
- import type { EvalRunResult, EvalTrace } from '../../../shared/src/types/eval.js';
2
- export interface AgentRunnerResult {
3
- agentOutput: string;
4
- outcome: 'success' | 'failure' | 'timeout';
5
- qualityScore: number;
6
- latencyMs: number;
7
- }
8
- export interface DatasetRunOpts {
9
- datasetId: string;
10
- agentVersion: string;
11
- traces: EvalTrace[];
12
- agentRunner: (trace: EvalTrace) => Promise<AgentRunnerResult>;
13
- baselineResult?: EvalRunResult;
14
- regressionThreshold?: number;
15
- }
16
- export declare class DatasetRunner {
17
- /**
18
- * Run all traces through the agent runner and compute stats.
19
- * Optionally compare against a baseline to detect regressions.
20
- */
21
- run(opts: DatasetRunOpts): Promise<EvalRunResult>;
22
- }
23
- //# sourceMappingURL=dataset-runner.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"dataset-runner.d.ts","sourceRoot":"","sources":["../../../src/eval/dataset-runner.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,aAAa,EAAE,SAAS,EAAoB,MAAM,mCAAmC,CAAC;AAEpG,MAAM,WAAW,iBAAiB;IAChC,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,SAAS,GAAG,SAAS,GAAG,SAAS,CAAC;IAC3C,YAAY,EAAE,MAAM,CAAC;IACrB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,cAAc;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,SAAS,EAAE,CAAC;IACpB,WAAW,EAAE,CAAC,KAAK,EAAE,SAAS,KAAK,OAAO,CAAC,iBAAiB,CAAC,CAAC;IAC9D,cAAc,CAAC,EAAE,aAAa,CAAC;IAC/B,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC9B;AAED,qBAAa,aAAa;IACxB;;;OAGG;IACG,GAAG,CAAC,IAAI,EAAE,cAAc,GAAG,OAAO,CAAC,aAAa,CAAC;CA6DxD"}
@@ -1,59 +0,0 @@
1
- /**
2
- * DatasetRunner - Run eval datasets against agents and detect regressions (Task 33)
3
- */
4
- import { randomUUID } from 'crypto';
5
- export class DatasetRunner {
6
- /**
7
- * Run all traces through the agent runner and compute stats.
8
- * Optionally compare against a baseline to detect regressions.
9
- */
10
- async run(opts) {
11
- const { datasetId, agentVersion, traces, agentRunner, baselineResult, regressionThreshold = 0.1, } = opts;
12
- const results = [];
13
- for (const trace of traces) {
14
- const result = await agentRunner(trace);
15
- results.push({ trace, result });
16
- }
17
- const passCount = results.filter((r) => r.result.outcome === 'success').length;
18
- const failCount = results.length - passCount;
19
- const totalQuality = results.reduce((sum, r) => sum + r.result.qualityScore, 0);
20
- const totalLatency = results.reduce((sum, r) => sum + r.result.latencyMs, 0);
21
- const avgQualityScore = results.length > 0 ? totalQuality / results.length : 0;
22
- const avgLatencyMs = results.length > 0 ? totalLatency / results.length : 0;
23
- // Regression detection
24
- const regressionDetails = [];
25
- let regressionDetected = false;
26
- if (baselineResult) {
27
- const delta = baselineResult.avgQualityScore - avgQualityScore;
28
- if (delta > regressionThreshold) {
29
- regressionDetected = true;
30
- // Report per-trace regressions for traces with quality below baseline average
31
- for (const { trace, result } of results) {
32
- if (result.qualityScore < baselineResult.avgQualityScore) {
33
- regressionDetails.push({
34
- traceId: trace.traceId,
35
- agentSlug: trace.agentSlug,
36
- baselineScore: baselineResult.avgQualityScore,
37
- currentScore: result.qualityScore,
38
- delta: baselineResult.avgQualityScore - result.qualityScore,
39
- });
40
- }
41
- }
42
- }
43
- }
44
- return {
45
- runId: randomUUID(),
46
- datasetId,
47
- runAt: new Date().toISOString(),
48
- agentVersion,
49
- entriesTested: results.length,
50
- passCount,
51
- failCount,
52
- avgQualityScore,
53
- avgLatencyMs,
54
- regressionDetected,
55
- regressionDetails,
56
- };
57
- }
58
- }
59
- //# sourceMappingURL=dataset-runner.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"dataset-runner.js","sourceRoot":"","sources":["../../../src/eval/dataset-runner.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAmBpC,MAAM,OAAO,aAAa;IACxB;;;OAGG;IACH,KAAK,CAAC,GAAG,CAAC,IAAoB;QAC5B,MAAM,EACJ,SAAS,EACT,YAAY,EACZ,MAAM,EACN,WAAW,EACX,cAAc,EACd,mBAAmB,GAAG,GAAG,GAC1B,GAAG,IAAI,CAAC;QAET,MAAM,OAAO,GAA2D,EAAE,CAAC;QAE3E,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,KAAK,CAAC,CAAC;YACxC,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;QAClC,CAAC;QAED,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,MAAM,CAAC;QAC/E,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC;QAC7C,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC;QAChF,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC;QAC7E,MAAM,eAAe,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/E,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAE5E,uBAAuB;QACvB,MAAM,iBAAiB,GAAuB,EAAE,CAAC;QACjD,IAAI,kBAAkB,GAAG,KAAK,CAAC;QAE/B,IAAI,cAAc,EAAE,CAAC;YACnB,MAAM,KAAK,GAAG,cAAc,CAAC,eAAe,GAAG,eAAe,CAAC;YAC/D,IAAI,KAAK,GAAG,mBAAmB,EAAE,CAAC;gBAChC,kBAAkB,GAAG,IAAI,CAAC;gBAC1B,8EAA8E;gBAC9E,KAAK,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,OAAO,EAAE,CAAC;oBACxC,IAAI,MAAM,CAAC,YAAY,GAAG,cAAc,CAAC,eAAe,EAAE,CAAC;wBACzD,iBAAiB,CAAC,IAAI,CAAC;4BACrB,OAAO,EAAE,KAAK,CAAC,OAAO;4BACtB,SAAS,EAAE,KAAK,CAAC,SAAS;4BAC1B,aAAa,EAAE,cAAc,CAAC,eAAe;4BAC7C,YAAY,EAAE,MAAM,CAAC,YAAY;4BACjC,KAAK,EAAE,cAAc,CAAC,eAAe,GAAG,MAAM,CAAC,YAAY;yBAC5D,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO;YACL,KAAK,EAAE,UAAU,EAAE;YACnB,SAAS;YACT,KAAK,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YAC/B,YAAY;YACZ,aAAa,EAAE,OAAO,CAAC,MAAM;YAC7B,SAAS;YACT,SAAS;YACT,eAAe;YACf,YAAY;YACZ,kBAAkB;YAClB,iBAAiB;SAClB,CAAC;IACJ,CAAC;CACF"}
@@ -1,10 +0,0 @@
1
- /**
2
- * Eval Module - Automated eval dataset from production traces (Task 33)
3
- */
4
- export { TraceCollector } from './trace-collector.js';
5
- export type { RecordTraceInput } from './trace-collector.js';
6
- export { DatasetManager } from './dataset-manager.js';
7
- export type { CreateFromTracesOpts } from './dataset-manager.js';
8
- export { DatasetRunner } from './dataset-runner.js';
9
- export type { AgentRunnerResult, DatasetRunOpts } from './dataset-runner.js';
10
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/eval/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,YAAY,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AAE7D,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,YAAY,EAAE,oBAAoB,EAAE,MAAM,sBAAsB,CAAC;AAEjE,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,YAAY,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC"}
@@ -1,7 +0,0 @@
1
- /**
2
- * Eval Module - Automated eval dataset from production traces (Task 33)
3
- */
4
- export { TraceCollector } from './trace-collector.js';
5
- export { DatasetManager } from './dataset-manager.js';
6
- export { DatasetRunner } from './dataset-runner.js';
7
- //# sourceMappingURL=index.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/eval/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAGtD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAGtD,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC"}
@@ -1,40 +0,0 @@
1
- import type { EvalTrace } from '../../../shared/src/types/eval.js';
2
- export interface RecordTraceInput {
3
- agentSlug: string;
4
- agentVersion: string;
5
- taskDescription: string;
6
- taskInput: string;
7
- agentOutput: string;
8
- retryCount: number;
9
- qualityScore?: number;
10
- outcome: 'success' | 'failure' | 'timeout';
11
- latencyMs: number;
12
- tokenCount?: number;
13
- costUsd?: number;
14
- correctedOutput?: string;
15
- }
16
- export declare class TraceCollector {
17
- private filePath;
18
- constructor(filePath: string);
19
- /**
20
- * Determine auto review status based on trace quality signals.
21
- */
22
- autoReviewStatus(input: RecordTraceInput): 'pending' | 'approved';
23
- /**
24
- * Auto-generate tags based on trace characteristics.
25
- */
26
- autoTag(input: RecordTraceInput): string[];
27
- /**
28
- * Record a trace, auto-generating traceId, capturedAt, reviewStatus, and tags.
29
- */
30
- record(input: RecordTraceInput): EvalTrace;
31
- /**
32
- * Read all traces from the JSONL file.
33
- */
34
- readAll(): EvalTrace[];
35
- /**
36
- * Get traces pending review, with optional limit.
37
- */
38
- getTracesPendingReview(limit?: number): EvalTrace[];
39
- }
40
- //# sourceMappingURL=trace-collector.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"trace-collector.d.ts","sourceRoot":"","sources":["../../../src/eval/trace-collector.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,mCAAmC,CAAC;AAGnE,MAAM,WAAW,gBAAgB;IAC/B,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,OAAO,EAAE,SAAS,GAAG,SAAS,GAAG,SAAS,CAAC;IAC3C,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAED,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAS;gBAEb,QAAQ,EAAE,MAAM;IAI5B;;OAEG;IACH,gBAAgB,CAAC,KAAK,EAAE,gBAAgB,GAAG,SAAS,GAAG,UAAU;IAOjE;;OAEG;IACH,OAAO,CAAC,KAAK,EAAE,gBAAgB,GAAG,MAAM,EAAE;IAQ1C;;OAEG;IACH,MAAM,CAAC,KAAK,EAAE,gBAAgB,GAAG,SAAS;IAsC1C;;OAEG;IACH,OAAO,IAAI,SAAS,EAAE;IAWtB;;OAEG;IACH,sBAAsB,CAAC,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,EAAE;CAKpD"}
@@ -1,102 +0,0 @@
1
- /**
2
- * TraceCollector - JSONL-based production trace collection (Task 33)
3
- */
4
- import { randomUUID } from 'crypto';
5
- import { appendFileSync, readFileSync, existsSync, statSync } from 'fs';
6
- import { parseJsonl } from '../utils/parse-jsonl.js';
7
- export class TraceCollector {
8
- filePath;
9
- constructor(filePath) {
10
- this.filePath = filePath;
11
- }
12
- /**
13
- * Determine auto review status based on trace quality signals.
14
- */
15
- autoReviewStatus(input) {
16
- if (input.retryCount > 1)
17
- return 'pending';
18
- if (input.qualityScore !== undefined && input.qualityScore < 0.6)
19
- return 'pending';
20
- if (input.outcome === 'failure')
21
- return 'pending';
22
- return 'approved';
23
- }
24
- /**
25
- * Auto-generate tags based on trace characteristics.
26
- */
27
- autoTag(input) {
28
- const tags = [];
29
- if (input.retryCount > 1)
30
- tags.push('high-retry');
31
- if (input.outcome === 'failure')
32
- tags.push('failure');
33
- if (input.outcome === 'timeout')
34
- tags.push('timeout');
35
- return tags;
36
- }
37
- /**
38
- * Record a trace, auto-generating traceId, capturedAt, reviewStatus, and tags.
39
- */
40
- record(input) {
41
- const trace = {
42
- traceId: randomUUID(),
43
- agentSlug: input.agentSlug,
44
- agentVersion: input.agentVersion,
45
- taskDescription: input.taskDescription,
46
- taskInput: input.taskInput,
47
- agentOutput: input.agentOutput,
48
- retryCount: input.retryCount,
49
- qualityScore: input.qualityScore,
50
- outcome: input.outcome,
51
- latencyMs: input.latencyMs,
52
- tokenCount: input.tokenCount,
53
- costUsd: input.costUsd,
54
- capturedAt: new Date().toISOString(),
55
- reviewStatus: this.autoReviewStatus(input),
56
- correctedOutput: input.correctedOutput,
57
- tags: this.autoTag(input),
58
- };
59
- // Defensive serialization — agent outputs may contain circular references
60
- // or BigInt; without this guard the writer crashes mid-trace.
61
- let serialized;
62
- try {
63
- serialized = JSON.stringify(trace);
64
- }
65
- catch {
66
- serialized = JSON.stringify({
67
- traceId: trace.traceId,
68
- agentSlug: trace.agentSlug,
69
- capturedAt: trace.capturedAt,
70
- reviewStatus: trace.reviewStatus,
71
- outcome: 'serialize_failed',
72
- });
73
- }
74
- appendFileSync(this.filePath, serialized + '\n', 'utf-8');
75
- return trace;
76
- }
77
- /**
78
- * Read all traces from the JSONL file.
79
- */
80
- readAll() {
81
- if (!existsSync(this.filePath))
82
- return [];
83
- const stat = statSync(this.filePath);
84
- if (stat.size > 256 * 1024 * 1024) {
85
- throw new Error(`Trace file exceeds 256MB (${stat.size} bytes). Run rotation/cleanup.`);
86
- }
87
- const content = readFileSync(this.filePath, 'utf-8').trim();
88
- if (!content)
89
- return [];
90
- return parseJsonl(content);
91
- }
92
- /**
93
- * Get traces pending review, with optional limit.
94
- */
95
- getTracesPendingReview(limit) {
96
- const all = this.readAll().filter((t) => t.reviewStatus === 'pending');
97
- if (limit !== undefined)
98
- return all.slice(0, limit);
99
- return all;
100
- }
101
- }
102
- //# sourceMappingURL=trace-collector.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"trace-collector.js","sourceRoot":"","sources":["../../../src/eval/trace-collector.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AAExE,OAAO,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAiBrD,MAAM,OAAO,cAAc;IACjB,QAAQ,CAAS;IAEzB,YAAY,QAAgB;QAC1B,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,gBAAgB,CAAC,KAAuB;QACtC,IAAI,KAAK,CAAC,UAAU,GAAG,CAAC;YAAE,OAAO,SAAS,CAAC;QAC3C,IAAI,KAAK,CAAC,YAAY,KAAK,SAAS,IAAI,KAAK,CAAC,YAAY,GAAG,GAAG;YAAE,OAAO,SAAS,CAAC;QACnF,IAAI,KAAK,CAAC,OAAO,KAAK,SAAS;YAAE,OAAO,SAAS,CAAC;QAClD,OAAO,UAAU,CAAC;IACpB,CAAC;IAED;;OAEG;IACH,OAAO,CAAC,KAAuB;QAC7B,MAAM,IAAI,GAAa,EAAE,CAAC;QAC1B,IAAI,KAAK,CAAC,UAAU,GAAG,CAAC;YAAE,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAClD,IAAI,KAAK,CAAC,OAAO,KAAK,SAAS;YAAE,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACtD,IAAI,KAAK,CAAC,OAAO,KAAK,SAAS;YAAE,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACtD,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,KAAuB;QAC5B,MAAM,KAAK,GAAc;YACvB,OAAO,EAAE,UAAU,EAAE;YACrB,SAAS,EAAE,KAAK,CAAC,SAAS;YAC1B,YAAY,EAAE,KAAK,CAAC,YAAY;YAChC,eAAe,EAAE,KAAK,CAAC,eAAe;YACtC,SAAS,EAAE,KAAK,CAAC,SAAS;YAC1B,WAAW,EAAE,KAAK,CAAC,WAAW;YAC9B,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,YAAY,EAAE,KAAK,CAAC,YAAY;YAChC,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,SAAS,EAAE,KAAK,CAAC,SAAS;YAC1B,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,UAAU,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACpC,YAAY,EAAE,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC;YAC1C,eAAe,EAAE,KAAK,CAAC,eAAe;YACtC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC;SAC1B,CAAC;QAEF,0EAA0E;QAC1E,8DAA8D;QAC9D,IAAI,UAAkB,CAAC;QACvB,IAAI,CAAC;YACH,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QACrC,CAAC;QAAC,MAAM,CAAC;YACP,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC;gBAC1B,OAAO,EAAE,KAAK,CAAC,OAAO;gBACtB,SAAS,EAAE,KAAK,CAAC,SAAS;gBAC1B,UAAU,EAAE,KAAK,CAAC,UAAU;gBAC5B,YAAY,EAAE,KAAK,CAAC,YAAY;gBAChC,OAAO,EAAE,kBAAkB;aAC5B,CAAC,CAAC;QACL,CAAC;QACD,cAAc,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;QAC1D,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;OAEG;IACH,OAAO;QACL,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,QAAQ,CAAC;YAAE,OAAO,EAAE,CAAC;QAC1C,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACrC,IAAI,IAAI,CAAC,IAAI,GAAG,GAAG,GAAG,IAAI,GAAG,IAAI,EAAE,CAAC;YAClC,MAAM,IAAI,KAAK,CAAC,6BAA6B,IAAI,CAAC,IAAI,gCAAgC,CAAC,CAAC;QAC1F,CAAC;QACD,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC;QAC5D,IAAI,CAAC,OAAO;YAAE,OAAO,EAAE,CAAC;QACxB,OAAO,UAAU,CAAY,OAAO,CAAC,CAAC;IACxC,CAAC;IAED;;OAEG;IACH,sBAAsB,CAAC,KAAc;QACnC,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,KAAK,SAAS,CAAC,CAAC;QACvE,IAAI,KAAK,KAAK,SAAS;YAAE,OAAO,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;QACpD,OAAO,GAAG,CAAC;IACb,CAAC;CACF"}