martin-loop 0.1.4 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (286) hide show
  1. package/CODE_OF_CONDUCT.md +32 -0
  2. package/README.md +172 -227
  3. package/demo/seeded-workspace/README.md +35 -0
  4. package/demo/seeded-workspace/TASKS.md +29 -0
  5. package/demo/seeded-workspace/martin.config.yaml +11 -0
  6. package/demo/seeded-workspace/package.json +8 -0
  7. package/demo/seeded-workspace/src/invoice-summary.js +11 -0
  8. package/demo/seeded-workspace/test/invoice-summary.test.js +20 -0
  9. package/dist/bin/martin-loop.js +0 -0
  10. package/dist/vendor/adapters/claude-cli.d.ts +19 -4
  11. package/dist/vendor/adapters/claude-cli.js +55 -24
  12. package/dist/vendor/adapters/cli-bridge.d.ts +1 -0
  13. package/dist/vendor/adapters/cli-bridge.js +154 -28
  14. package/dist/vendor/adapters/counter.d.ts +1 -0
  15. package/dist/vendor/adapters/counter.js +4 -0
  16. package/dist/vendor/adapters/git-baseline.d.ts +50 -0
  17. package/dist/vendor/adapters/git-baseline.js +233 -0
  18. package/dist/vendor/adapters/index.d.ts +1 -0
  19. package/dist/vendor/adapters/index.js +1 -0
  20. package/dist/vendor/adapters/openrouter-adapter.d.ts +15 -0
  21. package/dist/vendor/adapters/openrouter-adapter.js +302 -0
  22. package/dist/vendor/adapters/usage.d.ts +48 -0
  23. package/dist/vendor/adapters/usage.js +66 -0
  24. package/dist/vendor/adapters/verifier-only.d.ts +7 -0
  25. package/dist/vendor/adapters/verifier-only.js +57 -0
  26. package/dist/vendor/cli/bin/exit.d.ts +12 -0
  27. package/dist/vendor/cli/bin/exit.js +28 -0
  28. package/dist/vendor/cli/commands/analyze.d.ts +5 -0
  29. package/dist/vendor/cli/commands/analyze.js +58 -0
  30. package/dist/vendor/cli/commands/audit-log-verify.d.ts +34 -0
  31. package/dist/vendor/cli/commands/audit-log-verify.js +99 -0
  32. package/dist/vendor/cli/commands/audit.d.ts +8 -0
  33. package/dist/vendor/cli/commands/audit.js +199 -0
  34. package/dist/vendor/cli/commands/corpus.d.ts +5 -0
  35. package/dist/vendor/cli/commands/corpus.js +60 -0
  36. package/dist/vendor/cli/commands/doctor.d.ts +8 -0
  37. package/dist/vendor/cli/commands/doctor.js +219 -0
  38. package/dist/vendor/cli/commands/explain.d.ts +17 -0
  39. package/dist/vendor/cli/commands/explain.js +176 -0
  40. package/dist/vendor/cli/commands/export.d.ts +5 -0
  41. package/dist/vendor/cli/commands/export.js +60 -0
  42. package/dist/vendor/cli/commands/governance.d.ts +8 -0
  43. package/dist/vendor/cli/commands/governance.js +95 -0
  44. package/dist/vendor/cli/commands/improve.d.ts +18 -0
  45. package/dist/vendor/cli/commands/improve.js +396 -0
  46. package/dist/vendor/cli/commands/init.d.ts +8 -0
  47. package/dist/vendor/cli/commands/init.js +281 -0
  48. package/dist/vendor/cli/commands/migration.d.ts +8 -0
  49. package/dist/vendor/cli/commands/migration.js +67 -0
  50. package/dist/vendor/cli/commands/prior.d.ts +23 -0
  51. package/dist/vendor/cli/commands/prior.js +145 -0
  52. package/dist/vendor/cli/commands/resume.d.ts +21 -0
  53. package/dist/vendor/cli/commands/resume.js +73 -0
  54. package/dist/vendor/cli/commands/verify.d.ts +6 -0
  55. package/dist/vendor/cli/commands/verify.js +43 -0
  56. package/dist/vendor/cli/index.d.ts +6 -1
  57. package/dist/vendor/cli/index.js +124 -7
  58. package/dist/vendor/cli/research/public-corpus.d.ts +43 -0
  59. package/dist/vendor/cli/research/public-corpus.js +151 -0
  60. package/dist/vendor/cli/ui/error-card.d.ts +38 -0
  61. package/dist/vendor/cli/ui/error-card.js +103 -0
  62. package/dist/vendor/cli/ui/mission-brief.d.ts +41 -0
  63. package/dist/vendor/cli/ui/mission-brief.js +173 -0
  64. package/dist/vendor/cli/ui/summary-card.d.ts +34 -0
  65. package/dist/vendor/cli/ui/summary-card.js +102 -0
  66. package/dist/vendor/contracts/audit.d.ts +46 -0
  67. package/dist/vendor/contracts/audit.js +360 -0
  68. package/dist/vendor/contracts/index.d.ts +3 -1
  69. package/dist/vendor/contracts/post-phase15.d.ts +240 -0
  70. package/dist/vendor/contracts/post-phase15.js +166 -0
  71. package/dist/vendor/core/agent/mandates.d.ts +46 -0
  72. package/dist/vendor/core/agent/mandates.js +178 -0
  73. package/dist/vendor/core/agent/receipts.d.ts +38 -0
  74. package/dist/vendor/core/agent/receipts.js +131 -0
  75. package/dist/vendor/core/agent/signing.d.ts +17 -0
  76. package/dist/vendor/core/agent/signing.js +91 -0
  77. package/dist/vendor/core/attestation/sign.d.ts +25 -0
  78. package/dist/vendor/core/attestation/sign.js +216 -0
  79. package/dist/vendor/core/autonomy/autonomous-promotion.d.ts +120 -0
  80. package/dist/vendor/core/autonomy/autonomous-promotion.js +346 -0
  81. package/dist/vendor/core/autonomy/envelope-v2.d.ts +29 -0
  82. package/dist/vendor/core/autonomy/envelope-v2.js +60 -0
  83. package/dist/vendor/core/autonomy/envelope.d.ts +17 -0
  84. package/dist/vendor/core/autonomy/envelope.js +27 -0
  85. package/dist/vendor/core/autonomy/escalation-ledger.d.ts +20 -0
  86. package/dist/vendor/core/autonomy/escalation-ledger.js +18 -0
  87. package/dist/vendor/core/autonomy/resume.d.ts +15 -0
  88. package/dist/vendor/core/autonomy/resume.js +23 -0
  89. package/dist/vendor/core/circuit/circuit-breaker.d.ts +60 -0
  90. package/dist/vendor/core/circuit/circuit-breaker.js +143 -0
  91. package/dist/vendor/core/compiler.d.ts +2 -0
  92. package/dist/vendor/core/compiler.js +10 -4
  93. package/dist/vendor/core/context-distillation.d.ts +3 -0
  94. package/dist/vendor/core/context-distillation.js +44 -0
  95. package/dist/vendor/core/context-flow/compile-context.d.ts +8 -0
  96. package/dist/vendor/core/context-flow/compile-context.js +111 -0
  97. package/dist/vendor/core/context-flow/entities.d.ts +2 -0
  98. package/dist/vendor/core/context-flow/entities.js +44 -0
  99. package/dist/vendor/core/context-flow/evaluate-policy.d.ts +2 -0
  100. package/dist/vendor/core/context-flow/evaluate-policy.js +42 -0
  101. package/dist/vendor/core/context-flow/index.d.ts +11 -0
  102. package/dist/vendor/core/context-flow/index.js +24 -0
  103. package/dist/vendor/core/context-flow/labels.d.ts +3 -0
  104. package/dist/vendor/core/context-flow/labels.js +17 -0
  105. package/dist/vendor/core/context-flow/normalizer.d.ts +9 -0
  106. package/dist/vendor/core/context-flow/normalizer.js +69 -0
  107. package/dist/vendor/core/context-flow/profiles.d.ts +33 -0
  108. package/dist/vendor/core/context-flow/profiles.js +36 -0
  109. package/dist/vendor/core/context-flow/redaction.d.ts +1 -0
  110. package/dist/vendor/core/context-flow/redaction.js +6 -0
  111. package/dist/vendor/core/context-flow/sensitivity.d.ts +2 -0
  112. package/dist/vendor/core/context-flow/sensitivity.js +27 -0
  113. package/dist/vendor/core/context-flow/sync-preview.d.ts +2 -0
  114. package/dist/vendor/core/context-flow/sync-preview.js +22 -0
  115. package/dist/vendor/core/context-flow/token-estimator.d.ts +3 -0
  116. package/dist/vendor/core/context-flow/token-estimator.js +13 -0
  117. package/dist/vendor/core/context-flow/types.d.ts +91 -0
  118. package/dist/vendor/core/context-flow/types.js +2 -0
  119. package/dist/vendor/core/context-integrity.d.ts +26 -0
  120. package/dist/vendor/core/context-integrity.js +56 -0
  121. package/dist/vendor/core/context-utility.d.ts +47 -0
  122. package/dist/vendor/core/context-utility.js +405 -0
  123. package/dist/vendor/core/cost/pipeline.d.ts +92 -0
  124. package/dist/vendor/core/cost/pipeline.js +141 -0
  125. package/dist/vendor/core/cost/tagged-cost.d.ts +27 -0
  126. package/dist/vendor/core/cost/tagged-cost.js +55 -0
  127. package/dist/vendor/core/cost-governor.d.ts +2 -0
  128. package/dist/vendor/core/cost-governor.js +50 -0
  129. package/dist/vendor/core/cve/cve-check.d.ts +80 -0
  130. package/dist/vendor/core/cve/cve-check.js +172 -0
  131. package/dist/vendor/core/digital-twin/index.d.ts +27 -0
  132. package/dist/vendor/core/digital-twin/index.js +90 -0
  133. package/dist/vendor/core/drift/drift-graph.d.ts +47 -0
  134. package/dist/vendor/core/drift/drift-graph.js +100 -0
  135. package/dist/vendor/core/drift/objective-lock.d.ts +69 -0
  136. package/dist/vendor/core/drift/objective-lock.js +88 -0
  137. package/dist/vendor/core/drift/scope.d.ts +46 -0
  138. package/dist/vendor/core/drift/scope.js +102 -0
  139. package/dist/vendor/core/drift/signature-lock.d.ts +48 -0
  140. package/dist/vendor/core/drift/signature-lock.js +202 -0
  141. package/dist/vendor/core/drift/stale-proof-gate.d.ts +21 -0
  142. package/dist/vendor/core/drift/stale-proof-gate.js +19 -0
  143. package/dist/vendor/core/eval/known-bad-world-runner.d.ts +24 -0
  144. package/dist/vendor/core/eval/known-bad-world-runner.js +256 -0
  145. package/dist/vendor/core/evidence/claim-audit.d.ts +18 -0
  146. package/dist/vendor/core/evidence/claim-audit.js +89 -0
  147. package/dist/vendor/core/exit-intelligence.d.ts +2 -0
  148. package/dist/vendor/core/exit-intelligence.js +58 -0
  149. package/dist/vendor/core/explain/formatter.d.ts +42 -0
  150. package/dist/vendor/core/explain/formatter.js +171 -0
  151. package/dist/vendor/core/explain/timeline.d.ts +29 -0
  152. package/dist/vendor/core/explain/timeline.js +213 -0
  153. package/dist/vendor/core/failure-taxonomy.d.ts +2 -0
  154. package/dist/vendor/core/failure-taxonomy.js +76 -0
  155. package/dist/vendor/core/gateway/index.d.ts +10 -0
  156. package/dist/vendor/core/gateway/index.js +12 -0
  157. package/dist/vendor/core/gateway/registry.d.ts +40 -0
  158. package/dist/vendor/core/gateway/registry.js +97 -0
  159. package/dist/vendor/core/gateway/transport.d.ts +31 -0
  160. package/dist/vendor/core/gateway/transport.js +82 -0
  161. package/dist/vendor/core/gateway/vault.d.ts +19 -0
  162. package/dist/vendor/core/gateway/vault.js +29 -0
  163. package/dist/vendor/core/graph/adapters.d.ts +43 -0
  164. package/dist/vendor/core/graph/adapters.js +91 -0
  165. package/dist/vendor/core/graph/hotspots.d.ts +22 -0
  166. package/dist/vendor/core/graph/hotspots.js +30 -0
  167. package/dist/vendor/core/graph/index.d.ts +1 -0
  168. package/dist/vendor/core/graph/index.js +2 -0
  169. package/dist/vendor/core/honey/honey-tokens.d.ts +32 -0
  170. package/dist/vendor/core/honey/honey-tokens.js +44 -0
  171. package/dist/vendor/core/index.d.ts +7 -4
  172. package/dist/vendor/core/index.js +222 -64
  173. package/dist/vendor/core/learning/bayesian-update.d.ts +31 -0
  174. package/dist/vendor/core/learning/bayesian-update.js +60 -0
  175. package/dist/vendor/core/learning/prior-sets.d.ts +42 -0
  176. package/dist/vendor/core/learning/prior-sets.js +111 -0
  177. package/dist/vendor/core/learning/promotion-gate.d.ts +17 -0
  178. package/dist/vendor/core/learning/promotion-gate.js +23 -0
  179. package/dist/vendor/core/leash/blast-radius.d.ts +42 -0
  180. package/dist/vendor/core/leash/blast-radius.js +156 -0
  181. package/dist/vendor/core/leash/policy-leash.d.ts +31 -0
  182. package/dist/vendor/core/leash/policy-leash.js +117 -0
  183. package/dist/vendor/core/memo/memo.d.ts +63 -0
  184. package/dist/vendor/core/memo/memo.js +97 -0
  185. package/dist/vendor/core/memory/learning-pipeline.d.ts +154 -0
  186. package/dist/vendor/core/memory/learning-pipeline.js +391 -0
  187. package/dist/vendor/core/memory/palace.d.ts +84 -0
  188. package/dist/vendor/core/memory/palace.js +379 -0
  189. package/dist/vendor/core/merge/ast-merge.d.ts +22 -0
  190. package/dist/vendor/core/merge/ast-merge.js +350 -0
  191. package/dist/vendor/core/merge/text-merge.d.ts +12 -0
  192. package/dist/vendor/core/merge/text-merge.js +182 -0
  193. package/dist/vendor/core/otel/tracer.d.ts +45 -0
  194. package/dist/vendor/core/otel/tracer.js +116 -0
  195. package/dist/vendor/core/parallel/parallel-attempts.d.ts +28 -0
  196. package/dist/vendor/core/parallel/parallel-attempts.js +41 -0
  197. package/dist/vendor/core/parallel/scorer.d.ts +24 -0
  198. package/dist/vendor/core/parallel/scorer.js +65 -0
  199. package/dist/vendor/core/pattern-detection.d.ts +64 -0
  200. package/dist/vendor/core/pattern-detection.js +108 -0
  201. package/dist/vendor/core/persistence/checkpoint.d.ts +44 -0
  202. package/dist/vendor/core/persistence/checkpoint.js +156 -0
  203. package/dist/vendor/core/persistence/cleanup.d.ts +22 -0
  204. package/dist/vendor/core/persistence/cleanup.js +131 -0
  205. package/dist/vendor/core/persistence/index.d.ts +2 -0
  206. package/dist/vendor/core/persistence/index.js +1 -0
  207. package/dist/vendor/core/persistence/runs-reader.d.ts +52 -0
  208. package/dist/vendor/core/persistence/runs-reader.js +84 -0
  209. package/dist/vendor/core/persistence/store.d.ts +6 -1
  210. package/dist/vendor/core/persistence/store.js +5 -0
  211. package/dist/vendor/core/policy/file-touch-quota.d.ts +60 -0
  212. package/dist/vendor/core/policy/file-touch-quota.js +105 -0
  213. package/dist/vendor/core/policy/policy-loader.d.ts +30 -0
  214. package/dist/vendor/core/policy/policy-loader.js +170 -0
  215. package/dist/vendor/core/policy/policy-schema.d.ts +55 -0
  216. package/dist/vendor/core/policy/policy-schema.js +78 -0
  217. package/dist/vendor/core/policy.d.ts +6 -0
  218. package/dist/vendor/core/probe/probe.d.ts +49 -0
  219. package/dist/vendor/core/probe/probe.js +115 -0
  220. package/dist/vendor/core/proof/patch-proof.d.ts +58 -0
  221. package/dist/vendor/core/proof/patch-proof.js +84 -0
  222. package/dist/vendor/core/proof/semantic-probe.d.ts +25 -0
  223. package/dist/vendor/core/proof/semantic-probe.js +82 -0
  224. package/dist/vendor/core/recovery/failure-mode-runner.d.ts +29 -0
  225. package/dist/vendor/core/recovery/failure-mode-runner.js +39 -0
  226. package/dist/vendor/core/red-blue/red-phase.d.ts +64 -0
  227. package/dist/vendor/core/red-blue/red-phase.js +141 -0
  228. package/dist/vendor/core/red-blue/risk-tiers.d.ts +22 -0
  229. package/dist/vendor/core/red-blue/risk-tiers.js +33 -0
  230. package/dist/vendor/core/replay/replay.d.ts +85 -0
  231. package/dist/vendor/core/replay/replay.js +109 -0
  232. package/dist/vendor/core/router/engine.d.ts +54 -0
  233. package/dist/vendor/core/router/engine.js +131 -0
  234. package/dist/vendor/core/router/index.d.ts +1 -0
  235. package/dist/vendor/core/router/index.js +2 -0
  236. package/dist/vendor/core/router/trust-calibration.d.ts +57 -0
  237. package/dist/vendor/core/router/trust-calibration.js +127 -0
  238. package/dist/vendor/core/run-martin.d.ts +2 -0
  239. package/dist/vendor/core/run-martin.js +287 -0
  240. package/dist/vendor/core/security/cve-scanner.d.ts +62 -0
  241. package/dist/vendor/core/security/cve-scanner.js +178 -0
  242. package/dist/vendor/core/sentinel/efficiency-sentinel.d.ts +29 -0
  243. package/dist/vendor/core/sentinel/efficiency-sentinel.js +30 -0
  244. package/dist/vendor/core/sentinel/progress-guard.d.ts +35 -0
  245. package/dist/vendor/core/sentinel/progress-guard.js +46 -0
  246. package/dist/vendor/core/siem/siem-emitter.d.ts +49 -0
  247. package/dist/vendor/core/siem/siem-emitter.js +157 -0
  248. package/dist/vendor/core/strategy/attempt-brief.d.ts +22 -0
  249. package/dist/vendor/core/strategy/attempt-brief.js +89 -0
  250. package/dist/vendor/core/summarize/diff-summary.d.ts +35 -0
  251. package/dist/vendor/core/summarize/diff-summary.js +204 -0
  252. package/dist/vendor/core/surface-signals.d.ts +21 -0
  253. package/dist/vendor/core/surface-signals.js +139 -0
  254. package/dist/vendor/core/truth/truth-wall.d.ts +51 -0
  255. package/dist/vendor/core/truth/truth-wall.js +69 -0
  256. package/dist/vendor/core/truth-spine.d.ts +26 -0
  257. package/dist/vendor/core/truth-spine.js +62 -0
  258. package/dist/vendor/core/types.d.ts +115 -0
  259. package/dist/vendor/core/types.js +2 -0
  260. package/dist/vendor/core/verification/tiered-verify.d.ts +17 -0
  261. package/dist/vendor/core/verification/tiered-verify.js +29 -0
  262. package/dist/vendor/core/verifier-pyramid.d.ts +32 -0
  263. package/dist/vendor/core/verifier-pyramid.js +111 -0
  264. package/dist/vendor/core/workflow-artifacts.d.ts +99 -0
  265. package/dist/vendor/core/workflow-artifacts.js +668 -0
  266. package/dist/vendor/core/wrap/supervised-run.d.ts +96 -0
  267. package/dist/vendor/core/wrap/supervised-run.js +178 -0
  268. package/docs/assets/cli-animated.svg +139 -0
  269. package/docs/assets/cli-static.svg +34 -0
  270. package/docs/assets/github-hero-v2.svg +23 -0
  271. package/docs/assets/martin-raplph.png.jpg +0 -0
  272. package/docs/assets/martinloop-logo.png +0 -0
  273. package/docs/assets/nvidia-inception-program-light.png +0 -0
  274. package/docs/assets/nvidia-inception-program.png +0 -0
  275. package/docs/assets/phase3c-sidesidebyside-demo.html +228 -0
  276. package/docs/assets/side-by-side.svg +134 -0
  277. package/docs/oss/CLAUDE-CODE-WALKTHROUGH.md +142 -0
  278. package/docs/oss/EXAMPLES.md +9 -1
  279. package/docs/oss/OSS-BOUNDARY-REPORT.json +109 -113
  280. package/docs/oss/OSS-BOUNDARY-REPORT.md +48 -48
  281. package/docs/oss/QUICKSTART.md +39 -4
  282. package/docs/oss/RALPH-LOOP-SAFETY.md +113 -0
  283. package/docs/oss/README.md +7 -4
  284. package/docs/oss/RELEASE-SURFACE-REPORT.json +46 -45
  285. package/docs/oss/RELEASE-SURFACE-REPORT.md +36 -35
  286. package/package.json +129 -49
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Trust Calibration Engine — the self-improvement loop.
3
+ *
4
+ * Reads historical run records from ~/.martin/runs/ and computes a reliability
5
+ * profile for each model that has been used. The router uses these profiles to
6
+ * automatically downgrade to cheaper models when evidence shows they perform
7
+ * as well as more expensive ones, and to deprioritize models with poor track records.
8
+ *
9
+ * This closes the feedback loop that was missing: every completed run writes
10
+ * evidence to disk; this module reads it back into routing decisions.
11
+ */
12
/** Aggregated reliability statistics for one model, as computed by calibrateTrust. */
+ export interface ModelTrustProfile {
13
+ /** Normalized model key (e.g. "claude-sonnet"); the attempt's adapterId is used when no model was recorded */
14
+ model: string;
15
+ /** Total runs where this model was used for at least one attempt */
16
+ runsObserved: number;
17
+ /** Fraction of observed runs whose status is "completed" (0–1) */
18
+ completionRate: number;
19
+ /** Average USD cost per iteration: total cost of the observed runs divided by their total attempt count */
20
+ avgCostPerIteration: number;
21
+ /** Mean of attempts used / budget.maxIterations across observed runs (lower = more efficient) */
22
+ avgIterationEfficiency: number;
23
+ /**
24
+ * Composite score: completionRate * (1 - 0.5 * avgIterationEfficiency), rounded to 3 decimals.
25
+ * High score = completes well AND uses fewer iterations than the budget allows.
26
+ */
27
+ efficiencyScore: number;
28
+ /** Timestamp (updatedAt, else createdAt) of the most recent run that informed this profile */
29
+ lastUpdated: string;
30
+ }
31
/** Outcome of one calibration pass over the stored run records. */
+ export interface TrustCalibrationResult {
32
+ /** Per-model reliability profiles, sorted by efficiencyScore descending */
33
+ profiles: ModelTrustProfile[];
34
+ /**
35
+ * The cheapest model (lowest avgCostPerIteration) among those that meet both the
36
+ * minRuns and efficiencyThreshold requirements. Null if no model qualifies yet.
37
+ */
38
+ recommendedModel: string | null;
39
+ /** Total number of run records read to produce this result */
40
+ calibrationBasis: number;
41
+ }
42
+ /**
43
+ * Reads historical loop records and computes a trust profile for each model.
44
+ *
45
+ * @param runsDir - Override the default ~/.martin/runs path (useful for testing)
46
+ * @param minRuns - Minimum observations required before a profile is considered
47
+ * reliable enough to influence routing. Default: 3.
48
+ * @param efficiencyThreshold - Minimum efficiencyScore for a model to be
49
+ * eligible for auto-recommendation. Default: 0.75.
+ * @returns All profiles sorted by efficiencyScore (descending), the recommended
+ * model (or null when none is eligible), and the number of records read.
50
+ */
51
+ export declare function calibrateTrust(runsDir?: string, minRuns?: number, efficiencyThreshold?: number): Promise<TrustCalibrationResult>;
52
+ /**
53
+ * Returns true if a model should be deprioritized based on its trust profile.
54
+ * A model is deprioritized when it has enough observations to be confident
55
+ * it performs poorly (low completion rate).
+ *
+ * @param profile - Trust profile to evaluate
+ * @param minRuns - Observations required before the profile can be penalized. Default: 5.
+ * @param minCompletionRate - Completion rate below which the model is deprioritized. Default: 0.4.
+ * @returns true only when runsObserved >= minRuns and completionRate < minCompletionRate
56
+ */
57
+ export declare function shouldDeprioritize(profile: ModelTrustProfile, minRuns?: number, minCompletionRate?: number): boolean;
@@ -0,0 +1,127 @@
1
+ /**
2
+ * Trust Calibration Engine — the self-improvement loop.
3
+ *
4
+ * Reads historical run records from ~/.martin/runs/ and computes a reliability
5
+ * profile for each model that has been used. The router uses these profiles to
6
+ * automatically downgrade to cheaper models when evidence shows they perform
7
+ * as well as more expensive ones, and to deprioritize models with poor track records.
8
+ *
9
+ * This closes the feedback loop that was missing: every completed run writes
10
+ * evidence to disk; this module reads it back into routing decisions.
11
+ */
12
+ import { readAllLoopRecords } from "../persistence/runs-reader.js";
13
/**
 * Reads historical loop records and computes a trust profile for each model.
 *
 * Each record contributes once to every distinct (normalized) model that appears
 * in its attempts; the whole run's cost and attempt count are attributed to each
 * of those models. Records without an `attempts` array are skipped instead of
 * aborting the calibration.
 *
 * @param runsDir - Override the default ~/.martin/runs path (useful for testing)
 * @param minRuns - Minimum observations required before a profile is considered
 *   reliable enough to influence routing. Default: 3.
 * @param efficiencyThreshold - Minimum efficiencyScore for a model to be
 *   eligible for auto-recommendation. Default: 0.75.
 * @returns `{ profiles, recommendedModel, calibrationBasis }` where `profiles` is
 *   sorted by efficiencyScore descending, `recommendedModel` is the cheapest
 *   eligible model (or null), and `calibrationBasis` is the number of records read.
 */
export async function calibrateTrust(runsDir, minRuns = 3, efficiencyThreshold = 0.75) {
    const records = await readAllLoopRecords(runsDir);
    if (records.length === 0) {
        return { profiles: [], recommendedModel: null, calibrationBasis: 0 };
    }
    const accumulators = new Map();
    for (const record of records) {
        // A malformed record on disk must not disable calibration for every other run.
        if (!record || !Array.isArray(record.attempts)) {
            continue;
        }
        const attemptCount = record.attempts.length;
        const maxIterations = record.budget?.maxIterations;
        // Clamp to 1 so an over-budget record cannot push the score outside 0–1.
        const iterationEfficiency = maxIterations > 0
            ? Math.min(1, attemptCount / maxIterations)
            : 1;
        // A missing or non-finite cost would otherwise turn every average into NaN.
        const rawCostUsd = record.cost?.actualUsd;
        const runCostUsd = Number.isFinite(rawCostUsd) ? rawCostUsd : 0;
        const isCompleted = record.status === "completed";
        const recordTs = record.updatedAt ?? record.createdAt;
        for (const model of extractModelsFromRun(record)) {
            const existing = accumulators.get(model) ?? {
                model,
                completedRuns: 0,
                totalRuns: 0,
                totalCostUsd: 0,
                totalAttempts: 0,
                totalIterationsUsedFraction: 0,
                latestUpdatedAt: record.createdAt
            };
            existing.totalRuns += 1;
            existing.totalAttempts += attemptCount;
            existing.totalCostUsd += runCostUsd;
            existing.totalIterationsUsedFraction += iterationEfficiency;
            if (isCompleted) {
                existing.completedRuns += 1;
            }
            // Timestamps are compared as strings; ISO-8601 values order correctly this way.
            if (recordTs > existing.latestUpdatedAt) {
                existing.latestUpdatedAt = recordTs;
            }
            accumulators.set(model, existing);
        }
    }
    // Every accumulator has totalRuns >= 1 because it is created on first observation.
    const profiles = [...accumulators.values()].map((acc) => {
        const completionRate = acc.completedRuns / acc.totalRuns;
        const avgIterationEfficiency = acc.totalIterationsUsedFraction / acc.totalRuns;
        const avgCostPerIteration = acc.totalAttempts > 0 ? acc.totalCostUsd / acc.totalAttempts : 0;
        // efficiencyScore: high means "completes reliably AND uses fewer iterations"
        const efficiencyScore = completionRate * (1 - avgIterationEfficiency * 0.5);
        return {
            model: acc.model,
            runsObserved: acc.totalRuns,
            completionRate,
            avgCostPerIteration,
            avgIterationEfficiency,
            efficiencyScore: Math.round(efficiencyScore * 1000) / 1000,
            lastUpdated: acc.latestUpdatedAt
        };
    });
    // Sort by efficiency descending
    profiles.sort((a, b) => b.efficiencyScore - a.efficiencyScore);
    // Recommend the cheapest model that meets threshold with enough data.
    // Ties on cost keep the earlier (higher-scoring) profile because of the strict "<".
    const eligible = profiles.filter((p) => p.runsObserved >= minRuns && p.efficiencyScore >= efficiencyThreshold);
    const recommendedModel = eligible.length > 0
        ? eligible.reduce((best, p) => p.avgCostPerIteration < best.avgCostPerIteration ? p : best).model
        : null;
    return {
        profiles,
        recommendedModel,
        calibrationBasis: records.length
    };
}
89
/**
 * Decides whether routing should push a model down the preference list.
 *
 * A model is only penalized once enough runs have been observed; below that
 * sample size the answer is always false, however poor the completion rate.
 *
 * @param profile - Trust profile; `runsObserved` and `completionRate` are read
 * @param minRuns - Observations required before the profile can be penalized. Default: 5.
 * @param minCompletionRate - Completion rate below which the model is deprioritized. Default: 0.4.
 * @returns true when the sample is large enough and the completion rate is too low
 */
export function shouldDeprioritize(profile, minRuns = 5, minCompletionRate = 0.4) {
    const hasEnoughEvidence = profile.runsObserved >= minRuns;
    if (!hasEnoughEvidence) {
        return false;
    }
    return profile.completionRate < minCompletionRate;
}
98
/**
 * Extracts the distinct set of normalized model keys used in a run.
 *
 * For each attempt the `model` field is preferred; when it is absent, blank, or
 * not a string, `adapterId` is used instead. Attempts with neither are ignored.
 *
 * @param record - Loop record whose `attempts` array is scanned
 * @returns Normalized model keys, de-duplicated, in first-seen order
 */
function extractModelsFromRun(record) {
    const isUsableKey = (candidate) => typeof candidate === "string" && candidate.trim() !== "";
    const models = new Set();
    for (const attempt of record.attempts) {
        // A blank model must not shadow a usable adapterId.
        const key = [attempt.model, attempt.adapterId].find(isUsableKey);
        if (key !== undefined) {
            models.add(normalizeModelName(key));
        }
    }
    return [...models];
}
111
/**
 * Collapses known model aliases into one stable family key so that dated or
 * vendor-prefixed ids are accumulated into a single trust profile.
 *
 * Matching is case-insensitive. The "o3" family is matched as a whole token
 * (bounded by non-alphanumeric characters), so names that merely contain the
 * letters "o3" (for example "qwen-turbo3") are left untouched.
 *
 * @param raw - Model id or adapter id as recorded on an attempt
 * @returns The family key for known models, otherwise `raw` unchanged
 */
function normalizeModelName(raw) {
    const lowered = raw.toLowerCase();
    // Order matters: "gpt-4o-mini" must be tested before its prefix "gpt-4o".
    const substringFamilies = [
        ["sonnet", "claude-sonnet"],
        ["haiku", "claude-haiku"],
        ["opus", "claude-opus"],
        ["gpt-4o-mini", "gpt-4o-mini"],
        ["gpt-4o", "gpt-4o"]
    ];
    for (const [needle, family] of substringFamilies) {
        if (lowered.includes(needle)) {
            return family;
        }
    }
    if (/(^|[^a-z0-9])o3($|[^a-z0-9])/.test(lowered)) {
        return "o3";
    }
    return raw;
}
127
+ //# sourceMappingURL=trust-calibration.js.map
@@ -0,0 +1,2 @@
1
+ import type { RunMartinOptions, RunMartinResult } from "./types.js";
2
/**
 * Runs the Martin attempt loop for one task: the adapter is executed repeatedly
 * until an attempt passes verification, an exit decision is inferred, or the
 * budget stops the run.
 *
 * @param options - Run configuration (workspace, project, task, adapter, optional budget)
 * @returns The final loop record, the exit decision, and the distilled final context
 */
+ export declare function runMartin(options: RunMartinOptions): Promise<RunMartinResult>;
@@ -0,0 +1,287 @@
1
+ import { appendLoopEvent, createLoopRecord } from "../contracts/index.js";
2
+ import { distillContext } from "./context-distillation.js";
3
+ import { evaluateCostGovernor } from "./cost-governor.js";
4
+ import { inferExit } from "./exit-intelligence.js";
5
+ import { classifyFailure } from "./failure-taxonomy.js";
6
/**
 * Runs the Martin attempt loop for a single task.
 *
 * Creates a loop record, then asks `options.adapter` to execute attempts until
 * one of the following happens:
 *  - the cost governor reports `shouldStop` before an attempt ("budget_exit"),
 *  - an attempt completes and its verification passes ("completed"),
 *  - `inferExit` decides to stop after a failed attempt, or
 *  - `budget.maxIterations` attempts have been used ("budget_exit").
 *
 * Every state change is appended to the loop record as an event. Errors thrown
 * by `options.adapter.execute` are not caught here and propagate to the caller.
 *
 * @param options - Run configuration: `workspaceId`, `projectId`, `task`,
 *   `adapter`, plus optional `budget`, `teamId`, `now` (timestamp factory,
 *   defaults to the current time as an ISO string) and `idFactory`.
 * @returns `{ loop, decision, finalContext }`: the finalized loop record, the
 *   decision that ended the run, and the context distilled from the final loop.
 */
export async function runMartin(options) {
    const now = options.now ?? (() => new Date().toISOString());
    const contracts = createContractOptions(options.idFactory);
    const { adapter } = options;
    let loop = createLoopRecord({
        workspaceId: options.workspaceId,
        projectId: options.projectId,
        task: options.task,
        ...(options.budget ? { budget: options.budget } : {}),
        ...(options.teamId ? { teamId: options.teamId } : {})
    }, {
        ...contracts,
        now: now()
    });
    // Single exit path: finalize the loop, then distill the context from the final state.
    const exitWith = (decision, status) => {
        loop = finalizeLoop(loop, decision, contracts, now(), status);
        return {
            loop,
            decision,
            finalContext: distillContext(loop)
        };
    };
    loop = pushEvent(loop, {
        type: "run.started",
        lifecycleState: "running",
        payload: {
            adapterId: adapter.adapterId,
            adapterKind: adapter.kind
        }
    }, contracts, now(), "running");
    while (loop.attempts.length < loop.budget.maxIterations) {
        const preAttemptCost = evaluateCostGovernor({
            budget: loop.budget,
            cost: loop.cost,
            attemptsUsed: loop.attempts.length
        });
        if (preAttemptCost.shouldStop) {
            return exitWith({
                shouldExit: true,
                lifecycleState: "budget_exit",
                reason: "Budget governor reached a hard limit."
            }, "exited");
        }
        // The adapter sees the context as it was before this attempt was recorded.
        const attemptContext = distillContext(loop);
        const attemptIndex = loop.attempts.length + 1;
        const attemptStartedAt = now();
        const attemptId = makeId("att", options.idFactory);
        loop = {
            ...loop,
            attempts: [
                ...loop.attempts,
                {
                    attemptId,
                    index: attemptIndex,
                    adapterId: adapter.adapterId,
                    // Adapters without metadata fall back to their label instead of throwing.
                    model: adapter.metadata?.model ?? adapter.label,
                    startedAt: attemptStartedAt
                }
            ],
            status: "running",
            lifecycleState: "running",
            updatedAt: attemptStartedAt
        };
        loop = pushEvent(loop, {
            type: "attempt.started",
            lifecycleState: "running",
            payload: {
                attemptId,
                attemptIndex,
                adapterId: adapter.adapterId
            }
        }, contracts, attemptStartedAt, "running");
        const adapterRequest = {
            loopId: loop.loopId,
            workspaceId: loop.workspaceId,
            projectId: loop.projectId,
            attemptIndex,
            task: loop.task,
            context: attemptContext,
            budget: loop.budget,
            costState: preAttemptCost,
            ...(loop.teamId ? { teamId: loop.teamId } : {})
        };
        const result = await adapter.execute(adapterRequest);
        const completedAt = now();
        loop = applyResult(loop, attemptId, result, completedAt);
        loop = pushEvent(loop, {
            type: "attempt.completed",
            lifecycleState: "running",
            payload: {
                attemptId,
                status: result.status,
                summary: result.summary
            }
        }, contracts, completedAt, "running");
        const postAttemptCost = evaluateCostGovernor({
            budget: loop.budget,
            cost: loop.cost,
            attemptsUsed: loop.attempts.length
        });
        if (postAttemptCost.pressure !== "healthy") {
            loop = pushEvent(loop, {
                type: "budget.updated",
                lifecycleState: "running",
                payload: {
                    pressure: postAttemptCost.pressure,
                    remainingBudgetUsd: postAttemptCost.remainingBudgetUsd,
                    remainingIterations: postAttemptCost.remainingIterations,
                    remainingTokens: postAttemptCost.remainingTokens
                }
            }, contracts, now(), "running");
        }
        if (result.status === "completed" && result.verification.passed) {
            loop = pushEvent(loop, {
                type: "verification.completed",
                lifecycleState: "verifying",
                payload: {
                    attemptId,
                    passed: true,
                    summary: result.verification.summary
                }
            }, contracts, now(), "verifying");
            return exitWith({
                shouldExit: true,
                lifecycleState: "completed",
                reason: result.verification.summary
            }, "completed");
        }
        // Classify against the attempts that preceded the one that just finished.
        const failure = classifyFailure({
            attempts: loop.attempts.slice(0, -1),
            result
        });
        loop = annotateAttempt(loop, attemptId, failure);
        loop = pushEvent(loop, {
            type: "failure.classified",
            lifecycleState: "running",
            payload: {
                attemptId,
                failureClass: failure.failureClass,
                rationale: failure.rationale
            }
        }, contracts, now(), "running");
        loop = pushEvent(loop, {
            type: "intervention.selected",
            lifecycleState: "running",
            payload: {
                attemptId,
                intervention: failure.recommendedIntervention
            }
        }, contracts, now(), "running");
        loop = pushEvent(loop, {
            type: "verification.completed",
            lifecycleState: "verifying",
            payload: {
                attemptId,
                passed: result.verification.passed,
                summary: result.verification.summary
            }
        }, contracts, now(), "verifying");
        const decision = inferExit({
            loop,
            lastResult: result,
            lastFailure: failure,
            costState: postAttemptCost
        });
        if (decision.shouldExit) {
            return exitWith(decision, lifecycleStatus(decision));
        }
    }
    return exitWith({
        shouldExit: true,
        lifecycleState: "budget_exit",
        reason: "The run exhausted its iteration budget."
    }, "exited");
}
207
/**
 * Folds an adapter result into the loop record: completes the matching attempt,
 * appends any artifacts, and adds the attempt's usage to the cumulative cost.
 *
 * Usage fields that the adapter left out count as 0, the same way `avoidedUsd`
 * is treated, so a single incomplete result cannot turn the running totals into
 * NaN (which budget comparisons would never trip on).
 *
 * @param loop - Current loop record (not mutated)
 * @param attemptId - Id of the attempt that just finished
 * @param result - Adapter result; `usage`, `summary`, optional `artifacts` and `failure` are read
 * @param completedAt - Timestamp recorded on the attempt and as the loop's `updatedAt`
 * @returns A new loop record with the result applied
 */
function applyResult(loop, attemptId, result, completedAt) {
    const usage = result.usage;
    const attempts = loop.attempts.map((attempt) => attempt.attemptId === attemptId
        ? buildCompletedAttempt(attempt, result, completedAt)
        : attempt);
    return {
        ...loop,
        attempts,
        artifacts: [...loop.artifacts, ...(result.artifacts ?? [])],
        cost: {
            actualUsd: round(loop.cost.actualUsd + (usage.actualUsd ?? 0)),
            avoidedUsd: round(loop.cost.avoidedUsd + (usage.avoidedUsd ?? 0)),
            tokensIn: loop.cost.tokensIn + (usage.tokensIn ?? 0),
            tokensOut: loop.cost.tokensOut + (usage.tokensOut ?? 0)
        },
        updatedAt: completedAt
    };
}
221
/**
 * Produces the completed form of an attempt record.
 *
 * @param attempt - Attempt as recorded when it started (not mutated)
 * @param result - Adapter result; `summary` and the optional `failure.classHint` are read
 * @param completedAt - Completion timestamp
 * @returns A copy of `attempt` with `completedAt` and `summary` set, plus
 *   `failureClass` when the adapter supplied a truthy class hint
 */
function buildCompletedAttempt(attempt, result, completedAt) {
    const classHint = result.failure?.classHint;
    return {
        ...attempt,
        completedAt,
        summary: result.summary,
        ...(classHint ? { failureClass: classHint } : {})
    };
}
232
/**
 * Records the classified failure on one attempt.
 *
 * @param loop - Current loop record (not mutated)
 * @param attemptId - Id of the attempt to annotate
 * @param failure - Classification; `failureClass` and `recommendedIntervention` are read
 * @returns A new loop record in which the matching attempt carries `failureClass`
 *   and `intervention`; all other attempts are the same object references
 */
function annotateAttempt(loop, attemptId, failure) {
    const withFailure = (attempt) => attempt.attemptId === attemptId
        ? {
            ...attempt,
            failureClass: failure.failureClass,
            intervention: failure.recommendedIntervention
        }
        : attempt;
    return {
        ...loop,
        attempts: loop.attempts.map((attempt) => withFailure(attempt))
    };
}
247
/**
 * Closes a run: stamps the final lifecycle state and status on the loop and
 * appends the "run.completed" event carrying the exit reason.
 *
 * @param loop - Loop record to close (not mutated)
 * @param decision - Exit decision; `lifecycleState` and `reason` are read
 * @param contracts - Options forwarded to the event appender
 * @param timestamp - Timestamp used for `updatedAt` and for the event
 * @param status - Final loop status
 * @returns The loop record returned by `pushEvent`
 */
function finalizeLoop(loop, decision, contracts, timestamp, status) {
    const closedLoop = {
        ...loop,
        lifecycleState: decision.lifecycleState,
        status,
        updatedAt: timestamp
    };
    const completionEvent = {
        type: "run.completed",
        lifecycleState: decision.lifecycleState,
        payload: {
            reason: decision.reason
        }
    };
    return pushEvent(closedLoop, completionEvent, contracts, timestamp, status);
}
261
/**
 * Appends a timestamped event to the loop via `appendLoopEvent`.
 *
 * @param loop - Loop record to append to
 * @param event - Event without a timestamp; `lifecycleState` is optional
 * @param contracts - Base options for the appender; `now` is overridden with `timestamp`
 * @param timestamp - Timestamp stamped on the event
 * @param status - When truthy, also written to the returned loop together with the
 *   event's lifecycle state (falling back to the appended loop's own state)
 * @returns The appended loop, with status and lifecycle state overridden when `status` is given
 */
function pushEvent(loop, event, contracts, timestamp, status) {
    const stampedEvent = { ...event, timestamp };
    const appendOptions = { ...contracts, now: timestamp };
    const next = appendLoopEvent(loop, stampedEvent, appendOptions);
    if (!status) {
        return next;
    }
    return {
        ...next,
        status,
        lifecycleState: event.lifecycleState ?? next.lifecycleState
    };
}
271
/**
 * Maps an exit decision to the loop status stored on the record.
 *
 * @param decision - Exit decision; only `lifecycleState` is read
 * @returns "completed" for a completed lifecycle state, otherwise "exited"
 */
function lifecycleStatus(decision) {
    if (decision.lifecycleState === "completed") {
        return "completed";
    }
    return "exited";
}
274
/**
 * Builds the base options object passed to the contract helpers.
 *
 * @param idFactory - Optional id generator supplied by the caller
 * @returns `{ idFactory }` when a factory was given, otherwise an empty object
 */
function createContractOptions(idFactory) {
    if (!idFactory) {
        return {};
    }
    return { idFactory };
}
277
/**
 * Creates an identifier of the form `<prefix>_<8 base-36 characters>`.
 *
 * `Math.random().toString(36)` does not always yield eight fractional digits
 * (0.5 becomes "0.i"), so characters are accumulated until eight are available.
 * These ids are not cryptographically strong.
 *
 * @param prefix - Prefix placed before the underscore
 * @param idFactory - Optional generator; when given, its result is returned as-is
 * @returns The generated identifier
 */
function makeId(prefix, idFactory) {
    if (idFactory) {
        return idFactory(prefix);
    }
    let entropy = "";
    while (entropy.length < 8) {
        entropy += Math.random().toString(36).slice(2);
    }
    return `${prefix}_${entropy.slice(0, 8)}`;
}
284
/**
 * Rounds a number to four decimal places.
 *
 * @param value - Number to round
 * @returns `value` formatted with four fixed decimals and converted back to a number
 */
function round(value) {
    const fixed = value.toFixed(4);
    return +fixed;
}
287
+ //# sourceMappingURL=run-martin.js.map
@@ -0,0 +1,62 @@
1
+ /**
2
+ * CVE Patch Scanner — Phase 37.
3
+ *
4
+ * Parses a unified diff for newly added package dependencies and queries the
5
+ * OSV.dev API (https://api.osv.dev) to check for known CVEs. Blocks the
6
+ * attempt if any discovered package has severity HIGH or CRITICAL.
7
+ *
8
+ * Supported manifest formats:
9
+ * - package.json (npm/Node.js) — ecosystem: "npm"
10
+ * - requirements.txt (Python) — ecosystem: "PyPI"
11
+ * - Cargo.toml (Rust) — ecosystem: "crates.io"
12
+ * - go.mod (Go) — ecosystem: "Go"
13
+ *
14
+ * Design rules:
15
+ * - Advisory when OSV.dev is unreachable (never hard-fail on network error)
16
+ * - Only checks ADDED lines (+ prefix) — not removed packages
17
+ * - Deduplicates package names before querying
18
+ * - MAX_PACKAGES_PER_SCAN = 20 to bound latency
19
+ */
20
/**
 * Severity level attached to a CVE match.
 * Per the scan contract in this file, "HIGH" and "CRITICAL" are the levels
 * that cause an attempt to be blocked.
 */
export type CveSeverity =
    | "LOW"
    | "MEDIUM"
    | "HIGH"
    | "CRITICAL"
    | "UNKNOWN";
21
/** One vulnerability reported for a package candidate. */
export interface CveMatch {
    /** Name of the affected package. */
    packageName: string;
    /** Package version, when one is known. */
    version?: string;
    /** Package ecosystem, e.g. "npm", "PyPI", "crates.io" or "Go". */
    ecosystem: string;
    /** Vulnerability identifier (presumably as reported by OSV.dev — confirm). */
    vulnId: string;
    /** Short description of the vulnerability. */
    summary: string;
    /** Severity level of this vulnerability. */
    severity: CveSeverity;
    /** Link associated with the vulnerability (presumably its advisory page — confirm). */
    url: string;
}
30
/** Outcome of scanning a diff for vulnerable dependencies. */
export interface CveScanResult {
    /** Packages that the diff newly adds. */
    packageCandidates: PackageCandidate[];
    /** Vulnerabilities found for any of those candidates. */
    matches: CveMatch[];
    /**
     * Set to true when at least one match is rated HIGH or CRITICAL.
     * Callers are expected to discard the attempt in that case.
     */
    blocked: boolean;
    /** Human-readable explanation of the block; undefined when not blocked. */
    blockReason?: string;
    /** True when OSV.dev could not be reached, so the scan was advisory-only. */
    networkError?: boolean;
}
45
/** A dependency that a diff newly introduces. */
export interface PackageCandidate {
    /** Package name. */
    name: string;
    /** Package version, when one is known. */
    version?: string;
    /** Package ecosystem, e.g. "npm", "PyPI", "crates.io" or "Go". */
    ecosystem: string;
}
50
/**
 * Collect the package dependencies that a unified diff newly adds.
 *
 * Only added lines (those starting with `+`) are inspected, so packages that
 * the diff removes are never reported.
 *
 * @param diff - unified diff text
 * @returns the package candidates found on added lines
 */
export declare function extractPackageCandidates(diff: string): PackageCandidate[];
55
/**
 * Check the dependencies newly added by a unified diff against the OSV.dev
 * vulnerability database.
 *
 * If OSV.dev is unreachable the scan returns immediately in advisory mode
 * rather than failing. The attempt is blocked when any package has a
 * vulnerability rated HIGH or CRITICAL.
 *
 * @param diff - unified diff text
 * @returns a promise resolving to the scan result
 */
export declare function scanDiffForCves(diff: string): Promise<CveScanResult>;