martin-loop 0.1.5 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274) hide show
  1. package/CODE_OF_CONDUCT.md +32 -0
  2. package/LICENSE +21 -21
  3. package/README.md +307 -398
  4. package/demo/seeded-workspace/README.md +35 -35
  5. package/demo/seeded-workspace/TASKS.md +29 -29
  6. package/demo/seeded-workspace/martin.config.yaml +11 -11
  7. package/demo/seeded-workspace/package.json +8 -8
  8. package/demo/seeded-workspace/src/invoice-summary.js +11 -11
  9. package/demo/seeded-workspace/test/invoice-summary.test.js +20 -20
  10. package/dist/bin/martin-loop.js +0 -0
  11. package/dist/vendor/adapters/counter.d.ts +1 -0
  12. package/dist/vendor/adapters/counter.js +4 -0
  13. package/dist/vendor/adapters/git-baseline.d.ts +50 -0
  14. package/dist/vendor/adapters/git-baseline.js +233 -0
  15. package/dist/vendor/adapters/openrouter-adapter.d.ts +15 -0
  16. package/dist/vendor/adapters/openrouter-adapter.js +302 -0
  17. package/dist/vendor/adapters/usage.d.ts +48 -0
  18. package/dist/vendor/adapters/usage.js +66 -0
  19. package/dist/vendor/cli/bin/exit.d.ts +12 -0
  20. package/dist/vendor/cli/bin/exit.js +28 -0
  21. package/dist/vendor/cli/commands/analyze.d.ts +5 -0
  22. package/dist/vendor/cli/commands/analyze.js +58 -0
  23. package/dist/vendor/cli/commands/audit-log-verify.d.ts +34 -0
  24. package/dist/vendor/cli/commands/audit-log-verify.js +99 -0
  25. package/dist/vendor/cli/commands/audit.d.ts +8 -0
  26. package/dist/vendor/cli/commands/audit.js +199 -0
  27. package/dist/vendor/cli/commands/corpus.d.ts +5 -0
  28. package/dist/vendor/cli/commands/corpus.js +60 -0
  29. package/dist/vendor/cli/commands/doctor.d.ts +8 -0
  30. package/dist/vendor/cli/commands/doctor.js +219 -0
  31. package/dist/vendor/cli/commands/explain.d.ts +17 -0
  32. package/dist/vendor/cli/commands/explain.js +176 -0
  33. package/dist/vendor/cli/commands/export.d.ts +5 -0
  34. package/dist/vendor/cli/commands/export.js +60 -0
  35. package/dist/vendor/cli/commands/governance.d.ts +8 -0
  36. package/dist/vendor/cli/commands/governance.js +95 -0
  37. package/dist/vendor/cli/commands/improve.d.ts +18 -0
  38. package/dist/vendor/cli/commands/improve.js +396 -0
  39. package/dist/vendor/cli/commands/init.d.ts +8 -0
  40. package/dist/vendor/cli/commands/init.js +281 -0
  41. package/dist/vendor/cli/commands/migration.d.ts +8 -0
  42. package/dist/vendor/cli/commands/migration.js +67 -0
  43. package/dist/vendor/cli/commands/prior.d.ts +23 -0
  44. package/dist/vendor/cli/commands/prior.js +145 -0
  45. package/dist/vendor/cli/commands/resume.d.ts +21 -0
  46. package/dist/vendor/cli/commands/resume.js +73 -0
  47. package/dist/vendor/cli/commands/verify.d.ts +6 -0
  48. package/dist/vendor/cli/commands/verify.js +43 -0
  49. package/dist/vendor/cli/research/public-corpus.d.ts +43 -0
  50. package/dist/vendor/cli/research/public-corpus.js +151 -0
  51. package/dist/vendor/cli/ui/error-card.d.ts +38 -0
  52. package/dist/vendor/cli/ui/error-card.js +103 -0
  53. package/dist/vendor/cli/ui/mission-brief.d.ts +41 -0
  54. package/dist/vendor/cli/ui/mission-brief.js +173 -0
  55. package/dist/vendor/cli/ui/summary-card.d.ts +34 -0
  56. package/dist/vendor/cli/ui/summary-card.js +102 -0
  57. package/dist/vendor/contracts/audit.d.ts +46 -0
  58. package/dist/vendor/contracts/audit.js +360 -0
  59. package/dist/vendor/contracts/post-phase15.d.ts +240 -0
  60. package/dist/vendor/contracts/post-phase15.js +166 -0
  61. package/dist/vendor/core/agent/mandates.d.ts +46 -0
  62. package/dist/vendor/core/agent/mandates.js +178 -0
  63. package/dist/vendor/core/agent/receipts.d.ts +38 -0
  64. package/dist/vendor/core/agent/receipts.js +131 -0
  65. package/dist/vendor/core/agent/signing.d.ts +17 -0
  66. package/dist/vendor/core/agent/signing.js +91 -0
  67. package/dist/vendor/core/attestation/sign.d.ts +25 -0
  68. package/dist/vendor/core/attestation/sign.js +216 -0
  69. package/dist/vendor/core/autonomy/autonomous-promotion.d.ts +120 -0
  70. package/dist/vendor/core/autonomy/autonomous-promotion.js +346 -0
  71. package/dist/vendor/core/autonomy/envelope-v2.d.ts +29 -0
  72. package/dist/vendor/core/autonomy/envelope-v2.js +60 -0
  73. package/dist/vendor/core/autonomy/envelope.d.ts +17 -0
  74. package/dist/vendor/core/autonomy/envelope.js +27 -0
  75. package/dist/vendor/core/autonomy/escalation-ledger.d.ts +20 -0
  76. package/dist/vendor/core/autonomy/escalation-ledger.js +18 -0
  77. package/dist/vendor/core/autonomy/resume.d.ts +15 -0
  78. package/dist/vendor/core/autonomy/resume.js +23 -0
  79. package/dist/vendor/core/circuit/circuit-breaker.d.ts +60 -0
  80. package/dist/vendor/core/circuit/circuit-breaker.js +143 -0
  81. package/dist/vendor/core/context-distillation.d.ts +3 -0
  82. package/dist/vendor/core/context-distillation.js +44 -0
  83. package/dist/vendor/core/context-flow/compile-context.d.ts +8 -0
  84. package/dist/vendor/core/context-flow/compile-context.js +111 -0
  85. package/dist/vendor/core/context-flow/entities.d.ts +2 -0
  86. package/dist/vendor/core/context-flow/entities.js +44 -0
  87. package/dist/vendor/core/context-flow/evaluate-policy.d.ts +2 -0
  88. package/dist/vendor/core/context-flow/evaluate-policy.js +42 -0
  89. package/dist/vendor/core/context-flow/index.d.ts +11 -0
  90. package/dist/vendor/core/context-flow/index.js +24 -0
  91. package/dist/vendor/core/context-flow/labels.d.ts +3 -0
  92. package/dist/vendor/core/context-flow/labels.js +17 -0
  93. package/dist/vendor/core/context-flow/normalizer.d.ts +9 -0
  94. package/dist/vendor/core/context-flow/normalizer.js +69 -0
  95. package/dist/vendor/core/context-flow/profiles.d.ts +33 -0
  96. package/dist/vendor/core/context-flow/profiles.js +36 -0
  97. package/dist/vendor/core/context-flow/redaction.d.ts +1 -0
  98. package/dist/vendor/core/context-flow/redaction.js +6 -0
  99. package/dist/vendor/core/context-flow/sensitivity.d.ts +2 -0
  100. package/dist/vendor/core/context-flow/sensitivity.js +27 -0
  101. package/dist/vendor/core/context-flow/sync-preview.d.ts +2 -0
  102. package/dist/vendor/core/context-flow/sync-preview.js +22 -0
  103. package/dist/vendor/core/context-flow/token-estimator.d.ts +3 -0
  104. package/dist/vendor/core/context-flow/token-estimator.js +13 -0
  105. package/dist/vendor/core/context-flow/types.d.ts +91 -0
  106. package/dist/vendor/core/context-flow/types.js +2 -0
  107. package/dist/vendor/core/context-utility.d.ts +47 -0
  108. package/dist/vendor/core/context-utility.js +405 -0
  109. package/dist/vendor/core/cost/pipeline.d.ts +92 -0
  110. package/dist/vendor/core/cost/pipeline.js +141 -0
  111. package/dist/vendor/core/cost/tagged-cost.d.ts +27 -0
  112. package/dist/vendor/core/cost/tagged-cost.js +55 -0
  113. package/dist/vendor/core/cost-governor.d.ts +2 -0
  114. package/dist/vendor/core/cost-governor.js +50 -0
  115. package/dist/vendor/core/cve/cve-check.d.ts +80 -0
  116. package/dist/vendor/core/cve/cve-check.js +172 -0
  117. package/dist/vendor/core/digital-twin/index.d.ts +27 -0
  118. package/dist/vendor/core/digital-twin/index.js +90 -0
  119. package/dist/vendor/core/drift/drift-graph.d.ts +47 -0
  120. package/dist/vendor/core/drift/drift-graph.js +100 -0
  121. package/dist/vendor/core/drift/objective-lock.d.ts +69 -0
  122. package/dist/vendor/core/drift/objective-lock.js +88 -0
  123. package/dist/vendor/core/drift/scope.d.ts +46 -0
  124. package/dist/vendor/core/drift/scope.js +102 -0
  125. package/dist/vendor/core/drift/signature-lock.d.ts +48 -0
  126. package/dist/vendor/core/drift/signature-lock.js +202 -0
  127. package/dist/vendor/core/drift/stale-proof-gate.d.ts +21 -0
  128. package/dist/vendor/core/drift/stale-proof-gate.js +19 -0
  129. package/dist/vendor/core/eval/known-bad-world-runner.d.ts +24 -0
  130. package/dist/vendor/core/eval/known-bad-world-runner.js +256 -0
  131. package/dist/vendor/core/evidence/claim-audit.d.ts +18 -0
  132. package/dist/vendor/core/evidence/claim-audit.js +89 -0
  133. package/dist/vendor/core/exit-intelligence.d.ts +2 -0
  134. package/dist/vendor/core/exit-intelligence.js +58 -0
  135. package/dist/vendor/core/explain/formatter.d.ts +42 -0
  136. package/dist/vendor/core/explain/formatter.js +171 -0
  137. package/dist/vendor/core/explain/timeline.d.ts +29 -0
  138. package/dist/vendor/core/explain/timeline.js +213 -0
  139. package/dist/vendor/core/failure-taxonomy.d.ts +2 -0
  140. package/dist/vendor/core/failure-taxonomy.js +76 -0
  141. package/dist/vendor/core/gateway/index.d.ts +10 -0
  142. package/dist/vendor/core/gateway/index.js +12 -0
  143. package/dist/vendor/core/gateway/registry.d.ts +40 -0
  144. package/dist/vendor/core/gateway/registry.js +97 -0
  145. package/dist/vendor/core/gateway/transport.d.ts +31 -0
  146. package/dist/vendor/core/gateway/transport.js +82 -0
  147. package/dist/vendor/core/gateway/vault.d.ts +19 -0
  148. package/dist/vendor/core/gateway/vault.js +29 -0
  149. package/dist/vendor/core/graph/adapters.d.ts +43 -0
  150. package/dist/vendor/core/graph/adapters.js +91 -0
  151. package/dist/vendor/core/graph/hotspots.d.ts +22 -0
  152. package/dist/vendor/core/graph/hotspots.js +30 -0
  153. package/dist/vendor/core/graph/index.d.ts +1 -0
  154. package/dist/vendor/core/graph/index.js +2 -0
  155. package/dist/vendor/core/honey/honey-tokens.d.ts +32 -0
  156. package/dist/vendor/core/honey/honey-tokens.js +44 -0
  157. package/dist/vendor/core/index.d.ts +2 -2
  158. package/dist/vendor/core/index.js +38 -12
  159. package/dist/vendor/core/learning/bayesian-update.d.ts +31 -0
  160. package/dist/vendor/core/learning/bayesian-update.js +60 -0
  161. package/dist/vendor/core/learning/prior-sets.d.ts +42 -0
  162. package/dist/vendor/core/learning/prior-sets.js +111 -0
  163. package/dist/vendor/core/learning/promotion-gate.d.ts +17 -0
  164. package/dist/vendor/core/learning/promotion-gate.js +23 -0
  165. package/dist/vendor/core/leash/blast-radius.d.ts +42 -0
  166. package/dist/vendor/core/leash/blast-radius.js +156 -0
  167. package/dist/vendor/core/leash/policy-leash.d.ts +31 -0
  168. package/dist/vendor/core/leash/policy-leash.js +117 -0
  169. package/dist/vendor/core/memo/memo.d.ts +63 -0
  170. package/dist/vendor/core/memo/memo.js +97 -0
  171. package/dist/vendor/core/memory/learning-pipeline.d.ts +154 -0
  172. package/dist/vendor/core/memory/learning-pipeline.js +391 -0
  173. package/dist/vendor/core/memory/palace.d.ts +84 -0
  174. package/dist/vendor/core/memory/palace.js +379 -0
  175. package/dist/vendor/core/merge/ast-merge.d.ts +22 -0
  176. package/dist/vendor/core/merge/ast-merge.js +350 -0
  177. package/dist/vendor/core/merge/text-merge.d.ts +12 -0
  178. package/dist/vendor/core/merge/text-merge.js +182 -0
  179. package/dist/vendor/core/otel/tracer.d.ts +45 -0
  180. package/dist/vendor/core/otel/tracer.js +116 -0
  181. package/dist/vendor/core/parallel/parallel-attempts.d.ts +28 -0
  182. package/dist/vendor/core/parallel/parallel-attempts.js +41 -0
  183. package/dist/vendor/core/parallel/scorer.d.ts +24 -0
  184. package/dist/vendor/core/parallel/scorer.js +65 -0
  185. package/dist/vendor/core/pattern-detection.d.ts +64 -0
  186. package/dist/vendor/core/pattern-detection.js +108 -0
  187. package/dist/vendor/core/persistence/checkpoint.d.ts +44 -0
  188. package/dist/vendor/core/persistence/checkpoint.js +156 -0
  189. package/dist/vendor/core/persistence/cleanup.d.ts +22 -0
  190. package/dist/vendor/core/persistence/cleanup.js +131 -0
  191. package/dist/vendor/core/persistence/index.d.ts +2 -0
  192. package/dist/vendor/core/persistence/index.js +1 -0
  193. package/dist/vendor/core/persistence/runs-reader.d.ts +52 -0
  194. package/dist/vendor/core/persistence/runs-reader.js +84 -0
  195. package/dist/vendor/core/persistence/store.d.ts +6 -1
  196. package/dist/vendor/core/persistence/store.js +5 -0
  197. package/dist/vendor/core/policy/file-touch-quota.d.ts +60 -0
  198. package/dist/vendor/core/policy/file-touch-quota.js +105 -0
  199. package/dist/vendor/core/policy/policy-loader.d.ts +30 -0
  200. package/dist/vendor/core/policy/policy-loader.js +170 -0
  201. package/dist/vendor/core/policy/policy-schema.d.ts +55 -0
  202. package/dist/vendor/core/policy/policy-schema.js +78 -0
  203. package/dist/vendor/core/probe/probe.d.ts +49 -0
  204. package/dist/vendor/core/probe/probe.js +115 -0
  205. package/dist/vendor/core/proof/patch-proof.d.ts +58 -0
  206. package/dist/vendor/core/proof/patch-proof.js +84 -0
  207. package/dist/vendor/core/proof/semantic-probe.d.ts +25 -0
  208. package/dist/vendor/core/proof/semantic-probe.js +82 -0
  209. package/dist/vendor/core/recovery/failure-mode-runner.d.ts +29 -0
  210. package/dist/vendor/core/recovery/failure-mode-runner.js +39 -0
  211. package/dist/vendor/core/red-blue/red-phase.d.ts +64 -0
  212. package/dist/vendor/core/red-blue/red-phase.js +141 -0
  213. package/dist/vendor/core/red-blue/risk-tiers.d.ts +22 -0
  214. package/dist/vendor/core/red-blue/risk-tiers.js +33 -0
  215. package/dist/vendor/core/replay/replay.d.ts +85 -0
  216. package/dist/vendor/core/replay/replay.js +109 -0
  217. package/dist/vendor/core/router/engine.d.ts +54 -0
  218. package/dist/vendor/core/router/engine.js +131 -0
  219. package/dist/vendor/core/router/index.d.ts +1 -0
  220. package/dist/vendor/core/router/index.js +2 -0
  221. package/dist/vendor/core/router/trust-calibration.d.ts +57 -0
  222. package/dist/vendor/core/router/trust-calibration.js +127 -0
  223. package/dist/vendor/core/run-martin.d.ts +2 -0
  224. package/dist/vendor/core/run-martin.js +287 -0
  225. package/dist/vendor/core/security/cve-scanner.d.ts +62 -0
  226. package/dist/vendor/core/security/cve-scanner.js +178 -0
  227. package/dist/vendor/core/sentinel/efficiency-sentinel.d.ts +29 -0
  228. package/dist/vendor/core/sentinel/efficiency-sentinel.js +30 -0
  229. package/dist/vendor/core/sentinel/progress-guard.d.ts +35 -0
  230. package/dist/vendor/core/sentinel/progress-guard.js +46 -0
  231. package/dist/vendor/core/siem/siem-emitter.d.ts +49 -0
  232. package/dist/vendor/core/siem/siem-emitter.js +157 -0
  233. package/dist/vendor/core/strategy/attempt-brief.d.ts +22 -0
  234. package/dist/vendor/core/strategy/attempt-brief.js +89 -0
  235. package/dist/vendor/core/summarize/diff-summary.d.ts +35 -0
  236. package/dist/vendor/core/summarize/diff-summary.js +204 -0
  237. package/dist/vendor/core/surface-signals.d.ts +21 -0
  238. package/dist/vendor/core/surface-signals.js +139 -0
  239. package/dist/vendor/core/truth/truth-wall.d.ts +51 -0
  240. package/dist/vendor/core/truth/truth-wall.js +69 -0
  241. package/dist/vendor/core/truth-spine.d.ts +26 -0
  242. package/dist/vendor/core/truth-spine.js +62 -0
  243. package/dist/vendor/core/types.d.ts +115 -0
  244. package/dist/vendor/core/types.js +2 -0
  245. package/dist/vendor/core/verification/tiered-verify.d.ts +17 -0
  246. package/dist/vendor/core/verification/tiered-verify.js +29 -0
  247. package/dist/vendor/core/verifier-pyramid.d.ts +32 -0
  248. package/dist/vendor/core/verifier-pyramid.js +111 -0
  249. package/dist/vendor/core/workflow-artifacts.d.ts +99 -0
  250. package/dist/vendor/core/workflow-artifacts.js +668 -0
  251. package/dist/vendor/core/wrap/supervised-run.d.ts +96 -0
  252. package/dist/vendor/core/wrap/supervised-run.js +178 -0
  253. package/docs/assets/cli-animated.svg +139 -0
  254. package/docs/assets/cli-static.svg +34 -0
  255. package/docs/assets/github-hero-v2.svg +23 -0
  256. package/docs/assets/martin-raplph.png.jpg +0 -0
  257. package/docs/assets/martinloop-logo.png +0 -0
  258. package/docs/assets/nvidia-inception-program-light.png +0 -0
  259. package/docs/assets/nvidia-inception-program.png +0 -0
  260. package/docs/assets/phase3c-sidesidebyside-demo.html +228 -0
  261. package/docs/assets/side-by-side.svg +134 -0
  262. package/docs/oss/CLAUDE-CODE-WALKTHROUGH.md +142 -142
  263. package/docs/oss/EXAMPLES.md +134 -134
  264. package/docs/oss/OSS-BOUNDARY-REPORT.json +1 -1
  265. package/docs/oss/OSS-BOUNDARY-REPORT.md +1 -1
  266. package/docs/oss/QUICKSTART.md +170 -165
  267. package/docs/oss/RALPH-LOOP-SAFETY.md +113 -113
  268. package/docs/oss/README.md +96 -96
  269. package/docs/oss/RELEASE-SURFACE-REPORT.json +2 -1
  270. package/docs/oss/RELEASE-SURFACE-REPORT.md +2 -1
  271. package/package.json +130 -58
  272. package/docs/distribution/DIRECTORY-SUBMISSIONS.md +0 -89
  273. package/docs/distribution/INTEGRATION-OUTREACH.md +0 -61
  274. package/docs/distribution/UNDER-3-CHALLENGE.md +0 -65
@@ -0,0 +1,141 @@
1
+ import { PROBE_COUNTS, RED_PHASE_MODEL, resolveRedBudgetPolicy } from "./risk-tiers.js";
2
+ // ─── Programmatic probes ──────────────────────────────────────────────────────
3
/**
 * Scans a patch with deterministic, regex-based adversarial probes.
 * No model call is made here.
 *
 * @param patch    Object exposing `diff` (the diff text) and `changedFiles`
 *                 (array of paths; only read by the paranoid manifest probe).
 * @param paranoid When truthy, the two extra paranoid probes (manifest change,
 *                 removed export) are evaluated after the standard ones.
 * @returns Findings in probe order; each entry carries `trapId`, `severity`
 *          ("warn" | "block") and `description`.
 */
function runProgrammaticProbes(patch, paranoid) {
    const diffText = patch.diff;
    // Standard probes, evaluated for every tier in this order.
    const probes = [
        {
            // Probe 1: assertion deletion (a removed line containing expect(...).to)
            trapId: "T01",
            severity: "warn",
            description: "Patch removes test assertions — possible weak-test false green.",
            fires: () => /^\-.*expect\(.*\)\.to/m.test(diffText)
        },
        {
            // Probe 2: grounding evasion pragma
            trapId: "T11",
            severity: "block",
            description: "Grounding evasion pragma detected in patch.",
            fires: () => /@martin-ignore|@ts-nocheck|eslint-disable/i.test(diffText)
        },
        {
            // Probe 3: context poisoning
            trapId: "T07",
            severity: "block",
            description: "Patch writes to agent context directory (.martin/ or .claude/).",
            fires: () => /\.martin\/|\.claude\//.test(diffText)
        },
        {
            // Probe 4: budget self-report
            trapId: "T10",
            severity: "block",
            description: "Patch introduces model-reported budget sourcing.",
            fires: () => /budgetSource.*model_reported|model_reported.*budget/i.test(diffText)
        }
    ];
    if (paranoid) {
        probes.push({
            // Probe 5: scope creep — an added "key": "value" line while a manifest file is among the changed files
            trapId: "T03",
            severity: "warn",
            description: "Paranoid scan: substantive manifest change detected.",
            fires: () => /^\+.*"[^"]+"\s*:\s*"[^"]+".*$/m.test(diffText) &&
                /package\.json|Cargo\.toml|go\.mod/i.test(patch.changedFiles.join(","))
        }, {
            // Probe 6: silent revert — a removed line that declared an exported function/const/class
            trapId: "T02",
            severity: "warn",
            description: "Paranoid scan: exported symbol removed — potential silent revert.",
            fires: () => /^\-.*export\s+(function|const|class)\s+\w+/m.test(diffText)
        });
    }
    // Keep only the probes that fired and strip the predicate from the reported finding.
    return probes
        .filter((probe) => probe.fires())
        .map(({ trapId, severity, description }) => ({ trapId, severity, description }));
}
63
+ // ─── Red phase runner ─────────────────────────────────────────────────────────
64
/**
 * Runs the Red phase for a given patch and risk tier.
 *
 * - baseline: programmatic probes only, no model call
 * - high_risk: paranoid programmatic scan, no model call
 * - release_critical: paranoid scan + one model call, made only when
 *   `options.modelClient` is supplied
 *
 * @param patch         Patch under review; forwarded to the programmatic probes
 *                      and to the prompt builder.
 * @param tier          Risk tier; anything other than "baseline" enables the
 *                      paranoid probes.
 * @param blueBudgetUsd Blue-phase budget, forwarded to `resolveRedBudgetPolicy`.
 * @param options       Optional `modelClient` (object with an async
 *                      `complete(prompt)` resolving to `{ findings, costUsd }`)
 *                      and optional `onLedgerEvent` callback.
 * @returns Summary with `riskTier`, `probesRun`, `findingsCount`, `findings`,
 *          `modelCallMade`, `budgetUsedUsd`, and `modelUsed` when a model call
 *          was made.
 */
export async function runRedPhase(patch, tier, blueBudgetUsd, options = {}) {
    const policy = resolveRedBudgetPolicy(tier, blueBudgetUsd);
    const paranoid = tier !== "baseline";
    let findings = runProgrammaticProbes(patch, paranoid);
    let modelCallMade = false;
    let modelUsed;
    let budgetUsedUsd = 0;
    const probesRun = PROBE_COUNTS[tier];
    if (policy.modelCallAllowed && options.modelClient) {
        const prompt = buildRedPhasePrompt(patch, findings);
        const modelResult = await options.modelClient.complete(prompt);
        findings = [...findings, ...modelResult.findings];
        modelCallMade = true;
        modelUsed = RED_PHASE_MODEL;
        // Only accumulate a finite numeric cost: a client that omits or mistypes
        // `costUsd` must not turn the reported budget into NaN or a string.
        const reportedCostUsd = modelResult.costUsd;
        if (typeof reportedCostUsd === "number" && Number.isFinite(reportedCostUsd)) {
            budgetUsedUsd += reportedCostUsd;
        }
    }
    const result = {
        riskTier: tier,
        probesRun,
        findingsCount: findings.length,
        findings,
        modelCallMade,
        // `modelUsed` is present only when a model call actually happened.
        ...(modelUsed !== undefined ? { modelUsed } : {}),
        budgetUsedUsd
    };
    // Emit ledger event (summary only — individual findings are not included).
    options.onLedgerEvent?.({
        type: "red_phase_findings",
        riskTier: tier,
        probesRun,
        findingsCount: findings.length,
        modelCallMade,
        timestamp: new Date().toISOString()
    });
    return result;
}
107
+ // ─── Policy helpers ───────────────────────────────────────────────────────────
108
/**
 * Acceptance rule for a Red-phase result: the patch passes only when every
 * finding is below block severity. Warn findings never reject on their own.
 *
 * @param findings Red-phase result object whose `findings` array is inspected.
 * @returns `true` when no entry has `severity === "block"`, otherwise `false`.
 */
export function shouldAcceptPatch(findings) {
    const isNonBlocking = (entry) => entry.severity !== "block";
    return findings.findings.every(isNonBlocking);
}
115
/**
 * Convenience builder for a Red-phase findings object — useful in tests and
 * the policy engine.
 *
 * @param input Object with required `riskTier` and `findings`, and optional
 *              `probesRun`, `modelCallMade`, `modelUsed`, `budgetUsedUsd`.
 * @returns A findings object where `probesRun` defaults to the tier's entry in
 *          `PROBE_COUNTS`, `modelCallMade` to `false`, `budgetUsedUsd` to `0`,
 *          and `modelUsed` is included only when provided.
 */
export function buildRedFindings(input) {
    const { riskTier, findings } = input;
    const probesRun = input.probesRun ?? PROBE_COUNTS[riskTier];
    const modelCallMade = input.modelCallMade ?? false;
    // Omit the key entirely (rather than setting it to undefined) when no model was used.
    const optionalModel = input.modelUsed === undefined ? {} : { modelUsed: input.modelUsed };
    const budgetUsedUsd = input.budgetUsedUsd ?? 0;
    return {
        riskTier,
        probesRun,
        findingsCount: findings.length,
        findings,
        modelCallMade,
        ...optionalModel,
        budgetUsedUsd
    };
}
129
+ // ─── Internal helpers ─────────────────────────────────────────────────────────
130
/**
 * Assembles the newline-separated prompt sent to the Red-phase model.
 *
 * @param patch            Patch object; `changedFiles` is listed and the first
 *                         2000 characters of `diff` are embedded.
 * @param existingFindings Findings already produced by the programmatic probes;
 *                         only their `trapId`s are passed to the model.
 * @returns The prompt text (seven lines when the diff excerpt has no newlines).
 */
function buildRedPhasePrompt(patch, existingFindings) {
    const knownTrapIds = existingFindings.map((finding) => finding.trapId);
    const fileList = patch.changedFiles.join(", ");
    const diffExcerpt = patch.diff.slice(0, 2000);
    const instructions = "You are a security-focused code reviewer running adversarial analysis.\n" +
        "Analyze this patch for adversarial patterns not caught by programmatic probes.\n" +
        "Return JSON array of findings with: trapId, severity (warn|block), description.\n";
    return (instructions +
        `Already detected: ${JSON.stringify(knownTrapIds)}\n` +
        `Changed files: ${fileList}\n` +
        "Diff (truncated to 2000 chars):\n" +
        diffExcerpt);
}
141
+ //# sourceMappingURL=red-phase.js.map
@@ -0,0 +1,22 @@
1
/** Risk tier of a patch; selects the Red-phase budget multiplier, probe count and whether a model call is permitted. */
export type RiskTier = "baseline" | "high_risk" | "release_critical";
2
/** Budget policy for one Red-phase run, as returned by `resolveRedBudgetPolicy`. */
export interface RedBudgetPolicy {
    /** Tier the policy was resolved for. */
    riskTier: RiskTier;
    /** Blue-phase budget the cap was derived from, echoed back unchanged. */
    blueBudgetUsd: number;
    /** Cap on Red phase spend: 30% / 100% / 150% of Blue (baseline / high_risk / release_critical) */
    redBudgetCapUsd: number;
    /** Only release_critical permits a Haiku model call */
    modelCallAllowed: boolean;
}
10
/**
 * Returns the Red phase budget policy for a given risk tier and Blue budget.
 *
 * @param tier          Risk tier to resolve.
 * @param blueBudgetUsd Blue-phase budget; the cap is this value times the tier's multiplier.
 * @returns The policy, with `modelCallAllowed` true only for "release_critical".
 */
export declare function resolveRedBudgetPolicy(tier: RiskTier, blueBudgetUsd: number): RedBudgetPolicy;
14
/**
 * Probe counts per tier, reported as `probesRun` by the Red phase.
 *   baseline         = standard 6-probe sweep
 *   high_risk        = paranoid 12-probe sweep
 *   release_critical = paranoid 12-probe sweep + model
 *
 * NOTE(review): the programmatic probe function in red-phase.js evaluates
 * 4 standard regex probes plus 2 paranoid ones; confirm whether these counts
 * include probes defined elsewhere.
 */
export declare const PROBE_COUNTS: Record<RiskTier, number>;
21
/**
 * The only model ever permitted in the Red phase.
 * `runRedPhase` reports this identifier as `modelUsed` whenever a model call is made.
 */
export declare const RED_PHASE_MODEL: "claude-haiku-4-5-20251001";
@@ -0,0 +1,33 @@
1
// ─── Risk Tier Definitions ────────────────────────────────────────────────────
// Controls how hard the Red phase probes a patch and whether it may call a
// model. Each cap is a fraction of the Blue phase budget.
const BUDGET_MULTIPLIERS = {
    baseline: 0.30,
    high_risk: 1.00,
    release_critical: 1.50
};
/**
 * Resolves the Red phase budget policy for a risk tier and Blue budget.
 *
 * @param tier          Risk tier key used to look up the budget multiplier.
 * @param blueBudgetUsd Blue-phase budget in USD.
 * @returns `{ riskTier, blueBudgetUsd, redBudgetCapUsd, modelCallAllowed }`;
 *          a tier missing from the multiplier table yields a NaN cap.
 */
export function resolveRedBudgetPolicy(tier, blueBudgetUsd) {
    const multiplier = BUDGET_MULTIPLIERS[tier];
    const redBudgetCapUsd = blueBudgetUsd * multiplier;
    // A model call is reserved for the most critical tier.
    const modelCallAllowed = tier === "release_critical";
    return { riskTier: tier, blueBudgetUsd, redBudgetCapUsd, modelCallAllowed };
}
20
/**
 * Probe counts per tier, reported as `probesRun` by the Red phase.
 *   baseline         = standard 6-probe sweep
 *   high_risk        = paranoid 12-probe sweep
 *   release_critical = paranoid 12-probe sweep + model
 *
 * NOTE(review): the programmatic probe function in red-phase.js evaluates
 * 4 standard regex probes plus 2 paranoid ones; confirm whether these counts
 * include probes defined elsewhere.
 */
export const PROBE_COUNTS = {
    baseline: 6,
    high_risk: 12,
    release_critical: 12
};
31
/**
 * The only model ever permitted in the Red phase.
 * `runRedPhase` reports this identifier as `modelUsed` whenever a model call is made.
 */
export const RED_PHASE_MODEL = "claude-haiku-4-5-20251001";
33
+ //# sourceMappingURL=risk-tiers.js.map
@@ -0,0 +1,85 @@
1
+ /**
2
+ * replay.ts — SLICE-10
3
+ *
4
+ * Reproducibility seal: re-runs the decision pipeline over stored attempt
5
+ * artifacts (no model calls) and confirms every gate decision still matches.
6
+ *
7
+ * Any deterministic mismatch is a P0 defect in the decision pipeline.
8
+ */
9
/** Persisted record of one attempt and the gate decisions made for it; the input to replay. */
export interface StoredAttemptArtifact {
    /** Attempt identifier; `replayLoop` uses it as the key into `verifierResults`. */
    attemptId: string;
    /** Identifier of the loop the attempt belongs to. */
    loopId: string;
    /** The diff that was evaluated */
    diff: string;
    /** The objective at time of evaluation */
    objective: string;
    /** Decisions recorded during the original run */
    decisions: {
        /** Leash gate outcome; `blocked` is compared against the replayed leash check. */
        leash: {
            blocked: boolean;
            matchedPattern?: string;
        };
        /** Grounding gate outcome. Recorded only — `replayAttempt` does not re-run this gate. */
        grounding: {
            contradictions: number;
        };
        /** Proof gate outcome; compared against the replay when a verifier result is supplied. */
        proof: {
            grade: "A" | "B" | "C";
            passed: boolean;
        };
        /** Overall verdict recorded for the attempt. */
        finalVerdict: "ACCEPTED" | "REJECTED";
    };
    /** ISO timestamp of original run */
    recordedAt: string;
}
34
/** Gate decisions in the same shape as `StoredAttemptArtifact.decisions`. */
export interface ReplayDecisions {
    /** Leash gate: whether a dangerous pattern matched, and which one. */
    leash: {
        blocked: boolean;
        matchedPattern?: string;
    };
    /** Grounding gate: number of contradictions found. */
    grounding: {
        contradictions: number;
    };
    /** Proof gate: grade and pass/fail. */
    proof: {
        grade: "A" | "B" | "C";
        passed: boolean;
    };
    /** Overall verdict. */
    finalVerdict: "ACCEPTED" | "REJECTED";
}
48
/** One disagreement between a recorded decision and its replayed counterpart. */
export interface ReplayMismatch {
    /** Gate whose replayed decision differed from the recorded one. */
    gate: "leash" | "grounding" | "proof" | "finalVerdict";
    /** Value recorded in the stored artifact. */
    original: unknown;
    /** Value produced by the replay. */
    replayed: unknown;
    /** Defect severity; every mismatch emitted by `replayAttempt` is "P0". */
    severity: "P0" | "P1";
}
54
/** Result of replaying a single stored attempt. */
export interface ReplayReport {
    /** Loop identifier copied from the artifact. */
    loopId: string;
    /** Attempt identifier copied from the artifact. */
    attemptId: string;
    /** True when the replay produced no mismatches. */
    match: boolean;
    /** Every gate whose replayed decision differed from the recorded one. */
    mismatches: ReplayMismatch[];
    /** ISO timestamp taken when the replay ran. */
    replayedAt: string;
    /** SHA-256 hex digest of the string `${loopId}:${attemptId}:${replayedAt}:${match}` */
    replayHash: string;
}
63
/**
 * Re-runs the leash check over a stored diff.
 * Described as using the same BLOCKED_PATTERNS as leash.ts — but as a
 * lightweight re-implementation to avoid circular deps in the replay layer.
 *
 * @param diff Diff text to scan.
 * @returns `blocked: true` plus the name of the first matching pattern, or
 *          `blocked: false` when nothing matches.
 */
export declare function replayLeashCheck(diff: string): {
    blocked: boolean;
    matchedPattern?: string;
};
72
/**
 * Re-evaluates the proof grade from stored inputs.
 * Grade is deterministic from the verifier result string (case-insensitive
 * substring checks for pass/fail markers); `objective` is accepted but not
 * consulted by the current implementation.
 *
 * @returns Grade "A" (passed) when only pass markers are present, "B" when
 *          both pass and fail markers are present, "C" otherwise.
 */
export declare function replayProofGrade(verifierResult: string, objective: string): {
    grade: "A" | "B" | "C";
    passed: boolean;
};
80
/**
 * Replays the deterministic gates for one stored attempt and reports every
 * decision that differs from what the artifact recorded.
 *
 * @param artifact       Stored attempt to replay.
 * @param verifierResult Raw verifier output; when omitted the proof gate is not re-run.
 */
export declare function replayAttempt(artifact: StoredAttemptArtifact, verifierResult?: string): ReplayReport;
81
/**
 * Replays a list of stored attempts.
 *
 * @param artifacts       Attempts to replay, processed in order.
 * @param verifierResults Optional map from `attemptId` to raw verifier output.
 * @returns The per-attempt reports, whether all of them matched, and the total
 *          number of P0 mismatches across all reports.
 */
export declare function replayLoop(artifacts: StoredAttemptArtifact[], verifierResults?: Map<string, string>): {
    reports: ReplayReport[];
    allMatch: boolean;
    p0Count: number;
};
@@ -0,0 +1,109 @@
1
+ /**
2
+ * replay.ts — SLICE-10
3
+ *
4
+ * Reproducibility seal: re-runs the decision pipeline over stored attempt
5
+ * artifacts (no model calls) and confirms every gate decision still matches.
6
+ *
7
+ * Any deterministic mismatch is a P0 defect in the decision pipeline.
8
+ */
9
+ import { createHash } from "node:crypto";
10
+ // ---------------------------------------------------------------------------
11
+ // Pipeline re-runners (deterministic, no model calls)
12
+ // ---------------------------------------------------------------------------
13
/**
 * Replays the leash gate against a stored diff.
 * Described as mirroring the BLOCKED_PATTERNS of leash.ts; the table is
 * re-declared locally so the replay layer avoids a circular dependency.
 *
 * @param diff Diff text to scan.
 * @returns `{ blocked: true, matchedPattern }` for the first pattern (in table
 *          order) that matches, otherwise `{ blocked: false }`.
 */
export function replayLeashCheck(diff) {
    const leashTable = [
        { label: "SHELL_RM_RF", pattern: /rm\s+-rf?\b/ },
        { label: "SHELL_PIPE_EVAL", pattern: /curl.*\|\s*(?:ba)?sh|wget.*\|\s*(?:ba)?sh/ },
        { label: "GIT_FORCE_PUSH", pattern: /git\s+push\s+.*--force/ },
        { label: "GIT_RESET_HARD", pattern: /git\s+reset\s+--hard/ },
        { label: "FORK_BOMB", pattern: /:\s*\(\s*\)\s*\{.*:.*\|.*:.*\}/ },
        { label: "SHELL_CHMOD_777", pattern: /chmod\s+(?:a\+rwx|777)/ },
        { label: "NODE_EXEC_EVAL", pattern: /eval\s*\(/ },
        { label: "SUDO_ESCALATION", pattern: /sudo\s+/ }
    ];
    // Table order decides which name is reported when several patterns match.
    const firstHit = leashTable.find((entry) => entry.pattern.test(diff));
    if (firstHit === undefined) {
        return { blocked: false };
    }
    return { blocked: true, matchedPattern: firstHit.label };
}
35
/**
 * Recomputes the proof grade from the raw verifier output.
 * The grade depends only on case-insensitive substring checks of the verifier
 * text, so it is deterministic; `objective` is accepted but not consulted.
 *
 * @param verifierResult Raw verifier output.
 * @param objective      Objective of the attempt (currently unused).
 * @returns "A"/passed when a pass marker ("pass", "ok", "✓") is present and no
 *          fail marker ("fail", "error", "✗") is; "B"/not passed when both are
 *          present; "C"/not passed when no pass marker is present.
 */
export function replayProofGrade(verifierResult, objective) {
    const text = verifierResult.toLowerCase();
    const mentionsAny = (markers) => markers.some((marker) => text.includes(marker));
    const sawPass = mentionsAny(["pass", "ok", "✓"]);
    const sawFail = mentionsAny(["fail", "error", "✗"]);
    if (!sawPass) {
        return { grade: "C", passed: false };
    }
    return sawFail ? { grade: "B", passed: false } : { grade: "A", passed: true };
}
49
+ // ---------------------------------------------------------------------------
50
+ // Core replay function
51
+ // ---------------------------------------------------------------------------
52
/**
 * Replays the deterministic gates for one stored attempt and compares the
 * outcome with the decisions recorded in the artifact.
 *
 * Gates replayed: leash (always) and proof (only when `verifierResult` is
 * supplied). The grounding gate is not re-run here. Every disagreement is
 * reported as a P0 mismatch.
 *
 * @param artifact       Stored attempt (`loopId`, `attemptId`, `diff`,
 *                       `objective`, `decisions`).
 * @param verifierResult Raw verifier output. When omitted, the proof gate is
 *                       not replayed and is treated as passing for the verdict.
 * @returns Report with `match`, the list of `mismatches`, the replay timestamp
 *          and a SHA-256 hex digest of `loopId:attemptId:replayedAt:match`.
 */
export function replayAttempt(artifact, verifierResult) {
    const replayedAt = new Date().toISOString();
    const mismatches = [];
    const recorded = artifact.decisions;
    // Re-run leash
    const replayLeash = replayLeashCheck(artifact.diff);
    if (replayLeash.blocked !== recorded.leash.blocked) {
        mismatches.push({
            gate: "leash",
            original: recorded.leash,
            replayed: replayLeash,
            severity: "P0"
        });
    }
    // Re-run proof grade (if verifier result provided)
    let replayProof;
    if (verifierResult !== undefined) {
        replayProof = replayProofGrade(verifierResult, artifact.objective);
        // Compare the whole proof decision: a recorded grade/passed pair that
        // disagrees with the replay on either field is a pipeline defect.
        if (replayProof.grade !== recorded.proof.grade ||
            replayProof.passed !== recorded.proof.passed) {
            mismatches.push({
                gate: "proof",
                original: recorded.proof,
                replayed: replayProof,
                severity: "P0"
            });
        }
    }
    // Final verdict: derive from the *replayed* decisions, not the stored ones.
    // Without a verifier result the proof gate cannot be replayed, so it is
    // treated as passing (same as before).
    const proofPassed = replayProof === undefined ? true : replayProof.passed;
    const replayedVerdict = !replayLeash.blocked && proofPassed ? "ACCEPTED" : "REJECTED";
    if (replayedVerdict !== recorded.finalVerdict) {
        mismatches.push({
            gate: "finalVerdict",
            original: recorded.finalVerdict,
            replayed: replayedVerdict,
            severity: "P0"
        });
    }
    const match = mismatches.length === 0;
    // The hash covers the replay timestamp, so it differs on every run.
    const replayHash = createHash("sha256")
        .update(`${artifact.loopId}:${artifact.attemptId}:${replayedAt}:${match}`)
        .digest("hex");
    return {
        loopId: artifact.loopId,
        attemptId: artifact.attemptId,
        match,
        mismatches,
        replayedAt,
        replayHash
    };
}
100
+ // ---------------------------------------------------------------------------
101
+ // Replay runner: multiple attempts
102
+ // ---------------------------------------------------------------------------
103
/**
 * Replays every stored attempt in order and aggregates the outcome.
 *
 * @param artifacts       Stored attempts to replay.
 * @param verifierResults Optional map from `attemptId` to raw verifier output;
 *                        attempts without an entry are replayed without one.
 * @returns `reports` (one per artifact), `allMatch` (true when every report
 *          matched, including the empty case) and `p0Count` (total P0
 *          mismatches across all reports).
 */
export function replayLoop(artifacts, verifierResults) {
    const reports = [];
    let allMatch = true;
    let p0Count = 0;
    for (const artifact of artifacts) {
        const report = replayAttempt(artifact, verifierResults?.get(artifact.attemptId));
        reports.push(report);
        if (!report.match) {
            allMatch = false;
        }
        for (const mismatch of report.mismatches) {
            if (mismatch.severity === "P0") {
                p0Count += 1;
            }
        }
    }
    return { reports, allMatch, p0Count };
}
109
+ //# sourceMappingURL=replay.js.map
@@ -0,0 +1,54 @@
1
+ import type { LoopTask, FailureClass } from "../../contracts/index.js";
2
+ import type { CostGovernorState } from "../policy.js";
3
+ import { type ModelTrustProfile } from "./trust-calibration.js";
4
+ /**
5
+ * Minimal structural interface for what the router requires from an adapter: only `metadata` is declared here.
6
+ * Any MartinAdapter from @martin/core is structurally compatible with this.
7
+ */
8
+ export interface RouterAdapterRef {
9
+ metadata: {
10
+ model: string; // model identifier; engine.js compares it (after normalization) with trust-profile model names and quotes it in rationales
11
+ provider?: string; // optional; not read by the routing logic in engine.js
12
+ };
13
+ }
14
+ export interface RouteConfig { // one candidate route the router may select
15
+ adapter: RouterAdapterRef; // handed back unchanged in RouteDecision.adapter when this route wins
16
+ baseCostUsdPer1kTcs: number; // cost figure used for the budget pre-filter (multiplied by 15 in engine.js) and for cheapest-route comparisons
17
+ maxLatencyMs?: number; // only consulted when choosing among medium-tier routes; engine.js substitutes 5000 when omitted
18
+ trustTier: "high" | "medium" | "low"; // tier the router filters on when escalating ("high") or economizing (anything but "high")
19
+ }
20
+ export interface RouteEvaluationContext { // inputs to MartinRouter.evaluateRoute
21
+ task: LoopTask; // part of the contract, but not read by evaluateRoute in engine.js
22
+ costState: CostGovernorState; // evaluateRoute reads `pressure` and `remainingBudgetUsd`
23
+ currentFailure?: FailureClass; // failure class of the previous attempt, if any; drives escalation vs. economical recovery
24
+ complexityScore: number; // compared against 0.7 (escalate) and 0.3 (economize) in engine.js — presumably a 0–1 score, confirm with the producer
25
+ /**
26
+ * Estimated blast radius of the planned action on a 0–100 scale.
27
+ * Exposed in run summary and OTel span.
28
+ * When > 70, forces the cheapest high-trust route ahead of all other heuristics, provided a high-trust route is among the candidates after budget filtering; otherwise normal selection continues.
29
+ */
30
+ blastRadius?: number;
31
+ /**
32
+ * Trust profiles derived from historical run data by the Trust Calibration Engine.
33
+ * When present, the router uses these to auto-downgrade to cheaper models that
34
+ * have proven reliability and to deprioritize models with poor track records.
35
+ */
36
+ trustProfiles?: ModelTrustProfile[];
37
+ }
38
+ export interface RouteDecision { // result of MartinRouter.evaluateRoute
39
+ adapter: RouterAdapterRef; // the `adapter` of the selected RouteConfig
40
+ rationale: string; // human-readable sentence explaining which heuristic picked the route; also written to the router span
41
+ /** Estimated cost per 1k tokens for the selected route (copied from RouteConfig.baseCostUsdPer1kTcs) — exposed in run summary and OTel span */
42
+ selectedCostPer1kTcs: number;
43
+ /** Trust tier of the selected route (copied from RouteConfig.trustTier) */
44
+ selectedTrustTier: "high" | "medium" | "low";
45
+ }
46
+ export declare class MartinRouter {
47
+ private readonly availableRoutes; // candidate routes supplied to the constructor
48
+ constructor(availableRoutes: RouteConfig[]);
49
+ /**
50
+ * Dynamically selects the optimal provider/model adapter for the next attempt.
51
+ * Balances the necessity for intelligence (trustTier, complexity) against budget constraints. The implementation in engine.js throws an Error when no routes are configured.
52
+ */
53
+ evaluateRoute(context: RouteEvaluationContext): RouteDecision;
54
+ }
@@ -0,0 +1,131 @@
1
+ import { getTracer } from "../otel/tracer.js";
2
+ import { shouldDeprioritize } from "./trust-calibration.js";
3
+ export class MartinRouter { // chooses one configured route per attempt through a fixed-priority cascade of heuristics
4
+ availableRoutes; // candidate routes in caller-supplied order; index 0 serves as the cost baseline and the last-resort fallback below
5
+ constructor(availableRoutes) {
6
+ this.availableRoutes = availableRoutes;
7
+ }
8
+ /**
9
+ * Dynamically selects the optimal provider/model adapter for the next attempt. Throws an Error when no routes are configured.
10
+ * Balances the necessity for intelligence (trustTier, complexity) against budget constraints. Checks run in this order and the first hit wins: blast radius > 70, trust-profile auto-selection, hard_limit budget pressure, failure/complexity escalation, economical recovery, lowest-latency medium tier, first remaining route.
11
+ */
12
+ evaluateRoute(context) {
13
+ if (this.availableRoutes.length === 0) {
14
+ throw new Error("MartinRouter has no available routes configured.");
15
+ }
16
+ const { costState, currentFailure, complexityScore, blastRadius, trustProfiles } = context;
17
+ const tracer = getTracer();
18
+ const span = tracer.startSpan("martin.router_decision", { // span carries the routing inputs; optional inputs are attached only when present
19
+ "router.pressure": costState.pressure,
20
+ "router.remaining_budget_usd": costState.remainingBudgetUsd,
21
+ "router.complexity_score": complexityScore,
22
+ ...(currentFailure ? { "router.failure_class": currentFailure } : {}),
23
+ ...(blastRadius !== undefined ? { "router.blast_radius": blastRadius } : {})
24
+ });
25
+ const emit = (selected, rationale) => { // records the choice on the span, ends the span with "OK", and builds the RouteDecision
26
+ span.attributes["router.selected_model"] = selected.adapter.metadata.model;
27
+ span.attributes["router.selected_trust_tier"] = selected.trustTier;
28
+ span.attributes["router.selected_cost_per_1k_tcs"] = selected.baseCostUsdPer1kTcs;
29
+ span.attributes["router.rationale"] = rationale;
30
+ tracer.endSpan(span, "OK");
31
+ return {
32
+ adapter: selected.adapter,
33
+ rationale,
34
+ selectedCostPer1kTcs: selected.baseCostUsdPer1kTcs,
35
+ selectedTrustTier: selected.trustTier
36
+ };
37
+ };
38
+ // Budget pre-filter: drop routes whose estimated cost (base cost x 15) exceeds the remaining budget. The multiplier 15 is hard-coded; presumably an assumed attempt size — TODO confirm.
39
+ const budgetFilteredRoutes = this.availableRoutes.filter((route) => {
40
+ const estimatedCost = route.baseCostUsdPer1kTcs * 15;
41
+ return estimatedCost <= costState.remainingBudgetUsd;
42
+ });
43
+ const routes = budgetFilteredRoutes.length > 0 ? budgetFilteredRoutes : this.availableRoutes; // if nothing fits the budget, consider every route rather than failing
44
+ // High blast radius: force the cheapest high-trust route to reduce regression risk; falls through when no high-trust route is among the candidates
45
+ if (blastRadius !== undefined && blastRadius > 70) {
46
+ const highTrust = routes.filter((r) => r.trustTier === "high");
47
+ if (highTrust.length > 0) {
48
+ const safest = highTrust.reduce((min, r) => r.baseCostUsdPer1kTcs < min.baseCostUsdPer1kTcs ? r : min);
49
+ return emit(safest, `Forced high-trust route ${safest.adapter.metadata.model} due to blast radius ${blastRadius}/100.`);
50
+ }
51
+ }
52
+ // Trust calibration: deprioritize models with proven poor track records
53
+ // and prefer cheaper models with proven high efficiency when data warrants it
54
+ let calibratedRoutes = routes;
55
+ if (trustProfiles && trustProfiles.length > 0) {
56
+ // Remove routes whose model has been deprioritized by evidence; routes without a matching profile are kept
57
+ const evidenceBacked = routes.filter((r) => {
58
+ const profile = trustProfiles.find((p) => profileMatchesRoute(p, r));
59
+ return profile === undefined || !shouldDeprioritize(profile);
60
+ });
61
+ if (evidenceBacked.length > 0) // keep the unfiltered list when every route would otherwise be removed
62
+ calibratedRoutes = evidenceBacked;
63
+ const trustedRoutes = calibratedRoutes
64
+ .map((route) => ({
65
+ route,
66
+ profile: trustProfiles.find((profile) => profileMatchesRoute(profile, route))
67
+ }))
68
+ .filter((item) => {
69
+ const profile = item.profile;
70
+ return (profile !== undefined &&
71
+ profile.efficiencyScore > 0.85 && // hard-coded threshold; NOTE(review): calibrateTrust documents a 0.75 default — confirm the two are meant to differ
72
+ profile.runsObserved >= 3);
73
+ })
74
+ .sort((a, b) => a.route.baseCostUsdPer1kTcs - b.route.baseCostUsdPer1kTcs); // cheapest proven route first
75
+ const trustedRoute = trustedRoutes[0];
76
+ const defaultRoute = routes[0]; // NOTE(review): baseline comes from the pre-calibration list, so it can be a route that was just deprioritized — confirm intent
77
+ if (trustedRoute &&
78
+ defaultRoute &&
79
+ trustedRoute.route.baseCostUsdPer1kTcs < defaultRoute.baseCostUsdPer1kTcs) { // auto-select only when it is strictly cheaper than the baseline
80
+ return emit(trustedRoute.route, `Auto-selected ${trustedRoute.route.adapter.metadata.model} based on ${String(trustedRoute.profile.runsObserved)} historical runs (efficiency: ${String(Math.round(trustedRoute.profile.efficiencyScore * 100))}%, completion: ${String(Math.round(trustedRoute.profile.completionRate * 100))}%).`);
81
+ }
82
+ }
83
+ const effectiveRoutes = calibratedRoutes;
84
+ // Default to cheapest route when budget is at the hard ceiling (checked before any escalation heuristic)
85
+ if (costState.pressure === "hard_limit") {
86
+ const cheapest = effectiveRoutes.reduce((min, r) => r.baseCostUsdPer1kTcs < min.baseCostUsdPer1kTcs ? r : min);
87
+ return emit(cheapest, `Selected ${cheapest.adapter.metadata.model} due to severe budget pressure (hard_limit).`);
88
+ }
89
+ // If we're failing on reasoning or grounding, or the task is complex (> 0.7), escalate to the cheapest high-trust model immediately
90
+ if (currentFailure === "verification_failure" ||
91
+ currentFailure === "repo_grounding_failure" ||
92
+ complexityScore > 0.7) {
93
+ const highTrust = effectiveRoutes.filter((r) => r.trustTier === "high");
94
+ if (highTrust.length > 0) {
95
+ const best = highTrust.reduce((min, r) => r.baseCostUsdPer1kTcs < min.baseCostUsdPer1kTcs ? r : min);
96
+ return emit(best, `Escalated to ${best.adapter.metadata.model} (high-trust) due to failure profile '${currentFailure ?? "complex_task"}'.`);
97
+ }
98
+ }
99
+ // Prefer economical route for simple or trivially-classified errors. NOTE(review): "repo_grounding_failure" is also an escalation trigger above, so it only reaches this branch when no high-trust route exists — confirm the duplicate is intentional.
100
+ if (currentFailure === "syntax_error" ||
101
+ currentFailure === "repo_grounding_failure" ||
102
+ complexityScore < 0.3) {
103
+ const economical = effectiveRoutes.filter((r) => r.trustTier !== "high");
104
+ if (economical.length > 0) {
105
+ const best = economical.reduce((min, r) => r.baseCostUsdPer1kTcs < min.baseCostUsdPer1kTcs ? r : min);
106
+ return emit(best, `Selected ${best.adapter.metadata.model} to preserve budget on low-complexity task recovery.`);
107
+ }
108
+ }
109
+ // Default to a balanced medium-tier route if no strict heuristic applies: the medium route with the smallest maxLatencyMs wins (5000 assumed when unset)
110
+ const mediumTrust = effectiveRoutes.filter((r) => r.trustTier === "medium");
111
+ if (mediumTrust.length > 0) {
112
+ const best = mediumTrust.reduce((min, r) => Math.abs(r.maxLatencyMs ?? 5000) < Math.abs(min.maxLatencyMs ?? 5000) ? r : min);
113
+ return emit(best, `Selected balanced route ${best.adapter.metadata.model} for nominal execution.`);
114
+ }
115
+ // Ultimate fallback — effectiveRoutes is non-empty (guaranteed by the empty-routes guard at the top and the two non-empty fallbacks above)
116
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
117
+ const bestRoute = effectiveRoutes[0];
118
+ return emit(bestRoute, `Fallback route ${bestRoute.adapter.metadata.model} selected as default.`);
119
+ }
120
+ }
121
+ function profileMatchesRoute(profile, route) {
122
+ const model = normalizeModelKey(route.adapter.metadata.model);
123
+ const profileModel = normalizeModelKey(profile.model);
124
+ return (model === profileModel ||
125
+ model.includes(profileModel) ||
126
+ profileModel.includes(model));
127
+ }
128
+ function normalizeModelKey(value) {
129
+ return value.toLowerCase().replace(/[^a-z0-9]+/g, "-");
130
+ }
131
+ //# sourceMappingURL=engine.js.map
@@ -0,0 +1 @@
1
+ export * from "./engine.js";
@@ -0,0 +1,2 @@
1
+ export * from "./engine.js";
2
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Trust Calibration Engine — the self-improvement loop.
3
+ *
4
+ * Reads historical run records from ~/.martin/runs/ and computes a reliability
5
+ * profile for each model that has been used. The router uses these profiles to
6
+ * automatically downgrade to cheaper models when evidence shows they perform
7
+ * as well as more expensive ones, and to deprioritize models with poor track records.
8
+ *
9
+ * This closes the feedback loop that was missing: every completed run writes
10
+ * evidence to disk; this module reads it back into routing decisions.
11
+ */
12
+ export interface ModelTrustProfile {
13
+ /** Model identifier as recorded in attempt records (e.g. "claude-sonnet-4-6"); the router matches it against route model names after case/punctuation normalization */
14
+ model: string;
15
+ /** Total runs where this model was used for at least one attempt; the router requires at least 3 before auto-selecting a model */
16
+ runsObserved: number;
17
+ /** Fraction of observed runs that completed successfully (0–1) */
18
+ completionRate: number;
19
+ /** Average USD cost per iteration (attempt) */
20
+ avgCostPerIteration: number;
21
+ /** Average iterations used vs budget.maxIterations (lower = more efficient) */
22
+ avgIterationEfficiency: number;
23
+ /**
24
+ * Composite score 0–1: completionRate * (1 - avgIterationEfficiency).
25
+ * High score = completes well AND uses fewer iterations than the budget allows. The router auto-selects a cheaper model only when this exceeds 0.85.
26
+ */
27
+ efficiencyScore: number;
28
+ /** ISO timestamp of the most recent run that informed this profile */
29
+ lastUpdated: string;
30
+ }
31
+ export interface TrustCalibrationResult { // resolved value of calibrateTrust()
32
+ /** Per-model reliability profiles, sorted by efficiencyScore descending */
33
+ profiles: ModelTrustProfile[];
34
+ /**
35
+ * The model with the best efficiencyScore that also meets minRuns threshold (and, per calibrateTrust's documentation, the efficiencyThreshold).
36
+ * Null if insufficient data exists yet.
37
+ */
38
+ recommendedModel: string | null;
39
+ /** Total number of runs analyzed to produce this result */
40
+ calibrationBasis: number;
41
+ }
42
+ /**
43
+ * Reads historical loop records and computes a trust profile for each model; resolves to a TrustCalibrationResult.
44
+ *
45
+ * @param runsDir - Override the default ~/.martin/runs path (useful for testing)
46
+ * @param minRuns - Minimum observations required before a profile is considered
47
+ * reliable enough to influence routing. Default: 3.
48
+ * @param efficiencyThreshold - Minimum efficiencyScore for a model to be
49
+ * eligible for auto-recommendation (the `recommendedModel` field). Default: 0.75.
50
+ */
51
+ export declare function calibrateTrust(runsDir?: string, minRuns?: number, efficiencyThreshold?: number): Promise<TrustCalibrationResult>;
52
+ /**
53
+ * Returns true if a model should be deprioritized based on its trust profile.
54
+ * A model is deprioritized when it has enough observations to be confident
55
+ * it performs poorly (low completion rate). `minRuns` and `minCompletionRate` are optional overrides for those two thresholds; their default values are not visible in this declaration.
56
+ */
57
+ export declare function shouldDeprioritize(profile: ModelTrustProfile, minRuns?: number, minCompletionRate?: number): boolean;