npm - martin-loop - Versions diffs - 0.1.4 → 1.3.0 - Mend

martin-loop 0.1.4 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (286) hide show

package/CODE_OF_CONDUCT.md +32 -0
package/README.md +172 -227
package/demo/seeded-workspace/README.md +35 -0
package/demo/seeded-workspace/TASKS.md +29 -0
package/demo/seeded-workspace/martin.config.yaml +11 -0
package/demo/seeded-workspace/package.json +8 -0
package/demo/seeded-workspace/src/invoice-summary.js +11 -0
package/demo/seeded-workspace/test/invoice-summary.test.js +20 -0
package/dist/bin/martin-loop.js +0 -0
package/dist/vendor/adapters/claude-cli.d.ts +19 -4
package/dist/vendor/adapters/claude-cli.js +55 -24
package/dist/vendor/adapters/cli-bridge.d.ts +1 -0
package/dist/vendor/adapters/cli-bridge.js +154 -28
package/dist/vendor/adapters/counter.d.ts +1 -0
package/dist/vendor/adapters/counter.js +4 -0
package/dist/vendor/adapters/git-baseline.d.ts +50 -0
package/dist/vendor/adapters/git-baseline.js +233 -0
package/dist/vendor/adapters/index.d.ts +1 -0
package/dist/vendor/adapters/index.js +1 -0
package/dist/vendor/adapters/openrouter-adapter.d.ts +15 -0
package/dist/vendor/adapters/openrouter-adapter.js +302 -0
package/dist/vendor/adapters/usage.d.ts +48 -0
package/dist/vendor/adapters/usage.js +66 -0
package/dist/vendor/adapters/verifier-only.d.ts +7 -0
package/dist/vendor/adapters/verifier-only.js +57 -0
package/dist/vendor/cli/bin/exit.d.ts +12 -0
package/dist/vendor/cli/bin/exit.js +28 -0
package/dist/vendor/cli/commands/analyze.d.ts +5 -0
package/dist/vendor/cli/commands/analyze.js +58 -0
package/dist/vendor/cli/commands/audit-log-verify.d.ts +34 -0
package/dist/vendor/cli/commands/audit-log-verify.js +99 -0
package/dist/vendor/cli/commands/audit.d.ts +8 -0
package/dist/vendor/cli/commands/audit.js +199 -0
package/dist/vendor/cli/commands/corpus.d.ts +5 -0
package/dist/vendor/cli/commands/corpus.js +60 -0
package/dist/vendor/cli/commands/doctor.d.ts +8 -0
package/dist/vendor/cli/commands/doctor.js +219 -0
package/dist/vendor/cli/commands/explain.d.ts +17 -0
package/dist/vendor/cli/commands/explain.js +176 -0
package/dist/vendor/cli/commands/export.d.ts +5 -0
package/dist/vendor/cli/commands/export.js +60 -0
package/dist/vendor/cli/commands/governance.d.ts +8 -0
package/dist/vendor/cli/commands/governance.js +95 -0
package/dist/vendor/cli/commands/improve.d.ts +18 -0
package/dist/vendor/cli/commands/improve.js +396 -0
package/dist/vendor/cli/commands/init.d.ts +8 -0
package/dist/vendor/cli/commands/init.js +281 -0
package/dist/vendor/cli/commands/migration.d.ts +8 -0
package/dist/vendor/cli/commands/migration.js +67 -0
package/dist/vendor/cli/commands/prior.d.ts +23 -0
package/dist/vendor/cli/commands/prior.js +145 -0
package/dist/vendor/cli/commands/resume.d.ts +21 -0
package/dist/vendor/cli/commands/resume.js +73 -0
package/dist/vendor/cli/commands/verify.d.ts +6 -0
package/dist/vendor/cli/commands/verify.js +43 -0
package/dist/vendor/cli/index.d.ts +6 -1
package/dist/vendor/cli/index.js +124 -7
package/dist/vendor/cli/research/public-corpus.d.ts +43 -0
package/dist/vendor/cli/research/public-corpus.js +151 -0
package/dist/vendor/cli/ui/error-card.d.ts +38 -0
package/dist/vendor/cli/ui/error-card.js +103 -0
package/dist/vendor/cli/ui/mission-brief.d.ts +41 -0
package/dist/vendor/cli/ui/mission-brief.js +173 -0
package/dist/vendor/cli/ui/summary-card.d.ts +34 -0
package/dist/vendor/cli/ui/summary-card.js +102 -0
package/dist/vendor/contracts/audit.d.ts +46 -0
package/dist/vendor/contracts/audit.js +360 -0
package/dist/vendor/contracts/index.d.ts +3 -1
package/dist/vendor/contracts/post-phase15.d.ts +240 -0
package/dist/vendor/contracts/post-phase15.js +166 -0
package/dist/vendor/core/agent/mandates.d.ts +46 -0
package/dist/vendor/core/agent/mandates.js +178 -0
package/dist/vendor/core/agent/receipts.d.ts +38 -0
package/dist/vendor/core/agent/receipts.js +131 -0
package/dist/vendor/core/agent/signing.d.ts +17 -0
package/dist/vendor/core/agent/signing.js +91 -0
package/dist/vendor/core/attestation/sign.d.ts +25 -0
package/dist/vendor/core/attestation/sign.js +216 -0
package/dist/vendor/core/autonomy/autonomous-promotion.d.ts +120 -0
package/dist/vendor/core/autonomy/autonomous-promotion.js +346 -0
package/dist/vendor/core/autonomy/envelope-v2.d.ts +29 -0
package/dist/vendor/core/autonomy/envelope-v2.js +60 -0
package/dist/vendor/core/autonomy/envelope.d.ts +17 -0
package/dist/vendor/core/autonomy/envelope.js +27 -0
package/dist/vendor/core/autonomy/escalation-ledger.d.ts +20 -0
package/dist/vendor/core/autonomy/escalation-ledger.js +18 -0
package/dist/vendor/core/autonomy/resume.d.ts +15 -0
package/dist/vendor/core/autonomy/resume.js +23 -0
package/dist/vendor/core/circuit/circuit-breaker.d.ts +60 -0
package/dist/vendor/core/circuit/circuit-breaker.js +143 -0
package/dist/vendor/core/compiler.d.ts +2 -0
package/dist/vendor/core/compiler.js +10 -4
package/dist/vendor/core/context-distillation.d.ts +3 -0
package/dist/vendor/core/context-distillation.js +44 -0
package/dist/vendor/core/context-flow/compile-context.d.ts +8 -0
package/dist/vendor/core/context-flow/compile-context.js +111 -0
package/dist/vendor/core/context-flow/entities.d.ts +2 -0
package/dist/vendor/core/context-flow/entities.js +44 -0
package/dist/vendor/core/context-flow/evaluate-policy.d.ts +2 -0
package/dist/vendor/core/context-flow/evaluate-policy.js +42 -0
package/dist/vendor/core/context-flow/index.d.ts +11 -0
package/dist/vendor/core/context-flow/index.js +24 -0
package/dist/vendor/core/context-flow/labels.d.ts +3 -0
package/dist/vendor/core/context-flow/labels.js +17 -0
package/dist/vendor/core/context-flow/normalizer.d.ts +9 -0
package/dist/vendor/core/context-flow/normalizer.js +69 -0
package/dist/vendor/core/context-flow/profiles.d.ts +33 -0
package/dist/vendor/core/context-flow/profiles.js +36 -0
package/dist/vendor/core/context-flow/redaction.d.ts +1 -0
package/dist/vendor/core/context-flow/redaction.js +6 -0
package/dist/vendor/core/context-flow/sensitivity.d.ts +2 -0
package/dist/vendor/core/context-flow/sensitivity.js +27 -0
package/dist/vendor/core/context-flow/sync-preview.d.ts +2 -0
package/dist/vendor/core/context-flow/sync-preview.js +22 -0
package/dist/vendor/core/context-flow/token-estimator.d.ts +3 -0
package/dist/vendor/core/context-flow/token-estimator.js +13 -0
package/dist/vendor/core/context-flow/types.d.ts +91 -0
package/dist/vendor/core/context-flow/types.js +2 -0
package/dist/vendor/core/context-integrity.d.ts +26 -0
package/dist/vendor/core/context-integrity.js +56 -0
package/dist/vendor/core/context-utility.d.ts +47 -0
package/dist/vendor/core/context-utility.js +405 -0
package/dist/vendor/core/cost/pipeline.d.ts +92 -0
package/dist/vendor/core/cost/pipeline.js +141 -0
package/dist/vendor/core/cost/tagged-cost.d.ts +27 -0
package/dist/vendor/core/cost/tagged-cost.js +55 -0
package/dist/vendor/core/cost-governor.d.ts +2 -0
package/dist/vendor/core/cost-governor.js +50 -0
package/dist/vendor/core/cve/cve-check.d.ts +80 -0
package/dist/vendor/core/cve/cve-check.js +172 -0
package/dist/vendor/core/digital-twin/index.d.ts +27 -0
package/dist/vendor/core/digital-twin/index.js +90 -0
package/dist/vendor/core/drift/drift-graph.d.ts +47 -0
package/dist/vendor/core/drift/drift-graph.js +100 -0
package/dist/vendor/core/drift/objective-lock.d.ts +69 -0
package/dist/vendor/core/drift/objective-lock.js +88 -0
package/dist/vendor/core/drift/scope.d.ts +46 -0
package/dist/vendor/core/drift/scope.js +102 -0
package/dist/vendor/core/drift/signature-lock.d.ts +48 -0
package/dist/vendor/core/drift/signature-lock.js +202 -0
package/dist/vendor/core/drift/stale-proof-gate.d.ts +21 -0
package/dist/vendor/core/drift/stale-proof-gate.js +19 -0
package/dist/vendor/core/eval/known-bad-world-runner.d.ts +24 -0
package/dist/vendor/core/eval/known-bad-world-runner.js +256 -0
package/dist/vendor/core/evidence/claim-audit.d.ts +18 -0
package/dist/vendor/core/evidence/claim-audit.js +89 -0
package/dist/vendor/core/exit-intelligence.d.ts +2 -0
package/dist/vendor/core/exit-intelligence.js +58 -0
package/dist/vendor/core/explain/formatter.d.ts +42 -0
package/dist/vendor/core/explain/formatter.js +171 -0
package/dist/vendor/core/explain/timeline.d.ts +29 -0
package/dist/vendor/core/explain/timeline.js +213 -0
package/dist/vendor/core/failure-taxonomy.d.ts +2 -0
package/dist/vendor/core/failure-taxonomy.js +76 -0
package/dist/vendor/core/gateway/index.d.ts +10 -0
package/dist/vendor/core/gateway/index.js +12 -0
package/dist/vendor/core/gateway/registry.d.ts +40 -0
package/dist/vendor/core/gateway/registry.js +97 -0
package/dist/vendor/core/gateway/transport.d.ts +31 -0
package/dist/vendor/core/gateway/transport.js +82 -0
package/dist/vendor/core/gateway/vault.d.ts +19 -0
package/dist/vendor/core/gateway/vault.js +29 -0
package/dist/vendor/core/graph/adapters.d.ts +43 -0
package/dist/vendor/core/graph/adapters.js +91 -0
package/dist/vendor/core/graph/hotspots.d.ts +22 -0
package/dist/vendor/core/graph/hotspots.js +30 -0
package/dist/vendor/core/graph/index.d.ts +1 -0
package/dist/vendor/core/graph/index.js +2 -0
package/dist/vendor/core/honey/honey-tokens.d.ts +32 -0
package/dist/vendor/core/honey/honey-tokens.js +44 -0
package/dist/vendor/core/index.d.ts +7 -4
package/dist/vendor/core/index.js +222 -64
package/dist/vendor/core/learning/bayesian-update.d.ts +31 -0
package/dist/vendor/core/learning/bayesian-update.js +60 -0
package/dist/vendor/core/learning/prior-sets.d.ts +42 -0
package/dist/vendor/core/learning/prior-sets.js +111 -0
package/dist/vendor/core/learning/promotion-gate.d.ts +17 -0
package/dist/vendor/core/learning/promotion-gate.js +23 -0
package/dist/vendor/core/leash/blast-radius.d.ts +42 -0
package/dist/vendor/core/leash/blast-radius.js +156 -0
package/dist/vendor/core/leash/policy-leash.d.ts +31 -0
package/dist/vendor/core/leash/policy-leash.js +117 -0
package/dist/vendor/core/memo/memo.d.ts +63 -0
package/dist/vendor/core/memo/memo.js +97 -0
package/dist/vendor/core/memory/learning-pipeline.d.ts +154 -0
package/dist/vendor/core/memory/learning-pipeline.js +391 -0
package/dist/vendor/core/memory/palace.d.ts +84 -0
package/dist/vendor/core/memory/palace.js +379 -0
package/dist/vendor/core/merge/ast-merge.d.ts +22 -0
package/dist/vendor/core/merge/ast-merge.js +350 -0
package/dist/vendor/core/merge/text-merge.d.ts +12 -0
package/dist/vendor/core/merge/text-merge.js +182 -0
package/dist/vendor/core/otel/tracer.d.ts +45 -0
package/dist/vendor/core/otel/tracer.js +116 -0
package/dist/vendor/core/parallel/parallel-attempts.d.ts +28 -0
package/dist/vendor/core/parallel/parallel-attempts.js +41 -0
package/dist/vendor/core/parallel/scorer.d.ts +24 -0
package/dist/vendor/core/parallel/scorer.js +65 -0
package/dist/vendor/core/pattern-detection.d.ts +64 -0
package/dist/vendor/core/pattern-detection.js +108 -0
package/dist/vendor/core/persistence/checkpoint.d.ts +44 -0
package/dist/vendor/core/persistence/checkpoint.js +156 -0
package/dist/vendor/core/persistence/cleanup.d.ts +22 -0
package/dist/vendor/core/persistence/cleanup.js +131 -0
package/dist/vendor/core/persistence/index.d.ts +2 -0
package/dist/vendor/core/persistence/index.js +1 -0
package/dist/vendor/core/persistence/runs-reader.d.ts +52 -0
package/dist/vendor/core/persistence/runs-reader.js +84 -0
package/dist/vendor/core/persistence/store.d.ts +6 -1
package/dist/vendor/core/persistence/store.js +5 -0
package/dist/vendor/core/policy/file-touch-quota.d.ts +60 -0
package/dist/vendor/core/policy/file-touch-quota.js +105 -0
package/dist/vendor/core/policy/policy-loader.d.ts +30 -0
package/dist/vendor/core/policy/policy-loader.js +170 -0
package/dist/vendor/core/policy/policy-schema.d.ts +55 -0
package/dist/vendor/core/policy/policy-schema.js +78 -0
package/dist/vendor/core/policy.d.ts +6 -0
package/dist/vendor/core/probe/probe.d.ts +49 -0
package/dist/vendor/core/probe/probe.js +115 -0
package/dist/vendor/core/proof/patch-proof.d.ts +58 -0
package/dist/vendor/core/proof/patch-proof.js +84 -0
package/dist/vendor/core/proof/semantic-probe.d.ts +25 -0
package/dist/vendor/core/proof/semantic-probe.js +82 -0
package/dist/vendor/core/recovery/failure-mode-runner.d.ts +29 -0
package/dist/vendor/core/recovery/failure-mode-runner.js +39 -0
package/dist/vendor/core/red-blue/red-phase.d.ts +64 -0
package/dist/vendor/core/red-blue/red-phase.js +141 -0
package/dist/vendor/core/red-blue/risk-tiers.d.ts +22 -0
package/dist/vendor/core/red-blue/risk-tiers.js +33 -0
package/dist/vendor/core/replay/replay.d.ts +85 -0
package/dist/vendor/core/replay/replay.js +109 -0
package/dist/vendor/core/router/engine.d.ts +54 -0
package/dist/vendor/core/router/engine.js +131 -0
package/dist/vendor/core/router/index.d.ts +1 -0
package/dist/vendor/core/router/index.js +2 -0
package/dist/vendor/core/router/trust-calibration.d.ts +57 -0
package/dist/vendor/core/router/trust-calibration.js +127 -0
package/dist/vendor/core/run-martin.d.ts +2 -0
package/dist/vendor/core/run-martin.js +287 -0
package/dist/vendor/core/security/cve-scanner.d.ts +62 -0
package/dist/vendor/core/security/cve-scanner.js +178 -0
package/dist/vendor/core/sentinel/efficiency-sentinel.d.ts +29 -0
package/dist/vendor/core/sentinel/efficiency-sentinel.js +30 -0
package/dist/vendor/core/sentinel/progress-guard.d.ts +35 -0
package/dist/vendor/core/sentinel/progress-guard.js +46 -0
package/dist/vendor/core/siem/siem-emitter.d.ts +49 -0
package/dist/vendor/core/siem/siem-emitter.js +157 -0
package/dist/vendor/core/strategy/attempt-brief.d.ts +22 -0
package/dist/vendor/core/strategy/attempt-brief.js +89 -0
package/dist/vendor/core/summarize/diff-summary.d.ts +35 -0
package/dist/vendor/core/summarize/diff-summary.js +204 -0
package/dist/vendor/core/surface-signals.d.ts +21 -0
package/dist/vendor/core/surface-signals.js +139 -0
package/dist/vendor/core/truth/truth-wall.d.ts +51 -0
package/dist/vendor/core/truth/truth-wall.js +69 -0
package/dist/vendor/core/truth-spine.d.ts +26 -0
package/dist/vendor/core/truth-spine.js +62 -0
package/dist/vendor/core/types.d.ts +115 -0
package/dist/vendor/core/types.js +2 -0
package/dist/vendor/core/verification/tiered-verify.d.ts +17 -0
package/dist/vendor/core/verification/tiered-verify.js +29 -0
package/dist/vendor/core/verifier-pyramid.d.ts +32 -0
package/dist/vendor/core/verifier-pyramid.js +111 -0
package/dist/vendor/core/workflow-artifacts.d.ts +99 -0
package/dist/vendor/core/workflow-artifacts.js +668 -0
package/dist/vendor/core/wrap/supervised-run.d.ts +96 -0
package/dist/vendor/core/wrap/supervised-run.js +178 -0
package/docs/assets/cli-animated.svg +139 -0
package/docs/assets/cli-static.svg +34 -0
package/docs/assets/github-hero-v2.svg +23 -0
package/docs/assets/martin-raplph.png.jpg +0 -0
package/docs/assets/martinloop-logo.png +0 -0
package/docs/assets/nvidia-inception-program-light.png +0 -0
package/docs/assets/nvidia-inception-program.png +0 -0
package/docs/assets/phase3c-sidesidebyside-demo.html +228 -0
package/docs/assets/side-by-side.svg +134 -0
package/docs/oss/CLAUDE-CODE-WALKTHROUGH.md +142 -0
package/docs/oss/EXAMPLES.md +9 -1
package/docs/oss/OSS-BOUNDARY-REPORT.json +109 -113
package/docs/oss/OSS-BOUNDARY-REPORT.md +48 -48
package/docs/oss/QUICKSTART.md +39 -4
package/docs/oss/RALPH-LOOP-SAFETY.md +113 -0
package/docs/oss/README.md +7 -4
package/docs/oss/RELEASE-SURFACE-REPORT.json +46 -45
package/docs/oss/RELEASE-SURFACE-REPORT.md +36 -35
package/package.json +129 -49

package/dist/vendor/core/router/trust-calibration.d.ts ADDED Viewed

@@ -0,0 +1,57 @@
+/**
+ * Trust Calibration Engine — the self-improvement loop.
+ *
+ * Reads historical run records from ~/.martin/runs/ and computes a reliability
+ * profile for each model that has been used. The router uses these profiles to
+ * automatically downgrade to cheaper models when evidence shows they perform
+ * as well as more expensive ones, and to deprioritize models with poor track records.
+ *
+ * This closes the feedback loop that was missing: every completed run writes
+ * evidence to disk; this module reads it back into routing decisions.
+ */
+export interface ModelTrustProfile {
+    /**
+     * Normalized model key for this profile (e.g. "claude-sonnet"). Known model
+     * families are collapsed to a single key; an attempt that recorded no model
+     * is keyed by its adapter id instead.
+     */
+    model: string;
+    /** Total runs where this model was used for at least one attempt */
+    runsObserved: number;
+    /** Fraction of observed runs that completed successfully (0–1) */
+    completionRate: number;
+    /** Average USD cost per iteration (attempt): total run cost divided by total attempts */
+    avgCostPerIteration: number;
+    /** Average iterations used vs budget.maxIterations (lower = more efficient) */
+    avgIterationEfficiency: number;
+    /**
+     * Composite score 0–1: completionRate * (1 - avgIterationEfficiency * 0.5),
+     * rounded to three decimals. The iteration penalty is half-weighted, so a
+     * model that always uses its whole budget keeps half of its completion rate.
+     * High score = completes well AND uses fewer iterations than the budget allows.
+     */
+    efficiencyScore: number;
+    /** ISO timestamp of the most recent run that informed this profile */
+    lastUpdated: string;
+}
+export interface TrustCalibrationResult {
+    /** Per-model reliability profiles, sorted by efficiencyScore descending */
+    profiles: ModelTrustProfile[];
+    /**
+     * The cheapest model (lowest avgCostPerIteration) among the profiles that
+     * meet both the minRuns and the efficiencyThreshold cut-offs.
+     * Null if no profile qualifies yet.
+     */
+    recommendedModel: string | null;
+    /** Total number of runs analyzed to produce this result */
+    calibrationBasis: number;
+}
+/**
+ * Reads historical loop records and computes a trust profile for each model.
+ *
+ * @param runsDir - Override the default ~/.martin/runs path (useful for testing)
+ * @param minRuns - Minimum observations required before a profile is considered
+ *   reliable enough to influence routing. Default: 3.
+ * @param efficiencyThreshold - Minimum efficiencyScore for a model to be
+ *   eligible for auto-recommendation. Default: 0.75.
+ * @returns The per-model profiles, the recommended model (or null), and the
+ *   number of runs the calibration was based on.
+ */
+export declare function calibrateTrust(runsDir?: string, minRuns?: number, efficiencyThreshold?: number): Promise<TrustCalibrationResult>;
+/**
+ * Returns true if a model should be deprioritized based on its trust profile.
+ * A model is deprioritized when it has enough observations to be confident
+ * it performs poorly (low completion rate).
+ *
+ * @param profile - Trust profile to evaluate.
+ * @param minRuns - Observations required before the profile is judged at all.
+ *   Default: 5.
+ * @param minCompletionRate - Completion rate below which the model is
+ *   deprioritized. Default: 0.4.
+ */
+export declare function shouldDeprioritize(profile: ModelTrustProfile, minRuns?: number, minCompletionRate?: number): boolean;

package/dist/vendor/core/router/trust-calibration.js ADDED Viewed

@@ -0,0 +1,127 @@
+/**
+ * Trust Calibration Engine — the self-improvement loop.
+ *
+ * Reads historical run records from ~/.martin/runs/ and computes a reliability
+ * profile for each model that has been used. The router uses these profiles to
+ * automatically downgrade to cheaper models when evidence shows they perform
+ * as well as more expensive ones, and to deprioritize models with poor track records.
+ *
+ * This closes the feedback loop that was missing: every completed run writes
+ * evidence to disk; this module reads it back into routing decisions.
+ */
+import { readAllLoopRecords } from "../persistence/runs-reader.js";
+/**
+ * Reads historical loop records and computes a trust profile for each model.
+ *
+ * @param runsDir - Override the default ~/.martin/runs path (useful for testing)
+ * @param minRuns - Minimum observations required before a profile is considered
+ *   reliable enough to influence routing. Default: 3.
+ * @param efficiencyThreshold - Minimum efficiencyScore for a model to be
+ *   eligible for auto-recommendation. Default: 0.75.
+ */
+export async function calibrateTrust(runsDir, minRuns = 3, efficiencyThreshold = 0.75) {
+    const records = await readAllLoopRecords(runsDir);
+    if (records.length === 0) {
+        return { profiles: [], recommendedModel: null, calibrationBasis: 0 };
+    }
+    const accumulators = new Map();
+    for (const record of records) {
+        const modelsInRun = extractModelsFromRun(record);
+        const isCompleted = record.status === "completed";
+        const iterationEfficiency = record.budget.maxIterations > 0
+            ? record.attempts.length / record.budget.maxIterations
+            : 1;
+        for (const model of modelsInRun) {
+            const existing = accumulators.get(model) ?? {
+                model,
+                completedRuns: 0,
+                totalRuns: 0,
+                totalCostUsd: 0,
+                totalAttempts: 0,
+                totalIterationsUsedFraction: 0,
+                latestUpdatedAt: record.createdAt
+            };
+            existing.totalRuns += 1;
+            existing.totalAttempts += record.attempts.length;
+            existing.totalCostUsd += record.cost.actualUsd;
+            existing.totalIterationsUsedFraction += iterationEfficiency;
+            if (isCompleted)
+                existing.completedRuns += 1;
+            const recordTs = record.updatedAt ?? record.createdAt;
+            if (recordTs > existing.latestUpdatedAt) {
+                existing.latestUpdatedAt = recordTs;
+            }
+            accumulators.set(model, existing);
+        }
+    }
+    const profiles = [];
+    for (const acc of accumulators.values()) {
+        if (acc.totalRuns === 0)
+            continue;
+        const completionRate = acc.completedRuns / acc.totalRuns;
+        const avgIterationEfficiency = acc.totalIterationsUsedFraction / acc.totalRuns;
+        const avgCostPerIteration = acc.totalAttempts > 0 ? acc.totalCostUsd / acc.totalAttempts : 0;
+        // efficiencyScore: high means "completes reliably AND uses fewer iterations"
+        const efficiencyScore = completionRate * (1 - avgIterationEfficiency * 0.5);
+        profiles.push({
+            model: acc.model,
+            runsObserved: acc.totalRuns,
+            completionRate,
+            avgCostPerIteration,
+            avgIterationEfficiency,
+            efficiencyScore: Math.round(efficiencyScore * 1000) / 1000,
+            lastUpdated: acc.latestUpdatedAt
+        });
+    }
+    // Sort by efficiency descending
+    profiles.sort((a, b) => b.efficiencyScore - a.efficiencyScore);
+    // Recommend the cheapest model that meets threshold with enough data
+    const eligible = profiles.filter((p) => p.runsObserved >= minRuns && p.efficiencyScore >= efficiencyThreshold);
+    const recommendedModel = eligible.length > 0
+        ? eligible.reduce((best, p) => p.avgCostPerIteration < best.avgCostPerIteration ? p : best).model
+        : null;
+    return {
+        profiles,
+        recommendedModel,
+        calibrationBasis: records.length
+    };
+}
+/**
+ * Returns true if a model should be deprioritized based on its trust profile.
+ * A model is deprioritized when it has enough observations to be confident
+ * it performs poorly (low completion rate).
+ */
+export function shouldDeprioritize(profile, minRuns = 5, minCompletionRate = 0.4) {
+    return (profile.runsObserved >= minRuns &&
+        profile.completionRate < minCompletionRate);
+}
+/**
+ * Extracts the distinct set of models used in a run.
+ * Falls back to adapterId if model field is absent.
+ */
+function extractModelsFromRun(record) {
+    const models = new Set();
+    for (const attempt of record.attempts) {
+        const key = attempt.model ?? attempt.adapterId;
+        if (key)
+            models.add(normalizeModelName(key));
+    }
+    return [...models];
+}
+function normalizeModelName(raw) {
+    // Normalize known aliases to a consistent key
+    if (raw.includes("sonnet"))
+        return "claude-sonnet";
+    if (raw.includes("haiku"))
+        return "claude-haiku";
+    if (raw.includes("opus"))
+        return "claude-opus";
+    if (raw.includes("gpt-4o-mini"))
+        return "gpt-4o-mini";
+    if (raw.includes("gpt-4o"))
+        return "gpt-4o";
+    if (raw.includes("o3"))
+        return "o3";
+    return raw;
+}
+//# sourceMappingURL=trust-calibration.js.map

package/dist/vendor/core/run-martin.d.ts ADDED Viewed

@@ -0,0 +1,2 @@
+import type { RunMartinOptions, RunMartinResult } from "./types.js";
+export declare function runMartin(options: RunMartinOptions): Promise<RunMartinResult>;

package/dist/vendor/core/run-martin.js ADDED Viewed

@@ -0,0 +1,287 @@
+import { appendLoopEvent, createLoopRecord } from "../contracts/index.js";
+import { distillContext } from "./context-distillation.js";
+import { evaluateCostGovernor } from "./cost-governor.js";
+import { inferExit } from "./exit-intelligence.js";
+import { classifyFailure } from "./failure-taxonomy.js";
+/**
+ * Runs one Martin loop: asks the adapter for attempts until one passes
+ * verification, the cost governor or the exit logic stops the run, or the
+ * iteration budget is used up.
+ *
+ * Each step is recorded by appending an event to the loop record
+ * (run.started, attempt.started, attempt.completed, budget.updated,
+ * verification.completed, failure.classified, intervention.selected,
+ * run.completed).
+ *
+ * @param options - Run configuration. Reads workspaceId, projectId, task and
+ *   adapter, plus the optional budget, teamId, now (timestamp supplier;
+ *   defaults to the current time as an ISO string) and idFactory.
+ * @returns The final loop record, the exit decision, and the context distilled
+ *   from that final loop record.
+ */
+export async function runMartin(options) {
+    const now = options.now ?? (() => new Date().toISOString());
+    const contracts = createContractOptions(options.idFactory);
+    // budget and teamId are only passed through when the caller supplied them.
+    let loop = createLoopRecord({
+        workspaceId: options.workspaceId,
+        projectId: options.projectId,
+        task: options.task,
+        ...(options.budget ? { budget: options.budget } : {}),
+        ...(options.teamId ? { teamId: options.teamId } : {})
+    }, {
+        ...contracts,
+        now: now()
+    });
+    let finalContext = distillContext(loop);
+    let decision = {
+        shouldExit: false,
+        lifecycleState: "running",
+        reason: "Run initialized."
+    };
+    loop = pushEvent(loop, {
+        type: "run.started",
+        lifecycleState: "running",
+        payload: {
+            adapterId: options.adapter.adapterId,
+            adapterKind: options.adapter.kind
+        }
+    }, contracts, now(), "running");
+    // One pass per attempt; loop.attempts grows by one on every pass.
+    while (loop.attempts.length < loop.budget.maxIterations) {
+        const preAttemptCost = evaluateCostGovernor({
+            budget: loop.budget,
+            cost: loop.cost,
+            attemptsUsed: loop.attempts.length
+        });
+        // Budget stop: exit before starting another attempt.
+        if (preAttemptCost.shouldStop) {
+            decision = {
+                shouldExit: true,
+                lifecycleState: "budget_exit",
+                reason: "Budget governor reached a hard limit."
+            };
+            loop = finalizeLoop(loop, decision, contracts, now(), "exited");
+            finalContext = distillContext(loop);
+            return {
+                loop,
+                decision,
+                finalContext
+            };
+        }
+        // The context given to the adapter is distilled before the new attempt is appended.
+        finalContext = distillContext(loop);
+        const attemptIndex = loop.attempts.length + 1;
+        const attemptStartedAt = now();
+        const attemptId = makeId("att", options.idFactory);
+        // Record the attempt as started; applyResult fills in the completion fields later.
+        loop = {
+            ...loop,
+            attempts: [
+                ...loop.attempts,
+                {
+                    attemptId,
+                    index: attemptIndex,
+                    adapterId: options.adapter.adapterId,
+                    model: options.adapter.metadata.model ?? options.adapter.label,
+                    startedAt: attemptStartedAt
+                }
+            ],
+            status: "running",
+            lifecycleState: "running",
+            updatedAt: attemptStartedAt
+        };
+        loop = pushEvent(loop, {
+            type: "attempt.started",
+            lifecycleState: "running",
+            payload: {
+                attemptId,
+                attemptIndex,
+                adapterId: options.adapter.adapterId
+            }
+        }, contracts, attemptStartedAt, "running");
+        // costState is the governor's reading taken before this attempt started.
+        const adapterRequest = {
+            loopId: loop.loopId,
+            workspaceId: loop.workspaceId,
+            projectId: loop.projectId,
+            attemptIndex,
+            task: loop.task,
+            context: finalContext,
+            budget: loop.budget,
+            costState: preAttemptCost
+        };
+        if (loop.teamId) {
+            adapterRequest.teamId = loop.teamId;
+        }
+        const result = await options.adapter.execute(adapterRequest);
+        const completedAt = now();
+        loop = applyResult(loop, attemptId, result, completedAt);
+        loop = pushEvent(loop, {
+            type: "attempt.completed",
+            lifecycleState: "running",
+            payload: {
+                attemptId,
+                status: result.status,
+                summary: result.summary
+            }
+        }, contracts, completedAt, "running");
+        // Re-evaluate the budget now that this attempt's usage has been added to loop.cost.
+        const postAttemptCost = evaluateCostGovernor({
+            budget: loop.budget,
+            cost: loop.cost,
+            attemptsUsed: loop.attempts.length
+        });
+        if (postAttemptCost.pressure !== "healthy") {
+            loop = pushEvent(loop, {
+                type: "budget.updated",
+                lifecycleState: "running",
+                payload: {
+                    pressure: postAttemptCost.pressure,
+                    remainingBudgetUsd: postAttemptCost.remainingBudgetUsd,
+                    remainingIterations: postAttemptCost.remainingIterations,
+                    remainingTokens: postAttemptCost.remainingTokens
+                }
+            }, contracts, now(), "running");
+        }
+        // Success path: the adapter reported completion and verification passed.
+        if (result.status === "completed" && result.verification.passed) {
+            loop = pushEvent(loop, {
+                type: "verification.completed",
+                lifecycleState: "verifying",
+                payload: {
+                    attemptId,
+                    passed: true,
+                    summary: result.verification.summary
+                }
+            }, contracts, now(), "verifying");
+            decision = {
+                shouldExit: true,
+                lifecycleState: "completed",
+                reason: result.verification.summary
+            };
+            loop = finalizeLoop(loop, decision, contracts, now(), "completed");
+            finalContext = distillContext(loop);
+            return {
+                loop,
+                decision,
+                finalContext
+            };
+        }
+        // Failure path. The current attempt is left out of `attempts` (slice(0, -1));
+        // it is passed separately as `result`.
+        const failure = classifyFailure({
+            attempts: loop.attempts.slice(0, -1),
+            result
+        });
+        loop = annotateAttempt(loop, attemptId, failure);
+        loop = pushEvent(loop, {
+            type: "failure.classified",
+            lifecycleState: "running",
+            payload: {
+                attemptId,
+                failureClass: failure.failureClass,
+                rationale: failure.rationale
+            }
+        }, contracts, now(), "running");
+        loop = pushEvent(loop, {
+            type: "intervention.selected",
+            lifecycleState: "running",
+            payload: {
+                attemptId,
+                intervention: failure.recommendedIntervention
+            }
+        }, contracts, now(), "running");
+        loop = pushEvent(loop, {
+            type: "verification.completed",
+            lifecycleState: "verifying",
+            payload: {
+                attemptId,
+                passed: result.verification.passed,
+                summary: result.verification.summary
+            }
+        }, contracts, now(), "verifying");
+        // Decide whether to retry or exit based on this failure and the post-attempt budget.
+        decision = inferExit({
+            loop,
+            lastResult: result,
+            lastFailure: failure,
+            costState: postAttemptCost
+        });
+        if (decision.shouldExit) {
+            loop = finalizeLoop(loop, decision, contracts, now(), lifecycleStatus(decision));
+            finalContext = distillContext(loop);
+            return {
+                loop,
+                decision,
+                finalContext
+            };
+        }
+    }
+    // Reached only when the while condition fails: no iterations are left.
+    decision = {
+        shouldExit: true,
+        lifecycleState: "budget_exit",
+        reason: "The run exhausted its iteration budget."
+    };
+    loop = finalizeLoop(loop, decision, contracts, now(), "exited");
+    finalContext = distillContext(loop);
+    return {
+        loop,
+        decision,
+        finalContext
+    };
+}
+function applyResult(loop, attemptId, result, completedAt) {
+    return {
+        ...loop,
+        attempts: loop.attempts.map((attempt) => attempt.attemptId === attemptId ? buildCompletedAttempt(attempt, result, completedAt) : attempt),
+        artifacts: [...loop.artifacts, ...(result.artifacts ?? [])],
+        cost: {
+            actualUsd: round(loop.cost.actualUsd + result.usage.actualUsd),
+            avoidedUsd: round(loop.cost.avoidedUsd + (result.usage.avoidedUsd ?? 0)),
+            tokensIn: loop.cost.tokensIn + result.usage.tokensIn,
+            tokensOut: loop.cost.tokensOut + result.usage.tokensOut
+        },
+        updatedAt: completedAt
+    };
+}
+function buildCompletedAttempt(attempt, result, completedAt) {
+    const nextAttempt = {
+        ...attempt,
+        completedAt,
+        summary: result.summary
+    };
+    if (result.failure?.classHint) {
+        nextAttempt.failureClass = result.failure.classHint;
+    }
+    return nextAttempt;
+}
+function annotateAttempt(loop, attemptId, failure) {
+    return {
+        ...loop,
+        attempts: loop.attempts.map((attempt) => {
+            if (attempt.attemptId !== attemptId) {
+                return attempt;
+            }
+            return {
+                ...attempt,
+                failureClass: failure.failureClass,
+                intervention: failure.recommendedIntervention
+            };
+        })
+    };
+}
+function finalizeLoop(loop, decision, contracts, timestamp, status) {
+    return pushEvent({
+        ...loop,
+        lifecycleState: decision.lifecycleState,
+        status,
+        updatedAt: timestamp
+    }, {
+        type: "run.completed",
+        lifecycleState: decision.lifecycleState,
+        payload: {
+            reason: decision.reason
+        }
+    }, contracts, timestamp, status);
+}
+function pushEvent(loop, event, contracts, timestamp, status) {
+    const next = appendLoopEvent(loop, {
+        ...event,
+        timestamp
+    }, {
+        ...contracts,
+        now: timestamp
+    });
+    return status ? { ...next, status, lifecycleState: event.lifecycleState ?? next.lifecycleState } : next;
+}
+function lifecycleStatus(decision) {
+    return decision.lifecycleState === "completed" ? "completed" : "exited";
+}
+function createContractOptions(idFactory) {
+    return idFactory ? { idFactory } : {};
+}
+function makeId(prefix, idFactory) {
+    if (idFactory) {
+        return idFactory(prefix);
+    }
+    const entropy = Math.random().toString(36).slice(2, 10);
+    return `${prefix}_${entropy}`;
+}
+function round(value) {
+    return Number(value.toFixed(4));
+}
+//# sourceMappingURL=run-martin.js.map

package/dist/vendor/core/security/cve-scanner.d.ts ADDED Viewed

@@ -0,0 +1,62 @@
+/**
+ * CVE Patch Scanner — Phase 37.
+ *
+ * Parses a unified diff for newly added package dependencies and queries the
+ * OSV.dev API (https://api.osv.dev) to check for known CVEs. Blocks the
+ * attempt if any discovered package has a known vulnerability rated HIGH or CRITICAL.
+ *
+ * Supported manifest formats:
+ *   - package.json (npm/Node.js) — ecosystem: "npm"
+ *   - requirements.txt (Python) — ecosystem: "PyPI"
+ *   - Cargo.toml (Rust) — ecosystem: "crates.io"
+ *   - go.mod (Go) — ecosystem: "Go"
+ *
+ * Design rules:
+ * - Advisory when OSV.dev is unreachable (never hard-fail on network error)
+ * - Only checks ADDED lines (+ prefix) — not removed packages
+ * - Deduplicates package names before querying
+ * - MAX_PACKAGES_PER_SCAN = 20 to bound latency
+ */
+export type CveSeverity = "LOW" | "MEDIUM" | "HIGH" | "CRITICAL" | "UNKNOWN"; // Severity label carried by each CveMatch; HIGH and CRITICAL are the levels that set CveScanResult.blocked.
+export interface CveMatch { // One vulnerability reported for one package; collected in CveScanResult.matches.
+    packageName: string; // presumably mirrors PackageCandidate.name — confirm against the implementation
+    version?: string;
+    ecosystem: string; // presumably one of the ecosystem strings listed in the file header — confirm
+    vulnId: string; // NOTE(review): likely the OSV.dev vulnerability id — confirm
+    summary: string;
+    severity: CveSeverity; // HIGH or CRITICAL on any match makes CveScanResult.blocked true.
+    url: string; // NOTE(review): likely a link to the advisory — confirm
+}
+export interface CveScanResult { // Result shape that scanDiffForCves resolves with.
+    /** Newly added packages extracted from the diff. */
+    packageCandidates: PackageCandidate[];
+    /** CVEs found for any of the candidates. */
+    matches: CveMatch[];
+    /**
+     * True when any match has severity HIGH or CRITICAL.
+     * The caller should discard the attempt when this is true.
+     */
+    blocked: boolean;
+    /** Human-readable block reason. Undefined when not blocked. */
+    blockReason?: string;
+    /** True when OSV.dev was unreachable — scan ran in advisory-only mode. */
+    networkError?: boolean;
+}
+export interface PackageCandidate { // A dependency found in a diff; the element type returned by extractPackageCandidates.
+    name: string;
+    version?: string; // Optional; when it is omitted is not visible from this declaration.
+    ecosystem: string; // The file header lists "npm", "PyPI", "crates.io" and "Go"; typed as a plain string here.
+}
+/**
+ * Extract newly added package dependencies from a unified diff string.
+ * Only examines added lines (starting with +) to avoid flagging removals.
+ */
+export declare function extractPackageCandidates(diff: string): PackageCandidate[]; // diff: unified diff text; returns the candidates found on its added lines.
+/**
+ * Scan a unified diff for new package dependencies and check them against
+ * the OSV.dev vulnerability database.
+ *
+ * Returns immediately (advisory mode) if OSV.dev is unreachable, with
+ * `networkError` set; `blocked` is true if any match is HIGH or CRITICAL.
+ */
+export declare function scanDiffForCves(diff: string): Promise<CveScanResult>;