martin-loop 0.1.5 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CODE_OF_CONDUCT.md +32 -0
- package/LICENSE +21 -21
- package/README.md +307 -398
- package/demo/seeded-workspace/README.md +35 -35
- package/demo/seeded-workspace/TASKS.md +29 -29
- package/demo/seeded-workspace/martin.config.yaml +11 -11
- package/demo/seeded-workspace/package.json +8 -8
- package/demo/seeded-workspace/src/invoice-summary.js +11 -11
- package/demo/seeded-workspace/test/invoice-summary.test.js +20 -20
- package/dist/bin/martin-loop.js +0 -0
- package/dist/vendor/adapters/counter.d.ts +1 -0
- package/dist/vendor/adapters/counter.js +4 -0
- package/dist/vendor/adapters/git-baseline.d.ts +50 -0
- package/dist/vendor/adapters/git-baseline.js +233 -0
- package/dist/vendor/adapters/openrouter-adapter.d.ts +15 -0
- package/dist/vendor/adapters/openrouter-adapter.js +302 -0
- package/dist/vendor/adapters/usage.d.ts +48 -0
- package/dist/vendor/adapters/usage.js +66 -0
- package/dist/vendor/cli/bin/exit.d.ts +12 -0
- package/dist/vendor/cli/bin/exit.js +28 -0
- package/dist/vendor/cli/commands/analyze.d.ts +5 -0
- package/dist/vendor/cli/commands/analyze.js +58 -0
- package/dist/vendor/cli/commands/audit-log-verify.d.ts +34 -0
- package/dist/vendor/cli/commands/audit-log-verify.js +99 -0
- package/dist/vendor/cli/commands/audit.d.ts +8 -0
- package/dist/vendor/cli/commands/audit.js +199 -0
- package/dist/vendor/cli/commands/corpus.d.ts +5 -0
- package/dist/vendor/cli/commands/corpus.js +60 -0
- package/dist/vendor/cli/commands/doctor.d.ts +8 -0
- package/dist/vendor/cli/commands/doctor.js +219 -0
- package/dist/vendor/cli/commands/explain.d.ts +17 -0
- package/dist/vendor/cli/commands/explain.js +176 -0
- package/dist/vendor/cli/commands/export.d.ts +5 -0
- package/dist/vendor/cli/commands/export.js +60 -0
- package/dist/vendor/cli/commands/governance.d.ts +8 -0
- package/dist/vendor/cli/commands/governance.js +95 -0
- package/dist/vendor/cli/commands/improve.d.ts +18 -0
- package/dist/vendor/cli/commands/improve.js +396 -0
- package/dist/vendor/cli/commands/init.d.ts +8 -0
- package/dist/vendor/cli/commands/init.js +281 -0
- package/dist/vendor/cli/commands/migration.d.ts +8 -0
- package/dist/vendor/cli/commands/migration.js +67 -0
- package/dist/vendor/cli/commands/prior.d.ts +23 -0
- package/dist/vendor/cli/commands/prior.js +145 -0
- package/dist/vendor/cli/commands/resume.d.ts +21 -0
- package/dist/vendor/cli/commands/resume.js +73 -0
- package/dist/vendor/cli/commands/verify.d.ts +6 -0
- package/dist/vendor/cli/commands/verify.js +43 -0
- package/dist/vendor/cli/research/public-corpus.d.ts +43 -0
- package/dist/vendor/cli/research/public-corpus.js +151 -0
- package/dist/vendor/cli/ui/error-card.d.ts +38 -0
- package/dist/vendor/cli/ui/error-card.js +103 -0
- package/dist/vendor/cli/ui/mission-brief.d.ts +41 -0
- package/dist/vendor/cli/ui/mission-brief.js +173 -0
- package/dist/vendor/cli/ui/summary-card.d.ts +34 -0
- package/dist/vendor/cli/ui/summary-card.js +102 -0
- package/dist/vendor/contracts/audit.d.ts +46 -0
- package/dist/vendor/contracts/audit.js +360 -0
- package/dist/vendor/contracts/post-phase15.d.ts +240 -0
- package/dist/vendor/contracts/post-phase15.js +166 -0
- package/dist/vendor/core/agent/mandates.d.ts +46 -0
- package/dist/vendor/core/agent/mandates.js +178 -0
- package/dist/vendor/core/agent/receipts.d.ts +38 -0
- package/dist/vendor/core/agent/receipts.js +131 -0
- package/dist/vendor/core/agent/signing.d.ts +17 -0
- package/dist/vendor/core/agent/signing.js +91 -0
- package/dist/vendor/core/attestation/sign.d.ts +25 -0
- package/dist/vendor/core/attestation/sign.js +216 -0
- package/dist/vendor/core/autonomy/autonomous-promotion.d.ts +120 -0
- package/dist/vendor/core/autonomy/autonomous-promotion.js +346 -0
- package/dist/vendor/core/autonomy/envelope-v2.d.ts +29 -0
- package/dist/vendor/core/autonomy/envelope-v2.js +60 -0
- package/dist/vendor/core/autonomy/envelope.d.ts +17 -0
- package/dist/vendor/core/autonomy/envelope.js +27 -0
- package/dist/vendor/core/autonomy/escalation-ledger.d.ts +20 -0
- package/dist/vendor/core/autonomy/escalation-ledger.js +18 -0
- package/dist/vendor/core/autonomy/resume.d.ts +15 -0
- package/dist/vendor/core/autonomy/resume.js +23 -0
- package/dist/vendor/core/circuit/circuit-breaker.d.ts +60 -0
- package/dist/vendor/core/circuit/circuit-breaker.js +143 -0
- package/dist/vendor/core/context-distillation.d.ts +3 -0
- package/dist/vendor/core/context-distillation.js +44 -0
- package/dist/vendor/core/context-flow/compile-context.d.ts +8 -0
- package/dist/vendor/core/context-flow/compile-context.js +111 -0
- package/dist/vendor/core/context-flow/entities.d.ts +2 -0
- package/dist/vendor/core/context-flow/entities.js +44 -0
- package/dist/vendor/core/context-flow/evaluate-policy.d.ts +2 -0
- package/dist/vendor/core/context-flow/evaluate-policy.js +42 -0
- package/dist/vendor/core/context-flow/index.d.ts +11 -0
- package/dist/vendor/core/context-flow/index.js +24 -0
- package/dist/vendor/core/context-flow/labels.d.ts +3 -0
- package/dist/vendor/core/context-flow/labels.js +17 -0
- package/dist/vendor/core/context-flow/normalizer.d.ts +9 -0
- package/dist/vendor/core/context-flow/normalizer.js +69 -0
- package/dist/vendor/core/context-flow/profiles.d.ts +33 -0
- package/dist/vendor/core/context-flow/profiles.js +36 -0
- package/dist/vendor/core/context-flow/redaction.d.ts +1 -0
- package/dist/vendor/core/context-flow/redaction.js +6 -0
- package/dist/vendor/core/context-flow/sensitivity.d.ts +2 -0
- package/dist/vendor/core/context-flow/sensitivity.js +27 -0
- package/dist/vendor/core/context-flow/sync-preview.d.ts +2 -0
- package/dist/vendor/core/context-flow/sync-preview.js +22 -0
- package/dist/vendor/core/context-flow/token-estimator.d.ts +3 -0
- package/dist/vendor/core/context-flow/token-estimator.js +13 -0
- package/dist/vendor/core/context-flow/types.d.ts +91 -0
- package/dist/vendor/core/context-flow/types.js +2 -0
- package/dist/vendor/core/context-utility.d.ts +47 -0
- package/dist/vendor/core/context-utility.js +405 -0
- package/dist/vendor/core/cost/pipeline.d.ts +92 -0
- package/dist/vendor/core/cost/pipeline.js +141 -0
- package/dist/vendor/core/cost/tagged-cost.d.ts +27 -0
- package/dist/vendor/core/cost/tagged-cost.js +55 -0
- package/dist/vendor/core/cost-governor.d.ts +2 -0
- package/dist/vendor/core/cost-governor.js +50 -0
- package/dist/vendor/core/cve/cve-check.d.ts +80 -0
- package/dist/vendor/core/cve/cve-check.js +172 -0
- package/dist/vendor/core/digital-twin/index.d.ts +27 -0
- package/dist/vendor/core/digital-twin/index.js +90 -0
- package/dist/vendor/core/drift/drift-graph.d.ts +47 -0
- package/dist/vendor/core/drift/drift-graph.js +100 -0
- package/dist/vendor/core/drift/objective-lock.d.ts +69 -0
- package/dist/vendor/core/drift/objective-lock.js +88 -0
- package/dist/vendor/core/drift/scope.d.ts +46 -0
- package/dist/vendor/core/drift/scope.js +102 -0
- package/dist/vendor/core/drift/signature-lock.d.ts +48 -0
- package/dist/vendor/core/drift/signature-lock.js +202 -0
- package/dist/vendor/core/drift/stale-proof-gate.d.ts +21 -0
- package/dist/vendor/core/drift/stale-proof-gate.js +19 -0
- package/dist/vendor/core/eval/known-bad-world-runner.d.ts +24 -0
- package/dist/vendor/core/eval/known-bad-world-runner.js +256 -0
- package/dist/vendor/core/evidence/claim-audit.d.ts +18 -0
- package/dist/vendor/core/evidence/claim-audit.js +89 -0
- package/dist/vendor/core/exit-intelligence.d.ts +2 -0
- package/dist/vendor/core/exit-intelligence.js +58 -0
- package/dist/vendor/core/explain/formatter.d.ts +42 -0
- package/dist/vendor/core/explain/formatter.js +171 -0
- package/dist/vendor/core/explain/timeline.d.ts +29 -0
- package/dist/vendor/core/explain/timeline.js +213 -0
- package/dist/vendor/core/failure-taxonomy.d.ts +2 -0
- package/dist/vendor/core/failure-taxonomy.js +76 -0
- package/dist/vendor/core/gateway/index.d.ts +10 -0
- package/dist/vendor/core/gateway/index.js +12 -0
- package/dist/vendor/core/gateway/registry.d.ts +40 -0
- package/dist/vendor/core/gateway/registry.js +97 -0
- package/dist/vendor/core/gateway/transport.d.ts +31 -0
- package/dist/vendor/core/gateway/transport.js +82 -0
- package/dist/vendor/core/gateway/vault.d.ts +19 -0
- package/dist/vendor/core/gateway/vault.js +29 -0
- package/dist/vendor/core/graph/adapters.d.ts +43 -0
- package/dist/vendor/core/graph/adapters.js +91 -0
- package/dist/vendor/core/graph/hotspots.d.ts +22 -0
- package/dist/vendor/core/graph/hotspots.js +30 -0
- package/dist/vendor/core/graph/index.d.ts +1 -0
- package/dist/vendor/core/graph/index.js +2 -0
- package/dist/vendor/core/honey/honey-tokens.d.ts +32 -0
- package/dist/vendor/core/honey/honey-tokens.js +44 -0
- package/dist/vendor/core/index.d.ts +2 -2
- package/dist/vendor/core/index.js +38 -12
- package/dist/vendor/core/learning/bayesian-update.d.ts +31 -0
- package/dist/vendor/core/learning/bayesian-update.js +60 -0
- package/dist/vendor/core/learning/prior-sets.d.ts +42 -0
- package/dist/vendor/core/learning/prior-sets.js +111 -0
- package/dist/vendor/core/learning/promotion-gate.d.ts +17 -0
- package/dist/vendor/core/learning/promotion-gate.js +23 -0
- package/dist/vendor/core/leash/blast-radius.d.ts +42 -0
- package/dist/vendor/core/leash/blast-radius.js +156 -0
- package/dist/vendor/core/leash/policy-leash.d.ts +31 -0
- package/dist/vendor/core/leash/policy-leash.js +117 -0
- package/dist/vendor/core/memo/memo.d.ts +63 -0
- package/dist/vendor/core/memo/memo.js +97 -0
- package/dist/vendor/core/memory/learning-pipeline.d.ts +154 -0
- package/dist/vendor/core/memory/learning-pipeline.js +391 -0
- package/dist/vendor/core/memory/palace.d.ts +84 -0
- package/dist/vendor/core/memory/palace.js +379 -0
- package/dist/vendor/core/merge/ast-merge.d.ts +22 -0
- package/dist/vendor/core/merge/ast-merge.js +350 -0
- package/dist/vendor/core/merge/text-merge.d.ts +12 -0
- package/dist/vendor/core/merge/text-merge.js +182 -0
- package/dist/vendor/core/otel/tracer.d.ts +45 -0
- package/dist/vendor/core/otel/tracer.js +116 -0
- package/dist/vendor/core/parallel/parallel-attempts.d.ts +28 -0
- package/dist/vendor/core/parallel/parallel-attempts.js +41 -0
- package/dist/vendor/core/parallel/scorer.d.ts +24 -0
- package/dist/vendor/core/parallel/scorer.js +65 -0
- package/dist/vendor/core/pattern-detection.d.ts +64 -0
- package/dist/vendor/core/pattern-detection.js +108 -0
- package/dist/vendor/core/persistence/checkpoint.d.ts +44 -0
- package/dist/vendor/core/persistence/checkpoint.js +156 -0
- package/dist/vendor/core/persistence/cleanup.d.ts +22 -0
- package/dist/vendor/core/persistence/cleanup.js +131 -0
- package/dist/vendor/core/persistence/index.d.ts +2 -0
- package/dist/vendor/core/persistence/index.js +1 -0
- package/dist/vendor/core/persistence/runs-reader.d.ts +52 -0
- package/dist/vendor/core/persistence/runs-reader.js +84 -0
- package/dist/vendor/core/persistence/store.d.ts +6 -1
- package/dist/vendor/core/persistence/store.js +5 -0
- package/dist/vendor/core/policy/file-touch-quota.d.ts +60 -0
- package/dist/vendor/core/policy/file-touch-quota.js +105 -0
- package/dist/vendor/core/policy/policy-loader.d.ts +30 -0
- package/dist/vendor/core/policy/policy-loader.js +170 -0
- package/dist/vendor/core/policy/policy-schema.d.ts +55 -0
- package/dist/vendor/core/policy/policy-schema.js +78 -0
- package/dist/vendor/core/probe/probe.d.ts +49 -0
- package/dist/vendor/core/probe/probe.js +115 -0
- package/dist/vendor/core/proof/patch-proof.d.ts +58 -0
- package/dist/vendor/core/proof/patch-proof.js +84 -0
- package/dist/vendor/core/proof/semantic-probe.d.ts +25 -0
- package/dist/vendor/core/proof/semantic-probe.js +82 -0
- package/dist/vendor/core/recovery/failure-mode-runner.d.ts +29 -0
- package/dist/vendor/core/recovery/failure-mode-runner.js +39 -0
- package/dist/vendor/core/red-blue/red-phase.d.ts +64 -0
- package/dist/vendor/core/red-blue/red-phase.js +141 -0
- package/dist/vendor/core/red-blue/risk-tiers.d.ts +22 -0
- package/dist/vendor/core/red-blue/risk-tiers.js +33 -0
- package/dist/vendor/core/replay/replay.d.ts +85 -0
- package/dist/vendor/core/replay/replay.js +109 -0
- package/dist/vendor/core/router/engine.d.ts +54 -0
- package/dist/vendor/core/router/engine.js +131 -0
- package/dist/vendor/core/router/index.d.ts +1 -0
- package/dist/vendor/core/router/index.js +2 -0
- package/dist/vendor/core/router/trust-calibration.d.ts +57 -0
- package/dist/vendor/core/router/trust-calibration.js +127 -0
- package/dist/vendor/core/run-martin.d.ts +2 -0
- package/dist/vendor/core/run-martin.js +287 -0
- package/dist/vendor/core/security/cve-scanner.d.ts +62 -0
- package/dist/vendor/core/security/cve-scanner.js +178 -0
- package/dist/vendor/core/sentinel/efficiency-sentinel.d.ts +29 -0
- package/dist/vendor/core/sentinel/efficiency-sentinel.js +30 -0
- package/dist/vendor/core/sentinel/progress-guard.d.ts +35 -0
- package/dist/vendor/core/sentinel/progress-guard.js +46 -0
- package/dist/vendor/core/siem/siem-emitter.d.ts +49 -0
- package/dist/vendor/core/siem/siem-emitter.js +157 -0
- package/dist/vendor/core/strategy/attempt-brief.d.ts +22 -0
- package/dist/vendor/core/strategy/attempt-brief.js +89 -0
- package/dist/vendor/core/summarize/diff-summary.d.ts +35 -0
- package/dist/vendor/core/summarize/diff-summary.js +204 -0
- package/dist/vendor/core/surface-signals.d.ts +21 -0
- package/dist/vendor/core/surface-signals.js +139 -0
- package/dist/vendor/core/truth/truth-wall.d.ts +51 -0
- package/dist/vendor/core/truth/truth-wall.js +69 -0
- package/dist/vendor/core/truth-spine.d.ts +26 -0
- package/dist/vendor/core/truth-spine.js +62 -0
- package/dist/vendor/core/types.d.ts +115 -0
- package/dist/vendor/core/types.js +2 -0
- package/dist/vendor/core/verification/tiered-verify.d.ts +17 -0
- package/dist/vendor/core/verification/tiered-verify.js +29 -0
- package/dist/vendor/core/verifier-pyramid.d.ts +32 -0
- package/dist/vendor/core/verifier-pyramid.js +111 -0
- package/dist/vendor/core/workflow-artifacts.d.ts +99 -0
- package/dist/vendor/core/workflow-artifacts.js +668 -0
- package/dist/vendor/core/wrap/supervised-run.d.ts +96 -0
- package/dist/vendor/core/wrap/supervised-run.js +178 -0
- package/docs/assets/cli-animated.svg +139 -0
- package/docs/assets/cli-static.svg +34 -0
- package/docs/assets/github-hero-v2.svg +23 -0
- package/docs/assets/martin-raplph.png.jpg +0 -0
- package/docs/assets/martinloop-logo.png +0 -0
- package/docs/assets/nvidia-inception-program-light.png +0 -0
- package/docs/assets/nvidia-inception-program.png +0 -0
- package/docs/assets/phase3c-sidesidebyside-demo.html +228 -0
- package/docs/assets/side-by-side.svg +134 -0
- package/docs/oss/CLAUDE-CODE-WALKTHROUGH.md +142 -142
- package/docs/oss/EXAMPLES.md +134 -134
- package/docs/oss/OSS-BOUNDARY-REPORT.json +1 -1
- package/docs/oss/OSS-BOUNDARY-REPORT.md +1 -1
- package/docs/oss/QUICKSTART.md +170 -165
- package/docs/oss/RALPH-LOOP-SAFETY.md +113 -113
- package/docs/oss/README.md +96 -96
- package/docs/oss/RELEASE-SURFACE-REPORT.json +2 -1
- package/docs/oss/RELEASE-SURFACE-REPORT.md +2 -1
- package/package.json +130 -58
- package/docs/distribution/DIRECTORY-SUBMISSIONS.md +0 -89
- package/docs/distribution/INTEGRATION-OUTREACH.md +0 -61
- package/docs/distribution/UNDER-3-CHALLENGE.md +0 -65
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Trust Calibration Engine — the self-improvement loop.
|
|
3
|
+
*
|
|
4
|
+
* Reads historical run records from ~/.martin/runs/ and computes a reliability
|
|
5
|
+
* profile for each model that has been used. The router uses these profiles to
|
|
6
|
+
* automatically downgrade to cheaper models when evidence shows they perform
|
|
7
|
+
* as well as more expensive ones, and to deprioritize models with poor track records.
|
|
8
|
+
*
|
|
9
|
+
* This closes the feedback loop that was missing: every completed run writes
|
|
10
|
+
* evidence to disk; this module reads it back into routing decisions.
|
|
11
|
+
*/
|
|
12
|
+
import { readAllLoopRecords } from "../persistence/runs-reader.js";
|
|
13
|
+
/**
 * Builds one trust profile per model from the persisted loop records and picks
 * a recommended model from them.
 *
 * Every record contributes once to each distinct model found in its attempts
 * (see `extractModelsFromRun`). The record's full cost and attempt count are
 * credited to each of those models.
 *
 * @param runsDir - Directory handed to `readAllLoopRecords`.
 * @param minRuns - Minimum `runsObserved` a profile needs before it can be
 *   recommended. Default: 3.
 * @param efficiencyThreshold - Minimum `efficiencyScore` a profile needs before
 *   it can be recommended. Default: 0.75.
 * @returns `{ profiles, recommendedModel, calibrationBasis }`: profiles sorted
 *   by descending `efficiencyScore`, the model of the eligible profile with the
 *   lowest `avgCostPerIteration` (or `null` when none is eligible), and the
 *   number of records read.
 */
export async function calibrateTrust(runsDir, minRuns = 3, efficiencyThreshold = 0.75) {
    const history = await readAllLoopRecords(runsDir);
    if (history.length === 0) {
        return { profiles: [], recommendedModel: null, calibrationBasis: 0 };
    }

    // Pass 1: fold every record into a per-model running tally.
    const tallies = new Map();
    for (const run of history) {
        const modelsUsed = extractModelsFromRun(run);
        const finished = run.status === "completed";
        // Share of the iteration budget consumed; a non-positive budget counts as fully used.
        const budgetFractionUsed = run.budget.maxIterations > 0
            ? run.attempts.length / run.budget.maxIterations
            : 1;
        const runTimestamp = run.updatedAt ?? run.createdAt;
        for (const model of modelsUsed) {
            let tally = tallies.get(model);
            if (!tally) {
                tally = {
                    model,
                    completedRuns: 0,
                    totalRuns: 0,
                    totalCostUsd: 0,
                    totalAttempts: 0,
                    totalIterationsUsedFraction: 0,
                    latestUpdatedAt: run.createdAt
                };
                tallies.set(model, tally);
            }
            tally.totalRuns += 1;
            tally.totalAttempts += run.attempts.length;
            tally.totalCostUsd += run.cost.actualUsd;
            tally.totalIterationsUsedFraction += budgetFractionUsed;
            if (finished) {
                tally.completedRuns += 1;
            }
            if (runTimestamp > tally.latestUpdatedAt) {
                tally.latestUpdatedAt = runTimestamp;
            }
        }
    }

    // Pass 2: turn tallies into profiles (a tally always has totalRuns >= 1).
    const profiles = Array.from(tallies.values(), (tally) => {
        const completionRate = tally.completedRuns / tally.totalRuns;
        const avgIterationEfficiency = tally.totalIterationsUsedFraction / tally.totalRuns;
        const avgCostPerIteration = tally.totalAttempts > 0
            ? tally.totalCostUsd / tally.totalAttempts
            : 0;
        // High score = completes reliably AND leaves iteration budget unused.
        const rawScore = completionRate * (1 - avgIterationEfficiency * 0.5);
        return {
            model: tally.model,
            runsObserved: tally.totalRuns,
            completionRate,
            avgCostPerIteration,
            avgIterationEfficiency,
            efficiencyScore: Math.round(rawScore * 1000) / 1000,
            lastUpdated: tally.latestUpdatedAt
        };
    });
    profiles.sort((left, right) => right.efficiencyScore - left.efficiencyScore);

    // Cheapest eligible profile wins; on a cost tie the earlier (higher-scoring) one is kept.
    let cheapest = null;
    for (const candidate of profiles) {
        const eligible = candidate.runsObserved >= minRuns
            && candidate.efficiencyScore >= efficiencyThreshold;
        if (!eligible) {
            continue;
        }
        if (cheapest === null || candidate.avgCostPerIteration < cheapest.avgCostPerIteration) {
            cheapest = candidate;
        }
    }

    return {
        profiles,
        recommendedModel: cheapest === null ? null : cheapest.model,
        calibrationBasis: history.length
    };
}
|
|
89
|
+
/**
 * Decides whether a trust profile marks its model as one to deprioritize.
 *
 * @param profile - Profile exposing `runsObserved` and `completionRate`.
 * @param minRuns - Observations required before the verdict can be negative. Default: 5.
 * @param minCompletionRate - Completion rate below which the model is considered
 *   poor. Default: 0.4.
 * @returns `true` only when the profile has at least `minRuns` observations and
 *   its completion rate is strictly below `minCompletionRate`.
 */
export function shouldDeprioritize(profile, minRuns = 5, minCompletionRate = 0.4) {
    const hasEnoughEvidence = profile.runsObserved >= minRuns;
    if (!hasEnoughEvidence) {
        // Too few runs to hold a poor completion rate against the model.
        return false;
    }
    return profile.completionRate < minCompletionRate;
}
|
|
98
|
+
/**
 * Collects the distinct, normalized model keys that appear in a run's attempts.
 *
 * Each attempt is keyed by its `model`, or by its `adapterId` when `model` is
 * null/undefined. Attempts whose key is falsy are ignored. Keys pass through
 * `normalizeModelName` before de-duplication.
 *
 * @param record - Loop record with an iterable `attempts` collection.
 * @returns Array of unique normalized keys, in first-seen order.
 */
function extractModelsFromRun(record) {
    const uniqueKeys = new Set();
    for (const attempt of record.attempts) {
        const rawKey = attempt.model ?? attempt.adapterId;
        if (!rawKey) {
            continue;
        }
        uniqueKeys.add(normalizeModelName(rawKey));
    }
    return Array.from(uniqueKeys);
}
|
|
111
|
+
/**
 * Ordered `[needle, canonicalKey]` pairs used by `normalizeModelName`.
 * Order matters: "gpt-4o-mini" must be tested before "gpt-4o", which is a
 * substring of it.
 */
const MODEL_ALIAS_RULES = [
    ["sonnet", "claude-sonnet"],
    ["haiku", "claude-haiku"],
    ["opus", "claude-opus"],
    ["gpt-4o-mini", "gpt-4o-mini"],
    ["gpt-4o", "gpt-4o"],
    ["o3", "o3"]
];
/**
 * Maps a raw model identifier onto a canonical key so that different spellings
 * of the same model share one trust profile.
 *
 * Matching is case-insensitive: the identifier may be a human-readable label
 * (for example "Claude Sonnet 4") rather than a lowercase model id, and both
 * must land on the same key.
 *
 * @param raw - Model identifier or label (string).
 * @returns The canonical key of the first matching rule, or `raw` unchanged
 *   (original casing preserved) when no rule matches.
 */
function normalizeModelName(raw) {
    const lowered = raw.toLowerCase();
    for (const [needle, canonicalKey] of MODEL_ALIAS_RULES) {
        if (lowered.includes(needle)) {
            return canonicalKey;
        }
    }
    return raw;
}
|
|
127
|
+
//# sourceMappingURL=trust-calibration.js.map
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
import { appendLoopEvent, createLoopRecord } from "../contracts/index.js";
|
|
2
|
+
import { distillContext } from "./context-distillation.js";
|
|
3
|
+
import { evaluateCostGovernor } from "./cost-governor.js";
|
|
4
|
+
import { inferExit } from "./exit-intelligence.js";
|
|
5
|
+
import { classifyFailure } from "./failure-taxonomy.js";
|
|
6
|
+
/**
 * Runs one loop to completion against `options.adapter`.
 *
 * Each iteration checks the cost governor, records a new attempt, executes the
 * adapter and folds the result into the loop record. It then either finishes
 * (result completed and verification passed) or classifies the failure and
 * asks `inferExit` whether to stop. The loop also stops when the cost governor
 * says so or when `loop.budget.maxIterations` attempts have been made.
 *
 * @param options - Run configuration. Fields read here: `now`, `idFactory`,
 *   `workspaceId`, `projectId`, `task`, `budget`, `teamId`, `adapter`.
 * @returns `{ loop, decision, finalContext }` for whichever exit path was taken.
 */
export async function runMartin(options) {
    const clock = options.now ?? (() => new Date().toISOString());
    const contractOptions = createContractOptions(options.idFactory);

    // Optional fields are only set when truthy so the record never carries empty keys.
    const seed = {
        workspaceId: options.workspaceId,
        projectId: options.projectId,
        task: options.task
    };
    if (options.budget) {
        seed.budget = options.budget;
    }
    if (options.teamId) {
        seed.teamId = options.teamId;
    }
    let loop = createLoopRecord(seed, { ...contractOptions, now: clock() });
    let finalContext = distillContext(loop);
    let decision = {
        shouldExit: false,
        lifecycleState: "running",
        reason: "Run initialized."
    };

    // Appends an event to the loop record at the given timestamp.
    const emit = (event, timestamp, status) => {
        loop = pushEvent(loop, event, contractOptions, timestamp, status);
    };
    // Finalizes under the current decision, refreshes the context, and builds the return value.
    const conclude = (status) => {
        loop = finalizeLoop(loop, decision, contractOptions, clock(), status);
        finalContext = distillContext(loop);
        return { loop, decision, finalContext };
    };

    emit({
        type: "run.started",
        lifecycleState: "running",
        payload: {
            adapterId: options.adapter.adapterId,
            adapterKind: options.adapter.kind
        }
    }, clock(), "running");

    while (loop.attempts.length < loop.budget.maxIterations) {
        const preAttemptCost = evaluateCostGovernor({
            budget: loop.budget,
            cost: loop.cost,
            attemptsUsed: loop.attempts.length
        });
        if (preAttemptCost.shouldStop) {
            decision = {
                shouldExit: true,
                lifecycleState: "budget_exit",
                reason: "Budget governor reached a hard limit."
            };
            return conclude("exited");
        }

        // Context handed to the adapter reflects the loop as it stood before this attempt.
        finalContext = distillContext(loop);
        const attemptIndex = loop.attempts.length + 1;
        const attemptStartedAt = clock();
        const attemptId = makeId("att", options.idFactory);
        const pendingAttempt = {
            attemptId,
            index: attemptIndex,
            adapterId: options.adapter.adapterId,
            model: options.adapter.metadata.model ?? options.adapter.label,
            startedAt: attemptStartedAt
        };
        loop = {
            ...loop,
            attempts: [...loop.attempts, pendingAttempt],
            status: "running",
            lifecycleState: "running",
            updatedAt: attemptStartedAt
        };
        emit({
            type: "attempt.started",
            lifecycleState: "running",
            payload: {
                attemptId,
                attemptIndex,
                adapterId: options.adapter.adapterId
            }
        }, attemptStartedAt, "running");

        const adapterRequest = {
            loopId: loop.loopId,
            workspaceId: loop.workspaceId,
            projectId: loop.projectId,
            attemptIndex,
            task: loop.task,
            context: finalContext,
            budget: loop.budget,
            costState: preAttemptCost
        };
        if (loop.teamId) {
            adapterRequest.teamId = loop.teamId;
        }
        const result = await options.adapter.execute(adapterRequest);
        const completedAt = clock();
        loop = applyResult(loop, attemptId, result, completedAt);
        emit({
            type: "attempt.completed",
            lifecycleState: "running",
            payload: {
                attemptId,
                status: result.status,
                summary: result.summary
            }
        }, completedAt, "running");

        const postAttemptCost = evaluateCostGovernor({
            budget: loop.budget,
            cost: loop.cost,
            attemptsUsed: loop.attempts.length
        });
        if (postAttemptCost.pressure !== "healthy") {
            emit({
                type: "budget.updated",
                lifecycleState: "running",
                payload: {
                    pressure: postAttemptCost.pressure,
                    remainingBudgetUsd: postAttemptCost.remainingBudgetUsd,
                    remainingIterations: postAttemptCost.remainingIterations,
                    remainingTokens: postAttemptCost.remainingTokens
                }
            }, clock(), "running");
        }

        // Success path: the adapter completed and its verification passed.
        const verifiedSuccess = result.status === "completed" && result.verification.passed;
        if (verifiedSuccess) {
            emit({
                type: "verification.completed",
                lifecycleState: "verifying",
                payload: {
                    attemptId,
                    passed: true,
                    summary: result.verification.summary
                }
            }, clock(), "verifying");
            decision = {
                shouldExit: true,
                lifecycleState: "completed",
                reason: result.verification.summary
            };
            return conclude("completed");
        }

        // Failure path: classify against the attempts that preceded this one.
        const failure = classifyFailure({
            attempts: loop.attempts.slice(0, -1),
            result
        });
        loop = annotateAttempt(loop, attemptId, failure);
        emit({
            type: "failure.classified",
            lifecycleState: "running",
            payload: {
                attemptId,
                failureClass: failure.failureClass,
                rationale: failure.rationale
            }
        }, clock(), "running");
        emit({
            type: "intervention.selected",
            lifecycleState: "running",
            payload: {
                attemptId,
                intervention: failure.recommendedIntervention
            }
        }, clock(), "running");
        emit({
            type: "verification.completed",
            lifecycleState: "verifying",
            payload: {
                attemptId,
                passed: result.verification.passed,
                summary: result.verification.summary
            }
        }, clock(), "verifying");

        decision = inferExit({
            loop,
            lastResult: result,
            lastFailure: failure,
            costState: postAttemptCost
        });
        if (decision.shouldExit) {
            return conclude(lifecycleStatus(decision));
        }
    }

    // The while-condition failed: every allowed iteration has been used.
    decision = {
        shouldExit: true,
        lifecycleState: "budget_exit",
        reason: "The run exhausted its iteration budget."
    };
    return conclude("exited");
}
|
|
207
|
+
/**
 * Returns a copy of `loop` with an adapter result folded in.
 *
 * The attempt matching `attemptId` is replaced by its completed form, the
 * result's artifacts (if any) are appended, the usage figures are added to the
 * running cost, and `updatedAt` is set to `completedAt`. The input loop is not
 * mutated.
 */
function applyResult(loop, attemptId, result, completedAt) {
    const attempts = loop.attempts.map((entry) => {
        if (entry.attemptId !== attemptId) {
            return entry;
        }
        return buildCompletedAttempt(entry, result, completedAt);
    });
    const artifacts = [...loop.artifacts, ...(result.artifacts ?? [])];
    const spent = loop.cost;
    const usage = result.usage;
    // Dollar amounts are rounded via round(); token counts are summed as-is.
    const cost = {
        actualUsd: round(spent.actualUsd + usage.actualUsd),
        avoidedUsd: round(spent.avoidedUsd + (usage.avoidedUsd ?? 0)),
        tokensIn: spent.tokensIn + usage.tokensIn,
        tokensOut: spent.tokensOut + usage.tokensOut
    };
    return { ...loop, attempts, artifacts, cost, updatedAt: completedAt };
}
|
|
221
|
+
/**
 * Produces the completed form of an attempt: a copy carrying `completedAt` and
 * the result's `summary`, plus `failureClass` when the result supplies a
 * truthy `failure.classHint`. The input attempt is not mutated.
 */
function buildCompletedAttempt(attempt, result, completedAt) {
    const classHint = result.failure?.classHint;
    const completed = { ...attempt, completedAt, summary: result.summary };
    return classHint ? { ...completed, failureClass: classHint } : completed;
}
|
|
232
|
+
/**
 * Returns a copy of `loop` in which the attempt matching `attemptId` carries
 * the classified `failureClass` and the recommended `intervention`. Other
 * attempts are passed through by reference; nothing is mutated.
 */
function annotateAttempt(loop, attemptId, failure) {
    const withFailure = (attempt) => ({
        ...attempt,
        failureClass: failure.failureClass,
        intervention: failure.recommendedIntervention
    });
    const attempts = loop.attempts.map((attempt) => (attempt.attemptId === attemptId ? withFailure(attempt) : attempt));
    return { ...loop, attempts };
}
|
|
247
|
+
/**
 * Closes out a loop: stamps the decision's lifecycle state, the given status
 * and `timestamp` onto a copy of the record, then appends a "run.completed"
 * event whose payload carries the decision's reason.
 */
function finalizeLoop(loop, decision, contracts, timestamp, status) {
    const { lifecycleState, reason } = decision;
    const settledLoop = { ...loop, lifecycleState, status, updatedAt: timestamp };
    const completionEvent = {
        type: "run.completed",
        lifecycleState,
        payload: { reason }
    };
    return pushEvent(settledLoop, completionEvent, contracts, timestamp, status);
}
|
|
261
|
+
/**
 * Appends `event` (stamped with `timestamp`) to the loop via `appendLoopEvent`.
 *
 * When `status` is truthy the returned record additionally takes that status
 * and the event's lifecycle state (falling back to the appended record's own
 * state when the event has none). Otherwise the appended record is returned
 * untouched.
 */
function pushEvent(loop, event, contracts, timestamp, status) {
    const stampedEvent = { ...event, timestamp };
    const appended = appendLoopEvent(loop, stampedEvent, { ...contracts, now: timestamp });
    if (!status) {
        return appended;
    }
    return {
        ...appended,
        status,
        lifecycleState: event.lifecycleState ?? appended.lifecycleState
    };
}
|
|
271
|
+
/**
 * Maps an exit decision to the loop status: "completed" when the decision's
 * lifecycle state is "completed", "exited" for every other state.
 */
function lifecycleStatus(decision) {
    if (decision.lifecycleState === "completed") {
        return "completed";
    }
    return "exited";
}
|
|
274
|
+
/**
 * Wraps an optional id factory into the options object passed to the contract
 * helpers. A falsy factory yields an empty object so no `idFactory` key is set.
 */
function createContractOptions(idFactory) {
    if (!idFactory) {
        return {};
    }
    return { idFactory };
}
|
|
277
|
+
/**
 * Creates an identifier of the form `<prefix>_<suffix>`.
 *
 * When `idFactory` is supplied it is called with `prefix` and its return value
 * is used verbatim. Otherwise the suffix is 8 base-36 characters derived from
 * `Math.random()`.
 *
 * The suffix is right-padded with "0" to a fixed width. The base-36 expansion
 * of a random number can be shorter than 8 digits (0.5 is "0.i", 0 is "0"),
 * which would otherwise give variable-width ids or even a bare `prefix_`.
 *
 * @param prefix - Leading tag for the id (e.g. "att").
 * @param idFactory - Optional deterministic id generator.
 * @returns The generated identifier.
 */
function makeId(prefix, idFactory) {
    if (idFactory) {
        return idFactory(prefix);
    }
    const suffixLength = 8;
    const entropy = Math.random()
        .toString(36)
        .slice(2, 2 + suffixLength) // drop the leading "0."
        .padEnd(suffixLength, "0");
    return `${prefix}_${entropy}`;
}
|
|
284
|
+
/**
 * Round `value` to four decimal places by formatting it with `toFixed(4)`
 * and converting the text back to a number.
 */
function round(value) {
    const fixedText = value.toFixed(4);
    return Number(fixedText);
}
|
|
287
|
+
//# sourceMappingURL=run-martin.js.map
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CVE Patch Scanner — Phase 37.
|
|
3
|
+
*
|
|
4
|
+
* Parses a unified diff for newly added package dependencies and queries the
|
|
5
|
+
* OSV.dev API (https://api.osv.dev) to check for known CVEs. Blocks the
|
|
6
|
+
* attempt if any discovered package has severity HIGH or CRITICAL.
|
|
7
|
+
*
|
|
8
|
+
* Supported manifest formats:
|
|
9
|
+
* - package.json (npm/Node.js) — ecosystem: "npm"
|
|
10
|
+
* - requirements.txt (Python) — ecosystem: "PyPI"
|
|
11
|
+
* - Cargo.toml (Rust) — ecosystem: "crates.io"
|
|
12
|
+
* - go.mod (Go) — ecosystem: "Go"
|
|
13
|
+
*
|
|
14
|
+
* Design rules:
|
|
15
|
+
* - Advisory when OSV.dev is unreachable (never hard-fail on network error)
|
|
16
|
+
* - Only checks ADDED lines (+ prefix) — not removed packages
|
|
17
|
+
* - Deduplicates package names before querying
|
|
18
|
+
* - MAX_PACKAGES_PER_SCAN = 20 to bound latency
|
|
19
|
+
*/
|
|
20
|
+
/** Severity label attached to a vulnerability match. */
export type CveSeverity =
    | "LOW"
    | "MEDIUM"
    | "HIGH"
    | "CRITICAL"
    | "UNKNOWN";
|
|
21
|
+
/** One vulnerability reported for one scanned package. */
export interface CveMatch {
    /** Name of the package the vulnerability was reported for. */
    packageName: string;
    /** Version of the package that was queried, when one was extracted. */
    version?: string;
    /** Package ecosystem the lookup was made in (e.g. "npm", "PyPI"). */
    ecosystem: string;
    /** Identifier of the vulnerability record. */
    vulnId: string;
    /** Short description of the vulnerability. */
    summary: string;
    /** Severity bucket resolved for this vulnerability. */
    severity: CveSeverity;
    /** Link to the vulnerability record. */
    url: string;
}
|
|
30
|
+
/** Outcome of scanning one diff for vulnerable dependency additions. */
export interface CveScanResult {
    /** Packages that the diff newly adds, as extracted from its added lines. */
    packageCandidates: PackageCandidate[];
    /** Vulnerabilities found for any of the candidates. */
    matches: CveMatch[];
    /**
     * True when at least one match is rated HIGH or CRITICAL.
     * Callers are expected to discard the attempt in that case.
     */
    blocked: boolean;
    /** Human-readable explanation of the block; undefined when not blocked. */
    blockReason?: string;
    /** True when OSV.dev could not be reached and the scan was advisory-only. */
    networkError?: boolean;
}
|
|
45
|
+
/** A dependency found on an added line of a diff. */
export interface PackageCandidate {
    /** Package name as written in the manifest line. */
    name: string;
    /** Version extracted from the manifest line, when one was present. */
    version?: string;
    /** Ecosystem label used for the lookup: "npm", "PyPI", "crates.io" or "Go". */
    ecosystem: string;
}
|
|
50
|
+
/**
 * Collect the package dependencies that a unified diff newly introduces.
 *
 * Only added lines (those beginning with "+") are inspected, so packages that
 * a diff removes are never reported.
 *
 * @param diff Unified diff text.
 * @returns The candidates found, in order of first appearance.
 */
export declare function extractPackageCandidates(diff: string): PackageCandidate[];
|
|
55
|
+
/**
 * Scan a unified diff for newly added package dependencies and look each one
 * up in the OSV.dev vulnerability database.
 *
 * A failed lookup does not reject the promise: the problem is reported through
 * `networkError` on the result instead. `blocked` is true when any returned
 * match is rated HIGH or CRITICAL.
 *
 * @param diff Unified diff text.
 * @returns The scan outcome, including the candidates that were checked.
 */
export declare function scanDiffForCves(diff: string): Promise<CveScanResult>;
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CVE Patch Scanner — Phase 37.
|
|
3
|
+
*
|
|
4
|
+
* Parses a unified diff for newly added package dependencies and queries the
|
|
5
|
+
* OSV.dev API (https://api.osv.dev) to check for known CVEs. Blocks the
|
|
6
|
+
* attempt if any discovered package has severity HIGH or CRITICAL.
|
|
7
|
+
*
|
|
8
|
+
* Supported manifest formats:
|
|
9
|
+
* - package.json (npm/Node.js) — ecosystem: "npm"
|
|
10
|
+
* - requirements.txt (Python) — ecosystem: "PyPI"
|
|
11
|
+
* - Cargo.toml (Rust) — ecosystem: "crates.io"
|
|
12
|
+
* - go.mod (Go) — ecosystem: "Go"
|
|
13
|
+
*
|
|
14
|
+
* Design rules:
|
|
15
|
+
* - Advisory when OSV.dev is unreachable (never hard-fail on network error)
|
|
16
|
+
* - Only checks ADDED lines (+ prefix) — not removed packages
|
|
17
|
+
* - Deduplicates package names before querying
|
|
18
|
+
* - MAX_PACKAGES_PER_SCAN = 20 to bound latency
|
|
19
|
+
*/
|
|
20
|
+
// ─── Constants ────────────────────────────────────────────────────────────────
|
|
21
|
+
const OSV_API = "https://api.osv.dev/v1/query";
const MAX_PACKAGES_PER_SCAN = 20;
const BLOCKING_SEVERITIES = ["HIGH", "CRITICAL"];
// ─── Diff parsing ─────────────────────────────────────────────────────────────
/**
 * package.json keys that hold project metadata rather than a dependency.
 * Compared against the whole key, so a dependency whose name merely contains
 * one of these words (e.g. "license-checker") is still scanned.
 */
const NPM_METADATA_KEYS = new Set([
    "name", "version", "description", "license", "author", "homepage",
    "main", "module", "types", "typings", "type", "packageManager"
]);
/** package.json entry: "name": "^1.2.3" */
const NPM_DEPENDENCY_RE = /^"([@\w][\w\-./@]*)"\s*:\s*"([^"]+)"/;
/**
 * requirements.txt entry: name, optional [extras], a PEP 440 comparison
 * operator, then a version. A bare "=" is not a valid operator, which keeps
 * ordinary assignments such as `retries = 3` from being read as packages.
 */
const PYPI_REQUIREMENT_RE = /^([\w\-.]+)(?:\[[^\]]*\])?\s*(?:===|==|~=|!=|>=|<=|>|<)\s*([\d.]+)/;
/** Cargo.toml entry: name = "1.2.3" (optionally with a leading ^, ~ or =). */
const CARGO_SIMPLE_RE = /^([\w\-]+)\s*=\s*"[\^~=]?\s*([\d.]+)"/;
/** Cargo.toml entry: name = { version = "1.2.3", ... } */
const CARGO_INLINE_TABLE_RE = /^([\w\-]+)\s*=\s*\{[^}]*?\bversion\s*=\s*"[\^~=]?\s*([\d.]+)"/;
/** go.mod entry: [require] module/path v1.2.3 */
const GO_REQUIRE_RE = /^(?:require\s+)?([\w.\-/]+)\s+v([\d.]+(?:-\w+)?)/;
/**
 * Map a "+++ <path>" diff header to the ecosystem of the manifest it names.
 * Returns null when the file is not one of the supported manifests.
 */
function ecosystemForManifest(headerLine) {
    // Header shape: "+++ b/dir/package.json" optionally followed by "\t<timestamp>".
    let path = headerLine.slice(4).split("\t")[0].trim();
    if (path.length > 1 && path.startsWith('"') && path.endsWith('"')) {
        path = path.slice(1, -1); // git quotes paths containing special characters
    }
    const base = path.slice(path.lastIndexOf("/") + 1);
    if (base === "package.json")
        return "npm";
    if (base === "Cargo.toml")
        return "crates.io";
    if (base === "go.mod")
        return "Go";
    if (/^requirements[\w.\-]*\.txt$/i.test(base) || /(?:^|\/)requirements\/[^/]+\.txt$/i.test(path))
        return "PyPI";
    return null;
}
/**
 * Extract newly added package dependencies from a unified diff string.
 *
 * Only added lines (starting with "+") are examined, so removals are never
 * flagged. When the diff carries file headers ("--- a/x" followed by
 * "+++ b/x"), each added line is parsed only with the parser for the manifest
 * it belongs to, and added lines in any other file are ignored. This keeps
 * ordinary source changes from being mistaken for dependencies and from using
 * up the per-scan cap. A diff fragment without file headers is parsed with
 * every parser in turn (npm, PyPI, Cargo, Go).
 *
 * Candidates are de-duplicated per ecosystem and name, and at most
 * MAX_PACKAGES_PER_SCAN are returned, in order of first appearance.
 *
 * @param diff Unified diff text.
 * @returns Array of `{ name, version, ecosystem }` objects.
 */
export function extractPackageCandidates(diff) {
    const candidates = [];
    const seen = new Set();
    const record = (key, candidate) => {
        if (!seen.has(key)) {
            seen.add(key);
            candidates.push(candidate);
        }
    };
    // undefined → no file header seen yet (try every parser);
    // null      → current file is not a supported manifest (skip its lines);
    // string    → ecosystem of the manifest currently being read.
    let scope;
    const lines = diff.split("\n");
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        if (line.startsWith("+++")) {
            // A real file header always directly follows its "--- " partner.
            if (line.startsWith("+++ ") && i > 0 && lines[i - 1].startsWith("--- ")) {
                scope = ecosystemForManifest(line);
            }
            continue;
        }
        if (!line.startsWith("+") || scope === null) {
            continue;
        }
        const content = line.slice(1).trim();
        const applies = (ecosystem) => scope === undefined || scope === ecosystem;
        if (applies("npm")) {
            const npmMatch = content.match(NPM_DEPENDENCY_RE);
            if (npmMatch && !NPM_METADATA_KEYS.has(npmMatch[1])) {
                const name = npmMatch[1];
                // Strip the range operator and keep the first version of a compound range.
                const version = npmMatch[2].replace(/^[\^~>=<]+/, "").split(" ")[0];
                record(`npm:${name}`, { name, version, ecosystem: "npm" });
                continue;
            }
        }
        if (applies("PyPI")) {
            const pypiMatch = content.match(PYPI_REQUIREMENT_RE);
            if (pypiMatch) {
                const name = pypiMatch[1];
                // The de-duplication key is lower-cased; the reported name keeps its spelling.
                record(`PyPI:${name.toLowerCase()}`, { name, version: pypiMatch[2], ecosystem: "PyPI" });
                continue;
            }
        }
        if (applies("crates.io")) {
            const cargoMatch = content.match(CARGO_SIMPLE_RE) ?? content.match(CARGO_INLINE_TABLE_RE);
            if (cargoMatch) {
                const name = cargoMatch[1];
                record(`crates.io:${name}`, { name, version: cargoMatch[2], ecosystem: "crates.io" });
                continue;
            }
        }
        if (applies("Go")) {
            const goMatch = content.match(GO_REQUIRE_RE);
            if (goMatch) {
                const name = goMatch[1];
                record(`Go:${name}`, { name, version: goMatch[2], ecosystem: "Go" });
            }
        }
    }
    return candidates.slice(0, MAX_PACKAGES_PER_SCAN);
}
|
|
89
|
+
/**
 * Resolve the severity bucket of one OSV vulnerability record.
 *
 * Resolution order:
 *  1. `database_specific.severity`, when it is a string label. GitHub-sourced
 *     records spell the middle level "MODERATE"; it is reported as "MEDIUM".
 *  2. The first `severity[]` entry of type "CVSS_V3" whose score is a plain
 *     number (given as a number or a numeric string) in the range 0–10.
 *     CVSS vector strings ("CVSS:3.1/AV:N/...") are not numeric scores and are
 *     skipped to avoid over-classification.
 *
 * Fields of an unexpected type are ignored rather than allowed to throw: the
 * caller treats any exception from a lookup as "OSV unreachable", so a
 * malformed record must not be able to switch the scan into advisory mode.
 *
 * @param vuln Vulnerability object as returned by the OSV query API.
 * @returns "CRITICAL" | "HIGH" | "MEDIUM" | "LOW" | "UNKNOWN"
 */
function resolveSeverity(vuln) {
    const rawLabel = vuln?.database_specific?.severity;
    const label = typeof rawLabel === "string" ? rawLabel.toUpperCase() : "";
    switch (label) {
        case "CRITICAL":
            return "CRITICAL";
        case "HIGH":
            return "HIGH";
        case "MODERATE": // GHSA spelling of the middle level
        case "MEDIUM":
            return "MEDIUM";
        case "LOW":
            return "LOW";
    }
    const entries = Array.isArray(vuln?.severity) ? vuln.severity : [];
    const rawScore = entries.find(s => s?.type === "CVSS_V3")?.score;
    const cvss = typeof rawScore === "number"
        ? String(rawScore)
        : typeof rawScore === "string" ? rawScore : "";
    if (cvss && !cvss.startsWith("CVSS:")) {
        const baseScore = parseFloat(cvss);
        if (!isNaN(baseScore) && baseScore >= 0 && baseScore <= 10) {
            if (baseScore >= 9.0)
                return "CRITICAL";
            if (baseScore >= 7.0)
                return "HIGH";
            if (baseScore >= 4.0)
                return "MEDIUM";
            return "LOW";
        }
    }
    return "UNKNOWN";
}
|
|
118
|
+
/**
 * Look up one package candidate in OSV.dev.
 *
 * POSTs `{ version, package: { name, ecosystem } }` as JSON to OSV_API with a
 * five-second abort timeout. A non-OK HTTP status yields an empty list; a
 * transport failure or timeout rejects the returned promise.
 *
 * @param candidate Package to look up (`name`, optional `version`, `ecosystem`).
 * @returns One match object per vulnerability in the response's `vulns` array.
 */
async function queryOsv(candidate) {
    const requestInit = {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
            version: candidate.version,
            package: { name: candidate.name, ecosystem: candidate.ecosystem }
        }),
        signal: AbortSignal.timeout(5_000)
    };
    const response = await fetch(OSV_API, requestInit);
    if (!response.ok) {
        return [];
    }
    const payload = (await response.json());
    const vulns = payload.vulns ?? [];
    const toMatch = (vuln) => ({
        packageName: candidate.name,
        version: candidate.version,
        ecosystem: candidate.ecosystem,
        vulnId: vuln.id,
        summary: vuln.summary ?? "No summary available",
        severity: resolveSeverity(vuln),
        url: `https://osv.dev/vulnerability/${vuln.id}`
    });
    return vulns.map((vuln) => toMatch(vuln));
}
|
|
142
|
+
// ─── Public API ───────────────────────────────────────────────────────────────
|
|
143
|
+
/**
 * Scan a unified diff for new package dependencies and check them against
 * the OSV.dev vulnerability database.
 *
 * All candidates are looked up in parallel and every lookup is allowed to
 * settle. Matches from the lookups that succeeded are always evaluated, so a
 * single failed or timed-out request cannot hide a HIGH/CRITICAL finding that
 * another request already confirmed. If any lookup failed, the result carries
 * `networkError: true`; when no matches were obtained at all the scan is
 * advisory-only and does not block.
 *
 * @param diff Unified diff text.
 * @returns `{ packageCandidates, matches, blocked, blockReason?, networkError? }`.
 *   `blocked` is true when any match is rated HIGH or CRITICAL, and
 *   `blockReason` then names up to three of the blocking matches.
 */
export async function scanDiffForCves(diff) {
    const packageCandidates = extractPackageCandidates(diff);
    if (packageCandidates.length === 0) {
        return { packageCandidates: [], matches: [], blocked: false };
    }
    // allSettled (not all): one rejected lookup must not discard the others' results.
    const outcomes = await Promise.allSettled(packageCandidates.map(candidate => queryOsv(candidate)));
    const matches = outcomes.flatMap(outcome => outcome.status === "fulfilled" ? outcome.value : []);
    const networkError = outcomes.some(outcome => outcome.status === "rejected");
    if (networkError && matches.length === 0) {
        // Nothing usable came back — advisory mode, do not block.
        return { packageCandidates, matches: [], blocked: false, networkError };
    }
    const blockingMatches = matches.filter(m => BLOCKING_SEVERITIES.includes(m.severity));
    const blocked = blockingMatches.length > 0;
    let blockReason;
    if (blocked) {
        const summary = blockingMatches
            .slice(0, 3)
            .map(m => `${m.packageName} (${m.vulnId} ${m.severity})`)
            .join(", ");
        blockReason = `CVE scan blocked: ${summary}${blockingMatches.length > 3 ? ` +${blockingMatches.length - 3} more` : ""}`;
    }
    const result = { packageCandidates, matches, blocked, blockReason };
    // Flag partial scans so callers know some packages could not be checked.
    return networkError ? { ...result, networkError } : result;
}
|
|
178
|
+
//# sourceMappingURL=cve-scanner.js.map
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/** Cost statistics that a run's actual cost is compared against. */
export interface CostBaseline {
    /** Number of samples the baseline was built from. */
    samples: number;
    /** Baseline cost in USD at p25 (presumably the 25th percentile — confirm against the producer). */
    p25CostUsd: number;
    /** Baseline cost in USD at p50 (presumably the median — confirm against the producer). */
    p50CostUsd: number;
    /** When the baseline was last updated (string form; exact format not visible here). */
    lastUpdated: string;
}
|
|
7
|
+
/** Record emitted when the T14 efficiency check fires for a run. */
export interface EfficiencyAnomaly {
    /** Always "T14", the identifier of this trap. */
    trapId: "T14";
    /** Run the anomaly was detected on. */
    runId: string;
    /** Cost of the run, in USD. */
    actualCostUsd: number;
    /** The baseline's p25 cost, in USD, that the run was compared against. */
    p25BaselineCostUsd: number;
    /** Deviation from the baseline as a percentage (sign convention not visible here). */
    deviationPct: number;
    /** Always "logged": T14 is a warn-only trap and never blocks a run. */
    action: "logged";
}
|
|
16
|
+
/** Arguments for the T14 efficiency check. */
export interface CheckEfficiencyInput {
    /** Identifier of the run being checked. */
    runId: string;
    /** Cost of that run, in USD. */
    actualCostUsd: number;
    /** Baseline statistics to compare the run's cost against. */
    baseline: CostBaseline;
}
|
|
21
|
+
/**
 * T14 — Efficiency Anomaly Detection.
 *
 * Rules:
 *  - Cold-start guard: with fewer than 20 baseline samples the trap never fires.
 *  - The trap fires when the run's actual cost is below 75% of the baseline
 *    p25 cost (`actualCost < baseline.p25 * 0.75`).
 *  - T14 is warn-only: the anomaly's action is always "logged" and a run is
 *    never hard-rejected because of it.
 *
 * @param input Run id, the run's actual cost, and the baseline to compare with.
 * @returns The anomaly record when the trap fires, otherwise null.
 */
export declare function checkEfficiencyAnomaly(input: CheckEfficiencyInput): EfficiencyAnomaly | null;
|