martin-loop 0.1.5 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CODE_OF_CONDUCT.md +32 -0
- package/LICENSE +21 -21
- package/README.md +307 -398
- package/demo/seeded-workspace/README.md +35 -35
- package/demo/seeded-workspace/TASKS.md +29 -29
- package/demo/seeded-workspace/martin.config.yaml +11 -11
- package/demo/seeded-workspace/package.json +8 -8
- package/demo/seeded-workspace/src/invoice-summary.js +11 -11
- package/demo/seeded-workspace/test/invoice-summary.test.js +20 -20
- package/dist/bin/martin-loop.js +0 -0
- package/dist/vendor/adapters/counter.d.ts +1 -0
- package/dist/vendor/adapters/counter.js +4 -0
- package/dist/vendor/adapters/git-baseline.d.ts +50 -0
- package/dist/vendor/adapters/git-baseline.js +233 -0
- package/dist/vendor/adapters/openrouter-adapter.d.ts +15 -0
- package/dist/vendor/adapters/openrouter-adapter.js +302 -0
- package/dist/vendor/adapters/usage.d.ts +48 -0
- package/dist/vendor/adapters/usage.js +66 -0
- package/dist/vendor/cli/bin/exit.d.ts +12 -0
- package/dist/vendor/cli/bin/exit.js +28 -0
- package/dist/vendor/cli/commands/analyze.d.ts +5 -0
- package/dist/vendor/cli/commands/analyze.js +58 -0
- package/dist/vendor/cli/commands/audit-log-verify.d.ts +34 -0
- package/dist/vendor/cli/commands/audit-log-verify.js +99 -0
- package/dist/vendor/cli/commands/audit.d.ts +8 -0
- package/dist/vendor/cli/commands/audit.js +199 -0
- package/dist/vendor/cli/commands/corpus.d.ts +5 -0
- package/dist/vendor/cli/commands/corpus.js +60 -0
- package/dist/vendor/cli/commands/doctor.d.ts +8 -0
- package/dist/vendor/cli/commands/doctor.js +219 -0
- package/dist/vendor/cli/commands/explain.d.ts +17 -0
- package/dist/vendor/cli/commands/explain.js +176 -0
- package/dist/vendor/cli/commands/export.d.ts +5 -0
- package/dist/vendor/cli/commands/export.js +60 -0
- package/dist/vendor/cli/commands/governance.d.ts +8 -0
- package/dist/vendor/cli/commands/governance.js +95 -0
- package/dist/vendor/cli/commands/improve.d.ts +18 -0
- package/dist/vendor/cli/commands/improve.js +396 -0
- package/dist/vendor/cli/commands/init.d.ts +8 -0
- package/dist/vendor/cli/commands/init.js +281 -0
- package/dist/vendor/cli/commands/migration.d.ts +8 -0
- package/dist/vendor/cli/commands/migration.js +67 -0
- package/dist/vendor/cli/commands/prior.d.ts +23 -0
- package/dist/vendor/cli/commands/prior.js +145 -0
- package/dist/vendor/cli/commands/resume.d.ts +21 -0
- package/dist/vendor/cli/commands/resume.js +73 -0
- package/dist/vendor/cli/commands/verify.d.ts +6 -0
- package/dist/vendor/cli/commands/verify.js +43 -0
- package/dist/vendor/cli/research/public-corpus.d.ts +43 -0
- package/dist/vendor/cli/research/public-corpus.js +151 -0
- package/dist/vendor/cli/ui/error-card.d.ts +38 -0
- package/dist/vendor/cli/ui/error-card.js +103 -0
- package/dist/vendor/cli/ui/mission-brief.d.ts +41 -0
- package/dist/vendor/cli/ui/mission-brief.js +173 -0
- package/dist/vendor/cli/ui/summary-card.d.ts +34 -0
- package/dist/vendor/cli/ui/summary-card.js +102 -0
- package/dist/vendor/contracts/audit.d.ts +46 -0
- package/dist/vendor/contracts/audit.js +360 -0
- package/dist/vendor/contracts/post-phase15.d.ts +240 -0
- package/dist/vendor/contracts/post-phase15.js +166 -0
- package/dist/vendor/core/agent/mandates.d.ts +46 -0
- package/dist/vendor/core/agent/mandates.js +178 -0
- package/dist/vendor/core/agent/receipts.d.ts +38 -0
- package/dist/vendor/core/agent/receipts.js +131 -0
- package/dist/vendor/core/agent/signing.d.ts +17 -0
- package/dist/vendor/core/agent/signing.js +91 -0
- package/dist/vendor/core/attestation/sign.d.ts +25 -0
- package/dist/vendor/core/attestation/sign.js +216 -0
- package/dist/vendor/core/autonomy/autonomous-promotion.d.ts +120 -0
- package/dist/vendor/core/autonomy/autonomous-promotion.js +346 -0
- package/dist/vendor/core/autonomy/envelope-v2.d.ts +29 -0
- package/dist/vendor/core/autonomy/envelope-v2.js +60 -0
- package/dist/vendor/core/autonomy/envelope.d.ts +17 -0
- package/dist/vendor/core/autonomy/envelope.js +27 -0
- package/dist/vendor/core/autonomy/escalation-ledger.d.ts +20 -0
- package/dist/vendor/core/autonomy/escalation-ledger.js +18 -0
- package/dist/vendor/core/autonomy/resume.d.ts +15 -0
- package/dist/vendor/core/autonomy/resume.js +23 -0
- package/dist/vendor/core/circuit/circuit-breaker.d.ts +60 -0
- package/dist/vendor/core/circuit/circuit-breaker.js +143 -0
- package/dist/vendor/core/context-distillation.d.ts +3 -0
- package/dist/vendor/core/context-distillation.js +44 -0
- package/dist/vendor/core/context-flow/compile-context.d.ts +8 -0
- package/dist/vendor/core/context-flow/compile-context.js +111 -0
- package/dist/vendor/core/context-flow/entities.d.ts +2 -0
- package/dist/vendor/core/context-flow/entities.js +44 -0
- package/dist/vendor/core/context-flow/evaluate-policy.d.ts +2 -0
- package/dist/vendor/core/context-flow/evaluate-policy.js +42 -0
- package/dist/vendor/core/context-flow/index.d.ts +11 -0
- package/dist/vendor/core/context-flow/index.js +24 -0
- package/dist/vendor/core/context-flow/labels.d.ts +3 -0
- package/dist/vendor/core/context-flow/labels.js +17 -0
- package/dist/vendor/core/context-flow/normalizer.d.ts +9 -0
- package/dist/vendor/core/context-flow/normalizer.js +69 -0
- package/dist/vendor/core/context-flow/profiles.d.ts +33 -0
- package/dist/vendor/core/context-flow/profiles.js +36 -0
- package/dist/vendor/core/context-flow/redaction.d.ts +1 -0
- package/dist/vendor/core/context-flow/redaction.js +6 -0
- package/dist/vendor/core/context-flow/sensitivity.d.ts +2 -0
- package/dist/vendor/core/context-flow/sensitivity.js +27 -0
- package/dist/vendor/core/context-flow/sync-preview.d.ts +2 -0
- package/dist/vendor/core/context-flow/sync-preview.js +22 -0
- package/dist/vendor/core/context-flow/token-estimator.d.ts +3 -0
- package/dist/vendor/core/context-flow/token-estimator.js +13 -0
- package/dist/vendor/core/context-flow/types.d.ts +91 -0
- package/dist/vendor/core/context-flow/types.js +2 -0
- package/dist/vendor/core/context-utility.d.ts +47 -0
- package/dist/vendor/core/context-utility.js +405 -0
- package/dist/vendor/core/cost/pipeline.d.ts +92 -0
- package/dist/vendor/core/cost/pipeline.js +141 -0
- package/dist/vendor/core/cost/tagged-cost.d.ts +27 -0
- package/dist/vendor/core/cost/tagged-cost.js +55 -0
- package/dist/vendor/core/cost-governor.d.ts +2 -0
- package/dist/vendor/core/cost-governor.js +50 -0
- package/dist/vendor/core/cve/cve-check.d.ts +80 -0
- package/dist/vendor/core/cve/cve-check.js +172 -0
- package/dist/vendor/core/digital-twin/index.d.ts +27 -0
- package/dist/vendor/core/digital-twin/index.js +90 -0
- package/dist/vendor/core/drift/drift-graph.d.ts +47 -0
- package/dist/vendor/core/drift/drift-graph.js +100 -0
- package/dist/vendor/core/drift/objective-lock.d.ts +69 -0
- package/dist/vendor/core/drift/objective-lock.js +88 -0
- package/dist/vendor/core/drift/scope.d.ts +46 -0
- package/dist/vendor/core/drift/scope.js +102 -0
- package/dist/vendor/core/drift/signature-lock.d.ts +48 -0
- package/dist/vendor/core/drift/signature-lock.js +202 -0
- package/dist/vendor/core/drift/stale-proof-gate.d.ts +21 -0
- package/dist/vendor/core/drift/stale-proof-gate.js +19 -0
- package/dist/vendor/core/eval/known-bad-world-runner.d.ts +24 -0
- package/dist/vendor/core/eval/known-bad-world-runner.js +256 -0
- package/dist/vendor/core/evidence/claim-audit.d.ts +18 -0
- package/dist/vendor/core/evidence/claim-audit.js +89 -0
- package/dist/vendor/core/exit-intelligence.d.ts +2 -0
- package/dist/vendor/core/exit-intelligence.js +58 -0
- package/dist/vendor/core/explain/formatter.d.ts +42 -0
- package/dist/vendor/core/explain/formatter.js +171 -0
- package/dist/vendor/core/explain/timeline.d.ts +29 -0
- package/dist/vendor/core/explain/timeline.js +213 -0
- package/dist/vendor/core/failure-taxonomy.d.ts +2 -0
- package/dist/vendor/core/failure-taxonomy.js +76 -0
- package/dist/vendor/core/gateway/index.d.ts +10 -0
- package/dist/vendor/core/gateway/index.js +12 -0
- package/dist/vendor/core/gateway/registry.d.ts +40 -0
- package/dist/vendor/core/gateway/registry.js +97 -0
- package/dist/vendor/core/gateway/transport.d.ts +31 -0
- package/dist/vendor/core/gateway/transport.js +82 -0
- package/dist/vendor/core/gateway/vault.d.ts +19 -0
- package/dist/vendor/core/gateway/vault.js +29 -0
- package/dist/vendor/core/graph/adapters.d.ts +43 -0
- package/dist/vendor/core/graph/adapters.js +91 -0
- package/dist/vendor/core/graph/hotspots.d.ts +22 -0
- package/dist/vendor/core/graph/hotspots.js +30 -0
- package/dist/vendor/core/graph/index.d.ts +1 -0
- package/dist/vendor/core/graph/index.js +2 -0
- package/dist/vendor/core/honey/honey-tokens.d.ts +32 -0
- package/dist/vendor/core/honey/honey-tokens.js +44 -0
- package/dist/vendor/core/index.d.ts +2 -2
- package/dist/vendor/core/index.js +38 -12
- package/dist/vendor/core/learning/bayesian-update.d.ts +31 -0
- package/dist/vendor/core/learning/bayesian-update.js +60 -0
- package/dist/vendor/core/learning/prior-sets.d.ts +42 -0
- package/dist/vendor/core/learning/prior-sets.js +111 -0
- package/dist/vendor/core/learning/promotion-gate.d.ts +17 -0
- package/dist/vendor/core/learning/promotion-gate.js +23 -0
- package/dist/vendor/core/leash/blast-radius.d.ts +42 -0
- package/dist/vendor/core/leash/blast-radius.js +156 -0
- package/dist/vendor/core/leash/policy-leash.d.ts +31 -0
- package/dist/vendor/core/leash/policy-leash.js +117 -0
- package/dist/vendor/core/memo/memo.d.ts +63 -0
- package/dist/vendor/core/memo/memo.js +97 -0
- package/dist/vendor/core/memory/learning-pipeline.d.ts +154 -0
- package/dist/vendor/core/memory/learning-pipeline.js +391 -0
- package/dist/vendor/core/memory/palace.d.ts +84 -0
- package/dist/vendor/core/memory/palace.js +379 -0
- package/dist/vendor/core/merge/ast-merge.d.ts +22 -0
- package/dist/vendor/core/merge/ast-merge.js +350 -0
- package/dist/vendor/core/merge/text-merge.d.ts +12 -0
- package/dist/vendor/core/merge/text-merge.js +182 -0
- package/dist/vendor/core/otel/tracer.d.ts +45 -0
- package/dist/vendor/core/otel/tracer.js +116 -0
- package/dist/vendor/core/parallel/parallel-attempts.d.ts +28 -0
- package/dist/vendor/core/parallel/parallel-attempts.js +41 -0
- package/dist/vendor/core/parallel/scorer.d.ts +24 -0
- package/dist/vendor/core/parallel/scorer.js +65 -0
- package/dist/vendor/core/pattern-detection.d.ts +64 -0
- package/dist/vendor/core/pattern-detection.js +108 -0
- package/dist/vendor/core/persistence/checkpoint.d.ts +44 -0
- package/dist/vendor/core/persistence/checkpoint.js +156 -0
- package/dist/vendor/core/persistence/cleanup.d.ts +22 -0
- package/dist/vendor/core/persistence/cleanup.js +131 -0
- package/dist/vendor/core/persistence/index.d.ts +2 -0
- package/dist/vendor/core/persistence/index.js +1 -0
- package/dist/vendor/core/persistence/runs-reader.d.ts +52 -0
- package/dist/vendor/core/persistence/runs-reader.js +84 -0
- package/dist/vendor/core/persistence/store.d.ts +6 -1
- package/dist/vendor/core/persistence/store.js +5 -0
- package/dist/vendor/core/policy/file-touch-quota.d.ts +60 -0
- package/dist/vendor/core/policy/file-touch-quota.js +105 -0
- package/dist/vendor/core/policy/policy-loader.d.ts +30 -0
- package/dist/vendor/core/policy/policy-loader.js +170 -0
- package/dist/vendor/core/policy/policy-schema.d.ts +55 -0
- package/dist/vendor/core/policy/policy-schema.js +78 -0
- package/dist/vendor/core/probe/probe.d.ts +49 -0
- package/dist/vendor/core/probe/probe.js +115 -0
- package/dist/vendor/core/proof/patch-proof.d.ts +58 -0
- package/dist/vendor/core/proof/patch-proof.js +84 -0
- package/dist/vendor/core/proof/semantic-probe.d.ts +25 -0
- package/dist/vendor/core/proof/semantic-probe.js +82 -0
- package/dist/vendor/core/recovery/failure-mode-runner.d.ts +29 -0
- package/dist/vendor/core/recovery/failure-mode-runner.js +39 -0
- package/dist/vendor/core/red-blue/red-phase.d.ts +64 -0
- package/dist/vendor/core/red-blue/red-phase.js +141 -0
- package/dist/vendor/core/red-blue/risk-tiers.d.ts +22 -0
- package/dist/vendor/core/red-blue/risk-tiers.js +33 -0
- package/dist/vendor/core/replay/replay.d.ts +85 -0
- package/dist/vendor/core/replay/replay.js +109 -0
- package/dist/vendor/core/router/engine.d.ts +54 -0
- package/dist/vendor/core/router/engine.js +131 -0
- package/dist/vendor/core/router/index.d.ts +1 -0
- package/dist/vendor/core/router/index.js +2 -0
- package/dist/vendor/core/router/trust-calibration.d.ts +57 -0
- package/dist/vendor/core/router/trust-calibration.js +127 -0
- package/dist/vendor/core/run-martin.d.ts +2 -0
- package/dist/vendor/core/run-martin.js +287 -0
- package/dist/vendor/core/security/cve-scanner.d.ts +62 -0
- package/dist/vendor/core/security/cve-scanner.js +178 -0
- package/dist/vendor/core/sentinel/efficiency-sentinel.d.ts +29 -0
- package/dist/vendor/core/sentinel/efficiency-sentinel.js +30 -0
- package/dist/vendor/core/sentinel/progress-guard.d.ts +35 -0
- package/dist/vendor/core/sentinel/progress-guard.js +46 -0
- package/dist/vendor/core/siem/siem-emitter.d.ts +49 -0
- package/dist/vendor/core/siem/siem-emitter.js +157 -0
- package/dist/vendor/core/strategy/attempt-brief.d.ts +22 -0
- package/dist/vendor/core/strategy/attempt-brief.js +89 -0
- package/dist/vendor/core/summarize/diff-summary.d.ts +35 -0
- package/dist/vendor/core/summarize/diff-summary.js +204 -0
- package/dist/vendor/core/surface-signals.d.ts +21 -0
- package/dist/vendor/core/surface-signals.js +139 -0
- package/dist/vendor/core/truth/truth-wall.d.ts +51 -0
- package/dist/vendor/core/truth/truth-wall.js +69 -0
- package/dist/vendor/core/truth-spine.d.ts +26 -0
- package/dist/vendor/core/truth-spine.js +62 -0
- package/dist/vendor/core/types.d.ts +115 -0
- package/dist/vendor/core/types.js +2 -0
- package/dist/vendor/core/verification/tiered-verify.d.ts +17 -0
- package/dist/vendor/core/verification/tiered-verify.js +29 -0
- package/dist/vendor/core/verifier-pyramid.d.ts +32 -0
- package/dist/vendor/core/verifier-pyramid.js +111 -0
- package/dist/vendor/core/workflow-artifacts.d.ts +99 -0
- package/dist/vendor/core/workflow-artifacts.js +668 -0
- package/dist/vendor/core/wrap/supervised-run.d.ts +96 -0
- package/dist/vendor/core/wrap/supervised-run.js +178 -0
- package/docs/assets/cli-animated.svg +139 -0
- package/docs/assets/cli-static.svg +34 -0
- package/docs/assets/github-hero-v2.svg +23 -0
- package/docs/assets/martin-raplph.png.jpg +0 -0
- package/docs/assets/martinloop-logo.png +0 -0
- package/docs/assets/nvidia-inception-program-light.png +0 -0
- package/docs/assets/nvidia-inception-program.png +0 -0
- package/docs/assets/phase3c-sidesidebyside-demo.html +228 -0
- package/docs/assets/side-by-side.svg +134 -0
- package/docs/oss/CLAUDE-CODE-WALKTHROUGH.md +142 -142
- package/docs/oss/EXAMPLES.md +134 -134
- package/docs/oss/OSS-BOUNDARY-REPORT.json +1 -1
- package/docs/oss/OSS-BOUNDARY-REPORT.md +1 -1
- package/docs/oss/QUICKSTART.md +170 -165
- package/docs/oss/RALPH-LOOP-SAFETY.md +113 -113
- package/docs/oss/README.md +96 -96
- package/docs/oss/RELEASE-SURFACE-REPORT.json +2 -1
- package/docs/oss/RELEASE-SURFACE-REPORT.md +2 -1
- package/package.json +130 -58
- package/docs/distribution/DIRECTORY-SUBMISSIONS.md +0 -89
- package/docs/distribution/INTEGRATION-OUTREACH.md +0 -61
- package/docs/distribution/UNDER-3-CHALLENGE.md +0 -65
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* learning-pipeline.ts
|
|
3
|
+
*
|
|
4
|
+
* Guarded promotion pipeline for self-improving memory heuristics.
|
|
5
|
+
*
|
|
6
|
+
* Flow: candidate → shadow test → holdout validation → approval gate → promoted
|
|
7
|
+
*
|
|
8
|
+
* A learning candidate NEVER becomes an active heuristic without passing:
|
|
9
|
+
* 1. Shadow tests (runs in parallel with existing logic, results compared)
|
|
10
|
+
* 2. Holdout validation (tested on a held-out set, not the training data)
|
|
11
|
+
* 3. Approval gate (auto-approved if confidence >= threshold, else requires manual sign-off)
|
|
12
|
+
*
|
|
13
|
+
* Fail-open: any pipeline error keeps the existing heuristic active untouched.
|
|
14
|
+
*/
|
|
15
|
+
import { appendFile, mkdir, readdir, readFile, writeFile } from "node:fs/promises";
|
|
16
|
+
import { join } from "node:path";
|
|
17
|
+
/**
 * Paths that a learning candidate may not target.
 *
 * An entry ending in `/**` protects the named directory and everything below
 * it; any other entry protects exactly that one path.
 */
export const DEFAULT_PROTECTED_LEARNING_PATHS = [
    "packages/core/src/leash.ts",
    "packages/core/src/leash/**",
    "packages/core/src/policy.ts",
    "packages/core/src/grounding.ts",
    "packages/core/src/security/**",
    "packages/core/src/attestation/**",
    "packages/contracts/src/**",
    "packages/mcp/src/**",
    "packages/cli/src/index.ts",
    "docs/release/**",
    "docs/security/**",
    "docs/handoffs/**",
    ".planning/**",
    "scripts/**"
];
|
|
33
|
+
/**
 * Heuristics used when no `active-heuristics.json` can be read.
 * `updatedAt` is the Unix epoch, i.e. "never updated".
 */
export const DEFAULT_ACTIVE_LEARNING_HEURISTICS = {
    schemaVersion: "1.0",
    updatedAt: new Date(0).toISOString(),
    // Weights applied by recall scoring.
    recallScoring: {
        failureClassWeight: 30,
        interventionWeight: 14,
        tokenOverlapWeight: 6,
        completionWeight: 4
    }
};
|
|
43
|
+
// ─── Shadow test harness ──────────────────────────────────────────────────────
|
|
44
|
+
/**
 * Scores a candidate heuristic against the existing one in shadow mode.
 * This function only compares numbers; it changes no state.
 *
 * `existingScores` and `candidateScores` are parallel arrays, one entry per
 * recall query. A case passes when the candidate score is no more than 5%
 * worse than the existing score. The tolerance is measured on the magnitude of
 * the existing score, so a negative baseline is also allowed to be 5% lower
 * (not 5% higher, which `existing * 0.95` would demand).
 *
 * @param candidateId Identifier echoed back in the result.
 * @param existingScores Scores produced by the current heuristic; its length defines the number of cases.
 * @param candidateScores Scores produced by the candidate; a missing entry counts as 0.
 * @returns `{ candidateId, totalCases, passedCases, passRate, passed }`, where
 *          `passed` is true when at least 80% of the cases pass.
 */
export function runShadowTest(candidateId, existingScores, candidateScores) {
    const totalCases = existingScores.length;
    if (totalCases === 0) {
        // NOTE(review): with no cases the test passes vacuously (passRate 1).
        // Kept for backward compatibility; confirm callers always supply evidence.
        return { candidateId, totalCases: 0, passedCases: 0, passRate: 1, passed: true };
    }
    let passedCases = 0;
    for (let i = 0; i < totalCases; i++) {
        const existing = existingScores[i] ?? 0;
        const candidate = candidateScores[i] ?? 0;
        // Lowest acceptable candidate score: 5% below the baseline in either sign.
        const floor = existing >= 0 ? existing * 0.95 : existing * 1.05;
        if (candidate >= floor) {
            passedCases++;
        }
    }
    const passRate = passedCases / totalCases;
    return { candidateId, totalCases, passedCases, passRate, passed: passRate >= 0.8 };
}
|
|
68
|
+
// ─── Holdout validation ───────────────────────────────────────────────────────
|
|
69
|
+
/**
 * Validates a candidate on a held-out set scored by both the existing and the
 * candidate heuristic.
 *
 * A case passes when the candidate score is no more than 5% worse than the
 * existing score. The tolerance is measured on the magnitude of the existing
 * score, so a negative baseline may also be 5% lower. The candidate passes
 * overall when at least 80% of the cases pass.
 *
 * @param candidateId Identifier echoed back in the result.
 * @param holdoutExistingScores Existing-heuristic scores; its length defines the holdout size.
 * @param holdoutCandidateScores Candidate scores, parallel to the above; a missing entry counts as 0.
 * @returns `{ candidateId, holdoutSize, passedCases, passRate, passed }`.
 */
export function runHoldoutValidation(candidateId, holdoutExistingScores, holdoutCandidateScores) {
    const holdoutSize = holdoutExistingScores.length;
    if (holdoutSize === 0) {
        // NOTE(review): an empty holdout set passes vacuously (passRate 1).
        // Kept for backward compatibility; confirm callers always supply a holdout set.
        return {
            candidateId,
            holdoutSize: 0,
            passedCases: 0,
            passRate: 1,
            passed: true
        };
    }
    let passedCases = 0;
    for (let i = 0; i < holdoutSize; i++) {
        const existing = holdoutExistingScores[i] ?? 0;
        const candidate = holdoutCandidateScores[i] ?? 0;
        // Lowest acceptable candidate score: 5% below the baseline in either sign.
        const floor = existing >= 0 ? existing * 0.95 : existing * 1.05;
        if (candidate >= floor) {
            passedCases++;
        }
    }
    const passRate = passedCases / holdoutSize;
    return { candidateId, holdoutSize, passedCases, passRate, passed: passRate >= 0.8 };
}
|
|
96
|
+
// ─── Approval gate ────────────────────────────────────────────────────────────
|
|
97
|
+
/**
|
|
98
|
+
* Determines whether a candidate can be promoted based on holdout results and
|
|
99
|
+
* the configured approval policy.
|
|
100
|
+
*
|
|
101
|
+
* Auto-approves when:
|
|
102
|
+
* - holdout passed AND holdoutPassRate >= autoApproveThreshold
|
|
103
|
+
* - AND requireManualApproval is false
|
|
104
|
+
*
|
|
105
|
+
* In all other cases, the candidate is held at "holdout_passed" status and
|
|
106
|
+
* awaits manual sign-off (future: webhook / dashboard approval flow).
|
|
107
|
+
*/
|
|
108
|
+
export function evaluateApprovalGate(holdout, config = { autoApproveThreshold: 0.85, requireManualApproval: false }) {
|
|
109
|
+
if (!holdout.passed) {
|
|
110
|
+
return {
|
|
111
|
+
approved: false,
|
|
112
|
+
reason: `Holdout failed: ${(holdout.passRate * 100).toFixed(1)}% pass rate (threshold 80%).`
|
|
113
|
+
};
|
|
114
|
+
}
|
|
115
|
+
if (config.requireManualApproval) {
|
|
116
|
+
return {
|
|
117
|
+
approved: false,
|
|
118
|
+
reason: "Manual approval required by policy. Candidate held at holdout_passed."
|
|
119
|
+
};
|
|
120
|
+
}
|
|
121
|
+
if (holdout.passRate >= config.autoApproveThreshold) {
|
|
122
|
+
return {
|
|
123
|
+
approved: true,
|
|
124
|
+
reason: `Auto-approved: ${(holdout.passRate * 100).toFixed(1)}% holdout pass rate >= ${(config.autoApproveThreshold * 100).toFixed(0)}% threshold.`
|
|
125
|
+
};
|
|
126
|
+
}
|
|
127
|
+
return {
|
|
128
|
+
approved: false,
|
|
129
|
+
reason: `Pass rate ${(holdout.passRate * 100).toFixed(1)}% below auto-approve threshold of ${(config.autoApproveThreshold * 100).toFixed(0)}%. Manual review required.`
|
|
130
|
+
};
|
|
131
|
+
}
|
|
132
|
+
// ─── Persistence ──────────────────────────────────────────────────────────────
|
|
133
|
+
export async function persistCandidate(candidate, pipelineRoot) {
|
|
134
|
+
const dir = join(pipelineRoot, "learning-pipeline");
|
|
135
|
+
await mkdir(dir, { recursive: true });
|
|
136
|
+
const candidatePath = join(dir, `${candidate.candidateId}.json`);
|
|
137
|
+
await writeFile(candidatePath, JSON.stringify(candidate, null, 2), "utf8");
|
|
138
|
+
// Append to audit log
|
|
139
|
+
const auditEntry = JSON.stringify({
|
|
140
|
+
candidateId: candidate.candidateId,
|
|
141
|
+
status: candidate.status,
|
|
142
|
+
timestamp: new Date().toISOString()
|
|
143
|
+
});
|
|
144
|
+
await appendFile(join(dir, "pipeline-audit.jsonl"), `${auditEntry}\n`, "utf8");
|
|
145
|
+
}
|
|
146
|
+
/**
 * Reads `<pipelineRoot>/learning-pipeline/<candidateId>.json` and returns the
 * parsed JSON.
 *
 * Returns `undefined` when the file cannot be read or parsed (best-effort
 * lookup), and also when the id is not a safe candidate file name: not a
 * non-empty string, contains a path separator, is `.` or `..`, or is
 * `active-heuristics` (that file is the heuristics store, not a candidate).
 *
 * @param candidateId Candidate identifier, used as the file's base name.
 * @param pipelineRoot Directory that contains `learning-pipeline/`.
 * @returns The parsed candidate, or `undefined`.
 */
export async function loadCandidate(candidateId, pipelineRoot) {
    const unsafeId = typeof candidateId !== "string"
        || candidateId === ""
        || candidateId === "."
        || candidateId === ".."
        || candidateId === "active-heuristics"
        || /[\\\/\0]/.test(candidateId);
    if (unsafeId) {
        // Never resolve an id that could point outside the candidate directory.
        return undefined;
    }
    try {
        const raw = await readFile(join(pipelineRoot, "learning-pipeline", `${candidateId}.json`), "utf8");
        return JSON.parse(raw);
    }
    catch {
        // Missing or malformed file: deliberately reported as "no candidate".
        return undefined;
    }
}
|
|
155
|
+
/**
 * Loads the active heuristics file and normalises it to the current shape.
 *
 * Each recall-scoring weight that is missing or not a finite number takes the
 * value from DEFAULT_ACTIVE_LEARNING_HEURISTICS, as does a non-string
 * `updatedAt`. If the file cannot be read or parsed, the defaults object
 * itself is returned.
 *
 * @param pipelineRoot Directory that contains `learning-pipeline/`.
 * @returns The active heuristics (`schemaVersion`, `updatedAt`, `recallScoring`).
 */
export async function loadActiveLearningHeuristics(pipelineRoot) {
    try {
        const fileText = await readFile(resolveActiveHeuristicsPath(pipelineRoot), "utf8");
        const stored = JSON.parse(fileText);
        let storedWeights = {};
        if (isRecord(stored.recallScoring)) {
            storedWeights = stored.recallScoring;
        }
        const fallbackWeights = DEFAULT_ACTIVE_LEARNING_HEURISTICS.recallScoring;
        let updatedAt = DEFAULT_ACTIVE_LEARNING_HEURISTICS.updatedAt;
        if (typeof stored.updatedAt === "string") {
            updatedAt = stored.updatedAt;
        }
        const failureClassWeight = numberOrDefault(storedWeights["failureClassWeight"], fallbackWeights.failureClassWeight);
        const interventionWeight = numberOrDefault(storedWeights["interventionWeight"], fallbackWeights.interventionWeight);
        const tokenOverlapWeight = numberOrDefault(storedWeights["tokenOverlapWeight"], fallbackWeights.tokenOverlapWeight);
        const completionWeight = numberOrDefault(storedWeights["completionWeight"], fallbackWeights.completionWeight);
        return {
            schemaVersion: "1.0",
            updatedAt,
            recallScoring: { failureClassWeight, interventionWeight, tokenOverlapWeight, completionWeight }
        };
    }
    catch {
        // Unreadable or malformed file: fall back to the built-in defaults.
        return DEFAULT_ACTIVE_LEARNING_HEURISTICS;
    }
}
|
|
179
|
+
/**
 * Summarises every candidate file in `<pipelineRoot>/learning-pipeline`.
 *
 * Reads each `*.json` file except `active-heuristics.json`, tallies its status
 * and collects a short entry per candidate, newest `createdAt` first. Files
 * that cannot be read, cannot be parsed, or do not contain a JSON object are
 * skipped. Entries without a string `createdAt` sort last, so one malformed
 * record can no longer make the whole summary throw.
 *
 * @param pipelineRoot Directory that contains `learning-pipeline/`.
 * @returns `{ counts, protectedPaths, entries }`; an empty summary when the
 *          directory cannot be listed.
 */
export async function summarizeLearningPipeline(pipelineRoot) {
    const dir = join(pipelineRoot, "learning-pipeline");
    const counts = emptySummaryCounts();
    const entries = [];
    let names = [];
    try {
        names = await readdir(dir);
    }
    catch {
        // No pipeline directory yet: report an empty summary.
        return {
            counts,
            protectedPaths: [...DEFAULT_PROTECTED_LEARNING_PATHS],
            entries
        };
    }
    for (const name of names) {
        if (!name.endsWith(".json") || name === "active-heuristics.json") {
            continue;
        }
        try {
            const raw = await readFile(join(dir, name), "utf8");
            const candidate = JSON.parse(raw);
            if (typeof candidate !== "object" || candidate === null || Array.isArray(candidate)) {
                // Valid JSON but not a candidate record.
                continue;
            }
            incrementSummaryCount(counts, candidate.status);
            entries.push({
                candidateId: candidate.candidateId,
                heuristicFamily: candidate.heuristicFamily,
                status: candidate.status,
                sourceRunId: candidate.sourceRunId,
                createdAt: candidate.createdAt,
                confidenceScore: candidate.provenance?.confidenceScore,
                patternId: candidate.provenance?.patternId,
                summary: candidate.description
            });
        }
        catch {
            continue;
        }
    }
    // A missing or non-string createdAt compares as "", which sorts last in descending order.
    const sortKey = (entry) => (typeof entry.createdAt === "string" ? entry.createdAt : "");
    entries.sort((left, right) => sortKey(right).localeCompare(sortKey(left)));
    return {
        counts,
        protectedPaths: [...DEFAULT_PROTECTED_LEARNING_PATHS],
        entries
    };
}
|
|
221
|
+
/**
 * Runs the full guarded promotion pipeline:
 *   scope check → shadow test → holdout validation → approval gate → persist.
 *
 * The candidate object is mutated in place (status, pass rates, timestamps)
 * and persisted at every terminal state.
 *
 * Errors inside the pipeline are caught, not thrown. The candidate is then
 * marked `rejected` unless it already carries a failure status, a best-effort
 * attempt is made to persist that, and the result reports `promoted: false`.
 * The one exception is a failure to persist the record *after* the promotion
 * has been applied: the result then still reports `promoted: true`, with a
 * warning, because the promotion step has already completed.
 *
 * @param input `{ candidate, pipelineRoot, existingScores, candidateScores,
 *              holdoutExistingScores, holdoutCandidateScores, approvalConfig }`.
 * @returns `{ candidateId, promoted, reason }`.
 */
export async function runPromotionPipeline(input) {
    const { candidate } = input;
    const pipelineRoot = input.pipelineRoot;
    // Marks the candidate as failed with the given status, persists it and builds the result.
    const rejectWith = async (status, reason) => {
        candidate.status = status;
        candidate.rejectedAt = new Date().toISOString();
        candidate.rejectedReason = reason;
        await persistCandidate(candidate, pipelineRoot);
        return { candidateId: candidate.candidateId, promoted: false, reason };
    };
    try {
        const scopeVerdict = validateCandidateScope(candidate);
        if (!scopeVerdict.allowed) {
            return await rejectWith("blocked_protected_surface", scopeVerdict.reason);
        }
        // Phase 1: Shadow test
        // NOTE(review): empty score arrays pass both phases vacuously — confirm
        // callers always supply evidence before relying on auto-approval.
        const shadow = runShadowTest(candidate.candidateId, input.existingScores, input.candidateScores);
        candidate.shadowPassRate = shadow.passRate;
        if (!shadow.passed) {
            return await rejectWith("shadow_failed", `Shadow test failed: ${(shadow.passRate * 100).toFixed(1)}% pass rate.`);
        }
        candidate.status = "shadow_passed";
        // Phase 2: Holdout validation
        const holdout = runHoldoutValidation(candidate.candidateId, input.holdoutExistingScores, input.holdoutCandidateScores);
        candidate.holdoutPassRate = holdout.passRate;
        if (!holdout.passed) {
            return await rejectWith("holdout_failed", `Holdout validation failed: ${(holdout.passRate * 100).toFixed(1)}% pass rate.`);
        }
        candidate.status = "holdout_passed";
        // Phase 3: Approval gate
        const gate = evaluateApprovalGate(holdout, input.approvalConfig);
        if (!gate.approved) {
            // Held for manual review — not rejected, not approved
            candidate.status = "manual_review";
            await persistCandidate(candidate, pipelineRoot);
            return { candidateId: candidate.candidateId, promoted: false, reason: gate.reason };
        }
        candidate.approvedAt = new Date().toISOString();
        candidate.status = "approved";
        await applyCandidatePromotion(candidate, pipelineRoot);
        candidate.status = "promoted";
        try {
            await persistCandidate(candidate, pipelineRoot);
        }
        catch (persistErr) {
            // The promotion step has already run; do not report it as not promoted.
            const detail = persistErr instanceof Error ? persistErr.message : String(persistErr);
            return {
                candidateId: candidate.candidateId,
                promoted: true,
                reason: `${gate.reason} (warning: promotion record could not be persisted: ${detail})`
            };
        }
        return { candidateId: candidate.candidateId, promoted: true, reason: gate.reason };
    }
    catch (err) {
        // Fail-open: never let pipeline errors affect the running system
        const message = err instanceof Error ? err.message : String(err);
        const reason = `Pipeline error (fail-open): ${message}`;
        const hasFailureStatus = candidate.status === "blocked_protected_surface"
            || candidate.status === "shadow_failed"
            || candidate.status === "holdout_failed";
        if (!hasFailureStatus) {
            candidate.status = "rejected";
            candidate.rejectedAt = new Date().toISOString();
            candidate.rejectedReason = reason;
        }
        try {
            await persistCandidate(candidate, pipelineRoot);
        }
        catch {
            // Best effort only: persistence may be what failed in the first place.
        }
        return {
            candidateId: candidate.candidateId,
            promoted: false,
            reason
        };
    }
}
|
|
292
|
+
/**
 * Creates a fresh tally object with every status bucket set to zero.
 *
 * @returns A new counts object on every call.
 */
function emptySummaryCounts() {
    const buckets = [
        "proposed",
        "shadowPassed",
        "holdoutPassed",
        "approved",
        "promoted",
        "rejected",
        "blockedProtectedSurface",
        "rolledBack"
    ];
    return Object.fromEntries(buckets.map((bucket) => [bucket, 0]));
}
|
|
304
|
+
/**
 * Adds one to the summary bucket that corresponds to a candidate status.
 *
 * `manual_review` is tallied under `holdoutPassed`: such a candidate has
 * passed holdout validation and is waiting for sign-off, so it is neither
 * rejected nor approved. Unknown statuses leave the counts unchanged.
 *
 * @param counts Tally object, mutated in place.
 * @param status Candidate status string.
 */
function incrementSummaryCount(counts, status) {
    switch (status) {
        case "pending_shadow":
            counts.proposed++;
            break;
        case "shadow_passed":
            counts.shadowPassed++;
            break;
        case "holdout_passed":
        case "manual_review":
            counts.holdoutPassed++;
            break;
        case "approved":
            counts.approved++;
            break;
        case "promoted":
            counts.promoted++;
            break;
        case "blocked_protected_surface":
            counts.blockedProtectedSurface++;
            break;
        case "rolled_back":
            counts.rolledBack++;
            break;
        case "shadow_failed":
        case "holdout_failed":
        case "rejected":
            counts.rejected++;
            break;
    }
}
|
|
335
|
+
/**
 * Checks whether any path the candidate proposes to touch is on the protected
 * list (DEFAULT_PROTECTED_LEARNING_PATHS).
 *
 * @param candidate Candidate record; reads the optional `proposedPaths` array.
 * @returns `{ allowed, reason }`; when blocked, `reason` names the first
 *          offending path.
 */
function validateCandidateScope(candidate) {
    const targets = candidate.proposedPaths ?? [];
    if (targets.length === 0) {
        return { allowed: true, reason: "No protected surfaces targeted." };
    }
    const hitsProtectedSurface = (target) => {
        for (const guarded of DEFAULT_PROTECTED_LEARNING_PATHS) {
            if (matchesProtectedPath(target, guarded)) {
                return true;
            }
        }
        return false;
    };
    const offenderIndex = targets.findIndex(hitsProtectedSurface);
    if (offenderIndex === -1) {
        return { allowed: true, reason: "Candidate scope limited to allowed surfaces." };
    }
    return {
        allowed: false,
        reason: `Candidate targets protected surface: ${targets[offenderIndex]}`
    };
}
|
|
349
|
+
/**
 * Tests one candidate path against one protected-path pattern.
 *
 * Both sides are normalised first. A pattern ending in `/**` matches the
 * directory itself and anything beneath it; any other pattern must match
 * exactly.
 *
 * @param candidatePath Path proposed by the candidate.
 * @param protectedPath Protected path or `dir/**` pattern.
 * @returns True when the candidate path falls under the pattern.
 */
function matchesProtectedPath(candidatePath, protectedPath) {
    const target = normalizePath(candidatePath);
    const pattern = normalizePath(protectedPath);
    if (!pattern.endsWith("/**")) {
        return target === pattern;
    }
    // Drop the trailing "/**" to get the protected directory.
    const subtreeRoot = pattern.slice(0, pattern.length - 3);
    if (target === subtreeRoot) {
        return true;
    }
    return target.startsWith(subtreeRoot + "/");
}
|
|
358
|
+
/**
 * Produces a canonical, forward-slash form of a path so that two spellings
 * of the same location compare equal.
 *
 * Normalization is purely lexical (no filesystem access):
 *  - backslashes become forward slashes;
 *  - empty segments (duplicate or trailing slashes) and "." segments are
 *    dropped, so "././a//b/" and "a/b" agree;
 *  - ".." removes the preceding segment, so "docs/../a" and "a" agree;
 *    leading ".." segments of a relative path are kept, and ".." at the root
 *    of an absolute path is discarded.
 *
 * The previous implementation stripped only a single leading "./", which let
 * equivalent spellings of a protected path evade exact/prefix comparison.
 *
 * @param value Path string using "/" or "\" separators.
 * @returns The normalized path; a leading "/" is preserved for absolute
 *   inputs, and inputs that reduce to nothing yield "".
 */
function normalizePath(value) {
    const slashed = value.replace(/\\/g, "/");
    const isAbsolute = slashed.startsWith("/");
    const segments = [];
    for (const segment of slashed.split("/")) {
        if (segment === "" || segment === ".") {
            continue;
        }
        if (segment === "..") {
            const last = segments[segments.length - 1];
            if (last !== undefined && last !== "..") {
                segments.pop();
            }
            else if (!isAbsolute) {
                // Cannot climb further lexically; keep the marker for relative paths.
                segments.push("..");
            }
            continue;
        }
        segments.push(segment);
    }
    const joined = segments.join("/");
    return isAbsolute && joined !== "" ? `/${joined}` : joined;
}
|
|
361
|
+
/**
 * Persists a promoted candidate's recall-scoring weights as the active
 * heuristics file under `<pipelineRoot>/learning-pipeline`.
 *
 * Candidates whose `heuristicFamily` is not "recall_scoring" are ignored.
 * Each of the four weights is taken from `candidate.payload.weights` when it
 * is a finite number; otherwise the currently active value is carried over.
 *
 * @param candidate Candidate with `heuristicFamily` and `payload` fields.
 * @param pipelineRoot Root directory that contains "learning-pipeline".
 * @returns Promise that settles once the file has been written (or
 *   immediately for candidates of another family).
 */
async function applyCandidatePromotion(candidate, pipelineRoot) {
    if (candidate.heuristicFamily !== "recall_scoring") {
        return;
    }
    const active = await loadActiveLearningHeuristics(pipelineRoot);
    const rawWeights = candidate.payload["weights"];
    const proposed = isRecord(rawWeights) ? rawWeights : {};
    const previous = active.recallScoring;
    // Prefer the proposed weight; fall back to the active one when unusable.
    const pickWeight = (key) => numberOrDefault(proposed[key], previous[key]);
    const next = {
        schemaVersion: "1.0",
        updatedAt: new Date().toISOString(),
        recallScoring: {
            failureClassWeight: pickWeight("failureClassWeight"),
            interventionWeight: pickWeight("interventionWeight"),
            tokenOverlapWeight: pickWeight("tokenOverlapWeight"),
            completionWeight: pickWeight("completionWeight")
        }
    };
    const pipelineDir = join(pipelineRoot, "learning-pipeline");
    await mkdir(pipelineDir, { recursive: true });
    const targetFile = resolveActiveHeuristicsPath(pipelineRoot);
    const serialized = JSON.stringify(next, null, 2);
    await writeFile(targetFile, serialized, "utf8");
}
|
|
382
|
+
/**
 * Builds the location of the active heuristics file for a pipeline root.
 *
 * @param pipelineRoot Root directory that contains "learning-pipeline".
 * @returns `<pipelineRoot>/learning-pipeline/active-heuristics.json`, joined
 *   with the module's `join` helper.
 */
function resolveActiveHeuristicsPath(pipelineRoot) {
    const segments = [pipelineRoot, "learning-pipeline", "active-heuristics.json"];
    return join(...segments);
}
|
|
385
|
+
/**
 * Reports whether a value is a non-null object.
 *
 * Note that arrays also satisfy this check, since `typeof []` is "object".
 *
 * @param value Any value.
 * @returns true for every object except null; false for primitives.
 */
function isRecord(value) {
    if (value === null) {
        return false;
    }
    return typeof value === "object";
}
|
|
388
|
+
/**
 * Returns `value` when it is a finite number, otherwise `fallback`.
 *
 * @param value Candidate value of any type.
 * @param fallback Value returned when `value` is not a number, or is NaN or
 *   infinite.
 * @returns `value` or `fallback`, unchanged.
 */
function numberOrDefault(value, fallback) {
    if (typeof value !== "number") {
        return fallback;
    }
    if (!Number.isFinite(value)) {
        return fallback;
    }
    return value;
}
|
|
391
|
+
//# sourceMappingURL=learning-pipeline.js.map
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import type { FailureClass, InterventionType, LoopRecord, LoopStatus, LoopLifecycleState } from "../../contracts/index.js";
|
|
2
|
+
/**
 * One recalled memory item; `MemoryBrief.entries` is a list of these.
 *
 * NOTE(review): the per-field notes are inferred from names and types only —
 * confirm against the store implementation.
 */
export interface MemoryRecallEntry {
    /** Identifier of the drawer this entry refers to. */
    drawerId: string;
    /** Key of the room associated with the entry. */
    roomKey: string;
    /** Lesson text carried by the entry. */
    lesson: string;
    /** Content text carried by the entry. */
    content: string;
    /** Identifier of the run the entry originated from. */
    sourceRunId: string;
    /** Attempt index within the source run, when known. */
    sourceAttemptIndex?: number;
    /** Numeric score attached to the entry (presumably recall relevance — confirm). */
    score: number;
    /** Text explaining the entry (presumably why it was recalled — confirm). */
    reason: string;
}
|
|
12
|
+
/**
 * Result of a recall: the value `LoopMemoryStore.recall` resolves to when it
 * does not resolve to `undefined`.
 */
export interface MemoryBrief {
    /** Key of the wing the brief belongs to. */
    wingKey: string;
    /** When the recall happened, as a string (presumably an ISO timestamp — confirm). */
    recalledAt: string;
    /** The recalled entries. */
    entries: MemoryRecallEntry[];
}
|
|
17
|
+
/**
 * Contract for a loop memory store: one read operation (`recall`) and two
 * write operations (`recordLoop`, `recordPriorLearning`), all asynchronous.
 * `createFileLoopMemoryStore` returns an implementation of this interface.
 */
export interface LoopMemoryStore {
    /**
     * Recalls memory for the described situation.
     *
     * @param input Workspace/project identifiers, the objective and focus
     *   text, a list of previous attempts (each with optional failure class,
     *   intervention and summary), plus an optional `limit` and optional
     *   identifiers of the current run and attempt.
     * @returns A brief, or `undefined` (presumably when nothing is recalled —
     *   confirm in the implementation).
     */
    recall(input: {
        workspaceId: string;
        projectId: string;
        objective: string;
        focus: string;
        previousAttempts: {
            failureClass?: FailureClass;
            intervention?: InterventionType;
            summary?: string;
        }[];
        limit?: number;
        currentRunId?: string;
        currentAttemptIndex?: number;
    }): Promise<MemoryBrief | undefined>;
    /**
     * Records a loop together with its status and lifecycle state.
     *
     * @param input The loop record, its status and lifecycle state, and an
     *   optional reason string.
     * @returns Room and drawer counts (whether these are totals or the
     *   number touched by this call is not visible here — confirm).
     */
    recordLoop(input: {
        loop: LoopRecord;
        status: LoopStatus;
        lifecycleState: LoopLifecycleState;
        reason?: string;
    }): Promise<{
        roomCount: number;
        drawerCount: number;
    }>;
    /**
     * Records prior learning supplied as a free-form key/value record.
     *
     * @param input Untyped record; its expected keys are not declared here.
     */
    recordPriorLearning(input: Record<string, unknown>): Promise<void>;
}
|
|
43
|
+
/**
 * Record describing a single recall event: which drawer (in which wing and
 * room) was recalled, for which run and attempt, with what score and reason.
 *
 * NOTE(review): field meanings are inferred from names and types only.
 */
export interface PalaceMemoryRecall {
    /** Identifier of this recall record. */
    recallId: string;
    /** Key of the wing involved. */
    wingKey: string;
    /** Key of the room involved. */
    roomKey: string;
    /** Identifier of the recalled drawer. */
    drawerId: string;
    /** Identifier of the run associated with the recall. */
    runId: string;
    /** Attempt index associated with the recall. */
    attemptIndex: number;
    /** Numeric score of the recall. */
    recallScore: number;
    /** Text reason for the recall. */
    recallReason: string;
    /** When the recall happened, as a string (presumably an ISO timestamp — confirm). */
    recalledAt: string;
}
|
|
54
|
+
/**
 * Summary of one memory room: its keys and title, its type, how many
 * drawers it holds, and when/by which run it was last updated.
 *
 * NOTE(review): field meanings are inferred from names and types only.
 */
export interface PalaceMemoryRoom {
    /** Key of the wing that contains the room. */
    wingKey: string;
    /** Key of the room. */
    roomKey: string;
    /** Title of the room. */
    roomTitle: string;
    /** Room category; one of the two literal values. */
    roomType: "failure" | "outcome";
    /** Number of drawers counted for the room. */
    drawerCount: number;
    /** Identifier of the last run recorded for the room. */
    lastRunId: string;
    /** Last update time, as a string (presumably an ISO timestamp — confirm). */
    updatedAt: string;
}
|
|
63
|
+
/**
 * A stored memory drawer: where it lives (wing/room), where it came from
 * (workspace, project, run, attempt), what happened (outcome and optional
 * failure class / intervention), and its textual payload.
 *
 * NOTE(review): field meanings are inferred from names and types only.
 */
export interface PalaceMemoryDrawer {
    /** Identifier of the drawer. */
    drawerId: string;
    /** Key of the wing that contains the drawer. */
    wingKey: string;
    /** Key of the room that contains the drawer. */
    roomKey: string;
    /** Title of that room. */
    roomTitle: string;
    /** Workspace the drawer belongs to. */
    workspaceId: string;
    /** Project the drawer belongs to. */
    projectId: string;
    /** Identifier of the run the drawer originated from. */
    sourceRunId: string;
    /** Attempt index within the source run, when known. */
    sourceAttemptIndex?: number;
    /** Outcome category; one of the three literal values. */
    outcome: "failure" | "completion" | "exit";
    /** Failure class, when one applies. */
    failureClass?: FailureClass;
    /** Intervention type, when one applies. */
    intervention?: InterventionType;
    /** Tag strings attached to the drawer. */
    tags: string[];
    /** Token strings attached to the drawer (presumably used for overlap scoring — confirm). */
    tokens: string[];
    /** Lesson text. */
    lesson: string;
    /** Content text. */
    content: string;
    /** Creation time, as a string (presumably an ISO timestamp — confirm). */
    createdAt: string;
}
|
|
81
|
+
/**
 * Resolves the memory root and returns it as a string.
 *
 * @param env - Optional environment variable map. NOTE(review): presumably
 *   falls back to `process.env` when omitted, and the result is presumably a
 *   filesystem path — neither is visible in this declaration; confirm in the
 *   implementation.
 * @returns The resolved memory root.
 */
export declare function resolveMemoryRoot(env?: NodeJS.ProcessEnv): string;
|
|
82
|
+
/**
 * Creates a `LoopMemoryStore`.
 *
 * @param options - Optional settings. `memoryRoot` optionally names the
 *   store's root; NOTE(review): presumably a directory path that defaults to
 *   `resolveMemoryRoot()` when omitted — confirm in the implementation.
 * @returns A store implementing `LoopMemoryStore`.
 */
export declare function createFileLoopMemoryStore(options?: { memoryRoot?: string }): LoopMemoryStore;
|