martin-loop 0.1.4 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CODE_OF_CONDUCT.md +32 -0
- package/README.md +172 -227
- package/demo/seeded-workspace/README.md +35 -0
- package/demo/seeded-workspace/TASKS.md +29 -0
- package/demo/seeded-workspace/martin.config.yaml +11 -0
- package/demo/seeded-workspace/package.json +8 -0
- package/demo/seeded-workspace/src/invoice-summary.js +11 -0
- package/demo/seeded-workspace/test/invoice-summary.test.js +20 -0
- package/dist/bin/martin-loop.js +0 -0
- package/dist/vendor/adapters/claude-cli.d.ts +19 -4
- package/dist/vendor/adapters/claude-cli.js +55 -24
- package/dist/vendor/adapters/cli-bridge.d.ts +1 -0
- package/dist/vendor/adapters/cli-bridge.js +154 -28
- package/dist/vendor/adapters/counter.d.ts +1 -0
- package/dist/vendor/adapters/counter.js +4 -0
- package/dist/vendor/adapters/git-baseline.d.ts +50 -0
- package/dist/vendor/adapters/git-baseline.js +233 -0
- package/dist/vendor/adapters/index.d.ts +1 -0
- package/dist/vendor/adapters/index.js +1 -0
- package/dist/vendor/adapters/openrouter-adapter.d.ts +15 -0
- package/dist/vendor/adapters/openrouter-adapter.js +302 -0
- package/dist/vendor/adapters/usage.d.ts +48 -0
- package/dist/vendor/adapters/usage.js +66 -0
- package/dist/vendor/adapters/verifier-only.d.ts +7 -0
- package/dist/vendor/adapters/verifier-only.js +57 -0
- package/dist/vendor/cli/bin/exit.d.ts +12 -0
- package/dist/vendor/cli/bin/exit.js +28 -0
- package/dist/vendor/cli/commands/analyze.d.ts +5 -0
- package/dist/vendor/cli/commands/analyze.js +58 -0
- package/dist/vendor/cli/commands/audit-log-verify.d.ts +34 -0
- package/dist/vendor/cli/commands/audit-log-verify.js +99 -0
- package/dist/vendor/cli/commands/audit.d.ts +8 -0
- package/dist/vendor/cli/commands/audit.js +199 -0
- package/dist/vendor/cli/commands/corpus.d.ts +5 -0
- package/dist/vendor/cli/commands/corpus.js +60 -0
- package/dist/vendor/cli/commands/doctor.d.ts +8 -0
- package/dist/vendor/cli/commands/doctor.js +219 -0
- package/dist/vendor/cli/commands/explain.d.ts +17 -0
- package/dist/vendor/cli/commands/explain.js +176 -0
- package/dist/vendor/cli/commands/export.d.ts +5 -0
- package/dist/vendor/cli/commands/export.js +60 -0
- package/dist/vendor/cli/commands/governance.d.ts +8 -0
- package/dist/vendor/cli/commands/governance.js +95 -0
- package/dist/vendor/cli/commands/improve.d.ts +18 -0
- package/dist/vendor/cli/commands/improve.js +396 -0
- package/dist/vendor/cli/commands/init.d.ts +8 -0
- package/dist/vendor/cli/commands/init.js +281 -0
- package/dist/vendor/cli/commands/migration.d.ts +8 -0
- package/dist/vendor/cli/commands/migration.js +67 -0
- package/dist/vendor/cli/commands/prior.d.ts +23 -0
- package/dist/vendor/cli/commands/prior.js +145 -0
- package/dist/vendor/cli/commands/resume.d.ts +21 -0
- package/dist/vendor/cli/commands/resume.js +73 -0
- package/dist/vendor/cli/commands/verify.d.ts +6 -0
- package/dist/vendor/cli/commands/verify.js +43 -0
- package/dist/vendor/cli/index.d.ts +6 -1
- package/dist/vendor/cli/index.js +124 -7
- package/dist/vendor/cli/research/public-corpus.d.ts +43 -0
- package/dist/vendor/cli/research/public-corpus.js +151 -0
- package/dist/vendor/cli/ui/error-card.d.ts +38 -0
- package/dist/vendor/cli/ui/error-card.js +103 -0
- package/dist/vendor/cli/ui/mission-brief.d.ts +41 -0
- package/dist/vendor/cli/ui/mission-brief.js +173 -0
- package/dist/vendor/cli/ui/summary-card.d.ts +34 -0
- package/dist/vendor/cli/ui/summary-card.js +102 -0
- package/dist/vendor/contracts/audit.d.ts +46 -0
- package/dist/vendor/contracts/audit.js +360 -0
- package/dist/vendor/contracts/index.d.ts +3 -1
- package/dist/vendor/contracts/post-phase15.d.ts +240 -0
- package/dist/vendor/contracts/post-phase15.js +166 -0
- package/dist/vendor/core/agent/mandates.d.ts +46 -0
- package/dist/vendor/core/agent/mandates.js +178 -0
- package/dist/vendor/core/agent/receipts.d.ts +38 -0
- package/dist/vendor/core/agent/receipts.js +131 -0
- package/dist/vendor/core/agent/signing.d.ts +17 -0
- package/dist/vendor/core/agent/signing.js +91 -0
- package/dist/vendor/core/attestation/sign.d.ts +25 -0
- package/dist/vendor/core/attestation/sign.js +216 -0
- package/dist/vendor/core/autonomy/autonomous-promotion.d.ts +120 -0
- package/dist/vendor/core/autonomy/autonomous-promotion.js +346 -0
- package/dist/vendor/core/autonomy/envelope-v2.d.ts +29 -0
- package/dist/vendor/core/autonomy/envelope-v2.js +60 -0
- package/dist/vendor/core/autonomy/envelope.d.ts +17 -0
- package/dist/vendor/core/autonomy/envelope.js +27 -0
- package/dist/vendor/core/autonomy/escalation-ledger.d.ts +20 -0
- package/dist/vendor/core/autonomy/escalation-ledger.js +18 -0
- package/dist/vendor/core/autonomy/resume.d.ts +15 -0
- package/dist/vendor/core/autonomy/resume.js +23 -0
- package/dist/vendor/core/circuit/circuit-breaker.d.ts +60 -0
- package/dist/vendor/core/circuit/circuit-breaker.js +143 -0
- package/dist/vendor/core/compiler.d.ts +2 -0
- package/dist/vendor/core/compiler.js +10 -4
- package/dist/vendor/core/context-distillation.d.ts +3 -0
- package/dist/vendor/core/context-distillation.js +44 -0
- package/dist/vendor/core/context-flow/compile-context.d.ts +8 -0
- package/dist/vendor/core/context-flow/compile-context.js +111 -0
- package/dist/vendor/core/context-flow/entities.d.ts +2 -0
- package/dist/vendor/core/context-flow/entities.js +44 -0
- package/dist/vendor/core/context-flow/evaluate-policy.d.ts +2 -0
- package/dist/vendor/core/context-flow/evaluate-policy.js +42 -0
- package/dist/vendor/core/context-flow/index.d.ts +11 -0
- package/dist/vendor/core/context-flow/index.js +24 -0
- package/dist/vendor/core/context-flow/labels.d.ts +3 -0
- package/dist/vendor/core/context-flow/labels.js +17 -0
- package/dist/vendor/core/context-flow/normalizer.d.ts +9 -0
- package/dist/vendor/core/context-flow/normalizer.js +69 -0
- package/dist/vendor/core/context-flow/profiles.d.ts +33 -0
- package/dist/vendor/core/context-flow/profiles.js +36 -0
- package/dist/vendor/core/context-flow/redaction.d.ts +1 -0
- package/dist/vendor/core/context-flow/redaction.js +6 -0
- package/dist/vendor/core/context-flow/sensitivity.d.ts +2 -0
- package/dist/vendor/core/context-flow/sensitivity.js +27 -0
- package/dist/vendor/core/context-flow/sync-preview.d.ts +2 -0
- package/dist/vendor/core/context-flow/sync-preview.js +22 -0
- package/dist/vendor/core/context-flow/token-estimator.d.ts +3 -0
- package/dist/vendor/core/context-flow/token-estimator.js +13 -0
- package/dist/vendor/core/context-flow/types.d.ts +91 -0
- package/dist/vendor/core/context-flow/types.js +2 -0
- package/dist/vendor/core/context-integrity.d.ts +26 -0
- package/dist/vendor/core/context-integrity.js +56 -0
- package/dist/vendor/core/context-utility.d.ts +47 -0
- package/dist/vendor/core/context-utility.js +405 -0
- package/dist/vendor/core/cost/pipeline.d.ts +92 -0
- package/dist/vendor/core/cost/pipeline.js +141 -0
- package/dist/vendor/core/cost/tagged-cost.d.ts +27 -0
- package/dist/vendor/core/cost/tagged-cost.js +55 -0
- package/dist/vendor/core/cost-governor.d.ts +2 -0
- package/dist/vendor/core/cost-governor.js +50 -0
- package/dist/vendor/core/cve/cve-check.d.ts +80 -0
- package/dist/vendor/core/cve/cve-check.js +172 -0
- package/dist/vendor/core/digital-twin/index.d.ts +27 -0
- package/dist/vendor/core/digital-twin/index.js +90 -0
- package/dist/vendor/core/drift/drift-graph.d.ts +47 -0
- package/dist/vendor/core/drift/drift-graph.js +100 -0
- package/dist/vendor/core/drift/objective-lock.d.ts +69 -0
- package/dist/vendor/core/drift/objective-lock.js +88 -0
- package/dist/vendor/core/drift/scope.d.ts +46 -0
- package/dist/vendor/core/drift/scope.js +102 -0
- package/dist/vendor/core/drift/signature-lock.d.ts +48 -0
- package/dist/vendor/core/drift/signature-lock.js +202 -0
- package/dist/vendor/core/drift/stale-proof-gate.d.ts +21 -0
- package/dist/vendor/core/drift/stale-proof-gate.js +19 -0
- package/dist/vendor/core/eval/known-bad-world-runner.d.ts +24 -0
- package/dist/vendor/core/eval/known-bad-world-runner.js +256 -0
- package/dist/vendor/core/evidence/claim-audit.d.ts +18 -0
- package/dist/vendor/core/evidence/claim-audit.js +89 -0
- package/dist/vendor/core/exit-intelligence.d.ts +2 -0
- package/dist/vendor/core/exit-intelligence.js +58 -0
- package/dist/vendor/core/explain/formatter.d.ts +42 -0
- package/dist/vendor/core/explain/formatter.js +171 -0
- package/dist/vendor/core/explain/timeline.d.ts +29 -0
- package/dist/vendor/core/explain/timeline.js +213 -0
- package/dist/vendor/core/failure-taxonomy.d.ts +2 -0
- package/dist/vendor/core/failure-taxonomy.js +76 -0
- package/dist/vendor/core/gateway/index.d.ts +10 -0
- package/dist/vendor/core/gateway/index.js +12 -0
- package/dist/vendor/core/gateway/registry.d.ts +40 -0
- package/dist/vendor/core/gateway/registry.js +97 -0
- package/dist/vendor/core/gateway/transport.d.ts +31 -0
- package/dist/vendor/core/gateway/transport.js +82 -0
- package/dist/vendor/core/gateway/vault.d.ts +19 -0
- package/dist/vendor/core/gateway/vault.js +29 -0
- package/dist/vendor/core/graph/adapters.d.ts +43 -0
- package/dist/vendor/core/graph/adapters.js +91 -0
- package/dist/vendor/core/graph/hotspots.d.ts +22 -0
- package/dist/vendor/core/graph/hotspots.js +30 -0
- package/dist/vendor/core/graph/index.d.ts +1 -0
- package/dist/vendor/core/graph/index.js +2 -0
- package/dist/vendor/core/honey/honey-tokens.d.ts +32 -0
- package/dist/vendor/core/honey/honey-tokens.js +44 -0
- package/dist/vendor/core/index.d.ts +7 -4
- package/dist/vendor/core/index.js +222 -64
- package/dist/vendor/core/learning/bayesian-update.d.ts +31 -0
- package/dist/vendor/core/learning/bayesian-update.js +60 -0
- package/dist/vendor/core/learning/prior-sets.d.ts +42 -0
- package/dist/vendor/core/learning/prior-sets.js +111 -0
- package/dist/vendor/core/learning/promotion-gate.d.ts +17 -0
- package/dist/vendor/core/learning/promotion-gate.js +23 -0
- package/dist/vendor/core/leash/blast-radius.d.ts +42 -0
- package/dist/vendor/core/leash/blast-radius.js +156 -0
- package/dist/vendor/core/leash/policy-leash.d.ts +31 -0
- package/dist/vendor/core/leash/policy-leash.js +117 -0
- package/dist/vendor/core/memo/memo.d.ts +63 -0
- package/dist/vendor/core/memo/memo.js +97 -0
- package/dist/vendor/core/memory/learning-pipeline.d.ts +154 -0
- package/dist/vendor/core/memory/learning-pipeline.js +391 -0
- package/dist/vendor/core/memory/palace.d.ts +84 -0
- package/dist/vendor/core/memory/palace.js +379 -0
- package/dist/vendor/core/merge/ast-merge.d.ts +22 -0
- package/dist/vendor/core/merge/ast-merge.js +350 -0
- package/dist/vendor/core/merge/text-merge.d.ts +12 -0
- package/dist/vendor/core/merge/text-merge.js +182 -0
- package/dist/vendor/core/otel/tracer.d.ts +45 -0
- package/dist/vendor/core/otel/tracer.js +116 -0
- package/dist/vendor/core/parallel/parallel-attempts.d.ts +28 -0
- package/dist/vendor/core/parallel/parallel-attempts.js +41 -0
- package/dist/vendor/core/parallel/scorer.d.ts +24 -0
- package/dist/vendor/core/parallel/scorer.js +65 -0
- package/dist/vendor/core/pattern-detection.d.ts +64 -0
- package/dist/vendor/core/pattern-detection.js +108 -0
- package/dist/vendor/core/persistence/checkpoint.d.ts +44 -0
- package/dist/vendor/core/persistence/checkpoint.js +156 -0
- package/dist/vendor/core/persistence/cleanup.d.ts +22 -0
- package/dist/vendor/core/persistence/cleanup.js +131 -0
- package/dist/vendor/core/persistence/index.d.ts +2 -0
- package/dist/vendor/core/persistence/index.js +1 -0
- package/dist/vendor/core/persistence/runs-reader.d.ts +52 -0
- package/dist/vendor/core/persistence/runs-reader.js +84 -0
- package/dist/vendor/core/persistence/store.d.ts +6 -1
- package/dist/vendor/core/persistence/store.js +5 -0
- package/dist/vendor/core/policy/file-touch-quota.d.ts +60 -0
- package/dist/vendor/core/policy/file-touch-quota.js +105 -0
- package/dist/vendor/core/policy/policy-loader.d.ts +30 -0
- package/dist/vendor/core/policy/policy-loader.js +170 -0
- package/dist/vendor/core/policy/policy-schema.d.ts +55 -0
- package/dist/vendor/core/policy/policy-schema.js +78 -0
- package/dist/vendor/core/policy.d.ts +6 -0
- package/dist/vendor/core/probe/probe.d.ts +49 -0
- package/dist/vendor/core/probe/probe.js +115 -0
- package/dist/vendor/core/proof/patch-proof.d.ts +58 -0
- package/dist/vendor/core/proof/patch-proof.js +84 -0
- package/dist/vendor/core/proof/semantic-probe.d.ts +25 -0
- package/dist/vendor/core/proof/semantic-probe.js +82 -0
- package/dist/vendor/core/recovery/failure-mode-runner.d.ts +29 -0
- package/dist/vendor/core/recovery/failure-mode-runner.js +39 -0
- package/dist/vendor/core/red-blue/red-phase.d.ts +64 -0
- package/dist/vendor/core/red-blue/red-phase.js +141 -0
- package/dist/vendor/core/red-blue/risk-tiers.d.ts +22 -0
- package/dist/vendor/core/red-blue/risk-tiers.js +33 -0
- package/dist/vendor/core/replay/replay.d.ts +85 -0
- package/dist/vendor/core/replay/replay.js +109 -0
- package/dist/vendor/core/router/engine.d.ts +54 -0
- package/dist/vendor/core/router/engine.js +131 -0
- package/dist/vendor/core/router/index.d.ts +1 -0
- package/dist/vendor/core/router/index.js +2 -0
- package/dist/vendor/core/router/trust-calibration.d.ts +57 -0
- package/dist/vendor/core/router/trust-calibration.js +127 -0
- package/dist/vendor/core/run-martin.d.ts +2 -0
- package/dist/vendor/core/run-martin.js +287 -0
- package/dist/vendor/core/security/cve-scanner.d.ts +62 -0
- package/dist/vendor/core/security/cve-scanner.js +178 -0
- package/dist/vendor/core/sentinel/efficiency-sentinel.d.ts +29 -0
- package/dist/vendor/core/sentinel/efficiency-sentinel.js +30 -0
- package/dist/vendor/core/sentinel/progress-guard.d.ts +35 -0
- package/dist/vendor/core/sentinel/progress-guard.js +46 -0
- package/dist/vendor/core/siem/siem-emitter.d.ts +49 -0
- package/dist/vendor/core/siem/siem-emitter.js +157 -0
- package/dist/vendor/core/strategy/attempt-brief.d.ts +22 -0
- package/dist/vendor/core/strategy/attempt-brief.js +89 -0
- package/dist/vendor/core/summarize/diff-summary.d.ts +35 -0
- package/dist/vendor/core/summarize/diff-summary.js +204 -0
- package/dist/vendor/core/surface-signals.d.ts +21 -0
- package/dist/vendor/core/surface-signals.js +139 -0
- package/dist/vendor/core/truth/truth-wall.d.ts +51 -0
- package/dist/vendor/core/truth/truth-wall.js +69 -0
- package/dist/vendor/core/truth-spine.d.ts +26 -0
- package/dist/vendor/core/truth-spine.js +62 -0
- package/dist/vendor/core/types.d.ts +115 -0
- package/dist/vendor/core/types.js +2 -0
- package/dist/vendor/core/verification/tiered-verify.d.ts +17 -0
- package/dist/vendor/core/verification/tiered-verify.js +29 -0
- package/dist/vendor/core/verifier-pyramid.d.ts +32 -0
- package/dist/vendor/core/verifier-pyramid.js +111 -0
- package/dist/vendor/core/workflow-artifacts.d.ts +99 -0
- package/dist/vendor/core/workflow-artifacts.js +668 -0
- package/dist/vendor/core/wrap/supervised-run.d.ts +96 -0
- package/dist/vendor/core/wrap/supervised-run.js +178 -0
- package/docs/assets/cli-animated.svg +139 -0
- package/docs/assets/cli-static.svg +34 -0
- package/docs/assets/github-hero-v2.svg +23 -0
- package/docs/assets/martin-raplph.png.jpg +0 -0
- package/docs/assets/martinloop-logo.png +0 -0
- package/docs/assets/nvidia-inception-program-light.png +0 -0
- package/docs/assets/nvidia-inception-program.png +0 -0
- package/docs/assets/phase3c-sidesidebyside-demo.html +228 -0
- package/docs/assets/side-by-side.svg +134 -0
- package/docs/oss/CLAUDE-CODE-WALKTHROUGH.md +142 -0
- package/docs/oss/EXAMPLES.md +9 -1
- package/docs/oss/OSS-BOUNDARY-REPORT.json +109 -113
- package/docs/oss/OSS-BOUNDARY-REPORT.md +48 -48
- package/docs/oss/QUICKSTART.md +39 -4
- package/docs/oss/RALPH-LOOP-SAFETY.md +113 -0
- package/docs/oss/README.md +7 -4
- package/docs/oss/RELEASE-SURFACE-REPORT.json +46 -45
- package/docs/oss/RELEASE-SURFACE-REPORT.md +36 -35
- package/package.json +129 -49
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* policy-schema.ts — SLICE-15
|
|
3
|
+
*
|
|
4
|
+
* Defines the MartinPolicyFile schema and built-in defaults.
|
|
5
|
+
* No external validation library — validated manually so zero new deps.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Numeric spending limits for a policy.
 *
 * NOTE(review): presumably amounts in USD, since the policy field that holds
 * this type is named `budgetUsd` — confirm against the consumers.
 */
export interface BudgetPolicy {
    /** Limit applied to a single run. */
    perRun: number;
    /** Limit applied across one day. */
    perDay: number;
}
|
|
11
|
+
/**
 * SIEM settings. Every field is optional.
 */
export interface SiemPolicy {
    /** Endpoint string for the SIEM target. */
    endpoint?: string;
    /** Event format: either "ocsf" or "cef". */
    format?: "ocsf" | "cef";
    /** API key string. */
    apiKey?: string;
    /** Flush interval, in milliseconds. */
    flushIntervalMs?: number;
}
|
|
17
|
+
/**
 * Shape of a user-authored policy file. Every field is optional; when
 * policies are merged, omitted fields fall back to the base policy.
 */
export interface PolicyFile {
    /** Budget limits; either bound may be supplied on its own. */
    budgetUsd?: Partial<BudgetPolicy>;
    /** Commands the leash always blocks (appended to the built-in list). */
    blockedCommands?: string[];
    /** Verifier commands allowed to run; leaving this undefined allows all. */
    allowedVerifiers?: string[];
    /** Glob restricting which files attempts may touch. */
    fileScopeGlob?: string;
    /** Hard cap on attempts per run. */
    maxAttempts?: number;
    /** Pause and request approval before spending over this amount. */
    requireApprovalAboveUsd?: number;
    /** CVE check configuration. */
    cveCheck?: {
        enabled?: boolean;
        blockSeverity?: "CRITICAL" | "HIGH" | "MEDIUM" | "LOW";
        failClosed?: boolean;
    };
    /** SIEM configuration. */
    siem?: SiemPolicy;
}
|
|
39
|
+
/**
 * Fully-populated policy: the same settings as a policy file, but with every
 * field present. Settings that have no value are represented as `null`
 * rather than being omitted.
 */
export interface ResolvedPolicy {
    /** Budget limits with both bounds present. */
    budgetUsd: BudgetPolicy;
    /** Complete list of blocked commands. */
    blockedCommands: string[];
    /** Allowed verifier commands, or `null` when no list is set. */
    allowedVerifiers: string[] | null;
    /** File-scope glob, or `null` when none is set. */
    fileScopeGlob: string | null;
    /** Cap on attempts per run. */
    maxAttempts: number;
    /** Approval threshold, or `null` when none is set. */
    requireApprovalAboveUsd: number | null;
    /** CVE check configuration with every flag resolved. */
    cveCheck: {
        enabled: boolean;
        blockSeverity: "CRITICAL" | "HIGH" | "MEDIUM" | "LOW";
        failClosed: boolean;
    };
    /** SIEM configuration (its individual fields remain optional). */
    siem: SiemPolicy;
}
|
|
53
|
+
/** Built-in default policy. */
export declare const BUILTIN_DEFAULTS: ResolvedPolicy;

/**
 * Validates an untyped policy value.
 *
 * @param raw - The value to validate.
 * @returns Error strings describing each problem found; empty when none.
 */
export declare function validatePolicyFile(raw: unknown): string[];

/**
 * Layers a policy file on top of a resolved policy.
 *
 * @param base - Resolved policy supplying values for anything not overridden.
 * @param override - Policy file whose defined fields take precedence.
 * @returns The combined resolved policy.
 */
export declare function mergePolicies(
    base: ResolvedPolicy,
    override: PolicyFile
): ResolvedPolicy;
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* policy-schema.ts — SLICE-15
|
|
3
|
+
*
|
|
4
|
+
* Defines the MartinPolicyFile schema and built-in defaults.
|
|
5
|
+
* No external validation library — validated manually so zero new deps.
|
|
6
|
+
*/
|
|
7
|
+
// ---------------------------------------------------------------------------
|
|
8
|
+
// Built-in defaults
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
/**
 * Baseline policy values: infinite per-run and per-day budgets, an empty
 * blocked-command list, no verifier list, no file-scope glob, 10 attempts,
 * no approval threshold, CVE checking enabled at HIGH severity without
 * fail-closed, and empty SIEM settings.
 */
export const BUILTIN_DEFAULTS = {
    budgetUsd: {
        perRun: Number.POSITIVE_INFINITY,
        perDay: Number.POSITIVE_INFINITY
    },
    blockedCommands: [],
    allowedVerifiers: null,
    fileScopeGlob: null,
    maxAttempts: 10,
    requireApprovalAboveUsd: null,
    cveCheck: {
        enabled: true,
        blockSeverity: "HIGH",
        failClosed: false
    },
    siem: {}
};
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
// Validator — returns error strings or empty array
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
/**
 * Validates an untyped, already-parsed policy value.
 *
 * Only the shape and primitive types of the known policy fields are checked;
 * unknown keys are ignored. Numeric fields reject NaN (a NaN limit would make
 * every comparison against it false) but accept Infinity, which the built-in
 * defaults use for budgets.
 *
 * @param raw - The parsed policy value to check.
 * @returns Human-readable error strings, one per problem; empty when valid.
 */
export function validatePolicyFile(raw) {
    // Arrays are typeof "object" as well, so they have to be excluded explicitly.
    if (raw === null || typeof raw !== "object" || Array.isArray(raw)) {
        return ["Policy file must be a JSON object"];
    }
    const errors = [];
    const p = raw;
    const isPlainObject = (value) => typeof value === "object" && value !== null && !Array.isArray(value);
    const isUsableNumber = (value) => typeof value === "number" && !Number.isNaN(value);
    const isStringArray = (value) => Array.isArray(value) && value.every((item) => typeof item === "string");
    const isOptional = (value, check) => value === undefined || check(value);
    const isBoolean = (value) => typeof value === "boolean";
    const isString = (value) => typeof value === "string";

    if (p.budgetUsd !== undefined) {
        if (!isPlainObject(p.budgetUsd)) {
            errors.push("budgetUsd must be an object");
        }
        else {
            const b = p.budgetUsd;
            if (!isOptional(b.perRun, isUsableNumber))
                errors.push("budgetUsd.perRun must be a number");
            if (!isOptional(b.perDay, isUsableNumber))
                errors.push("budgetUsd.perDay must be a number");
        }
    }
    // The messages promise arrays *of strings*, so the elements are checked too.
    if (!isOptional(p.blockedCommands, isStringArray))
        errors.push("blockedCommands must be an array of strings");
    if (!isOptional(p.allowedVerifiers, isStringArray))
        errors.push("allowedVerifiers must be an array of strings");
    if (!isOptional(p.fileScopeGlob, isString))
        errors.push("fileScopeGlob must be a string");
    // Number.isInteger also rejects NaN, Infinity and non-numbers.
    if (p.maxAttempts !== undefined && !(Number.isInteger(p.maxAttempts) && p.maxAttempts >= 1))
        errors.push("maxAttempts must be a positive integer");
    if (!isOptional(p.requireApprovalAboveUsd, isUsableNumber))
        errors.push("requireApprovalAboveUsd must be a number");

    if (p.cveCheck !== undefined) {
        if (!isPlainObject(p.cveCheck)) {
            errors.push("cveCheck must be an object");
        }
        else {
            const c = p.cveCheck;
            const severities = ["CRITICAL", "HIGH", "MEDIUM", "LOW"];
            if (!isOptional(c.enabled, isBoolean))
                errors.push("cveCheck.enabled must be a boolean");
            if (c.blockSeverity !== undefined && !severities.includes(c.blockSeverity))
                errors.push("cveCheck.blockSeverity must be one of CRITICAL, HIGH, MEDIUM, LOW");
            if (!isOptional(c.failClosed, isBoolean))
                errors.push("cveCheck.failClosed must be a boolean");
        }
    }

    if (p.siem !== undefined) {
        if (!isPlainObject(p.siem)) {
            errors.push("siem must be an object");
        }
        else {
            const s = p.siem;
            if (!isOptional(s.endpoint, isString))
                errors.push("siem.endpoint must be a string");
            if (s.format !== undefined && s.format !== "ocsf" && s.format !== "cef")
                errors.push('siem.format must be "ocsf" or "cef"');
            if (!isOptional(s.apiKey, isString))
                errors.push("siem.apiKey must be a string");
            if (!isOptional(s.flushIntervalMs, isUsableNumber))
                errors.push("siem.flushIntervalMs must be a number");
        }
    }
    return errors;
}
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
// Merge: layer override on top of base (override wins on defined fields)
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
/**
 * Layers a policy file on top of a resolved policy.
 *
 * A field from `override` wins only when it is defined; `null` and
 * `undefined` scalar fields fall back to `base`. `blockedCommands` is
 * additive: the override's entries are appended to the base list. The
 * inputs are never mutated, and array values in the result are fresh copies
 * so later mutation of the result cannot leak back into `base` or `override`.
 *
 * @param base - Resolved policy supplying values for anything not overridden.
 * @param override - Policy file whose defined fields take precedence.
 * @returns A new resolved policy object.
 */
export function mergePolicies(base, override) {
    const budgetOverride = override.budgetUsd ?? {};
    const cveOverride = override.cveCheck ?? {};

    // A plain spread would let an explicitly-undefined key erase the base
    // value, so only keys that actually carry a value are copied over.
    const siem = { ...base.siem };
    for (const [key, value] of Object.entries(override.siem ?? {})) {
        if (value !== undefined) {
            siem[key] = value;
        }
    }

    const verifiers = override.allowedVerifiers ?? base.allowedVerifiers;

    return {
        budgetUsd: {
            perRun: budgetOverride.perRun ?? base.budgetUsd.perRun,
            perDay: budgetOverride.perDay ?? base.budgetUsd.perDay
        },
        blockedCommands: [
            ...base.blockedCommands,
            ...(override.blockedCommands ?? [])
        ],
        // Copy so the result does not alias the caller's array.
        allowedVerifiers: Array.isArray(verifiers) ? [...verifiers] : verifiers,
        fileScopeGlob: override.fileScopeGlob ?? base.fileScopeGlob,
        maxAttempts: override.maxAttempts ?? base.maxAttempts,
        requireApprovalAboveUsd: override.requireApprovalAboveUsd ?? base.requireApprovalAboveUsd,
        cveCheck: {
            enabled: cveOverride.enabled ?? base.cveCheck.enabled,
            blockSeverity: cveOverride.blockSeverity ?? base.cveCheck.blockSeverity,
            failClosed: cveOverride.failClosed ?? base.cveCheck.failClosed
        },
        siem
    };
}
|
|
78
|
+
//# sourceMappingURL=policy-schema.js.map
|
|
@@ -18,6 +18,12 @@ export interface ExitDecision {
|
|
|
18
18
|
lifecycleState: LoopLifecycleState;
|
|
19
19
|
status: LoopStatus;
|
|
20
20
|
reason: string;
|
|
21
|
+
/** Machine-readable stop classifier for non-attempt exits such as preflight safety blocks. */
|
|
22
|
+
failureClass?: FailureClass;
|
|
23
|
+
/** Machine-readable safety surface, when the stop came from a safety leash. */
|
|
24
|
+
safetySurface?: string;
|
|
25
|
+
/** Stable reason code for dashboards, MCP, and downstream automation. */
|
|
26
|
+
reasonCode?: string;
|
|
21
27
|
}
|
|
22
28
|
export interface MartinAdapterResultLike {
|
|
23
29
|
status: "completed" | "failed";
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import type { MartinAdapter, MartinUsage, ProbeTier } from "../index.js";
|
|
2
|
+
/** Route chosen after an entry probe: stay on the primary model, or try the cheaper model first. */
export type EntryProbeRoute =
    | "primary"
    | "cheap-first";
|
|
3
|
+
/**
 * Result of the entry probe phase, discriminated by `status`.
 *
 * - "skipped": no probe was run; the route is always "primary".
 * - "failed": a probe was attempted but did not complete; the route is
 *   always "primary".
 * - "completed": the probe finished and reported a tier; the route may be
 *   either value.
 */
export type EntryProbeOutcome =
    | {
        status: "skipped";
        route: "primary";
        /** Why the probe was skipped. */
        reason: string;
        primaryModel: string;
        /** Model the run should start with. */
        initialModel: string;
        usage: MartinUsage;
    }
    | {
        status: "failed";
        route: "primary";
        /** Failure reason. */
        reason: string;
        primaryModel: string;
        /** Model the run should start with. */
        initialModel: string;
        /** Model the probe was attempted with. */
        probeModel: string;
        usage: MartinUsage;
        startedAt: string;
        completedAt: string;
        adapterId: string;
        providerId: string;
    }
    | {
        status: "completed";
        route: EntryProbeRoute;
        reason: string;
        primaryModel: string;
        /** Model the run should start with. */
        initialModel: string;
        /** Model the probe ran with. */
        probeModel: string;
        /** Tier reported by the probe. */
        tier: ProbeTier;
        usage: MartinUsage;
        startedAt: string;
        completedAt: string;
        adapterId: string;
        providerId: string;
    };
|
|
36
|
+
/**
 * Picks the model to probe with, if any.
 *
 * @param primaryModel - The currently selected primary model.
 * @param fallbackModels - Configured fallback model names.
 * @returns The probe model name, or `undefined` when no probe model applies.
 */
export declare function resolveProbeModel(
    primaryModel: string,
    fallbackModels: string[]
): string | undefined;

/**
 * Maps a probe status (and tier, when present) to the route to take.
 *
 * @param input - Probe status plus optional tier.
 * @returns The selected route.
 */
export declare function selectProbeRoute(input: {
    status: "completed" | "failed" | "skipped";
    tier?: ProbeTier;
}): EntryProbeRoute;

/**
 * Runs the entry probe phase and reports its outcome.
 *
 * @param input - Adapter, model configuration, objective, file hints,
 *   remaining budget, and a `now` callback that returns a timestamp string.
 * @returns A promise for the probe outcome.
 */
export declare function probePhase(input: {
    adapter: MartinAdapter;
    fallbackModels: string[];
    objective: string;
    fileHints: string[];
    remainingBudgetUsd: number;
    primaryModel: string;
    now: () => string;
}): Promise<EntryProbeOutcome>;
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
 * Picks the model to use for the entry probe.
 *
 * Fallback names are trimmed, blanks dropped, and duplicates removed while
 * keeping first-seen order. The first entry of that list is returned only
 * when the (trimmed) primary model appears later in the list; if the primary
 * is missing from the list or is itself the first entry, there is nothing to
 * probe with.
 *
 * NOTE(review): this presumably relies on the fallback list being ordered
 * cheapest-first — confirm against the configuration docs.
 *
 * @param primaryModel - The currently selected primary model name.
 * @param fallbackModels - Configured fallback model names.
 * @returns The probe model name, or undefined when none applies.
 */
export function resolveProbeModel(primaryModel, fallbackModels) {
    const candidates = [];
    for (const entry of fallbackModels) {
        const name = entry.trim();
        if (name !== "" && !candidates.includes(name)) {
            candidates.push(name);
        }
    }
    const position = candidates.indexOf(primaryModel.trim());
    return position > 0 ? candidates[0] : undefined;
}
|
|
9
|
+
/**
 * Chooses the route for a probe result: "cheap-first" only when the probe
 * completed and reported the "trivial" tier, otherwise "primary".
 *
 * @param input - Object carrying the probe `status` and optional `tier`.
 * @returns "cheap-first" or "primary".
 */
export function selectProbeRoute(input) {
    if (input.status !== "completed") {
        return "primary";
    }
    if (input.tier !== "trivial") {
        return "primary";
    }
    return "cheap-first";
}
|
|
12
|
+
/**
 * Runs the entry probe, if one is possible, and reports the outcome.
 *
 * The phase is skipped (route "primary", zero usage) when no probe model can
 * be resolved or when the model-specific adapter exposes no `probe` method.
 * Otherwise the probe is invoked; a thrown error is converted into a
 * "failed" outcome with zero usage instead of propagating.
 *
 * @param input - Adapter, primary and fallback models, objective, file
 *   hints, remaining budget, and a `now` callback used for the start and
 *   completion timestamps.
 * @returns A promise for the skipped, failed, or completed outcome.
 */
export async function probePhase(input) {
    const { primaryModel } = input;
    // Both skip paths differ only in the reason they report.
    const skipped = (reason) => ({
        status: "skipped",
        route: "primary",
        reason,
        primaryModel,
        initialModel: primaryModel,
        usage: zeroUsage()
    });

    const probeModel = resolveProbeModel(primaryModel, input.fallbackModels);
    if (!probeModel) {
        return skipped("No cheaper same-lane fallback model is configured for the current primary.");
    }

    const probeAdapter = input.adapter.withModel?.(probeModel);
    if (!probeAdapter?.probe) {
        return skipped("The current adapter does not support entry probing.");
    }

    const startedAt = input.now();
    try {
        const request = {
            objective: input.objective,
            fileHints: input.fileHints,
            primaryModel,
            probeModel,
            remainingBudgetUsd: input.remainingBudgetUsd
        };
        const result = await probeAdapter.probe(request);
        return buildOutcome({
            result,
            probeModel,
            primaryModel,
            startedAt,
            completedAt: input.now(),
            adapterId: probeAdapter.adapterId,
            providerId: probeAdapter.metadata.providerId
        });
    }
    catch (error) {
        let reason;
        if (error instanceof Error) {
            reason = error.message;
        }
        else {
            reason = String(error);
        }
        return {
            status: "failed",
            route: "primary",
            reason,
            primaryModel,
            initialModel: primaryModel,
            probeModel,
            usage: zeroUsage(),
            startedAt,
            completedAt: input.now(),
            adapterId: probeAdapter.adapterId,
            providerId: probeAdapter.metadata.providerId
        };
    }
}
|
|
71
|
+
/**
 * Converts a raw probe result into an outcome record.
 *
 * Any result whose status is not "completed" is reported as "failed" on the
 * "primary" route. For a completed result the route comes from
 * selectProbeRoute, and the initial model is the probe model on the
 * "cheap-first" route and the primary model otherwise.
 *
 * @param input - Probe result plus the models, timestamps, and adapter and
 *   provider ids to copy into the outcome.
 * @returns The failed or completed outcome object.
 */
function buildOutcome(input) {
    const { result, primaryModel, probeModel } = input;
    // Trailing fields shared by both outcome variants.
    const trailer = {
        startedAt: input.startedAt,
        completedAt: input.completedAt,
        adapterId: input.adapterId,
        providerId: input.providerId
    };

    if (result.status !== "completed") {
        return {
            status: "failed",
            route: "primary",
            reason: result.reason,
            primaryModel,
            initialModel: primaryModel,
            probeModel,
            usage: result.usage,
            ...trailer
        };
    }

    const route = selectProbeRoute({
        status: result.status,
        tier: result.tier
    });
    let initialModel = primaryModel;
    if (route === "cheap-first") {
        initialModel = probeModel;
    }
    return {
        status: "completed",
        route,
        reason: result.reason,
        primaryModel,
        initialModel,
        probeModel,
        tier: result.tier,
        usage: result.usage,
        ...trailer
    };
}
|
|
107
|
+
/**
 * Creates a fresh usage record with zero cost and zero tokens, marked with
 * provenance "unavailable".
 *
 * @returns A new usage object on every call.
 */
function zeroUsage() {
    const usage = {
        actualUsd: 0,
        tokensIn: 0,
        tokensOut: 0,
        provenance: "unavailable"
    };
    return usage;
}
|
|
115
|
+
//# sourceMappingURL=probe.js.map
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/** Confidence grades a patch proof can carry. */
export type ConfidenceGrade =
    | "A"
    | "B"
    | "C";
|
|
2
|
+
/** A single red finding considered when grading a patch. */
export interface ProofRedFinding {
    /** Identifier of the trap that produced the finding. */
    trapId: string;
    /** Severity of the finding: "warn" or "block". */
    severity: "warn" | "block";
    /** Human-readable description of the finding. */
    description: string;
}
|
|
7
|
+
/** Proof record describing a graded patch. */
export interface PatchProof {
    /** Run the patch belongs to. */
    runId: string;
    /** Identifier of the patch. */
    patchId: string;
    /** Confidence grade assigned to the patch. */
    confidenceGrade: ConfidenceGrade;
    /** Version tag of the proof bundle format. */
    proofBundleVersion: "h3.v1";
    /** Emission time, as a string. */
    emittedAt: string;
    changedSymbols: string[];
    affectedPublicInterfaces: string[];
    downstreamCallSites: string[];
    driftViolations: string[];
    /** Count of red findings that were resolved. */
    redFindingsResolved: number;
    residualRisk: "low" | "medium" | "high";
    rollbackCompatible: boolean;
    /** Required when confidenceGrade is C. */
    gradeJustification?: string;
}
|
|
23
|
+
/** Input needed to build a patch proof. */
export interface PatchProofInput {
    /** Run the patch belongs to. */
    runId: string;
    /** Identifier of the patch. */
    patchId: string;
    changedSymbols: string[];
    affectedPublicInterfaces: string[];
    downstreamCallSites: string[];
    /** Drift violations taken into account when grading. */
    driftViolations: string[];
    /** Count of red findings that were resolved. */
    redFindingsResolved: number;
    /** Red findings taken into account when grading. */
    redFindings: ProofRedFinding[];
    rollbackCompatible: boolean;
    /** Provide when the grade would be C, to satisfy the justification requirement. */
    gradeJustification?: string;
}
|
|
36
|
+
/**
 * Determines the confidence grade from red findings and drift violations.
 *
 * D — any block-severity finding → caller must REJECT, never emit
 * A — zero warn findings and no drift violations
 * B — 1–2 warn-severity findings and no drift violations
 * C — more than 2 warn findings, or any drift violation (none block) —
 *     requires gradeJustification
 *
 * @param input - The red findings and drift violations to grade.
 * @returns "A", "B", or "C", or "D" when the patch must be rejected.
 */
export declare function gradeFromFindings(input: {
    redFindings: ProofRedFinding[];
    driftViolations: string[];
}): ConfidenceGrade | "D";
|
|
48
|
+
/**
 * Builds a PatchProof object.
 *
 * Never emits grade D: the caller should reject such a patch.
 *
 * @param input - Patch details and findings to grade.
 * @returns The built proof.
 * @throws Error when the grade is D (block-severity findings), or when the
 *   grade is C and no non-empty gradeJustification is supplied.
 */
export declare function buildPatchProof(input: PatchProofInput): PatchProof;
|
|
53
|
+
/**
 * Builds a patch proof and writes it as patch-proof.json into the run
 * output directory.
 *
 * @param input - Patch details and findings to grade.
 * @param runDir - Run output directory to write into.
 * @returns The written proof, or null when the patch should be rejected
 *   (grade D). A null result means NO file was written.
 */
export declare function emitPatchProof(
    input: PatchProofInput,
    runDir: string
): Promise<PatchProof | null>;
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import { writeFile } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
// ─── Grade logic ──────────────────────────────────────────────────────────────
|
|
4
|
+
/**
 * Determines the confidence grade from findings.
 *
 *   A — no warn or block findings and no drift violations
 *   B — 1–2 warn findings, no block findings, no drift violations
 *   C — more than 2 warn findings, or any drift violation (no block findings);
 *       a proof at this grade requires gradeJustification
 *   D — any block-severity finding → caller must REJECT, never emit
 *
 * Findings whose severity is neither "warn" nor "block" do not affect the grade.
 *
 * @param input Object carrying `redFindings` (each with a `severity`) and `driftViolations`.
 * @returns "A", "B", "C" or "D".
 */
export function gradeFromFindings(input) {
    let warnCount = 0;
    for (const finding of input.redFindings) {
        // A single block-severity finding decides the grade outright.
        if (finding.severity === "block")
            return "D";
        if (finding.severity === "warn")
            warnCount += 1;
    }
    // Drift violations cap the grade at C no matter how few warnings there are.
    if (input.driftViolations.length > 0)
        return "C";
    if (warnCount === 0)
        return "A";
    return warnCount <= 2 ? "B" : "C";
}
|
|
24
|
+
/**
 * Maps a confidence grade to the residual-risk label stored in the proof:
 * "A" gives "low", "B" gives "medium", and any other value gives "high".
 */
function residualRiskFromGrade(grade) {
    switch (grade) {
        case "A":
            return "low";
        case "B":
            return "medium";
        default:
            return "high";
    }
}
|
|
31
|
+
// ─── Builder ──────────────────────────────────────────────────────────────────
|
|
32
|
+
/**
 * Builds a PatchProof object for a patch graded A, B or C.
 *
 * Never emits grade D — the caller should reject the patch instead. Note that
 * rejection is signalled by throwing, not by returning null.
 *
 * @param input Run/patch identifiers, findings, drift violations and the
 *   fields that are copied into the proof as-is.
 * @returns The proof, stamped with the current time and bundle version "h3.v1".
 * @throws {Error} If the findings grade as D (any block-severity finding).
 * @throws {Error} If the grade is C and gradeJustification is missing or blank.
 */
export function buildPatchProof(input) {
    const grade = gradeFromFindings({
        redFindings: input.redFindings,
        driftViolations: input.driftViolations
    });
    // Trim first so a whitespace-only justification counts as missing.
    const gradeJustification = input.gradeJustification?.trim();
    if (grade === "D") {
        throw new Error(`buildPatchProof: patch ${input.patchId} has block-severity findings — must be rejected, not proven.`);
    }
    if (grade === "C" && !gradeJustification) {
        throw new Error(`buildPatchProof: grade C requires a non-empty gradeJustification for patch ${input.patchId}.`);
    }
    const proof = {
        runId: input.runId,
        patchId: input.patchId,
        confidenceGrade: grade,
        proofBundleVersion: "h3.v1",
        emittedAt: new Date().toISOString(),
        changedSymbols: input.changedSymbols,
        affectedPublicInterfaces: input.affectedPublicInterfaces,
        downstreamCallSites: input.downstreamCallSites,
        driftViolations: input.driftViolations,
        redFindingsResolved: input.redFindingsResolved,
        residualRisk: residualRiskFromGrade(grade),
        rollbackCompatible: input.rollbackCompatible,
        // Only a grade-C proof carries the (trimmed) justification.
        ...(grade === "C" && gradeJustification ? { gradeJustification } : {})
    };
    return proof;
}
|
|
65
|
+
// ─── Emitter ──────────────────────────────────────────────────────────────────
|
|
66
|
+
/**
 * Grades the patch and, for any grade other than D, writes the proof as
 * pretty-printed JSON to `patch-proof.json` under `runDir`.
 *
 * Resolves to the written proof, or to null when the patch must be rejected
 * (grade D). A null result means NO file was written.
 *
 * Errors thrown by buildPatchProof (for example grade C without a
 * justification) and by the file write surface as a rejected promise.
 */
export async function emitPatchProof(input, runDir) {
    const { redFindings, driftViolations } = input;
    const mustReject = gradeFromFindings({ redFindings, driftViolations }) === "D";
    if (mustReject) {
        // Grade D is never emitted — patch is rejected
        return null;
    }
    const proof = buildPatchProof(input);
    const targetPath = join(runDir, "patch-proof.json");
    const serialized = JSON.stringify(proof, null, 2);
    await writeFile(targetPath, serialized, "utf8");
    return proof;
}
|
|
84
|
+
//# sourceMappingURL=patch-proof.js.map
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/** Grades the semantic probe accepts and returns; a downgrade never goes below "C". */
export type ProbeGrade = "A" | "B" | "C";
|
|
2
|
+
/**
 * Explains a probe result: the model answered "no" ("semantic_implausible"),
 * the probe was skipped ("probe_disabled"), or the model call failed or gave
 * an ambiguous reply ("probe_error").
 */
export type ProbeReasonCode = "semantic_implausible" | "probe_disabled" | "probe_error";
|
|
3
|
+
/** Arguments for `semanticPlausibilityProbe`. */
export interface SemanticProbeOptions {
    /** Objective text inserted into the probe prompt. */
    objective: string;
    /** Diff text inserted into the probe prompt. */
    diff: string;
    /** Sends the prompt to a model and resolves to its raw reply; when omitted the probe is skipped. */
    modelCallFn?: (prompt: string) => Promise<string>;
    /** Pass `enabled: false` to skip the probe; leaving `policy` out keeps it enabled. */
    policy?: {
        enabled: boolean;
    };
}
|
|
11
|
+
/** Outcome of `semanticPlausibilityProbe`. */
export interface SemanticProbeResult {
    /** False only when the model answered "no"; every fail-open path reports true. */
    plausible: boolean;
    /** The base grade, or the base grade lowered one step when `plausible` is false. */
    grade: ProbeGrade;
    /** Present whenever the result is anything other than a plain "yes". */
    reasonCode?: ProbeReasonCode;
    /** Human-readable detail accompanying `reasonCode`. */
    reason?: string;
}
|
|
17
|
+
/**
 * Asks a model whether `diff` plausibly achieves `objective`. The answer is a
 * SOFT signal: a "no" lowers the grade by one step (A→B, B→C, C stays C) but
 * never hard-blocks a patch.
 *
 * Fail-open: when the probe is disabled by policy, no modelCallFn is supplied,
 * the model call throws, or the reply is ambiguous, the result is
 * plausible:true with the grade unchanged and a reasonCode saying why.
 *
 * @param options Objective, diff, model callback and optional policy.
 * @param baseGrade Grade before the probe; the implementation defaults it to "A".
 */
export declare function semanticPlausibilityProbe(options: SemanticProbeOptions, baseGrade?: ProbeGrade): Promise<SemanticProbeResult>;
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
// SLICE-08 — Semantic plausibility probe
|
|
2
|
+
// Soft second-order guard that asks a model: "does this diff plausibly solve
|
|
3
|
+
// this objective?" Failure downgrades the patch grade but never hard-blocks.
|
|
4
|
+
/** Lowers a grade one step: "A" becomes "B"; every other value becomes "C". */
function downgradeGrade(grade) {
    return grade === "A" ? "B" : "C";
}
|
|
11
|
+
/**
 * Assembles the probe prompt: a role line, the objective, the diff, a blank
 * line, and the yes/no question, separated by newlines.
 */
function buildProbePrompt(objective, diff) {
    const roleLine = "You are a code review assistant.";
    const objectiveLine = "Objective: " + objective;
    const diffSection = "Diff:\n" + diff;
    const question = 'Does this diff plausibly solve the objective? Answer with exactly "yes" or "no".';
    return roleLine + "\n" + objectiveLine + "\n" + diffSection + "\n\n" + question;
}
|
|
20
|
+
/**
 * Runs a cheap model probe to check whether a diff is semantically plausible
 * for the given objective. This is a SOFT signal — it never hard-blocks a
 * patch. A "no" response downgrades the grade (A→B, B→C, C→C).
 *
 * The verdict is the first whole word "yes" or "no" in the reply
 * (case-insensitive). Matching whole words keeps replies such as
 * "I do not know" or "cannot tell" from being read as "no".
 *
 * Fail-open: if the probe is disabled, no modelCallFn is provided, the model
 * call throws, or the reply contains neither verdict word, returns
 * plausible:true with grade unchanged and a reasonCode explaining why.
 *
 * @param options Objective, diff, optional model callback and optional policy.
 * @param baseGrade Grade before the probe; defaults to "A".
 * @returns The probe result; this function never rejects because of the model call.
 */
export async function semanticPlausibilityProbe(options, baseGrade = "A") {
    const { objective, diff, modelCallFn, policy } = options;
    // Disabled path
    if (policy && !policy.enabled) {
        return {
            plausible: true,
            grade: baseGrade,
            reasonCode: "probe_disabled",
            reason: "Semantic probe is disabled by policy"
        };
    }
    // No model function provided — fail open
    if (!modelCallFn) {
        return {
            plausible: true,
            grade: baseGrade,
            reasonCode: "probe_disabled",
            reason: "No modelCallFn provided; probe skipped"
        };
    }
    try {
        const prompt = buildProbePrompt(objective, diff);
        const response = await modelCallFn(prompt);
        const normalized = response.trim().toLowerCase();
        // Whole-word match, first occurrence wins: a substring test would see
        // "no" inside "not"/"know"/"cannot" and "yes" inside "eyes".
        const verdict = /\b(yes|no)\b/.exec(normalized)?.[1];
        if (verdict === "yes") {
            return { plausible: true, grade: baseGrade };
        }
        if (verdict === "no") {
            const downgradedGrade = downgradeGrade(baseGrade);
            return {
                plausible: false,
                grade: downgradedGrade,
                reasonCode: "semantic_implausible",
                reason: "Model probe responded: diff does not plausibly solve objective"
            };
        }
        // Ambiguous response — fail open
        return {
            plausible: true,
            grade: baseGrade,
            reasonCode: "probe_error",
            reason: `Ambiguous probe response: "${response.slice(0, 80)}"`
        };
    }
    catch (err) {
        // Covers a rejected model call and a non-string reply alike.
        const message = err instanceof Error ? err.message : String(err);
        return {
            plausible: true,
            grade: baseGrade,
            reasonCode: "probe_error",
            reason: `Probe model call failed: ${message}`
        };
    }
}
|
|
82
|
+
//# sourceMappingURL=semantic-probe.js.map
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/** One case fed to `runFailureModeRecoverySuite`. */
export interface FailureModeRecoveryCase {
    modeId: string;
    injectedFault: string;
    expectedRecovery: string;
    /** Counted toward the suite's recovered total when true. */
    recovered: boolean;
    /** Counted toward the suite's silent-corruption total when true. */
    silentCorruption: boolean;
    /** The largest value across all cases is checked against the suite's time limit. */
    recoveryTimeMs: number;
    /** Copied (as a new array) into the report alongside the case. */
    evidence: string[];
}
|
|
10
|
+
/** Input for `runFailureModeRecoverySuite`: the cases plus optional gate thresholds. */
export interface FailureModeRecoverySuiteInput {
    suiteId: string;
    cases: FailureModeRecoveryCase[];
    /** Minimum recovery rate as a percentage (0–100); the runner defaults it to 99. */
    minRecoveryRate?: number;
    /** Highest tolerated number of silent-corruption cases; the runner defaults it to 0. */
    maxSilentCorruption?: number;
    /** Upper bound on the slowest recovery, in milliseconds; unlimited when omitted. */
    maxRecoveryTimeMs?: number;
}
|
|
17
|
+
/** Result produced by `runFailureModeRecoverySuite`. */
export interface FailureModeRecoverySuiteReport {
    suiteId: string;
    totalCases: number;
    recoveredCases: number;
    /** Percentage of recovered cases, rounded to two decimals; 0 for an empty suite. */
    recoveryRate: number;
    silentCorruptionCount: number;
    /** Largest `recoveryTimeMs` among the cases, never below 0. */
    maxObservedRecoveryTimeMs: number;
    /** "pass" only when the rate, corruption and time thresholds are all met. */
    gateStatus: "pass" | "fail";
    /** Pre-worded claim sentence quoting the recovered and silent-corruption counts. */
    allowedClaimWording: string;
    /** Claims the report explicitly does not support. */
    nonClaims: string[];
    /** Copies of the input cases. */
    cases: FailureModeRecoveryCase[];
}
|
|
29
|
+
/**
 * Scores a suite of failure-mode recovery cases and decides its gate status
 * from the recovery rate, silent-corruption count and slowest recovery time.
 */
export declare function runFailureModeRecoverySuite(input: FailureModeRecoverySuiteInput): FailureModeRecoverySuiteReport;
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
 * Scores a suite of failure-mode recovery cases and decides its gate.
 *
 * The gate passes only when all three hold:
 *   - recoveryRate (a percentage rounded to two decimals) >= minRecoveryRate (default 99)
 *   - number of silent-corruption cases <= maxSilentCorruption (default 0)
 *   - slowest observed recovery <= maxRecoveryTimeMs (default: no limit)
 *
 * An empty suite has a recoveryRate of 0, so it fails unless minRecoveryRate <= 0.
 *
 * @param input Suite id, cases and optional thresholds.
 * @returns The report; its `cases` are produced by passing each input case through cloneCase.
 */
export function runFailureModeRecoverySuite(input) {
    const totalCases = input.cases.length;
    const recoveredCases = input.cases.filter((testCase) => testCase.recovered).length;
    const silentCorruptionCount = input.cases.filter((testCase) => testCase.silentCorruption).length;
    // Fold instead of Math.max(0, ...times): spreading a very large array as
    // call arguments can throw RangeError. Math.max is kept so NaN still
    // propagates and fails the time check below.
    const maxObservedRecoveryTimeMs = input.cases.reduce((slowest, testCase) => Math.max(slowest, testCase.recoveryTimeMs), 0);
    const recoveryRate = totalCases === 0 ? 0 : Math.round((recoveredCases / totalCases) * 10000) / 100;
    const minRecoveryRate = input.minRecoveryRate ?? 99;
    const maxSilentCorruption = input.maxSilentCorruption ?? 0;
    const maxRecoveryTimeMs = input.maxRecoveryTimeMs ?? Number.POSITIVE_INFINITY;
    // Note: the rate threshold is checked against the rounded percentage.
    const gateStatus = recoveryRate >= minRecoveryRate &&
        silentCorruptionCount <= maxSilentCorruption &&
        maxObservedRecoveryTimeMs <= maxRecoveryTimeMs
        ? "pass"
        : "fail";
    return {
        suiteId: input.suiteId,
        totalCases,
        recoveredCases,
        recoveryRate,
        silentCorruptionCount,
        maxObservedRecoveryTimeMs,
        gateStatus,
        allowedClaimWording: `Self-healing for the declared failure-mode catalog only: ${recoveredCases}/${totalCases} recovered, ` +
            `${silentCorruptionCount} silent-corruption cases.`,
        nonClaims: [
            "unqualified self-healing",
            "universal recovery",
            "recovery for undeclared failure modes"
        ],
        cases: input.cases.map(cloneCase)
    };
}
|
|
33
|
+
/**
 * Returns a shallow copy of a case whose `evidence` is a new array, so the
 * copy's evidence list can change without touching the original's.
 */
function cloneCase(testCase) {
    const duplicate = { ...testCase };
    duplicate.evidence = [...testCase.evidence];
    return duplicate;
}
|
|
39
|
+
//# sourceMappingURL=failure-mode-runner.js.map
|