martin-loop 0.1.4 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CODE_OF_CONDUCT.md +32 -0
- package/README.md +172 -227
- package/demo/seeded-workspace/README.md +35 -0
- package/demo/seeded-workspace/TASKS.md +29 -0
- package/demo/seeded-workspace/martin.config.yaml +11 -0
- package/demo/seeded-workspace/package.json +8 -0
- package/demo/seeded-workspace/src/invoice-summary.js +11 -0
- package/demo/seeded-workspace/test/invoice-summary.test.js +20 -0
- package/dist/bin/martin-loop.js +0 -0
- package/dist/vendor/adapters/claude-cli.d.ts +19 -4
- package/dist/vendor/adapters/claude-cli.js +55 -24
- package/dist/vendor/adapters/cli-bridge.d.ts +1 -0
- package/dist/vendor/adapters/cli-bridge.js +154 -28
- package/dist/vendor/adapters/counter.d.ts +1 -0
- package/dist/vendor/adapters/counter.js +4 -0
- package/dist/vendor/adapters/git-baseline.d.ts +50 -0
- package/dist/vendor/adapters/git-baseline.js +233 -0
- package/dist/vendor/adapters/index.d.ts +1 -0
- package/dist/vendor/adapters/index.js +1 -0
- package/dist/vendor/adapters/openrouter-adapter.d.ts +15 -0
- package/dist/vendor/adapters/openrouter-adapter.js +302 -0
- package/dist/vendor/adapters/usage.d.ts +48 -0
- package/dist/vendor/adapters/usage.js +66 -0
- package/dist/vendor/adapters/verifier-only.d.ts +7 -0
- package/dist/vendor/adapters/verifier-only.js +57 -0
- package/dist/vendor/cli/bin/exit.d.ts +12 -0
- package/dist/vendor/cli/bin/exit.js +28 -0
- package/dist/vendor/cli/commands/analyze.d.ts +5 -0
- package/dist/vendor/cli/commands/analyze.js +58 -0
- package/dist/vendor/cli/commands/audit-log-verify.d.ts +34 -0
- package/dist/vendor/cli/commands/audit-log-verify.js +99 -0
- package/dist/vendor/cli/commands/audit.d.ts +8 -0
- package/dist/vendor/cli/commands/audit.js +199 -0
- package/dist/vendor/cli/commands/corpus.d.ts +5 -0
- package/dist/vendor/cli/commands/corpus.js +60 -0
- package/dist/vendor/cli/commands/doctor.d.ts +8 -0
- package/dist/vendor/cli/commands/doctor.js +219 -0
- package/dist/vendor/cli/commands/explain.d.ts +17 -0
- package/dist/vendor/cli/commands/explain.js +176 -0
- package/dist/vendor/cli/commands/export.d.ts +5 -0
- package/dist/vendor/cli/commands/export.js +60 -0
- package/dist/vendor/cli/commands/governance.d.ts +8 -0
- package/dist/vendor/cli/commands/governance.js +95 -0
- package/dist/vendor/cli/commands/improve.d.ts +18 -0
- package/dist/vendor/cli/commands/improve.js +396 -0
- package/dist/vendor/cli/commands/init.d.ts +8 -0
- package/dist/vendor/cli/commands/init.js +281 -0
- package/dist/vendor/cli/commands/migration.d.ts +8 -0
- package/dist/vendor/cli/commands/migration.js +67 -0
- package/dist/vendor/cli/commands/prior.d.ts +23 -0
- package/dist/vendor/cli/commands/prior.js +145 -0
- package/dist/vendor/cli/commands/resume.d.ts +21 -0
- package/dist/vendor/cli/commands/resume.js +73 -0
- package/dist/vendor/cli/commands/verify.d.ts +6 -0
- package/dist/vendor/cli/commands/verify.js +43 -0
- package/dist/vendor/cli/index.d.ts +6 -1
- package/dist/vendor/cli/index.js +124 -7
- package/dist/vendor/cli/research/public-corpus.d.ts +43 -0
- package/dist/vendor/cli/research/public-corpus.js +151 -0
- package/dist/vendor/cli/ui/error-card.d.ts +38 -0
- package/dist/vendor/cli/ui/error-card.js +103 -0
- package/dist/vendor/cli/ui/mission-brief.d.ts +41 -0
- package/dist/vendor/cli/ui/mission-brief.js +173 -0
- package/dist/vendor/cli/ui/summary-card.d.ts +34 -0
- package/dist/vendor/cli/ui/summary-card.js +102 -0
- package/dist/vendor/contracts/audit.d.ts +46 -0
- package/dist/vendor/contracts/audit.js +360 -0
- package/dist/vendor/contracts/index.d.ts +3 -1
- package/dist/vendor/contracts/post-phase15.d.ts +240 -0
- package/dist/vendor/contracts/post-phase15.js +166 -0
- package/dist/vendor/core/agent/mandates.d.ts +46 -0
- package/dist/vendor/core/agent/mandates.js +178 -0
- package/dist/vendor/core/agent/receipts.d.ts +38 -0
- package/dist/vendor/core/agent/receipts.js +131 -0
- package/dist/vendor/core/agent/signing.d.ts +17 -0
- package/dist/vendor/core/agent/signing.js +91 -0
- package/dist/vendor/core/attestation/sign.d.ts +25 -0
- package/dist/vendor/core/attestation/sign.js +216 -0
- package/dist/vendor/core/autonomy/autonomous-promotion.d.ts +120 -0
- package/dist/vendor/core/autonomy/autonomous-promotion.js +346 -0
- package/dist/vendor/core/autonomy/envelope-v2.d.ts +29 -0
- package/dist/vendor/core/autonomy/envelope-v2.js +60 -0
- package/dist/vendor/core/autonomy/envelope.d.ts +17 -0
- package/dist/vendor/core/autonomy/envelope.js +27 -0
- package/dist/vendor/core/autonomy/escalation-ledger.d.ts +20 -0
- package/dist/vendor/core/autonomy/escalation-ledger.js +18 -0
- package/dist/vendor/core/autonomy/resume.d.ts +15 -0
- package/dist/vendor/core/autonomy/resume.js +23 -0
- package/dist/vendor/core/circuit/circuit-breaker.d.ts +60 -0
- package/dist/vendor/core/circuit/circuit-breaker.js +143 -0
- package/dist/vendor/core/compiler.d.ts +2 -0
- package/dist/vendor/core/compiler.js +10 -4
- package/dist/vendor/core/context-distillation.d.ts +3 -0
- package/dist/vendor/core/context-distillation.js +44 -0
- package/dist/vendor/core/context-flow/compile-context.d.ts +8 -0
- package/dist/vendor/core/context-flow/compile-context.js +111 -0
- package/dist/vendor/core/context-flow/entities.d.ts +2 -0
- package/dist/vendor/core/context-flow/entities.js +44 -0
- package/dist/vendor/core/context-flow/evaluate-policy.d.ts +2 -0
- package/dist/vendor/core/context-flow/evaluate-policy.js +42 -0
- package/dist/vendor/core/context-flow/index.d.ts +11 -0
- package/dist/vendor/core/context-flow/index.js +24 -0
- package/dist/vendor/core/context-flow/labels.d.ts +3 -0
- package/dist/vendor/core/context-flow/labels.js +17 -0
- package/dist/vendor/core/context-flow/normalizer.d.ts +9 -0
- package/dist/vendor/core/context-flow/normalizer.js +69 -0
- package/dist/vendor/core/context-flow/profiles.d.ts +33 -0
- package/dist/vendor/core/context-flow/profiles.js +36 -0
- package/dist/vendor/core/context-flow/redaction.d.ts +1 -0
- package/dist/vendor/core/context-flow/redaction.js +6 -0
- package/dist/vendor/core/context-flow/sensitivity.d.ts +2 -0
- package/dist/vendor/core/context-flow/sensitivity.js +27 -0
- package/dist/vendor/core/context-flow/sync-preview.d.ts +2 -0
- package/dist/vendor/core/context-flow/sync-preview.js +22 -0
- package/dist/vendor/core/context-flow/token-estimator.d.ts +3 -0
- package/dist/vendor/core/context-flow/token-estimator.js +13 -0
- package/dist/vendor/core/context-flow/types.d.ts +91 -0
- package/dist/vendor/core/context-flow/types.js +2 -0
- package/dist/vendor/core/context-integrity.d.ts +26 -0
- package/dist/vendor/core/context-integrity.js +56 -0
- package/dist/vendor/core/context-utility.d.ts +47 -0
- package/dist/vendor/core/context-utility.js +405 -0
- package/dist/vendor/core/cost/pipeline.d.ts +92 -0
- package/dist/vendor/core/cost/pipeline.js +141 -0
- package/dist/vendor/core/cost/tagged-cost.d.ts +27 -0
- package/dist/vendor/core/cost/tagged-cost.js +55 -0
- package/dist/vendor/core/cost-governor.d.ts +2 -0
- package/dist/vendor/core/cost-governor.js +50 -0
- package/dist/vendor/core/cve/cve-check.d.ts +80 -0
- package/dist/vendor/core/cve/cve-check.js +172 -0
- package/dist/vendor/core/digital-twin/index.d.ts +27 -0
- package/dist/vendor/core/digital-twin/index.js +90 -0
- package/dist/vendor/core/drift/drift-graph.d.ts +47 -0
- package/dist/vendor/core/drift/drift-graph.js +100 -0
- package/dist/vendor/core/drift/objective-lock.d.ts +69 -0
- package/dist/vendor/core/drift/objective-lock.js +88 -0
- package/dist/vendor/core/drift/scope.d.ts +46 -0
- package/dist/vendor/core/drift/scope.js +102 -0
- package/dist/vendor/core/drift/signature-lock.d.ts +48 -0
- package/dist/vendor/core/drift/signature-lock.js +202 -0
- package/dist/vendor/core/drift/stale-proof-gate.d.ts +21 -0
- package/dist/vendor/core/drift/stale-proof-gate.js +19 -0
- package/dist/vendor/core/eval/known-bad-world-runner.d.ts +24 -0
- package/dist/vendor/core/eval/known-bad-world-runner.js +256 -0
- package/dist/vendor/core/evidence/claim-audit.d.ts +18 -0
- package/dist/vendor/core/evidence/claim-audit.js +89 -0
- package/dist/vendor/core/exit-intelligence.d.ts +2 -0
- package/dist/vendor/core/exit-intelligence.js +58 -0
- package/dist/vendor/core/explain/formatter.d.ts +42 -0
- package/dist/vendor/core/explain/formatter.js +171 -0
- package/dist/vendor/core/explain/timeline.d.ts +29 -0
- package/dist/vendor/core/explain/timeline.js +213 -0
- package/dist/vendor/core/failure-taxonomy.d.ts +2 -0
- package/dist/vendor/core/failure-taxonomy.js +76 -0
- package/dist/vendor/core/gateway/index.d.ts +10 -0
- package/dist/vendor/core/gateway/index.js +12 -0
- package/dist/vendor/core/gateway/registry.d.ts +40 -0
- package/dist/vendor/core/gateway/registry.js +97 -0
- package/dist/vendor/core/gateway/transport.d.ts +31 -0
- package/dist/vendor/core/gateway/transport.js +82 -0
- package/dist/vendor/core/gateway/vault.d.ts +19 -0
- package/dist/vendor/core/gateway/vault.js +29 -0
- package/dist/vendor/core/graph/adapters.d.ts +43 -0
- package/dist/vendor/core/graph/adapters.js +91 -0
- package/dist/vendor/core/graph/hotspots.d.ts +22 -0
- package/dist/vendor/core/graph/hotspots.js +30 -0
- package/dist/vendor/core/graph/index.d.ts +1 -0
- package/dist/vendor/core/graph/index.js +2 -0
- package/dist/vendor/core/honey/honey-tokens.d.ts +32 -0
- package/dist/vendor/core/honey/honey-tokens.js +44 -0
- package/dist/vendor/core/index.d.ts +7 -4
- package/dist/vendor/core/index.js +222 -64
- package/dist/vendor/core/learning/bayesian-update.d.ts +31 -0
- package/dist/vendor/core/learning/bayesian-update.js +60 -0
- package/dist/vendor/core/learning/prior-sets.d.ts +42 -0
- package/dist/vendor/core/learning/prior-sets.js +111 -0
- package/dist/vendor/core/learning/promotion-gate.d.ts +17 -0
- package/dist/vendor/core/learning/promotion-gate.js +23 -0
- package/dist/vendor/core/leash/blast-radius.d.ts +42 -0
- package/dist/vendor/core/leash/blast-radius.js +156 -0
- package/dist/vendor/core/leash/policy-leash.d.ts +31 -0
- package/dist/vendor/core/leash/policy-leash.js +117 -0
- package/dist/vendor/core/memo/memo.d.ts +63 -0
- package/dist/vendor/core/memo/memo.js +97 -0
- package/dist/vendor/core/memory/learning-pipeline.d.ts +154 -0
- package/dist/vendor/core/memory/learning-pipeline.js +391 -0
- package/dist/vendor/core/memory/palace.d.ts +84 -0
- package/dist/vendor/core/memory/palace.js +379 -0
- package/dist/vendor/core/merge/ast-merge.d.ts +22 -0
- package/dist/vendor/core/merge/ast-merge.js +350 -0
- package/dist/vendor/core/merge/text-merge.d.ts +12 -0
- package/dist/vendor/core/merge/text-merge.js +182 -0
- package/dist/vendor/core/otel/tracer.d.ts +45 -0
- package/dist/vendor/core/otel/tracer.js +116 -0
- package/dist/vendor/core/parallel/parallel-attempts.d.ts +28 -0
- package/dist/vendor/core/parallel/parallel-attempts.js +41 -0
- package/dist/vendor/core/parallel/scorer.d.ts +24 -0
- package/dist/vendor/core/parallel/scorer.js +65 -0
- package/dist/vendor/core/pattern-detection.d.ts +64 -0
- package/dist/vendor/core/pattern-detection.js +108 -0
- package/dist/vendor/core/persistence/checkpoint.d.ts +44 -0
- package/dist/vendor/core/persistence/checkpoint.js +156 -0
- package/dist/vendor/core/persistence/cleanup.d.ts +22 -0
- package/dist/vendor/core/persistence/cleanup.js +131 -0
- package/dist/vendor/core/persistence/index.d.ts +2 -0
- package/dist/vendor/core/persistence/index.js +1 -0
- package/dist/vendor/core/persistence/runs-reader.d.ts +52 -0
- package/dist/vendor/core/persistence/runs-reader.js +84 -0
- package/dist/vendor/core/persistence/store.d.ts +6 -1
- package/dist/vendor/core/persistence/store.js +5 -0
- package/dist/vendor/core/policy/file-touch-quota.d.ts +60 -0
- package/dist/vendor/core/policy/file-touch-quota.js +105 -0
- package/dist/vendor/core/policy/policy-loader.d.ts +30 -0
- package/dist/vendor/core/policy/policy-loader.js +170 -0
- package/dist/vendor/core/policy/policy-schema.d.ts +55 -0
- package/dist/vendor/core/policy/policy-schema.js +78 -0
- package/dist/vendor/core/policy.d.ts +6 -0
- package/dist/vendor/core/probe/probe.d.ts +49 -0
- package/dist/vendor/core/probe/probe.js +115 -0
- package/dist/vendor/core/proof/patch-proof.d.ts +58 -0
- package/dist/vendor/core/proof/patch-proof.js +84 -0
- package/dist/vendor/core/proof/semantic-probe.d.ts +25 -0
- package/dist/vendor/core/proof/semantic-probe.js +82 -0
- package/dist/vendor/core/recovery/failure-mode-runner.d.ts +29 -0
- package/dist/vendor/core/recovery/failure-mode-runner.js +39 -0
- package/dist/vendor/core/red-blue/red-phase.d.ts +64 -0
- package/dist/vendor/core/red-blue/red-phase.js +141 -0
- package/dist/vendor/core/red-blue/risk-tiers.d.ts +22 -0
- package/dist/vendor/core/red-blue/risk-tiers.js +33 -0
- package/dist/vendor/core/replay/replay.d.ts +85 -0
- package/dist/vendor/core/replay/replay.js +109 -0
- package/dist/vendor/core/router/engine.d.ts +54 -0
- package/dist/vendor/core/router/engine.js +131 -0
- package/dist/vendor/core/router/index.d.ts +1 -0
- package/dist/vendor/core/router/index.js +2 -0
- package/dist/vendor/core/router/trust-calibration.d.ts +57 -0
- package/dist/vendor/core/router/trust-calibration.js +127 -0
- package/dist/vendor/core/run-martin.d.ts +2 -0
- package/dist/vendor/core/run-martin.js +287 -0
- package/dist/vendor/core/security/cve-scanner.d.ts +62 -0
- package/dist/vendor/core/security/cve-scanner.js +178 -0
- package/dist/vendor/core/sentinel/efficiency-sentinel.d.ts +29 -0
- package/dist/vendor/core/sentinel/efficiency-sentinel.js +30 -0
- package/dist/vendor/core/sentinel/progress-guard.d.ts +35 -0
- package/dist/vendor/core/sentinel/progress-guard.js +46 -0
- package/dist/vendor/core/siem/siem-emitter.d.ts +49 -0
- package/dist/vendor/core/siem/siem-emitter.js +157 -0
- package/dist/vendor/core/strategy/attempt-brief.d.ts +22 -0
- package/dist/vendor/core/strategy/attempt-brief.js +89 -0
- package/dist/vendor/core/summarize/diff-summary.d.ts +35 -0
- package/dist/vendor/core/summarize/diff-summary.js +204 -0
- package/dist/vendor/core/surface-signals.d.ts +21 -0
- package/dist/vendor/core/surface-signals.js +139 -0
- package/dist/vendor/core/truth/truth-wall.d.ts +51 -0
- package/dist/vendor/core/truth/truth-wall.js +69 -0
- package/dist/vendor/core/truth-spine.d.ts +26 -0
- package/dist/vendor/core/truth-spine.js +62 -0
- package/dist/vendor/core/types.d.ts +115 -0
- package/dist/vendor/core/types.js +2 -0
- package/dist/vendor/core/verification/tiered-verify.d.ts +17 -0
- package/dist/vendor/core/verification/tiered-verify.js +29 -0
- package/dist/vendor/core/verifier-pyramid.d.ts +32 -0
- package/dist/vendor/core/verifier-pyramid.js +111 -0
- package/dist/vendor/core/workflow-artifacts.d.ts +99 -0
- package/dist/vendor/core/workflow-artifacts.js +668 -0
- package/dist/vendor/core/wrap/supervised-run.d.ts +96 -0
- package/dist/vendor/core/wrap/supervised-run.js +178 -0
- package/docs/assets/cli-animated.svg +139 -0
- package/docs/assets/cli-static.svg +34 -0
- package/docs/assets/github-hero-v2.svg +23 -0
- package/docs/assets/martin-raplph.png.jpg +0 -0
- package/docs/assets/martinloop-logo.png +0 -0
- package/docs/assets/nvidia-inception-program-light.png +0 -0
- package/docs/assets/nvidia-inception-program.png +0 -0
- package/docs/assets/phase3c-sidesidebyside-demo.html +228 -0
- package/docs/assets/side-by-side.svg +134 -0
- package/docs/oss/CLAUDE-CODE-WALKTHROUGH.md +142 -0
- package/docs/oss/EXAMPLES.md +9 -1
- package/docs/oss/OSS-BOUNDARY-REPORT.json +109 -113
- package/docs/oss/OSS-BOUNDARY-REPORT.md +48 -48
- package/docs/oss/QUICKSTART.md +39 -4
- package/docs/oss/RALPH-LOOP-SAFETY.md +113 -0
- package/docs/oss/README.md +7 -4
- package/docs/oss/RELEASE-SURFACE-REPORT.json +46 -45
- package/docs/oss/RELEASE-SURFACE-REPORT.md +36 -35
- package/package.json +129 -49
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
import { randomUUID } from "node:crypto";
|
|
2
|
+
import { existsSync } from "node:fs";
|
|
3
|
+
import { mkdir, readFile, writeFile } from "node:fs/promises";
|
|
4
|
+
import { dirname, resolve } from "node:path";
|
|
5
|
+
import { buildAutonomousPromotionArtifact, buildCoreAutonomousPromotionArtifactInput, buildImprovementEvidenceSummary, buildImprovementResult, buildImprovementTask, ensureCanonicalAttemptEvidenceArtifacts, resolveImprovementArtifactPaths, selectTopImprovementTarget, writeAutonomousPromotionArtifact, writeImprovementResultArtifact, writePostChangeHoldoutArtifact } from "@martin/trace-intelligence";
|
|
6
|
+
/**
 * CLI handler for the `improve` command.
 *
 * Reads a trace report, chooses a pattern to target, and either prints a
 * dry-run preview or executes a live improvement loop and writes its
 * evidence artifacts.
 *
 * Flags read from `args` in this function:
 *   --report <path>            trace report to read (default "trace-report.json")
 *   --pattern <kind>           target a specific pattern kind instead of the top one
 *   --dry-run                  print the planned task as JSON and stop
 *   --live                     only checked for the conflict with --dry-run here
 *   --certify-trace-autonomy   switches surface scope, retry limit and default iterations
 *   --negative-control         also enabled by MARTIN_TRACE_AUTONOMY_NEGATIVE_CONTROL = "1" / "true"
 * Additional flags (--engine, --model, --max-iterations) are consumed by the
 * resolve* helpers called below.
 *
 * @param {string[]} args - Raw command-line arguments for the command.
 * @returns {Promise<{exitCode: number, stdout: string, stderr: string}>}
 *   Never rejects for failures raised inside the main `try` block: those are
 *   reported as `exitCode: 1` with `stderr` set to `Error: <message>`.
 */
export async function handleImproveCommand(args) {
    const reportPath = parseFlag(args, "--report") ?? "trace-report.json";
    const rawPatternKind = parseFlag(args, "--pattern");
    const dryRun = args.includes("--dry-run");
    const live = args.includes("--live");
    const certifyTraceAutonomy = args.includes("--certify-trace-autonomy");
    // Negative control can be requested by flag or by environment variable
    // ("1", or "true" in any letter case).
    const traceAutonomyNegativeControl = args.includes("--negative-control") ||
        process.env["MARTIN_TRACE_AUTONOMY_NEGATIVE_CONTROL"] === "1" ||
        process.env["MARTIN_TRACE_AUTONOMY_NEGATIVE_CONTROL"]?.toLowerCase() === "true";
    // The two modes are mutually exclusive.
    if (dryRun && live) {
        return {
            exitCode: 1,
            stdout: "",
            stderr: "Choose either --dry-run or --live; trace-autonomy certification previews are available in --dry-run mode."
        };
    }
    // Validate pattern kind before reading report — prevents confusing downstream errors
    const VALID_PATTERN_KINDS = new Set([
        "oscillation_loop", "budget_blowout", "grounding_cluster", "scope_creep_pattern",
        "hallucinated_tool_calls", "refusal_loop", "verification_blind_spot",
        "recovery_exhaustion", "stagnation_signature", "admission_churn"
    ]);
    if (rawPatternKind && !VALID_PATTERN_KINDS.has(rawPatternKind)) {
        return {
            exitCode: 1,
            stdout: "",
            stderr: `Invalid pattern kind: "${rawPatternKind}". Valid: ${[...VALID_PATTERN_KINDS].join(", ")}`
        };
    }
    const patternKind = rawPatternKind;
    try {
        const repoRoot = resolveImproveRepoRoot();
        // Any read failure (not only a missing file) is reported with the
        // "not found" message below.
        const raw = await readFile(resolve(reportPath), "utf8").catch(() => {
            throw new Error(`Trace report not found at ${reportPath}. Run martin analyze first.`);
        });
        const report = JSON.parse(raw);
        // Default target comes from the report; an explicit --pattern overrides it.
        let target = selectTopImprovementTarget(report);
        if (patternKind) {
            const match = report.patterns.find(p => p.kind === patternKind);
            if (!match) {
                return {
                    exitCode: 1,
                    stdout: "",
                    stderr: `No pattern of kind "${patternKind}" found in the report.\nAvailable: ${report.patterns.map(p => p.kind).join(", ") || "none"}`
                };
            }
            target = match;
        }
        // Nothing to target is treated as success, not as an error.
        if (!target) {
            return {
                exitCode: 0,
                stdout: "No patterns found in the report — nothing to improve.",
                stderr: ""
            };
        }
        const task = buildImprovementTask(target, {
            repoRoot,
            surfaceScope: certifyTraceAutonomy ? "trace_autonomy_stage1" : "phase3_default"
        });
        // Preview artifacts live under <repoRoot>/.martin/improvements, keyed by
        // the task's candidate id (or the pattern kind when no id is present).
        const dryRunPaths = resolveImprovementArtifactPaths(resolve(repoRoot, ".martin", "improvements"), task.loopTask.metadata.candidateId ?? target.kind);
        const dryRunEvidence = buildImprovementEvidenceSummary(task, dryRunPaths);
        // NOTE: this preview metadata is built before the dry-run check, so it is
        // computed on live runs as well even though only the dry-run branch uses it.
        const dryRunCertificationMetadata = await buildOptionalTraceAutonomyCertificationMetadata({
            source: "cli.improve",
            stage: "stage_1_trace_intelligence_only",
            mode: "dry_run",
            report,
            targetPattern: target,
            task,
            evidence: dryRunEvidence,
            artifactPaths: dryRunPaths,
            runsRoot: resolve(repoRoot, ".martin", "improvements")
        });
        if (dryRun) {
            return {
                exitCode: 0,
                stdout: JSON.stringify({
                    dryRun: true,
                    targetPattern: target.kind,
                    severity: target.severity,
                    affectedRunCount: target.affectedRunCount,
                    ...dryRunEvidence,
                    ...dryRunCertificationMetadata,
                    traceAutonomyCertificationMode: "preview_only",
                    loopTask: task.loopTask
                }, null, 2),
                stderr: ""
            };
        }
        // Real run: import core and execute the improvement task
        const core = await import("../../core/index.js");
        const { runMartin, createFileRunStore, resolveRunsRoot } = core;
        const { createClaudeCliAdapter, createCodexCliAdapter } = await import("../../adapters/index.js");
        const runsRoot = resolveRunsRoot(process.env);
        const store = createFileRunStore({ runsRoot });
        const adapterSelection = resolveImproveAdapterSelection(args, process.env);
        const adapterOptions = {
            workingDirectory: repoRoot,
            timeoutMs: resolveImproveLiveAdapterTimeoutMs(),
            // Only pass a model when one was explicitly selected.
            ...(adapterSelection.model ? { model: adapterSelection.model } : {})
        };
        const adapter = adapterSelection.engine === "codex"
            ? createCodexCliAdapter(adapterOptions)
            : createClaudeCliAdapter(adapterOptions);
        const maxIterations = resolveImproveMaxIterations(args, process.env, {
            certifyTraceAutonomy
        });
        // Snapshot of the input report, written as the "before" artifact of each attempt.
        const beforeArtifactMaterial = `${JSON.stringify(report, null, 2)}\n`;
        // Certification runs must produce one auditable ledger entry per invocation;
        // retries are disabled there so partial evidence cannot be hidden by a later pass.
        // An error is retried only when its message contains one of these substrings.
        const TRANSIENT_PATTERNS = ["ECONNRESET", "ETIMEDOUT", "ENOTFOUND", "spawn"];
        const retryLimit = resolveImproveRetryLimit({ certifyTraceAutonomy });
        let result;
        let livePaths;
        let lastErr;
        for (let attempt = 0; attempt < retryLimit; attempt++) {
            // Each attempt reserves a fresh loop id and its own artifact paths.
            const reservedLoopId = createReservedLoopId("improve");
            const attemptLivePaths = resolveImprovementArtifactPaths(runsRoot, reservedLoopId);
            try {
                await writeTextArtifact(attemptLivePaths.beforeArtifactPath, beforeArtifactMaterial);
                result = await runMartin({
                    workspaceId: "ws_improve",
                    projectId: "proj_improve",
                    task: task.loopTask,
                    budget: { maxUsd: 2.0, softLimitUsd: 1.5, maxIterations, maxTokens: 20_000 },
                    adapter,
                    idFactory: createReservedLoopIdFactory(reservedLoopId),
                    store
                });
                // The artifact paths were derived from the reserved id, so the run
                // must have used that same id.
                if (result.loop.loopId !== reservedLoopId) {
                    throw new Error(`Reserved loop id mismatch: expected ${reservedLoopId}, got ${result.loop.loopId}.`);
                }
                livePaths = attemptLivePaths;
                break;
            }
            catch (err) {
                lastErr = err;
                const msg = err instanceof Error ? err.message : String(err);
                const isTransient = TRANSIENT_PATTERNS.some(p => msg.includes(p));
                // Non-transient errors and the final attempt propagate to the outer catch.
                if (!isTransient || attempt === retryLimit - 1)
                    throw err;
                // Exponential backoff: 1s, 2s, 4s, ...
                await new Promise(res => setTimeout(res, 2 ** attempt * 1000));
            }
        }
        // Guard for the case where the loop ended without a successful attempt.
        if (!result || !livePaths)
            throw lastErr;
        const liveEvidence = buildImprovementEvidenceSummary(task, livePaths, {
            // Autonomous promotion is only allowed on certification runs that are
            // not negative controls.
            allowAutonomousPromotion: certifyTraceAutonomy && !traceAutonomyNegativeControl
        });
        const improvementResult = buildImprovementResult(target, result.loop.loopId, summarizeTraceReport(report), undefined);
        await writeImprovementResultArtifact(livePaths.improvementResultPath, improvementResult);
        await ensureCanonicalAttemptEvidenceArtifacts({
            runsRoot,
            loopRunId: result.loop.loopId,
            attemptCount: result.loop.attempts.length,
            beforeArtifactPath: livePaths.beforeArtifactPath,
            improvementResultPath: livePaths.improvementResultPath,
            // Reason code depends on whether any attempt was recorded at all.
            reason: result.loop.attempts.length === 0
                ? "pre_attempt_policy_denial_no_mutation"
                : "fail_closed_no_tracked_mutation",
            createdAt: result.loop.updatedAt
        });
        await writePostChangeHoldoutArtifact(livePaths.holdoutArtifactPath, {
            task,
            loopRunId: result.loop.loopId,
            runsRoot,
            beforeArtifactMaterial,
            beforeArtifactPath: livePaths.beforeArtifactPath,
            improvementResultPath: livePaths.improvementResultPath,
            verifierImproved: result.decision.lifecycleState === "completed",
            budgetCritical: result.loop.cost.actualUsd >= result.loop.budget.maxUsd,
            attemptCount: result.loop.attempts.length,
            groundingFailureCount: countGroundingFailures(result.loop)
        });
        const autonomousPromotionArtifact = await buildCoreCompatibleAutonomousPromotionArtifact(core, task, liveEvidence, result.loop.loopId, runsRoot, beforeArtifactMaterial, livePaths.beforeArtifactPath, livePaths.improvementResultPath, livePaths.holdoutArtifactPath, result.decision.lifecycleState === "completed", result.loop.cost.actualUsd >= result.loop.budget.maxUsd, result.loop.attempts.length, report, countGroundingFailures(result.loop));
        await persistSignedAutonomousPromotionArtifact(core, resolve(runsRoot, result.loop.loopId), livePaths.autonomousPromotionPath, autonomousPromotionArtifact);
        const liveCertificationMetadata = await buildOptionalTraceAutonomyCertificationMetadata({
            source: "cli.improve",
            stage: "stage_1_trace_intelligence_only",
            mode: "live",
            recordCertificationEvidence: certifyTraceAutonomy,
            report,
            targetPattern: target,
            task,
            evidence: liveEvidence,
            artifactPaths: livePaths,
            runsRoot,
            loopRunId: result.loop.loopId,
            loopOutcome: result.decision.lifecycleState,
            costUsd: result.loop.cost.actualUsd,
            attemptCount: result.loop.attempts.length,
            negativeControl: traceAutonomyNegativeControl,
            improvementResult,
            autonomousPromotionArtifact
        });
        return {
            // Only a "completed" lifecycle state counts as success.
            exitCode: result.decision.lifecycleState === "completed" ? 0 : 1,
            stdout: JSON.stringify({
                outcome: result.decision.lifecycleState,
                targetPattern: target.kind,
                loopId: result.loop.loopId,
                cost: result.loop.cost.actualUsd,
                ...mergeArtifactEvidence(liveEvidence, autonomousPromotionArtifact),
                ...liveCertificationMetadata,
                traceAutonomyCertificationMode: certifyTraceAutonomy ? "recorded" : "preview_only"
            }, null, 2),
            stderr: ""
        };
    }
    catch (err) {
        // Convert any failure from the block above into a CLI-style error result.
        const message = err instanceof Error ? err.message : String(err);
        return { exitCode: 1, stdout: "", stderr: `Error: ${message}` };
    }
}
|
|
219
|
+
/**
 * Looks up the value supplied for a command-line flag.
 *
 * Two spellings are recognised:
 *   - `--flag value`  — the value is the token that follows the flag;
 *   - `--flag=value`  — the value is inlined after the `=` sign.
 * The space-separated spelling is checked first, so existing invocations
 * resolve exactly as before; the inline spelling is only consulted when the
 * bare flag is absent from `args`.
 *
 * @param {string[]} args - Raw command-line arguments.
 * @param {string} flag - Flag name including leading dashes (e.g. "--report").
 * @returns {string | undefined} The flag's value, or `undefined` when the flag
 *   is absent or is the last token with nothing after it.
 */
function parseFlag(args, flag) {
    const idx = args.indexOf(flag);
    if (idx >= 0) {
        return args[idx + 1];
    }
    // Fall back to the `--flag=value` spelling, which was previously ignored.
    const inlinePrefix = `${flag}=`;
    const inlineArg = args.find((arg) => typeof arg === "string" && arg.startsWith(inlinePrefix));
    return inlineArg === undefined ? undefined : inlineArg.slice(inlinePrefix.length);
}
|
|
223
|
+
/**
 * Converts an optional textual setting into a positive integer.
 *
 * @param {string | undefined} raw - Text to convert; `undefined` or a
 *   blank/whitespace-only string means "not configured".
 * @param {string} label - Setting name used in the error message.
 * @returns {number | undefined} The parsed integer, or `undefined` when the
 *   setting is not configured.
 * @throws {Error} When the text does not convert to an integer greater than zero.
 */
function parsePositiveInteger(raw, label) {
    const isUnset = raw === undefined || raw.trim() === "";
    if (isUnset) {
        return undefined;
    }
    const parsed = Number(raw);
    const isPositiveInteger = Number.isInteger(parsed) && parsed > 0;
    if (isPositiveInteger) {
        return parsed;
    }
    throw new Error(`${label} must be a positive integer.`);
}
|
|
233
|
+
/**
 * Determines which agent engine (and optionally which model) the improve
 * command should use.
 *
 * Engine lookup order: `--engine` flag, then `MARTIN_CLI_IMPROVE_ENGINE`, then
 * "claude". Model lookup order: `--model` flag, then `MARTIN_CLI_IMPROVE_MODEL`.
 * Both values are trimmed; the engine is also lower-cased.
 *
 * @param {string[]} args - Raw command-line arguments.
 * @param {Record<string, string | undefined>} [env=process.env] - Environment to consult.
 * @returns {{engine: string, model?: string}} `model` is present only when a
 *   non-empty model name was supplied.
 * @throws {Error} When the engine is neither "claude" nor "codex".
 */
export function resolveImproveAdapterSelection(args, env = process.env) {
    const requestedEngine = parseFlag(args, "--engine") ?? env["MARTIN_CLI_IMPROVE_ENGINE"] ?? "claude";
    const engine = requestedEngine.trim().toLowerCase();
    const isSupported = engine === "claude" || engine === "codex";
    if (!isSupported) {
        throw new Error(`--engine must be "claude" or "codex", got "${engine}".`);
    }
    const requestedModel = parseFlag(args, "--model") ?? env["MARTIN_CLI_IMPROVE_MODEL"];
    const model = requestedModel?.trim();
    const selection = { engine };
    if (model) {
        selection.model = model;
    }
    return selection;
}
|
|
246
|
+
/**
 * Resolves the timeout, in whole milliseconds, passed to the live adapter.
 *
 * Reads `MARTIN_CLI_AGENT_TIMEOUT_MS` and truncates it to an integer. The
 * default of 300 000 ms is used when the variable is missing, is not a finite
 * number, or does not amount to at least one whole millisecond.
 *
 * @param {Record<string, string | undefined>} [env=process.env] - Environment to consult.
 * @returns {number} A positive integer number of milliseconds.
 */
export function resolveImproveLiveAdapterTimeoutMs(env = process.env) {
    const defaultTimeoutMs = 300_000;
    const configured = Number(env["MARTIN_CLI_AGENT_TIMEOUT_MS"]);
    if (!Number.isFinite(configured)) {
        return defaultTimeoutMs;
    }
    // Floor BEFORE validating: a value such as "0.5" is positive but would
    // otherwise be floored to a 0 ms timeout after passing the check.
    const wholeMs = Math.floor(configured);
    return wholeMs > 0 ? wholeMs : defaultTimeoutMs;
}
|
|
250
|
+
/**
 * Resolves the iteration cap for an improvement loop.
 *
 * An explicit value (`--max-iterations` flag, otherwise
 * `MARTIN_CLI_IMPROVE_MAX_ITERATIONS`) wins when present. Without one, the
 * default is 1 for trace-autonomy certification runs and 3 otherwise.
 *
 * @param {string[]} args - Raw command-line arguments.
 * @param {Record<string, string | undefined>} [env=process.env] - Environment to consult.
 * @param {{certifyTraceAutonomy?: boolean}} [options={}] - Run mode switches.
 * @returns {number} The iteration cap.
 * @throws {Error} When an explicit value is not a positive integer.
 */
export function resolveImproveMaxIterations(args, env = process.env, options = {}) {
    const rawLimit = parseFlag(args, "--max-iterations") ?? env["MARTIN_CLI_IMPROVE_MAX_ITERATIONS"];
    const explicitLimit = parsePositiveInteger(rawLimit, "--max-iterations");
    if (explicitLimit === undefined) {
        return options.certifyTraceAutonomy ? 1 : 3;
    }
    return explicitLimit;
}
|
|
257
|
+
/**
 * Returns how many times a live improvement run may be attempted.
 *
 * @param {{certifyTraceAutonomy?: boolean}} [options={}] - Run mode switches.
 * @returns {number} 1 when `certifyTraceAutonomy` is truthy (a single attempt,
 *   no retries), otherwise 3.
 */
export function resolveImproveRetryLimit(options = {}) {
    if (options.certifyTraceAutonomy) {
        return 1;
    }
    return 3;
}
|
|
260
|
+
/**
 * Picks the directory treated as the repository root for the improve command.
 *
 * Resolution order:
 *   1. `MARTIN_REPO_ROOT`, when set to a non-blank value (used as given, only resolved);
 *   2. `INIT_CWD`, when set and it passes `isLikelyRepoRoot`;
 *   3. the current working directory.
 *
 * @returns {string} The chosen root directory.
 */
function resolveImproveRepoRoot() {
    const { MARTIN_REPO_ROOT: configuredRoot, INIT_CWD: launchDirectory } = process.env;
    const explicitRoot = configuredRoot?.trim();
    if (explicitRoot) {
        return resolve(explicitRoot);
    }
    const initCwd = launchDirectory?.trim();
    const useInitCwd = Boolean(initCwd) && isLikelyRepoRoot(initCwd);
    return useInitCwd ? resolve(initCwd) : process.cwd();
}
|
|
271
|
+
/**
 * Heuristic check for whether a directory looks like a repository root.
 *
 * @param {string} path - Directory to inspect.
 * @returns {boolean} `true` when `path` exists and contains at least one of
 *   `pnpm-workspace.yaml`, `.git`, or `package.json`.
 */
function isLikelyRepoRoot(path) {
    if (!existsSync(path)) {
        return false;
    }
    // Checked in this order; the first marker found settles the answer.
    const rootMarkers = ["pnpm-workspace.yaml", ".git", "package.json"];
    return rootMarkers.some((marker) => existsSync(resolve(path, marker)));
}
|
|
277
|
+
/**
 * Reduces a trace report to the three headline numbers stored with an
 * improvement result.
 *
 * @param {object} report - Trace report; reads `runsAnalyzed`, `patterns`
 *   (array) and `stats.totalAttempts` / `stats.totalKeptAttempts`.
 * @returns {{runsAnalyzed: *, patternsFound: number, avgKeepRate: number}}
 *   `avgKeepRate` is kept attempts divided by total attempts, or 0 when there
 *   were no attempts.
 */
function summarizeTraceReport(report) {
    const runsAnalyzed = report.runsAnalyzed;
    const patternsFound = report.patterns.length;
    const { totalAttempts, totalKeptAttempts } = report.stats;
    // Avoid dividing by zero when the report recorded no attempts.
    let avgKeepRate = 0;
    if (totalAttempts > 0) {
        avgKeepRate = totalKeptAttempts / totalAttempts;
    }
    return { runsAnalyzed, patternsFound, avgKeepRate };
}
|
|
286
|
+
/**
 * Builds the autonomous-promotion artifact for a finished improvement run.
 *
 * Prefers the builder exposed by the dynamically imported core module when it
 * exists, and falls back to the trace-intelligence builder when the core
 * module has no such function or when the core builder fails.
 *
 * @param {object} core - The imported core module; `buildAutonomousPromotionArtifact` is optional on it.
 * @param {object} task - Improvement task.
 * @param {object} evidence - Evidence summary for the run.
 * @param {string} loopRunId - Id of the loop run.
 * @param {string} runsRoot - Root directory of run artifacts.
 * @param {string} beforeArtifactMaterial - Serialized "before" snapshot.
 * @param {string} beforeArtifactPath - Path of the "before" artifact.
 * @param {string} improvementResultPath - Path of the improvement result artifact.
 * @param {string} holdoutArtifactPath - Path of the holdout artifact.
 * @param {boolean} verifierImproved - Whether the run completed.
 * @param {boolean} budgetCritical - Whether actual cost reached the budget cap.
 * @param {number} attemptCount - Number of attempts recorded for the run.
 * @param {object} report - The trace report the run was based on.
 * @param {number} groundingFailureCount - Grounding failures counted for the run.
 * @returns {Promise<object>} The promotion artifact from whichever builder succeeded.
 */
async function buildCoreCompatibleAutonomousPromotionArtifact(core, task, evidence, loopRunId, runsRoot, beforeArtifactMaterial, beforeArtifactPath, improvementResultPath, holdoutArtifactPath, verifierImproved, budgetCritical, attemptCount, report, groundingFailureCount) {
    const maybeCore = core;
    const corePromotionInput = await buildCoreAutonomousPromotionArtifactInput({
        task,
        evidence,
        loopRunId,
        runsRoot,
        beforeArtifactMaterial,
        beforeArtifactPath,
        improvementResultPath,
        holdoutArtifactPath,
        verifierImproved,
        budgetCritical,
        attemptCount,
        report,
        groundingFailureCount
    });
    if (typeof maybeCore.buildAutonomousPromotionArtifact === "function") {
        try {
            // `await` is required here: returning the bare promise would let an
            // asynchronous rejection escape this try/catch and skip the fallback.
            // Awaiting a non-promise value is harmless.
            return await maybeCore.buildAutonomousPromotionArtifact(corePromotionInput);
        }
        catch {
            // Fall back to the trace-intelligence compatibility artifact until core owns all callers.
        }
    }
    return buildAutonomousPromotionArtifact(task, evidence);
}
|
|
313
|
+
/**
 * Overlays selected fields of a promotion artifact onto fallback evidence.
 *
 * `surfaceClass`, `promotionPolicyVerdict` and `tripwireVerdict` come from the artifact unless
 * they are null/undefined. `verifierReceiptPresent` and `verifierReceiptOutcome` come from
 * `artifact.evidence` only when they have the expected type (boolean / string respectively).
 * Every other fallback property is copied through unchanged.
 *
 * @param {object} fallback - Baseline evidence record.
 * @param {object} artifact - Artifact whose fields take precedence when usable.
 * @returns {object} A new merged evidence object; neither input is mutated.
 */
function mergeArtifactEvidence(fallback, artifact) {
    const artifactEvidence = artifact.evidence;
    const receiptPresentCandidate = artifactEvidence?.verifierReceiptPresent;
    const receiptOutcomeCandidate = artifactEvidence?.verifierReceiptOutcome;
    const merged = { ...fallback };
    merged.surfaceClass = artifact.surfaceClass ?? fallback.surfaceClass;
    merged.promotionPolicyVerdict = artifact.promotionPolicyVerdict ?? fallback.promotionPolicyVerdict;
    merged.tripwireVerdict = artifact.tripwireVerdict ?? fallback.tripwireVerdict;
    // Receipt fields are type-checked rather than null-checked so malformed values are ignored.
    merged.verifierReceiptPresent = typeof receiptPresentCandidate === "boolean"
        ? receiptPresentCandidate
        : fallback.verifierReceiptPresent;
    merged.verifierReceiptOutcome = typeof receiptOutcomeCandidate === "string"
        ? receiptOutcomeCandidate
        : fallback.verifierReceiptOutcome;
    return merged;
}
|
|
330
|
+
/**
 * Produces normalized autonomy-certification metadata when the trace-intelligence
 * package exposes a builder for it.
 *
 * @param {*} input - Passed unchanged to `buildTraceAutonomyCertificationMetadata`.
 * @returns {Promise<object>} The builder's result run through
 *   `normalizeTraceAutonomyCertificationMetadata`, or `{}` when the package does not
 *   export the builder as a function.
 */
async function buildOptionalTraceAutonomyCertificationMetadata(input) {
    const { buildTraceAutonomyCertificationMetadata: buildMetadata } = await import("@martin/trace-intelligence");
    if (typeof buildMetadata === "function") {
        const rawMetadata = await buildMetadata(input);
        return normalizeTraceAutonomyCertificationMetadata(rawMetadata);
    }
    return {};
}
|
|
338
|
+
/**
 * Copies only the well-typed certification fields out of untrusted metadata.
 *
 * Recognised fields: `certificationRunId`, `certificationStatus`, `claimReadiness`,
 * `allowedClaimWording` (strings), `evidenceRunCount` (finite number) and
 * `blockedClaimReasons` (array consisting solely of strings, shallow-copied).
 * Anything missing or of the wrong type is omitted.
 *
 * @param {*} metadata - Raw metadata; `null`/`undefined` are tolerated.
 * @returns {object} A fresh object holding only the validated fields (possibly empty).
 */
function normalizeTraceAutonomyCertificationMetadata(metadata) {
    const normalized = {};
    // A builder that yields nothing must not crash the caller: treat null/undefined as "no metadata".
    if (metadata == null) {
        return normalized;
    }
    const copyIfString = (key) => {
        if (typeof metadata[key] === "string") {
            normalized[key] = metadata[key];
        }
    };
    // Fields are handled in a fixed order so the output key order stays stable.
    copyIfString("certificationRunId");
    copyIfString("certificationStatus");
    const evidenceRunCount = metadata["evidenceRunCount"];
    if (typeof evidenceRunCount === "number" && Number.isFinite(evidenceRunCount)) {
        normalized.evidenceRunCount = evidenceRunCount;
    }
    copyIfString("claimReadiness");
    copyIfString("allowedClaimWording");
    const blockedClaimReasons = metadata["blockedClaimReasons"];
    if (Array.isArray(blockedClaimReasons) &&
        blockedClaimReasons.every((reason) => typeof reason === "string")) {
        // Copy so later mutation of the source array cannot leak into the normalized result.
        normalized.blockedClaimReasons = [...blockedClaimReasons];
    }
    return normalized;
}
|
|
363
|
+
/**
 * Counts repo-grounding failures recorded on a loop.
 *
 * Two sources are tallied independently: attempts whose `failureClass` is
 * `"repo_grounding_failure"`, and `"failure.classified"` events whose payload carries the
 * same class. The larger of the two tallies is returned.
 *
 * @param {object} loop - Loop record; `attempts` and `events` may be absent.
 * @returns {number} The larger of the attempt-based and event-based counts.
 */
function countGroundingFailures(loop) {
    const isGroundingAttempt = (attempt) => attempt.failureClass === "repo_grounding_failure";
    const isGroundingEvent = (event) => event.type === "failure.classified" &&
        event.payload?.failureClass === "repo_grounding_failure";
    const fromAttempts = loop.attempts?.reduce((total, attempt) => (isGroundingAttempt(attempt) ? total + 1 : total), 0) ?? 0;
    const fromEvents = loop.events?.reduce((total, event) => (isGroundingEvent(event) ? total + 1 : total), 0) ?? 0;
    return fromAttempts >= fromEvents ? fromAttempts : fromEvents;
}
|
|
369
|
+
/**
 * Persists a promotion artifact, delegating to core when it offers a persister.
 *
 * @param {object} core - Core module; `persistAutonomousPromotionArtifact` is optional on it.
 * @param {*} root - Passed to the core persister as `root`.
 * @param {*} fallbackPath - Destination handed to `writeAutonomousPromotionArtifact` on the fallback path.
 * @param {*} artifact - Artifact to persist.
 * @returns {Promise<*>} The `path` reported by the core persister, or whatever
 *   `writeAutonomousPromotionArtifact` returns when core cannot be used.
 */
async function persistSignedAutonomousPromotionArtifact(core, root, fallbackPath, artifact) {
    const coreCanPersist = typeof core.persistAutonomousPromotionArtifact === "function";
    // Core is only used for object-like artifacts; everything else goes through the local writer.
    if (!coreCanPersist || !isRecord(artifact)) {
        return writeAutonomousPromotionArtifact(fallbackPath, artifact);
    }
    const { path } = await core.persistAutonomousPromotionArtifact({ root, artifact });
    return path;
}
|
|
377
|
+
/**
 * Reports whether a value is a non-null object (arrays included).
 *
 * @param {*} value - Value to test.
 * @returns {boolean} `true` for any non-null value whose `typeof` is `"object"`.
 */
function isRecord(value) {
    // `typeof null` is "object", so null has to be excluded explicitly.
    return typeof value === "object" && value !== null;
}
|
|
380
|
+
/**
 * Writes a UTF-8 text file, creating any missing parent directories first.
 *
 * @param {string} path - Destination file path.
 * @param {string} value - Text content to write.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function writeTextArtifact(path, value) {
    const parentDir = dirname(path);
    await mkdir(parentDir, { recursive: true });
    await writeFile(path, value, { encoding: "utf8" });
}
|
|
384
|
+
/**
 * Generates a loop identifier of the form `loop_<scope>_<base36 timestamp>_<16 hex chars>`.
 *
 * @param {string} scope - Label embedded in the identifier.
 * @returns {string} The new identifier; the random suffix is the first 16 hex digits of a UUID.
 */
function createReservedLoopId(scope) {
    const randomSuffix = randomUUID().split("-").join("").slice(0, 16);
    const timestamp = Date.now().toString(36);
    return `loop_${scope}_${timestamp}_${randomSuffix}`;
}
|
|
388
|
+
/**
 * Creates an id factory that hands out a pre-reserved loop id.
 *
 * @param {string} loopId - Identifier returned whenever the factory is asked for a `"loop"` id.
 * @returns {(prefix: string) => string} Factory returning `loopId` for the prefix `"loop"`, and
 *   `<prefix>_<16 hex chars>` (taken from a fresh UUID) for any other prefix.
 */
function createReservedLoopIdFactory(loopId) {
    return (prefix) => prefix === "loop"
        ? loopId
        : `${prefix}_${randomUUID().split("-").join("").slice(0, 16)}`;
}
|
|
396
|
+
//# sourceMappingURL=improve.js.map
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
import { existsSync } from "node:fs";
|
|
2
|
+
import { mkdir, writeFile } from "node:fs/promises";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import * as readline from "node:readline/promises";
|
|
5
|
+
const POLICY_EXAMPLE = `# martin.policy.yaml — runtime governance rules for Martin Loop
|
|
6
|
+
# This file is safe to commit. It contains no secrets.
|
|
7
|
+
# Secrets (API keys) always go in environment variables.
|
|
8
|
+
|
|
9
|
+
budgetUsd:
|
|
10
|
+
perRun: 5.00 # hard cap per individual run
|
|
11
|
+
perDay: 50.00 # daily cap across all runs (informational)
|
|
12
|
+
|
|
13
|
+
allowedVerifiers:
|
|
14
|
+
- "pnpm test"
|
|
15
|
+
- "bun run test"
|
|
16
|
+
- "npm test"
|
|
17
|
+
- "cargo test"
|
|
18
|
+
- "pytest"
|
|
19
|
+
|
|
20
|
+
# Commands appended to the built-in leash block list
|
|
21
|
+
blockedCommands:
|
|
22
|
+
# - "npm publish"
|
|
23
|
+
# - "git push --force"
|
|
24
|
+
|
|
25
|
+
# Restrict which files martin may touch (glob, optional)
|
|
26
|
+
# fileScopeGlob: "src/**"
|
|
27
|
+
|
|
28
|
+
# Hard cap on attempts per run
|
|
29
|
+
maxAttempts: 5
|
|
30
|
+
|
|
31
|
+
# Pause and require human approval before spending above this
|
|
32
|
+
requireApprovalAboveUsd: 2.00
|
|
33
|
+
|
|
34
|
+
# SIEM integration (optional — requires SLICE-16)
|
|
35
|
+
# siem:
|
|
36
|
+
# endpoint: "https://your-siem.example.com/events"
|
|
37
|
+
# format: "ocsf" # or "cef"
|
|
38
|
+
# apiKey: "$SIEM_API_KEY" # env var reference
|
|
39
|
+
`;
|
|
40
|
+
const MARTIN_CONFIG_TEMPLATE = (adapter, model) => `# martin.config.yaml
|
|
41
|
+
# Core runtime configuration for Martin Loop.
|
|
42
|
+
# Do not commit this file if it contains any secrets.
|
|
43
|
+
|
|
44
|
+
policyProfile: balanced
|
|
45
|
+
|
|
46
|
+
budget:
|
|
47
|
+
maxUsd: 5.00
|
|
48
|
+
softLimitUsd: 3.75
|
|
49
|
+
maxIterations: 5
|
|
50
|
+
maxTokens: 50000
|
|
51
|
+
|
|
52
|
+
governance:
|
|
53
|
+
destructiveActionPolicy: approval
|
|
54
|
+
telemetryDestination: local-only
|
|
55
|
+
verifierRules:
|
|
56
|
+
- "pnpm test"
|
|
57
|
+
`;
|
|
58
|
+
/**
 * Contents written verbatim to `.github/workflows/martin-loop.yml` by `handleInitCommand`.
 *
 * The workflow runs Martin Loop on manual dispatch or on a `/martin` comment in a pull request.
 * Attacker-controllable event data (the comment body and dispatch inputs) is handed to the
 * shell step through `env:` variables instead of being interpolated into the script text, so
 * it cannot inject shell commands into a job that holds write permissions and the API key.
 *
 * Escaping inside this template literal: `\${{` emits a GitHub expression, `\\` emits a single
 * backslash, and `\`` emits a backtick for the embedded github-script JavaScript.
 */
const GH_ACTIONS_TEMPLATE = `# .github/workflows/martin-loop.yml
# Martin Loop governed AI coding runtime — GitHub Actions integration
# Docs: https://github.com/your-org/martin-loop

name: Martin Loop

on:
  workflow_dispatch:
    inputs:
      objective:
        description: "What should Martin Loop fix or implement?"
        required: true
      budget_usd:
        description: "Hard budget cap for this run (USD)"
        required: false
        default: "5.00"
  issue_comment:
    types: [created]

jobs:
  martin:
    # Trigger on workflow_dispatch OR on /martin comment in a PR
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'issue_comment' &&
       github.event.issue.pull_request != null &&
       startsWith(github.event.comment.body, '/martin'))

    runs-on: ubuntu-latest

    permissions:
      contents: write
      pull-requests: write

    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up pnpm
        uses: pnpm/action-setup@v4
        with:
          version: 10.17.1

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 22
          cache: pnpm

      - name: Install martin-loop
        run: pnpm add -g @martin/cli

      # Pre-flight check — fails the job if environment is misconfigured
      - name: Pre-flight check
        run: martin-loop doctor
        env:
          ANTHROPIC_API_KEY: \${{ secrets.ANTHROPIC_API_KEY }}

      - name: Run Martin Loop
        id: martin
        run: |
          # Resolve objective from dispatch input or /martin comment.
          # Untrusted text arrives through env vars (see env: below) and is never
          # interpolated into this script, which prevents shell injection.
          if [ "\${{ github.event_name }}" = "workflow_dispatch" ]; then
            OBJECTIVE="\$DISPATCH_OBJECTIVE"
            BUDGET="\$DISPATCH_BUDGET_USD"
          else
            OBJECTIVE="\$(printf '%s' "\$COMMENT_BODY" | sed 's|^/martin[[:space:]]*||')"
            BUDGET="5.00"
          fi

          martin-loop run \\
            --objective "\$OBJECTIVE" \\
            --budget-usd "\$BUDGET" \\
            --workspace "\${{ github.repository_owner }}" \\
            --project "\${{ github.repository }}" \\
            --json > martin-result.json
        env:
          ANTHROPIC_API_KEY: \${{ secrets.ANTHROPIC_API_KEY }}
          MARTIN_NO_SUMMARY: "1"
          DISPATCH_OBJECTIVE: \${{ github.event.inputs.objective }}
          DISPATCH_BUDGET_USD: \${{ github.event.inputs.budget_usd }}
          COMMENT_BODY: \${{ github.event.comment.body }}

      - name: Upload audit artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: martin-loop-audit-\${{ github.run_id }}
          path: ~/.martin/runs/
          retention-days: 30

      - name: Commit any changes from Martin Loop
        run: |
          git config user.name "martin-loop[bot]"
          git config user.email "martin-loop[bot]@users.noreply.github.com"
          git add -A
          git diff --staged --quiet || git commit -m "feat(martin-loop): apply governed patch from run \${{ github.run_id }}"
          git push

      - name: Post PR comment with run summary
        if: github.event_name == 'issue_comment'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            let body;
            try {
              const result = JSON.parse(fs.readFileSync('martin-result.json', 'utf8'));
              const decision = result.decision ?? {};
              const loop = result.loop ?? {};
              const succeeded = decision.lifecycleState === 'succeeded';
              const outcome = succeeded ? '✅ SUCCEEDED' : '❌ FAILED';
              const cost = loop.costUsd ? \`$\${loop.costUsd.toFixed(2)}\` : 'n/a';
              const loopId = loop.loopId ?? 'unknown';
              body = [
                \`## Martin Loop — \${outcome}\`,
                \`\`,
                \`| Field | Value |\`,
                \`|---|---|\`,
                \`| Outcome | \${decision.lifecycleState ?? 'unknown'} |\`,
                \`| Cost | \${cost} |\`,
                \`| Loop ID | \${loopId} |\`,
                \`\`,
                succeeded
                  ? \`Patch applied and committed. Audit artifacts uploaded to this run's Actions artifacts.\`
                  : \`Run failed. See Actions artifacts for the full audit trail.\\n\\nTip: martin-loop explain \${loopId}\`
              ].join('\\n');
            } catch (e) {
              body = '## Martin Loop\\n\\nRun completed — see Actions artifacts for details.';
            }
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body
            });
`;
|
|
192
|
+
/**
 * Implements the `init` command: scaffolds Martin Loop files into a workspace.
 *
 * Writes `martin.config.yaml` and `martin.policy.yaml.example` into the target directory and,
 * unless declined at the interactive prompt, `.github/workflows/martin-loop.yml`. Afterwards the
 * `doctor` command is run on a best-effort basis and its stdout is appended to the report.
 *
 * Prompts are skipped when stdin is not a TTY or when `--yes` / `-y` is passed; in that case the
 * default budget (5.00) is used and the workflow file is written.
 *
 * @param {string[]} args - CLI arguments; recognises `--force`, `--yes` and `-y`.
 * @param {{ cwd?: string }} [options] - `cwd` overrides the target directory (defaults to `process.cwd()`).
 * @returns {Promise<{ exitCode: number, stdout: string, stderr: string }>} Exit code 1 with an
 *   explanatory `stderr` (and nothing written) when `martin.config.yaml` already exists and
 *   `--force` was not given; otherwise exit code 0 with the summary in `stdout`.
 */
export async function handleInitCommand(args, options = {}) {
    const cwd = options.cwd ?? process.cwd();
    const force = args.includes("--force");
    const nonInteractive = !process.stdin.isTTY || args.includes("--yes") || args.includes("-y");
    const configPath = join(cwd, "martin.config.yaml");
    const policyExamplePath = join(cwd, "martin.policy.yaml.example");
    const ghWorkflowDir = join(cwd, ".github", "workflows");
    const ghWorkflowPath = join(ghWorkflowDir, "martin-loop.yml");
    // Guard against overwriting existing config without --force
    if (!force && existsSync(configPath)) {
        return {
            exitCode: 1,
            stdout: "",
            stderr: [
                `martin.config.yaml already exists in ${cwd}.`,
                `Use --force to overwrite, or delete it manually first.`
            ].join("\n")
        };
    }
    // Detect adapter from environment
    const hasAnthropic = !!(process.env.ANTHROPIC_API_KEY && process.env.ANTHROPIC_API_KEY.length > 10);
    const hasOpenAI = !!(process.env.OPENAI_API_KEY && process.env.OPENAI_API_KEY.length > 10);
    const adapter = hasAnthropic ? "claude" : hasOpenAI ? "codex" : "claude";
    const model = adapter === "claude" ? "claude-sonnet-4-6" : "gpt-4o";
    // Ask for budget in interactive mode
    let budget = "5.00";
    let writeGhWorkflow = true;
    if (!nonInteractive) {
        const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
        try {
            const budgetAnswer = (await rl.question(`  Budget cap per run (USD) [default: 5.00]: `)).trim();
            if (budgetAnswer) {
                const parsed = Number.parseFloat(budgetAnswer);
                const rounded = parsed.toFixed(2);
                // Reject NaN/Infinity and amounts that round down to 0.00; keep the default instead.
                if (Number.isFinite(parsed) && Number(rounded) > 0) {
                    budget = rounded;
                }
            }
            const ghAnswer = (await rl.question(`  Write GitHub Actions workflow? (Y/n) [default: Y]: `)).trim().toLowerCase();
            // Both "n" and "no" decline; an empty answer or anything else keeps the default (yes).
            writeGhWorkflow = ghAnswer !== "n" && ghAnswer !== "no";
        }
        finally {
            rl.close();
        }
    }
    const written = [];
    // Write martin.config.yaml. The soft limit keeps the template's 75% ratio (3.75 / 5.00)
    // so a custom budget can never end up with a soft limit above the hard cap.
    const softLimit = (Number(budget) * 0.75).toFixed(2);
    const configYaml = MARTIN_CONFIG_TEMPLATE(adapter, model)
        .replace("maxUsd: 5.00", `maxUsd: ${budget}`)
        .replace("softLimitUsd: 3.75", `softLimitUsd: ${softLimit}`);
    await writeFile(configPath, configYaml, "utf8");
    written.push("  martin.config.yaml");
    // Write martin.policy.yaml.example
    await writeFile(policyExamplePath, POLICY_EXAMPLE, "utf8");
    written.push("  martin.policy.yaml.example");
    // Write GitHub Actions workflow
    if (writeGhWorkflow) {
        await mkdir(ghWorkflowDir, { recursive: true });
        await writeFile(ghWorkflowPath, GH_ACTIONS_TEMPLATE, "utf8");
        written.push("  .github/workflows/martin-loop.yml");
    }
    // Run doctor check after init
    let doctorSummary = "";
    try {
        const { handleDoctorCommand } = await import("./doctor.js");
        const doctorResult = await handleDoctorCommand([], { cwd });
        doctorSummary = "\n" + doctorResult.stdout;
    }
    catch {
        // Non-fatal: init has already succeeded, the doctor report is only a convenience.
    }
    const credWarning = !hasAnthropic && !hasOpenAI
        ? `\n  ⚠  No API key found. Set ANTHROPIC_API_KEY before running martin-loop.\n`
        : "";
    const output = [
        "Martin Loop initialized.",
        "",
        "Written:",
        ...written,
        "",
        "Add to .gitignore (optional — no secrets but env-specific):",
        "  martin.config.yaml",
        "",
        credWarning,
        "Next steps:",
        "  1. Set ANTHROPIC_API_KEY in your environment or CI secrets",
        "  2. Run: martin-loop doctor",
        "  3. Run: martin-loop run --objective \"your first task\"",
        "",
        "Docs: https://github.com/your-org/martin-loop",
        doctorSummary
    ].join("\n");
    return { exitCode: 0, stdout: output, stderr: "" };
}
|
|
281
|
+
//# sourceMappingURL=init.js.map
|