martin-loop 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/LICENSE +21 -21
  2. package/README.md +398 -362
  3. package/demo/seeded-workspace/README.md +35 -0
  4. package/demo/seeded-workspace/TASKS.md +29 -0
  5. package/demo/seeded-workspace/martin.config.yaml +11 -0
  6. package/demo/seeded-workspace/package.json +8 -0
  7. package/demo/seeded-workspace/src/invoice-summary.js +11 -0
  8. package/demo/seeded-workspace/test/invoice-summary.test.js +20 -0
  9. package/dist/vendor/adapters/claude-cli.d.ts +19 -4
  10. package/dist/vendor/adapters/claude-cli.js +55 -24
  11. package/dist/vendor/adapters/cli-bridge.d.ts +1 -0
  12. package/dist/vendor/adapters/cli-bridge.js +154 -28
  13. package/dist/vendor/adapters/index.d.ts +1 -0
  14. package/dist/vendor/adapters/index.js +1 -0
  15. package/dist/vendor/adapters/verifier-only.d.ts +7 -0
  16. package/dist/vendor/adapters/verifier-only.js +57 -0
  17. package/dist/vendor/cli/index.d.ts +6 -1
  18. package/dist/vendor/cli/index.js +124 -7
  19. package/dist/vendor/contracts/index.d.ts +3 -1
  20. package/dist/vendor/core/compiler.d.ts +2 -0
  21. package/dist/vendor/core/compiler.js +10 -4
  22. package/dist/vendor/core/context-integrity.d.ts +26 -0
  23. package/dist/vendor/core/context-integrity.js +56 -0
  24. package/dist/vendor/core/index.d.ts +5 -2
  25. package/dist/vendor/core/index.js +186 -54
  26. package/dist/vendor/core/policy.d.ts +6 -0
  27. package/docs/distribution/DIRECTORY-SUBMISSIONS.md +89 -0
  28. package/docs/distribution/INTEGRATION-OUTREACH.md +61 -0
  29. package/docs/distribution/UNDER-3-CHALLENGE.md +65 -0
  30. package/docs/oss/CLAUDE-CODE-WALKTHROUGH.md +142 -0
  31. package/docs/oss/EXAMPLES.md +134 -126
  32. package/docs/oss/OSS-BOUNDARY-REPORT.json +109 -113
  33. package/docs/oss/OSS-BOUNDARY-REPORT.md +48 -48
  34. package/docs/oss/QUICKSTART.md +165 -135
  35. package/docs/oss/RALPH-LOOP-SAFETY.md +113 -0
  36. package/docs/oss/README.md +96 -93
  37. package/docs/oss/RELEASE-SURFACE-REPORT.json +45 -45
  38. package/docs/oss/RELEASE-SURFACE-REPORT.md +35 -35
  39. package/package.json +19 -11
@@ -5,8 +5,11 @@ import { evaluateChangeApprovalLeash, evaluateFilesystemLeash, evaluateSecretLea
5
5
  import { buildRepoGroundingIndex, loadOrBuildRepoGroundingIndex, queryRepoGroundingIndex, scanPatchForGroundingViolations } from "./grounding.js";
6
6
  import { captureRollbackBoundary, restoreRollbackBoundary } from "./rollback.js";
7
7
  import { compilePromptPacket } from "./compiler.js";
8
- import { makeLedgerEvent } from "./persistence/index.js";
8
+ import { makeLedgerEvent, resolveRunsRoot, runDir } from "./persistence/index.js";
9
+ import { runContextIntegrityPrecheck } from "./context-integrity.js";
9
10
  export { classifyFailure, computeEvidenceVector, evaluatePatchDecision, evaluateCostGovernor, evaluateBudgetPreflight, inferExit, nextPolicyPhase, policyPhaseToLifecycleState, scorePatchDecision, selectRecoveryRecipe, evaluateVerificationLeash, evaluateFilesystemLeash, evaluateChangeApprovalLeash, evaluateSecretLeash, resolveExecutionProfile, redactSecretsFromText, buildRepoGroundingIndex, loadOrBuildRepoGroundingIndex, queryRepoGroundingIndex, scanPatchForGroundingViolations, captureRollbackBoundary, restoreRollbackBoundary };
11
+ // ─── Context Integrity Pre-gate ──────────────────────────────────────────────
12
+ export { runContextIntegrityPrecheck } from "./context-integrity.js";
10
13
  // ─── Prompt packet compiler ──────────────────────────────────────────────────
11
14
  export { compilePromptPacket } from "./compiler.js";
12
15
  // ─── Persistence (RunStore, LedgerEvent, FileRunStore) ──────────────────────
@@ -136,6 +139,7 @@ export async function runMartin(input) {
136
139
  let currentAdapterIndex = 0;
137
140
  let currentAdapter = adapterChain[currentAdapterIndex] ?? input.adapter;
138
141
  let useCompressedContext = false;
142
+ const isVerifyOnly = input.task.mutationMode === "verify_only";
139
143
  const executionProfile = resolveExecutionProfile({
140
144
  executionProfile: input.task.executionProfile,
141
145
  allowedNetworkDomains: input.task.allowedNetworkDomains
@@ -153,7 +157,8 @@ export async function runMartin(input) {
153
157
  shouldExit: true,
154
158
  lifecycleState: "human_escalation",
155
159
  status: "exited",
156
- reason
160
+ reason,
161
+ ...classifySafetyLeashExit(leashDecision, "verifier")
157
162
  };
158
163
  if (input.store) {
159
164
  await input.store.appendLedger(loop.loopId, makeLedgerEvent({
@@ -169,11 +174,7 @@ export async function runMartin(input) {
169
174
  await input.store.appendLedger(loop.loopId, makeLedgerEvent({
170
175
  kind: "run.exited",
171
176
  runId: loop.loopId,
172
- payload: {
173
- lifecycleState: leashExitDecision.lifecycleState,
174
- status: leashExitDecision.status,
175
- reason: leashExitDecision.reason
176
- }
177
+ payload: createRunExitPayload(leashExitDecision)
177
178
  }));
178
179
  }
179
180
  return {
@@ -193,7 +194,8 @@ export async function runMartin(input) {
193
194
  shouldExit: true,
194
195
  lifecycleState: "human_escalation",
195
196
  status: "exited",
196
- reason: secretDecision.reason ?? "Safety leash blocked secret-like values in the runtime context."
197
+ reason: secretDecision.reason ?? "Safety leash blocked secret-like values in the runtime context.",
198
+ ...classifySafetyLeashExit(secretDecision, "secret")
197
199
  };
198
200
  if (input.store) {
199
201
  await input.store.appendLedger(loop.loopId, makeLedgerEvent({
@@ -208,11 +210,7 @@ export async function runMartin(input) {
208
210
  await input.store.appendLedger(loop.loopId, makeLedgerEvent({
209
211
  kind: "run.exited",
210
212
  runId: loop.loopId,
211
- payload: {
212
- lifecycleState: secretExitDecision.lifecycleState,
213
- status: secretExitDecision.status,
214
- reason: secretExitDecision.reason
215
- }
213
+ payload: createRunExitPayload(secretExitDecision)
216
214
  }));
217
215
  }
218
216
  return {
@@ -254,11 +252,7 @@ export async function runMartin(input) {
254
252
  await input.store.appendLedger(loop.loopId, makeLedgerEvent({
255
253
  kind: "run.exited",
256
254
  runId: loop.loopId,
257
- payload: {
258
- lifecycleState: preflightExitDecision.lifecycleState,
259
- status: preflightExitDecision.status,
260
- reason: preflightExitDecision.reason
261
- }
255
+ payload: createRunExitPayload(preflightExitDecision)
262
256
  }));
263
257
  }
264
258
  return {
@@ -268,6 +262,38 @@ export async function runMartin(input) {
268
262
  }
269
263
  // GATHER → ADMIT: run admission control before executing
270
264
  currentPhase = "ADMIT";
265
+ // T05: Context Integrity Pre-gate — blocks authority inversion / injection before reasoning
266
+ const contextPrecheck = await runContextIntegrityPrecheck(loop.loopId, loop.attempts.length + 1, runDir(resolveRunsRoot(), loop.loopId), {
267
+ userPrompt: distilled.focus,
268
+ history: loop.attempts.map(a => a.summary).join("\n")
269
+ });
270
+ if (contextPrecheck.verdict === "context_poisoning_block") {
271
+ currentPhase = "ABORT";
272
+ const poisoningExitDecision = {
273
+ shouldExit: true,
274
+ lifecycleState: "human_escalation",
275
+ status: "exited",
276
+ reason: "Context Integrity Pre-gate: context poisoning attempt detected.",
277
+ failureClass: "safety_leash_blocked",
278
+ safetySurface: "context_integrity",
279
+ reasonCode: "context_poisoning_blocked"
280
+ };
281
+ if (input.store) {
282
+ await input.store.appendLedger(loop.loopId, makeLedgerEvent({
283
+ kind: "safety.violations_found",
284
+ runId: loop.loopId,
285
+ payload: {
286
+ verdict: contextPrecheck.verdict,
287
+ signals: contextPrecheck.detectedSignals,
288
+ source: "context_integrity_pregate"
289
+ }
290
+ }));
291
+ }
292
+ return {
293
+ loop: finalizeLoop(loop, poisoningExitDecision, now(), idFactory),
294
+ decision: poisoningExitDecision
295
+ };
296
+ }
271
297
  const admissionDecision = evaluateAttemptPolicy({
272
298
  request: {
273
299
  loopId: loop.loopId,
@@ -315,11 +341,7 @@ export async function runMartin(input) {
315
341
  await input.store.appendLedger(loop.loopId, makeLedgerEvent({
316
342
  kind: "run.exited",
317
343
  runId: loop.loopId,
318
- payload: {
319
- lifecycleState: exitDecision.lifecycleState,
320
- status: exitDecision.status,
321
- reason: exitDecision.reason
322
- }
344
+ payload: createRunExitPayload(exitDecision)
323
345
  }));
324
346
  }
325
347
  return {
@@ -361,6 +383,7 @@ export async function runMartin(input) {
361
383
  objective: loop.task.objective,
362
384
  verificationPlan: loop.task.verificationPlan,
363
385
  ...(loop.task.verificationStack ? { verificationStack: loop.task.verificationStack } : {}),
386
+ ...(loop.task.mutationMode ? { mutationMode: loop.task.mutationMode } : {}),
364
387
  ...(loop.task.repoRoot ? { repoRoot: loop.task.repoRoot } : {}),
365
388
  ...(loop.task.allowedPaths ? { allowedPaths: loop.task.allowedPaths } : {}),
366
389
  ...(loop.task.deniedPaths ? { deniedPaths: loop.task.deniedPaths } : {}),
@@ -539,6 +562,90 @@ export async function runMartin(input) {
539
562
  // returned a non-empty list. A repoRoot alone is insufficient — git may fail (e.g. not
540
563
  // a git repo) and silently return [], which would falsely trigger no_code_change.
541
564
  const changedFileEvidenceAvailable = result.execution?.changedFiles !== undefined || changedFiles.length > 0;
565
+ if (isVerifyOnly && changedFiles.length > 0) {
566
+ const patchDecision = evaluatePatchDecision({
567
+ verificationPassed: result.verification.passed,
568
+ previousVerifierScore,
569
+ verifierScore: result.verification.passed ? 1 : 0,
570
+ scopeViolationCount: changedFiles.length,
571
+ changedFileCount: changedFiles.length,
572
+ diffNovelty: 1,
573
+ diffStats: result.execution?.diffStats,
574
+ costUsd: getUsageUsd(result.usage),
575
+ summary: result.summary
576
+ });
577
+ const verifyOnlyExitDecision = {
578
+ shouldExit: true,
579
+ lifecycleState: "human_escalation",
580
+ status: "exited",
581
+ reason: "Verify-only mode forbids file changes.",
582
+ failureClass: "safety_leash_blocked",
583
+ safetySurface: "filesystem",
584
+ reasonCode: "verify_only_write_attempt"
585
+ };
586
+ const rollbackOutcome = await restoreRollbackBoundary({
587
+ repoRoot: request.context.repoRoot,
588
+ boundary: rollbackBoundary,
589
+ restoredAt: attemptCompletedAt,
590
+ decision: patchDecision.decision
591
+ });
592
+ if (input.store) {
593
+ const verifyOnlyViolation = {
594
+ kind: "path_not_allowed",
595
+ message: `Verify-only mode forbids changed files: ${changedFiles.join(", ")}`,
596
+ file: changedFiles[0]
597
+ };
598
+ await input.store.writeAttemptArtifacts(loop.loopId, currentAttemptIndex, {
599
+ compiledContext,
600
+ leash: createLeashArtifact({
601
+ surface: "filesystem",
602
+ reason: verifyOnlyExitDecision.reason,
603
+ violations: [verifyOnlyViolation]
604
+ }, currentAttemptIndex),
605
+ patchScore: patchDecision.score,
606
+ patchDecision: toPatchDecisionArtifact(patchDecision),
607
+ ...(rollbackBoundary ? { rollbackBoundary } : {}),
608
+ ...(rollbackOutcome ? { rollbackOutcome } : {})
609
+ });
610
+ await input.store.appendLedger(loop.loopId, makeLedgerEvent({
611
+ kind: "safety.violations_found",
612
+ runId: loop.loopId,
613
+ attemptIndex: currentAttemptIndex,
614
+ payload: {
615
+ surface: "filesystem",
616
+ blocked: true,
617
+ attemptIndex: currentAttemptIndex,
618
+ violations: [
619
+ {
620
+ kind: "path_not_allowed",
621
+ message: verifyOnlyExitDecision.reason,
622
+ files: changedFiles
623
+ }
624
+ ]
625
+ }
626
+ }));
627
+ await input.store.appendLedger(loop.loopId, makeLedgerEvent({
628
+ kind: "attempt.discarded",
629
+ runId: loop.loopId,
630
+ attemptIndex: currentAttemptIndex,
631
+ payload: {
632
+ decision: patchDecision.decision,
633
+ reason: patchDecision.summary,
634
+ reasonCodes: patchDecision.reasonCodes,
635
+ score: patchDecision.score.score
636
+ }
637
+ }));
638
+ await input.store.appendLedger(loop.loopId, makeLedgerEvent({
639
+ kind: "run.exited",
640
+ runId: loop.loopId,
641
+ payload: createRunExitPayload(verifyOnlyExitDecision)
642
+ }));
643
+ }
644
+ return {
645
+ loop: finalizeLoop(loop, verifyOnlyExitDecision, now(), idFactory),
646
+ decision: verifyOnlyExitDecision
647
+ };
648
+ }
542
649
  const filesystemDecision = evaluateFilesystemLeash({
543
650
  repoRoot: request.context.repoRoot,
544
651
  changedFiles,
@@ -561,7 +668,8 @@ export async function runMartin(input) {
561
668
  shouldExit: true,
562
669
  lifecycleState: "human_escalation",
563
670
  status: "exited",
564
- reason: filesystemDecision.reason ?? "Safety leash blocked filesystem changes."
671
+ reason: filesystemDecision.reason ?? "Safety leash blocked filesystem changes.",
672
+ ...classifySafetyLeashExit(filesystemDecision, "filesystem")
565
673
  };
566
674
  const rollbackOutcome = await restoreRollbackBoundary({
567
675
  repoRoot: request.context.repoRoot,
@@ -603,11 +711,7 @@ export async function runMartin(input) {
603
711
  await input.store.appendLedger(loop.loopId, makeLedgerEvent({
604
712
  kind: "run.exited",
605
713
  runId: loop.loopId,
606
- payload: {
607
- lifecycleState: filesystemExitDecision.lifecycleState,
608
- status: filesystemExitDecision.status,
609
- reason: filesystemExitDecision.reason
610
- }
714
+ payload: createRunExitPayload(filesystemExitDecision)
611
715
  }));
612
716
  }
613
717
  return {
@@ -638,7 +742,8 @@ export async function runMartin(input) {
638
742
  lifecycleState: "human_escalation",
639
743
  status: "exited",
640
744
  reason: changeApprovalDecision.reason ??
641
- "Safety leash blocked dependency or migration changes that require approval."
745
+ "Safety leash blocked dependency or migration changes that require approval.",
746
+ ...classifySafetyLeashExit(changeApprovalDecision, "dependency")
642
747
  };
643
748
  const rollbackOutcome = await restoreRollbackBoundary({
644
749
  repoRoot: request.context.repoRoot,
@@ -681,11 +786,7 @@ export async function runMartin(input) {
681
786
  await input.store.appendLedger(loop.loopId, makeLedgerEvent({
682
787
  kind: "run.exited",
683
788
  runId: loop.loopId,
684
- payload: {
685
- lifecycleState: approvalExitDecision.lifecycleState,
686
- status: approvalExitDecision.status,
687
- reason: approvalExitDecision.reason
688
- }
789
+ payload: createRunExitPayload(approvalExitDecision)
689
790
  }));
690
791
  }
691
792
  return {
@@ -728,8 +829,8 @@ export async function runMartin(input) {
728
829
  previousVerifierScore,
729
830
  verifierScore: result.verification.passed ? 1 : 0,
730
831
  groundingViolationCount: groundingScanResult?.violations.length ?? 0,
731
- changedFileCount: changedFileEvidenceAvailable ? changedFiles.length : undefined,
732
- diffNovelty: changedFileEvidenceAvailable ? (changedFiles.length > 0 ? 1 : 0) : undefined,
832
+ changedFileCount: !isVerifyOnly && changedFileEvidenceAvailable ? changedFiles.length : undefined,
833
+ diffNovelty: !isVerifyOnly && changedFileEvidenceAvailable ? (changedFiles.length > 0 ? 1 : 0) : undefined,
733
834
  diffStats: result.execution?.diffStats,
734
835
  costUsd: getUsageUsd(result.usage),
735
836
  summary: result.summary
@@ -822,11 +923,7 @@ export async function runMartin(input) {
822
923
  await input.store.appendLedger(loop.loopId, makeLedgerEvent({
823
924
  kind: "run.exited",
824
925
  runId: loop.loopId,
825
- payload: {
826
- lifecycleState: patchExitDecision.lifecycleState,
827
- status: patchExitDecision.status,
828
- reason: patchExitDecision.reason
829
- }
926
+ payload: createRunExitPayload(patchExitDecision)
830
927
  }));
831
928
  }
832
929
  return {
@@ -870,11 +967,7 @@ export async function runMartin(input) {
870
967
  await input.store.appendLedger(loop.loopId, makeLedgerEvent({
871
968
  kind: "run.exited",
872
969
  runId: loop.loopId,
873
- payload: {
874
- lifecycleState: decision.lifecycleState,
875
- status: decision.status,
876
- reason: decision.reason
877
- }
970
+ payload: createRunExitPayload(decision)
878
971
  }));
879
972
  }
880
973
  return {
@@ -893,11 +986,7 @@ export async function runMartin(input) {
893
986
  await input.store.appendLedger(loop.loopId, makeLedgerEvent({
894
987
  kind: "run.exited",
895
988
  runId: loop.loopId,
896
- payload: {
897
- lifecycleState: decision.lifecycleState,
898
- status: decision.status,
899
- reason: decision.reason
900
- }
989
+ payload: createRunExitPayload(decision)
901
990
  }));
902
991
  }
903
992
  return {
@@ -905,11 +994,54 @@ export async function runMartin(input) {
905
994
  decision
906
995
  };
907
996
  }
997
+ function createRunExitPayload(decision) {
998
+ return {
999
+ lifecycleState: decision.lifecycleState,
1000
+ status: decision.status,
1001
+ reason: decision.reason,
1002
+ ...(decision.failureClass ? { failureClass: decision.failureClass } : {}),
1003
+ ...(decision.safetySurface ? { safetySurface: decision.safetySurface } : {}),
1004
+ ...(decision.reasonCode ? { reasonCode: decision.reasonCode } : {})
1005
+ };
1006
+ }
1007
+ function classifySafetyLeashExit(decision, safetySurface = decision.surface) {
1008
+ return {
1009
+ failureClass: "safety_leash_blocked",
1010
+ safetySurface,
1011
+ reasonCode: safetyLeashReasonCode(decision, safetySurface)
1012
+ };
1013
+ }
1014
+ function safetyLeashReasonCode(decision, safetySurface) {
1015
+ const kind = decision.violations[0]?.kind;
1016
+ switch (kind) {
1017
+ case "command_blocked":
1018
+ return safetySurface === "verifier" ? "destructive_verifier_command" : "command_blocked";
1019
+ case "network_blocked":
1020
+ return safetySurface === "verifier" ? "verifier_network_blocked" : "network_access_blocked";
1021
+ case "secret_value":
1022
+ return "secret_context_value";
1023
+ case "path_denied":
1024
+ case "protected_path":
1025
+ return "protected_surface_write";
1026
+ case "path_not_allowed":
1027
+ return "surface_write_not_allowed";
1028
+ case "path_outside_repo":
1029
+ return "outside_repo_write";
1030
+ case "dependency_approval_required":
1031
+ return "dependency_approval_required";
1032
+ case "migration_approval_required":
1033
+ return "migration_approval_required";
1034
+ case "config_change_approval_required":
1035
+ return "config_change_approval_required";
1036
+ default:
1037
+ return `${safetySurface}_safety_block`;
1038
+ }
1039
+ }
908
1040
  function finalizeLoop(loop, decision, timestamp, idFactory) {
909
1041
  const finalized = appendLoopEvent(loop, {
910
1042
  type: "run.completed",
911
1043
  lifecycleState: decision.lifecycleState,
912
- payload: { status: decision.status, reason: decision.reason }
1044
+ payload: createRunExitPayload(decision)
913
1045
  }, { now: timestamp, idFactory });
914
1046
  return {
915
1047
  ...finalized,
@@ -934,7 +1066,7 @@ function getUsageProvenance(usage) {
934
1066
  return "actual";
935
1067
  }
936
1068
  function resolveChangedFiles(result, repoRoot) {
937
- if (result.execution?.changedFiles?.length) {
1069
+ if (result.execution?.changedFiles !== undefined) {
938
1070
  return result.execution.changedFiles;
939
1071
  }
940
1072
  if (!repoRoot) {
@@ -18,6 +18,12 @@ export interface ExitDecision {
18
18
  lifecycleState: LoopLifecycleState;
19
19
  status: LoopStatus;
20
20
  reason: string;
21
+ /** Machine-readable stop classifier for non-attempt exits such as preflight safety blocks. */
22
+ failureClass?: FailureClass;
23
+ /** Machine-readable safety surface, when the stop came from a safety leash. */
24
+ safetySurface?: string;
25
+ /** Stable reason code for dashboards, MCP, and downstream automation. */
26
+ reasonCode?: string;
21
27
  }
22
28
  export interface MartinAdapterResultLike {
23
29
  status: "completed" | "failed";
@@ -0,0 +1,89 @@
1
+ # Directory Submission Pack
2
+
3
+ Use this file as the single source of truth for public directory submissions.
4
+
5
+ ## Short tagline
6
+
7
+ Open-source control plane for AI coding agents.
8
+
9
+ ## Long description
10
+
11
+ MartinLoop is an open-source governed runtime for AI coding agents. It wraps autonomous coding loops with budget caps, verifier gates, rollback evidence, JSONL run records, failure classification, and MCP/Claude/Codex integration so agent work can be inspected, halted, and trusted.
12
+
13
+ ## Primary links
14
+
15
+ - GitHub repo: [github.com/Keesan12/martin-loop](https://github.com/Keesan12/martin-loop)
16
+ - Website: [martinloop.com](https://martinloop.com)
17
+ - npm package: [npmjs.com/package/martin-loop](https://www.npmjs.com/package/martin-loop)
18
+ - Benchmark challenge: [UNDER-3-CHALLENGE.md](./UNDER-3-CHALLENGE.md)
19
+
20
+ ## Submission checklist
21
+
22
+ ### OpenAlternative
23
+
24
+ - status: pending
25
+ - surface: OSS alternative listing
26
+ - copy to use: short tagline + long description
27
+ - include: GitHub, website, npm
28
+
29
+ ### DevHunt
30
+
31
+ - status: pending
32
+ - surface: product hunt style dev tools directory
33
+ - copy to use: short tagline + long description
34
+ - include: benchmark challenge and demo command
35
+
36
+ ### Uneed
37
+
38
+ - status: pending
39
+ - surface: startup/tool discovery
40
+ - copy to use: short tagline + long description
41
+ - include: GitHub, website, npm
42
+
43
+ ### BetaList
44
+
45
+ - status: pending
46
+ - surface: early product discovery
47
+ - copy to use: short tagline + long description
48
+ - include: why governed agent runs matter
49
+
50
+ ### Microlaunch
51
+
52
+ - status: pending
53
+ - surface: lightweight launch directory
54
+ - copy to use: short tagline + long description
55
+ - include: demo command and benchmark challenge
56
+
57
+ ### AlternativeTo
58
+
59
+ - status: pending
60
+ - surface: alternative comparison listing
61
+ - copy to use: short tagline + long description
62
+ - include: comparable tools and differentiators
63
+
64
+ ### Futurepedia
65
+
66
+ - status: pending
67
+ - surface: AI tools directory
68
+ - copy to use: short tagline + long description
69
+ - include: Claude, Codex, and MCP integration
70
+
71
+ ### Toolify
72
+
73
+ - status: pending
74
+ - surface: AI tool directory
75
+ - copy to use: short tagline + long description
76
+ - include: benchmark challenge link
77
+
78
+ ### There’s An AI For That
79
+
80
+ - status: pending
81
+ - surface: AI tool catalog
82
+ - copy to use: short tagline + long description
83
+ - include: GitHub, website, npm
84
+
85
+ ## Notes
86
+
87
+ - Prefer submissions that link directly to the repo, website, and npm package together.
88
+ - Reuse the benchmark challenge and `martin-loop demo` as the fastest trust-building assets.
89
+ - If a directory wants screenshots, use the current public repo README visuals instead of inventing a separate pitch deck.
@@ -0,0 +1,61 @@
1
+ # Integration Outreach Pack
2
+
3
+ Use this file for direct outreach to projects and communities building around AI coding agents.
4
+
5
+ ## Core message
6
+
7
+ Hey [Name] — I’m building MartinLoop, an OSS governed runtime for AI coding agents.
8
+
9
+ The repo already supports budget caps, verifier gates, JSONL run records, rollback evidence, Claude/Codex adapters, and an MCP package.
10
+
11
+ I’m trying to understand where a control layer like this should integrate best with projects like [their project]: CLI wrapper, MCP boundary, CI, or runtime adapter.
12
+
13
+ Would value your blunt take — useful direction or wrong abstraction?
14
+
15
+ ## Target projects
16
+
17
+ - Claude Code
18
+ - Codex CLI
19
+ - MCP servers
20
+ - Aider
21
+ - Cline
22
+ - Continue
23
+ - OpenHands
24
+ - SWE-agent
25
+ - Goose
26
+ - DevContainers
27
+ - GitHub Actions
28
+
29
+ ## Outreach notes by target
30
+
31
+ ### Claude Code
32
+
33
+ - emphasize governed repo runs and MCP install path
34
+ - ask whether the best control point is local CLI wrapper or MCP boundary
35
+
36
+ ### Codex CLI
37
+
38
+ - emphasize budget caps, verifier gates, and auditable run records
39
+ - ask whether wrapper, runtime adapter, or CI integration is most useful
40
+
41
+ ### MCP servers
42
+
43
+ - emphasize the packaged `@martinloop/mcp` server surface
44
+ - ask whether the trust layer belongs at tool boundary or runtime boundary
45
+
46
+ ### Aider, Cline, Continue, OpenHands, SWE-agent, Goose
47
+
48
+ - emphasize adapter-normalized receipts and halt reasons
49
+ - ask how much control should live in the agent runtime versus CI or wrapper
50
+
51
+ ### DevContainers and GitHub Actions
52
+
53
+ - emphasize safe default automation, budget visibility, and verifier gates in shared team workflows
54
+ - ask where platform teams want policy to live
55
+
56
+ ## Supporting assets
57
+
58
+ - challenge page: [UNDER-3-CHALLENGE.md](./UNDER-3-CHALLENGE.md)
59
+ - directory copy: [DIRECTORY-SUBMISSIONS.md](./DIRECTORY-SUBMISSIONS.md)
60
+ - repo: [github.com/Keesan12/martin-loop](https://github.com/Keesan12/martin-loop)
61
+ - npm: [npmjs.com/package/martin-loop](https://www.npmjs.com/package/martin-loop)
@@ -0,0 +1,65 @@
1
+ # Can your AI coding agent finish this task under $3?
2
+
3
+ MartinLoop is testing a simple question:
4
+
5
+ Can an AI coding agent complete a task under a fixed budget, with verifier-passed completion and an inspectable run record?
6
+
7
+ ## Current repo-backed comparison
8
+
9
+ Same task, same starting state:
10
+
11
+ - governed MartinLoop run: `$2.30`
12
+ - uncontrolled retry loop: `$5.20`
13
+ - governed outcome: `completed` and verifier-passed with an inspectable record
14
+ - uncontrolled outcome: failed after repeated retries with no comparable audit trail
15
+
16
+ These numbers match the current public benchmark story shown in the repo README and visualized in [`docs/assets/side-by-side.svg`](../assets/side-by-side.svg).
17
+
18
+ ## Why this matters
19
+
20
+ The claim is not that every governed run is always cheaper. The claim is that the run becomes inspectable and enforceable:
21
+
22
+ - budget policy is explicit
23
+ - verifier success is explicit
24
+ - stop reasons are explicit
25
+ - artifacts are inspectable after the run
26
+
27
+ That makes a coding-agent result easier to trust, replay, compare, and audit.
28
+
29
+ ## Reproduce it
30
+
31
+ From the repo root:
32
+
33
+ ```bash
34
+ pnpm --filter @martin/benchmarks test
35
+ pnpm --filter @martin/benchmarks eval
36
+ pnpm --filter @martin/benchmarks eval:phase12
37
+ ```
38
+
39
+ ## What to share back
40
+
41
+ If you run a similar challenge with Claude Code, Codex CLI, Cursor, Aider, Cline, Continue, OpenHands, SWE-agent, Goose, or an internal coding agent, share:
42
+
43
+ - total budget used
44
+ - number of attempts
45
+ - verifier result
46
+ - whether the final run was auditable
47
+ - whether rollback evidence was available
48
+
49
+ ## Try MartinLoop without risking your repo
50
+
51
+ You can copy the public demo sandbox first:
52
+
53
+ ```bash
54
+ npx martin-loop demo
55
+ ```
56
+
57
+ Then follow the printed next steps to run the sandbox locally.
58
+
59
+ ## Claim boundary
60
+
61
+ This page intentionally stays inside the current public evidence boundary:
62
+
63
+ - the `$2.30` and `$5.20` figures are the current repo-backed benchmark story used in the public README
64
+ - the reproduction commands above are real commands from this repository
65
+ - the benchmark harness remains a workspace-level surface, so challenge claims should stay tied to repo-backed outputs rather than generic marketing numbers