@metaharness/darwin 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +221 -0
  3. package/SECURITY.md +200 -0
  4. package/dist/archive.d.ts +89 -0
  5. package/dist/archive.d.ts.map +1 -0
  6. package/dist/archive.js +220 -0
  7. package/dist/archive.js.map +1 -0
  8. package/dist/bench/gates.d.ts +19 -0
  9. package/dist/bench/gates.d.ts.map +1 -0
  10. package/dist/bench/gates.js +82 -0
  11. package/dist/bench/gates.js.map +1 -0
  12. package/dist/bench/index.d.ts +11 -0
  13. package/dist/bench/index.d.ts.map +1 -0
  14. package/dist/bench/index.js +25 -0
  15. package/dist/bench/index.js.map +1 -0
  16. package/dist/bench/lineage.d.ts +60 -0
  17. package/dist/bench/lineage.d.ts.map +1 -0
  18. package/dist/bench/lineage.js +166 -0
  19. package/dist/bench/lineage.js.map +1 -0
  20. package/dist/bench/metrics.d.ts +32 -0
  21. package/dist/bench/metrics.d.ts.map +1 -0
  22. package/dist/bench/metrics.js +52 -0
  23. package/dist/bench/metrics.js.map +1 -0
  24. package/dist/bench/promotion.d.ts +21 -0
  25. package/dist/bench/promotion.d.ts.map +1 -0
  26. package/dist/bench/promotion.js +109 -0
  27. package/dist/bench/promotion.js.map +1 -0
  28. package/dist/bench/risk.d.ts +45 -0
  29. package/dist/bench/risk.d.ts.map +1 -0
  30. package/dist/bench/risk.js +71 -0
  31. package/dist/bench/risk.js.map +1 -0
  32. package/dist/bench/runner.d.ts +53 -0
  33. package/dist/bench/runner.d.ts.map +1 -0
  34. package/dist/bench/runner.js +131 -0
  35. package/dist/bench/runner.js.map +1 -0
  36. package/dist/bench/score.d.ts +16 -0
  37. package/dist/bench/score.d.ts.map +1 -0
  38. package/dist/bench/score.js +83 -0
  39. package/dist/bench/score.js.map +1 -0
  40. package/dist/bench/stats.d.ts +26 -0
  41. package/dist/bench/stats.d.ts.map +1 -0
  42. package/dist/bench/stats.js +74 -0
  43. package/dist/bench/stats.js.map +1 -0
  44. package/dist/bench/suite.d.ts +16 -0
  45. package/dist/bench/suite.d.ts.map +1 -0
  46. package/dist/bench/suite.js +59 -0
  47. package/dist/bench/suite.js.map +1 -0
  48. package/dist/bench/types.d.ts +135 -0
  49. package/dist/bench/types.d.ts.map +1 -0
  50. package/dist/bench/types.js +16 -0
  51. package/dist/bench/types.js.map +1 -0
  52. package/dist/cli.d.ts +3 -0
  53. package/dist/cli.d.ts.map +1 -0
  54. package/dist/cli.js +125 -0
  55. package/dist/cli.js.map +1 -0
  56. package/dist/evolve.d.ts +11 -0
  57. package/dist/evolve.d.ts.map +1 -0
  58. package/dist/evolve.js +129 -0
  59. package/dist/evolve.js.map +1 -0
  60. package/dist/generator.d.ts +9 -0
  61. package/dist/generator.d.ts.map +1 -0
  62. package/dist/generator.js +46 -0
  63. package/dist/generator.js.map +1 -0
  64. package/dist/index.d.ts +12 -0
  65. package/dist/index.d.ts.map +1 -0
  66. package/dist/index.js +37 -0
  67. package/dist/index.js.map +1 -0
  68. package/dist/mutator.d.ts +61 -0
  69. package/dist/mutator.d.ts.map +1 -0
  70. package/dist/mutator.js +193 -0
  71. package/dist/mutator.js.map +1 -0
  72. package/dist/openrouter-mutator.d.ts +32 -0
  73. package/dist/openrouter-mutator.d.ts.map +1 -0
  74. package/dist/openrouter-mutator.js +81 -0
  75. package/dist/openrouter-mutator.js.map +1 -0
  76. package/dist/repo_profiler.d.ts +8 -0
  77. package/dist/repo_profiler.d.ts.map +1 -0
  78. package/dist/repo_profiler.js +127 -0
  79. package/dist/repo_profiler.js.map +1 -0
  80. package/dist/safety.d.ts +45 -0
  81. package/dist/safety.d.ts.map +1 -0
  82. package/dist/safety.js +191 -0
  83. package/dist/safety.js.map +1 -0
  84. package/dist/sandbox.d.ts +24 -0
  85. package/dist/sandbox.d.ts.map +1 -0
  86. package/dist/sandbox.js +153 -0
  87. package/dist/sandbox.js.map +1 -0
  88. package/dist/scorer.d.ts +26 -0
  89. package/dist/scorer.d.ts.map +1 -0
  90. package/dist/scorer.js +168 -0
  91. package/dist/scorer.js.map +1 -0
  92. package/dist/templates.d.ts +37 -0
  93. package/dist/templates.d.ts.map +1 -0
  94. package/dist/templates.js +309 -0
  95. package/dist/templates.js.map +1 -0
  96. package/dist/types.d.ts +123 -0
  97. package/dist/types.d.ts.map +1 -0
  98. package/dist/types.js +13 -0
  99. package/dist/types.js.map +1 -0
  100. package/package.json +57 -0
@@ -0,0 +1,309 @@
1
+ // SPDX-License-Identifier: MIT
2
+ //
3
+ // Mutation-surface templates (ADR-071) — seven pure functions, one per surface,
4
+ // each returning the SOURCE TEXT of a baseline mutation-surface file.
5
+ //
6
+ // LOAD-BEARING CONSTRAINT: every string returned here is later written to a
7
+ // variant directory and statically scanned by `inspectVariant` /
8
+ // `validateGeneratedCode`. The emitted source therefore contains ONLY pure,
9
+ // side-effect-free policy logic that operates on its arguments — no process,
10
+ // network, filesystem, dynamic-eval, shell, or sensitive-material references.
11
+ // The policies are expressed over symbolic data injected at call time, never
12
+ // over embedded literals.
13
+ /**
14
+ * planner.ts — returns the source text of the baseline planner surface. The emitted createPlan(task) trims the task and returns four
15
+ * fixed steps in order (map → inspect → patch → verify); a blank task is described as 'the requested change' in the map step. Only
16
+ * profile.summary is read: it is serialized with JSON.stringify and baked in as the exported repoSummary constant for downstream context. NOTE(review): the summary lands in the emitted source as a literal — confirm the static scan tolerates arbitrary summary text.
17
+ */
18
+ export function plannerTemplate(profile) {
19
+ return `// SPDX-License-Identifier: MIT
20
+ //
21
+ // planner — mutation surface "planner" (ADR-071). Pure policy: task -> steps.
22
+
23
+ /** One ordered step in a plan. */
24
+ export interface PlanStep {
25
+ /** Stable ordering index, 0-based. */
26
+ order: number;
27
+ /** Short symbolic kind of work this step performs. */
28
+ kind: 'map' | 'inspect' | 'patch' | 'verify';
29
+ /** Human-readable description of the step. */
30
+ description: string;
31
+ }
32
+
33
+ /** A one-line summary of the repository this harness was generated for. */
34
+ export const repoSummary = ${JSON.stringify(profile.summary)};
35
+
36
+ /**
37
+ * Build an ordered plan for a task. The baseline strategy is deliberately
38
+ * conservative: locate the relevant files, inspect the existing tests, apply a
39
+ * minimal patch, then verify by running the test command.
40
+ */
41
+ export function createPlan(task: string): PlanStep[] {
42
+ const trimmed = task.trim();
43
+ const label = trimmed.length > 0 ? trimmed : 'the requested change';
44
+ return [
45
+ { order: 0, kind: 'map', description: \`Map files relevant to: \${label}\` },
46
+ { order: 1, kind: 'inspect', description: 'Inspect existing tests and surrounding code' },
47
+ { order: 2, kind: 'patch', description: 'Apply the smallest patch that satisfies the task' },
48
+ { order: 3, kind: 'verify', description: 'Verify by running the project test command' },
49
+ ];
50
+ }
51
+ `;
52
+ }
53
+ /**
54
+ * context_builder.ts — returns the source text of the baseline context-builder surface (takes no arguments). The emitted buildContext(task, files) scores each path by how many of its lowercased alphanumeric terms (length >= 2) occur in the task's term set,
55
+ * sorts by descending score with ties kept in input order, and returns the first 30 items as { path, score }. A term repeated within a path is counted each time it appears.
56
+ */
57
+ export function contextBuilderTemplate() {
58
+ return `// SPDX-License-Identifier: MIT
59
+ //
60
+ // context builder — mutation surface "contextBuilder" (ADR-071). Pure policy:
61
+ // rank candidate files by overlap with the task's terms.
62
+
63
+ /** A ranked piece of context offered to the worker. */
64
+ export interface ContextItem {
65
+ /** Relative path of the file. */
66
+ path: string;
67
+ /** Overlap score (count of shared terms). Higher is more relevant. */
68
+ score: number;
69
+ }
70
+
71
+ /** Split a string into lowercased alphanumeric terms of length >= 2. */
72
+ function terms(text: string): string[] {
73
+ return text
74
+ .toLowerCase()
75
+ .split(/[^a-z0-9]+/)
76
+ .filter((t) => t.length >= 2);
77
+ }
78
+
79
+ /**
80
+ * Rank \`files\` by how many task terms appear in each file path, returning the
81
+ * top 30 items in descending relevance. Ties keep the original path order.
82
+ */
83
+ export function buildContext(task: string, files: string[]): ContextItem[] {
84
+ const wanted = new Set(terms(task));
85
+ const scored = files.map((path, index) => {
86
+ const pathTerms = terms(path);
87
+ let score = 0;
88
+ for (const t of pathTerms) if (wanted.has(t)) score += 1;
89
+ return { path, score, index };
90
+ });
91
+ scored.sort((a, b) => (b.score - a.score) || (a.index - b.index));
92
+ return scored.slice(0, 30).map(({ path, score }) => ({ path, score }));
93
+ }
94
+ `;
95
+ }
96
+ /**
97
+ * reviewer.ts — returns the source text of the baseline reviewer surface (takes no arguments). The emitted reviewPatch(changedFiles, testPassed, riskFiles) yields one finding per changed file:
98
+ * 'blocker' for every file when testPassed is false, otherwise 'warning' for files present in riskFiles and 'info' for the rest. Risk membership is an exact string match
99
+ * against the injected list (a Set lookup). No inline pattern matching on sensitive words; the risk set is passed in as data.
100
+ */
101
+ export function reviewerTemplate() {
102
+ return `// SPDX-License-Identifier: MIT
103
+ //
104
+ // reviewer — mutation surface "reviewer" (ADR-071). Pure policy: judge a patch
105
+ // against an injected risk-file list and the test outcome.
106
+
107
+ /** A single review finding for one changed file. */
108
+ export interface ReviewFinding {
109
+ /** The changed file the finding refers to. */
110
+ file: string;
111
+ /** Severity of the finding. */
112
+ severity: 'blocker' | 'warning' | 'info';
113
+ /** Why the finding was raised. */
114
+ reason: string;
115
+ }
116
+
117
+ /**
118
+ * Review a patch. A changed file is flagged when it appears in the injected
119
+ * \`riskFiles\` list (the reviewer does not itself decide what is risky — that
120
+ * judgement is supplied as data). When the test suite failed, findings are
121
+ * escalated to 'blocker'; otherwise risk-file edits are 'warning' and all other
122
+ * changes are 'info'.
123
+ */
124
+ export function reviewPatch(
125
+ changedFiles: string[],
126
+ testPassed: boolean,
127
+ riskFiles: string[],
128
+ ): ReviewFinding[] {
129
+ const risky = new Set(riskFiles);
130
+ const findings: ReviewFinding[] = [];
131
+ for (const file of changedFiles) {
132
+ const isRisky = risky.has(file);
133
+ if (!testPassed) {
134
+ findings.push({
135
+ file,
136
+ severity: 'blocker',
137
+ reason: isRisky
138
+ ? 'tests failed and the change touches a protected file'
139
+ : 'tests failed for this change',
140
+ });
141
+ } else if (isRisky) {
142
+ findings.push({
143
+ file,
144
+ severity: 'warning',
145
+ reason: 'change touches a protected file',
146
+ });
147
+ } else {
148
+ findings.push({ file, severity: 'info', reason: 'routine change' });
149
+ }
150
+ }
151
+ return findings;
152
+ }
153
+ `;
154
+ }
155
+ /**
156
+ * retry_policy.ts — returns the source text of the baseline retry policy (takes no arguments). The emitted decideRetry(attempt, classification) refuses once attempt >= maxAttempts (3); otherwise 'transient' retries with a backoff of 250 * 2 ** attempt ms,
157
+ * 'repairable' retries with zero backoff, and anything else is not retried. The classification is an injected enum; raw output is never scanned. NOTE(review): the emitted text says repairable failures retry "once", but the code allows a retry on every attempt below maxAttempts — confirm which is intended.
158
+ */
159
+ export function retryPolicyTemplate() {
160
+ return `// SPDX-License-Identifier: MIT
161
+ //
162
+ // retry policy — mutation surface "retryPolicy" (ADR-071). Pure policy: decide
163
+ // whether to retry based on an injected, symbolic failure classification.
164
+
165
+ /** Symbolic classification of a failed attempt, supplied by the caller. */
166
+ export type FailureClassification = 'transient' | 'repairable' | 'unknown';
167
+
168
+ /** The decision about whether and how to retry. */
169
+ export interface RetryDecision {
170
+ /** Whether another attempt should be made. */
171
+ retry: boolean;
172
+ /** Backoff to wait before the next attempt, in milliseconds. */
173
+ backoffMs: number;
174
+ /** Why the decision was made. */
175
+ reason: string;
176
+ }
177
+
178
+ /** Maximum number of attempts the baseline policy will allow. */
179
+ export const maxAttempts = 3;
180
+
181
+ /**
182
+ * Decide whether to retry. Transient failures (e.g. a timeout) are retried with
183
+ * exponential backoff; repairable failures (e.g. a type error a patch can fix)
184
+ * get one immediate retry; unknown failures are not retried. The classification
185
+ * is injected — the policy never inspects raw process output.
186
+ */
187
+ export function decideRetry(
188
+ attempt: number,
189
+ classification: FailureClassification,
190
+ ): RetryDecision {
191
+ if (attempt >= maxAttempts) {
192
+ return { retry: false, backoffMs: 0, reason: 'attempt budget exhausted' };
193
+ }
194
+ switch (classification) {
195
+ case 'transient':
196
+ return {
197
+ retry: true,
198
+ backoffMs: 250 * 2 ** attempt,
199
+ reason: 'transient failure — retry with backoff',
200
+ };
201
+ case 'repairable':
202
+ return { retry: true, backoffMs: 0, reason: 'repairable failure — retry once' };
203
+ case 'unknown':
204
+ default:
205
+ return { retry: false, backoffMs: 0, reason: 'unknown failure — do not retry' };
206
+ }
207
+ }
208
+ `;
209
+ }
210
+ /**
211
+ * tool_policy.ts — returns the source text of the baseline tool policy (takes no arguments). The emitted module allow-lists the symbolic command kinds 'test' | 'build' | 'lint',
212
+ * and its orderKinds(kinds) drops any kind that is not allow-listed and sorts the rest lint → build → test. No raw shell strings appear.
213
+ */
214
+ export function toolPolicyTemplate() {
215
+ return `// SPDX-License-Identifier: MIT
216
+ //
217
+ // tool policy — mutation surface "toolPolicy" (ADR-071). Pure policy over
218
+ // SYMBOLIC command kinds. No raw shell strings are embedded.
219
+
220
+ /** The symbolic kinds of command the harness may schedule. */
221
+ export type CommandKind = 'test' | 'build' | 'lint';
222
+
223
+ /** The allow-listed kinds, in canonical order. */
224
+ export const allowedKinds: CommandKind[] = ['test', 'build', 'lint'];
225
+
226
+ /** True iff the given kind is permitted. */
227
+ export function isKindAllowed(k: CommandKind): boolean {
228
+ return allowedKinds.includes(k);
229
+ }
230
+
231
+ /** Preferred execution order: lint first (cheap), then build, then test. */
232
+ const ORDER: Record<CommandKind, number> = { lint: 0, build: 1, test: 2 };
233
+
234
+ /**
235
+ * Order a set of requested kinds deterministically (cheapest-first), dropping
236
+ * any kind that is not allow-listed.
237
+ */
238
+ export function orderKinds(kinds: CommandKind[]): CommandKind[] {
239
+ return kinds
240
+ .filter(isKindAllowed)
241
+ .slice()
242
+ .sort((a, b) => ORDER[a] - ORDER[b]);
243
+ }
244
+ `;
245
+ }
246
+ /**
247
+ * memory_policy.ts — returns the source text of the baseline memory policy (takes no arguments); the emitted shouldRemember(record) is true for novel records and for failures that took more than one attempt, and false otherwise.
248
+ */
249
+ export function memoryPolicyTemplate() {
250
+ return `// SPDX-License-Identifier: MIT
251
+ //
252
+ // memory policy — mutation surface "memoryPolicy" (ADR-071). Pure policy:
253
+ // decide whether an outcome record is worth keeping.
254
+
255
+ /** A record of one outcome the harness might choose to remember. */
256
+ export interface MemoryRecord {
257
+ /** Whether the associated task ultimately succeeded. */
258
+ success: boolean;
259
+ /** How many attempts the task took. */
260
+ attempts: number;
261
+ /** Whether this outcome was novel relative to what is already known. */
262
+ novel: boolean;
263
+ }
264
+
265
+ /**
266
+ * Decide whether to remember a record. The baseline keeps anything novel, plus
267
+ * any failure that took more than one attempt (a hard case worth recalling).
268
+ */
269
+ export function shouldRemember(record: MemoryRecord): boolean {
270
+ if (record.novel) return true;
271
+ if (!record.success && record.attempts > 1) return true;
272
+ return false;
273
+ }
274
+ `;
275
+ }
276
+ /**
277
+ * score_policy.ts — returns the source text of the baseline score policy (takes no arguments); the emitted scoreWeights() returns the weight vector folded over the positive scoring terms: taskSuccess 0.35, testPassRate 0.2, traceQuality 0.15, and 0.1 each for costEfficiency, latencyEfficiency and safetyScore.
278
+ */
279
+ export function scorePolicyTemplate() {
280
+ return `// SPDX-License-Identifier: MIT
281
+ //
282
+ // score policy — mutation surface "scorePolicy" (ADR-071). Pure policy: the
283
+ // weight vector applied to the positive scoring terms (ADR-072). Weights are
284
+ // non-negative and sum to 1.
285
+
286
+ /** The weights applied to each positive scoring term. */
287
+ export interface ScoreWeights {
288
+ taskSuccess: number;
289
+ testPassRate: number;
290
+ traceQuality: number;
291
+ costEfficiency: number;
292
+ latencyEfficiency: number;
293
+ safetyScore: number;
294
+ }
295
+
296
+ /** The baseline weight vector (sums to 1). */
297
+ export function scoreWeights(): ScoreWeights {
298
+ return {
299
+ taskSuccess: 0.35,
300
+ testPassRate: 0.2,
301
+ traceQuality: 0.15,
302
+ costEfficiency: 0.1,
303
+ latencyEfficiency: 0.1,
304
+ safetyScore: 0.1,
305
+ };
306
+ }
307
+ `;
308
+ }
309
+ //# sourceMappingURL=templates.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"templates.js","sourceRoot":"","sources":["../src/templates.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,gFAAgF;AAChF,sEAAsE;AACtE,EAAE;AACF,4EAA4E;AAC5E,iEAAiE;AACjE,4EAA4E;AAC5E,6EAA6E;AAC7E,8EAA8E;AAC9E,6EAA6E;AAC7E,0BAA0B;AAI1B;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAAC,OAAoB;IAClD,OAAO;;;;;;;;;;;;;;;6BAeoB,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,OAAO,CAAC;;;;;;;;;;;;;;;;;CAiB3D,CAAC;AACF,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,sBAAsB;IACpC,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAoCR,CAAC;AACF,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,gBAAgB;IAC9B,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAmDR,CAAC;AACF,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,mBAAmB;IACjC,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAgDR,CAAC;AACF,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kBAAkB;IAChC,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA6BR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,oBAAoB;IAClC,OAAO;;;;;;;;;;;;;;;;;;;;;;;;CAwBR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB;IACjC,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;CA2BR,CAAC;AACF,CAAC"}
@@ -0,0 +1,123 @@
1
+ /**
2
+ * The seven — and only seven — files a child variant may mutate (ADR-071). One
3
+ * concern each. The authoritative filename mapping lives in `safety.ts`
4
+ * (`FILE_BY_SURFACE` / `APPROVED_FILES`); this union is the symbolic handle. Each member matches the surface name stamped in the header comment of the corresponding baseline template in templates.js.
5
+ */
6
+ export type MutationSurface = 'planner' | 'contextBuilder' | 'reviewer' | 'retryPolicy' | 'toolPolicy' | 'memoryPolicy' | 'scorePolicy';
7
+ /** A repo distilled to the signals Darwin Mode needs (ADR-070 §profile). `packageManager` is one of 'npm' | 'pnpm' | 'yarn' | 'unknown' — presumably 'unknown' when none is detected; confirm against the profiler. */
8
+ export interface RepoProfile {
9
+ /** Absolute path to the repo root. */
10
+ root: string;
11
+ packageManager: 'npm' | 'pnpm' | 'yarn' | 'unknown';
12
+ /** The command the sandbox runs to score a variant, e.g. "npm test". */
13
+ testCommand: string;
14
+ /** Source files discovered (relative to root), used by the context builder. */
15
+ sourceFiles: string[];
16
+ /** Files matching risk patterns (.env / secret / deploy / …) — never written. */
17
+ riskFiles: string[];
18
+ /** A short, human-readable one-line summary; plannerTemplate embeds it in the generated planner as `repoSummary`. */
19
+ summary: string;
20
+ }
21
+ /** A single harness variant: a directory of approved mutation-surface files, plus its place in the lineage (id, parent, generation). */
22
+ export interface HarnessVariant {
23
+ /** Stable id; "baseline" for the root, else `g<gen>_v<index>_<rand>`. */
24
+ id: string;
25
+ /** Parent variant id, or null for the baseline. */
26
+ parentId: string | null;
27
+ /** Generation number (0 = baseline). */
28
+ generation: number;
29
+ /** Absolute path to this variant's directory under workRoot/variants. */
30
+ dir: string;
31
+ /** Which surface this variant mutated relative to its parent. NOTE(review): the type is non-nullable, so the baseline (which has no parent) must still carry a surface value — confirm which one the generator assigns. */
32
+ mutationSurface: MutationSurface;
33
+ /** One-line description of the mutation. */
34
+ mutationSummary: string;
35
+ /** ISO timestamp of creation. */
36
+ createdAt: string;
37
+ }
38
+ /** The result of running one variant against one task in the sandbox. `startedAt` / `finishedAt` are strings — presumably ISO timestamps like HarnessVariant.createdAt; confirm against the sandbox. */
39
+ export interface RunTrace {
40
+ variantId: string;
41
+ taskId: string;
42
+ startedAt: string;
43
+ finishedAt: string;
44
+ /** Process exit code. The reserved value 99 means "disqualified by safety". */
45
+ exitCode: number;
46
+ stdout: string;
47
+ stderr: string;
48
+ durationMs: number;
49
+ /** Whether the run hit its wall-clock timeout (drives the tool-loop penalty). */
50
+ timedOut: boolean;
51
+ /** Findings from the pre-execution safety inspection (empty ⇒ clean). */
52
+ blockedActions: string[];
53
+ }
54
+ /** A fully-scored variant (ADR-072). All terms and penalties are in [0,1]. The six fields taskSuccess … safetyScore share their names with the ScoreWeights keys in the score-policy template (the positive terms); secretExposure … costOverrun are presumably the penalty layer — confirm against the scorer. */
55
+ export interface ScoreCard {
56
+ variantId: string;
57
+ taskSuccess: number;
58
+ testPassRate: number;
59
+ traceQuality: number;
60
+ costEfficiency: number;
61
+ latencyEfficiency: number;
62
+ safetyScore: number;
63
+ secretExposure: number;
64
+ destructiveAction: number;
65
+ hallucinatedFile: number;
66
+ toolLoop: number;
67
+ costOverrun: number;
68
+ /** Weighted base score before penalties (0..1). */
69
+ baseScore: number;
70
+ /** baseScore minus the penalty layer (may be negative). */
71
+ finalScore: number;
72
+ /** True iff the strict promotion gate (ADR-072) holds vs. the parent. */
73
+ promoted: boolean;
74
+ reason: string;
75
+ }
76
+ /** One node in the archive tree (ADR-073): a variant, its score once evaluated, and the ids of its children. */
77
+ export interface ArchiveRecord {
78
+ variant: HarnessVariant;
79
+ /** null until evaluated. */
80
+ score: ScoreCard | null;
81
+ /** Child variant ids (the tree edges). */
82
+ children: string[];
83
+ }
84
+ /** Configuration for a full `evolve` run. Required: repoRoot, workRoot, generations, childrenPerGeneration, tasks, promotionDelta; the remaining fields are optional. */
85
+ export interface EvolutionConfig {
86
+ /** Absolute path to the repo to evolve. */
87
+ repoRoot: string;
88
+ /** Absolute path to the `.metaharness` work tree. */
89
+ workRoot: string;
90
+ /** Number of generations to run. */
91
+ generations: number;
92
+ /** Children produced per parent per generation. */
93
+ childrenPerGeneration: number;
94
+ /** The fixed task ids each variant is scored on (the child cannot edit these). */
95
+ tasks: string[];
96
+ /** A child must beat its parent's finalScore by at least this margin (ADR-072). */
97
+ promotionDelta: number;
98
+ /** Max variants evaluated concurrently (bounded resource use). Default 4. */
99
+ concurrency?: number;
100
+ /** Per-variant test-command wall-clock budget in ms. Default 120000. */
101
+ taskTimeoutMs?: number;
102
+ /** Per-generation cost-proxy budget (cumulative variant-seconds). Optional breaker. */
103
+ costBudgetSeconds?: number;
104
+ /** Deterministic seed for mutation selection (reproducibility). Default 0. */
105
+ seed?: number;
106
+ /**
107
+ * Pluggable code generator (ADR-071). Default is the DeterministicMutator;
108
+ * pass an LLM-backed one (e.g. OpenRouterMutator) to evolve via a model — it
109
+ * still passes the same validateGeneratedCode safety gate.
110
+ */
111
+ generator?: import('./mutator.js').CodeGenerator;
112
+ }
113
+ /** The outcome of an `evolve` run. `winner` is nullable — presumably null when no variant was promoted; confirm against evolve. */
114
+ export interface EvolutionResult {
115
+ baseline: ArchiveRecord;
116
+ winner: ArchiveRecord | null;
117
+ /** Every record in the archive, in insertion order. */
118
+ records: ArchiveRecord[];
119
+ generations: number;
120
+ /** Lineage of the winner, baseline → … → winner (ids). */
121
+ winnerLineage: string[];
122
+ }
123
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAYA;;;;GAIG;AACH,MAAM,MAAM,eAAe,GACvB,SAAS,GACT,gBAAgB,GAChB,UAAU,GACV,aAAa,GACb,YAAY,GACZ,cAAc,GACd,aAAa,CAAC;AAElB,4EAA4E;AAC5E,MAAM,WAAW,WAAW;IAC1B,sCAAsC;IACtC,IAAI,EAAE,MAAM,CAAC;IACb,cAAc,EAAE,KAAK,GAAG,MAAM,GAAG,MAAM,GAAG,SAAS,CAAC;IACpD,wEAAwE;IACxE,WAAW,EAAE,MAAM,CAAC;IACpB,+EAA+E;IAC/E,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,iFAAiF;IACjF,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,gDAAgD;IAChD,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,gFAAgF;AAChF,MAAM,WAAW,cAAc;IAC7B,yEAAyE;IACzE,EAAE,EAAE,MAAM,CAAC;IACX,mDAAmD;IACnD,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,wCAAwC;IACxC,UAAU,EAAE,MAAM,CAAC;IACnB,yEAAyE;IACzE,GAAG,EAAE,MAAM,CAAC;IACZ,iEAAiE;IACjE,eAAe,EAAE,eAAe,CAAC;IACjC,4CAA4C;IAC5C,eAAe,EAAE,MAAM,CAAC;IACxB,iCAAiC;IACjC,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,yEAAyE;AACzE,MAAM,WAAW,QAAQ;IACvB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,+EAA+E;IAC/E,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,iFAAiF;IACjF,QAAQ,EAAE,OAAO,CAAC;IAClB,yEAAyE;IACzE,cAAc,EAAE,MAAM,EAAE,CAAC;CAC1B;AAED,8EAA8E;AAC9E,MAAM,WAAW,SAAS;IACxB,SAAS,EAAE,MAAM,CAAC;IAElB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;IACvB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,WAAW,EAAE,MAAM,CAAC;IAEpB,cAAc,EAAE,MAAM,CAAC;IACvB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,gBAAgB,EAAE,MAAM,CAAC;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IAEpB,mDAAmD;IACnD,SAAS,EAAE,MAAM,CAAC;IAClB,2DAA2D;IAC3D,UAAU,EAAE,MAAM,CAAC;IACnB,yEAAyE;IACzE,QAAQ,EAAE,OAAO,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,8CAA8C;AAC9C,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,cAAc,CAAC;IACxB,4BAA4B;IAC5B,KAAK,EAAE,SAAS,GAAG,IAAI,CAAC;IACxB,0CAA0C;IAC1C,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,6CAA6C;AAC7C,MAAM,WAAW,eAAe;IAC9B,2CAA2C;IAC3C,QAAQ,EAAE,MAAM,CAAC;IACjB,qDAAqD;IACrD,QAAQ,EAAE,MAAM,CAAC;IACjB,oCAAoC;IACpC,WAAW,EAAE,MAAM,CAAC;IACpB,mDAAmD;IAC
nD,qBAAqB,EAAE,MAAM,CAAC;IAC9B,kFAAkF;IAClF,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,mFAAmF;IACnF,cAAc,EAAE,MAAM,CAAC;IACvB,6EAA6E;IAC7E,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,wEAAwE;IACxE,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,uFAAuF;IACvF,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,8EAA8E;IAC9E,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,cAAc,EAAE,aAAa,CAAC;CAClD;AAED,sCAAsC;AACtC,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,aAAa,CAAC;IACxB,MAAM,EAAE,aAAa,GAAG,IAAI,CAAC;IAC7B,uDAAuD;IACvD,OAAO,EAAE,aAAa,EAAE,CAAC;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,0DAA0D;IAC1D,aAAa,EAAE,MAAM,EAAE,CAAC;CACzB"}
package/dist/types.js ADDED
@@ -0,0 +1,13 @@
1
+ // SPDX-License-Identifier: MIT
2
+ //
3
+ // Darwin Mode — shared types (the integration contract).
4
+ //
5
+ // Every module in this package codes against these interfaces. They are the
6
+ // load-bearing contract: the profiler produces a RepoProfile; the generator and
7
+ // mutator produce HarnessVariants; the sandbox produces RunTraces; the scorer
8
+ // folds traces into a ScoreCard; the archive stores ArchiveRecords as a tree.
9
+ //
10
+ // See ADR-070 (loop), ADR-071 (mutation surfaces), ADR-072 (scoring),
11
+ // ADR-073 (archive), ADR-075 (acceptance).
12
+ export {}; // types-only module: nothing is exported at runtime; the empty export keeps the file an ES module
13
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,+BAA+B;AAC/B,EAAE;AACF,yDAAyD;AACzD,EAAE;AACF,4EAA4E;AAC5E,gFAAgF;AAChF,8EAA8E;AAC9E,8EAA8E;AAC9E,EAAE;AACF,sEAAsE;AACtE,2CAA2C"}
package/package.json ADDED
@@ -0,0 +1,57 @@
1
+ {
2
+ "name": "@metaharness/darwin",
3
+ "version": "0.1.0",
4
+ "description": "Darwin Mode (ADR-070…075) — bounded, empirical, population-based self-improvement of an agent harness. Generate child harness variants, sandbox-score them, archive the lineage, and promote only measured, safe wins. The model is frozen; the harness evolves. Dependency-free (Node built-ins only).",
5
+ "type": "module",
6
+ "main": "./dist/index.js",
7
+ "types": "./dist/index.d.ts",
8
+ "bin": {
9
+ "metaharness-darwin": "./dist/cli.js"
10
+ },
11
+ "exports": {
12
+ ".": {
13
+ "types": "./dist/index.d.ts",
14
+ "import": "./dist/index.js"
15
+ }
16
+ },
17
+ "files": [
18
+ "dist/**",
19
+ "README.md",
20
+ "SECURITY.md",
21
+ "LICENSE"
22
+ ],
23
+ "scripts": {
24
+ "build": "tsc",
25
+ "test": "vitest run",
26
+ "lint": "tsc --noEmit"
27
+ },
28
+ "keywords": [
29
+ "agent-harness",
30
+ "darwin-mode",
31
+ "self-improvement",
32
+ "evolutionary-search",
33
+ "metaharness",
34
+ "dgm",
35
+ "archive",
36
+ "sandbox",
37
+ "benchmark"
38
+ ],
39
+ "author": "rUv <ruv@ruv.net>",
40
+ "license": "MIT",
41
+ "homepage": "https://github.com/ruvnet/agent-harness-generator",
42
+ "repository": {
43
+ "type": "git",
44
+ "url": "https://github.com/ruvnet/agent-harness-generator",
45
+ "directory": "packages/darwin-mode"
46
+ },
47
+ "engines": {
48
+ "node": ">=20.0.0"
49
+ },
50
+ "devDependencies": {
51
+ "typescript": "^5.4.0",
52
+ "vitest": "^2.0.0"
53
+ },
54
+ "publishConfig": {
55
+ "access": "public"
56
+ }
57
+ }