darwin-agents 0.4.9 → 0.5.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +85 -0
- package/dist/agents/analyst.d.ts +11 -0
- package/dist/agents/analyst.d.ts.map +1 -0
- package/dist/agents/analyst.js +78 -0
- package/dist/agents/analyst.js.map +1 -0
- package/dist/agents/blog-writer.d.ts +13 -0
- package/dist/agents/blog-writer.d.ts.map +1 -0
- package/dist/agents/blog-writer.js +59 -0
- package/dist/agents/blog-writer.js.map +1 -0
- package/dist/agents/critic.d.ts +11 -0
- package/dist/agents/critic.d.ts.map +1 -0
- package/dist/agents/critic.js +57 -0
- package/dist/agents/critic.js.map +1 -0
- package/dist/agents/index.d.ts +15 -0
- package/dist/agents/index.d.ts.map +1 -0
- package/dist/agents/index.js +31 -0
- package/dist/agents/index.js.map +1 -0
- package/dist/agents/investigator-critic.d.ts +10 -0
- package/dist/agents/investigator-critic.d.ts.map +1 -0
- package/dist/agents/investigator-critic.js +78 -0
- package/dist/agents/investigator-critic.js.map +1 -0
- package/dist/agents/investigator.d.ts +13 -0
- package/dist/agents/investigator.d.ts.map +1 -0
- package/dist/agents/investigator.js +105 -0
- package/dist/agents/investigator.js.map +1 -0
- package/dist/agents/marketing.d.ts +13 -0
- package/dist/agents/marketing.d.ts.map +1 -0
- package/dist/agents/marketing.js +59 -0
- package/dist/agents/marketing.js.map +1 -0
- package/dist/agents/researcher.d.ts +11 -0
- package/dist/agents/researcher.d.ts.map +1 -0
- package/dist/agents/researcher.js +68 -0
- package/dist/agents/researcher.js.map +1 -0
- package/dist/agents/writer.d.ts +9 -0
- package/dist/agents/writer.d.ts.map +1 -0
- package/dist/agents/writer.js +47 -0
- package/dist/agents/writer.js.map +1 -0
- package/dist/cli/create.d.ts +11 -0
- package/dist/cli/create.d.ts.map +1 -0
- package/dist/cli/create.js +104 -0
- package/dist/cli/create.js.map +1 -0
- package/dist/cli/evolve.d.ts +13 -0
- package/dist/cli/evolve.d.ts.map +1 -0
- package/dist/cli/evolve.js +69 -0
- package/dist/cli/evolve.js.map +1 -0
- package/dist/cli/index.d.ts +13 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +84 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/init.d.ts +12 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +68 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/cli/run.d.ts +7 -0
- package/dist/cli/run.d.ts.map +1 -0
- package/dist/cli/run.js +371 -0
- package/dist/cli/run.js.map +1 -0
- package/dist/cli/status.d.ts +7 -0
- package/dist/cli/status.d.ts.map +1 -0
- package/dist/cli/status.js +123 -0
- package/dist/cli/status.js.map +1 -0
- package/dist/core/agent.d.ts +53 -0
- package/dist/core/agent.d.ts.map +1 -0
- package/dist/core/agent.js +172 -0
- package/dist/core/agent.js.map +1 -0
- package/dist/core/runner.d.ts +75 -0
- package/dist/core/runner.d.ts.map +1 -0
- package/dist/core/runner.js +255 -0
- package/dist/core/runner.js.map +1 -0
- package/dist/evolution/loop.d.ts +100 -0
- package/dist/evolution/loop.d.ts.map +1 -0
- package/dist/evolution/loop.js +424 -0
- package/dist/evolution/loop.js.map +1 -0
- package/dist/evolution/multi-critic.d.ts +58 -0
- package/dist/evolution/multi-critic.d.ts.map +1 -0
- package/dist/evolution/multi-critic.js +326 -0
- package/dist/evolution/multi-critic.js.map +1 -0
- package/dist/evolution/notifications.d.ts +32 -0
- package/dist/evolution/notifications.d.ts.map +1 -0
- package/dist/evolution/notifications.js +92 -0
- package/dist/evolution/notifications.js.map +1 -0
- package/dist/evolution/optimizer.d.ts +64 -0
- package/dist/evolution/optimizer.d.ts.map +1 -0
- package/dist/evolution/optimizer.js +223 -0
- package/dist/evolution/optimizer.js.map +1 -0
- package/dist/evolution/patterns.d.ts +63 -0
- package/dist/evolution/patterns.d.ts.map +1 -0
- package/dist/evolution/patterns.js +297 -0
- package/dist/evolution/patterns.js.map +1 -0
- package/dist/evolution/safety.d.ts +76 -0
- package/dist/evolution/safety.d.ts.map +1 -0
- package/dist/evolution/safety.js +182 -0
- package/dist/evolution/safety.js.map +1 -0
- package/dist/evolution/tracker.d.ts +48 -0
- package/dist/evolution/tracker.d.ts.map +1 -0
- package/dist/evolution/tracker.js +163 -0
- package/dist/evolution/tracker.js.map +1 -0
- package/dist/index.d.ts +32 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +35 -0
- package/dist/index.js.map +1 -0
- package/dist/memory/index.d.ts +32 -0
- package/dist/memory/index.d.ts.map +1 -0
- package/dist/memory/index.js +49 -0
- package/dist/memory/index.js.map +1 -0
- package/dist/memory/postgres-memory.d.ts +52 -0
- package/dist/memory/postgres-memory.d.ts.map +1 -0
- package/dist/memory/postgres-memory.js +515 -0
- package/dist/memory/postgres-memory.js.map +1 -0
- package/dist/memory/sqlite-memory.d.ts +36 -0
- package/dist/memory/sqlite-memory.d.ts.map +1 -0
- package/dist/memory/sqlite-memory.js +380 -0
- package/dist/memory/sqlite-memory.js.map +1 -0
- package/dist/providers/anthropic.d.ts +20 -0
- package/dist/providers/anthropic.d.ts.map +1 -0
- package/dist/providers/anthropic.js +82 -0
- package/dist/providers/anthropic.js.map +1 -0
- package/dist/providers/claude-cli.d.ts +35 -0
- package/dist/providers/claude-cli.d.ts.map +1 -0
- package/dist/providers/claude-cli.js +153 -0
- package/dist/providers/claude-cli.js.map +1 -0
- package/dist/providers/index.d.ts +39 -0
- package/dist/providers/index.d.ts.map +1 -0
- package/dist/providers/index.js +58 -0
- package/dist/providers/index.js.map +1 -0
- package/dist/providers/ollama.d.ts +17 -0
- package/dist/providers/ollama.d.ts.map +1 -0
- package/dist/providers/ollama.js +64 -0
- package/dist/providers/ollama.js.map +1 -0
- package/dist/providers/openai.d.ts +19 -0
- package/dist/providers/openai.d.ts.map +1 -0
- package/dist/providers/openai.js +75 -0
- package/dist/providers/openai.js.map +1 -0
- package/dist/providers/types.d.ts +62 -0
- package/dist/providers/types.d.ts.map +1 -0
- package/dist/providers/types.js +9 -0
- package/dist/providers/types.js.map +1 -0
- package/dist/src/core/trace-capture.d.ts +107 -0
- package/dist/src/core/trace-capture.d.ts.map +1 -0
- package/dist/src/core/trace-capture.js +183 -0
- package/dist/src/core/trace-capture.js.map +1 -0
- package/dist/src/evolution/optimizer-gepa.d.ts +149 -0
- package/dist/src/evolution/optimizer-gepa.d.ts.map +1 -0
- package/dist/src/evolution/optimizer-gepa.js +198 -0
- package/dist/src/evolution/optimizer-gepa.js.map +1 -0
- package/dist/src/evolution/pareto.d.ts +116 -0
- package/dist/src/evolution/pareto.d.ts.map +1 -0
- package/dist/src/evolution/pareto.js +140 -0
- package/dist/src/evolution/pareto.js.map +1 -0
- package/dist/src/evolution/reflector.d.ts +107 -0
- package/dist/src/evolution/reflector.d.ts.map +1 -0
- package/dist/src/evolution/reflector.js +158 -0
- package/dist/src/evolution/reflector.js.map +1 -0
- package/dist/src/evolution/run-prompt-fn.d.ts +11 -0
- package/dist/src/evolution/run-prompt-fn.d.ts.map +1 -0
- package/dist/src/evolution/run-prompt-fn.js +11 -0
- package/dist/src/evolution/run-prompt-fn.js.map +1 -0
- package/dist/src/index.d.ts +7 -1
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +6 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/memory/postgres-memory.d.ts.map +1 -1
- package/dist/src/memory/postgres-memory.js +38 -3
- package/dist/src/memory/postgres-memory.js.map +1 -1
- package/dist/src/memory/sqlite-memory.d.ts.map +1 -1
- package/dist/src/memory/sqlite-memory.js +47 -2
- package/dist/src/memory/sqlite-memory.js.map +1 -1
- package/dist/src/types.d.ts +136 -0
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/types.js.map +1 -1
- package/dist/types.d.ts +221 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +19 -0
- package/dist/types.js.map +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"trace-capture.d.ts","sourceRoot":"","sources":["../../../src/core/trace-capture.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAEH,OAAO,KAAK,EACV,cAAc,EAEd,eAAe,EAEhB,MAAM,aAAa,CAAC;AAuBrB,MAAM,WAAW,YAAY;IAC3B;;;;;OAKG;IACH,aAAa,CAAC,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC;IAC9E;;;;OAIG;IACH,gBAAgB,CACd,EAAE,EAAE,MAAM,EACV,OAAO,EAAE,SAAS,GAAG,OAAO,EAC5B,IAAI,CAAC,EAAE;QACL,aAAa,CAAC,EAAE,MAAM,CAAC;QACvB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,GACA,IAAI,CAAC;IACR,gFAAgF;IAChF,SAAS,IAAI,IAAI,CAAC;IAClB;;;;;;OAMG;IACH,eAAe,IAAI,IAAI,CAAC;IACxB,gFAAgF;IAChF,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACnD;;;;;OAKG;IACH,SAAS,CAAC,KAAK,EAAE,eAAe,GAAG,IAAI,CAAC;IACxC;;;;;OAKG;IACH,QAAQ,IAAI,cAAc,CAAC;CAC5B;AAED,MAAM,WAAW,mBAAmB;IAClC;;;OAGG;IACH,GAAG,CAAC,EAAE,MAAM,MAAM,CAAC;IACnB;;;;OAIG;IACH,SAAS,CAAC,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,OAAO,CAAC;CAC3C;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,GAAE,mBAAwB,GAAG,YAAY,CAoI/E"}
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Darwin — Execution Trace Capture (v0.5 / A1)
|
|
3
|
+
*
|
|
4
|
+
* Pure, transport-agnostic capturer. Knows nothing about Anthropic SDK,
|
|
5
|
+
* Claude CLI, OpenAI, or any specific runtime — the runtime feeds events,
|
|
6
|
+
* the capturer aggregates into an ExecutionTrace.
|
|
7
|
+
*
|
|
8
|
+
* Four event groups feed the capturer; they map onto the industry-standard span types
|
|
9
|
+
* (Braintrust + Langfuse + Strands SDK + OTEL GenAI 2026):
|
|
10
|
+
* - recordToolUse / recordToolResult → Tool spans
|
|
11
|
+
* - recordTextBlock → assistant prose counter (NOT thinking-block)
|
|
12
|
+
* - recordError → Turn-level errors
|
|
13
|
+
* - addTokens → aggregated LLM token usage
|
|
14
|
+
*
|
|
15
|
+
* Pairing rule: each `recordToolUse(id, ...)` MUST be paired with a later
|
|
16
|
+
* `recordToolResult(id, ...)` to compute durationMs. Unpaired uses get
|
|
17
|
+
* `durationMs >= 0` (start-to-finalize) and `outcome = 'error'` at
|
|
18
|
+
* finalize-time with errorClass = 'unpaired_call' so we never silently
|
|
19
|
+
* drop hanging tools.
|
|
20
|
+
*
|
|
21
|
+
* Privacy + size: args are passed through as-is — caller's responsibility
|
|
22
|
+
* to truncate sensitive values. resultSummary is truncated to 2000 chars
|
|
23
|
+
* at recordToolResult-time. OTEL note: `gen_ai.tool.call.arguments` is
|
|
24
|
+
* Opt-In in the GenAI spec; Darwin's "always-capture args" stance is
|
|
25
|
+
* acceptable for internal use but MUST be documented when traces touch
|
|
26
|
+
* customer data (V2 will add a Redaction-Layer).
|
|
27
|
+
*
|
|
28
|
+
* Backwards-compat: capturer is opt-in. Existing code paths that don't
|
|
29
|
+
* instantiate it produce DarwinExperiment.trajectory === undefined and
|
|
30
|
+
* the rest of the system behaves identically to pre-A1.
|
|
31
|
+
*/
|
|
32
|
+
/**
|
|
33
|
+
* Result-summary cap. Tuned for GEPA-style reflective optimizers
|
|
34
|
+
* (Phase 2 A2): the reflector reads the summary as context for
|
|
35
|
+
* "why did this tool produce a bad outcome", so a too-tight cap
|
|
36
|
+
* truncates the diagnostic signal. 2000 chars holds a typical
|
|
37
|
+
* JSON-RPC tool response without bloating JSONB.
|
|
38
|
+
* Bumped from 500 → 2000 after R1 Research review (Gap 3).
|
|
39
|
+
*/
|
|
40
|
+
const MAX_RESULT_SUMMARY_CHARS = 2000;
/** Cap applied to every stored error message (tool-level and turn-level). */
const MAX_ERROR_MESSAGE_CHARS = 200;

/**
 * Create a fresh trace capturer.
 *
 * The capturer is a closure over private state (pending tool calls,
 * completed tool calls, errors, counters, token totals). The runtime feeds
 * it events; `finalize()` aggregates them into a plain trace object.
 *
 * Usage:
 * ```ts
 * const trace = createTraceCapture();
 * trace.startTurn();
 * trace.recordToolUse('call_1', 'mcp__nex__search', { query: 'foo' });
 * trace.recordToolResult('call_1', 'success', { resultSummary: '3 hits' });
 * trace.recordTextBlock();
 * trace.addTokens({ inputTokens: 1200, outputTokens: 340 });
 * const trajectory = trace.finalize();
 * ```
 *
 * @param {object} [opts]
 * @param {() => number} [opts.now] Clock returning epoch milliseconds.
 *   Defaults to `Date.now`; inject a fake clock for deterministic tests.
 * @param {(toolName: string) => boolean} [opts.isMcpTool] Predicate used to
 *   count MCP invocations. Defaults to "name starts with `mcp__`".
 * @returns {object} Capturer with `recordToolUse`, `recordToolResult`,
 *   `startTurn`, `recordTextBlock`, `recordError`, `addTokens`, `finalize`.
 */
export function createTraceCapture(opts = {}) {
  const now = opts.now ?? (() => Date.now());
  // The default predicate tolerates non-string tool names instead of throwing.
  const isMcpTool =
    opts.isMcpTool ?? ((name) => typeof name === 'string' && name.startsWith('mcp__'));
  const capturedAt = new Date(now()).toISOString();
  const pending = new Map();
  const completed = [];
  const errors = [];
  let textBlockCount = 0;
  let turnCount = 0;
  // Token-usage aggregate. Kept as a sparse object — undefined fields
  // stay undefined (vs 0) so consumers can distinguish "provider didn't
  // report" from "actually zero tokens".
  const tokenUsage = {};

  /**
   * Current 1-indexed turn. Events recorded before any `startTurn()` are
   * attributed to turn 1 so the capturer stays useful if the runtime
   * forgets to open a turn.
   */
  function currentTurn() {
    return turnCount === 0 ? 1 : turnCount;
  }

  /** Additive merge of one usage field into the aggregate; non-finite values are ignored. */
  function addToken(field, value) {
    if (typeof value !== 'number' || !Number.isFinite(value)) {
      return;
    }
    tokenUsage[field] = (tokenUsage[field] ?? 0) + value;
  }

  /**
   * Coerce to string, then truncate. Coercion keeps the capturer from
   * throwing when a runtime passes an Error object, a number, etc.
   */
  function clip(value, maxChars) {
    return String(value).slice(0, maxChars);
  }

  return {
    /**
     * Register the start of a tool call.
     * A duplicate id overwrites the earlier pending entry (last write wins).
     */
    recordToolUse(id, tool, args) {
      pending.set(id, {
        callId: id,
        tool,
        args,
        startedAtMs: now(),
        turn: currentTurn(),
      });
    },

    /**
     * Complete a previously registered tool call.
     * A result whose id has no pending `recordToolUse` is dropped: some
     * SDKs emit tool_result for built-ins without a paired tool_use event.
     */
    recordToolResult(id, outcome, resultOpts) {
      const start = pending.get(id);
      if (!start) {
        return;
      }
      pending.delete(id);
      const call = {
        id: start.callId,
        tool: start.tool,
        outcome,
        // Clamp at 0 so a clock that steps backwards cannot yield a negative duration.
        durationMs: Math.max(0, now() - start.startedAtMs),
        turn: start.turn,
      };
      if (start.args !== undefined) {
        call.args = start.args;
      }
      if (resultOpts?.resultSummary !== undefined && resultOpts.resultSummary !== null) {
        call.resultSummary = clip(resultOpts.resultSummary, MAX_RESULT_SUMMARY_CHARS);
      }
      // Error details are only attached to calls that actually failed.
      if (outcome === 'error') {
        if (resultOpts?.errorClass) {
          call.errorClass = resultOpts.errorClass;
        }
        if (resultOpts?.errorMessage) {
          call.errorMessage = clip(resultOpts.errorMessage, MAX_ERROR_MESSAGE_CHARS);
        }
      }
      if (resultOpts?.retryCount && resultOpts.retryCount > 0) {
        call.retryCount = resultOpts.retryCount;
      }
      completed.push(call);
    },

    /** Open a new turn; later events are attributed to it. */
    startTurn() {
      turnCount++;
    },

    /** Count one assistant text block. */
    recordTextBlock() {
      textBlockCount++;
    },

    /**
     * Record a turn-level error. A missing message is stored as an empty
     * string; any other value is stringified and truncated.
     */
    recordError(class_, message) {
      errors.push({
        class: class_,
        message: clip(message ?? '', MAX_ERROR_MESSAGE_CHARS),
        turn: currentTurn(),
      });
    },

    /** Add one provider usage record to the running totals. A missing record is a no-op. */
    addTokens(usage) {
      addToken('inputTokens', usage?.inputTokens);
      addToken('outputTokens', usage?.outputTokens);
      addToken('cacheReadTokens', usage?.cacheReadTokens);
      addToken('cacheCreationTokens', usage?.cacheCreationTokens);
    },

    /**
     * Build the trace from everything captured so far.
     *
     * Still-pending tool calls are surfaced as `outcome: 'error'` with
     * `errorClass: 'unpaired_call'` so silent hangs are visible in the
     * trace (SDK crashes mid-tool, child process dies, timeout escapes).
     *
     * The returned `toolCalls` and `errors` arrays are snapshots: events
     * recorded after this call do not alter a trace that was already returned.
     */
    finalize() {
      for (const pendingCall of pending.values()) {
        completed.push({
          id: pendingCall.callId,
          tool: pendingCall.tool,
          ...(pendingCall.args !== undefined ? { args: pendingCall.args } : {}),
          outcome: 'error',
          durationMs: Math.max(0, now() - pendingCall.startedAtMs),
          errorClass: 'unpaired_call',
          errorMessage: 'tool_use without matching tool_result',
          turn: pendingCall.turn,
        });
      }
      pending.clear();
      const mcpInvocations = completed.filter((c) => isMcpTool(c.tool)).length;
      const trace = {
        version: 1,
        toolCalls: [...completed],
        textBlockCount,
        turnCount: Math.max(turnCount, 1),
        mcpInvocations,
        errors: [...errors],
        capturedAt,
      };
      // Only attach tokenUsage if at least one field was populated —
      // keeps the stored trace lean for providers without usage data.
      if (Object.keys(tokenUsage).length > 0) {
        trace.tokenUsage = { ...tokenUsage };
      }
      return trace;
    },
  };
}
|
|
183
|
+
//# sourceMappingURL=trace-capture.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"trace-capture.js","sourceRoot":"","sources":["../../../src/core/trace-capture.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AASH;;;;;;;GAOG;AACH,MAAM,wBAAwB,GAAG,IAAI,CAAC;AACtC,MAAM,uBAAuB,GAAG,GAAG,CAAC;AA6EpC;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,kBAAkB,CAAC,OAA4B,EAAE;IAC/D,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;IAC3C,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,CAAC,CAAC,IAAY,EAAE,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC;IAEjF,MAAM,UAAU,GAAG,IAAI,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;IACjD,MAAM,OAAO,GAAG,IAAI,GAAG,EAA2B,CAAC;IACnD,MAAM,SAAS,GAAoB,EAAE,CAAC;IACtC,MAAM,MAAM,GAAqB,EAAE,CAAC;IACpC,IAAI,cAAc,GAAG,CAAC,CAAC;IACvB,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,oEAAoE;IACpE,sEAAsE;IACtE,uCAAuC;IACvC,MAAM,UAAU,GAAoB,EAAE,CAAC;IAEvC,SAAS,WAAW;QAClB,yEAAyE;QACzE,mEAAmE;QACnE,sCAAsC;QACtC,OAAO,SAAS,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IACzC,CAAC;IAED,6DAA6D;IAC7D,SAAS,QAAQ,CAAC,KAA4B,EAAE,KAAyB;QACvE,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC;YAAE,OAAO;QACjE,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,KAAK,CAAC;IACvD,CAAC;IAED,OAAO;QACL,aAAa,CAAC,EAAE,EAAE,IAAI,EAAE,IAAI;YAC1B,oEAAoE;YACpE,mEAAmE;YACnE,gBAAgB;YAChB,OAAO,CAAC,GAAG,CAAC,EAAE,EAAE;gBACd,MAAM,EAAE,EAAE;gBACV,IAAI;gBACJ,IAAI;gBACJ,WAAW,EAAE,GAAG,EAAE;gBAClB,IAAI,EAAE,WAAW,EAAE;gBACnB,KAAK,EAAE,SAAS,CAAC,IAAI,CAAC;aACvB,CAAC,CAAC;QACL,CAAC;QACD,gBAAgB,CAAC,EAAE,EAAE,OAAO,EAAE,UAAU;YACtC,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC9B,IAAI,CAAC,KAAK,EAAE,CAAC;gBACX,mEAAmE;gBACnE,kDAAkD;gBAClD,OAAO;YACT,CAAC;YACD,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;YACnB,MAAM,IAAI,GAAkB;gBAC1B,EAAE,EAAE,KAAK,CAAC,MAAM;gBAChB,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,OAAO;gBACP,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,EAAE,GAAG,KAAK,CAAC,WAAW,CAAC;gBAClD,IAAI,EAAE,KAAK,CAAC,IAAI;aACjB,CAAC;YACF,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;gBAC7B,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC,IAAI,CA
AC;YACzB,CAAC;YACD,IAAI,UAAU,EAAE,aAAa,KAAK,SAAS,IAAI,UAAU,CAAC,aAAa,KAAK,IAAI,EAAE,CAAC;gBACjF,IAAI,CAAC,aAAa,GAAG,UAAU,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,wBAAwB,CAAC,CAAC;YACnF,CAAC;YACD,IAAI,OAAO,KAAK,OAAO,EAAE,CAAC;gBACxB,IAAI,UAAU,EAAE,UAAU,EAAE,CAAC;oBAC3B,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC,UAAU,CAAC;gBAC1C,CAAC;gBACD,IAAI,UAAU,EAAE,YAAY,EAAE,CAAC;oBAC7B,IAAI,CAAC,YAAY,GAAG,UAAU,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,uBAAuB,CAAC,CAAC;gBAChF,CAAC;YACH,CAAC;YACD,IAAI,UAAU,EAAE,UAAU,IAAI,UAAU,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC;gBACxD,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC,UAAU,CAAC;YAC1C,CAAC;YACD,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvB,CAAC;QACD,SAAS;YACP,SAAS,EAAE,CAAC;QACd,CAAC;QACD,eAAe;YACb,cAAc,EAAE,CAAC;QACnB,CAAC;QACD,WAAW,CAAC,MAAM,EAAE,OAAO;YACzB,MAAM,CAAC,IAAI,CAAC;gBACV,KAAK,EAAE,MAAM;gBACb,OAAO,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,uBAAuB,CAAC;gBAClD,IAAI,EAAE,WAAW,EAAE;aACpB,CAAC,CAAC;QACL,CAAC;QACD,SAAS,CAAC,KAAK;YACb,QAAQ,CAAC,aAAa,EAAE,KAAK,CAAC,WAAW,CAAC,CAAC;YAC3C,QAAQ,CAAC,cAAc,EAAE,KAAK,CAAC,YAAY,CAAC,CAAC;YAC7C,QAAQ,CAAC,iBAAiB,EAAE,KAAK,CAAC,eAAe,CAAC,CAAC;YACnD,QAAQ,CAAC,qBAAqB,EAAE,KAAK,CAAC,mBAAmB,CAAC,CAAC;QAC7D,CAAC;QACD,QAAQ;YACN,wEAAwE;YACxE,uEAAuE;YACvE,KAAK,MAAM,CAAC,EAAE,WAAW,CAAC,IAAI,OAAO,EAAE,CAAC;gBACtC,SAAS,CAAC,IAAI,CAAC;oBACb,EAAE,EAAE,WAAW,CAAC,MAAM;oBACtB,IAAI,EAAE,WAAW,CAAC,IAAI;oBACtB,GAAG,CAAC,WAAW,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBACrE,OAAO,EAAE,OAAO;oBAChB,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,EAAE,GAAG,WAAW,CAAC,WAAW,CAAC;oBACxD,UAAU,EAAE,eAAe;oBAC3B,YAAY,EAAE,uCAAuC;oBACrD,IAAI,EAAE,WAAW,CAAC,IAAI;iBACvB,CAAC,CAAC;YACL,CAAC;YACD,OAAO,CAAC,KAAK,EAAE,CAAC;YAEhB,MAAM,cAAc,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC;YAEzE,MAAM,KAAK,GAAmB;gBAC5B,OAAO,EAAE,CAAC;gBACV,SAAS,EAAE,SAAS;gBACpB,cAAc;gBACd,SAAS,EAAE,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC,CAAC;gBACjC,cAAc;gBACd,MAAM;gBACN,UAAU;aACX,CAAC;YACF,+DAA+D;YAC/D,sEAAsE;YACtE
,IAAI,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvC,KAAK,CAAC,UAAU,GAAG,EAAE,GAAG,UAAU,EAAE,CAAC;YACvC,CAAC;YACD,OAAO,KAAK,CAAC;QACf,CAAC;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Darwin — GEPA-Style Reflective Optimizer (Phase 2 A2, S1185).
|
|
3
|
+
*
|
|
4
|
+
* Generation-loop wrapper around {@link Reflector} + {@link paretoSelect}
|
|
5
|
+
* that produces N variants (default 3) per cycle and carries the
|
|
6
|
+
* non-dominated set forward to the next generation. Opt-in via
|
|
7
|
+
* `useGepa: true` in the evolution loop config.
|
|
8
|
+
*
|
|
9
|
+
* Pipeline per generation:
|
|
10
|
+
*
|
|
11
|
+
* 1. Take a parent variant (current prompt or last gen's winner).
|
|
12
|
+
* 2. Generate N candidate mutations by calling the reflector with
|
|
13
|
+
* different sliced feedback (forces variation while keeping each
|
|
14
|
+
* mutation principled — not stochastic-only).
|
|
15
|
+
* 3. Caller scores the candidates against their domain metric(s).
|
|
16
|
+
* 4. paretoSelect keeps the non-dominated set (≤ maxCarry).
|
|
17
|
+
* 5. Next generation reflects from the survivors.
|
|
18
|
+
*
|
|
19
|
+
* This file orchestrates only the GENERATION step. Scoring lives in
|
|
20
|
+
* the caller (loop.ts → evaluator) because Darwin's existing
|
|
21
|
+
* multi-critic pipeline is the authoritative source of variant scores.
|
|
22
|
+
*
|
|
23
|
+
* Why this design and not a full GEPA-engine port:
|
|
24
|
+
* - Keep the Python lib (`gepa-ai/gepa`) out of our peer-dep graph.
|
|
25
|
+
* - Reuse Darwin's existing multi-critic + safety + A/B machinery —
|
|
26
|
+
* GepaOptimizer is purely about variant GENERATION + Pareto SELECTION.
|
|
27
|
+
* - Smallest-possible-edit reflection is the part that empirically
|
|
28
|
+
* matters most (per GEPA papers); the multi-objective search is the
|
|
29
|
+
* other part. Together they cover the GEPA value-prop without a
|
|
30
|
+
* Python dependency.
|
|
31
|
+
*
|
|
32
|
+
* **Inspired by, not lifted from** the official GEPA Python library /
|
|
33
|
+
* arxiv 2507.19457 paper. Deliberate deviations (R1 Research Finding F1
|
|
34
|
+
* + F4 + F6, S1185):
|
|
35
|
+
* - **N variants per generate() call:** GEPA Algorithm 1 produces 1
|
|
36
|
+
* offspring per iteration. We produce N=3-5 per call via three
|
|
37
|
+
* `feedbackStrategy` modes (split / replicate / single). This is
|
|
38
|
+
* our structural adaptation to TS-callsite ergonomics — call us
|
|
39
|
+
* three times to recover the per-iteration GEPA budget.
|
|
40
|
+
* - **`feedbackStrategy: "split"` is OUR adaptation** — GEPA paper
|
|
41
|
+
* does not partition feedback across offspring. We do it to force
|
|
42
|
+
* mutation diversity in a single `Promise.all` batch.
|
|
43
|
+
* - **paretoSelect truncation uses scalarised tie-break** — GEPA
|
|
44
|
+
* Algorithm 2 samples Pareto candidates proportional to instance
|
|
45
|
+
* coverage. We use a simpler weighted-sum tie-break. V0.6 will add
|
|
46
|
+
* a `truncationStrategy` option for coverage-proportional +
|
|
47
|
+
* NSGA-II crowding-distance.
|
|
48
|
+
* - **GEPA+Merge (system-aware crossover from two Pareto-pool
|
|
49
|
+
* ancestors, paper Appendix F)** is NOT implemented. +5% reported
|
|
50
|
+
* lift in the paper. Backlog for V0.6.
|
|
51
|
+
* - **Instance-wise coverage sampling** (paper Algorithm 2) is NOT
|
|
52
|
+
* implemented. Backlog for V0.6.
|
|
53
|
+
*
|
|
54
|
+
* @example
|
|
55
|
+
* ```ts
|
|
56
|
+
* import { GepaOptimizer, DARWIN_DEFAULT_OBJECTIVES } from "darwin-agents";
|
|
57
|
+
*
|
|
58
|
+
* const optimizer = new GepaOptimizer(myRunPromptFn);
|
|
59
|
+
*
|
|
60
|
+
* // Step 1: generate N variant mutations from feedback
|
|
61
|
+
* const variants = await optimizer.generate(currentPrompt, feedbacks, {
|
|
62
|
+
* numVariants: 3,
|
|
63
|
+
* });
|
|
64
|
+
*
|
|
65
|
+
* // Step 2: score each variant externally (via Darwin's multi-critic
|
|
66
|
+
* // pipeline or any other evaluator)
|
|
67
|
+
* const scored = await Promise.all(
|
|
68
|
+
* variants.map(async (v) => ({
|
|
69
|
+
* ...v,
|
|
70
|
+
* metrics: await scoreVariant(v.prompt),
|
|
71
|
+
* })),
|
|
72
|
+
* );
|
|
73
|
+
*
|
|
74
|
+
* // Step 3: Pareto-select survivors for the next generation
|
|
75
|
+
* const survivors = optimizer.nextGeneration(scored, {
|
|
76
|
+
* objectives: DARWIN_DEFAULT_OBJECTIVES,
|
|
77
|
+
* maxCarry: 3,
|
|
78
|
+
* });
|
|
79
|
+
* ```
|
|
80
|
+
*/
|
|
81
|
+
import type { ReflectiveFeedback, RunPromptFn } from "./reflector.js";
|
|
82
|
+
import { type ParetoObjective } from "./pareto.js";
|
|
83
|
+
/** One evaluated variant in a generation: an id, the mutated prompt text, and its numeric metric scores. */
|
|
84
|
+
export interface ScoredVariant {
|
|
85
|
+
/** Variant identifier (e.g. "v3-gen2-cand1"). */
|
|
86
|
+
id: string;
|
|
87
|
+
/** Mutated prompt text. */
|
|
88
|
+
prompt: string;
|
|
89
|
+
/** Score map keyed by objective name — must include the keys in `objectives` (presumably `NextGenerationOptions.objectives`; confirm against pareto.ts). */
|
|
90
|
+
metrics: Record<string, number>;
|
|
91
|
+
/** Optional text feedback collected for the next generation's reflector. */
|
|
92
|
+
textFeedback?: string;
|
|
93
|
+
}
|
|
94
|
+
/** Options for {@link GepaOptimizer#generate}. Every field is optional. */
|
|
95
|
+
export interface GenerateOptions {
|
|
96
|
+
/** Number of variants (N) to produce per generation. Default 3, clamped to [1, 10]. */
|
|
97
|
+
numVariants?: number;
|
|
98
|
+
/**
|
|
99
|
+
* Strategy for feeding feedback to the N reflector calls:
|
|
100
|
+
* - `"split"` (default): split feedback array N ways; each variant
|
|
101
|
+
* reflects on a different subset → encourages diversity.
|
|
102
|
+
* - `"replicate"`: every variant sees every feedback → encourages
|
|
103
|
+
* consistency at the cost of diversity.
|
|
104
|
+
* - `"single"`: 1 reflection call, deduplicated to a single variant
|
|
105
|
+
* (use when the feedback set is tiny).
|
|
106
|
+
*/
|
|
107
|
+
feedbackStrategy?: "split" | "replicate" | "single";
|
|
108
|
+
}
|
|
109
|
+
/** Options for {@link GepaOptimizer#nextGeneration}. Extends {@link GenerateOptions}, so its optional fields are accepted here too. */
|
|
110
|
+
export interface NextGenerationOptions extends GenerateOptions {
|
|
111
|
+
/**
|
|
112
|
+
* Pareto objectives over `ScoredVariant.metrics`. Required —
|
|
113
|
+
* GepaOptimizer is multi-objective by design; if you only have one
|
|
114
|
+
* objective, use the plain `PromptOptimizer` instead.
|
|
115
|
+
*/
|
|
116
|
+
objectives: ReadonlyArray<ParetoObjective<Record<string, number>>>;
|
|
117
|
+
/**
|
|
118
|
+
* Max variants to keep on the Pareto front (the survivor cap). Default 3.
|
|
119
|
+
*/
|
|
120
|
+
maxCarry?: number;
|
|
121
|
+
}
|
|
122
|
+
/** GEPA-style optimizer declaration: `generate` produces prompt variants, `nextGeneration` Pareto-selects the survivors. Scoring is left to the caller. */
export declare class GepaOptimizer {
|
|
123
|
+
private readonly reflector;
|
|
124
|
+
constructor(runPrompt: RunPromptFn);
|
|
125
|
+
/**
|
|
126
|
+
* Generate N variant mutations from the current prompt + feedback set.
|
|
127
|
+
* Returns the raw mutated prompts — caller scores them (typically via
|
|
128
|
+
* the existing multi-critic pipeline) before passing them through
|
|
129
|
+
* {@link GepaOptimizer#nextGeneration}.
* @param currentPrompt Prompt text to mutate.
* @param feedbacks Feedback items the mutations are derived from.
* @param opts Optional variant count and feedback strategy.
* @returns Promise resolving to `{ id, prompt }` entries for the generated variants.
|
|
130
|
+
*/
|
|
131
|
+
generate(currentPrompt: string, feedbacks: ReadonlyArray<ReflectiveFeedback>, opts?: GenerateOptions): Promise<Array<{
|
|
132
|
+
id: string;
|
|
133
|
+
prompt: string;
|
|
134
|
+
}>>;
|
|
135
|
+
/**
|
|
136
|
+
* Given the scored variants of a generation, return the survivors
|
|
137
|
+
* carried to the next generation (Pareto-front, capped at maxCarry).
|
|
138
|
+
*
|
|
139
|
+
* This is a pure-function wrapper around `paretoSelect` exposed on
|
|
140
|
+
* the optimizer for ergonomic call-site grouping ("generate, score,
|
|
141
|
+
* nextGeneration"). The actual selection logic lives in `pareto.ts`.
* @param scored Variants of one generation, each carrying its metric scores.
* @param opts Objectives (required) and optional `maxCarry` cap.
* @returns The surviving variants.
|
|
142
|
+
*/
|
|
143
|
+
nextGeneration(scored: ReadonlyArray<ScoredVariant>, opts: NextGenerationOptions): ScoredVariant[];
|
|
144
|
+
/** Clamp variant count into the documented bounds. */
|
|
145
|
+
private clampN;
|
|
146
|
+
/** Stable variant id: `gepa-cand-${i}` (caller can re-namespace). */
|
|
147
|
+
private makeId;
|
|
148
|
+
}
|
|
149
|
+
//# sourceMappingURL=optimizer-gepa.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"optimizer-gepa.d.ts","sourceRoot":"","sources":["../../../src/evolution/optimizer-gepa.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+EG;AAEH,OAAO,KAAK,EAAE,kBAAkB,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAEtE,OAAO,EAEL,KAAK,eAAe,EACrB,MAAM,aAAa,CAAC;AAErB,6CAA6C;AAC7C,MAAM,WAAW,aAAa;IAC5B,iDAAiD;IACjD,EAAE,EAAE,MAAM,CAAC;IACX,2BAA2B;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,iFAAiF;IACjF,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,4EAA4E;IAC5E,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,kDAAkD;AAClD,MAAM,WAAW,eAAe;IAC9B,wEAAwE;IACxE,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;;;;;;OAQG;IACH,gBAAgB,CAAC,EAAE,OAAO,GAAG,WAAW,GAAG,QAAQ,CAAC;CACrD;AAED,wDAAwD;AACxD,MAAM,WAAW,qBAAsB,SAAQ,eAAe;IAC5D;;;;OAIG;IACH,UAAU,EAAE,aAAa,CAAC,eAAe,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC;IACnE;;OAEG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAOD,qBAAa,aAAa;IACxB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAY;gBAE1B,SAAS,EAAE,WAAW;IAIlC;;;;;OAKG;IACG,QAAQ,CACZ,aAAa,EAAE,MAAM,EACrB,SAAS,EAAE,aAAa,CAAC,kBAAkB,CAAC,EAC5C,IAAI,GAAE,eAAoB,GACzB,OAAO,CAAC,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IA0DjD;;;;;;;OAOG;IACH,cAAc,CACZ,MAAM,EAAE,aAAa,CAAC,aAAa,CAAC,EACpC,IAAI,EAAE,qBAAqB,GAC1B,aAAa,EAAE;IAyClB,sDAAsD;IACtD,OAAO,CAAC,MAAM;IAKd,qEAAqE;IACrE,OAAO,CAAC,MAAM;CAGf"}
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Darwin — GEPA-Style Reflective Optimizer (Phase 2 A2, S1185).
|
|
3
|
+
*
|
|
4
|
+
* Generation-loop wrapper around {@link Reflector} + {@link paretoSelect}
|
|
5
|
+
* that produces N variants (default 3) per cycle and carries the
|
|
6
|
+
* non-dominated set forward to the next generation. Opt-in via
|
|
7
|
+
* `useGepa: true` in the evolution loop config.
|
|
8
|
+
*
|
|
9
|
+
* Pipeline per generation:
|
|
10
|
+
*
|
|
11
|
+
* 1. Take a parent variant (current prompt or last gen's winner).
|
|
12
|
+
* 2. Generate N candidate mutations by calling the reflector with
|
|
13
|
+
* different sliced feedback (forces variation while keeping each
|
|
14
|
+
* mutation principled — not stochastic-only).
|
|
15
|
+
* 3. Caller scores the candidates against their domain metric(s).
|
|
16
|
+
* 4. paretoSelect keeps the non-dominated set (≤ maxCarry).
|
|
17
|
+
* 5. Next generation reflects from the survivors.
|
|
18
|
+
*
|
|
19
|
+
* This file orchestrates only the GENERATION step. Scoring lives in
|
|
20
|
+
* the caller (loop.ts → evaluator) because Darwin's existing
|
|
21
|
+
* multi-critic pipeline is the authoritative source of variant scores.
|
|
22
|
+
*
|
|
23
|
+
* Why this design and not a full GEPA-engine port:
|
|
24
|
+
* - Keep the Python lib (`gepa-ai/gepa`) out of our peer-dep graph.
|
|
25
|
+
* - Reuse Darwin's existing multi-critic + safety + A/B machinery —
|
|
26
|
+
* GepaOptimizer is purely about variant GENERATION + Pareto SELECTION.
|
|
27
|
+
* - Smallest-possible-edit reflection is the part that empirically
|
|
28
|
+
* matters most (per GEPA papers); the multi-objective search is the
|
|
29
|
+
* other part. Together they cover the GEPA value-prop without a
|
|
30
|
+
* Python dependency.
|
|
31
|
+
*
|
|
32
|
+
* **Inspired by, not lifted from** the official GEPA Python library /
|
|
33
|
+
* arxiv 2507.19457 paper. Deliberate deviations (R1 Research Finding F1
|
|
34
|
+
* + F4 + F6, S1185):
|
|
35
|
+
* - **N variants per generate() call:** GEPA Algorithm 1 produces 1
|
|
36
|
+
* offspring per iteration. We produce N=3-5 per call via three
|
|
37
|
+
* `feedbackStrategy` modes (split / replicate / single). This is
|
|
38
|
+
* our structural adaptation to TS-callsite ergonomics — call us
|
|
39
|
+
* three times to recover the per-iteration GEPA budget.
|
|
40
|
+
* - **`feedbackStrategy: "split"` is OUR adaptation** — GEPA paper
|
|
41
|
+
* does not partition feedback across offspring. We do it to force
|
|
42
|
+
* mutation diversity in a single `Promise.all` batch.
|
|
43
|
+
* - **paretoSelect truncation uses scalarised tie-break** — GEPA
|
|
44
|
+
* Algorithm 2 samples Pareto candidates proportional to instance
|
|
45
|
+
* coverage. We use a simpler weighted-sum tie-break. V0.6 will add
|
|
46
|
+
* a `truncationStrategy` option for coverage-proportional +
|
|
47
|
+
* NSGA-II crowding-distance.
|
|
48
|
+
* - **GEPA+Merge (system-aware crossover from two Pareto-pool
|
|
49
|
+
* ancestors, paper Appendix F)** is NOT implemented. +5% reported
|
|
50
|
+
* lift in the paper. Backlog for V0.6.
|
|
51
|
+
* - **Instance-wise coverage sampling** (paper Algorithm 2) is NOT
|
|
52
|
+
* implemented. Backlog for V0.6.
|
|
53
|
+
*
|
|
54
|
+
* @example
|
|
55
|
+
* ```ts
|
|
56
|
+
* import { GepaOptimizer, DARWIN_DEFAULT_OBJECTIVES } from "darwin-agents";
|
|
57
|
+
*
|
|
58
|
+
* const optimizer = new GepaOptimizer(myRunPromptFn);
|
|
59
|
+
*
|
|
60
|
+
* // Step 1: generate N variant mutations from feedback
|
|
61
|
+
* const variants = await optimizer.generate(currentPrompt, feedbacks, {
|
|
62
|
+
* numVariants: 3,
|
|
63
|
+
* });
|
|
64
|
+
*
|
|
65
|
+
* // Step 2: score each variant externally (via Darwin's multi-critic
|
|
66
|
+
* // pipeline or any other evaluator)
|
|
67
|
+
* const scored = await Promise.all(
|
|
68
|
+
* variants.map(async (v) => ({
|
|
69
|
+
* ...v,
|
|
70
|
+
* metrics: await scoreVariant(v.prompt),
|
|
71
|
+
* })),
|
|
72
|
+
* );
|
|
73
|
+
*
|
|
74
|
+
* // Step 3: Pareto-select survivors for the next generation
|
|
75
|
+
* const survivors = optimizer.nextGeneration(scored, {
|
|
76
|
+
* objectives: DARWIN_DEFAULT_OBJECTIVES,
|
|
77
|
+
* maxCarry: 3,
|
|
78
|
+
* });
|
|
79
|
+
* ```
|
|
80
|
+
*/
|
|
81
|
+
import { Reflector } from "./reflector.js";
|
|
82
|
+
import { paretoSelect, } from "./pareto.js";
|
|
83
|
+
/** Variant count used by `generate` when `opts.numVariants` is omitted or not finite. */
const DEFAULT_NUM_VARIANTS = 3;
/** Lower bound applied to the requested variant count. */
const MIN_NUM_VARIANTS = 1;
/** Upper bound applied to the requested variant count. */
const MAX_NUM_VARIANTS = 10;
/** Survivor cap handed to `paretoSelect` when `opts.maxCarry` is omitted. */
const DEFAULT_MAX_CARRY = 3;

/**
 * Generation-step helper: produces candidate prompt mutations through a
 * `Reflector` and narrows a scored generation through `paretoSelect`.
 * Scoring itself is done by the caller between the two steps.
 */
export class GepaOptimizer {
    /** Reflector instance every mutation request is delegated to. */
    reflector;

    /**
     * @param runPrompt Forwarded unchanged to the `Reflector` constructor.
     */
    constructor(runPrompt) {
        this.reflector = new Reflector(runPrompt);
    }

    /**
     * Generate variant mutations from the current prompt + feedback set.
     * Returns the raw mutated prompts — caller scores them before passing
     * them through {@link GepaOptimizer#nextGeneration}.
     *
     * Strategies (`opts.feedbackStrategy`, default `"split"`):
     *  - `"single"`: one reflection over all feedback, one variant;
     *    `numVariants` is ignored.
     *  - `"replicate"`: N reflections, each over the full feedback set.
     *  - anything else (including `"split"`): feedback is dealt round-robin
     *    into N buckets and each bucket is reflected on separately; an
     *    empty bucket falls back to the full feedback set.
     *
     * @param currentPrompt Prompt text handed to the reflector as the parent.
     * @param feedbacks Non-empty array of feedback entries.
     * @param opts Optional `numVariants` (clamped to 1..10, default 3) and
     *   `feedbackStrategy`.
     * @returns Promise of `[{ id, prompt }]` with ids `gepa-cand-0..N-1`.
     * @throws {TypeError} If `feedbacks` is not an array or is empty.
     */
    async generate(currentPrompt, feedbacks, opts = {}) {
        // Boundary validation with a GEPA-specific message. The array check
        // also covers null/undefined, which would otherwise surface as an
        // opaque "Cannot read properties of undefined (reading 'length')".
        if (!Array.isArray(feedbacks) || feedbacks.length === 0) {
            throw new TypeError("GepaOptimizer.generate: feedbacks array must be non-empty — " +
                "GEPA reflection needs at least one variant evaluation to mutate from. " +
                "Use the plain PromptOptimizer for cold-start mutation.");
        }
        const n = this.clampN(opts.numVariants ?? DEFAULT_NUM_VARIANTS);
        const strategy = opts.feedbackStrategy ?? "split";
        // Attach positional ids to the reflector outputs.
        const toVariants = (prompts) => prompts.map((prompt, i) => ({ id: this.makeId(i), prompt }));

        if (strategy === "single") {
            // Fast path: exactly one reflection over the whole feedback set.
            return toVariants([await this.reflector.reflect(currentPrompt, feedbacks)]);
        }

        if (strategy === "replicate") {
            // Every variant sees every feedback entry; any diversity comes
            // from the reflector itself. A deterministic reflector therefore
            // yields N identical prompts.
            const replicated = Array.from({ length: n }, () => this.reflector.reflect(currentPrompt, feedbacks));
            return toVariants(await Promise.all(replicated));
        }

        // Default strategy "split": deal feedback round-robin into N buckets.
        // `n` is clamped to at least 1, so `i % n` always indexes a bucket.
        const buckets = Array.from({ length: n }, () => []);
        feedbacks.forEach((fb, i) => {
            buckets[i % n].push(fb);
        });
        // Fewer feedback entries than variants leaves trailing buckets empty;
        // those slots reflect on the full set rather than being skipped.
        const pending = buckets.map((bucket) => this.reflector.reflect(currentPrompt, bucket.length === 0 ? feedbacks : bucket));
        return toVariants(await Promise.all(pending));
    }

    /**
     * Given the scored variants of a generation, return the survivors
     * carried to the next generation.
     *
     * Thin wrapper around `paretoSelect`: it is called with the variants'
     * `metrics` objects, `opts.objectives` and the carry cap, and whatever
     * it returns is mapped back to the corresponding entries of `scored`.
     * Survivors are returned in their original `scored` order.
     *
     * @param scored Variants, each carrying its own distinct `metrics` object.
     * @param opts `objectives` (required, non-empty) and optional `maxCarry`
     *   (default 3).
     * @returns The subset of `scored` whose metrics were selected.
     * @throws {TypeError} If `opts.objectives` is missing or empty, or if two
     *   variants share one `metrics` object reference.
     * @throws {Error} If `paretoSelect` returns an object that is not one of
     *   the input `metrics` references.
     */
    nextGeneration(scored, opts) {
        // `opts?.` so a missing options object yields the descriptive error
        // below instead of a property-access failure.
        if (!opts?.objectives || opts.objectives.length === 0) {
            throw new TypeError("GepaOptimizer.nextGeneration: opts.objectives must contain at least one objective");
        }
        const maxCarry = opts.maxCarry ?? DEFAULT_MAX_CARRY;

        // Survivors are mapped back to variants by the identity of their
        // metrics object, so every variant must own a distinct object.
        // A shared reference would collapse two variants onto one Map key.
        const metricsArr = scored.map((v) => v.metrics);
        if (new Set(metricsArr).size !== metricsArr.length) {
            throw new TypeError("GepaOptimizer.nextGeneration: two or more ScoredVariants share the same " +
                "`metrics` object reference — provide a distinct metrics object per variant.");
        }

        const survivorMetrics = paretoSelect(metricsArr, opts.objectives, maxCarry);

        // metrics reference -> position in `scored`.
        const metricsToIndex = new Map(metricsArr.map((m, i) => [m, i]));
        const survivorIndices = new Set();
        for (const m of survivorMetrics) {
            const idx = metricsToIndex.get(m);
            if (idx === undefined) {
                // The mapping relies on paretoSelect handing back the very
                // objects it was given. If that ever stops being true, fail
                // loudly instead of silently returning fewer survivors.
                throw new Error("GepaOptimizer.nextGeneration: paretoSelect returned a metrics object that is " +
                    "not one of the inputs — cannot map survivors back to their variants.");
            }
            survivorIndices.add(idx);
        }
        return scored.filter((_, i) => survivorIndices.has(i));
    }

    /**
     * Clamp a requested variant count into the supported range.
     * Non-finite input yields the default; fractions are floored first.
     */
    clampN(n) {
        if (!Number.isFinite(n)) {
            return DEFAULT_NUM_VARIANTS;
        }
        return Math.max(MIN_NUM_VARIANTS, Math.min(MAX_NUM_VARIANTS, Math.floor(n)));
    }

    /** Stable variant id: `gepa-cand-${i}` (caller can re-namespace). */
    makeId(i) {
        return `gepa-cand-${i}`;
    }
}
|
|
198
|
+
//# sourceMappingURL=optimizer-gepa.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"optimizer-gepa.js","sourceRoot":"","sources":["../../../src/evolution/optimizer-gepa.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+EG;AAGH,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAC3C,OAAO,EACL,YAAY,GAEb,MAAM,aAAa,CAAC;AA4CrB,MAAM,oBAAoB,GAAG,CAAC,CAAC;AAC/B,MAAM,gBAAgB,GAAG,CAAC,CAAC;AAC3B,MAAM,gBAAgB,GAAG,EAAE,CAAC;AAC5B,MAAM,iBAAiB,GAAG,CAAC,CAAC;AAE5B,MAAM,OAAO,aAAa;IACP,SAAS,CAAY;IAEtC,YAAY,SAAsB;QAChC,IAAI,CAAC,SAAS,GAAG,IAAI,SAAS,CAAC,SAAS,CAAC,CAAC;IAC5C,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,QAAQ,CACZ,aAAqB,EACrB,SAA4C,EAC5C,OAAwB,EAAE;QAE1B,iEAAiE;QACjE,iEAAiE;QACjE,2DAA2D;QAC3D,iEAAiE;QACjE,kCAAkC;QAClC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,MAAM,IAAI,SAAS,CACjB,8DAA8D;gBAC5D,wEAAwE;gBACxE,wDAAwD,CAC3D,CAAC;QACJ,CAAC;QACD,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,WAAW,IAAI,oBAAoB,CAAC,CAAC;QAChE,MAAM,QAAQ,GAAG,IAAI,CAAC,gBAAgB,IAAI,OAAO,CAAC;QAElD,IAAI,QAAQ,KAAK,QAAQ,EAAE,CAAC;YAC1B,oEAAoE;YACpE,0EAA0E;YAC1E,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,aAAa,EAAE,SAAS,CAAC,CAAC;YACvE,OAAO,CAAC,EAAE,EAAE,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC;QACnD,CAAC;QAED,IAAI,QAAQ,KAAK,WAAW,EAAE,CAAC;YAC7B,kEAAkE;YAClE,mEAAmE;YACnE,6DAA6D;YAC7D,gEAAgE;YAChE,iBAAiB;YACjB,MAAM,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,EAAE,GAAG,EAAE,CAC9C,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,aAAa,EAAE,SAAS,CAAC,CACjD,CAAC;YACF,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YAC9C,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;QACtE,CAAC;QAED,kEAAkE;QAClE,gEAAgE;QAChE,MAAM,OAAO,GAA2B,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;QAC5E,SAAS,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE;YAC1B,MAAM,SAAS,GAAG,CAAC,GAAG,CAAC,CAAC;YACxB,iEAAiE;YACjE,mEAAmE;YACnE,OAAO,CAAC,SAAS,CAAE,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC/B,CAAC,C
AAC,CAAC;QAEH,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE;YACtC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACxB,+DAA+D;gBAC/D,6DAA6D;gBAC7D,OAAO,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,aAAa,EAAE,SAAS,CAAC,CAAC;YAC1D,CAAC;YACD,OAAO,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;QACvD,CAAC,CAAC,CAAC;QACH,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAC9C,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IACtE,CAAC;IAED;;;;;;;OAOG;IACH,cAAc,CACZ,MAAoC,EACpC,IAA2B;QAE3B,IAAI,CAAC,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACrD,MAAM,IAAI,SAAS,CACjB,mFAAmF,CACpF,CAAC;QACJ,CAAC;QACD,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,iBAAiB,CAAC;QACpD,mEAAmE;QACnE,oEAAoE;QACpE,gEAAgE;QAChE,kEAAkE;QAClE,+DAA+D;QAC/D,gEAAgE;QAChE,EAAE;QACF,gEAAgE;QAChE,0DAA0D;QAC1D,mEAAmE;QACnE,4DAA4D;QAC5D,MAAM,UAAU,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QAChD,IAAI,IAAI,GAAG,CAAC,UAAU,CAAC,CAAC,IAAI,KAAK,UAAU,CAAC,MAAM,EAAE,CAAC;YACnD,MAAM,IAAI,SAAS,CACjB,0EAA0E;gBACxE,6EAA6E,CAChF,CAAC;QACJ,CAAC;QACD,MAAM,UAAU,GAAG,IAAI,CAAC,UAEvB,CAAC;QACF,MAAM,eAAe,GAAG,YAAY,CAAC,UAAU,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC;QACvE,sEAAsE;QACtE,8DAA8D;QAC9D,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkC,CAAC;QACjE,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QACvD,MAAM,eAAe,GAAG,IAAI,GAAG,CAC7B,eAAe;aACZ,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;aACjC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CACrD,CAAC;QACF,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACzD,CAAC;IAED,sDAAsD;IAC9C,MAAM,CAAC,CAAS;QACtB,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;YAAE,OAAO,oBAAoB,CAAC;QACrD,OAAO,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/E,CAAC;IAED,qEAAqE;IAC7D,MAAM,CAA
C,CAAS;QACtB,OAAO,aAAa,CAAC,EAAE,CAAC;IAC1B,CAAC;CACF"}
|