darwin-agents 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +245 -0
  3. package/dist/agents/analyst.d.ts +11 -0
  4. package/dist/agents/analyst.d.ts.map +1 -0
  5. package/dist/agents/analyst.js +78 -0
  6. package/dist/agents/analyst.js.map +1 -0
  7. package/dist/agents/blog-writer.d.ts +13 -0
  8. package/dist/agents/blog-writer.d.ts.map +1 -0
  9. package/dist/agents/blog-writer.js +59 -0
  10. package/dist/agents/blog-writer.js.map +1 -0
  11. package/dist/agents/critic.d.ts +11 -0
  12. package/dist/agents/critic.d.ts.map +1 -0
  13. package/dist/agents/critic.js +57 -0
  14. package/dist/agents/critic.js.map +1 -0
  15. package/dist/agents/index.d.ts +15 -0
  16. package/dist/agents/index.d.ts.map +1 -0
  17. package/dist/agents/index.js +31 -0
  18. package/dist/agents/index.js.map +1 -0
  19. package/dist/agents/investigator-critic.d.ts +10 -0
  20. package/dist/agents/investigator-critic.d.ts.map +1 -0
  21. package/dist/agents/investigator-critic.js +78 -0
  22. package/dist/agents/investigator-critic.js.map +1 -0
  23. package/dist/agents/investigator.d.ts +13 -0
  24. package/dist/agents/investigator.d.ts.map +1 -0
  25. package/dist/agents/investigator.js +105 -0
  26. package/dist/agents/investigator.js.map +1 -0
  27. package/dist/agents/marketing.d.ts +13 -0
  28. package/dist/agents/marketing.d.ts.map +1 -0
  29. package/dist/agents/marketing.js +59 -0
  30. package/dist/agents/marketing.js.map +1 -0
  31. package/dist/agents/researcher.d.ts +11 -0
  32. package/dist/agents/researcher.d.ts.map +1 -0
  33. package/dist/agents/researcher.js +68 -0
  34. package/dist/agents/researcher.js.map +1 -0
  35. package/dist/agents/writer.d.ts +9 -0
  36. package/dist/agents/writer.d.ts.map +1 -0
  37. package/dist/agents/writer.js +47 -0
  38. package/dist/agents/writer.js.map +1 -0
  39. package/dist/cli/create.d.ts +11 -0
  40. package/dist/cli/create.d.ts.map +1 -0
  41. package/dist/cli/create.js +104 -0
  42. package/dist/cli/create.js.map +1 -0
  43. package/dist/cli/evolve.d.ts +13 -0
  44. package/dist/cli/evolve.d.ts.map +1 -0
  45. package/dist/cli/evolve.js +69 -0
  46. package/dist/cli/evolve.js.map +1 -0
  47. package/dist/cli/index.d.ts +13 -0
  48. package/dist/cli/index.d.ts.map +1 -0
  49. package/dist/cli/index.js +84 -0
  50. package/dist/cli/index.js.map +1 -0
  51. package/dist/cli/init.d.ts +12 -0
  52. package/dist/cli/init.d.ts.map +1 -0
  53. package/dist/cli/init.js +68 -0
  54. package/dist/cli/init.js.map +1 -0
  55. package/dist/cli/run.d.ts +7 -0
  56. package/dist/cli/run.d.ts.map +1 -0
  57. package/dist/cli/run.js +371 -0
  58. package/dist/cli/run.js.map +1 -0
  59. package/dist/cli/status.d.ts +7 -0
  60. package/dist/cli/status.d.ts.map +1 -0
  61. package/dist/cli/status.js +123 -0
  62. package/dist/cli/status.js.map +1 -0
  63. package/dist/core/agent.d.ts +53 -0
  64. package/dist/core/agent.d.ts.map +1 -0
  65. package/dist/core/agent.js +172 -0
  66. package/dist/core/agent.js.map +1 -0
  67. package/dist/core/runner.d.ts +64 -0
  68. package/dist/core/runner.d.ts.map +1 -0
  69. package/dist/core/runner.js +203 -0
  70. package/dist/core/runner.js.map +1 -0
  71. package/dist/evolution/loop.d.ts +100 -0
  72. package/dist/evolution/loop.d.ts.map +1 -0
  73. package/dist/evolution/loop.js +424 -0
  74. package/dist/evolution/loop.js.map +1 -0
  75. package/dist/evolution/multi-critic.d.ts +58 -0
  76. package/dist/evolution/multi-critic.d.ts.map +1 -0
  77. package/dist/evolution/multi-critic.js +324 -0
  78. package/dist/evolution/multi-critic.js.map +1 -0
  79. package/dist/evolution/notifications.d.ts +32 -0
  80. package/dist/evolution/notifications.d.ts.map +1 -0
  81. package/dist/evolution/notifications.js +92 -0
  82. package/dist/evolution/notifications.js.map +1 -0
  83. package/dist/evolution/optimizer.d.ts +64 -0
  84. package/dist/evolution/optimizer.d.ts.map +1 -0
  85. package/dist/evolution/optimizer.js +223 -0
  86. package/dist/evolution/optimizer.js.map +1 -0
  87. package/dist/evolution/patterns.d.ts +63 -0
  88. package/dist/evolution/patterns.d.ts.map +1 -0
  89. package/dist/evolution/patterns.js +297 -0
  90. package/dist/evolution/patterns.js.map +1 -0
  91. package/dist/evolution/safety.d.ts +76 -0
  92. package/dist/evolution/safety.d.ts.map +1 -0
  93. package/dist/evolution/safety.js +182 -0
  94. package/dist/evolution/safety.js.map +1 -0
  95. package/dist/evolution/tracker.d.ts +48 -0
  96. package/dist/evolution/tracker.d.ts.map +1 -0
  97. package/dist/evolution/tracker.js +163 -0
  98. package/dist/evolution/tracker.js.map +1 -0
  99. package/dist/index.d.ts +32 -0
  100. package/dist/index.d.ts.map +1 -0
  101. package/dist/index.js +35 -0
  102. package/dist/index.js.map +1 -0
  103. package/dist/memory/index.d.ts +32 -0
  104. package/dist/memory/index.d.ts.map +1 -0
  105. package/dist/memory/index.js +49 -0
  106. package/dist/memory/index.js.map +1 -0
  107. package/dist/memory/postgres-memory.d.ts +52 -0
  108. package/dist/memory/postgres-memory.d.ts.map +1 -0
  109. package/dist/memory/postgres-memory.js +515 -0
  110. package/dist/memory/postgres-memory.js.map +1 -0
  111. package/dist/memory/sqlite-memory.d.ts +36 -0
  112. package/dist/memory/sqlite-memory.d.ts.map +1 -0
  113. package/dist/memory/sqlite-memory.js +380 -0
  114. package/dist/memory/sqlite-memory.js.map +1 -0
  115. package/dist/providers/anthropic.d.ts +20 -0
  116. package/dist/providers/anthropic.d.ts.map +1 -0
  117. package/dist/providers/anthropic.js +82 -0
  118. package/dist/providers/anthropic.js.map +1 -0
  119. package/dist/providers/claude-cli.d.ts +35 -0
  120. package/dist/providers/claude-cli.d.ts.map +1 -0
  121. package/dist/providers/claude-cli.js +143 -0
  122. package/dist/providers/claude-cli.js.map +1 -0
  123. package/dist/providers/index.d.ts +39 -0
  124. package/dist/providers/index.d.ts.map +1 -0
  125. package/dist/providers/index.js +58 -0
  126. package/dist/providers/index.js.map +1 -0
  127. package/dist/providers/ollama.d.ts +17 -0
  128. package/dist/providers/ollama.d.ts.map +1 -0
  129. package/dist/providers/ollama.js +64 -0
  130. package/dist/providers/ollama.js.map +1 -0
  131. package/dist/providers/openai.d.ts +19 -0
  132. package/dist/providers/openai.d.ts.map +1 -0
  133. package/dist/providers/openai.js +75 -0
  134. package/dist/providers/openai.js.map +1 -0
  135. package/dist/providers/types.d.ts +62 -0
  136. package/dist/providers/types.d.ts.map +1 -0
  137. package/dist/providers/types.js +9 -0
  138. package/dist/providers/types.js.map +1 -0
  139. package/dist/types.d.ts +221 -0
  140. package/dist/types.d.ts.map +1 -0
  141. package/dist/types.js +19 -0
  142. package/dist/types.js.map +1 -0
  143. package/package.json +81 -0
@@ -0,0 +1,324 @@
1
+ /**
2
+ * Darwin — Multi-Critic Evaluator
3
+ *
4
+ * Runs 3 specialized critics in parallel and takes the median score.
5
+ * More robust than a single critic — reduces bias and random variance.
6
+ *
7
+ * Agent-aware: Different agents get different evaluation criteria.
8
+ *
9
+ * Investigator Critics:
10
+ * A: Facts & Sources — accuracy, citations, primary documents
11
+ * B: Honesty & Courage — intellectual bravery, clear positions, uncomfortable truths
12
+ * C: Completeness & Structure — full investigation, proper format, both sides covered
13
+ *
14
+ * Writer Critics:
15
+ * A: Task Compliance & Accuracy — did the writer follow the brief? Correct claims?
16
+ * B: Persuasion & Voice — tone, engagement, conviction, audience awareness
17
+ * C: Substance & Originality — depth, concrete value, fresh angles
18
+ */
19
+ // ─── Output Format (shared across all critics) ────────
20
+ const CRITIC_OUTPUT_FORMAT = `OUTPUT FORMAT (EXACTLY THIS):
21
+ ===SCORE===
22
+ {number 1-10}
23
+ ===ASSESSMENT===
24
+ {2-3 sentences}
25
+ ===END===`;
26
+ // ─── Investigator Critic Prompts ──────────────────────
27
+ const INVESTIGATOR_CRITIC_A = `You evaluate investigative reports on FACTUAL ACCURACY and SOURCE QUALITY.
28
+
29
+ Score 1-10 based on:
30
+ - Are claims backed by verifiable sources with URLs?
31
+ - Are primary documents cited (government reports, court filings, academic papers)?
32
+ - Are sources from multiple countries/languages?
33
+ - Are numbers, dates, and names specific and accurate?
34
+ - Is the source mix diverse (not just Wikipedia + one news outlet)?
35
+
36
+ LOW SCORE: Vague claims, no URLs, "experts say" without naming them, single-source narrative.
37
+ HIGH SCORE: Specific citations, primary documents, cross-referenced claims, diverse source mix.
38
+
39
+ ${CRITIC_OUTPUT_FORMAT}`;
40
+ const INVESTIGATOR_CRITIC_B = `You evaluate investigative reports on HONESTY and INTELLECTUAL COURAGE.
41
+
42
+ Score 1-10 based on:
43
+ - Does the report take a clear position or hide behind "both sides" diplomacy?
44
+ - Does it state uncomfortable conclusions backed by evidence?
45
+ - Does it challenge comfortable assumptions from BOTH mainstream and alternative sides?
46
+ - Does it acknowledge what it genuinely doesn't know?
47
+ - Does it resist the temptation to play it safe?
48
+
49
+ LOW SCORE: Fence-sitting, diplomatic non-answers, "more research needed" cop-out, predetermined conclusions.
50
+ HIGH SCORE: Clear honest position, challenges both sides, admits uncertainty where genuine, follows evidence over comfort.
51
+
52
+ ${CRITIC_OUTPUT_FORMAT}`;
53
+ const INVESTIGATOR_CRITIC_C = `You evaluate investigative reports on COMPLETENESS and STRUCTURE.
54
+
55
+ Score 1-10 based on:
56
+ - Are all required sections present (Official Narrative, Counter-Narrative, Evidence, Follow The Money, Assessment)?
57
+ - Are BOTH sides presented with their STRONGEST arguments (not strawmen)?
58
+ - Is the evidence analysis systematic with clear ratings?
59
+ - Is the output substantial enough for the topic complexity?
60
+ - Does it cover what we DON'T know, not just what we do?
61
+
62
+ LOW SCORE: Missing sections, one-sided presentation, no evidence table, too brief, strawman arguments.
63
+ HIGH SCORE: All sections complete, steelman both sides, systematic evidence ratings, thorough coverage.
64
+
65
+ ${CRITIC_OUTPUT_FORMAT}`;
66
+ // ─── Writer Critic Prompts ────────────────────────────
67
+ const WRITER_CRITIC_A = `You evaluate written content on TASK COMPLIANCE and FACTUAL ACCURACY.
68
+
69
+ This is the HARDEST critic. You enforce the contract between task and output.
70
+
71
+ Score 1-10 based on:
72
+ - Did the writer follow ALL constraints? (word count limits, format requirements, specific deliverables)
73
+ - Are factual claims backed by evidence or clearly marked as estimates?
74
+ - Are specific numbers, statistics, or percentages sourced or defensible?
75
+ - Does the output match the EXACT deliverable requested (e.g., "5 ads" means 5, not 4)?
76
+ - Is the content usable as-is for its stated purpose?
77
+
78
+ CRITICAL DEDUCTIONS:
79
+ - Exceeding word count by >20%: automatic cap at 5/10
80
+ - Missing required deliverables: automatic cap at 4/10
81
+ - Unsourced specific claims ("3-5x more leads", "50% faster"): -2 points
82
+ - Wrong language (task in German, output in English or vice versa): -3 points
83
+
84
+ LOW SCORE: Ignores constraints, invents statistics, misses deliverables, exceeds word limits, unusable output.
85
+ HIGH SCORE: Exact constraint compliance, defensible claims, complete deliverables, production-ready output.
86
+
87
+ ${CRITIC_OUTPUT_FORMAT}`;
88
+ const WRITER_CRITIC_B = `You evaluate written content on PERSUASION, VOICE, and AUDIENCE AWARENESS.
89
+
90
+ Score 1-10 based on:
91
+ - Does the writing have a distinct voice — not generic AI slop?
92
+ - Is the tone matched to the audience (technical, business, casual)?
93
+ - Does it make the reader CARE about the topic?
94
+ - Are arguments backed by concrete examples, numbers, or analogies?
95
+ - Does it include clear calls-to-action or next steps where appropriate?
96
+ - Would a human editor publish this WITHOUT major revisions?
97
+
98
+ CALIBRATION (be strict — most AI writing is 5-7, not 7-9):
99
+ - 9-10: Would win an award. Unique voice, unforgettable opening, zero filler.
100
+ - 7-8: Professional quality. Minor polish needed but publishable.
101
+ - 5-6: Competent but generic. Reads like any other AI-written content.
102
+ - 3-4: Template-level. Swappable headers, predictable structure, no soul.
103
+ - 1-2: Incoherent or fundamentally wrong tone.
104
+
105
+ LOW SCORE: Generic tone, reads like a template, no personality, vague claims, missing CTAs, audience mismatch.
106
+ HIGH SCORE: Distinctive voice, audience-aware tone, compelling arguments, specific examples, clear next steps.
107
+
108
+ ${CRITIC_OUTPUT_FORMAT}`;
109
+ const WRITER_CRITIC_C = `You evaluate written content on SUBSTANCE and ORIGINALITY.
110
+
111
+ Score 1-10 based on:
112
+ - Does it say something genuinely useful — not just restate the obvious?
113
+ - Are there fresh angles, insights, or frameworks the reader hasn't seen?
114
+ - Is the depth appropriate for the topic complexity?
115
+ - Are trade-offs and nuances acknowledged, not just one-sided cheerleading?
116
+ - Would an expert in this field find value, or is it surface-level?
117
+ - Are claims that could be verified actually verifiable? (statistics, trends, best practices)
118
+
119
+ CALIBRATION — the "Google Test":
120
+ If the reader could find the same content in the first 3 Google results, it is NOT original (max 6/10).
121
+ Originality means: non-obvious connections, counterintuitive insights, personal frameworks, or data the reader hasn't seen.
122
+
123
+ LOW SCORE: Restates common knowledge, no original insight, too shallow, one-sided, an expert would learn nothing.
124
+ HIGH SCORE: Genuine insights, fresh perspective, appropriate depth, honest about trade-offs, expert-level value.
125
+
126
+ ${CRITIC_OUTPUT_FORMAT}`;
127
+ // ─── Marketing Critic Prompts ──────────────────────────
128
+ const MARKETING_CRITIC_A = `You evaluate social media content on PLATFORM COMPLIANCE and BRAND CONSISTENCY.
129
+
130
+ Score 1-10 based on:
131
+ - Does the content match the requested platform format? (carousel slides, tweet length, LinkedIn structure)
132
+ - Is the language correct? (English for Instagram/X, German for LinkedIn)
133
+ - Does it follow brand voice? (professional but approachable, no corporate fluff)
134
+ - Are all required elements present? (hook, content, CTA, hashtags)
135
+ - Would a social media manager post this WITHOUT major edits?
136
+
137
+ CRITICAL DEDUCTIONS:
138
+ - Wrong platform format: automatic cap at 4/10
139
+ - Wrong language for platform: -3 points
140
+ - Missing CTA: -2 points
141
+ - Generic opening ("In today's digital world..."): -2 points
142
+
143
+ CALIBRATION: Most AI social content is 4-6. A 9 means a social media manager would post it immediately.
144
+
145
+ ${CRITIC_OUTPUT_FORMAT}`;
146
+ const MARKETING_CRITIC_B = `You evaluate social media content on SCROLL-STOPPING POWER and ENGAGEMENT POTENTIAL.
147
+
148
+ Score 1-10 based on:
149
+ - Would this stop someone scrolling? Is the hook genuinely compelling?
150
+ - Does it create an emotional response (curiosity, recognition, urgency)?
151
+ - Is the value proposition clear within the first 2 seconds of reading?
152
+ - Are the claims specific enough to be believable? ("3x more leads" vs "significantly more")
153
+ - Would the target audience (SMB owners) engage (like, comment, share)?
154
+
155
+ CALIBRATION: Most AI-generated social posts get 0 engagement. A 7+ means real engagement potential.
156
+ - 9-10: Viral potential. Unique angle no one else is taking.
157
+ - 7-8: Strong hook. Would generate comments and saves.
158
+ - 5-6: Competent but forgettable. Scrolled past in 2 seconds.
159
+ - 3-4: Generic template content. Actively hurts brand perception.
160
+
161
+ ${CRITIC_OUTPUT_FORMAT}`;
162
+ const MARKETING_CRITIC_C = `You evaluate social media content on CONVERSION INTENT and BUSINESS VALUE.
163
+
164
+ Score 1-10 based on:
165
+ - Does this content serve a business goal? (awareness, trust, leads, traffic)
166
+ - Is the CTA clear and compelling? Not just "visit our website" but WHY?
167
+ - Does the content position the brand as an expert without being salesy?
168
+ - Would this content attract the RIGHT audience (SMB decision-makers, not random followers)?
169
+ - Is there a clear content-to-conversion path? (post → interest → click → contact)
170
+
171
+ CALIBRATION: Content without conversion intent is 5/10 max, no matter how pretty.
172
+
173
+ ${CRITIC_OUTPUT_FORMAT}`;
174
+ // ─── Blog Critic Prompts ──────────────────────────────
175
+ const BLOG_CRITIC_A = `You evaluate blog posts on SEO STRUCTURE and TECHNICAL OPTIMIZATION.
176
+
177
+ Score 1-10 based on:
178
+ - Is the main keyword in: title, first paragraph, at least one H2, meta description?
179
+ - Are H2 headers using keyword variations (not exact stuffing)?
180
+ - Is the meta description compelling AND under 155 chars?
181
+ - Are paragraphs short enough for web reading (max 3-4 sentences)?
182
+ - Is there a FAQ section with structured-data-ready questions?
183
+ - Are internal link suggestions included?
184
+
185
+ CRITICAL DEDUCTIONS:
186
+ - No keyword in title: -3 points
187
+ - No meta description: automatic cap at 5/10
188
+ - Wall-of-text paragraphs (>5 sentences): -2 points
189
+ - No FAQ section: -1 point
190
+
191
+ ${CRITIC_OUTPUT_FORMAT}`;
192
+ const BLOG_CRITIC_B = `You evaluate blog posts on READABILITY and AUDIENCE FIT.
193
+
194
+ Score 1-10 based on:
195
+ - Would a small business owner understand this without Googling terms?
196
+ - Does it lead with the reader's problem, not the solution?
197
+ - Are technical concepts explained with analogies or examples?
198
+ - Is the tone expert but accessible (not academic, not dumbed-down)?
199
+ - Does every section answer "why should I care?"
200
+ - Would the reader finish the entire article?
201
+
202
+ CALIBRATION: Most SEO blog content is 4-6 (keyword-stuffed, boring). A 8+ means genuinely useful.
203
+
204
+ ${CRITIC_OUTPUT_FORMAT}`;
205
+ const BLOG_CRITIC_C = `You evaluate blog posts on CONVERSION POTENTIAL and ACTIONABILITY.
206
+
207
+ Score 1-10 based on:
208
+ - Does the reader know EXACTLY what to do next after reading?
209
+ - Is there a clear CTA that feels natural (not forced)?
210
+ - Does the content build trust and authority? (examples, data, experience)
211
+ - Are objections addressed proactively ("But what about...")?
212
+ - Would this post generate consultation requests or contact form submissions?
213
+
214
+ CALIBRATION: A blog post that informs but doesn't convert is 6/10 max.
215
+
216
+ ${CRITIC_OUTPUT_FORMAT}`;
217
+ // ─── Prompt Registry ──────────────────────────────────
218
+ const INVESTIGATOR_PROMPTS = [
219
+ { name: 'facts-sources', prompt: INVESTIGATOR_CRITIC_A },
220
+ { name: 'honesty-courage', prompt: INVESTIGATOR_CRITIC_B },
221
+ { name: 'completeness-structure', prompt: INVESTIGATOR_CRITIC_C },
222
+ ];
223
+ const WRITER_PROMPTS = [
224
+ { name: 'task-compliance', prompt: WRITER_CRITIC_A },
225
+ { name: 'persuasion-voice', prompt: WRITER_CRITIC_B },
226
+ { name: 'substance-originality', prompt: WRITER_CRITIC_C },
227
+ ];
228
+ const MARKETING_PROMPTS = [
229
+ { name: 'platform-compliance', prompt: MARKETING_CRITIC_A },
230
+ { name: 'scroll-stopping', prompt: MARKETING_CRITIC_B },
231
+ { name: 'conversion-intent', prompt: MARKETING_CRITIC_C },
232
+ ];
233
+ const BLOG_PROMPTS = [
234
+ { name: 'seo-structure', prompt: BLOG_CRITIC_A },
235
+ { name: 'readability', prompt: BLOG_CRITIC_B },
236
+ { name: 'conversion-potential', prompt: BLOG_CRITIC_C },
237
+ ];
238
+ /** Agent name → critic prompt set. Falls back to a sensible default. */
239
+ const AGENT_CRITIC_MAP = {
240
+ investigator: INVESTIGATOR_PROMPTS,
241
+ writer: WRITER_PROMPTS,
242
+ marketing: MARKETING_PROMPTS,
243
+ 'blog-writer': BLOG_PROMPTS,
244
+ };
/**
 * Resolve the critic prompt set for an agent.
 *
 * Only the map's own keys are consulted, so an agent name that collides with an
 * inherited Object.prototype member (e.g. "constructor", "toString") is treated
 * as unknown instead of yielding a non-array value.
 *
 * @param {string} agentName - Name of the agent being evaluated.
 * @returns The prompt set registered for the agent, or the investigator prompts
 *          for unknown agents (backward-compatible default).
 */
export function getCriticPrompts(agentName) {
    const registered = Object.prototype.hasOwnProperty.call(AGENT_CRITIC_MAP, agentName)
        ? AGENT_CRITIC_MAP[agentName]
        : undefined;
    return registered ?? INVESTIGATOR_PROMPTS;
}
252
+ /** @deprecated Use getCriticPrompts(agentName) instead. Kept for backward compatibility. */
253
+ export const CRITIC_PROMPTS = INVESTIGATOR_PROMPTS;
254
+ // ─── Multi-Critic Runner ───────────────────────────────
255
+ /** Descriptive labels for evaluation task preamble, per agent type */
256
+ const AGENT_OUTPUT_LABELS = {
257
+ investigator: 'investigative report',
258
+ writer: 'written content',
259
+ marketing: 'social media content',
260
+ 'blog-writer': 'blog post',
261
+ };
/**
 * Run every critic registered for the agent concurrently and aggregate the
 * individual scores into a median.
 *
 * A critic that throws, or whose output yields no parseable score, is recorded
 * with the sentinel score -1. Such entries are excluded from the median but are
 * still returned in `critics` and shown as FAILED in the combined report.
 *
 * @param agentOutput - The agent's output to evaluate
 * @param task - The original task description
 * @param runCritic - Injected async function, called as
 *                    runCritic(prompt, evaluationTask, criticName); its resolved
 *                    value is treated as the critic's raw text output
 * @param agentName - Optional name of the agent being evaluated (selects the
 *                    critic set and the wording of the evaluation preamble)
 * @returns An object with `medianScore` (rounded to one decimal, 0 when every
 *          critic failed), `critics` (one entry per critic) and `combinedReport`
 */
export async function runMultiCritic(agentOutput, task, runCritic, agentName) {
    const agentKey = agentName ?? '';
    // Own-property lookup only: a name such as "constructor" must not resolve to
    // an inherited Object.prototype member and leak into the prompt text.
    const knownLabel = Object.prototype.hasOwnProperty.call(AGENT_OUTPUT_LABELS, agentKey)
        ? AGENT_OUTPUT_LABELS[agentKey]
        : undefined;
    const outputLabel = knownLabel ?? 'output';
    const evaluationTask = `Evaluate the following ${outputLabel} for the task "${task}":\n\n${agentOutput}`;
    const prompts = getCriticPrompts(agentKey);
    // Run all critics of the selected set in parallel
    const results = await Promise.all(prompts.map(async ({ name, prompt }) => {
        try {
            const output = await runCritic(prompt, evaluationTask, name);
            return { critic: name, score: parseScore(output), report: output };
        }
        catch {
            // A failed critic is reported with the -1 sentinel (filtered out below)
            return { critic: name, score: -1, report: 'Critic failed to produce output.' };
        }
    }));
    // Keep only critics that produced a usable score
    const validResults = results.filter((r) => r.score > 0);
    if (validResults.length === 0) {
        return {
            medianScore: 0,
            critics: results,
            combinedReport: 'All critics failed to produce scores.',
        };
    }
    // Median of the valid scores (mean of the two middle values for an even count)
    const scores = validResults.map((r) => r.score).sort((a, b) => a - b);
    const middle = Math.floor(scores.length / 2);
    const medianScore = scores.length % 2 === 0
        ? (scores[middle - 1] + scores[middle]) / 2
        : scores[middle];
    // Combined report lists every critic, including the failed ones
    const reportLines = results.map((r) => `[${r.critic}] Score: ${r.score > 0 ? r.score : 'FAILED'}/10\n${r.report}`);
    return {
        medianScore: Math.round(medianScore * 10) / 10,
        critics: results,
        combinedReport: reportLines.join('\n\n---\n\n'),
    };
}
310
+ // ─── Helpers ───────────────────────────────────────────
/**
 * Extract a numeric score from a critic's raw output.
 *
 * The `===SCORE===` marker is tried first; if it is absent or not followed by a
 * number, the first "X/10" pattern anywhere in the text is used instead.
 * Decoration between the marker and the number is tolerated (whitespace, braces
 * echoed from the `{number 1-10}` placeholder, brackets, markdown emphasis), so
 * `{8}` and `**8**` both parse as 8.
 *
 * @param {string} output - Raw critic output text.
 * @returns {number} The score clamped to the range 1..10, or -1 when no score
 *                   could be found.
 */
function parseScore(output) {
    const found = output.match(/===SCORE===[\s{\[(*_]*(\d+(?:\.\d+)?)/)
        // Fallback: X/10 pattern
        ?? output.match(/\b(\d+(?:\.\d+)?)\s*\/\s*10\b/);
    if (!found) {
        return -1;
    }
    return Math.max(1, Math.min(10, parseFloat(found[1])));
}
324
+ //# sourceMappingURL=multi-critic.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"multi-critic.js","sourceRoot":"","sources":["../../src/evolution/multi-critic.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AA4BH,yDAAyD;AAEzD,MAAM,oBAAoB,GAAG;;;;;UAKnB,CAAC;AAEX,yDAAyD;AAEzD,MAAM,qBAAqB,GAAG;;;;;;;;;;;;EAY5B,oBAAoB,EAAE,CAAC;AAEzB,MAAM,qBAAqB,GAAG;;;;;;;;;;;;EAY5B,oBAAoB,EAAE,CAAC;AAEzB,MAAM,qBAAqB,GAAG;;;;;;;;;;;;EAY5B,oBAAoB,EAAE,CAAC;AAEzB,yDAAyD;AAEzD,MAAM,eAAe,GAAG;;;;;;;;;;;;;;;;;;;;EAoBtB,oBAAoB,EAAE,CAAC;AAEzB,MAAM,eAAe,GAAG;;;;;;;;;;;;;;;;;;;;EAoBtB,oBAAoB,EAAE,CAAC;AAEzB,MAAM,eAAe,GAAG;;;;;;;;;;;;;;;;;EAiBtB,oBAAoB,EAAE,CAAC;AAEzB,0DAA0D;AAE1D,MAAM,kBAAkB,GAAG;;;;;;;;;;;;;;;;;EAiBzB,oBAAoB,EAAE,CAAC;AAEzB,MAAM,kBAAkB,GAAG;;;;;;;;;;;;;;;EAezB,oBAAoB,EAAE,CAAC;AAEzB,MAAM,kBAAkB,GAAG;;;;;;;;;;;EAWzB,oBAAoB,EAAE,CAAC;AAEzB,yDAAyD;AAEzD,MAAM,aAAa,GAAG;;;;;;;;;;;;;;;;EAgBpB,oBAAoB,EAAE,CAAC;AAEzB,MAAM,aAAa,GAAG;;;;;;;;;;;;EAYpB,oBAAoB,EAAE,CAAC;AAEzB,MAAM,aAAa,GAAG;;;;;;;;;;;EAWpB,oBAAoB,EAAE,CAAC;AAEzB,yDAAyD;AAEzD,MAAM,oBAAoB,GAAsB;IAC9C,EAAE,IAAI,EAAE,eAAe,EAAE,MAAM,EAAE,qBAAqB,EAAE;IACxD,EAAE,IAAI,EAAE,iBAAiB,EAAE,MAAM,EAAE,qBAAqB,EAAE;IAC1D,EAAE,IAAI,EAAE,wBAAwB,EAAE,MAAM,EAAE,qBAAqB,EAAE;CAClE,CAAC;AAEF,MAAM,cAAc,GAAsB;IACxC,EAAE,IAAI,EAAE,iBAAiB,EAAE,MAAM,EAAE,eAAe,EAAE;IACpD,EAAE,IAAI,EAAE,kBAAkB,EAAE,MAAM,EAAE,eAAe,EAAE;IACrD,EAAE,IAAI,EAAE,uBAAuB,EAAE,MAAM,EAAE,eAAe,EAAE;CAC3D,CAAC;AAEF,MAAM,iBAAiB,GAAsB;IAC3C,EAAE,IAAI,EAAE,qBAAqB,EAAE,MAAM,EAAE,kBAAkB,EAAE;IAC3D,EAAE,IAAI,EAAE,iBAAiB,EAAE,MAAM,EAAE,kBAAkB,EAAE;IACvD,EAAE,IAAI,EAAE,mBAAmB,EAAE,MAAM,EAAE,kBAAkB,EAAE;CAC1D,CAAC;AAEF,MAAM,YAAY,GAAsB;IACtC,EAAE,IAAI,EAAE,eAAe,EAAE,MAAM,EAAE,aAAa,EAAE;IAChD,EAAE,IAAI,EAAE,aAAa,EAAE,MAAM,EAAE,aAAa,EAAE;IAC9C,EAAE,IAAI,EAAE,sBAAsB,EAAE,MAAM,EAAE,aAAa,EAAE;CACxD,CAAC;AAEF,wEAAwE;AACxE,MAAM,gBAAgB,GAAsC;IAC1D,YAAY,EAAE,oBAAoB;IAClC,MAAM,EAAE,cAAc;IACtB,SAAS,EAAE,iBAAiB;IAC5B,aAAa,EAAE,YAAY;CAC5B,CAAC;AAEF;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,SAAiB;IAChD,OAAO,gBAAgB,CAAC,SAAS,CAAC,IAAI,oBAAoB,CAAC;AAC7D,CAAC;AAED,
4FAA4F;AAC5F,MAAM,CAAC,MAAM,cAAc,GAAsB,oBAAoB,CAAC;AAEtE,0DAA0D;AAE1D,sEAAsE;AACtE,MAAM,mBAAmB,GAA2B;IAClD,YAAY,EAAE,sBAAsB;IACpC,MAAM,EAAE,iBAAiB;IACzB,SAAS,EAAE,sBAAsB;IACjC,aAAa,EAAE,WAAW;CAC3B,CAAC;AAEF;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,WAAmB,EACnB,IAAY,EACZ,SAAsB,EACtB,SAAkB;IAElB,MAAM,WAAW,GAAG,mBAAmB,CAAC,SAAS,IAAI,EAAE,CAAC,IAAI,QAAQ,CAAC;IACrE,MAAM,cAAc,GAAG,0BAA0B,WAAW,kBAAkB,IAAI,SAAS,WAAW,EAAE,CAAC;IAEzG,MAAM,OAAO,GAAG,gBAAgB,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC;IAElD,gCAAgC;IAChC,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE;QACtD,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,MAAM,EAAE,cAAc,EAAE,IAAI,CAAC,CAAC;YAC7D,MAAM,KAAK,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC;YACjC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC;QACjD,CAAC;QAAC,MAAM,CAAC;YACP,uCAAuC;YACvC,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,kCAAkC,EAAE,CAAC;QACjF,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAE5C,4BAA4B;IAC5B,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;IAExD,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,OAAO;YACL,WAAW,EAAE,CAAC;YACd,OAAO,EAAE,OAAO;YAChB,cAAc,EAAE,uCAAuC;SACxD,CAAC;IACJ,CAAC;IAED,mBAAmB;IACnB,MAAM,MAAM,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACtE,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,KAAK,CAAC;QACzC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;QACjE,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;IAE1C,wBAAwB;IACxB,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,CAC7B,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,MAAM,YAAY,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,QAAQ,QAAQ,CAAC,CAAC,MAAM,EAAE,CAClF,CAAC;IAEF,OAAO;QACL,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,GAAG,EAAE,CAAC,GAAG,EAAE;QAC
9C,OAAO,EAAE,OAAO;QAChB,cAAc,EAAE,WAAW,CAAC,IAAI,CAAC,aAAa,CAAC;KAChD,CAAC;AACJ,CAAC;AAED,0DAA0D;AAE1D,0EAA0E;AAC1E,SAAS,UAAU,CAAC,MAAc;IAChC,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;IACjE,IAAI,UAAU,EAAE,CAAC;QACf,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC9D,CAAC;IAED,yBAAyB;IACzB,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;IAC/D,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,UAAU,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5D,CAAC;IAED,OAAO,CAAC,CAAC,CAAC;AACZ,CAAC"}
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Darwin — Notification System
3
+ *
4
+ * Sends alerts when important evolution events happen.
5
+ * Currently supports Telegram. Non-blocking — failures are logged, not thrown.
6
+ *
7
+ * Env vars: TELEGRAM_BOT_TOKEN, TELEGRAM_ADMIN_CHAT_ID
8
+ */
9
+ export interface NotificationConfig {
10
+ telegram?: {
11
+ botToken: string;
12
+ chatId: string;
13
+ };
14
+ }
15
+ /**
16
+ * Load notification config from environment variables.
17
+ * Returns config with telegram only if both vars are set.
18
+ */
19
+ export declare function loadNotificationConfig(): NotificationConfig;
20
+ /**
21
+ * Notify that an A/B test completed and a winner was activated.
22
+ */
23
+ export declare function notifyABTestComplete(config: NotificationConfig, agentName: string, winner: string, loser: string, compositeWinner: number, compositeLoser: number): Promise<void>;
24
+ /**
25
+ * Notify that a new prompt version was generated and A/B test started.
26
+ */
27
+ export declare function notifyEvolutionStarted(config: NotificationConfig, agentName: string, oldVersion: string, newVersion: string, reason: string): Promise<void>;
28
+ /**
29
+ * Notify that a rollback happened.
30
+ */
31
+ export declare function notifyRollback(config: NotificationConfig, agentName: string, rolledBackTo: string, failures: number): Promise<void>;
32
+ //# sourceMappingURL=notifications.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"notifications.d.ts","sourceRoot":"","sources":["../../src/evolution/notifications.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,EAAE;QACT,QAAQ,EAAE,MAAM,CAAC;QACjB,MAAM,EAAE,MAAM,CAAC;KAChB,CAAC;CACH;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,IAAI,kBAAkB,CAS3D;AAED;;GAEG;AACH,wBAAsB,oBAAoB,CACxC,MAAM,EAAE,kBAAkB,EAC1B,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,MAAM,EACb,eAAe,EAAE,MAAM,EACvB,cAAc,EAAE,MAAM,GACrB,OAAO,CAAC,IAAI,CAAC,CAgBf;AAED;;GAEG;AACH,wBAAsB,sBAAsB,CAC1C,MAAM,EAAE,kBAAkB,EAC1B,SAAS,EAAE,MAAM,EACjB,UAAU,EAAE,MAAM,EAClB,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,IAAI,CAAC,CAUf;AAED;;GAEG;AACH,wBAAsB,cAAc,CAClC,MAAM,EAAE,kBAAkB,EAC1B,SAAS,EAAE,MAAM,EACjB,YAAY,EAAE,MAAM,EACpB,QAAQ,EAAE,MAAM,GACf,OAAO,CAAC,IAAI,CAAC,CAUf"}
@@ -0,0 +1,92 @@
1
+ /**
2
+ * Darwin — Notification System
3
+ *
4
+ * Sends alerts when important evolution events happen.
5
+ * Currently supports Telegram. Best-effort — failures are logged, never thrown (each send is still awaited, with a 10 s timeout).
6
+ *
7
+ * Env vars: TELEGRAM_BOT_TOKEN, TELEGRAM_ADMIN_CHAT_ID
8
+ */
9
/**
 * Load notification config from environment variables.
 *
 * Reads TELEGRAM_BOT_TOKEN and TELEGRAM_ADMIN_CHAT_ID. Surrounding whitespace
 * (for example a trailing newline picked up from a secrets file) is trimmed,
 * and a value that is empty after trimming counts as unset — otherwise the
 * whitespace would end up inside the Bot API URL / chat id.
 *
 * @returns {{ telegram?: { botToken: string, chatId: string } }} Config with a
 *   `telegram` section only when both variables are set; otherwise `{}`.
 */
export function loadNotificationConfig() {
    const botToken = process.env.TELEGRAM_BOT_TOKEN?.trim();
    const chatId = process.env.TELEGRAM_ADMIN_CHAT_ID?.trim();
    if (botToken && chatId) {
        return { telegram: { botToken, chatId } };
    }
    return {};
}
21
/**
 * Notify that an A/B test completed and a winner was activated.
 *
 * The message names the agent, the winning and losing prompt versions with
 * their composite scores (three decimals) and, when both scores are positive,
 * the winner's relative difference to the loser as a signed percentage.
 *
 * @param config Notification config; nothing is sent when `telegram` is absent.
 * @param agentName Agent whose prompt was tested (rendered as inline code).
 * @param winner Version that won and is now active.
 * @param loser Version that lost.
 * @param compositeWinner Composite score of the winner.
 * @param compositeLoser Composite score of the loser.
 */
export async function notifyABTestComplete(config, agentName, winner, loser, compositeWinner, compositeLoser) {
    // Telegram's legacy Markdown treats _ * ` [ as markup; outside an entity
    // they must be backslash-escaped or the whole message is rejected.
    const escapeMarkdown = (value) => String(value).replace(/[_*`[]/g, '\\$&');
    let delta = '';
    if (compositeWinner > 0 && compositeLoser > 0) {
        const percent = (((compositeWinner - compositeLoser) / compositeLoser) * 100).toFixed(1);
        // toFixed already carries the minus sign; only add '+' for non-negative values
        // so a lower-scoring "winner" is not rendered as "+-12.3%".
        delta = percent.startsWith('-') ? `${percent}%` : `+${percent}%`;
    }
    const msg = [
        `🧬 *Darwin A/B Test Complete*`,
        ``,
        `Agent: \`${agentName}\``,
        `Winner: *${winner}* (${compositeWinner.toFixed(3)})`,
        `Loser: ${escapeMarkdown(loser)} (${compositeLoser.toFixed(3)}) ${delta}`,
        ``,
        `${escapeMarkdown(winner)} is now the active prompt.`,
    ].join('\n');
    await sendTelegram(config, msg);
}
39
/**
 * Notify that a new prompt version was generated and an A/B test started.
 *
 * `reason` is free-form text: it is cut to its first 200 characters and then
 * escaped for Telegram's legacy Markdown so stray `_`, `*`, `` ` `` or `[`
 * cannot break message parsing. A missing reason is rendered as empty instead
 * of throwing, in keeping with the module's "never throw" contract.
 *
 * @param config Notification config; nothing is sent when `telegram` is absent.
 * @param agentName Agent being evolved (rendered as inline code).
 * @param oldVersion Currently active prompt version.
 * @param newVersion Newly generated challenger version (rendered bold).
 * @param reason Why the evolution was triggered.
 */
export async function notifyEvolutionStarted(config, agentName, oldVersion, newVersion, reason) {
    // Outside an entity, legacy Markdown requires these four characters to be backslash-escaped.
    const escapeMarkdown = (value) => String(value).replace(/[_*`[]/g, '\\$&');
    // Truncate before escaping so the cut can never land between a backslash and its character.
    const shortReason = String(reason ?? '').slice(0, 200);
    const msg = [
        `🔬 *Darwin Evolution Started*`,
        ``,
        `Agent: \`${agentName}\``,
        `A/B Test: ${escapeMarkdown(oldVersion)} vs *${newVersion}*`,
        `Reason: ${escapeMarkdown(shortReason)}`,
    ].join('\n');
    await sendTelegram(config, msg);
}
52
/**
 * Announce that an agent's prompt was rolled back.
 *
 * Sends a three-line summary (agent, version rolled back to, number of
 * consecutive failures) under a "Darwin Rollback" heading.
 *
 * @param config Notification config handed to the Telegram sender.
 * @param agentName Agent that was rolled back (rendered as inline code).
 * @param rolledBackTo Version that is active again (rendered bold).
 * @param failures Count of consecutive failures reported in the message.
 */
export async function notifyRollback(config, agentName, rolledBackTo, failures) {
    const heading = `⚠️ *Darwin Rollback*`;
    const details = `Agent: \`${agentName}\`\nRolled back to: *${rolledBackTo}*\nAfter ${failures} consecutive failures`;
    // Heading and details are separated by one blank line.
    await sendTelegram(config, `${heading}\n\n${details}`);
}
65
+ // ─── Telegram Helper ──────────────────────────────
66
/**
 * Deliver `text` to the configured Telegram chat through the Bot API
 * `sendMessage` method. Best-effort: every failure is logged with
 * `console.warn` and never thrown.
 *
 * The message is first sent with `parse_mode: 'Markdown'`. If Telegram rejects
 * it because the markup cannot be parsed (HTTP 400, "can't parse entities" —
 * e.g. an unbalanced `_` in interpolated text), it is retried once as plain
 * text so the notification still arrives, with the markup characters visible.
 *
 * @param config Notification config; returns immediately when `telegram` is absent.
 * @param text Message body.
 */
async function sendTelegram(config, text) {
    if (!config.telegram)
        return;
    const { botToken, chatId } = config.telegram;
    const url = `https://api.telegram.org/bot${botToken}/sendMessage`;
    // One POST attempt; `parseMode` is left out of the payload for the plain-text retry.
    const post = (parseMode) => fetch(url, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
            chat_id: chatId,
            text,
            ...(parseMode ? { parse_mode: parseMode } : {}),
            disable_web_page_preview: true,
        }),
        // Fresh 10 s timeout per attempt.
        signal: AbortSignal.timeout(10_000),
    });
    try {
        let response = await post('Markdown');
        if (response.ok)
            return;
        let body = await response.text();
        if (response.status === 400 && /can't parse entities/i.test(body)) {
            response = await post(undefined);
            if (response.ok)
                return;
            body = await response.text();
        }
        console.warn(`[darwin] Telegram notification failed: ${response.status} ${body}`);
    }
    catch (err) {
        console.warn(`[darwin] Telegram notification error: ${err instanceof Error ? err.message : String(err)}`);
    }
}
92
+ //# sourceMappingURL=notifications.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"notifications.js","sourceRoot":"","sources":["../../src/evolution/notifications.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AASH;;;GAGG;AACH,MAAM,UAAU,sBAAsB;IACpC,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC;IAChD,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC;IAElD,IAAI,QAAQ,IAAI,MAAM,EAAE,CAAC;QACvB,OAAO,EAAE,QAAQ,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,CAAC;IAC5C,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,MAA0B,EAC1B,SAAiB,EACjB,MAAc,EACd,KAAa,EACb,eAAuB,EACvB,cAAsB;IAEtB,MAAM,KAAK,GAAG,eAAe,GAAG,CAAC,IAAI,cAAc,GAAG,CAAC;QACrD,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,eAAe,GAAG,cAAc,CAAC,GAAG,cAAc,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;QACjF,CAAC,CAAC,EAAE,CAAC;IAEP,MAAM,GAAG,GAAG;QACV,+BAA+B;QAC/B,EAAE;QACF,YAAY,SAAS,IAAI;QACzB,YAAY,MAAM,MAAM,eAAe,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;QACrD,UAAU,KAAK,KAAK,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,KAAK,EAAE;QACzD,EAAE;QACF,GAAG,MAAM,4BAA4B;KACtC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEb,MAAM,YAAY,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;AAClC,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,sBAAsB,CAC1C,MAA0B,EAC1B,SAAiB,EACjB,UAAkB,EAClB,UAAkB,EAClB,MAAc;IAEd,MAAM,GAAG,GAAG;QACV,+BAA+B;QAC/B,EAAE;QACF,YAAY,SAAS,IAAI;QACzB,aAAa,UAAU,QAAQ,UAAU,GAAG;QAC5C,WAAW,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;KAClC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEb,MAAM,YAAY,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;AAClC,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,MAA0B,EAC1B,SAAiB,EACjB,YAAoB,EACpB,QAAgB;IAEhB,MAAM,GAAG,GAAG;QACV,sBAAsB;QACtB,EAAE;QACF,YAAY,SAAS,IAAI;QACzB,oBAAoB,YAAY,GAAG;QACnC,SAAS,QAAQ,uBAAuB;KACzC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEb,MAAM,YAAY,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;AAClC,CAAC;AAED,qDAAqD;AAErD,KAAK,UAAU,YAAY,CACzB,MAA0B,EAC1B,IAAY;IAEZ,IAAI,CAAC,MAAM,CAAC,QAAQ;QAAE,OAAO;IAE7B,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC,QAAQ,CAAC;IAC7C,MAAM,GAAG,GAAG,+BAA+B,QAAQ,cAAc,CAAC;IAElE,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EA
AE;YAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,OAAO,EAAE,MAAM;gBACf,IAAI;gBACJ,UAAU,EAAE,UAAU;gBACtB,wBAAwB,EAAE,IAAI;aAC/B,CAAC;YACF,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,MAAM,CAAC;SACpC,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,OAAO,CAAC,IAAI,CAAC,0CAA0C,QAAQ,CAAC,MAAM,IAAI,IAAI,EAAE,CAAC,CAAC;QACpF,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,IAAI,CAAC,yCAAyC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAC5G,CAAC;AACH,CAAC"}
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Darwin — Prompt Optimizer
3
+ *
4
+ * Uses an LLM (via injected callback) to generate improved prompt variants
5
+ * based on performance data and detected patterns.
6
+ */
7
+ import type { DarwinPattern, PromptVersionStats } from '../types.js';
8
+ /** Callback that takes a prompt string and resolves with the LLM's text output. Injected by the parent through the PromptOptimizer constructor, never imported here. */
9
+ export type RunPromptFn = (prompt: string) => Promise<string>;
10
+ /** Agent tool context so the optimizer knows what tools are available. Passed as the optional `toolContext` argument of `generateVariant`; both lists may be omitted. */
11
+ export interface AgentToolContext {
12
+ /** Names of the MCP servers the agent uses */
13
+ mcp?: string[];
14
+ /** Names of the built-in tools the agent uses */
15
+ tools?: string[];
16
+ }
17
+ /** Stats for one task category; `generateVariant` accepts an optional list of these as `categoryStats`. */
18
+ export interface CategoryStats {
19
+ taskType: string; // label of the task category these numbers belong to
20
+ totalRuns: number;
21
+ avgQuality: number; // NOTE(review): quality scale is not visible from this declaration — confirm
22
+ avgSourceCount: number;
23
+ successRate: number; // NOTE(review): unclear here whether this is a 0–1 fraction or a percentage — confirm
24
+ }
25
+ export declare class PromptOptimizer {
26
+ private runPrompt; // the LLM callback handed to the constructor (see RunPromptFn)
27
+ constructor(runPrompt: RunPromptFn);
28
+ /**
29
+ * Generate an improved variant of an agent prompt; resolves with the variant as a string.
30
+ *
31
+ * Builds a meta-prompt that includes the current prompt text, detected
32
+ * patterns (strengths, weaknesses, trends, anomalies), aggregated
33
+ * stats, tool context, per-category breakdowns, and recent critic feedback. Only the first three arguments are required.
34
+ */
35
+ generateVariant(currentPrompt: string, patterns: DarwinPattern[], stats: PromptVersionStats, toolContext?: AgentToolContext, categoryStats?: CategoryStats[], recentFeedback?: string[]): Promise<string>;
36
+ /**
37
+ * Assemble the meta-prompt that instructs the LLM how to improve
38
+ * the agent's system prompt.
39
+ */
40
+ private buildMetaPrompt;
41
+ /**
42
+ * Format patterns into a human-readable summary for the meta-prompt.
43
+ */
44
+ private formatPatterns;
45
+ /**
46
+ * Format stats into a readable summary.
47
+ */
48
+ private formatStats;
49
+ /**
50
+ * Remove markdown code fences if the LLM wraps the output.
51
+ */
52
+ private cleanOutput;
53
+ /**
54
+ * Pluralize a pattern type label correctly.
55
+ * Handles: weakness -> Weaknesses, strength -> Strengths, trend -> Trends, anomaly -> Anomalies
56
+ */
57
+ private pluralize;
58
+ /**
59
+ * Check that the mutated prompt preserves safety-related keywords from the original.
60
+ * Returns null if OK, or a rejection reason (a string) if safety keywords were removed.
61
+ */
62
+ checkAlignmentPreservation(original: string, mutated: string): string | null;
63
+ }
64
+ //# sourceMappingURL=optimizer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"optimizer.d.ts","sourceRoot":"","sources":["../../src/evolution/optimizer.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAErE,iFAAiF;AACjF,MAAM,MAAM,WAAW,GAAG,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;AAE9D,yEAAyE;AACzE,MAAM,WAAW,gBAAgB;IAC/B,sCAAsC;IACtC,GAAG,CAAC,EAAE,MAAM,EAAE,CAAC;IACf,oCAAoC;IACpC,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,yCAAyC;AACzC,MAAM,WAAW,aAAa;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,qBAAa,eAAe;IAC1B,OAAO,CAAC,SAAS,CAAc;gBAEnB,SAAS,EAAE,WAAW;IAIlC;;;;;;OAMG;IACG,eAAe,CACnB,aAAa,EAAE,MAAM,EACrB,QAAQ,EAAE,aAAa,EAAE,EACzB,KAAK,EAAE,kBAAkB,EACzB,WAAW,CAAC,EAAE,gBAAgB,EAC9B,aAAa,CAAC,EAAE,aAAa,EAAE,EAC/B,cAAc,CAAC,EAAE,MAAM,EAAE,GACxB,OAAO,CAAC,MAAM,CAAC;IA+BlB;;;OAGG;IACH,OAAO,CAAC,eAAe;IA2HvB;;OAEG;IACH,OAAO,CAAC,cAAc;IAiCtB;;OAEG;IACH,OAAO,CAAC,WAAW;IAUnB;;OAEG;IACH,OAAO,CAAC,WAAW;IAYnB;;;OAGG;IACH,OAAO,CAAC,SAAS;IAUjB;;;OAGG;IACH,0BAA0B,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;CAwC7E"}