darwin-agents 0.5.0-alpha.1 → 0.5.1-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. package/CHANGELOG.md +184 -0
  2. package/dist/agents/analyst.d.ts +11 -0
  3. package/dist/agents/analyst.d.ts.map +1 -0
  4. package/dist/agents/analyst.js +78 -0
  5. package/dist/agents/analyst.js.map +1 -0
  6. package/dist/agents/blog-writer.d.ts +13 -0
  7. package/dist/agents/blog-writer.d.ts.map +1 -0
  8. package/dist/agents/blog-writer.js +59 -0
  9. package/dist/agents/blog-writer.js.map +1 -0
  10. package/dist/agents/critic.d.ts +11 -0
  11. package/dist/agents/critic.d.ts.map +1 -0
  12. package/dist/agents/critic.js +57 -0
  13. package/dist/agents/critic.js.map +1 -0
  14. package/dist/agents/index.d.ts +15 -0
  15. package/dist/agents/index.d.ts.map +1 -0
  16. package/dist/agents/index.js +31 -0
  17. package/dist/agents/index.js.map +1 -0
  18. package/dist/agents/investigator-critic.d.ts +10 -0
  19. package/dist/agents/investigator-critic.d.ts.map +1 -0
  20. package/dist/agents/investigator-critic.js +78 -0
  21. package/dist/agents/investigator-critic.js.map +1 -0
  22. package/dist/agents/investigator.d.ts +13 -0
  23. package/dist/agents/investigator.d.ts.map +1 -0
  24. package/dist/agents/investigator.js +105 -0
  25. package/dist/agents/investigator.js.map +1 -0
  26. package/dist/agents/marketing.d.ts +13 -0
  27. package/dist/agents/marketing.d.ts.map +1 -0
  28. package/dist/agents/marketing.js +59 -0
  29. package/dist/agents/marketing.js.map +1 -0
  30. package/dist/agents/researcher.d.ts +11 -0
  31. package/dist/agents/researcher.d.ts.map +1 -0
  32. package/dist/agents/researcher.js +68 -0
  33. package/dist/agents/researcher.js.map +1 -0
  34. package/dist/agents/writer.d.ts +9 -0
  35. package/dist/agents/writer.d.ts.map +1 -0
  36. package/dist/agents/writer.js +47 -0
  37. package/dist/agents/writer.js.map +1 -0
  38. package/dist/cli/create.d.ts +11 -0
  39. package/dist/cli/create.d.ts.map +1 -0
  40. package/dist/cli/create.js +104 -0
  41. package/dist/cli/create.js.map +1 -0
  42. package/dist/cli/evolve.d.ts +13 -0
  43. package/dist/cli/evolve.d.ts.map +1 -0
  44. package/dist/cli/evolve.js +69 -0
  45. package/dist/cli/evolve.js.map +1 -0
  46. package/dist/cli/index.d.ts +13 -0
  47. package/dist/cli/index.d.ts.map +1 -0
  48. package/dist/cli/index.js +84 -0
  49. package/dist/cli/index.js.map +1 -0
  50. package/dist/cli/init.d.ts +12 -0
  51. package/dist/cli/init.d.ts.map +1 -0
  52. package/dist/cli/init.js +68 -0
  53. package/dist/cli/init.js.map +1 -0
  54. package/dist/cli/run.d.ts +7 -0
  55. package/dist/cli/run.d.ts.map +1 -0
  56. package/dist/cli/run.js +371 -0
  57. package/dist/cli/run.js.map +1 -0
  58. package/dist/cli/status.d.ts +7 -0
  59. package/dist/cli/status.d.ts.map +1 -0
  60. package/dist/cli/status.js +123 -0
  61. package/dist/cli/status.js.map +1 -0
  62. package/dist/core/agent.d.ts +53 -0
  63. package/dist/core/agent.d.ts.map +1 -0
  64. package/dist/core/agent.js +172 -0
  65. package/dist/core/agent.js.map +1 -0
  66. package/dist/core/runner.d.ts +75 -0
  67. package/dist/core/runner.d.ts.map +1 -0
  68. package/dist/core/runner.js +255 -0
  69. package/dist/core/runner.js.map +1 -0
  70. package/dist/evolution/loop.d.ts +100 -0
  71. package/dist/evolution/loop.d.ts.map +1 -0
  72. package/dist/evolution/loop.js +424 -0
  73. package/dist/evolution/loop.js.map +1 -0
  74. package/dist/evolution/multi-critic.d.ts +58 -0
  75. package/dist/evolution/multi-critic.d.ts.map +1 -0
  76. package/dist/evolution/multi-critic.js +326 -0
  77. package/dist/evolution/multi-critic.js.map +1 -0
  78. package/dist/evolution/notifications.d.ts +32 -0
  79. package/dist/evolution/notifications.d.ts.map +1 -0
  80. package/dist/evolution/notifications.js +92 -0
  81. package/dist/evolution/notifications.js.map +1 -0
  82. package/dist/evolution/optimizer.d.ts +64 -0
  83. package/dist/evolution/optimizer.d.ts.map +1 -0
  84. package/dist/evolution/optimizer.js +223 -0
  85. package/dist/evolution/optimizer.js.map +1 -0
  86. package/dist/evolution/patterns.d.ts +63 -0
  87. package/dist/evolution/patterns.d.ts.map +1 -0
  88. package/dist/evolution/patterns.js +297 -0
  89. package/dist/evolution/patterns.js.map +1 -0
  90. package/dist/evolution/safety.d.ts +76 -0
  91. package/dist/evolution/safety.d.ts.map +1 -0
  92. package/dist/evolution/safety.js +182 -0
  93. package/dist/evolution/safety.js.map +1 -0
  94. package/dist/evolution/tracker.d.ts +48 -0
  95. package/dist/evolution/tracker.d.ts.map +1 -0
  96. package/dist/evolution/tracker.js +163 -0
  97. package/dist/evolution/tracker.js.map +1 -0
  98. package/dist/index.d.ts +32 -0
  99. package/dist/index.d.ts.map +1 -0
  100. package/dist/index.js +35 -0
  101. package/dist/index.js.map +1 -0
  102. package/dist/memory/index.d.ts +32 -0
  103. package/dist/memory/index.d.ts.map +1 -0
  104. package/dist/memory/index.js +49 -0
  105. package/dist/memory/index.js.map +1 -0
  106. package/dist/memory/postgres-memory.d.ts +52 -0
  107. package/dist/memory/postgres-memory.d.ts.map +1 -0
  108. package/dist/memory/postgres-memory.js +515 -0
  109. package/dist/memory/postgres-memory.js.map +1 -0
  110. package/dist/memory/sqlite-memory.d.ts +36 -0
  111. package/dist/memory/sqlite-memory.d.ts.map +1 -0
  112. package/dist/memory/sqlite-memory.js +380 -0
  113. package/dist/memory/sqlite-memory.js.map +1 -0
  114. package/dist/providers/anthropic.d.ts +20 -0
  115. package/dist/providers/anthropic.d.ts.map +1 -0
  116. package/dist/providers/anthropic.js +82 -0
  117. package/dist/providers/anthropic.js.map +1 -0
  118. package/dist/providers/claude-cli.d.ts +35 -0
  119. package/dist/providers/claude-cli.d.ts.map +1 -0
  120. package/dist/providers/claude-cli.js +153 -0
  121. package/dist/providers/claude-cli.js.map +1 -0
  122. package/dist/providers/index.d.ts +39 -0
  123. package/dist/providers/index.d.ts.map +1 -0
  124. package/dist/providers/index.js +58 -0
  125. package/dist/providers/index.js.map +1 -0
  126. package/dist/providers/ollama.d.ts +17 -0
  127. package/dist/providers/ollama.d.ts.map +1 -0
  128. package/dist/providers/ollama.js +64 -0
  129. package/dist/providers/ollama.js.map +1 -0
  130. package/dist/providers/openai.d.ts +19 -0
  131. package/dist/providers/openai.d.ts.map +1 -0
  132. package/dist/providers/openai.js +75 -0
  133. package/dist/providers/openai.js.map +1 -0
  134. package/dist/providers/types.d.ts +62 -0
  135. package/dist/providers/types.d.ts.map +1 -0
  136. package/dist/providers/types.js +9 -0
  137. package/dist/providers/types.js.map +1 -0
  138. package/dist/src/evolution/optimizer-gepa.d.ts +237 -0
  139. package/dist/src/evolution/optimizer-gepa.d.ts.map +1 -0
  140. package/dist/src/evolution/optimizer-gepa.js +357 -0
  141. package/dist/src/evolution/optimizer-gepa.js.map +1 -0
  142. package/dist/src/evolution/pareto.d.ts +166 -0
  143. package/dist/src/evolution/pareto.d.ts.map +1 -0
  144. package/dist/src/evolution/pareto.js +225 -0
  145. package/dist/src/evolution/pareto.js.map +1 -0
  146. package/dist/src/evolution/reflector.d.ts +108 -0
  147. package/dist/src/evolution/reflector.d.ts.map +1 -0
  148. package/dist/src/evolution/reflector.js +159 -0
  149. package/dist/src/evolution/reflector.js.map +1 -0
  150. package/dist/src/evolution/run-prompt-fn.d.ts +11 -0
  151. package/dist/src/evolution/run-prompt-fn.d.ts.map +1 -0
  152. package/dist/src/evolution/run-prompt-fn.js +11 -0
  153. package/dist/src/evolution/run-prompt-fn.js.map +1 -0
  154. package/dist/src/index.d.ts +4 -0
  155. package/dist/src/index.d.ts.map +1 -1
  156. package/dist/src/index.js +6 -0
  157. package/dist/src/index.js.map +1 -1
  158. package/dist/types.d.ts +221 -0
  159. package/dist/types.d.ts.map +1 -0
  160. package/dist/types.js +19 -0
  161. package/dist/types.js.map +1 -0
  162. package/package.json +1 -1
package/CHANGELOG.md CHANGED
@@ -1,5 +1,189 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.5.1-alpha.1] — 2026-05-29
4
+
5
+ **GEPA polish wave.** Closes the three deliberate paper deviations
6
+ documented in `optimizer-gepa.ts` as V0.6 backlog from V0.5.0-alpha.2.
7
+ **Zero breaking changes** — every V0.5.0 callsite keeps working unchanged.
8
+ **R1 + R2 + R3 code-review-loop GO**, **336/337 vitest tests green** (+29
9
+ V0.5.1 regression tests). tsc strict clean, build clean.
10
+
11
+ ### Added — new surfaces
12
+
13
+ - **`crowdingDistance(variants, objectives)`** in `src/evolution/pareto.ts` —
14
+ pure NSGA-II Deb 2002 density-estimator. Returns one distance per
15
+ variant: per-objective min-max-normalised neighbour gap, summed across
16
+ objectives, with `+Infinity` for boundary variants so they always
17
+ survive truncation. Scale-safe through per-objective normalisation
18
+ (unlike `scalarise` which is scale-sensitive).
19
+ - **`ParetoTruncationStrategy`** type + new 4th parameter to
20
+ `paretoSelect(variants, objectives, maxKeep, truncationStrategy)`.
21
+ Two strategies: `"scalarised"` (V0.5.0 default, kept) and `"crowding"`
22
+ (NSGA-II density-preserving). Backward-compatible default.
23
+ - **`GepaOptimizerOptions`** interface + new constructor option
24
+ `reflectionRunPrompt?: RunPromptFn`. When supplied, reflection AND
25
+ merge route through the override — matches GEPA paper guidance
26
+ (stronger LM for reflection than for task execution). Falls back to
27
+ the main `runPrompt` when omitted. Closes V0.5.0 R1 Research F7.
28
+ - **`GepaOptimizer.merge(parents, opts)`** — GEPA Paper Appendix F
29
+ system-aware merge. Takes two distinct Pareto-front parents, asks the
30
+ reflection LM to combine their strongest aspects into one mutated
31
+ prompt. Returns `{ id: "gepa-merge-<a>+<b>", prompt }`. Validations:
32
+ exactly 2 parents, distinct ids, non-empty prompts. Output is
33
+ fence-stripped + sentence-boundary capped to
34
+ `max(longerParent.length * 1.3, 3500)`. Paper reports ~5% lift when
35
+ run every K-th generation.
36
+ - **`GepaOptimizer.nextGeneration.truncationStrategy` passthrough** —
37
+ forwards the new `paretoSelect` parameter from `NextGenerationOptions`.
38
+ Default `"scalarised"` matches V0.5.0 byte-for-byte.
39
+
40
+ ### Fixed — R1 + R2 + R3 code-review-loop
41
+
42
+ R1 critic reported a P1 template-injection in `merge` (claimed `{SCORE_A}`
43
+ / `{SCORE_B}` placeholders inside parent prompts were double-substituted
44
+ because they ran before `{PROMPT_A}` / `{PROMPT_B}`). On R2 verification
45
+ the V1 ordering (ID + SCORE first, PROMPT last) was confirmed CORRECT —
46
+ `String.prototype.replace` only finds matches in the current working
47
+ string, and user content does not enter the working string until the
48
+ final two replacements. **Net effect:** code unchanged, but
49
+ `tests/v0.5.1-features.test.ts` now explicitly regression-tests BOTH
50
+ `{ID_B}` AND `{SCORE_A}` + `{SCORE_B}` literals inside parent prompts —
51
+ the test coverage gap was the real R1 finding, not the substitution order.
52
+
53
+ R1 Analyst documentation-drift fixes:
54
+
55
+ - `src/evolution/optimizer-gepa.ts` header — V0.6 deferrals updated to
56
+ reflect V0.5.1 shipping `truncationStrategy` + `merge` +
57
+ `reflectionRunPrompt`. Instance-coverage sampling remains V0.6
58
+ backlog.
59
+ - `src/evolution/reflector.ts` — "deferred to V0.5.1" wording replaced
60
+ with "SHIPPED in V0.5.1".
61
+ - `src/evolution/pareto.ts` — `"coverage"` mention removed from the
62
+ `paretoSelect` docstring; type carries only `"scalarised" | "crowding"`,
63
+ no type/doc mismatch remains.
64
+
65
+ ### Test coverage
66
+
67
+ - **336/337 vitest tests green** (was 307/308 baseline + 29 new tests
68
+ in `tests/v0.5.1-features.test.ts`). 1 pre-existing skip carried over.
69
+ - New tests cover: `crowdingDistance` (4 boundary + 4 three-variant
70
+ scale-safe + non-finite defense), `paretoSelect` (default vs explicit
71
+ scalarised parity + crowding boundary preservation), `GepaOptimizer`
72
+ reflection-LM routing + fallback + invalid-type guard, `merge`
73
+ (template-injection for ID + SCORE, tuple validation, same-id
74
+ rejection, empty-prompt rejection, reflection-LM routing, fence-strip,
75
+ length cap, rejection propagation), `nextGeneration`
76
+ truncationStrategy passthrough + backward-compat byte-equivalence.
77
+
78
+ ### V0.6 backlog (carried over from V0.5.1 deferrals)
79
+
80
+ - `"coverage"` strategy on `ParetoTruncationStrategy` (GEPA Algorithm 2
81
+ instance-proportional sampling)
82
+ - Extract `cleanOutput` + `truncateAtSentenceBoundary` to shared
83
+ `src/evolution/text-utils.ts` (currently byte-identical in `Reflector`
84
+ + `GepaOptimizer`)
85
+ - Collision-safe `makeMergeId` separator (current `+` collides if
86
+ caller-side ids contain `+` literally — unlikely with default
87
+ `gepa-cand-${i}` ids)
88
+ - More edge tests: `merge` with non-finite metrics, `crowdingDistance`
89
+ with all-Infinity inputs
90
+
91
+ ### Migration from V0.5.0
92
+
93
+ None required. V0.5.1 is additive. Adopt new surfaces incrementally:
94
+
95
+ - Switch `nextGeneration` to `truncationStrategy: "crowding"` for
96
+ diversity-critical workloads
97
+ - Pass a stronger Opus model as `reflectionRunPrompt` while keeping a
98
+ cheaper Haiku as the main task LM
99
+ - Invoke `optimizer.merge([survivors[0], survivors[1]])` every K-th
100
+ generation for the Paper Appendix F lift
101
+
102
+ ## [0.5.0-alpha.2] — 2026-05-25
103
+
104
+ **GEPA-Style Reflective Optimizer (Phase 2 A2).** Multi-objective Pareto
105
+ selection + text-feedback-driven prompt mutation as a TS-native
106
+ adaptation of the GEPA framework (arxiv 2507.19457). Released under the
107
+ `alpha` npm dist-tag in parallel with v0.5.0-alpha.1 (execution-trace
108
+ capture, A1). `npm install darwin-agents@alpha` resolves to
109
+ 0.5.0-alpha.2; `npm install darwin-agents` stays on 0.4.9 (latest).
110
+
111
+ ### Added
112
+
113
+ - **`GepaOptimizer`** — generation-loop wrapper producing N variant
114
+ mutations per call (default N=3, [1, 10]). Three `feedbackStrategy`
115
+ modes: `"split"` (round-robin partition, diversity), `"replicate"`
116
+ (every variant sees all feedback), `"single"` (one reflection).
117
+ Separate `nextGeneration(scored, opts)` Pareto-selects survivors for
118
+ the next generation.
119
+ - **`Reflector`** — single-shot LLM call with GEPA's "smallest possible
120
+ targeted edit" template. Output is cleaned (fences stripped) and
121
+ truncated at sentence boundary.
122
+ - **`pareto.ts`** — `dominates` / `nonDominatedFront` / `paretoSelect` /
123
+ `scalarise` pure helpers + `DARWIN_DEFAULT_OBJECTIVES` constant
124
+ (matching `DarwinMetrics` field names + existing weight scheme).
125
+ - **`RunPromptFn`** — shared injected-LLM-call type, single source of
126
+ truth for both `PromptOptimizer` and `Reflector`.
127
+ - **A1 sync (S1184):** `createTraceCapture` + `ExecutionTrace` /
128
+ `TraceToolCall` / `TraceTokenUsage` / `TraceTurnError` now exported
129
+ from the OS package (were already in v0.5.0-alpha.1 on npm, OS source
130
+ catches up this release).
131
+
132
+ ### Deliberate deviations from GEPA paper (documented in source)
133
+
134
+ - N variants per `generate()` call vs GEPA Algorithm 1's 1-offspring-
135
+ per-iteration.
136
+ - `feedbackStrategy: "split"` is our adaptation, not in the paper.
137
+ - `paretoSelect` truncation uses scalarised tie-break, not GEPA
138
+ Algorithm 2's coverage-proportional sampling — V0.6 will add
139
+ `truncationStrategy: "coverage" | "crowding"`.
140
+ - GEPA+Merge (paper Appendix F, ~+5% lift) NOT implemented — V0.6.
141
+ - Instance-wise coverage sampling NOT implemented — V0.6.
142
+ - Single injected `runPrompt` for both task and reflection — GEPA docs
143
+ recommend stronger `reflection_lm`. Optional `reflectionRunPrompt`
144
+ override deferred to V0.5.1.
145
+
146
+ ### Fixed (R1 + R2 V0.5.0-alpha.2 code-review findings)
147
+
148
+ The 3-Agent code-review loop ran twice. R1 found 13 findings, R2 caught
149
+ 2 must-fix that R1 missed. All addressed pre-publish.
150
+
151
+ **R1 — 6 MUST-FIX (S1185):**
152
+
153
+ 1. **HIGH (Critic H1):** Template injection — `String.replace` order
154
+ meant `currentPrompt` containing `{FEEDBACKS}` literal could trigger
155
+ double-substitution. Fixed by substituting `{CURRENT_PROMPT}` last.
156
+ 2. **HIGH (Critic H2):** `feedbackCap` accepted negative values — added
157
+ `Math.max(1, Math.floor(...))` guard.
158
+ 3. **HIGH (Analyst A5):** `ParetoObjective` JSDoc example used wrong
159
+ `DarwinMetrics` field names. Fixed + `DARWIN_DEFAULT_OBJECTIVES`
160
+ constant.
161
+ 4. **HIGH (Analyst A1):** `RunPromptFn` was duplicated. Extracted to
162
+ `evolution/run-prompt-fn.ts`.
163
+ 5. **MED (Critic M2):** `nextGeneration` used reference-identity on
164
+ `metrics` — switched to explicit index-based mapping (refactor-safe).
165
+ 6. **MED (Critic M4):** Added scale-normalization JSDoc warning on
166
+ `ParetoObjective.weight`.
167
+
168
+ **R2 — 2 MUST-FIX (caught what R1 missed, S1185):**
169
+
170
+ 7. **CRITICAL (R2-C1):** R1's clamp `Math.max(1, Math.floor(NaN))`
171
+ evaluates to `NaN` — silent bypass for NaN/Infinity. Hardened with
172
+ `Number.isFinite()` + fallback to default.
173
+ 8. **LOW (R2-L1):** `generate("p", [])` threw opaque internal error.
174
+ Added GEPA-specific boundary validation pointing callers at
175
+ `PromptOptimizer` for cold-start. Plus R2-M1 guard for shared
176
+ metrics-object references.
177
+
178
+ ### Test coverage
179
+
180
+ - **307/308 OS tests green** (1 pre-existing skip, 0 fail). Was 268 in
181
+ v0.4.9. New test files: `pareto.test.ts` (16), `reflector.test.ts`
182
+ (14), `optimizer-gepa.test.ts` (12), `r1-fixes.test.ts` (12 R1+R2
183
+ regression). A1 trace + memory-trajectory tests synced from
184
+ v0.5.0-alpha.1.
185
+ - tsc strict + build clean.
186
+
3
187
  ## [0.5.0-alpha.1] — 2026-05-24
4
188
 
5
189
  **Phase 2 A1: Execution-Trace-Capture.** First pre-release of Darwin's
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Analyst Agent — Code Intelligence
3
+ *
4
+ * Analyzes codebases for quality, patterns, security issues,
5
+ * and improvement opportunities.
6
+ *
7
+ * Uses filesystem access (Read, Glob, Grep tools).
8
+ */
9
+ import type { AgentDefinition } from '../types.js';
10
+ export declare const analyst: AgentDefinition;
11
+ //# sourceMappingURL=analyst.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"analyst.d.ts","sourceRoot":"","sources":["../../src/agents/analyst.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,OAAO,EAAE,eAqErB,CAAC"}
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Analyst Agent — Code Intelligence
3
+ *
4
+ * Analyzes codebases for quality, patterns, security issues,
5
+ * and improvement opportunities.
6
+ *
7
+ * Uses filesystem access (Read, Glob, Grep tools).
8
+ */
9
+ export const analyst = {
10
+ name: 'analyst',
11
+ role: 'Code Analyst',
12
+ description: 'Analyzes codebases for quality, patterns, and issues. Finds what humans miss.',
13
+ maxTurns: 25,
14
+ tools: ['Read', 'Glob', 'Grep', 'Bash'],
15
+ systemPrompt: `You are a senior code analyst who reviews codebases for quality and issues.
16
+
17
+ YOUR MISSION:
18
+ Analyze the given codebase or file path and deliver a structured quality report.
19
+
20
+ ANALYSIS PROCESS:
21
+ 1. Scan the project structure (Glob for key files: package.json, tsconfig, etc.)
22
+ 2. Read key files to understand architecture
23
+ 3. Search for common issues (Grep for patterns)
24
+ 4. Evaluate code quality, security, and architecture
25
+
26
+ WHAT TO LOOK FOR:
27
+ - **Architecture**: Project structure, dependency management, module boundaries
28
+ - **Code Quality**: TypeScript strictness, error handling, naming conventions
29
+ - **Security**: Hardcoded secrets, injection risks, unsafe patterns
30
+ - **Performance**: N+1 queries, missing indexes, unnecessary re-renders
31
+ - **Dead Code**: Unused exports, unreachable branches, commented-out code
32
+ - **Dependencies**: Outdated packages, known vulnerabilities, bundle size
33
+
34
+ SEVERITY LEVELS:
35
+ - P0 (Critical): Security vulnerabilities, data loss risks
36
+ - P1 (High): Bugs, performance issues, architectural problems
37
+ - P2 (Medium): Code quality, maintainability issues
38
+ - P3 (Low): Style, naming, minor improvements
39
+
40
+ RULES:
41
+ - Only report issues you can PROVE (show the file and line)
42
+ - Don't nitpick formatting — focus on substance
43
+ - Prioritize by impact, not by count
44
+ - Suggest concrete fixes, not vague advice
45
+ - If the code is good, say so — don't invent problems
46
+
47
+ OUTPUT FORMAT:
48
+ # Code Analysis: {Project/Path}
49
+
50
+ ## Summary
51
+ {2-3 sentences: overall health, biggest concern, biggest strength}
52
+
53
+ ## Critical Issues (P0-P1)
54
+ ### {Issue Title}
55
+ - **File**: {path}:{line}
56
+ - **Problem**: {specific description}
57
+ - **Fix**: {concrete suggestion}
58
+
59
+ ## Improvements (P2-P3)
60
+ - {issue + file + suggestion}
61
+
62
+ ## Architecture Notes
63
+ {Observations about structure, patterns, decisions}
64
+
65
+ ## Score: {1-10}/10`,
66
+ evolution: {
67
+ enabled: true,
68
+ evaluator: 'critic',
69
+ metrics: {
70
+ quality: 0.45,
71
+ sourceCount: 0.05,
72
+ outputLength: 0.15,
73
+ duration: 0.10,
74
+ success: 0.25,
75
+ },
76
+ },
77
+ };
78
+ //# sourceMappingURL=analyst.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"analyst.js","sourceRoot":"","sources":["../../src/agents/analyst.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,MAAM,CAAC,MAAM,OAAO,GAAoB;IACtC,IAAI,EAAE,SAAS;IACf,IAAI,EAAE,cAAc;IACpB,WAAW,EAAE,+EAA+E;IAC5F,QAAQ,EAAE,EAAE;IACZ,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;IACvC,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;oBAkDI;IAElB,SAAS,EAAE;QACT,OAAO,EAAE,IAAI;QACb,SAAS,EAAE,QAAQ;QACnB,OAAO,EAAE;YACP,OAAO,EAAE,IAAI;YACb,WAAW,EAAE,IAAI;YACjB,YAAY,EAAE,IAAI;YAClB,QAAQ,EAAE,IAAI;YACd,OAAO,EAAE,IAAI;SACd;KACF;CACF,CAAC"}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Blog Writer Agent — SEO-Optimized Content
3
+ *
4
+ * Writes SEO-optimized blog posts with keyword focus.
5
+ * No MCP tools — pure text generation.
6
+ * Blog-specific critics: SEO, readability, conversion.
7
+ *
8
+ * Customize brand/site by providing context in the task prompt
9
+ * or by creating a custom agent with defineAgent().
10
+ */
11
+ import type { AgentDefinition } from '../types.js';
12
+ export declare const blogWriter: AgentDefinition;
13
+ //# sourceMappingURL=blog-writer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"blog-writer.d.ts","sourceRoot":"","sources":["../../src/agents/blog-writer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,UAAU,EAAE,eAgDxB,CAAC"}
@@ -0,0 +1,59 @@
1
+ /**
2
+ * Blog Writer Agent — SEO-Optimized Content
3
+ *
4
+ * Writes SEO-optimized blog posts with keyword focus.
5
+ * No MCP tools — pure text generation.
6
+ * Blog-specific critics: SEO, readability, conversion.
7
+ *
8
+ * Customize brand/site by providing context in the task prompt
9
+ * or by creating a custom agent with defineAgent().
10
+ */
11
+ export const blogWriter = {
12
+ name: 'blog-writer',
13
+ role: 'SEO Blog Writer',
14
+ description: 'Writes SEO-optimized blog posts. Keyword-aware, structured for readability and conversion.',
15
+ maxTurns: 8,
16
+ systemPrompt: `You are a senior SEO content writer for a premium digital agency.
17
+
18
+ BLOG STANDARDS:
19
+ - Write in the language the user specifies (default: English)
20
+ - Target audience: SMB owners considering digital solutions, website redesign, or new projects
21
+ - Tone: Expert but accessible. Explain tech concepts simply. No jargon without explanation.
22
+ - Length: 800-1500 words unless specified otherwise
23
+
24
+ SEO RULES:
25
+ - Include the main keyword in: Title, first paragraph, one H2, meta description
26
+ - Use 3-5 H2 headers with keyword variations (not exact stuffing)
27
+ - Write a compelling meta description (max 155 chars)
28
+ - Suggest internal link opportunities where relevant
29
+ - Use short paragraphs (max 3-4 sentences)
30
+ - Include a FAQ section with 3 questions (structured data opportunity)
31
+
32
+ CONTENT QUALITY:
33
+ - Lead with the reader's problem, not the solution
34
+ - Include at least one concrete example, case study, or data point
35
+ - Address objections ("But what about...") proactively
36
+ - Every section must answer "why should I care?"
37
+ - End with clear CTA (contact, consultation, related post)
38
+ - NEVER invent statistics. Use "typically", "in our experience" for estimates.
39
+
40
+ OUTPUT FORMAT:
41
+ 1. Title (H1) — compelling, keyword-included, under 60 chars
42
+ 2. Meta Description — under 155 chars
43
+ 3. Main content with H2 headers
44
+ 4. FAQ section (3 questions)
45
+ 5. CTA paragraph
46
+ 6. Suggested internal links`,
47
+ evolution: {
48
+ enabled: true,
49
+ evaluator: 'multi-critic',
50
+ metrics: {
51
+ quality: 0.55,
52
+ sourceCount: 0.0,
53
+ outputLength: 0.15,
54
+ duration: 0.05,
55
+ success: 0.25,
56
+ },
57
+ },
58
+ };
59
+ //# sourceMappingURL=blog-writer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"blog-writer.js","sourceRoot":"","sources":["../../src/agents/blog-writer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAIH,MAAM,CAAC,MAAM,UAAU,GAAoB;IACzC,IAAI,EAAE,aAAa;IACnB,IAAI,EAAE,iBAAiB;IACvB,WAAW,EAAE,4FAA4F;IACzG,QAAQ,EAAE,CAAC;IACX,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;4BA8BY;IAE1B,SAAS,EAAE;QACT,OAAO,EAAE,IAAI;QACb,SAAS,EAAE,cAAc;QACzB,OAAO,EAAE;YACP,OAAO,EAAE,IAAI;YACb,WAAW,EAAE,GAAG;YAChB,YAAY,EAAE,IAAI;YAClB,QAAQ,EAAE,IAAI;YACd,OAAO,EAAE,IAAI;SACd;KACF;CACF,CAAC"}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Critic Agent — The Heart of Darwin
3
+ *
4
+ * Evaluates other agents' output. Without the Critic,
5
+ * there's no quality score, no evolution, no improvement.
6
+ *
7
+ * Zero-config: no MCP servers, no API keys.
8
+ */
9
+ import type { AgentDefinition } from '../types.js';
10
+ export declare const critic: AgentDefinition;
11
+ //# sourceMappingURL=critic.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"critic.d.ts","sourceRoot":"","sources":["../../src/agents/critic.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,MAAM,EAAE,eAgDpB,CAAC"}
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Critic Agent — The Heart of Darwin
3
+ *
4
+ * Evaluates other agents' output. Without the Critic,
5
+ * there's no quality score, no evolution, no improvement.
6
+ *
7
+ * Zero-config: no MCP servers, no API keys.
8
+ */
9
+ export const critic = {
10
+ name: 'critic',
11
+ role: "Devil's Advocate & Quality Evaluator",
12
+ description: "Reviews and scores other agents' output. Enables Darwin evolution through quality feedback.",
13
+ maxTurns: 5,
14
+ systemPrompt: `You are a sharp, constructive critic who evaluates AI agent outputs.
15
+
16
+ YOUR ROLE:
17
+ Score the quality of an agent's output on a scale of 1-10 and provide specific feedback.
18
+
19
+ EVALUATION CRITERIA:
20
+ 1. **Accuracy** (0-10): Are claims factual? Are sources cited? Any hallucinations?
21
+ 2. **Completeness** (0-10): Does it fully address the task? Missing angles?
22
+ 3. **Structure** (0-10): Well-organized? Clear headers? Logical flow?
23
+ 4. **Actionability** (0-10): Can the reader act on this? Concrete next steps?
24
+ 5. **Conciseness** (0-10): Right level of detail? No filler?
25
+
26
+ SCORING GUIDE:
27
+ - 9-10: Exceptional. Would publish as-is.
28
+ - 7-8: Good. Minor improvements possible.
29
+ - 5-6: Adequate. Significant gaps or issues.
30
+ - 3-4: Poor. Major problems.
31
+ - 1-2: Unusable. Fundamentally flawed.
32
+
33
+ RULES:
34
+ - Be SPECIFIC. Not "could be better" but "Section 3 lacks source citations for the market size claim"
35
+ - Be CONSTRUCTIVE. Every criticism must include a fix suggestion
36
+ - Be HONEST. A score of 7 when it deserves 4 helps nobody
37
+ - Evaluate the OUTPUT, not the effort
38
+ - Consider the task type: research needs sources, code needs correctness, content needs readability
39
+
40
+ OUTPUT FORMAT (EXACTLY THIS — parseable by Darwin):
41
+ ===SCORE===
42
+ {number 1-10}
43
+ ===STRENGTHS===
44
+ - {specific strength 1}
45
+ - {specific strength 2}
46
+ ===WEAKNESSES===
47
+ - {specific weakness 1 + fix suggestion}
48
+ - {specific weakness 2 + fix suggestion}
49
+ ===VERDICT===
50
+ {One sentence summary}
51
+ ===END===`,
52
+ evolution: {
53
+ enabled: false, // Critic doesn't evolve itself (avoids circular dependency)
54
+ evaluator: 'critic',
55
+ },
56
+ };
57
+ //# sourceMappingURL=critic.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"critic.js","sourceRoot":"","sources":["../../src/agents/critic.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,MAAM,CAAC,MAAM,MAAM,GAAoB;IACrC,IAAI,EAAE,QAAQ;IACd,IAAI,EAAE,sCAAsC;IAC5C,WAAW,EAAE,6FAA6F;IAC1G,QAAQ,EAAE,CAAC;IACX,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;UAqCN;IAER,SAAS,EAAE;QACT,OAAO,EAAE,KAAK,EAAG,4DAA4D;QAC7E,SAAS,EAAE,QAAQ;KACpB;CACF,CAAC"}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Built-in Agents — ready to use out of the box.
3
+ */
4
+ export { writer } from './writer.js';
5
+ export { researcher } from './researcher.js';
6
+ export { critic } from './critic.js';
7
+ export { analyst } from './analyst.js';
8
+ export { investigator } from './investigator.js';
9
+ export { investigatorCritic } from './investigator-critic.js';
10
+ export { marketing } from './marketing.js';
11
+ export { blogWriter } from './blog-writer.js';
12
+ import type { AgentDefinition } from '../types.js';
13
+ /** All built-in agents by name */
14
+ export declare const builtinAgents: Record<string, AgentDefinition>;
15
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/agents/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACjD,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAU9C,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAEnD,kCAAkC;AAClC,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CASzD,CAAC"}
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Built-in Agents — ready to use out of the box.
3
+ */
4
+ export { writer } from './writer.js';
5
+ export { researcher } from './researcher.js';
6
+ export { critic } from './critic.js';
7
+ export { analyst } from './analyst.js';
8
+ export { investigator } from './investigator.js';
9
+ export { investigatorCritic } from './investigator-critic.js';
10
+ export { marketing } from './marketing.js';
11
+ export { blogWriter } from './blog-writer.js';
12
+ import { writer } from './writer.js';
13
+ import { researcher } from './researcher.js';
14
+ import { critic } from './critic.js';
15
+ import { analyst } from './analyst.js';
16
+ import { investigator } from './investigator.js';
17
+ import { investigatorCritic } from './investigator-critic.js';
18
+ import { marketing } from './marketing.js';
19
+ import { blogWriter } from './blog-writer.js';
20
+ /** All built-in agents by name */
21
+ export const builtinAgents = {
22
+ writer,
23
+ researcher,
24
+ critic,
25
+ analyst,
26
+ investigator,
27
+ 'investigator-critic': investigatorCritic,
28
+ marketing,
29
+ 'blog-writer': blogWriter,
30
+ };
31
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/agents/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACjD,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAE9C,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACjD,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAG9C,kCAAkC;AAClC,MAAM,CAAC,MAAM,aAAa,GAAoC;IAC5D,MAAM;IACN,UAAU;IACV,MAAM;IACN,OAAO;IACP,YAAY;IACZ,qBAAqB,EAAE,kBAAkB;IACzC,SAAS;IACT,aAAa,EAAE,UAAU;CAC1B,CAAC"}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Investigator Critic — Evaluates Investigative Reports
3
+ *
4
+ * Different criteria than the standard Critic:
5
+ * Not "is it well written?" but
6
+ * "is it HONEST, BALANCED, and DEEP?"
7
+ */
8
+ import type { AgentDefinition } from '../types.js';
9
+ export declare const investigatorCritic: AgentDefinition; // type-only declaration; the object literal itself is emitted in investigator-critic.js
10
+ //# sourceMappingURL=investigator-critic.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"investigator-critic.d.ts","sourceRoot":"","sources":["../../src/agents/investigator-critic.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,kBAAkB,EAAE,eAsEhC,CAAC"}
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Investigator Critic — Evaluates Investigative Reports
3
+ *
4
+ * Different criteria than the standard Critic:
5
+ * Not "is it well written?" but
6
+ * "is it HONEST, BALANCED, and DEEP?"
7
+ */
8
+ export const investigatorCritic = {
9
+ name: 'investigator-critic', // same string as the quoted key this agent is registered under in builtinAgents (index.js)
10
+ role: 'Investigation Quality Evaluator',
11
+ description: 'Evaluates investigative reports on honesty, balance, depth, and source diversity — not just writing quality.',
12
+ maxTurns: 5, // NOTE(review): presumably the per-run turn limit — confirm against AgentDefinition
13
+ systemPrompt: `You evaluate investigative research reports on controversial topics.
14
+
15
+ YOU DO NOT CARE ABOUT:
16
+ - Pretty writing or formatting
17
+ - Whether the conclusion is "mainstream-approved"
18
+ - Political correctness
19
+ - Whether it might offend someone
20
+
21
+ YOU CARE ABOUT:
22
+ 1. **Honesty** (0-10): Does it say what it actually thinks, or does it hide behind diplomatic language? Does it take a position or cop out with "both sides have valid points"? Cowardly fence-sitting = low score.
23
+
24
+ 2. **Balance** (0-10): Are BOTH sides presented with their STRONGEST arguments? Or is one side a strawman? Does it steelman the unpopular position? Note: Balance does NOT mean 50/50 — if evidence is 80/20, a balanced report says so.
25
+
26
+ 3. **Source Diversity** (0-10): Does it cite ONLY mainstream sources? ONLY alternative sources? Or a genuine MIX? Are primary documents included? Are sources from different countries/perspectives? Wikipedia + CNN alone = low score. Government documents + academic papers + independent journalists = high score.
27
+
28
+ 4. **Depth** (0-10): Surface-level summary or actual investigation? Does it name specific people, dates, documents? Does it follow the money? Does it ask WHO BENEFITS? Generic "some experts say" = low score.
29
+
30
+ 5. **Intellectual Courage** (0-10): Does it challenge comfortable assumptions? Does it state uncomfortable conclusions? Does it point out what we genuinely don't know? Or does it play it safe and repeat what everyone already thinks?
31
+
32
+ SCORING GUIDE:
33
+ - 9-10: Genuinely investigative. I learned something. Both sides treated fairly. Clear honest conclusion.
34
+ - 7-8: Good research but plays it a bit safe. Solid sources, some fence-sitting.
35
+ - 5-6: Surface level. Mostly mainstream rehash with token mention of alternatives.
36
+ - 3-4: Propaganda dressed as research. Only one side taken seriously.
37
+ - 1-2: Copy-paste from Wikipedia. No actual investigation.
38
+
39
+ RED FLAGS (automatic -2 penalty):
40
+ - "This has been thoroughly debunked" without showing the debunking evidence
41
+ - "Conspiracy theory" used as dismissal rather than description
42
+ - No alternative sources at all
43
+ - Conclusion was obviously predetermined before investigation
44
+ - "More research is needed" as the ONLY conclusion
45
+
46
+ GREEN FLAGS (+1 bonus):
47
+ - Primary documents cited (leaked emails, government reports, declassified files)
48
+ - Named specific people and their conflicts of interest
49
+ - Changed direction mid-investigation based on evidence found
50
+ - Stated something uncomfortable with evidence to back it up
51
+
52
+ OUTPUT FORMAT (EXACTLY THIS — parseable by Darwin):
53
+ ===SCORE===
54
+ {number 1-10}
55
+ ===HONESTY===
56
+ {score}/10 — {specific assessment}
57
+ ===BALANCE===
58
+ {score}/10 — {specific assessment}
59
+ ===SOURCE_DIVERSITY===
60
+ {score}/10 — {specific assessment}
61
+ ===DEPTH===
62
+ {score}/10 — {specific assessment}
63
+ ===COURAGE===
64
+ {score}/10 — {specific assessment}
65
+ ===STRENGTHS===
66
+ - {specific strength 1}
67
+ - {specific strength 2}
68
+ ===WEAKNESSES===
69
+ - {specific weakness 1 + what would make it better}
70
+ - {specific weakness 2 + what would make it better}
71
+ ===VERDICT===
72
+ {One honest sentence — was this real investigation or theatre?}
73
+ ===END===`, // the ===SECTION=== markers are described in the prompt itself as "parseable by Darwin"; treat them as a wire format — TODO confirm against the parser before renaming any
74
+ evolution: {
75
+ enabled: false, // evolution is switched off for this agent; NOTE(review): rationale is not visible in this file — confirm
76
+ },
77
+ };
78
+ //# sourceMappingURL=investigator-critic.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"investigator-critic.js","sourceRoot":"","sources":["../../src/agents/investigator-critic.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAIH,MAAM,CAAC,MAAM,kBAAkB,GAAoB;IACjD,IAAI,EAAE,qBAAqB;IAC3B,IAAI,EAAE,iCAAiC;IACvC,WAAW,EAAE,8GAA8G;IAC3H,QAAQ,EAAE,CAAC;IACX,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;UA4DN;IAER,SAAS,EAAE;QACT,OAAO,EAAE,KAAK;KACf;CACF,CAAC"}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Investigator Agent — Controversial Topics, Maximum Honesty
3
+ *
4
+ * Investigates sensitive, controversial, or disputed topics.
5
+ * Not a mainstream parrot, not a conspiracy theorist.
6
+ * Goal: Evidence-based analysis of BOTH sides.
7
+ *
8
+ * MCP: tavily + WebSearch/WebFetch
9
+ * Evolution: Optimized for objectivity + depth + source diversity
10
+ */
11
+ import type { AgentDefinition } from '../types.js';
12
+ export declare const investigator: AgentDefinition; // type-only declaration; NOTE(review): runtime object presumably emitted in investigator.js — confirm
13
+ //# sourceMappingURL=investigator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"investigator.d.ts","sourceRoot":"","sources":["../../src/agents/investigator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,YAAY,EAAE,eA8F1B,CAAC"}