darwin-agents 0.5.0-alpha.1 → 0.5.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. package/CHANGELOG.md +80 -127
  2. package/dist/agents/analyst.d.ts +11 -0
  3. package/dist/agents/analyst.d.ts.map +1 -0
  4. package/dist/agents/analyst.js +78 -0
  5. package/dist/agents/analyst.js.map +1 -0
  6. package/dist/agents/blog-writer.d.ts +13 -0
  7. package/dist/agents/blog-writer.d.ts.map +1 -0
  8. package/dist/agents/blog-writer.js +59 -0
  9. package/dist/agents/blog-writer.js.map +1 -0
  10. package/dist/agents/critic.d.ts +11 -0
  11. package/dist/agents/critic.d.ts.map +1 -0
  12. package/dist/agents/critic.js +57 -0
  13. package/dist/agents/critic.js.map +1 -0
  14. package/dist/agents/index.d.ts +15 -0
  15. package/dist/agents/index.d.ts.map +1 -0
  16. package/dist/agents/index.js +31 -0
  17. package/dist/agents/index.js.map +1 -0
  18. package/dist/agents/investigator-critic.d.ts +10 -0
  19. package/dist/agents/investigator-critic.d.ts.map +1 -0
  20. package/dist/agents/investigator-critic.js +78 -0
  21. package/dist/agents/investigator-critic.js.map +1 -0
  22. package/dist/agents/investigator.d.ts +13 -0
  23. package/dist/agents/investigator.d.ts.map +1 -0
  24. package/dist/agents/investigator.js +105 -0
  25. package/dist/agents/investigator.js.map +1 -0
  26. package/dist/agents/marketing.d.ts +13 -0
  27. package/dist/agents/marketing.d.ts.map +1 -0
  28. package/dist/agents/marketing.js +59 -0
  29. package/dist/agents/marketing.js.map +1 -0
  30. package/dist/agents/researcher.d.ts +11 -0
  31. package/dist/agents/researcher.d.ts.map +1 -0
  32. package/dist/agents/researcher.js +68 -0
  33. package/dist/agents/researcher.js.map +1 -0
  34. package/dist/agents/writer.d.ts +9 -0
  35. package/dist/agents/writer.d.ts.map +1 -0
  36. package/dist/agents/writer.js +47 -0
  37. package/dist/agents/writer.js.map +1 -0
  38. package/dist/cli/create.d.ts +11 -0
  39. package/dist/cli/create.d.ts.map +1 -0
  40. package/dist/cli/create.js +104 -0
  41. package/dist/cli/create.js.map +1 -0
  42. package/dist/cli/evolve.d.ts +13 -0
  43. package/dist/cli/evolve.d.ts.map +1 -0
  44. package/dist/cli/evolve.js +69 -0
  45. package/dist/cli/evolve.js.map +1 -0
  46. package/dist/cli/index.d.ts +13 -0
  47. package/dist/cli/index.d.ts.map +1 -0
  48. package/dist/cli/index.js +84 -0
  49. package/dist/cli/index.js.map +1 -0
  50. package/dist/cli/init.d.ts +12 -0
  51. package/dist/cli/init.d.ts.map +1 -0
  52. package/dist/cli/init.js +68 -0
  53. package/dist/cli/init.js.map +1 -0
  54. package/dist/cli/run.d.ts +7 -0
  55. package/dist/cli/run.d.ts.map +1 -0
  56. package/dist/cli/run.js +371 -0
  57. package/dist/cli/run.js.map +1 -0
  58. package/dist/cli/status.d.ts +7 -0
  59. package/dist/cli/status.d.ts.map +1 -0
  60. package/dist/cli/status.js +123 -0
  61. package/dist/cli/status.js.map +1 -0
  62. package/dist/core/agent.d.ts +53 -0
  63. package/dist/core/agent.d.ts.map +1 -0
  64. package/dist/core/agent.js +172 -0
  65. package/dist/core/agent.js.map +1 -0
  66. package/dist/core/runner.d.ts +75 -0
  67. package/dist/core/runner.d.ts.map +1 -0
  68. package/dist/core/runner.js +255 -0
  69. package/dist/core/runner.js.map +1 -0
  70. package/dist/evolution/loop.d.ts +100 -0
  71. package/dist/evolution/loop.d.ts.map +1 -0
  72. package/dist/evolution/loop.js +424 -0
  73. package/dist/evolution/loop.js.map +1 -0
  74. package/dist/evolution/multi-critic.d.ts +58 -0
  75. package/dist/evolution/multi-critic.d.ts.map +1 -0
  76. package/dist/evolution/multi-critic.js +326 -0
  77. package/dist/evolution/multi-critic.js.map +1 -0
  78. package/dist/evolution/notifications.d.ts +32 -0
  79. package/dist/evolution/notifications.d.ts.map +1 -0
  80. package/dist/evolution/notifications.js +92 -0
  81. package/dist/evolution/notifications.js.map +1 -0
  82. package/dist/evolution/optimizer.d.ts +64 -0
  83. package/dist/evolution/optimizer.d.ts.map +1 -0
  84. package/dist/evolution/optimizer.js +223 -0
  85. package/dist/evolution/optimizer.js.map +1 -0
  86. package/dist/evolution/patterns.d.ts +63 -0
  87. package/dist/evolution/patterns.d.ts.map +1 -0
  88. package/dist/evolution/patterns.js +297 -0
  89. package/dist/evolution/patterns.js.map +1 -0
  90. package/dist/evolution/safety.d.ts +76 -0
  91. package/dist/evolution/safety.d.ts.map +1 -0
  92. package/dist/evolution/safety.js +182 -0
  93. package/dist/evolution/safety.js.map +1 -0
  94. package/dist/evolution/tracker.d.ts +48 -0
  95. package/dist/evolution/tracker.d.ts.map +1 -0
  96. package/dist/evolution/tracker.js +163 -0
  97. package/dist/evolution/tracker.js.map +1 -0
  98. package/dist/index.d.ts +32 -0
  99. package/dist/index.d.ts.map +1 -0
  100. package/dist/index.js +35 -0
  101. package/dist/index.js.map +1 -0
  102. package/dist/memory/index.d.ts +32 -0
  103. package/dist/memory/index.d.ts.map +1 -0
  104. package/dist/memory/index.js +49 -0
  105. package/dist/memory/index.js.map +1 -0
  106. package/dist/memory/postgres-memory.d.ts +52 -0
  107. package/dist/memory/postgres-memory.d.ts.map +1 -0
  108. package/dist/memory/postgres-memory.js +515 -0
  109. package/dist/memory/postgres-memory.js.map +1 -0
  110. package/dist/memory/sqlite-memory.d.ts +36 -0
  111. package/dist/memory/sqlite-memory.d.ts.map +1 -0
  112. package/dist/memory/sqlite-memory.js +380 -0
  113. package/dist/memory/sqlite-memory.js.map +1 -0
  114. package/dist/providers/anthropic.d.ts +20 -0
  115. package/dist/providers/anthropic.d.ts.map +1 -0
  116. package/dist/providers/anthropic.js +82 -0
  117. package/dist/providers/anthropic.js.map +1 -0
  118. package/dist/providers/claude-cli.d.ts +35 -0
  119. package/dist/providers/claude-cli.d.ts.map +1 -0
  120. package/dist/providers/claude-cli.js +153 -0
  121. package/dist/providers/claude-cli.js.map +1 -0
  122. package/dist/providers/index.d.ts +39 -0
  123. package/dist/providers/index.d.ts.map +1 -0
  124. package/dist/providers/index.js +58 -0
  125. package/dist/providers/index.js.map +1 -0
  126. package/dist/providers/ollama.d.ts +17 -0
  127. package/dist/providers/ollama.d.ts.map +1 -0
  128. package/dist/providers/ollama.js +64 -0
  129. package/dist/providers/ollama.js.map +1 -0
  130. package/dist/providers/openai.d.ts +19 -0
  131. package/dist/providers/openai.d.ts.map +1 -0
  132. package/dist/providers/openai.js +75 -0
  133. package/dist/providers/openai.js.map +1 -0
  134. package/dist/providers/types.d.ts +62 -0
  135. package/dist/providers/types.d.ts.map +1 -0
  136. package/dist/providers/types.js +9 -0
  137. package/dist/providers/types.js.map +1 -0
  138. package/dist/src/evolution/optimizer-gepa.d.ts +149 -0
  139. package/dist/src/evolution/optimizer-gepa.d.ts.map +1 -0
  140. package/dist/src/evolution/optimizer-gepa.js +198 -0
  141. package/dist/src/evolution/optimizer-gepa.js.map +1 -0
  142. package/dist/src/evolution/pareto.d.ts +116 -0
  143. package/dist/src/evolution/pareto.d.ts.map +1 -0
  144. package/dist/src/evolution/pareto.js +140 -0
  145. package/dist/src/evolution/pareto.js.map +1 -0
  146. package/dist/src/evolution/reflector.d.ts +107 -0
  147. package/dist/src/evolution/reflector.d.ts.map +1 -0
  148. package/dist/src/evolution/reflector.js +158 -0
  149. package/dist/src/evolution/reflector.js.map +1 -0
  150. package/dist/src/evolution/run-prompt-fn.d.ts +11 -0
  151. package/dist/src/evolution/run-prompt-fn.d.ts.map +1 -0
  152. package/dist/src/evolution/run-prompt-fn.js +11 -0
  153. package/dist/src/evolution/run-prompt-fn.js.map +1 -0
  154. package/dist/src/index.d.ts +4 -0
  155. package/dist/src/index.d.ts.map +1 -1
  156. package/dist/src/index.js +5 -0
  157. package/dist/src/index.js.map +1 -1
  158. package/dist/types.d.ts +221 -0
  159. package/dist/types.d.ts.map +1 -0
  160. package/dist/types.js +19 -0
  161. package/dist/types.js.map +1 -0
  162. package/package.json +1 -1
package/CHANGELOG.md CHANGED
@@ -1,136 +1,89 @@
1
1
  # Changelog
2
2
 
3
- ## [0.5.0-alpha.1] — 2026-05-24
3
+ ## [0.5.0-alpha.2] — 2026-05-25
4
4
 
5
- **Phase 2 A1: Execution-Trace-Capture.** First pre-release of Darwin's
6
- Phase 2 tech roadmap. Unblocks GEPA-style reflective optimizers (A2)
7
- and validate-by-reproduce drift-detection (A5) by giving them a
8
- structured trajectory to consume.
9
-
10
- Industry-aligned with the 2026 agent-observability consensus (Braintrust,
11
- Langfuse, Strands SDK, Microsoft Foundry, OTEL GenAI semantic conventions):
12
- three span types — Tool / Reasoning / Turn-level errors — captured into a
13
- single `ExecutionTrace` object, persisted as JSONB (Postgres) or TEXT
14
- (SQLite), and tagged with a forward-compatible `version: 1` discriminator.
5
+ **GEPA-Style Reflective Optimizer (Phase 2 A2).** Multi-objective Pareto
6
+ selection + text-feedback-driven prompt mutation as a TS-native
7
+ adaptation of the GEPA framework (arxiv 2507.19457). Released under the
8
+ `alpha` npm dist-tag in parallel with v0.5.0-alpha.1 (execution-trace
9
+ capture, A1). `npm install darwin-agents@alpha` resolves to
10
+ 0.5.0-alpha.2; `npm install darwin-agents` stays on 0.4.9 (latest).
15
11
 
16
12
  ### Added
17
13
 
18
- - **`ExecutionTrace` schema** (`src/types.ts`) — versioned trajectory shape:
19
- `toolCalls[]` (with OTEL-mappable `id` / `tool` / `args` / `resultSummary`
20
- (2000-char cap) / `outcome` / `durationMs` / `retryCount?` / `errorClass?` /
21
- `errorMessage?` / `turn`), `textBlockCount` (honest name — NOT a thinking-
22
- block counter, V2 will add typed `reasoningBlocks`), `turnCount`,
23
- `mcpInvocations`, `errors[]` (turn-level), `tokenUsage?` (OTEL `gen_ai.usage.*`
24
- fields: input/output/cache_read/cache_creation tokens), `capturedAt`. Plus
25
- optional `trajectory?: ExecutionTrace` on `DarwinExperiment` (additive —
26
- pre-A1 callers unaffected).
27
-
28
- - **`createTraceCapture()` factory** (`src/core/trace-capture.ts`) — pure,
29
- transport-agnostic capturer. The runtime feeds tool events; the capturer
30
- aggregates into a typed trajectory. API:
31
-
32
- ```ts
33
- const trace = createTraceCapture();
34
- trace.startTurn();
35
- trace.recordToolUse('toolu_01AB', 'mcp__nex__search', { query: 'x' });
36
- trace.recordToolResult('toolu_01AB', 'success', { resultSummary: '3 hits' });
37
- trace.recordTextBlock();
38
- trace.addTokens({ inputTokens: 1200, outputTokens: 340 });
39
- trace.recordError('parse_error', 'invalid JSON');
40
- const trajectory = trace.finalize();
41
- ```
42
-
43
- Unpaired `recordToolUse` calls (no matching `recordToolResult` before
44
- `finalize`) surface as `outcome: 'error', errorClass: 'unpaired_call'`
45
- so silent SDK hangs remain visible in the trace. Customizable via
46
- `TraceCaptureOptions`: `now?` (clock injection for tests),
47
- `isMcpTool?` (predicate override for non-`mcp__`-prefixed servers).
48
-
49
- - **`addTokens()` aggregator** — lossy-merge of per-turn LLM usage. Missing
50
- fields (`NaN` / `Infinity` / `undefined`) skip silently rather than
51
- defaulting to zero — preserves the distinction between "provider didn't
52
- report" and "actually zero tokens".
53
-
54
- - **JSONB persistence** in `darwin_experiments.trajectory` column +
55
- `idx_darwin_exp_trajectory_gin` GIN index (Postgres) for `@>`
56
- containment queries from A2 / A5 consumers. SQLite stores the same
57
- shape as JSON-stringified TEXT.
58
-
59
- - **`scripts/migrate-add-trajectory.ts`** — idempotent migration script.
60
- Pre-checks column + index existence (filtered by `current_schema()`
61
- for multi-schema-safe operation), runs `ALTER TABLE ADD COLUMN IF
62
- NOT EXISTS trajectory JSONB` + `CREATE INDEX IF NOT EXISTS`, then
63
- verifies. Rollback path documented inline.
64
-
65
- ```bash
66
- DARWIN_POSTGRES_URL=postgresql://… npx tsx scripts/migrate-add-trajectory.ts
67
- ```
68
-
69
- - **Defensive parsing** in both memory backends — `parseTrajectory` /
70
- `parseTrajectoryColumn` drop malformed values (wrong `version`,
71
- non-object, invalid JSON) to `undefined` instead of crashing the
72
- load. Future schema versions (`version !== 1`) are silently ignored
73
- so v0.5 consumers don't break on v0.6 trajectories.
74
-
75
- - **39 new tests** across two suites (all green):
76
- - `tests/trace-capture.test.ts` (32 unit tests): basic flow,
77
- defensive behaviour, truncation (2000-char `resultSummary`),
78
- MCP-heuristic, schema invariants, tool_call_id passthrough,
79
- `addTokens` aggregate semantics
80
- - `tests/memory-trajectory.test.ts` (7 tests): SQLite roundtrip,
81
- backward-compat with pre-A1 rows, defensive parsing, idempotent
82
- migration, Postgres-gated JSONB roundtrip
83
-
84
- ### Changed
85
-
86
- - **DDL single-source-of-truth** — the trajectory column is defined
87
- ONLY in the additive `ALTER TABLE … ADD COLUMN IF NOT EXISTS` path
88
- (Postgres) / PRAGMA-guarded ALTER (SQLite), never inline in the
89
- `CREATE TABLE`. Schema-evolution lives in one place; fresh installs
90
- reach the same end-state as legacy installs.
91
-
92
- - **Postgres `ON CONFLICT` preserves trajectory** on feedback-only
93
- re-saves via `COALESCE(EXCLUDED.trajectory, darwin_experiments.trajectory)`.
94
- This means a second `saveExperiment(exp)` call that omits trajectory
95
- doesn't zero out the previously-stored trace.
96
-
97
- **NOTE — SQLite asymmetry:** SQLite uses `INSERT OR REPLACE` which
98
- drops + re-inserts the row, so callers wanting to preserve a prior
99
- trajectory across re-saves MUST include it in the new payload. This
100
- asymmetry is documented on `MemoryProvider.saveExperiment` in the
101
- interface JSDoc.
102
-
103
- ### Backwards compatibility
104
-
105
- 100% backwards-compatible. The new `trajectory` field is optional, the
106
- new column is nullable, the new methods on `MemoryProvider` are
107
- additive. Existing v0.4.x consumers see no behavioural changes.
108
-
109
- Verified on a live `darwin_db` with 341 experiments, 339 of which
110
- pre-date A1 — all loaded cleanly with `trajectory: undefined`.
111
-
112
- ### Why "alpha.1"
113
-
114
- `textBlockCount` is honest but limited — V2 will replace it with a
115
- typed `reasoningBlocks: ReasoningBlock[]` sequence carrying the actual
116
- text content per reasoning step, which is what GEPA reflectors need
117
- for per-decision blame attribution. Existing `textBlockCount` will stay
118
- as a fast aggregate. The `alpha.1` tag signals the schema is subject to
119
- this kind of additive evolution before `0.5.0` final.
120
-
121
- Three known minor gaps (deferred to follow-up patches):
122
-
123
- - Per-call cost attribution (token usage per tool invocation, not just
124
- per-run aggregate)
125
- - Trace-capture lazy-load flag stays permanent on transient import
126
- failure (low impact: Darwin is either built or not)
127
- - Token extraction in the SDK adapter is Anthropic-shaped (`message.usage`)
128
- and may silently miss tokens for non-Anthropic providers — by design
129
- (token usage is documented optional), but a debug-level log line in a
130
- follow-up patch will make this easier to spot.
131
-
132
- Install: `npm install darwin-agents@alpha`. The default `latest` tag
133
- remains on `0.4.9` until `0.5.0` final ships.
14
+ - **`GepaOptimizer`** — generation-loop wrapper producing N variant
15
+ mutations per call (default N=3, range [1, 10]). Three `feedbackStrategy`
16
+ modes: `"split"` (round-robin partition, diversity), `"replicate"`
17
+ (every variant sees all feedback), `"single"` (one reflection).
18
+ Separate `nextGeneration(scored, opts)` Pareto-selects survivors for
19
+ the next generation.
20
+ - **`Reflector`** — single-shot LLM call with GEPA's "smallest possible
21
+ targeted edit" template. Output is cleaned (fences stripped) and
22
+ truncated at sentence boundary.
23
+ - **`pareto.ts`** — `dominates` / `nonDominatedFront` / `paretoSelect` /
24
+ `scalarise` pure helpers + `DARWIN_DEFAULT_OBJECTIVES` constant
25
+ (matching `DarwinMetrics` field names + existing weight scheme).
26
+ - **`RunPromptFn`** — shared injected-LLM-call type, single source of
27
+ truth for both `PromptOptimizer` and `Reflector`.
28
+ - **A1 sync (S1184):** `createTraceCapture` + `ExecutionTrace` /
29
+ `TraceToolCall` / `TraceTokenUsage` / `TraceTurnError` now exported
30
+ from the OS package (were already in v0.5.0-alpha.1 on npm, OS source
31
+ catches up this release).
32
+
33
+ ### Deliberate deviations from GEPA paper (documented in source)
34
+
35
+ - N variants per `generate()` call vs GEPA Algorithm 1's 1-offspring-
36
+ per-iteration.
37
+ - `feedbackStrategy: "split"` is our adaptation, not in the paper.
38
+ - `paretoSelect` truncation uses scalarised tie-break, not GEPA
39
+ Algorithm 2's coverage-proportional sampling — V0.6 will add
40
+ `truncationStrategy: "coverage" | "crowding"`.
41
+ - GEPA+Merge (paper Appendix F, ~+5% lift) NOT implemented — V0.6.
42
+ - Instance-wise coverage sampling NOT implemented — V0.6.
43
+ - Single injected `runPrompt` for both task and reflection — GEPA docs
44
+ recommend stronger `reflection_lm`. Optional `reflectionRunPrompt`
45
+ override deferred to V0.5.1.
46
+
47
+ ### Fixed (R1 + R2 V0.5.0-alpha.2 code-review findings)
48
+
49
+ The 3-Agent code-review loop ran twice. R1 found 13 findings, R2 caught
50
+ 2 must-fix that R1 missed. All addressed pre-publish.
51
+
52
+ **R1 — 6 MUST-FIX (S1185):**
53
+
54
+ 1. **HIGH (Critic H1):** Template injection — `String.replace` order
55
+ meant `currentPrompt` containing `{FEEDBACKS}` literal could trigger
56
+ double-substitution. Fixed by substituting `{CURRENT_PROMPT}` last.
57
+ 2. **HIGH (Critic H2):** `feedbackCap` accepted negative values — added
58
+ `Math.max(1, Math.floor(...))` guard.
59
+ 3. **HIGH (Analyst A5):** `ParetoObjective` JSDoc example used wrong
60
+ `DarwinMetrics` field names. Fixed + `DARWIN_DEFAULT_OBJECTIVES`
61
+ constant.
62
+ 4. **HIGH (Analyst A1):** `RunPromptFn` was duplicated. Extracted to
63
+ `evolution/run-prompt-fn.ts`.
64
+ 5. **MED (Critic M2):** `nextGeneration` used reference-identity on
65
+ `metrics` — switched to explicit index-based mapping (refactor-safe).
66
+ 6. **MED (Critic M4):** Added scale-normalization JSDoc warning on
67
+ `ParetoObjective.weight`.
68
+
69
+ **R2 — 2 MUST-FIX (caught what R1 missed, S1185):**
70
+
71
+ 7. **CRITICAL (R2-C1):** R1's clamp `Math.max(1, Math.floor(NaN)) ===
72
+ NaN` — silent bypass for NaN/Infinity. Hardened with
73
+ `Number.isFinite()` + fallback to default.
74
+ 8. **LOW (R2-L1):** `generate("p", [])` threw opaque internal error.
75
+ Added GEPA-specific boundary validation pointing callers at
76
+ `PromptOptimizer` for cold-start. Plus R2-M1 guard for shared
77
+ metrics-object references.
78
+
79
+ ### Test coverage
80
+
81
+ - **307/308 OS tests green** (1 pre-existing skip, 0 fail). Was 268 in
82
+ v0.4.9. New test files: `pareto.test.ts` (16), `reflector.test.ts`
83
+ (14), `optimizer-gepa.test.ts` (12), `r1-fixes.test.ts` (12 R1+R2
84
+ regression). A1 trace + memory-trajectory tests synced from
85
+ v0.5.0-alpha.1.
86
+ - tsc strict + build clean.
134
87
 
135
88
  ## [0.4.9] — 2026-05-22
136
89
 
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Analyst Agent — Code Intelligence
3
+ *
4
+ * Analyzes codebases for quality, patterns, security issues,
5
+ * and improvement opportunities.
6
+ *
7
+ * Uses filesystem access (Read, Glob, Grep tools).
8
+ */
9
+ import type { AgentDefinition } from '../types.js';
10
+ export declare const analyst: AgentDefinition;
11
+ //# sourceMappingURL=analyst.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"analyst.d.ts","sourceRoot":"","sources":["../../src/agents/analyst.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,OAAO,EAAE,eAqErB,CAAC"}
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Analyst Agent — Code Intelligence
3
+ *
4
+ * Analyzes codebases for quality, patterns, security issues,
5
+ * and improvement opportunities.
6
+ *
7
+ * Uses filesystem access (Read, Glob, Grep tools).
8
+ */
9
+ export const analyst = {
10
+ name: 'analyst',
11
+ role: 'Code Analyst',
12
+ description: 'Analyzes codebases for quality, patterns, and issues. Finds what humans miss.',
13
+ maxTurns: 25,
14
+ tools: ['Read', 'Glob', 'Grep', 'Bash'],
15
+ systemPrompt: `You are a senior code analyst who reviews codebases for quality and issues.
16
+
17
+ YOUR MISSION:
18
+ Analyze the given codebase or file path and deliver a structured quality report.
19
+
20
+ ANALYSIS PROCESS:
21
+ 1. Scan the project structure (Glob for key files: package.json, tsconfig, etc.)
22
+ 2. Read key files to understand architecture
23
+ 3. Search for common issues (Grep for patterns)
24
+ 4. Evaluate code quality, security, and architecture
25
+
26
+ WHAT TO LOOK FOR:
27
+ - **Architecture**: Project structure, dependency management, module boundaries
28
+ - **Code Quality**: TypeScript strictness, error handling, naming conventions
29
+ - **Security**: Hardcoded secrets, injection risks, unsafe patterns
30
+ - **Performance**: N+1 queries, missing indexes, unnecessary re-renders
31
+ - **Dead Code**: Unused exports, unreachable branches, commented-out code
32
+ - **Dependencies**: Outdated packages, known vulnerabilities, bundle size
33
+
34
+ SEVERITY LEVELS:
35
+ - P0 (Critical): Security vulnerabilities, data loss risks
36
+ - P1 (High): Bugs, performance issues, architectural problems
37
+ - P2 (Medium): Code quality, maintainability issues
38
+ - P3 (Low): Style, naming, minor improvements
39
+
40
+ RULES:
41
+ - Only report issues you can PROVE (show the file and line)
42
+ - Don't nitpick formatting — focus on substance
43
+ - Prioritize by impact, not by count
44
+ - Suggest concrete fixes, not vague advice
45
+ - If the code is good, say so — don't invent problems
46
+
47
+ OUTPUT FORMAT:
48
+ # Code Analysis: {Project/Path}
49
+
50
+ ## Summary
51
+ {2-3 sentences: overall health, biggest concern, biggest strength}
52
+
53
+ ## Critical Issues (P0-P1)
54
+ ### {Issue Title}
55
+ - **File**: {path}:{line}
56
+ - **Problem**: {specific description}
57
+ - **Fix**: {concrete suggestion}
58
+
59
+ ## Improvements (P2-P3)
60
+ - {issue + file + suggestion}
61
+
62
+ ## Architecture Notes
63
+ {Observations about structure, patterns, decisions}
64
+
65
+ ## Score: {1-10}/10`,
66
+ evolution: {
67
+ enabled: true,
68
+ evaluator: 'critic',
69
+ metrics: {
70
+ quality: 0.45,
71
+ sourceCount: 0.05,
72
+ outputLength: 0.15,
73
+ duration: 0.10,
74
+ success: 0.25,
75
+ },
76
+ },
77
+ };
78
+ //# sourceMappingURL=analyst.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"analyst.js","sourceRoot":"","sources":["../../src/agents/analyst.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,MAAM,CAAC,MAAM,OAAO,GAAoB;IACtC,IAAI,EAAE,SAAS;IACf,IAAI,EAAE,cAAc;IACpB,WAAW,EAAE,+EAA+E;IAC5F,QAAQ,EAAE,EAAE;IACZ,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;IACvC,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;oBAkDI;IAElB,SAAS,EAAE;QACT,OAAO,EAAE,IAAI;QACb,SAAS,EAAE,QAAQ;QACnB,OAAO,EAAE;YACP,OAAO,EAAE,IAAI;YACb,WAAW,EAAE,IAAI;YACjB,YAAY,EAAE,IAAI;YAClB,QAAQ,EAAE,IAAI;YACd,OAAO,EAAE,IAAI;SACd;KACF;CACF,CAAC"}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Blog Writer Agent — SEO-Optimized Content
3
+ *
4
+ * Writes SEO-optimized blog posts with keyword focus.
5
+ * No MCP tools — pure text generation.
6
+ * Blog-specific critics: SEO, readability, conversion.
7
+ *
8
+ * Customize brand/site by providing context in the task prompt
9
+ * or by creating a custom agent with defineAgent().
10
+ */
11
+ import type { AgentDefinition } from '../types.js';
12
+ export declare const blogWriter: AgentDefinition;
13
+ //# sourceMappingURL=blog-writer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"blog-writer.d.ts","sourceRoot":"","sources":["../../src/agents/blog-writer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,UAAU,EAAE,eAgDxB,CAAC"}
@@ -0,0 +1,59 @@
1
+ /**
2
+ * Blog Writer Agent — SEO-Optimized Content
3
+ *
4
+ * Writes SEO-optimized blog posts with keyword focus.
5
+ * No MCP tools — pure text generation.
6
+ * Blog-specific critics: SEO, readability, conversion.
7
+ *
8
+ * Customize brand/site by providing context in the task prompt
9
+ * or by creating a custom agent with defineAgent().
10
+ */
11
+ export const blogWriter = {
12
+ name: 'blog-writer',
13
+ role: 'SEO Blog Writer',
14
+ description: 'Writes SEO-optimized blog posts. Keyword-aware, structured for readability and conversion.',
15
+ maxTurns: 8,
16
+ systemPrompt: `You are a senior SEO content writer for a premium digital agency.
17
+
18
+ BLOG STANDARDS:
19
+ - Write in the language the user specifies (default: English)
20
+ - Target audience: SMB owners considering digital solutions, website redesign, or new projects
21
+ - Tone: Expert but accessible. Explain tech concepts simply. No jargon without explanation.
22
+ - Length: 800-1500 words unless specified otherwise
23
+
24
+ SEO RULES:
25
+ - Include the main keyword in: Title, first paragraph, one H2, meta description
26
+ - Use 3-5 H2 headers with keyword variations (not exact stuffing)
27
+ - Write a compelling meta description (max 155 chars)
28
+ - Suggest internal link opportunities where relevant
29
+ - Use short paragraphs (max 3-4 sentences)
30
+ - Include a FAQ section with 3 questions (structured data opportunity)
31
+
32
+ CONTENT QUALITY:
33
+ - Lead with the reader's problem, not the solution
34
+ - Include at least one concrete example, case study, or data point
35
+ - Address objections ("But what about...") proactively
36
+ - Every section must answer "why should I care?"
37
+ - End with clear CTA (contact, consultation, related post)
38
+ - NEVER invent statistics. Use "typically", "in our experience" for estimates.
39
+
40
+ OUTPUT FORMAT:
41
+ 1. Title (H1) — compelling, keyword-included, under 60 chars
42
+ 2. Meta Description — under 155 chars
43
+ 3. Main content with H2 headers
44
+ 4. FAQ section (3 questions)
45
+ 5. CTA paragraph
46
+ 6. Suggested internal links`,
47
+ evolution: {
48
+ enabled: true,
49
+ evaluator: 'multi-critic',
50
+ metrics: {
51
+ quality: 0.55,
52
+ sourceCount: 0.0,
53
+ outputLength: 0.15,
54
+ duration: 0.05,
55
+ success: 0.25,
56
+ },
57
+ },
58
+ };
59
+ //# sourceMappingURL=blog-writer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"blog-writer.js","sourceRoot":"","sources":["../../src/agents/blog-writer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAIH,MAAM,CAAC,MAAM,UAAU,GAAoB;IACzC,IAAI,EAAE,aAAa;IACnB,IAAI,EAAE,iBAAiB;IACvB,WAAW,EAAE,4FAA4F;IACzG,QAAQ,EAAE,CAAC;IACX,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;4BA8BY;IAE1B,SAAS,EAAE;QACT,OAAO,EAAE,IAAI;QACb,SAAS,EAAE,cAAc;QACzB,OAAO,EAAE;YACP,OAAO,EAAE,IAAI;YACb,WAAW,EAAE,GAAG;YAChB,YAAY,EAAE,IAAI;YAClB,QAAQ,EAAE,IAAI;YACd,OAAO,EAAE,IAAI;SACd;KACF;CACF,CAAC"}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Critic Agent — The Heart of Darwin
3
+ *
4
+ * Evaluates other agents' output. Without the Critic,
5
+ * there's no quality score, no evolution, no improvement.
6
+ *
7
+ * Zero-config: no MCP servers, no API keys.
8
+ */
9
+ import type { AgentDefinition } from '../types.js';
10
+ export declare const critic: AgentDefinition;
11
+ //# sourceMappingURL=critic.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"critic.d.ts","sourceRoot":"","sources":["../../src/agents/critic.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,MAAM,EAAE,eAgDpB,CAAC"}
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Critic Agent — The Heart of Darwin
3
+ *
4
+ * Evaluates other agents' output. Without the Critic,
5
+ * there's no quality score, no evolution, no improvement.
6
+ *
7
+ * Zero-config: no MCP servers, no API keys.
8
+ */
9
+ export const critic = {
10
+ name: 'critic',
11
+ role: "Devil's Advocate & Quality Evaluator",
12
+ description: "Reviews and scores other agents' output. Enables Darwin evolution through quality feedback.",
13
+ maxTurns: 5,
14
+ systemPrompt: `You are a sharp, constructive critic who evaluates AI agent outputs.
15
+
16
+ YOUR ROLE:
17
+ Score the quality of an agent's output on a scale of 1-10 and provide specific feedback.
18
+
19
+ EVALUATION CRITERIA:
20
+ 1. **Accuracy** (0-10): Are claims factual? Are sources cited? Any hallucinations?
21
+ 2. **Completeness** (0-10): Does it fully address the task? Missing angles?
22
+ 3. **Structure** (0-10): Well-organized? Clear headers? Logical flow?
23
+ 4. **Actionability** (0-10): Can the reader act on this? Concrete next steps?
24
+ 5. **Conciseness** (0-10): Right level of detail? No filler?
25
+
26
+ SCORING GUIDE:
27
+ - 9-10: Exceptional. Would publish as-is.
28
+ - 7-8: Good. Minor improvements possible.
29
+ - 5-6: Adequate. Significant gaps or issues.
30
+ - 3-4: Poor. Major problems.
31
+ - 1-2: Unusable. Fundamentally flawed.
32
+
33
+ RULES:
34
+ - Be SPECIFIC. Not "could be better" but "Section 3 lacks source citations for the market size claim"
35
+ - Be CONSTRUCTIVE. Every criticism must include a fix suggestion
36
+ - Be HONEST. A score of 7 when it deserves 4 helps nobody
37
+ - Evaluate the OUTPUT, not the effort
38
+ - Consider the task type: research needs sources, code needs correctness, content needs readability
39
+
40
+ OUTPUT FORMAT (EXACTLY THIS — parseable by Darwin):
41
+ ===SCORE===
42
+ {number 1-10}
43
+ ===STRENGTHS===
44
+ - {specific strength 1}
45
+ - {specific strength 2}
46
+ ===WEAKNESSES===
47
+ - {specific weakness 1 + fix suggestion}
48
+ - {specific weakness 2 + fix suggestion}
49
+ ===VERDICT===
50
+ {One sentence summary}
51
+ ===END===`,
52
+ evolution: {
53
+ enabled: false, // Critic doesn't evolve itself (avoids circular dependency)
54
+ evaluator: 'critic',
55
+ },
56
+ };
57
+ //# sourceMappingURL=critic.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"critic.js","sourceRoot":"","sources":["../../src/agents/critic.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAIH,MAAM,CAAC,MAAM,MAAM,GAAoB;IACrC,IAAI,EAAE,QAAQ;IACd,IAAI,EAAE,sCAAsC;IAC5C,WAAW,EAAE,6FAA6F;IAC1G,QAAQ,EAAE,CAAC;IACX,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;UAqCN;IAER,SAAS,EAAE;QACT,OAAO,EAAE,KAAK,EAAG,4DAA4D;QAC7E,SAAS,EAAE,QAAQ;KACpB;CACF,CAAC"}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Built-in Agents — ready to use out of the box.
3
+ */
4
+ export { writer } from './writer.js';
5
+ export { researcher } from './researcher.js';
6
+ export { critic } from './critic.js';
7
+ export { analyst } from './analyst.js';
8
+ export { investigator } from './investigator.js';
9
+ export { investigatorCritic } from './investigator-critic.js';
10
+ export { marketing } from './marketing.js';
11
+ export { blogWriter } from './blog-writer.js';
12
+ import type { AgentDefinition } from '../types.js';
13
+ /** All built-in agents by name */
14
+ export declare const builtinAgents: Record<string, AgentDefinition>;
15
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/agents/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACjD,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAU9C,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAEnD,kCAAkC;AAClC,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CASzD,CAAC"}
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Built-in Agents — ready to use out of the box.
3
+ */
4
+ export { writer } from './writer.js';
5
+ export { researcher } from './researcher.js';
6
+ export { critic } from './critic.js';
7
+ export { analyst } from './analyst.js';
8
+ export { investigator } from './investigator.js';
9
+ export { investigatorCritic } from './investigator-critic.js';
10
+ export { marketing } from './marketing.js';
11
+ export { blogWriter } from './blog-writer.js';
12
+ import { writer } from './writer.js';
13
+ import { researcher } from './researcher.js';
14
+ import { critic } from './critic.js';
15
+ import { analyst } from './analyst.js';
16
+ import { investigator } from './investigator.js';
17
+ import { investigatorCritic } from './investigator-critic.js';
18
+ import { marketing } from './marketing.js';
19
+ import { blogWriter } from './blog-writer.js';
20
+ /** Registry of all built-in agents, keyed by name; hyphenated names need quoted keys because the imported bindings are camelCase. */
21
+ export const builtinAgents = {
22
+ writer,
23
+ researcher,
24
+ critic,
25
+ analyst,
26
+ investigator,
27
+ 'investigator-critic': investigatorCritic, // key differs from the binding name, so shorthand cannot be used
28
+ marketing,
29
+ 'blog-writer': blogWriter, // key differs from the binding name, so shorthand cannot be used
30
+ };
31
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/agents/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACjD,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAE9C,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACjD,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAG9C,kCAAkC;AAClC,MAAM,CAAC,MAAM,aAAa,GAAoC;IAC5D,MAAM;IACN,UAAU;IACV,MAAM;IACN,OAAO;IACP,YAAY;IACZ,qBAAqB,EAAE,kBAAkB;IACzC,SAAS;IACT,aAAa,EAAE,UAAU;CAC1B,CAAC"}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Investigator Critic — Evaluates Investigative Reports
3
+ *
4
+ * Different criteria than the standard Critic:
5
+ * Not "is it well written?" but
6
+ * "is it HONEST, BALANCED, and DEEP?" (the agent's system prompt additionally scores source diversity and intellectual courage)
7
+ */
8
+ import type { AgentDefinition } from '../types.js';
9
+ export declare const investigatorCritic: AgentDefinition;
10
+ //# sourceMappingURL=investigator-critic.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"investigator-critic.d.ts","sourceRoot":"","sources":["../../src/agents/investigator-critic.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAEnD,eAAO,MAAM,kBAAkB,EAAE,eAsEhC,CAAC"}
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Investigator Critic — Evaluates Investigative Reports
3
+ *
4
+ * Different criteria than the standard Critic:
5
+ * Not "is it well written?" but
6
+ * "is it HONEST, BALANCED, and DEEP?" (the system prompt below additionally scores source diversity and intellectual courage)
7
+ */
8
+ export const investigatorCritic = {
9
+ name: 'investigator-critic', // same string as this agent's key in builtinAgents (agents/index.js)
10
+ role: 'Investigation Quality Evaluator',
11
+ description: 'Evaluates investigative reports on honesty, balance, depth, and source diversity — not just writing quality.',
12
+ maxTurns: 5,
13
+ systemPrompt: `You evaluate investigative research reports on controversial topics.
14
+
15
+ YOU DO NOT CARE ABOUT:
16
+ - Pretty writing or formatting
17
+ - Whether the conclusion is "mainstream-approved"
18
+ - Political correctness
19
+ - Whether it might offend someone
20
+
21
+ YOU CARE ABOUT:
22
+ 1. **Honesty** (0-10): Does it say what it actually thinks, or does it hide behind diplomatic language? Does it take a position or cop out with "both sides have valid points"? Cowardly fence-sitting = low score.
23
+
24
+ 2. **Balance** (0-10): Are BOTH sides presented with their STRONGEST arguments? Or is one side a strawman? Does it steelman the unpopular position? Note: Balance does NOT mean 50/50 — if evidence is 80/20, a balanced report says so.
25
+
26
+ 3. **Source Diversity** (0-10): Does it cite ONLY mainstream sources? ONLY alternative sources? Or a genuine MIX? Are primary documents included? Are sources from different countries/perspectives? Wikipedia + CNN alone = low score. Government documents + academic papers + independent journalists = high score.
27
+
28
+ 4. **Depth** (0-10): Surface-level summary or actual investigation? Does it name specific people, dates, documents? Does it follow the money? Does it ask WHO BENEFITS? Generic "some experts say" = low score.
29
+
30
+ 5. **Intellectual Courage** (0-10): Does it challenge comfortable assumptions? Does it state uncomfortable conclusions? Does it point out what we genuinely don't know? Or does it play it safe and repeat what everyone already thinks?
31
+
32
+ SCORING GUIDE:
33
+ - 9-10: Genuinely investigative. I learned something. Both sides treated fairly. Clear honest conclusion.
34
+ - 7-8: Good research but plays it a bit safe. Solid sources, some fence-sitting.
35
+ - 5-6: Surface level. Mostly mainstream rehash with token mention of alternatives.
36
+ - 3-4: Propaganda dressed as research. Only one side taken seriously.
37
+ - 1-2: Copy-paste from Wikipedia. No actual investigation.
38
+
39
+ RED FLAGS (automatic -2 penalty):
40
+ - "This has been thoroughly debunked" without showing the debunking evidence
41
+ - "Conspiracy theory" used as dismissal rather than description
42
+ - No alternative sources at all
43
+ - Conclusion was obviously predetermined before investigation
44
+ - "More research is needed" as the ONLY conclusion
45
+
46
+ GREEN FLAGS (+1 bonus):
47
+ - Primary documents cited (leaked emails, government reports, declassified files)
48
+ - Named specific people and their conflicts of interest
49
+ - Changed direction mid-investigation based on evidence found
50
+ - Stated something uncomfortable with evidence to back it up
51
+
52
+ OUTPUT FORMAT (EXACTLY THIS — parseable by Darwin):
53
+ ===SCORE===
54
+ {number 1-10}
55
+ ===HONESTY===
56
+ {score}/10 — {specific assessment}
57
+ ===BALANCE===
58
+ {score}/10 — {specific assessment}
59
+ ===SOURCE_DIVERSITY===
60
+ {score}/10 — {specific assessment}
61
+ ===DEPTH===
62
+ {score}/10 — {specific assessment}
63
+ ===COURAGE===
64
+ {score}/10 — {specific assessment}
65
+ ===STRENGTHS===
66
+ - {specific strength 1}
67
+ - {specific strength 2}
68
+ ===WEAKNESSES===
69
+ - {specific weakness 1 + what would make it better}
70
+ - {specific weakness 2 + what would make it better}
71
+ ===VERDICT===
72
+ {One honest sentence — was this real investigation or theatre?}
73
+ ===END===`,
74
+ evolution: {
75
+ enabled: false, // NOTE(review): presumably disabled for the same reason as the standard critic (an evaluator should not evolve itself) — confirm
76
+ },
77
+ };
78
+ //# sourceMappingURL=investigator-critic.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"investigator-critic.js","sourceRoot":"","sources":["../../src/agents/investigator-critic.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAIH,MAAM,CAAC,MAAM,kBAAkB,GAAoB;IACjD,IAAI,EAAE,qBAAqB;IAC3B,IAAI,EAAE,iCAAiC;IACvC,WAAW,EAAE,8GAA8G;IAC3H,QAAQ,EAAE,CAAC;IACX,YAAY,EAAE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;UA4DN;IAER,SAAS,EAAE;QACT,OAAO,EAAE,KAAK;KACf;CACF,CAAC"}