darwin-agents 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +245 -0
  3. package/dist/agents/analyst.d.ts +11 -0
  4. package/dist/agents/analyst.d.ts.map +1 -0
  5. package/dist/agents/analyst.js +78 -0
  6. package/dist/agents/analyst.js.map +1 -0
  7. package/dist/agents/blog-writer.d.ts +13 -0
  8. package/dist/agents/blog-writer.d.ts.map +1 -0
  9. package/dist/agents/blog-writer.js +59 -0
  10. package/dist/agents/blog-writer.js.map +1 -0
  11. package/dist/agents/critic.d.ts +11 -0
  12. package/dist/agents/critic.d.ts.map +1 -0
  13. package/dist/agents/critic.js +57 -0
  14. package/dist/agents/critic.js.map +1 -0
  15. package/dist/agents/index.d.ts +15 -0
  16. package/dist/agents/index.d.ts.map +1 -0
  17. package/dist/agents/index.js +31 -0
  18. package/dist/agents/index.js.map +1 -0
  19. package/dist/agents/investigator-critic.d.ts +10 -0
  20. package/dist/agents/investigator-critic.d.ts.map +1 -0
  21. package/dist/agents/investigator-critic.js +78 -0
  22. package/dist/agents/investigator-critic.js.map +1 -0
  23. package/dist/agents/investigator.d.ts +13 -0
  24. package/dist/agents/investigator.d.ts.map +1 -0
  25. package/dist/agents/investigator.js +105 -0
  26. package/dist/agents/investigator.js.map +1 -0
  27. package/dist/agents/marketing.d.ts +13 -0
  28. package/dist/agents/marketing.d.ts.map +1 -0
  29. package/dist/agents/marketing.js +59 -0
  30. package/dist/agents/marketing.js.map +1 -0
  31. package/dist/agents/researcher.d.ts +11 -0
  32. package/dist/agents/researcher.d.ts.map +1 -0
  33. package/dist/agents/researcher.js +68 -0
  34. package/dist/agents/researcher.js.map +1 -0
  35. package/dist/agents/writer.d.ts +9 -0
  36. package/dist/agents/writer.d.ts.map +1 -0
  37. package/dist/agents/writer.js +47 -0
  38. package/dist/agents/writer.js.map +1 -0
  39. package/dist/cli/create.d.ts +11 -0
  40. package/dist/cli/create.d.ts.map +1 -0
  41. package/dist/cli/create.js +104 -0
  42. package/dist/cli/create.js.map +1 -0
  43. package/dist/cli/evolve.d.ts +13 -0
  44. package/dist/cli/evolve.d.ts.map +1 -0
  45. package/dist/cli/evolve.js +69 -0
  46. package/dist/cli/evolve.js.map +1 -0
  47. package/dist/cli/index.d.ts +13 -0
  48. package/dist/cli/index.d.ts.map +1 -0
  49. package/dist/cli/index.js +84 -0
  50. package/dist/cli/index.js.map +1 -0
  51. package/dist/cli/init.d.ts +12 -0
  52. package/dist/cli/init.d.ts.map +1 -0
  53. package/dist/cli/init.js +68 -0
  54. package/dist/cli/init.js.map +1 -0
  55. package/dist/cli/run.d.ts +7 -0
  56. package/dist/cli/run.d.ts.map +1 -0
  57. package/dist/cli/run.js +371 -0
  58. package/dist/cli/run.js.map +1 -0
  59. package/dist/cli/status.d.ts +7 -0
  60. package/dist/cli/status.d.ts.map +1 -0
  61. package/dist/cli/status.js +123 -0
  62. package/dist/cli/status.js.map +1 -0
  63. package/dist/core/agent.d.ts +53 -0
  64. package/dist/core/agent.d.ts.map +1 -0
  65. package/dist/core/agent.js +172 -0
  66. package/dist/core/agent.js.map +1 -0
  67. package/dist/core/runner.d.ts +64 -0
  68. package/dist/core/runner.d.ts.map +1 -0
  69. package/dist/core/runner.js +203 -0
  70. package/dist/core/runner.js.map +1 -0
  71. package/dist/evolution/loop.d.ts +100 -0
  72. package/dist/evolution/loop.d.ts.map +1 -0
  73. package/dist/evolution/loop.js +424 -0
  74. package/dist/evolution/loop.js.map +1 -0
  75. package/dist/evolution/multi-critic.d.ts +58 -0
  76. package/dist/evolution/multi-critic.d.ts.map +1 -0
  77. package/dist/evolution/multi-critic.js +324 -0
  78. package/dist/evolution/multi-critic.js.map +1 -0
  79. package/dist/evolution/notifications.d.ts +32 -0
  80. package/dist/evolution/notifications.d.ts.map +1 -0
  81. package/dist/evolution/notifications.js +92 -0
  82. package/dist/evolution/notifications.js.map +1 -0
  83. package/dist/evolution/optimizer.d.ts +64 -0
  84. package/dist/evolution/optimizer.d.ts.map +1 -0
  85. package/dist/evolution/optimizer.js +223 -0
  86. package/dist/evolution/optimizer.js.map +1 -0
  87. package/dist/evolution/patterns.d.ts +63 -0
  88. package/dist/evolution/patterns.d.ts.map +1 -0
  89. package/dist/evolution/patterns.js +297 -0
  90. package/dist/evolution/patterns.js.map +1 -0
  91. package/dist/evolution/safety.d.ts +76 -0
  92. package/dist/evolution/safety.d.ts.map +1 -0
  93. package/dist/evolution/safety.js +182 -0
  94. package/dist/evolution/safety.js.map +1 -0
  95. package/dist/evolution/tracker.d.ts +48 -0
  96. package/dist/evolution/tracker.d.ts.map +1 -0
  97. package/dist/evolution/tracker.js +163 -0
  98. package/dist/evolution/tracker.js.map +1 -0
  99. package/dist/index.d.ts +32 -0
  100. package/dist/index.d.ts.map +1 -0
  101. package/dist/index.js +35 -0
  102. package/dist/index.js.map +1 -0
  103. package/dist/memory/index.d.ts +32 -0
  104. package/dist/memory/index.d.ts.map +1 -0
  105. package/dist/memory/index.js +49 -0
  106. package/dist/memory/index.js.map +1 -0
  107. package/dist/memory/postgres-memory.d.ts +52 -0
  108. package/dist/memory/postgres-memory.d.ts.map +1 -0
  109. package/dist/memory/postgres-memory.js +515 -0
  110. package/dist/memory/postgres-memory.js.map +1 -0
  111. package/dist/memory/sqlite-memory.d.ts +36 -0
  112. package/dist/memory/sqlite-memory.d.ts.map +1 -0
  113. package/dist/memory/sqlite-memory.js +380 -0
  114. package/dist/memory/sqlite-memory.js.map +1 -0
  115. package/dist/providers/anthropic.d.ts +20 -0
  116. package/dist/providers/anthropic.d.ts.map +1 -0
  117. package/dist/providers/anthropic.js +82 -0
  118. package/dist/providers/anthropic.js.map +1 -0
  119. package/dist/providers/claude-cli.d.ts +35 -0
  120. package/dist/providers/claude-cli.d.ts.map +1 -0
  121. package/dist/providers/claude-cli.js +143 -0
  122. package/dist/providers/claude-cli.js.map +1 -0
  123. package/dist/providers/index.d.ts +39 -0
  124. package/dist/providers/index.d.ts.map +1 -0
  125. package/dist/providers/index.js +58 -0
  126. package/dist/providers/index.js.map +1 -0
  127. package/dist/providers/ollama.d.ts +17 -0
  128. package/dist/providers/ollama.d.ts.map +1 -0
  129. package/dist/providers/ollama.js +64 -0
  130. package/dist/providers/ollama.js.map +1 -0
  131. package/dist/providers/openai.d.ts +19 -0
  132. package/dist/providers/openai.d.ts.map +1 -0
  133. package/dist/providers/openai.js +75 -0
  134. package/dist/providers/openai.js.map +1 -0
  135. package/dist/providers/types.d.ts +62 -0
  136. package/dist/providers/types.d.ts.map +1 -0
  137. package/dist/providers/types.js +9 -0
  138. package/dist/providers/types.js.map +1 -0
  139. package/dist/types.d.ts +221 -0
  140. package/dist/types.d.ts.map +1 -0
  141. package/dist/types.js +19 -0
  142. package/dist/types.js.map +1 -0
  143. package/package.json +81 -0
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Darwin — Safety Gate
3
+ *
4
+ * Guards against regressions during prompt evolution.
5
+ * Enforces minimum data requirements, regression checks,
6
+ * rollback triggers, and A/B test evaluation rules.
7
+ */
8
+ import type { PromptVersionStats, SafetyThresholds, DarwinExperiment } from '../types.js';
9
+ export type ABTestOutcome = 'a_wins' | 'b_wins' | 'continue'; // result of SafetyGate.evaluateABTest; 'continue' = no decision yet, keep testing
10
+ export interface ABTestConfidence {
11
+ /** Effect size: |A - B| divided by the mean of the two composites (a relative difference, not a true Cohen's d) */
12
+ effectSize: number;
13
+ /** True when effectSize >= 0.2 and both sides have at least minDataPoints runs */
14
+ confident: boolean;
15
+ }
16
+ export declare class SafetyGate {
17
+ private thresholds;
18
+ constructor(thresholds?: SafetyThresholds);
19
+ /**
20
+ * Check whether an agent has accumulated enough data points
21
+ * to proceed with evolution (true when stats.totalRuns >= minDataPoints).
22
+ */
23
+ canEvolve(_agentName: string, stats: PromptVersionStats): boolean;
24
+ /**
25
+ * Check whether score B is NOT a regression beyond the allowed threshold.
26
+ * The drop is relative: (scoreA - scoreB) / scoreA; a scoreA <= 0 always passes.
27
+ * Returns `true` if B is acceptable (no regression or within tolerance).
28
+ * Returns `false` if B has regressed beyond `maxRegression` compared to A.
29
+ *
30
+ * Example: maxRegression = 0.20, scoreA = 0.80
31
+ * - scoreB = 0.70 => drop = 0.125 (12.5%) => acceptable
32
+ * - scoreB = 0.60 => drop = 0.250 (25.0%) => regression
33
+ */
34
+ checkRegression(scoreA: number, scoreB: number): boolean;
35
+ /**
36
+ * Check if the agent should roll back to its last-known-good prompt
37
+ * based on consecutive failure count (>= failureRollbackThreshold).
38
+ */
39
+ shouldRollback(consecutiveFailures: number): boolean;
40
+ /**
41
+ * Evaluate the outcome of an A/B test between two prompt versions.
42
+ *
43
+ * Rules (A is treated as the incumbent, B as the challenger):
44
+ * 1. Both versions need at least `minRuns` successful runs (failed attempts do not count).
45
+ * 2. If a version has >50% failure rate with 3+ attempts, it auto-loses (checked first, B before A).
46
+ * 3. The winner must show >5% relative improvement in reliability-adjusted composite score.
47
+ * 4. If neither clears the bar, the test continues until both sides reach 2x `minRuns`; then A wins.
48
+ *
49
+ * @param overrideMinRuns — Per-test minimum runs (from ABTest.minRuns).
50
+ * Falls back to SafetyThresholds.minDataPoints if not provided.
51
+ */
52
+ evaluateABTest(compositeA: number, compositeB: number, runsA: number, runsB: number, failsA?: number, failsB?: number, overrideMinRuns?: number): ABTestOutcome;
53
+ /**
54
+ * Calculate a simple confidence metric for an A/B test result.
55
+ * Uses |A - B| divided by the mean of A and B as an effect-size proxy.
56
+ * Minimum sample: both sides need >= minDataPoints runs (otherwise effectSize 0, not confident).
57
+ */
58
+ calculateConfidence(compositeA: number, compositeB: number, runsA: number, runsB: number): ABTestConfidence;
59
+ /**
60
+ * Compute dynamic minRuns based on observed quality score variance.
61
+ *
62
+ * When scores cluster tightly (e.g., 6.5-7.0, std < 0.5), a small
63
+ * sample cannot distinguish between versions. This method increases
64
+ * minRuns as the sample standard deviation of the scores shrinks:
65
+ *
66
+ * - High variance (std >= 1.0): floor (configMinRuns, default 10) — easy to detect differences
67
+ * - Low variance (std < 0.5): ceil (30, or the floor if that is larger) — need more samples
68
+ * - Mid variance (0.5 - 1.0): linear interpolation
69
+ * With fewer than 4 non-null quality scores the floor is returned.
70
+ * @param experiments — Recent experiments for both A and B versions
71
+ * @param configMinRuns — Agent-level minRuns override from EvolutionConfig
72
+ * @returns Computed minRuns (never below floor, never above ceil)
73
+ */
74
+ computeDynamicMinRuns(experiments: DarwinExperiment[], configMinRuns?: number): number;
75
+ }
76
+ //# sourceMappingURL=safety.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"safety.d.ts","sourceRoot":"","sources":["../../src/evolution/safety.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,kBAAkB,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAG1F,MAAM,MAAM,aAAa,GAAG,QAAQ,GAAG,QAAQ,GAAG,UAAU,CAAC;AAE7D,MAAM,WAAW,gBAAgB;IAC/B,4CAA4C;IAC5C,UAAU,EAAE,MAAM,CAAC;IACnB,4DAA4D;IAC5D,SAAS,EAAE,OAAO,CAAC;CACpB;AAMD,qBAAa,UAAU;IACrB,OAAO,CAAC,UAAU,CAAmB;gBAEzB,UAAU,GAAE,gBAAiC;IAIzD;;;OAGG;IACH,SAAS,CAAC,UAAU,EAAE,MAAM,EAAE,KAAK,EAAE,kBAAkB,GAAG,OAAO;IAIjE;;;;;;;;;OASG;IACH,eAAe,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,OAAO;IAUxD;;;OAGG;IACH,cAAc,CAAC,mBAAmB,EAAE,MAAM,GAAG,OAAO;IAIpD;;;;;;;;;;;OAWG;IACH,cAAc,CACZ,UAAU,EAAE,MAAM,EAClB,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,MAAM,EACb,MAAM,GAAE,MAAU,EAClB,MAAM,GAAE,MAAU,EAClB,eAAe,CAAC,EAAE,MAAM,GACvB,aAAa;IA+DhB;;;;OAIG;IACH,mBAAmB,CACjB,UAAU,EAAE,MAAM,EAClB,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,MAAM,GACZ,gBAAgB;IAuBnB;;;;;;;;;;;;;;OAcG;IACH,qBAAqB,CACnB,WAAW,EAAE,gBAAgB,EAAE,EAC/B,aAAa,CAAC,EAAE,MAAM,GACrB,MAAM;CAkCV"}
@@ -0,0 +1,182 @@
1
+ /**
2
+ * Darwin — Safety Gate
3
+ *
4
+ * Guards against regressions during prompt evolution.
5
+ * Enforces minimum data requirements, regression checks,
6
+ * rollback triggers, and A/B test evaluation rules.
7
+ */
8
+ import { DEFAULT_SAFETY } from '../types.js';
9
+ /** Default minRuns range for dynamic sizing */
10
+ const DYNAMIC_MIN_RUNS_FLOOR = 10;
11
+ const DYNAMIC_MIN_RUNS_CEIL = 30;
12
+ export class SafetyGate {
13
+ thresholds;
14
+ constructor(thresholds = DEFAULT_SAFETY) {
15
+ this.thresholds = thresholds;
16
+ }
17
+ /**
18
+ * Check whether an agent has accumulated enough data points
19
+ * to proceed with evolution (prompt optimization).
20
+ */
21
+ canEvolve(_agentName, stats) {
22
+ return stats.totalRuns >= this.thresholds.minDataPoints;
23
+ }
24
+ /**
25
+ * Check whether score B is NOT a regression beyond the allowed threshold.
26
+ *
27
+ * Returns `true` if B is acceptable (no regression or within tolerance).
28
+ * Returns `false` if B has regressed beyond `maxRegression` compared to A.
29
+ *
30
+ * Example: maxRegression = 0.20, scoreA = 0.80
31
+ * - scoreB = 0.70 => drop = 0.125 (12.5%) => acceptable
32
+ * - scoreB = 0.60 => drop = 0.250 (25.0%) => regression
33
+ */
34
+ checkRegression(scoreA, scoreB) {
35
+ // If A is zero or negative, any B is acceptable (no baseline)
36
+ if (scoreA <= 0) {
37
+ return true;
38
+ }
39
+ const drop = (scoreA - scoreB) / scoreA;
40
+ return drop <= this.thresholds.maxRegression;
41
+ }
42
+ /**
43
+ * Check if the agent should roll back to its last-known-good prompt
44
+ * based on consecutive failure count.
45
+ */
46
+ shouldRollback(consecutiveFailures) {
47
+ return consecutiveFailures >= this.thresholds.failureRollbackThreshold;
48
+ }
49
+ /**
50
+ * Evaluate the outcome of an A/B test between two prompt versions.
51
+ *
52
+ * Rules:
53
+ * 1. Both versions need at least `minRuns` total attempts (success + fail).
54
+ * 2. If a version has >50% failure rate with 3+ attempts, it auto-loses.
55
+ * 3. The winner must show >5% improvement in composite score.
56
+ * 4. If neither clears the bar, the test continues.
57
+ *
58
+ * @param overrideMinRuns — Per-test minimum runs (from ABTest.minRuns).
59
+ * Falls back to SafetyThresholds.minDataPoints if not provided.
60
+ */
61
+ evaluateABTest(compositeA, compositeB, runsA, runsB, failsA = 0, failsB = 0, overrideMinRuns) {
62
+ const minRuns = overrideMinRuns ?? this.thresholds.minDataPoints;
63
+ const totalA = runsA + failsA;
64
+ const totalB = runsB + failsB;
65
+ // Reliability check: if a version fails >50% with 3+ total attempts, it auto-loses
66
+ const minAttemptsForReliability = 3;
67
+ if (totalB >= minAttemptsForReliability && failsB / totalB > 0.5) {
68
+ return 'a_wins'; // B is unreliable
69
+ }
70
+ if (totalA >= minAttemptsForReliability && failsA / totalA > 0.5) {
71
+ return 'b_wins'; // A is unreliable
72
+ }
73
+ // Not enough successful data on either side — keep testing
74
+ if (runsA < minRuns || runsB < minRuns) {
75
+ return 'continue';
76
+ }
77
+ const improvementThreshold = 0.05; // 5% relative improvement needed
78
+ // Factor reliability into composite: penalize versions with failures
79
+ const reliabilityA = totalA > 0 ? runsA / totalA : 1;
80
+ const reliabilityB = totalB > 0 ? runsB / totalB : 1;
81
+ const adjustedA = compositeA * reliabilityA;
82
+ const adjustedB = compositeB * reliabilityB;
83
+ // Avoid division by zero
84
+ if (adjustedA === 0 && adjustedB === 0) {
85
+ return 'continue';
86
+ }
87
+ // Check if B beats A by >5%
88
+ if (adjustedA > 0) {
89
+ const bOverA = (adjustedB - adjustedA) / adjustedA;
90
+ if (bOverA > improvementThreshold) {
91
+ return 'b_wins';
92
+ }
93
+ }
94
+ else if (adjustedB > 0) {
95
+ return 'b_wins';
96
+ }
97
+ // Check if A beats B by >5%
98
+ if (adjustedB > 0) {
99
+ const aOverB = (adjustedA - adjustedB) / adjustedB;
100
+ if (aOverB > improvementThreshold) {
101
+ return 'a_wins';
102
+ }
103
+ }
104
+ else if (adjustedA > 0) {
105
+ return 'a_wins';
106
+ }
107
+ // Neither version has a decisive advantage.
108
+ // But prevent infinite tests: if both have 2x minRuns, declare incumbent (A) the winner.
109
+ // Rationale: if B can't prove itself better after double the sample, A keeps its position.
110
+ const maxRunsPerSide = minRuns * 2;
111
+ if (runsA >= maxRunsPerSide && runsB >= maxRunsPerSide) {
112
+ return 'a_wins'; // Incumbent wins by default — challenger failed to prove superiority
113
+ }
114
+ return 'continue';
115
+ }
116
+ /**
117
+ * Calculate a simple confidence metric for an A/B test result.
118
+ * Uses effect size (difference / pooled estimate) as a proxy.
119
+ * Minimum sample: both sides need >= minDataPoints runs.
120
+ */
121
+ calculateConfidence(compositeA, compositeB, runsA, runsB) {
122
+ const minRuns = this.thresholds.minDataPoints;
123
+ if (runsA < minRuns || runsB < minRuns) {
124
+ return { effectSize: 0, confident: false };
125
+ }
126
+ // Pooled estimate (simple average as variance proxy)
127
+ const pooled = (compositeA + compositeB) / 2;
128
+ if (pooled === 0) {
129
+ return { effectSize: 0, confident: false };
130
+ }
131
+ // Effect size: absolute difference normalized by pooled mean
132
+ const effectSize = Math.abs(compositeA - compositeB) / pooled;
133
+ // Require at least "small" effect size (0.2) and enough samples
134
+ const totalSamples = runsA + runsB;
135
+ const confident = effectSize >= 0.2 && totalSamples >= minRuns * 2;
136
+ return { effectSize, confident };
137
+ }
138
+ /**
139
+ * Compute dynamic minRuns based on observed quality score variance.
140
+ *
141
+ * When scores cluster tightly (e.g., 6.5-7.0, std < 0.5), a small
142
+ * sample cannot distinguish between versions. This method increases
143
+ * minRuns proportionally to the inverse of variance:
144
+ *
145
+ * - High variance (std >= 1.0): floor (10) — easy to detect differences
146
+ * - Low variance (std < 0.5): ceil (30) — need more samples
147
+ * - Mid variance (0.5 - 1.0): linear interpolation
148
+ *
149
+ * @param experiments — Recent experiments for both A and B versions
150
+ * @param configMinRuns — Agent-level minRuns override from EvolutionConfig
151
+ * @returns Computed minRuns (never below floor, never above ceil)
152
+ */
153
+ computeDynamicMinRuns(experiments, configMinRuns) {
154
+ const floor = configMinRuns ?? DYNAMIC_MIN_RUNS_FLOOR;
155
+ const ceil = Math.max(floor, DYNAMIC_MIN_RUNS_CEIL);
156
+ // Need at least 4 quality scores to estimate variance
157
+ const qualityScores = experiments
158
+ .map((e) => e.metrics.qualityScore)
159
+ .filter((s) => s !== null);
160
+ if (qualityScores.length < 4) {
161
+ return floor;
162
+ }
163
+ const mean = qualityScores.reduce((a, b) => a + b, 0) / qualityScores.length;
164
+ // Bessel's correction (n-1): we are estimating population variance from a sample.
165
+ // Without this, small samples (n=4-5) underestimate std by ~13%, inflating minRuns.
166
+ const variance = qualityScores.reduce((sum, s) => sum + (s - mean) ** 2, 0) / (qualityScores.length - 1);
167
+ const std = Math.sqrt(variance);
168
+ // High variance (std >= 1.0): floor — differences are easy to spot
169
+ if (std >= 1.0) {
170
+ return floor;
171
+ }
172
+ // Low variance (std < 0.5): ceil — need many samples
173
+ if (std < 0.5) {
174
+ return ceil;
175
+ }
176
+ // Mid range: linear interpolation between ceil and floor
177
+ // std=0.5 → ceil, std=1.0 → floor
178
+ const t = (std - 0.5) / 0.5; // 0 at std=0.5, 1 at std=1.0
179
+ return Math.round(ceil + t * (floor - ceil));
180
+ }
181
+ }
182
+ //# sourceMappingURL=safety.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"safety.js","sourceRoot":"","sources":["../../src/evolution/safety.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAGH,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAW7C,+CAA+C;AAC/C,MAAM,sBAAsB,GAAG,EAAE,CAAC;AAClC,MAAM,qBAAqB,GAAG,EAAE,CAAC;AAEjC,MAAM,OAAO,UAAU;IACb,UAAU,CAAmB;IAErC,YAAY,aAA+B,cAAc;QACvD,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IAC/B,CAAC;IAED;;;OAGG;IACH,SAAS,CAAC,UAAkB,EAAE,KAAyB;QACrD,OAAO,KAAK,CAAC,SAAS,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;IAC1D,CAAC;IAED;;;;;;;;;OASG;IACH,eAAe,CAAC,MAAc,EAAE,MAAc;QAC5C,8DAA8D;QAC9D,IAAI,MAAM,IAAI,CAAC,EAAE,CAAC;YAChB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,GAAG,MAAM,CAAC,GAAG,MAAM,CAAC;QACxC,OAAO,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;IAC/C,CAAC;IAED;;;OAGG;IACH,cAAc,CAAC,mBAA2B;QACxC,OAAO,mBAAmB,IAAI,IAAI,CAAC,UAAU,CAAC,wBAAwB,CAAC;IACzE,CAAC;IAED;;;;;;;;;;;OAWG;IACH,cAAc,CACZ,UAAkB,EAClB,UAAkB,EAClB,KAAa,EACb,KAAa,EACb,SAAiB,CAAC,EAClB,SAAiB,CAAC,EAClB,eAAwB;QAExB,MAAM,OAAO,GAAG,eAAe,IAAI,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QACjE,MAAM,MAAM,GAAG,KAAK,GAAG,MAAM,CAAC;QAC9B,MAAM,MAAM,GAAG,KAAK,GAAG,MAAM,CAAC;QAE9B,mFAAmF;QACnF,MAAM,yBAAyB,GAAG,CAAC,CAAC;QACpC,IAAI,MAAM,IAAI,yBAAyB,IAAI,MAAM,GAAG,MAAM,GAAG,GAAG,EAAE,CAAC;YACjE,OAAO,QAAQ,CAAC,CAAC,kBAAkB;QACrC,CAAC;QACD,IAAI,MAAM,IAAI,yBAAyB,IAAI,MAAM,GAAG,MAAM,GAAG,GAAG,EAAE,CAAC;YACjE,OAAO,QAAQ,CAAC,CAAC,kBAAkB;QACrC,CAAC;QAED,2DAA2D;QAC3D,IAAI,KAAK,GAAG,OAAO,IAAI,KAAK,GAAG,OAAO,EAAE,CAAC;YACvC,OAAO,UAAU,CAAC;QACpB,CAAC;QAED,MAAM,oBAAoB,GAAG,IAAI,CAAC,CAAC,iCAAiC;QAEpE,qEAAqE;QACrE,MAAM,YAAY,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QACrD,MAAM,YAAY,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QACrD,MAAM,SAAS,GAAG,UAAU,GAAG,YAAY,CAAC;QAC5C,MAAM,SAAS,GAAG,UAAU,GAAG,YAAY,CAAC;QAE5C,yBAAyB;QACzB,IAAI,SAAS,KAAK,CAAC,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;YACvC,OAAO,UAAU,CAAC;QACpB,CAAC;QAED,4BAA4B;QAC5B,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YAClB,MAAM,MAAM,GAAG,CAAC,SAAS,GAAG,SAAS,CAAC,GAAG,SAAS,CAAC;YACnD,IAAI,MAAM
,GAAG,oBAAoB,EAAE,CAAC;gBAClC,OAAO,QAAQ,CAAC;YAClB,CAAC;QACH,CAAC;aAAM,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YACzB,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,4BAA4B;QAC5B,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YAClB,MAAM,MAAM,GAAG,CAAC,SAAS,GAAG,SAAS,CAAC,GAAG,SAAS,CAAC;YACnD,IAAI,MAAM,GAAG,oBAAoB,EAAE,CAAC;gBAClC,OAAO,QAAQ,CAAC;YAClB,CAAC;QACH,CAAC;aAAM,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YACzB,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,4CAA4C;QAC5C,yFAAyF;QACzF,2FAA2F;QAC3F,MAAM,cAAc,GAAG,OAAO,GAAG,CAAC,CAAC;QACnC,IAAI,KAAK,IAAI,cAAc,IAAI,KAAK,IAAI,cAAc,EAAE,CAAC;YACvD,OAAO,QAAQ,CAAC,CAAC,qEAAqE;QACxF,CAAC;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;IAED;;;;OAIG;IACH,mBAAmB,CACjB,UAAkB,EAClB,UAAkB,EAClB,KAAa,EACb,KAAa;QAEb,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QAE9C,IAAI,KAAK,GAAG,OAAO,IAAI,KAAK,GAAG,OAAO,EAAE,CAAC;YACvC,OAAO,EAAE,UAAU,EAAE,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;QAC7C,CAAC;QAED,qDAAqD;QACrD,MAAM,MAAM,GAAG,CAAC,UAAU,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;QAC7C,IAAI,MAAM,KAAK,CAAC,EAAE,CAAC;YACjB,OAAO,EAAE,UAAU,EAAE,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;QAC7C,CAAC;QAED,6DAA6D;QAC7D,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,GAAG,UAAU,CAAC,GAAG,MAAM,CAAC;QAE9D,gEAAgE;QAChE,MAAM,YAAY,GAAG,KAAK,GAAG,KAAK,CAAC;QACnC,MAAM,SAAS,GAAG,UAAU,IAAI,GAAG,IAAI,YAAY,IAAI,OAAO,GAAG,CAAC,CAAC;QAEnE,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,CAAC;IACnC,CAAC;IAED;;;;;;;;;;;;;;OAcG;IACH,qBAAqB,CACnB,WAA+B,EAC/B,aAAsB;QAEtB,MAAM,KAAK,GAAG,aAAa,IAAI,sBAAsB,CAAC;QACtD,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,qBAAqB,CAAC,CAAC;QAEpD,sDAAsD;QACtD,MAAM,aAAa,GAAG,WAAW;aAC9B,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,CAAC;aAClC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;QAE1C,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,OAAO,KAAK,CAAC;QACf,CAAC;QAED,MAAM,IAAI,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,aAAa,CAAC,MAAM,CAAC;QAC7E,kFAAkF;QAClF,oFAAoF;QACpF,MAAM,QAAQ,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,C
AAC,CAAC,GAAG,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACzG,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAEhC,mEAAmE;QACnE,IAAI,GAAG,IAAI,GAAG,EAAE,CAAC;YACf,OAAO,KAAK,CAAC;QACf,CAAC;QAED,qDAAqD;QACrD,IAAI,GAAG,GAAG,GAAG,EAAE,CAAC;YACd,OAAO,IAAI,CAAC;QACd,CAAC;QAED,yDAAyD;QACzD,kCAAkC;QAClC,MAAM,CAAC,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC,CAAC,6BAA6B;QAC1D,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,GAAG,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC;IAC/C,CAAC;CACF"}
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Darwin — Experiment Tracker
3
+ *
4
+ * Records experiments, aggregates stats, and computes composite scores
5
+ * for prompt version evaluation.
6
+ */
7
+ import type { DarwinExperiment, MemoryProvider, MetricWeights, PromptVersionStats } from '../types.js';
8
+ import type { CategoryStats } from './optimizer.js';
9
+ export declare class ExperimentTracker {
10
+ private memory;
11
+ constructor(memory: MemoryProvider);
12
+ /**
13
+ * Record a completed experiment.
14
+ * Saves it to memory, updates the prompt version stats, and adjusts
15
+ * the experiment and consecutive-failure counters in Darwin state.
16
+ */
17
+ recordExperiment(exp: DarwinExperiment): Promise<void>;
18
+ /**
19
+ * Aggregate stats from all experiments for a given agent (optionally
20
+ * filtered to a single prompt version). Returns all-zero stats when nothing matches.
21
+ */
22
+ getStats(agentName: string, version?: string): Promise<PromptVersionStats>;
23
+ /**
24
+ * Calculate a composite score for a single experiment.
25
+ * The weighted sum is divided by the sum of the weights in use; a null quality score drops the quality weight.
26
+ * Normalization ranges:
27
+ * quality — score / 10 (0-10 scale)
28
+ * sourceCount — min(count / 20, 1) (20 sources = perfect)
29
+ * outputLength — min(len / 10000, 1) (10k chars = perfect)
30
+ * duration — 1 - min(ms/300000, 1) (lower is better, 5 min cap)
31
+ * success — 1 if true, 0 if false
32
+ */
33
+ getCompositeScore(exp: DarwinExperiment, weights?: MetricWeights): number;
34
+ /**
35
+ * Get stats broken down by task category (P2-5).
36
+ * Gives the optimizer visibility into which topic types perform well/poorly. Experiments without a task type are grouped as 'general'; the result is sorted by run count, descending.
37
+ */
38
+ getStatsByCategory(agentName: string): Promise<CategoryStats[]>;
39
+ /**
40
+ * Average composite score across experiments for a specific agent + prompt version.
41
+ *
42
+ * If `since` is provided, only experiments whose `startedAt` is at or after that ISO timestamp (string comparison) are included.
43
+ * This is critical for A/B tests: compare only the test period, not all-time data
44
+ * (otherwise the incumbent version's historical data skews the comparison). Returns 0 when no experiment matches.
45
+ */
46
+ getAverageComposite(agentName: string, version: string, weights?: MetricWeights, since?: string): Promise<number>;
47
+ }
48
+ //# sourceMappingURL=tracker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tracker.d.ts","sourceRoot":"","sources":["../../src/evolution/tracker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EACV,gBAAgB,EAEhB,cAAc,EACd,aAAa,EACb,kBAAkB,EACnB,MAAM,aAAa,CAAC;AACrB,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAGpD,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,MAAM,CAAiB;gBAEnB,MAAM,EAAE,cAAc;IAIlC;;;;OAIG;IACG,gBAAgB,CAAC,GAAG,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;IA+B5D;;;OAGG;IACG,QAAQ,CACZ,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE,MAAM,GACf,OAAO,CAAC,kBAAkB,CAAC;IA6C9B;;;;;;;;;OASG;IACH,iBAAiB,CACf,GAAG,EAAE,gBAAgB,EACrB,OAAO,GAAE,aAA+B,GACvC,MAAM;IA8BT;;;OAGG;IACG,kBAAkB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC;IA6BrE;;;;;;OAMG;IACG,mBAAmB,CACvB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,OAAO,GAAE,aAA+B,EACxC,KAAK,CAAC,EAAE,MAAM,GACb,OAAO,CAAC,MAAM,CAAC;CAmBnB"}
@@ -0,0 +1,163 @@
1
+ /**
2
+ * Darwin — Experiment Tracker
3
+ *
4
+ * Records experiments, aggregates stats, and computes composite scores
5
+ * for prompt version evaluation.
6
+ */
7
+ import { DEFAULT_WEIGHTS } from '../types.js';
8
+ export class ExperimentTracker {
9
+ memory;
10
+ constructor(memory) {
11
+ this.memory = memory;
12
+ }
13
+ /**
14
+ * Record a completed experiment.
15
+ * Saves it to memory, updates the prompt version stats, and adjusts
16
+ * the consecutive-failure counter in Darwin state.
17
+ */
18
+ async recordExperiment(exp) {
19
+ // 1. Persist the raw experiment
20
+ await this.memory.saveExperiment(exp);
21
+ // 2. Refresh aggregated stats on the prompt version
22
+ const versions = await this.memory.getAllPromptVersions(exp.agentName);
23
+ const version = versions.find((v) => v.version === exp.promptVersion);
24
+ if (version) {
25
+ const updatedStats = await this.getStats(exp.agentName, exp.promptVersion);
26
+ version.stats = updatedStats;
27
+ await this.memory.savePromptVersion(version);
28
+ }
29
+ // 3. Atomically update Darwin state (experiment count + consecutive failures).
30
+ // Uses updateState() to prevent race conditions when multiple agents
31
+ // record experiments concurrently — getState()+saveState() would lose updates.
32
+ await this.memory.updateState((state) => {
33
+ state.experimentCounts[exp.agentName] =
34
+ (state.experimentCounts[exp.agentName] ?? 0) + 1;
35
+ if (exp.success) {
36
+ state.consecutiveFailures[exp.agentName] = 0;
37
+ }
38
+ else {
39
+ state.consecutiveFailures[exp.agentName] =
40
+ (state.consecutiveFailures[exp.agentName] ?? 0) + 1;
41
+ }
42
+ return state;
43
+ });
44
+ }
45
+ /**
46
+ * Aggregate stats from all experiments for a given agent (optionally
47
+ * filtered to a single prompt version).
48
+ */
49
+ async getStats(agentName, version) {
50
+ const experiments = await this.memory.loadExperiments(agentName);
51
+ const filtered = version
52
+ ? experiments.filter((e) => e.promptVersion === version)
53
+ : experiments;
54
+ if (filtered.length === 0) {
55
+ return {
56
+ totalRuns: 0,
57
+ avgQuality: 0,
58
+ avgDuration: 0,
59
+ successRate: 0,
60
+ avgSourceCount: 0,
61
+ };
62
+ }
63
+ const totalRuns = filtered.length;
64
+ const successCount = filtered.filter((e) => e.success).length;
65
+ // Quality: only count experiments that have a quality score
66
+ const withQuality = filtered.filter((e) => e.metrics.qualityScore !== null);
67
+ const avgQuality = withQuality.length > 0
68
+ ? withQuality.reduce((sum, e) => sum + (e.metrics.qualityScore ?? 0), 0) /
69
+ withQuality.length
70
+ : 0;
71
+ const avgDuration = filtered.reduce((sum, e) => sum + e.metrics.durationMs, 0) / totalRuns;
72
+ const avgSourceCount = filtered.reduce((sum, e) => sum + e.metrics.sourceCount, 0) / totalRuns;
73
+ return {
74
+ totalRuns,
75
+ avgQuality,
76
+ avgDuration,
77
+ successRate: successCount / totalRuns,
78
+ avgSourceCount,
79
+ };
80
+ }
81
+ /**
82
+ * Calculate a composite score for a single experiment.
83
+ *
84
+ * Normalization ranges:
85
+ * quality — score / 10 (0-10 scale)
86
+ * sourceCount — min(count / 20, 1) (20 sources = perfect)
87
+ * outputLength — min(len / 10000, 1) (10k chars = perfect)
88
+ * duration — 1 - min(ms/300000, 1) (lower is better, 5 min cap)
89
+ * success — 1 if true, 0 if false
90
+ */
91
+ getCompositeScore(exp, weights = DEFAULT_WEIGHTS) {
92
+ // NULL quality = critic failed, not agent failed. Exclude from quality component
93
+ // instead of treating as 0 (which unfairly tanks the composite).
94
+ const hasQuality = exp.metrics.qualityScore !== null;
95
+ const qualityNorm = hasQuality ? exp.metrics.qualityScore / 10 : 0;
96
+ // If no quality score, redistribute quality weight to other metrics
97
+ const effectiveQualityWeight = hasQuality ? weights.quality : 0;
98
+ const weightSum = effectiveQualityWeight + weights.sourceCount + weights.outputLength + weights.duration + weights.success;
99
+ const scale = weightSum > 0 ? 1 / weightSum : 0;
100
+ const normalized = {
101
+ quality: qualityNorm,
102
+ sourceCount: Math.min(exp.metrics.sourceCount / 20, 1),
103
+ outputLength: Math.min(exp.metrics.outputLength / 10000, 1),
104
+ duration: 1 - Math.min(exp.metrics.durationMs / 300000, 1),
105
+ success: exp.success ? 1 : 0,
106
+ };
107
+ const score = (normalized.quality * effectiveQualityWeight +
108
+ normalized.sourceCount * weights.sourceCount +
109
+ normalized.outputLength * weights.outputLength +
110
+ normalized.duration * weights.duration +
111
+ normalized.success * weights.success) * scale;
112
+ return score;
113
+ }
114
+ /**
115
+ * Get stats broken down by task category (P2-5).
116
+ * Gives the optimizer visibility into which topic types perform well/poorly.
117
+ */
118
+ async getStatsByCategory(agentName) {
119
+ const experiments = await this.memory.loadExperiments(agentName);
120
+ const byCategory = new Map();
121
+ for (const exp of experiments) {
122
+ const key = exp.taskType || 'general';
123
+ const list = byCategory.get(key);
124
+ if (list) {
125
+ list.push(exp);
126
+ }
127
+ else {
128
+ byCategory.set(key, [exp]);
129
+ }
130
+ }
131
+ const result = [];
132
+ for (const [taskType, exps] of byCategory) {
133
+ const withQuality = exps.filter((e) => e.metrics.qualityScore !== null);
134
+ const avgQuality = withQuality.length > 0
135
+ ? withQuality.reduce((s, e) => s + (e.metrics.qualityScore ?? 0), 0) / withQuality.length
136
+ : 0;
137
+ const avgSourceCount = exps.reduce((s, e) => s + e.metrics.sourceCount, 0) / exps.length;
138
+ const successRate = exps.filter((e) => e.success).length / exps.length;
139
+ result.push({ taskType, totalRuns: exps.length, avgQuality, avgSourceCount, successRate });
140
+ }
141
+ return result.sort((a, b) => b.totalRuns - a.totalRuns);
142
+ }
143
+ /**
144
+ * Average composite score across experiments for a specific agent + prompt version.
145
+ *
146
+ * If `since` is provided, only experiments after that ISO timestamp are included.
147
+ * This is critical for A/B tests: compare only the test period, not all-time data
148
+ * (otherwise the incumbent version's historical data skews the comparison).
149
+ */
150
+ async getAverageComposite(agentName, version, weights = DEFAULT_WEIGHTS, since) {
151
+ const experiments = await this.memory.loadExperiments(agentName);
152
+ let filtered = experiments.filter((e) => e.promptVersion === version);
153
+ if (since) {
154
+ filtered = filtered.filter((e) => e.startedAt >= since);
155
+ }
156
+ if (filtered.length === 0) {
157
+ return 0;
158
+ }
159
+ const total = filtered.reduce((sum, exp) => sum + this.getCompositeScore(exp, weights), 0);
160
+ return total / filtered.length;
161
+ }
162
+ }
163
+ //# sourceMappingURL=tracker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tracker.js","sourceRoot":"","sources":["../../src/evolution/tracker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAUH,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,MAAM,OAAO,iBAAiB;IACpB,MAAM,CAAiB;IAE/B,YAAY,MAAsB;QAChC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,gBAAgB,CAAC,GAAqB;QAC1C,gCAAgC;QAChC,MAAM,IAAI,CAAC,MAAM,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;QAEtC,oDAAoD;QACpD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,oBAAoB,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;QACvE,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,GAAG,CAAC,aAAa,CAAC,CAAC;QACtE,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,EAAE,GAAG,CAAC,aAAa,CAAC,CAAC;YAC3E,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC;YAC7B,MAAM,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;QAC/C,CAAC;QAED,+EAA+E;QAC/E,qEAAqE;QACrE,+EAA+E;QAC/E,MAAM,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,KAAK,EAAE,EAAE;YACtC,KAAK,CAAC,gBAAgB,CAAC,GAAG,CAAC,SAAS,CAAC;gBACnC,CAAC,KAAK,CAAC,gBAAgB,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YAEnD,IAAI,GAAG,CAAC,OAAO,EAAE,CAAC;gBAChB,KAAK,CAAC,mBAAmB,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YAC/C,CAAC;iBAAM,CAAC;gBACN,KAAK,CAAC,mBAAmB,CAAC,GAAG,CAAC,SAAS,CAAC;oBACtC,CAAC,KAAK,CAAC,mBAAmB,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACxD,CAAC;YAED,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,QAAQ,CACZ,SAAiB,EACjB,OAAgB;QAEhB,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;QAEjE,MAAM,QAAQ,GAAG,OAAO;YACtB,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,KAAK,OAAO,CAAC;YACxD,CAAC,CAAC,WAAW,CAAC;QAEhB,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO;gBACL,SAAS,EAAE,CAAC;gBACZ,UAAU,EAAE,CAAC;gBACb,WAAW,EAAE,CAAC;gBACd,WAAW,EAAE,CAAC;gBACd,cAAc,EAAE,CAAC;aAClB,CAAC;QACJ,CAAC;QAED,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,CAAC;QAClC,MAAM,YAAY,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;QAE9D,4DAA4D;QAC5D,MAAM,WAAW,GAAG,QAAQ,CA
AC,MAAM,CACjC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,KAAK,IAAI,CACvC,CAAC;QACF,MAAM,UAAU,GACd,WAAW,CAAC,MAAM,GAAG,CAAC;YACpB,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC;gBACtE,WAAW,CAAC,MAAM;YACpB,CAAC,CAAC,CAAC,CAAC;QAER,MAAM,WAAW,GACf,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC;QAEzE,MAAM,cAAc,GAClB,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC;QAE1E,OAAO;YACL,SAAS;YACT,UAAU;YACV,WAAW;YACX,WAAW,EAAE,YAAY,GAAG,SAAS;YACrC,cAAc;SACf,CAAC;IACJ,CAAC;IAED;;;;;;;;;OASG;IACH,iBAAiB,CACf,GAAqB,EACrB,UAAyB,eAAe;QAExC,iFAAiF;QACjF,iEAAiE;QACjE,MAAM,UAAU,GAAG,GAAG,CAAC,OAAO,CAAC,YAAY,KAAK,IAAI,CAAC;QACrD,MAAM,WAAW,GAAG,UAAU,CAAC,CAAC,CAAE,GAAG,CAAC,OAAO,CAAC,YAAuB,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAE/E,oEAAoE;QACpE,MAAM,sBAAsB,GAAG,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;QAChE,MAAM,SAAS,GAAG,sBAAsB,GAAG,OAAO,CAAC,WAAW,GAAG,OAAO,CAAC,YAAY,GAAG,OAAO,CAAC,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC;QAC3H,MAAM,KAAK,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;QAEhD,MAAM,UAAU,GAAG;YACjB,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,WAAW,GAAG,EAAE,EAAE,CAAC,CAAC;YACtD,YAAY,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,YAAY,GAAG,KAAK,EAAE,CAAC,CAAC;YAC3D,QAAQ,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,UAAU,GAAG,MAAM,EAAE,CAAC,CAAC;YAC1D,OAAO,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;SAC7B,CAAC;QAEF,MAAM,KAAK,GAAG,CACZ,UAAU,CAAC,OAAO,GAAG,sBAAsB;YAC3C,UAAU,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW;YAC5C,UAAU,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY;YAC9C,UAAU,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ;YACtC,UAAU,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,CACrC,GAAG,KAAK,CAAC;QAEV,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,kBAAkB,CAAC,SAAiB;QACxC,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,
eAAe,CAAC,SAAS,CAAC,CAAC;QACjE,MAAM,UAAU,GAAG,IAAI,GAAG,EAA8B,CAAC;QAEzD,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;YAC9B,MAAM,GAAG,GAAG,GAAG,CAAC,QAAQ,IAAI,SAAS,CAAC;YACtC,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACjC,IAAI,IAAI,EAAE,CAAC;gBACT,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACjB,CAAC;iBAAM,CAAC;gBACN,UAAU,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;YAC7B,CAAC;QACH,CAAC;QAED,MAAM,MAAM,GAAoB,EAAE,CAAC;QACnC,KAAK,MAAM,CAAC,QAAQ,EAAE,IAAI,CAAC,IAAI,UAAU,EAAE,CAAC;YAC1C,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,KAAK,IAAI,CAAC,CAAC;YACxE,MAAM,UAAU,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC;gBACvC,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,WAAW,CAAC,MAAM;gBACzF,CAAC,CAAC,CAAC,CAAC;YACN,MAAM,cAAc,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC;YACzF,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC;YAEvE,MAAM,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,SAAS,EAAE,IAAI,CAAC,MAAM,EAAE,UAAU,EAAE,cAAc,EAAE,WAAW,EAAE,CAAC,CAAC;QAC7F,CAAC;QAED,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;IAC1D,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,mBAAmB,CACvB,SAAiB,EACjB,OAAe,EACf,UAAyB,eAAe,EACxC,KAAc;QAEd,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,SAAS,CAAC,CAAC;QACjE,IAAI,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,KAAK,OAAO,CAAC,CAAC;QAEtE,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,KAAK,CAAC,CAAC;QAC1D,CAAC;QAED,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO,CAAC,CAAC;QACX,CAAC;QAED,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAC3B,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAG,IAAI,CAAC,iBAAiB,CAAC,GAAG,EAAE,OAAO,CAAC,EACxD,CAAC,CACF,CAAC;QAEF,OAAO,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC;IACjC,C
AAC;CACF"}
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Darwin — AI agents that improve themselves.
3
+ *
4
+ * @example
5
+ * ```typescript
6
+ * import { defineAgent, defineConfig, runAgent } from 'darwin-agents';
7
+ *
8
+ * const myAgent = defineAgent({
9
+ * name: 'summarizer',
10
+ * role: 'Text Summarizer',
11
+ * systemPrompt: 'Summarize text in 3 bullet points.',
12
+ * evolution: { enabled: true, evaluator: 'critic' },
13
+ * });
14
+ *
15
+ * const result = await runAgent(myAgent, 'Summarize this article...');
16
+ * ```
17
+ */
18
+ export { defineAgent, defineConfig, loadConfig, loadConfigSync } from './core/agent.js';
19
+ export { runAgent } from './core/runner.js';
20
+ export type { AgentDefinition, DarwinConfig, DarwinExperiment, DarwinMetrics, DarwinPattern, DarwinState, ExperimentFeedback, EvolutionConfig, Learning, McpServerConfig, MemoryProvider, MetricWeights, PromptVersion, PromptVersionStats, RunResult, SafetyThresholds, } from './types.js';
21
+ export { DEFAULT_WEIGHTS, DEFAULT_SAFETY } from './types.js';
22
+ export { writer, researcher, critic, analyst, builtinAgents } from './agents/index.js';
23
+ export { createProvider } from './providers/index.js';
24
+ export type { LLMProvider, LLMCallOptions, LLMCallResult, ProviderConfig } from './providers/types.js';
25
+ export { AnthropicProvider } from './providers/anthropic.js';
26
+ export { OpenAIProvider } from './providers/openai.js';
27
+ export { OllamaProvider } from './providers/ollama.js';
28
+ export { ClaudeCliProvider } from './providers/claude-cli.js';
29
+ export { createMemory, SqliteMemoryProvider, PostgresMemoryProvider } from './memory/index.js';
30
+ export { loadNotificationConfig } from './evolution/notifications.js';
31
+ export type { NotificationConfig } from './evolution/notifications.js';
32
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAGH,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACxF,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAG5C,YAAY,EACV,eAAe,EACf,YAAY,EACZ,gBAAgB,EAChB,aAAa,EACb,aAAa,EACb,WAAW,EACX,kBAAkB,EAClB,eAAe,EACf,QAAQ,EACR,eAAe,EACf,cAAc,EACd,aAAa,EACb,aAAa,EACb,kBAAkB,EAClB,SAAS,EACT,gBAAgB,GACjB,MAAM,YAAY,CAAC;AAGpB,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAG7D,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAGvF,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,YAAY,EAAE,WAAW,EAAE,cAAc,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACvG,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAG9D,OAAO,EAAE,YAAY,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,MAAM,mBAAmB,CAAC;AAG/F,OAAO,EAAE,sBAAsB,EAAE,MAAM,8BAA8B,CAAC;AACtE,YAAY,EAAE,kBAAkB,EAAE,MAAM,8BAA8B,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Darwin — AI agents that improve themselves.
3
+ *
4
+ * @example
5
+ * ```typescript
6
+ * import { defineAgent, defineConfig, runAgent } from 'darwin-agents';
7
+ *
8
+ * const myAgent = defineAgent({
9
+ * name: 'summarizer',
10
+ * role: 'Text Summarizer',
11
+ * systemPrompt: 'Summarize text in 3 bullet points.',
12
+ * evolution: { enabled: true, evaluator: 'critic' },
13
+ * });
14
+ *
15
+ * const result = await runAgent(myAgent, 'Summarize this article...');
16
+ * ```
17
+ */
18
+ // Core API
19
+ export { defineAgent, defineConfig, loadConfig, loadConfigSync } from './core/agent.js';
20
+ export { runAgent } from './core/runner.js';
21
+ // Constants
22
+ export { DEFAULT_WEIGHTS, DEFAULT_SAFETY } from './types.js';
23
+ // Built-in Agents
24
+ export { writer, researcher, critic, analyst, builtinAgents } from './agents/index.js';
25
+ // Providers
26
+ export { createProvider } from './providers/index.js';
27
+ export { AnthropicProvider } from './providers/anthropic.js';
28
+ export { OpenAIProvider } from './providers/openai.js';
29
+ export { OllamaProvider } from './providers/ollama.js';
30
+ export { ClaudeCliProvider } from './providers/claude-cli.js';
31
+ // Memory
32
+ export { createMemory, SqliteMemoryProvider, PostgresMemoryProvider } from './memory/index.js';
33
+ // Notifications
34
+ export { loadNotificationConfig } from './evolution/notifications.js';
35
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,WAAW;AACX,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACxF,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAsB5C,YAAY;AACZ,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAE7D,kBAAkB;AAClB,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAEvF,YAAY;AACZ,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAEtD,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAE9D,SAAS;AACT,OAAO,EAAE,YAAY,EAAE,oBAAoB,EAAE,sBAAsB,EAAE,MAAM,mBAAmB,CAAC;AAE/F,gBAAgB;AAChB,OAAO,EAAE,sBAAsB,EAAE,MAAM,8BAA8B,CAAC"}
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Darwin — Memory Factory
3
+ *
4
+ * Creates the appropriate MemoryProvider based on config.
5
+ * Supports SQLite (free) and PostgreSQL (pro).
6
+ */
7
+ import type { DarwinConfig, MemoryProvider } from '../types.js';
8
+ /**
9
+ * Create a MemoryProvider based on the Darwin config.
10
+ *
+ * Declaration only: the backend-selection logic is in the implementation
+ * module and is not visible here. In the examples below the backend is picked
+ * with the `memory` field ('sqlite' or 'postgres'), and the PostgreSQL example
+ * also supplies `postgresUrl`.
+ *
11
+ * @param config - Darwin configuration with memory backend selection
12
+ * @returns An uninitialized MemoryProvider — call `init()` before use
13
+ *
14
+ * @example
15
+ * ```ts
16
+ * // SQLite (default, zero-config)
17
+ * const memory = createMemory({ provider: 'claude-cli', memory: 'sqlite' });
18
+ *
19
+ * // PostgreSQL (production, concurrent agents)
20
+ * const memory = createMemory({
21
+ * provider: 'claude-cli',
22
+ * memory: 'postgres',
23
+ * postgresUrl: 'postgresql://user:pass@localhost:5432/darwin',
24
+ * });
25
+ *
26
+ * await memory.init();
27
+ * ```
28
+ */
29
+ export declare function createMemory(config: DarwinConfig): MemoryProvider;
30
+ export { SqliteMemoryProvider } from './sqlite-memory.js';
31
+ export { PostgresMemoryProvider } from './postgres-memory.js';
32
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/memory/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAIhE;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,YAAY,CAAC,MAAM,EAAE,YAAY,GAAG,cAAc,CAmBjE;AAED,OAAO,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAC1D,OAAO,EAAE,sBAAsB,EAAE,MAAM,sBAAsB,CAAC"}