@cogitator-ai/self-modifying 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. package/LICENSE +21 -0
  2. package/README.md +714 -0
  3. package/dist/architecture-evolution/capability-analyzer.d.ts +32 -0
  4. package/dist/architecture-evolution/capability-analyzer.d.ts.map +1 -0
  5. package/dist/architecture-evolution/capability-analyzer.js +264 -0
  6. package/dist/architecture-evolution/capability-analyzer.js.map +1 -0
  7. package/dist/architecture-evolution/evolution-strategy.d.ts +29 -0
  8. package/dist/architecture-evolution/evolution-strategy.d.ts.map +1 -0
  9. package/dist/architecture-evolution/evolution-strategy.js +176 -0
  10. package/dist/architecture-evolution/evolution-strategy.js.map +1 -0
  11. package/dist/architecture-evolution/index.d.ts +5 -0
  12. package/dist/architecture-evolution/index.d.ts.map +1 -0
  13. package/dist/architecture-evolution/index.js +5 -0
  14. package/dist/architecture-evolution/index.js.map +1 -0
  15. package/dist/architecture-evolution/parameter-optimizer.d.ts +67 -0
  16. package/dist/architecture-evolution/parameter-optimizer.d.ts.map +1 -0
  17. package/dist/architecture-evolution/parameter-optimizer.js +341 -0
  18. package/dist/architecture-evolution/parameter-optimizer.js.map +1 -0
  19. package/dist/architecture-evolution/prompts.d.ts +33 -0
  20. package/dist/architecture-evolution/prompts.d.ts.map +1 -0
  21. package/dist/architecture-evolution/prompts.js +169 -0
  22. package/dist/architecture-evolution/prompts.js.map +1 -0
  23. package/dist/constraints/index.d.ts +4 -0
  24. package/dist/constraints/index.d.ts.map +1 -0
  25. package/dist/constraints/index.js +4 -0
  26. package/dist/constraints/index.js.map +1 -0
  27. package/dist/constraints/modification-validator.d.ts +26 -0
  28. package/dist/constraints/modification-validator.d.ts.map +1 -0
  29. package/dist/constraints/modification-validator.js +313 -0
  30. package/dist/constraints/modification-validator.js.map +1 -0
  31. package/dist/constraints/rollback-manager.d.ts +52 -0
  32. package/dist/constraints/rollback-manager.d.ts.map +1 -0
  33. package/dist/constraints/rollback-manager.js +113 -0
  34. package/dist/constraints/rollback-manager.js.map +1 -0
  35. package/dist/constraints/safety-constraints.d.ts +11 -0
  36. package/dist/constraints/safety-constraints.d.ts.map +1 -0
  37. package/dist/constraints/safety-constraints.js +78 -0
  38. package/dist/constraints/safety-constraints.js.map +1 -0
  39. package/dist/events/event-emitter.d.ts +12 -0
  40. package/dist/events/event-emitter.d.ts.map +1 -0
  41. package/dist/events/event-emitter.js +43 -0
  42. package/dist/events/event-emitter.js.map +1 -0
  43. package/dist/events/index.d.ts +2 -0
  44. package/dist/events/index.d.ts.map +1 -0
  45. package/dist/events/index.js +2 -0
  46. package/dist/events/index.js.map +1 -0
  47. package/dist/index.d.ts +8 -0
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +7 -0
  50. package/dist/index.js.map +1 -0
  51. package/dist/meta-reasoning/index.d.ts +5 -0
  52. package/dist/meta-reasoning/index.d.ts.map +1 -0
  53. package/dist/meta-reasoning/index.js +5 -0
  54. package/dist/meta-reasoning/index.js.map +1 -0
  55. package/dist/meta-reasoning/meta-reasoner.d.ts +53 -0
  56. package/dist/meta-reasoning/meta-reasoner.d.ts.map +1 -0
  57. package/dist/meta-reasoning/meta-reasoner.js +261 -0
  58. package/dist/meta-reasoning/meta-reasoner.js.map +1 -0
  59. package/dist/meta-reasoning/observation-collector.d.ts +37 -0
  60. package/dist/meta-reasoning/observation-collector.d.ts.map +1 -0
  61. package/dist/meta-reasoning/observation-collector.js +123 -0
  62. package/dist/meta-reasoning/observation-collector.js.map +1 -0
  63. package/dist/meta-reasoning/prompts.d.ts +31 -0
  64. package/dist/meta-reasoning/prompts.d.ts.map +1 -0
  65. package/dist/meta-reasoning/prompts.js +96 -0
  66. package/dist/meta-reasoning/prompts.js.map +1 -0
  67. package/dist/meta-reasoning/strategy-selector.d.ts +27 -0
  68. package/dist/meta-reasoning/strategy-selector.d.ts.map +1 -0
  69. package/dist/meta-reasoning/strategy-selector.js +138 -0
  70. package/dist/meta-reasoning/strategy-selector.js.map +1 -0
  71. package/dist/self-modifying-agent.d.ts +61 -0
  72. package/dist/self-modifying-agent.d.ts.map +1 -0
  73. package/dist/self-modifying-agent.js +449 -0
  74. package/dist/self-modifying-agent.js.map +1 -0
  75. package/dist/tool-generation/gap-analyzer.d.ts +25 -0
  76. package/dist/tool-generation/gap-analyzer.d.ts.map +1 -0
  77. package/dist/tool-generation/gap-analyzer.js +153 -0
  78. package/dist/tool-generation/gap-analyzer.js.map +1 -0
  79. package/dist/tool-generation/generated-tool-store.d.ts +51 -0
  80. package/dist/tool-generation/generated-tool-store.d.ts.map +1 -0
  81. package/dist/tool-generation/generated-tool-store.js +195 -0
  82. package/dist/tool-generation/generated-tool-store.js.map +1 -0
  83. package/dist/tool-generation/index.d.ts +7 -0
  84. package/dist/tool-generation/index.d.ts.map +1 -0
  85. package/dist/tool-generation/index.js +7 -0
  86. package/dist/tool-generation/index.js.map +1 -0
  87. package/dist/tool-generation/prompts.d.ts +28 -0
  88. package/dist/tool-generation/prompts.d.ts.map +1 -0
  89. package/dist/tool-generation/prompts.js +269 -0
  90. package/dist/tool-generation/prompts.js.map +1 -0
  91. package/dist/tool-generation/tool-generator.d.ts +29 -0
  92. package/dist/tool-generation/tool-generator.d.ts.map +1 -0
  93. package/dist/tool-generation/tool-generator.js +169 -0
  94. package/dist/tool-generation/tool-generator.js.map +1 -0
  95. package/dist/tool-generation/tool-sandbox.d.ts +31 -0
  96. package/dist/tool-generation/tool-sandbox.d.ts.map +1 -0
  97. package/dist/tool-generation/tool-sandbox.js +240 -0
  98. package/dist/tool-generation/tool-sandbox.js.map +1 -0
  99. package/dist/tool-generation/tool-validator.d.ts +32 -0
  100. package/dist/tool-generation/tool-validator.d.ts.map +1 -0
  101. package/dist/tool-generation/tool-validator.js +304 -0
  102. package/dist/tool-generation/tool-validator.js.map +1 -0
  103. package/dist/utils/index.d.ts +2 -0
  104. package/dist/utils/index.d.ts.map +1 -0
  105. package/dist/utils/index.js +2 -0
  106. package/dist/utils/index.js.map +1 -0
  107. package/dist/utils/llm-helper.d.ts +6 -0
  108. package/dist/utils/llm-helper.d.ts.map +1 -0
  109. package/dist/utils/llm-helper.js +18 -0
  110. package/dist/utils/llm-helper.js.map +1 -0
  111. package/package.json +61 -0
  112. package/src/__tests__/architecture-evolution.test.ts +368 -0
  113. package/src/__tests__/constraints.test.ts +266 -0
  114. package/src/__tests__/index.test.ts +99 -0
  115. package/src/__tests__/meta-reasoning.test.ts +343 -0
  116. package/src/__tests__/tool-generation.test.ts +455 -0
  117. package/src/architecture-evolution/capability-analyzer.ts +337 -0
  118. package/src/architecture-evolution/evolution-strategy.ts +224 -0
  119. package/src/architecture-evolution/index.ts +26 -0
  120. package/src/architecture-evolution/parameter-optimizer.ts +489 -0
  121. package/src/architecture-evolution/prompts.ts +216 -0
  122. package/src/constraints/index.ts +23 -0
  123. package/src/constraints/modification-validator.ts +402 -0
  124. package/src/constraints/rollback-manager.ts +173 -0
  125. package/src/constraints/safety-constraints.ts +103 -0
  126. package/src/events/event-emitter.ts +62 -0
  127. package/src/events/index.ts +1 -0
  128. package/src/index.ts +112 -0
  129. package/src/meta-reasoning/index.ts +24 -0
  130. package/src/meta-reasoning/meta-reasoner.ts +381 -0
  131. package/src/meta-reasoning/observation-collector.ts +161 -0
  132. package/src/meta-reasoning/prompts.ts +131 -0
  133. package/src/meta-reasoning/strategy-selector.ts +179 -0
  134. package/src/self-modifying-agent.ts +585 -0
  135. package/src/tool-generation/gap-analyzer.ts +234 -0
  136. package/src/tool-generation/generated-tool-store.ts +268 -0
  137. package/src/tool-generation/index.ts +19 -0
  138. package/src/tool-generation/prompts.ts +308 -0
  139. package/src/tool-generation/tool-generator.ts +243 -0
  140. package/src/tool-generation/tool-sandbox.ts +332 -0
  141. package/src/tool-generation/tool-validator.ts +365 -0
  142. package/src/utils/index.ts +1 -0
  143. package/src/utils/llm-helper.ts +24 -0
@@ -0,0 +1,368 @@
1
+ import { describe, it, expect, beforeEach, vi } from 'vitest';
2
+ import {
3
+ CapabilityAnalyzer,
4
+ EvolutionStrategy,
5
+ ParameterOptimizer,
6
+ parseTaskProfileResponse,
7
+ parseCandidateGenerationResponse,
8
+ } from '../architecture-evolution';
9
+ import type { LLMBackend, EvolutionCandidate } from '@cogitator-ai/types';
10
+
11
/**
 * Stub LLM backend for tests. `complete` is a bare `vi.fn()` with no default
 * response, so a suite must program it before exercising code that calls it.
 */
const mockLLM: LLMBackend = {
  name: 'mock',
  complete: vi.fn(),
  validateConfig: () => true,
  supportsStreaming: () => false,
  supportsTool: () => true,
};
18
+
19
/**
 * Heuristic-path coverage for CapabilityAnalyzer: every case constructs the
 * analyzer with `enableLLMAnalysis: false`, so no LLM backend is passed in.
 */
describe('CapabilityAnalyzer', () => {
  // Each call hands back a brand-new analyzer with LLM analysis switched off.
  const createHeuristicAnalyzer = () => new CapabilityAnalyzer({ enableLLMAnalysis: false });

  it('analyzes task with heuristics', async () => {
    const taskProfile = await createHeuristicAnalyzer().analyzeTask(
      'Write a complex algorithm to optimize database queries with detailed analysis'
    );
    const { domain, complexity, reasoningDepth } = taskProfile;

    expect(domain).toBe('coding');
    expect(complexity).toBe('complex');
    expect(reasoningDepth).toBe('deep');
  });

  it('detects tool requirements', async () => {
    const taskProfile = await createHeuristicAnalyzer().analyzeTask(
      'Search for information and calculate the result'
    );

    expect(taskProfile.requiresTools).toBe(true);
    expect(taskProfile.toolIntensity).not.toBe('none');
  });

  it('detects creative tasks', async () => {
    const taskProfile = await createHeuristicAnalyzer().analyzeTask(
      'Create an imaginative story with artistic elements and novel ideas'
    );
    const { domain, creativityLevel } = taskProfile;

    expect(domain).toBe('creative');
    expect(creativityLevel).toBe('high');
  });

  it('detects time constraints', async () => {
    // One analyzer instance serves both prompts, analysed one after the other.
    const sharedAnalyzer = createHeuristicAnalyzer();

    const rushed = await sharedAnalyzer.analyzeTask('I need this done urgently asap');
    expect(rushed.timeConstraint).toBe('strict');

    const unhurried = await sharedAnalyzer.analyzeTask('When possible, no rush on this');
    expect(unhurried.timeConstraint).toBe('relaxed');
  });

  it('estimates token usage', async () => {
    const sharedAnalyzer = createHeuristicAnalyzer();

    const shortTask = await sharedAnalyzer.analyzeTask('Hello');
    const longTask = await sharedAnalyzer.analyzeTask(
      'Design and implement a comprehensive microservices architecture with event sourcing, CQRS, and distributed tracing capabilities that can handle millions of requests per second'
    );

    // Only the ordering of the two estimates is asserted, not their values.
    expect(longTask.estimatedTokens).toBeGreaterThan(shortTask.estimatedTokens);
  });
});
75
+
76
/**
 * Selection-strategy tests for EvolutionStrategy (epsilon-greedy, UCB,
 * Thompson sampling) plus score updating and the exploration heuristic.
 *
 * The cases that count outcomes over repeated `select` calls replace
 * `Math.random` with a seeded generator for the duration of the loop, so a
 * given run is reproducible instead of depending on the process-wide RNG.
 */
describe('EvolutionStrategy', () => {
  /**
   * Builds a low-risk, generation-0 candidate with an empty config.
   * `expectedImprovement` defaults to `score`, which is what most cases use.
   */
  const makeCandidate = (
    id: string,
    score: number,
    evaluationCount: number,
    expectedImprovement: number = score
  ): EvolutionCandidate => ({
    id,
    config: {},
    reasoning: '',
    expectedImprovement,
    risk: 'low',
    generation: 0,
    score,
    evaluationCount,
  });

  /** Deterministic 32-bit PRNG (mulberry32) returning floats in [0, 1). */
  const seededRandom = (seed: number): (() => number) => {
    let state = seed >>> 0;
    return () => {
      state = (state + 0x6d2b79f5) >>> 0;
      let t = state;
      t = Math.imul(t ^ (t >>> 15), t | 1);
      t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
      return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
    };
  };

  /** Runs `body` with `Math.random` pinned to a seeded stream, then restores it. */
  const withSeededRandom = (seed: number, body: () => void): void => {
    const randomSpy = vi.spyOn(Math, 'random').mockImplementation(seededRandom(seed));
    try {
      body();
    } finally {
      // Restore even when `body` throws so later tests see the real RNG.
      randomSpy.mockRestore();
    }
  };

  describe('epsilon-greedy', () => {
    it('exploits best candidate most of the time', () => {
      const strategy = new EvolutionStrategy({
        strategy: { type: 'epsilon_greedy', epsilon: 0.1 },
      });

      const candidates: EvolutionCandidate[] = [
        makeCandidate('best', 0.9, 10),
        makeCandidate('worst', 0.3, 10),
      ];

      let bestCount = 0;
      withSeededRandom(42, () => {
        for (let i = 0; i < 100; i++) {
          const result = strategy.select(candidates);
          if (result.candidate.id === 'best') bestCount++;
        }
      });

      expect(bestCount).toBeGreaterThan(80);
    });
  });

  describe('UCB', () => {
    it('explores unexplored candidates first', () => {
      const strategy = new EvolutionStrategy({
        strategy: { type: 'ucb', explorationConstant: 2 },
      });

      const candidates: EvolutionCandidate[] = [
        makeCandidate('explored', 0.9, 100),
        makeCandidate('unexplored', 0, 0, 0.5),
      ];

      const result = strategy.select(candidates);

      expect(result.candidate.id).toBe('unexplored');
      expect(result.isExploration).toBe(true);
    });

    it('balances exploration and exploitation', () => {
      const strategy = new EvolutionStrategy({
        strategy: { type: 'ucb', explorationConstant: 2 },
      });

      const candidates: EvolutionCandidate[] = [
        makeCandidate('high-score', 0.9, 50),
        makeCandidate('low-score', 0.3, 5),
      ];

      // Only checks that a selection is produced; which one is not asserted.
      const result = strategy.select(candidates);
      expect(result.candidate).toBeDefined();
    });
  });

  describe('Thompson Sampling', () => {
    it('samples from posterior distributions', () => {
      const strategy = new EvolutionStrategy({
        strategy: { type: 'thompson_sampling' },
      });

      const candidates: EvolutionCandidate[] = [
        makeCandidate('a', 0.7, 10),
        makeCandidate('b', 0.6, 10),
      ];

      const selections = new Map<string, number>();
      withSeededRandom(1337, () => {
        for (let i = 0; i < 100; i++) {
          const { id } = strategy.select(candidates).candidate;
          selections.set(id, (selections.get(id) ?? 0) + 1);
        }
      });

      // Coalesce to 0 so a never-selected candidate fails on the count itself
      // rather than on `undefined` being handed to a numeric matcher.
      expect(selections.get('a') ?? 0).toBeGreaterThan(0);
      expect(selections.get('b') ?? 0).toBeGreaterThan(0);
    });
  });

  it('updates candidate scores', () => {
    const strategy = new EvolutionStrategy({
      strategy: { type: 'epsilon_greedy', epsilon: 0.1 },
    });

    const candidate = makeCandidate('test', 0.5, 2);

    strategy.updateCandidate(candidate, 0.8);

    // Expected score is the running mean over the two prior evaluations plus the new one.
    expect(candidate.evaluationCount).toBe(3);
    expect(candidate.score).toBeCloseTo((0.5 * 2 + 0.8) / 3, 5);
  });

  it('determines exploration needs', () => {
    const strategy = new EvolutionStrategy({
      strategy: { type: 'ucb' },
    });

    const unexploredCandidates: EvolutionCandidate[] = [makeCandidate('a', 0, 0, 0.5)];

    const wellExploredCandidates: EvolutionCandidate[] = [
      makeCandidate('a', 0.7, 20, 0.5),
      makeCandidate('b', 0.72, 20, 0.5),
    ];

    expect(strategy.shouldExploreMore(unexploredCandidates)).toBe(true);
    expect(strategy.shouldExploreMore(wellExploredCandidates)).toBe(false);
  });
});
192
+
193
/**
 * ParameterOptimizer tests driven by the stubbed `mockLLM`, whose `complete`
 * mock is reset before each case to resolve with a JSON array of two
 * candidate configurations.
 */
describe('ParameterOptimizer', () => {
  /**
   * Returns a fresh copy of the baseline agent config used by every case, so
   * no object is shared between optimizer instances.
   */
  const makeBaseConfig = () =>
    ({
      model: 'gpt-4',
      temperature: 0.7,
      maxTokens: 4096,
      toolStrategy: 'sequential',
      reflectionDepth: 1,
    }) as const;

  beforeEach(() => {
    vi.clearAllMocks();
    (mockLLM.complete as ReturnType<typeof vi.fn>).mockResolvedValue({
      content: JSON.stringify([
        {
          id: 'candidate_1',
          config: { temperature: 0.5 },
          reasoning: 'Lower temperature for precision',
          expectedImprovement: 0.7,
          risk: 'low',
        },
        {
          id: 'candidate_2',
          config: { temperature: 0.9, maxTokens: 8000 },
          reasoning: 'Higher temperature for creativity',
          expectedImprovement: 0.6,
          risk: 'medium',
        },
      ]),
    });
  });

  it('optimizes architecture for task', async () => {
    const optimizer = new ParameterOptimizer({
      llm: mockLLM,
      config: {
        enabled: true,
        strategy: { type: 'ucb', explorationConstant: 2 },
        maxCandidates: 10,
        evaluationWindow: 10,
        minEvaluationsBeforeEvolution: 3,
        adaptationThreshold: 0.1,
      },
      baseConfig: makeBaseConfig(),
    });

    const result = await optimizer.optimize('Write a creative story');

    expect(result.recommendedConfig).toBeDefined();
    expect(result.shouldAdopt).toBe(true);
  });

  it('records and learns from outcomes', async () => {
    const optimizer = new ParameterOptimizer({
      llm: mockLLM,
      config: {
        enabled: true,
        strategy: { type: 'epsilon_greedy', epsilon: 0.1 },
        maxCandidates: 5,
        evaluationWindow: 5,
        minEvaluationsBeforeEvolution: 2,
        adaptationThreshold: 0.1,
      },
      baseConfig: makeBaseConfig(),
    });

    await optimizer.optimize('Test task');

    const candidateId = optimizer.getCandidates()[0]?.id;
    // Fail loudly instead of skipping every assertion when nothing was generated;
    // a silently skipped body would report this test as passing.
    if (candidateId === undefined) {
      throw new Error('optimize() produced no candidates; cannot record an outcome');
    }

    await optimizer.recordOutcome(
      candidateId,
      {
        complexity: 'simple',
        domain: 'general',
        estimatedTokens: 500,
        requiresTools: false,
        toolIntensity: 'none',
        reasoningDepth: 'shallow',
        creativityLevel: 'low',
        accuracyRequirement: 'moderate',
        timeConstraint: 'none',
      },
      {
        successRate: 1.0,
        latency: 1000,
        tokenUsage: 500,
        qualityScore: 0.9,
      }
    );

    const updated = optimizer.getCandidates().find((c) => c.id === candidateId);
    expect(updated?.evaluationCount).toBeGreaterThan(0);
  });

  it('resets state', async () => {
    const optimizer = new ParameterOptimizer({
      llm: mockLLM,
      config: {
        enabled: true,
        strategy: { type: 'ucb' },
        maxCandidates: 5,
        evaluationWindow: 5,
        minEvaluationsBeforeEvolution: 2,
        adaptationThreshold: 0.1,
      },
      baseConfig: makeBaseConfig(),
    });

    await optimizer.optimize('Test');
    expect(optimizer.getCandidates().length).toBeGreaterThan(0);

    optimizer.reset();
    expect(optimizer.getCandidates()).toHaveLength(0);
    expect(optimizer.getHistory()).toHaveLength(0);
  });
});
319
+
320
/**
 * Parser tests: a JSON object embedded after a text prefix, a JSON array of
 * candidates, and non-JSON input for both parsers.
 */
describe('Parsing functions', () => {
  it('parses task profile response', () => {
    // The JSON object is preceded by a plain-text "Analysis:" line.
    const llmOutput = `
      Analysis:
      {
        "complexity": "complex",
        "domain": "coding",
        "estimatedTokens": 5000,
        "requiresTools": true,
        "toolIntensity": "moderate",
        "reasoningDepth": "deep",
        "creativityLevel": "low",
        "accuracyRequirement": "high",
        "timeConstraint": "moderate"
      }
    `;

    const profile = parseTaskProfileResponse(llmOutput);

    expect(profile).not.toBeNull();
    expect(profile?.complexity).toBe('complex');
    expect(profile?.domain).toBe('coding');
  });

  it('parses candidate generation response', () => {
    const llmOutput = `
      [
        {
          "id": "opt_1",
          "config": { "temperature": 0.5, "maxTokens": 8000 },
          "reasoning": "Lower temperature for precision",
          "expectedImprovement": 0.7,
          "risk": "low"
        }
      ]
    `;

    const candidates = parseCandidateGenerationResponse(llmOutput);

    expect(candidates).toHaveLength(1);
    expect(candidates[0].id).toBe('opt_1');
    expect(candidates[0].config.temperature).toBe(0.5);
  });

  it('handles malformed responses', () => {
    // Non-JSON input yields null from the profile parser and an empty list from the candidate parser.
    const profile = parseTaskProfileResponse('not json');
    const candidates = parseCandidateGenerationResponse('not an array');

    expect(profile).toBeNull();
    expect(candidates).toHaveLength(0);
  });
});
@@ -0,0 +1,266 @@
1
+ import { describe, it, expect, beforeEach } from 'vitest';
2
+ import {
3
+ ModificationValidator,
4
+ RollbackManager,
5
+ InMemoryCheckpointStore,
6
+ DEFAULT_SAFETY_CONSTRAINTS,
7
+ DEFAULT_CAPABILITY_CONSTRAINTS,
8
+ DEFAULT_RESOURCE_CONSTRAINTS,
9
+ mergeSafetyConstraints,
10
+ } from '../constraints';
11
+
12
/**
 * ModificationValidator tests. The shared `validator` is rebuilt before each
 * case from the three DEFAULT_* constraint sets exported by `../constraints`.
 */
describe('ModificationValidator', () => {
  let validator: ModificationValidator;

  beforeEach(() => {
    validator = new ModificationValidator({
      safetyConstraints: DEFAULT_SAFETY_CONSTRAINTS,
      capabilityConstraints: DEFAULT_CAPABILITY_CONSTRAINTS,
      resourceConstraints: DEFAULT_RESOURCE_CONSTRAINTS,
    });
  });

  it('validates safe modifications', async () => {
    const { isValid, violations } = await validator.validate({
      type: 'config_change',
      target: 'temperature',
      changes: { temperature: 0.7 },
      reason: 'Adjust for creativity',
      context: {
        sandboxExecution: true,
        linesOfCode: 50,
        modificationDepth: 1,
      },
    });

    expect(isValid).toBe(true);
    expect(violations).toHaveLength(0);
  });

  it('rejects modifications violating safety constraints', async () => {
    // Tool creation with sandboxExecution turned off and eval() in the submitted code.
    const { isValid, violations } = await validator.validate({
      type: 'tool_creation',
      target: 'new_tool',
      changes: { code: 'eval("malicious")' },
      reason: 'Create tool',
      context: {
        sandboxExecution: false,
        linesOfCode: 50,
        modificationDepth: 1,
      },
    });

    expect(isValid).toBe(false);
    expect(violations.length).toBeGreaterThan(0);
  });

  it('handles complex constraint expressions', async () => {
    const customValidator = new ModificationValidator({
      safetyConstraints: [
        {
          id: 'complex_rule',
          rule: 'temperature <= 1.5 AND maxTokens <= 8000',
          severity: 'error',
          description: 'Complex constraint',
        },
      ],
      capabilityConstraints: [],
      resourceConstraints: [],
    });

    // Same request each time; only the temperature fed to the rule varies.
    const validateAtTemperature = (temperature: number) =>
      customValidator.validate({
        type: 'config_change',
        target: 'config',
        changes: {},
        reason: 'Test',
        context: { temperature, maxTokens: 4000 },
      });

    const withinLimit = await validateAtTemperature(1.0);
    expect(withinLimit.isValid).toBe(true);

    const overLimit = await validateAtTemperature(2.0);
    expect(overLimit.isValid).toBe(false);
  });

  it('adds custom constraints', async () => {
    validator.addSafetyConstraint({
      id: 'custom_test',
      rule: 'customValue == true',
      severity: 'error',
      description: 'Custom test constraint',
    });

    const outcome = await validator.validate({
      type: 'config_change',
      target: 'test',
      changes: {},
      reason: 'Test',
      context: { customValue: false },
    });

    expect(outcome.isValid).toBe(false);
  });
});
111
+
112
/**
 * RollbackManager tests: checkpoint creation, rollback, checkpoint diffing
 * and retention under a `maxCheckpoints` cap.
 */
describe('RollbackManager', () => {
  let manager: RollbackManager;

  /** Minimal tool stub: empty parameter spec and an `execute` that resolves to null. */
  const makeTool = (name: string, description: string) => ({
    name,
    description,
    parameters: {},
    execute: async () => null,
  });

  beforeEach(() => {
    manager = new RollbackManager();
  });

  it('creates checkpoints', async () => {
    const checkpoint = await manager.createCheckpoint(
      'agent-1',
      { model: 'gpt-4', temperature: 0.7 },
      [makeTool('tool1', 'Test')],
      []
    );

    expect(checkpoint.id).toBeDefined();
    expect(checkpoint.agentId).toBe('agent-1');
    expect(checkpoint.agentConfig.model).toBe('gpt-4');
  });

  it('rolls back to checkpoint', async () => {
    const checkpoint = await manager.createCheckpoint(
      'agent-1',
      { model: 'gpt-4', temperature: 0.7 },
      [makeTool('original_tool', 'Original')],
      []
    );

    const restored = await manager.rollbackTo(checkpoint.id);

    expect(restored).not.toBeNull();
    expect(restored?.agentConfig.model).toBe('gpt-4');
    expect(restored?.tools).toHaveLength(1);
    expect(restored?.tools[0].name).toBe('original_tool');
  });

  it('compares checkpoints', async () => {
    const cp1 = await manager.createCheckpoint(
      'agent-1',
      { model: 'gpt-4', temperature: 0.7 },
      [makeTool('tool1', 'Test')],
      []
    );

    const cp2 = await manager.createCheckpoint(
      'agent-1',
      { model: 'gpt-4o', temperature: 0.9 },
      [makeTool('tool1', 'Test'), makeTool('tool2', 'New')],
      []
    );

    const diff = manager.compareCheckpoints(cp1.id, cp2.id);

    expect(diff).not.toBeNull();
    expect(diff?.configChanges).toContain('model');
    expect(diff?.configChanges).toContain('temperature');
    expect(diff?.toolsAdded).toContain('tool2');
  });

  it('maintains checkpoint limit', async () => {
    const customManager = new RollbackManager({ maxCheckpoints: 3 });

    // Created sequentially so the manager sees them in a fixed order.
    for (let i = 0; i < 5; i++) {
      await customManager.createCheckpoint('agent-1', { iteration: i }, [], []);
    }

    const checkpoints = customManager.listCheckpoints('agent-1');
    // Lower bound guards against the cap check passing because every
    // checkpoint was dropped; the upper bound is the configured limit.
    expect(checkpoints.length).toBeGreaterThan(0);
    expect(checkpoints.length).toBeLessThanOrEqual(3);
  });
});
190
+
191
/**
 * InMemoryCheckpointStore tests: save/get round trip, per-agent listing and
 * deletion. A fresh store is created before each case.
 */
describe('InMemoryCheckpointStore', () => {
  let store: InMemoryCheckpointStore;

  // Checkpoint with an empty config, no tools and no modifications, stamped "now".
  const blankCheckpoint = (id: string, agentId: string) => ({
    id,
    agentId,
    timestamp: new Date(),
    agentConfig: {},
    tools: [],
    modifications: [],
  });

  beforeEach(() => {
    store = new InMemoryCheckpointStore();
  });

  it('saves and retrieves checkpoints', async () => {
    const saved = { ...blankCheckpoint('cp-1', 'agent-1'), agentConfig: { model: 'test' } };

    await store.save(saved);
    const loaded = await store.get('cp-1');

    expect(loaded).toEqual(saved);
  });

  it('lists checkpoints by agent', async () => {
    await store.save(blankCheckpoint('cp-1', 'agent-1'));
    await store.save(blankCheckpoint('cp-2', 'agent-2'));

    // Only the checkpoint saved under agent-1 should come back.
    const forAgentOne = await store.listByAgent('agent-1');
    expect(forAgentOne).toHaveLength(1);
    expect(forAgentOne[0].id).toBe('cp-1');
  });

  it('deletes checkpoints', async () => {
    await store.save(blankCheckpoint('cp-1', 'agent-1'));

    await store.delete('cp-1');
    const afterDelete = await store.get('cp-1');

    expect(afterDelete).toBeNull();
  });
});
254
+
255
/** Checks that merging returns the caller's constraint plus additional entries. */
describe('Constraint merging', () => {
  it('merges safety constraints', () => {
    const extraConstraints = [
      { id: 'custom', rule: 'x == 1', severity: 'error' as const, description: 'Custom' },
    ];

    const combined = mergeSafetyConstraints(extraConstraints);
    const customEntry = combined.find((constraint) => constraint.id === 'custom');

    // The merged list must be strictly longer than the custom list alone.
    expect(combined.length).toBeGreaterThan(extraConstraints.length);
    expect(customEntry).toBeDefined();
  });
});