@cogitator-ai/self-modifying 0.1.0 → 1.0.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (88):
  1. package/README.md +60 -50
  2. package/dist/architecture-evolution/capability-analyzer.d.ts.map +1 -1
  3. package/dist/architecture-evolution/capability-analyzer.js +105 -19
  4. package/dist/architecture-evolution/capability-analyzer.js.map +1 -1
  5. package/dist/architecture-evolution/evolution-strategy.d.ts.map +1 -1
  6. package/dist/architecture-evolution/evolution-strategy.js +2 -6
  7. package/dist/architecture-evolution/evolution-strategy.js.map +1 -1
  8. package/dist/architecture-evolution/index.d.ts +1 -1
  9. package/dist/architecture-evolution/index.d.ts.map +1 -1
  10. package/dist/architecture-evolution/index.js +1 -1
  11. package/dist/architecture-evolution/index.js.map +1 -1
  12. package/dist/architecture-evolution/parameter-optimizer.d.ts.map +1 -1
  13. package/dist/architecture-evolution/parameter-optimizer.js.map +1 -1
  14. package/dist/architecture-evolution/prompts.d.ts.map +1 -1
  15. package/dist/architecture-evolution/prompts.js.map +1 -1
  16. package/dist/constraints/index.d.ts +1 -1
  17. package/dist/constraints/index.d.ts.map +1 -1
  18. package/dist/constraints/index.js +1 -1
  19. package/dist/constraints/index.js.map +1 -1
  20. package/dist/constraints/modification-validator.d.ts.map +1 -1
  21. package/dist/constraints/modification-validator.js +1 -2
  22. package/dist/constraints/modification-validator.js.map +1 -1
  23. package/dist/constraints/rollback-manager.d.ts.map +1 -1
  24. package/dist/constraints/rollback-manager.js.map +1 -1
  25. package/dist/constraints/safety-constraints.d.ts.map +1 -1
  26. package/dist/constraints/safety-constraints.js +1 -3
  27. package/dist/constraints/safety-constraints.js.map +1 -1
  28. package/dist/index.d.ts +2 -2
  29. package/dist/index.d.ts.map +1 -1
  30. package/dist/index.js +2 -2
  31. package/dist/index.js.map +1 -1
  32. package/dist/meta-reasoning/meta-reasoner.d.ts.map +1 -1
  33. package/dist/meta-reasoning/meta-reasoner.js +5 -10
  34. package/dist/meta-reasoning/meta-reasoner.js.map +1 -1
  35. package/dist/meta-reasoning/observation-collector.d.ts.map +1 -1
  36. package/dist/meta-reasoning/observation-collector.js +1 -3
  37. package/dist/meta-reasoning/observation-collector.js.map +1 -1
  38. package/dist/meta-reasoning/prompts.d.ts.map +1 -1
  39. package/dist/meta-reasoning/prompts.js +4 -2
  40. package/dist/meta-reasoning/prompts.js.map +1 -1
  41. package/dist/meta-reasoning/strategy-selector.d.ts.map +1 -1
  42. package/dist/meta-reasoning/strategy-selector.js +3 -1
  43. package/dist/meta-reasoning/strategy-selector.js.map +1 -1
  44. package/dist/self-modifying-agent.d.ts.map +1 -1
  45. package/dist/self-modifying-agent.js +14 -6
  46. package/dist/self-modifying-agent.js.map +1 -1
  47. package/dist/tool-generation/gap-analyzer.d.ts.map +1 -1
  48. package/dist/tool-generation/gap-analyzer.js +4 -1
  49. package/dist/tool-generation/gap-analyzer.js.map +1 -1
  50. package/dist/tool-generation/generated-tool-store.d.ts.map +1 -1
  51. package/dist/tool-generation/generated-tool-store.js.map +1 -1
  52. package/dist/tool-generation/prompts.d.ts.map +1 -1
  53. package/dist/tool-generation/prompts.js +5 -15
  54. package/dist/tool-generation/prompts.js.map +1 -1
  55. package/dist/tool-generation/tool-generator.d.ts.map +1 -1
  56. package/dist/tool-generation/tool-generator.js.map +1 -1
  57. package/dist/tool-generation/tool-sandbox.d.ts.map +1 -1
  58. package/dist/tool-generation/tool-sandbox.js +1 -3
  59. package/dist/tool-generation/tool-sandbox.js.map +1 -1
  60. package/dist/tool-generation/tool-validator.d.ts.map +1 -1
  61. package/dist/tool-generation/tool-validator.js.map +1 -1
  62. package/package.json +4 -4
  63. package/src/__tests__/architecture-evolution.test.ts +131 -30
  64. package/src/__tests__/constraints.test.ts +61 -46
  65. package/src/__tests__/index.test.ts +4 -17
  66. package/src/__tests__/meta-reasoning.test.ts +246 -155
  67. package/src/__tests__/tool-generation.test.ts +26 -7
  68. package/src/architecture-evolution/capability-analyzer.ts +113 -31
  69. package/src/architecture-evolution/evolution-strategy.ts +20 -13
  70. package/src/architecture-evolution/index.ts +1 -4
  71. package/src/architecture-evolution/parameter-optimizer.ts +42 -23
  72. package/src/architecture-evolution/prompts.ts +14 -15
  73. package/src/constraints/index.ts +1 -4
  74. package/src/constraints/modification-validator.ts +5 -18
  75. package/src/constraints/rollback-manager.ts +1 -3
  76. package/src/constraints/safety-constraints.ts +1 -3
  77. package/src/index.ts +6 -5
  78. package/src/meta-reasoning/meta-reasoner.ts +9 -16
  79. package/src/meta-reasoning/observation-collector.ts +3 -12
  80. package/src/meta-reasoning/prompts.ts +9 -9
  81. package/src/meta-reasoning/strategy-selector.ts +5 -1
  82. package/src/self-modifying-agent.ts +25 -28
  83. package/src/tool-generation/gap-analyzer.ts +18 -14
  84. package/src/tool-generation/generated-tool-store.ts +5 -8
  85. package/src/tool-generation/prompts.ts +5 -15
  86. package/src/tool-generation/tool-generator.ts +15 -11
  87. package/src/tool-generation/tool-sandbox.ts +4 -15
  88. package/src/tool-generation/tool-validator.ts +17 -13
@@ -7,17 +7,23 @@ import {
7
7
  buildMetaAssessmentPrompt,
8
8
  parseMetaAssessmentResponse,
9
9
  } from '../meta-reasoning';
10
- import type { LLMBackend } from '@cogitator-ai/types';
10
+ import type { LLMBackend, MetaObservation } from '@cogitator-ai/types';
11
11
 
12
12
  const mockLLM: LLMBackend = {
13
- complete: vi.fn().mockResolvedValue({
13
+ chat: vi.fn().mockResolvedValue({
14
14
  content: JSON.stringify({
15
- isOnTrack: true,
15
+ onTrack: true,
16
16
  confidence: 0.8,
17
17
  issues: [],
18
- recommendations: ['Continue current approach'],
19
- requiresAdaptation: false,
18
+ opportunities: [],
19
+ reasoning: 'Continue current approach',
20
+ recommendation: {
21
+ action: 'continue',
22
+ confidence: 0.8,
23
+ reasoning: 'Making good progress',
24
+ },
20
25
  }),
26
+ usage: { outputTokens: 100 },
21
27
  }),
22
28
  name: 'mock',
23
29
  supportsTool: () => true,
@@ -27,90 +33,126 @@ const mockLLM: LLMBackend = {
27
33
 
28
34
  describe('ObservationCollector', () => {
29
35
  let collector: ObservationCollector;
36
+ const runId = 'test-run';
30
37
 
31
38
  beforeEach(() => {
32
39
  collector = new ObservationCollector();
40
+ collector.initializeRun(runId);
33
41
  });
34
42
 
35
43
  it('collects observations', () => {
36
- collector.recordAction({
44
+ collector.recordAction(runId, {
37
45
  type: 'tool_call',
38
- name: 'calculator',
39
- success: true,
46
+ toolName: 'calculator',
47
+ timestamp: Date.now(),
40
48
  duration: 100,
41
49
  });
42
50
 
43
- collector.recordAction({
51
+ collector.recordAction(runId, {
44
52
  type: 'tool_call',
45
- name: 'search',
46
- success: false,
53
+ toolName: 'search',
54
+ error: 'Not found',
55
+ timestamp: Date.now(),
47
56
  duration: 200,
48
57
  });
49
58
 
50
- const observation = collector.collect({
51
- currentProgress: 'Some progress',
52
- tokensUsed: 500,
53
- timeElapsed: 5000,
54
- toolCallsCount: 2,
55
- errorCount: 1,
56
- });
59
+ const observation = collector.collect(
60
+ {
61
+ runId,
62
+ iteration: 1,
63
+ goal: 'Test goal',
64
+ currentMode: 'analytical',
65
+ tokensUsed: 500,
66
+ timeElapsed: 5000,
67
+ iterationsRemaining: 10,
68
+ budgetRemaining: 0.9,
69
+ },
70
+ []
71
+ );
57
72
 
58
- expect(observation.actionCount).toBe(2);
59
- expect(observation.failedActions).toBe(1);
60
- expect(observation.metrics.tokensUsed).toBe(500);
73
+ expect(observation.tokensUsed).toBe(500);
74
+ expect(observation.currentMode).toBe('analytical');
61
75
  });
62
76
 
63
77
  it('calculates repetition score', () => {
64
78
  for (let i = 0; i < 5; i++) {
65
- collector.recordAction({
79
+ collector.recordAction(runId, {
66
80
  type: 'tool_call',
67
- name: 'same_tool',
68
- success: true,
81
+ toolName: 'same_tool',
82
+ input: { key: 'value' },
83
+ timestamp: Date.now(),
69
84
  duration: 100,
70
85
  });
71
86
  }
72
87
 
73
- const observation = collector.collect({
74
- currentProgress: 'Progress',
75
- tokensUsed: 100,
76
- timeElapsed: 1000,
77
- toolCallsCount: 5,
78
- errorCount: 0,
79
- });
88
+ const observation = collector.collect(
89
+ {
90
+ runId,
91
+ iteration: 1,
92
+ goal: 'Test',
93
+ currentMode: 'analytical',
94
+ tokensUsed: 100,
95
+ timeElapsed: 1000,
96
+ iterationsRemaining: 5,
97
+ budgetRemaining: 0.8,
98
+ },
99
+ []
100
+ );
80
101
 
81
102
  expect(observation.repetitionScore).toBeGreaterThan(0.5);
82
103
  });
83
104
 
84
105
  it('tracks tool success rate', () => {
85
- collector.recordAction({ type: 'tool_call', name: 'a', success: true, duration: 100 });
86
- collector.recordAction({ type: 'tool_call', name: 'b', success: true, duration: 100 });
87
- collector.recordAction({ type: 'tool_call', name: 'c', success: false, duration: 100 });
88
- collector.recordAction({ type: 'tool_call', name: 'd', success: true, duration: 100 });
89
-
90
- const observation = collector.collect({
91
- currentProgress: '',
92
- tokensUsed: 0,
93
- timeElapsed: 0,
94
- toolCallsCount: 4,
95
- errorCount: 1,
106
+ collector.recordAction(runId, {
107
+ type: 'tool_call',
108
+ toolName: 'a',
109
+ timestamp: Date.now(),
110
+ });
111
+ collector.recordAction(runId, {
112
+ type: 'tool_call',
113
+ toolName: 'b',
114
+ timestamp: Date.now(),
115
+ });
116
+ collector.recordAction(runId, {
117
+ type: 'tool_call',
118
+ toolName: 'c',
119
+ error: 'Failed',
120
+ timestamp: Date.now(),
121
+ });
122
+ collector.recordAction(runId, {
123
+ type: 'tool_call',
124
+ toolName: 'd',
125
+ timestamp: Date.now(),
96
126
  });
97
127
 
128
+ const observation = collector.collect(
129
+ {
130
+ runId,
131
+ iteration: 1,
132
+ goal: 'Test',
133
+ currentMode: 'analytical',
134
+ tokensUsed: 0,
135
+ timeElapsed: 0,
136
+ iterationsRemaining: 5,
137
+ budgetRemaining: 0.8,
138
+ },
139
+ []
140
+ );
141
+
98
142
  expect(observation.toolSuccessRate).toBe(0.75);
99
143
  });
100
144
 
101
- it('resets state', () => {
102
- collector.recordAction({ type: 'tool_call', name: 'test', success: true, duration: 100 });
103
- collector.reset();
104
-
105
- const observation = collector.collect({
106
- currentProgress: '',
107
- tokensUsed: 0,
108
- timeElapsed: 0,
109
- toolCallsCount: 0,
110
- errorCount: 0,
145
+ it('cleans up run state', () => {
146
+ collector.recordAction(runId, {
147
+ type: 'tool_call',
148
+ toolName: 'test',
149
+ timestamp: Date.now(),
111
150
  });
112
151
 
113
- expect(observation.actionCount).toBe(0);
152
+ collector.cleanupRun(runId);
153
+
154
+ const observations = collector.getObservations(runId);
155
+ expect(observations).toHaveLength(0);
114
156
  });
115
157
  });
116
158
 
@@ -118,11 +160,14 @@ describe('StrategySelector', () => {
118
160
  let selector: StrategySelector;
119
161
 
120
162
  beforeEach(() => {
121
- selector = new StrategySelector();
163
+ selector = new StrategySelector({
164
+ allowedModes: ['analytical', 'creative', 'systematic', 'intuitive'],
165
+ modeProfiles: DEFAULT_MODE_PROFILES,
166
+ });
122
167
  });
123
168
 
124
169
  it('selects mode based on task profile', () => {
125
- const result = selector.selectMode({
170
+ const mode = selector.selectForTask({
126
171
  complexity: 'complex',
127
172
  domain: 'coding',
128
173
  estimatedTokens: 5000,
@@ -132,33 +177,31 @@ describe('StrategySelector', () => {
132
177
  creativityLevel: 'low',
133
178
  accuracyRequirement: 'high',
134
179
  timeConstraint: 'none',
180
+ requiresReasoning: true,
135
181
  });
136
182
 
137
- expect(result.mode).toBeDefined();
138
- expect(result.confidence).toBeGreaterThan(0);
183
+ expect(mode).toBeDefined();
139
184
  });
140
185
 
141
- it('suggests mode switch', () => {
142
- selector.selectMode({
143
- complexity: 'simple',
144
- domain: 'general',
145
- estimatedTokens: 500,
146
- requiresTools: false,
147
- toolIntensity: 'none',
148
- reasoningDepth: 'shallow',
149
- creativityLevel: 'moderate',
150
- accuracyRequirement: 'moderate',
151
- timeConstraint: 'none',
152
- });
186
+ it('suggests mode switch on stagnation', () => {
187
+ const observation: MetaObservation = {
188
+ runId: 'test',
189
+ iteration: 5,
190
+ timestamp: Date.now(),
191
+ currentMode: 'analytical',
192
+ currentConfidence: 0.5,
193
+ progressScore: 0.3,
194
+ progressDelta: 0.01,
195
+ stagnationCount: 4,
196
+ confidenceHistory: [0.6, 0.5, 0.4],
197
+ tokensUsed: 2000,
198
+ timeElapsed: 10000,
199
+ toolSuccessRate: 0.6,
200
+ repetitionScore: 0.6,
201
+ confidenceTrend: 'falling',
202
+ };
153
203
 
154
- const suggestion = selector.suggestModeSwitch({
155
- currentIssues: ['Low confidence in outputs'],
156
- performanceMetrics: {
157
- tokensUsed: 2000,
158
- timeElapsed: 10000,
159
- qualityScore: 0.4,
160
- },
161
- });
204
+ const suggestion = selector.suggestSwitch(observation);
162
205
 
163
206
  expect(suggestion).toBeDefined();
164
207
  });
@@ -172,168 +215,216 @@ describe('StrategySelector', () => {
172
215
 
173
216
  describe('MetaReasoner', () => {
174
217
  let reasoner: MetaReasoner;
218
+ const runId = 'test-run';
175
219
 
176
220
  beforeEach(() => {
177
221
  vi.clearAllMocks();
178
222
  reasoner = new MetaReasoner({
179
223
  llm: mockLLM,
224
+ model: 'gpt-4o',
180
225
  config: {
181
226
  enabled: true,
182
227
  maxAssessmentsPerRun: 5,
183
228
  maxAdaptationsPerRun: 3,
184
- assessmentCooldown: 1000,
185
- triggers: ['on_failure', 'periodic'],
229
+ assessmentCooldown: 0,
230
+ metaAssessmentCooldown: 0,
231
+ adaptationCooldown: 0,
232
+ triggers: ['iteration_complete', 'confidence_drop', 'progress_stall'],
186
233
  tokenBudget: 2000,
187
234
  },
188
235
  });
189
236
  });
190
237
 
191
238
  it('initializes run with mode config', () => {
192
- const config = reasoner.initializeRun('run-1');
239
+ const config = reasoner.initializeRun(runId);
193
240
 
241
+ expect(config).toBeDefined();
194
242
  expect(config.mode).toBeDefined();
195
- expect(config.parameters).toBeDefined();
243
+ expect(config.temperature).toBeDefined();
196
244
  });
197
245
 
198
246
  it('determines trigger conditions', () => {
199
- reasoner.initializeRun('run-1');
247
+ reasoner.initializeRun(runId);
200
248
 
201
- const shouldTrigger = reasoner.shouldTrigger('run-1', 'on_failure', {
202
- currentProgress: 'Error occurred',
203
- tokensUsed: 500,
204
- timeElapsed: 5000,
205
- toolCallsCount: 2,
206
- errorCount: 3,
249
+ const shouldTrigger = reasoner.shouldTrigger(runId, 'progress_stall', {
250
+ iteration: 5,
251
+ confidence: 0.5,
252
+ progressDelta: 0.01,
253
+ stagnationCount: 3,
207
254
  });
208
255
 
209
256
  expect(shouldTrigger).toBe(true);
210
257
  });
211
258
 
212
259
  it('collects observations', () => {
260
+ reasoner.initializeRun(runId);
261
+
213
262
  const observation = reasoner.observe(
214
263
  {
215
- currentProgress: 'Making progress',
264
+ runId,
265
+ iteration: 1,
266
+ goal: 'Test goal',
267
+ currentMode: 'analytical',
216
268
  tokensUsed: 1000,
217
269
  timeElapsed: 10000,
218
- toolCallsCount: 5,
219
- errorCount: 0,
270
+ iterationsRemaining: 5,
271
+ budgetRemaining: 0.8,
220
272
  },
221
- {
222
- confidence: 0.8,
223
- relevance: 0.9,
224
- coherence: 0.85,
225
- }
273
+ [{ type: 'observation', content: 'Test insight', confidence: 0.8 }]
226
274
  );
227
275
 
228
- expect(observation.insights.confidence).toBe(0.8);
229
- expect(observation.metrics.tokensUsed).toBe(1000);
276
+ expect(observation.tokensUsed).toBe(1000);
230
277
  });
231
278
 
232
279
  it('performs assessment', async () => {
233
- reasoner.initializeRun('run-1');
234
-
235
- const observation = reasoner.observe(
236
- {
237
- currentProgress: 'Some output',
238
- tokensUsed: 500,
239
- timeElapsed: 3000,
240
- toolCallsCount: 2,
241
- errorCount: 0,
242
- },
243
- { confidence: 0.7, relevance: 0.8, coherence: 0.9 }
244
- );
280
+ reasoner.initializeRun(runId);
281
+
282
+ const observation: MetaObservation = {
283
+ runId,
284
+ iteration: 1,
285
+ timestamp: Date.now(),
286
+ goal: 'Test goal',
287
+ currentMode: 'analytical',
288
+ currentConfidence: 0.7,
289
+ progressScore: 0.5,
290
+ progressDelta: 0.1,
291
+ stagnationCount: 0,
292
+ confidenceHistory: [0.6, 0.7],
293
+ tokensUsed: 500,
294
+ timeElapsed: 3000,
295
+ iterationsRemaining: 5,
296
+ budgetRemaining: 0.9,
297
+ toolSuccessRate: 0.8,
298
+ repetitionScore: 0.2,
299
+ confidenceTrend: 'stable',
300
+ };
245
301
 
246
302
  const assessment = await reasoner.assess(observation);
247
303
 
248
- expect(assessment.isOnTrack).toBeDefined();
304
+ expect(assessment.onTrack).toBeDefined();
249
305
  expect(assessment.confidence).toBeDefined();
250
306
  });
251
307
 
252
308
  it('adapts strategy when needed', async () => {
253
- reasoner.initializeRun('run-1');
309
+ reasoner.initializeRun(runId);
254
310
 
255
311
  const assessment = {
256
- isOnTrack: false,
257
- confidence: 0.4,
258
- issues: ['Low quality outputs'],
259
- recommendations: ['Switch to more analytical mode'],
260
- requiresAdaptation: true,
261
- suggestedMode: 'analytical' as const,
312
+ id: 'assess-1',
313
+ observationId: 'obs-1',
314
+ timestamp: Date.now(),
315
+ onTrack: false,
316
+ confidence: 0.7,
317
+ issues: [],
318
+ opportunities: [],
319
+ reasoning: 'Need to switch mode',
320
+ recommendation: {
321
+ action: 'switch_mode' as const,
322
+ newMode: 'creative' as const,
323
+ confidence: 0.8,
324
+ reasoning: 'Creative mode better for current task',
325
+ },
326
+ assessmentDuration: 100,
327
+ assessmentCost: 0.001,
262
328
  };
263
329
 
264
- const adaptation = await reasoner.adapt('run-1', assessment);
330
+ const adaptation = await reasoner.adapt(runId, assessment);
265
331
 
266
332
  expect(adaptation).not.toBeNull();
267
333
  if (adaptation) {
268
- expect(adaptation.newMode).toBe('analytical');
334
+ expect(adaptation.type).toBe('mode_switch');
269
335
  }
270
336
  });
271
337
 
272
- it('supports rollback', () => {
273
- reasoner.initializeRun('run-1');
338
+ it('supports rollback', async () => {
339
+ reasoner.initializeRun(runId);
274
340
 
275
- reasoner.adapt('run-1', {
276
- isOnTrack: false,
277
- confidence: 0.3,
341
+ const assessment = {
342
+ id: 'assess-1',
343
+ observationId: 'obs-1',
344
+ timestamp: Date.now(),
345
+ onTrack: false,
346
+ confidence: 0.7,
278
347
  issues: [],
279
- recommendations: [],
280
- requiresAdaptation: true,
281
- suggestedMode: 'creative',
282
- });
348
+ opportunities: [],
349
+ reasoning: 'Switch needed',
350
+ recommendation: {
351
+ action: 'switch_mode' as const,
352
+ newMode: 'creative' as const,
353
+ confidence: 0.8,
354
+ reasoning: 'Try creative mode',
355
+ },
356
+ assessmentDuration: 100,
357
+ assessmentCost: 0.001,
358
+ };
283
359
 
284
- const rollback = reasoner.rollback('run-1');
360
+ await reasoner.adapt(runId, assessment);
361
+ const rollback = reasoner.rollback(runId);
285
362
 
286
363
  expect(rollback).not.toBeNull();
287
364
  if (rollback) {
288
- expect(rollback.isRollback).toBe(true);
365
+ expect(rollback.type).toBe('rollback');
289
366
  }
290
367
  });
291
368
  });
292
369
 
293
370
  describe('Meta-reasoning prompts', () => {
294
371
  it('builds assessment prompt', () => {
295
- const prompt = buildMetaAssessmentPrompt({
296
- id: 'obs-1',
297
- timestamp: new Date(),
298
- metrics: {
299
- tokensUsed: 1000,
300
- timeElapsed: 5000,
301
- progressPercentage: 50,
302
- },
303
- insights: {
304
- confidence: 0.7,
305
- relevance: 0.8,
306
- coherence: 0.9,
307
- },
308
- actionCount: 5,
309
- failedActions: 1,
310
- repetitionScore: 0.2,
372
+ const observation: MetaObservation = {
373
+ runId: 'test',
374
+ iteration: 3,
375
+ timestamp: Date.now(),
376
+ goal: 'Complete the task',
377
+ currentMode: 'analytical',
378
+ currentConfidence: 0.7,
379
+ progressScore: 0.5,
380
+ progressDelta: 0.1,
381
+ stagnationCount: 0,
382
+ confidenceHistory: [0.6, 0.65, 0.7],
383
+ tokensUsed: 1000,
384
+ timeElapsed: 5000,
385
+ iterationsRemaining: 7,
386
+ budgetRemaining: 0.8,
311
387
  toolSuccessRate: 0.8,
388
+ repetitionScore: 0.2,
312
389
  confidenceTrend: 'stable',
390
+ recentActions: [
391
+ { type: 'tool_call', toolName: 'search' },
392
+ { type: 'tool_call', toolName: 'calculator' },
393
+ ],
394
+ };
395
+
396
+ const prompt = buildMetaAssessmentPrompt(observation, {
397
+ allowedModes: ['analytical', 'creative'],
398
+ currentModeConfig: { mode: 'analytical', temperature: 0.3, depth: 3 },
313
399
  });
314
400
 
315
- expect(prompt).toContain('tokensUsed');
316
- expect(prompt).toContain('confidence');
401
+ expect(prompt).toContain('1000');
402
+ expect(prompt).toContain('analytical');
317
403
  });
318
404
 
319
405
  it('parses assessment response', () => {
320
406
  const response = `
321
407
  Here is my assessment:
322
408
  {
323
- "isOnTrack": true,
409
+ "onTrack": true,
324
410
  "confidence": 0.85,
325
- "issues": ["Minor formatting issues"],
326
- "recommendations": ["Continue with current approach"],
327
- "requiresAdaptation": false
411
+ "issues": [{"type": "minor", "severity": "low", "description": "Minor formatting issues"}],
412
+ "opportunities": [],
413
+ "reasoning": "Good progress",
414
+ "recommendation": {
415
+ "action": "continue",
416
+ "confidence": 0.9,
417
+ "reasoning": "Continue with current approach"
418
+ }
328
419
  }
329
420
  `;
330
421
 
331
422
  const parsed = parseMetaAssessmentResponse(response);
332
423
 
333
424
  expect(parsed).not.toBeNull();
334
- expect(parsed?.isOnTrack).toBe(true);
425
+ expect(parsed?.onTrack).toBe(true);
335
426
  expect(parsed?.confidence).toBe(0.85);
336
- expect(parsed?.issues).toContain('Minor formatting issues');
427
+ expect(parsed?.issues).toHaveLength(1);
337
428
  });
338
429
 
339
430
  it('handles malformed response', () => {
@@ -238,10 +238,14 @@ describe('GapAnalyzer', () => {
238
238
 
239
239
  const analyzer = new GapAnalyzer({ llm: mockLLM, config: mockToolConfig });
240
240
 
241
- const result = await analyzer.analyze(
242
- 'Parse and analyze the sales.csv file',
243
- [{ name: 'calculator', description: 'Perform calculations', parameters: {}, execute: async () => null }]
244
- );
241
+ const result = await analyzer.analyze('Parse and analyze the sales.csv file', [
242
+ {
243
+ name: 'calculator',
244
+ description: 'Perform calculations',
245
+ parameters: {},
246
+ execute: async () => null,
247
+ },
248
+ ]);
245
249
 
246
250
  expect(result.gaps.length).toBe(1);
247
251
  expect(result.gaps[0].suggestedToolName).toBe('csv_parser');
@@ -366,9 +370,24 @@ describe('InMemoryGeneratedToolStore', () => {
366
370
  status: 'active',
367
371
  });
368
372
 
369
- await store.recordUsage({ toolId: 'tool-1', timestamp: new Date(), success: true, executionTime: 100 });
370
- await store.recordUsage({ toolId: 'tool-1', timestamp: new Date(), success: true, executionTime: 150 });
371
- await store.recordUsage({ toolId: 'tool-1', timestamp: new Date(), success: false, executionTime: 200 });
373
+ await store.recordUsage({
374
+ toolId: 'tool-1',
375
+ timestamp: new Date(),
376
+ success: true,
377
+ executionTime: 100,
378
+ });
379
+ await store.recordUsage({
380
+ toolId: 'tool-1',
381
+ timestamp: new Date(),
382
+ success: true,
383
+ executionTime: 150,
384
+ });
385
+ await store.recordUsage({
386
+ toolId: 'tool-1',
387
+ timestamp: new Date(),
388
+ success: false,
389
+ executionTime: 200,
390
+ });
372
391
 
373
392
  const metrics = await store.getMetrics('tool-1');
374
393