@cogitator-ai/self-modifying 0.1.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +60 -50
- package/dist/architecture-evolution/capability-analyzer.d.ts.map +1 -1
- package/dist/architecture-evolution/capability-analyzer.js +105 -19
- package/dist/architecture-evolution/capability-analyzer.js.map +1 -1
- package/dist/architecture-evolution/evolution-strategy.d.ts.map +1 -1
- package/dist/architecture-evolution/evolution-strategy.js +2 -6
- package/dist/architecture-evolution/evolution-strategy.js.map +1 -1
- package/dist/architecture-evolution/index.d.ts +1 -1
- package/dist/architecture-evolution/index.d.ts.map +1 -1
- package/dist/architecture-evolution/index.js +1 -1
- package/dist/architecture-evolution/index.js.map +1 -1
- package/dist/architecture-evolution/parameter-optimizer.d.ts.map +1 -1
- package/dist/architecture-evolution/parameter-optimizer.js +1 -1
- package/dist/architecture-evolution/parameter-optimizer.js.map +1 -1
- package/dist/architecture-evolution/prompts.d.ts.map +1 -1
- package/dist/architecture-evolution/prompts.js +3 -3
- package/dist/architecture-evolution/prompts.js.map +1 -1
- package/dist/constraints/index.d.ts +1 -1
- package/dist/constraints/index.d.ts.map +1 -1
- package/dist/constraints/index.js +1 -1
- package/dist/constraints/index.js.map +1 -1
- package/dist/constraints/modification-validator.d.ts.map +1 -1
- package/dist/constraints/modification-validator.js +6 -7
- package/dist/constraints/modification-validator.js.map +1 -1
- package/dist/constraints/rollback-manager.d.ts.map +1 -1
- package/dist/constraints/rollback-manager.js.map +1 -1
- package/dist/constraints/safety-constraints.d.ts.map +1 -1
- package/dist/constraints/safety-constraints.js +1 -3
- package/dist/constraints/safety-constraints.js.map +1 -1
- package/dist/events/event-emitter.js +1 -1
- package/dist/events/event-emitter.js.map +1 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/meta-reasoning/meta-reasoner.d.ts.map +1 -1
- package/dist/meta-reasoning/meta-reasoner.js +6 -11
- package/dist/meta-reasoning/meta-reasoner.js.map +1 -1
- package/dist/meta-reasoning/observation-collector.d.ts.map +1 -1
- package/dist/meta-reasoning/observation-collector.js +1 -3
- package/dist/meta-reasoning/observation-collector.js.map +1 -1
- package/dist/meta-reasoning/prompts.d.ts.map +1 -1
- package/dist/meta-reasoning/prompts.js +5 -3
- package/dist/meta-reasoning/prompts.js.map +1 -1
- package/dist/meta-reasoning/strategy-selector.d.ts.map +1 -1
- package/dist/meta-reasoning/strategy-selector.js +3 -1
- package/dist/meta-reasoning/strategy-selector.js.map +1 -1
- package/dist/self-modifying-agent.d.ts.map +1 -1
- package/dist/self-modifying-agent.js +27 -23
- package/dist/self-modifying-agent.js.map +1 -1
- package/dist/tool-generation/gap-analyzer.d.ts.map +1 -1
- package/dist/tool-generation/gap-analyzer.js +5 -2
- package/dist/tool-generation/gap-analyzer.js.map +1 -1
- package/dist/tool-generation/generated-tool-store.d.ts.map +1 -1
- package/dist/tool-generation/generated-tool-store.js.map +1 -1
- package/dist/tool-generation/prompts.d.ts.map +1 -1
- package/dist/tool-generation/prompts.js +8 -18
- package/dist/tool-generation/prompts.js.map +1 -1
- package/dist/tool-generation/tool-generator.d.ts +1 -1
- package/dist/tool-generation/tool-generator.d.ts.map +1 -1
- package/dist/tool-generation/tool-generator.js +3 -2
- package/dist/tool-generation/tool-generator.js.map +1 -1
- package/dist/tool-generation/tool-sandbox.d.ts.map +1 -1
- package/dist/tool-generation/tool-sandbox.js +8 -9
- package/dist/tool-generation/tool-sandbox.js.map +1 -1
- package/dist/tool-generation/tool-validator.d.ts.map +1 -1
- package/dist/tool-generation/tool-validator.js +5 -5
- package/dist/tool-generation/tool-validator.js.map +1 -1
- package/package.json +4 -4
- package/src/__tests__/architecture-evolution.test.ts +131 -30
- package/src/__tests__/constraints.test.ts +61 -46
- package/src/__tests__/index.test.ts +4 -17
- package/src/__tests__/meta-reasoning.test.ts +246 -155
- package/src/__tests__/tool-generation.test.ts +26 -7
- package/src/architecture-evolution/capability-analyzer.ts +113 -31
- package/src/architecture-evolution/evolution-strategy.ts +20 -13
- package/src/architecture-evolution/index.ts +1 -4
- package/src/architecture-evolution/parameter-optimizer.ts +44 -25
- package/src/architecture-evolution/prompts.ts +17 -18
- package/src/constraints/index.ts +1 -4
- package/src/constraints/modification-validator.ts +10 -23
- package/src/constraints/rollback-manager.ts +1 -3
- package/src/constraints/safety-constraints.ts +1 -3
- package/src/events/event-emitter.ts +1 -1
- package/src/index.ts +6 -5
- package/src/meta-reasoning/meta-reasoner.ts +10 -17
- package/src/meta-reasoning/observation-collector.ts +3 -12
- package/src/meta-reasoning/prompts.ts +10 -10
- package/src/meta-reasoning/strategy-selector.ts +5 -1
- package/src/self-modifying-agent.ts +38 -45
- package/src/tool-generation/gap-analyzer.ts +19 -15
- package/src/tool-generation/generated-tool-store.ts +5 -8
- package/src/tool-generation/prompts.ts +8 -18
- package/src/tool-generation/tool-generator.ts +18 -14
- package/src/tool-generation/tool-sandbox.ts +12 -23
- package/src/tool-generation/tool-validator.ts +22 -18
|
@@ -7,17 +7,23 @@ import {
|
|
|
7
7
|
buildMetaAssessmentPrompt,
|
|
8
8
|
parseMetaAssessmentResponse,
|
|
9
9
|
} from '../meta-reasoning';
|
|
10
|
-
import type { LLMBackend } from '@cogitator-ai/types';
|
|
10
|
+
import type { LLMBackend, MetaObservation } from '@cogitator-ai/types';
|
|
11
11
|
|
|
12
12
|
const mockLLM: LLMBackend = {
|
|
13
|
-
|
|
13
|
+
chat: vi.fn().mockResolvedValue({
|
|
14
14
|
content: JSON.stringify({
|
|
15
|
-
|
|
15
|
+
onTrack: true,
|
|
16
16
|
confidence: 0.8,
|
|
17
17
|
issues: [],
|
|
18
|
-
|
|
19
|
-
|
|
18
|
+
opportunities: [],
|
|
19
|
+
reasoning: 'Continue current approach',
|
|
20
|
+
recommendation: {
|
|
21
|
+
action: 'continue',
|
|
22
|
+
confidence: 0.8,
|
|
23
|
+
reasoning: 'Making good progress',
|
|
24
|
+
},
|
|
20
25
|
}),
|
|
26
|
+
usage: { outputTokens: 100 },
|
|
21
27
|
}),
|
|
22
28
|
name: 'mock',
|
|
23
29
|
supportsTool: () => true,
|
|
@@ -27,90 +33,126 @@ const mockLLM: LLMBackend = {
|
|
|
27
33
|
|
|
28
34
|
describe('ObservationCollector', () => {
|
|
29
35
|
let collector: ObservationCollector;
|
|
36
|
+
const runId = 'test-run';
|
|
30
37
|
|
|
31
38
|
beforeEach(() => {
|
|
32
39
|
collector = new ObservationCollector();
|
|
40
|
+
collector.initializeRun(runId);
|
|
33
41
|
});
|
|
34
42
|
|
|
35
43
|
it('collects observations', () => {
|
|
36
|
-
collector.recordAction({
|
|
44
|
+
collector.recordAction(runId, {
|
|
37
45
|
type: 'tool_call',
|
|
38
|
-
|
|
39
|
-
|
|
46
|
+
toolName: 'calculator',
|
|
47
|
+
timestamp: Date.now(),
|
|
40
48
|
duration: 100,
|
|
41
49
|
});
|
|
42
50
|
|
|
43
|
-
collector.recordAction({
|
|
51
|
+
collector.recordAction(runId, {
|
|
44
52
|
type: 'tool_call',
|
|
45
|
-
|
|
46
|
-
|
|
53
|
+
toolName: 'search',
|
|
54
|
+
error: 'Not found',
|
|
55
|
+
timestamp: Date.now(),
|
|
47
56
|
duration: 200,
|
|
48
57
|
});
|
|
49
58
|
|
|
50
|
-
const observation = collector.collect(
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
59
|
+
const observation = collector.collect(
|
|
60
|
+
{
|
|
61
|
+
runId,
|
|
62
|
+
iteration: 1,
|
|
63
|
+
goal: 'Test goal',
|
|
64
|
+
currentMode: 'analytical',
|
|
65
|
+
tokensUsed: 500,
|
|
66
|
+
timeElapsed: 5000,
|
|
67
|
+
iterationsRemaining: 10,
|
|
68
|
+
budgetRemaining: 0.9,
|
|
69
|
+
},
|
|
70
|
+
[]
|
|
71
|
+
);
|
|
57
72
|
|
|
58
|
-
expect(observation.
|
|
59
|
-
expect(observation.
|
|
60
|
-
expect(observation.metrics.tokensUsed).toBe(500);
|
|
73
|
+
expect(observation.tokensUsed).toBe(500);
|
|
74
|
+
expect(observation.currentMode).toBe('analytical');
|
|
61
75
|
});
|
|
62
76
|
|
|
63
77
|
it('calculates repetition score', () => {
|
|
64
78
|
for (let i = 0; i < 5; i++) {
|
|
65
|
-
collector.recordAction({
|
|
79
|
+
collector.recordAction(runId, {
|
|
66
80
|
type: 'tool_call',
|
|
67
|
-
|
|
68
|
-
|
|
81
|
+
toolName: 'same_tool',
|
|
82
|
+
input: { key: 'value' },
|
|
83
|
+
timestamp: Date.now(),
|
|
69
84
|
duration: 100,
|
|
70
85
|
});
|
|
71
86
|
}
|
|
72
87
|
|
|
73
|
-
const observation = collector.collect(
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
88
|
+
const observation = collector.collect(
|
|
89
|
+
{
|
|
90
|
+
runId,
|
|
91
|
+
iteration: 1,
|
|
92
|
+
goal: 'Test',
|
|
93
|
+
currentMode: 'analytical',
|
|
94
|
+
tokensUsed: 100,
|
|
95
|
+
timeElapsed: 1000,
|
|
96
|
+
iterationsRemaining: 5,
|
|
97
|
+
budgetRemaining: 0.8,
|
|
98
|
+
},
|
|
99
|
+
[]
|
|
100
|
+
);
|
|
80
101
|
|
|
81
102
|
expect(observation.repetitionScore).toBeGreaterThan(0.5);
|
|
82
103
|
});
|
|
83
104
|
|
|
84
105
|
it('tracks tool success rate', () => {
|
|
85
|
-
collector.recordAction(
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
106
|
+
collector.recordAction(runId, {
|
|
107
|
+
type: 'tool_call',
|
|
108
|
+
toolName: 'a',
|
|
109
|
+
timestamp: Date.now(),
|
|
110
|
+
});
|
|
111
|
+
collector.recordAction(runId, {
|
|
112
|
+
type: 'tool_call',
|
|
113
|
+
toolName: 'b',
|
|
114
|
+
timestamp: Date.now(),
|
|
115
|
+
});
|
|
116
|
+
collector.recordAction(runId, {
|
|
117
|
+
type: 'tool_call',
|
|
118
|
+
toolName: 'c',
|
|
119
|
+
error: 'Failed',
|
|
120
|
+
timestamp: Date.now(),
|
|
121
|
+
});
|
|
122
|
+
collector.recordAction(runId, {
|
|
123
|
+
type: 'tool_call',
|
|
124
|
+
toolName: 'd',
|
|
125
|
+
timestamp: Date.now(),
|
|
96
126
|
});
|
|
97
127
|
|
|
128
|
+
const observation = collector.collect(
|
|
129
|
+
{
|
|
130
|
+
runId,
|
|
131
|
+
iteration: 1,
|
|
132
|
+
goal: 'Test',
|
|
133
|
+
currentMode: 'analytical',
|
|
134
|
+
tokensUsed: 0,
|
|
135
|
+
timeElapsed: 0,
|
|
136
|
+
iterationsRemaining: 5,
|
|
137
|
+
budgetRemaining: 0.8,
|
|
138
|
+
},
|
|
139
|
+
[]
|
|
140
|
+
);
|
|
141
|
+
|
|
98
142
|
expect(observation.toolSuccessRate).toBe(0.75);
|
|
99
143
|
});
|
|
100
144
|
|
|
101
|
-
it('
|
|
102
|
-
collector.recordAction(
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
currentProgress: '',
|
|
107
|
-
tokensUsed: 0,
|
|
108
|
-
timeElapsed: 0,
|
|
109
|
-
toolCallsCount: 0,
|
|
110
|
-
errorCount: 0,
|
|
145
|
+
it('cleans up run state', () => {
|
|
146
|
+
collector.recordAction(runId, {
|
|
147
|
+
type: 'tool_call',
|
|
148
|
+
toolName: 'test',
|
|
149
|
+
timestamp: Date.now(),
|
|
111
150
|
});
|
|
112
151
|
|
|
113
|
-
|
|
152
|
+
collector.cleanupRun(runId);
|
|
153
|
+
|
|
154
|
+
const observations = collector.getObservations(runId);
|
|
155
|
+
expect(observations).toHaveLength(0);
|
|
114
156
|
});
|
|
115
157
|
});
|
|
116
158
|
|
|
@@ -118,11 +160,14 @@ describe('StrategySelector', () => {
|
|
|
118
160
|
let selector: StrategySelector;
|
|
119
161
|
|
|
120
162
|
beforeEach(() => {
|
|
121
|
-
selector = new StrategySelector(
|
|
163
|
+
selector = new StrategySelector({
|
|
164
|
+
allowedModes: ['analytical', 'creative', 'systematic', 'intuitive'],
|
|
165
|
+
modeProfiles: DEFAULT_MODE_PROFILES,
|
|
166
|
+
});
|
|
122
167
|
});
|
|
123
168
|
|
|
124
169
|
it('selects mode based on task profile', () => {
|
|
125
|
-
const
|
|
170
|
+
const mode = selector.selectForTask({
|
|
126
171
|
complexity: 'complex',
|
|
127
172
|
domain: 'coding',
|
|
128
173
|
estimatedTokens: 5000,
|
|
@@ -132,33 +177,31 @@ describe('StrategySelector', () => {
|
|
|
132
177
|
creativityLevel: 'low',
|
|
133
178
|
accuracyRequirement: 'high',
|
|
134
179
|
timeConstraint: 'none',
|
|
180
|
+
requiresReasoning: true,
|
|
135
181
|
});
|
|
136
182
|
|
|
137
|
-
expect(
|
|
138
|
-
expect(result.confidence).toBeGreaterThan(0);
|
|
183
|
+
expect(mode).toBeDefined();
|
|
139
184
|
});
|
|
140
185
|
|
|
141
|
-
it('suggests mode switch', () => {
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
186
|
+
it('suggests mode switch on stagnation', () => {
|
|
187
|
+
const observation: MetaObservation = {
|
|
188
|
+
runId: 'test',
|
|
189
|
+
iteration: 5,
|
|
190
|
+
timestamp: Date.now(),
|
|
191
|
+
currentMode: 'analytical',
|
|
192
|
+
currentConfidence: 0.5,
|
|
193
|
+
progressScore: 0.3,
|
|
194
|
+
progressDelta: 0.01,
|
|
195
|
+
stagnationCount: 4,
|
|
196
|
+
confidenceHistory: [0.6, 0.5, 0.4],
|
|
197
|
+
tokensUsed: 2000,
|
|
198
|
+
timeElapsed: 10000,
|
|
199
|
+
toolSuccessRate: 0.6,
|
|
200
|
+
repetitionScore: 0.6,
|
|
201
|
+
confidenceTrend: 'falling',
|
|
202
|
+
};
|
|
153
203
|
|
|
154
|
-
const suggestion = selector.
|
|
155
|
-
currentIssues: ['Low confidence in outputs'],
|
|
156
|
-
performanceMetrics: {
|
|
157
|
-
tokensUsed: 2000,
|
|
158
|
-
timeElapsed: 10000,
|
|
159
|
-
qualityScore: 0.4,
|
|
160
|
-
},
|
|
161
|
-
});
|
|
204
|
+
const suggestion = selector.suggestSwitch(observation);
|
|
162
205
|
|
|
163
206
|
expect(suggestion).toBeDefined();
|
|
164
207
|
});
|
|
@@ -172,168 +215,216 @@ describe('StrategySelector', () => {
|
|
|
172
215
|
|
|
173
216
|
describe('MetaReasoner', () => {
|
|
174
217
|
let reasoner: MetaReasoner;
|
|
218
|
+
const runId = 'test-run';
|
|
175
219
|
|
|
176
220
|
beforeEach(() => {
|
|
177
221
|
vi.clearAllMocks();
|
|
178
222
|
reasoner = new MetaReasoner({
|
|
179
223
|
llm: mockLLM,
|
|
224
|
+
model: 'gpt-4o',
|
|
180
225
|
config: {
|
|
181
226
|
enabled: true,
|
|
182
227
|
maxAssessmentsPerRun: 5,
|
|
183
228
|
maxAdaptationsPerRun: 3,
|
|
184
|
-
assessmentCooldown:
|
|
185
|
-
|
|
229
|
+
assessmentCooldown: 0,
|
|
230
|
+
metaAssessmentCooldown: 0,
|
|
231
|
+
adaptationCooldown: 0,
|
|
232
|
+
triggers: ['iteration_complete', 'confidence_drop', 'progress_stall'],
|
|
186
233
|
tokenBudget: 2000,
|
|
187
234
|
},
|
|
188
235
|
});
|
|
189
236
|
});
|
|
190
237
|
|
|
191
238
|
it('initializes run with mode config', () => {
|
|
192
|
-
const config = reasoner.initializeRun(
|
|
239
|
+
const config = reasoner.initializeRun(runId);
|
|
193
240
|
|
|
241
|
+
expect(config).toBeDefined();
|
|
194
242
|
expect(config.mode).toBeDefined();
|
|
195
|
-
expect(config.
|
|
243
|
+
expect(config.temperature).toBeDefined();
|
|
196
244
|
});
|
|
197
245
|
|
|
198
246
|
it('determines trigger conditions', () => {
|
|
199
|
-
reasoner.initializeRun(
|
|
247
|
+
reasoner.initializeRun(runId);
|
|
200
248
|
|
|
201
|
-
const shouldTrigger = reasoner.shouldTrigger(
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
errorCount: 3,
|
|
249
|
+
const shouldTrigger = reasoner.shouldTrigger(runId, 'progress_stall', {
|
|
250
|
+
iteration: 5,
|
|
251
|
+
confidence: 0.5,
|
|
252
|
+
progressDelta: 0.01,
|
|
253
|
+
stagnationCount: 3,
|
|
207
254
|
});
|
|
208
255
|
|
|
209
256
|
expect(shouldTrigger).toBe(true);
|
|
210
257
|
});
|
|
211
258
|
|
|
212
259
|
it('collects observations', () => {
|
|
260
|
+
reasoner.initializeRun(runId);
|
|
261
|
+
|
|
213
262
|
const observation = reasoner.observe(
|
|
214
263
|
{
|
|
215
|
-
|
|
264
|
+
runId,
|
|
265
|
+
iteration: 1,
|
|
266
|
+
goal: 'Test goal',
|
|
267
|
+
currentMode: 'analytical',
|
|
216
268
|
tokensUsed: 1000,
|
|
217
269
|
timeElapsed: 10000,
|
|
218
|
-
|
|
219
|
-
|
|
270
|
+
iterationsRemaining: 5,
|
|
271
|
+
budgetRemaining: 0.8,
|
|
220
272
|
},
|
|
221
|
-
{
|
|
222
|
-
confidence: 0.8,
|
|
223
|
-
relevance: 0.9,
|
|
224
|
-
coherence: 0.85,
|
|
225
|
-
}
|
|
273
|
+
[{ type: 'observation', content: 'Test insight', confidence: 0.8 }]
|
|
226
274
|
);
|
|
227
275
|
|
|
228
|
-
expect(observation.
|
|
229
|
-
expect(observation.metrics.tokensUsed).toBe(1000);
|
|
276
|
+
expect(observation.tokensUsed).toBe(1000);
|
|
230
277
|
});
|
|
231
278
|
|
|
232
279
|
it('performs assessment', async () => {
|
|
233
|
-
reasoner.initializeRun(
|
|
234
|
-
|
|
235
|
-
const observation =
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
280
|
+
reasoner.initializeRun(runId);
|
|
281
|
+
|
|
282
|
+
const observation: MetaObservation = {
|
|
283
|
+
runId,
|
|
284
|
+
iteration: 1,
|
|
285
|
+
timestamp: Date.now(),
|
|
286
|
+
goal: 'Test goal',
|
|
287
|
+
currentMode: 'analytical',
|
|
288
|
+
currentConfidence: 0.7,
|
|
289
|
+
progressScore: 0.5,
|
|
290
|
+
progressDelta: 0.1,
|
|
291
|
+
stagnationCount: 0,
|
|
292
|
+
confidenceHistory: [0.6, 0.7],
|
|
293
|
+
tokensUsed: 500,
|
|
294
|
+
timeElapsed: 3000,
|
|
295
|
+
iterationsRemaining: 5,
|
|
296
|
+
budgetRemaining: 0.9,
|
|
297
|
+
toolSuccessRate: 0.8,
|
|
298
|
+
repetitionScore: 0.2,
|
|
299
|
+
confidenceTrend: 'stable',
|
|
300
|
+
};
|
|
245
301
|
|
|
246
302
|
const assessment = await reasoner.assess(observation);
|
|
247
303
|
|
|
248
|
-
expect(assessment.
|
|
304
|
+
expect(assessment.onTrack).toBeDefined();
|
|
249
305
|
expect(assessment.confidence).toBeDefined();
|
|
250
306
|
});
|
|
251
307
|
|
|
252
308
|
it('adapts strategy when needed', async () => {
|
|
253
|
-
reasoner.initializeRun(
|
|
309
|
+
reasoner.initializeRun(runId);
|
|
254
310
|
|
|
255
311
|
const assessment = {
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
312
|
+
id: 'assess-1',
|
|
313
|
+
observationId: 'obs-1',
|
|
314
|
+
timestamp: Date.now(),
|
|
315
|
+
onTrack: false,
|
|
316
|
+
confidence: 0.7,
|
|
317
|
+
issues: [],
|
|
318
|
+
opportunities: [],
|
|
319
|
+
reasoning: 'Need to switch mode',
|
|
320
|
+
recommendation: {
|
|
321
|
+
action: 'switch_mode' as const,
|
|
322
|
+
newMode: 'creative' as const,
|
|
323
|
+
confidence: 0.8,
|
|
324
|
+
reasoning: 'Creative mode better for current task',
|
|
325
|
+
},
|
|
326
|
+
assessmentDuration: 100,
|
|
327
|
+
assessmentCost: 0.001,
|
|
262
328
|
};
|
|
263
329
|
|
|
264
|
-
const adaptation = await reasoner.adapt(
|
|
330
|
+
const adaptation = await reasoner.adapt(runId, assessment);
|
|
265
331
|
|
|
266
332
|
expect(adaptation).not.toBeNull();
|
|
267
333
|
if (adaptation) {
|
|
268
|
-
expect(adaptation.
|
|
334
|
+
expect(adaptation.type).toBe('mode_switch');
|
|
269
335
|
}
|
|
270
336
|
});
|
|
271
337
|
|
|
272
|
-
it('supports rollback', () => {
|
|
273
|
-
reasoner.initializeRun(
|
|
338
|
+
it('supports rollback', async () => {
|
|
339
|
+
reasoner.initializeRun(runId);
|
|
274
340
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
341
|
+
const assessment = {
|
|
342
|
+
id: 'assess-1',
|
|
343
|
+
observationId: 'obs-1',
|
|
344
|
+
timestamp: Date.now(),
|
|
345
|
+
onTrack: false,
|
|
346
|
+
confidence: 0.7,
|
|
278
347
|
issues: [],
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
348
|
+
opportunities: [],
|
|
349
|
+
reasoning: 'Switch needed',
|
|
350
|
+
recommendation: {
|
|
351
|
+
action: 'switch_mode' as const,
|
|
352
|
+
newMode: 'creative' as const,
|
|
353
|
+
confidence: 0.8,
|
|
354
|
+
reasoning: 'Try creative mode',
|
|
355
|
+
},
|
|
356
|
+
assessmentDuration: 100,
|
|
357
|
+
assessmentCost: 0.001,
|
|
358
|
+
};
|
|
283
359
|
|
|
284
|
-
|
|
360
|
+
await reasoner.adapt(runId, assessment);
|
|
361
|
+
const rollback = reasoner.rollback(runId);
|
|
285
362
|
|
|
286
363
|
expect(rollback).not.toBeNull();
|
|
287
364
|
if (rollback) {
|
|
288
|
-
expect(rollback.
|
|
365
|
+
expect(rollback.type).toBe('rollback');
|
|
289
366
|
}
|
|
290
367
|
});
|
|
291
368
|
});
|
|
292
369
|
|
|
293
370
|
describe('Meta-reasoning prompts', () => {
|
|
294
371
|
it('builds assessment prompt', () => {
|
|
295
|
-
const
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
repetitionScore: 0.2,
|
|
372
|
+
const observation: MetaObservation = {
|
|
373
|
+
runId: 'test',
|
|
374
|
+
iteration: 3,
|
|
375
|
+
timestamp: Date.now(),
|
|
376
|
+
goal: 'Complete the task',
|
|
377
|
+
currentMode: 'analytical',
|
|
378
|
+
currentConfidence: 0.7,
|
|
379
|
+
progressScore: 0.5,
|
|
380
|
+
progressDelta: 0.1,
|
|
381
|
+
stagnationCount: 0,
|
|
382
|
+
confidenceHistory: [0.6, 0.65, 0.7],
|
|
383
|
+
tokensUsed: 1000,
|
|
384
|
+
timeElapsed: 5000,
|
|
385
|
+
iterationsRemaining: 7,
|
|
386
|
+
budgetRemaining: 0.8,
|
|
311
387
|
toolSuccessRate: 0.8,
|
|
388
|
+
repetitionScore: 0.2,
|
|
312
389
|
confidenceTrend: 'stable',
|
|
390
|
+
recentActions: [
|
|
391
|
+
{ type: 'tool_call', toolName: 'search' },
|
|
392
|
+
{ type: 'tool_call', toolName: 'calculator' },
|
|
393
|
+
],
|
|
394
|
+
};
|
|
395
|
+
|
|
396
|
+
const prompt = buildMetaAssessmentPrompt(observation, {
|
|
397
|
+
allowedModes: ['analytical', 'creative'],
|
|
398
|
+
currentModeConfig: { mode: 'analytical', temperature: 0.3, depth: 3 },
|
|
313
399
|
});
|
|
314
400
|
|
|
315
|
-
expect(prompt).toContain('
|
|
316
|
-
expect(prompt).toContain('
|
|
401
|
+
expect(prompt).toContain('1000');
|
|
402
|
+
expect(prompt).toContain('analytical');
|
|
317
403
|
});
|
|
318
404
|
|
|
319
405
|
it('parses assessment response', () => {
|
|
320
406
|
const response = `
|
|
321
407
|
Here is my assessment:
|
|
322
408
|
{
|
|
323
|
-
"
|
|
409
|
+
"onTrack": true,
|
|
324
410
|
"confidence": 0.85,
|
|
325
|
-
"issues": ["Minor formatting issues"],
|
|
326
|
-
"
|
|
327
|
-
"
|
|
411
|
+
"issues": [{"type": "minor", "severity": "low", "description": "Minor formatting issues"}],
|
|
412
|
+
"opportunities": [],
|
|
413
|
+
"reasoning": "Good progress",
|
|
414
|
+
"recommendation": {
|
|
415
|
+
"action": "continue",
|
|
416
|
+
"confidence": 0.9,
|
|
417
|
+
"reasoning": "Continue with current approach"
|
|
418
|
+
}
|
|
328
419
|
}
|
|
329
420
|
`;
|
|
330
421
|
|
|
331
422
|
const parsed = parseMetaAssessmentResponse(response);
|
|
332
423
|
|
|
333
424
|
expect(parsed).not.toBeNull();
|
|
334
|
-
expect(parsed?.
|
|
425
|
+
expect(parsed?.onTrack).toBe(true);
|
|
335
426
|
expect(parsed?.confidence).toBe(0.85);
|
|
336
|
-
expect(parsed?.issues).
|
|
427
|
+
expect(parsed?.issues).toHaveLength(1);
|
|
337
428
|
});
|
|
338
429
|
|
|
339
430
|
it('handles malformed response', () => {
|
|
@@ -238,10 +238,14 @@ describe('GapAnalyzer', () => {
|
|
|
238
238
|
|
|
239
239
|
const analyzer = new GapAnalyzer({ llm: mockLLM, config: mockToolConfig });
|
|
240
240
|
|
|
241
|
-
const result = await analyzer.analyze(
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
241
|
+
const result = await analyzer.analyze('Parse and analyze the sales.csv file', [
|
|
242
|
+
{
|
|
243
|
+
name: 'calculator',
|
|
244
|
+
description: 'Perform calculations',
|
|
245
|
+
parameters: {},
|
|
246
|
+
execute: async () => null,
|
|
247
|
+
},
|
|
248
|
+
]);
|
|
245
249
|
|
|
246
250
|
expect(result.gaps.length).toBe(1);
|
|
247
251
|
expect(result.gaps[0].suggestedToolName).toBe('csv_parser');
|
|
@@ -366,9 +370,24 @@ describe('InMemoryGeneratedToolStore', () => {
|
|
|
366
370
|
status: 'active',
|
|
367
371
|
});
|
|
368
372
|
|
|
369
|
-
await store.recordUsage({
|
|
370
|
-
|
|
371
|
-
|
|
373
|
+
await store.recordUsage({
|
|
374
|
+
toolId: 'tool-1',
|
|
375
|
+
timestamp: new Date(),
|
|
376
|
+
success: true,
|
|
377
|
+
executionTime: 100,
|
|
378
|
+
});
|
|
379
|
+
await store.recordUsage({
|
|
380
|
+
toolId: 'tool-1',
|
|
381
|
+
timestamp: new Date(),
|
|
382
|
+
success: true,
|
|
383
|
+
executionTime: 150,
|
|
384
|
+
});
|
|
385
|
+
await store.recordUsage({
|
|
386
|
+
toolId: 'tool-1',
|
|
387
|
+
timestamp: new Date(),
|
|
388
|
+
success: false,
|
|
389
|
+
executionTime: 200,
|
|
390
|
+
});
|
|
372
391
|
|
|
373
392
|
const metrics = await store.getMetrics('tool-1');
|
|
374
393
|
|