cognitive-core 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +363 -2
- package/SKILL.md +193 -0
- package/dist/agents/index.d.ts +3 -0
- package/dist/agents/index.d.ts.map +1 -0
- package/dist/agents/index.js +5 -0
- package/dist/agents/index.js.map +1 -0
- package/dist/agents/mock-provider.d.ts +23 -0
- package/dist/agents/mock-provider.d.ts.map +1 -0
- package/dist/agents/mock-provider.js +71 -0
- package/dist/agents/mock-provider.js.map +1 -0
- package/dist/agents/types.d.ts +98 -0
- package/dist/agents/types.d.ts.map +1 -0
- package/dist/agents/types.js +44 -0
- package/dist/agents/types.js.map +1 -0
- package/dist/atlas.d.ts +196 -0
- package/dist/atlas.d.ts.map +1 -0
- package/dist/atlas.js +373 -0
- package/dist/atlas.js.map +1 -0
- package/dist/bin/cognitive-core.d.ts +18 -0
- package/dist/bin/cognitive-core.d.ts.map +1 -0
- package/dist/bin/cognitive-core.js +419 -0
- package/dist/bin/cognitive-core.js.map +1 -0
- package/dist/embeddings/bm25.d.ts +104 -0
- package/dist/embeddings/bm25.d.ts.map +1 -0
- package/dist/embeddings/bm25.js +264 -0
- package/dist/embeddings/bm25.js.map +1 -0
- package/dist/embeddings/index.d.ts +12 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +16 -0
- package/dist/embeddings/index.js.map +1 -0
- package/dist/embeddings/manager.d.ts +112 -0
- package/dist/embeddings/manager.d.ts.map +1 -0
- package/dist/embeddings/manager.js +215 -0
- package/dist/embeddings/manager.js.map +1 -0
- package/dist/embeddings/provider.d.ts +101 -0
- package/dist/embeddings/provider.d.ts.map +1 -0
- package/dist/embeddings/provider.js +232 -0
- package/dist/embeddings/provider.js.map +1 -0
- package/dist/embeddings/vector-store.d.ts +101 -0
- package/dist/embeddings/vector-store.d.ts.map +1 -0
- package/dist/embeddings/vector-store.js +256 -0
- package/dist/embeddings/vector-store.js.map +1 -0
- package/dist/factory.d.ts +193 -0
- package/dist/factory.d.ts.map +1 -0
- package/dist/factory.js +109 -0
- package/dist/factory.js.map +1 -0
- package/dist/index.d.ts +43 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +84 -0
- package/dist/index.js.map +1 -0
- package/dist/learning/analyzer.d.ts +110 -0
- package/dist/learning/analyzer.d.ts.map +1 -0
- package/dist/learning/analyzer.js +213 -0
- package/dist/learning/analyzer.js.map +1 -0
- package/dist/learning/effectiveness.d.ts +158 -0
- package/dist/learning/effectiveness.d.ts.map +1 -0
- package/dist/learning/effectiveness.js +251 -0
- package/dist/learning/effectiveness.js.map +1 -0
- package/dist/learning/index.d.ts +8 -0
- package/dist/learning/index.d.ts.map +1 -0
- package/dist/learning/index.js +11 -0
- package/dist/learning/index.js.map +1 -0
- package/dist/learning/llm-extractor.d.ts +88 -0
- package/dist/learning/llm-extractor.d.ts.map +1 -0
- package/dist/learning/llm-extractor.js +372 -0
- package/dist/learning/llm-extractor.js.map +1 -0
- package/dist/learning/meta-learner.d.ts +80 -0
- package/dist/learning/meta-learner.d.ts.map +1 -0
- package/dist/learning/meta-learner.js +355 -0
- package/dist/learning/meta-learner.js.map +1 -0
- package/dist/learning/pipeline.d.ts +65 -0
- package/dist/learning/pipeline.d.ts.map +1 -0
- package/dist/learning/pipeline.js +170 -0
- package/dist/learning/pipeline.js.map +1 -0
- package/dist/learning/playbook-extractor.d.ts +113 -0
- package/dist/learning/playbook-extractor.d.ts.map +1 -0
- package/dist/learning/playbook-extractor.js +523 -0
- package/dist/learning/playbook-extractor.js.map +1 -0
- package/dist/learning/usage-inference.d.ts +82 -0
- package/dist/learning/usage-inference.d.ts.map +1 -0
- package/dist/learning/usage-inference.js +261 -0
- package/dist/learning/usage-inference.js.map +1 -0
- package/dist/mcp/index.d.ts +6 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +6 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/mcp/playbook-server.d.ts +120 -0
- package/dist/mcp/playbook-server.d.ts.map +1 -0
- package/dist/mcp/playbook-server.js +427 -0
- package/dist/mcp/playbook-server.js.map +1 -0
- package/dist/memory/curated-loader.d.ts +62 -0
- package/dist/memory/curated-loader.d.ts.map +1 -0
- package/dist/memory/curated-loader.js +106 -0
- package/dist/memory/curated-loader.js.map +1 -0
- package/dist/memory/experience.d.ts +122 -0
- package/dist/memory/experience.d.ts.map +1 -0
- package/dist/memory/experience.js +392 -0
- package/dist/memory/experience.js.map +1 -0
- package/dist/memory/index.d.ts +6 -0
- package/dist/memory/index.d.ts.map +1 -0
- package/dist/memory/index.js +9 -0
- package/dist/memory/index.js.map +1 -0
- package/dist/memory/meta.d.ts +90 -0
- package/dist/memory/meta.d.ts.map +1 -0
- package/dist/memory/meta.js +362 -0
- package/dist/memory/meta.js.map +1 -0
- package/dist/memory/playbook.d.ts +133 -0
- package/dist/memory/playbook.d.ts.map +1 -0
- package/dist/memory/playbook.js +357 -0
- package/dist/memory/playbook.js.map +1 -0
- package/dist/memory/system.d.ts +167 -0
- package/dist/memory/system.d.ts.map +1 -0
- package/dist/memory/system.js +383 -0
- package/dist/memory/system.js.map +1 -0
- package/dist/runtime/backends/acp.d.ts +67 -0
- package/dist/runtime/backends/acp.d.ts.map +1 -0
- package/dist/runtime/backends/acp.js +290 -0
- package/dist/runtime/backends/acp.js.map +1 -0
- package/dist/runtime/backends/index.d.ts +5 -0
- package/dist/runtime/backends/index.d.ts.map +1 -0
- package/dist/runtime/backends/index.js +6 -0
- package/dist/runtime/backends/index.js.map +1 -0
- package/dist/runtime/backends/mock.d.ts +67 -0
- package/dist/runtime/backends/mock.d.ts.map +1 -0
- package/dist/runtime/backends/mock.js +153 -0
- package/dist/runtime/backends/mock.js.map +1 -0
- package/dist/runtime/backends/subprocess.d.ts +56 -0
- package/dist/runtime/backends/subprocess.d.ts.map +1 -0
- package/dist/runtime/backends/subprocess.js +260 -0
- package/dist/runtime/backends/subprocess.js.map +1 -0
- package/dist/runtime/flows/learning.d.ts +73 -0
- package/dist/runtime/flows/learning.d.ts.map +1 -0
- package/dist/runtime/flows/learning.js +116 -0
- package/dist/runtime/flows/learning.js.map +1 -0
- package/dist/runtime/flows/validation.d.ts +122 -0
- package/dist/runtime/flows/validation.d.ts.map +1 -0
- package/dist/runtime/flows/validation.js +223 -0
- package/dist/runtime/flows/validation.js.map +1 -0
- package/dist/runtime/index.d.ts +6 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +8 -0
- package/dist/runtime/index.js.map +1 -0
- package/dist/runtime/manager.d.ts +116 -0
- package/dist/runtime/manager.d.ts.map +1 -0
- package/dist/runtime/manager.js +416 -0
- package/dist/runtime/manager.js.map +1 -0
- package/dist/runtime/types.d.ts +138 -0
- package/dist/runtime/types.d.ts.map +1 -0
- package/dist/runtime/types.js +2 -0
- package/dist/runtime/types.js.map +1 -0
- package/dist/search/evaluator.d.ts +102 -0
- package/dist/search/evaluator.d.ts.map +1 -0
- package/dist/search/evaluator.js +352 -0
- package/dist/search/evaluator.js.map +1 -0
- package/dist/search/index.d.ts +7 -0
- package/dist/search/index.d.ts.map +1 -0
- package/dist/search/index.js +11 -0
- package/dist/search/index.js.map +1 -0
- package/dist/search/refinement-loop.d.ts +73 -0
- package/dist/search/refinement-loop.d.ts.map +1 -0
- package/dist/search/refinement-loop.js +245 -0
- package/dist/search/refinement-loop.js.map +1 -0
- package/dist/search/refinement-types.d.ts +154 -0
- package/dist/search/refinement-types.d.ts.map +1 -0
- package/dist/search/refinement-types.js +99 -0
- package/dist/search/refinement-types.js.map +1 -0
- package/dist/search/router.d.ts +61 -0
- package/dist/search/router.d.ts.map +1 -0
- package/dist/search/router.js +197 -0
- package/dist/search/router.js.map +1 -0
- package/dist/search/solver.d.ts +75 -0
- package/dist/search/solver.d.ts.map +1 -0
- package/dist/search/solver.js +216 -0
- package/dist/search/solver.js.map +1 -0
- package/dist/search/verification-runner.d.ts +125 -0
- package/dist/search/verification-runner.d.ts.map +1 -0
- package/dist/search/verification-runner.js +440 -0
- package/dist/search/verification-runner.js.map +1 -0
- package/dist/surfacing/index.d.ts +2 -0
- package/dist/surfacing/index.d.ts.map +1 -0
- package/dist/surfacing/index.js +2 -0
- package/dist/surfacing/index.js.map +1 -0
- package/dist/surfacing/skill-library.d.ts +158 -0
- package/dist/surfacing/skill-library.d.ts.map +1 -0
- package/dist/surfacing/skill-library.js +429 -0
- package/dist/surfacing/skill-library.js.map +1 -0
- package/dist/types/config.d.ts +1113 -0
- package/dist/types/config.d.ts.map +1 -0
- package/dist/types/config.js +274 -0
- package/dist/types/config.js.map +1 -0
- package/dist/types/index.d.ts +9 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +14 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/memory.d.ts +339 -0
- package/dist/types/memory.d.ts.map +1 -0
- package/dist/types/memory.js +207 -0
- package/dist/types/memory.js.map +1 -0
- package/dist/types/meta.d.ts +146 -0
- package/dist/types/meta.d.ts.map +1 -0
- package/dist/types/meta.js +51 -0
- package/dist/types/meta.js.map +1 -0
- package/dist/types/outcome.d.ts +42 -0
- package/dist/types/outcome.d.ts.map +1 -0
- package/dist/types/outcome.js +50 -0
- package/dist/types/outcome.js.map +1 -0
- package/dist/types/playbook.d.ts +119 -0
- package/dist/types/playbook.d.ts.map +1 -0
- package/dist/types/playbook.js +71 -0
- package/dist/types/playbook.js.map +1 -0
- package/dist/types/step.d.ts +44 -0
- package/dist/types/step.d.ts.map +1 -0
- package/dist/types/step.js +32 -0
- package/dist/types/step.js.map +1 -0
- package/dist/types/task.d.ts +91 -0
- package/dist/types/task.d.ts.map +1 -0
- package/dist/types/task.js +39 -0
- package/dist/types/task.js.map +1 -0
- package/dist/types/trajectory.d.ts +221 -0
- package/dist/types/trajectory.d.ts.map +1 -0
- package/dist/types/trajectory.js +60 -0
- package/dist/types/trajectory.js.map +1 -0
- package/dist/utils/index.d.ts +4 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +4 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/similarity.d.ts +31 -0
- package/dist/utils/similarity.d.ts.map +1 -0
- package/dist/utils/similarity.js +107 -0
- package/dist/utils/similarity.js.map +1 -0
- package/dist/utils/storage.d.ts +106 -0
- package/dist/utils/storage.d.ts.map +1 -0
- package/dist/utils/storage.js +203 -0
- package/dist/utils/storage.js.map +1 -0
- package/dist/utils/validation.d.ts +129 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +171 -0
- package/dist/utils/validation.js.map +1 -0
- package/package.json +61 -9
- package/scripts/migrate-to-playbooks.ts +307 -0
- package/src/agents/index.ts +14 -0
- package/src/agents/mock-provider.ts +93 -0
- package/src/agents/types.ts +137 -0
- package/src/atlas.ts +560 -0
- package/src/bin/cognitive-core.ts +470 -0
- package/src/embeddings/bm25.ts +337 -0
- package/src/embeddings/index.ts +39 -0
- package/src/embeddings/manager.ts +288 -0
- package/src/embeddings/provider.ts +311 -0
- package/src/embeddings/vector-store.ts +353 -0
- package/src/factory.ts +263 -0
- package/src/index.ts +246 -0
- package/src/learning/analyzer.ts +335 -0
- package/src/learning/effectiveness.ts +428 -0
- package/src/learning/index.ts +58 -0
- package/src/learning/llm-extractor.ts +542 -0
- package/src/learning/meta-learner.ts +516 -0
- package/src/learning/pipeline.ts +244 -0
- package/src/learning/playbook-extractor.ts +702 -0
- package/src/learning/usage-inference.ts +372 -0
- package/src/mcp/index.ts +12 -0
- package/src/mcp/playbook-server.ts +565 -0
- package/src/memory/curated-loader.ts +160 -0
- package/src/memory/experience.ts +515 -0
- package/src/memory/index.ts +27 -0
- package/src/memory/meta.ts +506 -0
- package/src/memory/playbook.ts +493 -0
- package/src/memory/system.ts +551 -0
- package/src/runtime/backends/acp.ts +378 -0
- package/src/runtime/backends/index.ts +24 -0
- package/src/runtime/backends/mock.ts +218 -0
- package/src/runtime/backends/subprocess.ts +356 -0
- package/src/runtime/flows/learning.ts +183 -0
- package/src/runtime/flows/validation.ts +381 -0
- package/src/runtime/index.ts +53 -0
- package/src/runtime/manager.ts +541 -0
- package/src/runtime/types.ts +157 -0
- package/src/search/evaluator.ts +474 -0
- package/src/search/index.ts +59 -0
- package/src/search/refinement-loop.ts +363 -0
- package/src/search/refinement-types.ts +159 -0
- package/src/search/router.ts +261 -0
- package/src/search/solver.ts +303 -0
- package/src/search/verification-runner.ts +570 -0
- package/src/surfacing/index.ts +6 -0
- package/src/surfacing/skill-library.ts +594 -0
- package/src/types/config.ts +333 -0
- package/src/types/index.ts +130 -0
- package/src/types/memory.ts +270 -0
- package/src/types/meta.ts +218 -0
- package/src/types/outcome.ts +66 -0
- package/src/types/playbook.ts +196 -0
- package/src/types/step.ts +40 -0
- package/src/types/task.ts +52 -0
- package/src/types/trajectory.ts +80 -0
- package/src/utils/index.ts +38 -0
- package/src/utils/similarity.ts +139 -0
- package/src/utils/storage.ts +249 -0
- package/src/utils/validation.ts +286 -0
- package/tests/embeddings/bm25.test.ts +130 -0
- package/tests/embeddings/manager.test.ts +205 -0
- package/tests/integration/atlas.test.ts +266 -0
- package/tests/integration/e2e.test.ts +929 -0
- package/tests/learning/analyzer.test.ts +426 -0
- package/tests/learning/effectiveness.test.ts +542 -0
- package/tests/learning/pipeline.test.ts +176 -0
- package/tests/learning/playbook-extractor-provenance.test.ts +114 -0
- package/tests/learning/usage-inference.test.ts +254 -0
- package/tests/mcp/playbook-server.test.ts +252 -0
- package/tests/memory/experience.test.ts +198 -0
- package/tests/memory/playbook.test.ts +338 -0
- package/tests/memory/provenance.test.ts +639 -0
- package/tests/memory/system.test.ts +325 -0
- package/tests/runtime/agent-manager.test.ts +512 -0
- package/tests/runtime/mock-backend.test.ts +248 -0
- package/tests/search/refinement-loop.test.ts +468 -0
- package/tests/search/refinement.test.ts +267 -0
- package/tests/search/router.test.ts +427 -0
- package/tests/surfacing/skill-library.test.ts +292 -0
- package/tests/types/outcome.test.ts +147 -0
- package/tests/types/step.test.ts +133 -0
- package/tests/types/task.test.ts +158 -0
- package/tests/types/trajectory.test.ts +253 -0
- package/tests/utils/similarity.test.ts +188 -0
- package/tests/utils/validation.test.ts +252 -0
- package/tsconfig.json +25 -0
- package/vitest.config.ts +22 -0
- package/index.d.ts +0 -4
- package/index.js +0 -4
|
@@ -0,0 +1,468 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
|
|
2
|
+
import { RefinementLoop, createRefinementLoop } from '../../src/search/refinement-loop.js';
|
|
3
|
+
import { AgentManager, createAgentManager } from '../../src/runtime/manager.js';
|
|
4
|
+
import { MockBackend, createMockBackend } from '../../src/runtime/backends/mock.js';
|
|
5
|
+
import { createMemorySystem } from '../../src/memory/system.js';
|
|
6
|
+
import { createTask } from '../../src/types/task.js';
|
|
7
|
+
import { createTrajectory } from '../../src/types/trajectory.js';
|
|
8
|
+
import { createStep } from '../../src/types/step.js';
|
|
9
|
+
import { successOutcome, failureOutcome } from '../../src/types/outcome.js';
|
|
10
|
+
import type { MemorySystem } from '../../src/memory/system.js';
|
|
11
|
+
import type { Trajectory } from '../../src/types/trajectory.js';
|
|
12
|
+
import type { AgentSpawnConfig } from '../../src/runtime/types.js';
|
|
13
|
+
import { mkdtemp, rm } from 'node:fs/promises';
|
|
14
|
+
import { join } from 'node:path';
|
|
15
|
+
import { tmpdir } from 'node:os';
|
|
16
|
+
|
|
17
|
+
describe('RefinementLoop', () => {
|
|
18
|
+
let tempDir: string;
|
|
19
|
+
let memory: MemorySystem;
|
|
20
|
+
let agentManager: AgentManager;
|
|
21
|
+
let mockBackend: MockBackend;
|
|
22
|
+
let refinementLoop: RefinementLoop;
|
|
23
|
+
|
|
24
|
+
/** Builds a new task in the 'test' domain; called once per use so tests never share a task object. */
const createTestTask = () => {
  return createTask({ domain: 'test', description: 'Test refinement task' });
};
|
|
28
|
+
|
|
29
|
+
/**
 * Builds a single-step trajectory whose outcome is a failure ('Task failed').
 * Used as the starting point the refinement loop is asked to improve on.
 */
const createFailingTrajectory = (): Trajectory => {
  const task = createTestTask();
  const steps = [createStep({ action: 'attempt', observation: 'error occurred' })];
  const outcome = failureOutcome('Task failed');

  return createTrajectory({ task, steps, outcome, agentId: 'test-agent' });
};
|
|
37
|
+
|
|
38
|
+
/**
 * Builds a three-step (analyze → implement → test) trajectory whose outcome
 * is a success ('Task completed successfully').
 */
const createSuccessfulTrajectory = (): Trajectory => {
  const task = createTestTask();
  const steps = [
    createStep({ action: 'analyze', observation: 'found solution' }),
    createStep({ action: 'implement', observation: 'code written' }),
    createStep({ action: 'test', observation: 'tests pass' }),
  ];
  const outcome = successOutcome('Task completed successfully');

  return createTrajectory({ task, steps, outcome, agentId: 'test-agent' });
};
|
|
48
|
+
|
|
49
|
+
// Per-test fixture: a memory system rooted in a fresh temp directory, an agent
// manager with one mock backend registered, and a refinement loop on top.
beforeEach(async () => {
  // Each test gets its own scratch directory under the OS temp dir.
  const scratchPrefix = join(tmpdir(), 'refinement-loop-test-');
  tempDir = await mkdtemp(scratchPrefix);

  memory = createMemorySystem(tempDir);
  await memory.init();

  agentManager = createAgentManager(memory);
  // The mock backend reports success unless a test overrides its default behavior.
  mockBackend = createMockBackend({ success: true, result: 'Refined result', durationMs: 50 });
  agentManager.registerBackend(mockBackend);

  const loopLimits = { maxIterations: 3, minAcceptableScore: 0.7 };
  refinementLoop = createRefinementLoop(agentManager, memory, loopLimits);
});
|
|
67
|
+
|
|
68
|
+
// Per-test teardown: undo spies, close the memory system, delete the temp directory.
afterEach(async () => {
  // Several tests install vi.spyOn(...) mocks and only restore them on their
  // last line; restoring here as well keeps a failed assertion in one test
  // from leaking a mocked spawn/queryV2 into the tests that follow.
  vi.restoreAllMocks();

  try {
    await memory.close();
  } finally {
    // Remove the scratch directory even if closing the memory system threw.
    await rm(tempDir, { recursive: true, force: true });
  }
});
|
|
72
|
+
|
|
73
|
+
// Covers the strategy/confidence gate that decides whether refinement runs at all.
describe('shouldTriggerRefinement', () => {
  it('should trigger refinement for explore strategy', () => {
    const triggered = refinementLoop.shouldTriggerRefinement('explore');
    expect(triggered).toBe(true);
  });

  it('should trigger refinement for fallback strategy', () => {
    const triggered = refinementLoop.shouldTriggerRefinement('fallback');
    expect(triggered).toBe(true);
  });

  it('should not trigger refinement for direct strategy by default', () => {
    const triggered = refinementLoop.shouldTriggerRefinement('direct');
    expect(triggered).toBe(false);
  });

  it('should not trigger refinement for adapt strategy by default', () => {
    const triggered = refinementLoop.shouldTriggerRefinement('adapt');
    expect(triggered).toBe(false);
  });

  it('should skip refinement when confidence exceeds threshold', () => {
    // A triggering strategy is still expected to be skipped at confidence 0.9.
    const triggered = refinementLoop.shouldTriggerRefinement('explore', 0.9);
    expect(triggered).toBe(false);
  });

  it('should trigger refinement when confidence is below threshold', () => {
    const triggered = refinementLoop.shouldTriggerRefinement('explore', 0.5);
    expect(triggered).toBe(true);
  });

  it('should respect custom trigger strategies', () => {
    const customLoop = createRefinementLoop(agentManager, memory, {
      triggerOnStrategies: ['direct', 'adapt'],
    });

    const onDirect = customLoop.shouldTriggerRefinement('direct');
    const onAdapt = customLoop.shouldTriggerRefinement('adapt');
    const onExplore = customLoop.shouldTriggerRefinement('explore');

    expect(onDirect).toBe(true);
    expect(onAdapt).toBe(true);
    expect(onExplore).toBe(false);
  });

  it('should respect custom confidence threshold', () => {
    const customLoop = createRefinementLoop(agentManager, memory, {
      confidenceThreshold: 0.5,
    });

    // 0.6 sits above the custom 0.5 threshold, 0.4 below it.
    const aboveThreshold = customLoop.shouldTriggerRefinement('explore', 0.6);
    const belowThreshold = customLoop.shouldTriggerRefinement('explore', 0.4);

    expect(aboveThreshold).toBe(false);
    expect(belowThreshold).toBe(true);
  });
});
|
|
118
|
+
|
|
119
|
+
describe('refine', () => {
|
|
120
|
+
// An already-successful trajectory must be handed back untouched, with no iterations run.
it('should return early if initial trajectory is acceptable', async () => {
  const successTrajectory = createSuccessfulTrajectory();
  const spawnConfig: AgentSpawnConfig = { agentType: 'mock', task: createTestTask() };

  const { iterations, improved, trajectory } = await refinementLoop.refine(
    successTrajectory,
    spawnConfig,
  );

  expect(iterations).toBe(0);
  expect(improved).toBe(false);
  // Identity check: the very same object is expected back, not a copy.
  expect(trajectory).toBe(successTrajectory);
});
|
|
133
|
+
|
|
134
|
+
// A failing starting trajectory must cause at least one refinement iteration,
// which in turn yields more than the single initial evaluation.
it('should iterate when initial trajectory fails', async () => {
  // The mock backend succeeds on every refinement attempt.
  mockBackend.setDefaultBehavior({
    success: true,
    result: 'Refined solution',
    durationMs: 50,
  });

  const failingTrajectory = createFailingTrajectory();
  const spawnConfig: AgentSpawnConfig = {
    agentType: 'mock',
    task: createTestTask(),
  };

  const result = await refinementLoop.refine(failingTrajectory, spawnConfig);

  expect(result.iterations).toBeGreaterThan(0);
  expect(result.evaluations.length).toBeGreaterThan(1);
});
|
|
154
|
+
|
|
155
|
+
// With a backend that never succeeds, the loop must stop exactly at maxIterations.
it('should stop at max iterations', async () => {
  const iterationCap = 2;
  const shortLoop = createRefinementLoop(agentManager, memory, {
    maxIterations: iterationCap,
    minAcceptableScore: 0.99, // Very high threshold to force all iterations
  });

  mockBackend.setDefaultBehavior({ success: false, error: 'Still failing', durationMs: 50 });

  const startingPoint = createFailingTrajectory();
  const spawnConfig: AgentSpawnConfig = { agentType: 'mock', task: createTestTask() };

  const { iterations, evaluations } = await shortLoop.refine(startingPoint, spawnConfig);

  expect(iterations).toBe(2);
  expect(evaluations.length).toBe(3); // Initial + 2 iterations
});
|
|
178
|
+
|
|
179
|
+
// Wraps the real spawn so the mock backend fails on the first call and
// succeeds from the second call on, then checks that refinement recorded
// more than the initial evaluation.
it('should track improvement', async () => {
  let callCount = 0;
  const originalSpawn = agentManager.spawn.bind(agentManager);
  vi.spyOn(agentManager, 'spawn').mockImplementation(async (config) => {
    callCount++;
    // Reconfigure the backend before delegating so each call sees the
    // behavior that matches its position in the sequence.
    const succeeded = callCount >= 2;
    mockBackend.setDefaultBehavior({
      success: succeeded,
      result: succeeded ? 'Final solution' : 'Partial solution',
      durationMs: 50,
    });
    return originalSpawn(config);
  });

  try {
    const failingTrajectory = createFailingTrajectory();
    const spawnConfig: AgentSpawnConfig = {
      agentType: 'mock',
      task: createTestTask(),
    };

    const result = await refinementLoop.refine(failingTrajectory, spawnConfig);

    expect(result.evaluations.length).toBeGreaterThan(1);
  } finally {
    // Always remove the spawn spy, even when the assertion above fails,
    // so later tests run against the real implementation.
    vi.restoreAllMocks();
  }
});
|
|
205
|
+
|
|
206
|
+
// Even when no attempt reaches the acceptance score, the result must still
// carry a best score and a trajectory.
it('should return best trajectory even if not fully successful', async () => {
  const loop = createRefinementLoop(agentManager, memory, {
    maxIterations: 2,
    minAcceptableScore: 0.95,
  });

  mockBackend.setDefaultBehavior({ success: false, error: 'Not quite there', durationMs: 50 });

  const startingPoint = createFailingTrajectory();
  const spawnConfig: AgentSpawnConfig = { agentType: 'mock', task: createTestTask() };

  const { bestScore, trajectory } = await loop.refine(startingPoint, spawnConfig);

  expect(bestScore).toBeDefined();
  expect(trajectory).toBeDefined();
});
|
|
229
|
+
|
|
230
|
+
// Replaces spawn with a stub that records every config it receives and
// returns a successful result, then inspects what the refinement loop passed in.
it('should inject failure context into refinement attempts', async () => {
  const capturedConfigs: AgentSpawnConfig[] = [];
  vi.spyOn(agentManager, 'spawn').mockImplementation(async (config) => {
    capturedConfigs.push(config);
    return {
      session: {
        id: 'mock-session',
        agentType: 'mock',
        task: config.task,
        state: 'completed' as const,
        messages: [],
        toolCalls: [],
        startTime: new Date(),
        endTime: new Date(),
        result: 'Mock result',
        metadata: {},
      },
      trajectory: createSuccessfulTrajectory(),
      success: true,
      metrics: { totalTime: 100, toolCallCount: 0, messageCount: 0 },
    };
  });

  try {
    const failingTrajectory = createFailingTrajectory();
    const spawnConfig: AgentSpawnConfig = {
      agentType: 'mock',
      task: createTestTask(),
      systemPromptAdditions: 'Original additions',
    };

    await refinementLoop.refine(failingTrajectory, spawnConfig);

    // A failing starting trajectory must lead to at least one spawn call.
    // (The previous `>= 0` check could never fail, so it verified nothing.)
    expect(capturedConfigs.length).toBeGreaterThan(0);

    const lastConfig = capturedConfigs[capturedConfigs.length - 1];
    // systemPromptAdditions may be undefined or contain failure context;
    // when present it must still be a plain string.
    // NOTE(review): the exact injected text is not asserted here because it
    // depends on the refinement-loop implementation — tighten once confirmed.
    if (lastConfig.systemPromptAdditions) {
      expect(typeof lastConfig.systemPromptAdditions).toBe('string');
    }
  } finally {
    // Remove the spawn stub even if an assertion above failed.
    vi.restoreAllMocks();
  }
});
|
|
276
|
+
});
|
|
277
|
+
|
|
278
|
+
describe('memory integration', () => {
|
|
279
|
+
// Seeds memory with an error-recovery playbook in the 'test' domain, then
// checks that refining a failing trajectory completes and records evaluations.
it('should query memory for similar failure patterns', async () => {
  // Playbook whose triggers ('error', 'failed') line up with the failing trajectory.
  await memory.playbooks.add({
    id: 'recovery-playbook',
    name: 'Error Recovery Strategy',
    applicability: {
      situations: ['Error occurred during task execution'],
      triggers: ['error', 'failed'],
      antiPatterns: [],
      domains: ['test'],
    },
    guidance: {
      strategy: 'Check error details, identify root cause, apply fix',
      tactics: ['Read error message', 'Check logs'],
      steps: ['Identify error', 'Find root cause', 'Apply fix'],
    },
    verification: {
      successIndicators: ['No errors'],
      failureIndicators: ['Still has errors'],
    },
    evolution: {
      version: '1.0.0',
      createdFrom: [],
      failures: [],
      refinements: [],
      successCount: 5,
      failureCount: 1,
    },
    confidence: 0.8,
    complexity: 'moderate',
    estimatedEffort: 2,
    createdAt: new Date(),
    updatedAt: new Date(),
  });

  const startingPoint = createFailingTrajectory();
  const spawnConfig: AgentSpawnConfig = { agentType: 'mock', task: createTestTask() };

  const refinement = await refinementLoop.refine(startingPoint, spawnConfig);

  // Refinement should finish one way or another (success or max iterations).
  expect(refinement).toBeDefined();
  expect(refinement.evaluations.length).toBeGreaterThan(0);
});
|
|
326
|
+
|
|
327
|
+
// Seeds memory with a successful experience in the 'test' domain, then checks
// that refining a failing trajectory still produces a result.
it('should use successful recovery experiences', async () => {
  // Stored experience describing a recovery from a similar initial failure.
  await memory.experiences.add({
    id: 'recovery-exp',
    taskInput: 'Test refinement task with initial failure',
    solutionOutput: 'Used alternative approach to solve the problem',
    feedback: 'Successfully recovered',
    success: true,
    domain: 'test',
    trajectoryId: 'traj-recovery',
    usageCount: 1,
    createdAt: new Date(),
    metadata: {},
  });

  const startingPoint = createFailingTrajectory();
  const spawnConfig: AgentSpawnConfig = { agentType: 'mock', task: createTestTask() };

  const refinement = await refinementLoop.refine(startingPoint, spawnConfig);

  expect(refinement).toBeDefined();
});
|
|
352
|
+
});
|
|
353
|
+
|
|
354
|
+
// The loop exposes its evaluator so callers can plug in domain verifiers.
describe('getEvaluator', () => {
  it('should return the evaluator for registering verifiers', () => {
    const exposedEvaluator = refinementLoop.getEvaluator();
    expect(exposedEvaluator).toBeDefined();

    // Registering a verifier must be possible; the test fails if this call throws.
    const verdict = { passed: true, confidence: 0.9 };
    const stubVerifier = vi.fn().mockResolvedValue(verdict);

    exposedEvaluator.registerVerifier('test', stubVerifier);
  });
});
|
|
368
|
+
|
|
369
|
+
describe('configuration options', () => {
|
|
370
|
+
// Runs one refinement iteration with injectFailureContext disabled, recording
// every config handed to a stubbed spawn, and checks that no recorded prompt
// addition carries the failure-feedback section.
it('should respect injectFailureContext option', async () => {
  const noContextLoop = createRefinementLoop(agentManager, memory, {
    injectFailureContext: false,
    maxIterations: 1,
  });

  const capturedConfigs: AgentSpawnConfig[] = [];
  vi.spyOn(agentManager, 'spawn').mockImplementation(async (config) => {
    capturedConfigs.push(config);
    return {
      session: {
        id: 'mock',
        agentType: 'mock',
        task: config.task,
        state: 'completed' as const,
        messages: [],
        toolCalls: [],
        startTime: new Date(),
        endTime: new Date(),
        result: 'Done',
        metadata: {},
      },
      trajectory: createSuccessfulTrajectory(),
      success: true,
      metrics: { totalTime: 100, toolCallCount: 0, messageCount: 0 },
    };
  });

  try {
    const failingTrajectory = createFailingTrajectory();
    await noContextLoop.refine(failingTrajectory, {
      agentType: 'mock',
      task: createTestTask(),
    });

    // Without failure context, prompts should not contain "Previous Attempt Feedback".
    // NOTE(review): the header text comes from the original author's note in
    // this test — confirm it matches the string used by the refinement loop.
    for (const config of capturedConfigs) {
      expect(config.systemPromptAdditions ?? '').not.toContain('Previous Attempt Feedback');
    }
  } finally {
    // Remove the spawn stub even if an assertion above failed.
    vi.restoreAllMocks();
  }
});
|
|
408
|
+
|
|
409
|
+
// Runs one refinement iteration with queryFailureMemory disabled and checks
// that the loop still completes and returns a result.
it('should respect queryFailureMemory option', async () => {
  // Spy (pass-through) on memory.queryV2 for the duration of the test.
  vi.spyOn(memory, 'queryV2');

  const noMemoryLoop = createRefinementLoop(agentManager, memory, {
    queryFailureMemory: false,
    maxIterations: 1,
    minAcceptableScore: 0.99,
  });

  try {
    const failingTrajectory = createFailingTrajectory();
    const result = await noMemoryLoop.refine(failingTrajectory, {
      agentType: 'mock',
      task: createTestTask(),
    });

    // queryV2 should not be called during refinement if disabled.
    // Note: It may be called by agentManager.spawn for knowledge injection,
    // so the call count is deliberately not asserted here; only completion
    // of the refinement is checked.
    expect(result).toBeDefined();
  } finally {
    // Remove the queryV2 spy even if the refinement or assertion failed.
    vi.restoreAllMocks();
  }
});
|
|
428
|
+
});
|
|
429
|
+
});
|
|
430
|
+
|
|
431
|
+
// Factory-level checks: createRefinementLoop must yield a RefinementLoop
// both with defaults and with every option overridden.
describe('createRefinementLoop', () => {
  let scratchDir: string;
  let memorySystem: MemorySystem;
  let manager: AgentManager;

  // Fresh temp-dir-backed memory system and agent manager (with a default mock backend) per test.
  beforeEach(async () => {
    const scratchPrefix = join(tmpdir(), 'create-refinement-test-');
    scratchDir = await mkdtemp(scratchPrefix);

    memorySystem = createMemorySystem(scratchDir);
    await memorySystem.init();

    manager = createAgentManager(memorySystem);
    const backend = createMockBackend();
    manager.registerBackend(backend);
  });

  // Close the memory system, then delete its scratch directory.
  afterEach(async () => {
    await memorySystem.close();
    await rm(scratchDir, { recursive: true, force: true });
  });

  it('should create a refinement loop with default config', () => {
    const created = createRefinementLoop(manager, memorySystem);

    expect(created).toBeInstanceOf(RefinementLoop);
  });

  it('should create a refinement loop with custom config', () => {
    const created = createRefinementLoop(manager, memorySystem, {
      maxIterations: 5,
      minAcceptableScore: 0.8,
      injectFailureContext: false,
      queryFailureMemory: false,
      triggerOnStrategies: ['direct'],
      confidenceThreshold: 0.9,
    });

    expect(created).toBeInstanceOf(RefinementLoop);

    // Only 'direct' was listed as a trigger strategy, so 'explore' must not trigger.
    const onDirect = created.shouldTriggerRefinement('direct');
    const onExplore = created.shouldTriggerRefinement('explore');
    expect(onDirect).toBe(true);
    expect(onExplore).toBe(false);
  });
});
|