cognitive-core 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +363 -2
- package/SKILL.md +193 -0
- package/dist/agents/index.d.ts +3 -0
- package/dist/agents/index.d.ts.map +1 -0
- package/dist/agents/index.js +5 -0
- package/dist/agents/index.js.map +1 -0
- package/dist/agents/mock-provider.d.ts +23 -0
- package/dist/agents/mock-provider.d.ts.map +1 -0
- package/dist/agents/mock-provider.js +71 -0
- package/dist/agents/mock-provider.js.map +1 -0
- package/dist/agents/types.d.ts +98 -0
- package/dist/agents/types.d.ts.map +1 -0
- package/dist/agents/types.js +44 -0
- package/dist/agents/types.js.map +1 -0
- package/dist/atlas.d.ts +196 -0
- package/dist/atlas.d.ts.map +1 -0
- package/dist/atlas.js +373 -0
- package/dist/atlas.js.map +1 -0
- package/dist/bin/cognitive-core.d.ts +18 -0
- package/dist/bin/cognitive-core.d.ts.map +1 -0
- package/dist/bin/cognitive-core.js +419 -0
- package/dist/bin/cognitive-core.js.map +1 -0
- package/dist/embeddings/bm25.d.ts +104 -0
- package/dist/embeddings/bm25.d.ts.map +1 -0
- package/dist/embeddings/bm25.js +264 -0
- package/dist/embeddings/bm25.js.map +1 -0
- package/dist/embeddings/index.d.ts +12 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +16 -0
- package/dist/embeddings/index.js.map +1 -0
- package/dist/embeddings/manager.d.ts +112 -0
- package/dist/embeddings/manager.d.ts.map +1 -0
- package/dist/embeddings/manager.js +215 -0
- package/dist/embeddings/manager.js.map +1 -0
- package/dist/embeddings/provider.d.ts +101 -0
- package/dist/embeddings/provider.d.ts.map +1 -0
- package/dist/embeddings/provider.js +232 -0
- package/dist/embeddings/provider.js.map +1 -0
- package/dist/embeddings/vector-store.d.ts +101 -0
- package/dist/embeddings/vector-store.d.ts.map +1 -0
- package/dist/embeddings/vector-store.js +256 -0
- package/dist/embeddings/vector-store.js.map +1 -0
- package/dist/factory.d.ts +193 -0
- package/dist/factory.d.ts.map +1 -0
- package/dist/factory.js +109 -0
- package/dist/factory.js.map +1 -0
- package/dist/index.d.ts +43 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +84 -0
- package/dist/index.js.map +1 -0
- package/dist/learning/analyzer.d.ts +110 -0
- package/dist/learning/analyzer.d.ts.map +1 -0
- package/dist/learning/analyzer.js +213 -0
- package/dist/learning/analyzer.js.map +1 -0
- package/dist/learning/effectiveness.d.ts +158 -0
- package/dist/learning/effectiveness.d.ts.map +1 -0
- package/dist/learning/effectiveness.js +251 -0
- package/dist/learning/effectiveness.js.map +1 -0
- package/dist/learning/index.d.ts +8 -0
- package/dist/learning/index.d.ts.map +1 -0
- package/dist/learning/index.js +11 -0
- package/dist/learning/index.js.map +1 -0
- package/dist/learning/llm-extractor.d.ts +88 -0
- package/dist/learning/llm-extractor.d.ts.map +1 -0
- package/dist/learning/llm-extractor.js +372 -0
- package/dist/learning/llm-extractor.js.map +1 -0
- package/dist/learning/meta-learner.d.ts +80 -0
- package/dist/learning/meta-learner.d.ts.map +1 -0
- package/dist/learning/meta-learner.js +355 -0
- package/dist/learning/meta-learner.js.map +1 -0
- package/dist/learning/pipeline.d.ts +65 -0
- package/dist/learning/pipeline.d.ts.map +1 -0
- package/dist/learning/pipeline.js +170 -0
- package/dist/learning/pipeline.js.map +1 -0
- package/dist/learning/playbook-extractor.d.ts +113 -0
- package/dist/learning/playbook-extractor.d.ts.map +1 -0
- package/dist/learning/playbook-extractor.js +523 -0
- package/dist/learning/playbook-extractor.js.map +1 -0
- package/dist/learning/usage-inference.d.ts +82 -0
- package/dist/learning/usage-inference.d.ts.map +1 -0
- package/dist/learning/usage-inference.js +261 -0
- package/dist/learning/usage-inference.js.map +1 -0
- package/dist/mcp/index.d.ts +6 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +6 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/mcp/playbook-server.d.ts +120 -0
- package/dist/mcp/playbook-server.d.ts.map +1 -0
- package/dist/mcp/playbook-server.js +427 -0
- package/dist/mcp/playbook-server.js.map +1 -0
- package/dist/memory/curated-loader.d.ts +62 -0
- package/dist/memory/curated-loader.d.ts.map +1 -0
- package/dist/memory/curated-loader.js +106 -0
- package/dist/memory/curated-loader.js.map +1 -0
- package/dist/memory/experience.d.ts +122 -0
- package/dist/memory/experience.d.ts.map +1 -0
- package/dist/memory/experience.js +392 -0
- package/dist/memory/experience.js.map +1 -0
- package/dist/memory/index.d.ts +6 -0
- package/dist/memory/index.d.ts.map +1 -0
- package/dist/memory/index.js +9 -0
- package/dist/memory/index.js.map +1 -0
- package/dist/memory/meta.d.ts +90 -0
- package/dist/memory/meta.d.ts.map +1 -0
- package/dist/memory/meta.js +362 -0
- package/dist/memory/meta.js.map +1 -0
- package/dist/memory/playbook.d.ts +133 -0
- package/dist/memory/playbook.d.ts.map +1 -0
- package/dist/memory/playbook.js +357 -0
- package/dist/memory/playbook.js.map +1 -0
- package/dist/memory/system.d.ts +167 -0
- package/dist/memory/system.d.ts.map +1 -0
- package/dist/memory/system.js +383 -0
- package/dist/memory/system.js.map +1 -0
- package/dist/runtime/backends/acp.d.ts +67 -0
- package/dist/runtime/backends/acp.d.ts.map +1 -0
- package/dist/runtime/backends/acp.js +290 -0
- package/dist/runtime/backends/acp.js.map +1 -0
- package/dist/runtime/backends/index.d.ts +5 -0
- package/dist/runtime/backends/index.d.ts.map +1 -0
- package/dist/runtime/backends/index.js +6 -0
- package/dist/runtime/backends/index.js.map +1 -0
- package/dist/runtime/backends/mock.d.ts +67 -0
- package/dist/runtime/backends/mock.d.ts.map +1 -0
- package/dist/runtime/backends/mock.js +153 -0
- package/dist/runtime/backends/mock.js.map +1 -0
- package/dist/runtime/backends/subprocess.d.ts +56 -0
- package/dist/runtime/backends/subprocess.d.ts.map +1 -0
- package/dist/runtime/backends/subprocess.js +260 -0
- package/dist/runtime/backends/subprocess.js.map +1 -0
- package/dist/runtime/flows/learning.d.ts +73 -0
- package/dist/runtime/flows/learning.d.ts.map +1 -0
- package/dist/runtime/flows/learning.js +116 -0
- package/dist/runtime/flows/learning.js.map +1 -0
- package/dist/runtime/flows/validation.d.ts +122 -0
- package/dist/runtime/flows/validation.d.ts.map +1 -0
- package/dist/runtime/flows/validation.js +223 -0
- package/dist/runtime/flows/validation.js.map +1 -0
- package/dist/runtime/index.d.ts +6 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +8 -0
- package/dist/runtime/index.js.map +1 -0
- package/dist/runtime/manager.d.ts +116 -0
- package/dist/runtime/manager.d.ts.map +1 -0
- package/dist/runtime/manager.js +416 -0
- package/dist/runtime/manager.js.map +1 -0
- package/dist/runtime/types.d.ts +138 -0
- package/dist/runtime/types.d.ts.map +1 -0
- package/dist/runtime/types.js +2 -0
- package/dist/runtime/types.js.map +1 -0
- package/dist/search/evaluator.d.ts +102 -0
- package/dist/search/evaluator.d.ts.map +1 -0
- package/dist/search/evaluator.js +352 -0
- package/dist/search/evaluator.js.map +1 -0
- package/dist/search/index.d.ts +7 -0
- package/dist/search/index.d.ts.map +1 -0
- package/dist/search/index.js +11 -0
- package/dist/search/index.js.map +1 -0
- package/dist/search/refinement-loop.d.ts +73 -0
- package/dist/search/refinement-loop.d.ts.map +1 -0
- package/dist/search/refinement-loop.js +245 -0
- package/dist/search/refinement-loop.js.map +1 -0
- package/dist/search/refinement-types.d.ts +154 -0
- package/dist/search/refinement-types.d.ts.map +1 -0
- package/dist/search/refinement-types.js +99 -0
- package/dist/search/refinement-types.js.map +1 -0
- package/dist/search/router.d.ts +61 -0
- package/dist/search/router.d.ts.map +1 -0
- package/dist/search/router.js +197 -0
- package/dist/search/router.js.map +1 -0
- package/dist/search/solver.d.ts +75 -0
- package/dist/search/solver.d.ts.map +1 -0
- package/dist/search/solver.js +216 -0
- package/dist/search/solver.js.map +1 -0
- package/dist/search/verification-runner.d.ts +125 -0
- package/dist/search/verification-runner.d.ts.map +1 -0
- package/dist/search/verification-runner.js +440 -0
- package/dist/search/verification-runner.js.map +1 -0
- package/dist/surfacing/index.d.ts +2 -0
- package/dist/surfacing/index.d.ts.map +1 -0
- package/dist/surfacing/index.js +2 -0
- package/dist/surfacing/index.js.map +1 -0
- package/dist/surfacing/skill-library.d.ts +158 -0
- package/dist/surfacing/skill-library.d.ts.map +1 -0
- package/dist/surfacing/skill-library.js +429 -0
- package/dist/surfacing/skill-library.js.map +1 -0
- package/dist/types/config.d.ts +1113 -0
- package/dist/types/config.d.ts.map +1 -0
- package/dist/types/config.js +274 -0
- package/dist/types/config.js.map +1 -0
- package/dist/types/index.d.ts +9 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +14 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/memory.d.ts +339 -0
- package/dist/types/memory.d.ts.map +1 -0
- package/dist/types/memory.js +207 -0
- package/dist/types/memory.js.map +1 -0
- package/dist/types/meta.d.ts +146 -0
- package/dist/types/meta.d.ts.map +1 -0
- package/dist/types/meta.js +51 -0
- package/dist/types/meta.js.map +1 -0
- package/dist/types/outcome.d.ts +42 -0
- package/dist/types/outcome.d.ts.map +1 -0
- package/dist/types/outcome.js +50 -0
- package/dist/types/outcome.js.map +1 -0
- package/dist/types/playbook.d.ts +119 -0
- package/dist/types/playbook.d.ts.map +1 -0
- package/dist/types/playbook.js +71 -0
- package/dist/types/playbook.js.map +1 -0
- package/dist/types/step.d.ts +44 -0
- package/dist/types/step.d.ts.map +1 -0
- package/dist/types/step.js +32 -0
- package/dist/types/step.js.map +1 -0
- package/dist/types/task.d.ts +91 -0
- package/dist/types/task.d.ts.map +1 -0
- package/dist/types/task.js +39 -0
- package/dist/types/task.js.map +1 -0
- package/dist/types/trajectory.d.ts +221 -0
- package/dist/types/trajectory.d.ts.map +1 -0
- package/dist/types/trajectory.js +60 -0
- package/dist/types/trajectory.js.map +1 -0
- package/dist/utils/index.d.ts +4 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +4 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/similarity.d.ts +31 -0
- package/dist/utils/similarity.d.ts.map +1 -0
- package/dist/utils/similarity.js +107 -0
- package/dist/utils/similarity.js.map +1 -0
- package/dist/utils/storage.d.ts +106 -0
- package/dist/utils/storage.d.ts.map +1 -0
- package/dist/utils/storage.js +203 -0
- package/dist/utils/storage.js.map +1 -0
- package/dist/utils/validation.d.ts +129 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +171 -0
- package/dist/utils/validation.js.map +1 -0
- package/package.json +61 -9
- package/scripts/migrate-to-playbooks.ts +307 -0
- package/src/agents/index.ts +14 -0
- package/src/agents/mock-provider.ts +93 -0
- package/src/agents/types.ts +137 -0
- package/src/atlas.ts +560 -0
- package/src/bin/cognitive-core.ts +470 -0
- package/src/embeddings/bm25.ts +337 -0
- package/src/embeddings/index.ts +39 -0
- package/src/embeddings/manager.ts +288 -0
- package/src/embeddings/provider.ts +311 -0
- package/src/embeddings/vector-store.ts +353 -0
- package/src/factory.ts +263 -0
- package/src/index.ts +246 -0
- package/src/learning/analyzer.ts +335 -0
- package/src/learning/effectiveness.ts +428 -0
- package/src/learning/index.ts +58 -0
- package/src/learning/llm-extractor.ts +542 -0
- package/src/learning/meta-learner.ts +516 -0
- package/src/learning/pipeline.ts +244 -0
- package/src/learning/playbook-extractor.ts +702 -0
- package/src/learning/usage-inference.ts +372 -0
- package/src/mcp/index.ts +12 -0
- package/src/mcp/playbook-server.ts +565 -0
- package/src/memory/curated-loader.ts +160 -0
- package/src/memory/experience.ts +515 -0
- package/src/memory/index.ts +27 -0
- package/src/memory/meta.ts +506 -0
- package/src/memory/playbook.ts +493 -0
- package/src/memory/system.ts +551 -0
- package/src/runtime/backends/acp.ts +378 -0
- package/src/runtime/backends/index.ts +24 -0
- package/src/runtime/backends/mock.ts +218 -0
- package/src/runtime/backends/subprocess.ts +356 -0
- package/src/runtime/flows/learning.ts +183 -0
- package/src/runtime/flows/validation.ts +381 -0
- package/src/runtime/index.ts +53 -0
- package/src/runtime/manager.ts +541 -0
- package/src/runtime/types.ts +157 -0
- package/src/search/evaluator.ts +474 -0
- package/src/search/index.ts +59 -0
- package/src/search/refinement-loop.ts +363 -0
- package/src/search/refinement-types.ts +159 -0
- package/src/search/router.ts +261 -0
- package/src/search/solver.ts +303 -0
- package/src/search/verification-runner.ts +570 -0
- package/src/surfacing/index.ts +6 -0
- package/src/surfacing/skill-library.ts +594 -0
- package/src/types/config.ts +333 -0
- package/src/types/index.ts +130 -0
- package/src/types/memory.ts +270 -0
- package/src/types/meta.ts +218 -0
- package/src/types/outcome.ts +66 -0
- package/src/types/playbook.ts +196 -0
- package/src/types/step.ts +40 -0
- package/src/types/task.ts +52 -0
- package/src/types/trajectory.ts +80 -0
- package/src/utils/index.ts +38 -0
- package/src/utils/similarity.ts +139 -0
- package/src/utils/storage.ts +249 -0
- package/src/utils/validation.ts +286 -0
- package/tests/embeddings/bm25.test.ts +130 -0
- package/tests/embeddings/manager.test.ts +205 -0
- package/tests/integration/atlas.test.ts +266 -0
- package/tests/integration/e2e.test.ts +929 -0
- package/tests/learning/analyzer.test.ts +426 -0
- package/tests/learning/effectiveness.test.ts +542 -0
- package/tests/learning/pipeline.test.ts +176 -0
- package/tests/learning/playbook-extractor-provenance.test.ts +114 -0
- package/tests/learning/usage-inference.test.ts +254 -0
- package/tests/mcp/playbook-server.test.ts +252 -0
- package/tests/memory/experience.test.ts +198 -0
- package/tests/memory/playbook.test.ts +338 -0
- package/tests/memory/provenance.test.ts +639 -0
- package/tests/memory/system.test.ts +325 -0
- package/tests/runtime/agent-manager.test.ts +512 -0
- package/tests/runtime/mock-backend.test.ts +248 -0
- package/tests/search/refinement-loop.test.ts +468 -0
- package/tests/search/refinement.test.ts +267 -0
- package/tests/search/router.test.ts +427 -0
- package/tests/surfacing/skill-library.test.ts +292 -0
- package/tests/types/outcome.test.ts +147 -0
- package/tests/types/step.test.ts +133 -0
- package/tests/types/task.test.ts +158 -0
- package/tests/types/trajectory.test.ts +253 -0
- package/tests/utils/similarity.test.ts +188 -0
- package/tests/utils/validation.test.ts +252 -0
- package/tsconfig.json +25 -0
- package/vitest.config.ts +22 -0
- package/index.d.ts +0 -4
- package/index.js +0 -4
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, vi } from 'vitest';
|
|
2
|
+
import { LearningPipeline, createLearningPipeline } from '../../src/learning/pipeline.js';
|
|
3
|
+
import { MemorySystem } from '../../src/memory/system.js';
|
|
4
|
+
import { createTrajectory, createStep, createTask, successOutcome, failureOutcome } from '../../src/types/index.js';
|
|
5
|
+
import * as fs from 'node:fs/promises';
|
|
6
|
+
import * as path from 'node:path';
|
|
7
|
+
|
|
8
|
+
describe('LearningPipeline', () => {
|
|
9
|
+
// Shared fixture: each test gets its own scratch directory, a MemorySystem
// constructed with that directory, and a pipeline configured with small
// thresholds (minTrajectories 3, minSuccessRate 0.5, maxExperiences 100).
let pipeline: LearningPipeline;
let memory: MemorySystem;
let testDir: string;

beforeEach(async () => {
  // mkdtemp creates the directory itself and appends a random suffix, so two
  // workers (or two tests started within the same millisecond) cannot end up
  // sharing a directory the way a bare Date.now() suffix could.
  testDir = await fs.mkdtemp(path.join(process.cwd(), '.test-pipeline-'));
  memory = new MemorySystem(testDir);
  await memory.init();
  pipeline = createLearningPipeline(memory, {
    minTrajectories: 3,
    minSuccessRate: 0.5,
    maxExperiences: 100,
  });
});

afterEach(async () => {
  try {
    await memory.close();
  } finally {
    // Remove the scratch directory even when close() rejects, so a failing
    // test does not leave .test-pipeline-* directories behind.
    await fs.rm(testDir, { recursive: true, force: true });
  }
});
|
|
29
|
+
|
|
30
|
+
/**
 * Builds a two-step trajectory in the 'test' domain for agent 'test-agent'.
 *
 * @param success - selects the second step's observation ('success' vs
 *   'error: failed') and whether the outcome comes from successOutcome or
 *   failureOutcome.
 * @param description - task description handed to createTask.
 */
const createTestTrajectory = (success: boolean, description: string) => {
  const task = createTask({ domain: 'test', description });

  const inspectStep = createStep({
    thought: 'Analyzing task',
    action: 'read file',
    observation: 'file contents',
  });

  let finalObservation = 'error: failed';
  if (success) {
    finalObservation = 'success';
  }
  const applyStep = createStep({
    thought: 'Applying solution',
    action: 'edit file',
    observation: finalObservation,
  });

  const outcome = success
    ? successOutcome({ result: 'completed' })
    : failureOutcome('Task failed');

  return createTrajectory({
    task,
    steps: [inspectStep, applyStep],
    outcome,
    agentId: 'test-agent',
  });
};
|
|
51
|
+
|
|
52
|
+
// processTrajectory: what a single call returns and how it affects the
// accumulated-trajectory counter.
describe('processTrajectory', () => {
  it('should process and store trajectory', async () => {
    const input = createTestTrajectory(true, 'Test task 1');

    const { stored, trajectoryId, analysis } = await pipeline.processTrajectory(input);

    expect(stored).toBe(true);
    expect(trajectoryId).toBe(input.id);
    expect(analysis).toBeDefined();
    expect(analysis.success).toBe(true);
  });

  it('should accumulate trajectories for batch learning', async () => {
    expect(pipeline.getAccumulatedCount()).toBe(0);

    // After the n-th processed trajectory the counter must read n.
    for (let count = 1; count <= 2; count += 1) {
      await pipeline.processTrajectory(createTestTrajectory(true, `Task ${count}`));
      expect(pipeline.getAccumulatedCount()).toBe(count);
    }
  });

  it('should analyze trajectory for key steps', async () => {
    const input = createTestTrajectory(true, 'Test analysis');

    const { analysis } = await pipeline.processTrajectory(input);
    const { keySteps, stepAttribution } = analysis;

    expect(keySteps).toBeDefined();
    expect(Array.isArray(keySteps)).toBe(true);
    expect(stepAttribution).toBeDefined();
    // One attribution entry is expected per step of the input trajectory.
    expect(stepAttribution.length).toBe(input.steps.length);
  });
});
|
|
83
|
+
|
|
84
|
+
// shouldRunBatch: gate on both the number of accumulated trajectories and
// their success rate.
describe('shouldRunBatch', () => {
  // Processes one freshly built trajectory through the shared pipeline.
  const feed = async (success: boolean, description: string) => {
    await pipeline.processTrajectory(createTestTrajectory(success, description));
  };

  it('should return false when below minTrajectories', () => {
    const ready = pipeline.shouldRunBatch();

    expect(ready).toBe(false);
  });

  it('should return true when above minTrajectories with sufficient success rate', async () => {
    await feed(true, 'Task 1');
    await feed(true, 'Task 2');
    expect(pipeline.shouldRunBatch()).toBe(false); // only two so far, minimum is 3

    await feed(true, 'Task 3');
    expect(pipeline.shouldRunBatch()).toBe(true); // the third reaches the minimum
  });

  it('should return false when success rate is too low', async () => {
    for (const description of ['Task 1', 'Task 2', 'Task 3']) {
      await feed(false, description);
    }

    // All three failed: a 0% success rate is under the 50% threshold.
    expect(pipeline.shouldRunBatch()).toBe(false);
  });
});
|
|
107
|
+
|
|
108
|
+
// runBatchLearning: summary returned for an empty and a non-empty batch, and
// the reset of the accumulated counter afterwards.
describe('runBatchLearning', () => {
  it('should return empty result when no trajectories accumulated', async () => {
    const {
      trajectoriesProcessed,
      playbooksExtracted,
      experiencesPruned,
      successRate,
    } = await pipeline.runBatchLearning();

    expect(trajectoriesProcessed).toBe(0);
    expect(playbooksExtracted).toBe(0);
    expect(experiencesPruned).toBe(0);
    expect(successRate).toBe(0);
  });

  it('should process accumulated trajectories', async () => {
    // Two successes followed by one failure.
    let taskNumber = 0;
    for (const succeeded of [true, true, false]) {
      taskNumber += 1;
      await pipeline.processTrajectory(createTestTrajectory(succeeded, `Task ${taskNumber}`));
    }

    const summary = await pipeline.runBatchLearning();

    const expectedRate = 2 / 3;
    expect(summary.trajectoriesProcessed).toBe(3);
    expect(summary.successRate).toBeCloseTo(expectedRate, 2);
  });

  it('should clear accumulated after batch', async () => {
    for (const description of ['Task 1', 'Task 2']) {
      await pipeline.processTrajectory(createTestTrajectory(true, description));
    }
    expect(pipeline.getAccumulatedCount()).toBe(2);

    await pipeline.runBatchLearning();

    expect(pipeline.getAccumulatedCount()).toBe(0);
  });
});
|
|
138
|
+
|
|
139
|
+
// clearAccumulated: explicit reset of the accumulated counter without
// running a batch.
describe('clearAccumulated', () => {
  it('should clear all accumulated trajectories', async () => {
    for (const description of ['Task 1', 'Task 2']) {
      await pipeline.processTrajectory(createTestTrajectory(true, description));
    }
    const countBeforeClear = pipeline.getAccumulatedCount();
    expect(countBeforeClear).toBe(2);

    pipeline.clearAccumulated();

    const countAfterClear = pipeline.getAccumulatedCount();
    expect(countAfterClear).toBe(0);
  });
});
|
|
149
|
+
|
|
150
|
+
// configuration: options supplied at creation time and through updateConfig.
describe('configuration', () => {
  it('should use configurable maxExperiences for pruning', async () => {
    // A second pipeline over the same memory, built with maxExperiences: 50
    // and no other options.
    const customPipeline = createLearningPipeline(memory, { maxExperiences: 50 });

    // This only checks that such a pipeline still stores a trajectory;
    // pruning itself is neither triggered nor asserted here.
    const input = createTestTrajectory(true, 'Config test');
    const { stored } = await customPipeline.processTrajectory(input);

    expect(stored).toBe(true);
  });

  it('should update configuration', async () => {
    pipeline.updateConfig({ minTrajectories: 5 });

    // Three trajectories met the minimum the pipeline was created with (3)...
    for (const description of ['Task 1', 'Task 2', 'Task 3']) {
      await pipeline.processTrajectory(createTestTrajectory(true, description));
    }

    // ...but not the raised minimum of 5, so no batch should run yet.
    expect(pipeline.shouldRunBatch()).toBe(false);
  });
});
|
|
176
|
+
});
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { PlaybookExtractor, createPlaybookExtractor } from '../../src/learning/playbook-extractor.js';
|
|
3
|
+
import { createPlaybook } from '../../src/types/playbook.js';
|
|
4
|
+
import { createTrajectory, createStep, createTask, successOutcome } from '../../src/types/index.js';
|
|
5
|
+
import type { AnalysisResult } from '../../src/learning/analyzer.js';
|
|
6
|
+
|
|
7
|
+
describe('PlaybookExtractor provenance', () => {
|
|
8
|
+
/**
 * Builds a successful trajectory for agent 'test-agent' with one step per
 * entry in `actions`.
 *
 * @param description - task description passed to createTask.
 * @param domain - task domain passed to createTask.
 * @param actions - action strings; each becomes a step whose thought is
 *   `Doing: <action>` and whose observation is 'done'.
 */
function makeTrajectory(description: string, domain: string, actions: string[]) {
  const task = createTask({ domain, description });

  const steps = actions.map((action) => {
    const thought = `Doing: ${action}`;
    return createStep({ thought, action, observation: 'done' });
  });

  const outcome = successOutcome({ result: 'completed' });

  return createTrajectory({ task, steps, outcome, agentId: 'test-agent' });
}
|
|
22
|
+
|
|
23
|
+
/**
 * Hand-built AnalysisResult for a trajectory: marked successful and
 * abstractable, every step index listed as a key step, every step given an
 * attribution of 0.8, and no error patterns or training examples.
 */
function makeAnalysis(trajectory: ReturnType<typeof makeTrajectory>): AnalysisResult {
  const { steps } = trajectory;
  const uniformAttribution = 0.8;

  const analysis: AnalysisResult = {
    success: true,
    keySteps: steps.map((_step, index) => index),
    stepAttribution: steps.map(() => uniformAttribution),
    errorPatterns: [],
    abstractable: true,
    trainingExamples: [],
  };
  return analysis;
}
|
|
33
|
+
|
|
34
|
+
it('should set origin to extracted on newly created playbooks', async () => {
  const extractor = createPlaybookExtractor({ minTrajectories: 2 });

  // Same domain and action sequence twice, so the pattern appears in as many
  // trajectories as the configured minTrajectories.
  const trajectories = [
    makeTrajectory('Fix build error', 'code', ['read file', 'edit file']),
    makeTrajectory('Fix build error again', 'code', ['read file', 'edit file']),
  ];
  const analyses = trajectories.map((trajectory) => makeAnalysis(trajectory));

  // No pre-existing playbooks are supplied.
  const { new: created } = await extractor.extract(trajectories, analyses, []);

  // At least one playbook must come out, otherwise the checks below are vacuous.
  expect(created.length).toBeGreaterThanOrEqual(1);

  created.forEach((playbook) => {
    const { provenance } = playbook;
    expect(provenance).toBeDefined();
    expect(provenance.origin).toBe('extracted');
    expect(provenance.recordedAt).toBeInstanceOf(Date);
    // sourceFile and curatedBy are expected to be absent on extracted playbooks.
    expect(provenance.sourceFile).toBeUndefined();
    expect(provenance.curatedBy).toBeUndefined();
  });
});
|
|
59
|
+
|
|
60
|
+
it('should preserve provenance from primary playbook when merging', async () => {
  // mergeThreshold is set low with the intent of forcing candidates to merge.
  const extractorConfig = { minTrajectories: 1, mergeThreshold: 0.3 };
  const extractor = createPlaybookExtractor(extractorConfig);

  // Two trajectories with identical description, domain and actions.
  const actions = ['search', 'run', 'test'];
  const first = makeTrajectory('Debug failing test', 'test', actions);
  const second = makeTrajectory('Debug failing test', 'test', actions);

  const { new: created } = await extractor.extract(
    [first, second],
    [makeAnalysis(first), makeAnalysis(second)],
    [],
  );

  // Whatever playbooks are returned must carry the 'extracted' origin.
  created.forEach((playbook) => {
    expect(playbook.provenance.origin).toBe('extracted');
  });
});
|
|
81
|
+
|
|
82
|
+
it('should generate updates for existing playbooks without changing provenance', async () => {
  const extractor = createPlaybookExtractor({ minTrajectories: 2 });

  // A curated playbook whose situation ('read file') and domain ('code')
  // overlap with the trajectories fed to the extractor below.
  const existingPlaybook = createPlaybook({
    name: 'existing-debug',
    applicability: {
      situations: ['read file'],
      triggers: [],
      antiPatterns: [],
      domains: ['code'],
    },
    guidance: {
      strategy: 'Read then fix',
      tactics: ['read', 'fix'],
    },
    provenance: { origin: 'curated', sourceFile: 'debug.json', recordedAt: new Date() },
  });

  // Snapshot every provenance field before extraction so that a change to any
  // of them (not just origin/sourceFile) is detected afterwards.
  const provenanceBefore = { ...existingPlaybook.provenance };

  const t1 = makeTrajectory('Read file to debug', 'code', ['read file', 'edit file']);
  const t2 = makeTrajectory('Read file to debug', 'code', ['read file', 'edit file']);

  // The return value is deliberately not bound: this test only guards the
  // playbook passed in against being modified by the extractor.
  await extractor.extract(
    [t1, t2],
    [makeAnalysis(t1), makeAnalysis(t2)],
    [existingPlaybook],
  );

  // The existing playbook's provenance should remain unchanged
  expect(existingPlaybook.provenance.origin).toBe('curated');
  expect(existingPlaybook.provenance.sourceFile).toBe('debug.json');
  expect(existingPlaybook.provenance).toEqual(provenanceBefore);
});
|
|
114
|
+
});
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
PlaybookUsageInference,
|
|
4
|
+
createUsageInference,
|
|
5
|
+
} from '../../src/learning/usage-inference.js';
|
|
6
|
+
import type { Playbook } from '../../src/types/playbook.js';
|
|
7
|
+
import type { Trajectory } from '../../src/types/trajectory.js';
|
|
8
|
+
|
|
9
|
+
describe('PlaybookUsageInference', () => {
|
|
10
|
+
// Instance under test; rebuilt before every test with minMatchConfidence 0.3
// and tacticMatchThreshold 0.5.
let inference: PlaybookUsageInference;

beforeEach(() => {
  const minMatchConfidence = 0.3;
  const tacticMatchThreshold = 0.5;
  inference = createUsageInference({ minMatchConfidence, tacticMatchThreshold });
});
|
|
18
|
+
|
|
19
|
+
/**
 * Returns a fully populated 'debug-error' playbook for the 'code' domain.
 *
 * The id combines the current timestamp with a random base-36 suffix.
 *
 * @param overrides - top-level fields that replace the defaults wholesale
 *   (the merge is a shallow spread, so a nested object such as `guidance`
 *   is swapped as a unit, not merged field by field).
 */
function createSamplePlaybook(overrides?: Partial<Playbook>): Playbook {
  const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;

  const defaults: Playbook = {
    id: `playbook-${uniqueSuffix}`,
    name: 'debug-error',
    applicability: {
      situations: ['Debugging runtime errors'],
      triggers: ['Error:', 'Exception'],
      antiPatterns: [],
      domains: ['code'],
    },
    guidance: {
      strategy: 'Identify the error, trace the cause, fix it',
      tactics: [
        'Read the error message carefully',
        'Check the stack trace',
        'Identify the root cause',
        'Apply the fix',
        'Verify the fix works',
      ],
    },
    verification: {
      successIndicators: ['Error resolved', 'Tests pass'],
      failureIndicators: ['Error persists', 'New errors'],
    },
    evolution: {
      version: '1.0.0',
      createdFrom: [],
      failures: [],
      refinements: [],
      successCount: 0,
      failureCount: 0,
    },
    confidence: 0.8,
    complexity: 'moderate',
    estimatedEffort: 3,
    createdAt: new Date(),
    updatedAt: new Date(),
  };

  // Later keys win, so anything in overrides replaces the default above.
  return { ...defaults, ...overrides };
}
|
|
59
|
+
|
|
60
|
+
/**
 * Returns a successful trajectory for a fixed 'code'-domain task
 * ('Fix the error in the code').
 *
 * @param steps - thought/action/observation triples; each becomes a step
 *   whose `index` is its position in the array and whose timestamp is the
 *   creation time.
 * @param overrides - top-level fields that replace the defaults (shallow
 *   spread, applied last).
 */
function createSampleTrajectory(steps: Array<{
  thought?: string;
  action: string;
  observation?: string;
}>, overrides?: Partial<Trajectory>): Trajectory {
  return {
    // A timestamp alone repeats when two trajectories are built within the
    // same millisecond; the random suffix (same scheme as the playbook ids
    // in this file) keeps ids distinct.
    id: `traj-${Date.now()}-${Math.random().toString(36).slice(2)}`,
    task: {
      id: 'task-1',
      description: 'Fix the error in the code',
      domain: 'code',
      context: {},
      metadata: {},
      createdAt: new Date(),
    },
    steps: steps.map((s, i) => ({
      index: i,
      thought: s.thought,
      action: s.action,
      observation: s.observation,
      timestamp: new Date(),
    })),
    outcome: { success: true, quality: 'good' },
    metadata: {},
    startedAt: new Date(),
    completedAt: new Date(),
    ...overrides,
  };
}
|
|
89
|
+
|
|
90
|
+
describe('inferUsage', () => {
|
|
91
|
+
it('should detect playbook usage when tactics match', async () => {
  const tactics = [
    'Read the error message',
    'Check the stack trace',
    'Fix the issue',
  ];
  const playbook = createSamplePlaybook({
    guidance: { strategy: 'Debug by reading errors', tactics },
  });

  // Each step's thought echoes the wording of one tactic, in the same order.
  const agentSteps = [
    {
      thought: 'Let me read the error message carefully',
      action: 'cat error.log',
      observation: 'TypeError: Cannot read property of undefined',
    },
    {
      thought: 'I should check the stack trace',
      action: 'cat stacktrace.txt',
      observation: 'at line 42 in main.ts',
    },
    {
      thought: 'Now I will fix the issue',
      action: 'edit main.ts',
      observation: 'File updated',
    },
  ];
  const trajectory = createSampleTrajectory(agentSteps);

  const results = await inference.inferUsage(trajectory, [playbook]);

  // One result for the one playbook supplied.
  expect(results).toHaveLength(1);
  const [usage] = results;
  expect(usage.wasUsed).toBe(true);
  expect(usage.usageConfidence).toBeGreaterThan(0);
});
|
|
127
|
+
|
|
128
|
+
it('should detect non-usage when tactics do not match', async () => {
  // Playbook about performance work.
  const perfTactics = [
    'Profile the application',
    'Identify bottlenecks',
    'Optimize hot paths',
  ];
  const perfPlaybook = createSamplePlaybook({
    guidance: {
      strategy: 'Performance optimization',
      tactics: perfTactics,
    },
  });

  // The trajectory is about adding a feature, unrelated to the tactics above.
  const featureSteps = [
    {
      thought: 'Adding a new feature',
      action: 'create new_feature.ts',
      observation: 'File created',
    },
    {
      thought: 'Writing the implementation',
      action: 'edit new_feature.ts',
      observation: 'Code added',
    },
  ];
  const featureRun = createSampleTrajectory(featureSteps);

  const inferred = await inference.inferUsage(featureRun, [perfPlaybook]);

  // One result is still reported, but it must be marked as not used.
  expect(inferred).toHaveLength(1);
  const [onlyResult] = inferred;
  expect(onlyResult.wasUsed).toBe(false);
});
|
|
159
|
+
|
|
160
|
+
it('should infer success based on trajectory outcome', async () => {
  const playbook = createSamplePlaybook({
    guidance: {
      strategy: 'Debug and fix',
      tactics: ['Read the error', 'Fix the issue', 'Verify the fix'],
    },
    verification: {
      successIndicators: ['Tests pass', 'Build succeeds'],
      failureIndicators: ['Tests fail', 'Build error'],
    },
  });

  // Trajectory whose thoughts repeat each tactic and whose outcome is successful
  const successfulTrajectory = createSampleTrajectory([
    {
      thought: 'Let me read the error carefully',
      action: 'cat error.log',
      observation: 'Error found',
    },
    {
      thought: 'Now I will fix the issue',
      action: 'edit file.ts',
      observation: 'Fixed',
    },
    {
      thought: 'Let me verify the fix works',
      action: 'npm test',
      observation: 'All tests pass',
    },
  ], {
    outcome: { success: true, quality: 'good' },
  });

  const results = await inference.inferUsage(successfulTrajectory, [playbook]);

  expect(results).toHaveLength(1);
  // Assert usage unconditionally: guarding the success expectation behind
  // `if (wasUsed)` would let this test pass without checking anything
  // whenever the playbook is reported as unused.
  expect(results[0].wasUsed).toBe(true);
  // A used playbook on a successful trajectory must be reported as successful.
  expect(results[0].wasSuccessful).toBe(true);
});
|
|
202
|
+
|
|
203
|
+
it('should handle multiple playbooks', async () => {
  const playbook1 = createSamplePlaybook({
    id: 'playbook-1',
    name: 'error-fixing',
    guidance: {
      strategy: 'Fix errors',
      tactics: ['Read error', 'Fix code'],
    },
  });

  const playbook2 = createSamplePlaybook({
    id: 'playbook-2',
    name: 'testing',
    guidance: {
      strategy: 'Test code',
      tactics: ['Run tests', 'Check coverage'],
    },
  });

  // The thoughts here echo playbook1's tactics, not playbook2's.
  const trajectory = createSampleTrajectory([
    {
      thought: 'Reading the error message',
      action: 'cat error.log',
      observation: 'Found error',
    },
    {
      thought: 'Fixing the code',
      action: 'edit file.ts',
      observation: 'Fixed',
    },
  ]);

  const results = await inference.inferUsage(trajectory, [playbook1, playbook2]);

  expect(results).toHaveLength(2);

  const pb1Result = results.find(r => r.playbookId === 'playbook-1');
  const pb2Result = results.find(r => r.playbookId === 'playbook-2');

  // Both playbooks must have their own result; without these checks a missing
  // playbook-2 entry would silently be compared as confidence 0.
  expect(pb1Result).toBeDefined();
  expect(pb2Result).toBeDefined();

  // playbook1 should have higher usage confidence
  expect(pb1Result!.usageConfidence).toBeGreaterThan(pb2Result!.usageConfidence);
});
|
|
243
|
+
|
|
244
|
+
it('should return empty array when no playbooks provided', async () => {
  // A one-step trajectory; the playbook list handed to inferUsage is empty.
  const singleStep = { action: 'some action', observation: 'result' };
  const loneTrajectory = createSampleTrajectory([singleStep]);

  const inferred = await inference.inferUsage(loneTrajectory, []);

  // With no playbooks, no results are expected.
  expect(inferred).toHaveLength(0);
});
|
|
253
|
+
});
|
|
254
|
+
});
|