cognitive-core 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +302 -116
- package/SKILL.md +193 -0
- package/dist/agents/index.d.ts +3 -0
- package/dist/agents/index.d.ts.map +1 -0
- package/dist/agents/index.js +5 -0
- package/dist/agents/index.js.map +1 -0
- package/dist/agents/mock-provider.d.ts +23 -0
- package/dist/agents/mock-provider.d.ts.map +1 -0
- package/dist/agents/mock-provider.js +71 -0
- package/dist/agents/mock-provider.js.map +1 -0
- package/dist/agents/types.d.ts +98 -0
- package/dist/agents/types.d.ts.map +1 -0
- package/dist/agents/types.js +44 -0
- package/dist/agents/types.js.map +1 -0
- package/dist/atlas.d.ts +196 -0
- package/dist/atlas.d.ts.map +1 -0
- package/dist/atlas.js +373 -0
- package/dist/atlas.js.map +1 -0
- package/dist/bin/cognitive-core.d.ts +18 -0
- package/dist/bin/cognitive-core.d.ts.map +1 -0
- package/dist/bin/cognitive-core.js +419 -0
- package/dist/bin/cognitive-core.js.map +1 -0
- package/dist/embeddings/bm25.d.ts +104 -0
- package/dist/embeddings/bm25.d.ts.map +1 -0
- package/dist/embeddings/bm25.js +264 -0
- package/dist/embeddings/bm25.js.map +1 -0
- package/dist/embeddings/index.d.ts +12 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +16 -0
- package/dist/embeddings/index.js.map +1 -0
- package/dist/embeddings/manager.d.ts +112 -0
- package/dist/embeddings/manager.d.ts.map +1 -0
- package/dist/embeddings/manager.js +215 -0
- package/dist/embeddings/manager.js.map +1 -0
- package/dist/embeddings/provider.d.ts +101 -0
- package/dist/embeddings/provider.d.ts.map +1 -0
- package/dist/embeddings/provider.js +232 -0
- package/dist/embeddings/provider.js.map +1 -0
- package/dist/embeddings/vector-store.d.ts +101 -0
- package/dist/embeddings/vector-store.d.ts.map +1 -0
- package/dist/embeddings/vector-store.js +256 -0
- package/dist/embeddings/vector-store.js.map +1 -0
- package/dist/factory.d.ts +193 -0
- package/dist/factory.d.ts.map +1 -0
- package/dist/factory.js +109 -0
- package/dist/factory.js.map +1 -0
- package/dist/index.d.ts +30 -453
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +84 -509
- package/dist/index.js.map +1 -0
- package/dist/learning/analyzer.d.ts +110 -0
- package/dist/learning/analyzer.d.ts.map +1 -0
- package/dist/learning/analyzer.js +213 -0
- package/dist/learning/analyzer.js.map +1 -0
- package/dist/learning/effectiveness.d.ts +158 -0
- package/dist/learning/effectiveness.d.ts.map +1 -0
- package/dist/learning/effectiveness.js +251 -0
- package/dist/learning/effectiveness.js.map +1 -0
- package/dist/learning/index.d.ts +8 -0
- package/dist/learning/index.d.ts.map +1 -0
- package/dist/learning/index.js +11 -0
- package/dist/learning/index.js.map +1 -0
- package/dist/learning/llm-extractor.d.ts +88 -0
- package/dist/learning/llm-extractor.d.ts.map +1 -0
- package/dist/learning/llm-extractor.js +372 -0
- package/dist/learning/llm-extractor.js.map +1 -0
- package/dist/learning/meta-learner.d.ts +80 -0
- package/dist/learning/meta-learner.d.ts.map +1 -0
- package/dist/learning/meta-learner.js +355 -0
- package/dist/learning/meta-learner.js.map +1 -0
- package/dist/learning/pipeline.d.ts +65 -0
- package/dist/learning/pipeline.d.ts.map +1 -0
- package/dist/learning/pipeline.js +170 -0
- package/dist/learning/pipeline.js.map +1 -0
- package/dist/learning/playbook-extractor.d.ts +113 -0
- package/dist/learning/playbook-extractor.d.ts.map +1 -0
- package/dist/learning/playbook-extractor.js +523 -0
- package/dist/learning/playbook-extractor.js.map +1 -0
- package/dist/learning/usage-inference.d.ts +82 -0
- package/dist/learning/usage-inference.d.ts.map +1 -0
- package/dist/learning/usage-inference.js +261 -0
- package/dist/learning/usage-inference.js.map +1 -0
- package/dist/mcp/index.d.ts +6 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +6 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/mcp/playbook-server.d.ts +120 -0
- package/dist/mcp/playbook-server.d.ts.map +1 -0
- package/dist/mcp/playbook-server.js +427 -0
- package/dist/mcp/playbook-server.js.map +1 -0
- package/dist/memory/curated-loader.d.ts +62 -0
- package/dist/memory/curated-loader.d.ts.map +1 -0
- package/dist/memory/curated-loader.js +106 -0
- package/dist/memory/curated-loader.js.map +1 -0
- package/dist/memory/experience.d.ts +122 -0
- package/dist/memory/experience.d.ts.map +1 -0
- package/dist/memory/experience.js +392 -0
- package/dist/memory/experience.js.map +1 -0
- package/dist/memory/index.d.ts +6 -0
- package/dist/memory/index.d.ts.map +1 -0
- package/dist/memory/index.js +9 -0
- package/dist/memory/index.js.map +1 -0
- package/dist/memory/meta.d.ts +90 -0
- package/dist/memory/meta.d.ts.map +1 -0
- package/dist/memory/meta.js +362 -0
- package/dist/memory/meta.js.map +1 -0
- package/dist/memory/playbook.d.ts +133 -0
- package/dist/memory/playbook.d.ts.map +1 -0
- package/dist/memory/playbook.js +357 -0
- package/dist/memory/playbook.js.map +1 -0
- package/dist/memory/system.d.ts +167 -0
- package/dist/memory/system.d.ts.map +1 -0
- package/dist/memory/system.js +383 -0
- package/dist/memory/system.js.map +1 -0
- package/dist/runtime/backends/acp.d.ts +67 -0
- package/dist/runtime/backends/acp.d.ts.map +1 -0
- package/dist/runtime/backends/acp.js +290 -0
- package/dist/runtime/backends/acp.js.map +1 -0
- package/dist/runtime/backends/index.d.ts +5 -0
- package/dist/runtime/backends/index.d.ts.map +1 -0
- package/dist/runtime/backends/index.js +6 -0
- package/dist/runtime/backends/index.js.map +1 -0
- package/dist/runtime/backends/mock.d.ts +67 -0
- package/dist/runtime/backends/mock.d.ts.map +1 -0
- package/dist/runtime/backends/mock.js +153 -0
- package/dist/runtime/backends/mock.js.map +1 -0
- package/dist/runtime/backends/subprocess.d.ts +56 -0
- package/dist/runtime/backends/subprocess.d.ts.map +1 -0
- package/dist/runtime/backends/subprocess.js +260 -0
- package/dist/runtime/backends/subprocess.js.map +1 -0
- package/dist/runtime/flows/learning.d.ts +73 -0
- package/dist/runtime/flows/learning.d.ts.map +1 -0
- package/dist/runtime/flows/learning.js +116 -0
- package/dist/runtime/flows/learning.js.map +1 -0
- package/dist/runtime/flows/validation.d.ts +122 -0
- package/dist/runtime/flows/validation.d.ts.map +1 -0
- package/dist/runtime/flows/validation.js +223 -0
- package/dist/runtime/flows/validation.js.map +1 -0
- package/dist/runtime/index.d.ts +6 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +8 -0
- package/dist/runtime/index.js.map +1 -0
- package/dist/runtime/manager.d.ts +116 -0
- package/dist/runtime/manager.d.ts.map +1 -0
- package/dist/runtime/manager.js +416 -0
- package/dist/runtime/manager.js.map +1 -0
- package/dist/runtime/types.d.ts +138 -0
- package/dist/runtime/types.d.ts.map +1 -0
- package/dist/runtime/types.js +2 -0
- package/dist/runtime/types.js.map +1 -0
- package/dist/search/evaluator.d.ts +102 -0
- package/dist/search/evaluator.d.ts.map +1 -0
- package/dist/search/evaluator.js +352 -0
- package/dist/search/evaluator.js.map +1 -0
- package/dist/search/index.d.ts +7 -0
- package/dist/search/index.d.ts.map +1 -0
- package/dist/search/index.js +11 -0
- package/dist/search/index.js.map +1 -0
- package/dist/search/refinement-loop.d.ts +73 -0
- package/dist/search/refinement-loop.d.ts.map +1 -0
- package/dist/search/refinement-loop.js +245 -0
- package/dist/search/refinement-loop.js.map +1 -0
- package/dist/search/refinement-types.d.ts +154 -0
- package/dist/search/refinement-types.d.ts.map +1 -0
- package/dist/search/refinement-types.js +99 -0
- package/dist/search/refinement-types.js.map +1 -0
- package/dist/search/router.d.ts +61 -0
- package/dist/search/router.d.ts.map +1 -0
- package/dist/search/router.js +197 -0
- package/dist/search/router.js.map +1 -0
- package/dist/search/solver.d.ts +75 -0
- package/dist/search/solver.d.ts.map +1 -0
- package/dist/search/solver.js +216 -0
- package/dist/search/solver.js.map +1 -0
- package/dist/search/verification-runner.d.ts +125 -0
- package/dist/search/verification-runner.d.ts.map +1 -0
- package/dist/search/verification-runner.js +440 -0
- package/dist/search/verification-runner.js.map +1 -0
- package/dist/surfacing/index.d.ts +2 -0
- package/dist/surfacing/index.d.ts.map +1 -0
- package/dist/surfacing/index.js +2 -0
- package/dist/surfacing/index.js.map +1 -0
- package/dist/surfacing/skill-library.d.ts +158 -0
- package/dist/surfacing/skill-library.d.ts.map +1 -0
- package/dist/surfacing/skill-library.js +429 -0
- package/dist/surfacing/skill-library.js.map +1 -0
- package/dist/types/config.d.ts +1113 -0
- package/dist/types/config.d.ts.map +1 -0
- package/dist/types/config.js +274 -0
- package/dist/types/config.js.map +1 -0
- package/dist/types/index.d.ts +9 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +14 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/memory.d.ts +339 -0
- package/dist/types/memory.d.ts.map +1 -0
- package/dist/types/memory.js +207 -0
- package/dist/types/memory.js.map +1 -0
- package/dist/types/meta.d.ts +146 -0
- package/dist/types/meta.d.ts.map +1 -0
- package/dist/types/meta.js +51 -0
- package/dist/types/meta.js.map +1 -0
- package/dist/types/outcome.d.ts +42 -0
- package/dist/types/outcome.d.ts.map +1 -0
- package/dist/types/outcome.js +50 -0
- package/dist/types/outcome.js.map +1 -0
- package/dist/types/playbook.d.ts +119 -0
- package/dist/types/playbook.d.ts.map +1 -0
- package/dist/types/playbook.js +71 -0
- package/dist/types/playbook.js.map +1 -0
- package/dist/types/step.d.ts +44 -0
- package/dist/types/step.d.ts.map +1 -0
- package/dist/types/step.js +32 -0
- package/dist/types/step.js.map +1 -0
- package/dist/types/task.d.ts +91 -0
- package/dist/types/task.d.ts.map +1 -0
- package/dist/types/task.js +39 -0
- package/dist/types/task.js.map +1 -0
- package/dist/types/trajectory.d.ts +221 -0
- package/dist/types/trajectory.d.ts.map +1 -0
- package/dist/types/trajectory.js +60 -0
- package/dist/types/trajectory.js.map +1 -0
- package/dist/utils/index.d.ts +4 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +4 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/similarity.d.ts +31 -0
- package/dist/utils/similarity.d.ts.map +1 -0
- package/dist/utils/similarity.js +107 -0
- package/dist/utils/similarity.js.map +1 -0
- package/dist/utils/storage.d.ts +106 -0
- package/dist/utils/storage.d.ts.map +1 -0
- package/dist/utils/storage.js +203 -0
- package/dist/utils/storage.js.map +1 -0
- package/dist/utils/validation.d.ts +129 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +171 -0
- package/dist/utils/validation.js.map +1 -0
- package/package.json +50 -34
- package/scripts/migrate-to-playbooks.ts +307 -0
- package/src/agents/index.ts +14 -0
- package/src/agents/mock-provider.ts +93 -0
- package/src/agents/types.ts +137 -0
- package/src/atlas.ts +560 -0
- package/src/bin/cognitive-core.ts +470 -0
- package/src/embeddings/bm25.ts +337 -0
- package/src/embeddings/index.ts +39 -0
- package/src/embeddings/manager.ts +288 -0
- package/src/embeddings/provider.ts +311 -0
- package/src/embeddings/vector-store.ts +353 -0
- package/src/factory.ts +263 -0
- package/src/index.ts +246 -0
- package/src/learning/analyzer.ts +335 -0
- package/src/learning/effectiveness.ts +428 -0
- package/src/learning/index.ts +58 -0
- package/src/learning/llm-extractor.ts +542 -0
- package/src/learning/meta-learner.ts +516 -0
- package/src/learning/pipeline.ts +244 -0
- package/src/learning/playbook-extractor.ts +702 -0
- package/src/learning/usage-inference.ts +372 -0
- package/src/mcp/index.ts +12 -0
- package/src/mcp/playbook-server.ts +565 -0
- package/src/memory/curated-loader.ts +160 -0
- package/src/memory/experience.ts +515 -0
- package/src/memory/index.ts +27 -0
- package/src/memory/meta.ts +506 -0
- package/src/memory/playbook.ts +493 -0
- package/src/memory/system.ts +551 -0
- package/src/runtime/backends/acp.ts +378 -0
- package/src/runtime/backends/index.ts +24 -0
- package/src/runtime/backends/mock.ts +218 -0
- package/src/runtime/backends/subprocess.ts +356 -0
- package/src/runtime/flows/learning.ts +183 -0
- package/src/runtime/flows/validation.ts +381 -0
- package/src/runtime/index.ts +53 -0
- package/src/runtime/manager.ts +541 -0
- package/src/runtime/types.ts +157 -0
- package/src/search/evaluator.ts +474 -0
- package/src/search/index.ts +59 -0
- package/src/search/refinement-loop.ts +363 -0
- package/src/search/refinement-types.ts +159 -0
- package/src/search/router.ts +261 -0
- package/src/search/solver.ts +303 -0
- package/src/search/verification-runner.ts +570 -0
- package/src/surfacing/index.ts +6 -0
- package/src/surfacing/skill-library.ts +594 -0
- package/src/types/config.ts +333 -0
- package/src/types/index.ts +130 -0
- package/src/types/memory.ts +270 -0
- package/src/types/meta.ts +218 -0
- package/src/types/outcome.ts +66 -0
- package/src/types/playbook.ts +196 -0
- package/src/types/step.ts +40 -0
- package/src/types/task.ts +52 -0
- package/src/types/trajectory.ts +80 -0
- package/src/utils/index.ts +38 -0
- package/src/utils/similarity.ts +139 -0
- package/src/utils/storage.ts +249 -0
- package/src/utils/validation.ts +286 -0
- package/tests/embeddings/bm25.test.ts +130 -0
- package/tests/embeddings/manager.test.ts +205 -0
- package/tests/integration/atlas.test.ts +266 -0
- package/tests/integration/e2e.test.ts +929 -0
- package/tests/learning/analyzer.test.ts +426 -0
- package/tests/learning/effectiveness.test.ts +542 -0
- package/tests/learning/pipeline.test.ts +176 -0
- package/tests/learning/playbook-extractor-provenance.test.ts +114 -0
- package/tests/learning/usage-inference.test.ts +254 -0
- package/tests/mcp/playbook-server.test.ts +252 -0
- package/tests/memory/experience.test.ts +198 -0
- package/tests/memory/playbook.test.ts +338 -0
- package/tests/memory/provenance.test.ts +639 -0
- package/tests/memory/system.test.ts +325 -0
- package/tests/runtime/agent-manager.test.ts +512 -0
- package/tests/runtime/mock-backend.test.ts +248 -0
- package/tests/search/refinement-loop.test.ts +468 -0
- package/tests/search/refinement.test.ts +267 -0
- package/tests/search/router.test.ts +427 -0
- package/tests/surfacing/skill-library.test.ts +292 -0
- package/tests/types/outcome.test.ts +147 -0
- package/tests/types/step.test.ts +133 -0
- package/tests/types/task.test.ts +158 -0
- package/tests/types/trajectory.test.ts +253 -0
- package/tests/utils/similarity.test.ts +188 -0
- package/tests/utils/validation.test.ts +252 -0
- package/tsconfig.json +25 -0
- package/vitest.config.ts +22 -0
- package/dist/index.d.mts +0 -466
- package/dist/index.mjs +0 -478
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
TrajectoryAnalyzer,
|
|
4
|
+
simpleCreditAssignment,
|
|
5
|
+
outcomeCreditAssignment,
|
|
6
|
+
getCreditAssignmentFn,
|
|
7
|
+
createAnalyzer,
|
|
8
|
+
type AnalyzerConfig,
|
|
9
|
+
} from '../../src/learning/analyzer.js';
|
|
10
|
+
import { createTrajectory } from '../../src/types/trajectory.js';
|
|
11
|
+
import { createTask } from '../../src/types/task.js';
|
|
12
|
+
import { createStep } from '../../src/types/step.js';
|
|
13
|
+
import { successOutcome, failureOutcome } from '../../src/types/outcome.js';
|
|
14
|
+
|
|
15
|
+
/**
 * Baseline analyzer configuration passed to the credit-assignment functions
 * in this file. Tests that need a different value spread this object and
 * override a single field.
 */
const DEFAULT_CONFIG: AnalyzerConfig = {
  // With 0.5 the decay test expects 1.0, 0.5, 0.25 from the last step backwards.
  creditDecayFactor: 0.5,
  actionRepetitionThreshold: 3,
  attributionThreshold: 0.15,
};
|
|
21
|
+
|
|
22
|
+
/**
 * Tests for simpleCreditAssignment: scores are expected to shrink by
 * `creditDecayFactor` for every position counted back from the last step.
 */
describe('simpleCreditAssignment', () => {
  // Builds a fresh a1/a2/a3 step list for the three-step scenarios.
  const threeSteps = () => [
    createStep({ action: 'a1', observation: 'o1' }),
    createStep({ action: 'a2', observation: 'o2' }),
    createStep({ action: 'a3', observation: 'o3' }),
  ];

  it('should assign exponentially decaying credit from end', async () => {
    const input = createTrajectory({
      task: createTask({ domain: 'test', description: 'test' }),
      steps: threeSteps(),
      outcome: successOutcome('done'),
      agentId: 'agent',
    });

    const credits = await simpleCreditAssignment(input, DEFAULT_CONFIG);
    expect(credits).toHaveLength(3);

    const [oldest, middle, newest] = credits;
    expect(newest).toBe(1.0); // final step keeps full credit
    expect(middle).toBe(0.5); // one step back
    expect(oldest).toBe(0.25); // two steps back
  });

  it('should handle single step trajectory', async () => {
    const loneStep = createTrajectory({
      task: createTask({ domain: 'test', description: 'test' }),
      steps: [createStep({ action: 'a', observation: 'o' })],
      outcome: successOutcome('x'),
      agentId: 'agent',
    });

    const credits = await simpleCreditAssignment(loneStep, DEFAULT_CONFIG);

    expect(credits).toEqual([1.0]);
  });

  it('should handle empty steps', async () => {
    const noSteps = createTrajectory({
      task: createTask({ domain: 'test', description: 'test' }),
      steps: [],
      outcome: successOutcome('x'),
      agentId: 'agent',
    });

    const credits = await simpleCreditAssignment(noSteps, DEFAULT_CONFIG);

    expect(credits).toEqual([]);
  });

  it('should use configurable decay factor', async () => {
    const input = createTrajectory({
      task: createTask({ domain: 'test', description: 'test' }),
      steps: threeSteps(),
      outcome: successOutcome('done'),
      agentId: 'agent',
    });
    const slowerDecay = { ...DEFAULT_CONFIG, creditDecayFactor: 0.8 };

    const [oldest, middle, newest] = await simpleCreditAssignment(input, slowerDecay);

    expect(oldest).toBeCloseTo(0.64); // 0.8^2
    expect(middle).toBeCloseTo(0.8); // 0.8^1
    expect(newest).toBe(1.0); // 0.8^0
  });
});
|
|
89
|
+
|
|
90
|
+
/**
 * Tests for outcomeCreditAssignment: the same two-step trajectory is scored
 * once with a success outcome and once with a failure outcome.
 */
describe('outcomeCreditAssignment', () => {
  it('should assign higher credit for successful trajectories', async () => {
    // Both trajectories use identical (but separately created) steps.
    const pairOfSteps = () => [
      createStep({ action: 'a1', observation: 'o1' }),
      createStep({ action: 'a2', observation: 'o2' }),
    ];

    const succeeded = createTrajectory({
      task: createTask({ domain: 'test', description: 'test' }),
      steps: pairOfSteps(),
      outcome: successOutcome('done'),
      agentId: 'agent',
    });
    const failed = createTrajectory({
      task: createTask({ domain: 'test', description: 'test' }),
      steps: pairOfSteps(),
      outcome: failureOutcome('error'),
      agentId: 'agent',
    });

    const creditsOnSuccess = await outcomeCreditAssignment(succeeded, DEFAULT_CONFIG);
    const creditsOnFailure = await outcomeCreditAssignment(failed, DEFAULT_CONFIG);

    expect(creditsOnSuccess[0]).toBe(0.7);
    expect(creditsOnSuccess[1]).toBe(1.0); // last step of a success is boosted
    expect(creditsOnFailure[0]).toBe(0.3);
    expect(creditsOnFailure[1]).toBe(0.3);
  });
});
|
|
121
|
+
|
|
122
|
+
/**
 * Tests for getCreditAssignmentFn: each strategy name must resolve to the
 * expected credit-assignment function.
 */
describe('getCreditAssignmentFn', () => {
  it('should return correct function for strategy', () => {
    // [strategy name, function the lookup is expected to return]
    const expectations = [
      ['simple', simpleCreditAssignment],
      ['llm', simpleCreditAssignment], // falls back to the simple strategy
      ['counterfactual', outcomeCreditAssignment],
    ] as const;

    for (const [strategy, expectedFn] of expectations) {
      expect(getCreditAssignmentFn(strategy)).toBe(expectedFn);
    }
  });
});
|
|
129
|
+
|
|
130
|
+
describe('TrajectoryAnalyzer', () => {
|
|
131
|
+
const task = createTask({
|
|
132
|
+
domain: 'code',
|
|
133
|
+
description: 'Write a test function',
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
// Tests for TrajectoryAnalyzer.analyze(): result flags, error-pattern
// detection, training-example extraction, and step attribution.
// `task` comes from the enclosing describe block.
describe('analyze', () => {
  it('should analyze a successful trajectory', async () => {
    const successfulRun = createTrajectory({
      task,
      steps: [
        createStep({ thought: 'First, read the code', action: 'read', observation: 'Code content' }),
        createStep({ thought: 'Now write the test', action: 'write', observation: 'Test written' }),
        createStep({ action: 'run_test', observation: 'Tests passed' }),
      ],
      outcome: successOutcome('Test implemented'),
      agentId: 'agent',
    });

    const analysis = await new TrajectoryAnalyzer('simple').analyze(successfulRun);

    expect(analysis.success).toBe(true);
    expect(analysis.stepAttribution).toHaveLength(3);
    expect(analysis.keySteps.length).toBeGreaterThan(0);
    expect(analysis.abstractable).toBe(true);
  });

  it('should detect error patterns in failed trajectories', async () => {
    const failedRun = createTrajectory({
      task,
      steps: [createStep({ action: 'try', observation: 'error' })],
      outcome: failureOutcome('Type error: null is not an object'),
      agentId: 'agent',
    });

    const analysis = await createAnalyzer().analyze(failedRun);

    expect(analysis.success).toBe(false);
    expect(analysis.errorPatterns.length).toBeGreaterThan(0);
    // Either the specific 'type' pattern or the 'general' pattern is accepted.
    const matchesTypeOrGeneral = analysis.errorPatterns.some(
      (pattern) => pattern.type === 'type' || pattern.type === 'general',
    );
    expect(matchesTypeOrGeneral).toBe(true);
  });

  it('should detect timeout errors', async () => {
    const timedOutRun = createTrajectory({
      task,
      steps: [],
      outcome: failureOutcome('Operation timeout exceeded'),
      agentId: 'agent',
    });

    const analysis = await createAnalyzer().analyze(timedOutRun);

    const sawTimeout = analysis.errorPatterns.some((pattern) => pattern.type === 'timeout');
    expect(sawTimeout).toBe(true);
  });

  it('should detect repeated actions', async () => {
    // Four identical retries in a row.
    const retries = Array.from({ length: 4 }, () =>
      createStep({ action: 'retry_connection', observation: 'failed' }),
    );
    const stuckRun = createTrajectory({
      task,
      steps: retries,
      outcome: failureOutcome('Max retries exceeded'),
      agentId: 'agent',
    });

    const analysis = await createAnalyzer().analyze(stuckRun);

    const sawRepetition = analysis.errorPatterns.some((pattern) => pattern.type === 'repetition');
    expect(sawRepetition).toBe(true);
  });

  it('should extract training examples', async () => {
    const validateStep = createStep({
      thought: 'I should check the input type',
      action: 'validate_input(data)',
      observation: 'Input is valid',
    });
    const processStep = createStep({
      thought: 'Now process the data',
      action: 'process(data)',
      observation: 'Processing complete',
    });
    const completedRun = createTrajectory({
      task,
      steps: [validateStep, processStep],
      outcome: successOutcome({ result: 'done' }),
      agentId: 'agent',
    });

    const { trainingExamples } = await createAnalyzer().analyze(completedRun);

    expect(trainingExamples.length).toBeGreaterThan(0);
    // At least one example must use the task description as its input.
    const hasTaskExample = trainingExamples.some((example) => example.input === task.description);
    expect(hasTaskExample).toBe(true);
  });

  it('should apply attribution scores to steps', async () => {
    const scoredRun = createTrajectory({
      task,
      steps: [
        createStep({ action: 'a1', observation: 'o1' }),
        createStep({ action: 'a2', observation: 'o2' }),
      ],
      outcome: successOutcome('x'),
      agentId: 'agent',
    });

    await createAnalyzer().analyze(scoredRun);

    // analyze() is expected to have written a score onto each step.
    expect(scoredRun.steps[0].attributionScore).toBeDefined();
    expect(scoredRun.steps[1].attributionScore).toBeDefined();
  });
});
|
|
260
|
+
|
|
261
|
+
// Tests that changing the strategy or threshold on an existing analyzer
// affects subsequent analyze() calls. `task` comes from the enclosing describe.
describe('setStrategy and setThreshold', () => {
  it('should change credit assignment strategy', async () => {
    const analyzer = createAnalyzer('simple');
    const twoStepRun = createTrajectory({
      task,
      steps: [
        createStep({ action: 'a', observation: 'o' }),
        createStep({ action: 'b', observation: 'o' }),
      ],
      outcome: successOutcome('x'),
      agentId: 'agent',
    });

    const withSimple = await analyzer.analyze(twoStepRun);
    analyzer.setStrategy('counterfactual');
    const withCounterfactual = await analyzer.analyze(twoStepRun);

    // The two strategies must not yield the same attribution for this input.
    expect(withSimple.stepAttribution).not.toEqual(withCounterfactual.stepAttribution);
  });

  it('should change attribution threshold', async () => {
    const analyzer = createAnalyzer();
    // Steps start with preset scores on either side of the 0.15 threshold.
    const aboveThreshold = { ...createStep({ action: 'a', observation: 'o' }), attributionScore: 0.2 };
    const belowThreshold = { ...createStep({ action: 'b', observation: 'o' }), attributionScore: 0.1 };
    const presetRun = createTrajectory({
      task,
      steps: [aboveThreshold, belowThreshold],
      outcome: successOutcome('x'),
      agentId: 'agent',
    });

    analyzer.setThreshold(0.15);
    const strict = await analyzer.analyze(presetRun);

    analyzer.setThreshold(0.05);
    const lenient = await analyzer.analyze(presetRun);

    // A lower threshold may only keep the same number of key steps or add more.
    expect(lenient.keySteps.length).toBeGreaterThanOrEqual(strict.keySteps.length);
  });
});
|
|
308
|
+
|
|
309
|
+
// Tests for reading and updating analyzer configuration, and for the
// configured repetition threshold taking effect during analysis.
// `task` comes from the enclosing describe.
describe('configuration', () => {
  it('should get current configuration', () => {
    const overrides = { creditDecayFactor: 0.7, actionRepetitionThreshold: 5 };

    const current = createAnalyzer('simple', overrides).getConfig();

    expect(current.creditDecayFactor).toBe(0.7);
    expect(current.actionRepetitionThreshold).toBe(5);
    expect(current.attributionThreshold).toBe(0.15); // not overridden, so the default
  });

  it('should update configuration', () => {
    const analyzer = createAnalyzer();

    analyzer.updateConfig({ creditDecayFactor: 0.9 });

    expect(analyzer.getConfig().creditDecayFactor).toBe(0.9);
  });

  it('should preserve unchanged values when updating', () => {
    const analyzer = createAnalyzer('simple', {
      creditDecayFactor: 0.6,
      actionRepetitionThreshold: 4,
    });

    analyzer.updateConfig({ creditDecayFactor: 0.8 });

    const afterUpdate = analyzer.getConfig();
    expect(afterUpdate.creditDecayFactor).toBe(0.8);
    expect(afterUpdate.actionRepetitionThreshold).toBe(4); // untouched by the update
  });

  it('should use configurable repetition threshold', async () => {
    // Threshold raised to 5, while the trajectory repeats the action only 3 times.
    const analyzer = createAnalyzer('simple', {
      actionRepetitionThreshold: 5,
    });
    const repeatedSteps = ['obs1', 'obs2', 'obs3'].map((observation) =>
      createStep({ action: 'same_action', observation }),
    );
    const failedRun = createTrajectory({
      task,
      steps: repeatedSteps,
      outcome: failureOutcome('Failed'),
      agentId: 'agent',
    });

    const analysis = await analyzer.analyze(failedRun);

    // Three repetitions stay under the threshold, so no repetition pattern.
    const sawRepetition = analysis.errorPatterns.some((pattern) => pattern.type === 'repetition');
    expect(sawRepetition).toBe(false);
  });
});
|
|
367
|
+
|
|
368
|
+
// Tests for the `abstractable` flag and error patterns reported by analyze()
// on successful, failed, and partially successful trajectories.
// `task` comes from the enclosing describe.
describe('abstractability assessment', () => {
  it('should mark successful trajectories with key steps as abstractable', async () => {
    const successfulRun = createTrajectory({
      task,
      steps: [
        createStep({ action: 'a1', observation: 'o1' }),
        createStep({ action: 'a2', observation: 'o2' }),
        createStep({ action: 'a3', observation: 'o3' }),
      ],
      outcome: successOutcome('done'),
      agentId: 'agent',
    });

    const { abstractable } = await createAnalyzer().analyze(successfulRun);

    expect(abstractable).toBe(true);
  });

  it('should not mark simple failed trajectories as abstractable', async () => {
    const failedRun = createTrajectory({
      task,
      steps: [createStep({ action: 'fail', observation: 'error' })],
      outcome: failureOutcome('Unknown error'),
      agentId: 'agent',
    });

    const { abstractable } = await createAnalyzer().analyze(failedRun);

    // A failure without a clear error pattern is expected to be non-abstractable.
    expect(abstractable).toBe(false);
  });

  it('should mark failed trajectories with partial progress as potentially abstractable', async () => {
    const partialRun = createTrajectory({
      task,
      steps: [
        createStep({ action: 'setup', observation: 'done' }),
        createStep({ action: 'execute', observation: 'partial' }),
      ],
      // Outcome written out by hand so that partialScore can be set.
      outcome: {
        success: false,
        partialScore: 0.5,
        errorInfo: 'Timeout after partial completion',
        verificationDetails: {},
      },
      agentId: 'agent',
    });

    const { errorPatterns } = await createAnalyzer().analyze(partialRun);

    // Only the presence of an error pattern is checked here; `abstractable`
    // itself is not asserted for this case.
    expect(errorPatterns.length).toBeGreaterThan(0);
  });
});
|
|
426
|
+
});
|