cognitive-core 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +363 -2
- package/SKILL.md +193 -0
- package/dist/agents/index.d.ts +3 -0
- package/dist/agents/index.d.ts.map +1 -0
- package/dist/agents/index.js +5 -0
- package/dist/agents/index.js.map +1 -0
- package/dist/agents/mock-provider.d.ts +23 -0
- package/dist/agents/mock-provider.d.ts.map +1 -0
- package/dist/agents/mock-provider.js +71 -0
- package/dist/agents/mock-provider.js.map +1 -0
- package/dist/agents/types.d.ts +98 -0
- package/dist/agents/types.d.ts.map +1 -0
- package/dist/agents/types.js +44 -0
- package/dist/agents/types.js.map +1 -0
- package/dist/atlas.d.ts +196 -0
- package/dist/atlas.d.ts.map +1 -0
- package/dist/atlas.js +373 -0
- package/dist/atlas.js.map +1 -0
- package/dist/bin/cognitive-core.d.ts +18 -0
- package/dist/bin/cognitive-core.d.ts.map +1 -0
- package/dist/bin/cognitive-core.js +419 -0
- package/dist/bin/cognitive-core.js.map +1 -0
- package/dist/embeddings/bm25.d.ts +104 -0
- package/dist/embeddings/bm25.d.ts.map +1 -0
- package/dist/embeddings/bm25.js +264 -0
- package/dist/embeddings/bm25.js.map +1 -0
- package/dist/embeddings/index.d.ts +12 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +16 -0
- package/dist/embeddings/index.js.map +1 -0
- package/dist/embeddings/manager.d.ts +112 -0
- package/dist/embeddings/manager.d.ts.map +1 -0
- package/dist/embeddings/manager.js +215 -0
- package/dist/embeddings/manager.js.map +1 -0
- package/dist/embeddings/provider.d.ts +101 -0
- package/dist/embeddings/provider.d.ts.map +1 -0
- package/dist/embeddings/provider.js +232 -0
- package/dist/embeddings/provider.js.map +1 -0
- package/dist/embeddings/vector-store.d.ts +101 -0
- package/dist/embeddings/vector-store.d.ts.map +1 -0
- package/dist/embeddings/vector-store.js +256 -0
- package/dist/embeddings/vector-store.js.map +1 -0
- package/dist/factory.d.ts +193 -0
- package/dist/factory.d.ts.map +1 -0
- package/dist/factory.js +109 -0
- package/dist/factory.js.map +1 -0
- package/dist/index.d.ts +43 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +84 -0
- package/dist/index.js.map +1 -0
- package/dist/learning/analyzer.d.ts +110 -0
- package/dist/learning/analyzer.d.ts.map +1 -0
- package/dist/learning/analyzer.js +213 -0
- package/dist/learning/analyzer.js.map +1 -0
- package/dist/learning/effectiveness.d.ts +158 -0
- package/dist/learning/effectiveness.d.ts.map +1 -0
- package/dist/learning/effectiveness.js +251 -0
- package/dist/learning/effectiveness.js.map +1 -0
- package/dist/learning/index.d.ts +8 -0
- package/dist/learning/index.d.ts.map +1 -0
- package/dist/learning/index.js +11 -0
- package/dist/learning/index.js.map +1 -0
- package/dist/learning/llm-extractor.d.ts +88 -0
- package/dist/learning/llm-extractor.d.ts.map +1 -0
- package/dist/learning/llm-extractor.js +372 -0
- package/dist/learning/llm-extractor.js.map +1 -0
- package/dist/learning/meta-learner.d.ts +80 -0
- package/dist/learning/meta-learner.d.ts.map +1 -0
- package/dist/learning/meta-learner.js +355 -0
- package/dist/learning/meta-learner.js.map +1 -0
- package/dist/learning/pipeline.d.ts +65 -0
- package/dist/learning/pipeline.d.ts.map +1 -0
- package/dist/learning/pipeline.js +170 -0
- package/dist/learning/pipeline.js.map +1 -0
- package/dist/learning/playbook-extractor.d.ts +113 -0
- package/dist/learning/playbook-extractor.d.ts.map +1 -0
- package/dist/learning/playbook-extractor.js +523 -0
- package/dist/learning/playbook-extractor.js.map +1 -0
- package/dist/learning/usage-inference.d.ts +82 -0
- package/dist/learning/usage-inference.d.ts.map +1 -0
- package/dist/learning/usage-inference.js +261 -0
- package/dist/learning/usage-inference.js.map +1 -0
- package/dist/mcp/index.d.ts +6 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +6 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/mcp/playbook-server.d.ts +120 -0
- package/dist/mcp/playbook-server.d.ts.map +1 -0
- package/dist/mcp/playbook-server.js +427 -0
- package/dist/mcp/playbook-server.js.map +1 -0
- package/dist/memory/curated-loader.d.ts +62 -0
- package/dist/memory/curated-loader.d.ts.map +1 -0
- package/dist/memory/curated-loader.js +106 -0
- package/dist/memory/curated-loader.js.map +1 -0
- package/dist/memory/experience.d.ts +122 -0
- package/dist/memory/experience.d.ts.map +1 -0
- package/dist/memory/experience.js +392 -0
- package/dist/memory/experience.js.map +1 -0
- package/dist/memory/index.d.ts +6 -0
- package/dist/memory/index.d.ts.map +1 -0
- package/dist/memory/index.js +9 -0
- package/dist/memory/index.js.map +1 -0
- package/dist/memory/meta.d.ts +90 -0
- package/dist/memory/meta.d.ts.map +1 -0
- package/dist/memory/meta.js +362 -0
- package/dist/memory/meta.js.map +1 -0
- package/dist/memory/playbook.d.ts +133 -0
- package/dist/memory/playbook.d.ts.map +1 -0
- package/dist/memory/playbook.js +357 -0
- package/dist/memory/playbook.js.map +1 -0
- package/dist/memory/system.d.ts +167 -0
- package/dist/memory/system.d.ts.map +1 -0
- package/dist/memory/system.js +383 -0
- package/dist/memory/system.js.map +1 -0
- package/dist/runtime/backends/acp.d.ts +67 -0
- package/dist/runtime/backends/acp.d.ts.map +1 -0
- package/dist/runtime/backends/acp.js +290 -0
- package/dist/runtime/backends/acp.js.map +1 -0
- package/dist/runtime/backends/index.d.ts +5 -0
- package/dist/runtime/backends/index.d.ts.map +1 -0
- package/dist/runtime/backends/index.js +6 -0
- package/dist/runtime/backends/index.js.map +1 -0
- package/dist/runtime/backends/mock.d.ts +67 -0
- package/dist/runtime/backends/mock.d.ts.map +1 -0
- package/dist/runtime/backends/mock.js +153 -0
- package/dist/runtime/backends/mock.js.map +1 -0
- package/dist/runtime/backends/subprocess.d.ts +56 -0
- package/dist/runtime/backends/subprocess.d.ts.map +1 -0
- package/dist/runtime/backends/subprocess.js +260 -0
- package/dist/runtime/backends/subprocess.js.map +1 -0
- package/dist/runtime/flows/learning.d.ts +73 -0
- package/dist/runtime/flows/learning.d.ts.map +1 -0
- package/dist/runtime/flows/learning.js +116 -0
- package/dist/runtime/flows/learning.js.map +1 -0
- package/dist/runtime/flows/validation.d.ts +122 -0
- package/dist/runtime/flows/validation.d.ts.map +1 -0
- package/dist/runtime/flows/validation.js +223 -0
- package/dist/runtime/flows/validation.js.map +1 -0
- package/dist/runtime/index.d.ts +6 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +8 -0
- package/dist/runtime/index.js.map +1 -0
- package/dist/runtime/manager.d.ts +116 -0
- package/dist/runtime/manager.d.ts.map +1 -0
- package/dist/runtime/manager.js +416 -0
- package/dist/runtime/manager.js.map +1 -0
- package/dist/runtime/types.d.ts +138 -0
- package/dist/runtime/types.d.ts.map +1 -0
- package/dist/runtime/types.js +2 -0
- package/dist/runtime/types.js.map +1 -0
- package/dist/search/evaluator.d.ts +102 -0
- package/dist/search/evaluator.d.ts.map +1 -0
- package/dist/search/evaluator.js +352 -0
- package/dist/search/evaluator.js.map +1 -0
- package/dist/search/index.d.ts +7 -0
- package/dist/search/index.d.ts.map +1 -0
- package/dist/search/index.js +11 -0
- package/dist/search/index.js.map +1 -0
- package/dist/search/refinement-loop.d.ts +73 -0
- package/dist/search/refinement-loop.d.ts.map +1 -0
- package/dist/search/refinement-loop.js +245 -0
- package/dist/search/refinement-loop.js.map +1 -0
- package/dist/search/refinement-types.d.ts +154 -0
- package/dist/search/refinement-types.d.ts.map +1 -0
- package/dist/search/refinement-types.js +99 -0
- package/dist/search/refinement-types.js.map +1 -0
- package/dist/search/router.d.ts +61 -0
- package/dist/search/router.d.ts.map +1 -0
- package/dist/search/router.js +197 -0
- package/dist/search/router.js.map +1 -0
- package/dist/search/solver.d.ts +75 -0
- package/dist/search/solver.d.ts.map +1 -0
- package/dist/search/solver.js +216 -0
- package/dist/search/solver.js.map +1 -0
- package/dist/search/verification-runner.d.ts +125 -0
- package/dist/search/verification-runner.d.ts.map +1 -0
- package/dist/search/verification-runner.js +440 -0
- package/dist/search/verification-runner.js.map +1 -0
- package/dist/surfacing/index.d.ts +2 -0
- package/dist/surfacing/index.d.ts.map +1 -0
- package/dist/surfacing/index.js +2 -0
- package/dist/surfacing/index.js.map +1 -0
- package/dist/surfacing/skill-library.d.ts +158 -0
- package/dist/surfacing/skill-library.d.ts.map +1 -0
- package/dist/surfacing/skill-library.js +429 -0
- package/dist/surfacing/skill-library.js.map +1 -0
- package/dist/types/config.d.ts +1113 -0
- package/dist/types/config.d.ts.map +1 -0
- package/dist/types/config.js +274 -0
- package/dist/types/config.js.map +1 -0
- package/dist/types/index.d.ts +9 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +14 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/memory.d.ts +339 -0
- package/dist/types/memory.d.ts.map +1 -0
- package/dist/types/memory.js +207 -0
- package/dist/types/memory.js.map +1 -0
- package/dist/types/meta.d.ts +146 -0
- package/dist/types/meta.d.ts.map +1 -0
- package/dist/types/meta.js +51 -0
- package/dist/types/meta.js.map +1 -0
- package/dist/types/outcome.d.ts +42 -0
- package/dist/types/outcome.d.ts.map +1 -0
- package/dist/types/outcome.js +50 -0
- package/dist/types/outcome.js.map +1 -0
- package/dist/types/playbook.d.ts +119 -0
- package/dist/types/playbook.d.ts.map +1 -0
- package/dist/types/playbook.js +71 -0
- package/dist/types/playbook.js.map +1 -0
- package/dist/types/step.d.ts +44 -0
- package/dist/types/step.d.ts.map +1 -0
- package/dist/types/step.js +32 -0
- package/dist/types/step.js.map +1 -0
- package/dist/types/task.d.ts +91 -0
- package/dist/types/task.d.ts.map +1 -0
- package/dist/types/task.js +39 -0
- package/dist/types/task.js.map +1 -0
- package/dist/types/trajectory.d.ts +221 -0
- package/dist/types/trajectory.d.ts.map +1 -0
- package/dist/types/trajectory.js +60 -0
- package/dist/types/trajectory.js.map +1 -0
- package/dist/utils/index.d.ts +4 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +4 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/similarity.d.ts +31 -0
- package/dist/utils/similarity.d.ts.map +1 -0
- package/dist/utils/similarity.js +107 -0
- package/dist/utils/similarity.js.map +1 -0
- package/dist/utils/storage.d.ts +106 -0
- package/dist/utils/storage.d.ts.map +1 -0
- package/dist/utils/storage.js +203 -0
- package/dist/utils/storage.js.map +1 -0
- package/dist/utils/validation.d.ts +129 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +171 -0
- package/dist/utils/validation.js.map +1 -0
- package/package.json +61 -9
- package/scripts/migrate-to-playbooks.ts +307 -0
- package/src/agents/index.ts +14 -0
- package/src/agents/mock-provider.ts +93 -0
- package/src/agents/types.ts +137 -0
- package/src/atlas.ts +560 -0
- package/src/bin/cognitive-core.ts +470 -0
- package/src/embeddings/bm25.ts +337 -0
- package/src/embeddings/index.ts +39 -0
- package/src/embeddings/manager.ts +288 -0
- package/src/embeddings/provider.ts +311 -0
- package/src/embeddings/vector-store.ts +353 -0
- package/src/factory.ts +263 -0
- package/src/index.ts +246 -0
- package/src/learning/analyzer.ts +335 -0
- package/src/learning/effectiveness.ts +428 -0
- package/src/learning/index.ts +58 -0
- package/src/learning/llm-extractor.ts +542 -0
- package/src/learning/meta-learner.ts +516 -0
- package/src/learning/pipeline.ts +244 -0
- package/src/learning/playbook-extractor.ts +702 -0
- package/src/learning/usage-inference.ts +372 -0
- package/src/mcp/index.ts +12 -0
- package/src/mcp/playbook-server.ts +565 -0
- package/src/memory/curated-loader.ts +160 -0
- package/src/memory/experience.ts +515 -0
- package/src/memory/index.ts +27 -0
- package/src/memory/meta.ts +506 -0
- package/src/memory/playbook.ts +493 -0
- package/src/memory/system.ts +551 -0
- package/src/runtime/backends/acp.ts +378 -0
- package/src/runtime/backends/index.ts +24 -0
- package/src/runtime/backends/mock.ts +218 -0
- package/src/runtime/backends/subprocess.ts +356 -0
- package/src/runtime/flows/learning.ts +183 -0
- package/src/runtime/flows/validation.ts +381 -0
- package/src/runtime/index.ts +53 -0
- package/src/runtime/manager.ts +541 -0
- package/src/runtime/types.ts +157 -0
- package/src/search/evaluator.ts +474 -0
- package/src/search/index.ts +59 -0
- package/src/search/refinement-loop.ts +363 -0
- package/src/search/refinement-types.ts +159 -0
- package/src/search/router.ts +261 -0
- package/src/search/solver.ts +303 -0
- package/src/search/verification-runner.ts +570 -0
- package/src/surfacing/index.ts +6 -0
- package/src/surfacing/skill-library.ts +594 -0
- package/src/types/config.ts +333 -0
- package/src/types/index.ts +130 -0
- package/src/types/memory.ts +270 -0
- package/src/types/meta.ts +218 -0
- package/src/types/outcome.ts +66 -0
- package/src/types/playbook.ts +196 -0
- package/src/types/step.ts +40 -0
- package/src/types/task.ts +52 -0
- package/src/types/trajectory.ts +80 -0
- package/src/utils/index.ts +38 -0
- package/src/utils/similarity.ts +139 -0
- package/src/utils/storage.ts +249 -0
- package/src/utils/validation.ts +286 -0
- package/tests/embeddings/bm25.test.ts +130 -0
- package/tests/embeddings/manager.test.ts +205 -0
- package/tests/integration/atlas.test.ts +266 -0
- package/tests/integration/e2e.test.ts +929 -0
- package/tests/learning/analyzer.test.ts +426 -0
- package/tests/learning/effectiveness.test.ts +542 -0
- package/tests/learning/pipeline.test.ts +176 -0
- package/tests/learning/playbook-extractor-provenance.test.ts +114 -0
- package/tests/learning/usage-inference.test.ts +254 -0
- package/tests/mcp/playbook-server.test.ts +252 -0
- package/tests/memory/experience.test.ts +198 -0
- package/tests/memory/playbook.test.ts +338 -0
- package/tests/memory/provenance.test.ts +639 -0
- package/tests/memory/system.test.ts +325 -0
- package/tests/runtime/agent-manager.test.ts +512 -0
- package/tests/runtime/mock-backend.test.ts +248 -0
- package/tests/search/refinement-loop.test.ts +468 -0
- package/tests/search/refinement.test.ts +267 -0
- package/tests/search/router.test.ts +427 -0
- package/tests/surfacing/skill-library.test.ts +292 -0
- package/tests/types/outcome.test.ts +147 -0
- package/tests/types/step.test.ts +133 -0
- package/tests/types/task.test.ts +158 -0
- package/tests/types/trajectory.test.ts +253 -0
- package/tests/utils/similarity.test.ts +188 -0
- package/tests/utils/validation.test.ts +252 -0
- package/tsconfig.json +25 -0
- package/vitest.config.ts +22 -0
- package/index.d.ts +0 -4
- package/index.js +0 -4
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
TaskSchema,
|
|
4
|
+
VerificationSpecSchema,
|
|
5
|
+
createTask,
|
|
6
|
+
type Task,
|
|
7
|
+
type VerificationSpec,
|
|
8
|
+
} from '../../src/types/task.js';
|
|
9
|
+
|
|
10
|
+
// VerificationSpecSchema parsing: one case per `type` literal exercised here
// (exact_match, fuzzy_match, function, llm) plus rejection of an unknown type.
describe('VerificationSpec', () => {
  it('should validate exact_match verification', () => {
    const input: VerificationSpec = { type: 'exact_match', expected: 'hello' };

    const { type, expected } = VerificationSpecSchema.parse(input);

    expect(type).toBe('exact_match');
    expect(expected).toBe('hello');
  });

  it('should validate fuzzy_match with tolerance', () => {
    const input: VerificationSpec = { type: 'fuzzy_match', expected: 42, tolerance: 0.1 };

    const { type, tolerance } = VerificationSpecSchema.parse(input);

    expect(type).toBe('fuzzy_match');
    expect(tolerance).toBe(0.1);
  });

  it('should validate function verification', () => {
    const input: VerificationSpec = { type: 'function', verifier_id: 'custom-verifier' };

    const { type, verifier_id: verifierId } = VerificationSpecSchema.parse(input);

    expect(type).toBe('function');
    expect(verifierId).toBe('custom-verifier');
  });

  it('should validate llm verification', () => {
    const input: VerificationSpec = { type: 'llm', expected: 'The output should be correct' };

    // Only the `type` field is asserted for this variant.
    expect(VerificationSpecSchema.parse(input).type).toBe('llm');
  });

  it('should reject invalid verification types', () => {
    const parseUnknownType = () => VerificationSpecSchema.parse({ type: 'invalid' });

    expect(parseUnknownType).toThrow();
  });
});
|
|
57
|
+
|
|
58
|
+
// TaskSchema parsing: a fully populated task, the values expected when the
// optional fields are omitted, and a task without `verification`.
describe('Task', () => {
  it('should validate a complete task', () => {
    const fullTask: Task = {
      id: 'task-1',
      domain: 'code',
      description: 'Write a function to add two numbers',
      context: { language: 'typescript' },
      verification: { type: 'exact_match', expected: 5 },
      embedding: [0.1, 0.2, 0.3],
      metadata: { priority: 'high' },
      createdAt: new Date(),
    };

    const parsed = TaskSchema.parse(fullTask);

    expect(parsed.id).toBe('task-1');
    expect(parsed.domain).toBe('code');
    expect(parsed.context.language).toBe('typescript');
  });

  it('should apply defaults for optional fields', () => {
    // Only id, domain and description are supplied.
    const { context, metadata, createdAt } = TaskSchema.parse({
      id: 'task-2',
      domain: 'reasoning',
      description: 'Solve a puzzle',
    });

    expect(context).toEqual({});
    expect(metadata).toEqual({});
    expect(createdAt).toBeInstanceOf(Date);
  });

  it('should allow missing verification', () => {
    const { verification } = TaskSchema.parse({
      id: 'task-3',
      domain: 'arc',
      description: 'Pattern matching task',
    });

    expect(verification).toBeUndefined();
  });
});
|
|
96
|
+
|
|
97
|
+
// createTask(): id handling (generated vs. caller-supplied), pass-through of
// context / verification / metadata, and the createdAt timestamp.
describe('createTask', () => {
  it('should create a task with required fields', () => {
    const created = createTask({
      domain: 'code',
      description: 'Implement a sorting algorithm',
    });

    // No id was supplied, so a non-empty one is expected to be filled in.
    expect(created.id).toBeDefined();
    expect(created.id.length).toBeGreaterThan(0);
    expect(created.domain).toBe('code');
    expect(created.description).toBe('Implement a sorting algorithm');
  });

  it('should create a task with custom id', () => {
    const { id } = createTask({
      id: 'my-custom-id',
      domain: 'swe',
      description: 'Fix a bug',
    });

    expect(id).toBe('my-custom-id');
  });

  it('should create a task with context', () => {
    const { context } = createTask({
      domain: 'code',
      description: 'Write tests',
      context: { framework: 'vitest', coverage: true },
    });

    expect(context.framework).toBe('vitest');
    expect(context.coverage).toBe(true);
  });

  it('should create a task with verification', () => {
    const { verification } = createTask({
      domain: 'reasoning',
      description: 'Calculate 2+2',
      verification: { type: 'exact_match', expected: 4 },
    });

    expect(verification?.type).toBe('exact_match');
    expect(verification?.expected).toBe(4);
  });

  it('should create a task with metadata', () => {
    const { metadata } = createTask({
      domain: 'code',
      description: 'Refactor code',
      metadata: { author: 'test', source: 'manual' },
    });

    expect(metadata.author).toBe('test');
    expect(metadata.source).toBe('manual');
  });

  it('should set createdAt to current time', () => {
    // Bracket the call with two clock readings; createdAt must fall between them.
    const earliest = Date.now();
    const created = createTask({
      domain: 'test',
      description: 'Test task',
    });
    const latest = Date.now();

    const createdMs = created.createdAt.getTime();
    expect(createdMs).toBeGreaterThanOrEqual(earliest);
    expect(createdMs).toBeLessThanOrEqual(latest);
  });
});
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
TrajectorySchema,
|
|
4
|
+
createTrajectory,
|
|
5
|
+
isSuccessful,
|
|
6
|
+
getKeySteps,
|
|
7
|
+
type Trajectory,
|
|
8
|
+
} from '../../src/types/trajectory.js';
|
|
9
|
+
import { createTask } from '../../src/types/task.js';
|
|
10
|
+
import { createStep } from '../../src/types/step.js';
|
|
11
|
+
import { successOutcome, failureOutcome } from '../../src/types/outcome.js';
|
|
12
|
+
|
|
13
|
+
// TrajectorySchema parsing: a fully populated trajectory, the values expected
// when the metric/metadata/timestamp fields are omitted, and an empty step list.
describe('Trajectory', () => {
  // Fixtures shared by the cases below.
  const fixtureTask = createTask({
    id: 'task-1',
    domain: 'code',
    description: 'Write a hello world function',
  });

  const writeStep = createStep({
    thought: 'I need to create a function',
    action: 'write_code()',
    observation: 'Function written',
  });
  const testStep = createStep({
    action: 'run_tests()',
    observation: 'Tests passed',
  });
  const fixtureSteps = [writeStep, testStep];

  it('should validate a complete trajectory', () => {
    const complete: Trajectory = {
      id: 'traj-1',
      task: fixtureTask,
      steps: fixtureSteps,
      outcome: successOutcome('function helloWorld() { return "hello"; }'),
      agentId: 'claude-code',
      timestamp: new Date(),
      llmCalls: 5,
      totalTokens: 1500,
      wallTimeSeconds: 30,
      metadata: { version: '1.0' },
    };

    const parsed = TrajectorySchema.parse(complete);

    expect(parsed.id).toBe('traj-1');
    expect(parsed.agentId).toBe('claude-code');
    expect(parsed.llmCalls).toBe(5);
    expect(parsed.totalTokens).toBe(1500);
  });

  it('should apply defaults for optional fields', () => {
    // Metrics, metadata and timestamp are all left out of the input.
    const parsed = TrajectorySchema.parse({
      id: 'traj-2',
      task: fixtureTask,
      steps: [],
      outcome: { success: true },
      agentId: 'test-agent',
    });
    const { llmCalls, totalTokens, wallTimeSeconds, metadata, timestamp } = parsed;

    expect(llmCalls).toBe(0);
    expect(totalTokens).toBe(0);
    expect(wallTimeSeconds).toBe(0);
    expect(metadata).toEqual({});
    expect(timestamp).toBeInstanceOf(Date);
  });

  it('should allow empty steps array', () => {
    const { steps } = TrajectorySchema.parse({
      id: 'traj-3',
      task: fixtureTask,
      steps: [],
      outcome: failureOutcome('No steps taken'),
      agentId: 'agent',
    });

    expect(steps).toEqual([]);
  });
});
|
|
78
|
+
|
|
79
|
+
// createTrajectory(): id handling, pass-through of task / steps / metrics /
// metadata, and the timestamp assigned at creation.
describe('createTrajectory', () => {
  // Fixtures shared by the cases below.
  const puzzleTask = createTask({
    domain: 'reasoning',
    description: 'Solve a logic puzzle',
  });

  const puzzleSteps = [
    createStep({ action: 'analyze', observation: 'Found pattern' }),
    createStep({ action: 'apply', observation: 'Applied rule' }),
    createStep({ action: 'verify', observation: 'Correct' }),
  ];

  it('should create a trajectory with required fields', () => {
    const created = createTrajectory({
      task: puzzleTask,
      steps: puzzleSteps,
      outcome: successOutcome('solved'),
      agentId: 'reasoning-agent',
    });

    // No id was supplied, so a non-empty one is expected to be filled in.
    expect(created.id).toBeDefined();
    expect(created.id.length).toBeGreaterThan(0);
    expect(created.task).toEqual(puzzleTask);
    expect(created.steps).toHaveLength(3);
    expect(created.agentId).toBe('reasoning-agent');
  });

  it('should create a trajectory with custom id', () => {
    const { id } = createTrajectory({
      id: 'custom-traj-id',
      task: puzzleTask,
      steps: [],
      outcome: failureOutcome('failed'),
      agentId: 'agent',
    });

    expect(id).toBe('custom-traj-id');
  });

  it('should create a trajectory with metrics', () => {
    const { llmCalls, totalTokens, wallTimeSeconds } = createTrajectory({
      task: puzzleTask,
      steps: puzzleSteps,
      outcome: successOutcome(42),
      agentId: 'agent',
      llmCalls: 10,
      totalTokens: 5000,
      wallTimeSeconds: 120,
    });

    expect(llmCalls).toBe(10);
    expect(totalTokens).toBe(5000);
    expect(wallTimeSeconds).toBe(120);
  });

  it('should create a trajectory with metadata', () => {
    const { metadata } = createTrajectory({
      task: puzzleTask,
      steps: puzzleSteps,
      outcome: successOutcome('done'),
      agentId: 'agent',
      metadata: { attempt: 2, model: 'gpt-4' },
    });

    expect(metadata.attempt).toBe(2);
    expect(metadata.model).toBe('gpt-4');
  });

  it('should set timestamp to current time', () => {
    // Bracket the call with two clock readings; timestamp must fall between them.
    const earliest = Date.now();
    const created = createTrajectory({
      task: puzzleTask,
      steps: [],
      outcome: successOutcome('x'),
      agentId: 'agent',
    });
    const latest = Date.now();

    const stampedMs = created.timestamp.getTime();
    expect(stampedMs).toBeGreaterThanOrEqual(earliest);
    expect(stampedMs).toBeLessThanOrEqual(latest);
  });
});
|
|
156
|
+
|
|
157
|
+
// isSuccessful(): expected to be true for a trajectory built with
// successOutcome and false for one built with failureOutcome.
describe('isSuccessful', () => {
  const anyTask = createTask({ domain: 'test', description: 'test' });

  it('should return true for successful trajectory', () => {
    const succeeded = createTrajectory({
      task: anyTask,
      steps: [],
      outcome: successOutcome('done'),
      agentId: 'agent',
    });

    const verdict = isSuccessful(succeeded);

    expect(verdict).toBe(true);
  });

  it('should return false for failed trajectory', () => {
    const failed = createTrajectory({
      task: anyTask,
      steps: [],
      outcome: failureOutcome('error'),
      agentId: 'agent',
    });

    const verdict = isSuccessful(failed);

    expect(verdict).toBe(false);
  });
});
|
|
180
|
+
|
|
181
|
+
// getKeySteps(): selects steps by attributionScore, using either the default
// cutoff or one passed as the second argument.
describe('getKeySteps', () => {
  const anyTask = createTask({ domain: 'test', description: 'test' });

  // A step whose only distinguishing data is its action and attribution score.
  const scored = (action: string, attributionScore: number) => ({
    ...createStep({ action, observation: 'o' }),
    attributionScore,
  });

  // Wraps the given steps in an otherwise fixed, successful trajectory.
  const trajectoryOf = (steps: Parameters<typeof createTrajectory>[0]['steps']) =>
    createTrajectory({
      task: anyTask,
      steps,
      outcome: successOutcome('x'),
      agentId: 'agent',
    });

  it('should return steps above threshold', () => {
    const trajectory = trajectoryOf([
      scored('a', 0.1),
      scored('b', 0.3),
      scored('c', 0.8),
      scored('d', 0.15),
    ]);

    const keySteps = getKeySteps(trajectory);

    // With the default cutoff, 0.1 is expected to be excluded and 0.15 kept;
    // the survivors come back in their original order.
    expect(keySteps).toHaveLength(3);
    expect(keySteps.map((step) => step.action)).toEqual(['b', 'c', 'd']);
  });

  it('should use custom threshold', () => {
    const trajectory = trajectoryOf([scored('a', 0.4), scored('b', 0.6)]);

    const atHalf = getKeySteps(trajectory, 0.5);
    const atLow = getKeySteps(trajectory, 0.3);
    const atHigh = getKeySteps(trajectory, 0.7);

    expect(atHalf).toHaveLength(1);
    expect(atLow).toHaveLength(2);
    expect(atHigh).toHaveLength(0);
  });

  it('should handle steps without attribution scores', () => {
    // The first step is left exactly as createStep returns it; no score is added here.
    const trajectory = trajectoryOf([
      createStep({ action: 'a', observation: 'o' }),
      scored('b', 0.5),
    ]);

    const keySteps = getKeySteps(trajectory);

    expect(keySteps).toHaveLength(1);
    expect(keySteps.map((step) => step.action)).toEqual(['b']);
  });

  it('should return empty array for trajectory with no key steps', () => {
    const trajectory = trajectoryOf([scored('a', 0.05), scored('b', 0.1)]);

    expect(getKeySteps(trajectory)).toHaveLength(0);
  });
});
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
cosineSimilarity,
|
|
4
|
+
jaccardSimilarity,
|
|
5
|
+
ngramSimilarity,
|
|
6
|
+
textSimilarity,
|
|
7
|
+
tokenize,
|
|
8
|
+
findTopK,
|
|
9
|
+
} from '../../src/utils/similarity.js';
|
|
10
|
+
|
|
11
|
+
// cosineSimilarity(): identical, orthogonal, opposite and arbitrary vectors,
// plus the zero-vector and length-mismatch cases.
describe('cosineSimilarity', () => {
  it('should return 1 for identical vectors', () => {
    const unit = cosineSimilarity([1, 0, 0], [1, 0, 0]);
    const diagonal = cosineSimilarity([0.5, 0.5], [0.5, 0.5]);

    expect(unit).toBeCloseTo(1);
    expect(diagonal).toBeCloseTo(1);
  });

  it('should return 0 for orthogonal vectors', () => {
    const planar = cosineSimilarity([1, 0], [0, 1]);
    const spatial = cosineSimilarity([1, 0, 0], [0, 1, 0]);

    expect(planar).toBeCloseTo(0);
    expect(spatial).toBeCloseTo(0);
  });

  it('should return -1 for opposite vectors', () => {
    const opposed = cosineSimilarity([1, 0], [-1, 0]);

    expect(opposed).toBeCloseTo(-1);
  });

  it('should handle arbitrary vectors', () => {
    // Hand-computed reference for [1, 2, 3] vs [4, 5, 6]:
    //   dot product = 1*4 + 2*5 + 3*6 = 32
    //   norms       = sqrt(1 + 4 + 9) = sqrt(14) and sqrt(16 + 25 + 36) = sqrt(77)
    const normA = Math.sqrt(14);
    const normB = Math.sqrt(77);
    const expected = 32 / (normA * normB);

    const actual = cosineSimilarity([1, 2, 3], [4, 5, 6]);

    expect(actual).toBeCloseTo(expected);
  });

  it('should return 0 for zero vectors', () => {
    const zero = [0, 0, 0];
    const nonZero = [1, 2, 3];

    // Exact 0 is expected regardless of which argument is the zero vector.
    expect(cosineSimilarity(zero, nonZero)).toBe(0);
    expect(cosineSimilarity(nonZero, zero)).toBe(0);
  });

  it('should throw for vectors of different lengths', () => {
    const compareMismatched = () => cosineSimilarity([1, 2], [1, 2, 3]);

    expect(compareMismatched).toThrow();
  });
});
|
|
44
|
+
|
|
45
|
+
describe('tokenize', () => {
  // Each case pins the exact token array tokenize returns for one kind of input.

  it('should split text into lowercase tokens', () => {
    expect(tokenize('Hello World')).toEqual(['hello', 'world']);
  });

  it('should remove punctuation', () => {
    expect(tokenize('Hello, world!')).toEqual(['hello', 'world']);
  });

  it('should handle multiple spaces', () => {
    // The input must contain runs of consecutive spaces; with single spaces
    // this case would only repeat the plain splitting case above.
    expect(tokenize('a   b    c')).toEqual(['a', 'b', 'c']);
  });

  it('should return empty array for empty string', () => {
    expect(tokenize('')).toEqual([]);
  });

  it('should handle special characters', () => {
    // Apostrophes and hyphens are expected to act as token separators.
    expect(tokenize("it's a test-case")).toEqual(['it', 's', 'a', 'test', 'case']);
  });
});
|
|
66
|
+
|
|
67
|
+
describe('jaccardSimilarity', () => {
  // Checks the boundary scores (0 and 1) and that partial overlap lands strictly between them.

  it('should return 1 for identical strings', () => {
    const phrase = 'hello world';
    expect(jaccardSimilarity(phrase, phrase)).toBe(1);
  });

  it('should return 0 for completely different strings', () => {
    const score = jaccardSimilarity('abc', 'xyz');
    expect(score).toBe(0);
  });

  it('should handle partial overlap', () => {
    // The two phrases share only the word 'hello'.
    const score = jaccardSimilarity('hello world', 'hello there');
    expect(score).toBeGreaterThan(0);
    expect(score).toBeLessThan(1);
  });

  it('should return 0 for empty strings', () => {
    const bothEmpty = jaccardSimilarity('', '');
    expect(bothEmpty).toBe(0);

    const oneEmpty = jaccardSimilarity('hello', '');
    expect(oneEmpty).toBe(0);
  });

  it('should be case insensitive', () => {
    const score = jaccardSimilarity('HELLO', 'hello');
    expect(score).toBe(1);
  });
});
|
|
92
|
+
|
|
93
|
+
describe('ngramSimilarity', () => {
  // Covers the boundary scores, a near-miss pair, and the optional n-gram size argument.

  it('should return 1 for identical strings', () => {
    const word = 'hello';
    expect(ngramSimilarity(word, word)).toBe(1);
  });

  it('should return 0 for completely different strings', () => {
    const score = ngramSimilarity('abc', 'xyz');
    expect(score).toBe(0);
  });

  it('should handle partial overlap', () => {
    // The inputs differ only in their final character.
    const score = ngramSimilarity('hello', 'hella');
    expect(score).toBeGreaterThan(0);
    expect(score).toBeLessThan(1);
  });

  it('should use custom n-gram size', () => {
    // Same string pair scored with n = 2 and n = 3; the two scores must differ.
    const [withBigrams, withTrigrams] = [
      ngramSimilarity('hello', 'help', 2),
      ngramSimilarity('hello', 'help', 3),
    ];
    expect(withBigrams).not.toBe(withTrigrams);
  });
});
|
|
115
|
+
|
|
116
|
+
describe('textSimilarity', () => {
  // Checks the two boundary scores and one partially matching pair.

  it('should return 1 for identical text', () => {
    const phrase = 'hello world';
    expect(textSimilarity(phrase, phrase)).toBe(1);
  });

  it('should return 0 for completely different text', () => {
    const score = textSimilarity('abc', 'xyz');
    expect(score).toBe(0);
  });

  it('should return intermediate values for partial matches', () => {
    const score = textSimilarity('fix the bug', 'debug the issue');
    // Strictly inside the open interval (0, 1).
    expect(score).toBeGreaterThan(0);
    expect(score).toBeLessThan(1);
  });
});
|
|
131
|
+
|
|
132
|
+
describe('findTopK', () => {
  // Exercises findTopK with string queries, vector queries, missing values,
  // an oversized k and an empty candidate list.

  it('should find top-k items by string similarity', () => {
    const candidates = [
      { id: 1, text: 'hello world' },
      { id: 2, text: 'hello there' },
      { id: 3, text: 'goodbye world' },
      { id: 4, text: 'completely different' },
    ];
    const readText = (candidate: { id: number; text: string }) => candidate.text;

    const results = findTopK('hello world', candidates, readText, 2);

    expect(results).toHaveLength(2);
    // The exact match must rank first with a perfect score.
    expect(results[0].item.id).toBe(1);
    expect(results[0].score).toBe(1);
  });

  it('should find top-k items by vector similarity', () => {
    const candidates = [
      { id: 1, vec: [1, 0, 0] },
      { id: 2, vec: [0.9, 0.1, 0] },
      { id: 3, vec: [0, 1, 0] },
      { id: 4, vec: [-1, 0, 0] },
    ];
    const query = [1, 0, 0];

    const results = findTopK(query, candidates, (candidate) => candidate.vec, 2);

    expect(results).toHaveLength(2);
    // Expected ranking: the identical vector first, then the nearly parallel one.
    expect(results[0].item.id).toBe(1);
    expect(results[1].item.id).toBe(2);
  });

  it('should handle items without vectors', () => {
    const candidates = [
      { id: 1, text: 'hello' },
      { id: 2, text: undefined },
      { id: 3, text: 'world' },
    ];

    const results = findTopK('hello', candidates, (candidate) => candidate.text, 3);

    // All three candidates come back, and the last-ranked one scores zero.
    expect(results).toHaveLength(3);
    expect(results[2].score).toBe(0); // Item with undefined text
  });

  it('should return fewer items if k is larger than array', () => {
    const onlyCandidate = { id: 1, text: 'hello' };

    const results = findTopK('hello', [onlyCandidate], (candidate) => candidate.text, 10);

    expect(results).toHaveLength(1);
  });

  it('should handle empty items array', () => {
    const noCandidates: { text: string }[] = [];

    const results = findTopK('hello', noCandidates, (candidate) => candidate.text, 5);

    expect(results).toHaveLength(0);
  });
});
|