cognitive-core 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +302 -116
- package/SKILL.md +193 -0
- package/dist/agents/index.d.ts +3 -0
- package/dist/agents/index.d.ts.map +1 -0
- package/dist/agents/index.js +5 -0
- package/dist/agents/index.js.map +1 -0
- package/dist/agents/mock-provider.d.ts +23 -0
- package/dist/agents/mock-provider.d.ts.map +1 -0
- package/dist/agents/mock-provider.js +71 -0
- package/dist/agents/mock-provider.js.map +1 -0
- package/dist/agents/types.d.ts +98 -0
- package/dist/agents/types.d.ts.map +1 -0
- package/dist/agents/types.js +44 -0
- package/dist/agents/types.js.map +1 -0
- package/dist/atlas.d.ts +196 -0
- package/dist/atlas.d.ts.map +1 -0
- package/dist/atlas.js +373 -0
- package/dist/atlas.js.map +1 -0
- package/dist/bin/cognitive-core.d.ts +18 -0
- package/dist/bin/cognitive-core.d.ts.map +1 -0
- package/dist/bin/cognitive-core.js +419 -0
- package/dist/bin/cognitive-core.js.map +1 -0
- package/dist/embeddings/bm25.d.ts +104 -0
- package/dist/embeddings/bm25.d.ts.map +1 -0
- package/dist/embeddings/bm25.js +264 -0
- package/dist/embeddings/bm25.js.map +1 -0
- package/dist/embeddings/index.d.ts +12 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +16 -0
- package/dist/embeddings/index.js.map +1 -0
- package/dist/embeddings/manager.d.ts +112 -0
- package/dist/embeddings/manager.d.ts.map +1 -0
- package/dist/embeddings/manager.js +215 -0
- package/dist/embeddings/manager.js.map +1 -0
- package/dist/embeddings/provider.d.ts +101 -0
- package/dist/embeddings/provider.d.ts.map +1 -0
- package/dist/embeddings/provider.js +232 -0
- package/dist/embeddings/provider.js.map +1 -0
- package/dist/embeddings/vector-store.d.ts +101 -0
- package/dist/embeddings/vector-store.d.ts.map +1 -0
- package/dist/embeddings/vector-store.js +256 -0
- package/dist/embeddings/vector-store.js.map +1 -0
- package/dist/factory.d.ts +193 -0
- package/dist/factory.d.ts.map +1 -0
- package/dist/factory.js +109 -0
- package/dist/factory.js.map +1 -0
- package/dist/index.d.ts +30 -453
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +84 -509
- package/dist/index.js.map +1 -0
- package/dist/learning/analyzer.d.ts +110 -0
- package/dist/learning/analyzer.d.ts.map +1 -0
- package/dist/learning/analyzer.js +213 -0
- package/dist/learning/analyzer.js.map +1 -0
- package/dist/learning/effectiveness.d.ts +158 -0
- package/dist/learning/effectiveness.d.ts.map +1 -0
- package/dist/learning/effectiveness.js +251 -0
- package/dist/learning/effectiveness.js.map +1 -0
- package/dist/learning/index.d.ts +8 -0
- package/dist/learning/index.d.ts.map +1 -0
- package/dist/learning/index.js +11 -0
- package/dist/learning/index.js.map +1 -0
- package/dist/learning/llm-extractor.d.ts +88 -0
- package/dist/learning/llm-extractor.d.ts.map +1 -0
- package/dist/learning/llm-extractor.js +372 -0
- package/dist/learning/llm-extractor.js.map +1 -0
- package/dist/learning/meta-learner.d.ts +80 -0
- package/dist/learning/meta-learner.d.ts.map +1 -0
- package/dist/learning/meta-learner.js +355 -0
- package/dist/learning/meta-learner.js.map +1 -0
- package/dist/learning/pipeline.d.ts +65 -0
- package/dist/learning/pipeline.d.ts.map +1 -0
- package/dist/learning/pipeline.js +170 -0
- package/dist/learning/pipeline.js.map +1 -0
- package/dist/learning/playbook-extractor.d.ts +113 -0
- package/dist/learning/playbook-extractor.d.ts.map +1 -0
- package/dist/learning/playbook-extractor.js +523 -0
- package/dist/learning/playbook-extractor.js.map +1 -0
- package/dist/learning/usage-inference.d.ts +82 -0
- package/dist/learning/usage-inference.d.ts.map +1 -0
- package/dist/learning/usage-inference.js +261 -0
- package/dist/learning/usage-inference.js.map +1 -0
- package/dist/mcp/index.d.ts +6 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +6 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/mcp/playbook-server.d.ts +120 -0
- package/dist/mcp/playbook-server.d.ts.map +1 -0
- package/dist/mcp/playbook-server.js +427 -0
- package/dist/mcp/playbook-server.js.map +1 -0
- package/dist/memory/curated-loader.d.ts +62 -0
- package/dist/memory/curated-loader.d.ts.map +1 -0
- package/dist/memory/curated-loader.js +106 -0
- package/dist/memory/curated-loader.js.map +1 -0
- package/dist/memory/experience.d.ts +122 -0
- package/dist/memory/experience.d.ts.map +1 -0
- package/dist/memory/experience.js +392 -0
- package/dist/memory/experience.js.map +1 -0
- package/dist/memory/index.d.ts +6 -0
- package/dist/memory/index.d.ts.map +1 -0
- package/dist/memory/index.js +9 -0
- package/dist/memory/index.js.map +1 -0
- package/dist/memory/meta.d.ts +90 -0
- package/dist/memory/meta.d.ts.map +1 -0
- package/dist/memory/meta.js +362 -0
- package/dist/memory/meta.js.map +1 -0
- package/dist/memory/playbook.d.ts +133 -0
- package/dist/memory/playbook.d.ts.map +1 -0
- package/dist/memory/playbook.js +357 -0
- package/dist/memory/playbook.js.map +1 -0
- package/dist/memory/system.d.ts +167 -0
- package/dist/memory/system.d.ts.map +1 -0
- package/dist/memory/system.js +383 -0
- package/dist/memory/system.js.map +1 -0
- package/dist/runtime/backends/acp.d.ts +67 -0
- package/dist/runtime/backends/acp.d.ts.map +1 -0
- package/dist/runtime/backends/acp.js +290 -0
- package/dist/runtime/backends/acp.js.map +1 -0
- package/dist/runtime/backends/index.d.ts +5 -0
- package/dist/runtime/backends/index.d.ts.map +1 -0
- package/dist/runtime/backends/index.js +6 -0
- package/dist/runtime/backends/index.js.map +1 -0
- package/dist/runtime/backends/mock.d.ts +67 -0
- package/dist/runtime/backends/mock.d.ts.map +1 -0
- package/dist/runtime/backends/mock.js +153 -0
- package/dist/runtime/backends/mock.js.map +1 -0
- package/dist/runtime/backends/subprocess.d.ts +56 -0
- package/dist/runtime/backends/subprocess.d.ts.map +1 -0
- package/dist/runtime/backends/subprocess.js +260 -0
- package/dist/runtime/backends/subprocess.js.map +1 -0
- package/dist/runtime/flows/learning.d.ts +73 -0
- package/dist/runtime/flows/learning.d.ts.map +1 -0
- package/dist/runtime/flows/learning.js +116 -0
- package/dist/runtime/flows/learning.js.map +1 -0
- package/dist/runtime/flows/validation.d.ts +122 -0
- package/dist/runtime/flows/validation.d.ts.map +1 -0
- package/dist/runtime/flows/validation.js +223 -0
- package/dist/runtime/flows/validation.js.map +1 -0
- package/dist/runtime/index.d.ts +6 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +8 -0
- package/dist/runtime/index.js.map +1 -0
- package/dist/runtime/manager.d.ts +116 -0
- package/dist/runtime/manager.d.ts.map +1 -0
- package/dist/runtime/manager.js +416 -0
- package/dist/runtime/manager.js.map +1 -0
- package/dist/runtime/types.d.ts +138 -0
- package/dist/runtime/types.d.ts.map +1 -0
- package/dist/runtime/types.js +2 -0
- package/dist/runtime/types.js.map +1 -0
- package/dist/search/evaluator.d.ts +102 -0
- package/dist/search/evaluator.d.ts.map +1 -0
- package/dist/search/evaluator.js +352 -0
- package/dist/search/evaluator.js.map +1 -0
- package/dist/search/index.d.ts +7 -0
- package/dist/search/index.d.ts.map +1 -0
- package/dist/search/index.js +11 -0
- package/dist/search/index.js.map +1 -0
- package/dist/search/refinement-loop.d.ts +73 -0
- package/dist/search/refinement-loop.d.ts.map +1 -0
- package/dist/search/refinement-loop.js +245 -0
- package/dist/search/refinement-loop.js.map +1 -0
- package/dist/search/refinement-types.d.ts +154 -0
- package/dist/search/refinement-types.d.ts.map +1 -0
- package/dist/search/refinement-types.js +99 -0
- package/dist/search/refinement-types.js.map +1 -0
- package/dist/search/router.d.ts +61 -0
- package/dist/search/router.d.ts.map +1 -0
- package/dist/search/router.js +197 -0
- package/dist/search/router.js.map +1 -0
- package/dist/search/solver.d.ts +75 -0
- package/dist/search/solver.d.ts.map +1 -0
- package/dist/search/solver.js +216 -0
- package/dist/search/solver.js.map +1 -0
- package/dist/search/verification-runner.d.ts +125 -0
- package/dist/search/verification-runner.d.ts.map +1 -0
- package/dist/search/verification-runner.js +440 -0
- package/dist/search/verification-runner.js.map +1 -0
- package/dist/surfacing/index.d.ts +2 -0
- package/dist/surfacing/index.d.ts.map +1 -0
- package/dist/surfacing/index.js +2 -0
- package/dist/surfacing/index.js.map +1 -0
- package/dist/surfacing/skill-library.d.ts +158 -0
- package/dist/surfacing/skill-library.d.ts.map +1 -0
- package/dist/surfacing/skill-library.js +429 -0
- package/dist/surfacing/skill-library.js.map +1 -0
- package/dist/types/config.d.ts +1113 -0
- package/dist/types/config.d.ts.map +1 -0
- package/dist/types/config.js +274 -0
- package/dist/types/config.js.map +1 -0
- package/dist/types/index.d.ts +9 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +14 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/memory.d.ts +339 -0
- package/dist/types/memory.d.ts.map +1 -0
- package/dist/types/memory.js +207 -0
- package/dist/types/memory.js.map +1 -0
- package/dist/types/meta.d.ts +146 -0
- package/dist/types/meta.d.ts.map +1 -0
- package/dist/types/meta.js +51 -0
- package/dist/types/meta.js.map +1 -0
- package/dist/types/outcome.d.ts +42 -0
- package/dist/types/outcome.d.ts.map +1 -0
- package/dist/types/outcome.js +50 -0
- package/dist/types/outcome.js.map +1 -0
- package/dist/types/playbook.d.ts +119 -0
- package/dist/types/playbook.d.ts.map +1 -0
- package/dist/types/playbook.js +71 -0
- package/dist/types/playbook.js.map +1 -0
- package/dist/types/step.d.ts +44 -0
- package/dist/types/step.d.ts.map +1 -0
- package/dist/types/step.js +32 -0
- package/dist/types/step.js.map +1 -0
- package/dist/types/task.d.ts +91 -0
- package/dist/types/task.d.ts.map +1 -0
- package/dist/types/task.js +39 -0
- package/dist/types/task.js.map +1 -0
- package/dist/types/trajectory.d.ts +221 -0
- package/dist/types/trajectory.d.ts.map +1 -0
- package/dist/types/trajectory.js +60 -0
- package/dist/types/trajectory.js.map +1 -0
- package/dist/utils/index.d.ts +4 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +4 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/similarity.d.ts +31 -0
- package/dist/utils/similarity.d.ts.map +1 -0
- package/dist/utils/similarity.js +107 -0
- package/dist/utils/similarity.js.map +1 -0
- package/dist/utils/storage.d.ts +106 -0
- package/dist/utils/storage.d.ts.map +1 -0
- package/dist/utils/storage.js +203 -0
- package/dist/utils/storage.js.map +1 -0
- package/dist/utils/validation.d.ts +129 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +171 -0
- package/dist/utils/validation.js.map +1 -0
- package/package.json +50 -34
- package/scripts/migrate-to-playbooks.ts +307 -0
- package/src/agents/index.ts +14 -0
- package/src/agents/mock-provider.ts +93 -0
- package/src/agents/types.ts +137 -0
- package/src/atlas.ts +560 -0
- package/src/bin/cognitive-core.ts +470 -0
- package/src/embeddings/bm25.ts +337 -0
- package/src/embeddings/index.ts +39 -0
- package/src/embeddings/manager.ts +288 -0
- package/src/embeddings/provider.ts +311 -0
- package/src/embeddings/vector-store.ts +353 -0
- package/src/factory.ts +263 -0
- package/src/index.ts +246 -0
- package/src/learning/analyzer.ts +335 -0
- package/src/learning/effectiveness.ts +428 -0
- package/src/learning/index.ts +58 -0
- package/src/learning/llm-extractor.ts +542 -0
- package/src/learning/meta-learner.ts +516 -0
- package/src/learning/pipeline.ts +244 -0
- package/src/learning/playbook-extractor.ts +702 -0
- package/src/learning/usage-inference.ts +372 -0
- package/src/mcp/index.ts +12 -0
- package/src/mcp/playbook-server.ts +565 -0
- package/src/memory/curated-loader.ts +160 -0
- package/src/memory/experience.ts +515 -0
- package/src/memory/index.ts +27 -0
- package/src/memory/meta.ts +506 -0
- package/src/memory/playbook.ts +493 -0
- package/src/memory/system.ts +551 -0
- package/src/runtime/backends/acp.ts +378 -0
- package/src/runtime/backends/index.ts +24 -0
- package/src/runtime/backends/mock.ts +218 -0
- package/src/runtime/backends/subprocess.ts +356 -0
- package/src/runtime/flows/learning.ts +183 -0
- package/src/runtime/flows/validation.ts +381 -0
- package/src/runtime/index.ts +53 -0
- package/src/runtime/manager.ts +541 -0
- package/src/runtime/types.ts +157 -0
- package/src/search/evaluator.ts +474 -0
- package/src/search/index.ts +59 -0
- package/src/search/refinement-loop.ts +363 -0
- package/src/search/refinement-types.ts +159 -0
- package/src/search/router.ts +261 -0
- package/src/search/solver.ts +303 -0
- package/src/search/verification-runner.ts +570 -0
- package/src/surfacing/index.ts +6 -0
- package/src/surfacing/skill-library.ts +594 -0
- package/src/types/config.ts +333 -0
- package/src/types/index.ts +130 -0
- package/src/types/memory.ts +270 -0
- package/src/types/meta.ts +218 -0
- package/src/types/outcome.ts +66 -0
- package/src/types/playbook.ts +196 -0
- package/src/types/step.ts +40 -0
- package/src/types/task.ts +52 -0
- package/src/types/trajectory.ts +80 -0
- package/src/utils/index.ts +38 -0
- package/src/utils/similarity.ts +139 -0
- package/src/utils/storage.ts +249 -0
- package/src/utils/validation.ts +286 -0
- package/tests/embeddings/bm25.test.ts +130 -0
- package/tests/embeddings/manager.test.ts +205 -0
- package/tests/integration/atlas.test.ts +266 -0
- package/tests/integration/e2e.test.ts +929 -0
- package/tests/learning/analyzer.test.ts +426 -0
- package/tests/learning/effectiveness.test.ts +542 -0
- package/tests/learning/pipeline.test.ts +176 -0
- package/tests/learning/playbook-extractor-provenance.test.ts +114 -0
- package/tests/learning/usage-inference.test.ts +254 -0
- package/tests/mcp/playbook-server.test.ts +252 -0
- package/tests/memory/experience.test.ts +198 -0
- package/tests/memory/playbook.test.ts +338 -0
- package/tests/memory/provenance.test.ts +639 -0
- package/tests/memory/system.test.ts +325 -0
- package/tests/runtime/agent-manager.test.ts +512 -0
- package/tests/runtime/mock-backend.test.ts +248 -0
- package/tests/search/refinement-loop.test.ts +468 -0
- package/tests/search/refinement.test.ts +267 -0
- package/tests/search/router.test.ts +427 -0
- package/tests/surfacing/skill-library.test.ts +292 -0
- package/tests/types/outcome.test.ts +147 -0
- package/tests/types/step.test.ts +133 -0
- package/tests/types/task.test.ts +158 -0
- package/tests/types/trajectory.test.ts +253 -0
- package/tests/utils/similarity.test.ts +188 -0
- package/tests/utils/validation.test.ts +252 -0
- package/tsconfig.json +25 -0
- package/vitest.config.ts +22 -0
- package/dist/index.d.mts +0 -466
- package/dist/index.mjs +0 -478
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Learning Effectiveness Tracker
|
|
3
|
+
*
|
|
4
|
+
* Measures whether the learning system is actually improving agent outcomes.
|
|
5
|
+
* Uses post-task reflection annotations rather than A/B replay evaluation.
|
|
6
|
+
*
|
|
7
|
+
* After each trajectory completes, the tracker annotates it with:
|
|
8
|
+
* - What knowledge was surfaced (playbooks, experiences)
|
|
9
|
+
* - Whether the surfaced knowledge was applied (inferred from trajectory)
|
|
10
|
+
* - Whether the outcome improved relative to similar unaided tasks
|
|
11
|
+
*
|
|
12
|
+
* These annotations accumulate into aggregate metrics that answer:
|
|
13
|
+
* "Is learning making agents better over time?"
|
|
14
|
+
*
|
|
15
|
+
* Inspired by Dash's eval framework but adapted for domain-agnostic
|
|
16
|
+
* trajectory learning rather than text-to-SQL.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import type { Trajectory } from '../types/index.js';
|
|
20
|
+
import type { PlaybookMatch } from '../memory/playbook.js';
|
|
21
|
+
import { JsonStore } from '../utils/storage.js';
|
|
22
|
+
|
|
23
|
+
// === TYPES ===
|
|
24
|
+
|
|
25
|
+
/**
 * Post-completion record for a single trajectory.
 *
 * Captures which knowledge items were surfaced for the task, which of them
 * were applied, and how the task turned out.
 */
export interface TaskAnnotation {
  /** Identifier of this annotation. */
  id: string;
  /** Identifier of the trajectory the annotation describes. */
  trajectoryId: string;
  /** Moment the annotation was recorded. */
  timestamp: Date;

  /** Knowledge that was surfaced before or during execution. */
  knowledgeSurfaced: {
    /** IDs of the surfaced playbooks. */
    playbookIds: string[];
    /** Names of the surfaced playbooks, index-aligned with `playbookIds`. */
    playbookNames: string[];
    /** IDs of the surfaced experiences. */
    experienceIds: string[];
    /** Total number of knowledge items retrieved. */
    totalItems: number;
  };

  /** Knowledge that was actually applied (inferred from trajectory analysis). */
  knowledgeApplied: {
    /** IDs of playbooks whose tactics appeared in the trajectory steps. */
    playbookIdsUsed: string[];
    /** True when at least one surfaced knowledge item was used. */
    anyKnowledgeUsed: boolean;
  };

  /** Outcome metrics for the task. */
  outcome: {
    /** Whether the task succeeded. */
    success: boolean;
    /** Number of steps in the trajectory. */
    stepCount: number;
    /** Number of error-recovery cycles in the trajectory. */
    errorRecoveries: number;
    /** Whether the task domain already had playbooks. */
    domainHadPlaybooks: boolean;
  };

  /** Optional human or agent reflection added after the task completed. */
  reflection?: {
    /** Verdict on how relevant the surfaced knowledge was. */
    knowledgeRelevance: 'helpful' | 'irrelevant' | 'misleading' | 'not_assessed';
    /** Free-form notes on what worked or did not. */
    notes?: string;
  };
}
|
|
69
|
+
|
|
70
|
+
/**
 * Aggregate effectiveness metrics derived from a set of task annotations.
 *
 * "Guided" tasks are those for which at least one knowledge item was
 * surfaced; "unguided" tasks had none.
 */
export interface EffectivenessMetrics {
  /** Number of tasks included in the computation. */
  totalTasks: number;

  /** Number of tasks with surfaced knowledge. */
  guidedTasks: number;
  /** Number of tasks without surfaced knowledge. */
  unguidedTasks: number;

  /** Fraction of guided tasks that succeeded. */
  guidedSuccessRate: number;
  /** Fraction of unguided tasks that succeeded. */
  unguidedSuccessRate: number;
  /** Guided minus unguided success rate; positive means guidance helps. */
  successRateDelta: number;

  /** Mean step count across guided tasks. */
  guidedAvgSteps: number;
  /** Mean step count across unguided tasks. */
  unguidedAvgSteps: number;
  /** Guided minus unguided mean steps; negative means guidance reduces steps (good). */
  stepCountDelta: number;

  /** How often surfaced knowledge was applied, relative to guided tasks. */
  knowledgeApplicationRate: number;
  /** Of the tasks where knowledge was applied, the fraction that succeeded. */
  appliedKnowledgeSuccessRate: number;

  /** Mean number of error recoveries across guided tasks. */
  guidedAvgErrorRecoveries: number;
  /** Mean number of error recoveries across unguided tasks. */
  unguidedAvgErrorRecoveries: number;

  /** Per-playbook effectiveness breakdown. */
  playbookEffectiveness: PlaybookEffectivenessEntry[];

  /** Start of the time window these metrics cover. */
  windowStart: Date;
  /** End of the time window these metrics cover. */
  windowEnd: Date;
}
|
|
109
|
+
|
|
110
|
+
/**
 * Effectiveness figures for one playbook, aggregated over the annotations in
 * which it was surfaced.
 */
export interface PlaybookEffectivenessEntry {
  /** ID of the playbook. */
  playbookId: string;
  /** Display name of the playbook. */
  playbookName: string;
  /** Number of times the playbook was surfaced. */
  surfacedCount: number;
  /** Number of times the playbook was actually applied. */
  appliedCount: number;
  /** Success rate of the tasks in which it was applied. */
  appliedSuccessRate: number;
  /** Mean step count of the tasks in which it was applied. */
  appliedAvgSteps: number;
  /** How often agents found this knowledge relevant. */
  relevanceRate: number;
}
|
|
127
|
+
|
|
128
|
+
// === TRACKER ===
|
|
129
|
+
|
|
130
|
+
/**
 * Tracks learning effectiveness through post-task annotations.
 *
 * Annotations are kept in a `JsonStore` named `effectiveness` under the base
 * directory given to the constructor. Every method that reads or writes
 * annotations initializes the store on first use, so calling `init()`
 * explicitly is optional.
 */
export class LearningEffectivenessTracker {
  private store: JsonStore<TaskAnnotation>;
  private initialized = false;

  /**
   * @param baseDir Directory handed to the underlying `JsonStore`.
   */
  constructor(baseDir: string) {
    this.store = new JsonStore<TaskAnnotation>(baseDir, 'effectiveness', {
      autoSaveInterval: 30000,
      pretty: true,
    });
  }

  /**
   * Initialize the underlying store. Calls after the first successful one
   * return immediately.
   */
  async init(): Promise<void> {
    if (this.initialized) return;
    await this.store.init();
    this.initialized = true;
  }

  /**
   * Annotate a completed trajectory with knowledge usage data.
   * Call this after a task finishes and usage inference has run.
   *
   * The annotation id is derived from the trajectory id (`ann-<id>`), so
   * annotating the same trajectory again reuses the same store key.
   *
   * @param trajectory The completed trajectory.
   * @param surfacedPlaybooks Playbook matches that were surfaced for the task.
   * @param surfacedExperienceIds IDs of the experiences that were surfaced.
   * @param appliedPlaybookIds IDs of the playbooks judged to have been applied.
   * @returns The annotation that was stored.
   */
  async annotate(
    trajectory: Trajectory,
    surfacedPlaybooks: PlaybookMatch[],
    surfacedExperienceIds: string[],
    appliedPlaybookIds: string[],
  ): Promise<TaskAnnotation> {
    await this.init();

    const annotation: TaskAnnotation = {
      id: `ann-${trajectory.id}`,
      trajectoryId: trajectory.id,
      timestamp: new Date(),
      knowledgeSurfaced: {
        playbookIds: surfacedPlaybooks.map((m) => m.playbook.id),
        playbookNames: surfacedPlaybooks.map((m) => m.playbook.name),
        // Copy so later mutation of the caller's array cannot alter the stored record.
        experienceIds: [...surfacedExperienceIds],
        totalItems: surfacedPlaybooks.length + surfacedExperienceIds.length,
      },
      knowledgeApplied: {
        // Copied for the same reason as experienceIds above.
        playbookIdsUsed: [...appliedPlaybookIds],
        anyKnowledgeUsed: appliedPlaybookIds.length > 0,
      },
      outcome: {
        success: trajectory.outcome.success,
        stepCount: trajectory.steps.length,
        errorRecoveries: countErrorRecoveries(trajectory),
        domainHadPlaybooks: surfacedPlaybooks.length > 0,
      },
    };

    this.store.set(annotation.id, annotation);
    await this.store.save(annotation.id);

    return annotation;
  }

  /**
   * Add a reflection to an existing annotation.
   * Called after human review or agent self-reflection.
   *
   * Does nothing when no annotation with the given id exists.
   *
   * @param annotationId ID of the annotation to update.
   * @param reflection Reflection to attach; replaces any previous one.
   */
  async addReflection(
    annotationId: string,
    reflection: TaskAnnotation['reflection'],
  ): Promise<void> {
    await this.init();

    const annotation = this.store.get(annotationId);
    if (!annotation) return;

    annotation.reflection = reflection;
    this.store.set(annotationId, annotation);
    await this.store.save(annotationId);
  }

  /**
   * Compute aggregate effectiveness metrics over a time window.
   * Defaults to all time if no window is specified.
   *
   * NOTE: `options.domain` is accepted for interface compatibility but is not
   * applied: annotations carry no domain field to filter on.
   *
   * @param options Optional inclusive `since` / `until` bounds on the annotation timestamp.
   * @returns Metrics for the annotations inside the window; every rate and
   *   average is 0 when its denominator is empty.
   */
  async computeMetrics(options?: {
    since?: Date;
    until?: Date;
    domain?: string;
  }): Promise<EffectivenessMetrics> {
    await this.init();

    const since = options?.since?.getTime();
    const until = options?.until?.getTime();

    const annotations = this.store.values().filter((a) => {
      if (since === undefined && until === undefined) return true;
      // Timestamps may be strings rather than Date instances (e.g. after a JSON
      // round trip), hence the re-wrap before comparing.
      const at = new Date(a.timestamp).getTime();
      return (since === undefined || at >= since) && (until === undefined || at <= until);
    });

    const { ratio, mean } = LearningEffectivenessTracker;
    const succeeded = (a: TaskAnnotation): boolean => a.outcome.success;
    const usedKnowledge = (a: TaskAnnotation): boolean => a.knowledgeApplied.anyKnowledgeUsed;

    const guided = annotations.filter((a) => a.knowledgeSurfaced.totalItems > 0);
    const unguided = annotations.filter((a) => a.knowledgeSurfaced.totalItems === 0);

    const guidedSuccessRate = ratio(guided.filter(succeeded).length, guided.length);
    const unguidedSuccessRate = ratio(unguided.filter(succeeded).length, unguided.length);

    const guidedAvgSteps = mean(guided, (a) => a.outcome.stepCount);
    const unguidedAvgSteps = mean(unguided, (a) => a.outcome.stepCount);

    const applied = annotations.filter(usedKnowledge);
    // The application rate is taken over guided tasks only, so the numerator is
    // restricted to guided tasks as well; otherwise the rate could exceed 1.
    const knowledgeApplicationRate = ratio(guided.filter(usedKnowledge).length, guided.length);
    const appliedKnowledgeSuccessRate = ratio(applied.filter(succeeded).length, applied.length);

    return {
      totalTasks: annotations.length,
      guidedTasks: guided.length,
      unguidedTasks: unguided.length,
      guidedSuccessRate,
      unguidedSuccessRate,
      successRateDelta: guidedSuccessRate - unguidedSuccessRate,
      guidedAvgSteps,
      unguidedAvgSteps,
      stepCountDelta: guidedAvgSteps - unguidedAvgSteps,
      knowledgeApplicationRate,
      appliedKnowledgeSuccessRate,
      guidedAvgErrorRecoveries: mean(guided, (a) => a.outcome.errorRecoveries),
      unguidedAvgErrorRecoveries: mean(unguided, (a) => a.outcome.errorRecoveries),
      playbookEffectiveness: this.computePlaybookEffectiveness(annotations),
      windowStart: options?.since ?? new Date(0),
      windowEnd: options?.until ?? new Date(),
    };
  }

  /**
   * Get all annotations (for export/inspection).
   */
  async getAll(): Promise<TaskAnnotation[]> {
    await this.init();
    return this.store.values();
  }

  /**
   * Get the annotation recorded for a trajectory, if any.
   */
  async getByTrajectoryId(trajectoryId: string): Promise<TaskAnnotation | undefined> {
    await this.init();
    return this.store.values().find((a) => a.trajectoryId === trajectoryId);
  }

  /**
   * Get the number of stored annotations.
   */
  async count(): Promise<number> {
    await this.init();
    return this.store.size();
  }

  /**
   * Close the underlying store.
   */
  async close(): Promise<void> {
    await this.store.close();
  }

  // === PRIVATE ===

  /** Divide, returning 0 instead of NaN/Infinity when the denominator is not positive. */
  private static ratio(numerator: number, denominator: number): number {
    return denominator > 0 ? numerator / denominator : 0;
  }

  /** Mean of `pick` over `items`; 0 for an empty list. */
  private static mean(
    items: readonly TaskAnnotation[],
    pick: (a: TaskAnnotation) => number,
  ): number {
    const total = items.reduce((sum, a) => sum + pick(a), 0);
    return LearningEffectivenessTracker.ratio(total, items.length);
  }

  /**
   * Aggregate per-playbook statistics over the given annotations.
   *
   * A playbook is counted once per appearance in an annotation's surfaced
   * list. Reflections marked `'not_assessed'` are ignored for the relevance
   * rate, since they carry no verdict.
   *
   * @returns One entry per surfaced playbook, most frequently surfaced first.
   */
  private computePlaybookEffectiveness(
    annotations: TaskAnnotation[]
  ): PlaybookEffectivenessEntry[] {
    type PlaybookTally = {
      name: string;
      surfaced: number;
      applied: number;
      appliedSuccesses: number;
      appliedStepSum: number;
      relevantCount: number;
      assessedCount: number;
    };

    const tallies = new Map<string, PlaybookTally>();

    for (const ann of annotations) {
      const relevance = ann.reflection?.knowledgeRelevance;
      // Only reflections that actually carry a verdict count as assessed.
      const assessed = relevance !== undefined && relevance !== 'not_assessed';
      const appliedIds = new Set(ann.knowledgeApplied.playbookIdsUsed);

      ann.knowledgeSurfaced.playbookIds.forEach((id, i) => {
        let tally = tallies.get(id);
        if (!tally) {
          tally = {
            // Fall back to the id when the parallel names array has no entry.
            name: ann.knowledgeSurfaced.playbookNames[i] ?? id,
            surfaced: 0,
            applied: 0,
            appliedSuccesses: 0,
            appliedStepSum: 0,
            relevantCount: 0,
            assessedCount: 0,
          };
          tallies.set(id, tally);
        }

        tally.surfaced++;

        if (appliedIds.has(id)) {
          tally.applied++;
          if (ann.outcome.success) tally.appliedSuccesses++;
          tally.appliedStepSum += ann.outcome.stepCount;
        }

        if (assessed) {
          tally.assessedCount++;
          if (relevance === 'helpful') tally.relevantCount++;
        }
      });
    }

    const { ratio } = LearningEffectivenessTracker;

    return Array.from(tallies, ([playbookId, tally]) => ({
      playbookId,
      playbookName: tally.name,
      surfacedCount: tally.surfaced,
      appliedCount: tally.applied,
      appliedSuccessRate: ratio(tally.appliedSuccesses, tally.applied),
      appliedAvgSteps: ratio(tally.appliedStepSum, tally.applied),
      relevanceRate: ratio(tally.relevantCount, tally.assessedCount),
    })).sort((a, b) => b.surfacedCount - a.surfacedCount);
  }
}
|
|
399
|
+
|
|
400
|
+
// === HELPERS ===
|
|
401
|
+
|
|
402
|
+
/**
 * Count error-recovery cycles in a trajectory.
 *
 * Every pair of adjacent steps is inspected: a recovery is a step whose
 * observation contains "error" (case-insensitive) immediately followed by a
 * step whose observation does not (a missing observation counts as "no error").
 *
 * @param trajectory Trajectory whose steps are scanned.
 * @returns Number of error → non-error transitions between adjacent steps.
 */
function countErrorRecoveries(trajectory: Trajectory): number {
  const { steps } = trajectory;

  // Pair each step after the first with its predecessor (steps[idx]).
  const transitions = steps.slice(1).filter((following, idx) => {
    const failedBefore = steps[idx].observation?.toLowerCase().includes('error') === true;
    const failsAfter = following.observation?.toLowerCase().includes('error') === true;
    return failedBefore && !failsAfter;
  });

  return transitions.length;
}
|
|
420
|
+
|
|
421
|
+
/**
 * Factory for {@link LearningEffectivenessTracker}.
 *
 * @param baseDir Directory passed straight through to the tracker constructor.
 * @returns A newly constructed tracker.
 */
export function createEffectivenessTracker(
  baseDir: string
): LearningEffectivenessTracker {
  const tracker = new LearningEffectivenessTracker(baseDir);
  return tracker;
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
export {
|
|
2
|
+
TrajectoryAnalyzer,
|
|
3
|
+
createAnalyzer,
|
|
4
|
+
simpleCreditAssignment,
|
|
5
|
+
outcomeCreditAssignment,
|
|
6
|
+
getCreditAssignmentFn,
|
|
7
|
+
type AnalysisResult,
|
|
8
|
+
type ErrorPattern,
|
|
9
|
+
type TrainingExample,
|
|
10
|
+
type CreditAssignmentFn,
|
|
11
|
+
type AnalyzerConfig,
|
|
12
|
+
} from './analyzer.js';
|
|
13
|
+
|
|
14
|
+
export {
|
|
15
|
+
LearningPipeline,
|
|
16
|
+
createLearningPipeline,
|
|
17
|
+
type ProcessResult,
|
|
18
|
+
type BatchResult,
|
|
19
|
+
} from './pipeline.js';
|
|
20
|
+
|
|
21
|
+
// Playbook-based learning
|
|
22
|
+
export {
|
|
23
|
+
PlaybookExtractor,
|
|
24
|
+
createPlaybookExtractor,
|
|
25
|
+
type ExtractedPlaybooks,
|
|
26
|
+
type PlaybookUpdate,
|
|
27
|
+
type PlaybookExtractorConfig,
|
|
28
|
+
} from './playbook-extractor.js';
|
|
29
|
+
|
|
30
|
+
export {
|
|
31
|
+
MetaLearner,
|
|
32
|
+
createMetaLearner,
|
|
33
|
+
type MetaLearnerConfig,
|
|
34
|
+
} from './meta-learner.js';
|
|
35
|
+
|
|
36
|
+
export {
|
|
37
|
+
PlaybookUsageInference,
|
|
38
|
+
createUsageInference,
|
|
39
|
+
type PlaybookUsageResult,
|
|
40
|
+
type UsageInferenceConfig,
|
|
41
|
+
} from './usage-inference.js';
|
|
42
|
+
|
|
43
|
+
// LLM-based extraction
|
|
44
|
+
export {
|
|
45
|
+
LLMPlaybookExtractor,
|
|
46
|
+
createLLMExtractor,
|
|
47
|
+
type LLMExtractorConfig,
|
|
48
|
+
type LLMExtractionResult,
|
|
49
|
+
} from './llm-extractor.js';
|
|
50
|
+
|
|
51
|
+
// Effectiveness tracking
|
|
52
|
+
export {
|
|
53
|
+
LearningEffectivenessTracker,
|
|
54
|
+
createEffectivenessTracker,
|
|
55
|
+
type TaskAnnotation,
|
|
56
|
+
type EffectivenessMetrics,
|
|
57
|
+
type PlaybookEffectivenessEntry,
|
|
58
|
+
} from './effectiveness.js';
|