cognitive-core 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.json +111 -2
- package/.sessionlog/settings.json +4 -0
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -1
- package/dist/index.js.map +1 -1
- package/dist/learning/index.d.ts +1 -1
- package/dist/learning/index.d.ts.map +1 -1
- package/dist/learning/index.js.map +1 -1
- package/dist/learning/unified-pipeline.d.ts +30 -0
- package/dist/learning/unified-pipeline.d.ts.map +1 -1
- package/dist/learning/unified-pipeline.js +207 -0
- package/dist/learning/unified-pipeline.js.map +1 -1
- package/dist/memory/candidate-retrieval.d.ts.map +1 -1
- package/dist/memory/candidate-retrieval.js +3 -1
- package/dist/memory/candidate-retrieval.js.map +1 -1
- package/dist/utils/error-classifier.js +8 -8
- package/dist/utils/error-classifier.js.map +1 -1
- package/dist/workspace/efficacy-toolkit.d.ts +164 -0
- package/dist/workspace/efficacy-toolkit.d.ts.map +1 -0
- package/dist/workspace/efficacy-toolkit.js +281 -0
- package/dist/workspace/efficacy-toolkit.js.map +1 -0
- package/dist/workspace/index.d.ts +2 -1
- package/dist/workspace/index.d.ts.map +1 -1
- package/dist/workspace/index.js +3 -1
- package/dist/workspace/index.js.map +1 -1
- package/dist/workspace/templates/index.d.ts +3 -0
- package/dist/workspace/templates/index.d.ts.map +1 -1
- package/dist/workspace/templates/index.js +6 -0
- package/dist/workspace/templates/index.js.map +1 -1
- package/dist/workspace/templates/playbook-decay-detection.d.ts +46 -0
- package/dist/workspace/templates/playbook-decay-detection.d.ts.map +1 -0
- package/dist/workspace/templates/playbook-decay-detection.js +197 -0
- package/dist/workspace/templates/playbook-decay-detection.js.map +1 -0
- package/dist/workspace/templates/playbook-efficacy-audit.d.ts +46 -0
- package/dist/workspace/templates/playbook-efficacy-audit.d.ts.map +1 -0
- package/dist/workspace/templates/playbook-efficacy-audit.js +160 -0
- package/dist/workspace/templates/playbook-efficacy-audit.js.map +1 -0
- package/dist/workspace/templates/playbook-lifecycle-review.d.ts +51 -0
- package/dist/workspace/templates/playbook-lifecycle-review.d.ts.map +1 -0
- package/dist/workspace/templates/playbook-lifecycle-review.js +187 -0
- package/dist/workspace/templates/playbook-lifecycle-review.js.map +1 -0
- package/package.json +7 -1
- package/src/index.ts +27 -0
- package/src/learning/index.ts +1 -0
- package/src/learning/unified-pipeline.ts +271 -1
- package/src/memory/candidate-retrieval.ts +2 -1
- package/src/utils/error-classifier.ts +8 -8
- package/src/workspace/efficacy-toolkit.ts +496 -0
- package/src/workspace/index.ts +29 -0
- package/src/workspace/templates/index.ts +24 -0
- package/src/workspace/templates/playbook-decay-detection.ts +272 -0
- package/src/workspace/templates/playbook-efficacy-audit.ts +246 -0
- package/src/workspace/templates/playbook-lifecycle-review.ts +274 -0
- package/tests/fixtures/behavioral-trajectories.ts +210 -0
- package/tests/integration/pipeline-data-correctness.test.ts +794 -0
- package/tests/learning/meta-learner.test.ts +418 -0
- package/tests/learning/pipeline-memory-updates.test.ts +721 -0
- package/tests/learning/unified-pipeline-efficacy.test.ts +232 -0
- package/tests/memory/candidate-retrieval.test.ts +167 -0
- package/tests/memory/meta.test.ts +399 -0
- package/tests/search/evaluator.test.ts +257 -0
- package/tests/search/verification-runner.test.ts +357 -0
- package/tests/utils/error-classifier.test.ts +149 -0
- package/tests/utils/trajectory-helpers.test.ts +163 -0
- package/tests/workspace/efficacy-toolkit.test.ts +404 -0
- package/tests/workspace/templates/playbook-efficacy.test.ts +377 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Playbook Lifecycle Review Template
|
|
3
|
+
*
|
|
4
|
+
* Portfolio-wide health assessment with lifecycle recommendations.
|
|
5
|
+
* Uses the efficacy toolkit's portfolio snapshot to give the agent
|
|
6
|
+
* a pre-computed view of the entire playbook portfolio, then asks
|
|
7
|
+
* for promotion, deprecation, merge, and evolution recommendations.
|
|
8
|
+
*
|
|
9
|
+
* This is the "skill designer" review step (inspired by MemSkill)
|
|
10
|
+
* that periodically evaluates the whole portfolio.
|
|
11
|
+
*/
|
|
12
|
+
import { buildPortfolioSnapshot, } from '../efficacy-toolkit.js';
|
|
13
|
+
// ============================================================
|
|
14
|
+
// Template Implementation
|
|
15
|
+
// ============================================================
|
|
16
|
+
export const playbookLifecycleReviewTemplate = {
|
|
17
|
+
taskType: 'playbook-lifecycle-review',
|
|
18
|
+
domain: 'meta-learning',
|
|
19
|
+
description: 'Portfolio-wide playbook health assessment with lifecycle recommendations',
|
|
20
|
+
assessComplexity(input) {
|
|
21
|
+
if (input.playbooks.length === 0)
|
|
22
|
+
return 'heuristic';
|
|
23
|
+
if (input.playbooks.length <= 3 && input.annotations.length < 10)
|
|
24
|
+
return 'heuristic';
|
|
25
|
+
if (input.playbooks.length > 30)
|
|
26
|
+
return 'thorough';
|
|
27
|
+
if (input.playbooks.length > 10)
|
|
28
|
+
return 'standard';
|
|
29
|
+
return 'lightweight';
|
|
30
|
+
},
|
|
31
|
+
async heuristicFallback(input) {
|
|
32
|
+
const snapshot = buildPortfolioSnapshot(input.playbooks, input.annotations, input.playbookEffectiveness, input.unguidedSuccessRate, input.trajectoryDomainMap);
|
|
33
|
+
// Simple heuristic recommendations
|
|
34
|
+
const recommendations = [];
|
|
35
|
+
for (const flag of snapshot.decaying) {
|
|
36
|
+
recommendations.push({
|
|
37
|
+
playbookId: flag.playbookId,
|
|
38
|
+
playbookName: flag.playbookName,
|
|
39
|
+
currentState: 'active',
|
|
40
|
+
proposedState: 'evolve',
|
|
41
|
+
rationale: flag.reason,
|
|
42
|
+
priority: 'high',
|
|
43
|
+
});
|
|
44
|
+
}
|
|
45
|
+
for (const flag of snapshot.stale) {
|
|
46
|
+
recommendations.push({
|
|
47
|
+
playbookId: flag.playbookId,
|
|
48
|
+
playbookName: flag.playbookName,
|
|
49
|
+
currentState: 'stale',
|
|
50
|
+
proposedState: 'archive',
|
|
51
|
+
rationale: flag.reason,
|
|
52
|
+
priority: 'medium',
|
|
53
|
+
});
|
|
54
|
+
}
|
|
55
|
+
return {
|
|
56
|
+
snapshot,
|
|
57
|
+
recommendations,
|
|
58
|
+
insights: [],
|
|
59
|
+
actionPlan: recommendations.map(r => `${r.proposedState} "${r.playbookName}" — ${r.rationale}`),
|
|
60
|
+
};
|
|
61
|
+
},
|
|
62
|
+
async prepareWorkspace(input, handle) {
|
|
63
|
+
const snapshot = buildPortfolioSnapshot(input.playbooks, input.annotations, input.playbookEffectiveness, input.unguidedSuccessRate, input.trajectoryDomainMap);
|
|
64
|
+
// Portfolio snapshot — the core data
|
|
65
|
+
await handle.writeJson('input', 'portfolio-snapshot.json', snapshot);
|
|
66
|
+
// Per-playbook summaries for the agent to reference
|
|
67
|
+
const playbookSummaries = input.playbooks.map(pb => ({
|
|
68
|
+
id: pb.id,
|
|
69
|
+
name: pb.name,
|
|
70
|
+
confidence: pb.confidence,
|
|
71
|
+
successCount: pb.evolution.successCount,
|
|
72
|
+
failureCount: pb.evolution.failureCount,
|
|
73
|
+
successRate: pb.evolution.successCount + pb.evolution.failureCount > 0
|
|
74
|
+
? pb.evolution.successCount / (pb.evolution.successCount + pb.evolution.failureCount)
|
|
75
|
+
: 0,
|
|
76
|
+
domains: pb.applicability.domains,
|
|
77
|
+
strategy: pb.guidance.strategy,
|
|
78
|
+
tacticsCount: pb.guidance.tactics.length,
|
|
79
|
+
refinementsCount: pb.evolution.refinements.length,
|
|
80
|
+
version: pb.evolution.version,
|
|
81
|
+
lastUsed: pb.evolution.lastUsed,
|
|
82
|
+
isCore: input.corePlaybookIds?.includes(pb.id) ?? false,
|
|
83
|
+
consolidationStrength: pb.evolution.consolidationStrength ?? 0,
|
|
84
|
+
}));
|
|
85
|
+
await handle.writeJson('input', 'playbook-summaries.json', playbookSummaries);
|
|
86
|
+
// Redundancy pairs (pre-computed by toolkit)
|
|
87
|
+
if (snapshot.redundant.length > 0) {
|
|
88
|
+
await handle.writeJson('input', 'redundancy-pairs.json', snapshot.redundant);
|
|
89
|
+
}
|
|
90
|
+
},
|
|
91
|
+
buildTaskPrompt(input) {
|
|
92
|
+
return [
|
|
93
|
+
`Review the health of the entire playbook portfolio (${input.playbooks.length} playbooks).`,
|
|
94
|
+
'',
|
|
95
|
+
'Read:',
|
|
96
|
+
'- input/portfolio-snapshot.json — Portfolio health metrics, flagged issues (decaying, stale, low adoption, redundant)',
|
|
97
|
+
'- input/playbook-summaries.json — Per-playbook summaries with confidence, success rates, domains',
|
|
98
|
+
'- input/redundancy-pairs.json — (if present) Playbook pairs with high content overlap',
|
|
99
|
+
'',
|
|
100
|
+
'For each playbook, recommend a lifecycle action:',
|
|
101
|
+
'- **promote-to-core**: High confidence + success rate, proven across tasks',
|
|
102
|
+
'- **keep-active**: Performing well, no changes needed',
|
|
103
|
+
'- **evolve**: Decent foundation but guidance needs updating based on failure patterns',
|
|
104
|
+
'- **merge**: Two playbooks cover the same ground — merge into one stronger playbook',
|
|
105
|
+
'- **split**: One playbook covers too many domains — split into domain-specific variants',
|
|
106
|
+
'- **deprecate**: Low value, actively misleading, or superseded',
|
|
107
|
+
'- **archive**: No longer relevant but worth preserving for reference',
|
|
108
|
+
'',
|
|
109
|
+
'Also provide:',
|
|
110
|
+
'- Strategic insights about the portfolio (coverage gaps, over-invested domains, etc.)',
|
|
111
|
+
'- A priority-ordered action plan (most impactful changes first)',
|
|
112
|
+
'',
|
|
113
|
+
'Write to output/lifecycle-review.json:',
|
|
114
|
+
'```json',
|
|
115
|
+
'{',
|
|
116
|
+
' "recommendations": [',
|
|
117
|
+
' {',
|
|
118
|
+
' "playbookId": "id",',
|
|
119
|
+
' "playbookName": "name",',
|
|
120
|
+
' "currentState": "active" | "core" | "stale" | "underperforming",',
|
|
121
|
+
' "proposedState": "promote-to-core" | "keep-active" | "deprecate" | "archive" | "merge" | "evolve" | "split",',
|
|
122
|
+
' "rationale": "why this change",',
|
|
123
|
+
' "mergeTarget": { "id": "...", "name": "..." },',
|
|
124
|
+
' "evolutionNotes": "optional: what to change in guidance",',
|
|
125
|
+
' "priority": "high" | "medium" | "low"',
|
|
126
|
+
' }',
|
|
127
|
+
' ],',
|
|
128
|
+
' "insights": ["insight1", "insight2"],',
|
|
129
|
+
' "actionPlan": ["step1", "step2"]',
|
|
130
|
+
'}',
|
|
131
|
+
'```',
|
|
132
|
+
].join('\n');
|
|
133
|
+
},
|
|
134
|
+
getSkills() { return []; },
|
|
135
|
+
getResources() { return []; },
|
|
136
|
+
outputConfig: {
|
|
137
|
+
files: [
|
|
138
|
+
{
|
|
139
|
+
path: 'lifecycle-review.json',
|
|
140
|
+
format: 'json',
|
|
141
|
+
required: true,
|
|
142
|
+
description: 'Playbook lifecycle review results',
|
|
143
|
+
},
|
|
144
|
+
],
|
|
145
|
+
},
|
|
146
|
+
async collectOutput(handle) {
|
|
147
|
+
const raw = await handle.readJson('output', 'lifecycle-review.json');
|
|
148
|
+
const snapshot = await handle.readJson('input', 'portfolio-snapshot.json');
|
|
149
|
+
const recommendations = Array.isArray(raw.recommendations)
|
|
150
|
+
? raw.recommendations.map(r => {
|
|
151
|
+
const mergeTarget = r.mergeTarget;
|
|
152
|
+
return {
|
|
153
|
+
playbookId: String(r.playbookId ?? ''),
|
|
154
|
+
playbookName: String(r.playbookName ?? ''),
|
|
155
|
+
currentState: String(r.currentState ?? 'active'),
|
|
156
|
+
proposedState: String(r.proposedState ?? 'keep-active'),
|
|
157
|
+
rationale: String(r.rationale ?? ''),
|
|
158
|
+
mergeTarget: mergeTarget
|
|
159
|
+
? { id: String(mergeTarget.id ?? ''), name: String(mergeTarget.name ?? '') }
|
|
160
|
+
: undefined,
|
|
161
|
+
evolutionNotes: r.evolutionNotes ? String(r.evolutionNotes) : undefined,
|
|
162
|
+
priority: String(r.priority ?? 'medium'),
|
|
163
|
+
};
|
|
164
|
+
})
|
|
165
|
+
: [];
|
|
166
|
+
return {
|
|
167
|
+
snapshot,
|
|
168
|
+
recommendations,
|
|
169
|
+
insights: Array.isArray(raw.insights) ? raw.insights.map(String) : [],
|
|
170
|
+
actionPlan: Array.isArray(raw.actionPlan) ? raw.actionPlan.map(String) : [],
|
|
171
|
+
};
|
|
172
|
+
},
|
|
173
|
+
async processOutput() {
|
|
174
|
+
// Caller decides how to act on lifecycle recommendations
|
|
175
|
+
},
|
|
176
|
+
getComputeRequirements(_input, complexity) {
|
|
177
|
+
return {
|
|
178
|
+
mode: 'local',
|
|
179
|
+
complexity,
|
|
180
|
+
timeout: complexity === 'thorough' ? 240_000 : 120_000,
|
|
181
|
+
};
|
|
182
|
+
},
|
|
183
|
+
agentType: 'claude-code',
|
|
184
|
+
timeout: 180_000,
|
|
185
|
+
captureToolCalls: true,
|
|
186
|
+
};
|
|
187
|
+
//# sourceMappingURL=playbook-lifecycle-review.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"playbook-lifecycle-review.js","sourceRoot":"","sources":["../../../src/workspace/templates/playbook-lifecycle-review.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AASH,OAAO,EACL,sBAAsB,GAEvB,MAAM,wBAAwB,CAAC;AAyChC,+DAA+D;AAC/D,0BAA0B;AAC1B,+DAA+D;AAE/D,MAAM,CAAC,MAAM,+BAA+B,GAGxC;IACF,QAAQ,EAAE,2BAA2B;IACrC,MAAM,EAAE,eAAe;IACvB,WAAW,EAAE,0EAA0E;IAEvF,gBAAgB,CAAC,KAAmC;QAClD,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,WAAW,CAAC;QACrD,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,IAAI,CAAC,IAAI,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,EAAE;YAAE,OAAO,WAAW,CAAC;QACrF,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,EAAE;YAAE,OAAO,UAAU,CAAC;QACnD,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,EAAE;YAAE,OAAO,UAAU,CAAC;QACnD,OAAO,aAAa,CAAC;IACvB,CAAC;IAED,KAAK,CAAC,iBAAiB,CAAC,KAAmC;QACzD,MAAM,QAAQ,GAAG,sBAAsB,CACrC,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,WAAW,EACjB,KAAK,CAAC,qBAAqB,EAC3B,KAAK,CAAC,mBAAmB,EACzB,KAAK,CAAC,mBAAmB,CAC1B,CAAC;QAEF,mCAAmC;QACnC,MAAM,eAAe,GAA8B,EAAE,CAAC;QAEtD,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,QAAQ,EAAE,CAAC;YACrC,eAAe,CAAC,IAAI,CAAC;gBACnB,UAAU,EAAE,IAAI,CAAC,UAAU;gBAC3B,YAAY,EAAE,IAAI,CAAC,YAAY;gBAC/B,YAAY,EAAE,QAAQ;gBACtB,aAAa,EAAE,QAAQ;gBACvB,SAAS,EAAE,IAAI,CAAC,MAAM;gBACtB,QAAQ,EAAE,MAAM;aACjB,CAAC,CAAC;QACL,CAAC;QAED,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;YAClC,eAAe,CAAC,IAAI,CAAC;gBACnB,UAAU,EAAE,IAAI,CAAC,UAAU;gBAC3B,YAAY,EAAE,IAAI,CAAC,YAAY;gBAC/B,YAAY,EAAE,OAAO;gBACrB,aAAa,EAAE,SAAS;gBACxB,SAAS,EAAE,IAAI,CAAC,MAAM;gBACtB,QAAQ,EAAE,QAAQ;aACnB,CAAC,CAAC;QACL,CAAC;QAED,OAAO;YACL,QAAQ;YACR,eAAe;YACf,QAAQ,EAAE,EAAE;YACZ,UAAU,EAAE,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAClC,GAAG,CAAC,CAAC,aAAa,KAAK,CAAC,CAAC,YAAY,OAAO,CAAC,CAAC,SAAS,EAAE,CAC1D;SACF,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,gBAAgB,CACpB,KAAmC,EACnC,MAAuB;QAEvB,MAAM,QAAQ,GAAG,sBAAsB,CACrC,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,WAAW,EACjB,KAAK,CAAC,qBAAqB,EAC3B,KAAK,CAAC,mBAAmB,EACzB,KAAK,CAAC,mBAAmB,CAC1B,CAAC;QAEF,qCAAqC;QACrC,MAAM,MAAM,CAAC,SAAS,CAAC,OAAO,EAAE,yBAAyB,EAAE,QAAQ,CAAC,CAAC;QAErE,oDAAoD;QACpD,MAAM,iBAAiB,GAAG,KAAK,CA
AC,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;YACnD,EAAE,EAAE,EAAE,CAAC,EAAE;YACT,IAAI,EAAE,EAAE,CAAC,IAAI;YACb,UAAU,EAAE,EAAE,CAAC,UAAU;YACzB,YAAY,EAAE,EAAE,CAAC,SAAS,CAAC,YAAY;YACvC,YAAY,EAAE,EAAE,CAAC,SAAS,CAAC,YAAY;YACvC,WAAW,EAAE,EAAE,CAAC,SAAS,CAAC,YAAY,GAAG,EAAE,CAAC,SAAS,CAAC,YAAY,GAAG,CAAC;gBACpE,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,YAAY,GAAG,CAAC,EAAE,CAAC,SAAS,CAAC,YAAY,GAAG,EAAE,CAAC,SAAS,CAAC,YAAY,CAAC;gBACrF,CAAC,CAAC,CAAC;YACL,OAAO,EAAE,EAAE,CAAC,aAAa,CAAC,OAAO;YACjC,QAAQ,EAAE,EAAE,CAAC,QAAQ,CAAC,QAAQ;YAC9B,YAAY,EAAE,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM;YACxC,gBAAgB,EAAE,EAAE,CAAC,SAAS,CAAC,WAAW,CAAC,MAAM;YACjD,OAAO,EAAE,EAAE,CAAC,SAAS,CAAC,OAAO;YAC7B,QAAQ,EAAE,EAAE,CAAC,SAAS,CAAC,QAAQ;YAC/B,MAAM,EAAE,KAAK,CAAC,eAAe,EAAE,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC,IAAI,KAAK;YACvD,qBAAqB,EAAE,EAAE,CAAC,SAAS,CAAC,qBAAqB,IAAI,CAAC;SAC/D,CAAC,CAAC,CAAC;QAEJ,MAAM,MAAM,CAAC,SAAS,CAAC,OAAO,EAAE,yBAAyB,EAAE,iBAAiB,CAAC,CAAC;QAE9E,6CAA6C;QAC7C,IAAI,QAAQ,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClC,MAAM,MAAM,CAAC,SAAS,CAAC,OAAO,EAAE,uBAAuB,EAAE,QAAQ,CAAC,SAAS,CAAC,CAAC;QAC/E,CAAC;IACH,CAAC;IAED,eAAe,CAAC,KAAmC;QACjD,OAAO;YACL,uDAAuD,KAAK,CAAC,SAAS,CAAC,MAAM,cAAc;YAC3F,EAAE;YACF,OAAO;YACP,uHAAuH;YACvH,kGAAkG;YAClG,uFAAuF;YACvF,EAAE;YACF,kDAAkD;YAClD,4EAA4E;YAC5E,uDAAuD;YACvD,uFAAuF;YACvF,qFAAqF;YACrF,yFAAyF;YACzF,gEAAgE;YAChE,sEAAsE;YACtE,EAAE;YACF,eAAe;YACf,uFAAuF;YACvF,iEAAiE;YACjE,EAAE;YACF,wCAAwC;YACxC,SAAS;YACT,GAAG;YACH,wBAAwB;YACxB,OAAO;YACP,2BAA2B;YAC3B,+BAA+B;YAC/B,wEAAwE;YACxE,oHAAoH;YACpH,uCAAuC;YACvC,sDAAsD;YACtD,iEAAiE;YACjE,6CAA6C;YAC7C,OAAO;YACP,MAAM;YACN,yCAAyC;YACzC,oCAAoC;YACpC,GAAG;YACH,KAAK;SACN,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACf,CAAC;IAED,SAAS,KAAK,OAAO,EAAE,CAAC,CAAC,CAAC;IAC1B,YAAY,KAAK,OAAO,EAAE,CAAC,CAAC,CAAC;IAE7B,YAAY,EAAE;QACZ,KAAK,EAAE;YACL;gBACE,IAAI,EAAE,uBAAuB;gBAC7B,MAAM,EAAE,MAAe;gBACvB,QAAQ,EAAE,IAAI;gBACd,WAAW,EAAE,mCAAmC;aACjD;SACF;KACF;IAED,KAAK,CAAC,aAAa,CAAC,MAAuB;QACzC,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,QAAQ,EAAE,uBAAuB,CAA4B,CAAC;QAChG
,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,OAAO,EAAE,yBAAyB,CAAsB,CAAC;QAEhG,MAAM,eAAe,GAA8B,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC;YACnF,CAAC,CAAE,GAAG,CAAC,eAA6C,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;gBACzD,MAAM,WAAW,GAAG,CAAC,CAAC,WAAkD,CAAC;gBACzE,OAAO;oBACL,UAAU,EAAE,MAAM,CAAC,CAAC,CAAC,UAAU,IAAI,EAAE,CAAC;oBACtC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC,YAAY,IAAI,EAAE,CAAC;oBAC1C,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC,YAAY,IAAI,QAAQ,CAA4C;oBAC3F,aAAa,EAAE,MAAM,CAAC,CAAC,CAAC,aAAa,IAAI,aAAa,CAA6C;oBACnG,SAAS,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,IAAI,EAAE,CAAC;oBACpC,WAAW,EAAE,WAAW;wBACtB,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,CAAC,WAAW,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,CAAC,WAAW,CAAC,IAAI,IAAI,EAAE,CAAC,EAAE;wBAC5E,CAAC,CAAC,SAAS;oBACb,cAAc,EAAE,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,SAAS;oBACvE,QAAQ,EAAE,MAAM,CAAC,CAAC,CAAC,QAAQ,IAAI,QAAQ,CAAwC;iBAChF,CAAC;YACJ,CAAC,CAAC;YACJ,CAAC,CAAC,EAAE,CAAC;QAEP,OAAO;YACL,QAAQ;YACR,eAAe;YACf,QAAQ,EAAE,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE;YACrE,UAAU,EAAE,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE;SAC5E,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,aAAa;QACjB,yDAAyD;IAC3D,CAAC;IAED,sBAAsB,CAAC,MAAM,EAAE,UAAU;QACvC,OAAO;YACL,IAAI,EAAE,OAAgB;YACtB,UAAU;YACV,OAAO,EAAE,UAAU,KAAK,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO;SACvD,CAAC;IACJ,CAAC;IAED,SAAS,EAAE,aAAa;IACxB,OAAO,EAAE,OAAO;IAChB,gBAAgB,EAAE,IAAI;CACvB,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "cognitive-core",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.2.1",
|
|
4
4
|
"description": "TypeScript-native cognitive core for adaptive learning and abstraction",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -12,26 +12,32 @@
|
|
|
12
12
|
"exports": {
|
|
13
13
|
".": {
|
|
14
14
|
"import": "./dist/index.js",
|
|
15
|
+
"default": "./dist/index.js",
|
|
15
16
|
"types": "./dist/index.d.ts"
|
|
16
17
|
},
|
|
17
18
|
"./memory": {
|
|
18
19
|
"import": "./dist/memory/index.js",
|
|
20
|
+
"default": "./dist/memory/index.js",
|
|
19
21
|
"types": "./dist/memory/index.d.ts"
|
|
20
22
|
},
|
|
21
23
|
"./learning": {
|
|
22
24
|
"import": "./dist/learning/index.js",
|
|
25
|
+
"default": "./dist/learning/index.js",
|
|
23
26
|
"types": "./dist/learning/index.d.ts"
|
|
24
27
|
},
|
|
25
28
|
"./agents": {
|
|
26
29
|
"import": "./dist/agents/index.js",
|
|
30
|
+
"default": "./dist/agents/index.js",
|
|
27
31
|
"types": "./dist/agents/index.d.ts"
|
|
28
32
|
},
|
|
29
33
|
"./embeddings": {
|
|
30
34
|
"import": "./dist/embeddings/index.js",
|
|
35
|
+
"default": "./dist/embeddings/index.js",
|
|
31
36
|
"types": "./dist/embeddings/index.d.ts"
|
|
32
37
|
},
|
|
33
38
|
"./session-bank": {
|
|
34
39
|
"import": "./dist/session-bank/index.js",
|
|
40
|
+
"default": "./dist/session-bank/index.js",
|
|
35
41
|
"types": "./dist/session-bank/index.d.ts"
|
|
36
42
|
}
|
|
37
43
|
},
|
package/src/index.ts
CHANGED
|
@@ -435,6 +435,33 @@ export {
|
|
|
435
435
|
type AgenticTaskResult,
|
|
436
436
|
type AgenticTaskMetrics,
|
|
437
437
|
AgenticTaskOutputError,
|
|
438
|
+
// Efficacy toolkit
|
|
439
|
+
computeNormalizedGain,
|
|
440
|
+
computeTemporalTrend,
|
|
441
|
+
computeDomainBreakdown,
|
|
442
|
+
buildEfficacyProfile,
|
|
443
|
+
buildPortfolioSnapshot,
|
|
444
|
+
type PlaybookEfficacyProfile,
|
|
445
|
+
type DomainBreakdown,
|
|
446
|
+
type TemporalTrend,
|
|
447
|
+
type UsageProfile,
|
|
448
|
+
type EvolutionSummary,
|
|
449
|
+
type PortfolioSnapshot,
|
|
450
|
+
type PlaybookHealthFlag,
|
|
451
|
+
type RedundancyPair,
|
|
452
|
+
// Efficacy templates
|
|
453
|
+
playbookEfficacyAuditTemplate,
|
|
454
|
+
type PlaybookEfficacyAuditInput,
|
|
455
|
+
type PlaybookEfficacyAuditOutput,
|
|
456
|
+
type PlaybookRecommendation,
|
|
457
|
+
playbookDecayDetectionTemplate,
|
|
458
|
+
type PlaybookDecayDetectionInput,
|
|
459
|
+
type PlaybookDecayDetectionOutput,
|
|
460
|
+
type DecaySignal,
|
|
461
|
+
playbookLifecycleReviewTemplate,
|
|
462
|
+
type PlaybookLifecycleReviewInput,
|
|
463
|
+
type PlaybookLifecycleReviewOutput,
|
|
464
|
+
type LifecycleRecommendation,
|
|
438
465
|
} from './workspace/index.js';
|
|
439
466
|
|
|
440
467
|
// Persistence - Unified SQLite database for system-internal state
|
package/src/learning/index.ts
CHANGED
|
package/src/learning/unified-pipeline.ts
CHANGED
|
@@ -54,7 +54,12 @@ import { playbookExtractionTemplate } from '../workspace/templates/playbook-extr
|
|
|
54
54
|
|
|
55
55
|
// Meta-learning and effectiveness
|
|
56
56
|
import { MetaLearner, type MetaLearnerConfig } from './meta-learner.js';
|
|
57
|
-
import { LearningEffectivenessTracker } from './effectiveness.js';
|
|
57
|
+
import { LearningEffectivenessTracker, type TaskAnnotation, type PlaybookEffectivenessEntry } from './effectiveness.js';
|
|
58
|
+
|
|
59
|
+
// Efficacy templates (used in maintenance tasks)
|
|
60
|
+
import { playbookDecayDetectionTemplate, type PlaybookDecayDetectionOutput } from '../workspace/templates/playbook-decay-detection.js';
|
|
61
|
+
import { playbookEfficacyAuditTemplate } from '../workspace/templates/playbook-efficacy-audit.js';
|
|
62
|
+
import { playbookLifecycleReviewTemplate } from '../workspace/templates/playbook-lifecycle-review.js';
|
|
58
63
|
|
|
59
64
|
// Energy evaluator (batch trigger)
|
|
60
65
|
import {
|
|
@@ -121,6 +126,8 @@ export interface UnifiedPipelineFeatures {
|
|
|
121
126
|
effectivenessTracking?: boolean;
|
|
122
127
|
/** Enable healing orchestrator with built-in detectors (default: true) */
|
|
123
128
|
healing?: boolean;
|
|
129
|
+
/** Enable efficacy maintenance tasks (decay detection, audit, lifecycle review) (default: true) */
|
|
130
|
+
efficacy?: boolean;
|
|
124
131
|
}
|
|
125
132
|
|
|
126
133
|
/**
|
|
@@ -140,10 +147,30 @@ export interface UnifiedPipelineConfig {
|
|
|
140
147
|
metaLearner: Partial<MetaLearnerConfig>;
|
|
141
148
|
/** Feature flags */
|
|
142
149
|
features: UnifiedPipelineFeatures;
|
|
150
|
+
/** Efficacy maintenance config */
|
|
151
|
+
efficacy: EfficacyMaintenanceConfig;
|
|
143
152
|
/** Max latency warning threshold for instant loop in ms (default: 200) */
|
|
144
153
|
maxInstantLatencyMs: number;
|
|
145
154
|
}
|
|
146
155
|
|
|
156
|
+
/**
|
|
157
|
+
* Configuration for efficacy maintenance tasks.
|
|
158
|
+
*/
|
|
159
|
+
export interface EfficacyMaintenanceConfig {
|
|
160
|
+
/** Run lifecycle review every Nth maintenance cycle (default: 3) */
|
|
161
|
+
lifecycleReviewFrequency: number;
|
|
162
|
+
/** Minimum playbooks required before running efficacy tasks (default: 1) */
|
|
163
|
+
minPlaybooks: number;
|
|
164
|
+
/** Minimum annotations required before running efficacy tasks (default: 3) */
|
|
165
|
+
minAnnotations: number;
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
const DEFAULT_EFFICACY_CONFIG: EfficacyMaintenanceConfig = {
|
|
169
|
+
lifecycleReviewFrequency: 3,
|
|
170
|
+
minPlaybooks: 1,
|
|
171
|
+
minAnnotations: 3,
|
|
172
|
+
};
|
|
173
|
+
|
|
147
174
|
const DEFAULT_UNIFIED_CONFIG: UnifiedPipelineConfig = {
|
|
148
175
|
instant: {},
|
|
149
176
|
batch: {
|
|
@@ -153,6 +180,7 @@ const DEFAULT_UNIFIED_CONFIG: UnifiedPipelineConfig = {
|
|
|
153
180
|
maintenance: {},
|
|
154
181
|
metaLearner: {},
|
|
155
182
|
features: {},
|
|
183
|
+
efficacy: DEFAULT_EFFICACY_CONFIG,
|
|
156
184
|
maxInstantLatencyMs: 200,
|
|
157
185
|
};
|
|
158
186
|
|
|
@@ -267,6 +295,7 @@ export class UnifiedLearningPipeline {
|
|
|
267
295
|
private batchCyclesRun = 0;
|
|
268
296
|
private maintenanceCyclesRun = 0;
|
|
269
297
|
private _onLatencyWarning: ((durationMs: number) => void) | null = null;
|
|
298
|
+
private lastDecayResult: PlaybookDecayDetectionOutput | null = null;
|
|
270
299
|
|
|
271
300
|
constructor(
|
|
272
301
|
memory: MemorySystem,
|
|
@@ -284,6 +313,7 @@ export class UnifiedLearningPipeline {
|
|
|
284
313
|
maintenance: { ...DEFAULT_UNIFIED_CONFIG.maintenance, ...config?.maintenance },
|
|
285
314
|
metaLearner: { ...DEFAULT_UNIFIED_CONFIG.metaLearner, ...config?.metaLearner },
|
|
286
315
|
features: { ...DEFAULT_UNIFIED_CONFIG.features, ...config?.features },
|
|
316
|
+
efficacy: { ...DEFAULT_EFFICACY_CONFIG, ...config?.efficacy },
|
|
287
317
|
};
|
|
288
318
|
|
|
289
319
|
this.memory = memory;
|
|
@@ -822,6 +852,246 @@ export class UnifiedLearningPipeline {
|
|
|
822
852
|
},
|
|
823
853
|
});
|
|
824
854
|
}
|
|
855
|
+
|
|
856
|
+
// Efficacy maintenance tasks
|
|
857
|
+
this.registerEfficacyMaintenanceTasks();
|
|
858
|
+
}
|
|
859
|
+
|
|
860
|
+
/**
|
|
861
|
+
* Register efficacy maintenance tasks (decay detection, audit, lifecycle review).
|
|
862
|
+
* All tasks are log-only — results are returned in details but no automatic mutations.
|
|
863
|
+
*/
|
|
864
|
+
private registerEfficacyMaintenanceTasks(): void {
|
|
865
|
+
if (!(this.config.features.efficacy ?? true)) return;
|
|
866
|
+
|
|
867
|
+
const efficacyCfg = this.config.efficacy;
|
|
868
|
+
|
|
869
|
+
// 1. Playbook Decay Detection (priority 70) — runs every maintenance cycle
|
|
870
|
+
this.maintenanceScheduler.registerTask({
|
|
871
|
+
name: 'playbook-decay-detection',
|
|
872
|
+
priority: 70,
|
|
873
|
+
execute: async () => {
|
|
874
|
+
const start = Date.now();
|
|
875
|
+
this.lastDecayResult = null;
|
|
876
|
+
|
|
877
|
+
const { annotations, playbooks, playbookEffectiveness, unguidedSuccessRate } =
|
|
878
|
+
await this.getEfficacyData();
|
|
879
|
+
|
|
880
|
+
if (playbooks.length < efficacyCfg.minPlaybooks || annotations.length < efficacyCfg.minAnnotations) {
|
|
881
|
+
return {
|
|
882
|
+
taskName: 'playbook-decay-detection',
|
|
883
|
+
success: true,
|
|
884
|
+
durationMs: Date.now() - start,
|
|
885
|
+
details: { skipped: true, reason: 'insufficient data' },
|
|
886
|
+
};
|
|
887
|
+
}
|
|
888
|
+
|
|
889
|
+
const result = await playbookDecayDetectionTemplate.heuristicFallback!({
|
|
890
|
+
playbooks,
|
|
891
|
+
annotations,
|
|
892
|
+
playbookEffectiveness,
|
|
893
|
+
unguidedSuccessRate,
|
|
894
|
+
});
|
|
895
|
+
|
|
896
|
+
this.lastDecayResult = result;
|
|
897
|
+
|
|
898
|
+
return {
|
|
899
|
+
taskName: 'playbook-decay-detection',
|
|
900
|
+
success: true,
|
|
901
|
+
durationMs: Date.now() - start,
|
|
902
|
+
details: {
|
|
903
|
+
decayingCount: result.decaying.length,
|
|
904
|
+
atRiskCount: result.atRisk.length,
|
|
905
|
+
healthyCount: result.healthy.length,
|
|
906
|
+
decaying: result.decaying,
|
|
907
|
+
atRisk: result.atRisk,
|
|
908
|
+
portfolioAssessment: result.portfolioAssessment,
|
|
909
|
+
},
|
|
910
|
+
};
|
|
911
|
+
},
|
|
912
|
+
});
|
|
913
|
+
|
|
914
|
+
// 2. Playbook Efficacy Audit (priority 65) — audits flagged playbooks from decay detection
|
|
915
|
+
this.maintenanceScheduler.registerTask({
|
|
916
|
+
name: 'playbook-efficacy-audit',
|
|
917
|
+
priority: 65,
|
|
918
|
+
execute: async () => {
|
|
919
|
+
const start = Date.now();
|
|
920
|
+
|
|
921
|
+
if (!this.lastDecayResult) {
|
|
922
|
+
return {
|
|
923
|
+
taskName: 'playbook-efficacy-audit',
|
|
924
|
+
success: true,
|
|
925
|
+
durationMs: Date.now() - start,
|
|
926
|
+
details: { skipped: true, reason: 'no decay detection results' },
|
|
927
|
+
};
|
|
928
|
+
}
|
|
929
|
+
|
|
930
|
+
const flagged = [
|
|
931
|
+
...this.lastDecayResult.decaying,
|
|
932
|
+
...this.lastDecayResult.atRisk,
|
|
933
|
+
];
|
|
934
|
+
|
|
935
|
+
if (flagged.length === 0) {
|
|
936
|
+
return {
|
|
937
|
+
taskName: 'playbook-efficacy-audit',
|
|
938
|
+
success: true,
|
|
939
|
+
durationMs: Date.now() - start,
|
|
940
|
+
details: { skipped: true, reason: 'no flagged playbooks' },
|
|
941
|
+
};
|
|
942
|
+
}
|
|
943
|
+
|
|
944
|
+
const { annotations, playbooks, playbookEffectiveness, unguidedSuccessRate } =
|
|
945
|
+
await this.getEfficacyData();
|
|
946
|
+
|
|
947
|
+
const playbookMap = new Map(playbooks.map(p => [p.id, p]));
|
|
948
|
+
const effectivenessMap = new Map(playbookEffectiveness.map(e => [e.playbookId, e]));
|
|
949
|
+
const trajectoryDomainMap = this.buildTrajectoryDomainMap(annotations);
|
|
950
|
+
|
|
951
|
+
const audits: Array<{
|
|
952
|
+
playbookId: string;
|
|
953
|
+
health: string;
|
|
954
|
+
recommendations: unknown[];
|
|
955
|
+
summary: string;
|
|
956
|
+
}> = [];
|
|
957
|
+
|
|
958
|
+
for (const signal of flagged) {
|
|
959
|
+
const playbook = playbookMap.get(signal.playbookId);
|
|
960
|
+
if (!playbook) continue;
|
|
961
|
+
|
|
962
|
+
const auditResult = await playbookEfficacyAuditTemplate.heuristicFallback!({
|
|
963
|
+
playbook,
|
|
964
|
+
annotations,
|
|
965
|
+
playbookEffectiveness: effectivenessMap.get(signal.playbookId),
|
|
966
|
+
unguidedSuccessRate,
|
|
967
|
+
trajectoryDomainMap,
|
|
968
|
+
});
|
|
969
|
+
|
|
970
|
+
audits.push({
|
|
971
|
+
playbookId: signal.playbookId,
|
|
972
|
+
health: auditResult.assessment.health,
|
|
973
|
+
recommendations: auditResult.assessment.recommendations,
|
|
974
|
+
summary: auditResult.assessment.summary,
|
|
975
|
+
});
|
|
976
|
+
}
|
|
977
|
+
|
|
978
|
+
return {
|
|
979
|
+
taskName: 'playbook-efficacy-audit',
|
|
980
|
+
success: true,
|
|
981
|
+
durationMs: Date.now() - start,
|
|
982
|
+
details: {
|
|
983
|
+
auditsPerformed: audits.length,
|
|
984
|
+
audits,
|
|
985
|
+
},
|
|
986
|
+
};
|
|
987
|
+
},
|
|
988
|
+
});
|
|
989
|
+
|
|
990
|
+
// 3. Playbook Lifecycle Review (priority 50) — runs every Nth maintenance cycle
|
|
991
|
+
this.maintenanceScheduler.registerTask({
|
|
992
|
+
name: 'playbook-lifecycle-review',
|
|
993
|
+
priority: 50,
|
|
994
|
+
execute: async () => {
|
|
995
|
+
const start = Date.now();
|
|
996
|
+
|
|
997
|
+
// Only run every Nth maintenance cycle
|
|
998
|
+
if (this.maintenanceCyclesRun % efficacyCfg.lifecycleReviewFrequency !== 0) {
|
|
999
|
+
return {
|
|
1000
|
+
taskName: 'playbook-lifecycle-review',
|
|
1001
|
+
success: true,
|
|
1002
|
+
durationMs: Date.now() - start,
|
|
1003
|
+
details: {
|
|
1004
|
+
skipped: true,
|
|
1005
|
+
reason: `runs every ${efficacyCfg.lifecycleReviewFrequency} maintenance cycles`,
|
|
1006
|
+
},
|
|
1007
|
+
};
|
|
1008
|
+
}
|
|
1009
|
+
|
|
1010
|
+
const { annotations, playbooks, playbookEffectiveness, unguidedSuccessRate } =
|
|
1011
|
+
await this.getEfficacyData();
|
|
1012
|
+
|
|
1013
|
+
if (playbooks.length < efficacyCfg.minPlaybooks || annotations.length < efficacyCfg.minAnnotations) {
|
|
1014
|
+
return {
|
|
1015
|
+
taskName: 'playbook-lifecycle-review',
|
|
1016
|
+
success: true,
|
|
1017
|
+
durationMs: Date.now() - start,
|
|
1018
|
+
details: { skipped: true, reason: 'insufficient data' },
|
|
1019
|
+
};
|
|
1020
|
+
}
|
|
1021
|
+
|
|
1022
|
+
const trajectoryDomainMap = this.buildTrajectoryDomainMap(annotations);
|
|
1023
|
+
|
|
1024
|
+
const result = await playbookLifecycleReviewTemplate.heuristicFallback!({
|
|
1025
|
+
playbooks,
|
|
1026
|
+
annotations,
|
|
1027
|
+
playbookEffectiveness,
|
|
1028
|
+
unguidedSuccessRate,
|
|
1029
|
+
trajectoryDomainMap,
|
|
1030
|
+
});
|
|
1031
|
+
|
|
1032
|
+
return {
|
|
1033
|
+
taskName: 'playbook-lifecycle-review',
|
|
1034
|
+
success: true,
|
|
1035
|
+
durationMs: Date.now() - start,
|
|
1036
|
+
details: {
|
|
1037
|
+
totalPlaybooks: result.snapshot.totalPlaybooks,
|
|
1038
|
+
recommendationsCount: result.recommendations.length,
|
|
1039
|
+
recommendations: result.recommendations,
|
|
1040
|
+
insights: result.insights,
|
|
1041
|
+
actionPlan: result.actionPlan,
|
|
1042
|
+
snapshot: {
|
|
1043
|
+
avgConfidence: result.snapshot.avgConfidence,
|
|
1044
|
+
avgSuccessRate: result.snapshot.avgSuccessRate,
|
|
1045
|
+
confidenceBands: result.snapshot.confidenceBands,
|
|
1046
|
+
decayingCount: result.snapshot.decaying.length,
|
|
1047
|
+
staleCount: result.snapshot.stale.length,
|
|
1048
|
+
},
|
|
1049
|
+
},
|
|
1050
|
+
};
|
|
1051
|
+
},
|
|
1052
|
+
});
|
|
1053
|
+
}
|
|
1054
|
+
|
|
1055
|
+
/**
 * Collect the inputs shared by the efficacy maintenance tasks.
 *
 * Playbooks are always read from memory. Annotations and effectiveness
 * metrics come from the effectiveness tracker; when no tracker is set,
 * they default to empty arrays and an unguided success rate of 0.
 *
 * @returns The playbooks, task annotations, per-playbook effectiveness
 *   entries, and the unguided success rate.
 */
private async getEfficacyData(): Promise<{
  annotations: TaskAnnotation[];
  playbooks: Playbook[];
  playbookEffectiveness: PlaybookEffectivenessEntry[];
  unguidedSuccessRate: number;
}> {
  const playbooks = await this.memory.playbooks.getAll();

  // Without a tracker there is nothing to measure: hand back empty defaults.
  if (!this.effectivenessTracker) {
    return { annotations: [], playbooks, playbookEffectiveness: [], unguidedSuccessRate: 0 };
  }

  // Kept sequential (annotations first, then metrics) to match the original call order.
  const annotations = await this.effectivenessTracker.getAll();
  const { playbookEffectiveness, unguidedSuccessRate } =
    await this.effectivenessTracker.computeMetrics();

  return { annotations, playbooks, playbookEffectiveness, unguidedSuccessRate };
}
|
|
1079
|
+
|
|
1080
|
+
/**
 * Build a trajectoryId→domain lookup from the accumulated trajectories.
 *
 * Only trajectories currently held in `this.accumulated` are mapped, using
 * each trajectory's `task.domain`. The annotations argument is accepted but
 * never read: annotations don't carry a domain, so trajectories that are no
 * longer accumulated are simply absent from the map. Per the original note,
 * those are expected to fall through to the toolkit's 'unknown' default,
 * which is considered acceptable for log-only mode.
 *
 * @param _annotations Unused by this method.
 * @returns Map keyed by trajectory id, with the task domain as the value.
 */
private buildTrajectoryDomainMap(_annotations: TaskAnnotation[]): Map<string, string> {
  const domainByTrajectoryId = new Map<string, string>();

  for (const { id, task } of this.accumulated) {
    domainByTrajectoryId.set(id, task.domain);
  }

  return domainByTrajectoryId;
}
|
|
826
1096
|
|
|
827
1097
|
/**
|
|
@@ -63,7 +63,8 @@ export function getCandidates<T>(
|
|
|
63
63
|
const item = resolver.get(id);
|
|
64
64
|
if (item) items.push(item);
|
|
65
65
|
}
|
|
66
|
-
|
|
66
|
+
// If all IDs were stale/unresolvable, fall back to full scan
|
|
67
|
+
if (items.length > 0) return items;
|
|
67
68
|
}
|
|
68
69
|
|
|
69
70
|
// Too few candidates — fall back
|
|
@@ -60,26 +60,26 @@ const ERROR_RULES: Array<{ test: (s: string) => boolean; type: ErrorType; descri
|
|
|
60
60
|
type: 'permission-error',
|
|
61
61
|
description: 'Permission denied',
|
|
62
62
|
},
|
|
63
|
-
{
|
|
64
|
-
test: (s) => /not\s*found|missing|no\s*such\s*file/i.test(s),
|
|
65
|
-
type: 'not-found',
|
|
66
|
-
description: 'Resource not found',
|
|
67
|
-
},
|
|
68
63
|
{
|
|
69
64
|
test: (s) => /network|connection|ECONNREFUSED|ENOTFOUND/i.test(s),
|
|
70
65
|
type: 'network-error',
|
|
71
66
|
description: 'Network or connection error',
|
|
72
67
|
},
|
|
73
68
|
{
|
|
74
|
-
test: (s) => /
|
|
75
|
-
type: '
|
|
76
|
-
description: '
|
|
69
|
+
test: (s) => /not\s*found|missing|no\s*such\s*file/i.test(s),
|
|
70
|
+
type: 'not-found',
|
|
71
|
+
description: 'Resource not found',
|
|
77
72
|
},
|
|
78
73
|
{
|
|
79
74
|
test: (s) => /assertion|assert/i.test(s),
|
|
80
75
|
type: 'assertion-error',
|
|
81
76
|
description: 'Assertion failed',
|
|
82
77
|
},
|
|
78
|
+
{
|
|
79
|
+
test: (s) => /test.*fail|fail.*test/i.test(s),
|
|
80
|
+
type: 'test-failure',
|
|
81
|
+
description: 'Test or assertion failure',
|
|
82
|
+
},
|
|
83
83
|
{
|
|
84
84
|
test: (s) => /exception|error/i.test(s),
|
|
85
85
|
type: 'general-error',
|