cognitive-core 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/.claude/settings.json +111 -2
  2. package/.sessionlog/settings.json +4 -0
  3. package/dist/index.d.ts +1 -1
  4. package/dist/index.d.ts.map +1 -1
  5. package/dist/index.js +5 -1
  6. package/dist/index.js.map +1 -1
  7. package/dist/learning/index.d.ts +1 -1
  8. package/dist/learning/index.d.ts.map +1 -1
  9. package/dist/learning/index.js.map +1 -1
  10. package/dist/learning/unified-pipeline.d.ts +30 -0
  11. package/dist/learning/unified-pipeline.d.ts.map +1 -1
  12. package/dist/learning/unified-pipeline.js +207 -0
  13. package/dist/learning/unified-pipeline.js.map +1 -1
  14. package/dist/memory/candidate-retrieval.d.ts.map +1 -1
  15. package/dist/memory/candidate-retrieval.js +3 -1
  16. package/dist/memory/candidate-retrieval.js.map +1 -1
  17. package/dist/utils/error-classifier.js +8 -8
  18. package/dist/utils/error-classifier.js.map +1 -1
  19. package/dist/workspace/efficacy-toolkit.d.ts +164 -0
  20. package/dist/workspace/efficacy-toolkit.d.ts.map +1 -0
  21. package/dist/workspace/efficacy-toolkit.js +281 -0
  22. package/dist/workspace/efficacy-toolkit.js.map +1 -0
  23. package/dist/workspace/index.d.ts +2 -1
  24. package/dist/workspace/index.d.ts.map +1 -1
  25. package/dist/workspace/index.js +3 -1
  26. package/dist/workspace/index.js.map +1 -1
  27. package/dist/workspace/templates/index.d.ts +3 -0
  28. package/dist/workspace/templates/index.d.ts.map +1 -1
  29. package/dist/workspace/templates/index.js +6 -0
  30. package/dist/workspace/templates/index.js.map +1 -1
  31. package/dist/workspace/templates/playbook-decay-detection.d.ts +46 -0
  32. package/dist/workspace/templates/playbook-decay-detection.d.ts.map +1 -0
  33. package/dist/workspace/templates/playbook-decay-detection.js +197 -0
  34. package/dist/workspace/templates/playbook-decay-detection.js.map +1 -0
  35. package/dist/workspace/templates/playbook-efficacy-audit.d.ts +46 -0
  36. package/dist/workspace/templates/playbook-efficacy-audit.d.ts.map +1 -0
  37. package/dist/workspace/templates/playbook-efficacy-audit.js +160 -0
  38. package/dist/workspace/templates/playbook-efficacy-audit.js.map +1 -0
  39. package/dist/workspace/templates/playbook-lifecycle-review.d.ts +51 -0
  40. package/dist/workspace/templates/playbook-lifecycle-review.d.ts.map +1 -0
  41. package/dist/workspace/templates/playbook-lifecycle-review.js +187 -0
  42. package/dist/workspace/templates/playbook-lifecycle-review.js.map +1 -0
  43. package/package.json +7 -1
  44. package/src/index.ts +27 -0
  45. package/src/learning/index.ts +1 -0
  46. package/src/learning/unified-pipeline.ts +271 -1
  47. package/src/memory/candidate-retrieval.ts +2 -1
  48. package/src/utils/error-classifier.ts +8 -8
  49. package/src/workspace/efficacy-toolkit.ts +496 -0
  50. package/src/workspace/index.ts +29 -0
  51. package/src/workspace/templates/index.ts +24 -0
  52. package/src/workspace/templates/playbook-decay-detection.ts +272 -0
  53. package/src/workspace/templates/playbook-efficacy-audit.ts +246 -0
  54. package/src/workspace/templates/playbook-lifecycle-review.ts +274 -0
  55. package/tests/fixtures/behavioral-trajectories.ts +210 -0
  56. package/tests/integration/pipeline-data-correctness.test.ts +794 -0
  57. package/tests/learning/meta-learner.test.ts +418 -0
  58. package/tests/learning/pipeline-memory-updates.test.ts +721 -0
  59. package/tests/learning/unified-pipeline-efficacy.test.ts +232 -0
  60. package/tests/memory/candidate-retrieval.test.ts +167 -0
  61. package/tests/memory/meta.test.ts +399 -0
  62. package/tests/search/evaluator.test.ts +257 -0
  63. package/tests/search/verification-runner.test.ts +357 -0
  64. package/tests/utils/error-classifier.test.ts +149 -0
  65. package/tests/utils/trajectory-helpers.test.ts +163 -0
  66. package/tests/workspace/efficacy-toolkit.test.ts +404 -0
  67. package/tests/workspace/templates/playbook-efficacy.test.ts +377 -0
@@ -0,0 +1,274 @@
1
+ /**
2
+ * Playbook Lifecycle Review Template
3
+ *
4
+ * Portfolio-wide health assessment with lifecycle recommendations.
5
+ * Uses the efficacy toolkit's portfolio snapshot to give the agent
6
+ * a pre-computed view of the entire playbook portfolio, then asks
7
+ * for promotion, deprecation, merge, and evolution recommendations.
8
+ *
9
+ * This is the "skill designer" review step (inspired by MemSkill)
10
+ * that periodically evaluates the whole portfolio.
11
+ */
12
+
13
+ import type { WorkspaceHandle } from 'agent-workspace';
14
+ import type { Playbook } from '../../types/index.js';
15
+ import type { TaskAnnotation, PlaybookEffectivenessEntry } from '../../learning/effectiveness.js';
16
+ import type {
17
+ AgenticTaskTemplate,
18
+ AnalysisComplexity,
19
+ } from '../types.js';
20
+ import {
21
+ buildPortfolioSnapshot,
22
+ type PortfolioSnapshot,
23
+ } from '../efficacy-toolkit.js';
24
+
25
+ // ============================================================
26
+ // Input / Output Types
27
+ // ============================================================
28
+
/**
 * Data handed to the lifecycle review template.
 *
 * The first five fields are forwarded, in this order, to
 * `buildPortfolioSnapshot`; `corePlaybookIds` is only used to flag
 * playbooks as core in the per-playbook summaries.
 */
export interface PlaybookLifecycleReviewInput {
  /** Playbooks that make up the portfolio under review. */
  playbooks: Playbook[];

  /** Task annotations used when building the portfolio snapshot. */
  annotations: TaskAnnotation[];

  /** Effectiveness entries used when building the portfolio snapshot. */
  playbookEffectiveness: PlaybookEffectivenessEntry[];

  /**
   * Baseline success rate passed to the snapshot builder.
   * NOTE(review): presumably the success rate of tasks run without playbook
   * guidance, expressed as a fraction — confirm against the efficacy toolkit.
   */
  unguidedSuccessRate: number;

  /**
   * String-to-string lookup passed to the snapshot builder.
   * NOTE(review): presumably trajectory ID → domain — confirm against caller.
   */
  trajectoryDomainMap: Map<string, string>;

  /** Current core tier playbook IDs (from SkillLibrary) */
  corePlaybookIds?: string[];
}
38
+
/**
 * Result of a lifecycle review, whether it came from the agent or from
 * the heuristic fallback.
 */
export interface PlaybookLifecycleReviewOutput {
  /** Portfolio snapshot that was computed up front, before any agent work. */
  snapshot: PortfolioSnapshot;

  /** One lifecycle recommendation per reviewed playbook that needs one. */
  recommendations: LifecycleRecommendation[];

  /** Strategic observations about the portfolio as a whole. */
  insights: string[];

  /** Action plan entries, ordered by priority. */
  actionPlan: string[];
}
49
+
/**
 * A single proposed lifecycle change for one playbook.
 */
export interface LifecycleRecommendation {
  /** ID of the playbook the recommendation applies to. */
  playbookId: string;

  /** Human-readable name of that playbook. */
  playbookName: string;

  /** State the playbook is considered to be in right now. */
  currentState:
    | 'active'
    | 'core'
    | 'stale'
    | 'underperforming';

  /** Lifecycle action being proposed. */
  proposedState:
    | 'promote-to-core'
    | 'keep-active'
    | 'deprecate'
    | 'archive'
    | 'merge'
    | 'evolve'
    | 'split';

  /** Why the change is being proposed. */
  rationale: string;

  /** For merge: the playbook this one should be merged into. */
  mergeTarget?: { id: string; name: string };

  /** For evolve: suggested changes to the playbook's guidance. */
  evolutionNotes?: string;

  /** How urgently the recommendation should be acted on. */
  priority: 'high' | 'medium' | 'low';
}
63
+
64
+ // ============================================================
65
+ // Template Implementation
66
+ // ============================================================
67
+
/** Values accepted for `LifecycleRecommendation.currentState`. */
const CURRENT_STATES: readonly LifecycleRecommendation['currentState'][] = [
  'active',
  'core',
  'stale',
  'underperforming',
];

/** Values accepted for `LifecycleRecommendation.proposedState`. */
const PROPOSED_STATES: readonly LifecycleRecommendation['proposedState'][] = [
  'promote-to-core',
  'keep-active',
  'deprecate',
  'archive',
  'merge',
  'evolve',
  'split',
];

/** Values accepted for `LifecycleRecommendation.priority`. */
const PRIORITIES: readonly LifecycleRecommendation['priority'][] = ['high', 'medium', 'low'];

/** True when `value` is a plain (non-null, non-array) object. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Maps an untrusted value onto one of `allowed`, ignoring case and
 * surrounding whitespace; anything unrecognised becomes `fallback`.
 */
function coerceChoice<T extends string>(value: unknown, allowed: readonly T[], fallback: T): T {
  if (typeof value !== 'string') return fallback;
  const normalized = value.trim().toLowerCase();
  return allowed.find(option => option === normalized) ?? fallback;
}

/** Stringifies every element of an array; non-arrays become an empty list. */
function toStringList(value: unknown): string[] {
  return Array.isArray(value) ? value.map(item => String(item)) : [];
}

/**
 * Converts one agent-written entry into a well-typed recommendation.
 * Returns `undefined` for entries that are not objects.
 */
function parseRecommendation(entry: unknown): LifecycleRecommendation | undefined {
  if (!isRecord(entry)) return undefined;

  // Only an object-shaped merge target is usable; a bare string or number
  // would otherwise turn into an empty { id: '', name: '' } pair.
  const target = entry.mergeTarget;
  const mergeTarget = isRecord(target)
    ? { id: String(target.id ?? ''), name: String(target.name ?? '') }
    : undefined;

  return {
    playbookId: String(entry.playbookId ?? ''),
    playbookName: String(entry.playbookName ?? ''),
    currentState: coerceChoice(entry.currentState, CURRENT_STATES, 'active'),
    proposedState: coerceChoice(entry.proposedState, PROPOSED_STATES, 'keep-active'),
    rationale: String(entry.rationale ?? ''),
    mergeTarget,
    evolutionNotes: entry.evolutionNotes ? String(entry.evolutionNotes) : undefined,
    priority: coerceChoice(entry.priority, PRIORITIES, 'medium'),
  };
}

/** Builds the portfolio snapshot from the review input. */
function snapshotFor(input: PlaybookLifecycleReviewInput): PortfolioSnapshot {
  return buildPortfolioSnapshot(
    input.playbooks,
    input.annotations,
    input.playbookEffectiveness,
    input.unguidedSuccessRate,
    input.trajectoryDomainMap,
  );
}

/**
 * Agentic task template for the portfolio-wide playbook lifecycle review.
 *
 * The template pre-computes a portfolio snapshot, writes it (plus
 * per-playbook summaries) into the workspace, asks the agent for lifecycle
 * recommendations, and reads them back from `output/lifecycle-review.json`.
 * For small portfolios a purely heuristic fallback is available.
 */
export const playbookLifecycleReviewTemplate: AgenticTaskTemplate<
  PlaybookLifecycleReviewInput,
  PlaybookLifecycleReviewOutput
> = {
  taskType: 'playbook-lifecycle-review',
  domain: 'meta-learning',
  description: 'Portfolio-wide playbook health assessment with lifecycle recommendations',

  /**
   * Picks the analysis depth from portfolio size: empty or tiny portfolios
   * (at most 3 playbooks with fewer than 10 annotations) are `heuristic`,
   * more than 30 playbooks is `thorough`, more than 10 is `standard`,
   * everything else is `lightweight`.
   */
  assessComplexity(input: PlaybookLifecycleReviewInput): AnalysisComplexity {
    if (input.playbooks.length === 0) return 'heuristic';
    if (input.playbooks.length <= 3 && input.annotations.length < 10) return 'heuristic';
    if (input.playbooks.length > 30) return 'thorough';
    if (input.playbooks.length > 10) return 'standard';
    return 'lightweight';
  },

  /**
   * Agent-free review: every playbook flagged as decaying gets a
   * high-priority `evolve` recommendation and every playbook flagged as
   * stale gets a medium-priority `archive` recommendation. No insights are
   * produced; the action plan is one line per recommendation.
   */
  async heuristicFallback(input: PlaybookLifecycleReviewInput): Promise<PlaybookLifecycleReviewOutput> {
    const snapshot = snapshotFor(input);

    // Simple heuristic recommendations
    const recommendations: LifecycleRecommendation[] = [];

    for (const flag of snapshot.decaying) {
      recommendations.push({
        playbookId: flag.playbookId,
        playbookName: flag.playbookName,
        currentState: 'active',
        proposedState: 'evolve',
        rationale: flag.reason,
        priority: 'high',
      });
    }

    for (const flag of snapshot.stale) {
      recommendations.push({
        playbookId: flag.playbookId,
        playbookName: flag.playbookName,
        currentState: 'stale',
        proposedState: 'archive',
        rationale: flag.reason,
        priority: 'medium',
      });
    }

    return {
      snapshot,
      recommendations,
      insights: [],
      actionPlan: recommendations.map(r =>
        `${r.proposedState} "${r.playbookName}" — ${r.rationale}`
      ),
    };
  },

  /**
   * Writes the agent's input files: the portfolio snapshot, a summary per
   * playbook, and (only when there are any) the redundancy pairs.
   */
  async prepareWorkspace(
    input: PlaybookLifecycleReviewInput,
    handle: WorkspaceHandle
  ): Promise<void> {
    const snapshot = snapshotFor(input);

    // Portfolio snapshot — the core data
    await handle.writeJson('input', 'portfolio-snapshot.json', snapshot);

    // Build the core-ID lookup once instead of scanning the array per playbook.
    const coreIds = new Set(input.corePlaybookIds ?? []);

    // Per-playbook summaries for the agent to reference
    const playbookSummaries = input.playbooks.map(pb => {
      const { successCount, failureCount } = pb.evolution;
      const attempts = successCount + failureCount;
      return {
        id: pb.id,
        name: pb.name,
        confidence: pb.confidence,
        successCount,
        failureCount,
        // A playbook that was never used reports a success rate of 0.
        successRate: attempts > 0 ? successCount / attempts : 0,
        domains: pb.applicability.domains,
        strategy: pb.guidance.strategy,
        tacticsCount: pb.guidance.tactics.length,
        refinementsCount: pb.evolution.refinements.length,
        version: pb.evolution.version,
        lastUsed: pb.evolution.lastUsed,
        isCore: coreIds.has(pb.id),
        consolidationStrength: pb.evolution.consolidationStrength ?? 0,
      };
    });

    await handle.writeJson('input', 'playbook-summaries.json', playbookSummaries);

    // Redundancy pairs (pre-computed by toolkit)
    if (snapshot.redundant.length > 0) {
      await handle.writeJson('input', 'redundancy-pairs.json', snapshot.redundant);
    }
  },

  /** Builds the instruction text given to the agent, including the output schema. */
  buildTaskPrompt(input: PlaybookLifecycleReviewInput): string {
    return [
      `Review the health of the entire playbook portfolio (${input.playbooks.length} playbooks).`,
      '',
      'Read:',
      '- input/portfolio-snapshot.json — Portfolio health metrics, flagged issues (decaying, stale, low adoption, redundant)',
      '- input/playbook-summaries.json — Per-playbook summaries with confidence, success rates, domains',
      '- input/redundancy-pairs.json — (if present) Playbook pairs with high content overlap',
      '',
      'For each playbook, recommend a lifecycle action:',
      '- **promote-to-core**: High confidence + success rate, proven across tasks',
      '- **keep-active**: Performing well, no changes needed',
      '- **evolve**: Decent foundation but guidance needs updating based on failure patterns',
      '- **merge**: Two playbooks cover the same ground — merge into one stronger playbook',
      '- **split**: One playbook covers too many domains — split into domain-specific variants',
      '- **deprecate**: Low value, actively misleading, or superseded',
      '- **archive**: No longer relevant but worth preserving for reference',
      '',
      'Also provide:',
      '- Strategic insights about the portfolio (coverage gaps, over-invested domains, etc.)',
      '- A priority-ordered action plan (most impactful changes first)',
      '',
      'Write to output/lifecycle-review.json:',
      '```json',
      '{',
      ' "recommendations": [',
      ' {',
      ' "playbookId": "id",',
      ' "playbookName": "name",',
      ' "currentState": "active" | "core" | "stale" | "underperforming",',
      ' "proposedState": "promote-to-core" | "keep-active" | "deprecate" | "archive" | "merge" | "evolve" | "split",',
      ' "rationale": "why this change",',
      ' "mergeTarget": { "id": "...", "name": "..." },',
      ' "evolutionNotes": "optional: what to change in guidance",',
      ' "priority": "high" | "medium" | "low"',
      ' }',
      ' ],',
      ' "insights": ["insight1", "insight2"],',
      ' "actionPlan": ["step1", "step2"]',
      '}',
      '```',
    ].join('\n');
  },

  getSkills() { return []; },
  getResources() { return []; },

  outputConfig: {
    files: [
      {
        path: 'lifecycle-review.json',
        format: 'json' as const,
        required: true,
        description: 'Playbook lifecycle review results',
      },
    ],
  },

  /**
   * Reads the agent's review back from the workspace and normalises it.
   *
   * The agent's JSON is untrusted: a non-object document is treated as
   * empty, non-object recommendation entries are skipped, and any
   * `currentState` / `proposedState` / `priority` outside the allowed
   * values falls back to `'active'` / `'keep-active'` / `'medium'`.
   * The snapshot is re-read from the input file written by
   * `prepareWorkspace`.
   */
  async collectOutput(handle: WorkspaceHandle): Promise<PlaybookLifecycleReviewOutput> {
    const raw: unknown = await handle.readJson('output', 'lifecycle-review.json');
    const snapshot = await handle.readJson('input', 'portfolio-snapshot.json') as PortfolioSnapshot;

    const review = isRecord(raw) ? raw : {};

    const recommendations: LifecycleRecommendation[] = [];
    if (Array.isArray(review.recommendations)) {
      for (const entry of review.recommendations) {
        const parsed = parseRecommendation(entry);
        if (parsed !== undefined) recommendations.push(parsed);
      }
    }

    return {
      snapshot,
      recommendations,
      insights: toStringList(review.insights),
      actionPlan: toStringList(review.actionPlan),
    };
  },

  async processOutput(): Promise<void> {
    // Caller decides how to act on lifecycle recommendations
  },

  /** Runs locally; `thorough` reviews get a 240 s timeout, all others 120 s. */
  getComputeRequirements(_input, complexity) {
    return {
      mode: 'local' as const,
      complexity,
      timeout: complexity === 'thorough' ? 240_000 : 120_000,
    };
  },

  agentType: 'claude-code',
  timeout: 180_000,
  captureToolCalls: true,
};
@@ -0,0 +1,210 @@
1
+ /**
2
+ * Story-based trajectory fixtures for behavioral tests.
3
+ *
4
+ * Each factory produces a realistic trajectory with semantically meaningful
5
+ * steps — the kind of thing an agent actually does when solving a task.
6
+ */
7
+
8
+ import { createTrajectory, type Trajectory } from '../../src/types/trajectory.js';
9
+ import { createTask } from '../../src/types/task.js';
10
+ import { createStep } from '../../src/types/step.js';
11
+ import { successOutcome, failureOutcome } from '../../src/types/outcome.js';
12
+
/**
 * Successful "fix broken TypeScript import" trajectory.
 * Steps: read file → identify wrong path → edit import → run tsc → pass.
 *
 * @param variant - Selects which source file the story is about. The value
 *   is truncated to an integer and wrapped onto the file list, so negative
 *   or fractional values still resolve to a real file name; non-finite
 *   values select the first file. Non-negative integers behave as a plain
 *   `variant % files.length`.
 * @returns A trajectory in the `typescript` domain with a success outcome.
 */
export function makeTypescriptImportFix(variant: number): Trajectory {
  const files = ['routes.ts', 'utils.ts', 'handler.ts', 'service.ts', 'controller.ts', 'middleware.ts'];

  // `%` keeps the sign of the dividend in JavaScript, so shift the remainder
  // back into [0, files.length) before indexing.
  const remainder = Number.isFinite(variant) ? Math.trunc(variant) % files.length : 0;
  const file = files[(remainder + files.length) % files.length];

  return createTrajectory({
    task: createTask({
      domain: 'typescript',
      description: `Fix broken TypeScript import in src/${file}`,
    }),
    steps: [
      createStep({
        thought: `Check the import statements in ${file}`,
        action: `ReadFile src/${file}`,
        observation: `import { UserService } from "./services/user"\n// TS2307: Cannot find module './services/user'`,
      }),
      createStep({
        thought: 'The import path is missing the .js extension required by ESM',
        action: `EditFile src/${file}`,
        observation: `Updated: import { UserService } from "./services/user.js"`,
      }),
      createStep({
        thought: 'Verify the fix compiles',
        action: 'Bash tsc --noEmit',
        observation: 'Compilation successful. No errors found.',
      }),
    ],
    outcome: successOutcome(`Fixed ESM import extension in src/${file}`),
    agentId: 'test-agent',
  });
}
46
+
/**
 * Successful "fix Python type error" trajectory.
 * Steps: read traceback → identify wrong type → edit function → run pytest → pass.
 *
 * @param variant - Selects which Python file the story is about. The value
 *   is truncated to an integer and wrapped onto the file list, so negative
 *   or fractional values still resolve to a real file name; non-finite
 *   values select the first file. Non-negative integers behave as a plain
 *   `variant % files.length`.
 * @returns A trajectory in the `python` domain with a success outcome.
 */
export function makePythonTypeFix(variant: number): Trajectory {
  const files = ['api.py', 'models.py', 'views.py', 'serializers.py', 'utils.py'];

  // `%` keeps the sign of the dividend in JavaScript, so shift the remainder
  // back into [0, files.length) before indexing.
  const remainder = Number.isFinite(variant) ? Math.trunc(variant) % files.length : 0;
  const file = files[(remainder + files.length) % files.length];

  return createTrajectory({
    task: createTask({
      domain: 'python',
      description: `Fix Python type error in ${file}`,
    }),
    steps: [
      createStep({
        thought: `Read the failing file ${file}`,
        action: `ReadFile ${file}`,
        observation: `def process_items(items: list[str]) -> int:\n return items.count`,
      }),
      createStep({
        thought: 'The function returns the method reference instead of calling it',
        action: `EditFile ${file}`,
        observation: `Updated: return items.count() # was missing parentheses`,
      }),
      createStep({
        thought: 'Run the tests to verify',
        action: 'Bash pytest -x',
        observation: '5 passed in 0.3s',
      }),
    ],
    outcome: successOutcome(`Fixed missing parentheses in ${file}`),
    agentId: 'test-agent',
  });
}
80
+
/**
 * Fixture: an attempted fix for an authentication bypass that does not work.
 *
 * The agent reads the auth middleware, adds an expiry check, and then the
 * security test suite fails with the supplied detail.
 *
 * @param errorDetail - Failure text; it appears both in the last step's
 *   observation (prefixed with `FAIL: `) and as the failure outcome.
 * @returns A trajectory in the `security` domain with a failure outcome.
 */
export function makeFailedAuthFix(errorDetail: string): Trajectory {
  const task = createTask({
    domain: 'security',
    description: 'Fix authentication bypass vulnerability in auth middleware',
  });

  const inspectMiddleware = createStep({
    thought: 'Read the auth middleware to understand the vulnerability',
    action: 'ReadFile src/middleware/auth.ts',
    observation: 'JWT token validation skips expiry check when token has admin role',
  });

  const patchMiddleware = createStep({
    thought: 'Add expiry validation for all tokens regardless of role',
    action: 'EditFile src/middleware/auth.ts',
    observation: 'Added: if (isTokenExpired(token)) throw new AuthError("expired")',
  });

  const runSecuritySuite = createStep({
    thought: 'Run the security test suite',
    action: 'Bash npm run test:security',
    observation: `FAIL: ${errorDetail}`,
  });

  const outcome = failureOutcome(errorDetail);

  return createTrajectory({
    task,
    steps: [inspectMiddleware, patchMiddleware, runSecuritySuite],
    outcome,
    agentId: 'test-agent',
  });
}
112
+
/**
 * Fixture: a successful Kubernetes rollout, in a domain (`devops`) unrelated
 * to the code-fixing fixtures in this file.
 *
 * The agent checks the running pods, applies the new manifest, and waits
 * for the rollout to finish.
 *
 * @returns A three-step trajectory with a success outcome.
 */
export function makeKubernetesDeploy(): Trajectory {
  const task = createTask({
    domain: 'devops',
    description: 'Deploy updated API service to Kubernetes staging cluster',
  });

  // The story as plain data; each entry becomes one step, in order.
  const script = [
    {
      thought: 'Check current deployment status',
      action: 'Bash kubectl get pods -n staging',
      observation: 'api-service-7f8d9c 1/1 Running 0 12h',
    },
    {
      thought: 'Apply the new deployment manifest',
      action: 'Bash kubectl apply -f k8s/api-service.yaml',
      observation: 'deployment.apps/api-service configured',
    },
    {
      thought: 'Wait for rollout to complete',
      action: 'Bash kubectl rollout status deployment/api-service -n staging',
      observation: 'deployment "api-service" successfully rolled out',
    },
  ];
  const steps = script.map(beat => createStep(beat));

  const outcome = successOutcome('Deployed api-service v2.3.1 to staging');

  return createTrajectory({ task, steps, outcome, agentId: 'test-agent' });
}
143
+
/**
 * Fixture: a wasteful trajectory that re-reads the same config file over
 * and over before finally editing it, and ends in a timeout failure.
 * Handy for exercising reflexion critique of wasted steps.
 *
 * @param stepCount - How many redundant read steps precede the single edit.
 * @returns A trajectory in the `typescript` domain with `stepCount` reads,
 *   one edit, and a failure outcome.
 */
export function makeInefficient(stepCount: number): Trajectory {
  const redundantRead = (attempt: number) =>
    createStep({
      thought: `Check config again (attempt ${attempt})`,
      action: 'ReadFile src/config.ts',
      observation: 'export const config = { debug: false, port: 3000 }',
    });

  const timeline = [];

  // Attempts are numbered from 1 in the step text.
  let readsSoFar = 0;
  while (readsSoFar < stepCount) {
    readsSoFar += 1;
    timeline.push(redundantRead(readsSoFar));
  }

  // Only after all the redundant reads does the real change happen.
  timeline.push(
    createStep({
      thought: 'After re-reading many times, change the debug flag',
      action: 'EditFile src/config.ts',
      observation: 'Updated: debug: true',
    }),
  );

  const task = createTask({
    domain: 'typescript',
    description: 'Enable debug mode in application config',
  });

  return createTrajectory({
    task,
    steps: timeline,
    outcome: failureOutcome('Timed out after too many read operations'),
    agentId: 'test-agent',
  });
}
178
+
/**
 * Fixture: a generic analyse → fix → verify success story whose task and
 * solution text are chosen by the caller. Useful when a test needs to check
 * what is extracted from a specific solution description (e.g. causal edges).
 *
 * @param domain - Domain assigned to the task.
 * @param description - Description assigned to the task.
 * @param solution - Text passed to the success outcome.
 * @returns A three-step trajectory with a success outcome.
 */
export function makeSuccessfulWithSolution(
  domain: string,
  description: string,
  solution: string,
): Trajectory {
  const task = createTask({ domain, description });

  const analyze = createStep({
    thought: 'Analyze the problem',
    action: 'ReadFile src/main.ts',
    observation: 'Found the issue',
  });
  const applyFix = createStep({
    thought: 'Apply the fix',
    action: 'EditFile src/main.ts',
    observation: 'File updated',
  });
  const verify = createStep({
    thought: 'Verify',
    action: 'Bash npm test',
    observation: 'All tests passed',
  });

  const outcome = successOutcome(solution);

  return createTrajectory({
    task,
    steps: [analyze, applyFix, verify],
    outcome,
    agentId: 'test-agent',
  });
}