cognitive-core 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/.claude/settings.json +111 -2
  2. package/.sessionlog/settings.json +4 -0
  3. package/dist/index.d.ts +1 -1
  4. package/dist/index.d.ts.map +1 -1
  5. package/dist/index.js +5 -1
  6. package/dist/index.js.map +1 -1
  7. package/dist/learning/index.d.ts +1 -1
  8. package/dist/learning/index.d.ts.map +1 -1
  9. package/dist/learning/index.js.map +1 -1
  10. package/dist/learning/unified-pipeline.d.ts +30 -0
  11. package/dist/learning/unified-pipeline.d.ts.map +1 -1
  12. package/dist/learning/unified-pipeline.js +207 -0
  13. package/dist/learning/unified-pipeline.js.map +1 -1
  14. package/dist/memory/candidate-retrieval.d.ts.map +1 -1
  15. package/dist/memory/candidate-retrieval.js +3 -1
  16. package/dist/memory/candidate-retrieval.js.map +1 -1
  17. package/dist/utils/error-classifier.js +8 -8
  18. package/dist/utils/error-classifier.js.map +1 -1
  19. package/dist/workspace/efficacy-toolkit.d.ts +164 -0
  20. package/dist/workspace/efficacy-toolkit.d.ts.map +1 -0
  21. package/dist/workspace/efficacy-toolkit.js +281 -0
  22. package/dist/workspace/efficacy-toolkit.js.map +1 -0
  23. package/dist/workspace/index.d.ts +2 -1
  24. package/dist/workspace/index.d.ts.map +1 -1
  25. package/dist/workspace/index.js +3 -1
  26. package/dist/workspace/index.js.map +1 -1
  27. package/dist/workspace/templates/index.d.ts +3 -0
  28. package/dist/workspace/templates/index.d.ts.map +1 -1
  29. package/dist/workspace/templates/index.js +6 -0
  30. package/dist/workspace/templates/index.js.map +1 -1
  31. package/dist/workspace/templates/playbook-decay-detection.d.ts +46 -0
  32. package/dist/workspace/templates/playbook-decay-detection.d.ts.map +1 -0
  33. package/dist/workspace/templates/playbook-decay-detection.js +197 -0
  34. package/dist/workspace/templates/playbook-decay-detection.js.map +1 -0
  35. package/dist/workspace/templates/playbook-efficacy-audit.d.ts +46 -0
  36. package/dist/workspace/templates/playbook-efficacy-audit.d.ts.map +1 -0
  37. package/dist/workspace/templates/playbook-efficacy-audit.js +160 -0
  38. package/dist/workspace/templates/playbook-efficacy-audit.js.map +1 -0
  39. package/dist/workspace/templates/playbook-lifecycle-review.d.ts +51 -0
  40. package/dist/workspace/templates/playbook-lifecycle-review.d.ts.map +1 -0
  41. package/dist/workspace/templates/playbook-lifecycle-review.js +187 -0
  42. package/dist/workspace/templates/playbook-lifecycle-review.js.map +1 -0
  43. package/package.json +7 -1
  44. package/src/index.ts +27 -0
  45. package/src/learning/index.ts +1 -0
  46. package/src/learning/unified-pipeline.ts +271 -1
  47. package/src/memory/candidate-retrieval.ts +2 -1
  48. package/src/utils/error-classifier.ts +8 -8
  49. package/src/workspace/efficacy-toolkit.ts +496 -0
  50. package/src/workspace/index.ts +29 -0
  51. package/src/workspace/templates/index.ts +24 -0
  52. package/src/workspace/templates/playbook-decay-detection.ts +272 -0
  53. package/src/workspace/templates/playbook-efficacy-audit.ts +246 -0
  54. package/src/workspace/templates/playbook-lifecycle-review.ts +274 -0
  55. package/tests/fixtures/behavioral-trajectories.ts +210 -0
  56. package/tests/integration/pipeline-data-correctness.test.ts +794 -0
  57. package/tests/learning/meta-learner.test.ts +418 -0
  58. package/tests/learning/pipeline-memory-updates.test.ts +721 -0
  59. package/tests/learning/unified-pipeline-efficacy.test.ts +232 -0
  60. package/tests/memory/candidate-retrieval.test.ts +167 -0
  61. package/tests/memory/meta.test.ts +399 -0
  62. package/tests/search/evaluator.test.ts +257 -0
  63. package/tests/search/verification-runner.test.ts +357 -0
  64. package/tests/utils/error-classifier.test.ts +149 -0
  65. package/tests/utils/trajectory-helpers.test.ts +163 -0
  66. package/tests/workspace/efficacy-toolkit.test.ts +404 -0
  67. package/tests/workspace/templates/playbook-efficacy.test.ts +377 -0
@@ -0,0 +1,272 @@
1
+ /**
2
+ * Playbook Decay Detection Template
3
+ *
4
+ * Portfolio-wide scan for playbooks showing declining efficacy.
5
+ * Uses temporal trend analysis to identify decay early, before
6
+ * playbooks accumulate enough failures to trigger confidence drops.
7
+ *
8
+ * The agent reviews pre-computed trends and cross-references with
9
+ * failure patterns to determine whether decay is real (environment
10
+ * changed) or noise (small sample size).
11
+ */
12
+
13
+ import type { WorkspaceHandle } from 'agent-workspace';
14
+ import type { Playbook } from '../../types/index.js';
15
+ import type { TaskAnnotation, PlaybookEffectivenessEntry } from '../../learning/effectiveness.js';
16
+ import type {
17
+ AgenticTaskTemplate,
18
+ AnalysisComplexity,
19
+ } from '../types.js';
20
+ import {
21
+ computeTemporalTrend,
22
+ computeNormalizedGain,
23
+ type TemporalTrend,
24
+ } from '../efficacy-toolkit.js';
25
+ import { getPlaybookSuccessRate } from '../../types/index.js';
26
+
27
+ // ============================================================
28
+ // Input / Output Types
29
+ // ============================================================
30
+ /** Input for the portfolio-wide decay scan; read by assessComplexity, heuristicFallback, prepareWorkspace and buildTaskPrompt. */
31
+ export interface PlaybookDecayDetectionInput {
32
+ playbooks: Playbook[]; // portfolio to scan; one temporal trend is computed per entry
33
+ annotations: TaskAnnotation[]; // passed to computeTemporalTrend together with each playbook id; fewer than 10 selects the 'heuristic' tier
34
+ playbookEffectiveness: PlaybookEffectivenessEntry[]; // indexed by playbookId in prepareWorkspace for applied-rate and adoption figures
35
+ unguidedSuccessRate: number; // baseline handed to computeNormalizedGain — presumably a 0–1 fraction, TODO confirm
36
+ }
37
+ /** Result of a decay scan, produced either by heuristicFallback or by parsing the agent's decay-report.json. */
38
+ export interface PlaybookDecayDetectionOutput {
39
+ /** Playbooks whose trend shows detected decay */
40
+ decaying: DecaySignal[];
41
+ /** Early-warning entries: not decaying yet, but flagged (the heuristic path uses this for playbooks idle for more than 30 days) */
42
+ atRisk: DecaySignal[];
43
+ /** Ids of playbooks considered stable or improving; the heuristic path also places playbooks with fewer than 4 data points here */
44
+ healthy: string[];
45
+ /** Overall portfolio assessment; the heuristic path writes a one-line count summary */
46
+ portfolioAssessment: string;
47
+ }
48
+ /** One flagged playbook together with its trend, diagnosis and suggested follow-up. */
49
+ export interface DecaySignal {
50
+ playbookId: string;
51
+ playbookName: string;
52
+ trend: TemporalTrend; // on the agent path this is looked up by playbookId from input/playbook-trends.json; unknown ids get a zeroed 'stable' placeholder
53
+ /** Explanation of why this playbook was flagged (agent-written, or a templated sentence on the heuristic path) */
54
+ diagnosis: string;
55
+ /** Recommended action; the heuristic path only emits 'refine', 'monitor' or 'investigate' */
56
+ recommendation: 'investigate' | 'refine' | 'deprecate' | 'monitor';
57
+ /** Severity: how urgently this needs attention; the heuristic path only emits 'warning' or 'info' */
58
+ severity: 'critical' | 'warning' | 'info';
59
+ /** Supporting evidence as free-text facts */
60
+ evidence: string[];
61
+ }
62
+
63
+ // ============================================================
64
+ // Template Implementation
65
+ // ============================================================
66
+
/**
 * Agentic task template for portfolio-wide playbook decay detection.
 *
 * Temporal trends are pre-computed with `computeTemporalTrend` and written to the
 * workspace as JSON; the agent is asked to write `output/decay-report.json`, which
 * `collectOutput` parses defensively. `heuristicFallback` classifies playbooks
 * without an agent (presumably used when `assessComplexity` returns 'heuristic').
 */
export const playbookDecayDetectionTemplate: AgenticTaskTemplate<
  PlaybookDecayDetectionInput,
  PlaybookDecayDetectionOutput
> = {
  taskType: 'playbook-decay-detection',
  domain: 'meta-learning',
  description: 'Detect declining efficacy across the playbook portfolio',

  /**
   * Picks the analysis tier from the amount of available data.
   *
   * @returns 'heuristic' for an empty portfolio or fewer than 10 annotations,
   *   'standard' for more than 20 playbooks, otherwise 'lightweight'.
   */
  assessComplexity(input: PlaybookDecayDetectionInput): AnalysisComplexity {
    if (input.playbooks.length === 0) return 'heuristic';
    if (input.annotations.length < 10) return 'heuristic';
    if (input.playbooks.length > 20) return 'standard';
    return 'lightweight';
  },

  /**
   * Rule-based classification that needs no agent.
   *
   * Per playbook: fewer than 4 trend data points counts as healthy (not enough
   * signal); a 'decaying' trend is reported as decaying; otherwise more than
   * 30 days without use is reported as at-risk; everything else is healthy.
   */
  async heuristicFallback(input: PlaybookDecayDetectionInput): Promise<PlaybookDecayDetectionOutput> {
    const minDataPoints = 4;
    const steepSlope = -0.15;
    const staleAfterDays = 30;

    const decaying: DecaySignal[] = [];
    const atRisk: DecaySignal[] = [];
    const healthy: string[] = [];

    for (const pb of input.playbooks) {
      const trend = computeTemporalTrend(input.annotations, pb.id);

      if (trend.dataPoints < minDataPoints) {
        healthy.push(pb.id);
        continue;
      }

      if (trend.direction === 'decaying') {
        // A slope steeper than the threshold escalates both the action and the severity.
        const steep = trend.slope < steepSlope;
        decaying.push({
          playbookId: pb.id,
          playbookName: pb.name,
          trend,
          diagnosis: `Success rate declining from ${(trend.oldestSuccessRate * 100).toFixed(0)}% to ${(trend.recentSuccessRate * 100).toFixed(0)}%`,
          recommendation: steep ? 'refine' : 'monitor',
          severity: steep ? 'warning' : 'info',
          evidence: [`Trend slope: ${trend.slope.toFixed(3)}`],
        });
        continue;
      }

      if (trend.daysSinceLastUse !== null && trend.daysSinceLastUse > staleAfterDays) {
        const idleDays = Math.round(trend.daysSinceLastUse);
        atRisk.push({
          playbookId: pb.id,
          playbookName: pb.name,
          trend,
          diagnosis: `No usage in ${idleDays} days — may be going stale`,
          recommendation: 'investigate',
          severity: 'info',
          evidence: [`Last used ${idleDays} days ago`],
        });
        continue;
      }

      healthy.push(pb.id);
    }

    return {
      decaying,
      atRisk,
      healthy,
      portfolioAssessment: `${decaying.length} decaying, ${atRisk.length} at risk, ${healthy.length} healthy.`,
    };
  },

  /**
   * Writes the agent's input files: `input/playbook-trends.json` (one row per
   * playbook with trend, normalized gain, recent failure modes and adoption
   * rate) and `input/summary.json` (portfolio-level counts).
   */
  async prepareWorkspace(
    input: PlaybookDecayDetectionInput,
    handle: WorkspaceHandle
  ): Promise<void> {
    const effectivenessMap = new Map(
      input.playbookEffectiveness.map(e => [e.playbookId, e])
    );

    // Pre-compute trends for all playbooks
    const trendData = input.playbooks.map(pb => {
      const trend = computeTemporalTrend(input.annotations, pb.id);
      const eff = effectivenessMap.get(pb.id);
      const successRate = getPlaybookSuccessRate(pb);
      // Prefer the measured applied success rate; fall back to the playbook's own rate.
      const appliedRate = eff?.appliedSuccessRate ?? successRate;
      const normalizedGain = computeNormalizedGain(appliedRate, input.unguidedSuccessRate);

      return {
        playbookId: pb.id,
        playbookName: pb.name,
        confidence: pb.confidence,
        successRate,
        normalizedGain,
        totalUses: pb.evolution.successCount + pb.evolution.failureCount,
        domains: pb.applicability.domains,
        trend,
        recentFailureModes: pb.evolution.failures.slice(-3).map(f => f.failureMode),
        // null when the playbook was never surfaced, to avoid dividing by zero.
        adoptionRate: eff && eff.surfacedCount > 0
          ? eff.appliedCount / eff.surfacedCount
          : null,
      };
    });

    await handle.writeJson('input', 'playbook-trends.json', trendData);

    // Summary stats
    const countDirection = (direction: TemporalTrend['direction']): number =>
      trendData.filter(t => t.trend.direction === direction).length;

    await handle.writeJson('input', 'summary.json', {
      totalPlaybooks: input.playbooks.length,
      totalAnnotations: input.annotations.length,
      unguidedSuccessRate: input.unguidedSuccessRate,
      decayingCount: countDirection('decaying'),
      improvingCount: countDirection('improving'),
      stableCount: countDirection('stable'),
    });
  },

  /** Builds the agent instructions, including the expected shape of the report file. */
  buildTaskPrompt(input: PlaybookDecayDetectionInput): string {
    return [
      `Analyze ${input.playbooks.length} playbooks for signs of declining efficacy.`,
      '',
      'Read:',
      '- input/playbook-trends.json — Pre-computed temporal trends, normalized gains, and failure modes for each playbook',
      '- input/summary.json — Portfolio summary statistics',
      '',
      'For each playbook showing negative trends or staleness:',
      '1. Is the decay real or just noise from small sample size?',
      '2. What might be causing the decline? (environment changes, scope mismatch, guidance outdated)',
      '3. What action should be taken?',
      '',
      'Also flag playbooks that aren\'t decaying yet but show early warning signs:',
      '- Declining adoption rate (agents stop choosing to use it)',
      '- Increasing failure modes diversity (suggests scope drift)',
      '- High normalized gain but low confidence (fragile effectiveness)',
      '',
      'Write results to output/decay-report.json:',
      '```json',
      '{',
      ' "decaying": [',
      ' {',
      ' "playbookId": "id",',
      ' "playbookName": "name",',
      ' "diagnosis": "why this is decaying",',
      ' "recommendation": "investigate" | "refine" | "deprecate" | "monitor",',
      ' "severity": "critical" | "warning" | "info",',
      ' "evidence": ["supporting facts"]',
      ' }',
      ' ],',
      ' "atRisk": [same structure],',
      ' "healthy": ["playbookId1", "playbookId2"],',
      ' "portfolioAssessment": "2-3 sentence overall assessment"',
      '}',
      '```',
    ].join('\n');
  },

  getSkills() { return []; },
  getResources() { return []; },

  outputConfig: {
    files: [
      {
        path: 'decay-report.json',
        format: 'json' as const,
        required: true,
        description: 'Playbook decay detection results',
      },
    ],
  },

  /**
   * Parses the agent's `output/decay-report.json` into a typed result.
   *
   * The report is agent-written and therefore untrusted: `recommendation` and
   * `severity` are checked against their allowed literals (falling back to
   * 'monitor' / 'info'), non-object array entries are skipped, and a report or
   * trend file of the wrong shape yields empty collections instead of throwing.
   * Each signal's trend is re-attached from `input/playbook-trends.json`.
   */
  async collectOutput(handle: WorkspaceHandle): Promise<PlaybookDecayDetectionOutput> {
    const isRecord = (value: unknown): value is Record<string, unknown> =>
      typeof value === 'object' && value !== null && !Array.isArray(value);

    // Returns `value` only when it is one of the allowed literals.
    const pickLiteral = <T extends string>(value: unknown, allowed: readonly T[], fallback: T): T =>
      allowed.find(candidate => candidate === value) ?? fallback;

    const recommendations: readonly DecaySignal['recommendation'][] =
      ['investigate', 'refine', 'deprecate', 'monitor'];
    const severities: readonly DecaySignal['severity'][] = ['critical', 'warning', 'info'];

    const report: unknown = await handle.readJson('output', 'decay-report.json');
    const raw: Record<string, unknown> = isRecord(report) ? report : {};

    // Also read back trend data for full DecaySignal objects
    const trendRows: unknown = await handle.readJson('input', 'playbook-trends.json');
    const trendMap = new Map<string, TemporalTrend>();
    if (Array.isArray(trendRows)) {
      for (const row of trendRows) {
        if (!isRecord(row) || typeof row.playbookId !== 'string') continue;
        const trend = row.trend;
        if (typeof trend === 'object' && trend !== null) {
          trendMap.set(row.playbookId, trend as TemporalTrend);
        }
      }
    }

    // Placeholder for signals whose id the agent invented or mistyped.
    const neutralTrend = (): TemporalTrend => ({
      slope: 0, direction: 'stable', dataPoints: 0,
      recentSuccessRate: 0, oldestSuccessRate: 0, daysSinceLastUse: null,
    });

    const parseSignals = (items: unknown): DecaySignal[] => {
      if (!Array.isArray(items)) return [];
      return items.filter(isRecord).map(item => {
        const playbookId = String(item.playbookId ?? '');
        return {
          playbookId,
          playbookName: String(item.playbookName ?? ''),
          trend: trendMap.get(playbookId) ?? neutralTrend(),
          diagnosis: String(item.diagnosis ?? ''),
          recommendation: pickLiteral(item.recommendation, recommendations, 'monitor'),
          severity: pickLiteral(item.severity, severities, 'info'),
          evidence: Array.isArray(item.evidence) ? item.evidence.map(String) : [],
        };
      });
    };

    return {
      decaying: parseSignals(raw.decaying),
      atRisk: parseSignals(raw.atRisk),
      healthy: Array.isArray(raw.healthy) ? raw.healthy.map(String) : [],
      portfolioAssessment: String(raw.portfolioAssessment ?? ''),
    };
  },

  async processOutput(): Promise<void> {
    // Caller decides how to act on decay signals
  },

  computeRequirements: {
    mode: 'local',
    complexity: 'lightweight',
  },

  agentType: 'claude-code',
  timeout: 120_000,
  captureToolCalls: true,
};
@@ -0,0 +1,246 @@
1
+ /**
2
+ * Playbook Efficacy Audit Template
3
+ *
4
+ * Deep-dive analysis of a single playbook's effectiveness.
5
+ * Uses the efficacy toolkit to pre-compute metrics, then asks an agent
6
+ * to synthesize findings, identify root causes, and recommend actions.
7
+ *
8
+ * Heuristic fallback: for playbooks with very few data points, return
9
+ * a summary without agent analysis.
10
+ */
11
+
12
+ import type { WorkspaceHandle } from 'agent-workspace';
13
+ import type { Playbook } from '../../types/index.js';
14
+ import type { TaskAnnotation, PlaybookEffectivenessEntry } from '../../learning/effectiveness.js';
15
+ import type {
16
+ AgenticTaskTemplate,
17
+ AnalysisComplexity,
18
+ } from '../types.js';
19
+ import {
20
+ buildEfficacyProfile,
21
+ type PlaybookEfficacyProfile,
22
+ } from '../efficacy-toolkit.js';
23
+
24
+ // ============================================================
25
+ // Input / Output Types
26
+ // ============================================================
27
+ /** Input for a single-playbook efficacy audit; every field except the playbook itself is forwarded to buildEfficacyProfile. */
28
+ export interface PlaybookEfficacyAuditInput {
29
+ playbook: Playbook; // the playbook under audit; its success/failure counts drive assessComplexity
30
+ annotations: TaskAnnotation[]; // those that surfaced this playbook are filtered out and the last 20 written to recent-annotations.json
31
+ playbookEffectiveness: PlaybookEffectivenessEntry | undefined; // may be absent; passed through unchanged
32
+ unguidedSuccessRate: number; // baseline for the profile — presumably a 0–1 fraction, TODO confirm
33
+ /** Map of trajectoryId → domain, used for the per-domain breakdown */
34
+ trajectoryDomainMap: Map<string, string>;
35
+ }
36
+ /** Result of an efficacy audit: the pre-computed profile plus a synthesized assessment. */
37
+ export interface PlaybookEfficacyAuditOutput {
38
+ /** Pre-computed profile from buildEfficacyProfile (on the agent path, read back from input/efficacy-profile.json) */
39
+ profile: PlaybookEfficacyProfile;
40
+ /** Synthesis and recommendations, from the agent or from the heuristic fallback */
41
+ assessment: {
42
+ /** Overall health rating; the heuristic path maps success rate >= 0.7 to 'healthy' and >= 0.4 to 'at-risk' */
43
+ health: 'healthy' | 'at-risk' | 'underperforming' | 'insufficient-data';
44
+ /** Root cause analysis for any issues (always empty on the heuristic path) */
45
+ rootCauses: string[];
46
+ /** Specific actionable recommendations */
47
+ recommendations: PlaybookRecommendation[];
48
+ /** Brief narrative summary */
49
+ summary: string;
50
+ };
51
+ }
52
+ /** One recommended follow-up action for the audited playbook. */
53
+ export interface PlaybookRecommendation {
54
+ action: 'refine-guidance' | 'add-anti-pattern' | 'narrow-scope' | 'broaden-scope'
55
+ | 'deprecate' | 'merge-with' | 'split' | 'no-change'; // same vocabulary the task prompt offers the agent
56
+ description: string; // free-text explanation of the action
57
+ priority: 'high' | 'medium' | 'low'; // collectOutput defaults a missing value to 'medium'
58
+ /** For merge-with: target playbook name; left undefined when the agent gives none */
59
+ targetPlaybook?: string;
60
+ }
61
+
62
+ // ============================================================
63
+ // Template Implementation
64
+ // ============================================================
65
+
/**
 * Agentic task template for a deep-dive efficacy audit of one playbook.
 *
 * The efficacy profile is pre-computed with `buildEfficacyProfile` and written to
 * the workspace; the agent is asked to write `output/audit.json`, which
 * `collectOutput` parses defensively. `heuristicFallback` produces a summary
 * without an agent (presumably used when `assessComplexity` returns 'heuristic').
 */
export const playbookEfficacyAuditTemplate: AgenticTaskTemplate<
  PlaybookEfficacyAuditInput,
  PlaybookEfficacyAuditOutput
> = {
  taskType: 'playbook-efficacy-audit',
  domain: 'meta-learning',
  description: 'Deep-dive efficacy analysis of a single playbook with recommendations',

  /**
   * Picks the analysis tier from usage volume and recorded failures.
   *
   * @returns 'heuristic' below 3 uses, or below 10 uses with no recorded
   *   failures; 'standard' above 20 uses or more than 3 recorded failures;
   *   otherwise 'lightweight'.
   */
  assessComplexity(input: PlaybookEfficacyAuditInput): AnalysisComplexity {
    const { successCount, failureCount, failures } = input.playbook.evolution;
    const totalUses = successCount + failureCount;
    if (totalUses < 3) return 'heuristic';
    if (totalUses < 10 && failures.length === 0) return 'heuristic';
    if (totalUses > 20 || failures.length > 3) return 'standard';
    return 'lightweight';
  },

  /**
   * Agent-free assessment derived solely from the computed profile.
   *
   * Below 3 uses the result is 'insufficient-data' with a single 'no-change'
   * recommendation; otherwise health is bucketed by success rate
   * (>= 0.7 healthy, >= 0.4 at-risk, else underperforming).
   */
  async heuristicFallback(input: PlaybookEfficacyAuditInput): Promise<PlaybookEfficacyAuditOutput> {
    const profile = buildEfficacyProfile(
      input.playbook,
      input.annotations,
      input.playbookEffectiveness,
      input.unguidedSuccessRate,
      input.trajectoryDomainMap,
    );
    const sparse = profile.totalUses < 3;

    let health: PlaybookEfficacyAuditOutput['assessment']['health'];
    if (sparse) {
      health = 'insufficient-data';
    } else if (profile.successRate >= 0.7) {
      health = 'healthy';
    } else if (profile.successRate >= 0.4) {
      health = 'at-risk';
    } else {
      health = 'underperforming';
    }

    const recommendations: PlaybookRecommendation[] = sparse
      ? [{ action: 'no-change', description: 'Insufficient data for assessment', priority: 'low' }]
      : [];

    const summary = sparse
      ? `${profile.playbookName} has only ${profile.totalUses} uses — not enough for reliable assessment.`
      : `${profile.playbookName}: ${(profile.successRate * 100).toFixed(0)}% success rate, normalized gain ${profile.normalizedGain.toFixed(2)}.`;

    return {
      profile,
      assessment: { health, rootCauses: [], recommendations, summary },
    };
  },

  /**
   * Writes the agent's input files: `input/efficacy-profile.json`,
   * `input/playbook.json` (definition plus trimmed evolution history) and
   * `input/recent-annotations.json` (last 20 annotations that surfaced this playbook).
   */
  async prepareWorkspace(
    input: PlaybookEfficacyAuditInput,
    handle: WorkspaceHandle
  ): Promise<void> {
    const { playbook } = input;

    const profile = buildEfficacyProfile(
      playbook,
      input.annotations,
      input.playbookEffectiveness,
      input.unguidedSuccessRate,
      input.trajectoryDomainMap,
    );

    // Pre-computed efficacy profile — the core data the agent analyzes
    await handle.writeJson('input', 'efficacy-profile.json', profile);

    // Full playbook definition for context
    await handle.writeJson('input', 'playbook.json', {
      id: playbook.id,
      name: playbook.name,
      applicability: playbook.applicability,
      guidance: playbook.guidance,
      verification: playbook.verification,
      evolution: {
        version: playbook.evolution.version,
        // Only the five most recent failures are included.
        failures: playbook.evolution.failures.slice(-5),
        refinements: playbook.evolution.refinements,
        successCount: playbook.evolution.successCount,
        failureCount: playbook.evolution.failureCount,
      },
      confidence: playbook.confidence,
      complexity: playbook.complexity,
    });

    // Recent trajectory annotations for this playbook
    const relevantAnnotations = input.annotations
      .filter(a => a.knowledgeSurfaced.playbookIds.includes(playbook.id))
      .slice(-20)
      .map(a => ({
        trajectoryId: a.trajectoryId,
        success: a.outcome.success,
        stepCount: a.outcome.stepCount,
        errorRecoveries: a.outcome.errorRecoveries,
        wasApplied: a.knowledgeApplied.playbookIdsUsed.includes(playbook.id),
        reflection: a.reflection,
        timestamp: a.timestamp,
      }));

    await handle.writeJson('input', 'recent-annotations.json', relevantAnnotations);
  },

  /** Builds the agent instructions, including the expected shape of the audit file. */
  buildTaskPrompt(input: PlaybookEfficacyAuditInput): string {
    return [
      `Evaluate the efficacy of playbook "${input.playbook.name}".`,
      '',
      'Read the following input files:',
      '- input/efficacy-profile.json — Pre-computed metrics (normalized gain, temporal trend, domain breakdown, usage profile)',
      '- input/playbook.json — The playbook definition (guidance, verification, evolution history)',
      '- input/recent-annotations.json — Recent trajectory outcomes when this playbook was surfaced',
      '',
      'Analyze:',
      '1. Is this playbook providing marginal value above the unguided baseline? (check normalizedGain)',
      '2. Is efficacy trending up, stable, or decaying? (check temporalTrend)',
      '3. Does it work equally well across all domains, or is it domain-specific?',
      '4. Are agents actually adopting it when surfaced? (check usage.adoptionRate)',
      '5. Are there recurring failure modes that suggest guidance needs updating?',
      '',
      'Write your assessment to output/audit.json:',
      '```json',
      '{',
      ' "health": "healthy" | "at-risk" | "underperforming" | "insufficient-data",',
      ' "rootCauses": ["reason1", "reason2"],',
      ' "recommendations": [',
      ' {',
      ' "action": "refine-guidance" | "add-anti-pattern" | "narrow-scope" | "broaden-scope" | "deprecate" | "merge-with" | "split" | "no-change",',
      ' "description": "specific actionable recommendation",',
      ' "priority": "high" | "medium" | "low",',
      ' "targetPlaybook": "optional: for merge-with actions"',
      ' }',
      ' ],',
      ' "summary": "2-3 sentence narrative summary"',
      '}',
      '```',
    ].join('\n');
  },

  getSkills() { return []; },
  getResources() { return []; },

  outputConfig: {
    files: [
      {
        path: 'audit.json',
        format: 'json' as const,
        required: true,
        description: 'Playbook efficacy audit results',
      },
    ],
  },

  /**
   * Parses the agent's `output/audit.json` and pairs it with the stored profile.
   *
   * The audit file is agent-written and therefore untrusted: `health`, `action`
   * and `priority` are checked against their allowed literals (falling back to
   * 'insufficient-data', 'no-change' and 'medium'), non-object recommendation
   * entries are skipped, and a non-object audit yields an empty assessment
   * instead of throwing.
   */
  async collectOutput(handle: WorkspaceHandle): Promise<PlaybookEfficacyAuditOutput> {
    const isRecord = (value: unknown): value is Record<string, unknown> =>
      typeof value === 'object' && value !== null && !Array.isArray(value);

    // Returns `value` only when it is one of the allowed literals.
    const pickLiteral = <T extends string>(value: unknown, allowed: readonly T[], fallback: T): T =>
      allowed.find(candidate => candidate === value) ?? fallback;

    const healthValues: readonly PlaybookEfficacyAuditOutput['assessment']['health'][] =
      ['healthy', 'at-risk', 'underperforming', 'insufficient-data'];
    const actionValues: readonly PlaybookRecommendation['action'][] = [
      'refine-guidance', 'add-anti-pattern', 'narrow-scope', 'broaden-scope',
      'deprecate', 'merge-with', 'split', 'no-change',
    ];
    const priorityValues: readonly PlaybookRecommendation['priority'][] = ['high', 'medium', 'low'];

    const audit: unknown = await handle.readJson('output', 'audit.json');
    const raw: Record<string, unknown> = isRecord(audit) ? audit : {};
    const profile = await handle.readJson('input', 'efficacy-profile.json') as PlaybookEfficacyProfile;

    const recommendations: PlaybookRecommendation[] = Array.isArray(raw.recommendations)
      ? raw.recommendations.filter(isRecord).map(r => ({
          action: pickLiteral(r.action, actionValues, 'no-change'),
          description: String(r.description ?? ''),
          priority: pickLiteral(r.priority, priorityValues, 'medium'),
          // An empty or missing target is normalized to undefined.
          targetPlaybook: r.targetPlaybook ? String(r.targetPlaybook) : undefined,
        }))
      : [];

    return {
      profile,
      assessment: {
        health: pickLiteral(raw.health, healthValues, 'insufficient-data'),
        rootCauses: Array.isArray(raw.rootCauses) ? raw.rootCauses.map(String) : [],
        recommendations,
        summary: String(raw.summary ?? ''),
      },
    };
  },

  async processOutput(): Promise<void> {
    // Caller decides how to act on recommendations
  },

  computeRequirements: {
    mode: 'local',
    complexity: 'lightweight',
  },

  agentType: 'claude-code',
  timeout: 120_000,
  captureToolCalls: true,
};