pi-crew 0.5.2 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/CHANGELOG.md +183 -0
  2. package/README.md +17 -1
  3. package/docs/architecture.md +2 -0
  4. package/docs/bugs/cross-session-notification-leakage.md +82 -0
  5. package/docs/coding-agent-optimization.md +268 -0
  6. package/docs/deep-review-report.md +384 -0
  7. package/docs/distillation/cybersecurity-patterns.md +294 -0
  8. package/docs/migration-v0.4-v0.5.md +208 -0
  9. package/docs/optimization-plan.md +642 -0
  10. package/docs/pi-crew-v0.5.5-audit-fix-plan.md +133 -0
  11. package/docs/pi-mono-opportunities.md +969 -0
  12. package/docs/pi-mono-review.md +291 -0
  13. package/docs/skills/REFERENCE.md +144 -0
  14. package/package.json +12 -9
  15. package/skills/artifact-analysis-loop/SKILL.md +302 -0
  16. package/skills/async-worker-recovery/SKILL.md +19 -1
  17. package/skills/child-pi-spawning/SKILL.md +19 -6
  18. package/skills/context-artifact-hygiene/SKILL.md +19 -2
  19. package/skills/delegation-patterns/SKILL.md +68 -3
  20. package/skills/detection-pipeline-design/SKILL.md +285 -0
  21. package/skills/event-log-tracing/SKILL.md +20 -6
  22. package/skills/git-master/SKILL.md +20 -6
  23. package/skills/hunting-investigation-loop/SKILL.md +401 -0
  24. package/skills/incident-playbook-construction/SKILL.md +383 -0
  25. package/skills/live-agent-lifecycle/SKILL.md +20 -6
  26. package/skills/mailbox-interactive/SKILL.md +19 -6
  27. package/skills/model-routing-context/SKILL.md +19 -1
  28. package/skills/multi-perspective-review/SKILL.md +19 -4
  29. package/skills/observability-reliability/SKILL.md +19 -2
  30. package/skills/orchestration/SKILL.md +20 -2
  31. package/skills/ownership-session-security/SKILL.md +20 -2
  32. package/skills/pi-extension-lifecycle/SKILL.md +20 -2
  33. package/skills/post-mortem/SKILL.md +7 -2
  34. package/skills/read-only-explorer/SKILL.md +20 -6
  35. package/skills/requirements-to-task-packet/SKILL.md +23 -3
  36. package/skills/resource-discovery-config/SKILL.md +20 -2
  37. package/skills/runtime-state-reader/SKILL.md +20 -2
  38. package/skills/safe-bash/SKILL.md +21 -6
  39. package/skills/scrutinize/SKILL.md +20 -2
  40. package/skills/secure-agent-orchestration-review/SKILL.md +29 -2
  41. package/skills/security-review/SKILL.md +560 -0
  42. package/skills/state-mutation-locking/SKILL.md +22 -2
  43. package/skills/systematic-debugging/SKILL.md +8 -6
  44. package/skills/threat-hypothesis-framework/SKILL.md +175 -0
  45. package/skills/ui-render-performance/SKILL.md +20 -2
  46. package/skills/verification-before-done/SKILL.md +17 -2
  47. package/skills/widget-rendering/SKILL.md +21 -6
  48. package/skills/workspace-isolation/SKILL.md +20 -6
  49. package/skills/worktree-isolation/SKILL.md +20 -6
  50. package/src/agents/agent-config.ts +40 -1
  51. package/src/benchmark/benchmark-runner.ts +45 -0
  52. package/src/benchmark/feedback-loop.ts +5 -0
  53. package/src/config/config.ts +32 -5
  54. package/src/config/role-tools.ts +82 -0
  55. package/src/config/suggestions.ts +8 -0
  56. package/src/config/types.ts +4 -0
  57. package/src/extension/async-notifier.ts +10 -1
  58. package/src/extension/crew-cleanup.ts +114 -0
  59. package/src/extension/cross-extension-rpc.ts +1 -1
  60. package/src/extension/notification-router.ts +18 -0
  61. package/src/extension/register.ts +27 -19
  62. package/src/extension/registration/subagent-tools.ts +1 -1
  63. package/src/extension/team-tool/anchor.ts +201 -0
  64. package/src/extension/team-tool/api.ts +2 -1
  65. package/src/extension/team-tool/auto-summarize.ts +154 -0
  66. package/src/extension/team-tool/run.ts +42 -7
  67. package/src/extension/team-tool.ts +44 -2
  68. package/src/hooks/registry.ts +1 -3
  69. package/src/observability/event-bus.ts +69 -0
  70. package/src/observability/event-to-metric.ts +0 -2
  71. package/src/runtime/anchor-manager.ts +473 -0
  72. package/src/runtime/async-runner.ts +8 -4
  73. package/src/runtime/auto-summarize.ts +350 -0
  74. package/src/runtime/background-runner.ts +10 -3
  75. package/src/runtime/budget-tracker.ts +354 -0
  76. package/src/runtime/chain-runner.ts +507 -0
  77. package/src/runtime/child-pi.ts +123 -35
  78. package/src/runtime/crash-recovery.ts +5 -4
  79. package/src/runtime/crew-agent-runtime.ts +1 -0
  80. package/src/runtime/custom-tools/irc-tool.ts +13 -0
  81. package/src/runtime/custom-tools/submit-result-tool.ts +3 -2
  82. package/src/runtime/delivery-coordinator.ts +10 -3
  83. package/src/runtime/dynamic-script-runner.ts +482 -0
  84. package/src/runtime/foreground-control.ts +87 -17
  85. package/src/runtime/handoff-manager.ts +589 -0
  86. package/src/runtime/hidden-handoff.ts +424 -0
  87. package/src/runtime/live-agent-manager.ts +20 -4
  88. package/src/runtime/live-session-runtime.ts +39 -4
  89. package/src/runtime/manifest-cache.ts +2 -1
  90. package/src/runtime/model-resolver.ts +16 -4
  91. package/src/runtime/phase-tracker.ts +373 -0
  92. package/src/runtime/pi-args.ts +11 -1
  93. package/src/runtime/pi-json-output.ts +31 -0
  94. package/src/runtime/pipeline-runner.ts +514 -0
  95. package/src/runtime/progress-tracker.ts +124 -0
  96. package/src/runtime/retry-runner.ts +354 -0
  97. package/src/runtime/sandbox.ts +252 -0
  98. package/src/runtime/scheduler.ts +7 -2
  99. package/src/runtime/skill-effectiveness.ts +473 -0
  100. package/src/runtime/skill-instructions.ts +37 -3
  101. package/src/runtime/subagent-manager.ts +1 -1
  102. package/src/runtime/task-graph.ts +11 -1
  103. package/src/runtime/task-runner.ts +92 -18
  104. package/src/runtime/team-runner.ts +13 -12
  105. package/src/runtime/tool-progress.ts +10 -3
  106. package/src/runtime/verification-gates.ts +367 -0
  107. package/src/schema/team-tool-schema.ts +37 -0
  108. package/src/skills/discover-skills.ts +5 -0
  109. package/src/state/active-run-registry.ts +9 -2
  110. package/src/state/contracts.ts +9 -0
  111. package/src/state/crew-init.ts +3 -3
  112. package/src/state/decision-ledger.ts +98 -55
  113. package/src/state/event-log-rotation.ts +2 -2
  114. package/src/state/event-log.ts +144 -10
  115. package/src/state/hook-instinct-bridge.ts +5 -5
  116. package/src/state/mailbox.ts +10 -0
  117. package/src/state/run-cache.ts +18 -8
  118. package/src/state/state-store.ts +3 -1
  119. package/src/state/types.ts +4 -0
  120. package/src/tools/safe-bash-extension.ts +1 -0
  121. package/src/tools/safe-bash.ts +152 -20
  122. package/src/types/new-api-types.ts +34 -0
  123. package/src/ui/agent-management-overlay.ts +5 -1
  124. package/src/ui/crew-widget.ts +29 -15
  125. package/src/ui/overlays/mailbox-detail-overlay.ts +13 -2
  126. package/src/ui/powerbar-publisher.ts +101 -7
  127. package/src/ui/tool-render.ts +15 -15
  128. package/src/ui/transcript-cache.ts +13 -0
  129. package/src/utils/bm25-search.ts +16 -8
  130. package/src/utils/env-filter.ts +8 -5
  131. package/src/utils/redaction.ts +169 -15
  132. package/src/utils/session-utils.ts +52 -0
  133. package/src/utils/sse-parser.ts +10 -1
  134. package/src/worktree/cleanup.ts +6 -1
  135. package/src/worktree/worktree-manager.ts +32 -13
  136. package/workflows/chain.workflow.md +252 -0
  137. package/workflows/pipeline.workflow.md +27 -0
@@ -0,0 +1,473 @@
1
+ /**
2
+ * Skill Effectiveness — ECC INSTINCT/CONFIDENCE Pattern Implementation
3
+ *
4
+ * Implements confidence-weighted skill activation based on ECC's instinct system.
5
+ * Tracks skill activation success and adjusts confidence scores.
6
+ *
7
+ * Based on: docs/distillation/ECC-hooks-instincts.md §2-3 (instinct system, confidence thresholds)
8
+ * Based on: docs/distillation/ECC-10-skills.md §8 (continuous-learning-v2)
9
+ *
10
+ * @module skill-effectiveness
11
+ */
12
+
13
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
14
+ import { dirname, join } from "path";
15
+ import { crewHooks } from "./crew-hooks.ts";
16
+
17
/**
 * Named confidence cut-offs from the ECC instinct system.
 *
 * A skill whose confidence sits below the 0.3 mark is treated as tentative
 * and is not enforced.
 */
export const CONFIDENCE_THRESHOLDS = {
  /** Suggested but not enforced. */
  TENTATIVE: 0.3,
  /** Applied when relevant. */
  MODERATE: 0.5,
  /** Auto-approved for application. */
  STRONG: 0.7,
  /** Core behavior. */
  NEAR_CERTAIN: 0.9,
} as const;
27
+
28
/**
 * Starting confidence keyed by how many times a pattern has been observed.
 *
 * Mirrors the ECC instinct system buckets: 1-2 observations are tentative,
 * 3-5 moderate, 6-10 strong, and anything above 10 very strong. The key for
 * the open-ended bucket is the literal string "11+".
 */
export const INITIAL_CONFIDENCE_BY_FREQUENCY: Record<string, number> = {
  // 1-2 observations → tentative
  "1": 0.3,
  "2": 0.3,
  // 3-5 observations → moderate
  "3": 0.5,
  "4": 0.5,
  "5": 0.5,
  // 6-10 observations → strong
  "6": 0.7,
  "7": 0.7,
  "8": 0.7,
  "9": 0.7,
  "10": 0.7,
  // 11 or more observations → very strong
  "11+": 0.85,
} as const;
45
+
46
/**
 * Signed deltas applied to a confidence score, per the ECC instinct system.
 */
export const CONFIDENCE_ADJUSTMENTS = {
  /** Added for each confirming observation. */
  CONFIRMING: 0.05,
  /** Added (negative) for each contradicting observation. */
  CONTRADICTING: -0.1,
  /** Added (negative) for each week without an observation. */
  DECAY_PER_WEEK: -0.02,
} as const;
54
+
55
/**
 * Minimums a skill must reach before it can be promoted to
 * "strong enforcement".
 */
export const PROMOTION_GATE_CRITERIA = {
  /** Required pass rate (0.8 = 80%). */
  MIN_CORRECTNESS: 0.8,
  /** Minimum number of observations before filtering. */
  MIN_ACTIVATIONS: 5,
  /** Required average confidence. */
  MIN_AVG_CONFIDENCE: 0.7,
} as const;
64
+
65
/**
 * One recorded use of a skill. Each record is serialized as a single line of
 * the run's skill-activations.jsonl file.
 */
export interface SkillActivation {
  /** Unique activation ID. */
  id: string;
  /** Skill identifier (e.g., "verification-before-done"). */
  skillId: string;
  /** Role that activated the skill. */
  role: string;
  /** Run ID. */
  runId: string;
  /** Task ID. */
  taskId: string;
  /** ISO timestamp. */
  timestamp: string;
  /** Whether the skill was successfully applied. */
  passed: boolean;
  /** Optional outcome description. */
  outcome?: string;
  /** Confidence at time of activation. */
  confidence: number;
}
79
+
80
/**
 * Aggregated statistics for a single skill, derived from its activations.
 */
export interface SkillMetrics {
  skillId: string;
  totalActivations: number;
  passedActivations: number;
  failedActivations: number;
  /** passed / total */
  passRate: number;
  /** Rolling average confidence. */
  avgConfidence: number;
  /** Current confidence score. */
  currentConfidence: number;
  trend: "improving" | "stable" | "declining";
  /** ISO timestamp of the most recent activation. */
  lastActivation?: string;
  /** ISO timestamp of the earliest activation. */
  firstActivation?: string;
  /** Activations per role. */
  roleBreakdown: Record<string, number>;
}
96
+
97
/**
 * A skill paired with its confidence classification, used when deciding how
 * strongly the skill should be applied.
 */
export interface WeightedSkill {
  skillId: string;
  confidence: number;
  /** Name of the confidence band the score falls into. */
  threshold: keyof typeof CONFIDENCE_THRESHOLDS;
  behavior: "suggest" | "apply_if_asked" | "apply_auto" | "act_autonomous";
  /** Evidence for confidence score. */
  evidence: string;
  metrics: SkillMetrics;
}
108
+
109
/**
 * Build the path of a run's skill-metrics.jsonl file, rooted at the current
 * working directory.
 *
 * NOTE(review): runId is interpolated into the path as-is; confirm callers
 * only pass trusted/sanitized run IDs.
 */
function getSkillMetricsPath(runId: string): string {
  const relativeFile = `.crew/state/runs/${runId}/skill-metrics.jsonl`;
  return join(process.cwd(), relativeFile);
}
118
+
119
/**
 * Build the path of a run's skill-activations.jsonl file, rooted at the
 * current working directory.
 *
 * NOTE(review): runId is interpolated into the path as-is; confirm callers
 * only pass trusted/sanitized run IDs.
 */
function getSkillActivationsPath(runId: string): string {
  const relativeFile = `.crew/state/runs/${runId}/skill-activations.jsonl`;
  return join(process.cwd(), relativeFile);
}
128
+
129
/**
 * Create the run's state directory (the parent of skill-metrics.jsonl) when
 * it is not already present, including any missing ancestors.
 */
function ensureSkillMetricsDir(runId: string): void {
  const runDir = dirname(getSkillMetricsPath(runId));
  if (existsSync(runDir)) return;
  mkdirSync(runDir, { recursive: true });
}
138
+
139
/**
 * Map an observation count onto its starting confidence.
 *
 * Counts up to 2 use the "1" bucket, up to 5 the "3" bucket, up to 10 the
 * "6" bucket, and everything else the "11+" bucket of
 * INITIAL_CONFIDENCE_BY_FREQUENCY.
 *
 * @param observationCount Number of times the pattern has been observed.
 * @returns The initial confidence for that bucket.
 */
export function computeInitialConfidence(observationCount: number): number {
  let bucket: string;
  if (observationCount <= 2) {
    bucket = "1";
  } else if (observationCount <= 5) {
    bucket = "3";
  } else if (observationCount <= 10) {
    bucket = "6";
  } else {
    bucket = "11+";
  }
  return INITIAL_CONFIDENCE_BY_FREQUENCY[bucket];
}
148
+
149
/**
 * Nudge a confidence score after an observation.
 *
 * A pass adds CONFIDENCE_ADJUSTMENTS.CONFIRMING (+0.05); a failure adds
 * CONFIDENCE_ADJUSTMENTS.CONTRADICTING (-0.1). The result is clamped to the
 * range [0.1, 0.95].
 *
 * @param current Confidence before the observation.
 * @param passed Whether the observation confirmed the skill.
 * @returns The adjusted, clamped confidence.
 */
export function adjustConfidence(current: number, passed: boolean): number {
  let step: number;
  if (passed) {
    step = CONFIDENCE_ADJUSTMENTS.CONFIRMING;
  } else {
    step = CONFIDENCE_ADJUSTMENTS.CONTRADICTING;
  }
  const unclamped = current + step;
  return Math.min(0.95, Math.max(0.1, unclamped));
}
159
+
160
/**
 * Apply time-based decay to a confidence score.
 *
 * Confidence drops by CONFIDENCE_ADJUSTMENTS.DECAY_PER_WEEK for every full
 * week elapsed since `lastActivation`, and never falls below 0.1.
 *
 * Decay only ever lowers confidence: a timestamp in the future (e.g. clock
 * skew between writer and reader) counts as zero elapsed weeks rather than
 * producing a negative week count that would raise the score. A timestamp
 * that cannot be parsed is treated like a missing one.
 *
 * @param current Confidence before decay.
 * @param lastActivation ISO timestamp of the most recent activation, if any.
 * @returns The decayed confidence, or `current` unchanged when there is no
 *   usable timestamp.
 */
export function applyDecay(current: number, lastActivation?: string): number {
  if (!lastActivation) return current;

  const lastSeenMs = new Date(lastActivation).getTime();
  // An invalid date yields NaN, which would otherwise propagate into the result.
  if (Number.isNaN(lastSeenMs)) return current;

  const daysSince = (Date.now() - lastSeenMs) / (1000 * 60 * 60 * 24);
  // Clamp at zero so future-dated activations cannot turn decay into a boost.
  const decayWeeks = Math.max(0, Math.floor(daysSince / 7));
  const decay = decayWeeks * CONFIDENCE_ADJUSTMENTS.DECAY_PER_WEEK;

  return Math.max(0.1, current + decay);
}
172
+
173
/**
 * Translate a confidence score into the behavior it unlocks.
 *
 * The score is compared against the NEAR_CERTAIN, STRONG and MODERATE
 * thresholds from highest to lowest; the first one it meets or exceeds
 * decides the behavior. Anything below MODERATE is only suggested.
 *
 * @param confidence Confidence score to classify.
 * @returns The behavior associated with the score.
 */
export function confidenceToBehavior(confidence: number): WeightedSkill["behavior"] {
  const ladder: Array<[number, WeightedSkill["behavior"]]> = [
    [CONFIDENCE_THRESHOLDS.NEAR_CERTAIN, "act_autonomous"],
    [CONFIDENCE_THRESHOLDS.STRONG, "apply_auto"],
    [CONFIDENCE_THRESHOLDS.MODERATE, "apply_if_asked"],
  ];
  for (const [floor, behavior] of ladder) {
    if (confidence >= floor) return behavior;
  }
  return "suggest";
}
182
+
183
/**
 * Name the confidence band a score falls into.
 *
 * Bands are checked from highest to lowest against CONFIDENCE_THRESHOLDS:
 * NEAR_CERTAIN, then STRONG, then MODERATE. Scores below the MODERATE
 * threshold (including those below the TENTATIVE threshold, for which no
 * lower band exists) are reported as "TENTATIVE". This keeps the band name
 * aligned with confidenceToBehavior, where scores under MODERATE are only
 * suggested.
 *
 * @param confidence Confidence score to classify.
 * @returns The key of the matching band in CONFIDENCE_THRESHOLDS.
 */
export function confidenceToThreshold(confidence: number): keyof typeof CONFIDENCE_THRESHOLDS {
  if (confidence >= CONFIDENCE_THRESHOLDS.NEAR_CERTAIN) return "NEAR_CERTAIN";
  if (confidence >= CONFIDENCE_THRESHOLDS.STRONG) return "STRONG";
  // Compare against MODERATE (0.5), not TENTATIVE (0.3), before reporting MODERATE.
  if (confidence >= CONFIDENCE_THRESHOLDS.MODERATE) return "MODERATE";
  return "TENTATIVE";
}
192
+
193
/**
 * Persist one skill activation.
 *
 * The record is serialized as a single JSON line and appended to the run's
 * skill-activations.jsonl file; the run directory is created first if needed.
 *
 * @param activation The activation to store.
 * @returns The same activation object that was passed in.
 */
export function recordSkillActivation(
  activation: SkillActivation,
): SkillActivation {
  const { runId } = activation;
  ensureSkillMetricsDir(runId);

  const target = getSkillActivationsPath(runId);
  const serialized = `${JSON.stringify(activation)}\n`;
  // flag "a" appends instead of truncating the existing log.
  writeFileSync(target, serialized, { flag: "a", encoding: "utf-8" });

  return activation;
}
208
+
209
/**
 * Load every skill activation recorded for a run.
 *
 * Reads the run's skill-activations.jsonl file and parses it line by line.
 * Blank lines are ignored. A line that is not valid JSON, or that does not
 * hold a JSON object, is skipped instead of aborting the whole read: the file
 * is an append-only log, so a torn or corrupted line must not make every
 * other recorded activation unreadable.
 *
 * @param runId Run whose activations should be loaded.
 * @returns The parsed activations in file order; empty when the file is
 *   missing or contains no usable lines.
 */
export function getSkillActivations(runId: string): SkillActivation[] {
  const path = getSkillActivationsPath(runId);

  if (!existsSync(path)) {
    return [];
  }

  const activations: SkillActivation[] = [];
  for (const line of readFileSync(path, "utf-8").split("\n")) {
    if (!line.trim()) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      // Malformed line (e.g. an interrupted append): drop it and keep going.
      continue;
    }

    // Only JSON objects can be activation records; ignore scalars and arrays.
    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
      activations.push(parsed as SkillActivation);
    }
  }

  return activations;
}
229
+
230
/**
 * Aggregate the activations of one skill into a SkillMetrics summary.
 *
 * Only activations whose skillId matches are considered. With no matches the
 * result is an all-zero summary whose currentConfidence is the initial
 * confidence for zero observations. Otherwise:
 * - passRate is passed / total;
 * - avgConfidence is the mean of the recorded confidences;
 * - currentConfidence is the confidence of the last matching activation with
 *   time decay applied;
 * - trend compares the pass rate of the last five activations with the pass
 *   rate of everything before them, using a 0.1 margin; with five or fewer
 *   activations the trend is "stable".
 *
 * @param skillId Skill to summarize.
 * @param activations Activation records to draw from (all skills).
 * @returns The aggregated metrics for `skillId`.
 */
export function computeSkillMetrics(
  skillId: string,
  activations: SkillActivation[],
): SkillMetrics {
  const matching = activations.filter((entry) => entry.skillId === skillId);
  const total = matching.length;

  if (total === 0) {
    return {
      skillId,
      totalActivations: 0,
      passedActivations: 0,
      failedActivations: 0,
      passRate: 0,
      avgConfidence: 0,
      currentConfidence: computeInitialConfidence(0),
      trend: "stable",
      roleBreakdown: {},
    };
  }

  // Single pass: pass count, confidence sum and per-role tally.
  let passedCount = 0;
  let confidenceSum = 0;
  const roleBreakdown: Record<string, number> = {};
  for (const entry of matching) {
    if (entry.passed) passedCount += 1;
    confidenceSum += entry.confidence;
    roleBreakdown[entry.role] = (roleBreakdown[entry.role] ?? 0) + 1;
  }
  const failedCount = total - passedCount;
  const avgConfidence = confidenceSum / total;

  const latest = matching[total - 1];
  const latestConfidence = latest?.confidence ?? avgConfidence;

  // Trend: last five activations versus everything that came before them.
  const passRateOf = (entries: SkillActivation[]): number =>
    entries.filter((entry) => entry.passed).length / entries.length;
  const recentWindow = matching.slice(-5);
  const olderWindow = matching.slice(0, -5);
  const recentPassRate = passRateOf(recentWindow);
  // Without older history the comparison baseline is the recent rate itself.
  const olderPassRate = olderWindow.length > 0 ? passRateOf(olderWindow) : recentPassRate;

  const trend: SkillMetrics["trend"] =
    recentPassRate > olderPassRate + 0.1
      ? "improving"
      : recentPassRate < olderPassRate - 0.1
        ? "declining"
        : "stable";

  // Confidence fades when the skill has not been observed recently.
  const lastActivation = latest?.timestamp;
  const currentConfidence = applyDecay(latestConfidence, lastActivation);

  return {
    skillId,
    totalActivations: total,
    passedActivations: passedCount,
    failedActivations: failedCount,
    passRate: passedCount / total,
    avgConfidence,
    currentConfidence,
    trend,
    lastActivation,
    firstActivation: matching[0]?.timestamp,
    roleBreakdown,
  };
}
302
+
303
/**
 * Decide whether a skill qualifies for promotion to "strong enforcement".
 *
 * Four checks are evaluated against PROMOTION_GATE_CRITERIA:
 * - correctness: pass rate is at least MIN_CORRECTNESS;
 * - evidence: total activations are at least MIN_ACTIVATIONS;
 * - rollback: the trend is not "declining";
 * - encoding: average confidence is at least MIN_AVG_CONFIDENCE.
 *
 * @param metrics Aggregated metrics for the skill.
 * @returns `passed` (true only when every check holds), the individual check
 *   results, and a human-readable `reason`. The pass rate in the reason is
 *   rendered as a whole-number percentage.
 */
export function evaluatePromotionGate(metrics: SkillMetrics): {
  passed: boolean;
  criteria: {
    correctness: boolean;
    evidence: boolean;
    rollback: boolean;
    encoding: boolean;
  };
  reason: string;
} {
  const criteria = {
    correctness: metrics.passRate >= PROMOTION_GATE_CRITERIA.MIN_CORRECTNESS,
    evidence: metrics.totalActivations >= PROMOTION_GATE_CRITERIA.MIN_ACTIVATIONS,
    rollback: metrics.trend !== "declining",
    encoding: metrics.avgConfidence >= PROMOTION_GATE_CRITERIA.MIN_AVG_CONFIDENCE,
  };

  const allPassed = Object.values(criteria).every(Boolean);

  let reason: string;
  if (allPassed) {
    // A one-decimal fraction ("0.8") hides most of the value; show a percentage.
    const passRatePercent = (metrics.passRate * 100).toFixed(0);
    reason = `All promotion gate criteria met: ${passRatePercent}% pass rate, ${metrics.totalActivations} activations, ${metrics.trend} trend`;
  } else {
    const failedCriteria = Object.entries(criteria)
      .filter(([, passed]) => !passed)
      .map(([name]) => name);
    reason = `Promotion gate not passed. Failed: ${failedCriteria.join(", ")}`;
  }

  return { passed: allPassed, criteria, reason };
}
338
+
339
/**
 * Build confidence-weighted entries for a list of skills from a run's
 * activation history.
 *
 * Each skill's metrics are computed from the run's recorded activations.
 * Skills whose current confidence is below `minConfidence` are left out, and
 * the remaining entries are returned ordered from highest to lowest
 * confidence.
 *
 * Note: `role` is accepted but not consulted here; metrics are computed over
 * the activations of all roles.
 *
 * @param role Role the skills are being resolved for (currently unused).
 * @param skillIds Skills to evaluate.
 * @param runId Run whose activation history is read.
 * @param minConfidence Minimum confidence a skill needs to be included.
 * @returns The weighted skills, highest confidence first.
 */
export function getWeightedSkillsForRole(
  role: string,
  skillIds: string[],
  runId: string,
  minConfidence: number = CONFIDENCE_THRESHOLDS.TENTATIVE,
): WeightedSkill[] {
  const history = getSkillActivations(runId);
  const weighted: WeightedSkill[] = [];

  for (const skillId of skillIds) {
    const metrics = computeSkillMetrics(skillId, history);
    const confidence = metrics.currentConfidence;
    if (confidence < minConfidence) continue;

    const passPercent = (metrics.passRate * 100).toFixed(0);
    weighted.push({
      skillId,
      confidence,
      threshold: confidenceToThreshold(confidence),
      behavior: confidenceToBehavior(confidence),
      evidence: `${metrics.totalActivations} activations, ${passPercent}% pass rate`,
      metrics,
    });
  }

  weighted.sort((left, right) => right.confidence - left.confidence);
  return weighted;
}
372
+
373
/**
 * Keep only the skills whose confidence reaches a named threshold.
 *
 * The threshold name is resolved through CONFIDENCE_THRESHOLDS and the work
 * is delegated to getWeightedSkillsForRole with the role "global". Skills
 * below the threshold are omitted from the result.
 *
 * @param skillIds Skills to evaluate.
 * @param runId Run whose activation history is read.
 * @param threshold Name of the minimum confidence band; defaults to "MODERATE".
 * @returns The qualifying skills as weighted entries.
 */
export function filterSkillsByConfidence(
  skillIds: string[],
  runId: string,
  threshold: keyof typeof CONFIDENCE_THRESHOLDS = "MODERATE",
): WeightedSkill[] {
  return getWeightedSkillsForRole(
    "global",
    skillIds,
    runId,
    CONFIDENCE_THRESHOLDS[threshold],
  );
}
385
+
386
/**
 * Guards registerSkillEffectivenessHooks so the crew hooks are registered at
 * most once per process lifetime.
 */
let hooksRegistered = false;

/**
 * Register crew hooks that record skill activations automatically.
 *
 * On "task_completed", one activation is recorded for every skill name listed
 * in the event's `data.skills`. An activation counts as passed when
 * `data.status` is exactly "completed". Events without a task ID or run ID
 * are ignored.
 *
 * The event payload is validated before use: `data.skills` is only honored
 * when it is an array (non-string entries are dropped), and `data.role` falls
 * back to "unknown" unless it is a string. Without this, a string-valued
 * `skills` field would be iterated character by character and recorded as
 * one bogus skill per character.
 *
 * Calling this function more than once has no further effect.
 */
export function registerSkillEffectivenessHooks(): void {
  if (hooksRegistered) return;
  hooksRegistered = true;

  // Track task completion for skill effectiveness
  crewHooks.register("task_completed", (event) => {
    const { taskId, runId, data } = event;
    if (!taskId || !runId) return;

    // Extract skills that were activated from task data
    const rawSkills: unknown = data?.skills;
    const skillNames: string[] = Array.isArray(rawSkills)
      ? rawSkills.filter((entry): entry is string => typeof entry === "string")
      : [];
    const rawRole: unknown = data?.role;
    const role = typeof rawRole === "string" ? rawRole : "unknown";
    const success = data?.status === "completed";

    // Record each skill activation
    for (const skillId of skillNames) {
      const activation: SkillActivation = {
        id: `act-${Date.now()}-${Math.random().toString(36).slice(2)}`,
        skillId,
        role,
        runId,
        taskId,
        timestamp: new Date().toISOString(),
        passed: success,
        confidence: computeInitialConfidence(1),
      };
      recordSkillActivation(activation);
    }
  });

  // Track task failures
  crewHooks.register("task_failed", () => {
    // Intentionally records nothing.
    // NOTE(review): failed tasks therefore only show up in the metrics if
    // "task_completed" also fires for them with a status other than
    // "completed" — confirm against the hook emitter.
  });
}
430
+
431
/**
 * Render a Markdown effectiveness report for a run.
 *
 * The report opens with the run ID, the generation time and the total number
 * of recorded activations. When nothing has been recorded it ends with a
 * placeholder line. Otherwise a "Skill Metrics" section follows with one
 * subsection per entry of `skillIds`, listing confidence and trend, pass
 * rate, average confidence, the promotion-gate verdict and, when available,
 * the per-role activation counts.
 *
 * @param runId Run to report on.
 * @param skillIds Skills to include, in output order.
 * @returns The report as a newline-joined Markdown string.
 */
export function generateSkillEffectivenessReport(
  runId: string,
  skillIds: string[],
): string {
  const history = getSkillActivations(runId);
  const header: string[] = [
    `# Skill Effectiveness Report: ${runId}`,
    "",
    `Generated: ${new Date().toISOString()}`,
    `Total Activations: ${history.length}`,
    "",
  ];

  if (history.length === 0) {
    return [...header, "*No skill activations recorded yet.*"].join("\n");
  }

  const sections = skillIds.flatMap((skillId) => {
    const metrics = computeSkillMetrics(skillId, history);
    const gate = evaluatePromotionGate(metrics);

    const rows: string[] = [
      `### ${skillId}`,
      `- **Confidence**: ${metrics.currentConfidence.toFixed(2)} (${metrics.trend})`,
      `- **Pass Rate**: ${(metrics.passRate * 100).toFixed(1)}% (${metrics.passedActivations}/${metrics.totalActivations})`,
      `- **Avg Confidence**: ${metrics.avgConfidence.toFixed(2)}`,
      `- **Promotion Gate**: ${gate.passed ? "PASSED ✅" : "NOT MET"}`,
    ];

    // The role line is only emitted when at least one role has activations.
    if (Object.keys(metrics.roleBreakdown).length > 0) {
      rows.push(`- **By Role**: ${JSON.stringify(metrics.roleBreakdown)}`);
    }

    // Blank line separates this subsection from the next.
    rows.push("");
    return rows;
  });

  return [...header, "## Skill Metrics", "", ...sections].join("\n");
}
@@ -5,6 +5,11 @@ import type { AgentConfig } from "../agents/agent-config.ts";
5
5
  import type { TeamRole } from "../teams/team-config.ts";
6
6
  import type { WorkflowStep } from "../workflows/workflow-config.ts";
7
7
  import { isSafePathId, resolveContainedPath, resolveRealContainedPath } from "../utils/safe-paths.ts";
8
+ import {
9
+ getWeightedSkillsForRole,
10
+ registerSkillEffectivenessHooks,
11
+ CONFIDENCE_THRESHOLDS,
12
+ } from "./skill-effectiveness.ts";
8
13
 
9
14
  const PACKAGE_SKILLS_DIR = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..", "..", "skills");
10
15
  const MAX_SKILL_CHARS = 1500;
@@ -76,7 +81,8 @@ export function defaultSkillsForRole(role: string): string[] {
76
81
  return DEFAULT_ROLE_SKILLS[role] ?? [];
77
82
  }
78
83
 
79
- function collectTaskSkillNames(input: ResolveTaskSkillsInput): string[] {
84
+ function collectTaskSkillNames(input: ResolveTaskSkillsInput | undefined): string[] {
85
+ if (!input) return [];
80
86
  if (input.override === false) return [];
81
87
  const roleDefaultsDisabled = input.teamRole?.skills === false || input.step?.skills === false;
82
88
  const names = roleDefaultsDisabled ? [] : defaultSkillsForRole(input.role);
@@ -182,9 +188,16 @@ export interface RenderedSkillInstructions {
182
188
  names: string[];
183
189
  paths: string[];
184
190
  block: string;
191
+ /** Confidence-weighted skills for this render, sorted by confidence */
192
+ weightedSkills?: Array<{
193
+ skillId: string;
194
+ confidence: number;
195
+ behavior: string;
196
+ threshold: string;
197
+ }>;
185
198
  }
186
199
 
187
- export function renderSkillInstructions(input: RenderSkillInstructionsInput): RenderedSkillInstructions {
200
+ export function renderSkillInstructions(input: RenderSkillInstructionsInput & { runId?: string } = {} as RenderSkillInstructionsInput & { runId?: string }): RenderedSkillInstructions {
188
201
  const allNames = collectTaskSkillNames(input);
189
202
  const names = allNames.slice(0, MAX_SELECTED_SKILLS);
190
203
  const overflowCount = Math.max(0, allNames.length - names.length);
@@ -193,6 +206,21 @@ export function renderSkillInstructions(input: RenderSkillInstructionsInput): Re
193
206
  const skillPaths: string[] = [];
194
207
  let total = 0;
195
208
  let omittedCount = overflowCount;
209
+
210
+ // ECC INSTINCT: Get confidence-weighted skills if runId is provided
211
+ let weightedSkills: RenderedSkillInstructions["weightedSkills"] = undefined;
212
+ if (input.runId) {
213
+ // Register effectiveness hooks once per process
214
+ registerSkillEffectivenessHooks();
215
+ const weighted = getWeightedSkillsForRole(input.role, names, input.runId, CONFIDENCE_THRESHOLDS.TENTATIVE);
216
+ weightedSkills = weighted.map(w => ({
217
+ skillId: w.skillId,
218
+ confidence: w.confidence,
219
+ behavior: w.behavior,
220
+ threshold: w.threshold,
221
+ }));
222
+ }
223
+
196
224
  const pushSection = (section: string): boolean => {
197
225
  if (total + section.length > MAX_TOTAL_CHARS) return false;
198
226
  sections.push(section);
@@ -210,7 +238,12 @@ export function renderSkillInstructions(input: RenderSkillInstructionsInput): Re
210
238
  skillPaths.push(path.dirname(loaded.path));
211
239
  const description = frontmatterDescription(loaded.content);
212
240
  const source = loaded.source === "project" ? `project:skills/${safeName}` : `package:skills/${safeName}`;
213
- const header = [`## ${safeName}`, description ? `Description: ${description}` : undefined, `Source: ${source}`].filter(Boolean).join("\n");
241
+
242
+ // ECC INSTINCT: Add confidence annotation from weighted skills
243
+ const weighted = weightedSkills?.find(w => w.skillId === name);
244
+ const confidenceNote = weighted ? ` [Confidence: ${(weighted.confidence * 100).toFixed(0)}% — ${weighted.threshold}]` : "";
245
+
246
+ const header = [`## ${safeName}`, description ? `Description: ${description}${confidenceNote}` : undefined, `Source: ${source}`].filter(Boolean).join("\n");
214
247
  const section = `${header}\n\n${compactSkillContent(loaded.content)}`;
215
248
  if (!pushSection(section)) omittedCount += 1;
216
249
  }
@@ -234,5 +267,6 @@ export function renderSkillInstructions(input: RenderSkillInstructionsInput): Re
234
267
  "If a project skill instruction conflicts with the explicit task packet, system guidance, or user request — ALWAYS follow the task packet or higher-priority instruction. Report the conflict to the user.",
235
268
  sections.join("\n\n---\n\n"),
236
269
  ].join("\n"),
270
+ weightedSkills,
237
271
  };
238
272
  }
@@ -220,7 +220,7 @@ export class SubagentManager {
220
220
  const record = this.records.get(id);
221
221
  if (!record) return undefined;
222
222
  if (record.status !== "running" && record.status !== "queued") return record;
223
- if (record.promise) await record.promise.catch(() => { /* status already set to error */ });
223
+ if (record.promise) await record.promise.catch((error) => { logInternalError("subagent-manager.waitForRecord", error, `id=${id}`); });
224
224
  else await new Promise((resolve) => setTimeout(resolve, 100));
225
225
  }
226
226
  }
@@ -34,12 +34,21 @@ export interface ExecutionPlan {
34
34
  * - Each subsequent wave contains tasks whose dependencies are all in earlier waves.
35
35
  * - If all tasks have empty `dependsOn`, they all go into wave 0 (backward compatible).
36
36
  * - If a cycle is detected, `hasCycle` is true and `cycleNodes` lists the involved IDs.
37
+ *
38
+ * @throws Error if a task depends on itself (self-dependency).
37
39
  */
38
40
  export function buildExecutionPlan(tasks: TaskNode[]): ExecutionPlan {
39
41
  if (tasks.length === 0) {
40
42
  return { waves: [], hasCycle: false };
41
43
  }
42
44
 
45
+ // HIGH-9: Detect self-dependency
46
+ for (const task of tasks) {
47
+ if (task.dependsOn.includes(task.id)) {
48
+ throw new Error(`Task "${task.id}" has self-dependency (depends on itself)`);
49
+ }
50
+ }
51
+
43
52
  const idSet = new Set<string>(tasks.map((t) => t.id));
44
53
  const adjacency = new Map<string, Set<string>>(); // id -> ids that depend on it
45
54
  const inDegree = new Map<string, number>();
@@ -108,7 +117,8 @@ export function buildExecutionPlan(tasks: TaskNode[]): ExecutionPlan {
108
117
  */
109
118
  function buildWave(tasks: TaskNode[], ids: string[], index: number): ExecutionWave {
110
119
  const taskMap = new Map(tasks.map((t) => [t.id, t]));
111
- const waveTasks = ids.map((id) => taskMap.get(id)!).filter(Boolean);
120
+ // MEDIUM-12: Filter out undefined values instead of using non-null assertion
121
+ const waveTasks = ids.map((id) => taskMap.get(id)).filter(Boolean) as TaskNode[];
112
122
 
113
123
  let label: string | undefined;
114
124
  if (waveTasks.length > 0 && waveTasks.every((t) => t.phase !== undefined)) {