npm - pi-crew - Versions diffs - 0.5.2 → 0.5.6 - Mend

pi-crew 0.5.2 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (137) hide show

package/CHANGELOG.md +183 -0
package/README.md +17 -1
package/docs/architecture.md +2 -0
package/docs/bugs/cross-session-notification-leakage.md +82 -0
package/docs/coding-agent-optimization.md +268 -0
package/docs/deep-review-report.md +384 -0
package/docs/distillation/cybersecurity-patterns.md +294 -0
package/docs/migration-v0.4-v0.5.md +208 -0
package/docs/optimization-plan.md +642 -0
package/docs/pi-crew-v0.5.5-audit-fix-plan.md +133 -0
package/docs/pi-mono-opportunities.md +969 -0
package/docs/pi-mono-review.md +291 -0
package/docs/skills/REFERENCE.md +144 -0
package/package.json +12 -9
package/skills/artifact-analysis-loop/SKILL.md +302 -0
package/skills/async-worker-recovery/SKILL.md +19 -1
package/skills/child-pi-spawning/SKILL.md +19 -6
package/skills/context-artifact-hygiene/SKILL.md +19 -2
package/skills/delegation-patterns/SKILL.md +68 -3
package/skills/detection-pipeline-design/SKILL.md +285 -0
package/skills/event-log-tracing/SKILL.md +20 -6
package/skills/git-master/SKILL.md +20 -6
package/skills/hunting-investigation-loop/SKILL.md +401 -0
package/skills/incident-playbook-construction/SKILL.md +383 -0
package/skills/live-agent-lifecycle/SKILL.md +20 -6
package/skills/mailbox-interactive/SKILL.md +19 -6
package/skills/model-routing-context/SKILL.md +19 -1
package/skills/multi-perspective-review/SKILL.md +19 -4
package/skills/observability-reliability/SKILL.md +19 -2
package/skills/orchestration/SKILL.md +20 -2
package/skills/ownership-session-security/SKILL.md +20 -2
package/skills/pi-extension-lifecycle/SKILL.md +20 -2
package/skills/post-mortem/SKILL.md +7 -2
package/skills/read-only-explorer/SKILL.md +20 -6
package/skills/requirements-to-task-packet/SKILL.md +23 -3
package/skills/resource-discovery-config/SKILL.md +20 -2
package/skills/runtime-state-reader/SKILL.md +20 -2
package/skills/safe-bash/SKILL.md +21 -6
package/skills/scrutinize/SKILL.md +20 -2
package/skills/secure-agent-orchestration-review/SKILL.md +29 -2
package/skills/security-review/SKILL.md +560 -0
package/skills/state-mutation-locking/SKILL.md +22 -2
package/skills/systematic-debugging/SKILL.md +8 -6
package/skills/threat-hypothesis-framework/SKILL.md +175 -0
package/skills/ui-render-performance/SKILL.md +20 -2
package/skills/verification-before-done/SKILL.md +17 -2
package/skills/widget-rendering/SKILL.md +21 -6
package/skills/workspace-isolation/SKILL.md +20 -6
package/skills/worktree-isolation/SKILL.md +20 -6
package/src/agents/agent-config.ts +40 -1
package/src/benchmark/benchmark-runner.ts +45 -0
package/src/benchmark/feedback-loop.ts +5 -0
package/src/config/config.ts +32 -5
package/src/config/role-tools.ts +82 -0
package/src/config/suggestions.ts +8 -0
package/src/config/types.ts +4 -0
package/src/extension/async-notifier.ts +10 -1
package/src/extension/crew-cleanup.ts +114 -0
package/src/extension/cross-extension-rpc.ts +1 -1
package/src/extension/notification-router.ts +18 -0
package/src/extension/register.ts +27 -19
package/src/extension/registration/subagent-tools.ts +1 -1
package/src/extension/team-tool/anchor.ts +201 -0
package/src/extension/team-tool/api.ts +2 -1
package/src/extension/team-tool/auto-summarize.ts +154 -0
package/src/extension/team-tool/run.ts +42 -7
package/src/extension/team-tool.ts +44 -2
package/src/hooks/registry.ts +1 -3
package/src/observability/event-bus.ts +69 -0
package/src/observability/event-to-metric.ts +0 -2
package/src/runtime/anchor-manager.ts +473 -0
package/src/runtime/async-runner.ts +8 -4
package/src/runtime/auto-summarize.ts +350 -0
package/src/runtime/background-runner.ts +10 -3
package/src/runtime/budget-tracker.ts +354 -0
package/src/runtime/chain-runner.ts +507 -0
package/src/runtime/child-pi.ts +123 -35
package/src/runtime/crash-recovery.ts +5 -4
package/src/runtime/crew-agent-runtime.ts +1 -0
package/src/runtime/custom-tools/irc-tool.ts +13 -0
package/src/runtime/custom-tools/submit-result-tool.ts +3 -2
package/src/runtime/delivery-coordinator.ts +10 -3
package/src/runtime/dynamic-script-runner.ts +482 -0
package/src/runtime/foreground-control.ts +87 -17
package/src/runtime/handoff-manager.ts +589 -0
package/src/runtime/hidden-handoff.ts +424 -0
package/src/runtime/live-agent-manager.ts +20 -4
package/src/runtime/live-session-runtime.ts +39 -4
package/src/runtime/manifest-cache.ts +2 -1
package/src/runtime/model-resolver.ts +16 -4
package/src/runtime/phase-tracker.ts +373 -0
package/src/runtime/pi-args.ts +11 -1
package/src/runtime/pi-json-output.ts +31 -0
package/src/runtime/pipeline-runner.ts +514 -0
package/src/runtime/progress-tracker.ts +124 -0
package/src/runtime/retry-runner.ts +354 -0
package/src/runtime/sandbox.ts +252 -0
package/src/runtime/scheduler.ts +7 -2
package/src/runtime/skill-effectiveness.ts +473 -0
package/src/runtime/skill-instructions.ts +37 -3
package/src/runtime/subagent-manager.ts +1 -1
package/src/runtime/task-graph.ts +11 -1
package/src/runtime/task-runner.ts +92 -18
package/src/runtime/team-runner.ts +13 -12
package/src/runtime/tool-progress.ts +10 -3
package/src/runtime/verification-gates.ts +367 -0
package/src/schema/team-tool-schema.ts +37 -0
package/src/skills/discover-skills.ts +5 -0
package/src/state/active-run-registry.ts +9 -2
package/src/state/contracts.ts +9 -0
package/src/state/crew-init.ts +3 -3
package/src/state/decision-ledger.ts +98 -55
package/src/state/event-log-rotation.ts +2 -2
package/src/state/event-log.ts +144 -10
package/src/state/hook-instinct-bridge.ts +5 -5
package/src/state/mailbox.ts +10 -0
package/src/state/run-cache.ts +18 -8
package/src/state/state-store.ts +3 -1
package/src/state/types.ts +4 -0
package/src/tools/safe-bash-extension.ts +1 -0
package/src/tools/safe-bash.ts +152 -20
package/src/types/new-api-types.ts +34 -0
package/src/ui/agent-management-overlay.ts +5 -1
package/src/ui/crew-widget.ts +29 -15
package/src/ui/overlays/mailbox-detail-overlay.ts +13 -2
package/src/ui/powerbar-publisher.ts +101 -7
package/src/ui/tool-render.ts +15 -15
package/src/ui/transcript-cache.ts +13 -0
package/src/utils/bm25-search.ts +16 -8
package/src/utils/env-filter.ts +8 -5
package/src/utils/redaction.ts +169 -15
package/src/utils/session-utils.ts +52 -0
package/src/utils/sse-parser.ts +10 -1
package/src/worktree/cleanup.ts +6 -1
package/src/worktree/worktree-manager.ts +32 -13
package/workflows/chain.workflow.md +252 -0
package/workflows/pipeline.workflow.md +27 -0

package/src/runtime/skill-effectiveness.ts ADDED Viewed

@@ -0,0 +1,473 @@
+/**
+ * Skill Effectiveness — ECC INSTINCT/CONFIDENCE Pattern Implementation
+ *
+ * Implements confidence-weighted skill activation based on ECC's instinct system.
+ * Tracks skill activation success and adjusts confidence scores.
+ *
+ * Based on: docs/distillation/ECC-hooks-instincts.md §2-3 (instinct system, confidence thresholds)
+ * Based on: docs/distillation/ECC-10-skills.md §8 (continuous-learning-v2)
+ *
+ * @module skill-effectiveness
+ */
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
+import { dirname, join } from "path";
+import { crewHooks } from "./crew-hooks.ts";
+/**
+ * Confidence thresholds per ECC instinct system.
+ * Each value is the inclusive lower bound of its tier: confidenceToBehavior compares
+ * a score against MODERATE, STRONG and NEAR_CERTAIN (anything lower is only suggested),
+ * and TENTATIVE is the default minimum confidence of getWeightedSkillsForRole.
+ */
+export const CONFIDENCE_THRESHOLDS = {
+	TENTATIVE: 0.3,      // Suggested but not enforced
+	MODERATE: 0.5,       // Applied when relevant
+	STRONG: 0.7,         // Auto-approved for application
+	NEAR_CERTAIN: 0.9,   // Core behavior
+} as const;
+/**
+ * Initial confidence by observation frequency.
+ * From ECC instinct system: 1-2 observations → 0.3, 3-5 → 0.5, 6-10 → 0.7, 11+ → 0.85.
+ * Keys are observation counts as strings; "11+" is the bucket for every count above 10.
+ * computeInitialConfidence reads only the first key of each bucket ("1", "3", "6", "11+").
+ */
+export const INITIAL_CONFIDENCE_BY_FREQUENCY: Record<string, number> = {
+	"1": 0.3,  // 1 observation → tentative
+	"2": 0.3,  // 2 observations → tentative
+	"3": 0.5,  // 3-5 observations → moderate
+	"4": 0.5,
+	"5": 0.5,
+	"6": 0.7,  // 6-10 observations → strong
+	"7": 0.7,
+	"8": 0.7,
+	"9": 0.7,
+	"10": 0.7,
+	"11+": 0.85, // 11+ observations → very strong
+} as const;
+/**
+ * Confidence adjustments per ECC instinct system.
+ * CONFIRMING and CONTRADICTING are the deltas adjustConfidence adds for a passed or
+ * failed outcome; DECAY_PER_WEEK is what applyDecay adds for each whole week elapsed
+ * since the last activation.
+ */
+export const CONFIDENCE_ADJUSTMENTS = {
+	CONFIRMING: 0.05,      // Each confirming observation
+	CONTRADICTING: -0.1,   // Each contradicting observation
+	DECAY_PER_WEEK: -0.02, // Per week without observation
+} as const;
+/**
+ * Promotion gate criteria for skills.
+ * Skill can be promoted to "strong enforcement" when these are met.
+ * evaluatePromotionGate checks each value with an inclusive (>=) comparison.
+ */
+export const PROMOTION_GATE_CRITERIA = {
+	MIN_CORRECTNESS: 0.8,      // Minimum pass rate (80%)
+	MIN_ACTIVATIONS: 5,         // Minimum recorded activations to count as enough evidence
+	MIN_AVG_CONFIDENCE: 0.7,   // Minimum average confidence across activations
+} as const;
+/**
+ * Skill activation record - captures each time a skill is used.
+ * recordSkillActivation serializes one record as a single JSON line of the run's
+ * skill-activations.jsonl file.
+ */
+export interface SkillActivation {
+	id: string;           // Unique activation ID
+	skillId: string;      // Skill identifier (e.g., "verification-before-done")
+	role: string;         // Role that activated the skill
+	runId: string;        // Run ID; also selects the per-run file the record is stored in
+	taskId: string;       // Task ID
+	timestamp: string;    // ISO timestamp
+	passed: boolean;       // Whether the skill was successfully applied
+	outcome?: string;     // Optional outcome description
+	confidence: number;    // Confidence at time of activation
+}
+/**
+ * Skill metrics - aggregated statistics for a skill.
+ * Produced by computeSkillMetrics from the activations recorded for one skill.
+ */
+export interface SkillMetrics {
+	skillId: string;
+	totalActivations: number;
+	passedActivations: number;
+	failedActivations: number;
+	passRate: number;           // passed / total (0 when there are no activations)
+	avgConfidence: number;       // Arithmetic mean of the confidence of all activations
+	currentConfidence: number;   // Confidence of the latest activation, after time decay
+	trend: "improving" | "stable" | "declining"; // Last 5 activations vs. all earlier ones
+	lastActivation?: string;    // ISO timestamp
+	firstActivation?: string;   // ISO timestamp
+	roleBreakdown: Record<string, number>;  // Activations per role
+}
+/**
+ * Confidence-weighted skill entry for activation decisions.
+ * Built by getWeightedSkillsForRole, one entry per skill that meets the minimum confidence.
+ */
+export interface WeightedSkill {
+	skillId: string;
+	confidence: number; // Current (decayed) confidence taken from the skill's metrics
+	threshold: keyof typeof CONFIDENCE_THRESHOLDS; // Tier name from confidenceToThreshold
+	behavior: "suggest" | "apply_if_asked" | "apply_auto" | "act_autonomous"; // From confidenceToBehavior
+	evidence: string;  // Evidence for confidence score
+	metrics: SkillMetrics;
+}
+/**
+ * Build the location of a run's skill-metrics.jsonl file.
+ * The path is rooted at the current working directory of the process.
+ *
+ * @param runId Run identifier used as the directory name under .crew/state/runs.
+ * @returns The joined file path.
+ */
+function getSkillMetricsPath(runId: string): string {
+	const relativeFile = `.crew/state/runs/${runId}/skill-metrics.jsonl`;
+	return join(process.cwd(), relativeFile);
+}
+/**
+ * Build the location of a run's skill-activations.jsonl file.
+ * The path is rooted at the current working directory of the process.
+ *
+ * @param runId Run identifier used as the directory name under .crew/state/runs.
+ * @returns The joined file path.
+ */
+function getSkillActivationsPath(runId: string): string {
+	const relativeFile = `.crew/state/runs/${runId}/skill-activations.jsonl`;
+	return join(process.cwd(), relativeFile);
+}
+/**
+ * Make sure the directory that holds a run's skill files exists.
+ * Missing parent directories are created as well.
+ *
+ * @param runId Run identifier whose state directory is needed.
+ */
+function ensureSkillMetricsDir(runId: string): void {
+	const runDir = dirname(getSkillMetricsPath(runId));
+	if (existsSync(runDir)) return;
+	mkdirSync(runDir, { recursive: true });
+}
+/**
+ * Map an observation count to its starting confidence.
+ * Buckets: up to 2 → "1", up to 5 → "3", up to 10 → "6", anything else → "11+".
+ *
+ * @param observationCount Number of times the skill has been observed.
+ * @returns The INITIAL_CONFIDENCE_BY_FREQUENCY value of the matching bucket.
+ */
+export function computeInitialConfidence(observationCount: number): number {
+	let bucket: string;
+	if (observationCount <= 2) {
+		bucket = "1";
+	} else if (observationCount <= 5) {
+		bucket = "3";
+	} else if (observationCount <= 10) {
+		bucket = "6";
+	} else {
+		bucket = "11+";
+	}
+	return INITIAL_CONFIDENCE_BY_FREQUENCY[bucket];
+}
+/**
+ * Move a confidence score one step according to an outcome.
+ * Per ECC instinct system: +0.05 for success, -0.1 for failure.
+ *
+ * @param current Confidence before the outcome.
+ * @param passed Whether the outcome confirmed (true) or contradicted (false) the skill.
+ * @returns The adjusted confidence, kept within [0.1, 0.95].
+ */
+export function adjustConfidence(current: number, passed: boolean): number {
+	let step: number = CONFIDENCE_ADJUSTMENTS.CONTRADICTING;
+	if (passed) {
+		step = CONFIDENCE_ADJUSTMENTS.CONFIRMING;
+	}
+	const cappedAbove = Math.min(0.95, current + step);
+	return Math.max(0.1, cappedAbove);
+}
+/**
+ * Apply decay to confidence for skills not observed recently.
+ * Confidence drops by DECAY_PER_WEEK for every whole week elapsed since the last
+ * activation and never goes below 0.1.
+ *
+ * @param current Confidence before decay.
+ * @param lastActivation ISO timestamp of the most recent activation, if any.
+ * @returns `current` unchanged when there is no timestamp or it cannot be parsed;
+ *          otherwise the decayed confidence, floored at 0.1.
+ */
+export function applyDecay(current: number, lastActivation?: string): number {
+	if (!lastActivation) return current;
+	const lastMs = new Date(lastActivation).getTime();
+	// An unparseable timestamp yields NaN, which would turn the whole score into NaN.
+	if (!Number.isFinite(lastMs)) return current;
+	const daysSince = (Date.now() - lastMs) / (1000 * 60 * 60 * 24);
+	// Clamp at zero: a timestamp in the future (clock skew, bad data) must not produce
+	// negative weeks, which would raise the confidence instead of lowering it.
+	const decayWeeks = Math.max(0, Math.floor(daysSince / 7));
+	const decay = decayWeeks * CONFIDENCE_ADJUSTMENTS.DECAY_PER_WEEK;
+	return Math.max(0.1, current + decay);
+}
+/**
+ * Translate a confidence score into the behavior a skill is allowed.
+ * The highest tier whose lower bound the score reaches wins; scores below
+ * MODERATE fall back to "suggest".
+ *
+ * @param confidence Confidence score to classify.
+ * @returns The behavior for that score.
+ */
+export function confidenceToBehavior(confidence: number): WeightedSkill["behavior"] {
+	const tiers: Array<[number, WeightedSkill["behavior"]]> = [
+		[CONFIDENCE_THRESHOLDS.NEAR_CERTAIN, "act_autonomous"],
+		[CONFIDENCE_THRESHOLDS.STRONG, "apply_auto"],
+		[CONFIDENCE_THRESHOLDS.MODERATE, "apply_if_asked"],
+	];
+	for (const [lowerBound, behavior] of tiers) {
+		if (confidence >= lowerBound) return behavior;
+	}
+	return "suggest";
+}
+/**
+ * Determine threshold name from confidence.
+ * Returns the highest tier whose lower bound the score reaches; every score below
+ * MODERATE is reported as "TENTATIVE", matching the "suggest" fallback of
+ * confidenceToBehavior.
+ *
+ * @param confidence Confidence score to classify.
+ * @returns The name of the matching CONFIDENCE_THRESHOLDS tier.
+ */
+export function confidenceToThreshold(confidence: number): keyof typeof CONFIDENCE_THRESHOLDS {
+	if (confidence >= CONFIDENCE_THRESHOLDS.NEAR_CERTAIN) return "NEAR_CERTAIN";
+	if (confidence >= CONFIDENCE_THRESHOLDS.STRONG) return "STRONG";
+	// Compare against the MODERATE bound (not TENTATIVE) so 0.3–0.5 is not mislabelled.
+	if (confidence >= CONFIDENCE_THRESHOLDS.MODERATE) return "MODERATE";
+	return "TENTATIVE";
+}
+/**
+ * Persist one skill activation.
+ * The record is appended as a single JSON line to the skill-activations.jsonl
+ * file of the run named by `activation.runId`; the run directory is created first
+ * when it does not exist.
+ *
+ * @param activation The activation to store.
+ * @returns The same activation object that was passed in.
+ */
+export function recordSkillActivation(
+	activation: SkillActivation,
+): SkillActivation {
+	const { runId } = activation;
+	ensureSkillMetricsDir(runId);
+	writeFileSync(getSkillActivationsPath(runId), `${JSON.stringify(activation)}\n`, {
+		flag: "a",
+		encoding: "utf-8",
+	});
+	return activation;
+}
+/**
+ * Get all skill activations for a run.
+ * Reads the run's skill-activations.jsonl file and parses it line by line.
+ * Blank lines, lines that are not valid JSON (for example a line truncated by an
+ * interrupted append) and lines that do not hold a JSON object are skipped, so one
+ * damaged record does not make the rest of the history unreadable.
+ *
+ * @param runId Run whose activations are requested.
+ * @returns The parsed records in file order; empty when the file is missing or empty.
+ */
+export function getSkillActivations(runId: string): SkillActivation[] {
+	const path = getSkillActivationsPath(runId);
+	if (!existsSync(path)) {
+		return [];
+	}
+	const activations: SkillActivation[] = [];
+	for (const line of readFileSync(path, "utf-8").split("\n")) {
+		if (!line.trim()) continue;
+		try {
+			const parsed: unknown = JSON.parse(line);
+			// Only object records can be activations; null, arrays and primitives are noise.
+			if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
+				activations.push(parsed as SkillActivation);
+			}
+		} catch {
+			// Malformed line: skip it and keep the remaining records.
+		}
+	}
+	return activations;
+}
+/**
+ * Aggregate the activations of one skill into a SkillMetrics summary.
+ *
+ * @param skillId Skill to summarize.
+ * @param activations Activation records; entries of other skills are ignored.
+ * @returns Counts, pass rate, mean confidence, decayed current confidence, trend,
+ *          first/last timestamps and per-role counts. When the skill has no
+ *          activations, all counts are 0, the trend is "stable" and the current
+ *          confidence is computeInitialConfidence(0).
+ */
+export function computeSkillMetrics(
+	skillId: string,
+	activations: SkillActivation[],
+): SkillMetrics {
+	const own = activations.filter((entry) => entry.skillId === skillId);
+	const total = own.length;
+	if (total === 0) {
+		return {
+			skillId,
+			totalActivations: 0,
+			passedActivations: 0,
+			failedActivations: 0,
+			passRate: 0,
+			avgConfidence: 0,
+			currentConfidence: computeInitialConfidence(0),
+			trend: "stable",
+			roleBreakdown: {},
+		};
+	}
+	// Single pass over the skill's records: pass count, confidence sum, per-role tally.
+	let passedCount = 0;
+	let confidenceSum = 0;
+	const roleBreakdown: Record<string, number> = {};
+	for (const entry of own) {
+		if (entry.passed) passedCount += 1;
+		confidenceSum += entry.confidence;
+		roleBreakdown[entry.role] = (roleBreakdown[entry.role] ?? 0) + 1;
+	}
+	const avgConfidence = confidenceSum / total;
+	const newest = own[total - 1];
+	const latestConfidence = newest?.confidence ?? avgConfidence;
+	// Trend: pass rate of the last 5 activations against everything before them.
+	// With 5 or fewer activations there is nothing earlier, so the trend stays "stable".
+	const passRateOf = (entries: SkillActivation[]): number =>
+		entries.filter((entry) => entry.passed).length / entries.length;
+	const recentRate = passRateOf(own.slice(-5));
+	const olderEntries = own.slice(0, -5);
+	const olderRate = olderEntries.length > 0 ? passRateOf(olderEntries) : recentRate;
+	const trend: SkillMetrics["trend"] =
+		recentRate > olderRate + 0.1
+			? "improving"
+			: recentRate < olderRate - 0.1
+				? "declining"
+				: "stable";
+	// The latest confidence is decayed by the time elapsed since the last activation.
+	const lastActivation = newest?.timestamp;
+	return {
+		skillId,
+		totalActivations: total,
+		passedActivations: passedCount,
+		failedActivations: total - passedCount,
+		passRate: passedCount / total,
+		avgConfidence,
+		currentConfidence: applyDecay(latestConfidence, lastActivation),
+		trend,
+		lastActivation,
+		firstActivation: own[0]?.timestamp,
+		roleBreakdown,
+	};
+}
+/**
+ * Check a skill's metrics against the promotion gate.
+ * Four criteria are evaluated: correctness (pass rate), evidence (activation count),
+ * rollback (trend is not "declining") and encoding (average confidence).
+ *
+ * @param metrics Aggregated metrics of the skill.
+ * @returns `passed` (true only when every criterion holds), the per-criterion
+ *          results, and a human-readable `reason` naming the failed criteria if any.
+ */
+export function evaluatePromotionGate(metrics: SkillMetrics): {
+	passed: boolean;
+	criteria: {
+		correctness: boolean;
+		evidence: boolean;
+		rollback: boolean;
+		encoding: boolean;
+	};
+	reason: string;
+} {
+	const criteria = {
+		correctness: metrics.passRate >= PROMOTION_GATE_CRITERIA.MIN_CORRECTNESS,
+		evidence: metrics.totalActivations >= PROMOTION_GATE_CRITERIA.MIN_ACTIVATIONS,
+		rollback: metrics.trend !== "declining",
+		encoding: metrics.avgConfidence >= PROMOTION_GATE_CRITERIA.MIN_AVG_CONFIDENCE,
+	};
+	// Names of the criteria that did not hold, in declaration order.
+	const unmet = Object.entries(criteria)
+		.filter(([, ok]) => !ok)
+		.map(([name]) => name);
+	const passed = unmet.length === 0;
+	const reason = passed
+		? `All promotion gate criteria met: ${metrics.passRate.toFixed(1)} pass rate, ${metrics.totalActivations} activations, ${metrics.trend} trend`
+		: `Promotion gate not passed. Failed: ${unmet.join(", ")}`;
+	return { passed, criteria, reason };
+}
+/**
+ * Build confidence-weighted entries for a list of skills from a run's activation history.
+ * Skills whose current confidence is below `minConfidence` are left out, and the
+ * result is ordered from highest to lowest confidence.
+ *
+ * @param role Role the skills are requested for; it is not used in the computation.
+ * @param skillIds Skills to evaluate.
+ * @param runId Run whose recorded activations are read.
+ * @param minConfidence Inclusive lower bound; defaults to the TENTATIVE threshold.
+ * @returns One WeightedSkill per skill that meets the bound.
+ */
+export function getWeightedSkillsForRole(
+	role: string,
+	skillIds: string[],
+	runId: string,
+	minConfidence: number = CONFIDENCE_THRESHOLDS.TENTATIVE,
+): WeightedSkill[] {
+	const history = getSkillActivations(runId);
+	const weighted: WeightedSkill[] = [];
+	for (const skillId of skillIds) {
+		const metrics = computeSkillMetrics(skillId, history);
+		const confidence = metrics.currentConfidence;
+		if (confidence < minConfidence) continue;
+		const passPercent = (metrics.passRate * 100).toFixed(0);
+		weighted.push({
+			skillId,
+			confidence,
+			threshold: confidenceToThreshold(confidence),
+			behavior: confidenceToBehavior(confidence),
+			evidence: `${metrics.totalActivations} activations, ${passPercent}% pass rate`,
+			metrics,
+		});
+	}
+	weighted.sort((left, right) => right.confidence - left.confidence);
+	return weighted;
+}
+/**
+ * Keep only the skills whose confidence reaches a named threshold tier.
+ * Delegates to getWeightedSkillsForRole with the role "global"; skills below the
+ * tier's value are dropped from the result.
+ *
+ * @param skillIds Skills to evaluate.
+ * @param runId Run whose recorded activations are read.
+ * @param threshold Tier name; defaults to "MODERATE".
+ * @returns Weighted entries for the skills that qualify, highest confidence first.
+ */
+export function filterSkillsByConfidence(
+	skillIds: string[],
+	runId: string,
+	threshold: keyof typeof CONFIDENCE_THRESHOLDS = "MODERATE",
+): WeightedSkill[] {
+	return getWeightedSkillsForRole(
+		"global",
+		skillIds,
+		runId,
+		CONFIDENCE_THRESHOLDS[threshold],
+	);
+}
+/**
+ * Register crew hooks for automatic skill activation tracking.
+ * Hooks are registered once per process lifetime; later calls are no-ops.
+ *
+ * On "task_completed", one SkillActivation is appended per skill name found in
+ * `event.data.skills`; `passed` is true only when `event.data.status` is "completed".
+ * Events without a taskId or runId are ignored.
+ */
+let hooksRegistered = false;
+export function registerSkillEffectivenessHooks(): void {
+	if (hooksRegistered) return;
+	hooksRegistered = true;
+	// Track task completion for skill effectiveness
+	crewHooks.register("task_completed", (event) => {
+		const { taskId, runId, data } = event;
+		if (!taskId || !runId) return;
+		// Event data is untyped at this boundary: accept only a real array of strings, so a
+		// stray string value is not iterated character by character.
+		const rawSkills: unknown = data?.skills;
+		const skillNames = Array.isArray(rawSkills)
+			? rawSkills.filter((name): name is string => typeof name === "string")
+			: [];
+		const rawRole: unknown = data?.role;
+		const role = typeof rawRole === "string" ? rawRole : "unknown";
+		const success = data?.status === "completed";
+		// Record each skill activation
+		for (const skillId of skillNames) {
+			const activation: SkillActivation = {
+				id: `act-${Date.now()}-${Math.random().toString(36).slice(2)}`,
+				skillId,
+				role,
+				runId,
+				taskId,
+				timestamp: new Date().toISOString(),
+				passed: success,
+				confidence: computeInitialConfidence(1),
+			};
+			recordSkillActivation(activation);
+		}
+	});
+	// Keep a listener on task failures.
+	// NOTE(review): nothing is recorded here, so a failed task only lowers a skill's pass
+	// rate if "task_completed" also fires for it with a status other than "completed" —
+	// confirm against the hook emitter.
+	crewHooks.register("task_failed", () => {
+		// Intentionally empty.
+	});
+}
+/**
+ * Render a Markdown report of skill effectiveness for one run.
+ * The report starts with a title, generation time and total activation count. When
+ * the run has no activations a placeholder line follows; otherwise each requested
+ * skill gets a section with confidence, pass rate, average confidence, promotion
+ * gate result and, when present, the per-role activation counts.
+ *
+ * @param runId Run whose recorded activations are read.
+ * @param skillIds Skills to include, in output order.
+ * @returns The report text with lines joined by "\n".
+ */
+export function generateSkillEffectivenessReport(
+	runId: string,
+	skillIds: string[],
+): string {
+	const activations = getSkillActivations(runId);
+	const header: string[] = [
+		`# Skill Effectiveness Report: ${runId}`,
+		"",
+		`Generated: ${new Date().toISOString()}`,
+		`Total Activations: ${activations.length}`,
+		"",
+	];
+	if (activations.length === 0) {
+		return [...header, "*No skill activations recorded yet.*"].join("\n");
+	}
+	const skillSections = skillIds.flatMap((skillId) => {
+		const metrics = computeSkillMetrics(skillId, activations);
+		const gate = evaluatePromotionGate(metrics);
+		const section = [
+			`### ${skillId}`,
+			`- **Confidence**: ${metrics.currentConfidence.toFixed(2)} (${metrics.trend})`,
+			`- **Pass Rate**: ${(metrics.passRate * 100).toFixed(1)}% (${metrics.passedActivations}/${metrics.totalActivations})`,
+			`- **Avg Confidence**: ${metrics.avgConfidence.toFixed(2)}`,
+			`- **Promotion Gate**: ${gate.passed ? "PASSED ✅" : "NOT MET"}`,
+		];
+		if (Object.keys(metrics.roleBreakdown).length > 0) {
+			section.push(`- **By Role**: ${JSON.stringify(metrics.roleBreakdown)}`);
+		}
+		// Blank line separates consecutive skill sections.
+		section.push("");
+		return section;
+	});
+	return [...header, "## Skill Metrics", "", ...skillSections].join("\n");
+}

package/src/runtime/skill-instructions.ts CHANGED Viewed

@@ -5,6 +5,11 @@ import type { AgentConfig } from "../agents/agent-config.ts";
 import type { TeamRole } from "../teams/team-config.ts";
 import type { WorkflowStep } from "../workflows/workflow-config.ts";
 import { isSafePathId, resolveContainedPath, resolveRealContainedPath } from "../utils/safe-paths.ts";
+import {
+	getWeightedSkillsForRole,
+	registerSkillEffectivenessHooks,
+	CONFIDENCE_THRESHOLDS,
+} from "./skill-effectiveness.ts";
 const PACKAGE_SKILLS_DIR = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..", "..", "skills");
 const MAX_SKILL_CHARS = 1500;
@@ -76,7 +81,8 @@ export function defaultSkillsForRole(role: string): string[] {
 	return DEFAULT_ROLE_SKILLS[role] ?? [];
 }
-function collectTaskSkillNames(input: ResolveTaskSkillsInput): string[] {
+function collectTaskSkillNames(input: ResolveTaskSkillsInput | undefined): string[] {
+	if (!input) return [];
 	if (input.override === false) return [];
 	const roleDefaultsDisabled = input.teamRole?.skills === false || input.step?.skills === false;
 	const names = roleDefaultsDisabled ? [] : defaultSkillsForRole(input.role);
@@ -182,9 +188,16 @@ export interface RenderedSkillInstructions {
 	names: string[];
 	paths: string[];
 	block: string;
+	/** Confidence-weighted skills for this render, sorted by confidence */
+	weightedSkills?: Array<{
+		skillId: string;
+		confidence: number;
+		behavior: string;
+		threshold: string;
+	}>;
 }
-export function renderSkillInstructions(input: RenderSkillInstructionsInput): RenderedSkillInstructions {
+export function renderSkillInstructions(input: RenderSkillInstructionsInput & { runId?: string } = {} as RenderSkillInstructionsInput & { runId?: string }): RenderedSkillInstructions {
 	const allNames = collectTaskSkillNames(input);
 	const names = allNames.slice(0, MAX_SELECTED_SKILLS);
 	const overflowCount = Math.max(0, allNames.length - names.length);
@@ -193,6 +206,21 @@ export function renderSkillInstructions(input: RenderSkillInstructionsInput): Re
 	const skillPaths: string[] = [];
 	let total = 0;
 	let omittedCount = overflowCount;
+	// ECC INSTINCT: Get confidence-weighted skills if runId is provided
+	let weightedSkills: RenderedSkillInstructions["weightedSkills"] = undefined;
+	if (input.runId) {
+		// Register effectiveness hooks once per process
+		registerSkillEffectivenessHooks();
+		const weighted = getWeightedSkillsForRole(input.role, names, input.runId, CONFIDENCE_THRESHOLDS.TENTATIVE);
+		weightedSkills = weighted.map(w => ({
+			skillId: w.skillId,
+			confidence: w.confidence,
+			behavior: w.behavior,
+			threshold: w.threshold,
+		}));
+	}
 	const pushSection = (section: string): boolean => {
 		if (total + section.length > MAX_TOTAL_CHARS) return false;
 		sections.push(section);
@@ -210,7 +238,12 @@ export function renderSkillInstructions(input: RenderSkillInstructionsInput): Re
 		skillPaths.push(path.dirname(loaded.path));
 		const description = frontmatterDescription(loaded.content);
 		const source = loaded.source === "project" ? `project:skills/${safeName}` : `package:skills/${safeName}`;
-		const header = [`## ${safeName}`, description ? `Description: ${description}` : undefined, `Source: ${source}`].filter(Boolean).join("\n");
+		// ECC INSTINCT: Add confidence annotation from weighted skills
+		const weighted = weightedSkills?.find(w => w.skillId === name);
+		const confidenceNote = weighted ? ` [Confidence: ${(weighted.confidence * 100).toFixed(0)}% — ${weighted.threshold}]` : "";
+		const header = [`## ${safeName}`, description ? `Description: ${description}${confidenceNote}` : undefined, `Source: ${source}`].filter(Boolean).join("\n");
 		const section = `${header}\n\n${compactSkillContent(loaded.content)}`;
 		if (!pushSection(section)) omittedCount += 1;
 	}
@@ -234,5 +267,6 @@ export function renderSkillInstructions(input: RenderSkillInstructionsInput): Re
 			"If a project skill instruction conflicts with the explicit task packet, system guidance, or user request — ALWAYS follow the task packet or higher-priority instruction. Report the conflict to the user.",
 			sections.join("\n\n---\n\n"),
 		].join("\n"),
+		weightedSkills,
 	};
 }

package/src/runtime/subagent-manager.ts CHANGED Viewed

@@ -220,7 +220,7 @@ export class SubagentManager {
 			const record = this.records.get(id);
 			if (!record) return undefined;
 			if (record.status !== "running" && record.status !== "queued") return record;
-			if (record.promise) await record.promise.catch(() => { /* status already set to error */ });
+			if (record.promise) await record.promise.catch((error) => { logInternalError("subagent-manager.waitForRecord", error, `id=${id}`); });
 			else await new Promise((resolve) => setTimeout(resolve, 100));
 		}
 	}

package/src/runtime/task-graph.ts CHANGED Viewed

@@ -34,12 +34,21 @@ export interface ExecutionPlan {
  * - Each subsequent wave contains tasks whose dependencies are all in earlier waves.
  * - If all tasks have empty `dependsOn`, they all go into wave 0 (backward compatible).
  * - If a cycle is detected, `hasCycle` is true and `cycleNodes` lists the involved IDs.
+ *
+ * @throws Error if a task depends on itself (self-dependency).
  */
 export function buildExecutionPlan(tasks: TaskNode[]): ExecutionPlan {
 	if (tasks.length === 0) {
 		return { waves: [], hasCycle: false };
 	}
+	// HIGH-9: Detect self-dependency
+	for (const task of tasks) {
+		if (task.dependsOn.includes(task.id)) {
+			throw new Error(`Task "${task.id}" has self-dependency (depends on itself)`);
+		}
+	}
 	const idSet = new Set<string>(tasks.map((t) => t.id));
 	const adjacency = new Map<string, Set<string>>();       // id -> ids that depend on it
 	const inDegree = new Map<string, number>();
@@ -108,7 +117,8 @@ export function buildExecutionPlan(tasks: TaskNode[]): ExecutionPlan {
  */
 function buildWave(tasks: TaskNode[], ids: string[], index: number): ExecutionWave {
 	const taskMap = new Map(tasks.map((t) => [t.id, t]));
-	const waveTasks = ids.map((id) => taskMap.get(id)!).filter(Boolean);
+	// MEDIUM-12: Filter out undefined values instead of using non-null assertion
+	const waveTasks = ids.map((id) => taskMap.get(id)).filter(Boolean) as TaskNode[];
 	let label: string | undefined;
 	if (waveTasks.length > 0 && waveTasks.every((t) => t.phase !== undefined)) {