npm - selftune - Versions diffs - 0.2.22 → 0.2.23 - Mend

selftune 0.2.22 → 0.2.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (94)

package/README.md +4 -2
package/apps/local-dashboard/dist/assets/index-CwOtTrUS.css +1 -0
package/apps/local-dashboard/dist/assets/index-f1HQpbeH.js +59 -0
package/apps/local-dashboard/dist/assets/vendor-ui-jVSaIZey.js +12 -0
package/apps/local-dashboard/dist/index.html +3 -3
package/cli/selftune/adapters/pi/hook.ts +273 -0
package/cli/selftune/adapters/pi/install.ts +207 -0
package/cli/selftune/constants.ts +10 -1
package/cli/selftune/dashboard-contract.ts +14 -0
package/cli/selftune/evolution/engines/judge-engine.ts +96 -0
package/cli/selftune/evolution/engines/replay-engine.ts +158 -0
package/cli/selftune/evolution/evidence.ts +2 -6
package/cli/selftune/evolution/evolve-body.ts +73 -20
package/cli/selftune/evolution/validate-body.ts +78 -42
package/cli/selftune/evolution/validate-routing.ts +45 -104
package/cli/selftune/hooks/skill-eval.ts +2 -1
package/cli/selftune/hooks-shared/types.ts +1 -0
package/cli/selftune/index.ts +23 -5
package/cli/selftune/ingestors/pi-ingest.ts +726 -0
package/cli/selftune/init.ts +11 -1
package/cli/selftune/localdb/direct-write.ts +85 -0
package/cli/selftune/localdb/materialize.ts +6 -7
package/cli/selftune/localdb/queries.ts +126 -0
package/cli/selftune/localdb/schema.ts +38 -0
package/cli/selftune/observability.ts +8 -1
package/cli/selftune/orchestrate.ts +43 -0
package/cli/selftune/registry/client.ts +74 -0
package/cli/selftune/registry/history.ts +54 -0
package/cli/selftune/registry/index.ts +90 -0
package/cli/selftune/registry/install.ts +141 -0
package/cli/selftune/registry/list.ts +44 -0
package/cli/selftune/registry/push.ts +171 -0
package/cli/selftune/registry/rollback.ts +49 -0
package/cli/selftune/registry/status.ts +62 -0
package/cli/selftune/registry/sync.ts +125 -0
package/cli/selftune/repair/skill-usage.ts +4 -1
package/cli/selftune/status.ts +31 -0
package/cli/selftune/sync.ts +127 -23
package/cli/selftune/types.ts +2 -1
package/cli/selftune/utils/jsonl.ts +1 -30
package/cli/selftune/utils/skill-discovery.ts +22 -0
package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
package/node_modules/@selftune/telemetry-contract/fixtures/golden.test.ts +0 -1
package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
package/node_modules/@selftune/telemetry-contract/package.json +1 -1
package/node_modules/@selftune/telemetry-contract/src/index.ts +1 -0
package/node_modules/@selftune/telemetry-contract/src/schemas.ts +22 -4
package/node_modules/@selftune/telemetry-contract/src/types.ts +1 -12
package/node_modules/@selftune/telemetry-contract/tests/compatibility.test.ts +0 -1
package/package.json +1 -1
package/packages/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
package/packages/telemetry-contract/fixtures/golden.test.ts +0 -1
package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
package/packages/telemetry-contract/package.json +1 -1
package/packages/telemetry-contract/src/index.ts +1 -0
package/packages/telemetry-contract/src/schemas.ts +22 -4
package/packages/telemetry-contract/src/types.ts +1 -12
package/packages/telemetry-contract/tests/compatibility.test.ts +0 -1
package/packages/ui/AGENTS.md +16 -0
package/packages/ui/README.md +1 -1
package/packages/ui/package.json +1 -1
package/packages/ui/src/components/ActivityTimeline.tsx +152 -168
package/packages/ui/src/components/AnalyticsCharts.tsx +344 -0
package/packages/ui/src/components/EvidenceViewer.tsx +153 -443
package/packages/ui/src/components/EvolutionTimeline.tsx +34 -87
package/packages/ui/src/components/InfoTip.tsx +1 -2
package/packages/ui/src/components/InvocationsPanel.tsx +413 -0
package/packages/ui/src/components/JobHistoryTimeline.tsx +156 -0
package/packages/ui/src/components/OrchestrateRunsPanel.tsx +18 -36
package/packages/ui/src/components/OverviewPanels.tsx +652 -0
package/packages/ui/src/components/PipelineStatusBar.tsx +65 -0
package/packages/ui/src/components/SkillReportGuide.tsx +215 -0
package/packages/ui/src/components/SkillReportPanels.tsx +919 -0
package/packages/ui/src/components/SkillsLibrary.tsx +437 -0
package/packages/ui/src/components/index.ts +56 -1
package/packages/ui/src/components/section-cards.tsx +18 -35
package/packages/ui/src/components/skill-health-grid.tsx +47 -37
package/packages/ui/src/lib/constants.tsx +0 -1
package/packages/ui/src/primitives/card.tsx +1 -1
package/packages/ui/src/primitives/checkbox.tsx +1 -1
package/packages/ui/src/primitives/dropdown-menu.tsx +2 -2
package/packages/ui/src/primitives/select.tsx +2 -2
package/packages/ui/src/types.ts +172 -4
package/skill/SKILL.md +18 -4
package/skill/Workflows/Ingest.md +60 -2
package/skill/Workflows/Initialize.md +8 -5
package/skill/Workflows/PlatformHooks.md +19 -3
package/skill/Workflows/Registry.md +99 -0
package/skill/Workflows/Sync.md +3 -1
package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +0 -60
package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +0 -1
package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +0 -12
package/cli/selftune/utils/html.ts +0 -27
package/packages/ui/src/components/RecentActivityFeed.tsx +0 -117

package/cli/selftune/evolution/validate-routing.ts CHANGED Viewed

@@ -3,6 +3,9 @@
  *
  * Validates a routing table evolution proposal by checking structural validity
  * and running trigger accuracy checks against an eval set.
+ *
+ * Delegates replay-based and judge-based validation to dedicated engines
+ * (engines/replay-engine.ts and engines/judge-engine.ts).
  */
 import type {
@@ -10,28 +13,20 @@ import type {
   BodyValidationResult,
   EvalEntry,
   RoutingReplayEntryResult,
-  RoutingReplayFixture,
   ValidationMode,
 } from "../types.js";
-import { callLlm } from "../utils/llm-call.js";
-import { buildTriggerCheckPrompt, parseTriggerResponse } from "../utils/trigger-check.js";
-import { runHostReplayFixture } from "./validate-host-replay.js";
-export interface RoutingReplayRunnerInput {
-  routing: string;
-  evalSet: EvalEntry[];
-  agent: string;
-  fixture: RoutingReplayFixture;
-}
-export type RoutingReplayRunner = (
-  input: RoutingReplayRunnerInput,
-) => Promise<RoutingReplayEntryResult[]>;
-export interface RoutingValidationOptions {
-  replayFixture?: RoutingReplayFixture;
-  replayRunner?: RoutingReplayRunner;
-}
+import { runJudgeValidation } from "./engines/judge-engine.js";
+import {
+  runReplayValidation,
+  type ReplayRunner,
+  type ReplayRunnerInput,
+  type ReplayValidationOptions,
+} from "./engines/replay-engine.js";
+// Re-export engine types for backward compatibility
+export type { ReplayRunnerInput as RoutingReplayRunnerInput };
+export type { ReplayRunner as RoutingReplayRunner };
+export type { ReplayValidationOptions as RoutingValidationOptions };
 export interface RoutingTriggerAccuracyResult {
   before_pass_rate: number;
@@ -41,6 +36,7 @@ export interface RoutingTriggerAccuracyResult {
   validation_agent: string;
   validation_fixture_id?: string;
   per_entry_results?: RoutingReplayEntryResult[];
+  before_entry_results?: RoutingReplayEntryResult[];
 }
 // ---------------------------------------------------------------------------
@@ -104,6 +100,9 @@ export function validateRoutingStructure(routing: string): { valid: boolean; rea
 /**
  * Run before/after trigger checks on the eval set using the routing content.
  * Returns pass rates for comparison.
+ *
+ * Prefers replay-backed validation when a fixture is available,
+ * falls back to LLM judge otherwise.
  */
 export async function validateRoutingTriggerAccuracy(
   originalRouting: string,
@@ -111,7 +110,7 @@ export async function validateRoutingTriggerAccuracy(
   evalSet: EvalEntry[],
   agent: string,
   modelFlag?: string,
-  options: RoutingValidationOptions = {},
+  options: ReplayValidationOptions = {},
 ): Promise<RoutingTriggerAccuracyResult> {
   if (evalSet.length === 0) {
     return {
@@ -123,93 +122,34 @@ export async function validateRoutingTriggerAccuracy(
     };
   }
-  if (options.replayFixture && options.replayRunner) {
-    const beforeResults = await options.replayRunner({
-      routing: originalRouting,
-      evalSet,
-      agent,
-      fixture: options.replayFixture,
-    });
-    const afterResults = await options.replayRunner({
-      routing: proposedRouting,
-      evalSet,
-      agent,
-      fixture: options.replayFixture,
-    });
-    const beforePassed = beforeResults.filter((result) => result.passed).length;
-    const afterPassed = afterResults.filter((result) => result.passed).length;
-    const total = evalSet.length;
-    return {
-      before_pass_rate: beforePassed / total,
-      after_pass_rate: afterPassed / total,
-      improved: afterPassed > beforePassed,
-      validation_mode: "host_replay",
-      validation_agent: agent,
-      validation_fixture_id: options.replayFixture.fixture_id,
-      per_entry_results: afterResults,
-    };
-  }
-  if (options.replayFixture) {
-    const beforeResults = runHostReplayFixture({
-      routing: originalRouting,
-      evalSet,
-      fixture: options.replayFixture,
-    });
-    const afterResults = runHostReplayFixture({
-      routing: proposedRouting,
-      evalSet,
-      fixture: options.replayFixture,
-    });
-    const beforePassed = beforeResults.filter((result) => result.passed).length;
-    const afterPassed = afterResults.filter((result) => result.passed).length;
-    const total = evalSet.length;
-    return {
-      before_pass_rate: beforePassed / total,
-      after_pass_rate: afterPassed / total,
-      improved: afterPassed > beforePassed,
-      validation_mode: "host_replay",
-      validation_agent: agent,
-      validation_fixture_id: options.replayFixture.fixture_id,
-      per_entry_results: afterResults,
-    };
-  }
-  const systemPrompt = "You are an evaluation assistant. Answer only YES or NO.";
-  let beforePassed = 0;
-  let afterPassed = 0;
-  for (const entry of evalSet) {
-    // Check with original routing
-    const beforePrompt = buildTriggerCheckPrompt(originalRouting, entry.query);
-    const beforeRaw = await callLlm(systemPrompt, beforePrompt, agent, modelFlag);
-    const beforeTriggered = parseTriggerResponse(beforeRaw);
-    const beforePass =
-      (entry.should_trigger && beforeTriggered) || (!entry.should_trigger && !beforeTriggered);
-    // Check with proposed routing
-    const afterPrompt = buildTriggerCheckPrompt(proposedRouting, entry.query);
-    const afterRaw = await callLlm(systemPrompt, afterPrompt, agent, modelFlag);
-    const afterTriggered = parseTriggerResponse(afterRaw);
-    const afterPass =
-      (entry.should_trigger && afterTriggered) || (!entry.should_trigger && !afterTriggered);
+  // Try replay-backed validation first
+  const replayResult = await runReplayValidation(
+    originalRouting,
+    proposedRouting,
+    evalSet,
+    agent,
+    options,
+  );
-    if (beforePass) beforePassed++;
-    if (afterPass) afterPassed++;
+  if (replayResult) {
+    return replayResult;
   }
-  const total = evalSet.length;
-  const beforePassRate = beforePassed / total;
-  const afterPassRate = afterPassed / total;
+  // Fall back to LLM judge
+  const judgeResult = await runJudgeValidation(
+    originalRouting,
+    proposedRouting,
+    evalSet,
+    agent,
+    modelFlag,
+  );
   return {
-    before_pass_rate: beforePassRate,
-    after_pass_rate: afterPassRate,
-    improved: afterPassRate > beforePassRate,
-    validation_mode: "llm_judge",
-    validation_agent: agent,
+    before_pass_rate: judgeResult.before_pass_rate,
+    after_pass_rate: judgeResult.after_pass_rate,
+    improved: judgeResult.improved,
+    validation_mode: judgeResult.validation_mode,
+    validation_agent: judgeResult.validation_agent,
   };
 }
@@ -223,7 +163,7 @@ export async function validateRoutingProposal(
   evalSet: EvalEntry[],
   agent: string,
   modelFlag?: string,
-  options: RoutingValidationOptions = {},
+  options: ReplayValidationOptions = {},
 ): Promise<BodyValidationResult> {
   const gateResults: Array<{ gate: string; passed: boolean; reason: string }> = [];
@@ -280,5 +220,6 @@ export async function validateRoutingProposal(
     before_pass_rate: accuracy.before_pass_rate,
     after_pass_rate: accuracy.after_pass_rate,
     per_entry_results: accuracy.per_entry_results,
+    before_entry_results: accuracy.before_entry_results,
   };
 }

package/cli/selftune/hooks/skill-eval.ts CHANGED Viewed

@@ -25,7 +25,7 @@ import {
   getLatestPromptIdentity,
 } from "../normalization.js";
 import type { PostToolUsePayload, SkillUsageRecord } from "../types.js";
-import { classifySkillPath } from "../utils/skill-discovery.js";
+import { classifySkillPath, isTestFixturePath } from "../utils/skill-discovery.js";
 import { getLastUserMessage } from "../utils/transcript.js";
 /**
@@ -122,6 +122,7 @@ export async function processToolUse(
   const skillName = extractSkillName(filePath);
   if (skillName === null) return null;
+  if (isTestFixturePath(filePath)) return null;
   const transcriptPath = payload.transcript_path ?? "";
   const sessionId = payload.session_id ?? "unknown";

package/cli/selftune/hooks-shared/types.ts CHANGED Viewed

@@ -83,6 +83,7 @@ export const PLATFORM_EVENT_MAP: Record<HookPlatform, Partial<Record<HookEventTy
     session_end: "TaskComplete",
   },
   pi: {
+    prompt_submit: "message",
     pre_tool_use: "tool_call",
     post_tool_use: "tool_result",
     session_end: "session_shutdown",

package/cli/selftune/index.ts CHANGED Viewed

@@ -3,7 +3,7 @@
  * selftune CLI entry point.
  *
  * Usage:
- *   selftune ingest <agent>     — Ingest agent sessions (claude, codex, opencode, openclaw, wrap-codex)
+ *   selftune ingest <agent>     — Ingest agent sessions (claude, codex, opencode, openclaw, pi, wrap-codex)
  *   selftune grade [mode]       — Grade skill sessions (auto, baseline)
  *   selftune evolve [target]    — Evolve skill descriptions (body, rollback)
  *   selftune eval <action>      — Evaluation tools (generate, unit-test, import, composability, family-overlap)
@@ -28,11 +28,13 @@
  *   selftune export-canonical   — Export canonical telemetry for downstream ingestion
  *   selftune recover            — Recover SQLite from legacy/exported JSONL
  *   selftune telemetry          — Manage anonymous usage analytics (status, enable, disable)
+ *   selftune registry <sub>    — Team skill distribution (push, install, sync, status, rollback, history, list)
  *   selftune alpha <subcommand> — Alpha program management (upload)
  *   selftune hook <name>        — Run a hook by name (prompt-log, session-stop, etc.)
  *   selftune codex <subcommand> — Codex platform hooks (hook, install)
  *   selftune opencode <sub>     — OpenCode platform hooks (hook, install)
  *   selftune cline <subcommand> — Cline platform hooks (hook, install)
+ *   selftune pi <subcommand>    — Pi platform hooks (hook, install)
  */
 import { CLIError, handleCLIError } from "./utils/cli-error.js";
@@ -49,7 +51,7 @@ Usage:
   selftune <command> [options]
 Commands:
-  ingest <agent>     Ingest agent sessions (claude, codex, opencode, openclaw, wrap-codex)
+  ingest <agent>     Ingest agent sessions (claude, codex, opencode, openclaw, pi, wrap-codex)
   grade [mode]       Grade skill sessions (auto, baseline)
   evolve [target]    Evolve skill descriptions (body, rollback)
   eval <action>      Evaluation tools (generate, unit-test, import, composability, family-overlap)
@@ -73,19 +75,21 @@ Commands:
   export             Export SQLite data to JSONL snapshots
   export-canonical   Export canonical telemetry for downstream ingestion
   recover            Recover SQLite from legacy/exported JSONL
+  registry <sub>    Team skill distribution (push, install, sync, status, rollback, history, list)
   alpha <subcommand> Alpha program management (upload)
   telemetry          Manage anonymous usage analytics (status, enable, disable)
   hook <name>        Run a hook by name (prompt-log, session-stop, etc.)
   codex <sub>        Codex platform hooks (hook, install)
   opencode <sub>     OpenCode platform hooks (hook, install)
   cline <sub>        Cline platform hooks (hook, install)
+  pi <sub>           Pi platform hooks (hook, install)
 Run 'selftune <command> --help' for command-specific options.`);
   process.exit(0);
 }
 // Fast-path commands (real-time hooks) — skip analytics and auto-update to minimize latency
-const FAST_COMMANDS: ReadonlySet<string> = new Set(["hook", "codex", "opencode", "cline"]);
+const FAST_COMMANDS: ReadonlySet<string> = new Set(["hook", "codex", "opencode", "cline", "pi"]);
 // Track command usage (lazy import — skip for hooks and --help to avoid loading crypto/os)
 if (command && !FAST_COMMANDS.has(command) && command !== "--help" && command !== "-h") {
@@ -129,6 +133,7 @@ Agents:
   codex        Ingest Codex rollout logs (experimental)
   opencode     Ingest OpenCode sessions (experimental)
   openclaw     Ingest OpenClaw sessions (experimental)
+  pi           Ingest Pi sessions (experimental)
   wrap-codex   Wrap codex exec with real-time telemetry (experimental)
 Run 'selftune ingest <agent> --help' for agent-specific options.`);
@@ -157,6 +162,11 @@ Run 'selftune ingest <agent> --help' for agent-specific options.`);
         cliMain();
         break;
       }
+      case "pi": {
+        const { cliMain } = await import("./ingestors/pi-ingest.js");
+        cliMain();
+        break;
+      }
       case "wrap-codex": {
         const { cliMain } = await import("./ingestors/codex-wrapper.js");
         await cliMain();
@@ -620,6 +630,11 @@ Options:
     await cliMain();
     break;
   }
+  case "registry": {
+    const { cliMain } = await import("./registry/index.js");
+    await cliMain();
+    break;
+  }
   case "alpha": {
     const sub = process.argv[2];
     if (!sub || sub === "--help" || sub === "-h") {
@@ -828,9 +843,12 @@ Output:
   case "codex":
   case "opencode":
-  case "cline": {
+  case "cline":
+  case "pi": {
     const platform = command;
-    const displayName = { codex: "Codex", opencode: "OpenCode", cline: "Cline" }[platform];
+    const displayName = { codex: "Codex", opencode: "OpenCode", cline: "Cline", pi: "Pi" }[
+      platform
+    ];
     const sub = process.argv[2];
     if (!sub || sub === "--help" || sub === "-h") {
       console.log(`selftune ${platform} — ${displayName} platform hooks