selftune 0.2.22 → 0.2.23

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (94):
  1. package/README.md +4 -2
  2. package/apps/local-dashboard/dist/assets/index-CwOtTrUS.css +1 -0
  3. package/apps/local-dashboard/dist/assets/index-f1HQpbeH.js +59 -0
  4. package/apps/local-dashboard/dist/assets/vendor-ui-jVSaIZey.js +12 -0
  5. package/apps/local-dashboard/dist/index.html +3 -3
  6. package/cli/selftune/adapters/pi/hook.ts +273 -0
  7. package/cli/selftune/adapters/pi/install.ts +207 -0
  8. package/cli/selftune/constants.ts +10 -1
  9. package/cli/selftune/dashboard-contract.ts +14 -0
  10. package/cli/selftune/evolution/engines/judge-engine.ts +96 -0
  11. package/cli/selftune/evolution/engines/replay-engine.ts +158 -0
  12. package/cli/selftune/evolution/evidence.ts +2 -6
  13. package/cli/selftune/evolution/evolve-body.ts +73 -20
  14. package/cli/selftune/evolution/validate-body.ts +78 -42
  15. package/cli/selftune/evolution/validate-routing.ts +45 -104
  16. package/cli/selftune/hooks/skill-eval.ts +2 -1
  17. package/cli/selftune/hooks-shared/types.ts +1 -0
  18. package/cli/selftune/index.ts +23 -5
  19. package/cli/selftune/ingestors/pi-ingest.ts +726 -0
  20. package/cli/selftune/init.ts +11 -1
  21. package/cli/selftune/localdb/direct-write.ts +85 -0
  22. package/cli/selftune/localdb/materialize.ts +6 -7
  23. package/cli/selftune/localdb/queries.ts +126 -0
  24. package/cli/selftune/localdb/schema.ts +38 -0
  25. package/cli/selftune/observability.ts +8 -1
  26. package/cli/selftune/orchestrate.ts +43 -0
  27. package/cli/selftune/registry/client.ts +74 -0
  28. package/cli/selftune/registry/history.ts +54 -0
  29. package/cli/selftune/registry/index.ts +90 -0
  30. package/cli/selftune/registry/install.ts +141 -0
  31. package/cli/selftune/registry/list.ts +44 -0
  32. package/cli/selftune/registry/push.ts +171 -0
  33. package/cli/selftune/registry/rollback.ts +49 -0
  34. package/cli/selftune/registry/status.ts +62 -0
  35. package/cli/selftune/registry/sync.ts +125 -0
  36. package/cli/selftune/repair/skill-usage.ts +4 -1
  37. package/cli/selftune/status.ts +31 -0
  38. package/cli/selftune/sync.ts +127 -23
  39. package/cli/selftune/types.ts +2 -1
  40. package/cli/selftune/utils/jsonl.ts +1 -30
  41. package/cli/selftune/utils/skill-discovery.ts +22 -0
  42. package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
  43. package/node_modules/@selftune/telemetry-contract/fixtures/golden.test.ts +0 -1
  44. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
  45. package/node_modules/@selftune/telemetry-contract/package.json +1 -1
  46. package/node_modules/@selftune/telemetry-contract/src/index.ts +1 -0
  47. package/node_modules/@selftune/telemetry-contract/src/schemas.ts +22 -4
  48. package/node_modules/@selftune/telemetry-contract/src/types.ts +1 -12
  49. package/node_modules/@selftune/telemetry-contract/tests/compatibility.test.ts +0 -1
  50. package/package.json +1 -1
  51. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
  52. package/packages/telemetry-contract/fixtures/golden.test.ts +0 -1
  53. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
  54. package/packages/telemetry-contract/package.json +1 -1
  55. package/packages/telemetry-contract/src/index.ts +1 -0
  56. package/packages/telemetry-contract/src/schemas.ts +22 -4
  57. package/packages/telemetry-contract/src/types.ts +1 -12
  58. package/packages/telemetry-contract/tests/compatibility.test.ts +0 -1
  59. package/packages/ui/AGENTS.md +16 -0
  60. package/packages/ui/README.md +1 -1
  61. package/packages/ui/package.json +1 -1
  62. package/packages/ui/src/components/ActivityTimeline.tsx +152 -168
  63. package/packages/ui/src/components/AnalyticsCharts.tsx +344 -0
  64. package/packages/ui/src/components/EvidenceViewer.tsx +153 -443
  65. package/packages/ui/src/components/EvolutionTimeline.tsx +34 -87
  66. package/packages/ui/src/components/InfoTip.tsx +1 -2
  67. package/packages/ui/src/components/InvocationsPanel.tsx +413 -0
  68. package/packages/ui/src/components/JobHistoryTimeline.tsx +156 -0
  69. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +18 -36
  70. package/packages/ui/src/components/OverviewPanels.tsx +652 -0
  71. package/packages/ui/src/components/PipelineStatusBar.tsx +65 -0
  72. package/packages/ui/src/components/SkillReportGuide.tsx +215 -0
  73. package/packages/ui/src/components/SkillReportPanels.tsx +919 -0
  74. package/packages/ui/src/components/SkillsLibrary.tsx +437 -0
  75. package/packages/ui/src/components/index.ts +56 -1
  76. package/packages/ui/src/components/section-cards.tsx +18 -35
  77. package/packages/ui/src/components/skill-health-grid.tsx +47 -37
  78. package/packages/ui/src/lib/constants.tsx +0 -1
  79. package/packages/ui/src/primitives/card.tsx +1 -1
  80. package/packages/ui/src/primitives/checkbox.tsx +1 -1
  81. package/packages/ui/src/primitives/dropdown-menu.tsx +2 -2
  82. package/packages/ui/src/primitives/select.tsx +2 -2
  83. package/packages/ui/src/types.ts +172 -4
  84. package/skill/SKILL.md +18 -4
  85. package/skill/Workflows/Ingest.md +60 -2
  86. package/skill/Workflows/Initialize.md +8 -5
  87. package/skill/Workflows/PlatformHooks.md +19 -3
  88. package/skill/Workflows/Registry.md +99 -0
  89. package/skill/Workflows/Sync.md +3 -1
  90. package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +0 -60
  91. package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +0 -1
  92. package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +0 -12
  93. package/cli/selftune/utils/html.ts +0 -27
  94. package/packages/ui/src/components/RecentActivityFeed.tsx +0 -117
@@ -3,6 +3,9 @@
3
3
  *
4
4
  * Validates a routing table evolution proposal by checking structural validity
5
5
  * and running trigger accuracy checks against an eval set.
6
+ *
7
+ * Delegates replay-based and judge-based validation to dedicated engines
8
+ * (engines/replay-engine.ts and engines/judge-engine.ts).
6
9
  */
7
10
 
8
11
  import type {
@@ -10,28 +13,20 @@ import type {
10
13
  BodyValidationResult,
11
14
  EvalEntry,
12
15
  RoutingReplayEntryResult,
13
- RoutingReplayFixture,
14
16
  ValidationMode,
15
17
  } from "../types.js";
16
- import { callLlm } from "../utils/llm-call.js";
17
- import { buildTriggerCheckPrompt, parseTriggerResponse } from "../utils/trigger-check.js";
18
- import { runHostReplayFixture } from "./validate-host-replay.js";
19
-
20
- export interface RoutingReplayRunnerInput {
21
- routing: string;
22
- evalSet: EvalEntry[];
23
- agent: string;
24
- fixture: RoutingReplayFixture;
25
- }
26
-
27
- export type RoutingReplayRunner = (
28
- input: RoutingReplayRunnerInput,
29
- ) => Promise<RoutingReplayEntryResult[]>;
30
-
31
- export interface RoutingValidationOptions {
32
- replayFixture?: RoutingReplayFixture;
33
- replayRunner?: RoutingReplayRunner;
34
- }
18
+ import { runJudgeValidation } from "./engines/judge-engine.js";
19
+ import {
20
+ runReplayValidation,
21
+ type ReplayRunner,
22
+ type ReplayRunnerInput,
23
+ type ReplayValidationOptions,
24
+ } from "./engines/replay-engine.js";
25
+
26
+ // Re-export engine types for backward compatibility
27
+ export type { ReplayRunnerInput as RoutingReplayRunnerInput };
28
+ export type { ReplayRunner as RoutingReplayRunner };
29
+ export type { ReplayValidationOptions as RoutingValidationOptions };
35
30
 
36
31
  export interface RoutingTriggerAccuracyResult {
37
32
  before_pass_rate: number;
@@ -41,6 +36,7 @@ export interface RoutingTriggerAccuracyResult {
41
36
  validation_agent: string;
42
37
  validation_fixture_id?: string;
43
38
  per_entry_results?: RoutingReplayEntryResult[];
39
+ before_entry_results?: RoutingReplayEntryResult[];
44
40
  }
45
41
 
46
42
  // ---------------------------------------------------------------------------
@@ -104,6 +100,9 @@ export function validateRoutingStructure(routing: string): { valid: boolean; rea
104
100
  /**
105
101
  * Run before/after trigger checks on the eval set using the routing content.
106
102
  * Returns pass rates for comparison.
103
+ *
104
+ * Prefers replay-backed validation when a fixture is available,
105
+ * falls back to LLM judge otherwise.
107
106
  */
108
107
  export async function validateRoutingTriggerAccuracy(
109
108
  originalRouting: string,
@@ -111,7 +110,7 @@ export async function validateRoutingTriggerAccuracy(
111
110
  evalSet: EvalEntry[],
112
111
  agent: string,
113
112
  modelFlag?: string,
114
- options: RoutingValidationOptions = {},
113
+ options: ReplayValidationOptions = {},
115
114
  ): Promise<RoutingTriggerAccuracyResult> {
116
115
  if (evalSet.length === 0) {
117
116
  return {
@@ -123,93 +122,34 @@ export async function validateRoutingTriggerAccuracy(
123
122
  };
124
123
  }
125
124
 
126
- if (options.replayFixture && options.replayRunner) {
127
- const beforeResults = await options.replayRunner({
128
- routing: originalRouting,
129
- evalSet,
130
- agent,
131
- fixture: options.replayFixture,
132
- });
133
- const afterResults = await options.replayRunner({
134
- routing: proposedRouting,
135
- evalSet,
136
- agent,
137
- fixture: options.replayFixture,
138
- });
139
- const beforePassed = beforeResults.filter((result) => result.passed).length;
140
- const afterPassed = afterResults.filter((result) => result.passed).length;
141
- const total = evalSet.length;
142
-
143
- return {
144
- before_pass_rate: beforePassed / total,
145
- after_pass_rate: afterPassed / total,
146
- improved: afterPassed > beforePassed,
147
- validation_mode: "host_replay",
148
- validation_agent: agent,
149
- validation_fixture_id: options.replayFixture.fixture_id,
150
- per_entry_results: afterResults,
151
- };
152
- }
153
-
154
- if (options.replayFixture) {
155
- const beforeResults = runHostReplayFixture({
156
- routing: originalRouting,
157
- evalSet,
158
- fixture: options.replayFixture,
159
- });
160
- const afterResults = runHostReplayFixture({
161
- routing: proposedRouting,
162
- evalSet,
163
- fixture: options.replayFixture,
164
- });
165
- const beforePassed = beforeResults.filter((result) => result.passed).length;
166
- const afterPassed = afterResults.filter((result) => result.passed).length;
167
- const total = evalSet.length;
168
-
169
- return {
170
- before_pass_rate: beforePassed / total,
171
- after_pass_rate: afterPassed / total,
172
- improved: afterPassed > beforePassed,
173
- validation_mode: "host_replay",
174
- validation_agent: agent,
175
- validation_fixture_id: options.replayFixture.fixture_id,
176
- per_entry_results: afterResults,
177
- };
178
- }
179
-
180
- const systemPrompt = "You are an evaluation assistant. Answer only YES or NO.";
181
- let beforePassed = 0;
182
- let afterPassed = 0;
183
-
184
- for (const entry of evalSet) {
185
- // Check with original routing
186
- const beforePrompt = buildTriggerCheckPrompt(originalRouting, entry.query);
187
- const beforeRaw = await callLlm(systemPrompt, beforePrompt, agent, modelFlag);
188
- const beforeTriggered = parseTriggerResponse(beforeRaw);
189
- const beforePass =
190
- (entry.should_trigger && beforeTriggered) || (!entry.should_trigger && !beforeTriggered);
191
-
192
- // Check with proposed routing
193
- const afterPrompt = buildTriggerCheckPrompt(proposedRouting, entry.query);
194
- const afterRaw = await callLlm(systemPrompt, afterPrompt, agent, modelFlag);
195
- const afterTriggered = parseTriggerResponse(afterRaw);
196
- const afterPass =
197
- (entry.should_trigger && afterTriggered) || (!entry.should_trigger && !afterTriggered);
125
+ // Try replay-backed validation first
126
+ const replayResult = await runReplayValidation(
127
+ originalRouting,
128
+ proposedRouting,
129
+ evalSet,
130
+ agent,
131
+ options,
132
+ );
198
133
 
199
- if (beforePass) beforePassed++;
200
- if (afterPass) afterPassed++;
134
+ if (replayResult) {
135
+ return replayResult;
201
136
  }
202
137
 
203
- const total = evalSet.length;
204
- const beforePassRate = beforePassed / total;
205
- const afterPassRate = afterPassed / total;
138
+ // Fall back to LLM judge
139
+ const judgeResult = await runJudgeValidation(
140
+ originalRouting,
141
+ proposedRouting,
142
+ evalSet,
143
+ agent,
144
+ modelFlag,
145
+ );
206
146
 
207
147
  return {
208
- before_pass_rate: beforePassRate,
209
- after_pass_rate: afterPassRate,
210
- improved: afterPassRate > beforePassRate,
211
- validation_mode: "llm_judge",
212
- validation_agent: agent,
148
+ before_pass_rate: judgeResult.before_pass_rate,
149
+ after_pass_rate: judgeResult.after_pass_rate,
150
+ improved: judgeResult.improved,
151
+ validation_mode: judgeResult.validation_mode,
152
+ validation_agent: judgeResult.validation_agent,
213
153
  };
214
154
  }
215
155
 
@@ -223,7 +163,7 @@ export async function validateRoutingProposal(
223
163
  evalSet: EvalEntry[],
224
164
  agent: string,
225
165
  modelFlag?: string,
226
- options: RoutingValidationOptions = {},
166
+ options: ReplayValidationOptions = {},
227
167
  ): Promise<BodyValidationResult> {
228
168
  const gateResults: Array<{ gate: string; passed: boolean; reason: string }> = [];
229
169
 
@@ -280,5 +220,6 @@ export async function validateRoutingProposal(
280
220
  before_pass_rate: accuracy.before_pass_rate,
281
221
  after_pass_rate: accuracy.after_pass_rate,
282
222
  per_entry_results: accuracy.per_entry_results,
223
+ before_entry_results: accuracy.before_entry_results,
283
224
  };
284
225
  }
@@ -25,7 +25,7 @@ import {
25
25
  getLatestPromptIdentity,
26
26
  } from "../normalization.js";
27
27
  import type { PostToolUsePayload, SkillUsageRecord } from "../types.js";
28
- import { classifySkillPath } from "../utils/skill-discovery.js";
28
+ import { classifySkillPath, isTestFixturePath } from "../utils/skill-discovery.js";
29
29
  import { getLastUserMessage } from "../utils/transcript.js";
30
30
 
31
31
  /**
@@ -122,6 +122,7 @@ export async function processToolUse(
122
122
  const skillName = extractSkillName(filePath);
123
123
 
124
124
  if (skillName === null) return null;
125
+ if (isTestFixturePath(filePath)) return null;
125
126
 
126
127
  const transcriptPath = payload.transcript_path ?? "";
127
128
  const sessionId = payload.session_id ?? "unknown";
@@ -83,6 +83,7 @@ export const PLATFORM_EVENT_MAP: Record<HookPlatform, Partial<Record<HookEventTy
83
83
  session_end: "TaskComplete",
84
84
  },
85
85
  pi: {
86
+ prompt_submit: "message",
86
87
  pre_tool_use: "tool_call",
87
88
  post_tool_use: "tool_result",
88
89
  session_end: "session_shutdown",
@@ -3,7 +3,7 @@
3
3
  * selftune CLI entry point.
4
4
  *
5
5
  * Usage:
6
- * selftune ingest <agent> — Ingest agent sessions (claude, codex, opencode, openclaw, wrap-codex)
6
+ * selftune ingest <agent> — Ingest agent sessions (claude, codex, opencode, openclaw, pi, wrap-codex)
7
7
  * selftune grade [mode] — Grade skill sessions (auto, baseline)
8
8
  * selftune evolve [target] — Evolve skill descriptions (body, rollback)
9
9
  * selftune eval <action> — Evaluation tools (generate, unit-test, import, composability, family-overlap)
@@ -28,11 +28,13 @@
28
28
  * selftune export-canonical — Export canonical telemetry for downstream ingestion
29
29
  * selftune recover — Recover SQLite from legacy/exported JSONL
30
30
  * selftune telemetry — Manage anonymous usage analytics (status, enable, disable)
31
+ * selftune registry <sub> — Team skill distribution (push, install, sync, status, rollback, history, list)
31
32
  * selftune alpha <subcommand> — Alpha program management (upload)
32
33
  * selftune hook <name> — Run a hook by name (prompt-log, session-stop, etc.)
33
34
  * selftune codex <subcommand> — Codex platform hooks (hook, install)
34
35
  * selftune opencode <sub> — OpenCode platform hooks (hook, install)
35
36
  * selftune cline <subcommand> — Cline platform hooks (hook, install)
37
+ * selftune pi <subcommand> — Pi platform hooks (hook, install)
36
38
  */
37
39
 
38
40
  import { CLIError, handleCLIError } from "./utils/cli-error.js";
@@ -49,7 +51,7 @@ Usage:
49
51
  selftune <command> [options]
50
52
 
51
53
  Commands:
52
- ingest <agent> Ingest agent sessions (claude, codex, opencode, openclaw, wrap-codex)
54
+ ingest <agent> Ingest agent sessions (claude, codex, opencode, openclaw, pi, wrap-codex)
53
55
  grade [mode] Grade skill sessions (auto, baseline)
54
56
  evolve [target] Evolve skill descriptions (body, rollback)
55
57
  eval <action> Evaluation tools (generate, unit-test, import, composability, family-overlap)
@@ -73,19 +75,21 @@ Commands:
73
75
  export Export SQLite data to JSONL snapshots
74
76
  export-canonical Export canonical telemetry for downstream ingestion
75
77
  recover Recover SQLite from legacy/exported JSONL
78
+ registry <sub> Team skill distribution (push, install, sync, status, rollback, history, list)
76
79
  alpha <subcommand> Alpha program management (upload)
77
80
  telemetry Manage anonymous usage analytics (status, enable, disable)
78
81
  hook <name> Run a hook by name (prompt-log, session-stop, etc.)
79
82
  codex <sub> Codex platform hooks (hook, install)
80
83
  opencode <sub> OpenCode platform hooks (hook, install)
81
84
  cline <sub> Cline platform hooks (hook, install)
85
+ pi <sub> Pi platform hooks (hook, install)
82
86
 
83
87
  Run 'selftune <command> --help' for command-specific options.`);
84
88
  process.exit(0);
85
89
  }
86
90
 
87
91
  // Fast-path commands (real-time hooks) — skip analytics and auto-update to minimize latency
88
- const FAST_COMMANDS: ReadonlySet<string> = new Set(["hook", "codex", "opencode", "cline"]);
92
+ const FAST_COMMANDS: ReadonlySet<string> = new Set(["hook", "codex", "opencode", "cline", "pi"]);
89
93
 
90
94
  // Track command usage (lazy import — skip for hooks and --help to avoid loading crypto/os)
91
95
  if (command && !FAST_COMMANDS.has(command) && command !== "--help" && command !== "-h") {
@@ -129,6 +133,7 @@ Agents:
129
133
  codex Ingest Codex rollout logs (experimental)
130
134
  opencode Ingest OpenCode sessions (experimental)
131
135
  openclaw Ingest OpenClaw sessions (experimental)
136
+ pi Ingest Pi sessions (experimental)
132
137
  wrap-codex Wrap codex exec with real-time telemetry (experimental)
133
138
 
134
139
  Run 'selftune ingest <agent> --help' for agent-specific options.`);
@@ -157,6 +162,11 @@ Run 'selftune ingest <agent> --help' for agent-specific options.`);
157
162
  cliMain();
158
163
  break;
159
164
  }
165
+ case "pi": {
166
+ const { cliMain } = await import("./ingestors/pi-ingest.js");
167
+ cliMain();
168
+ break;
169
+ }
160
170
  case "wrap-codex": {
161
171
  const { cliMain } = await import("./ingestors/codex-wrapper.js");
162
172
  await cliMain();
@@ -620,6 +630,11 @@ Options:
620
630
  await cliMain();
621
631
  break;
622
632
  }
633
+ case "registry": {
634
+ const { cliMain } = await import("./registry/index.js");
635
+ await cliMain();
636
+ break;
637
+ }
623
638
  case "alpha": {
624
639
  const sub = process.argv[2];
625
640
  if (!sub || sub === "--help" || sub === "-h") {
@@ -828,9 +843,12 @@ Output:
828
843
 
829
844
  case "codex":
830
845
  case "opencode":
831
- case "cline": {
846
+ case "cline":
847
+ case "pi": {
832
848
  const platform = command;
833
- const displayName = { codex: "Codex", opencode: "OpenCode", cline: "Cline" }[platform];
849
+ const displayName = { codex: "Codex", opencode: "OpenCode", cline: "Cline", pi: "Pi" }[
850
+ platform
851
+ ];
834
852
  const sub = process.argv[2];
835
853
  if (!sub || sub === "--help" || sub === "-h") {
836
854
  console.log(`selftune ${platform} — ${displayName} platform hooks