selftune 0.2.22 → 0.2.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. package/README.md +4 -2
  2. package/apps/local-dashboard/dist/assets/index-CwOtTrUS.css +1 -0
  3. package/apps/local-dashboard/dist/assets/index-f1HQpbeH.js +59 -0
  4. package/apps/local-dashboard/dist/assets/vendor-ui-jVSaIZey.js +12 -0
  5. package/apps/local-dashboard/dist/index.html +3 -3
  6. package/cli/selftune/adapters/pi/hook.ts +273 -0
  7. package/cli/selftune/adapters/pi/install.ts +207 -0
  8. package/cli/selftune/constants.ts +10 -1
  9. package/cli/selftune/dashboard-contract.ts +14 -0
  10. package/cli/selftune/evolution/engines/judge-engine.ts +96 -0
  11. package/cli/selftune/evolution/engines/replay-engine.ts +158 -0
  12. package/cli/selftune/evolution/evidence.ts +2 -6
  13. package/cli/selftune/evolution/evolve-body.ts +73 -20
  14. package/cli/selftune/evolution/validate-body.ts +78 -42
  15. package/cli/selftune/evolution/validate-routing.ts +45 -104
  16. package/cli/selftune/hooks/skill-eval.ts +2 -1
  17. package/cli/selftune/hooks-shared/types.ts +1 -0
  18. package/cli/selftune/index.ts +23 -5
  19. package/cli/selftune/ingestors/pi-ingest.ts +726 -0
  20. package/cli/selftune/init.ts +11 -1
  21. package/cli/selftune/localdb/direct-write.ts +85 -0
  22. package/cli/selftune/localdb/materialize.ts +6 -7
  23. package/cli/selftune/localdb/queries.ts +126 -0
  24. package/cli/selftune/localdb/schema.ts +38 -0
  25. package/cli/selftune/observability.ts +8 -1
  26. package/cli/selftune/orchestrate.ts +43 -0
  27. package/cli/selftune/registry/client.ts +74 -0
  28. package/cli/selftune/registry/history.ts +54 -0
  29. package/cli/selftune/registry/index.ts +90 -0
  30. package/cli/selftune/registry/install.ts +141 -0
  31. package/cli/selftune/registry/list.ts +44 -0
  32. package/cli/selftune/registry/push.ts +171 -0
  33. package/cli/selftune/registry/rollback.ts +49 -0
  34. package/cli/selftune/registry/status.ts +62 -0
  35. package/cli/selftune/registry/sync.ts +125 -0
  36. package/cli/selftune/repair/skill-usage.ts +4 -1
  37. package/cli/selftune/status.ts +31 -0
  38. package/cli/selftune/sync.ts +127 -23
  39. package/cli/selftune/types.ts +2 -1
  40. package/cli/selftune/utils/jsonl.ts +1 -30
  41. package/cli/selftune/utils/skill-discovery.ts +22 -0
  42. package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
  43. package/node_modules/@selftune/telemetry-contract/fixtures/golden.test.ts +0 -1
  44. package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
  45. package/node_modules/@selftune/telemetry-contract/package.json +1 -1
  46. package/node_modules/@selftune/telemetry-contract/src/index.ts +1 -0
  47. package/node_modules/@selftune/telemetry-contract/src/schemas.ts +22 -4
  48. package/node_modules/@selftune/telemetry-contract/src/types.ts +1 -12
  49. package/node_modules/@selftune/telemetry-contract/tests/compatibility.test.ts +0 -1
  50. package/package.json +1 -1
  51. package/packages/telemetry-contract/fixtures/evidence-only-push.ts +1 -1
  52. package/packages/telemetry-contract/fixtures/golden.test.ts +0 -1
  53. package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +1 -1
  54. package/packages/telemetry-contract/package.json +1 -1
  55. package/packages/telemetry-contract/src/index.ts +1 -0
  56. package/packages/telemetry-contract/src/schemas.ts +22 -4
  57. package/packages/telemetry-contract/src/types.ts +1 -12
  58. package/packages/telemetry-contract/tests/compatibility.test.ts +0 -1
  59. package/packages/ui/AGENTS.md +16 -0
  60. package/packages/ui/README.md +1 -1
  61. package/packages/ui/package.json +1 -1
  62. package/packages/ui/src/components/ActivityTimeline.tsx +152 -168
  63. package/packages/ui/src/components/AnalyticsCharts.tsx +344 -0
  64. package/packages/ui/src/components/EvidenceViewer.tsx +153 -443
  65. package/packages/ui/src/components/EvolutionTimeline.tsx +34 -87
  66. package/packages/ui/src/components/InfoTip.tsx +1 -2
  67. package/packages/ui/src/components/InvocationsPanel.tsx +413 -0
  68. package/packages/ui/src/components/JobHistoryTimeline.tsx +156 -0
  69. package/packages/ui/src/components/OrchestrateRunsPanel.tsx +18 -36
  70. package/packages/ui/src/components/OverviewPanels.tsx +652 -0
  71. package/packages/ui/src/components/PipelineStatusBar.tsx +65 -0
  72. package/packages/ui/src/components/SkillReportGuide.tsx +215 -0
  73. package/packages/ui/src/components/SkillReportPanels.tsx +919 -0
  74. package/packages/ui/src/components/SkillsLibrary.tsx +437 -0
  75. package/packages/ui/src/components/index.ts +56 -1
  76. package/packages/ui/src/components/section-cards.tsx +18 -35
  77. package/packages/ui/src/components/skill-health-grid.tsx +47 -37
  78. package/packages/ui/src/lib/constants.tsx +0 -1
  79. package/packages/ui/src/primitives/card.tsx +1 -1
  80. package/packages/ui/src/primitives/checkbox.tsx +1 -1
  81. package/packages/ui/src/primitives/dropdown-menu.tsx +2 -2
  82. package/packages/ui/src/primitives/select.tsx +2 -2
  83. package/packages/ui/src/types.ts +172 -4
  84. package/skill/SKILL.md +18 -4
  85. package/skill/Workflows/Ingest.md +60 -2
  86. package/skill/Workflows/Initialize.md +8 -5
  87. package/skill/Workflows/PlatformHooks.md +19 -3
  88. package/skill/Workflows/Registry.md +99 -0
  89. package/skill/Workflows/Sync.md +3 -1
  90. package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +0 -60
  91. package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +0 -1
  92. package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +0 -12
  93. package/cli/selftune/utils/html.ts +0 -27
  94. package/packages/ui/src/components/RecentActivityFeed.tsx +0 -117
@@ -0,0 +1,273 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Pi hook adapter for selftune.
4
+ *
5
+ * Reads Pi hook payloads from stdin and delegates to shared selftune hook logic.
6
+ * Pi extensions emit events for tool calls, tool results, and session lifecycle.
7
+ *
8
+ * Usage: echo '$HOOK_PAYLOAD' | selftune pi hook
9
+ *
10
+ * Event routing:
11
+ * tool_call -> skill-change-guard + evolution-guard (PreToolUse)
12
+ * tool_result -> skill-eval (processToolUse) + commit-track (processCommitTrack)
13
+ * message (user) -> prompt-log (processPrompt) + auto-activate
14
+ * session_shutdown -> session-stop (processSessionStop)
15
+ *
16
+ * Fail-open: any unhandled error -> exit 0, never crash the host agent.
17
+ */
18
+
19
+ import type {
20
+ PostToolUsePayload,
21
+ PreToolUsePayload,
22
+ PromptSubmitPayload,
23
+ StopPayload,
24
+ } from "../../types.js";
25
+
26
+ // ---------------------------------------------------------------------------
27
+ // Types
28
+ // ---------------------------------------------------------------------------
29
+
30
/**
 * JSON document read from stdin by the Pi hook adapter.
 *
 * One interface covers every event kind, so all named fields are optional;
 * the index signature accepts any additional keys as `unknown`.
 */
export interface PiHookPayload {
  // Event routing and session identity.
  event_type?: string;
  session_id?: string;
  cwd?: string;

  // Tool-related fields (forwarded by the tool_call / tool_result handlers).
  tool_name?: string;
  tool_input?: Record<string, unknown>;
  tool_use_id?: string;
  tool_output?: Record<string, unknown>;

  // Prompt text; the prompt handler falls back from one field to the other.
  prompt?: string;
  user_prompt?: string;

  model?: string;
  provider?: string;

  // Forwarded to the session-stop handler when it is a string.
  last_assistant_message?: string;

  [key: string]: unknown;
}
46
+
47
/** JSON object serialized to stdout for the host; an empty object means no-op. */
type HookResponse = Record<string, unknown>;

/** Shared no-op response used whenever a handler has nothing to report. */
const EMPTY_RESPONSE: HookResponse = {};
51
+
52
+ // ---------------------------------------------------------------------------
53
+ // Event handlers (dynamic imports for fast startup)
54
+ // ---------------------------------------------------------------------------
55
+
56
/**
 * Handles a Pi `message` event.
 *
 * Two independent, fail-open steps run in order:
 *  1. the prompt is forwarded to `processPrompt` (prompt-log hook);
 *  2. `processAutoActivate` is asked for suggestions, which — if any — are
 *     returned to the host as `additionalContext`, one line per suggestion.
 *
 * Errors in either step are swallowed on purpose so the host agent is never
 * disturbed; in that case the empty response is returned.
 */
async function handlePromptSubmit(payload: PiHookPayload): Promise<HookResponse> {
  // Step 1: prompt logging.
  try {
    const { processPrompt } = await import("../../hooks/prompt-log.js");
    // `prompt` and `user_prompt` back each other up so whichever one Pi sent
    // is visible under both names.
    const logEntry: PromptSubmitPayload = {
      session_id: payload.session_id,
      cwd: payload.cwd,
      prompt: payload.prompt ?? payload.user_prompt,
      user_prompt: payload.user_prompt ?? payload.prompt,
      hook_event_name: "UserPromptSubmit",
    };
    await processPrompt(logEntry);
  } catch {
    // fail-open
  }

  // Step 2: auto-activate suggestions.
  try {
    const { processAutoActivate } = await import("../../hooks/auto-activate.js");
    const hints = await processAutoActivate(payload.session_id ?? "unknown");
    if (hints.length > 0) {
      const lines = hints.map((hint) => `[selftune] Suggestion: ${hint}`);
      return { additionalContext: lines.join("\n") };
    }
  } catch {
    // fail-open
  }

  return EMPTY_RESPONSE;
}
88
+
89
/**
 * Handles a Pi `tool_call` event by running the pre-tool guards.
 *
 * The evolution guard runs first; when it returns a result, its message is
 * written to stderr and its exit code is handed back to the caller. The
 * skill-change guard is advisory: it can only write a suggestion to stderr.
 * Every step is fail-open — an import or guard error falls through to the
 * next step, and the default outcome is the empty response with exit code 0.
 */
async function handlePreToolUse(
  payload: PiHookPayload,
): Promise<{ response: HookResponse; exitCode: number }> {
  const toolEvent: PreToolUsePayload = {
    tool_name: payload.tool_name ?? "",
    tool_input: payload.tool_input ?? {},
    tool_use_id: payload.tool_use_id,
    session_id: payload.session_id,
    cwd: payload.cwd,
    hook_event_name: "PreToolUse",
  };

  let paths:
    | { EVOLUTION_AUDIT_LOG: string; SELFTUNE_CONFIG_DIR: string; SESSION_STATE_DIR: string }
    | undefined;
  try {
    paths = await import("../../constants.js");
  } catch {
    // fail-open
  }

  // Both guards need the constants; without them there is nothing to run.
  if (!paths) {
    return { response: EMPTY_RESPONSE, exitCode: 0 };
  }

  // 1. Evolution guard — the only step that can return a non-zero exit code.
  try {
    const { processEvolutionGuard } = await import("../../hooks/evolution-guard.js");
    const verdict = await processEvolutionGuard(toolEvent, {
      auditLogPath: paths.EVOLUTION_AUDIT_LOG,
      selftuneDir: paths.SELFTUNE_CONFIG_DIR,
    });
    if (verdict) {
      process.stderr.write(`${verdict.message}\n`);
      return { response: EMPTY_RESPONSE, exitCode: verdict.exitCode };
    }
  } catch {
    // fail-open
  }

  // 2. Skill change guard — advisory only, never blocks.
  try {
    const { processPreToolUse } = await import("../../hooks/skill-change-guard.js");
    // The session id ends up in a file name, so anything outside
    // [a-zA-Z0-9_-] is replaced with an underscore.
    const sessionKey = (payload.session_id ?? "unknown").replace(/[^a-zA-Z0-9_-]/g, "_");
    const statePath = `${paths.SESSION_STATE_DIR}/guard-state-${sessionKey}.json`;
    const advice = processPreToolUse(toolEvent, statePath);
    if (advice) {
      process.stderr.write(`[selftune] Suggestion: ${advice}\n`);
    }
  } catch {
    // fail-open
  }

  return { response: EMPTY_RESPONSE, exitCode: 0 };
}
145
+
146
/**
 * Handles a Pi `tool_result` event.
 *
 * Translates the payload into a `PostToolUsePayload` (Pi's `tool_output`
 * becomes `tool_response`) and feeds it to the skill-eval and commit-track
 * hooks, one after the other. Each step is isolated: a failure in one is
 * swallowed and does not prevent the next from running. Always resolves to
 * the empty response.
 */
async function handlePostToolUse(payload: PiHookPayload): Promise<HookResponse> {
  const toolEvent: PostToolUsePayload = {
    tool_name: payload.tool_name ?? "",
    tool_input: payload.tool_input ?? {},
    tool_use_id: payload.tool_use_id,
    tool_response: payload.tool_output,
    session_id: payload.session_id,
    cwd: payload.cwd,
    hook_event_name: "PostToolUse",
  };

  const steps: Array<() => Promise<void>> = [
    // 1. Skill eval (Read/Skill tool usage tracking)
    async () => {
      const { processToolUse } = await import("../../hooks/skill-eval.js");
      await processToolUse(toolEvent);
    },
    // 2. Commit tracking (git commit detection in Bash output)
    async () => {
      const { processCommitTrack } = await import("../../hooks/commit-track.js");
      await processCommitTrack(toolEvent);
    },
  ];

  // Strictly sequential, each step with its own fail-open guard.
  for (const step of steps) {
    try {
      await step();
    } catch {
      // fail-open
    }
  }

  return EMPTY_RESPONSE;
}
175
+
176
/**
 * Handles a Pi `session_shutdown` event by forwarding a `Stop` payload to
 * `processSessionStop`. `last_assistant_message` is passed along only when it
 * is a string. Errors are swallowed (fail-open); the empty response is always
 * returned.
 */
async function handleSessionEnd(payload: PiHookPayload): Promise<HookResponse> {
  try {
    const { processSessionStop } = await import("../../hooks/session-stop.js");
    const finalMessage = payload.last_assistant_message;
    const stopEvent: StopPayload = {
      session_id: payload.session_id,
      cwd: payload.cwd,
      last_assistant_message: typeof finalMessage === "string" ? finalMessage : undefined,
      hook_event_name: "Stop",
    };
    await processSessionStop(stopEvent);
  } catch {
    // fail-open
  }
  return EMPTY_RESPONSE;
}
194
+
195
+ // ---------------------------------------------------------------------------
196
+ // Main entry point
197
+ // ---------------------------------------------------------------------------
198
+
199
/**
 * Serializes `response` as JSON to stdout and terminates the process with
 * `code`. The exit happens inside the write callback, i.e. only after stdout
 * has accepted the data.
 */
function writeResponseAndExit(response: HookResponse, code: number): void {
  process.stdout.write(JSON.stringify(response), () => {
    process.exit(code);
  });
}
205
+
206
/**
 * CLI entry point for `selftune pi hook`.
 *
 * Reads the whole of stdin, parses it as a Pi hook payload and dispatches on
 * `event_type`:
 *   - "message"          -> handlePromptSubmit
 *   - "tool_call"        -> handlePreToolUse (may supply a non-zero exit code)
 *   - "tool_result"      -> handlePostToolUse
 *   - "session_shutdown" -> handleSessionEnd
 *
 * Empty input, invalid JSON, a missing/non-string `event_type`, an unknown
 * event, or any unexpected error all produce the empty response with exit
 * code 0, so the host agent is never crashed by this hook.
 *
 * NOTE(review): the file header describes the prompt route as
 * "message (user)", but no role check is applied here — confirm that Pi only
 * delivers user messages under this event type.
 */
export async function cliMain(): Promise<void> {
  try {
    const raw = await Bun.stdin.text();

    // Fast-path: nothing on stdin -> no-op.
    if (raw.trim() === "") {
      writeResponseAndExit(EMPTY_RESPONSE, 0);
      return;
    }

    let payload: PiHookPayload;
    try {
      payload = JSON.parse(raw) as PiHookPayload;
    } catch {
      writeResponseAndExit(EMPTY_RESPONSE, 0);
      return;
    }

    const eventType = typeof payload.event_type === "string" ? payload.event_type : "";
    if (!eventType) {
      writeResponseAndExit(EMPTY_RESPONSE, 0);
      return;
    }

    let response: HookResponse = EMPTY_RESPONSE;
    let exitCode = 0;

    if (eventType === "message") {
      response = await handlePromptSubmit(payload);
    } else if (eventType === "tool_call") {
      const outcome = await handlePreToolUse(payload);
      response = outcome.response;
      exitCode = outcome.exitCode;
    } else if (eventType === "tool_result") {
      response = await handlePostToolUse(payload);
    } else if (eventType === "session_shutdown") {
      response = await handleSessionEnd(payload);
    }
    // Any other event type is a no-op and keeps the empty response.

    writeResponseAndExit(response, exitCode);
  } catch {
    // Fail-open: never crash
    writeResponseAndExit(EMPTY_RESPONSE, 0);
  }
}
269
+
270
// Entry-point guard: run the hook CLI only when this file is executed
// directly, not when it is imported as a module.
if (import.meta.main) {
  await cliMain();
}
@@ -0,0 +1,207 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Install selftune hooks into Pi coding agent environment.
4
+ *
5
+ * Pi supports extensions that hook into its lifecycle. This installer
6
+ * creates a selftune extension that pipes events to `selftune pi hook`.
7
+ *
8
+ * Extension location: ~/.pi/extensions/selftune/
9
+ *
10
+ * Events hooked:
11
+ * - tool_call (pre-tool — skill guards, inline)
12
+ * - tool_result (post-tool — skill eval + commit tracking, inline)
13
+ * - message (prompt submit — prompt logging + auto-activate, inline)
14
+ * - session_shutdown (session end — session telemetry, background)
15
+ *
16
+ * Usage: selftune pi install [--dry-run] [--uninstall]
17
+ */
18
+
19
+ import { chmodSync, existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
20
+ import { homedir } from "node:os";
21
+ import { join } from "node:path";
22
+
23
/** Pi home directory; `SELFTUNE_PI_DIR` overrides the default `~/.pi`. */
const PI_DIR = process.env.SELFTUNE_PI_DIR ?? join(homedir(), ".pi");

/** Directory the selftune hook scripts are written to. */
const PI_EXTENSIONS_DIR = join(PI_DIR, "extensions", "selftune");

/** Line embedded in every generated script; identifies files owned by selftune. */
const MARKER = "# selftune-managed";
26
+
27
+ // ---------------------------------------------------------------------------
28
+ // Hook script generators
29
+ // ---------------------------------------------------------------------------
30
+
31
/**
 * Shell snippet that invokes the hook CLI: the binary named by
 * `SELFTUNE_CLI_PATH` when that variable is non-empty, otherwise
 * `npx selftune`.
 */
const HOOK_CMD = [
  'if [ -n "$SELFTUNE_CLI_PATH" ]; then',
  '"$SELFTUNE_CLI_PATH" pi hook;',
  "else npx selftune pi hook;",
  "fi",
].join(" ");
34
+
35
/**
 * Generates the bash wrapper script installed for one Pi event.
 *
 * Every script starts with a bash shebang and the selftune marker line, reads
 * the whole payload from stdin and pipes it to the hook command.
 *
 * - Inline (`inline === true`): waits for the hook, prints its stdout (or
 *   `{}` when the hook printed nothing) and exits with the hook's exit code,
 *   so a guard block (exit 2) is preserved. The hook's stderr is passed
 *   through rather than discarded: the hook adapter reports the guard's block
 *   reason and skill-change suggestions on stderr, and sending that stream to
 *   /dev/null would make those messages unreachable. stdout is still captured
 *   separately, so the JSON response is unaffected.
 * - Background (`inline === false`): starts the hook detached with all output
 *   discarded and immediately prints `{}` so Pi is not blocked.
 *
 * The payload is fed with `printf '%s\n'` instead of `echo`, because `echo`
 * may interpret leading options or backslash escapes depending on shell
 * options, which could alter a JSON payload.
 *
 * @param eventType Pi event name. Currently unused by the generated script;
 *   kept so the call signature stays stable.
 * @param inline Whether Pi should wait for the hook to finish.
 * @returns The full script text, terminated by a newline.
 */
function hookScript(eventType: string, inline: boolean): string {
  const feedInput = `printf '%s\\n' "$input" | (${HOOK_CMD})`;
  const preamble = ["#!/usr/bin/env bash", MARKER, "input=$(cat)"];

  const body = inline
    ? [
        // Capture stdout only; $? is the exit status of the hook command.
        `result=$(${feedInput})`,
        "rc=$?",
        `[ -z "$result" ] && result='{}'`,
        `printf '%s\\n' "$result"`,
        "exit $rc",
      ]
    : [
        // Background — don't block Pi
        `${feedInput} &>/dev/null &`,
        "echo '{}'",
      ];

  return `${[...preamble, ...body].join("\n")}\n`;
}
58
+
59
+ // ---------------------------------------------------------------------------
60
+ // Hook definitions
61
+ // ---------------------------------------------------------------------------
62
+
63
/** One installed hook script: file name (the Pi event), purpose, and run mode. */
type HookDefinition = { name: string; description: string; inline: boolean };

/**
 * Hooks managed by the installer. `inline: true` scripts are generated in the
 * blocking form; `inline: false` scripts run the hook in the background.
 */
const HOOKS: HookDefinition[] = [
  {
    name: "tool_call",
    description: "Pre-tool guards (evolution, skill change)",
    inline: true,
  },
  {
    name: "tool_result",
    description: "Post-tool eval + commit tracking",
    inline: true,
  },
  {
    name: "message",
    description: "Prompt logging + auto-activate",
    inline: true,
  },
  {
    name: "session_shutdown",
    description: "Session telemetry recording",
    inline: false,
  },
];
69
+
70
+ // ---------------------------------------------------------------------------
71
+ // Install
72
+ // ---------------------------------------------------------------------------
73
+
74
/**
 * Installs (or refreshes) the selftune hook scripts in the Pi extensions
 * directory and prints a summary.
 *
 * For each entry in `HOOKS`:
 *  - an existing file that does not contain the selftune marker is left
 *    untouched and counted as skipped;
 *  - otherwise the script is (re)written and counted as installed.
 *
 * Both the create and the update path write the file and then `chmod` it to
 * 0o755. The `mode` option of `writeFileSync` only applies to newly created
 * files and is filtered by the process umask, so the explicit chmod is what
 * guarantees the same permissions in both cases.
 *
 * @param dryRun When true, nothing is written; the function only reports
 *   what it would do.
 */
function installHooks(dryRun: boolean): void {
  console.log("Setting up selftune hooks for Pi...");
  console.log(`Extensions directory: ${PI_EXTENSIONS_DIR}`);
  console.log("");

  if (!dryRun) {
    mkdirSync(PI_EXTENSIONS_DIR, { recursive: true });
  }

  let installed = 0;
  let skipped = 0;

  for (const hook of HOOKS) {
    const hookPath = join(PI_EXTENSIONS_DIR, hook.name);
    const alreadyPresent = existsSync(hookPath);

    // Never overwrite a file selftune did not create.
    if (alreadyPresent && !readFileSync(hookPath, "utf-8").includes(MARKER)) {
      console.log(` Skipped: ${hook.name} (existing hook not managed by selftune)`);
      skipped++;
      continue;
    }

    if (dryRun) {
      console.log(
        alreadyPresent ? ` Would update: ${hook.name}` : ` Would create: ${hook.name}`,
      );
    } else {
      writeFileSync(hookPath, hookScript(hook.name, hook.inline), { mode: 0o755 });
      // Explicit chmod: the mode option is umask-filtered on creation and
      // ignored for files that already exist.
      chmodSync(hookPath, 0o755);
      console.log(alreadyPresent ? ` Updated: ${hook.name}` : ` Created: ${hook.name}`);
    }
    installed++;
  }

  console.log("");
  if (dryRun) {
    console.log(`Dry run: ${installed} hook(s) would be installed.`);
  } else if (installed > 0) {
    console.log(`Installed ${installed} hook(s).`);
  }
  if (skipped > 0) {
    console.log(`Skipped ${skipped} hook(s) with existing non-selftune content.`);
  }
  if (!dryRun && installed > 0) {
    console.log("");
    if (skipped === 0) {
      console.log("Pi will now track commits and record session telemetry.");
    } else {
      console.log("Partial install: some hooks were skipped. Telemetry may be incomplete.");
    }
    console.log("Run `selftune status` to verify setup.");
  }
}
134
+
135
+ // ---------------------------------------------------------------------------
136
+ // Uninstall
137
+ // ---------------------------------------------------------------------------
138
+
139
/**
 * Removes the hook scripts listed in `HOOKS` from the Pi extensions directory
 * and prints a summary.
 *
 * Only files that contain the selftune marker are deleted; files without it
 * are reported and counted as skipped, and missing files are just reported.
 *
 * @param dryRun When true, nothing is deleted; the function only reports what
 *   it would remove.
 */
function uninstallHooks(dryRun: boolean): void {
  console.log("Removing selftune hooks from Pi...");
  console.log("");

  let removed = 0;
  let skipped = 0;

  for (const { name } of HOOKS) {
    const target = join(PI_EXTENSIONS_DIR, name);

    if (!existsSync(target)) {
      console.log(` Not found: ${name}`);
      continue;
    }

    const managed = readFileSync(target, "utf-8").includes(MARKER);
    if (!managed) {
      console.log(` Skipped: ${name} (not managed by selftune)`);
      skipped++;
      continue;
    }

    // Delete first, then report, so a failed removal is never logged as done.
    if (!dryRun) {
      rmSync(target);
    }
    console.log(dryRun ? ` Would remove: ${name}` : ` Removed: ${name}`);
    removed++;
  }

  console.log("");
  if (dryRun) {
    console.log(`Dry run: ${removed} hook(s) would be removed.`);
  } else if (removed > 0) {
    console.log(`Removed ${removed} hook(s).`);
  }
  if (skipped > 0) {
    console.log(`Skipped ${skipped} hook(s) not managed by selftune.`);
  }
}
180
+
181
+ // ---------------------------------------------------------------------------
182
+ // Main entry point
183
+ // ---------------------------------------------------------------------------
184
+
185
/**
 * CLI entry point for `selftune pi install`.
 *
 * Recognized flags (read from `process.argv`):
 *   --dry-run    report actions without touching the file system
 *   --uninstall  remove the managed hooks instead of installing them
 */
export async function cliMain(): Promise<void> {
  const flags = process.argv.slice(2);
  const dryRun = flags.includes("--dry-run");
  const action = flags.includes("--uninstall") ? uninstallHooks : installHooks;
  action(dryRun);
}
196
+
197
// Entry-point guard: run the installer only when this file is executed
// directly, not when it is imported. Any failure is reported on stderr and
// turned into exit code 1.
if (import.meta.main) {
  await cliMain().catch((err: unknown) => {
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`[selftune] Pi install failed: ${reason}`);
    process.exit(1);
  });
}
@@ -13,6 +13,8 @@ const claudeHomeDir =
13
13
  const openclawHomeDir =
14
14
  process.env.SELFTUNE_OPENCLAW_DIR ??
15
15
  (resolvedHome ? join(defaultHome, ".openclaw") : join(homedir(), ".openclaw"));
16
+ const piHomeDir =
17
+ process.env.SELFTUNE_PI_DIR ?? (resolvedHome ? join(defaultHome, ".pi") : join(homedir(), ".pi"));
16
18
 
17
19
  export const SELFTUNE_CONFIG_DIR =
18
20
  (process.env.SELFTUNE_CONFIG_DIR || undefined) ??
@@ -100,7 +102,7 @@ export const REQUIRED_FIELDS: Record<string, Set<string>> = {
100
102
  };
101
103
 
102
104
  /** Agent CLI candidates in detection order. */
103
- export const AGENT_CANDIDATES = ["claude", "codex", "opencode", "openclaw"] as const;
105
+ export const AGENT_CANDIDATES = ["claude", "codex", "opencode", "openclaw", "pi"] as const;
104
106
 
105
107
  /** Required Claude Code hook keys in settings.json. */
106
108
  export const CLAUDE_CODE_HOOK_KEYS = [
@@ -158,6 +160,13 @@ export const OPENCLAW_AGENTS_DIR =
158
160
  /** Marker file tracking which OpenClaw sessions have been ingested. */
159
161
  export const OPENCLAW_INGEST_MARKER = join(SELFTUNE_CONFIG_DIR, "openclaw-ingest-marker.json");
160
162
 
163
+ /** Pi sessions directory. */
164
+ export const PI_SESSIONS_DIR =
165
+ process.env.SELFTUNE_PI_SESSIONS_DIR ?? join(piHomeDir, "agent", "sessions");
166
+
167
+ /** Marker file tracking which Pi sessions have been ingested. */
168
+ export const PI_INGEST_MARKER = join(SELFTUNE_CONFIG_DIR, "pi-ingest-marker.json");
169
+
161
170
  /** Default output directory for contribution bundles. */
162
171
  export const CONTRIBUTIONS_DIR = join(SELFTUNE_CONFIG_DIR, "contributions");
163
172
  /** Creator-directed contribution preferences (per-skill opt-in state). */
@@ -397,6 +397,20 @@ export interface HealthResponse {
397
397
  port: number;
398
398
  }
399
399
 
400
+ // -- Replay entry result types ------------------------------------------------
401
+
402
/**
 * One per-query result row of a replay validation.
 * NOTE(review): field meanings below are inferred from the names only —
 * confirm against the code that produces these rows.
 */
export interface ReplayEntryResult {
  // Identity of the row.
  proposal_id: string;
  skill_name: string;
  validation_mode: string;
  phase: string;

  // The evaluated query with its expected and observed trigger outcome.
  query: string;
  should_trigger: boolean;
  triggered: boolean;
  passed: boolean;

  // Optional supporting text; null when none is available.
  evidence: string | null;
}
413
+
400
414
  // -- Doctor / health check types ----------------------------------------------
401
415
  export type { DoctorResult, HealthCheck, HealthStatus } from "./types.js";
402
416
 
@@ -0,0 +1,96 @@
1
+ /**
2
+ * judge-engine.ts
3
+ *
4
+ * LLM judge validation engine: runs trigger accuracy checks using
5
+ * an LLM as a YES/NO judge for each eval entry.
6
+ *
7
+ * Extracted from validate-routing.ts and validate-body.ts to isolate
8
+ * LLM-judge-specific concerns from replay-specific concerns.
9
+ */
10
+
11
+ import type { EvalEntry, ValidationMode } from "../../types.js";
12
+ import { callLlm } from "../../utils/llm-call.js";
13
+ import { buildTriggerCheckPrompt, parseTriggerResponse } from "../../utils/trigger-check.js";
14
+
15
+ // ---------------------------------------------------------------------------
16
+ // Types
17
+ // ---------------------------------------------------------------------------
18
+
19
/** Aggregate outcome of comparing original vs. proposed content on an eval set. */
export interface JudgeValidationResult {
  /** Fraction of eval entries that passed with the original content. */
  before_pass_rate: number;
  /** Fraction of eval entries that passed with the proposed content. */
  after_pass_rate: number;
  /** True when `after_pass_rate` is strictly greater than `before_pass_rate`. */
  improved: boolean;
  /** Queries that passed with the original content but failed with the proposal. */
  regressions: string[];
  /** Validation mode that produced this result. */
  validation_mode: ValidationMode;
  /** Agent name the validation was run with. */
  validation_agent: string;
}
27
+
28
+ // ---------------------------------------------------------------------------
29
+ // Judge validation engine
30
+ // ---------------------------------------------------------------------------
31
+
32
/**
 * Validates a proposed content change with an LLM acting as a YES/NO judge.
 *
 * For every eval entry the judge is asked twice — once with the original
 * content and once with the proposed content — whether the query would
 * trigger. An entry passes when the judge's answer agrees with
 * `should_trigger`. The calls are made strictly one after another.
 *
 * @param originalContent Content currently in use.
 * @param proposedContent Candidate replacement content.
 * @param evalSet Entries to judge; an empty set yields zero pass rates and
 *   `improved: false` without calling the LLM.
 * @param agent Agent name passed to `callLlm` and echoed in the result.
 * @param modelFlag Optional model selector passed through to `callLlm`.
 * @returns Pass rates before/after, whether the rate strictly improved, and
 *   the queries that regressed (passed before, failed after).
 */
export async function runJudgeValidation(
  originalContent: string,
  proposedContent: string,
  evalSet: EvalEntry[],
  agent: string,
  modelFlag?: string,
): Promise<JudgeValidationResult> {
  const summarize = (
    beforeRate: number,
    afterRate: number,
    regressed: string[],
  ): JudgeValidationResult => ({
    before_pass_rate: beforeRate,
    after_pass_rate: afterRate,
    improved: afterRate > beforeRate,
    regressions: regressed,
    validation_mode: "llm_judge",
    validation_agent: agent,
  });

  const total = evalSet.length;
  if (total === 0) {
    return summarize(0, 0, []);
  }

  const systemPrompt = "You are an evaluation assistant. Answer only YES or NO.";

  // Asks the judge about one content/entry pair and reports whether the
  // answer matches the entry's expectation.
  const judge = async (content: string, entry: EvalEntry): Promise<boolean> => {
    const prompt = buildTriggerCheckPrompt(content, entry.query);
    const raw = await callLlm(systemPrompt, prompt, agent, modelFlag);
    const triggered = parseTriggerResponse(raw);
    return (entry.should_trigger && triggered) || (!entry.should_trigger && !triggered);
  };

  let passedBefore = 0;
  let passedAfter = 0;
  const regressions: string[] = [];

  for (const entry of evalSet) {
    const okBefore = await judge(originalContent, entry);
    const okAfter = await judge(proposedContent, entry);

    if (okBefore) passedBefore++;
    if (okAfter) passedAfter++;

    // A regression is an entry the original handled correctly and the
    // proposal does not.
    if (okBefore && !okAfter) {
      regressions.push(entry.query);
    }
  }

  return summarize(passedBefore / total, passedAfter / total, regressions);
}