selftune 0.2.13 → 0.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +2 -0
  2. package/apps/local-dashboard/dist/assets/index-DIrdlu2_.js +16 -0
  3. package/apps/local-dashboard/dist/index.html +2 -2
  4. package/cli/selftune/activation-rules.ts +24 -48
  5. package/cli/selftune/constants.ts +7 -0
  6. package/cli/selftune/contribute/bundle.ts +9 -44
  7. package/cli/selftune/dashboard-contract.ts +12 -0
  8. package/cli/selftune/eval/hooks-to-evals.ts +5 -22
  9. package/cli/selftune/grading/auto-grade.ts +3 -13
  10. package/cli/selftune/grading/grade-session.ts +3 -13
  11. package/cli/selftune/hooks/evolution-guard.ts +14 -24
  12. package/cli/selftune/hooks/prompt-log.ts +0 -8
  13. package/cli/selftune/hooks/session-stop.ts +0 -8
  14. package/cli/selftune/ingestors/codex-rollout.ts +9 -4
  15. package/cli/selftune/ingestors/codex-wrapper.ts +15 -13
  16. package/cli/selftune/ingestors/openclaw-ingest.ts +24 -5
  17. package/cli/selftune/ingestors/opencode-ingest.ts +9 -4
  18. package/cli/selftune/localdb/queries.ts +57 -0
  19. package/cli/selftune/monitoring/watch.ts +7 -22
  20. package/cli/selftune/normalization.ts +2 -23
  21. package/cli/selftune/orchestrate.ts +213 -14
  22. package/cli/selftune/schedule.ts +51 -5
  23. package/cli/selftune/utils/jsonl.ts +2 -0
  24. package/package.json +3 -1
  25. package/packages/ui/src/components/RecentActivityFeed.tsx +86 -0
  26. package/packages/ui/src/components/index.ts +1 -0
  27. package/packages/ui/src/components/section-cards.tsx +13 -0
  28. package/skill/SKILL.md +1 -1
  29. package/skill/Workflows/Orchestrate.md +11 -7
  30. package/skill/Workflows/Schedule.md +11 -0
  31. package/skill/references/logs.md +22 -21
  32. package/apps/local-dashboard/dist/assets/index-4_dAY17K.js +0 -16
  33. package/apps/local-dashboard/dist/assets/index-BxV5WZHc.css +0 -2
@@ -19,6 +19,25 @@ import { parseArgs } from "node:util";
19
19
 
20
20
  import { DEFAULT_CRON_JOBS } from "./cron/setup.js";
21
21
 
22
+ // ---------------------------------------------------------------------------
23
+ // Binary resolution — launchd runs with minimal PATH, so we need full paths
24
+ // ---------------------------------------------------------------------------
25
+
26
/**
 * Locate the `selftune` executable and return its path.
 *
 * Lookup order:
 *   1. `Bun.which("selftune")` — Bun-native lookup, no child process spawned.
 *   2. `~/.bun/bin/selftune` — the common location for a bun global install,
 *      used whenever the lookup yields nothing or throws.
 *
 * @returns The path reported by `Bun.which`, or the `~/.bun/bin/selftune`
 *          fallback. The fallback is returned without checking that the file
 *          exists.
 */
export function resolveSelftuneBin(): string {
  let located: string | null | undefined;
  try {
    located = Bun.which("selftune");
  } catch {
    // Bun.which may throw in edge cases — treat that the same as "not found".
    located = null;
  }

  if (!located) {
    // Built lazily so homedir() is only consulted when the lookup failed.
    return join(homedir(), ".bun", "bin", "selftune");
  }
  return located;
}
40
+
22
41
  // ---------------------------------------------------------------------------
23
42
  // Schedule definitions — derived from the shared DEFAULT_CRON_JOBS
24
43
  // ---------------------------------------------------------------------------
@@ -137,6 +156,8 @@ function toSystemdExecStart(command: string): string {
137
156
  // ---------------------------------------------------------------------------
138
157
 
139
158
  export function generateCrontab(): string {
159
+ const resolvedBin = resolveSelftuneBin();
160
+ const home = homedir();
140
161
  const lines = [
141
162
  "# selftune automation — add to your crontab with: crontab -e",
142
163
  "#",
@@ -144,10 +165,13 @@ export function generateCrontab(): string {
144
165
  "# status remains a reporting job; orchestrate handles sync, candidate",
145
166
  "# selection, low-risk description evolution, and watch/rollback follow-up.",
146
167
  "#",
168
+ `PATH=${home}/.bun/bin:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin`,
169
+ "",
147
170
  ];
148
171
  for (const entry of SCHEDULE_ENTRIES) {
172
+ const resolvedCommand = entry.command.replace(/\bselftune\b/g, resolvedBin);
149
173
  lines.push(`# ${entry.description}`);
150
- lines.push(`${entry.schedule} ${entry.command}`);
174
+ lines.push(`${entry.schedule} ${resolvedCommand}`);
151
175
  lines.push("");
152
176
  }
153
177
  return lines.join("\n");
@@ -177,10 +201,17 @@ export function mergeManagedCrontab(existing: string, managedContent: string): s
177
201
  return `${withoutExistingBlock}\n\n${managedBlock}`;
178
202
  }
179
203
 
180
- function buildLaunchdDefinition(entry: ScheduleEntry): { label: string; content: string } {
204
+ function buildLaunchdDefinition(
205
+ entry: ScheduleEntry,
206
+ binPath?: string,
207
+ ): { label: string; content: string } {
181
208
  const label = `com.selftune.${entry.name.replace("selftune-", "")}`;
182
- const args = toLaunchdArgs(entry.command);
209
+ const resolvedBin = binPath ?? resolveSelftuneBin();
210
+ // Replace bare `selftune` with the resolved absolute path
211
+ const resolvedCommand = entry.command.replace(/\bselftune\b/g, resolvedBin);
212
+ const args = toLaunchdArgs(resolvedCommand);
183
213
  const schedule = cronToLaunchdSchedule(entry.schedule);
214
+ const home = homedir();
184
215
 
185
216
  return {
186
217
  label,
@@ -198,6 +229,13 @@ function buildLaunchdDefinition(entry: ScheduleEntry): { label: string; content:
198
229
  <dict>
199
230
  <key>Label</key>
200
231
  <string>${label}</string>
232
+ <key>EnvironmentVariables</key>
233
+ <dict>
234
+ <key>PATH</key>
235
+ <string>${home}/.bun/bin:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin</string>
236
+ <key>HOME</key>
237
+ <string>${home}</string>
238
+ </dict>
201
239
  <key>ProgramArguments</key>
202
240
  <array>
203
241
  ${args}
@@ -222,14 +260,20 @@ export function generateLaunchd(): string {
222
260
  return plists.join("\n\n");
223
261
  }
224
262
 
225
- function buildSystemdDefinition(entry: ScheduleEntry): {
263
+ function buildSystemdDefinition(
264
+ entry: ScheduleEntry,
265
+ binPath?: string,
266
+ ): {
226
267
  baseName: string;
227
268
  timerContent: string;
228
269
  serviceContent: string;
229
270
  } {
230
271
  const unitName = entry.name;
231
272
  const calendar = cronToOnCalendar(entry.schedule);
232
- const execStart = toSystemdExecStart(entry.command);
273
+ const resolvedBin = binPath ?? resolveSelftuneBin();
274
+ const resolvedCommand = entry.command.replace(/\bselftune\b/g, resolvedBin);
275
+ const execStart = toSystemdExecStart(resolvedCommand);
276
+ const home = homedir();
233
277
 
234
278
  return {
235
279
  baseName: unitName,
@@ -247,6 +291,8 @@ Description=${entry.description}
247
291
 
248
292
  [Service]
249
293
  Type=oneshot
294
+ Environment="PATH=${home}/.bun/bin:/usr/local/bin:/usr/bin:/bin"
295
+ Environment="HOME=${home}"
250
296
  ExecStart=${execStart}`,
251
297
  };
252
298
  }
@@ -90,6 +90,8 @@ export function readJsonlFrom<T = Record<string, unknown>>(
90
90
  * Append a single record to a JSONL file. Creates parent directories if needed.
91
91
  * When logType is provided, validates the record and logs warnings on failure
92
92
  * but still writes the record (fail-open: hooks must never block).
93
+ *
94
+ * @deprecated Phase 3: JSONL writes removed. Retained for materializer/test utilities only.
93
95
  */
94
96
  export function appendJsonl(path: string, record: unknown, logType?: LogType): void {
95
97
  if (logType) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "selftune",
3
- "version": "0.2.13",
3
+ "version": "0.2.14",
4
4
  "description": "Self-improving skills CLI for AI agents",
5
5
  "keywords": [
6
6
  "agent",
@@ -73,12 +73,14 @@
73
73
  "prepublishOnly": "bun run sync-version && bun run build:dashboard",
74
74
  "typecheck:dashboard": "cd apps/local-dashboard && bunx tsc --noEmit",
75
75
  "check": "bun run lint && bun run format:check && bun run lint:arch && bun run typecheck:dashboard && bun run test",
76
+ "prepare": "bunx lefthook install || true",
76
77
  "start": "bun run cli/selftune/index.ts --help"
77
78
  },
78
79
  "dependencies": {
79
80
  "@selftune/telemetry-contract": "file:packages/telemetry-contract"
80
81
  },
81
82
  "devDependencies": {
83
+ "@evilmartians/lefthook": "^1.13.6",
82
84
  "@types/bun": "^1.1.0",
83
85
  "oxfmt": "^0.41.0",
84
86
  "oxlint": "^1.56.0"
@@ -0,0 +1,86 @@
1
+ import { ZapIcon, CircleDotIcon } from "lucide-react";
2
+
3
+ import { timeAgo } from "../lib/format";
4
+ import { Badge } from "../primitives/badge";
5
+ import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "../primitives/card";
6
+
7
/** One row of the recent-activity feed rendered by `RecentActivityFeed`. */
export interface RecentActivityItem {
  /** Time of the invocation; passed to `timeAgo` for display. */
  timestamp: string;
  /** Session identifier; used as part of the row's React key. */
  session_id: string;
  /** Skill name shown as the row title; also part of the React key. */
  skill_name: string;
  /** Query text shown beneath the title; the line is omitted when empty. */
  query: string;
  /** Selects the "triggered" badge and green dot; otherwise "checked" and a muted dot. */
  triggered: boolean;
  /** When true, a "live" badge is shown next to the skill name. */
  is_live: boolean;
}
15
+
16
/**
 * Card listing the most recent skill invocations.
 *
 * With no items, an empty-state card ("No recent skill invocations") is
 * rendered. Otherwise the first `maxItems` entries of `items` are rendered in
 * the order given; each row shows a status dot, the skill name, an optional
 * "live" badge, a "triggered"/"checked" badge, a relative timestamp, and the
 * query text when present.
 *
 * @param items    Activity rows to display, in the order they should appear.
 * @param maxItems Maximum number of rows to render. Defaults to 20; negative
 *                 values are treated as 0.
 */
export function RecentActivityFeed({
  items,
  maxItems = 20,
}: {
  items: RecentActivityItem[];
  maxItems?: number;
}) {
  // Shared by the empty and populated states so the two headers cannot drift apart.
  const title = (
    <CardTitle className="flex items-center gap-2 text-sm">
      <ZapIcon className="size-4" />
      Recent Activity
    </CardTitle>
  );

  if (items.length === 0) {
    return (
      <Card>
        <CardHeader>{title}</CardHeader>
        <CardContent>
          <p className="text-sm text-muted-foreground text-center py-8">
            No recent skill invocations
          </p>
        </CardContent>
      </Card>
    );
  }

  // slice() treats a negative end as an offset from the tail, so clamp at 0.
  const visibleItems = items.slice(0, Math.max(0, maxItems));

  return (
    <Card>
      <CardHeader>
        {title}
        <CardDescription>Latest skill invocations across sessions</CardDescription>
      </CardHeader>
      <CardContent className="space-y-2.5">
        {visibleItems.map((item, i) => (
          <div
            // The index keeps keys unique when a skill fires repeatedly in one session.
            key={`${item.session_id}-${item.skill_name}-${i}`}
            className="flex gap-3 rounded-md p-1.5"
          >
            <div
              className={`mt-1 size-2 shrink-0 rounded-full ${
                item.triggered ? "bg-emerald-500" : "bg-muted-foreground/40"
              }`}
            />
            <div className="flex-1 min-w-0 space-y-0.5">
              <div className="flex items-center gap-2 flex-wrap">
                <span className="text-xs font-medium truncate">{item.skill_name}</span>
                {item.is_live && (
                  <Badge variant="outline" className="h-4 px-1 text-[10px] gap-1">
                    <CircleDotIcon className="size-2.5 text-emerald-500" />
                    live
                  </Badge>
                )}
                {item.triggered ? (
                  <Badge variant="default" className="h-4 px-1 text-[10px]">
                    triggered
                  </Badge>
                ) : (
                  <Badge variant="secondary" className="h-4 px-1 text-[10px]">
                    checked
                  </Badge>
                )}
                <span className="text-[10px] text-muted-foreground font-mono ml-auto shrink-0">
                  {timeAgo(item.timestamp)}
                </span>
              </div>
              {item.query && (
                <p className="text-xs text-muted-foreground line-clamp-1 font-mono">{item.query}</p>
              )}
            </div>
          </div>
        ))}
      </CardContent>
    </Card>
  );
}
@@ -3,5 +3,6 @@ export { EvidenceViewer } from "./EvidenceViewer";
3
3
  export { EvolutionTimeline } from "./EvolutionTimeline";
4
4
  export { InfoTip } from "./InfoTip";
5
5
  export { OrchestrateRunsPanel } from "./OrchestrateRunsPanel";
6
+ export { RecentActivityFeed } from "./RecentActivityFeed";
6
7
  export { SectionCards } from "./section-cards";
7
8
  export { SkillHealthGrid } from "./skill-health-grid";
@@ -21,6 +21,7 @@ interface SectionCardsProps {
21
21
  pendingCount: number;
22
22
  evidenceCount: number;
23
23
  hasEvolution?: boolean;
24
+ activeSessionsCount?: number;
24
25
  }
25
26
 
26
27
  export function SectionCards({
@@ -31,6 +32,7 @@ export function SectionCards({
31
32
  pendingCount,
32
33
  evidenceCount,
33
34
  hasEvolution = true,
35
+ activeSessionsCount = 0,
34
36
  }: SectionCardsProps) {
35
37
  const passRateStr = avgPassRate !== null ? `${Math.round(avgPassRate * 100)}%` : "--";
36
38
  const passRateGood = avgPassRate !== null && avgPassRate >= 0.7;
@@ -118,6 +120,17 @@ export function SectionCards({
118
120
  <CardTitle className="text-2xl font-semibold tabular-nums @[250px]/card:text-3xl">
119
121
  {sessionsCount}
120
122
  </CardTitle>
123
+ {activeSessionsCount > 0 && (
124
+ <CardAction>
125
+ <Badge variant="outline" className="gap-1.5">
126
+ <span className="relative flex size-2">
127
+ <span className="absolute inline-flex size-full animate-ping rounded-full bg-emerald-400 opacity-75" />
128
+ <span className="relative inline-flex size-2 rounded-full bg-emerald-500" />
129
+ </span>
130
+ {activeSessionsCount} in progress
131
+ </Badge>
132
+ </CardAction>
133
+ )}
121
134
  </CardHeader>
122
135
  </Card>
123
136
 
package/skill/SKILL.md CHANGED
@@ -12,7 +12,7 @@ description: >
12
12
  even if they don't say "selftune" explicitly.
13
13
  metadata:
14
14
  author: selftune-dev
15
- version: 0.2.13
15
+ version: 0.2.14
16
16
  category: developer-tools
17
17
  ---
18
18
 
@@ -31,12 +31,14 @@ selftune orchestrate
31
31
  | `--max-skills <n>` | Cap how many candidates are processed in one run | `5` |
32
32
  | `--recent-window <hours>` | Window for post-deploy watch/rollback checks | `48` |
33
33
  | `--sync-force` | Force a full source replay before candidate selection | Off |
34
+ | `--max-auto-grade <n>` | Max ungraded skills to auto-grade per run (0 to disable) | `5` |
34
35
  | `--loop` | Run as a long-lived process that cycles continuously | Off |
35
36
  | `--loop-interval <seconds>` | Pause between cycles (minimum 60) | `3600` |
36
37
 
37
38
  ## Default Behavior
38
39
 
39
40
  - Sync source-truth telemetry first
41
+ - Auto-grade up to 5 ungraded skills that have session data (enables evolution on first run after ingest)
40
42
  - Prioritize critical/warning/ungraded skills with real missed-query signal
41
43
  - Deploy validated low-risk description changes automatically
42
44
  - Watch recent deployments and roll back regressions automatically
@@ -78,10 +80,11 @@ A phased decision report printed to stderr so you can see exactly what happened
78
80
 
79
81
  1. **Phase 1: Sync** — which sources were scanned, how many records synced, repair counts
80
82
  2. **Phase 2: Status** — skill count, system health, breakdown by status category
81
- 3. **Phase 3: Skill Decisions** — each skill with its action (EVOLVE / WATCH / SKIP) and reason
82
- 4. **Phase 4: Evolution Results** — validation pass-rate changes (before after), deployment status
83
- 5. **Phase 5: Watch** — post-deploy monitoring with alert and rollback indicators
84
- 6. **Summary** — evaluated/deployed/watched/skipped counts and elapsed time
83
+ 3. **Auto-grade** — how many ungraded skills were graded (logged to stderr, included in summary)
84
+ 4. **Phase 3: Skill Decisions** — each skill with its action (EVOLVE / WATCH / SKIP) and reason
85
+ 5. **Phase 4: Evolution Results** — validation pass-rate changes (before → after), deployment status
86
+ 6. **Phase 5: Watch** — post-deploy monitoring with alert and rollback indicators
87
+ 7. **Summary** — auto-graded/evaluated/deployed/watched/skipped counts and elapsed time
85
88
 
86
89
  A mode banner at the top shows DRY RUN, REVIEW, or AUTONOMOUS with rerun hints when applicable.
87
90
 
@@ -140,9 +143,10 @@ In autonomous mode, orchestrate calls sub-workflows in this fixed order:
140
143
 
141
144
  1. **Sync** — refresh source-truth telemetry across all supported agents (`selftune sync`)
142
145
  2. **Status** — compute skill health using existing grade results (reads `grading.json` outputs from previous sessions)
143
- 3. **Evolve** — run evolution on selected candidates (pre-flight is skipped, cheap-loop mode enabled, defaults used)
144
- 4. **Watch** — monitor recently evolved skills (auto-rollback enabled by default, `--recent-window` hours lookback)
145
- 5. **Alpha Upload** — if enrolled in the alpha program (`config.alpha.enrolled === true`) and an API key is configured, stage new canonical records (sessions, invocations, evolution evidence, orchestrate runs) into `canonical_upload_staging`, build V2 push payloads, and flush to the cloud API (`POST /api/v1/push`) with Bearer auth. Fail-open: upload errors never block the orchestrate loop. Respects `--dry-run`.
146
+ 3. **Auto-grade** — grade up to `--max-auto-grade` (default 5) ungraded skills that have session data but no grades yet. Skipped during `--dry-run` (grading makes LLM calls). After grading, status is recomputed so candidate selection sees updated grades. Fail-open: individual grading errors are logged but never block the loop.
147
+ 4. **Evolve** — run evolution on selected candidates (pre-flight is skipped, cheap-loop mode enabled, defaults used)
148
+ 5. **Watch** — monitor recently evolved skills (auto-rollback enabled by default, `--recent-window` hours lookback)
149
+ 6. **Alpha Upload** — if enrolled in the alpha program (`config.alpha.enrolled === true`) and an API key is configured, stage new canonical records (sessions, invocations, evolution evidence, orchestrate runs) into `canonical_upload_staging`, build V2 push payloads, and flush to the cloud API (`POST /api/v1/push`) with Bearer auth. Fail-open: upload errors never block the orchestrate loop. Respects `--dry-run`.
146
150
 
147
151
  Between candidate selection and evolution, orchestrate checks for
148
152
  **cross-skill eval set overlap**. When two or more evolution candidates
@@ -53,6 +53,17 @@ Outputs examples for all three scheduling systems (cron, launchd, systemd).
53
53
 
54
54
  `selftune schedule` is now an alias for `selftune cron`. Both commands are interchangeable. See `Workflows/Cron.md` for the full cron workflow reference.
55
55
 
56
+ ## PATH Resolution (All Platforms)
57
+
58
+ All three scheduling formats resolve the absolute path to the `selftune` binary
59
+ (via `Bun.which` with a `~/.bun/bin/selftune` fallback) and set explicit PATH
60
+ environment variables. This prevents silent failures from minimal default
61
+ environments that don't include homebrew, bun, or node binary locations.
62
+
63
+ - **launchd** — Injects an `EnvironmentVariables` dict with PATH and HOME into each plist.
64
+ - **systemd** — Adds `Environment="PATH=..."` and `Environment="HOME=..."` to each service unit.
65
+ - **cron** — Prepends a `PATH=...` declaration at the top of the generated crontab.
66
+
56
67
  ## Common Patterns
57
68
 
58
69
  - **User wants quick setup on a Linux server** -- Run `selftune schedule --install --format cron`.
@@ -1,14 +1,17 @@
1
1
  # Log Format Reference
2
2
 
3
- selftune writes raw legacy logs plus a canonical event log. This reference
4
- describes each format in detail for the skill to use when parsing sessions,
5
- audit trails, and cloud-ingest exports.
6
-
7
- > **Note:** JSONL files are now backup/recovery only. SQLite (`~/.selftune/selftune.db`)
8
- > is the sole operational store for all runtime reads. JSONL writes are retained for
9
- > append-only durability, but all dashboard queries, hook reads, grading, monitoring,
10
- > and upload staging read from SQLite. JSONL reads only occur when custom log paths
11
- > are provided (e.g., `--telemetry-log`, `--skill-log`) for test isolation.
3
+ selftune uses SQLite as its sole write target and operational store. This
4
+ reference describes the legacy JSONL log formats that remain on disk for
5
+ disaster recovery and export, plus the canonical event schema.
6
+
7
+ > **Important (Phase 3 complete):** JSONL writes have been removed from all hooks,
8
+ > ingestors, and normalization pipelines. New data is written exclusively to SQLite
9
+ > (`~/.selftune/selftune.db`). Existing JSONL files are retained on disk but only
10
+ > contain pre-cutover history. The materializer (`localdb/materialize.ts`) can
11
+ > rebuild SQLite from these files but only for data written before Phase 3.
12
+ > Post-cutover recovery requires `selftune export` snapshots or SQLite backups.
13
+ > The file formats below are preserved as a reference for the materializer and
14
+ > export tooling.
12
15
 
13
16
  ---
14
17
 
@@ -54,11 +57,11 @@ One JSON record per line. Each record is one completed agent session.
54
57
 
55
58
  ## ~/.claude/skill_usage_log.jsonl
56
59
 
57
- > **Deprecated.** The `skill_usage` and `skill_invocations` data paths have been
60
+ > **Legacy.** The `skill_usage` and `skill_invocations` data paths have been
58
61
  > consolidated into a single `skill_invocations` table in SQLite. This JSONL file
59
- > is still written by hooks for backward compatibility, but the dashboard and
60
- > queries now read exclusively from `skill_invocations`. New consumers should use
61
- > the SQLite table via `localdb/queries.ts`.
62
+ > is no longer written (Phase 3). The dashboard and all queries read exclusively
63
+ > from `skill_invocations`. New consumers should use the SQLite table via
64
+ > `localdb/queries.ts`.
62
65
 
63
66
  One record per skill trigger event. Populated by skill-eval.ts (PostToolUse hook).
64
67
 
@@ -208,10 +211,10 @@ This is operational state, not an analytics source of truth.
208
211
 
209
212
  ## ~/.claude/improvement_signals.jsonl
210
213
 
211
- One record per detected improvement signal. Written by `prompt-log.ts` when a
212
- user correction or explicit skill request is detected. Read by the orchestrator
213
- for signal-aware candidate selection, and by `session-stop.ts` to decide whether
214
- to spawn a reactive orchestrate run.
214
+ One record per detected improvement signal. Signals were previously appended to
215
+ this file by `prompt-log.ts`; they are now written directly to SQLite
216
+ (`improvement_signals` table), and this JSONL file is no longer appended to
217
+ (Phase 3). The orchestrator reads signals for signal-aware candidate selection via SQLite queries.
215
218
 
216
219
  ```json
217
220
  {
@@ -225,10 +228,8 @@ to spawn a reactive orchestrate run.
225
228
  ```
226
229
 
227
230
  Signal records are append-only. When an orchestrate run processes a signal,
228
- the original record remains unchanged and the orchestrator rewrites the file
229
- with `consumed: true` set on processed entries. This is the one exception
230
- to strict append-only semantics in the log system — the rewrite is atomic
231
- and race-protected by the orchestrate lockfile.
231
+ it sets `consumed: true` via `updateSignalConsumed()` in SQLite. The JSONL
232
+ format below is retained as a reference for the materializer and export.
232
233
 
233
234
  Consumed signal example:
234
235