selftune 0.2.13 → 0.2.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +2 -0
  2. package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +16 -0
  3. package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +12 -0
  4. package/apps/local-dashboard/dist/index.html +3 -3
  5. package/cli/selftune/activation-rules.ts +24 -48
  6. package/cli/selftune/analytics.ts +13 -11
  7. package/cli/selftune/badge/badge.ts +13 -9
  8. package/cli/selftune/canonical-export.ts +6 -6
  9. package/cli/selftune/constants.ts +7 -0
  10. package/cli/selftune/contribute/bundle.ts +9 -44
  11. package/cli/selftune/contribute/contribute.ts +2 -1
  12. package/cli/selftune/cron/setup.ts +3 -1
  13. package/cli/selftune/dashboard-contract.ts +22 -0
  14. package/cli/selftune/dashboard.ts +10 -5
  15. package/cli/selftune/eval/baseline.ts +20 -30
  16. package/cli/selftune/eval/hooks-to-evals.ts +27 -34
  17. package/cli/selftune/eval/import-skillsbench.ts +21 -8
  18. package/cli/selftune/eval/unit-test-cli.ts +22 -11
  19. package/cli/selftune/evolution/description-quality.ts +224 -0
  20. package/cli/selftune/evolution/evolve-body.ts +17 -10
  21. package/cli/selftune/evolution/evolve.ts +70 -57
  22. package/cli/selftune/evolution/rollback.ts +7 -6
  23. package/cli/selftune/grading/auto-grade.ts +27 -35
  24. package/cli/selftune/grading/grade-session.ts +24 -30
  25. package/cli/selftune/hooks/auto-activate.ts +12 -3
  26. package/cli/selftune/hooks/evolution-guard.ts +14 -24
  27. package/cli/selftune/hooks/prompt-log.ts +7 -9
  28. package/cli/selftune/hooks/session-stop.ts +0 -8
  29. package/cli/selftune/index.ts +66 -69
  30. package/cli/selftune/ingestors/claude-replay.ts +29 -14
  31. package/cli/selftune/ingestors/codex-rollout.ts +15 -5
  32. package/cli/selftune/ingestors/codex-wrapper.ts +15 -13
  33. package/cli/selftune/ingestors/openclaw-ingest.ts +24 -5
  34. package/cli/selftune/ingestors/opencode-ingest.ts +9 -4
  35. package/cli/selftune/init.ts +14 -9
  36. package/cli/selftune/localdb/queries.ts +57 -0
  37. package/cli/selftune/monitoring/watch.ts +39 -38
  38. package/cli/selftune/normalization.ts +2 -23
  39. package/cli/selftune/orchestrate.ts +224 -24
  40. package/cli/selftune/routes/skill-report.ts +17 -0
  41. package/cli/selftune/schedule.ts +74 -14
  42. package/cli/selftune/sync.ts +7 -3
  43. package/cli/selftune/types.ts +44 -10
  44. package/cli/selftune/utils/cli-error.ts +102 -0
  45. package/cli/selftune/utils/jsonl.ts +2 -0
  46. package/cli/selftune/workflows/workflows.ts +23 -17
  47. package/package.json +3 -1
  48. package/packages/ui/src/components/RecentActivityFeed.tsx +86 -0
  49. package/packages/ui/src/components/index.ts +1 -0
  50. package/packages/ui/src/components/section-cards.tsx +13 -0
  51. package/skill/SKILL.md +1 -1
  52. package/skill/Workflows/Evolve.md +4 -0
  53. package/skill/Workflows/Initialize.md +8 -8
  54. package/skill/Workflows/Orchestrate.md +11 -7
  55. package/skill/Workflows/Schedule.md +11 -0
  56. package/skill/references/logs.md +22 -21
  57. package/skill/settings_snippet.json +29 -6
  58. package/apps/local-dashboard/dist/assets/index-4_dAY17K.js +0 -16
  59. package/apps/local-dashboard/dist/assets/index-BxV5WZHc.css +0 -2
  60. package/apps/local-dashboard/dist/assets/vendor-ui-7xD7fNEU.js +0 -12
@@ -126,14 +126,14 @@ Code subagent calls stay up to date.
126
126
 
127
127
  **Hook reference** (for troubleshooting):
128
128
 
129
- | Hook | Script | Purpose |
130
- | -------------------------- | ----------------------------- | ----------------------------------------------- |
131
- | `UserPromptSubmit` | `hooks/prompt-log.ts` | Log every user query |
132
- | `UserPromptSubmit` | `hooks/auto-activate.ts` | Suggest skills before prompt processing |
133
- | `PreToolUse` (Write/Edit) | `hooks/skill-change-guard.ts` | Detect uncontrolled skill edits |
134
- | `PreToolUse` (Write/Edit) | `hooks/evolution-guard.ts` | Block SKILL.md edits on monitored skills |
135
- | `PostToolUse` (Read/Skill) | `hooks/skill-eval.ts` | Track skill triggers and Skill tool invocations |
136
- | `Stop` | `hooks/session-stop.ts` | Capture session telemetry |
129
+ | Hook | Script | Purpose | Notes |
130
+ | -------------------------- | ----------------------------- | ----------------------------------------------- | ---------------------------------------------- |
131
+ | `UserPromptSubmit` | `hooks/prompt-log.ts` | Log every user query | Accepts both `prompt` and legacy `user_prompt` |
132
+ | `UserPromptSubmit` | `hooks/auto-activate.ts` | Suggest skills before prompt processing | Uses `additionalContext` JSON for suggestions |
133
+ | `PreToolUse` (Write/Edit) | `hooks/skill-change-guard.ts` | Detect uncontrolled skill edits | `if` filter: only fires on `*SKILL.md` paths |
134
+ | `PreToolUse` (Write/Edit) | `hooks/evolution-guard.ts` | Block SKILL.md edits on monitored skills | `if` filter: only fires on `*SKILL.md` paths |
135
+ | `PostToolUse` (Read/Skill) | `hooks/skill-eval.ts` | Track skill triggers and Skill tool invocations | |
136
+ | `Stop` | `hooks/session-stop.ts` | Capture session telemetry | Runs async (non-blocking), 60s timeout |
137
137
 
138
138
  **Codex agents:**
139
139
 
@@ -31,12 +31,14 @@ selftune orchestrate
31
31
  | `--max-skills <n>` | Cap how many candidates are processed in one run | `5` |
32
32
  | `--recent-window <hours>` | Window for post-deploy watch/rollback checks | `48` |
33
33
  | `--sync-force` | Force a full source replay before candidate selection | Off |
34
+ | `--max-auto-grade <n>` | Max ungraded skills to auto-grade per run (0 to disable) | `5` |
34
35
  | `--loop` | Run as a long-lived process that cycles continuously | Off |
35
36
  | `--loop-interval <seconds>` | Pause between cycles (minimum 60) | `3600` |
36
37
 
37
38
  ## Default Behavior
38
39
 
39
40
  - Sync source-truth telemetry first
41
+ - Auto-grade up to 5 ungraded skills that have session data (enables evolution on first run after ingest)
40
42
  - Prioritize critical/warning/ungraded skills with real missed-query signal
41
43
  - Deploy validated low-risk description changes automatically
42
44
  - Watch recent deployments and roll back regressions automatically
@@ -78,10 +80,11 @@ A phased decision report printed to stderr so you can see exactly what happened
78
80
 
79
81
  1. **Phase 1: Sync** — which sources were scanned, how many records synced, repair counts
80
82
  2. **Phase 2: Status** — skill count, system health, breakdown by status category
81
- 3. **Phase 3: Skill Decisions** — each skill with its action (EVOLVE / WATCH / SKIP) and reason
82
- 4. **Phase 4: Evolution Results** — validation pass-rate changes (before → after), deployment status
83
- 5. **Phase 5: Watch** — post-deploy monitoring with alert and rollback indicators
84
- 6. **Summary** — evaluated/deployed/watched/skipped counts and elapsed time
83
+ 3. **Auto-grade** — how many ungraded skills were graded (logged to stderr, included in summary)
84
+ 4. **Phase 3: Skill Decisions** — each skill with its action (EVOLVE / WATCH / SKIP) and reason
85
+ 5. **Phase 4: Evolution Results** — validation pass-rate changes (before → after), deployment status
86
+ 6. **Phase 5: Watch** — post-deploy monitoring with alert and rollback indicators
87
+ 7. **Summary** — auto-graded/evaluated/deployed/watched/skipped counts and elapsed time
85
88
 
86
89
  A mode banner at the top shows DRY RUN, REVIEW, or AUTONOMOUS with rerun hints when applicable.
87
90
 
@@ -140,9 +143,10 @@ In autonomous mode, orchestrate calls sub-workflows in this fixed order:
140
143
 
141
144
  1. **Sync** — refresh source-truth telemetry across all supported agents (`selftune sync`)
142
145
  2. **Status** — compute skill health using existing grade results (reads `grading.json` outputs from previous sessions)
143
- 3. **Evolve** — run evolution on selected candidates (pre-flight is skipped, cheap-loop mode enabled, defaults used)
144
- 4. **Watch** — monitor recently evolved skills (auto-rollback enabled by default, `--recent-window` hours lookback)
145
- 5. **Alpha Upload** — if enrolled in the alpha program (`config.alpha.enrolled === true`) and an API key is configured, stage new canonical records (sessions, invocations, evolution evidence, orchestrate runs) into `canonical_upload_staging`, build V2 push payloads, and flush to the cloud API (`POST /api/v1/push`) with Bearer auth. Fail-open: upload errors never block the orchestrate loop. Respects `--dry-run`.
146
+ 3. **Auto-grade** — grade up to `--max-auto-grade` (default 5) ungraded skills that have session data but no grades yet. Skipped during `--dry-run` (grading makes LLM calls). After grading, status is recomputed so candidate selection sees updated grades. Fail-open: individual grading errors are logged but never block the loop.
147
+ 4. **Evolve** — run evolution on selected candidates (pre-flight is skipped, cheap-loop mode enabled, defaults used)
148
+ 5. **Watch** — monitor recently evolved skills (auto-rollback enabled by default, `--recent-window` hours lookback)
149
+ 6. **Alpha Upload** — if enrolled in the alpha program (`config.alpha.enrolled === true`) and an API key is configured, stage new canonical records (sessions, invocations, evolution evidence, orchestrate runs) into `canonical_upload_staging`, build V2 push payloads, and flush to the cloud API (`POST /api/v1/push`) with Bearer auth. Fail-open: upload errors never block the orchestrate loop. Respects `--dry-run`.
146
150
 
147
151
  Between candidate selection and evolution, orchestrate checks for
148
152
  **cross-skill eval set overlap**. When two or more evolution candidates
@@ -53,6 +53,17 @@ Outputs examples for all three scheduling systems (cron, launchd, systemd).
53
53
 
54
54
  `selftune schedule` is now an alias for `selftune cron`. Both commands are interchangeable. See `Workflows/Cron.md` for the full cron workflow reference.
55
55
 
56
+ ## PATH Resolution (All Platforms)
57
+
58
+ All three scheduling formats resolve the absolute path to the `selftune` binary
59
+ (via `Bun.which` with a `~/.bun/bin/selftune` fallback) and set explicit PATH
60
+ environment variables. This prevents silent failures from minimal default
61
+ environments that don't include homebrew, bun, or node binary locations.
62
+
63
+ - **launchd** — Injects an `EnvironmentVariables` dict with PATH and HOME into each plist.
64
+ - **systemd** — Adds `Environment="PATH=..."` and `Environment="HOME=..."` to each service unit.
65
+ - **cron** — Prepends a `PATH=...` declaration at the top of the generated crontab.
66
+
56
67
  ## Common Patterns
57
68
 
58
69
  - **User wants quick setup on a Linux server** -- Run `selftune schedule --install --format cron`.
@@ -1,14 +1,17 @@
1
1
  # Log Format Reference
2
2
 
3
- selftune writes raw legacy logs plus a canonical event log. This reference
4
- describes each format in detail for the skill to use when parsing sessions,
5
- audit trails, and cloud-ingest exports.
6
-
7
- > **Note:** JSONL files are now backup/recovery only. SQLite (`~/.selftune/selftune.db`)
8
- > is the sole operational store for all runtime reads. JSONL writes are retained for
9
- > append-only durability, but all dashboard queries, hook reads, grading, monitoring,
10
- > and upload staging read from SQLite. JSONL reads only occur when custom log paths
11
- > are provided (e.g., `--telemetry-log`, `--skill-log`) for test isolation.
3
+ selftune uses SQLite as its sole write target and operational store. This
4
+ reference describes the legacy JSONL log formats that remain on disk for
5
+ disaster recovery and export, plus the canonical event schema.
6
+
7
+ > **Important (Phase 3 complete):** JSONL writes have been removed from all hooks,
8
+ > ingestors, and normalization pipelines. New data is written exclusively to SQLite
9
+ > (`~/.selftune/selftune.db`). Existing JSONL files are retained on disk but only
10
+ > contain pre-cutover history. The materializer (`localdb/materialize.ts`) can
11
+ > rebuild SQLite from these files but only for data written before Phase 3.
12
+ > Post-cutover recovery requires `selftune export` snapshots or SQLite backups.
13
+ > The file formats below are preserved as a reference for the materializer and
14
+ > export tooling.
12
15
 
13
16
  ---
14
17
 
@@ -54,11 +57,11 @@ One JSON record per line. Each record is one completed agent session.
54
57
 
55
58
  ## ~/.claude/skill_usage_log.jsonl
56
59
 
57
- > **Deprecated.** The `skill_usage` and `skill_invocations` data paths have been
60
+ > **Legacy.** The `skill_usage` and `skill_invocations` data paths have been
58
61
  > consolidated into a single `skill_invocations` table in SQLite. This JSONL file
59
- > is still written by hooks for backward compatibility, but the dashboard and
60
- > queries now read exclusively from `skill_invocations`. New consumers should use
61
- > the SQLite table via `localdb/queries.ts`.
62
+ > is no longer written (Phase 3). The dashboard and all queries read exclusively
63
+ > from `skill_invocations`. New consumers should use the SQLite table via
64
+ > `localdb/queries.ts`.
62
65
 
63
66
  One record per skill trigger event. Previously populated by skill-eval.ts (PostToolUse hook).
64
67
 
@@ -208,10 +211,10 @@ This is operational state, not an analytics source of truth.
208
211
 
209
212
  ## ~/.claude/improvement_signals.jsonl
210
213
 
211
- One record per detected improvement signal. Written by `prompt-log.ts` when a
212
- user correction or explicit skill request is detected. Read by the orchestrator
213
- for signal-aware candidate selection, and by `session-stop.ts` to decide whether
214
- to spawn a reactive orchestrate run.
214
+ One record per detected improvement signal. Previously written by `prompt-log.ts`;
215
+ now written directly to SQLite (`improvement_signals` table). This JSONL file is
216
+ no longer appended to (Phase 3). The orchestrator reads signals for signal-aware
217
+ candidate selection via SQLite queries.
215
218
 
216
219
  ```json
217
220
  {
@@ -225,10 +228,8 @@ to spawn a reactive orchestrate run.
225
228
  ```
226
229
 
227
230
  Signal records are append-only. When an orchestrate run processes a signal,
228
- the original record remains unchanged and the orchestrator rewrites the file
229
- with `consumed: true` set on processed entries. This is the one exception
230
- to strict append-only semantics in the log system — the rewrite is atomic
231
- and race-protected by the orchestrate lockfile.
231
+ it sets `consumed: true` via `updateSignalConsumed()` in SQLite. The JSONL
232
+ format below is retained as a reference for the materializer and export.
232
233
 
233
234
  Consumed signal example:
234
235
 
@@ -10,12 +10,14 @@
10
10
  {
11
11
  "type": "command",
12
12
  "command": "bun run /PATH/TO/cli/selftune/hooks/prompt-log.ts",
13
- "timeout": 5
13
+ "timeout": 5,
14
+ "statusMessage": "selftune: logging prompt"
14
15
  },
15
16
  {
16
17
  "type": "command",
17
18
  "command": "bun run /PATH/TO/cli/selftune/hooks/auto-activate.ts",
18
- "timeout": 5
19
+ "timeout": 5,
20
+ "statusMessage": "selftune: checking activation rules"
19
21
  }
20
22
  ]
21
23
  }
@@ -26,13 +28,31 @@
26
28
  "hooks": [
27
29
  {
28
30
  "type": "command",
31
+ "if": "Write(*SKILL.md)",
29
32
  "command": "bun run /PATH/TO/cli/selftune/hooks/skill-change-guard.ts",
30
- "timeout": 5
33
+ "timeout": 5,
34
+ "statusMessage": "selftune: checking skill change guard"
31
35
  },
32
36
  {
33
37
  "type": "command",
38
+ "if": "Edit(*SKILL.md)",
39
+ "command": "bun run /PATH/TO/cli/selftune/hooks/skill-change-guard.ts",
40
+ "timeout": 5,
41
+ "statusMessage": "selftune: checking skill change guard"
42
+ },
43
+ {
44
+ "type": "command",
45
+ "if": "Write(*SKILL.md)",
46
+ "command": "bun run /PATH/TO/cli/selftune/hooks/evolution-guard.ts",
47
+ "timeout": 5,
48
+ "statusMessage": "selftune: checking evolution guard"
49
+ },
50
+ {
51
+ "type": "command",
52
+ "if": "Edit(*SKILL.md)",
34
53
  "command": "bun run /PATH/TO/cli/selftune/hooks/evolution-guard.ts",
35
- "timeout": 5
54
+ "timeout": 5,
55
+ "statusMessage": "selftune: checking evolution guard"
36
56
  }
37
57
  ]
38
58
  }
@@ -44,7 +64,8 @@
44
64
  {
45
65
  "type": "command",
46
66
  "command": "bun run /PATH/TO/cli/selftune/hooks/skill-eval.ts",
47
- "timeout": 5
67
+ "timeout": 5,
68
+ "statusMessage": "selftune: evaluating skill usage"
48
69
  }
49
70
  ]
50
71
  }
@@ -55,7 +76,9 @@
55
76
  {
56
77
  "type": "command",
57
78
  "command": "bun run /PATH/TO/cli/selftune/hooks/session-stop.ts",
58
- "timeout": 15
79
+ "timeout": 60,
80
+ "async": true,
81
+ "statusMessage": "selftune: capturing session telemetry"
59
82
  }
60
83
  ]
61
84
  }