selftune 0.2.13 → 0.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +2 -0
- package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +16 -0
- package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +12 -0
- package/apps/local-dashboard/dist/index.html +3 -3
- package/cli/selftune/activation-rules.ts +24 -48
- package/cli/selftune/analytics.ts +13 -11
- package/cli/selftune/badge/badge.ts +13 -9
- package/cli/selftune/canonical-export.ts +6 -6
- package/cli/selftune/constants.ts +7 -0
- package/cli/selftune/contribute/bundle.ts +9 -44
- package/cli/selftune/contribute/contribute.ts +2 -1
- package/cli/selftune/cron/setup.ts +3 -1
- package/cli/selftune/dashboard-contract.ts +22 -0
- package/cli/selftune/dashboard.ts +10 -5
- package/cli/selftune/eval/baseline.ts +20 -30
- package/cli/selftune/eval/hooks-to-evals.ts +27 -34
- package/cli/selftune/eval/import-skillsbench.ts +21 -8
- package/cli/selftune/eval/unit-test-cli.ts +22 -11
- package/cli/selftune/evolution/description-quality.ts +224 -0
- package/cli/selftune/evolution/evolve-body.ts +17 -10
- package/cli/selftune/evolution/evolve.ts +70 -57
- package/cli/selftune/evolution/rollback.ts +7 -6
- package/cli/selftune/grading/auto-grade.ts +27 -35
- package/cli/selftune/grading/grade-session.ts +24 -30
- package/cli/selftune/hooks/auto-activate.ts +12 -3
- package/cli/selftune/hooks/evolution-guard.ts +14 -24
- package/cli/selftune/hooks/prompt-log.ts +7 -9
- package/cli/selftune/hooks/session-stop.ts +0 -8
- package/cli/selftune/index.ts +66 -69
- package/cli/selftune/ingestors/claude-replay.ts +29 -14
- package/cli/selftune/ingestors/codex-rollout.ts +15 -5
- package/cli/selftune/ingestors/codex-wrapper.ts +15 -13
- package/cli/selftune/ingestors/openclaw-ingest.ts +24 -5
- package/cli/selftune/ingestors/opencode-ingest.ts +9 -4
- package/cli/selftune/init.ts +14 -9
- package/cli/selftune/localdb/queries.ts +57 -0
- package/cli/selftune/monitoring/watch.ts +39 -38
- package/cli/selftune/normalization.ts +2 -23
- package/cli/selftune/orchestrate.ts +224 -24
- package/cli/selftune/routes/skill-report.ts +17 -0
- package/cli/selftune/schedule.ts +74 -14
- package/cli/selftune/sync.ts +7 -3
- package/cli/selftune/types.ts +44 -10
- package/cli/selftune/utils/cli-error.ts +102 -0
- package/cli/selftune/utils/jsonl.ts +2 -0
- package/cli/selftune/workflows/workflows.ts +23 -17
- package/package.json +3 -1
- package/packages/ui/src/components/RecentActivityFeed.tsx +86 -0
- package/packages/ui/src/components/index.ts +1 -0
- package/packages/ui/src/components/section-cards.tsx +13 -0
- package/skill/SKILL.md +1 -1
- package/skill/Workflows/Evolve.md +4 -0
- package/skill/Workflows/Initialize.md +8 -8
- package/skill/Workflows/Orchestrate.md +11 -7
- package/skill/Workflows/Schedule.md +11 -0
- package/skill/references/logs.md +22 -21
- package/skill/settings_snippet.json +29 -6
- package/apps/local-dashboard/dist/assets/index-4_dAY17K.js +0 -16
- package/apps/local-dashboard/dist/assets/index-BxV5WZHc.css +0 -2
- package/apps/local-dashboard/dist/assets/vendor-ui-7xD7fNEU.js +0 -12
|
@@ -126,14 +126,14 @@ Code subagent calls stay up to date.
|
|
|
126
126
|
|
|
127
127
|
**Hook reference** (for troubleshooting):
|
|
128
128
|
|
|
129
|
-
| Hook | Script | Purpose |
|
|
130
|
-
| -------------------------- | ----------------------------- | ----------------------------------------------- |
|
|
131
|
-
| `UserPromptSubmit` | `hooks/prompt-log.ts` | Log every user query |
|
|
132
|
-
| `UserPromptSubmit` | `hooks/auto-activate.ts` | Suggest skills before prompt processing |
|
|
133
|
-
| `PreToolUse` (Write/Edit) | `hooks/skill-change-guard.ts` | Detect uncontrolled skill edits |
|
|
134
|
-
| `PreToolUse` (Write/Edit) | `hooks/evolution-guard.ts` | Block SKILL.md edits on monitored skills |
|
|
135
|
-
| `PostToolUse` (Read/Skill) | `hooks/skill-eval.ts` | Track skill triggers and Skill tool invocations |
|
|
136
|
-
| `Stop` | `hooks/session-stop.ts` | Capture session telemetry |
|
|
129
|
+
| Hook | Script | Purpose | Notes |
|
|
130
|
+
| -------------------------- | ----------------------------- | ----------------------------------------------- | ---------------------------------------------- |
|
|
131
|
+
| `UserPromptSubmit` | `hooks/prompt-log.ts` | Log every user query | Accepts both `prompt` and legacy `user_prompt` |
|
|
132
|
+
| `UserPromptSubmit` | `hooks/auto-activate.ts` | Suggest skills before prompt processing | Uses `additionalContext` JSON for suggestions |
|
|
133
|
+
| `PreToolUse` (Write/Edit) | `hooks/skill-change-guard.ts` | Detect uncontrolled skill edits | `if` filter: only fires on `*SKILL.md` paths |
|
|
134
|
+
| `PreToolUse` (Write/Edit) | `hooks/evolution-guard.ts` | Block SKILL.md edits on monitored skills | `if` filter: only fires on `*SKILL.md` paths |
|
|
135
|
+
| `PostToolUse` (Read/Skill) | `hooks/skill-eval.ts` | Track skill triggers and Skill tool invocations | |
|
|
136
|
+
| `Stop` | `hooks/session-stop.ts` | Capture session telemetry | Runs async (non-blocking), 60s timeout |
|
|
137
137
|
|
|
138
138
|
**Codex agents:**
|
|
139
139
|
|
|
@@ -31,12 +31,14 @@ selftune orchestrate
|
|
|
31
31
|
| `--max-skills <n>` | Cap how many candidates are processed in one run | `5` |
|
|
32
32
|
| `--recent-window <hours>` | Window for post-deploy watch/rollback checks | `48` |
|
|
33
33
|
| `--sync-force` | Force a full source replay before candidate selection | Off |
|
|
34
|
+
| `--max-auto-grade <n>` | Max ungraded skills to auto-grade per run (0 to disable) | `5` |
|
|
34
35
|
| `--loop` | Run as a long-lived process that cycles continuously | Off |
|
|
35
36
|
| `--loop-interval <seconds>` | Pause between cycles (minimum 60) | `3600` |
|
|
36
37
|
|
|
37
38
|
## Default Behavior
|
|
38
39
|
|
|
39
40
|
- Sync source-truth telemetry first
|
|
41
|
+
- Auto-grade up to 5 ungraded skills that have session data (enables evolution on first run after ingest)
|
|
40
42
|
- Prioritize critical/warning/ungraded skills with real missed-query signal
|
|
41
43
|
- Deploy validated low-risk description changes automatically
|
|
42
44
|
- Watch recent deployments and roll back regressions automatically
|
|
@@ -78,10 +80,11 @@ A phased decision report printed to stderr so you can see exactly what happened
|
|
|
78
80
|
|
|
79
81
|
1. **Phase 1: Sync** — which sources were scanned, how many records synced, repair counts
|
|
80
82
|
2. **Phase 2: Status** — skill count, system health, breakdown by status category
|
|
81
|
-
3. **
|
|
82
|
-
4. **Phase
|
|
83
|
-
5. **Phase
|
|
84
|
-
6. **
|
|
83
|
+
3. **Auto-grade** — how many ungraded skills were graded (logged to stderr, included in summary)
|
|
84
|
+
4. **Phase 3: Skill Decisions** — each skill with its action (EVOLVE / WATCH / SKIP) and reason
|
|
85
|
+
5. **Phase 4: Evolution Results** — validation pass-rate changes (before → after), deployment status
|
|
86
|
+
6. **Phase 5: Watch** — post-deploy monitoring with alert and rollback indicators
|
|
87
|
+
7. **Summary** — auto-graded/evaluated/deployed/watched/skipped counts and elapsed time
|
|
85
88
|
|
|
86
89
|
A mode banner at the top shows DRY RUN, REVIEW, or AUTONOMOUS with rerun hints when applicable.
|
|
87
90
|
|
|
@@ -140,9 +143,10 @@ In autonomous mode, orchestrate calls sub-workflows in this fixed order:
|
|
|
140
143
|
|
|
141
144
|
1. **Sync** — refresh source-truth telemetry across all supported agents (`selftune sync`)
|
|
142
145
|
2. **Status** — compute skill health using existing grade results (reads `grading.json` outputs from previous sessions)
|
|
143
|
-
3. **
|
|
144
|
-
4. **
|
|
145
|
-
5. **
|
|
146
|
+
3. **Auto-grade** — grade up to `--max-auto-grade` (default 5) ungraded skills that have session data but no grades yet. Skipped during `--dry-run` (grading makes LLM calls). After grading, status is recomputed so candidate selection sees updated grades. Fail-open: individual grading errors are logged but never block the loop.
|
|
147
|
+
4. **Evolve** — run evolution on selected candidates (pre-flight is skipped, cheap-loop mode enabled, defaults used)
|
|
148
|
+
5. **Watch** — monitor recently evolved skills (auto-rollback enabled by default, `--recent-window` hours lookback)
|
|
149
|
+
6. **Alpha Upload** — if enrolled in the alpha program (`config.alpha.enrolled === true`) and an API key is configured, stage new canonical records (sessions, invocations, evolution evidence, orchestrate runs) into `canonical_upload_staging`, build V2 push payloads, and flush to the cloud API (`POST /api/v1/push`) with Bearer auth. Fail-open: upload errors never block the orchestrate loop. Respects `--dry-run`.
|
|
146
150
|
|
|
147
151
|
Between candidate selection and evolution, orchestrate checks for
|
|
148
152
|
**cross-skill eval set overlap**. When two or more evolution candidates
|
|
@@ -53,6 +53,17 @@ Outputs examples for all three scheduling systems (cron, launchd, systemd).
|
|
|
53
53
|
|
|
54
54
|
`selftune schedule` is now an alias for `selftune cron`. Both commands are interchangeable. See `Workflows/Cron.md` for the full cron workflow reference.
|
|
55
55
|
|
|
56
|
+
## PATH Resolution (All Platforms)
|
|
57
|
+
|
|
58
|
+
All three scheduling formats resolve the absolute path to the `selftune` binary
|
|
59
|
+
(via `Bun.which` with a `~/.bun/bin/selftune` fallback) and set explicit PATH
|
|
60
|
+
environment variables. This prevents silent failures from minimal default
|
|
61
|
+
environments that don't include Homebrew, Bun, or Node.js binary locations.
|
|
62
|
+
|
|
63
|
+
- **launchd** — Injects an `EnvironmentVariables` dict with PATH and HOME into each plist.
|
|
64
|
+
- **systemd** — Adds `Environment="PATH=..."` and `Environment="HOME=..."` to each service unit.
|
|
65
|
+
- **cron** — Prepends a `PATH=...` declaration at the top of the generated crontab.
|
|
66
|
+
|
|
56
67
|
## Common Patterns
|
|
57
68
|
|
|
58
69
|
- **User wants quick setup on a Linux server** -- Run `selftune schedule --install --format cron`.
|
package/skill/references/logs.md
CHANGED
|
@@ -1,14 +1,17 @@
|
|
|
1
1
|
# Log Format Reference
|
|
2
2
|
|
|
3
|
-
selftune
|
|
4
|
-
describes
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
> **
|
|
8
|
-
>
|
|
9
|
-
>
|
|
10
|
-
>
|
|
11
|
-
>
|
|
3
|
+
selftune uses SQLite as its sole write target and operational store. This
|
|
4
|
+
reference describes the legacy JSONL log formats that remain on disk for
|
|
5
|
+
disaster recovery and export, plus the canonical event schema.
|
|
6
|
+
|
|
7
|
+
> **Important (Phase 3 complete):** JSONL writes have been removed from all hooks,
|
|
8
|
+
> ingestors, and normalization pipelines. New data is written exclusively to SQLite
|
|
9
|
+
> (`~/.selftune/selftune.db`). Existing JSONL files are retained on disk but only
|
|
10
|
+
> contain pre-cutover history. The materializer (`localdb/materialize.ts`) can
|
|
11
|
+
> rebuild SQLite from these files but only for data written before Phase 3.
|
|
12
|
+
> Post-cutover recovery requires `selftune export` snapshots or SQLite backups.
|
|
13
|
+
> The file formats below are preserved as a reference for the materializer and
|
|
14
|
+
> export tooling.
|
|
12
15
|
|
|
13
16
|
---
|
|
14
17
|
|
|
@@ -54,11 +57,11 @@ One JSON record per line. Each record is one completed agent session.
|
|
|
54
57
|
|
|
55
58
|
## ~/.claude/skill_usage_log.jsonl
|
|
56
59
|
|
|
57
|
-
> **
|
|
60
|
+
> **Legacy.** The `skill_usage` and `skill_invocations` data paths have been
|
|
58
61
|
> consolidated into a single `skill_invocations` table in SQLite. This JSONL file
|
|
59
|
-
> is
|
|
60
|
-
>
|
|
61
|
-
>
|
|
62
|
+
> is no longer written (Phase 3). The dashboard and all queries read exclusively
|
|
63
|
+
> from `skill_invocations`. New consumers should use the SQLite table via
|
|
64
|
+
> `localdb/queries.ts`.
|
|
62
65
|
|
|
63
66
|
One record per skill trigger event. Previously populated by `skill-eval.ts` (PostToolUse hook).
|
|
64
67
|
|
|
@@ -208,10 +211,10 @@ This is operational state, not an analytics source of truth.
|
|
|
208
211
|
|
|
209
212
|
## ~/.claude/improvement_signals.jsonl
|
|
210
213
|
|
|
211
|
-
One record per detected improvement signal.
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
214
|
+
One record per detected improvement signal. Previously written by `prompt-log.ts`;
|
|
215
|
+
now written directly to SQLite (`improvement_signals` table). This JSONL file is
|
|
216
|
+
no longer appended to (Phase 3). Signals are read by the orchestrator for signal-aware
|
|
217
|
+
candidate selection via SQLite queries.
|
|
215
218
|
|
|
216
219
|
```json
|
|
217
220
|
{
|
|
@@ -225,10 +228,8 @@ to spawn a reactive orchestrate run.
|
|
|
225
228
|
```
|
|
226
229
|
|
|
227
230
|
Signal records are append-only. When an orchestrate run processes a signal,
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
to strict append-only semantics in the log system — the rewrite is atomic
|
|
231
|
-
and race-protected by the orchestrate lockfile.
|
|
231
|
+
it sets `consumed: true` via `updateSignalConsumed()` in SQLite. The JSONL
|
|
232
|
+
format below is retained as a reference for the materializer and export.
|
|
232
233
|
|
|
233
234
|
Consumed signal example:
|
|
234
235
|
|
|
@@ -10,12 +10,14 @@
|
|
|
10
10
|
{
|
|
11
11
|
"type": "command",
|
|
12
12
|
"command": "bun run /PATH/TO/cli/selftune/hooks/prompt-log.ts",
|
|
13
|
-
"timeout": 5
|
|
13
|
+
"timeout": 5,
|
|
14
|
+
"statusMessage": "selftune: logging prompt"
|
|
14
15
|
},
|
|
15
16
|
{
|
|
16
17
|
"type": "command",
|
|
17
18
|
"command": "bun run /PATH/TO/cli/selftune/hooks/auto-activate.ts",
|
|
18
|
-
"timeout": 5
|
|
19
|
+
"timeout": 5,
|
|
20
|
+
"statusMessage": "selftune: checking activation rules"
|
|
19
21
|
}
|
|
20
22
|
]
|
|
21
23
|
}
|
|
@@ -26,13 +28,31 @@
|
|
|
26
28
|
"hooks": [
|
|
27
29
|
{
|
|
28
30
|
"type": "command",
|
|
31
|
+
"if": "Write(*SKILL.md)",
|
|
29
32
|
"command": "bun run /PATH/TO/cli/selftune/hooks/skill-change-guard.ts",
|
|
30
|
-
"timeout": 5
|
|
33
|
+
"timeout": 5,
|
|
34
|
+
"statusMessage": "selftune: checking skill change guard"
|
|
31
35
|
},
|
|
32
36
|
{
|
|
33
37
|
"type": "command",
|
|
38
|
+
"if": "Edit(*SKILL.md)",
|
|
39
|
+
"command": "bun run /PATH/TO/cli/selftune/hooks/skill-change-guard.ts",
|
|
40
|
+
"timeout": 5,
|
|
41
|
+
"statusMessage": "selftune: checking skill change guard"
|
|
42
|
+
},
|
|
43
|
+
{
|
|
44
|
+
"type": "command",
|
|
45
|
+
"if": "Write(*SKILL.md)",
|
|
46
|
+
"command": "bun run /PATH/TO/cli/selftune/hooks/evolution-guard.ts",
|
|
47
|
+
"timeout": 5,
|
|
48
|
+
"statusMessage": "selftune: checking evolution guard"
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
"type": "command",
|
|
52
|
+
"if": "Edit(*SKILL.md)",
|
|
34
53
|
"command": "bun run /PATH/TO/cli/selftune/hooks/evolution-guard.ts",
|
|
35
|
-
"timeout": 5
|
|
54
|
+
"timeout": 5,
|
|
55
|
+
"statusMessage": "selftune: checking evolution guard"
|
|
36
56
|
}
|
|
37
57
|
]
|
|
38
58
|
}
|
|
@@ -44,7 +64,8 @@
|
|
|
44
64
|
{
|
|
45
65
|
"type": "command",
|
|
46
66
|
"command": "bun run /PATH/TO/cli/selftune/hooks/skill-eval.ts",
|
|
47
|
-
"timeout": 5
|
|
67
|
+
"timeout": 5,
|
|
68
|
+
"statusMessage": "selftune: evaluating skill usage"
|
|
48
69
|
}
|
|
49
70
|
]
|
|
50
71
|
}
|
|
@@ -55,7 +76,9 @@
|
|
|
55
76
|
{
|
|
56
77
|
"type": "command",
|
|
57
78
|
"command": "bun run /PATH/TO/cli/selftune/hooks/session-stop.ts",
|
|
58
|
-
"timeout":
|
|
79
|
+
"timeout": 60,
|
|
80
|
+
"async": true,
|
|
81
|
+
"statusMessage": "selftune: capturing session telemetry"
|
|
59
82
|
}
|
|
60
83
|
]
|
|
61
84
|
}
|