npm - selftune - Versions diffs - 0.2.13 → 0.2.15 - Mend

selftune 0.2.13 → 0.2.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +2 -0
package/apps/local-dashboard/dist/assets/index-DOu3iLD9.js +16 -0
package/apps/local-dashboard/dist/assets/vendor-ui-DIwlrGlb.js +12 -0
package/apps/local-dashboard/dist/index.html +3 -3
package/cli/selftune/activation-rules.ts +24 -48
package/cli/selftune/analytics.ts +13 -11
package/cli/selftune/badge/badge.ts +13 -9
package/cli/selftune/canonical-export.ts +6 -6
package/cli/selftune/constants.ts +7 -0
package/cli/selftune/contribute/bundle.ts +9 -44
package/cli/selftune/contribute/contribute.ts +2 -1
package/cli/selftune/cron/setup.ts +3 -1
package/cli/selftune/dashboard-contract.ts +22 -0
package/cli/selftune/dashboard.ts +10 -5
package/cli/selftune/eval/baseline.ts +20 -30
package/cli/selftune/eval/hooks-to-evals.ts +27 -34
package/cli/selftune/eval/import-skillsbench.ts +21 -8
package/cli/selftune/eval/unit-test-cli.ts +22 -11
package/cli/selftune/evolution/description-quality.ts +224 -0
package/cli/selftune/evolution/evolve-body.ts +17 -10
package/cli/selftune/evolution/evolve.ts +70 -57
package/cli/selftune/evolution/rollback.ts +7 -6
package/cli/selftune/grading/auto-grade.ts +27 -35
package/cli/selftune/grading/grade-session.ts +24 -30
package/cli/selftune/hooks/auto-activate.ts +12 -3
package/cli/selftune/hooks/evolution-guard.ts +14 -24
package/cli/selftune/hooks/prompt-log.ts +7 -9
package/cli/selftune/hooks/session-stop.ts +0 -8
package/cli/selftune/index.ts +66 -69
package/cli/selftune/ingestors/claude-replay.ts +29 -14
package/cli/selftune/ingestors/codex-rollout.ts +15 -5
package/cli/selftune/ingestors/codex-wrapper.ts +15 -13
package/cli/selftune/ingestors/openclaw-ingest.ts +24 -5
package/cli/selftune/ingestors/opencode-ingest.ts +9 -4
package/cli/selftune/init.ts +14 -9
package/cli/selftune/localdb/queries.ts +57 -0
package/cli/selftune/monitoring/watch.ts +39 -38
package/cli/selftune/normalization.ts +2 -23
package/cli/selftune/orchestrate.ts +224 -24
package/cli/selftune/routes/skill-report.ts +17 -0
package/cli/selftune/schedule.ts +74 -14
package/cli/selftune/sync.ts +7 -3
package/cli/selftune/types.ts +44 -10
package/cli/selftune/utils/cli-error.ts +102 -0
package/cli/selftune/utils/jsonl.ts +2 -0
package/cli/selftune/workflows/workflows.ts +23 -17
package/package.json +3 -1
package/packages/ui/src/components/RecentActivityFeed.tsx +86 -0
package/packages/ui/src/components/index.ts +1 -0
package/packages/ui/src/components/section-cards.tsx +13 -0
package/skill/SKILL.md +1 -1
package/skill/Workflows/Evolve.md +4 -0
package/skill/Workflows/Initialize.md +8 -8
package/skill/Workflows/Orchestrate.md +11 -7
package/skill/Workflows/Schedule.md +11 -0
package/skill/references/logs.md +22 -21
package/skill/settings_snippet.json +29 -6
package/apps/local-dashboard/dist/assets/index-4_dAY17K.js +0 -16
package/apps/local-dashboard/dist/assets/index-BxV5WZHc.css +0 -2
package/apps/local-dashboard/dist/assets/vendor-ui-7xD7fNEU.js +0 -12

package/skill/Workflows/Initialize.md CHANGED Viewed

@@ -126,14 +126,14 @@ Code subagent calls stay up to date.
 **Hook reference** (for troubleshooting):
-| Hook                       | Script                        | Purpose                                         |
-| -------------------------- | ----------------------------- | ----------------------------------------------- |
-| `UserPromptSubmit`         | `hooks/prompt-log.ts`         | Log every user query                            |
-| `UserPromptSubmit`         | `hooks/auto-activate.ts`      | Suggest skills before prompt processing         |
-| `PreToolUse` (Write/Edit)  | `hooks/skill-change-guard.ts` | Detect uncontrolled skill edits                 |
-| `PreToolUse` (Write/Edit)  | `hooks/evolution-guard.ts`    | Block SKILL.md edits on monitored skills        |
-| `PostToolUse` (Read/Skill) | `hooks/skill-eval.ts`         | Track skill triggers and Skill tool invocations |
-| `Stop`                     | `hooks/session-stop.ts`       | Capture session telemetry                       |
+| Hook                       | Script                        | Purpose                                         | Notes                                          |
+| -------------------------- | ----------------------------- | ----------------------------------------------- | ---------------------------------------------- |
+| `UserPromptSubmit`         | `hooks/prompt-log.ts`         | Log every user query                            | Accepts both `prompt` and legacy `user_prompt` |
+| `UserPromptSubmit`         | `hooks/auto-activate.ts`      | Suggest skills before prompt processing         | Uses `additionalContext` JSON for suggestions  |
+| `PreToolUse` (Write/Edit)  | `hooks/skill-change-guard.ts` | Detect uncontrolled skill edits                 | `if` filter: only fires on `*SKILL.md` paths   |
+| `PreToolUse` (Write/Edit)  | `hooks/evolution-guard.ts`    | Block SKILL.md edits on monitored skills        | `if` filter: only fires on `*SKILL.md` paths   |
+| `PostToolUse` (Read/Skill) | `hooks/skill-eval.ts`         | Track skill triggers and Skill tool invocations |                                                |
+| `Stop`                     | `hooks/session-stop.ts`       | Capture session telemetry                       | Runs async (non-blocking), 60s timeout         |
 **Codex agents:**

package/skill/Workflows/Orchestrate.md CHANGED Viewed

@@ -31,12 +31,14 @@ selftune orchestrate
 | `--max-skills <n>`          | Cap how many candidates are processed in one run           | `5`        |
 | `--recent-window <hours>`   | Window for post-deploy watch/rollback checks               | `48`       |
 | `--sync-force`              | Force a full source replay before candidate selection      | Off        |
+| `--max-auto-grade <n>`      | Max ungraded skills to auto-grade per run (0 to disable)   | `5`        |
 | `--loop`                    | Run as a long-lived process that cycles continuously       | Off        |
 | `--loop-interval <seconds>` | Pause between cycles (minimum 60)                          | `3600`     |
 ## Default Behavior
 - Sync source-truth telemetry first
+- Auto-grade up to `--max-auto-grade` (default 5) ungraded skills that have session data (enables evolution on first run after ingest)
 - Prioritize critical/warning/ungraded skills with real missed-query signal
 - Deploy validated low-risk description changes automatically
 - Watch recent deployments and roll back regressions automatically
@@ -78,10 +80,11 @@ A phased decision report printed to stderr so you can see exactly what happened
 1. **Phase 1: Sync** — which sources were scanned, how many records synced, repair counts
 2. **Phase 2: Status** — skill count, system health, breakdown by status category
-3. **Phase 3: Skill Decisions** — each skill with its action (EVOLVE / WATCH / SKIP) and reason
-4. **Phase 4: Evolution Results** — validation pass-rate changes (before → after), deployment status
-5. **Phase 5: Watch** — post-deploy monitoring with alert and rollback indicators
-6. **Summary** — evaluated/deployed/watched/skipped counts and elapsed time
+3. **Auto-grade** — how many ungraded skills were graded (logged to stderr, included in summary)
+4. **Phase 3: Skill Decisions** — each skill with its action (EVOLVE / WATCH / SKIP) and reason
+5. **Phase 4: Evolution Results** — validation pass-rate changes (before → after), deployment status
+6. **Phase 5: Watch** — post-deploy monitoring with alert and rollback indicators
+7. **Summary** — auto-graded/evaluated/deployed/watched/skipped counts and elapsed time
 A mode banner at the top shows DRY RUN, REVIEW, or AUTONOMOUS with rerun hints when applicable.
@@ -140,9 +143,10 @@ In autonomous mode, orchestrate calls sub-workflows in this fixed order:
 1. **Sync** — refresh source-truth telemetry across all supported agents (`selftune sync`)
 2. **Status** — compute skill health using existing grade results (reads `grading.json` outputs from previous sessions)
-3. **Evolve** — run evolution on selected candidates (pre-flight is skipped, cheap-loop mode enabled, defaults used)
-4. **Watch** — monitor recently evolved skills (auto-rollback enabled by default, `--recent-window` hours lookback)
-5. **Alpha Upload** — if enrolled in the alpha program (`config.alpha.enrolled === true`) and an API key is configured, stage new canonical records (sessions, invocations, evolution evidence, orchestrate runs) into `canonical_upload_staging`, build V2 push payloads, and flush to the cloud API (`POST /api/v1/push`) with Bearer auth. Fail-open: upload errors never block the orchestrate loop. Respects `--dry-run`.
+3. **Auto-grade** — grade up to `--max-auto-grade` (default 5) ungraded skills that have session data but no grades yet. Skipped during `--dry-run` (grading makes LLM calls). After grading, status is recomputed so candidate selection sees updated grades. Fail-open: individual grading errors are logged but never block the loop.
+4. **Evolve** — run evolution on selected candidates (pre-flight is skipped, cheap-loop mode enabled, defaults used)
+5. **Watch** — monitor recently evolved skills (auto-rollback enabled by default, `--recent-window` hours lookback)
+6. **Alpha Upload** — if enrolled in the alpha program (`config.alpha.enrolled === true`) and an API key is configured, stage new canonical records (sessions, invocations, evolution evidence, orchestrate runs) into `canonical_upload_staging`, build V2 push payloads, and flush to the cloud API (`POST /api/v1/push`) with Bearer auth. Fail-open: upload errors never block the orchestrate loop. Respects `--dry-run`.
 Between candidate selection and evolution, orchestrate checks for
 **cross-skill eval set overlap**. When two or more evolution candidates

package/skill/Workflows/Schedule.md CHANGED Viewed

@@ -53,6 +53,17 @@ Outputs examples for all three scheduling systems (cron, launchd, systemd).
 `selftune schedule` is now an alias for `selftune cron`. Both commands are interchangeable. See `Workflows/Cron.md` for the full cron workflow reference.
+## PATH Resolution (All Platforms)
+All three scheduling formats resolve the absolute path to the `selftune` binary
+(via `Bun.which` with a `~/.bun/bin/selftune` fallback) and set explicit PATH
+environment variables. This prevents silent failures in minimal default
+environments whose PATH does not include Homebrew, Bun, or Node binary locations.
+- **launchd** — Injects an `EnvironmentVariables` dict with PATH and HOME into each plist.
+- **systemd** — Adds `Environment="PATH=..."` and `Environment="HOME=..."` to each service unit.
+- **cron** — Prepends a `PATH=...` declaration at the top of the generated crontab.
 ## Common Patterns
 - **User wants quick setup on a Linux server** -- Run `selftune schedule --install --format cron`.

package/skill/references/logs.md CHANGED Viewed

@@ -1,14 +1,17 @@
 # Log Format Reference
-selftune writes raw legacy logs plus a canonical event log. This reference
-describes each format in detail for the skill to use when parsing sessions,
-audit trails, and cloud-ingest exports.
-> **Note:** JSONL files are now backup/recovery only. SQLite (`~/.selftune/selftune.db`)
-> is the sole operational store for all runtime reads. JSONL writes are retained for
-> append-only durability, but all dashboard queries, hook reads, grading, monitoring,
-> and upload staging read from SQLite. JSONL reads only occur when custom log paths
-> are provided (e.g., `--telemetry-log`, `--skill-log`) for test isolation.
+selftune uses SQLite as its sole write target and operational store. This
+reference describes the legacy JSONL log formats that remain on disk for
+disaster recovery and export, plus the canonical event schema.
+> **Important (Phase 3 complete):** JSONL writes have been removed from all hooks,
+> ingestors, and normalization pipelines. New data is written exclusively to SQLite
+> (`~/.selftune/selftune.db`). Existing JSONL files are retained on disk but only
+> contain pre-cutover history. The materializer (`localdb/materialize.ts`) can
+> rebuild SQLite from these files but only for data written before Phase 3.
+> Post-cutover recovery requires `selftune export` snapshots or SQLite backups.
+> The file formats below are preserved as a reference for the materializer and
+> export tooling.
 ---
@@ -54,11 +57,11 @@ One JSON record per line. Each record is one completed agent session.
 ## ~/.claude/skill_usage_log.jsonl
-> **Deprecated.** The `skill_usage` and `skill_invocations` data paths have been
+> **Legacy.** The `skill_usage` and `skill_invocations` data paths have been
 > consolidated into a single `skill_invocations` table in SQLite. This JSONL file
-> is still written by hooks for backward compatibility, but the dashboard and
-> queries now read exclusively from `skill_invocations`. New consumers should use
-> the SQLite table via `localdb/queries.ts`.
+> is no longer written (Phase 3). The dashboard and all queries read exclusively
+> from `skill_invocations`. New consumers should use the SQLite table via
+> `localdb/queries.ts`.
 One record per skill trigger event. Historically populated by `skill-eval.ts` (PostToolUse hook); the file is no longer written.
@@ -208,10 +211,10 @@ This is operational state, not an analytics source of truth.
 ## ~/.claude/improvement_signals.jsonl
-One record per detected improvement signal. Written by `prompt-log.ts` when a
-user correction or explicit skill request is detected. Read by the orchestrator
-for signal-aware candidate selection, and by `session-stop.ts` to decide whether
-to spawn a reactive orchestrate run.
+One record per detected improvement signal. Previously written by `prompt-log.ts`;
+signals are now written directly to SQLite (`improvement_signals` table), and this
+JSONL file is no longer appended to (Phase 3). The orchestrator reads signals for
+signal-aware candidate selection via SQLite queries.
 ```json
 {
@@ -225,10 +228,8 @@ to spawn a reactive orchestrate run.
 ```
 Signal records are append-only. When an orchestrate run processes a signal,
-the original record remains unchanged and the orchestrator rewrites the file
-with `consumed: true` set on processed entries. This is the one exception
-to strict append-only semantics in the log system — the rewrite is atomic
-and race-protected by the orchestrate lockfile.
+it sets `consumed: true` via `updateSignalConsumed()` in SQLite. The JSONL
+format below is retained as a reference for the materializer and export.
 Consumed signal example:

package/skill/settings_snippet.json CHANGED Viewed

@@ -10,12 +10,14 @@
           {
             "type": "command",
             "command": "bun run /PATH/TO/cli/selftune/hooks/prompt-log.ts",
-            "timeout": 5
+            "timeout": 5,
+            "statusMessage": "selftune: logging prompt"
           },
           {
             "type": "command",
             "command": "bun run /PATH/TO/cli/selftune/hooks/auto-activate.ts",
-            "timeout": 5
+            "timeout": 5,
+            "statusMessage": "selftune: checking activation rules"
           }
         ]
       }
@@ -26,13 +28,31 @@
         "hooks": [
           {
             "type": "command",
+            "if": "Write(*SKILL.md)",
             "command": "bun run /PATH/TO/cli/selftune/hooks/skill-change-guard.ts",
-            "timeout": 5
+            "timeout": 5,
+            "statusMessage": "selftune: checking skill change guard"
           },
           {
             "type": "command",
+            "if": "Edit(*SKILL.md)",
+            "command": "bun run /PATH/TO/cli/selftune/hooks/skill-change-guard.ts",
+            "timeout": 5,
+            "statusMessage": "selftune: checking skill change guard"
+          },
+          {
+            "type": "command",
+            "if": "Write(*SKILL.md)",
+            "command": "bun run /PATH/TO/cli/selftune/hooks/evolution-guard.ts",
+            "timeout": 5,
+            "statusMessage": "selftune: checking evolution guard"
+          },
+          {
+            "type": "command",
+            "if": "Edit(*SKILL.md)",
             "command": "bun run /PATH/TO/cli/selftune/hooks/evolution-guard.ts",
-            "timeout": 5
+            "timeout": 5,
+            "statusMessage": "selftune: checking evolution guard"
           }
         ]
       }
@@ -44,7 +64,8 @@
           {
             "type": "command",
             "command": "bun run /PATH/TO/cli/selftune/hooks/skill-eval.ts",
-            "timeout": 5
+            "timeout": 5,
+            "statusMessage": "selftune: evaluating skill usage"
           }
         ]
       }
@@ -55,7 +76,9 @@
           {
             "type": "command",
             "command": "bun run /PATH/TO/cli/selftune/hooks/session-stop.ts",
-            "timeout": 15
+            "timeout": 60,
+            "async": true,
+            "statusMessage": "selftune: capturing session telemetry"
           }
         ]
       }