npm - selftune - Versions diffs - 0.2.22 → 0.2.24 - Mend

selftune 0.2.22 → 0.2.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (270) hide show

package/CHANGELOG.md +6 -0
package/README.md +95 -15
package/apps/local-dashboard/dist/assets/index-DgY2KGP-.css +1 -0
package/apps/local-dashboard/dist/assets/index-Dmx7LPVX.js +15 -0
package/apps/local-dashboard/dist/assets/vendor-react-C5oyHiV1.js +11 -0
package/apps/local-dashboard/dist/assets/{vendor-table-BIiI3YhS.js → vendor-table-Bc_bbKd8.js} +1 -1
package/apps/local-dashboard/dist/assets/vendor-ui-B3BPIYy7.js +1 -0
package/apps/local-dashboard/dist/index.html +5 -5
package/cli/selftune/adapters/codex/install.ts +310 -78
package/cli/selftune/adapters/opencode/install.ts +3 -4
package/cli/selftune/adapters/pi/hook.ts +273 -0
package/cli/selftune/adapters/pi/install.ts +207 -0
package/cli/selftune/alpha-upload/build-payloads.ts +3 -3
package/cli/selftune/alpha-upload/stage-canonical.ts +17 -11
package/cli/selftune/auto-update.ts +200 -8
package/cli/selftune/canonical-export.ts +55 -25
package/cli/selftune/command-surface.ts +397 -0
package/cli/selftune/constants.ts +10 -1
package/cli/selftune/contribute/contribute.ts +64 -13
package/cli/selftune/contribution-config.ts +57 -3
package/cli/selftune/contribution-preferences.ts +117 -0
package/cli/selftune/contribution-signals.ts +8 -4
package/cli/selftune/contribution-staging.ts +13 -2
package/cli/selftune/contributions.ts +55 -121
package/cli/selftune/creator-contributions.ts +29 -10
package/cli/selftune/cron/setup.ts +7 -3
package/cli/selftune/dashboard-contract.ts +87 -0
package/cli/selftune/dashboard-server.ts +168 -17
package/cli/selftune/dashboard.ts +350 -17
package/cli/selftune/eval/baseline.ts +21 -5
package/cli/selftune/eval/execution-eval.ts +170 -0
package/cli/selftune/eval/family-overlap.ts +2 -2
package/cli/selftune/eval/hooks-to-evals.ts +228 -82
package/cli/selftune/eval/import-skillsbench.ts +2 -2
package/cli/selftune/eval/invocation-classifier.ts +56 -0
package/cli/selftune/eval/synthetic-evals.ts +5 -3
package/cli/selftune/eval/unit-test-cli.ts +7 -4
package/cli/selftune/evolution/apply-proposal.ts +295 -0
package/cli/selftune/evolution/engines/judge-engine.ts +96 -0
package/cli/selftune/evolution/engines/replay-engine.ts +180 -0
package/cli/selftune/evolution/evidence.ts +2 -6
package/cli/selftune/evolution/evolve-body.ts +152 -38
package/cli/selftune/evolution/evolve.ts +244 -52
package/cli/selftune/evolution/rollback.ts +0 -1
package/cli/selftune/evolution/validate-body.ts +111 -49
package/cli/selftune/evolution/validate-host-replay.ts +510 -60
package/cli/selftune/evolution/validate-proposal.ts +11 -150
package/cli/selftune/evolution/validate-routing.ts +51 -108
package/cli/selftune/evolution/validation-contract.ts +91 -0
package/cli/selftune/grading/auto-grade.ts +11 -7
package/cli/selftune/grading/grade-session.ts +10 -16
package/cli/selftune/hooks/skill-eval.ts +2 -1
package/cli/selftune/hooks-shared/types.ts +1 -0
package/cli/selftune/index.ts +58 -15
package/cli/selftune/ingestors/claude-replay.ts +15 -10
package/cli/selftune/ingestors/codex-wrapper.ts +3 -3
package/cli/selftune/ingestors/opencode-ingest.ts +2 -2
package/cli/selftune/ingestors/pi-ingest.ts +727 -0
package/cli/selftune/init.ts +38 -4
package/cli/selftune/localdb/direct-write.ts +120 -1
package/cli/selftune/localdb/materialize.ts +6 -7
package/cli/selftune/localdb/queries/cron.ts +34 -0
package/cli/selftune/localdb/queries/dashboard.ts +834 -0
package/cli/selftune/localdb/queries/evolution.ts +158 -0
package/cli/selftune/localdb/queries/execution.ts +133 -0
package/cli/selftune/localdb/queries/json.ts +18 -0
package/cli/selftune/localdb/queries/monitoring.ts +263 -0
package/cli/selftune/localdb/queries/raw.ts +95 -0
package/cli/selftune/localdb/queries/staging.ts +270 -0
package/cli/selftune/localdb/queries/trust.ts +392 -0
package/cli/selftune/localdb/queries.ts +60 -2162
package/cli/selftune/localdb/schema.ts +59 -0
package/cli/selftune/monitoring/watch.ts +96 -29
package/cli/selftune/normalization.ts +3 -0
package/cli/selftune/observability.ts +12 -3
package/cli/selftune/orchestrate/cli.ts +161 -0
package/cli/selftune/orchestrate/execute.ts +295 -0
package/cli/selftune/orchestrate/finalize.ts +157 -0
package/cli/selftune/orchestrate/locks.ts +40 -0
package/cli/selftune/orchestrate/plan.ts +131 -0
package/cli/selftune/orchestrate/post-run.ts +59 -0
package/cli/selftune/orchestrate/prepare.ts +334 -0
package/cli/selftune/orchestrate/report.ts +182 -0
package/cli/selftune/orchestrate/runtime.ts +120 -0
package/cli/selftune/orchestrate/signals.ts +48 -0
package/cli/selftune/orchestrate.ts +162 -1142
package/cli/selftune/registry/client.ts +74 -0
package/cli/selftune/registry/history.ts +54 -0
package/cli/selftune/registry/index.ts +90 -0
package/cli/selftune/registry/install.ts +141 -0
package/cli/selftune/registry/list.ts +44 -0
package/cli/selftune/registry/push.ts +171 -0
package/cli/selftune/registry/rollback.ts +49 -0
package/cli/selftune/registry/status.ts +62 -0
package/cli/selftune/registry/sync.ts +125 -0
package/cli/selftune/repair/skill-usage.ts +9 -3
package/cli/selftune/routes/overview.ts +5 -2
package/cli/selftune/routes/skill-report.ts +15 -2
package/cli/selftune/schedule.ts +5 -5
package/cli/selftune/status.ts +70 -2
package/cli/selftune/sync.ts +127 -23
package/cli/selftune/testing-readiness.ts +597 -0
package/cli/selftune/types.ts +46 -5
package/cli/selftune/uninstall.ts +2 -1
package/cli/selftune/utils/canonical-log.ts +1 -9
package/cli/selftune/utils/cli-error.ts +9 -0
package/cli/selftune/utils/jsonl.ts +1 -30
package/cli/selftune/utils/llm-call.ts +126 -6
package/cli/selftune/utils/skill-discovery.ts +24 -0
package/cli/selftune/workflows/proposals.ts +184 -0
package/cli/selftune/workflows/skill-scaffold.ts +241 -0
package/cli/selftune/workflows/workflows.ts +100 -26
package/node_modules/@selftune/telemetry-contract/fixtures/complete-push.ts +1 -1
package/node_modules/@selftune/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
package/node_modules/@selftune/telemetry-contract/fixtures/golden.test.ts +0 -1
package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
package/node_modules/@selftune/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
package/node_modules/@selftune/telemetry-contract/package.json +1 -1
package/node_modules/@selftune/telemetry-contract/src/index.ts +1 -0
package/node_modules/@selftune/telemetry-contract/src/schemas.ts +63 -5
package/node_modules/@selftune/telemetry-contract/src/types.ts +97 -7
package/node_modules/@selftune/telemetry-contract/tests/compatibility.test.ts +0 -1
package/package.json +25 -9
package/packages/dashboard-core/AGENTS.md +18 -0
package/packages/dashboard-core/README.md +30 -0
package/packages/dashboard-core/index.ts +3 -0
package/packages/dashboard-core/package.json +39 -0
package/packages/dashboard-core/src/chrome/DashboardChrome.tsx +74 -0
package/packages/dashboard-core/src/chrome/DashboardHeader.tsx +200 -0
package/packages/dashboard-core/src/chrome/DashboardSidebar.tsx +219 -0
package/packages/dashboard-core/src/chrome/RuntimeBadge.tsx +46 -0
package/packages/dashboard-core/src/chrome/index.ts +14 -0
package/packages/dashboard-core/src/chrome/types.ts +81 -0
package/packages/dashboard-core/src/chrome/utils.ts +23 -0
package/packages/dashboard-core/src/gates/FeatureGate.tsx +11 -0
package/packages/dashboard-core/src/gates/LockedRoute.tsx +29 -0
package/packages/dashboard-core/src/gates/UpgradeCard.tsx +89 -0
package/packages/dashboard-core/src/gates/index.ts +3 -0
package/packages/dashboard-core/src/host/DashboardHostProvider.tsx +62 -0
package/packages/dashboard-core/src/host/adapter.ts +47 -0
package/packages/dashboard-core/src/host/capabilities.ts +55 -0
package/packages/dashboard-core/src/host/index.ts +3 -0
package/packages/dashboard-core/src/models/analytics.ts +39 -0
package/packages/dashboard-core/src/models/index.ts +4 -0
package/packages/dashboard-core/src/models/overview.ts +98 -0
package/packages/dashboard-core/src/models/runtime.ts +7 -0
package/packages/dashboard-core/src/models/skills.ts +34 -0
package/packages/dashboard-core/src/routes/index.ts +2 -0
package/packages/dashboard-core/src/routes/manifest.test.ts +70 -0
package/packages/dashboard-core/src/routes/manifest.ts +451 -0
package/packages/dashboard-core/src/routes/types.ts +39 -0
package/packages/dashboard-core/src/screens/analytics/AnalyticsScreen.tsx +278 -0
package/packages/dashboard-core/src/screens/analytics/index.ts +1 -0
package/packages/dashboard-core/src/screens/index.ts +37 -0
package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.test.ts +101 -0
package/packages/dashboard-core/src/screens/overview/OverviewComparisonSurface.tsx +393 -0
package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.test.tsx +113 -0
package/packages/dashboard-core/src/screens/overview/OverviewCompositionSurface.tsx +72 -0
package/packages/dashboard-core/src/screens/overview/OverviewCoreSurface.tsx +71 -0
package/packages/dashboard-core/src/screens/overview/OverviewOnboardingBanner.tsx +90 -0
package/packages/dashboard-core/src/screens/overview/OverviewRunSummary.tsx +40 -0
package/packages/dashboard-core/src/screens/overview/index.ts +16 -0
package/packages/dashboard-core/src/screens/overview/types.ts +13 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportDailyBreakdownSection.tsx +99 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportDataQualityTabContent.tsx +35 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceRail.tsx +71 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceSection.tsx +63 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportEvidenceTabContent.tsx +25 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportInvocationsSection.tsx +24 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportMissedQueriesSection.tsx +79 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportScaffold.tsx +150 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportSections.test.tsx +224 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.test.tsx +76 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportTabs.tsx +88 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportTrendSection.tsx +33 -0
package/packages/dashboard-core/src/screens/skill-report/SkillReportTrustBadge.tsx +67 -0
package/packages/dashboard-core/src/screens/skill-report/index.ts +45 -0
package/packages/dashboard-core/src/screens/skills/SkillsLibraryScreen.tsx +162 -0
package/packages/dashboard-core/src/screens/skills/index.ts +6 -0
package/packages/telemetry-contract/fixtures/complete-push.ts +1 -1
package/packages/telemetry-contract/fixtures/evidence-only-push.ts +2 -2
package/packages/telemetry-contract/fixtures/golden.test.ts +0 -1
package/packages/telemetry-contract/fixtures/partial-push-no-sessions.ts +1 -1
package/packages/telemetry-contract/fixtures/partial-push-unresolved-parents.ts +2 -2
package/packages/telemetry-contract/package.json +1 -1
package/packages/telemetry-contract/src/index.ts +1 -0
package/packages/telemetry-contract/src/schemas.ts +63 -5
package/packages/telemetry-contract/src/types.ts +97 -7
package/packages/telemetry-contract/tests/compatibility.test.ts +0 -1
package/packages/ui/AGENTS.md +16 -0
package/packages/ui/README.md +1 -1
package/packages/ui/package.json +1 -1
package/packages/ui/src/components/ActivityTimeline.tsx +152 -168
package/packages/ui/src/components/AnalyticsCharts.tsx +344 -0
package/packages/ui/src/components/EvidenceViewer.tsx +229 -464
package/packages/ui/src/components/EvolutionTimeline.tsx +34 -87
package/packages/ui/src/components/InfoTip.tsx +1 -2
package/packages/ui/src/components/InvocationsPanel.tsx +413 -0
package/packages/ui/src/components/JobHistoryTimeline.tsx +156 -0
package/packages/ui/src/components/OrchestrateRunsPanel.tsx +18 -36
package/packages/ui/src/components/OverviewPanels.tsx +693 -0
package/packages/ui/src/components/PipelineStatusBar.tsx +65 -0
package/packages/ui/src/components/SkillReportGuide.tsx +215 -0
package/packages/ui/src/components/SkillReportPanels.tsx +919 -0
package/packages/ui/src/components/SkillsLibrary.tsx +437 -0
package/packages/ui/src/components/index.ts +56 -1
package/packages/ui/src/components/section-cards.tsx +18 -35
package/packages/ui/src/components/skill-health-grid.tsx +47 -37
package/packages/ui/src/lib/constants.tsx +0 -1
package/packages/ui/src/primitives/card.tsx +1 -1
package/packages/ui/src/primitives/checkbox.tsx +1 -1
package/packages/ui/src/primitives/dropdown-menu.tsx +2 -2
package/packages/ui/src/primitives/select.tsx +2 -2
package/packages/ui/src/primitives/tabs.tsx +7 -6
package/packages/ui/src/types.ts +182 -4
package/skill/SKILL.md +130 -318
package/skill/agents/diagnosis-analyst.md +3 -3
package/skill/agents/evolution-reviewer.md +3 -3
package/skill/agents/integration-guide.md +3 -3
package/skill/agents/pattern-analyst.md +2 -2
package/skill/references/cli-quick-reference.md +89 -0
package/skill/references/creator-playbook.md +131 -0
package/skill/references/examples.md +48 -0
package/skill/references/troubleshooting.md +47 -0
package/skill/references/version-history.md +1 -1
package/skill/selftune.contribute.json +11 -0
package/skill/{Workflows → workflows}/Baseline.md +20 -1
package/skill/{Workflows → workflows}/Contribute.md +23 -10
package/skill/{Workflows → workflows}/Contributions.md +13 -5
package/skill/workflows/CreateTestDeploy.md +170 -0
package/skill/{Workflows → workflows}/CreatorContributions.md +18 -6
package/skill/{Workflows → workflows}/Cron.md +1 -1
package/skill/{Workflows → workflows}/Dashboard.md +20 -0
package/skill/{Workflows → workflows}/Doctor.md +1 -1
package/skill/{Workflows → workflows}/Evals.md +67 -2
package/skill/{Workflows → workflows}/Evolve.md +119 -30
package/skill/{Workflows → workflows}/EvolveBody.md +41 -1
package/skill/{Workflows → workflows}/Grade.md +1 -1
package/skill/{Workflows → workflows}/Ingest.md +60 -2
package/skill/{Workflows → workflows}/Initialize.md +16 -9
package/skill/{Workflows → workflows}/Orchestrate.md +13 -3
package/skill/{Workflows → workflows}/PlatformHooks.md +19 -3
package/skill/workflows/Registry.md +99 -0
package/skill/{Workflows → workflows}/Schedule.md +3 -3
package/skill/workflows/SignalsDashboard.md +87 -0
package/skill/{Workflows → workflows}/Sync.md +3 -1
package/skill/{Workflows → workflows}/UnitTest.md +19 -0
package/skill/{Workflows → workflows}/Watch.md +42 -2
package/skill/{Workflows → workflows}/Workflows.md +39 -2
package/apps/local-dashboard/dist/assets/index-D8O-RG1I.js +0 -60
package/apps/local-dashboard/dist/assets/index-_EcLywDg.css +0 -1
package/apps/local-dashboard/dist/assets/vendor-react-CKkiCskZ.js +0 -11
package/apps/local-dashboard/dist/assets/vendor-ui-CGEmUayx.js +0 -12
package/cli/selftune/utils/html.ts +0 -27
package/packages/ui/src/components/RecentActivityFeed.tsx +0 -117
/package/skill/{Workflows → workflows}/AlphaUpload.md +0 -0
/package/skill/{Workflows → workflows}/AutoActivation.md +0 -0
/package/skill/{Workflows → workflows}/Badge.md +0 -0
/package/skill/{Workflows → workflows}/Composability.md +0 -0
/package/skill/{Workflows → workflows}/EvolutionMemory.md +0 -0
/package/skill/{Workflows → workflows}/ExportCanonical.md +0 -0
/package/skill/{Workflows → workflows}/Hook.md +0 -0
/package/skill/{Workflows → workflows}/ImportSkillsBench.md +0 -0
/package/skill/{Workflows → workflows}/Quickstart.md +0 -0
/package/skill/{Workflows → workflows}/Recover.md +0 -0
/package/skill/{Workflows → workflows}/RepairSkillUsage.md +0 -0
/package/skill/{Workflows → workflows}/Replay.md +0 -0
/package/skill/{Workflows → workflows}/Rollback.md +0 -0
/package/skill/{Workflows → workflows}/Telemetry.md +0 -0
/package/skill/{Workflows → workflows}/Uninstall.md +0 -0

package/skill/{Workflows → workflows}/CreatorContributions.md RENAMED Viewed

@@ -1,10 +1,12 @@
 # selftune Creator-Contributions Workflow
-Manage the creator-side `selftune.contribute.json` file bundled with a skill.
+Manage the **creator sharing setup** — the `selftune.contribute.json` file
+bundled with a skill package.
 This is **not** the same as:
-- `selftune contributions` — end-user opt-in / opt-out preferences
-- `selftune contribute` — community export bundle
+- `selftune contributions` — end-user **sharing preferences** (opt-in / opt-out)
+- `selftune contribute` — community **export bundle** (anonymized data export)
+- The signals dashboard — viewing aggregated **contributor signal data** from all contributors
 ## When to Use
@@ -45,8 +47,17 @@ selftune creator-contributions disable --skill <name> [--skill-path <path>]
 ## Notes
 - This is local packaging/setup only. It does **not** upload creator-directed signals yet.
-- The creator ID is currently sourced from `--creator-id` or the local alpha identity's `cloud_user_id`.
+- The `creator_id` field must be the creator's cloud user UUID (the `cloud_user_id` from alpha enrollment). This is the canonical identifier used to route signals back to the correct creator account.
+- The creator ID is sourced from `--creator-id` or the local alpha identity's `cloud_user_id`.
 - Use this workflow when the user is preparing a skill package.
+- For the full creator lifecycle, read `references/creator-playbook.md` before shipping.
+## Selftune Dogfood Config
+The selftune skill itself ships a bundled `selftune.contribute.json` at
+`oss/selftune/skill/selftune.contribute.json`. This is the selftune project
+dogfooding its own creator-directed relay flow. The `creator_id` field is
+set to the production selftune creator's cloud user UUID.
 ## Common Patterns
@@ -60,13 +71,14 @@ selftune creator-contributions disable --skill <name> [--skill-path <path>]
 > Run `selftune creator-contributions enable --skill <name>`.
 > If auto-discovery fails, rerun with `--skill-path /path/to/SKILL.md`.
 > If no creator identity is available locally, rerun with `--creator-id <id>`.
-> Example: `selftune creator-contributions enable --skill sc-search --skill-path ./skills/sc-search/SKILL.md --creator-id cr_state_change --signals trigger,grade,miss_category --message "Share privacy-safe usage signals with the skill creator." --privacy-url https://statechange.ai/privacy`
+> The command rejects non-UUID creator IDs and unsupported signal names.
+> Example: `selftune creator-contributions enable --skill sc-search --skill-path ./skills/sc-search/SKILL.md --creator-id 550e8400-e29b-41d4-a716-446655440000 --signals trigger,grade,miss_category --message "Share privacy-safe usage signals with the skill creator." --privacy-url https://statechange.ai/privacy`
 **User wants to enable creator contributions for a whole installed skill suite**
 > Run `selftune creator-contributions enable --all --prefix sc-`.
 > This is the fastest path when preparing a whole family of skills like State Change skills.
-> Example: `selftune creator-contributions enable --all --prefix sc- --creator-id cr_state_change`
+> Example: `selftune creator-contributions enable --all --prefix sc- --creator-id 550e8400-e29b-41d4-a716-446655440000`
 **User wants to stop bundling creator contribution config**

package/skill/{Workflows → workflows}/Cron.md RENAMED Viewed

@@ -130,4 +130,4 @@ interactive mode is for user-directed improvements.
 - **User needs a specific timezone (OpenClaw)** -- Run `selftune cron setup --platform openclaw --tz America/New_York`.
 - **User asks what jobs are registered** -- Run `selftune cron list`. Shows a table of all selftune cron jobs with their schedules and descriptions.
 - **User wants to remove cron automation** -- Run `selftune cron remove`. Preview first with `selftune cron remove --dry-run`.
-- **Skill regressed after cron evolution** -- The watch job should catch this automatically. If not, run `selftune evolve rollback --skill <name> --skill-path <path>` manually. See `Workflows/Rollback.md`.
+- **Skill regressed after cron evolution** -- The watch job should catch this automatically. If not, run `selftune evolve rollback --skill <name> --skill-path <path>` manually. See `workflows/Rollback.md`.

package/skill/{Workflows → workflows}/Dashboard.md RENAMED Viewed

@@ -22,6 +22,7 @@ generate JSONL from SQLite for debugging or offline analysis.
 | Flag            | Description                               | Default |
 | --------------- | ----------------------------------------- | ------- |
 | `--port <port>` | Custom port for the server                | 3141    |
+| `--restart`     | Force-restart an existing dashboard on the target port | Off |
 | `--no-open`     | Start server without opening browser      | Off     |
 | `--serve`       | _(Deprecated)_ Alias for default behavior | —       |
@@ -35,6 +36,16 @@ suggesting `selftune dashboard` instead.
 The live server binds to `localhost:3141` by default. Use `--port` to
 override.
+If a healthy selftune dashboard is already running on the requested port,
+`selftune dashboard` reuses it instead of failing. If the running standalone
+dashboard version is older than the installed CLI, the command restarts it
+automatically to pick up the update. Use `--restart` to force a restart
+even when the versions match.
+The dashboard client also polls `/api/health` for `spa_build_id`. If the server
+is newer than the loaded client, the UI shows a reload prompt instead of silently
+staying stale.
 ### Endpoints
 | Method | Path                       | Description                                                |
@@ -162,6 +173,7 @@ checked file paths.
 ```bash
 selftune dashboard
 selftune dashboard --port 8080
+selftune dashboard --restart
 selftune dashboard --no-open
 ```
@@ -182,6 +194,14 @@ to trigger watch, evolve, or rollback directly from the dashboard.
 > Run `selftune dashboard`. The server provides real-time updates via SSE
 > (~1 second latency).
+**User just updated selftune and wants the dashboard to pick up the new UI**
+> Run `selftune dashboard`. It reuses a healthy instance when possible and
+> automatically restarts an older standalone dashboard version on the same port.
+> If the user explicitly wants a restart, run `selftune dashboard --restart`.
+> If the browser still has an older client loaded, the dashboard shows a reload
+> prompt based on `/api/health` build metadata.
 **Dashboard shows no data**
 > Run `selftune doctor` to verify hooks are installed. If hooks are missing,

package/skill/{Workflows → workflows}/Doctor.md RENAMED Viewed

@@ -163,7 +163,7 @@ For each failed check, take the appropriate action:
 | `evolution_audit`          | Remove corrupted entries. Future operations will append clean entries.                                                                           |
 | `dashboard_freshness_mode` | This is an operator warning, not a broken install. Expect possible freshness gaps for SQLite-only writes and export before destructive recovery. |
 | `skill_version_sync`       | Run `bun run sync-version` to stamp SKILL.md from package.json.                                                                                  |
-| `version_up_to_date`       | Run `npm install -g selftune` to update.                                                                                                         |
+| `version_up_to_date`       | Follow `.checks[].guidance.next_command` for the active install source. Common fixes are `npm install -g selftune@latest`, `bun add -g selftune@latest`, or `npx skills add selftune-dev/selftune`. |
 ### 4. Re-run Doctor

package/skill/{Workflows → workflows}/Evals.md RENAMED Viewed

@@ -20,6 +20,24 @@ Invoke this workflow when the user requests any of the following:
 selftune eval generate --skill <name> [options]
 ```
+## Recommended Creator Loop
+Use eval generation as step 1 of the default creator loop:
+```bash
+selftune eval generate --skill <name>
+selftune eval unit-test --skill <name> --generate --skill-path <path>
+selftune evolve --skill <name> --skill-path <path> --dry-run --validation-mode replay
+selftune grade baseline --skill <name> --skill-path <path>
+selftune evolve --skill <name> --skill-path <path> --with-baseline
+selftune watch --skill <name>
+```
+The command still writes to the requested output path, and it now also mirrors a canonical copy into
+`~/.selftune/eval-sets/<skill>.json` so the dashboard and `selftune status` can track whether eval
+coverage exists. Once the earlier steps are complete, the creator-loop surfaces flip from
+"needs testing" to "ready to deploy", and then to "watching" after the skill ships.
 ## Options
 | Flag                               | Description                                           | Default                           |
@@ -39,6 +57,8 @@ selftune eval generate --skill <name> [options]
 | `--auto-synthetic`                 | Fall back to SKILL.md-based cold-start evals when no trusted triggers exist | Off                  |
 | `--skill-path <path>`              | Path to SKILL.md (required with `--synthetic`)        | —                                 |
 | `--model <model>`                  | LLM model to use for synthetic generation             | Agent default                     |
+| `--blend`                          | Blend log-based and synthetic evals into one set      | Off                               |
+| `--help`                           | Show command help                                     | Off                               |
 ## Output Format
@@ -49,11 +69,14 @@ selftune eval generate --skill <name> [options]
   {
     "query": "Make me a slide deck for the Q3 board meeting",
     "should_trigger": true,
-    "invocation_type": "contextual"
+    "invocation_type": "contextual",
+    "source": "log",
+    "created_at": "2026-04-01T12:00:00Z"
   },
   {
     "query": "What format should I use for a presentation?",
-    "should_trigger": false
+    "should_trigger": false,
+    "source": "synthetic"
   }
 ]
 ```
@@ -61,6 +84,24 @@ selftune eval generate --skill <name> [options]
 Each entry has `query` (string, max 500 chars), `should_trigger` (boolean),
 and optional `invocation_type` (omitted when `--no-taxonomy` is set).
+Entries also carry optional provenance fields:
+- `source` — `"log"` (from real usage logs), `"synthetic"` (LLM-generated from SKILL.md), or `"blended"` (synthetic entry that survived deduplication in a blended set)
+- `created_at` — ISO timestamp of when the entry was created
+Use `computeEvalSourceStats(entries)` to get aggregate provenance statistics:
+```json
+{
+  "total": 80,
+  "synthetic": 10,
+  "log": 50,
+  "blended": 20,
+  "oldest": "2026-03-01T00:00:00Z",
+  "newest": "2026-04-01T12:00:00Z"
+}
+```
 ### List Skills
 ```json
@@ -181,6 +222,30 @@ Use `--model` to override the default LLM model:
 selftune eval generate --skill pptx --synthetic --skill-path ./skills/pptx/SKILL.md --model claude-sonnet-4-5-20250514
 ```
+### Generate Blended Evals
+When a skill has real log data but you want to fill coverage gaps with synthetic
+entries, use `--blend` to combine both sources into one eval set.
+```bash
+selftune eval generate --skill pptx --blend --skill-path /path/to/skills/pptx/SKILL.md
+```
+The blending policy:
+1. Keep ALL log-based entries (marked `source: "log"`)
+2. Generate synthetic entries from SKILL.md
+3. Deduplicate: drop any synthetic entry whose normalized Levenshtein distance to any log entry is less than 0.3 (i.e., a near-duplicate of a real query)
+4. Mark surviving synthetic entries as `source: "blended"`
+5. Cap total entries at 2x the log-based count
+This preserves real-world boundary cases from logs while filling underrepresented
+invocation types with synthetic entries. The 2x cap prevents synthetic entries from
+overwhelming log signal.
+`--blend` requires a resolvable SKILL.md path. Use `--skill-path` or install the
+skill locally so selftune can find it.
 ### Generate Evals (Log-Based)
 Cross-reference `skill_usage_log.jsonl` (positive triggers) against

package/skill/{Workflows → workflows}/Evolve.md RENAMED Viewed

@@ -19,6 +19,23 @@ Invoke this workflow when the user requests any of the following:
 selftune evolve --skill <name> --skill-path <path> [options]
 ```
+## Recommended Creator Loop
+Do not treat `evolve` as the first step when a creator asks whether a skill is
+ready. The default loop is:
+```bash
+selftune eval generate --skill <name> --skill-path <path>
+selftune eval unit-test --skill <name> --generate --skill-path <path>
+selftune evolve --skill <name> --skill-path <path> --dry-run --validation-mode replay
+selftune grade baseline --skill <name> --skill-path <path>
+```
+Then move to a live `selftune evolve ...` or `selftune watch ...` run.
+If canonical evals or stored unit-test results already exist, reuse them rather
+than regenerating everything.
 ## Options
 | Flag                         | Description                                                             | Default                        |
@@ -26,7 +43,7 @@ selftune evolve --skill <name> --skill-path <path> [options]
 | `--skill <name>`             | Skill name                                                              | Required                       |
 | `--skill-path <path>`        | Path to the skill's SKILL.md                                            | Required                       |
 | `--eval-set <path>`          | Pre-built eval set JSON                                                 | Auto-generated from logs       |
-| `--agent <name>`             | Agent CLI to use (claude, codex, opencode)                              | Auto-detected                  |
+| `--agent <name>`             | Agent CLI to use (claude, codex, opencode, pi)                          | Auto-detected                  |
 | `--dry-run`                  | Propose and validate without deploying                                  | Off                            |
 | `--confidence <n>`           | Minimum confidence threshold (0-1)                                      | 0.6                            |
 | `--max-iterations <n>`       | Maximum retry iterations                                                | 3                              |
@@ -42,8 +59,10 @@ selftune evolve --skill <name> --skill-path <path> [options]
 | `--gate-effort <level>`      | Thinking effort for the final gate (`low|medium|high|max`)              | None                           |
 | `--adaptive-gate`            | Escalate risky gate checks to `opus` + `high` effort                    | Off                            |
 | `--proposal-model <model>`   | Model for proposal generation LLM calls                                 | None                           |
+| `--validation-mode <mode>`   | Validation strategy: `auto`, `replay`, or `judge`                       | `auto`                         |
 | `--sync-first`               | Refresh source-truth telemetry before generating evals/failure patterns | Off                            |
 | `--sync-force`               | Force a full source rescan during `--sync-first`                        | Off                            |
+| `--help`                     | Show command help                                                       | Off                            |
 ## Output Format
@@ -83,37 +102,42 @@ Routing/body validation may also carry provenance fields such as:
 - `validation_fixture_id` — fixture identifier when replay-backed validation is used
 - `before_pass_rate` / `after_pass_rate` — only present when trigger validation actually ran; structural-guard exits do not emit synthetic pass rates
-Most evolve runs today still validate through `llm_judge`. Routing evolution now
-auto-builds a replay fixture from the target skill plus installed sibling
-skills in the same registry, so replay-backed validation is preferred whenever
-that local fixture can be constructed because it captures host-style routing
-behavior instead of model judgment.
-For Claude Code, the replay path now stages a temporary project-local
-`.claude/skills` registry, swaps in the candidate routing table, and runs a
-one-turn Claude print-mode session with project/local settings only. Validation
-records whether Claude actually invoked the target skill, invoked a competing
-skill, invoked an unrelated skill, or made no routing decision at all.
-Unrelated skill use is treated as a replay failure even on negative evals,
-because it still indicates the runtime routed somewhere unexpected. If that
-runtime path is unavailable or fails to reach a runtime decision, selftune
-falls back to the existing fixture-backed surface simulation and notes the
-fallback in the replay evidence instead of pretending it was a runtime result.
-For non-Claude platforms today, replay remains fixture-backed: it evaluates the
-target routing table against the installed target/competing skill surfaces in a
-controlled replay fixture and records per-entry evidence. That is still a
-stronger signal than a free-form judge prompt, but you should describe it as
-replay-backed validation, not as live operator telemetry.
+Most evolve runs today still validate through `llm_judge`. Replay-backed
+validation is only considered available when selftune can run a real
+host/runtime replay for the target host. Today that means the Claude Code,
+Codex, and OpenCode paths can stage a temporary local registry, apply the
+candidate skill content, and observe the runtime's actual routing decision;
+when that runtime path is unavailable, `auto` falls back to `llm_judge` and
+`replay` errors explicitly instead of silently downgrading to fixture
+simulation.
+Description, routing, and full-body evolution now share the same public
+validation contract: `auto` prefers replay and falls back to judge, `replay`
+requires a replay path, and `judge` bypasses replay entirely. Audit and
+evidence records may also include `validation_fallback_reason` when `auto`
+had to fall back from replay to judge.
+Replay stages the candidate into the target host's project-local registry:
+Claude Code uses `.claude/skills`, Codex uses `.agents/skills`, and OpenCode
+uses `.opencode/skills`. Validation records whether the runtime selected the
+target skill, selected a competing skill, selected an unrelated skill, or made
+no routing decision at all. Reads outside the staged skill set are treated as
+replay failures even on negative evals, because they indicate the runtime left
+the controlled evaluation surface.
+For hosts without runtime replay support today, replay is not available. In
+`auto` mode selftune falls back to `llm_judge`; in `replay` mode it exits with
+`REPLAY_UNAVAILABLE`. Do not describe fixture-only surface matching as replay
+validation in user-facing summaries.
 Replay parsing is intentionally conservative: unreadable skill files degrade to
 empty surfaces instead of throwing, and malformed routing rows with empty
-trigger cells are ignored rather than treated as valid triggers. Claude replay
-also normalizes observed `Read` paths against the staged workspace, so relative
-skill reads still count as read-only evidence for the target or competing
-skill. Reads outside the staged skill set are treated as replay failures rather
-than benign negatives, because they indicate the runtime left the controlled
-evaluation surface.
+trigger cells are ignored rather than treated as valid triggers. Replay also
+normalizes observed skill reads against the staged workspace, so relative skill
+paths from Claude, Codex, or OpenCode still count as evidence for the target or
+competing skill. Reads outside the staged skill set are treated as replay
+failures rather than benign negatives, because they indicate the runtime left
+the controlled evaluation surface.
 ## Parsing Instructions
@@ -281,6 +305,40 @@ The candidate is tested against the full eval set:
 If validation fails, the command retries up to `--max-iterations` times
 with adjusted proposals.
+### Validation Mode (`--validation-mode`)
+The `--validation-mode` flag controls which validation engine is used for
+description proposals. Three modes are available:
+| Mode     | Behavior                                                                 |
+| -------- | ------------------------------------------------------------------------ |
+| `auto`   | Try replay-based validation first; fall back to LLM judge if unavailable |
+| `replay` | Replay engine only; error if no replay fixture or runner is available    |
+| `judge`  | LLM judge only (legacy path via `validateProposal`)                      |
+The default is `auto`, which provides the strongest available signal without
+requiring manual fixture configuration. When replay is available, it stages the
+candidate skill content into a temporary local registry and records the
+runtime's actual routing decision per eval entry. For description evolution,
+that means the proposed description is applied to the target skill before
+replay. When replay is not available, `auto` falls back to the LLM judge and
+logs the fallback.
+The engine that actually ran is recorded in the `validation_mode` field of
+audit entries (`llm_judge`, `host_replay`, or `structural_guard`; these differ
+from the `auto`/`replay`/`judge` values accepted by the flag), along with
+`validation_agent` and `validation_fixture_id` when applicable.
+```bash
+# Default: auto (replay-first, judge fallback)
+selftune evolve --skill pptx --skill-path ./skills/pptx/SKILL.md
+# Force replay only (error if unavailable)
+selftune evolve --skill pptx --skill-path ./skills/pptx/SKILL.md --validation-mode replay
+# Force judge only (legacy behavior)
+selftune evolve --skill pptx --skill-path ./skills/pptx/SKILL.md --validation-mode judge
+```
 ### Aggregate Metrics To Report
 When summarizing an evolution run, include these aggregate metrics rather
@@ -378,6 +436,37 @@ selftune evolve --skill X --skill-path Y --cheap-loop --gate-model opus --gate-e
 selftune evolve --skill X --skill-path Y --proposal-model haiku --validation-model sonnet
 ```
+## Apply Contributor Proposal
+The `apply-proposal` subcommand fetches an approved contributor aggregate
+proposal from the cloud dashboard and applies it to the local SKILL.md.
+```bash
+selftune evolve apply-proposal --id <proposal-id> --skill-path <path> [--dry-run]
+```
+### Apply-Proposal Options
+| Flag                  | Description                                     | Default  |
+| --------------------- | ----------------------------------------------- | -------- |
+| `--id <uuid>`         | Proposal UUID from the dashboard                | Required |
+| `--skill-path <path>` | Path to the target SKILL.md                     | Required |
+| `--dry-run`           | Preview the proposal without writing to disk    | Off      |
+### Apply-Proposal Flow
+1. Fetch the proposal via `GET /api/v1/proposals/:id`
+2. Verify `proposed_by` is `contributor_aggregate` and status is `approved`
+3. Display a summary (type, reason, pass rate change, diff preview)
+4. If not `--dry-run`: back up SKILL.md, apply the proposed value, and
+   `PATCH /api/v1/proposals/:id` with status `applied`
+### When to Use
+- After reviewing and approving a contributor proposal in the cloud dashboard
+- When community signal suggests a description or body improvement
+- As the final step in the contributor-driven evolution workflow
 ## Common Patterns
 **User asks to evolve a specific skill (e.g., "evolve the pptx skill"):**
@@ -398,7 +487,7 @@ Also check if the eval set has contradictory expectations.
 **Agent CLI override needed:**
 The evolve command auto-detects the installed agent CLI.
-Use `--agent <name>` to override (claude, codex, opencode).
+Use `--agent <name>` to override (claude, codex, opencode, pi).
 ## Subagent Escalation

package/skill/{Workflows → workflows}/EvolveBody.md RENAMED Viewed

@@ -10,6 +10,22 @@ LLM validates them through a 3-gate pipeline.
 selftune evolve body --skill <name> --skill-path <path> --target <target> [options]
 ```
+## Recommended Creator Loop
+Before mutating routing or the full body, make sure the creator trust loop is in
+place:
+```bash
+selftune eval generate --skill <name> --skill-path <path>
+selftune eval unit-test --skill <name> --generate --skill-path <path>
+selftune evolve body --skill <name> --skill-path <path> --target <target> --dry-run --validation-mode replay
+selftune grade baseline --skill <name> --skill-path <path>
+```
+If replay validation or the baseline is still missing, prefer filling that gap
+before live deployment. Body and routing evolution are much harder to trust than
+description-only changes when the creator loop is incomplete.
 ## Options
 | Flag                         | Description                                                                           | Default                  |
@@ -26,6 +42,7 @@ selftune evolve body --skill <name> --skill-path <path> --target <target> [optio
 | `--max-iterations <n>`       | Maximum refinement iterations                                                         | 3                        |
 | `--task-description <text>`  | Context for the evolution goal                                                        | None                     |
 | `--validation-model <model>` | Model for trigger-check validation calls (overrides `--student-model` for validation) | None                     |
+| `--validation-mode <mode>`   | Validation strategy: `auto`, `replay`, or `judge`                                     | `auto`                   |
 | `--teacher-effort <level>`   | Effort level for teacher LLM: `low`, `medium`, `high`, `max`                          | `high`                   |
 | `--review`                   | Run `evolution-reviewer` subagent as Gate 4 before deployment                         | Off                      |
 | `--few-shot <paths>`         | Comma-separated paths to example SKILL.md files                                       | None                     |
@@ -51,7 +68,7 @@ Every proposal passes through three sequential gates:
 | Gate                          | Type        | What it checks                                                                                  | Cost     |
 | ----------------------------- | ----------- | ----------------------------------------------------------------------------------------------- | -------- |
 | **Gate 1: Structural**        | Pure code   | YAML frontmatter present, `# Title` exists, `## Workflow Routing` preserved if original had one | Free     |
-| **Gate 2: Trigger Accuracy**  | Student LLM | YES/NO trigger check per eval entry on the extracted description                                | Cheap    |
+| **Gate 2: Trigger Accuracy**  | Replay or student LLM | Runtime replay when available; otherwise YES/NO trigger check per eval entry                     | Cheap    |
 | **Gate 3: Quality**           | Student LLM | Body clarity and completeness score (0.0-1.0)                                                   | Cheap    |
 | **Gate 4: Reviewer** (opt-in) | Subagent    | `evolution-reviewer` multi-turn review — reads files, checks evidence, APPROVE/REJECT verdict   | Moderate |
@@ -141,6 +158,25 @@ Few-shot examples from `--few-shot` paths provide structural guidance.
 Each gate runs in sequence. If a gate fails, the teacher receives the
 failure details and generates a refined proposal.
+### Validation Mode (`--validation-mode`)
+`evolve body` uses the same validation contract as `evolve`:
+| Mode     | Behavior                                                                  |
+| -------- | ------------------------------------------------------------------------- |
+| `auto`   | Try replay-backed validation first; fall back to LLM judge if unavailable |
+| `replay` | Replay engine only; error if no replay fixture or runner is available     |
+| `judge`  | LLM judge only                                                            |
+When replay is available, selftune stages the candidate skill content into a
+temporary local registry before running the real host/runtime replay. Claude
+Code uses `.claude/skills`, Codex uses `.agents/skills`, and OpenCode uses
+`.opencode/skills`. Routing targets stage the candidate `## Workflow Routing`
+section; body targets stage the full candidate body while preserving
+frontmatter and title. When replay is not available, `auto` falls back to the
+LLM judge and records the `validation_fallback_reason` in audit/evidence
+output.
 ### 6. Deploy or Preview
 If `--dry-run`, prints the proposal without deploying. Otherwise:
@@ -164,6 +200,10 @@ If `--dry-run`, prints the proposal without deploying. Otherwise:
 > `selftune evolve body --skill pptx --skill-path /path/SKILL.md --target body --teacher-model opus --student-model haiku`
+**"Force replay-only validation for a routing change"**
+> `selftune evolve body --skill Research --skill-path ~/.claude/skills/Research/SKILL.md --target routing --validation-mode replay`
 **"Preview what would change"**
 > Always start with `--dry-run` to review the proposal before deploying.

package/skill/{Workflows → workflows}/Grade.md RENAMED Viewed

@@ -17,7 +17,7 @@ selftune grade --skill <name> [options]
 | `--expectations "..."` | Explicit expectations (semicolon-separated) | Auto-derived  |
 | `--evals-json <path>`  | Pre-built eval set JSON file                | None          |
 | `--eval-id <n>`        | Specific eval ID to grade from the eval set | None          |
-| `--agent <name>`       | Agent CLI to use (claude, codex, opencode)  | Auto-detected |
+| `--agent <name>`       | Agent CLI to use (claude, codex, opencode, pi)  | Auto-detected |
 ## Output Format

package/skill/{Workflows → workflows}/Ingest.md RENAMED Viewed

@@ -3,8 +3,8 @@
 > **Note:** Claude Code is the fully supported platform. Codex, OpenCode, and OpenClaw adapters are experimental and may have gaps.
 Import sessions from agent platforms into the shared selftune log format.
-Covers five sub-commands: `ingest claude`, `ingest codex`, `ingest opencode`,
-`ingest openclaw`, and `ingest wrap-codex`.
+Covers six sub-commands: `ingest claude`, `ingest codex`, `ingest opencode`,
+`ingest openclaw`, `ingest pi`, and `ingest wrap-codex`.
 ## When to Use Each
@@ -14,6 +14,7 @@ Covers five sub-commands: `ingest claude`, `ingest codex`, `ingest opencode`,
 | `ingest codex`      | Codex       | Batch     | Import existing Codex rollout logs                  |
 | `ingest opencode`   | OpenCode    | Batch     | Import existing OpenCode sessions                   |
 | `ingest openclaw`   | OpenClaw    | Batch     | Import existing OpenClaw agent sessions             |
+| `ingest pi`         | Pi          | Batch     | Import existing Pi agent sessions                   |
 | `ingest wrap-codex` | Codex       | Real-time | Wrap `codex exec` to capture telemetry live         |
 ---
@@ -200,6 +201,55 @@ Writes to:
 ---
+## ingest pi
+Batch ingest Pi agent session histories into the shared JSONL schema.
+### Default Command
+```bash
+selftune ingest pi
+```
+### Options
+| Flag                    | Description                                                        |
+| ----------------------- | ------------------------------------------------------------------ |
+| `--sessions-dir <path>` | Override default `~/.pi/agent/sessions/` directory                |
+| `--since <date>`        | Only ingest sessions modified after this date (e.g., `2026-01-01`) |
+| `--dry-run`             | Show what would be ingested without writing to logs                |
+| `--force`               | Re-ingest all sessions, ignoring the marker file                   |
+| `--verbose` / `-v`      | Show per-session progress during ingestion                         |
+### Source
+Reads from `~/.pi/agent/sessions/`. Each session file contains Pi agent
+conversation history in JSONL format.
+### Output
+Writes to:
+- `~/.claude/all_queries_log.jsonl` -- extracted user queries
+- `~/.claude/session_telemetry_log.jsonl` -- per-session metrics with `source: "pi"`
+- `~/.claude/skill_usage_log.jsonl` -- skill triggers with `source: "pi"`
+### Steps
+1. Run `selftune ingest pi --dry-run` to preview what would be ingested
+2. Run `selftune ingest pi` to ingest all sessions
+3. Run `selftune doctor` to confirm logs are healthy
+4. Run `selftune eval generate --list-skills` to see if the ingested sessions appear
+### Notes
+- Idempotent: uses a marker file to track which sessions have already been ingested.
+  Safe to run repeatedly. Use `--force` to re-ingest everything.
+- Skill detection heuristic: identifies skills by checking for `SKILL.md` file reads in
+  tool calls and by matching known skill names in assistant text content.
+---
 ## ingest wrap-codex
 Wrap `codex exec` with real-time telemetry capture. Drop-in replacement
@@ -269,6 +319,14 @@ through hooks.
 > Run `selftune ingest openclaw --since 2026-02-01` with an appropriate date.
+**"Ingest Pi sessions"**
+> Run `selftune ingest pi`. Reads from `~/.pi/agent/sessions/` automatically.
+**"Import only recent Pi sessions"**
+> Run `selftune ingest pi --since 2026-02-01` with an appropriate date.
 **"Run codex through selftune"**
 > Use `selftune ingest wrap-codex -- <codex args>` instead of `codex exec <args>` directly.