npm - selftune - Versions diffs - 0.2.13 → 0.2.14 - Mend

selftune 0.2.13 → 0.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

package/apps/local-dashboard/dist/assets/index-BMIS6uUh.css +2 -0
package/apps/local-dashboard/dist/assets/index-DIrdlu2_.js +16 -0
package/apps/local-dashboard/dist/index.html +2 -2
package/cli/selftune/activation-rules.ts +24 -48
package/cli/selftune/constants.ts +7 -0
package/cli/selftune/contribute/bundle.ts +9 -44
package/cli/selftune/dashboard-contract.ts +12 -0
package/cli/selftune/eval/hooks-to-evals.ts +5 -22
package/cli/selftune/grading/auto-grade.ts +3 -13
package/cli/selftune/grading/grade-session.ts +3 -13
package/cli/selftune/hooks/evolution-guard.ts +14 -24
package/cli/selftune/hooks/prompt-log.ts +0 -8
package/cli/selftune/hooks/session-stop.ts +0 -8
package/cli/selftune/ingestors/codex-rollout.ts +9 -4
package/cli/selftune/ingestors/codex-wrapper.ts +15 -13
package/cli/selftune/ingestors/openclaw-ingest.ts +24 -5
package/cli/selftune/ingestors/opencode-ingest.ts +9 -4
package/cli/selftune/localdb/queries.ts +57 -0
package/cli/selftune/monitoring/watch.ts +7 -22
package/cli/selftune/normalization.ts +2 -23
package/cli/selftune/orchestrate.ts +213 -14
package/cli/selftune/schedule.ts +51 -5
package/cli/selftune/utils/jsonl.ts +2 -0
package/package.json +3 -1
package/packages/ui/src/components/RecentActivityFeed.tsx +86 -0
package/packages/ui/src/components/index.ts +1 -0
package/packages/ui/src/components/section-cards.tsx +13 -0
package/skill/SKILL.md +1 -1
package/skill/Workflows/Orchestrate.md +11 -7
package/skill/Workflows/Schedule.md +11 -0
package/skill/references/logs.md +22 -21
package/apps/local-dashboard/dist/assets/index-4_dAY17K.js +0 -16
package/apps/local-dashboard/dist/assets/index-BxV5WZHc.css +0 -2

package/cli/selftune/schedule.ts CHANGED Viewed

@@ -19,6 +19,25 @@ import { parseArgs } from "node:util";
 import { DEFAULT_CRON_JOBS } from "./cron/setup.js";
+// ---------------------------------------------------------------------------
+// Binary resolution — schedulers (launchd, cron, systemd) run with a minimal PATH, so we need full paths
+// ---------------------------------------------------------------------------
+/**
+ * Resolve the absolute path to the `selftune` binary.
+ *
+ * Resolution order:
+ * 1. Bun.which (Bun-native, no spawn) — used whenever it returns a non-empty path
+ * 2. Fallback: ~/.bun/bin/selftune (common bun global install location)
+ *
+ * The fallback is built from `homedir()` and returned as-is; this function does
+ * not check that a file actually exists at that location. Any error thrown by
+ * `Bun.which` is swallowed so the fallback is still returned.
+ *
+ * @returns The path reported by `Bun.which("selftune")`, or the ~/.bun/bin fallback.
+ */
+export function resolveSelftuneBin(): string {
+  try {
+    const resolved = Bun.which("selftune");
+    if (resolved) return resolved;
+  } catch {
+    // Bun.which may throw in edge cases — ignore the error and fall through to the fallback path
+  }
+  return join(homedir(), ".bun", "bin", "selftune");
+}
 // ---------------------------------------------------------------------------
 // Schedule definitions — derived from the shared DEFAULT_CRON_JOBS
 // ---------------------------------------------------------------------------
@@ -137,6 +156,8 @@ function toSystemdExecStart(command: string): string {
 // ---------------------------------------------------------------------------
 export function generateCrontab(): string {
+  const resolvedBin = resolveSelftuneBin();
+  const home = homedir();
   const lines = [
     "# selftune automation — add to your crontab with: crontab -e",
     "#",
@@ -144,10 +165,13 @@ export function generateCrontab(): string {
     "# status remains a reporting job; orchestrate handles sync, candidate",
     "# selection, low-risk description evolution, and watch/rollback follow-up.",
     "#",
+    `PATH=${home}/.bun/bin:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin`,
+    "",
   ];
   for (const entry of SCHEDULE_ENTRIES) {
+    const resolvedCommand = entry.command.replace(/\bselftune\b/g, resolvedBin);
     lines.push(`# ${entry.description}`);
-    lines.push(`${entry.schedule}  ${entry.command}`);
+    lines.push(`${entry.schedule}  ${resolvedCommand}`);
     lines.push("");
   }
   return lines.join("\n");
@@ -177,10 +201,17 @@ export function mergeManagedCrontab(existing: string, managedContent: string): s
   return `${withoutExistingBlock}\n\n${managedBlock}`;
 }
-function buildLaunchdDefinition(entry: ScheduleEntry): { label: string; content: string } {
+function buildLaunchdDefinition(
+  entry: ScheduleEntry,
+  binPath?: string,
+): { label: string; content: string } {
   const label = `com.selftune.${entry.name.replace("selftune-", "")}`;
-  const args = toLaunchdArgs(entry.command);
+  const resolvedBin = binPath ?? resolveSelftuneBin();
+  // Replace bare `selftune` with the resolved absolute path
+  const resolvedCommand = entry.command.replace(/\bselftune\b/g, resolvedBin);
+  const args = toLaunchdArgs(resolvedCommand);
   const schedule = cronToLaunchdSchedule(entry.schedule);
+  const home = homedir();
   return {
     label,
@@ -198,6 +229,13 @@ function buildLaunchdDefinition(entry: ScheduleEntry): { label: string; content:
 <dict>
   <key>Label</key>
   <string>${label}</string>
+  <key>EnvironmentVariables</key>
+  <dict>
+    <key>PATH</key>
+    <string>${home}/.bun/bin:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin</string>
+    <key>HOME</key>
+    <string>${home}</string>
+  </dict>
   <key>ProgramArguments</key>
   <array>
 ${args}
@@ -222,14 +260,20 @@ export function generateLaunchd(): string {
   return plists.join("\n\n");
 }
-function buildSystemdDefinition(entry: ScheduleEntry): {
+function buildSystemdDefinition(
+  entry: ScheduleEntry,
+  binPath?: string,
+): {
   baseName: string;
   timerContent: string;
   serviceContent: string;
 } {
   const unitName = entry.name;
   const calendar = cronToOnCalendar(entry.schedule);
-  const execStart = toSystemdExecStart(entry.command);
+  const resolvedBin = binPath ?? resolveSelftuneBin();
+  const resolvedCommand = entry.command.replace(/\bselftune\b/g, resolvedBin);
+  const execStart = toSystemdExecStart(resolvedCommand);
+  const home = homedir();
   return {
     baseName: unitName,
@@ -247,6 +291,8 @@ Description=${entry.description}
 [Service]
 Type=oneshot
+Environment="PATH=${home}/.bun/bin:/usr/local/bin:/usr/bin:/bin"
+Environment="HOME=${home}"
 ExecStart=${execStart}`,
   };
 }

package/cli/selftune/utils/jsonl.ts CHANGED Viewed

@@ -90,6 +90,8 @@ export function readJsonlFrom<T = Record<string, unknown>>(
  * Append a single record to a JSONL file. Creates parent directories if needed.
  * When logType is provided, validates the record and logs warnings on failure
  * but still writes the record (fail-open: hooks must never block).
+ *
+ * @deprecated Phase 3: JSONL writes removed. Retained for materializer/test utilities only.
  */
 export function appendJsonl(path: string, record: unknown, logType?: LogType): void {
   if (logType) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "selftune",
-  "version": "0.2.13",
+  "version": "0.2.14",
   "description": "Self-improving skills CLI for AI agents",
   "keywords": [
     "agent",
@@ -73,12 +73,14 @@
     "prepublishOnly": "bun run sync-version && bun run build:dashboard",
     "typecheck:dashboard": "cd apps/local-dashboard && bunx tsc --noEmit",
     "check": "bun run lint && bun run format:check && bun run lint:arch && bun run typecheck:dashboard && bun run test",
+    "prepare": "bunx lefthook install || true",
     "start": "bun run cli/selftune/index.ts --help"
   },
   "dependencies": {
     "@selftune/telemetry-contract": "file:packages/telemetry-contract"
   },
   "devDependencies": {
+    "@evilmartians/lefthook": "^1.13.6",
     "@types/bun": "^1.1.0",
     "oxfmt": "^0.41.0",
     "oxlint": "^1.56.0"

package/packages/ui/src/components/RecentActivityFeed.tsx ADDED Viewed

@@ -0,0 +1,86 @@
+import { ZapIcon, CircleDotIcon } from "lucide-react";
+import { timeAgo } from "../lib/format";
+import { Badge } from "../primitives/badge";
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "../primitives/card";
+export interface RecentActivityItem { // one row rendered by RecentActivityFeed
+  timestamp: string; // passed to timeAgo() to produce the relative-time label
+  session_id: string; // combined with skill_name and the row index to build the React key
+  skill_name: string; // shown as the row title
+  query: string; // shown on a second line; the line is omitted when this is empty
+  triggered: boolean; // true: emerald dot and "triggered" badge; false: muted dot and "checked" badge
+  is_live: boolean; // true: adds the "live" badge next to the skill name
+}
+export function RecentActivityFeed({ items }: { items: RecentActivityItem[] }) { // "Recent Activity" card listing skill invocations, one row per item
+  if (items.length === 0) { // empty state: same card title, placeholder text, no description line
+    return (
+      <Card>
+        <CardHeader>
+          <CardTitle className="flex items-center gap-2 text-sm">
+            <ZapIcon className="size-4" />
+            Recent Activity
+          </CardTitle>
+        </CardHeader>
+        <CardContent>
+          <p className="text-sm text-muted-foreground text-center py-8">
+            No recent skill invocations
+          </p>
+        </CardContent>
+      </Card>
+    );
+  }
+  return (
+    <Card>
+      <CardHeader>
+        <CardTitle className="flex items-center gap-2 text-sm">
+          <ZapIcon className="size-4" />
+          Recent Activity
+        </CardTitle>
+        <CardDescription>Latest skill invocations across sessions</CardDescription>
+      </CardHeader>
+      <CardContent className="space-y-2.5">
+        {items.slice(0, 20).map((item, i) => ( // only the first 20 items are rendered; the index is part of the key so repeated session/skill pairs stay unique
+          <div
+            key={`${item.session_id}-${item.skill_name}-${i}`}
+            className="flex gap-3 rounded-md p-1.5"
+          >
+            <div
+              className={`mt-1 size-2 shrink-0 rounded-full ${
+                item.triggered ? "bg-emerald-500" : "bg-muted-foreground/40"
+              }`}
+            />
+            <div className="flex-1 min-w-0 space-y-0.5">
+              <div className="flex items-center gap-2 flex-wrap">
+                <span className="text-xs font-medium truncate">{item.skill_name}</span>
+                {item.is_live && ( // "live" badge is shown only for items flagged is_live
+                  <Badge variant="outline" className="h-4 px-1 text-[10px] gap-1">
+                    <CircleDotIcon className="size-2.5 text-emerald-500" />
+                    live
+                  </Badge>
+                )}
+                {item.triggered ? ( // status badge mirrors the dot colour: "triggered" vs. "checked"
+                  <Badge variant="default" className="h-4 px-1 text-[10px]">
+                    triggered
+                  </Badge>
+                ) : (
+                  <Badge variant="secondary" className="h-4 px-1 text-[10px]">
+                    checked
+                  </Badge>
+                )}
+                <span className="text-[10px] text-muted-foreground font-mono ml-auto shrink-0">
+                  {timeAgo(item.timestamp)}
+                </span>
+              </div>
+              {item.query && ( // query line is skipped when the query string is empty
+                <p className="text-xs text-muted-foreground line-clamp-1 font-mono">{item.query}</p>
+              )}
+            </div>
+          </div>
+        ))}
+      </CardContent>
+    </Card>
+  );
+}

package/packages/ui/src/components/index.ts CHANGED Viewed

@@ -3,5 +3,6 @@ export { EvidenceViewer } from "./EvidenceViewer";
 export { EvolutionTimeline } from "./EvolutionTimeline";
 export { InfoTip } from "./InfoTip";
 export { OrchestrateRunsPanel } from "./OrchestrateRunsPanel";
+export { RecentActivityFeed } from "./RecentActivityFeed";
 export { SectionCards } from "./section-cards";
 export { SkillHealthGrid } from "./skill-health-grid";

package/packages/ui/src/components/section-cards.tsx CHANGED Viewed

@@ -21,6 +21,7 @@ interface SectionCardsProps {
   pendingCount: number;
   evidenceCount: number;
   hasEvolution?: boolean;
+  activeSessionsCount?: number;
 }
 export function SectionCards({
@@ -31,6 +32,7 @@ export function SectionCards({
   pendingCount,
   evidenceCount,
   hasEvolution = true,
+  activeSessionsCount = 0,
 }: SectionCardsProps) {
   const passRateStr = avgPassRate !== null ? `${Math.round(avgPassRate * 100)}%` : "--";
   const passRateGood = avgPassRate !== null && avgPassRate >= 0.7;
@@ -118,6 +120,17 @@ export function SectionCards({
           <CardTitle className="text-2xl font-semibold tabular-nums @[250px]/card:text-3xl">
             {sessionsCount}
           </CardTitle>
+          {activeSessionsCount > 0 && (
+            <CardAction>
+              <Badge variant="outline" className="gap-1.5">
+                <span className="relative flex size-2">
+                  <span className="absolute inline-flex size-full animate-ping rounded-full bg-emerald-400 opacity-75" />
+                  <span className="relative inline-flex size-2 rounded-full bg-emerald-500" />
+                </span>
+                {activeSessionsCount} in progress
+              </Badge>
+            </CardAction>
+          )}
         </CardHeader>
       </Card>

package/skill/SKILL.md CHANGED Viewed

@@ -12,7 +12,7 @@ description: >
   even if they don't say "selftune" explicitly.
 metadata:
   author: selftune-dev
-  version: 0.2.13
+  version: 0.2.14
   category: developer-tools
 ---

package/skill/Workflows/Orchestrate.md CHANGED Viewed

@@ -31,12 +31,14 @@ selftune orchestrate
 | `--max-skills <n>`          | Cap how many candidates are processed in one run           | `5`        |
 | `--recent-window <hours>`   | Window for post-deploy watch/rollback checks               | `48`       |
 | `--sync-force`              | Force a full source replay before candidate selection      | Off        |
+| `--max-auto-grade <n>`      | Max ungraded skills to auto-grade per run (0 to disable)   | `5`        |
 | `--loop`                    | Run as a long-lived process that cycles continuously       | Off        |
 | `--loop-interval <seconds>` | Pause between cycles (minimum 60)                          | `3600`     |
 ## Default Behavior
 - Sync source-truth telemetry first
+- Auto-grade up to 5 ungraded skills that have session data (enables evolution on first run after ingest)
 - Prioritize critical/warning/ungraded skills with real missed-query signal
 - Deploy validated low-risk description changes automatically
 - Watch recent deployments and roll back regressions automatically
@@ -78,10 +80,11 @@ A phased decision report printed to stderr so you can see exactly what happened
 1. **Phase 1: Sync** — which sources were scanned, how many records synced, repair counts
 2. **Phase 2: Status** — skill count, system health, breakdown by status category
-3. **Phase 3: Skill Decisions** — each skill with its action (EVOLVE / WATCH / SKIP) and reason
-4. **Phase 4: Evolution Results** — validation pass-rate changes (before → after), deployment status
-5. **Phase 5: Watch** — post-deploy monitoring with alert and rollback indicators
-6. **Summary** — evaluated/deployed/watched/skipped counts and elapsed time
+3. **Auto-grade** — how many ungraded skills were graded (logged to stderr, included in summary)
+4. **Phase 3: Skill Decisions** — each skill with its action (EVOLVE / WATCH / SKIP) and reason
+5. **Phase 4: Evolution Results** — validation pass-rate changes (before → after), deployment status
+6. **Phase 5: Watch** — post-deploy monitoring with alert and rollback indicators
+7. **Summary** — auto-graded/evaluated/deployed/watched/skipped counts and elapsed time
 A mode banner at the top shows DRY RUN, REVIEW, or AUTONOMOUS with rerun hints when applicable.
@@ -140,9 +143,10 @@ In autonomous mode, orchestrate calls sub-workflows in this fixed order:
 1. **Sync** — refresh source-truth telemetry across all supported agents (`selftune sync`)
 2. **Status** — compute skill health using existing grade results (reads `grading.json` outputs from previous sessions)
-3. **Evolve** — run evolution on selected candidates (pre-flight is skipped, cheap-loop mode enabled, defaults used)
-4. **Watch** — monitor recently evolved skills (auto-rollback enabled by default, `--recent-window` hours lookback)
-5. **Alpha Upload** — if enrolled in the alpha program (`config.alpha.enrolled === true`) and an API key is configured, stage new canonical records (sessions, invocations, evolution evidence, orchestrate runs) into `canonical_upload_staging`, build V2 push payloads, and flush to the cloud API (`POST /api/v1/push`) with Bearer auth. Fail-open: upload errors never block the orchestrate loop. Respects `--dry-run`.
+3. **Auto-grade** — grade up to `--max-auto-grade` (default 5) ungraded skills that have session data but no grades yet. Skipped during `--dry-run` (grading makes LLM calls). After grading, status is recomputed so candidate selection sees updated grades. Fail-open: individual grading errors are logged but never block the loop.
+4. **Evolve** — run evolution on selected candidates (pre-flight is skipped, cheap-loop mode enabled, defaults used)
+5. **Watch** — monitor recently evolved skills (auto-rollback enabled by default, `--recent-window` hours lookback)
+6. **Alpha Upload** — if enrolled in the alpha program (`config.alpha.enrolled === true`) and an API key is configured, stage new canonical records (sessions, invocations, evolution evidence, orchestrate runs) into `canonical_upload_staging`, build V2 push payloads, and flush to the cloud API (`POST /api/v1/push`) with Bearer auth. Fail-open: upload errors never block the orchestrate loop. Respects `--dry-run`.
 Between candidate selection and evolution, orchestrate checks for
 **cross-skill eval set overlap**. When two or more evolution candidates

package/skill/Workflows/Schedule.md CHANGED Viewed

@@ -53,6 +53,17 @@ Outputs examples for all three scheduling systems (cron, launchd, systemd).
 `selftune schedule` is now an alias for `selftune cron`. Both commands are interchangeable. See `Workflows/Cron.md` for the full cron workflow reference.
+## PATH Resolution (All Platforms)
+All three scheduling formats resolve the absolute path to the `selftune` binary
+(via `Bun.which` with a `~/.bun/bin/selftune` fallback) and set explicit PATH
+environment variables. This prevents silent failures from minimal default
+environments that don't include homebrew, bun, or node binary locations.
+- **launchd** — Injects an `EnvironmentVariables` dict with PATH and HOME into each plist.
+- **systemd** — Adds `Environment="PATH=..."` and `Environment="HOME=..."` to each service unit.
+- **cron** — Prepends a `PATH=...` declaration at the top of the generated crontab.
 ## Common Patterns
 - **User wants quick setup on a Linux server** -- Run `selftune schedule --install --format cron`.

package/skill/references/logs.md CHANGED Viewed

@@ -1,14 +1,17 @@
 # Log Format Reference
-selftune writes raw legacy logs plus a canonical event log. This reference
-describes each format in detail for the skill to use when parsing sessions,
-audit trails, and cloud-ingest exports.
-> **Note:** JSONL files are now backup/recovery only. SQLite (`~/.selftune/selftune.db`)
-> is the sole operational store for all runtime reads. JSONL writes are retained for
-> append-only durability, but all dashboard queries, hook reads, grading, monitoring,
-> and upload staging read from SQLite. JSONL reads only occur when custom log paths
-> are provided (e.g., `--telemetry-log`, `--skill-log`) for test isolation.
+selftune uses SQLite as its sole write target and operational store. This
+reference describes the legacy JSONL log formats that remain on disk for
+disaster recovery and export, plus the canonical event schema.
+> **Important (Phase 3 complete):** JSONL writes have been removed from all hooks,
+> ingestors, and normalization pipelines. New data is written exclusively to SQLite
+> (`~/.selftune/selftune.db`). Existing JSONL files are retained on disk but only
+> contain pre-cutover history. The materializer (`localdb/materialize.ts`) can
+> rebuild SQLite from these files but only for data written before Phase 3.
+> Post-cutover recovery requires `selftune export` snapshots or SQLite backups.
+> The file formats below are preserved as a reference for the materializer and
+> export tooling.
 ---
@@ -54,11 +57,11 @@ One JSON record per line. Each record is one completed agent session.
 ## ~/.claude/skill_usage_log.jsonl
-> **Deprecated.** The `skill_usage` and `skill_invocations` data paths have been
+> **Legacy.** The `skill_usage` and `skill_invocations` data paths have been
 > consolidated into a single `skill_invocations` table in SQLite. This JSONL file
-> is still written by hooks for backward compatibility, but the dashboard and
-> queries now read exclusively from `skill_invocations`. New consumers should use
-> the SQLite table via `localdb/queries.ts`.
+> is no longer written (Phase 3). The dashboard and all queries read exclusively
+> from `skill_invocations`. New consumers should use the SQLite table via
+> `localdb/queries.ts`.
 One record per skill trigger event. Historically populated by skill-eval.ts (PostToolUse hook).
@@ -208,10 +211,10 @@ This is operational state, not an analytics source of truth.
 ## ~/.claude/improvement_signals.jsonl
-One record per detected improvement signal. Written by `prompt-log.ts` when a
-user correction or explicit skill request is detected. Read by the orchestrator
-for signal-aware candidate selection, and by `session-stop.ts` to decide whether
-to spawn a reactive orchestrate run.
+One record per detected improvement signal. Previously written by `prompt-log.ts`;
+signals are now written directly to SQLite (`improvement_signals` table) and this
+JSONL file is no longer appended to (Phase 3). The orchestrator reads signals
+from SQLite for signal-aware candidate selection.
 ```json
 {
@@ -225,10 +228,8 @@ to spawn a reactive orchestrate run.
 ```
 Signal records are append-only. When an orchestrate run processes a signal,
-the original record remains unchanged and the orchestrator rewrites the file
-with `consumed: true` set on processed entries. This is the one exception
-to strict append-only semantics in the log system — the rewrite is atomic
-and race-protected by the orchestrate lockfile.
+it sets `consumed: true` via `updateSignalConsumed()` in SQLite. The JSONL
+format below is retained as a reference for the materializer and export.
 Consumed signal example: