npm - selftune - Versions diffs - 0.2.20 → 0.2.22 - Mend

selftune 0.2.20 → 0.2.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

package/README.md +12 -7
package/cli/selftune/adapters/cline/hook.ts +167 -0
package/cli/selftune/adapters/cline/install.ts +197 -0
package/cli/selftune/adapters/codex/hook.ts +296 -0
package/cli/selftune/adapters/codex/install.ts +289 -0
package/cli/selftune/adapters/opencode/hook.ts +222 -0
package/cli/selftune/adapters/opencode/install.ts +543 -0
package/cli/selftune/evolution/evolve-body.ts +26 -2
package/cli/selftune/evolution/validate-host-replay.ts +390 -2
package/cli/selftune/hooks/auto-activate.ts +43 -37
package/cli/selftune/hooks-shared/git-metadata.ts +149 -0
package/cli/selftune/hooks-shared/hook-output.ts +105 -0
package/cli/selftune/hooks-shared/normalize.ts +196 -0
package/cli/selftune/hooks-shared/session-state.ts +76 -0
package/cli/selftune/hooks-shared/skill-paths.ts +50 -0
package/cli/selftune/hooks-shared/stdin-dispatch.ts +59 -0
package/cli/selftune/hooks-shared/types.ts +90 -0
package/cli/selftune/index.ts +56 -4
package/cli/selftune/utils/llm-call.ts +99 -34
package/package.json +1 -1
package/skill/SKILL.md +10 -0
package/skill/Workflows/Evolve.md +22 -6
package/skill/Workflows/Initialize.md +48 -6
package/skill/Workflows/PlatformHooks.md +93 -0

package/cli/selftune/utils/llm-call.ts CHANGED Viewed

@@ -6,9 +6,9 @@
  * modules can reuse the same calling logic.
  */
-import { readFileSync, writeFileSync } from "node:fs";
+import { existsSync, readFileSync, writeFileSync } from "node:fs";
 import { tmpdir } from "node:os";
-import { join } from "node:path";
+import { dirname, join, resolve } from "node:path";
 import { AGENT_CANDIDATES } from "../constants.js";
 import { createLogger } from "./logging.js";
@@ -33,6 +33,40 @@ function resolveModelFlag(flag: string): string {
   return CLAUDE_MODEL_ALIASES[flag] ?? flag;
 }
+/**
+ * Map selftune model aliases to OpenCode provider/model format.
+ * OpenCode uses "provider/model" syntax (e.g. "anthropic/claude-sonnet-4-20250514").
+ */
+const OPENCODE_MODEL_MAP: Record<string, string> = {
+  haiku: "anthropic/claude-haiku-4-5-20251001",
+  sonnet: "anthropic/claude-sonnet-4-20250514",
+  opus: "anthropic/claude-opus-4-20250514",
+};
+/**
+ * Resolve a model alias to OpenCode's provider/model format.
+ *
+ * @param flag - A selftune model alias (a key of OPENCODE_MODEL_MAP) or any
+ *   other model string.
+ * @returns The mapped "provider/model" identifier when the alias has an entry
+ *   in OPENCODE_MODEL_MAP; a flag with no entry is returned unchanged.
+ */
+function resolveOpenCodeModel(flag: string): string {
+  return OPENCODE_MODEL_MAP[flag] ?? flag;
+}
+// ---------------------------------------------------------------------------
+// Bundled agent file loading (for codex inline prompt injection)
+// ---------------------------------------------------------------------------
+const BUNDLED_AGENT_DIR = resolve(dirname(import.meta.path), "..", "..", "..", "skill", "agents");
+/**
+ * Read the bundled agent markdown file and return its body (without frontmatter).
+ * Used by codex path to inline agent instructions into the prompt since codex
+ * has no --agent flag.
+ *
+ * @param agentName - Base name of the agent file; `${agentName}.md` is looked
+ *   up directly inside BUNDLED_AGENT_DIR.
+ * @returns The trimmed file content with a leading `---` ... `---` frontmatter
+ *   block removed, or `null` when the file does not exist.
+ *
+ * NOTE(review): the frontmatter pattern matches `\n` line endings only, so a
+ * file saved with CRLF endings would be returned with its frontmatter intact.
+ * Confirm bundled agent files are always LF.
+ * NOTE(review): agentName is joined into the path without validation;
+ * presumably callers pass fixed internal names. Verify before accepting
+ * user-supplied values.
+ */
+function loadAgentInstructions(agentName: string): string | null {
+  const filePath = join(BUNDLED_AGENT_DIR, `${agentName}.md`);
+  if (!existsSync(filePath)) return null;
+  const content = readFileSync(filePath, "utf-8");
+  // Strip YAML frontmatter
+  return content.replace(/^---\n[\s\S]*?\n---\n*/, "").trim();
+}
 // ---------------------------------------------------------------------------
 // Agent detection
 // ---------------------------------------------------------------------------
@@ -155,7 +189,11 @@ export async function callViaAgent(
     } else if (agent === "codex") {
       cmd = ["codex", "exec", "--skip-git-repo-check", promptContent];
     } else if (agent === "opencode") {
-      cmd = ["opencode", "-p", promptContent, "-f", "text", "-q"];
+      cmd = ["opencode", "run"];
+      if (modelFlag) {
+        cmd.push("--model", resolveOpenCodeModel(modelFlag));
+      }
+      cmd.push(promptContent);
     } else {
       throw new Error(`Unknown agent: ${agent}`);
     }
@@ -222,9 +260,9 @@ export async function callViaAgent(
 // Call LLM via named subagent (multi-turn, agentic)
 // ---------------------------------------------------------------------------
-/** Options for calling a named Claude Code subagent. */
+/** Options for calling a named subagent (Claude Code or OpenCode). */
 export interface SubagentCallOptions {
-  /** Name of the subagent (synced into ~/.claude/agents/ by selftune init/update). */
+  /** Name of the subagent (synced into ~/.claude/agents/ or opencode.json by selftune init/update). */
   agentName: string;
   /** The task prompt for the subagent. */
   prompt: string;
@@ -243,13 +281,13 @@ export interface SubagentCallOptions {
 }
 /**
- * Call a named Claude Code subagent in print mode. The subagent runs its
- * multi-turn workflow (reading files, running commands, etc.) and returns
- * the final text output.
+ * Call a named subagent in print mode. The subagent runs its multi-turn
+ * workflow (reading files, running commands, etc.) and returns the final
+ * text output.
  *
- * Unlike callViaAgent(), this does NOT use --bare (agents need discovery)
- * and passes --agent + --max-turns for agentic multi-turn behavior.
- * Only supports the claude CLI.
+ * Supports Claude Code (`claude --agent`), OpenCode (`opencode run --agent`),
+ * and Codex (`codex exec` with agent instructions inlined into the prompt).
+ * Auto-detects the available agent CLI.
  */
 export async function callViaSubagent(options: SubagentCallOptions): Promise<string> {
   const {
@@ -263,31 +301,58 @@ export async function callViaSubagent(options: SubagentCallOptions): Promise<str
     allowedTools,
   } = options;
-  const cmd: string[] = [
-    "claude",
-    "-p",
-    prompt,
-    "--agent",
-    agentName,
-    "--max-turns",
-    String(maxTurns),
-  ];
-  if (appendSystemPrompt) {
-    cmd.push("--append-system-prompt", appendSystemPrompt);
-  }
-  if (modelFlag) {
-    const resolved = resolveModelFlag(modelFlag);
-    cmd.push("--model", resolved);
+  const agent = detectAgent();
+  if (!agent || (agent !== "claude" && agent !== "opencode" && agent !== "codex")) {
+    throw new Error(
+      `Subagent calls require 'claude', 'opencode', or 'codex' CLI in PATH (detected: ${agent ?? "none"})`,
+    );
   }
-  if (effort) {
-    cmd.push("--effort", effort);
-  }
-  if (allowedTools && allowedTools.length > 0) {
-    cmd.push("--allowedTools", ...allowedTools);
+  let cmd: string[];
+  if (agent === "opencode") {
+    // OpenCode supports --agent and --model but not allowedTools, appendSystemPrompt, or maxTurns
+    if (allowedTools?.length || appendSystemPrompt) {
+      logger.warn(
+        `Subagent '${agentName}' on opencode: allowedTools and appendSystemPrompt are not supported and will be ignored`,
+      );
+    }
+    cmd = ["opencode", "run", "--agent", agentName];
+    if (modelFlag) {
+      cmd.push("--model", resolveOpenCodeModel(modelFlag));
+    }
+    cmd.push(prompt);
+  } else if (agent === "codex") {
+    // Codex has no --agent flag; inline the agent instructions into the prompt.
+    // allowedTools, appendSystemPrompt, maxTurns, and effort are not supported.
+    if (allowedTools?.length || appendSystemPrompt) {
+      logger.warn(
+        `Subagent '${agentName}' on codex: allowedTools and appendSystemPrompt are not supported and will be ignored`,
+      );
+    }
+    const agentInstructions = loadAgentInstructions(agentName);
+    const fullPrompt = agentInstructions ? `${agentInstructions}\n\n---\n\n${prompt}` : prompt;
+    cmd = ["codex", "exec", "--skip-git-repo-check", fullPrompt];
+  } else {
+    // Claude Code
+    cmd = ["claude", "-p", prompt, "--agent", agentName, "--max-turns", String(maxTurns)];
+    if (appendSystemPrompt) {
+      cmd.push("--append-system-prompt", appendSystemPrompt);
+    }
+    if (modelFlag) {
+      const resolved = resolveModelFlag(modelFlag);
+      cmd.push("--model", resolved);
+    }
+    if (effort) {
+      cmd.push("--effort", effort);
+    }
+    if (allowedTools && allowedTools.length > 0) {
+      cmd.push("--allowedTools", ...allowedTools);
+    }
+    // Skip permissions since this runs non-interactively in a pipeline
+    cmd.push("--dangerously-skip-permissions");
   }
-  // Skip permissions since this runs non-interactively in a pipeline
-  cmd.push("--dangerously-skip-permissions");
   const maxRetries = retryOpts?.maxRetries ?? DEFAULT_MAX_RETRIES;
   const initialBackoffMs = retryOpts?.initialBackoffMs ?? DEFAULT_INITIAL_BACKOFF_MS;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "selftune",
-  "version": "0.2.20",
+  "version": "0.2.22",
   "description": "Self-improving skills CLI for AI agents",
   "keywords": [
     "agent",

package/skill/SKILL.md CHANGED Viewed

@@ -125,6 +125,14 @@ selftune uninstall          [--dry-run] [--keep-logs] [--npm-uninstall]
 # Hook dispatch (for debugging/manual invocation)
 selftune hook <name>   # prompt-log | session-stop | skill-eval | auto-activate | skill-change-guard | evolution-guard
+# Platform hooks (non-Claude-Code agents)
+selftune codex hook
+selftune codex install    [--dry-run] [--uninstall]
+selftune opencode hook
+selftune opencode install [--dry-run] [--uninstall]
+selftune cline hook
+selftune cline install    [--dry-run] [--uninstall]
 # Alpha enrollment (device-code flow — browser opens automatically)
 selftune init --alpha --alpha-email <email>
 selftune alpha upload [--dry-run]
@@ -169,6 +177,7 @@ selftune status                                                        # shows c
 | repair, rebuild usage, fix skill usage, trustworthy usage, repair-skill-usage                                                           | RepairSkillUsage  | Workflows/RepairSkillUsage.md         |
 | export canonical, canonical export, canonical telemetry, push payload                                                                   | ExportCanonical   | Workflows/ExportCanonical.md          |
 | hook, run hook, invoke hook, manual hook, debug hook                                                                                    | Hook              | Workflows/Hook.md                     |
+| codex hooks, codex install, codex setup, opencode hooks, opencode install, opencode setup, cline hooks, cline install, cline setup, multi-platform, platform hooks, non-claude hooks, multiple agents, multi-agent | PlatformHooks     | Workflows/PlatformHooks.md            |
 | export, dump, jsonl, export sqlite, debug export                                                                                        | Export            | _(direct command — no workflow file)_ |
 | status, health summary, skill health, how are skills, skills doing, run selftune                                                        | Status            | _(direct command — no workflow file)_ |
 | last, last session, recent session, what happened, what changed                                                                         | Last              | _(direct command — no workflow file)_ |
@@ -357,6 +366,7 @@ accomplish a task _using_ a skill, route to that skill instead.
 | `Workflows/CreatorContributions.md` | Manage bundled `selftune.contribute.json` configs   | When preparing a skill package for creator contributions |
 | `Workflows/ExportCanonical.md`      | Export canonical telemetry for downstream use       | When exporting data for external consumption    |
 | `Workflows/Hook.md`                 | Manual hook invocation for debugging                | When debugging or testing hooks manually        |
+| `Workflows/PlatformHooks.md`        | Non-Claude-Code platform hook install/config        | When setting up Codex, OpenCode, or Cline hooks |
 | `references/logs.md`                | Log file formats (telemetry, usage, queries, audit) | When parsing or debugging log files             |
 | `references/grading-methodology.md` | 3-tier grading model, evidence standards            | When grading sessions or interpreting grades    |
 | `references/invocation-taxonomy.md` | 4 invocation types, coverage analysis               | When analyzing trigger coverage                 |

package/skill/Workflows/Evolve.md CHANGED Viewed

@@ -89,15 +89,31 @@ skills in the same registry, so replay-backed validation is preferred whenever
 that local fixture can be constructed because it captures host-style routing
 behavior instead of model judgment.
-The current replay path is fixture-backed: it evaluates the target routing table
-against the installed target/competing skill surfaces in a controlled replay
-fixture and records per-entry evidence. That is still a stronger signal than a
-free-form judge prompt, but you should describe it as replay-backed validation,
-not as live operator telemetry.
+For Claude Code, the replay path now stages a temporary project-local
+`.claude/skills` registry, swaps in the candidate routing table, and runs a
+one-turn Claude print-mode session with project/local settings only. Validation
+records whether Claude actually invoked the target skill, invoked a competing
+skill, invoked an unrelated skill, or made no routing decision at all.
+Unrelated skill use is treated as a replay failure even on negative evals,
+because it still indicates the runtime routed somewhere unexpected. If that
+runtime path is unavailable or fails to reach a runtime decision, selftune
+falls back to the existing fixture-backed surface simulation and notes the
+fallback in the replay evidence instead of pretending it was a runtime result.
+For non-Claude platforms today, replay remains fixture-backed: it evaluates the
+target routing table against the installed target/competing skill surfaces in a
+controlled replay fixture and records per-entry evidence. That is still a
+stronger signal than a free-form judge prompt, but you should describe it as
+replay-backed validation, not as live operator telemetry.
 Replay parsing is intentionally conservative: unreadable skill files degrade to
 empty surfaces instead of throwing, and malformed routing rows with empty
-trigger cells are ignored rather than treated as valid triggers.
+trigger cells are ignored rather than treated as valid triggers. Claude replay
+also normalizes observed `Read` paths against the staged workspace, so relative
+skill reads still count as read-only evidence for the target or competing
+skill. Reads outside the staged skill set are treated as replay failures rather
+than benign negatives, because they indicate the runtime left the controlled
+evaluation surface.
 ## Parsing Instructions

package/skill/Workflows/Initialize.md CHANGED Viewed

@@ -7,6 +7,7 @@ Bootstrap selftune for first-time use or after changing environments.
 - The user asks to set up selftune, configure selftune, or initialize selftune
 - The agent detects `~/.selftune/config.json` does not exist
 - The user has switched agent platforms (Claude Code, Codex, OpenCode)
+- The user wants to add hooks for additional platforms (multi-agent setup)
 ## Default Command
@@ -136,15 +137,49 @@ Code subagent calls stay up to date.
 | `PostToolUse` (Bash)       | `hooks/commit-track.ts`       | Track git commits for session traceability      | Fast-path: skips non-git Bash commands          |
 | `Stop`                     | `hooks/session-stop.ts`       | Capture session telemetry                       | Runs async (non-blocking), 60s timeout          |
-**Codex agents:**
+### 4b. Multi-Platform Hooks
-- Use `selftune ingest wrap-codex` for real-time telemetry capture (see `Workflows/Ingest.md`)
-- Or batch-ingest existing sessions with `selftune ingest codex`
+After Claude Code hooks are installed, check whether the user has **other** agent
+CLIs available. Run these checks:
-**OpenCode agents:**
+```bash
+which codex 2>/dev/null && echo "codex available"
+which opencode 2>/dev/null && echo "opencode available"
+ls ~/Documents/Cline/Hooks/ 2>/dev/null && echo "cline available"
+```
+If **any** additional platforms are detected, use `AskUserQuestion` listing only
+the platforms that were actually found:
+> I detected these agent platforms in addition to your primary one:
+> - [list only detected platforms, e.g. "Codex", "OpenCode"]
+>
+> Would you like to install selftune hooks for any of them? This enables
+> real-time skill tracking across all your agents.
+Options:
+- `Yes — install hooks for all detected platforms`
+- `Let me pick — show me the list` (then present only the detected platforms)
+- `No — skip for now` (they can always run `selftune <platform> install` later)
+For each platform the user selects, run the install command:
+```bash
+selftune codex install      # writes hooks.json entries
+selftune opencode install   # writes plugin file (selftune-opencode-plugin.ts) + config entries
+selftune cline install      # creates hook scripts
+```
+Use `--dry-run` first if the user wants to preview. See `Workflows/PlatformHooks.md`
+for platform-specific details.
+**Batch ingest** is the fallback for platforms without real-time hooks, and can also be used to backfill history:
-- Use `selftune ingest opencode` to import sessions from the SQLite database
-- See `Workflows/Ingest.md` for details
+```bash
+selftune ingest codex       # import Codex rollout sessions
+selftune ingest opencode    # import OpenCode sessions from SQLite
+selftune ingest openclaw    # import OpenClaw sessions
+```
 ### 5. Initialize Memory Directory
@@ -387,6 +422,13 @@ retrying with `selftune init --alpha --alpha-email <email> --force`.
 > and optional display name in chat, then run `selftune init --alpha --alpha-email ...`.
 > The browser opens automatically for approval. No manual key management needed.
+**User uses multiple agents (Claude Code + Codex, etc.)**
+> Run `selftune init` for the primary agent, then offer to install hooks for
+> additional detected platforms. Run `selftune codex install`, `selftune opencode install`,
+> or `selftune cline install` as needed. All platforms write to the same shared
+> log schema — no extra config required.
 **Hooks not capturing data**
 > Run `selftune doctor` to check hook installation. Parse the JSON output

package/skill/Workflows/PlatformHooks.md ADDED Viewed

@@ -0,0 +1,93 @@
+# Platform Hooks Workflow
+## Purpose
+Install and configure selftune hooks for non-Claude-Code platforms (Codex, OpenCode, Cline).
+## When to Use
+- User wants selftune on Codex, OpenCode, or Cline
+- User asks about multi-platform support
+- User wants real-time skill tracking on a non-Claude-Code agent
+## Commands
+### Install hooks for a platform
+```bash
+selftune <platform> install [--dry-run] [--uninstall]
+```
+Supported platforms: `codex`, `opencode`, `cline`
+| Flag          | Description                                    |
+| ------------- | ---------------------------------------------- |
+| `--dry-run`   | Preview what would be installed without writing |
+| `--uninstall` | Remove selftune hooks from the platform         |
+| `--help, -h`  | Show usage help                                 |
+### Hook handler (called by the agent, not the user)
+```bash
+selftune <platform> hook
+```
+This is called automatically by the agent's hook system. Users don't run this directly.
+## Platform Details
+### Codex
+- Config: `~/.codex/hooks.json`
+- Events: SessionStart, PreToolUse, PostToolUse, Stop
+- Install creates hooks.json entries that prefer `$SELFTUNE_CLI_PATH codex hook`, otherwise `npx -y selftune@latest codex hook`
+### OpenCode
+- Config: `./opencode.json` or `~/.config/opencode/opencode.json`
+- Plugin dir: `~/.config/opencode/plugins/` (global) or `./.opencode/plugins/` (project)
+- Events: tool.execute.before, tool.execute.after, session.idle (via event handler)
+- Install writes a TypeScript plugin file (`selftune-opencode-plugin.ts`) into the plugins directory (auto-discovered by OpenCode at startup)
+- Agents are registered in the `agent` config key (identified by `[selftune]` description prefix)
+### Cline
+- Config: `~/Documents/Cline/Hooks/`
+- Events: PostToolUse, TaskComplete, TaskCancel
+- Install creates executable shell scripts in the hooks directory
+## Examples
+### Codex
+```bash
+selftune codex install              # Install hooks into ~/.codex/hooks.json
+selftune codex install --dry-run    # Preview changes without writing
+selftune codex install --uninstall  # Remove selftune hooks
+```
+### OpenCode
+```bash
+selftune opencode install              # Install plugin (selftune-opencode-plugin.ts) + config entries
+selftune opencode install --dry-run    # Preview changes without writing
+selftune opencode install --uninstall  # Remove selftune plugin and config entries
+```
+### Cline
+```bash
+selftune cline install              # Create hook scripts in ~/Documents/Cline/Hooks/
+selftune cline install --dry-run    # Preview what would be created
+selftune cline install --uninstall  # Remove selftune hook scripts
+```
+### Hook handler (agent-only, not user-facing)
+The hook subcommand is called automatically by the agent. Users do not run it directly:
+```bash
+printf '%s\n' "$PAYLOAD" | selftune codex hook
+printf '%s\n' "$PAYLOAD" | selftune opencode hook
+printf '%s\n' "$PAYLOAD" | selftune cline hook
+```