npm - oh-my-workflow - Versions diffs - 0.2.1 → 0.4.0 - Mend

oh-my-workflow 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/README.md +190 -105
package/conformance/budget-loop.ts +16 -0
package/conformance/fanout.ts +20 -0
package/conformance/pipeline.ts +21 -0
package/conformance/schema-gate.ts +21 -0
package/conformance/strict-throws.ts +13 -0
package/docs/launch/show-hn.md +41 -0
package/docs/site/index.html +540 -0
package/docs/site/robots.txt +2 -0
package/examples/deep-research/workflow.ts +11 -11
package/package.json +11 -3
package/scripts/build-docs.ts +10 -0
package/scripts/check-docs.ts +58 -0
package/skill/SKILL.md +230 -133
package/src/adapters/claude.ts +31 -5
package/src/adapters/codex.ts +5 -3
package/src/adapters/exec.ts +103 -0
package/src/adapters/fake.ts +4 -4
package/src/adapters/hermes.ts +24 -0
package/src/adapters/types.ts +33 -3
package/src/cli/codemod.ts +99 -0
package/src/cli/omw.ts +7 -2
package/src/cli/run.ts +222 -13
package/src/cli/skill.ts +32 -10
package/src/runtime.ts +171 -11
package/src/worktree.ts +72 -0
package/vercel.json +5 -0

package/src/runtime.ts CHANGED Viewed

@@ -11,6 +11,18 @@ import type { Journal } from "./journal";
 import { promptHash, optsHash } from "./journal";
 import type { ResumeIndex } from "./resume";
 import { schemaGate, makeValidator, type GateCall, type GateFeedback } from "./schema-gate";
+import { withWorktree as defaultWithWorktree } from "./worktree";
+/** Optional `export const meta` a workflow can declare to describe itself and
+ *  its phases. Mirrors native dynamic-workflow's meta block: a pure literal the
+ *  loader reads for naming, phase titles, and per-phase/default model hints.
+ *
+ *  Model precedence, as the runtime resolves it per node: an explicit
+ *  `opts.model` wins, then the `model` of the `phases` entry whose `title`
+ *  equals the node's phase, then the top-level `model`. Every top-level field
+ *  is optional; within a `phases` entry only `title` is required. */
+export type WorkflowMeta = {
+  name?: string;
+  description?: string;
+  whenToUse?: string;
+  model?: string;
+  phases?: Array<{ title: string; model?: string; detail?: string }>;
+};
 export type AgentOpts = {
   label?: string;
@@ -20,6 +32,16 @@ export type AgentOpts = {
   cwd?: string;
   timeoutMs?: number;
   maxRetries?: number;
+  /** Inherit the host's MCP servers in this node (default false → isolated, fast). */
+  inheritMcp?: boolean;
+  /** Reasoning-effort hint for this node (adapter maps it where supported). */
+  effort?: "low" | "medium" | "high" | "xhigh" | "max";
+  /** Cross-vendor node profile (named agent persona) for this node. */
+  agentType?: string;
+  /** Run this node in a fresh ephemeral git worktree (cwd = the worktree), so
+   *  parallel file-mutating nodes don't clobber each other. Best-effort: a
+   *  non-git cwd runs in place with a warning. */
+  isolation?: "worktree";
 };
 // `prev`/`item` are intentionally `any`: orchestration scripts are plain JS the
@@ -27,12 +49,22 @@ export type AgentOpts = {
 // without fighting the type system. The runtime treats every value opaquely.
 export type Stage = (prev: any, item: any, index: number) => unknown | Promise<unknown>;
+/** Shared, mutable token-spend accumulator. Lives outside makeRuntime so a
+ *  parent and any nested workflow() child can point at the SAME counter — the
+ *  token pool is shared across the whole run, not per-runtime.
+ *
+ *  In makeRuntime, agent() adds each adapter result's reported
+ *  `meta.outputTokens` to `spent`, counting any value that is not a finite
+ *  positive number as 0. */
+export type BudgetState = { spent: number };
 export type Runtime = {
   agent(prompt: string, opts?: AgentOpts): Promise<unknown | null>; // null when the node fails (see BudgetExceededError for the one documented throw)
   pipeline(items: unknown[], ...stages: Stage[]): Promise<unknown[]>;
   parallel(thunks: Array<() => Promise<unknown>>): Promise<unknown[]>;
+  workflow(ref: string | { scriptPath: string }, args?: unknown): Promise<unknown>; // makeRuntime installs a stub that always throws ("only available through runWorkflow")
   phase(title: string): void; // journals the title and makes it the phase for later agent() calls that omit opts.phase
   log(msg: string): void; // forwards msg to the journal
+  /** Token budget view. `total` is the ceiling (null = unbounded); `spent()`
+   *  reads the shared accumulator; `remaining()` is `total - spent` (Infinity
+   *  when unbounded). The ceiling is enforced in agent() (BudgetExceededError).
+   *  `remaining()` is not clamped: the ceiling is checked before a node starts
+   *  and usage is added afterwards, so the value can be negative once a node
+   *  overshoots. */
+  budget: { total: number | null; spent(): number; remaining(): number };
 };
 /** Bounded-concurrency gate: at most `max` bodies run at once; the rest queue.
@@ -61,12 +93,57 @@ export function makeLimiter(max: number) {
 const errMsg = (e: unknown): string => (e instanceof Error ? e.message : String(e)); // Error → its message; any other thrown value → String(e)
+/** The semantic subset of a node's options — everything that changes WHAT the
+ *  node computes, and nothing cosmetic. The resume key hashes only this, so a
+ *  display-only change (label/phase) or a retry-policy tweak (timeoutMs/
+ *  maxRetries) re-uses the cached result instead of needlessly re-running. The
+ *  resolved model (after the opts>phase>meta chain) is passed in so a meta/phase
+ *  model change still busts the cache even when opts.model is unset.
+ *
+ *  `cwd` here is the caller-supplied `opts.cwd`; for an isolation:'worktree'
+ *  node the worktree path is chosen later and is not part of this object.
+ *  agent() passes the returned object to optsHash().
+ *
+ *  @param opts The node's options as given to agent().
+ *  @param model The already-resolved model for this node (may be undefined).
+ *  @returns A plain object holding only the fields listed below. */
+function pickSemantic(opts: AgentOpts, model: string | undefined) {
+  return {
+    model,
+    schema: opts.schema,
+    effort: opts.effort,
+    isolation: opts.isolation,
+    agentType: opts.agentType,
+    cwd: opts.cwd,
+    inheritMcp: opts.inheritMcp,
+  };
+}
+/** The ONE documented exception to the null-contract: when a token budget is set
+ *  and already exhausted, agent() throws this instead of returning null, so a
+ *  budget-bounded loop terminates instead of silently spinning out null nodes.
+ *  It is thrown OUTSIDE the per-node try, so it propagates; a throw that lands
+ *  inside parallel()/pipeline() is still swallowed to null (matches native).
+ *
+ *  The check is `spent >= total` BEFORE a node starts, and usage is added only
+ *  after an adapter result comes back, so the node whose usage crosses the
+ *  ceiling is not interrupted; the following agent() call is the one that
+ *  throws. `name` is set explicitly to "BudgetExceededError". */
+export class BudgetExceededError extends Error {
+  constructor(message: string) {
+    super(message);
+    this.name = "BudgetExceededError";
+  }
+}
+// Cap the echoed prior output so a huge malformed dump can't blow the fresh prompt. Measured with .length/.slice, i.e. in UTF-16 code units, not tokens.
+const RETRY_RAWTEXT_CAP = 4000;
 function retryPrompt(original: string, feedback: GateFeedback, fresh: boolean): string {
   const note =
     "Your previous output failed validation:\n" +
     feedback.errors.map((e) => `- ${e}`).join("\n") +
     "\nReturn ONLY corrected JSON, no prose.";
-  return fresh ? `${original}\n\n${note}` : note;
+  // In-session followUp (fresh=false): the prior attempt is still in the live
+  // transcript, so only the error note is sent. Fresh invoke (fresh=true): a
+  // brand-new subprocess has NO memory of what it produced, so hand its own
+  // non-conforming output back (capped) to repair against — otherwise it repairs
+  // blind and tends to regress on a different field (the B6 whack-a-mole).
+  if (!fresh) return note;
+  const prior = feedback.rawText.trim(); // whitespace-only output counts as nothing to echo
+  const echo = prior
+    ? "\nYour previous output (fix THIS, do not start over):\n```\n" +
+      (prior.length > RETRY_RAWTEXT_CAP ? prior.slice(0, RETRY_RAWTEXT_CAP) + "\n…(truncated)" : prior) +
+      "\n```\n"
+    : "";
+  return `${original}${echo}\n${note}`; // order: original prompt, fenced echo (if any), validation note
 }
 export function makeRuntime(deps: {
@@ -78,17 +155,45 @@ export function makeRuntime(deps: {
    *  the longest-unchanged-prefix resume model. A miss (incl. a prior failure)
    *  runs live, so resume only re-executes failed/changed nodes. */
   resume?: ResumeIndex;
+  /** Token ceiling for the run (null/undefined = unbounded). */
+  budget?: number | null;
+  /** Shared spend accumulator. When omitted, a local one is created; a nested
+   *  workflow() passes the parent's so the pool is shared across the run. */
+  budgetState?: BudgetState;
+  /** The workflow's meta, used to resolve the effective model per node along the
+   *  `opts.model > phase model > meta.model` chain. */
+  meta?: WorkflowMeta;
+  /** Injected for isolation:'worktree'; defaults to the real git-backed helper.
+   *  Overridable so the runtime is testable without a git subprocess. */
+  withWorktree?: typeof defaultWithWorktree;
 }): Runtime {
   const { adapter, journal, resume } = deps;
+  const withWorktree = deps.withWorktree ?? defaultWithWorktree;
   const limit = makeLimiter(deps.concurrency ?? 4);
   let callCounter = 0;
   let currentPhase: string | undefined;
+  const budgetTotal = deps.budget ?? null;
+  const budgetState: BudgetState = deps.budgetState ?? { spent: 0 };
+  // Effective model along the precedence chain opts > phase > meta default.
+  // Resolved per node so a phase or meta default applies without the script
+  // repeating `model` on every agent() call.
+  const resolveModel = (opts: AgentOpts, phase: string | undefined): string | undefined => {
+    if (opts.model !== undefined) return opts.model;
+    // `?.` guards null/undefined but NOT a wrong type — an author typo like
+    // `phases: "scan"` would make `.find` throw. Array.isArray closes that gap so
+    // a malformed meta degrades to the default model instead of killing the run.
+    const phases = deps.meta?.phases;
+    const phaseModel = phase && Array.isArray(phases) ? phases.find((p) => p.title === phase)?.model : undefined;
+    return phaseModel ?? deps.meta?.model;
+  };
   async function agent(prompt: string, opts: AgentOpts = {}): Promise<unknown | null> {
     const call = ++callCounter;
     const phase = opts.phase ?? currentPhase;
+    const model = resolveModel(opts, phase);
     const pHash = promptHash(prompt);
-    const oHash = optsHash(opts);
+    const oHash = optsHash(pickSemantic(opts, model));
     journal.agentStart({
       call,
       label: opts.label,
@@ -108,23 +213,48 @@ export function makeRuntime(deps: {
       }
     }
+    // Budget ceiling: checked AFTER the resume short-circuit (a cached hit costs
+    // nothing) and OUTSIDE limit()'s try, so it propagates as the one sanctioned
+    // null-contract exception rather than being swallowed to null.
+    if (budgetTotal != null && budgetState.spent >= budgetTotal) {
+      throw new BudgetExceededError(`budget exhausted: ${budgetState.spent}/${budgetTotal} tokens`);
+    }
     return limit(async () => {
+      // The node body, parameterized by the effective working directory so an
+      // isolation:'worktree' node runs the SAME logic with cwd = the worktree.
+      const body = async (effCwd: string | undefined): Promise<unknown | null> => {
       let durationMs = 0;
       const account = (r: AgentResult) => {
         durationMs += r.ok ? r.meta.durationMs : (r.meta?.durationMs ?? 0);
+        // Count tokens whether the node succeeded or failed: a failed node that
+        // still reported `usage` (error/refusal envelope) consumed real budget,
+        // so a loop on a failing node trips the ceiling instead of spinning.
+        // Guard the value: a buggy/custom adapter (the pluggable boundary) could
+        // hand back NaN, a negative, or a non-number — any of which would corrupt
+        // `spent` and silently disable the ceiling. Coerce junk to 0.
+        const tokens = r.meta?.outputTokens;
+        budgetState.spent += typeof tokens === "number" && Number.isFinite(tokens) && tokens > 0 ? tokens : 0;
       };
+      // A fresh node invocation carrying this call's options. Built in one place
+      // so the next InvokeRequest field is threaded once, not per call site.
+      const invokeFresh = (p: string) =>
+        adapter.invoke({
+          prompt: p,
+          model,
+          cwd: effCwd,
+          timeoutMs: opts.timeoutMs,
+          inheritMcp: opts.inheritMcp,
+          effort: opts.effort,
+          agentType: opts.agentType,
+        });
       try {
         // No schema: one shot, raw text out (or null).
         if (!opts.schema) {
           let r: AgentResult;
           try {
-            r = await adapter.invoke({
-              prompt,
-              model: opts.model,
-              cwd: opts.cwd,
-              timeoutMs: opts.timeoutMs,
-            });
+            r = await invokeFresh(prompt);
           } catch (e) {
             // A throw at the adapter boundary IS an adapter failure.
             journal.agentEnd({ call, ok: false, kind: "spawn_failure", stderr: errMsg(e), durationMs });
@@ -146,10 +276,24 @@ export function makeRuntime(deps: {
         const gateCall: GateCall = async (_n, feedback) => {
           let r: AgentResult;
           if (feedback && lastSessionId && adapter.followUp) {
-            r = await adapter.followUp(lastSessionId, retryPrompt(prompt, feedback, false));
+            // Resume in the original cwd and with the same MCP choice, so the
+            // repair turn runs in the same environment as the turn it continues.
+            r = await adapter.followUp(lastSessionId, retryPrompt(prompt, feedback, false), {
+              cwd: effCwd,
+              inheritMcp: opts.inheritMcp,
+              timeoutMs: opts.timeoutMs,
+            });
+            // Resume can fail even when the format hiccup was recoverable (e.g. a
+            // killed/expired session). Don't let a broken resume be terminal —
+            // fall back to a fresh invoke with the error appended (the contract
+            // AgentPort documents for the no-followUp case). Account the failed
+            // resume too: it spawned a real subprocess, so its time is real cost.
+            if (!r.ok) {
+              account(r);
+              r = await invokeFresh(retryPrompt(prompt, feedback, true));
+            }
           } else {
-            const p = feedback ? retryPrompt(prompt, feedback, true) : prompt;
-            r = await adapter.invoke({ prompt: p, model: opts.model, cwd: opts.cwd, timeoutMs: opts.timeoutMs });
+            r = await invokeFresh(feedback ? retryPrompt(prompt, feedback, true) : prompt);
           }
           account(r);
           if (r.ok && r.meta.sessionId) lastSessionId = r.meta.sessionId;
@@ -185,6 +329,14 @@ export function makeRuntime(deps: {
         journal.agentEnd({ call, ok: false, kind: "internal_error", error: errMsg(e), durationMs });
         return null;
       }
+      };
+      // isolation:'worktree' gives the node its own ephemeral checkout as cwd;
+      // otherwise it runs in the caller-provided cwd (or the process cwd).
+      if (opts.isolation === "worktree") {
+        return withWorktree(opts.cwd ?? process.cwd(), (wt) => body(wt));
+      }
+      return body(opts.cwd);
     });
   }
@@ -226,10 +378,18 @@ export function makeRuntime(deps: {
     agent,
     parallel,
     pipeline,
+    workflow: async () => {
+      throw new Error("workflow() hook is only available through runWorkflow");
+    },
     phase: (title: string) => {
       currentPhase = title;
       journal.phase(title);
     },
     log: (msg: string) => journal.log(msg),
+    budget: {
+      total: budgetTotal,
+      spent: () => budgetState.spent,
+      remaining: () => (budgetTotal == null ? Infinity : budgetTotal - budgetState.spent),
+    },
   };
 }

package/src/worktree.ts ADDED Viewed

@@ -0,0 +1,72 @@
+// Ephemeral git worktree per node, for `agent(prompt, { isolation: 'worktree' })`.
+// When several nodes mutate files in parallel they would clobber each other in a
+// shared checkout; giving each its own `git worktree` isolates them. The worktree
+// is auto-removed when the node left it unchanged, and LEFT IN PLACE (with a warn)
+// when it has changes, so a caller can inspect/merge them. A non-git cwd has no
+// worktree to make — we run in place and warn rather than fail (honest-scope:
+// isolation is best-effort, the null-contract still holds).
+import { join } from "node:path";
+export type GitSpawnResult = { code: number; stdout: string; stderr: string }; // exit code plus the captured output text
+export type GitSpawn = (args: string[], cwd: string) => Promise<GitSpawnResult>; // args exclude the leading "git" (the default runner prepends it)
+export type WorktreeDeps = {
+  /** Injected so the unit under test drives git without a subprocess; defaults
+   *  to a real `git` over Bun.spawn. */
+  spawn?: GitSpawn;
+  warn?: (msg: string) => void; // sink for the fallback/cleanup warnings; defaults to console.error
+};
+function defaultGitSpawn(): GitSpawn { // builds the real runner: `git <args>` in `cwd` via Bun.spawn
+  return async (args, cwd) => {
+    const proc = Bun.spawn(["git", ...args], { cwd, stdout: "pipe", stderr: "pipe" }); // both streams are piped so they can be captured as text
+    const [stdout, stderr] = await Promise.all([ // read both streams to completion, concurrently
+      new Response(proc.stdout).text(),
+      new Response(proc.stderr).text(),
+    ]);
+    const code = await proc.exited; // the exit code is returned as data; a non-zero code is not turned into a throw here
+    return { code, stdout, stderr };
+  };
+}
+// Per-process counter so concurrent worktrees get distinct dirs WITHOUT Date.now
+// or Math.random (kept deterministic-friendly, mirroring the rest of the engine). The dir name also embeds process.pid.
+let wtCounter = 0;
+/** Run `fn` with an ephemeral detached git worktree as its working directory,
+ *  then clean up. Returns whatever `fn` returns.
+ *
+ *  Flow: `git rev-parse --show-toplevel` in `repoCwd` serves only as an
+ *  "is this a git repo?" probe (its stdout is not read); then
+ *  `git worktree add --detach <repoCwd>/.omw-worktrees/wt-<pid>-<n>` creates the
+ *  checkout. If either command exits non-zero, a warning is emitted and
+ *  `fn(repoCwd)` runs in place instead.
+ *
+ *  Cleanup sits in `finally`, so it also runs when `fn` rejects:
+ *  `git status --porcelain` is run inside the worktree, and only an exit code
+ *  of 0 with empty output leads to `git worktree remove --force`. Any other
+ *  outcome leaves the directory on disk and warns.
+ *
+ *  NOTE(review): the "has uncommitted changes" warning is also what gets
+ *  printed when `git status` itself exits non-zero. The exit code of
+ *  `git worktree remove` is not checked. Nothing here catches a `spawn` call
+ *  that rejects, so such a rejection propagates to the caller (and, when it
+ *  happens inside the `finally`, replaces `fn`'s own result or error).
+ *
+ *  @param repoCwd Directory the rev-parse / worktree add / worktree remove
+ *    commands run in; also the parent of the `.omw-worktrees` directory.
+ *  @param fn Receives the worktree path, or `repoCwd` on the in-place fallback.
+ *  @param deps Optional `spawn` / `warn` overrides; the defaults are a
+ *    Bun-backed `git` runner and `console.error`.
+ *  @returns The value `fn` resolves to. */
+export async function withWorktree<T>(
+  repoCwd: string,
+  fn: (worktreeDir: string) => Promise<T>,
+  deps: WorktreeDeps = {},
+): Promise<T> {
+  const spawn = deps.spawn ?? defaultGitSpawn();
+  const warn = deps.warn ?? ((m: string) => console.error(m));
+  const top = await spawn(["rev-parse", "--show-toplevel"], repoCwd);
+  if (top.code !== 0) {
+    warn(`omw(worktree): ${repoCwd} is not a git repo; running the node in place.`);
+    return fn(repoCwd);
+  }
+  const dir = join(repoCwd, ".omw-worktrees", `wt-${process.pid}-${++wtCounter}`);
+  const add = await spawn(["worktree", "add", "--detach", dir], repoCwd);
+  if (add.code !== 0) {
+    warn(`omw(worktree): \`git worktree add\` failed (${add.stderr.trim()}); running in place.`);
+    return fn(repoCwd);
+  }
+  try {
+    return await fn(dir);
+  } finally {
+    // Auto-remove only when the node left the worktree clean; otherwise keep it
+    // so the changes aren't silently discarded.
+    const status = await spawn(["status", "--porcelain"], dir);
+    if (status.code === 0 && status.stdout.trim() === "") {
+      await spawn(["worktree", "remove", "--force", dir], repoCwd);
+    } else {
+      warn(`omw(worktree): ${dir} has uncommitted changes; leaving it for inspection.`);
+    }
+  }
+}

package/vercel.json ADDED Viewed

@@ -0,0 +1,5 @@
+{
+  "buildCommand": "bun run docs:build",
+  "outputDirectory": "dist/docs",
+  "installCommand": "bun install --frozen-lockfile"
+}