npm - @glrs-dev/harness-plugin-opencode - Versions diffs - 0.3.1 → 1.0.0 - Mend

@glrs-dev/harness-plugin-opencode 0.3.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/CHANGELOG.md +190 -0
package/dist/agents/prompts/pilot-builder.md +18 -3
package/dist/agents/prompts/pilot-planner.md +19 -9
package/dist/chunk-57EOY72Y.js +174 -0
package/dist/chunk-5TAMY7P6.js +67 -0
package/dist/chunk-BKTFWXLG.js +204 -0
package/dist/chunk-KB7M7JXU.js +145 -0
package/dist/chunk-RNRCXQ65.js +56 -0
package/dist/cli.js +955 -1453
package/dist/index.js +1 -1
package/dist/paths-LT3QQKCF.js +18 -0
package/dist/pilot/mcp/status-server.d.ts +1 -0
package/dist/pilot/mcp/status-server.js +228 -0
package/dist/pilot-config-7LJZ23YK.js +55 -0
package/dist/runs-QWPL3TKV.js +18 -0
package/dist/safety-gate-WM3EWOCY.js +10 -0
package/dist/setup-hook-FHTXMAQL.js +88 -0
package/dist/skills/adr/SKILL.md +328 -0
package/dist/skills/pilot-planning/SKILL.md +40 -13
package/dist/skills/pilot-planning/rules/decomposition.md +27 -0
package/dist/skills/pilot-planning/rules/self-review.md +1 -1
package/dist/skills/pilot-planning/rules/touches-scope.md +34 -0
package/dist/skills/pilot-planning/rules/verify-design.md +78 -14
package/dist/tasks-KJ3WN2KY.js +32 -0
package/package.json +4 -2
package/dist/skills/pilot-planning/rules/setup-authoring.md +0 -68

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,195 @@
 # Changelog
+## 1.0.0
+### Major Changes
+- [#26](https://github.com/iceglober/glrs/pull/26) [`6cec227`](https://github.com/iceglober/glrs/commit/6cec227eeb4360344a8a5cb9b944f3070459084c) Thanks [@iceglober](https://github.com/iceglober)! - pilot: scorched-earth rollback of worktree isolation — cwd mode is the only execution shape
+  **Breaking change.** The pilot subsystem no longer manages a per-task worktree pool. `pilot build` now runs each task directly in the user's current worktree (`process.cwd()`), committing on HEAD of the user's feature branch after each task's verify passes.
+  User-visible changes:
+  - **Pre-flight safety gate.** `pilot build` refuses to run when the working tree is on `main`/`master`/the remote's default branch, is outside a git repo, or has uncommitted changes. This matches `/fresh --yes` semantics.
+  - **`setup:` field removed.** Plans that declare a top-level `setup:` array fail `pilot validate` with a friendly message pointing at `src/pilot/AGENTS.md`. Users should run setup manually (install, compose, migrate, seed) before invoking `pilot build`.
+  - **CLI verbs removed.** `pilot resume`, `pilot retry`, and `pilot worktrees` are deleted. cwd-mode resume/retry semantics are future work.
+  - **No `PILOT_*` env injection.** Verify commands inherit `process.env` verbatim. The COMPOSE_PROJECT_NAME default is gone.
+  - **Auto-commit contract preserved.** The worker still auto-commits after each successful task — just on HEAD of the user's current branch instead of a throwaway per-task branch.
+  Internal:
+  - Deleted `src/pilot/worktree/` directory and its `pool.ts`/`git.ts` modules.
+  - New `src/pilot/worker/safety-gate.ts` with `checkCwdSafety()`.
+  - `enforceTouches()` now takes `cwd` instead of `worktree`.
+  - Plan schema uses `.passthrough().superRefine(...)` to surface the friendly setup-removal message alongside standard unknown-key rejection.
+  - `pilot-planning` skill is now 9 rules (was 10); `setup-authoring.md` deleted.
+### Minor Changes
+- [#26](https://github.com/iceglober/glrs/pull/26) [`6cec227`](https://github.com/iceglober/glrs/commit/6cec227eeb4360344a8a5cb9b944f3070459084c) Thanks [@iceglober](https://github.com/iceglober)! - pilot: add `pilot build-resume` — continue a partially-completed run
+  When `pilot build` fails mid-run (task failure, stall, abort), previously the only recovery was to rerun from scratch or finish manually. `pilot build-resume` picks up where the run left off:
+  - Discovers the latest non-terminal run in the repo (or honors `--run <id>`).
+  - Skips `succeeded` tasks — their commits are already on HEAD.
+  - Resets every non-succeeded task (failed/blocked/aborted/running) to `pending` with `attempts=0` and a fresh retry budget. Cost is preserved.
+  - Re-marks the run as `running`, clears `finished_at`.
+  - Pre-flight: same safety gate as `pilot build` (clean tree, feature branch) PLUS a branch-match check — refuses if the current branch name doesn't equal the branch recorded on any succeeded task from the run. Prevents "I switched branches since" mistakes.
+  - Loads the plan from the path recorded on the run row. If the user edited the plan between runs, the resume picks up the edited version.
+  Usage:
+  ```bash
+  # resume the latest failed/blocked run in this repo
+  pilot build-resume
+  # or target a specific run
+  pilot build-resume --run 01KQDEDKGMAF6NGSKNS2H8QB4V
+  ```
+  Exit codes:
+  - `0` — resume succeeded (every remaining task completed).
+  - `1` — wiring failure, branch mismatch, or safety gate refusal.
+  - `2` — no resumable tasks (all succeeded, or no runs found).
+  - `3` — resume ran but at least one task failed.
+  - `130` — SIGINT.
+  New state accessors: `resetTasksForResume()`, `markRunResumed()`.
+- [#26](https://github.com/iceglober/glrs/pull/26) [`6cec227`](https://github.com/iceglober/glrs/commit/6cec227eeb4360344a8a5cb9b944f3070459084c) Thanks [@iceglober](https://github.com/iceglober)! - pilot: clean the working tree after every task (success OR failure)
+  The worker now guarantees the tree is pristine between tasks. After every task the worker runs `git reset --hard HEAD && git clean -fd` (which leaves gitignored files in place). This makes the tree-clean-between-tasks invariant explicit: `git status --porcelain` is empty before the next task starts.
+  - **Success paths** already had this implicitly via `commitAll`. No behavior change — the reset is a no-op on an already-clean tree.
+  - **Failure paths** previously left partial agent edits in the working tree. Now they're reverted. The forensic record of what the failed task did lives in `runs/<runId>/tasks/<taskId>/session.jsonl` — unchanged.
+  Consequences:
+  1. `pilot build-resume` no longer trips on a dirty tree left behind by the failed run — the failed task's own cleanup already handled it. Resume just works.
+  2. Subsequent tasks in the same run start from a known-clean state. No more "task B silently ran on top of task A's partial edits."
+  3. If the post-task cleanup itself fails (locked ref, permissions), the worker halts the whole run with a clear error and emits a `run.cleanup.failed` event. Subsequent tasks cannot safely run on a mixed tree.
+  Users who need to inspect what a failed task produced should open the session's JSONL log under `~/.glorious/opencode/<repo>/pilot/runs/<runId>/tasks/<taskId>/session.jsonl` — the git diff is no longer the canonical record.
+- [#26](https://github.com/iceglober/glrs/pull/26) [`6cec227`](https://github.com/iceglober/glrs/commit/6cec227eeb4360344a8a5cb9b944f3070459084c) Thanks [@iceglober](https://github.com/iceglober)! - pilot-planner: accept multi-issue cross-cutting plans as a first-class shape
+  The pilot-planning skill previously encouraged the planner to refuse
+  ambitious multi-issue scopes — pushing users to run multiple pilot
+  sessions with 3× the setup cost. Skill rework:
+  - `decomposition.md` gains a "Plan sizing" section: 5–30 tasks is the
+    sweet spot, and bundling 2–4 related issues into one plan is first-
+    class when they share repo + package manager + docker-compose + test
+    runner. Cross-references `dag-shape.md`'s "Disconnected" pattern.
+  - `SKILL.md` gains a "When to bundle vs. split plans" section placed
+    before "When to refuse". The refuse section is rewritten to refuse
+    ONLY for underspecified / ambiguous / no-concrete-acceptance work
+    (e.g., "refactor auth", "clean up tech debt"), explicitly stating
+    plan size, multi-issue scope, and disconnected-subtree shape are
+    NOT refusal reasons.
+  - `self-review.md` question 6 is rewritten: task-level `cascadeFail`
+    only blocks DEPENDENTS of the failing task, not siblings in
+    disconnected subtrees. The question now asks whether the dependency
+    graph concentrates too much value in one critical task (a real
+    anti-pattern), not whether the plan is "too big" (a false one).
+  Observable effect: the planner now bundles cross-cutting work like
+  "rule-engine cleanup + cache invalidation + admin UI" into one plan
+  instead of refusing the scope.
+- [#26](https://github.com/iceglober/glrs/pull/26) [`6cec227`](https://github.com/iceglober/glrs/commit/6cec227eeb4360344a8a5cb9b944f3070459084c) Thanks [@iceglober](https://github.com/iceglober)! - pilot: safety gate tolerates framework-owned dirty files (`.opencode/**`, `next-env.d.ts`, etc.)
+  When opencode auto-updates its plugin dep in the background, it bumps `.opencode/package.json` + `.opencode/package-lock.json`. Previously the pilot safety gate rejected those dirty files as "user uncommitted work," blocking `pilot build` on something the user didn't do and couldn't preempt.
+  **Fix:** A new `SAFETY_GATE_TOLERATE` list mirrors the post-task `DEFAULT_TOLERATE` pattern. Dirt ONLY in these paths is allowed; pilot proceeds with a one-line warning showing which framework-owned files were modified. Genuine user dirt (anywhere else) still refuses as before. Mixed dirty trees (framework + user) refuse and surface the user-owned path in the error message.
+  Tolerated paths:
+  - `.opencode/**` — opencode plugin installer churn.
+  - `**/next-env.d.ts`, `**/.next/types/**`, `**/.next/dev/types/**` — Next.js artifacts.
+  - `**/*.tsbuildinfo` — TypeScript incremental build cache.
+  - `**/__snapshots__/**`, `**/*.snap` — test snapshot files.
+  User-visible:
+  - `pilot build` prints `[pilot] working tree has N modified file(s) in framework-owned paths; treating tree as clean:` followed by the first 5 paths before starting.
+  - `pilot build-resume` does the same.
+  Also fixed a porcelain-parser bug that ate the leading space off `git status --porcelain` lines; new tests cover the round-trip.
+- [#26](https://github.com/iceglober/glrs/pull/26) [`6cec227`](https://github.com/iceglober/glrs/commit/6cec227eeb4360344a8a5cb9b944f3070459084c) Thanks [@iceglober](https://github.com/iceglober)! - pilot: add `.glrs/hooks/pilot_setup` — repo-level setup hook
+  A user-authored shell script at `.glrs/hooks/pilot_setup` (relative to the repo root) is auto-invoked once at the start of `pilot build` and `pilot build-resume`, before any task runs. Its job is to make the dev stack ready: install deps, start docker services, run migrations, seed data — whatever the plan's verify commands expect to already be running.
+  Contract:
+  - **Missing file → skip silently.** No hook = no setup = the old behavior.
+  - **Present + executable → run it.** stdout/stderr stream live to the terminal so the user sees install progress.
+  - **Non-zero exit → abort the pilot run.** User fixes their env first.
+  - **10-minute timeout → abort.** Prevents hung installs from blocking indefinitely.
+  - **Not executable → abort with a clear message** (`chmod +x .glrs/hooks/pilot_setup`).
+  Why this instead of the old plan-level `setup:` field:
+  - It's version-controlled in the user's repo, not LLM-authored.
+  - One hook per repo covers every plan — no cross-plan drift.
+  - The user controls exactly what runs (no pilot-opinionated defaults).
+  - It's idempotent by convention — safe to re-run on resume.
+  Example `.glrs/hooks/pilot_setup`:
+  ```bash
+  #!/bin/sh
+  set -e
+  pnpm install --frozen-lockfile
+  docker compose up -d postgres redis
+  pnpm prisma migrate dev --skip-generate
+  ```
+- [#26](https://github.com/iceglober/glrs/pull/26) [`6cec227`](https://github.com/iceglober/glrs/commit/6cec227eeb4360344a8a5cb9b944f3070459084c) Thanks [@iceglober](https://github.com/iceglober)! - pilot: add `tolerate:` task field + default allowlist for framework-generated files
+  **Problem:** Tasks with verify steps like `next build` would fail touches-enforcement on files the framework itself rewrites (`next-env.d.ts`, `.next/types/**`), not files the agent edited. The fix-loop couldn't recover — reverting the file just made the next verify regenerate it.
+  **Fix:** Two complementary escape hatches.
+  1. **Built-in default allowlist.** `enforceTouches` now accepts a small, opinionated set of framework-generated globs without requiring plan authors to list them:
+     - `**/next-env.d.ts`
+     - `**/.next/types/**`, `**/.next/dev/types/**`
+     - `**/*.tsbuildinfo`
+     - `**/__snapshots__/**`, `**/*.snap`
+  2. **Task-level `tolerate:` field.** Plan authors can extend the allowlist per-task for project-specific codegen (prisma/client, graphql/generated, etc.). `tolerate:` is unioned with `touches:` and defaults at enforcement time.
+  **Behavior change:** Tasks that previously failed touches-enforcement on these paths will now pass. `touches: []` (verify-only) tasks where ONLY tolerated/default-allowed files change also pass. Real drift (file outside touches + tolerate + defaults) still fails as before.
+  Planner prompt and `pilot-planning/rules/touches-scope.md` both updated with the new `tolerate:` contract and examples.
+- [#26](https://github.com/iceglober/glrs/pull/26) [`6cec227`](https://github.com/iceglober/glrs/commit/6cec227eeb4360344a8a5cb9b944f3070459084c) Thanks [@iceglober](https://github.com/iceglober)! - pilot: inject PILOT\_\* env vars into setup and verify commands
+  Pilot setup and per-task verify commands now run with a fixed set of `PILOT_*` env vars plus a default `COMPOSE_PROJECT_NAME` injected by the harness. This lets plan authors isolate per-worktree local infrastructure (docker-compose projects, host ports, named volumes) so parallel and retried pilot worktrees don't collide with each other or with a developer's background dev stack.
+  Injected vars:
+  - `PILOT_RUN_ID` — ULID of the current run.
+  - `PILOT_TASK_ID` — stable task id.
+  - `PILOT_SLOT_INDEX` — pool slot index (0 in v0.1).
+  - `PILOT_SLOT_SEQ` — unique sequence `= slot_index * 100 + retry_counter`.
+  - `PILOT_WORKTREE_DIR` — absolute worktree path.
+  - `PILOT_PORT_BASE` — opinionated port base `= 10000 + PILOT_SLOT_SEQ * 100`.
+  - `COMPOSE_PROJECT_NAME` — default `pilot-<runIdShort>-<slotSeq>`, only when unset (user/CI intent preserved).
+  Plan authors using docker-compose for local infra no longer need to hand-roll slot-unique project names or port offsets. See `src/skills/pilot-planning/rules/setup-authoring.md` (updated) for a worked example.
+### Patch Changes
+- [#27](https://github.com/iceglober/glrs/pull/27) [`cf74f2d`](https://github.com/iceglober/glrs/commit/cf74f2dca60ee099a92a500d90de1c1886b6aed0) Thanks [@iceglober](https://github.com/iceglober)! - chore(changesets): move @glrs-dev/cli and @glrs-dev/harness-plugin-opencode from `linked` to `fixed`
+  The `linked` group synchronizes versions only among packages that are ALREADY being bumped — it does not force a package into a release. A changeset that named only the harness (as most of our changesets do) would ship a new harness on npm without republishing the CLI, even though the CLI vendors the harness `dist/` at build time (`packages/cli/scripts/vendor-harness.ts`). End users running `glrs oc ...` would keep getting the old vendored harness until somebody remembered to write a no-op CLI changeset.
+  Moving the pair to `fixed` guarantees any harness publish drags the CLI along at a matching version, so a fresh CLI tarball always re-vendors the latest harness `dist/`. The trade-off — CLI-only changesets now also force a no-op harness republish — is cheap because CLI-only changes are rare in this repo.
 ## 0.3.1
 ### Patch Changes

package/dist/agents/prompts/pilot-builder.md CHANGED Viewed

@@ -80,7 +80,7 @@ If `task.prompt` says "add lodash to handle deep merging", install it. If the ta
 If a verify failure clearly points to an environmental issue — `Cannot find module 'X'` where `X` is a workspace/monorepo dep, `node_modules` absent despite a lockfile committed to the repo, a stale build artifact a typecheck depends on — you ARE expected to run the obvious install command BEFORE giving up with STOP.
-Recognise these canonical bootstrap commands: `pnpm install`, `bun install`, `npm install`, `npm ci`, `cargo fetch`, `cargo build`. If the plan declared a `setup:` block, treat that block as the canonical list — run those commands verbatim.
+Recognise these canonical bootstrap commands: `pnpm install`, `bun install`, `npm install`, `npm ci`, `cargo fetch`, `cargo build`.
 The plugin deny list does not block any of these; they are not task-level dependency additions and they do not require lockfile edits.
@@ -111,7 +111,22 @@ If the fix prompt names `touchesViolators`: revert your edits to those files. Us
 - Plan. The plan is `pilot.yaml`. Each task in it was already designed by the pilot-planner agent. You are not a co-author.
 - Refactor unrelated code. The task names a scope; respect it. If you see a glaring issue elsewhere, ignore it — that's a separate task for the human.
 - Add observability/logging beyond what the task asks for. If the task didn't say "add structured logs", don't add structured logs.
-- Run the verify commands yourself. The worker runs them after you stop. Running them yourself wastes turns and can leave residue (test artifacts, cached state) that messes up the worker's run.
 - Apologize, hedge, or narrate. Each turn is a billable opencode session call; chat preamble buys you nothing.
+- **Write TODO, FIXME, HACK, or XXX comments.** Many repos have pre-commit hooks that reject these annotations. The worker commits your work automatically after verify passes; if the commit is blocked by a hook, the task fails. If you need to note future work, put it in the task's output summary, not in a code comment.
-You're a focused, fast, pessimistic implementer. Make the change. Stop. The worker will tell you if anything is wrong.
+# Self-verification — run the tests BEFORE you stop
+**You SHOULD run the task's verify commands yourself during your work session.** The worker runs them formally after you stop, but you should iterate locally first:
+1. Write the code.
+2. Run the verify command(s) listed in the task's `verify:` field.
+3. If they fail, fix the code and re-run. Iterate until they pass.
+4. THEN stop.
+This is faster and cheaper than the worker's retry loop (which requires a full session round-trip per attempt). The worker's formal verify is a gate, not your development loop — arrive at the gate already passing.
+**How to find the verify commands:** They're in the task kickoff prompt under "Verify commands". Run them exactly as written via bash. They execute in the repo root (cwd).
+**Exception:** If a verify command requires infrastructure you can't reach (e.g., a running server on a specific port), note that in your output and stop. The worker will handle it.
+You're a focused, fast, pessimistic implementer. Make the change. Verify it passes. Stop.

package/dist/agents/prompts/pilot-planner.md CHANGED Viewed

@@ -45,13 +45,13 @@ Use Serena and grep to map out:
 - Existing tests that already cover related code (the verify commands will likely be variations of those).
 - Existing patterns the change should match.
 - Any module boundaries that suggest natural task splits.
-- **Tooling footprint** — lockfiles, docker-compose services, migration tooling, UI/API/DB test frameworks. You'll use these in Section 3 to propose a `setup:` block and per-surface verify patterns.
+- **Tooling footprint** — lockfiles, docker-compose services, migration tooling, UI/API/DB test frameworks. Understanding these informs your per-surface verify patterns in Section 3.
 Be thorough here. A planner who shipped a sloppy plan because they only skimmed the codebase wastes hours of pilot-builder time chasing bad scope.
 ## 3. Apply the planning methodology
-The `pilot-planning` skill carries the ten rules. Apply them:
+The `pilot-planning` skill carries the nine rules. Apply them:
 1. First-principles task framing.
 2. Decomposition into right-sized tasks.
@@ -61,8 +61,7 @@ The `pilot-planning` skill carries the ten rules. Apply them:
 6. Optional milestone grouping.
 7. Self-review.
 8. Per-task `context:` population (rationale, code pointers, acceptance shorthand).
-9. **Setup-block authoring** — detect lockfiles (pnpm, bun, npm, yarn, Cargo), docker-compose services, and migration tooling (prisma, drizzle-kit, knex, flyway), then propose specific setup commands to the user for confirmation.
-10. **QA-expectations establishment** — detect per-surface test frameworks and propose concrete verify patterns:
+9. **QA-expectations establishment** — detect per-surface test frameworks and propose concrete verify patterns:
     - **UI**: Playwright, Cypress, or Vitest browser mode for visual/interaction assertions
     - **API**: curl against local endpoints or OpenAPI-based contract tests
     - **DB**: Postgres readiness checks and migration verification (prisma migrate, drizzle-kit push)
@@ -70,7 +69,9 @@ The `pilot-planning` skill carries the ten rules. Apply them:
     - **Browser-based component**: Storybook or Chromatic visual tests
     - **CLI**: bin/ smoke tests or `--help` verification
-Rules 9 and 10 typically involve ONE bundled `question` tool call to the user — combine setup proposals and per-surface verify proposals into a single round (respecting "talk to the user — once" guidance).
+Rule 9 typically involves ONE bundled `question` tool call to the user for QA verify patterns (respecting "talk to the user — once" guidance).
+Note: The `setup:` field was removed in the cwd-mode rollback. Plans assume the user's dev stack is already running (install, compose, migrate, seed) before `pilot build` is invoked. Remind the user of this at hand-off.
 ## 4. Write the YAML
@@ -80,10 +81,6 @@ Required schema (see `src/pilot/plan/schema.ts` for the canonical Zod definition
 ```yaml
 name: <human-readable plan name>
-setup:                          # optional — run once per worktree before any task
-  - pnpm install --frozen-lockfile
-  - docker compose up -d postgres
-  - pnpm prisma migrate dev
 defaults:                       # optional, override per-task as needed
   agent: pilot-builder          # default
   model: anthropic/claude-sonnet-4-6
@@ -114,6 +111,17 @@ tasks:
     touches:
       - src/api/**
       - test/api/**
+    tolerate:                   # optional — files that may appear in
+                                # the diff but aren't part of the task's
+                                # scope (project-specific codegen,
+                                # framework side-effects beyond the
+                                # built-in defaults like next-env.d.ts).
+                                # Common entries: prisma/client/**,
+                                # graphql/generated/**, schema.graphql.
+                                # Built-in defaults already cover
+                                # next-env.d.ts, .next/types/**,
+                                # *.tsbuildinfo, __snapshots__/**.
+      - prisma/client/**
     verify:
       - bun test test/api
     depends_on: [ ]              # other task ids
@@ -154,6 +162,8 @@ Don't elaborate. Don't summarize the plan in chat. The user can read it.
 - **Asking the human to clarify mid-build.** Don't write tasks whose prompts contain things like "ask the user about X". Pilot is unattended. If you don't know X, either ASK NOW (during the planning session) or design the task to discover X via reading code.
+- **YAML quoting errors in titles/prompts.** If a string contains double quotes, wrap it in single quotes: `title: '"Test rule set" UI + hook'`. If it contains single quotes, use double quotes with escaped inner quotes: `title: "it's a \"test\""`. NEVER write `title: "word" more words` — YAML closes the scalar at the second `"`. Run `pilot validate` after saving; it catches these.
 # What "done" looks like
 A plan that:

package/dist/chunk-57EOY72Y.js ADDED Viewed

@@ -0,0 +1,174 @@
+// src/pilot/state/tasks.ts
/**
 * Seed a `pending` task row for every task in the plan.
 *
 * The statement is `INSERT OR IGNORE`, so an insert that would conflict
 * (presumably on the (run_id, task_id) key — schema not shown here) is
 * skipped rather than raising. All inserts are issued from inside a single
 * `db.transaction` callback.
 *
 * @param db    Database handle exposing `prepare` and `transaction`.
 * @param runId Run identifier written to the `run_id` column.
 * @param plan  Object whose `tasks` iterable yields items with an `id`.
 */
function upsertFromPlan(db, runId, plan) {
  const insertTask = db.prepare(
    `INSERT OR IGNORE INTO tasks (run_id, task_id, status) VALUES (?, ?, 'pending')`
  );
  const insertEveryTask = db.transaction(() => {
    for (const task of plan.tasks) insertTask.run(runId, task.id);
  });
  insertEveryTask();
}
/**
 * Move a task from `pending` to `ready`.
 *
 * `requireStatus` runs first and throws if the task row is missing or its
 * current status is not `pending`; only then is the row updated.
 *
 * @param db     Database handle exposing `run`.
 * @param runId  Run the task belongs to.
 * @param taskId Task to transition.
 */
function markReady(db, runId, taskId) {
  requireStatus(db, runId, taskId, ["pending"], "ready");
  const sql = "UPDATE tasks SET status='ready' WHERE run_id=? AND task_id=?";
  const params = [runId, taskId];
  db.run(sql, params);
}
/**
 * Move a task from `ready` to `running` and record the attempt.
 *
 * The update increments `attempts`, overwrites `session_id`, `branch` and
 * `worktree_path`, and sets `started_at` only if it is still NULL (via
 * COALESCE), so an earlier start timestamp is kept.
 *
 * @param db   Database handle exposing `run`.
 * @param args Object with `runId`, `taskId`, `sessionId`, `branch`,
 *             `worktreePath`, and an optional `now` (defaults to
 *             `Date.now()` when null/undefined).
 * @throws Error from `requireStatus` when the task is missing or not `ready`.
 */
function markRunning(db, args) {
  const { runId, taskId, sessionId, branch, worktreePath } = args;
  requireStatus(db, runId, taskId, ["ready"], "running");
  const startedAt = args.now ?? Date.now();
  const sql = `UPDATE tasks
     SET status='running',
         attempts = attempts + 1,
         session_id = ?,
         branch = ?,
         worktree_path = ?,
         started_at = COALESCE(started_at, ?)
     WHERE run_id=? AND task_id=?`;
  db.run(sql, [sessionId, branch, worktreePath, startedAt, runId, taskId]);
}
/**
 * Move a task from `running` to `succeeded`.
 *
 * Stamps `finished_at` and clears `last_error`.
 *
 * @param db     Database handle exposing `run`.
 * @param runId  Run the task belongs to.
 * @param taskId Task to transition.
 * @param now    Value stored in `finished_at`; defaults to `Date.now()`.
 * @throws Error from `requireStatus` when the task is missing or not `running`.
 */
function markSucceeded(db, runId, taskId, now = Date.now()) {
  requireStatus(db, runId, taskId, ["running"], "succeeded");
  const sql = `UPDATE tasks
     SET status='succeeded', finished_at=?, last_error=NULL
     WHERE run_id=? AND task_id=?`;
  const params = [now, runId, taskId];
  db.run(sql, params);
}
/**
 * Mark a task as `failed`; allowed from `running` or `ready`.
 *
 * Stamps `finished_at` and stores `reason` in `last_error`.
 *
 * @param db     Database handle exposing `run`.
 * @param runId  Run the task belongs to.
 * @param taskId Task to transition.
 * @param reason Text stored in `last_error`.
 * @param now    Value stored in `finished_at`; defaults to `Date.now()`.
 * @throws Error from `requireStatus` when the task is missing or in any
 *         other status.
 */
function markFailed(db, runId, taskId, reason, now = Date.now()) {
  requireStatus(db, runId, taskId, ["running", "ready"], "failed");
  const sql = `UPDATE tasks
     SET status='failed', finished_at=?, last_error=?
     WHERE run_id=? AND task_id=?`;
  const params = [now, reason, runId, taskId];
  db.run(sql, params);
}
/**
 * Mark a task as `blocked`; allowed from `pending` or `ready`.
 *
 * Stores `reason` in `last_error`. Unlike the failed/aborted transitions,
 * this update does not write `finished_at`.
 *
 * @param db     Database handle exposing `run`.
 * @param runId  Run the task belongs to.
 * @param taskId Task to transition.
 * @param reason Text stored in `last_error`.
 * @throws Error from `requireStatus` when the task is missing or in any
 *         other status.
 */
function markBlocked(db, runId, taskId, reason) {
  requireStatus(db, runId, taskId, ["pending", "ready"], "blocked");
  const sql = `UPDATE tasks
     SET status='blocked', last_error=?
     WHERE run_id=? AND task_id=?`;
  const params = [reason, runId, taskId];
  db.run(sql, params);
}
/**
 * Mark a task as `aborted`; allowed from `running` or `ready`.
 *
 * Stamps `finished_at` and stores `reason` in `last_error`.
 *
 * @param db     Database handle exposing `run`.
 * @param runId  Run the task belongs to.
 * @param taskId Task to transition.
 * @param reason Text stored in `last_error`.
 * @param now    Value stored in `finished_at`; defaults to `Date.now()`.
 * @throws Error from `requireStatus` when the task is missing or in any
 *         other status.
 */
function markAborted(db, runId, taskId, reason, now = Date.now()) {
  requireStatus(db, runId, taskId, ["running", "ready"], "aborted");
  const sql = `UPDATE tasks
     SET status='aborted', finished_at=?, last_error=?
     WHERE run_id=? AND task_id=?`;
  const params = [now, reason, runId, taskId];
  db.run(sql, params);
}
/**
 * Force a task back to `pending` from whatever status it is in.
 *
 * No status precondition is enforced; the only check is that the row
 * exists. The update clears `session_id`, `branch`, `worktree_path`,
 * `started_at`, `finished_at` and `last_error`. Columns not listed in the
 * statement (for example `attempts` and `cost_usd`) are left as they are.
 *
 * @param db     Database handle exposing `query` and `run`.
 * @param runId  Run the task belongs to.
 * @param taskId Task to reset.
 * @throws Error when no row matches (runId, taskId).
 */
function markPending(db, runId, taskId) {
  const existing = getTask(db, runId, taskId);
  if (!existing) {
    throw new Error(
      `markPending: task ${JSON.stringify(taskId)} not found in run ${JSON.stringify(runId)}`
    );
  }
  const sql = `UPDATE tasks
     SET status='pending',
         session_id=NULL,
         branch=NULL,
         worktree_path=NULL,
         started_at=NULL,
         finished_at=NULL,
         last_error=NULL
     WHERE run_id=? AND task_id=?`;
  db.run(sql, [runId, taskId]);
}
/**
 * Overwrite the `cost_usd` column for a task.
 *
 * The value must be a finite, non-negative number; `Number.isFinite` does
 * not coerce, so strings, null and undefined are rejected as well. The task
 * row's existence is not checked here.
 *
 * @param db      Database handle exposing `run`.
 * @param runId   Run the task belongs to.
 * @param taskId  Task to update.
 * @param costUsd New cost value.
 * @throws RangeError when `costUsd` is not a finite number or is negative.
 */
function setCostUsd(db, runId, taskId, costUsd) {
  const isAcceptable = Number.isFinite(costUsd) && costUsd >= 0;
  if (!isAcceptable) {
    throw new RangeError(`setCostUsd: invalid cost ${costUsd}`);
  }
  const sql = "UPDATE tasks SET cost_usd=? WHERE run_id=? AND task_id=?";
  db.run(sql, [costUsd, runId, taskId]);
}
/**
 * Fetch the task row for (runId, taskId).
 *
 * @param db     Database handle exposing `query`.
 * @param runId  Run the task belongs to.
 * @param taskId Task to look up.
 * @returns Whatever the statement's `get` yields for that key.
 */
function getTask(db, runId, taskId) {
  const lookup = db.query("SELECT * FROM tasks WHERE run_id=? AND task_id=?");
  return lookup.get(runId, taskId);
}
/**
 * List every task row of a run, ordered by `task_id`.
 *
 * @param db    Database handle exposing `query`.
 * @param runId Run whose tasks are listed.
 * @returns Whatever the statement's `all` yields for that run.
 */
function listTasks(db, runId) {
  const byRun = db.query("SELECT * FROM tasks WHERE run_id=? ORDER BY task_id");
  return byRun.all(runId);
}
/**
 * List the task rows of a run whose status is `ready`, ordered by `task_id`.
 *
 * @param db    Database handle exposing `query`.
 * @param runId Run whose ready tasks are listed.
 * @returns Whatever the statement's `all` yields for that run.
 */
function readyTasks(db, runId) {
  const readyByRun = db.query(
    "SELECT * FROM tasks WHERE run_id=? AND status='ready' ORDER BY task_id"
  );
  return readyByRun.all(runId);
}
/**
 * Count a run's tasks per status.
 *
 * The result always contains the seven known statuses, each starting at 0,
 * so statuses with no rows still appear. Counts come from a
 * `GROUP BY status` query; a status outside the seven would be added to the
 * result as an extra key.
 *
 * @param db    Database handle exposing `query`.
 * @param runId Run whose tasks are counted.
 * @returns Plain object mapping status name to row count.
 */
function countByStatus(db, runId) {
  const knownStatuses = [
    "pending",
    "ready",
    "running",
    "succeeded",
    "failed",
    "blocked",
    "aborted",
  ];
  const tallies = Object.fromEntries(knownStatuses.map((name) => [name, 0]));
  const grouped = db
    .query("SELECT status, COUNT(*) as n FROM tasks WHERE run_id=? GROUP BY status")
    .all(runId);
  for (const { status, n } of grouped) {
    tallies[status] = n;
  }
  return tallies;
}
/**
 * Reset every non-succeeded task of a run so the run can be resumed.
 *
 * Each affected row goes back to `pending` with `attempts=0` and with
 * `session_id`, `last_error`, `started_at`, `finished_at`, `branch` and
 * `worktree_path` cleared. `cost_usd` is not part of the statement, so it is
 * left untouched. The UPDATE repeats the `status != 'succeeded'` guard, so a
 * row that became `succeeded` after the initial listing is not reset.
 *
 * @param db    Database handle exposing `query`, `prepare` and `transaction`.
 * @param runId Run whose tasks are reset.
 * @returns Ids of the tasks selected for reset, in `listTasks` order; an
 *          empty array (and no writes) when every task has succeeded.
 */
function resetTasksForResume(db, runId) {
  const idsToReset = listTasks(db, runId)
    .filter((task) => task.status !== "succeeded")
    .map((task) => task.task_id);
  if (idsToReset.length === 0) return [];
  const resetOne = db.prepare(
    `UPDATE tasks
     SET status='pending',
         attempts=0,
         session_id=NULL,
         last_error=NULL,
         started_at=NULL,
         finished_at=NULL,
         branch=NULL,
         worktree_path=NULL
     WHERE run_id=? AND task_id=? AND status != 'succeeded'`
  );
  const resetAll = db.transaction(() => {
    idsToReset.forEach((taskId) => {
      resetOne.run(runId, taskId);
    });
  });
  resetAll();
  return idsToReset;
}
/**
 * Guard for status transitions: verify the task exists and is currently in
 * one of the allowed source statuses.
 *
 * @param db       Database handle exposing `query`.
 * @param runId    Run the task belongs to.
 * @param taskId   Task being transitioned.
 * @param expected Array of statuses the task may currently be in.
 * @param intended Target status; used only in the error message.
 * @throws Error when the row is missing, or when its status is not in
 *         `expected`.
 */
function requireStatus(db, runId, taskId, expected, intended) {
  const current = getTask(db, runId, taskId);
  if (!current) {
    throw new Error(
      `task ${JSON.stringify(taskId)} not found in run ${JSON.stringify(runId)}`
    );
  }
  const isAllowed = expected.includes(current.status);
  if (isAllowed) return;
  throw new Error(
    `cannot move task ${JSON.stringify(taskId)} from ${current.status} to ${intended} (expected one of: ${expected.join(", ")})`
  );
}
+export {
+  upsertFromPlan,
+  markReady,
+  markRunning,
+  markSucceeded,
+  markFailed,
+  markBlocked,
+  markAborted,
+  markPending,
+  setCostUsd,
+  getTask,
+  listTasks,
+  readyTasks,
+  countByStatus,
+  resetTasksForResume
+};

package/dist/chunk-5TAMY7P6.js ADDED Viewed

@@ -0,0 +1,67 @@
+// src/pilot/state/runs.ts
+import { ulid } from "ulid";
/**
 * Insert a new run row in `pending` status and return its id.
 *
 * The id is generated with `ulid()`. `started_at` is `args.now` when given,
 * otherwise `Date.now()`.
 *
 * @param db   Database handle exposing `run`.
 * @param args Object with `planPath`, `slug`, an optional `now`, and `plan`.
 * @returns The generated run id.
 */
function createRun(db, args) {
  const runId = ulid();
  const startedAt = args.now ?? Date.now();
  const sql = `INSERT INTO runs (id, plan_path, plan_slug, started_at, status)
     VALUES (?, ?, ?, ?, 'pending')`;
  db.run(sql, [runId, args.planPath, args.slug, startedAt]);
  // `args.plan` is read and discarded; nothing from it is stored in the row.
  void args.plan;
  return runId;
}
/**
 * Move a run from `pending` to `running`.
 *
 * Calling it on a run that is already `running` is a no-op. Any other
 * current status is rejected.
 *
 * @param db    Database handle exposing `query` and `run`.
 * @param runId Run to transition.
 * @throws Error when the run does not exist, or when its status is neither
 *         `pending` nor `running`.
 */
function markRunRunning(db, runId) {
  const run = getRun(db, runId);
  if (!run) throw new Error(`markRunRunning: run ${JSON.stringify(runId)} not found`);
  switch (run.status) {
    case "running":
      return;
    case "pending":
      db.run("UPDATE runs SET status='running' WHERE id=?", [runId]);
      return;
    default:
      throw new Error(
        `markRunRunning: cannot move run ${JSON.stringify(runId)} from ${run.status} to running`
      );
  }
}
/**
 * Record a terminal status and `finished_at` timestamp on a run.
 *
 * The status argument is validated before the run is looked up. The run's
 * current status is not checked; only its existence is.
 *
 * @param db     Database handle exposing `query` and `run`.
 * @param runId  Run to finish.
 * @param status One of `completed`, `aborted` or `failed`.
 * @param now    Value stored in `finished_at`; defaults to `Date.now()`.
 * @throws Error when `status` is not terminal, or when the run does not exist.
 */
function markRunFinished(db, runId, status, now = Date.now()) {
  const terminalStatuses = ["completed", "aborted", "failed"];
  if (!terminalStatuses.includes(status)) {
    throw new Error(
      `markRunFinished: ${JSON.stringify(status)} is not a terminal status`
    );
  }
  const run = getRun(db, runId);
  if (!run) {
    throw new Error(`markRunFinished: run ${JSON.stringify(runId)} not found`);
  }
  const sql = "UPDATE runs SET status=?, finished_at=? WHERE id=?";
  db.run(sql, [status, now, runId]);
}
/**
 * Put a run back into `running` and clear its `finished_at`.
 *
 * Any current status is accepted except `completed`, which is rejected
 * because there is nothing left to resume.
 *
 * @param db    Database handle exposing `query` and `run`.
 * @param runId Run to resume.
 * @throws Error when the run does not exist or is already `completed`.
 */
function markRunResumed(db, runId) {
  const run = getRun(db, runId);
  if (!run) throw new Error(`markRunResumed: run ${JSON.stringify(runId)} not found`);
  const alreadyCompleted = run.status === "completed";
  if (alreadyCompleted) {
    throw new Error(
      `markRunResumed: run ${JSON.stringify(runId)} is already completed; nothing to resume`
    );
  }
  const sql = "UPDATE runs SET status='running', finished_at=NULL WHERE id=?";
  db.run(sql, [runId]);
}
/**
 * Fetch the run row with the given id.
 *
 * @param db    Database handle exposing `query`.
 * @param runId Run to look up.
 * @returns Whatever the statement's `get` yields for that id.
 */
function getRun(db, runId) {
  return db.query("SELECT * FROM runs WHERE id=?").get(runId);
}
/**
 * List runs, newest `started_at` first.
 *
 * @param db    Database handle exposing `query`.
 * @param limit Value bound to the `LIMIT` placeholder; defaults to 100.
 * @returns Whatever the statement's `all` yields.
 */
function listRuns(db, limit = 100) {
  const newestFirst = db.query("SELECT * FROM runs ORDER BY started_at DESC LIMIT ?");
  return newestFirst.all(limit);
}
/**
 * Fetch the run with the greatest `started_at`.
 *
 * @param db Database handle exposing `query`.
 * @returns Whatever the statement's `get` yields.
 */
function latestRun(db) {
  return db.query("SELECT * FROM runs ORDER BY started_at DESC LIMIT 1").get();
}
+export {
+  createRun,
+  markRunRunning,
+  markRunFinished,
+  markRunResumed,
+  getRun,
+  listRuns,
+  latestRun
+};