@glrs-dev/cli 0.1.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/CHANGELOG.md +18 -0
  2. package/dist/vendor/harness-opencode/dist/agents/prompts/pilot-builder.md +29 -4
  3. package/dist/vendor/harness-opencode/dist/agents/prompts/pilot-planner.md +26 -1
  4. package/dist/vendor/harness-opencode/dist/agents/prompts/research-auto.md +37 -0
  5. package/dist/vendor/harness-opencode/dist/agents/prompts/research-local.md +33 -0
  6. package/dist/vendor/harness-opencode/dist/agents/prompts/research-web.md +32 -0
  7. package/dist/vendor/harness-opencode/dist/agents/prompts/research.md +15 -20
  8. package/dist/vendor/harness-opencode/dist/chunk-57EOY72Y.js +174 -0
  9. package/dist/vendor/harness-opencode/dist/chunk-5TAMY7P6.js +67 -0
  10. package/dist/vendor/harness-opencode/dist/chunk-BKTFWXLG.js +204 -0
  11. package/dist/vendor/harness-opencode/dist/{chunk-XCZ3NOXR.js → chunk-CZMAJISX.js} +28 -0
  12. package/dist/vendor/harness-opencode/dist/chunk-KB7M7JXU.js +145 -0
  13. package/dist/vendor/harness-opencode/dist/chunk-RNRCXQ65.js +56 -0
  14. package/dist/vendor/harness-opencode/dist/{chunk-VVMP6QWS.js → chunk-WBBN7OVN.js} +162 -2
  15. package/dist/vendor/harness-opencode/dist/cli.js +964 -1383
  16. package/dist/vendor/harness-opencode/dist/index.js +2 -2
  17. package/dist/vendor/harness-opencode/dist/install-X5KEANRB.js +13 -0
  18. package/dist/vendor/harness-opencode/dist/paths-LT3QQKCF.js +18 -0
  19. package/dist/vendor/harness-opencode/dist/pilot/mcp/status-server.d.ts +1 -0
  20. package/dist/vendor/harness-opencode/dist/pilot/mcp/status-server.js +228 -0
  21. package/dist/vendor/harness-opencode/dist/pilot-config-7LJZ23YK.js +55 -0
  22. package/dist/vendor/harness-opencode/dist/runs-QWPL3TKV.js +18 -0
  23. package/dist/vendor/harness-opencode/dist/safety-gate-WM3EWOCY.js +10 -0
  24. package/dist/vendor/harness-opencode/dist/setup-hook-FHTXMAQL.js +88 -0
  25. package/dist/vendor/harness-opencode/dist/skills/adr/SKILL.md +328 -0
  26. package/dist/vendor/harness-opencode/dist/skills/pilot-planning/SKILL.md +41 -10
  27. package/dist/vendor/harness-opencode/dist/skills/pilot-planning/rules/decomposition.md +27 -0
  28. package/dist/vendor/harness-opencode/dist/skills/pilot-planning/rules/qa-expectations.md +120 -0
  29. package/dist/vendor/harness-opencode/dist/skills/pilot-planning/rules/self-review.md +1 -1
  30. package/dist/vendor/harness-opencode/dist/skills/pilot-planning/rules/touches-scope.md +34 -0
  31. package/dist/vendor/harness-opencode/dist/skills/pilot-planning/rules/verify-design.md +81 -13
  32. package/dist/vendor/harness-opencode/dist/tasks-KJ3WN2KY.js +32 -0
  33. package/dist/vendor/harness-opencode/package.json +1 -1
  34. package/package.json +1 -1
  35. package/dist/vendor/harness-opencode/dist/install-4EYR56OR.js +0 -9
package/CHANGELOG.md CHANGED
@@ -1,5 +1,23 @@
1
1
  # @glrs-dev/cli
2
2
 
3
+ ## 1.0.0
4
+
5
+ ### Patch Changes
6
+
7
+ - [#27](https://github.com/iceglober/glrs/pull/27) [`cf74f2d`](https://github.com/iceglober/glrs/commit/cf74f2dca60ee099a92a500d90de1c1886b6aed0) Thanks [@iceglober](https://github.com/iceglober)! - chore(changesets): move @glrs-dev/cli and @glrs-dev/harness-plugin-opencode from `linked` to `fixed`
8
+
9
+ The `linked` group synchronizes versions only among packages that are ALREADY being bumped — it does not force a package into a release. A changeset that named only the harness (as most of our changesets do) would ship a new harness on npm without republishing the CLI, even though the CLI vendors the harness `dist/` at build time (`packages/cli/scripts/vendor-harness.ts`). End users running `glrs oc ...` would keep getting the old vendored harness until somebody remembered to write a no-op CLI changeset.
10
+
11
+ Moving the pair to `fixed` guarantees any harness publish drags the CLI along at a matching version, so a fresh CLI tarball always re-vendors the latest harness `dist/`. The trade-off — CLI-only changesets now also force a no-op harness republish — is cheap because CLI-only changes are rare in this repo.
12
+
13
+ ## 0.3.1
14
+
15
+ ### Patch Changes
16
+
17
+ - [#19](https://github.com/iceglober/glrs/pull/19) [`6e942c5`](https://github.com/iceglober/glrs/commit/6e942c5099a535a7d1cda161a1bbc1692f937008) Thanks [@iceglober](https://github.com/iceglober)! - Link `@glrs-dev/cli` and `@glrs-dev/harness-plugin-opencode` versions in Changesets config so they always release together. The CLI vendors the harness plugin's `dist/` at build time (via `packages/cli/scripts/vendor-harness.ts`), so plugin fixes don't reach users running `glrs oc install` until a CLI release is cut. Linking the two ensures every harness-plugin bump produces a matching CLI bump, closing the gap where a plugin fix sat on npm without a CLI tarball that bundled it.
18
+
19
+ This bump also forces a CLI republish that vendors `@glrs-dev/harness-plugin-opencode@0.3.0` so users get the recent `glrs oc install` reconfigure fix via `glrs oc install`, not just `glrs-oc install` directly.
20
+
3
21
  ## 0.1.1
4
22
 
5
23
  ### Patch Changes
@@ -68,12 +68,22 @@ Write the minimal code that makes verify pass:
68
68
  - Modify existing? Read the surrounding 30 lines first; mirror the existing patterns in indentation, error handling, log format.
69
69
  - Add a test? Look at one existing test in the same dir; copy its scaffolding (imports, setup, teardown). Don't invent a new test pattern when the codebase has a strong convention.
70
70
 
71
- ## 4. Do NOT install new dependencies unless the task asks for one
71
+ ## 4. Dependency rules: task-level vs environment bootstrap
72
72
 
73
- If `task.prompt` says "add lodash to handle deep merging", install it. If the task is silent on deps, don't add them — find an existing util, write a tiny helper inline, or ask via STOP if the task is genuinely impossible without a dep.
73
+ ### 4a. Task-level dependencies still require task approval
74
+
75
+ If `task.prompt` says "add lodash to handle deep merging", install it. If the task is silent on deps, don't add them — find an existing util, write a tiny helper inline, or STOP if the task is genuinely impossible without a dep.
74
76
 
75
77
  `package.json` / `bun.lock` / `Cargo.lock` etc. are typically NOT in your `touches:` scope. Adding a dep when the scope forbids editing the lock file is a touches violation; the worker will catch it.
76
78
 
79
+ ### 4b. Environment bootstrap self-heals during the fix-loop
80
+
81
+ If a verify failure clearly points to an environmental issue — `Cannot find module 'X'` where `X` is a workspace/monorepo dep, `node_modules` absent despite a lockfile committed to the repo, a stale build artifact a typecheck depends on — you ARE expected to run the obvious install command BEFORE giving up with STOP.
82
+
83
+ Recognise these canonical bootstrap commands: `pnpm install`, `bun install`, `npm install`, `npm ci`, `cargo fetch`, `cargo build`.
84
+
85
+ The plugin deny list does not block any of these; they are not task-level dependency additions and they do not require lockfile edits.
86
+
77
87
  ## 5. When you think you're done, just stop
78
88
 
79
89
  Don't write a "Summary" message. Don't list the files you changed. Don't propose follow-ups. The worker monitors session-idle events; when you stop sending output, it runs verify. If verify passes, the work commits with the message `<task.id>: <task.title>`. If verify fails, you'll get a fix prompt with the failure output verbatim.
@@ -101,7 +111,22 @@ If the fix prompt names `touchesViolators`: revert your edits to those files. Us
101
111
  - Plan. The plan is `pilot.yaml`. Each task in it was already designed by the pilot-planner agent. You are not a co-author.
102
112
  - Refactor unrelated code. The task names a scope; respect it. If you see a glaring issue elsewhere, ignore it — that's a separate task for the human.
103
113
  - Add observability/logging beyond what the task asks for. If the task didn't say "add structured logs", don't add structured logs.
104
- - Run the verify commands yourself. The worker runs them after you stop. Running them yourself wastes turns and can leave residue (test artifacts, cached state) that messes up the worker's run.
105
114
  - Apologize, hedge, or narrate. Each turn is a billable opencode session call; chat preamble buys you nothing.
115
+ - **Write TODO, FIXME, HACK, or XXX comments.** Many repos have pre-commit hooks that reject these annotations. The worker commits your work automatically after verify passes; if the commit is blocked by a hook, the task fails. If you need to note future work, put it in the task's output summary, not in a code comment.
116
+
117
+ # Self-verification — run the tests BEFORE you stop
118
+
119
+ **You SHOULD run the task's verify commands yourself during your work session.** The worker runs them formally after you stop, but you should iterate locally first:
120
+
121
+ 1. Write the code.
122
+ 2. Run the verify command(s) listed in the task's `verify:` field.
123
+ 3. If they fail, fix the code and re-run. Iterate until they pass.
124
+ 4. THEN stop.
125
+
126
+ This is faster and cheaper than the worker's retry loop (which requires a full session round-trip per attempt). The worker's formal verify is a gate, not your development loop — arrive at the gate already passing.
127
+
128
+ **How to find the verify commands:** They're in the task kickoff prompt under "Verify commands". Run them exactly as written via bash. They execute in the repo root (cwd).
129
+
130
+ **Exception:** If a verify command requires infrastructure you can't reach (e.g., a running server on a specific port), note that in your output and stop. The worker will handle it.
106
131
 
107
- You're a focused, fast, pessimistic implementer. Make the change. Stop. The worker will tell you if anything is wrong.
132
+ You're a focused, fast, pessimistic implementer. Make the change. Verify it passes. Stop.
@@ -45,12 +45,13 @@ Use Serena and grep to map out:
45
45
  - Existing tests that already cover related code (the verify commands will likely be variations of those).
46
46
  - Existing patterns the change should match.
47
47
  - Any module boundaries that suggest natural task splits.
48
+ - **Tooling footprint** — lockfiles, docker-compose services, migration tooling, UI/API/DB test frameworks. Understanding these informs your per-surface verify patterns in Section 3.
48
49
 
49
50
  Be thorough here. A planner who shipped a sloppy plan because they only skimmed the codebase wastes hours of pilot-builder time chasing bad scope.
50
51
 
51
52
  ## 3. Apply the planning methodology
52
53
 
53
- The `pilot-planning` skill carries the eight rules. Apply them:
54
+ The `pilot-planning` skill carries the nine rules. Apply them:
54
55
 
55
56
  1. First-principles task framing.
56
57
  2. Decomposition into right-sized tasks.
@@ -60,6 +61,17 @@ The `pilot-planning` skill carries the eight rules. Apply them:
60
61
  6. Optional milestone grouping.
61
62
  7. Self-review.
62
63
  8. Per-task `context:` population (rationale, code pointers, acceptance shorthand).
64
+ 9. **QA-expectations establishment** — detect per-surface test frameworks and propose concrete verify patterns:
65
+ - **UI**: Playwright, Cypress, or Vitest browser mode for visual/interaction assertions
66
+ - **API**: curl against local endpoints or OpenAPI-based contract tests
67
+ - **DB**: Postgres readiness checks and migration verification (prisma migrate, drizzle-kit push)
68
+ - **Integration**: `test/integration` or `e2e` directory patterns
69
+ - **Browser-based component**: Storybook or Chromatic visual tests
70
+ - **CLI**: bin/ smoke tests or `--help` verification
71
+
72
+ Rule 9 typically involves ONE bundled `question` tool call to the user for QA verify patterns (respecting "talk to the user — once" guidance).
73
+
74
+ Note: The `setup:` field was removed in the cwd-mode rollback. Plans assume the user's dev stack is already running (install, compose, migrate, seed) before `pilot build` is invoked. Remind the user of this at hand-off.
63
75
 
64
76
  ## 4. Write the YAML
65
77
 
@@ -99,6 +111,17 @@ tasks:
99
111
  touches:
100
112
  - src/api/**
101
113
  - test/api/**
114
+ tolerate: # optional — files that may appear in
115
+ # the diff but aren't part of the task's
116
+ # scope (project-specific codegen,
117
+ # framework side-effects beyond the
118
+ # built-in defaults like next-env.d.ts).
119
+ # Common entries: prisma/client/**,
120
+ # graphql/generated/**, schema.graphql.
121
+ # Built-in defaults already cover
122
+ # next-env.d.ts, .next/types/**,
123
+ # *.tsbuildinfo, __snapshots__/**.
124
+ - prisma/client/**
102
125
  verify:
103
126
  - bun test test/api
104
127
  depends_on: [ ] # other task ids
@@ -139,6 +162,8 @@ Don't elaborate. Don't summarize the plan in chat. The user can read it.
139
162
 
140
163
  - **Asking the human to clarify mid-build.** Don't write tasks whose prompts contain things like "ask the user about X". Pilot is unattended. If you don't know X, either ASK NOW (during the planning session) or design the task to discover X via reading code.
141
164
 
165
+ - **YAML quoting errors in titles/prompts.** If a string contains double quotes, wrap it in single quotes: `title: '"Test rule set" UI + hook'`. If it contains single quotes, use double quotes with escaped inner quotes: `title: "it's a \"test\""`. NEVER write `title: "word" more words` — YAML closes the scalar at the second `"`. Run `pilot validate` after saving; it catches these.
166
+
142
167
  # What "done" looks like
143
168
 
144
169
  A plan that:
@@ -0,0 +1,37 @@
1
+ ---
2
+ name: research-auto
3
+ description: Research orchestrator subagent — Autonomous experimentation skill. Agent interviews the user, sets up a lab, then explores freely (think, test, reflect) until stopped or a target is hit. Works for any domain where you can measure or evaluate a result. Use when user says 'optimize this', 'experiment with', 'find the best approach', 'iterate on', 'research mode'. Do NOT use for binary validation tests (use /spec-lab instead). Based on ResearcherSkill v1.4.4 by krzysztofdudek.
4
+ mode: all
5
+ model: anthropic/claude-opus-4-7
6
+ temperature: 0.3
7
+ ---
8
+
9
+ # @research-auto — Autonomous Experimentation Agent
10
+
11
+ You are the `research-auto` agent. Your job is to run autonomous experiments by following the bundled `research-auto` skill methodology end-to-end.
12
+
13
+ **Research Query:** $ARGUMENTS
14
+
15
+ ## Task
16
+
17
+ 1. Read the bundled `research-auto` skill via the Skill tool
18
+ 2. Follow every instruction in the skill exactly
19
+ 3. Execute the full experimentation workflow from discovery through conclusion
20
+
21
+ ## Notes on Experiment Commands
22
+
23
+ This agent may run arbitrary user-supplied commands as part of experiments. The `.lab/` directory is used for scratch writes and experiment tracking. These are expected behaviors per the skill methodology.
24
+
25
+ ## PRIME-Delegation Brief Contract
26
+
27
+ When PRIME passes a brief via task tool:
28
+ - Trust the brief. The task-tool arguments ARE the research query — proceed directly.
29
+ - Do not re-interview on points already resolved in the brief.
30
+ - If the brief lacks critical context (e.g., no query provided), ask once then proceed.
31
+
32
+ ## STOP — Do Not
33
+
34
+ - Do NOT experiment directly without following the skill methodology
35
+ - Do NOT skip the discovery phase — it is mandatory
36
+ - Do NOT skip the commit-before-run guardrail — it is mandatory
37
+ - Do NOT exceed 3 rounds without presenting — MAX 3 ROUNDS, THEN PRESENT
@@ -0,0 +1,33 @@
1
+ ---
2
+ name: research-local
3
+ description: Research orchestrator subagent — Deep codebase research using parallel Explore subagents. Decomposes a question about the local codebase into research tasks, launches parallel explorations, reviews for gaps, iterates, and synthesizes findings with specific file paths and line numbers. Use when user says 'how does X work in this codebase', 'where is Y implemented', 'trace the data flow for Z', 'what patterns does this repo use', 'explain the architecture of'. Provide the research topic as arguments.
4
+ mode: all
5
+ model: anthropic/claude-opus-4-7
6
+ temperature: 0.3
7
+ ---
8
+
9
+ # @research-local — Codebase Research Agent
10
+
11
+ You are the `research-local` agent. Your job is to execute deep codebase research by following the bundled `research-local` skill methodology end-to-end. Scope is local codebase ONLY — no web research.
12
+
13
+ **Research Query:** $ARGUMENTS
14
+
15
+ ## Task
16
+
17
+ 1. Read the bundled `research-local` skill via the Skill tool
18
+ 2. Follow every instruction in the skill exactly
19
+ 3. Execute the full research workflow from decomposition through synthesis
20
+
21
+ ## PRIME-Delegation Brief Contract
22
+
23
+ When PRIME passes a brief via task tool:
24
+ - Trust the brief. The task-tool arguments ARE the research query — proceed directly.
25
+ - Do not re-interview on points already resolved in the brief.
26
+ - If the brief lacks critical context (e.g., no query provided), ask once then proceed.
27
+
28
+ ## STOP — Do Not
29
+
30
+ - Do NOT research directly — always follow the research-local skill methodology
31
+ - Do NOT use exploration tools yourself — every phase is a subagent
32
+ - Do NOT skip the decomposition phase — it is mandatory
33
+ - Do NOT synthesize findings yourself — synthesis is a subagent
@@ -0,0 +1,32 @@
1
+ ---
2
+ name: research-web
3
+ description: Research orchestrator subagent — Multi-agent web research orchestrator. Decomposes a research question into parallel agent workstreams, launches them, monitors progress, and synthesizes results. Use when user says 'research this topic', 'I need to understand', 'deep dive into', 'investigate the market for', 'what do we know about'. Provide the research topic and context.
4
+ mode: all
5
+ model: anthropic/claude-opus-4-7
6
+ temperature: 0.3
7
+ ---
8
+
9
+ # @research-web — Web Research Agent
10
+
11
+ You are the `research-web` agent. Your job is to execute web research by following the bundled `research-web` skill methodology end-to-end.
12
+
13
+ **Research Query:** $ARGUMENTS
14
+
15
+ ## Task
16
+
17
+ 1. Read the bundled `research-web` skill via the Skill tool
18
+ 2. Follow every instruction in the skill exactly
19
+ 3. Execute the full research workflow from planning through synthesis
20
+
21
+ ## PRIME-Delegation Brief Contract
22
+
23
+ When PRIME passes a brief via task tool:
24
+ - Trust the brief. The task-tool arguments ARE the research query — proceed directly.
25
+ - Do not re-interview on points already resolved in the brief.
26
+ - If the brief lacks critical context (e.g., no query provided), ask once then proceed.
27
+
28
+ ## STOP — Do Not
29
+
30
+ - Do NOT research directly — always follow the research-web skill methodology
31
+ - Do NOT skip the planning phase — it is mandatory
32
+ - Do NOT launch agents sequentially — dispatch all independent workstreams in ONE message
@@ -22,30 +22,25 @@ You are an **orchestrator only**. You do NOT:
22
22
 
23
23
  Every cognitive task is a subagent. You launch subagents and pass their outputs to other subagents.
24
24
 
25
- ## How to Invoke Skills
25
+ ## How to Invoke Research Agents
26
26
 
27
- The four research skills are bundled with the harness:
27
+ The following four research agents are available:
28
28
 
29
- 1. **`research`** (this skill) — umbrella orchestrator for multi-workstream research
30
- 2. **`research-local`** — deep codebase research using parallel Explore subagents
31
- 3. **`research-web`** — multi-agent web research with skeleton-file pattern
32
- 4. **`research-auto`** — autonomous experimentation with `.lab/` directory
29
+ 1. **`@research`** (this agent) — umbrella orchestrator for multi-workstream research
30
+ 2. **`@research-local`** — deep codebase research using parallel Explore subagents
31
+ 3. **`@research-web`** — multi-agent web research with skeleton-file pattern
32
+ 4. **`@research-auto`** — autonomous experimentation with `.lab/` directory
33
33
 
34
- **To invoke a skill:** Use the Agent tool with a prompt instructing the subagent to read the skill via the Skill tool:
34
+ **To dispatch a research subagent:** Use the task tool with the agent name and pass the sub-question as the prompt:
35
35
 
36
36
  ```
37
- Agent tool:
38
- "You are a research agent.
39
-
40
- ## Research Query
41
- {the full query or sub-question}
42
-
43
- ## Task
44
- 1. Read the bundled {skill-name} skill via the Skill tool and follow every instruction
45
- 2. Focus specifically on: {sub-question}
46
- 3. Report back with your complete findings"
37
+ task tool:
38
+ agent: "research-web"
39
+ prompt: "Research the competitive landscape for X. Focus on: {specific angle}."
47
40
  ```
48
41
 
42
+ The research agents are thin shims that load their matching bundled skill and follow it end-to-end. Trust the brief — the task-tool arguments ARE the research query.
43
+
49
44
  ## 7-Phase Flow
50
45
 
51
46
  ### Phase 1: Plan — Subagent
@@ -77,9 +72,9 @@ Output 3-6 workstreams. Mark dependencies explicitly."
77
72
 
78
73
  Dispatch **one Agent per workstream**. Launch ALL independent workstreams in a SINGLE message.
79
74
 
80
- For LOCAL workstreams: invoke `research-local` skill.
81
- For WEB workstreams: invoke `research-web` skill.
82
- For AUTO workstreams: invoke `research-auto` skill.
75
+ For LOCAL workstreams: dispatch `@research-local` via task tool.
76
+ For WEB workstreams: dispatch `@research-web` via task tool.
77
+ For AUTO workstreams: dispatch `@research-auto` via task tool.
83
78
 
84
79
  ### Phase 3: Review Round 1 — Subagent
85
80
 
@@ -0,0 +1,174 @@
1
+ // src/pilot/state/tasks.ts
2
/**
 * Seeds the `tasks` table with one 'pending' row per task in `plan.tasks`.
 *
 * Uses `INSERT OR IGNORE`, so a row that conflicts with an existing one is
 * skipped rather than overwritten (presumably keyed on (run_id, task_id) —
 * the table schema is not visible here; confirm). All inserts run inside a
 * single `db.transaction` callback.
 *
 * @param db     Database handle exposing `prepare` and `transaction`.
 * @param runId  Run the tasks belong to.
 * @param plan   Object with a `tasks` iterable; only each task's `id` is read.
 */
+ function upsertFromPlan(db, runId, plan) {
3
+ const stmt = db.prepare(
4
+ `INSERT OR IGNORE INTO tasks (run_id, task_id, status) VALUES (?, ?, 'pending')`
5
+ );
6
+ const tx = db.transaction(() => {
7
+ for (const t of plan.tasks) {
8
+ stmt.run(runId, t.id);
9
+ }
10
+ });
11
+ tx();
12
+ }
13
/**
 * Moves a task from 'pending' to 'ready'.
 *
 * `requireStatus` throws if the task does not exist or its current status is
 * not 'pending'; only then is the status column updated.
 *
 * @param db      Database handle exposing `run`.
 * @param runId   Run the task belongs to.
 * @param taskId  Task to promote.
 */
+ function markReady(db, runId, taskId) {
14
+ requireStatus(db, runId, taskId, ["pending"], "ready");
15
+ db.run(
16
+ "UPDATE tasks SET status='ready' WHERE run_id=? AND task_id=?",
17
+ [runId, taskId]
18
+ );
19
+ }
20
/**
 * Moves a task from 'ready' to 'running' and records where it is executing.
 *
 * Besides the status change, the UPDATE increments `attempts`, stores the
 * session id, branch and worktree path, and sets `started_at` only when it
 * is still NULL (via COALESCE), so an already-recorded start time is kept.
 *
 * @param db    Database handle exposing `run`.
 * @param args  Object with `runId`, `taskId`, `sessionId`, `branch`,
 *              `worktreePath` and an optional `now` timestamp.
 * @throws Error (from `requireStatus`) if the task is missing or not 'ready'.
 */
+ function markRunning(db, args) {
21
+ requireStatus(db, args.runId, args.taskId, ["ready"], "running");
22
+ const now = args.now ?? Date.now(); // caller-supplied timestamp wins; otherwise the current time
23
+ db.run(
24
+ `UPDATE tasks
25
+ SET status='running',
26
+ attempts = attempts + 1,
27
+ session_id = ?,
28
+ branch = ?,
29
+ worktree_path = ?,
30
+ started_at = COALESCE(started_at, ?)
31
+ WHERE run_id=? AND task_id=?`,
32
+ [args.sessionId, args.branch, args.worktreePath, now, args.runId, args.taskId]
33
+ );
34
+ }
35
/**
 * Moves a task from 'running' to 'succeeded'.
 *
 * Sets `finished_at` to `now` and clears `last_error`.
 *
 * @param db      Database handle exposing `run`.
 * @param runId   Run the task belongs to.
 * @param taskId  Task to mark.
 * @param now     Completion timestamp; defaults to `Date.now()`.
 * @throws Error (from `requireStatus`) if the task is missing or not 'running'.
 */
+ function markSucceeded(db, runId, taskId, now = Date.now()) {
36
+ requireStatus(db, runId, taskId, ["running"], "succeeded");
37
+ db.run(
38
+ `UPDATE tasks
39
+ SET status='succeeded', finished_at=?, last_error=NULL
40
+ WHERE run_id=? AND task_id=?`,
41
+ [now, runId, taskId]
42
+ );
43
+ }
44
/**
 * Marks a task as 'failed'; allowed from 'running' or 'ready'.
 *
 * Sets `finished_at` to `now` and stores `reason` in `last_error`.
 *
 * @param db      Database handle exposing `run`.
 * @param runId   Run the task belongs to.
 * @param taskId  Task to mark.
 * @param reason  Failure description written to `last_error`.
 * @param now     Failure timestamp; defaults to `Date.now()`.
 * @throws Error (from `requireStatus`) if the task is missing or in any
 *         other status.
 */
+ function markFailed(db, runId, taskId, reason, now = Date.now()) {
45
+ requireStatus(db, runId, taskId, ["running", "ready"], "failed");
46
+ db.run(
47
+ `UPDATE tasks
48
+ SET status='failed', finished_at=?, last_error=?
49
+ WHERE run_id=? AND task_id=?`,
50
+ [now, reason, runId, taskId]
51
+ );
52
+ }
53
/**
 * Marks a task as 'blocked'; allowed from 'pending' or 'ready'.
 *
 * Stores `reason` in `last_error`. Unlike the failed/aborted transitions,
 * this UPDATE does not touch `finished_at`.
 *
 * @param db      Database handle exposing `run`.
 * @param runId   Run the task belongs to.
 * @param taskId  Task to mark.
 * @param reason  Explanation written to `last_error`.
 * @throws Error (from `requireStatus`) if the task is missing or in any
 *         other status.
 */
+ function markBlocked(db, runId, taskId, reason) {
54
+ requireStatus(db, runId, taskId, ["pending", "ready"], "blocked");
55
+ db.run(
56
+ `UPDATE tasks
57
+ SET status='blocked', last_error=?
58
+ WHERE run_id=? AND task_id=?`,
59
+ [reason, runId, taskId]
60
+ );
61
+ }
62
/**
 * Marks a task as 'aborted'; allowed from 'running' or 'ready'.
 *
 * Sets `finished_at` to `now` and stores `reason` in `last_error`.
 *
 * @param db      Database handle exposing `run`.
 * @param runId   Run the task belongs to.
 * @param taskId  Task to mark.
 * @param reason  Abort description written to `last_error`.
 * @param now     Abort timestamp; defaults to `Date.now()`.
 * @throws Error (from `requireStatus`) if the task is missing or in any
 *         other status.
 */
+ function markAborted(db, runId, taskId, reason, now = Date.now()) {
63
+ requireStatus(db, runId, taskId, ["running", "ready"], "aborted");
64
+ db.run(
65
+ `UPDATE tasks
66
+ SET status='aborted', finished_at=?, last_error=?
67
+ WHERE run_id=? AND task_id=?`,
68
+ [now, reason, runId, taskId]
69
+ );
70
+ }
71
/**
 * Forces a task back to 'pending' from whatever status it currently has.
 *
 * Only existence is checked here — there is no `requireStatus` guard. The
 * UPDATE clears session id, branch, worktree path, both timestamps and
 * `last_error`; it does not modify `attempts` or `cost_usd`.
 *
 * @param db      Database handle exposing `query` and `run`.
 * @param runId   Run the task belongs to.
 * @param taskId  Task to reset.
 * @throws Error if no such task exists in the run.
 */
+ function markPending(db, runId, taskId) {
72
+ const cur = getTask(db, runId, taskId);
73
+ if (!cur) {
74
+ throw new Error(
75
+ `markPending: task ${JSON.stringify(taskId)} not found in run ${JSON.stringify(runId)}`
76
+ );
77
+ }
78
+ db.run(
79
+ `UPDATE tasks
80
+ SET status='pending',
81
+ session_id=NULL,
82
+ branch=NULL,
83
+ worktree_path=NULL,
84
+ started_at=NULL,
85
+ finished_at=NULL,
86
+ last_error=NULL
87
+ WHERE run_id=? AND task_id=?`,
88
+ [runId, taskId]
89
+ );
90
+ }
91
/**
 * Writes `costUsd` into a task's `cost_usd` column.
 *
 * The value is validated first: NaN, +/-Infinity, non-numbers and negative
 * numbers are rejected. The task's existence is not checked, so an UPDATE
 * that matches no row goes unreported by this function.
 *
 * @param db       Database handle exposing `run`.
 * @param runId    Run the task belongs to.
 * @param taskId   Task whose cost is being recorded.
 * @param costUsd  Finite, non-negative number.
 * @throws RangeError if `costUsd` is not finite or is below zero.
 */
+ function setCostUsd(db, runId, taskId, costUsd) {
92
+ if (!Number.isFinite(costUsd) || costUsd < 0) {
93
+ throw new RangeError(`setCostUsd: invalid cost ${costUsd}`);
94
+ }
95
+ db.run(
96
+ "UPDATE tasks SET cost_usd=? WHERE run_id=? AND task_id=?",
97
+ [costUsd, runId, taskId]
98
+ );
99
+ }
100
/**
 * Looks up a single task row by run id and task id.
 *
 * Returns whatever `.get()` yields for the query; callers in this module
 * treat a falsy result as "not found" (presumably null/undefined when no
 * row matches — confirm against the database driver).
 */
+ function getTask(db, runId, taskId) {
101
+ return db.query("SELECT * FROM tasks WHERE run_id=? AND task_id=?").get(runId, taskId);
102
+ }
103
/**
 * Returns every task row of a run, ordered by `task_id`.
 */
+ function listTasks(db, runId) {
104
+ return db.query("SELECT * FROM tasks WHERE run_id=? ORDER BY task_id").all(runId);
105
+ }
106
/**
 * Returns the task rows of a run whose status is 'ready', ordered by
 * `task_id`.
 */
+ function readyTasks(db, runId) {
107
+ return db.query("SELECT * FROM tasks WHERE run_id=? AND status='ready' ORDER BY task_id").all(runId);
108
+ }
109
/**
 * Counts a run's tasks per status.
 *
 * The result always contains the seven known statuses, each starting at 0,
 * so statuses with no rows are still present. Counts come from a single
 * GROUP BY query.
 *
 * @returns Object mapping status name to row count.
 */
+ function countByStatus(db, runId) {
110
+ const rows = db.query("SELECT status, COUNT(*) as n FROM tasks WHERE run_id=? GROUP BY status").all(runId);
111
+ const out = {
112
+ pending: 0,
113
+ ready: 0,
114
+ running: 0,
115
+ succeeded: 0,
116
+ failed: 0,
117
+ blocked: 0,
118
+ aborted: 0
119
+ };
120
+ for (const r of rows) out[r.status] = r.n; // a status outside the seven above would be added as an extra key
121
+ return out;
122
+ }
123
/**
 * Resets every non-succeeded task of a run to a clean 'pending' state so the
 * run can be resumed.
 *
 * For each such task the UPDATE sets status to 'pending', zeroes `attempts`
 * and clears session id, last error, timestamps, branch and worktree path
 * (`cost_usd` is not touched). Succeeded tasks are left as they are: they
 * are filtered out up front, and the UPDATE's WHERE clause repeats the
 * `status != 'succeeded'` guard. All updates run inside one transaction.
 *
 * @returns The task ids that were selected for reset (empty array if every
 *          task had already succeeded or the run has no tasks).
 */
+ function resetTasksForResume(db, runId) {
124
+ const rows = listTasks(db, runId);
125
+ const resettable = rows.filter((r) => r.status !== "succeeded");
126
+ if (resettable.length === 0) return [];
127
+ const stmt = db.prepare(
128
+ `UPDATE tasks
129
+ SET status='pending',
130
+ attempts=0,
131
+ session_id=NULL,
132
+ last_error=NULL,
133
+ started_at=NULL,
134
+ finished_at=NULL,
135
+ branch=NULL,
136
+ worktree_path=NULL
137
+ WHERE run_id=? AND task_id=? AND status != 'succeeded'`
138
+ );
139
+ const tx = db.transaction(() => {
140
+ for (const r of resettable) stmt.run(runId, r.task_id);
141
+ });
142
+ tx();
143
+ return resettable.map((r) => r.task_id);
144
+ }
145
/**
 * Guard used by the status-transition helpers: verifies that a task exists
 * and is currently in one of the allowed statuses.
 *
 * @param db        Database handle (passed through to `getTask`).
 * @param runId     Run the task belongs to.
 * @param taskId    Task to check.
 * @param expected  Array of statuses the task is allowed to be in.
 * @param intended  Target status; used only to build the error message.
 * @throws Error if the task is not found, or if its status is not listed in
 *         `expected`.
 */
+ function requireStatus(db, runId, taskId, expected, intended) {
146
+ const row = getTask(db, runId, taskId);
147
+ if (!row) {
148
+ throw new Error(
149
+ `task ${JSON.stringify(taskId)} not found in run ${JSON.stringify(runId)}`
150
+ );
151
+ }
152
+ if (!expected.includes(row.status)) {
153
+ throw new Error(
154
+ `cannot move task ${JSON.stringify(taskId)} from ${row.status} to ${intended} (expected one of: ${expected.join(", ")})`
155
+ );
156
+ }
157
+ }
158
+
159
+ export {
160
+ upsertFromPlan,
161
+ markReady,
162
+ markRunning,
163
+ markSucceeded,
164
+ markFailed,
165
+ markBlocked,
166
+ markAborted,
167
+ markPending,
168
+ setCostUsd,
169
+ getTask,
170
+ listTasks,
171
+ readyTasks,
172
+ countByStatus,
173
+ resetTasksForResume
174
+ };
@@ -0,0 +1,67 @@
1
+ // src/pilot/state/runs.ts
2
+ import { ulid } from "ulid";
3
/**
 * Inserts a new row into `runs` with status 'pending' and returns its id.
 *
 * The id is generated with `ulid()`. Only the plan path, slug and start
 * timestamp are stored.
 *
 * @param db    Database handle exposing `run`.
 * @param args  Object with `planPath`, `slug`, an optional `now` timestamp
 *              (defaults to `Date.now()`), and `plan`.
 * @returns The generated run id.
 */
+ function createRun(db, args) {
4
+ const id = ulid();
5
+ const now = args.now ?? Date.now();
6
+ db.run(
7
+ `INSERT INTO runs (id, plan_path, plan_slug, started_at, status)
8
+ VALUES (?, ?, ?, ?, 'pending')`,
9
+ [id, args.planPath, args.slug, now]
10
+ );
11
+ void args.plan; // value is evaluated and discarded; the plan itself is not written by this function
12
+ return id;
13
+ }
14
/**
 * Moves a run from 'pending' to 'running'.
 *
 * Calling it on a run that is already 'running' is a no-op, so repeated
 * calls are harmless.
 *
 * @param db     Database handle exposing `query` and `run`.
 * @param runId  Run to start.
 * @throws Error if the run does not exist, or if its status is anything
 *         other than 'pending' or 'running'.
 */
+ function markRunRunning(db, runId) {
15
+ const cur = getRun(db, runId);
16
+ if (!cur) throw new Error(`markRunRunning: run ${JSON.stringify(runId)} not found`);
17
+ if (cur.status === "running") return;
18
+ if (cur.status !== "pending") {
19
+ throw new Error(
20
+ `markRunRunning: cannot move run ${JSON.stringify(runId)} from ${cur.status} to running`
21
+ );
22
+ }
23
+ db.run("UPDATE runs SET status='running' WHERE id=?", [runId]);
24
+ }
25
/**
 * Records a terminal status on a run and stamps `finished_at`.
 *
 * The requested status is validated before the database is touched. The
 * run's current status is not inspected, so this applies from any state.
 *
 * @param db      Database handle exposing `query` and `run`.
 * @param runId   Run to finish.
 * @param status  One of 'completed', 'aborted' or 'failed'.
 * @param now     Finish timestamp; defaults to `Date.now()`.
 * @throws Error if `status` is not one of the three terminal values, or if
 *         the run does not exist.
 */
+ function markRunFinished(db, runId, status, now = Date.now()) {
26
+ if (status !== "completed" && status !== "aborted" && status !== "failed") {
27
+ throw new Error(
28
+ `markRunFinished: ${JSON.stringify(status)} is not a terminal status`
29
+ );
30
+ }
31
+ const cur = getRun(db, runId);
32
+ if (!cur) {
33
+ throw new Error(`markRunFinished: run ${JSON.stringify(runId)} not found`);
34
+ }
35
+ db.run("UPDATE runs SET status=?, finished_at=? WHERE id=?", [status, now, runId]);
36
+ }
37
/**
 * Puts a run back into 'running' and clears its `finished_at`.
 *
 * Any current status except 'completed' is accepted.
 *
 * @param db     Database handle exposing `query` and `run`.
 * @param runId  Run to resume.
 * @throws Error if the run does not exist or is already 'completed'.
 */
+ function markRunResumed(db, runId) {
38
+ const cur = getRun(db, runId);
39
+ if (!cur) throw new Error(`markRunResumed: run ${JSON.stringify(runId)} not found`);
40
+ if (cur.status === "completed") {
41
+ throw new Error(
42
+ `markRunResumed: run ${JSON.stringify(runId)} is already completed; nothing to resume`
43
+ );
44
+ }
45
+ db.run("UPDATE runs SET status='running', finished_at=NULL WHERE id=?", [runId]);
46
+ }
47
/**
 * Looks up a single run row by id.
 *
 * Returns whatever `.get()` yields; callers in this module treat a falsy
 * result as "not found" (presumably null/undefined when no row matches —
 * confirm against the database driver).
 */
+ function getRun(db, runId) {
48
+ const row = db.query("SELECT * FROM runs WHERE id=?").get(runId);
49
+ return row;
50
+ }
51
/**
 * Returns run rows ordered by `started_at`, newest first.
 *
 * @param db     Database handle exposing `query`.
 * @param limit  Maximum number of rows to return; defaults to 100.
 */
+ function listRuns(db, limit = 100) {
52
+ return db.query("SELECT * FROM runs ORDER BY started_at DESC LIMIT ?").all(limit);
53
+ }
54
/**
 * Returns the run row with the greatest `started_at`, i.e. the first row of
 * the runs table ordered by `started_at` descending. Returns whatever
 * `.get()` yields when the table is empty (presumably null/undefined —
 * confirm against the database driver).
 */
+ function latestRun(db) {
55
+ const row = db.query("SELECT * FROM runs ORDER BY started_at DESC LIMIT 1").get();
56
+ return row;
57
+ }
58
+
59
+ export {
60
+ createRun,
61
+ markRunRunning,
62
+ markRunFinished,
63
+ markRunResumed,
64
+ getRun,
65
+ listRuns,
66
+ latestRun
67
+ };