@glrs-dev/cli 0.1.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/dist/vendor/harness-opencode/dist/agents/prompts/pilot-builder.md +29 -4
- package/dist/vendor/harness-opencode/dist/agents/prompts/pilot-planner.md +26 -1
- package/dist/vendor/harness-opencode/dist/agents/prompts/research-auto.md +37 -0
- package/dist/vendor/harness-opencode/dist/agents/prompts/research-local.md +33 -0
- package/dist/vendor/harness-opencode/dist/agents/prompts/research-web.md +32 -0
- package/dist/vendor/harness-opencode/dist/agents/prompts/research.md +15 -20
- package/dist/vendor/harness-opencode/dist/chunk-57EOY72Y.js +174 -0
- package/dist/vendor/harness-opencode/dist/chunk-5TAMY7P6.js +67 -0
- package/dist/vendor/harness-opencode/dist/chunk-BKTFWXLG.js +204 -0
- package/dist/vendor/harness-opencode/dist/{chunk-XCZ3NOXR.js → chunk-CZMAJISX.js} +28 -0
- package/dist/vendor/harness-opencode/dist/chunk-KB7M7JXU.js +145 -0
- package/dist/vendor/harness-opencode/dist/chunk-RNRCXQ65.js +56 -0
- package/dist/vendor/harness-opencode/dist/{chunk-VVMP6QWS.js → chunk-WBBN7OVN.js} +162 -2
- package/dist/vendor/harness-opencode/dist/cli.js +964 -1383
- package/dist/vendor/harness-opencode/dist/index.js +2 -2
- package/dist/vendor/harness-opencode/dist/install-X5KEANRB.js +13 -0
- package/dist/vendor/harness-opencode/dist/paths-LT3QQKCF.js +18 -0
- package/dist/vendor/harness-opencode/dist/pilot/mcp/status-server.d.ts +1 -0
- package/dist/vendor/harness-opencode/dist/pilot/mcp/status-server.js +228 -0
- package/dist/vendor/harness-opencode/dist/pilot-config-7LJZ23YK.js +55 -0
- package/dist/vendor/harness-opencode/dist/runs-QWPL3TKV.js +18 -0
- package/dist/vendor/harness-opencode/dist/safety-gate-WM3EWOCY.js +10 -0
- package/dist/vendor/harness-opencode/dist/setup-hook-FHTXMAQL.js +88 -0
- package/dist/vendor/harness-opencode/dist/skills/adr/SKILL.md +328 -0
- package/dist/vendor/harness-opencode/dist/skills/pilot-planning/SKILL.md +41 -10
- package/dist/vendor/harness-opencode/dist/skills/pilot-planning/rules/decomposition.md +27 -0
- package/dist/vendor/harness-opencode/dist/skills/pilot-planning/rules/qa-expectations.md +120 -0
- package/dist/vendor/harness-opencode/dist/skills/pilot-planning/rules/self-review.md +1 -1
- package/dist/vendor/harness-opencode/dist/skills/pilot-planning/rules/touches-scope.md +34 -0
- package/dist/vendor/harness-opencode/dist/skills/pilot-planning/rules/verify-design.md +81 -13
- package/dist/vendor/harness-opencode/dist/tasks-KJ3WN2KY.js +32 -0
- package/dist/vendor/harness-opencode/package.json +1 -1
- package/package.json +1 -1
- package/dist/vendor/harness-opencode/dist/install-4EYR56OR.js +0 -9
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,23 @@
|
|
|
1
1
|
# @glrs-dev/cli
|
|
2
2
|
|
|
3
|
+
## 1.0.0
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- [#27](https://github.com/iceglober/glrs/pull/27) [`cf74f2d`](https://github.com/iceglober/glrs/commit/cf74f2dca60ee099a92a500d90de1c1886b6aed0) Thanks [@iceglober](https://github.com/iceglober)! - chore(changesets): move @glrs-dev/cli and @glrs-dev/harness-plugin-opencode from `linked` to `fixed`
|
|
8
|
+
|
|
9
|
+
The `linked` group synchronizes versions only among packages that are ALREADY being bumped — it does not force a package into a release. A changeset that named only the harness (as most of our changesets do) would ship a new harness on npm without republishing the CLI, even though the CLI vendors the harness `dist/` at build time (`packages/cli/scripts/vendor-harness.ts`). End users running `glrs oc ...` would keep getting the old vendored harness until somebody remembered to write a no-op CLI changeset.
|
|
10
|
+
|
|
11
|
+
Moving the pair to `fixed` guarantees any harness publish drags the CLI along at a matching version, so a fresh CLI tarball always re-vendors the latest harness `dist/`. The trade-off — CLI-only changesets now also force a no-op harness republish — is cheap because CLI-only changes are rare in this repo.
|
|
12
|
+
|
|
13
|
+
## 0.3.1
|
|
14
|
+
|
|
15
|
+
### Patch Changes
|
|
16
|
+
|
|
17
|
+
- [#19](https://github.com/iceglober/glrs/pull/19) [`6e942c5`](https://github.com/iceglober/glrs/commit/6e942c5099a535a7d1cda161a1bbc1692f937008) Thanks [@iceglober](https://github.com/iceglober)! - Link `@glrs-dev/cli` and `@glrs-dev/harness-plugin-opencode` versions in Changesets config so they always release together. The CLI vendors the harness plugin's `dist/` at build time (via `packages/cli/scripts/vendor-harness.ts`), so plugin fixes don't reach users running `glrs oc install` until a CLI release is cut. Linking the two ensures every harness-plugin bump produces a matching CLI bump, closing the gap where a plugin fix sat on npm without a CLI tarball that bundled it.
|
|
18
|
+
|
|
19
|
+
This bump also forces a CLI republish that vendors `@glrs-dev/harness-plugin-opencode@0.3.0` so users get the recent `glrs oc install` reconfigure fix via `glrs oc install`, not just `glrs-oc install` directly.
|
|
20
|
+
|
|
3
21
|
## 0.1.1
|
|
4
22
|
|
|
5
23
|
### Patch Changes
|
|
@@ -68,12 +68,22 @@ Write the minimal code that makes verify pass:
|
|
|
68
68
|
- Modify existing? Read the surrounding 30 lines first; mirror the existing patterns in indentation, error handling, log format.
|
|
69
69
|
- Add a test? Look at one existing test in the same dir; copy its scaffolding (imports, setup, teardown). Don't invent a new test pattern when the codebase has a strong convention.
|
|
70
70
|
|
|
71
|
-
## 4.
|
|
71
|
+
## 4. Dependency rules — task-level vs environment bootstrap
|
|
72
72
|
|
|
73
|
-
|
|
73
|
+
### 4a. Task-level dependencies still require task approval
|
|
74
|
+
|
|
75
|
+
If `task.prompt` says "add lodash to handle deep merging", install it. If the task is silent on deps, don't add them — find an existing util, write a tiny helper inline, or STOP if the task is genuinely impossible without a dep.
|
|
74
76
|
|
|
75
77
|
`package.json` / `bun.lock` / `Cargo.lock` etc. are typically NOT in your `touches:` scope. Adding a dep when the scope forbids editing the lock file is a touches violation; the worker will catch it.
|
|
76
78
|
|
|
79
|
+
### 4b. Environment bootstrap self-heals during the fix-loop
|
|
80
|
+
|
|
81
|
+
If a verify failure clearly points to an environmental issue — `Cannot find module 'X'` where `X` is a workspace/monorepo dep, `node_modules` absent despite a lockfile committed to the repo, a stale build artifact a typecheck depends on — you ARE expected to run the obvious install command BEFORE giving up with STOP.
|
|
82
|
+
|
|
83
|
+
Recognise these canonical bootstrap commands: `pnpm install`, `bun install`, `npm install`, `npm ci`, `cargo fetch`, `cargo build`.
|
|
84
|
+
|
|
85
|
+
The plugin deny list does not block any of these; they are not task-level dependency additions and they do not require lockfile edits.
|
|
86
|
+
|
|
77
87
|
## 5. When you think you're done, just stop
|
|
78
88
|
|
|
79
89
|
Don't write a "Summary" message. Don't list the files you changed. Don't propose follow-ups. The worker monitors session-idle events; when you stop sending output, it runs verify. If verify passes, the work commits with the message `<task.id>: <task.title>`. If verify fails, you'll get a fix prompt with the failure output verbatim.
|
|
@@ -101,7 +111,22 @@ If the fix prompt names `touchesViolators`: revert your edits to those files. Us
|
|
|
101
111
|
- Plan. The plan is `pilot.yaml`. Each task in it was already designed by the pilot-planner agent. You are not a co-author.
|
|
102
112
|
- Refactor unrelated code. The task names a scope; respect it. If you see a glaring issue elsewhere, ignore it — that's a separate task for the human.
|
|
103
113
|
- Add observability/logging beyond what the task asks for. If the task didn't say "add structured logs", don't add structured logs.
|
|
104
|
-
- Run the verify commands yourself. The worker runs them after you stop. Running them yourself wastes turns and can leave residue (test artifacts, cached state) that messes up the worker's run.
|
|
105
114
|
- Apologize, hedge, or narrate. Each turn is a billable opencode session call; chat preamble buys you nothing.
|
|
115
|
+
- **Write TODO, FIXME, HACK, or XXX comments.** Many repos have pre-commit hooks that reject these annotations. The worker commits your work automatically after verify passes; if the commit is blocked by a hook, the task fails. If you need to note future work, put it in the task's output summary, not in a code comment.
|
|
116
|
+
|
|
117
|
+
# Self-verification — run the tests BEFORE you stop
|
|
118
|
+
|
|
119
|
+
**You SHOULD run the task's verify commands yourself during your work session.** The worker runs them formally after you stop, but you should iterate locally first:
|
|
120
|
+
|
|
121
|
+
1. Write the code.
|
|
122
|
+
2. Run the verify command(s) listed in the task's `verify:` field.
|
|
123
|
+
3. If they fail, fix the code and re-run. Iterate until they pass.
|
|
124
|
+
4. THEN stop.
|
|
125
|
+
|
|
126
|
+
This is faster and cheaper than the worker's retry loop (which requires a full session round-trip per attempt). The worker's formal verify is a gate, not your development loop — arrive at the gate already passing.
|
|
127
|
+
|
|
128
|
+
**How to find the verify commands:** They're in the task kickoff prompt under "Verify commands". Run them exactly as written via bash. They execute in the repo root (cwd).
|
|
129
|
+
|
|
130
|
+
**Exception:** If a verify command requires infrastructure you can't reach (e.g., a running server on a specific port), note that in your output and stop. The worker will handle it.
|
|
106
131
|
|
|
107
|
-
You're a focused, fast, pessimistic implementer. Make the change.
|
|
132
|
+
You're a focused, fast, pessimistic implementer. Make the change. Verify it passes. Stop.
|
|
@@ -45,12 +45,13 @@ Use Serena and grep to map out:
|
|
|
45
45
|
- Existing tests that already cover related code (the verify commands will likely be variations of those).
|
|
46
46
|
- Existing patterns the change should match.
|
|
47
47
|
- Any module boundaries that suggest natural task splits.
|
|
48
|
+
- **Tooling footprint** — lockfiles, docker-compose services, migration tooling, UI/API/DB test frameworks. Understanding these informs your per-surface verify patterns in Section 3.
|
|
48
49
|
|
|
49
50
|
Be thorough here. A planner who shipped a sloppy plan because they only skimmed the codebase wastes hours of pilot-builder time chasing bad scope.
|
|
50
51
|
|
|
51
52
|
## 3. Apply the planning methodology
|
|
52
53
|
|
|
53
|
-
The `pilot-planning` skill carries the eight rules. Apply them:
|
|
54
|
+
The `pilot-planning` skill carries the nine rules. Apply them:
|
|
54
55
|
|
|
55
56
|
1. First-principles task framing.
|
|
56
57
|
2. Decomposition into right-sized tasks.
|
|
@@ -60,6 +61,17 @@ The `pilot-planning` skill carries the eight rules. Apply them:
|
|
|
60
61
|
6. Optional milestone grouping.
|
|
61
62
|
7. Self-review.
|
|
62
63
|
8. Per-task `context:` population (rationale, code pointers, acceptance shorthand).
|
|
64
|
+
9. **QA-expectations establishment** — detect per-surface test frameworks and propose concrete verify patterns:
|
|
65
|
+
- **UI**: Playwright, Cypress, or Vitest browser mode for visual/interaction assertions
|
|
66
|
+
- **API**: curl against local endpoints or OpenAPI-based contract tests
|
|
67
|
+
- **DB**: Postgres readiness checks and migration verification (prisma migrate, drizzle-kit push)
|
|
68
|
+
- **Integration**: `test/integration` or `e2e` directory patterns
|
|
69
|
+
- **Browser-based component**: Storybook or Chromatic visual tests
|
|
70
|
+
- **CLI**: bin/ smoke tests or `--help` verification
|
|
71
|
+
|
|
72
|
+
Rule 9 typically involves ONE bundled `question` tool call to the user for QA verify patterns (respecting "talk to the user — once" guidance).
|
|
73
|
+
|
|
74
|
+
Note: The `setup:` field was removed in the cwd-mode rollback. Plans assume the user's dev stack is already running (install, compose, migrate, seed) before `pilot build` is invoked. Remind the user of this at hand-off.
|
|
63
75
|
|
|
64
76
|
## 4. Write the YAML
|
|
65
77
|
|
|
@@ -99,6 +111,17 @@ tasks:
|
|
|
99
111
|
touches:
|
|
100
112
|
- src/api/**
|
|
101
113
|
- test/api/**
|
|
114
|
+
tolerate: # optional — files that may appear in
|
|
115
|
+
# the diff but aren't part of the task's
|
|
116
|
+
# scope (project-specific codegen,
|
|
117
|
+
# framework side-effects beyond the
|
|
118
|
+
# built-in defaults like next-env.d.ts).
|
|
119
|
+
# Common entries: prisma/client/**,
|
|
120
|
+
# graphql/generated/**, schema.graphql.
|
|
121
|
+
# Built-in defaults already cover
|
|
122
|
+
# next-env.d.ts, .next/types/**,
|
|
123
|
+
# *.tsbuildinfo, __snapshots__/**.
|
|
124
|
+
- prisma/client/**
|
|
102
125
|
verify:
|
|
103
126
|
- bun test test/api
|
|
104
127
|
depends_on: [ ] # other task ids
|
|
@@ -139,6 +162,8 @@ Don't elaborate. Don't summarize the plan in chat. The user can read it.
|
|
|
139
162
|
|
|
140
163
|
- **Asking the human to clarify mid-build.** Don't write tasks whose prompts contain things like "ask the user about X". Pilot is unattended. If you don't know X, either ASK NOW (during the planning session) or design the task to discover X via reading code.
|
|
141
164
|
|
|
165
|
+
- **YAML quoting errors in titles/prompts.** If a string contains double quotes, wrap it in single quotes: `title: '"Test rule set" UI + hook'`. If it contains single quotes, use double quotes with escaped inner quotes: `title: "it's a \"test\""`. NEVER write `title: "word" more words` — YAML closes the scalar at the second `"`. Run `pilot validate` after saving; it catches these.
|
|
166
|
+
|
|
142
167
|
# What "done" looks like
|
|
143
168
|
|
|
144
169
|
A plan that:
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: research-auto
|
|
3
|
+
description: Research orchestrator subagent — Autonomous experimentation skill. Agent interviews the user, sets up a lab, then explores freely (think, test, reflect) until stopped or a target is hit. Works for any domain where you can measure or evaluate a result. Use when user says 'optimize this', 'experiment with', 'find the best approach', 'iterate on', 'research mode'. Do NOT use for binary validation tests (use /spec-lab instead). Based on ResearcherSkill v1.4.4 by krzysztofdudek.
|
|
4
|
+
mode: all
|
|
5
|
+
model: anthropic/claude-opus-4-7
|
|
6
|
+
temperature: 0.3
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# @research-auto — Autonomous Experimentation Agent
|
|
10
|
+
|
|
11
|
+
You are the `research-auto` agent. Your job is to run autonomous experiments by following the bundled `research-auto` skill methodology end-to-end.
|
|
12
|
+
|
|
13
|
+
**Research Query:** $ARGUMENTS
|
|
14
|
+
|
|
15
|
+
## Task
|
|
16
|
+
|
|
17
|
+
1. Read the bundled `research-auto` skill via the Skill tool
|
|
18
|
+
2. Follow every instruction in the skill exactly
|
|
19
|
+
3. Execute the full experimentation workflow from discovery through conclusion
|
|
20
|
+
|
|
21
|
+
## Notes on Experiment Commands
|
|
22
|
+
|
|
23
|
+
This agent may run arbitrary user-supplied commands as part of experiments. The `.lab/` directory is used for scratch writes and experiment tracking. These are expected behaviors per the skill methodology.
|
|
24
|
+
|
|
25
|
+
## PRIME-Delegation Brief Contract
|
|
26
|
+
|
|
27
|
+
When PRIME passes a brief via task tool:
|
|
28
|
+
- Trust the brief. The task-tool arguments ARE the research query — proceed directly.
|
|
29
|
+
- Do not re-interview on points already resolved in the brief.
|
|
30
|
+
- If the brief lacks critical context (e.g., no query provided), ask once then proceed.
|
|
31
|
+
|
|
32
|
+
## STOP — Do Not
|
|
33
|
+
|
|
34
|
+
- Do NOT experiment directly without following the skill methodology
|
|
35
|
+
- Do NOT skip the discovery phase — it is mandatory
|
|
36
|
+
- Do NOT skip the commit-before-run guardrail — it is mandatory
|
|
37
|
+
- Do NOT exceed 3 rounds without presenting — MAX 3 ROUNDS, THEN PRESENT
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: research-local
|
|
3
|
+
description: Research orchestrator subagent — Deep codebase research using parallel Explore subagents. Decomposes a question about the local codebase into research tasks, launches parallel explorations, reviews for gaps, iterates, and synthesizes findings with specific file paths and line numbers. Use when user says 'how does X work in this codebase', 'where is Y implemented', 'trace the data flow for Z', 'what patterns does this repo use', 'explain the architecture of'. Provide the research topic as arguments.
|
|
4
|
+
mode: all
|
|
5
|
+
model: anthropic/claude-opus-4-7
|
|
6
|
+
temperature: 0.3
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# @research-local — Codebase Research Agent
|
|
10
|
+
|
|
11
|
+
You are the `research-local` agent. Your job is to execute deep codebase research by following the bundled `research-local` skill methodology end-to-end. Scope is local codebase ONLY — no web research.
|
|
12
|
+
|
|
13
|
+
**Research Query:** $ARGUMENTS
|
|
14
|
+
|
|
15
|
+
## Task
|
|
16
|
+
|
|
17
|
+
1. Read the bundled `research-local` skill via the Skill tool
|
|
18
|
+
2. Follow every instruction in the skill exactly
|
|
19
|
+
3. Execute the full research workflow from decomposition through synthesis
|
|
20
|
+
|
|
21
|
+
## PRIME-Delegation Brief Contract
|
|
22
|
+
|
|
23
|
+
When PRIME passes a brief via task tool:
|
|
24
|
+
- Trust the brief. The task-tool arguments ARE the research query — proceed directly.
|
|
25
|
+
- Do not re-interview on points already resolved in the brief.
|
|
26
|
+
- If the brief lacks critical context (e.g., no query provided), ask once then proceed.
|
|
27
|
+
|
|
28
|
+
## STOP — Do Not
|
|
29
|
+
|
|
30
|
+
- Do NOT research directly — always follow the research-local skill methodology
|
|
31
|
+
- Do NOT use exploration tools yourself — every phase is a subagent
|
|
32
|
+
- Do NOT skip the decomposition phase — it is mandatory
|
|
33
|
+
- Do NOT synthesize findings yourself — synthesis is a subagent
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: research-web
|
|
3
|
+
description: Research orchestrator subagent — Multi-agent web research orchestrator. Decomposes a research question into parallel agent workstreams, launches them, monitors progress, and synthesizes results. Use when user says 'research this topic', 'I need to understand', 'deep dive into', 'investigate the market for', 'what do we know about'. Provide the research topic and context.
|
|
4
|
+
mode: all
|
|
5
|
+
model: anthropic/claude-opus-4-7
|
|
6
|
+
temperature: 0.3
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# @research-web — Web Research Agent
|
|
10
|
+
|
|
11
|
+
You are the `research-web` agent. Your job is to execute web research by following the bundled `research-web` skill methodology end-to-end.
|
|
12
|
+
|
|
13
|
+
**Research Query:** $ARGUMENTS
|
|
14
|
+
|
|
15
|
+
## Task
|
|
16
|
+
|
|
17
|
+
1. Read the bundled `research-web` skill via the Skill tool
|
|
18
|
+
2. Follow every instruction in the skill exactly
|
|
19
|
+
3. Execute the full research workflow from planning through synthesis
|
|
20
|
+
|
|
21
|
+
## PRIME-Delegation Brief Contract
|
|
22
|
+
|
|
23
|
+
When PRIME passes a brief via task tool:
|
|
24
|
+
- Trust the brief. The task-tool arguments ARE the research query — proceed directly.
|
|
25
|
+
- Do not re-interview on points already resolved in the brief.
|
|
26
|
+
- If the brief lacks critical context (e.g., no query provided), ask once then proceed.
|
|
27
|
+
|
|
28
|
+
## STOP — Do Not
|
|
29
|
+
|
|
30
|
+
- Do NOT research directly — always follow the research-web skill methodology
|
|
31
|
+
- Do NOT skip the planning phase — it is mandatory
|
|
32
|
+
- Do NOT launch agents sequentially — dispatch all independent workstreams in ONE message
|
|
@@ -22,30 +22,25 @@ You are an **orchestrator only**. You do NOT:
|
|
|
22
22
|
|
|
23
23
|
Every cognitive task is a subagent. You launch subagents and pass their outputs to other subagents.
|
|
24
24
|
|
|
25
|
-
## How to Invoke
|
|
25
|
+
## How to Invoke Research Agents
|
|
26
26
|
|
|
27
|
-
The four research
|
|
27
|
+
The four research agents are available:
|
|
28
28
|
|
|
29
|
-
1.
|
|
30
|
-
2.
|
|
31
|
-
3.
|
|
32
|
-
4.
|
|
29
|
+
1. **`@research`** (this agent) — umbrella orchestrator for multi-workstream research
|
|
30
|
+
2. **`@research-local`** — deep codebase research using parallel Explore subagents
|
|
31
|
+
3. **`@research-web`** — multi-agent web research with skeleton-file pattern
|
|
32
|
+
4. **`@research-auto`** — autonomous experimentation with `.lab/` directory
|
|
33
33
|
|
|
34
|
-
**To
|
|
34
|
+
**To dispatch a research subagent:** Use the task tool with the agent name and pass the sub-question as the prompt:
|
|
35
35
|
|
|
36
36
|
```
|
|
37
|
-
|
|
38
|
-
"
|
|
39
|
-
|
|
40
|
-
## Research Query
|
|
41
|
-
{the full query or sub-question}
|
|
42
|
-
|
|
43
|
-
## Task
|
|
44
|
-
1. Read the bundled {skill-name} skill via the Skill tool and follow every instruction
|
|
45
|
-
2. Focus specifically on: {sub-question}
|
|
46
|
-
3. Report back with your complete findings"
|
|
37
|
+
task tool:
|
|
38
|
+
agent: "research-web"
|
|
39
|
+
prompt: "Research the competitive landscape for X. Focus on: {specific angle}."
|
|
47
40
|
```
|
|
48
41
|
|
|
42
|
+
The research agents are thin shims that load their matching bundled skill and follow it end-to-end. Trust the brief — the task-tool arguments ARE the research query.
|
|
43
|
+
|
|
49
44
|
## 7-Phase Flow
|
|
50
45
|
|
|
51
46
|
### Phase 1: Plan — Subagent
|
|
@@ -77,9 +72,9 @@ Output 3-6 workstreams. Mark dependencies explicitly."
|
|
|
77
72
|
|
|
78
73
|
Dispatch **one Agent per workstream**. Launch ALL independent workstreams in a SINGLE message.
|
|
79
74
|
|
|
80
|
-
For LOCAL workstreams:
|
|
81
|
-
For WEB workstreams:
|
|
82
|
-
For AUTO workstreams:
|
|
75
|
+
For LOCAL workstreams: dispatch `@research-local` via task tool.
|
|
76
|
+
For WEB workstreams: dispatch `@research-web` via task tool.
|
|
77
|
+
For AUTO workstreams: dispatch `@research-auto` via task tool.
|
|
83
78
|
|
|
84
79
|
### Phase 3: Review Round 1 — Subagent
|
|
85
80
|
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
// src/pilot/state/tasks.ts
|
|
2
|
+
/**
 * Seed the `tasks` table with one `pending` row per task in a plan.
 *
 * The statement is `INSERT OR IGNORE`, so despite the "upsert" name an
 * insert that conflicts with an existing row is skipped and the existing
 * row is never updated. (The constraint that triggers the conflict —
 * presumably a unique key over run_id/task_id — is not visible here.)
 *
 * @param {object} db - Database handle exposing `prepare(sql)` and `transaction(fn)`.
 * @param {string} runId - Run the tasks belong to.
 * @param {{ tasks: Iterable<{ id: string }> }} plan - Plan whose task ids are inserted.
 * @returns {void}
 */
function upsertFromPlan(db, runId, plan) {
  const insert = db.prepare(
    `INSERT OR IGNORE INTO tasks (run_id, task_id, status) VALUES (?, ?, 'pending')`
  );
  // Wrap the whole plan in a single transaction so the rows land together.
  const insertAll = db.transaction(() => {
    for (const { id } of plan.tasks) {
      insert.run(runId, id);
    }
  });
  insertAll();
}
|
|
13
|
+
/**
 * Move a task from `pending` to `ready`.
 *
 * @param {object} db - Database handle exposing `run(sql, params)`.
 * @param {string} runId - Owning run.
 * @param {string} taskId - Task to transition.
 * @returns {void}
 * @throws {Error} From `requireStatus` when the task is missing or is not
 *   currently `pending`.
 */
function markReady(db, runId, taskId) {
  requireStatus(db, runId, taskId, ["pending"], "ready");
  const sql = "UPDATE tasks SET status='ready' WHERE run_id=? AND task_id=?";
  db.run(sql, [runId, taskId]);
}
|
|
20
|
+
/**
 * Move a task from `ready` to `running` and record where it is executing.
 *
 * Each call increments `attempts`. `started_at` is written through
 * `COALESCE(started_at, ?)`, so an already-set start time is preserved and
 * only a NULL one is filled in.
 *
 * @param {object} db - Database handle exposing `run(sql, params)`.
 * @param {object} args
 * @param {string} args.runId - Owning run.
 * @param {string} args.taskId - Task to transition.
 * @param {*} args.sessionId - Stored in `session_id`.
 * @param {*} args.branch - Stored in `branch`.
 * @param {*} args.worktreePath - Stored in `worktree_path`.
 * @param {number} [args.now] - Timestamp override; defaults to `Date.now()`
 *   only when null/undefined (so `0` is honoured).
 * @returns {void}
 * @throws {Error} From `requireStatus` when the task is missing or not `ready`.
 */
function markRunning(db, args) {
  const { runId, taskId, sessionId, branch, worktreePath } = args;
  requireStatus(db, runId, taskId, ["ready"], "running");
  const startedAt = args.now ?? Date.now();
  db.run(
    `UPDATE tasks
       SET status='running',
           attempts = attempts + 1,
           session_id = ?,
           branch = ?,
           worktree_path = ?,
           started_at = COALESCE(started_at, ?)
     WHERE run_id=? AND task_id=?`,
    [sessionId, branch, worktreePath, startedAt, runId, taskId]
  );
}
|
|
35
|
+
/**
 * Move a task from `running` to `succeeded`, stamping `finished_at` and
 * clearing any previous `last_error`.
 *
 * @param {object} db - Database handle exposing `run(sql, params)`.
 * @param {string} runId - Owning run.
 * @param {string} taskId - Task to transition.
 * @param {number} [now=Date.now()] - Value written to `finished_at`.
 * @returns {void}
 * @throws {Error} From `requireStatus` when the task is missing or not `running`.
 */
function markSucceeded(db, runId, taskId, now = Date.now()) {
  requireStatus(db, runId, taskId, ["running"], "succeeded");
  const sql = `UPDATE tasks
       SET status='succeeded', finished_at=?, last_error=NULL
     WHERE run_id=? AND task_id=?`;
  db.run(sql, [now, runId, taskId]);
}
|
|
44
|
+
/**
 * Mark a `running` or `ready` task as `failed`, recording when it finished
 * and why.
 *
 * @param {object} db - Database handle exposing `run(sql, params)`.
 * @param {string} runId - Owning run.
 * @param {string} taskId - Task to transition.
 * @param {*} reason - Stored in `last_error`.
 * @param {number} [now=Date.now()] - Value written to `finished_at`.
 * @returns {void}
 * @throws {Error} From `requireStatus` when the task is missing or in any
 *   other status.
 */
function markFailed(db, runId, taskId, reason, now = Date.now()) {
  requireStatus(db, runId, taskId, ["running", "ready"], "failed");
  const sql = `UPDATE tasks
       SET status='failed', finished_at=?, last_error=?
     WHERE run_id=? AND task_id=?`;
  db.run(sql, [now, reason, runId, taskId]);
}
|
|
53
|
+
/**
 * Mark a `pending` or `ready` task as `blocked` and store the reason.
 *
 * Unlike the failed/aborted transitions, this one does not write
 * `finished_at`.
 *
 * @param {object} db - Database handle exposing `run(sql, params)`.
 * @param {string} runId - Owning run.
 * @param {string} taskId - Task to transition.
 * @param {*} reason - Stored in `last_error`.
 * @returns {void}
 * @throws {Error} From `requireStatus` when the task is missing or in any
 *   other status.
 */
function markBlocked(db, runId, taskId, reason) {
  requireStatus(db, runId, taskId, ["pending", "ready"], "blocked");
  const sql = `UPDATE tasks
       SET status='blocked', last_error=?
     WHERE run_id=? AND task_id=?`;
  db.run(sql, [reason, runId, taskId]);
}
|
|
62
|
+
/**
 * Mark a `running` or `ready` task as `aborted`, recording when it stopped
 * and why.
 *
 * @param {object} db - Database handle exposing `run(sql, params)`.
 * @param {string} runId - Owning run.
 * @param {string} taskId - Task to transition.
 * @param {*} reason - Stored in `last_error`.
 * @param {number} [now=Date.now()] - Value written to `finished_at`.
 * @returns {void}
 * @throws {Error} From `requireStatus` when the task is missing or in any
 *   other status.
 */
function markAborted(db, runId, taskId, reason, now = Date.now()) {
  requireStatus(db, runId, taskId, ["running", "ready"], "aborted");
  const sql = `UPDATE tasks
       SET status='aborted', finished_at=?, last_error=?
     WHERE run_id=? AND task_id=?`;
  db.run(sql, [now, reason, runId, taskId]);
}
|
|
71
|
+
/**
 * Force a task back to `pending`, clearing its session, branch, worktree,
 * timestamps and last error.
 *
 * This only checks that the task exists — it does not validate the current
 * status, so it can be applied from any state. `attempts` is not part of
 * the UPDATE and is therefore left as-is.
 *
 * @param {object} db - Database handle exposing `run(sql, params)`.
 * @param {string} runId - Owning run.
 * @param {string} taskId - Task to reset.
 * @returns {void}
 * @throws {Error} When no such task exists in the run.
 */
function markPending(db, runId, taskId) {
  if (!getTask(db, runId, taskId)) {
    throw new Error(
      `markPending: task ${JSON.stringify(taskId)} not found in run ${JSON.stringify(runId)}`
    );
  }
  const sql = `UPDATE tasks
       SET status='pending',
           session_id=NULL,
           branch=NULL,
           worktree_path=NULL,
           started_at=NULL,
           finished_at=NULL,
           last_error=NULL
     WHERE run_id=? AND task_id=?`;
  db.run(sql, [runId, taskId]);
}
|
|
91
|
+
/**
 * Store a task's cost in the `cost_usd` column.
 *
 * The value must be a finite, non-negative number; strings and other
 * non-number values are rejected because `Number.isFinite` does not coerce.
 * The task's existence is not checked.
 *
 * @param {object} db - Database handle exposing `run(sql, params)`.
 * @param {string} runId - Owning run.
 * @param {string} taskId - Task to update.
 * @param {number} costUsd - Cost to record.
 * @returns {void}
 * @throws {RangeError} When `costUsd` is not a finite number or is negative.
 */
function setCostUsd(db, runId, taskId, costUsd) {
  const isValidCost = Number.isFinite(costUsd) && costUsd >= 0;
  if (!isValidCost) {
    throw new RangeError(`setCostUsd: invalid cost ${costUsd}`);
  }
  const sql = "UPDATE tasks SET cost_usd=? WHERE run_id=? AND task_id=?";
  db.run(sql, [costUsd, runId, taskId]);
}
|
|
100
|
+
/**
 * Look up a single task row by run id and task id.
 *
 * @param {object} db - Database handle exposing `query(sql)`, whose result
 *   has a `get(...params)` method.
 * @param {string} runId - Owning run.
 * @param {string} taskId - Task to fetch.
 * @returns {*} Whatever `get` yields: the row, or a falsy value when nothing
 *   matches (callers in this file test the result with `!row`).
 */
function getTask(db, runId, taskId) {
  const lookup = db.query("SELECT * FROM tasks WHERE run_id=? AND task_id=?");
  return lookup.get(runId, taskId);
}
|
|
103
|
+
/**
 * Fetch every task row for a run, ordered by `task_id`.
 *
 * @param {object} db - Database handle exposing `query(sql)`, whose result
 *   has an `all(...params)` method.
 * @param {string} runId - Run whose tasks are listed.
 * @returns {Array<object>} The rows returned by `all`.
 */
function listTasks(db, runId) {
  const byRun = db.query("SELECT * FROM tasks WHERE run_id=? ORDER BY task_id");
  return byRun.all(runId);
}
|
|
106
|
+
/**
 * Fetch the task rows of a run whose status is `ready`, ordered by `task_id`.
 *
 * @param {object} db - Database handle exposing `query(sql)`, whose result
 *   has an `all(...params)` method.
 * @param {string} runId - Run whose ready tasks are listed.
 * @returns {Array<object>} The rows returned by `all`.
 */
function readyTasks(db, runId) {
  const readyByRun = db.query(
    "SELECT * FROM tasks WHERE run_id=? AND status='ready' ORDER BY task_id"
  );
  return readyByRun.all(runId);
}
|
|
109
|
+
/**
 * Count a run's tasks per status.
 *
 * The result always contains the seven known statuses, defaulting to 0 when
 * no task is in that state. A status returned by the query that is not in
 * that list is added to the result under its own key.
 *
 * @param {object} db - Database handle exposing `query(sql)`, whose result
 *   has an `all(...params)` method.
 * @param {string} runId - Run to summarise.
 * @returns {Record<string, number>} Map of status to task count.
 */
function countByStatus(db, runId) {
  const tally = {
    pending: 0,
    ready: 0,
    running: 0,
    succeeded: 0,
    failed: 0,
    blocked: 0,
    aborted: 0,
  };
  const grouped = db
    .query("SELECT status, COUNT(*) as n FROM tasks WHERE run_id=? GROUP BY status")
    .all(runId);
  for (const { status, n } of grouped) {
    tally[status] = n;
  }
  return tally;
}
|
|
123
|
+
/**
 * Reset every task of a run that has not succeeded back to `pending`.
 *
 * For each such task this zeroes `attempts` and clears the session, branch,
 * worktree, timestamps and last error. Succeeded tasks are left untouched.
 * All updates run inside one transaction; no statement is prepared when
 * there is nothing to reset.
 *
 * @param {object} db - Database handle exposing `prepare(sql)` and
 *   `transaction(fn)` (and whatever `listTasks` needs).
 * @param {string} runId - Run being resumed.
 * @returns {string[]} Ids of the tasks selected for reset, in `listTasks` order.
 */
function resetTasksForResume(db, runId) {
  const pendingIds = listTasks(db, runId)
    .filter((task) => task.status !== "succeeded")
    .map((task) => task.task_id);
  if (pendingIds.length === 0) return [];

  // The WHERE clause re-checks the status, so a row that is `succeeded` by
  // the time the UPDATE runs is not clobbered (its id is still returned).
  const reset = db.prepare(
    `UPDATE tasks
       SET status='pending',
           attempts=0,
           session_id=NULL,
           last_error=NULL,
           started_at=NULL,
           finished_at=NULL,
           branch=NULL,
           worktree_path=NULL
     WHERE run_id=? AND task_id=? AND status != 'succeeded'`
  );
  const resetAll = db.transaction(() => {
    for (const taskId of pendingIds) reset.run(runId, taskId);
  });
  resetAll();
  return pendingIds;
}
|
|
145
|
+
/**
 * Guard for status transitions: assert that a task exists and that its
 * current status is one of the allowed starting states.
 *
 * @param {object} db - Database handle passed through to `getTask`.
 * @param {string} runId - Owning run.
 * @param {string} taskId - Task being transitioned.
 * @param {string[]} expected - Statuses the task may currently be in.
 * @param {string} intended - Target status; used only in the error message.
 * @returns {void}
 * @throws {Error} When the task is not found, or its status is not in `expected`.
 */
function requireStatus(db, runId, taskId, expected, intended) {
  const task = getTask(db, runId, taskId);
  if (!task) {
    throw new Error(
      `task ${JSON.stringify(taskId)} not found in run ${JSON.stringify(runId)}`
    );
  }
  if (expected.includes(task.status)) return;
  throw new Error(
    `cannot move task ${JSON.stringify(taskId)} from ${task.status} to ${intended} (expected one of: ${expected.join(", ")})`
  );
}
|
|
158
|
+
|
|
159
|
+
export {
|
|
160
|
+
upsertFromPlan,
|
|
161
|
+
markReady,
|
|
162
|
+
markRunning,
|
|
163
|
+
markSucceeded,
|
|
164
|
+
markFailed,
|
|
165
|
+
markBlocked,
|
|
166
|
+
markAborted,
|
|
167
|
+
markPending,
|
|
168
|
+
setCostUsd,
|
|
169
|
+
getTask,
|
|
170
|
+
listTasks,
|
|
171
|
+
readyTasks,
|
|
172
|
+
countByStatus,
|
|
173
|
+
resetTasksForResume
|
|
174
|
+
};
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
// src/pilot/state/runs.ts
|
|
2
|
+
import { ulid } from "ulid";
|
|
3
|
+
/**
 * Insert a new run row in `pending` status and return its generated id.
 *
 * @param {object} db - Database handle exposing `run(sql, params)`.
 * @param {object} args
 * @param {*} args.planPath - Stored in `plan_path`.
 * @param {*} args.slug - Stored in `plan_slug`.
 * @param {*} [args.plan] - Read but not persisted (see note in body).
 * @param {number} [args.now] - Value for `started_at`; defaults to
 *   `Date.now()` only when null/undefined.
 * @returns {string} The id produced by `ulid()`.
 */
function createRun(db, args) {
  const runId = ulid();
  const startedAt = args.now ?? Date.now();
  db.run(
    `INSERT INTO runs (id, plan_path, plan_slug, started_at, status)
     VALUES (?, ?, ?, ?, 'pending')`,
    [runId, args.planPath, args.slug, startedAt]
  );
  // No-op reference: `plan` is accepted but never stored. Presumably kept so
  // the argument counts as "used" in the original TypeScript — confirm.
  void args.plan;
  return runId;
}
|
|
14
|
+
/**
 * Move a run from `pending` to `running`.
 *
 * Calling it on a run that is already `running` is a no-op; any other
 * current status is rejected.
 *
 * @param {object} db - Database handle exposing `run(sql, params)`.
 * @param {string} runId - Run to start.
 * @returns {void}
 * @throws {Error} When the run does not exist, or is neither `pending` nor
 *   `running`.
 */
function markRunRunning(db, runId) {
  const run = getRun(db, runId);
  if (!run) {
    throw new Error(`markRunRunning: run ${JSON.stringify(runId)} not found`);
  }
  switch (run.status) {
    case "running":
      // Already there — nothing to write.
      return;
    case "pending":
      db.run("UPDATE runs SET status='running' WHERE id=?", [runId]);
      return;
    default:
      throw new Error(
        `markRunRunning: cannot move run ${JSON.stringify(runId)} from ${run.status} to running`
      );
  }
}
|
|
25
|
+
/**
 * Record a run's terminal status and finish time.
 *
 * The requested status is validated before the run is looked up. The run's
 * current status is not checked, so any existing run can be finished.
 *
 * @param {object} db - Database handle exposing `run(sql, params)`.
 * @param {string} runId - Run to finish.
 * @param {"completed"|"aborted"|"failed"} status - Terminal status to store.
 * @param {number} [now=Date.now()] - Value written to `finished_at`.
 * @returns {void}
 * @throws {Error} When `status` is not terminal, or the run does not exist.
 */
function markRunFinished(db, runId, status, now = Date.now()) {
  const terminalStatuses = ["completed", "aborted", "failed"];
  if (!terminalStatuses.includes(status)) {
    throw new Error(
      `markRunFinished: ${JSON.stringify(status)} is not a terminal status`
    );
  }
  if (!getRun(db, runId)) {
    throw new Error(`markRunFinished: run ${JSON.stringify(runId)} not found`);
  }
  db.run("UPDATE runs SET status=?, finished_at=? WHERE id=?", [status, now, runId]);
}
|
|
37
|
+
/**
 * Put a run back into `running` and clear its `finished_at`.
 *
 * Any existing run may be resumed except one that is already `completed`.
 *
 * @param {object} db - Database handle exposing `run(sql, params)`.
 * @param {string} runId - Run to resume.
 * @returns {void}
 * @throws {Error} When the run does not exist or is already `completed`.
 */
function markRunResumed(db, runId) {
  const run = getRun(db, runId);
  if (!run) {
    throw new Error(`markRunResumed: run ${JSON.stringify(runId)} not found`);
  }
  if (run.status === "completed") {
    throw new Error(
      `markRunResumed: run ${JSON.stringify(runId)} is already completed; nothing to resume`
    );
  }
  const sql = "UPDATE runs SET status='running', finished_at=NULL WHERE id=?";
  db.run(sql, [runId]);
}
|
|
47
|
+
/**
 * Look up a single run row by id.
 *
 * @param {object} db - Database handle exposing `query(sql)`, whose result
 *   has a `get(...params)` method.
 * @param {string} runId - Run to fetch.
 * @returns {*} Whatever `get` yields: the row, or a falsy value when nothing
 *   matches (callers in this file test the result with `!cur`).
 */
function getRun(db, runId) {
  return db.query("SELECT * FROM runs WHERE id=?").get(runId);
}
|
|
51
|
+
/**
 * List runs, most recently started first.
 *
 * @param {object} db - Database handle exposing `query(sql)`, whose result
 *   has an `all(...params)` method.
 * @param {number} [limit=100] - Bound to the query's `LIMIT ?` parameter.
 * @returns {Array<object>} The rows returned by `all`.
 */
function listRuns(db, limit = 100) {
  const newestFirst = db.query("SELECT * FROM runs ORDER BY started_at DESC LIMIT ?");
  return newestFirst.all(limit);
}
|
|
54
|
+
/**
 * Fetch the run with the greatest `started_at`.
 *
 * @param {object} db - Database handle exposing `query(sql)`, whose result
 *   has a `get()` method.
 * @returns {*} Whatever `get` yields: the newest run row, or its
 *   "no row" value when the table is empty.
 */
function latestRun(db) {
  return db.query("SELECT * FROM runs ORDER BY started_at DESC LIMIT 1").get();
}
|
|
58
|
+
|
|
59
|
+
export {
|
|
60
|
+
createRun,
|
|
61
|
+
markRunRunning,
|
|
62
|
+
markRunFinished,
|
|
63
|
+
markRunResumed,
|
|
64
|
+
getRun,
|
|
65
|
+
listRuns,
|
|
66
|
+
latestRun
|
|
67
|
+
};
|