@glrs-dev/cli 0.1.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. package/CHANGELOG.md +31 -0
  2. package/README.md +1 -1
  3. package/dist/vendor/harness-opencode/dist/agents/prompts/pilot-builder.md +12 -2
  4. package/dist/vendor/harness-opencode/dist/agents/prompts/pilot-planner.md +20 -5
  5. package/dist/vendor/harness-opencode/dist/agents/prompts/plan-reviewer.md +1 -1
  6. package/dist/vendor/harness-opencode/dist/agents/prompts/plan.md +5 -5
  7. package/dist/vendor/harness-opencode/dist/agents/prompts/prime.md +3 -3
  8. package/dist/vendor/harness-opencode/dist/agents/prompts/qa-reviewer.md +1 -1
  9. package/dist/vendor/harness-opencode/dist/agents/prompts/qa-thorough.md +1 -1
  10. package/dist/vendor/harness-opencode/dist/agents/prompts/research-auto.md +37 -0
  11. package/dist/vendor/harness-opencode/dist/agents/prompts/research-local.md +33 -0
  12. package/dist/vendor/harness-opencode/dist/agents/prompts/research-web.md +32 -0
  13. package/dist/vendor/harness-opencode/dist/agents/prompts/research.md +15 -20
  14. package/dist/vendor/harness-opencode/dist/{chunk-BDFZGIY7.js → chunk-CZMAJISX.js} +41 -13
  15. package/dist/vendor/harness-opencode/dist/{chunk-UDB4NQ2R.js → chunk-VJUETC6A.js} +1 -1
  16. package/dist/vendor/harness-opencode/dist/{chunk-V3KJY6CN.js → chunk-WBBN7OVN.js} +166 -6
  17. package/dist/vendor/harness-opencode/dist/cli.js +97 -18
  18. package/dist/vendor/harness-opencode/dist/index.d.ts +1 -1
  19. package/dist/vendor/harness-opencode/dist/index.js +5 -5
  20. package/dist/vendor/harness-opencode/dist/install-X5KEANRB.js +13 -0
  21. package/dist/vendor/harness-opencode/dist/skills/pilot-planning/SKILL.md +8 -4
  22. package/dist/vendor/harness-opencode/dist/skills/pilot-planning/rules/qa-expectations.md +120 -0
  23. package/dist/vendor/harness-opencode/dist/skills/pilot-planning/rules/self-review.md +2 -2
  24. package/dist/vendor/harness-opencode/dist/skills/pilot-planning/rules/setup-authoring.md +68 -0
  25. package/dist/vendor/harness-opencode/dist/skills/pilot-planning/rules/verify-design.md +4 -0
  26. package/dist/vendor/harness-opencode/package.json +2 -2
  27. package/package.json +2 -2
  28. package/dist/vendor/harness-opencode/dist/install-GDCZ7VFK.js +0 -9
package/CHANGELOG.md CHANGED
@@ -1,5 +1,36 @@
1
1
  # @glrs-dev/cli
2
2
 
3
+ ## 0.3.1
4
+
5
+ ### Patch Changes
6
+
7
+ - [#19](https://github.com/iceglober/glrs/pull/19) [`6e942c5`](https://github.com/iceglober/glrs/commit/6e942c5099a535a7d1cda161a1bbc1692f937008) Thanks [@iceglober](https://github.com/iceglober)! - Link `@glrs-dev/cli` and `@glrs-dev/harness-plugin-opencode` versions in Changesets config so they always release together. The CLI vendors the harness plugin's `dist/` at build time (via `packages/cli/scripts/vendor-harness.ts`), so plugin fixes don't reach users running `glrs oc install` until a CLI release is cut. Linking the two ensures every harness-plugin bump produces a matching CLI bump, closing the gap where a plugin fix sat on npm without a CLI tarball that bundled it.
8
+
9
+ This bump also forces a CLI republish that vendors `@glrs-dev/harness-plugin-opencode@0.3.0`, so users who run `glrs oc install` get the recent install-reconfigure fix too, not just those who run `glrs-oc install` directly.
10
+
11
+ ## 0.1.1
12
+
13
+ ### Patch Changes
14
+
15
+ - [`050f4b9`](https://github.com/iceglober/glrs/commit/050f4b9bf2304dd5fb5031c38e7fe247b68ead07) Thanks [@iceglober](https://github.com/iceglober)! - **Rename `@glrs-dev/harness-opencode` → `@glrs-dev/harness-plugin-opencode` and republish.**
16
+
17
+ ## Why
18
+
19
+ OpenCode resolves plugins by npm-installing them into `~/.cache/opencode/packages/<plugin>@<version>/` at plugin-load time. The previous plan — marking `@glrs-dev/harness-opencode` as `private: true` and vendoring it only into `@glrs-dev/cli` — broke OpenCode's plugin loader because the package wasn't published on npm, causing `ETARGET: No matching version found for @glrs-dev/harness-opencode@1.0.0`.
20
+
21
+ The fix: publish the plugin under a new name (`@glrs-dev/harness-plugin-opencode`) so OpenCode can resolve it normally. The old name stays deprecated at its last published version (`0.16.2`).
22
+
23
+ ## What changed
24
+
25
+ - `packages/harness-opencode/package.json`: renamed from `@glrs-dev/harness-opencode` to `@glrs-dev/harness-plugin-opencode`, `private: true` removed, `publishConfig.access: public` + `provenance: true` added, version reset to `0.1.0` (fresh name on npm).
26
+ - The `install` / `uninstall` / `doctor` flows now write the new name to `opencode.json`'s plugin array.
27
+ - `@glrs-dev/cli` still bundles a vendored copy of the plugin for standalone subprocess dispatch (`glrs oc`), but the npm-resolved copy is what OpenCode's plugin runtime loads.
28
+ - Bin names unchanged — `harness-opencode` and `glrs-oc` still work.
29
+
30
+ ## Migration for existing users
31
+
32
+ Re-run `glrs oc install` to update your `opencode.json` plugin array from `@glrs-dev/harness-opencode` to `@glrs-dev/harness-plugin-opencode`. The old entry will be replaced; no data loss.
33
+
3
34
  ## 0.1.0
4
35
 
5
36
  ### Minor Changes
package/README.md CHANGED
@@ -21,7 +21,7 @@ The `harness-opencode` bin remains available directly for power users who prefer
21
21
 
22
22
  The `glrs` binary has two subcommands:
23
23
 
24
- - **`glrs oc <args>`** — dispatches to [`@glrs-dev/harness-opencode`](../harness-opencode/) (bundled as a dependency). Resolves the bin via `require.resolve(<package>/package.json)` → reads the `bin` field → spawns with argv forwarded.
24
+ - **`glrs oc <args>`** — dispatches to [`@glrs-dev/harness-plugin-opencode`](../harness-opencode/) (bundled as a dependency). Resolves the bin via `require.resolve(<package>/package.json)` → reads the `bin` field → spawns with argv forwarded.
25
25
  - **`glrs wt <args>`** — worktree management, handled natively. Commands: `new`, `list`, `switch`, `delete`, `cleanup`. Worktrees are stored in `~/.glorious/worktrees/<repo>/<name>/`.
26
26
 
27
27
  ## Philosophy
@@ -68,12 +68,22 @@ Write the minimal code that makes verify pass:
68
68
  - Modify existing? Read the surrounding 30 lines first; mirror the existing patterns in indentation, error handling, log format.
69
69
  - Add a test? Look at one existing test in the same dir; copy its scaffolding (imports, setup, teardown). Don't invent a new test pattern when the codebase has a strong convention.
70
70
 
71
- ## 4. Do NOT install new dependencies unless the task asks for one
71
+ ## 4. Dependency rules: task-level vs environment bootstrap
72
72
 
73
- If `task.prompt` says "add lodash to handle deep merging", install it. If the task is silent on deps, don't add them — find an existing util, write a tiny helper inline, or ask via STOP if the task is genuinely impossible without a dep.
73
+ ### 4a. Task-level dependencies still require task approval
74
+
75
+ If `task.prompt` says "add lodash to handle deep merging", install it. If the task is silent on deps, don't add them — find an existing util, write a tiny helper inline, or STOP if the task is genuinely impossible without a dep.
74
76
 
75
77
  `package.json` / `bun.lock` / `Cargo.lock` etc. are typically NOT in your `touches:` scope. Adding a dep when the scope forbids editing the lock file is a touches violation; the worker will catch it.
76
78
 
79
+ ### 4b. Environment bootstrap self-heals during the fix-loop
80
+
81
+ If a verify failure clearly points to an environmental issue — `Cannot find module 'X'` where `X` is a workspace/monorepo dep, `node_modules` absent despite a lockfile committed to the repo, a stale build artifact a typecheck depends on — you ARE expected to run the obvious install command BEFORE giving up with STOP.
82
+
83
+ Recognise these canonical bootstrap commands: `pnpm install`, `bun install`, `npm install`, `npm ci`, `cargo fetch`, `cargo build`. If the plan declared a `setup:` block, treat that block as the canonical list — run those commands verbatim.
84
+
85
+ The plugin deny list does not block any of these; they are not task-level dependency additions and they do not require lockfile edits.
86
+
77
87
  ## 5. When you think you're done, just stop
78
88
 
79
89
  Don't write a "Summary" message. Don't list the files you changed. Don't propose follow-ups. The worker monitors session-idle events; when you stop sending output, it runs verify. If verify passes, the work commits with the message `<task.id>: <task.title>`. If verify fails, you'll get a fix prompt with the failure output verbatim.
@@ -23,7 +23,7 @@ A good pilot plan has these properties:
23
23
 
24
24
  - The **`pilot-planning` skill** (auto-invoked) carries the full methodology: first-principles questions to ask, decomposition rules, verify-design heuristics, scope-tightness checks, DAG-shape patterns, milestone/self-review checklists. **Read the skill** before you start asking the user questions.
25
25
  - The harness's existing read-only tools (Serena, ast_grep, todo_scan, comment_check, git read commands, linear, webfetch) are available for codebase research.
26
- - The **`bunx @glrs-dev/harness-opencode pilot validate <plan>`** subcommand validates a draft plan: schema, DAG, glob conflicts. Run it before declaring "done" — fix every error it reports.
26
+ - The **`bunx @glrs-dev/harness-plugin-opencode pilot validate <plan>`** subcommand validates a draft plan: schema, DAG, glob conflicts. Run it before declaring "done" — fix every error it reports.
27
27
 
28
28
  # What you cannot do
29
29
 
@@ -45,12 +45,13 @@ Use Serena and grep to map out:
45
45
  - Existing tests that already cover related code (the verify commands will likely be variations of those).
46
46
  - Existing patterns the change should match.
47
47
  - Any module boundaries that suggest natural task splits.
48
+ - **Tooling footprint** — lockfiles, docker-compose services, migration tooling, UI/API/DB test frameworks. You'll use these in Section 3 to propose a `setup:` block and per-surface verify patterns.
48
49
 
49
50
  Be thorough here. A planner who shipped a sloppy plan because they only skimmed the codebase wastes hours of pilot-builder time chasing bad scope.
50
51
 
51
52
  ## 3. Apply the planning methodology
52
53
 
53
- The `pilot-planning` skill carries the eight rules. Apply them:
54
+ The `pilot-planning` skill carries the ten rules. Apply them:
54
55
 
55
56
  1. First-principles task framing.
56
57
  2. Decomposition into right-sized tasks.
@@ -60,15 +61,29 @@ The `pilot-planning` skill carries the eight rules. Apply them:
60
61
  6. Optional milestone grouping.
61
62
  7. Self-review.
62
63
  8. Per-task `context:` population (rationale, code pointers, acceptance shorthand).
64
+ 9. **Setup-block authoring** — detect lockfiles (pnpm, bun, npm, yarn, Cargo), docker-compose services, and migration tooling (prisma, drizzle-kit, knex, flyway), then propose specific setup commands to the user for confirmation.
65
+ 10. **QA-expectations establishment** — detect per-surface test frameworks and propose concrete verify patterns:
66
+ - **UI**: Playwright, Cypress, or Vitest browser mode for visual/interaction assertions
67
+ - **API**: curl against local endpoints or OpenAPI-based contract tests
68
+ - **DB**: Postgres readiness checks and migration verification (prisma migrate, drizzle-kit push)
69
+ - **Integration**: `test/integration` or `e2e` directory patterns
70
+ - **Browser-based component**: Storybook or Chromatic visual tests
71
+ - **CLI**: bin/ smoke tests or `--help` verification
72
+
73
+ Rules 9 and 10 typically involve ONE bundled `question` tool call to the user — combine setup proposals and per-surface verify proposals into a single round (respecting the "talk to the user — once" guidance).
63
74
 
64
75
  ## 4. Write the YAML
65
76
 
66
- Save the plan to the path returned by `bunx @glrs-dev/harness-opencode pilot plan-dir` (yes, this is a different subcommand than the markdown-plan dir). The slug is derived deterministically from the user's input (Linear ID → lowercased, free-form → kebab-case).
77
+ Save the plan to the path returned by `bunx @glrs-dev/harness-plugin-opencode pilot plan-dir` (yes, this is a different subcommand than the markdown-plan dir). The slug is derived deterministically from the user's input (Linear ID → lowercased, free-form → kebab-case).
67
78
 
68
79
  Required schema (see `src/pilot/plan/schema.ts` for the canonical Zod definition):
69
80
 
70
81
  ```yaml
71
82
  name: <human-readable plan name>
83
+ setup: # optional — run once per worktree before any task
84
+ - pnpm install --frozen-lockfile
85
+ - docker compose up -d postgres
86
+ - pnpm prisma migrate dev
72
87
  defaults: # optional, override per-task as needed
73
88
  agent: pilot-builder # default
74
89
  model: anthropic/claude-sonnet-4-6
@@ -109,7 +124,7 @@ tasks:
109
124
  Run:
110
125
 
111
126
  ```
112
- bunx @glrs-dev/harness-opencode pilot validate <plan-path>
127
+ bunx @glrs-dev/harness-plugin-opencode pilot validate <plan-path>
113
128
  ```
114
129
 
115
130
  Fix every error it reports. If it reports glob-conflict warnings, decide: should those tasks be merged, sequenced (add `depends_on`), or accepted as-is (touch sets that overlap but that the user is OK with running serially)?
@@ -120,7 +135,7 @@ Print to the user:
120
135
 
121
136
  ```
122
137
  Plan saved to <path>. Next:
123
- bunx @glrs-dev/harness-opencode pilot build
138
+ bunx @glrs-dev/harness-plugin-opencode pilot build
124
139
  ```
125
140
 
126
141
  Don't elaborate. Don't summarize the plan in chat. The user can read it.
@@ -17,7 +17,7 @@ Read the plan at the path provided. Validate against six criteria:
17
17
  3. **Context** — Is there enough information for an executor to proceed without more than ~10% guesswork? Are file paths real (use `read`/`grep` to spot-check)?
18
18
  4. **Big picture** — Is the `## Goal` clear? Is `## Out of scope` explicit?
19
19
  5. **Scope compliance** — If `## Goal` cites a ticket ID, the plan's `## File-level changes` must not introduce files or subsystems outside the ticket's Changes / Definition of Done section, unless `## Out of scope` (or an explicit sentence in `## Goal`) justifies each expansion. Invented scope is a REJECT.
20
- 6. **Plan-state fence integrity** — For any NEW plan (authored after the fence was introduced), `## Acceptance criteria` MUST contain a ```plan-state fenced block. Every item in the block must have all three of `intent:`, `tests:`, `verify:` populated. For each `tests:` entry, the referenced test file must either (a) exist in the repo (spot-check via `read` or `ls`), or (b) have its path listed in `## File-level changes`. Validate structural correctness by running `bunx @glrs-dev/harness-opencode plan-check --check <plan-path>` — non-zero exit → REJECT. Legacy plans (no fence) pass criterion 6 automatically.
20
+ 6. **Plan-state fence integrity** — For any NEW plan (authored after the fence was introduced), `## Acceptance criteria` MUST contain a ```plan-state fenced block. Every item in the block must have all three of `intent:`, `tests:`, `verify:` populated. For each `tests:` entry, the referenced test file must either (a) exist in the repo (spot-check via `read` or `ls`), or (b) have its path listed in `## File-level changes`. Validate structural correctness by running `bunx @glrs-dev/harness-plugin-opencode plan-check --check <plan-path>` — non-zero exit → REJECT. Legacy plans (no fence) pass criterion 6 automatically.
21
21
 
22
22
  Output exactly one of these two formats. Nothing else.
23
23
 
@@ -1,4 +1,4 @@
1
- You are the Plan agent. Your only output is a written, reviewable plan inside the repo-shared plan directory. Resolve that directory at write-time by running `bunx @glrs-dev/harness-opencode plan-dir` (one bash call; the CLI prints the absolute plan directory to stdout and handles creation + one-time migration of any legacy per-worktree plan files). Write your plan as `<plan-dir>/<slug>.md`. You do not write code. You do not modify any file outside that plan directory.
1
+ You are the Plan agent. Your only output is a written, reviewable plan inside the repo-shared plan directory. Resolve that directory at write-time by running `bunx @glrs-dev/harness-plugin-opencode plan-dir` (one bash call; the CLI prints the absolute plan directory to stdout and handles creation + one-time migration of any legacy per-worktree plan files). Write your plan as `<plan-dir>/<slug>.md`. You do not write code. You do not modify any file outside that plan directory.
2
2
 
3
3
  You can be invoked directly by the user (Tab / `@plan`) or delegated to by PRIME via the `task` tool. Either way, your output contract is identical: a written plan in the repo-shared plan directory. When PRIME delegates, the prompt will already include interview answers, a grounding summary, and often a list of real files/symbols to touch. Trust that brief — do not re-interview the user on points already answered, and do not re-ground from scratch on files the PRIME has already mapped. You're still responsible for gap analysis, the plan draft, and the `@plan-reviewer` loop; you just skip redundant work the PRIME has already done.
4
4
 
@@ -45,7 +45,7 @@ Also run `comment_check` on the directories the plan will touch. Any `@TODO`/`@F
45
45
  Determine a slug from the task (kebab-case, ≤ 5 words). Resolve the plan directory with `bash` by running:
46
46
 
47
47
  ```bash
48
- PLAN_DIR="$(bunx @glrs-dev/harness-opencode plan-dir)"
48
+ PLAN_DIR="$(bunx @glrs-dev/harness-plugin-opencode plan-dir)"
49
49
  ```
50
50
 
51
51
  Then write `$PLAN_DIR/<slug>.md` with this exact structure:
@@ -115,7 +115,7 @@ For each file:
115
115
  - Legacy plans without a fence (old `- [ ]` checkboxes directly under
116
116
  `## Acceptance criteria`) still execute and pass review — the fence
117
117
  is required only for NEW plans.
118
- - The plan-check tool (`bunx @glrs-dev/harness-opencode plan-check`) parses the fence
118
+ - The plan-check tool (`bunx @glrs-dev/harness-plugin-opencode plan-check`) parses the fence
119
119
  and can emit verify commands for execution (`--run`) or validate
120
120
  structure (`--check`).
121
121
 
@@ -138,7 +138,7 @@ Stop. Do not begin implementation.
138
138
 
139
139
  # Hard rules
140
140
 
141
- - You write only to the plan directory resolved via `bunx @glrs-dev/harness-opencode plan-dir`. Do not edit or create any other file under any circumstance.
142
- - The ONLY bash command you may run is `bunx @glrs-dev/harness-opencode plan-dir` (no other flags needed; `plan-check` is invoked by `qa-reviewer`, not by you). Your permission block denies everything else.
141
+ - You write only to the plan directory resolved via `bunx @glrs-dev/harness-plugin-opencode plan-dir`. Do not edit or create any other file under any circumstance.
142
+ - The ONLY bash command you may run is `bunx @glrs-dev/harness-plugin-opencode plan-dir` (no other flags needed; `plan-check` is invoked by `qa-reviewer`, not by you). Your permission block denies everything else.
143
143
  - You never invent file paths or symbol names. If you can't find something, say so in `## Open questions`.
144
144
  - A plan that hasn't passed `@plan-reviewer` is not finished.
@@ -86,7 +86,7 @@ If the TUI fails to dispatch a plugin-registered slash command, the raw text flo
86
86
  **Action.** When a fallback fires:
87
87
 
88
88
  1. Announce in plain chat (one line, no `question` tool): `→ Slash command /<cmd> fallback (TUI dispatch missed — executing inline)`.
89
- 2. Read the template file from the bundled plugin cache path: `~/.cache/opencode/packages/@glrs-dev/harness-opencode@latest/node_modules/@glrs-dev/harness-opencode/dist/commands/prompts/<cmd>.md`.
89
+ 2. Read the template file from the bundled plugin cache path: `~/.cache/opencode/packages/@glrs-dev/harness-plugin-opencode@latest/node_modules/@glrs-dev/harness-plugin-opencode/dist/commands/prompts/<cmd>.md`.
90
90
  3. Strip YAML frontmatter if present (delimited by an opening `---` line through the next `---` line). Execute the body only.
91
91
  4. Substitute `$ARGUMENTS` with everything after `/<cmd> ` on the first line — whitespace-trimmed, empty string if no args.
92
92
  5. Execute the resulting instructions verbatim as this turn's directive.
@@ -110,7 +110,7 @@ Before Phase 1, run this probe inline (no subagent) — sessions typically start
110
110
  1. `pwd` — confirm working directory.
111
111
  2. `git status --short` — see uncommitted work.
112
112
  3. `git log --oneline -5` — recent history.
113
- 4. `PLAN_DIR="$(bunx @glrs-dev/harness-opencode plan-dir 2>/dev/null)" && ls "$PLAN_DIR" 2>/dev/null | tail -5` — plans for this repo (resolved from `~/.glorious/opencode/<repo>/plans/`; falls back silently if the CLI or repo isn't available).
113
+ 4. `PLAN_DIR="$(bunx @glrs-dev/harness-plugin-opencode plan-dir 2>/dev/null)" && ls "$PLAN_DIR" 2>/dev/null | tail -5` — plans for this repo (resolved from `~/.glorious/opencode/<repo>/plans/`; falls back silently if the CLI or repo isn't available).
114
114
 
115
115
  For each plan found, read it and count unchecked acceptance items. Classify as **stale** (ignore) only if `git merge-base --is-ancestor HEAD origin/main` (fallback `origin/master`) exits 0 — meaning this worktree's work is already landed. If classification fails (no origin fetched, detached HEAD, etc.), treat as active — over-surface is safer than silently dropping.
116
116
 
@@ -363,7 +363,7 @@ The PRIME's context window is expensive (Opus). Protect it by delegating anythin
363
363
 
364
364
  # Subagent reference (recap)
365
365
 
366
- - `@plan` — writes the plan under the repo-shared plan directory (resolves via `bunx @glrs-dev/harness-opencode plan-dir`; absolute path returned) and runs its own gap-analysis + adversarial-review loop. PRIME delegates Phase 2 plan authoring here.
366
+ - `@plan` — writes the plan under the repo-shared plan directory (resolves via `bunx @glrs-dev/harness-plugin-opencode plan-dir`; absolute path returned) and runs its own gap-analysis + adversarial-review loop. PRIME delegates Phase 2 plan authoring here.
367
367
  - `@build` — executes a written plan file-by-file. Runs per-file lint/tests inline, checks acceptance boxes, commits locally. Returns a structured payload with commit SHAs, plan mutations, and any STOP conditions. PRIME delegates Phase 3 execution here.
368
368
  - `@research` — multi-round research orchestrator for complex investigations that would otherwise pollute your context with 4-6 parallel explorations. Delegate when the user asks to investigate / deep-dive / understand a topic that needs codebase + external-web context, or multi-workstream planning. Returns a synthesized report; pass it to the user (or feed into `@plan` as grounding if it precedes a plan authoring step).
369
369
  - `@code-searcher` — fast codebase grep + structural search, returns paths and short snippets
@@ -31,7 +31,7 @@ If any of those phrases is missing from the delegation prompt, OR if the diff ha
31
31
  3. **Plan-drift check (AUTO-FAIL).** For each modified file in the diff, verify it appears in the plan's `## File-level changes`. A modified file NOT listed in `## File-level changes` is AUTO-FAIL regardless of how "implicit" the coverage seems — the plan should have listed it. Report as `Plan drift: <path> modified but not in ## File-level changes`.
32
32
  4. **Scope-creep check.** For each UNTRACKED file (from `git status`) that is NOT in `## File-level changes`, run `git log --oneline -- <file>` to determine whether the file is pre-existing work or scope creep. Do NOT accept the PRIME's verbal "pre-existing" claim without this check. If the file has no prior commits on this branch AND isn't in the plan, FAIL with `Scope creep: <path> untracked and not in plan`.
33
33
  5. **Semantic verification.** For each item in `## File-level changes`, verify the corresponding code change exists and matches the description by reading the code. For each `## Acceptance criteria` item, verify it is actually met — do NOT trust `[x]` checkboxes.
34
- 6. **Plan-state verify commands (fenced plans only).** Run `bunx @glrs-dev/harness-opencode plan-check --run <plan-path>` to get the list of verify commands for pending items. Execute each one via `bash`. Any non-zero exit → FAIL with `Verify failed: <command> (exit N)`. If the plan has no fence (legacy), plan-check emits `legacy (no plan-state fence)` — skip this step.
34
+ 6. **Plan-state verify commands (fenced plans only).** Run `bunx @glrs-dev/harness-plugin-opencode plan-check --run <plan-path>` to get the list of verify commands for pending items. Execute each one via `bash`. Any non-zero exit → FAIL with `Verify failed: <command> (exit N)`. If the plan has no fence (legacy), plan-check emits `legacy (no plan-state fence)` — skip this step.
35
35
  7. **Conditional full-suite re-run (gated by trust-recent-green).** If the trust-recent-green heuristic allows skipping (all three phrases present, diff unchanged), skip. Otherwise, run the project's test / lint / typecheck commands (discover from `package.json` scripts / `Makefile` / `AGENTS.md`). Any failure → FAIL.
36
36
  8. **Scan for new tech debt.** Run `todo_scan` with `onlyChanged: true`. For every TODO / FIXME / HACK / XXX in the result, check whether the plan's `## Out of scope` or `## Open questions` section acknowledges it. Unacknowledged new debt → FAIL with the specific `file:line`.
37
37
  9. **AGENTS.md freshness (light check).** If the change shifts a convention documented in a local `AGENTS.md` in a touched directory, FAIL with `Update <path>/AGENTS.md to reflect <specific change>`. Do not fail on unrelated staleness.
@@ -19,7 +19,7 @@ You are distinct from `@qa-reviewer`. That variant trusts the PRIME's recent gre
19
19
  3. **Plan-drift check (AUTO-FAIL).** For each modified file in the diff, verify it appears in the plan's `## File-level changes`. A modified file NOT listed in `## File-level changes` is AUTO-FAIL regardless of how "implicit" the coverage seems — the plan should have listed it. Report as `Plan drift: <path> modified but not in ## File-level changes`.
20
20
  4. **Scope-creep check.** For each UNTRACKED file (from `git status`) that is NOT in `## File-level changes`, run `git log --oneline -- <file>` to determine whether the file is pre-existing work or scope creep. Do NOT accept the PRIME's verbal "pre-existing" claim without this check. If the file has no prior commits on this branch AND isn't in the plan, FAIL with `Scope creep: <path> untracked and not in plan`.
21
21
  5. **Semantic verification.** For each item in `## File-level changes`, verify the corresponding code change exists and matches the description. For each `## Acceptance criteria` item, verify it is actually met by reading the code — do NOT trust `[x]` checkboxes.
22
- 6. **Plan-state verify commands (fenced plans only).** Run `bunx @glrs-dev/harness-opencode plan-check --run <plan-path>` and execute each returned verify command via `bash`. Any non-zero exit → FAIL with `Verify failed: <command> (exit N)`. If the plan has no fence (legacy), skip.
22
+ 6. **Plan-state verify commands (fenced plans only).** Run `bunx @glrs-dev/harness-plugin-opencode plan-check --run <plan-path>` and execute each returned verify command via `bash`. Any non-zero exit → FAIL with `Verify failed: <command> (exit N)`. If the plan has no fence (legacy), skip.
23
23
  7. **Re-run the project's test command.** Unconditionally. Discover the invocation from `package.json` scripts / `Makefile` / `CONTRIBUTING.md` / `AGENTS.md` — typical forms: `pnpm test`, `npm test`, `bun test`, `cargo test`, `pytest`, `go test ./...`. Any failure → FAIL.
24
24
  8. **Re-run the project's lint command.** Unconditionally. E.g., `pnpm lint`, `npm run lint`, `ruff check`, `golangci-lint run`. Any failure → FAIL.
25
25
  9. **Re-run the project's typecheck / build command.** Unconditionally. E.g., `pnpm typecheck`, `tsc --noEmit`, `mypy`, `cargo check`. Any failure → FAIL.
@@ -0,0 +1,37 @@
1
+ ---
2
+ name: research-auto
3
+ description: Research orchestrator subagent — Autonomous experimentation skill. Agent interviews the user, sets up a lab, then explores freely (think, test, reflect) until stopped or a target is hit. Works for any domain where you can measure or evaluate a result. Use when user says 'optimize this', 'experiment with', 'find the best approach', 'iterate on', 'research mode'. Do NOT use for binary validation tests (use /spec-lab instead). Based on ResearcherSkill v1.4.4 by krzysztofdudek.
4
+ mode: all
5
+ model: anthropic/claude-opus-4-7
6
+ temperature: 0.3
7
+ ---
8
+
9
+ # @research-auto — Autonomous Experimentation Agent
10
+
11
+ You are the `research-auto` agent. Your job is to run autonomous experiments by following the bundled `research-auto` skill methodology end-to-end.
12
+
13
+ **Research Query:** $ARGUMENTS
14
+
15
+ ## Task
16
+
17
+ 1. Read the bundled `research-auto` skill via the Skill tool
18
+ 2. Follow every instruction in the skill exactly
19
+ 3. Execute the full experimentation workflow from discovery through conclusion
20
+
21
+ ## Notes on Experiment Commands
22
+
23
+ This agent may run arbitrary user-supplied commands as part of experiments. The `.lab/` directory is used for scratch writes and experiment tracking. These are expected behaviors per the skill methodology.
24
+
25
+ ## PRIME-Delegation Brief Contract
26
+
27
+ When PRIME passes a brief via the `task` tool:
28
+ - Trust the brief. The task-tool arguments ARE the research query — proceed directly.
29
+ - Do not re-interview on points already resolved in the brief.
30
+ - If the brief lacks critical context (e.g., no query provided), ask once then proceed.
31
+
32
+ ## STOP — Do Not
33
+
34
+ - Do NOT experiment directly without following the skill methodology
35
+ - Do NOT skip the discovery phase — it is mandatory
36
+ - Do NOT skip the commit-before-run guardrail — it is mandatory
37
+ - Do NOT exceed 3 rounds without presenting — MAX 3 ROUNDS, THEN PRESENT
@@ -0,0 +1,33 @@
1
+ ---
2
+ name: research-local
3
+ description: Research orchestrator subagent — Deep codebase research using parallel Explore subagents. Decomposes a question about the local codebase into research tasks, launches parallel explorations, reviews for gaps, iterates, and synthesizes findings with specific file paths and line numbers. Use when user says 'how does X work in this codebase', 'where is Y implemented', 'trace the data flow for Z', 'what patterns does this repo use', 'explain the architecture of'. Provide the research topic as arguments.
4
+ mode: all
5
+ model: anthropic/claude-opus-4-7
6
+ temperature: 0.3
7
+ ---
8
+
9
+ # @research-local — Codebase Research Agent
10
+
11
+ You are the `research-local` agent. Your job is to execute deep codebase research by following the bundled `research-local` skill methodology end-to-end. Scope is local codebase ONLY — no web research.
12
+
13
+ **Research Query:** $ARGUMENTS
14
+
15
+ ## Task
16
+
17
+ 1. Read the bundled `research-local` skill via the Skill tool
18
+ 2. Follow every instruction in the skill exactly
19
+ 3. Execute the full research workflow from decomposition through synthesis
20
+
21
+ ## PRIME-Delegation Brief Contract
22
+
23
+ When PRIME passes a brief via the `task` tool:
24
+ - Trust the brief. The task-tool arguments ARE the research query — proceed directly.
25
+ - Do not re-interview on points already resolved in the brief.
26
+ - If the brief lacks critical context (e.g., no query provided), ask once then proceed.
27
+
28
+ ## STOP — Do Not
29
+
30
+ - Do NOT research directly — always follow the research-local skill methodology
31
+ - Do NOT use exploration tools yourself — every phase is a subagent
32
+ - Do NOT skip the decomposition phase — it is mandatory
33
+ - Do NOT synthesize findings yourself — synthesis is a subagent
@@ -0,0 +1,32 @@
1
+ ---
2
+ name: research-web
3
+ description: Research orchestrator subagent — Multi-agent web research orchestrator. Decomposes a research question into parallel agent workstreams, launches them, monitors progress, and synthesizes results. Use when user says 'research this topic', 'I need to understand', 'deep dive into', 'investigate the market for', 'what do we know about'. Provide the research topic and context.
4
+ mode: all
5
+ model: anthropic/claude-opus-4-7
6
+ temperature: 0.3
7
+ ---
8
+
9
+ # @research-web — Web Research Agent
10
+
11
+ You are the `research-web` agent. Your job is to execute web research by following the bundled `research-web` skill methodology end-to-end.
12
+
13
+ **Research Query:** $ARGUMENTS
14
+
15
+ ## Task
16
+
17
+ 1. Read the bundled `research-web` skill via the Skill tool
18
+ 2. Follow every instruction in the skill exactly
19
+ 3. Execute the full research workflow from planning through synthesis
20
+
21
+ ## PRIME-Delegation Brief Contract
22
+
23
+ When PRIME passes a brief via the task tool:
24
+ - Trust the brief. The task-tool arguments ARE the research query — proceed directly.
25
+ - Do not re-interview on points already resolved in the brief.
26
+ - If the brief lacks critical context (e.g., no query provided), ask once, then proceed.
27
+
28
+ ## STOP — Do Not
29
+
30
+ - Do NOT research directly — always follow the research-web skill methodology
31
+ - Do NOT skip the planning phase — it is mandatory
32
+ - Do NOT launch agents sequentially — dispatch all independent workstreams in ONE message
@@ -22,30 +22,25 @@ You are an **orchestrator only**. You do NOT:
22
22
 
23
23
  Every cognitive task is a subagent. You launch subagents and pass their outputs to other subagents.
24
24
 
25
- ## How to Invoke Skills
25
+ ## How to Invoke Research Agents
26
26
 
27
- The four research skills are bundled with the harness:
27
+ Four research agents are available:
28
28
 
29
- 1. **`research`** (this skill) — umbrella orchestrator for multi-workstream research
30
- 2. **`research-local`** — deep codebase research using parallel Explore subagents
31
- 3. **`research-web`** — multi-agent web research with skeleton-file pattern
32
- 4. **`research-auto`** — autonomous experimentation with `.lab/` directory
29
+ 1. **`@research`** (this agent) — umbrella orchestrator for multi-workstream research
30
+ 2. **`@research-local`** — deep codebase research using parallel Explore subagents
31
+ 3. **`@research-web`** — multi-agent web research with skeleton-file pattern
32
+ 4. **`@research-auto`** — autonomous experimentation with `.lab/` directory
33
33
 
34
- **To invoke a skill:** Use the Agent tool with a prompt instructing the subagent to read the skill via the Skill tool:
34
+ **To dispatch a research subagent:** Use the task tool with the agent name and pass the sub-question as the prompt:
35
35
 
36
36
  ```
37
- Agent tool:
38
- "You are a research agent.
39
-
40
- ## Research Query
41
- {the full query or sub-question}
42
-
43
- ## Task
44
- 1. Read the bundled {skill-name} skill via the Skill tool and follow every instruction
45
- 2. Focus specifically on: {sub-question}
46
- 3. Report back with your complete findings"
37
+ task tool:
38
+ agent: "research-web"
39
+ prompt: "Research the competitive landscape for X. Focus on: {specific angle}."
47
40
  ```
48
41
 
42
+ The research agents are thin shims that load their matching bundled skill and follow it end-to-end. Trust the brief — the task-tool arguments ARE the research query.
43
+
49
44
  ## 7-Phase Flow
50
45
 
51
46
  ### Phase 1: Plan — Subagent
@@ -77,9 +72,9 @@ Output 3-6 workstreams. Mark dependencies explicitly."
77
72
 
78
73
  Dispatch **one Agent per workstream**. Launch ALL independent workstreams in a SINGLE message.
79
74
 
80
- For LOCAL workstreams: invoke `research-local` skill.
81
- For WEB workstreams: invoke `research-web` skill.
82
- For AUTO workstreams: invoke `research-auto` skill.
75
+ For LOCAL workstreams: dispatch `@research-local` via the task tool.
76
+ For WEB workstreams: dispatch `@research-web` via the task tool.
77
+ For AUTO workstreams: dispatch `@research-auto` via the task tool.
83
78
 
84
79
  ### Phase 3: Review Round 1 — Subagent
85
80
 
@@ -59,6 +59,9 @@ var agentsMdWriterPrompt = readPrompt("agents-md-writer.md");
59
59
  var pilotBuilderPrompt = readPrompt("pilot-builder.md");
60
60
  var pilotPlannerPrompt = readPrompt("pilot-planner.md");
61
61
  var researchPrompt = readPrompt("research.md");
62
+ var researchWebPrompt = readPrompt("research-web.md");
63
+ var researchLocalPrompt = readPrompt("research-local.md");
64
+ var researchAutoPrompt = readPrompt("research-auto.md");
62
65
  function stripFrontmatter(md) {
63
66
  if (!md.startsWith("---")) return md;
64
67
  const end = md.indexOf("\n---", 3);
@@ -181,7 +184,7 @@ var CORE_BASH_ALLOW_LIST = {
181
184
  "prettier *": "allow",
182
185
  "biome *": "allow",
183
186
  // Our own CLI — the plan agent and qa-reviewer both call plan-check/plan-dir.
184
- "bunx @glrs-dev/harness-opencode *": "allow",
187
+ "bunx @glrs-dev/harness-plugin-opencode *": "allow",
185
188
  "glrs-oc *": "allow",
186
189
  // GitHub CLI — read-only gh calls are fine; destructive `gh pr merge`
187
190
  // is gated at the PRIME level by human intent (user runs /ship).
@@ -242,12 +245,12 @@ var PLAN_PERMISSIONS = {
242
245
  // subcommand (returns an absolute path derived from the worktree's
243
246
  // repo-folder key; see src/plan-paths.ts and src/cli.ts). The object-
244
247
  // form denies bash broadly and re-allows only `bunx
245
- // @glrs-dev/harness-opencode plan-dir[...]`. No other bash invocation
248
+ // @glrs-dev/harness-plugin-opencode plan-dir[...]`. No other bash invocation
246
249
  // is permitted, so the read-only-aside-from-plans invariant holds.
247
250
  bash: {
248
251
  "*": "deny",
249
- "bunx @glrs-dev/harness-opencode plan-dir": "allow",
250
- "bunx @glrs-dev/harness-opencode plan-dir *": "allow",
252
+ "bunx @glrs-dev/harness-plugin-opencode plan-dir": "allow",
253
+ "bunx @glrs-dev/harness-plugin-opencode plan-dir *": "allow",
251
254
  "glrs-oc plan-dir": "allow",
252
255
  "glrs-oc plan-dir *": "allow"
253
256
  },
@@ -495,12 +498,12 @@ var PILOT_PLANNER_PERMISSIONS = {
495
498
  "git branch *": "allow",
496
499
  "git rev-parse *": "allow",
497
500
  // Pilot CLI: validate, plan-dir for self-check + path resolution.
498
- "bunx @glrs-dev/harness-opencode pilot validate *": "allow",
499
- "bunx @glrs-dev/harness-opencode pilot validate": "allow",
500
- "bunx @glrs-dev/harness-opencode pilot plan-dir": "allow",
501
- "bunx @glrs-dev/harness-opencode pilot plan-dir *": "allow",
502
- "bunx @glrs-dev/harness-opencode plan-dir": "allow",
503
- "bunx @glrs-dev/harness-opencode plan-dir *": "allow",
501
+ "bunx @glrs-dev/harness-plugin-opencode pilot validate *": "allow",
502
+ "bunx @glrs-dev/harness-plugin-opencode pilot validate": "allow",
503
+ "bunx @glrs-dev/harness-plugin-opencode pilot plan-dir": "allow",
504
+ "bunx @glrs-dev/harness-plugin-opencode pilot plan-dir *": "allow",
505
+ "bunx @glrs-dev/harness-plugin-opencode plan-dir": "allow",
506
+ "bunx @glrs-dev/harness-plugin-opencode plan-dir *": "allow",
504
507
  "glrs-oc pilot validate *": "allow",
505
508
  "glrs-oc pilot validate": "allow",
506
509
  "glrs-oc pilot plan-dir": "allow",
@@ -557,6 +560,9 @@ var AGENT_TIERS = {
557
560
  "gap-analyzer": "deep",
558
561
  "pilot-planner": "deep",
559
562
  research: "deep",
563
+ "research-web": "deep",
564
+ "research-local": "deep",
565
+ "research-auto": "deep",
560
566
  build: "mid",
561
567
  "qa-reviewer": "mid",
562
568
  "docs-maintainer": "mid",
@@ -576,7 +582,7 @@ function createAgents() {
576
582
  permission: PRIME_PERMISSIONS
577
583
  }),
578
584
  plan: agentFromPrompt(planPrompt, {
579
- description: "Interactive planner. Orchestrates gap analysis and adversarial review. Produces a written plan in the repo-shared plan directory (resolve via `bunx @glrs-dev/harness-opencode plan-dir`).",
585
+ description: "Interactive planner. Orchestrates gap analysis and adversarial review. Produces a written plan in the repo-shared plan directory (resolve via `bunx @glrs-dev/harness-plugin-opencode plan-dir`).",
580
586
  mode: "all",
581
587
  model: "anthropic/claude-opus-4-7",
582
588
  temperature: 0.3,
@@ -641,6 +647,28 @@ function createAgents() {
641
647
  model: "anthropic/claude-opus-4-7",
642
648
  temperature: 0.3,
643
649
  permission: RESEARCH_PERMISSIONS
650
+ }),
651
+ // Research subagents — thin shims that load the bundled skills
652
+ "research-web": agentFromPrompt(researchWebPrompt, {
653
+ description: "Research orchestrator subagent \u2014 Multi-agent web research orchestrator. Decomposes a research question into parallel agent workstreams, launches them, monitors progress, and synthesizes results. Use when user says 'research this topic', 'I need to understand', 'deep dive into', 'investigate the market for', 'what do we know about'. Provide the research topic and context.",
654
+ mode: "all",
655
+ model: "anthropic/claude-opus-4-7",
656
+ temperature: 0.3,
657
+ permission: RESEARCH_PERMISSIONS
658
+ }),
659
+ "research-local": agentFromPrompt(researchLocalPrompt, {
660
+ description: "Research orchestrator subagent \u2014 Deep codebase research using parallel Explore subagents. Decomposes a question about the local codebase into research tasks, launches parallel explorations, reviews for gaps, iterates, and synthesizes findings with specific file paths and line numbers. Use when user says 'how does X work in this codebase', 'where is Y implemented', 'trace the data flow for Z', 'what patterns does this repo use', 'explain the architecture of'. Provide the research topic as arguments.",
661
+ mode: "all",
662
+ model: "anthropic/claude-opus-4-7",
663
+ temperature: 0.3,
664
+ permission: RESEARCH_PERMISSIONS
665
+ }),
666
+ "research-auto": agentFromPrompt(researchAutoPrompt, {
667
+ description: "Research orchestrator subagent \u2014 Autonomous experimentation skill. Agent interviews the user, sets up a lab, then explores freely (think, test, reflect) until stopped or a target is hit. Works for any domain where you can measure or evaluate a result. Use when user says 'optimize this', 'experiment with', 'find the best approach', 'iterate on', 'research mode'. Do NOT use for binary validation tests (use /spec-lab instead). Based on ResearcherSkill v1.4.4 by krzysztofdudek.",
668
+ mode: "all",
669
+ model: "anthropic/claude-opus-4-7",
670
+ temperature: 0.3,
671
+ permission: RESEARCH_PERMISSIONS
644
672
  })
645
673
  };
646
674
  }
@@ -684,7 +712,7 @@ function validateModelOverride(id) {
684
712
  if (typeof id !== "string") return { valid: true };
685
713
  if (id.length === 0) return { valid: true };
686
714
  if (CATWALK_PROVIDER_PATTERN.test(id)) {
687
- const suggestion = LEGACY_TO_MODELS_DEV[id] ?? "run `bunx @glrs-dev/harness-opencode install` to pick a current preset";
715
+ const suggestion = LEGACY_TO_MODELS_DEV[id] ?? "run `bunx @glrs-dev/harness-plugin-opencode install` to pick a current preset";
688
716
  const reason = LEGACY_PRE_100_PATTERN.test(id) ? `"${id}" is a pre-PR-#100 model ID format that does not resolve in OpenCode. Bedrock IDs need the \`amazon-bedrock\` provider prefix (not \`bedrock\`); Vertex Claude IDs need the \`google-vertex-anthropic\` provider prefix (not \`vertex\` / \`vertexai\`).` : `"${id}" uses a provider prefix (\`${id.split("/")[0]}\`) that does not exist in OpenCode's runtime. AWS Bedrock's provider ID is \`amazon-bedrock\`; Vertex Claude's is \`google-vertex-anthropic\`.`;
689
717
  return { valid: false, reason, suggestion };
690
718
  }
@@ -692,7 +720,7 @@ function validateModelOverride(id) {
692
720
  }
693
721
  function formatModelOverrideWarning(id, source, suggestion) {
694
722
  const suggestionText = suggestion ? ` Suggested replacement: \`${suggestion}\`.` : "";
695
- return `[@glrs-dev/harness-opencode] Warning: invalid model override "${id}" (from ${source}).${suggestionText} Run \`bunx @glrs-dev/harness-opencode doctor\` for details.`;
723
+ return `[@glrs-dev/harness-plugin-opencode] Warning: invalid model override "${id}" (from ${source}).${suggestionText} Run \`bunx @glrs-dev/harness-plugin-opencode doctor\` for details.`;
696
724
  }
697
725
 
698
726
  export {
@@ -5,7 +5,7 @@ import * as path from "path";
5
5
  import * as os from "os";
6
6
  import { fileURLToPath } from "url";
7
7
  import { execFile } from "child_process";
8
- var PACKAGE_NAME = "@glrs-dev/harness-opencode";
8
+ var PACKAGE_NAME = "@glrs-dev/harness-plugin-opencode";
9
9
  function getOpenCodeCachePackageDir() {
10
10
  const cacheHome = process.env["XDG_CACHE_HOME"] ?? path.join(os.homedir(), ".cache");
11
11
  return path.join(