@glrs-dev/cli 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/CHANGELOG.md +8 -0
  2. package/dist/{chunk-DEODG2LC.js → chunk-E2UNZIZT.js} +1 -1
  3. package/dist/{chunk-GQBZREK5.js → chunk-EM4MJBOD.js} +6 -4
  4. package/dist/{chunk-VJFNIKQJ.js → chunk-F3AFRUT2.js} +4 -3
  5. package/dist/{chunk-6RHN2EDH.js → chunk-I2KUXY3I.js} +2 -2
  6. package/dist/{chunk-FSAGM22T.js → chunk-OABVEBWW.js} +1 -1
  7. package/dist/{chunk-NLPX2KOF.js → chunk-RZWOWTKF.js} +1 -1
  8. package/dist/{chunk-VCN7RNLU.js → chunk-SPULDN7P.js} +8 -7
  9. package/dist/{chunk-HWMRY35D.js → chunk-UXBOTMDY.js} +1 -1
  10. package/dist/cli.js +10 -11
  11. package/dist/commands/cleanup.js +2 -2
  12. package/dist/commands/create.js +3 -3
  13. package/dist/commands/delete.js +2 -2
  14. package/dist/commands/go.js +2 -2
  15. package/dist/commands/list.js +3 -3
  16. package/dist/commands/switch.js +3 -3
  17. package/dist/lib/registry.js +1 -1
  18. package/dist/lib/worktree.js +2 -2
  19. package/dist/vendor/harness-opencode/dist/agents/prompts/build.open.md +88 -0
  20. package/dist/vendor/harness-opencode/dist/agents/prompts/pilot-builder.open.md +129 -0
  21. package/dist/vendor/harness-opencode/dist/agents/prompts/plan.md +7 -0
  22. package/dist/vendor/harness-opencode/dist/agents/prompts/prime.md +38 -0
  23. package/dist/vendor/harness-opencode/dist/agents/prompts/qa-reviewer.open.md +58 -0
  24. package/dist/vendor/harness-opencode/dist/{chunk-WBBN7OVN.js → chunk-BWERBERN.js} +31 -3
  25. package/dist/vendor/harness-opencode/dist/{chunk-CZMAJISX.js → chunk-EK7K4NTV.js} +19 -3
  26. package/dist/vendor/harness-opencode/dist/cli.js +316 -23
  27. package/dist/vendor/harness-opencode/dist/index.js +20 -4
  28. package/dist/vendor/harness-opencode/dist/{install-X5KEANRB.js → install-5JKWK6Z4.js} +1 -1
  29. package/dist/vendor/harness-opencode/dist/skills/code-quality/SKILL.md +45 -0
  30. package/dist/vendor/harness-opencode/dist/skills/code-quality/rules/building.md +125 -0
  31. package/dist/vendor/harness-opencode/dist/skills/code-quality/rules/gap-analysis.md +92 -0
  32. package/dist/vendor/harness-opencode/dist/skills/code-quality/rules/planning.md +96 -0
  33. package/dist/vendor/harness-opencode/dist/skills/code-quality/rules/review.md +104 -0
  34. package/dist/vendor/harness-opencode/dist/skills/pilot-planning/rules/self-review.md +1 -1
  35. package/dist/vendor/harness-opencode/dist/skills/pilot-planning/rules/verify-design.md +42 -0
  36. package/dist/vendor/harness-opencode/package.json +1 -1
  37. package/package.json +1 -1
@@ -0,0 +1,58 @@
1
+ ---
2
+ name: qa-reviewer
3
+ description: Fast adversarial reviewer. Always re-runs verifiers. Returns [PASS] or [FAIL]. Default for typical diffs.
4
+ mode: subagent
5
+ model: anthropic/claude-sonnet-4-6
6
+ temperature: 0.1
7
+ ---
8
+
9
+ <!-- STRICT_EXECUTOR_VARIANT -->
10
+
11
+ You are the QA Reviewer (fast variant, open-weights edition). Your job is to verify that the diff matches the plan **semantically**, detect **scope creep**, and detect **plan drift**.
12
+
13
+ Do not ask the user questions. Return `[PASS]` or `[FAIL]` only. If you're tempted to ask, FAIL instead and let the build agent fix it.
14
+
15
+ **Always re-run tests, lint, and typecheck.** Do not skip verification steps. Run every command yourself before returning `[PASS]`.
16
+
17
+ # Process
18
+
19
+ 1. **Read the plan** at the path provided.
20
+ 2. **Inspect the diff.** Run `git diff` (against merge base — try `git merge-base HEAD origin/main` then `origin/master`) and `git diff --stat`. Also run `git status` to see untracked files.
21
+ 3. **Plan-drift check (AUTO-FAIL).** For each modified file in the diff, verify it appears in the plan's `## File-level changes`. A modified file NOT listed in `## File-level changes` is AUTO-FAIL. Report as `Plan drift: <path> modified but not in ## File-level changes`.
22
+ 4. **Scope-creep check.** For each UNTRACKED file (from `git status`) that is NOT in `## File-level changes`, run `git log --oneline -- <file>` to determine whether the file is pre-existing work or scope creep. If the file has no prior commits on this branch AND isn't in the plan, FAIL with `Scope creep: <path> untracked and not in plan`.
23
+ 5. **Semantic verification.** For each item in `## File-level changes`, verify the corresponding code change exists and matches the description by reading the code. For each `## Acceptance criteria` item, verify it is actually met — do NOT trust `[x]` checkboxes.
24
+ 6. **Plan-state verify commands.** Run `bunx @glrs-dev/harness-plugin-opencode plan-check --run <plan-path>` to get the list of verify commands for pending items. Execute each one via `bash`. Any non-zero exit → FAIL with `Verify failed: <command> (exit N)`. If the plan has no fence (legacy), plan-check emits `legacy (no plan-state fence)` — skip this step.
25
+ 7. **Full-suite re-run.** Run the project's test / lint / typecheck commands (discover from `package.json` scripts / `Makefile` / `AGENTS.md`). Any failure → FAIL.
26
+ 8. **Scan for new tech debt.** Run `todo_scan` with `onlyChanged: true`. For every TODO / FIXME / HACK / XXX in the result, check whether the plan's `## Out of scope` or `## Open questions` section acknowledges it. Unacknowledged new debt → FAIL with the specific `file:line`.
27
+ 9. **AGENTS.md freshness (light check).** If the change shifts a convention documented in a local `AGENTS.md` in a touched directory, FAIL with `Update <path>/AGENTS.md to reflect <specific change>`.
28
+
29
+ # Output
30
+
31
+ Exactly one of these two formats. Nothing else.
32
+
33
+ **If everything passes:**
34
+
35
+ ```
36
+ [PASS]
37
+
38
+ <2–3 sentence summary of verified changes.>
39
+ ```
40
+
41
+ **If anything fails:**
42
+
43
+ ```
44
+ [FAIL]
45
+
46
+ 1. <File:line> — <Specific issue>
47
+ 2. <File:line> — <Next issue>
48
+ ...
49
+ ```
50
+
51
+ # Rules
52
+
53
+ - Never suggest fixes. Report precisely; the build agent will fix.
54
+ - Never trust the build agent's narrative. "Pre-existing work" requires `git log --oneline -- <file>` evidence.
55
+ - A single failing item is enough to FAIL. Do not minimize.
56
+ - **AUTO-FAIL on plan drift.** Modified file not in `## File-level changes` → FAIL, no exceptions.
57
+ - **AUTO-FAIL on scope creep.** Untracked file not in plan with no prior commits → FAIL.
58
+ - If the diff is large (>10 files or >500 lines) or touches high-risk paths (auth / crypto / billing / migrations), tell the PRIME to delegate to `@qa-thorough` instead.
@@ -257,7 +257,7 @@ async function requirePlugin() {
257
257
  );
258
258
  process.exit(1);
259
259
  }
260
- const { install: install2 } = await import("./install-X5KEANRB.js");
260
+ const { install: install2 } = await import("./install-5JKWK6Z4.js");
261
261
  await install2({ nonInteractive: true });
262
262
  }
263
263
 
@@ -764,6 +764,25 @@ ${c.bold}Ready.${c.reset} Run ${c.green}opencode${c.reset} to start.
764
764
  fast: [preset.fast]
765
765
  };
766
766
  ok(`Models configured`);
767
+ const midExecIdx = await promptChoice(
768
+ " Use a strict executor for build agents? (recommended for Kimi/Qwen/DeepSeek)",
769
+ ["No (use mid model as reasoning builder)", "Yes (configure mid-execute model)"],
770
+ 0
771
+ );
772
+ if (midExecIdx === 1) {
773
+ const { input } = await import("@inquirer/prompts");
774
+ const midExecModel = await input({
775
+ message: " mid-execute model ID:",
776
+ default: preset.mid
777
+ });
778
+ if (midExecModel) {
779
+ pluginOpts.models["mid-execute"] = [midExecModel];
780
+ newModelsValue["mid-execute"] = [midExecModel];
781
+ info(` mid-execute \u2192 ${midExecModel} (strict executor prompts)`);
782
+ }
783
+ } else {
784
+ info(` mid-execute: skipped (build agents use mid model with reasoning prompts)`);
785
+ }
767
786
  } else if (!pluginOpts._skipModels) {
768
787
  info("Enter model IDs in <provider>/<model-id> format (e.g. amazon-bedrock/global.anthropic.claude-opus-4-7)");
769
788
  const { input } = await import("@inquirer/prompts");
@@ -771,17 +790,26 @@ ${c.bold}Ready.${c.reset} Run ${c.green}opencode${c.reset} to start.
771
790
  const midModel = await input({ message: " mid (balanced):" });
772
791
  const fastModel = await input({ message: " fast (cheapest):" });
773
792
  if (deepModel) {
793
+ const resolvedMid = midModel || deepModel;
774
794
  pluginOpts.models = {
775
795
  deep: [deepModel],
776
- mid: [midModel || deepModel],
796
+ mid: [resolvedMid],
777
797
  fast: [fastModel || midModel || deepModel]
778
798
  };
779
799
  newModelsValue = {
780
800
  deep: [deepModel],
781
- mid: [midModel || deepModel],
801
+ mid: [resolvedMid],
782
802
  fast: [fastModel || midModel || deepModel]
783
803
  };
784
804
  ok("Models: custom");
805
+ const midExecModel = await input({ message: " mid-execute (optional strict executor, press Enter to skip):" });
806
+ if (midExecModel) {
807
+ pluginOpts.models["mid-execute"] = [midExecModel];
808
+ newModelsValue["mid-execute"] = [midExecModel];
809
+ info(` mid-execute \u2192 ${midExecModel} (strict executor prompts)`);
810
+ } else {
811
+ info(` mid-execute: skipped (build agents use mid model with reasoning prompts)`);
812
+ }
785
813
  } else {
786
814
  ok("Models: OpenCode defaults");
787
815
  }
@@ -47,7 +47,9 @@ function readPrompt(name) {
47
47
  var primePrompt = readPrompt("prime.md");
48
48
  var planPrompt = readPrompt("plan.md");
49
49
  var buildPrompt = readPrompt("build.md");
50
+ var buildOpenPrompt = readPrompt("build.open.md");
50
51
  var qaReviewerPrompt = readPrompt("qa-reviewer.md");
52
+ var qaReviewerOpenPrompt = readPrompt("qa-reviewer.open.md");
51
53
  var qaThoroughPrompt = readPrompt("qa-thorough.md");
52
54
  var planReviewerPrompt = readPrompt("plan-reviewer.md");
53
55
  var codeSearcherPrompt = readPrompt("code-searcher.md");
@@ -57,11 +59,24 @@ var docsMaintainerPrompt = readPrompt("docs-maintainer.md");
57
59
  var libReaderPrompt = readPrompt("lib-reader.md");
58
60
  var agentsMdWriterPrompt = readPrompt("agents-md-writer.md");
59
61
  var pilotBuilderPrompt = readPrompt("pilot-builder.md");
62
+ var pilotBuilderOpenPrompt = readPrompt("pilot-builder.open.md");
60
63
  var pilotPlannerPrompt = readPrompt("pilot-planner.md");
61
64
  var researchPrompt = readPrompt("research.md");
62
65
  var researchWebPrompt = readPrompt("research-web.md");
63
66
  var researchLocalPrompt = readPrompt("research-local.md");
64
67
  var researchAutoPrompt = readPrompt("research-auto.md");
68
+ var EXECUTOR_VARIANT_AGENTS = {
69
+ build: { reasoning: buildPrompt, strict: buildOpenPrompt },
70
+ "qa-reviewer": { reasoning: qaReviewerPrompt, strict: qaReviewerOpenPrompt },
71
+ "pilot-builder": { reasoning: pilotBuilderPrompt, strict: pilotBuilderOpenPrompt }
72
+ };
73
+ function getStrictPrompt(agentName) {
74
+ const variants = EXECUTOR_VARIANT_AGENTS[agentName];
75
+ if (!variants) {
76
+ throw new Error(`getStrictPrompt: no strict variant registered for agent "${agentName}"`);
77
+ }
78
+ return variants.strict;
79
+ }
65
80
  function stripFrontmatter(md) {
66
81
  if (!md.startsWith("---")) return md;
67
82
  const end = md.indexOf("\n---", 3);
@@ -563,12 +578,12 @@ var AGENT_TIERS = {
563
578
  "research-web": "deep",
564
579
  "research-local": "deep",
565
580
  "research-auto": "deep",
566
- build: "mid",
567
- "qa-reviewer": "mid",
581
+ build: "mid-execute",
582
+ "qa-reviewer": "mid-execute",
583
+ "pilot-builder": "mid-execute",
568
584
  "docs-maintainer": "mid",
569
585
  "lib-reader": "mid",
570
586
  "agents-md-writer": "mid",
571
- "pilot-builder": "mid",
572
587
  "code-searcher": "fast"
573
588
  };
574
589
  function createAgents() {
@@ -724,6 +739,7 @@ function formatModelOverrideWarning(id, source, suggestion) {
724
739
  }
725
740
 
726
741
  export {
742
+ getStrictPrompt,
727
743
  AGENT_TIERS,
728
744
  createAgents,
729
745
  validateModelOverride,
@@ -2,7 +2,7 @@
2
2
  import {
3
3
  createAgents,
4
4
  validateModelOverride
5
- } from "./chunk-CZMAJISX.js";
5
+ } from "./chunk-EK7K4NTV.js";
6
6
  import {
7
7
  getSessionsPath,
8
8
  registerSession,
@@ -11,7 +11,7 @@ import {
11
11
  import {
12
12
  install,
13
13
  requirePlugin
14
- } from "./chunk-WBBN7OVN.js";
14
+ } from "./chunk-BWERBERN.js";
15
15
  import "./chunk-VJUETC6A.js";
16
16
  import {
17
17
  getPilotDir,
@@ -1142,11 +1142,60 @@ CREATE TABLE IF NOT EXISTS events (
1142
1142
  CREATE INDEX IF NOT EXISTS idx_events_run ON events(run_id, id);
1143
1143
  CREATE INDEX IF NOT EXISTS idx_events_run_task ON events(run_id, task_id, id);
1144
1144
  `.trim();
1145
+ var V2_SQL = `
1146
+ CREATE TABLE IF NOT EXISTS workflows (
1147
+ id TEXT NOT NULL PRIMARY KEY,
1148
+ goal TEXT NOT NULL,
1149
+ started_at INTEGER NOT NULL,
1150
+ finished_at INTEGER,
1151
+ status TEXT NOT NULL CHECK (status IN ('pending','running','completed','aborted','failed')),
1152
+ current_phase TEXT
1153
+ );
1154
+
1155
+ CREATE TABLE IF NOT EXISTS phases (
1156
+ workflow_id TEXT NOT NULL,
1157
+ name TEXT NOT NULL CHECK (name IN ('scope','plan','build','qa','followup')),
1158
+ status TEXT NOT NULL CHECK (status IN ('pending','running','completed','aborted','failed')),
1159
+ started_at INTEGER,
1160
+ finished_at INTEGER,
1161
+ artifact_path TEXT,
1162
+ PRIMARY KEY (workflow_id, name),
1163
+ FOREIGN KEY (workflow_id) REFERENCES workflows(id) ON DELETE CASCADE
1164
+ );
1165
+
1166
+ CREATE TABLE IF NOT EXISTS artifacts (
1167
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
1168
+ workflow_id TEXT NOT NULL,
1169
+ phase TEXT NOT NULL,
1170
+ kind TEXT NOT NULL,
1171
+ path TEXT NOT NULL,
1172
+ created_at INTEGER NOT NULL,
1173
+ sha256 TEXT,
1174
+ FOREIGN KEY (workflow_id) REFERENCES workflows(id) ON DELETE CASCADE
1175
+ );
1176
+
1177
+ CREATE INDEX IF NOT EXISTS idx_artifacts_workflow_phase ON artifacts(workflow_id, phase);
1178
+
1179
+ ALTER TABLE events ADD COLUMN phase TEXT;
1180
+
1181
+ INSERT INTO workflows (id, goal, started_at, finished_at, status, current_phase)
1182
+ SELECT id, plan_slug, started_at, finished_at, status, 'build' FROM runs;
1183
+
1184
+ INSERT INTO phases (workflow_id, name, status, started_at, finished_at, artifact_path)
1185
+ SELECT id, 'build', status, started_at, finished_at, NULL FROM runs;
1186
+
1187
+ UPDATE events SET phase = 'build' WHERE phase IS NULL;
1188
+ `.trim();
1145
1189
  var MIGRATIONS = [
1146
1190
  {
1147
1191
  version: 1,
1148
1192
  description: "initial pilot schema (runs/tasks/events)",
1149
1193
  sql: V1_SQL
1194
+ },
1195
+ {
1196
+ version: 2,
1197
+ description: "workflows/phases/artifacts tables + events.phase column",
1198
+ sql: V2_SQL
1150
1199
  }
1151
1200
  ];
1152
1201
  function applyMigrations(db) {
@@ -1279,8 +1328,8 @@ function appendEvent(db, args) {
1279
1328
  });
1280
1329
  }
1281
1330
  db.run(
1282
- `INSERT INTO events (run_id, task_id, ts, kind, payload) VALUES (?, ?, ?, ?, ?)`,
1283
- [args.runId, args.taskId ?? null, ts, args.kind, payloadStr]
1331
+ `INSERT INTO events (run_id, task_id, ts, kind, payload, phase) VALUES (?, ?, ?, ?, ?, ?)`,
1332
+ [args.runId, args.taskId ?? null, ts, args.kind, payloadStr, args.phase ?? null]
1284
1333
  );
1285
1334
  if (eventSubscribers.length > 0) {
1286
1335
  const snapshot = eventSubscribers.slice();
@@ -1291,6 +1340,7 @@ function appendEvent(db, args) {
1291
1340
  taskId: args.taskId ?? null,
1292
1341
  kind: args.kind,
1293
1342
  payload: args.payload,
1343
+ phase: args.phase ?? null,
1294
1344
  ts
1295
1345
  });
1296
1346
  } catch {
@@ -1865,25 +1915,78 @@ function fixPrompt(_task, last) {
1865
1915
  return sections.join("\n");
1866
1916
  }
1867
1917
 
1868
- // src/pilot/verify/runner.ts
1869
- import { spawn as spawn2 } from "child_process";
1870
- var DEFAULT_TIMEOUT_MS = 5 * 60 * 1e3;
1871
- var DEFAULT_OUTPUT_CAP_BYTES = 256 * 1024;
1872
- var TRUNCATION_NOTICE = "\n[pilot] verify output truncated\n";
1873
- async function runVerify(commands, options) {
1918
+ // src/pilot/gates/composite.ts
1919
+ async function evalAllGate(gate, ctx) {
1920
+ const startedAt = Date.now();
1874
1921
  const results = [];
1875
- for (const command10 of commands) {
1876
- const result = await runOne(command10, options);
1877
- results.push(result);
1878
- if (!result.ok) {
1879
- return { ok: false, results, failure: result };
1922
+ for (const sub of gate.gates) {
1923
+ const subResult = await evalGate(sub, ctx);
1924
+ results.push({ gate: sub, result: subResult });
1925
+ if (!subResult.ok) {
1926
+ const evidence2 = {
1927
+ kind: "all",
1928
+ results,
1929
+ failure: subResult
1930
+ };
1931
+ return {
1932
+ ok: false,
1933
+ reason: subResult.reason,
1934
+ evidence: evidence2,
1935
+ durationMs: Date.now() - startedAt
1936
+ };
1880
1937
  }
1881
1938
  }
1939
+ const evidence = { kind: "all", results };
1882
1940
  return {
1883
1941
  ok: true,
1884
- results
1942
+ evidence,
1943
+ durationMs: Date.now() - startedAt
1885
1944
  };
1886
1945
  }
1946
+ async function evalAnyGate(gate, ctx) {
1947
+ const startedAt = Date.now();
1948
+ const results = [];
1949
+ if (gate.gates.length === 0) {
1950
+ const evidence2 = { kind: "any", results };
1951
+ return {
1952
+ ok: false,
1953
+ reason: "any-gate has no sub-gates to satisfy",
1954
+ evidence: evidence2,
1955
+ durationMs: Date.now() - startedAt
1956
+ };
1957
+ }
1958
+ let lastResult = null;
1959
+ for (const sub of gate.gates) {
1960
+ const subResult = await evalGate(sub, ctx);
1961
+ results.push({ gate: sub, result: subResult });
1962
+ lastResult = subResult;
1963
+ if (subResult.ok) {
1964
+ const evidence2 = { kind: "any", results };
1965
+ return {
1966
+ ok: true,
1967
+ evidence: evidence2,
1968
+ durationMs: Date.now() - startedAt
1969
+ };
1970
+ }
1971
+ }
1972
+ const evidence = {
1973
+ kind: "any",
1974
+ results,
1975
+ failure: lastResult ?? void 0
1976
+ };
1977
+ return {
1978
+ ok: false,
1979
+ reason: `any-gate exhausted: all ${results.length} sub-gates failed`,
1980
+ evidence,
1981
+ durationMs: Date.now() - startedAt
1982
+ };
1983
+ }
1984
+
1985
+ // src/pilot/verify/spawn.ts
1986
+ import { spawn as spawn2 } from "child_process";
1987
+ var DEFAULT_TIMEOUT_MS = 5 * 60 * 1e3;
1988
+ var DEFAULT_OUTPUT_CAP_BYTES = 256 * 1024;
1989
+ var TRUNCATION_NOTICE = "\n[pilot] verify output truncated\n";
1887
1990
  async function runOne(command10, options) {
1888
1991
  if (typeof command10 !== "string" || command10.length === 0) {
1889
1992
  throw new TypeError(`runOne: command must be a non-empty string`);
@@ -2020,6 +2123,147 @@ function killTree(child) {
2020
2123
  }, 2e3).unref();
2021
2124
  }
2022
2125
 
2126
+ // src/pilot/gates/shell.ts
2127
+ async function evalShellGate(gate, ctx) {
2128
+ const result = await runOne(gate.command, {
2129
+ cwd: ctx.cwd,
2130
+ env: ctx.env,
2131
+ abortSignal: ctx.abortSignal,
2132
+ onLine: ctx.onShellLine,
2133
+ timeoutMs: gate.timeoutMs,
2134
+ outputCapBytes: ctx.shellOutputCapBytes
2135
+ });
2136
+ return toGateResult(result);
2137
+ }
2138
+ function toGateResult(result) {
2139
+ if (result.ok) {
2140
+ return {
2141
+ ok: true,
2142
+ durationMs: result.durationMs,
2143
+ evidence: { kind: "shell", result }
2144
+ };
2145
+ }
2146
+ const reason = formatShellFailure(result);
2147
+ return {
2148
+ ok: false,
2149
+ reason,
2150
+ durationMs: result.durationMs,
2151
+ evidence: { kind: "shell", result }
2152
+ };
2153
+ }
2154
+ function formatShellFailure(result) {
2155
+ const flags = [];
2156
+ if (result.timedOut) flags.push("timed-out");
2157
+ if (result.aborted) flags.push("aborted");
2158
+ if (result.signal) flags.push(`signal=${result.signal}`);
2159
+ const flagSuffix = flags.length > 0 ? ` [${flags.join(",")}]` : "";
2160
+ return `shell gate failed: ${result.command} \u2192 exit ${result.exitCode}${flagSuffix}`;
2161
+ }
2162
+
2163
+ // src/pilot/gates/eval.ts
2164
+ async function evalGate(gate, ctx) {
2165
+ switch (gate.kind) {
2166
+ case "shell":
2167
+ return evalShellGate(gate, ctx);
2168
+ case "all":
2169
+ return evalAllGate(gate, ctx);
2170
+ case "any":
2171
+ return evalAnyGate(gate, ctx);
2172
+ default: {
2173
+ const _exhaustive = gate;
2174
+ throw new Error(
2175
+ `evalGate: unknown gate kind ${_exhaustive.kind}`
2176
+ );
2177
+ }
2178
+ }
2179
+ }
2180
+
2181
+ // src/pilot/gates/types.ts
2182
+ function asShellEvidence(evidence) {
2183
+ if (typeof evidence === "object" && evidence !== null && evidence.kind === "shell") {
2184
+ return evidence;
2185
+ }
2186
+ return null;
2187
+ }
2188
+ function asCompositeEvidence(evidence) {
2189
+ if (typeof evidence === "object" && evidence !== null && (evidence.kind === "all" || evidence.kind === "any")) {
2190
+ return evidence;
2191
+ }
2192
+ return null;
2193
+ }
2194
+
2195
+ // src/pilot/verify/runner.ts
2196
+ async function runVerify(commands, options) {
2197
+ if (commands.length === 0) {
2198
+ return { ok: true, results: [] };
2199
+ }
2200
+ const gate = {
2201
+ kind: "all",
2202
+ gates: commands.map((command10) => ({
2203
+ kind: "shell",
2204
+ command: command10,
2205
+ timeoutMs: options.timeoutMs
2206
+ }))
2207
+ };
2208
+ const ctx = {
2209
+ cwd: options.cwd,
2210
+ env: options.env,
2211
+ abortSignal: options.abortSignal,
2212
+ onShellLine: options.onLine,
2213
+ shellOutputCapBytes: options.outputCapBytes
2214
+ };
2215
+ const gateResult = await evalGate(gate, ctx);
2216
+ return toRunVerifyResult(gateResult);
2217
+ }
2218
+ function toRunVerifyResult(gateResult) {
2219
+ const composite = asCompositeEvidence(gateResult.evidence);
2220
+ if (composite === null || composite.kind !== "all") {
2221
+ throw new Error(
2222
+ `runVerify: expected composite all-gate evidence, got ${gateResultDescriptor(gateResult)}`
2223
+ );
2224
+ }
2225
+ const results = composite.results.map((entry) => extractCommandResult(entry));
2226
+ if (gateResult.ok) {
2227
+ return {
2228
+ ok: true,
2229
+ results
2230
+ };
2231
+ }
2232
+ const failingEntry = composite.results[composite.results.length - 1];
2233
+ if (!failingEntry || failingEntry.result.ok) {
2234
+ throw new Error(
2235
+ "runVerify: all-gate failed but no failing sub-result was recorded"
2236
+ );
2237
+ }
2238
+ const failureCommandResult = extractCommandResult(failingEntry);
2239
+ if (failureCommandResult.ok) {
2240
+ throw new Error(
2241
+ "runVerify: failing sub-gate produced a successful CommandResult"
2242
+ );
2243
+ }
2244
+ return {
2245
+ ok: false,
2246
+ results,
2247
+ failure: failureCommandResult
2248
+ };
2249
+ }
2250
+ function extractCommandResult(entry) {
2251
+ const shell = asShellEvidence(entry.result.evidence);
2252
+ if (shell === null) {
2253
+ throw new Error(
2254
+ `runVerify: expected shell-gate evidence in all-gate child, got ${gateResultDescriptor(entry.result)}`
2255
+ );
2256
+ }
2257
+ return shell.result;
2258
+ }
2259
+ function gateResultDescriptor(result) {
2260
+ const evidence = result.evidence;
2261
+ return JSON.stringify({
2262
+ ok: result.ok,
2263
+ evidenceKind: evidence?.kind ?? null
2264
+ });
2265
+ }
2266
+
2023
2267
  // src/pilot/verify/touches.ts
2024
2268
  import picomatch2 from "picomatch";
2025
2269
  import { execFile as execFile2 } from "child_process";
@@ -2530,7 +2774,11 @@ async function runOneTaskImpl(deps, task, opts) {
2530
2774
  command: f.command,
2531
2775
  exitCode: f.exitCode,
2532
2776
  output: f.output.slice(0, 4096),
2533
- reason: reason2
2777
+ reason: reason2,
2778
+ // Step 1 of pilot redesign: gate descriptor on every
2779
+ // verify-derived event. Future LLM/approval gates emit
2780
+ // identically-shaped events with a different `gate.kind`.
2781
+ gate: { kind: "shell", command: f.command }
2534
2782
  }
2535
2783
  });
2536
2784
  return;
@@ -2539,7 +2787,10 @@ async function runOneTaskImpl(deps, task, opts) {
2539
2787
  runId: deps.runId,
2540
2788
  taskId: task.id,
2541
2789
  kind: "task.baseline.passed",
2542
- payload: { commands: allVerify.length }
2790
+ payload: {
2791
+ commands: allVerify.length,
2792
+ gate: { kind: "all", subKind: "shell", count: baselineVerify.length }
2793
+ }
2543
2794
  });
2544
2795
  }
2545
2796
  let lastFailure = null;
@@ -2695,7 +2946,8 @@ async function runOneTaskImpl(deps, task, opts) {
2695
2946
  exitCode: lastFailure.exitCode,
2696
2947
  timedOut: verifyResult.failure.timedOut,
2697
2948
  aborted: verifyResult.failure.aborted,
2698
- output: verifyResult.failure.output.slice(-2048)
2949
+ output: verifyResult.failure.output.slice(-2048),
2950
+ gate: { kind: "shell", command: lastFailure.command }
2699
2951
  }
2700
2952
  });
2701
2953
  if (verifyResult.failure.aborted) {
@@ -2721,7 +2973,10 @@ async function runOneTaskImpl(deps, task, opts) {
2721
2973
  runId: deps.runId,
2722
2974
  taskId: task.id,
2723
2975
  kind: "task.verify.passed",
2724
- payload: { attempt }
2976
+ payload: {
2977
+ attempt,
2978
+ gate: { kind: "all", subKind: "shell", count: allVerify.length }
2979
+ }
2725
2980
  });
2726
2981
  const touches = await enforceTouches({
2727
2982
  cwd,
@@ -3311,7 +3566,7 @@ function startStreamingLogger(args) {
3311
3566
  const taskStart = /* @__PURE__ */ new Map();
3312
3567
  let succeeded = 0;
3313
3568
  let failed = 0;
3314
- const INLINE_BLOCKED_CAP = 5;
3569
+ const INLINE_BLOCKED_CAP = 0;
3315
3570
  let blockedCount = 0;
3316
3571
  let blockedInlineEmitted = 0;
3317
3572
  let blockedOverflowEmitted = false;
@@ -3350,6 +3605,24 @@ function startStreamingLogger(args) {
3350
3605
  if (id !== null) taskStart.set(id, event.ts);
3351
3606
  write(`task.started ${id ?? "?"}`);
3352
3607
  break;
3608
+ case "task.baseline.passed":
3609
+ break;
3610
+ case "task.baseline.failed": {
3611
+ const bp = event.payload;
3612
+ if (bp !== null && typeof bp === "object" && typeof bp.command === "string" && typeof bp.exitCode === "number") {
3613
+ write(
3614
+ `task.baseline.failed ${id ?? "?"} (${bp.command} \u2192 exit ${bp.exitCode})`
3615
+ );
3616
+ const output = typeof bp.output === "string" ? bp.output : null;
3617
+ if (output !== null && output.trim().length > 0) {
3618
+ const tail = output.trim().split("\n").slice(-6).map((l) => ` ${l}`).join("\n");
3619
+ writeRaw(tail);
3620
+ }
3621
+ } else {
3622
+ write(`task.baseline.failed ${id ?? "?"}`);
3623
+ }
3624
+ break;
3625
+ }
3353
3626
  case "task.verify.passed":
3354
3627
  write(`task.verify.passed ${id ?? "?"}`);
3355
3628
  break;
@@ -3435,7 +3708,7 @@ function startStreamingLogger(args) {
3435
3708
  case "task.attempt": {
3436
3709
  const p = event.payload;
3437
3710
  if (p !== null && typeof p === "object" && typeof p.attempt === "number" && typeof p.of === "number" && p.attempt >= 2) {
3438
- writeRaw(` attempt ${p.attempt}/${p.of} (retry with fix prompt)`);
3711
+ write(`task.retry ${id ?? "?"} attempt ${p.attempt}/${p.of}`);
3439
3712
  }
3440
3713
  break;
3441
3714
  }
@@ -3561,9 +3834,17 @@ Failed tasks (${failed.length}):
3561
3834
  session: ${session}
3562
3835
  worktree: ${worktree}
3563
3836
  elapsed: ${elapsed} attempts: ${t.attempts}
3564
-
3565
3837
  `
3566
3838
  );
3839
+ const baselineOutput = resolveBaselineOutput(db, runId, t.task_id);
3840
+ if (baselineOutput !== null) {
3841
+ const tail = baselineOutput.trim().split("\n").slice(-6).map((l) => ` ${l}`).join("\n");
3842
+ process.stdout.write(` output:
3843
+ ${tail}
3844
+ `);
3845
+ }
3846
+ process.stdout.write(`
3847
+ `);
3567
3848
  }
3568
3849
  }
3569
3850
  }
@@ -3592,6 +3873,18 @@ function resolveFailureDetail(db, runId, row) {
3592
3873
  reason: row.last_error ?? "(no reason recorded)"
3593
3874
  };
3594
3875
  }
3876
+ function resolveBaselineOutput(db, runId, taskId) {
3877
+ const events = readEventsDecoded(db, { runId, taskId });
3878
+ for (let i = events.length - 1; i >= 0; i--) {
3879
+ const e = events[i];
3880
+ if (e.kind !== "task.baseline.failed") continue;
3881
+ const p = e.payload;
3882
+ if (p !== null && typeof p === "object" && typeof p.output === "string") {
3883
+ return p.output;
3884
+ }
3885
+ }
3886
+ return null;
3887
+ }
3595
3888
  function truncateSummary(s, maxChars) {
3596
3889
  if (s.length <= maxChars) return s;
3597
3890
  return s.slice(0, maxChars - 1) + "\u2026";