@glrs-dev/cli 1.0.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/dist/{chunk-DEODG2LC.js → chunk-E2UNZIZT.js} +1 -1
- package/dist/{chunk-GQBZREK5.js → chunk-EM4MJBOD.js} +6 -4
- package/dist/{chunk-VJFNIKQJ.js → chunk-F3AFRUT2.js} +4 -3
- package/dist/{chunk-6RHN2EDH.js → chunk-I2KUXY3I.js} +2 -2
- package/dist/{chunk-FSAGM22T.js → chunk-OABVEBWW.js} +1 -1
- package/dist/{chunk-NLPX2KOF.js → chunk-RZWOWTKF.js} +1 -1
- package/dist/{chunk-VCN7RNLU.js → chunk-SPULDN7P.js} +8 -7
- package/dist/{chunk-HWMRY35D.js → chunk-UXBOTMDY.js} +1 -1
- package/dist/cli.js +10 -11
- package/dist/commands/cleanup.js +2 -2
- package/dist/commands/create.js +3 -3
- package/dist/commands/delete.js +2 -2
- package/dist/commands/go.js +2 -2
- package/dist/commands/list.js +3 -3
- package/dist/commands/switch.js +3 -3
- package/dist/lib/registry.js +1 -1
- package/dist/lib/worktree.js +2 -2
- package/dist/vendor/harness-opencode/dist/agents/prompts/build.open.md +88 -0
- package/dist/vendor/harness-opencode/dist/agents/prompts/pilot-builder.open.md +129 -0
- package/dist/vendor/harness-opencode/dist/agents/prompts/plan.md +7 -0
- package/dist/vendor/harness-opencode/dist/agents/prompts/prime.md +38 -0
- package/dist/vendor/harness-opencode/dist/agents/prompts/qa-reviewer.open.md +58 -0
- package/dist/vendor/harness-opencode/dist/{chunk-WBBN7OVN.js → chunk-BWERBERN.js} +31 -3
- package/dist/vendor/harness-opencode/dist/{chunk-CZMAJISX.js → chunk-EK7K4NTV.js} +19 -3
- package/dist/vendor/harness-opencode/dist/cli.js +316 -23
- package/dist/vendor/harness-opencode/dist/index.js +20 -4
- package/dist/vendor/harness-opencode/dist/{install-X5KEANRB.js → install-5JKWK6Z4.js} +1 -1
- package/dist/vendor/harness-opencode/dist/skills/code-quality/SKILL.md +45 -0
- package/dist/vendor/harness-opencode/dist/skills/code-quality/rules/building.md +125 -0
- package/dist/vendor/harness-opencode/dist/skills/code-quality/rules/gap-analysis.md +92 -0
- package/dist/vendor/harness-opencode/dist/skills/code-quality/rules/planning.md +96 -0
- package/dist/vendor/harness-opencode/dist/skills/code-quality/rules/review.md +104 -0
- package/dist/vendor/harness-opencode/dist/skills/pilot-planning/rules/self-review.md +1 -1
- package/dist/vendor/harness-opencode/dist/skills/pilot-planning/rules/verify-design.md +42 -0
- package/dist/vendor/harness-opencode/package.json +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: qa-reviewer
|
|
3
|
+
description: Fast adversarial reviewer. Always re-runs verifiers. Returns [PASS] or [FAIL]. Default for typical diffs.
|
|
4
|
+
mode: subagent
|
|
5
|
+
model: anthropic/claude-sonnet-4-6
|
|
6
|
+
temperature: 0.1
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
<!-- STRICT_EXECUTOR_VARIANT -->
|
|
10
|
+
|
|
11
|
+
You are the QA Reviewer (fast variant, open-weights edition). Your job is to verify that the diff matches the plan **semantically**, detect **scope creep**, and detect **plan drift**.
|
|
12
|
+
|
|
13
|
+
Do not ask the user questions. Return `[PASS]` or `[FAIL]` only. If you're tempted to ask, FAIL instead and let the build agent fix it.
|
|
14
|
+
|
|
15
|
+
**Always re-run tests, lint, and typecheck.** Do not skip verification steps. Run every command yourself before returning `[PASS]`.
|
|
16
|
+
|
|
17
|
+
# Process
|
|
18
|
+
|
|
19
|
+
1. **Read the plan** at the path provided.
|
|
20
|
+
2. **Inspect the diff.** Run `git diff` (against merge base — try `git merge-base HEAD origin/main` then `origin/master`) and `git diff --stat`. Also run `git status` to see untracked files.
|
|
21
|
+
3. **Plan-drift check (AUTO-FAIL).** For each modified file in the diff, verify it appears in the plan's `## File-level changes`. A modified file NOT listed in `## File-level changes` is AUTO-FAIL. Report as `Plan drift: <path> modified but not in ## File-level changes`.
|
|
22
|
+
4. **Scope-creep check.** For each UNTRACKED file (from `git status`) that is NOT in `## File-level changes`, run `git log --oneline -- <file>` to determine whether the file is pre-existing work or scope creep. If the file has no prior commits on this branch AND isn't in the plan, FAIL with `Scope creep: <path> untracked and not in plan`.
|
|
23
|
+
5. **Semantic verification.** For each item in `## File-level changes`, verify the corresponding code change exists and matches the description by reading the code. For each `## Acceptance criteria` item, verify it is actually met — do NOT trust `[x]` checkboxes.
|
|
24
|
+
6. **Plan-state verify commands.** Run `bunx @glrs-dev/harness-plugin-opencode plan-check --run <plan-path>` to get the list of verify commands for pending items. Execute each one via `bash`. Any non-zero exit → FAIL with `Verify failed: <command> (exit N)`. If the plan has no fence (legacy), plan-check emits `legacy (no plan-state fence)` — skip this step.
|
|
25
|
+
7. **Full-suite re-run.** Run the project's test / lint / typecheck commands (discover from `package.json` scripts / `Makefile` / `AGENTS.md`). Any failure → FAIL.
|
|
26
|
+
8. **Scan for new tech debt.** Run `todo_scan` with `onlyChanged: true`. For every TODO / FIXME / HACK / XXX in the result, check whether the plan's `## Out of scope` or `## Open questions` section acknowledges it. Unacknowledged new debt → FAIL with the specific `file:line`.
|
|
27
|
+
9. **AGENTS.md freshness (light check).** If the change shifts a convention documented in a local `AGENTS.md` in a touched directory, FAIL with `Update <path>/AGENTS.md to reflect <specific change>`.
|
|
28
|
+
|
|
29
|
+
# Output
|
|
30
|
+
|
|
31
|
+
Exactly one of these two formats. Nothing else.
|
|
32
|
+
|
|
33
|
+
**If everything passes:**
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
[PASS]
|
|
37
|
+
|
|
38
|
+
<2–3 sentence summary of verified changes.>
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
**If anything fails:**
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
[FAIL]
|
|
45
|
+
|
|
46
|
+
1. <File:line> — <Specific issue>
|
|
47
|
+
2. <File:line> — <Next issue>
|
|
48
|
+
...
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
# Rules
|
|
52
|
+
|
|
53
|
+
- Never suggest fixes. Report precisely; the build agent will fix.
|
|
54
|
+
- Never trust the build agent's narrative. "Pre-existing work" requires `git log --oneline -- <file>` evidence.
|
|
55
|
+
- A single failing item is enough to FAIL. Do not minimize.
|
|
56
|
+
- **AUTO-FAIL on plan drift.** Modified file not in `## File-level changes` → FAIL, no exceptions.
|
|
57
|
+
- **AUTO-FAIL on scope creep.** Untracked file not in plan with no prior commits → FAIL.
|
|
58
|
+
- If the diff is large (>10 files or >500 lines) or touches high-risk paths (auth / crypto / billing / migrations), tell the PRIME to delegate to `@qa-thorough` instead.
|
|
@@ -257,7 +257,7 @@ async function requirePlugin() {
|
|
|
257
257
|
);
|
|
258
258
|
process.exit(1);
|
|
259
259
|
}
|
|
260
|
-
const { install: install2 } = await import("./install-
|
|
260
|
+
const { install: install2 } = await import("./install-5JKWK6Z4.js");
|
|
261
261
|
await install2({ nonInteractive: true });
|
|
262
262
|
}
|
|
263
263
|
|
|
@@ -764,6 +764,25 @@ ${c.bold}Ready.${c.reset} Run ${c.green}opencode${c.reset} to start.
|
|
|
764
764
|
fast: [preset.fast]
|
|
765
765
|
};
|
|
766
766
|
ok(`Models configured`);
|
|
767
|
+
const midExecIdx = await promptChoice(
|
|
768
|
+
" Use a strict executor for build agents? (recommended for Kimi/Qwen/DeepSeek)",
|
|
769
|
+
["No (use mid model as reasoning builder)", "Yes (configure mid-execute model)"],
|
|
770
|
+
0
|
|
771
|
+
);
|
|
772
|
+
if (midExecIdx === 1) {
|
|
773
|
+
const { input } = await import("@inquirer/prompts");
|
|
774
|
+
const midExecModel = await input({
|
|
775
|
+
message: " mid-execute model ID:",
|
|
776
|
+
default: preset.mid
|
|
777
|
+
});
|
|
778
|
+
if (midExecModel) {
|
|
779
|
+
pluginOpts.models["mid-execute"] = [midExecModel];
|
|
780
|
+
newModelsValue["mid-execute"] = [midExecModel];
|
|
781
|
+
info(` mid-execute \u2192 ${midExecModel} (strict executor prompts)`);
|
|
782
|
+
}
|
|
783
|
+
} else {
|
|
784
|
+
info(` mid-execute: skipped (build agents use mid model with reasoning prompts)`);
|
|
785
|
+
}
|
|
767
786
|
} else if (!pluginOpts._skipModels) {
|
|
768
787
|
info("Enter model IDs in <provider>/<model-id> format (e.g. amazon-bedrock/global.anthropic.claude-opus-4-7)");
|
|
769
788
|
const { input } = await import("@inquirer/prompts");
|
|
@@ -771,17 +790,26 @@ ${c.bold}Ready.${c.reset} Run ${c.green}opencode${c.reset} to start.
|
|
|
771
790
|
const midModel = await input({ message: " mid (balanced):" });
|
|
772
791
|
const fastModel = await input({ message: " fast (cheapest):" });
|
|
773
792
|
if (deepModel) {
|
|
793
|
+
const resolvedMid = midModel || deepModel;
|
|
774
794
|
pluginOpts.models = {
|
|
775
795
|
deep: [deepModel],
|
|
776
|
-
mid: [
|
|
796
|
+
mid: [resolvedMid],
|
|
777
797
|
fast: [fastModel || midModel || deepModel]
|
|
778
798
|
};
|
|
779
799
|
newModelsValue = {
|
|
780
800
|
deep: [deepModel],
|
|
781
|
-
mid: [
|
|
801
|
+
mid: [resolvedMid],
|
|
782
802
|
fast: [fastModel || midModel || deepModel]
|
|
783
803
|
};
|
|
784
804
|
ok("Models: custom");
|
|
805
|
+
const midExecModel = await input({ message: " mid-execute (optional strict executor, press Enter to skip):" });
|
|
806
|
+
if (midExecModel) {
|
|
807
|
+
pluginOpts.models["mid-execute"] = [midExecModel];
|
|
808
|
+
newModelsValue["mid-execute"] = [midExecModel];
|
|
809
|
+
info(` mid-execute \u2192 ${midExecModel} (strict executor prompts)`);
|
|
810
|
+
} else {
|
|
811
|
+
info(` mid-execute: skipped (build agents use mid model with reasoning prompts)`);
|
|
812
|
+
}
|
|
785
813
|
} else {
|
|
786
814
|
ok("Models: OpenCode defaults");
|
|
787
815
|
}
|
|
@@ -47,7 +47,9 @@ function readPrompt(name) {
|
|
|
47
47
|
var primePrompt = readPrompt("prime.md");
|
|
48
48
|
var planPrompt = readPrompt("plan.md");
|
|
49
49
|
var buildPrompt = readPrompt("build.md");
|
|
50
|
+
var buildOpenPrompt = readPrompt("build.open.md");
|
|
50
51
|
var qaReviewerPrompt = readPrompt("qa-reviewer.md");
|
|
52
|
+
var qaReviewerOpenPrompt = readPrompt("qa-reviewer.open.md");
|
|
51
53
|
var qaThoroughPrompt = readPrompt("qa-thorough.md");
|
|
52
54
|
var planReviewerPrompt = readPrompt("plan-reviewer.md");
|
|
53
55
|
var codeSearcherPrompt = readPrompt("code-searcher.md");
|
|
@@ -57,11 +59,24 @@ var docsMaintainerPrompt = readPrompt("docs-maintainer.md");
|
|
|
57
59
|
var libReaderPrompt = readPrompt("lib-reader.md");
|
|
58
60
|
var agentsMdWriterPrompt = readPrompt("agents-md-writer.md");
|
|
59
61
|
var pilotBuilderPrompt = readPrompt("pilot-builder.md");
|
|
62
|
+
var pilotBuilderOpenPrompt = readPrompt("pilot-builder.open.md");
|
|
60
63
|
var pilotPlannerPrompt = readPrompt("pilot-planner.md");
|
|
61
64
|
var researchPrompt = readPrompt("research.md");
|
|
62
65
|
var researchWebPrompt = readPrompt("research-web.md");
|
|
63
66
|
var researchLocalPrompt = readPrompt("research-local.md");
|
|
64
67
|
var researchAutoPrompt = readPrompt("research-auto.md");
|
|
68
|
+
var EXECUTOR_VARIANT_AGENTS = {
|
|
69
|
+
build: { reasoning: buildPrompt, strict: buildOpenPrompt },
|
|
70
|
+
"qa-reviewer": { reasoning: qaReviewerPrompt, strict: qaReviewerOpenPrompt },
|
|
71
|
+
"pilot-builder": { reasoning: pilotBuilderPrompt, strict: pilotBuilderOpenPrompt }
|
|
72
|
+
};
|
|
73
|
+
/**
 * Look up the strict-executor prompt registered for an agent.
 *
 * @param {string} agentName - Key into EXECUTOR_VARIANT_AGENTS (e.g. "build").
 * @returns {*} The `strict` entry of the variant pair registered for the agent.
 * @throws {Error} When no variant pair is registered under `agentName`.
 */
function getStrictPrompt(agentName) {
  // Own-property check: a bare `table[name]` lookup also resolves inherited
  // keys such as "constructor" or "toString", which would slip past the
  // guard below and return `undefined` instead of raising.
  const isRegistered = Object.prototype.hasOwnProperty.call(EXECUTOR_VARIANT_AGENTS, agentName);
  const variants = isRegistered ? EXECUTOR_VARIANT_AGENTS[agentName] : void 0;
  if (!variants) {
    throw new Error(`getStrictPrompt: no strict variant registered for agent "${agentName}"`);
  }
  return variants.strict;
}
|
|
65
80
|
function stripFrontmatter(md) {
|
|
66
81
|
if (!md.startsWith("---")) return md;
|
|
67
82
|
const end = md.indexOf("\n---", 3);
|
|
@@ -563,12 +578,12 @@ var AGENT_TIERS = {
|
|
|
563
578
|
"research-web": "deep",
|
|
564
579
|
"research-local": "deep",
|
|
565
580
|
"research-auto": "deep",
|
|
566
|
-
build: "mid",
|
|
567
|
-
"qa-reviewer": "mid",
|
|
581
|
+
build: "mid-execute",
|
|
582
|
+
"qa-reviewer": "mid-execute",
|
|
583
|
+
"pilot-builder": "mid-execute",
|
|
568
584
|
"docs-maintainer": "mid",
|
|
569
585
|
"lib-reader": "mid",
|
|
570
586
|
"agents-md-writer": "mid",
|
|
571
|
-
"pilot-builder": "mid",
|
|
572
587
|
"code-searcher": "fast"
|
|
573
588
|
};
|
|
574
589
|
function createAgents() {
|
|
@@ -724,6 +739,7 @@ function formatModelOverrideWarning(id, source, suggestion) {
|
|
|
724
739
|
}
|
|
725
740
|
|
|
726
741
|
export {
|
|
742
|
+
getStrictPrompt,
|
|
727
743
|
AGENT_TIERS,
|
|
728
744
|
createAgents,
|
|
729
745
|
validateModelOverride,
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import {
|
|
3
3
|
createAgents,
|
|
4
4
|
validateModelOverride
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-EK7K4NTV.js";
|
|
6
6
|
import {
|
|
7
7
|
getSessionsPath,
|
|
8
8
|
registerSession,
|
|
@@ -11,7 +11,7 @@ import {
|
|
|
11
11
|
import {
|
|
12
12
|
install,
|
|
13
13
|
requirePlugin
|
|
14
|
-
} from "./chunk-
|
|
14
|
+
} from "./chunk-BWERBERN.js";
|
|
15
15
|
import "./chunk-VJUETC6A.js";
|
|
16
16
|
import {
|
|
17
17
|
getPilotDir,
|
|
@@ -1142,11 +1142,60 @@ CREATE TABLE IF NOT EXISTS events (
|
|
|
1142
1142
|
CREATE INDEX IF NOT EXISTS idx_events_run ON events(run_id, id);
|
|
1143
1143
|
CREATE INDEX IF NOT EXISTS idx_events_run_task ON events(run_id, task_id, id);
|
|
1144
1144
|
`.trim();
|
|
1145
|
+
var V2_SQL = `
|
|
1146
|
+
CREATE TABLE IF NOT EXISTS workflows (
|
|
1147
|
+
id TEXT NOT NULL PRIMARY KEY,
|
|
1148
|
+
goal TEXT NOT NULL,
|
|
1149
|
+
started_at INTEGER NOT NULL,
|
|
1150
|
+
finished_at INTEGER,
|
|
1151
|
+
status TEXT NOT NULL CHECK (status IN ('pending','running','completed','aborted','failed')),
|
|
1152
|
+
current_phase TEXT
|
|
1153
|
+
);
|
|
1154
|
+
|
|
1155
|
+
CREATE TABLE IF NOT EXISTS phases (
|
|
1156
|
+
workflow_id TEXT NOT NULL,
|
|
1157
|
+
name TEXT NOT NULL CHECK (name IN ('scope','plan','build','qa','followup')),
|
|
1158
|
+
status TEXT NOT NULL CHECK (status IN ('pending','running','completed','aborted','failed')),
|
|
1159
|
+
started_at INTEGER,
|
|
1160
|
+
finished_at INTEGER,
|
|
1161
|
+
artifact_path TEXT,
|
|
1162
|
+
PRIMARY KEY (workflow_id, name),
|
|
1163
|
+
FOREIGN KEY (workflow_id) REFERENCES workflows(id) ON DELETE CASCADE
|
|
1164
|
+
);
|
|
1165
|
+
|
|
1166
|
+
CREATE TABLE IF NOT EXISTS artifacts (
|
|
1167
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
1168
|
+
workflow_id TEXT NOT NULL,
|
|
1169
|
+
phase TEXT NOT NULL,
|
|
1170
|
+
kind TEXT NOT NULL,
|
|
1171
|
+
path TEXT NOT NULL,
|
|
1172
|
+
created_at INTEGER NOT NULL,
|
|
1173
|
+
sha256 TEXT,
|
|
1174
|
+
FOREIGN KEY (workflow_id) REFERENCES workflows(id) ON DELETE CASCADE
|
|
1175
|
+
);
|
|
1176
|
+
|
|
1177
|
+
CREATE INDEX IF NOT EXISTS idx_artifacts_workflow_phase ON artifacts(workflow_id, phase);
|
|
1178
|
+
|
|
1179
|
+
ALTER TABLE events ADD COLUMN phase TEXT;
|
|
1180
|
+
|
|
1181
|
+
INSERT INTO workflows (id, goal, started_at, finished_at, status, current_phase)
|
|
1182
|
+
SELECT id, plan_slug, started_at, finished_at, status, 'build' FROM runs;
|
|
1183
|
+
|
|
1184
|
+
INSERT INTO phases (workflow_id, name, status, started_at, finished_at, artifact_path)
|
|
1185
|
+
SELECT id, 'build', status, started_at, finished_at, NULL FROM runs;
|
|
1186
|
+
|
|
1187
|
+
UPDATE events SET phase = 'build' WHERE phase IS NULL;
|
|
1188
|
+
`.trim();
|
|
1145
1189
|
var MIGRATIONS = [
|
|
1146
1190
|
{
|
|
1147
1191
|
version: 1,
|
|
1148
1192
|
description: "initial pilot schema (runs/tasks/events)",
|
|
1149
1193
|
sql: V1_SQL
|
|
1194
|
+
},
|
|
1195
|
+
{
|
|
1196
|
+
version: 2,
|
|
1197
|
+
description: "workflows/phases/artifacts tables + events.phase column",
|
|
1198
|
+
sql: V2_SQL
|
|
1150
1199
|
}
|
|
1151
1200
|
];
|
|
1152
1201
|
function applyMigrations(db) {
|
|
@@ -1279,8 +1328,8 @@ function appendEvent(db, args) {
|
|
|
1279
1328
|
});
|
|
1280
1329
|
}
|
|
1281
1330
|
db.run(
|
|
1282
|
-
`INSERT INTO events (run_id, task_id, ts, kind, payload) VALUES (?, ?, ?, ?, ?)`,
|
|
1283
|
-
[args.runId, args.taskId ?? null, ts, args.kind, payloadStr]
|
|
1331
|
+
`INSERT INTO events (run_id, task_id, ts, kind, payload, phase) VALUES (?, ?, ?, ?, ?, ?)`,
|
|
1332
|
+
[args.runId, args.taskId ?? null, ts, args.kind, payloadStr, args.phase ?? null]
|
|
1284
1333
|
);
|
|
1285
1334
|
if (eventSubscribers.length > 0) {
|
|
1286
1335
|
const snapshot = eventSubscribers.slice();
|
|
@@ -1291,6 +1340,7 @@ function appendEvent(db, args) {
|
|
|
1291
1340
|
taskId: args.taskId ?? null,
|
|
1292
1341
|
kind: args.kind,
|
|
1293
1342
|
payload: args.payload,
|
|
1343
|
+
phase: args.phase ?? null,
|
|
1294
1344
|
ts
|
|
1295
1345
|
});
|
|
1296
1346
|
} catch {
|
|
@@ -1865,25 +1915,78 @@ function fixPrompt(_task, last) {
|
|
|
1865
1915
|
return sections.join("\n");
|
|
1866
1916
|
}
|
|
1867
1917
|
|
|
1868
|
-
// src/pilot/
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
var DEFAULT_OUTPUT_CAP_BYTES = 256 * 1024;
|
|
1872
|
-
var TRUNCATION_NOTICE = "\n[pilot] verify output truncated\n";
|
|
1873
|
-
async function runVerify(commands, options) {
|
|
1918
|
+
// src/pilot/gates/composite.ts
|
|
1919
|
+
/**
 * Evaluate an "all" composite gate: run each sub-gate in order through
 * `evalGate` and stop at the first one whose result is not ok.
 *
 * @param {{gates: Iterable<object>}} gate - Composite gate; `gate.gates` is iterated in order.
 * @param {object} ctx - Evaluation context, forwarded untouched to `evalGate`.
 * @returns {Promise<object>} `{ ok: true, evidence, durationMs }` when every
 *   sub-gate passed, otherwise `{ ok: false, reason, evidence, durationMs }`
 *   where `reason` is the failing sub-result's reason and `evidence.failure`
 *   is that sub-result. `evidence.results` lists `{ gate, result }` for every
 *   sub-gate that was actually run.
 */
async function evalAllGate(gate, ctx) {
  const began = Date.now();
  const collected = [];
  for (const child of gate.gates) {
    const outcome = await evalGate(child, ctx);
    collected.push({ gate: child, result: outcome });
    if (outcome.ok) continue;
    // Short-circuit: later sub-gates are never evaluated after a failure.
    return {
      ok: false,
      reason: outcome.reason,
      evidence: { kind: "all", results: collected, failure: outcome },
      durationMs: Date.now() - began
    };
  }
  return {
    ok: true,
    evidence: { kind: "all", results: collected },
    durationMs: Date.now() - began
  };
}
|
|
1946
|
+
/**
 * Evaluate an "any" composite gate: run sub-gates in order through `evalGate`
 * and succeed as soon as one of them is ok.
 *
 * @param {{gates: Array<object>}} gate - Composite gate; an empty `gates`
 *   list is reported as a failure rather than a vacuous success.
 * @param {object} ctx - Evaluation context, forwarded untouched to `evalGate`.
 * @returns {Promise<object>} `{ ok: true, evidence, durationMs }` on the first
 *   passing sub-gate; otherwise `{ ok: false, reason, evidence, durationMs }`.
 *   When every sub-gate failed, `evidence.failure` is the last sub-result.
 */
async function evalAnyGate(gate, ctx) {
  const began = Date.now();
  const attempts = [];

  if (gate.gates.length === 0) {
    return {
      ok: false,
      reason: "any-gate has no sub-gates to satisfy",
      evidence: { kind: "any", results: attempts },
      durationMs: Date.now() - began
    };
  }

  let mostRecent = null;
  for (const child of gate.gates) {
    mostRecent = await evalGate(child, ctx);
    attempts.push({ gate: child, result: mostRecent });
    if (!mostRecent.ok) continue;
    // First success wins; remaining sub-gates are not evaluated.
    return {
      ok: true,
      evidence: { kind: "any", results: attempts },
      durationMs: Date.now() - began
    };
  }

  return {
    ok: false,
    reason: `any-gate exhausted: all ${attempts.length} sub-gates failed`,
    evidence: { kind: "any", results: attempts, failure: mostRecent ?? void 0 },
    durationMs: Date.now() - began
  };
}
|
|
1984
|
+
|
|
1985
|
+
// src/pilot/verify/spawn.ts
|
|
1986
|
+
import { spawn as spawn2 } from "child_process";
|
|
1987
|
+
var DEFAULT_TIMEOUT_MS = 5 * 60 * 1e3;
|
|
1988
|
+
var DEFAULT_OUTPUT_CAP_BYTES = 256 * 1024;
|
|
1989
|
+
var TRUNCATION_NOTICE = "\n[pilot] verify output truncated\n";
|
|
1887
1990
|
async function runOne(command10, options) {
|
|
1888
1991
|
if (typeof command10 !== "string" || command10.length === 0) {
|
|
1889
1992
|
throw new TypeError(`runOne: command must be a non-empty string`);
|
|
@@ -2020,6 +2123,147 @@ function killTree(child) {
|
|
|
2020
2123
|
}, 2e3).unref();
|
|
2021
2124
|
}
|
|
2022
2125
|
|
|
2126
|
+
// src/pilot/gates/shell.ts
|
|
2127
|
+
/**
 * Evaluate a shell gate by running its command through `runOne` and
 * converting the command result with `toGateResult`.
 *
 * @param {{command: string, timeoutMs?: number}} gate - Shell gate descriptor.
 * @param {object} ctx - Supplies `cwd`, `env`, `abortSignal`, `onShellLine`
 *   (passed to `runOne` as `onLine`) and `shellOutputCapBytes` (passed as
 *   `outputCapBytes`).
 * @returns {Promise<object>} Whatever `toGateResult` produces for the run.
 */
async function evalShellGate(gate, ctx) {
  const shellCommand = gate.command;
  const runOptions = {
    cwd: ctx.cwd,
    env: ctx.env,
    abortSignal: ctx.abortSignal,
    onLine: ctx.onShellLine,
    timeoutMs: gate.timeoutMs,
    outputCapBytes: ctx.shellOutputCapBytes
  };
  const commandResult = await runOne(shellCommand, runOptions);
  return toGateResult(commandResult);
}
|
|
2138
|
+
/**
 * Wrap a command result in the gate-result shape.
 *
 * @param {{ok: boolean, durationMs: number}} result - Result of a shell command run.
 * @returns {object} `{ ok: true, durationMs, evidence }` for a passing
 *   command, or `{ ok: false, reason, durationMs, evidence }` with `reason`
 *   built by `formatShellFailure`. `evidence` is `{ kind: "shell", result }`.
 */
function toGateResult(result) {
  const evidence = { kind: "shell", result };
  return result.ok
    ? { ok: true, durationMs: result.durationMs, evidence }
    : {
        ok: false,
        reason: formatShellFailure(result),
        durationMs: result.durationMs,
        evidence
      };
}
|
|
2154
|
+
/**
 * Build the human-readable failure reason for a failed shell command.
 *
 * @param {{command: string, exitCode: *, timedOut?: boolean, aborted?: boolean, signal?: string}} result
 *   Command result to describe.
 * @returns {string} `shell gate failed: <command> → exit <code>` followed by a
 *   bracketed, comma-joined list of whichever of timed-out / aborted /
 *   signal=<name> apply.
 */
function formatShellFailure(result) {
  const flags = [
    result.timedOut ? "timed-out" : null,
    result.aborted ? "aborted" : null,
    result.signal ? `signal=${result.signal}` : null
  ].filter((flag) => flag !== null);
  const flagSuffix = flags.length === 0 ? "" : ` [${flags.join(",")}]`;
  return `shell gate failed: ${result.command} \u2192 exit ${result.exitCode}${flagSuffix}`;
}
|
|
2162
|
+
|
|
2163
|
+
// src/pilot/gates/eval.ts
|
|
2164
|
+
/**
 * Dispatch a gate to the evaluator matching its `kind`.
 *
 * @param {{kind: string}} gate - Gate descriptor; "shell", "all" and "any" are handled.
 * @param {object} ctx - Evaluation context, forwarded to the chosen evaluator.
 * @returns {Promise<object>} The chosen evaluator's result.
 * @throws {Error} (as a rejection) when `gate.kind` is not one of the handled kinds.
 */
async function evalGate(gate, ctx) {
  const kind = gate.kind;
  if (kind === "shell") return evalShellGate(gate, ctx);
  if (kind === "all") return evalAllGate(gate, ctx);
  if (kind === "any") return evalAnyGate(gate, ctx);
  throw new Error(
    `evalGate: unknown gate kind ${gate.kind}`
  );
}
|
|
2180
|
+
|
|
2181
|
+
// src/pilot/gates/types.ts
|
|
2182
|
+
/**
 * Narrow an evidence value to shell evidence.
 *
 * @param {*} evidence - Any value.
 * @returns {object|null} The same object when it is a non-null object whose
 *   `kind` is "shell"; otherwise `null`.
 */
function asShellEvidence(evidence) {
  const isObject = typeof evidence === "object" && evidence !== null;
  return isObject && evidence.kind === "shell" ? evidence : null;
}
|
|
2188
|
+
/**
 * Narrow an evidence value to composite ("all" / "any") evidence.
 *
 * @param {*} evidence - Any value.
 * @returns {object|null} The same object when it is a non-null object whose
 *   `kind` is "all" or "any"; otherwise `null`.
 */
function asCompositeEvidence(evidence) {
  if (typeof evidence !== "object" || evidence === null) {
    return null;
  }
  const kind = evidence.kind;
  return kind === "all" || kind === "any" ? evidence : null;
}
|
|
2194
|
+
|
|
2195
|
+
// src/pilot/verify/runner.ts
|
|
2196
|
+
/**
 * Run a list of verify commands by expressing them as one "all" gate of
 * shell gates and evaluating it with `evalGate`.
 *
 * @param {string[]} commands - Shell commands to run in order.
 * @param {object} options - Supplies `timeoutMs` (copied onto every shell
 *   gate) plus `cwd`, `env`, `abortSignal`, `onLine` and `outputCapBytes`,
 *   which become the gate evaluation context.
 * @returns {Promise<object>} `{ ok: true, results: [] }` for an empty command
 *   list; otherwise the gate result converted by `toRunVerifyResult`.
 */
async function runVerify(commands, options) {
  if (commands.length === 0) {
    return { ok: true, results: [] };
  }
  const shellGates = commands.map((command10) => ({
    kind: "shell",
    command: command10,
    timeoutMs: options.timeoutMs
  }));
  const verdict = await evalGate(
    { kind: "all", gates: shellGates },
    {
      cwd: options.cwd,
      env: options.env,
      abortSignal: options.abortSignal,
      onShellLine: options.onLine,
      shellOutputCapBytes: options.outputCapBytes
    }
  );
  return toRunVerifyResult(verdict);
}
|
|
2218
|
+
/**
 * Convert an "all"-gate result back into the runVerify result shape.
 *
 * @param {{ok: boolean, evidence: *}} gateResult - Result of evaluating the all-gate.
 * @returns {object} `{ ok: true, results }` when the gate passed, else
 *   `{ ok: false, results, failure }` where `failure` is the command result of
 *   the last recorded sub-gate. `results` holds one command result per
 *   recorded sub-gate.
 * @throws {Error} When the evidence is not composite "all" evidence, when a
 *   failed gate has no failing last entry, or when that entry's command
 *   result is itself ok.
 */
function toRunVerifyResult(gateResult) {
  const allEvidence = asCompositeEvidence(gateResult.evidence);
  const isAllGate = allEvidence !== null && allEvidence.kind === "all";
  if (!isAllGate) {
    throw new Error(
      `runVerify: expected composite all-gate evidence, got ${gateResultDescriptor(gateResult)}`
    );
  }

  const results = allEvidence.results.map((entry) => extractCommandResult(entry));
  if (gateResult.ok) {
    return { ok: true, results };
  }

  // The failing sub-gate is read from the last recorded entry.
  const entries = allEvidence.results;
  const lastEntry = entries[entries.length - 1];
  if (!lastEntry || lastEntry.result.ok) {
    throw new Error(
      "runVerify: all-gate failed but no failing sub-result was recorded"
    );
  }

  const failure = extractCommandResult(lastEntry);
  if (failure.ok) {
    throw new Error(
      "runVerify: failing sub-gate produced a successful CommandResult"
    );
  }
  return { ok: false, results, failure };
}
|
|
2250
|
+
/**
 * Pull the command result out of one all-gate child entry.
 *
 * @param {{result: {evidence: *}}} entry - A `{ gate, result }` entry from composite evidence.
 * @returns {*} The `result` carried by the entry's shell evidence.
 * @throws {Error} When the entry's evidence is not shell evidence.
 */
function extractCommandResult(entry) {
  const shellEvidence = asShellEvidence(entry.result.evidence);
  if (shellEvidence !== null) {
    return shellEvidence.result;
  }
  throw new Error(
    `runVerify: expected shell-gate evidence in all-gate child, got ${gateResultDescriptor(entry.result)}`
  );
}
|
|
2259
|
+
/**
 * Produce a compact JSON description of a gate result for error messages.
 *
 * @param {{ok: *, evidence?: {kind?: *}}} result - Gate result to describe.
 * @returns {string} JSON text with `ok` and `evidenceKind`; `evidenceKind` is
 *   `null` when the evidence or its `kind` is missing.
 */
function gateResultDescriptor(result) {
  const evidenceKind = result.evidence?.kind ?? null;
  return JSON.stringify({ ok: result.ok, evidenceKind });
}
|
|
2266
|
+
|
|
2023
2267
|
// src/pilot/verify/touches.ts
|
|
2024
2268
|
import picomatch2 from "picomatch";
|
|
2025
2269
|
import { execFile as execFile2 } from "child_process";
|
|
@@ -2530,7 +2774,11 @@ async function runOneTaskImpl(deps, task, opts) {
|
|
|
2530
2774
|
command: f.command,
|
|
2531
2775
|
exitCode: f.exitCode,
|
|
2532
2776
|
output: f.output.slice(0, 4096),
|
|
2533
|
-
reason: reason2
|
|
2777
|
+
reason: reason2,
|
|
2778
|
+
// Step 1 of pilot redesign: gate descriptor on every
|
|
2779
|
+
// verify-derived event. Future LLM/approval gates emit
|
|
2780
|
+
// identically-shaped events with a different `gate.kind`.
|
|
2781
|
+
gate: { kind: "shell", command: f.command }
|
|
2534
2782
|
}
|
|
2535
2783
|
});
|
|
2536
2784
|
return;
|
|
@@ -2539,7 +2787,10 @@ async function runOneTaskImpl(deps, task, opts) {
|
|
|
2539
2787
|
runId: deps.runId,
|
|
2540
2788
|
taskId: task.id,
|
|
2541
2789
|
kind: "task.baseline.passed",
|
|
2542
|
-
payload: {
|
|
2790
|
+
payload: {
|
|
2791
|
+
commands: allVerify.length,
|
|
2792
|
+
gate: { kind: "all", subKind: "shell", count: baselineVerify.length }
|
|
2793
|
+
}
|
|
2543
2794
|
});
|
|
2544
2795
|
}
|
|
2545
2796
|
let lastFailure = null;
|
|
@@ -2695,7 +2946,8 @@ async function runOneTaskImpl(deps, task, opts) {
|
|
|
2695
2946
|
exitCode: lastFailure.exitCode,
|
|
2696
2947
|
timedOut: verifyResult.failure.timedOut,
|
|
2697
2948
|
aborted: verifyResult.failure.aborted,
|
|
2698
|
-
output: verifyResult.failure.output.slice(-2048)
|
|
2949
|
+
output: verifyResult.failure.output.slice(-2048),
|
|
2950
|
+
gate: { kind: "shell", command: lastFailure.command }
|
|
2699
2951
|
}
|
|
2700
2952
|
});
|
|
2701
2953
|
if (verifyResult.failure.aborted) {
|
|
@@ -2721,7 +2973,10 @@ async function runOneTaskImpl(deps, task, opts) {
|
|
|
2721
2973
|
runId: deps.runId,
|
|
2722
2974
|
taskId: task.id,
|
|
2723
2975
|
kind: "task.verify.passed",
|
|
2724
|
-
payload: {
|
|
2976
|
+
payload: {
|
|
2977
|
+
attempt,
|
|
2978
|
+
gate: { kind: "all", subKind: "shell", count: allVerify.length }
|
|
2979
|
+
}
|
|
2725
2980
|
});
|
|
2726
2981
|
const touches = await enforceTouches({
|
|
2727
2982
|
cwd,
|
|
@@ -3311,7 +3566,7 @@ function startStreamingLogger(args) {
|
|
|
3311
3566
|
const taskStart = /* @__PURE__ */ new Map();
|
|
3312
3567
|
let succeeded = 0;
|
|
3313
3568
|
let failed = 0;
|
|
3314
|
-
const INLINE_BLOCKED_CAP =
|
|
3569
|
+
const INLINE_BLOCKED_CAP = 0;
|
|
3315
3570
|
let blockedCount = 0;
|
|
3316
3571
|
let blockedInlineEmitted = 0;
|
|
3317
3572
|
let blockedOverflowEmitted = false;
|
|
@@ -3350,6 +3605,24 @@ function startStreamingLogger(args) {
|
|
|
3350
3605
|
if (id !== null) taskStart.set(id, event.ts);
|
|
3351
3606
|
write(`task.started ${id ?? "?"}`);
|
|
3352
3607
|
break;
|
|
3608
|
+
case "task.baseline.passed":
|
|
3609
|
+
break;
|
|
3610
|
+
case "task.baseline.failed": {
|
|
3611
|
+
const bp = event.payload;
|
|
3612
|
+
if (bp !== null && typeof bp === "object" && typeof bp.command === "string" && typeof bp.exitCode === "number") {
|
|
3613
|
+
write(
|
|
3614
|
+
`task.baseline.failed ${id ?? "?"} (${bp.command} \u2192 exit ${bp.exitCode})`
|
|
3615
|
+
);
|
|
3616
|
+
const output = typeof bp.output === "string" ? bp.output : null;
|
|
3617
|
+
if (output !== null && output.trim().length > 0) {
|
|
3618
|
+
const tail = output.trim().split("\n").slice(-6).map((l) => ` ${l}`).join("\n");
|
|
3619
|
+
writeRaw(tail);
|
|
3620
|
+
}
|
|
3621
|
+
} else {
|
|
3622
|
+
write(`task.baseline.failed ${id ?? "?"}`);
|
|
3623
|
+
}
|
|
3624
|
+
break;
|
|
3625
|
+
}
|
|
3353
3626
|
case "task.verify.passed":
|
|
3354
3627
|
write(`task.verify.passed ${id ?? "?"}`);
|
|
3355
3628
|
break;
|
|
@@ -3435,7 +3708,7 @@ function startStreamingLogger(args) {
|
|
|
3435
3708
|
case "task.attempt": {
|
|
3436
3709
|
const p = event.payload;
|
|
3437
3710
|
if (p !== null && typeof p === "object" && typeof p.attempt === "number" && typeof p.of === "number" && p.attempt >= 2) {
|
|
3438
|
-
|
|
3711
|
+
write(`task.retry ${id ?? "?"} attempt ${p.attempt}/${p.of}`);
|
|
3439
3712
|
}
|
|
3440
3713
|
break;
|
|
3441
3714
|
}
|
|
@@ -3561,9 +3834,17 @@ Failed tasks (${failed.length}):
|
|
|
3561
3834
|
session: ${session}
|
|
3562
3835
|
worktree: ${worktree}
|
|
3563
3836
|
elapsed: ${elapsed} attempts: ${t.attempts}
|
|
3564
|
-
|
|
3565
3837
|
`
|
|
3566
3838
|
);
|
|
3839
|
+
const baselineOutput = resolveBaselineOutput(db, runId, t.task_id);
|
|
3840
|
+
if (baselineOutput !== null) {
|
|
3841
|
+
const tail = baselineOutput.trim().split("\n").slice(-6).map((l) => ` ${l}`).join("\n");
|
|
3842
|
+
process.stdout.write(` output:
|
|
3843
|
+
${tail}
|
|
3844
|
+
`);
|
|
3845
|
+
}
|
|
3846
|
+
process.stdout.write(`
|
|
3847
|
+
`);
|
|
3567
3848
|
}
|
|
3568
3849
|
}
|
|
3569
3850
|
}
|
|
@@ -3592,6 +3873,18 @@ function resolveFailureDetail(db, runId, row) {
|
|
|
3592
3873
|
reason: row.last_error ?? "(no reason recorded)"
|
|
3593
3874
|
};
|
|
3594
3875
|
}
|
|
3876
|
+
/**
 * Find the captured output of the most recent baseline failure for a task.
 *
 * Events come from `readEventsDecoded` and are scanned from last to first;
 * "task.baseline.failed" events without a string `output` are skipped.
 *
 * @param {*} db - Database handle, passed through to `readEventsDecoded`.
 * @param {*} runId - Run identifier used to filter events.
 * @param {*} taskId - Task identifier used to filter events.
 * @returns {string|null} The `payload.output` of the latest matching event,
 *   or `null` when none is found.
 */
function resolveBaselineOutput(db, runId, taskId) {
  const history = readEventsDecoded(db, { runId, taskId });
  let idx = history.length;
  while (idx-- > 0) {
    const candidate = history[idx];
    if (candidate.kind !== "task.baseline.failed") continue;
    const payload = candidate.payload;
    const hasOutput =
      payload !== null && typeof payload === "object" && typeof payload.output === "string";
    if (hasOutput) {
      return payload.output;
    }
  }
  return null;
}
|
|
3595
3888
|
function truncateSummary(s, maxChars) {
|
|
3596
3889
|
if (s.length <= maxChars) return s;
|
|
3597
3890
|
return s.slice(0, maxChars - 1) + "\u2026";
|