@glrs-dev/harness-plugin-opencode 1.0.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/dist/agents/prompts/build.open.md +88 -0
- package/dist/agents/prompts/pilot-builder.open.md +129 -0
- package/dist/agents/prompts/plan.md +7 -0
- package/dist/agents/prompts/prime.md +38 -0
- package/dist/agents/prompts/qa-reviewer.open.md +58 -0
- package/dist/{chunk-WBBN7OVN.js → chunk-BWERBERN.js} +31 -3
- package/dist/{chunk-CZMAJISX.js → chunk-EK7K4NTV.js} +19 -3
- package/dist/cli.js +316 -23
- package/dist/index.js +20 -4
- package/dist/{install-X5KEANRB.js → install-5JKWK6Z4.js} +1 -1
- package/dist/skills/code-quality/SKILL.md +45 -0
- package/dist/skills/code-quality/rules/building.md +125 -0
- package/dist/skills/code-quality/rules/gap-analysis.md +92 -0
- package/dist/skills/code-quality/rules/planning.md +96 -0
- package/dist/skills/code-quality/rules/review.md +104 -0
- package/dist/skills/pilot-planning/rules/self-review.md +1 -1
- package/dist/skills/pilot-planning/rules/verify-design.md +42 -0
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import {
|
|
3
3
|
createAgents,
|
|
4
4
|
validateModelOverride
|
|
5
|
-
} from "./chunk-CZMAJISX.js";
|
|
5
|
+
} from "./chunk-EK7K4NTV.js";
|
|
6
6
|
import {
|
|
7
7
|
getSessionsPath,
|
|
8
8
|
registerSession,
|
|
@@ -11,7 +11,7 @@ import {
|
|
|
11
11
|
import {
|
|
12
12
|
install,
|
|
13
13
|
requirePlugin
|
|
14
|
-
} from "./chunk-WBBN7OVN.js";
|
|
14
|
+
} from "./chunk-BWERBERN.js";
|
|
15
15
|
import "./chunk-VJUETC6A.js";
|
|
16
16
|
import {
|
|
17
17
|
getPilotDir,
|
|
@@ -1142,11 +1142,60 @@ CREATE TABLE IF NOT EXISTS events (
|
|
|
1142
1142
|
CREATE INDEX IF NOT EXISTS idx_events_run ON events(run_id, id);
|
|
1143
1143
|
CREATE INDEX IF NOT EXISTS idx_events_run_task ON events(run_id, task_id, id);
|
|
1144
1144
|
`.trim();
|
|
1145
|
+
var V2_SQL = `
|
|
1146
|
+
CREATE TABLE IF NOT EXISTS workflows (
|
|
1147
|
+
id TEXT NOT NULL PRIMARY KEY,
|
|
1148
|
+
goal TEXT NOT NULL,
|
|
1149
|
+
started_at INTEGER NOT NULL,
|
|
1150
|
+
finished_at INTEGER,
|
|
1151
|
+
status TEXT NOT NULL CHECK (status IN ('pending','running','completed','aborted','failed')),
|
|
1152
|
+
current_phase TEXT
|
|
1153
|
+
);
|
|
1154
|
+
|
|
1155
|
+
CREATE TABLE IF NOT EXISTS phases (
|
|
1156
|
+
workflow_id TEXT NOT NULL,
|
|
1157
|
+
name TEXT NOT NULL CHECK (name IN ('scope','plan','build','qa','followup')),
|
|
1158
|
+
status TEXT NOT NULL CHECK (status IN ('pending','running','completed','aborted','failed')),
|
|
1159
|
+
started_at INTEGER,
|
|
1160
|
+
finished_at INTEGER,
|
|
1161
|
+
artifact_path TEXT,
|
|
1162
|
+
PRIMARY KEY (workflow_id, name),
|
|
1163
|
+
FOREIGN KEY (workflow_id) REFERENCES workflows(id) ON DELETE CASCADE
|
|
1164
|
+
);
|
|
1165
|
+
|
|
1166
|
+
CREATE TABLE IF NOT EXISTS artifacts (
|
|
1167
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
1168
|
+
workflow_id TEXT NOT NULL,
|
|
1169
|
+
phase TEXT NOT NULL,
|
|
1170
|
+
kind TEXT NOT NULL,
|
|
1171
|
+
path TEXT NOT NULL,
|
|
1172
|
+
created_at INTEGER NOT NULL,
|
|
1173
|
+
sha256 TEXT,
|
|
1174
|
+
FOREIGN KEY (workflow_id) REFERENCES workflows(id) ON DELETE CASCADE
|
|
1175
|
+
);
|
|
1176
|
+
|
|
1177
|
+
CREATE INDEX IF NOT EXISTS idx_artifacts_workflow_phase ON artifacts(workflow_id, phase);
|
|
1178
|
+
|
|
1179
|
+
ALTER TABLE events ADD COLUMN phase TEXT;
|
|
1180
|
+
|
|
1181
|
+
INSERT INTO workflows (id, goal, started_at, finished_at, status, current_phase)
|
|
1182
|
+
SELECT id, plan_slug, started_at, finished_at, status, 'build' FROM runs;
|
|
1183
|
+
|
|
1184
|
+
INSERT INTO phases (workflow_id, name, status, started_at, finished_at, artifact_path)
|
|
1185
|
+
SELECT id, 'build', status, started_at, finished_at, NULL FROM runs;
|
|
1186
|
+
|
|
1187
|
+
UPDATE events SET phase = 'build' WHERE phase IS NULL;
|
|
1188
|
+
`.trim();
|
|
1145
1189
|
var MIGRATIONS = [
|
|
1146
1190
|
{
|
|
1147
1191
|
version: 1,
|
|
1148
1192
|
description: "initial pilot schema (runs/tasks/events)",
|
|
1149
1193
|
sql: V1_SQL
|
|
1194
|
+
},
|
|
1195
|
+
{
|
|
1196
|
+
version: 2,
|
|
1197
|
+
description: "workflows/phases/artifacts tables + events.phase column",
|
|
1198
|
+
sql: V2_SQL
|
|
1150
1199
|
}
|
|
1151
1200
|
];
|
|
1152
1201
|
function applyMigrations(db) {
|
|
@@ -1279,8 +1328,8 @@ function appendEvent(db, args) {
|
|
|
1279
1328
|
});
|
|
1280
1329
|
}
|
|
1281
1330
|
db.run(
|
|
1282
|
-
`INSERT INTO events (run_id, task_id, ts, kind, payload) VALUES (?, ?, ?, ?, ?)`,
|
|
1283
|
-
[args.runId, args.taskId ?? null, ts, args.kind, payloadStr]
|
|
1331
|
+
`INSERT INTO events (run_id, task_id, ts, kind, payload, phase) VALUES (?, ?, ?, ?, ?, ?)`,
|
|
1332
|
+
[args.runId, args.taskId ?? null, ts, args.kind, payloadStr, args.phase ?? null]
|
|
1284
1333
|
);
|
|
1285
1334
|
if (eventSubscribers.length > 0) {
|
|
1286
1335
|
const snapshot = eventSubscribers.slice();
|
|
@@ -1291,6 +1340,7 @@ function appendEvent(db, args) {
|
|
|
1291
1340
|
taskId: args.taskId ?? null,
|
|
1292
1341
|
kind: args.kind,
|
|
1293
1342
|
payload: args.payload,
|
|
1343
|
+
phase: args.phase ?? null,
|
|
1294
1344
|
ts
|
|
1295
1345
|
});
|
|
1296
1346
|
} catch {
|
|
@@ -1865,25 +1915,78 @@ function fixPrompt(_task, last) {
|
|
|
1865
1915
|
return sections.join("\n");
|
|
1866
1916
|
}
|
|
1867
1917
|
|
|
1868
|
-
// src/pilot/
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
var DEFAULT_OUTPUT_CAP_BYTES = 256 * 1024;
|
|
1872
|
-
var TRUNCATION_NOTICE = "\n[pilot] verify output truncated\n";
|
|
1873
|
-
async function runVerify(commands, options) {
|
|
1918
|
+
// src/pilot/gates/composite.ts
|
|
1919
|
+
/**
 * Evaluates an "all" composite gate: every sub-gate must pass.
 *
 * Sub-gates are evaluated one at a time, in the order they appear in
 * `gate.gates`, and evaluation stops at the first sub-gate whose result is
 * not ok. Every evaluated sub-gate (including the failing one) is recorded
 * in the evidence.
 *
 * @param {{ gates: Array<object> }} gate - composite gate holding the sub-gates.
 * @param {object} ctx - evaluation context, forwarded unchanged to `evalGate`.
 * @returns {Promise<object>} `{ ok: true, evidence, durationMs }` when every
 *   sub-gate passed (also when there are no sub-gates), otherwise
 *   `{ ok: false, reason, evidence, durationMs }` where `reason` is the failing
 *   sub-gate's reason and `evidence.failure` is its result.
 */
async function evalAllGate(gate, ctx) {
  const startedAt = Date.now();
  const elapsed = () => Date.now() - startedAt;
  const results = [];

  for (const child of gate.gates) {
    const outcome = await evalGate(child, ctx);
    results.push({ gate: child, result: outcome });
    if (outcome.ok) continue;

    // Short-circuit: the remaining sub-gates are never evaluated.
    return {
      ok: false,
      reason: outcome.reason,
      evidence: { kind: "all", results, failure: outcome },
      durationMs: elapsed()
    };
  }

  return {
    ok: true,
    evidence: { kind: "all", results },
    durationMs: elapsed()
  };
}
|
|
1946
|
+
/**
 * Evaluates an "any" composite gate: one passing sub-gate is enough.
 *
 * Sub-gates are evaluated one at a time, in order, and evaluation stops at
 * the first sub-gate whose result is ok. A gate with no sub-gates fails
 * immediately, since there is nothing that could satisfy it.
 *
 * @param {{ gates: Array<object> }} gate - composite gate holding the sub-gates.
 * @param {object} ctx - evaluation context, forwarded unchanged to `evalGate`.
 * @returns {Promise<object>} `{ ok: true, evidence, durationMs }` on the first
 *   passing sub-gate, otherwise `{ ok: false, reason, evidence, durationMs }`.
 *   When every sub-gate failed, `evidence.failure` is the last sub-gate result.
 */
async function evalAnyGate(gate, ctx) {
  const startedAt = Date.now();
  const elapsed = () => Date.now() - startedAt;
  const attempts = [];

  if (gate.gates.length === 0) {
    return {
      ok: false,
      reason: "any-gate has no sub-gates to satisfy",
      evidence: { kind: "any", results: attempts },
      durationMs: elapsed()
    };
  }

  let mostRecent = null;
  for (const candidate of gate.gates) {
    mostRecent = await evalGate(candidate, ctx);
    attempts.push({ gate: candidate, result: mostRecent });
    if (mostRecent.ok) {
      // Short-circuit: later alternatives are never evaluated.
      return {
        ok: true,
        evidence: { kind: "any", results: attempts },
        durationMs: elapsed()
      };
    }
  }

  return {
    ok: false,
    reason: `any-gate exhausted: all ${attempts.length} sub-gates failed`,
    evidence: { kind: "any", results: attempts, failure: mostRecent ?? undefined },
    durationMs: elapsed()
  };
}
|
|
1984
|
+
|
|
1985
|
+
// src/pilot/verify/spawn.ts
|
|
1986
|
+
import { spawn as spawn2 } from "child_process";
|
|
1987
|
+
var DEFAULT_TIMEOUT_MS = 5 * 60 * 1e3;
|
|
1988
|
+
var DEFAULT_OUTPUT_CAP_BYTES = 256 * 1024;
|
|
1989
|
+
var TRUNCATION_NOTICE = "\n[pilot] verify output truncated\n";
|
|
1887
1990
|
async function runOne(command10, options) {
|
|
1888
1991
|
if (typeof command10 !== "string" || command10.length === 0) {
|
|
1889
1992
|
throw new TypeError(`runOne: command must be a non-empty string`);
|
|
@@ -2020,6 +2123,147 @@ function killTree(child) {
|
|
|
2020
2123
|
}, 2e3).unref();
|
|
2021
2124
|
}
|
|
2022
2125
|
|
|
2126
|
+
// src/pilot/gates/shell.ts
|
|
2127
|
+
/**
 * Evaluates a shell gate by running its command.
 *
 * The command is executed through `runOne`; the working directory,
 * environment, abort signal, per-line callback and output cap come from the
 * context, while the timeout comes from the gate itself. The command result
 * is then converted with `toGateResult`.
 *
 * @param {{ command: string, timeoutMs?: number }} gate - shell gate to run.
 * @param {object} ctx - evaluation context (`cwd`, `env`, `abortSignal`,
 *   `onShellLine`, `shellOutputCapBytes`).
 * @returns {Promise<object>} the gate result produced by `toGateResult`.
 */
async function evalShellGate(gate, ctx) {
  const runOptions = {
    cwd: ctx.cwd,
    env: ctx.env,
    abortSignal: ctx.abortSignal,
    onLine: ctx.onShellLine,
    timeoutMs: gate.timeoutMs,
    outputCapBytes: ctx.shellOutputCapBytes
  };
  const commandResult = await runOne(gate.command, runOptions);
  return toGateResult(commandResult);
}
|
|
2138
|
+
/**
 * Converts a command result into a gate result.
 *
 * The gate result mirrors `result.ok` and `result.durationMs` and always
 * carries the original command result as shell evidence. Failed results
 * additionally get a `reason` built by `formatShellFailure`.
 *
 * @param {object} result - command result (reads `ok` and `durationMs`).
 * @returns {object} `{ ok: true, durationMs, evidence }` or
 *   `{ ok: false, reason, durationMs, evidence }`.
 */
function toGateResult(result) {
  const evidence = { kind: "shell", result };
  const { durationMs } = result;

  if (!result.ok) {
    return {
      ok: false,
      reason: formatShellFailure(result),
      durationMs,
      evidence
    };
  }

  return { ok: true, durationMs, evidence };
}
|
|
2154
|
+
/**
 * Builds the one-line failure reason for a failed shell command.
 *
 * The message names the command and its exit code, followed by a bracketed,
 * comma-separated list of whichever of these apply: `timed-out`, `aborted`,
 * `signal=<name>`. The bracketed part is omitted when none apply.
 *
 * @param {object} result - command result (reads `command`, `exitCode`,
 *   `timedOut`, `aborted`, `signal`).
 * @returns {string} the formatted failure reason.
 */
function formatShellFailure(result) {
  const markers = [
    result.timedOut ? "timed-out" : null,
    result.aborted ? "aborted" : null,
    result.signal ? `signal=${result.signal}` : null
  ].filter((marker) => marker !== null);

  const suffix = markers.length === 0 ? "" : ` [${markers.join(",")}]`;
  return `shell gate failed: ${result.command} \u2192 exit ${result.exitCode}${suffix}`;
}
|
|
2162
|
+
|
|
2163
|
+
// src/pilot/gates/eval.ts
|
|
2164
|
+
/**
 * Dispatches a gate to the evaluator matching its `kind`.
 *
 * Supported kinds are "shell", "all" and "any"; the composite evaluators
 * call back into this function for their sub-gates.
 *
 * @param {{ kind: string }} gate - gate to evaluate.
 * @param {object} ctx - evaluation context, forwarded to the evaluator.
 * @returns {Promise<object>} the evaluator's gate result.
 * @throws {Error} (as a rejection) when `gate.kind` is not a supported kind.
 */
async function evalGate(gate, ctx) {
  const { kind } = gate;

  if (kind === "shell") return evalShellGate(gate, ctx);
  if (kind === "all") return evalAllGate(gate, ctx);
  if (kind === "any") return evalAnyGate(gate, ctx);

  throw new Error(
    `evalGate: unknown gate kind ${kind}`
  );
}
|
|
2180
|
+
|
|
2181
|
+
// src/pilot/gates/types.ts
|
|
2182
|
+
/**
 * Narrows an unknown evidence value to shell evidence.
 *
 * @param {unknown} evidence - value to inspect.
 * @returns {object|null} the same value when it is a non-null object whose
 *   `kind` is "shell", otherwise `null`.
 */
function asShellEvidence(evidence) {
  const isObject = typeof evidence === "object" && evidence !== null;
  return isObject && evidence.kind === "shell" ? evidence : null;
}
|
|
2188
|
+
/**
 * Narrows an unknown evidence value to composite ("all" / "any") evidence.
 *
 * @param {unknown} evidence - value to inspect.
 * @returns {object|null} the same value when it is a non-null object whose
 *   `kind` is "all" or "any", otherwise `null`.
 */
function asCompositeEvidence(evidence) {
  if (evidence === null || typeof evidence !== "object") {
    return null;
  }
  const { kind } = evidence;
  return kind === "all" || kind === "any" ? evidence : null;
}
|
|
2194
|
+
|
|
2195
|
+
// src/pilot/verify/runner.ts
|
|
2196
|
+
/**
 * Runs a list of verify commands and reports the combined outcome.
 *
 * Each command becomes a shell gate (all sharing `options.timeoutMs`), the
 * shell gates are wrapped in a single "all" gate, and the gate result is
 * translated with `toRunVerifyResult`. An empty command list succeeds
 * immediately without reading `options`.
 *
 * @param {string[]} commands - shell commands to run, in order.
 * @param {object} options - reads `timeoutMs`, `cwd`, `env`, `abortSignal`,
 *   `onLine` and `outputCapBytes`.
 * @returns {Promise<object>} `{ ok: true, results: [] }` for no commands,
 *   otherwise whatever `toRunVerifyResult` returns for the gate result.
 */
async function runVerify(commands, options) {
  if (commands.length === 0) {
    return { ok: true, results: [] };
  }

  const shellGates = commands.map((command10) => ({
    kind: "shell",
    command: command10,
    timeoutMs: options.timeoutMs
  }));

  // Map the verify options onto the field names the gate context uses.
  const gateContext = {
    cwd: options.cwd,
    env: options.env,
    abortSignal: options.abortSignal,
    onShellLine: options.onLine,
    shellOutputCapBytes: options.outputCapBytes
  };

  const verdict = await evalGate({ kind: "all", gates: shellGates }, gateContext);
  return toRunVerifyResult(verdict);
}
|
|
2218
|
+
/**
 * Translates the result of an "all" gate into the verify-result shape.
 *
 * Every recorded sub-result is unwrapped to its command result via
 * `extractCommandResult`. On failure, the last recorded sub-result is taken
 * as the failing one.
 *
 * @param {object} gateResult - gate result whose evidence must be "all"
 *   composite evidence.
 * @returns {object} `{ ok: true, results }` or `{ ok: false, results, failure }`.
 * @throws {Error} when the evidence is not "all" composite evidence, when a
 *   failed gate has no failing last sub-result, or when the failing
 *   sub-result unwraps to a successful command result.
 */
function toRunVerifyResult(gateResult) {
  const composite = asCompositeEvidence(gateResult.evidence);
  const isAllEvidence = composite !== null && composite.kind === "all";
  if (!isAllEvidence) {
    throw new Error(
      `runVerify: expected composite all-gate evidence, got ${gateResultDescriptor(gateResult)}`
    );
  }

  const entries = composite.results;
  const results = entries.map((entry) => extractCommandResult(entry));
  if (gateResult.ok) {
    return { ok: true, results };
  }

  // The failing sub-result is looked up as the last recorded entry.
  const failingEntry = entries[entries.length - 1];
  if (!failingEntry || failingEntry.result.ok) {
    throw new Error(
      "runVerify: all-gate failed but no failing sub-result was recorded"
    );
  }

  const failure = extractCommandResult(failingEntry);
  if (failure.ok) {
    throw new Error(
      "runVerify: failing sub-gate produced a successful CommandResult"
    );
  }

  return { ok: false, results, failure };
}
|
|
2250
|
+
/**
 * Unwraps the command result stored in a composite-gate entry.
 *
 * @param {{ result: object }} entry - one entry of a composite gate's
 *   `results`; its `result.evidence` must be shell evidence.
 * @returns {object} the command result held by the shell evidence.
 * @throws {Error} when the entry's evidence is not shell evidence.
 */
function extractCommandResult(entry) {
  const { result: gateResult } = entry;
  const shellEvidence = asShellEvidence(gateResult.evidence);
  if (shellEvidence !== null) {
    return shellEvidence.result;
  }
  throw new Error(
    `runVerify: expected shell-gate evidence in all-gate child, got ${gateResultDescriptor(gateResult)}`
  );
}
|
|
2259
|
+
/**
 * Produces a compact JSON description of a gate result for error messages.
 *
 * @param {object} result - gate result (reads `ok` and `evidence.kind`).
 * @returns {string} JSON with `ok` and `evidenceKind`; `evidenceKind` is
 *   `null` when the evidence or its kind is missing.
 */
function gateResultDescriptor(result) {
  const { ok, evidence } = result;
  const evidenceKind = evidence?.kind ?? null;
  return JSON.stringify({ ok, evidenceKind });
}
|
|
2266
|
+
|
|
2023
2267
|
// src/pilot/verify/touches.ts
|
|
2024
2268
|
import picomatch2 from "picomatch";
|
|
2025
2269
|
import { execFile as execFile2 } from "child_process";
|
|
@@ -2530,7 +2774,11 @@ async function runOneTaskImpl(deps, task, opts) {
|
|
|
2530
2774
|
command: f.command,
|
|
2531
2775
|
exitCode: f.exitCode,
|
|
2532
2776
|
output: f.output.slice(0, 4096),
|
|
2533
|
-
reason: reason2
|
|
2777
|
+
reason: reason2,
|
|
2778
|
+
// Step 1 of pilot redesign: gate descriptor on every
|
|
2779
|
+
// verify-derived event. Future LLM/approval gates emit
|
|
2780
|
+
// identically-shaped events with a different `gate.kind`.
|
|
2781
|
+
gate: { kind: "shell", command: f.command }
|
|
2534
2782
|
}
|
|
2535
2783
|
});
|
|
2536
2784
|
return;
|
|
@@ -2539,7 +2787,10 @@ async function runOneTaskImpl(deps, task, opts) {
|
|
|
2539
2787
|
runId: deps.runId,
|
|
2540
2788
|
taskId: task.id,
|
|
2541
2789
|
kind: "task.baseline.passed",
|
|
2542
|
-
payload: {
|
|
2790
|
+
payload: {
|
|
2791
|
+
commands: allVerify.length,
|
|
2792
|
+
gate: { kind: "all", subKind: "shell", count: baselineVerify.length }
|
|
2793
|
+
}
|
|
2543
2794
|
});
|
|
2544
2795
|
}
|
|
2545
2796
|
let lastFailure = null;
|
|
@@ -2695,7 +2946,8 @@ async function runOneTaskImpl(deps, task, opts) {
|
|
|
2695
2946
|
exitCode: lastFailure.exitCode,
|
|
2696
2947
|
timedOut: verifyResult.failure.timedOut,
|
|
2697
2948
|
aborted: verifyResult.failure.aborted,
|
|
2698
|
-
output: verifyResult.failure.output.slice(-2048)
|
|
2949
|
+
output: verifyResult.failure.output.slice(-2048),
|
|
2950
|
+
gate: { kind: "shell", command: lastFailure.command }
|
|
2699
2951
|
}
|
|
2700
2952
|
});
|
|
2701
2953
|
if (verifyResult.failure.aborted) {
|
|
@@ -2721,7 +2973,10 @@ async function runOneTaskImpl(deps, task, opts) {
|
|
|
2721
2973
|
runId: deps.runId,
|
|
2722
2974
|
taskId: task.id,
|
|
2723
2975
|
kind: "task.verify.passed",
|
|
2724
|
-
payload: {
|
|
2976
|
+
payload: {
|
|
2977
|
+
attempt,
|
|
2978
|
+
gate: { kind: "all", subKind: "shell", count: allVerify.length }
|
|
2979
|
+
}
|
|
2725
2980
|
});
|
|
2726
2981
|
const touches = await enforceTouches({
|
|
2727
2982
|
cwd,
|
|
@@ -3311,7 +3566,7 @@ function startStreamingLogger(args) {
|
|
|
3311
3566
|
const taskStart = /* @__PURE__ */ new Map();
|
|
3312
3567
|
let succeeded = 0;
|
|
3313
3568
|
let failed = 0;
|
|
3314
|
-
const INLINE_BLOCKED_CAP =
|
|
3569
|
+
const INLINE_BLOCKED_CAP = 0;
|
|
3315
3570
|
let blockedCount = 0;
|
|
3316
3571
|
let blockedInlineEmitted = 0;
|
|
3317
3572
|
let blockedOverflowEmitted = false;
|
|
@@ -3350,6 +3605,24 @@ function startStreamingLogger(args) {
|
|
|
3350
3605
|
if (id !== null) taskStart.set(id, event.ts);
|
|
3351
3606
|
write(`task.started ${id ?? "?"}`);
|
|
3352
3607
|
break;
|
|
3608
|
+
case "task.baseline.passed":
|
|
3609
|
+
break;
|
|
3610
|
+
case "task.baseline.failed": {
|
|
3611
|
+
const bp = event.payload;
|
|
3612
|
+
if (bp !== null && typeof bp === "object" && typeof bp.command === "string" && typeof bp.exitCode === "number") {
|
|
3613
|
+
write(
|
|
3614
|
+
`task.baseline.failed ${id ?? "?"} (${bp.command} \u2192 exit ${bp.exitCode})`
|
|
3615
|
+
);
|
|
3616
|
+
const output = typeof bp.output === "string" ? bp.output : null;
|
|
3617
|
+
if (output !== null && output.trim().length > 0) {
|
|
3618
|
+
const tail = output.trim().split("\n").slice(-6).map((l) => ` ${l}`).join("\n");
|
|
3619
|
+
writeRaw(tail);
|
|
3620
|
+
}
|
|
3621
|
+
} else {
|
|
3622
|
+
write(`task.baseline.failed ${id ?? "?"}`);
|
|
3623
|
+
}
|
|
3624
|
+
break;
|
|
3625
|
+
}
|
|
3353
3626
|
case "task.verify.passed":
|
|
3354
3627
|
write(`task.verify.passed ${id ?? "?"}`);
|
|
3355
3628
|
break;
|
|
@@ -3435,7 +3708,7 @@ function startStreamingLogger(args) {
|
|
|
3435
3708
|
case "task.attempt": {
|
|
3436
3709
|
const p = event.payload;
|
|
3437
3710
|
if (p !== null && typeof p === "object" && typeof p.attempt === "number" && typeof p.of === "number" && p.attempt >= 2) {
|
|
3438
|
-
|
|
3711
|
+
write(`task.retry ${id ?? "?"} attempt ${p.attempt}/${p.of}`);
|
|
3439
3712
|
}
|
|
3440
3713
|
break;
|
|
3441
3714
|
}
|
|
@@ -3561,9 +3834,17 @@ Failed tasks (${failed.length}):
|
|
|
3561
3834
|
session: ${session}
|
|
3562
3835
|
worktree: ${worktree}
|
|
3563
3836
|
elapsed: ${elapsed} attempts: ${t.attempts}
|
|
3564
|
-
|
|
3565
3837
|
`
|
|
3566
3838
|
);
|
|
3839
|
+
const baselineOutput = resolveBaselineOutput(db, runId, t.task_id);
|
|
3840
|
+
if (baselineOutput !== null) {
|
|
3841
|
+
const tail = baselineOutput.trim().split("\n").slice(-6).map((l) => ` ${l}`).join("\n");
|
|
3842
|
+
process.stdout.write(` output:
|
|
3843
|
+
${tail}
|
|
3844
|
+
`);
|
|
3845
|
+
}
|
|
3846
|
+
process.stdout.write(`
|
|
3847
|
+
`);
|
|
3567
3848
|
}
|
|
3568
3849
|
}
|
|
3569
3850
|
}
|
|
@@ -3592,6 +3873,18 @@ function resolveFailureDetail(db, runId, row) {
|
|
|
3592
3873
|
reason: row.last_error ?? "(no reason recorded)"
|
|
3593
3874
|
};
|
|
3594
3875
|
}
|
|
3876
|
+
/**
 * Finds the output of the most recent baseline failure for a task.
 *
 * Events for the run/task are read with `readEventsDecoded` and scanned from
 * the end backwards. The first `task.baseline.failed` event whose payload is
 * an object with a string `output` wins; baseline failures without such a
 * payload are skipped.
 *
 * @param {object} db - database handle, passed through to `readEventsDecoded`.
 * @param {string} runId - run to look in.
 * @param {string} taskId - task to look for.
 * @returns {string|null} the recorded output, or `null` when none is found.
 */
function resolveBaselineOutput(db, runId, taskId) {
  const history = readEventsDecoded(db, { runId, taskId });

  let index = history.length;
  while (index-- > 0) {
    const { kind, payload } = history[index];
    if (kind !== "task.baseline.failed") continue;

    const hasTextOutput =
      payload !== null && typeof payload === "object" && typeof payload.output === "string";
    if (hasTextOutput) {
      return payload.output;
    }
  }

  return null;
}
|
|
3595
3888
|
function truncateSummary(s, maxChars) {
|
|
3596
3889
|
if (s.length <= maxChars) return s;
|
|
3597
3890
|
return s.slice(0, maxChars - 1) + "\u2026";
|
package/dist/index.js
CHANGED
|
@@ -2,8 +2,9 @@ import {
|
|
|
2
2
|
AGENT_TIERS,
|
|
3
3
|
createAgents,
|
|
4
4
|
formatModelOverrideWarning,
|
|
5
|
+
getStrictPrompt,
|
|
5
6
|
validateModelOverride
|
|
6
|
-
} from "./chunk-CZMAJISX.js";
|
|
7
|
+
} from "./chunk-EK7K4NTV.js";
|
|
7
8
|
import {
|
|
8
9
|
PACKAGE_NAME,
|
|
9
10
|
readOurPackageVersion,
|
|
@@ -163,6 +164,7 @@ function writePermDebugSnapshot(config) {
|
|
|
163
164
|
function resolveHarnessModels(agents, config, pluginOptions) {
|
|
164
165
|
const modelsConfig = pluginOptions?.models ?? config.harness?.models;
|
|
165
166
|
if (!modelsConfig) return agents;
|
|
167
|
+
const midExecuteConfigured = modelsConfig["mid-execute"] !== void 0;
|
|
166
168
|
const warnedIds = /* @__PURE__ */ new Set();
|
|
167
169
|
const warnIfInvalid = (value, source) => {
|
|
168
170
|
const result = validateModelOverride(value);
|
|
@@ -181,11 +183,25 @@ function resolveHarnessModels(agents, config, pluginOptions) {
|
|
|
181
183
|
}
|
|
182
184
|
const tier = AGENT_TIERS[agentName];
|
|
183
185
|
if (tier) {
|
|
184
|
-
|
|
186
|
+
let perTier = modelsConfig[tier];
|
|
187
|
+
if (tier === "mid-execute" && perTier === void 0) {
|
|
188
|
+
perTier = modelsConfig["mid"];
|
|
189
|
+
}
|
|
185
190
|
if (perTier !== void 0) {
|
|
186
191
|
const picked = Array.isArray(perTier) ? perTier[0] : perTier;
|
|
187
192
|
agentCfg.model = picked;
|
|
188
|
-
warnIfInvalid(picked, `models.${tier}`);
|
|
193
|
+
warnIfInvalid(picked, `models.${tier === "mid-execute" && !midExecuteConfigured ? "mid (fallback)" : tier}`);
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
if (midExecuteConfigured) {
|
|
198
|
+
const EXECUTOR_AGENTS = ["build", "qa-reviewer", "pilot-builder"];
|
|
199
|
+
for (const agentName of EXECUTOR_AGENTS) {
|
|
200
|
+
const agentCfg = agents[agentName];
|
|
201
|
+
if (!agentCfg) continue;
|
|
202
|
+
try {
|
|
203
|
+
agentCfg.prompt = getStrictPrompt(agentName);
|
|
204
|
+
} catch {
|
|
189
205
|
}
|
|
190
206
|
}
|
|
191
207
|
}
|
|
@@ -1850,7 +1866,7 @@ import { join as join8 } from "path";
|
|
|
1850
1866
|
var APP_KEY = "A-US-3617699429";
|
|
1851
1867
|
var ENDPOINT = "https://us.aptabase.com/api/v0/event";
|
|
1852
1868
|
var PKG_NAME = "@glrs-dev/harness-plugin-opencode";
|
|
1853
|
-
var PKG_VERSION = true ? "1.0.1" : "dev";
|
|
1869
|
+
var PKG_VERSION = true ? "1.2.0" : "dev";
|
|
1854
1870
|
var DISABLED = process.env.HARNESS_OPENCODE_TELEMETRY === "0" || process.env.HARNESS_OPENCODE_TELEMETRY === "false" || process.env.DO_NOT_TRACK === "1" || process.env.CI === "true";
|
|
1855
1871
|
var SESSION_ID = randomUUID();
|
|
1856
1872
|
function getInstallId() {
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: code-quality
|
|
3
|
+
description: Four principles for autonomous code quality — think before coding, simplicity first, surgical changes, goal-driven execution. Load this skill when planning, building, or reviewing any non-trivial change. Derived from observed patterns in AI-agent-authored PRs where review feedback clustered around wrong assumptions, overcomplication, scope creep, and missing failure-mode coverage.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Code Quality Principles
|
|
7
|
+
|
|
8
|
+
Four principles that prevent the most common classes of defects in AI-agent-authored code. Each principle applies at every pipeline phase, but the enforcement actions differ by phase. Load the rule file for your current role.
|
|
9
|
+
|
|
10
|
+
These principles are derived from empirical analysis of recurring review feedback on agent-authored PRs. The top defect categories — wrong assumptions at system boundaries, overcomplicated implementations, unplanned side-effects, and happy-path-only coverage — are all preventable by applying the right check at the right phase.
|
|
11
|
+
|
|
12
|
+
## The four principles
|
|
13
|
+
|
|
14
|
+
1. **Think Before Coding** — Don't assume. Surface ambiguity, verify cross-boundary names, present tradeoffs, stop when confused.
|
|
15
|
+
2. **Simplicity First** — Minimum code that solves the problem. No speculative features, no single-use abstractions, no "flexibility" that wasn't requested.
|
|
16
|
+
3. **Surgical Changes** — Touch only what you must. Every changed line traces to the plan. Minimize blast radius on security-sensitive files.
|
|
17
|
+
4. **Goal-Driven Execution** — Define success criteria with real verify commands. Enumerate failure modes. Test the error paths, not just the happy path.
|
|
18
|
+
|
|
19
|
+
## Phase-specific rules
|
|
20
|
+
|
|
21
|
+
Each rule file applies all four principles through the lens of a specific pipeline phase. Load the one that matches your current role:
|
|
22
|
+
|
|
23
|
+
1. [`rules/gap-analysis.md`](rules/gap-analysis.md) — For `@gap-analyzer`. Surface hidden assumptions, missing failure modes, naming mismatches, and overscoped plans before the draft is written.
|
|
24
|
+
|
|
25
|
+
2. [`rules/planning.md`](rules/planning.md) — For `@plan` and `@plan-reviewer`. Verify every cross-boundary identifier. Reject plans that exceed what the goal requires. Require failure-mode coverage in acceptance criteria.
|
|
26
|
+
|
|
27
|
+
3. [`rules/building.md`](rules/building.md) — For `@build`. Enforce surgical changes. Verify names before using them. Flag unplanned edits. Write failure-path tests before happy-path code.
|
|
28
|
+
|
|
29
|
+
4. [`rules/review.md`](rules/review.md) — For `@qa-reviewer` and `@qa-thorough`. Verify failure-path coverage in the diff. Grep-confirm cross-boundary string literals. Reject diffs with unplanned scope.
|
|
30
|
+
|
|
31
|
+
## When to load this skill
|
|
32
|
+
|
|
33
|
+
Any non-trivial change — defined as any plan with 3+ file-level changes, or any change touching a system boundary (API contract, database schema, config/security file, cross-service integration).
|
|
34
|
+
|
|
35
|
+
Do NOT load for trivial work (typo fixes, single-file renames, doc-only changes). The overhead isn't worth it.
|
|
36
|
+
|
|
37
|
+
## Observable outcomes
|
|
38
|
+
|
|
39
|
+
These are the signals that the principles are working:
|
|
40
|
+
|
|
41
|
+
- Fewer naming mismatches at system boundaries (cross-boundary identifiers are grep-confirmed before use)
|
|
42
|
+
- Smaller, more focused PRs (plans that exceed ~15 files get split or justified)
|
|
43
|
+
- Zero unplanned changes in diffs (every changed line traces to the plan)
|
|
44
|
+
- Failure-mode coverage in acceptance criteria (negative tests exist for medium+ risk changes)
|
|
45
|
+
- Narrower security-config changes (specific paths instead of broad globs)
|