@glrs-dev/harness-plugin-opencode 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -2,7 +2,7 @@
2
2
  import {
3
3
  createAgents,
4
4
  validateModelOverride
5
- } from "./chunk-CZMAJISX.js";
5
+ } from "./chunk-EK7K4NTV.js";
6
6
  import {
7
7
  getSessionsPath,
8
8
  registerSession,
@@ -11,7 +11,7 @@ import {
11
11
  import {
12
12
  install,
13
13
  requirePlugin
14
- } from "./chunk-WBBN7OVN.js";
14
+ } from "./chunk-BWERBERN.js";
15
15
  import "./chunk-VJUETC6A.js";
16
16
  import {
17
17
  getPilotDir,
@@ -1142,11 +1142,60 @@ CREATE TABLE IF NOT EXISTS events (
1142
1142
  CREATE INDEX IF NOT EXISTS idx_events_run ON events(run_id, id);
1143
1143
  CREATE INDEX IF NOT EXISTS idx_events_run_task ON events(run_id, task_id, id);
1144
1144
  `.trim();
1145
// Migration v2 SQL script (the template literal is trimmed before use).
// In order, it:
//   1. creates the `workflows`, `phases` and `artifacts` tables and the
//      `idx_artifacts_workflow_phase` index, all guarded by IF NOT EXISTS;
//   2. adds a nullable `phase TEXT` column to the existing `events` table;
//   3. backfills one `workflows` row and one 'build' `phases` row per existing
//      `runs` row, and sets `phase = 'build'` on every event whose phase is NULL.
// NOTE(review): the ALTER TABLE and both INSERT ... SELECT statements are not
//   idempotent (no guard / OR IGNORE); this presumably relies on the migration
//   runner applying each version exactly once — confirm in applyMigrations.
// NOTE(review): the backfill copies runs.status into columns constrained to
//   ('pending','running','completed','aborted','failed') and runs.plan_slug into
//   the NOT NULL `goal` column. The `runs` schema is not visible here — verify it
//   cannot hold other status values or a NULL plan_slug, or the migration fails.
// NOTE(review): ON DELETE CASCADE only fires if foreign-key enforcement is
//   enabled on the connection (assuming this is SQLite) — TODO confirm.
+ var V2_SQL = `
1146
+ CREATE TABLE IF NOT EXISTS workflows (
1147
+ id TEXT NOT NULL PRIMARY KEY,
1148
+ goal TEXT NOT NULL,
1149
+ started_at INTEGER NOT NULL,
1150
+ finished_at INTEGER,
1151
+ status TEXT NOT NULL CHECK (status IN ('pending','running','completed','aborted','failed')),
1152
+ current_phase TEXT
1153
+ );
1154
+
1155
+ CREATE TABLE IF NOT EXISTS phases (
1156
+ workflow_id TEXT NOT NULL,
1157
+ name TEXT NOT NULL CHECK (name IN ('scope','plan','build','qa','followup')),
1158
+ status TEXT NOT NULL CHECK (status IN ('pending','running','completed','aborted','failed')),
1159
+ started_at INTEGER,
1160
+ finished_at INTEGER,
1161
+ artifact_path TEXT,
1162
+ PRIMARY KEY (workflow_id, name),
1163
+ FOREIGN KEY (workflow_id) REFERENCES workflows(id) ON DELETE CASCADE
1164
+ );
1165
+
1166
+ CREATE TABLE IF NOT EXISTS artifacts (
1167
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
1168
+ workflow_id TEXT NOT NULL,
1169
+ phase TEXT NOT NULL,
1170
+ kind TEXT NOT NULL,
1171
+ path TEXT NOT NULL,
1172
+ created_at INTEGER NOT NULL,
1173
+ sha256 TEXT,
1174
+ FOREIGN KEY (workflow_id) REFERENCES workflows(id) ON DELETE CASCADE
1175
+ );
1176
+
1177
+ CREATE INDEX IF NOT EXISTS idx_artifacts_workflow_phase ON artifacts(workflow_id, phase);
1178
+
1179
+ ALTER TABLE events ADD COLUMN phase TEXT;
1180
+
1181
+ INSERT INTO workflows (id, goal, started_at, finished_at, status, current_phase)
1182
+ SELECT id, plan_slug, started_at, finished_at, status, 'build' FROM runs;
1183
+
1184
+ INSERT INTO phases (workflow_id, name, status, started_at, finished_at, artifact_path)
1185
+ SELECT id, 'build', status, started_at, finished_at, NULL FROM runs;
1186
+
1187
+ UPDATE events SET phase = 'build' WHERE phase IS NULL;
1188
+ `.trim();
1145
1189
  // Schema migrations, listed in ascending `version` order. Each entry carries
  // a version number, a human-readable description and the SQL text to run.
  // This diff appends version 2 (V2_SQL) after the existing version 1 (V1_SQL).
  // Presumably consumed by applyMigrations(db), whose body is not shown here.
  var MIGRATIONS = [
1146
1190
  {
1147
1191
  version: 1,
1148
1192
  description: "initial pilot schema (runs/tasks/events)",
1149
1193
  sql: V1_SQL
1194
+ },
1195
+ {
1196
+ version: 2,
1197
+ description: "workflows/phases/artifacts tables + events.phase column",
1198
+ sql: V2_SQL
1150
1199
  }
1151
1200
  ];
1152
1201
  function applyMigrations(db) {
@@ -1279,8 +1328,8 @@ function appendEvent(db, args) {
1279
1328
  });
1280
1329
  }
1281
1330
  db.run(
1282
- `INSERT INTO events (run_id, task_id, ts, kind, payload) VALUES (?, ?, ?, ?, ?)`,
1283
- [args.runId, args.taskId ?? null, ts, args.kind, payloadStr]
1331
+ `INSERT INTO events (run_id, task_id, ts, kind, payload, phase) VALUES (?, ?, ?, ?, ?, ?)`,
1332
+ [args.runId, args.taskId ?? null, ts, args.kind, payloadStr, args.phase ?? null]
1284
1333
  );
1285
1334
  if (eventSubscribers.length > 0) {
1286
1335
  const snapshot = eventSubscribers.slice();
@@ -1291,6 +1340,7 @@ function appendEvent(db, args) {
1291
1340
  taskId: args.taskId ?? null,
1292
1341
  kind: args.kind,
1293
1342
  payload: args.payload,
1343
+ phase: args.phase ?? null,
1294
1344
  ts
1295
1345
  });
1296
1346
  } catch {
@@ -1865,25 +1915,78 @@ function fixPrompt(_task, last) {
1865
1915
  return sections.join("\n");
1866
1916
  }
1867
1917
 
1868
- // src/pilot/verify/runner.ts
1869
- import { spawn as spawn2 } from "child_process";
1870
- var DEFAULT_TIMEOUT_MS = 5 * 60 * 1e3;
1871
- var DEFAULT_OUTPUT_CAP_BYTES = 256 * 1024;
1872
- var TRUNCATION_NOTICE = "\n[pilot] verify output truncated\n";
1873
- async function runVerify(commands, options) {
1918
+ // src/pilot/gates/composite.ts
1919
// Evaluates an "all" composite gate: every sub-gate in `gate.gates` must pass.
// Sub-gates are evaluated one at a time, in array order, via evalGate(sub, ctx).
// Returns a result object with:
//   - ok: false on the first failing sub-gate, true if the loop finishes
//     (an empty `gate.gates` therefore yields ok: true);
//   - reason: only on failure, copied from the failing sub-result;
//   - evidence: { kind: "all", results[, failure] } where `results` holds one
//     { gate, result } entry per sub-gate that was actually evaluated;
//   - durationMs: wall-clock time measured with Date.now().
// The "-" lines in this hunk are the old runVerify loop that this replaces.
+ async function evalAllGate(gate, ctx) {
1920
+ const startedAt = Date.now();
1874
1921
  const results = [];
1875
- for (const command10 of commands) {
1876
- const result = await runOne(command10, options);
1877
- results.push(result);
1878
- if (!result.ok) {
1879
- return { ok: false, results, failure: result };
1922
+ for (const sub of gate.gates) {
1923
+ const subResult = await evalGate(sub, ctx);
1924
+ results.push({ gate: sub, result: subResult });
// Fail fast: the failing entry has already been pushed, so it is the last
// element of `results`; later sub-gates are never evaluated.
1925
+ if (!subResult.ok) {
1926
+ const evidence2 = {
1927
+ kind: "all",
1928
+ results,
1929
+ failure: subResult
1930
+ };
1931
+ return {
1932
+ ok: false,
1933
+ reason: subResult.reason,
1934
+ evidence: evidence2,
1935
+ durationMs: Date.now() - startedAt
1936
+ };
1880
1937
  }
1881
1938
  }
1939
+ const evidence = { kind: "all", results };
1882
1940
  return {
1883
1941
  ok: true,
1884
- results
1942
+ evidence,
1943
+ durationMs: Date.now() - startedAt
1885
1944
  };
1886
1945
  }
1946
// Evaluates an "any" composite gate: at least one sub-gate in `gate.gates`
// must pass. Sub-gates are evaluated one at a time, in array order.
// Returns a result object with:
//   - ok: true as soon as one sub-gate passes (later ones are not evaluated);
//     false when `gate.gates` is empty or every sub-gate failed;
//   - reason: only on failure (a fixed message for the empty case, otherwise
//     an "exhausted" message carrying the number of evaluated sub-gates);
//   - evidence: { kind: "any", results[, failure] }, where `failure` is the
//     result of the last sub-gate evaluated;
//   - durationMs: wall-clock time measured with Date.now().
+ async function evalAnyGate(gate, ctx) {
1947
+ const startedAt = Date.now();
1948
+ const results = [];
// Unlike the "all" gate, an empty "any" gate is treated as a failure.
1949
+ if (gate.gates.length === 0) {
1950
+ const evidence2 = { kind: "any", results };
1951
+ return {
1952
+ ok: false,
1953
+ reason: "any-gate has no sub-gates to satisfy",
1954
+ evidence: evidence2,
1955
+ durationMs: Date.now() - startedAt
1956
+ };
1957
+ }
1958
+ let lastResult = null;
1959
+ for (const sub of gate.gates) {
1960
+ const subResult = await evalGate(sub, ctx);
1961
+ results.push({ gate: sub, result: subResult });
1962
+ lastResult = subResult;
1963
+ if (subResult.ok) {
1964
+ const evidence2 = { kind: "any", results };
1965
+ return {
1966
+ ok: true,
1967
+ evidence: evidence2,
1968
+ durationMs: Date.now() - startedAt
1969
+ };
1970
+ }
1971
+ }
// The empty case returned above, so the loop ran at least once and
// lastResult is set here; `?? void 0` only maps a null to undefined.
1972
+ const evidence = {
1973
+ kind: "any",
1974
+ results,
1975
+ failure: lastResult ?? void 0
1976
+ };
1977
+ return {
1978
+ ok: false,
1979
+ reason: `any-gate exhausted: all ${results.length} sub-gates failed`,
1980
+ evidence,
1981
+ durationMs: Date.now() - startedAt
1982
+ };
1983
+ }
1984
+
1985
+ // src/pilot/verify/spawn.ts
1986
+ import { spawn as spawn2 } from "child_process";
1987
// Defaults for the verify command spawner. This hunk re-adds them under
// src/pilot/verify/spawn.ts after removing them from src/pilot/verify/runner.ts;
// the values are unchanged. Their use sites are not visible in this diff.
// 5 * 60 * 1e3 = 300000, i.e. five minutes if read as milliseconds (per the name).
+ var DEFAULT_TIMEOUT_MS = 5 * 60 * 1e3;
1988
// 256 * 1024 = 262144, i.e. 256 KiB if read as bytes (per the name).
+ var DEFAULT_OUTPUT_CAP_BYTES = 256 * 1024;
1989
// Presumably appended to captured output when the cap is hit — TODO confirm in runOne.
+ var TRUNCATION_NOTICE = "\n[pilot] verify output truncated\n";
1887
1990
  async function runOne(command10, options) {
1888
1991
  if (typeof command10 !== "string" || command10.length === 0) {
1889
1992
  throw new TypeError(`runOne: command must be a non-empty string`);
@@ -2020,6 +2123,147 @@ function killTree(child) {
2020
2123
  }, 2e3).unref();
2021
2124
  }
2022
2125
 
2126
+ // src/pilot/gates/shell.ts
2127
// Evaluates a "shell" gate by running `gate.command` through runOne and
// converting the command result with toGateResult.
// The per-gate `timeoutMs` comes from the gate itself; cwd, env, abort signal,
// line callback and output cap come from the shared evaluation context `ctx`
// (note the renames: ctx.onShellLine -> onLine, ctx.shellOutputCapBytes ->
// outputCapBytes).
+ async function evalShellGate(gate, ctx) {
2128
+ const result = await runOne(gate.command, {
2129
+ cwd: ctx.cwd,
2130
+ env: ctx.env,
2131
+ abortSignal: ctx.abortSignal,
2132
+ onLine: ctx.onShellLine,
2133
+ timeoutMs: gate.timeoutMs,
2134
+ outputCapBytes: ctx.shellOutputCapBytes
2135
+ });
2136
+ return toGateResult(result);
2137
+ }
2138
// Wraps a shell command result in the gate-result shape.
// Both branches copy `result.durationMs` and attach the full command result as
// evidence ({ kind: "shell", result }); the failure branch additionally sets
// `reason` to the message built by formatShellFailure.
+ function toGateResult(result) {
2139
+ if (result.ok) {
2140
+ return {
2141
+ ok: true,
2142
+ durationMs: result.durationMs,
2143
+ evidence: { kind: "shell", result }
2144
+ };
2145
+ }
2146
+ const reason = formatShellFailure(result);
2147
+ return {
2148
+ ok: false,
2149
+ reason,
2150
+ durationMs: result.durationMs,
2151
+ evidence: { kind: "shell", result }
2152
+ };
2153
+ }
2154
// Builds the one-line failure reason for a shell gate:
//   "shell gate failed: <command> -> exit <exitCode>[ [flag,flag,...]]"
// (the arrow is the \u2192 character). Flags are appended, in this order, for a
// truthy `timedOut`, `aborted` and `signal` ("signal=<name>"); with no flags
// the bracketed suffix is omitted.
// NOTE(review): `exitCode` is interpolated as-is. If it can be null (e.g. a
//   process killed by a signal), the message reads "exit null" — confirm the
//   CommandResult shape produced by runOne.
+ function formatShellFailure(result) {
2155
+ const flags = [];
2156
+ if (result.timedOut) flags.push("timed-out");
2157
+ if (result.aborted) flags.push("aborted");
2158
+ if (result.signal) flags.push(`signal=${result.signal}`);
2159
+ const flagSuffix = flags.length > 0 ? ` [${flags.join(",")}]` : "";
2160
+ return `shell gate failed: ${result.command} \u2192 exit ${result.exitCode}${flagSuffix}`;
2161
+ }
2162
+
2163
+ // src/pilot/gates/eval.ts
2164
// Gate dispatcher: routes on `gate.kind` to the shell, all or any evaluator
// and returns that evaluator's result. The composite evaluators call back into
// this function for their sub-gates, so gates can nest.
// An unrecognised kind throws an Error; because the function is async, callers
// observe it as a rejected promise.
+ async function evalGate(gate, ctx) {
2165
+ switch (gate.kind) {
2166
+ case "shell":
2167
+ return evalShellGate(gate, ctx);
2168
+ case "all":
2169
+ return evalAllGate(gate, ctx);
2170
+ case "any":
2171
+ return evalAnyGate(gate, ctx);
2172
+ default: {
// `_exhaustive` looks like the residue of a TypeScript exhaustiveness
// check in the .ts source; at runtime it is just an alias for `gate`.
2173
+ const _exhaustive = gate;
2174
+ throw new Error(
2175
+ `evalGate: unknown gate kind ${_exhaustive.kind}`
2176
+ );
2177
+ }
2178
+ }
2179
+ }
2180
+
2181
+ // src/pilot/gates/types.ts
2182
// Narrowing helper: returns `evidence` unchanged when it is a non-null object
// whose `kind` is "shell"; otherwise returns null. Never throws.
+ function asShellEvidence(evidence) {
2183
+ if (typeof evidence === "object" && evidence !== null && evidence.kind === "shell") {
2184
+ return evidence;
2185
+ }
2186
+ return null;
2187
+ }
2188
// Narrowing helper: returns `evidence` unchanged when it is a non-null object
// whose `kind` is "all" or "any"; otherwise returns null. Never throws.
// Callers that need a specific composite kind must still check `kind`.
+ function asCompositeEvidence(evidence) {
2189
+ if (typeof evidence === "object" && evidence !== null && (evidence.kind === "all" || evidence.kind === "any")) {
2190
+ return evidence;
2191
+ }
2192
+ return null;
2193
+ }
2194
+
2195
+ // src/pilot/verify/runner.ts
2196
// Runs the verify commands by expressing them as an "all" gate of "shell"
// sub-gates and evaluating that gate with evalGate.
// - `commands`: array of command strings; an empty array short-circuits to
//   { ok: true, results: [] } without building a gate.
// - `options`: supplies timeoutMs (copied onto every shell gate) plus cwd, env,
//   abortSignal, onLine and outputCapBytes (forwarded through the gate context).
// The gate result is converted by toRunVerifyResult into
// { ok, results[, failure] }, the same shape the removed loop-based runVerify
// returned.
+ async function runVerify(commands, options) {
2197
+ if (commands.length === 0) {
2198
+ return { ok: true, results: [] };
2199
+ }
2200
+ const gate = {
2201
+ kind: "all",
2202
+ gates: commands.map((command10) => ({
2203
+ kind: "shell",
2204
+ command: command10,
2205
+ timeoutMs: options.timeoutMs
2206
+ }))
2207
+ };
2208
+ const ctx = {
2209
+ cwd: options.cwd,
2210
+ env: options.env,
2211
+ abortSignal: options.abortSignal,
2212
+ onShellLine: options.onLine,
2213
+ shellOutputCapBytes: options.outputCapBytes
2214
+ };
2215
+ const gateResult = await evalGate(gate, ctx);
2216
+ return toRunVerifyResult(gateResult);
2217
+ }
2218
// Converts the result of an "all" gate of shell sub-gates into the
// { ok, results[, failure] } shape returned by runVerify.
// `results` is the list of per-command results unwrapped from each sub-gate's
// shell evidence; `failure` (failure case only) is the command result of the
// failing sub-gate.
// Throws an Error when the evidence is not "all" composite evidence, when a
// child lacks shell evidence (via extractCommandResult), or when a failed gate
// has no failing last entry / a failing entry whose command result is ok.
+ function toRunVerifyResult(gateResult) {
2219
+ const composite = asCompositeEvidence(gateResult.evidence);
2220
+ if (composite === null || composite.kind !== "all") {
2221
+ throw new Error(
2222
+ `runVerify: expected composite all-gate evidence, got ${gateResultDescriptor(gateResult)}`
2223
+ );
2224
+ }
2225
+ const results = composite.results.map((entry) => extractCommandResult(entry));
2226
+ if (gateResult.ok) {
2227
+ return {
2228
+ ok: true,
2229
+ results
2230
+ };
2231
+ }
// evalAllGate returns right after pushing the first failing sub-result, so
// the failing entry is expected to be the last one recorded.
2232
+ const failingEntry = composite.results[composite.results.length - 1];
2233
+ if (!failingEntry || failingEntry.result.ok) {
2234
+ throw new Error(
2235
+ "runVerify: all-gate failed but no failing sub-result was recorded"
2236
+ );
2237
+ }
2238
+ const failureCommandResult = extractCommandResult(failingEntry);
// Consistency check: the gate-level result and the underlying command
// result must agree that this entry failed.
2239
+ if (failureCommandResult.ok) {
2240
+ throw new Error(
2241
+ "runVerify: failing sub-gate produced a successful CommandResult"
2242
+ );
2243
+ }
2244
+ return {
2245
+ ok: false,
2246
+ results,
2247
+ failure: failureCommandResult
2248
+ };
2249
+ }
2250
// Unwraps the command result stored in one { gate, result } entry of an
// all-gate's evidence. Returns `entry.result.evidence.result` when that
// evidence is shell evidence; throws an Error (with a compact descriptor of the
// offending sub-result) otherwise.
+ function extractCommandResult(entry) {
2251
+ const shell = asShellEvidence(entry.result.evidence);
2252
+ if (shell === null) {
2253
+ throw new Error(
2254
+ `runVerify: expected shell-gate evidence in all-gate child, got ${gateResultDescriptor(entry.result)}`
2255
+ );
2256
+ }
2257
+ return shell.result;
2258
+ }
2259
// Produces a short JSON string describing a gate result for error messages:
// {"ok": <result.ok>, "evidenceKind": <evidence.kind or null>}.
// Only these two fields are included, so the command output held in the
// evidence is not part of the string; missing evidence yields evidenceKind null.
+ function gateResultDescriptor(result) {
2260
+ const evidence = result.evidence;
2261
+ return JSON.stringify({
2262
+ ok: result.ok,
2263
+ evidenceKind: evidence?.kind ?? null
2264
+ });
2265
+ }
2266
+
2023
2267
  // src/pilot/verify/touches.ts
2024
2268
  import picomatch2 from "picomatch";
2025
2269
  import { execFile as execFile2 } from "child_process";
@@ -2530,7 +2774,11 @@ async function runOneTaskImpl(deps, task, opts) {
2530
2774
  command: f.command,
2531
2775
  exitCode: f.exitCode,
2532
2776
  output: f.output.slice(0, 4096),
2533
- reason: reason2
2777
+ reason: reason2,
2778
+ // Step 1 of pilot redesign: gate descriptor on every
2779
+ // verify-derived event. Future LLM/approval gates emit
2780
+ // identically-shaped events with a different `gate.kind`.
2781
+ gate: { kind: "shell", command: f.command }
2534
2782
  }
2535
2783
  });
2536
2784
  return;
@@ -2539,7 +2787,10 @@ async function runOneTaskImpl(deps, task, opts) {
2539
2787
  runId: deps.runId,
2540
2788
  taskId: task.id,
2541
2789
  kind: "task.baseline.passed",
2542
- payload: { commands: allVerify.length }
2790
+ payload: {
2791
+ commands: allVerify.length,
2792
+ gate: { kind: "all", subKind: "shell", count: baselineVerify.length }
2793
+ }
2543
2794
  });
2544
2795
  }
2545
2796
  let lastFailure = null;
@@ -2695,7 +2946,8 @@ async function runOneTaskImpl(deps, task, opts) {
2695
2946
  exitCode: lastFailure.exitCode,
2696
2947
  timedOut: verifyResult.failure.timedOut,
2697
2948
  aborted: verifyResult.failure.aborted,
2698
- output: verifyResult.failure.output.slice(-2048)
2949
+ output: verifyResult.failure.output.slice(-2048),
2950
+ gate: { kind: "shell", command: lastFailure.command }
2699
2951
  }
2700
2952
  });
2701
2953
  if (verifyResult.failure.aborted) {
@@ -2721,7 +2973,10 @@ async function runOneTaskImpl(deps, task, opts) {
2721
2973
  runId: deps.runId,
2722
2974
  taskId: task.id,
2723
2975
  kind: "task.verify.passed",
2724
- payload: { attempt }
2976
+ payload: {
2977
+ attempt,
2978
+ gate: { kind: "all", subKind: "shell", count: allVerify.length }
2979
+ }
2725
2980
  });
2726
2981
  const touches = await enforceTouches({
2727
2982
  cwd,
@@ -3311,7 +3566,7 @@ function startStreamingLogger(args) {
3311
3566
  const taskStart = /* @__PURE__ */ new Map();
3312
3567
  let succeeded = 0;
3313
3568
  let failed = 0;
3314
- const INLINE_BLOCKED_CAP = 5;
3569
+ const INLINE_BLOCKED_CAP = 0;
3315
3570
  let blockedCount = 0;
3316
3571
  let blockedInlineEmitted = 0;
3317
3572
  let blockedOverflowEmitted = false;
@@ -3350,6 +3605,24 @@ function startStreamingLogger(args) {
3350
3605
  if (id !== null) taskStart.set(id, event.ts);
3351
3606
  write(`task.started ${id ?? "?"}`);
3352
3607
  break;
3608
+ case "task.baseline.passed":
3609
+ break;
3610
+ case "task.baseline.failed": {
3611
+ const bp = event.payload;
3612
+ if (bp !== null && typeof bp === "object" && typeof bp.command === "string" && typeof bp.exitCode === "number") {
3613
+ write(
3614
+ `task.baseline.failed ${id ?? "?"} (${bp.command} \u2192 exit ${bp.exitCode})`
3615
+ );
3616
+ const output = typeof bp.output === "string" ? bp.output : null;
3617
+ if (output !== null && output.trim().length > 0) {
3618
+ const tail = output.trim().split("\n").slice(-6).map((l) => ` ${l}`).join("\n");
3619
+ writeRaw(tail);
3620
+ }
3621
+ } else {
3622
+ write(`task.baseline.failed ${id ?? "?"}`);
3623
+ }
3624
+ break;
3625
+ }
3353
3626
  case "task.verify.passed":
3354
3627
  write(`task.verify.passed ${id ?? "?"}`);
3355
3628
  break;
@@ -3435,7 +3708,7 @@ function startStreamingLogger(args) {
3435
3708
  case "task.attempt": {
3436
3709
  const p = event.payload;
3437
3710
  if (p !== null && typeof p === "object" && typeof p.attempt === "number" && typeof p.of === "number" && p.attempt >= 2) {
3438
- writeRaw(` attempt ${p.attempt}/${p.of} (retry with fix prompt)`);
3711
+ write(`task.retry ${id ?? "?"} attempt ${p.attempt}/${p.of}`);
3439
3712
  }
3440
3713
  break;
3441
3714
  }
@@ -3561,9 +3834,17 @@ Failed tasks (${failed.length}):
3561
3834
  session: ${session}
3562
3835
  worktree: ${worktree}
3563
3836
  elapsed: ${elapsed} attempts: ${t.attempts}
3564
-
3565
3837
  `
3566
3838
  );
3839
+ const baselineOutput = resolveBaselineOutput(db, runId, t.task_id);
3840
+ if (baselineOutput !== null) {
3841
+ const tail = baselineOutput.trim().split("\n").slice(-6).map((l) => ` ${l}`).join("\n");
3842
+ process.stdout.write(` output:
3843
+ ${tail}
3844
+ `);
3845
+ }
3846
+ process.stdout.write(`
3847
+ `);
3567
3848
  }
3568
3849
  }
3569
3850
  }
@@ -3592,6 +3873,18 @@ function resolveFailureDetail(db, runId, row) {
3592
3873
  reason: row.last_error ?? "(no reason recorded)"
3593
3874
  };
3594
3875
  }
3876
// Looks up the captured output of a task's baseline failure.
// Reads the task's events via readEventsDecoded(db, { runId, taskId }) and
// scans them from the end of the array towards the start, returning
// `payload.output` of the first "task.baseline.failed" event whose payload is
// an object with a string `output`. Matching events without a string output
// are skipped, not treated as a miss. Returns null when nothing qualifies.
// Presumably the events are ordered oldest-first, making this the most recent
// baseline failure — TODO confirm readEventsDecoded's ordering.
+ function resolveBaselineOutput(db, runId, taskId) {
3877
+ const events = readEventsDecoded(db, { runId, taskId });
3878
+ for (let i = events.length - 1; i >= 0; i--) {
3879
+ const e = events[i];
3880
+ if (e.kind !== "task.baseline.failed") continue;
3881
+ const p = e.payload;
3882
+ if (p !== null && typeof p === "object" && typeof p.output === "string") {
3883
+ return p.output;
3884
+ }
3885
+ }
3886
+ return null;
3887
+ }
3595
3888
  function truncateSummary(s, maxChars) {
3596
3889
  if (s.length <= maxChars) return s;
3597
3890
  return s.slice(0, maxChars - 1) + "\u2026";
package/dist/index.js CHANGED
@@ -2,8 +2,9 @@ import {
2
2
  AGENT_TIERS,
3
3
  createAgents,
4
4
  formatModelOverrideWarning,
5
+ getStrictPrompt,
5
6
  validateModelOverride
6
- } from "./chunk-CZMAJISX.js";
7
+ } from "./chunk-EK7K4NTV.js";
7
8
  import {
8
9
  PACKAGE_NAME,
9
10
  readOurPackageVersion,
@@ -163,6 +164,7 @@ function writePermDebugSnapshot(config) {
163
164
  function resolveHarnessModels(agents, config, pluginOptions) {
164
165
  const modelsConfig = pluginOptions?.models ?? config.harness?.models;
165
166
  if (!modelsConfig) return agents;
167
+ const midExecuteConfigured = modelsConfig["mid-execute"] !== void 0;
166
168
  const warnedIds = /* @__PURE__ */ new Set();
167
169
  const warnIfInvalid = (value, source) => {
168
170
  const result = validateModelOverride(value);
@@ -181,11 +183,25 @@ function resolveHarnessModels(agents, config, pluginOptions) {
181
183
  }
182
184
  const tier = AGENT_TIERS[agentName];
183
185
  if (tier) {
184
- const perTier = modelsConfig[tier];
186
+ let perTier = modelsConfig[tier];
187
+ if (tier === "mid-execute" && perTier === void 0) {
188
+ perTier = modelsConfig["mid"];
189
+ }
185
190
  if (perTier !== void 0) {
186
191
  const picked = Array.isArray(perTier) ? perTier[0] : perTier;
187
192
  agentCfg.model = picked;
188
- warnIfInvalid(picked, `models.${tier}`);
193
+ warnIfInvalid(picked, `models.${tier === "mid-execute" && !midExecuteConfigured ? "mid (fallback)" : tier}`);
194
+ }
195
+ }
196
+ }
197
+ if (midExecuteConfigured) {
198
+ const EXECUTOR_AGENTS = ["build", "qa-reviewer", "pilot-builder"];
199
+ for (const agentName of EXECUTOR_AGENTS) {
200
+ const agentCfg = agents[agentName];
201
+ if (!agentCfg) continue;
202
+ try {
203
+ agentCfg.prompt = getStrictPrompt(agentName);
204
+ } catch {
189
205
  }
190
206
  }
191
207
  }
@@ -1850,7 +1866,7 @@ import { join as join8 } from "path";
1850
1866
  var APP_KEY = "A-US-3617699429";
1851
1867
  var ENDPOINT = "https://us.aptabase.com/api/v0/event";
1852
1868
  var PKG_NAME = "@glrs-dev/harness-plugin-opencode";
1853
- var PKG_VERSION = true ? "1.0.1" : "dev";
1869
+ var PKG_VERSION = true ? "1.2.0" : "dev";
1854
1870
  var DISABLED = process.env.HARNESS_OPENCODE_TELEMETRY === "0" || process.env.HARNESS_OPENCODE_TELEMETRY === "false" || process.env.DO_NOT_TRACK === "1" || process.env.CI === "true";
1855
1871
  var SESSION_ID = randomUUID();
1856
1872
  function getInstallId() {
@@ -3,7 +3,7 @@ import {
3
3
  install,
4
4
  writeMcpToggles,
5
5
  writePluginOption
6
- } from "./chunk-WBBN7OVN.js";
6
+ } from "./chunk-BWERBERN.js";
7
7
  import "./chunk-VJUETC6A.js";
8
8
  export {
9
9
  MODEL_PRESETS,
@@ -0,0 +1,45 @@
1
+ ---
2
+ name: code-quality
3
+ description: Four principles for autonomous code quality — think before coding, simplicity first, surgical changes, goal-driven execution. Load this skill when planning, building, or reviewing any non-trivial change. Derived from observed patterns in AI-agent-authored PRs where review feedback clustered around wrong assumptions, overcomplication, scope creep, and missing failure-mode coverage.
4
+ ---
5
+
6
+ # Code Quality Principles
7
+
8
+ Four principles that prevent the most common classes of defects in AI-agent-authored code. Each principle applies at every pipeline phase, but the enforcement actions differ by phase. Load the rule file for your current role.
9
+
10
+ These principles are derived from empirical analysis of recurring review feedback on agent-authored PRs. The top defect categories — wrong assumptions at system boundaries, overcomplicated implementations, unplanned side-effects, and happy-path-only coverage — are all preventable by applying the right check at the right phase.
11
+
12
+ ## The four principles
13
+
14
+ 1. **Think Before Coding** — Don't assume. Surface ambiguity, verify cross-boundary names, present tradeoffs, stop when confused.
15
+ 2. **Simplicity First** — Minimum code that solves the problem. No speculative features, no single-use abstractions, no "flexibility" that wasn't requested.
16
+ 3. **Surgical Changes** — Touch only what you must. Every changed line traces to the plan. Minimize blast radius on security-sensitive files.
17
+ 4. **Goal-Driven Execution** — Define success criteria with real verify commands. Enumerate failure modes. Test the error paths, not just the happy path.
18
+
19
+ ## Phase-specific rules
20
+
21
+ Each rule file applies all four principles through the lens of a specific pipeline phase. Load the one that matches your current role:
22
+
23
+ 1. [`rules/gap-analysis.md`](rules/gap-analysis.md) — For `@gap-analyzer`. Surface hidden assumptions, missing failure modes, naming mismatches, and overscoped plans before the draft is written.
24
+
25
+ 2. [`rules/planning.md`](rules/planning.md) — For `@plan` and `@plan-reviewer`. Verify every cross-boundary identifier. Reject plans that exceed what the goal requires. Require failure-mode coverage in acceptance criteria.
26
+
27
+ 3. [`rules/building.md`](rules/building.md) — For `@build`. Enforce surgical changes. Verify names before using them. Flag unplanned edits. Write failure-path tests before happy-path code.
28
+
29
+ 4. [`rules/review.md`](rules/review.md) — For `@qa-reviewer` and `@qa-thorough`. Verify failure-path coverage in the diff. Grep-confirm cross-boundary string literals. Reject diffs with unplanned scope.
30
+
31
+ ## When to load this skill
32
+
33
+ Any non-trivial change — defined as any plan with 3+ file-level changes, or any change touching a system boundary (API contract, database schema, config/security file, cross-service integration).
34
+
35
+ Do NOT load for trivial work (typo fixes, single-file renames, doc-only changes). The overhead isn't worth it.
36
+
37
+ ## Observable outcomes
38
+
39
+ These are the signals that the principles are working:
40
+
41
+ - Fewer naming mismatches at system boundaries (cross-boundary identifiers are grep-confirmed before use)
42
+ - Smaller, more focused PRs (plans that exceed ~15 files get split or justified)
43
+ - Zero unplanned changes in diffs (every changed line traces to the plan)
44
+ - Failure-mode coverage in acceptance criteria (negative tests exist for medium+ risk changes)
45
+ - Narrower security-config changes (specific paths instead of broad globs)