npm - @glrs-dev/harness-plugin-opencode - Versions diffs - 1.0.1 → 1.2.0 - Mend

@glrs-dev/harness-plugin-opencode 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/CHANGELOG.md +14 -0
package/dist/agents/prompts/build.open.md +88 -0
package/dist/agents/prompts/pilot-builder.open.md +129 -0
package/dist/agents/prompts/plan.md +7 -0
package/dist/agents/prompts/prime.md +38 -0
package/dist/agents/prompts/qa-reviewer.open.md +58 -0
package/dist/{chunk-WBBN7OVN.js → chunk-BWERBERN.js} +31 -3
package/dist/{chunk-CZMAJISX.js → chunk-EK7K4NTV.js} +19 -3
package/dist/cli.js +316 -23
package/dist/index.js +20 -4
package/dist/{install-X5KEANRB.js → install-5JKWK6Z4.js} +1 -1
package/dist/skills/code-quality/SKILL.md +45 -0
package/dist/skills/code-quality/rules/building.md +125 -0
package/dist/skills/code-quality/rules/gap-analysis.md +92 -0
package/dist/skills/code-quality/rules/planning.md +96 -0
package/dist/skills/code-quality/rules/review.md +104 -0
package/dist/skills/pilot-planning/rules/self-review.md +1 -1
package/dist/skills/pilot-planning/rules/verify-design.md +42 -0
package/package.json +1 -1

package/dist/cli.js CHANGED

@@ -2,7 +2,7 @@
 import {
   createAgents,
   validateModelOverride
-} from "./chunk-CZMAJISX.js";
+} from "./chunk-EK7K4NTV.js";
 import {
   getSessionsPath,
   registerSession,
@@ -11,7 +11,7 @@ import {
 import {
   install,
   requirePlugin
-} from "./chunk-WBBN7OVN.js";
+} from "./chunk-BWERBERN.js";
 import "./chunk-VJUETC6A.js";
 import {
   getPilotDir,
@@ -1142,11 +1142,60 @@ CREATE TABLE IF NOT EXISTS events (
 CREATE INDEX IF NOT EXISTS idx_events_run ON events(run_id, id);
 CREATE INDEX IF NOT EXISTS idx_events_run_task ON events(run_id, task_id, id);
 `.trim();
+var V2_SQL = `
+CREATE TABLE IF NOT EXISTS workflows (
+  id            TEXT    NOT NULL PRIMARY KEY,
+  goal          TEXT    NOT NULL,
+  started_at    INTEGER NOT NULL,
+  finished_at   INTEGER,
+  status        TEXT    NOT NULL CHECK (status IN ('pending','running','completed','aborted','failed')),
+  current_phase TEXT
+);
+CREATE TABLE IF NOT EXISTS phases (
+  workflow_id   TEXT    NOT NULL,
+  name          TEXT    NOT NULL CHECK (name IN ('scope','plan','build','qa','followup')),
+  status        TEXT    NOT NULL CHECK (status IN ('pending','running','completed','aborted','failed')),
+  started_at    INTEGER,
+  finished_at   INTEGER,
+  artifact_path TEXT,
+  PRIMARY KEY (workflow_id, name),
+  FOREIGN KEY (workflow_id) REFERENCES workflows(id) ON DELETE CASCADE
+);
+CREATE TABLE IF NOT EXISTS artifacts (
+  id          INTEGER PRIMARY KEY AUTOINCREMENT,
+  workflow_id TEXT    NOT NULL,
+  phase       TEXT    NOT NULL,
+  kind        TEXT    NOT NULL,
+  path        TEXT    NOT NULL,
+  created_at  INTEGER NOT NULL,
+  sha256      TEXT,
+  FOREIGN KEY (workflow_id) REFERENCES workflows(id) ON DELETE CASCADE
+);
+CREATE INDEX IF NOT EXISTS idx_artifacts_workflow_phase ON artifacts(workflow_id, phase);
+ALTER TABLE events ADD COLUMN phase TEXT;
+INSERT INTO workflows (id, goal, started_at, finished_at, status, current_phase)
+SELECT id, plan_slug, started_at, finished_at, status, 'build' FROM runs;
+INSERT INTO phases (workflow_id, name, status, started_at, finished_at, artifact_path)
+SELECT id, 'build', status, started_at, finished_at, NULL FROM runs;
+UPDATE events SET phase = 'build' WHERE phase IS NULL;
+`.trim();
 var MIGRATIONS = [
   {
     version: 1,
     description: "initial pilot schema (runs/tasks/events)",
     sql: V1_SQL
+  },
+  {
+    version: 2,
+    description: "workflows/phases/artifacts tables + events.phase column",
+    sql: V2_SQL
   }
 ];
 function applyMigrations(db) {
@@ -1279,8 +1328,8 @@ function appendEvent(db, args) {
     });
   }
   db.run(
-    `INSERT INTO events (run_id, task_id, ts, kind, payload) VALUES (?, ?, ?, ?, ?)`,
-    [args.runId, args.taskId ?? null, ts, args.kind, payloadStr]
+    `INSERT INTO events (run_id, task_id, ts, kind, payload, phase) VALUES (?, ?, ?, ?, ?, ?)`,
+    [args.runId, args.taskId ?? null, ts, args.kind, payloadStr, args.phase ?? null]
   );
   if (eventSubscribers.length > 0) {
     const snapshot = eventSubscribers.slice();
@@ -1291,6 +1340,7 @@ function appendEvent(db, args) {
           taskId: args.taskId ?? null,
           kind: args.kind,
           payload: args.payload,
+          phase: args.phase ?? null,
           ts
         });
       } catch {
@@ -1865,25 +1915,78 @@ function fixPrompt(_task, last) {
   return sections.join("\n");
 }
-// src/pilot/verify/runner.ts
-import { spawn as spawn2 } from "child_process";
-var DEFAULT_TIMEOUT_MS = 5 * 60 * 1e3;
-var DEFAULT_OUTPUT_CAP_BYTES = 256 * 1024;
-var TRUNCATION_NOTICE = "\n[pilot] verify output truncated\n";
-async function runVerify(commands, options) {
+// src/pilot/gates/composite.ts
+async function evalAllGate(gate, ctx) {
+  const startedAt = Date.now();
   const results = [];
-  for (const command10 of commands) {
-    const result = await runOne(command10, options);
-    results.push(result);
-    if (!result.ok) {
-      return { ok: false, results, failure: result };
+  for (const sub of gate.gates) {
+    const subResult = await evalGate(sub, ctx);
+    results.push({ gate: sub, result: subResult });
+    if (!subResult.ok) {
+      const evidence2 = {
+        kind: "all",
+        results,
+        failure: subResult
+      };
+      return {
+        ok: false,
+        reason: subResult.reason,
+        evidence: evidence2,
+        durationMs: Date.now() - startedAt
+      };
     }
   }
+  const evidence = { kind: "all", results };
   return {
     ok: true,
-    results
+    evidence,
+    durationMs: Date.now() - startedAt
   };
 }
+async function evalAnyGate(gate, ctx) {
+  const startedAt = Date.now();
+  const results = [];
+  if (gate.gates.length === 0) {
+    const evidence2 = { kind: "any", results };
+    return {
+      ok: false,
+      reason: "any-gate has no sub-gates to satisfy",
+      evidence: evidence2,
+      durationMs: Date.now() - startedAt
+    };
+  }
+  let lastResult = null;
+  for (const sub of gate.gates) {
+    const subResult = await evalGate(sub, ctx);
+    results.push({ gate: sub, result: subResult });
+    lastResult = subResult;
+    if (subResult.ok) {
+      const evidence2 = { kind: "any", results };
+      return {
+        ok: true,
+        evidence: evidence2,
+        durationMs: Date.now() - startedAt
+      };
+    }
+  }
+  const evidence = {
+    kind: "any",
+    results,
+    failure: lastResult ?? void 0
+  };
+  return {
+    ok: false,
+    reason: `any-gate exhausted: all ${results.length} sub-gates failed`,
+    evidence,
+    durationMs: Date.now() - startedAt
+  };
+}
+// src/pilot/verify/spawn.ts
+import { spawn as spawn2 } from "child_process";
+var DEFAULT_TIMEOUT_MS = 5 * 60 * 1e3;
+var DEFAULT_OUTPUT_CAP_BYTES = 256 * 1024;
+var TRUNCATION_NOTICE = "\n[pilot] verify output truncated\n";
 async function runOne(command10, options) {
   if (typeof command10 !== "string" || command10.length === 0) {
     throw new TypeError(`runOne: command must be a non-empty string`);
@@ -2020,6 +2123,147 @@ function killTree(child) {
   }, 2e3).unref();
 }
+// src/pilot/gates/shell.ts
+async function evalShellGate(gate, ctx) {
+  const result = await runOne(gate.command, {
+    cwd: ctx.cwd,
+    env: ctx.env,
+    abortSignal: ctx.abortSignal,
+    onLine: ctx.onShellLine,
+    timeoutMs: gate.timeoutMs,
+    outputCapBytes: ctx.shellOutputCapBytes
+  });
+  return toGateResult(result);
+}
+function toGateResult(result) {
+  if (result.ok) {
+    return {
+      ok: true,
+      durationMs: result.durationMs,
+      evidence: { kind: "shell", result }
+    };
+  }
+  const reason = formatShellFailure(result);
+  return {
+    ok: false,
+    reason,
+    durationMs: result.durationMs,
+    evidence: { kind: "shell", result }
+  };
+}
+function formatShellFailure(result) {
+  const flags = [];
+  if (result.timedOut) flags.push("timed-out");
+  if (result.aborted) flags.push("aborted");
+  if (result.signal) flags.push(`signal=${result.signal}`);
+  const flagSuffix = flags.length > 0 ? ` [${flags.join(",")}]` : "";
+  return `shell gate failed: ${result.command} \u2192 exit ${result.exitCode}${flagSuffix}`;
+}
+// src/pilot/gates/eval.ts
+async function evalGate(gate, ctx) {
+  switch (gate.kind) {
+    case "shell":
+      return evalShellGate(gate, ctx);
+    case "all":
+      return evalAllGate(gate, ctx);
+    case "any":
+      return evalAnyGate(gate, ctx);
+    default: {
+      const _exhaustive = gate;
+      throw new Error(
+        `evalGate: unknown gate kind ${_exhaustive.kind}`
+      );
+    }
+  }
+}
+// src/pilot/gates/types.ts
+function asShellEvidence(evidence) {
+  if (typeof evidence === "object" && evidence !== null && evidence.kind === "shell") {
+    return evidence;
+  }
+  return null;
+}
+function asCompositeEvidence(evidence) {
+  if (typeof evidence === "object" && evidence !== null && (evidence.kind === "all" || evidence.kind === "any")) {
+    return evidence;
+  }
+  return null;
+}
+// src/pilot/verify/runner.ts
+async function runVerify(commands, options) {
+  if (commands.length === 0) {
+    return { ok: true, results: [] };
+  }
+  const gate = {
+    kind: "all",
+    gates: commands.map((command10) => ({
+      kind: "shell",
+      command: command10,
+      timeoutMs: options.timeoutMs
+    }))
+  };
+  const ctx = {
+    cwd: options.cwd,
+    env: options.env,
+    abortSignal: options.abortSignal,
+    onShellLine: options.onLine,
+    shellOutputCapBytes: options.outputCapBytes
+  };
+  const gateResult = await evalGate(gate, ctx);
+  return toRunVerifyResult(gateResult);
+}
+function toRunVerifyResult(gateResult) {
+  const composite = asCompositeEvidence(gateResult.evidence);
+  if (composite === null || composite.kind !== "all") {
+    throw new Error(
+      `runVerify: expected composite all-gate evidence, got ${gateResultDescriptor(gateResult)}`
+    );
+  }
+  const results = composite.results.map((entry) => extractCommandResult(entry));
+  if (gateResult.ok) {
+    return {
+      ok: true,
+      results
+    };
+  }
+  const failingEntry = composite.results[composite.results.length - 1];
+  if (!failingEntry || failingEntry.result.ok) {
+    throw new Error(
+      "runVerify: all-gate failed but no failing sub-result was recorded"
+    );
+  }
+  const failureCommandResult = extractCommandResult(failingEntry);
+  if (failureCommandResult.ok) {
+    throw new Error(
+      "runVerify: failing sub-gate produced a successful CommandResult"
+    );
+  }
+  return {
+    ok: false,
+    results,
+    failure: failureCommandResult
+  };
+}
+function extractCommandResult(entry) {
+  const shell = asShellEvidence(entry.result.evidence);
+  if (shell === null) {
+    throw new Error(
+      `runVerify: expected shell-gate evidence in all-gate child, got ${gateResultDescriptor(entry.result)}`
+    );
+  }
+  return shell.result;
+}
+function gateResultDescriptor(result) {
+  const evidence = result.evidence;
+  return JSON.stringify({
+    ok: result.ok,
+    evidenceKind: evidence?.kind ?? null
+  });
+}
 // src/pilot/verify/touches.ts
 import picomatch2 from "picomatch";
 import { execFile as execFile2 } from "child_process";
@@ -2530,7 +2774,11 @@ async function runOneTaskImpl(deps, task, opts) {
           command: f.command,
           exitCode: f.exitCode,
           output: f.output.slice(0, 4096),
-          reason: reason2
+          reason: reason2,
+          // Step 1 of pilot redesign: gate descriptor on every
+          // verify-derived event. Future LLM/approval gates emit
+          // identically-shaped events with a different `gate.kind`.
+          gate: { kind: "shell", command: f.command }
         }
       });
       return;
@@ -2539,7 +2787,10 @@ async function runOneTaskImpl(deps, task, opts) {
       runId: deps.runId,
       taskId: task.id,
       kind: "task.baseline.passed",
-      payload: { commands: allVerify.length }
+      payload: {
+        commands: allVerify.length,
+        gate: { kind: "all", subKind: "shell", count: baselineVerify.length }
+      }
     });
   }
   let lastFailure = null;
@@ -2695,7 +2946,8 @@ async function runOneTaskImpl(deps, task, opts) {
           exitCode: lastFailure.exitCode,
           timedOut: verifyResult.failure.timedOut,
           aborted: verifyResult.failure.aborted,
-          output: verifyResult.failure.output.slice(-2048)
+          output: verifyResult.failure.output.slice(-2048),
+          gate: { kind: "shell", command: lastFailure.command }
         }
       });
       if (verifyResult.failure.aborted) {
@@ -2721,7 +2973,10 @@ async function runOneTaskImpl(deps, task, opts) {
       runId: deps.runId,
       taskId: task.id,
       kind: "task.verify.passed",
-      payload: { attempt }
+      payload: {
+        attempt,
+        gate: { kind: "all", subKind: "shell", count: allVerify.length }
+      }
     });
     const touches = await enforceTouches({
       cwd,
@@ -3311,7 +3566,7 @@ function startStreamingLogger(args) {
   const taskStart = /* @__PURE__ */ new Map();
   let succeeded = 0;
   let failed = 0;
-  const INLINE_BLOCKED_CAP = 5;
+  const INLINE_BLOCKED_CAP = 0;
   let blockedCount = 0;
   let blockedInlineEmitted = 0;
   let blockedOverflowEmitted = false;
@@ -3350,6 +3605,24 @@ function startStreamingLogger(args) {
         if (id !== null) taskStart.set(id, event.ts);
         write(`task.started ${id ?? "?"}`);
         break;
+      case "task.baseline.passed":
+        break;
+      case "task.baseline.failed": {
+        const bp = event.payload;
+        if (bp !== null && typeof bp === "object" && typeof bp.command === "string" && typeof bp.exitCode === "number") {
+          write(
+            `task.baseline.failed ${id ?? "?"} (${bp.command} \u2192 exit ${bp.exitCode})`
+          );
+          const output = typeof bp.output === "string" ? bp.output : null;
+          if (output !== null && output.trim().length > 0) {
+            const tail = output.trim().split("\n").slice(-6).map((l) => `    ${l}`).join("\n");
+            writeRaw(tail);
+          }
+        } else {
+          write(`task.baseline.failed ${id ?? "?"}`);
+        }
+        break;
+      }
       case "task.verify.passed":
         write(`task.verify.passed ${id ?? "?"}`);
         break;
@@ -3435,7 +3708,7 @@ function startStreamingLogger(args) {
       case "task.attempt": {
         const p = event.payload;
         if (p !== null && typeof p === "object" && typeof p.attempt === "number" && typeof p.of === "number" && p.attempt >= 2) {
-          writeRaw(`  attempt ${p.attempt}/${p.of} (retry with fix prompt)`);
+          write(`task.retry ${id ?? "?"} attempt ${p.attempt}/${p.of}`);
         }
         break;
       }
@@ -3561,9 +3834,17 @@ Failed tasks (${failed.length}):
     session:  ${session}
     worktree: ${worktree}
     elapsed:  ${elapsed}   attempts: ${t.attempts}
 `
         );
+        const baselineOutput = resolveBaselineOutput(db, runId, t.task_id);
+        if (baselineOutput !== null) {
+          const tail = baselineOutput.trim().split("\n").slice(-6).map((l) => `      ${l}`).join("\n");
+          process.stdout.write(`    output:
+${tail}
+`);
+        }
+        process.stdout.write(`
+`);
       }
     }
   }
@@ -3592,6 +3873,18 @@ function resolveFailureDetail(db, runId, row) {
     reason: row.last_error ?? "(no reason recorded)"
   };
 }
+function resolveBaselineOutput(db, runId, taskId) {
+  const events = readEventsDecoded(db, { runId, taskId });
+  for (let i = events.length - 1; i >= 0; i--) {
+    const e = events[i];
+    if (e.kind !== "task.baseline.failed") continue;
+    const p = e.payload;
+    if (p !== null && typeof p === "object" && typeof p.output === "string") {
+      return p.output;
+    }
+  }
+  return null;
+}
 function truncateSummary(s, maxChars) {
   if (s.length <= maxChars) return s;
   return s.slice(0, maxChars - 1) + "\u2026";

package/dist/index.js CHANGED

@@ -2,8 +2,9 @@ import {
   AGENT_TIERS,
   createAgents,
   formatModelOverrideWarning,
+  getStrictPrompt,
   validateModelOverride
-} from "./chunk-CZMAJISX.js";
+} from "./chunk-EK7K4NTV.js";
 import {
   PACKAGE_NAME,
   readOurPackageVersion,
@@ -163,6 +164,7 @@ function writePermDebugSnapshot(config) {
 function resolveHarnessModels(agents, config, pluginOptions) {
   const modelsConfig = pluginOptions?.models ?? config.harness?.models;
   if (!modelsConfig) return agents;
+  const midExecuteConfigured = modelsConfig["mid-execute"] !== void 0;
   const warnedIds = /* @__PURE__ */ new Set();
   const warnIfInvalid = (value, source) => {
     const result = validateModelOverride(value);
@@ -181,11 +183,25 @@ function resolveHarnessModels(agents, config, pluginOptions) {
     }
     const tier = AGENT_TIERS[agentName];
     if (tier) {
-      const perTier = modelsConfig[tier];
+      let perTier = modelsConfig[tier];
+      if (tier === "mid-execute" && perTier === void 0) {
+        perTier = modelsConfig["mid"];
+      }
       if (perTier !== void 0) {
         const picked = Array.isArray(perTier) ? perTier[0] : perTier;
         agentCfg.model = picked;
-        warnIfInvalid(picked, `models.${tier}`);
+        warnIfInvalid(picked, `models.${tier === "mid-execute" && !midExecuteConfigured ? "mid (fallback)" : tier}`);
+      }
+    }
+  }
+  if (midExecuteConfigured) {
+    const EXECUTOR_AGENTS = ["build", "qa-reviewer", "pilot-builder"];
+    for (const agentName of EXECUTOR_AGENTS) {
+      const agentCfg = agents[agentName];
+      if (!agentCfg) continue;
+      try {
+        agentCfg.prompt = getStrictPrompt(agentName);
+      } catch {
       }
     }
   }
@@ -1850,7 +1866,7 @@ import { join as join8 } from "path";
 var APP_KEY = "A-US-3617699429";
 var ENDPOINT = "https://us.aptabase.com/api/v0/event";
 var PKG_NAME = "@glrs-dev/harness-plugin-opencode";
-var PKG_VERSION = true ? "1.0.1" : "dev";
+var PKG_VERSION = true ? "1.2.0" : "dev";
 var DISABLED = process.env.HARNESS_OPENCODE_TELEMETRY === "0" || process.env.HARNESS_OPENCODE_TELEMETRY === "false" || process.env.DO_NOT_TRACK === "1" || process.env.CI === "true";
 var SESSION_ID = randomUUID();
 function getInstallId() {

package/dist/{install-X5KEANRB.js → install-5JKWK6Z4.js} RENAMED

@@ -3,7 +3,7 @@ import {
   install,
   writeMcpToggles,
   writePluginOption
-} from "./chunk-WBBN7OVN.js";
+} from "./chunk-BWERBERN.js";
 import "./chunk-VJUETC6A.js";
 export {
   MODEL_PRESETS,

package/dist/skills/code-quality/SKILL.md ADDED

@@ -0,0 +1,45 @@
+---
+name: code-quality
+description: Four principles for autonomous code quality — think before coding, simplicity first, surgical changes, goal-driven execution. Load this skill when planning, building, or reviewing any non-trivial change. Derived from observed patterns in AI-agent-authored PRs where review feedback clustered around wrong assumptions, overcomplication, scope creep, and missing failure-mode coverage.
+---
+# Code Quality Principles
+Four principles that prevent the most common classes of defects in AI-agent-authored code. Each principle applies at every pipeline phase, but the enforcement actions differ by phase. Load the rule file for your current role.
+These principles are derived from empirical analysis of recurring review feedback on agent-authored PRs. The top defect categories — wrong assumptions at system boundaries, overcomplicated implementations, unplanned side-effects, and happy-path-only coverage — are all preventable by applying the right check at the right phase.
+## The four principles
+1. **Think Before Coding** — Don't assume. Surface ambiguity, verify cross-boundary names, present tradeoffs, stop when confused.
+2. **Simplicity First** — Minimum code that solves the problem. No speculative features, no single-use abstractions, no "flexibility" that wasn't requested.
+3. **Surgical Changes** — Touch only what you must. Every changed line traces to the plan. Minimize blast radius on security-sensitive files.
+4. **Goal-Driven Execution** — Define success criteria with real verify commands. Enumerate failure modes. Test the error paths, not just the happy path.
+## Phase-specific rules
+Each rule file applies all four principles through the lens of a specific pipeline phase. Load the one that matches your current role:
+1. [`rules/gap-analysis.md`](rules/gap-analysis.md) — For `@gap-analyzer`. Surface hidden assumptions, missing failure modes, naming mismatches, and overscoped plans before the draft is written.
+2. [`rules/planning.md`](rules/planning.md) — For `@plan` and `@plan-reviewer`. Verify every cross-boundary identifier. Reject plans that exceed what the goal requires. Require failure-mode coverage in acceptance criteria.
+3. [`rules/building.md`](rules/building.md) — For `@build`. Enforce surgical changes. Verify names before using them. Flag unplanned edits. Write failure-path tests before happy-path code.
+4. [`rules/review.md`](rules/review.md) — For `@qa-reviewer` and `@qa-thorough`. Verify failure-path coverage in the diff. Grep-confirm cross-boundary string literals. Reject diffs with unplanned scope.
+## When to load this skill
+Any non-trivial change — defined as any plan with 3+ file-level changes, or any change touching a system boundary (API contract, database schema, config/security file, cross-service integration).
+Do NOT load for trivial work (typo fixes, single-file renames, doc-only changes). The overhead isn't worth it.
+## Observable outcomes
+These are the signals that the principles are working:
+- Fewer naming mismatches at system boundaries (cross-boundary identifiers are grep-confirmed before use)
+- Smaller, more focused PRs (plans that exceed ~15 files get split or justified)
+- Zero unplanned changes in diffs (every changed line traces to the plan)
+- Failure-mode coverage in acceptance criteria (negative tests exist for medium+ risk changes)
+- Narrower security-config changes (specific paths instead of broad globs)