npm - @kody-ade/kody-engine - Versions diffs - 0.3.40 → 0.3.42 - Mend

@kody-ade/kody-engine 0.3.40 → 0.3.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

package/dist/bin/kody.js +200 -16
package/dist/executables/classify/prompt.md +33 -0
package/dist/executables/fix/profile.json +2 -1
package/dist/executables/fix/prompt.md +41 -0
package/dist/executables/fix-ci/profile.json +1 -1
package/dist/executables/fix-ci/prompt.md +42 -6
package/dist/executables/plan/profile.json +1 -1
package/dist/executables/probe-skill/profile.json +35 -0
package/dist/executables/probe-skill/prompt.md +31 -0
package/dist/executables/probe-skill/skills/probe-skill-marker/SKILL.md +18 -0
package/dist/executables/research/profile.json +4 -1
package/dist/executables/research/prompt.md +5 -0
package/dist/executables/resolve/profile.json +1 -1
package/dist/executables/resolve/prompt.md +19 -2
package/dist/executables/review/profile.json +1 -1
package/dist/executables/review/prompt.md +35 -1
package/dist/executables/run/profile.json +2 -1
package/dist/executables/run/prompt.md +13 -1
package/dist/executables/ui-review/profile.json +1 -1
package/dist/executables/ui-review/prompt.md +10 -0
package/dist/plugins/hooks/block-git.json +16 -0
package/dist/plugins/hooks/block-write.json +16 -0
package/package.json +1 -1

package/dist/bin/kody.js CHANGED Viewed

@@ -3,7 +3,7 @@
 // package.json
 var package_default = {
   name: "@kody-ade/kody-engine",
-  version: "0.3.40",
+  version: "0.3.42",
   description: "kody \u2014 autonomous development engine. Single-session Claude Code agent behind a generic executor + declarative executable profiles.",
   license: "MIT",
   type: "module",
@@ -612,6 +612,68 @@ import * as path20 from "path";
 // src/dispatch.ts
 import * as fs6 from "fs";
+// src/cron-match.ts
+var FIELD_BOUNDS = [
+  [0, 59],
+  // minute
+  [0, 23],
+  // hour
+  [1, 31],
+  // day-of-month
+  [1, 12],
+  // month
+  [0, 6]
+  // day-of-week
+];
+function parseCron(spec) {
+  const fields = spec.trim().split(/\s+/);
+  if (fields.length !== 5) {
+    throw new Error(`Invalid cron expression: "${spec}" \u2014 expected 5 space-separated fields`);
+  }
+  const sets = fields.map((f, i) => parseField(f, FIELD_BOUNDS[i][0], FIELD_BOUNDS[i][1]));
+  return { minute: sets[0], hour: sets[1], dom: sets[2], month: sets[3], dow: sets[4] };
+}
+function parseField(field, min, max) {
+  const out = /* @__PURE__ */ new Set();
+  for (const part of field.split(",")) {
+    const [base, stepStr] = part.split("/");
+    const step = stepStr ? parseInt(stepStr, 10) : 1;
+    if (!Number.isFinite(step) || step < 1) {
+      throw new Error(`Invalid step in cron field "${field}"`);
+    }
+    let lo;
+    let hi;
+    if (base === "*") {
+      lo = min;
+      hi = max;
+    } else if (base.includes("-")) {
+      const [aStr, bStr] = base.split("-");
+      lo = parseInt(aStr, 10);
+      hi = parseInt(bStr, 10);
+    } else {
+      lo = parseInt(base, 10);
+      hi = lo;
+    }
+    if (!Number.isFinite(lo) || !Number.isFinite(hi) || lo < min || hi > max || lo > hi) {
+      throw new Error(`Invalid cron field "${field}" \u2014 out of range [${min},${max}] or reversed`);
+    }
+    for (let i = lo; i <= hi; i += step) out.add(i);
+  }
+  return out;
+}
+function cronMatchesAt(expr, date) {
+  return expr.minute.has(date.getUTCMinutes()) && expr.hour.has(date.getUTCHours()) && expr.dom.has(date.getUTCDate()) && expr.month.has(date.getUTCMonth() + 1) && expr.dow.has(date.getUTCDay());
+}
+function cronMatchesInWindow(spec, end, windowSec) {
+  const expr = parseCron(spec);
+  const endMs = Math.floor(end.getTime() / 6e4) * 6e4;
+  const minuteSteps = Math.max(1, Math.ceil(windowSec / 60));
+  for (let i = 0; i < minuteSteps; i++) {
+    if (cronMatchesAt(expr, new Date(endMs - i * 6e4))) return true;
+  }
+  return false;
+}
 // src/registry.ts
 import * as fs5 from "fs";
 import * as path5 from "path";
@@ -725,11 +787,9 @@ function autoDispatch(opts) {
     if (!Number.isNaN(n) && n > 0) {
       return { executable: "run", cliArgs: { issue: n }, target: n };
     }
-    return { executable: "mission-scheduler", cliArgs: {}, target: 0 };
-  }
-  if (eventName === "schedule") {
-    return { executable: "mission-scheduler", cliArgs: {}, target: 0 };
+    return null;
   }
+  if (eventName === "schedule") return null;
   if (eventName === "pull_request") return null;
   if (eventName !== "issue_comment") return null;
   const rawBody = String(event.comment?.body ?? "");
@@ -771,6 +831,39 @@ function autoDispatch(opts) {
   }
   return { executable, cliArgs: args, target: targetNum };
 }
+function dispatchScheduledWatches(opts) {
+  const now = opts?.now ?? /* @__PURE__ */ new Date();
+  const envWindow = Number(process.env.KODY_SCHEDULE_WINDOW_SEC);
+  const windowSec = opts?.windowSec ?? (Number.isFinite(envWindow) && envWindow > 0 ? envWindow : 300);
+  const out = [];
+  for (const exe of listExecutables()) {
+    let raw;
+    try {
+      raw = fs6.readFileSync(exe.profilePath, "utf-8");
+    } catch {
+      continue;
+    }
+    let profile;
+    try {
+      profile = JSON.parse(raw);
+    } catch {
+      continue;
+    }
+    if (profile.role !== "watch") continue;
+    if (profile.kind !== "scheduled") continue;
+    const schedule = profile.schedule;
+    if (typeof schedule !== "string" || schedule.trim().length === 0) continue;
+    if (!opts?.force) {
+      try {
+        if (!cronMatchesInWindow(schedule, now, windowSec)) continue;
+      } catch {
+        continue;
+      }
+    }
+    out.push({ executable: exe.name, cliArgs: {}, target: 0 });
+  }
+  return out;
+}
 function extractAfterTag(body) {
   const idx = body.indexOf("@kody");
   if (idx === -1) return body;
@@ -1674,31 +1767,34 @@ var buildSyntheticPlugin = async (ctx, profile) => {
   const runId = `${profile.name}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
   const root = path9.join(os2.tmpdir(), `kody-synth-${runId}`);
   fs10.mkdirSync(path9.join(root, ".claude-plugin"), { recursive: true });
+  const resolvePart = (bucket, entry) => {
+    const local = path9.join(profile.dir, bucket, entry);
+    if (fs10.existsSync(local)) return local;
+    const central = path9.join(catalog, bucket, entry);
+    if (fs10.existsSync(central)) return central;
+    throw new Error(
+      `buildSyntheticPlugin: ${bucket} entry '${entry}' not found in executable dir (${profile.dir}/${bucket}/) or catalog (${catalog}/${bucket}/)`
+    );
+  };
   if (cc.skills.length > 0) {
     const dst = path9.join(root, "skills");
     fs10.mkdirSync(dst, { recursive: true });
     for (const name of cc.skills) {
-      const src = path9.join(catalog, "skills", name);
-      if (!fs10.existsSync(src)) throw new Error(`buildSyntheticPlugin: skill not found in catalog: ${name}`);
-      copyDir(src, path9.join(dst, name));
+      copyDir(resolvePart("skills", name), path9.join(dst, name));
     }
   }
   if (cc.commands.length > 0) {
     const dst = path9.join(root, "commands");
     fs10.mkdirSync(dst, { recursive: true });
     for (const name of cc.commands) {
-      const src = path9.join(catalog, "commands", `${name}.md`);
-      if (!fs10.existsSync(src)) throw new Error(`buildSyntheticPlugin: command not found in catalog: ${name}`);
-      fs10.copyFileSync(src, path9.join(dst, `${name}.md`));
+      fs10.copyFileSync(resolvePart("commands", `${name}.md`), path9.join(dst, `${name}.md`));
     }
   }
   if (cc.subagents.length > 0) {
     const dst = path9.join(root, "agents");
     fs10.mkdirSync(dst, { recursive: true });
     for (const name of cc.subagents) {
-      const src = path9.join(catalog, "agents", `${name}.md`);
-      if (!fs10.existsSync(src)) throw new Error(`buildSyntheticPlugin: subagent not found in catalog: ${name}`);
-      fs10.copyFileSync(src, path9.join(dst, `${name}.md`));
+      fs10.copyFileSync(resolvePart("agents", `${name}.md`), path9.join(dst, `${name}.md`));
     }
   }
   if (cc.hooks.length > 0) {
@@ -1706,8 +1802,7 @@ var buildSyntheticPlugin = async (ctx, profile) => {
     fs10.mkdirSync(dst, { recursive: true });
     const merged = { hooks: {} };
     for (const name of cc.hooks) {
-      const src = path9.join(catalog, "hooks", `${name}.json`);
-      if (!fs10.existsSync(src)) throw new Error(`buildSyntheticPlugin: hook not found in catalog: ${name}`);
+      const src = resolvePart("hooks", `${name}.json`);
       const parsed = JSON.parse(fs10.readFileSync(src, "utf-8"));
       for (const [event, entries] of Object.entries(parsed.hooks ?? {})) {
         if (!Array.isArray(entries)) continue;
@@ -6333,6 +6428,22 @@ async function runCi(argv) {
   } catch {
   }
   const autoFallback = !args.issueNumber ? autoDispatch({ config: earlyConfig }) : null;
+  const eventName = process.env.GITHUB_EVENT_NAME;
+  const dispatchEventPath = process.env.GITHUB_EVENT_PATH;
+  let manualWorkflowDispatch = false;
+  if (!args.issueNumber && !autoFallback && eventName === "workflow_dispatch" && dispatchEventPath && fs23.existsSync(dispatchEventPath)) {
+    try {
+      const evt = JSON.parse(fs23.readFileSync(dispatchEventPath, "utf-8"));
+      const issueInput = parseInt(String(evt?.inputs?.issue_number ?? ""), 10);
+      const sessionInput = String(evt?.inputs?.sessionId ?? "");
+      manualWorkflowDispatch = !sessionInput && !(Number.isFinite(issueInput) && issueInput > 0);
+    } catch {
+      manualWorkflowDispatch = false;
+    }
+  }
+  if (!args.issueNumber && !autoFallback && (eventName === "schedule" || manualWorkflowDispatch)) {
+    return runScheduledFanOut(cwd, args, { force: manualWorkflowDispatch });
+  }
   if (!args.issueNumber && !autoFallback && process.env.GITHUB_EVENT_NAME) {
     process.stdout.write(`\u2192 kody: no action for event ${process.env.GITHUB_EVENT_NAME} \u2014 exiting cleanly
 `);
@@ -6418,6 +6529,79 @@ ${CI_HELP}`);
     return 99;
   }
 }
+async function runScheduledFanOut(cwd, args, opts) {
+  const matches = dispatchScheduledWatches({ force: opts.force });
+  if (matches.length === 0) {
+    process.stdout.write(
+      `\u2192 kody: scheduled wake \u2014 no watches matched ${opts.force ? "(force mode, no watches discovered)" : "(window)"}, exiting cleanly
+`
+    );
+    return 0;
+  }
+  const names = matches.map((m) => m.executable).join(", ");
+  process.stdout.write(`\u2192 kody: scheduled wake \u2014 firing ${matches.length} watch(es): ${names}
+`);
+  try {
+    const n = unpackAllSecrets();
+    if (n > 0) process.stdout.write(`\u2192 kody: unpacked ${n} secret(s) from ALL_SECRETS
+`);
+    resolveAuthToken();
+    const pm = args.packageManager ?? detectPackageManager2(cwd);
+    process.stdout.write(`\u2192 kody: package manager = ${pm}
+`);
+    if (!args.skipInstall) {
+      const code = installDeps(pm, cwd);
+      if (code !== 0) {
+        process.stderr.write(`[kody] dep install failed (${pm}, exit ${code})
+`);
+        return 99;
+      }
+    }
+    if (!args.skipLitellm) {
+      const code = installLitellmIfNeeded(cwd);
+      if (code !== 0) {
+        process.stderr.write(`[kody] litellm install failed (exit ${code})
+`);
+        return 99;
+      }
+    }
+    configureGitIdentity(cwd);
+  } catch (err) {
+    const msg = err instanceof Error ? err.message : String(err);
+    process.stderr.write(`[kody] preflight crashed: ${msg}
+`);
+    return 99;
+  }
+  const config = loadConfig(cwd);
+  let worstExit = 0;
+  for (const match of matches) {
+    process.stdout.write(`
+\u2192 kody: running watch \`${match.executable}\`
+`);
+    try {
+      const result = await runExecutable(match.executable, {
+        cliArgs: match.cliArgs,
+        cwd,
+        config,
+        verbose: args.verbose,
+        quiet: args.quiet
+      });
+      if (result.exitCode !== 0) {
+        process.stderr.write(
+          `[kody] watch \`${match.executable}\` exited ${result.exitCode}: ${result.reason ?? "(no reason)"}
+`
+        );
+        if (result.exitCode > worstExit) worstExit = result.exitCode;
+      }
+    } catch (err) {
+      const msg = err instanceof Error ? err.message : String(err);
+      process.stderr.write(`[kody] watch \`${match.executable}\` crashed: ${msg}
+`);
+      worstExit = Math.max(worstExit, 99);
+    }
+  }
+  return worstExit;
+}
 // src/chat-cli.ts
 var DEFAULT_MODEL = "claude/claude-haiku-4-5-20251001";

package/dist/executables/classify/prompt.md CHANGED Viewed

@@ -30,6 +30,39 @@ Pick **exactly one** of:
 **If the issue is "tweak config / bump dep / fix typo" with no real design choice → `chore`.**
 **Otherwise → `feature`.**
+# Worked disambiguation examples
+These are the cases that catch classifiers out. Read them before deciding.
+**Example A — label says "bug", body opens design space → `feature`**
+> Title: "Login is slow"
+> Labels: `bug`
+> Body: "Login takes 4 seconds. We should figure out why and fix it. Probably involves the auth service, the session cache, and possibly the new SSO integration."
+Pick: `feature`. The body opens an investigation across multiple subsystems — that's a design space, not a localized fix. Label loses to content.
+**Example B — body says "bug" but the ask is exploratory → `spec`**
+> Title: "Investigate why our queue throughput dropped"
+> Body: "Throughput dropped 30% last week. Write up what you find — root cause, options for fixing, recommendation. We'll decide next steps from your write-up."
+Pick: `spec`. The deliverable is an analysis document. No code change is being requested in this issue.
+**Example C — labeled `feature` but trivial → `chore`**
+> Title: "Bump prettier to 3.4"
+> Labels: `feature`, `dependencies`
+> Body: "Bump devDep prettier to 3.4. Format will not change."
+Pick: `chore`. No design choice; mechanical dep bump. Label is wrong.
+**Example D — labeled `chore` but real → `bug`**
+> Title: "README typo"
+> Labels: `chore`
+> Body: "The README claims our API returns `data` but actually returns `result`. Fix the docs OR the API to make them match."
+Pick: `bug`. The "OR" forces a real decision and the fix may touch code, not just docs. Not chore-grade.
+**Precedence rule:** when label and body conflict, body wins. Labels are author hints, often stale or wrong; the body is the actual ask.
 # Required output
 Your FINAL message must be exactly this shape (no extra text before or after):

package/dist/executables/fix/profile.json CHANGED Viewed

@@ -33,7 +33,7 @@
       "Glob",
       "mcp__playwright"
     ],
-    "hooks": [],
+    "hooks": ["block-git"],
     "skills": [],
     "commands": [],
     "subagents": [],
@@ -72,6 +72,7 @@
       { "script": "fixFlow" },
       { "script": "loadTaskState" },
       { "script": "loadConventions" },
+      { "script": "loadPriorArt" },
       { "script": "loadCoverageRules" },
       { "script": "composePrompt" }
     ],

package/dist/executables/fix/prompt.md CHANGED Viewed

@@ -16,10 +16,20 @@ You are Kody, an autonomous engineer. Apply the feedback below to the existing P
 {{prDiff}}
 ```
+# Prior art (closed/merged PRs that previously attempted this work, if any)
+{{priorArt}}
+If a prior-art block is present above, scan it before editing — those are earlier attempts (possibly by you, possibly by a human) at the same fix. Note what was rejected and why; do not repeat a discarded approach.
 # Required steps
 1. **Extract** every actionable item from the feedback. A structured review uses headings like `### Concerns`, `### Suggestions`, and `### Bugs`; each bullet under those headings is a distinct item. `### Strengths`, `### Summary`, and `### Bottom line` are NOT items — skip them. If the feedback has no headings (plain inline feedback), treat the whole feedback as one item.
 2. **Number each item** internally (Item 1, Item 2, …). You will account for every one of them in your final message below.
 3. **Research** — read only what's needed to act on the items. Make the minimum edits required to implement each one. If the feedback or PR body links to external URLs (reproduction sites, bug recordings, spec pages), use the **Playwright MCP** tools (`mcp__playwright__browser_navigate`, `mcp__playwright__browser_snapshot`) to load them — do not rely on your interpretation of the URL alone.
+   **Research floor (MUST be met before any Edit/Write):**
+   - Read the **full** contents of every file you intend to change.
+   - Read the test file for each of those files, if one exists.
+   - Skipping the floor on the assumption "feedback says exactly what to change" is a hard failure when the change touches code with non-obvious invariants.
 4. **Verify** — run each quality command with Bash. Fix the root cause of any failure you introduced by this round of edits.
 5. Your FINAL message MUST use this exact format (or a single `FAILED: <reason>` line on failure). The `FEEDBACK_ACTIONS:` block is REQUIRED — omitting it or leaving it empty makes your DONE invalid.
@@ -34,12 +44,43 @@ You are Kody, an autonomous engineer. Apply the feedback below to the existing P
    <2-4 bullets describing what changed in THIS fix round — not the whole PR>
    ```
+   **Worked example.** Suppose the feedback was:
+   > ### Concerns
+   > - The retry loop in `src/queue.ts:42` has no upper bound — could spin forever if the API is down.
+   > - `validateInput` accepts negative numbers but the schema says positive.
+   >
+   > ### Suggestions
+   > - Consider extracting the date-parsing logic into a helper.
+   A valid `FEEDBACK_ACTIONS` block:
+   ```
+   FEEDBACK_ACTIONS:
+   - Item 1: "retry loop has no upper bound" — fixed: src/queue.ts:42 added maxRetries=5 with exponential backoff and a final throw.
+   - Item 2: "validateInput accepts negative numbers but schema says positive" — fixed: src/validate.ts:18 changed z.number() to z.number().positive(); added test cases for -1 and 0.
+   - Item 3: "extract date-parsing helper" — declined: the parsing only appears in one call site (src/handlers/webhook.ts:71); extracting now would create a one-caller helper. Will revisit if a second call site appears.
+   ```
+   Notes on the example:
+   - Every extracted item appears as exactly one line. None are dropped, none merged.
+   - "Strengths" / "Summary" / "Bottom line" sections from the feedback do NOT become items.
+   - `declined:` is paired with concrete evidence (one call site + path), not a vague preference.
 # Rules
 - **The feedback is the scope.** You are here to address the extracted items — nothing else. Do NOT make unrelated refactors, rename variables the reviewer did not flag, or "tighten" types that were not called out. Every edit in your diff must trace back to a specific Item in `FEEDBACK_ACTIONS`.
 - **Default to `fixed`.** `declined` is only acceptable when (a) the item is factually wrong about the code, or (b) it is explicitly out of scope per the issue body. In both cases the `declined: <reason>` line must point to concrete evidence (a file:line that contradicts the item, or a specific issue-body clause).
 - **Treat each item as a concrete change request, not a code review to argue with.** "Add an X branch" means add an X branch — not document that Y already covers the case. "Already handles it in a different way" is NOT an acceptable reason to decline.
 - **Your DONE is only valid if your diff materially implements each `fixed` item.** A diff that only adds tests asserting the current behavior, or only tweaks comments/docs, does NOT count as addressing a change request. If an item asks for a new code path, the diff MUST contain that new code path.
 - **"Already satisfied" (i.e. skipping the edit because the code already does what's asked) is only allowed when you can cite the exact file:line that already implements it.** If in doubt, make the edit — under `fixed`.
+- **Stale feedback.** If the existing PR diff already addresses an item (the reviewer was looking at an older revision, or another fix round handled it), mark the item `fixed: already addressed at <file:line> in commit <short-sha or "earlier round">` and do NOT re-edit. Re-applying an edit that's already in the diff produces noise and confuses the reviewer about whether their feedback was understood.
+- **Not all feedback is an item.** These are NOT items, even if they appear in the feedback body:
+  - Questions ("why did you choose X?") — answer in the PR comment thread, not via an edit.
+  - Hedges and asides ("interesting", "let me know", "thoughts?") — no action required.
+  - Documentation links and references that aren't tied to a concrete change ask.
+  - Praise / strengths bullets, even if they suggest improvements implicitly.
+  When in doubt: an item is something with an imperative or a concrete change that would alter the diff. If editing nothing would still satisfy the reviewer's literal words, it's not an item.
 - Do NOT run git/gh commands. The wrapper handles it.
 - Stay on `{{branch}}`.
 - Do not modify files under `.kody/`, `.kody-engine/`, `node_modules/`, `dist/`, `build/`, `.env`, `*.log`.

package/dist/executables/fix-ci/profile.json CHANGED Viewed

@@ -31,7 +31,7 @@
       "Grep",
       "Glob"
     ],
-    "hooks": [],
+    "hooks": ["block-git"],
     "skills": [],
     "commands": [],
     "subagents": [],

package/dist/executables/fix-ci/prompt.md CHANGED Viewed

@@ -22,10 +22,23 @@ You are Kody, an autonomous engineer. A CI workflow on PR #{{pr.number}} (`{{bra
 ```
 # Required steps
-1. Read the log carefully. Identify the actual failure — compile error, failing test, lint rule, missing dep, etc.
-2. Make the minimum edits to fix the root cause. Do NOT disable tests or rules just to make CI pass.
-3. Re-run the relevant quality command locally with Bash and confirm exit 0.
-4. Final message format (or `FAILED: <reason>` on failure):
+1. **Classify the failure.** Read the log and identify which type of failure this is. Different failure types call for different strategies; misidentifying the type usually leads to masking the symptom rather than fixing the root cause.
+   | Failure type | Signals in the log | Strategy |
+   |---|---|---|
+   | **Compile / type error** | `error TS…`, `cannot find module`, `undefined symbol`, `mismatched types` | Edit the code to satisfy the compiler. Don't add `any`, `// @ts-ignore`, `# type: ignore`, or weaken the type to dodge the check. |
+   | **Failing test** | `expect(...).toBe(...)`, assertion diff, "1 failed, N passed" | Read the test AND the code under test. Fix whichever has the bug — usually the code, sometimes the test if the test encodes wrong expectations. Never fix it by widening the assertion (`toBeTruthy` instead of a real check, `expect.any(Object)` instead of a real shape). |
+   | **Lint / format** | `eslint`, `prettier`, `ruff`, `gofmt`, `--check` | Run the formatter / fix the lint rule. Don't disable the rule unless it's a documented project decision. |
+   | **Missing dependency** | `Module not found`, `cannot find package`, `command not found` | Check whether the dep should be installed (add to package.json/requirements/go.mod) or whether the import path is wrong. Don't `npm install` a transitive dep that should already be inherited. |
+   | **Build / packaging** | tsup/webpack/vite/turbo errors, "out of memory", "duplicate exports" | Read the actual error. Often a real bug (circular import, wrong export shape), occasionally a config gap. |
+   | **Flaky / non-deterministic** | passes locally and on retry; race conditions; timing-sensitive assertions | See "Flaky-test escape hatch" below. Do NOT add retries, `setTimeout`, or `--retries=N` to make a real flake green. |
+   | **Environmental** | missing secret, broken runner, network failure, unreachable registry | Emit `FAILED: <explanation>`. Code can't fix infrastructure. |
+2. **Make the minimum edits to fix the root cause.** Do not bundle unrelated cleanups into a CI fix.
+3. **Re-run the relevant quality command locally with Bash and confirm exit 0.**
+4. **Final message format** (or `FAILED: <reason>` on failure):
    ```
    DONE
@@ -34,9 +47,32 @@ You are Kody, an autonomous engineer. A CI workflow on PR #{{pr.number}} (`{{bra
    <2-4 bullets: what was failing, what you changed, why it fixes it>
    ```
+# Flaky-test escape hatch
+If a test passes locally and on a CI retry but fails non-deterministically (timing, race, port collision, network-dependent), do NOT paper over it. Output:
+```
+FAILED: flaky test — <test name / file:line> appears non-deterministic. Local: pass. CI retry: <pass|fail>. Suspected cause: <one line>. Recommend a separate issue to stabilize, not a fix-CI patch.
+```
+A real flake is a separate issue from the PR's CI failure; suppressing it hides a real bug for everyone else.
+# What you must NEVER do to make CI green
+These all turn a real failure into a silent one. They are hard failures, even if the resulting CI run is green:
+- Add `// @ts-ignore`, `// @ts-expect-error`, `# type: ignore`, `# noqa`, or equivalents to silence a real type/lint error.
+- Mark a test `.skip`, `.todo`, `xit`, `xdescribe`, or comment it out.
+- Update a snapshot blindly (`-u`, `--update-snapshots`) without first reading the diff and confirming the new snapshot is intentionally correct.
+- Replace a specific assertion with a permissive one (`expect.any(...)`, `toBeTruthy()`, `toBeDefined()`, removing fields from a matcher).
+- Loosen a regex / matcher to match the unexpected output instead of fixing the output.
+- Add `--retries=N`, `retry` decorators, or `setTimeout` to mask a race.
+- Disable a CI step, change `if: always()`, or comment out a workflow job.
+- Pin a dependency to an older version specifically to avoid a new failing test, when the new dep is otherwise correct.
+If the only way you can think of to make CI pass falls under one of these, the right answer is `FAILED:` with the actual blocker, not a green run.
 # Rules
 - Do NOT run git/gh. Wrapper handles it.
-- Do NOT disable/skip tests or lint rules just to pass CI.
-- If the failure is environmental (missing secret, broken runner) and not code, emit `FAILED: <explanation>`.
 - Stay on `{{branch}}`.
 {{systemPromptAppend}}

package/dist/executables/plan/profile.json CHANGED Viewed

@@ -23,7 +23,7 @@
       "Bash",
       "mcp__playwright"
     ],
-    "hooks": [],
+    "hooks": ["block-write"],
     "skills": [],
     "commands": [],
     "subagents": [],

package/dist/executables/probe-skill/profile.json ADDED Viewed

@@ -0,0 +1,35 @@
+{
+  "name": "probe-skill",
+  "role": "utility",
+  "describe": "Live-test executable. Loads the executable-local 'probe-skill-marker' skill (resolved from src/executables/probe-skill/skills/, NOT the central catalog) and asks the agent to emit the skill's token back as an issue comment. Validates the new local-skill resolution path in buildSyntheticPlugin end-to-end.",
+  "inputs": [
+    { "name": "issue", "flag": "--issue", "type": "int", "required": true, "describe": "GitHub issue number to verify against." }
+  ],
+  "claudeCode": {
+    "model": "inherit",
+    "permissionMode": "default",
+    "maxTurns": 12,
+    "systemPromptAppend": "You are running Kody's executable-local skill live verification. Emit the token exactly as the skill instructs.",
+    "tools": ["Read", "Grep", "Glob", "Bash"],
+    "hooks": [],
+    "skills": ["probe-skill-marker"],
+    "commands": [],
+    "subagents": [],
+    "plugins": [],
+    "mcpServers": []
+  },
+  "cliTools": [],
+  "scripts": {
+    "preflight": [
+      { "script": "loadIssueContext" },
+      { "script": "loadTaskState" },
+      { "script": "buildSyntheticPlugin" },
+      { "script": "composePrompt" }
+    ],
+    "postflight": [
+      { "script": "parseAgentResult" },
+      { "script": "writeRunSummary" },
+      { "script": "saveTaskState" }
+    ]
+  }
+}

package/dist/executables/probe-skill/prompt.md ADDED Viewed

@@ -0,0 +1,31 @@
+You are Kody's executable-local skill live verification agent. Your only job: prove that the skill named `probe-skill-marker` was loaded into your session from this executable's own directory (NOT the shared catalog), and report its token back to the issue.
+Issue #{{issue.number}}: {{issue.title}}
+---
+# What to do
+1. List your available skills. Confirm a skill named `probe-skill-marker` is loaded.
+2. Activate it (its activation phrase is "probe-skill-marker"). The skill instructs you to emit a single token of the form `PROBE_SKILL_OK_<version>`.
+3. Post a comment on issue #{{issue.number}} via `gh issue comment {{issue.number}} --body "..."`. The body must be a single line:
+   ```
+   probe-skill verification: <TOKEN>
+   ```
+   Replace `<TOKEN>` with whatever exact token the skill told you to emit. If the skill is NOT loaded, post `probe-skill verification: SKILL_NOT_LOADED` instead.
+# Output contract
+After posting the comment, your final message must be exactly:
+```
+DONE
+COMMIT_MSG: probe-skill: live verification for #{{issue.number}}
+PR_SUMMARY: probe-skill ran; see issue comment for the token.
+```
+# Rules
+- Read-only on the repo. Do NOT edit any file. Do NOT run git.
+- The only state-changing command you may run is `gh issue comment`.
+- Do not perform the issue's actual work.

package/dist/executables/probe-skill/skills/probe-skill-marker/SKILL.md ADDED Viewed

@@ -0,0 +1,18 @@
+---
+name: probe-skill-marker
+description: Internal Kody live-test skill, shipped from the probe-skill executable's own directory (not the shared src/plugins catalog). Activates when a prompt mentions "probe-skill-marker" and provides a versioned confirmation token.
+---
+# probe-skill-marker
+This skill exists only to verify that Kody's executable-local plugin-part resolution actually loads skills from `src/executables/<name>/skills/` into the agent's session.
+## When to activate
+When the user's prompt contains the phrase "probe-skill-marker" or explicitly asks whether the skill is loaded.
+## What to do
+Emit the literal token `PROBE_SKILL_OK_v1` exactly as written. Do not modify, paraphrase, or interpret it. The token version (`v1`) lets us prove that edits to this file are picked up on the next run — bumping it to `v2` here should result in the agent reporting `PROBE_SKILL_OK_v2` after a fresh publish + trigger.
+Do not use this skill for anything else. It is a no-op confirmation signal for infrastructure validation.

package/dist/executables/research/profile.json CHANGED Viewed

@@ -23,7 +23,7 @@
       "Bash",
       "mcp__playwright"
     ],
-    "hooks": [],
+    "hooks": ["block-write"],
     "skills": [],
     "commands": [],
     "subagents": [],
@@ -70,6 +70,9 @@
       {
         "script": "loadConventions"
       },
+      {
+        "script": "loadPriorArt"
+      },
       {
         "script": "composePrompt"
       }

package/dist/executables/research/prompt.md CHANGED Viewed

@@ -26,6 +26,11 @@ Recent comments (most recent first, truncated):
 {{conventionsBlock}}
+# Prior art (closed/merged PRs flagged in earlier research, if any)
+{{priorArt}}
+If a prior-art block is present above, scan the diffs and review comments — those are previously-attempted solutions to this same issue. Surface the *outcome* (what landed, what was rejected, what's still open) under "Repo context"; this is part of what an implementer needs to know. Do NOT re-recommend an approach the diffs show was already tried and abandoned.
 ---
 # Required output

package/dist/executables/resolve/profile.json CHANGED Viewed

@@ -32,7 +32,7 @@
       "Grep",
       "Glob"
     ],
-    "hooks": [],
+    "hooks": ["block-git"],
     "skills": [],
     "commands": [],
     "subagents": [],

package/dist/executables/resolve/prompt.md CHANGED Viewed

@@ -16,8 +16,25 @@ You are Kody, an autonomous engineer. A `git merge origin/{{baseBranch}}` into P
 # Required steps
 1. For each conflicted file: read it, understand both sides of the `<<<<<<<` / `=======` / `>>>>>>>` markers, and produce the correct merged content. Remove all conflict markers.
 2. If a conflict resolution directive is given above, follow it exactly — take the specified side for every conflict, no judgement. Otherwise, preserve the PR's intent (the HEAD side) unless `origin/{{baseBranch}}` made a change that should be preserved (e.g. security fix, renamed API), and use judgement.
-3. After resolving, run the quality commands with Bash and fix any issues YOUR resolution introduced.
-4. Final message format (or `FAILED: <reason>` on failure):
+3. **Asymmetric conflicts.** Symmetric conflicts (both sides modified the same lines) are easy: merge the content. Asymmetric ones are harder — apply this decision tree:
+   - **One side deletes, the other modifies.** Read commit messages and surrounding code on both sides.
+     - If base deletes (file/function removed) and HEAD modifies → likely the PR was written against an older revision; **prefer deletion**, then check whether HEAD's modification still has a home elsewhere (it may have moved). If the modification was a refactor, deletion wins.
+     - If base modifies and HEAD deletes (PR removed something that base improved) → **prefer deletion** unless the base modification was a security/correctness fix the PR depends on.
+     - If you cannot determine intent from the code, emit `FAILED: cannot resolve asymmetric conflict in <file> — <one-line description>` and stop. Do NOT guess.
+   - **Both sides add (parallel additions of the same name/symbol).** Keep both if they are genuinely different (e.g. two new functions with similar names that do different things — rename one). Keep one if they are duplicates of the same intent.
+4. **Generated files.** Do NOT manually merge generated artifacts:
+   - Lockfiles (`package-lock.json`, `pnpm-lock.yaml`, `yarn.lock`, `bun.lockb`, `Cargo.lock`, `go.sum`, `poetry.lock`, `Pipfile.lock`).
+   - Test snapshots (`__snapshots__/*.snap`, `*.snap`, Playwright snapshots).
+   - Build outputs (anything under `dist/`, `build/`, `.next/`, `out/`).
+   - Schema dumps (the Prisma migrations directory alongside `prisma/schema.prisma`, generated GraphQL schemas).
+   For these, take the conflicted file from base (`origin/{{baseBranch}}`), then re-run the generator (`pnpm install`, `pnpm test -u` *only with confirmation that the snapshot diff is intentional*, `pnpm prisma generate`, etc.). If you cannot determine the right generator command from the repo, emit `FAILED: generated-file conflict in <file> — needs manual regeneration` and stop.
+5. After resolving, run the quality commands with Bash and fix any issues YOUR resolution introduced.
+6. Final message format (or `FAILED: <reason>` on failure):
    ```
    DONE

package/dist/executables/review/profile.json CHANGED Viewed

@@ -24,7 +24,7 @@
       "Bash",
       "mcp__playwright"
     ],
-    "hooks": [],
+    "hooks": ["block-write"],
     "skills": [],
     "commands": [],
     "subagents": [],

package/dist/executables/review/prompt.md CHANGED Viewed

@@ -10,6 +10,16 @@ Base: {{pr.baseRefName}} ← Head: {{pr.headRefName}}
 {{conventionsBlock}}
+# Research floor (MUST be met before forming a verdict)
+A diff hunk in isolation is not enough context for a real review. Before you write the Concerns / Suggestions sections:
+- For every file in the diff, **Read the full file** (not just the hunk). A bug introduced 30 lines above the hunk will not appear in the diff.
+- For every modified function, scan the rest of the module (and any sibling test file) for callers and existing tests of that function. A signature change is only safe if its callers also changed.
+- If the PR adds a new module, read at least one sibling implementing the same pattern in the repo. A "Suggestion" that the author break the existing convention is a review failure unless you can name why the existing convention doesn't fit.
+Do **not** invent file:line citations from memory or from grep snippets — every citation in your review must come from a file you actually Read in this session.
 # Diff
 ```diff
@@ -40,10 +50,34 @@ Your FINAL message must be a markdown-formatted review comment, **structured exa
 <one sentence>
 ```
+# Verdict calibration (worked examples)
+Verdicts gate downstream automation: a `CONCERNS` sends the PR back into a `fix` round; a `FAIL` aborts. Miscalibration costs concrete agent time, so calibrate carefully.
+**PASS** — meets spec, no blocking issues. Examples:
+- Diff implements the issue exactly; tests cover happy + failure paths; no regressions surfaced from reading the changed files.
+- Refactor with no behavior change; existing tests still cover the surface; no obvious dead code introduced.
+**CONCERNS** — should land but with a note. Examples:
+- Test coverage gap: a new public function has only a happy-path test; the failure path is exercised but not asserted.
+- Naming/structure: a new module duplicates a pattern that already exists in a sibling — flag the sibling, suggest reuse, but don't block.
+- Doc gap: a public API was added without an updated README/CHANGELOG and the repo conventions clearly require it.
+**FAIL** — must not merge as-is. Examples:
+- Correctness: a regex change drops a previously-handled case; reading the test file confirms the case was tested and the test was deleted.
+- Security: a request handler reads `req.body.userId` and queries by it without checking the session — privilege-escalation risk.
+- Regression: a public function's signature changed but callers in other files weren't updated; build will pass but runtime will throw.
+**Do NOT give a CONCERNS verdict for:**
+- Style / formatting / naming choices that the project's linter or formatter would catch (or *should* catch — it's not the reviewer's job to be the linter).
+- Subjective preferences ("I'd have written this differently") with no concrete failure mode.
+- Bundled-PR scope objections — flag in Suggestions, not as a CONCERNS verdict, unless the unrelated changes hide real risk.
+- Things the diff didn't change. Pre-existing issues are not your scope.
 # Rules
 - No file edits. No `git`/`gh` invocations. Read-only investigation.
 - Be specific: cite file paths and line numbers. No generic advice.
 - Verdict **FAIL** only for clear correctness / security / regression risks.
-- Verdict **CONCERNS** for style / clarity / test-coverage gaps that shouldn't block.
+- Verdict **CONCERNS** for test-coverage / doc / structural gaps that shouldn't block but warrant a follow-up edit.
 - Verdict **PASS** when the PR meets spec with no blocking issues.

package/dist/executables/run/profile.json CHANGED Viewed

@@ -24,7 +24,7 @@
       "Grep",
       "Glob"
     ],
-    "hooks": [],
+    "hooks": ["block-git"],
     "skills": [],
     "commands": [],
     "subagents": [],
@@ -45,6 +45,7 @@
       { "script": "runFlow" },
       { "script": "loadTaskState" },
       { "script": "resolveArtifacts" },
+      { "script": "loadPriorArt" },
       { "script": "loadConventions" },
       { "script": "loadCoverageRules" },
       { "script": "composePrompt" }

package/dist/executables/run/prompt.md CHANGED Viewed

@@ -12,8 +12,19 @@ You are Kody, an autonomous engineer. Take a GitHub issue from spec to a tested
 If the plan above is non-empty, TREAT IT AS AUTHORITATIVE — follow its file list and approach rather than inventing your own. Deviate only if the plan is wrong; if you do, you MUST declare each deviation in the `PLAN_DEVIATIONS:` block of your final message (format below). Silent deviations are a hard failure, even if the code works. If the plan is empty, proceed from first principles and emit `PLAN_DEVIATIONS: none` in the final message.
+# Prior art (closed/merged PRs that previously attempted this issue, if any)
+{{priorArt}}
+If a prior-art block is present above, READ THE DIFFS — those are failed or superseded attempts at this same issue. Identify what went wrong (review comments, the fact they were closed without merging, or behavioural gaps in the diff itself) and pick a different approach. Repeating a prior failed attempt is a hard failure even if your tests pass locally.
 # Required steps (all in this one session — no handoff)
-1. **Research** — read the issue carefully. Use Grep/Glob/Read to investigate the codebase: locate relevant files, understand existing patterns, check related tests, identify constraints. Do not edit anything yet.
+1. **Research** — read the issue carefully, then meet the research floor below before any Edit/Write. Use Grep/Glob/Read to investigate.
+   **Research floor (MUST be met before step 3):**
+   - Read the **full** contents of every file you intend to change (not just a grep hit).
+   - Read the tests for each of those files, if tests exist for the module.
+   - Read at least one sibling module that already implements the same pattern you're about to follow — your edits should mirror an existing convention unless you can name why a new one is needed.
+   - If a file you need to read does not exist, say so explicitly in your plan (step 2). Do not guess at its contents.
 2. **Plan** — before any Edit/Write, output a short plan (5–10 lines): what files you'll change, the approach, what could go wrong. No fluff.
 3. **Build** — Edit/Write to implement the change. Stay within the plan; if you discover the plan was wrong, briefly say so and adjust.
 4. **Test** — for every new module you added and every behavior you changed, write or update tests. If the plan above contains a "Test plan" section, treat it as authoritative: every item there must produce a corresponding test. Match the repo's existing test layout (look at `tests/` or sibling `*.test.ts` files in the codebase to see the convention). Cover at least one happy path and one failure path per change. Skipping tests is a hard failure. A change may only be declared untestable if you can name the specific blocker (e.g., "no fake exists for the X SDK and stubbing it would mock the entire call surface"); vague "this is just config" claims are rejected. Untestable changes go in `PLAN_DEVIATIONS:` with the named blocker.
@@ -31,6 +42,7 @@ If the plan above is non-empty, TREAT IT AS AUTHORITATIVE — follow its file li
    ```
 # Rules
+- **No speculative refactors.** Stay inside the issue's scope. Do not rename variables, retype function signatures, restructure modules, reorder imports, reformat unchanged lines, or "clean up" code adjacent to the change unless that cleanup is *required* by the change. Scope drift in your diff is a hard failure even if the change works — reviewers can't tell what was intentional. If you find a real adjacent bug while working, mention it in `PR_SUMMARY` (without fixing it) so a follow-up issue can be opened.
 - Do NOT run **any** `git` or `gh` commands. The wrapper handles all git/gh operations. If a quality gate fails, that's the failure — do not investigate it via git.
 - Stay on the current branch (`{{branch}}`). It is already checked out for you.
 - Do NOT modify files under: `.kody/`, `.kody-engine/`, `.kody-lean/`, `node_modules/`, `dist/`, `build/`, `.env`, or any `*.log`.

package/dist/executables/ui-review/profile.json CHANGED Viewed

@@ -34,7 +34,7 @@
       "Edit",
       "mcp__playwright"
     ],
-    "hooks": [],
+    "hooks": ["block-git"],
     "skills": [],
     "commands": [],
     "subagents": [],

package/dist/executables/ui-review/prompt.md CHANGED Viewed

@@ -56,6 +56,16 @@ If the response is not 2xx or 3xx, the preview is unreachable. In that case, SKI
    Include a `playwright.config.ts` at `.kody/ui-review/playwright.config.ts` only if you need custom config; otherwise rely on defaults (headless chromium).
+   **UI-state checklist.** Browsing the happy path is not enough. For each UI surface the PR changes, verify the following states *if they're plausibly reachable*; explicitly note in "Gaps" any state you couldn't reach:
+   - **Loading.** What does the page look like before data resolves? Are there skeletons / spinners / placeholders? Does the layout shift on data arrival?
+   - **Empty.** What does it look like with zero items (no rows, no results, no notifications)? Is there an empty-state message, or is the screen confusingly blank?
+   - **Error.** What does it look like when a request fails? Force a failure if you can (network throttle, invalid input, broken nav). Is the error visible and actionable?
+   - **Mobile / narrow viewport.** Take a screenshot at ~375px wide. Is anything cut off, overlapping, or stacked illegibly?
+   - **Keyboard navigation.** Tab through the changed surface. Is focus visible at every step? Can the user reach every interactive element without a mouse? Does Enter/Space activate the right control?
+   These map directly to UI findings — flag any that fail or look broken. Do NOT pad your review by enumerating every state for trivial diffs (e.g. a copy change in static text); apply the checklist where the diff plausibly affects the state.
 4. **Run it.** Invoke:
    ```bash

package/dist/plugins/hooks/block-git.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "$schema": "https://json.schemastore.org/claude-code-settings.json",
+  "hooks": {
+    "PreToolUse": [
+      {
+        "matcher": "Bash",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node -e 'let s=\"\";process.stdin.on(\"data\",c=>s+=c).on(\"end\",()=>{try{const d=JSON.parse(s);const cmd=(d.tool_input&&d.tool_input.command)||\"\";if(cmd.split(/[;&|\\n]+/).some(p=>/^(git|gh)(\\s|$)/.test(p.trim()))){process.stderr.write(\"kody blocks git/gh — the wrapper handles VCS; do not run git or gh commands\\n\");process.exit(2)}}catch{}})'"
+          }
+        ]
+      }
+    ]
+  }
+}

package/dist/plugins/hooks/block-write.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "$schema": "https://json.schemastore.org/claude-code-settings.json",
+  "hooks": {
+    "PreToolUse": [
+      {
+        "matcher": "Write|Edit|NotebookEdit",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node -e 'process.stderr.write(\"kody read-only mode: this executable does not modify files; do not call Write/Edit/NotebookEdit\\n\");process.exit(2)'"
+          }
+        ]
+      }
+    ]
+  }
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@kody-ade/kody-engine",
-  "version": "0.3.40",
+  "version": "0.3.42",
   "description": "kody — autonomous development engine. Single-session Claude Code agent behind a generic executor + declarative executable profiles.",
   "license": "MIT",
   "type": "module",