@kody-ade/kody-engine 0.4.10 → 0.4.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin/kody.js CHANGED
@@ -3,7 +3,7 @@
3
3
  // package.json
4
4
  var package_default = {
5
5
  name: "@kody-ade/kody-engine",
6
- version: "0.4.10",
6
+ version: "0.4.13",
7
7
  description: "kody \u2014 autonomous development engine. Single-session Claude Code agent behind a generic executor + declarative executable profiles.",
8
8
  license: "MIT",
9
9
  type: "module",
@@ -388,7 +388,15 @@ async function runAgent(opts) {
388
388
  ...process.env,
389
389
  SKIP_HOOKS: "1",
390
390
  HUSKY: "0",
391
- CI: process.env.CI ?? "1"
391
+ CI: process.env.CI ?? "1",
392
+ // MCP servers are spawned asynchronously by the SDK. With the default
393
+ // non-blocking behavior, the SDK announces its tool list at session
394
+ // init while servers are still in `pending`, so their tools never
395
+ // reach the model. Block until each MCP completes its handshake (or
396
+ // the timeout below elapses) so the tool list is complete on first
397
+ // turn.
398
+ MCP_CONNECTION_NONBLOCKING: process.env.MCP_CONNECTION_NONBLOCKING ?? "false",
399
+ MCP_TIMEOUT: process.env.MCP_TIMEOUT ?? "60000"
392
400
  };
393
401
  if (opts.litellmUrl) {
394
402
  env.ANTHROPIC_BASE_URL = opts.litellmUrl;
@@ -1253,7 +1261,7 @@ function coerceBare(spec, value) {
1253
1261
  }
1254
1262
 
1255
1263
  // src/executor.ts
1256
- import { execFileSync as execFileSync27, spawn as spawn4 } from "child_process";
1264
+ import { execFileSync as execFileSync27, spawn as spawn5 } from "child_process";
1257
1265
  import * as fs26 from "fs";
1258
1266
  import * as path23 from "path";
1259
1267
 
@@ -3904,10 +3912,14 @@ function ensurePr(opts) {
3904
3912
  const title = buildPrTitle(effectiveOpts.issueNumber, effectiveOpts.issueTitle, effectiveOpts.draft);
3905
3913
  const body = buildPrBody(effectiveOpts);
3906
3914
  if (existing) {
3915
+ const stripped = existing.url.replace(/^https:\/\/github\.com\//, "");
3916
+ const [owner, repo] = stripped.split("/");
3907
3917
  try {
3908
- gh2(["pr", "edit", String(existing.number), "--body-file", "-"], { input: body, cwd: opts.cwd });
3918
+ gh2(["api", "--method", "PATCH", `repos/${owner}/${repo}/pulls/${existing.number}`, "-f", `body=${body}`], {
3919
+ cwd: opts.cwd
3920
+ });
3909
3921
  } catch (err) {
3910
- throw new Error(`gh pr edit #${existing.number} failed: ${err instanceof Error ? err.message : String(err)}`);
3922
+ throw new Error(`gh api PATCH #${existing.number} failed: ${err instanceof Error ? err.message : String(err)}`);
3911
3923
  }
3912
3924
  return { url: existing.url, number: existing.number, draft: opts.draft, action: "updated" };
3913
3925
  }
@@ -3966,15 +3978,19 @@ var ensureMemorizePr = async (ctx) => {
3966
3978
  const body = buildBody(ctx, branch, datestamp);
3967
3979
  const existing = findExistingPr(branch, ctx.cwd);
3968
3980
  if (existing) {
3981
+ const stripped = existing.url.replace(/^https:\/\/github\.com\//, "");
3982
+ const [owner, repo] = stripped.split("/");
3969
3983
  try {
3970
- gh2(["pr", "edit", String(existing.number), "--body-file", "-"], { input: body, cwd: ctx.cwd });
3984
+ gh2(["api", "--method", "PATCH", `repos/${owner}/${repo}/pulls/${existing.number}`, "-f", `body=${body}`], {
3985
+ cwd: ctx.cwd
3986
+ });
3971
3987
  ctx.output.prUrl = existing.url;
3972
3988
  ctx.data.prResult = { url: existing.url, number: existing.number, action: "updated" };
3973
3989
  process.stdout.write(`[kody memorize] updated PR ${existing.url}
3974
3990
  `);
3975
3991
  } catch (err) {
3976
3992
  ctx.output.exitCode = 4;
3977
- ctx.output.reason = `gh pr edit #${existing.number} failed: ${err instanceof Error ? err.message : String(err)}`;
3993
+ ctx.output.reason = `gh api PATCH #${existing.number} failed: ${err instanceof Error ? err.message : String(err)}`;
3978
3994
  }
3979
3995
  return;
3980
3996
  }
@@ -4347,15 +4363,54 @@ function ensureFeatureBranch(issueNumber, title, defaultBranch, cwd, baseBranch)
4347
4363
  git2(["fetch", "origin"], cwd);
4348
4364
  } catch {
4349
4365
  }
4366
+ let originBranchExists = false;
4350
4367
  try {
4351
4368
  git2(["rev-parse", "--verify", `origin/${branchName}`], cwd);
4369
+ originBranchExists = true;
4370
+ } catch {
4371
+ }
4372
+ if (originBranchExists && baseBranch && baseBranch !== defaultBranch) {
4373
+ let baseExists = false;
4374
+ try {
4375
+ git2(["rev-parse", "--verify", `origin/${baseBranch}`], cwd);
4376
+ baseExists = true;
4377
+ } catch {
4378
+ }
4379
+ if (baseExists) {
4380
+ let descendsFromBase = false;
4381
+ try {
4382
+ git2(["merge-base", "--is-ancestor", `origin/${baseBranch}`, `origin/${branchName}`], cwd);
4383
+ descendsFromBase = true;
4384
+ } catch {
4385
+ }
4386
+ if (!descendsFromBase) {
4387
+ process.stderr.write(
4388
+ `[kody branch] origin/${branchName} does not descend from origin/${baseBranch} \u2014 recreating from base
4389
+ `
4390
+ );
4391
+ try {
4392
+ git2(["push", "origin", "--delete", branchName], cwd);
4393
+ } catch {
4394
+ }
4395
+ try {
4396
+ git2(["update-ref", "-d", `refs/remotes/origin/${branchName}`], cwd);
4397
+ } catch {
4398
+ }
4399
+ try {
4400
+ git2(["branch", "-D", branchName], cwd);
4401
+ } catch {
4402
+ }
4403
+ originBranchExists = false;
4404
+ }
4405
+ }
4406
+ }
4407
+ if (originBranchExists) {
4352
4408
  git2(["checkout", branchName], cwd);
4353
4409
  try {
4354
4410
  git2(["pull", "origin", branchName], cwd);
4355
4411
  } catch {
4356
4412
  }
4357
4413
  return { branch: branchName, created: false };
4358
- } catch {
4359
4414
  }
4360
4415
  try {
4361
4416
  git2(["rev-parse", "--verify", branchName], cwd);
@@ -5571,6 +5626,171 @@ function composeBody({ label, exit, prUrl, reason, dryRun }) {
5571
5626
  return `\u2705 kody ${label} complete`;
5572
5627
  }
5573
5628
 
5629
+ // src/scripts/postReviewResult.ts
5630
/**
 * Extract the verdict from a report's `## Verdict: ...` heading.
 *
 * The first heading that names exactly one verdict wins. Two cases the plain
 * pattern gets wrong are handled explicitly:
 *  - emphasis around the verdict (`## Verdict: **FAIL**`) is tolerated;
 *  - an echoed, unfilled template line (`## Verdict: PASS | CONCERNS | FAIL`)
 *    is skipped rather than being read as PASS.
 *
 * @param {string} body - Markdown report text.
 * @returns {"PASS" | "CONCERNS" | "FAIL" | "UNKNOWN"} Upper-cased verdict, or
 *   "UNKNOWN" when no usable heading is present.
 */
function detectVerdict(body) {
  // Group 1: the verdict word. Group 2: a following "|", which marks the
  // heading as the template's list of alternatives rather than a decision.
  const headingRe = /##\s*Verdict\s*:\s*[*`]*(PASS|CONCERNS|FAIL)\b[*`]*(\s*\|)?/gi;
  for (const match of body.matchAll(headingRe)) {
    if (match[2] === undefined) return match[1].toUpperCase();
  }
  return "UNKNOWN";
}
5635
/**
 * Build the action record describing a posted review.
 *
 * @param {string} verdict - "PASS", "CONCERNS", "FAIL", or anything else.
 * @param {object} payload - Extra fields merged into the record's payload
 *   (a `verdict` key in here overrides the argument).
 * @returns {{type: string, payload: object, timestamp: string}} Action whose
 *   type is REVIEW_PASS / REVIEW_CONCERNS / REVIEW_FAIL, or REVIEW_COMPLETED
 *   for any other verdict, stamped with the current ISO time.
 */
function reviewAction(verdict, payload) {
  let type;
  switch (verdict) {
    case "PASS":
      type = "REVIEW_PASS";
      break;
    case "CONCERNS":
      type = "REVIEW_CONCERNS";
      break;
    case "FAIL":
      type = "REVIEW_FAIL";
      break;
    default:
      type = "REVIEW_COMPLETED";
  }
  const timestamp = new Date().toISOString();
  return { type, payload: { verdict, ...payload }, timestamp };
}
5639
/**
 * Build the REVIEW_FAILED action record.
 *
 * @param {string} reason - Why the review could not be produced or posted.
 * @returns {{type: "REVIEW_FAILED", payload: {reason: string}, timestamp: string}}
 */
function failedAction2(reason) {
  const timestamp = new Date().toISOString();
  const payload = { reason };
  return { type: "REVIEW_FAILED", payload, timestamp };
}
5642
/**
 * Postflight script: publish the agent's final message as a PR review comment
 * and record the outcome on `ctx`.
 *
 * Exit codes written to `ctx.output.exitCode`:
 *   99 - no PR number in `ctx.data.commentTargetNumber`
 *    1 - agent did not complete, produced an empty body, or the verdict is FAIL
 *    4 - posting the review comment failed
 *    0 - review posted with any non-FAIL verdict
 *
 * @param {object} ctx - Run context (reads `data.commentTargetNumber`, `cwd`,
 *   `config.github`; writes `output` and `data`).
 * @param {object} _profile - Unused.
 * @param {object} [agentResult] - Agent outcome (`outcome`, `error`, `finalText`).
 */
var postReviewResult = async (ctx, _profile, agentResult) => {
  const markFailed = (exitCode, reason) => {
    ctx.output.exitCode = exitCode;
    ctx.output.reason = reason;
    ctx.data.action = failedAction2(reason);
  };

  const prNumber = ctx.data.commentTargetNumber;
  if (!prNumber) {
    markFailed(99, "review postflight: no PR number in context");
    return;
  }

  // The failure notice is best-effort: a failure to post it (or to format
  // it) must not mask the original failure that is recorded afterwards.
  const postFailureNotice = (describeFailure) => {
    try {
      postPrReviewComment(prNumber, `\u26A0\uFE0F kody review FAILED: ${describeFailure()}`, ctx.cwd);
    } catch {
    }
  };

  const agentCompleted = Boolean(agentResult) && agentResult.outcome === "completed";
  if (!agentCompleted) {
    const reason = agentResult?.error ?? "agent did not complete";
    postFailureNotice(() => truncate2(reason, 1e3));
    markFailed(1, reason);
    return;
  }

  const reviewBody = agentResult.finalText.trim();
  if (!reviewBody) {
    postFailureNotice(() => "agent produced no review body");
    markFailed(1, "empty review body");
    return;
  }

  try {
    postPrReviewComment(prNumber, reviewBody, ctx.cwd);
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    markFailed(4, `failed to post review comment: ${msg}`);
    return;
  }

  const verdict = detectVerdict(reviewBody);
  ctx.data.reviewVerdict = verdict;
  ctx.data.reviewBody = reviewBody;
  ctx.data.action = reviewAction(verdict, { bodyPreview: truncate2(reviewBody, 500) });
  ctx.output.exitCode = verdict === "FAIL" ? 1 : 0;

  const { owner, repo } = ctx.config.github;
  process.stdout.write(
    `\nREVIEW_POSTED=https://github.com/${owner}/${repo}/pull/${prNumber} (verdict: ${verdict})\n`
  );
};
5692
+
5693
+ // src/scripts/openQaIssue.ts
5694
+ var QA_LABEL = "kody:qa-report";
5695
/**
 * Build the action record describing a published QA report.
 *
 * @param {string} verdict - "PASS", "CONCERNS", "FAIL", or anything else.
 * @param {object} payload - Extra fields merged into the record's payload
 *   (a `verdict` key in here overrides the argument).
 * @returns {{type: string, payload: object, timestamp: string}} Action whose
 *   type is QA_PASS / QA_CONCERNS / QA_FAIL, or QA_COMPLETED for any other
 *   verdict, stamped with the current ISO time.
 */
function qaAction(verdict, payload) {
  let type;
  switch (verdict) {
    case "PASS":
      type = "QA_PASS";
      break;
    case "CONCERNS":
      type = "QA_CONCERNS";
      break;
    case "FAIL":
      type = "QA_FAIL";
      break;
    default:
      type = "QA_COMPLETED";
  }
  const timestamp = new Date().toISOString();
  return { type, payload: { verdict, ...payload }, timestamp };
}
5699
/**
 * Build the QA_FAILED action record.
 *
 * @param {string} reason - Why the QA report could not be produced or published.
 * @returns {{type: "QA_FAILED", payload: {reason: string}, timestamp: string}}
 */
function failedAction3(reason) {
  const timestamp = new Date().toISOString();
  const payload = { reason };
  return { type: "QA_FAILED", payload, timestamp };
}
5702
/**
 * Turn a free-form scope string into a lowercase, hyphen-separated slug of at
 * most 60 characters.
 *
 * Every run of characters outside `a-z0-9` becomes a single hyphen. The
 * trailing hyphen is removed AFTER truncation, so a cut that lands right
 * after a separator cannot leave the slug ending in "-".
 *
 * @param {string} scope - Human-readable scope, e.g. "Admin chat: memory recall".
 * @returns {string} Slug such as "admin-chat-memory-recall"; empty when the
 *   scope contains no alphanumeric characters.
 */
function slugifyScope(scope) {
  return scope
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-/, "")
    .slice(0, 60)
    .replace(/-$/, "");
}
5705
/**
 * Compose the title of a QA report issue: `QA [<verdict>]: <focus> — <date>`.
 *
 * The title is capped at 240 characters. Only the focus text is shortened to
 * fit, so the verdict tag and the trailing date survive even when the scope
 * is very long.
 *
 * @param {string | undefined} scope - Optional focus; blank or missing falls
 *   back to "smoke".
 * @param {string} verdict - Verdict string; "UNKNOWN" is rendered as "REPORT".
 * @returns {string} Issue title, at most 240 characters.
 */
function buildIssueTitle(scope, verdict) {
  const maxTitleLength = 240;
  const date = new Date().toISOString().slice(0, 10);
  const verdictTag = verdict === "UNKNOWN" ? "REPORT" : verdict;
  const prefix = `QA [${verdictTag}]: `;
  const suffix = ` \u2014 ${date}`;
  const room = Math.max(0, maxTitleLength - prefix.length - suffix.length);
  const focus = (scope?.trim() || "smoke").slice(0, room).trimEnd();
  // Final slice keeps the hard cap even for a pathologically long verdict tag.
  return `${prefix}${focus}${suffix}`.slice(0, maxTitleLength);
}
5711
/**
 * Create (or update, via `--force`) the QA report label through `gh`.
 *
 * @param {string} cwd - Working directory passed to `gh2`.
 * @returns {boolean} true when `gh label create` succeeded, false when it
 *   threw; callers use this to decide whether the label can be attached.
 */
function ensureLabel(cwd) {
  const labelArgs = [
    "label",
    "create",
    QA_LABEL,
    "--color",
    "8b5cf6",
    "--description",
    "kody: QA report",
    "--force"
  ];
  let created = true;
  try {
    gh2(labelArgs, { cwd });
  } catch {
    // Labelling is optional; report the failure through the return value.
    created = false;
  }
  return created;
}
5719
/**
 * Open a new issue with `gh issue create`, feeding the body on stdin.
 *
 * @param {string} title - Issue title.
 * @param {string} body - Issue body (markdown).
 * @param {boolean} hasLabel - Attach the QA label when true.
 * @param {string} cwd - Working directory passed to `gh2`.
 * @returns {{number: number, url: string}} Number and URL of the new issue,
 *   parsed from the last non-empty line of the command output.
 * @throws {Error} When that line does not contain an `/issues/<n>` URL.
 */
function createQaIssue(title, body, hasLabel, cwd) {
  const labelArgs = hasLabel ? ["--label", QA_LABEL] : [];
  const out = gh2(["issue", "create", "--title", title, "--body-file", "-", ...labelArgs], { input: body, cwd });

  const nonEmptyLines = out
    .split("\n")
    .map((line) => line.trim())
    .filter(Boolean);
  const url = nonEmptyLines.length > 0 ? nonEmptyLines[nonEmptyLines.length - 1] : "";

  const issueMatch = /\/issues\/(\d+)\b/.exec(url);
  if (issueMatch === null) {
    throw new Error(`gh issue create returned unexpected output: ${out}`);
  }
  return { number: Number(issueMatch[1]), url };
}
5728
/**
 * Postflight script: publish the agent's QA report, either as a comment on an
 * existing issue (`ctx.args.issue`) or as a freshly opened issue.
 *
 * Exit codes written to `ctx.output.exitCode`:
 *   1 - agent did not complete, produced an empty report, or the verdict is FAIL
 *   4 - commenting on / creating the issue failed
 *   0 - report published with any non-FAIL verdict
 *
 * @param {object} ctx - Run context (reads `args`, `cwd`, `config.github`;
 *   writes `output` and `data`).
 * @param {object} _profile - Unused.
 * @param {object} [agentResult] - Agent outcome (`outcome`, `error`, `finalText`).
 */
var openQaIssue = async (ctx, _profile, agentResult) => {
  const markFailed = (exitCode, reason) => {
    ctx.output.exitCode = exitCode;
    ctx.output.reason = reason;
    ctx.data.action = failedAction3(reason);
  };
  const messageOf = (err) => (err instanceof Error ? err.message : String(err));
  const exitCodeFor = (v) => (v === "FAIL" ? 1 : 0);

  const agentCompleted = Boolean(agentResult) && agentResult.outcome === "completed";
  if (!agentCompleted) {
    const reason = agentResult?.error ?? "agent did not complete";
    process.stderr.write(`qa-engineer: ${reason}\n`);
    markFailed(1, reason);
    return;
  }

  const reportBody = agentResult.finalText.trim();
  if (!reportBody) {
    process.stderr.write("qa-engineer: agent produced no report body\n");
    markFailed(1, "empty report body");
    return;
  }

  const verdict = detectVerdict(reportBody);
  ctx.data.qaVerdict = verdict;
  ctx.data.qaReport = reportBody;

  // Comment mode: a positive, finite issue number was supplied.
  const existingIssue = ctx.args.issue;
  const commentMode = typeof existingIssue === "number" && Number.isFinite(existingIssue) && existingIssue > 0;
  if (commentMode) {
    try {
      postIssueComment(existingIssue, reportBody, ctx.cwd);
    } catch (err) {
      markFailed(4, `failed to comment on issue #${existingIssue}: ${messageOf(err)}`);
      return;
    }
    const { owner, repo } = ctx.config.github;
    process.stdout.write(
      `\nQA_REPORT_POSTED=https://github.com/${owner}/${repo}/issues/${existingIssue} (verdict: ${verdict})\n`
    );
    ctx.data.action = qaAction(verdict, { issueNumber: existingIssue, mode: "comment" });
    ctx.output.exitCode = exitCodeFor(verdict);
    return;
  }

  // Create mode: open a new issue, labelled when the label could be ensured.
  const scope = ctx.args.scope;
  const title = buildIssueTitle(scope, verdict);
  const hasLabel = ensureLabel(ctx.cwd);
  let created;
  try {
    created = createQaIssue(title, reportBody, hasLabel, ctx.cwd);
  } catch (err) {
    markFailed(4, `failed to open QA issue: ${truncate2(messageOf(err), 1e3)}`);
    return;
  }

  process.stdout.write(`\nQA_REPORT_POSTED=${created.url} (verdict: ${verdict})\n`);
  ctx.data.action = qaAction(verdict, {
    issueNumber: created.number,
    issueUrl: created.url,
    titleSlug: scope ? slugifyScope(scope) : "smoke",
    mode: "create"
  });
  ctx.output.exitCode = exitCodeFor(verdict);
};
5793
+
5574
5794
  // src/scripts/parseAgentResult.ts
5575
5795
  var parseAgentResult2 = async (ctx, profile, agentResult) => {
5576
5796
  if (!agentResult) {
@@ -5945,70 +6165,6 @@ function renderResearchComment(issueNumber, body) {
5945
6165
  ${body}`;
5946
6166
  }
5947
6167
 
5948
- // src/scripts/postReviewResult.ts
5949
- function detectVerdict(body) {
5950
- const m = body.match(/##\s*Verdict\s*:\s*(PASS|CONCERNS|FAIL)\b/i);
5951
- if (!m) return "UNKNOWN";
5952
- return m[1].toUpperCase();
5953
- }
5954
- function reviewAction(verdict, payload) {
5955
- const type = verdict === "PASS" ? "REVIEW_PASS" : verdict === "CONCERNS" ? "REVIEW_CONCERNS" : verdict === "FAIL" ? "REVIEW_FAIL" : "REVIEW_COMPLETED";
5956
- return { type, payload: { verdict, ...payload }, timestamp: (/* @__PURE__ */ new Date()).toISOString() };
5957
- }
5958
- function failedAction2(reason) {
5959
- return { type: "REVIEW_FAILED", payload: { reason }, timestamp: (/* @__PURE__ */ new Date()).toISOString() };
5960
- }
5961
- var postReviewResult = async (ctx, _profile, agentResult) => {
5962
- const prNumber = ctx.data.commentTargetNumber;
5963
- if (!prNumber) {
5964
- ctx.output.exitCode = 99;
5965
- ctx.output.reason = "review postflight: no PR number in context";
5966
- ctx.data.action = failedAction2(ctx.output.reason);
5967
- return;
5968
- }
5969
- if (!agentResult || agentResult.outcome !== "completed") {
5970
- const reason = agentResult?.error ?? "agent did not complete";
5971
- try {
5972
- postPrReviewComment(prNumber, `\u26A0\uFE0F kody review FAILED: ${truncate2(reason, 1e3)}`, ctx.cwd);
5973
- } catch {
5974
- }
5975
- ctx.output.exitCode = 1;
5976
- ctx.output.reason = reason;
5977
- ctx.data.action = failedAction2(reason);
5978
- return;
5979
- }
5980
- const reviewBody = agentResult.finalText.trim();
5981
- if (!reviewBody) {
5982
- try {
5983
- postPrReviewComment(prNumber, `\u26A0\uFE0F kody review FAILED: agent produced no review body`, ctx.cwd);
5984
- } catch {
5985
- }
5986
- ctx.output.exitCode = 1;
5987
- ctx.output.reason = "empty review body";
5988
- ctx.data.action = failedAction2("empty review body");
5989
- return;
5990
- }
5991
- try {
5992
- postPrReviewComment(prNumber, reviewBody, ctx.cwd);
5993
- } catch (err) {
5994
- const msg = err instanceof Error ? err.message : String(err);
5995
- ctx.output.exitCode = 4;
5996
- ctx.output.reason = `failed to post review comment: ${msg}`;
5997
- ctx.data.action = failedAction2(ctx.output.reason);
5998
- return;
5999
- }
6000
- const verdict = detectVerdict(reviewBody);
6001
- ctx.data.reviewVerdict = verdict;
6002
- ctx.data.reviewBody = reviewBody;
6003
- ctx.data.action = reviewAction(verdict, { bodyPreview: truncate2(reviewBody, 500) });
6004
- ctx.output.exitCode = verdict === "FAIL" ? 1 : 0;
6005
- process.stdout.write(
6006
- `
6007
- REVIEW_POSTED=https://github.com/${ctx.config.github.owner}/${ctx.config.github.repo}/pull/${prNumber} (verdict: ${verdict})
6008
- `
6009
- );
6010
- };
6011
-
6012
6168
  // src/scripts/recordClassification.ts
6013
6169
  import { execFileSync as execFileSync19 } from "child_process";
6014
6170
  var API_TIMEOUT_MS8 = 3e4;
@@ -6028,7 +6184,7 @@ var recordClassification = async (ctx) => {
6028
6184
  reason = parsed?.reason ?? null;
6029
6185
  }
6030
6186
  if (!classification) {
6031
- ctx.data.action = failedAction3("classification missing or invalid");
6187
+ ctx.data.action = failedAction4("classification missing or invalid");
6032
6188
  tryAuditComment(
6033
6189
  issueNumber,
6034
6190
  "\u26A0\uFE0F kody classifier could not decide \u2014 please re-run with an explicit `@kody <type>`.",
@@ -6069,7 +6225,7 @@ function tryAuditComment(issueNumber, body, cwd) {
6069
6225
  function makeAction3(type, payload) {
6070
6226
  return { type, payload, timestamp: (/* @__PURE__ */ new Date()).toISOString() };
6071
6227
  }
6072
- function failedAction3(reason) {
6228
+ function failedAction4(reason) {
6073
6229
  return { type: "CLASSIFY_FAILED", payload: { reason }, timestamp: (/* @__PURE__ */ new Date()).toISOString() };
6074
6230
  }
6075
6231
 
@@ -6108,12 +6264,12 @@ function fail(ctx, profile, reason) {
6108
6264
  ctx.data.agentDone = false;
6109
6265
  ctx.data.agentFailureReason = reason;
6110
6266
  const modeSeg = profile.name.replace(/-/g, "_").toUpperCase();
6111
- const failedAction4 = {
6267
+ const failedAction5 = {
6112
6268
  type: `${modeSeg}_FAILED`,
6113
6269
  payload: { reason },
6114
6270
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
6115
6271
  };
6116
- ctx.data.action = failedAction4;
6272
+ ctx.data.action = failedAction5;
6117
6273
  }
6118
6274
  function countActionItems(block) {
6119
6275
  if (!block.trim()) return 0;
@@ -6154,12 +6310,12 @@ function fail2(ctx, profile, reason) {
6154
6310
  ctx.data.agentDone = false;
6155
6311
  ctx.data.agentFailureReason = reason;
6156
6312
  const modeSeg = profile.name.replace(/-/g, "_").toUpperCase();
6157
- const failedAction4 = {
6313
+ const failedAction5 = {
6158
6314
  type: `${modeSeg}_FAILED`,
6159
6315
  payload: { reason },
6160
6316
  timestamp: (/* @__PURE__ */ new Date()).toISOString()
6161
6317
  };
6162
- ctx.data.action = failedAction4;
6318
+ ctx.data.action = failedAction5;
6163
6319
  }
6164
6320
 
6165
6321
  // src/scripts/resolveArtifacts.ts
@@ -7137,6 +7293,151 @@ function sleep2(ms) {
7137
7293
  return new Promise((res) => setTimeout(res, ms));
7138
7294
  }
7139
7295
 
7296
+ // src/scripts/warmupMcp.ts
7297
+ import { spawn as spawn4 } from "child_process";
7298
+ var PER_SERVER_TIMEOUT_MS = 6e4;
7299
+ var PER_REQUEST_TIMEOUT_MS = 2e4;
7300
/**
 * Preflight script: start each MCP server declared on the profile once, one
 * after another, and log to stderr how many tools it exposed and how long the
 * handshake took. A server that fails to warm up is logged and skipped; the
 * script itself never throws because of it.
 *
 * @param {object} _ctx - Unused.
 * @param {object} profile - Executable profile; reads `claudeCode.mcpServers`
 *   (entries with `name`, `command`, optional `args` and `env`).
 */
var warmupMcp = async (_ctx, profile) => {
  const servers = profile.claudeCode.mcpServers ?? [];
  for (const server of servers) {
    const startedAt = Date.now();
    try {
      const outcome = await warmupOne(server.command, server.args ?? [], server.env);
      const elapsedMs = Date.now() - startedAt;
      process.stderr.write(`[kody warmup] ${server.name}: ${outcome.toolCount} tools (${elapsedMs}ms)\n`);
    } catch (err) {
      const elapsedMs = Date.now() - startedAt;
      const reason = err instanceof Error ? err.message : String(err);
      process.stderr.write(`[kody warmup] ${server.name} FAILED after ${elapsedMs}ms: ${reason}\n`);
    }
  }
};
7318
/**
 * Spawn one MCP server over stdio, run the `initialize` -> `tools/list`
 * handshake against it, and report how many tools it exposes. The child is
 * always terminated afterwards (SIGTERM, then SIGKILL after 2s).
 *
 * A child that cannot be started (e.g. ENOENT for a missing command) or whose
 * stdin breaks (EPIPE after an early exit) emits an 'error' event. Both are
 * listened for here: without a listener Node raises the event as an uncaught
 * exception, which would take down the whole run instead of failing the
 * warmup.
 *
 * @param {string} command - Executable to spawn.
 * @param {string[]} args - Arguments for the executable.
 * @param {Record<string, string>} [env] - Extra environment variables, layered
 *   over `process.env`.
 * @returns {Promise<{toolCount: number}>} Number of tools the server listed.
 * @throws {Error} When the child cannot be run, a request gets no response
 *   before its deadline, the server answers with a JSON-RPC error, or it
 *   lists zero tools.
 */
async function warmupOne(command, args, env) {
  const child = spawn4(command, args, {
    stdio: ["pipe", "pipe", "pipe"],
    env: env ? { ...process.env, ...env } : process.env
  });

  // Turn the child's 'error' event into a rejection that pending requests
  // race against, so a failed spawn fails fast instead of waiting out the
  // request timeout.
  let rejectOnChildError;
  const childFailure = new Promise((_resolve, reject) => {
    rejectOnChildError = reject;
  });
  // Nobody may be awaiting the promise when the error fires; keep that from
  // being reported as an unhandled rejection.
  childFailure.catch(() => {});
  child.on("error", (err) => {
    rejectOnChildError(new Error(`failed to run ${command}: ${err.message}`, { cause: err }));
  });

  // Writes to a dead child fail asynchronously on the stdin stream; remember
  // the error so it can be surfaced with the request failure.
  let stdinError = null;
  child.stdin.on("error", (err) => {
    stdinError = err;
  });

  // Keep only the last 4 KiB of stderr for diagnostics.
  let stderrBuf = "";
  child.stderr.on("data", (b) => {
    stderrBuf += b.toString("utf8");
    if (stderrBuf.length > 4096) stderrBuf = stderrBuf.slice(-4096);
  });

  const overallDeadline = Date.now() + PER_SERVER_TIMEOUT_MS;
  const lines = lineStream(child.stdout);
  let nextId = 1;

  // Send a JSON-RPC request and return its id.
  const send = (method, params) => {
    const id = nextId++;
    const payload = JSON.stringify({ jsonrpc: "2.0", id, method, params }) + "\n";
    child.stdin.write(payload);
    return id;
  };

  // Send a JSON-RPC notification (no id, no response expected).
  const notify = (method, params) => {
    const payload = JSON.stringify({ jsonrpc: "2.0", method, params }) + "\n";
    child.stdin.write(payload);
  };

  // Read stdout lines until the response carrying `id` shows up. Lines that
  // are not JSON, or that belong to another id, are skipped.
  const awaitResponse = async (id) => {
    const reqDeadline = Math.min(Date.now() + PER_REQUEST_TIMEOUT_MS, overallDeadline);
    while (Date.now() < reqDeadline) {
      const line = await Promise.race([lines.next(reqDeadline - Date.now()), childFailure]);
      if (line === null) break; // timed out, or stdout ended
      let msg = null;
      try {
        msg = JSON.parse(line);
      } catch {
        continue; // servers may print non-JSON noise on stdout
      }
      if (msg && msg.id === id) return msg;
    }
    const stdinNote = stdinError ? `; stdin error: ${stdinError.message}` : "";
    throw new Error(
      `request id=${id} timed out (stderr tail: ${stderrBuf.trim().slice(-300) || "(empty)"})${stdinNote}`
    );
  };

  try {
    const initId = send("initialize", {
      protocolVersion: "2024-11-05",
      capabilities: {},
      clientInfo: { name: "kody-warmup", version: "0.1.0" }
    });
    const initResp = await awaitResponse(initId);
    if (initResp.error) throw new Error(`initialize error: ${initResp.error.message}`);
    notify("notifications/initialized");

    const listId = send("tools/list");
    const listResp = await awaitResponse(listId);
    if (listResp.error) throw new Error(`tools/list error: ${listResp.error.message}`);
    const tools = listResp.result?.tools;
    const toolCount = Array.isArray(tools) ? tools.length : 0;
    if (toolCount === 0) throw new Error("tools/list returned 0 tools");
    return { toolCount };
  } finally {
    // Teardown is best-effort: the child may already be gone.
    try {
      child.kill("SIGTERM");
    } catch {
    }
    const killTimer = setTimeout(() => {
      try {
        child.kill("SIGKILL");
      } catch {
      }
    }, 2e3);
    killTimer.unref();
    child.once("exit", () => clearTimeout(killTimer));
  }
}
7385
/**
 * Wrap a readable stream in a pull-style reader that yields one non-empty
 * line at a time.
 *
 * Incoming data is buffered as raw bytes and only decoded once a complete
 * line (terminated by "\n") is available, so a multibyte UTF-8 character that
 * is split across two chunks is decoded correctly. A trailing "\r" is removed
 * from each terminated line; empty lines are dropped. Whatever is left when
 * the stream ends is delivered as a final line.
 *
 * Only one `next()` call may be pending at a time.
 *
 * @param {import("node:stream").Readable} stream - Source emitting "data"
 *   (Buffer or string chunks) and "end".
 * @returns {{next: (timeoutMs: number) => Promise<string | null>}} Reader
 *   whose `next` resolves with the next line, or with null when `timeoutMs`
 *   elapses first or the stream has ended with nothing queued.
 */
function lineStream(stream) {
  let pending = Buffer.alloc(0);
  const queue = [];
  let waiter = null; // { resolve, timer } of the pending next() call, if any
  let ended = false;

  // Resolve the pending next() call and cancel its timeout timer.
  const settle = (value) => {
    const { resolve: resolveWaiter, timer } = waiter;
    waiter = null;
    clearTimeout(timer);
    resolveWaiter(value);
  };

  const tryDeliver = () => {
    if (!waiter) return;
    if (queue.length > 0) {
      settle(queue.shift());
    } else if (ended) {
      settle(null);
    }
  };

  stream.on("data", (chunk) => {
    const bytes = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
    pending = pending.length === 0 ? bytes : Buffer.concat([pending, bytes]);
    let idx;
    // 0x0a is "\n"; it never occurs inside a multibyte UTF-8 sequence, so
    // splitting on it at byte level is safe.
    while ((idx = pending.indexOf(0x0a)) >= 0) {
      const line = pending.subarray(0, idx).toString("utf8").replace(/\r$/, "");
      pending = pending.subarray(idx + 1);
      if (line.length > 0) queue.push(line);
    }
    tryDeliver();
  });

  stream.on("end", () => {
    if (pending.length > 0) {
      queue.push(pending.toString("utf8"));
      pending = Buffer.alloc(0);
    }
    ended = true;
    tryDeliver();
  });

  return {
    next: (timeoutMs) => new Promise((resolve4) => {
      if (queue.length > 0) {
        resolve4(queue.shift());
        return;
      }
      if (ended) {
        resolve4(null);
        return;
      }
      const timer = setTimeout(() => {
        if (waiter && waiter.resolve === resolve4) {
          waiter = null;
          resolve4(null);
        }
      }, Math.max(0, timeoutMs));
      // Do not keep the process alive just for a read timeout.
      timer.unref?.();
      waiter = { resolve: resolve4, timer };
    })
  };
}
7440
+
7140
7441
  // src/scripts/watchStalePrsFlow.ts
7141
7442
  function readWatchConfig(ctx) {
7142
7443
  const cfg = ctx.config.watch;
@@ -7322,6 +7623,7 @@ var preflightScripts = {
7322
7623
  skipAgent,
7323
7624
  classifyByLabel,
7324
7625
  diagMcp,
7626
+ warmupMcp,
7325
7627
  dispatchJobTicks,
7326
7628
  dispatchJobFileTicks
7327
7629
  };
@@ -7358,6 +7660,7 @@ var postflightScripts = {
7358
7660
  recordClassification,
7359
7661
  dispatchClassified,
7360
7662
  notifyTerminal,
7663
+ openQaIssue,
7361
7664
  recordOutcome,
7362
7665
  mergeReleasePr,
7363
7666
  waitForCi,
@@ -7699,7 +8002,7 @@ async function runShellEntry(entry, ctx, profile) {
7699
8002
  env[`KODY_CFG_${k}`] = v;
7700
8003
  }
7701
8004
  const timeoutMs = resolveShellTimeoutMs(entry);
7702
- const child = spawn4("bash", [shellPath, ...positional], {
8005
+ const child = spawn5("bash", [shellPath, ...positional], {
7703
8006
  cwd: ctx.cwd,
7704
8007
  env,
7705
8008
  stdio: ["pipe", "pipe", "pipe"],
@@ -0,0 +1,91 @@
1
+ {
2
+ "name": "qa-engineer",
3
+ "role": "primitive",
4
+ "describe": "Free-form QA: browses a running site with Playwright MCP, explores routes, exercises UI states, posts a structured QA report. Opens a new issue per run by default; pass --issue <N> to comment on an existing one. Never modifies tracked source files; only writes throwaway artifacts under the gitignored .kody/qa-reports/.",
5
+ "kind": "oneshot",
6
+ "inputs": [
7
+ {
8
+ "name": "url",
9
+ "flag": "--url",
10
+ "type": "string",
11
+ "required": true,
12
+ "describe": "Base URL the agent should browse (e.g. http://localhost:3000)."
13
+ },
14
+ {
15
+ "name": "scope",
16
+ "flag": "--scope",
17
+ "type": "string",
18
+ "required": false,
19
+ "describe": "Optional feature focus (e.g. 'admin chat memory recall'). Without a scope the agent does a broad smoke pass over discovered routes."
20
+ },
21
+ {
22
+ "name": "issue",
23
+ "flag": "--issue",
24
+ "type": "int",
25
+ "required": false,
26
+ "describe": "Optional: comment the QA report on this existing issue instead of opening a new one."
27
+ },
28
+ {
29
+ "name": "authProfile",
30
+ "flag": "--auth-profile",
31
+ "type": "string",
32
+ "required": false,
33
+ "describe": "Path to a Playwright storageState.json for pre-authenticated sessions (skips manual login)."
34
+ }
35
+ ],
36
+ "claudeCode": {
37
+ "model": "inherit",
38
+ "permissionMode": "acceptEdits",
39
+ "maxTurns": null,
40
+ "maxThinkingTokens": null,
41
+ "systemPromptAppend": null,
42
+ "tools": [
43
+ "Read",
44
+ "Grep",
45
+ "Glob",
46
+ "Bash",
47
+ "Write",
48
+ "Edit",
49
+ "mcp__playwright"
50
+ ],
51
+ "hooks": ["block-git"],
52
+ "skills": [],
53
+ "commands": [],
54
+ "subagents": [],
55
+ "plugins": [],
56
+ "mcpServers": [
57
+ {
58
+ "name": "playwright",
59
+ "command": "npx",
60
+ "args": ["-y", "--package=@playwright/mcp@latest", "--", "playwright-mcp"]
61
+ }
62
+ ]
63
+ },
64
+ "cliTools": [
65
+ {
66
+ "name": "playwright",
67
+ "install": {
68
+ "required": false,
69
+ "checkCommand": "ls \"$HOME/.cache/ms-playwright\" 2>/dev/null | grep -q '^chromium' || ls \"$HOME/Library/Caches/ms-playwright\" 2>/dev/null | grep -q '^chromium'",
70
+ "installCommand": "npx --yes playwright install chromium"
71
+ },
72
+ "verify": "ls \"$HOME/.cache/ms-playwright\" 2>/dev/null | grep -q '^chromium' || ls \"$HOME/Library/Caches/ms-playwright\" 2>/dev/null | grep -q '^chromium'",
73
+ "usage": "The Playwright MCP server uses Chromium under the hood. Preflight ensures it is installed. Save screenshots under `.kody/qa-reports/<run>/` if you take any — that directory is gitignored.",
74
+ "allowedUses": ["--version"]
75
+ }
76
+ ],
77
+ "inputArtifacts": [],
78
+ "outputArtifacts": [],
79
+ "scripts": {
80
+ "preflight": [
81
+ { "script": "discoverQaContext" },
82
+ { "script": "loadQaGuide" },
83
+ { "script": "loadConventions" },
84
+ { "script": "warmupMcp" },
85
+ { "script": "composePrompt" }
86
+ ],
87
+ "postflight": [
88
+ { "script": "openQaIssue" }
89
+ ]
90
+ }
91
+ }
@@ -0,0 +1,103 @@
1
+ You are Kody, a senior QA engineer. Your job is to **browse the running app like a real user**, exercise the UI broadly and intentionally, and produce one structured QA report. You do NOT fix bugs. You do NOT touch tracked source files. You do NOT run `git` or `gh`.
2
+
3
+ You may write throwaway artifacts (screenshots, ad-hoc Playwright specs) under `.kody/qa-reports/` — that path is gitignored.
4
+
5
+ # Target
6
+
7
+ Base URL: `{{args.url}}`
8
+ {{#args.scope}}Focus: **{{args.scope}}**{{/args.scope}}
9
+ {{^args.scope}}Focus: broad smoke across discovered routes.{{/args.scope}}
10
+ {{#args.authProfile}}Auth: a saved Playwright `storageState.json` is available at `{{args.authProfile}}`. Pass it to `mcp__playwright__browser_navigate` via the `storageState` parameter so the session starts pre-authenticated.{{/args.authProfile}}
11
+ {{^args.authProfile}}Auth: log in fresh using credentials from the QA guide if needed.{{/args.authProfile}}
12
+
13
+ Report destination: {{#args.issue}}existing issue #{{args.issue}} (postflight will comment on it){{/args.issue}}{{^args.issue}}a new issue (postflight will open one and label it `kody:qa-report`){{/args.issue}}.
14
+
15
+ # How to browse
16
+
17
+ You have the **Playwright MCP** tools (`mcp__playwright__browser_navigate`, `mcp__playwright__browser_snapshot`, `mcp__playwright__browser_click`, `mcp__playwright__browser_type`, `mcp__playwright__browser_take_screenshot`, etc.). These return structured accessibility snapshots — prefer them over raw screenshots when you need to reason about the DOM. Reach for screenshots when something *looks* wrong rather than *is* wrong.
18
+
19
+ Before anything else, navigate to the base URL:
20
+
21
+ ```
22
+ mcp__playwright__browser_navigate({ url: "{{args.url}}" })
23
+ ```
24
+
25
+ If that errors (timeout, DNS, connection refused), the app is unreachable. STOP browsing, write a short report explaining the failure, and exit. Don't fabricate findings.
26
+
27
+ # QA context (auto-discovered from the repo)
28
+
29
+ ```
30
+ {{qaContext}}
31
+ ```
32
+
33
+ # QA guide (committed in the repo — authoritative over the auto-discovery above)
34
+
35
+ {{qaGuide}}
36
+
37
+ {{conventionsBlock}}
38
+
39
+ {{toolsUsage}}
40
+
41
+ # What to do
42
+
43
+ 1. **Plan the session.** From the QA context, the QA guide, and the focus, build a short test matrix. For each candidate UI surface, list the user-visible behaviors worth verifying. Skip surfaces unrelated to the focus.
44
+
45
+ 2. **Authenticate if required.** If a route under test needs a role and you have credentials (in the QA guide or via `--auth-profile`), log in once. If credentials for a needed role are missing, note it as a gap and browse only what you can.
46
+
47
+ 3. **Exercise each surface.** For every UI surface in your matrix, run through the relevant states. Don't pad — apply the checklist where it actually matters:
48
+ - **Happy path.** The user-visible behavior the surface exists to support, end to end.
49
+ - **Empty state.** Zero items, no rows, no results. Is the screen meaningfully empty or just confusingly blank?
50
+ - **Loading.** What renders before data resolves? Skeletons? Layout shift?
51
+ - **Error.** Force a failure where you reasonably can — invalid input, broken nav, network throttle. Is the error visible and actionable?
52
+ - **Validation.** Submit forms with invalid / boundary / empty inputs. What's the feedback?
53
+ - **Mobile / narrow viewport.** Resize to ~375px wide. Anything cut off, overlapping, illegible?
54
+ - **Keyboard nav.** Tab through. Is focus visible at every step? Can a keyboard-only user reach every interactive element? Does Enter/Space activate the right control?
55
+ - **Destructive action.** If present (delete, archive, sign out), confirm it's gated behind a confirmation and the gate works.
56
+
57
+ 4. **Capture evidence.** Save screenshots that show the bug or the verified-good state under `.kody/qa-reports/<scope-slug>/<finding-slug>.png`. Reference them by relative path in the report. Don't screenshot every step — only what you need to back a finding.
58
+
59
+ 5. **Write the report.** Your FINAL MESSAGE must be **the entire QA report markdown, verbatim** — no preamble, no `DONE` marker, no `COMMIT_MSG` marker. The postflight reads your final message and posts it.
60
+
61
+ # Required output format
62
+
63
+ ```
64
+ ## Verdict: PASS | CONCERNS | FAIL
65
+
66
+ _QA by kody — browsed `{{args.url}}`{{#args.scope}} (focus: {{args.scope}}){{/args.scope}}_
67
+
68
+ ### Summary
69
+ <2–3 sentences: what you covered and what the running app actually does>
70
+
71
+ ### What I browsed
72
+ - `<route>` — <surface checked, states exercised, screenshot path if any>
73
+ - ...
74
+
75
+ ### Findings
76
+ - **[P0 | P1 | P2 | P3] <short title>** — `<route>`
77
+ - **Steps:** 1) … 2) … 3) …
78
+ - **Expected:** …
79
+ - **Actual:** …
80
+ - **Evidence:** `.kody/qa-reports/.../shot.png` (if applicable)
81
+ - ...
82
+ - (write "None." if you found no defects)
83
+
84
+ ### Gaps
85
+ - <anything you could NOT verify and why — missing creds, unreachable surface, no test data — say "None." if you covered everything in your matrix>
86
+
87
+ ### Bottom line
88
+ <one sentence>
89
+ ```
90
+
91
+ # Severity rubric
92
+
93
+ - **P0** — blocks core flow, data loss, security exposure, total breakage on a critical path. Verdict must be FAIL if any P0 lands.
94
+ - **P1** — broken feature on a non-critical path, or a P0-class issue with a workaround. Verdict typically FAIL.
95
+ - **P2** — degraded UX (visual bugs, minor a11y, confusing copy, edge-case handling). Verdict typically CONCERNS.
96
+ - **P3** — polish (alignment, micro-copy, non-blocking inconsistency). Doesn't affect verdict on its own.
97
+
98
+ # Rules
99
+
100
+ - No commits. No `git` / `gh`. No edits outside `.kody/qa-reports/`.
101
+ - Verdict **PASS** only when every UI surface you exercised behaved as the user would expect.
102
+ - Be specific in every finding: route + concrete steps + screenshot path (or DOM snapshot reference). No "consider improving X" advice.
103
+ - If the base URL was unreachable, the report should still be valid markdown — just say so under "Bottom line" and "Gaps", and use verdict **CONCERNS** (not FAIL — there's no defect, only an unreachable target).
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kody-ade/kody-engine",
3
- "version": "0.4.10",
3
+ "version": "0.4.13",
4
4
  "description": "kody — autonomous development engine. Single-session Claude Code agent behind a generic executor + declarative executable profiles.",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -81,13 +81,16 @@ jobs:
81
81
  node-version: 22
82
82
 
83
83
  - name: Write pip cache key for LiteLLM
84
- run: echo "litellm[proxy]" > "${{ runner.temp }}/kody-pip-requirements.txt"
84
+ run: echo "litellm[proxy]" > .kody-pip-requirements.txt
85
85
 
86
86
  - uses: actions/setup-python@v5
87
87
  with:
88
88
  python-version: "3.12"
89
89
  cache: "pip"
90
- cache-dependency-path: ${{ runner.temp }}/kody-pip-requirements.txt
90
+ cache-dependency-path: .kody-pip-requirements.txt
91
+
92
+ - name: Remove pip cache key file (avoid blocking branch switches)
93
+ run: rm -f .kody-pip-requirements.txt
91
94
 
92
95
  - env:
93
96
  ALL_SECRETS: ${{ toJSON(secrets) }}