@kody-ade/kody-engine 0.4.114 → 0.4.116

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/bin/kody.js +79 -33
  2. package/package.json +1 -1
package/dist/bin/kody.js CHANGED
@@ -880,7 +880,7 @@ var init_loadPriorArt = __esm({
880
880
  // package.json
881
881
  var package_default = {
882
882
  name: "@kody-ade/kody-engine",
883
- version: "0.4.114",
883
+ version: "0.4.116",
884
884
  description: "kody \u2014 autonomous development engine. Single-session Claude Code agent behind a generic executor + declarative executable profiles.",
885
885
  license: "MIT",
886
886
  type: "module",
@@ -4695,6 +4695,10 @@ async function defaultRunJob(job) {
4695
4695
  process.exit(cloneCode);
4696
4696
  return;
4697
4697
  }
4698
+ const authorName = process.env.GIT_AUTHOR_NAME ?? "Kody Bot";
4699
+ const authorEmail = process.env.GIT_AUTHOR_EMAIL ?? "kody-bot@users.noreply.github.com";
4700
+ await run("git", ["config", "user.name", authorName], workdir);
4701
+ await run("git", ["config", "user.email", authorEmail], workdir);
4698
4702
  process.stdout.write(`[runner-serve] job ${job.jobId}: running issue #${job.issueNumber}
4699
4703
  `);
4700
4704
  const runCode = await run("kody", ["run", "--issue", String(job.issueNumber)], workdir);
@@ -4880,6 +4884,7 @@ function sleep2(ms) {
4880
4884
  }
4881
4885
 
4882
4886
  // src/pool/manager.ts
4887
+ var MAX_CLAIM_ATTEMPTS = 3;
4883
4888
  var PoolManager = class {
4884
4889
  constructor(deps) {
4885
4890
  this.deps = deps;
@@ -4919,43 +4924,84 @@ var PoolManager = class {
4919
4924
  await this.refill();
4920
4925
  }
4921
4926
  /**
4922
- * Claim a warm machine for a job. Returns ok:false (caller falls back to
4923
- * create-fresh) when the pool is empty or the woken machine fails to take
4924
- * the job. The pick is synchronous the atomic step.
4927
+ * Claim a warm machine for a job. Tries free machines in turn: if a woken
4928
+ * machine is stale/unhealthy/rejecting (e.g. it vanished out-of-band), it's
4929
+ * destroyed and the next free one is tried, up to MAX_CLAIM_ATTEMPTS. Only
4930
+ * when none work (or the pool is empty) does it return ok:false so the
4931
+ * caller falls back to create-fresh. The pick (shift) is synchronous — the
4932
+ * atomic step that prevents two concurrent claims grabbing the same machine.
4925
4933
  */
4926
4934
  async claim(job) {
4927
- const machine = this.free.shift();
4928
- if (!machine) {
4929
- this.log("claim: pool empty");
4930
- void this.refill();
4931
- return { ok: false, reason: "pool empty" };
4932
- }
4933
- this.claimsInFlight++;
4934
- try {
4935
- await this.deps.fly.start(machine.id);
4936
- const base = this.baseUrl(machine);
4937
- const healthy = await this.deps.fly.waitHealthy(base, { timeoutMs: this.deps.config.healthTimeoutMs });
4938
- if (!healthy) {
4939
- this.log(`claim: machine ${machine.id} unhealthy after wake \u2014 destroying`);
4940
- await this.safeDestroy(machine.id);
4941
- return { ok: false, reason: "woken machine unhealthy" };
4942
- }
4943
- const accepted = await this.postRun(machine, job, this.deps.config);
4944
- if (!accepted) {
4945
- this.log(`claim: machine ${machine.id} rejected job \u2014 destroying`);
4935
+ let lastReason = "pool empty";
4936
+ for (let attempt = 0; attempt < MAX_CLAIM_ATTEMPTS; attempt++) {
4937
+ const machine = this.free.shift();
4938
+ if (!machine) break;
4939
+ this.claimsInFlight++;
4940
+ try {
4941
+ await this.deps.fly.start(machine.id);
4942
+ const healthy = await this.deps.fly.waitHealthy(this.baseUrl(machine), {
4943
+ timeoutMs: this.deps.config.healthTimeoutMs
4944
+ });
4945
+ if (!healthy) {
4946
+ this.log(`claim: machine ${machine.id} unhealthy after wake \u2014 destroying, trying next`);
4947
+ await this.safeDestroy(machine.id);
4948
+ lastReason = "woken machine unhealthy";
4949
+ continue;
4950
+ }
4951
+ const accepted = await this.postRun(machine, job, this.deps.config);
4952
+ if (!accepted) {
4953
+ this.log(`claim: machine ${machine.id} rejected job \u2014 destroying, trying next`);
4954
+ await this.safeDestroy(machine.id);
4955
+ lastReason = "machine rejected job";
4956
+ continue;
4957
+ }
4958
+ this.log(`claim: machine ${machine.id} took job ${job.jobId}`);
4959
+ void this.refill();
4960
+ return { ok: true, machineId: machine.id };
4961
+ } catch (err) {
4962
+ this.log(`claim: error on ${machine.id}: ${errMsg2(err)} \u2014 destroying, trying next`);
4946
4963
  await this.safeDestroy(machine.id);
4947
- return { ok: false, reason: "machine rejected job" };
4964
+ lastReason = errMsg2(err);
4965
+ } finally {
4966
+ this.claimsInFlight--;
4948
4967
  }
4949
- this.log(`claim: machine ${machine.id} took job ${job.jobId}`);
4950
- return { ok: true, machineId: machine.id };
4968
+ }
4969
+ void this.refill();
4970
+ return { ok: false, reason: lastReason };
4971
+ }
4972
+ /**
4973
+ * Periodic self-heal: reconcile the in-memory free list against actual Fly
4974
+ * state. Prunes free entries whose machine vanished out-of-band (auto-destroy
4975
+ * after a job, manual ops) so a later claim never tries a dead machine, and
4976
+ * adopts any suspended machines we lost track of. Then tops up. Unlike
4977
+ * reconcile() this MERGES rather than rebuilds, so it won't drop a machine
4978
+ * that's momentarily not yet reflected as suspended by Fly's eventual
4979
+ * consistency.
4980
+ */
4981
+ async resync() {
4982
+ let machines;
4983
+ try {
4984
+ machines = await this.deps.fly.listPooled();
4951
4985
  } catch (err) {
4952
- this.log(`claim: error on ${machine.id}: ${errMsg2(err)} \u2014 destroying`);
4953
- await this.safeDestroy(machine.id);
4954
- return { ok: false, reason: errMsg2(err) };
4955
- } finally {
4956
- this.claimsInFlight--;
4957
- void this.refill();
4986
+ this.log(`resync: listPooled failed: ${errMsg2(err)}`);
4987
+ return;
4958
4988
  }
4989
+ const liveIds = new Set(machines.map((m) => m.id));
4990
+ const before = this.free.length;
4991
+ this.free = this.free.filter((f) => liveIds.has(f.id));
4992
+ const pruned = before - this.free.length;
4993
+ const tracked = new Set(this.free.map((f) => f.id));
4994
+ let adopted = 0;
4995
+ for (const m of machines) {
4996
+ if ((m.state === "suspended" || m.state === "suspending") && m.private_ip && !tracked.has(m.id)) {
4997
+ this.free.push({ id: m.id, privateIp: m.private_ip });
4998
+ adopted++;
4999
+ }
5000
+ }
5001
+ if (pruned > 0 || adopted > 0) {
5002
+ this.log(`resync: pruned ${pruned} stale, adopted ${adopted} (free=${this.free.length})`);
5003
+ }
5004
+ await this.refill();
4959
5005
  }
4960
5006
  /** Top up free machines to `min`. Serialized so it never overshoots. */
4961
5007
  async refill() {
@@ -5176,7 +5222,7 @@ var poolServe = async (ctx) => {
5176
5222
  manager.reconcile().catch((err) => log(`reconcile failed: ${err instanceof Error ? err.message : String(err)}`));
5177
5223
  const refillMs = envInt("POOL_REFILL_INTERVAL_MS", 6e4);
5178
5224
  const tick = setInterval(() => {
5179
- manager.refill().catch((err) => log(`refill tick failed: ${err instanceof Error ? err.message : String(err)}`));
5225
+ manager.resync().catch((err) => log(`resync tick failed: ${err instanceof Error ? err.message : String(err)}`));
5180
5226
  }, refillMs);
5181
5227
  const server = createServer3(async (req, res) => {
5182
5228
  try {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kody-ade/kody-engine",
3
- "version": "0.4.114",
3
+ "version": "0.4.116",
4
4
  "description": "kody — autonomous development engine. Single-session Claude Code agent behind a generic executor + declarative executable profiles.",
5
5
  "license": "MIT",
6
6
  "type": "module",