npm - @jhlee0619/codexloop - Versions diffs - 0.1.0 - Mend

@jhlee0619/codexloop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/.claude-plugin/marketplace.json +34 -0
package/.claude-plugin/plugin.json +8 -0
package/.codex-plugin/plugin.json +38 -0
package/LICENSE +21 -0
package/README.md +425 -0
package/assets/banner.png +0 -0
package/bin/cloop +45 -0
package/commands/iterate.md +25 -0
package/commands/model.md +33 -0
package/commands/result.md +17 -0
package/commands/start.md +188 -0
package/commands/status.md +10 -0
package/commands/stop.md +12 -0
package/package.json +60 -0
package/prompts/evaluate.md +91 -0
package/prompts/rank.md +97 -0
package/prompts/suggest.md +69 -0
package/schemas/evaluation.schema.json +65 -0
package/schemas/loop-state.schema.json +103 -0
package/schemas/proposal.schema.json +74 -0
package/schemas/ranking.schema.json +77 -0
package/scripts/lib/apply.mjs +254 -0
package/scripts/lib/args.mjs +202 -0
package/scripts/lib/codex-exec.mjs +318 -0
package/scripts/lib/convergence.mjs +153 -0
package/scripts/lib/iteration.mjs +484 -0
package/scripts/lib/process.mjs +164 -0
package/scripts/lib/prompts.mjs +53 -0
package/scripts/lib/rank.mjs +149 -0
package/scripts/lib/render.mjs +240 -0
package/scripts/lib/state.mjs +378 -0
package/scripts/lib/validate.mjs +71 -0
package/scripts/lib/workspace.mjs +49 -0
package/scripts/loop-companion.mjs +849 -0
package/skills/cloop/SKILL.md +177 -0

package/scripts/lib/rank.mjs ADDED Viewed

@@ -0,0 +1,149 @@
+// Deterministic ranking math. The "judge" Codex call returns dimensional
+// scores + a declared winner; this module re-computes the weighted sum from
+// the dimensional scores and picks the winner itself, so the runtime's
+// decision is reproducible and does not depend on the judge's own arithmetic.
+//
+// Tiebreaker rule (when top-2 weighted scores are within 0.03):
+//   1. higher correctness
+//   2. higher riskInverse (lower risk)
+//   3. smaller patch size (fewer diff lines)
+export const WEIGHTS = Object.freeze({ // per-dimension weights read by computeWeightedScore; the six values sum to 1.00
+  correctness: 0.30, // heaviest dimension; also the first tiebreaker key
+  requirementSatisfaction: 0.25,
+  maintainability: 0.15,
+  testability: 0.10,
+  simplicity: 0.10,
+  riskInverse: 0.10 // higher value means lower risk; second tiebreaker key
+});
+export const DIMENSIONS = Object.freeze(Object.keys(WEIGHTS)); // dimension names, in WEIGHTS key order
+export const TIE_EPSILON = 0.03; // weighted-score gap between the top two below which a tiebreaker is reported
+export function clamp01(x) {
+  if (!Number.isFinite(x)) return 0;
+  if (x < 0) return 0;
+  if (x > 1) return 1;
+  return x;
+}
+export function computeWeightedScore(scoreEntry) {
+  let sum = 0;
+  for (const dim of DIMENSIONS) {
+    sum += WEIGHTS[dim] * clamp01(scoreEntry?.[dim] ?? 0);
+  }
+  return Number(sum.toFixed(6));
+}
+export function measurePatchSize(patch) {
+  if (!patch || typeof patch !== "string") return 0;
+  return patch.split("\n").length;
+}
+// Apply a reward-hacking floor: if a proposal modifies tests without a
+// substantive testChangeJustification, force its correctness to 0. This
+// mirrors the same check in apply.mjs but runs at ranking time so the judge
+// never picks a blocked proposal as the winner.
+export function applyRewardHackingFloor(scoreEntry, proposal) {
+  if (!proposal) return scoreEntry;
+  const modifiesTests = proposal.modifiesTests === true;
+  const justification = String(proposal.testChangeJustification ?? "").trim();
+  const short = justification.length < 20;
+  if (modifiesTests && short) {
+    return { ...scoreEntry, correctness: 0, _rewardHackingFloor: true };
+  }
+  return scoreEntry;
+}
+// The core runtime authority: given the judge's ranking output and the
+// original proposals, return a canonical ranking the loop will trust.
+export function recomputeWinner(ranking, proposals) {
+  if (!ranking || !Array.isArray(ranking.scores) || ranking.scores.length === 0) {
+    throw new Error("recomputeWinner: ranking.scores is empty");
+  }
+  const proposalById = new Map();
+  for (const proposal of proposals ?? []) {
+    if (proposal?.id) proposalById.set(proposal.id, proposal);
+  }
+  const canonical = ranking.scores.map((entry) => {
+    const proposal = proposalById.get(entry.proposalId);
+    const floored = applyRewardHackingFloor(entry, proposal);
+    // Reward-hacking proposals are disqualified from winner selection:
+    // their weighted score is forced to 0, which ranks them below every
+    // non-floored proposal deterministically.
+    const weighted = floored._rewardHackingFloor ? 0 : computeWeightedScore(floored);
+    const patchSize = measurePatchSize(proposal?.patch);
+    return {
+      ...floored,
+      weighted,
+      _patchSize: patchSize
+    };
+  });
+  const sorted = [...canonical].sort((a, b) => {
+    if (b.weighted !== a.weighted) return b.weighted - a.weighted;
+    if (b.correctness !== a.correctness) return b.correctness - a.correctness;
+    if (b.riskInverse !== a.riskInverse) return b.riskInverse - a.riskInverse;
+    return a._patchSize - b._patchSize;
+  });
+  const winnerEntry = sorted[0];
+  let tiebreaker = null;
+  if (sorted.length >= 2) {
+    const gap = Math.abs(sorted[0].weighted - sorted[1].weighted);
+    if (gap < TIE_EPSILON) {
+      tiebreaker = buildTiebreakerReason(sorted[0], sorted[1]);
+    }
+  }
+  const runtimeWinnerId = winnerEntry.proposalId;
+  const judgeWinnerId = ranking.winner?.id ?? null;
+  const disagreement = judgeWinnerId !== null && judgeWinnerId !== runtimeWinnerId;
+  const rejections = { ...(ranking.rejections ?? {}) };
+  for (const entry of canonical) {
+    if (entry.proposalId === runtimeWinnerId) continue;
+    if (!rejections[entry.proposalId]) {
+      rejections[entry.proposalId] =
+        `weighted ${entry.weighted.toFixed(3)} below winner ${winnerEntry.weighted.toFixed(3)}`;
+    }
+    if (entry._rewardHackingFloor) {
+      rejections[entry.proposalId] =
+        `reward-hacking floor: correctness forced to 0 (modifiesTests without justification). ` +
+        (rejections[entry.proposalId] ?? "");
+    }
+  }
+  return {
+    scores: canonical.map(({ _patchSize, _rewardHackingFloor, ...rest }) => rest),
+    winner: {
+      id: runtimeWinnerId,
+      justification:
+        ranking.winner?.justification ??
+        `highest weighted score (${winnerEntry.weighted.toFixed(3)}) in deterministic re-computation`,
+      confidence:
+        typeof ranking.winner?.confidence === "number" ? ranking.winner.confidence : null
+    },
+    rejections,
+    tiebreaker,
+    disagreement,
+    judgeWinnerId,
+    runtimeWinnerId
+  };
+}
+function buildTiebreakerReason(a, b) {
+  if (a.correctness !== b.correctness) {
+    return `correctness (${a.correctness.toFixed(3)} vs ${b.correctness.toFixed(3)})`;
+  }
+  if (a.riskInverse !== b.riskInverse) {
+    return `riskInverse (${a.riskInverse.toFixed(3)} vs ${b.riskInverse.toFixed(3)})`;
+  }
+  if (a._patchSize !== b._patchSize) {
+    return `patch size (${a._patchSize} vs ${b._patchSize} lines)`;
+  }
+  return "exact tie — deterministic order";
+}

package/scripts/lib/render.mjs ADDED Viewed

@@ -0,0 +1,240 @@
+// Terminal-friendly rendering for /cloop:status, /cloop:result, and the
+// per-iteration transcript printed by /cloop:iterate. All helpers take a
+// `state` (optionally an `iteration`) and return a string — no I/O.
+function truncate(text, max = 80) {
+  if (text == null) return "";
+  const s = String(text).replace(/\s+/g, " ").trim();
+  if (s.length <= max) return s;
+  return `${s.slice(0, Math.max(0, max - 3))}...`;
+}
+function fmtNum(n, digits = 3) {
+  if (n == null || !Number.isFinite(n)) return "?";
+  return Number(n).toFixed(digits);
+}
+function fmtDelta(n) {
+  if (n == null || !Number.isFinite(n)) return "?";
+  const v = Number(n).toFixed(3);
+  return n >= 0 ? `+${v}` : v;
+}
+function fmtMs(ms) {
+  if (!Number.isFinite(ms) || ms <= 0) return "?";
+  if (ms < 1000) return `${ms}ms`;
+  const s = Math.floor(ms / 1000);
+  if (s < 60) return `${s}s`;
+  const m = Math.floor(s / 60);
+  const rs = s % 60;
+  if (m < 60) return `${m}m${rs}s`;
+  const h = Math.floor(m / 60);
+  const rm = m % 60;
+  return `${h}h${rm}m`;
+}
+export function renderStatusReport(state) {
+  const lines = [];
+  lines.push("# CodexLoop status");
+  lines.push("");
+  lines.push(`| field    | value |`);
+  lines.push(`|----------|-------|`);
+  lines.push(`| loopId   | \`${state.loopId ?? "(none)"}\` |`);
+  lines.push(`| status   | **${state.status}** |`);
+  lines.push(`| mode     | ${state.mode ?? "interactive"} |`);
+  lines.push(`| goal     | ${truncate(state.goal?.text ?? "(unspecified)", 120)} |`);
+  if (Array.isArray(state.goal?.acceptanceCriteria) && state.goal.acceptanceCriteria.length > 0) {
+    lines.push(`| criteria | ${state.goal.acceptanceCriteria.length} items |`);
+  }
+  if (state.goal?.seedCommit) {
+    lines.push(`| seed     | \`${state.goal.seedCommit.slice(0, 12)}\` |`);
+  }
+  const consumed = state.budget?.consumed ?? {};
+  const maxIt = state.budget?.maxIterations ?? "?";
+  const maxCalls = state.budget?.maxCodexCalls ?? "?";
+  const elapsed = consumed.elapsedMs ?? 0;
+  const maxTime = state.budget?.maxElapsedMs ?? null;
+  lines.push(
+    `| budget   | ${consumed.iterations ?? 0}/${maxIt} iter, ${consumed.codexCalls ?? 0}/${maxCalls} calls, ${fmtMs(elapsed)}${maxTime ? `/${fmtMs(maxTime)}` : ""} |`
+  );
+  if (Array.isArray(state.iterations) && state.iterations.length > 0) {
+    const last = state.iterations[state.iterations.length - 1];
+    lines.push(
+      `| quality  | last=${fmtNum(last.qualityScore)} (Δ ${fmtDelta(last.qualityDelta)}) |`
+    );
+  }
+  if (state.stopReason) {
+    lines.push(`| stop     | ${state.stopReason} |`);
+  }
+  if (state.error) {
+    const errText = typeof state.error === "string" ? state.error : state.error.message ?? JSON.stringify(state.error);
+    lines.push(`| error    | ${truncate(errText, 200)} |`);
+  }
+  if (Array.isArray(state.iterations) && state.iterations.length > 0) {
+    lines.push("");
+    lines.push("## Iteration history");
+    lines.push("");
+    lines.push("| # | verdict | q | Δq | winner | apply | validate |");
+    lines.push("|---|---------|---|----|--------|-------|----------|");
+    for (const iter of state.iterations) {
+      const applyTag = iter.apply?.applied
+        ? "ok"
+        : iter.apply?.empty
+          ? "empty"
+          : iter.apply?.skipped
+            ? "skip"
+            : iter.apply?.error
+              ? "fail"
+              : "-";
+      const validateTag = iter.validate?.skipped
+        ? "skip"
+        : iter.validate?.passed === true
+          ? "pass"
+          : iter.validate?.passed === false
+            ? "fail"
+            : "-";
+      lines.push(
+        `| ${iter.index} | ${iter.evaluate?.verdict ?? "?"} | ${fmtNum(iter.qualityScore)} | ${fmtDelta(iter.qualityDelta)} | ${iter.acceptedProposalId ?? "-"} | ${applyTag} | ${validateTag} |`
+      );
+    }
+  }
+  return lines.join("\n");
+}
+export function renderIterationReport(iteration) {
+  if (!iteration) return "(no iteration)";
+  const lines = [];
+  lines.push(`## Iteration ${iteration.index}${iteration.dryRun ? " [DRY-RUN]" : ""}`);
+  lines.push("");
+  if (iteration.evaluate) {
+    lines.push(
+      `**evaluate**: verdict=${iteration.evaluate.verdict}, distance=${fmtNum(iteration.evaluate.distanceFromGoal, 2)}, openIssues=${iteration.evaluate.openIssues?.length ?? 0}`
+    );
+    if (iteration.evaluate.rationale) {
+      lines.push(`> ${truncate(iteration.evaluate.rationale, 400)}`);
+    }
+  }
+  if (Array.isArray(iteration.proposals) && iteration.proposals.length > 0) {
+    lines.push("");
+    lines.push(`**suggest**: ${iteration.proposals.length} proposals`);
+    for (const p of iteration.proposals) {
+      lines.push(
+        `- \`${p.id}\` — ${truncate(p.approach, 120)} (risk=${p.estimatedRisk}, impact=${p.estimatedImpact})`
+      );
+    }
+  }
+  if (iteration.ranking) {
+    lines.push("");
+    lines.push(`**rank**: winner=\`${iteration.ranking.winner?.id ?? "?"}\``);
+    for (const s of iteration.ranking.scores ?? []) {
+      lines.push(
+        `  - \`${s.proposalId}\`: weighted=${fmtNum(s.weighted)} (corr=${fmtNum(s.correctness, 2)}, req=${fmtNum(s.requirementSatisfaction, 2)}, risk⁻=${fmtNum(s.riskInverse, 2)})`
+      );
+    }
+    if (iteration.ranking.tiebreaker) {
+      lines.push(`  tiebreaker: ${iteration.ranking.tiebreaker}`);
+    }
+    if (iteration.ranking.disagreement) {
+      lines.push(
+        `  ⚠ judge picked \`${iteration.ranking.judgeWinnerId}\` but runtime overrode to \`${iteration.ranking.winner?.id}\``
+      );
+    }
+    for (const [id, reason] of Object.entries(iteration.ranking.rejections ?? {})) {
+      lines.push(`  ✗ \`${id}\`: ${truncate(reason, 200)}`);
+    }
+  }
+  if (iteration.apply) {
+    lines.push("");
+    if (iteration.apply.applied) {
+      lines.push(
+        `**apply**: applied ${iteration.apply.filesTouched?.length ?? 0} file(s), HEAD=\`${iteration.apply.postSha?.slice(0, 12) ?? "?"}\``
+      );
+    } else if (iteration.apply.empty) {
+      lines.push("**apply**: empty patch (no code change this iteration)");
+    } else if (iteration.apply.skipped) {
+      lines.push(`**apply**: skipped (${iteration.apply.skipped})`);
+    } else if (iteration.apply.error) {
+      lines.push(`**apply**: FAILED — ${truncate(iteration.apply.error, 300)}`);
+    }
+    if (iteration.apply.hackingFindings?.length) {
+      for (const f of iteration.apply.hackingFindings) {
+        lines.push(
+          `  ⚠ reward-hacking: ${f.kind}${f.file ? ` (${f.file})` : ""}${f.summary ? ` — ${truncate(f.summary, 200)}` : ""}`
+        );
+      }
+    }
+  }
+  if (iteration.validate) {
+    lines.push("");
+    if (iteration.validate.skipped) {
+      lines.push(`**validate**: skipped (${iteration.validate.skipped})`);
+    } else {
+      const passed = iteration.validate.passed;
+      lines.push(
+        `**validate**: ${passed === true ? "pass" : passed === false ? "FAIL" : "?"}${iteration.validate.regression ? " (regression)" : ""}`
+      );
+      for (const c of iteration.validate.commands ?? []) {
+        lines.push(
+          `  - ${c.kind}: exit=${c.status ?? "?"} ${fmtMs(c.durationMs)} — \`${truncate(c.cmd, 120)}\``
+        );
+      }
+    }
+  }
+  if (iteration.qualityScore != null) {
+    lines.push("");
+    lines.push(`**quality**: ${fmtNum(iteration.qualityScore)} (Δ ${fmtDelta(iteration.qualityDelta)})`);
+  }
+  if (iteration.stopReason) {
+    lines.push("");
+    lines.push(`**STOP**: ${iteration.stopReason}`);
+  }
+  if (iteration.error) {
+    lines.push("");
+    lines.push(`**ERROR**: ${truncate(iteration.error, 400)}`);
+  }
+  return lines.join("\n");
+}
+export function renderResultReport(state, fullIterations, { iterationIndex = null, withDiff = false } = {}) {
+  const lines = [];
+  lines.push(renderStatusReport(state));
+  lines.push("");
+  lines.push("---");
+  lines.push("");
+  if (iterationIndex != null) {
+    const match = (fullIterations ?? []).find((i) => i.index === iterationIndex);
+    if (match) {
+      lines.push(renderIterationReport(match));
+    } else {
+      lines.push(`No iteration ${iterationIndex} found.`);
+    }
+  } else {
+    for (const iter of fullIterations ?? []) {
+      lines.push(renderIterationReport(iter));
+      lines.push("");
+    }
+  }
+  if (withDiff && state.goal?.seedCommit) {
+    lines.push("");
+    lines.push("## Cumulative diff since seed commit");
+    lines.push("");
+    lines.push("(run `git diff " + state.goal.seedCommit + " HEAD` in the target repo)");
+  }
+  return lines.join("\n");
+}