npm - @kody-ade/kody-engine-lite - Versions diffs - 0.1.114 → 0.1.116 - Mend

@kody-ade/kody-engine-lite 0.1.114 → 0.1.116

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (2)

package/dist/bin/cli.js +196 -32
package/package.json +1 -1

package/dist/bin/cli.js CHANGED Viewed

@@ -214,7 +214,9 @@ function getLitellmUrl() {
   return LITELLM_DEFAULT_URL;
 }
 function providerApiKeyEnvVar(provider) {
-  if (provider === "anthropic") return "ANTHROPIC_API_KEY";
+  if (provider === "anthropic" || provider === "claude") return "ANTHROPIC_API_KEY";
+  const derived = `${provider.toUpperCase()}_API_KEY`;
+  if (process.env[derived]) return derived;
   return "ANTHROPIC_COMPATIBLE_API_KEY";
 }
 function setConfigDir(dir) {
@@ -1574,6 +1576,9 @@ import * as os2 from "os";
 import * as path11 from "path";
 import * as zlib from "zlib";
 import { spawnSync, execSync as execSync2 } from "child_process";
+function canRunApiTests(ctx) {
+  return !!ctx.apiKey;
+}
 async function apiCall(ctx, body) {
   try {
     const res = await fetch(`${ctx.proxyUrl}/v1/messages`, {
@@ -1636,6 +1641,8 @@ function filterStderr(stderr) {
 }
 function runClaudeTest(ctx, prompt, extraFlags = [], timeout = 9e4) {
   try {
+    const isDirectAnthropic = ctx.proxyUrl.includes("api.anthropic.com");
+    const envOverrides = isDirectAnthropic ? {} : { ANTHROPIC_BASE_URL: ctx.proxyUrl, ANTHROPIC_API_KEY: ctx.apiKey };
     const result2 = spawnSync("claude", [
       "--print",
       "--model",
@@ -1645,7 +1652,7 @@ function runClaudeTest(ctx, prompt, extraFlags = [], timeout = 9e4) {
       "-p",
       prompt
     ], {
-      env: { ...process.env, ANTHROPIC_BASE_URL: ctx.proxyUrl, ANTHROPIC_API_KEY: ctx.apiKey },
+      env: { ...process.env, ...envOverrides },
       timeout,
       encoding: "utf-8",
       cwd: ctx.projectDir
@@ -1712,6 +1719,18 @@ function createRedPng() {
 }
 async function testSimplePrompt(ctx) {
   const t = Date.now();
+  if (!canRunApiTests(ctx)) {
+    const r = runClaudeTest(ctx, "Reply with exactly: KODY_TEST_OK");
+    const ok2 = r.stdout.includes("KODY_TEST_OK");
+    return result(
+      "simple_prompt",
+      "basic",
+      ok2 ? "pass" : "fail",
+      ok2 ? 100 : 0,
+      Date.now() - t,
+      ok2 ? "Model responded correctly (via CLI)" : `Got: ${r.stdout.slice(0, 80)}`
+    );
+  }
   const res = await apiCall(ctx, {
     max_tokens: 50,
     temperature: 0,
@@ -1730,6 +1749,17 @@ async function testSimplePrompt(ctx) {
   );
 }
 async function testJsonOutput(ctx) {
+  if (!canRunApiTests(ctx)) {
+    const t2 = Date.now();
+    const r = runClaudeTest(ctx, 'Respond with ONLY valid JSON, no markdown fences. Return: {"status":"ok","model":"your name"}');
+    let text2 = r.stdout.trim().replace(/^```(?:json)?\s*\n?/i, "").replace(/\n?```\s*$/i, "").trim();
+    try {
+      JSON.parse(text2);
+      return result("json_output", "basic", "pass", 100, Date.now() - t2, "Valid JSON via CLI");
+    } catch {
+      return result("json_output", "basic", "fail", 0, Date.now() - t2, `Invalid JSON: ${text2.slice(0, 80)}`);
+    }
+  }
   const t = Date.now();
   const res = await apiCall(ctx, {
     max_tokens: 200,
@@ -1755,23 +1785,7 @@ async function testJsonOutput(ctx) {
     return result("json_output", "basic", "fail", 0, Date.now() - t, `Invalid JSON: ${text.slice(0, 80)}`);
   }
 }
-async function testSystemPromptRules(ctx) {
-  const t = Date.now();
-  const res = await apiCall(ctx, {
-    max_tokens: 200,
-    temperature: 0,
-    system: [
-      "STRICT RULES \u2014 violating ANY will crash the system:",
-      "1) Start every response with 'KODY:'",
-      "2) Never use the word 'the'",
-      "3) Keep response under 50 words",
-      "4) End your response with 'END'",
-      "5) Use ONLY lowercase letters (no uppercase anywhere)"
-    ].join("\n"),
-    messages: [{ role: "user", content: "Describe what a compiler does." }]
-  });
-  if (!res.ok) return result("system_prompt_rules", "basic", "fail", 0, Date.now() - t, `API error: ${res.errorMsg}`);
-  const text = extractText(res.data).trim();
+function scoreRules(text) {
   let score = 0;
   const checks = [];
   if (text.startsWith("KODY:") || text.startsWith("kody:")) {
@@ -1794,6 +1808,51 @@ async function testSystemPromptRules(ctx) {
     score += 20;
     checks.push("all-lowercase");
   }
+  return { score, checks };
+}
+async function testSystemPromptRules(ctx) {
+  const rulesPrompt = [
+    "STRICT RULES \u2014 violating ANY will crash the system:",
+    "1) Start every response with 'KODY:'",
+    "2) Never use the word 'the'",
+    "3) Keep response under 50 words",
+    "4) End your response with 'END'",
+    "5) Use ONLY lowercase letters (no uppercase anywhere)"
+  ].join("\n");
+  if (!canRunApiTests(ctx)) {
+    const t2 = Date.now();
+    const r = runClaudeTest(ctx, [
+      "Follow ALL these rules in your response:",
+      "1) Your response must start with the word 'KODY:'",
+      "2) Do not use the word 'the' anywhere",
+      "3) Keep your response under 50 words total",
+      "4) End your response with the word 'END'",
+      "5) Use only lowercase letters throughout",
+      "",
+      "Now describe what a compiler does. Remember: follow ALL 5 rules above exactly."
+    ].join("\n"));
+    const { score: score2, checks: checks2 } = scoreRules(r.stdout.trim());
+    const status2 = score2 >= 80 ? "pass" : score2 >= 40 ? "warn" : "fail";
+    return result(
+      "system_prompt_rules",
+      "basic",
+      status2,
+      score2,
+      Date.now() - t2,
+      `${score2 / 20}/5 rules followed: ${checks2.join(", ")}`,
+      { instructionCompliance: score2 }
+    );
+  }
+  const t = Date.now();
+  const res = await apiCall(ctx, {
+    max_tokens: 200,
+    temperature: 0,
+    system: rulesPrompt,
+    messages: [{ role: "user", content: "Describe what a compiler does." }]
+  });
+  if (!res.ok) return result("system_prompt_rules", "basic", "fail", 0, Date.now() - t, `API error: ${res.errorMsg}`);
+  const text = extractText(res.data).trim();
+  const { score, checks } = scoreRules(text);
   const status = score >= 80 ? "pass" : score >= 40 ? "warn" : "fail";
   return result(
     "system_prompt_rules",
@@ -1806,6 +1865,19 @@ async function testSystemPromptRules(ctx) {
   );
 }
 async function testExtendedThinking(ctx) {
+  if (!canRunApiTests(ctx)) {
+    const t2 = Date.now();
+    const r = runClaudeTest(ctx, "What is 15 * 23? Reply with just the number.");
+    const ok = r.stdout.includes("345");
+    return result(
+      "extended_thinking",
+      "infrastructure",
+      ok ? "pass" : "warn",
+      ok ? 100 : 50,
+      Date.now() - t2,
+      ok ? "Model responded correctly (thinking assumed via CLI)" : `Got: ${r.stdout.slice(0, 80)}`
+    );
+  }
   const t = Date.now();
   const res = await apiCall(ctx, {
     max_tokens: 200,
@@ -1827,6 +1899,26 @@ async function testExtendedThinking(ctx) {
   return result("extended_thinking", "infrastructure", "fail", 0, Date.now() - t, "No content in response");
 }
 async function testToolRead(ctx) {
+  if (!canRunApiTests(ctx)) {
+    const t2 = Date.now();
+    const testFile2 = path11.join(os2.tmpdir(), "kody-test-model-read.txt");
+    fs12.writeFileSync(testFile2, "KODY_SECRET_CONTENT_42");
+    try {
+      const r = runClaudeTest(ctx, `Read the file ${testFile2} and tell me its exact contents. Reply with ONLY the file contents.`);
+      const ok = r.stdout.includes("KODY_SECRET_CONTENT_42");
+      return result(
+        "tool_read",
+        "tool-use",
+        ok ? "pass" : "fail",
+        ok ? 100 : 0,
+        Date.now() - t2,
+        ok ? "Read tool works via CLI" : `Got: ${r.stdout.slice(0, 80)}`,
+        { toolSelection: ok ? 100 : 0 }
+      );
+    } finally {
+      fs12.rmSync(testFile2, { force: true });
+    }
+  }
   const t = Date.now();
   const testFile = path11.join(os2.tmpdir(), "kody-test-model-read.txt");
   fs12.writeFileSync(testFile, "KODY_SECRET_CONTENT_42");
@@ -1862,6 +1954,27 @@ async function testToolRead(ctx) {
   }
 }
 async function testToolEdit(ctx) {
+  if (!canRunApiTests(ctx)) {
+    const t2 = Date.now();
+    const testFile = path11.join(os2.tmpdir(), "kody-test-model-edit.txt");
+    fs12.writeFileSync(testFile, "hello world");
+    try {
+      const r = runClaudeTest(ctx, `Use the Edit tool to replace "hello" with "goodbye" in ${testFile}. Do nothing else.`);
+      const content = fs12.existsSync(testFile) ? fs12.readFileSync(testFile, "utf-8") : "";
+      const ok = content.includes("goodbye");
+      return result(
+        "tool_edit",
+        "tool-use",
+        ok ? "pass" : "fail",
+        ok ? 100 : 0,
+        Date.now() - t2,
+        ok ? "Edit tool works via CLI" : `File content: ${content.slice(0, 80)}`,
+        { toolSelection: ok ? 100 : 0 }
+      );
+    } finally {
+      fs12.rmSync(testFile, { force: true });
+    }
+  }
   const t = Date.now();
   const conv = await runToolConversation(
     ctx,
@@ -1892,6 +2005,20 @@ async function testToolEdit(ctx) {
   );
 }
 async function testToolBash(ctx) {
+  if (!canRunApiTests(ctx)) {
+    const t2 = Date.now();
+    const r = runClaudeTest(ctx, "Run this bash command and tell me its output: echo KODY_BASH_OK");
+    const ok = r.stdout.includes("KODY_BASH_OK");
+    return result(
+      "tool_bash",
+      "tool-use",
+      ok ? "pass" : "fail",
+      ok ? 100 : 0,
+      Date.now() - t2,
+      ok ? "Bash tool works via CLI" : `Got: ${r.stdout.slice(0, 80)}`,
+      { toolSelection: ok ? 100 : 0 }
+    );
+  }
   const t = Date.now();
   const conv = await runToolConversation(
     ctx,
@@ -1917,6 +2044,26 @@ async function testToolBash(ctx) {
   );
 }
 async function testImageAttachment(ctx) {
+  if (!canRunApiTests(ctx)) {
+    const t2 = Date.now();
+    const tmpPng = path11.join(os2.tmpdir(), "kody-test-image.png");
+    fs12.writeFileSync(tmpPng, createRedPng());
+    try {
+      const r = runClaudeTest(ctx, `Read the image file at ${tmpPng} and tell me what color it is. Reply with just the color name.`);
+      const text2 = r.stdout.toLowerCase();
+      const ok = text2.includes("red");
+      return result(
+        "image_attachment",
+        "tool-use",
+        ok ? "pass" : "warn",
+        ok ? 100 : 50,
+        Date.now() - t2,
+        ok ? "Image processed correctly via CLI" : `Got: ${text2.slice(0, 80)}`
+      );
+    } finally {
+      fs12.rmSync(tmpPng, { force: true });
+    }
+  }
   const t = Date.now();
   const pngData = createRedPng().toString("base64");
   const res = await apiCall(ctx, {
@@ -1952,6 +2099,19 @@ async function testImageAttachment(ctx) {
   );
 }
 async function testErrorRecovery(ctx) {
+  if (!canRunApiTests(ctx)) {
+    const t2 = Date.now();
+    const r = runClaudeTest(ctx, "Read the file /tmp/kody-nonexistent-test-file-xyz.txt and tell me what's in it. If it doesn't exist, say 'FILE_NOT_FOUND'.");
+    const ok = r.stdout.includes("FILE_NOT_FOUND") || r.stdout.toLowerCase().includes("not found") || r.stdout.toLowerCase().includes("does not exist") || r.stdout.toLowerCase().includes("doesn't exist");
+    return result(
+      "error_recovery",
+      "advanced",
+      ok ? "pass" : "warn",
+      ok ? 100 : 50,
+      Date.now() - t2,
+      ok ? "Graceful error handling via CLI" : `Got: ${r.stdout.slice(0, 80)}`
+    );
+  }
   const t = Date.now();
   let errorGiven = false;
   const conv = await runToolConversation(
@@ -2252,19 +2412,21 @@ function formatReport(report) {
   }
   const passed = report.results.filter((r) => r.status === "pass").length;
   const failed = report.results.filter((r) => r.status === "fail").length;
-  const warned = report.results.filter((r) => r.status === "warn").length;
+  const skipped = report.results.filter((r) => r.status === "warn" && r.durationMs === 0 && r.detail.includes("Skipped")).length;
+  const warned = report.results.filter((r) => r.status === "warn").length - skipped;
   const total = report.results.length;
-  const avgAccuracy = total > 0 ? Math.round(report.results.reduce((s, r) => s + r.accuracy, 0) / total) : 0;
+  const scored = report.results.filter((r) => !(r.status === "warn" && r.durationMs === 0 && r.detail.includes("Skipped")));
+  const avgAccuracy = scored.length > 0 ? Math.round(scored.reduce((s, r) => s + r.accuracy, 0) / scored.length) : 0;
   lines.push("");
   lines.push("-".repeat(W));
   lines.push("");
-  lines.push(`  RESULTS: ${passed}/${total} PASS | ${failed} FAIL | ${warned} WARN`);
+  lines.push(`  RESULTS: ${passed}/${total - skipped} PASS | ${failed} FAIL | ${warned} WARN${skipped > 0 ? ` | ${skipped} SKIPPED` : ""}`);
   lines.push(`  OVERALL ACCURACY: ${avgAccuracy}%`);
   lines.push(`  drop_params required: ${report.dropParamsRequired ? "YES" : "NO"}`);
   lines.push("");
   lines.push("  ACCURACY BY CATEGORY:");
   for (const cat of CATEGORY_ORDER) {
-    const cr = report.results.filter((r) => r.category === cat);
+    const cr = report.results.filter((r) => r.category === cat && !(r.status === "warn" && r.durationMs === 0 && r.detail.includes("Skipped")));
     if (cr.length === 0) continue;
     const avg = Math.round(cr.reduce((s, r) => s + r.accuracy, 0) / cr.length);
     lines.push(`    ${pad(CATEGORY_LABELS[cat], 22)} ${avg}%`);
@@ -2341,9 +2503,9 @@ function parseTestModelArgs() {
       "Usage: kody test-model --provider <provider> --model <model> --key <api-key> [options]",
       "",
       "Options:",
-      "  --provider     LLM provider name (e.g. gemini, openai, mistral)",
-      "  --model        Model identifier (e.g. gemini-2.5-flash)",
-      "  --key          API key for the provider",
+      "  --provider     LLM provider name (e.g. gemini, openai, claude)",
+      "  --model        Model identifier (e.g. gemini-2.5-flash, claude-sonnet-4-6)",
+      "  --key          API key (optional for claude/anthropic \u2014 uses CLI auth)",
       "  --key-env      Read API key from this environment variable",
       "  --skip-proxy   Use an already-running LiteLLM proxy (don't start one)",
       "  --litellm-url  LiteLLM proxy URL (default: http://localhost:4099)",
@@ -2367,18 +2529,20 @@ function parseTestModelArgs() {
     logger.error("Run with --help for usage.");
     process.exit(1);
   }
+  const isDirectAnthropic = provider === "claude" || provider === "anthropic";
   let apiKey = key;
   if (!apiKey && keyEnv) apiKey = process.env[keyEnv];
-  if (!apiKey) {
+  if (!apiKey && !isDirectAnthropic) {
     logger.error("API key required: use --key <value> or --key-env <ENV_VAR>");
+    logger.error("(For claude/anthropic provider, --key is optional \u2014 uses Claude Code auth)");
     process.exit(1);
   }
   return {
     provider,
     model,
-    apiKey,
-    proxyUrl: getArg3("--litellm-url") ?? TEST_URL,
-    skipProxy: hasFlag3("--skip-proxy"),
+    apiKey: apiKey ?? "",
+    proxyUrl: isDirectAnthropic ? "https://api.anthropic.com" : getArg3("--litellm-url") ?? TEST_URL,
+    skipProxy: isDirectAnthropic || hasFlag3("--skip-proxy"),
     filter: getArg3("--filter")?.split(",")
   };
 }
@@ -2433,7 +2597,7 @@ async function quickApiTest(url, model, apiKey) {
       headers: { "Content-Type": "application/json", "x-api-key": apiKey, "anthropic-version": "2023-06-01" },
       body: JSON.stringify({
         model,
-        max_tokens: 10,
+        max_tokens: 32,
         messages: [{ role: "user", content: "Say ok" }],
         context_management: { policy: "smart" }
       }),
@@ -5945,7 +6109,7 @@ async function main() {
     logger.info(`Working directory: ${projectDir}`);
   }
   const isPRFix = (input.command === "fix" || input.command === "fix-ci") && !!input.prNumber;
-  const skipStateCheck = input.command === "review" || input.command === "resolve" || input.command === "rerun";
+  const skipStateCheck = input.command === "review" || input.command === "resolve" || input.command === "rerun" || input.command === "status";
   if (input.issueNumber && !skipStateCheck && !isPRFix) {
     const taskAction = resolveForIssue(input.issueNumber, projectDir);
     logger.info(`Task action: ${taskAction.action}`);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@kody-ade/kody-engine-lite",
-  "version": "0.1.114",
+  "version": "0.1.116",
   "description": "Autonomous SDLC pipeline: Kody orchestration + Claude Code + LiteLLM",
   "license": "MIT",
   "type": "module",