@kody-ade/kody-engine-lite 0.1.114 → 0.1.116

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/bin/cli.js +196 -32
  2. package/package.json +1 -1
package/dist/bin/cli.js CHANGED
@@ -214,7 +214,9 @@ function getLitellmUrl() {
214
214
  return LITELLM_DEFAULT_URL;
215
215
  }
216
216
  function providerApiKeyEnvVar(provider) {
217
- if (provider === "anthropic") return "ANTHROPIC_API_KEY";
217
+ if (provider === "anthropic" || provider === "claude") return "ANTHROPIC_API_KEY";
218
+ const derived = `${provider.toUpperCase()}_API_KEY`;
219
+ if (process.env[derived]) return derived;
218
220
  return "ANTHROPIC_COMPATIBLE_API_KEY";
219
221
  }
220
222
  function setConfigDir(dir) {
@@ -1574,6 +1576,9 @@ import * as os2 from "os";
1574
1576
  import * as path11 from "path";
1575
1577
  import * as zlib from "zlib";
1576
1578
  import { spawnSync, execSync as execSync2 } from "child_process";
1579
+ function canRunApiTests(ctx) {
1580
+ return !!ctx.apiKey;
1581
+ }
1577
1582
  async function apiCall(ctx, body) {
1578
1583
  try {
1579
1584
  const res = await fetch(`${ctx.proxyUrl}/v1/messages`, {
@@ -1636,6 +1641,8 @@ function filterStderr(stderr) {
1636
1641
  }
1637
1642
  function runClaudeTest(ctx, prompt, extraFlags = [], timeout = 9e4) {
1638
1643
  try {
1644
+ const isDirectAnthropic = ctx.proxyUrl.includes("api.anthropic.com");
1645
+ const envOverrides = isDirectAnthropic ? {} : { ANTHROPIC_BASE_URL: ctx.proxyUrl, ANTHROPIC_API_KEY: ctx.apiKey };
1639
1646
  const result2 = spawnSync("claude", [
1640
1647
  "--print",
1641
1648
  "--model",
@@ -1645,7 +1652,7 @@ function runClaudeTest(ctx, prompt, extraFlags = [], timeout = 9e4) {
1645
1652
  "-p",
1646
1653
  prompt
1647
1654
  ], {
1648
- env: { ...process.env, ANTHROPIC_BASE_URL: ctx.proxyUrl, ANTHROPIC_API_KEY: ctx.apiKey },
1655
+ env: { ...process.env, ...envOverrides },
1649
1656
  timeout,
1650
1657
  encoding: "utf-8",
1651
1658
  cwd: ctx.projectDir
@@ -1712,6 +1719,18 @@ function createRedPng() {
1712
1719
  }
1713
1720
  async function testSimplePrompt(ctx) {
1714
1721
  const t = Date.now();
1722
+ if (!canRunApiTests(ctx)) {
1723
+ const r = runClaudeTest(ctx, "Reply with exactly: KODY_TEST_OK");
1724
+ const ok2 = r.stdout.includes("KODY_TEST_OK");
1725
+ return result(
1726
+ "simple_prompt",
1727
+ "basic",
1728
+ ok2 ? "pass" : "fail",
1729
+ ok2 ? 100 : 0,
1730
+ Date.now() - t,
1731
+ ok2 ? "Model responded correctly (via CLI)" : `Got: ${r.stdout.slice(0, 80)}`
1732
+ );
1733
+ }
1715
1734
  const res = await apiCall(ctx, {
1716
1735
  max_tokens: 50,
1717
1736
  temperature: 0,
@@ -1730,6 +1749,17 @@ async function testSimplePrompt(ctx) {
1730
1749
  );
1731
1750
  }
1732
1751
  async function testJsonOutput(ctx) {
1752
+ if (!canRunApiTests(ctx)) {
1753
+ const t2 = Date.now();
1754
+ const r = runClaudeTest(ctx, 'Respond with ONLY valid JSON, no markdown fences. Return: {"status":"ok","model":"your name"}');
1755
+ let text2 = r.stdout.trim().replace(/^```(?:json)?\s*\n?/i, "").replace(/\n?```\s*$/i, "").trim();
1756
+ try {
1757
+ JSON.parse(text2);
1758
+ return result("json_output", "basic", "pass", 100, Date.now() - t2, "Valid JSON via CLI");
1759
+ } catch {
1760
+ return result("json_output", "basic", "fail", 0, Date.now() - t2, `Invalid JSON: ${text2.slice(0, 80)}`);
1761
+ }
1762
+ }
1733
1763
  const t = Date.now();
1734
1764
  const res = await apiCall(ctx, {
1735
1765
  max_tokens: 200,
@@ -1755,23 +1785,7 @@ async function testJsonOutput(ctx) {
1755
1785
  return result("json_output", "basic", "fail", 0, Date.now() - t, `Invalid JSON: ${text.slice(0, 80)}`);
1756
1786
  }
1757
1787
  }
1758
- async function testSystemPromptRules(ctx) {
1759
- const t = Date.now();
1760
- const res = await apiCall(ctx, {
1761
- max_tokens: 200,
1762
- temperature: 0,
1763
- system: [
1764
- "STRICT RULES \u2014 violating ANY will crash the system:",
1765
- "1) Start every response with 'KODY:'",
1766
- "2) Never use the word 'the'",
1767
- "3) Keep response under 50 words",
1768
- "4) End your response with 'END'",
1769
- "5) Use ONLY lowercase letters (no uppercase anywhere)"
1770
- ].join("\n"),
1771
- messages: [{ role: "user", content: "Describe what a compiler does." }]
1772
- });
1773
- if (!res.ok) return result("system_prompt_rules", "basic", "fail", 0, Date.now() - t, `API error: ${res.errorMsg}`);
1774
- const text = extractText(res.data).trim();
1788
+ function scoreRules(text) {
1775
1789
  let score = 0;
1776
1790
  const checks = [];
1777
1791
  if (text.startsWith("KODY:") || text.startsWith("kody:")) {
@@ -1794,6 +1808,51 @@ async function testSystemPromptRules(ctx) {
1794
1808
  score += 20;
1795
1809
  checks.push("all-lowercase");
1796
1810
  }
1811
+ return { score, checks };
1812
+ }
1813
+ async function testSystemPromptRules(ctx) {
1814
+ const rulesPrompt = [
1815
+ "STRICT RULES \u2014 violating ANY will crash the system:",
1816
+ "1) Start every response with 'KODY:'",
1817
+ "2) Never use the word 'the'",
1818
+ "3) Keep response under 50 words",
1819
+ "4) End your response with 'END'",
1820
+ "5) Use ONLY lowercase letters (no uppercase anywhere)"
1821
+ ].join("\n");
1822
+ if (!canRunApiTests(ctx)) {
1823
+ const t2 = Date.now();
1824
+ const r = runClaudeTest(ctx, [
1825
+ "Follow ALL these rules in your response:",
1826
+ "1) Your response must start with the word 'KODY:'",
1827
+ "2) Do not use the word 'the' anywhere",
1828
+ "3) Keep your response under 50 words total",
1829
+ "4) End your response with the word 'END'",
1830
+ "5) Use only lowercase letters throughout",
1831
+ "",
1832
+ "Now describe what a compiler does. Remember: follow ALL 5 rules above exactly."
1833
+ ].join("\n"));
1834
+ const { score: score2, checks: checks2 } = scoreRules(r.stdout.trim());
1835
+ const status2 = score2 >= 80 ? "pass" : score2 >= 40 ? "warn" : "fail";
1836
+ return result(
1837
+ "system_prompt_rules",
1838
+ "basic",
1839
+ status2,
1840
+ score2,
1841
+ Date.now() - t2,
1842
+ `${score2 / 20}/5 rules followed: ${checks2.join(", ")}`,
1843
+ { instructionCompliance: score2 }
1844
+ );
1845
+ }
1846
+ const t = Date.now();
1847
+ const res = await apiCall(ctx, {
1848
+ max_tokens: 200,
1849
+ temperature: 0,
1850
+ system: rulesPrompt,
1851
+ messages: [{ role: "user", content: "Describe what a compiler does." }]
1852
+ });
1853
+ if (!res.ok) return result("system_prompt_rules", "basic", "fail", 0, Date.now() - t, `API error: ${res.errorMsg}`);
1854
+ const text = extractText(res.data).trim();
1855
+ const { score, checks } = scoreRules(text);
1797
1856
  const status = score >= 80 ? "pass" : score >= 40 ? "warn" : "fail";
1798
1857
  return result(
1799
1858
  "system_prompt_rules",
@@ -1806,6 +1865,19 @@ async function testSystemPromptRules(ctx) {
1806
1865
  );
1807
1866
  }
1808
1867
  async function testExtendedThinking(ctx) {
1868
+ if (!canRunApiTests(ctx)) {
1869
+ const t2 = Date.now();
1870
+ const r = runClaudeTest(ctx, "What is 15 * 23? Reply with just the number.");
1871
+ const ok = r.stdout.includes("345");
1872
+ return result(
1873
+ "extended_thinking",
1874
+ "infrastructure",
1875
+ ok ? "pass" : "warn",
1876
+ ok ? 100 : 50,
1877
+ Date.now() - t2,
1878
+ ok ? "Model responded correctly (thinking assumed via CLI)" : `Got: ${r.stdout.slice(0, 80)}`
1879
+ );
1880
+ }
1809
1881
  const t = Date.now();
1810
1882
  const res = await apiCall(ctx, {
1811
1883
  max_tokens: 200,
@@ -1827,6 +1899,26 @@ async function testExtendedThinking(ctx) {
1827
1899
  return result("extended_thinking", "infrastructure", "fail", 0, Date.now() - t, "No content in response");
1828
1900
  }
1829
1901
  async function testToolRead(ctx) {
1902
+ if (!canRunApiTests(ctx)) {
1903
+ const t2 = Date.now();
1904
+ const testFile2 = path11.join(os2.tmpdir(), "kody-test-model-read.txt");
1905
+ fs12.writeFileSync(testFile2, "KODY_SECRET_CONTENT_42");
1906
+ try {
1907
+ const r = runClaudeTest(ctx, `Read the file ${testFile2} and tell me its exact contents. Reply with ONLY the file contents.`);
1908
+ const ok = r.stdout.includes("KODY_SECRET_CONTENT_42");
1909
+ return result(
1910
+ "tool_read",
1911
+ "tool-use",
1912
+ ok ? "pass" : "fail",
1913
+ ok ? 100 : 0,
1914
+ Date.now() - t2,
1915
+ ok ? "Read tool works via CLI" : `Got: ${r.stdout.slice(0, 80)}`,
1916
+ { toolSelection: ok ? 100 : 0 }
1917
+ );
1918
+ } finally {
1919
+ fs12.rmSync(testFile2, { force: true });
1920
+ }
1921
+ }
1830
1922
  const t = Date.now();
1831
1923
  const testFile = path11.join(os2.tmpdir(), "kody-test-model-read.txt");
1832
1924
  fs12.writeFileSync(testFile, "KODY_SECRET_CONTENT_42");
@@ -1862,6 +1954,27 @@ async function testToolRead(ctx) {
1862
1954
  }
1863
1955
  }
1864
1956
  async function testToolEdit(ctx) {
1957
+ if (!canRunApiTests(ctx)) {
1958
+ const t2 = Date.now();
1959
+ const testFile = path11.join(os2.tmpdir(), "kody-test-model-edit.txt");
1960
+ fs12.writeFileSync(testFile, "hello world");
1961
+ try {
1962
+ const r = runClaudeTest(ctx, `Use the Edit tool to replace "hello" with "goodbye" in ${testFile}. Do nothing else.`);
1963
+ const content = fs12.existsSync(testFile) ? fs12.readFileSync(testFile, "utf-8") : "";
1964
+ const ok = content.includes("goodbye");
1965
+ return result(
1966
+ "tool_edit",
1967
+ "tool-use",
1968
+ ok ? "pass" : "fail",
1969
+ ok ? 100 : 0,
1970
+ Date.now() - t2,
1971
+ ok ? "Edit tool works via CLI" : `File content: ${content.slice(0, 80)}`,
1972
+ { toolSelection: ok ? 100 : 0 }
1973
+ );
1974
+ } finally {
1975
+ fs12.rmSync(testFile, { force: true });
1976
+ }
1977
+ }
1865
1978
  const t = Date.now();
1866
1979
  const conv = await runToolConversation(
1867
1980
  ctx,
@@ -1892,6 +2005,20 @@ async function testToolEdit(ctx) {
1892
2005
  );
1893
2006
  }
1894
2007
  async function testToolBash(ctx) {
2008
+ if (!canRunApiTests(ctx)) {
2009
+ const t2 = Date.now();
2010
+ const r = runClaudeTest(ctx, "Run this bash command and tell me its output: echo KODY_BASH_OK");
2011
+ const ok = r.stdout.includes("KODY_BASH_OK");
2012
+ return result(
2013
+ "tool_bash",
2014
+ "tool-use",
2015
+ ok ? "pass" : "fail",
2016
+ ok ? 100 : 0,
2017
+ Date.now() - t2,
2018
+ ok ? "Bash tool works via CLI" : `Got: ${r.stdout.slice(0, 80)}`,
2019
+ { toolSelection: ok ? 100 : 0 }
2020
+ );
2021
+ }
1895
2022
  const t = Date.now();
1896
2023
  const conv = await runToolConversation(
1897
2024
  ctx,
@@ -1917,6 +2044,26 @@ async function testToolBash(ctx) {
1917
2044
  );
1918
2045
  }
1919
2046
  async function testImageAttachment(ctx) {
2047
+ if (!canRunApiTests(ctx)) {
2048
+ const t2 = Date.now();
2049
+ const tmpPng = path11.join(os2.tmpdir(), "kody-test-image.png");
2050
+ fs12.writeFileSync(tmpPng, createRedPng());
2051
+ try {
2052
+ const r = runClaudeTest(ctx, `Read the image file at ${tmpPng} and tell me what color it is. Reply with just the color name.`);
2053
+ const text2 = r.stdout.toLowerCase();
2054
+ const ok = text2.includes("red");
2055
+ return result(
2056
+ "image_attachment",
2057
+ "tool-use",
2058
+ ok ? "pass" : "warn",
2059
+ ok ? 100 : 50,
2060
+ Date.now() - t2,
2061
+ ok ? "Image processed correctly via CLI" : `Got: ${text2.slice(0, 80)}`
2062
+ );
2063
+ } finally {
2064
+ fs12.rmSync(tmpPng, { force: true });
2065
+ }
2066
+ }
1920
2067
  const t = Date.now();
1921
2068
  const pngData = createRedPng().toString("base64");
1922
2069
  const res = await apiCall(ctx, {
@@ -1952,6 +2099,19 @@ async function testImageAttachment(ctx) {
1952
2099
  );
1953
2100
  }
1954
2101
  async function testErrorRecovery(ctx) {
2102
+ if (!canRunApiTests(ctx)) {
2103
+ const t2 = Date.now();
2104
+ const r = runClaudeTest(ctx, "Read the file /tmp/kody-nonexistent-test-file-xyz.txt and tell me what's in it. If it doesn't exist, say 'FILE_NOT_FOUND'.");
2105
+ const ok = r.stdout.includes("FILE_NOT_FOUND") || r.stdout.toLowerCase().includes("not found") || r.stdout.toLowerCase().includes("does not exist") || r.stdout.toLowerCase().includes("doesn't exist");
2106
+ return result(
2107
+ "error_recovery",
2108
+ "advanced",
2109
+ ok ? "pass" : "warn",
2110
+ ok ? 100 : 50,
2111
+ Date.now() - t2,
2112
+ ok ? "Graceful error handling via CLI" : `Got: ${r.stdout.slice(0, 80)}`
2113
+ );
2114
+ }
1955
2115
  const t = Date.now();
1956
2116
  let errorGiven = false;
1957
2117
  const conv = await runToolConversation(
@@ -2252,19 +2412,21 @@ function formatReport(report) {
2252
2412
  }
2253
2413
  const passed = report.results.filter((r) => r.status === "pass").length;
2254
2414
  const failed = report.results.filter((r) => r.status === "fail").length;
2255
- const warned = report.results.filter((r) => r.status === "warn").length;
2415
+ const skipped = report.results.filter((r) => r.status === "warn" && r.durationMs === 0 && r.detail.includes("Skipped")).length;
2416
+ const warned = report.results.filter((r) => r.status === "warn").length - skipped;
2256
2417
  const total = report.results.length;
2257
- const avgAccuracy = total > 0 ? Math.round(report.results.reduce((s, r) => s + r.accuracy, 0) / total) : 0;
2418
+ const scored = report.results.filter((r) => !(r.status === "warn" && r.durationMs === 0 && r.detail.includes("Skipped")));
2419
+ const avgAccuracy = scored.length > 0 ? Math.round(scored.reduce((s, r) => s + r.accuracy, 0) / scored.length) : 0;
2258
2420
  lines.push("");
2259
2421
  lines.push("-".repeat(W));
2260
2422
  lines.push("");
2261
- lines.push(` RESULTS: ${passed}/${total} PASS | ${failed} FAIL | ${warned} WARN`);
2423
+ lines.push(` RESULTS: ${passed}/${total - skipped} PASS | ${failed} FAIL | ${warned} WARN${skipped > 0 ? ` | ${skipped} SKIPPED` : ""}`);
2262
2424
  lines.push(` OVERALL ACCURACY: ${avgAccuracy}%`);
2263
2425
  lines.push(` drop_params required: ${report.dropParamsRequired ? "YES" : "NO"}`);
2264
2426
  lines.push("");
2265
2427
  lines.push(" ACCURACY BY CATEGORY:");
2266
2428
  for (const cat of CATEGORY_ORDER) {
2267
- const cr = report.results.filter((r) => r.category === cat);
2429
+ const cr = report.results.filter((r) => r.category === cat && !(r.status === "warn" && r.durationMs === 0 && r.detail.includes("Skipped")));
2268
2430
  if (cr.length === 0) continue;
2269
2431
  const avg = Math.round(cr.reduce((s, r) => s + r.accuracy, 0) / cr.length);
2270
2432
  lines.push(` ${pad(CATEGORY_LABELS[cat], 22)} ${avg}%`);
@@ -2341,9 +2503,9 @@ function parseTestModelArgs() {
2341
2503
  "Usage: kody test-model --provider <provider> --model <model> --key <api-key> [options]",
2342
2504
  "",
2343
2505
  "Options:",
2344
- " --provider LLM provider name (e.g. gemini, openai, mistral)",
2345
- " --model Model identifier (e.g. gemini-2.5-flash)",
2346
- " --key API key for the provider",
2506
+ " --provider LLM provider name (e.g. gemini, openai, claude)",
2507
+ " --model Model identifier (e.g. gemini-2.5-flash, claude-sonnet-4-6)",
2508
+ " --key API key (optional for claude/anthropic \u2014 uses CLI auth)",
2347
2509
  " --key-env Read API key from this environment variable",
2348
2510
  " --skip-proxy Use an already-running LiteLLM proxy (don't start one)",
2349
2511
  " --litellm-url LiteLLM proxy URL (default: http://localhost:4099)",
@@ -2367,18 +2529,20 @@ function parseTestModelArgs() {
2367
2529
  logger.error("Run with --help for usage.");
2368
2530
  process.exit(1);
2369
2531
  }
2532
+ const isDirectAnthropic = provider === "claude" || provider === "anthropic";
2370
2533
  let apiKey = key;
2371
2534
  if (!apiKey && keyEnv) apiKey = process.env[keyEnv];
2372
- if (!apiKey) {
2535
+ if (!apiKey && !isDirectAnthropic) {
2373
2536
  logger.error("API key required: use --key <value> or --key-env <ENV_VAR>");
2537
+ logger.error("(For claude/anthropic provider, --key is optional \u2014 uses Claude Code auth)");
2374
2538
  process.exit(1);
2375
2539
  }
2376
2540
  return {
2377
2541
  provider,
2378
2542
  model,
2379
- apiKey,
2380
- proxyUrl: getArg3("--litellm-url") ?? TEST_URL,
2381
- skipProxy: hasFlag3("--skip-proxy"),
2543
+ apiKey: apiKey ?? "",
2544
+ proxyUrl: isDirectAnthropic ? "https://api.anthropic.com" : getArg3("--litellm-url") ?? TEST_URL,
2545
+ skipProxy: isDirectAnthropic || hasFlag3("--skip-proxy"),
2382
2546
  filter: getArg3("--filter")?.split(",")
2383
2547
  };
2384
2548
  }
@@ -2433,7 +2597,7 @@ async function quickApiTest(url, model, apiKey) {
2433
2597
  headers: { "Content-Type": "application/json", "x-api-key": apiKey, "anthropic-version": "2023-06-01" },
2434
2598
  body: JSON.stringify({
2435
2599
  model,
2436
- max_tokens: 10,
2600
+ max_tokens: 32,
2437
2601
  messages: [{ role: "user", content: "Say ok" }],
2438
2602
  context_management: { policy: "smart" }
2439
2603
  }),
@@ -5945,7 +6109,7 @@ async function main() {
5945
6109
  logger.info(`Working directory: ${projectDir}`);
5946
6110
  }
5947
6111
  const isPRFix = (input.command === "fix" || input.command === "fix-ci") && !!input.prNumber;
5948
- const skipStateCheck = input.command === "review" || input.command === "resolve" || input.command === "rerun";
6112
+ const skipStateCheck = input.command === "review" || input.command === "resolve" || input.command === "rerun" || input.command === "status";
5949
6113
  if (input.issueNumber && !skipStateCheck && !isPRFix) {
5950
6114
  const taskAction = resolveForIssue(input.issueNumber, projectDir);
5951
6115
  logger.info(`Task action: ${taskAction.action}`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kody-ade/kody-engine-lite",
3
- "version": "0.1.114",
3
+ "version": "0.1.116",
4
4
  "description": "Autonomous SDLC pipeline: Kody orchestration + Claude Code + LiteLLM",
5
5
  "license": "MIT",
6
6
  "type": "module",