caik-cli 0.1.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +8 -7
  2. package/dist/api-6OX4ICXN.js +9 -0
  3. package/dist/auto-improve-skills-2COKTU5C.js +8 -0
  4. package/dist/autoresearch-Y7WW6L4O.js +24 -0
  5. package/dist/chunk-2YHUDOJL.js +54 -0
  6. package/dist/chunk-3TXNZINH.js +775 -0
  7. package/dist/chunk-5MHNQAV4.js +317 -0
  8. package/dist/chunk-7AIZTHHZ.js +152 -0
  9. package/dist/chunk-D4IM3YRX.js +166 -0
  10. package/dist/chunk-DJJHS7KK.js +62 -0
  11. package/dist/chunk-DKZBQRR3.js +91 -0
  12. package/dist/chunk-FLSHJZLC.js +613 -0
  13. package/dist/chunk-H2ZKCXMJ.js +202 -0
  14. package/dist/chunk-ILMOSMD3.js +83 -0
  15. package/dist/chunk-KYTHKH6V.js +79 -0
  16. package/dist/chunk-LTKHLRM4.js +272 -0
  17. package/dist/chunk-T32AEP3O.js +146 -0
  18. package/dist/chunk-T73Z5UMA.js +14437 -0
  19. package/dist/chunk-TFKT7V7H.js +1545 -0
  20. package/dist/chunk-US4CYDNS.js +524 -0
  21. package/dist/chunk-ZLRN7Q7C.js +27 -0
  22. package/dist/claude-code-6DF4YARB.js +8 -0
  23. package/dist/config-CS7734SA.js +24 -0
  24. package/dist/correction-classifier-TLPKRNLI.js +93 -0
  25. package/dist/cursor-Z4XXDCAM.js +8 -0
  26. package/dist/daemon/autoresearch-2MAEM2YI.js +272 -0
  27. package/dist/daemon/chunk-545XA5CB.js +77 -0
  28. package/dist/daemon/chunk-HEYFAUHL.js +90 -0
  29. package/dist/daemon/chunk-MLKGABMK.js +9 -0
  30. package/dist/daemon/chunk-NJICGNCK.js +150 -0
  31. package/dist/daemon/chunk-OD5NUFH2.js +181 -0
  32. package/dist/daemon/chunk-SM2FSXIP.js +60 -0
  33. package/dist/daemon/chunk-UMDJFPN6.js +163 -0
  34. package/dist/daemon/config-F7HE3JRY.js +23 -0
  35. package/dist/daemon/db-QEXVVTAL.js +15 -0
  36. package/dist/daemon/eval-generator-OR2FAYLB.js +316 -0
  37. package/dist/daemon/improver-TGEK6MPE.js +186 -0
  38. package/dist/daemon/llm-FUJ2TBYT.js +11 -0
  39. package/dist/daemon/nudge-detector-NFRHWZY6.js +140 -0
  40. package/dist/daemon/platform-7N3LQDIB.js +16381 -0
  41. package/dist/daemon/registry-FI4GTO3H.js +20 -0
  42. package/dist/daemon/server.js +356 -0
  43. package/dist/daemon/trace-store-T7XFGQSX.js +19 -0
  44. package/dist/daemon-UXYMG46V.js +85 -0
  45. package/dist/db-TLNRIXLK.js +18 -0
  46. package/dist/eval-generator-GGMRPO3K.js +21 -0
  47. package/dist/eval-runner-EF4K6T5Y.js +15 -0
  48. package/dist/index.js +8033 -568
  49. package/dist/llm-3UUZX6PX.js +12 -0
  50. package/dist/platform-52NREMBS.js +33 -0
  51. package/dist/repo-installer-K6ADOW3E.js +25 -0
  52. package/dist/setup-P744STZE.js +16 -0
  53. package/dist/test-loop-Y7QQE55P.js +127 -0
  54. package/dist/trace-store-FVLMNNDK.js +20 -0
  55. package/package.json +9 -3
@@ -0,0 +1,181 @@
1
+ // src/daemon/db.ts
2
+ import Database from "better-sqlite3";
3
+ import { mkdirSync, existsSync } from "fs";
4
+ import { join, dirname } from "path";
5
+ import { homedir } from "os";
6
// Lazily-opened singleton connection; created by getDb(), cleared by closeDb().
var _db = null;
7
/**
 * Resolve the default location of the autoresearch SQLite database:
 * ~/.caik/autoresearch.db under the current user's home directory.
 * @returns {string} Absolute path to the database file.
 */
function getDefaultDbPath() {
  const home = homedir();
  return join(home, ".caik", "autoresearch.db");
}
10
/**
 * Open (or create) the SQLite database at the given path, creating the
 * parent directory with owner-only permissions when it is missing, and
 * apply the daemon's standard pragmas: WAL journaling, enforced foreign
 * keys, and a 5s busy timeout.
 * @param {string} [dbPath] - Target file; defaults to getDefaultDbPath().
 * @returns {Database} An open better-sqlite3 handle.
 */
function openDb(dbPath) {
  const target = dbPath ?? getDefaultDbPath();
  const parent = dirname(target);
  if (!existsSync(parent)) {
    // 448 === 0o700: directory readable/writable by the owner only.
    mkdirSync(parent, { recursive: true, mode: 448 });
  }
  const handle = new Database(target);
  for (const pragma of ["journal_mode = WAL", "foreign_keys = ON", "busy_timeout = 5000"]) {
    handle.pragma(pragma);
  }
  return handle;
}
22
/**
 * Return the shared database connection, opening it and initializing the
 * schema on first use. Subsequent calls ignore dbPath and return the
 * cached handle.
 * @param {string} [dbPath] - Only honored on the very first call.
 * @returns {Database} The singleton connection.
 */
function getDb(dbPath) {
  if (_db === null) {
    _db = openDb(dbPath);
    initSchema(_db);
  }
  return _db;
}
29
/**
 * Close the singleton connection (if any) and clear the cache so the next
 * getDb() call reopens the database.
 */
function closeDb() {
  if (_db === null) return;
  _db.close();
  _db = null;
}
35
/**
 * Create every table and index used by the daemon. All statements are
 * IF NOT EXISTS, so this is idempotent and safe to run on each startup.
 * Note: only session_buffer/traces and eval_suites/eval_cases are linked
 * by foreign keys; other tables correlate by session_id/slug text columns.
 * @param {Database} db - An open better-sqlite3 connection.
 */
function initSchema(db) {
  db.exec(`
    -- Session buffer: ephemeral tool calls captured during a session
    CREATE TABLE IF NOT EXISTS session_buffer (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT NOT NULL,
      type TEXT NOT NULL CHECK(type IN ('tool_call', 'correction', 'prompt')),
      timestamp TEXT NOT NULL,
      tool_name TEXT,
      tool_input TEXT,
      tool_response TEXT,
      prompt TEXT,
      correction_type TEXT,
      slug TEXT,
      success INTEGER
    );
    CREATE INDEX IF NOT EXISTS idx_buffer_session ON session_buffer(session_id);

    -- Traces: permanent records of corrections and sampled successes
    CREATE TABLE IF NOT EXISTS traces (
      id TEXT PRIMARY KEY,
      session_id TEXT NOT NULL,
      slug TEXT NOT NULL,
      timestamp TEXT NOT NULL,
      kind TEXT NOT NULL CHECK(kind IN ('correction', 'success')),
      tool_name TEXT NOT NULL,
      tool_input TEXT,
      tool_response TEXT,
      correction_type TEXT,
      correction_prompt TEXT,
      skill_content_hash TEXT,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );
    CREATE INDEX IF NOT EXISTS idx_traces_slug ON traces(slug);
    CREATE INDEX IF NOT EXISTS idx_traces_slug_kind ON traces(slug, kind);

    -- Eval suites: one per skill, tracks validation state
    CREATE TABLE IF NOT EXISTS eval_suites (
      slug TEXT PRIMARY KEY,
      version INTEGER NOT NULL DEFAULT 1,
      tpr REAL,
      tnr REAL,
      validation_sample_size INTEGER,
      validated_at TEXT,
      created_at TEXT NOT NULL DEFAULT (datetime('now')),
      updated_at TEXT NOT NULL DEFAULT (datetime('now'))
    );

    -- Eval cases: individual test assertions linked to a suite
    CREATE TABLE IF NOT EXISTS eval_cases (
      id TEXT PRIMARY KEY,
      suite_slug TEXT NOT NULL REFERENCES eval_suites(slug) ON DELETE CASCADE,
      trace_id TEXT REFERENCES traces(id),
      source TEXT NOT NULL CHECK(source IN ('trace', 'synthetic')),
      category TEXT NOT NULL,
      assertion_type TEXT NOT NULL,
      assertion_value TEXT NOT NULL,
      description TEXT NOT NULL,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );
    CREATE INDEX IF NOT EXISTS idx_cases_suite ON eval_cases(suite_slug);

    -- Loop results: archive of every autoresearch run
    CREATE TABLE IF NOT EXISTS loop_results (
      id TEXT PRIMARY KEY,
      slug TEXT NOT NULL,
      baseline_pass_rate REAL NOT NULL,
      best_pass_rate REAL NOT NULL,
      baseline_pass_count INTEGER NOT NULL,
      best_pass_count INTEGER NOT NULL,
      total_cases INTEGER NOT NULL,
      iterations INTEGER NOT NULL,
      total_llm_calls INTEGER NOT NULL,
      duration_ms INTEGER NOT NULL,
      strategies TEXT NOT NULL,
      best_content TEXT,
      applied INTEGER NOT NULL DEFAULT 0,
      rolled_back INTEGER NOT NULL DEFAULT 0,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );
    CREATE INDEX IF NOT EXISTS idx_results_slug ON loop_results(slug, created_at);

    -- Observations: persistent tool call records from hooks
    CREATE TABLE IF NOT EXISTS observations (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT NOT NULL,
      slug TEXT NOT NULL,
      tool TEXT NOT NULL,
      success INTEGER NOT NULL,
      platform TEXT,
      correction_type TEXT,
      correction_source TEXT,
      timestamp TEXT NOT NULL,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );
    CREATE INDEX IF NOT EXISTS idx_obs_session ON observations(session_id);
    CREATE INDEX IF NOT EXISTS idx_obs_slug ON observations(slug);
    CREATE INDEX IF NOT EXISTS idx_obs_timestamp ON observations(timestamp);

    -- Pending events: queue for API posting with retry
    CREATE TABLE IF NOT EXISTS pending_events (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      payload TEXT NOT NULL,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );

    -- Session context: per-session state (fingerprint, model) for session-end
    CREATE TABLE IF NOT EXISTS session_context (
      session_id TEXT PRIMARY KEY,
      stack_fingerprint TEXT,
      repo_scale TEXT,
      agent_model TEXT,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );

    -- Session engagement: per-artifact call counts within a session
    CREATE TABLE IF NOT EXISTS session_engagement (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT NOT NULL,
      slug TEXT NOT NULL,
      count INTEGER NOT NULL DEFAULT 0,
      UNIQUE(session_id, slug)
    );
    CREATE INDEX IF NOT EXISTS idx_engagement_session ON session_engagement(session_id);

    -- Retention checks: last check timestamp per artifact slug
    CREATE TABLE IF NOT EXISTS retention_checks (
      slug TEXT PRIMARY KEY,
      last_checked_at INTEGER NOT NULL
    );

    -- Session-end dedup: track which sessions have already buffered a session_end
    CREATE TABLE IF NOT EXISTS session_end_sent (
      session_id TEXT PRIMARY KEY,
      pending_event_id INTEGER,
      sent_at TEXT NOT NULL DEFAULT (datetime('now'))
    );
  `);
}
174
+
175
+ export {
176
+ getDefaultDbPath,
177
+ openDb,
178
+ getDb,
179
+ closeDb,
180
+ initSchema
181
+ };
@@ -0,0 +1,60 @@
1
+ // src/daemon/llm.ts
2
+ import { readFileSync, existsSync } from "fs";
3
+ import { join } from "path";
4
+ import { homedir } from "os";
5
// Anthropic Messages API endpoint used by callAnthropic().
var ANTHROPIC_API_URL = "https://api.anthropic.com/v1/messages";
6
/**
 * Locate an Anthropic API key. The ANTHROPIC_API_KEY environment variable
 * wins; otherwise fall back to the `anthropicApiKey` field of
 * ~/.caik/config.json. Config read/parse errors are deliberately swallowed
 * (best-effort lookup).
 * @returns {string|undefined} The key, or undefined when none is configured.
 */
function getAnthropicApiKey() {
  const fromEnv = process.env.ANTHROPIC_API_KEY;
  if (fromEnv) return fromEnv;
  const configPath = join(homedir(), ".caik", "config.json");
  try {
    if (existsSync(configPath)) {
      const config = JSON.parse(readFileSync(configPath, "utf-8"));
      if (typeof config.anthropicApiKey === "string") return config.anthropicApiKey;
    }
  } catch {
    // Unreadable or malformed config: treat as "no key configured".
  }
  return undefined;
}
18
/**
 * Invoke the Anthropic Messages API with a single user message and return
 * the first text block of the reply plus token usage.
 * @param {string} apiKey - Anthropic API key (sent as x-api-key).
 * @param {{model: string, system?: string, userMessage: string, maxTokens?: number}} opts
 * @returns {Promise<{text: string, inputTokens: number, outputTokens: number}>}
 * @throws {Error} On a non-2xx HTTP status, or when the reply has no text block.
 */
async function callAnthropic(apiKey, opts) {
  const payload = {
    model: opts.model,
    max_tokens: opts.maxTokens ?? 4096,
    system: opts.system,
    messages: [{ role: "user", content: opts.userMessage }]
  };
  const headers = {
    "x-api-key": apiKey,
    "anthropic-version": "2023-06-01",
    "content-type": "application/json"
  };
  const res = await fetch(ANTHROPIC_API_URL, {
    method: "POST",
    headers,
    body: JSON.stringify(payload)
  });
  if (!res.ok) {
    const text = await res.text();
    throw new Error(`Anthropic API ${res.status}: ${text}`);
  }
  const data = await res.json();
  const textBlock = data.content.find((block) => block.type === "text");
  if (!textBlock?.text) throw new Error("No text in Anthropic response");
  return {
    text: textBlock.text,
    inputTokens: data.usage?.input_tokens ?? 0,
    outputTokens: data.usage?.output_tokens ?? 0
  };
}
47
/**
 * Parse JSON from an LLM reply, tolerating a Markdown code fence
 * (``` or ```json) wrapped around the payload.
 * @param {string} text - Raw model output.
 * @returns {*} The parsed JSON value.
 * @throws {SyntaxError} When the unfenced text is not valid JSON.
 */
function parseLLMJson(text) {
  let body = text.trim();
  if (body.startsWith("```")) {
    // Strip the opening fence (with optional language tag) and closing fence.
    body = body.replace(/^```[a-z]*\n?/i, "").replace(/\n?```\s*$/, "");
  }
  return JSON.parse(body);
}
55
+
56
+ export {
57
+ getAnthropicApiKey,
58
+ callAnthropic,
59
+ parseLLMJson
60
+ };
@@ -0,0 +1,163 @@
1
+ import {
2
+ callAnthropic
3
+ } from "./chunk-SM2FSXIP.js";
4
+
5
+ // src/daemon/eval-runner.ts
6
+ import { createHash } from "crypto";
7
/**
 * Cache key for a (skill, scenario) simulation: the first 16 hex characters
 * of sha256 over both strings joined by a NUL separator.
 * @param {string} skillContent
 * @param {string} scenario
 * @returns {string} 16-char hex key.
 */
function simCacheKey(skillContent, scenario) {
  const digest = createHash("sha256")
    .update(skillContent + "\0" + scenario)
    .digest("hex");
  return digest.slice(0, 16);
}
/**
 * Run the skill as a system prompt against a scenario, memoizing the model's
 * text output in simOpts.cache so repeated evals of the same pair are free.
 * @param {string} skillContent - Skill text used as the system prompt.
 * @param {string} scenario - User message to simulate.
 * @param {string} apiKey - Anthropic API key.
 * @param {{model: string, cache: Map<string, string>}} simOpts
 * @returns {Promise<string>} The simulated assistant output.
 */
async function simulateSkill(skillContent, scenario, apiKey, simOpts) {
  const key = simCacheKey(skillContent, scenario);
  if (simOpts.cache.has(key)) return simOpts.cache.get(key);
  const { text } = await callAnthropic(apiKey, {
    model: simOpts.model,
    system: skillContent,
    userMessage: scenario,
    maxTokens: 2048
  });
  simOpts.cache.set(key, text);
  return text;
}
23
/**
 * Test a regex assertion against content. Supports an optional inline flag
 * prefix — "(?i)", "(?s)", "(?m)", or any combination such as "(?is)" —
 * translated into RegExp constructor flags, since JavaScript's RegExp has
 * no inline-flag syntax. (Previously only the exact "(?i)" prefix was
 * recognized; other prefixes fell through as invalid patterns.)
 * For "must_contain" the pattern must match; for "must_not_contain" it must
 * not. Invalid patterns yield a failed result rather than throwing.
 * @param {string} content - Text to test.
 * @param {"must_contain"|"must_not_contain"} type - Assertion polarity.
 * @param {string} pattern - Regex source, optionally flag-prefixed.
 * @returns {{passed: boolean, reason: string}}
 */
function checkPatternAssertion(content, type, pattern) {
  try {
    let flags = "";
    let cleanPattern = pattern;
    // Translate an inline-flag prefix like "(?i)" or "(?ims)" into flags.
    const inline = /^\(\?([ims]+)\)/.exec(cleanPattern);
    if (inline) {
      flags = inline[1];
      cleanPattern = cleanPattern.slice(inline[0].length);
    }
    const regex = new RegExp(cleanPattern, flags);
    const found = regex.test(content);
    if (type === "must_contain") {
      return found ? { passed: true, reason: `Pattern "${pattern}" found` } : { passed: false, reason: `Pattern "${pattern}" not found in content` };
    } else {
      return found ? { passed: false, reason: `Pattern "${pattern}" found in content (should be absent)` } : { passed: true, reason: `Pattern "${pattern}" correctly absent` };
    }
  } catch (err) {
    return {
      passed: false,
      reason: `Invalid regex pattern: ${err instanceof Error ? err.message : String(err)}`
    };
  }
}
45
/**
 * Evaluate a JavaScript assertion body against content. The snippet is
 * compiled as `function (content) { <fn> }` and passes when it returns a
 * truthy value; compile or runtime errors produce a failed result.
 * NOTE(review): `new Function` executes the stored assertion with full
 * process privileges — eval cases must come from a trusted source.
 * @param {string} content - Text handed to the check function.
 * @param {string} fn - Function body; should `return` a boolean-ish value.
 * @returns {{passed: boolean, reason: string}}
 */
function checkCodeAssertion(content, fn) {
  try {
    const check = new Function("content", fn);
    return check(content)
      ? { passed: true, reason: "Code check passed" }
      : { passed: false, reason: "Code check returned false" };
  } catch (err) {
    return {
      passed: false,
      reason: `Code check error: ${err instanceof Error ? err.message : String(err)}`
    };
  }
}
57
/**
 * Ask a small model to judge skill content against free-form criteria.
 * The judge is instructed to answer with a bare JSON object
 * {passed, reason}; any API or parse failure is reported as a failed
 * result rather than thrown.
 * @param {string} skillContent - Skill text (or simulated output) to judge.
 * @param {string} judgePrompt - Natural-language pass/fail criteria.
 * @param {string} apiKey - Anthropic API key.
 * @returns {Promise<{passed: boolean, reason: string}>}
 */
async function checkLLMJudge(skillContent, judgePrompt, apiKey) {
  try {
    const reply = await callAnthropic(apiKey, {
      model: "claude-haiku-4-5-20251001",
      system: `You are an eval judge for Claude Code skills. Given a skill and a judge prompt, determine if the skill PASSES or FAILS the criteria. Respond with ONLY a JSON object: {"passed": true/false, "reason": "brief explanation"}`,
      userMessage: `Skill content:
<skill>
${skillContent}
</skill>

Judge criteria: ${judgePrompt}`,
      maxTokens: 256
    });
    return JSON.parse(reply.text);
  } catch (err) {
    return {
      passed: false,
      reason: `LLM judge error: ${err instanceof Error ? err.message : String(err)}`
    };
  }
}
79
/**
 * Evaluate one eval case against the skill content.
 * Structural assertions inspect the SKILL.md text directly; behavioral
 * assertions first simulate the skill via simulateSkill() (which requires
 * simOpts) and then apply the check to the simulated output.
 * @param {string} skillContent - Full skill text under test.
 * @param {{assertion: object}} evalCase - Case carrying a typed assertion.
 * @param {string} apiKey - Anthropic API key (used for judge + simulation).
 * @param {{model: string, cache: Map<string, string>}} [simOpts] - Simulation
 *   config; behavioral cases fail immediately when omitted.
 * @returns {Promise<{passed: boolean, reason: string}>}
 */
async function runSingleCase(skillContent, evalCase, apiKey, simOpts) {
  const assertion = evalCase.assertion;
  switch (assertion.type) {
    // ── Structural (check SKILL.md text directly) ──────────────────
    case "must_contain":
      return checkPatternAssertion(skillContent, "must_contain", assertion.pattern);
    case "must_not_contain":
      return checkPatternAssertion(skillContent, "must_not_contain", assertion.pattern);
    case "code_check":
      return checkCodeAssertion(skillContent, assertion.fn);
    case "llm_judge":
      return checkLLMJudge(skillContent, assertion.prompt, apiKey);
    case "max_output_length":
      // ~4 characters per token heuristic, applied to the skill text itself.
      return {
        passed: skillContent.length <= assertion.tokens * 4,
        reason: skillContent.length <= assertion.tokens * 4 ? `Content length ${skillContent.length} within limit` : `Content length ${skillContent.length} exceeds token limit ${assertion.tokens}`
      };
    // ── Behavioral (simulate skill, check output) ──────────────────
    case "behavioral_must_contain": {
      if (!simOpts) return { passed: false, reason: "No simulation config for behavioral eval" };
      const output = await simulateSkill(skillContent, assertion.scenario, apiKey, simOpts);
      return checkPatternAssertion(output, "must_contain", assertion.pattern);
    }
    case "behavioral_must_not_contain": {
      if (!simOpts) return { passed: false, reason: "No simulation config for behavioral eval" };
      const output = await simulateSkill(skillContent, assertion.scenario, apiKey, simOpts);
      return checkPatternAssertion(output, "must_not_contain", assertion.pattern);
    }
    case "behavioral_max_length": {
      if (!simOpts) return { passed: false, reason: "No simulation config for behavioral eval" };
      const output = await simulateSkill(skillContent, assertion.scenario, apiKey, simOpts);
      // Same ~4 chars/token heuristic, applied to the simulated output.
      const tokenEstimate = Math.round(output.length / 4);
      return {
        passed: tokenEstimate <= assertion.tokens,
        reason: tokenEstimate <= assertion.tokens ? `Simulated output ~${tokenEstimate} tokens, within limit ${assertion.tokens}` : `Simulated output ~${tokenEstimate} tokens, exceeds limit ${assertion.tokens}`
      };
    }
    case "behavioral_judge": {
      if (!simOpts) return { passed: false, reason: "No simulation config for behavioral eval" };
      const output = await simulateSkill(skillContent, assertion.scenario, apiKey, simOpts);
      return checkLLMJudge(output, assertion.criteria, apiKey);
    }
    case "behavioral_code_check": {
      if (!simOpts) return { passed: false, reason: "No simulation config for behavioral eval" };
      const output = await simulateSkill(skillContent, assertion.scenario, apiKey, simOpts);
      return checkCodeAssertion(output, assertion.fn);
    }
    default:
      // Unknown assertion types fail closed rather than throwing.
      return { passed: false, reason: `Unknown assertion type` };
  }
}
130
/**
 * Run every case in a suite against the skill content, sequentially (so a
 * single simulation cache is shared across behavioral cases).
 * An empty suite trivially passes with passRate 1.
 * @param {string} skillContent - Candidate skill text.
 * @param {{cases: Array}} suite - Eval suite to execute.
 * @param {string} baselineContent - Reference text for the length ratio.
 * @param {string} apiKey - Anthropic API key.
 * @param {string} [simulationModel] - Enables behavioral cases when set.
 * @returns {Promise<{passRate: number, passCount: number, totalCases: number,
 *   failedCaseIds: Array, lengthRatio: number}>}
 */
async function runEvalSuite(skillContent, suite, baselineContent, apiKey, simulationModel) {
  const lengthRatio = baselineContent.length > 0 ? skillContent.length / baselineContent.length : 1;
  const total = suite.cases.length;
  if (total === 0) {
    return { passRate: 1, passCount: 0, totalCases: 0, failedCaseIds: [], lengthRatio };
  }
  const simOpts = simulationModel ? { model: simulationModel, cache: new Map() } : undefined;
  const failedCaseIds = [];
  let passCount = 0;
  for (const evalCase of suite.cases) {
    const { passed } = await runSingleCase(skillContent, evalCase, apiKey, simOpts);
    if (passed) {
      passCount++;
    } else {
      failedCaseIds.push(evalCase.id);
    }
  }
  return {
    passRate: passCount / total,
    passCount,
    totalCases: total,
    failedCaseIds,
    lengthRatio
  };
}
159
+
160
+ export {
161
+ runSingleCase,
162
+ runEvalSuite
163
+ };
@@ -0,0 +1,23 @@
1
+ import {
2
+ CONTRIBUTION_LEVELS,
3
+ getApiKey,
4
+ getConfigDir,
5
+ getConfigPath,
6
+ getOrCreateInstallationId,
7
+ readConfig,
8
+ resolveConfig,
9
+ setApiKey,
10
+ writeConfig
11
+ } from "./chunk-545XA5CB.js";
12
+ import "./chunk-MLKGABMK.js";
13
+ export {
14
+ CONTRIBUTION_LEVELS,
15
+ getApiKey,
16
+ getConfigDir,
17
+ getConfigPath,
18
+ getOrCreateInstallationId,
19
+ readConfig,
20
+ resolveConfig,
21
+ setApiKey,
22
+ writeConfig
23
+ };
@@ -0,0 +1,15 @@
1
+ import {
2
+ closeDb,
3
+ getDb,
4
+ getDefaultDbPath,
5
+ initSchema,
6
+ openDb
7
+ } from "./chunk-OD5NUFH2.js";
8
+ import "./chunk-MLKGABMK.js";
9
+ export {
10
+ closeDb,
11
+ getDb,
12
+ getDefaultDbPath,
13
+ initSchema,
14
+ openDb
15
+ };