caik-cli 0.1.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +8 -7
  2. package/dist/api-6OX4ICXN.js +9 -0
  3. package/dist/auto-improve-skills-2COKTU5C.js +8 -0
  4. package/dist/autoresearch-Y7WW6L4O.js +24 -0
  5. package/dist/chunk-2YHUDOJL.js +54 -0
  6. package/dist/chunk-3TXNZINH.js +775 -0
  7. package/dist/chunk-5MHNQAV4.js +317 -0
  8. package/dist/chunk-7AIZTHHZ.js +152 -0
  9. package/dist/chunk-D4IM3YRX.js +166 -0
  10. package/dist/chunk-DJJHS7KK.js +62 -0
  11. package/dist/chunk-DKZBQRR3.js +91 -0
  12. package/dist/chunk-FLSHJZLC.js +613 -0
  13. package/dist/chunk-H2ZKCXMJ.js +202 -0
  14. package/dist/chunk-ILMOSMD3.js +83 -0
  15. package/dist/chunk-KYTHKH6V.js +79 -0
  16. package/dist/chunk-LTKHLRM4.js +272 -0
  17. package/dist/chunk-T32AEP3O.js +146 -0
  18. package/dist/chunk-T73Z5UMA.js +14437 -0
  19. package/dist/chunk-TFKT7V7H.js +1545 -0
  20. package/dist/chunk-US4CYDNS.js +524 -0
  21. package/dist/chunk-ZLRN7Q7C.js +27 -0
  22. package/dist/claude-code-6DF4YARB.js +8 -0
  23. package/dist/config-CS7734SA.js +24 -0
  24. package/dist/correction-classifier-TLPKRNLI.js +93 -0
  25. package/dist/cursor-Z4XXDCAM.js +8 -0
  26. package/dist/daemon/autoresearch-2MAEM2YI.js +272 -0
  27. package/dist/daemon/chunk-545XA5CB.js +77 -0
  28. package/dist/daemon/chunk-HEYFAUHL.js +90 -0
  29. package/dist/daemon/chunk-MLKGABMK.js +9 -0
  30. package/dist/daemon/chunk-NJICGNCK.js +150 -0
  31. package/dist/daemon/chunk-OD5NUFH2.js +181 -0
  32. package/dist/daemon/chunk-SM2FSXIP.js +60 -0
  33. package/dist/daemon/chunk-UMDJFPN6.js +163 -0
  34. package/dist/daemon/config-F7HE3JRY.js +23 -0
  35. package/dist/daemon/db-QEXVVTAL.js +15 -0
  36. package/dist/daemon/eval-generator-OR2FAYLB.js +316 -0
  37. package/dist/daemon/improver-TGEK6MPE.js +186 -0
  38. package/dist/daemon/llm-FUJ2TBYT.js +11 -0
  39. package/dist/daemon/nudge-detector-NFRHWZY6.js +140 -0
  40. package/dist/daemon/platform-7N3LQDIB.js +16381 -0
  41. package/dist/daemon/registry-FI4GTO3H.js +20 -0
  42. package/dist/daemon/server.js +356 -0
  43. package/dist/daemon/trace-store-T7XFGQSX.js +19 -0
  44. package/dist/daemon-UXYMG46V.js +85 -0
  45. package/dist/db-TLNRIXLK.js +18 -0
  46. package/dist/eval-generator-GGMRPO3K.js +21 -0
  47. package/dist/eval-runner-EF4K6T5Y.js +15 -0
  48. package/dist/index.js +8033 -568
  49. package/dist/llm-3UUZX6PX.js +12 -0
  50. package/dist/platform-52NREMBS.js +33 -0
  51. package/dist/repo-installer-K6ADOW3E.js +25 -0
  52. package/dist/setup-P744STZE.js +16 -0
  53. package/dist/test-loop-Y7QQE55P.js +127 -0
  54. package/dist/trace-store-FVLMNNDK.js +20 -0
  55. package/package.json +9 -3
@@ -0,0 +1,202 @@
1
+ #!/usr/bin/env node
2
+ import {
3
+ __esm,
4
+ __export
5
+ } from "./chunk-ZLRN7Q7C.js";
6
+
7
+ // src/daemon/db.ts
8
// Namespace object for src/daemon/db.ts. Each entry handed to __export is a
// zero-argument thunk returning the corresponding function defined below.
// NOTE(review): __export comes from ./chunk-ZLRN7Q7C.js and is not visible
// here — presumably the bundler helper that installs these thunks as
// getters on db_exports; confirm there.
+ var db_exports = {};
9
+ __export(db_exports, {
10
+ closeDb: () => closeDb,
11
+ getDb: () => getDb,
12
+ getDefaultDbPath: () => getDefaultDbPath,
13
+ initSchema: () => initSchema,
14
+ openDb: () => openDb
15
+ });
16
+ import Database from "better-sqlite3";
17
+ import { mkdirSync, existsSync } from "fs";
18
+ import { join, dirname } from "path";
19
+ import { homedir } from "os";
20
/**
 * Compute the default location of the autoresearch SQLite database.
 *
 * @returns {string} `<home>/.caik/autoresearch.db` for the current user.
 */
function getDefaultDbPath() {
  const caikHome = join(homedir(), ".caik");
  return join(caikHome, "autoresearch.db");
}
23
/**
 * Open the SQLite database at `dbPath`, creating its parent directory when
 * needed, and apply the connection pragmas (WAL journal, foreign keys on,
 * 5000 ms busy timeout).
 *
 * @param {string} [dbPath] Database file path; defaults to getDefaultDbPath().
 * @returns {object} The opened `Database` connection.
 * @throws Re-throws any error raised while creating the directory, opening
 *   the file, or applying a pragma. If a pragma fails the connection is
 *   closed first so the handle is not leaked.
 */
function openDb(dbPath) {
  const path = dbPath ?? getDefaultDbPath();
  // A recursive mkdir succeeds when the directory already exists, so no
  // separate (racy) existsSync check is needed. 0o700 keeps it owner-only.
  mkdirSync(dirname(path), { recursive: true, mode: 0o700 });
  const db = new Database(path);
  try {
    db.pragma("journal_mode = WAL");
    db.pragma("foreign_keys = ON");
    db.pragma("busy_timeout = 5000");
  } catch (err) {
    db.close();
    throw err;
  }
  return db;
}
35
/**
 * Return the shared database connection, opening it and creating the schema
 * on first use.
 *
 * The connection is cached in the module-level `_db` only after the schema
 * has been created. If initSchema() throws, the half-initialised connection
 * is closed and nothing is cached, so a later call retries instead of
 * returning a database without tables.
 *
 * @param {string} [dbPath] Path passed to openDb(). Only used by the call
 *   that actually opens the connection; ignored once one is cached.
 * @returns {object} The cached connection.
 * @throws Re-throws errors from openDb() or initSchema().
 */
function getDb(dbPath) {
  if (!_db) {
    const db = openDb(dbPath);
    try {
      initSchema(db);
    } catch (err) {
      db.close();
      throw err;
    }
    _db = db;
  }
  return _db;
}
42
/**
 * Close the shared connection held in `_db`, if there is one, and clear the
 * cache so the next getDb() call opens a fresh connection.
 *
 * @returns {void}
 */
function closeDb() {
  if (!_db) return;
  _db.close();
  _db = null;
}
48
// initSchema(db): creates every table and index the daemon uses by running a
// single multi-statement db.exec(). Every statement is CREATE ... IF NOT
// EXISTS, so calling it against a database that already has these objects
// does not recreate them. No migration of existing tables happens here.
// Parameter db: any object exposing exec(sql) — presumably the better-sqlite3
// connection returned by openDb(); confirm against callers.
+ function initSchema(db) {
49
+ db.exec(`
50
+ -- Session buffer: ephemeral tool calls captured during a session
51
+ CREATE TABLE IF NOT EXISTS session_buffer (
52
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
53
+ session_id TEXT NOT NULL,
54
+ type TEXT NOT NULL CHECK(type IN ('tool_call', 'correction', 'prompt')),
55
+ timestamp TEXT NOT NULL,
56
+ tool_name TEXT,
57
+ tool_input TEXT,
58
+ tool_response TEXT,
59
+ prompt TEXT,
60
+ correction_type TEXT,
61
+ slug TEXT,
62
+ success INTEGER
63
+ );
64
+ CREATE INDEX IF NOT EXISTS idx_buffer_session ON session_buffer(session_id);
65
+
66
+ -- Traces: permanent records of corrections and sampled successes
67
+ CREATE TABLE IF NOT EXISTS traces (
68
+ id TEXT PRIMARY KEY,
69
+ session_id TEXT NOT NULL,
70
+ slug TEXT NOT NULL,
71
+ timestamp TEXT NOT NULL,
72
+ kind TEXT NOT NULL CHECK(kind IN ('correction', 'success')),
73
+ tool_name TEXT NOT NULL,
74
+ tool_input TEXT,
75
+ tool_response TEXT,
76
+ correction_type TEXT,
77
+ correction_prompt TEXT,
78
+ skill_content_hash TEXT,
79
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
80
+ );
81
+ CREATE INDEX IF NOT EXISTS idx_traces_slug ON traces(slug);
82
+ CREATE INDEX IF NOT EXISTS idx_traces_slug_kind ON traces(slug, kind);
83
+
84
+ -- Eval suites: one per skill, tracks validation state
85
+ CREATE TABLE IF NOT EXISTS eval_suites (
86
+ slug TEXT PRIMARY KEY,
87
+ version INTEGER NOT NULL DEFAULT 1,
88
+ tpr REAL,
89
+ tnr REAL,
90
+ validation_sample_size INTEGER,
91
+ validated_at TEXT,
92
+ created_at TEXT NOT NULL DEFAULT (datetime('now')),
93
+ updated_at TEXT NOT NULL DEFAULT (datetime('now'))
94
+ );
95
+
96
+ -- Eval cases: individual test assertions linked to a suite
97
+ CREATE TABLE IF NOT EXISTS eval_cases (
98
+ id TEXT PRIMARY KEY,
99
+ suite_slug TEXT NOT NULL REFERENCES eval_suites(slug) ON DELETE CASCADE,
100
+ trace_id TEXT REFERENCES traces(id),
101
+ source TEXT NOT NULL CHECK(source IN ('trace', 'synthetic')),
102
+ category TEXT NOT NULL,
103
+ assertion_type TEXT NOT NULL,
104
+ assertion_value TEXT NOT NULL,
105
+ description TEXT NOT NULL,
106
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
107
+ );
108
+ CREATE INDEX IF NOT EXISTS idx_cases_suite ON eval_cases(suite_slug);
109
+
110
+ -- Loop results: archive of every autoresearch run
111
+ CREATE TABLE IF NOT EXISTS loop_results (
112
+ id TEXT PRIMARY KEY,
113
+ slug TEXT NOT NULL,
114
+ baseline_pass_rate REAL NOT NULL,
115
+ best_pass_rate REAL NOT NULL,
116
+ baseline_pass_count INTEGER NOT NULL,
117
+ best_pass_count INTEGER NOT NULL,
118
+ total_cases INTEGER NOT NULL,
119
+ iterations INTEGER NOT NULL,
120
+ total_llm_calls INTEGER NOT NULL,
121
+ duration_ms INTEGER NOT NULL,
122
+ strategies TEXT NOT NULL,
123
+ best_content TEXT,
124
+ applied INTEGER NOT NULL DEFAULT 0,
125
+ rolled_back INTEGER NOT NULL DEFAULT 0,
126
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
127
+ );
128
+ CREATE INDEX IF NOT EXISTS idx_results_slug ON loop_results(slug, created_at);
129
+
130
+ -- Observations: persistent tool call records from hooks
131
+ CREATE TABLE IF NOT EXISTS observations (
132
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
133
+ session_id TEXT NOT NULL,
134
+ slug TEXT NOT NULL,
135
+ tool TEXT NOT NULL,
136
+ success INTEGER NOT NULL,
137
+ platform TEXT,
138
+ correction_type TEXT,
139
+ correction_source TEXT,
140
+ timestamp TEXT NOT NULL,
141
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
142
+ );
143
+ CREATE INDEX IF NOT EXISTS idx_obs_session ON observations(session_id);
144
+ CREATE INDEX IF NOT EXISTS idx_obs_slug ON observations(slug);
145
+ CREATE INDEX IF NOT EXISTS idx_obs_timestamp ON observations(timestamp);
146
+
147
+ -- Pending events: queue for API posting with retry
148
+ CREATE TABLE IF NOT EXISTS pending_events (
149
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
150
+ payload TEXT NOT NULL,
151
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
152
+ );
153
+
154
+ -- Session context: per-session state (fingerprint, model) for session-end
155
+ CREATE TABLE IF NOT EXISTS session_context (
156
+ session_id TEXT PRIMARY KEY,
157
+ stack_fingerprint TEXT,
158
+ repo_scale TEXT,
159
+ agent_model TEXT,
160
+ created_at TEXT NOT NULL DEFAULT (datetime('now'))
161
+ );
162
+
163
+ -- Session engagement: per-artifact call counts within a session
164
+ CREATE TABLE IF NOT EXISTS session_engagement (
165
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
166
+ session_id TEXT NOT NULL,
167
+ slug TEXT NOT NULL,
168
+ count INTEGER NOT NULL DEFAULT 0,
169
+ UNIQUE(session_id, slug)
170
+ );
171
+ CREATE INDEX IF NOT EXISTS idx_engagement_session ON session_engagement(session_id);
172
+
173
+ -- Retention checks: last check timestamp per artifact slug
174
+ CREATE TABLE IF NOT EXISTS retention_checks (
175
+ slug TEXT PRIMARY KEY,
176
+ last_checked_at INTEGER NOT NULL
177
+ );
178
+
179
+ -- Session-end dedup: track which sessions have already buffered a session_end
180
+ CREATE TABLE IF NOT EXISTS session_end_sent (
181
+ session_id TEXT PRIMARY KEY,
182
+ pending_event_id INTEGER,
183
+ sent_at TEXT NOT NULL DEFAULT (datetime('now'))
184
+ );
185
+ `);
186
+ }
187
// Module-level connection cache read and written by getDb() and closeDb().
+ var _db;
188
// Module initializer for src/daemon/db.ts: its body resets the cache to null.
// NOTE(review): __esm is imported from ./chunk-ZLRN7Q7C.js and not visible
// here — presumably it defers this body until init_db() is first called;
// confirm there.
+ var init_db = __esm({
189
+ "src/daemon/db.ts"() {
190
+ _db = null;
191
+ }
192
+ });
193
+
194
+ export {
195
+ getDefaultDbPath,
196
+ openDb,
197
+ getDb,
198
+ closeDb,
199
+ initSchema,
200
+ db_exports,
201
+ init_db
202
+ };
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env node
2
+ import {
3
+ mapHttpError,
4
+ mapNetworkError
5
+ } from "./chunk-2YHUDOJL.js";
6
+
7
+ // src/api.ts
8
/**
 * Thin HTTP client for the CAIK REST API rooted at `<apiUrl>/api/v1`.
 *
 * Every verb helper funnels into `request`, which attaches the bearer token
 * (when one is configured), converts transport failures with
 * `mapNetworkError`, and converts non-2xx responses with `mapHttpError`.
 */
var CaikApiClient = class {
  baseUrl;
  apiKey;
  verbose;

  /**
   * @param {object} config
   * @param {string} config.apiUrl Base URL; trailing slashes are stripped.
   * @param {string} [config.apiKey] Sent as `Authorization: Bearer <key>` when set.
   * @param {boolean} [config.verbose=false] Log each request/response to stderr.
   */
  constructor(config) {
    this.baseUrl = config.apiUrl.replace(/\/+$/, "");
    this.apiKey = config.apiKey;
    this.verbose = config.verbose ?? false;
  }

  /**
   * Issue a GET request.
   *
   * @param {string} path Path below `/api/v1`, starting with `/`.
   * @param {object} [params] Query parameters; entries whose value is
   *   `undefined` are omitted, all others are stringified.
   * @returns {Promise<*>} Parsed JSON body (see `request`).
   */
  async get(path, params) {
    const url = new URL(`${this.baseUrl}/api/v1${path}`);
    if (params) {
      for (const [key, value] of Object.entries(params)) {
        if (value !== void 0) {
          url.searchParams.set(key, String(value));
        }
      }
    }
    return this.request(url.toString(), { method: "GET" });
  }

  /**
   * Issue a POST request with an optional JSON body.
   *
   * @param {string} path Path below `/api/v1`.
   * @param {*} [body] Value to JSON-encode; no body is sent when `undefined`.
   * @returns {Promise<*>} Parsed JSON body (see `request`).
   */
  async post(path, body) {
    return this.request(`${this.baseUrl}/api/v1${path}`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: body !== void 0 ? JSON.stringify(body) : void 0
    });
  }

  /**
   * Issue a PATCH request with an optional JSON body.
   *
   * @param {string} path Path below `/api/v1`.
   * @param {*} [body] Value to JSON-encode; no body is sent when `undefined`.
   * @returns {Promise<*>} Parsed JSON body (see `request`).
   */
  async patch(path, body) {
    return this.request(`${this.baseUrl}/api/v1${path}`, {
      method: "PATCH",
      headers: { "Content-Type": "application/json" },
      body: body !== void 0 ? JSON.stringify(body) : void 0
    });
  }

  /**
   * Issue a DELETE request.
   *
   * @param {string} path Path below `/api/v1`.
   * @returns {Promise<*>} Parsed JSON body (see `request`).
   */
  async del(path) {
    return this.request(`${this.baseUrl}/api/v1${path}`, { method: "DELETE" });
  }

  /**
   * Perform the fetch and normalise the outcome.
   *
   * @param {string} url Absolute request URL.
   * @param {object} init Fetch init; `init.headers` is copied, not mutated.
   * @returns {Promise<*>} `undefined` for HTTP 204, otherwise the parsed
   *   JSON response body.
   * @throws The value returned by `mapNetworkError(err)` when fetch itself
   *   rejects, or by `mapHttpError(status, body)` for a non-2xx response.
   *   `body` is the parsed JSON payload when the error body is valid JSON,
   *   the raw text otherwise, or `null` if the body could not be read.
   */
  async request(url, init) {
    const headers = {
      ...init.headers
    };
    if (this.apiKey) {
      headers["Authorization"] = `Bearer ${this.apiKey}`;
    }
    if (this.verbose) {
      console.error(`[verbose] ${init.method} ${url}`);
    }
    const start = Date.now();
    let response;
    try {
      response = await fetch(url, { ...init, headers });
    } catch (err) {
      throw mapNetworkError(err);
    }
    if (this.verbose) {
      console.error(`[verbose] ${response.status} (${Date.now() - start}ms)`);
    }
    if (!response.ok) {
      // A response body can be consumed only once. Calling json() and then
      // text() as a fallback always fails on the second read, so read the
      // text once and attempt the JSON parse on that string instead.
      const raw = await response.text().catch(() => null);
      let body = raw;
      if (raw !== null) {
        try {
          body = JSON.parse(raw);
        } catch {
          // Not JSON (e.g. an HTML gateway error page): keep the raw text.
        }
      }
      throw mapHttpError(response.status, body);
    }
    if (response.status === 204) {
      return void 0;
    }
    return response.json();
  }
};
80
+
81
+ export {
82
+ CaikApiClient
83
+ };
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/config.ts
4
+ import { readFileSync, writeFileSync, mkdirSync, chmodSync, existsSync } from "fs";
5
+ import { join } from "path";
6
+ import { homedir } from "os";
7
// Selectable contribution levels. Each entry carries a machine-readable
// `value`, a display `name`, and a `description` of what is shared at that
// level; the "contributor" description labels it as the default.
+ var CONTRIBUTION_LEVELS = [
8
+ { value: "none", name: "None", description: "Nothing sent. Directory access only." },
9
+ { value: "minimal", name: "Minimal", description: "Install/uninstall + engagement + retention. Basic recommendations." },
10
+ { value: "contributor", name: "Contributor", description: "Error/success signals + co-installs. Full recommendations. (default)" },
11
+ { value: "collective", name: "Collective", description: "Stack context + session shape + workflow patterns. Proactive recommendations + leaderboard." }
12
+ ];
13
// Baseline configuration. readConfig() returns a copy of this when no usable
// config file exists and spreads it underneath values loaded from disk.
+ var DEFAULT_CONFIG = {
14
+ apiUrl: "https://www.caik.dev",
15
+ defaultLimit: 10,
16
+ version: 1
17
+ };
18
/**
 * Locate the per-user CAIK configuration directory.
 *
 * @returns {string} `<home>/.caik` for the current user.
 */
function getConfigDir() {
  const home = homedir();
  return join(home, ".caik");
}
21
/**
 * Locate the CAIK configuration file.
 *
 * @returns {string} Path of `config.json` inside getConfigDir().
 */
function getConfigPath() {
  const configDir = getConfigDir();
  return join(configDir, "config.json");
}
24
/**
 * Load the user configuration, layered over DEFAULT_CONFIG.
 *
 * This is deliberately best-effort: a missing file, an unreadable file,
 * malformed JSON, or JSON whose top level is not a plain object all yield a
 * fresh copy of DEFAULT_CONFIG rather than an error.
 *
 * @returns {object} A new object; DEFAULT_CONFIG itself is never returned.
 */
function readConfig() {
  const path = getConfigPath();
  if (!existsSync(path)) {
    return { ...DEFAULT_CONFIG };
  }
  try {
    const parsed = JSON.parse(readFileSync(path, "utf-8"));
    // Spreading a string or array would inject index keys ("0", "1", ...)
    // into the config, so only a plain object may override the defaults.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      return { ...DEFAULT_CONFIG };
    }
    return { ...DEFAULT_CONFIG, ...parsed };
  } catch {
    // Unreadable or malformed config: fall back to defaults (best-effort).
    return { ...DEFAULT_CONFIG };
  }
}
37
/**
 * Persist the configuration as pretty-printed JSON with owner-only access.
 *
 * The file may contain the API key, so it is created with mode 0o600 right
 * away instead of being written with default permissions and tightened
 * afterwards. The follow-up chmod covers a file that already existed with
 * looser permissions, because `mode` only applies when the file is created.
 *
 * @param {object} config Configuration object to serialise.
 * @returns {void}
 * @throws Re-throws filesystem errors from mkdir, write, or chmod.
 */
function writeConfig(config) {
  // Recursive mkdir is a no-op for an existing directory; 0o700 = owner only.
  mkdirSync(getConfigDir(), { recursive: true, mode: 0o700 });
  const path = getConfigPath();
  writeFileSync(path, JSON.stringify(config, null, 2) + "\n", {
    encoding: "utf-8",
    mode: 0o600
  });
  chmodSync(path, 0o600);
}
46
+ function getOrCreateInstallationId() {
47
+ const config = readConfig();
48
+ if (config.installationId) return config.installationId;
49
+ const id = `inst_${globalThis.crypto.randomUUID().replace(/-/g, "").slice(0, 16)}`;
50
+ writeConfig({ ...config, installationId: id });
51
+ return id;
52
+ }
53
/**
 * Resolve the API key: the CAIK_API_KEY environment variable wins whenever it
 * is defined (even as an empty string); otherwise the value stored in the
 * config file is used.
 *
 * @returns {string|undefined} The API key, or `undefined` when none is set.
 */
function getApiKey() {
  const fromEnv = process.env.CAIK_API_KEY;
  if (fromEnv !== undefined && fromEnv !== null) {
    return fromEnv;
  }
  return readConfig().apiKey;
}
56
/**
 * Store `key` as the API key in the on-disk configuration, leaving all other
 * settings returned by readConfig() as they are.
 *
 * @param {string} key API key to persist.
 * @returns {void}
 */
function setApiKey(key) {
  const updated = Object.assign(readConfig(), { apiKey: key });
  writeConfig(updated);
}
61
/**
 * Work out the effective API URL and key for a command invocation.
 *
 * Precedence for each setting, highest first: explicit option, environment
 * variable (CAIK_API_URL / CAIK_API_KEY), config file, and — for the URL
 * only — DEFAULT_CONFIG.
 *
 * @param {object} opts Command-line overrides.
 * @param {string} [opts.apiUrl]
 * @param {string} [opts.apiKey]
 * @returns {{ apiUrl: string, apiKey: (string|undefined) }}
 */
function resolveConfig(opts) {
  const stored = readConfig();
  const env = process.env;
  const apiUrl = opts.apiUrl ?? env.CAIK_API_URL ?? stored.apiUrl ?? DEFAULT_CONFIG.apiUrl;
  const apiKey = opts.apiKey ?? env.CAIK_API_KEY ?? stored.apiKey;
  return { apiUrl, apiKey };
}
68
+
69
+ export {
70
+ CONTRIBUTION_LEVELS,
71
+ getConfigDir,
72
+ getConfigPath,
73
+ readConfig,
74
+ writeConfig,
75
+ getOrCreateInstallationId,
76
+ getApiKey,
77
+ setApiKey,
78
+ resolveConfig
79
+ };
@@ -0,0 +1,272 @@
1
+ #!/usr/bin/env node
2
+ import {
3
+ runEvalSuite
4
+ } from "./chunk-D4IM3YRX.js";
5
+ import {
6
+ callAnthropic
7
+ } from "./chunk-DJJHS7KK.js";
8
+ import {
9
+ findRegistryEntry,
10
+ upsertRegistryEntry
11
+ } from "./chunk-DKZBQRR3.js";
12
+
13
+ // src/daemon/autoresearch.ts
14
+ import { readFileSync, writeFileSync, mkdirSync, existsSync } from "fs";
15
+ import { join } from "path";
16
+ import { homedir } from "os";
17
+ import { randomUUID } from "crypto";
18
// One-sentence instruction for each mutation strategy, keyed by strategy id.
// generateMutations() interpolates the matching text into the LLM prompt and
// falls back to "Improve the skill." for a key that is not listed here.
+ var STRATEGY_DESCRIPTIONS = {
19
+ targeted_fix: "Fix the specific failing eval cases while preserving passing behavior.",
20
+ simplify: "Simplify the skill instructions while maintaining the same behavior.",
21
+ restructure: "Restructure the skill for clarity and completeness.",
22
+ scope_narrow: "Narrow the scope \u2014 be more specific about what the skill does and doesn't do.",
23
+ format_refine: "Add explicit format specifications for output structure."
24
+ };
25
/**
 * Choose which mutation strategies to try, based on which eval cases failed.
 *
 * "targeted_fix" is always first. When at least one case failed, further
 * strategies are appended in this order:
 *   - "scope_narrow"  if any failed case's category contains "scope"
 *   - "format_refine" if any failed case's category contains "format"
 *   - "simplify"      if at most 30% of the suite failed
 *   - "restructure"   if more than 50% failed across >= 2 distinct categories
 * Category matching is case-insensitive. The fail rate is
 * `failedCaseIds.length / max(suite.cases.length, 1)`.
 *
 * @param {{ cases: Array<{ id: *, category: string }> }} suite
 * @param {Array<*>} failedCaseIds Ids of the cases that failed.
 * @returns {string[]} Strategy ids, without duplicates.
 */
function selectStrategies(suite, failedCaseIds) {
  const strategies = ["targeted_fix"];
  if (failedCaseIds.length === 0) return strategies;

  // Set lookup keeps the filter linear instead of O(cases * failedIds).
  const failedIds = new Set(failedCaseIds);
  const failedCategories = suite.cases
    .filter((c) => failedIds.has(c.id))
    .map((c) => c.category.toLowerCase());
  const failRate = failedCaseIds.length / Math.max(suite.cases.length, 1);

  if (failedCategories.some((c) => c.includes("scope"))) {
    strategies.push("scope_narrow");
  }
  if (failedCategories.some((c) => c.includes("format"))) {
    strategies.push("format_refine");
  }
  if (failRate <= 0.3) {
    strategies.push("simplify");
  }
  if (failRate > 0.5 && new Set(failedCategories).size >= 2) {
    strategies.push("restructure");
  }
  // Each id is pushed at most once above, so no dedupe pass is required.
  return strategies;
}
46
/**
 * Decide whether a loop result may be written to the skill without asking.
 *
 *   - "manual":     never.
 *   - "assisted":   only for a perfect pass rate with a length ratio within
 *                   [0.7, 1.5].
 *   - "autonomous": whenever the best pass rate beats the baseline.
 * A result without `bestContent`, or any other mode, is never auto-applied.
 *
 * @param {string} mode Improvement mode.
 * @param {object} result Loop result; not inspected when mode is "manual".
 * @returns {boolean}
 */
function shouldAutoApply(mode, result) {
  if (mode === "manual" || !result.bestContent) {
    return false;
  }
  switch (mode) {
    case "assisted": {
      const score = result.bestScore;
      return score.passRate === 1 && score.lengthRatio >= 0.7 && score.lengthRatio <= 1.5;
    }
    case "autonomous":
      return result.bestScore.passRate > result.baselineScore.passRate;
    default:
      return false;
  }
}
57
// System prompt shared by every mutation request.
var MUTATION_SYSTEM = `You are improving a Claude Code artifact. You will be given the current content, the specific eval cases it fails, and context from the correction traces that generated those cases. Make targeted changes to pass the failing cases while preserving behavior for passing cases. Return ONLY the improved content, no explanation or commentary.`;

/**
 * Ask the LLM for one rewritten version of the skill per strategy.
 *
 * A candidate is kept only if, after trimming and stripping a surrounding
 * `<skill>...</skill>` wrapper, it is longer than 10 characters and between
 * 0.3x and 3x the length of `currentContent`.
 *
 * @param {string} currentContent Skill text to improve.
 * @param {{ cases: Array<object> }} suite Eval suite; each case supplies
 *   `id`, `category`, `description` and `assertion.type`.
 * @param {Array<*>} failedCaseIds Ids of the currently failing cases.
 * @param {string[]} strategies Strategy ids to try, one LLM call each.
 * @param {string} apiKey Passed through to callAnthropic.
 * @param {string} model Passed through to callAnthropic.
 * @returns {Promise<{ mutations: Array<{ content: string, strategy: string }>, llmCalls: number }>}
 *   `llmCalls` counts only the calls that returned; a call that throws is
 *   skipped and not counted.
 */
async function generateMutations(currentContent, suite, failedCaseIds, strategies, apiKey, model) {
  // Partition once with a Set instead of two O(cases * failedIds) filters.
  const failedIds = new Set(failedCaseIds);
  const failedCases = [];
  const passingCases = [];
  for (const c of suite.cases) {
    (failedIds.has(c.id) ? failedCases : passingCases).push(c);
  }

  const failedCaseSummary = failedCases
    .map((c) => `- [${c.category}] ${c.description} (${c.assertion.type})`)
    .join("\n");
  const passingCaseSummary = passingCases.length > 0
    ? passingCases.map((c) => `- [${c.category}] ${c.description}`).join("\n")
    : "(none)";

  const mutations = [];
  let llmCalls = 0;
  for (const strategy of strategies) {
    const userMessage = `Current artifact content:
<skill>
${currentContent}
</skill>

Failing eval cases (${failedCaseIds.length}/${suite.cases.length}) \u2014 FIX THESE:
${failedCaseSummary}

Passing eval cases (${passingCases.length}/${suite.cases.length}) \u2014 PRESERVE THESE:
${passingCaseSummary}

Strategy: ${strategy} \u2014 ${STRATEGY_DESCRIPTIONS[strategy] ?? "Improve the skill."}

Rewrite the skill to pass the failing cases while keeping the passing cases passing.`;
    try {
      const result = await callAnthropic(apiKey, {
        model,
        system: MUTATION_SYSTEM,
        userMessage,
        maxTokens: 4096
      });
      llmCalls++;
      const content = result.text
        .trim()
        .replace(/^<skill>\s*/i, "")
        .replace(/\s*<\/skill>\s*$/i, "");
      // Reject degenerate outputs: near-empty text or a drastic size change.
      // An empty currentContent makes the ratio Infinity/NaN, which also
      // fails this check.
      const ratio = content.length / currentContent.length;
      if (ratio >= 0.3 && ratio <= 3 && content.length > 10) {
        mutations.push({ content, strategy });
      }
    } catch {
      // Deliberately best-effort: one failed strategy must not abort the
      // others, so the error is dropped and the loop moves on.
    }
  }
  return { mutations, llmCalls };
}
99
/**
 * Iteratively mutate a skill file's content and keep the variant with the
 * best eval pass rate. Nothing is written to disk here.
 *
 * Each iteration selects strategies from the currently failing cases (capped
 * at `config.mutationsPerIteration`), generates one candidate per strategy,
 * and scores each candidate. A candidate replaces the best only when its pass
 * rate is strictly higher. The loop stops at a perfect pass rate, after an
 * iteration with no improvement, after `config.maxIterations`, or once
 * `totalLLMCalls` reaches `config.maxLLMCalls`.
 *
 * `totalLLMCalls` only accumulates the calls reported by generateMutations();
 * whatever runEvalSuite() spends is not added here.
 *
 * @param {string} slug Skill identifier (not used inside this function).
 * @param {string} skillPath File whose current content is the baseline.
 * @param {object} suite Eval suite handed to the helpers.
 * @param {{ maxIterations: number, maxLLMCalls: number,
 *           mutationsPerIteration: number, mutationModel: string }} config
 * @param {string} apiKey Forwarded to the LLM helpers.
 * @returns {Promise<object>} `{ bestContent, baselineScore, bestScore,
 *   iterations, totalLLMCalls, strategies, duration }`. `bestContent` is
 *   `null` when nothing beat the baseline; `iterations` is the number of
 *   mutation rounds actually started; `duration` is in milliseconds.
 */
async function autoResearchLoop(slug, skillPath, suite, config, apiKey) {
  const startTime = Date.now();
  const currentContent = readFileSync(skillPath, "utf-8");
  const baselineScore = await runEvalSuite(currentContent, suite, currentContent, apiKey, config.mutationModel);

  let best = currentContent;
  let bestScore = baselineScore;
  let totalLLMCalls = 0;
  let iterationsRun = 0;
  const usedStrategies = [];

  while (iterationsRun < config.maxIterations && totalLLMCalls < config.maxLLMCalls) {
    // Count real rounds. The previous min(maxIterations, totalLLMCalls)
    // estimate was wrong whenever a round used more than one LLM call.
    iterationsRun++;
    const strategies = selectStrategies(suite, bestScore.failedCaseIds).slice(
      0,
      config.mutationsPerIteration
    );
    const { mutations, llmCalls } = await generateMutations(
      best,
      suite,
      bestScore.failedCaseIds,
      strategies,
      apiKey,
      config.mutationModel
    );
    totalLLMCalls += llmCalls;

    let improved = false;
    for (const mutation of mutations) {
      // Budget exhausted by generation: skip scoring the candidates.
      if (totalLLMCalls >= config.maxLLMCalls) break;
      // The third argument stays the ORIGINAL file content on every round.
      const score = await runEvalSuite(mutation.content, suite, currentContent, apiKey, config.mutationModel);
      if (score.passRate > bestScore.passRate) {
        best = mutation.content;
        bestScore = score;
        improved = true;
        if (!usedStrategies.includes(mutation.strategy)) {
          usedStrategies.push(mutation.strategy);
        }
      }
    }
    if (bestScore.passRate === 1) break;
    if (!improved) break;
  }

  return {
    bestContent: best !== currentContent ? best : null,
    baselineScore,
    bestScore,
    iterations: iterationsRun,
    totalLLMCalls,
    strategies: usedStrategies,
    duration: Date.now() - startTime
  };
}
149
/**
 * Replace a skill file with improved content and record the change.
 *
 * Steps, in order: look up the registry entry (returning silently when there
 * is none); archive the current file under `~/.caik/versions/<slug>/` with a
 * timestamped name; overwrite the skill file; truncate
 * `~/.caik/observations/<slug>.jsonl` if it exists; update the registry
 * entry; insert an `applied = 1` row into `loop_results`.
 *
 * @param {string} slug Skill identifier; also used as a directory name.
 * @param {string} skillPath Skill file to overwrite.
 * @param {string} improvedContent New file content.
 * @param {object} result Loop result (scores, iterations, strategies, ...).
 * @param {object} db Connection exposing `prepare(sql).run(...)`.
 * @returns {void}
 */
function applyImprovement(slug, skillPath, improvedContent, result, db) {
  const entry = findRegistryEntry(slug);
  if (!entry) {
    return;
  }

  // Archive the current version before touching the skill file.
  const versionsDir = join(homedir(), ".caik", "versions", slug);
  mkdirSync(versionsDir, { recursive: true });
  const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
  const backupPath = join(versionsDir, stamp + ".md");
  const previousContent = readFileSync(skillPath, "utf-8");
  writeFileSync(backupPath, previousContent, "utf-8");
  writeFileSync(skillPath, improvedContent, "utf-8");

  // Observations collected so far describe the old version: empty the log.
  const obsPath = join(homedir(), ".caik", "observations", `${slug}.jsonl`);
  if (existsSync(obsPath)) {
    writeFileSync(obsPath, "", "utf-8");
  }

  const now = (/* @__PURE__ */ new Date()).toISOString();
  const logEntry = { ts: now, type: `autoresearch:${result.strategies.join("+")}` };
  upsertRegistryEntry({
    ...entry,
    updatedAt: now,
    lastImprovedAt: now,
    localVersion: (entry.localVersion ?? 0) + 1,
    pendingImprovement: false,
    lastAutoAppliedAt: now,
    // NOTE(review): this stores the eval *pass* rate under a field that
    // checkWatchdog() reads as a *correction* rate — confirm this is intended.
    preApplyCorrectionRate: result.baselineScore.passRate,
    improvementLog: [...entry.improvementLog ?? [], logEntry],
    lastLoopResult: {
      score: result.bestScore.passRate,
      iterations: result.iterations,
      timestamp: now
    }
  });

  const row = [
    randomUUID(),
    slug,
    result.baselineScore.passRate,
    result.bestScore.passRate,
    result.baselineScore.passCount,
    result.bestScore.passCount,
    result.bestScore.totalCases,
    result.iterations,
    result.totalLLMCalls,
    result.duration,
    JSON.stringify(result.strategies),
    improvedContent,
    now
  ];
  db.prepare(
    `INSERT INTO loop_results (id, slug, baseline_pass_rate, best_pass_rate, baseline_pass_count, best_pass_count,
total_cases, iterations, total_llm_calls, duration_ms, strategies, best_content, applied, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 1, ?)`
  ).run(...row);
}
201
/**
 * Stage improved content next to the skill file for manual review, without
 * overwriting the skill itself.
 *
 * Steps, in order: look up the registry entry (returning silently when there
 * is none); archive the current file under `~/.caik/versions/<slug>/`; write
 * the proposal to `SKILL.proposed.md` (for paths ending in `SKILL.md`) or
 * `<skillPath>.proposed`; flag the registry entry as having a pending
 * improvement; insert an `applied = 0` row into `loop_results`.
 *
 * @param {string} slug Skill identifier; also used as a directory name.
 * @param {string} skillPath Path of the existing skill file.
 * @param {string} improvedContent Proposed file content.
 * @param {object} result Loop result (scores, iterations, strategies, ...).
 * @param {object} db Connection exposing `prepare(sql).run(...)`.
 * @returns {void}
 */
function proposeImprovement(slug, skillPath, improvedContent, result, db) {
  const entry = findRegistryEntry(slug);
  if (!entry) {
    return;
  }

  const currentContent = readFileSync(skillPath, "utf-8");
  const versionsDir = join(homedir(), ".caik", "versions", slug);
  mkdirSync(versionsDir, { recursive: true });
  const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
  writeFileSync(join(versionsDir, stamp + ".md"), currentContent, "utf-8");

  let proposedPath;
  if (skillPath.endsWith("SKILL.md")) {
    proposedPath = skillPath.replace(/SKILL\.md$/, "SKILL.proposed.md");
  } else {
    proposedPath = `${skillPath}.proposed`;
  }
  writeFileSync(proposedPath, improvedContent, "utf-8");

  const now = (/* @__PURE__ */ new Date()).toISOString();
  const logEntry = { ts: now, type: `autoresearch:${result.strategies.join("+")}` };
  upsertRegistryEntry({
    ...entry,
    updatedAt: now,
    pendingImprovement: true,
    improvementLog: [...entry.improvementLog ?? [], logEntry],
    lastLoopResult: {
      score: result.bestScore.passRate,
      iterations: result.iterations,
      timestamp: now
    }
  });

  const row = [
    randomUUID(),
    slug,
    result.baselineScore.passRate,
    result.bestScore.passRate,
    result.baselineScore.passCount,
    result.bestScore.passCount,
    result.bestScore.totalCases,
    result.iterations,
    result.totalLLMCalls,
    result.duration,
    JSON.stringify(result.strategies),
    improvedContent,
    now
  ];
  db.prepare(
    `INSERT INTO loop_results (id, slug, baseline_pass_rate, best_pass_rate, baseline_pass_count, best_pass_count,
total_cases, iterations, total_llm_calls, duration_ms, strategies, best_content, applied, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, ?)`
  ).run(...row);
}
247
/**
 * Decide whether an auto-applied improvement should be rolled back.
 *
 * Only observations timestamped strictly after `entry.lastAutoAppliedAt` are
 * considered, and at least 10 of them are required. A rollback is requested
 * when the share of those observations carrying a `correctionType` exceeds
 * `entry.preApplyCorrectionRate` (0 when absent) by more than 0.2.
 *
 * @param {string} slug Skill identifier (not used inside this function).
 * @param {{ lastAutoAppliedAt?: string, preApplyCorrectionRate?: number }} entry
 * @param {Array<{ timestamp: string, correctionType?: * }>} observations
 * @returns {{ shouldRollback: boolean, reason?: string }}
 */
function checkWatchdog(slug, entry, observations) {
  if (!entry.lastAutoAppliedAt) return { shouldRollback: false };

  // Parse the cutoff once rather than once per observation. An unparsable
  // date yields NaN, every comparison is false, and no rollback is requested.
  const appliedAt = new Date(entry.lastAutoAppliedAt).getTime();
  const postApply = observations.filter(
    (o) => new Date(o.timestamp).getTime() > appliedAt
  );
  if (postApply.length < 10) return { shouldRollback: false };

  const postCorrectionRate = postApply.filter((o) => o.correctionType).length / postApply.length;
  const preCorrectionRate = entry.preApplyCorrectionRate ?? 0;
  if (postCorrectionRate - preCorrectionRate > 0.2) {
    return {
      shouldRollback: true,
      reason: `Correction rate increased from ${(preCorrectionRate * 100).toFixed(0)}% to ${(postCorrectionRate * 100).toFixed(0)}%`
    };
  }
  return { shouldRollback: false };
}
263
+
264
+ export {
265
+ selectStrategies,
266
+ shouldAutoApply,
267
+ generateMutations,
268
+ autoResearchLoop,
269
+ applyImprovement,
270
+ proposeImprovement,
271
+ checkWatchdog
272
+ };