caik-cli 0.1.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +8 -7
  2. package/dist/api-6OX4ICXN.js +9 -0
  3. package/dist/auto-improve-skills-2COKTU5C.js +8 -0
  4. package/dist/autoresearch-Y7WW6L4O.js +24 -0
  5. package/dist/chunk-2YHUDOJL.js +54 -0
  6. package/dist/chunk-3TXNZINH.js +775 -0
  7. package/dist/chunk-5MHNQAV4.js +317 -0
  8. package/dist/chunk-7AIZTHHZ.js +152 -0
  9. package/dist/chunk-D4IM3YRX.js +166 -0
  10. package/dist/chunk-DJJHS7KK.js +62 -0
  11. package/dist/chunk-DKZBQRR3.js +91 -0
  12. package/dist/chunk-FLSHJZLC.js +613 -0
  13. package/dist/chunk-H2ZKCXMJ.js +202 -0
  14. package/dist/chunk-ILMOSMD3.js +83 -0
  15. package/dist/chunk-KYTHKH6V.js +79 -0
  16. package/dist/chunk-LTKHLRM4.js +272 -0
  17. package/dist/chunk-T32AEP3O.js +146 -0
  18. package/dist/chunk-T73Z5UMA.js +14437 -0
  19. package/dist/chunk-TFKT7V7H.js +1545 -0
  20. package/dist/chunk-US4CYDNS.js +524 -0
  21. package/dist/chunk-ZLRN7Q7C.js +27 -0
  22. package/dist/claude-code-6DF4YARB.js +8 -0
  23. package/dist/config-CS7734SA.js +24 -0
  24. package/dist/correction-classifier-TLPKRNLI.js +93 -0
  25. package/dist/cursor-Z4XXDCAM.js +8 -0
  26. package/dist/daemon/autoresearch-2MAEM2YI.js +272 -0
  27. package/dist/daemon/chunk-545XA5CB.js +77 -0
  28. package/dist/daemon/chunk-HEYFAUHL.js +90 -0
  29. package/dist/daemon/chunk-MLKGABMK.js +9 -0
  30. package/dist/daemon/chunk-NJICGNCK.js +150 -0
  31. package/dist/daemon/chunk-OD5NUFH2.js +181 -0
  32. package/dist/daemon/chunk-SM2FSXIP.js +60 -0
  33. package/dist/daemon/chunk-UMDJFPN6.js +163 -0
  34. package/dist/daemon/config-F7HE3JRY.js +23 -0
  35. package/dist/daemon/db-QEXVVTAL.js +15 -0
  36. package/dist/daemon/eval-generator-OR2FAYLB.js +316 -0
  37. package/dist/daemon/improver-TGEK6MPE.js +186 -0
  38. package/dist/daemon/llm-FUJ2TBYT.js +11 -0
  39. package/dist/daemon/nudge-detector-NFRHWZY6.js +140 -0
  40. package/dist/daemon/platform-7N3LQDIB.js +16381 -0
  41. package/dist/daemon/registry-FI4GTO3H.js +20 -0
  42. package/dist/daemon/server.js +356 -0
  43. package/dist/daemon/trace-store-T7XFGQSX.js +19 -0
  44. package/dist/daemon-UXYMG46V.js +85 -0
  45. package/dist/db-TLNRIXLK.js +18 -0
  46. package/dist/eval-generator-GGMRPO3K.js +21 -0
  47. package/dist/eval-runner-EF4K6T5Y.js +15 -0
  48. package/dist/index.js +8033 -568
  49. package/dist/llm-3UUZX6PX.js +12 -0
  50. package/dist/platform-52NREMBS.js +33 -0
  51. package/dist/repo-installer-K6ADOW3E.js +25 -0
  52. package/dist/setup-P744STZE.js +16 -0
  53. package/dist/test-loop-Y7QQE55P.js +127 -0
  54. package/dist/trace-store-FVLMNNDK.js +20 -0
  55. package/package.json +9 -3
@@ -0,0 +1,181 @@
1
+ // src/daemon/db.ts
2
+ import Database from "better-sqlite3";
3
+ import { mkdirSync, existsSync } from "fs";
4
+ import { join, dirname } from "path";
5
+ import { homedir } from "os";
6
// Lazily-opened singleton connection; created by getDb(), cleared by closeDb().
var _db = null;
7
/**
 * Resolve the default location of the autoresearch SQLite database:
 * ~/.caik/autoresearch.db under the current user's home directory.
 * @returns {string} Absolute path to the database file.
 */
function getDefaultDbPath() {
  const home = homedir();
  return join(home, ".caik", "autoresearch.db");
}
10
/**
 * Open (or create) the SQLite database at the given path, creating the
 * parent directory with owner-only permissions when it is missing, and
 * apply the daemon's standard pragmas: WAL journaling, enforced foreign
 * keys, and a 5s busy timeout.
 * @param {string} [dbPath] - Target file; defaults to getDefaultDbPath().
 * @returns {Database} An open better-sqlite3 handle.
 */
function openDb(dbPath) {
  const target = dbPath ?? getDefaultDbPath();
  const parent = dirname(target);
  if (!existsSync(parent)) {
    // 448 === 0o700: directory readable/writable by the owner only.
    mkdirSync(parent, { recursive: true, mode: 448 });
  }
  const handle = new Database(target);
  for (const pragma of ["journal_mode = WAL", "foreign_keys = ON", "busy_timeout = 5000"]) {
    handle.pragma(pragma);
  }
  return handle;
}
22
/**
 * Return the shared database connection, opening it and initializing the
 * schema on first use. Subsequent calls ignore dbPath and return the
 * cached handle.
 * @param {string} [dbPath] - Only honored on the very first call.
 * @returns {Database} The singleton connection.
 */
function getDb(dbPath) {
  if (_db === null) {
    _db = openDb(dbPath);
    initSchema(_db);
  }
  return _db;
}
29
/**
 * Close the singleton connection (if any) and clear the cache so the next
 * getDb() call reopens the database.
 */
function closeDb() {
  if (_db === null) return;
  _db.close();
  _db = null;
}
35
/**
 * Create every table and index used by the daemon. All statements are
 * IF NOT EXISTS, so this is idempotent and safe to run on each startup.
 * Note: only session_buffer/traces and eval_suites/eval_cases are linked
 * by foreign keys; other tables correlate by session_id/slug text columns.
 * @param {Database} db - An open better-sqlite3 connection.
 */
function initSchema(db) {
  db.exec(`
    -- Session buffer: ephemeral tool calls captured during a session
    CREATE TABLE IF NOT EXISTS session_buffer (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT NOT NULL,
      type TEXT NOT NULL CHECK(type IN ('tool_call', 'correction', 'prompt')),
      timestamp TEXT NOT NULL,
      tool_name TEXT,
      tool_input TEXT,
      tool_response TEXT,
      prompt TEXT,
      correction_type TEXT,
      slug TEXT,
      success INTEGER
    );
    CREATE INDEX IF NOT EXISTS idx_buffer_session ON session_buffer(session_id);

    -- Traces: permanent records of corrections and sampled successes
    CREATE TABLE IF NOT EXISTS traces (
      id TEXT PRIMARY KEY,
      session_id TEXT NOT NULL,
      slug TEXT NOT NULL,
      timestamp TEXT NOT NULL,
      kind TEXT NOT NULL CHECK(kind IN ('correction', 'success')),
      tool_name TEXT NOT NULL,
      tool_input TEXT,
      tool_response TEXT,
      correction_type TEXT,
      correction_prompt TEXT,
      skill_content_hash TEXT,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );
    CREATE INDEX IF NOT EXISTS idx_traces_slug ON traces(slug);
    CREATE INDEX IF NOT EXISTS idx_traces_slug_kind ON traces(slug, kind);

    -- Eval suites: one per skill, tracks validation state
    CREATE TABLE IF NOT EXISTS eval_suites (
      slug TEXT PRIMARY KEY,
      version INTEGER NOT NULL DEFAULT 1,
      tpr REAL,
      tnr REAL,
      validation_sample_size INTEGER,
      validated_at TEXT,
      created_at TEXT NOT NULL DEFAULT (datetime('now')),
      updated_at TEXT NOT NULL DEFAULT (datetime('now'))
    );

    -- Eval cases: individual test assertions linked to a suite
    CREATE TABLE IF NOT EXISTS eval_cases (
      id TEXT PRIMARY KEY,
      suite_slug TEXT NOT NULL REFERENCES eval_suites(slug) ON DELETE CASCADE,
      trace_id TEXT REFERENCES traces(id),
      source TEXT NOT NULL CHECK(source IN ('trace', 'synthetic')),
      category TEXT NOT NULL,
      assertion_type TEXT NOT NULL,
      assertion_value TEXT NOT NULL,
      description TEXT NOT NULL,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );
    CREATE INDEX IF NOT EXISTS idx_cases_suite ON eval_cases(suite_slug);

    -- Loop results: archive of every autoresearch run
    CREATE TABLE IF NOT EXISTS loop_results (
      id TEXT PRIMARY KEY,
      slug TEXT NOT NULL,
      baseline_pass_rate REAL NOT NULL,
      best_pass_rate REAL NOT NULL,
      baseline_pass_count INTEGER NOT NULL,
      best_pass_count INTEGER NOT NULL,
      total_cases INTEGER NOT NULL,
      iterations INTEGER NOT NULL,
      total_llm_calls INTEGER NOT NULL,
      duration_ms INTEGER NOT NULL,
      strategies TEXT NOT NULL,
      best_content TEXT,
      applied INTEGER NOT NULL DEFAULT 0,
      rolled_back INTEGER NOT NULL DEFAULT 0,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );
    CREATE INDEX IF NOT EXISTS idx_results_slug ON loop_results(slug, created_at);

    -- Observations: persistent tool call records from hooks
    CREATE TABLE IF NOT EXISTS observations (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT NOT NULL,
      slug TEXT NOT NULL,
      tool TEXT NOT NULL,
      success INTEGER NOT NULL,
      platform TEXT,
      correction_type TEXT,
      correction_source TEXT,
      timestamp TEXT NOT NULL,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );
    CREATE INDEX IF NOT EXISTS idx_obs_session ON observations(session_id);
    CREATE INDEX IF NOT EXISTS idx_obs_slug ON observations(slug);
    CREATE INDEX IF NOT EXISTS idx_obs_timestamp ON observations(timestamp);

    -- Pending events: queue for API posting with retry
    CREATE TABLE IF NOT EXISTS pending_events (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      payload TEXT NOT NULL,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );

    -- Session context: per-session state (fingerprint, model) for session-end
    CREATE TABLE IF NOT EXISTS session_context (
      session_id TEXT PRIMARY KEY,
      stack_fingerprint TEXT,
      repo_scale TEXT,
      agent_model TEXT,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );

    -- Session engagement: per-artifact call counts within a session
    CREATE TABLE IF NOT EXISTS session_engagement (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      session_id TEXT NOT NULL,
      slug TEXT NOT NULL,
      count INTEGER NOT NULL DEFAULT 0,
      UNIQUE(session_id, slug)
    );
    CREATE INDEX IF NOT EXISTS idx_engagement_session ON session_engagement(session_id);

    -- Retention checks: last check timestamp per artifact slug
    CREATE TABLE IF NOT EXISTS retention_checks (
      slug TEXT PRIMARY KEY,
      last_checked_at INTEGER NOT NULL
    );

    -- Session-end dedup: track which sessions have already buffered a session_end
    CREATE TABLE IF NOT EXISTS session_end_sent (
      session_id TEXT PRIMARY KEY,
      pending_event_id INTEGER,
      sent_at TEXT NOT NULL DEFAULT (datetime('now'))
    );
  `);
}
174
+
175
+ export {
176
+ getDefaultDbPath,
177
+ openDb,
178
+ getDb,
179
+ closeDb,
180
+ initSchema
181
+ };
@@ -0,0 +1,60 @@
1
+ // src/daemon/llm.ts
2
+ import { readFileSync, existsSync } from "fs";
3
+ import { join } from "path";
4
+ import { homedir } from "os";
5
// Anthropic Messages API endpoint used by callAnthropic().
var ANTHROPIC_API_URL = "https://api.anthropic.com/v1/messages";
6
/**
 * Locate an Anthropic API key. The ANTHROPIC_API_KEY environment variable
 * wins; otherwise fall back to the `anthropicApiKey` field of
 * ~/.caik/config.json. Config read/parse errors are deliberately swallowed
 * (best-effort lookup).
 * @returns {string|undefined} The key, or undefined when none is configured.
 */
function getAnthropicApiKey() {
  const fromEnv = process.env.ANTHROPIC_API_KEY;
  if (fromEnv) return fromEnv;
  const configPath = join(homedir(), ".caik", "config.json");
  try {
    if (existsSync(configPath)) {
      const config = JSON.parse(readFileSync(configPath, "utf-8"));
      if (typeof config.anthropicApiKey === "string") return config.anthropicApiKey;
    }
  } catch {
    // Unreadable or malformed config: treat as "no key configured".
  }
  return undefined;
}
18
/**
 * Invoke the Anthropic Messages API with a single user message and return
 * the first text block of the reply plus token usage.
 * @param {string} apiKey - Anthropic API key (sent as x-api-key).
 * @param {{model: string, system?: string, userMessage: string, maxTokens?: number}} opts
 * @returns {Promise<{text: string, inputTokens: number, outputTokens: number}>}
 * @throws {Error} On a non-2xx HTTP status, or when the reply has no text block.
 */
async function callAnthropic(apiKey, opts) {
  const payload = {
    model: opts.model,
    max_tokens: opts.maxTokens ?? 4096,
    system: opts.system,
    messages: [{ role: "user", content: opts.userMessage }]
  };
  const headers = {
    "x-api-key": apiKey,
    "anthropic-version": "2023-06-01",
    "content-type": "application/json"
  };
  const res = await fetch(ANTHROPIC_API_URL, {
    method: "POST",
    headers,
    body: JSON.stringify(payload)
  });
  if (!res.ok) {
    const text = await res.text();
    throw new Error(`Anthropic API ${res.status}: ${text}`);
  }
  const data = await res.json();
  const textBlock = data.content.find((block) => block.type === "text");
  if (!textBlock?.text) throw new Error("No text in Anthropic response");
  return {
    text: textBlock.text,
    inputTokens: data.usage?.input_tokens ?? 0,
    outputTokens: data.usage?.output_tokens ?? 0
  };
}
47
/**
 * Parse JSON from an LLM reply, tolerating a Markdown code fence
 * (``` or ```json) wrapped around the payload.
 * @param {string} text - Raw model output.
 * @returns {*} The parsed JSON value.
 * @throws {SyntaxError} When the unfenced text is not valid JSON.
 */
function parseLLMJson(text) {
  let body = text.trim();
  if (body.startsWith("```")) {
    // Strip the opening fence (with optional language tag) and closing fence.
    body = body.replace(/^```[a-z]*\n?/i, "").replace(/\n?```\s*$/, "");
  }
  return JSON.parse(body);
}
55
+
56
+ export {
57
+ getAnthropicApiKey,
58
+ callAnthropic,
59
+ parseLLMJson
60
+ };
@@ -0,0 +1,163 @@
1
+ import {
2
+ callAnthropic
3
+ } from "./chunk-SM2FSXIP.js";
4
+
5
+ // src/daemon/eval-runner.ts
6
+ import { createHash } from "crypto";
7
/**
 * Cache key for a (skill, scenario) simulation: the first 16 hex characters
 * of sha256 over both strings joined by a NUL separator.
 * @param {string} skillContent
 * @param {string} scenario
 * @returns {string} 16-char hex key.
 */
function simCacheKey(skillContent, scenario) {
  const digest = createHash("sha256")
    .update(skillContent + "\0" + scenario)
    .digest("hex");
  return digest.slice(0, 16);
}
/**
 * Run the skill as a system prompt against a scenario, memoizing the model's
 * text output in simOpts.cache so repeated evals of the same pair are free.
 * @param {string} skillContent - Skill text used as the system prompt.
 * @param {string} scenario - User message to simulate.
 * @param {string} apiKey - Anthropic API key.
 * @param {{model: string, cache: Map<string, string>}} simOpts
 * @returns {Promise<string>} The simulated assistant output.
 */
async function simulateSkill(skillContent, scenario, apiKey, simOpts) {
  const key = simCacheKey(skillContent, scenario);
  if (simOpts.cache.has(key)) return simOpts.cache.get(key);
  const { text } = await callAnthropic(apiKey, {
    model: simOpts.model,
    system: skillContent,
    userMessage: scenario,
    maxTokens: 2048
  });
  simOpts.cache.set(key, text);
  return text;
}
23
/**
 * Test a regex assertion against content. Supports an optional inline flag
 * prefix — "(?i)", "(?s)", "(?m)", or any combination such as "(?is)" —
 * translated into RegExp constructor flags, since JavaScript's RegExp has
 * no inline-flag syntax. (Previously only the exact "(?i)" prefix was
 * recognized; other prefixes fell through as invalid patterns.)
 * For "must_contain" the pattern must match; for "must_not_contain" it must
 * not. Invalid patterns yield a failed result rather than throwing.
 * @param {string} content - Text to test.
 * @param {"must_contain"|"must_not_contain"} type - Assertion polarity.
 * @param {string} pattern - Regex source, optionally flag-prefixed.
 * @returns {{passed: boolean, reason: string}}
 */
function checkPatternAssertion(content, type, pattern) {
  try {
    let flags = "";
    let cleanPattern = pattern;
    // Translate an inline-flag prefix like "(?i)" or "(?ims)" into flags.
    const inline = /^\(\?([ims]+)\)/.exec(cleanPattern);
    if (inline) {
      flags = inline[1];
      cleanPattern = cleanPattern.slice(inline[0].length);
    }
    const regex = new RegExp(cleanPattern, flags);
    const found = regex.test(content);
    if (type === "must_contain") {
      return found ? { passed: true, reason: `Pattern "${pattern}" found` } : { passed: false, reason: `Pattern "${pattern}" not found in content` };
    } else {
      return found ? { passed: false, reason: `Pattern "${pattern}" found in content (should be absent)` } : { passed: true, reason: `Pattern "${pattern}" correctly absent` };
    }
  } catch (err) {
    return {
      passed: false,
      reason: `Invalid regex pattern: ${err instanceof Error ? err.message : String(err)}`
    };
  }
}
45
/**
 * Evaluate a JavaScript assertion body against content. The snippet is
 * compiled as `function (content) { <fn> }` and passes when it returns a
 * truthy value; compile or runtime errors produce a failed result.
 * NOTE(review): `new Function` executes the stored assertion with full
 * process privileges — eval cases must come from a trusted source.
 * @param {string} content - Text handed to the check function.
 * @param {string} fn - Function body; should `return` a boolean-ish value.
 * @returns {{passed: boolean, reason: string}}
 */
function checkCodeAssertion(content, fn) {
  try {
    const check = new Function("content", fn);
    return check(content)
      ? { passed: true, reason: "Code check passed" }
      : { passed: false, reason: "Code check returned false" };
  } catch (err) {
    return {
      passed: false,
      reason: `Code check error: ${err instanceof Error ? err.message : String(err)}`
    };
  }
}
57
/**
 * Ask a small model to judge skill content against free-form criteria.
 * The judge is instructed to answer with a bare JSON object
 * {passed, reason}; any API or parse failure is reported as a failed
 * result rather than thrown.
 * @param {string} skillContent - Skill text (or simulated output) to judge.
 * @param {string} judgePrompt - Natural-language pass/fail criteria.
 * @param {string} apiKey - Anthropic API key.
 * @returns {Promise<{passed: boolean, reason: string}>}
 */
async function checkLLMJudge(skillContent, judgePrompt, apiKey) {
  try {
    const reply = await callAnthropic(apiKey, {
      model: "claude-haiku-4-5-20251001",
      system: `You are an eval judge for Claude Code skills. Given a skill and a judge prompt, determine if the skill PASSES or FAILS the criteria. Respond with ONLY a JSON object: {"passed": true/false, "reason": "brief explanation"}`,
      userMessage: `Skill content:
<skill>
${skillContent}
</skill>

Judge criteria: ${judgePrompt}`,
      maxTokens: 256
    });
    return JSON.parse(reply.text);
  } catch (err) {
    return {
      passed: false,
      reason: `LLM judge error: ${err instanceof Error ? err.message : String(err)}`
    };
  }
}
79
/**
 * Evaluate one eval case against the skill content.
 * Structural assertions inspect the SKILL.md text directly; behavioral
 * assertions first simulate the skill via simulateSkill() (which requires
 * simOpts) and then apply the check to the simulated output.
 * @param {string} skillContent - Full skill text under test.
 * @param {{assertion: object}} evalCase - Case carrying a typed assertion.
 * @param {string} apiKey - Anthropic API key (used for judge + simulation).
 * @param {{model: string, cache: Map<string, string>}} [simOpts] - Simulation
 *   config; behavioral cases fail immediately when omitted.
 * @returns {Promise<{passed: boolean, reason: string}>}
 */
async function runSingleCase(skillContent, evalCase, apiKey, simOpts) {
  const assertion = evalCase.assertion;
  switch (assertion.type) {
    // ── Structural (check SKILL.md text directly) ──────────────────
    case "must_contain":
      return checkPatternAssertion(skillContent, "must_contain", assertion.pattern);
    case "must_not_contain":
      return checkPatternAssertion(skillContent, "must_not_contain", assertion.pattern);
    case "code_check":
      return checkCodeAssertion(skillContent, assertion.fn);
    case "llm_judge":
      return checkLLMJudge(skillContent, assertion.prompt, apiKey);
    case "max_output_length":
      // ~4 characters per token heuristic, applied to the skill text itself.
      return {
        passed: skillContent.length <= assertion.tokens * 4,
        reason: skillContent.length <= assertion.tokens * 4 ? `Content length ${skillContent.length} within limit` : `Content length ${skillContent.length} exceeds token limit ${assertion.tokens}`
      };
    // ── Behavioral (simulate skill, check output) ──────────────────
    case "behavioral_must_contain": {
      if (!simOpts) return { passed: false, reason: "No simulation config for behavioral eval" };
      const output = await simulateSkill(skillContent, assertion.scenario, apiKey, simOpts);
      return checkPatternAssertion(output, "must_contain", assertion.pattern);
    }
    case "behavioral_must_not_contain": {
      if (!simOpts) return { passed: false, reason: "No simulation config for behavioral eval" };
      const output = await simulateSkill(skillContent, assertion.scenario, apiKey, simOpts);
      return checkPatternAssertion(output, "must_not_contain", assertion.pattern);
    }
    case "behavioral_max_length": {
      if (!simOpts) return { passed: false, reason: "No simulation config for behavioral eval" };
      const output = await simulateSkill(skillContent, assertion.scenario, apiKey, simOpts);
      // Same ~4 chars/token heuristic, applied to the simulated output.
      const tokenEstimate = Math.round(output.length / 4);
      return {
        passed: tokenEstimate <= assertion.tokens,
        reason: tokenEstimate <= assertion.tokens ? `Simulated output ~${tokenEstimate} tokens, within limit ${assertion.tokens}` : `Simulated output ~${tokenEstimate} tokens, exceeds limit ${assertion.tokens}`
      };
    }
    case "behavioral_judge": {
      if (!simOpts) return { passed: false, reason: "No simulation config for behavioral eval" };
      const output = await simulateSkill(skillContent, assertion.scenario, apiKey, simOpts);
      return checkLLMJudge(output, assertion.criteria, apiKey);
    }
    case "behavioral_code_check": {
      if (!simOpts) return { passed: false, reason: "No simulation config for behavioral eval" };
      const output = await simulateSkill(skillContent, assertion.scenario, apiKey, simOpts);
      return checkCodeAssertion(output, assertion.fn);
    }
    default:
      // Unknown assertion types fail closed rather than throwing.
      return { passed: false, reason: `Unknown assertion type` };
  }
}
130
/**
 * Run every case in a suite against the skill content, sequentially (so a
 * single simulation cache is shared across behavioral cases).
 * An empty suite trivially passes with passRate 1.
 * @param {string} skillContent - Candidate skill text.
 * @param {{cases: Array}} suite - Eval suite to execute.
 * @param {string} baselineContent - Reference text for the length ratio.
 * @param {string} apiKey - Anthropic API key.
 * @param {string} [simulationModel] - Enables behavioral cases when set.
 * @returns {Promise<{passRate: number, passCount: number, totalCases: number,
 *   failedCaseIds: Array, lengthRatio: number}>}
 */
async function runEvalSuite(skillContent, suite, baselineContent, apiKey, simulationModel) {
  const lengthRatio = baselineContent.length > 0 ? skillContent.length / baselineContent.length : 1;
  const total = suite.cases.length;
  if (total === 0) {
    return { passRate: 1, passCount: 0, totalCases: 0, failedCaseIds: [], lengthRatio };
  }
  const simOpts = simulationModel ? { model: simulationModel, cache: new Map() } : undefined;
  const failedCaseIds = [];
  let passCount = 0;
  for (const evalCase of suite.cases) {
    const { passed } = await runSingleCase(skillContent, evalCase, apiKey, simOpts);
    if (passed) {
      passCount++;
    } else {
      failedCaseIds.push(evalCase.id);
    }
  }
  return {
    passRate: passCount / total,
    passCount,
    totalCases: total,
    failedCaseIds,
    lengthRatio
  };
}
159
+
160
+ export {
161
+ runSingleCase,
162
+ runEvalSuite
163
+ };
@@ -0,0 +1,23 @@
1
+ import {
2
+ CONTRIBUTION_LEVELS,
3
+ getApiKey,
4
+ getConfigDir,
5
+ getConfigPath,
6
+ getOrCreateInstallationId,
7
+ readConfig,
8
+ resolveConfig,
9
+ setApiKey,
10
+ writeConfig
11
+ } from "./chunk-545XA5CB.js";
12
+ import "./chunk-MLKGABMK.js";
13
+ export {
14
+ CONTRIBUTION_LEVELS,
15
+ getApiKey,
16
+ getConfigDir,
17
+ getConfigPath,
18
+ getOrCreateInstallationId,
19
+ readConfig,
20
+ resolveConfig,
21
+ setApiKey,
22
+ writeConfig
23
+ };
@@ -0,0 +1,15 @@
1
+ import {
2
+ closeDb,
3
+ getDb,
4
+ getDefaultDbPath,
5
+ initSchema,
6
+ openDb
7
+ } from "./chunk-OD5NUFH2.js";
8
+ import "./chunk-MLKGABMK.js";
9
+ export {
10
+ closeDb,
11
+ getDb,
12
+ getDefaultDbPath,
13
+ initSchema,
14
+ openDb
15
+ };