@aikdna/kdna-cli 0.9.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,177 @@
1
+ /**
2
+ * KDNA Test commands — Phase 3: Test Lab / Evaluation.
3
+ *
4
+ * kdna test run <domain> --input <file> [--save <dir>] [--json]
5
+ * Run a test case against a domain, recording results.
6
+ *
7
+ * kdna test import <run-file> --as-eval --out <file>
8
+ * Convert a test run result into an eval card draft.
9
+ */
10
+
11
+ const fs = require('fs');
12
+ const path = require('path');
13
+ const { error, readJson, writeJson, EXIT } = require('./_common');
14
+ const { parseName } = require('../registry');
15
+
16
+ const USER_KDNA_DIR = path.join(process.env.HOME || process.env.USERPROFILE || '.', '.kdna');
17
+ const INSTALL_DIR = path.join(USER_KDNA_DIR, 'domains');
18
+ const RUNS_DIR = path.join(USER_KDNA_DIR, 'runs');
19
+
20
/**
 * `kdna test run <domain> --input <test-file> [--save <dir>] [--json]`
 *
 * Reads a JSON test case, verifies the target domain is installed, and writes
 * a run record named `run-<test_id>.json` into `--save <dir>` (or RUNS_DIR
 * when `--save` is absent). No LLM call happens here: the `results` section is
 * written with empty placeholders so the run can be graded by a human later.
 *
 * @param {string[]} [args] Raw CLI arguments. The domain is taken from the
 *   second positional argument (index 1), after flag values are excluded.
 */
function cmdTestRun(args = []) {
  // Flags whose following argument is a value, not a positional argument.
  const VALUE_FLAGS = ['--input', '--save'];
  const flagValue = (flag) => {
    const idx = args.indexOf(flag);
    return idx >= 0 && args[idx + 1] !== undefined ? args[idx + 1] : null;
  };

  const jsonMode = args.includes('--json');
  // Exclude flag values so `run --input case.json <domain>` still resolves the
  // domain correctly regardless of argument order.
  const positional = args.filter(
    (arg, i) => !arg.startsWith('--') && !VALUE_FLAGS.includes(args[i - 1]),
  );
  const domain = positional[1];
  const inputFile = flagValue('--input');
  const saveDir = flagValue('--save');

  if (!domain || !inputFile) {
    return error(
      'Usage:\n' +
        '  kdna test run <domain> --input <test-file> [--save <dir>] [--json]\n' +
        '\n' +
        'Runs test input through LLM with/without KDNA and records the result.',
      EXIT.INPUT_ERROR,
    );
  }

  const parsed = parseName(domain);
  if (!parsed) return error(`Invalid name "${domain}".`, EXIT.INPUT_ERROR);
  const destDir = path.join(INSTALL_DIR, parsed.scope, parsed.ident);
  if (!fs.existsSync(destDir)) {
    return error(`${parsed.full} not installed. Run: kdna install ${domain}`, EXIT.INPUT_ERROR);
  }

  const absInput = path.resolve(inputFile);
  if (!fs.existsSync(absInput)) {
    return error(`Input file not found: ${absInput}`, EXIT.INPUT_ERROR);
  }

  // Read test case
  let testCase;
  try {
    testCase = JSON.parse(fs.readFileSync(absInput, 'utf8'));
  } catch {
    return error(`Invalid JSON in test file: ${absInput}`, EXIT.INPUT_ERROR);
  }

  // Validate test case structure: valid JSON can still be null, an array or a
  // scalar, none of which can carry `input` / `expected`.
  if (testCase === null || typeof testCase !== 'object' || Array.isArray(testCase)) {
    return error(`Test file must contain a JSON object: ${absInput}`, EXIT.INPUT_ERROR);
  }
  if (testCase.input === undefined || testCase.input === null) {
    return error(`Test file is missing "input": ${absInput}`, EXIT.INPUT_ERROR);
  }

  const expected = testCase.expected;

  // Build test result
  const result = {
    test_id: testCase.id || `test_${Date.now()}`,
    domain: parsed.full,
    domain_path: destDir,
    input: typeof testCase.input === 'string' ? testCase.input : JSON.stringify(testCase.input),
    run_at: new Date().toISOString(),
    expected: {
      classification: expected?.classification || null,
      triggered_axioms: expected?.triggered_axioms || [],
      avoided_misunderstandings: expected?.avoided_misunderstandings || [],
      avoided_banned_terms: expected?.avoided_banned_terms || [],
    },
    results: {
      classification: null,
      triggered_axioms: [],
      avoided_misunderstandings: [],
      avoided_banned_terms: [],
      self_checks: [],
      risk_flags: [],
    },
    human_grade: null,
    human_notes: null,
  };

  /**
   * Note: Full LLM-based compare can be run separately via:
   *   kdna compare <domain> --input "<text>"
   * Test run records the structure for human grading.
   */

  // Save result. The id comes from the test file, so strip anything that could
  // act as a path separator before using it in a filename.
  const fileId = String(result.test_id).replace(/[^A-Za-z0-9._-]/g, '_');
  const outDir = saveDir ? path.resolve(saveDir) : RUNS_DIR;
  fs.mkdirSync(outDir, { recursive: true });
  const outFile = path.join(outDir, `run-${fileId}.json`);
  writeJson(outFile, result);
  if (!jsonMode) console.log(`Test result saved: ${outFile}`);
  // Added after writing: the location is reported in the output only and is
  // not part of the persisted record.
  result.saved_to = outFile;

  if (jsonMode) {
    console.log(JSON.stringify(result, null, 2));
    return;
  }

  console.log(`Test run recorded: ${result.test_id}`);
  console.log(`  Domain: ${result.domain}`);
  console.log(`  Input: ${result.input.slice(0, 100)}${result.input.length > 100 ? '...' : ''}`);
  if (result.expected.classification) {
    console.log(`  Expected classification: ${result.expected.classification}`);
  }
}
122
+
123
/**
 * `kdna test import <run-file> --as-eval --out <file>`
 *
 * Loads a test run record. With `--as-eval` it is converted into an eval card
 * draft and written to `--out` (default: `eval-<test_id>.json` next to the run
 * file); without `--as-eval` the run record is printed as JSON.
 *
 * @param {string[]} [args] Raw CLI arguments. The run file is taken from the
 *   second positional argument (index 1), after flag values are excluded.
 */
function cmdTestImport(args = []) {
  // `--out` consumes the next argument; it must not be mistaken for the run file.
  const outIdx = args.indexOf('--out');
  const outFile = outIdx >= 0 && args[outIdx + 1] !== undefined ? args[outIdx + 1] : null;
  const asEval = args.includes('--as-eval');
  const positional = args.filter(
    (arg, i) => !arg.startsWith('--') && args[i - 1] !== '--out',
  );
  const runFile = positional[1];

  if (!runFile) {
    return error('Usage: kdna test import <run-file> --as-eval --out <file>', EXIT.INPUT_ERROR);
  }

  const abs = path.resolve(runFile);
  if (!fs.existsSync(abs)) return error(`Run file not found: ${abs}`, EXIT.INPUT_ERROR);

  const runData = readJson(abs);
  if (!runData || !runData.test_id) {
    return error(`Not a valid test run file: ${abs}`, EXIT.INPUT_ERROR);
  }

  if (!asEval) {
    console.log(JSON.stringify(runData, null, 2));
    return;
  }

  // Convert run result into an eval card draft
  const { expected, results } = runData;
  const evalCard = {
    id: `eval_${runData.test_id}`,
    type: 'eval_case',
    domain: runData.domain,
    input: runData.input,
    expected_classification: expected?.classification || null,
    expected_triggered_axioms: expected?.triggered_axioms || [],
    expected_avoided_misunderstandings: expected?.avoided_misunderstandings || [],
    expected_avoided_banned_terms: expected?.avoided_banned_terms || [],
    actual_classification: results?.classification || null,
    actual_triggered_axioms: results?.triggered_axioms || [],
    actual_avoided_misunderstandings: results?.avoided_misunderstandings || [],
    actual_avoided_banned_terms: results?.avoided_banned_terms || [],
    // `??` rather than `||` so a falsy-but-real grade (0, false) survives.
    human_grade: runData.human_grade ?? null,
    human_notes: runData.human_notes || null,
    source_run: path.basename(abs),
    created: new Date().toISOString(),
  };

  // The id is read from a file, so keep it from escaping the target directory
  // when it is used to build the default output filename.
  const fileId = String(runData.test_id).replace(/[^A-Za-z0-9._-]/g, '_');
  const outPath = outFile
    ? path.resolve(outFile)
    : path.join(path.dirname(abs), `eval-${fileId}.json`);

  writeJson(outPath, evalCard);
  console.log(`Eval card created: ${outPath}`);
  console.log(`  ID: ${evalCard.id}`);
  console.log(`  Domain: ${evalCard.domain}`);
  if (evalCard.expected_classification) {
    console.log(`  Expected: ${evalCard.expected_classification}`);
  }
}
176
+
177
+ module.exports = { cmdTestRun, cmdTestImport };
@@ -0,0 +1,225 @@
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const crypto = require('crypto');
4
+ const { EXIT, error, readJson } = require('./_common');
5
+
6
+ const USER_KDNA_DIR = path.join(process.env.HOME || process.env.USERPROFILE || '.', '.kdna');
7
+ const TRACES_DIR = path.join(USER_KDNA_DIR, 'traces');
8
+
9
/** Create TRACES_DIR, including any missing parent directories. */
function ensureTracesDir() {
  const mkdirOptions = { recursive: true };
  fs.mkdirSync(TRACES_DIR, mkdirOptions);
}
12
+
13
/**
 * Path of the trace log for the current local date.
 *
 * @returns {string} `<TRACES_DIR>/YYYY-MM-DD.jsonl`
 */
function todayFile() {
  const now = new Date();
  const twoDigits = (value) => String(value).padStart(2, '0');
  const stamp = [
    now.getFullYear(),
    twoDigits(now.getMonth() + 1), // getMonth() is zero-based
    twoDigits(now.getDate()),
  ].join('-');
  return path.join(TRACES_DIR, `${stamp}.jsonl`);
}
20
+
21
/**
 * List trace log files (`*.jsonl` in TRACES_DIR) in ascending name order,
 * optionally limited to files dated on or after the day containing `sinceDate`.
 *
 * Filenames carry a local calendar date (see todayFile), so the comparison is
 * done at local-day granularity: the cutoff is truncated to local midnight and
 * each filename is parsed as a local date. Comparing a UTC-parsed filename to
 * a cutoff that still has a time of day would drop the boundary day's file.
 *
 * @param {Date|string|number} [sinceDate] Cutoff; omitted/falsy means no filter.
 *   An unparseable value matches no files.
 * @returns {string[]} Absolute paths of the matching trace files.
 */
function traceFiles(sinceDate) {
  ensureTracesDir();
  const names = fs
    .readdirSync(TRACES_DIR)
    .filter((f) => f.endsWith('.jsonl'))
    .sort();
  const toPath = (name) => path.join(TRACES_DIR, name);

  if (!sinceDate) return names.map(toPath);

  const since = sinceDate instanceof Date ? sinceDate : new Date(sinceDate);
  // Local midnight of the cutoff day; Invalid Date if `since` is invalid,
  // in which case every comparison below is false.
  const cutoffDay = new Date(since.getFullYear(), since.getMonth(), since.getDate());

  return names
    .filter((name) => {
      const match = /^(\d{4})-(\d{2})-(\d{2})\.jsonl$/.exec(name);
      if (!match) return false; // not a dated trace file
      const fileDay = new Date(Number(match[1]), Number(match[2]) - 1, Number(match[3]));
      return fileDay >= cutoffDay;
    })
    .map(toPath);
}
33
+
34
/**
 * Load trace entries from the trace files selected by `opts.since`, keeping
 * only those that match the optional `agent` / `domain` filters.
 *
 * Reading is best-effort: unreadable files and lines that fail to parse (or
 * fail the filter check) are skipped silently.
 *
 * @param {{since?: Date|string, agent?: string, domain?: string}} [opts]
 * @returns {Array} Parsed entries in file order, then line order.
 */
function readAllTraces(opts = {}) {
  const { since, agent, domain } = opts;
  const collected = [];

  for (const file of traceFiles(since)) {
    let text;
    try {
      text = fs.readFileSync(file, 'utf8');
    } catch {
      continue; // skip unreadable files
    }

    for (const raw of text.trim().split('\n')) {
      if (!raw) continue;
      try {
        const record = JSON.parse(raw);
        const agentOk = !agent || record.agent === agent;
        const domainOk = agentOk && (!domain || record.domain === domain);
        if (domainOk) collected.push(record);
      } catch {
        /* skip malformed lines */
      }
    }
  }

  return collected;
}
54
+
55
/**
 * Append one entry, serialized as a single JSON line, to today's trace file.
 *
 * @param {*} entry Value to serialize with JSON.stringify.
 */
function recordTrace(entry) {
  ensureTracesDir();
  const serialized = `${JSON.stringify(entry)}\n`;
  const target = todayFile();
  fs.appendFileSync(target, serialized);
}
60
+
61
/**
 * Resolve the `--since <value>` flag to a cutoff Date.
 *
 * Accepted values:
 *   - `<N>d`  any whole number of days back from now (e.g. `7d`, `14d`, `90d`)
 *   - anything `new Date(value)` can parse (e.g. an ISO date)
 *
 * When the flag is absent, has no value, or the value cannot be interpreted,
 * the cutoff defaults to 7 days ago.
 *
 * @param {string[]} [args] Raw CLI arguments.
 * @returns {Date} Always a valid Date.
 */
function parseSinceFlag(args = []) {
  const DEFAULT_DAYS = 7;
  const daysAgo = (days) => {
    const d = new Date();
    d.setDate(d.getDate() - days);
    return d;
  };

  const idx = args.indexOf('--since');
  if (idx >= 0 && idx < args.length - 1) {
    const val = args[idx + 1];
    const relative = typeof val === 'string' ? /^(\d+)d$/.exec(val) : null;
    if (relative) {
      const d = daysAgo(Number(relative[1]));
      // An absurdly large day count overflows the Date range; fall through to
      // the default rather than returning an Invalid Date.
      if (!Number.isNaN(d.getTime())) return d;
    } else {
      // ISO date
      const parsed = new Date(val);
      if (!Number.isNaN(parsed.getTime())) return parsed;
    }
  }

  // default: last 7 days
  return daysAgo(DEFAULT_DAYS);
}
89
+
90
/**
 * Trace log command.
 *
 * Flags handled, in priority order:
 *   --clear          delete every `*.jsonl` file in TRACES_DIR and exit
 *   --export <file>  write the selected entries plus the period to <file> as JSON
 *   --json           print `{ entries, count }` as JSON
 *   (none)           print a table of the 50 most recent entries, newest first
 * `--since` (see parseSinceFlag) selects which trace files are read.
 *
 * @param {string[]} args Raw CLI arguments.
 */
function cmdTrace(args) {
  const json = args.includes('--json');
  const clear = args.includes('--clear');
  const since = parseSinceFlag(args);

  if (clear) {
    if (fs.existsSync(TRACES_DIR)) {
      const files = fs.readdirSync(TRACES_DIR).filter((f) => f.endsWith('.jsonl'));
      for (const f of files) fs.unlinkSync(path.join(TRACES_DIR, f));
    }
    console.log('Trace logs cleared.');
    process.exit(EXIT.OK);
  }

  // Validate `--export` up front: without a value, writeFileSync would be
  // handed `undefined` (raw TypeError) or another flag as the filename.
  let exportPath = null;
  const exportIdx = args.indexOf('--export');
  if (exportIdx >= 0) {
    exportPath = args[exportIdx + 1];
    if (!exportPath || exportPath.startsWith('--')) {
      return error('--export requires a file path: --export <file>', EXIT.INPUT_ERROR);
    }
  }

  const entries = readAllTraces({ since });

  if (exportPath) {
    const data = {
      period: { since: since.toISOString(), until: new Date().toISOString() },
      entries,
    };
    fs.writeFileSync(exportPath, JSON.stringify(data, null, 2) + '\n');
    console.log(`Exported ${entries.length} trace entries to ${exportPath}`);
    process.exit(EXIT.OK);
  }

  if (json) {
    console.log(JSON.stringify({ entries, count: entries.length }, null, 2));
    process.exit(EXIT.OK);
  }

  // Human-readable table
  if (entries.length === 0) {
    console.log('No trace entries found.');
    console.log('Load a domain via kdna load or use KDNA in an agent to generate traces.');
    process.exit(EXIT.OK);
  }

  // Trace lines are free-form JSON, so format defensively: an unparseable
  // timestamp would otherwise make toISOString() throw a RangeError.
  const formatTimestamp = (value) => {
    if (!value) return 'unknown';
    const d = new Date(value);
    return Number.isNaN(d.getTime())
      ? 'unknown'
      : d.toISOString().replace('T', ' ').slice(0, 19);
  };

  console.log(`${'Timestamp'.padEnd(20)} ${'Agent'.padEnd(15)} ${'Domain'.padEnd(25)} ${'Result'}`);
  console.log('-'.repeat(75));
  for (const raw of entries.slice(-50).reverse()) {
    // A line such as `null` or `42` parses as valid JSON but is not an entry.
    const e = raw !== null && typeof raw === 'object' ? raw : {};
    const ts = formatTimestamp(e.timestamp);
    const agent = String(e.agent || 'unknown').padEnd(15);
    const domain = String(e.domain || '(none)').padEnd(25);
    const result = e.postvalidate?.result || 'loaded';
    console.log(`${ts} ${agent} ${domain} ${result}`);
  }
  console.log('');
  console.log(`${entries.length} entries total. --export <file> for audit export. --clear to reset.`);
}
141
+
142
/**
 * History command: show recent trace entries or aggregate statistics.
 *
 * Flags:
 *   --stats           print totals, per-domain and per-agent counts, then exit
 *   --agent <name>    only entries whose `agent` equals <name>
 *   --domain <name>   only entries whose `domain` equals <name>
 *   -n <count>        number of recent entries to show (default 20)
 *   --json            emit JSON instead of text
 *
 * @param {string[]} args Raw CLI arguments.
 */
function cmdHistory(args) {
  const json = args.includes('--json');
  const stats = args.includes('--stats');
  const agentFilter = args.includes('--agent') ? args[args.indexOf('--agent') + 1] : null;
  const domainFilter = args.includes('--domain') ? args[args.indexOf('--domain') + 1] : null;

  // `slice(-count)` only means "last N" for a positive integer: -0 and NaN
  // return every entry, and a negative count drops entries from the start.
  let count = 20;
  const countIdx = args.indexOf('-n');
  if (countIdx >= 0) {
    count = Number.parseInt(args[countIdx + 1], 10);
    if (!Number.isInteger(count) || count < 1) {
      return error('Invalid value for -n: expected a positive integer.', EXIT.INPUT_ERROR);
    }
  }

  // Trace lines are free-form JSON; treat non-object lines as empty entries.
  const asEntry = (raw) => (raw !== null && typeof raw === 'object' ? raw : {});

  const entries = readAllTraces({ agent: agentFilter, domain: domainFilter });

  if (stats) {
    const total = entries.length;
    const domainCounts = {};
    const agentCounts = {};
    let skipped = 0;

    for (const raw of entries) {
      const e = asEntry(raw);
      if (e.domain) {
        domainCounts[e.domain] = (domainCounts[e.domain] || 0) + 1;
      } else {
        skipped++;
      }
      if (e.agent) {
        agentCounts[e.agent] = (agentCounts[e.agent] || 0) + 1;
      }
    }

    const skipRate = total > 0 ? Math.round((skipped / total) * 100) : 0;

    if (json) {
      console.log(JSON.stringify({
        total,
        skipped,
        domainCounts,
        agentCounts,
        skipRate,
      }, null, 2));
    } else {
      console.log(`Total KDNA loads: ${total}`);
      console.log(`Skipped (no domain): ${skipped}`);
      if (total > 0) console.log(`Skip rate: ${skipRate}%`);
      console.log('');
      console.log('By domain:');
      const sortedDomains = Object.entries(domainCounts).sort((a, b) => b[1] - a[1]);
      for (const [domain, c] of sortedDomains) {
        const pct = total > 0 ? Math.round((c / total) * 100) : 0;
        console.log(`  ${domain}: ${c} (${pct}%)`);
      }
      if (Object.keys(agentCounts).length > 0) {
        console.log('');
        console.log('By agent:');
        for (const [agent, c] of Object.entries(agentCounts)) {
          console.log(`  ${agent}: ${c}`);
        }
      }
    }
    process.exit(EXIT.OK);
  }

  // Recent entries
  const recent = entries.slice(-count).reverse();

  if (json) {
    console.log(JSON.stringify({ entries: recent, total: entries.length }, null, 2));
    process.exit(EXIT.OK);
  }

  if (recent.length === 0) {
    console.log('No history entries found.');
    process.exit(EXIT.OK);
  }

  // An unparseable timestamp would make toISOString() throw a RangeError.
  const formatTimestamp = (value) => {
    if (!value) return 'unknown';
    const d = new Date(value);
    return Number.isNaN(d.getTime())
      ? 'unknown'
      : d.toISOString().replace('T', ' ').slice(0, 19);
  };

  console.log(`${'Timestamp'.padEnd(20)} ${'Agent'.padEnd(15)} ${'Domain'.padEnd(28)} ${'Result'.padEnd(10)} ${'Score'}`);
  console.log('-'.repeat(85));
  for (const raw of recent) {
    const e = asEntry(raw);
    const ts = formatTimestamp(e.timestamp);
    const agent = String(e.agent || 'unknown').padEnd(15);
    const domain = String(e.domain || '(none)').padEnd(28);
    const result = String(e.postvalidate?.result || 'loaded').padEnd(10);
    // Type check instead of truthiness so a real score of 0 prints as "0.0".
    const rawScore = e.postvalidate?.score;
    const score = typeof rawScore === 'number' && Number.isFinite(rawScore)
      ? rawScore.toFixed(1)
      : '-';
    console.log(`${ts} ${agent} ${domain} ${result} ${score}`);
  }
  console.log('');
  console.log(`Showing ${recent.length} of ${entries.length} total entries. --stats for summary. --domain <name> to filter.`);
}
224
+
225
+ module.exports = { cmdTrace, cmdHistory, recordTrace, readAllTraces };
package/src/compare.js CHANGED
@@ -28,6 +28,7 @@ const INSTALL_DIR = path.join(USER_KDNA_DIR, 'domains');
28
28
  const CONFIG_FILE = path.join(USER_KDNA_DIR, 'config.json');
29
29
 
30
30
  const { parseName } = require('./registry');
31
+ const { EXIT } = require('./cmds/_common');
31
32
 
32
33
  function readJson(p) {
33
34
  try {
@@ -37,9 +38,9 @@ function readJson(p) {
37
38
  }
38
39
  }
39
40
 
40
- function error(msg) {
41
+ function error(msg, code = EXIT.VALIDATION_FAILED) {
41
42
  console.error(`Error: ${msg}`);
42
- process.exit(1);
43
+ process.exit(code);
43
44
  }
44
45
 
45
46
  // ─── Config ─────────────────────────────────────────────────────────────
@@ -71,6 +72,7 @@ function loadLlmConfig() {
71
72
  ` "base_url": "https://... (optional, for OpenAI-compatible endpoints)"\n` +
72
73
  ` }\n` +
73
74
  ` }`,
75
+ EXIT.PROVIDER_ERROR,
74
76
  );
75
77
  }
76
78
  return { provider, model, apiKey, envName, baseUrl };
@@ -261,27 +263,30 @@ Diff the reasoning trajectory.`;
261
263
  // ─── Main ──────────────────────────────────────────────────────────────
262
264
 
263
265
  async function cmdCompare(input, args = []) {
266
+ const jsonMode = args.includes('--json');
264
267
  const idxInput = args.indexOf('--input');
265
268
  if (idxInput < 0 || !args[idxInput + 1]) {
266
- error('Usage: kdna compare <name> --input "<text>"');
269
+ error('Usage: kdna compare <name> --input "<text>"', EXIT.INPUT_ERROR);
267
270
  }
268
271
  const userInput = args[idxInput + 1];
269
272
 
270
273
  const parsed = parseName(input);
271
- if (!parsed) error(`Invalid name "${input}".`);
274
+ if (!parsed) error(`Invalid name "${input}".`, EXIT.INPUT_ERROR);
272
275
  const destDir = path.join(INSTALL_DIR, parsed.scope, parsed.ident);
273
276
  if (!fs.existsSync(destDir)) {
274
- error(`${parsed.full} not installed. Run: kdna install ${input}`);
277
+ error(`${parsed.full} not installed. Run: kdna install ${input}`, EXIT.INPUT_ERROR);
275
278
  }
276
279
 
277
280
  const llm = loadLlmConfig();
278
281
 
279
- console.log('═'.repeat(64));
280
- console.log(` kdna compare ${parsed.full}`);
281
- console.log(` provider: ${llm.provider} / ${llm.model}`);
282
- console.log(` input length: ${userInput.length} chars`);
283
- console.log('═'.repeat(64));
284
- console.log('');
282
+ if (!jsonMode) {
283
+ console.log('═'.repeat(64));
284
+ console.log(` kdna compare ${parsed.full}`);
285
+ console.log(` provider: ${llm.provider} / ${llm.model}`);
286
+ console.log(` input length: ${userInput.length} chars`);
287
+ console.log(''.repeat(64));
288
+ console.log('');
289
+ }
285
290
 
286
291
  const BASELINE_SYSTEM =
287
292
  'You are a helpful assistant. Respond to the user request concisely and specifically.';
@@ -291,34 +296,43 @@ async function cmdCompare(input, args = []) {
291
296
  'You are a helpful assistant. The following domain judgment is loaded and you MUST apply it when relevant.\n\n' +
292
297
  kdnaPrompt;
293
298
 
294
- console.log('[1/3] Running baseline (no KDNA)...');
299
+ if (!jsonMode) console.log('[1/3] Running baseline (no KDNA)...');
295
300
  const responseA = await callLlm(llm, BASELINE_SYSTEM, userInput);
296
- console.log(` ${responseA.length} chars returned`);
301
+ if (!jsonMode) console.log(` ${responseA.length} chars returned`);
297
302
 
298
- console.log('[2/3] Running with KDNA loaded...');
303
+ if (!jsonMode) console.log('[2/3] Running with KDNA loaded...');
299
304
  const responseB = await callLlm(llm, TREATMENT_SYSTEM, userInput);
300
- console.log(` ${responseB.length} chars returned`);
305
+ if (!jsonMode) console.log(` ${responseB.length} chars returned`);
301
306
 
302
- console.log('[3/3] Diffing reasoning trajectories...');
307
+ if (!jsonMode) console.log('[3/3] Diffing reasoning trajectories...');
303
308
  const diffPrompt = makeDiffPrompt(userInput, responseA, responseB);
304
309
  const diff = await callLlm(llm, DIFF_SYSTEM, diffPrompt);
305
310
 
306
- console.log('');
307
- console.log('─'.repeat(64));
308
- console.log(' WITHOUT KDNA');
309
- console.log('─'.repeat(64));
310
- console.log(responseA);
311
- console.log('');
312
- console.log('─'.repeat(64));
313
- console.log(' WITH KDNA');
314
- console.log(''.repeat(64));
315
- console.log(responseB);
316
- console.log('');
317
- console.log('─'.repeat(64));
318
- console.log(' REASONING TRAJECTORY DIFF');
319
- console.log(''.repeat(64));
320
- console.log(diff);
321
- console.log('');
311
+ if (jsonMode) {
312
+ const result = {
313
+ baseline_output: responseA,
314
+ kdna_output: responseB,
315
+ judgment_delta: diff,
316
+ };
317
+ console.log(JSON.stringify(result, null, 2));
318
+ } else {
319
+ console.log('');
320
+ console.log('─'.repeat(64));
321
+ console.log(' WITHOUT KDNA');
322
+ console.log('─'.repeat(64));
323
+ console.log(responseA);
324
+ console.log('');
325
+ console.log('─'.repeat(64));
326
+ console.log(' WITH KDNA');
327
+ console.log('─'.repeat(64));
328
+ console.log(responseB);
329
+ console.log('');
330
+ console.log('─'.repeat(64));
331
+ console.log(' REASONING TRAJECTORY DIFF');
332
+ console.log('─'.repeat(64));
333
+ console.log(diff);
334
+ console.log('');
335
+ }
322
336
  }
323
337
 
324
338
  module.exports = { cmdCompare, buildKdnaPrompt };