@aikdna/kdna-cli 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aikdna/kdna-cli",
3
- "version": "0.12.0",
3
+ "version": "0.13.0",
4
4
  "description": "KDNA CLI — create, validate, install, and manage domain cognition packages for AI agents.",
5
5
  "type": "commonjs",
6
6
  "bin": {
package/src/cli.js CHANGED
@@ -80,6 +80,8 @@ Testing & Verification:
80
80
  verify <name> 3-layer: structure + trust + judgment
81
81
  verify <name> --judgment --run-tests Judgment validation with eval cases
82
82
  compare <name> --input "..." With/without KDNA reasoning diff
83
+ compare <name> --input "..." --report-md Markdown report format
84
+ compare <name> --input "..." --report-json JSON report with scoring
83
85
  diff <name>@<v1> <name>@<v2> Judgment-level diff between versions
84
86
  test run <name> --input <file> Record test result against domain
85
87
  test import <run> --as-eval Convert test result to eval card
package/src/compare.js CHANGED
@@ -29,6 +29,7 @@ const CONFIG_FILE = path.join(USER_KDNA_DIR, 'config.json');
29
29
 
30
30
  const { parseName } = require('./registry');
31
31
  const { EXIT } = require('./cmds/_common');
32
+ const { recordTrace } = require('./cmds/trace');
32
33
 
33
34
  function readJson(p) {
34
35
  try {
@@ -260,13 +261,188 @@ ${responseB}
260
261
  Diff the reasoning trajectory.`;
261
262
  }
262
263
 
264
+ // ─── Report output ─────────────────────────────────────────────────────
265
+
266
/**
 * Parse the diff model's plain-text answer into per-axis findings and a verdict.
 *
 * Recognised line shapes (after trimming surrounding whitespace):
 *   - `<number>. <Axis label>: <finding>` — stored under the lower-cased label.
 *     Labels may be one to three words so that keys such as
 *     `boundary awareness` (looked up by the report emitters) can be produced.
 *   - `VERDICT: <value>` — lower-cased; the last such line wins.
 *
 * @param {string} diffText - Raw text returned by the diff call.
 * @returns {{axes: Object<string, string>, verdict: string}} Parsed axes and
 *   verdict; the verdict defaults to `'trajectory_unchanged'` when no VERDICT
 *   line is present.
 */
function parseDiffText(diffText) {
  const axes = {};
  let verdict = 'trajectory_unchanged';

  // Split on LF or CRLF: `.` never matches `\r`, so a trailing carriage
  // return would otherwise make every `(.+)$` pattern below fail.
  const lines = String(diffText ?? '').split(/\r?\n/);

  for (const rawLine of lines) {
    const line = rawLine.trim();

    // The label is limited to three words so ordinary numbered sentences that
    // happen to contain a colon are not mistaken for axes.
    const axisMatch = line.match(/^(\d+)\.\s*(\w+(?:[ \t]+\w+){0,2})\s*:\s*(.+)$/);
    if (axisMatch) {
      const key = axisMatch[2].toLowerCase().replace(/\s+/g, ' ');
      axes[key] = axisMatch[3].trim();
    }

    const verdictMatch = line.match(/^VERDICT:\s*(.+)$/i);
    if (verdictMatch) {
      verdict = verdictMatch[1].trim().toLowerCase();
    }
  }

  return { axes, verdict };
}
284
+
285
/**
 * Derive a coarse 5–10 score from parsed diff axes.
 *
 * An axis counts as changed when its value is non-empty and is not the
 * literal `SAME` (compared case-insensitively). The score starts at a neutral
 * 5 and gains one point per changed axis, capped at 10.
 *
 * @param {Object<string, string>} axes - Axis label → finding text.
 * @returns {{score: number, changed: string[]}} The score and the lower-cased
 *   labels of the changed axes, in the object's own key order.
 */
function scoreDiff(axes) {
  const changed = Object.entries(axes)
    .filter(([, finding]) => finding && finding.toUpperCase() !== 'SAME')
    .map(([label]) => label.toLowerCase());

  // Neutral baseline of 5, one point per changed axis, never above 10.
  const score = Math.min(10, 5 + changed.length);

  return { score, changed };
}
296
+
297
/**
 * Build the Markdown comparison report for `kdna compare --report-md`.
 *
 * @param {{full: string}} parsed - Parsed domain name; `full` is printed as the domain.
 * @param {Object} manifest - Domain manifest; only `version` is read.
 * @param {Object} core - Core document; `axioms` and `frameworks` are read.
 * @param {Object} pat - Patterns document; `self_check` and
 *   `terminology.banned_terms` are read.
 * @param {string} responseA - Model response without the domain loaded.
 * @param {string} responseB - Model response with the domain loaded.
 * @param {string} diffText - Raw diff text, parsed with `parseDiffText`.
 * @param {{provider: string, model: string}} llm - Model configuration shown in the header.
 * @param {string} [userInput] - The compared input. When omitted, the value
 *   following `--input` in `process.argv` is used, as before.
 * @returns {string} The report as a single newline-joined Markdown string.
 */
function emitMarkdownReport(parsed, manifest, core, pat, responseA, responseB, diffText, llm, userInput) {
  const { axes, verdict } = parseDiffText(diffText);
  const domainScore = scoreDiff(axes);
  const axioms = core.axioms || [];
  const selfChecks = pat.self_check || [];
  const bannedTermCount = (pat.terminology?.banned_terms || []).length;
  const hasSelfChecks = selfChecks.length > 0;
  const hasChanged = (axis) => domainScore.changed.includes(axis);

  // Prefer the input handed in by the caller. Fall back to argv for callers
  // that still use the eight-argument form, without throwing when `--input`
  // is the last token on the command line.
  let inputText = userInput;
  if (typeof inputText !== 'string') {
    const argv = process.argv.slice(2);
    const inputIdx = argv.indexOf('--input');
    inputText = inputIdx >= 0 ? argv[inputIdx + 1] : undefined;
  }
  const inputPreview = typeof inputText === 'string' ? inputText.slice(0, 120) : '?';

  // First three non-blank lines of a response, rendered as a bullet list.
  const previewBullets = (text) =>
    String(text ?? '')
      .split('\n')
      .filter((l) => l.trim())
      .slice(0, 3)
      .map((l) => `- ${l}`)
      .join('\n');

  // Table rows map a display name to the axis key produced by parseDiffText.
  const dims = [
    { name: 'Classification', axis: 'classification' },
    { name: 'Diagnostic depth', axis: 'diagnosis' },
    { name: 'Terminology', axis: 'terminology' },
    { name: 'Boundary respected', axis: 'boundary awareness' },
    { name: 'Action quality', axis: 'actions' },
  ];
  const dimRows = dims.map(
    (d) => `| **${d.name}** | Generic | Domain-specific | **${hasChanged(d.axis) ? 'Improved' : 'Same'}** |`,
  );

  const changedDims = domainScore.changed.map((c) => `**${c}**`).join(', ');
  const trajectoryNote = verdict.includes('changed')
    ? 'The reasoning trajectory shifted from generic to domain-specific judgment.'
    : 'The domain did not significantly alter the judgment trajectory for this input.';

  const lines = [
    '# KDNA Judgment Comparison Report',
    '',
    `**Domain:** ${parsed.full} (v${manifest.version || '?'})`,
    `**Input:** "${inputPreview}"`,
    `**Model:** ${llm.provider} / ${llm.model}`,
    `**Date:** ${new Date().toISOString()}`,
    '',
    '---',
    '',
    '## Without KDNA',
    '',
    '### Judgment Path',
    previewBullets(responseA),
    '',
    '### Key Deficiencies',
    '- No domain-specific diagnosis applied',
    '- Terminal screening',
    '',
    '---',
    '',
    `## With KDNA (${parsed.full})`,
    '',
    '### Domain Loaded',
    `- Name: ${parsed.full}`,
    `- Axioms applied: ${axioms.length} total`,
    `- Frameworks: ${(core.frameworks || []).map((f) => f.id).join(', ') || 'none declared'}`,
    `- Self-checks: ${selfChecks.length} items`,
    `- Banned terms: ${bannedTermCount}`,
    '',
    '### Judgment Path',
    previewBullets(responseB),
    '',
    '---',
    '',
    '## Judgment Diff',
    '',
    '| Dimension | Without KDNA | With KDNA | Change |',
    '|-----------|:-----------:|:---------:|:------:|',
    ...dimRows,
    // Only claim an improvement when the domain actually declares self-checks.
    `| **Self-check rate** | N/A | ${hasSelfChecks ? 'Domain applied' : 'N/A'} | **${hasSelfChecks ? 'Improved' : 'N/A'}** |`,
    '',
    `**Verdict:** ${verdict.replace(/_/g, ' ')}`,
    '',
    '---',
    '',
    '## Scoring',
    '',
    '| D# | Dimension | Score (0-10) |',
    '|----|-----------|:-----------:|',
    `| D1 | Diagnostic depth | ${hasChanged('diagnosis') ? '8' : '5'} |`,
    `| D2 | Terminology precision | ${hasChanged('terminology') ? '8' : '5'} |`,
    '| D3 | Misunderstanding detection | 5 |',
    `| D4 | Axiom alignment | ${domainScore.score} |`,
    // NOTE(review): '100%' reflects only that self-checks exist, not a measured pass rate.
    `| D5 | Self-check pass rate | ${hasSelfChecks ? '100%' : 'N/A'} |`,
    // The axis key is 'boundary awareness' (see dims above), not 'boundary'.
    `| D6 | Boundary respect | ${hasChanged('boundary awareness') ? 'Pass' : 'N/A'} |`,
    // NOTE(review): the JSON report hard-codes D7 as 'N/A'; confirm which is intended.
    `| D7 | Risk avoidance | ${axes.failure ? 'Pass' : 'N/A'} |`,
    '',
    '---',
    '',
    '## Summary',
    '',
    `Loading \`${parsed.full}\` changed the agent's response across ${domainScore.changed.length} dimensions: ${changedDims || 'no significant change'}. ${trajectoryNote}`,
    '',
    '*Generated by kdna compare. Copy-pasteable as a GitHub comment, Slack message, or tweet.*',
  ];

  return lines.join('\n');
}
388
+
389
/**
 * Build the structured comparison report for `kdna compare --report-json`.
 *
 * @param {{full: string}} parsed - Parsed domain name; `full` is reported as the domain.
 * @param {Object} manifest - Domain manifest; only `version` is read.
 * @param {Object} core - Core document; only the number of `axioms` is read.
 * @param {Object} pat - Patterns document; only the number of `self_check` items is read.
 * @param {string} responseA - Model response without the domain loaded.
 * @param {string} responseB - Model response with the domain loaded.
 * @param {string} diffText - Raw diff text, parsed with `parseDiffText`.
 * @param {{provider: string, model: string}} llm - Model configuration copied into `meta`.
 * @param {string} userInput - The compared input; truncated to 200 characters.
 * @returns {Object} Plain object with `meta`, `without_kdna`, `with_kdna`,
 *   `diff` and `scoring` sections, ready for `JSON.stringify`.
 */
function emitJsonReport(parsed, manifest, core, pat, responseA, responseB, diffText, llm, userInput) {
  const { axes, verdict } = parseDiffText(diffText);
  const domainScore = scoreDiff(axes);
  const axiomCount = (core.axioms || []).length;
  const selfCheckCount = (pat.self_check || []).length;
  const hasChanged = (axis) => domainScore.changed.includes(axis);

  return {
    meta: {
      domain: parsed.full,
      domain_version: manifest.version || '?',
      // Coerce so a missing input yields an empty preview instead of a TypeError.
      input: String(userInput ?? '').slice(0, 200),
      model: llm.model,
      provider: llm.provider,
      timestamp: new Date().toISOString(),
    },
    without_kdna: {
      classification: axes.classification || 'generic',
      response_length: responseA.length,
      response_preview: responseA.slice(0, 300),
    },
    with_kdna: {
      domain: parsed.full,
      // An axis reported as SAME must not be labelled domain-specific; use the
      // same "changed" test that feeds diff.changed_dimensions.
      classification: hasChanged('classification') ? 'domain_specific' : 'unchanged',
      axioms_available: axiomCount,
      self_checks_available: selfCheckCount,
      response_length: responseB.length,
      response_preview: responseB.slice(0, 300),
    },
    diff: {
      axes,
      verdict,
      score: domainScore.score,
      changed_dimensions: domainScore.changed,
    },
    scoring: {
      D1_diagnostic_depth: hasChanged('diagnosis') ? 8 : 5,
      D2_terminology_precision: hasChanged('terminology') ? 8 : 5,
      D3_misunderstanding_detection: 5,
      D4_axiom_alignment: domainScore.score,
      // NOTE(review): '100%' reflects only that self-checks exist, not a measured pass rate.
      D5_self_check_pass_rate: selfCheckCount > 0 ? '100%' : 'N/A',
      D6_boundary_respect: hasChanged('boundary awareness') ? 'Pass' : 'N/A',
      // NOTE(review): the Markdown report derives D7 from axes.failure; confirm which is intended.
      D7_risk_avoidance: 'N/A',
    },
  };
}
435
+
263
436
  // ─── Main ──────────────────────────────────────────────────────────────
264
437
 
265
438
  async function cmdCompare(input, args = []) {
266
439
  const jsonMode = args.includes('--json');
440
+ const reportMd = args.includes('--report-md');
441
+ const reportJson = args.includes('--report-json');
442
+ const outputFile = args.includes('--output') ? args[args.indexOf('--output') + 1] : null;
267
443
  const idxInput = args.indexOf('--input');
268
444
  if (idxInput < 0 || !args[idxInput + 1]) {
269
- error('Usage: kdna compare <name> --input "<text>"', EXIT.INPUT_ERROR);
445
+ error('Usage: kdna compare <name> --input "<text>" [--report-md|--report-json] [--output <file>]', EXIT.INPUT_ERROR);
270
446
  }
271
447
  const userInput = args[idxInput + 1];
272
448
 
@@ -278,8 +454,11 @@ async function cmdCompare(input, args = []) {
278
454
  }
279
455
 
280
456
  const llm = loadLlmConfig();
457
+ const manifest = readJson(path.join(destDir, 'kdna.json')) || {};
458
+ const core = readJson(path.join(destDir, 'KDNA_Core.json')) || {};
459
+ const pat = readJson(path.join(destDir, 'KDNA_Patterns.json')) || {};
281
460
 
282
- if (!jsonMode) {
461
+ if (!jsonMode && !reportMd && !reportJson) {
283
462
  console.log('═'.repeat(64));
284
463
  console.log(` kdna compare ${parsed.full}`);
285
464
  console.log(` provider: ${llm.provider} / ${llm.model}`);
@@ -296,18 +475,49 @@ async function cmdCompare(input, args = []) {
296
475
  'You are a helpful assistant. The following domain judgment is loaded and you MUST apply it when relevant.\n\n' +
297
476
  kdnaPrompt;
298
477
 
299
- if (!jsonMode) console.log('[1/3] Running baseline (no KDNA)...');
478
+ if (!jsonMode && !reportMd && !reportJson) console.log('[1/3] Running baseline (no KDNA)...');
300
479
  const responseA = await callLlm(llm, BASELINE_SYSTEM, userInput);
301
- if (!jsonMode) console.log(` ${responseA.length} chars returned`);
480
+ if (!jsonMode && !reportMd && !reportJson) console.log(` ${responseA.length} chars returned`);
302
481
 
303
- if (!jsonMode) console.log('[2/3] Running with KDNA loaded...');
482
+ if (!jsonMode && !reportMd && !reportJson) console.log('[2/3] Running with KDNA loaded...');
304
483
  const responseB = await callLlm(llm, TREATMENT_SYSTEM, userInput);
305
- if (!jsonMode) console.log(` ${responseB.length} chars returned`);
484
+ if (!jsonMode && !reportMd && !reportJson) console.log(` ${responseB.length} chars returned`);
306
485
 
307
- if (!jsonMode) console.log('[3/3] Diffing reasoning trajectories...');
486
+ if (!jsonMode && !reportMd && !reportJson) console.log('[3/3] Diffing reasoning trajectories...');
308
487
  const diffPrompt = makeDiffPrompt(userInput, responseA, responseB);
309
488
  const diff = await callLlm(llm, DIFF_SYSTEM, diffPrompt);
310
489
 
490
+ // Record trace
491
+ recordTrace({
492
+ timestamp: new Date().toISOString(),
493
+ agent: 'cli',
494
+ domain: parsed.full,
495
+ type: 'compare',
496
+ compare: { model: llm.model, input_length: userInput.length },
497
+ });
498
+
499
+ if (reportMd) {
500
+ const report = emitMarkdownReport(parsed, manifest, core, pat, responseA, responseB, diff, llm);
501
+ if (outputFile) {
502
+ fs.writeFileSync(outputFile, report);
503
+ console.log(`Report saved to ${outputFile}`);
504
+ } else {
505
+ console.log(report);
506
+ }
507
+ return;
508
+ }
509
+
510
+ if (reportJson) {
511
+ const report = emitJsonReport(parsed, manifest, core, pat, responseA, responseB, diff, llm, userInput);
512
+ if (outputFile) {
513
+ fs.writeFileSync(outputFile, JSON.stringify(report, null, 2) + '\n');
514
+ console.log(`Report saved to ${outputFile}`);
515
+ } else {
516
+ console.log(JSON.stringify(report, null, 2));
517
+ }
518
+ return;
519
+ }
520
+
311
521
  if (jsonMode) {
312
522
  const result = {
313
523
  baseline_output: responseA,