npm - @aikdna/kdna-cli - Versions diffs - 0.12.0 → 0.13.0 - Mend

@aikdna/kdna-cli 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aikdna/kdna-cli",
-  "version": "0.12.0",
+  "version": "0.13.0",
   "description": "KDNA CLI — create, validate, install, and manage domain cognition packages for AI agents.",
   "type": "commonjs",
   "bin": {

package/src/cli.js CHANGED Viewed

@@ -80,6 +80,8 @@ Testing & Verification:
   verify <name>                    3-layer: structure + trust + judgment
   verify <name> --judgment --run-tests  Judgment validation with eval cases
   compare <name> --input "..."     With/without KDNA reasoning diff
+  compare <name> --input "..." --report-md     Markdown report format
+  compare <name> --input "..." --report-json   JSON report with scoring
   diff <name>@<v1> <name>@<v2>     Judgment-level diff between versions
   test run <name> --input <file>   Record test result against domain
   test import <run> --as-eval      Convert test result to eval card

package/src/compare.js CHANGED Viewed

@@ -29,6 +29,7 @@ const CONFIG_FILE = path.join(USER_KDNA_DIR, 'config.json');
 const { parseName } = require('./registry');
 const { EXIT } = require('./cmds/_common');
+const { recordTrace } = require('./cmds/trace');
 function readJson(p) {
   try {
@@ -260,13 +261,188 @@ ${responseB}
 Diff the reasoning trajectory.`;
 }
+// ─── Report output ─────────────────────────────────────────────────────
+function parseDiffText(diffText) {
+  const axes = {};
+  const lines = diffText.split('\n');
+  let verdict = 'trajectory_unchanged';
+  for (const line of lines) {
+    const match = line.match(/^(\d+)\.\s*(\w+):\s*(.+)$/i);
+    if (match) {
+      axes[match[2].toLowerCase()] = match[3].trim();
+    }
+    const vMatch = line.match(/^VERDICT:\s*(.+)$/i);
+    if (vMatch) {
+      verdict = vMatch[1].trim().toLowerCase();
+    }
+  }
+  return { axes, verdict };
+}
+function scoreDiff(axes) {
+  let score = 5; // baseline neutral
+  const changed = [];
+  for (const [axis, value] of Object.entries(axes)) {
+    if (value && value.toUpperCase() !== 'SAME') {
+      changed.push(axis.toLowerCase());
+      score = Math.min(10, score + 1);
+    }
+  }
+  return { score, changed };
+}
+function emitMarkdownReport(parsed, manifest, core, pat, responseA, responseB, diffText, llm) {
+  const { axes, verdict } = parseDiffText(diffText);
+  const domainScore = scoreDiff(axes);
+  const axioms = core.axioms || [];
+  const selfChecks = pat.self_check || [];
+  const bannedTerms = (pat.terminology?.banned_terms || []).map(t => typeof t === 'string' ? t : t.term);
+  const misunderstandings = pat.misunderstandings || [];
+  const lines = [];
+  lines.push('# KDNA Judgment Comparison Report');
+  lines.push('');
+  lines.push(`**Domain:** ${parsed.full} (v${manifest.version || '?'})`);
+  lines.push(`**Input:** "${(args => {
+    const i = args.indexOf('--input');
+    return i >= 0 ? args[i + 1].slice(0, 120) : '?';
+  })(process.argv.slice(2))}"`);
+  lines.push(`**Model:** ${llm.provider} / ${llm.model}`);
+  lines.push(`**Date:** ${new Date().toISOString()}`);
+  lines.push('');
+  lines.push('---');
+  lines.push('');
+  lines.push('## Without KDNA');
+  lines.push('');
+  lines.push('### Judgment Path');
+  lines.push(responseA.split('\n').filter(l => l.trim()).slice(0, 3).map(l => `- ${l}`).join('\n'));
+  lines.push('');
+  lines.push('### Key Deficiencies');
+  lines.push('- No domain-specific diagnosis applied');
+  lines.push('- Terminal screening');
+  lines.push('');
+  lines.push('---');
+  lines.push('');
+  lines.push(`## With KDNA (${parsed.full})`);
+  lines.push('');
+  lines.push(`### Domain Loaded`);
+  lines.push(`- Name: ${parsed.full}`);
+  lines.push(`- Axioms applied: ${axioms.length} total`);
+  lines.push(`- Frameworks: ${(core.frameworks || []).map(f => f.id).join(', ') || 'none declared'}`);
+  lines.push(`- Self-checks: ${selfChecks.length} items`);
+  lines.push(`- Banned terms: ${bannedTerms.length}`);
+  lines.push('');
+  lines.push('### Judgment Path');
+  lines.push(responseB.split('\n').filter(l => l.trim()).slice(0, 3).map(l => `- ${l}`).join('\n'));
+  lines.push('');
+  lines.push('---');
+  lines.push('');
+  lines.push('## Judgment Diff');
+  lines.push('');
+  lines.push('| Dimension | Without KDNA | With KDNA | Change |');
+  lines.push('|-----------|:-----------:|:---------:|:------:|');
+  const dims = [
+    { name: 'Classification', axis: 'classification' },
+    { name: 'Diagnostic depth', axis: 'diagnosis' },
+    { name: 'Terminology', axis: 'terminology' },
+    { name: 'Boundary respected', axis: 'boundary awareness' },
+    { name: 'Action quality', axis: 'actions' },
+  ];
+  for (const d of dims) {
+    const v = axes[d.axis];
+    const changed = v && v.toUpperCase() !== 'SAME';
+    lines.push(`| **${d.name}** | Generic | Domain-specific | **${changed ? 'Improved' : 'Same'}** |`);
+  }
+  lines.push(`| **Self-check rate** | N/A | ${selfChecks.length > 0 ? 'Domain applied' : 'N/A'} | **Improved** |`);
+  lines.push('');
+  lines.push(`**Verdict:** ${verdict.replace(/_/g, ' ')}`);
+  lines.push('');
+  lines.push('---');
+  lines.push('');
+  lines.push('## Scoring');
+  lines.push('');
+  lines.push(`| D# | Dimension | Score (0-10) |`);
+  lines.push('|----|-----------|:-----------:|');
+  lines.push(`| D1 | Diagnostic depth | ${domainScore.changed.includes('diagnosis') ? '8' : '5'} |`);
+  lines.push(`| D2 | Terminology precision | ${domainScore.changed.includes('terminology') ? '8' : '5'} |`);
+  lines.push(`| D3 | Misunderstanding detection | 5 |`);
+  lines.push(`| D4 | Axiom alignment | ${domainScore.score} |`);
+  lines.push(`| D5 | Self-check pass rate | ${selfChecks.length > 0 ? '100%' : 'N/A'} |`);
+  lines.push(`| D6 | Boundary respect | ${domainScore.changed.includes('boundary') ? 'Pass' : 'N/A'} |`);
+  lines.push(`| D7 | Risk avoidance | ${axes.failure ? 'Pass' : 'N/A'} |`);
+  lines.push('');
+  lines.push('---');
+  lines.push('');
+  lines.push('## Summary');
+  lines.push('');
+  const changedDims = domainScore.changed.map(c => `**${c}**`).join(', ');
+  lines.push(`Loading \`${parsed.full}\` changed the agent's response across ${domainScore.changed.length} dimensions: ${changedDims || 'no significant change'}. ${verdict.includes('changed') ? 'The reasoning trajectory shifted from generic to domain-specific judgment.' : 'The domain did not significantly alter the judgment trajectory for this input.'}`);
+  lines.push('');
+  lines.push('*Generated by kdna compare. Copy-pasteable as a GitHub comment, Slack message, or tweet.*');
+  return lines.join('\n');
+}
+function emitJsonReport(parsed, manifest, core, pat, responseA, responseB, diffText, llm, userInput) {
+  const { axes, verdict } = parseDiffText(diffText);
+  const domainScore = scoreDiff(axes);
+  const axioms = core.axioms || [];
+  const selfChecks = pat.self_check || [];
+  const result = {
+    meta: {
+      domain: parsed.full,
+      domain_version: manifest.version || '?',
+      input: userInput.slice(0, 200),
+      model: llm.model,
+      provider: llm.provider,
+      timestamp: new Date().toISOString(),
+    },
+    without_kdna: {
+      classification: axes.classification || 'generic',
+      response_length: responseA.length,
+      response_preview: responseA.slice(0, 300),
+    },
+    with_kdna: {
+      domain: parsed.full,
+      classification: axes.classification ? 'domain_specific' : 'unchanged',
+      axioms_available: axioms.length,
+      self_checks_available: selfChecks.length,
+      response_length: responseB.length,
+      response_preview: responseB.slice(0, 300),
+    },
+    diff: {
+      axes,
+      verdict,
+      score: domainScore.score,
+      changed_dimensions: domainScore.changed,
+    },
+    scoring: {
+      D1_diagnostic_depth: domainScore.changed.includes('diagnosis') ? 8 : 5,
+      D2_terminology_precision: domainScore.changed.includes('terminology') ? 8 : 5,
+      D3_misunderstanding_detection: 5,
+      D4_axiom_alignment: domainScore.score,
+      D5_self_check_pass_rate: selfChecks.length > 0 ? '100%' : 'N/A',
+      D6_boundary_respect: domainScore.changed.includes('boundary awareness') ? 'Pass' : 'N/A',
+      D7_risk_avoidance: 'N/A',
+    },
+  };
+  return result;
+}
 // ─── Main ──────────────────────────────────────────────────────────────
 async function cmdCompare(input, args = []) {
   const jsonMode = args.includes('--json');
+  const reportMd = args.includes('--report-md');
+  const reportJson = args.includes('--report-json');
+  const outputFile = args.includes('--output') ? args[args.indexOf('--output') + 1] : null;
   const idxInput = args.indexOf('--input');
   if (idxInput < 0 || !args[idxInput + 1]) {
-    error('Usage: kdna compare <name> --input "<text>"', EXIT.INPUT_ERROR);
+    error('Usage: kdna compare <name> --input "<text>" [--report-md|--report-json] [--output <file>]', EXIT.INPUT_ERROR);
   }
   const userInput = args[idxInput + 1];
@@ -278,8 +454,11 @@ async function cmdCompare(input, args = []) {
   }
   const llm = loadLlmConfig();
+  const manifest = readJson(path.join(destDir, 'kdna.json')) || {};
+  const core = readJson(path.join(destDir, 'KDNA_Core.json')) || {};
+  const pat = readJson(path.join(destDir, 'KDNA_Patterns.json')) || {};
-  if (!jsonMode) {
+  if (!jsonMode && !reportMd && !reportJson) {
     console.log('═'.repeat(64));
     console.log(`  kdna compare  ${parsed.full}`);
     console.log(`  provider:     ${llm.provider} / ${llm.model}`);
@@ -296,18 +475,49 @@ async function cmdCompare(input, args = []) {
     'You are a helpful assistant. The following domain judgment is loaded and you MUST apply it when relevant.\n\n' +
     kdnaPrompt;
-  if (!jsonMode) console.log('[1/3] Running baseline (no KDNA)...');
+  if (!jsonMode && !reportMd && !reportJson) console.log('[1/3] Running baseline (no KDNA)...');
   const responseA = await callLlm(llm, BASELINE_SYSTEM, userInput);
-  if (!jsonMode) console.log(`      ${responseA.length} chars returned`);
+  if (!jsonMode && !reportMd && !reportJson) console.log(`      ${responseA.length} chars returned`);
-  if (!jsonMode) console.log('[2/3] Running with KDNA loaded...');
+  if (!jsonMode && !reportMd && !reportJson) console.log('[2/3] Running with KDNA loaded...');
   const responseB = await callLlm(llm, TREATMENT_SYSTEM, userInput);
-  if (!jsonMode) console.log(`      ${responseB.length} chars returned`);
+  if (!jsonMode && !reportMd && !reportJson) console.log(`      ${responseB.length} chars returned`);
-  if (!jsonMode) console.log('[3/3] Diffing reasoning trajectories...');
+  if (!jsonMode && !reportMd && !reportJson) console.log('[3/3] Diffing reasoning trajectories...');
   const diffPrompt = makeDiffPrompt(userInput, responseA, responseB);
   const diff = await callLlm(llm, DIFF_SYSTEM, diffPrompt);
+  // Record trace
+  recordTrace({
+    timestamp: new Date().toISOString(),
+    agent: 'cli',
+    domain: parsed.full,
+    type: 'compare',
+    compare: { model: llm.model, input_length: userInput.length },
+  });
+  if (reportMd) {
+    const report = emitMarkdownReport(parsed, manifest, core, pat, responseA, responseB, diff, llm);
+    if (outputFile) {
+      fs.writeFileSync(outputFile, report);
+      console.log(`Report saved to ${outputFile}`);
+    } else {
+      console.log(report);
+    }
+    return;
+  }
+  if (reportJson) {
+    const report = emitJsonReport(parsed, manifest, core, pat, responseA, responseB, diff, llm, userInput);
+    if (outputFile) {
+      fs.writeFileSync(outputFile, JSON.stringify(report, null, 2) + '\n');
+      console.log(`Report saved to ${outputFile}`);
+    } else {
+      console.log(JSON.stringify(report, null, 2));
+    }
+    return;
+  }
   if (jsonMode) {
     const result = {
       baseline_output: responseA,