npm - @hone-ai/cli - Versions diffs - 1.5.0 → 1.6.0 - Mend

@hone-ai/cli 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/hone-cli.js +64 -7
package/lib/doctor-bind-default.js +4 -0
package/lib/eval-contracts.js +256 -0
package/lib/eval-regression.js +202 -0
package/package.json +3 -2

package/hone-cli.js CHANGED Viewed

@@ -2750,8 +2750,9 @@ program
     }
     const storyId = ps.extractStoryIdFromBranch(branch);
     if (!storyId) {
-      console.error(`Could not derive STORY-ID from branch '${branch}'.`);
-      process.exit(1);
+      console.log(`No story branch active (current: ${branch}).`);
+      console.log('Switch to a story branch (e.g., feat/HC-001-description) to use hone next.');
+      process.exit(0);
     }
     const storyDir = path.join(pipelineRoot, storyId);
     const metaPath = path.join(storyDir, 'metadata.yml');
@@ -4139,22 +4140,78 @@ program
   .option('--format <fmt>', 'Output format: pretty | json', 'pretty')
   .option('--evals-dir <path>', 'Override eval scenarios directory')
   .option('--fail-fast', 'Stop on first failure')
+  .option('--contracts', 'Run contract validation between pipeline agents')
+  .option('--snapshot', 'Save current eval + contract results as regression baseline')
+  .option('--regression', 'Compare current results against saved baseline (detect drift)')
   .action(async (opts) => {
     const path = require('path');
     const fs = require('fs');
     const yaml = require('js-yaml');
-    const { loadScenarios, runAllScenarios, formatResults } = require('./lib/eval-runner');
+    // Load agent prompts from seed-agent-prompts.js
+    const seedPath = path.resolve(__dirname, '..', 'scripts', 'seed-agent-prompts.js');
+    const { AGENT_PROMPTS } = require(seedPath);
     const evalDir = opts.evalsDir || path.resolve(__dirname, '..', 'evals');
+    // Snapshot mode (HC-019h): save baseline
+    if (opts.snapshot) {
+      const { loadScenarios, runAllScenarios } = require('./lib/eval-runner');
+      const { validateAllContracts } = require('./lib/eval-contracts');
+      const { saveBaseline } = require('./lib/eval-regression');
+      const scenarios = loadScenarios({
+        evalDir, readFile: (p) => fs.readFileSync(p, 'utf8'),
+        listDir: (p) => fs.readdirSync(p), isDir: (p) => fs.statSync(p).isDirectory(),
+        parseYaml: (text) => yaml.load(text),
+      });
+      const evalResults = runAllScenarios(scenarios, AGENT_PROMPTS);
+      const contractResults = validateAllContracts(AGENT_PROMPTS);
+      const { saved } = saveBaseline(evalResults, contractResults,
+        (p, c) => fs.writeFileSync(p, c, 'utf8'), evalDir);
+      console.log(`Baseline saved: ${saved} entries → evals/.baseline.json`);
+      process.exit(0);
+    }
+    // Regression mode (HC-019h): compare against baseline
+    if (opts.regression) {
+      const { loadScenarios, runAllScenarios } = require('./lib/eval-runner');
+      const { validateAllContracts } = require('./lib/eval-contracts');
+      const { loadBaseline, detectRegressions, formatRegressionResults } = require('./lib/eval-regression');
+      const baseline = loadBaseline((p) => fs.readFileSync(p, 'utf8'), evalDir);
+      if (!baseline) {
+        console.error('No baseline found. Run: hone eval --snapshot');
+        process.exit(1);
+      }
+      const scenarios = loadScenarios({
+        evalDir, readFile: (p) => fs.readFileSync(p, 'utf8'),
+        listDir: (p) => fs.readdirSync(p), isDir: (p) => fs.statSync(p).isDirectory(),
+        parseYaml: (text) => yaml.load(text),
+      });
+      const evalResults = runAllScenarios(scenarios, AGENT_PROMPTS);
+      const contractResults = validateAllContracts(AGENT_PROMPTS);
+      const results = detectRegressions(evalResults, contractResults, baseline);
+      console.log(formatRegressionResults(results, opts.format));
+      process.exit(results.summary.regressions > 0 ? 1 : 0);
+    }
+    // Contract validation mode (HC-019g)
+    if (opts.contracts) {
+      const { validateAllContracts, formatContractResults } = require('./lib/eval-contracts');
+      const results = validateAllContracts(AGENT_PROMPTS);
+      console.log(formatContractResults(results, opts.format));
+      process.exit(results.failed > 0 ? 1 : 0);
+    }
+    // Scenario evaluation mode
+    const { loadScenarios, runAllScenarios, formatResults } = require('./lib/eval-runner');
     if (!fs.existsSync(evalDir)) {
       console.error(`Eval directory not found: ${evalDir}`);
       process.exit(1);
     }
-    // Load agent prompts from seed-agent-prompts.js
-    const seedPath = path.resolve(__dirname, '..', 'scripts', 'seed-agent-prompts.js');
-    const { AGENT_PROMPTS } = require(seedPath);
     const scenarios = loadScenarios({
       evalDir,
       agent: opts.agent,

package/lib/doctor-bind-default.js CHANGED Viewed

@@ -90,6 +90,10 @@ function checkBindDefault(args) {
     if (/\bos\.getenv\b|\bprocess\.env\b|\bgetenv\(/.test(content)) continue;
     // Filter out lines that are clearly comments mentioning the anti-pattern
     if (/^\s*(#|\/\/|--).*0\.0\.0\.0/.test(content)) continue;
+    // Filter out docstrings/multiline strings (Python """, JS template literals, block comments)
+    if (/^\s*("""|'''|\/\*|\*)/.test(content)) continue;
+    // Filter out lines that describe/document the pattern rather than use it
+    if (/override|example|usage|default.*is|should|must|can\s/i.test(content) && !/=\s*['"]0\.0\.0\.0/.test(content)) continue;
     offenders.push({ file, line: lineNo, content: content.trim().slice(0, 100) });
   }

package/lib/eval-contracts.js ADDED Viewed

@@ -0,0 +1,256 @@
+'use strict';
+/**
+ * eval-contracts.js — HC-019g contract testing between pipeline agents.
+ *
+ * Validates that each agent's prompt correctly references:
+ *   1. The prior step's artifact (input dependency)
+ *   2. The metadata.yml gate check (validation contract)
+ *   3. The output artifact it produces (output contract)
+ *
+ * This is a deterministic structural check — zero LLM tokens.
+ */
+/**
+ * Pipeline agent contracts.
+ * Each entry defines what the agent must validate (input) and produce (output).
+ *
+ * Entry fields, as consumed by validateContract / validateAllContracts:
+ *   agent          — key used to look up the prompt text in the agentPrompts map
+ *   step           — step label (number or string) copied into each result
+ *   inputArtifact  — literal substring the prompt must contain; null skips the
+ *                    check. A non-null value also enables the
+ *                    "PIPELINE VALIDATION" block check.
+ *   inputGate      — literal substring the prompt must contain; null skips the check
+ *   outputArtifact — literal substring the prompt must contain; null skips the check
+ *   outputGate     — not read by the validation functions visible in this module
+ *   metadataField  — when non-null, the prompt must mention "metadata.yml"
+ *                    (the field path itself is not searched for)
+ *   extraChecks    — optional list of { text, check, detail }: `text` must appear
+ *                    in the prompt, `check` names the result entry, `detail` is
+ *                    its human-readable description
+ */
+const PIPELINE_CONTRACTS = [
+  {
+    agent: 'story-groomer',
+    step: 0,
+    inputArtifact: null,  // first in pipeline — no prior step
+    inputGate: null,
+    outputArtifact: 'step-0-grooming.md',
+    outputGate: 'step_0',
+    metadataField: 'step_0.gate_result',
+    extraChecks: [
+      { text: 'metadata.yml', check: 'creates_metadata', detail: 'creates metadata.yml' },
+      { text: 'Initialize Pipeline', check: 'pipeline_init', detail: 'initializes pipeline directory' },
+      { text: '.github/pipeline/', check: 'pipeline_dir', detail: 'creates .github/pipeline/ directory' },
+    ],
+  },
+  {
+    agent: 'implementation-planner',
+    step: 1,
+    inputArtifact: 'step-0-grooming.md',
+    inputGate: 'step_0.gate_result',
+    outputArtifact: 'step-1-plan.md',
+    outputGate: 'step_1',
+    metadataField: 'step_1.gate_result',
+  },
+  {
+    agent: 'unit-test-case-writer',
+    step: 2,
+    inputArtifact: 'step-1-plan.md',
+    inputGate: 'step_1.gate_result',
+    outputArtifact: 'step-2-tests.md',
+    outputGate: 'step_2',
+    metadataField: 'step_2.gate_result',
+  },
+  {
+    agent: 'e2e-qa-planner',
+    step: 3,
+    inputArtifact: 'step-2-tests.md',
+    inputGate: 'step_2.gate_result',
+    outputArtifact: 'step-3-e2e-plan.md',
+    outputGate: 'step_3',
+    metadataField: 'step_3.gate_result',
+  },
+  {
+    agent: 'code-builder',
+    step: 4,
+    inputArtifact: 'step-3-e2e-plan.md',
+    inputGate: 'step_3.gate_result',
+    outputArtifact: 'step-4-implementation.md',
+    outputGate: 'step_4',
+    metadataField: 'step_4.gate_result',
+  },
+  {
+    agent: 'code-reviewer',
+    step: 5,
+    inputArtifact: 'step-4-implementation.md',
+    inputGate: 'step_4.gate_result',
+    outputArtifact: 'step-5-review.md',
+    outputGate: 'step_5',
+    metadataField: 'step_5.gate_result',
+  },
+  {
+    agent: 'security-agent',
+    step: '5d',
+    inputArtifact: 'step-5-review.md',
+    inputGate: 'step_5.status',
+    outputArtifact: 'step-5d-security.md',
+    outputGate: null,
+    metadataField: null,
+  },
+  {
+    agent: 'performance-agent',
+    step: '5e',
+    inputArtifact: 'step-5-review.md',
+    inputGate: 'step_5.status',
+    outputArtifact: 'step-5e-performance.md',
+    outputGate: null,
+    metadataField: null,
+  },
+  {
+    agent: 'delivery-architect',
+    step: 'independent',
+    inputArtifact: null,
+    inputGate: null,
+    outputArtifact: 'EXECUTION_PLAN.yml',
+    outputGate: null,
+    metadataField: null,
+    extraChecks: [
+      { text: 'Checklist A', check: 'checklist_a', detail: 'has Checklist A (epic decomposition)' },
+      { text: 'Checklist B', check: 'checklist_b', detail: 'has Checklist B (architecture validation)' },
+      { text: 'test_strategy', check: 'test_strategy', detail: 'includes test_strategy in plan' },
+    ],
+  },
+];
+/**
+ * Validate a single agent prompt against its contract.
+ *
+ * Every check is a case-sensitive literal substring search
+ * (String.prototype.includes) on the prompt text; a check is only emitted
+ * when the corresponding contract field is set, so the number of checks
+ * varies per agent.
+ *
+ * @param {string} promptText — agent prompt content
+ * @param {object} contract — one entry from PIPELINE_CONTRACTS
+ * @returns {{ agent, step, checks: Array<{ check, passed, detail }>, passed: boolean, failures: Array<{ check, passed, detail }> }}
+ *   `passed` is true when every emitted check passed (also true when no
+ *   check was emitted); `failures` is the subset of `checks` that failed.
+ */
+function validateContract(promptText, contract) {
+  const checks = [];
+  // Check 1: Input artifact reference (skipped when the contract has no inputArtifact, e.g. first-in-pipeline agents)
+  if (contract.inputArtifact) {
+    const found = promptText.includes(contract.inputArtifact);
+    checks.push({
+      check: 'input_artifact',
+      passed: found,
+      detail: found
+        ? `references prior step "${contract.inputArtifact}"`
+        : `MISSING reference to prior step "${contract.inputArtifact}"`,
+    });
+  }
+  // Check 2: Input gate validation (prompt must mention the gate field path verbatim)
+  if (contract.inputGate) {
+    const found = promptText.includes(contract.inputGate);
+    checks.push({
+      check: 'input_gate',
+      passed: found,
+      detail: found
+        ? `validates "${contract.inputGate}" before proceeding`
+        : `MISSING validation of "${contract.inputGate}"`,
+    });
+  }
+  // Check 3: Output artifact reference
+  if (contract.outputArtifact) {
+    const found = promptText.includes(contract.outputArtifact);
+    checks.push({
+      check: 'output_artifact',
+      passed: found,
+      detail: found
+        ? `produces "${contract.outputArtifact}"`
+        : `MISSING output artifact "${contract.outputArtifact}"`,
+    });
+  }
+  // Check 4: metadata.yml update (enabled by a non-null metadataField)
+  if (contract.metadataField) {
+    const found = promptText.includes('metadata.yml'); // NOTE(review): only the file name is matched, not contract.metadataField itself
+    checks.push({
+      check: 'metadata_update',
+      passed: found,
+      detail: found
+        ? `updates metadata.yml`
+        : `MISSING metadata.yml update`,
+    });
+  }
+  // Check 5: PIPELINE VALIDATION block (required only when the agent depends on a prior step's artifact)
+  if (contract.inputArtifact) {
+    const found = promptText.includes('PIPELINE VALIDATION');
+    checks.push({
+      check: 'pipeline_validation_block',
+      passed: found,
+      detail: found
+        ? `has PIPELINE VALIDATION block`
+        : `MISSING PIPELINE VALIDATION block`,
+    });
+  }
+  // Check 6: Extra checks (agent-specific literal substrings from contract.extraChecks)
+  if (contract.extraChecks) {
+    for (const ec of contract.extraChecks) {
+      const found = promptText.includes(ec.text);
+      checks.push({
+        check: ec.check,
+        passed: found,
+        detail: found ? ec.detail : `MISSING: ${ec.detail}`,
+      });
+    }
+  }
+  return {
+    agent: contract.agent,
+    step: contract.step,
+    checks,
+    passed: checks.every(c => c.passed),
+    failures: checks.filter(c => !c.passed),
+  };
+}
+/**
+ * Run all contract validations, one per entry in PIPELINE_CONTRACTS.
+ *
+ * An agent whose prompt is missing or falsy (for example an empty string)
+ * is not validated; it is recorded as failed with an empty `checks` list
+ * and a single synthetic `agent_exists` failure.
+ *
+ * @param {object} agentPrompts — { agentName: promptText }
+ * @returns {{ total, passed, failed, results }} — `total`, `passed` and
+ *   `failed` count agents (not individual checks); `results` holds one
+ *   entry per contract, in PIPELINE_CONTRACTS order.
+ */
+function validateAllContracts(agentPrompts) {
+  const results = [];
+  for (const contract of PIPELINE_CONTRACTS) {
+    const promptText = agentPrompts[contract.agent];
+    if (!promptText) {
+      results.push({
+        agent: contract.agent,
+        step: contract.step,
+        checks: [],
+        passed: false,
+        failures: [{ check: 'agent_exists', passed: false, detail: `agent "${contract.agent}" not found` }],
+      });
+      continue;
+    }
+    results.push(validateContract(promptText, contract));
+  }
+  return {
+    total: results.length,
+    passed: results.filter(r => r.passed).length,
+    failed: results.filter(r => !r.passed).length,
+    results,
+  };
+}
+/**
+ * Format contract results for display.
+ *
+ * @param {{ total, passed, failed, results }} results — the object returned
+ *   by validateAllContracts
+ * @param {string} [format='pretty'] — 'json' returns the results serialized
+ *   with 2-space indentation; any other value produces the text report
+ * @returns {string} JSON text, or a newline-joined report with one
+ *   "[PASS]"/"[FAIL]" line per agent, one indented line per failed check,
+ *   and a summary line of agent counts
+ */
+function formatContractResults(results, format = 'pretty') {
+  if (format === 'json') return JSON.stringify(results, null, 2);
+  const lines = ['', 'Hone AI — Agent Contract Validation', '====================================', ''];
+  for (const r of results.results) {
+    const icon = r.passed ? 'PASS' : 'FAIL';
+    lines.push(`[${icon}] Step ${r.step}: ${r.agent} (${r.checks.length} checks)`);
+    for (const f of r.failures) {
+      lines.push(`       x ${f.check}: ${f.detail}`);
+    }
+  }
+  lines.push('');
+  lines.push('----------------------------------');
+  lines.push(`Summary: ${results.total} agents | ${results.passed} passed | ${results.failed} failed`);
+  lines.push('');
+  return lines.join('\n');
+}
+module.exports = { PIPELINE_CONTRACTS, validateContract, validateAllContracts, formatContractResults };

package/lib/eval-regression.js ADDED Viewed

@@ -0,0 +1,202 @@
+'use strict';
+/**
+ * eval-regression.js — HC-019h prompt regression testing.
+ *
+ * Saves eval results as a baseline snapshot, then detects drift
+ * when a prompt change causes a previously-passing eval to fail.
+ *
+ * Baseline file: evals/.baseline.json
+ * Pure helper with injected I/O.
+ */
+const BASELINE_FILENAME = '.baseline.json';
+/**
+ * Save current eval results as baseline.
+ *
+ * Builds a snapshot keyed by scenario id and by contract agent name, stamps
+ * it with the current time (ISO 8601), and hands the pretty-printed JSON to
+ * the injected `writeFile`. The target path is `evalDir` joined to
+ * BASELINE_FILENAME with a literal "/".
+ *
+ * @param {object} results — from runAllScenarios; reads `results.scenarios`
+ *   and, per scenario, `id`, `agent`, `result`, `checks`, `checks_passed`
+ * @param {object} contractResults — from validateAllContracts; per result
+ *   only `agent`, `step`, `passed` and the number of checks are stored
+ * @param {(path, content) => void} writeFile
+ * @param {string} evalDir
+ * @returns {{ saved: number }} — number of distinct scenario ids plus
+ *   distinct contract agents written (entries sharing a key overwrite
+ *   each other and are counted once)
+ */
+function saveBaseline(results, contractResults, writeFile, evalDir) {
+  const baseline = {
+    created_at: new Date().toISOString(),
+    eval_scenarios: {},
+    contracts: {},
+  };
+  for (const s of results.scenarios) {
+    baseline.eval_scenarios[s.id] = {
+      agent: s.agent,
+      result: s.result,
+      checks: s.checks,
+      checks_passed: s.checks_passed,
+    };
+  }
+  for (const r of contractResults.results) {
+    baseline.contracts[r.agent] = {
+      step: r.step,
+      passed: r.passed,
+      checks: r.checks.length,
+    };
+  }
+  writeFile(`${evalDir}/${BASELINE_FILENAME}`, JSON.stringify(baseline, null, 2));
+  const totalEntries = Object.keys(baseline.eval_scenarios).length + Object.keys(baseline.contracts).length;
+  return { saved: totalEntries };
+}
+/**
+ * Load baseline from file.
+ *
+ * Reads `evalDir` + "/" + BASELINE_FILENAME through the injected `readFile`
+ * and parses it as JSON.
+ *
+ * NOTE(review): every error is mapped to null — a missing file, a read
+ * failure and malformed JSON are indistinguishable to the caller.
+ *
+ * @param {(path) => string} readFile
+ * @param {string} evalDir
+ * @returns {object|null} the parsed baseline, or null when reading or
+ *   parsing throws
+ */
+function loadBaseline(readFile, evalDir) {
+  try {
+    const content = readFile(`${evalDir}/${BASELINE_FILENAME}`);
+    return JSON.parse(content);
+  } catch {
+    return null;
+  }
+}
+/**
+ * Compare current results against baseline. Detect regressions.
+ *
+ * Classification, applied to eval scenarios (keyed by id) and to contracts
+ * (keyed by agent name):
+ *   - regression:  passing in the baseline, not passing now
+ *   - improvement: not passing in the baseline, passing now
+ *   - new:         present now, absent from the baseline
+ *   - removed:     present in the baseline, absent now
+ * Items whose pass/fail state is unchanged are not reported.
+ *
+ * Assumes `baseline.eval_scenarios` and `baseline.contracts` are objects and
+ * that each regressed scenario/contract carries a `failures` array of
+ * objects with a `detail` field — TODO confirm against eval-runner output.
+ *
+ * @param {object} currentResults — from runAllScenarios
+ * @param {object} currentContracts — from validateAllContracts
+ * @param {object} baseline — from loadBaseline
+ * @returns {{ regressions: Array, improvements: Array, new_scenarios: Array, removed: Array, summary }}
+ *   `new_scenarios` and `removed` hold both scenarios and contracts
+ *   (distinguished by `type`); `summary` has the four counts plus
+ *   `baseline_date` taken from `baseline.created_at`.
+ */
+function detectRegressions(currentResults, currentContracts, baseline) {
+  const regressions = [];
+  const improvements = [];
+  const newScenarios = [];
+  // Check eval scenarios (a scenario counts as passing only when result === 'pass')
+  for (const s of currentResults.scenarios) {
+    const prev = baseline.eval_scenarios[s.id];
+    if (!prev) {
+      newScenarios.push({ type: 'scenario', id: s.id, agent: s.agent, result: s.result });
+      continue;
+    }
+    if (prev.result === 'pass' && s.result !== 'pass') {
+      regressions.push({
+        type: 'scenario',
+        id: s.id,
+        agent: s.agent,
+        was: 'pass',
+        now: s.result,
+        failures: s.failures.map(f => f.detail),
+      });
+    }
+    if (prev.result !== 'pass' && s.result === 'pass') {
+      improvements.push({ type: 'scenario', id: s.id, agent: s.agent, was: prev.result, now: 'pass' });
+    }
+  }
+  // Check contracts (compared on the boolean `passed` flag, keyed by agent)
+  for (const r of currentContracts.results) {
+    const prev = baseline.contracts[r.agent];
+    if (!prev) {
+      newScenarios.push({ type: 'contract', agent: r.agent, passed: r.passed });
+      continue;
+    }
+    if (prev.passed && !r.passed) {
+      regressions.push({
+        type: 'contract',
+        agent: r.agent,
+        was: 'pass',
+        now: 'fail',
+        failures: r.failures.map(f => f.detail),
+      });
+    }
+    if (!prev.passed && r.passed) {
+      improvements.push({ type: 'contract', agent: r.agent, was: 'fail', now: 'pass' });
+    }
+  }
+  // Check for removed scenarios and contracts (in baseline but not in current)
+  const removedScenarios = [];
+  const currentIds = new Set(currentResults.scenarios.map(s => s.id));
+  for (const [id, prev] of Object.entries(baseline.eval_scenarios)) {
+    if (!currentIds.has(id)) {
+      removedScenarios.push({ type: 'scenario', id, agent: prev.agent, was: prev.result });
+    }
+  }
+  const currentAgents = new Set(currentContracts.results.map(r => r.agent));
+  for (const [agent, prev] of Object.entries(baseline.contracts)) {
+    if (!currentAgents.has(agent)) {
+      removedScenarios.push({ type: 'contract', agent, was: prev.passed ? 'pass' : 'fail' });
+    }
+  }
+  return {
+    regressions,
+    improvements,
+    new_scenarios: newScenarios,
+    removed: removedScenarios,
+    summary: {
+      regressions: regressions.length,
+      improvements: improvements.length,
+      new: newScenarios.length,
+      removed: removedScenarios.length,
+      baseline_date: baseline.created_at,
+    },
+  };
+}
+/**
+ * Format regression results for display.
+ *
+ * @param {object} results — the object returned by detectRegressions
+ * @param {string} [format='pretty'] — 'json' returns the results serialized
+ *   with 2-space indentation; any other value produces the text report
+ * @returns {string} JSON text, or a newline-joined report. Sections for
+ *   regressions, improvements, new and removed items are emitted only when
+ *   non-empty; new items are reported as a count only. The final
+ *   "Exit code" line is 1 when there is at least one regression, else 0 —
+ *   improvements, new and removed items do not affect it.
+ */
+function formatRegressionResults(results, format = 'pretty') {
+  if (format === 'json') return JSON.stringify(results, null, 2);
+  const lines = ['', 'Hone AI — Prompt Regression Check', '==================================', ''];
+  lines.push(`Baseline: ${results.summary.baseline_date}`);
+  lines.push('');
+  if (results.regressions.length > 0) {
+    lines.push('REGRESSIONS (previously passing, now failing):');
+    for (const r of results.regressions) {
+      lines.push(`  [!!] ${r.type === 'scenario' ? r.id : r.agent}: ${r.was} → ${r.now}`);
+      for (const f of r.failures) {
+        lines.push(`       ${f}`);
+      }
+    }
+    lines.push('');
+  }
+  if (results.improvements.length > 0) {
+    lines.push('Improvements (previously failing, now passing):');
+    for (const i of results.improvements) {
+      lines.push(`  [ok] ${i.type === 'scenario' ? i.id : i.agent}: ${i.was} → ${i.now}`);
+    }
+    lines.push('');
+  }
+  if (results.new_scenarios.length > 0) {
+    lines.push(`New (${results.new_scenarios.length} scenarios/contracts not in baseline — run --snapshot to update)`);
+    lines.push('');
+  }
+  if (results.removed && results.removed.length > 0) {
+    lines.push('Removed (in baseline but no longer exist — coverage gap):');
+    for (const r of results.removed) {
+      lines.push(`  [??] ${r.type === 'scenario' ? r.id : r.agent}: was ${r.was}, now deleted`);
+    }
+    lines.push('');
+  }
+  lines.push('----------------------------------');
+  lines.push(`Summary: ${results.summary.regressions} regressions | ${results.summary.improvements} improvements | ${results.summary.new} new | ${results.summary.removed || 0} removed`);
+  lines.push(`Exit code: ${results.summary.regressions > 0 ? 1 : 0}`);
+  lines.push('');
+  return lines.join('\n');
+}
+module.exports = { saveBaseline, loadBaseline, detectRegressions, formatRegressionResults, BASELINE_FILENAME };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@hone-ai/cli",
-  "version": "1.5.0",
+  "version": "1.6.0",
   "description": "Hone AI — Enterprise SDLC Pipeline CLI",
   "main": "hone-cli.js",
   "bin": {
@@ -14,7 +14,8 @@
   ],
   "scripts": {
     "test": "echo \"No tests yet\" && exit 0",
-    "link": "npm link"
+    "link": "npm link",
+    "postinstall": "echo '\\n  Hone AI CLI installed successfully.\\n  Next: run `hone init --token <YOUR_TOKEN>` to configure.\\n  Docs: https://github.com/subbareddyvani/hone-server\\n'"
   },
   "dependencies": {
     "ajv": "^8.20.0",