npm - @hone-ai/cli - Versions diffs - 1.7.0 → 1.7.1 - Mend

@hone-ai/cli 1.7.0 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/hone-cli.js +4 -3
package/lib/auto-detect.js +47 -12
package/lib/eval-ab-testing.js +113 -0
package/package.json +3 -2
package/schema/metadata.schema.json +134 -0

package/hone-cli.js CHANGED Viewed

@@ -1274,7 +1274,8 @@ program
     try {
       const health = await axios.get(`${config.apiUrl}/health`);
-      console.log(`✓ API health: ${health.data.status} (v${health.data.version})`);
+      const ver = health.data.version ? ` (v${health.data.version})` : '';
+      console.log(`✓ API health: ${health.data.status}${ver}`);
     } catch (e) {
       console.error(`✗ API health: ${e.message}`);
     }
@@ -3927,12 +3928,12 @@ program
   .description('Validate .github/pipeline/<STORY-ID>/metadata.yml against the framework JSON schema. Implements SC-010 §10 (metadata.yml as wire protocol).')
   .option('--all', 'validate every metadata.yml in .github/pipeline/')
   .option('--repo-root <path>', 'repo root (default: process.cwd())')
-  .option('--schema <path>', 'override schema path (default: enterprise-assets/.github/schema/metadata.schema.json)')
+  .option('--schema <path>', 'override schema path (default: bundled metadata.schema.json)')
   .option('--json', 'emit findings as JSON')
   .action((storyId, opts) => {
     const { validateMetadata, validateAllMetadata } = require('./lib/validate-metadata');
     const repoRoot = opts.repoRoot || process.cwd();
-    const schemaPath = opts.schema || require('node:path').join(repoRoot, 'enterprise-assets/.github/schema/metadata.schema.json');
+    const schemaPath = opts.schema || require('node:path').join(__dirname, 'schema', 'metadata.schema.json');
     if (opts.all) {
       const result = validateAllMetadata({ repoRoot, schemaPath });

package/lib/auto-detect.js CHANGED Viewed

@@ -174,23 +174,58 @@ function detectE2EConvention(signals) {
  * we recommend. Returns either status:'ok' or status:'drift' with an
  * actionable suggested fix.
  */
/**
 * Heuristically decide whether `configured` accepts at least everything that
 * `recommended` accepts, i.e. the configured pattern is equal or broader.
 *
 * A broader configured pattern is treated as an intentional choice (not
 * drift); callers should only flag drift when this returns false.
 *
 * This is a sampling heuristic, not a regex-containment proof: one
 * plausible sample string is derived from each `|`-separated alternative of
 * `recommended`, samples that the recommended pattern itself rejects are
 * discarded, and the remaining samples must all match `configured`.
 *
 * @param {string} configured - regex source currently configured.
 * @param {string} recommended - regex source that detection recommends.
 * @returns {boolean} true when every usable sample of `recommended` also
 *   matches `configured`; false when any sample fails, when no usable sample
 *   could be derived, or when either pattern is not a valid regex.
 */
function isPatternBroader(configured, recommended) {
  try {
    const recRe = new RegExp(recommended);
    const cfgRe = new RegExp(configured);
    const samples = recommended
      // A pattern without '|' yields a single alternative (the whole pattern),
      // so single-shape recommendations are sampled too.
      .split('|')
      .map((alt) =>
        alt
          // Drop grouping and anchors; they carry no literal text.
          .replace(/[()^$]/g, '')
          // Substitute common character classes with a concrete match.
          .replace(/\[0-9\]\+/g, '123')
          .replace(/\\d\+?/g, '123')
          .replace(/\[A-Z\]\+?/g, 'A')
          .replace(/\[A-Za-z0-9\]\+/g, 'abc')
          // Unescape remaining escapes such as "\." or "\-" to their literal.
          .replace(/\\(.)/g, '$1'),
      )
      // Only strings the recommended pattern really accepts are evidence;
      // fragments produced by splitting inside a group are discarded here.
      .filter((sample) => recRe.test(sample));
    return samples.length > 0 && samples.every((sample) => cfgRe.test(sample));
  } catch {
    // Invalid regex source or non-string input: be conservative and report
    // "not broader" so the caller still surfaces the difference.
    return false;
  }
}
 function checkPatternDrift({ configured, recommended }) {
   const findings = [];
   if (configured?.story_id_pattern && configured.story_id_pattern !== recommended.story.pattern) {
-    findings.push({
-      key: 'story_id_pattern',
-      configured: configured.story_id_pattern,
-      recommended: recommended.story.pattern,
-      reason: `repo looks like ${recommended.story.shape} (${recommended.story.confidence} confidence)`,
-    });
+    // Only flag if configured is NARROWER — broader is intentional
+    if (!isPatternBroader(configured.story_id_pattern, recommended.story.pattern)) {
+      findings.push({
+        key: 'story_id_pattern',
+        configured: configured.story_id_pattern,
+        recommended: recommended.story.pattern,
+        reason: `repo looks like ${recommended.story.shape} (${recommended.story.confidence} confidence)`,
+      });
+    }
   }
   if (configured?.e2e_spec_pattern && configured.e2e_spec_pattern !== recommended.e2e.pattern) {
-    findings.push({
-      key: 'e2e_spec_pattern',
-      configured: configured.e2e_spec_pattern,
-      recommended: recommended.e2e.pattern,
-      reason: `detected ${recommended.e2e.framework} under ${recommended.e2e.dir}/e2e/ (${recommended.e2e.confidence} confidence)`,
-    });
+    if (!isPatternBroader(configured.e2e_spec_pattern, recommended.e2e.pattern)) {
+      findings.push({
+        key: 'e2e_spec_pattern',
+        configured: configured.e2e_spec_pattern,
+        recommended: recommended.e2e.pattern,
+        reason: `detected ${recommended.e2e.framework} under ${recommended.e2e.dir}/e2e/ (${recommended.e2e.confidence} confidence)`,
+      });
+    }
   }
   if (findings.length === 0) {

package/lib/eval-ab-testing.js ADDED Viewed

@@ -0,0 +1,113 @@
+'use strict';
+/**
+ * eval-ab-testing.js — HC-019k Agent A/B testing.
+ *
+ * Compare two prompt variants using the same eval scenarios.
+ * Reports which variant performs better across all checks.
+ *
+ * Usage: provide two versions of an agent prompt (A and B),
+ * run the same deterministic evals against both, compare results.
+ *
+ * Pure helper — no I/O, no LLM calls.
+ */
+const { runScenario } = require('./eval-runner');
+const { wrapDeterministic } = require('./eval-three-valued');
/**
 * Run A/B comparison for a set of scenarios against two prompt variants.
 *
 * Each scenario is run once per prompt through `runScenario`, and each result
 * is passed through `wrapDeterministic`. Per scenario the winner is decided
 * in this order: a variant that alone has verdict 'pass' wins; otherwise the
 * variant with more `checks_passed` wins; otherwise it is a 'tie'.
 *
 * @param {Array<object>} scenarios — eval scenarios (filtered for one agent)
 * @param {string} promptA — current prompt text (control)
 * @param {string} promptB — new prompt text (variant)
 * @param {object} [opts]
 * @param {string} [opts.labelA='A (current)']
 * @param {string} [opts.labelB='B (variant)']
 * @returns {{ agent, labelA, labelB, scenarios: Array, summary }} `agent` is
 *   read from the first scenario's `evalAgent` or `agent` field ('unknown'
 *   when absent); `summary` holds win/tie counts, percentage scores and a
 *   recommendation of 'keep_a', 'use_b' or 'no_difference'.
 */
function comparePrompts(scenarios, promptA, promptB, opts = {}) {
  const { labelA = 'A (current)', labelB = 'B (variant)' } = opts;
  const results = [];
  for (const scenario of scenarios) {
    const resultA = wrapDeterministic(runScenario(scenario, promptA));
    const resultB = wrapDeterministic(runScenario(scenario, promptB));
    const winner =
      resultA.verdict === 'pass' && resultB.verdict !== 'pass' ? 'A' :
      resultB.verdict === 'pass' && resultA.verdict !== 'pass' ? 'B' :
      resultA.checks_passed > resultB.checks_passed ? 'A' :
      resultB.checks_passed > resultA.checks_passed ? 'B' :
      'tie';
    results.push({
      id: scenario.id,
      name: scenario.name || scenario.id,
      a: { verdict: resultA.verdict, checks_passed: resultA.checks_passed, checks: resultA.checks },
      b: { verdict: resultB.verdict, checks_passed: resultB.checks_passed, checks: resultB.checks },
      winner,
    });
  }

  const countWinner = (label) => results.filter((r) => r.winner === label).length;
  const aWins = countWinner('A');
  const bWins = countWinner('B');
  const ties = countWinner('tie');

  const sumOf = (pick) => results.reduce((total, r) => total + pick(r), 0);
  // Each variant is scored against its own check total so that a differing
  // number of checks between A and B cannot skew (or push past 100) B's score.
  const toPercent = (passed, max) => (max > 0 ? Math.round((passed / max) * 100) : 0);
  const aScore = toPercent(sumOf((r) => r.a.checks_passed), sumOf((r) => r.a.checks));
  const bScore = toPercent(sumOf((r) => r.b.checks_passed), sumOf((r) => r.b.checks));

  return {
    agent: scenarios[0]?.evalAgent || scenarios[0]?.agent || 'unknown',
    labelA,
    labelB,
    scenarios: results,
    summary: {
      total: results.length,
      a_wins: aWins,
      b_wins: bWins,
      ties,
      a_score: aScore,
      b_score: bScore,
      recommendation: aWins > bWins ? 'keep_a' : bWins > aWins ? 'use_b' : 'no_difference',
    },
  };
}
/**
 * Format A/B comparison results.
 *
 * @param {object} result - object with `agent`, `labelA`, `labelB`,
 *   `scenarios` (each with `name`/`id`, `a`, `b`, `winner`) and `summary`
 *   (`a_score`, `b_score`, `a_wins`, `b_wins`, `ties`, `recommendation`).
 * @param {string} [format='pretty'] - 'json' returns the result serialised
 *   with 2-space indentation; any other value returns the text report.
 * @returns {string} JSON text or a newline-joined, human-readable table.
 */
function formatComparison(result, format = 'pretty') {
  if (format === 'json') return JSON.stringify(result, null, 2);
  const { summary } = result;
  const lines = ['', 'Hone AI — A/B Prompt Comparison', '================================', ''];
  lines.push(`Agent:     ${result.agent}`);
  lines.push(`Variant A: ${result.labelA}`);
  lines.push(`Variant B: ${result.labelB}`);
  lines.push('');
  lines.push('  Scenario                          A        B     Winner');
  lines.push('  --------                          -        -     ------');
  for (const s of result.scenarios) {
    // Coerce to string: ids may be numeric (or both fields missing), and
    // padEnd exists only on strings. The label is fixed at 32 columns.
    const name = String(s.name || s.id || '(unnamed)').padEnd(32).slice(0, 32);
    const a = `${s.a.checks_passed}/${s.a.checks}`.padStart(6);
    const b = `${s.b.checks_passed}/${s.b.checks}`.padStart(6);
    const winner = s.winner === 'tie' ? ' tie' : s.winner === 'A' ? ' <-A' : ' B->';
    lines.push(`  ${name} ${a}   ${b}   ${winner}`);
  }
  lines.push('');
  lines.push('----------------------------------');
  lines.push(`Score: A=${summary.a_score}% | B=${summary.b_score}%`);
  lines.push(`Wins:  A=${summary.a_wins} | B=${summary.b_wins} | Ties=${summary.ties}`);
  const rec = summary.recommendation;
  const msg = rec === 'keep_a' ? 'Keep current prompt (A wins)' :
    rec === 'use_b' ? 'Switch to variant B (B wins)' :
    'No significant difference';
  lines.push(`Recommendation: ${msg}`);
  lines.push('');
  return lines.join('\n');
}
+module.exports = { comparePrompts, formatComparison };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@hone-ai/cli",
-  "version": "1.7.0",
+  "version": "1.7.1",
   "description": "Hone AI — Enterprise SDLC Pipeline CLI",
   "main": "hone-cli.js",
   "bin": {
@@ -10,7 +10,8 @@
     "bin/",
     "hone-cli.js",
     "lib/",
-    "!lib/*.test.js"
+    "!lib/*.test.js",
+    "schema/"
   ],
   "scripts": {
     "test": "echo \"No tests yet\" && exit 0",

package/schema/metadata.schema.json ADDED Viewed

@@ -0,0 +1,134 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://hone.ai/schema/metadata.schema.json",
+  "title": "Hone SDLC Pipeline Story Metadata",
+  "description": "Schema for .github/pipeline/<STORY-ID>/metadata.yml. Implements SC-010 §10 (metadata.yml as wire protocol) — fields read by 3+ agents must validate against this schema before agents run, otherwise typos silently degrade to no-op behavior.",
+  "type": "object",
+  "additionalProperties": true,
+  "required": ["story_id", "title", "branch", "base", "steps"],
+  "properties": {
+    "story_id": {
+      "type": "string",
+      "minLength": 1,
+      "description": "Story identifier. Hone-server convention: SC-NNN, H-NNN, RP-NNN, AU-NNN, SR-NNN, HC-NNN; OptionsFlow convention: E-NNN-X. Adopters may use any pattern."
+    },
+    "title": {
+      "type": "string",
+      "minLength": 1,
+      "description": "Human-readable story title."
+    },
+    "issue": {
+      "type": ["integer", "string", "null"],
+      "description": "GitHub issue reference. Modern convention: integer issue number or null. Legacy convention: full GitHub issue URL string. Both accepted."
+    },
+    "branch": {
+      "type": "string",
+      "minLength": 1,
+      "description": "Git branch name. Convention: feat/<STORY-ID>-<slug> | fix/<STORY-ID>-<slug> | chore/..."
+    },
+    "base": {
+      "type": "string",
+      "minLength": 1,
+      "description": "Base branch the story merges to. Convention: develop. Stacked story chains (story B branched off feature/A) may use feat/...-style base names. Adopter override schemas may tighten the enum."
+    },
+    "captured_at": {
+      "type": ["string", "null"],
+      "description": "ISO date when the story was captured (YYYY-MM-DD). May be null."
+    },
+    "type": {
+      "type": "string",
+      "enum": ["feature", "enhancement", "bug", "bug_fix", "chore", "refactor", "docs", "meta-epic", "fix"],
+      "description": "Story type per SC-001 classifier vocabulary. 'bug_fix' is a legacy alias for 'bug'/'fix'; both accepted."
+    },
+    "priority": {
+      "type": "string",
+      "description": "Adopter priority. Optional. No canonical enum — different stories use P0/P1/P2/P3, high/medium/low, M, low-medium, etc. Adopters with strict policies override via --schema."
+    },
+    "phase": {
+      "type": ["string", "number"],
+      "description": "Story lifecycle phase. Modern convention: string ('backlog', 'in-progress', 'blocked', 'done', 'completed'). Legacy convention: numeric phase identifier (e.g. 6.2, 2.1). Both accepted; adopter override schema may tighten."
+    },
+    "story_type": {
+      "type": "string",
+      "description": "Free-form story type label (legacy field; prefer 'type' for new stories)."
+    },
+    "fast_track": {
+      "type": "boolean",
+      "description": "True if story is on the fast-track pipeline (skip steps 0-3 gates). Per SC-001 classifier output."
+    },
+    "hot_fix": {
+      "type": "boolean",
+      "description": "True if story is on the hot-fix pipeline. Per SC-001 classifier output."
+    },
+    "fix_for": {
+      "type": ["string", "null"],
+      "description": "Story ID this fixes (for regression-test policy per H-030 + SC-009 §Guardrail-before-fix). Null when this story is not a fix."
+    },
+    "parent_story": {
+      "type": ["string", "null"],
+      "description": "Parent story ID for sub-stories or follow-ups. Null at top level."
+    },
+    "sibling_pipelines": {
+      "type": ["array", "null"],
+      "items": { "type": "string" },
+      "description": "Story IDs sharing this pipeline. Used for multi-story epics."
+    },
+    "author": {
+      "type": ["string", "null"]
+    },
+    "created": {
+      "type": ["string", "null"]
+    },
+    "base_sha": {
+      "type": ["string", "number", "null"],
+      "description": "Base commit SHA. Permissive type — unquoted abbreviated SHAs that consist only of digits, or that look like exponent notation (e.g. 92192e7), are parsed as numbers by js-yaml. Adopters who want strict SHA strings can override schema."
+    },
+    "steps": {
+      "type": "object",
+      "additionalProperties": false,
+      "description": "Pipeline step status table. Wire-protocol object — no extra step IDs allowed.",
+      "properties": {
+        "step_0":  { "$ref": "#/$defs/step" },
+        "step_1":  { "$ref": "#/$defs/step" },
+        "step_2":  { "$ref": "#/$defs/step" },
+        "step_3a": { "$ref": "#/$defs/step" },
+        "step_3b": { "$ref": "#/$defs/step" },
+        "step_4":  { "$ref": "#/$defs/step" },
+        "step_5":  { "$ref": "#/$defs/step" },
+        "step_5b": { "$ref": "#/$defs/step" },
+        "step_5c": { "$ref": "#/$defs/step" }
+      }
+    },
+    "cross_validation": {
+      "type": "object",
+      "additionalProperties": true,
+      "description": "Cross-validation findings per H-035 + SC-001..SC-005."
+    },
+    "self_applied_classifier": {
+      "type": "object",
+      "additionalProperties": true,
+      "description": "SC-001 classifier output recorded for the story. Tracks the classification decision so future readers can audit the routing."
+    }
+  },
+  "$defs": {
+    "step": {
+      "type": "object",
+      "additionalProperties": true,
+      "description": "Per-step status. The wire-protocol fields (status, agent) are validated; adopter and historical extension fields (acceptance_criteria_count, acs_met, automation_rationale, bug_caught_in_step, etc.) are passed through. Set additionalProperties: false in your adopter override schema for stricter enforcement.",
+      "properties": {
+        "status": {
+          "type": "string",
+          "description": "Step status. Hone-server uses 'completed'/'in_progress'/'skipped'; older files may use 'complete'/'in-progress' — both accepted. Adopter override schemas may tighten the enum."
+        },
+        "agent": {
+          "type": "string",
+          "description": "Agent ID (story-groomer, implementation-planner, unit-test-writer, e2e-qa-planner, e2e-test-spec-writer, code-builder, code-reviewer, delivery-architect, etc.)."
+        },
+        "artifact": {
+          "type": ["string", "null"],
+          "description": "Path or filename of the step's artifact (e.g., step-0-grooming.md). May be null if the step is pending."
+        }
+      }
+    }
+  }
+}