npm - @traits-dev/cli - Versions diffs - 0.2.0 → 0.4.0 - Mend

@traits-dev/cli 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/dist/traits.js +236 -7
package/package.json +2 -2

package/dist/traits.js CHANGED

@@ -22,6 +22,8 @@ function printCompileUsage(out = process.stderr) {
       "  --model <model>           Model target (required)",
       "  --json                    Output structured JSON",
       "  --strict                  Treat warnings as compile-blocking",
+      "  --budget                  Print estimated token count (chars/4)",
+      "  --budget-limit <tokens>   Warn to stderr if estimate exceeds limit",
       "  --explain                 Include compilation trace output",
       "  --context key=value       Activate context adaptation (repeatable)",
       "  --knowledge-base-dir      Directory containing compiler pattern files",
@@ -52,6 +54,8 @@ function parseCompileArgs(args) {
     model: null,
     strict: false,
     json: false,
+    budget: false,
+    budgetLimit: null,
     explain: false,
     verbose: false,
     noColor: false,
@@ -70,6 +74,10 @@ function parseCompileArgs(args) {
       result.json = true;
       continue;
     }
+    if (arg === "--budget") {
+      result.budget = true;
+      continue;
+    }
     if (arg === "--explain") {
       result.explain = true;
       continue;
@@ -82,7 +90,7 @@ function parseCompileArgs(args) {
       result.noColor = true;
       continue;
     }
-    if (arg === "--model" || arg === "--bundled-profiles-dir" || arg === "--context" || arg === "--knowledge-base-dir") {
+    if (arg === "--model" || arg === "--bundled-profiles-dir" || arg === "--context" || arg === "--knowledge-base-dir" || arg === "--budget-limit") {
       const value = args[index + 1];
       if (!value) return { error: `Missing value for "${arg}"` };
       if (arg === "--model") {
@@ -91,6 +99,12 @@ function parseCompileArgs(args) {
         result.bundledProfilesDir = value;
       } else if (arg === "--knowledge-base-dir") {
         result.knowledgeBaseDir = value;
+      } else if (arg === "--budget-limit") {
+        const parsedBudgetLimit = Number(value);
+        if (!Number.isFinite(parsedBudgetLimit) || parsedBudgetLimit <= 0) {
+          return { error: `Invalid value for "--budget-limit": "${value}"` };
+        }
+        result.budgetLimit = Math.round(parsedBudgetLimit);
       } else {
         const parsedContext = parseContextArg(value);
         if ("error" in parsedContext) return { error: parsedContext.error };
@@ -111,8 +125,14 @@ function parseCompileArgs(args) {
   if (!result.model) {
     return { error: 'Missing required option "--model"' };
   }
+  if (result.budgetLimit != null) {
+    result.budget = true;
+  }
   return { value: result };
 }
+function estimateBudgetTokens(text) {
+  return Math.ceil(String(text ?? "").length / 4);
+}
 function runCompile(args, io = process) {
   const parsed = parseCompileArgs(args);
   if ("error" in parsed) {
@@ -156,10 +176,32 @@ function runCompile(args, io = process) {
     if (options.json) {
       io.stdout.write(`${JSON.stringify(compiled, null, 2)}
 `);
+      if (options.budget) {
+        const budgetEstimate = estimateBudgetTokens(compiled.text);
+        io.stderr.write(`Estimated token count: ${budgetEstimate}
+`);
+        if (options.budgetLimit != null && budgetEstimate > options.budgetLimit) {
+          io.stderr.write(
+            `Warning: Estimated token count ${budgetEstimate} exceeds budget limit ${options.budgetLimit}
+`
+          );
+        }
+      }
       return 0;
     }
     io.stdout.write(`${compiled.text}
 `);
+    if (options.budget) {
+      const budgetEstimate = estimateBudgetTokens(compiled.text);
+      io.stderr.write(`Estimated token count: ${budgetEstimate}
+`);
+      if (options.budgetLimit != null && budgetEstimate > options.budgetLimit) {
+        io.stderr.write(
+          `Warning: Estimated token count ${budgetEstimate} exceeds budget limit ${options.budgetLimit}
+`
+        );
+      }
+    }
     if (options.explain && compiled.trace) {
       io.stdout.write(`
 [TRACE]
@@ -211,6 +253,7 @@ import {
 import {
   detectEvalTierAvailability,
   formatValidationResult as formatValidationResult2,
+  loadBuiltInEvalSuite,
   resolveTierExecution,
   runOfflineBaselineScaffold,
   toValidationResultObject as toValidationResultObject2
@@ -224,6 +267,7 @@ function printEvalUsage(out = process.stderr) {
       "Options:",
       "  --model <model>           Model target (required)",
       "  --tier <1|2|3>            Highest tier to run (default: highest available)",
+      "  --suite <name>            Built-in baseline suite: support|healthcare|developer",
       "  --provider <name>         Judge provider for Tier 3: auto|openai|anthropic",
       "  --embedding-model <name>  Embedding model for Tier 2 (OpenAI)",
       "  --judge-model <name>      Judge model for Tier 3 provider",
@@ -236,6 +280,11 @@ function printEvalUsage(out = process.stderr) {
       "  --samples <path>          JSON file with samples: [{ id, response }]",
       "  --scenarios <path>        Alias for --samples in this scaffold",
       "  --json                    Output structured JSON",
+      "  --format <text|json|junit> Output format (default: text)",
+      "  --junit-threshold <num>   Global JUnit pass threshold in [0,1] (default: 0.7)",
+      "  --junit-threshold-tier1 <num> Tier 1 JUnit threshold override",
+      "  --junit-threshold-tier2 <num> Tier 2 JUnit threshold override",
+      "  --junit-threshold-tier3 <num> Tier 3 JUnit threshold override",
       "  --strict                  Treat validation warnings as errors",
       "  --verbose                 Include command metadata output",
       "  --no-color                Disable colorized output",
@@ -251,6 +300,7 @@ function parseEvalArgs(args) {
     profilePath: null,
     model: null,
     tier: null,
+    suite: null,
     provider: "auto",
     embeddingModel: null,
     judgeModel: null,
@@ -260,6 +310,11 @@ function parseEvalArgs(args) {
     maxRetries: null,
     retryBaseMs: null,
     json: false,
+    format: "text",
+    junitThreshold: null,
+    junitThresholdTier1: null,
+    junitThresholdTier2: null,
+    junitThresholdTier3: null,
     strict: false,
     verbose: false,
     noColor: false,
@@ -274,6 +329,7 @@ function parseEvalArgs(args) {
     const arg = args[index];
     if (arg === "--json") {
       result.json = true;
+      result.format = "json";
       continue;
     }
     if (arg === "--strict") {
@@ -300,14 +356,18 @@ function parseEvalArgs(args) {
       result.constraintImpact = true;
       continue;
     }
-    if (arg === "--model" || arg === "--tier" || arg === "--provider" || arg === "--embedding-model" || arg === "--judge-model" || arg === "--openai-base-url" || arg === "--anthropic-base-url" || arg === "--timeout-ms" || arg === "--max-retries" || arg === "--retry-base-ms" || arg === "--response" || arg === "--samples" || arg === "--scenarios") {
+    if (arg === "--model" || arg === "--tier" || arg === "--suite" || arg === "--provider" || arg === "--format" || arg === "--embedding-model" || arg === "--judge-model" || arg === "--openai-base-url" || arg === "--anthropic-base-url" || arg === "--timeout-ms" || arg === "--max-retries" || arg === "--retry-base-ms" || arg === "--junit-threshold" || arg === "--junit-threshold-tier1" || arg === "--junit-threshold-tier2" || arg === "--junit-threshold-tier3" || arg === "--response" || arg === "--samples" || arg === "--scenarios") {
       const value = args[index + 1];
       if (!value) return { error: `Missing value for "${arg}"` };
       if (arg === "--model") result.model = value;
       if (arg === "--tier") result.tier = Number(value);
+      if (arg === "--suite") result.suite = String(value).toLowerCase();
       if (arg === "--provider") {
         result.provider = String(value).toLowerCase();
       }
+      if (arg === "--format") {
+        result.format = String(value).toLowerCase();
+      }
       if (arg === "--embedding-model") result.embeddingModel = value;
       if (arg === "--judge-model") result.judgeModel = value;
       if (arg === "--openai-base-url") result.openaiBaseUrl = value;
@@ -315,6 +375,10 @@ function parseEvalArgs(args) {
       if (arg === "--timeout-ms") result.timeoutMs = Number(value);
       if (arg === "--max-retries") result.maxRetries = Number(value);
       if (arg === "--retry-base-ms") result.retryBaseMs = Number(value);
+      if (arg === "--junit-threshold") result.junitThreshold = Number(value);
+      if (arg === "--junit-threshold-tier1") result.junitThresholdTier1 = Number(value);
+      if (arg === "--junit-threshold-tier2") result.junitThresholdTier2 = Number(value);
+      if (arg === "--junit-threshold-tier3") result.junitThresholdTier3 = Number(value);
       if (arg === "--response") result.responses.push(value);
       if (arg === "--samples" || arg === "--scenarios") result.samplesPath = value;
       index += 1;
@@ -338,6 +402,20 @@ function parseEvalArgs(args) {
   if (!["auto", "openai", "anthropic"].includes(result.provider)) {
     return { error: 'Invalid "--provider" value. Expected auto, openai, or anthropic.' };
   }
+  if (result.suite != null && !["support", "healthcare", "developer"].includes(
+    result.suite
+  )) {
+    return { error: 'Invalid "--suite" value. Expected support, healthcare, or developer.' };
+  }
+  if (result.suite != null && result.samplesPath != null) {
+    return { error: 'Use either "--suite" or "--samples/--scenarios", not both.' };
+  }
+  if (result.suite != null && result.responses.length > 0) {
+    return { error: 'Use either "--suite" or "--response", not both.' };
+  }
+  if (!["text", "json", "junit"].includes(result.format)) {
+    return { error: 'Invalid "--format" value. Expected text, json, or junit.' };
+  }
   if (result.timeoutMs != null && (!Number.isInteger(result.timeoutMs) || result.timeoutMs < 0)) {
     return { error: 'Invalid "--timeout-ms" value. Expected a non-negative integer.' };
   }
@@ -347,9 +425,33 @@ function parseEvalArgs(args) {
   if (result.retryBaseMs != null && (!Number.isInteger(result.retryBaseMs) || result.retryBaseMs < 0)) {
     return { error: 'Invalid "--retry-base-ms" value. Expected a non-negative integer.' };
   }
+  for (const [flag, value] of [
+    ["--junit-threshold", result.junitThreshold],
+    ["--junit-threshold-tier1", result.junitThresholdTier1],
+    ["--junit-threshold-tier2", result.junitThresholdTier2],
+    ["--junit-threshold-tier3", result.junitThresholdTier3]
+  ]) {
+    if (value == null) continue;
+    if (!Number.isFinite(value) || value < 0 || value > 1) {
+      return { error: `Invalid "${flag}" value. Expected a number in [0, 1].` };
+    }
+  }
   return { value: result };
 }
 function loadSamples(options, cwd) {
+  if (options.suite) {
+    const suite = loadBuiltInEvalSuite(options.suite);
+    if (!suite) {
+      throw new Error(
+        `Unknown suite "${options.suite}". Expected support, healthcare, or developer.`
+      );
+    }
+    return suite.scenarios.map((scenario) => ({
+      id: scenario.id,
+      prompt: scenario.messages.map((message) => `${message.role}: ${message.content}`).join("\n"),
+      response: scenario.expected_behavior ?? ""
+    }));
+  }
   if (options.samplesPath) {
     const sampleFile = path2.resolve(cwd, options.samplesPath);
     const parsed = JSON.parse(fs.readFileSync(sampleFile, "utf8"));
@@ -377,10 +479,118 @@ function loadSamples(options, cwd) {
   }));
 }
 function writeProgress(io, options, message) {
-  if (options.json) return;
+  if (options.format !== "text") return;
   io.stderr.write(`${message}
 `);
 }
+function escapeXml(value) {
+  return String(value ?? "").replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
+}
+function resolveJUnitThresholds(options) {
+  const base = options.junitThreshold ?? 0.7;
+  return {
+    tier1: options.junitThresholdTier1 ?? base,
+    tier2: options.junitThresholdTier2 ?? base,
+    tier3: options.junitThresholdTier3 ?? base
+  };
+}
+function buildSampleScoreMap(samples) {
+  return new Map((samples ?? []).map((sample) => [String(sample.id), Number(sample.score)]));
+}
+function collectScenarioIds(reports) {
+  const ids = [];
+  const seen = /* @__PURE__ */ new Set();
+  for (const sample of reports.tier1?.samples ?? []) {
+    const id = String(sample.id);
+    if (seen.has(id)) continue;
+    seen.add(id);
+    ids.push(id);
+  }
+  for (const sample of reports.tier2?.samples ?? []) {
+    const id = String(sample.id);
+    if (seen.has(id)) continue;
+    seen.add(id);
+    ids.push(id);
+  }
+  for (const sample of reports.tier3?.samples ?? []) {
+    const id = String(sample.id);
+    if (seen.has(id)) continue;
+    seen.add(id);
+    ids.push(id);
+  }
+  return ids;
+}
+function buildJUnitReport(args) {
+  const ids = collectScenarioIds(args.tierReports);
+  const tier1Scores = buildSampleScoreMap(args.tierReports.tier1?.samples);
+  const tier2Scores = buildSampleScoreMap(args.tierReports.tier2?.samples);
+  const tier3Scores = buildSampleScoreMap(args.tierReports.tier3?.samples);
+  const className = `traits.eval.${path2.basename(args.profilePath, path2.extname(args.profilePath))}`;
+  let failures = 0;
+  const testCases = [];
+  for (const id of ids) {
+    const reasons = [];
+    const scoreLines = [];
+    const tier1 = tier1Scores.get(id);
+    if (tier1 != null) {
+      scoreLines.push(`tier1=${tier1.toFixed(3)} threshold=${args.thresholds.tier1.toFixed(3)}`);
+      if (tier1 < args.thresholds.tier1) {
+        reasons.push(
+          `Tier 1 score ${tier1.toFixed(3)} below threshold ${args.thresholds.tier1.toFixed(3)}`
+        );
+      }
+    }
+    const tier2 = tier2Scores.get(id);
+    if (tier2 != null) {
+      scoreLines.push(`tier2=${tier2.toFixed(3)} threshold=${args.thresholds.tier2.toFixed(3)}`);
+      if (tier2 < args.thresholds.tier2) {
+        reasons.push(
+          `Tier 2 score ${tier2.toFixed(3)} below threshold ${args.thresholds.tier2.toFixed(3)}`
+        );
+      }
+    }
+    const tier3 = tier3Scores.get(id);
+    if (tier3 != null) {
+      scoreLines.push(`tier3=${tier3.toFixed(3)} threshold=${args.thresholds.tier3.toFixed(3)}`);
+      if (tier3 < args.thresholds.tier3) {
+        reasons.push(
+          `Tier 3 score ${tier3.toFixed(3)} below threshold ${args.thresholds.tier3.toFixed(3)}`
+        );
+      }
+    }
+    const testcase = [];
+    testcase.push(
+      `  <testcase classname="${escapeXml(className)}" name="${escapeXml(id)}" time="0">`
+    );
+    if (reasons.length > 0) {
+      failures += 1;
+      testcase.push(
+        `    <failure message="${escapeXml("traits eval threshold failure")}">${escapeXml(
+          reasons.join(" | ")
+        )}</failure>`
+      );
+    }
+    if (scoreLines.length > 0) {
+      testcase.push(`    <system-out>${escapeXml(scoreLines.join(" | "))}</system-out>`);
+    }
+    testcase.push("  </testcase>");
+    testCases.push(testcase.join("\n"));
+  }
+  const xml = [
+    '<?xml version="1.0" encoding="UTF-8"?>',
+    "<testsuites>",
+    `  <testsuite name="traits.eval" tests="${ids.length}" failures="${failures}" errors="0" skipped="0" time="0">`,
+    `    <properties><property name="profile" value="${escapeXml(args.profilePath)}" /><property name="model" value="${escapeXml(args.model)}" /><property name="threshold_tier1" value="${args.thresholds.tier1.toFixed(3)}" /><property name="threshold_tier2" value="${args.thresholds.tier2.toFixed(3)}" /><property name="threshold_tier3" value="${args.thresholds.tier3.toFixed(3)}" /></properties>`,
+    ...testCases,
+    "  </testsuite>",
+    "</testsuites>"
+  ].join("\n");
+  return {
+    xml,
+    tests: ids.length,
+    failures
+  };
+}
 async function runEval(args, io = process) {
   const parsed = parseEvalArgs(args);
   if ("error" in parsed) {
@@ -515,6 +725,8 @@ async function runEval(args, io = process) {
     const payload = {
       profile: profilePath,
       model: options.model,
+      format: options.format,
+      suite: options.suite,
       tier_requested: requestedTier,
       tier_executed: tierResolution.tier_executed,
       tier_resolution: tierResolution,
@@ -531,11 +743,22 @@ async function runEval(args, io = process) {
         errors: evaluation.validation.errors.length
       };
     }
-    if (options.json) {
+    if (options.format === "json") {
       io.stdout.write(`${JSON.stringify(payload, null, 2)}
 `);
       return 0;
     }
+    if (options.format === "junit") {
+      const junit = buildJUnitReport({
+        profilePath,
+        model: options.model,
+        tierReports,
+        thresholds: resolveJUnitThresholds(options)
+      });
+      io.stdout.write(`${junit.xml}
+`);
+      return junit.failures > 0 ? 1 : 0;
+    }
     if (tierReports.tier1) {
       io.stdout.write(`Tier 1 average score: ${tierReports.tier1.average_score.toFixed(3)}
 `);
@@ -547,10 +770,16 @@ async function runEval(args, io = process) {
     if (tierReports.tier2) {
       io.stdout.write(`Tier 2 average score: ${tierReports.tier2.average_score.toFixed(3)}
 `);
+      io.stdout.write(
+        "Note: Tier 2 embedding scores are directionally useful but sensitive to model granularity.\n"
+      );
     }
     if (tierReports.tier3) {
       io.stdout.write(`Tier 3 average score: ${tierReports.tier3.average_score.toFixed(3)}
 `);
+      io.stdout.write(
+        "Note: Tier 3 judge scores are noisy across runs. Do not use as a sole merge gate.\n"
+      );
     }
     if (baselineReport?.tier1) {
       io.stdout.write(
@@ -578,12 +807,12 @@ async function runEval(args, io = process) {
     return 0;
   } catch (error) {
     const typedError = error;
-    if ((typedError.code === "E_EVAL_TIER2_UNAVAILABLE" || typedError.code === "E_EVAL_TIER3_UNAVAILABLE") && !options.json) {
+    if ((typedError.code === "E_EVAL_TIER2_UNAVAILABLE" || typedError.code === "E_EVAL_TIER3_UNAVAILABLE") && options.format !== "json") {
       io.stderr.write(`Error: ${typedError.message ?? "Evaluation tier unavailable."}
 `);
       return 2;
     }
-    if ((typedError.code === "E_EVAL_TIER2_UNAVAILABLE" || typedError.code === "E_EVAL_TIER3_UNAVAILABLE") && options.json) {
+    if ((typedError.code === "E_EVAL_TIER2_UNAVAILABLE" || typedError.code === "E_EVAL_TIER3_UNAVAILABLE") && options.format === "json") {
       io.stdout.write(
         `${JSON.stringify(
           {
@@ -599,7 +828,7 @@ async function runEval(args, io = process) {
     }
     const validation = typedError.validation;
     if (typedError.code === "E_EVAL_VALIDATION" && validation) {
-      if (options.json) {
+      if (options.format === "json") {
         io.stdout.write(
           `${JSON.stringify(
             {

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@traits-dev/cli",
-  "version": "0.2.0",
+  "version": "0.4.0",
   "description": "traits.dev command-line interface for voice profile init, validate, compile, eval, and import workflows.",
   "keywords": [
     "traits-dev",
@@ -41,7 +41,7 @@
     "provenance": true
   },
   "dependencies": {
-    "@traits-dev/core": "^0.2.0"
+    "@traits-dev/core": "^0.4.0"
   },
   "devDependencies": {
     "@types/node": "^25.2.3",