@traits-dev/cli 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/traits.js +236 -7
  2. package/package.json +2 -2
package/dist/traits.js CHANGED
@@ -22,6 +22,8 @@ function printCompileUsage(out = process.stderr) {
22
22
  " --model <model> Model target (required)",
23
23
  " --json Output structured JSON",
24
24
  " --strict Treat warnings as compile-blocking",
25
+ " --budget Print estimated token count (chars/4)",
26
+ " --budget-limit <tokens> Warn to stderr if estimate exceeds limit",
25
27
  " --explain Include compilation trace output",
26
28
  " --context key=value Activate context adaptation (repeatable)",
27
29
  " --knowledge-base-dir Directory containing compiler pattern files",
@@ -52,6 +54,8 @@ function parseCompileArgs(args) {
52
54
  model: null,
53
55
  strict: false,
54
56
  json: false,
57
+ budget: false,
58
+ budgetLimit: null,
55
59
  explain: false,
56
60
  verbose: false,
57
61
  noColor: false,
@@ -70,6 +74,10 @@ function parseCompileArgs(args) {
70
74
  result.json = true;
71
75
  continue;
72
76
  }
77
+ if (arg === "--budget") {
78
+ result.budget = true;
79
+ continue;
80
+ }
73
81
  if (arg === "--explain") {
74
82
  result.explain = true;
75
83
  continue;
@@ -82,7 +90,7 @@ function parseCompileArgs(args) {
82
90
  result.noColor = true;
83
91
  continue;
84
92
  }
85
- if (arg === "--model" || arg === "--bundled-profiles-dir" || arg === "--context" || arg === "--knowledge-base-dir") {
93
+ if (arg === "--model" || arg === "--bundled-profiles-dir" || arg === "--context" || arg === "--knowledge-base-dir" || arg === "--budget-limit") {
86
94
  const value = args[index + 1];
87
95
  if (!value) return { error: `Missing value for "${arg}"` };
88
96
  if (arg === "--model") {
@@ -91,6 +99,12 @@ function parseCompileArgs(args) {
91
99
  result.bundledProfilesDir = value;
92
100
  } else if (arg === "--knowledge-base-dir") {
93
101
  result.knowledgeBaseDir = value;
102
+ } else if (arg === "--budget-limit") {
103
+ const parsedBudgetLimit = Number(value);
104
+ if (!Number.isFinite(parsedBudgetLimit) || parsedBudgetLimit <= 0) {
105
+ return { error: `Invalid value for "--budget-limit": "${value}"` };
106
+ }
107
+ result.budgetLimit = Math.round(parsedBudgetLimit);
94
108
  } else {
95
109
  const parsedContext = parseContextArg(value);
96
110
  if ("error" in parsedContext) return { error: parsedContext.error };
@@ -111,8 +125,14 @@ function parseCompileArgs(args) {
111
125
  if (!result.model) {
112
126
  return { error: 'Missing required option "--model"' };
113
127
  }
128
+ if (result.budgetLimit != null) {
129
+ result.budget = true;
130
+ }
114
131
  return { value: result };
115
132
  }
133
+ function estimateBudgetTokens(text) {
134
+ return Math.ceil(String(text ?? "").length / 4);
135
+ }
116
136
  function runCompile(args, io = process) {
117
137
  const parsed = parseCompileArgs(args);
118
138
  if ("error" in parsed) {
@@ -156,10 +176,32 @@ function runCompile(args, io = process) {
156
176
  if (options.json) {
157
177
  io.stdout.write(`${JSON.stringify(compiled, null, 2)}
158
178
  `);
179
+ if (options.budget) {
180
+ const budgetEstimate = estimateBudgetTokens(compiled.text);
181
+ io.stderr.write(`Estimated token count: ${budgetEstimate}
182
+ `);
183
+ if (options.budgetLimit != null && budgetEstimate > options.budgetLimit) {
184
+ io.stderr.write(
185
+ `Warning: Estimated token count ${budgetEstimate} exceeds budget limit ${options.budgetLimit}
186
+ `
187
+ );
188
+ }
189
+ }
159
190
  return 0;
160
191
  }
161
192
  io.stdout.write(`${compiled.text}
162
193
  `);
194
+ if (options.budget) {
195
+ const budgetEstimate = estimateBudgetTokens(compiled.text);
196
+ io.stderr.write(`Estimated token count: ${budgetEstimate}
197
+ `);
198
+ if (options.budgetLimit != null && budgetEstimate > options.budgetLimit) {
199
+ io.stderr.write(
200
+ `Warning: Estimated token count ${budgetEstimate} exceeds budget limit ${options.budgetLimit}
201
+ `
202
+ );
203
+ }
204
+ }
163
205
  if (options.explain && compiled.trace) {
164
206
  io.stdout.write(`
165
207
  [TRACE]
@@ -211,6 +253,7 @@ import {
211
253
  import {
212
254
  detectEvalTierAvailability,
213
255
  formatValidationResult as formatValidationResult2,
256
+ loadBuiltInEvalSuite,
214
257
  resolveTierExecution,
215
258
  runOfflineBaselineScaffold,
216
259
  toValidationResultObject as toValidationResultObject2
@@ -224,6 +267,7 @@ function printEvalUsage(out = process.stderr) {
224
267
  "Options:",
225
268
  " --model <model> Model target (required)",
226
269
  " --tier <1|2|3> Highest tier to run (default: highest available)",
270
+ " --suite <name> Built-in baseline suite: support|healthcare|developer",
227
271
  " --provider <name> Judge provider for Tier 3: auto|openai|anthropic",
228
272
  " --embedding-model <name> Embedding model for Tier 2 (OpenAI)",
229
273
  " --judge-model <name> Judge model for Tier 3 provider",
@@ -236,6 +280,11 @@ function printEvalUsage(out = process.stderr) {
236
280
  " --samples <path> JSON file with samples: [{ id, response }]",
237
281
  " --scenarios <path> Alias for --samples in this scaffold",
238
282
  " --json Output structured JSON",
283
+ " --format <text|json|junit> Output format (default: text)",
284
+ " --junit-threshold <num> Global JUnit pass threshold in [0,1] (default: 0.7)",
285
+ " --junit-threshold-tier1 <num> Tier 1 JUnit threshold override",
286
+ " --junit-threshold-tier2 <num> Tier 2 JUnit threshold override",
287
+ " --junit-threshold-tier3 <num> Tier 3 JUnit threshold override",
239
288
  " --strict Treat validation warnings as errors",
240
289
  " --verbose Include command metadata output",
241
290
  " --no-color Disable colorized output",
@@ -251,6 +300,7 @@ function parseEvalArgs(args) {
251
300
  profilePath: null,
252
301
  model: null,
253
302
  tier: null,
303
+ suite: null,
254
304
  provider: "auto",
255
305
  embeddingModel: null,
256
306
  judgeModel: null,
@@ -260,6 +310,11 @@ function parseEvalArgs(args) {
260
310
  maxRetries: null,
261
311
  retryBaseMs: null,
262
312
  json: false,
313
+ format: "text",
314
+ junitThreshold: null,
315
+ junitThresholdTier1: null,
316
+ junitThresholdTier2: null,
317
+ junitThresholdTier3: null,
263
318
  strict: false,
264
319
  verbose: false,
265
320
  noColor: false,
@@ -274,6 +329,7 @@ function parseEvalArgs(args) {
274
329
  const arg = args[index];
275
330
  if (arg === "--json") {
276
331
  result.json = true;
332
+ result.format = "json";
277
333
  continue;
278
334
  }
279
335
  if (arg === "--strict") {
@@ -300,14 +356,18 @@ function parseEvalArgs(args) {
300
356
  result.constraintImpact = true;
301
357
  continue;
302
358
  }
303
- if (arg === "--model" || arg === "--tier" || arg === "--provider" || arg === "--embedding-model" || arg === "--judge-model" || arg === "--openai-base-url" || arg === "--anthropic-base-url" || arg === "--timeout-ms" || arg === "--max-retries" || arg === "--retry-base-ms" || arg === "--response" || arg === "--samples" || arg === "--scenarios") {
359
+ if (arg === "--model" || arg === "--tier" || arg === "--suite" || arg === "--provider" || arg === "--format" || arg === "--embedding-model" || arg === "--judge-model" || arg === "--openai-base-url" || arg === "--anthropic-base-url" || arg === "--timeout-ms" || arg === "--max-retries" || arg === "--retry-base-ms" || arg === "--junit-threshold" || arg === "--junit-threshold-tier1" || arg === "--junit-threshold-tier2" || arg === "--junit-threshold-tier3" || arg === "--response" || arg === "--samples" || arg === "--scenarios") {
304
360
  const value = args[index + 1];
305
361
  if (!value) return { error: `Missing value for "${arg}"` };
306
362
  if (arg === "--model") result.model = value;
307
363
  if (arg === "--tier") result.tier = Number(value);
364
+ if (arg === "--suite") result.suite = String(value).toLowerCase();
308
365
  if (arg === "--provider") {
309
366
  result.provider = String(value).toLowerCase();
310
367
  }
368
+ if (arg === "--format") {
369
+ result.format = String(value).toLowerCase();
370
+ }
311
371
  if (arg === "--embedding-model") result.embeddingModel = value;
312
372
  if (arg === "--judge-model") result.judgeModel = value;
313
373
  if (arg === "--openai-base-url") result.openaiBaseUrl = value;
@@ -315,6 +375,10 @@ function parseEvalArgs(args) {
315
375
  if (arg === "--timeout-ms") result.timeoutMs = Number(value);
316
376
  if (arg === "--max-retries") result.maxRetries = Number(value);
317
377
  if (arg === "--retry-base-ms") result.retryBaseMs = Number(value);
378
+ if (arg === "--junit-threshold") result.junitThreshold = Number(value);
379
+ if (arg === "--junit-threshold-tier1") result.junitThresholdTier1 = Number(value);
380
+ if (arg === "--junit-threshold-tier2") result.junitThresholdTier2 = Number(value);
381
+ if (arg === "--junit-threshold-tier3") result.junitThresholdTier3 = Number(value);
318
382
  if (arg === "--response") result.responses.push(value);
319
383
  if (arg === "--samples" || arg === "--scenarios") result.samplesPath = value;
320
384
  index += 1;
@@ -338,6 +402,20 @@ function parseEvalArgs(args) {
338
402
  if (!["auto", "openai", "anthropic"].includes(result.provider)) {
339
403
  return { error: 'Invalid "--provider" value. Expected auto, openai, or anthropic.' };
340
404
  }
405
+ if (result.suite != null && !["support", "healthcare", "developer"].includes(
406
+ result.suite
407
+ )) {
408
+ return { error: 'Invalid "--suite" value. Expected support, healthcare, or developer.' };
409
+ }
410
+ if (result.suite != null && result.samplesPath != null) {
411
+ return { error: 'Use either "--suite" or "--samples/--scenarios", not both.' };
412
+ }
413
+ if (result.suite != null && result.responses.length > 0) {
414
+ return { error: 'Use either "--suite" or "--response", not both.' };
415
+ }
416
+ if (!["text", "json", "junit"].includes(result.format)) {
417
+ return { error: 'Invalid "--format" value. Expected text, json, or junit.' };
418
+ }
341
419
  if (result.timeoutMs != null && (!Number.isInteger(result.timeoutMs) || result.timeoutMs < 0)) {
342
420
  return { error: 'Invalid "--timeout-ms" value. Expected a non-negative integer.' };
343
421
  }
@@ -347,9 +425,33 @@ function parseEvalArgs(args) {
347
425
  if (result.retryBaseMs != null && (!Number.isInteger(result.retryBaseMs) || result.retryBaseMs < 0)) {
348
426
  return { error: 'Invalid "--retry-base-ms" value. Expected a non-negative integer.' };
349
427
  }
428
+ for (const [flag, value] of [
429
+ ["--junit-threshold", result.junitThreshold],
430
+ ["--junit-threshold-tier1", result.junitThresholdTier1],
431
+ ["--junit-threshold-tier2", result.junitThresholdTier2],
432
+ ["--junit-threshold-tier3", result.junitThresholdTier3]
433
+ ]) {
434
+ if (value == null) continue;
435
+ if (!Number.isFinite(value) || value < 0 || value > 1) {
436
+ return { error: `Invalid "${flag}" value. Expected a number in [0, 1].` };
437
+ }
438
+ }
350
439
  return { value: result };
351
440
  }
352
441
  function loadSamples(options, cwd) {
442
+ if (options.suite) {
443
+ const suite = loadBuiltInEvalSuite(options.suite);
444
+ if (!suite) {
445
+ throw new Error(
446
+ `Unknown suite "${options.suite}". Expected support, healthcare, or developer.`
447
+ );
448
+ }
449
+ return suite.scenarios.map((scenario) => ({
450
+ id: scenario.id,
451
+ prompt: scenario.messages.map((message) => `${message.role}: ${message.content}`).join("\n"),
452
+ response: scenario.expected_behavior ?? ""
453
+ }));
454
+ }
353
455
  if (options.samplesPath) {
354
456
  const sampleFile = path2.resolve(cwd, options.samplesPath);
355
457
  const parsed = JSON.parse(fs.readFileSync(sampleFile, "utf8"));
@@ -377,10 +479,118 @@ function loadSamples(options, cwd) {
377
479
  }));
378
480
  }
379
481
  function writeProgress(io, options, message) {
380
- if (options.json) return;
482
+ if (options.format !== "text") return;
381
483
  io.stderr.write(`${message}
382
484
  `);
383
485
  }
486
+ function escapeXml(value) {
487
+ return String(value ?? "").replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
488
+ }
489
+ function resolveJUnitThresholds(options) {
490
+ const base = options.junitThreshold ?? 0.7;
491
+ return {
492
+ tier1: options.junitThresholdTier1 ?? base,
493
+ tier2: options.junitThresholdTier2 ?? base,
494
+ tier3: options.junitThresholdTier3 ?? base
495
+ };
496
+ }
497
+ function buildSampleScoreMap(samples) {
498
+ return new Map((samples ?? []).map((sample) => [String(sample.id), Number(sample.score)]));
499
+ }
500
+ function collectScenarioIds(reports) {
501
+ const ids = [];
502
+ const seen = /* @__PURE__ */ new Set();
503
+ for (const sample of reports.tier1?.samples ?? []) {
504
+ const id = String(sample.id);
505
+ if (seen.has(id)) continue;
506
+ seen.add(id);
507
+ ids.push(id);
508
+ }
509
+ for (const sample of reports.tier2?.samples ?? []) {
510
+ const id = String(sample.id);
511
+ if (seen.has(id)) continue;
512
+ seen.add(id);
513
+ ids.push(id);
514
+ }
515
+ for (const sample of reports.tier3?.samples ?? []) {
516
+ const id = String(sample.id);
517
+ if (seen.has(id)) continue;
518
+ seen.add(id);
519
+ ids.push(id);
520
+ }
521
+ return ids;
522
+ }
523
+ function buildJUnitReport(args) {
524
+ const ids = collectScenarioIds(args.tierReports);
525
+ const tier1Scores = buildSampleScoreMap(args.tierReports.tier1?.samples);
526
+ const tier2Scores = buildSampleScoreMap(args.tierReports.tier2?.samples);
527
+ const tier3Scores = buildSampleScoreMap(args.tierReports.tier3?.samples);
528
+ const className = `traits.eval.${path2.basename(args.profilePath, path2.extname(args.profilePath))}`;
529
+ let failures = 0;
530
+ const testCases = [];
531
+ for (const id of ids) {
532
+ const reasons = [];
533
+ const scoreLines = [];
534
+ const tier1 = tier1Scores.get(id);
535
+ if (tier1 != null) {
536
+ scoreLines.push(`tier1=${tier1.toFixed(3)} threshold=${args.thresholds.tier1.toFixed(3)}`);
537
+ if (tier1 < args.thresholds.tier1) {
538
+ reasons.push(
539
+ `Tier 1 score ${tier1.toFixed(3)} below threshold ${args.thresholds.tier1.toFixed(3)}`
540
+ );
541
+ }
542
+ }
543
+ const tier2 = tier2Scores.get(id);
544
+ if (tier2 != null) {
545
+ scoreLines.push(`tier2=${tier2.toFixed(3)} threshold=${args.thresholds.tier2.toFixed(3)}`);
546
+ if (tier2 < args.thresholds.tier2) {
547
+ reasons.push(
548
+ `Tier 2 score ${tier2.toFixed(3)} below threshold ${args.thresholds.tier2.toFixed(3)}`
549
+ );
550
+ }
551
+ }
552
+ const tier3 = tier3Scores.get(id);
553
+ if (tier3 != null) {
554
+ scoreLines.push(`tier3=${tier3.toFixed(3)} threshold=${args.thresholds.tier3.toFixed(3)}`);
555
+ if (tier3 < args.thresholds.tier3) {
556
+ reasons.push(
557
+ `Tier 3 score ${tier3.toFixed(3)} below threshold ${args.thresholds.tier3.toFixed(3)}`
558
+ );
559
+ }
560
+ }
561
+ const testcase = [];
562
+ testcase.push(
563
+ ` <testcase classname="${escapeXml(className)}" name="${escapeXml(id)}" time="0">`
564
+ );
565
+ if (reasons.length > 0) {
566
+ failures += 1;
567
+ testcase.push(
568
+ ` <failure message="${escapeXml("traits eval threshold failure")}">${escapeXml(
569
+ reasons.join(" | ")
570
+ )}</failure>`
571
+ );
572
+ }
573
+ if (scoreLines.length > 0) {
574
+ testcase.push(` <system-out>${escapeXml(scoreLines.join(" | "))}</system-out>`);
575
+ }
576
+ testcase.push(" </testcase>");
577
+ testCases.push(testcase.join("\n"));
578
+ }
579
+ const xml = [
580
+ '<?xml version="1.0" encoding="UTF-8"?>',
581
+ "<testsuites>",
582
+ ` <testsuite name="traits.eval" tests="${ids.length}" failures="${failures}" errors="0" skipped="0" time="0">`,
583
+ ` <properties><property name="profile" value="${escapeXml(args.profilePath)}" /><property name="model" value="${escapeXml(args.model)}" /><property name="threshold_tier1" value="${args.thresholds.tier1.toFixed(3)}" /><property name="threshold_tier2" value="${args.thresholds.tier2.toFixed(3)}" /><property name="threshold_tier3" value="${args.thresholds.tier3.toFixed(3)}" /></properties>`,
584
+ ...testCases,
585
+ " </testsuite>",
586
+ "</testsuites>"
587
+ ].join("\n");
588
+ return {
589
+ xml,
590
+ tests: ids.length,
591
+ failures
592
+ };
593
+ }
384
594
  async function runEval(args, io = process) {
385
595
  const parsed = parseEvalArgs(args);
386
596
  if ("error" in parsed) {
@@ -515,6 +725,8 @@ async function runEval(args, io = process) {
515
725
  const payload = {
516
726
  profile: profilePath,
517
727
  model: options.model,
728
+ format: options.format,
729
+ suite: options.suite,
518
730
  tier_requested: requestedTier,
519
731
  tier_executed: tierResolution.tier_executed,
520
732
  tier_resolution: tierResolution,
@@ -531,11 +743,22 @@ async function runEval(args, io = process) {
531
743
  errors: evaluation.validation.errors.length
532
744
  };
533
745
  }
534
- if (options.json) {
746
+ if (options.format === "json") {
535
747
  io.stdout.write(`${JSON.stringify(payload, null, 2)}
536
748
  `);
537
749
  return 0;
538
750
  }
751
+ if (options.format === "junit") {
752
+ const junit = buildJUnitReport({
753
+ profilePath,
754
+ model: options.model,
755
+ tierReports,
756
+ thresholds: resolveJUnitThresholds(options)
757
+ });
758
+ io.stdout.write(`${junit.xml}
759
+ `);
760
+ return junit.failures > 0 ? 1 : 0;
761
+ }
539
762
  if (tierReports.tier1) {
540
763
  io.stdout.write(`Tier 1 average score: ${tierReports.tier1.average_score.toFixed(3)}
541
764
  `);
@@ -547,10 +770,16 @@ async function runEval(args, io = process) {
547
770
  if (tierReports.tier2) {
548
771
  io.stdout.write(`Tier 2 average score: ${tierReports.tier2.average_score.toFixed(3)}
549
772
  `);
773
+ io.stdout.write(
774
+ "Note: Tier 2 embedding scores are directionally useful but sensitive to model granularity.\n"
775
+ );
550
776
  }
551
777
  if (tierReports.tier3) {
552
778
  io.stdout.write(`Tier 3 average score: ${tierReports.tier3.average_score.toFixed(3)}
553
779
  `);
780
+ io.stdout.write(
781
+ "Note: Tier 3 judge scores are noisy across runs. Do not use as a sole merge gate.\n"
782
+ );
554
783
  }
555
784
  if (baselineReport?.tier1) {
556
785
  io.stdout.write(
@@ -578,12 +807,12 @@ async function runEval(args, io = process) {
578
807
  return 0;
579
808
  } catch (error) {
580
809
  const typedError = error;
581
- if ((typedError.code === "E_EVAL_TIER2_UNAVAILABLE" || typedError.code === "E_EVAL_TIER3_UNAVAILABLE") && !options.json) {
810
+ if ((typedError.code === "E_EVAL_TIER2_UNAVAILABLE" || typedError.code === "E_EVAL_TIER3_UNAVAILABLE") && options.format !== "json") {
582
811
  io.stderr.write(`Error: ${typedError.message ?? "Evaluation tier unavailable."}
583
812
  `);
584
813
  return 2;
585
814
  }
586
- if ((typedError.code === "E_EVAL_TIER2_UNAVAILABLE" || typedError.code === "E_EVAL_TIER3_UNAVAILABLE") && options.json) {
815
+ if ((typedError.code === "E_EVAL_TIER2_UNAVAILABLE" || typedError.code === "E_EVAL_TIER3_UNAVAILABLE") && options.format === "json") {
587
816
  io.stdout.write(
588
817
  `${JSON.stringify(
589
818
  {
@@ -599,7 +828,7 @@ async function runEval(args, io = process) {
599
828
  }
600
829
  const validation = typedError.validation;
601
830
  if (typedError.code === "E_EVAL_VALIDATION" && validation) {
602
- if (options.json) {
831
+ if (options.format === "json") {
603
832
  io.stdout.write(
604
833
  `${JSON.stringify(
605
834
  {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@traits-dev/cli",
3
- "version": "0.2.0",
3
+ "version": "0.4.0",
4
4
  "description": "traits.dev command-line interface for voice profile init, validate, compile, eval, and import workflows.",
5
5
  "keywords": [
6
6
  "traits-dev",
@@ -41,7 +41,7 @@
41
41
  "provenance": true
42
42
  },
43
43
  "dependencies": {
44
- "@traits-dev/core": "^0.2.0"
44
+ "@traits-dev/core": "^0.4.0"
45
45
  },
46
46
  "devDependencies": {
47
47
  "@types/node": "^25.2.3",