@intentsolutions/jrig-cli 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +39 -2
  2. package/package.json +2 -2
package/dist/index.js CHANGED
@@ -82,6 +82,9 @@ var CriterionSchema = z.object({
82
82
  deterministic_check_params: z.record(z.string(), z.unknown()).optional().describe(
83
83
  "Parameters forwarded to the deterministic check (e.g. { value: 'needle' } for 'contains', { pattern: '\\\\d+' } for 'regex_match')"
84
84
  )
85
+ }).refine((c) => c.method !== "deterministic" || !!c.deterministic_check, {
86
+ message: "deterministic criteria must define deterministic_check",
87
+ path: ["deterministic_check"]
85
88
  });
86
89
  var TestCaseTier = z2.enum(["core", "edge", "regression", "adversarial"]);
87
90
  var TriggerExpectation = z2.enum(["should_trigger", "should_not_trigger"]);
@@ -115,6 +118,19 @@ var EvalSpecSchema = z3.object({
115
118
  models: z3.array(ModelTarget).default(["sonnet"]).describe("Models to test independently"),
116
119
  siblings: z3.array(SiblingSkillSchema).optional().describe("Sibling skills for pack-sensitive evaluation"),
117
120
  tags: z3.array(z3.string()).optional().describe("Categorization tags")
121
+ }).superRefine((spec, ctx) => {
122
+ const knownCriteria = new Set(spec.criteria.map((c) => c.id));
123
+ spec.test_cases.forEach((tc, ti) => {
124
+ tc.criteria_ids?.forEach((cid, ci) => {
125
+ if (!knownCriteria.has(cid)) {
126
+ ctx.addIssue({
127
+ code: z3.ZodIssueCode.custom,
128
+ message: `test case "${tc.id}" references unknown criterion id "${cid}"`,
129
+ path: ["test_cases", ti, "criteria_ids", ci]
130
+ });
131
+ }
132
+ });
133
+ });
118
134
  });
119
135
  var EvalContractSchema = z4.object({
120
136
  contract_version: z4.literal("1.0").describe("Schema version for forward compatibility"),
@@ -1009,6 +1025,16 @@ async function judgeWithLLM(criterion, outcome, provider, model) {
1009
1025
  };
1010
1026
  }
1011
1027
  }
1028
/**
 * Narrow a spec's criteria to the ones a single test case opts into.
 *
 * @param {Array<{id: string}>} criteria - Every criterion declared by the spec.
 * @param {string[] | null | undefined} criteriaIds - Ids listed by the test
 *   case. A nullish value means "no restriction".
 * @returns {Array<{id: string}>} The `criteria` array itself when
 *   `criteriaIds` is nullish; otherwise a new array holding the matching
 *   criteria, in the order they appear in `criteria`.
 * @throws {Error} When `criteriaIds` names an id that is not in `criteria`.
 */
function selectCriteriaForTestCase(criteria, criteriaIds) {
  // `== null` matches both null and undefined, mirroring the schema-level
  // check, which uses optional chaining on `criteria_ids`.
  if (criteriaIds == null) return criteria;

  // De-duplicate up front so a repeated unknown id is reported only once.
  const wanted = new Set(criteriaIds);
  const available = new Set(criteria.map((c) => c.id));
  const unknown = [...wanted].filter((id) => !available.has(id));
  if (unknown.length > 0) {
    throw new Error(`Test case references unknown criteria_ids: ${unknown.join(", ")}`);
  }

  return criteria.filter((c) => wanted.has(c.id));
}
1012
1038
  function computeScoreCard(results, criteria, regressions = []) {
1013
1039
  const criteriaMap = new Map(criteria.map((c) => [c.id, c]));
1014
1040
  let passed = 0, failed = 0, unsure = 0, blockerFailures = 0;
@@ -3019,9 +3045,20 @@ function registerEvalCommand(program) {
3019
3045
  ` Functional: ${outcomes.length}/${spec.test_cases.length} test case(s) executed`
3020
3046
  );
3021
3047
  }
3048
+ const testCaseById = new Map(spec.test_cases.map((tc) => [tc.id, tc]));
3022
3049
  const allJudgments = [];
3023
3050
  for (const outcome of outcomes) {
3024
- const judgments = await judgeCriteria(spec.criteria, outcome, providers.judge, {
3051
+ const testCase = testCaseById.get(outcome.test_case_id);
3052
+ if (!testCase) {
3053
+ throw new Error(
3054
+ `Outcome references unknown test case id: "${outcome.test_case_id}"`
3055
+ );
3056
+ }
3057
+ const applicableCriteria = selectCriteriaForTestCase(
3058
+ spec.criteria,
3059
+ testCase.criteria_ids
3060
+ );
3061
+ const judgments = await judgeCriteria(applicableCriteria, outcome, providers.judge, {
3025
3062
  model
3026
3063
  });
3027
3064
  for (const j of judgments) {
@@ -4207,7 +4244,7 @@ function registerSkillSignalCommands(program) {
4207
4244
  import { registerRefineCommand } from "@intentsolutions/refiner";
4208
4245
  function createProgram() {
4209
4246
  const program = new Command();
4210
- program.name("j-rig").description("Seven-layer binary evaluation harness for Claude Skills").version("0.1.0");
4247
+ program.name("j-rig").description("Seven-layer binary evaluation harness for Claude Skills").version("0.1.1");
4211
4248
  registerCheckCommand(program);
4212
4249
  registerValidateCommand(program);
4213
4250
  registerEvalCommand(program);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@intentsolutions/jrig-cli",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "private": false,
5
5
  "description": "J-Rig seven-layer binary eval CLI for Claude Skills — the `j-rig` command: package integrity, trigger/functional/regression/baseline scoring, optimizer, and rollout-gate evidence. Self-contained (bundles the internal eval engine).",
6
6
  "keywords": [
@@ -61,8 +61,8 @@
61
61
  "zod": "^4.4.3"
62
62
  },
63
63
  "devDependencies": {
64
- "@j-rig/core": "2.1.0",
65
64
  "@j-rig/db": "2.1.0",
65
+ "@j-rig/core": "2.1.0",
66
66
  "@j-rig/migrate": "2.1.0"
67
67
  },
68
68
  "scripts": {