@jean.gnc/harness-kit 0.11.1 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/README.md +1 -1
  2. package/dist/agent/schema.d.ts +2 -2
  3. package/dist/cli.js +65 -0
  4. package/dist/cli.js.map +1 -1
  5. package/dist/compile/emit.d.ts +0 -3
  6. package/dist/compile/emit.d.ts.map +1 -1
  7. package/dist/compile/emit.js +2 -3
  8. package/dist/compile/emit.js.map +1 -1
  9. package/dist/compile/index.d.ts.map +1 -1
  10. package/dist/compile/index.js +0 -9
  11. package/dist/compile/index.js.map +1 -1
  12. package/dist/compile/validators.d.ts +1 -9
  13. package/dist/compile/validators.d.ts.map +1 -1
  14. package/dist/compile/validators.js +13 -29
  15. package/dist/compile/validators.js.map +1 -1
  16. package/dist/configs/compile.d.ts.map +1 -1
  17. package/dist/configs/compile.js +1 -3
  18. package/dist/configs/compile.js.map +1 -1
  19. package/dist/eval/cases.d.ts +14 -0
  20. package/dist/eval/cases.d.ts.map +1 -0
  21. package/dist/eval/cases.js +84 -0
  22. package/dist/eval/cases.js.map +1 -0
  23. package/dist/eval/detect.d.ts +14 -0
  24. package/dist/eval/detect.d.ts.map +1 -0
  25. package/dist/eval/detect.js +105 -0
  26. package/dist/eval/detect.js.map +1 -0
  27. package/dist/eval/index.d.ts +20 -0
  28. package/dist/eval/index.d.ts.map +1 -0
  29. package/dist/eval/index.js +46 -0
  30. package/dist/eval/index.js.map +1 -0
  31. package/dist/eval/report.d.ts +15 -0
  32. package/dist/eval/report.d.ts.map +1 -0
  33. package/dist/eval/report.js +81 -0
  34. package/dist/eval/report.js.map +1 -0
  35. package/dist/eval/runner.d.ts +17 -0
  36. package/dist/eval/runner.d.ts.map +1 -0
  37. package/dist/eval/runner.js +89 -0
  38. package/dist/eval/runner.js.map +1 -0
  39. package/dist/eval/schema.d.ts +253 -0
  40. package/dist/eval/schema.d.ts.map +1 -0
  41. package/dist/eval/schema.js +50 -0
  42. package/dist/eval/schema.js.map +1 -0
  43. package/dist/eval/score.d.ts +13 -0
  44. package/dist/eval/score.d.ts.map +1 -0
  45. package/dist/eval/score.js +52 -0
  46. package/dist/eval/score.js.map +1 -0
  47. package/dist/index.d.ts +4 -0
  48. package/dist/index.d.ts.map +1 -1
  49. package/dist/index.js +2 -0
  50. package/dist/index.js.map +1 -1
  51. package/package.json +1 -1
@@ -0,0 +1,253 @@
1
+ import { z } from "zod";
2
+ declare const Expectation: z.ZodUnion<[z.ZodObject<{
3
+ first: z.ZodString;
4
+ not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
5
+ }, "strip", z.ZodTypeAny, {
6
+ first: string;
7
+ not?: string[] | undefined;
8
+ }, {
9
+ first: string;
10
+ not?: string[] | undefined;
11
+ }>, z.ZodObject<{
12
+ anyOf: z.ZodArray<z.ZodString, "many">;
13
+ not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
14
+ }, "strip", z.ZodTypeAny, {
15
+ anyOf: string[];
16
+ not?: string[] | undefined;
17
+ }, {
18
+ anyOf: string[];
19
+ not?: string[] | undefined;
20
+ }>, z.ZodObject<{
21
+ path: z.ZodArray<z.ZodString, "many">;
22
+ not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
23
+ }, "strip", z.ZodTypeAny, {
24
+ path: string[];
25
+ not?: string[] | undefined;
26
+ }, {
27
+ path: string[];
28
+ not?: string[] | undefined;
29
+ }>, z.ZodObject<{
30
+ noSkill: z.ZodLiteral<true>;
31
+ }, "strip", z.ZodTypeAny, {
32
+ noSkill: true;
33
+ }, {
34
+ noSkill: true;
35
+ }>]>;
36
+ declare const Case: z.ZodObject<{
37
+ id: z.ZodString;
38
+ prompt: z.ZodString;
39
+ expect: z.ZodUnion<[z.ZodObject<{
40
+ first: z.ZodString;
41
+ not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
42
+ }, "strip", z.ZodTypeAny, {
43
+ first: string;
44
+ not?: string[] | undefined;
45
+ }, {
46
+ first: string;
47
+ not?: string[] | undefined;
48
+ }>, z.ZodObject<{
49
+ anyOf: z.ZodArray<z.ZodString, "many">;
50
+ not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
51
+ }, "strip", z.ZodTypeAny, {
52
+ anyOf: string[];
53
+ not?: string[] | undefined;
54
+ }, {
55
+ anyOf: string[];
56
+ not?: string[] | undefined;
57
+ }>, z.ZodObject<{
58
+ path: z.ZodArray<z.ZodString, "many">;
59
+ not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
60
+ }, "strip", z.ZodTypeAny, {
61
+ path: string[];
62
+ not?: string[] | undefined;
63
+ }, {
64
+ path: string[];
65
+ not?: string[] | undefined;
66
+ }>, z.ZodObject<{
67
+ noSkill: z.ZodLiteral<true>;
68
+ }, "strip", z.ZodTypeAny, {
69
+ noSkill: true;
70
+ }, {
71
+ noSkill: true;
72
+ }>]>;
73
+ cwd: z.ZodOptional<z.ZodString>;
74
+ runs: z.ZodOptional<z.ZodNumber>;
75
+ threshold: z.ZodOptional<z.ZodNumber>;
76
+ note: z.ZodOptional<z.ZodString>;
77
+ }, "strip", z.ZodTypeAny, {
78
+ id: string;
79
+ prompt: string;
80
+ expect: {
81
+ first: string;
82
+ not?: string[] | undefined;
83
+ } | {
84
+ anyOf: string[];
85
+ not?: string[] | undefined;
86
+ } | {
87
+ path: string[];
88
+ not?: string[] | undefined;
89
+ } | {
90
+ noSkill: true;
91
+ };
92
+ cwd?: string | undefined;
93
+ runs?: number | undefined;
94
+ threshold?: number | undefined;
95
+ note?: string | undefined;
96
+ }, {
97
+ id: string;
98
+ prompt: string;
99
+ expect: {
100
+ first: string;
101
+ not?: string[] | undefined;
102
+ } | {
103
+ anyOf: string[];
104
+ not?: string[] | undefined;
105
+ } | {
106
+ path: string[];
107
+ not?: string[] | undefined;
108
+ } | {
109
+ noSkill: true;
110
+ };
111
+ cwd?: string | undefined;
112
+ runs?: number | undefined;
113
+ threshold?: number | undefined;
114
+ note?: string | undefined;
115
+ }>;
116
+ export declare const TIERS: readonly ["routing", "solving"];
117
+ export declare const CaseFileSchema: z.ZodObject<{
118
+ suite: z.ZodString;
119
+ tier: z.ZodEnum<["routing", "solving"]>;
120
+ cases: z.ZodArray<z.ZodObject<{
121
+ id: z.ZodString;
122
+ prompt: z.ZodString;
123
+ expect: z.ZodUnion<[z.ZodObject<{
124
+ first: z.ZodString;
125
+ not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
126
+ }, "strip", z.ZodTypeAny, {
127
+ first: string;
128
+ not?: string[] | undefined;
129
+ }, {
130
+ first: string;
131
+ not?: string[] | undefined;
132
+ }>, z.ZodObject<{
133
+ anyOf: z.ZodArray<z.ZodString, "many">;
134
+ not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
135
+ }, "strip", z.ZodTypeAny, {
136
+ anyOf: string[];
137
+ not?: string[] | undefined;
138
+ }, {
139
+ anyOf: string[];
140
+ not?: string[] | undefined;
141
+ }>, z.ZodObject<{
142
+ path: z.ZodArray<z.ZodString, "many">;
143
+ not: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
144
+ }, "strip", z.ZodTypeAny, {
145
+ path: string[];
146
+ not?: string[] | undefined;
147
+ }, {
148
+ path: string[];
149
+ not?: string[] | undefined;
150
+ }>, z.ZodObject<{
151
+ noSkill: z.ZodLiteral<true>;
152
+ }, "strip", z.ZodTypeAny, {
153
+ noSkill: true;
154
+ }, {
155
+ noSkill: true;
156
+ }>]>;
157
+ cwd: z.ZodOptional<z.ZodString>;
158
+ runs: z.ZodOptional<z.ZodNumber>;
159
+ threshold: z.ZodOptional<z.ZodNumber>;
160
+ note: z.ZodOptional<z.ZodString>;
161
+ }, "strip", z.ZodTypeAny, {
162
+ id: string;
163
+ prompt: string;
164
+ expect: {
165
+ first: string;
166
+ not?: string[] | undefined;
167
+ } | {
168
+ anyOf: string[];
169
+ not?: string[] | undefined;
170
+ } | {
171
+ path: string[];
172
+ not?: string[] | undefined;
173
+ } | {
174
+ noSkill: true;
175
+ };
176
+ cwd?: string | undefined;
177
+ runs?: number | undefined;
178
+ threshold?: number | undefined;
179
+ note?: string | undefined;
180
+ }, {
181
+ id: string;
182
+ prompt: string;
183
+ expect: {
184
+ first: string;
185
+ not?: string[] | undefined;
186
+ } | {
187
+ anyOf: string[];
188
+ not?: string[] | undefined;
189
+ } | {
190
+ path: string[];
191
+ not?: string[] | undefined;
192
+ } | {
193
+ noSkill: true;
194
+ };
195
+ cwd?: string | undefined;
196
+ runs?: number | undefined;
197
+ threshold?: number | undefined;
198
+ note?: string | undefined;
199
+ }>, "many">;
200
+ }, "strip", z.ZodTypeAny, {
201
+ suite: string;
202
+ tier: "routing" | "solving";
203
+ cases: {
204
+ id: string;
205
+ prompt: string;
206
+ expect: {
207
+ first: string;
208
+ not?: string[] | undefined;
209
+ } | {
210
+ anyOf: string[];
211
+ not?: string[] | undefined;
212
+ } | {
213
+ path: string[];
214
+ not?: string[] | undefined;
215
+ } | {
216
+ noSkill: true;
217
+ };
218
+ cwd?: string | undefined;
219
+ runs?: number | undefined;
220
+ threshold?: number | undefined;
221
+ note?: string | undefined;
222
+ }[];
223
+ }, {
224
+ suite: string;
225
+ tier: "routing" | "solving";
226
+ cases: {
227
+ id: string;
228
+ prompt: string;
229
+ expect: {
230
+ first: string;
231
+ not?: string[] | undefined;
232
+ } | {
233
+ anyOf: string[];
234
+ not?: string[] | undefined;
235
+ } | {
236
+ path: string[];
237
+ not?: string[] | undefined;
238
+ } | {
239
+ noSkill: true;
240
+ };
241
+ cwd?: string | undefined;
242
+ runs?: number | undefined;
243
+ threshold?: number | undefined;
244
+ note?: string | undefined;
245
+ }[];
246
+ }>;
247
+ export type Tier = (typeof TIERS)[number];
248
+ export type Expectation = z.infer<typeof Expectation>;
249
+ export type EvalCase = z.infer<typeof Case>;
250
+ export type CaseFile = z.infer<typeof CaseFileSchema>;
251
+ export declare function expectedSkills(expectation: Expectation): readonly string[];
252
+ export {};
253
+ //# sourceMappingURL=schema.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../src/eval/schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAyBxB,QAAA,MAAM,WAAW;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;IAKf,CAAC;AAEH,QAAA,MAAM,IAAI;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAQR,CAAC;AAEH,eAAO,MAAM,KAAK,iCAAkC,CAAC;AAErD,eAAO,MAAM,cAAc;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAIzB,CAAC;AAEH,MAAM,MAAM,IAAI,GAAG,CAAC,OAAO,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC;AAC1C,MAAM,MAAM,WAAW,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,WAAW,CAAC,CAAC;AACtD,MAAM,MAAM,QAAQ,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,IAAI,CAAC,CAAC;AAC5C,MAAM,MAAM,QAAQ,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,cAAc,CAAC,CAAC;AAEtD,wBAAgB,cAAc,CAAC,WAAW,EAAE,WAAW,GAAG,SAAS,MAAM,EAAE,CAS1E"}
@@ -0,0 +1,50 @@
1
+ import { z } from "zod";
2
+ import { FQ_ID } from "../ids.js";
3
/** Skill id string that must match the shared FQ_ID pattern from ../ids.js. */
const FqId = z.string().regex(FQ_ID, "must be a `plugin:name` id");

/** Optional list of skill ids, used for the `not` field of an expectation. */
const forbiddenIds = () => z.array(FqId).optional();

/** String schema that rejects the empty string. */
const nonEmptyText = () => z.string().min(1);

/**
 * What a case expects from a run. Exactly one of four shapes:
 *  - `first`:   a single skill id
 *  - `anyOf`:   a list with at least one skill id
 *  - `path`:    a list with at least two skill ids
 *  - `noSkill`: the literal `true`
 * The first three also accept an optional `not` list of skill ids.
 */
const Expectation = z.union([
    z.object({
        first: FqId,
        not: forbiddenIds(),
    }),
    z.object({
        anyOf: z.array(FqId).min(1),
        not: forbiddenIds(),
    }),
    z.object({
        path: z.array(FqId).min(2),
        not: forbiddenIds(),
    }),
    z.object({
        noSkill: z.literal(true),
    }),
]);

/**
 * A single eval case. `id` and `prompt` must be non-empty; `runs`, when given,
 * is a positive integer and `threshold`, when given, lies within [0, 1].
 */
const Case = z.object({
    id: nonEmptyText(),
    prompt: nonEmptyText(),
    expect: Expectation,
    cwd: z.string().optional(),
    runs: z.number().int().positive().optional(),
    threshold: z.number().min(0).max(1).optional(),
    note: z.string().optional(),
});

/** The tier names a case file may declare. */
export const TIERS = ["routing", "solving"];

/** A case file: a named suite, its tier, and at least one case. */
export const CaseFileSchema = z.object({
    suite: nonEmptyText(),
    tier: z.enum(TIERS),
    cases: z.array(Case).min(1),
});
40
/**
 * Collects every skill id an expectation mentions.
 *
 * @param expectation One of the expectation shapes (`first`, `anyOf`, `path`
 *   or `noSkill`).
 * @returns A new array holding the wanted ids (`first`, `anyOf` or `path`)
 *   followed by the ids listed under `not`; empty for a `noSkill` expectation.
 */
export function expectedSkills(expectation) {
    if ("noSkill" in expectation) {
        return [];
    }

    let wanted;
    if ("first" in expectation) {
        wanted = [expectation.first];
    } else if ("anyOf" in expectation) {
        wanted = expectation.anyOf;
    } else {
        wanted = expectation.path;
    }

    // `not` is optional; treat a missing list as empty.
    const excluded = expectation.not ?? [];
    return [...wanted, ...excluded];
}
50
+ //# sourceMappingURL=schema.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"schema.js","sourceRoot":"","sources":["../../src/eval/schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,OAAO,EAAE,KAAK,EAAE,MAAM,WAAW,CAAC;AAElC,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,KAAK,CAAC,KAAK,EAAE,4BAA4B,CAAC,CAAC;AAEnE,MAAM,gBAAgB,GAAG,CAAC,CAAC,MAAM,CAAC;IAChC,KAAK,EAAE,IAAI;IACX,GAAG,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE;CAC9B,CAAC,CAAC;AAEH,MAAM,gBAAgB,GAAG,CAAC,CAAC,MAAM,CAAC;IAChC,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC3B,GAAG,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE;CAC9B,CAAC,CAAC;AAEH,MAAM,eAAe,GAAG,CAAC,CAAC,MAAM,CAAC;IAC/B,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1B,GAAG,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE;CAC9B,CAAC,CAAC;AAEH,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,OAAO,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC;CACzB,CAAC,CAAC;AAEH,MAAM,WAAW,GAAG,CAAC,CAAC,KAAK,CAAC;IAC1B,gBAAgB;IAChB,gBAAgB;IAChB,eAAe;IACf,kBAAkB;CACnB,CAAC,CAAC;AAEH,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC;IACpB,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;IACrB,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;IACzB,MAAM,EAAE,WAAW;IACnB,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC1B,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;IAC5C,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE;IAC9C,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;CAC5B,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,KAAK,GAAG,CAAC,SAAS,EAAE,SAAS,CAAU,CAAC;AAErD,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,CAAC,MAAM,CAAC;IACrC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;IACxB,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;IACnB,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;CAC5B,CAAC,CAAC;AAOH,MAAM,UAAU,cAAc,CAAC,WAAwB;IACrD,IAAI,SAAS,IAAI,WAAW;QAAE,OAAO,EAAE,CAAC;IACxC,MAAM,QAAQ,GACZ,OAAO,IAAI,WAAW;QACpB,CAAC,CAAC,CAAC,WAAW,CAAC,KAAK,CAAC;QACrB,CAAC,CAAC,OAAO,IAAI,WAAW;YACtB,CAAC,CAAC,WAAW,CAA
C,KAAK;YACnB,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC;IACzB,OAAO,CAAC,GAAG,QAAQ,EAAE,GAAG,CAAC,WAAW,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,CAAC;AACnD,CAAC"}
@@ -0,0 +1,13 @@
1
+ import type { DetectionResult } from "./detect.js";
2
+ import type { Expectation } from "./schema.js";
3
+ export interface CaseScore {
4
+ readonly matched: number;
5
+ readonly runs: number;
6
+ readonly triggerRate: number;
7
+ readonly threshold: number;
8
+ readonly pass: boolean;
9
+ readonly histogram: ReadonlyMap<string, number>;
10
+ }
11
+ export declare function matchesExpectation(expectation: Expectation, run: DetectionResult): boolean;
12
+ export declare function scoreCase(expectation: Expectation, runs: readonly DetectionResult[], threshold?: number): CaseScore;
13
+ //# sourceMappingURL=score.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"score.d.ts","sourceRoot":"","sources":["../../src/eval/score.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AACnD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAI/C,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;IAC7B,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,SAAS,EAAE,WAAW,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACjD;AAED,wBAAgB,kBAAkB,CAAC,WAAW,EAAE,WAAW,EAAE,GAAG,EAAE,eAAe,GAAG,OAAO,CAa1F;AAED,wBAAgB,SAAS,CACvB,WAAW,EAAE,WAAW,EACxB,IAAI,EAAE,SAAS,eAAe,EAAE,EAChC,SAAS,SAAoB,GAC5B,SAAS,CAWX"}
@@ -0,0 +1,52 @@
1
+ const DEFAULT_THRESHOLD = 1.0;
2
/**
 * Tells whether one detection run satisfies an expectation.
 *
 * A run rejected by `violatesNot` never matches. Otherwise the expectation
 * shape decides:
 *  - `noSkill`: `run.firstSkill` must be `null`
 *  - `first`:   `run.firstSkill` must equal `expectation.first`
 *  - `anyOf`:   `run.firstSkill` must be non-null and listed in `anyOf`
 *  - `path`:    the ids in `path` must occur in `run.observed` in that order
 *
 * @param expectation The expectation to check against.
 * @param run The detection result for a single run.
 * @returns `true` when the run matches.
 */
export function matchesExpectation(expectation, run) {
    if (violatesNot(expectation, run)) {
        return false;
    }

    if ("noSkill" in expectation) {
        return run.firstSkill === null;
    } else if ("first" in expectation) {
        return expectation.first === run.firstSkill;
    } else if ("anyOf" in expectation) {
        return run.firstSkill === null
            ? false
            : expectation.anyOf.includes(run.firstSkill);
    }

    // Only the `path` shape is left.
    return isOrderedSubsequence(expectation.path, run.observed);
}
16
/**
 * Scores a case from the results of its runs.
 *
 * @param expectation The expectation every run is checked against.
 * @param runs Detection results, one per run.
 * @param threshold Minimum trigger rate needed to pass; defaults to
 *   `DEFAULT_THRESHOLD`.
 * @returns The number of matching runs, the total number of runs, the trigger
 *   rate (matched / total, or 0 when there are no runs), the threshold used,
 *   whether the rate reached the threshold, and a histogram built by
 *   `histogramOf`.
 */
export function scoreCase(expectation, runs, threshold = DEFAULT_THRESHOLD) {
    const total = runs.length;
    const matched = runs.reduce(
        (count, run) => (matchesExpectation(expectation, run) ? count + 1 : count),
        0,
    );

    // Avoid 0 / 0 when no run was recorded.
    let triggerRate = 0;
    if (total !== 0) {
        triggerRate = matched / total;
    }

    const pass = triggerRate >= threshold;
    const histogram = histogramOf(runs);
    return { matched, runs: total, triggerRate, threshold, pass, histogram };
}
28
/**
 * Checks whether a run's first skill is one the expectation forbids.
 *
 * Only `run.firstSkill` is compared against the `not` list; no other field of
 * the run is consulted here. `noSkill` expectations have no `not` list and
 * therefore never violate it.
 *
 * @returns `true` when `run.firstSkill` is non-null and listed in `not`.
 */
function violatesNot(expectation, run) {
    if ("noSkill" in expectation) {
        return false;
    }

    const banned = expectation.not;
    if (!banned || banned.length === 0) {
        return false;
    }
    if (run.firstSkill === null) {
        return false;
    }
    return banned.includes(run.firstSkill);
}
34
/**
 * Tests whether the entries of `needle` occur in `haystack` in the same
 * order. Other entries may sit between them.
 *
 * @param needle Ids that must appear, in order.
 * @param haystack Ids to scan.
 * @returns `true` once every entry of `needle` has been found in sequence.
 */
function isOrderedSubsequence(needle, haystack) {
    // Index of the next needle entry still waiting to be seen.
    let next = 0;
    for (const seen of haystack) {
        if (needle[next] === seen) {
            next += 1;
        }
        // Checked on every pass so the scan stops as soon as it is complete.
        if (next === needle.length) {
            return true;
        }
    }
    return next === needle.length;
}
44
/**
 * Counts how many runs reported each first skill.
 *
 * @param runs Detection results to tally.
 * @returns A Map from skill id to occurrence count, in first-seen order. Runs
 *   whose `firstSkill` is null or undefined are counted under "(no skill)".
 */
function histogramOf(runs) {
    const tally = new Map();
    for (const { firstSkill } of runs) {
        const label = firstSkill ?? "(no skill)";
        const soFar = tally.get(label) ?? 0;
        tally.set(label, soFar + 1);
    }
    return tally;
}
52
+ //# sourceMappingURL=score.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"score.js","sourceRoot":"","sources":["../../src/eval/score.ts"],"names":[],"mappings":"AAGA,MAAM,iBAAiB,GAAG,GAAG,CAAC;AAW9B,MAAM,UAAU,kBAAkB,CAAC,WAAwB,EAAE,GAAoB;IAC/E,IAAI,WAAW,CAAC,WAAW,EAAE,GAAG,CAAC;QAAE,OAAO,KAAK,CAAC;IAEhD,IAAI,SAAS,IAAI,WAAW,EAAE,CAAC;QAC7B,OAAO,GAAG,CAAC,UAAU,KAAK,IAAI,CAAC;IACjC,CAAC;IACD,IAAI,OAAO,IAAI,WAAW,EAAE,CAAC;QAC3B,OAAO,GAAG,CAAC,UAAU,KAAK,WAAW,CAAC,KAAK,CAAC;IAC9C,CAAC;IACD,IAAI,OAAO,IAAI,WAAW,EAAE,CAAC;QAC3B,OAAO,GAAG,CAAC,UAAU,KAAK,IAAI,IAAI,WAAW,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IAC/E,CAAC;IACD,OAAO,oBAAoB,CAAC,WAAW,CAAC,IAAI,EAAE,GAAG,CAAC,QAAQ,CAAC,CAAC;AAC9D,CAAC;AAED,MAAM,UAAU,SAAS,CACvB,WAAwB,EACxB,IAAgC,EAChC,SAAS,GAAG,iBAAiB;IAE7B,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,kBAAkB,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;IAClF,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC;IAClE,OAAO;QACL,OAAO;QACP,IAAI,EAAE,IAAI,CAAC,MAAM;QACjB,WAAW;QACX,SAAS;QACT,IAAI,EAAE,WAAW,IAAI,SAAS;QAC9B,SAAS,EAAE,WAAW,CAAC,IAAI,CAAC;KAC7B,CAAC;AACJ,CAAC;AAED,SAAS,WAAW,CAAC,WAAwB,EAAE,GAAoB;IACjE,MAAM,SAAS,GAAG,SAAS,IAAI,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC;IACzE,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACvD,OAAO,GAAG,CAAC,UAAU,KAAK,IAAI,IAAI,SAAS,CAAC,QAAQ,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;AACvE,CAAC;AAED,SAAS,oBAAoB,CAAC,MAAyB,EAAE,QAA2B;IAClF,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;QAC1B,IAAI,EAAE,KAAK,MAAM,CAAC,MAAM,CAAC;YAAE,MAAM,IAAI,CAAC,CAAC;QACvC,IAAI,MAAM,KAAK,MAAM,CAAC,MAAM;YAAE,OAAO,IAAI,CAAC;IAC5C,CAAC;IACD,OAAO,MAAM,KAAK,MAAM,CAAC,MAAM,CAAC;AAClC,CAAC;AAED,SAAS,WAAW,CAAC,IAAgC;IACnD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IACzC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,GAAG,CAAC,UAAU,IAAI,YAAY,CAAC;QAC3C,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9C,CAAC;IACD,OAAO,MAAM,CAAC;AAChB
,CAAC"}
package/dist/index.d.ts CHANGED
@@ -27,6 +27,10 @@ export { resolveVendors } from "./vendor/registry.js";
27
27
  export type { DiscoveredVendorPlugin, LinkedFile, Vendor, VendorEmitContext, VendorInstallContext, } from "./vendor/schema.js";
28
28
  export { check } from "./check/index.js";
29
29
  export type { CheckOptions, CheckResult, ReferenceViolation, ReferenceViolationKind, SourceSummary, } from "./check/index.js";
30
+ export { runEval, formatConsole, toJson } from "./eval/index.js";
31
+ export type { EvalOptions, EvalReport, CaseReport, LoadedCase, CaseLoadError, } from "./eval/index.js";
32
+ export { CaseFileSchema, TIERS } from "./eval/schema.js";
33
+ export type { CaseFile, EvalCase, Expectation, Tier } from "./eval/schema.js";
30
34
  export { defaultSources, discoverInstalled, indexInstalled } from "./installed.js";
31
35
  export type { InstalledAgent, InstalledArtifacts, InstalledCommand, InstalledIndex, InstalledSkill, PluginSource, } from "./installed.js";
32
36
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,WAAW,EACX,mBAAmB,EACnB,WAAW,EACX,aAAa,EACb,oBAAoB,EACpB,2BAA2B,EAC3B,SAAS,GACV,MAAM,kBAAkB,CAAC;AAC1B,YAAY,EACV,SAAS,EACT,kBAAkB,EAClB,WAAW,EACX,cAAc,EACd,KAAK,EACL,SAAS,EACT,WAAW,GACZ,MAAM,kBAAkB,CAAC;AAE1B,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAC/D,YAAY,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAEhD,OAAO,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAC9E,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAE9E,OAAO,EAAE,GAAG,EAAE,EAAE,EAAE,MAAM,aAAa,CAAC;AACtC,YAAY,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAE1C,OAAO,EAAE,iBAAiB,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AACxE,YAAY,EACV,WAAW,EACX,gBAAgB,EAChB,SAAS,EACT,iBAAiB,EACjB,eAAe,GAChB,MAAM,yBAAyB,CAAC;AAEjC,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,YAAY,EAAE,qBAAqB,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAE7E,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,YAAY,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAEnD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,YAAY,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAEzD,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACxD,YAAY,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AACnF,YAAY,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErD,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,YAAY,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAEnD,OAAO,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AACxD,YAAY,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,YAAY,EACV,sBAAsB,EACtB,UAAU,EACV,MAAM,EACN,iBAAiB,EACjB,oBAAoB,GACrB,MAAM,oBAAoB,CAAC;AAE5B,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACzC,YAAY,EACV,YAAY,EACZ,WAAW,EACX,kBAAkB,EAClB,sBAAsB,EACtB,aAAa,GACd,MAAM,kBAAkB,CAAC;AAE1B,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AACnF,YAAY,EACV,cAAc,EACd,kBAAkB,EAClB,gBAAgB,EAChB,cAAc,EACd,cAAc,EACd,YAAY,GACb,MAAM,gBAAgB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,WAAW,EACX,mBAAmB,EACnB,WAAW,EACX,aAAa,EACb,oBAAoB,EACpB,2BAA2B,EAC3B,SAAS,GACV,MAAM,kBAAkB,CAAC;AAC1B,YAAY,EACV,SAAS,EACT,kBAAkB,EAClB,WAAW,EACX,cAAc,EACd,KAAK,EACL,SAAS,EACT,WAAW,GACZ,MAAM,kBAAkB,CAAC;AAE1B,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAC/D,YAAY,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAEhD,OAAO,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAC9E,YAAY,EAAE,gBAAgB,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAE9E,OAAO,EAAE,GAAG,EAAE,EAAE,EAAE,MAAM,aAAa,CAAC;AACtC,YAAY,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAE1C,OAAO,EAAE,iBAAiB,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AACxE,YAAY,EACV,WAAW,EACX,gBAAgB,EAChB,SAAS,EACT,iBAAiB,EACjB,eAAe,GAChB,MAAM,yBAAyB,CAAC;AAEjC,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,YAAY,EAAE,qBAAqB,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAE7E,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,YAAY,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAEnD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,YAAY,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AAEzD,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AACxD,YAAY,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AACnF,YAAY,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAErD,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAC9C,YAAY,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAEnD,OAAO,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AACxD,YAAY,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAEzD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,YAAY,EACV,sBAAsB,EACtB,UAAU,EACV,MAAM,EACN,iBAAiB,EACjB,oBAAoB,GACrB,MAAM,oBAAoB,CAAC;AAE5B,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACzC,YAAY,EACV,YAAY,EACZ,WAAW,EACX,kBAAkB,EAClB,sBAAsB,EACtB,aAAa,GACd,MAAM,kBAAkB,CAAC;AAE1B,OAAO,EAAE,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AACjE,YAAY,EACV,WAAW,EACX,UAAU,EACV,UAAU,EACV,UAAU,EACV,aAAa,GACd,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,cAAc,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AACzD,YAAY,EAAE,QAAQ,EAAE,QAAQ,EAAE,WAAW,EAAE,IAAI,EAAE,MAAM,kB
AAkB,CAAC;AAE9E,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AACnF,YAAY,EACV,cAAc,EACd,kBAAkB,EAClB,gBAAgB,EAChB,cAAc,EACd,cAAc,EACd,YAAY,GACb,MAAM,gBAAgB,CAAC"}
package/dist/index.js CHANGED
@@ -13,5 +13,7 @@ export { loadHarnessConfig } from "./config/harness.js";
13
13
  export { builtinVendors } from "./vendor/builtins.js";
14
14
  export { resolveVendors } from "./vendor/registry.js";
15
15
  export { check } from "./check/index.js";
16
+ export { runEval, formatConsole, toJson } from "./eval/index.js";
17
+ export { CaseFileSchema, TIERS } from "./eval/schema.js";
16
18
  export { defaultSources, discoverInstalled, indexInstalled } from "./installed.js";
17
19
  //# sourceMappingURL=index.js.map
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,WAAW,EACX,mBAAmB,EACnB,WAAW,EACX,aAAa,EACb,oBAAoB,EACpB,2BAA2B,EAC3B,SAAS,GACV,MAAM,kBAAkB,CAAC;AAW1B,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAG/D,OAAO,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAG9E,OAAO,EAAE,GAAG,EAAE,EAAE,EAAE,MAAM,aAAa,CAAC;AAGtC,OAAO,EAAE,iBAAiB,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AASxE,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAGpD,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAGvC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAGjC,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAExD,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAGnF,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAG9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AAGxD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAStD,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AASzC,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,EACf,WAAW,EACX,mBAAmB,EACnB,WAAW,EACX,aAAa,EACb,oBAAoB,EACpB,2BAA2B,EAC3B,SAAS,GACV,MAAM,kBAAkB,CAAC;AAW1B,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AAG/D,OAAO,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAG9E,OAAO,EAAE,GAAG,EAAE,EAAE,EAAE,MAAM,aAAa,CAAC;AAGtC,OAAO,EAAE,iBAAiB,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AASxE,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAGpD,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAGvC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAGjC,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAExD,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAGnF,OAAO,EAAE,WAAW,EAAE,MAAM,iBAAiB,CAAC;AAG9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AAGxD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AACtD,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAStD,OAAO,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AASzC,OAAO,EAAE,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAQjE,OAAO,EAAE,cAAc,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAGzD,OAAO,EAAE,cAAc,EAAE,iBAAiB,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jean.gnc/harness-kit",
3
- "version": "0.11.1",
3
+ "version": "0.12.0",
4
4
  "type": "module",
5
5
  "description": "Build your own multi-agent harness: typed toolkit for authoring plugins (skills, agents, commands, hooks) and shipping them to Claude Code and Codex from a single source tree.",
6
6
  "license": "MIT",