@cogitator-ai/evals 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/README.md +456 -0
  2. package/dist/assertions/custom.d.ts +11 -0
  3. package/dist/assertions/custom.d.ts.map +1 -0
  4. package/dist/assertions/custom.js +13 -0
  5. package/dist/assertions/custom.js.map +1 -0
  6. package/dist/assertions/index.d.ts +27 -0
  7. package/dist/assertions/index.d.ts.map +1 -0
  8. package/dist/assertions/index.js +4 -0
  9. package/dist/assertions/index.js.map +1 -0
  10. package/dist/assertions/regression.d.ts +5 -0
  11. package/dist/assertions/regression.d.ts.map +1 -0
  12. package/dist/assertions/regression.js +58 -0
  13. package/dist/assertions/regression.js.map +1 -0
  14. package/dist/assertions/threshold.d.ts +3 -0
  15. package/dist/assertions/threshold.d.ts.map +1 -0
  16. package/dist/assertions/threshold.js +45 -0
  17. package/dist/assertions/threshold.js.map +1 -0
  18. package/dist/datasets/csv-loader.d.ts +3 -0
  19. package/dist/datasets/csv-loader.d.ts.map +1 -0
  20. package/dist/datasets/csv-loader.js +43 -0
  21. package/dist/datasets/csv-loader.js.map +1 -0
  22. package/dist/datasets/dataset.d.ts +15 -0
  23. package/dist/datasets/dataset.d.ts.map +1 -0
  24. package/dist/datasets/dataset.js +62 -0
  25. package/dist/datasets/dataset.js.map +1 -0
  26. package/dist/datasets/index.d.ts +4 -0
  27. package/dist/datasets/index.d.ts.map +1 -0
  28. package/dist/datasets/index.js +4 -0
  29. package/dist/datasets/index.js.map +1 -0
  30. package/dist/datasets/jsonl-loader.d.ts +3 -0
  31. package/dist/datasets/jsonl-loader.d.ts.map +1 -0
  32. package/dist/datasets/jsonl-loader.js +27 -0
  33. package/dist/datasets/jsonl-loader.js.map +1 -0
  34. package/dist/eval-builder.d.ts +30 -0
  35. package/dist/eval-builder.d.ts.map +1 -0
  36. package/dist/eval-builder.js +82 -0
  37. package/dist/eval-builder.js.map +1 -0
  38. package/dist/eval-comparison.d.ts +43 -0
  39. package/dist/eval-comparison.d.ts.map +1 -0
  40. package/dist/eval-comparison.js +125 -0
  41. package/dist/eval-comparison.js.map +1 -0
  42. package/dist/eval-suite.d.ts +63 -0
  43. package/dist/eval-suite.d.ts.map +1 -0
  44. package/dist/eval-suite.js +230 -0
  45. package/dist/eval-suite.js.map +1 -0
  46. package/dist/index.d.ts +31 -0
  47. package/dist/index.d.ts.map +1 -0
  48. package/dist/index.js +20 -0
  49. package/dist/index.js.map +1 -0
  50. package/dist/metrics/custom.d.ts +18 -0
  51. package/dist/metrics/custom.d.ts.map +1 -0
  52. package/dist/metrics/custom.js +28 -0
  53. package/dist/metrics/custom.js.map +1 -0
  54. package/dist/metrics/deterministic.d.ts +11 -0
  55. package/dist/metrics/deterministic.d.ts.map +1 -0
  56. package/dist/metrics/deterministic.js +74 -0
  57. package/dist/metrics/deterministic.js.map +1 -0
  58. package/dist/metrics/index.d.ts +8 -0
  59. package/dist/metrics/index.d.ts.map +1 -0
  60. package/dist/metrics/index.js +5 -0
  61. package/dist/metrics/index.js.map +1 -0
  62. package/dist/metrics/llm-judge.d.ts +27 -0
  63. package/dist/metrics/llm-judge.d.ts.map +1 -0
  64. package/dist/metrics/llm-judge.js +77 -0
  65. package/dist/metrics/llm-judge.js.map +1 -0
  66. package/dist/metrics/statistical.d.ts +5 -0
  67. package/dist/metrics/statistical.d.ts.map +1 -0
  68. package/dist/metrics/statistical.js +85 -0
  69. package/dist/metrics/statistical.js.map +1 -0
  70. package/dist/metrics/types.d.ts +31 -0
  71. package/dist/metrics/types.d.ts.map +1 -0
  72. package/dist/metrics/types.js +2 -0
  73. package/dist/metrics/types.js.map +1 -0
  74. package/dist/reporters/ci.d.ts +3 -0
  75. package/dist/reporters/ci.d.ts.map +1 -0
  76. package/dist/reporters/ci.js +21 -0
  77. package/dist/reporters/ci.js.map +1 -0
  78. package/dist/reporters/console.d.ts +3 -0
  79. package/dist/reporters/console.d.ts.map +1 -0
  80. package/dist/reporters/console.js +46 -0
  81. package/dist/reporters/console.js.map +1 -0
  82. package/dist/reporters/csv.d.ts +5 -0
  83. package/dist/reporters/csv.d.ts.map +1 -0
  84. package/dist/reporters/csv.js +31 -0
  85. package/dist/reporters/csv.js.map +1 -0
  86. package/dist/reporters/index.d.ts +50 -0
  87. package/dist/reporters/index.d.ts.map +1 -0
  88. package/dist/reporters/index.js +28 -0
  89. package/dist/reporters/index.js.map +1 -0
  90. package/dist/reporters/json.d.ts +5 -0
  91. package/dist/reporters/json.d.ts.map +1 -0
  92. package/dist/reporters/json.js +5 -0
  93. package/dist/reporters/json.js.map +1 -0
  94. package/dist/schema.d.ts +29 -0
  95. package/dist/schema.d.ts.map +1 -0
  96. package/dist/schema.js +23 -0
  97. package/dist/schema.js.map +1 -0
  98. package/dist/stats/index.d.ts +6 -0
  99. package/dist/stats/index.d.ts.map +1 -0
  100. package/dist/stats/index.js +4 -0
  101. package/dist/stats/index.js.map +1 -0
  102. package/dist/stats/mcnemar.d.ts +7 -0
  103. package/dist/stats/mcnemar.d.ts.map +1 -0
  104. package/dist/stats/mcnemar.js +34 -0
  105. package/dist/stats/mcnemar.js.map +1 -0
  106. package/dist/stats/percentiles.d.ts +15 -0
  107. package/dist/stats/percentiles.d.ts.map +1 -0
  108. package/dist/stats/percentiles.js +54 -0
  109. package/dist/stats/percentiles.js.map +1 -0
  110. package/dist/stats/t-test.d.ts +9 -0
  111. package/dist/stats/t-test.d.ts.map +1 -0
  112. package/dist/stats/t-test.js +129 -0
  113. package/dist/stats/t-test.js.map +1 -0
  114. package/dist/tools.d.ts +16 -0
  115. package/dist/tools.d.ts.map +1 -0
  116. package/dist/tools.js +58 -0
  117. package/dist/tools.js.map +1 -0
  118. package/package.json +57 -0
@@ -0,0 +1,230 @@
1
+ import { writeFileSync } from 'node:fs';
2
+ import { EvalSuiteConfigSchema } from './schema';
3
+ import { bindJudgeContext } from './metrics/llm-judge';
4
+ import { aggregate } from './stats';
5
+ import { report } from './reporters';
6
/**
 * Tells whether a metric has to be bound to a judge before it can be invoked.
 *
 * @param {object|Function} m - Metric to inspect (must be an object or function;
 *   the `in` operator throws a TypeError for anything else).
 * @returns {boolean} `true` only when `m` exposes `requiresJudge` set to exactly `true`.
 */
function isLLMMetric(m) {
  if (!('requiresJudge' in m)) {
    return false;
  }
  return m.requiresJudge === true;
}
9
/**
 * Runs every case of a dataset against a target, scores each output with the
 * configured per-case metrics, aggregates the scores and evaluates suite-level
 * assertions.
 *
 * The target is either a plain function (`target.fn`) or an agent executed
 * through a cogitator instance (`target.agent` together with `target.cogitator`).
 */
export class EvalSuite {
  dataset;
  target;
  boundMetrics;
  statisticalMetrics;
  assertionFns;
  concurrency;
  timeout;
  retries;
  onProgress;

  /**
   * @param {object} opts
   * @param {object} opts.dataset - Object exposing the evaluation cases as `cases`.
   * @param {object} opts.target - `{ fn }` or `{ agent, cogitator }`.
   * @param {Function[]} [opts.metrics] - Per-case metric functions.
   * @param {Function[]} [opts.statisticalMetrics] - Metrics computed over all results.
   * @param {Function[]} [opts.assertions] - Called with `(aggregated, stats)`.
   * @param {object} [opts.judge] - Judge config; required when any metric has `requiresJudge === true`.
   * @param {number} [opts.concurrency] - Passed through `EvalSuiteConfigSchema`.
   * @param {number} [opts.timeout] - Per-attempt timeout in milliseconds, passed through `EvalSuiteConfigSchema`.
   * @param {number} [opts.retries] - Extra attempts per case, passed through `EvalSuiteConfigSchema`.
   * @param {Function} [opts.onProgress] - Called with `{ completed, total, currentCase }` after each case.
   * @throws {Error} When the target is malformed or LLM metrics are given without `judge`;
   *   `EvalSuiteConfigSchema.parse` may throw as well.
   */
  constructor(opts) {
    const config = EvalSuiteConfigSchema.parse({
      concurrency: opts.concurrency,
      timeout: opts.timeout,
      retries: opts.retries,
    });
    this.dataset = opts.dataset;
    this.target = opts.target;
    this.statisticalMetrics = opts.statisticalMetrics ?? [];
    this.assertionFns = opts.assertions ?? [];
    this.concurrency = config.concurrency;
    this.timeout = config.timeout;
    this.retries = config.retries;
    this.onProgress = opts.onProgress;
    this.validateTarget();

    const rawMetrics = opts.metrics ?? [];
    const hasLLMMetrics = rawMetrics.some(isLLMMetric);
    if (hasLLMMetrics && !opts.judge) {
      throw new Error('LLM metrics require a judge config');
    }
    this.boundMetrics = rawMetrics.map((m) => {
      if (isLLMMetric(m) && opts.judge) {
        return bindJudgeContext(m, {
          // NOTE(review): this `cogitator` only echoes its input back as the
          // output. It looks like a placeholder rather than a real judge
          // runner - confirm against bindJudgeContext before relying on it.
          cogitator: { run: async ({ input }) => ({ output: input }) },
          judgeConfig: opts.judge,
        });
      }
      return m;
    });
  }

  /**
   * Ensures the target is exactly one of `fn` or the `agent` + `cogitator` pair.
   *
   * @throws {Error} When both forms, neither form, or only half of the pair is given.
   */
  validateTarget() {
    const { fn, agent, cogitator } = this.target;
    const hasFn = fn !== undefined;
    const hasAgent = agent !== undefined;
    const hasCogitator = cogitator !== undefined;
    if (hasFn && (hasAgent || hasCogitator)) {
      throw new Error('Target must have either fn or agent+cogitator, not both');
    }
    if (!hasFn && !hasAgent && !hasCogitator) {
      throw new Error('Target must have either fn or agent+cogitator');
    }
    if (hasAgent && !hasCogitator) {
      throw new Error('Agent target requires cogitator instance');
    }
    if (hasCogitator && !hasAgent) {
      throw new Error('Cogitator target requires agent instance');
    }
  }

  /**
   * Executes and scores all cases with at most `concurrency` cases in flight,
   * then aggregates scores and evaluates the assertions.
   *
   * @returns {Promise<object>} `{ results, aggregated, assertions, stats, report, saveBaseline }`;
   *   `results` is in dataset order.
   * @throws Rejects with the first error raised by a metric or by `onProgress`.
   *   Target failures do not reject; see `executeCase`.
   */
  async run() {
    const suiteStart = Date.now();
    const cases = [...this.dataset.cases];
    const total = cases.length;
    // Results are written by index so dataset order is kept without sorting.
    const orderedResults = new Array(total);
    let completed = 0;
    let nextIdx = 0;
    let aborted = false;

    // Each worker pulls the next unclaimed case until none are left. Awaiting
    // the workers lets a failure reject run() instead of surfacing as an
    // unhandled rejection with a silently missing result.
    const worker = async () => {
      while (!aborted && nextIdx < total) {
        const i = nextIdx++;
        const evalCase = cases[i];
        try {
          const caseResult = await this.executeCase(evalCase);
          const scores = await this.evaluateCaseMetrics(caseResult);
          orderedResults[i] = { ...caseResult, scores };
          completed++;
          this.onProgress?.({ completed, total, currentCase: evalCase });
        } catch (err) {
          // Stop the other workers from claiming further cases.
          aborted = true;
          throw err;
        }
      }
    };
    const workerCount = Math.min(Math.max(1, this.concurrency), total);
    await Promise.all(Array.from({ length: workerCount }, () => worker()));

    const aggregated = this.aggregateScores(orderedResults);
    for (const statMetric of this.statisticalMetrics) {
      const score = statMetric(orderedResults);
      aggregated[score.name] = { name: score.name, ...aggregate([score.score]) };
    }
    const totalCost = orderedResults.reduce((sum, r) => sum + (r.usage?.cost ?? 0), 0);
    const suiteDuration = Date.now() - suiteStart;
    const stats = { total, duration: suiteDuration, cost: totalCost };
    const assertionResults = this.assertionFns.map((fn) => fn(aggregated, stats));

    const suiteResult = {
      results: orderedResults,
      aggregated,
      assertions: assertionResults,
      stats,
      // Inside this arrow function `report` is the imported reporter, not this property.
      report: (type, options) => {
        report({
          results: orderedResults.map((r) => ({
            case: { input: r.case.input, expected: r.case.expected },
            output: r.output,
            duration: r.duration,
            scores: r.scores,
          })),
          aggregated,
          assertions: assertionResults,
          stats,
        }, type, options);
      },
      // Writes `{ metricName: mean }` as pretty-printed JSON.
      saveBaseline: (path) => {
        const baseline = {};
        for (const [name, agg] of Object.entries(aggregated)) {
          baseline[name] = agg.mean;
        }
        writeFileSync(path, JSON.stringify(baseline, null, 2));
      },
    };
    return suiteResult;
  }

  /**
   * Runs one case, retrying up to `retries` extra times when an attempt throws.
   *
   * @param {object} evalCase
   * @returns {Promise<object>} The attempt's result, or `{ case, output: '', duration: 0 }`
   *   once every attempt has failed.
   */
  async executeCase(evalCase) {
    for (let attempt = 0; attempt <= this.retries; attempt++) {
      try {
        return await this.executeCaseAttempt(evalCase);
      } catch {
        // Deliberate best-effort: a failing target never aborts the suite.
        // The loop either retries or falls through to the empty result below.
      }
    }
    return {
      case: evalCase,
      output: '',
      duration: 0,
    };
  }

  /**
   * Runs a single attempt of a case, racing the target against the timeout.
   *
   * @param {object} evalCase
   * @returns {Promise<object>} The target's result, or `{ case, output: '', duration }`
   *   when the timeout elapses first.
   * @throws Rethrows any error raised by the target itself.
   */
  async executeCaseAttempt(evalCase) {
    const start = Date.now();
    const work = this.target.fn
      ? this.executeFnTarget(evalCase)
      : this.executeAgentTarget(evalCase);
    // Compared by identity so a target that throws an error with the same
    // message is not mistaken for a timeout.
    const timeoutError = new Error('__timeout__');
    let timer;
    const timeoutPromise = new Promise((_, reject) => {
      timer = setTimeout(() => reject(timeoutError), this.timeout);
    });
    try {
      return await Promise.race([work, timeoutPromise]);
    } catch (err) {
      if (err === timeoutError) {
        return {
          case: evalCase,
          output: '',
          duration: Date.now() - start,
        };
      }
      throw err;
    } finally {
      // Without this every case leaves a live timer behind for `timeout` ms,
      // which keeps the process from exiting.
      clearTimeout(timer);
    }
  }

  /**
   * Calls `target.fn` with the case input and measures the elapsed time.
   *
   * @param {object} evalCase
   * @returns {Promise<object>} `{ case, output, duration }`.
   */
  async executeFnTarget(evalCase) {
    const start = Date.now();
    const output = await this.target.fn(evalCase.input);
    return { case: evalCase, output, duration: Date.now() - start };
  }

  /**
   * Runs `target.agent` through `target.cogitator` with the case input and context.
   *
   * @param {object} evalCase
   * @returns {Promise<object>} `{ case, output, duration }`, plus `usage` and
   *   `toolCalls` when the run result provides them.
   */
  async executeAgentTarget(evalCase) {
    const start = Date.now();
    const cogitator = this.target.cogitator;
    const runResult = await cogitator.run(this.target.agent, {
      input: evalCase.input,
      context: evalCase.context,
    });
    const duration = Date.now() - start;
    const result = {
      case: evalCase,
      output: runResult.output ?? '',
      duration,
    };
    if (runResult.usage) {
      result.usage = runResult.usage;
    }
    if (runResult.toolCalls) {
      result.toolCalls = runResult.toolCalls;
    }
    return result;
  }

  /**
   * Applies every bound metric to one case result.
   *
   * @param {object} result
   * @returns {Promise<object[]>} One score per metric, in metric order.
   */
  async evaluateCaseMetrics(result) {
    if (this.boundMetrics.length === 0) {
      return [];
    }
    return Promise.all(this.boundMetrics.map((m) => m(result)));
  }

  /**
   * Groups the per-case scores by metric name and aggregates each group.
   *
   * @param {object[]} results - Case results carrying a `scores` array.
   * @returns {object} Map of metric name to `{ name, ...aggregate(values) }`.
   */
  aggregateScores(results) {
    const scoresByMetric = new Map();
    for (const r of results) {
      for (const s of r.scores) {
        let arr = scoresByMetric.get(s.name);
        if (!arr) {
          arr = [];
          scoresByMetric.set(s.name, arr);
        }
        arr.push(s.score);
      }
    }
    const aggregated = {};
    for (const [name, values] of scoresByMetric) {
      aggregated[name] = { name, ...aggregate(values) };
    }
    return aggregated;
  }
}
230
+ //# sourceMappingURL=eval-suite.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval-suite.js","sourceRoot":"","sources":["../src/eval-suite.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAGxC,OAAO,EAAE,qBAAqB,EAAE,MAAM,UAAU,CAAC;AAIjD,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAEvD,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AACpC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAqCrC,SAAS,WAAW,CAAC,CAAW;IAC9B,OAAO,eAAe,IAAI,CAAC,IAAK,CAAiB,CAAC,aAAa,KAAK,IAAI,CAAC;AAC3E,CAAC;AAED,MAAM,OAAO,SAAS;IACH,OAAO,CAAU;IACjB,MAAM,CAAa;IACnB,YAAY,CAAa;IACzB,kBAAkB,CAAwB;IAC1C,YAAY,CAAgB;IAC5B,WAAW,CAAS;IACpB,OAAO,CAAS;IAChB,OAAO,CAAS;IAChB,UAAU,CAAoC;IAE/D,YAAY,IAAsB;QAChC,MAAM,MAAM,GAAG,qBAAqB,CAAC,KAAK,CAAC;YACzC,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,OAAO,EAAE,IAAI,CAAC,OAAO;SACtB,CAAC,CAAC;QAEH,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;QAC5B,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC;QAC1B,IAAI,CAAC,kBAAkB,GAAG,IAAI,CAAC,kBAAkB,IAAI,EAAE,CAAC;QACxD,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,UAAU,IAAI,EAAE,CAAC;QAC1C,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,CAAC;QACtC,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;QAC9B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;QAC9B,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC;QAElC,IAAI,CAAC,cAAc,EAAE,CAAC;QAEtB,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC;QACtC,MAAM,aAAa,GAAG,UAAU,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAEnD,IAAI,aAAa,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YACjC,MAAM,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC;QACxD,CAAC;QAED,IAAI,CAAC,YAAY,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;YACvC,IAAI,WAAW,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;gBACjC,OAAO,gBAAgB,CAAC,CAAC,EAAE;oBACzB,SAAS,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,KAAK,EAAqB,EAAE,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,EAAE;oBAC/E,WAAW,EAAE,IAAI,CAAC,KAAK;iBACxB,CAAC,CAAC;YACL,CAAC;YACD,OAAO,CAAC,CAAC;QACX,CAAC,CAAC,CAAC;IACL,CAAC;IAEO,cAAc;QACpB,MAAM,EAAE,EAAE,EAAE,KAAK,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC;QAC7C,MAAM,KAAK,GAAG,EAAE,KAAK,SAAS,CAAC;QAC/B,MAAM,QAAQ,GAAG,KAAK,KAAK,SAAS,CAAC;QACrC,MAAM,YAAY,GAAG,SAAS,KAAK,SAAS,CAAC;QAE7C,IAAI,KAAK,IAAI,
CAAC,QAAQ,IAAI,YAAY,CAAC,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;QAC7E,CAAC;QAED,IAAI,CAAC,KAAK,IAAI,CAAC,QAAQ,IAAI,CAAC,YAAY,EAAE,CAAC;YACzC,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;QACnE,CAAC;QAED,IAAI,QAAQ,IAAI,CAAC,YAAY,EAAE,CAAC;YAC9B,MAAM,IAAI,KAAK,CAAC,0CAA0C,CAAC,CAAC;QAC9D,CAAC;QAED,IAAI,YAAY,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC9B,MAAM,IAAI,KAAK,CAAC,0CAA0C,CAAC,CAAC;QAC9D,CAAC;IACH,CAAC;IAED,KAAK,CAAC,GAAG;QACP,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC9B,MAAM,KAAK,GAAG,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QACtC,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC;QAC3B,IAAI,SAAS,GAAG,CAAC,CAAC;QAGlB,MAAM,OAAO,GAAiD,EAAE,CAAC;QAEjE,IAAI,MAAM,GAAG,CAAC,CAAC;QACf,IAAI,OAAO,GAAG,CAAC,CAAC;QAEhB,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YACd,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE;gBAClC,MAAM,KAAK,GAAG,GAAG,EAAE;oBACjB,OAAO,MAAM,GAAG,IAAI,CAAC,WAAW,IAAI,OAAO,GAAG,KAAK,EAAE,CAAC;wBACpD,MAAM,CAAC,GAAG,OAAO,EAAE,CAAC;wBACpB,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;wBAC1B,MAAM,EAAE,CAAC;wBAET,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC;6BACvB,IAAI,CAAC,CAAC,UAAU,EAAE,EAAE,CACnB,IAAI,CAAC,mBAAmB,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,UAAU,EAAE,MAAM,EAAE,CAAC,CAAC,CACnF;6BACA,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE;4BACf,OAAO,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;4BACzC,SAAS,EAAE,CAAC;4BACZ,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC,CAAC;wBACjE,CAAC,CAAC;6BACD,OAAO,CAAC,GAAG,EAAE;4BACZ,MAAM,EAAE,CAAC;4BACT,IAAI,OAAO,IAAI,KAAK,IAAI,MAAM,KAAK,CAAC,EAAE,CAAC;gCACrC,OAAO,EAAE,CAAC;4BACZ,CAAC;iCAAM,CAAC;gCACN,KAAK,EAAE,CAAC;4BACV,CAAC;wBACH,CAAC,CAAC,CAAC;oBACP,CAAC;gBACH,CAAC,CAAC;gBACF,KAAK,EAAE,CAAC;YACV,CAAC,CAAC,CAAC;QACL,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;QACtC,MAAM,cAAc,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAEpD,MAAM,UAAU,GAAG,IAAI,CAAC,eAAe,CAAC,cAAc,CAAC,CAAC;QAExD,KAAK,MAAM,UAAU,IAAI,IAAI,CAAC,kBAAkB,
EAAE,CAAC;YACjD,MAAM,KAAK,GAAG,UAAU,CAAC,cAAc,CAAC,CAAC;YACzC,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,GAAG,SAAS,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;QAC7E,CAAC;QAED,MAAM,SAAS,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,EAAE,IAAI,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACnF,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,UAAU,CAAC;QAC9C,MAAM,KAAK,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QAElE,MAAM,gBAAgB,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC,CAAC;QAE9E,MAAM,WAAW,GAAoB;YACnC,OAAO,EAAE,cAAc;YACvB,UAAU;YACV,UAAU,EAAE,gBAAgB;YAC5B,KAAK;YACL,MAAM,EAAE,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE;gBACxB,MAAM,CACJ;oBACE,OAAO,EAAE,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;wBAClC,IAAI,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,QAAQ,EAAE;wBACxD,MAAM,EAAE,CAAC,CAAC,MAAM;wBAChB,QAAQ,EAAE,CAAC,CAAC,QAAQ;wBACpB,MAAM,EAAE,CAAC,CAAC,MAAM;qBACjB,CAAC,CAAC;oBACH,UAAU;oBACV,UAAU,EAAE,gBAAgB;oBAC5B,KAAK;iBACN,EACD,IAAI,EACJ,OAAO,CACR,CAAC;YACJ,CAAC;YACD,YAAY,EAAE,CAAC,IAAI,EAAE,EAAE;gBACrB,MAAM,QAAQ,GAA2B,EAAE,CAAC;gBAC5C,KAAK,MAAM,CAAC,IAAI,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;oBACrD,QAAQ,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,IAAI,CAAC;gBAC5B,CAAC;gBACD,aAAa,CAAC,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;YACzD,CAAC;SACF,CAAC;QAEF,OAAO,WAAW,CAAC;IACrB,CAAC;IAEO,KAAK,CAAC,WAAW,CAAC,QAAkB;QAC1C,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,IAAI,CAAC,OAAO,EAAE,OAAO,EAAE,EAAE,CAAC;YACzD,IAAI,CAAC;gBACH,OAAO,MAAM,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YACjD,CAAC;YAAC,MAAM,CAAC;gBACP,IAAI,OAAO,GAAG,IAAI,CAAC,OAAO;oBAAE,SAAS;YACvC,CAAC;QACH,CAAC;QAED,OAAO;YACL,IAAI,EAAE,QAAQ;YACd,MAAM,EAAE,EAAE;YACV,QAAQ,EAAE,CAAC;SACZ,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,kBAAkB,CAAC,QAAkB;QACjD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAEzB,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,EAAE;YACzB,CAAC,CAAC,IAAI,CA
AC,eAAe,CAAC,QAAQ,CAAC;YAChC,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;QAEtC,MAAM,cAAc,GAAG,IAAI,OAAO,CAAQ,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE;YACtD,UAAU,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,IAAI,KAAK,CAAC,aAAa,CAAC,CAAC,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QACnE,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,OAAO,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC,CAAC;QACpD,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,IAAK,GAAa,CAAC,OAAO,KAAK,aAAa,EAAE,CAAC;gBAC7C,OAAO;oBACL,IAAI,EAAE,QAAQ;oBACd,MAAM,EAAE,EAAE;oBACV,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;iBAC7B,CAAC;YACJ,CAAC;YACD,MAAM,GAAG,CAAC;QACZ,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,eAAe,CAAC,QAAkB;QAC9C,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACzB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,EAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;QACrD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,EAAE,CAAC;IAClE,CAAC;IAEO,KAAK,CAAC,kBAAkB,CAAC,QAAkB;QACjD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACzB,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,SAK7B,CAAC;QACF,MAAM,SAAS,GAAG,MAAM,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE;YACvD,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,OAAO,EAAE,QAAQ,CAAC,OAAO;SAC1B,CAAC,CAAC;QACH,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;QAEpC,MAAM,MAAM,GAAmB;YAC7B,IAAI,EAAE,QAAQ;YACd,MAAM,EAAG,SAAS,CAAC,MAAiB,IAAI,EAAE;YAC1C,QAAQ;SACT,CAAC;QAEF,IAAI,SAAS,CAAC,KAAK,EAAE,CAAC;YACpB,MAAM,CAAC,KAAK,GAAG,SAAS,CAAC,KAAgC,CAAC;QAC5D,CAAC;QAED,IAAI,SAAS,CAAC,SAAS,EAAE,CAAC;YACxB,MAAM,CAAC,SAAS,GAAG,SAAS,CAAC,SAAwC,CAAC;QACxE,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,KAAK,CAAC,mBAAmB,CAAC,MAAsB;QACtD,IAAI,IAAI,CAAC,YAAY,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAC9C,OAAO,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAC9D,CAAC;IAEO,eAAe,CACrB,OAA0D;QAE1D,MAAM,cAAc,GAAG,IAAI,GAAG,EAAoB,CAAC;QAEnD,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,KAAK,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC;gBACzB,IAAI,GAAG,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;gBACrC,IAAI,CAAC,GAAG,EAAE,C
AAC;oBACT,GAAG,GAAG,EAAE,CAAC;oBACT,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;gBAClC,CAAC;gBACD,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YACpB,CAAC;QACH,CAAC;QAED,MAAM,UAAU,GAAqC,EAAE,CAAC;QACxD,KAAK,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,cAAc,EAAE,CAAC;YAC5C,UAAU,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE,GAAG,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC;QACpD,CAAC;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;CACF"}
@@ -0,0 +1,31 @@
1
+ export declare const VERSION = "0.1.0";
2
+ export { EvalSuite } from './eval-suite';
3
+ export type { EvalTarget, EvalProgress, EvalSuiteOptions, EvalSuiteResult } from './eval-suite';
4
+ export { EvalComparison } from './eval-comparison';
5
+ export type { EvalComparisonOptions, MetricComparison, ComparisonResult } from './eval-comparison';
6
+ export { EvalBuilder } from './eval-builder';
7
+ export { Dataset } from './datasets';
8
+ export { loadJsonl } from './datasets';
9
+ export { loadCsv } from './datasets';
10
+ export { exactMatch, contains, regex, jsonSchema } from './metrics/deterministic';
11
+ export { faithfulness, relevance, coherence, helpfulness, llmMetric, bindJudgeContext, } from './metrics/llm-judge';
12
+ export type { LLMMetricFn, JudgeContext } from './metrics/llm-judge';
13
+ export { latency, cost, tokenUsage } from './metrics/statistical';
14
+ export { metric } from './metrics/custom';
15
+ export type { MetricFn, MetricScore, EvalCaseResult, StatisticalMetricFn } from './metrics/types';
16
+ export type { CustomMetricConfig } from './metrics/custom';
17
+ export { threshold } from './assertions';
18
+ export { noRegression } from './assertions';
19
+ export { assertion } from './assertions';
20
+ export type { AssertionFn, AssertionResult, AggregatedMetric } from './assertions';
21
+ export { report } from './reporters';
22
+ export type { ReporterType, ReporterOptions } from './reporters';
23
+ export { pairedTTest } from './stats/t-test';
24
+ export type { TTestResult } from './stats/t-test';
25
+ export { mcnemarsTest } from './stats/mcnemar';
26
+ export type { McNemarResult } from './stats/mcnemar';
27
+ export { mean, median, stdDev, percentile, aggregate } from './stats/percentiles';
28
+ export type { EvalCase, EvalSuiteConfig, JudgeConfig, EvalComparisonConfig } from './schema';
29
+ export { createRunEvalTool, evalTools } from './tools';
30
+ export type { EvalTool } from './tools';
31
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,OAAO,UAAU,CAAC;AAE/B,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,YAAY,EAAE,UAAU,EAAE,YAAY,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAChG,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACnD,YAAY,EAAE,qBAAqB,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AACnG,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAE7C,OAAO,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AACrC,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AAErC,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,KAAK,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAClF,OAAO,EACL,YAAY,EACZ,SAAS,EACT,SAAS,EACT,WAAW,EACX,SAAS,EACT,gBAAgB,GACjB,MAAM,qBAAqB,CAAC;AAC7B,YAAY,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACrE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AAClE,OAAO,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AAC1C,YAAY,EAAE,QAAQ,EAAE,WAAW,EAAE,cAAc,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AAClG,YAAY,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AAE3D,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,YAAY,EAAE,WAAW,EAAE,eAAe,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AAEnF,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,YAAY,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAEjE,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC7C,YAAY,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAClD,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAC/C,YAAY,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAElF,YAAY,EAAE,QAAQ,EAAE,eAAe,EAAE,WAAW,EAAE,oBAAoB,EAAE,MAAM,UAAU,CAAC;AAE7F,OAAO,EAAE,iBAAiB,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AACvD,YAAY,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,20 @@
1
+ export const VERSION = '0.1.0';
2
+ export { EvalSuite } from './eval-suite';
3
+ export { EvalComparison } from './eval-comparison';
4
+ export { EvalBuilder } from './eval-builder';
5
+ export { Dataset } from './datasets';
6
+ export { loadJsonl } from './datasets';
7
+ export { loadCsv } from './datasets';
8
+ export { exactMatch, contains, regex, jsonSchema } from './metrics/deterministic';
9
+ export { faithfulness, relevance, coherence, helpfulness, llmMetric, bindJudgeContext, } from './metrics/llm-judge';
10
+ export { latency, cost, tokenUsage } from './metrics/statistical';
11
+ export { metric } from './metrics/custom';
12
+ export { threshold } from './assertions';
13
+ export { noRegression } from './assertions';
14
+ export { assertion } from './assertions';
15
+ export { report } from './reporters';
16
+ export { pairedTTest } from './stats/t-test';
17
+ export { mcnemarsTest } from './stats/mcnemar';
18
+ export { mean, median, stdDev, percentile, aggregate } from './stats/percentiles';
19
+ export { createRunEvalTool, evalTools } from './tools';
20
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,OAAO,GAAG,OAAO,CAAC;AAE/B,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAEzC,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAEnD,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAE7C,OAAO,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AACrC,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,EAAE,OAAO,EAAE,MAAM,YAAY,CAAC;AAErC,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,KAAK,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAClF,OAAO,EACL,YAAY,EACZ,SAAS,EACT,SAAS,EACT,WAAW,EACX,SAAS,EACT,gBAAgB,GACjB,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AAClE,OAAO,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AAI1C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAGzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAE7C,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAE/C,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAIlF,OAAO,EAAE,iBAAiB,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC"}
@@ -0,0 +1,18 @@
1
+ import type { MetricFn } from './types';
2
/**
 * Configuration accepted by `metric()` to define a custom per-case metric.
 */
export interface CustomMetricConfig {
    /** Name attached to the metric function and to every score it produces. */
    name: string;
    /**
     * Scoring callback. Receives the case input, the produced output and the
     * optional expected value / context, and returns the score either directly
     * or as a promise. `details` is an optional free-form explanation.
     */
    evaluate: (data: {
        input: string;
        output: string;
        expected?: string;
        context?: Record<string, unknown>;
    }) => Promise<{
        score: number;
        details?: string;
    }> | {
        score: number;
        details?: string;
    };
}
17
/**
 * Builds a metric function from a custom `evaluate` callback.
 *
 * @param config Name and scoring callback of the metric.
 * @returns A metric function tagged with `config.name`.
 */
export declare function metric(config: CustomMetricConfig): MetricFn;
18
+ //# sourceMappingURL=custom.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"custom.d.ts","sourceRoot":"","sources":["../../src/metrics/custom.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAkB,MAAM,SAAS,CAAC;AAExD,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,CAAC,IAAI,EAAE;QACf,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACnC,KAAK,OAAO,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,GAAG;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;CAC1F;AAED,wBAAgB,MAAM,CAAC,MAAM,EAAE,kBAAkB,GAAG,QAAQ,CA4B3D"}
@@ -0,0 +1,28 @@
1
/**
 * Wraps a user-supplied `evaluate` callback as a metric function.
 *
 * The returned function never rejects: the score is clamped to [0, 1], and any
 * failure (a throw from `evaluate`, or a score that is not a number) is
 * reported as score 0 with an `evaluate error: ...` explanation in `details`.
 *
 * @param {object} config
 * @param {string} config.name - Name reported on every score.
 * @param {Function} config.evaluate - Sync or async callback receiving
 *   `{ input, output, expected, context }` and returning `{ score, details? }`.
 * @returns {Function} Async metric `(result) => { name, score, details? }`
 *   carrying `metricName = config.name`.
 */
export function metric(config) {
  const fn = async (result) => {
    try {
      const { score, details } = await config.evaluate({
        input: result.case.input,
        output: result.output,
        expected: result.case.expected,
        context: result.case.context,
      });
      const clamped = Math.max(0, Math.min(1, score));
      // NaN (including an `undefined` score) passes through min/max unchanged
      // and would poison every aggregate computed from it.
      if (Number.isNaN(clamped)) {
        throw new TypeError(`score must be a number, got ${String(score)}`);
      }
      return {
        name: config.name,
        score: clamped,
        ...(details !== undefined && { details }),
      };
    } catch (err) {
      // Anything can be thrown, not only Error instances.
      const reason = err?.message ?? String(err);
      return {
        name: config.name,
        score: 0,
        details: `evaluate error: ${reason}`,
      };
    }
  };
  fn.metricName = config.name;
  return fn;
}
28
+ //# sourceMappingURL=custom.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"custom.js","sourceRoot":"","sources":["../../src/metrics/custom.ts"],"names":[],"mappings":"AAYA,MAAM,UAAU,MAAM,CAAC,MAA0B;IAC/C,MAAM,EAAE,GAAG,CAAC,KAAK,EAAE,MAAsB,EAAE,EAAE;QAC3C,IAAI,CAAC;YACH,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC;gBAC/C,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK;gBACxB,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,QAAQ;gBAC9B,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,OAAO;aAC7B,CAAC,CAAC;YAEH,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;YAEhD,OAAO;gBACL,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,KAAK,EAAE,OAAO;gBACd,GAAG,CAAC,OAAO,KAAK,SAAS,IAAI,EAAE,OAAO,EAAE,CAAC;aAC1C,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO;gBACL,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,KAAK,EAAE,CAAC;gBACR,OAAO,EAAE,mBAAoB,GAAa,CAAC,OAAO,EAAE;aACrD,CAAC;QACJ,CAAC;IACH,CAAC,CAAa,CAAC;IAEf,EAAE,CAAC,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC;IAC5B,OAAO,EAAE,CAAC;AACZ,CAAC"}
@@ -0,0 +1,11 @@
1
+ import type { ZodType } from 'zod';
2
+ import type { MetricFn } from './types';
3
/** Options shared by the `exactMatch` and `contains` metrics. */
interface MatchOptions {
    /** Compare with case sensitivity; both metrics treat a missing value as `false`. */
    caseSensitive?: boolean;
}
6
/** Metric scoring 1 when the trimmed output equals the trimmed expected value, otherwise 0. */
export declare function exactMatch(opts?: MatchOptions): MetricFn;
/** Metric scoring 1 when the output contains the expected value, otherwise 0. */
export declare function contains(opts?: MatchOptions): MetricFn;
/** Metric scoring 1 when the output matches `pattern` (a string is compiled with `new RegExp`), otherwise 0. */
export declare function regex(pattern: string | RegExp): MetricFn;
/** Metric built from a Zod schema. NOTE(review): the implementation is only partly visible here - confirm the exact scoring rules. */
export declare function jsonSchema(schema: ZodType): MetricFn;
10
+ export {};
11
+ //# sourceMappingURL=deterministic.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"deterministic.d.ts","sourceRoot":"","sources":["../../src/metrics/deterministic.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,KAAK,CAAC;AACnC,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAExC,UAAU,YAAY;IACpB,aAAa,CAAC,EAAE,OAAO,CAAC;CACzB;AAOD,wBAAgB,UAAU,CAAC,IAAI,CAAC,EAAE,YAAY,GAAG,QAAQ,CAmBxD;AAED,wBAAgB,QAAQ,CAAC,IAAI,CAAC,EAAE,YAAY,GAAG,QAAQ,CAmBtD;AAED,wBAAgB,KAAK,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,GAAG,QAAQ,CAWxD;AAED,wBAAgB,UAAU,CAAC,MAAM,EAAE,OAAO,GAAG,QAAQ,CAwBpD"}
@@ -0,0 +1,74 @@
1
/**
 * Tags a metric function with its display name.
 *
 * @param {string} name - Value stored on the function as `metricName`.
 * @param {Function} fn - Metric function to tag (mutated in place).
 * @returns {Function} The same `fn` instance, now carrying `metricName`.
 */
function createMetricFn(name, fn) {
  return Object.assign(fn, { metricName: name });
}
5
/**
 * Creates a metric that scores 1 when the trimmed output equals the trimmed
 * expected value, and 0 otherwise.
 *
 * @param {{ caseSensitive?: boolean }} [opts] - Comparison is case-insensitive
 *   unless `caseSensitive` is set to true.
 * @returns {Function} Async metric function tagged with `metricName` "exactMatch".
 */
export function exactMatch(opts) {
  const caseSensitive = opts?.caseSensitive ?? false;
  // Identity when case matters, lower-casing otherwise.
  const fold = caseSensitive ? (text) => text : (text) => text.toLowerCase();

  return createMetricFn('exactMatch', async (result) => {
    const wanted = result.case.expected;
    if (wanted === undefined) {
      return { name: 'exactMatch', score: 0, details: 'no expected value provided' };
    }

    const actualText = result.output.trim();
    const wantedText = wanted.trim();

    if (fold(actualText) === fold(wantedText)) {
      return { name: 'exactMatch', score: 1, details: undefined };
    }
    return {
      name: 'exactMatch',
      score: 0,
      details: `expected "${wantedText}", got "${actualText}"`,
    };
  });
}
22
/**
 * Creates a metric that scores 1 when the output includes the expected value
 * as a substring, and 0 otherwise.
 *
 * @param {{ caseSensitive?: boolean }} [opts] - Comparison is case-insensitive
 *   unless `caseSensitive` is set to true.
 * @returns {Function} Async metric function tagged with `metricName` "contains".
 */
export function contains(opts) {
  const caseSensitive = opts?.caseSensitive ?? false;

  return createMetricFn('contains', async (result) => {
    const wanted = result.case.expected;
    if (wanted === undefined) {
      return { name: 'contains', score: 0, details: 'no expected value provided' };
    }

    let haystack = result.output;
    let needle = wanted;
    if (!caseSensitive) {
      haystack = haystack.toLowerCase();
      needle = needle.toLowerCase();
    }

    if (haystack.includes(needle)) {
      return { name: 'contains', score: 1, details: undefined };
    }
    // The failure message reports the expected value as given, not the lower-cased copy.
    return {
      name: 'contains',
      score: 0,
      details: `output does not contain "${wanted}"`,
    };
  });
}
39
/**
 * Creates a metric that scores 1 when the output matches `pattern`, and 0
 * otherwise.
 *
 * @param {string | RegExp} pattern - Pattern source, or a RegExp whose source
 *   and flags are copied.
 * @returns {Function} Async metric function tagged with `metricName` "regex".
 */
export function regex(pattern) {
  // Work on a private copy so the caller's RegExp (and its lastIndex) is never
  // touched by metric evaluation.
  const re =
    typeof pattern === 'string'
      ? new RegExp(pattern)
      : new RegExp(pattern.source, pattern.flags);

  return createMetricFn('regex', async (result) => {
    // With the `g` or `y` flag, test() resumes from lastIndex left by the
    // previous call; reset it so every case is evaluated from the start.
    re.lastIndex = 0;
    const match = re.test(result.output);
    return {
      name: 'regex',
      score: match ? 1 : 0,
      details: match ? undefined : `output does not match pattern ${re}`,
    };
  });
}
50
/**
 * Creates a metric that scores 1 when the output is valid JSON accepted by
 * `schema.safeParse`, and 0 otherwise.
 *
 * @param {{ safeParse: Function }} schema - Validator exposing `safeParse`
 *   (typed as a Zod schema in the declaration file).
 * @returns {Function} Async metric function tagged with `metricName` "jsonSchema".
 */
export function jsonSchema(schema) {
  const failure = (details) => ({ name: 'jsonSchema', score: 0, details });

  return createMetricFn('jsonSchema', async (result) => {
    let candidate;
    try {
      candidate = JSON.parse(result.output);
    } catch (parseError) {
      return failure(`invalid JSON: ${parseError.message}`);
    }

    const outcome = schema.safeParse(candidate);
    if (!outcome.success) {
      return failure(`schema validation failed: ${outcome.error.message}`);
    }
    return { name: 'jsonSchema', score: 1 };
  });
}
74
+ //# sourceMappingURL=deterministic.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"deterministic.js","sourceRoot":"","sources":["../../src/metrics/deterministic.ts"],"names":[],"mappings":"AAOA,SAAS,cAAc,CAAC,IAAY,EAAE,EAAY;IAChD,EAAE,CAAC,UAAU,GAAG,IAAI,CAAC;IACrB,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,IAAmB;IAC5C,MAAM,aAAa,GAAG,IAAI,EAAE,aAAa,IAAI,KAAK,CAAC;IAEnD,OAAO,cAAc,CAAC,YAAY,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE;QACpD,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC;QACtC,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC3B,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC,EAAE,OAAO,EAAE,4BAA4B,EAAE,CAAC;QACjF,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACpC,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,aAAa,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,WAAW,EAAE,KAAK,MAAM,CAAC,WAAW,EAAE,CAAC;QAEhG,OAAO;YACL,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACpB,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,aAAa,MAAM,WAAW,MAAM,GAAG;SACrE,CAAC;IACJ,CAAC,CAAa,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,IAAmB;IAC1C,MAAM,aAAa,GAAG,IAAI,EAAE,aAAa,IAAI,KAAK,CAAC;IAEnD,OAAO,cAAc,CAAC,UAAU,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE;QAClD,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC;QACtC,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC3B,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,EAAE,OAAO,EAAE,4BAA4B,EAAE,CAAC;QAC/E,CAAC;QAED,MAAM,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;QAC3E,MAAM,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;QACjE,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAEtC,OAAO;YACL,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACpB,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,4BAA4B,QAAQ,GAAG;SACrE,CAAC;IACJ,CAAC,CAAa,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,KAAK,CAAC,OAAwB;IAC5C,MAAM,EAAE,GAAG,OAAO,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;IAEvE,OAAO,cAAc,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE;QAC/C,MAAM,KAAK,GAAG,E
AAE,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QACrC,OAAO;YACL,IAAI,EAAE,OAAO;YACb,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACpB,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,iCAAiC,EAAE,EAAE;SACnE,CAAC;IACJ,CAAC,CAAa,CAAC,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,MAAe;IACxC,OAAO,cAAc,CAAC,YAAY,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE;QACpD,IAAI,MAAe,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QACrC,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,OAAO;gBACL,IAAI,EAAE,YAAY;gBAClB,KAAK,EAAE,CAAC;gBACR,OAAO,EAAE,iBAAkB,CAAW,CAAC,OAAO,EAAE;aACjD,CAAC;QACJ,CAAC;QAED,MAAM,UAAU,GAAG,MAAM,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QAC5C,IAAI,UAAU,CAAC,OAAO,EAAE,CAAC;YACvB,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;QAC1C,CAAC;QAED,OAAO;YACL,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,CAAC;YACR,OAAO,EAAE,6BAA6B,UAAU,CAAC,KAAK,CAAC,OAAO,EAAE;SACjE,CAAC;IACJ,CAAC,CAAa,CAAC,CAAC;AAClB,CAAC"}
@@ -0,0 +1,8 @@
1
+ export type { MetricScore, EvalCaseResult, MetricFn, StatisticalMetricFn } from './types';
2
+ export { exactMatch, contains, regex, jsonSchema } from './deterministic';
3
+ export { metric } from './custom';
4
+ export type { CustomMetricConfig } from './custom';
5
+ export { latency, cost, tokenUsage } from './statistical';
6
+ export { faithfulness, relevance, coherence, helpfulness, llmMetric, bindJudgeContext, } from './llm-judge';
7
+ export type { JudgeContext, LLMMetricFn } from './llm-judge';
8
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/metrics/index.ts"],"names":[],"mappings":"AAAA,YAAY,EAAE,WAAW,EAAE,cAAc,EAAE,QAAQ,EAAE,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAC1F,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,KAAK,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC1E,OAAO,EAAE,MAAM,EAAE,MAAM,UAAU,CAAC;AAClC,YAAY,EAAE,kBAAkB,EAAE,MAAM,UAAU,CAAC;AACnD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAC1D,OAAO,EACL,YAAY,EACZ,SAAS,EACT,SAAS,EACT,WAAW,EACX,SAAS,EACT,gBAAgB,GACjB,MAAM,aAAa,CAAC;AACrB,YAAY,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC"}
@@ -0,0 +1,5 @@
1
+ export { exactMatch, contains, regex, jsonSchema } from './deterministic';
2
+ export { metric } from './custom';
3
+ export { latency, cost, tokenUsage } from './statistical';
4
+ export { faithfulness, relevance, coherence, helpfulness, llmMetric, bindJudgeContext, } from './llm-judge';
5
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/metrics/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,KAAK,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC1E,OAAO,EAAE,MAAM,EAAE,MAAM,UAAU,CAAC;AAElC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,eAAe,CAAC;AAC1D,OAAO,EACL,YAAY,EACZ,SAAS,EACT,SAAS,EACT,WAAW,EACX,SAAS,EACT,gBAAgB,GACjB,MAAM,aAAa,CAAC"}
@@ -0,0 +1,27 @@
1
+ import type { JudgeConfig } from '../schema';
2
+ import type { MetricFn } from './types';
3
+ export interface JudgeContext {
4
+ cogitator: {
5
+ run: (opts: {
6
+ input: string;
7
+ }) => Promise<{
8
+ output: string;
9
+ }>;
10
+ };
11
+ judgeConfig: JudgeConfig;
12
+ }
13
+ export interface LLMMetricFn extends MetricFn {
14
+ readonly requiresJudge: true;
15
+ readonly __judgeSystemPrompt: string;
16
+ readonly __judgeName: string;
17
+ }
18
+ export declare function bindJudgeContext(metric: LLMMetricFn, context: JudgeContext): MetricFn;
19
+ export declare function faithfulness(): LLMMetricFn;
20
+ export declare function relevance(): LLMMetricFn;
21
+ export declare function coherence(): LLMMetricFn;
22
+ export declare function helpfulness(): LLMMetricFn;
23
+ export declare function llmMetric(opts: {
24
+ name: string;
25
+ prompt: string;
26
+ }): LLMMetricFn;
27
+ //# sourceMappingURL=llm-judge.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llm-judge.d.ts","sourceRoot":"","sources":["../../src/metrics/llm-judge.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,KAAK,EAAE,QAAQ,EAA+B,MAAM,SAAS,CAAC;AAErE,MAAM,WAAW,YAAY;IAC3B,SAAS,EAAE;QAAE,GAAG,EAAE,CAAC,IAAI,EAAE;YAAE,KAAK,EAAE,MAAM,CAAA;SAAE,KAAK,OAAO,CAAC;YAAE,MAAM,EAAE,MAAM,CAAA;SAAE,CAAC,CAAA;KAAE,CAAC;IAC7E,WAAW,EAAE,WAAW,CAAC;CAC1B;AAED,MAAM,WAAW,WAAY,SAAQ,QAAQ;IAC3C,QAAQ,CAAC,aAAa,EAAE,IAAI,CAAC;IAC7B,QAAQ,CAAC,mBAAmB,EAAE,MAAM,CAAC;IACrC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC;CAC9B;AAsCD,wBAAgB,gBAAgB,CAAC,MAAM,EAAE,WAAW,EAAE,OAAO,EAAE,YAAY,GAAG,QAAQ,CAmCrF;AAED,wBAAgB,YAAY,IAAI,WAAW,CAK1C;AAED,wBAAgB,SAAS,IAAI,WAAW,CAKvC;AAED,wBAAgB,SAAS,IAAI,WAAW,CAKvC;AAED,wBAAgB,WAAW,IAAI,WAAW,CAKzC;AAED,wBAAgB,SAAS,CAAC,IAAI,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GAAG,WAAW,CAK7E"}
@@ -0,0 +1,77 @@
1
// Matches a standalone 0, 0.x, 1 or 1.0… token in free text.
const SCORE_REGEX = /\b(0(?:\.\d+)?|1(?:\.0+)?)\b/;

/**
 * Attempts JSON.parse without throwing.
 *
 * @param {string} text - Candidate JSON text.
 * @returns {{ ok: boolean, value: unknown }} `ok` is false when parsing failed.
 */
function tryParseJson(text) {
  try {
    return { ok: true, value: JSON.parse(text) };
  } catch {
    return { ok: false, value: undefined };
  }
}

/**
 * Reads `{ score, reasoning }` from a parsed JSON value.
 *
 * @param {unknown} value - Result of JSON.parse.
 * @returns {{ score: number, reasoning: unknown } | null} null unless `value`
 *   is an object with a numeric `score`.
 */
function readJudgeObject(value) {
  if (value !== null && typeof value === 'object' && typeof value.score === 'number') {
    return { score: value.score, reasoning: value.reasoning };
  }
  return null;
}

/**
 * Extracts a score (and reasoning, when available) from a judge reply.
 *
 * Resolution order:
 *  1. The whole reply parses as a JSON object: use its numeric `score`, or
 *     return null when it has none.
 *  2. The reply embeds a JSON object (Markdown fence, surrounding prose): use
 *     that object's numeric `score`.
 *  3. Otherwise take the first standalone 0–1 number found in the text.
 *
 * @param {string} raw - Raw text returned by the judge.
 * @returns {{ score: number, reasoning?: unknown } | null} null when no score
 *   can be determined.
 */
function parseJudgeOutput(raw) {
  const whole = tryParseJson(raw);
  if (whole.ok && whole.value !== null && typeof whole.value === 'object') {
    return readJudgeObject(whole.value);
  }

  // Judges often wrap the JSON in a code fence or add commentary around it;
  // try the outermost {...} span before falling back to a bare number.
  const start = raw.indexOf('{');
  const end = raw.lastIndexOf('}');
  if (start !== -1 && end > start) {
    const embedded = readJudgeObject(tryParseJson(raw.slice(start, end + 1)).value);
    if (embedded) {
      return embedded;
    }
  }

  // Also reached for JSON primitives such as a bare `0.8`, which parse
  // successfully but carry no `score` property.
  const match = raw.match(SCORE_REGEX);
  return match ? { score: Number.parseFloat(match[1]) } : null;
}
18
/**
 * Builds an unbound judge metric: a placeholder metric function carrying the
 * judge name and system prompt as read-only properties.
 *
 * Calling the returned function directly always yields score 0 with a hint to
 * call bindJudgeContext first.
 *
 * @param {string} name - Metric name, exposed as `metricName` and `__judgeName`.
 * @param {string} systemPrompt - Prompt exposed as `__judgeSystemPrompt`.
 * @returns {Function} Async placeholder metric with `requiresJudge` set to true.
 */
function createJudgeMetric(name, systemPrompt) {
  const placeholder = async (_result) => ({
    name,
    score: 0,
    details: 'unbound judge metric — call bindJudgeContext first',
  });

  const readOnly = (value) => ({ value, writable: false });

  return Object.defineProperties(placeholder, {
    metricName: readOnly(name),
    requiresJudge: readOnly(true),
    __judgeSystemPrompt: readOnly(systemPrompt),
    __judgeName: readOnly(name),
  });
}
28
/**
 * Formats an eval case result as the user-facing part of the judge prompt.
 *
 * @param {{ case: { input: unknown, expected?: unknown }, output: unknown }} result
 *   Case result to describe.
 * @returns {string} Three lines: Input, Expected ("N/A" when falsy), Response.
 */
function buildUserMessage(result) {
  const expectedText = result.case.expected ? result.case.expected : 'N/A';
  const lines = [
    `Input: ${result.case.input}`,
    `Expected: ${expectedText}`,
    `Response: ${result.output}`,
  ];
  return lines.join('\n');
}
32
/**
 * Turns an unbound judge metric into a runnable metric by attaching the
 * context used to call the judge.
 *
 * The returned function sends the metric's system prompt plus the formatted
 * case to `context.cogitator.run`, parses the reply, and clamps the score to
 * the range [0, 1]. It never rejects: failures are reported as score 0 with an
 * explanatory `details` string.
 *
 * @param {{ __judgeName: string, __judgeSystemPrompt: string }} metric
 *   Unbound judge metric.
 * @param {{ cogitator: { run: Function } }} context - Judge execution context.
 * @returns {Function} Async metric function with a read-only `metricName`.
 */
export function bindJudgeContext(metric, context) {
  const name = metric.__judgeName;
  const systemPrompt = metric.__judgeSystemPrompt;

  const bound = async (result) => {
    try {
      const userMessage = buildUserMessage(result);
      const prompt = `${systemPrompt}\n\n${userMessage}`;

      const runResult = await context.cogitator.run({ input: prompt });
      const parsed = parseJudgeOutput(runResult.output);
      if (!parsed) {
        return { name, score: 0, details: 'could not parse judge response' };
      }

      const clamped = Math.max(0, Math.min(1, parsed.score));
      return {
        name,
        score: clamped,
        ...(parsed.reasoning !== undefined && { details: parsed.reasoning }),
      };
    } catch (err) {
      // Anything can be thrown or used as a rejection reason; fall back to its
      // string form so the details never read "judge error: undefined".
      const reason = err?.message ?? String(err);
      return {
        name,
        score: 0,
        details: `judge error: ${reason}`,
      };
    }
  };

  Object.defineProperty(bound, 'metricName', { value: name, writable: false });
  return bound;
}
62
/**
 * Creates the built-in "faithfulness" judge metric.
 *
 * @returns {Function} Unbound judge metric produced by createJudgeMetric.
 */
export function faithfulness() {
  const systemPrompt = 'You are evaluating the faithfulness of an AI assistant\'s response.\n\nGiven the input and the response, rate how faithful the response is to the facts and information in the input.\n\nScore from 0.0 (completely unfaithful) to 1.0 (perfectly faithful).\n\nRespond with JSON: {"score": <number>, "reasoning": "<explanation>"}';
  return createJudgeMetric('faithfulness', systemPrompt);
}
65
/**
 * Creates the built-in "relevance" judge metric.
 *
 * @returns {Function} Unbound judge metric produced by createJudgeMetric.
 */
export function relevance() {
  const systemPrompt = 'You are evaluating the relevance of an AI assistant\'s response.\n\nGiven the input and the response, rate how relevant the response is to the question asked.\n\nScore from 0.0 (completely irrelevant) to 1.0 (perfectly relevant).\n\nRespond with JSON: {"score": <number>, "reasoning": "<explanation>"}';
  return createJudgeMetric('relevance', systemPrompt);
}
68
/**
 * Creates the built-in "coherence" judge metric.
 *
 * @returns {Function} Unbound judge metric produced by createJudgeMetric.
 */
export function coherence() {
  const systemPrompt = 'You are evaluating the coherence of an AI assistant\'s response.\n\nGiven the input and the response, rate how coherent, logical, and well-structured the response is.\n\nScore from 0.0 (completely incoherent) to 1.0 (perfectly coherent).\n\nRespond with JSON: {"score": <number>, "reasoning": "<explanation>"}';
  return createJudgeMetric('coherence', systemPrompt);
}
71
/**
 * Creates the built-in "helpfulness" judge metric.
 *
 * @returns {Function} Unbound judge metric produced by createJudgeMetric.
 */
export function helpfulness() {
  const systemPrompt = 'You are evaluating the helpfulness of an AI assistant\'s response.\n\nGiven the input and the response, rate how helpful and useful the response would be to the user.\n\nScore from 0.0 (completely unhelpful) to 1.0 (perfectly helpful).\n\nRespond with JSON: {"score": <number>, "reasoning": "<explanation>"}';
  return createJudgeMetric('helpfulness', systemPrompt);
}
74
/**
 * Creates a custom judge metric from a caller-supplied name and prompt.
 *
 * The scoring-range and JSON response instructions are appended to the prompt.
 *
 * @param {{ name: string, prompt: string }} opts - Metric name and judge prompt.
 * @returns {Function} Unbound judge metric produced by createJudgeMetric.
 */
export function llmMetric(opts) {
  const { name, prompt } = opts;
  const systemPrompt = `${prompt}\n\nScore from 0.0 to 1.0.\n\nRespond with JSON: {"score": <number>, "reasoning": "<explanation>"}`;
  return createJudgeMetric(name, systemPrompt);
}
77
+ //# sourceMappingURL=llm-judge.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"llm-judge.js","sourceRoot":"","sources":["../../src/metrics/llm-judge.ts"],"names":[],"mappings":"AAcA,MAAM,WAAW,GAAG,8BAA8B,CAAC;AAEnD,SAAS,gBAAgB,CAAC,GAAW;IACnC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC/B,IAAI,OAAO,MAAM,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YACrC,OAAO,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC;QAC9D,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;QACrC,IAAI,KAAK,EAAE,CAAC;YACV,OAAO,EAAE,KAAK,EAAE,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACzC,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAY,EAAE,YAAoB;IAC3D,MAAM,EAAE,GAAG,CAAC,KAAK,EAAE,OAAuB,EAAwB,EAAE;QAClE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE,OAAO,EAAE,oDAAoD,EAAE,CAAC;IAC3F,CAAC,CAAgB,CAAC;IAElB,MAAM,CAAC,cAAc,CAAC,EAAE,EAAE,YAAY,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;IAC1E,MAAM,CAAC,cAAc,CAAC,EAAE,EAAE,eAAe,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;IAC7E,MAAM,CAAC,cAAc,CAAC,EAAE,EAAE,qBAAqB,EAAE,EAAE,KAAK,EAAE,YAAY,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;IAC3F,MAAM,CAAC,cAAc,CAAC,EAAE,EAAE,aAAa,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;IAE3E,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,gBAAgB,CAAC,MAAsB;IAC9C,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,IAAI,KAAK,CAAC;IAC/C,OAAO,UAAU,MAAM,CAAC,IAAI,CAAC,KAAK,eAAe,QAAQ,eAAe,MAAM,CAAC,MAAM,EAAE,CAAC;AAC1F,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,MAAmB,EAAE,OAAqB;IACzE,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,CAAC;IAChC,MAAM,YAAY,GAAG,MAAM,CAAC,mBAAmB,CAAC;IAEhD,MAAM,KAAK,GAAG,CAAC,KAAK,EAAE,MAAsB,EAAwB,EAAE;QACpE,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC;YAC7C,MAAM,MAAM,GAAG,GAAG,YAAY,OAAO,WAAW,EAAE,CAAC;YAEnD,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,SAAS,CAAC,GAAG,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;YACjE,MAAM,MAAM,GAAG,gBAAgB,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;YAElD,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE,OAAO,EAAE,gCAAgC,EAAE,CAAC;YACvE,
CAAC;YAED,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;YAEvD,OAAO;gBACL,IAAI;gBACJ,KAAK,EAAE,OAAO;gBACd,GAAG,CAAC,MAAM,CAAC,SAAS,KAAK,SAAS,IAAI,EAAE,OAAO,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC;aACrE,CAAC;QACJ,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO;gBACL,IAAI;gBACJ,KAAK,EAAE,CAAC;gBACR,OAAO,EAAE,gBAAiB,GAAa,CAAC,OAAO,EAAE;aAClD,CAAC;QACJ,CAAC;IACH,CAAC,CAAa,CAAC;IAEf,MAAM,CAAC,cAAc,CAAC,KAAK,EAAE,YAAY,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;IAE7E,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,YAAY;IAC1B,OAAO,iBAAiB,CACtB,cAAc,EACd,sUAAsU,CACvU,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,SAAS;IACvB,OAAO,iBAAiB,CACtB,WAAW,EACX,+SAA+S,CAChT,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,SAAS;IACvB,OAAO,iBAAiB,CACtB,WAAW,EACX,uTAAuT,CACxT,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,WAAW;IACzB,OAAO,iBAAiB,CACtB,aAAa,EACb,qTAAqT,CACtT,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,IAAsC;IAC9D,OAAO,iBAAiB,CACtB,IAAI,CAAC,IAAI,EACT,GAAG,IAAI,CAAC,MAAM,oGAAoG,CACnH,CAAC;AACJ,CAAC"}
@@ -0,0 +1,5 @@
1
+ import type { StatisticalMetricFn } from './types';
2
+ export declare function latency(): StatisticalMetricFn;
3
+ export declare function cost(): StatisticalMetricFn;
4
+ export declare function tokenUsage(): StatisticalMetricFn;
5
+ //# sourceMappingURL=statistical.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"statistical.d.ts","sourceRoot":"","sources":["../../src/metrics/statistical.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAA+B,mBAAmB,EAAE,MAAM,SAAS,CAAC;AAYhF,wBAAgB,OAAO,IAAI,mBAAmB,CAmB7C;AAED,wBAAgB,IAAI,IAAI,mBAAmB,CA8B1C;AAED,wBAAgB,UAAU,IAAI,mBAAmB,CAkChD"}