@cogitator-ai/evals 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +456 -0
- package/dist/assertions/custom.d.ts +11 -0
- package/dist/assertions/custom.d.ts.map +1 -0
- package/dist/assertions/custom.js +13 -0
- package/dist/assertions/custom.js.map +1 -0
- package/dist/assertions/index.d.ts +27 -0
- package/dist/assertions/index.d.ts.map +1 -0
- package/dist/assertions/index.js +4 -0
- package/dist/assertions/index.js.map +1 -0
- package/dist/assertions/regression.d.ts +5 -0
- package/dist/assertions/regression.d.ts.map +1 -0
- package/dist/assertions/regression.js +58 -0
- package/dist/assertions/regression.js.map +1 -0
- package/dist/assertions/threshold.d.ts +3 -0
- package/dist/assertions/threshold.d.ts.map +1 -0
- package/dist/assertions/threshold.js +45 -0
- package/dist/assertions/threshold.js.map +1 -0
- package/dist/datasets/csv-loader.d.ts +3 -0
- package/dist/datasets/csv-loader.d.ts.map +1 -0
- package/dist/datasets/csv-loader.js +43 -0
- package/dist/datasets/csv-loader.js.map +1 -0
- package/dist/datasets/dataset.d.ts +15 -0
- package/dist/datasets/dataset.d.ts.map +1 -0
- package/dist/datasets/dataset.js +62 -0
- package/dist/datasets/dataset.js.map +1 -0
- package/dist/datasets/index.d.ts +4 -0
- package/dist/datasets/index.d.ts.map +1 -0
- package/dist/datasets/index.js +4 -0
- package/dist/datasets/index.js.map +1 -0
- package/dist/datasets/jsonl-loader.d.ts +3 -0
- package/dist/datasets/jsonl-loader.d.ts.map +1 -0
- package/dist/datasets/jsonl-loader.js +27 -0
- package/dist/datasets/jsonl-loader.js.map +1 -0
- package/dist/eval-builder.d.ts +30 -0
- package/dist/eval-builder.d.ts.map +1 -0
- package/dist/eval-builder.js +82 -0
- package/dist/eval-builder.js.map +1 -0
- package/dist/eval-comparison.d.ts +43 -0
- package/dist/eval-comparison.d.ts.map +1 -0
- package/dist/eval-comparison.js +125 -0
- package/dist/eval-comparison.js.map +1 -0
- package/dist/eval-suite.d.ts +63 -0
- package/dist/eval-suite.d.ts.map +1 -0
- package/dist/eval-suite.js +230 -0
- package/dist/eval-suite.js.map +1 -0
- package/dist/index.d.ts +31 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +20 -0
- package/dist/index.js.map +1 -0
- package/dist/metrics/custom.d.ts +18 -0
- package/dist/metrics/custom.d.ts.map +1 -0
- package/dist/metrics/custom.js +28 -0
- package/dist/metrics/custom.js.map +1 -0
- package/dist/metrics/deterministic.d.ts +11 -0
- package/dist/metrics/deterministic.d.ts.map +1 -0
- package/dist/metrics/deterministic.js +74 -0
- package/dist/metrics/deterministic.js.map +1 -0
- package/dist/metrics/index.d.ts +8 -0
- package/dist/metrics/index.d.ts.map +1 -0
- package/dist/metrics/index.js +5 -0
- package/dist/metrics/index.js.map +1 -0
- package/dist/metrics/llm-judge.d.ts +27 -0
- package/dist/metrics/llm-judge.d.ts.map +1 -0
- package/dist/metrics/llm-judge.js +77 -0
- package/dist/metrics/llm-judge.js.map +1 -0
- package/dist/metrics/statistical.d.ts +5 -0
- package/dist/metrics/statistical.d.ts.map +1 -0
- package/dist/metrics/statistical.js +85 -0
- package/dist/metrics/statistical.js.map +1 -0
- package/dist/metrics/types.d.ts +31 -0
- package/dist/metrics/types.d.ts.map +1 -0
- package/dist/metrics/types.js +2 -0
- package/dist/metrics/types.js.map +1 -0
- package/dist/reporters/ci.d.ts +3 -0
- package/dist/reporters/ci.d.ts.map +1 -0
- package/dist/reporters/ci.js +21 -0
- package/dist/reporters/ci.js.map +1 -0
- package/dist/reporters/console.d.ts +3 -0
- package/dist/reporters/console.d.ts.map +1 -0
- package/dist/reporters/console.js +46 -0
- package/dist/reporters/console.js.map +1 -0
- package/dist/reporters/csv.d.ts +5 -0
- package/dist/reporters/csv.d.ts.map +1 -0
- package/dist/reporters/csv.js +31 -0
- package/dist/reporters/csv.js.map +1 -0
- package/dist/reporters/index.d.ts +50 -0
- package/dist/reporters/index.d.ts.map +1 -0
- package/dist/reporters/index.js +28 -0
- package/dist/reporters/index.js.map +1 -0
- package/dist/reporters/json.d.ts +5 -0
- package/dist/reporters/json.d.ts.map +1 -0
- package/dist/reporters/json.js +5 -0
- package/dist/reporters/json.js.map +1 -0
- package/dist/schema.d.ts +29 -0
- package/dist/schema.d.ts.map +1 -0
- package/dist/schema.js +23 -0
- package/dist/schema.js.map +1 -0
- package/dist/stats/index.d.ts +6 -0
- package/dist/stats/index.d.ts.map +1 -0
- package/dist/stats/index.js +4 -0
- package/dist/stats/index.js.map +1 -0
- package/dist/stats/mcnemar.d.ts +7 -0
- package/dist/stats/mcnemar.d.ts.map +1 -0
- package/dist/stats/mcnemar.js +34 -0
- package/dist/stats/mcnemar.js.map +1 -0
- package/dist/stats/percentiles.d.ts +15 -0
- package/dist/stats/percentiles.d.ts.map +1 -0
- package/dist/stats/percentiles.js +54 -0
- package/dist/stats/percentiles.js.map +1 -0
- package/dist/stats/t-test.d.ts +9 -0
- package/dist/stats/t-test.d.ts.map +1 -0
- package/dist/stats/t-test.js +129 -0
- package/dist/stats/t-test.js.map +1 -0
- package/dist/tools.d.ts +16 -0
- package/dist/tools.d.ts.map +1 -0
- package/dist/tools.js +58 -0
- package/dist/tools.js.map +1 -0
- package/package.json +57 -0
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { aggregate, mean } from '../stats';
|
|
2
|
+
function createStatisticalFn(name, fn) {
|
|
3
|
+
const statFn = fn;
|
|
4
|
+
statFn.metricName = name;
|
|
5
|
+
return statFn;
|
|
6
|
+
}
|
|
7
|
+
/**
 * Builds the `latency` statistical metric.
 *
 * The returned function summarises the `duration` of every case result. Its
 * `score` is always 0; the figures are carried in `metadata`
 * (p50, p95, p99, mean, median, min, max).
 *
 * @returns A statistical metric function tagged with `metricName === 'latency'`.
 */
export function latency() {
  return createStatisticalFn('latency', (results) => {
    // No cases ran: report zeroed statistics, the same way cost() and
    // tokenUsage() do, instead of handing an empty sample to aggregate().
    if (results.length === 0) {
      return {
        name: 'latency',
        score: 0,
        metadata: { p50: 0, p95: 0, p99: 0, mean: 0, median: 0, min: 0, max: 0 },
      };
    }

    const stats = aggregate(results.map((r) => r.duration));
    return {
      name: 'latency',
      score: 0,
      metadata: {
        p50: stats.p50,
        p95: stats.p95,
        p99: stats.p99,
        mean: stats.mean,
        median: stats.median,
        min: stats.min,
        max: stats.max,
      },
    };
  });
}
|
|
26
|
+
/**
 * Builds the `cost` statistical metric.
 *
 * Only results that carry a `usage` object contribute. The `score` is always
 * 0; the figures are carried in `metadata` (total, mean, median, min, max).
 * When no result has usage data, every figure is reported as 0.
 *
 * @returns A statistical metric function tagged with `metricName === 'cost'`.
 */
export function cost() {
  return createStatisticalFn('cost', (results) => {
    const costs = [];
    for (const r of results) {
      if (r.usage) {
        costs.push(r.usage.cost);
      }
    }

    if (costs.length === 0) {
      return {
        name: 'cost',
        score: 0,
        metadata: { total: 0, mean: 0, median: 0, min: 0, max: 0 },
      };
    }

    // aggregate() runs before the sum, exactly as in the original ordering.
    const stats = aggregate(costs);
    const total = costs.reduce((sum, value) => sum + value, 0);

    return {
      name: 'cost',
      score: 0,
      metadata: {
        total,
        mean: stats.mean,
        median: stats.median,
        min: stats.min,
        max: stats.max,
      },
    };
  });
}
|
|
54
|
+
/**
 * Builds the `tokenUsage` statistical metric.
 *
 * Only results that carry a `usage` object contribute. The `score` is always
 * 0; `metadata` holds the summed input/output token counts, their combined
 * total, and the mean input/output tokens per contributing result. When no
 * result has usage data, every figure is reported as 0.
 *
 * @returns A statistical metric function tagged with `metricName === 'tokenUsage'`.
 */
export function tokenUsage() {
  return createStatisticalFn('tokenUsage', (results) => {
    const usages = results.filter((r) => r.usage).map((r) => r.usage);

    if (usages.length === 0) {
      return {
        name: 'tokenUsage',
        score: 0,
        metadata: { totalInput: 0, totalOutput: 0, totalTokens: 0, meanInput: 0, meanOutput: 0 },
      };
    }

    const inputTokens = usages.map((u) => u.inputTokens);
    const outputTokens = usages.map((u) => u.outputTokens);
    const totalInput = inputTokens.reduce((sum, count) => sum + count, 0);
    const totalOutput = outputTokens.reduce((sum, count) => sum + count, 0);

    return {
      name: 'tokenUsage',
      score: 0,
      metadata: {
        totalInput,
        totalOutput,
        totalTokens: totalInput + totalOutput,
        meanInput: mean(inputTokens),
        meanOutput: mean(outputTokens),
      },
    };
  });
}
|
|
85
|
+
//# sourceMappingURL=statistical.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"statistical.js","sourceRoot":"","sources":["../../src/metrics/statistical.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,UAAU,CAAC;AAE3C,SAAS,mBAAmB,CAC1B,IAAY,EACZ,EAA8C;IAE9C,MAAM,MAAM,GAAG,EAAyB,CAAC;IACzC,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC;IACzB,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,OAAO;IACrB,OAAO,mBAAmB,CAAC,SAAS,EAAE,CAAC,OAAyB,EAAE,EAAE;QAClE,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QACjD,MAAM,KAAK,GAAG,SAAS,CAAC,SAAS,CAAC,CAAC;QAEnC,OAAO;YACL,IAAI,EAAE,SAAS;YACf,KAAK,EAAE,CAAC;YACR,QAAQ,EAAE;gBACR,GAAG,EAAE,KAAK,CAAC,GAAG;gBACd,GAAG,EAAE,KAAK,CAAC,GAAG;gBACd,GAAG,EAAE,KAAK,CAAC,GAAG;gBACd,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,MAAM,EAAE,KAAK,CAAC,MAAM;gBACpB,GAAG,EAAE,KAAK,CAAC,GAAG;gBACd,GAAG,EAAE,KAAK,CAAC,GAAG;aACf;SACF,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,UAAU,IAAI;IAClB,OAAO,mBAAmB,CAAC,MAAM,EAAE,CAAC,OAAyB,EAAE,EAAE;QAC/D,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAM,CAAC,IAAI,CAAC,CAAC;QAEvE,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE,CAAC;gBACR,QAAQ,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE;aAC3D,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC;QAC/B,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,KAAK,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;QAED,OAAO;YACL,IAAI,EAAE,MAAM;YACZ,KAAK,EAAE,CAAC;YACR,QAAQ,EAAE;gBACR,KAAK;gBACL,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,MAAM,EAAE,KAAK,CAAC,MAAM;gBACpB,GAAG,EAAE,KAAK,CAAC,GAAG;gBACd,GAAG,EAAE,KAAK,CAAC,GAAG;aACf;SACF,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,UAAU,UAAU;IACxB,OAAO,mBAAmB,CAAC,YAAY,EAAE,CAAC,OAAyB,EAAE,EAAE;QACrE,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QAEjD,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC
;YAC3B,OAAO;gBACL,IAAI,EAAE,YAAY;gBAClB,KAAK,EAAE,CAAC;gBACR,QAAQ,EAAE,EAAE,UAAU,EAAE,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE;aACzF,CAAC;QACJ,CAAC;QAED,MAAM,WAAW,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAM,CAAC,WAAW,CAAC,CAAC;QAC/D,MAAM,YAAY,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAM,CAAC,YAAY,CAAC,CAAC;QAEjE,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,UAAU,IAAI,WAAW,CAAC,CAAC,CAAC,CAAC;YAC7B,WAAW,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;QACjC,CAAC;QAED,OAAO;YACL,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,CAAC;YACR,QAAQ,EAAE;gBACR,UAAU;gBACV,WAAW;gBACX,WAAW,EAAE,UAAU,GAAG,WAAW;gBACrC,SAAS,EAAE,IAAI,CAAC,WAAW,CAAC;gBAC5B,UAAU,EAAE,IAAI,CAAC,YAAY,CAAC;aAC/B;SACF,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import type { EvalCase } from '../schema';
|
|
2
|
+
/**
 * Score reported by a metric.
 *
 * The statistical metrics in this package (latency, cost, tokenUsage) set
 * `score` to 0 and carry their figures in `metadata`.
 */
export interface MetricScore {
|
|
3
|
+
/** Name the score is reported under. */
name: string;
|
|
4
|
+
/** Numeric score value. */
score: number;
|
|
5
|
+
/** Optional free-text detail accompanying the score. */
details?: string;
|
|
6
|
+
/** Optional extra data attached by the metric; values are untyped (`unknown`). */
metadata?: Record<string, unknown>;
|
|
7
|
+
}
|
|
8
|
+
/**
 * Outcome of running one evaluation case; this is the value metric functions receive.
 */
export interface EvalCaseResult {
|
|
9
|
+
/** The evaluation case this result belongs to. */
case: EvalCase;
|
|
10
|
+
/** Text output produced for the case. */
output: string;
|
|
11
|
+
/** Duration of the case run (unit not stated here; presumably milliseconds, TODO confirm). */
duration: number;
|
|
12
|
+
/** Optional usage figures; the cost and tokenUsage metrics skip results without it. */
usage?: {
|
|
13
|
+
inputTokens: number;
|
|
14
|
+
outputTokens: number;
|
|
15
|
+
totalTokens: number;
|
|
16
|
+
/** Cost of the run (currency not stated here; TODO confirm). */
cost: number;
|
|
17
|
+
/** Duration recorded with the usage data, separate from the top-level `duration`. */
duration: number;
|
|
18
|
+
};
|
|
19
|
+
/** Optional read-only list of tool calls recorded for the case. */
toolCalls?: readonly {
|
|
20
|
+
id: string;
|
|
21
|
+
name: string;
|
|
22
|
+
arguments: Record<string, unknown>;
|
|
23
|
+
}[];
|
|
24
|
+
}
|
|
25
|
+
/**
 * Per-case metric: an async function that scores a single `EvalCaseResult`,
 * with the metric's name attached to the function as `metricName`.
 */
export type MetricFn = ((result: EvalCaseResult) => Promise<MetricScore>) & {
|
|
26
|
+
/** Name the metric is registered and reported under. */
metricName: string;
|
|
27
|
+
};
|
|
28
|
+
/**
 * Aggregate metric: a synchronous function that scores the whole list of case
 * results at once, with the metric's name attached as `metricName`.
 */
export type StatisticalMetricFn = ((results: EvalCaseResult[]) => MetricScore) & {
|
|
29
|
+
/** Name the metric is registered and reported under. */
metricName: string;
|
|
30
|
+
};
|
|
31
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/metrics/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAE1C,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,QAAQ,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE;QACN,WAAW,EAAE,MAAM,CAAC;QACpB,YAAY,EAAE,MAAM,CAAC;QACrB,WAAW,EAAE,MAAM,CAAC;QACpB,IAAI,EAAE,MAAM,CAAC;QACb,QAAQ,EAAE,MAAM,CAAC;KAClB,CAAC;IACF,SAAS,CAAC,EAAE,SAAS;QACnB,EAAE,EAAE,MAAM,CAAC;QACX,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACpC,EAAE,CAAC;CACL;AAED,MAAM,MAAM,QAAQ,GAAG,CAAC,CAAC,MAAM,EAAE,cAAc,KAAK,OAAO,CAAC,WAAW,CAAC,CAAC,GAAG;IAC1E,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG,CAAC,CAAC,OAAO,EAAE,cAAc,EAAE,KAAK,WAAW,CAAC,GAAG;IAC/E,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/metrics/types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ci.d.ts","sourceRoot":"","sources":["../../src/reporters/ci.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAM/C,wBAAgB,QAAQ,CAAC,MAAM,EAAE,eAAe,GAAG,IAAI,CAqBtD"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
const GREEN = '\x1b[32m';
|
|
2
|
+
const RED = '\x1b[31m';
|
|
3
|
+
const RESET = '\x1b[0m';
|
|
4
|
+
export function ciReport(result) {
|
|
5
|
+
const passed = result.assertions.filter((a) => a.passed).length;
|
|
6
|
+
const failed = result.assertions.filter((a) => !a.passed).length;
|
|
7
|
+
console.log(`Eval: ${result.stats.total} cases | ${result.stats.duration}ms | $${result.stats.cost}`);
|
|
8
|
+
for (const a of result.assertions) {
|
|
9
|
+
if (a.passed) {
|
|
10
|
+
console.log(` ${GREEN}PASS${RESET} ${a.name}`);
|
|
11
|
+
}
|
|
12
|
+
else {
|
|
13
|
+
console.log(` ${RED}FAIL${RESET} ${a.name}: ${a.message}`);
|
|
14
|
+
}
|
|
15
|
+
}
|
|
16
|
+
console.log(`Result: ${passed} passed, ${failed} failed`);
|
|
17
|
+
if (failed > 0) {
|
|
18
|
+
process.exit(1);
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
//# sourceMappingURL=ci.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ci.js","sourceRoot":"","sources":["../../src/reporters/ci.ts"],"names":[],"mappings":"AAEA,MAAM,KAAK,GAAG,UAAU,CAAC;AACzB,MAAM,GAAG,GAAG,UAAU,CAAC;AACvB,MAAM,KAAK,GAAG,SAAS,CAAC;AAExB,MAAM,UAAU,QAAQ,CAAC,MAAuB;IAC9C,MAAM,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAChE,MAAM,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAEjE,OAAO,CAAC,GAAG,CACT,SAAS,MAAM,CAAC,KAAK,CAAC,KAAK,YAAY,MAAM,CAAC,KAAK,CAAC,QAAQ,SAAS,MAAM,CAAC,KAAK,CAAC,IAAI,EAAE,CACzF,CAAC;IAEF,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QAClC,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,OAAO,KAAK,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QAClD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,OAAO,KAAK,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;QAC9D,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,WAAW,MAAM,YAAY,MAAM,SAAS,CAAC,CAAC;IAE1D,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;QACf,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"console.d.ts","sourceRoot":"","sources":["../../src/reporters/console.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAgB/C,wBAAgB,aAAa,CAAC,MAAM,EAAE,eAAe,GAAG,IAAI,CA2C3D"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
// ANSI escape sequences used by the console reporter to style its output.
// Each styled span is closed with RESET.
const GREEN = '\x1b[32m';
|
|
2
|
+
const RED = '\x1b[31m';
|
|
3
|
+
const BOLD = '\x1b[1m';
|
|
4
|
+
const DIM = '\x1b[2m';
|
|
5
|
+
// Clears all styling set by the sequences above.
const RESET = '\x1b[0m';
|
|
6
|
+
/**
 * Right-pads `str` with spaces up to `len` characters.
 *
 * Strings that are already `len` characters or longer are returned unchanged
 * (they are never truncated).
 *
 * @param str Text to pad.
 * @param len Target width in characters.
 * @returns The padded text.
 */
function pad(str, len) {
  return str.padEnd(len);
}
|
|
9
|
+
/**
 * Formats a number for the metrics table.
 *
 * Integers are printed as-is; every other number is printed with exactly
 * four decimal places.
 *
 * @param n Number to format.
 * @returns The formatted text.
 */
function fmt(n) {
  if (Number.isInteger(n)) {
    return String(n);
  }
  return n.toFixed(4);
}
|
|
12
|
+
/**
 * Prints a human-readable report of an eval run to stdout.
 *
 * Three sections are written, in this order:
 *  1. a table of aggregated metrics (name, mean, median, p95, min, max),
 *     only when at least one aggregated metric exists;
 *  2. the list of assertions with a coloured check or cross,
 *     only when at least one assertion exists;
 *  3. a one-line summary (cases, duration, cost, passed / failed counts).
 *
 * @param result Suite result; `aggregated`, `assertions` and `stats` are read.
 */
export function consoleReport(result) {
  const metrics = Object.values(result.aggregated);

  if (metrics.length > 0) {
    // One entry per table column: heading, width, and how to render a cell.
    const columns = [
      { title: 'Metric', width: 14, cell: (m) => m.name },
      { title: 'Mean', width: 10, cell: (m) => fmt(m.mean) },
      { title: 'Median', width: 10, cell: (m) => fmt(m.median) },
      { title: 'P95', width: 10, cell: (m) => fmt(m.p95) },
      { title: 'Min', width: 10, cell: (m) => fmt(m.min) },
      { title: 'Max', width: 10, cell: (m) => fmt(m.max) },
    ];

    const header = columns.map((c) => pad(c.title, c.width)).join('');
    console.log(`\n${BOLD}${header}${RESET}`);
    // Rule under the header, exactly as wide as the header text.
    console.log(DIM + '─'.repeat(header.length) + RESET);

    for (const m of metrics) {
      console.log(columns.map((c) => pad(c.cell(m), c.width)).join(''));
    }
  }

  if (result.assertions.length > 0) {
    console.log(`\n${BOLD}Assertions${RESET}`);
    for (const a of result.assertions) {
      const [color, mark] = a.passed ? [GREEN, '✓'] : [RED, '✗'];
      console.log(` ${color}${mark}${RESET} ${color}${a.name}${RESET} ${DIM}${a.message}${RESET}`);
    }
  }

  const tally = { passed: 0, failed: 0 };
  result.assertions.forEach((a) => {
    tally[a.passed ? 'passed' : 'failed'] += 1;
  });

  // The failed count is shown only when something actually failed.
  const failedPart = tally.failed > 0 ? `${RED}${tally.failed} failed${RESET}` : '';
  console.log(`\n${BOLD}Summary${RESET}: ${result.stats.total} cases | ${result.stats.duration}ms | $${result.stats.cost} | ${GREEN}${tally.passed} passed${RESET} ${failedPart}`);
}
|
|
46
|
+
//# sourceMappingURL=console.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"console.js","sourceRoot":"","sources":["../../src/reporters/console.ts"],"names":[],"mappings":"AAEA,MAAM,KAAK,GAAG,UAAU,CAAC;AACzB,MAAM,GAAG,GAAG,UAAU,CAAC;AACvB,MAAM,IAAI,GAAG,SAAS,CAAC;AACvB,MAAM,GAAG,GAAG,SAAS,CAAC;AACtB,MAAM,KAAK,GAAG,SAAS,CAAC;AAExB,SAAS,GAAG,CAAC,GAAW,EAAE,GAAW;IACnC,OAAO,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC;AACtE,CAAC;AAED,SAAS,GAAG,CAAC,CAAS;IACpB,OAAO,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;AACxD,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,MAAuB;IACnD,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;IAEjD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,EAAE,MAAM,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;QAC7E,MAAM,MAAM,GACV,GAAG,CAAC,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC;YAC1B,GAAG,CAAC,MAAM,EAAE,IAAI,CAAC,IAAI,CAAC;YACtB,GAAG,CAAC,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC;YAC1B,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC;YACpB,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC;YACpB,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;QAEvB,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,GAAG,MAAM,GAAG,KAAK,EAAE,CAAC,CAAC;QAC1C,OAAO,CAAC,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC;QAErD,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,MAAM,GAAG,GACP,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC;gBACxB,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC;gBAC3B,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC;gBAC/B,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC;gBACzB,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC;gBACzB,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;YAC5B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACnB,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACjC,OAAO,CAAC,GAAG,CAAC,KAAK,IA
AI,aAAa,KAAK,EAAE,CAAC,CAAC;QAC3C,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;YAClC,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,KAAK,IAAI,KAAK,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,IAAI,KAAK,EAAE,CAAC;YAChE,MAAM,KAAK,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,IAAI,KAAK,GAAG,CAAC,CAAC,IAAI,GAAG,KAAK,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,GAAG,KAAK,EAAE,CAAC,CAAC;QAChF,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAChE,MAAM,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAEjE,OAAO,CAAC,GAAG,CACT,KAAK,IAAI,UAAU,KAAK,KAAK,MAAM,CAAC,KAAK,CAAC,KAAK,YAAY,MAAM,CAAC,KAAK,CAAC,QAAQ,SAAS,MAAM,CAAC,KAAK,CAAC,IAAI,MAAM,KAAK,GAAG,MAAM,UAAU,KAAK,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,GAAG,MAAM,UAAU,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CACtM,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"csv.d.ts","sourceRoot":"","sources":["../../src/reporters/csv.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAS/C,wBAAgB,SAAS,CAAC,MAAM,EAAE,eAAe,EAAE,OAAO,EAAE;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAyBlF"}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { writeFileSync } from 'node:fs';
|
|
2
|
+
/**
 * Escapes one value for use as a CSV field.
 *
 * A field is wrapped in double quotes, with embedded double quotes doubled,
 * when it contains a comma, a double quote, or a line-break character. Both
 * `\n` and `\r` count as line breaks: a bare carriage return is treated as a
 * record separator by many CSV readers, so it must be quoted too.
 *
 * @param value Field text.
 * @returns The field, quoted and escaped if necessary, otherwise unchanged.
 */
function escapeField(value) {
  const needsQuoting = [',', '"', '\n', '\r'].some((ch) => value.includes(ch));
  if (needsQuoting) {
    return `"${value.replace(/"/g, '""')}"`;
  }
  return value;
}
|
|
8
|
+
/**
 * Writes the per-case results of an eval run to a CSV file.
 *
 * Columns are `input`, `expected`, `output`, `duration`, followed by one
 * column per metric name seen in any result (in first-seen order). A result
 * that has no score for a metric gets an empty cell in that column.
 *
 * Metric names are escaped like any other field, so a name containing a
 * comma, quote or line break cannot shift or split the header row.
 *
 * @param result Suite result; only `result.results` is read.
 * @param options `options.path` is the file to write (UTF-8, overwritten).
 */
export function csvReport(result, options) {
  // Collect the union of metric names across all results.
  const metricNames = new Set();
  for (const r of result.results) {
    for (const s of r.scores) {
      metricNames.add(s.name);
    }
  }
  const metrics = [...metricNames];

  const headers = [
    'input',
    'expected',
    'output',
    'duration',
    ...metrics.map((m) => escapeField(m)),
  ];
  const lines = [headers.join(',')];

  for (const r of result.results) {
    const scoreMap = new Map(r.scores.map((s) => [s.name, s.score]));
    const row = [
      escapeField(r.case.input),
      escapeField(r.case.expected ?? ''),
      escapeField(r.output),
      String(r.duration),
      ...metrics.map((m) => String(scoreMap.get(m) ?? '')),
    ];
    lines.push(row.join(','));
  }

  writeFileSync(options.path, lines.join('\n') + '\n', 'utf-8');
}
|
|
31
|
+
//# sourceMappingURL=csv.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"csv.js","sourceRoot":"","sources":["../../src/reporters/csv.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAGxC,SAAS,WAAW,CAAC,KAAa;IAChC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACvE,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC;IAC1C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,MAAuB,EAAE,OAAyB;IAC1E,MAAM,WAAW,GAAG,IAAI,GAAG,EAAU,CAAC;IACtC,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QAC/B,KAAK,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC;YACzB,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IACD,MAAM,OAAO,GAAG,CAAC,GAAG,WAAW,CAAC,CAAC;IAEjC,MAAM,OAAO,GAAG,CAAC,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,OAAO,CAAC,CAAC;IACxE,MAAM,KAAK,GAAa,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAE5C,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QAC/B,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACjE,MAAM,GAAG,GAAG;YACV,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;YACzB,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,IAAI,EAAE,CAAC;YAClC,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC;YACrB,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;YAClB,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;SACrD,CAAC;QACF,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAC5B,CAAC;IAED,aAAa,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;AAChE,CAAC"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
 * Summary statistics for one metric across an eval run.
 *
 * The console reporter prints `name`, `mean`, `median`, `p95`, `min` and `max`
 * as table columns.
 */
export interface AggregatedMetric {
|
|
2
|
+
/** Metric name shown in the first table column. */
name: string;
|
|
3
|
+
mean: number;
|
|
4
|
+
median: number;
|
|
5
|
+
min: number;
|
|
6
|
+
max: number;
|
|
7
|
+
// NOTE(review): presumably the standard deviation of the metric's values; confirm against the stats module.
stdDev: number;
|
|
8
|
+
// NOTE(review): p50 / p95 / p99 are presumably the 50th / 95th / 99th percentiles; confirm against the stats module.
p50: number;
|
|
9
|
+
p95: number;
|
|
10
|
+
p99: number;
|
|
11
|
+
}
|
|
12
|
+
/**
 * Outcome of one assertion evaluated against an eval run.
 */
export interface AssertionResult {
|
|
13
|
+
/** Assertion name printed by the console and CI reporters. */
name: string;
|
|
14
|
+
/** Whether the assertion held; the CI reporter exits with code 1 if any is false. */
passed: boolean;
|
|
15
|
+
/** Text printed next to the name (always by the console reporter, on failure by the CI reporter). */
message: string;
|
|
16
|
+
/** Optional observed value. */
actual?: number;
|
|
17
|
+
/** Optional expected value. */
expected?: number;
|
|
18
|
+
}
|
|
19
|
+
/**
 * Complete result of an eval run, as consumed by the reporters.
 */
export interface EvalSuiteResult {
|
|
20
|
+
/** One entry per evaluated case; the CSV reporter writes one row per entry. */
results: Array<{
|
|
21
|
+
case: {
|
|
22
|
+
input: string;
|
|
23
|
+
/** Optional expected output; the CSV reporter writes an empty cell when absent. */
expected?: string;
|
|
24
|
+
};
|
|
25
|
+
output: string;
|
|
26
|
+
duration: number;
|
|
27
|
+
/** Scores for this case; each distinct `name` becomes a CSV column. */
scores: Array<{
|
|
28
|
+
name: string;
|
|
29
|
+
score: number;
|
|
30
|
+
details?: string;
|
|
31
|
+
}>;
|
|
32
|
+
}>;
|
|
33
|
+
/** Aggregated statistics; the console reporter prints one table row per value. */
aggregated: Record<string, AggregatedMetric>;
|
|
34
|
+
/** Assertion outcomes, in the order they are printed. */
assertions: AssertionResult[];
|
|
35
|
+
/** Run-level totals printed in the reporter summary lines. */
stats: {
|
|
36
|
+
/** Printed as the number of cases. */
total: number;
|
|
37
|
+
/** Printed with an `ms` suffix. */
duration: number;
|
|
38
|
+
/** Printed with a `$` prefix. */
cost: number;
|
|
39
|
+
};
|
|
40
|
+
}
|
|
41
|
+
/** Identifiers of the built-in reporters that `report` can dispatch to. */
export type ReporterType = 'console' | 'json' | 'csv' | 'ci';
|
|
42
|
+
/** Options accepted by `report`. */
export type ReporterOptions = {
|
|
43
|
+
/**
 * Output file for the `json` and `csv` reporters. When omitted they write to
 * `eval-report.json` and `eval-report.csv` respectively.
 */
path?: string;
|
|
44
|
+
};
|
|
45
|
+
/**
 * Runs one or more built-in reporters over an eval result.
 *
 * @param result Suite result to report.
 * @param type A single reporter id or a list of ids.
 * @param options Optional settings; `path` is passed to the `json` and `csv` reporters.
 */
export declare function report(result: EvalSuiteResult, type: ReporterType | ReporterType[], options?: ReporterOptions): void;
|
|
46
|
+
export { consoleReport } from './console';
|
|
47
|
+
export { jsonReport } from './json';
|
|
48
|
+
export { csvReport } from './csv';
|
|
49
|
+
export { ciReport } from './ci';
|
|
50
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/reporters/index.ts"],"names":[],"mappings":"AAKA,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,KAAK,CAAC;QACb,IAAI,EAAE;YAAE,KAAK,EAAE,MAAM,CAAC;YAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC;QAC3C,MAAM,EAAE,MAAM,CAAC;QACf,QAAQ,EAAE,MAAM,CAAC;QACjB,MAAM,EAAE,KAAK,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,MAAM,CAAC;YAAC,OAAO,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC,CAAC;KAClE,CAAC,CAAC;IACH,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC;IAC7C,UAAU,EAAE,eAAe,EAAE,CAAC;IAC9B,KAAK,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;CAC1D;AAED,MAAM,MAAM,YAAY,GAAG,SAAS,GAAG,MAAM,GAAG,KAAK,GAAG,IAAI,CAAC;AAC7D,MAAM,MAAM,eAAe,GAAG;IAAE,IAAI,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAEhD,wBAAgB,MAAM,CACpB,MAAM,EAAE,eAAe,EACvB,IAAI,EAAE,YAAY,GAAG,YAAY,EAAE,EACnC,OAAO,CAAC,EAAE,eAAe,GACxB,IAAI,CAmBN;AAED,OAAO,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,SAAS,EAAE,MAAM,OAAO,CAAC;AAClC,OAAO,EAAE,QAAQ,EAAE,MAAM,MAAM,CAAC"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { consoleReport } from './console';
|
|
2
|
+
import { jsonReport } from './json';
|
|
3
|
+
import { csvReport } from './csv';
|
|
4
|
+
import { ciReport } from './ci';
|
|
5
|
+
/**
 * Runs one or more built-in reporters over an eval result.
 *
 * The `console`, `json` and `csv` reporters run in the order given. The `ci`
 * reporter is always run last, and once: it terminates the process with exit
 * code 1 when an assertion failed, so running it earlier would silently skip
 * every reporter listed after it (for example, the JSON file would never be
 * written on a failing run).
 *
 * NOTE(review): `options.path` is shared by the `json` and `csv` reporters,
 * so requesting both with an explicit path makes the second overwrite the
 * first. Callers needing both files should call `report` once per type.
 *
 * Unknown reporter ids are ignored.
 *
 * @param result Suite result to report.
 * @param type A single reporter id or a list of ids.
 * @param options Optional settings; `path` overrides the default output file
 *   (`eval-report.json` / `eval-report.csv`).
 */
export function report(result, type, options) {
  const types = Array.isArray(type) ? type : [type];
  let ciRequested = false;

  for (const t of types) {
    switch (t) {
      case 'console':
        consoleReport(result);
        break;
      case 'json':
        jsonReport(result, { path: options?.path ?? 'eval-report.json' });
        break;
      case 'csv':
        csvReport(result, { path: options?.path ?? 'eval-report.csv' });
        break;
      case 'ci':
        // Deferred: ciReport may call process.exit(1).
        ciRequested = true;
        break;
    }
  }

  if (ciRequested) {
    ciReport(result);
  }
}
|
|
24
|
+
export { consoleReport } from './console';
|
|
25
|
+
export { jsonReport } from './json';
|
|
26
|
+
export { csvReport } from './csv';
|
|
27
|
+
export { ciReport } from './ci';
|
|
28
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/reporters/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,SAAS,EAAE,MAAM,OAAO,CAAC;AAClC,OAAO,EAAE,QAAQ,EAAE,MAAM,MAAM,CAAC;AAqChC,MAAM,UAAU,MAAM,CACpB,MAAuB,EACvB,IAAmC,EACnC,OAAyB;IAEzB,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAElD,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,QAAQ,CAAC,EAAE,CAAC;YACV,KAAK,SAAS;gBACZ,aAAa,CAAC,MAAM,CAAC,CAAC;gBACtB,MAAM;YACR,KAAK,MAAM;gBACT,UAAU,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,IAAI,kBAAkB,EAAE,CAAC,CAAC;gBAClE,MAAM;YACR,KAAK,KAAK;gBACR,SAAS,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,IAAI,iBAAiB,EAAE,CAAC,CAAC;gBAChE,MAAM;YACR,KAAK,IAAI;gBACP,QAAQ,CAAC,MAAM,CAAC,CAAC;gBACjB,MAAM;QACV,CAAC;IACH,CAAC;AACH,CAAC;AAED,OAAO,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,SAAS,EAAE,MAAM,OAAO,CAAC;AAClC,OAAO,EAAE,QAAQ,EAAE,MAAM,MAAM,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json.d.ts","sourceRoot":"","sources":["../../src/reporters/json.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE/C,wBAAgB,UAAU,CAAC,MAAM,EAAE,eAAe,EAAE,OAAO,EAAE;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAEnF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json.js","sourceRoot":"","sources":["../../src/reporters/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAGxC,MAAM,UAAU,UAAU,CAAC,MAAuB,EAAE,OAAyB;IAC3E,aAAa,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;AACxE,CAAC"}
|
package/dist/schema.d.ts
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
/**
 * Zod schema for one evaluation case: a required `input` string plus optional
 * `expected`, `context` and `metadata`.
 */
export declare const EvalCaseSchema: z.ZodObject<{
|
|
3
|
+
input: z.ZodString;
|
|
4
|
+
expected: z.ZodOptional<z.ZodString>;
|
|
5
|
+
context: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
6
|
+
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
7
|
+
}, z.core.$strip>;
|
|
8
|
+
/**
 * Zod schema for eval-suite settings; all three fields are numbers with defaults.
 */
export declare const EvalSuiteConfigSchema: z.ZodObject<{
|
|
9
|
+
concurrency: z.ZodDefault<z.ZodNumber>;
|
|
10
|
+
timeout: z.ZodDefault<z.ZodNumber>;
|
|
11
|
+
retries: z.ZodDefault<z.ZodNumber>;
|
|
12
|
+
}, z.core.$strip>;
|
|
13
|
+
/**
 * Zod schema for judge settings: a required `model` string, a defaulted
 * `temperature`, and an optional `maxTokens`.
 */
export declare const JudgeConfigSchema: z.ZodObject<{
|
|
14
|
+
model: z.ZodString;
|
|
15
|
+
temperature: z.ZodDefault<z.ZodNumber>;
|
|
16
|
+
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
17
|
+
}, z.core.$strip>;
|
|
18
|
+
/**
 * Zod schema for eval-comparison settings; it has the same three defaulted
 * number fields as `EvalSuiteConfigSchema`.
 */
export declare const EvalComparisonConfigSchema: z.ZodObject<{
|
|
19
|
+
concurrency: z.ZodDefault<z.ZodNumber>;
|
|
20
|
+
timeout: z.ZodDefault<z.ZodNumber>;
|
|
21
|
+
retries: z.ZodDefault<z.ZodNumber>;
|
|
22
|
+
}, z.core.$strip>;
|
|
23
|
+
/** An evaluation case after parsing with `EvalCaseSchema` (schema output type). */
export type EvalCase = z.output<typeof EvalCaseSchema>;
|
|
24
|
+
/** An evaluation case as accepted by `EvalCaseSchema` before parsing (schema input type). */
export type EvalCaseInput = z.input<typeof EvalCaseSchema>;
|
|
25
|
+
/** Suite settings after parsing with `EvalSuiteConfigSchema`, defaults applied. */
export type EvalSuiteConfig = z.output<typeof EvalSuiteConfigSchema>;
|
|
26
|
+
/** Suite settings as accepted by `EvalSuiteConfigSchema`; defaulted fields may be omitted. */
export type EvalSuiteConfigInput = z.input<typeof EvalSuiteConfigSchema>;
|
|
27
|
+
/** Judge settings after parsing with `JudgeConfigSchema`, defaults applied. */
export type JudgeConfig = z.output<typeof JudgeConfigSchema>;
|
|
28
|
+
/** Comparison settings after parsing with `EvalComparisonConfigSchema`, defaults applied. */
export type EvalComparisonConfig = z.output<typeof EvalComparisonConfigSchema>;
|
|
29
|
+
//# sourceMappingURL=schema.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../src/schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,eAAO,MAAM,cAAc;;;;;iBAKzB,CAAC;AAEH,eAAO,MAAM,qBAAqB;;;;iBAIhC,CAAC;AAEH,eAAO,MAAM,iBAAiB;;;;iBAI5B,CAAC;AAEH,eAAO,MAAM,0BAA0B;;;;iBAIrC,CAAC;AAEH,MAAM,MAAM,QAAQ,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,cAAc,CAAC,CAAC;AACvD,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,cAAc,CAAC,CAAC;AAC3D,MAAM,MAAM,eAAe,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,qBAAqB,CAAC,CAAC;AACrE,MAAM,MAAM,oBAAoB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,qBAAqB,CAAC,CAAC;AACzE,MAAM,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,iBAAiB,CAAC,CAAC;AAC7D,MAAM,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,0BAA0B,CAAC,CAAC"}
|
package/dist/schema.js
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
/**
 * Schema for a single evaluation case: a required string `input`, an optional
 * string `expected`, and optional string-keyed records `context` and
 * `metadata` whose values are left unvalidated (`unknown`).
 */
export const EvalCaseSchema = z.object({
|
|
3
|
+
input: z.string(),
|
|
4
|
+
expected: z.string().optional(),
|
|
5
|
+
context: z.record(z.string(), z.unknown()).optional(),
|
|
6
|
+
metadata: z.record(z.string(), z.unknown()).optional(),
|
|
7
|
+
});
|
|
8
|
+
/**
 * Schema for eval-suite run settings. Every key is an integer with a default:
 * `concurrency` >= 1 (default 5), `timeout` >= 1000 (default 30000;
 * presumably milliseconds, confirm against the runner), and `retries` in
 * 0..10 (default 0).
 */
export const EvalSuiteConfigSchema = z.object({
|
|
9
|
+
concurrency: z.number().int().min(1).default(5),
|
|
10
|
+
timeout: z.number().int().min(1000).default(30000),
|
|
11
|
+
retries: z.number().int().min(0).max(10).default(0),
|
|
12
|
+
});
|
|
13
|
+
/**
 * Schema for judge settings: a required string `model`, a numeric
 * `temperature` defaulting to 0, and an optional positive-integer `maxTokens`.
 */
export const JudgeConfigSchema = z.object({
|
|
14
|
+
model: z.string(),
|
|
15
|
+
// NOTE(review): no range constraint is applied to temperature here.
temperature: z.number().default(0),
|
|
16
|
+
maxTokens: z.number().int().positive().optional(),
|
|
17
|
+
});
|
|
18
|
+
/**
 * Schema for eval-comparison run settings. The keys, constraints and defaults
 * are identical to those of EvalSuiteConfigSchema: integer `concurrency` >= 1
 * (default 5), integer `timeout` >= 1000 (default 30000), and integer
 * `retries` in 0..10 (default 0).
 */
export const EvalComparisonConfigSchema = z.object({
|
|
19
|
+
concurrency: z.number().int().min(1).default(5),
|
|
20
|
+
timeout: z.number().int().min(1000).default(30000),
|
|
21
|
+
retries: z.number().int().min(0).max(10).default(0),
|
|
22
|
+
});
|
|
23
|
+
//# sourceMappingURL=schema.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema.js","sourceRoot":"","sources":["../src/schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,CAAC,MAAM,CAAC;IACrC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;IACjB,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC/B,OAAO,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,QAAQ,EAAE;IACrD,QAAQ,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,QAAQ,EAAE;CACvD,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;IAC5C,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;IAC/C,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC;IAClD,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;CACpD,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;IACxC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;IACjB,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IAClC,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;CAClD,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,0BAA0B,GAAG,CAAC,CAAC,MAAM,CAAC;IACjD,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;IAC/C,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC;IAClD,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;CACpD,CAAC,CAAC"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
// Barrel for the statistics helpers: descriptive statistics from
// './percentiles', the paired t-test from './t-test', and McNemar's test from
// './mcnemar', together with the result types of the two tests.
export { percentile, mean, median, stdDev, aggregate } from './percentiles';
|
|
2
|
+
export { pairedTTest } from './t-test';
|
|
3
|
+
export type { TTestResult } from './t-test';
|
|
4
|
+
export { mcnemarsTest } from './mcnemar';
|
|
5
|
+
export type { McNemarResult } from './mcnemar';
|
|
6
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/stats/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAC5E,OAAO,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AACvC,YAAY,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAC5C,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AACzC,YAAY,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/stats/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAC5E,OAAO,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAEvC,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mcnemar.d.ts","sourceRoot":"","sources":["../../src/stats/mcnemar.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,OAAO,CAAC;CACtB;AAuBD,wBAAgB,YAAY,CAC1B,wBAAwB,EAAE,MAAM,EAChC,wBAAwB,EAAE,MAAM,GAC/B,aAAa,CAiBf"}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
function erfc(x) {
|
|
2
|
+
const a1 = 0.254829592;
|
|
3
|
+
const a2 = -0.284496736;
|
|
4
|
+
const a3 = 1.421413741;
|
|
5
|
+
const a4 = -1.453152027;
|
|
6
|
+
const a5 = 1.061405429;
|
|
7
|
+
const p = 0.3275911;
|
|
8
|
+
const sign = x < 0 ? -1 : 1;
|
|
9
|
+
const absX = Math.abs(x);
|
|
10
|
+
const t = 1.0 / (1.0 + p * absX);
|
|
11
|
+
const y = 1.0 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp(-absX * absX);
|
|
12
|
+
return 1.0 - sign * y;
|
|
13
|
+
}
|
|
14
|
+
/**
 * Upper-tail probability P(X > x) for a chi-square variable with one degree
 * of freedom, expressed through the complementary error function as
 * erfc(sqrt(x / 2)).
 *
 * @param x Chi-square statistic.
 * @returns 1 for any x <= 0, otherwise the tail probability.
 */
function chiSquareSurvival(x) {
    return x <= 0 ? 1 : erfc(Math.sqrt(x / 2));
}
|
|
19
|
+
/**
 * McNemar's chi-square test on the two discordant-pair counts of a paired
 * comparison, using the continuity-corrected statistic
 * (|b - c| - 1)^2 / (b + c).
 *
 * @param pairsACorrect_BIncorrect Number of pairs where A was correct and B was not.
 * @param pairsAIncorrect_BCorrect Number of pairs where B was correct and A was not.
 * @returns `chiSquare`, its `pValue`, and `significant` (pValue < 0.05).
 *   When the two counts sum to zero the result is fixed at
 *   chiSquare 0, pValue 1, not significant.
 */
export function mcnemarsTest(pairsACorrect_BIncorrect, pairsAIncorrect_BCorrect) {
    const discordantTotal = pairsACorrect_BIncorrect + pairsAIncorrect_BCorrect;
    // With no discordant pairs the statistic would divide by zero.
    if (discordantTotal === 0) {
        return { chiSquare: 0, pValue: 1, significant: false };
    }
    const corrected = Math.abs(pairsACorrect_BIncorrect - pairsAIncorrect_BCorrect) - 1;
    const chiSquare = (corrected * corrected) / discordantTotal;
    const pValue = chiSquareSurvival(chiSquare);
    const significant = pValue < 0.05;
    return { chiSquare, pValue, significant };
}
|
|
34
|
+
//# sourceMappingURL=mcnemar.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mcnemar.js","sourceRoot":"","sources":["../../src/stats/mcnemar.ts"],"names":[],"mappings":"AAMA,SAAS,IAAI,CAAC,CAAS;IACrB,MAAM,EAAE,GAAG,WAAW,CAAC;IACvB,MAAM,EAAE,GAAG,CAAC,WAAW,CAAC;IACxB,MAAM,EAAE,GAAG,WAAW,CAAC;IACvB,MAAM,EAAE,GAAG,CAAC,WAAW,CAAC;IACxB,MAAM,EAAE,GAAG,WAAW,CAAC;IACvB,MAAM,CAAC,GAAG,SAAS,CAAC;IAEpB,MAAM,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5B,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACzB,MAAM,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;IACjC,MAAM,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;IAE5F,OAAO,GAAG,GAAG,IAAI,GAAG,CAAC,CAAC;AACxB,CAAC;AAED,SAAS,iBAAiB,CAAC,CAAS;IAClC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,CAAC,CAAC;IACrB,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;AAChC,CAAC;AAED,MAAM,UAAU,YAAY,CAC1B,wBAAgC,EAChC,wBAAgC;IAEhC,MAAM,CAAC,GAAG,wBAAwB,CAAC;IACnC,MAAM,CAAC,GAAG,wBAAwB,CAAC;IAEnC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;QAChB,OAAO,EAAE,SAAS,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC;IACzD,CAAC;IAED,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;IACjC,MAAM,SAAS,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAE5C,OAAO;QACL,SAAS;QACT,MAAM;QACN,WAAW,EAAE,MAAM,GAAG,IAAI;KAC3B,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/** Arithmetic mean of `values`; the implementation returns 0 for an empty array. */
export declare function mean(values: number[]): number;
|
|
2
|
+
/** Sample standard deviation (n - 1 denominator); the implementation returns 0 for fewer than two values. */
export declare function stdDev(values: number[]): number;
|
|
3
|
+
/**
 * Linearly interpolated percentile of `values`. `p` is a fraction (the
 * implementation's own callers pass 0.5, 0.95 and 0.99), not a 0-100 figure.
 * The implementation returns 0 for an empty array.
 */
export declare function percentile(values: number[], p: number): number;
|
|
4
|
+
/** Median of `values`; implemented as `percentile(values, 0.5)`. */
export declare function median(values: number[]): number;
|
|
5
|
+
/**
 * Summary statistics for `values`: mean, median, min, max, sample standard
 * deviation, and the 50th/95th/99th percentiles. The implementation returns
 * all zeros for an empty array.
 */
export declare function aggregate(values: number[]): {
|
|
6
|
+
mean: number;
|
|
7
|
+
median: number;
|
|
8
|
+
min: number;
|
|
9
|
+
max: number;
|
|
10
|
+
stdDev: number;
|
|
11
|
+
p50: number;
|
|
12
|
+
p95: number;
|
|
13
|
+
p99: number;
|
|
14
|
+
};
|
|
15
|
+
//# sourceMappingURL=percentiles.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"percentiles.d.ts","sourceRoot":"","sources":["../../src/stats/percentiles.ts"],"names":[],"mappings":"AAAA,wBAAgB,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,CAO7C;AAED,wBAAgB,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,CAS/C;AAED,wBAAgB,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CAa9D;AAED,wBAAgB,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,CAE/C;AAED,wBAAgB,SAAS,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG;IAC3C,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;CACb,CAiBA"}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param values Numbers to average.
 * @returns The sum divided by the count, or 0 when `values` is empty.
 */
export function mean(values) {
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    let total = 0;
    for (const value of values) {
        total += value;
    }
    return total / count;
}
|
|
10
|
+
/**
 * Sample standard deviation (Bessel-corrected, n - 1 denominator), computed
 * in two passes: the mean first, then the squared deviations from it.
 *
 * @param values Numbers to measure.
 * @returns The standard deviation, or 0 when fewer than two values are given.
 */
export function stdDev(values) {
    const count = values.length;
    if (count < 2) {
        return 0;
    }
    const center = mean(values);
    let squaredDeviations = 0;
    for (const value of values) {
        const delta = value - center;
        squaredDeviations += delta * delta;
    }
    return Math.sqrt(squaredDeviations / (count - 1));
}
|
|
21
|
+
/**
 * Percentile of `values` by linear interpolation between the two closest
 * ranks of the ascending-sorted data (rank = p * (n - 1)).
 *
 * @param values Numbers to rank; the array is copied and never mutated.
 * @param p Fraction in [0, 1], e.g. 0.95 for the 95th percentile (NOT 95).
 * @returns The interpolated percentile, or 0 when `values` is empty.
 * @throws {RangeError} When `p` is NaN or outside [0, 1]. Such a value would
 *   otherwise index past the sorted array and silently yield `undefined`/NaN.
 */
export function percentile(values, p) {
    // Written as a negated conjunction so that NaN is rejected as well.
    if (!(p >= 0 && p <= 1)) {
        throw new RangeError(`percentile: p must be a fraction in [0, 1], received ${p}`);
    }
    if (values.length === 0) {
        return 0;
    }
    const sorted = [...values].sort((a, b) => a - b);
    const rank = p * (sorted.length - 1);
    const lower = Math.floor(rank);
    const upper = Math.ceil(rank);
    // Rank lands exactly on an element (always the case for a single value).
    if (lower === upper) {
        return sorted[lower];
    }
    return sorted[lower] + (rank - lower) * (sorted[upper] - sorted[lower]);
}
|
|
35
|
+
/**
 * Median of a list of numbers, defined as its 50th percentile.
 *
 * @param values Numbers to rank.
 * @returns Whatever `percentile` yields for the fraction 0.5.
 */
export function median(values) {
    const midpoint = 0.5;
    return percentile(values, midpoint);
}
|
|
38
|
+
/**
 * Summary statistics for a list of numbers.
 *
 * @param values Numbers to summarize; the array is not mutated.
 * @returns mean, median, min, max, sample standard deviation and the
 *   50th/95th/99th percentiles. Every field is 0 when `values` is empty.
 */
export function aggregate(values) {
    if (values.length === 0) {
        return { mean: 0, median: 0, min: 0, max: 0, stdDev: 0, p50: 0, p95: 0, p99: 0 };
    }
    // Sorted copy used only for the extremes; percentile() sorts its own copy.
    const sorted = [...values].sort((a, b) => a - b);
    // median and p50 are the same quantity: compute it once rather than
    // paying for a second copy-and-sort inside percentile().
    const p50 = percentile(values, 0.5);
    return {
        mean: mean(values),
        median: p50,
        min: sorted[0],
        max: sorted[sorted.length - 1],
        stdDev: stdDev(values),
        p50,
        p95: percentile(values, 0.95),
        p99: percentile(values, 0.99),
    };
}
|
|
54
|
+
//# sourceMappingURL=percentiles.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"percentiles.js","sourceRoot":"","sources":["../../src/stats/percentiles.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,IAAI,CAAC,MAAgB;IACnC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAClC,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,GAAG,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC;IACnB,CAAC;IACD,OAAO,GAAG,GAAG,MAAM,CAAC,MAAM,CAAC;AAC7B,CAAC;AAED,MAAM,UAAU,MAAM,CAAC,MAAgB;IACrC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IAChC,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;IACvB,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACxB,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;IACjB,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;AAChD,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,MAAgB,EAAE,CAAS;IACpD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAClC,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACjD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,MAAM,CAAC,CAAC,CAAC,CAAC;IAE1C,MAAM,IAAI,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACrC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE9B,IAAI,KAAK,KAAK,KAAK;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;IAE1C,MAAM,QAAQ,GAAG,IAAI,GAAG,KAAK,CAAC;IAC9B,OAAO,MAAM,CAAC,KAAK,CAAC,GAAG,QAAQ,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;AACpE,CAAC;AAED,MAAM,UAAU,MAAM,CAAC,MAAgB;IACrC,OAAO,UAAU,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;AACjC,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,MAAgB;IAUxC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC;IACnF,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EA
AE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAEjD,OAAO;QACL,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC;QAClB,MAAM,EAAE,UAAU,CAAC,MAAM,EAAE,GAAG,CAAC;QAC/B,GAAG,EAAE,MAAM,CAAC,CAAC,CAAC;QACd,GAAG,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC;QAC9B,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC;QACtB,GAAG,EAAE,UAAU,CAAC,MAAM,EAAE,GAAG,CAAC;QAC5B,GAAG,EAAE,UAAU,CAAC,MAAM,EAAE,IAAI,CAAC;QAC7B,GAAG,EAAE,UAAU,CAAC,MAAM,EAAE,IAAI,CAAC;KAC9B,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
 * Result shape returned by `pairedTTest`.
 * NOTE(review): the implementation is not visible from this declaration.
 * `confidenceInterval` is presumably [lower, upper] and `significant`
 * presumably a fixed-threshold check on `pValue`; confirm against t-test.js.
 */
export interface TTestResult {
|
|
2
|
+
tStatistic: number;
|
|
3
|
+
degreesOfFreedom: number;
|
|
4
|
+
pValue: number;
|
|
5
|
+
significant: boolean;
|
|
6
|
+
confidenceInterval: [number, number];
|
|
7
|
+
}
|
|
8
|
+
/**
 * Paired t-test over two sample arrays, returning a TTestResult.
 * NOTE(review): presumably the arrays must be of equal length and are paired
 * by index; confirm against the implementation.
 */
export declare function pairedTTest(samplesA: number[], samplesB: number[]): TTestResult;
|
|
9
|
+
//# sourceMappingURL=t-test.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"t-test.d.ts","sourceRoot":"","sources":["../../src/stats/t-test.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,WAAW;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,CAAC;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,OAAO,CAAC;IACrB,kBAAkB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACtC;AAkGD,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,WAAW,CA4C/E"}
|