@cogitator-ai/evals 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/README.md +456 -0
  2. package/dist/assertions/custom.d.ts +11 -0
  3. package/dist/assertions/custom.d.ts.map +1 -0
  4. package/dist/assertions/custom.js +13 -0
  5. package/dist/assertions/custom.js.map +1 -0
  6. package/dist/assertions/index.d.ts +27 -0
  7. package/dist/assertions/index.d.ts.map +1 -0
  8. package/dist/assertions/index.js +4 -0
  9. package/dist/assertions/index.js.map +1 -0
  10. package/dist/assertions/regression.d.ts +5 -0
  11. package/dist/assertions/regression.d.ts.map +1 -0
  12. package/dist/assertions/regression.js +58 -0
  13. package/dist/assertions/regression.js.map +1 -0
  14. package/dist/assertions/threshold.d.ts +3 -0
  15. package/dist/assertions/threshold.d.ts.map +1 -0
  16. package/dist/assertions/threshold.js +45 -0
  17. package/dist/assertions/threshold.js.map +1 -0
  18. package/dist/datasets/csv-loader.d.ts +3 -0
  19. package/dist/datasets/csv-loader.d.ts.map +1 -0
  20. package/dist/datasets/csv-loader.js +43 -0
  21. package/dist/datasets/csv-loader.js.map +1 -0
  22. package/dist/datasets/dataset.d.ts +15 -0
  23. package/dist/datasets/dataset.d.ts.map +1 -0
  24. package/dist/datasets/dataset.js +62 -0
  25. package/dist/datasets/dataset.js.map +1 -0
  26. package/dist/datasets/index.d.ts +4 -0
  27. package/dist/datasets/index.d.ts.map +1 -0
  28. package/dist/datasets/index.js +4 -0
  29. package/dist/datasets/index.js.map +1 -0
  30. package/dist/datasets/jsonl-loader.d.ts +3 -0
  31. package/dist/datasets/jsonl-loader.d.ts.map +1 -0
  32. package/dist/datasets/jsonl-loader.js +27 -0
  33. package/dist/datasets/jsonl-loader.js.map +1 -0
  34. package/dist/eval-builder.d.ts +30 -0
  35. package/dist/eval-builder.d.ts.map +1 -0
  36. package/dist/eval-builder.js +82 -0
  37. package/dist/eval-builder.js.map +1 -0
  38. package/dist/eval-comparison.d.ts +43 -0
  39. package/dist/eval-comparison.d.ts.map +1 -0
  40. package/dist/eval-comparison.js +125 -0
  41. package/dist/eval-comparison.js.map +1 -0
  42. package/dist/eval-suite.d.ts +63 -0
  43. package/dist/eval-suite.d.ts.map +1 -0
  44. package/dist/eval-suite.js +230 -0
  45. package/dist/eval-suite.js.map +1 -0
  46. package/dist/index.d.ts +31 -0
  47. package/dist/index.d.ts.map +1 -0
  48. package/dist/index.js +20 -0
  49. package/dist/index.js.map +1 -0
  50. package/dist/metrics/custom.d.ts +18 -0
  51. package/dist/metrics/custom.d.ts.map +1 -0
  52. package/dist/metrics/custom.js +28 -0
  53. package/dist/metrics/custom.js.map +1 -0
  54. package/dist/metrics/deterministic.d.ts +11 -0
  55. package/dist/metrics/deterministic.d.ts.map +1 -0
  56. package/dist/metrics/deterministic.js +74 -0
  57. package/dist/metrics/deterministic.js.map +1 -0
  58. package/dist/metrics/index.d.ts +8 -0
  59. package/dist/metrics/index.d.ts.map +1 -0
  60. package/dist/metrics/index.js +5 -0
  61. package/dist/metrics/index.js.map +1 -0
  62. package/dist/metrics/llm-judge.d.ts +27 -0
  63. package/dist/metrics/llm-judge.d.ts.map +1 -0
  64. package/dist/metrics/llm-judge.js +77 -0
  65. package/dist/metrics/llm-judge.js.map +1 -0
  66. package/dist/metrics/statistical.d.ts +5 -0
  67. package/dist/metrics/statistical.d.ts.map +1 -0
  68. package/dist/metrics/statistical.js +85 -0
  69. package/dist/metrics/statistical.js.map +1 -0
  70. package/dist/metrics/types.d.ts +31 -0
  71. package/dist/metrics/types.d.ts.map +1 -0
  72. package/dist/metrics/types.js +2 -0
  73. package/dist/metrics/types.js.map +1 -0
  74. package/dist/reporters/ci.d.ts +3 -0
  75. package/dist/reporters/ci.d.ts.map +1 -0
  76. package/dist/reporters/ci.js +21 -0
  77. package/dist/reporters/ci.js.map +1 -0
  78. package/dist/reporters/console.d.ts +3 -0
  79. package/dist/reporters/console.d.ts.map +1 -0
  80. package/dist/reporters/console.js +46 -0
  81. package/dist/reporters/console.js.map +1 -0
  82. package/dist/reporters/csv.d.ts +5 -0
  83. package/dist/reporters/csv.d.ts.map +1 -0
  84. package/dist/reporters/csv.js +31 -0
  85. package/dist/reporters/csv.js.map +1 -0
  86. package/dist/reporters/index.d.ts +50 -0
  87. package/dist/reporters/index.d.ts.map +1 -0
  88. package/dist/reporters/index.js +28 -0
  89. package/dist/reporters/index.js.map +1 -0
  90. package/dist/reporters/json.d.ts +5 -0
  91. package/dist/reporters/json.d.ts.map +1 -0
  92. package/dist/reporters/json.js +5 -0
  93. package/dist/reporters/json.js.map +1 -0
  94. package/dist/schema.d.ts +29 -0
  95. package/dist/schema.d.ts.map +1 -0
  96. package/dist/schema.js +23 -0
  97. package/dist/schema.js.map +1 -0
  98. package/dist/stats/index.d.ts +6 -0
  99. package/dist/stats/index.d.ts.map +1 -0
  100. package/dist/stats/index.js +4 -0
  101. package/dist/stats/index.js.map +1 -0
  102. package/dist/stats/mcnemar.d.ts +7 -0
  103. package/dist/stats/mcnemar.d.ts.map +1 -0
  104. package/dist/stats/mcnemar.js +34 -0
  105. package/dist/stats/mcnemar.js.map +1 -0
  106. package/dist/stats/percentiles.d.ts +15 -0
  107. package/dist/stats/percentiles.d.ts.map +1 -0
  108. package/dist/stats/percentiles.js +54 -0
  109. package/dist/stats/percentiles.js.map +1 -0
  110. package/dist/stats/t-test.d.ts +9 -0
  111. package/dist/stats/t-test.d.ts.map +1 -0
  112. package/dist/stats/t-test.js +129 -0
  113. package/dist/stats/t-test.js.map +1 -0
  114. package/dist/tools.d.ts +16 -0
  115. package/dist/tools.d.ts.map +1 -0
  116. package/dist/tools.js +58 -0
  117. package/dist/tools.js.map +1 -0
  118. package/package.json +57 -0
@@ -0,0 +1,85 @@
1
+ import { aggregate, mean } from '../stats';
2
+ function createStatisticalFn(name, fn) {
3
+ const statFn = fn;
4
+ statFn.metricName = name;
5
+ return statFn;
6
+ }
7
/**
 * Builds the `latency` statistical metric.
 *
 * The returned function reads `duration` from every case result, feeds the
 * list to `aggregate`, and reports p50/p95/p99, mean, median, min and max in
 * `metadata`. `score` is always 0; the useful data lives in `metadata`.
 *
 * An empty result list short-circuits to all-zero metadata, mirroring how
 * `cost()` and `tokenUsage()` treat empty input, so `aggregate` is never
 * asked to summarise an empty sample.
 *
 * @returns A statistical metric function whose `metricName` is `'latency'`.
 */
export function latency() {
  return createStatisticalFn('latency', (results) => {
    if (results.length === 0) {
      return {
        name: 'latency',
        score: 0,
        metadata: { p50: 0, p95: 0, p99: 0, mean: 0, median: 0, min: 0, max: 0 },
      };
    }
    const durations = results.map((r) => r.duration);
    // `avg` avoids shadowing the module-level `mean` import.
    const { p50, p95, p99, mean: avg, median, min, max } = aggregate(durations);
    return {
      name: 'latency',
      score: 0,
      metadata: { p50, p95, p99, mean: avg, median, min, max },
    };
  });
}
26
/**
 * Builds the `cost` statistical metric.
 *
 * Only results that carry a truthy `usage` contribute; their `usage.cost`
 * values are summed and summarised with `aggregate`. When no result has
 * usage data every metadata field is 0. `score` is always 0.
 *
 * @returns A statistical metric function whose `metricName` is `'cost'`.
 */
export function cost() {
  const summarize = (results) => {
    const costs = [];
    for (const r of results) {
      if (r.usage) {
        costs.push(r.usage.cost);
      }
    }
    if (costs.length === 0) {
      return {
        name: 'cost',
        score: 0,
        metadata: { total: 0, mean: 0, median: 0, min: 0, max: 0 },
      };
    }
    // Aggregate before summing, keeping the original statement order.
    const summary = aggregate(costs);
    const total = costs.reduce((sum, value) => sum + value, 0);
    return {
      name: 'cost',
      score: 0,
      metadata: {
        total,
        mean: summary.mean,
        median: summary.median,
        min: summary.min,
        max: summary.max,
      },
    };
  };
  return createStatisticalFn('cost', summarize);
}
54
/**
 * Builds the `tokenUsage` statistical metric.
 *
 * Results without a truthy `usage` are ignored. For the rest, input and
 * output token counts are totalled and averaged (via `mean`). When no result
 * has usage data every metadata field is 0. `score` is always 0.
 *
 * @returns A statistical metric function whose `metricName` is `'tokenUsage'`.
 */
export function tokenUsage() {
  return createStatisticalFn('tokenUsage', (results) => {
    const inputTokens = [];
    const outputTokens = [];
    let totalInput = 0;
    let totalOutput = 0;
    for (const { usage } of results) {
      if (!usage) {
        continue;
      }
      inputTokens.push(usage.inputTokens);
      outputTokens.push(usage.outputTokens);
      totalInput += usage.inputTokens;
      totalOutput += usage.outputTokens;
    }
    if (inputTokens.length === 0) {
      return {
        name: 'tokenUsage',
        score: 0,
        metadata: { totalInput: 0, totalOutput: 0, totalTokens: 0, meanInput: 0, meanOutput: 0 },
      };
    }
    return {
      name: 'tokenUsage',
      score: 0,
      metadata: {
        totalInput,
        totalOutput,
        totalTokens: totalInput + totalOutput,
        meanInput: mean(inputTokens),
        meanOutput: mean(outputTokens),
      },
    };
  });
}
85
+ //# sourceMappingURL=statistical.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"statistical.js","sourceRoot":"","sources":["../../src/metrics/statistical.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,UAAU,CAAC;AAE3C,SAAS,mBAAmB,CAC1B,IAAY,EACZ,EAA8C;IAE9C,MAAM,MAAM,GAAG,EAAyB,CAAC;IACzC,MAAM,CAAC,UAAU,GAAG,IAAI,CAAC;IACzB,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,OAAO;IACrB,OAAO,mBAAmB,CAAC,SAAS,EAAE,CAAC,OAAyB,EAAE,EAAE;QAClE,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;QACjD,MAAM,KAAK,GAAG,SAAS,CAAC,SAAS,CAAC,CAAC;QAEnC,OAAO;YACL,IAAI,EAAE,SAAS;YACf,KAAK,EAAE,CAAC;YACR,QAAQ,EAAE;gBACR,GAAG,EAAE,KAAK,CAAC,GAAG;gBACd,GAAG,EAAE,KAAK,CAAC,GAAG;gBACd,GAAG,EAAE,KAAK,CAAC,GAAG;gBACd,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,MAAM,EAAE,KAAK,CAAC,MAAM;gBACpB,GAAG,EAAE,KAAK,CAAC,GAAG;gBACd,GAAG,EAAE,KAAK,CAAC,GAAG;aACf;SACF,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,UAAU,IAAI;IAClB,OAAO,mBAAmB,CAAC,MAAM,EAAE,CAAC,OAAyB,EAAE,EAAE;QAC/D,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAM,CAAC,IAAI,CAAC,CAAC;QAEvE,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE,CAAC;gBACR,QAAQ,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE;aAC3D,CAAC;QACJ,CAAC;QAED,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC;QAC/B,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,KAAK,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC;QACpB,CAAC;QAED,OAAO;YACL,IAAI,EAAE,MAAM;YACZ,KAAK,EAAE,CAAC;YACR,QAAQ,EAAE;gBACR,KAAK;gBACL,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,MAAM,EAAE,KAAK,CAAC,MAAM;gBACpB,GAAG,EAAE,KAAK,CAAC,GAAG;gBACd,GAAG,EAAE,KAAK,CAAC,GAAG;aACf;SACF,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,UAAU,UAAU;IACxB,OAAO,mBAAmB,CAAC,YAAY,EAAE,CAAC,OAAyB,EAAE,EAAE;QACrE,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;QAEjD,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CA
AC;YAC3B,OAAO;gBACL,IAAI,EAAE,YAAY;gBAClB,KAAK,EAAE,CAAC;gBACR,QAAQ,EAAE,EAAE,UAAU,EAAE,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE;aACzF,CAAC;QACJ,CAAC;QAED,MAAM,WAAW,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAM,CAAC,WAAW,CAAC,CAAC;QAC/D,MAAM,YAAY,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAM,CAAC,YAAY,CAAC,CAAC;QAEjE,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,UAAU,IAAI,WAAW,CAAC,CAAC,CAAC,CAAC;YAC7B,WAAW,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;QACjC,CAAC;QAED,OAAO;YACL,IAAI,EAAE,YAAY;YAClB,KAAK,EAAE,CAAC;YACR,QAAQ,EAAE;gBACR,UAAU;gBACV,WAAW;gBACX,WAAW,EAAE,UAAU,GAAG,WAAW;gBACrC,SAAS,EAAE,IAAI,CAAC,WAAW,CAAC;gBAC5B,UAAU,EAAE,IAAI,CAAC,YAAY,CAAC;aAC/B;SACF,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -0,0 +1,31 @@
1
+ import type { EvalCase } from '../schema';
2
/** Value produced by a metric function. */
export interface MetricScore {
  /** Name of the metric that produced this score. */
  name: string;
  /** Numeric score. */
  score: number;
  /** Optional free-form text accompanying the score. */
  details?: string;
  /** Optional extra data keyed by string. */
  metadata?: Record<string, unknown>;
}
8
/** Everything recorded for one evaluated case. */
export interface EvalCaseResult {
  /** The case that was evaluated. */
  case: EvalCase;
  /** Output text produced for the case. */
  output: string;
  /** Duration recorded for the case. */
  duration: number;
  /** Token and cost accounting; absent when no usage was recorded. */
  usage?: {
    inputTokens: number;
    outputTokens: number;
    totalTokens: number;
    cost: number;
    duration: number;
  };
  /** Tool invocations recorded for the case, if any. */
  toolCalls?: ReadonlyArray<{
    id: string;
    name: string;
    arguments: Record<string, unknown>;
  }>;
}
25
/**
 * Per-case metric: an async function from one case result to a score,
 * carrying its metric name as a `metricName` property.
 */
export type MetricFn = ((result: EvalCaseResult) => Promise<MetricScore>) & {
  metricName: string;
};
28
/**
 * Whole-run metric: a synchronous function from the full list of case
 * results to a single score, carrying its metric name as `metricName`.
 */
export type StatisticalMetricFn = ((results: EvalCaseResult[]) => MetricScore) & {
  metricName: string;
};
31
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/metrics/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAE1C,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,QAAQ,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE;QACN,WAAW,EAAE,MAAM,CAAC;QACpB,YAAY,EAAE,MAAM,CAAC;QACrB,WAAW,EAAE,MAAM,CAAC;QACpB,IAAI,EAAE,MAAM,CAAC;QACb,QAAQ,EAAE,MAAM,CAAC;KAClB,CAAC;IACF,SAAS,CAAC,EAAE,SAAS;QACnB,EAAE,EAAE,MAAM,CAAC;QACX,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACpC,EAAE,CAAC;CACL;AAED,MAAM,MAAM,QAAQ,GAAG,CAAC,CAAC,MAAM,EAAE,cAAc,KAAK,OAAO,CAAC,WAAW,CAAC,CAAC,GAAG;IAC1E,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC;AAEF,MAAM,MAAM,mBAAmB,GAAG,CAAC,CAAC,OAAO,EAAE,cAAc,EAAE,KAAK,WAAW,CAAC,GAAG;IAC/E,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../src/metrics/types.ts"],"names":[],"mappings":""}
@@ -0,0 +1,3 @@
1
+ import type { EvalSuiteResult } from './index';
2
/**
 * CI reporter: logs a one-line run summary, one PASS/FAIL line per assertion
 * and the pass/fail totals, and signals failure through the process exit
 * status (1) when any assertion failed.
 */
export declare function ciReport(result: EvalSuiteResult): void;
3
+ //# sourceMappingURL=ci.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ci.d.ts","sourceRoot":"","sources":["../../src/reporters/ci.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAM/C,wBAAgB,QAAQ,CAAC,MAAM,EAAE,eAAe,GAAG,IAAI,CAqBtD"}
@@ -0,0 +1,21 @@
1
// ANSI SGR escape sequences used to colour the PASS/FAIL labels.
const GREEN = '\x1b[32m';
const RED = '\x1b[31m';
const RESET = '\x1b[0m';

/**
 * Prints a CI-oriented report and flags the process as failed when any
 * assertion did not pass.
 *
 * Output, in order: a summary line (case count, duration, cost), one
 * `PASS`/`FAIL` line per assertion (failures include the assertion message),
 * and a final pass/fail tally.
 *
 * Failure is signalled with `process.exitCode = 1` rather than
 * `process.exit(1)`: a hard exit would skip any reporters that `report()`
 * still has queued after `'ci'` and can drop console output that has not been
 * flushed yet when stdout is a pipe.
 *
 * @param result Suite result; `assertions` and `stats` are read.
 */
export function ciReport(result) {
  const { assertions, stats } = result;
  let passed = 0;
  let failed = 0;
  console.log(`Eval: ${stats.total} cases | ${stats.duration}ms | $${stats.cost}`);
  for (const a of assertions) {
    if (a.passed) {
      passed += 1;
      console.log(`  ${GREEN}PASS${RESET} ${a.name}`);
    } else {
      failed += 1;
      console.log(`  ${RED}FAIL${RESET} ${a.name}: ${a.message}`);
    }
  }
  console.log(`Result: ${passed} passed, ${failed} failed`);
  if (failed > 0) {
    // Let the event loop drain; the process still ends with status 1.
    process.exitCode = 1;
  }
}
21
+ //# sourceMappingURL=ci.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ci.js","sourceRoot":"","sources":["../../src/reporters/ci.ts"],"names":[],"mappings":"AAEA,MAAM,KAAK,GAAG,UAAU,CAAC;AACzB,MAAM,GAAG,GAAG,UAAU,CAAC;AACvB,MAAM,KAAK,GAAG,SAAS,CAAC;AAExB,MAAM,UAAU,QAAQ,CAAC,MAAuB;IAC9C,MAAM,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAChE,MAAM,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAEjE,OAAO,CAAC,GAAG,CACT,SAAS,MAAM,CAAC,KAAK,CAAC,KAAK,YAAY,MAAM,CAAC,KAAK,CAAC,QAAQ,SAAS,MAAM,CAAC,KAAK,CAAC,IAAI,EAAE,CACzF,CAAC;IAEF,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QAClC,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,OAAO,KAAK,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QAClD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,KAAK,GAAG,OAAO,KAAK,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;QAC9D,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,WAAW,MAAM,YAAY,MAAM,SAAS,CAAC,CAAC;IAE1D,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;QACf,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC"}
@@ -0,0 +1,3 @@
1
+ import type { EvalSuiteResult } from './index';
2
/**
 * Console reporter: logs a table of aggregated metrics (when there are any),
 * the list of assertions (when there are any), and a summary line.
 */
export declare function consoleReport(result: EvalSuiteResult): void;
3
+ //# sourceMappingURL=console.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"console.d.ts","sourceRoot":"","sources":["../../src/reporters/console.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAgB/C,wBAAgB,aAAa,CAAC,MAAM,EAAE,eAAe,GAAG,IAAI,CA2C3D"}
@@ -0,0 +1,46 @@
1
// ANSI SGR escape sequences used to style console output.
const GREEN = '\x1b[32m'; // green foreground
const RED = '\x1b[31m'; // red foreground
const BOLD = '\x1b[1m'; // bold
const DIM = '\x1b[2m'; // dim / faint
const RESET = '\x1b[0m'; // clear all attributes
6
/**
 * Right-pads `str` with spaces up to `len` characters.
 * Strings already at least `len` long are returned unchanged (never truncated).
 */
function pad(str, len) {
  return str.padEnd(len);
}
9
/**
 * Formats a number for the metrics table: integers are printed as-is,
 * everything else with exactly four decimal places.
 */
function fmt(n) {
  if (Number.isInteger(n)) {
    return String(n);
  }
  return n.toFixed(4);
}
12
/**
 * Prints a human-readable report to the console.
 *
 * Three sections, each preceded by a blank line:
 *  1. a fixed-width table of every aggregated metric (name, mean, median,
 *     p95, min, max) — skipped when there are no aggregated metrics;
 *  2. the assertions, each with a coloured ✓/✗ marker, its name and its
 *     message — skipped when there are no assertions;
 *  3. a summary line with case count, duration, cost and pass/fail counts
 *     (the failed count is shown only when it is non-zero).
 *
 * @param result Suite result; `aggregated`, `assertions` and `stats` are read.
 */
export function consoleReport(result) {
  const aggregatedMetrics = Object.values(result.aggregated);
  if (aggregatedMetrics.length > 0) {
    // One entry per table column: heading, width, and how to render a cell.
    const columns = [
      { title: 'Metric', width: 14, cell: (m) => m.name },
      { title: 'Mean', width: 10, cell: (m) => fmt(m.mean) },
      { title: 'Median', width: 10, cell: (m) => fmt(m.median) },
      { title: 'P95', width: 10, cell: (m) => fmt(m.p95) },
      { title: 'Min', width: 10, cell: (m) => fmt(m.min) },
      { title: 'Max', width: 10, cell: (m) => fmt(m.max) },
    ];
    const header = columns.map((c) => pad(c.title, c.width)).join('');
    console.log(`\n${BOLD}${header}${RESET}`);
    console.log(DIM + '─'.repeat(header.length) + RESET);
    for (const m of aggregatedMetrics) {
      console.log(columns.map((c) => pad(c.cell(m), c.width)).join(''));
    }
  }

  if (result.assertions.length > 0) {
    console.log(`\n${BOLD}Assertions${RESET}`);
    for (const a of result.assertions) {
      const color = a.passed ? GREEN : RED;
      const mark = a.passed ? '✓' : '✗';
      const icon = `${color}${mark}${RESET}`;
      console.log(`  ${icon} ${color}${a.name}${RESET} ${DIM}${a.message}${RESET}`);
    }
  }

  let passed = 0;
  let failed = 0;
  for (const a of result.assertions) {
    if (a.passed) {
      passed += 1;
    } else {
      failed += 1;
    }
  }
  const failedPart = failed > 0 ? `${RED}${failed} failed${RESET}` : '';
  console.log(`\n${BOLD}Summary${RESET}: ${result.stats.total} cases | ${result.stats.duration}ms | $${result.stats.cost} | ${GREEN}${passed} passed${RESET} ${failedPart}`);
}
46
+ //# sourceMappingURL=console.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"console.js","sourceRoot":"","sources":["../../src/reporters/console.ts"],"names":[],"mappings":"AAEA,MAAM,KAAK,GAAG,UAAU,CAAC;AACzB,MAAM,GAAG,GAAG,UAAU,CAAC;AACvB,MAAM,IAAI,GAAG,SAAS,CAAC;AACvB,MAAM,GAAG,GAAG,SAAS,CAAC;AACtB,MAAM,KAAK,GAAG,SAAS,CAAC;AAExB,SAAS,GAAG,CAAC,GAAW,EAAE,GAAW;IACnC,OAAO,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC;AACtE,CAAC;AAED,SAAS,GAAG,CAAC,CAAS;IACpB,OAAO,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;AACxD,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,MAAuB;IACnD,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;IAEjD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,EAAE,MAAM,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;QAC7E,MAAM,MAAM,GACV,GAAG,CAAC,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC;YAC1B,GAAG,CAAC,MAAM,EAAE,IAAI,CAAC,IAAI,CAAC;YACtB,GAAG,CAAC,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC;YAC1B,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC;YACpB,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC;YACpB,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;QAEvB,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,GAAG,MAAM,GAAG,KAAK,EAAE,CAAC,CAAC;QAC1C,OAAO,CAAC,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC;QAErD,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,MAAM,GAAG,GACP,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC;gBACxB,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC;gBAC3B,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC;gBAC/B,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC;gBACzB,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC;gBACzB,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;YAC5B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACnB,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACjC,OAAO,CAAC,GAAG,CAAC,KAAK,
IAAI,aAAa,KAAK,EAAE,CAAC,CAAC;QAC3C,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;YAClC,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,KAAK,IAAI,KAAK,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,IAAI,KAAK,EAAE,CAAC;YAChE,MAAM,KAAK,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,IAAI,KAAK,GAAG,CAAC,CAAC,IAAI,GAAG,KAAK,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,GAAG,KAAK,EAAE,CAAC,CAAC;QAChF,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAChE,MAAM,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAEjE,OAAO,CAAC,GAAG,CACT,KAAK,IAAI,UAAU,KAAK,KAAK,MAAM,CAAC,KAAK,CAAC,KAAK,YAAY,MAAM,CAAC,KAAK,CAAC,QAAQ,SAAS,MAAM,CAAC,KAAK,CAAC,IAAI,MAAM,KAAK,GAAG,MAAM,UAAU,KAAK,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,GAAG,MAAM,UAAU,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CACtM,CAAC;AACJ,CAAC"}
@@ -0,0 +1,5 @@
1
+ import type { EvalSuiteResult } from './index';
2
/**
 * CSV reporter: writes one row per case result (input, expected, output,
 * duration, then one column per metric name) to the file at `options.path`.
 */
export declare function csvReport(
  result: EvalSuiteResult,
  options: {
    path: string;
  },
): void;
5
+ //# sourceMappingURL=csv.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"csv.d.ts","sourceRoot":"","sources":["../../src/reporters/csv.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAS/C,wBAAgB,SAAS,CAAC,MAAM,EAAE,eAAe,EAAE,OAAO,EAAE;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAyBlF"}
@@ -0,0 +1,31 @@
1
+ import { writeFileSync } from 'node:fs';
2
/**
 * Escapes one CSV field.
 *
 * A value containing a comma, a double quote, a line feed or a carriage
 * return is wrapped in double quotes with embedded quotes doubled; any other
 * value is returned unchanged. Carriage returns are included because a bare
 * `\r` inside an unquoted field is read as a record break by CSV parsers
 * that accept CR/CRLF line endings.
 *
 * @param value Raw field text.
 * @returns The field, quoted if necessary.
 */
function escapeField(value) {
  if (/[",\r\n]/.test(value)) {
    return `"${value.replace(/"/g, '""')}"`;
  }
  return value;
}
8
/**
 * Writes the per-case results as a CSV file.
 *
 * Columns: `input`, `expected`, `output`, `duration`, followed by one column
 * per distinct metric name in first-seen order across all results. A case
 * that has no score for a metric gets an empty cell in that column.
 *
 * Metric names are passed through `escapeField` in the header row, like the
 * text cells, so a name containing a comma, quote or line break cannot shift
 * or split the header columns.
 *
 * @param result Suite result; only `results` is read.
 * @param options `path` is the file to write (UTF-8, overwritten).
 */
export function csvReport(result, options) {
  // A Set keeps insertion order, so columns follow first appearance.
  const metricNames = new Set();
  for (const r of result.results) {
    for (const s of r.scores) {
      metricNames.add(s.name);
    }
  }
  const metrics = [...metricNames];
  const headers = [
    'input',
    'expected',
    'output',
    'duration',
    ...metrics.map((m) => escapeField(m)),
  ];
  const lines = [headers.join(',')];
  for (const r of result.results) {
    const scoreMap = new Map(r.scores.map((s) => [s.name, s.score]));
    const row = [
      escapeField(r.case.input),
      escapeField(r.case.expected ?? ''),
      escapeField(r.output),
      String(r.duration),
      ...metrics.map((m) => String(scoreMap.get(m) ?? '')),
    ];
    lines.push(row.join(','));
  }
  writeFileSync(options.path, lines.join('\n') + '\n', 'utf-8');
}
31
+ //# sourceMappingURL=csv.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"csv.js","sourceRoot":"","sources":["../../src/reporters/csv.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAGxC,SAAS,WAAW,CAAC,KAAa;IAChC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACvE,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC;IAC1C,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,MAAuB,EAAE,OAAyB;IAC1E,MAAM,WAAW,GAAG,IAAI,GAAG,EAAU,CAAC;IACtC,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QAC/B,KAAK,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC;YACzB,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IACD,MAAM,OAAO,GAAG,CAAC,GAAG,WAAW,CAAC,CAAC;IAEjC,MAAM,OAAO,GAAG,CAAC,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,UAAU,EAAE,GAAG,OAAO,CAAC,CAAC;IACxE,MAAM,KAAK,GAAa,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAE5C,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;QAC/B,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACjE,MAAM,GAAG,GAAG;YACV,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;YACzB,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,IAAI,EAAE,CAAC;YAClC,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC;YACrB,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;YAClB,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;SACrD,CAAC;QACF,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAC5B,CAAC;IAED,aAAa,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;AAChE,CAAC"}
@@ -0,0 +1,50 @@
1
/** Summary statistics for one metric across a run. */
export interface AggregatedMetric {
  /** Metric name. */
  name: string;
  mean: number;
  median: number;
  min: number;
  max: number;
  /** Standard deviation. */
  stdDev: number;
  /** 50th percentile. */
  p50: number;
  /** 95th percentile. */
  p95: number;
  /** 99th percentile. */
  p99: number;
}
12
/** Outcome of a single assertion evaluated against a run. */
export interface AssertionResult {
  /** Assertion name shown by the reporters. */
  name: string;
  /** Whether the assertion held. */
  passed: boolean;
  /** Text printed next to the assertion by the reporters. */
  message: string;
  /** Optional observed value. */
  actual?: number;
  /** Optional expected value. */
  expected?: number;
}
19
/** Complete result of an evaluation run, as consumed by the reporters. */
export interface EvalSuiteResult {
  /** One entry per evaluated case. */
  results: {
    case: {
      input: string;
      expected?: string;
    };
    output: string;
    duration: number;
    /** Per-metric scores recorded for this case. */
    scores: {
      name: string;
      score: number;
      details?: string;
    }[];
  }[];
  /** Aggregated statistics, keyed by string. */
  aggregated: Record<string, AggregatedMetric>;
  /** Assertion outcomes for the run. */
  assertions: AssertionResult[];
  /** Run-level totals printed in reporter summaries. */
  stats: {
    total: number;
    duration: number;
    cost: number;
  };
}
41
/** Names of the built-in reporters. */
export type ReporterType = 'console' | 'json' | 'csv' | 'ci';

/** Options shared by the reporters that write a file. */
export type ReporterOptions = {
  /** Output file path for the `json` and `csv` reporters. */
  path?: string;
};

/**
 * Runs one reporter, or several in the given order, over `result`.
 * When `options.path` is omitted the file reporters fall back to
 * `eval-report.json` / `eval-report.csv`.
 */
export declare function report(
  result: EvalSuiteResult,
  type: ReporterType | ReporterType[],
  options?: ReporterOptions,
): void;
46
+ export { consoleReport } from './console';
47
+ export { jsonReport } from './json';
48
+ export { csvReport } from './csv';
49
+ export { ciReport } from './ci';
50
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/reporters/index.ts"],"names":[],"mappings":"AAKA,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,OAAO,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,KAAK,CAAC;QACb,IAAI,EAAE;YAAE,KAAK,EAAE,MAAM,CAAC;YAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC;QAC3C,MAAM,EAAE,MAAM,CAAC;QACf,QAAQ,EAAE,MAAM,CAAC;QACjB,MAAM,EAAE,KAAK,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,MAAM,CAAC;YAAC,OAAO,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC,CAAC;KAClE,CAAC,CAAC;IACH,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC;IAC7C,UAAU,EAAE,eAAe,EAAE,CAAC;IAC9B,KAAK,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;CAC1D;AAED,MAAM,MAAM,YAAY,GAAG,SAAS,GAAG,MAAM,GAAG,KAAK,GAAG,IAAI,CAAC;AAC7D,MAAM,MAAM,eAAe,GAAG;IAAE,IAAI,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAEhD,wBAAgB,MAAM,CACpB,MAAM,EAAE,eAAe,EACvB,IAAI,EAAE,YAAY,GAAG,YAAY,EAAE,EACnC,OAAO,CAAC,EAAE,eAAe,GACxB,IAAI,CAmBN;AAED,OAAO,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,SAAS,EAAE,MAAM,OAAO,CAAC;AAClC,OAAO,EAAE,QAAQ,EAAE,MAAM,MAAM,CAAC"}
@@ -0,0 +1,28 @@
1
+ import { consoleReport } from './console';
2
+ import { jsonReport } from './json';
3
+ import { csvReport } from './csv';
4
+ import { ciReport } from './ci';
5
/**
 * Dispatches `result` to one or more reporters.
 *
 * `type` may be a single reporter name or an array of names; reporters run
 * in the order given, and names that match no reporter are ignored. The
 * `json` and `csv` reporters both take their output path from
 * `options.path`, defaulting to `eval-report.json` and `eval-report.csv`
 * respectively when it is absent.
 *
 * @param result Suite result handed to every selected reporter.
 * @param type Reporter name or list of names.
 * @param options Optional settings; only `path` is read.
 */
export function report(result, type, options) {
  const requested = Array.isArray(type) ? type : [type];
  // A Map (not a plain object) so only these exact keys can ever match.
  const reporters = new Map([
    ['console', () => consoleReport(result)],
    ['json', () => jsonReport(result, { path: options?.path ?? 'eval-report.json' })],
    ['csv', () => csvReport(result, { path: options?.path ?? 'eval-report.csv' })],
    ['ci', () => ciReport(result)],
  ]);
  for (const name of requested) {
    const run = reporters.get(name);
    if (run !== undefined) {
      run();
    }
  }
}
24
+ export { consoleReport } from './console';
25
+ export { jsonReport } from './json';
26
+ export { csvReport } from './csv';
27
+ export { ciReport } from './ci';
28
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/reporters/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,SAAS,EAAE,MAAM,OAAO,CAAC;AAClC,OAAO,EAAE,QAAQ,EAAE,MAAM,MAAM,CAAC;AAqChC,MAAM,UAAU,MAAM,CACpB,MAAuB,EACvB,IAAmC,EACnC,OAAyB;IAEzB,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAElD,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,QAAQ,CAAC,EAAE,CAAC;YACV,KAAK,SAAS;gBACZ,aAAa,CAAC,MAAM,CAAC,CAAC;gBACtB,MAAM;YACR,KAAK,MAAM;gBACT,UAAU,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,IAAI,kBAAkB,EAAE,CAAC,CAAC;gBAClE,MAAM;YACR,KAAK,KAAK;gBACR,SAAS,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,IAAI,iBAAiB,EAAE,CAAC,CAAC;gBAChE,MAAM;YACR,KAAK,IAAI;gBACP,QAAQ,CAAC,MAAM,CAAC,CAAC;gBACjB,MAAM;QACV,CAAC;IACH,CAAC;AACH,CAAC;AAED,OAAO,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,SAAS,EAAE,MAAM,OAAO,CAAC;AAClC,OAAO,EAAE,QAAQ,EAAE,MAAM,MAAM,CAAC"}
@@ -0,0 +1,5 @@
1
+ import type { EvalSuiteResult } from './index';
2
/**
 * JSON reporter: writes `result` as pretty-printed JSON to the file at
 * `options.path`.
 */
export declare function jsonReport(
  result: EvalSuiteResult,
  options: {
    path: string;
  },
): void;
5
+ //# sourceMappingURL=json.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"json.d.ts","sourceRoot":"","sources":["../../src/reporters/json.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,SAAS,CAAC;AAE/C,wBAAgB,UAAU,CAAC,MAAM,EAAE,eAAe,EAAE,OAAO,EAAE;IAAE,IAAI,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAEnF"}
@@ -0,0 +1,5 @@
1
+ import { writeFileSync } from 'node:fs';
2
/**
 * Serialises the whole suite result as JSON (2-space indentation) and writes
 * it to `options.path` as UTF-8, replacing any existing file.
 *
 * @param result Value to serialise.
 * @param options `path` is the destination file.
 */
export function jsonReport(result, options) {
  const { path } = options;
  const serialized = JSON.stringify(result, null, 2);
  writeFileSync(path, serialized, 'utf-8');
}
5
+ //# sourceMappingURL=json.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"json.js","sourceRoot":"","sources":["../../src/reporters/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAGxC,MAAM,UAAU,UAAU,CAAC,MAAuB,EAAE,OAAyB;IAC3E,aAAa,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;AACxE,CAAC"}
@@ -0,0 +1,29 @@
1
+ import { z } from 'zod';
2
/**
 * Schema for one evaluation case: a required `input` string plus optional
 * `expected` string and optional string-keyed `context` / `metadata` records.
 */
export declare const EvalCaseSchema: z.ZodObject<{
    input: z.ZodString;
    expected: z.ZodOptional<z.ZodString>;
    context: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
    metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
}, z.core.$strip>;

/**
 * Schema for suite run settings. Every field is a number with a default:
 * `concurrency` (integer >= 1, default 5), `timeout` (integer >= 1000,
 * default 30000 — presumably milliseconds, confirm against the runner) and
 * `retries` (integer 0..10, default 0).
 */
export declare const EvalSuiteConfigSchema: z.ZodObject<{
    concurrency: z.ZodDefault<z.ZodNumber>;
    timeout: z.ZodDefault<z.ZodNumber>;
    retries: z.ZodDefault<z.ZodNumber>;
}, z.core.$strip>;

/**
 * Schema for judge settings: required `model` string, `temperature`
 * (default 0) and an optional positive-integer `maxTokens`.
 */
export declare const JudgeConfigSchema: z.ZodObject<{
    model: z.ZodString;
    temperature: z.ZodDefault<z.ZodNumber>;
    maxTokens: z.ZodOptional<z.ZodNumber>;
}, z.core.$strip>;

/**
 * Schema for comparison run settings; it has the same three fields, bounds
 * and defaults as `EvalSuiteConfigSchema`.
 */
export declare const EvalComparisonConfigSchema: z.ZodObject<{
    concurrency: z.ZodDefault<z.ZodNumber>;
    timeout: z.ZodDefault<z.ZodNumber>;
    retries: z.ZodDefault<z.ZodNumber>;
}, z.core.$strip>;

/** A parsed evaluation case. */
export type EvalCase = z.output<typeof EvalCaseSchema>;
/** An evaluation case as accepted before parsing. */
export type EvalCaseInput = z.input<typeof EvalCaseSchema>;
/** Parsed suite settings (defaults applied). */
export type EvalSuiteConfig = z.output<typeof EvalSuiteConfigSchema>;
/** Suite settings as accepted before parsing (defaulted fields may be omitted). */
export type EvalSuiteConfigInput = z.input<typeof EvalSuiteConfigSchema>;
/** Parsed judge settings (defaults applied). */
export type JudgeConfig = z.output<typeof JudgeConfigSchema>;
/** Parsed comparison settings (defaults applied). */
export type EvalComparisonConfig = z.output<typeof EvalComparisonConfigSchema>;
29
+ //# sourceMappingURL=schema.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../src/schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,eAAO,MAAM,cAAc;;;;;iBAKzB,CAAC;AAEH,eAAO,MAAM,qBAAqB;;;;iBAIhC,CAAC;AAEH,eAAO,MAAM,iBAAiB;;;;iBAI5B,CAAC;AAEH,eAAO,MAAM,0BAA0B;;;;iBAIrC,CAAC;AAEH,MAAM,MAAM,QAAQ,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,cAAc,CAAC,CAAC;AACvD,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,cAAc,CAAC,CAAC;AAC3D,MAAM,MAAM,eAAe,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,qBAAqB,CAAC,CAAC;AACrE,MAAM,MAAM,oBAAoB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,qBAAqB,CAAC,CAAC;AACzE,MAAM,MAAM,WAAW,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,iBAAiB,CAAC,CAAC;AAC7D,MAAM,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,0BAA0B,CAAC,CAAC"}
package/dist/schema.js ADDED
@@ -0,0 +1,23 @@
1
+ import { z } from 'zod';
2
/**
 * Schema for a single evaluation case.
 *
 * `input` is the only required field. `expected` is an optional string, and
 * `context` / `metadata` are optional string-keyed records whose values are
 * not validated.
 */
export const EvalCaseSchema = z.object({
    input: z.string(),
    expected: z.optional(z.string()),
    context: z.optional(z.record(z.string(), z.unknown())),
    metadata: z.optional(z.record(z.string(), z.unknown())),
});
8
/**
 * Schema for eval suite run settings. Every field is an integer with a
 * default, so an empty object parses successfully.
 *
 * - concurrency: at least 1, defaults to 5
 * - timeout: at least 1000, defaults to 30000 (presumably milliseconds — confirm)
 * - retries: between 0 and 10 inclusive, defaults to 0
 */
export const EvalSuiteConfigSchema = z.object({
    concurrency: z.number().int().gte(1).default(5),
    timeout: z.number().int().gte(1000).default(30000),
    retries: z.number().int().gte(0).lte(10).default(0),
});
13
/**
 * Schema for judge model settings.
 *
 * - model: required string
 * - temperature: any number, defaults to 0
 * - maxTokens: optional positive integer
 */
export const JudgeConfigSchema = z.object({
    model: z.string(),
    temperature: z.number().default(0),
    maxTokens: z.optional(z.number().int().positive()),
});
18
/**
 * Schema for eval comparison run settings. Every field is an integer with a
 * default, so an empty object parses successfully.
 *
 * - concurrency: at least 1, defaults to 5
 * - timeout: at least 1000, defaults to 30000 (presumably milliseconds — confirm)
 * - retries: between 0 and 10 inclusive, defaults to 0
 */
export const EvalComparisonConfigSchema = z.object({
    concurrency: z.number().int().gte(1).default(5),
    timeout: z.number().int().gte(1000).default(30000),
    retries: z.number().int().gte(0).lte(10).default(0),
});
23
+ //# sourceMappingURL=schema.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"schema.js","sourceRoot":"","sources":["../src/schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,CAAC,MAAM,CAAC;IACrC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;IACjB,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC/B,OAAO,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,QAAQ,EAAE;IACrD,QAAQ,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,QAAQ,EAAE;CACvD,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;IAC5C,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;IAC/C,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC;IAClD,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;CACpD,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;IACxC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;IACjB,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IAClC,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE;CAClD,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,0BAA0B,GAAG,CAAC,CAAC,MAAM,CAAC;IACjD,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;IAC/C,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC;IAClD,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;CACpD,CAAC,CAAC"}
@@ -0,0 +1,6 @@
1
+ export { percentile, mean, median, stdDev, aggregate } from './percentiles';
2
+ export { pairedTTest } from './t-test';
3
+ export type { TTestResult } from './t-test';
4
+ export { mcnemarsTest } from './mcnemar';
5
+ export type { McNemarResult } from './mcnemar';
6
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/stats/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAC5E,OAAO,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AACvC,YAAY,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAC5C,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AACzC,YAAY,EAAE,aAAa,EAAE,MAAM,WAAW,CAAC"}
@@ -0,0 +1,4 @@
1
+ export { percentile, mean, median, stdDev, aggregate } from './percentiles';
2
+ export { pairedTTest } from './t-test';
3
+ export { mcnemarsTest } from './mcnemar';
4
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/stats/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAC5E,OAAO,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAEvC,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC"}
@@ -0,0 +1,7 @@
1
/** Outcome of McNemar's test as returned by `mcnemarsTest`. */
+ export interface McNemarResult {
2
/** The chi-square test statistic computed from the discordant pair counts. */
+ chiSquare: number;
3
/** The p-value derived from `chiSquare`. */
+ pValue: number;
4
/** True when `pValue` is below 0.05 (the threshold used in mcnemar.js). */
+ significant: boolean;
5
+ }
6
/**
 * Runs McNemar's chi-square test (with a continuity correction) on the two
 * discordant pair counts of a paired A/B comparison.
 *
 * @param pairsACorrect_BIncorrect - Number of pairs where A was correct and B was not.
 * @param pairsAIncorrect_BCorrect - Number of pairs where A was not correct and B was.
 * @returns The statistic, its p-value and a significance flag; when both
 *   counts sum to zero the result is chiSquare 0, pValue 1, not significant.
 */
+ export declare function mcnemarsTest(pairsACorrect_BIncorrect: number, pairsAIncorrect_BCorrect: number): McNemarResult;
7
+ //# sourceMappingURL=mcnemar.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mcnemar.d.ts","sourceRoot":"","sources":["../../src/stats/mcnemar.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,OAAO,CAAC;CACtB;AAuBD,wBAAgB,YAAY,CAC1B,wBAAwB,EAAE,MAAM,EAChC,wBAAwB,EAAE,MAAM,GAC/B,aAAa,CAiBf"}
@@ -0,0 +1,34 @@
1
/**
 * Complementary error function, erfc(x) = 1 - erf(x).
 *
 * erf(|x|) is evaluated with the five-coefficient polynomial-times-Gaussian
 * approximation (Abramowitz & Stegun formula 7.1.26); negative arguments are
 * handled through the odd symmetry of erf.
 *
 * @param {number} x
 * @returns {number} Approximate erfc(x), in the range (0, 2).
 */
function erfc(x) {
    const a1 = 0.254829592;
    const a2 = -0.284496736;
    const a3 = 1.421413741;
    const a4 = -1.453152027;
    const a5 = 1.061405429;
    const p = 0.3275911;
    const sign = x < 0 ? -1 : 1;
    const absX = Math.abs(x);
    const t = 1.0 / (1.0 + p * absX);
    // y approximates erf(|x|); the polynomial in t is evaluated in Horner form.
    const y = 1.0 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.exp(-absX * absX);
    return 1.0 - sign * y;
}

/**
 * Upper-tail probability P(X > x) of a chi-square variable with one degree of
 * freedom, which equals erfc(sqrt(x / 2)).
 *
 * @param {number} x - Chi-square statistic.
 * @returns {number} The survival probability; exactly 1 for x <= 0.
 */
function chiSquareSurvival(x) {
    if (x <= 0) {
        return 1;
    }
    return erfc(Math.sqrt(x / 2));
}

/**
 * McNemar's test for paired binary outcomes, using the continuity-corrected
 * statistic chi^2 = max(0, |b - c| - 1)^2 / (b + c) with one degree of freedom.
 *
 * @param {number} pairsACorrect_BIncorrect - Discordant pairs where A was correct and B was not (b).
 * @param {number} pairsAIncorrect_BCorrect - Discordant pairs where A was not correct and B was (c).
 * @returns {{ chiSquare: number, pValue: number, significant: boolean }}
 *   `significant` is true when the p-value is below 0.05. With no discordant
 *   pairs the result is chiSquare 0, pValue 1, not significant.
 */
export function mcnemarsTest(pairsACorrect_BIncorrect, pairsAIncorrect_BCorrect) {
    const b = pairsACorrect_BIncorrect;
    const c = pairsAIncorrect_BCorrect;
    const discordant = b + c;
    if (discordant === 0) {
        return { chiSquare: 0, pValue: 1, significant: false };
    }
    // The continuity correction must not push the numerator below zero:
    // without the clamp, b === c gives (-1)^2 = 1 and a spurious non-zero
    // statistic for perfectly balanced discordant pairs.
    const diff = Math.max(0, Math.abs(b - c) - 1);
    const chiSquare = (diff * diff) / discordant;
    const pValue = chiSquareSurvival(chiSquare);
    return {
        chiSquare,
        pValue,
        significant: pValue < 0.05,
    };
}
34
+ //# sourceMappingURL=mcnemar.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mcnemar.js","sourceRoot":"","sources":["../../src/stats/mcnemar.ts"],"names":[],"mappings":"AAMA,SAAS,IAAI,CAAC,CAAS;IACrB,MAAM,EAAE,GAAG,WAAW,CAAC;IACvB,MAAM,EAAE,GAAG,CAAC,WAAW,CAAC;IACxB,MAAM,EAAE,GAAG,WAAW,CAAC;IACvB,MAAM,EAAE,GAAG,CAAC,WAAW,CAAC;IACxB,MAAM,EAAE,GAAG,WAAW,CAAC;IACvB,MAAM,CAAC,GAAG,SAAS,CAAC;IAEpB,MAAM,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5B,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACzB,MAAM,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;IACjC,MAAM,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,GAAG,IAAI,CAAC,CAAC;IAE5F,OAAO,GAAG,GAAG,IAAI,GAAG,CAAC,CAAC;AACxB,CAAC;AAED,SAAS,iBAAiB,CAAC,CAAS;IAClC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,CAAC,CAAC;IACrB,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;AAChC,CAAC;AAED,MAAM,UAAU,YAAY,CAC1B,wBAAgC,EAChC,wBAAgC;IAEhC,MAAM,CAAC,GAAG,wBAAwB,CAAC;IACnC,MAAM,CAAC,GAAG,wBAAwB,CAAC;IAEnC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;QAChB,OAAO,EAAE,SAAS,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC;IACzD,CAAC;IAED,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;IACjC,MAAM,SAAS,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAE5C,OAAO;QACL,SAAS;QACT,MAAM;QACN,WAAW,EAAE,MAAM,GAAG,IAAI;KAC3B,CAAC;AACJ,CAAC"}
@@ -0,0 +1,15 @@
1
/** Arithmetic mean of `values`; returns 0 for an empty array. */
+ export declare function mean(values: number[]): number;
2
/** Sample standard deviation (n - 1 denominator); returns 0 for fewer than two values. */
+ export declare function stdDev(values: number[]): number;
3
/**
 * Linearly interpolated percentile of `values`, where `p` is a fraction in
 * [0, 1] (0.95 for the 95th percentile). Returns 0 for an empty array.
 */
+ export declare function percentile(values: number[], p: number): number;
4
/** Median of `values`, i.e. `percentile(values, 0.5)`. */
+ export declare function median(values: number[]): number;
5
/**
 * Summary statistics for `values`. Every field is 0 for an empty array;
 * `median` and `p50` are the same quantity.
 */
+ export declare function aggregate(values: number[]): {
6
+ mean: number;
7
+ median: number;
8
+ min: number;
9
+ max: number;
10
+ stdDev: number;
11
+ p50: number;
12
+ p95: number;
13
+ p99: number;
14
+ };
15
+ //# sourceMappingURL=percentiles.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"percentiles.d.ts","sourceRoot":"","sources":["../../src/stats/percentiles.ts"],"names":[],"mappings":"AAAA,wBAAgB,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,CAO7C;AAED,wBAAgB,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,CAS/C;AAED,wBAAgB,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CAa9D;AAED,wBAAgB,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,CAE/C;AAED,wBAAgB,SAAS,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG;IAC3C,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;CACb,CAiBA"}
@@ -0,0 +1,54 @@
1
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} values
 * @returns {number} The mean, or 0 when `values` is empty.
 */
export function mean(values) {
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    let total = 0;
    for (const value of values) {
        total += value;
    }
    return total / count;
}
10
/**
 * Sample standard deviation (n - 1 denominator) of a list of numbers.
 *
 * @param {number[]} values
 * @returns {number} The standard deviation, or 0 when fewer than two values are given.
 */
export function stdDev(values) {
    const count = values.length;
    if (count < 2) {
        return 0;
    }
    const center = mean(values);
    let squaredDeviations = 0;
    for (const value of values) {
        const delta = value - center;
        squaredDeviations += delta * delta;
    }
    return Math.sqrt(squaredDeviations / (count - 1));
}
21
/**
 * Percentile of a list of numbers using linear interpolation between the two
 * closest ranks. The input array is not modified.
 *
 * @param {number[]} values
 * @param {number} p - Fraction in [0, 1], e.g. 0.95 for the 95th percentile.
 * @returns {number} The interpolated percentile, or 0 when `values` is empty.
 * @throws {RangeError} When `p` is NaN or outside [0, 1]; such a value would
 *   otherwise index past the sorted data and yield `undefined` or NaN.
 */
export function percentile(values, p) {
    // Written as a negated conjunction so that NaN is rejected as well.
    if (!(p >= 0 && p <= 1)) {
        throw new RangeError(`percentile: p must be a number in [0, 1], got ${p}`);
    }
    const count = values.length;
    if (count === 0) {
        return 0;
    }
    const sorted = [...values].sort((a, b) => a - b);
    // Fractional position of the requested percentile within the sorted data.
    const rank = p * (count - 1);
    const lower = Math.floor(rank);
    const upper = Math.ceil(rank);
    if (lower === upper) {
        return sorted[lower];
    }
    const fraction = rank - lower;
    return sorted[lower] + fraction * (sorted[upper] - sorted[lower]);
}
35
/**
 * Median of a list of numbers, computed as the 0.5 percentile.
 *
 * @param {number[]} values
 * @returns {number} The median as returned by `percentile(values, 0.5)`.
 */
export function median(values) {
    const midpoint = 0.5;
    return percentile(values, midpoint);
}
38
/**
 * Summary statistics for a list of numbers.
 *
 * @param {number[]} values
 * @returns {{ mean: number, median: number, min: number, max: number,
 *   stdDev: number, p50: number, p95: number, p99: number }}
 *   Every field is 0 when `values` is empty. `median` and `p50` are both the
 *   0.5 percentile.
 */
export function aggregate(values) {
    if (values.length === 0) {
        return { mean: 0, median: 0, min: 0, max: 0, stdDev: 0, p50: 0, p95: 0, p99: 0 };
    }
    // Sorted copy used only to read the extremes; the input stays untouched.
    const ascending = Array.from(values).sort((left, right) => left - right);
    const smallest = ascending[0];
    const largest = ascending[ascending.length - 1];
    // median and p50 are the same quantity, so it is computed once.
    const middle = percentile(values, 0.5);
    return {
        mean: mean(values),
        median: middle,
        min: smallest,
        max: largest,
        stdDev: stdDev(values),
        p50: middle,
        p95: percentile(values, 0.95),
        p99: percentile(values, 0.99),
    };
}
54
+ //# sourceMappingURL=percentiles.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"percentiles.js","sourceRoot":"","sources":["../../src/stats/percentiles.ts"],"names":[],"mappings":"AAAA,MAAM,UAAU,IAAI,CAAC,MAAgB;IACnC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAClC,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,GAAG,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC;IACnB,CAAC;IACD,OAAO,GAAG,GAAG,MAAM,CAAC,MAAM,CAAC;AAC7B,CAAC;AAED,MAAM,UAAU,MAAM,CAAC,MAAgB;IACrC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IAChC,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;IACvB,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACxB,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;IACjB,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;AAChD,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,MAAgB,EAAE,CAAS;IACpD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAClC,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACjD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,MAAM,CAAC,CAAC,CAAC,CAAC;IAE1C,MAAM,IAAI,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACrC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE9B,IAAI,KAAK,KAAK,KAAK;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;IAE1C,MAAM,QAAQ,GAAG,IAAI,GAAG,KAAK,CAAC;IAC9B,OAAO,MAAM,CAAC,KAAK,CAAC,GAAG,QAAQ,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;AACpE,CAAC;AAED,MAAM,UAAU,MAAM,CAAC,MAAgB;IACrC,OAAO,UAAU,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;AACjC,CAAC;AAED,MAAM,UAAU,SAAS,CAAC,MAAgB;IAUxC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,IAAI,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC;IACnF,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,
EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAEjD,OAAO;QACL,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC;QAClB,MAAM,EAAE,UAAU,CAAC,MAAM,EAAE,GAAG,CAAC;QAC/B,GAAG,EAAE,MAAM,CAAC,CAAC,CAAC;QACd,GAAG,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC;QAC9B,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC;QACtB,GAAG,EAAE,UAAU,CAAC,MAAM,EAAE,GAAG,CAAC;QAC5B,GAAG,EAAE,UAAU,CAAC,MAAM,EAAE,IAAI,CAAC;QAC7B,GAAG,EAAE,UAAU,CAAC,MAAM,EAAE,IAAI,CAAC;KAC9B,CAAC;AACJ,CAAC"}
@@ -0,0 +1,9 @@
1
/**
 * Result shape returned by `pairedTTest`.
 * NOTE(review): the implementation (t-test.js) is not visible here; the
 * significance threshold and confidence level should be confirmed there.
 */
+ export interface TTestResult {
2
/** The t statistic of the test. */
+ tStatistic: number;
3
/** Degrees of freedom used for the test. */
+ degreesOfFreedom: number;
4
/** The p-value associated with `tStatistic`. */
+ pValue: number;
5
/** Whether the result is considered statistically significant. */
+ significant: boolean;
6
/** Two-element interval; presumably [lower, upper] bounds — confirm against t-test.js. */
+ confidenceInterval: [number, number];
7
+ }
8
/**
 * Paired t-test over two numeric samples.
 * NOTE(review): presumably `samplesA[i]` and `samplesB[i]` form a pair and both
 * arrays must have equal length — confirm against t-test.js, which is not
 * visible here.
 */
+ export declare function pairedTTest(samplesA: number[], samplesB: number[]): TTestResult;
9
+ //# sourceMappingURL=t-test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"t-test.d.ts","sourceRoot":"","sources":["../../src/stats/t-test.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,WAAW;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,gBAAgB,EAAE,MAAM,CAAC;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,OAAO,CAAC;IACrB,kBAAkB,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACtC;AAkGD,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,WAAW,CA4C/E"}