@hasna/evals 0.1.24 → 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/dist/adapters/adapters.test.d.ts +2 -0
  2. package/dist/adapters/adapters.test.d.ts.map +1 -0
  3. package/dist/adapters/anthropic-openai.test.d.ts +2 -0
  4. package/dist/adapters/anthropic-openai.test.d.ts.map +1 -0
  5. package/dist/adapters/anthropic.d.ts +4 -0
  6. package/dist/adapters/anthropic.d.ts.map +1 -0
  7. package/dist/adapters/cli.d.ts +4 -0
  8. package/dist/adapters/cli.d.ts.map +1 -0
  9. package/dist/adapters/function.d.ts +4 -0
  10. package/dist/adapters/function.d.ts.map +1 -0
  11. package/dist/adapters/http-cli-coverage.test.d.ts +2 -0
  12. package/dist/adapters/http-cli-coverage.test.d.ts.map +1 -0
  13. package/dist/adapters/http.d.ts +16 -0
  14. package/dist/adapters/http.d.ts.map +1 -0
  15. package/dist/adapters/mcp-adapter.test.d.ts +2 -0
  16. package/dist/adapters/mcp-adapter.test.d.ts.map +1 -0
  17. package/dist/adapters/mcp.d.ts +4 -0
  18. package/dist/adapters/mcp.d.ts.map +1 -0
  19. package/dist/adapters/openai.d.ts +4 -0
  20. package/dist/adapters/openai.d.ts.map +1 -0
  21. package/dist/cli/adapter-parser.d.ts +3 -0
  22. package/dist/cli/adapter-parser.d.ts.map +1 -0
  23. package/dist/cli/cli.test.d.ts +2 -0
  24. package/dist/cli/cli.test.d.ts.map +1 -0
  25. package/dist/cli/commands/calibrate.d.ts +3 -0
  26. package/dist/cli/commands/calibrate.d.ts.map +1 -0
  27. package/dist/cli/commands/capture.d.ts +3 -0
  28. package/dist/cli/commands/capture.d.ts.map +1 -0
  29. package/dist/cli/commands/ci.d.ts +3 -0
  30. package/dist/cli/commands/ci.d.ts.map +1 -0
  31. package/dist/cli/commands/compare.d.ts +5 -0
  32. package/dist/cli/commands/compare.d.ts.map +1 -0
  33. package/dist/cli/commands/compare.test.d.ts +2 -0
  34. package/dist/cli/commands/compare.test.d.ts.map +1 -0
  35. package/dist/cli/commands/completion.d.ts +3 -0
  36. package/dist/cli/commands/completion.d.ts.map +1 -0
  37. package/dist/cli/commands/doctor.d.ts +3 -0
  38. package/dist/cli/commands/doctor.d.ts.map +1 -0
  39. package/dist/cli/commands/estimate.d.ts +3 -0
  40. package/dist/cli/commands/estimate.d.ts.map +1 -0
  41. package/dist/cli/commands/generate.d.ts +3 -0
  42. package/dist/cli/commands/generate.d.ts.map +1 -0
  43. package/dist/cli/commands/judge.d.ts +3 -0
  44. package/dist/cli/commands/judge.d.ts.map +1 -0
  45. package/dist/cli/commands/mcp.d.ts +3 -0
  46. package/dist/cli/commands/mcp.d.ts.map +1 -0
  47. package/dist/cli/commands/run.d.ts +3 -0
  48. package/dist/cli/commands/run.d.ts.map +1 -0
  49. package/dist/cli/commands/sync.d.ts +3 -0
  50. package/dist/cli/commands/sync.d.ts.map +1 -0
  51. package/dist/cli/index.d.ts +3 -0
  52. package/dist/cli/index.d.ts.map +1 -0
  53. package/dist/cli/index.js +857 -170
  54. package/dist/core/assertions-coverage.test.d.ts +2 -0
  55. package/dist/core/assertions-coverage.test.d.ts.map +1 -0
  56. package/dist/core/assertions.d.ts +18 -0
  57. package/dist/core/assertions.d.ts.map +1 -0
  58. package/dist/core/assertions.test.d.ts +2 -0
  59. package/dist/core/assertions.test.d.ts.map +1 -0
  60. package/dist/core/e2e.test.d.ts +2 -0
  61. package/dist/core/e2e.test.d.ts.map +1 -0
  62. package/dist/core/judge.d.ts +13 -0
  63. package/dist/core/judge.d.ts.map +1 -0
  64. package/dist/core/judge.test.d.ts +2 -0
  65. package/dist/core/judge.test.d.ts.map +1 -0
  66. package/dist/core/reporter.d.ts +21 -0
  67. package/dist/core/reporter.d.ts.map +1 -0
  68. package/dist/core/reporter.test.d.ts +2 -0
  69. package/dist/core/reporter.test.d.ts.map +1 -0
  70. package/dist/core/runner.d.ts +4 -0
  71. package/dist/core/runner.d.ts.map +1 -0
  72. package/dist/core/runner.test.d.ts +2 -0
  73. package/dist/core/runner.test.d.ts.map +1 -0
  74. package/dist/datasets/loader.d.ts +18 -0
  75. package/dist/datasets/loader.d.ts.map +1 -0
  76. package/dist/datasets/loader.test.d.ts +2 -0
  77. package/dist/datasets/loader.test.d.ts.map +1 -0
  78. package/dist/db/store.d.ts +17 -0
  79. package/dist/db/store.d.ts.map +1 -0
  80. package/dist/db/store.test.d.ts +2 -0
  81. package/dist/db/store.test.d.ts.map +1 -0
  82. package/dist/index.d.ts +8 -0
  83. package/dist/index.d.ts.map +1 -0
  84. package/dist/index.js +24 -4
  85. package/dist/mcp/http.d.ts +13 -0
  86. package/dist/mcp/http.d.ts.map +1 -0
  87. package/dist/mcp/http.test.d.ts +2 -0
  88. package/dist/mcp/http.test.d.ts.map +1 -0
  89. package/dist/mcp/index.d.ts +3 -0
  90. package/dist/mcp/index.d.ts.map +1 -0
  91. package/dist/mcp/index.js +32454 -477
  92. package/dist/mcp/mcp.test.d.ts +2 -0
  93. package/dist/mcp/mcp.test.d.ts.map +1 -0
  94. package/dist/mcp/server.d.ts +5 -0
  95. package/dist/mcp/server.d.ts.map +1 -0
  96. package/dist/server/index.d.ts +3 -0
  97. package/dist/server/index.d.ts.map +1 -0
  98. package/dist/server/index.js +24 -4
  99. package/dist/server/server.test.d.ts +2 -0
  100. package/dist/server/server.test.d.ts.map +1 -0
  101. package/dist/types/index.d.ts +171 -0
  102. package/dist/types/index.d.ts.map +1 -0
  103. package/package.json +3 -2
@@ -0,0 +1,2 @@
1
+ export declare function echoFn(input: string): Promise<string>;
2
+ //# sourceMappingURL=mcp.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mcp.test.d.ts","sourceRoot":"","sources":["../../src/mcp/mcp.test.ts"],"names":[],"mappings":"AAsDA,wBAAsB,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAE3D"}
@@ -0,0 +1,5 @@
1
+ import { Server } from "@modelcontextprotocol/sdk/server/index.js";
2
+ export declare const MCP_NAME = "evals";
3
+ export declare const DEFAULT_MCP_HTTP_PORT = 8862;
4
+ export declare function buildServer(): Server;
5
+ //# sourceMappingURL=server.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../src/mcp/server.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AAanE,eAAO,MAAM,QAAQ,UAAU,CAAC;AAChC,eAAO,MAAM,qBAAqB,OAAO,CAAC;AAE1C,wBAAgB,WAAW,IAAI,MAAM,CAmSpC"}
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env bun
2
+ export {};
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/server/index.ts"],"names":[],"mappings":""}
@@ -11666,7 +11666,7 @@ var safeJSON2 = (text) => {
11666
11666
  var sleep2 = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
11667
11667
 
11668
11668
  // node_modules/openai/version.mjs
11669
- var VERSION2 = "6.39.0";
11669
+ var VERSION2 = "6.42.0";
11670
11670
 
11671
11671
  // node_modules/openai/internal/detect-platform.mjs
11672
11672
  var isRunningInBrowser2 = () => {
@@ -18252,7 +18252,10 @@ https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety
18252
18252
  if (isTimeout) {
18253
18253
  throw new APIConnectionTimeoutError2;
18254
18254
  }
18255
- throw new APIConnectionError2({ cause: response });
18255
+ throw new APIConnectionError2({
18256
+ message: getConnectionErrorMessage(response),
18257
+ cause: response
18258
+ });
18256
18259
  }
18257
18260
  const specialHeaders = [...response.headers.entries()].filter(([name]) => name === "x-request-id").map(([name, value]) => ", " + name + ": " + JSON.stringify(value)).join("");
18258
18261
  const responseInfo = `[${requestLogID}${retryLogStr}${specialHeaders}] ${req.method} ${url} ${response.ok ? "succeeded" : "failed"} with status ${response.status} in ${headersTime - startTime}ms`;
@@ -18525,6 +18528,23 @@ OpenAI.Evals = Evals;
18525
18528
  OpenAI.Containers = Containers;
18526
18529
  OpenAI.Skills = Skills2;
18527
18530
  OpenAI.Videos = Videos;
18531
+ function getConnectionErrorMessage(error3) {
18532
+ if (isUndiciDispatcherVersionMismatchError(error3)) {
18533
+ return `Connection error. This may be caused by passing an undici dispatcher, such as ProxyAgent, that is incompatible with the fetch implementation. If you are using undici's ProxyAgent, pass the fetch implementation from the same undici package: import { fetch, ProxyAgent } from 'undici'; new OpenAI({ fetch, fetchOptions: { dispatcher: new ProxyAgent(...) } });`;
18534
+ }
18535
+ return;
18536
+ }
18537
+ function isUndiciDispatcherVersionMismatchError(error3) {
18538
+ let current = error3;
18539
+ for (let i = 0;i < 8 && current && typeof current === "object"; i++) {
18540
+ const err = current;
18541
+ if (err.code === "UND_ERR_INVALID_ARG" && typeof err.message === "string" && err.message.includes("invalid onRequestStart method")) {
18542
+ return true;
18543
+ }
18544
+ current = err.cause;
18545
+ }
18546
+ return false;
18547
+ }
18528
18548
  // node_modules/openai/azure.mjs
18529
18549
  var _deployments_endpoints = new Set([
18530
18550
  "/completions",
@@ -22595,7 +22615,7 @@ function finalize(ctx, schema) {
22595
22615
  result.$schema = "http://json-schema.org/draft-07/schema#";
22596
22616
  } else if (ctx.target === "draft-04") {
22597
22617
  result.$schema = "http://json-schema.org/draft-04/schema#";
22598
- } else if (ctx.target === "openapi-3.0") {} else {}
22618
+ } else if (ctx.target === "openapi-3.0") {}
22599
22619
  if (ctx.external?.uri) {
22600
22620
  const id = ctx.external.registry.get(schema)?.id;
22601
22621
  if (!id)
@@ -22813,7 +22833,7 @@ var literalProcessor = (schema, ctx, json, _params) => {
22813
22833
  if (val === undefined) {
22814
22834
  if (ctx.unrepresentable === "throw") {
22815
22835
  throw new Error("Literal `undefined` cannot be represented in JSON Schema");
22816
- } else {}
22836
+ }
22817
22837
  } else if (typeof val === "bigint") {
22818
22838
  if (ctx.unrepresentable === "throw") {
22819
22839
  throw new Error("BigInt literals cannot be represented in JSON Schema");
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=server.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"server.test.d.ts","sourceRoot":"","sources":["../../src/server/server.test.ts"],"names":[],"mappings":""}
@@ -0,0 +1,171 @@
1
+ export type Verdict = "PASS" | "FAIL" | "UNKNOWN";
2
+ export interface HttpAdapterConfig {
3
+ type: "http";
4
+ url: string;
5
+ method?: "GET" | "POST" | "PUT" | "PATCH";
6
+ headers?: Record<string, string>;
7
+ /** Path into request body where the input message goes, e.g. "messages[-1].content" */
8
+ inputPath?: string;
9
+ /** Path into response body where the output text lives, e.g. "choices[0].message.content" */
10
+ outputPath?: string;
11
+ timeoutMs?: number;
12
+ }
13
+ export interface AnthropicAdapterConfig {
14
+ type: "anthropic";
15
+ model: string;
16
+ systemPrompt?: string;
17
+ maxTokens?: number;
18
+ apiKey?: string;
19
+ }
20
+ export interface OpenAIAdapterConfig {
21
+ type: "openai";
22
+ model: string;
23
+ systemPrompt?: string;
24
+ maxTokens?: number;
25
+ baseURL?: string;
26
+ apiKey?: string;
27
+ }
28
+ export interface McpAdapterConfig {
29
+ type: "mcp";
30
+ /** Command to start the MCP server, e.g. ["node", "dist/mcp/index.js"] */
31
+ command: string[];
32
+ /** Tool name to call */
33
+ tool: string;
34
+ /** How to map the EvalCase input into tool arguments */
35
+ inputMapping?: Record<string, string>;
36
+ timeoutMs?: number;
37
+ }
38
+ export interface FunctionAdapterConfig {
39
+ type: "function";
40
+ /** Absolute path to module */
41
+ modulePath: string;
42
+ /** Named export to call */
43
+ exportName?: string;
44
+ }
45
+ export interface CliAdapterConfig {
46
+ type: "cli";
47
+ /** Command template — use {{input}} as placeholder */
48
+ command: string;
49
+ timeoutMs?: number;
50
+ env?: Record<string, string>;
51
+ }
52
+ export type AdapterConfig = HttpAdapterConfig | AnthropicAdapterConfig | OpenAIAdapterConfig | McpAdapterConfig | FunctionAdapterConfig | CliAdapterConfig;
53
+ export type AssertionType = "contains" | "not_contains" | "starts_with" | "ends_with" | "equals" | "regex" | "not_regex" | "max_length" | "min_length" | "json_valid" | "json_schema" | "tool_called" | "tool_not_called" | "tool_call_count" | "tool_args_match" | "response_time_ms" | "token_count" | "cost_usd" | "semantic_similarity";
54
+ export interface Assertion {
55
+ type: AssertionType;
56
+ /** The value to check against — type depends on assertion type */
57
+ value?: string | number | boolean | Record<string, unknown>;
58
+ /** For range-based assertions */
59
+ min?: number;
60
+ max?: number;
61
+ /** For semantic_similarity — 0.0 to 1.0, default 0.8 */
62
+ threshold?: number;
63
+ /** Human-readable label for reports */
64
+ label?: string;
65
+ }
66
+ export interface AssertionResult {
67
+ type: AssertionType;
68
+ passed: boolean;
69
+ reason: string;
70
+ label?: string;
71
+ durationMs?: number;
72
+ }
73
+ export interface JudgeConfig {
74
+ /** Plain-English grading criteria. Required. */
75
+ rubric: string;
76
+ /** Judge model. Default: claude-sonnet-4-6 */
77
+ model?: string;
78
+ /** Judge provider. Default: anthropic */
79
+ provider?: "anthropic" | "openai";
80
+ /** API key override — falls back to env */
81
+ apiKey?: string;
82
+ }
83
+ export interface JudgeResult {
84
+ verdict: Verdict;
85
+ /** Chain-of-thought reasoning — always present before verdict */
86
+ reasoning: string;
87
+ durationMs: number;
88
+ inputTokens?: number;
89
+ outputTokens?: number;
90
+ costUsd?: number;
91
+ }
92
+ export interface ConversationTurn {
93
+ role: "user" | "assistant";
94
+ content: string;
95
+ /** For assistant turns: what the expected behavior should be (natural language) */
96
+ expected?: string;
97
+ }
98
+ export interface EvalCase {
99
+ id: string;
100
+ /** Single-turn: plain string input */
101
+ input?: string;
102
+ /** Multi-turn: conversation turns. If present, input is ignored. */
103
+ turns?: ConversationTurn[];
104
+ /** Natural language description of expected output (for judge) */
105
+ expected?: string;
106
+ /** Adapter config override — falls back to run-level config */
107
+ adapter?: AdapterConfig;
108
+ assertions?: Assertion[];
109
+ judge?: JudgeConfig;
110
+ /** Run this case N times and report pass_rate (Pass^k metric) */
111
+ repeat?: number;
112
+ /** Minimum pass rate for Pass^k to be considered passing (0.0–1.0, default 1.0) */
113
+ passThreshold?: number;
114
+ tags?: string[];
115
+ metadata?: Record<string, unknown>;
116
+ }
117
+ export interface EvalResult {
118
+ caseId: string;
119
+ verdict: Verdict;
120
+ /** Raw output from the app under test */
121
+ output: string;
122
+ /** For multi-turn: all turn outputs */
123
+ turnOutputs?: string[];
124
+ assertionResults: AssertionResult[];
125
+ judgeResult?: JudgeResult;
126
+ /** For Pass^k: individual verdicts per repeat */
127
+ repeatVerdicts?: Verdict[];
128
+ passRate?: number;
129
+ durationMs: number;
130
+ inputTokens?: number;
131
+ outputTokens?: number;
132
+ costUsd?: number;
133
+ error?: string;
134
+ }
135
+ export interface EvalRunStats {
136
+ total: number;
137
+ passed: number;
138
+ failed: number;
139
+ unknown: number;
140
+ errors: number;
141
+ passRate: number;
142
+ totalDurationMs: number;
143
+ totalCostUsd: number;
144
+ totalTokens: number;
145
+ }
146
+ export interface EvalRun {
147
+ id: string;
148
+ createdAt: string;
149
+ dataset: string;
150
+ adapterConfig?: AdapterConfig;
151
+ results: EvalResult[];
152
+ stats: EvalRunStats;
153
+ /** Named baseline tag if set */
154
+ baselineName?: string;
155
+ }
156
+ export interface RunOptions {
157
+ dataset: string;
158
+ adapter?: AdapterConfig;
159
+ concurrency?: number;
160
+ tags?: string[];
161
+ skipJudge?: boolean;
162
+ repeat?: number;
163
+ outputFormat?: "terminal" | "json" | "markdown";
164
+ verbose?: boolean;
165
+ }
166
+ export interface CiOptions extends RunOptions {
167
+ baselineName?: string;
168
+ baselineRunId?: string;
169
+ failIfRegressionPct?: number;
170
+ }
171
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAEA,MAAM,MAAM,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,SAAS,CAAC;AAIlD,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,KAAK,GAAG,MAAM,GAAG,KAAK,GAAG,OAAO,CAAC;IAC1C,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,uFAAuF;IACvF,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,6FAA6F;IAC7F,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,WAAW,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,QAAQ,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,KAAK,CAAC;IACZ,0EAA0E;IAC1E,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,wBAAwB;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,wDAAwD;IACxD,YAAY,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,UAAU,CAAC;IACjB,8BAA8B;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,KAAK,CAAC;IACZ,sDAAsD;IACtD,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC9B;AAED,MAAM,MAAM,aAAa,GACrB,iBAAiB,GACjB,sBAAsB,GACtB,mBAAmB,GACnB,gBAAgB,GAChB,qBAAqB,GACrB,gBAAgB,CAAC;AAIrB,MAAM,MAAM,aAAa,GACrB,UAAU,GACV,cAAc,GACd,aAAa,GACb,WAAW,GACX,QAAQ,GACR,OAAO,GACP,WAAW,GACX,YAAY,GACZ,YAAY,GACZ,YAAY,GACZ,aAAa,GACb,aAAa,GACb,iBAAiB,GACjB,iBAAiB,GACjB,iBAAiB,GACjB,kBAAkB,GAClB,aAAa,GACb,UAAU,GACV,qBAAqB,CAAC;AAE1B,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,aAAa,CAAC;IACpB,kEAAkE;IAClE,KAAK,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC5D,iCAAiC;IACjC,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,wDAAwD;IACxD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,uCAAuC;IACvC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,aAAa,CAAC;IACpB,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAID,MAAM,WAAW,WAAW;IAC1B,gDAAgD;IAChD,MAAM,EAAE,MAAM,CAAC;IACf,8CAA8C;IAC9C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,yCAAyC;IACzC,QAAQ,CAAC,EAAE,WAAW,GAAG,QAAQ,CAAC;IAClC,2CAA2C;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,OAAO,CAAC;IACjB,iEAAiE;IACjE,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAID,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,mFAAmF;IACnF,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,sCAAsC;IACtC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,oEAAoE;IACpE,KAAK,CAAC,EAAE,gBAAgB,EAAE,CAAC;IAC3B,kEAAkE;IAClE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,+DAA+D;IAC/D,OAAO,CAAC,EAAE,aAAa,CAAC;IACxB,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,iEAAiE;IACjE,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,mFAAmF;IACnF,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAID,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,OAAO,CAAC;IACjB,yCAAyC;IACzC,MAAM,EAAE,MAAM,CAAC;IACf,uCAAuC;IACvC,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,gBAAgB,EAAE,eAAe,EAAE,CAAC;IACpC,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,iDAAiD;IACjD,cAAc,CAAC,EAAE,OAAO,EAAE,CAAC;IAC3B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAID,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,OAAO;IACtB,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,OAAO,EAAE,UAAU,EAAE,CAAC;IACtB,KAAK,EAAE,YAAY,CAAC;IACpB,gCAAgC;IAChC,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAID,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,aAAa,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,YAAY,CAAC,EAAE,UAAU,GAAG,MAAM,GAAG,UAAU,CAAC;IAChD,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,SAAU,SAAQ,UAAU;IAC3C,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC9B"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hasna/evals",
3
- "version": "0.1.24",
3
+ "version": "0.1.26",
4
4
  "description": "Open source AI evaluation framework — LLM-as-judge + assertion-based evals for any AI app. CLI + MCP server.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -65,7 +65,8 @@
65
65
  "license": "Apache-2.0",
66
66
  "dependencies": {
67
67
  "@anthropic-ai/sdk": "^0.82.0",
68
- "@hasna/cloud": "^0.1.30",
68
+ "@hasna/cloud": "0.1.24",
69
+ "@hasna/events": "^0.1.6",
69
70
  "@modelcontextprotocol/sdk": "^1.29.0",
70
71
  "ajv": "^8.18.0",
71
72
  "chalk": "^5.4.1",