@hasna/evals 0.1.24 → 0.1.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/adapters.test.d.ts +2 -0
- package/dist/adapters/adapters.test.d.ts.map +1 -0
- package/dist/adapters/anthropic-openai.test.d.ts +2 -0
- package/dist/adapters/anthropic-openai.test.d.ts.map +1 -0
- package/dist/adapters/anthropic.d.ts +4 -0
- package/dist/adapters/anthropic.d.ts.map +1 -0
- package/dist/adapters/cli.d.ts +4 -0
- package/dist/adapters/cli.d.ts.map +1 -0
- package/dist/adapters/function.d.ts +4 -0
- package/dist/adapters/function.d.ts.map +1 -0
- package/dist/adapters/http-cli-coverage.test.d.ts +2 -0
- package/dist/adapters/http-cli-coverage.test.d.ts.map +1 -0
- package/dist/adapters/http.d.ts +16 -0
- package/dist/adapters/http.d.ts.map +1 -0
- package/dist/adapters/mcp-adapter.test.d.ts +2 -0
- package/dist/adapters/mcp-adapter.test.d.ts.map +1 -0
- package/dist/adapters/mcp.d.ts +4 -0
- package/dist/adapters/mcp.d.ts.map +1 -0
- package/dist/adapters/openai.d.ts +4 -0
- package/dist/adapters/openai.d.ts.map +1 -0
- package/dist/cli/adapter-parser.d.ts +3 -0
- package/dist/cli/adapter-parser.d.ts.map +1 -0
- package/dist/cli/cli.test.d.ts +2 -0
- package/dist/cli/cli.test.d.ts.map +1 -0
- package/dist/cli/commands/calibrate.d.ts +3 -0
- package/dist/cli/commands/calibrate.d.ts.map +1 -0
- package/dist/cli/commands/capture.d.ts +3 -0
- package/dist/cli/commands/capture.d.ts.map +1 -0
- package/dist/cli/commands/ci.d.ts +3 -0
- package/dist/cli/commands/ci.d.ts.map +1 -0
- package/dist/cli/commands/compare.d.ts +5 -0
- package/dist/cli/commands/compare.d.ts.map +1 -0
- package/dist/cli/commands/compare.test.d.ts +2 -0
- package/dist/cli/commands/compare.test.d.ts.map +1 -0
- package/dist/cli/commands/completion.d.ts +3 -0
- package/dist/cli/commands/completion.d.ts.map +1 -0
- package/dist/cli/commands/doctor.d.ts +3 -0
- package/dist/cli/commands/doctor.d.ts.map +1 -0
- package/dist/cli/commands/estimate.d.ts +3 -0
- package/dist/cli/commands/estimate.d.ts.map +1 -0
- package/dist/cli/commands/generate.d.ts +3 -0
- package/dist/cli/commands/generate.d.ts.map +1 -0
- package/dist/cli/commands/judge.d.ts +3 -0
- package/dist/cli/commands/judge.d.ts.map +1 -0
- package/dist/cli/commands/mcp.d.ts +3 -0
- package/dist/cli/commands/mcp.d.ts.map +1 -0
- package/dist/cli/commands/run.d.ts +3 -0
- package/dist/cli/commands/run.d.ts.map +1 -0
- package/dist/cli/commands/sync.d.ts +3 -0
- package/dist/cli/commands/sync.d.ts.map +1 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +857 -170
- package/dist/core/assertions-coverage.test.d.ts +2 -0
- package/dist/core/assertions-coverage.test.d.ts.map +1 -0
- package/dist/core/assertions.d.ts +18 -0
- package/dist/core/assertions.d.ts.map +1 -0
- package/dist/core/assertions.test.d.ts +2 -0
- package/dist/core/assertions.test.d.ts.map +1 -0
- package/dist/core/e2e.test.d.ts +2 -0
- package/dist/core/e2e.test.d.ts.map +1 -0
- package/dist/core/judge.d.ts +13 -0
- package/dist/core/judge.d.ts.map +1 -0
- package/dist/core/judge.test.d.ts +2 -0
- package/dist/core/judge.test.d.ts.map +1 -0
- package/dist/core/reporter.d.ts +21 -0
- package/dist/core/reporter.d.ts.map +1 -0
- package/dist/core/reporter.test.d.ts +2 -0
- package/dist/core/reporter.test.d.ts.map +1 -0
- package/dist/core/runner.d.ts +4 -0
- package/dist/core/runner.d.ts.map +1 -0
- package/dist/core/runner.test.d.ts +2 -0
- package/dist/core/runner.test.d.ts.map +1 -0
- package/dist/datasets/loader.d.ts +18 -0
- package/dist/datasets/loader.d.ts.map +1 -0
- package/dist/datasets/loader.test.d.ts +2 -0
- package/dist/datasets/loader.test.d.ts.map +1 -0
- package/dist/db/store.d.ts +17 -0
- package/dist/db/store.d.ts.map +1 -0
- package/dist/db/store.test.d.ts +2 -0
- package/dist/db/store.test.d.ts.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +24 -4
- package/dist/mcp/http.d.ts +13 -0
- package/dist/mcp/http.d.ts.map +1 -0
- package/dist/mcp/http.test.d.ts +2 -0
- package/dist/mcp/http.test.d.ts.map +1 -0
- package/dist/mcp/index.d.ts +3 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +32454 -477
- package/dist/mcp/mcp.test.d.ts +2 -0
- package/dist/mcp/mcp.test.d.ts.map +1 -0
- package/dist/mcp/server.d.ts +5 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/server/index.d.ts +3 -0
- package/dist/server/index.d.ts.map +1 -0
- package/dist/server/index.js +24 -4
- package/dist/server/server.test.d.ts +2 -0
- package/dist/server/server.test.d.ts.map +1 -0
- package/dist/types/index.d.ts +171 -0
- package/dist/types/index.d.ts.map +1 -0
- package/package.json +3 -2
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mcp.test.d.ts","sourceRoot":"","sources":["../../src/mcp/mcp.test.ts"],"names":[],"mappings":"AAsDA,wBAAsB,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAE3D"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../src/mcp/server.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AAanE,eAAO,MAAM,QAAQ,UAAU,CAAC;AAChC,eAAO,MAAM,qBAAqB,OAAO,CAAC;AAE1C,wBAAgB,WAAW,IAAI,MAAM,CAmSpC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/server/index.ts"],"names":[],"mappings":""}
|
package/dist/server/index.js
CHANGED
|
@@ -11666,7 +11666,7 @@ var safeJSON2 = (text) => {
|
|
|
11666
11666
|
var sleep2 = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
|
|
11667
11667
|
|
|
11668
11668
|
// node_modules/openai/version.mjs
|
|
11669
|
-
var VERSION2 = "6.
|
|
11669
|
+
var VERSION2 = "6.42.0";
|
|
11670
11670
|
|
|
11671
11671
|
// node_modules/openai/internal/detect-platform.mjs
|
|
11672
11672
|
var isRunningInBrowser2 = () => {
|
|
@@ -18252,7 +18252,10 @@ https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety
|
|
|
18252
18252
|
if (isTimeout) {
|
|
18253
18253
|
throw new APIConnectionTimeoutError2;
|
|
18254
18254
|
}
|
|
18255
|
-
throw new APIConnectionError2({
|
|
18255
|
+
throw new APIConnectionError2({
|
|
18256
|
+
message: getConnectionErrorMessage(response),
|
|
18257
|
+
cause: response
|
|
18258
|
+
});
|
|
18256
18259
|
}
|
|
18257
18260
|
const specialHeaders = [...response.headers.entries()].filter(([name]) => name === "x-request-id").map(([name, value]) => ", " + name + ": " + JSON.stringify(value)).join("");
|
|
18258
18261
|
const responseInfo = `[${requestLogID}${retryLogStr}${specialHeaders}] ${req.method} ${url} ${response.ok ? "succeeded" : "failed"} with status ${response.status} in ${headersTime - startTime}ms`;
|
|
@@ -18525,6 +18528,23 @@ OpenAI.Evals = Evals;
|
|
|
18525
18528
|
OpenAI.Containers = Containers;
|
|
18526
18529
|
OpenAI.Skills = Skills2;
|
|
18527
18530
|
OpenAI.Videos = Videos;
|
|
18531
|
+
function getConnectionErrorMessage(error3) {
|
|
18532
|
+
if (isUndiciDispatcherVersionMismatchError(error3)) {
|
|
18533
|
+
return `Connection error. This may be caused by passing an undici dispatcher, such as ProxyAgent, that is incompatible with the fetch implementation. If you are using undici's ProxyAgent, pass the fetch implementation from the same undici package: import { fetch, ProxyAgent } from 'undici'; new OpenAI({ fetch, fetchOptions: { dispatcher: new ProxyAgent(...) } });`;
|
|
18534
|
+
}
|
|
18535
|
+
return;
|
|
18536
|
+
}
|
|
18537
|
+
function isUndiciDispatcherVersionMismatchError(error3) {
|
|
18538
|
+
let current = error3;
|
|
18539
|
+
for (let i = 0;i < 8 && current && typeof current === "object"; i++) {
|
|
18540
|
+
const err = current;
|
|
18541
|
+
if (err.code === "UND_ERR_INVALID_ARG" && typeof err.message === "string" && err.message.includes("invalid onRequestStart method")) {
|
|
18542
|
+
return true;
|
|
18543
|
+
}
|
|
18544
|
+
current = err.cause;
|
|
18545
|
+
}
|
|
18546
|
+
return false;
|
|
18547
|
+
}
|
|
18528
18548
|
// node_modules/openai/azure.mjs
|
|
18529
18549
|
var _deployments_endpoints = new Set([
|
|
18530
18550
|
"/completions",
|
|
@@ -22595,7 +22615,7 @@ function finalize(ctx, schema) {
|
|
|
22595
22615
|
result.$schema = "http://json-schema.org/draft-07/schema#";
|
|
22596
22616
|
} else if (ctx.target === "draft-04") {
|
|
22597
22617
|
result.$schema = "http://json-schema.org/draft-04/schema#";
|
|
22598
|
-
} else if (ctx.target === "openapi-3.0") {}
|
|
22618
|
+
} else if (ctx.target === "openapi-3.0") {}
|
|
22599
22619
|
if (ctx.external?.uri) {
|
|
22600
22620
|
const id = ctx.external.registry.get(schema)?.id;
|
|
22601
22621
|
if (!id)
|
|
@@ -22813,7 +22833,7 @@ var literalProcessor = (schema, ctx, json, _params) => {
|
|
|
22813
22833
|
if (val === undefined) {
|
|
22814
22834
|
if (ctx.unrepresentable === "throw") {
|
|
22815
22835
|
throw new Error("Literal `undefined` cannot be represented in JSON Schema");
|
|
22816
|
-
}
|
|
22836
|
+
}
|
|
22817
22837
|
} else if (typeof val === "bigint") {
|
|
22818
22838
|
if (ctx.unrepresentable === "throw") {
|
|
22819
22839
|
throw new Error("BigInt literals cannot be represented in JSON Schema");
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"server.test.d.ts","sourceRoot":"","sources":["../../src/server/server.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
export type Verdict = "PASS" | "FAIL" | "UNKNOWN";
|
|
2
|
+
export interface HttpAdapterConfig {
|
|
3
|
+
type: "http";
|
|
4
|
+
url: string;
|
|
5
|
+
method?: "GET" | "POST" | "PUT" | "PATCH";
|
|
6
|
+
headers?: Record<string, string>;
|
|
7
|
+
/** Path into request body where the input message goes, e.g. "messages[-1].content" */
|
|
8
|
+
inputPath?: string;
|
|
9
|
+
/** Path into response body where the output text lives, e.g. "choices[0].message.content" */
|
|
10
|
+
outputPath?: string;
|
|
11
|
+
timeoutMs?: number;
|
|
12
|
+
}
|
|
13
|
+
export interface AnthropicAdapterConfig {
|
|
14
|
+
type: "anthropic";
|
|
15
|
+
model: string;
|
|
16
|
+
systemPrompt?: string;
|
|
17
|
+
maxTokens?: number;
|
|
18
|
+
apiKey?: string;
|
|
19
|
+
}
|
|
20
|
+
export interface OpenAIAdapterConfig {
|
|
21
|
+
type: "openai";
|
|
22
|
+
model: string;
|
|
23
|
+
systemPrompt?: string;
|
|
24
|
+
maxTokens?: number;
|
|
25
|
+
baseURL?: string;
|
|
26
|
+
apiKey?: string;
|
|
27
|
+
}
|
|
28
|
+
export interface McpAdapterConfig {
|
|
29
|
+
type: "mcp";
|
|
30
|
+
/** Command to start the MCP server, e.g. ["node", "dist/mcp/index.js"] */
|
|
31
|
+
command: string[];
|
|
32
|
+
/** Tool name to call */
|
|
33
|
+
tool: string;
|
|
34
|
+
/** How to map the EvalCase input into tool arguments */
|
|
35
|
+
inputMapping?: Record<string, string>;
|
|
36
|
+
timeoutMs?: number;
|
|
37
|
+
}
|
|
38
|
+
export interface FunctionAdapterConfig {
|
|
39
|
+
type: "function";
|
|
40
|
+
/** Absolute path to module */
|
|
41
|
+
modulePath: string;
|
|
42
|
+
/** Named export to call */
|
|
43
|
+
exportName?: string;
|
|
44
|
+
}
|
|
45
|
+
export interface CliAdapterConfig {
|
|
46
|
+
type: "cli";
|
|
47
|
+
/** Command template — use {{input}} as placeholder */
|
|
48
|
+
command: string;
|
|
49
|
+
timeoutMs?: number;
|
|
50
|
+
env?: Record<string, string>;
|
|
51
|
+
}
|
|
52
|
+
export type AdapterConfig = HttpAdapterConfig | AnthropicAdapterConfig | OpenAIAdapterConfig | McpAdapterConfig | FunctionAdapterConfig | CliAdapterConfig;
|
|
53
|
+
export type AssertionType = "contains" | "not_contains" | "starts_with" | "ends_with" | "equals" | "regex" | "not_regex" | "max_length" | "min_length" | "json_valid" | "json_schema" | "tool_called" | "tool_not_called" | "tool_call_count" | "tool_args_match" | "response_time_ms" | "token_count" | "cost_usd" | "semantic_similarity";
|
|
54
|
+
export interface Assertion {
|
|
55
|
+
type: AssertionType;
|
|
56
|
+
/** The value to check against — type depends on assertion type */
|
|
57
|
+
value?: string | number | boolean | Record<string, unknown>;
|
|
58
|
+
/** For range-based assertions */
|
|
59
|
+
min?: number;
|
|
60
|
+
max?: number;
|
|
61
|
+
/** For semantic_similarity — 0.0 to 1.0, default 0.8 */
|
|
62
|
+
threshold?: number;
|
|
63
|
+
/** Human-readable label for reports */
|
|
64
|
+
label?: string;
|
|
65
|
+
}
|
|
66
|
+
export interface AssertionResult {
|
|
67
|
+
type: AssertionType;
|
|
68
|
+
passed: boolean;
|
|
69
|
+
reason: string;
|
|
70
|
+
label?: string;
|
|
71
|
+
durationMs?: number;
|
|
72
|
+
}
|
|
73
|
+
export interface JudgeConfig {
|
|
74
|
+
/** Plain-English grading criteria. Required. */
|
|
75
|
+
rubric: string;
|
|
76
|
+
/** Judge model. Default: claude-sonnet-4-6 */
|
|
77
|
+
model?: string;
|
|
78
|
+
/** Judge provider. Default: anthropic */
|
|
79
|
+
provider?: "anthropic" | "openai";
|
|
80
|
+
/** API key override — falls back to env */
|
|
81
|
+
apiKey?: string;
|
|
82
|
+
}
|
|
83
|
+
export interface JudgeResult {
|
|
84
|
+
verdict: Verdict;
|
|
85
|
+
/** Chain-of-thought reasoning — always present before verdict */
|
|
86
|
+
reasoning: string;
|
|
87
|
+
durationMs: number;
|
|
88
|
+
inputTokens?: number;
|
|
89
|
+
outputTokens?: number;
|
|
90
|
+
costUsd?: number;
|
|
91
|
+
}
|
|
92
|
+
export interface ConversationTurn {
|
|
93
|
+
role: "user" | "assistant";
|
|
94
|
+
content: string;
|
|
95
|
+
/** For assistant turns: what the expected behavior should be (natural language) */
|
|
96
|
+
expected?: string;
|
|
97
|
+
}
|
|
98
|
+
export interface EvalCase {
|
|
99
|
+
id: string;
|
|
100
|
+
/** Single-turn: plain string input */
|
|
101
|
+
input?: string;
|
|
102
|
+
/** Multi-turn: conversation turns. If present, input is ignored. */
|
|
103
|
+
turns?: ConversationTurn[];
|
|
104
|
+
/** Natural language description of expected output (for judge) */
|
|
105
|
+
expected?: string;
|
|
106
|
+
/** Adapter config override — falls back to run-level config */
|
|
107
|
+
adapter?: AdapterConfig;
|
|
108
|
+
assertions?: Assertion[];
|
|
109
|
+
judge?: JudgeConfig;
|
|
110
|
+
/** Run this case N times and report pass_rate (Pass^k metric) */
|
|
111
|
+
repeat?: number;
|
|
112
|
+
/** Minimum pass rate for Pass^k to be considered passing (0.0–1.0, default 1.0) */
|
|
113
|
+
passThreshold?: number;
|
|
114
|
+
tags?: string[];
|
|
115
|
+
metadata?: Record<string, unknown>;
|
|
116
|
+
}
|
|
117
|
+
export interface EvalResult {
|
|
118
|
+
caseId: string;
|
|
119
|
+
verdict: Verdict;
|
|
120
|
+
/** Raw output from the app under test */
|
|
121
|
+
output: string;
|
|
122
|
+
/** For multi-turn: all turn outputs */
|
|
123
|
+
turnOutputs?: string[];
|
|
124
|
+
assertionResults: AssertionResult[];
|
|
125
|
+
judgeResult?: JudgeResult;
|
|
126
|
+
/** For Pass^k: individual verdicts per repeat */
|
|
127
|
+
repeatVerdicts?: Verdict[];
|
|
128
|
+
passRate?: number;
|
|
129
|
+
durationMs: number;
|
|
130
|
+
inputTokens?: number;
|
|
131
|
+
outputTokens?: number;
|
|
132
|
+
costUsd?: number;
|
|
133
|
+
error?: string;
|
|
134
|
+
}
|
|
135
|
+
export interface EvalRunStats {
|
|
136
|
+
total: number;
|
|
137
|
+
passed: number;
|
|
138
|
+
failed: number;
|
|
139
|
+
unknown: number;
|
|
140
|
+
errors: number;
|
|
141
|
+
passRate: number;
|
|
142
|
+
totalDurationMs: number;
|
|
143
|
+
totalCostUsd: number;
|
|
144
|
+
totalTokens: number;
|
|
145
|
+
}
|
|
146
|
+
export interface EvalRun {
|
|
147
|
+
id: string;
|
|
148
|
+
createdAt: string;
|
|
149
|
+
dataset: string;
|
|
150
|
+
adapterConfig?: AdapterConfig;
|
|
151
|
+
results: EvalResult[];
|
|
152
|
+
stats: EvalRunStats;
|
|
153
|
+
/** Named baseline tag if set */
|
|
154
|
+
baselineName?: string;
|
|
155
|
+
}
|
|
156
|
+
export interface RunOptions {
|
|
157
|
+
dataset: string;
|
|
158
|
+
adapter?: AdapterConfig;
|
|
159
|
+
concurrency?: number;
|
|
160
|
+
tags?: string[];
|
|
161
|
+
skipJudge?: boolean;
|
|
162
|
+
repeat?: number;
|
|
163
|
+
outputFormat?: "terminal" | "json" | "markdown";
|
|
164
|
+
verbose?: boolean;
|
|
165
|
+
}
|
|
166
|
+
export interface CiOptions extends RunOptions {
|
|
167
|
+
baselineName?: string;
|
|
168
|
+
baselineRunId?: string;
|
|
169
|
+
failIfRegressionPct?: number;
|
|
170
|
+
}
|
|
171
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAEA,MAAM,MAAM,OAAO,GAAG,MAAM,GAAG,MAAM,GAAG,SAAS,CAAC;AAIlD,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,CAAC,EAAE,KAAK,GAAG,MAAM,GAAG,KAAK,GAAG,OAAO,CAAC;IAC1C,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,uFAAuF;IACvF,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,6FAA6F;IAC7F,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,WAAW,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,QAAQ,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,KAAK,CAAC;IACZ,0EAA0E;IAC1E,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,wBAAwB;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,wDAAwD;IACxD,YAAY,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,UAAU,CAAC;IACjB,8BAA8B;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,2BAA2B;IAC3B,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,KAAK,CAAC;IACZ,sDAAsD;IACtD,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CAC9B;AAED,MAAM,MAAM,aAAa,GACrB,iBAAiB,GACjB,sBAAsB,GACtB,mBAAmB,GACnB,gBAAgB,GAChB,qBAAqB,GACrB,gBAAgB,CAAC;AAIrB,MAAM,MAAM,aAAa,GACrB,UAAU,GACV,cAAc,GACd,aAAa,GACb,WAAW,GACX,QAAQ,GACR,OAAO,GACP,WAAW,GACX,YAAY,GACZ,YAAY,GACZ,YAAY,GACZ,aAAa,GACb,aAAa,GACb,iBAAiB,GACjB,iBAAiB,GACjB,iBAAiB,GACjB,kBAAkB,GAClB,aAAa,GACb,UAAU,GACV,qBAAqB,CAAC;AAE1B,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,aAAa,CAAC;IACpB,kEAAkE;IAClE,KAAK,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC5D,iCAAiC;IACjC,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,wDAAwD;IACxD,SAAS,CAAC,EAAE,MAAM,CA
AC;IACnB,uCAAuC;IACvC,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,aAAa,CAAC;IACpB,MAAM,EAAE,OAAO,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAID,MAAM,WAAW,WAAW;IAC1B,gDAAgD;IAChD,MAAM,EAAE,MAAM,CAAC;IACf,8CAA8C;IAC9C,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,yCAAyC;IACzC,QAAQ,CAAC,EAAE,WAAW,GAAG,QAAQ,CAAC;IAClC,2CAA2C;IAC3C,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,OAAO,CAAC;IACjB,iEAAiE;IACjE,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAID,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,mFAAmF;IACnF,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,sCAAsC;IACtC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,oEAAoE;IACpE,KAAK,CAAC,EAAE,gBAAgB,EAAE,CAAC;IAC3B,kEAAkE;IAClE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,+DAA+D;IAC/D,OAAO,CAAC,EAAE,aAAa,CAAC;IACxB,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,WAAW,CAAC;IACpB,iEAAiE;IACjE,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,mFAAmF;IACnF,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAID,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,OAAO,CAAC;IACjB,yCAAyC;IACzC,MAAM,EAAE,MAAM,CAAC;IACf,uCAAuC;IACvC,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;IACvB,gBAAgB,EAAE,eAAe,EAAE,CAAC;IACpC,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,iDAAiD;IACjD,cAAc,CAAC,EAAE,OAAO,EAAE,CAAC;IAC3B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAID,MAAM,WAAW,YAAY;IAC3B,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,OAAO;IACtB,EAAE,EAAE,MAAM
,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,OAAO,EAAE,UAAU,EAAE,CAAC;IACtB,KAAK,EAAE,YAAY,CAAC;IACpB,gCAAgC;IAChC,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAID,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,aAAa,CAAC;IACxB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,YAAY,CAAC,EAAE,UAAU,GAAG,MAAM,GAAG,UAAU,CAAC;IAChD,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,MAAM,WAAW,SAAU,SAAQ,UAAU;IAC3C,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC9B"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hasna/evals",
|
|
3
|
-
"version": "0.1.24",
|
|
3
|
+
"version": "0.1.26",
|
|
4
4
|
"description": "Open source AI evaluation framework — LLM-as-judge + assertion-based evals for any AI app. CLI + MCP server.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -65,7 +65,8 @@
|
|
|
65
65
|
"license": "Apache-2.0",
|
|
66
66
|
"dependencies": {
|
|
67
67
|
"@anthropic-ai/sdk": "^0.82.0",
|
|
68
|
-
"@hasna/cloud": "
|
|
68
|
+
"@hasna/cloud": "0.1.24",
|
|
69
|
+
"@hasna/events": "^0.1.6",
|
|
69
70
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
70
71
|
"ajv": "^8.18.0",
|
|
71
72
|
"chalk": "^5.4.1",
|