@hasna/evals 0.1.23 → 0.1.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -2
- package/README.md +12 -13
- package/dist/adapters/adapters.test.d.ts +2 -0
- package/dist/adapters/adapters.test.d.ts.map +1 -0
- package/dist/adapters/anthropic-openai.test.d.ts +2 -0
- package/dist/adapters/anthropic-openai.test.d.ts.map +1 -0
- package/dist/adapters/anthropic.d.ts +4 -0
- package/dist/adapters/anthropic.d.ts.map +1 -0
- package/dist/adapters/cli.d.ts +4 -0
- package/dist/adapters/cli.d.ts.map +1 -0
- package/dist/adapters/function.d.ts +4 -0
- package/dist/adapters/function.d.ts.map +1 -0
- package/dist/adapters/http-cli-coverage.test.d.ts +2 -0
- package/dist/adapters/http-cli-coverage.test.d.ts.map +1 -0
- package/dist/adapters/http.d.ts +16 -0
- package/dist/adapters/http.d.ts.map +1 -0
- package/dist/adapters/mcp-adapter.test.d.ts +2 -0
- package/dist/adapters/mcp-adapter.test.d.ts.map +1 -0
- package/dist/adapters/mcp.d.ts +4 -0
- package/dist/adapters/mcp.d.ts.map +1 -0
- package/dist/adapters/openai.d.ts +4 -0
- package/dist/adapters/openai.d.ts.map +1 -0
- package/dist/cli/adapter-parser.d.ts +3 -0
- package/dist/cli/adapter-parser.d.ts.map +1 -0
- package/dist/cli/cli.test.d.ts +2 -0
- package/dist/cli/cli.test.d.ts.map +1 -0
- package/dist/cli/commands/calibrate.d.ts +3 -0
- package/dist/cli/commands/calibrate.d.ts.map +1 -0
- package/dist/cli/commands/capture.d.ts +3 -0
- package/dist/cli/commands/capture.d.ts.map +1 -0
- package/dist/cli/commands/ci.d.ts +3 -0
- package/dist/cli/commands/ci.d.ts.map +1 -0
- package/dist/cli/commands/compare.d.ts +5 -0
- package/dist/cli/commands/compare.d.ts.map +1 -0
- package/dist/cli/commands/compare.test.d.ts +2 -0
- package/dist/cli/commands/compare.test.d.ts.map +1 -0
- package/dist/cli/commands/completion.d.ts +3 -0
- package/dist/cli/commands/completion.d.ts.map +1 -0
- package/dist/cli/commands/doctor.d.ts +3 -0
- package/dist/cli/commands/doctor.d.ts.map +1 -0
- package/dist/cli/commands/estimate.d.ts +3 -0
- package/dist/cli/commands/estimate.d.ts.map +1 -0
- package/dist/cli/commands/generate.d.ts +3 -0
- package/dist/cli/commands/generate.d.ts.map +1 -0
- package/dist/cli/commands/judge.d.ts +3 -0
- package/dist/cli/commands/judge.d.ts.map +1 -0
- package/dist/cli/commands/mcp.d.ts +3 -0
- package/dist/cli/commands/mcp.d.ts.map +1 -0
- package/dist/cli/commands/run.d.ts +3 -0
- package/dist/cli/commands/run.d.ts.map +1 -0
- package/dist/cli/commands/sync.d.ts +3 -0
- package/dist/cli/commands/sync.d.ts.map +1 -0
- package/dist/cli/index.d.ts +3 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +11688 -19142
- package/dist/core/assertions-coverage.test.d.ts +2 -0
- package/dist/core/assertions-coverage.test.d.ts.map +1 -0
- package/dist/core/assertions.d.ts +18 -0
- package/dist/core/assertions.d.ts.map +1 -0
- package/dist/core/assertions.test.d.ts +2 -0
- package/dist/core/assertions.test.d.ts.map +1 -0
- package/dist/core/e2e.test.d.ts +2 -0
- package/dist/core/e2e.test.d.ts.map +1 -0
- package/dist/core/judge.d.ts +13 -0
- package/dist/core/judge.d.ts.map +1 -0
- package/dist/core/judge.test.d.ts +2 -0
- package/dist/core/judge.test.d.ts.map +1 -0
- package/dist/core/reporter.d.ts +21 -0
- package/dist/core/reporter.d.ts.map +1 -0
- package/dist/core/reporter.test.d.ts +2 -0
- package/dist/core/reporter.test.d.ts.map +1 -0
- package/dist/core/runner.d.ts +4 -0
- package/dist/core/runner.d.ts.map +1 -0
- package/dist/core/runner.test.d.ts +2 -0
- package/dist/core/runner.test.d.ts.map +1 -0
- package/dist/datasets/loader.d.ts +18 -0
- package/dist/datasets/loader.d.ts.map +1 -0
- package/dist/datasets/loader.test.d.ts +2 -0
- package/dist/datasets/loader.test.d.ts.map +1 -0
- package/dist/db/cloud-sync.d.ts +33 -0
- package/dist/db/cloud-sync.d.ts.map +1 -0
- package/dist/db/pg-migrations.d.ts +5 -0
- package/dist/db/pg-migrations.d.ts.map +1 -0
- package/dist/db/remote-storage.d.ts +10 -0
- package/dist/db/remote-storage.d.ts.map +1 -0
- package/dist/db/store.d.ts +17 -0
- package/dist/db/store.d.ts.map +1 -0
- package/dist/db/store.test.d.ts +2 -0
- package/dist/db/store.test.d.ts.map +1 -0
- package/dist/index.d.ts +11 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +12309 -10076
- package/dist/mcp/index.d.ts +3 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +21247 -18303
- package/dist/mcp/mcp.test.d.ts +2 -0
- package/dist/mcp/mcp.test.d.ts.map +1 -0
- package/dist/server/index.d.ts +3 -0
- package/dist/server/index.d.ts.map +1 -0
- package/dist/server/index.js +903 -3792
- package/dist/server/server.test.d.ts +2 -0
- package/dist/server/server.test.d.ts.map +1 -0
- package/dist/types/index.d.ts +171 -0
- package/dist/types/index.d.ts.map +1 -0
- package/package.json +3 -2
package/LICENSE
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
|
|
2
1
|
Apache License
|
|
3
2
|
Version 2.0, January 2004
|
|
4
3
|
http://www.apache.org/licenses/
|
|
@@ -176,7 +175,7 @@
|
|
|
176
175
|
|
|
177
176
|
END OF TERMS AND CONDITIONS
|
|
178
177
|
|
|
179
|
-
Copyright 2026
|
|
178
|
+
Copyright 2026 hasna
|
|
180
179
|
|
|
181
180
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
182
181
|
you may not use this file except in compliance with the License.
|
package/README.md
CHANGED
|
@@ -197,6 +197,18 @@ evals mcp register --gemini # Gemini (~/.gemini/settings.json)
|
|
|
197
197
|
evals mcp register --all # all three at once
|
|
198
198
|
```
|
|
199
199
|
|
|
200
|
+
## Cloud Sync
|
|
201
|
+
|
|
202
|
+
Runs and baselines are stored locally in SQLite. Set a cloud PostgreSQL URL to sync them:
|
|
203
|
+
|
|
204
|
+
```bash
|
|
205
|
+
export HASNA_EVALS_CLOUD_DATABASE_URL="postgres://..."
|
|
206
|
+
|
|
207
|
+
evals sync status
|
|
208
|
+
evals sync push
|
|
209
|
+
evals sync pull
|
|
210
|
+
```
|
|
211
|
+
|
|
200
212
|
---
|
|
201
213
|
|
|
202
214
|
## CI / GitHub Actions
|
|
@@ -240,19 +252,6 @@ evals_run_single(
|
|
|
240
252
|
→ PASS — The response correctly identifies Paris.
|
|
241
253
|
```
|
|
242
254
|
|
|
243
|
-
## HTTP mode
|
|
244
|
-
|
|
245
|
-
Shared Streamable HTTP transport for multi-agent sessions (stdio remains the default):
|
|
246
|
-
|
|
247
|
-
```bash
|
|
248
|
-
evals-mcp --http # http://127.0.0.1:8817/mcp
|
|
249
|
-
MCP_HTTP=1 evals-mcp # same
|
|
250
|
-
evals-mcp --http --port 8817 # explicit port
|
|
251
|
-
```
|
|
252
|
-
|
|
253
|
-
- Health: `GET http://127.0.0.1:8817/health` → `{"status":"ok","name":"evals"}`
|
|
254
|
-
- Override port with `MCP_HTTP_PORT` or `--port`
|
|
255
|
-
|
|
256
255
|
---
|
|
257
256
|
|
|
258
257
|
## License
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adapters.test.d.ts","sourceRoot":"","sources":["../../src/adapters/adapters.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"anthropic-openai.test.d.ts","sourceRoot":"","sources":["../../src/adapters/anthropic-openai.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { AnthropicAdapterConfig, ConversationTurn } from "../types/index.js";
|
|
2
|
+
import type { AdapterResponse } from "./http.js";
|
|
3
|
+
export declare function callAnthropicAdapter(config: AnthropicAdapterConfig, input: string, turns?: ConversationTurn[]): Promise<AdapterResponse>;
|
|
4
|
+
//# sourceMappingURL=anthropic.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"anthropic.d.ts","sourceRoot":"","sources":["../../src/adapters/anthropic.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAClF,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAEjD,wBAAsB,oBAAoB,CACxC,MAAM,EAAE,sBAAsB,EAC9B,KAAK,EAAE,MAAM,EACb,KAAK,CAAC,EAAE,gBAAgB,EAAE,GACzB,OAAO,CAAC,eAAe,CAAC,CA+C1B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../../src/adapters/cli.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAC1D,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAEjD,wBAAsB,cAAc,CAClC,MAAM,EAAE,gBAAgB,EACxB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,eAAe,CAAC,CA4C1B"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { FunctionAdapterConfig } from "../types/index.js";
|
|
2
|
+
import type { AdapterResponse } from "./http.js";
|
|
3
|
+
export declare function callFunctionAdapter(config: FunctionAdapterConfig, input: string): Promise<AdapterResponse>;
|
|
4
|
+
//# sourceMappingURL=function.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"function.d.ts","sourceRoot":"","sources":["../../src/adapters/function.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAEjD,wBAAsB,mBAAmB,CACvC,MAAM,EAAE,qBAAqB,EAC7B,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,eAAe,CAAC,CA2B1B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"http-cli-coverage.test.d.ts","sourceRoot":"","sources":["../../src/adapters/http-cli-coverage.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { HttpAdapterConfig, ConversationTurn } from "../types/index.js";
|
|
2
|
+
export interface AdapterResponse {
|
|
3
|
+
output: string;
|
|
4
|
+
durationMs: number;
|
|
5
|
+
inputTokens?: number;
|
|
6
|
+
outputTokens?: number;
|
|
7
|
+
costUsd?: number;
|
|
8
|
+
toolCalls?: Array<{
|
|
9
|
+
name: string;
|
|
10
|
+
arguments?: Record<string, unknown>;
|
|
11
|
+
}>;
|
|
12
|
+
statusCode?: number;
|
|
13
|
+
error?: string;
|
|
14
|
+
}
|
|
15
|
+
export declare function callHttpAdapter(config: HttpAdapterConfig, input: string, turns?: ConversationTurn[]): Promise<AdapterResponse>;
|
|
16
|
+
//# sourceMappingURL=http.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"http.d.ts","sourceRoot":"","sources":["../../src/adapters/http.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAE7E,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;KAAE,CAAC,CAAC;IACzE,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AA+BD,wBAAsB,eAAe,CACnC,MAAM,EAAE,iBAAiB,EACzB,KAAK,EAAE,MAAM,EACb,KAAK,CAAC,EAAE,gBAAgB,EAAE,GACzB,OAAO,CAAC,eAAe,CAAC,CAqE1B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mcp-adapter.test.d.ts","sourceRoot":"","sources":["../../src/adapters/mcp-adapter.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mcp.d.ts","sourceRoot":"","sources":["../../src/adapters/mcp.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAC1D,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAEjD,wBAAsB,cAAc,CAClC,MAAM,EAAE,gBAAgB,EACxB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,eAAe,CAAC,CAqD1B"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { OpenAIAdapterConfig, ConversationTurn } from "../types/index.js";
|
|
2
|
+
import type { AdapterResponse } from "./http.js";
|
|
3
|
+
export declare function callOpenAIAdapter(config: OpenAIAdapterConfig, input: string, turns?: ConversationTurn[]): Promise<AdapterResponse>;
|
|
4
|
+
//# sourceMappingURL=openai.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openai.d.ts","sourceRoot":"","sources":["../../src/adapters/openai.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAC/E,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAEjD,wBAAsB,iBAAiB,CACrC,MAAM,EAAE,mBAAmB,EAC3B,KAAK,EAAE,MAAM,EACb,KAAK,CAAC,EAAE,gBAAgB,EAAE,GACzB,OAAO,CAAC,eAAe,CAAC,CAuD1B"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adapter-parser.d.ts","sourceRoot":"","sources":["../../src/cli/adapter-parser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAEvD,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,aAAa,CA8B9E"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.test.d.ts","sourceRoot":"","sources":["../../src/cli/cli.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"calibrate.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/calibrate.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAKpC,wBAAgB,gBAAgB,IAAI,OAAO,CA+C1C"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"capture.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/capture.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAKpC,wBAAgB,cAAc,IAAI,OAAO,CAyExC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ci.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/ci.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAOpC,wBAAgB,SAAS,IAAI,OAAO,CAiEnC"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import { Command } from "commander";
|
|
2
|
+
import { compareRuns } from "../../core/reporter.js";
|
|
3
|
+
export declare function renderMarkdownDiff(diff: ReturnType<typeof compareRuns>): string;
|
|
4
|
+
export declare function compareCommand(): Command;
|
|
5
|
+
//# sourceMappingURL=compare.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"compare.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/compare.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,WAAW,EAA+B,MAAM,wBAAwB,CAAC;AAGlF,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,UAAU,CAAC,OAAO,WAAW,CAAC,GAAG,MAAM,CA2B/E;AAED,wBAAgB,cAAc,IAAI,OAAO,CAgCxC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"compare.test.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/compare.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"completion.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/completion.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAiFpC,wBAAgB,iBAAiB,IAAI,OAAO,CAgB3C"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"doctor.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/doctor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC,wBAAgB,aAAa,IAAI,OAAO,CA6EvC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"estimate.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/estimate.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAapC,wBAAgB,eAAe,IAAI,OAAO,CA2CzC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"generate.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/generate.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAiBpC,wBAAgB,eAAe,IAAI,OAAO,CAyDzC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"judge.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/judge.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAGpC,wBAAgB,YAAY,IAAI,OAAO,CA+BtC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mcp.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/mcp.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAKpC,wBAAgB,UAAU,IAAI,OAAO,CAgCpC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAOpC,wBAAgB,UAAU,IAAI,OAAO,CAyDpC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sync.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/sync.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAkCpC,wBAAgB,WAAW,IAAI,OAAO,CA+FrC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/cli/index.ts"],"names":[],"mappings":""}
|