@hasna/evals 0.1.22 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/dist/cli/index.js +1775 -324
  2. package/dist/index.js +3764 -898
  3. package/dist/mcp/index.js +3450 -1437
  4. package/dist/server/index.js +3764 -898
  5. package/package.json +1 -1
  6. package/dist/adapters/adapters.test.d.ts +0 -2
  7. package/dist/adapters/adapters.test.d.ts.map +0 -1
  8. package/dist/adapters/anthropic-openai.test.d.ts +0 -2
  9. package/dist/adapters/anthropic-openai.test.d.ts.map +0 -1
  10. package/dist/adapters/anthropic.d.ts +0 -4
  11. package/dist/adapters/anthropic.d.ts.map +0 -1
  12. package/dist/adapters/cli.d.ts +0 -4
  13. package/dist/adapters/cli.d.ts.map +0 -1
  14. package/dist/adapters/function.d.ts +0 -4
  15. package/dist/adapters/function.d.ts.map +0 -1
  16. package/dist/adapters/http-cli-coverage.test.d.ts +0 -2
  17. package/dist/adapters/http-cli-coverage.test.d.ts.map +0 -1
  18. package/dist/adapters/http.d.ts +0 -16
  19. package/dist/adapters/http.d.ts.map +0 -1
  20. package/dist/adapters/mcp-adapter.test.d.ts +0 -2
  21. package/dist/adapters/mcp-adapter.test.d.ts.map +0 -1
  22. package/dist/adapters/mcp.d.ts +0 -4
  23. package/dist/adapters/mcp.d.ts.map +0 -1
  24. package/dist/adapters/openai.d.ts +0 -4
  25. package/dist/adapters/openai.d.ts.map +0 -1
  26. package/dist/cli/adapter-parser.d.ts +0 -3
  27. package/dist/cli/adapter-parser.d.ts.map +0 -1
  28. package/dist/cli/cli.test.d.ts +0 -2
  29. package/dist/cli/cli.test.d.ts.map +0 -1
  30. package/dist/cli/commands/calibrate.d.ts +0 -3
  31. package/dist/cli/commands/calibrate.d.ts.map +0 -1
  32. package/dist/cli/commands/capture.d.ts +0 -3
  33. package/dist/cli/commands/capture.d.ts.map +0 -1
  34. package/dist/cli/commands/ci.d.ts +0 -3
  35. package/dist/cli/commands/ci.d.ts.map +0 -1
  36. package/dist/cli/commands/compare.d.ts +0 -5
  37. package/dist/cli/commands/compare.d.ts.map +0 -1
  38. package/dist/cli/commands/compare.test.d.ts +0 -2
  39. package/dist/cli/commands/compare.test.d.ts.map +0 -1
  40. package/dist/cli/commands/completion.d.ts +0 -3
  41. package/dist/cli/commands/completion.d.ts.map +0 -1
  42. package/dist/cli/commands/doctor.d.ts +0 -3
  43. package/dist/cli/commands/doctor.d.ts.map +0 -1
  44. package/dist/cli/commands/estimate.d.ts +0 -3
  45. package/dist/cli/commands/estimate.d.ts.map +0 -1
  46. package/dist/cli/commands/generate.d.ts +0 -3
  47. package/dist/cli/commands/generate.d.ts.map +0 -1
  48. package/dist/cli/commands/judge.d.ts +0 -3
  49. package/dist/cli/commands/judge.d.ts.map +0 -1
  50. package/dist/cli/commands/mcp.d.ts +0 -3
  51. package/dist/cli/commands/mcp.d.ts.map +0 -1
  52. package/dist/cli/commands/run.d.ts +0 -3
  53. package/dist/cli/commands/run.d.ts.map +0 -1
  54. package/dist/cli/commands/sync.d.ts +0 -3
  55. package/dist/cli/commands/sync.d.ts.map +0 -1
  56. package/dist/cli/index.d.ts +0 -3
  57. package/dist/cli/index.d.ts.map +0 -1
  58. package/dist/core/assertions-coverage.test.d.ts +0 -2
  59. package/dist/core/assertions-coverage.test.d.ts.map +0 -1
  60. package/dist/core/assertions.d.ts +0 -18
  61. package/dist/core/assertions.d.ts.map +0 -1
  62. package/dist/core/assertions.test.d.ts +0 -2
  63. package/dist/core/assertions.test.d.ts.map +0 -1
  64. package/dist/core/e2e.test.d.ts +0 -2
  65. package/dist/core/e2e.test.d.ts.map +0 -1
  66. package/dist/core/judge.d.ts +0 -13
  67. package/dist/core/judge.d.ts.map +0 -1
  68. package/dist/core/judge.test.d.ts +0 -2
  69. package/dist/core/judge.test.d.ts.map +0 -1
  70. package/dist/core/reporter.d.ts +0 -21
  71. package/dist/core/reporter.d.ts.map +0 -1
  72. package/dist/core/reporter.test.d.ts +0 -2
  73. package/dist/core/reporter.test.d.ts.map +0 -1
  74. package/dist/core/runner.d.ts +0 -4
  75. package/dist/core/runner.d.ts.map +0 -1
  76. package/dist/core/runner.test.d.ts +0 -2
  77. package/dist/core/runner.test.d.ts.map +0 -1
  78. package/dist/datasets/loader.d.ts +0 -18
  79. package/dist/datasets/loader.d.ts.map +0 -1
  80. package/dist/datasets/loader.test.d.ts +0 -2
  81. package/dist/datasets/loader.test.d.ts.map +0 -1
  82. package/dist/db/store.d.ts +0 -17
  83. package/dist/db/store.d.ts.map +0 -1
  84. package/dist/db/store.test.d.ts +0 -2
  85. package/dist/db/store.test.d.ts.map +0 -1
  86. package/dist/index.d.ts +0 -8
  87. package/dist/index.d.ts.map +0 -1
  88. package/dist/mcp/http.d.ts +0 -12
  89. package/dist/mcp/http.d.ts.map +0 -1
  90. package/dist/mcp/http.test.d.ts +0 -2
  91. package/dist/mcp/http.test.d.ts.map +0 -1
  92. package/dist/mcp/index.d.ts +0 -3
  93. package/dist/mcp/index.d.ts.map +0 -1
  94. package/dist/mcp/mcp.test.d.ts +0 -2
  95. package/dist/mcp/mcp.test.d.ts.map +0 -1
  96. package/dist/mcp/server.d.ts +0 -5
  97. package/dist/mcp/server.d.ts.map +0 -1
  98. package/dist/server/index.d.ts +0 -3
  99. package/dist/server/index.d.ts.map +0 -1
  100. package/dist/server/server.test.d.ts +0 -2
  101. package/dist/server/server.test.d.ts.map +0 -1
  102. package/dist/types/index.d.ts +0 -171
  103. package/dist/types/index.d.ts.map +0 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hasna/evals",
3
- "version": "0.1.22",
3
+ "version": "0.1.23",
4
4
  "description": "Open source AI evaluation framework — LLM-as-judge + assertion-based evals for any AI app. CLI + MCP server.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=adapters.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"adapters.test.d.ts","sourceRoot":"","sources":["../../src/adapters/adapters.test.ts"],"names":[],"mappings":""}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=anthropic-openai.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"anthropic-openai.test.d.ts","sourceRoot":"","sources":["../../src/adapters/anthropic-openai.test.ts"],"names":[],"mappings":""}
@@ -1,4 +0,0 @@
1
- import type { AnthropicAdapterConfig, ConversationTurn } from "../types/index.js";
2
- import type { AdapterResponse } from "./http.js";
3
- export declare function callAnthropicAdapter(config: AnthropicAdapterConfig, input: string, turns?: ConversationTurn[]): Promise<AdapterResponse>;
4
- //# sourceMappingURL=anthropic.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"anthropic.d.ts","sourceRoot":"","sources":["../../src/adapters/anthropic.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAClF,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAEjD,wBAAsB,oBAAoB,CACxC,MAAM,EAAE,sBAAsB,EAC9B,KAAK,EAAE,MAAM,EACb,KAAK,CAAC,EAAE,gBAAgB,EAAE,GACzB,OAAO,CAAC,eAAe,CAAC,CA+C1B"}
@@ -1,4 +0,0 @@
1
- import type { CliAdapterConfig } from "../types/index.js";
2
- import type { AdapterResponse } from "./http.js";
3
- export declare function callCliAdapter(config: CliAdapterConfig, input: string): Promise<AdapterResponse>;
4
- //# sourceMappingURL=cli.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../../src/adapters/cli.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAC1D,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAEjD,wBAAsB,cAAc,CAClC,MAAM,EAAE,gBAAgB,EACxB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,eAAe,CAAC,CA4C1B"}
@@ -1,4 +0,0 @@
1
- import type { FunctionAdapterConfig } from "../types/index.js";
2
- import type { AdapterResponse } from "./http.js";
3
- export declare function callFunctionAdapter(config: FunctionAdapterConfig, input: string): Promise<AdapterResponse>;
4
- //# sourceMappingURL=function.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"function.d.ts","sourceRoot":"","sources":["../../src/adapters/function.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,mBAAmB,CAAC;AAC/D,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAEjD,wBAAsB,mBAAmB,CACvC,MAAM,EAAE,qBAAqB,EAC7B,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,eAAe,CAAC,CA2B1B"}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=http-cli-coverage.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"http-cli-coverage.test.d.ts","sourceRoot":"","sources":["../../src/adapters/http-cli-coverage.test.ts"],"names":[],"mappings":""}
@@ -1,16 +0,0 @@
1
- import type { HttpAdapterConfig, ConversationTurn } from "../types/index.js";
2
- export interface AdapterResponse {
3
- output: string;
4
- durationMs: number;
5
- inputTokens?: number;
6
- outputTokens?: number;
7
- costUsd?: number;
8
- toolCalls?: Array<{
9
- name: string;
10
- arguments?: Record<string, unknown>;
11
- }>;
12
- statusCode?: number;
13
- error?: string;
14
- }
15
- export declare function callHttpAdapter(config: HttpAdapterConfig, input: string, turns?: ConversationTurn[]): Promise<AdapterResponse>;
16
- //# sourceMappingURL=http.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"http.d.ts","sourceRoot":"","sources":["../../src/adapters/http.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAE7E,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;KAAE,CAAC,CAAC;IACzE,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AA+BD,wBAAsB,eAAe,CACnC,MAAM,EAAE,iBAAiB,EACzB,KAAK,EAAE,MAAM,EACb,KAAK,CAAC,EAAE,gBAAgB,EAAE,GACzB,OAAO,CAAC,eAAe,CAAC,CAqE1B"}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=mcp-adapter.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"mcp-adapter.test.d.ts","sourceRoot":"","sources":["../../src/adapters/mcp-adapter.test.ts"],"names":[],"mappings":""}
@@ -1,4 +0,0 @@
1
- import type { McpAdapterConfig } from "../types/index.js";
2
- import type { AdapterResponse } from "./http.js";
3
- export declare function callMcpAdapter(config: McpAdapterConfig, input: string): Promise<AdapterResponse>;
4
- //# sourceMappingURL=mcp.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"mcp.d.ts","sourceRoot":"","sources":["../../src/adapters/mcp.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAC1D,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAEjD,wBAAsB,cAAc,CAClC,MAAM,EAAE,gBAAgB,EACxB,KAAK,EAAE,MAAM,GACZ,OAAO,CAAC,eAAe,CAAC,CAqD1B"}
@@ -1,4 +0,0 @@
1
- import type { OpenAIAdapterConfig, ConversationTurn } from "../types/index.js";
2
- import type { AdapterResponse } from "./http.js";
3
- export declare function callOpenAIAdapter(config: OpenAIAdapterConfig, input: string, turns?: ConversationTurn[]): Promise<AdapterResponse>;
4
- //# sourceMappingURL=openai.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"openai.d.ts","sourceRoot":"","sources":["../../src/adapters/openai.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,mBAAmB,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAC/E,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAEjD,wBAAsB,iBAAiB,CACrC,MAAM,EAAE,mBAAmB,EAC3B,KAAK,EAAE,MAAM,EACb,KAAK,CAAC,EAAE,gBAAgB,EAAE,GACzB,OAAO,CAAC,eAAe,CAAC,CAuD1B"}
@@ -1,3 +0,0 @@
1
- import type { AdapterConfig } from "../types/index.js";
2
- export declare function parseAdapterConfig(opts: Record<string, string>): AdapterConfig;
3
- //# sourceMappingURL=adapter-parser.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"adapter-parser.d.ts","sourceRoot":"","sources":["../../src/cli/adapter-parser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAEvD,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,aAAa,CA8B9E"}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=cli.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"cli.test.d.ts","sourceRoot":"","sources":["../../src/cli/cli.test.ts"],"names":[],"mappings":""}
@@ -1,3 +0,0 @@
1
- import { Command } from "commander";
2
- export declare function calibrateCommand(): Command;
3
- //# sourceMappingURL=calibrate.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"calibrate.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/calibrate.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAKpC,wBAAgB,gBAAgB,IAAI,OAAO,CA+C1C"}
@@ -1,3 +0,0 @@
1
- import { Command } from "commander";
2
- export declare function captureCommand(): Command;
3
- //# sourceMappingURL=capture.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"capture.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/capture.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAKpC,wBAAgB,cAAc,IAAI,OAAO,CAyExC"}
@@ -1,3 +0,0 @@
1
- import { Command } from "commander";
2
- export declare function ciCommand(): Command;
3
- //# sourceMappingURL=ci.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"ci.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/ci.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAOpC,wBAAgB,SAAS,IAAI,OAAO,CAiEnC"}
@@ -1,5 +0,0 @@
1
- import { Command } from "commander";
2
- import { compareRuns } from "../../core/reporter.js";
3
- export declare function renderMarkdownDiff(diff: ReturnType<typeof compareRuns>): string;
4
- export declare function compareCommand(): Command;
5
- //# sourceMappingURL=compare.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"compare.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/compare.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,WAAW,EAA+B,MAAM,wBAAwB,CAAC;AAGlF,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,UAAU,CAAC,OAAO,WAAW,CAAC,GAAG,MAAM,CA2B/E;AAED,wBAAgB,cAAc,IAAI,OAAO,CAgCxC"}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=compare.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"compare.test.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/compare.test.ts"],"names":[],"mappings":""}
@@ -1,3 +0,0 @@
1
- import { Command } from "commander";
2
- export declare function completionCommand(): Command;
3
- //# sourceMappingURL=completion.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"completion.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/completion.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAiFpC,wBAAgB,iBAAiB,IAAI,OAAO,CAgB3C"}
@@ -1,3 +0,0 @@
1
- import { Command } from "commander";
2
- export declare function doctorCommand(): Command;
3
- //# sourceMappingURL=doctor.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"doctor.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/doctor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAqCpC,wBAAgB,aAAa,IAAI,OAAO,CAqFvC"}
@@ -1,3 +0,0 @@
1
- import { Command } from "commander";
2
- export declare function estimateCommand(): Command;
3
- //# sourceMappingURL=estimate.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"estimate.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/estimate.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAapC,wBAAgB,eAAe,IAAI,OAAO,CA2CzC"}
@@ -1,3 +0,0 @@
1
- import { Command } from "commander";
2
- export declare function generateCommand(): Command;
3
- //# sourceMappingURL=generate.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"generate.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/generate.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAiBpC,wBAAgB,eAAe,IAAI,OAAO,CAyDzC"}
@@ -1,3 +0,0 @@
1
- import { Command } from "commander";
2
- export declare function judgeCommand(): Command;
3
- //# sourceMappingURL=judge.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"judge.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/judge.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAGpC,wBAAgB,YAAY,IAAI,OAAO,CA+BtC"}
@@ -1,3 +0,0 @@
1
- import { Command } from "commander";
2
- export declare function mcpCommand(): Command;
3
- //# sourceMappingURL=mcp.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"mcp.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/mcp.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAKpC,wBAAgB,UAAU,IAAI,OAAO,CAgCpC"}
@@ -1,3 +0,0 @@
1
- import { Command } from "commander";
2
- export declare function runCommand(): Command;
3
- //# sourceMappingURL=run.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"run.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAOpC,wBAAgB,UAAU,IAAI,OAAO,CAyDpC"}
@@ -1,3 +0,0 @@
1
- import { Command } from "commander";
2
- export declare function syncCommand(): Command;
3
- //# sourceMappingURL=sync.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"sync.d.ts","sourceRoot":"","sources":["../../../src/cli/commands/sync.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC,wBAAgB,WAAW,IAAI,OAAO,CA2FrC"}
@@ -1,3 +0,0 @@
1
- #!/usr/bin/env bun
2
- export {};
3
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/cli/index.ts"],"names":[],"mappings":""}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=assertions-coverage.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"assertions-coverage.test.d.ts","sourceRoot":"","sources":["../../src/core/assertions-coverage.test.ts"],"names":[],"mappings":""}
@@ -1,18 +0,0 @@
1
- import type { Assertion, AssertionResult } from "../types/index.js";
2
- export declare function sortAssertionsCheapestFirst(assertions: Assertion[]): Assertion[];
3
- export interface AssertionContext {
4
- output: string;
5
- durationMs?: number;
6
- inputTokens?: number;
7
- outputTokens?: number;
8
- costUsd?: number;
9
- toolCalls?: Array<{
10
- name: string;
11
- arguments?: Record<string, unknown>;
12
- }>;
13
- }
14
- export declare function runAssertion(assertion: Assertion, ctx: AssertionContext): Promise<AssertionResult>;
15
- export declare function runAssertions(assertions: Assertion[], ctx: AssertionContext): Promise<AssertionResult[]>;
16
- export declare function assertionsPassed(results: AssertionResult[]): boolean;
17
- export declare function allAssertionsPassed(results: AssertionResult[]): boolean;
18
- //# sourceMappingURL=assertions.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"assertions.d.ts","sourceRoot":"","sources":["../../src/core/assertions.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AA6BpE,wBAAgB,2BAA2B,CAAC,UAAU,EAAE,SAAS,EAAE,GAAG,SAAS,EAAE,CAMhF;AAED,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;KAAE,CAAC,CAAC;CAC1E;AAED,wBAAsB,YAAY,CAChC,SAAS,EAAE,SAAS,EACpB,GAAG,EAAE,gBAAgB,GACpB,OAAO,CAAC,eAAe,CAAC,CAqB1B;AAED,wBAAsB,aAAa,CACjC,UAAU,EAAE,SAAS,EAAE,EACvB,GAAG,EAAE,gBAAgB,GACpB,OAAO,CAAC,eAAe,EAAE,CAAC,CAyB5B;AAED,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,eAAe,EAAE,GAAG,OAAO,CAKpE;AAED,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,eAAe,EAAE,GAAG,OAAO,CAEvE"}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=assertions.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"assertions.test.d.ts","sourceRoot":"","sources":["../../src/core/assertions.test.ts"],"names":[],"mappings":""}
@@ -1,2 +0,0 @@
1
- export declare function echoInput(input: string): Promise<string>;
2
- //# sourceMappingURL=e2e.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"e2e.test.d.ts","sourceRoot":"","sources":["../../src/core/e2e.test.ts"],"names":[],"mappings":"AAyBA,wBAAsB,SAAS,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAE9D"}
@@ -1,13 +0,0 @@
1
- import type { JudgeConfig, JudgeResult } from "../types/index.js";
2
- export declare function runJudge(input: string, output: string, config: JudgeConfig, expected?: string): Promise<JudgeResult>;
3
- /** One-shot judge: no full eval case, just input/output/rubric */
4
- export declare function judgeOnce(params: {
5
- input: string;
6
- output: string;
7
- rubric: string;
8
- expected?: string;
9
- model?: string;
10
- provider?: "anthropic" | "openai";
11
- apiKey?: string;
12
- }): Promise<JudgeResult>;
13
- //# sourceMappingURL=judge.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"judge.d.ts","sourceRoot":"","sources":["../../src/core/judge.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,WAAW,EAAE,WAAW,EAAW,MAAM,mBAAmB,CAAC;AA4E3E,wBAAsB,QAAQ,CAC5B,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,WAAW,EACnB,QAAQ,CAAC,EAAE,MAAM,GAChB,OAAO,CAAC,WAAW,CAAC,CAmBtB;AA4ED,kEAAkE;AAClE,wBAAsB,SAAS,CAAC,MAAM,EAAE;IACtC,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,WAAW,GAAG,QAAQ,CAAC;IAClC,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB,GAAG,OAAO,CAAC,WAAW,CAAC,CAOvB"}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=judge.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"judge.test.d.ts","sourceRoot":"","sources":["../../src/core/judge.test.ts"],"names":[],"mappings":""}
@@ -1,21 +0,0 @@
1
- import type { EvalRun, Verdict } from "../types/index.js";
2
- export declare function printTerminalReport(run: EvalRun): void;
3
- export declare function toJson(run: EvalRun): string;
4
- export declare function toMarkdown(run: EvalRun): string;
5
- export interface RunDiff {
6
- regressions: Array<{
7
- caseId: string;
8
- before: Verdict;
9
- after: Verdict;
10
- }>;
11
- improvements: Array<{
12
- caseId: string;
13
- before: Verdict;
14
- after: Verdict;
15
- }>;
16
- scoreDelta: number;
17
- passRateDelta: number;
18
- }
19
- export declare function compareRuns(before: EvalRun, after: EvalRun): RunDiff;
20
- export declare function printDiffReport(diff: RunDiff): void;
21
- //# sourceMappingURL=reporter.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"reporter.d.ts","sourceRoot":"","sources":["../../src/core/reporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AAgB1D,wBAAgB,mBAAmB,CAAC,GAAG,EAAE,OAAO,GAAG,IAAI,CAoCtD;AAID,wBAAgB,MAAM,CAAC,GAAG,EAAE,OAAO,GAAG,MAAM,CAE3C;AAID,wBAAgB,UAAU,CAAC,GAAG,EAAE,OAAO,GAAG,MAAM,CAkD/C;AAID,MAAM,WAAW,OAAO;IACtB,WAAW,EAAE,KAAK,CAAC;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,OAAO,CAAC;QAAC,KAAK,EAAE,OAAO,CAAA;KAAE,CAAC,CAAC;IACxE,YAAY,EAAE,KAAK,CAAC;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,OAAO,CAAC;QAAC,KAAK,EAAE,OAAO,CAAA;KAAE,CAAC,CAAC;IACzE,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,wBAAgB,WAAW,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,GAAG,OAAO,CAuBpE;AAED,wBAAgB,eAAe,CAAC,IAAI,EAAE,OAAO,GAAG,IAAI,CAcnD"}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=reporter.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"reporter.test.d.ts","sourceRoot":"","sources":["../../src/core/reporter.test.ts"],"names":[],"mappings":""}
@@ -1,4 +0,0 @@
1
- import type { AdapterConfig, EvalCase, EvalResult, EvalRun, RunOptions } from "../types/index.js";
2
- export declare function runEvals(cases: EvalCase[], options: RunOptions): Promise<EvalRun>;
3
- export declare function runSingleCase(evalCase: EvalCase, adapterConfig: AdapterConfig, skipJudge?: boolean): Promise<EvalResult>;
4
- //# sourceMappingURL=runner.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"runner.d.ts","sourceRoot":"","sources":["../../src/core/runner.ts"],"names":[],"mappings":"AASA,OAAO,KAAK,EACV,aAAa,EACb,QAAQ,EACR,UAAU,EACV,OAAO,EAEP,UAAU,EAGX,MAAM,mBAAmB,CAAC;AAqI3B,wBAAsB,QAAQ,CAC5B,KAAK,EAAE,QAAQ,EAAE,EACjB,OAAO,EAAE,UAAU,GAClB,OAAO,CAAC,OAAO,CAAC,CAqClB;AAID,wBAAsB,aAAa,CACjC,QAAQ,EAAE,QAAQ,EAClB,aAAa,EAAE,aAAa,EAC5B,SAAS,UAAQ,GAChB,OAAO,CAAC,UAAU,CAAC,CAKrB"}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=runner.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"runner.test.d.ts","sourceRoot":"","sources":["../../src/core/runner.test.ts"],"names":[],"mappings":""}
@@ -1,18 +0,0 @@
1
- import type { EvalCase } from "../types/index.js";
2
- export interface LoadOptions {
3
- /** Fail hard on malformed lines instead of warning and skipping */
4
- strict?: boolean;
5
- /** Filter by tags */
6
- tags?: string[];
7
- }
8
- export interface LoadResult {
9
- cases: EvalCase[];
10
- warnings: string[];
11
- totalLines: number;
12
- skipped: number;
13
- }
14
- /** Load eval cases from a JSONL or JSON file (or glob pattern) */
15
- export declare function loadDataset(pathOrGlob: string, opts?: LoadOptions): Promise<LoadResult>;
16
- /** Stream large JSONL files case-by-case (for very large datasets) */
17
- export declare function streamDataset(path: string, opts?: LoadOptions): AsyncGenerator<EvalCase>;
18
- //# sourceMappingURL=loader.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"loader.d.ts","sourceRoot":"","sources":["../../src/datasets/loader.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAC;AAElD,MAAM,WAAW,WAAW;IAC1B,mEAAmE;IACnE,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,qBAAqB;IACrB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAED,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,kEAAkE;AAClE,wBAAsB,WAAW,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,GAAE,WAAgB,GAAG,OAAO,CAAC,UAAU,CAAC,CAiCjG;AAoFD,sEAAsE;AACtE,wBAAuB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,GAAE,WAAgB,GAAG,cAAc,CAAC,QAAQ,CAAC,CAyBnG"}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=loader.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"loader.test.d.ts","sourceRoot":"","sources":["../../src/datasets/loader.test.ts"],"names":[],"mappings":""}
@@ -1,17 +0,0 @@
1
- import { Database } from "bun:sqlite";
2
- import type { EvalRun } from "../types/index.js";
3
- export declare function getDatabase(): Database;
4
- export declare function closeDatabase(): void;
5
- export declare function saveRun(run: EvalRun): void;
6
- export declare function getRun(id: string): EvalRun | null;
7
- export declare function listRuns(limit?: number, dataset?: string): EvalRun[];
8
- export declare function deleteRun(id: string): void;
9
- export declare function setBaseline(name: string, runId: string): void;
10
- export declare function getBaseline(name: string): EvalRun | null;
11
- export declare function listBaselines(): Array<{
12
- name: string;
13
- runId: string;
14
- createdAt: string;
15
- }>;
16
- export declare function clearBaseline(name: string): void;
17
- //# sourceMappingURL=store.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"store.d.ts","sourceRoot":"","sources":["../../src/db/store.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAItC,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AAQjD,wBAAgB,WAAW,IAAI,QAAQ,CAUtC;AAED,wBAAgB,aAAa,IAAI,IAAI,CAGpC;AA6BD,wBAAgB,OAAO,CAAC,GAAG,EAAE,OAAO,GAAG,IAAI,CAa1C;AAED,wBAAgB,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,GAAG,IAAI,CAIjD;AAED,wBAAgB,QAAQ,CAAC,KAAK,SAAK,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,OAAO,EAAE,CAMhE;AAED,wBAAgB,SAAS,CAAC,EAAE,EAAE,MAAM,GAAG,IAAI,CAE1C;AAID,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI,CAI7D;AAED,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,GAAG,IAAI,CAKxD;AAED,wBAAgB,aAAa,IAAI,KAAK,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,CAAC,CAKzF;AAED,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAEhD"}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=store.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"store.test.d.ts","sourceRoot":"","sources":["../../src/db/store.test.ts"],"names":[],"mappings":""}
package/dist/index.d.ts DELETED
@@ -1,8 +0,0 @@
1
- export * from "./types/index.js";
2
- export * from "./core/runner.js";
3
- export * from "./core/assertions.js";
4
- export * from "./core/judge.js";
5
- export * from "./core/reporter.js";
6
- export * from "./datasets/loader.js";
7
- export * from "./db/store.js";
8
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,cAAc,kBAAkB,CAAC;AACjC,cAAc,kBAAkB,CAAC;AACjC,cAAc,sBAAsB,CAAC;AACrC,cAAc,iBAAiB,CAAC;AAChC,cAAc,oBAAoB,CAAC;AACnC,cAAc,sBAAsB,CAAC;AACrC,cAAc,eAAe,CAAC"}
@@ -1,12 +0,0 @@
1
- import { DEFAULT_MCP_HTTP_PORT, MCP_NAME } from "./server.js";
2
- export { DEFAULT_MCP_HTTP_PORT, MCP_NAME };
3
- export declare function isHttpMode(argv?: string[]): boolean;
4
- export declare function resolveHttpPort(argv?: string[]): number;
5
- export declare function handleMcpHttpRequest(req: Request): Promise<Response>;
6
- export interface StartHttpServerOptions {
7
- port?: number;
8
- hostname?: string;
9
- log?: (message: string) => void;
10
- }
11
- export declare function startHttpServer(options?: StartHttpServerOptions): ReturnType<typeof Bun.serve>;
12
- //# sourceMappingURL=http.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"http.d.ts","sourceRoot":"","sources":["../../src/mcp/http.ts"],"names":[],"mappings":"AACA,OAAO,EAAe,qBAAqB,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAE3E,OAAO,EAAE,qBAAqB,EAAE,QAAQ,EAAE,CAAC;AAE3C,wBAAgB,UAAU,CAAC,IAAI,GAAE,MAAM,EAA0B,GAAG,OAAO,CAE1E;AAED,wBAAgB,eAAe,CAAC,IAAI,GAAE,MAAM,EAA0B,GAAG,MAAM,CAa9E;AAUD,wBAAsB,oBAAoB,CAAC,GAAG,EAAE,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,CAiB1E;AAED,MAAM,WAAW,sBAAsB;IACrC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,CAAC;CACjC;AAED,wBAAgB,eAAe,CAAC,OAAO,GAAE,sBAA2B,GAAG,UAAU,CAAC,OAAO,GAAG,CAAC,KAAK,CAAC,CAclG"}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=http.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"http.test.d.ts","sourceRoot":"","sources":["../../src/mcp/http.test.ts"],"names":[],"mappings":""}
@@ -1,3 +0,0 @@
1
- #!/usr/bin/env bun
2
- export {};
3
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/mcp/index.ts"],"names":[],"mappings":""}
@@ -1,2 +0,0 @@
1
- export declare function echoFn(input: string): Promise<string>;
2
- //# sourceMappingURL=mcp.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"mcp.test.d.ts","sourceRoot":"","sources":["../../src/mcp/mcp.test.ts"],"names":[],"mappings":"AAsDA,wBAAsB,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAE3D"}
@@ -1,5 +0,0 @@
1
- import { Server } from "@modelcontextprotocol/sdk/server/index.js";
2
- export declare const MCP_NAME = "evals";
3
- export declare const DEFAULT_MCP_HTTP_PORT = 8817;
4
- export declare function buildServer(): Server;
5
- //# sourceMappingURL=server.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../src/mcp/server.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AAanE,eAAO,MAAM,QAAQ,UAAU,CAAC;AAChC,eAAO,MAAM,qBAAqB,OAAO,CAAC;AAE1C,wBAAgB,WAAW,IAAI,MAAM,CAmSpC"}
@@ -1,3 +0,0 @@
1
- #!/usr/bin/env bun
2
- export {};
3
- //# sourceMappingURL=index.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/server/index.ts"],"names":[],"mappings":""}
@@ -1,2 +0,0 @@
1
- export {};
2
- //# sourceMappingURL=server.test.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"server.test.d.ts","sourceRoot":"","sources":["../../src/server/server.test.ts"],"names":[],"mappings":""}