evalsense 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/README.md +99 -82
- package/dist/{chunk-HDJID3GC.cjs → chunk-DFC6FRTG.cjs} +8 -26
- package/dist/chunk-DFC6FRTG.cjs.map +1 -0
- package/dist/chunk-DGUM43GV.js +10 -0
- package/dist/chunk-DGUM43GV.js.map +1 -0
- package/dist/chunk-JEQ2X3Z6.cjs +12 -0
- package/dist/chunk-JEQ2X3Z6.cjs.map +1 -0
- package/dist/{chunk-5P7LNNO6.js → chunk-JPVZL45G.js} +8 -26
- package/dist/chunk-JPVZL45G.js.map +1 -0
- package/dist/{chunk-Y23VHTD3.cjs → chunk-RZFLCWTW.cjs} +2 -2
- package/dist/chunk-RZFLCWTW.cjs.map +1 -0
- package/dist/{chunk-BRPM6AB6.js → chunk-Z3U6AUWX.js} +2 -2
- package/dist/chunk-Z3U6AUWX.js.map +1 -0
- package/dist/cli.cjs +39 -36
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +37 -34
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +300 -101
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +76 -6
- package/dist/index.d.ts +76 -6
- package/dist/index.js +222 -23
- package/dist/index.js.map +1 -1
- package/dist/metrics/index.cjs +257 -17
- package/dist/metrics/index.cjs.map +1 -1
- package/dist/metrics/index.d.cts +252 -1
- package/dist/metrics/index.d.ts +252 -1
- package/dist/metrics/index.js +240 -2
- package/dist/metrics/index.js.map +1 -1
- package/dist/metrics/opinionated/index.cjs +6 -5
- package/dist/metrics/opinionated/index.js +2 -1
- package/package.json +8 -6
- package/dist/chunk-5P7LNNO6.js.map +0 -1
- package/dist/chunk-BRPM6AB6.js.map +0 -1
- package/dist/chunk-HDJID3GC.cjs.map +0 -1
- package/dist/chunk-Y23VHTD3.cjs.map +0 -1
package/dist/cli.js
CHANGED
|
@@ -1,46 +1,49 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { discoverFromPath, filterFiles, ExitCodes, ConsoleReporter, executeEvalFiles, JsonReporter, getExitCode } from './chunk-
|
|
2
|
+
import { discoverFromPath, filterFiles, ExitCodes, ConsoleReporter, executeEvalFiles, JsonReporter, getExitCode } from './chunk-JPVZL45G.js';
|
|
3
|
+
import './chunk-DGUM43GV.js';
|
|
3
4
|
import { Command } from 'commander';
|
|
4
5
|
|
|
5
6
|
var program = new Command();
|
|
6
7
|
program.name("evalsense").description("JS-native LLM evaluation framework with Jest-like API").version("0.1.0");
|
|
7
|
-
program.command("run").description("Run evaluation tests").argument("[path]", "Path to eval file or directory", ".").option("-f, --filter <pattern>", "Filter tests by name pattern").option("-o, --output <file>", "Write JSON report to file").option("-r, --reporter <type>", "Reporter type: console, json, both", "console").option("-b, --bail", "Stop on first failure").option("-t, --timeout <ms>", "Test timeout in milliseconds", "30000").action(
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
8
|
+
program.command("run").description("Run evaluation tests").argument("[path]", "Path to eval file or directory", ".").option("-f, --filter <pattern>", "Filter tests by name pattern").option("-o, --output <file>", "Write JSON report to file").option("-r, --reporter <type>", "Reporter type: console, json, both", "console").option("-b, --bail", "Stop on first failure").option("-t, --timeout <ms>", "Test timeout in milliseconds", "30000").action(
|
|
9
|
+
async (path, options) => {
|
|
10
|
+
try {
|
|
11
|
+
const files = await discoverFromPath(path);
|
|
12
|
+
const filtered = filterFiles(files, options.filter);
|
|
13
|
+
if (filtered.length === 0) {
|
|
14
|
+
console.error("No eval files found");
|
|
15
|
+
process.exit(ExitCodes.CONFIGURATION_ERROR);
|
|
16
|
+
}
|
|
17
|
+
const consoleReporter = new ConsoleReporter();
|
|
18
|
+
consoleReporter.printHeader(filtered.length);
|
|
19
|
+
const report = await executeEvalFiles(filtered, {
|
|
20
|
+
bail: options.bail,
|
|
21
|
+
timeout: parseInt(options.timeout, 10),
|
|
22
|
+
filter: options.filter
|
|
23
|
+
});
|
|
24
|
+
const reporterType = options.reporter.toLowerCase();
|
|
25
|
+
if (reporterType === "console" || reporterType === "both") {
|
|
26
|
+
consoleReporter.printReport(report);
|
|
27
|
+
}
|
|
28
|
+
if (reporterType === "json" || reporterType === "both" || options.output) {
|
|
29
|
+
const jsonReporter = new JsonReporter();
|
|
30
|
+
const json = jsonReporter.format(report);
|
|
31
|
+
if (options.output) {
|
|
32
|
+
await jsonReporter.writeToFile(report, options.output);
|
|
33
|
+
console.log(`
|
|
32
34
|
Report written to: ${options.output}`);
|
|
33
|
-
|
|
34
|
-
|
|
35
|
+
} else if (reporterType === "json") {
|
|
36
|
+
console.log(json);
|
|
37
|
+
}
|
|
35
38
|
}
|
|
39
|
+
const exitCode = getExitCode(report);
|
|
40
|
+
process.exit(exitCode);
|
|
41
|
+
} catch (error) {
|
|
42
|
+
console.error("Error:", error instanceof Error ? error.message : String(error));
|
|
43
|
+
process.exit(ExitCodes.EXECUTION_ERROR);
|
|
36
44
|
}
|
|
37
|
-
const exitCode = getExitCode(report);
|
|
38
|
-
process.exit(exitCode);
|
|
39
|
-
} catch (error) {
|
|
40
|
-
console.error("Error:", error instanceof Error ? error.message : String(error));
|
|
41
|
-
process.exit(ExitCodes.EXECUTION_ERROR);
|
|
42
45
|
}
|
|
43
|
-
|
|
46
|
+
);
|
|
44
47
|
program.command("list").description("List discovered eval files").argument("[path]", "Path to search", ".").action(async (path) => {
|
|
45
48
|
try {
|
|
46
49
|
const files = await discoverFromPath(path);
|
package/dist/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/runner/cli.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"sources":["../src/runner/cli.ts"],"names":[],"mappings":";;;;;AAaA,IAAM,OAAA,GAAU,IAAI,OAAA,EAAQ;AAE5B,OAAA,CACG,KAAK,WAAW,CAAA,CAChB,YAAY,uDAAuD,CAAA,CACnE,QAAQ,OAAO,CAAA;AAElB,OAAA,CACG,OAAA,CAAQ,KAAK,CAAA,CACb,WAAA,CAAY,sBAAsB,CAAA,CAClC,QAAA,CAAS,QAAA,EAAU,gCAAA,EAAkC,GAAG,CAAA,CACxD,MAAA,CAAO,wBAAA,EAA0B,8BAA8B,CAAA,CAC/D,MAAA,CAAO,qBAAA,EAAuB,2BAA2B,CAAA,CACzD,MAAA,CAAO,uBAAA,EAAyB,oCAAA,EAAsC,SAAS,CAAA,CAC/E,MAAA,CAAO,YAAA,EAAc,uBAAuB,CAAA,CAC5C,MAAA,CAAO,oBAAA,EAAsB,8BAAA,EAAgC,OAAO,CAAA,CACpE,MAAA;AAAA,EACC,OACE,MACA,OAAA,KAOG;AACH,IAAA,IAAI;AAEF,MAAA,MAAM,KAAA,GAAQ,MAAM,gBAAA,CAAiB,IAAI,CAAA;AACzC,MAAA,MAAM,QAAA,GAAW,WAAA,CAAY,KAAA,EAAO,OAAA,CAAQ,MAAM,CAAA;AAElD,MAAA,IAAI,QAAA,CAAS,WAAW,CAAA,EAAG;AACzB,QAAA,OAAA,CAAQ,MAAM,qBAAqB,CAAA;AACnC,QAAA,OAAA,CAAQ,IAAA,CAAK,UAAU,mBAAmB,CAAA;AAAA,MAC5C;AAEA,MAAA,MAAM,eAAA,GAAkB,IAAI,eAAA,EAAgB;AAG5C,MAAA,eAAA,CAAgB,WAAA,CAAY,SAAS,MAAM,CAAA;AAG3C,MAAA,MAAM,MAAA,GAAS,MAAM,gBAAA,CAAiB,QAAA,EAAU;AAAA,QAC9C,MAAM,OAAA,CAAQ,IAAA;AAAA,QACd,OAAA,EAAS,QAAA,CAAS,OAAA,CAAQ,OAAA,EAAS,EAAE,CAAA;AAAA,QACrC,QAAQ,OAAA,CAAQ;AAAA,OACjB,CAAA;AAGD,MAAA,MAAM,YAAA,GAAe,OAAA,CAAQ,QAAA,CAAS,WAAA,EAAY;AAElD,MAAA,IAAI,YAAA,KAAiB,SAAA,IAAa,YAAA,KAAiB,MAAA,EAAQ;AACzD,QAAA,eAAA,CAAgB,YAAY,MAAM,CAAA;AAAA,MACpC;AAEA,MAAA,IAAI,YAAA,KAAiB,MAAA,IAAU,YAAA,KAAiB,MAAA,IAAU,QAAQ,MAAA,EAAQ;AACxE,QAAA,MAAM,YAAA,GAAe,IAAI,YAAA,EAAa;AACtC,QAAA,MAAM,IAAA,GAAO,YAAA,CAAa,MAAA,CAAO,MAAM,CAAA;AAEvC,QAAA,IAAI,QAAQ,MAAA,EAAQ;AAClB,UAAA,MAAM,YAAA,CAAa,WAAA,CAAY,MAAA,EAAQ,OAAA,CAAQ,MAAM,CAAA;AACrD,UAAA,OAAA,CAAQ,GAAA,CAAI;AAAA,mBAAA,EAAwB,OAAA,CAAQ,MAAM,CAAA,CAAE,CAAA;AAAA,QACtD,CAAA,MAAA,IAAW,iBAAiB,MAAA,EAAQ;AAClC,UAAA,OAAA,CAAQ,IAAI,IAAI,CAAA;AAAA,QAClB;AAAA,MACF;AAGA,MAAA,MAAM,QAAA,GAAW,YAAY,MAAM,CAAA;AACnC,MAAA,OAAA,CAAQ,KAAK,QAAQ,CAAA;AAAA,IACvB,SAAS,KAAA,EAAO;AACd,MAAA,OAAA,CAAQ,KAAA,CAAM,UAAU,KAAA,YAAiB,KAAA,GAAQ,MAAM,OAAA,GAAU,MAAA,CAAO,KAAK,CAAC,CAAA;AAC9E,MAAA,OAAA,CAAQ,IAAA,CAAK,UAAU,eAAe,CAAA;AAAA,IACxC;AAAA,EACF;AACF,CAAA;AAEF,OAAA,CACG,OAAA,CAAQ,MAAM,CAAA,CACd,WAAA,CAAY,4BAA4B,CAAA,CACxC,QAAA,CAAS,QAAA,EAAU,gBAAA,EAAkB,GAAG,CAAA,CACxC,MAAA,CAAO,OAAO,IAAA,KAAiB;AAC9B,EAAA,IAAI;AACF,IAAA,MAAM,KAAA,GAAQ,MAAM,gBAAA,CAAiB,IAAI,CAAA;AAEzC,IAAA,IAAI,KAAA,CAAM,WAAW,CAAA,EAAG;AACtB,MAAA,OAAA,CAAQ,IAAI,qBAAqB,CAAA;AACjC,MAAA;AAAA,IACF;AAEA,IAAA,OAAA,CAAQ,GAAA,CAAI,CAAA,MAAA,EAAS,KAAA,CAAM,MAAM,CAAA;AAAA,CAAkB,CAAA;AACnD,IAAA,KAAA,MAAW,QAAQ,KAAA,EAAO;AACxB,MAAA,OAAA,CAAQ,GAAA,CAAI,CAAA,EAAA,EAAK,IAAI,CAAA,CAAE,CAAA;AAAA,IACzB;AAAA,EACF,SAAS,KAAA,EAAO;AACd,IAAA,OAAA,CAAQ,KAAA,CAAM,UAAU,KAAA,YAAiB,KAAA,GAAQ,MAAM,OAAA,GAAU,MAAA,CAAO,KAAK,CAAC,CAAA;AAC9E,IAAA,OAAA,CAAQ,IAAA,CAAK,UAAU,mBAAmB,CAAA;AAAA,EAC5C;AACF,CAAC,CAAA;AAEH,OAAA,CAAQ,KAAA,EAAM","file":"cli.js","sourcesContent":["#!/usr/bin/env node\n\n/**\n * EvalSense CLI\n */\n\nimport { Command } from \"commander\";\nimport { discoverFromPath, filterFiles } from \"./discovery.js\";\nimport { executeEvalFiles, getExitCode } from \"./executor.js\";\nimport { ConsoleReporter } from \"../report/console-reporter.js\";\nimport { JsonReporter } from \"../report/json-reporter.js\";\nimport { ExitCodes } from \"../core/types.js\";\n\nconst program = new Command();\n\nprogram\n .name(\"evalsense\")\n .description(\"JS-native LLM evaluation framework with Jest-like API\")\n .version(\"0.1.0\");\n\nprogram\n .command(\"run\")\n .description(\"Run evaluation tests\")\n .argument(\"[path]\", \"Path to eval file or directory\", \".\")\n .option(\"-f, --filter <pattern>\", \"Filter tests by name pattern\")\n .option(\"-o, --output <file>\", \"Write JSON report to file\")\n .option(\"-r, --reporter <type>\", \"Reporter type: console, json, both\", \"console\")\n .option(\"-b, --bail\", \"Stop on first failure\")\n .option(\"-t, --timeout <ms>\", \"Test timeout in milliseconds\", \"30000\")\n .action(\n async (\n path: string,\n options: {\n filter?: string;\n output?: string;\n reporter: string;\n bail?: boolean;\n timeout: string;\n }\n ) => {\n try {\n // Discover eval files\n const files = await discoverFromPath(path);\n const filtered = filterFiles(files, options.filter);\n\n if (filtered.length === 0) {\n console.error(\"No eval files found\");\n process.exit(ExitCodes.CONFIGURATION_ERROR);\n }\n\n const consoleReporter = new ConsoleReporter();\n\n // Print header\n consoleReporter.printHeader(filtered.length);\n\n // Execute tests\n const report = await executeEvalFiles(filtered, {\n bail: options.bail,\n timeout: parseInt(options.timeout, 10),\n filter: options.filter,\n });\n\n // Output results\n const reporterType = options.reporter.toLowerCase();\n\n if (reporterType === \"console\" || reporterType === \"both\") {\n consoleReporter.printReport(report);\n }\n\n if (reporterType === \"json\" || reporterType === \"both\" || options.output) {\n const jsonReporter = new JsonReporter();\n const json = jsonReporter.format(report);\n\n if (options.output) {\n await jsonReporter.writeToFile(report, options.output);\n console.log(`\\nReport written to: ${options.output}`);\n } else if (reporterType === \"json\") {\n console.log(json);\n }\n }\n\n // Exit with appropriate code\n const exitCode = getExitCode(report);\n process.exit(exitCode);\n } catch (error) {\n console.error(\"Error:\", error instanceof Error ? error.message : String(error));\n process.exit(ExitCodes.EXECUTION_ERROR);\n }\n }\n );\n\nprogram\n .command(\"list\")\n .description(\"List discovered eval files\")\n .argument(\"[path]\", \"Path to search\", \".\")\n .action(async (path: string) => {\n try {\n const files = await discoverFromPath(path);\n\n if (files.length === 0) {\n console.log(\"No eval files found\");\n return;\n }\n\n console.log(`Found ${files.length} eval file(s):\\n`);\n for (const file of files) {\n console.log(` ${file}`);\n }\n } catch (error) {\n console.error(\"Error:\", error instanceof Error ? error.message : String(error));\n process.exit(ExitCodes.CONFIGURATION_ERROR);\n }\n });\n\nprogram.parse();\n"]}
|