@m4trix/evals 0.19.0 → 0.21.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +9 -2
- package/dist/cli-simple.cjs +135 -26
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +135 -23
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +56 -12
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +56 -12
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +56 -12
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +17 -1
- package/dist/index.js +56 -12
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -77,8 +77,15 @@ export const myEvaluator = Evaluator.define({
|
|
|
77
77
|
inputSchema,
|
|
78
78
|
outputSchema: S.Unknown,
|
|
79
79
|
scoreSchema: S.Struct({ scores: S.Array(S.Unknown) }),
|
|
80
|
-
}).evaluate(async ({ input, ctx: _ctx, output }) => {
|
|
80
|
+
}).evaluate(async ({ input, ctx: _ctx, output, createError }) => {
|
|
81
81
|
const start = Date.now();
|
|
82
|
+
const value = 85;
|
|
83
|
+
if (value < 50) {
|
|
84
|
+
return createError(
|
|
85
|
+
{ reason: 'score below minimum', value, prompt: input.prompt, output },
|
|
86
|
+
{ label: 'quality-check' },
|
|
87
|
+
);
|
|
88
|
+
}
|
|
82
89
|
const latencyMs = Date.now() - start;
|
|
83
90
|
const minScore =
|
|
84
91
|
typeof output === 'object' &&
|
|
@@ -90,7 +97,7 @@ export const myEvaluator = Evaluator.define({
|
|
|
90
97
|
return {
|
|
91
98
|
scores: [
|
|
92
99
|
percentScore.make(
|
|
93
|
-
{ value
|
|
100
|
+
{ value },
|
|
94
101
|
{ definePassed: (d) => d.value >= (minScore ?? 50) },
|
|
95
102
|
),
|
|
96
103
|
],
|
package/dist/cli-simple.cjs
CHANGED
|
@@ -14,8 +14,6 @@ var ink = require('ink');
|
|
|
14
14
|
var jsxRuntime = require('react/jsx-runtime');
|
|
15
15
|
|
|
16
16
|
var _documentCurrentScript = typeof document !== 'undefined' ? document.currentScript : null;
|
|
17
|
-
function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
|
|
18
|
-
|
|
19
17
|
function _interopNamespace(e) {
|
|
20
18
|
if (e && e.__esModule) return e;
|
|
21
19
|
var n = Object.create(null);
|
|
@@ -35,7 +33,7 @@ function _interopNamespace(e) {
|
|
|
35
33
|
}
|
|
36
34
|
|
|
37
35
|
var jitiModule__namespace = /*#__PURE__*/_interopNamespace(jitiModule);
|
|
38
|
-
var
|
|
36
|
+
var React2__namespace = /*#__PURE__*/_interopNamespace(React2);
|
|
39
37
|
|
|
40
38
|
// src/runner/config.ts
|
|
41
39
|
var defaultRunnerConfig = {
|
|
@@ -294,6 +292,8 @@ function createDiffString(expected, actual, diffOptions) {
|
|
|
294
292
|
function formatLogMessage(msg) {
|
|
295
293
|
if (typeof msg === "string")
|
|
296
294
|
return msg;
|
|
295
|
+
if (msg instanceof Error)
|
|
296
|
+
return msg.stack ?? msg.message;
|
|
297
297
|
try {
|
|
298
298
|
if (msg !== null && typeof msg === "object") {
|
|
299
299
|
return JSON.stringify(msg, null, 2);
|
|
@@ -633,6 +633,7 @@ function toNumericScore(value) {
|
|
|
633
633
|
}
|
|
634
634
|
|
|
635
635
|
// src/runner/execution.ts
|
|
636
|
+
var evaluatorErrorLogEntryKey = "__m4trixEvaluatorLogEntry";
|
|
636
637
|
function computeEvaluatorPassed(evaluator, result, scores) {
|
|
637
638
|
const scoresWithPassed = scores.filter((s) => "passed" in s && s.passed !== void 0);
|
|
638
639
|
if (scoresWithPassed.length > 0) {
|
|
@@ -674,13 +675,27 @@ function createArtifactPath(artifactDirectory, datasetId, runId) {
|
|
|
674
675
|
`${datasetId}_${runId}_${nowIsoForFile()}.jsonl`
|
|
675
676
|
);
|
|
676
677
|
}
|
|
677
|
-
function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent, persistenceQueue, updateSnapshot, completedRef, passedRef, failedRef) {
|
|
678
|
+
function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent, persistenceQueue, updateSnapshot, startedRef, completedRef, passedRef, failedRef) {
|
|
678
679
|
return effect.Effect.gen(function* () {
|
|
679
680
|
const reruns = typeof testCaseItem.testCase.getReruns === "function" ? testCaseItem.testCase.getReruns() : 1;
|
|
680
681
|
const rerunPassed = [];
|
|
681
682
|
for (let r = 0; r < reruns; r++) {
|
|
682
683
|
const evaluatorRunId = `run-${crypto.randomUUID()}`;
|
|
683
684
|
const started = Date.now();
|
|
685
|
+
const startedEvaluations = yield* effect.Ref.modify(startedRef, (n) => [
|
|
686
|
+
n + 1,
|
|
687
|
+
n + 1
|
|
688
|
+
]);
|
|
689
|
+
yield* publishEvent({
|
|
690
|
+
type: "TestCaseStarted",
|
|
691
|
+
runId: task.runId,
|
|
692
|
+
testCaseId: testCaseItem.id,
|
|
693
|
+
testCaseName: testCaseItem.testCase.getName(),
|
|
694
|
+
startedTestCases: startedEvaluations,
|
|
695
|
+
totalTestCases: totalEvaluations,
|
|
696
|
+
rerunIndex: r + 1,
|
|
697
|
+
rerunTotal: reruns
|
|
698
|
+
});
|
|
684
699
|
const evaluatorScores = [];
|
|
685
700
|
let testCaseError;
|
|
686
701
|
const output = readOutput(testCaseItem.testCase);
|
|
@@ -689,20 +704,26 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
|
|
|
689
704
|
if (!evaluateFn) {
|
|
690
705
|
continue;
|
|
691
706
|
}
|
|
707
|
+
const logs = [];
|
|
708
|
+
const logDiff = (expected, actual, options) => {
|
|
709
|
+
logs.push(createDiffLogEntry(expected, actual, options));
|
|
710
|
+
};
|
|
711
|
+
const log = (message, options) => {
|
|
712
|
+
logs.push(createLogEntry(message, options));
|
|
713
|
+
};
|
|
714
|
+
const createError = (message, options) => {
|
|
715
|
+
const entry = createLogEntry(message, options);
|
|
716
|
+
const error = message instanceof Error ? message : new Error(entry.message);
|
|
717
|
+
error[evaluatorErrorLogEntryKey] = entry;
|
|
718
|
+
return error;
|
|
719
|
+
};
|
|
692
720
|
try {
|
|
693
|
-
const logs = [];
|
|
694
|
-
const logDiff = (expected, actual, options) => {
|
|
695
|
-
logs.push(createDiffLogEntry(expected, actual, options));
|
|
696
|
-
};
|
|
697
|
-
const log = (message, options) => {
|
|
698
|
-
logs.push(createLogEntry(message, options));
|
|
699
|
-
};
|
|
700
721
|
const ctx = yield* effect.Effect.promise(
|
|
701
722
|
() => Promise.resolve(evaluator.resolveContext())
|
|
702
723
|
);
|
|
703
724
|
const result = yield* effect.Effect.promise(
|
|
704
|
-
() => Promise.resolve(
|
|
705
|
-
evaluateFn({
|
|
725
|
+
() => Promise.resolve().then(
|
|
726
|
+
() => evaluateFn({
|
|
706
727
|
input: testCaseItem.testCase.getInput(),
|
|
707
728
|
ctx,
|
|
708
729
|
output,
|
|
@@ -712,10 +733,24 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
|
|
|
712
733
|
datasetId: task.datasetId
|
|
713
734
|
},
|
|
714
735
|
logDiff,
|
|
715
|
-
log
|
|
736
|
+
log,
|
|
737
|
+
createError
|
|
716
738
|
})
|
|
717
739
|
)
|
|
718
740
|
);
|
|
741
|
+
if (result instanceof Error) {
|
|
742
|
+
const evaluatorError = result;
|
|
743
|
+
const taggedEntry = evaluatorError[evaluatorErrorLogEntryKey];
|
|
744
|
+
logs.push(taggedEntry ?? createLogEntry(result));
|
|
745
|
+
testCaseError = result.message;
|
|
746
|
+
evaluatorScores.push({
|
|
747
|
+
evaluatorId,
|
|
748
|
+
scores: [],
|
|
749
|
+
passed: false,
|
|
750
|
+
logs: logs.length > 0 ? logs : void 0
|
|
751
|
+
});
|
|
752
|
+
continue;
|
|
753
|
+
}
|
|
719
754
|
const { scores, metrics } = normalizeResult(result);
|
|
720
755
|
const passed2 = computeEvaluatorPassed(evaluator, result, scores);
|
|
721
756
|
evaluatorScores.push({
|
|
@@ -726,11 +761,16 @@ function processOneTestCase(task, testCaseItem, totalEvaluations, publishEvent,
|
|
|
726
761
|
logs: logs.length > 0 ? logs : void 0
|
|
727
762
|
});
|
|
728
763
|
} catch (error) {
|
|
764
|
+
if (error instanceof Error) {
|
|
765
|
+
const taggedEntry = error[evaluatorErrorLogEntryKey];
|
|
766
|
+
logs.push(taggedEntry ?? createLogEntry(error));
|
|
767
|
+
}
|
|
729
768
|
testCaseError = error instanceof Error ? error.message : "Evaluator execution failed";
|
|
730
769
|
evaluatorScores.push({
|
|
731
770
|
evaluatorId,
|
|
732
771
|
scores: [],
|
|
733
|
-
passed: false
|
|
772
|
+
passed: false,
|
|
773
|
+
logs: logs.length > 0 ? logs : void 0
|
|
734
774
|
});
|
|
735
775
|
}
|
|
736
776
|
}
|
|
@@ -801,6 +841,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => e
|
|
|
801
841
|
);
|
|
802
842
|
const maxConcurrency = Math.max(1, task.maxConcurrency ?? 1);
|
|
803
843
|
const completedRef = yield* effect.Ref.make(0);
|
|
844
|
+
const startedRef = yield* effect.Ref.make(0);
|
|
804
845
|
const passedRef = yield* effect.Ref.make(0);
|
|
805
846
|
const failedRef = yield* effect.Ref.make(0);
|
|
806
847
|
const processTestCase = (testCaseItem) => processOneTestCase(
|
|
@@ -810,6 +851,7 @@ var executeRunTask = (task, publishEvent, persistenceQueue, updateSnapshot) => e
|
|
|
810
851
|
publishEvent,
|
|
811
852
|
persistenceQueue,
|
|
812
853
|
updateSnapshot,
|
|
854
|
+
startedRef,
|
|
813
855
|
completedRef,
|
|
814
856
|
passedRef,
|
|
815
857
|
failedRef
|
|
@@ -1461,7 +1503,7 @@ async function generateDatasetJsonCommandPlain(runner, datasetName) {
|
|
|
1461
1503
|
async function generateDatasetJsonCommandInk(runner, datasetName) {
|
|
1462
1504
|
return new Promise((resolve5, reject) => {
|
|
1463
1505
|
const app = ink.render(
|
|
1464
|
-
|
|
1506
|
+
React2__namespace.default.createElement(GenerateView, {
|
|
1465
1507
|
runner,
|
|
1466
1508
|
datasetName,
|
|
1467
1509
|
onComplete: (err) => {
|
|
@@ -1618,7 +1660,9 @@ function RunView({
|
|
|
1618
1660
|
);
|
|
1619
1661
|
const [runInfo, setRunInfo] = React2.useState(null);
|
|
1620
1662
|
const [testCases, setTestCases] = React2.useState([]);
|
|
1663
|
+
const [startedEvaluations, setStartedEvaluations] = React2.useState(0);
|
|
1621
1664
|
const [completedEvaluations, setCompletedEvaluations] = React2.useState(0);
|
|
1665
|
+
const [runningEvaluations, setRunningEvaluations] = React2.useState([]);
|
|
1622
1666
|
const [summary, setSummary] = React2.useState(null);
|
|
1623
1667
|
const [evaluatorNameById, setEvaluatorNameById] = React2.useState(/* @__PURE__ */ new Map());
|
|
1624
1668
|
const runEval = React2.useCallback(async () => {
|
|
@@ -1655,6 +1699,25 @@ function RunView({
|
|
|
1655
1699
|
let overallScoreCount = 0;
|
|
1656
1700
|
const done = new Promise((resolve5) => {
|
|
1657
1701
|
const unsubscribe = runner.subscribeRunEvents((event) => {
|
|
1702
|
+
if (event.type === "TestCaseStarted") {
|
|
1703
|
+
setStartedEvaluations(event.startedTestCases);
|
|
1704
|
+
setRunningEvaluations((prev) => {
|
|
1705
|
+
const withoutDuplicate = prev.filter(
|
|
1706
|
+
(item) => !(item.testCaseId === event.testCaseId && item.rerunIndex === event.rerunIndex)
|
|
1707
|
+
);
|
|
1708
|
+
return [
|
|
1709
|
+
...withoutDuplicate,
|
|
1710
|
+
{
|
|
1711
|
+
testCaseId: event.testCaseId,
|
|
1712
|
+
name: event.testCaseName,
|
|
1713
|
+
rerunIndex: event.rerunIndex,
|
|
1714
|
+
rerunTotal: event.rerunTotal,
|
|
1715
|
+
startedTestCases: event.startedTestCases,
|
|
1716
|
+
totalTestCases: event.totalTestCases
|
|
1717
|
+
}
|
|
1718
|
+
];
|
|
1719
|
+
});
|
|
1720
|
+
}
|
|
1658
1721
|
if (event.type === "TestCaseProgress") {
|
|
1659
1722
|
for (const item of event.evaluatorScores) {
|
|
1660
1723
|
const numeric = toNumericScoreFromScores(item.scores);
|
|
@@ -1714,12 +1777,18 @@ function RunView({
|
|
|
1714
1777
|
rerunTotal: event.rerunTotal,
|
|
1715
1778
|
durationMs: events.reduce((s, e) => s + e.durationMs, 0),
|
|
1716
1779
|
passed: events.every((e) => e.passed),
|
|
1780
|
+
errorMessage: event.errorMessage,
|
|
1717
1781
|
events,
|
|
1718
1782
|
aggregatedEvaluatorScores,
|
|
1719
1783
|
isAggregated
|
|
1720
1784
|
};
|
|
1721
1785
|
byId.set(event.testCaseId, merged);
|
|
1722
1786
|
setCompletedEvaluations(event.completedTestCases);
|
|
1787
|
+
setRunningEvaluations(
|
|
1788
|
+
(running) => running.filter(
|
|
1789
|
+
(item) => !(item.testCaseId === event.testCaseId && item.rerunIndex === event.rerunIndex)
|
|
1790
|
+
)
|
|
1791
|
+
);
|
|
1723
1792
|
return Array.from(byId.values());
|
|
1724
1793
|
});
|
|
1725
1794
|
}
|
|
@@ -1795,12 +1864,30 @@ function RunView({
|
|
|
1795
1864
|
runInfo.totalTestCases
|
|
1796
1865
|
] })
|
|
1797
1866
|
] }),
|
|
1798
|
-
phase === "running" && /* @__PURE__ */ jsxRuntime.
|
|
1799
|
-
|
|
1800
|
-
|
|
1801
|
-
|
|
1802
|
-
|
|
1803
|
-
|
|
1867
|
+
phase === "running" && /* @__PURE__ */ jsxRuntime.jsxs(ink.Box, { flexDirection: "column", marginBottom: 1, children: [
|
|
1868
|
+
/* @__PURE__ */ jsxRuntime.jsx(
|
|
1869
|
+
Spinner,
|
|
1870
|
+
{
|
|
1871
|
+
label: `Evaluations ${completedEvaluations}/${runInfo?.totalTestCases ?? 0} completed \u2022 ${startedEvaluations}/${runInfo?.totalTestCases ?? 0} started`
|
|
1872
|
+
}
|
|
1873
|
+
),
|
|
1874
|
+
runningEvaluations.length > 0 && /* @__PURE__ */ jsxRuntime.jsx(ink.Box, { flexDirection: "column", marginTop: 1, children: runningEvaluations.map((item) => /* @__PURE__ */ jsxRuntime.jsxs(ink.Text, { color: "yellow", children: [
|
|
1875
|
+
"[running ",
|
|
1876
|
+
item.startedTestCases,
|
|
1877
|
+
"/",
|
|
1878
|
+
item.totalTestCases,
|
|
1879
|
+
"] ",
|
|
1880
|
+
item.name,
|
|
1881
|
+
" ",
|
|
1882
|
+
/* @__PURE__ */ jsxRuntime.jsxs(ink.Text, { color: "gray", children: [
|
|
1883
|
+
"(",
|
|
1884
|
+
item.rerunIndex,
|
|
1885
|
+
"/",
|
|
1886
|
+
item.rerunTotal,
|
|
1887
|
+
")"
|
|
1888
|
+
] })
|
|
1889
|
+
] }, `${item.testCaseId}:${item.rerunIndex}`)) })
|
|
1890
|
+
] }),
|
|
1804
1891
|
testCases.length > 0 && /* @__PURE__ */ jsxRuntime.jsx(ink.Box, { flexDirection: "column", marginBottom: 1, children: testCases.map((tc) => /* @__PURE__ */ jsxRuntime.jsxs(ink.Box, { flexDirection: "column", marginBottom: 0, children: [
|
|
1805
1892
|
/* @__PURE__ */ jsxRuntime.jsxs(ink.Text, { children: [
|
|
1806
1893
|
/* @__PURE__ */ jsxRuntime.jsxs(ink.Text, { color: "cyan", children: [
|
|
@@ -1824,8 +1911,13 @@ function RunView({
|
|
|
1824
1911
|
" (",
|
|
1825
1912
|
tc.durationMs,
|
|
1826
1913
|
"ms)"
|
|
1827
|
-
] })
|
|
1914
|
+
] }),
|
|
1915
|
+
tc.errorMessage ? /* @__PURE__ */ jsxRuntime.jsxs(ink.Text, { color: "red", bold: true, children: [
|
|
1916
|
+
" ",
|
|
1917
|
+
"ERROR"
|
|
1918
|
+
] }) : null
|
|
1828
1919
|
] }),
|
|
1920
|
+
tc.errorMessage ? /* @__PURE__ */ jsxRuntime.jsx(ink.Text, { color: "red", children: tc.errorMessage }) : null,
|
|
1829
1921
|
tc.aggregatedEvaluatorScores.map((item) => /* @__PURE__ */ jsxRuntime.jsxs(
|
|
1830
1922
|
ink.Box,
|
|
1831
1923
|
{
|
|
@@ -2270,9 +2362,11 @@ async function runSimpleEvalCommandPlain(runner, datasetName, evaluatorPattern)
|
|
|
2270
2362
|
let overallScoreTotal = 0;
|
|
2271
2363
|
let overallScoreSumSq = 0;
|
|
2272
2364
|
let overallScoreCount = 0;
|
|
2365
|
+
let startedCount = 0;
|
|
2273
2366
|
let completedCount = 0;
|
|
2274
2367
|
let totalCount = 0;
|
|
2275
2368
|
let runFinished = false;
|
|
2369
|
+
const inFlightReruns = /* @__PURE__ */ new Set();
|
|
2276
2370
|
const spinnerFrames = ["\u280B", "\u2819", "\u2838", "\u2834", "\u2826", "\u2807"];
|
|
2277
2371
|
let spinnerIndex = 0;
|
|
2278
2372
|
function clearLine() {
|
|
@@ -2296,7 +2390,7 @@ async function runSimpleEvalCommandPlain(runner, datasetName, evaluatorPattern)
|
|
|
2296
2390
|
`\r${colorize(frame, ansi2.cyan)} Running evaluations ${colorize(
|
|
2297
2391
|
`${completedCount}/${totalCount}`,
|
|
2298
2392
|
ansi2.bold
|
|
2299
|
-
)} ${colorize(
|
|
2393
|
+
)} completed ${colorize(`${startedCount}/${totalCount}`, ansi2.bold)} started ${colorize(`(${inFlightReruns.size} running)`, ansi2.dim)}`
|
|
2300
2394
|
);
|
|
2301
2395
|
}
|
|
2302
2396
|
let lastPrintedTestCaseId = null;
|
|
@@ -2304,8 +2398,19 @@ async function runSimpleEvalCommandPlain(runner, datasetName, evaluatorPattern)
|
|
|
2304
2398
|
let spinnerTimer;
|
|
2305
2399
|
const done = new Promise((resolve5) => {
|
|
2306
2400
|
const unsubscribe = runner.subscribeRunEvents((event) => {
|
|
2401
|
+
if (event.type === "TestCaseStarted") {
|
|
2402
|
+
startedCount = event.startedTestCases;
|
|
2403
|
+
inFlightReruns.add(`${event.testCaseId}:${event.rerunIndex}`);
|
|
2404
|
+
clearLine();
|
|
2405
|
+
process.stdout.write(
|
|
2406
|
+
`${colorize(`[started ${event.startedTestCases}/${event.totalTestCases}]`, ansi2.cyan)} ${event.testCaseName} ${colorize(`(${event.rerunIndex}/${event.rerunTotal})`, ansi2.cyan)} ${colorize("(running)", ansi2.dim)}
|
|
2407
|
+
`
|
|
2408
|
+
);
|
|
2409
|
+
drawSpinner();
|
|
2410
|
+
}
|
|
2307
2411
|
if (event.type === "TestCaseProgress") {
|
|
2308
2412
|
completedCount = event.completedTestCases;
|
|
2413
|
+
inFlightReruns.delete(`${event.testCaseId}:${event.rerunIndex}`);
|
|
2309
2414
|
const numericScores = event.evaluatorScores.map((item) => toNumericScoreFromScores(item.scores)).filter((item) => item !== void 0);
|
|
2310
2415
|
const averageScore = numericScores.length > 0 ? numericScores.reduce((sum, value) => sum + value, 0) / numericScores.length : void 0;
|
|
2311
2416
|
const testCaseId = event.testCaseId;
|
|
@@ -2363,9 +2468,13 @@ async function runSimpleEvalCommandPlain(runner, datasetName, evaluatorPattern)
|
|
|
2363
2468
|
0
|
|
2364
2469
|
);
|
|
2365
2470
|
const lines = [];
|
|
2471
|
+
const statusSuffix = event.errorMessage ? ` ${colorize("ERROR", `${ansi2.bold}${ansi2.red}`)}` : "";
|
|
2366
2472
|
lines.push(
|
|
2367
|
-
`${colorize(`[${event.completedTestCases}/${event.totalTestCases}]`, ansi2.cyan)} ${event.testCaseName} ${colorize(`(${event.rerunIndex}/${event.rerunTotal})`, ansi2.cyan)} ${colorize(`(${durationMs}ms)`, ansi2.dim)}`
|
|
2473
|
+
`${colorize(`[${event.completedTestCases}/${event.totalTestCases}]`, ansi2.cyan)} ${event.testCaseName} ${colorize(`(${event.rerunIndex}/${event.rerunTotal})`, ansi2.cyan)} ${colorize(`(${durationMs}ms)`, ansi2.dim)}${statusSuffix}`
|
|
2368
2474
|
);
|
|
2475
|
+
if (event.errorMessage) {
|
|
2476
|
+
lines.push(colorize(event.errorMessage, ansi2.red));
|
|
2477
|
+
}
|
|
2369
2478
|
for (const item of aggregatedScores) {
|
|
2370
2479
|
const name = evaluatorNameById.get(item.evaluatorId) ?? item.evaluatorId;
|
|
2371
2480
|
lines.push(
|
|
@@ -2509,7 +2618,7 @@ async function runSimpleEvalCommandPlain(runner, datasetName, evaluatorPattern)
|
|
|
2509
2618
|
async function runSimpleEvalCommandInk(runner, datasetName, evaluatorPattern) {
|
|
2510
2619
|
return new Promise((resolve5, reject) => {
|
|
2511
2620
|
const app = ink.render(
|
|
2512
|
-
|
|
2621
|
+
React2__namespace.createElement(RunView, {
|
|
2513
2622
|
runner,
|
|
2514
2623
|
datasetName,
|
|
2515
2624
|
evaluatorPattern,
|