@m4trix/evals 0.28.0 → 0.30.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli-simple.cjs +20 -6
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +20 -6
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +10 -1
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +10 -1
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +8 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +17 -0
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -1704,8 +1704,11 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
|
|
|
1704
1704
|
output,
|
|
1705
1705
|
meta: {
|
|
1706
1706
|
triggerId: task.triggerId,
|
|
1707
|
+
triggerTimestamp: task.triggerTimestamp,
|
|
1707
1708
|
runId: evaluatorRunId,
|
|
1708
1709
|
datasetName: task.dataset.getDisplayLabel(),
|
|
1710
|
+
testCaseId: testCaseItem.id,
|
|
1711
|
+
testCaseName: getTestCaseDisplayLabel(testCaseItem.testCase),
|
|
1709
1712
|
repetitionId,
|
|
1710
1713
|
repetitionIndex,
|
|
1711
1714
|
repetitionCount,
|
|
@@ -2182,6 +2185,7 @@ var EffectRunner = class {
|
|
|
2182
2185
|
const globalConcurrency = Math.max(1, request.globalConcurrency);
|
|
2183
2186
|
const sem = Effect.unsafeMakeSemaphore(globalConcurrency);
|
|
2184
2187
|
const triggerId = request.triggerId ?? `trg-${randomUUID()}`;
|
|
2188
|
+
const triggerTimestamp = request.triggerTimestamp ?? Date.now();
|
|
2185
2189
|
const snapshots = [];
|
|
2186
2190
|
for (const job of request.jobs) {
|
|
2187
2191
|
snapshots.push(
|
|
@@ -2189,6 +2193,7 @@ var EffectRunner = class {
|
|
|
2189
2193
|
datasetId: job.datasetId,
|
|
2190
2194
|
evaluatorIds: job.evaluatorIds,
|
|
2191
2195
|
triggerId,
|
|
2196
|
+
triggerTimestamp,
|
|
2192
2197
|
maxConcurrency: this.config.maxConcurrency ?? 1,
|
|
2193
2198
|
globalEvaluationSemaphore: sem,
|
|
2194
2199
|
runConfigName: job.runConfigName,
|
|
@@ -2226,6 +2231,7 @@ var EffectRunner = class {
|
|
|
2226
2231
|
datasetId: request.datasetId,
|
|
2227
2232
|
evaluatorIds: request.evaluatorIds,
|
|
2228
2233
|
triggerId: request.triggerId,
|
|
2234
|
+
triggerTimestamp: request.triggerTimestamp ?? Date.now(),
|
|
2229
2235
|
maxConcurrency: request.concurrency ?? this.config.maxConcurrency ?? 1,
|
|
2230
2236
|
repetitions: request.repetitions,
|
|
2231
2237
|
runConfigName,
|
|
@@ -2253,6 +2259,7 @@ var EffectRunner = class {
|
|
|
2253
2259
|
const totalEvaluations = selectedTestCases.length * repetitions;
|
|
2254
2260
|
const runConfigTags = [...params.runConfigTags ?? []];
|
|
2255
2261
|
const triggerId = params.triggerId ?? `trg-${randomUUID()}`;
|
|
2262
|
+
const triggerTimestamp = params.triggerTimestamp ?? Date.now();
|
|
2256
2263
|
const runId = `run-${randomUUID()}`;
|
|
2257
2264
|
const artifactPath = createArtifactPath(this.config.artifactDirectory, params.datasetId, runId);
|
|
2258
2265
|
const snapshot = {
|
|
@@ -2296,6 +2303,7 @@ var EffectRunner = class {
|
|
|
2296
2303
|
Queue.offer(this.runQueue, {
|
|
2297
2304
|
runId,
|
|
2298
2305
|
triggerId,
|
|
2306
|
+
triggerTimestamp,
|
|
2299
2307
|
datasetId: params.datasetId,
|
|
2300
2308
|
dataset: dataset.dataset,
|
|
2301
2309
|
evaluators: selectedEvaluators,
|
|
@@ -3074,7 +3082,8 @@ function EvalsCliApp({ data, args, runner }) {
|
|
|
3074
3082
|
void runner.runDatasetWith({
|
|
3075
3083
|
datasetId: selectedDataset.id,
|
|
3076
3084
|
evaluatorIds: clampedState.selectedEvaluatorIds,
|
|
3077
|
-
...PROGRAMMATIC_RUN_CONFIG
|
|
3085
|
+
...PROGRAMMATIC_RUN_CONFIG,
|
|
3086
|
+
triggerTimestamp: Date.now()
|
|
3078
3087
|
}).then((snapshot) => {
|
|
3079
3088
|
setRuntimeMessage(
|
|
3080
3089
|
`Started ${snapshot.runId} on ${selectedDataset.name} (${snapshot.totalTestCases} cases).`
|