@m4trix/evals 0.29.0 → 0.31.0
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli-simple.cjs +19 -6
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +19 -6
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +9 -1
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +9 -1
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +7 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +15 -0
- package/dist/index.js +7 -0
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -1704,6 +1704,8 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
|
|
|
1704
1704
|
output,
|
|
1705
1705
|
meta: {
|
|
1706
1706
|
triggerId: task.triggerId,
|
|
1707
|
+
triggerTimestamp: task.triggerTimestamp,
|
|
1708
|
+
triggeredAt: new Date(task.triggerTimestamp).toISOString(),
|
|
1707
1709
|
runId: evaluatorRunId,
|
|
1708
1710
|
datasetName: task.dataset.getDisplayLabel(),
|
|
1709
1711
|
testCaseId: testCaseItem.id,
|
|
@@ -2184,6 +2186,7 @@ var EffectRunner = class {
|
|
|
2184
2186
|
const globalConcurrency = Math.max(1, request.globalConcurrency);
|
|
2185
2187
|
const sem = Effect.unsafeMakeSemaphore(globalConcurrency);
|
|
2186
2188
|
const triggerId = request.triggerId ?? `trg-${randomUUID()}`;
|
|
2189
|
+
const triggerTimestamp = request.triggerTimestamp ?? Date.now();
|
|
2187
2190
|
const snapshots = [];
|
|
2188
2191
|
for (const job of request.jobs) {
|
|
2189
2192
|
snapshots.push(
|
|
@@ -2191,6 +2194,7 @@ var EffectRunner = class {
|
|
|
2191
2194
|
datasetId: job.datasetId,
|
|
2192
2195
|
evaluatorIds: job.evaluatorIds,
|
|
2193
2196
|
triggerId,
|
|
2197
|
+
triggerTimestamp,
|
|
2194
2198
|
maxConcurrency: this.config.maxConcurrency ?? 1,
|
|
2195
2199
|
globalEvaluationSemaphore: sem,
|
|
2196
2200
|
runConfigName: job.runConfigName,
|
|
@@ -2228,6 +2232,7 @@ var EffectRunner = class {
|
|
|
2228
2232
|
datasetId: request.datasetId,
|
|
2229
2233
|
evaluatorIds: request.evaluatorIds,
|
|
2230
2234
|
triggerId: request.triggerId,
|
|
2235
|
+
triggerTimestamp: request.triggerTimestamp ?? Date.now(),
|
|
2231
2236
|
maxConcurrency: request.concurrency ?? this.config.maxConcurrency ?? 1,
|
|
2232
2237
|
repetitions: request.repetitions,
|
|
2233
2238
|
runConfigName,
|
|
@@ -2255,6 +2260,7 @@ var EffectRunner = class {
|
|
|
2255
2260
|
const totalEvaluations = selectedTestCases.length * repetitions;
|
|
2256
2261
|
const runConfigTags = [...params.runConfigTags ?? []];
|
|
2257
2262
|
const triggerId = params.triggerId ?? `trg-${randomUUID()}`;
|
|
2263
|
+
const triggerTimestamp = params.triggerTimestamp ?? Date.now();
|
|
2258
2264
|
const runId = `run-${randomUUID()}`;
|
|
2259
2265
|
const artifactPath = createArtifactPath(this.config.artifactDirectory, params.datasetId, runId);
|
|
2260
2266
|
const snapshot = {
|
|
@@ -2298,6 +2304,7 @@ var EffectRunner = class {
|
|
|
2298
2304
|
Queue.offer(this.runQueue, {
|
|
2299
2305
|
runId,
|
|
2300
2306
|
triggerId,
|
|
2307
|
+
triggerTimestamp,
|
|
2301
2308
|
datasetId: params.datasetId,
|
|
2302
2309
|
dataset: dataset.dataset,
|
|
2303
2310
|
evaluators: selectedEvaluators,
|
|
@@ -3076,7 +3083,8 @@ function EvalsCliApp({ data, args, runner }) {
|
|
|
3076
3083
|
void runner.runDatasetWith({
|
|
3077
3084
|
datasetId: selectedDataset.id,
|
|
3078
3085
|
evaluatorIds: clampedState.selectedEvaluatorIds,
|
|
3079
|
-
...PROGRAMMATIC_RUN_CONFIG
|
|
3086
|
+
...PROGRAMMATIC_RUN_CONFIG,
|
|
3087
|
+
triggerTimestamp: Date.now()
|
|
3080
3088
|
}).then((snapshot) => {
|
|
3081
3089
|
setRuntimeMessage(
|
|
3082
3090
|
`Started ${snapshot.runId} on ${selectedDataset.name} (${snapshot.totalTestCases} cases).`
|