@m4trix/evals 0.28.0 → 0.30.0
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli-simple.cjs +20 -6
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +20 -6
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +10 -1
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +10 -1
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +8 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +17 -0
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.cjs
CHANGED
|
@@ -1731,8 +1731,11 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
|
|
|
1731
1731
|
output,
|
|
1732
1732
|
meta: {
|
|
1733
1733
|
triggerId: task.triggerId,
|
|
1734
|
+
triggerTimestamp: task.triggerTimestamp,
|
|
1734
1735
|
runId: evaluatorRunId,
|
|
1735
1736
|
datasetName: task.dataset.getDisplayLabel(),
|
|
1737
|
+
testCaseId: testCaseItem.id,
|
|
1738
|
+
testCaseName: getTestCaseDisplayLabel(testCaseItem.testCase),
|
|
1736
1739
|
repetitionId,
|
|
1737
1740
|
repetitionIndex,
|
|
1738
1741
|
repetitionCount,
|
|
@@ -2209,6 +2212,7 @@ var EffectRunner = class {
|
|
|
2209
2212
|
const globalConcurrency = Math.max(1, request.globalConcurrency);
|
|
2210
2213
|
const sem = effect.Effect.unsafeMakeSemaphore(globalConcurrency);
|
|
2211
2214
|
const triggerId = request.triggerId ?? `trg-${crypto.randomUUID()}`;
|
|
2215
|
+
const triggerTimestamp = request.triggerTimestamp ?? Date.now();
|
|
2212
2216
|
const snapshots = [];
|
|
2213
2217
|
for (const job of request.jobs) {
|
|
2214
2218
|
snapshots.push(
|
|
@@ -2216,6 +2220,7 @@ var EffectRunner = class {
|
|
|
2216
2220
|
datasetId: job.datasetId,
|
|
2217
2221
|
evaluatorIds: job.evaluatorIds,
|
|
2218
2222
|
triggerId,
|
|
2223
|
+
triggerTimestamp,
|
|
2219
2224
|
maxConcurrency: this.config.maxConcurrency ?? 1,
|
|
2220
2225
|
globalEvaluationSemaphore: sem,
|
|
2221
2226
|
runConfigName: job.runConfigName,
|
|
@@ -2253,6 +2258,7 @@ var EffectRunner = class {
|
|
|
2253
2258
|
datasetId: request.datasetId,
|
|
2254
2259
|
evaluatorIds: request.evaluatorIds,
|
|
2255
2260
|
triggerId: request.triggerId,
|
|
2261
|
+
triggerTimestamp: request.triggerTimestamp ?? Date.now(),
|
|
2256
2262
|
maxConcurrency: request.concurrency ?? this.config.maxConcurrency ?? 1,
|
|
2257
2263
|
repetitions: request.repetitions,
|
|
2258
2264
|
runConfigName,
|
|
@@ -2280,6 +2286,7 @@ var EffectRunner = class {
|
|
|
2280
2286
|
const totalEvaluations = selectedTestCases.length * repetitions;
|
|
2281
2287
|
const runConfigTags = [...params.runConfigTags ?? []];
|
|
2282
2288
|
const triggerId = params.triggerId ?? `trg-${crypto.randomUUID()}`;
|
|
2289
|
+
const triggerTimestamp = params.triggerTimestamp ?? Date.now();
|
|
2283
2290
|
const runId = `run-${crypto.randomUUID()}`;
|
|
2284
2291
|
const artifactPath = createArtifactPath(this.config.artifactDirectory, params.datasetId, runId);
|
|
2285
2292
|
const snapshot = {
|
|
@@ -2323,6 +2330,7 @@ var EffectRunner = class {
|
|
|
2323
2330
|
effect.Queue.offer(this.runQueue, {
|
|
2324
2331
|
runId,
|
|
2325
2332
|
triggerId,
|
|
2333
|
+
triggerTimestamp,
|
|
2326
2334
|
datasetId: params.datasetId,
|
|
2327
2335
|
dataset: dataset.dataset,
|
|
2328
2336
|
evaluators: selectedEvaluators,
|
|
@@ -3101,7 +3109,8 @@ function EvalsCliApp({ data, args, runner }) {
|
|
|
3101
3109
|
void runner.runDatasetWith({
|
|
3102
3110
|
datasetId: selectedDataset.id,
|
|
3103
3111
|
evaluatorIds: clampedState.selectedEvaluatorIds,
|
|
3104
|
-
...PROGRAMMATIC_RUN_CONFIG
|
|
3112
|
+
...PROGRAMMATIC_RUN_CONFIG,
|
|
3113
|
+
triggerTimestamp: Date.now()
|
|
3105
3114
|
}).then((snapshot) => {
|
|
3106
3115
|
setRuntimeMessage(
|
|
3107
3116
|
`Started ${snapshot.runId} on ${selectedDataset.name} (${snapshot.totalTestCases} cases).`
|