@m4trix/evals 0.29.0 → 0.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli-simple.cjs +18 -6
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +18 -6
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +8 -1
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +8 -1
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +6 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +13 -0
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -274,6 +274,11 @@ interface EvalMiddleware<TCtx> {
|
|
|
274
274
|
interface EvaluateMeta {
|
|
275
275
|
/** Identifier of the trigger that started the run (for example, a CLI invocation). */
|
|
276
276
|
triggerId: string;
|
|
277
|
+
/**
|
|
278
|
+
* Milliseconds since Unix epoch when the run was triggered (e.g. `Date.now()` at CLI start, or when
|
|
279
|
+
* `runDatasetWith` / `runDatasetJobsWithSharedConcurrency` was invoked). Shared across all jobs in a batch.
|
|
280
|
+
*/
|
|
281
|
+
triggerTimestamp: number;
|
|
277
282
|
/**
|
|
278
283
|
* Identifier of the current test-case execution shared across all evaluators
|
|
279
284
|
* for this specific test-case run.
|
|
@@ -593,6 +598,10 @@ interface RunDatasetRequest {
|
|
|
593
598
|
* When omitted, the runner generates one in the format `trg-[uuid]`.
|
|
594
599
|
*/
|
|
595
600
|
triggerId?: string;
|
|
601
|
+
/**
|
|
602
|
+
* Time the run was triggered, in milliseconds since the Unix epoch (as returned by `Date.now()`); defaults to the current time when omitted. Forwarded as `meta.triggerTimestamp`.
|
|
603
|
+
*/
|
|
604
|
+
triggerTimestamp?: number;
|
|
596
605
|
datasetId: string;
|
|
597
606
|
evaluatorIds: ReadonlyArray<string>;
|
|
598
607
|
/** RunConfig name surfaced on evaluator `meta` (from the job or `PROGRAMMATIC_RUN_CONFIG`). */
|
|
@@ -697,6 +706,10 @@ interface RunDatasetJobsWithSharedConcurrencyRequest {
|
|
|
697
706
|
jobs: ReadonlyArray<RunDatasetJob>;
|
|
698
707
|
globalConcurrency: number;
|
|
699
708
|
triggerId?: string;
|
|
709
|
+
/**
|
|
710
|
+
* Time the batch was triggered, in milliseconds since the Unix epoch (as returned by `Date.now()`); defaults to the current time when omitted. The CLI sets this once at command start.
|
|
711
|
+
*/
|
|
712
|
+
triggerTimestamp?: number;
|
|
700
713
|
/** Applied to every job in this batch (e.g. CLI `--experiment`). */
|
|
701
714
|
experimentName?: string;
|
|
702
715
|
}
|
package/dist/index.js
CHANGED
|
@@ -1566,6 +1566,7 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
|
|
|
1566
1566
|
output,
|
|
1567
1567
|
meta: {
|
|
1568
1568
|
triggerId: task.triggerId,
|
|
1569
|
+
triggerTimestamp: task.triggerTimestamp,
|
|
1569
1570
|
runId: evaluatorRunId,
|
|
1570
1571
|
datasetName: task.dataset.getDisplayLabel(),
|
|
1571
1572
|
testCaseId: testCaseItem.id,
|
|
@@ -2046,6 +2047,7 @@ var EffectRunner = class {
|
|
|
2046
2047
|
const globalConcurrency = Math.max(1, request.globalConcurrency);
|
|
2047
2048
|
const sem = Effect.unsafeMakeSemaphore(globalConcurrency);
|
|
2048
2049
|
const triggerId = request.triggerId ?? `trg-${randomUUID()}`;
|
|
2050
|
+
const triggerTimestamp = request.triggerTimestamp ?? Date.now();
|
|
2049
2051
|
const snapshots = [];
|
|
2050
2052
|
for (const job of request.jobs) {
|
|
2051
2053
|
snapshots.push(
|
|
@@ -2053,6 +2055,7 @@ var EffectRunner = class {
|
|
|
2053
2055
|
datasetId: job.datasetId,
|
|
2054
2056
|
evaluatorIds: job.evaluatorIds,
|
|
2055
2057
|
triggerId,
|
|
2058
|
+
triggerTimestamp,
|
|
2056
2059
|
maxConcurrency: this.config.maxConcurrency ?? 1,
|
|
2057
2060
|
globalEvaluationSemaphore: sem,
|
|
2058
2061
|
runConfigName: job.runConfigName,
|
|
@@ -2090,6 +2093,7 @@ var EffectRunner = class {
|
|
|
2090
2093
|
datasetId: request.datasetId,
|
|
2091
2094
|
evaluatorIds: request.evaluatorIds,
|
|
2092
2095
|
triggerId: request.triggerId,
|
|
2096
|
+
triggerTimestamp: request.triggerTimestamp ?? Date.now(),
|
|
2093
2097
|
maxConcurrency: request.concurrency ?? this.config.maxConcurrency ?? 1,
|
|
2094
2098
|
repetitions: request.repetitions,
|
|
2095
2099
|
runConfigName,
|
|
@@ -2117,6 +2121,7 @@ var EffectRunner = class {
|
|
|
2117
2121
|
const totalEvaluations = selectedTestCases.length * repetitions;
|
|
2118
2122
|
const runConfigTags = [...params.runConfigTags ?? []];
|
|
2119
2123
|
const triggerId = params.triggerId ?? `trg-${randomUUID()}`;
|
|
2124
|
+
const triggerTimestamp = params.triggerTimestamp ?? Date.now();
|
|
2120
2125
|
const runId = `run-${randomUUID()}`;
|
|
2121
2126
|
const artifactPath = createArtifactPath(this.config.artifactDirectory, params.datasetId, runId);
|
|
2122
2127
|
const snapshot = {
|
|
@@ -2160,6 +2165,7 @@ var EffectRunner = class {
|
|
|
2160
2165
|
Queue.offer(this.runQueue, {
|
|
2161
2166
|
runId,
|
|
2162
2167
|
triggerId,
|
|
2168
|
+
triggerTimestamp,
|
|
2163
2169
|
datasetId: params.datasetId,
|
|
2164
2170
|
dataset: dataset.dataset,
|
|
2165
2171
|
evaluators: selectedEvaluators,
|