@m4trix/evals 0.29.0 → 0.31.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli-simple.cjs +19 -6
- package/dist/cli-simple.cjs.map +1 -1
- package/dist/cli-simple.js +19 -6
- package/dist/cli-simple.js.map +1 -1
- package/dist/cli.cjs +9 -1
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +9 -1
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +7 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +15 -0
- package/dist/index.js +7 -0
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -274,6 +274,13 @@ interface EvalMiddleware<TCtx> {
|
|
|
274
274
|
interface EvaluateMeta {
|
|
275
275
|
/** Identifier of the trigger that started the run (for example, a CLI invocation). */
|
|
276
276
|
triggerId: string;
|
|
277
|
+
/**
|
|
278
|
+
* Milliseconds since Unix epoch when the run was triggered (e.g. `Date.now()` at CLI start, or when
|
|
279
|
+
* `runDatasetWith` / `runDatasetJobsWithSharedConcurrency` was invoked). Shared across all jobs in a batch.
|
|
280
|
+
*/
|
|
281
|
+
triggerTimestamp: number;
|
|
282
|
+
/** Same instant as {@link triggerTimestamp}, as an ISO 8601 string (`toISOString()`). */
|
|
283
|
+
triggeredAt: string;
|
|
277
284
|
/**
|
|
278
285
|
* Identifier of the current test-case execution shared across all evaluators
|
|
279
286
|
* for this specific test-case run.
|
|
@@ -593,6 +600,10 @@ interface RunDatasetRequest {
|
|
|
593
600
|
* When omitted, the runner generates one in the format `trg-[uuid]`.
|
|
594
601
|
*/
|
|
595
602
|
triggerId?: string;
|
|
603
|
+
/**
|
|
604
|
+
* Time at which the run was triggered, in milliseconds since the Unix epoch (e.g. `Date.now()`); defaults to the current time when omitted. Forwarded as `meta.triggerTimestamp`.
|
|
605
|
+
*/
|
|
606
|
+
triggerTimestamp?: number;
|
|
596
607
|
datasetId: string;
|
|
597
608
|
evaluatorIds: ReadonlyArray<string>;
|
|
598
609
|
/** RunConfig name surfaced on evaluator `meta` (from the job or `PROGRAMMATIC_RUN_CONFIG`). */
|
|
@@ -697,6 +708,10 @@ interface RunDatasetJobsWithSharedConcurrencyRequest {
|
|
|
697
708
|
jobs: ReadonlyArray<RunDatasetJob>;
|
|
698
709
|
globalConcurrency: number;
|
|
699
710
|
triggerId?: string;
|
|
711
|
+
/**
|
|
712
|
+
* Time at which the batch was triggered, in milliseconds since the Unix epoch (e.g. `Date.now()`); defaults to the current time when omitted. The CLI sets this once at command start.
|
|
713
|
+
*/
|
|
714
|
+
triggerTimestamp?: number;
|
|
700
715
|
/** Applied to every job in this batch (e.g. CLI `--experiment`). */
|
|
701
716
|
experimentName?: string;
|
|
702
717
|
}
|
package/dist/index.js
CHANGED
|
@@ -1566,6 +1566,8 @@ function processOneEvaluation(task, unit, totalEvaluations, publishEvent, persis
|
|
|
1566
1566
|
output,
|
|
1567
1567
|
meta: {
|
|
1568
1568
|
triggerId: task.triggerId,
|
|
1569
|
+
triggerTimestamp: task.triggerTimestamp,
|
|
1570
|
+
triggeredAt: new Date(task.triggerTimestamp).toISOString(),
|
|
1569
1571
|
runId: evaluatorRunId,
|
|
1570
1572
|
datasetName: task.dataset.getDisplayLabel(),
|
|
1571
1573
|
testCaseId: testCaseItem.id,
|
|
@@ -2046,6 +2048,7 @@ var EffectRunner = class {
|
|
|
2046
2048
|
const globalConcurrency = Math.max(1, request.globalConcurrency);
|
|
2047
2049
|
const sem = Effect.unsafeMakeSemaphore(globalConcurrency);
|
|
2048
2050
|
const triggerId = request.triggerId ?? `trg-${randomUUID()}`;
|
|
2051
|
+
const triggerTimestamp = request.triggerTimestamp ?? Date.now();
|
|
2049
2052
|
const snapshots = [];
|
|
2050
2053
|
for (const job of request.jobs) {
|
|
2051
2054
|
snapshots.push(
|
|
@@ -2053,6 +2056,7 @@ var EffectRunner = class {
|
|
|
2053
2056
|
datasetId: job.datasetId,
|
|
2054
2057
|
evaluatorIds: job.evaluatorIds,
|
|
2055
2058
|
triggerId,
|
|
2059
|
+
triggerTimestamp,
|
|
2056
2060
|
maxConcurrency: this.config.maxConcurrency ?? 1,
|
|
2057
2061
|
globalEvaluationSemaphore: sem,
|
|
2058
2062
|
runConfigName: job.runConfigName,
|
|
@@ -2090,6 +2094,7 @@ var EffectRunner = class {
|
|
|
2090
2094
|
datasetId: request.datasetId,
|
|
2091
2095
|
evaluatorIds: request.evaluatorIds,
|
|
2092
2096
|
triggerId: request.triggerId,
|
|
2097
|
+
triggerTimestamp: request.triggerTimestamp ?? Date.now(),
|
|
2093
2098
|
maxConcurrency: request.concurrency ?? this.config.maxConcurrency ?? 1,
|
|
2094
2099
|
repetitions: request.repetitions,
|
|
2095
2100
|
runConfigName,
|
|
@@ -2117,6 +2122,7 @@ var EffectRunner = class {
|
|
|
2117
2122
|
const totalEvaluations = selectedTestCases.length * repetitions;
|
|
2118
2123
|
const runConfigTags = [...params.runConfigTags ?? []];
|
|
2119
2124
|
const triggerId = params.triggerId ?? `trg-${randomUUID()}`;
|
|
2125
|
+
const triggerTimestamp = params.triggerTimestamp ?? Date.now();
|
|
2120
2126
|
const runId = `run-${randomUUID()}`;
|
|
2121
2127
|
const artifactPath = createArtifactPath(this.config.artifactDirectory, params.datasetId, runId);
|
|
2122
2128
|
const snapshot = {
|
|
@@ -2160,6 +2166,7 @@ var EffectRunner = class {
|
|
|
2160
2166
|
Queue.offer(this.runQueue, {
|
|
2161
2167
|
runId,
|
|
2162
2168
|
triggerId,
|
|
2169
|
+
triggerTimestamp,
|
|
2163
2170
|
datasetId: params.datasetId,
|
|
2164
2171
|
dataset: dataset.dataset,
|
|
2165
2172
|
evaluators: selectedEvaluators,
|