@open-insight/eval 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1103 @@
1
+ import { t as __exportAll$1 } from "./rolldown-runtime-D7D4PA-g.mjs";
2
+ import { Context, Data, Effect, Fiber, FileSystem, Latch, Layer, Match, Path, Pull, Queue, Ref, Schema, Semaphore, Stream, pipe } from "effect";
3
+ import { Agent, Sandbox } from "@open-insight/core/internal";
4
+ import picomatch from "picomatch";
5
+ import { ChildProcess } from "effect/unstable/process";
6
+ import { ChildProcessSpawner } from "effect/unstable/process/ChildProcessSpawner";
7
+ import { NodeHttpClient, NodeServices } from "@effect/platform-node";
8
+ //#region src/benchmark/error.ts
9
/** Tagged error ("InitError") that carries the underlying `cause`. */
var InitError$2 = class extends Schema.TaggedErrorClass()("InitError", {
  cause: Schema.Defect()
}) {};

/** Union of every reason a BenchmarkError can wrap. */
const BenchmarkErrorReason = Schema.Union([InitError$2]);

/** Tagged error ("BenchmarkError") whose `reason` is one of BenchmarkErrorReason. */
var BenchmarkError = class BenchmarkError extends Schema.TaggedErrorClass()("BenchmarkError", {
  reason: BenchmarkErrorReason
}) {
  /** Wrap `cause` in an InitError and lift it into a BenchmarkError. */
  static init = (cause) => {
    const reason = new InitError$2({ cause });
    return new BenchmarkError({ reason });
  };
};
14
+ //#endregion
15
+ //#region src/utils/type.ts
16
/**
 * Guard that rejects `null` and `undefined`; every other value passes silently.
 *
 * @param {unknown} val - value to check
 * @throws {Error} when `val` is `null` or `undefined`
 */
function assertNonNull(val) {
  // `== null` matches exactly null and undefined.
  const isMissing = val == null;
  if (isMissing) {
    throw new Error("Value cannot be null or undefined");
  }
}
19
+ //#endregion
20
+ //#region src/benchmark/build.ts
21
/** Benchmark metadata: both `name` and `description` are required strings. */
const MetadataSchema$1 = Schema.Struct({ name: Schema.String, description: Schema.String });

/**
 * Start a benchmark builder.
 *
 * @param metadata - benchmark metadata stored on the builder state
 * @returns an Effect succeeding with `{ metadata }`
 */
const init$4 = (metadata) => {
  const seed = { metadata };
  return Effect.succeed(seed);
};
26
/**
 * Builder step that attaches loaded tasks to a benchmark build.
 *
 * The loader runs first (its failure is mapped through BenchmarkError.init),
 * then the incoming build is evaluated and extended with `tasks`.
 *
 * @param loader - Effect producing the tasks
 * @returns a function from a build Effect to the extended build Effect
 */
const withTasks = (loader) => (build) => {
  const attach = Effect.fn(function* () {
    const loadedTasks = yield* loader.pipe(Effect.mapError(BenchmarkError.init));
    const draft = yield* build;
    return { ...draft, tasks: loadedTasks };
  });
  return attach();
};
33
/**
 * Finalise a benchmark build.
 *
 * Asserts that both `metadata` and `tasks` are present, then returns an object
 * holding only those two fields.
 *
 * @param build - Effect producing the builder state
 */
const build$6 = (build) =>
  Effect.map(build, (draft) => {
    const { tasks, metadata } = draft;
    assertNonNull(metadata);
    assertNonNull(tasks);
    const finished = { tasks, metadata };
    return finished;
  });
41
+ //#endregion
42
+ //#region src/benchmark/index.ts
43
// Namespace object for the benchmark module; each member is exposed through a lazy getter.
var benchmark_exports = /* @__PURE__ */ __exportAll$1({
  BenchmarkError() {
    return BenchmarkError;
  },
  BenchmarkErrorReason() {
    return BenchmarkErrorReason;
  },
  InitError() {
    return InitError$2;
  },
  MetadataSchema() {
    return MetadataSchema$1;
  },
  build() {
    return build$6;
  },
  init() {
    return init$4;
  },
  withTasks() {
    return withTasks;
  }
});
52
+ //#endregion
53
+ //#region src/task/build.ts
54
/** Task metadata: `name` is required, `description` is optional. */
const MetadataSchema = Schema.Struct({
  name: Schema.String,
  description: Schema.optional(Schema.String)
});

/**
 * Start a task builder.
 *
 * @param metadata - task metadata stored on the builder state
 * @returns an Effect succeeding with `{ metadata }`
 */
const init$3 = (metadata) => {
  const seed = { metadata };
  return Effect.succeed(seed);
};
59
/** Builder step: copy the task draft and set its `prompt`. */
const withPrompt = (prompt) => (build) =>
  Effect.map(build, (draft) => {
    return { ...draft, prompt };
  });

/** Builder step: copy the task draft and set its `context`. */
const withContext = (context) => (build) =>
  Effect.map(build, (draft) => {
    return { ...draft, context };
  });

/** Builder step: copy the task draft and set its `snapshot`. */
const withSnapshot = (snapshot) => (build) =>
  Effect.map(build, (draft) => {
    return { ...draft, snapshot };
  });

/**
 * Builder step: register grader `exec` under `name`.
 * Existing graders are copied into a fresh object; a grader with the same name is replaced.
 */
const withGrader = (name, exec) => (build) =>
  Effect.map(build, (draft) => {
    const graders = Object.assign({}, draft.graders, { [name]: exec });
    return { ...draft, graders };
  });

/** Builder step: copy the task draft and set its `resources`. */
const withResources = (resources) => (build) =>
  Effect.map(build, (draft) => {
    return { ...draft, resources };
  });

/** Finalise a task build; returns the builder Effect unchanged. */
const build$5 = (build) => {
  return build;
};
80
+ //#endregion
81
+ //#region src/task/error.ts
82
/** Tagged error ("TaskLoadError") that carries the underlying `cause`. */
var TaskLoadError$1 = class extends Schema.TaggedErrorClass()("TaskLoadError", {
  cause: Schema.Defect()
}) {};

/**
 * Error for a single grader, identified by `name`.
 * Note: the runtime tag is "GradeError", which differs from the class name.
 */
var GradeExecError = class extends Schema.TaggedErrorClass()("GradeError", {
  name: Schema.String,
  cause: Schema.Defect()
}) {};

/** Tagged error ("InvalidGradeResultError") that carries the underlying `cause`. */
var InvalidGradeResultError = class extends Schema.TaggedErrorClass()("InvalidGradeResultError", {
  cause: Schema.Defect()
}) {};

/** Union of every reason a TaskError can wrap. */
const TaskErrorReason = Schema.Union([TaskLoadError$1, GradeExecError, InvalidGradeResultError]);

/** Tagged error ("TaskError") whose `reason` is one of TaskErrorReason. */
var TaskError = class TaskError extends Schema.TaggedErrorClass()("TaskError", {
  reason: TaskErrorReason
}) {
  /** Wrap `cause` as a TaskLoadError. */
  static load = (cause) => {
    const reason = new TaskLoadError$1({ cause });
    return new TaskError({ reason });
  };

  /** Curried: fix the grader `name`, then wrap `cause` as a GradeExecError. */
  static gradeExec = (name) => (cause) => {
    const reason = new GradeExecError({ name, cause });
    return new TaskError({ reason });
  };

  /** Wrap `cause` as an InvalidGradeResultError. */
  static gradeResult = (cause) => {
    const reason = new InvalidGradeResultError({ cause });
    return new TaskError({ reason });
  };
};
101
+ //#endregion
102
+ //#region src/task/grade/builtin/command.ts
103
/**
 * Built-in grader factory: run a script through `bash -lc`.
 *
 * @param bash - script text passed as the argument of `-lc`
 * @returns an async grader that calls `ctx.$` with the command and resolves to its result
 */
const bash = (bash) => async (ctx) => {
  const invocation = {
    command: "bash",
    args: ["-lc", bash]
  };
  const outcome = await ctx.$(invocation);
  return outcome;
};
109
+ //#endregion
110
+ //#region src/task/grade/index.ts
111
// Namespace object for the grade module; each member is exposed through a lazy getter.
var grade_exports = /* @__PURE__ */ __exportAll$1({
  ResultSchema() {
    return ResultSchema;
  },
  bash() {
    return bash;
  },
  run() {
    return run$2;
  }
});

/** Grade results: a record from string keys to JSON values. */
const ResultSchema = Schema.Record(Schema.String, Schema.Json);
117
/**
 * Run a collection of graders with the given context.
 *
 * Graders run one after another in `Object.entries` order. A rejected or
 * throwing grader fails with `TaskError.gradeExec(name)`. The collected record
 * is then decoded against ResultSchema; a decode failure is mapped through
 * `TaskError.gradeResult`.
 *
 * @param map - record of grader name to async grader function
 * @returns a function of `ctx` producing the decoded result record
 */
const run$2 = (map) =>
  Effect.fn(function* (ctx) {
    const collected = {};
    const graders = Object.entries(map);
    for (const [name, exec] of graders) {
      const graded = yield* Effect.tryPromise({
        try: () => exec(ctx),
        catch: TaskError.gradeExec(name)
      });
      collected[name] = graded;
    }
    const decode = Schema.decodeUnknownEffect(ResultSchema);
    return yield* decode(collected).pipe(Effect.mapError(TaskError.gradeResult));
  });
128
+ //#endregion
129
+ //#region src/task/load/file.ts
130
var file_exports = /* @__PURE__ */ __exportAll$1({ fromDir: () => fromDir });
/**
 * Discover task modules below `dir` and build one loader Effect per match.
 *
 * The directory is read recursively; every entry matching `glob` becomes an
 * Effect that dynamically imports the file and returns its default export
 * merged with a directory-based sandbox context for the file's folder.
 *
 * @param options.dir - directory to scan
 * @param options.glob - picomatch pattern applied to paths relative to `dir`
 *   (default "**\/index.ts")
 * @returns an Effect yielding an array of task-loading Effects; directory,
 *   URL-conversion and import failures are mapped through `TaskError.load`
 */
const fromDir = ({ dir, glob = "**/index.ts" }) => Effect.gen(function* () {
  const fs = yield* FileSystem.FileSystem;
  const path = yield* Path.Path;
  const entries = yield* fs.readDirectory(dir, { recursive: true }).pipe(Effect.mapError(TaskError.load));
  const matcher = picomatch(glob);
  // Entries are joined onto `dir` below, so they are already relative to it.
  // Match them directly instead of via `path.relative(dir, entry)`, which would
  // resolve the relative entry against the process cwd and yield "../…" paths
  // whenever `dir` is not the cwd. Separators are normalised to "/" for the glob.
  const toGlobPath = (entry) => entry.split(path.sep).join("/");
  return entries
    .filter((entry) => matcher(toGlobPath(entry)))
    .map((entry) => path.join(dir, entry))
    .map((taskFile) => Effect.gen(function* () {
      const context = Sandbox.Context.makeDir(path.dirname(taskFile));
      const fileUrl = yield* path.toFileUrl(taskFile).pipe(Effect.mapError(TaskError.load));
      const module = yield* Effect.tryPromise({
        try: () => import(fileUrl.href),
        catch: TaskError.load
      });
      // A module without a default export exposes `undefined` (not `null`),
      // so both must be rejected for this guard to ever trigger.
      if (module.default === void 0 || module.default === null) {
        return yield* Effect.fail(TaskError.load(/* @__PURE__ */ new Error(`Loading task from file requires a default export, but the module at ${taskFile} does not export any.`)));
      }
      return {
        ...module.default,
        context
      };
    }));
});
150
+ //#endregion
151
+ //#region ../utils/dist/rolldown-runtime-D7D4PA-g.mjs
152
var __defProp = Object.defineProperty;
/**
 * Build a namespace-like object from a map of getter functions.
 *
 * Every enumerable key of `all` becomes an enumerable accessor whose getter is
 * `all[key]`. Unless `no_symbols` is truthy, `Symbol.toStringTag` is set to "Module".
 *
 * @param all - map of export name to getter
 * @param no_symbols - when truthy, skip the toStringTag marker
 */
var __exportAll = (all, no_symbols) => {
  const namespace = {};
  for (const key in all) {
    __defProp(namespace, key, { get: all[key], enumerable: true });
  }
  if (no_symbols) return namespace;
  __defProp(namespace, Symbol.toStringTag, { value: "Module" });
  return namespace;
};
162
+ //#endregion
163
+ //#region ../utils/dist/index.mjs
164
// Namespace object for the spawn utilities; each member is exposed through a lazy getter.
var spawn_exports = /* @__PURE__ */ __exportAll({
  SpawnError() {
    return SpawnError;
  },
  SpawnExitCodeError() {
    return SpawnExitCodeError;
  },
  SpawnService() {
    return SpawnService;
  }
});

/** Tagged error ("SpawnExitCodeError"); its message reports the `exitCode` field. */
var SpawnExitCodeError = class extends Data.TaggedError("SpawnExitCodeError") {
  get message() {
    const { exitCode } = this;
    return `process exited with code ${exitCode}`;
  }
};

/** Tagged error ("SpawnError") wrapping a `reason`; its message is the reason's message. */
var SpawnError = class SpawnError extends Data.TaggedError("SpawnError") {
  get message() {
    const { reason } = this;
    return reason.message;
  }

  /** Wrap an existing error as the reason. */
  static platform = (err) => {
    return new SpawnError({ reason: err });
  };

  /** Build a SpawnError whose reason is a SpawnExitCodeError with the captured output. */
  static exit = ({ exitCode, stdout, stderr }) => {
    const reason = new SpawnExitCodeError({ exitCode, stdout, stderr });
    return new SpawnError({ reason });
  };
};
185
// Service (key "packages/utils/SpawnService") exposing helpers for running child processes.
// `layer` builds the service on top of ChildProcessSpawner; every spawner/stream error is
// mapped through SpawnError.platform.
+ var SpawnService = class SpawnService extends Context.Service()("packages/utils/SpawnService") {
186
+ static layer = Layer.effect(SpawnService, Effect.gen(function* () {
187
+ const spawner = yield* ChildProcessSpawner;
188
// Decode a byte stream to text and concatenate it into one string.
+ const streamText = (stream) => Stream.mkString(Stream.decodeText(stream));
189
// Spawn `command` and wait for its exit code before returning the handle.
// A non-zero exit code fails with SpawnError.exit carrying the collected stdout/stderr.
// NOTE(review): the exit code is awaited before any output is read — confirm a child that
// writes a lot of output cannot stall on a full pipe while nothing is consuming it.
+ const spawn = Effect.fn(function* (command) {
190
+ const handle = yield* spawner.spawn(command).pipe(Effect.mapError(SpawnError.platform));
191
+ const exitCode = yield* handle.exitCode.pipe(Effect.mapError(SpawnError.platform));
192
+ if (exitCode !== 0) {
193
// Read both output streams concurrently so the failure carries them.
+ const output = yield* Effect.all({
194
+ stdout: streamText(handle.stdout),
195
+ stderr: streamText(handle.stderr)
196
+ }, { concurrency: "unbounded" }).pipe(Effect.mapError(SpawnError.platform));
197
+ return yield* SpawnError.exit({
198
+ exitCode,
199
+ ...output
200
+ });
201
+ }
202
+ return handle;
203
+ });
204
// Text stream of the process output: `handle.all` when `options.includeStderr === true`,
// otherwise `handle.stdout`.
+ const streamString = (command, options) => spawn(command).pipe(Effect.map((handle) => Stream.decodeText(options?.includeStderr === true ? handle.all : handle.stdout).pipe(Stream.mapError(SpawnError.platform))), Stream.unwrap);
205
// Same output as streamString, split into lines.
+ const streamLines = (command, options) => Stream.splitLines(streamString(command, options));
206
// Run `command` and yield its exit code.
// NOTE(review): `spawn` already fails on any non-zero code, so this presumably only ever
// yields 0; also `Effect.scoped` is applied before the handle is used in flatMap — confirm
// the handle is still valid after the scope closes.
+ const exitCode = (command) => spawn(command).pipe(Effect.scoped, Effect.flatMap((handle) => handle.exitCode.pipe(Effect.mapError(SpawnError.platform))));
207
// Whole output as one string.
+ const string = (command, options) => Stream.mkString(streamString(command, options));
208
// All output lines, collected.
+ const lines = (command, options) => Stream.runCollect(streamLines(command, options));
209
+ return {
210
+ spawn,
211
+ exitCode,
212
+ streamString,
213
+ streamLines,
214
+ lines,
215
+ string
216
+ };
217
+ }));
218
+ };
219
// Namespace object for the countdown utility.
var countdown_exports = /* @__PURE__ */ __exportAll({
  make() {
    return make;
  }
});

/**
 * Create a countdown gate.
 *
 * Each run of `open` decrements the counter; once the decremented value is
 * <= 0 the latch is opened. `await` is the latch's own await effect.
 *
 * @param count - initial counter value
 * @returns `{ open, await }`
 */
const make = Effect.fn(function* (count) {
  const remaining = yield* Ref.make(count);
  const gate = yield* Latch.make();
  const open = Effect.gen(function* () {
    const left = yield* Ref.updateAndGet(remaining, (n) => n - 1);
    if (left <= 0) {
      yield* gate.open;
    }
  });
  return { open, await: gate.await };
});
230
+ //#endregion
231
+ //#region src/task/load/git.ts
232
// Namespace object for the git loader.
var git_exports = /* @__PURE__ */ __exportAll$1({
  withGitRepo() {
    return withGitRepo;
  }
});

/**
 * Shallow-clone `repoURL` into a scoped temporary directory and run `exec` on it.
 *
 * The directory is created with prefix "open-insight-task-" via
 * `makeTempDirectoryScoped`; the clone runs `git clone --depth 1`.
 *
 * @param repoURL - repository to clone
 * @returns a function taking `exec(repoPath)` and yielding its result
 */
const withGitRepo = (repoURL) =>
  Effect.fn(function* (exec) {
    const fileSystem = yield* FileSystem.FileSystem;
    const spawnService = yield* spawn_exports.SpawnService;
    const repoPath = yield* fileSystem.makeTempDirectoryScoped({ prefix: "open-insight-task-" });
    const cloneCommand = ChildProcess.make`git clone --depth 1 ${repoURL} ${repoPath}`;
    yield* spawnService.exitCode(cloneCommand);
    const outcome = yield* exec(repoPath);
    return outcome;
  });
241
+ //#endregion
242
+ //#region src/task/load/iter.ts
243
/** Materialise any iterable of tasks into a fresh array. */
const toTasks = (tasks) => {
  return Array.from(tasks);
};

/** Loader from an array: succeeds immediately with a copy of `tasks`. */
const fromArray = (tasks) => {
  const copied = toTasks(tasks);
  return Effect.succeed(copied);
};

/** Loader from a sync iterable: the iterable is consumed when the Effect runs. */
const fromIterable = (iterable) => Effect.sync(() => toTasks(iterable));

/**
 * Loader from an async iterable, collected with `Array.fromAsync`.
 * A rejection is mapped through `TaskError.load`.
 */
const fromAsyncIterable = (iterable) =>
  Effect.tryPromise({
    try: () => Array.fromAsync(iterable),
    catch: TaskError.load
  });

/** Loader from a Stream: collect every element, then convert to an array. */
const fromStream = (stream) => {
  const collected = Stream.runCollect(stream);
  return Effect.map(collected, toTasks);
};
251
+ //#endregion
252
+ //#region src/task/load/index.ts
253
// Namespace object for the task loaders; each member is exposed through a lazy getter.
var load_exports = /* @__PURE__ */ __exportAll$1({
  File() {
    return file_exports;
  },
  Git() {
    return git_exports;
  },
  fromArray() {
    return fromArray;
  },
  fromAsyncIterable() {
    return fromAsyncIterable;
  },
  fromIterable() {
    return fromIterable;
  },
  fromStream() {
    return fromStream;
  }
});
261
+ //#endregion
262
+ //#region src/task/index.ts
263
// Namespace object for the task module; each member is exposed through a lazy getter.
var task_exports$1 = /* @__PURE__ */ __exportAll$1({
  Grade() {
    return grade_exports;
  },
  GradeExecError() {
    return GradeExecError;
  },
  InvalidGradeResultError() {
    return InvalidGradeResultError;
  },
  Load() {
    return load_exports;
  },
  MetadataSchema() {
    return MetadataSchema;
  },
  TaskError() {
    return TaskError;
  },
  TaskErrorReason() {
    return TaskErrorReason;
  },
  TaskLoadError() {
    return TaskLoadError$1;
  },
  build() {
    return build$5;
  },
  init() {
    return init$3;
  },
  withContext() {
    return withContext;
  },
  withGrader() {
    return withGrader;
  },
  withPrompt() {
    return withPrompt;
  },
  withResources() {
    return withResources;
  },
  withSnapshot() {
    return withSnapshot;
  }
});
280
+ //#endregion
281
+ //#region src/harness/error.ts
282
/** Tagged error ("InitError") that carries the underlying `cause`. */
var InitError$1 = class extends Schema.TaggedErrorClass()("InitError", {
  cause: Schema.Defect()
}) {};

/** Union of every reason a HarnessError can wrap. */
const HarnessErrorReason = Schema.Union([InitError$1]);

/** Tagged error ("HarnessError") whose `reason` is one of HarnessErrorReason. */
var HarnessError = class HarnessError extends Schema.TaggedErrorClass()("HarnessError", {
  reason: HarnessErrorReason
}) {
  /** Wrap `cause` in an InitError and lift it into a HarnessError. */
  static init = (cause) => {
    const reason = new InitError$1({ cause });
    return new HarnessError({ reason });
  };
};
287
+ //#endregion
288
+ //#region src/harness/build.ts
289
/** Start a harness builder with an empty state. */
const init$2 = () => {
  return Effect.succeed({});
};

/**
 * Builder step: resolve the sandbox provider and store it as a Layer under `sandbox`.
 * A provider failure is mapped through `HarnessError.init`. The provider is
 * resolved before the incoming builder is evaluated.
 */
const withSandboxProvider = (provider) => (builder) =>
  Effect.gen(function* () {
    const resolved = yield* provider.pipe(Effect.mapError(HarnessError.init));
    const sandbox = Layer.effect(Sandbox.ProviderService, Effect.succeed(resolved));
    const draft = yield* builder;
    return { ...draft, sandbox };
  });

/**
 * Builder step: resolve the agent provider and store it as a Layer under `agent`.
 * A provider failure is mapped through `HarnessError.init`. The provider is
 * resolved before the incoming builder is evaluated.
 */
const withAgentProvider = (provider) => (builder) =>
  Effect.gen(function* () {
    const resolved = yield* provider.pipe(Effect.mapError(HarnessError.init));
    const agent = Layer.effect(Agent.ProviderService, Effect.succeed(resolved));
    const draft = yield* builder;
    return { ...draft, agent };
  });

/** Finalise a harness build; returns the builder Effect unchanged. */
const build$4 = (build) => {
  return build;
};
307
+ //#endregion
308
+ //#region src/harness/index.ts
309
// Namespace object for the harness module; each member is exposed through a lazy getter.
var harness_exports = /* @__PURE__ */ __exportAll$1({
  HarnessError() {
    return HarnessError;
  },
  HarnessErrorReason() {
    return HarnessErrorReason;
  },
  InitError() {
    return InitError$1;
  },
  build() {
    return build$4;
  },
  init() {
    return init$2;
  },
  withAgentProvider() {
    return withAgentProvider;
  },
  withSandboxProvider() {
    return withSandboxProvider;
  }
});
318
+ //#endregion
319
+ //#region src/metric/error.ts
320
/**
 * Tagged error ("ExecError") for a metric run: records the metric `name`,
 * the metric `type` ("Trajectory" | "Task" | "Benchmark") and the `cause`.
 */
var ExecError$1 = class extends Schema.TaggedErrorClass()("ExecError", {
  name: Schema.String,
  type: Schema.Union(["Trajectory", "Task", "Benchmark"].map((level) => Schema.Literal(level))),
  cause: Schema.Defect()
}) {};

/** Union of every reason a MetricError can wrap. */
const MetricErrorReason = Schema.Union([ExecError$1]);

/** Tagged error ("MetricError") whose `reason` is one of MetricErrorReason. */
var MetricError = class MetricError extends Schema.TaggedErrorClass()("MetricError", {
  reason: MetricErrorReason
}) {
  /** Curried: fix `name` and `type`, then wrap `cause` as an ExecError. */
  static exec = ({ name, type }) => (cause) => {
    const reason = new ExecError$1({ name, type, cause });
    return new MetricError({ reason });
  };

  /** Shortcut for `exec` with type "Task". */
  static taskExec = (name) => {
    return MetricError.exec({ name, type: "Task" });
  };

  /** Shortcut for `exec` with type "Trajectory". */
  static trajExec = (name) => {
    return MetricError.exec({ name, type: "Trajectory" });
  };

  /** Shortcut for `exec` with type "Benchmark". */
  static benchExec = (name) => {
    return MetricError.exec({ name, type: "Benchmark" });
  };
};
349
+ //#endregion
350
+ //#region src/metric/schema.ts
351
/** Alias of the grade ResultSchema. */
const GradeResultSchema = ResultSchema;

// Constructors for the two tagged delta variants, "Grade" and "Messages".
const { Grade, Messages } = Data.taggedEnum();

/** Output of a trajectory-level metric (tag "TrajOutput"). */
var TrajOutput = class extends Schema.TaggedClass()("TrajOutput", {
  name: Schema.String,
  task: MetadataSchema,
  result: Schema.Json
}) {};

/** Output of a task-level metric (tag "TaskOutput"). */
var TaskOutput = class extends Schema.TaggedClass()("TaskOutput", {
  name: Schema.String,
  task: MetadataSchema,
  result: Schema.Json
}) {};

/** Output of a benchmark-level metric. Note: the runtime tag is "BenchmarkOutput". */
var BenchOutput = class extends Schema.TaggedClass()("BenchmarkOutput", {
  name: Schema.String,
  result: Schema.Json
}) {};

/** Union of the three metric output classes. */
const OutputSchema = Schema.Union([TrajOutput, TaskOutput, BenchOutput]);
372
+ //#endregion
373
+ //#region src/metric/bench/index.ts
374
// Namespace object for benchmark-level metrics; each member is exposed through a lazy getter.
var bench_exports = /* @__PURE__ */ __exportAll$1({
  all() {
    return all$2;
  },
  build() {
    return build$3;
  },
  buildAll() {
    return buildAll$2;
  },
  buildEach() {
    return buildEach$2;
  },
  buildReduce() {
    return buildReduce$2;
  },
  each() {
    return each$2;
  },
  reduce() {
    return reduce$2;
  }
});
383
/** Describe a "Reduce" benchmark metric: named, with an initial state and a step function. */
const reduce$2 = (name, init, exec) => {
  const spec = { _tag: "Reduce", init, exec };
  return { name, exec: spec };
};

/** Describe an "Each" benchmark metric: named, with a per-input function. */
const each$2 = (name, exec) => {
  const spec = { _tag: "Each", exec };
  return { name, exec: spec };
};

/** Describe an "All" benchmark metric: named, with a function over the full input set. */
const all$2 = (name, exec) => {
  const spec = { _tag: "All", exec };
  return { name, exec: spec };
};
405
/**
 * Run a benchmark metric function and validate its result as JSON.
 * Both a thrown/rejected call and a failed JSON decode are mapped to a
 * MetricError of type "Benchmark" for `name`.
 */
const runExec$2 = (name, exec) => {
  const invoke = Effect.tryPromise({
    try: async () => await exec(),
    catch: MetricError.exec({ name, type: "Benchmark" })
  });
  const decodeJson = (result) =>
    Schema.decodeUnknownEffect(Schema.Json)(result).pipe(
      Effect.mapError(MetricError.exec({ name, type: "Benchmark" }))
    );
  return invoke.pipe(Effect.flatMap(decodeJson));
};
415
/**
 * Build the runner for a "Reduce" benchmark metric.
 *
 * The accumulator starts at `exec.init`. Each input calls
 * `exec.exec(accumulator, input)`; the raw result is JSON-validated, then
 * stored as the new accumulator. The output maps `input.task` to the result.
 */
const buildReduce$2 = ({ name, exec }) => {
  let accumulator = exec.init;
  return Effect.fn(function* (input) {
    const rawResult = yield* Effect.tryPromise({
      try: async () => await exec.exec(accumulator, input),
      catch: MetricError.exec({ name, type: "Benchmark" })
    });
    const decode = Schema.decodeUnknownEffect(Schema.Json);
    const result = yield* decode(rawResult).pipe(
      Effect.mapError(MetricError.exec({ name, type: "Benchmark" }))
    );
    // The accumulator only advances after the result validated.
    accumulator = rawResult;
    const payload = { [input.task]: result };
    return BenchOutput.make({ name, result: payload });
  });
};
436
/**
 * Build the runner for an "Each" benchmark metric: every input is passed to
 * `exec.exec` and reported as `{ [input.task]: result }`.
 */
const buildEach$2 = ({ name, exec }) =>
  Effect.fn(function* (input) {
    const result = yield* runExec$2(name, () => exec.exec(input));
    const payload = { [input.task]: result };
    return BenchOutput.make({ name, result: payload });
  });
443
/**
 * Build the runner for an "All" benchmark metric.
 *
 * Inputs are stored per task name. The runner returns `null` until
 * `taskCount` distinct tasks have been seen, runs `exec.exec` on the collected
 * map once that count is reached, and returns `null` for any later input.
 */
const buildAll$2 = ({ name, exec, taskCount }) => {
  const inputs = {};
  const seen = () => Object.keys(inputs).length;
  return Effect.fn(function* (input) {
    // Already complete: ignore further inputs.
    if (seen() >= taskCount) return null;
    inputs[input.task] = input.input;
    // Still waiting for more tasks.
    if (seen() < taskCount) return null;
    const result = yield* runExec$2(name, () => exec.exec(inputs));
    return BenchOutput.make({ name, result });
  });
};
456
/**
 * Pick the runner builder matching the benchmark metric's `exec._tag`
 * ("Reduce", "Each" or "All"). Only the "All" variant receives `taskCount`.
 */
const build$3 = ({ metric, taskCount }) => {
  const onReduce = (exec) => buildReduce$2({ name: metric.name, exec });
  const onEach = (exec) => buildEach$2({ name: metric.name, exec });
  const onAll = (exec) => buildAll$2({ name: metric.name, exec, taskCount });
  return Match.value(metric.exec).pipe(
    Match.tag("Reduce", onReduce),
    Match.tag("Each", onEach),
    Match.tag("All", onAll),
    Match.exhaustive
  );
};
467
+ //#endregion
468
+ //#region src/metric/task/index.ts
469
// Namespace object for task-level metrics; each member is exposed through a lazy getter.
var task_exports = /* @__PURE__ */ __exportAll$1({
  all() {
    return all$1;
  },
  build() {
    return build$2;
  },
  buildAll() {
    return buildAll$1;
  },
  buildEach() {
    return buildEach$1;
  },
  buildReduce() {
    return buildReduce$1;
  },
  each() {
    return each$1;
  },
  reduce() {
    return reduce$1;
  }
});
478
/** Describe a "Reduce" task metric: named, with an initial state and a step function. */
const reduce$1 = (name, init, exec) => {
  const spec = { _tag: "Reduce", init, exec };
  return { name, exec: spec };
};

/** Describe an "Each" task metric: named, with a per-grade function. */
const each$1 = (name, exec) => {
  const spec = { _tag: "Each", exec };
  return { name, exec: spec };
};

/** Describe an "All" task metric: named, with a function over all collected grades. */
const all$1 = (name, exec) => {
  const spec = { _tag: "All", exec };
  return { name, exec: spec };
};
500
/**
 * Run a task metric function and validate its result as JSON.
 * Both a thrown/rejected call and a failed JSON decode are mapped to a
 * MetricError of type "Task" for `name`.
 */
const runExec$1 = (name, exec) => {
  const invoke = Effect.tryPromise({
    try: async () => await exec(),
    catch: MetricError.exec({ name, type: "Task" })
  });
  const decodeJson = (result) =>
    Schema.decodeUnknownEffect(Schema.Json)(result).pipe(Effect.mapError(MetricError.taskExec(name)));
  return invoke.pipe(Effect.flatMap(decodeJson));
};
507
/**
 * Build the runner for a "Reduce" task metric.
 *
 * Only "Grade" deltas are processed (others yield `null`). Each grade calls
 * `exec.exec(accumulator, delta.result)`; the raw result is JSON-validated,
 * then stored as the new accumulator. The output wraps the result in a
 * one-element array.
 */
const buildReduce$1 = ({ name, exec }) => {
  let accumulator = exec.init;
  return Effect.fn(function* ({ task, delta }) {
    if (delta._tag !== "Grade") return null;
    const rawResult = yield* Effect.tryPromise({
      try: async () => await exec.exec(accumulator, delta.result),
      catch: MetricError.taskExec(name)
    });
    const decode = Schema.decodeUnknownEffect(Schema.Json);
    const result = yield* decode(rawResult).pipe(Effect.mapError(MetricError.taskExec(name)));
    // The accumulator only advances after the result validated.
    accumulator = rawResult;
    return TaskOutput.make({ name, task: task.metadata, result: [result] });
  });
};
524
/**
 * Build the runner for an "Each" task metric: every "Grade" delta is passed to
 * `exec.exec`; other deltas yield `null`. The output wraps the result in a
 * one-element array.
 */
const buildEach$1 = ({ name, exec }) =>
  Effect.fn(function* ({ task, delta }) {
    const isGrade = delta._tag === "Grade";
    if (!isGrade) return null;
    const result = yield* runExec$1(name, () => exec.exec(delta.result));
    return TaskOutput.make({ name, task: task.metadata, result: [result] });
  });
533
/**
 * Build the runner for an "All" task metric.
 *
 * "Grade" results are collected until `trailCount` of them have arrived; the
 * metric then runs once over the collected list. Non-grade deltas, and any
 * grade arriving after the list is full, yield `null`.
 */
const buildAll$1 = ({ name, exec, trailCount }) => {
  const inputs = [];
  return Effect.fn(function* ({ task, delta }) {
    if (delta._tag !== "Grade") return null;
    // Already complete: ignore further grades.
    if (inputs.length >= trailCount) return null;
    inputs.push(delta.result);
    // Still waiting for more grades.
    if (inputs.length < trailCount) return null;
    const result = yield* runExec$1(name, () => exec.exec(inputs));
    return TaskOutput.make({ name, task: task.metadata, result: [result] });
  });
};
547
/**
 * Pick the runner builder matching the task metric's `exec._tag`
 * ("Reduce", "Each" or "All"). Only the "All" variant receives `trailCount`.
 */
const build$2 = ({ metric, trailCount }) => {
  const onReduce = (exec) => buildReduce$1({ name: metric.name, exec });
  const onEach = (exec) => buildEach$1({ name: metric.name, exec });
  const onAll = (exec) => buildAll$1({ name: metric.name, exec, trailCount });
  return Match.value(metric.exec).pipe(
    Match.tag("Reduce", onReduce),
    Match.tag("Each", onEach),
    Match.tag("All", onAll),
    Match.exhaustive
  );
};
558
+ //#endregion
559
+ //#region src/metric/traj/index.ts
560
// Namespace object for trajectory-level metrics; each member is exposed through a lazy getter.
var traj_exports = /* @__PURE__ */ __exportAll$1({
  all() {
    return all;
  },
  build() {
    return build$1;
  },
  buildAll() {
    return buildAll;
  },
  buildEach() {
    return buildEach;
  },
  buildReduce() {
    return buildReduce;
  },
  each() {
    return each;
  },
  reduce() {
    return reduce;
  }
});
569
/** Describe a "Reduce" trajectory metric: named, with an initial state and a step function. */
const reduce = (name, init, exec) => {
  const spec = { _tag: "Reduce", init, exec };
  return { name, exec: spec };
};

/** Describe an "Each" trajectory metric: named, with a per-message-batch function. */
const each = (name, exec) => {
  const spec = { _tag: "Each", exec };
  return { name, exec: spec };
};

/** Describe an "All" trajectory metric: named, with a function over the whole trajectory. */
const all = (name, exec) => {
  const spec = { _tag: "All", exec };
  return { name, exec: spec };
};
591
/**
 * Run a trajectory metric function and validate its result as JSON.
 * Both a thrown/rejected call and a failed JSON decode are mapped to a
 * MetricError of type "Trajectory" for `name`.
 */
const runExec = (name, exec) => {
  const invoke = Effect.tryPromise({
    try: async () => await exec(),
    catch: MetricError.exec({ name, type: "Trajectory" })
  });
  const decodeJson = (result) =>
    Schema.decodeUnknownEffect(Schema.Json)(result).pipe(Effect.mapError(MetricError.trajExec(name)));
  return invoke.pipe(Effect.flatMap(decodeJson));
};
598
/**
 * Build the runner for a "Reduce" trajectory metric.
 *
 * Only "Messages" deltas are processed (others yield `null`). Each batch calls
 * `exec.exec(accumulator, { trajectory, messages })`; the raw result is
 * JSON-validated, then stored as the new accumulator.
 */
const buildReduce = ({ name, exec }) => {
  let accumulator = exec.init;
  return Effect.fn(function* ({ task, trajectory, delta }) {
    if (delta._tag !== "Messages") return null;
    const rawResult = yield* Effect.tryPromise({
      try: async () => {
        const step = { trajectory, messages: delta.messages };
        return await exec.exec(accumulator, step);
      },
      catch: MetricError.trajExec(name)
    });
    const decode = Schema.decodeUnknownEffect(Schema.Json);
    const result = yield* decode(rawResult).pipe(Effect.mapError(MetricError.trajExec(name)));
    // The accumulator only advances after the result validated.
    accumulator = rawResult;
    return TrajOutput.make({ name, task: task.metadata, result });
  });
};
618
/**
 * Build the runner for an "Each" trajectory metric: every "Messages" delta is
 * passed to `exec.exec` together with the trajectory; other deltas yield `null`.
 */
const buildEach = ({ name, exec }) =>
  Effect.fn(function* ({ task, trajectory, delta }) {
    const isMessages = delta._tag === "Messages";
    if (!isMessages) return null;
    const result = yield* runExec(name, () => {
      const step = { trajectory, messages: delta.messages };
      return exec.exec(step);
    });
    return TrajOutput.make({ name, task: task.metadata, result });
  });
630
/**
 * Build the runner for an "All" trajectory metric.
 *
 * The metric runs when a "Grade" delta arrives and receives `{ trajectory }`;
 * every other delta yields `null`.
 */
const buildAll = ({ name, exec }) =>
  Effect.fn(function* ({ task, trajectory, delta }) {
    const isGrade = delta._tag === "Grade";
    if (!isGrade) return null;
    const result = yield* runExec(name, () => exec.exec({ trajectory }));
    return TrajOutput.make({ name, task: task.metadata, result });
  });
641
/**
 * Pick the runner builder matching the trajectory metric's `exec._tag`
 * ("Reduce", "Each" or "All").
 */
const build$1 = (metric) => {
  const onReduce = (exec) => buildReduce({ name: metric.name, exec });
  const onEach = (exec) => buildEach({ name: metric.name, exec });
  const onAll = (exec) => buildAll({ name: metric.name, exec });
  return Match.value(metric.exec).pipe(
    Match.tag("Reduce", onReduce),
    Match.tag("Each", onEach),
    Match.tag("All", onAll),
    Match.exhaustive
  );
};
651
+ //#endregion
652
+ //#region src/metric/build.ts
653
/** Start a metrics builder with empty trajectory, task and benchmark lists. */
const init$1 = () => {
  const empty = { trajectory: [], task: [], benchmark: [] };
  return Effect.succeed(empty);
};

/** Append a "Reduce" trajectory metric. */
const withTrajReduce = (name, init, exec) => (build) =>
  Effect.map(build, (metrics) => {
    const trajectory = [...metrics.trajectory, reduce(name, init, exec)];
    return { ...metrics, trajectory };
  });

/** Append an "Each" trajectory metric. */
const withTrajEach = (name, exec) => (build) =>
  Effect.map(build, (metrics) => {
    const trajectory = [...metrics.trajectory, each(name, exec)];
    return { ...metrics, trajectory };
  });

/** Append an "All" trajectory metric. */
const withTraj = (name, exec) => (build) =>
  Effect.map(build, (metrics) => {
    const trajectory = [...metrics.trajectory, all(name, exec)];
    return { ...metrics, trajectory };
  });

/** Append a "Reduce" task metric. */
const withTaskReduce = (name, init, exec) => (builder) =>
  Effect.map(builder, (metrics) => {
    const task = [...metrics.task, reduce$1(name, init, exec)];
    return { ...metrics, task };
  });

/** Append an "Each" task metric. */
const withTaskEach = (name, exec) => (builder) =>
  Effect.map(builder, (metrics) => {
    const task = [...metrics.task, each$1(name, exec)];
    return { ...metrics, task };
  });

/** Append an "All" task metric. */
const withTask = (name, exec) => (builder) =>
  Effect.map(builder, (metrics) => {
    const task = [...metrics.task, all$1(name, exec)];
    return { ...metrics, task };
  });

/** Append a "Reduce" benchmark metric. */
const withBenchReduce = (name, init, exec) => (build) =>
  Effect.map(build, (metrics) => {
    const benchmark = [...metrics.benchmark, reduce$2(name, init, exec)];
    return { ...metrics, benchmark };
  });

/** Append an "Each" benchmark metric. */
const withBenchEach = (name, exec) => (build) =>
  Effect.map(build, (metrics) => {
    const benchmark = [...metrics.benchmark, each$2(name, exec)];
    return { ...metrics, benchmark };
  });

/** Append an "All" benchmark metric. */
const withBenchmark$1 = (name, exec) => (build) =>
  Effect.map(build, (metrics) => {
    const benchmark = [...metrics.benchmark, all$2(name, exec)];
    return { ...metrics, benchmark };
  });
694
+ //#endregion
695
+ //#region src/metric/stream.ts
696
/**
 * Build a consumer that feeds one input to every trajectory metric.
 *
 * Each metric runs concurrently; every non-null output is offered to `queue`.
 *
 * @param options.metrics - trajectory metric descriptors (built via build$1)
 * @param options.queue - queue receiving each produced output
 * @returns a function of `input` yielding the array of outputs when every
 *   metric produced one, or `null` when at least one metric skipped the input
 */
const buildTrajMetricConsumer = ({ metrics: metricVariants, queue }) => {
  const metrics = metricVariants.map(build$1);
  // Forward a produced output to the queue. A skipped metric must stay `null`
  // (not `undefined`), otherwise the completeness check below can never fail.
  const publish = Effect.fn(function* (output) {
    if (output === null) return null;
    yield* Queue.offer(queue, output);
    return output;
  });
  return Effect.fn(function* (input) {
    const outputs = yield* Effect.forEach(metrics, (metric) => metric(input).pipe(Effect.flatMap(publish)), { concurrency: "unbounded" });
    if (outputs.every((output) => output !== null)) return outputs;
    return null;
  });
};
709
/**
 * Build a consumer that feeds one input to every task metric.
 *
 * Each metric runs concurrently; every non-null output is offered to `queue`.
 *
 * @param options.metrics - task metric descriptors (built via build$2)
 * @param options.trailCount - forwarded to build$2 for each metric
 * @param options.queue - queue receiving each produced output
 * @returns a function of `input` yielding the array of outputs when every
 *   metric produced one, or `null` when at least one metric skipped the input
 */
const buildTaskMetricConsumer = ({ metrics: metricVariants, trailCount, queue }) => {
  const metrics = metricVariants.map((metric) => build$2({
    metric,
    trailCount
  }));
  // Forward a produced output to the queue. A skipped metric must stay `null`
  // (not `undefined`): callers read `.name`/`.result` on every returned output,
  // so an `undefined` slipping through the completeness check would crash them.
  const publish = Effect.fn(function* (output) {
    if (output === null) return null;
    yield* Queue.offer(queue, output);
    return output;
  });
  return Effect.fn(function* (input) {
    const outputs = yield* Effect.forEach(metrics, (metric) => metric(input).pipe(Effect.flatMap(publish)), { concurrency: "unbounded" });
    if (outputs.every((output) => output !== null)) return outputs;
    return null;
  });
};
725
/**
 * Build a consumer that feeds one input to every benchmark metric.
 *
 * Each metric runs concurrently; every non-null output is offered to `queue`.
 *
 * @param options.metrics - benchmark metric descriptors (built via build$3)
 * @param options.taskCount - forwarded to build$3 for each metric
 * @param options.queue - queue receiving each produced output
 * @returns a function of `input` yielding the array of outputs when every
 *   metric produced one, or `null` when at least one metric skipped the input
 */
const buildBenchMetricConsumer = ({ metrics: metricVariants, taskCount, queue }) => {
  const metrics = metricVariants.map((metric) => build$3({
    metric,
    taskCount
  }));
  // Forward a produced output to the queue. A skipped metric must stay `null`
  // (not `undefined`), otherwise the completeness check below can never fail.
  const publish = Effect.fn(function* (output) {
    if (output === null) return null;
    yield* Queue.offer(queue, output);
    return output;
  });
  return Effect.fn(function* (input) {
    const outputs = yield* Effect.forEach(metrics, (metric) => metric(input).pipe(Effect.flatMap(publish)), { concurrency: "unbounded" });
    if (outputs.every((output) => output !== null)) return outputs;
    return null;
  });
};
741
// Turn a stream of inputs into a stream of metric outputs.
// Trajectory and task metrics each consume their own broadcast copy of the input stream;
// completed task outputs are forwarded through `benchQueue` to the benchmark metrics.
// All three metric levels publish into `outputQueue`, which backs the returned stream.
+ const transform = ({ metrics, trailCount, taskCount }) => Effect.fn(function* (inputStream) {
742
// Both queues are bounded at 128 elements.
+ const benchQueue = yield* Queue.bounded(128);
743
+ const outputQueue = yield* Queue.bounded(128);
744
+ const consumeTrajMetrics = buildTrajMetricConsumer({
745
+ metrics: metrics.trajectory,
746
+ queue: outputQueue
747
+ });
748
+ const consumeTaskMetrics = buildTaskMetricConsumer({
749
+ metrics: metrics.task,
750
+ trailCount,
751
+ queue: outputQueue
752
+ });
753
+ const consumeBenchMetrics = buildBenchMetricConsumer({
754
+ metrics: metrics.benchmark,
755
+ taskCount,
756
+ queue: outputQueue
757
+ });
758
// Run the task metrics for one input; when the consumer returns outputs (not null),
// offer `{ task: <task name>, input: { [metric name]: result } }` to the benchmark queue.
+ const tapTaskMetrics = Effect.fn(function* (input) {
759
+ const taskOutputs = yield* consumeTaskMetrics(input);
760
+ if (taskOutputs === null) return;
761
+ yield* Queue.offer(benchQueue, {
762
+ task: input.task.metadata.name,
763
+ input: pipe(taskOutputs.map((output) => [output.name, output.result]), Object.fromEntries)
764
+ });
765
+ });
766
// Split the input into two copies, one per metric level that reads raw inputs.
+ const [trajStream, taskStream] = yield* inputStream.pipe(Stream.broadcastN({
767
+ n: 2,
768
+ capacity: 128
769
+ }));
770
// NOTE(review): trajRun and benchRun both end `outputQueue` when they finish — confirm that
// whichever finishes first cannot cause outputs still being offered by the other to be lost.
+ const trajRun = trajStream.pipe(Stream.tap(consumeTrajMetrics, { concurrency: "unbounded" })).pipe(Stream.ensuring(Queue.end(outputQueue))).pipe(Stream.runDrain);
771
// Ending `benchQueue` here is what lets benchRun's queue-backed stream terminate.
+ const taskRun = taskStream.pipe(Stream.tap(tapTaskMetrics, { concurrency: "unbounded" })).pipe(Stream.ensuring(Queue.end(benchQueue))).pipe(Stream.runDrain);
772
+ const benchRun = Stream.fromQueue(benchQueue).pipe(Stream.tap(consumeBenchMetrics, { concurrency: "unbounded" })).pipe(Stream.ensuring(Queue.end(outputQueue))).pipe(Stream.runDrain);
773
// NOTE(review): all three runs are awaited before the output stream is returned, so nothing
// reads `outputQueue` while they run — confirm offers cannot block once 128 outputs are queued.
+ yield* Effect.all([
774
+ trajRun,
775
+ taskRun,
776
+ benchRun
777
+ ], { concurrency: "unbounded" });
778
+ return Stream.fromQueue(outputQueue).pipe(Stream.scoped);
779
// Post-processing passed to Effect.fn: scope the setup effect and unwrap it into the stream it yields.
+ }, (effect) => effect.pipe(Effect.scoped, Stream.unwrap));
780
+ //#endregion
781
+ //#region src/metric/index.ts
782
// Namespace object for the metric module; each member is exposed through a lazy getter.
var metric_exports = /* @__PURE__ */ __exportAll$1({
  BenchOutput() {
    return BenchOutput;
  },
  Benchmark() {
    return bench_exports;
  },
  ExecError() {
    return ExecError$1;
  },
  Grade() {
    return Grade;
  },
  GradeResultSchema() {
    return GradeResultSchema;
  },
  Messages() {
    return Messages;
  },
  MetricError() {
    return MetricError;
  },
  MetricErrorReason() {
    return MetricErrorReason;
  },
  OutputSchema() {
    return OutputSchema;
  },
  Task() {
    return task_exports;
  },
  TaskOutput() {
    return TaskOutput;
  },
  Traj() {
    return traj_exports;
  },
  TrajOutput() {
    return TrajOutput;
  },
  buildBenchMetricConsumer() {
    return buildBenchMetricConsumer;
  },
  buildTaskMetricConsumer() {
    return buildTaskMetricConsumer;
  },
  buildTrajMetricConsumer() {
    return buildTrajMetricConsumer;
  },
  init() {
    return init$1;
  },
  transform() {
    return transform;
  },
  withBenchEach() {
    return withBenchEach;
  },
  withBenchReduce() {
    return withBenchReduce;
  },
  withBenchmark() {
    return withBenchmark$1;
  },
  withTask() {
    return withTask;
  },
  withTaskEach() {
    return withTaskEach;
  },
  withTaskReduce() {
    return withTaskReduce;
  },
  withTraj() {
    return withTraj;
  },
  withTrajEach() {
    return withTrajEach;
  },
  withTrajReduce() {
    return withTrajReduce;
  }
});
811
+ //#endregion
812
+ //#region src/exec/error.ts
813
/** Integer schema constrained to values >= 0. */
const NonNegativeInt = Schema.Int.check(Schema.isGreaterThanOrEqualTo(0));

/** Tagged error ("InitError") that carries the underlying `cause`. */
var InitError = class extends Schema.TaggedErrorClass()("InitError", {
  cause: Schema.Defect()
}) {};

/** Tagged error ("TaskLoadError") that carries the underlying `cause`. */
var TaskLoadError = class extends Schema.TaggedErrorClass()("TaskLoadError", {
  cause: Schema.Defect()
}) {};

/** Tagged error ("TaskInitError") recording the task metadata and the `cause`. */
var TaskInitError = class extends Schema.TaggedErrorClass()("TaskInitError", {
  task: MetadataSchema,
  cause: Schema.Defect()
}) {};

/** Tagged error ("TaskExecError") recording the task metadata, `trailIndex` and the `cause`. */
var TaskExecError = class extends Schema.TaggedErrorClass()("TaskExecError", {
  task: MetadataSchema,
  trailIndex: NonNegativeInt,
  cause: Schema.Defect()
}) {};

/** Tagged error ("EventTransportInitError") recording `transport`, `url` and the `cause`. */
var EventTransportInitError = class extends Schema.TaggedErrorClass()("EventTransportInitError", {
  transport: Schema.String,
  url: Schema.String,
  cause: Schema.Defect()
}) {};

/** Tagged error ("EventTransportError") recording `transport` and the `cause`. */
var EventTransportError = class extends Schema.TaggedErrorClass()("EventTransportError", {
  transport: Schema.String,
  cause: Schema.Defect()
}) {};

/** Tagged error ("SnapshotError") recording the task metadata, the snapshot and the `cause`. */
var SnapshotError = class extends Schema.TaggedErrorClass()("SnapshotError", {
  task: MetadataSchema,
  snapshot: Sandbox.Snapshot.Snapshot,
  cause: Schema.Defect()
}) {};

/** Union of every reason an exec-level ExecError can wrap (MetricError included). */
const ExecErrorReason = Schema.Union([
  InitError,
  TaskLoadError,
  EventTransportInitError,
  EventTransportError,
  SnapshotError,
  TaskInitError,
  TaskExecError,
  MetricError
]);
849
/**
 * Error type of the exec module. Every instance wraps exactly one
 * `ExecErrorReason` in its `reason` field.
 *
 * The static members are factories. Those that need extra context are curried:
 * they take the context first and return a `(cause) => ExecError` function that
 * can be handed directly to `Effect.mapError`.
 *
 * All factories construct their errors with `new`, so none of them depends on
 * a static `make` helper or on the `this` binding of the class.
 */
var ExecError = class ExecError extends Schema.TaggedErrorClass()("ExecError", { reason: ExecErrorReason }) {
	/** Wraps `cause` in an `InitError`. */
	static init = (cause) => new ExecError({ reason: new InitError({ cause }) });

	/** Wraps `cause` in a `TaskLoadError`. */
	static taskLoad = (cause) => new ExecError({ reason: new TaskLoadError({ cause }) });

	/** Curried: wraps `cause` in an `EventTransportInitError` for the given transport and URL. */
	static eventTransportInit = ({ transport, url }) => (cause) => new ExecError({
		reason: new EventTransportInitError({
			transport,
			url,
			cause
		})
	});

	/** Curried: wraps `cause` in an `EventTransportError` for the given transport. */
	static eventTransport = ({ transport }) => (cause) => new ExecError({
		reason: new EventTransportError({
			transport,
			cause
		})
	});

	/** Curried: wraps `cause` in a `SnapshotError`, reading `metadata` and `snapshot` off the task. */
	static snapshot = ({ task: { metadata, snapshot } }) => (cause) => new ExecError({
		reason: new SnapshotError({
			task: metadata,
			snapshot,
			cause
		})
	});

	/** Curried: wraps `cause` in a `TaskInitError`; `task` is the task metadata. */
	static taskInit = ({ task }) => (cause) => new ExecError({
		reason: new TaskInitError({
			task,
			cause
		})
	});

	/** Curried: wraps `cause` in a `TaskExecError`; `task` is the task metadata. */
	static taskExec = ({ task, trailIndex }) => (cause) => new ExecError({
		reason: new TaskExecError({
			task,
			trailIndex,
			cause
		})
	});

	/** Uses `cause` itself as the reason (it is expected to already be a `MetricError`). */
	static metric = (cause) => new ExecError({ reason: cause });
};
877
+ //#endregion
878
+ //#region src/exec/build.ts
879
/**
 * Starts an executor builder: an effect that succeeds with a fresh record in
 * which `benchmark` and `harness` are both still undefined.
 */
const init = () => {
	const emptyExecutor = {
		benchmark: undefined,
		harness: undefined
	};
	return Effect.succeed(emptyExecutor);
};
883
/**
 * Builder step that resolves `benchmark` and stores the result under
 * `benchmark`. The builder runs first; a failure of `benchmark` is mapped
 * through `ExecError.init`.
 */
const withBenchmark = (benchmark) => (builder) => builder.pipe(
	Effect.flatMap((partial) => benchmark.pipe(
		Effect.mapError(ExecError.init),
		Effect.map((resolved) => ({
			...partial,
			benchmark: resolved
		}))
	))
);
891
/**
 * Builder step that resolves `harness` and stores the result under `harness`.
 * The builder runs first; a failure of `harness` is mapped through
 * `ExecError.init`.
 */
const withHarness = (harness) => (builder) => builder.pipe(
	Effect.flatMap((partial) => harness.pipe(
		Effect.mapError(ExecError.init),
		Effect.map((resolved) => ({
			...partial,
			harness: resolved
		}))
	))
);
899
/**
 * Builder step that records `trailCount` on the executor record.
 * The value is stored as given; no validation is performed here.
 */
const withTrailCount = (trailCount) => (builder) => builder.pipe(
	Effect.map((partial) => {
		return {
			...partial,
			trailCount
		};
	})
);
903
/**
 * Builder step that resolves `metrics` and stores the result under `metrics`.
 * The builder runs first; a failure of `metrics` is mapped through
 * `ExecError.init`.
 */
const withMetrics = (metrics) => (builder) => builder.pipe(
	Effect.flatMap((partial) => metrics.pipe(
		Effect.mapError(ExecError.init),
		Effect.map((resolved) => ({
			...partial,
			metrics: resolved
		}))
	))
);
911
/**
 * Builder step that resolves `transport` and stores the result under
 * `transport`. The builder runs first; a failure of `transport` is mapped
 * through `ExecError.init`.
 */
const withTransport = (transport) => (builder) => builder.pipe(
	Effect.flatMap((partial) => transport.pipe(
		Effect.mapError(ExecError.init),
		Effect.map((resolved) => ({
			...partial,
			transport: resolved
		}))
	))
);
919
/**
 * Finalises an executor builder.
 *
 * Builder failures are mapped through `ExecError.init`. `benchmark` and
 * `harness` are mandatory: `assertNonNull` throws when either is missing.
 * `transport` and `metrics` fall back to `null`, and `trailCount` falls back
 * to 1 when it was never set.
 */
const build = (builder) => Effect.gen(function* () {
	const assembled = yield* builder.pipe(Effect.mapError(ExecError.init));
	assertNonNull(assembled.benchmark);
	assertNonNull(assembled.harness);
	// Same rule as a destructuring default: only `undefined` triggers the fallback.
	const trailCount = assembled.trailCount === undefined ? 1 : assembled.trailCount;
	return {
		benchmark: assembled.benchmark,
		harness: assembled.harness,
		transport: assembled.transport ?? null,
		metrics: assembled.metrics ?? null,
		trailCount
	};
});
931
+ //#endregion
932
+ //#region src/exec/trail.ts
933
/**
 * Executes a single trail of `task` inside `sandbox`.
 *
 * Steps:
 *  1. start an agent session via `Agent.ProviderService` and send the task prompt;
 *  2. for each element of the prompt stream, publish any trajectory messages
 *     not yet reported to `metricQueue` as a `Messages` delta;
 *  3. once the stream has finished, run the task graders against the final
 *     trajectory and publish the outcome as a `Grade` delta.
 *
 * Logs are annotated with the task name and trail index, and every failure is
 * mapped through `ExecError.taskExec`.
 */
const runTrail = Effect.fn("exec/runTrail")(function* ({ task, trailIndex, sandbox, metricQueue }) {
	yield* Effect.annotateCurrentSpan({
		taskName: task.metadata.name,
		trailIndex
	});
	yield* Effect.logDebug("Starting trail execution");
	const agentProvider = yield* Agent.ProviderService;
	const session = yield* agentProvider.runSession({ sandbox });
	yield* Effect.logDebug("Started agent session");
	const promptStream = yield* session.prompt({ prompt: task.prompt });
	yield* Effect.logDebug("Attached prompt stream");
	// Number of trajectory messages that have already been published.
	const publishedCount = yield* Ref.make(0);
	const tapDelta = Effect.fn("exec/runTrail/tapDelta")(function* () {
		const current = yield* session.trajectory();
		const alreadyPublished = yield* Ref.get(publishedCount);
		const total = current.content.length;
		if (total !== alreadyPublished) {
			// Only the messages appended since the previous publication are sent.
			const messages = current.content.slice(alreadyPublished, total);
			yield* Ref.set(publishedCount, total);
			yield* Queue.offer(metricQueue, {
				task,
				trajectory: current,
				delta: Messages({ messages })
			});
		}
	});
	yield* promptStream.pipe(
		Stream.tap(tapDelta),
		Stream.drain,
		Stream.runCollect
	);
	const finalTrajectory = yield* session.trajectory();
	yield* Effect.logDebug(`Prompt stream completed with ${finalTrajectory.content.length} trajectory message(s)`);
	const graderContext = {
		trajectory: finalTrajectory,
		...Sandbox.asPromise(sandbox)
	};
	yield* Effect.logDebug("Starting graders");
	const result = yield* run$2(task.graders)(graderContext);
	yield* Effect.logDebug("Completed graders");
	yield* Queue.offer(metricQueue, {
		task,
		trajectory: finalTrajectory,
		delta: Grade({ result })
	});
	yield* Effect.logDebug("Published grade metric delta");
}, (effect, { task: { metadata }, trailIndex }) => {
	const logAnnotations = {
		taskName: metadata.name,
		trailIndex
	};
	const toExecError = ExecError.taskExec({
		task: metadata,
		trailIndex
	});
	return effect.pipe(Effect.annotateLogs(logAnnotations), Effect.mapError(toExecError));
});
982
/**
 * Prepares `task` for execution and returns an effect that runs ONE trail
 * each time it is executed.
 *
 * Preparation (performed once, when `createTrail` itself runs):
 *  - derives a snapshot from the task snapshot and context through
 *    `Agent.ProviderService`; failures are mapped through `ExecError.taskInit`;
 *  - registers a finalizer in the surrounding scope that removes the derived
 *    snapshot unless `config.cacheSnapshot` is truthy (removal errors are ignored).
 *
 * The returned effect (performed once per trail):
 *  - takes the next trail index from a shared counter;
 *  - starts a sandbox from the derived snapshot; failures are mapped through
 *    `ExecError.taskExec`;
 *  - runs `runTrail` with the captured agent provider.
 *
 * The whole returned effect is wrapped in `Effect.scoped`, so any scoped
 * resources acquired for a trail stay alive until that trail finishes and are
 * released right after. Scoping only the `runSandbox` call would close its
 * scope before `runTrail` gets to use the sandbox.
 * NOTE(review): assumes `runSandbox` registers its cleanup in the ambient
 * scope — confirm against the sandbox provider implementation.
 *
 * @param task        loaded task (`snapshot`, `context`, `resources`, `metadata`, ...)
 * @param config      optional sandbox configuration; only `cacheSnapshot` is read here
 * @param metricQueue queue that `runTrail` publishes metric deltas to
 */
const createTrail = Effect.fn("exec/createTrail")(function* ({ task, config, metricQueue }) {
	const { snapshot, context, resources, metadata } = task;
	yield* Effect.annotateCurrentSpan({ taskName: metadata.name });
	yield* Effect.logDebug("Preparing derived snapshot");
	const sandboxProvider = yield* Sandbox.ProviderService;
	const agentProvider = yield* Agent.ProviderService;
	const derived = yield* agentProvider.deriveSnapshot({
		snapshot,
		context
	}).pipe(Effect.mapError(ExecError.taskInit({ task: metadata })));
	yield* Effect.logDebug("Prepared derived snapshot");
	yield* Effect.addFinalizer(Effect.fn("exec/createTrail/finalizeSnapshot")(function* () {
		if (!config?.cacheSnapshot) {
			yield* Effect.logDebug("Removing derived snapshot");
			// Best-effort cleanup: a failed removal must not fail the run.
			yield* sandboxProvider.removeSnapshot({ snapshot: derived }).pipe(Effect.ignore);
		}
	}));
	// Shared by every execution of the returned effect so each trail gets a unique index.
	const nextTrailIndex = yield* Ref.make(0);
	return Effect.gen(function* () {
		const trailIndex = yield* Ref.getAndUpdate(nextTrailIndex, (n) => n + 1);
		yield* Effect.annotateCurrentSpan({
			taskName: metadata.name,
			trailIndex
		});
		yield* Effect.logDebug("Starting sandbox for trail");
		const sandbox = yield* sandboxProvider.runSandbox({
			snapshot: derived,
			resources
		}).pipe(Effect.mapError(ExecError.taskExec({
			task: metadata,
			trailIndex
		})));
		yield* Effect.logDebug("Sandbox is ready");
		yield* runTrail({
			task,
			trailIndex,
			sandbox,
			metricQueue
		}).pipe(Effect.provideService(Agent.ProviderService, agentProvider));
	}).pipe(
		// Scope covers sandbox start-up AND the trail itself.
		Effect.scoped,
		Effect.annotateLogs({ taskName: metadata.name })
	);
}, (effect, { task }) => effect.pipe(Effect.annotateLogs({ taskName: task.metadata.name })));
1023
+ //#endregion
1024
+ //#region src/exec/schedule.ts
1025
/**
 * Schedules a full evaluation: loads every task of the benchmark, prepares a
 * snapshot per task, runs `trailCount` trails per task and feeds the metric
 * deltas they publish into the configured metrics pipeline.
 *
 * Concurrency:
 *  - `harnessConfig.snapshotConcurrency` (default 1) bounds concurrent
 *    `createTrail` preparations;
 *  - `harnessConfig.trailConcurrency` (default 1) bounds concurrently running trails;
 *  - trails of a task are only forked after `snapshotCountdown` has been
 *    opened by the task and awaited (presumably: once every task's snapshot
 *    is ready — verify against the countdown implementation).
 *
 * Metrics:
 *  The consumer of `metricQueue` runs CONCURRENTLY with the task producers.
 *  Awaiting it before any task is scheduled can never complete, because
 *  nothing has been offered to the queue yet. When no metrics are configured
 *  the queue is still drained, so producers never block on the bounded queue
 *  once its 128 slots are full. When the producers finish (successfully or
 *  not) the queue is ended so the consumer can drain what is left and stop.
 *
 * Requires a `Scope` in the environment (trails are forked with `forkScoped`).
 *
 * @param executor built executor (`benchmark`, `harness`, `trailCount`, `metrics`)
 * @param config   `{ harnessConfig?, sandboxConfig? }`
 */
const run$1 = Effect.fn("exec/schedule")(function* ({ executor: { benchmark, harness, trailCount, metrics }, config: { harnessConfig, sandboxConfig } }) {
	const { tasks, metadata } = benchmark;
	const { sandbox, agent } = harness;
	const { snapshotConcurrency = 1, trailConcurrency = 1 } = harnessConfig ?? {};
	yield* Effect.annotateCurrentSpan({ benchmark: metadata.name });
	yield* Effect.logDebug("Starting evaluation schedule");
	const metricQueue = yield* Queue.bounded(128);
	const snapshotSem = yield* Semaphore.make(snapshotConcurrency);
	const snapshotCountdown = yield* countdown_exports.make(tasks.length);
	const trailSem = yield* Semaphore.make(trailConcurrency);
	const scheduleTrail = Effect.fn("exec/scheduleTrail")(function* ({ task }) {
		yield* Effect.annotateCurrentSpan({
			benchmark: metadata.name,
			taskName: task.metadata.name,
			trailCount
		});
		yield* Effect.logDebug("Preparing task schedule");
		const trail = yield* createTrail({
			task,
			metricQueue,
			config: sandboxConfig
		}).pipe((create) => snapshotSem.withPermit(create));
		yield* Effect.logDebug("Task snapshot is ready");
		yield* snapshotCountdown.open;
		yield* Effect.logDebug("Waiting for all task snapshots");
		yield* snapshotCountdown.await;
		yield* Effect.logDebug("All task snapshots are ready");
		const fibers = [];
		for (const trailIndex of Array.from({ length: trailCount }, (_, index) => index)) {
			yield* Effect.logDebug(`Forking trail ${trailIndex}`);
			const fiber = yield* trail.pipe((trail) => trailSem.withPermit(trail)).pipe(Effect.forkScoped);
			fibers.push(fiber);
			yield* Effect.yieldNow;
		}
		yield* Effect.logDebug("Waiting for task trails");
		yield* Effect.all(fibers.map((fiber) => Fiber.join(fiber)), { concurrency: "unbounded" });
		yield* Effect.logDebug("Completed task trails");
	}, (effect, { task }) => effect.pipe(Effect.annotateLogs({
		benchmark: metadata.name,
		taskName: task.metadata.name
	})).pipe(Effect.provide(agent), Effect.provide(sandbox)).pipe(Effect.mapError(ExecError.taskInit({ task: task.metadata }))));
	// Consumer side: always drains `metricQueue`; only transforms it when metrics are configured.
	const consumeMetrics = Effect.gen(function* () {
		if (metrics) {
			yield* Effect.logDebug("Starting metrics stream");
			yield* Stream.fromQueue(metricQueue).pipe(transform({
				metrics,
				trailCount,
				taskCount: tasks.length
			}), Stream.runDrain, Pull.catchDone(() => Effect.void), Effect.mapError(ExecError.metric));
		} else {
			yield* Effect.logDebug("Skipping metrics stream");
			// Discard deltas so that `Queue.offer` in the trails never blocks on a full queue.
			yield* Stream.fromQueue(metricQueue).pipe(Stream.runDrain);
		}
	});
	// Producer side: load the tasks, then run every task's trails.
	const runTasks = Effect.gen(function* () {
		yield* Effect.logDebug("Loading tasks");
		const loadedTasks = yield* Effect.all(tasks.map((task) => task.pipe(Effect.mapError(ExecError.taskLoad))), { concurrency: "unbounded" });
		yield* Effect.logDebug(`Loaded ${loadedTasks.length} task(s)`);
		yield* Effect.all(loadedTasks.map((task) => scheduleTrail({ task })), { concurrency: "unbounded" }).pipe(Effect.scoped);
		yield* Effect.logDebug("Scheduled all tasks");
	}).pipe(
		// Signal end-of-input whether the tasks succeeded, failed or were interrupted.
		Effect.ensuring(Queue.end(metricQueue))
	);
	yield* Effect.all([consumeMetrics, runTasks], { concurrency: "unbounded" });
}, (effect, { executor: { benchmark } }) => effect.pipe(Effect.annotateLogs({ benchmark: benchmark.metadata.name })));
1080
+ //#endregion
1081
+ //#region src/exec/run.ts
1082
/**
 * Resolves `executor` and runs the evaluation schedule with it.
 *
 * The schedule effect returned by `run$1` is sequenced with `yield*`.
 * Producing it through `Effect.map` would only yield the effect as a value
 * and never execute it.
 *
 * The resulting effect is provided with the Node platform services and the
 * undici HTTP client layer, and is run inside its own scope.
 *
 * @param executor effect producing a built executor (see `build`)
 * @param config   optional `{ harnessConfig?, sandboxConfig? }`; defaults to `{}`
 */
const run = Effect.fn(function* (executor, config = {}) {
	const exec = yield* executor;
	yield* run$1({
		executor: exec,
		config
	});
}, (effect) => effect.pipe(Effect.provide(NodeServices.layer), Effect.provide(NodeHttpClient.layerUndici), Effect.scoped));
1088
/**
 * Promise-based entry point: builds the `run` effect for `executor` and
 * `config` and executes it with `Effect.runPromise`.
 */
const runPromise = async (executor, config) => {
	const program = run(executor, config);
	return Effect.runPromise(program);
};
1089
+ //#endregion
1090
+ //#region src/exec/index.ts
1091
/**
 * Namespace object for the exec module.
 * Each entry is a thunk returning the corresponding module-level binding.
 */
var exec_exports = /* @__PURE__ */ __exportAll$1({
	build: () => build,
	init: () => init,
	run: () => run,
	runPromise: () => runPromise,
	withBenchmark: () => withBenchmark,
	withHarness: () => withHarness,
	withMetrics: () => withMetrics,
	withTrailCount: () => withTrailCount,
	withTransport: () => withTransport
});
1102
+ //#endregion
1103
+ export { withSnapshot as $, harness_exports as A, fromIterable as B, reduce as C, all$2 as D, reduce$1 as E, HarnessError as F, grade_exports as G, withGitRepo as H, InitError$1 as I, init$3 as J, bash as K, task_exports$1 as L, init$2 as M, withAgentProvider as N, each$2 as O, withSandboxProvider as P, withResources as Q, fromArray as R, each as S, each$1 as T, fromDir as U, fromStream as V, ResultSchema as W, withGrader as X, withContext as Y, withPrompt as Z, withTaskReduce as _, init as a, InitError$2 as at, withTrajReduce as b, withMetrics as c, init$1 as d, benchmark_exports as et, withBenchEach as f, withTaskEach as g, withTask as h, build as i, BenchmarkError as it, build$4 as j, reduce$2 as k, withTrailCount as l, withBenchmark$1 as m, run as n, init$4 as nt, withBenchmark as o, withBenchReduce as p, build$5 as q, runPromise as r, withTasks as rt, withHarness as s, exec_exports as t, build$6 as tt, metric_exports as u, withTraj as v, all$1 as w, all as x, withTrajEach as y, fromAsyncIterable as z };