npm - @open-insight/eval - Versions diffs - 0.0.0 - Mend

@open-insight/eval 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/exec-w0BrRdRA.mjs +1103 -0
package/dist/index-CjLj-Fvc.d.mts +863 -0
package/dist/index.d.mts +35 -0
package/dist/index.mjs +105 -0
package/dist/internal.d.mts +6 -0
package/dist/internal.mjs +6 -0
package/dist/rolldown-runtime-D7D4PA-g.mjs +13 -0
package/package.json +52 -0

package/dist/exec-w0BrRdRA.mjs ADDED Viewed

@@ -0,0 +1,1103 @@
+import { t as __exportAll$1 } from "./rolldown-runtime-D7D4PA-g.mjs";
+import { Context, Data, Effect, Fiber, FileSystem, Latch, Layer, Match, Path, Pull, Queue, Ref, Schema, Semaphore, Stream, pipe } from "effect";
+import { Agent, Sandbox } from "@open-insight/core/internal";
+import picomatch from "picomatch";
+import { ChildProcess } from "effect/unstable/process";
+import { ChildProcessSpawner } from "effect/unstable/process/ChildProcessSpawner";
+import { NodeHttpClient, NodeServices } from "@effect/platform-node";
+//#region src/benchmark/error.ts
+// Reason raised when a benchmark cannot be initialised; `cause` carries the underlying failure.
+var InitError$2 = class extends Schema.TaggedErrorClass()("InitError", { cause: Schema.Defect() }) {};
+// Union of every reason a BenchmarkError can carry (currently only InitError).
+const BenchmarkErrorReason = Schema.Union([InitError$2]);
+/**
+* Error type of the benchmark module; the concrete failure is stored in `reason`.
+*/
+var BenchmarkError = class BenchmarkError extends Schema.TaggedErrorClass()("BenchmarkError", { reason: BenchmarkErrorReason }) {
+	// Wraps an arbitrary cause in an InitError reason.
+	static init = (cause) => new BenchmarkError({ reason: new InitError$2({ cause }) });
+};
+//#endregion
+//#region src/utils/type.ts
+function assertNonNull(val) {
+	if (val === null || val === void 0) throw new Error("Value cannot be null or undefined");
+}
+//#endregion
+//#region src/benchmark/build.ts
+// Benchmark metadata: `name` and `description` are both required strings.
+const MetadataSchema$1 = Schema.Struct({
+	name: Schema.String,
+	description: Schema.String
+});
+// Starts a benchmark builder: an Effect succeeding with an object that holds only `metadata`.
+const init$4 = (metadata) => Effect.succeed({ metadata });
+const withTasks = (loader) => (build) => Effect.fn(function* () {
+	const tasks = yield* loader.pipe(Effect.mapError(BenchmarkError.init));
+	return {
+		...yield* build,
+		tasks
+	};
+})();
+const build$6 = (build) => Effect.map(build, ({ tasks, metadata }) => {
+	assertNonNull(metadata);
+	assertNonNull(tasks);
+	return {
+		tasks,
+		metadata
+	};
+});
+//#endregion
+//#region src/benchmark/index.ts
+var benchmark_exports = /* @__PURE__ */ __exportAll$1({
+	BenchmarkError: () => BenchmarkError,
+	BenchmarkErrorReason: () => BenchmarkErrorReason,
+	InitError: () => InitError$2,
+	MetadataSchema: () => MetadataSchema$1,
+	build: () => build$6,
+	init: () => init$4,
+	withTasks: () => withTasks
+});
+//#endregion
+//#region src/task/build.ts
+// Task metadata: `name` is required, `description` is optional.
+const MetadataSchema = Schema.Struct({
+	name: Schema.String,
+	description: Schema.optional(Schema.String)
+});
+// Starts a task builder: an Effect succeeding with an object that holds only `metadata`.
+const init$3 = (metadata) => Effect.succeed({ metadata });
+const withPrompt = (prompt) => (build) => Effect.map(build, (t) => ({
+	...t,
+	prompt
+}));
+const withContext = (context) => (build) => Effect.map(build, (t) => ({
+	...t,
+	context
+}));
+const withSnapshot = (snapshot) => (build) => Effect.map(build, (t) => ({
+	...t,
+	snapshot
+}));
+const withGrader = (name, exec) => (build) => Effect.map(build, (t) => ({
+	...t,
+	graders: Object.assign({}, t.graders, { [name]: exec })
+}));
+const withResources = (resources) => (build) => Effect.map(build, (t) => ({
+	...t,
+	resources
+}));
+const build$5 = (build) => build;
+//#endregion
+//#region src/task/error.ts
+// Reason raised when a task cannot be loaded; `cause` carries the underlying failure.
+var TaskLoadError$1 = class extends Schema.TaggedErrorClass()("TaskLoadError", { cause: Schema.Defect() }) {};
+// Reason raised when the grader called `name` fails.
+// NOTE(review): the tag is "GradeError" although the class is named GradeExecError — confirm this is intentional.
+var GradeExecError = class extends Schema.TaggedErrorClass()("GradeError", {
+	name: Schema.String,
+	cause: Schema.Defect()
+}) {};
+// Reason raised when the collected grader results fail schema validation.
+var InvalidGradeResultError = class extends Schema.TaggedErrorClass()("InvalidGradeResultError", { cause: Schema.Defect() }) {};
+// Union of every reason a TaskError can carry.
+const TaskErrorReason = Schema.Union([
+	TaskLoadError$1,
+	GradeExecError,
+	InvalidGradeResultError
+]);
+/**
+* Error type of the task module; the concrete failure is stored in `reason`.
+* The static helpers build a TaskError from a raw cause.
+*/
+var TaskError = class TaskError extends Schema.TaggedErrorClass()("TaskError", { reason: TaskErrorReason }) {
+	static load = (cause) => new TaskError({ reason: new TaskLoadError$1({ cause }) });
+	// Curried: fix the grader name first, then map the cause.
+	static gradeExec = (name) => (cause) => new TaskError({ reason: new GradeExecError({
+		name,
+		cause
+	}) });
+	static gradeResult = (cause) => new TaskError({ reason: new InvalidGradeResultError({ cause }) });
+};
+//#endregion
+//#region src/task/grade/builtin/command.ts
+const bash = (bash) => async (ctx) => {
+	return await ctx.$({
+		command: "bash",
+		args: ["-lc", bash]
+	});
+};
+//#endregion
+//#region src/task/grade/index.ts
+var grade_exports = /* @__PURE__ */ __exportAll$1({
+	ResultSchema: () => ResultSchema,
+	bash: () => bash,
+	run: () => run$2
+});
+const ResultSchema = Schema.Record(Schema.String, Schema.Json);
+/**
+* Run a collection of graders with the given context.
+*/
+const run$2 = (map) => Effect.fn(function* (ctx) {
+	const result = {};
+	for (const [name, exec] of Object.entries(map)) result[name] = yield* Effect.tryPromise({
+		try: () => exec(ctx),
+		catch: TaskError.gradeExec(name)
+	});
+	return yield* Schema.decodeUnknownEffect(ResultSchema)(result).pipe(Effect.mapError(TaskError.gradeResult));
+});
+//#endregion
+//#region src/task/load/file.ts
+var file_exports = /* @__PURE__ */ __exportAll$1({ fromDir: () => fromDir });
+const fromDir = ({ dir, glob = "**/index.ts" }) => Effect.gen(function* () {
+	const fs = yield* FileSystem.FileSystem;
+	const path = yield* Path.Path;
+	const entries = yield* fs.readDirectory(dir, { recursive: true }).pipe(Effect.mapError(TaskError.load));
+	const matcher = picomatch(glob);
+	return entries.filter((entry) => matcher(path.relative(dir, entry))).map((entry) => path.join(dir, entry)).map((taskFile) => Effect.gen(function* () {
+		const context = Sandbox.Context.makeDir(path.dirname(taskFile));
+		const fileUrl = yield* path.toFileUrl(taskFile).pipe(Effect.mapError(TaskError.load));
+		const module = yield* Effect.tryPromise({
+			try: () => import(fileUrl.href),
+			catch: TaskError.load
+		});
+		if (module.default === null) return yield* Effect.fail(TaskError.load(/* @__PURE__ */ new Error(`Loading task from file requires a default export, but the module at ${taskFile} does not export any.`)));
+		return {
+			...module.default,
+			context
+		};
+	}));
+});
+//#endregion
+//#region ../utils/dist/rolldown-runtime-D7D4PA-g.mjs
+var __defProp = Object.defineProperty;
+var __exportAll = (all, no_symbols) => {
+	let target = {};
+	for (var name in all) __defProp(target, name, {
+		get: all[name],
+		enumerable: true
+	});
+	if (!no_symbols) __defProp(target, Symbol.toStringTag, { value: "Module" });
+	return target;
+};
+//#endregion
+//#region ../utils/dist/index.mjs
+var spawn_exports = /* @__PURE__ */ __exportAll({
+	SpawnError: () => SpawnError,
+	SpawnExitCodeError: () => SpawnExitCodeError,
+	SpawnService: () => SpawnService
+});
+// Reason used when a process exits with a non-zero code; the message reports `exitCode`.
+var SpawnExitCodeError = class extends Data.TaggedError("SpawnExitCodeError") {
+	get message() {
+		return `process exited with code ${this.exitCode}`;
+	}
+};
+/**
+* Error type of the spawn service. `reason` is either an underlying error
+* (see `platform`) or a SpawnExitCodeError (see `exit`); `message` is taken from it.
+*/
+var SpawnError = class SpawnError extends Data.TaggedError("SpawnError") {
+	get message() {
+		return this.reason.message;
+	}
+	// Wraps an error as the reason, unchanged.
+	static platform = (err) => new SpawnError({ reason: err });
+	// Builds the error for a non-zero exit, keeping the captured stdout/stderr text.
+	static exit = ({ exitCode, stdout, stderr }) => new SpawnError({ reason: new SpawnExitCodeError({
+		exitCode,
+		stdout,
+		stderr
+	}) });
+};
+/**
+* Service wrapping ChildProcessSpawner with helpers that read a command's output
+* as text and map failures to SpawnError.
+*/
+var SpawnService = class SpawnService extends Context.Service()("packages/utils/SpawnService") {
+	static layer = Layer.effect(SpawnService, Effect.gen(function* () {
+		const spawner = yield* ChildProcessSpawner;
+		// Decodes a byte stream as text and concatenates it into one string.
+		const streamText = (stream) => Stream.mkString(Stream.decodeText(stream));
+		// Spawns `command` and waits for its exit code before handing back the handle.
+		// On a non-zero exit, stdout and stderr are collected concurrently and the
+		// effect fails with SpawnError.exit.
+		// NOTE(review): output is only read after the exit code is known — confirm a
+		// process producing a lot of output cannot stall on a full pipe before exiting.
+		const spawn = Effect.fn(function* (command) {
+			const handle = yield* spawner.spawn(command).pipe(Effect.mapError(SpawnError.platform));
+			const exitCode = yield* handle.exitCode.pipe(Effect.mapError(SpawnError.platform));
+			if (exitCode !== 0) {
+				const output = yield* Effect.all({
+					stdout: streamText(handle.stdout),
+					stderr: streamText(handle.stderr)
+				}, { concurrency: "unbounded" }).pipe(Effect.mapError(SpawnError.platform));
+				return yield* SpawnError.exit({
+					exitCode,
+					...output
+				});
+			}
+			return handle;
+		});
+		// Text stream of the command's stdout, or of `handle.all` when `includeStderr` is true.
+		const streamString = (command, options) => spawn(command).pipe(Effect.map((handle) => Stream.decodeText(options?.includeStderr === true ? handle.all : handle.stdout).pipe(Stream.mapError(SpawnError.platform))), Stream.unwrap);
+		const streamLines = (command, options) => Stream.splitLines(streamString(command, options));
+		// NOTE(review): `handle.exitCode` is read after `Effect.scoped` has been applied to
+		// `spawn` — confirm the handle is still usable at that point. Because `spawn`
+		// fails on any non-zero exit, this presumably only ever succeeds with 0.
+		const exitCode = (command) => spawn(command).pipe(Effect.scoped, Effect.flatMap((handle) => handle.exitCode.pipe(Effect.mapError(SpawnError.platform))));
+		// Whole output as one string / as collected lines.
+		const string = (command, options) => Stream.mkString(streamString(command, options));
+		const lines = (command, options) => Stream.runCollect(streamLines(command, options));
+		return {
+			spawn,
+			exitCode,
+			streamString,
+			streamLines,
+			lines,
+			string
+		};
+	}));
+};
+var countdown_exports = /* @__PURE__ */ __exportAll({ make: () => make });
+const make = Effect.fn(function* (count) {
+	const countDown = yield* Ref.make(count);
+	const latch = yield* Latch.make();
+	return {
+		open: Effect.gen(function* () {
+			if ((yield* Ref.updateAndGet(countDown, (c) => c - 1)) <= 0) yield* latch.open;
+		}),
+		await: latch.await
+	};
+});
+//#endregion
+//#region src/task/load/git.ts
+var git_exports = /* @__PURE__ */ __exportAll$1({ withGitRepo: () => withGitRepo });
+const withGitRepo = (repoURL) => Effect.fn(function* (exec) {
+	const fs = yield* FileSystem.FileSystem;
+	const spawner = yield* spawn_exports.SpawnService;
+	const repoPath = yield* fs.makeTempDirectoryScoped({ prefix: "open-insight-task-" });
+	const clone = ChildProcess.make`git clone --depth 1 ${repoURL} ${repoPath}`;
+	yield* spawner.exitCode(clone);
+	return yield* exec(repoPath);
+});
+//#endregion
+//#region src/task/load/iter.ts
+const toTasks = (tasks) => Array.from(tasks);
+const fromArray = (tasks) => Effect.succeed(toTasks(tasks));
+const fromIterable = (iterable) => Effect.sync(() => toTasks(iterable));
+const fromAsyncIterable = (iterable) => Effect.tryPromise({
+	try: () => Array.fromAsync(iterable),
+	catch: TaskError.load
+}).pipe(Effect.map((tasks) => tasks));
+const fromStream = (stream) => stream.pipe(Stream.runCollect, Effect.map(toTasks));
+//#endregion
+//#region src/task/load/index.ts
+var load_exports = /* @__PURE__ */ __exportAll$1({
+	File: () => file_exports,
+	Git: () => git_exports,
+	fromArray: () => fromArray,
+	fromAsyncIterable: () => fromAsyncIterable,
+	fromIterable: () => fromIterable,
+	fromStream: () => fromStream
+});
+//#endregion
+//#region src/task/index.ts
+var task_exports$1 = /* @__PURE__ */ __exportAll$1({
+	Grade: () => grade_exports,
+	GradeExecError: () => GradeExecError,
+	InvalidGradeResultError: () => InvalidGradeResultError,
+	Load: () => load_exports,
+	MetadataSchema: () => MetadataSchema,
+	TaskError: () => TaskError,
+	TaskErrorReason: () => TaskErrorReason,
+	TaskLoadError: () => TaskLoadError$1,
+	build: () => build$5,
+	init: () => init$3,
+	withContext: () => withContext,
+	withGrader: () => withGrader,
+	withPrompt: () => withPrompt,
+	withResources: () => withResources,
+	withSnapshot: () => withSnapshot
+});
+//#endregion
+//#region src/harness/error.ts
+// Reason raised when a harness component cannot be initialised; `cause` carries the failure.
+var InitError$1 = class extends Schema.TaggedErrorClass()("InitError", { cause: Schema.Defect() }) {};
+// Union of every reason a HarnessError can carry (currently only InitError).
+const HarnessErrorReason = Schema.Union([InitError$1]);
+/**
+* Error type of the harness module; the concrete failure is stored in `reason`.
+*/
+var HarnessError = class HarnessError extends Schema.TaggedErrorClass()("HarnessError", { reason: HarnessErrorReason }) {
+	// Wraps an arbitrary cause in an InitError reason.
+	static init = (cause) => new HarnessError({ reason: new InitError$1({ cause }) });
+};
+//#endregion
+//#region src/harness/build.ts
+const init$2 = () => Effect.succeed({});
+const withSandboxProvider = (provider) => (builder) => Effect.gen(function* () {
+	const p = yield* provider.pipe(Effect.mapError(HarnessError.init));
+	const layer = Layer.effect(Sandbox.ProviderService, Effect.succeed(p));
+	return {
+		...yield* builder,
+		sandbox: layer
+	};
+});
+const withAgentProvider = (provider) => (builder) => Effect.gen(function* () {
+	const p = yield* provider.pipe(Effect.mapError(HarnessError.init));
+	const layer = Layer.effect(Agent.ProviderService, Effect.succeed(p));
+	return {
+		...yield* builder,
+		agent: layer
+	};
+});
+const build$4 = (build) => build;
+//#endregion
+//#region src/harness/index.ts
+var harness_exports = /* @__PURE__ */ __exportAll$1({
+	HarnessError: () => HarnessError,
+	HarnessErrorReason: () => HarnessErrorReason,
+	InitError: () => InitError$1,
+	build: () => build$4,
+	init: () => init$2,
+	withAgentProvider: () => withAgentProvider,
+	withSandboxProvider: () => withSandboxProvider
+});
+//#endregion
+//#region src/metric/error.ts
+// Reason raised when a metric callback (or the decoding of its result) fails.
+// `type` records which metric level ("Trajectory", "Task" or "Benchmark") it belonged to.
+var ExecError$1 = class extends Schema.TaggedErrorClass()("ExecError", {
+	name: Schema.String,
+	type: Schema.Union([
+		Schema.Literal("Trajectory"),
+		Schema.Literal("Task"),
+		Schema.Literal("Benchmark")
+	]),
+	cause: Schema.Defect()
+}) {};
+// Union of every reason a MetricError can carry (currently only ExecError).
+const MetricErrorReason = Schema.Union([ExecError$1]);
+/**
+* Error type of the metric module. All helpers are curried: they fix the metric
+* name (and level) first and return a function that maps a cause to a MetricError.
+*/
+var MetricError = class MetricError extends Schema.TaggedErrorClass()("MetricError", { reason: MetricErrorReason }) {
+	static exec = ({ name, type }) => (cause) => new MetricError({ reason: new ExecError$1({
+		name,
+		type,
+		cause
+	}) });
+	// Level-specific shorthands for `exec`.
+	static taskExec = (name) => MetricError.exec({
+		name,
+		type: "Task"
+	});
+	static trajExec = (name) => MetricError.exec({
+		name,
+		type: "Trajectory"
+	});
+	static benchExec = (name) => MetricError.exec({
+		name,
+		type: "Benchmark"
+	});
+};
+//#endregion
+//#region src/metric/schema.ts
+// Alias of the grader result schema (a string-keyed record of JSON values).
+const GradeResultSchema = ResultSchema;
+// Constructors for the two delta variants consumed by metrics: "Grade" and "Messages".
+const { Grade, Messages } = Data.taggedEnum();
+// Output of a trajectory-level metric, attributed to a task.
+var TrajOutput = class extends Schema.TaggedClass()("TrajOutput", {
+	name: Schema.String,
+	task: MetadataSchema,
+	result: Schema.Json
+}) {};
+// Output of a task-level metric, attributed to a task.
+var TaskOutput = class extends Schema.TaggedClass()("TaskOutput", {
+	name: Schema.String,
+	task: MetadataSchema,
+	result: Schema.Json
+}) {};
+// Output of a benchmark-level metric. Note the tag is "BenchmarkOutput", not "BenchOutput".
+var BenchOutput = class extends Schema.TaggedClass()("BenchmarkOutput", {
+	name: Schema.String,
+	result: Schema.Json
+}) {};
+// Any metric output.
+const OutputSchema = Schema.Union([
+	TrajOutput,
+	TaskOutput,
+	BenchOutput
+]);
+//#endregion
+//#region src/metric/bench/index.ts
+var bench_exports = /* @__PURE__ */ __exportAll$1({
+	all: () => all$2,
+	build: () => build$3,
+	buildAll: () => buildAll$2,
+	buildEach: () => buildEach$2,
+	buildReduce: () => buildReduce$2,
+	each: () => each$2,
+	reduce: () => reduce$2
+});
+const reduce$2 = (name, init, exec) => ({
+	name,
+	exec: {
+		_tag: "Reduce",
+		init,
+		exec
+	}
+});
+const each$2 = (name, exec) => ({
+	name,
+	exec: {
+		_tag: "Each",
+		exec
+	}
+});
+const all$2 = (name, exec) => ({
+	name,
+	exec: {
+		_tag: "All",
+		exec
+	}
+});
+const runExec$2 = (name, exec) => Effect.tryPromise({
+	try: async () => await exec(),
+	catch: MetricError.exec({
+		name,
+		type: "Benchmark"
+	})
+}).pipe(Effect.flatMap((result) => Schema.decodeUnknownEffect(Schema.Json)(result).pipe(Effect.mapError(MetricError.exec({
+	name,
+	type: "Benchmark"
+})))));
+const buildReduce$2 = ({ name, exec }) => {
+	const state = { value: exec.init };
+	return Effect.fn(function* (input) {
+		const rawResult = yield* Effect.tryPromise({
+			try: async () => await exec.exec(state.value, input),
+			catch: MetricError.exec({
+				name,
+				type: "Benchmark"
+			})
+		});
+		const result = yield* Schema.decodeUnknownEffect(Schema.Json)(rawResult).pipe(Effect.mapError(MetricError.exec({
+			name,
+			type: "Benchmark"
+		})));
+		state.value = rawResult;
+		return BenchOutput.make({
+			name,
+			result: { [input.task]: result }
+		});
+	});
+};
+const buildEach$2 = ({ name, exec }) => Effect.fn(function* (input) {
+	const result = yield* runExec$2(name, () => exec.exec(input));
+	return BenchOutput.make({
+		name,
+		result: { [input.task]: result }
+	});
+});
+const buildAll$2 = ({ name, exec, taskCount }) => {
+	const inputs = {};
+	return Effect.fn(function* (input) {
+		if (Object.keys(inputs).length >= taskCount) return null;
+		inputs[input.task] = input.input;
+		if (Object.keys(inputs).length < taskCount) return null;
+		const result = yield* runExec$2(name, () => exec.exec(inputs));
+		return BenchOutput.make({
+			name,
+			result
+		});
+	});
+};
+// Dispatches on the metric's `_tag` to the matching benchmark runner builder.
+// Only the "All" variant receives `taskCount`.
+const build$3 = ({ metric, taskCount }) => Match.value(metric.exec).pipe(Match.tag("Reduce", (exec) => buildReduce$2({
+	name: metric.name,
+	exec
+})), Match.tag("Each", (exec) => buildEach$2({
+	name: metric.name,
+	exec
+})), Match.tag("All", (exec) => buildAll$2({
+	name: metric.name,
+	exec,
+	taskCount
+})), Match.exhaustive);
+//#endregion
+//#region src/metric/task/index.ts
+var task_exports = /* @__PURE__ */ __exportAll$1({
+	all: () => all$1,
+	build: () => build$2,
+	buildAll: () => buildAll$1,
+	buildEach: () => buildEach$1,
+	buildReduce: () => buildReduce$1,
+	each: () => each$1,
+	reduce: () => reduce$1
+});
+const reduce$1 = (name, init, exec) => ({
+	name,
+	exec: {
+		_tag: "Reduce",
+		init,
+		exec
+	}
+});
+const each$1 = (name, exec) => ({
+	name,
+	exec: {
+		_tag: "Each",
+		exec
+	}
+});
+const all$1 = (name, exec) => ({
+	name,
+	exec: {
+		_tag: "All",
+		exec
+	}
+});
+const runExec$1 = (name, exec) => Effect.tryPromise({
+	try: async () => await exec(),
+	catch: MetricError.exec({
+		name,
+		type: "Task"
+	})
+}).pipe(Effect.flatMap((result) => Schema.decodeUnknownEffect(Schema.Json)(result).pipe(Effect.mapError(MetricError.taskExec(name)))));
+const buildReduce$1 = ({ name, exec }) => {
+	const state = { value: exec.init };
+	return Effect.fn(function* ({ task, delta }) {
+		if (delta._tag !== "Grade") return null;
+		const rawResult = yield* Effect.tryPromise({
+			try: async () => await exec.exec(state.value, delta.result),
+			catch: MetricError.taskExec(name)
+		});
+		const result = yield* Schema.decodeUnknownEffect(Schema.Json)(rawResult).pipe(Effect.mapError(MetricError.taskExec(name)));
+		state.value = rawResult;
+		return TaskOutput.make({
+			name,
+			task: task.metadata,
+			result: [result]
+		});
+	});
+};
+const buildEach$1 = ({ name, exec }) => Effect.fn(function* ({ task, delta }) {
+	if (delta._tag !== "Grade") return null;
+	const result = yield* runExec$1(name, () => exec.exec(delta.result));
+	return TaskOutput.make({
+		name,
+		task: task.metadata,
+		result: [result]
+	});
+});
+const buildAll$1 = ({ name, exec, trailCount }) => {
+	const inputs = [];
+	return Effect.fn(function* ({ task, delta }) {
+		if (delta._tag !== "Grade" || inputs.length >= trailCount) return null;
+		inputs.push(delta.result);
+		if (inputs.length < trailCount) return null;
+		const result = yield* runExec$1(name, () => exec.exec(inputs));
+		return TaskOutput.make({
+			name,
+			task: task.metadata,
+			result: [result]
+		});
+	});
+};
+// Dispatches on the metric's `_tag` to the matching task runner builder.
+// Only the "All" variant receives `trailCount`.
+const build$2 = ({ metric, trailCount }) => Match.value(metric.exec).pipe(Match.tag("Reduce", (exec) => buildReduce$1({
+	name: metric.name,
+	exec
+})), Match.tag("Each", (exec) => buildEach$1({
+	name: metric.name,
+	exec
+})), Match.tag("All", (exec) => buildAll$1({
+	name: metric.name,
+	exec,
+	trailCount
+})), Match.exhaustive);
+//#endregion
+//#region src/metric/traj/index.ts
+var traj_exports = /* @__PURE__ */ __exportAll$1({
+	all: () => all,
+	build: () => build$1,
+	buildAll: () => buildAll,
+	buildEach: () => buildEach,
+	buildReduce: () => buildReduce,
+	each: () => each,
+	reduce: () => reduce
+});
+const reduce = (name, init, exec) => ({
+	name,
+	exec: {
+		_tag: "Reduce",
+		init,
+		exec
+	}
+});
+const each = (name, exec) => ({
+	name,
+	exec: {
+		_tag: "Each",
+		exec
+	}
+});
+const all = (name, exec) => ({
+	name,
+	exec: {
+		_tag: "All",
+		exec
+	}
+});
+const runExec = (name, exec) => Effect.tryPromise({
+	try: async () => await exec(),
+	catch: MetricError.exec({
+		name,
+		type: "Trajectory"
+	})
+}).pipe(Effect.flatMap((result) => Schema.decodeUnknownEffect(Schema.Json)(result).pipe(Effect.mapError(MetricError.trajExec(name)))));
+/**
+* Builds the runner for a "Reduce" trajectory metric. Only "Messages" deltas are
+* processed; anything else returns null. The callback receives the current accumulator
+* plus the trajectory and the new messages, and its raw result becomes the next accumulator.
+*
+* NOTE(review): the accumulator lives in one closure per metric, so it is shared by
+* every trajectory this runner sees — confirm that is intended when several tasks or
+* trails feed the same runner.
+*/
+const buildReduce = ({ name, exec }) => {
+	const state = { value: exec.init };
+	return Effect.fn(function* ({ task, trajectory, delta }) {
+		if (delta._tag !== "Messages") return null;
+		const rawResult = yield* Effect.tryPromise({
+			try: async () => await exec.exec(state.value, {
+				trajectory,
+				messages: delta.messages
+			}),
+			catch: MetricError.trajExec(name)
+		});
+		// Decode before committing, so an invalid result never becomes the accumulator.
+		const result = yield* Schema.decodeUnknownEffect(Schema.Json)(rawResult).pipe(Effect.mapError(MetricError.trajExec(name)));
+		state.value = rawResult;
+		return TrajOutput.make({
+			name,
+			task: task.metadata,
+			result
+		});
+	});
+};
+const buildEach = ({ name, exec }) => Effect.fn(function* ({ task, trajectory, delta }) {
+	if (delta._tag !== "Messages") return null;
+	const result = yield* runExec(name, () => exec.exec({
+		trajectory,
+		messages: delta.messages
+	}));
+	return TrajOutput.make({
+		name,
+		task: task.metadata,
+		result
+	});
+});
+const buildAll = ({ name, exec }) => {
+	return Effect.fn(function* ({ task, trajectory, delta }) {
+		if (delta._tag !== "Grade") return null;
+		const result = yield* runExec(name, () => exec.exec({ trajectory }));
+		return TrajOutput.make({
+			name,
+			task: task.metadata,
+			result
+		});
+	});
+};
+// Dispatches on the metric's `_tag` to the matching trajectory runner builder.
+const build$1 = (metric) => Match.value(metric.exec).pipe(Match.tag("Reduce", (exec) => buildReduce({
+	name: metric.name,
+	exec
+})), Match.tag("Each", (exec) => buildEach({
+	name: metric.name,
+	exec
+})), Match.tag("All", (exec) => buildAll({
+	name: metric.name,
+	exec
+})), Match.exhaustive);
+//#endregion
+//#region src/metric/build.ts
+const init$1 = () => Effect.succeed({
+	trajectory: [],
+	task: [],
+	benchmark: []
+});
+const withTrajReduce = (name, init, exec) => (build) => Effect.map(build, (metrics) => ({
+	...metrics,
+	trajectory: [...metrics.trajectory, reduce(name, init, exec)]
+}));
+const withTrajEach = (name, exec) => (build) => Effect.map(build, (metrics) => ({
+	...metrics,
+	trajectory: [...metrics.trajectory, each(name, exec)]
+}));
+const withTraj = (name, exec) => (build) => Effect.map(build, (metrics) => ({
+	...metrics,
+	trajectory: [...metrics.trajectory, all(name, exec)]
+}));
+const withTaskReduce = (name, init, exec) => (builder) => Effect.map(builder, (metrics) => ({
+	...metrics,
+	task: [...metrics.task, reduce$1(name, init, exec)]
+}));
+const withTaskEach = (name, exec) => (builder) => Effect.map(builder, (metrics) => ({
+	...metrics,
+	task: [...metrics.task, each$1(name, exec)]
+}));
+const withTask = (name, exec) => (builder) => Effect.map(builder, (metrics) => ({
+	...metrics,
+	task: [...metrics.task, all$1(name, exec)]
+}));
+const withBenchReduce = (name, init, exec) => (build) => Effect.map(build, (metrics) => ({
+	...metrics,
+	benchmark: [...metrics.benchmark, reduce$2(name, init, exec)]
+}));
+const withBenchEach = (name, exec) => (build) => Effect.map(build, (metrics) => ({
+	...metrics,
+	benchmark: [...metrics.benchmark, each$2(name, exec)]
+}));
+const withBenchmark$1 = (name, exec) => (build) => Effect.map(build, (metrics) => ({
+	...metrics,
+	benchmark: [...metrics.benchmark, all$2(name, exec)]
+}));
+//#endregion
+//#region src/metric/stream.ts
+const buildTrajMetricConsumer = ({ metrics: metricVariants, queue }) => {
+	const metrics = metricVariants.map(build$1);
+	return Effect.fn(function* (input) {
+		const exec = Effect.fn(function* (output) {
+			if (output === null) return;
+			yield* Queue.offer(queue, output);
+			return output;
+		});
+		const outputs = yield* Effect.forEach(metrics, (metric) => metric(input).pipe(Effect.flatMap(exec)), { concurrency: "unbounded" });
+		if (outputs.every((output) => output !== null)) return outputs;
+		return null;
+	});
+};
+const buildTaskMetricConsumer = ({ metrics: metricVariants, trailCount, queue }) => {
+	const metrics = metricVariants.map((metric) => build$2({
+		metric,
+		trailCount
+	}));
+	return Effect.fn(function* (input) {
+		const exec = Effect.fn(function* (output) {
+			if (output === null) return;
+			yield* Queue.offer(queue, output);
+			return output;
+		});
+		const outputs = yield* Effect.forEach(metrics, (metric) => metric(input).pipe(Effect.flatMap(exec)), { concurrency: "unbounded" });
+		if (outputs.every((output) => output !== null)) return outputs;
+		return null;
+	});
+};
+const buildBenchMetricConsumer = ({ metrics: metricVariants, taskCount, queue }) => {
+	const metrics = metricVariants.map((metric) => build$3({
+		metric,
+		taskCount
+	}));
+	return Effect.fn(function* (input) {
+		const exec = Effect.fn(function* (output) {
+			if (output === null) return;
+			yield* Queue.offer(queue, output);
+			return output;
+		});
+		const outputs = yield* Effect.forEach(metrics, (metric) => metric(input).pipe(Effect.flatMap(exec)), { concurrency: "unbounded" });
+		if (outputs.every((output) => output !== null)) return outputs;
+		return null;
+	});
+};
+/**
+* Turns a stream of metric inputs into a stream of metric outputs.
+*
+* The input stream is broadcast to two branches: one drives the trajectory metrics,
+* the other drives the task metrics. Complete sets of task outputs are forwarded
+* through `benchQueue` to the benchmark metrics. Every output produced at any level
+* is offered to `outputQueue`, which backs the returned stream.
+*/
+const transform = ({ metrics, trailCount, taskCount }) => Effect.fn(function* (inputStream) {
+	const benchQueue = yield* Queue.bounded(128);
+	const outputQueue = yield* Queue.bounded(128);
+	const consumeTrajMetrics = buildTrajMetricConsumer({
+		metrics: metrics.trajectory,
+		queue: outputQueue
+	});
+	const consumeTaskMetrics = buildTaskMetricConsumer({
+		metrics: metrics.task,
+		trailCount,
+		queue: outputQueue
+	});
+	const consumeBenchMetrics = buildBenchMetricConsumer({
+		metrics: metrics.benchmark,
+		taskCount,
+		queue: outputQueue
+	});
+	// Runs the task metrics and, when they return a list of outputs, forwards them to
+	// the benchmark stage as a `{ metricName: result }` object keyed under the task name.
+	const tapTaskMetrics = Effect.fn(function* (input) {
+		const taskOutputs = yield* consumeTaskMetrics(input);
+		if (taskOutputs === null) return;
+		yield* Queue.offer(benchQueue, {
+			task: input.task.metadata.name,
+			input: pipe(taskOutputs.map((output) => [output.name, output.result]), Object.fromEntries)
+		});
+	});
+	const [trajStream, taskStream] = yield* inputStream.pipe(Stream.broadcastN({
+		n: 2,
+		capacity: 128
+	}));
+	// NOTE(review): both trajRun and benchRun end `outputQueue` when they finish —
+	// confirm the first one to finish cannot cut off outputs still being produced by the other.
+	const trajRun = trajStream.pipe(Stream.tap(consumeTrajMetrics, { concurrency: "unbounded" })).pipe(Stream.ensuring(Queue.end(outputQueue))).pipe(Stream.runDrain);
+	const taskRun = taskStream.pipe(Stream.tap(tapTaskMetrics, { concurrency: "unbounded" })).pipe(Stream.ensuring(Queue.end(benchQueue))).pipe(Stream.runDrain);
+	const benchRun = Stream.fromQueue(benchQueue).pipe(Stream.tap(consumeBenchMetrics, { concurrency: "unbounded" })).pipe(Stream.ensuring(Queue.end(outputQueue))).pipe(Stream.runDrain);
+	// NOTE(review): all three runs are awaited here, before the output stream is returned,
+	// so nothing reads `outputQueue` while they run. With a bounded capacity of 128,
+	// confirm that offers cannot block forever once more outputs than that are produced.
+	yield* Effect.all([
+		trajRun,
+		taskRun,
+		benchRun
+	], { concurrency: "unbounded" });
+	return Stream.fromQueue(outputQueue).pipe(Stream.scoped);
+}, (effect) => effect.pipe(Effect.scoped, Stream.unwrap));
+//#endregion
+//#region src/metric/index.ts
+var metric_exports = /* @__PURE__ */ __exportAll$1({
+	BenchOutput: () => BenchOutput,
+	Benchmark: () => bench_exports,
+	ExecError: () => ExecError$1,
+	Grade: () => Grade,
+	GradeResultSchema: () => GradeResultSchema,
+	Messages: () => Messages,
+	MetricError: () => MetricError,
+	MetricErrorReason: () => MetricErrorReason,
+	OutputSchema: () => OutputSchema,
+	Task: () => task_exports,
+	TaskOutput: () => TaskOutput,
+	Traj: () => traj_exports,
+	TrajOutput: () => TrajOutput,
+	buildBenchMetricConsumer: () => buildBenchMetricConsumer,
+	buildTaskMetricConsumer: () => buildTaskMetricConsumer,
+	buildTrajMetricConsumer: () => buildTrajMetricConsumer,
+	init: () => init$1,
+	transform: () => transform,
+	withBenchEach: () => withBenchEach,
+	withBenchReduce: () => withBenchReduce,
+	withBenchmark: () => withBenchmark$1,
+	withTask: () => withTask,
+	withTaskEach: () => withTaskEach,
+	withTaskReduce: () => withTaskReduce,
+	withTraj: () => withTraj,
+	withTrajEach: () => withTrajEach,
+	withTrajReduce: () => withTrajReduce
+});
+//#endregion
+//#region src/exec/error.ts
+// Integer that is greater than or equal to zero; used for trail indices.
+const NonNegativeInt = Schema.Int.check(Schema.isGreaterThanOrEqualTo(0));
+// Reasons an execution can fail. Each carries the underlying failure as `cause`
+// plus whatever identifies where it happened (task, trail index, transport, snapshot).
+var InitError = class extends Schema.TaggedErrorClass()("InitError", { cause: Schema.Defect() }) {};
+var TaskLoadError = class extends Schema.TaggedErrorClass()("TaskLoadError", { cause: Schema.Defect() }) {};
+var TaskInitError = class extends Schema.TaggedErrorClass()("TaskInitError", {
+	task: MetadataSchema,
+	cause: Schema.Defect()
+}) {};
+var TaskExecError = class extends Schema.TaggedErrorClass()("TaskExecError", {
+	task: MetadataSchema,
+	trailIndex: NonNegativeInt,
+	cause: Schema.Defect()
+}) {};
+var EventTransportInitError = class extends Schema.TaggedErrorClass()("EventTransportInitError", {
+	transport: Schema.String,
+	url: Schema.String,
+	cause: Schema.Defect()
+}) {};
+var EventTransportError = class extends Schema.TaggedErrorClass()("EventTransportError", {
+	transport: Schema.String,
+	cause: Schema.Defect()
+}) {};
+var SnapshotError = class extends Schema.TaggedErrorClass()("SnapshotError", {
+	task: MetadataSchema,
+	snapshot: Sandbox.Snapshot.Snapshot,
+	cause: Schema.Defect()
+}) {};
+// Union of every reason an ExecError can carry; a MetricError is embedded as-is.
+const ExecErrorReason = Schema.Union([
+	InitError,
+	TaskLoadError,
+	EventTransportInitError,
+	EventTransportError,
+	SnapshotError,
+	TaskInitError,
+	TaskExecError,
+	MetricError
+]);
+/**
+* Error type of the exec module; the concrete failure is stored in `reason`.
+* Helpers that need context are curried: fix the context first, then map the cause.
+*/
+var ExecError = class ExecError extends Schema.TaggedErrorClass()("ExecError", { reason: ExecErrorReason }) {
+	static init = (cause) => new ExecError({ reason: new InitError({ cause }) });
+	static taskLoad = (cause) => new ExecError({ reason: new TaskLoadError({ cause }) });
+	// These two go through `this.make` / `X.make` rather than `new` like the other helpers.
+	static eventTransportInit = ({ transport, url }) => (cause) => this.make({ reason: EventTransportInitError.make({
+		transport,
+		url,
+		cause
+	}) });
+	static eventTransport = ({ transport }) => (cause) => this.make({ reason: EventTransportError.make({
+		transport,
+		cause
+	}) });
+	// Takes the whole task and extracts its metadata and snapshot.
+	static snapshot = ({ task: { metadata, snapshot } }) => (cause) => new ExecError({ reason: new SnapshotError({
+		task: metadata,
+		snapshot,
+		cause
+	}) });
+	// Here `task` is stored directly in the field declared as MetadataSchema.
+	static taskInit = ({ task }) => (cause) => new ExecError({ reason: new TaskInitError({
+		task,
+		cause
+	}) });
+	static taskExec = ({ task, trailIndex }) => (cause) => new ExecError({ reason: new TaskExecError({
+		task,
+		trailIndex,
+		cause
+	}) });
+	// Embeds the given cause directly as the reason, without wrapping it.
+	static metric = (cause) => new ExecError({ reason: cause });
+};
+//#endregion
+//#region src/exec/build.ts
+const init = () => Effect.succeed({
+	benchmark: void 0,
+	harness: void 0
+});
+const withBenchmark = (benchmark) => (builder) => Effect.gen(function* () {
+	const exec = yield* builder;
+	const b = yield* benchmark.pipe(Effect.mapError(ExecError.init));
+	return {
+		...exec,
+		benchmark: b
+	};
+});
+const withHarness = (harness) => (builder) => Effect.gen(function* () {
+	const exec = yield* builder;
+	const h = yield* harness.pipe(Effect.mapError(ExecError.init));
+	return {
+		...exec,
+		harness: h
+	};
+});
+const withTrailCount = (trailCount) => (builder) => Effect.map(builder, (exec) => ({
+	...exec,
+	trailCount
+}));
+const withMetrics = (metrics) => (builder) => Effect.gen(function* () {
+	const exec = yield* builder;
+	const m = yield* metrics.pipe(Effect.mapError(ExecError.init));
+	return {
+		...exec,
+		metrics: m
+	};
+});
+const withTransport = (transport) => (builder) => Effect.gen(function* () {
+	const exec = yield* builder;
+	const t = yield* transport.pipe(Effect.mapError(ExecError.init));
+	return {
+		...exec,
+		transport: t
+	};
+});
+const build = (builder) => Effect.gen(function* () {
+	const { benchmark, harness, transport, metrics, trailCount = 1 } = yield* builder.pipe(Effect.mapError(ExecError.init));
+	assertNonNull(benchmark);
+	assertNonNull(harness);
+	return {
+		benchmark,
+		harness,
+		transport: transport ?? null,
+		metrics: metrics ?? null,
+		trailCount
+	};
+});
+//#endregion
+//#region src/exec/trail.ts
+/**
+* Runs one trail of a task: starts an agent session in `sandbox`, sends the task
+* prompt, publishes a "Messages" delta to `metricQueue` whenever the trajectory grows,
+* then runs the task's graders and publishes a final "Grade" delta.
+*
+* Any failure is mapped to `ExecError.taskExec` carrying the task metadata and
+* `trailIndex`; logs are annotated with the task name and trail index.
+*/
+const runTrail = Effect.fn("exec/runTrail")(function* ({ task, trailIndex, sandbox, metricQueue }) {
+	yield* Effect.annotateCurrentSpan({
+		taskName: task.metadata.name,
+		trailIndex
+	});
+	yield* Effect.logDebug("Starting trail execution");
+	const provider = yield* Agent.ProviderService;
+	const { prompt, graders } = task;
+	const agent = yield* provider.runSession({ sandbox });
+	yield* Effect.logDebug("Started agent session");
+	const stream = yield* agent.prompt({ prompt });
+	yield* Effect.logDebug("Attached prompt stream");
+	// Number of trajectory entries already published as deltas.
+	const trajLength = yield* Ref.make(0);
+	// Runs for every element of the prompt stream: publishes only the entries added
+	// since the previous call and does nothing when the length is unchanged.
+	const tapDelta = Effect.fn("exec/runTrail/tapDelta")(function* () {
+		const trajectory = yield* agent.trajectory();
+		const prevTrajLength = yield* Ref.get(trajLength);
+		const currTrajLength = trajectory.content.length;
+		if (currTrajLength === prevTrajLength) return;
+		const messages = trajectory.content.slice(prevTrajLength, currTrajLength);
+		yield* Ref.set(trajLength, currTrajLength);
+		yield* Queue.offer(metricQueue, {
+			task,
+			trajectory,
+			delta: Messages({ messages })
+		});
+	});
+	// Consume the whole prompt stream; elements are drained, only the tap's side effects matter.
+	yield* stream.pipe(Stream.tap(tapDelta)).pipe(Stream.drain).pipe(Stream.runCollect);
+	const trajectory = yield* agent.trajectory();
+	yield* Effect.logDebug(`Prompt stream completed with ${trajectory.content.length} trajectory message(s)`);
+	// Grader context: the final trajectory plus whatever `Sandbox.asPromise(sandbox)` exposes.
+	const ctx = {
+		trajectory,
+		...Sandbox.asPromise(sandbox)
+	};
+	yield* Effect.logDebug(`Starting graders`);
+	const gradeResults = yield* run$2(graders)(ctx);
+	yield* Effect.logDebug(`Completed graders`);
+	yield* Queue.offer(metricQueue, {
+		task,
+		trajectory,
+		delta: Grade({ result: gradeResults })
+	});
+	yield* Effect.logDebug("Published grade metric delta");
+}, (effect, { task: { metadata }, trailIndex }) => effect.pipe(Effect.annotateLogs({
+	taskName: metadata.name,
+	trailIndex
+}), Effect.mapError(ExecError.taskExec({
+	task: metadata,
+	trailIndex
+}))));
+const createTrail = Effect.fn("exec/createTrail")(function* ({ task, config, metricQueue }) {
+	const { snapshot, context, resources, metadata } = task;
+	yield* Effect.annotateCurrentSpan({ taskName: metadata.name });
+	yield* Effect.logDebug("Preparing derived snapshot");
+	const sandboxProvider = yield* Sandbox.ProviderService;
+	const agentProvider = yield* Agent.ProviderService;
+	const derived = yield* agentProvider.deriveSnapshot({
+		snapshot,
+		context
+	}).pipe(Effect.mapError(ExecError.taskInit({ task: metadata })));
+	yield* Effect.logDebug("Prepared derived snapshot");
+	yield* Effect.addFinalizer(Effect.fn("exec/createTrail/finalizeSnapshot")(function* () {
+		if (!config?.cacheSnapshot) {
+			yield* Effect.logDebug("Removing derived snapshot");
+			yield* sandboxProvider.removeSnapshot({ snapshot: derived }).pipe(Effect.ignore);
+		}
+	}));
+	const nextTrailIndex = yield* Ref.make(0);
+	return Effect.gen(function* () {
+		const trailIndex = yield* Ref.getAndUpdate(nextTrailIndex, (n) => n + 1);
+		yield* Effect.annotateCurrentSpan({
+			taskName: metadata.name,
+			trailIndex
+		});
+		yield* Effect.logDebug("Starting sandbox for trail");
+		const sandbox = yield* sandboxProvider.runSandbox({
+			snapshot: derived,
+			resources
+		}).pipe(Effect.mapError(ExecError.taskExec({
+			task: metadata,
+			trailIndex
+		}))).pipe(Effect.scoped);
+		yield* Effect.logDebug("Sandbox is ready");
+		yield* runTrail({
+			task,
+			trailIndex,
+			sandbox,
+			metricQueue
+		}).pipe(Effect.provideService(Agent.ProviderService, agentProvider));
+	}).pipe(Effect.annotateLogs({ taskName: metadata.name }));
+}, (effect, { task }) => effect.pipe(Effect.annotateLogs({ taskName: task.metadata.name })));
+//#endregion
+//#region src/exec/schedule.ts
+const run$1 = Effect.fn("exec/schedule")(function* ({ executor: { benchmark, harness, trailCount, metrics }, config: { harnessConfig, sandboxConfig } }) {
+	const { tasks, metadata } = benchmark;
+	const { sandbox, agent } = harness;
+	const { snapshotConcurrency = 1, trailConcurrency = 1 } = harnessConfig ?? {};
+	yield* Effect.annotateCurrentSpan({ benchmark: metadata.name });
+	yield* Effect.logDebug("Starting evaluation schedule");
+	const metricQueue = yield* Queue.bounded(128);
+	const snapshotSem = yield* Semaphore.make(snapshotConcurrency);
+	const snapshotCountdown = yield* countdown_exports.make(tasks.length);
+	const trailSem = yield* Semaphore.make(trailConcurrency);
+	const scheduleTrail = Effect.fn("exec/scheduleTrail")(function* ({ task }) {
+		yield* Effect.annotateCurrentSpan({
+			benchmark: metadata.name,
+			taskName: task.metadata.name,
+			trailCount
+		});
+		yield* Effect.logDebug("Preparing task schedule");
+		const trail = yield* createTrail({
+			task,
+			metricQueue,
+			config: sandboxConfig
+		}).pipe((create) => snapshotSem.withPermit(create));
+		yield* Effect.logDebug("Task snapshot is ready");
+		yield* snapshotCountdown.open;
+		yield* Effect.logDebug("Waiting for all task snapshots");
+		yield* snapshotCountdown.await;
+		yield* Effect.logDebug("All task snapshots are ready");
+		const fibers = [];
+		for (const trailIndex of Array.from({ length: trailCount }, (_, index) => index)) {
+			yield* Effect.logDebug(`Forking trail ${trailIndex}`);
+			const fiber = yield* trail.pipe((trail) => trailSem.withPermit(trail)).pipe(Effect.forkScoped);
+			fibers.push(fiber);
+			yield* Effect.yieldNow;
+		}
+		yield* Effect.logDebug("Waiting for task trails");
+		yield* Effect.all(fibers.map((fiber) => Fiber.join(fiber)), { concurrency: "unbounded" });
+		yield* Effect.logDebug("Completed task trails");
+	}, (effect, { task }) => effect.pipe(Effect.annotateLogs({
+		benchmark: metadata.name,
+		taskName: task.metadata.name
+	})).pipe(Effect.provide(agent), Effect.provide(sandbox)).pipe(Effect.mapError(ExecError.taskInit({ task: task.metadata }))));
+	if (metrics) {
+		yield* Effect.logDebug("Starting metrics stream");
+		yield* Stream.fromQueue(metricQueue).pipe(transform({
+			metrics,
+			trailCount,
+			taskCount: tasks.length
+		}), Stream.runDrain, Pull.catchDone(() => Effect.void), Effect.mapError(ExecError.metric));
+	} else yield* Effect.logDebug("Skipping metrics stream");
+	yield* Effect.logDebug("Loading tasks");
+	const loadedTasks = yield* Effect.all(tasks.map((task) => task.pipe(Effect.mapError(ExecError.taskLoad))), { concurrency: "unbounded" });
+	yield* Effect.logDebug(`Loaded ${loadedTasks.length} task(s)`);
+	yield* Effect.all(loadedTasks.map((task) => scheduleTrail({ task })), { concurrency: "unbounded" }).pipe(Effect.scoped);
+	yield* Effect.logDebug("Scheduled all tasks");
+}, (effect, { executor: { benchmark } }) => effect.pipe(Effect.annotateLogs({ benchmark: benchmark.metadata.name })));
+//#endregion
+//#region src/exec/run.ts
+const run = Effect.fn(function* (executor, config = {}) {
+	yield* Effect.map(executor, (exec) => run$1({
+		executor: exec,
+		config
+	}));
+}, (effect) => effect.pipe(Effect.provide(NodeServices.layer), Effect.provide(NodeHttpClient.layerUndici), Effect.scoped));
+const runPromise = async (executor, config) => Effect.runPromise(run(executor, config));
+//#endregion
+//#region src/exec/index.ts
+// Export table for the exec module: each key maps to a thunk returning the
+// matching local binding, and the table is handed to the bundler runtime helper
+// `__exportAll$1` (presumably to build a namespace object with live getters;
+// the helper lives in the rolldown runtime chunk and is not visible here).
+var exec_exports = /* @__PURE__ */ __exportAll$1({
+	build: () => build,
+	init: () => init,
+	run: () => run,
+	runPromise: () => runPromise,
+	withBenchmark: () => withBenchmark,
+	withHarness: () => withHarness,
+	withMetrics: () => withMetrics,
+	withTrailCount: () => withTrailCount,
+	withTransport: () => withTransport
+});
+//#endregion
+export { withSnapshot as $, harness_exports as A, fromIterable as B, reduce as C, all$2 as D, reduce$1 as E, HarnessError as F, grade_exports as G, withGitRepo as H, InitError$1 as I, init$3 as J, bash as K, task_exports$1 as L, init$2 as M, withAgentProvider as N, each$2 as O, withSandboxProvider as P, withResources as Q, fromArray as R, each as S, each$1 as T, fromDir as U, fromStream as V, ResultSchema as W, withGrader as X, withContext as Y, withPrompt as Z, withTaskReduce as _, init as a, InitError$2 as at, withTrajReduce as b, withMetrics as c, init$1 as d, benchmark_exports as et, withBenchEach as f, withTaskEach as g, withTask as h, build as i, BenchmarkError as it, build$4 as j, reduce$2 as k, withTrailCount as l, withBenchmark$1 as m, run as n, init$4 as nt, withBenchmark as o, withBenchReduce as p, build$5 as q, runPromise as r, withTasks as rt, withHarness as s, exec_exports as t, build$6 as tt, metric_exports as u, withTraj as v, all$1 as w, all as x, withTrajEach as y, fromAsyncIterable as z };