npm - @evalgate/sdk - Versions diffs - 2.2.2 → 2.2.4 - Mend

@evalgate/sdk 2.2.2 → 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

package/CHANGELOG.md +32 -0
package/README.md +40 -1
package/dist/assertions.d.ts +194 -10
package/dist/assertions.js +525 -73
package/dist/batch.js +4 -4
package/dist/cache.d.ts +5 -1
package/dist/cache.js +5 -1
package/dist/cli/baseline.d.ts +14 -0
package/dist/cli/baseline.js +43 -3
package/dist/cli/check.d.ts +5 -2
package/dist/cli/check.js +20 -12
package/dist/cli/compare.d.ts +80 -0
package/dist/cli/compare.js +266 -0
package/dist/cli/index.js +244 -101
package/dist/cli/regression-gate.js +23 -0
package/dist/cli/run.js +22 -0
package/dist/cli/start.d.ts +26 -0
package/dist/cli/start.js +130 -0
package/dist/cli/templates.d.ts +24 -0
package/dist/cli/templates.js +314 -0
package/dist/cli/traces.d.ts +109 -0
package/dist/cli/traces.js +152 -0
package/dist/cli/upgrade.js +5 -0
package/dist/cli/validate.d.ts +37 -0
package/dist/cli/validate.js +252 -0
package/dist/cli/watch.d.ts +19 -0
package/dist/cli/watch.js +175 -0
package/dist/client.js +6 -13
package/dist/constants.d.ts +2 -0
package/dist/constants.js +5 -0
package/dist/errors.js +7 -0
package/dist/export.js +2 -2
package/dist/index.d.ts +10 -9
package/dist/index.js +24 -7
package/dist/integrations/anthropic.js +6 -6
package/dist/integrations/openai.js +84 -61
package/dist/logger.d.ts +3 -1
package/dist/logger.js +2 -1
package/dist/otel.d.ts +130 -0
package/dist/otel.js +309 -0
package/dist/pagination.d.ts +13 -2
package/dist/pagination.js +28 -2
package/dist/runtime/adapters/testsuite-to-dsl.js +1 -6
package/dist/runtime/eval.d.ts +14 -4
package/dist/runtime/eval.js +127 -2
package/dist/runtime/executor.d.ts +3 -2
package/dist/runtime/executor.js +3 -2
package/dist/runtime/registry.d.ts +8 -3
package/dist/runtime/registry.js +15 -4
package/dist/runtime/run-report.d.ts +1 -1
package/dist/runtime/run-report.js +7 -4
package/dist/runtime/types.d.ts +38 -0
package/dist/snapshot.d.ts +12 -0
package/dist/snapshot.js +24 -1
package/dist/testing.d.ts +8 -0
package/dist/testing.js +45 -10
package/dist/version.d.ts +2 -2
package/dist/version.js +2 -2
package/dist/workflows.d.ts +2 -0
package/dist/workflows.js +184 -102
package/package.json +8 -1

package/dist/otel.js ADDED Viewed

@@ -0,0 +1,309 @@
+"use strict";
+/**
+ * OpenTelemetry Export for WorkflowTracer
+ *
+ * Converts WorkflowTracer spans, decisions, and costs into
+ * OpenTelemetry-compatible span data for export to any OTEL collector.
+ *
+ * Usage:
+ *   import { OTelExporter } from "@evalgate/sdk/otel";
+ *
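+ *   // send() POSTs to this exact URL, so include the /v1/traces path.
+ *   const exporter = new OTelExporter({ endpoint: "http://localhost:4318/v1/traces" });
+ *   const tracer = new WorkflowTracer(client, { debug: true });
+ *   // ... run workflow ...
+ *   const payload = exporter.exportFromTracer(tracer); // only builds the OTLP payload
+ *   await exporter.send(payload);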
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.OTelExporter = void 0;
+exports.createOTelExporter = createOTelExporter;
+/**
+ * Produce a trace identifier: 16 random bytes rendered as 32 lowercase hex
+ * characters. Randomness comes from Math.random(), so the result is not
+ * cryptographically strong.
+ */
+function generateTraceId() {
+    let hex = "";
+    for (let remaining = 16; remaining > 0; remaining--) {
+        const byte = Math.floor(Math.random() * 256);
+        hex += byte.toString(16).padStart(2, "0");
+    }
+    return hex;
+}
+/**
+ * Produce a span identifier: 8 random bytes rendered as 16 lowercase hex
+ * characters. Uses Math.random(), so it is not cryptographically strong.
+ */
+function generateSpanId() {
+    const toHexByte = () => Math.floor(Math.random() * 256)
+        .toString(16)
+        .padStart(2, "0");
+    return Array.from({ length: 8 }, toHexByte).join("");
+}
+/**
+ * Convert a millisecond value to a nanosecond decimal string, the form used
+ * for startTimeUnixNano / endTimeUnixNano in the payload.
+ *
+ * BigInt() throws a RangeError for non-integer numbers, so fractional
+ * milliseconds (e.g. a start time plus a sub-millisecond duration) are split
+ * into whole milliseconds and a nanosecond remainder before conversion.
+ * Integer input produces exactly the same output as before; NaN and
+ * ±Infinity still throw, because they have no timestamp representation.
+ *
+ * @param ms Milliseconds (integer or fractional number; bigint and numeric
+ *   strings are passed to BigInt() unchanged).
+ * @returns The value in nanoseconds as a base-10 string.
+ * @throws RangeError when `ms` is NaN or ±Infinity.
+ */
+function msToNano(ms) {
+    if (typeof ms === "number" && Number.isFinite(ms) && !Number.isInteger(ms)) {
+        const wholeMs = Math.trunc(ms);
+        // The remainder is < 1 ms, so scaling it to nanoseconds stays well inside double precision.
+        const extraNanos = Math.round((ms - wholeMs) * 1000000);
+        return `${BigInt(wholeMs) * BigInt(1000000) + BigInt(extraNanos)}`;
+    }
+    return `${BigInt(ms) * BigInt(1000000)}`;
+}
+/**
+ * Build one OTLP/JSON attribute ({ key, value: AnyValue }).
+ *
+ * Mapping by runtime type:
+ *   - string            -> stringValue
+ *   - boolean           -> boolValue
+ *   - integer number    -> intValue (as a decimal string)
+ *   - bigint            -> intValue (as a decimal string)
+ *   - other number      -> doubleValue (NaN / ±Infinity as their string names,
+ *                          because JSON.stringify would turn them into null)
+ *   - null / undefined  -> empty value ({})
+ *   - anything else     -> stringValue holding the JSON (or String()) form
+ *
+ * Previously every non-string, non-number value was emitted as boolValue,
+ * which produced malformed attributes for missing or structured values.
+ *
+ * @param key Attribute name.
+ * @param value Attribute value of any type.
+ * @returns An object of the shape { key, value }.
+ */
+function attr(key, value) {
+    switch (typeof value) {
+        case "string":
+            return { key, value: { stringValue: value } };
+        case "boolean":
+            return { key, value: { boolValue: value } };
+        case "bigint":
+            return { key, value: { intValue: String(value) } };
+        case "number":
+            if (Number.isInteger(value)) {
+                return { key, value: { intValue: String(value) } };
+            }
+            return {
+                key,
+                value: { doubleValue: Number.isFinite(value) ? value : String(value) },
+            };
+        default:
+            break;
+    }
+    if (value === undefined || value === null) {
+        return { key, value: {} };
+    }
+    let text;
+    try {
+        // JSON.stringify returns undefined for functions and symbols.
+        text = JSON.stringify(value) ?? String(value);
+    }
+    catch {
+        // Circular structures (or nested bigint) cannot be serialised as JSON.
+        text = String(value);
+    }
+    return { key, value: { stringValue: text } };
+}
+/**
+ * OpenTelemetry exporter for EvalGate WorkflowTracer data and run results.
+ *
+ * The export* methods only build a payload of the shape
+ * { resourceSpans: [{ resource, scopeSpans: [{ scope, spans }] }] };
+ * nothing is transmitted until that payload is passed to send().
+ *
+ * Numeric codes used in the spans (per the OTLP trace definitions):
+ * kind 1 = INTERNAL, status code 1 = OK, status code 2 = ERROR.
+ */
+class OTelExporter {
+    /**
+     * @param options Optional overrides; each field falls back to a default:
+     *   - endpoint: "http://localhost:4318/v1/traces" (send() POSTs to this exact URL)
+     *   - serviceName: "evalgate"
+     *   - resourceAttributes: {}
+     *   - sdkVersion: "2.2.4"
+     *   - headers: {}
+     */
+    constructor(options = {}) {
+        this.options = {
+            endpoint: options.endpoint ?? "http://localhost:4318/v1/traces",
+            serviceName: options.serviceName ?? "evalgate",
+            resourceAttributes: options.resourceAttributes ?? {},
+            sdkVersion: options.sdkVersion ?? "2.2.4",
+            headers: options.headers ?? {},
+        };
+    }
+    /**
+     * Build a payload from a WorkflowTracer instance.
+     *
+     * Emits one root span for the current workflow (if any) plus one span per
+     * decision, handoff and cost record. Decision and cost spans get synthetic
+     * 1 ms timestamps placed just before "now", in record order.
+     *
+     * @param tracer Object exposing getCurrentWorkflow(), getHandoffs(),
+     *   getDecisions() and getCosts().
+     * @returns The payload object; pass it to send() to transmit it.
+     */
+    exportFromTracer(tracer) {
+        const workflow = tracer.getCurrentWorkflow();
+        const handoffs = tracer.getHandoffs();
+        const decisions = tracer.getDecisions();
+        const costs = tracer.getCosts();
+        const traceId = generateTraceId();
+        const rootSpanId = generateSpanId();
+        const now = Date.now();
+        const spans = [];
+        // Children may only reference the root span when it is actually emitted;
+        // without a workflow they become top-level spans instead of orphans.
+        // (An undefined parentSpanId is dropped by JSON.stringify.)
+        const parentSpanId = workflow ? rootSpanId : undefined;
+        // Root workflow span
+        if (workflow) {
+            const startedMs = new Date(workflow.startedAt).getTime();
+            spans.push({
+                traceId,
+                spanId: rootSpanId,
+                name: `workflow.${workflow.name}`,
+                kind: 1,
+                // An unparsable startedAt yields NaN, which cannot be converted
+                // to a timestamp; fall back to the export time.
+                startTimeUnixNano: msToNano(Number.isNaN(startedMs) ? now : startedMs),
+                endTimeUnixNano: msToNano(now),
+                attributes: [
+                    attr("evalgate.workflow.name", workflow.name),
+                    attr("evalgate.workflow.id", workflow.id),
+                    attr("evalgate.workflow.trace_id", workflow.traceId),
+                ],
+                status: { code: 1 },
+                events: [],
+            });
+        }
+        // Decision spans
+        for (let i = 0; i < decisions.length; i++) {
+            const decision = decisions[i];
+            const spanId = generateSpanId();
+            spans.push(this.decisionToSpan(traceId, spanId, parentSpanId, decision, now - decisions.length + i));
+        }
+        // Handoff events
+        for (let i = 0; i < handoffs.length; i++) {
+            const handoff = handoffs[i];
+            const spanId = generateSpanId();
+            spans.push(this.handoffToSpan(traceId, spanId, parentSpanId, handoff));
+        }
+        // Cost spans
+        for (let i = 0; i < costs.length; i++) {
+            const cost = costs[i];
+            const spanId = generateSpanId();
+            spans.push(this.costToSpan(traceId, spanId, parentSpanId, cost, now - costs.length + i));
+        }
+        return this.buildPayload(spans);
+    }
+    /**
+     * Build a payload from a run result.
+     *
+     * Emits a root span for the run and one child span per entry in
+     * runResult.results. Child spans are laid out back to back starting at
+     * metadata.startedAt, each lasting result.duration.
+     *
+     * NOTE(review): assumes metadata.startedAt / completedAt and
+     * result.duration are millisecond numbers — confirm against the run
+     * result type.
+     *
+     * @param runResult Run result with runId, metadata, summary and results.
+     * @returns The payload object; pass it to send() to transmit it.
+     */
+    exportRunResult(runResult) {
+        const traceId = generateTraceId();
+        const rootSpanId = generateSpanId();
+        const spans = [];
+        // Root run span
+        spans.push({
+            traceId,
+            spanId: rootSpanId,
+            name: `evalgate.run.${runResult.runId}`,
+            kind: 1,
+            startTimeUnixNano: msToNano(runResult.metadata.startedAt),
+            endTimeUnixNano: msToNano(runResult.metadata.completedAt),
+            attributes: [
+                attr("evalgate.run.id", runResult.runId),
+                attr("evalgate.run.mode", runResult.metadata.mode),
+                attr("evalgate.run.duration_ms", runResult.metadata.duration),
+                attr("evalgate.run.pass_rate", runResult.summary.passRate),
+                attr("evalgate.run.passed", runResult.summary.passed),
+                attr("evalgate.run.failed", runResult.summary.failed),
+            ],
+            status: {
+                // Any failed spec marks the whole run as ERROR.
+                code: runResult.summary.failed > 0 ? 2 : 1,
+            },
+            events: [],
+        });
+        // Per-spec child spans
+        let offset = 0;
+        for (const spec of runResult.results) {
+            const spanId = generateSpanId();
+            const specStart = runResult.metadata.startedAt + offset;
+            const specEnd = specStart + spec.result.duration;
+            offset += spec.result.duration;
+            const attributes = [
+                attr("evalgate.spec.id", spec.specId),
+                attr("evalgate.spec.name", spec.name),
+                attr("evalgate.spec.file", spec.filePath),
+                attr("evalgate.spec.status", spec.result.status),
+                attr("evalgate.spec.duration_ms", spec.result.duration),
+            ];
+            if (spec.result.score !== undefined) {
+                attributes.push(attr("evalgate.spec.score", spec.result.score));
+            }
+            spans.push({
+                traceId,
+                spanId,
+                parentSpanId: rootSpanId,
+                name: `evalgate.spec.${spec.name}`,
+                kind: 1,
+                startTimeUnixNano: msToNano(specStart),
+                endTimeUnixNano: msToNano(specEnd),
+                attributes,
+                status: {
+                    // Every status other than "passed" is reported as ERROR.
+                    code: spec.result.status === "passed" ? 1 : 2,
+                    message: spec.result.error,
+                },
+                events: [],
+            });
+        }
+        return this.buildPayload(spans);
+    }
+    /**
+     * POST a payload as JSON to the configured endpoint.
+     *
+     * Never throws: network errors and non-2xx responses are logged with
+     * console.warn and reported through the return value.
+     *
+     * @param payload Object returned by exportFromTracer() or exportRunResult().
+     * @returns Promise resolving to true when the response status is 2xx, false otherwise.
+     */
+    async send(payload) {
+        try {
+            const response = await fetch(this.options.endpoint, {
+                method: "POST",
+                headers: {
+                    "Content-Type": "application/json",
+                    ...this.options.headers,
+                },
+                body: JSON.stringify(payload),
+            });
+            if (!response.ok) {
+                // Surface rejected exports instead of failing silently.
+                console.warn(`[OTelExporter] Collector responded with HTTP ${response.status}`);
+            }
+            return response.ok;
+        }
+        catch (err) {
+            console.warn(`[OTelExporter] Failed to send: ${err instanceof Error ? err.message : String(err)}`);
+            return false;
+        }
+    }
+    /**
+     * Convert one decision record into a 1 ms span starting at timestampMs.
+     * Confidence and reasoning attributes are added only when present.
+     */
+    decisionToSpan(traceId, spanId, parentSpanId, decision, timestampMs) {
+        return {
+            traceId,
+            spanId,
+            parentSpanId,
+            name: `decision.${decision.agent}.${decision.chosen}`,
+            kind: 1,
+            startTimeUnixNano: msToNano(timestampMs),
+            endTimeUnixNano: msToNano(timestampMs + 1),
+            attributes: [
+                attr("evalgate.decision.agent", decision.agent),
+                attr("evalgate.decision.type", decision.type),
+                attr("evalgate.decision.chosen", decision.chosen),
+                // A decision recorded without alternatives counts as zero.
+                attr("evalgate.decision.alternatives", decision.alternatives?.length ?? 0),
+                ...(decision.confidence !== undefined
+                    ? [attr("evalgate.decision.confidence", decision.confidence)]
+                    : []),
+                ...(decision.reasoning
+                    ? [attr("evalgate.decision.reasoning", decision.reasoning)]
+                    : []),
+            ],
+            status: { code: 1 },
+            events: [],
+        };
+    }
+    /**
+     * Convert one handoff record into a 1 ms span starting at handoff.timestamp.
+     * A missing fromAgent is reported as "start".
+     */
+    handoffToSpan(traceId, spanId, parentSpanId, handoff) {
+        const ts = new Date(handoff.timestamp).getTime();
+        return {
+            traceId,
+            spanId,
+            parentSpanId,
+            name: `handoff.${handoff.fromAgent ?? "start"}.${handoff.toAgent}`,
+            kind: 1,
+            startTimeUnixNano: msToNano(ts),
+            endTimeUnixNano: msToNano(ts + 1),
+            attributes: [
+                attr("evalgate.handoff.from", handoff.fromAgent ?? "start"),
+                attr("evalgate.handoff.to", handoff.toAgent),
+                attr("evalgate.handoff.type", handoff.handoffType),
+            ],
+            status: { code: 1 },
+            events: [],
+        };
+    }
+    /**
+     * Convert one cost record into a 1 ms span starting at timestampMs,
+     * carrying provider, model, token counts and total cost as attributes.
+     */
+    costToSpan(traceId, spanId, parentSpanId, cost, timestampMs) {
+        return {
+            traceId,
+            spanId,
+            parentSpanId,
+            name: `cost.${cost.provider}.${cost.model}`,
+            kind: 1,
+            startTimeUnixNano: msToNano(timestampMs),
+            endTimeUnixNano: msToNano(timestampMs + 1),
+            attributes: [
+                attr("evalgate.cost.provider", cost.provider),
+                attr("evalgate.cost.model", cost.model),
+                attr("evalgate.cost.input_tokens", cost.inputTokens),
+                attr("evalgate.cost.output_tokens", cost.outputTokens),
+                attr("evalgate.cost.total_tokens", cost.totalTokens),
+                attr("evalgate.cost.total_usd", cost.totalCost),
+            ],
+            status: { code: 1 },
+            events: [],
+        };
+    }
+    /**
+     * Wrap spans in the resourceSpans / scopeSpans envelope.
+     * Resource attributes are the fixed service/SDK entries followed by the
+     * user-supplied resourceAttributes from the options.
+     */
+    buildPayload(spans) {
+        const resourceAttrs = [
+            attr("service.name", this.options.serviceName),
+            attr("telemetry.sdk.name", "evalgate"),
+            attr("telemetry.sdk.version", this.options.sdkVersion),
+            attr("telemetry.sdk.language", "nodejs"),
+        ];
+        for (const [key, value] of Object.entries(this.options.resourceAttributes)) {
+            resourceAttrs.push(attr(key, value));
+        }
+        return {
+            resourceSpans: [
+                {
+                    resource: { attributes: resourceAttrs },
+                    scopeSpans: [
+                        {
+                            scope: {
+                                name: "evalgate",
+                                version: this.options.sdkVersion,
+                            },
+                            spans,
+                        },
+                    ],
+                },
+            ],
+        };
+    }
+}
+exports.OTelExporter = OTelExporter;
+/**
+ * Factory shorthand: returns a new OTelExporter built from `options`.
+ * Equivalent to calling `new OTelExporter(options)` directly.
+ */
+function createOTelExporter(options) {
+    const exporter = new OTelExporter(options);
+    return exporter;
+}

package/dist/pagination.d.ts CHANGED Viewed

@@ -50,9 +50,20 @@ export declare function createPaginatedIterator<T>(fetchFn: (offset: number, lim
     hasMore: boolean;
 }>, limit?: number): PaginatedIterator<T>;
 /**
- * Auto-paginate helper that fetches all pages automatically
+ * Auto-paginate helper that fetches all pages and returns a flat array.
+ * @example
+ * ```typescript
+ * const allItems = await autoPaginate(
+ *   (offset, limit) => client.traces.list({ offset, limit }),
+ * );
+ * ```
  */
-export declare function autoPaginate<T>(fetchFn: (offset: number, limit: number) => Promise<T[]>, limit?: number): AsyncGenerator<T, void, unknown>;
+export declare function autoPaginate<T>(fetchFn: (offset: number, limit: number) => Promise<T[]>, limit?: number): Promise<T[]>;
+/**
+ * Streaming auto-paginate generator — yields individual items one at a time.
+ * Use this when you want to process items as they arrive rather than waiting for all pages.
+ */
+export declare function autoPaginateGenerator<T>(fetchFn: (offset: number, limit: number) => Promise<T[]>, limit?: number): AsyncGenerator<T, void, unknown>;
 /**
  * Encode cursor for pagination (base64)
  */

package/dist/pagination.js CHANGED Viewed

@@ -6,6 +6,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.PaginatedIterator = void 0;
 exports.createPaginatedIterator = createPaginatedIterator;
 exports.autoPaginate = autoPaginate;
+exports.autoPaginateGenerator = autoPaginateGenerator;
 exports.encodeCursor = encodeCursor;
 exports.decodeCursor = decodeCursor;
 exports.createPaginationMeta = createPaginationMeta;
@@ -56,9 +57,34 @@ function createPaginatedIterator(fetchFn, limit = 50) {
     return new PaginatedIterator(fetchFn, limit);
 }
 /**
- * Auto-paginate helper that fetches all pages automatically
+ * Auto-paginate helper that fetches all pages and returns a flat array.
+ * @example
+ * ```typescript
+ * const allItems = await autoPaginate(
+ *   (offset, limit) => client.traces.list({ offset, limit }),
+ * );
+ * ```
  */
-async function* autoPaginate(fetchFn, limit = 50) {
+async function autoPaginate(fetchFn, limit = 50) {
+    const result = [];
+    for (let offset = 0;; offset += limit) {
+        const items = await fetchFn(offset, limit);
+        // Append element by element: `push(...items)` passes the whole page as
+        // call arguments and can throw a RangeError on very large pages.
+        for (const item of items) {
+            result.push(item);
+        }
+        // An empty or short page means there is nothing left to fetch.
+        if (items.length === 0 || items.length !== limit) {
+            break;
+        }
+    }
+    return result;
+}
+/**
+ * Streaming auto-paginate generator — yields individual items one at a time.
+ * Use this when you want to process items as they arrive rather than waiting for all pages.
+ */
+async function* autoPaginateGenerator(fetchFn, limit = 50) {
     let offset = 0;
     let hasMore = true;
     while (hasMore) {

package/dist/runtime/adapters/testsuite-to-dsl.js CHANGED Viewed

@@ -208,12 +208,7 @@ function generateDefineEvalCode(suite, options = {}) {
     });
     const helperFunctions = generateHelperFunctionsForSuite(specs, options);
     const evaluationFunction = generateEvaluationFunction();
-    return [
-        ...imports,
-        ...helperFunctions,
-        ...evaluationFunction,
-        ...specCode,
-    ].join("\n");
+    return [...imports, helperFunctions, evaluationFunction, ...specCode].join("\n");
 }
 /**
  * Generate helper functions for a specific spec

package/dist/runtime/eval.d.ts CHANGED Viewed

@@ -4,12 +4,19 @@
  * The core DSL function for defining behavioral specifications.
  * Uses content-addressable identity with AST position for stability.
  */
-import type { DefineEvalFunction, EvalContext, EvalResult } from "./types";
+import { createEvalRuntime, disposeActiveRuntime, getActiveRuntime, setActiveRuntime, withRuntime } from "./registry";
+import type { DefineEvalFunction, EvalContext, EvalResult, EvalSpec } from "./types";
 /**
  * Export the defineEval function with proper typing
  * This is the main DSL entry point
  */
 export declare const defineEval: DefineEvalFunction;
+/**
+ * Filter a list of specs according to skip/only semantics:
+ * - If any spec has mode === "only", return only those specs
+ * - Otherwise, return all specs except those with mode === "skip"
+ */
+export declare function getFilteredSpecs(specs: EvalSpec[]): EvalSpec[];
 /**
  * Convenience export for evalai.test() alias (backward compatibility)
  * Provides alternative naming that matches the original roadmap vision
@@ -48,8 +55,11 @@ export declare function createResult(config: {
     assertions?: EvalResult["assertions"];
     metadata?: Record<string, unknown>;
     error?: string;
+    output?: string;
+    durationMs?: number;
+    tokens?: number;
 }): EvalResult;
-/**
- * Default export for convenience
- */
+export { createEvalRuntime, disposeActiveRuntime, getActiveRuntime, setActiveRuntime, withRuntime, };
+export { createContext as createEvalContext };
+export { createLocalExecutor } from "./executor";
 export default defineEval;

package/dist/runtime/eval.js CHANGED Viewed

@@ -39,13 +39,21 @@ var __importStar = (this && this.__importStar) || (function () {
     };
 })();
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.evalai = exports.defineEval = void 0;
+exports.createLocalExecutor = exports.withRuntime = exports.setActiveRuntime = exports.getActiveRuntime = exports.disposeActiveRuntime = exports.createEvalRuntime = exports.evalai = exports.defineEval = void 0;
+exports.getFilteredSpecs = getFilteredSpecs;
 exports.defineSuite = defineSuite;
 exports.createContext = createContext;
+exports.createEvalContext = createContext;
 exports.createResult = createResult;
 const crypto = __importStar(require("node:crypto"));
+const fs = __importStar(require("node:fs"));
 const path = __importStar(require("node:path"));
 const registry_1 = require("./registry");
+Object.defineProperty(exports, "createEvalRuntime", { enumerable: true, get: function () { return registry_1.createEvalRuntime; } });
+Object.defineProperty(exports, "disposeActiveRuntime", { enumerable: true, get: function () { return registry_1.disposeActiveRuntime; } });
+Object.defineProperty(exports, "getActiveRuntime", { enumerable: true, get: function () { return registry_1.getActiveRuntime; } });
+Object.defineProperty(exports, "setActiveRuntime", { enumerable: true, get: function () { return registry_1.setActiveRuntime; } });
+Object.defineProperty(exports, "withRuntime", { enumerable: true, get: function () { return registry_1.withRuntime; } });
 const types_1 = require("./types");
 /**
  * Extract AST position from call stack
@@ -159,7 +167,7 @@ function createSpecConfig(nameOrConfig, executor, options) {
 /**
  * Core defineEval function implementation
  */
-function defineEvalImpl(nameOrConfig, executor, options) {
+function defineEvalWithMode(mode, nameOrConfig, executor, options) {
     // Get caller position for identity
     const callerPosition = getCallerPosition();
     // Create specification configuration
@@ -187,15 +195,124 @@ function defineEvalImpl(nameOrConfig, executor, options) {
             budget: config.budget,
             model: config.model,
         },
+        mode,
     };
     // Register specification
     runtime.register(spec);
 }
+/**
+ * Register a spec in the default "normal" mode.
+ * NOTE(review): defineEvalWithMode appears to derive spec identity from the
+ * call stack (getCallerPosition), so this stays a single direct call.
+ */
+function defineEvalImpl(nameOrConfig, executor, options) {
+    defineEvalWithMode("normal", nameOrConfig, executor, options);
+}
+/**
+ * Register a spec tagged with mode "skip".
+ * NOTE(review): defineEvalWithMode appears to derive spec identity from the
+ * call stack (getCallerPosition), so this stays a single direct call.
+ */
+function defineEvalSkipImpl(nameOrConfig, executor, options) {
+    defineEvalWithMode("skip", nameOrConfig, executor, options);
+}
+/**
+ * Register a spec tagged with mode "only".
+ * NOTE(review): defineEvalWithMode appears to derive spec identity from the
+ * call stack (getCallerPosition), so this stays a single direct call.
+ */
+function defineEvalOnlyImpl(nameOrConfig, executor, options) {
+    defineEvalWithMode("only", nameOrConfig, executor, options);
+}
 /**
  * Export the defineEval function with proper typing
  * This is the main DSL entry point
  */
 exports.defineEval = defineEvalImpl;
+// Attach .skip and .only modifiers (vitest/jest convention)
+exports.defineEval.skip = defineEvalSkipImpl;
+exports.defineEval.only = defineEvalOnlyImpl;
+/**
+ * Parse JSONL text into an array of parsed rows.
+ *
+ * Blank (whitespace-only) lines are skipped but still counted, so the line
+ * number reported for a malformed row matches its position in the file.
+ * (Previously the number was an index into the blank-filtered list and was
+ * off whenever blank lines preceded the bad row.)
+ *
+ * @param content Raw file contents.
+ * @returns One parsed JSON value per non-blank line, in file order.
+ * @throws SpecRegistrationError when a non-blank line is not valid JSON.
+ */
+function parseJsonl(content) {
+    const rows = [];
+    const lines = content.split("\n");
+    for (let i = 0; i < lines.length; i++) {
+        const line = lines[i].trim();
+        if (line.length === 0) {
+            continue;
+        }
+        try {
+            rows.push(JSON.parse(line));
+        }
+        catch {
+            throw new types_1.SpecRegistrationError(`Invalid JSON on line ${i + 1} of dataset`);
+        }
+    }
+    return rows;
+}
+/**
+ * Minimal CSV reader: the first non-blank line supplies the column names and
+ * every following non-blank line becomes one row object keyed by them.
+ * Cells are split on plain commas and trimmed; quoting and escaping are not
+ * supported, so use a dedicated library for complex CSV. A missing cell
+ * becomes "". Returns [] when there is no data line after the header.
+ */
+function parseCsv(content) {
+    const nonBlank = [];
+    for (const raw of content.split("\n")) {
+        const trimmed = raw.trim();
+        if (trimmed.length > 0) {
+            nonBlank.push(trimmed);
+        }
+    }
+    if (nonBlank.length < 2) {
+        return [];
+    }
+    const splitCells = (text) => text.split(",").map((cell) => cell.trim());
+    const [headerLine, ...dataLines] = nonBlank;
+    const headers = splitCells(headerLine);
+    return dataLines.map((dataLine) => {
+        const cells = splitCells(dataLine);
+        const record = {};
+        headers.forEach((header, column) => {
+            record[header] = cells[column] ?? "";
+        });
+        return record;
+    });
+}
+/**
+ * Read a dataset file and register one spec per row.
+ *
+ * Relative paths are resolved against process.cwd(). The parser is chosen by
+ * file extension: .jsonl/.ndjson, .csv, or .json (a non-array JSON value is
+ * treated as a single row). Each spec is named "<name> - row <n>" (1-based),
+ * runs `executor` with the row as `context.input`, and carries
+ * `datasetPath` / `datasetRow` in its metadata.
+ *
+ * Throws SpecRegistrationError when the file is missing, the extension is
+ * unsupported, or the dataset has no rows.
+ */
+function fromDatasetImpl(name, datasetPath, executor, options) {
+    let resolvedPath = datasetPath;
+    if (!path.isAbsolute(datasetPath)) {
+        resolvedPath = path.resolve(process.cwd(), datasetPath);
+    }
+    if (!fs.existsSync(resolvedPath)) {
+        throw new types_1.SpecRegistrationError(`Dataset file not found: ${resolvedPath}`);
+    }
+    const content = fs.readFileSync(resolvedPath, "utf8");
+    const ext = path.extname(resolvedPath).toLowerCase();
+    let rows;
+    switch (ext) {
+        case ".jsonl":
+        case ".ndjson":
+            rows = parseJsonl(content);
+            break;
+        case ".csv":
+            rows = parseCsv(content);
+            break;
+        case ".json": {
+            const parsed = JSON.parse(content);
+            rows = Array.isArray(parsed) ? parsed : [parsed];
+            break;
+        }
+        default:
+            throw new types_1.SpecRegistrationError(`Unsupported dataset format: ${ext}. Use .jsonl, .ndjson, .csv, or .json`);
+    }
+    if (rows.length === 0) {
+        throw new types_1.SpecRegistrationError(`Dataset is empty: ${resolvedPath}`);
+    }
+    // NOTE(review): defineEvalWithMode appears to read the caller position from
+    // the call stack, so it is invoked directly from this frame (no callback).
+    for (const [index, row] of rows.entries()) {
+        const rowNumber = index + 1;
+        const rowExecutor = (context) => executor({ ...context, input: row });
+        const rowOptions = {
+            ...options,
+            metadata: {
+                ...options?.metadata,
+                datasetPath: resolvedPath,
+                datasetRow: rowNumber,
+            },
+        };
+        defineEvalWithMode("normal", `${name} - row ${rowNumber}`, rowExecutor, rowOptions);
+    }
+}
+exports.defineEval.fromDataset = fromDatasetImpl;
+/**
+ * Apply skip/only selection to a list of specs.
+ * When at least one spec is marked "only", exactly those specs are returned;
+ * otherwise every spec whose mode is not "skip" is returned. Order is kept.
+ */
+function getFilteredSpecs(specs) {
+    const isFocused = (spec) => spec.mode === "only";
+    const focused = specs.filter(isFocused);
+    return focused.length > 0
+        ? focused
+        : specs.filter((spec) => spec.mode !== "skip");
+}
 /**
  * Convenience export for evalai.test() alias (backward compatibility)
  * Provides alternative naming that matches the original roadmap vision
@@ -245,9 +362,17 @@ function createResult(config) {
         assertions: config.assertions,
         metadata: config.metadata,
         error: config.error,
+        output: config.output,
+        durationMs: config.durationMs,
+        tokens: config.tokens,
     };
 }
 /**
  * Default export for convenience
  */
+// Register defineEval with registry to break circular dependency
+(0, registry_1._registerDefineEval)(exports.defineEval);
+// Re-export createLocalExecutor from executor.ts
+var executor_1 = require("./executor");
+Object.defineProperty(exports, "createLocalExecutor", { enumerable: true, get: function () { return executor_1.createLocalExecutor; } });
 exports.default = exports.defineEval;

package/dist/runtime/executor.d.ts CHANGED Viewed

@@ -10,7 +10,8 @@ import type { LocalExecutor } from "./types";
  */
 export declare function createLocalExecutor(): LocalExecutor;
 /**
- * Default local executor instance
+ * Default local executor factory
+ * Call as defaultLocalExecutor() to get a new executor instance.
  * For convenience in simple use cases
  */
-export declare const defaultLocalExecutor: LocalExecutor;
+export declare const defaultLocalExecutor: typeof createLocalExecutor;

package/dist/runtime/executor.js CHANGED Viewed

@@ -146,7 +146,8 @@ function createLocalExecutor() {
     return new LocalExecutorImpl();
 }
 /**
- * Default local executor instance
+ * Default local executor factory
+ * Call as defaultLocalExecutor() to get a new executor instance.
  * For convenience in simple use cases
  */
-exports.defaultLocalExecutor = createLocalExecutor();
+exports.defaultLocalExecutor = createLocalExecutor;

package/dist/runtime/registry.d.ts CHANGED Viewed

@@ -4,7 +4,9 @@
  * Scoped registry with proper lifecycle management.
  * Prevents cross-run contamination and memory leaks.
  */
-import type { EvalRuntime } from "./types";
+import type { DefineEvalFunction, EvalRuntime } from "./types";
+/** @internal Called by eval.ts to register defineEval without circular import */
+export declare function _registerDefineEval(fn: (...args: unknown[]) => unknown): void;
 /**
  * Runtime interface with lifecycle management
  * Ensures proper cleanup and prevents resource leaks
@@ -13,7 +15,7 @@ export interface RuntimeHandle {
     /** Runtime instance */
     runtime: EvalRuntime;
     /** defineEval function bound to this runtime */
-    defineEval: typeof import("./eval").defineEval;
+    defineEval: DefineEvalFunction;
     /** Dispose runtime and clean up resources */
     dispose(): void;
     /** Create runtime snapshot for persistence */
@@ -61,7 +63,10 @@ export interface SerializedSpec {
  * Create a new scoped runtime with lifecycle management
  * Returns a handle for proper resource management
  */
-export declare function createEvalRuntime(projectRoot?: string): RuntimeHandle;
+export declare function createEvalRuntime(projectRootOrConfig?: string | {
+    name?: string;
+    projectRoot?: string;
+}): RuntimeHandle;
 /**
  * Helper function for safe runtime execution with automatic cleanup
  * Ensures runtime is disposed even if an exception is thrown