npm - @sebastiantuyu/agest - Versions diffs - 0.3.2 → 0.3.3-next.10 - Mend

@sebastiantuyu/agest 0.3.2 → 0.3.3-next.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40)

package/README.md +158 -1
package/dist/adapters/index.d.ts +2 -0
package/dist/adapters/index.js +1 -0
package/dist/adapters/langchain.d.ts +1 -1
package/dist/adapters/langchain.js +80 -11
package/dist/adapters/remote.d.ts +1 -1
package/dist/adapters/remote.js +3 -2
package/dist/adapters/tracing.d.ts +73 -0
package/dist/adapters/tracing.js +338 -0
package/dist/assertions.d.ts +57 -2
package/dist/assertions.js +119 -33
package/dist/cli.d.ts +15 -1
package/dist/cli.js +97 -18
package/dist/config.d.ts +9 -0
package/dist/context.d.ts +32 -11
package/dist/context.js +84 -10
package/dist/discover.d.ts +16 -0
package/dist/discover.js +62 -0
package/dist/index.d.ts +20 -2
package/dist/index.js +10 -3
package/dist/match.d.ts +28 -0
package/dist/match.js +57 -0
package/dist/preview.js +93 -0
package/dist/pricing/index.d.ts +32 -0
package/dist/pricing/index.js +48 -0
package/dist/pricing/models.json +21 -0
package/dist/reporter.d.ts +1 -1
package/dist/reporter.js +77 -4
package/dist/reports.d.ts +37 -0
package/dist/reports.js +126 -0
package/dist/resolve.d.ts +25 -0
package/dist/resolve.js +62 -0
package/dist/runner.d.ts +11 -2
package/dist/runner.js +97 -11
package/dist/schema.d.ts +63 -0
package/dist/schema.js +61 -0
package/dist/types.d.ts +84 -9
package/dist/waterfall.d.ts +11 -0
package/dist/waterfall.js +46 -0
package/package.json +24 -15

package/dist/cli.js CHANGED Viewed

@@ -1,43 +1,122 @@
 #!/usr/bin/env node
 import { spawn } from "child_process";
+import { fileURLToPath } from "node:url";
+import { realpathSync } from "node:fs";
 import { main as stats } from "./stats.js";
 import { main as preview } from "./preview.js";
-const command = process.argv[2];
-async function run() {
-    const files = process.argv.slice(3);
+import { DEFAULT_PATTERN, discoverTestFiles } from "./discover.js";
+/**
+ * Extract the args that follow the command word from a full `process.argv`.
+ * `argv = [execPath, scriptPath, command, ...commandArgs]`, so the command's
+ * args always start at index 3. Capturing them here (once, from the original
+ * argv) avoids re-slicing a mutated argv downstream — the double-shift that
+ * silently dropped a lone `run` target and made discovery scan the whole cwd.
+ */
+export function getCommandArgs(argv) {
+    return argv.slice(3);
+}
+export function parseRunArgs(args) {
+    const targets = [];
+    let pattern;
+    let full = false;
+    for (let i = 0; i < args.length; i++) {
+        const a = args[i];
+        if (a === "--pattern" || a === "-p") {
+            pattern = args[++i];
+            if (pattern === undefined) {
+                console.error("  Error: --pattern requires a value");
+                process.exit(1);
+            }
+        }
+        else if (a.startsWith("--pattern=")) {
+            pattern = a.slice("--pattern=".length);
+        }
+        else if (a === "--full") {
+            full = true;
+        }
+        else {
+            targets.push(a);
+        }
+    }
+    return { pattern, targets, full };
+}
+async function run(args) {
+    const { pattern, targets, full } = parseRunArgs(args);
+    const files = await discoverTestFiles(targets, { pattern });
     if (files.length === 0) {
-        console.error("  Usage: agest run <file...>");
+        const effective = pattern ?? DEFAULT_PATTERN;
+        console.error(`  No test files found (pattern: ${effective})`);
         process.exit(1);
     }
     for (const file of files) {
         const child = spawn("npx", ["tsx", file], {
             stdio: "inherit",
             shell: true,
+            // The test file renders its own output in a child process; propagate the
+            // --full flag through the environment so it knows to emit the waterfall
+            // and full report rather than just per-scene results.
+            env: full ? { ...process.env, AGEST_FULL: "1" } : process.env,
         });
         const code = await new Promise((resolve) => child.on("close", (c) => resolve(c ?? 1)));
         if (code !== 0)
             process.exit(code);
     }
 }
-const commands = {
-    stats,
-    preview,
-    run,
-};
-if (!command || !commands[command]) {
+function printUsage() {
     console.log(`
   Usage: agest <command>
   Commands:
-    run        Run test file(s)    agest run tests/*.test.ts
+    run        Run test file(s), directories, or glob patterns
+               agest run tests/                       # walks for ${DEFAULT_PATTERN}
+               agest run src/agest --pattern "**/*.test.ts"
+               agest run "tests/**/*.agest.ts" path/to/file.agest.ts
+               agest run tests/ --full                # also print waterfall + full report
     stats      Show aggregated test statistics
     preview    Generate an HTML report preview
 `);
-    process.exit(command ? 1 : 0);
 }
-// Forward remaining args so subcommands see them at process.argv[2+]
-process.argv = [process.argv[0], process.argv[1], ...process.argv.slice(3)];
-commands[command]().catch((err) => {
-    console.error("Error:", err.message);
-    process.exit(1);
-});
+const KNOWN_COMMANDS = new Set(["run", "stats", "preview"]);
+export async function main(argv) {
+    const command = argv[2];
+    const commandArgs = getCommandArgs(argv);
+    if (!command || !KNOWN_COMMANDS.has(command)) {
+        printUsage();
+        process.exit(command ? 1 : 0);
+    }
+    if (command === "run") {
+        await run(commandArgs);
+        return;
+    }
+    // stats/preview read their args from `process.argv.slice(2)`, so normalize
+    // argv to drop the command word before handing off.
+    process.argv = [argv[0], argv[1], ...commandArgs];
+    if (command === "stats")
+        await stats();
+    else
+        await preview();
+}
+// Only run as a CLI when invoked directly (bin or `tsx src/cli.ts`), not when
+// imported by a test — that keeps `main` from firing (and calling
+// process.exit) on import. Package managers expose the bin as a symlink
+// (node_modules/.bin/agest), so argv[1] is the symlink path while
+// import.meta.url is the real file; realpath both sides before comparing or
+// the CLI silently no-ops when invoked through the symlink.
+function isInvokedAsCli() {
+    const entry = process.argv[1];
+    if (!entry)
+        return false;
+    const self = fileURLToPath(import.meta.url);
+    try {
+        return realpathSync(entry) === realpathSync(self);
+    }
+    catch {
+        return entry === self;
+    }
+}
+if (isInvokedAsCli()) {
+    main(process.argv).catch((err) => {
+        console.error("Error:", err.message);
+        process.exit(1);
+    });
+}

package/dist/config.d.ts CHANGED Viewed

@@ -15,6 +15,15 @@ export interface AgestConfig {
     turns?: number;
     runs?: number;
     judge?: JudgeConfig;
+    /**
+     * Per-model pricing override (USD per 1M tokens). Merged on top of the
+     * built-in `src/pricing/models.json` table. Provide entries for any model
+     * you use that isn't already in the table, or to override a default.
+     */
+    pricing?: Record<string, {
+        input: number;
+        output: number;
+    }>;
 }
 export declare function defineConfig(config: AgestConfig): AgestConfig;
 export declare function loadConfig(): Promise<AgestConfig>;

package/dist/context.d.ts CHANGED Viewed

@@ -1,36 +1,57 @@
 import type { AgentExecutor, AgentReport, HookFn, SceneDefinition } from "./types";
-export declare class SceneBuilder {
+import type { StandardSchemaV1 } from "./schema";
+/**
+ * Builds a scene. Generic over `T`, the agent's native value type, so the
+ * known fields hand a typed value to the assertion callback:
+ *   - `"value"` / `"response"` → `T`
+ *   - `"text"`                 → `string`
+ *   - `"refusal"`              → `boolean | undefined`
+ *   - any dot-path / other     → `any` (a string field can't be typed)
+ * `T` flows in from a schema-typed `agent()` via the scene fn passed to its
+ * callback. The free `scene()` import stays `SceneBuilder<string>`.
+ */
+export declare class SceneBuilder<T = string> {
     private _prompt;
     private _assertions;
     private _timeout?;
     private _turns?;
     private _runs?;
     private _suite?;
+    private _schema?;
     constructor(_prompt: string);
-    timeout(ms: number): SceneBuilder;
-    turns(n: number): SceneBuilder;
-    runs(n: number): SceneBuilder;
+    timeout(ms: number): this;
+    turns(n: number): this;
+    runs(n: number): this;
     /** @internal */
     _setSuite(name: string): void;
-    expect(field: string, fn: (value: any) => void): SceneBuilder;
+    expect(field: "value" | "response", fn: (value: T) => void): this;
+    expect(field: "text", fn: (value: string) => void): this;
+    expect(field: "refusal", fn: (value: boolean | undefined) => void): this;
+    expect(field: string, fn: (value: any) => void): this;
+    /**
+     * Validate this scene's native value against a Standard Schema before user
+     * assertions run. Overrides any schema declared on the agent.
+     */
+    expectSchema(schema: StandardSchemaV1): this;
     toDefinition(): SceneDefinition;
 }
-export declare class AgentContext {
+export declare class AgentContext<T = string> {
     private _executor;
     private _name?;
+    private _schema?;
     private _scenes;
     private _currentSuite?;
     private _beforeAllHooks;
     private _afterAllHooks;
     private _beforeEachHooks;
     private _afterEachHooks;
-    constructor(_executor: AgentExecutor, _name?: string | undefined);
+    constructor(_executor: AgentExecutor<T>, _name?: string | undefined, _schema?: StandardSchemaV1 | undefined);
     registerHook(type: "beforeAll" | "afterAll" | "beforeEach" | "afterEach", fn: HookFn): void;
     setSuite(name: string): void;
     clearSuite(): void;
-    registerScene(prompt: string): SceneBuilder;
-    execute(): Promise<AgentReport>;
+    registerScene(prompt: string): SceneBuilder<T>;
+    execute(): Promise<AgentReport<T>>;
 }
 export declare function hashPromptOnly(prompt: string): string;
-export declare function setContext(ctx: AgentContext | null): void;
-export declare function getContext(): AgentContext;
+export declare function setContext(ctx: AgentContext<any> | null): void;
+export declare function getContext(): AgentContext<any>;

package/dist/context.js CHANGED Viewed

@@ -1,9 +1,22 @@
 import { createHash } from "crypto";
 import { executeScene } from "./runner";
+import { resolveText } from "./resolve";
 import { formatReport, writeReport, writeDiffEntry } from "./reporter";
 import { logger, c } from "./logger";
 import { loadConfig } from "./config";
+import { setPricingOverrides } from "./pricing";
+import { renderTerminalWaterfall } from "./waterfall";
 import { PromisePool } from "@supercharge/promise-pool";
+/**
+ * Builds a scene. Generic over `T`, the agent's native value type, so the
+ * known fields hand a typed value to the assertion callback:
+ *   - `"value"` / `"response"` → `T`
+ *   - `"text"`                 → `string`
+ *   - `"refusal"`              → `boolean | undefined`
+ *   - any dot-path / other     → `any` (a string field can't be typed)
+ * `T` flows in from a schema-typed `agent()` via the scene fn passed to its
+ * callback. The free `scene()` import stays `SceneBuilder<string>`.
+ */
 export class SceneBuilder {
     _prompt;
     _assertions = [];
@@ -11,6 +24,7 @@ export class SceneBuilder {
     _turns;
     _runs;
     _suite;
+    _schema;
     constructor(_prompt) {
         this._prompt = _prompt;
     }
@@ -34,6 +48,14 @@ export class SceneBuilder {
         this._assertions.push({ field, fn });
         return this;
     }
+    /**
+     * Validate this scene's native value against a Standard Schema before user
+     * assertions run. Overrides any schema declared on the agent.
+     */
+    expectSchema(schema) {
+        this._schema = schema;
+        return this;
+    }
     toDefinition() {
         return {
             prompt: this._prompt,
@@ -42,21 +64,24 @@ export class SceneBuilder {
             turns: this._turns,
             runs: this._runs,
             suite: this._suite,
+            schema: this._schema,
         };
     }
 }
 export class AgentContext {
     _executor;
     _name;
+    _schema;
     _scenes = [];
     _currentSuite;
     _beforeAllHooks = [];
     _afterAllHooks = [];
     _beforeEachHooks = [];
     _afterEachHooks = [];
-    constructor(_executor, _name) {
+    constructor(_executor, _name, _schema) {
         this._executor = _executor;
         this._name = _name;
+        this._schema = _schema;
     }
     registerHook(type, fn) {
         this[`_${type}Hooks`].push(fn);
@@ -76,9 +101,20 @@ export class AgentContext {
         return builder;
     }
     async execute() {
+        // `--full` flows in via the CLI runner (AGEST_FULL env) or directly on argv
+        // when a test file is run standalone (`tsx foo.test.ts --full`). Default is
+        // lean output: per-scene results only, no waterfall, no full report dump.
+        const full = process.env.AGEST_FULL === "1" || process.argv.includes("--full");
         const config = await loadConfig();
+        setPricingOverrides(config.pricing);
         const parallelism = Math.max(1, config.parallelism ?? 1);
-        const definitions = this._scenes.map((s) => s.toDefinition());
+        const definitions = this._scenes.map((s) => {
+            const def = s.toDefinition();
+            // Agent-level schema is the default; a scene-level schema wins.
+            if (!def.schema && this._schema)
+                def.schema = this._schema;
+            return def;
+        });
         const orderedResults = new Array(definitions.length);
         const total = definitions.length;
         // Group scenes by suite for organized output
@@ -127,7 +163,19 @@ export class AgentContext {
                 const sigColor = sig >= 0.95 ? c.green : sig >= 0.80 ? c.yellow : c.red;
                 logger.info(`${indent}       ${c.dim("significance:")} ${sigColor(`${(sig * 100).toFixed(1)}%`)} ${c.dim(`(pass rate: ${((result.passRate ?? 0) * 100).toFixed(1)}%)`)}`);
             }
-            logger.debug(`${indent}       response: ${result.response.text?.slice(0, 120)}`);
+            if (full && result.events && result.events.length > 0) {
+                const costLabel = result.costUsd != null
+                    ? ` ${c.dim("·")} ${c.green(`$${Number(result.costUsd.toFixed(4))}`)}`
+                    : "";
+                const tokLabel = result.tokens
+                    ? ` ${c.dim(`(${result.tokens.input}→${result.tokens.output} tok)`)}`
+                    : "";
+                logger.info(`${indent}       ${c.dim("waterfall:")}${tokLabel}${costLabel}`);
+                for (const line of renderTerminalWaterfall(result.events, { indent: `${indent}       ` })) {
+                    logger.info(line);
+                }
+            }
+            logger.debug(`${indent}       response: ${resolveText(result.response).slice(0, 120)}`);
         };
         if (hasSuites) {
             // Execute suite by suite — print header once, then run all scenes in that suite
@@ -170,14 +218,25 @@ export class AgentContext {
         const successRate = results.length > 0
             ? Number((results.filter((r) => r.passed).length / results.length).toFixed(2))
             : 0;
-        const tokensAvailable = results.some((r) => r.response.metadata?.tokens != null);
+        const sceneTokens = results
+            .map((r) => r.tokens ?? r.response.metadata?.tokens)
+            .filter((t) => t != null);
         let averageInputTokensPerCase;
         let averageOutputTokensPerCase;
-        if (tokensAvailable) {
-            const withTokens = results.filter((r) => r.response.metadata?.tokens != null);
-            averageInputTokensPerCase = Math.round(withTokens.reduce((sum, r) => sum + (r.response.metadata.tokens.input ?? 0), 0) / withTokens.length);
-            averageOutputTokensPerCase = Math.round(withTokens.reduce((sum, r) => sum + (r.response.metadata.tokens.output ?? 0), 0) / withTokens.length);
+        let totalInputTokens;
+        let totalOutputTokens;
+        if (sceneTokens.length > 0) {
+            totalInputTokens = sceneTokens.reduce((s, t) => s + (t.input ?? 0), 0);
+            totalOutputTokens = sceneTokens.reduce((s, t) => s + (t.output ?? 0), 0);
+            averageInputTokensPerCase = Math.round(totalInputTokens / sceneTokens.length);
+            averageOutputTokensPerCase = Math.round(totalOutputTokens / sceneTokens.length);
         }
+        const sceneCosts = results
+            .map((r) => r.costUsd)
+            .filter((c) => typeof c === "number");
+        const totalCostUsd = sceneCosts.length > 0
+            ? sceneCosts.reduce((s, c) => s + c, 0)
+            : undefined;
         const firstMeta = results.find((r) => r.response.metadata)?.response
             .metadata;
         const dimensions = {};
@@ -208,15 +267,27 @@ export class AgentContext {
             totalCases: results.length,
             averageInputTokensPerCase,
             averageOutputTokensPerCase,
+            totalInputTokens,
+            totalOutputTokens,
+            totalCostUsd,
             results,
         };
         if (report.systemPromptHash && firstMeta?.systemPrompt) {
             await writeDiffEntry(report.systemPromptHash, firstMeta.systemPrompt, report.tools ?? [], report.model);
         }
         const formatted = formatReport(report);
-        logger.info(formatted);
+        // Default mode prints a one-line summary; `--full` dumps the whole report.
+        if (full) {
+            logger.info(formatted);
+        }
+        else {
+            const passed = results.filter((r) => r.passed).length;
+            const rateColor = successRate >= 0.95 ? c.green : successRate >= 0.5 ? c.yellow : c.red;
+            const costSummary = totalCostUsd != null ? ` ${c.dim("·")} ${c.green(`$${Number(totalCostUsd.toFixed(4))}`)}` : "";
+            logger.info(`${rateColor(`${passed}/${results.length} passed`)} ${c.dim(`(${(successRate * 100).toFixed(0)}%)`)} ${c.dim("·")} ${c.dim(`${Math.round(totalDuration)}ms`)}${costSummary}`);
+        }
         const filepath = await writeReport(formatted, report.timestamp, report.name, report.dimensions);
-        logger.info(`\n${c.dim("Report saved to:")} ${c.cyan(filepath)}`);
+        logger.info(`${c.dim("Report saved to:")} ${c.cyan(filepath)}${full ? "" : c.dim(" (run with --full to print it)")}`);
         return report;
     }
 }
@@ -227,6 +298,9 @@ function hashPrompt(prompt, model) {
 export function hashPromptOnly(prompt) {
     return createHash("sha256").update(prompt).digest("hex").slice(0, 12);
 }
+// The active context is a runtime singleton holding an executor of arbitrary
+// value type, so `any` is the honest type for the holder. The generic flows
+// through `agent()` → `AgentContext<T>` → the report at the call site.
 let currentContext = null;
 export function setContext(ctx) {
     currentContext = ctx;

package/dist/discover.d.ts ADDED Viewed

@@ -0,0 +1,16 @@
+export declare const DEFAULT_PATTERN = "**/*.agest.ts";
+export interface DiscoverOptions {
+    pattern?: string;
+    cwd?: string;
+}
+/**
+ * Resolve a mix of file paths, directories, and glob patterns into a
+ * deduplicated, sorted list of absolute file paths.
+ *
+ * Rules per target:
+ *   - directory: search recursively for `pattern` (default `**\/*.agest.ts`)
+ *   - glob (contains *, ?, [], {}): expand it
+ *   - file: use as-is
+ *   - anything else: try as glob (zero matches is fine)
+ */
+export declare function discoverTestFiles(targets: string[], options?: DiscoverOptions): Promise<string[]>;

package/dist/discover.js ADDED Viewed

@@ -0,0 +1,62 @@
+import { promises as fs } from "node:fs";
+import { isAbsolute, resolve } from "node:path";
+export const DEFAULT_PATTERN = "**/*.agest.ts";
+const GLOB_CHARS = /[*?[\]{}]/;
+function hasGlobChars(value) {
+    return GLOB_CHARS.test(value);
+}
+async function statSafe(path) {
+    try {
+        const stat = await fs.stat(path);
+        return { isFile: stat.isFile(), isDir: stat.isDirectory() };
+    }
+    catch {
+        return { isFile: false, isDir: false };
+    }
+}
+async function expandGlob(pattern, cwd) {
+    const out = [];
+    // fs.promises.glob is available in Node >= 22 (the package's required engine).
+    for await (const match of fs.glob(pattern, { cwd })) {
+        out.push(isAbsolute(match) ? match : resolve(cwd, match));
+    }
+    return out;
+}
+/**
+ * Resolve a mix of file paths, directories, and glob patterns into a
+ * deduplicated, sorted list of absolute file paths.
+ *
+ * Rules per target:
+ *   - directory: search recursively for `pattern` (default `**\/*.agest.ts`)
+ *   - glob (contains *, ?, [], {}): expand it
+ *   - file: use as-is
+ *   - anything else: try as glob (zero matches is fine)
+ */
+export async function discoverTestFiles(targets, options = {}) {
+    const cwd = options.cwd ?? process.cwd();
+    const pattern = options.pattern ?? DEFAULT_PATTERN;
+    const work = targets.length === 0 ? ["."] : targets;
+    const found = new Set();
+    for (const target of work) {
+        if (hasGlobChars(target)) {
+            for (const f of await expandGlob(target, cwd))
+                found.add(f);
+            continue;
+        }
+        const stat = await statSafe(isAbsolute(target) ? target : resolve(cwd, target));
+        if (stat.isDir) {
+            const trimmed = target.replace(/\/+$/, "");
+            const dirPattern = `${trimmed}/${pattern}`;
+            for (const f of await expandGlob(dirPattern, cwd))
+                found.add(f);
+            continue;
+        }
+        if (stat.isFile) {
+            found.add(isAbsolute(target) ? target : resolve(cwd, target));
+            continue;
+        }
+        for (const f of await expandGlob(target, cwd))
+            found.add(f);
+    }
+    return [...found].sort();
+}

package/dist/index.d.ts CHANGED Viewed

@@ -1,16 +1,26 @@
 import type { AgentExecutor, AgentReport, HookFn } from "./types";
 import { SceneBuilder } from "./context";
+import { type StandardSchemaV1, type InferOutput } from "./schema";
 export { expect } from "./assertions";
+export type { StandardSchemaV1, InferOutput } from "./schema";
 export { logger } from "./logger";
 export { defineConfig } from "./config";
+export { createTrace, summarizeEvents } from "./adapters/tracing";
+export type { Trace } from "./adapters/tracing";
 export type { AgestConfig, JudgeConfig, JudgeExecutor } from "./config";
 export type { LogLevel } from "./logger";
 export type { AgentExpectation, AgentMatchers } from "./assertions";
 export type { JudgeCriteria } from "./judge";
-export type { AgentExecutor, AgentResponse, AgentReport, SceneResult, RunResult, JudgeVerdict, JudgeResult, HookFn, } from "./types";
+export type { AgentExecutor, ExecutorOptions, AgentResponse, AgentReport, SceneResult, RunResult, JudgeVerdict, JudgeResult, HookFn, TimelineEvent, TimelineEventKind, CostBreakdown, CostSource, } from "./types";
 export interface AgentOptions {
     name?: string;
 }
+/**
+ * Registers a scene in the active agent. The variant passed to an `agent()`
+ * callback is typed `SceneFn<T>`, so `.expect("value", …)` receives the agent's
+ * native value type.
+ */
+export type SceneFn<T = string> = (prompt: string) => SceneBuilder<T>;
 export declare function scene(prompt: string): SceneBuilder;
 export declare function beforeAll(fn: HookFn): void;
 export declare function afterAll(fn: HookFn): void;
@@ -19,4 +29,12 @@ export declare function afterEach(fn: HookFn): void;
 export declare function suite(name: string, fn: () => void): void;
 /** @internal reset auto-run state between tests */
 export declare function _resetAutoRun(): void;
-export declare function agent(executor: AgentExecutor, fn: () => void, options?: AgentOptions): Promise<AgentReport>;
+export declare function agent<T = string>(executor: AgentExecutor<T>, fn: (scene: SceneFn<T>) => void, options?: AgentOptions): Promise<AgentReport<T>>;
+/**
+ * Schema-typed agent: the executor's `value` type is inferred from the schema
+ * (e.g. `z.infer<typeof Schema>`), and every non-refusal scene is validated
+ * against it. The scene fn passed to the callback is typed accordingly, so
+ * `.expect("value", …)` receives that value type. A scene's own
+ * `.expectSchema()` overrides the agent schema.
+ */
+export declare function agent<S extends StandardSchemaV1>(schema: S, executor: AgentExecutor<InferOutput<S>>, fn: (scene: SceneFn<InferOutput<S>>) => void, options?: AgentOptions): Promise<AgentReport<InferOutput<S>>>;

package/dist/index.js CHANGED Viewed

@@ -1,7 +1,9 @@
 import { AgentContext, setContext, getContext } from "./context";
+import { isStandardSchema } from "./schema";
 export { expect } from "./assertions";
 export { logger } from "./logger";
 export { defineConfig } from "./config";
+export { createTrace, summarizeEvents } from "./adapters/tracing";
 export function scene(prompt) {
     return getContext().registerScene(prompt);
 }
@@ -36,11 +38,16 @@ export function _resetAutoRun() {
     autoRunScheduled = false;
     executionChain = Promise.resolve();
 }
-export function agent(executor, fn, options) {
-    const ctx = new AgentContext(executor, options?.name);
+export function agent(...args) {
+    const [schema, executor, fn, options] = isStandardSchema(args[0])
+        ? args
+        : [undefined, ...args];
+    const ctx = new AgentContext(executor, options?.name, schema);
     setContext(ctx);
     try {
-        fn();
+        // Hand the callback a scene fn bound to the active context. Its static type
+        // carries T (via the overloads); at runtime it's the same `scene()`.
+        fn(scene);
     }
     catch (err) {
         setContext(null);

package/dist/match.d.ts ADDED Viewed

@@ -0,0 +1,28 @@
+/**
+ * Structural matching primitives for deterministic assertions. Kept in their
+ * own module — they are correctness-critical (a wrong result here is a false
+ * test pass) and deserve isolated, exhaustive unit tests.
+ */
+/** Any non-null, non-array object — including class instances, Map, Date, etc. */
+export declare function isObjectLike(value: unknown): value is Record<string, unknown>;
+/**
+ * A "record" object — a plain `{...}` literal (prototype is Object.prototype or
+ * null). Class instances, Map, Date, RegExp, etc. are NOT plain: they are
+ * compared as opaque leaves rather than recursed into.
+ */
+export declare function isPlainObject(value: unknown): value is Record<string, unknown>;
+/**
+ * Recursive containment: is `expected` structurally present within `actual`?
+ *
+ * - `expected` array  → `actual` is an array and the expected elements can be
+ *   matched one-to-one to DISTINCT actual elements (order-independent
+ *   multiset/sub-multiset membership — duplicates require distinct matches).
+ * - `expected` plain object → `actual` is object-like and every key in
+ *   `expected` exists in `actual` with a recursively-contained value (extra
+ *   keys in `actual` are allowed — that is the "partial").
+ * - anything else (primitive, Date, Map, RegExp, class instance) → strict
+ *   deep equality via `isDeepStrictEqual` (correct for NaN / Date / ±0).
+ *
+ * Leaf comparison is EXACT and case-sensitive. Only the shape recurses.
+ */
+export declare function structuralContains(actual: unknown, expected: unknown): boolean;

package/dist/match.js ADDED Viewed

@@ -0,0 +1,57 @@
+import { isDeepStrictEqual } from "node:util";
+/**
+ * Structural matching primitives for deterministic assertions. Kept in their
+ * own module — they are correctness-critical (a wrong result here is a false
+ * test pass) and deserve isolated, exhaustive unit tests.
+ */
+/** Any non-null, non-array object — including class instances, Map, Date, etc. */
+export function isObjectLike(value) {
+    return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+/**
+ * A "record" object — a plain `{...}` literal (prototype is Object.prototype or
+ * null). Class instances, Map, Date, RegExp, etc. are NOT plain: they are
+ * compared as opaque leaves rather than recursed into.
+ */
+export function isPlainObject(value) {
+    if (!isObjectLike(value))
+        return false;
+    const proto = Object.getPrototypeOf(value);
+    return proto === Object.prototype || proto === null;
+}
+/**
+ * Recursive containment: is `expected` structurally present within `actual`?
+ *
+ * - `expected` array  → `actual` is an array and the expected elements can be
+ *   matched one-to-one to DISTINCT actual elements (order-independent
+ *   multiset/sub-multiset membership — duplicates require distinct matches).
+ * - `expected` plain object → `actual` is object-like and every key in
+ *   `expected` exists in `actual` with a recursively-contained value (extra
+ *   keys in `actual` are allowed — that is the "partial").
+ * - anything else (primitive, Date, Map, RegExp, class instance) → strict
+ *   deep equality via `isDeepStrictEqual` (correct for NaN / Date / ±0).
+ *
+ * Leaf comparison is EXACT and case-sensitive. Only the shape recurses.
+ */
+export function structuralContains(actual, expected) {
+    if (Array.isArray(expected)) {
+        if (!Array.isArray(actual))
+            return false;
+        // Greedy one-to-one matching: each expected element must claim a DISTINCT
+        // actual element, so `[1]` does not contain `[1, 1]`.
+        const claimed = new Set();
+        return expected.every((e) => {
+            const idx = actual.findIndex((a, i) => !claimed.has(i) && structuralContains(a, e));
+            if (idx === -1)
+                return false;
+            claimed.add(idx);
+            return true;
+        });
+    }
+    if (isPlainObject(expected)) {
+        if (!isObjectLike(actual))
+            return false;
+        return Object.keys(expected).every((key) => key in actual && structuralContains(actual[key], expected[key]));
+    }
+    return isDeepStrictEqual(actual, expected);
+}