npm - @runtypelabs/sdk - Versions diffs - 5.5.0 → 5.6.0 - Mend

@runtypelabs/sdk 5.5.0 → 5.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.d.cts CHANGED Viewed

@@ -9727,6 +9727,8 @@ interface paths {
                             };
                             virtual?: boolean;
                         };
+                        /** @description Strict mode (the `runtype eval --strict` gate): a soft grader miss fails its case like a gate miss. Default false — soft misses are reported per-outcome but do not fail the suite. */
+                        strict?: boolean;
                         suiteId?: string;
                     };
                 };
@@ -37329,6 +37331,11 @@ interface components {
                     passed: boolean;
                     reasoning?: string;
                     score?: number;
+                    /**
+                     * @description Grader severity. A soft miss is reported but only fails the suite under strict mode. Absent ⇒ gate.
+                     * @enum {string}
+                     */
+                    severity?: "gate" | "soft";
                 }[];
                 outputExcerpt: string;
                 passed: boolean;
@@ -39925,6 +39932,414 @@ interface BillingSpendAnalyticsParams {
     days?: number;
 }
+/**
+ * Eval config-as-code: `defineEval` + the grader builders.
+ *
+ * The authoring layer for code-colocated evals — define the evals for a flow or
+ * agent right next to its `defineFlow` / `flows.ensure` definition. This module
+ * is PURE and local (no I/O), the exact analog of `defineFlow` in
+ * `flows-ensure.ts`: it validates and normalizes a loose `DefineEvalInput` into
+ * a canonical `EvalDefinition` (target + cases + per-case graders) and computes
+ * a content hash for hash-first convergence. The converge motion
+ * (`client.evals.ensure` → `POST /eval/ensure`) and the `runtype eval` CLI build
+ * on this contract in later increments.
+ *
+ * Grader types are MIRRORED INLINE from `@runtypelabs/shared`'s
+ * `grader-types.ts` (the SDK is dependency-free by convention — see the same
+ * pattern in `flows-ensure.ts`). The wire shapes must stay byte-identical to the
+ * shared discriminated union so an eval authored here scores through the existing
+ * `EvalScoringService` unchanged.
+ *
+ * Scope: the output + AI-judge union plus the trace graders (`called_tool` /
+ * `tool_order` / `ran_step` / `completed` / `cost` / …), each scored server-side
+ * by the same pure `runCheck` engine over the run's captured execution trace.
+ * Severity (`.gate()` / `.soft()`) is emitted as the optional `severity` field
+ * on each grader (absent ⇒ `gate`); see `GraderSeverity` below. See
+ * `docs/features/planning/2026-06-24-code-colocated-evals.md`.
+ */
+/**
+ * Per-grader severity (mirror of `@runtypelabs/shared`'s `GraderSeverity`). A
+ * `gate` miss always fails the case; a `soft` miss is tracked-but-not-failing
+ * unless the run is `--strict`. Absent ⇒ `gate`. Set it with the chainable
+ * `.gate()` / `.soft()` handles on any grader builder.
+ */
+type GraderSeverity = 'gate' | 'soft';
+/** The deterministic check kinds (before severity is mixed in). */
+type CheckGraderShape = {
+    kind: 'contains';
+    value: string;
+    caseSensitive?: boolean;
+} | {
+    kind: 'not_contains';
+    value: string;
+    caseSensitive?: boolean;
+} | {
+    kind: 'matches_expected';
+} | {
+    kind: 'regex';
+    pattern: string;
+    flags?: string;
+} | {
+    kind: 'valid_json';
+} | {
+    kind: 'json_field';
+    path: string;
+    equals?: unknown;
+    exists?: boolean;
+} | {
+    kind: 'length';
+    minChars?: number;
+    maxChars?: number;
+} | {
+    kind: 'latency';
+    maxMs: number;
+} | {
+    kind: 'no_error';
+} | {
+    kind: 'called_tool';
+    name: string;
+    input?: unknown;
+    output?: unknown;
+    isError?: boolean;
+    times?: number;
+} | {
+    kind: 'not_called_tool';
+    name: string;
+} | {
+    kind: 'used_no_tools';
+} | {
+    kind: 'max_tool_calls';
+    max: number;
+} | {
+    kind: 'tool_order';
+    tools: string[];
+} | {
+    kind: 'ran_step';
+    name: string;
+} | {
+    kind: 'step_order';
+    steps: string[];
+} | {
+    kind: 'completed';
+} | {
+    kind: 'cost';
+    maxUsd: number;
+};
+/** Deterministic, free, instant checks. Scored by the pure `runCheck` engine. */
+type CheckGrader = CheckGraderShape & {
+    severity?: GraderSeverity;
+};
+/** Built-in AI-grader preset ids (mirror of `BUILT_IN_GRADER_IDS`). */
+type BuiltInGraderId = 'answersQuestion' | 'matchesExpected' | 'followsInstructions' | 'grounded' | 'rightTone' | 'safeToSend';
+/** LLM-as-judge over plain-language criteria. Scored by the api grader executor. */
+interface AIGrader {
+    kind: 'ai';
+    preset?: BuiltInGraderId;
+    /** Plain language: "what does a good answer look like?" */
+    criteria: string;
+    /** Reference-guided when the case has `expected`. */
+    useExpected?: boolean;
+    /** Defaults to a cheap routed model at execution time. */
+    model?: string;
+    /** Pass cutoff on the 1-5 judge scale (default 4 server-side). Set via `.atLeast(n)`. */
+    threshold?: number;
+    /** Hard gate (default) vs soft. Set via `.gate()` / `.soft()`. */
+    severity?: GraderSeverity;
+}
+type GraderConfig = CheckGrader | AIGrader;
+/** What a good answer looks like for a case (mirror of `CaseExpected`). */
+interface CaseExpected {
+    text?: string;
+    json?: unknown;
+    facts?: string[];
+}
+/**
+ * A grader plus chainable severity handles. The handles are defined
+ * NON-ENUMERABLY, so `JSON.stringify`, `Object.keys`, and `computeEvalContentHash`
+ * see only the data fields — a `Gradeable` serializes byte-identically to the
+ * plain `GraderConfig` it wraps. `.atLeast(n)` is present only on AI graders.
+ */
+type Gradeable<T extends GraderConfig = GraderConfig> = T & {
+    /** Mark as a hard gate (the default): a miss fails the case. */
+    gate(): Gradeable<T>;
+    /** Mark as soft: a miss is tracked but only fails the case under `--strict`. */
+    soft(): Gradeable<T>;
+} & (T extends AIGrader ? {
+    /** AI graders only: set the 1-5 judge pass cutoff (e.g. `judge(...).atLeast(3)`). */
+    atLeast(threshold: number): Gradeable<T>;
+} : Record<never, never>);
+/** Output contains `value` (case-insensitive unless `caseSensitive`). */
+declare function contains(value: string, opts?: {
+    caseSensitive?: boolean;
+}): Gradeable<CheckGrader>;
+/** Output does NOT contain `value`. */
+declare function notContains(value: string, opts?: {
+    caseSensitive?: boolean;
+}): Gradeable<CheckGrader>;
+/** Output equals the case's `expected.text` (trim/lowercase/collapse-whitespace normalized). */
+declare function matchesExpected(): Gradeable<CheckGrader>;
+/** Output matches a regular expression. */
+declare function regex(pattern: string, flags?: string): Gradeable<CheckGrader>;
+/** Output parses as JSON. */
+declare function validJson(): Gradeable<CheckGrader>;
+/**
+ * A dot-path field in the output's parsed JSON. With `equals`, asserts value
+ * equality; otherwise asserts presence (`exists: false` asserts absence).
+ */
+declare function jsonField(path: string, opts?: {
+    equals?: unknown;
+    exists?: boolean;
+}): Gradeable<CheckGrader>;
+/** Output character length is within `[minChars, maxChars]`. */
+declare function length(opts: {
+    minChars?: number;
+    maxChars?: number;
+}): Gradeable<CheckGrader>;
+/** End-to-end latency is within `maxMs`. */
+declare function latency(maxMs: number): Gradeable<CheckGrader>;
+/** The case produced output without erroring. */
+declare function noError(): Gradeable<CheckGrader>;
+/**
+ * A tool named `name` was called. Optional filters narrow the match: `input` /
+ * `output` deep-equal a call's resolved input / result, `isError` matches a
+ * call's error flag, and `times` asserts the matching count EXACTLY (omit for
+ * "at least once").
+ */
+declare function calledTool(name: string, opts?: {
+    input?: unknown;
+    output?: unknown;
+    isError?: boolean;
+    times?: number;
+}): Gradeable<CheckGrader>;
+/** No tool named `name` was called. */
+declare function notCalledTool(name: string): Gradeable<CheckGrader>;
+/** The run made no tool calls at all. */
+declare function usedNoTools(): Gradeable<CheckGrader>;
+/** The run made at most `max` tool calls. */
+declare function maxToolCalls(max: number): Gradeable<CheckGrader>;
+/** `tools` appears as an ordered subsequence of the tool-call names. */
+declare function toolOrder(tools: string[]): Gradeable<CheckGrader>;
+/** A step named (or typed) `name` ran. */
+declare function ranStep(name: string): Gradeable<CheckGrader>;
+/** `steps` appears as an ordered subsequence of the steps that ran. */
+declare function stepOrder(steps: string[]): Gradeable<CheckGrader>;
+/** The run completed (finished without erroring and was not left paused). */
+declare function completed(): Gradeable<CheckGrader>;
+/** Total run cost was within `maxUsd` (US dollars). */
+declare function cost(maxUsd: number): Gradeable<CheckGrader>;
+/**
+ * LLM-as-judge over free-form criteria. Model-graded and subjective — reach for
+ * it only when no deterministic check captures what "good" means. Like every
+ * grader it is a hard gate by default: chain `.soft()` to make a miss
+ * non-blocking, and `.atLeast(n)` to set the 1-5 pass cutoff.
+ */
+declare function judge(criteria: string, opts?: {
+    useExpected?: boolean;
+    model?: string;
+    threshold?: number;
+    preset?: BuiltInGraderId;
+}): Gradeable<AIGrader>;
+/**
+ * The built-in AI-grader presets (criteria mirrored from `BUILT_IN_GRADERS`).
+ * Each returns a ready `Gradeable<AIGrader>` (so `.gate()` / `.soft()` /
+ * `.atLeast(n)` can be chained); pass an override string to `rightTone`.
+ */
+declare const judges: {
+    readonly answersQuestion: () => Gradeable<AIGrader>;
+    readonly matchesExpected: () => Gradeable<AIGrader>;
+    readonly followsInstructions: () => Gradeable<AIGrader>;
+    readonly grounded: () => Gradeable<AIGrader>;
+    readonly rightTone: (voice?: string) => Gradeable<AIGrader>;
+    readonly safeToSend: () => Gradeable<AIGrader>;
+};
+interface EvalMessage {
+    role: 'user' | 'assistant' | 'system';
+    content: string;
+}
+/** A case's input: flow variables and/or a scripted conversation to replay. */
+interface EvalCaseInput {
+    variables?: Record<string, unknown>;
+    messages?: EvalMessage[];
+}
+/** The target a suite evaluates — a saved flow or agent, by portable name. */
+type EvalTarget = {
+    flow: string;
+} | {
+    agent: string;
+};
+/** Loose per-case input to `defineEval`. */
+interface DefineEvalCaseInput {
+    name: string;
+    input?: EvalCaseInput;
+    expected?: CaseExpected;
+    /** Case-level graders, appended after any suite-level `graders`. */
+    expect?: GraderConfig[];
+}
+/** Loose input to `defineEval` (validated + normalized into `EvalDefinition`). */
+interface DefineEvalInput {
+    /**
+     * Suite name — the converge identity (name + account scope), exactly like a
+     * flow's name. Optional: defaults to a stable name derived from the target
+     * (`flow:<name>` / `agent:<name>`). Give two suites for the same target
+     * distinct names (e.g. 'smoke', 'regression') to keep them separate.
+     */
+    name?: string;
+    target: EvalTarget;
+    /** Graders applied to EVERY case (suite-level). Run before each case's `expect`. */
+    graders?: GraderConfig[];
+    cases: DefineEvalCaseInput[];
+    /**
+     * Run without persisting a durable suite/batch to the dashboard (the 'virtual'
+     * opt-out, mirroring `useVirtualFlow` / `storeResults:false`). Default false:
+     * evals run via this surface are dashboard-visible by default.
+     */
+    virtual?: boolean;
+}
+/** A normalized case: graders merged (suite-level then case-level) into `expect`. */
+interface EvalCaseDefinition {
+    name: string;
+    input: EvalCaseInput;
+    expected?: CaseExpected;
+    expect: GraderConfig[];
+}
+/** The canonical (wire) eval definition produced by `defineEval`. */
+interface EvalDefinition {
+    /** Suite name — the converge identity. Derived from the target when omitted. */
+    name: string;
+    target: EvalTarget;
+    cases: EvalCaseDefinition[];
+    virtual: boolean;
+}
+/**
+ * Pure-local declarative constructor for an eval definition. No I/O. Validates
+ * structure, rejects unknown fields, merges suite-level `graders` into each
+ * case's `expect`, and produces a canonical, environment-portable
+ * `EvalDefinition`. The target names a saved flow/agent by name (portable across
+ * environments, like `defineFlow`'s `flow:<name>` references).
+ *
+ * @example
+ * ```typescript
+ * import { defineEval, contains, judges } from '@runtypelabs/sdk'
+ *
+ * export default defineEval({
+ *   target: { flow: 'support-triage' },
+ *   graders: [contains('ticket')],
+ *   cases: [
+ *     {
+ *       name: 'billing routes to finance',
+ *       input: { variables: { message: 'I was double charged' } },
+ *       expect: [contains('finance'), judges.answersQuestion()],
+ *     },
+ *   ],
+ * })
+ * ```
+ */
+declare function defineEval(input: DefineEvalInput): EvalDefinition;
+/**
+ * SHA-256 (hex) over the canonical normalized definition. Cases are sorted by
+ * name (name is the case identity); grader order WITHIN a case is preserved
+ * (reordering graders changes their result index, so it is a meaningful edit).
+ * The hash is the basis for hash-first convergence in `client.evals.ensure`.
+ */
+declare function computeEvalContentHash(definition: EvalDefinition): Promise<string>;
+/** The converge outcome of `client.evals.ensure(definition)`. */
+interface EnsureEvalResult {
+    result: 'unchanged' | 'created' | 'updated';
+    /** The persisted eval suite id. */
+    suiteId: string;
+    /** The server-computed canonical hash (echo this — never your own). */
+    contentHash: string;
+}
+/** The canonical definition + provenance returned by `client.evals.pull(name)`. */
+interface EvalPullResult {
+    suiteId: string;
+    definition: EvalDefinition;
+    contentHash: string;
+    lastModifiedSource: string | null;
+    updatedAt: string | null;
+}
+/**
+ * One grader's verdict for one case (mirror of `@runtypelabs/shared`'s
+ * `GraderOutcome`). `graderIndex` is the position in the suite's grader list.
+ */
+interface GraderOutcome {
+    graderIndex: number;
+    kind: string;
+    passed: boolean;
+    /** The grader's severity (absent ⇒ gate). A soft miss only fails under `--strict`. */
+    severity?: GraderSeverity;
+    /** 0..1 for scaled graders (AI graders normalize their 1-5 to 0..1). */
+    score?: number;
+    /** AI-grader verdict, or a check's human-readable reason. */
+    reasoning?: string;
+}
+/** One case's run result: pass/fail plus each grader's outcome. */
+interface RunEvalCaseResult {
+    name: string;
+    passed: boolean;
+    outcomes: GraderOutcome[];
+    /** Truncated final-output snapshot (for surfacing a failing case in CI logs). */
+    outputExcerpt: string;
+    /** Whether producing the output threw. */
+    errored: boolean;
+}
+/** The synchronous run + score result returned by `client.evals.runSuite(...)`. */
+interface RunEvalResult {
+    /** The saved suite id, or `null` for an inline (virtual) run. */
+    suiteId: string | null;
+    name: string;
+    targetType: 'flow' | 'agent';
+    /** Suite score, 0..1 (passed cases / total cases). */
+    score: number;
+    /** True when every case passed. A `soft` grader miss fails its case only when the run is `strict`. */
+    passed: boolean;
+    totalCases: number;
+    passedCases: number;
+    cases: RunEvalCaseResult[];
+}
+/**
+ * Run a saved suite by id (the post-`ensure` path) XOR an inline definition (the
+ * `virtual` path — nothing is persisted). Exactly one must be provided.
+ *
+ * `strict` (default false) is the `runtype eval --strict` gate: a `soft` grader
+ * miss fails its case like a `gate` miss. Without it, soft misses are reported
+ * per-outcome but do not fail the suite.
+ */
+type RunEvalInput = {
+    suiteId: string;
+    strict?: boolean;
+} | {
+    definition: EvalDefinition;
+    strict?: boolean;
+};
+/**
+ * Idempotently converge an eval suite definition onto the platform. Hash-first:
+ * probes with a content hash, and only ships the full definition when the
+ * server reports a miss (`definitionRequired`). Upserts the suite + replaces
+ * its cases; never executes the eval (use the `runtype eval` CLI / `/eval`
+ * submit surface to run it).
+ *
+ * `virtual: true` definitions are ephemeral and have nothing durable to
+ * converge — ensure rejects them. Run a virtual eval directly instead.
+ */
+declare function ensureEval(client: RuntypeClient$1, definition: EvalDefinition): Promise<EnsureEvalResult>;
+/**
+ * Pull the canonical definition + provenance for an eval suite by name — the
+ * absorb-drift direction of the ensure protocol. The contentHash reflects the
+ * live suite state.
+ */
+declare function pullEval(client: RuntypeClient$1, name: string): Promise<EvalPullResult>;
+/**
+ * Run an eval suite synchronously and return the suite score + per-case grader
+ * outcomes. Powers the `runtype eval` CI gate: a saved suite is run by id (after
+ * `ensure`); a `virtual` definition is run inline without persisting anything.
+ *
+ * Synchronous and ephemeral — no batch is created and no scores are saved (use
+ * the dashboard / `/eval/submit` for a durable, dashboard-visible run). Bounded:
+ * suites over the server's per-run case limit must use the batch path. Not
+ * supported here: `claude_managed` agents and inline/virtual agent targets.
+ */
+declare function runEvalSuite(client: RuntypeClient$1, input: RunEvalInput): Promise<RunEvalResult>;
 /**
  * Flow config-as-code: `defineFlow`, `flows.ensure`, `flows.pull`.
  *
@@ -39953,6 +40368,13 @@ interface BillingSpendAnalyticsParams {
  * same flow. The flow definition surface is `{ name, steps }` — description
  * is not part of the v1 ensure surface (the shared hash covers steps only).
  *
+ * A `defineFlow` may also carry inline `evals` — eval suites to converge
+ * alongside the flow. These are SDK-orchestrated: they are NOT part of the
+ * flow content hash and NEVER ride the `/flows/ensure` wire (its server schema
+ * is `.strict()` `{ name, steps }`). After the flow converges, `ensureFlow`
+ * converges each inline suite through the existing `/eval/ensure` endpoint
+ * (`ensureEval`), so eval semantics stay confined to the eval endpoints.
+ *
  * See docs/adr/0003-agent-config-as-code-ensure.md for the design rationale.
  */
@@ -39972,10 +40394,26 @@ interface FlowDefinitionStep {
     when?: string;
     config?: Record<string, unknown>;
 }
-/** `defineFlow` input: identity + the ordered step list. */
+/**
+ * An inline eval suite attached to a `defineFlow`. It is a `DefineEvalInput`
+ * whose `target` is optional: when omitted it defaults to `{ flow: <this
+ * flow's name> }` (the enclosing flow), so the common "evals for THIS flow"
+ * case needs no target. Each is normalized by the canonical `defineEval`.
+ */
+type FlowInlineEvalInput = Omit<DefineEvalInput, 'target'> & {
+    target?: DefineEvalInput['target'];
+};
+/** `defineFlow` input: identity + the ordered step list (+ optional inline evals). */
 interface DefineFlowInput {
     name: string;
     steps: FlowDefinitionStep[];
+    /**
+     * Eval suites to converge alongside this flow. Each is normalized through
+     * the canonical `defineEval`; a suite with no `target` defaults to this flow.
+     * NOT part of the flow content hash and NOT sent on the `/flows/ensure` wire
+     * — `flows.ensure` converges these separately via `/eval/ensure`.
+     */
+    evals?: FlowInlineEvalInput[];
 }
 /** The canonical (wire) definition produced by `defineFlow`. */
 interface FlowDefinition {
@@ -39983,6 +40421,12 @@ interface FlowDefinition {
     steps: Array<FlowDefinitionStep & {
         order: number;
     }>;
+    /**
+     * Normalized inline eval suites (a NON-wire field). Present only when the
+     * definition declared at least one. `ensureFlow` strips this before sending
+     * `{ name, steps }` to `/flows/ensure`, then converges each via `/eval/ensure`.
+     */
+    evals?: EvalDefinition[];
 }
 /**
  * Pure-local declarative constructor for a flow definition. No I/O.
@@ -40032,6 +40476,14 @@ interface EnsureFlowConverged {
     versionId: string | null;
     /** The server-computed canonical hash (echo this — never your own). */
     contentHash: string;
+    /**
+     * Per-suite converge outcomes for the definition's inline `evals`, in
+     * declaration order. Present only when the definition declared inline evals
+     * AND the flow converged on the real (non-plan) path — each suite converges
+     * via `/eval/ensure` after the flow itself. Absent when there are no inline
+     * evals (and never populated on the dryRun/plan path).
+     */
+    evals?: EnsureEvalResult[];
 }
 interface EnsureFlowPlan {
     result: 'plan';
@@ -40161,11 +40613,19 @@ declare class FlowsNamespace {
      * the steady state is one tiny probe request. Creates an immutable version
      * snapshot on every change; never deletes; never executes the flow.
      *
+     * When the definition carries inline `evals`, each suite is converged via
+     * `/eval/ensure` after the flow itself (real converge path only — not on
+     * dryRun/`expectNoChanges`), and the outcomes are returned as `result.evals`.
+     *
      * @example
      * ```typescript
-     * const def = defineFlow({ name: 'Onboarding Digest', steps: [...] })
+     * const def = defineFlow({
+     *   name: 'Onboarding Digest',
+     *   steps: [...],
+     *   evals: [{ cases: [{ name: 'smoke', input: {...}, expect: [contains('ok')] }] }],
+     * })
      *
-     * // Converge (CI/deploy).
+     * // Converge the flow AND its inline eval suites (CI/deploy).
      * const result = await Runtype.flows.ensure(def)
      *
      * // PR drift gate.
@@ -40574,377 +41034,6 @@ declare class BatchesNamespace {
     }>;
 }
-/**
- * Eval config-as-code: `defineEval` + the grader builders.
- *
- * The authoring layer for code-colocated evals — define the evals for a flow or
- * agent right next to its `defineFlow` / `flows.ensure` definition. This module
- * is PURE and local (no I/O), the exact analog of `defineFlow` in
- * `flows-ensure.ts`: it validates and normalizes a loose `DefineEvalInput` into
- * a canonical `EvalDefinition` (target + cases + per-case graders) and computes
- * a content hash for hash-first convergence. The converge motion
- * (`client.evals.ensure` → `POST /eval/ensure`) and the `runtype eval` CLI build
- * on this contract in later increments.
- *
- * Grader types are MIRRORED INLINE from `@runtypelabs/shared`'s
- * `grader-types.ts` (the SDK is dependency-free by convention — see the same
- * pattern in `flows-ensure.ts`). The wire shapes must stay byte-identical to the
- * shared discriminated union so an eval authored here scores through the existing
- * `EvalScoringService` unchanged.
- *
- * Scope: the output + AI-judge union plus the trace graders (`called_tool` /
- * `tool_order` / `ran_step` / `completed` / `cost` / …), each scored server-side
- * by the same pure `runCheck` engine over the run's captured execution trace.
- * Severity (`.gate()` / `.soft()`) is deliberately NOT emitted here — it lands
- * with its own grader-engine extension. See
- * `docs/features/planning/2026-06-24-code-colocated-evals.md`.
- */
-/** Deterministic, free, instant checks. Scored by the pure `runCheck` engine. */
-type CheckGrader = {
-    kind: 'contains';
-    value: string;
-    caseSensitive?: boolean;
-} | {
-    kind: 'not_contains';
-    value: string;
-    caseSensitive?: boolean;
-} | {
-    kind: 'matches_expected';
-} | {
-    kind: 'regex';
-    pattern: string;
-    flags?: string;
-} | {
-    kind: 'valid_json';
-} | {
-    kind: 'json_field';
-    path: string;
-    equals?: unknown;
-    exists?: boolean;
-} | {
-    kind: 'length';
-    minChars?: number;
-    maxChars?: number;
-} | {
-    kind: 'latency';
-    maxMs: number;
-} | {
-    kind: 'no_error';
-} | {
-    kind: 'called_tool';
-    name: string;
-    input?: unknown;
-    output?: unknown;
-    isError?: boolean;
-    times?: number;
-} | {
-    kind: 'not_called_tool';
-    name: string;
-} | {
-    kind: 'used_no_tools';
-} | {
-    kind: 'max_tool_calls';
-    max: number;
-} | {
-    kind: 'tool_order';
-    tools: string[];
-} | {
-    kind: 'ran_step';
-    name: string;
-} | {
-    kind: 'step_order';
-    steps: string[];
-} | {
-    kind: 'completed';
-} | {
-    kind: 'cost';
-    maxUsd: number;
-};
-/** Built-in AI-grader preset ids (mirror of `BUILT_IN_GRADER_IDS`). */
-type BuiltInGraderId = 'answersQuestion' | 'matchesExpected' | 'followsInstructions' | 'grounded' | 'rightTone' | 'safeToSend';
-/** LLM-as-judge over plain-language criteria. Scored by the api grader executor. */
-interface AIGrader {
-    kind: 'ai';
-    preset?: BuiltInGraderId;
-    /** Plain language: "what does a good answer look like?" */
-    criteria: string;
-    /** Reference-guided when the case has `expected`. */
-    useExpected?: boolean;
-    /** Defaults to a cheap routed model at execution time. */
-    model?: string;
-    /** Pass cutoff on the 1-5 judge scale (default 4 server-side). */
-    threshold?: number;
-}
-type GraderConfig = CheckGrader | AIGrader;
-/** What a good answer looks like for a case (mirror of `CaseExpected`). */
-interface CaseExpected {
-    text?: string;
-    json?: unknown;
-    facts?: string[];
-}
-/** Output contains `value` (case-insensitive unless `caseSensitive`). */
-declare function contains(value: string, opts?: {
-    caseSensitive?: boolean;
-}): CheckGrader;
-/** Output does NOT contain `value`. */
-declare function notContains(value: string, opts?: {
-    caseSensitive?: boolean;
-}): CheckGrader;
-/** Output equals the case's `expected.text` (trim/lowercase/collapse-whitespace normalized). */
-declare function matchesExpected(): CheckGrader;
-/** Output matches a regular expression. */
-declare function regex(pattern: string, flags?: string): CheckGrader;
-/** Output parses as JSON. */
-declare function validJson(): CheckGrader;
-/**
- * A dot-path field in the output's parsed JSON. With `equals`, asserts value
- * equality; otherwise asserts presence (`exists: false` asserts absence).
- */
-declare function jsonField(path: string, opts?: {
-    equals?: unknown;
-    exists?: boolean;
-}): CheckGrader;
-/** Output character length is within `[minChars, maxChars]`. */
-declare function length(opts: {
-    minChars?: number;
-    maxChars?: number;
-}): CheckGrader;
-/** End-to-end latency is within `maxMs`. */
-declare function latency(maxMs: number): CheckGrader;
-/** The case produced output without erroring. */
-declare function noError(): CheckGrader;
-/**
- * A tool named `name` was called. Optional filters narrow the match: `input` /
- * `output` deep-equal a call's resolved input / result, `isError` matches a
- * call's error flag, and `times` asserts the matching count EXACTLY (omit for
- * "at least once").
- */
-declare function calledTool(name: string, opts?: {
-    input?: unknown;
-    output?: unknown;
-    isError?: boolean;
-    times?: number;
-}): CheckGrader;
-/** No tool named `name` was called. */
-declare function notCalledTool(name: string): CheckGrader;
-/** The run made no tool calls at all. */
-declare function usedNoTools(): CheckGrader;
-/** The run made at most `max` tool calls. */
-declare function maxToolCalls(max: number): CheckGrader;
-/** `tools` appears as an ordered subsequence of the tool-call names. */
-declare function toolOrder(tools: string[]): CheckGrader;
-/** A step named (or typed) `name` ran. */
-declare function ranStep(name: string): CheckGrader;
-/** `steps` appears as an ordered subsequence of the steps that ran. */
-declare function stepOrder(steps: string[]): CheckGrader;
-/** The run completed (finished without erroring and was not left paused). */
-declare function completed(): CheckGrader;
-/** Total run cost was within `maxUsd` (US dollars). */
-declare function cost(maxUsd: number): CheckGrader;
-/**
- * LLM-as-judge over free-form criteria. Soft, model-graded — reach for it only
- * when no deterministic check captures what "good" means.
- */
-declare function judge(criteria: string, opts?: {
-    useExpected?: boolean;
-    model?: string;
-    threshold?: number;
-    preset?: BuiltInGraderId;
-}): AIGrader;
-/**
- * The built-in AI-grader presets (criteria mirrored from `BUILT_IN_GRADERS`).
- * Each returns a ready `AIGrader`; pass an override string to `rightTone`.
- */
-declare const judges: {
-    readonly answersQuestion: () => AIGrader;
-    readonly matchesExpected: () => AIGrader;
-    readonly followsInstructions: () => AIGrader;
-    readonly grounded: () => AIGrader;
-    readonly rightTone: (voice?: string) => AIGrader;
-    readonly safeToSend: () => AIGrader;
-};
-interface EvalMessage {
-    role: 'user' | 'assistant' | 'system';
-    content: string;
-}
-/** A case's input: flow variables and/or a scripted conversation to replay. */
-interface EvalCaseInput {
-    variables?: Record<string, unknown>;
-    messages?: EvalMessage[];
-}
-/** The target a suite evaluates — a saved flow or agent, by portable name. */
-type EvalTarget = {
-    flow: string;
-} | {
-    agent: string;
-};
-/** Loose per-case input to `defineEval`. */
-interface DefineEvalCaseInput {
-    name: string;
-    input?: EvalCaseInput;
-    expected?: CaseExpected;
-    /** Case-level graders, appended after any suite-level `graders`. */
-    expect?: GraderConfig[];
-}
-/** Loose input to `defineEval` (validated + normalized into `EvalDefinition`). */
-interface DefineEvalInput {
-    /**
-     * Suite name — the converge identity (name + account scope), exactly like a
-     * flow's name. Optional: defaults to a stable name derived from the target
-     * (`flow:<name>` / `agent:<name>`). Give two suites for the same target
-     * distinct names (e.g. 'smoke', 'regression') to keep them separate.
-     */
-    name?: string;
-    target: EvalTarget;
-    /** Graders applied to EVERY case (suite-level). Run before each case's `expect`. */
-    graders?: GraderConfig[];
-    cases: DefineEvalCaseInput[];
-    /**
-     * Run without persisting a durable suite/batch to the dashboard (the 'virtual'
-     * opt-out, mirroring `useVirtualFlow` / `storeResults:false`). Default false:
-     * evals run via this surface are dashboard-visible by default.
-     */
-    virtual?: boolean;
-}
-/** A normalized case: graders merged (suite-level then case-level) into `expect`. */
-interface EvalCaseDefinition {
-    name: string;
-    input: EvalCaseInput;
-    expected?: CaseExpected;
-    expect: GraderConfig[];
-}
-/** The canonical (wire) eval definition produced by `defineEval`. */
-interface EvalDefinition {
-    /** Suite name — the converge identity. Derived from the target when omitted. */
-    name: string;
-    target: EvalTarget;
-    cases: EvalCaseDefinition[];
-    virtual: boolean;
-}
-/**
- * Pure-local declarative constructor for an eval definition. No I/O. Validates
- * structure, rejects unknown fields, merges suite-level `graders` into each
- * case's `expect`, and produces a canonical, environment-portable
- * `EvalDefinition`. The target names a saved flow/agent by name (portable across
- * environments, like `defineFlow`'s `flow:<name>` references).
- *
- * @example
- * ```typescript
- * import { defineEval, contains, judges } from '@runtypelabs/sdk'
- *
- * export default defineEval({
- *   target: { flow: 'support-triage' },
- *   graders: [contains('ticket')],
- *   cases: [
- *     {
- *       name: 'billing routes to finance',
- *       input: { variables: { message: 'I was double charged' } },
- *       expect: [contains('finance'), judges.answersQuestion()],
- *     },
- *   ],
- * })
- * ```
- */
-declare function defineEval(input: DefineEvalInput): EvalDefinition;
-/**
- * SHA-256 (hex) over the canonical normalized definition. Cases are sorted by
- * name (name is the case identity); grader order WITHIN a case is preserved
- * (reordering graders changes their result index, so it is a meaningful edit).
- * The hash is the basis for hash-first convergence in `client.evals.ensure`.
- */
-declare function computeEvalContentHash(definition: EvalDefinition): Promise<string>;
-/** The converge outcome of `client.evals.ensure(definition)`. */
-interface EnsureEvalResult {
-    result: 'unchanged' | 'created' | 'updated';
-    /** The persisted eval suite id. */
-    suiteId: string;
-    /** The server-computed canonical hash (echo this — never your own). */
-    contentHash: string;
-}
-/** The canonical definition + provenance returned by `client.evals.pull(name)`. */
-interface EvalPullResult {
-    suiteId: string;
-    definition: EvalDefinition;
-    contentHash: string;
-    lastModifiedSource: string | null;
-    updatedAt: string | null;
-}
-/**
- * One grader's verdict for one case (mirror of `@runtypelabs/shared`'s
- * `GraderOutcome`). `graderIndex` is the position in the suite's grader list.
- */
-interface GraderOutcome {
-    graderIndex: number;
-    kind: string;
-    passed: boolean;
-    /** 0..1 for scaled graders (AI graders normalize their 1-5 to 0..1). */
-    score?: number;
-    /** AI-grader verdict, or a check's human-readable reason. */
-    reasoning?: string;
-}
-/** One case's run result: pass/fail plus each grader's outcome. */
-interface RunEvalCaseResult {
-    name: string;
-    passed: boolean;
-    outcomes: GraderOutcome[];
-    /** Truncated final-output snapshot (for surfacing a failing case in CI logs). */
-    outputExcerpt: string;
-    /** Whether producing the output threw. */
-    errored: boolean;
-}
-/** The synchronous run + score result returned by `client.evals.runSuite(...)`. */
-interface RunEvalResult {
-    /** The saved suite id, or `null` for an inline (virtual) run. */
-    suiteId: string | null;
-    name: string;
-    targetType: 'flow' | 'agent';
-    /** Suite score, 0..1 (passed cases / total cases). */
-    score: number;
-    /** True when every case passed every grader. */
-    passed: boolean;
-    totalCases: number;
-    passedCases: number;
-    cases: RunEvalCaseResult[];
-}
-/**
- * Run a saved suite by id (the post-`ensure` path) XOR an inline definition (the
- * `virtual` path — nothing is persisted). Exactly one must be provided.
- */
-type RunEvalInput = {
-    suiteId: string;
-} | {
-    definition: EvalDefinition;
-};
-/**
- * Idempotently converge an eval suite definition onto the platform. Hash-first:
- * probes with a content hash, and only ships the full definition when the
- * server reports a miss (`definitionRequired`). Upserts the suite + replaces
- * its cases; never executes the eval (use the `runtype eval` CLI / `/eval`
- * submit surface to run it).
- *
- * `virtual: true` definitions are ephemeral and have nothing durable to
- * converge — ensure rejects them. Run a virtual eval directly instead.
- */
-declare function ensureEval(client: RuntypeClient$1, definition: EvalDefinition): Promise<EnsureEvalResult>;
-/**
- * Pull the canonical definition + provenance for an eval suite by name — the
- * absorb-drift direction of the ensure protocol. The contentHash reflects the
- * live suite state.
- */
-declare function pullEval(client: RuntypeClient$1, name: string): Promise<EvalPullResult>;
-/**
- * Run an eval suite synchronously and return the suite score + per-case grader
- * outcomes. Powers the `runtype eval` CI gate: a saved suite is run by id (after
- * `ensure`); a `virtual` definition is run inline without persisting anything.
- *
- * Synchronous and ephemeral — no batch is created and no scores are saved (use
- * the dashboard / `/eval/submit` for a durable, dashboard-visible run). Bounded:
- * suites over the server's per-run case limit must use the batch path. Not
- * supported here: `claude_managed` agents and inline/virtual agent targets.
- */
-declare function runEvalSuite(client: RuntypeClient$1, input: RunEvalInput): Promise<RunEvalResult>;
 /**
  * EvalsNamespace - Static namespace for evaluation operations
  *
@@ -47802,4 +47891,4 @@ declare function getLikelySupportingCandidatePaths(bestCandidatePath: string | u
 declare function getDefaultPlanPath(taskName: string): string;
 declare function sanitizeTaskSlug(taskName: string): string;
-export { type AIGrader, type Agent, type AgentApprovalCompleteEvent, type AgentApprovalStartEvent, type AgentCompleteEvent, type AgentDefinition, type AgentDefinitionConfig, AgentDriftError, AgentEnsureConflictError, type AgentErrorEvent, type AgentEvent, type AgentEventType, type AgentExecuteRequest, type AgentExecuteResponse, type AgentIterationCompleteEvent, type AgentIterationStartEvent, type AgentMediaEvent, type AgentMessage, type AgentPausedEvent, type AgentPingEvent, type AgentPullResult, type AgentReflectionEvent, type AgentRuntimeToolDefinition, type AgentStartEvent, type AgentStreamCallbacks, type AgentStreamEvent, type AgentSubagentConfig, type AgentToolCompleteEvent, type AgentToolDeltaEvent, type AgentToolInputCompleteEvent, type AgentToolInputDeltaEvent, type AgentToolStartEvent, type AgentTurnCompleteEvent, type AgentTurnDeltaEvent, type AgentTurnStartEvent, type AgentVersionDetail, type AgentVersionListItem, type AgentVersionPublishResponse, AgentVersionsEndpoint, type AgentVersionsListResponse, AgentsEndpoint, AgentsNamespace, AnalyticsEndpoint, type ApiClient, type ApiKey, ApiKeysEndpoint, type ApiResponse, type App, type AppManifest, type AppVersion, type ApplyGeneratedProposalOptions, type ApplyGeneratedProposalResult, AppsEndpoint, type AssetReferenceContentPart, type AttachRuntimeToolsOptions, type BaseAgentEvent, BatchBuilder, type BatchClient, type BatchListParams, type BatchOptions, type BatchRequest, type BatchResult, type BatchScheduleConfig, type BatchStatus, BatchesNamespace, BillingEndpoint, type BillingSpendAnalyticsParams, type BindSkillInput, type BuiltInGraderId, type BuiltInTool, type BulkEditCondition, type BulkEditRequest, type BulkEditResponse, type BulkEditResult, type CaseExpected, ChatEndpoint, type CheckGrader, ClientBatchBuilder, type ClientConfig, type ClientConversation, ClientEvalBuilder, ClientFlowBuilder, type ClientToken, type ClientTokenConfig, type ClientTokenEnvironment, type ClientTokenVersionPin, 
ClientTokensEndpoint, type ClientToolDefinition, type ClientWidgetTheme, type ConditionalGetResult, type ConditionalStepConfig$1 as ConditionalStepConfig, type ContextErrorHandling, type ContextFallback, ContextTemplatesEndpoint, type Conversation, type ConversationListItem, type ConversationListParams, type ConversationMessage, type ConversationSource, ConversationsEndpoint, type ConversationsListResponse, type CreateApiKeyRequest, type CreateAppRequest, type CreateClientTokenRequest, type CreateClientTokenResponse, type CreateConversationRequest, type CreateFlowRequest, type CreateModelConfigRequest, type CreatePromptData, type CreatePromptRequest, type CreateProviderKeyRequest, type CreateRecordRequest, type CreateScheduleRequest, type CreateSecretRequest, type CreateToolRequest, type CustomMCPServer, type CustomMCPServerAuth, type CustomToolConfig, DEFAULT_RECOVERY_AFTER_EMPTY_SESSIONS, DEFAULT_STALL_STOP_AFTER, type DefineAgentInput, type DefineEvalCaseInput, type DefineEvalInput, type DefineFlowInput, type DefineProductInput, type DefineSkillInput, type DefineSurfaceInput, type DefineToolInput, type DeployCfSandboxRequest, type DeployCfSandboxResponse, type DeploySandboxRequest, type DeploySandboxResponse, type DiscoveredModel, type DispatchClient, DispatchEndpoint, type DispatchEnvironment, type DispatchEvent, type DispatchOptions$1 as DispatchOptions, type DispatchRequest, type EnsureAgentConverged, type EnsureAgentOptions, type EnsureAgentPlan, type EnsureAgentResult, type EnsureEvalResult, type EnsureFlowConverged, type EnsureFlowOptions, type EnsureFlowPlan, type EnsureFlowResult, type EnsureFpoOptions, type EnsureFpoResult, type EnsureProductConverged, type EnsureProductOptions, type EnsureProductPlan, type EnsureProductResult, type EnsureSkillConverged, type EnsureSkillOptions, type EnsureSkillPlan, type EnsureSkillResult, type EnsureSurfaceConverged, type EnsureSurfaceOptions, type EnsureSurfacePlan, type EnsureSurfaceResult, type EnsureToolConverged, 
type EnsureToolOptions, type EnsureToolPlan, type EnsureToolResult, type ErrorHandlingMode, EvalBuilder, type EvalCaseDefinition, type EvalCaseInput, type EvalClient, type EvalDefinition, EvalEndpoint, type EvalListParams, type EvalMessage, type EvalOptions, type EvalPullResult, type EvalRecord, type EvalRequest, type EvalResult, type EvalRunConfig, EvalRunner, type EvalStatus, type EvalTarget, EvalsNamespace, type ExecuteToolRequest, type ExecuteToolResponse, type ExecutionStreamEvent, type ExternalAgentContext, type ExternalToolConfig, type FallbackFailEvent, type FallbackStartEvent, type FallbackSuccessEvent, type FallbackTrigger, type FallbackTriggerType, type FallbacksExhaustedEvent, type FallbacksInitiatedEvent, type FetchGitHubStepConfig$1 as FetchGitHubStepConfig, type FetchUrlStepConfig$1 as FetchUrlStepConfig, type FieldFormat, type FileContentPart, type Flow, type FlowAttachment, FlowBuilder, type FlowCompleteEvent, type FlowConfig$1 as FlowConfig, type FlowDefinition, type FlowDefinitionStep, FlowDriftError, FlowEnsureConflictError, type FlowErrorEvent, type FlowFallback, type FlowListItem, type FlowPausedEvent, type FlowPullResult, FlowResult, type FlowStartEvent, type FlowStep, type FlowStepDefinition, type FlowStepType, FlowStepsEndpoint, type FlowStreamEvent, type FlowSummary, type FlowToolConfig, type FlowValidationClient, type FlowValidationIssue, type FlowValidationResult, type FlowVersionDetail, type FlowVersionListItem, type FlowVersionPublishResponse, FlowVersionsEndpoint, type FlowVersionsListResponse, FlowsEndpoint, FlowsNamespace, type FpoEntityOutcome, type FpoInput, type GenerateEmbeddingStepConfig$1 as GenerateEmbeddingStepConfig, type GeneratedRuntimeToolGateDecision, type GeneratedRuntimeToolGateOptions, type GraderConfig, type GraderOutcome, type ImageContentPart, type Integration, type IntegrationTool, IntegrationsEndpoint, type IntegrationsListResponse, type JSONSchema, type JsonArray, type JsonObject, type JsonPrimitive, type 
JsonValue, LEDGER_ARTIFACT_LINE_PREFIX, type ListConversationsResponse, type ListParams, type LocalToolConfig, type LocalToolDefinition, type LocalToolExecutionCompleteEvent, type LocalToolExecutionLoopSnapshotSlice, type LocalToolExecutionStartEvent, type LogEntry, type LogQueryParams, type LogQueryResponse, type LogQueryResult, type LogStatsParams, type LogStatsResponse, type LogStatsResult, LogsEndpoint, type Message$1 as Message, type MessageContent, type MessageFallback, type Metadata, type ModelConfig, ModelConfigsEndpoint, type ModelFallback, type ModelOverride, type ModelUsageDetail, type ModelUsageQueryParams, type ModelUsageResponse, type ModelUsageSummary, type ModelUsageTimeSeries, type PaginationResponse, type ProductDefinition, ProductDriftError, ProductEnsureConflictError, type ProductPullResult, ProductsNamespace, type Prompt$1 as Prompt, type PromptErrorHandling, type PromptFallback, type PromptListParams, type PromptStepConfig$1 as PromptStepConfig, PromptsEndpoint, PromptsNamespace, type ProviderApiKey, type ProviderKeyModel, ProviderKeysEndpoint, type PullFpoResult, RUNTYPE_CLIENT_KIND, type ReasoningConfig, type ReasoningContentPart, type ReasoningValue, type RecordConfig$1 as RecordConfig, type RecordCostAggregation, type RecordCostModelBreakdown, type RecordFilter, type RecordFilterCondition, type RecordFilterGroup, type RecordFilterOperator, type RecordListItem, type RecordListParams, type RecordStepResult, type RecordStepResultsParams, type RecordStepResultsResponse, type RecordWriteResponse, RecordsEndpoint, type RetrieveRecordStepConfig$1 as RetrieveRecordStepConfig, type RetryFallback, type RunEvalCaseResult, type RunEvalInput, type RunEvalResult, type RunTaskContextBudgetBreakdown, type RunTaskContextCompactionEvent, type RunTaskContextCompactionStrategy, type RunTaskContextNoticeEvent, type RunTaskContextSummaryEntry, type RunTaskContinuation, type RunTaskOffloadRecorder, type RunTaskOnContextCompaction, type RunTaskOnContextNotice, 
type RunTaskOnSession, type RunTaskOptions, type RunTaskResult, type RunTaskResumeState, type RunTaskSessionSummary, type RunTaskState, type RunTaskStateSlice, type RunTaskStatus, type RunTaskToolTraceSlice, type RuntimeCustomToolConfig, type RuntimeExternalToolConfig, type RuntimeFlowToolConfig, type RuntimeLocalToolConfig, type RuntimeSubagentToolConfig, type RuntimeTool, type RuntimeToolConfig, Runtype, type AgentSkillBinding as RuntypeAgentSkillBinding, RuntypeApiError, RuntypeClient, type ConditionalStepConfig as RuntypeConditionalStepConfig, type RuntypeConfig, type FetchGitHubStepConfig as RuntypeFetchGitHubStepConfig, type FetchUrlStepConfig as RuntypeFetchUrlStepConfig, RuntypeFlowBuilder, type FlowConfig as RuntypeFlowConfig, type GenerateEmbeddingStepConfig as RuntypeGenerateEmbeddingStepConfig, type Message as RuntypeMessage, type ModelOverride$1 as RuntypeModelOverride, type Prompt as RuntypePrompt, type PromptStepConfig as RuntypePromptStepConfig, type RuntypeRecord, type RecordConfig as RuntypeRecordConfig, type RetrieveRecordStepConfig as RuntypeRetrieveRecordStepConfig, type SearchStepConfig as RuntypeSearchStepConfig, type SendEmailStepConfig as RuntypeSendEmailStepConfig, type SendEventStepConfig as RuntypeSendEventStepConfig, type SendStreamStepConfig as RuntypeSendStreamStepConfig, type SendTextStepConfig as RuntypeSendTextStepConfig, type SetVariableStepConfig as RuntypeSetVariableStepConfig, type Skill as RuntypeSkill, type SkillCapabilities as RuntypeSkillCapabilities, type SkillFrontmatter as RuntypeSkillFrontmatter, type SkillManifest as RuntypeSkillManifest, type SkillProposal as RuntypeSkillProposal, type SkillRuntypeExtensions as RuntypeSkillRuntypeExtensions, type SkillScanFinding as RuntypeSkillScanFinding, type SkillScanResult as RuntypeSkillScanResult, type SkillScanVerdict as RuntypeSkillScanVerdict, type SkillVersion as RuntypeSkillVersion, type TransformDataStepConfig as RuntypeTransformDataStepConfig, type UpsertFlowConfig as 
RuntypeUpsertFlowConfig, type UpsertRecordStepConfig as RuntypeUpsertRecordStepConfig, type VectorSearchStepConfig as RuntypeVectorSearchStepConfig, type WaitUntilStepConfig as RuntypeWaitUntilStepConfig, SDK_USER_AGENT, SDK_VERSION, STEP_FIELD_REGISTRY, STEP_TYPE_TO_METHOD, type Schedule, type ScheduleExecutionOptions, type ScheduleListParams, type ScheduleMessage, type ScheduleMessageSet, type ScheduleMessages, type ScheduleMutationResponse, type ScheduleRun, type ScheduleRunNowResponse, type ScheduleStatusResponse, type ScheduleTarget, type ScheduleTrigger, SchedulesEndpoint, type SearchStepConfig$1 as SearchStepConfig, type Secret, type SecretCheckResponse, type SecretDeleteResponse, type SecretSetupUrlRequest, type SecretSetupUrlResponse, SecretsEndpoint, type SendEmailStepConfig$1 as SendEmailStepConfig, type SendEventStepConfig$1 as SendEventStepConfig, type SendStreamStepConfig$1 as SendStreamStepConfig, type SendTextStepConfig$1 as SendTextStepConfig, type SetVariableStepConfig$1 as SetVariableStepConfig, type SkillDefinition, SkillDriftError, SkillEnsureConflictError, type SkillListPage, type SkillListPagination, type SkillListParams, type SkillManifestInput, type SkillMarkdownInput, type SkillOrigin, type SkillProposalStatus, SkillProposalsNamespace, type SkillPullResult, type SkillStatus, type SkillTrustLevel, type SkillVersionStatus, type SkillWithVersion, type SkillWriteInput, SkillsNamespace, type SlackInstallRequest, type StepCompleteEvent, type StepDeltaEvent, type StepFallback, type StepFieldMeta, type StepStartEvent, type StepWaitingLocalEvent, type StreamCallbacks, type StreamConsumeOptions, type StreamEvent, type StreamEventOf, type SubagentToolConfig, type Surface, type SurfaceDefinition, type SurfaceDefinitionEnvironment, type SurfaceDefinitionStatus, type SurfaceDefinitionType, SurfaceDriftError, SurfaceEnsureConflictError, type SurfaceListParams, type SurfacePullResult, SurfacesEndpoint, SurfacesNamespace, type TextContentPart, type Tool, 
type ToolApprovalGrant, ToolApprovalGrantsEndpoint, type ToolConfig, type ToolDefinition, type ToolDefinitionType, ToolDriftError, ToolEnsureConflictError, type ToolPullResult, type ToolWithValidation, type ToolsConfig, ToolsEndpoint, ToolsNamespace, type TransformDataStepConfig$1 as TransformDataStepConfig, UNIFIED_EVENTS_QUERY, type UpdateAppRequest, type UpdateClientTokenRequest, type UpdateConversationRequest, type UpdatePromptData, type UpdateProviderKeyRequest, type UpdateScheduleRequest, type UpdateSecretRequest, type UpdateToolRequest, type UpdatedFlow, type UpsertFlowConfig$1 as UpsertFlowConfig, type UpsertOptions, type UpsertRecordStepConfig$1 as UpsertRecordStepConfig, type UserProfile, UsersEndpoint, type VectorSearchStepConfig$1 as VectorSearchStepConfig, type VersionType, type WaitUntilStepConfig$1 as WaitUntilStepConfig, type WorkflowCompileDeps, type WorkflowCompletionCriteriaConfig, type WorkflowConfig, type WorkflowConfigFactory, type WorkflowContext, type WorkflowDefinition, type WorkflowHookEntry, type WorkflowHookKind, type WorkflowHookRef, type WorkflowHookSignatures, type WorkflowMilestoneConfig, type WorkflowPhase, type WorkflowPolicyConfig, type WorkflowRecoveryConfig, type WorkflowSlot, type WorkflowStallPolicy, applyGeneratedRuntimeToolProposalToDispatchRequest, attachRuntimeToolsToDispatchRequest, buildEmptySessionNudge, buildGeneratedRuntimeToolGateOutput, buildLedgerOffloadReference, buildPolicyGuidance, buildSendViewOffloadMarker, calledTool, compileWorkflowConfig, completed, computeAgentContentHash, computeEvalContentHash, computeFlowContentHash, computeFpoContentHash, computeProductContentHash, computeSkillContentHash, computeSurfaceContentHash, computeToolContentHash, contains, cost, createAgentEventTranslator, createClient, createExternalTool, createFlowEventTranslator, defaultWorkflow, defaultWorkflowConfig, defineAgent, defineEval, defineFlow, defineFpo, definePlaybook, defineProduct, defineSkill, defineSurface, defineTool, 
deployWorkflow, ensureDefaultWorkflowHooks, ensureEval, ensureFpo, evaluateGeneratedRuntimeToolProposal, extractDeclaredToolResultChars, gameWorkflow, getDefaultPlanPath, getLikelySupportingCandidatePaths, interpolateWorkflowTemplate, isDiscoveryToolName, isMarathonArtifactPath, isPreservationSensitiveTask, isUnifiedEventType, isWorkflowHookRef, jsonField, judge, judges, latency, length, listWorkflowHooks, matchesExpected, maxToolCalls, noError, normalizeAgentDefinition, normalizeCandidatePath, normalizeFpoDefinition, normalizeProductDefinition, normalizeSkillDefinition, normalizeSurfaceDefinition, normalizeToolDefinition, notCalledTool, notContains, parseFinalBuffer, parseLedgerArtifactRelativePath, parseOffloadedOutputId, parseSSEChunk, processStream, pullEval, pullFpo, ranStep, regex, registerWorkflowHook, resolveStallStopAfter, resolveWorkflowHook, runEvalSuite, sanitizeTaskSlug, shouldInjectEmptySessionNudge, shouldRequestModelEscalation, stepOrder, streamEvents, toolOrder, unregisterWorkflowHook, usedNoTools, validJson, withUnifiedEvents };
+export { type AIGrader, type Agent, type AgentApprovalCompleteEvent, type AgentApprovalStartEvent, type AgentCompleteEvent, type AgentDefinition, type AgentDefinitionConfig, AgentDriftError, AgentEnsureConflictError, type AgentErrorEvent, type AgentEvent, type AgentEventType, type AgentExecuteRequest, type AgentExecuteResponse, type AgentIterationCompleteEvent, type AgentIterationStartEvent, type AgentMediaEvent, type AgentMessage, type AgentPausedEvent, type AgentPingEvent, type AgentPullResult, type AgentReflectionEvent, type AgentRuntimeToolDefinition, type AgentStartEvent, type AgentStreamCallbacks, type AgentStreamEvent, type AgentSubagentConfig, type AgentToolCompleteEvent, type AgentToolDeltaEvent, type AgentToolInputCompleteEvent, type AgentToolInputDeltaEvent, type AgentToolStartEvent, type AgentTurnCompleteEvent, type AgentTurnDeltaEvent, type AgentTurnStartEvent, type AgentVersionDetail, type AgentVersionListItem, type AgentVersionPublishResponse, AgentVersionsEndpoint, type AgentVersionsListResponse, AgentsEndpoint, AgentsNamespace, AnalyticsEndpoint, type ApiClient, type ApiKey, ApiKeysEndpoint, type ApiResponse, type App, type AppManifest, type AppVersion, type ApplyGeneratedProposalOptions, type ApplyGeneratedProposalResult, AppsEndpoint, type AssetReferenceContentPart, type AttachRuntimeToolsOptions, type BaseAgentEvent, BatchBuilder, type BatchClient, type BatchListParams, type BatchOptions, type BatchRequest, type BatchResult, type BatchScheduleConfig, type BatchStatus, BatchesNamespace, BillingEndpoint, type BillingSpendAnalyticsParams, type BindSkillInput, type BuiltInGraderId, type BuiltInTool, type BulkEditCondition, type BulkEditRequest, type BulkEditResponse, type BulkEditResult, type CaseExpected, ChatEndpoint, type CheckGrader, ClientBatchBuilder, type ClientConfig, type ClientConversation, ClientEvalBuilder, ClientFlowBuilder, type ClientToken, type ClientTokenConfig, type ClientTokenEnvironment, type ClientTokenVersionPin, 
ClientTokensEndpoint, type ClientToolDefinition, type ClientWidgetTheme, type ConditionalGetResult, type ConditionalStepConfig$1 as ConditionalStepConfig, type ContextErrorHandling, type ContextFallback, ContextTemplatesEndpoint, type Conversation, type ConversationListItem, type ConversationListParams, type ConversationMessage, type ConversationSource, ConversationsEndpoint, type ConversationsListResponse, type CreateApiKeyRequest, type CreateAppRequest, type CreateClientTokenRequest, type CreateClientTokenResponse, type CreateConversationRequest, type CreateFlowRequest, type CreateModelConfigRequest, type CreatePromptData, type CreatePromptRequest, type CreateProviderKeyRequest, type CreateRecordRequest, type CreateScheduleRequest, type CreateSecretRequest, type CreateToolRequest, type CustomMCPServer, type CustomMCPServerAuth, type CustomToolConfig, DEFAULT_RECOVERY_AFTER_EMPTY_SESSIONS, DEFAULT_STALL_STOP_AFTER, type DefineAgentInput, type DefineEvalCaseInput, type DefineEvalInput, type DefineFlowInput, type DefineProductInput, type DefineSkillInput, type DefineSurfaceInput, type DefineToolInput, type DeployCfSandboxRequest, type DeployCfSandboxResponse, type DeploySandboxRequest, type DeploySandboxResponse, type DiscoveredModel, type DispatchClient, DispatchEndpoint, type DispatchEnvironment, type DispatchEvent, type DispatchOptions$1 as DispatchOptions, type DispatchRequest, type EnsureAgentConverged, type EnsureAgentOptions, type EnsureAgentPlan, type EnsureAgentResult, type EnsureEvalResult, type EnsureFlowConverged, type EnsureFlowOptions, type EnsureFlowPlan, type EnsureFlowResult, type EnsureFpoOptions, type EnsureFpoResult, type EnsureProductConverged, type EnsureProductOptions, type EnsureProductPlan, type EnsureProductResult, type EnsureSkillConverged, type EnsureSkillOptions, type EnsureSkillPlan, type EnsureSkillResult, type EnsureSurfaceConverged, type EnsureSurfaceOptions, type EnsureSurfacePlan, type EnsureSurfaceResult, type EnsureToolConverged, 
type EnsureToolOptions, type EnsureToolPlan, type EnsureToolResult, type ErrorHandlingMode, EvalBuilder, type EvalCaseDefinition, type EvalCaseInput, type EvalClient, type EvalDefinition, EvalEndpoint, type EvalListParams, type EvalMessage, type EvalOptions, type EvalPullResult, type EvalRecord, type EvalRequest, type EvalResult, type EvalRunConfig, EvalRunner, type EvalStatus, type EvalTarget, EvalsNamespace, type ExecuteToolRequest, type ExecuteToolResponse, type ExecutionStreamEvent, type ExternalAgentContext, type ExternalToolConfig, type FallbackFailEvent, type FallbackStartEvent, type FallbackSuccessEvent, type FallbackTrigger, type FallbackTriggerType, type FallbacksExhaustedEvent, type FallbacksInitiatedEvent, type FetchGitHubStepConfig$1 as FetchGitHubStepConfig, type FetchUrlStepConfig$1 as FetchUrlStepConfig, type FieldFormat, type FileContentPart, type Flow, type FlowAttachment, FlowBuilder, type FlowCompleteEvent, type FlowConfig$1 as FlowConfig, type FlowDefinition, type FlowDefinitionStep, FlowDriftError, FlowEnsureConflictError, type FlowErrorEvent, type FlowFallback, type FlowInlineEvalInput, type FlowListItem, type FlowPausedEvent, type FlowPullResult, FlowResult, type FlowStartEvent, type FlowStep, type FlowStepDefinition, type FlowStepType, FlowStepsEndpoint, type FlowStreamEvent, type FlowSummary, type FlowToolConfig, type FlowValidationClient, type FlowValidationIssue, type FlowValidationResult, type FlowVersionDetail, type FlowVersionListItem, type FlowVersionPublishResponse, FlowVersionsEndpoint, type FlowVersionsListResponse, FlowsEndpoint, FlowsNamespace, type FpoEntityOutcome, type FpoInput, type GenerateEmbeddingStepConfig$1 as GenerateEmbeddingStepConfig, type GeneratedRuntimeToolGateDecision, type GeneratedRuntimeToolGateOptions, type Gradeable, type GraderConfig, type GraderOutcome, type GraderSeverity, type ImageContentPart, type Integration, type IntegrationTool, IntegrationsEndpoint, type IntegrationsListResponse, type JSONSchema, 
type JsonArray, type JsonObject, type JsonPrimitive, type JsonValue, LEDGER_ARTIFACT_LINE_PREFIX, type ListConversationsResponse, type ListParams, type LocalToolConfig, type LocalToolDefinition, type LocalToolExecutionCompleteEvent, type LocalToolExecutionLoopSnapshotSlice, type LocalToolExecutionStartEvent, type LogEntry, type LogQueryParams, type LogQueryResponse, type LogQueryResult, type LogStatsParams, type LogStatsResponse, type LogStatsResult, LogsEndpoint, type Message$1 as Message, type MessageContent, type MessageFallback, type Metadata, type ModelConfig, ModelConfigsEndpoint, type ModelFallback, type ModelOverride, type ModelUsageDetail, type ModelUsageQueryParams, type ModelUsageResponse, type ModelUsageSummary, type ModelUsageTimeSeries, type PaginationResponse, type ProductDefinition, ProductDriftError, ProductEnsureConflictError, type ProductPullResult, ProductsNamespace, type Prompt$1 as Prompt, type PromptErrorHandling, type PromptFallback, type PromptListParams, type PromptStepConfig$1 as PromptStepConfig, PromptsEndpoint, PromptsNamespace, type ProviderApiKey, type ProviderKeyModel, ProviderKeysEndpoint, type PullFpoResult, RUNTYPE_CLIENT_KIND, type ReasoningConfig, type ReasoningContentPart, type ReasoningValue, type RecordConfig$1 as RecordConfig, type RecordCostAggregation, type RecordCostModelBreakdown, type RecordFilter, type RecordFilterCondition, type RecordFilterGroup, type RecordFilterOperator, type RecordListItem, type RecordListParams, type RecordStepResult, type RecordStepResultsParams, type RecordStepResultsResponse, type RecordWriteResponse, RecordsEndpoint, type RetrieveRecordStepConfig$1 as RetrieveRecordStepConfig, type RetryFallback, type RunEvalCaseResult, type RunEvalInput, type RunEvalResult, type RunTaskContextBudgetBreakdown, type RunTaskContextCompactionEvent, type RunTaskContextCompactionStrategy, type RunTaskContextNoticeEvent, type RunTaskContextSummaryEntry, type RunTaskContinuation, type RunTaskOffloadRecorder, type 
RunTaskOnContextCompaction, type RunTaskOnContextNotice, type RunTaskOnSession, type RunTaskOptions, type RunTaskResult, type RunTaskResumeState, type RunTaskSessionSummary, type RunTaskState, type RunTaskStateSlice, type RunTaskStatus, type RunTaskToolTraceSlice, type RuntimeCustomToolConfig, type RuntimeExternalToolConfig, type RuntimeFlowToolConfig, type RuntimeLocalToolConfig, type RuntimeSubagentToolConfig, type RuntimeTool, type RuntimeToolConfig, Runtype, type AgentSkillBinding as RuntypeAgentSkillBinding, RuntypeApiError, RuntypeClient, type ConditionalStepConfig as RuntypeConditionalStepConfig, type RuntypeConfig, type FetchGitHubStepConfig as RuntypeFetchGitHubStepConfig, type FetchUrlStepConfig as RuntypeFetchUrlStepConfig, RuntypeFlowBuilder, type FlowConfig as RuntypeFlowConfig, type GenerateEmbeddingStepConfig as RuntypeGenerateEmbeddingStepConfig, type Message as RuntypeMessage, type ModelOverride$1 as RuntypeModelOverride, type Prompt as RuntypePrompt, type PromptStepConfig as RuntypePromptStepConfig, type RuntypeRecord, type RecordConfig as RuntypeRecordConfig, type RetrieveRecordStepConfig as RuntypeRetrieveRecordStepConfig, type SearchStepConfig as RuntypeSearchStepConfig, type SendEmailStepConfig as RuntypeSendEmailStepConfig, type SendEventStepConfig as RuntypeSendEventStepConfig, type SendStreamStepConfig as RuntypeSendStreamStepConfig, type SendTextStepConfig as RuntypeSendTextStepConfig, type SetVariableStepConfig as RuntypeSetVariableStepConfig, type Skill as RuntypeSkill, type SkillCapabilities as RuntypeSkillCapabilities, type SkillFrontmatter as RuntypeSkillFrontmatter, type SkillManifest as RuntypeSkillManifest, type SkillProposal as RuntypeSkillProposal, type SkillRuntypeExtensions as RuntypeSkillRuntypeExtensions, type SkillScanFinding as RuntypeSkillScanFinding, type SkillScanResult as RuntypeSkillScanResult, type SkillScanVerdict as RuntypeSkillScanVerdict, type SkillVersion as RuntypeSkillVersion, type TransformDataStepConfig as 
RuntypeTransformDataStepConfig, type UpsertFlowConfig as RuntypeUpsertFlowConfig, type UpsertRecordStepConfig as RuntypeUpsertRecordStepConfig, type VectorSearchStepConfig as RuntypeVectorSearchStepConfig, type WaitUntilStepConfig as RuntypeWaitUntilStepConfig, SDK_USER_AGENT, SDK_VERSION, STEP_FIELD_REGISTRY, STEP_TYPE_TO_METHOD, type Schedule, type ScheduleExecutionOptions, type ScheduleListParams, type ScheduleMessage, type ScheduleMessageSet, type ScheduleMessages, type ScheduleMutationResponse, type ScheduleRun, type ScheduleRunNowResponse, type ScheduleStatusResponse, type ScheduleTarget, type ScheduleTrigger, SchedulesEndpoint, type SearchStepConfig$1 as SearchStepConfig, type Secret, type SecretCheckResponse, type SecretDeleteResponse, type SecretSetupUrlRequest, type SecretSetupUrlResponse, SecretsEndpoint, type SendEmailStepConfig$1 as SendEmailStepConfig, type SendEventStepConfig$1 as SendEventStepConfig, type SendStreamStepConfig$1 as SendStreamStepConfig, type SendTextStepConfig$1 as SendTextStepConfig, type SetVariableStepConfig$1 as SetVariableStepConfig, type SkillDefinition, SkillDriftError, SkillEnsureConflictError, type SkillListPage, type SkillListPagination, type SkillListParams, type SkillManifestInput, type SkillMarkdownInput, type SkillOrigin, type SkillProposalStatus, SkillProposalsNamespace, type SkillPullResult, type SkillStatus, type SkillTrustLevel, type SkillVersionStatus, type SkillWithVersion, type SkillWriteInput, SkillsNamespace, type SlackInstallRequest, type StepCompleteEvent, type StepDeltaEvent, type StepFallback, type StepFieldMeta, type StepStartEvent, type StepWaitingLocalEvent, type StreamCallbacks, type StreamConsumeOptions, type StreamEvent, type StreamEventOf, type SubagentToolConfig, type Surface, type SurfaceDefinition, type SurfaceDefinitionEnvironment, type SurfaceDefinitionStatus, type SurfaceDefinitionType, SurfaceDriftError, SurfaceEnsureConflictError, type SurfaceListParams, type SurfacePullResult, 
SurfacesEndpoint, SurfacesNamespace, type TextContentPart, type Tool, type ToolApprovalGrant, ToolApprovalGrantsEndpoint, type ToolConfig, type ToolDefinition, type ToolDefinitionType, ToolDriftError, ToolEnsureConflictError, type ToolPullResult, type ToolWithValidation, type ToolsConfig, ToolsEndpoint, ToolsNamespace, type TransformDataStepConfig$1 as TransformDataStepConfig, UNIFIED_EVENTS_QUERY, type UpdateAppRequest, type UpdateClientTokenRequest, type UpdateConversationRequest, type UpdatePromptData, type UpdateProviderKeyRequest, type UpdateScheduleRequest, type UpdateSecretRequest, type UpdateToolRequest, type UpdatedFlow, type UpsertFlowConfig$1 as UpsertFlowConfig, type UpsertOptions, type UpsertRecordStepConfig$1 as UpsertRecordStepConfig, type UserProfile, UsersEndpoint, type VectorSearchStepConfig$1 as VectorSearchStepConfig, type VersionType, type WaitUntilStepConfig$1 as WaitUntilStepConfig, type WorkflowCompileDeps, type WorkflowCompletionCriteriaConfig, type WorkflowConfig, type WorkflowConfigFactory, type WorkflowContext, type WorkflowDefinition, type WorkflowHookEntry, type WorkflowHookKind, type WorkflowHookRef, type WorkflowHookSignatures, type WorkflowMilestoneConfig, type WorkflowPhase, type WorkflowPolicyConfig, type WorkflowRecoveryConfig, type WorkflowSlot, type WorkflowStallPolicy, applyGeneratedRuntimeToolProposalToDispatchRequest, attachRuntimeToolsToDispatchRequest, buildEmptySessionNudge, buildGeneratedRuntimeToolGateOutput, buildLedgerOffloadReference, buildPolicyGuidance, buildSendViewOffloadMarker, calledTool, compileWorkflowConfig, completed, computeAgentContentHash, computeEvalContentHash, computeFlowContentHash, computeFpoContentHash, computeProductContentHash, computeSkillContentHash, computeSurfaceContentHash, computeToolContentHash, contains, cost, createAgentEventTranslator, createClient, createExternalTool, createFlowEventTranslator, defaultWorkflow, defaultWorkflowConfig, defineAgent, defineEval, defineFlow, defineFpo, 
definePlaybook, defineProduct, defineSkill, defineSurface, defineTool, deployWorkflow, ensureDefaultWorkflowHooks, ensureEval, ensureFpo, evaluateGeneratedRuntimeToolProposal, extractDeclaredToolResultChars, gameWorkflow, getDefaultPlanPath, getLikelySupportingCandidatePaths, interpolateWorkflowTemplate, isDiscoveryToolName, isMarathonArtifactPath, isPreservationSensitiveTask, isUnifiedEventType, isWorkflowHookRef, jsonField, judge, judges, latency, length, listWorkflowHooks, matchesExpected, maxToolCalls, noError, normalizeAgentDefinition, normalizeCandidatePath, normalizeFpoDefinition, normalizeProductDefinition, normalizeSkillDefinition, normalizeSurfaceDefinition, normalizeToolDefinition, notCalledTool, notContains, parseFinalBuffer, parseLedgerArtifactRelativePath, parseOffloadedOutputId, parseSSEChunk, processStream, pullEval, pullFpo, ranStep, regex, registerWorkflowHook, resolveStallStopAfter, resolveWorkflowHook, runEvalSuite, sanitizeTaskSlug, shouldInjectEmptySessionNudge, shouldRequestModelEscalation, stepOrder, streamEvents, toolOrder, unregisterWorkflowHook, usedNoTools, validJson, withUnifiedEvents };