@runtypelabs/sdk 5.4.0 → 5.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -97,7 +97,9 @@ __export(index_exports, {
97
97
  buildLedgerOffloadReference: () => buildLedgerOffloadReference,
98
98
  buildPolicyGuidance: () => buildPolicyGuidance,
99
99
  buildSendViewOffloadMarker: () => buildSendViewOffloadMarker,
100
+ calledTool: () => calledTool,
100
101
  compileWorkflowConfig: () => compileWorkflowConfig,
102
+ completed: () => completed,
101
103
  computeAgentContentHash: () => computeAgentContentHash,
102
104
  computeEvalContentHash: () => computeEvalContentHash,
103
105
  computeFlowContentHash: () => computeFlowContentHash,
@@ -107,6 +109,7 @@ __export(index_exports, {
107
109
  computeSurfaceContentHash: () => computeSurfaceContentHash,
108
110
  computeToolContentHash: () => computeToolContentHash,
109
111
  contains: () => contains,
112
+ cost: () => cost,
110
113
  createAgentEventTranslator: () => createAgentEventTranslator,
111
114
  createClient: () => createClient,
112
115
  createExternalTool: () => createExternalTool,
@@ -144,6 +147,7 @@ __export(index_exports, {
144
147
  length: () => length,
145
148
  listWorkflowHooks: () => listWorkflowHooks,
146
149
  matchesExpected: () => matchesExpected,
150
+ maxToolCalls: () => maxToolCalls,
147
151
  noError: () => noError,
148
152
  normalizeAgentDefinition: () => normalizeAgentDefinition,
149
153
  normalizeCandidatePath: () => normalizeCandidatePath,
@@ -152,6 +156,7 @@ __export(index_exports, {
152
156
  normalizeSkillDefinition: () => normalizeSkillDefinition,
153
157
  normalizeSurfaceDefinition: () => normalizeSurfaceDefinition,
154
158
  normalizeToolDefinition: () => normalizeToolDefinition,
159
+ notCalledTool: () => notCalledTool,
155
160
  notContains: () => notContains,
156
161
  parseFinalBuffer: () => parseFinalBuffer,
157
162
  parseLedgerArtifactRelativePath: () => parseLedgerArtifactRelativePath,
@@ -160,6 +165,7 @@ __export(index_exports, {
160
165
  processStream: () => processStream,
161
166
  pullEval: () => pullEval,
162
167
  pullFpo: () => pullFpo,
168
+ ranStep: () => ranStep,
163
169
  regex: () => regex,
164
170
  registerWorkflowHook: () => registerWorkflowHook,
165
171
  resolveStallStopAfter: () => resolveStallStopAfter,
@@ -168,8 +174,11 @@ __export(index_exports, {
168
174
  sanitizeTaskSlug: () => sanitizeTaskSlug,
169
175
  shouldInjectEmptySessionNudge: () => shouldInjectEmptySessionNudge,
170
176
  shouldRequestModelEscalation: () => shouldRequestModelEscalation,
177
+ stepOrder: () => stepOrder,
171
178
  streamEvents: () => streamEvents,
179
+ toolOrder: () => toolOrder,
172
180
  unregisterWorkflowHook: () => unregisterWorkflowHook,
181
+ usedNoTools: () => usedNoTools,
173
182
  validJson: () => validJson,
174
183
  withUnifiedEvents: () => withUnifiedEvents
175
184
  });
@@ -2064,315 +2073,755 @@ function resolveBatchExecutionId(pausedTools) {
2064
2073
  return "";
2065
2074
  }
2066
2075
 
2067
- // src/flows-ensure.ts
2068
- function isPlainObject(value) {
2069
- return value !== null && typeof value === "object" && !Array.isArray(value);
2070
- }
2071
- function normalizeConfigForHash(config) {
2072
- if (!isPlainObject(config)) return {};
2073
- const normalized = {};
2074
- for (const key of Object.keys(config).sort()) {
2075
- const value = config[key];
2076
- if (value === void 0) continue;
2077
- if (value !== null && typeof value === "object" && !Array.isArray(value)) {
2078
- normalized[key] = normalizeConfigForHash(value);
2079
- } else if (Array.isArray(value)) {
2080
- normalized[key] = value.map((item) => {
2081
- if (item !== null && typeof item === "object" && !Array.isArray(item)) {
2082
- return normalizeConfigForHash(item);
2076
+ // src/evals-ensure.ts
2077
+ var CHECK_GRADER_KINDS = /* @__PURE__ */ new Set([
2078
+ "contains",
2079
+ "not_contains",
2080
+ "matches_expected",
2081
+ "regex",
2082
+ "valid_json",
2083
+ "json_field",
2084
+ "length",
2085
+ "latency",
2086
+ "no_error",
2087
+ // Trace checks.
2088
+ "called_tool",
2089
+ "not_called_tool",
2090
+ "used_no_tools",
2091
+ "max_tool_calls",
2092
+ "tool_order",
2093
+ "ran_step",
2094
+ "step_order",
2095
+ "completed",
2096
+ "cost"
2097
+ ]);
2098
+ function gradeable(data) {
2099
+ const obj = { ...data };
2100
+ const rebuild = (patch) => gradeable({ ...obj, ...patch });
2101
+ Object.defineProperty(obj, "gate", {
2102
+ value: () => rebuild({ severity: "gate" }),
2103
+ enumerable: false
2104
+ });
2105
+ Object.defineProperty(obj, "soft", {
2106
+ value: () => rebuild({ severity: "soft" }),
2107
+ enumerable: false
2108
+ });
2109
+ if (data.kind === "ai") {
2110
+ Object.defineProperty(obj, "atLeast", {
2111
+ value: (threshold) => {
2112
+ if (typeof threshold !== "number" || !Number.isFinite(threshold) || threshold < 1 || threshold > 5) {
2113
+ throw new Error("atLeast() requires a numeric judge threshold between 1 and 5");
2083
2114
  }
2084
- return item;
2085
- });
2086
- } else {
2087
- normalized[key] = value;
2088
- }
2115
+ return rebuild({ threshold });
2116
+ },
2117
+ enumerable: false
2118
+ });
2089
2119
  }
2090
- return normalized;
2120
+ return obj;
2091
2121
  }
2092
- function normalizeStepForHash(step) {
2093
- const stepObj = isPlainObject(step) ? step : {};
2094
- return {
2095
- type: typeof stepObj.type === "string" ? stepObj.type : "",
2096
- name: typeof stepObj.name === "string" ? stepObj.name : "",
2097
- enabled: stepObj.enabled !== false,
2098
- ...typeof stepObj.when === "string" ? { when: stepObj.when } : {},
2099
- config: normalizeConfigForHash(stepObj.config),
2100
- order: typeof stepObj.order === "number" ? stepObj.order : 0
2101
- };
2122
+ function contains(value, opts) {
2123
+ return gradeable({
2124
+ kind: "contains",
2125
+ value,
2126
+ ...opts?.caseSensitive ? { caseSensitive: true } : {}
2127
+ });
2102
2128
  }
2103
- async function computeFlowContentHash(steps) {
2104
- const normalized = [...steps].sort((a, b) => {
2105
- const orderA = isPlainObject(a) && typeof a.order === "number" ? a.order : 0;
2106
- const orderB = isPlainObject(b) && typeof b.order === "number" ? b.order : 0;
2107
- return orderA - orderB;
2108
- }).map(normalizeStepForHash);
2109
- const serialized = JSON.stringify(normalized);
2110
- const encoded = new TextEncoder().encode(serialized);
2111
- const hashBuffer = await crypto.subtle.digest("SHA-256", encoded);
2112
- return Array.from(new Uint8Array(hashBuffer)).map((b) => b.toString(16).padStart(2, "0")).join("");
2129
+ function notContains(value, opts) {
2130
+ return gradeable({
2131
+ kind: "not_contains",
2132
+ value,
2133
+ ...opts?.caseSensitive ? { caseSensitive: true } : {}
2134
+ });
2113
2135
  }
2114
- var DEFINE_FLOW_TOP_LEVEL_KEYS = /* @__PURE__ */ new Set(["name", "steps"]);
2115
- var DEFINE_FLOW_STEP_KEYS = /* @__PURE__ */ new Set([
2116
- "type",
2117
- "name",
2118
- "order",
2119
- "enabled",
2120
- "when",
2121
- "config"
2122
- ]);
2123
- function collectStepNonPortableToolRefs(config, path) {
2124
- const found = [];
2125
- const tools = config.tools;
2126
- const isAccountScoped = (ref) => typeof ref === "string" && ref.startsWith("tool_");
2127
- const isRawId = (ref, prefix) => typeof ref === "string" && ref.startsWith(prefix);
2128
- const scanArray = (value, subPath) => {
2129
- if (!Array.isArray(value)) return;
2130
- value.forEach((ref, i) => {
2131
- if (isAccountScoped(ref)) found.push(`${subPath}[${i}]`);
2132
- });
2133
- };
2134
- const scanKeys = (value, subPath) => {
2135
- if (!isPlainObject(value)) return;
2136
- for (const key of Object.keys(value)) {
2137
- if (isAccountScoped(key)) found.push(`${subPath}.${key}`);
2138
- }
2139
- };
2140
- if (isPlainObject(tools)) {
2141
- scanArray(tools.toolIds, `${path}.tools.toolIds`);
2142
- scanKeys(tools.toolConfigs, `${path}.tools.toolConfigs`);
2143
- scanKeys(tools.perToolLimits, `${path}.tools.perToolLimits`);
2144
- if (isPlainObject(tools.approval)) {
2145
- scanArray(tools.approval.require, `${path}.tools.approval.require`);
2146
- }
2147
- if (isPlainObject(tools.subagentConfig)) {
2148
- scanArray(tools.subagentConfig.toolPool, `${path}.tools.subagentConfig.toolPool`);
2149
- }
2150
- if (isPlainObject(tools.codeModeConfig)) {
2151
- scanArray(tools.codeModeConfig.toolPool, `${path}.tools.codeModeConfig.toolPool`);
2152
- }
2153
- if (Array.isArray(tools.runtimeTools)) {
2154
- tools.runtimeTools.forEach((runtimeTool, i) => {
2155
- if (!isPlainObject(runtimeTool) || !isPlainObject(runtimeTool.config)) return;
2156
- const base = `${path}.tools.runtimeTools[${i}].config`;
2157
- const rtConfig = runtimeTool.config;
2158
- if (runtimeTool.toolType === "subagent" && isRawId(rtConfig.agentId, "agent_")) {
2159
- found.push(`${base}.agentId`);
2160
- } else if (runtimeTool.toolType === "flow" && isRawId(rtConfig.flowId, "flow_")) {
2161
- found.push(`${base}.flowId`);
2162
- }
2163
- });
2164
- }
2165
- }
2166
- if (isAccountScoped(config.toolId)) {
2167
- found.push(`${path}.toolId`);
2168
- }
2169
- if (isRawId(config.agentId, "agent_")) {
2170
- found.push(`${path}.agentId`);
2171
- }
2172
- for (const branch of ["trueSteps", "falseSteps"]) {
2173
- const nested = config[branch];
2174
- if (!Array.isArray(nested)) continue;
2175
- nested.forEach((nestedStep, i) => {
2176
- if (isPlainObject(nestedStep) && isPlainObject(nestedStep.config)) {
2177
- found.push(
2178
- ...collectStepNonPortableToolRefs(
2179
- nestedStep.config,
2180
- `${path}.${branch}[${i}].config`
2181
- )
2182
- );
2183
- }
2184
- });
2185
- }
2186
- return found;
2136
+ function matchesExpected() {
2137
+ return gradeable({ kind: "matches_expected" });
2187
2138
  }
2188
- function defineFlow(input) {
2189
- if (!input || typeof input !== "object") {
2190
- throw new Error("defineFlow requires a definition object");
2139
+ function regex(pattern, flags) {
2140
+ return gradeable({ kind: "regex", pattern, ...flags ? { flags } : {} });
2141
+ }
2142
+ function validJson() {
2143
+ return gradeable({ kind: "valid_json" });
2144
+ }
2145
+ function jsonField(path, opts) {
2146
+ return gradeable({
2147
+ kind: "json_field",
2148
+ path,
2149
+ ...opts && "equals" in opts && opts.equals !== void 0 ? { equals: opts.equals } : {},
2150
+ ...opts && typeof opts.exists === "boolean" ? { exists: opts.exists } : {}
2151
+ });
2152
+ }
2153
+ function length(opts) {
2154
+ if (!opts || opts.minChars === void 0 && opts.maxChars === void 0) {
2155
+ throw new Error("length() requires at least one of minChars or maxChars");
2191
2156
  }
2192
- if (typeof input.name !== "string" || input.name.length === 0) {
2193
- throw new Error('defineFlow requires a non-empty string "name"');
2157
+ return gradeable({
2158
+ kind: "length",
2159
+ ...opts.minChars !== void 0 ? { minChars: opts.minChars } : {},
2160
+ ...opts.maxChars !== void 0 ? { maxChars: opts.maxChars } : {}
2161
+ });
2162
+ }
2163
+ function latency(maxMs) {
2164
+ if (!Number.isFinite(maxMs) || maxMs <= 0) {
2165
+ throw new Error("latency() requires a positive maxMs");
2194
2166
  }
2195
- const unknownKeys = Object.keys(input).filter((key) => !DEFINE_FLOW_TOP_LEVEL_KEYS.has(key));
2196
- if (unknownKeys.length > 0) {
2197
- throw new Error(
2198
- `defineFlow: unknown field(s): ${unknownKeys.join(", ")}. Allowed fields are name and steps. (Description is not part of the v1 ensure surface.)`
2199
- );
2167
+ return gradeable({ kind: "latency", maxMs });
2168
+ }
2169
+ function noError() {
2170
+ return gradeable({ kind: "no_error" });
2171
+ }
2172
+ function calledTool(name, opts) {
2173
+ if (typeof name !== "string" || name.length === 0) {
2174
+ throw new Error("calledTool() requires a non-empty tool name");
2200
2175
  }
2201
- if (!Array.isArray(input.steps) || input.steps.length === 0) {
2202
- throw new Error('defineFlow requires a non-empty "steps" array');
2176
+ if (opts?.times !== void 0 && (!Number.isInteger(opts.times) || opts.times <= 0)) {
2177
+ throw new Error('calledTool() "times" must be a positive integer');
2203
2178
  }
2204
- const steps = input.steps.map((step, index) => {
2205
- if (!isPlainObject(step)) {
2206
- throw new Error(`defineFlow: steps[${index}] must be an object`);
2207
- }
2208
- if (typeof step.type !== "string" || step.type.length === 0) {
2209
- throw new Error(`defineFlow: steps[${index}] requires a non-empty string "type"`);
2210
- }
2211
- if (typeof step.name !== "string" || step.name.length === 0) {
2212
- throw new Error(`defineFlow: steps[${index}] requires a non-empty string "name"`);
2213
- }
2214
- const unknownStepKeys = Object.keys(step).filter((key) => !DEFINE_FLOW_STEP_KEYS.has(key));
2215
- if (unknownStepKeys.length > 0) {
2216
- throw new Error(
2217
- `defineFlow: steps[${index}] has unknown field(s): ${unknownStepKeys.join(", ")}. Allowed step fields are type, name, order, enabled, when, config. (Step ids are server artifacts and not part of a portable definition.)`
2218
- );
2219
- }
2220
- const config = isPlainObject(step.config) ? step.config : void 0;
2221
- if (config) {
2222
- const nonPortable = collectStepNonPortableToolRefs(config, `steps[${index}].config`);
2223
- if (nonPortable.length > 0) {
2224
- throw new Error(
2225
- `defineFlow: account-scoped reference(s) at ${nonPortable.join(", ")}. Definitions must be environment-portable \u2014 tool_\u2026/agent_\u2026/flow_\u2026 IDs belong to one account/environment. Use builtin:/platform:/mcp: references, or reference a saved resource by name \u2014 tool:<name>, agent:<name>, or flow:<name> instead.`
2226
- );
2227
- }
2228
- }
2229
- return {
2230
- type: step.type,
2231
- name: step.name,
2232
- // Explicit 1-based order (the flow builder's convention) so the local
2233
- // probe hash agrees with the server's persisted step order.
2234
- order: typeof step.order === "number" ? step.order : index + 1,
2235
- ...step.enabled !== void 0 ? { enabled: step.enabled } : {},
2236
- ...typeof step.when === "string" ? { when: step.when } : {},
2237
- ...config ? { config } : {}
2238
- };
2179
+ return gradeable({
2180
+ kind: "called_tool",
2181
+ name,
2182
+ ...opts && "input" in opts && opts.input !== void 0 ? { input: opts.input } : {},
2183
+ ...opts && "output" in opts && opts.output !== void 0 ? { output: opts.output } : {},
2184
+ ...opts && typeof opts.isError === "boolean" ? { isError: opts.isError } : {},
2185
+ ...opts?.times !== void 0 ? { times: opts.times } : {}
2239
2186
  });
2240
- return { name: input.name, steps };
2241
2187
  }
2242
- var FlowEnsureConflictError = class extends Error {
2243
- constructor(body) {
2244
- super(body.error ?? `Flow ensure conflict: ${body.code}`);
2245
- this.name = "FlowEnsureConflictError";
2246
- this.code = body.code;
2247
- this.lastModifiedSource = body.lastModifiedSource;
2248
- this.modifiedAt = body.modifiedAt;
2249
- this.currentHash = body.currentHash;
2188
+ function notCalledTool(name) {
2189
+ if (typeof name !== "string" || name.length === 0) {
2190
+ throw new Error("notCalledTool() requires a non-empty tool name");
2250
2191
  }
2251
- };
2252
- var FlowDriftError = class extends Error {
2253
- constructor(plan) {
2254
- super(
2255
- `Flow "${plan.flowId ?? "definition"}" drifted: plan is '${plan.changes}' (changed: ${plan.changedKeys.join(", ") || "n/a"}). Run client.flows.pull(name) to absorb the remote edit into your repo, or re-run ensure to converge.`
2256
- );
2257
- this.name = "FlowDriftError";
2258
- this.plan = plan;
2192
+ return gradeable({ kind: "not_called_tool", name });
2193
+ }
2194
+ function usedNoTools() {
2195
+ return gradeable({ kind: "used_no_tools" });
2196
+ }
2197
+ function maxToolCalls(max) {
2198
+ if (!Number.isInteger(max) || max < 0) {
2199
+ throw new Error("maxToolCalls() requires a non-negative integer");
2259
2200
  }
2260
- };
2261
- function parseRequestError(err) {
2262
- if (!(err instanceof Error)) return { status: null, body: null };
2263
- const match = err.message.match(/^API request failed: (\d{3}) .*? - ([\s\S]*)$/);
2264
- if (!match) return { status: null, body: null };
2265
- try {
2266
- return { status: Number(match[1]), body: JSON.parse(match[2]) };
2267
- } catch {
2268
- return { status: Number(match[1]), body: null };
2201
+ return gradeable({ kind: "max_tool_calls", max });
2202
+ }
2203
+ function toolOrder(tools) {
2204
+ if (!Array.isArray(tools) || tools.length === 0) {
2205
+ throw new Error("toolOrder() requires a non-empty array of tool names");
2269
2206
  }
2207
+ return gradeable({ kind: "tool_order", tools });
2270
2208
  }
2271
- function toConflictError(err) {
2272
- const { status, body } = parseRequestError(err);
2273
- if (status !== 409 || !isPlainObject(body)) return null;
2274
- const code = body.code;
2275
- if (code !== "external_modification" && code !== "remote_changed") return null;
2276
- return new FlowEnsureConflictError(
2277
- body
2278
- );
2209
+ function ranStep(name) {
2210
+ if (typeof name !== "string" || name.length === 0) {
2211
+ throw new Error("ranStep() requires a non-empty step name");
2212
+ }
2213
+ return gradeable({ kind: "ran_step", name });
2279
2214
  }
2280
- var serverHashMemo = /* @__PURE__ */ new WeakMap();
2281
- function memoFor(client) {
2282
- let memo = serverHashMemo.get(client);
2283
- if (!memo) {
2284
- memo = /* @__PURE__ */ new Map();
2285
- serverHashMemo.set(client, memo);
2215
+ function stepOrder(steps) {
2216
+ if (!Array.isArray(steps) || steps.length === 0) {
2217
+ throw new Error("stepOrder() requires a non-empty array of step names");
2286
2218
  }
2287
- return memo;
2219
+ return gradeable({ kind: "step_order", steps });
2288
2220
  }
2289
- function memoize(memo, memoKey, result) {
2290
- if (result.result !== "plan") memo.set(memoKey, result.contentHash);
2221
+ function completed() {
2222
+ return gradeable({ kind: "completed" });
2291
2223
  }
2292
- async function request(client, body) {
2293
- try {
2294
- return await client.post(
2295
- "/flows/ensure",
2296
- body
2297
- );
2298
- } catch (err) {
2299
- const conflict = toConflictError(err);
2300
- if (conflict) throw conflict;
2301
- throw err;
2224
+ function cost(maxUsd) {
2225
+ if (!Number.isFinite(maxUsd) || maxUsd <= 0) {
2226
+ throw new Error("cost() requires a positive maxUsd");
2302
2227
  }
2228
+ return gradeable({ kind: "cost", maxUsd });
2303
2229
  }
2304
- async function ensureFlow(client, definition, options = {}) {
2305
- const { dryRun, onConflict, release, expectedRemoteHash, expectNoChanges } = options;
2306
- const passthrough = {
2307
- ...onConflict ? { onConflict } : {},
2308
- ...release ? { release } : {},
2309
- ...expectedRemoteHash ? { expectedRemoteHash } : {}
2310
- };
2311
- if (dryRun || expectNoChanges) {
2312
- const plan = await request(client, {
2313
- name: definition.name,
2314
- definition,
2315
- dryRun: true,
2316
- ...passthrough
2230
+ function judge(criteria, opts) {
2231
+ if (typeof criteria !== "string" || criteria.trim().length === 0) {
2232
+ throw new Error("judge() requires non-empty criteria");
2233
+ }
2234
+ return gradeable({
2235
+ kind: "ai",
2236
+ criteria,
2237
+ ...opts?.preset ? { preset: opts.preset } : {},
2238
+ ...opts?.useExpected ? { useExpected: true } : {},
2239
+ ...opts?.model ? { model: opts.model } : {},
2240
+ ...opts?.threshold !== void 0 ? { threshold: opts.threshold } : {}
2241
+ });
2242
+ }
2243
+ var judges = {
2244
+ answersQuestion: () => judge(
2245
+ "The response directly addresses what the user asked, without dodging or answering a different question.",
2246
+ { preset: "answersQuestion" }
2247
+ ),
2248
+ matchesExpected: () => judge(
2249
+ "The response conveys the same facts and conclusion as the expected answer. Wording may differ.",
2250
+ { preset: "matchesExpected", useExpected: true }
2251
+ ),
2252
+ followsInstructions: () => judge(
2253
+ "The response obeys every instruction in the system prompt (format, tone, constraints, refusals).",
2254
+ { preset: "followsInstructions" }
2255
+ ),
2256
+ grounded: () => judge(
2257
+ "Every factual claim in the response is supported by the provided context or the expected answer. Flag anything invented.",
2258
+ { preset: "grounded" }
2259
+ ),
2260
+ rightTone: (voice = "{describe the voice you want}") => judge(`The response matches this voice: ${voice}.`, { preset: "rightTone" }),
2261
+ safeToSend: () => judge(
2262
+ "The response contains nothing embarrassing to show a customer: no leaked internals, no hostile tone, no policy violations.",
2263
+ { preset: "safeToSend" }
2264
+ )
2265
+ };
2266
+ var DEFINE_EVAL_TOP_LEVEL_KEYS = /* @__PURE__ */ new Set([
2267
+ "name",
2268
+ "target",
2269
+ "graders",
2270
+ "cases",
2271
+ "virtual"
2272
+ ]);
2273
+ var DEFINE_EVAL_CASE_KEYS = /* @__PURE__ */ new Set(["name", "input", "expected", "expect"]);
2274
+ function isPlainObject(value) {
2275
+ return value !== null && typeof value === "object" && !Array.isArray(value);
2276
+ }
2277
+ function normalizeTarget(target) {
2278
+ if (!isPlainObject(target)) {
2279
+ throw new Error('defineEval requires a "target" object: { flow: name } or { agent: name }');
2280
+ }
2281
+ const hasFlow = typeof target.flow === "string" && target.flow.length > 0;
2282
+ const hasAgent = typeof target.agent === "string" && target.agent.length > 0;
2283
+ if (hasFlow === hasAgent) {
2284
+ throw new Error(
2285
+ 'defineEval "target" must name exactly one of flow or agent: { flow: "name" } XOR { agent: "name" }'
2286
+ );
2287
+ }
2288
+ const extraKeys = Object.keys(target).filter((k) => k !== "flow" && k !== "agent");
2289
+ if (extraKeys.length > 0) {
2290
+ throw new Error(`defineEval "target" has unknown field(s): ${extraKeys.join(", ")}`);
2291
+ }
2292
+ return hasFlow ? { flow: target.flow } : { agent: target.agent };
2293
+ }
2294
+ function validateGrader(grader, where) {
2295
+ if (!isPlainObject(grader) || typeof grader.kind !== "string") {
2296
+ throw new Error(`defineEval: ${where} must be a grader object with a string "kind"`);
2297
+ }
2298
+ if (grader.kind === "ai") {
2299
+ if (typeof grader.criteria !== "string" || grader.criteria.trim().length === 0) {
2300
+ throw new Error(`defineEval: ${where} is an AI grader and requires non-empty "criteria"`);
2301
+ }
2302
+ return grader;
2303
+ }
2304
+ if (!CHECK_GRADER_KINDS.has(grader.kind)) {
2305
+ throw new Error(
2306
+ `defineEval: ${where} has unknown grader kind "${grader.kind}". Known kinds: ${[...CHECK_GRADER_KINDS].join(", ")}, ai.`
2307
+ );
2308
+ }
2309
+ return grader;
2310
+ }
2311
+ function normalizeCaseInput(input, where) {
2312
+ if (input === void 0) return {};
2313
+ if (!isPlainObject(input)) {
2314
+ throw new Error(`defineEval: ${where} "input" must be an object`);
2315
+ }
2316
+ const out = {};
2317
+ if (input.variables !== void 0) {
2318
+ if (!isPlainObject(input.variables)) {
2319
+ throw new Error(`defineEval: ${where} "input.variables" must be an object`);
2320
+ }
2321
+ out.variables = input.variables;
2322
+ }
2323
+ if (input.messages !== void 0) {
2324
+ if (!Array.isArray(input.messages)) {
2325
+ throw new Error(`defineEval: ${where} "input.messages" must be an array`);
2326
+ }
2327
+ out.messages = input.messages.map((m, i) => {
2328
+ if (!isPlainObject(m) || typeof m.role !== "string" || typeof m.content !== "string") {
2329
+ throw new Error(`defineEval: ${where} "input.messages[${i}]" must be { role, content }`);
2330
+ }
2331
+ return { role: m.role, content: m.content };
2317
2332
  });
2318
- if (plan.result !== "plan") {
2319
- throw new Error(`Expected a plan result from dryRun, got '${plan.result}'`);
2333
+ }
2334
+ return out;
2335
+ }
2336
+ function defineEval(input) {
2337
+ if (!input || typeof input !== "object") {
2338
+ throw new Error("defineEval requires a definition object");
2339
+ }
2340
+ const unknownKeys = Object.keys(input).filter((k) => !DEFINE_EVAL_TOP_LEVEL_KEYS.has(k));
2341
+ if (unknownKeys.length > 0) {
2342
+ throw new Error(
2343
+ `defineEval: unknown field(s): ${unknownKeys.join(", ")}. Allowed fields are target, graders, cases, virtual.`
2344
+ );
2345
+ }
2346
+ const target = normalizeTarget(input.target);
2347
+ if (input.name !== void 0 && (typeof input.name !== "string" || input.name.length === 0)) {
2348
+ throw new Error('defineEval "name" must be a non-empty string when provided');
2349
+ }
2350
+ const name = input.name ?? ("flow" in target ? `flow:${target.flow}` : `agent:${target.agent}`);
2351
+ const suiteGraders = (input.graders ?? []).map((g, i) => validateGrader(g, `graders[${i}]`));
2352
+ if (!Array.isArray(input.cases) || input.cases.length === 0) {
2353
+ throw new Error('defineEval requires a non-empty "cases" array');
2354
+ }
2355
+ const seenNames = /* @__PURE__ */ new Set();
2356
+ const cases = input.cases.map((c, index) => {
2357
+ if (!isPlainObject(c)) {
2358
+ throw new Error(`defineEval: cases[${index}] must be an object`);
2320
2359
  }
2321
- if (expectNoChanges && plan.changes !== "none") {
2322
- throw new FlowDriftError(plan);
2360
+ if (typeof c.name !== "string" || c.name.length === 0) {
2361
+ throw new Error(`defineEval: cases[${index}] requires a non-empty string "name"`);
2323
2362
  }
2324
- return plan;
2363
+ if (seenNames.has(c.name)) {
2364
+ throw new Error(`defineEval: duplicate case name "${c.name}" (case names are the identity)`);
2365
+ }
2366
+ seenNames.add(c.name);
2367
+ const unknownCaseKeys = Object.keys(c).filter((k) => !DEFINE_EVAL_CASE_KEYS.has(k));
2368
+ if (unknownCaseKeys.length > 0) {
2369
+ throw new Error(
2370
+ `defineEval: cases[${index}] ("${c.name}") has unknown field(s): ${unknownCaseKeys.join(
2371
+ ", "
2372
+ )}. Allowed case fields are name, input, expected, expect.`
2373
+ );
2374
+ }
2375
+ const caseGraders = (c.expect ?? []).map(
2376
+ (g, i) => validateGrader(g, `cases[${index}].expect[${i}]`)
2377
+ );
2378
+ const expect = [...suiteGraders, ...caseGraders];
2379
+ if (expect.length === 0) {
2380
+ throw new Error(
2381
+ `defineEval: cases[${index}] ("${c.name}") has no graders. Add suite-level "graders" or case-level "expect" so there is something to score.`
2382
+ );
2383
+ }
2384
+ if (c.expected !== void 0 && !isPlainObject(c.expected)) {
2385
+ throw new Error(`defineEval: cases[${index}] ("${c.name}") "expected" must be an object`);
2386
+ }
2387
+ return {
2388
+ name: c.name,
2389
+ input: normalizeCaseInput(c.input, `cases[${index}] ("${c.name}")`),
2390
+ ...c.expected !== void 0 ? { expected: c.expected } : {},
2391
+ expect
2392
+ };
2393
+ });
2394
+ return { name, target, cases, virtual: input.virtual === true };
2395
+ }
2396
+ function normalizeForHash(value) {
2397
+ if (Array.isArray(value)) return value.map(normalizeForHash);
2398
+ if (isPlainObject(value)) {
2399
+ const out = {};
2400
+ for (const key of Object.keys(value).sort()) {
2401
+ const v = value[key];
2402
+ if (v === void 0) continue;
2403
+ out[key] = normalizeForHash(v);
2404
+ }
2405
+ return out;
2406
+ }
2407
+ return value;
2408
+ }
2409
+ async function computeEvalContentHash(definition) {
2410
+ const canonical = {
2411
+ target: normalizeForHash(definition.target),
2412
+ virtual: definition.virtual,
2413
+ cases: [...definition.cases].sort((a, b) => a.name < b.name ? -1 : a.name > b.name ? 1 : 0).map((c) => ({
2414
+ name: c.name,
2415
+ input: normalizeForHash(c.input),
2416
+ ...c.expected !== void 0 ? { expected: normalizeForHash(c.expected) } : {},
2417
+ // Grader order preserved on purpose (it maps to the result index).
2418
+ expect: c.expect.map((g) => normalizeForHash(g))
2419
+ }))
2420
+ };
2421
+ const serialized = JSON.stringify(canonical);
2422
+ const encoded = new TextEncoder().encode(serialized);
2423
+ const hashBuffer = await crypto.subtle.digest("SHA-256", encoded);
2424
+ return Array.from(new Uint8Array(hashBuffer)).map((b) => b.toString(16).padStart(2, "0")).join("");
2425
+ }
2426
+ var serverHashMemo = /* @__PURE__ */ new WeakMap();
2427
+ function memoFor(client) {
2428
+ let memo = serverHashMemo.get(client);
2429
+ if (!memo) {
2430
+ memo = /* @__PURE__ */ new Map();
2431
+ serverHashMemo.set(client, memo);
2432
+ }
2433
+ return memo;
2434
+ }
2435
+ async function ensureEval(client, definition) {
2436
+ if (definition.virtual) {
2437
+ throw new Error(
2438
+ "Cannot ensure a virtual eval: virtual evals are ephemeral (nothing is persisted to converge). Remove `virtual: true` to converge a durable suite, or run it directly."
2439
+ );
2325
2440
  }
2326
2441
  const memo = memoFor(client);
2327
- const localHash = await computeFlowContentHash(definition.steps);
2442
+ const localHash = await computeEvalContentHash(definition);
2328
2443
  const memoKey = `${definition.name} ${localHash}`;
2329
2444
  const contentHash = memo.get(memoKey) ?? localHash;
2330
- const probe = await request(client, {
2331
- name: definition.name,
2332
- contentHash,
2333
- ...passthrough
2334
- });
2445
+ const probe = await client.post(
2446
+ "/eval/ensure",
2447
+ { name: definition.name, contentHash }
2448
+ );
2335
2449
  if (probe.result !== "definitionRequired") {
2336
- memoize(memo, memoKey, probe);
2450
+ memo.set(memoKey, probe.contentHash);
2337
2451
  return probe;
2338
2452
  }
2339
- const converged = await request(client, {
2340
- name: definition.name,
2341
- definition,
2342
- ...passthrough
2343
- });
2453
+ const converged = await client.post(
2454
+ "/eval/ensure",
2455
+ { name: definition.name, definition }
2456
+ );
2344
2457
  if (converged.result === "definitionRequired") {
2345
2458
  throw new Error("Server reported definitionRequired for a full-definition request");
2346
2459
  }
2347
- memoize(memo, memoKey, converged);
2460
+ memo.set(memoKey, converged.contentHash);
2348
2461
  return converged;
2349
2462
  }
2350
- async function pullFlow(client, name) {
2351
- return client.get("/flows/pull", { name });
2463
+ async function pullEval(client, name) {
2464
+ return client.get("/eval/pull", { name });
2465
+ }
2466
+ async function runEvalSuite(client, input) {
2467
+ return client.post("/eval/run", input);
2352
2468
  }
2353
2469
 
2354
- // src/flows-namespace.ts
2355
- var FlowsNamespace = class {
2356
- constructor(getClient) {
2357
- this.getClient = getClient;
2358
- }
2359
- /**
2360
- * Create or update a flow by name (upsert mode)
2361
- *
2362
- * The recommended pattern for code-first flow management when you want to
2363
- * save AND run in one dispatch. For a deploy-time, non-executing converge
2364
- * (CI/CD config-as-code), use {@link ensure} instead — upsert and ensure
2365
- * are siblings, not versions of each other: upsert is the runtime verb
2366
- * (save-and-run), ensure is the deploy verb (converge only).
2367
- *
2368
- * @example
2369
- * ```typescript
2370
- * const result = await Runtype.flows.upsert({
2371
- * name: 'My Flow',
2372
- * createVersionOnChange: true
2373
- * })
2374
- * .prompt({ name: 'Analyze', model: 'gpt-4o', userPrompt: '...' })
2375
- * .stream()
2470
+ // src/flows-ensure.ts
2471
+ function isPlainObject2(value) {
2472
+ return value !== null && typeof value === "object" && !Array.isArray(value);
2473
+ }
2474
+ function normalizeConfigForHash(config) {
2475
+ if (!isPlainObject2(config)) return {};
2476
+ const normalized = {};
2477
+ for (const key of Object.keys(config).sort()) {
2478
+ const value = config[key];
2479
+ if (value === void 0) continue;
2480
+ if (value !== null && typeof value === "object" && !Array.isArray(value)) {
2481
+ normalized[key] = normalizeConfigForHash(value);
2482
+ } else if (Array.isArray(value)) {
2483
+ normalized[key] = value.map((item) => {
2484
+ if (item !== null && typeof item === "object" && !Array.isArray(item)) {
2485
+ return normalizeConfigForHash(item);
2486
+ }
2487
+ return item;
2488
+ });
2489
+ } else {
2490
+ normalized[key] = value;
2491
+ }
2492
+ }
2493
+ return normalized;
2494
+ }
2495
+ function normalizeStepForHash(step) {
2496
+ const stepObj = isPlainObject2(step) ? step : {};
2497
+ return {
2498
+ type: typeof stepObj.type === "string" ? stepObj.type : "",
2499
+ name: typeof stepObj.name === "string" ? stepObj.name : "",
2500
+ enabled: stepObj.enabled !== false,
2501
+ ...typeof stepObj.when === "string" ? { when: stepObj.when } : {},
2502
+ config: normalizeConfigForHash(stepObj.config),
2503
+ order: typeof stepObj.order === "number" ? stepObj.order : 0
2504
+ };
2505
+ }
2506
+ async function computeFlowContentHash(steps) {
2507
+ const normalized = [...steps].sort((a, b) => {
2508
+ const orderA = isPlainObject2(a) && typeof a.order === "number" ? a.order : 0;
2509
+ const orderB = isPlainObject2(b) && typeof b.order === "number" ? b.order : 0;
2510
+ return orderA - orderB;
2511
+ }).map(normalizeStepForHash);
2512
+ const serialized = JSON.stringify(normalized);
2513
+ const encoded = new TextEncoder().encode(serialized);
2514
+ const hashBuffer = await crypto.subtle.digest("SHA-256", encoded);
2515
+ return Array.from(new Uint8Array(hashBuffer)).map((b) => b.toString(16).padStart(2, "0")).join("");
2516
+ }
2517
+ var DEFINE_FLOW_TOP_LEVEL_KEYS = /* @__PURE__ */ new Set(["name", "steps", "evals"]);
2518
+ var DEFINE_FLOW_STEP_KEYS = /* @__PURE__ */ new Set([
2519
+ "type",
2520
+ "name",
2521
+ "order",
2522
+ "enabled",
2523
+ "when",
2524
+ "config"
2525
+ ]);
2526
+ function collectStepNonPortableToolRefs(config, path) {
2527
+ const found = [];
2528
+ const tools = config.tools;
2529
+ const isAccountScoped = (ref) => typeof ref === "string" && ref.startsWith("tool_");
2530
+ const isRawId = (ref, prefix) => typeof ref === "string" && ref.startsWith(prefix);
2531
+ const scanArray = (value, subPath) => {
2532
+ if (!Array.isArray(value)) return;
2533
+ value.forEach((ref, i) => {
2534
+ if (isAccountScoped(ref)) found.push(`${subPath}[${i}]`);
2535
+ });
2536
+ };
2537
+ const scanKeys = (value, subPath) => {
2538
+ if (!isPlainObject2(value)) return;
2539
+ for (const key of Object.keys(value)) {
2540
+ if (isAccountScoped(key)) found.push(`${subPath}.${key}`);
2541
+ }
2542
+ };
2543
+ if (isPlainObject2(tools)) {
2544
+ scanArray(tools.toolIds, `${path}.tools.toolIds`);
2545
+ scanKeys(tools.toolConfigs, `${path}.tools.toolConfigs`);
2546
+ scanKeys(tools.perToolLimits, `${path}.tools.perToolLimits`);
2547
+ if (isPlainObject2(tools.approval)) {
2548
+ scanArray(tools.approval.require, `${path}.tools.approval.require`);
2549
+ }
2550
+ if (isPlainObject2(tools.subagentConfig)) {
2551
+ scanArray(tools.subagentConfig.toolPool, `${path}.tools.subagentConfig.toolPool`);
2552
+ }
2553
+ if (isPlainObject2(tools.codeModeConfig)) {
2554
+ scanArray(tools.codeModeConfig.toolPool, `${path}.tools.codeModeConfig.toolPool`);
2555
+ }
2556
+ if (Array.isArray(tools.runtimeTools)) {
2557
+ tools.runtimeTools.forEach((runtimeTool, i) => {
2558
+ if (!isPlainObject2(runtimeTool) || !isPlainObject2(runtimeTool.config)) return;
2559
+ const base = `${path}.tools.runtimeTools[${i}].config`;
2560
+ const rtConfig = runtimeTool.config;
2561
+ if (runtimeTool.toolType === "subagent" && isRawId(rtConfig.agentId, "agent_")) {
2562
+ found.push(`${base}.agentId`);
2563
+ } else if (runtimeTool.toolType === "flow" && isRawId(rtConfig.flowId, "flow_")) {
2564
+ found.push(`${base}.flowId`);
2565
+ }
2566
+ });
2567
+ }
2568
+ }
2569
+ if (isAccountScoped(config.toolId)) {
2570
+ found.push(`${path}.toolId`);
2571
+ }
2572
+ if (isRawId(config.agentId, "agent_")) {
2573
+ found.push(`${path}.agentId`);
2574
+ }
2575
+ for (const branch of ["trueSteps", "falseSteps"]) {
2576
+ const nested = config[branch];
2577
+ if (!Array.isArray(nested)) continue;
2578
+ nested.forEach((nestedStep, i) => {
2579
+ if (isPlainObject2(nestedStep) && isPlainObject2(nestedStep.config)) {
2580
+ found.push(
2581
+ ...collectStepNonPortableToolRefs(nestedStep.config, `${path}.${branch}[${i}].config`)
2582
+ );
2583
+ }
2584
+ });
2585
+ }
2586
+ return found;
2587
+ }
2588
+ function defineFlow(input) {
2589
+ if (!input || typeof input !== "object") {
2590
+ throw new Error("defineFlow requires a definition object");
2591
+ }
2592
+ if (typeof input.name !== "string" || input.name.length === 0) {
2593
+ throw new Error('defineFlow requires a non-empty string "name"');
2594
+ }
2595
+ const unknownKeys = Object.keys(input).filter((key) => !DEFINE_FLOW_TOP_LEVEL_KEYS.has(key));
2596
+ if (unknownKeys.length > 0) {
2597
+ throw new Error(
2598
+ `defineFlow: unknown field(s): ${unknownKeys.join(", ")}. Allowed fields are name, steps, and evals. (Description is not part of the v1 ensure surface.)`
2599
+ );
2600
+ }
2601
+ if (!Array.isArray(input.steps) || input.steps.length === 0) {
2602
+ throw new Error('defineFlow requires a non-empty "steps" array');
2603
+ }
2604
+ const steps = input.steps.map((step, index) => {
2605
+ if (!isPlainObject2(step)) {
2606
+ throw new Error(`defineFlow: steps[${index}] must be an object`);
2607
+ }
2608
+ if (typeof step.type !== "string" || step.type.length === 0) {
2609
+ throw new Error(`defineFlow: steps[${index}] requires a non-empty string "type"`);
2610
+ }
2611
+ if (typeof step.name !== "string" || step.name.length === 0) {
2612
+ throw new Error(`defineFlow: steps[${index}] requires a non-empty string "name"`);
2613
+ }
2614
+ const unknownStepKeys = Object.keys(step).filter((key) => !DEFINE_FLOW_STEP_KEYS.has(key));
2615
+ if (unknownStepKeys.length > 0) {
2616
+ throw new Error(
2617
+ `defineFlow: steps[${index}] has unknown field(s): ${unknownStepKeys.join(", ")}. Allowed step fields are type, name, order, enabled, when, config. (Step ids are server artifacts and not part of a portable definition.)`
2618
+ );
2619
+ }
2620
+ const config = isPlainObject2(step.config) ? step.config : void 0;
2621
+ if (config) {
2622
+ const nonPortable = collectStepNonPortableToolRefs(config, `steps[${index}].config`);
2623
+ if (nonPortable.length > 0) {
2624
+ throw new Error(
2625
+ `defineFlow: account-scoped reference(s) at ${nonPortable.join(", ")}. Definitions must be environment-portable \u2014 tool_\u2026/agent_\u2026/flow_\u2026 IDs belong to one account/environment. Use builtin:/platform:/mcp: references, or reference a saved resource by name \u2014 tool:<name>, agent:<name>, or flow:<name> instead.`
2626
+ );
2627
+ }
2628
+ }
2629
+ return {
2630
+ type: step.type,
2631
+ name: step.name,
2632
+ // Explicit 1-based order (the flow builder's convention) so the local
2633
+ // probe hash agrees with the server's persisted step order.
2634
+ order: typeof step.order === "number" ? step.order : index + 1,
2635
+ ...step.enabled !== void 0 ? { enabled: step.enabled } : {},
2636
+ ...typeof step.when === "string" ? { when: step.when } : {},
2637
+ ...config ? { config } : {}
2638
+ };
2639
+ });
2640
+ let evals;
2641
+ if (input.evals !== void 0) {
2642
+ if (!Array.isArray(input.evals)) {
2643
+ throw new Error('defineFlow: "evals" must be an array');
2644
+ }
2645
+ const seenEvalNames = /* @__PURE__ */ new Set();
2646
+ evals = input.evals.map((evalInput, i) => {
2647
+ if (!isPlainObject2(evalInput)) {
2648
+ throw new Error(`defineFlow: evals[${i}] must be an object`);
2649
+ }
2650
+ if (evalInput.virtual === true) {
2651
+ throw new Error(
2652
+ `defineFlow: evals[${i}] cannot be virtual (inline evals converge with the flow; run a virtual eval directly instead).`
2653
+ );
2654
+ }
2655
+ const withTarget = evalInput.target === void 0 ? { ...evalInput, target: { flow: input.name } } : evalInput;
2656
+ let def;
2657
+ try {
2658
+ def = defineEval(withTarget);
2659
+ } catch (err) {
2660
+ throw new Error(
2661
+ `defineFlow: evals[${i}] \u2014 ${err instanceof Error ? err.message : String(err)}`,
2662
+ { cause: err }
2663
+ );
2664
+ }
2665
+ if (seenEvalNames.has(def.name)) {
2666
+ throw new Error(
2667
+ `defineFlow: evals[${i}] resolves to the duplicate suite name "${def.name}". Inline eval suites must have distinct names \u2014 give each a \`name\` (two unnamed evals targeting the same flow both default to the same name and would overwrite each other).`
2668
+ );
2669
+ }
2670
+ seenEvalNames.add(def.name);
2671
+ return def;
2672
+ });
2673
+ }
2674
+ return {
2675
+ name: input.name,
2676
+ steps,
2677
+ ...evals && evals.length > 0 ? { evals } : {}
2678
+ };
2679
+ }
2680
+ var FlowEnsureConflictError = class extends Error {
2681
+ constructor(body) {
2682
+ super(body.error ?? `Flow ensure conflict: ${body.code}`);
2683
+ this.name = "FlowEnsureConflictError";
2684
+ this.code = body.code;
2685
+ this.lastModifiedSource = body.lastModifiedSource;
2686
+ this.modifiedAt = body.modifiedAt;
2687
+ this.currentHash = body.currentHash;
2688
+ }
2689
+ };
2690
+ var FlowDriftError = class extends Error {
2691
+ constructor(plan) {
2692
+ super(
2693
+ `Flow "${plan.flowId ?? "definition"}" drifted: plan is '${plan.changes}' (changed: ${plan.changedKeys.join(", ") || "n/a"}). Run client.flows.pull(name) to absorb the remote edit into your repo, or re-run ensure to converge.`
2694
+ );
2695
+ this.name = "FlowDriftError";
2696
+ this.plan = plan;
2697
+ }
2698
+ };
2699
+ function parseRequestError(err) {
2700
+ if (!(err instanceof Error)) return { status: null, body: null };
2701
+ const match = err.message.match(/^API request failed: (\d{3}) .*? - ([\s\S]*)$/);
2702
+ if (!match) return { status: null, body: null };
2703
+ try {
2704
+ return { status: Number(match[1]), body: JSON.parse(match[2]) };
2705
+ } catch {
2706
+ return { status: Number(match[1]), body: null };
2707
+ }
2708
+ }
2709
+ function toConflictError(err) {
2710
+ const { status, body } = parseRequestError(err);
2711
+ if (status !== 409 || !isPlainObject2(body)) return null;
2712
+ const code = body.code;
2713
+ if (code !== "external_modification" && code !== "remote_changed") return null;
2714
+ return new FlowEnsureConflictError(
2715
+ body
2716
+ );
2717
+ }
2718
+ var serverHashMemo2 = /* @__PURE__ */ new WeakMap();
2719
+ function memoFor2(client) {
2720
+ let memo = serverHashMemo2.get(client);
2721
+ if (!memo) {
2722
+ memo = /* @__PURE__ */ new Map();
2723
+ serverHashMemo2.set(client, memo);
2724
+ }
2725
+ return memo;
2726
+ }
2727
+ function memoize(memo, memoKey, result) {
2728
+ if (result.result !== "plan") memo.set(memoKey, result.contentHash);
2729
+ }
2730
+ async function request(client, body) {
2731
+ try {
2732
+ return await client.post(
2733
+ "/flows/ensure",
2734
+ body
2735
+ );
2736
+ } catch (err) {
2737
+ const conflict = toConflictError(err);
2738
+ if (conflict) throw conflict;
2739
+ throw err;
2740
+ }
2741
+ }
2742
+ async function ensureFlow(client, definition, options = {}) {
2743
+ const { dryRun, onConflict, release, expectedRemoteHash, expectNoChanges } = options;
2744
+ const passthrough = {
2745
+ ...onConflict ? { onConflict } : {},
2746
+ ...release ? { release } : {},
2747
+ ...expectedRemoteHash ? { expectedRemoteHash } : {}
2748
+ };
2749
+ const wireDefinition = { name: definition.name, steps: definition.steps };
2750
+ if (dryRun || expectNoChanges) {
2751
+ const plan = await request(client, {
2752
+ name: definition.name,
2753
+ definition: wireDefinition,
2754
+ dryRun: true,
2755
+ ...passthrough
2756
+ });
2757
+ if (plan.result !== "plan") {
2758
+ throw new Error(`Expected a plan result from dryRun, got '${plan.result}'`);
2759
+ }
2760
+ if (expectNoChanges && plan.changes !== "none") {
2761
+ throw new FlowDriftError(plan);
2762
+ }
2763
+ return plan;
2764
+ }
2765
+ const memo = memoFor2(client);
2766
+ const localHash = await computeFlowContentHash(definition.steps);
2767
+ const memoKey = `${definition.name} ${localHash}`;
2768
+ const contentHash = memo.get(memoKey) ?? localHash;
2769
+ const probe = await request(client, {
2770
+ name: definition.name,
2771
+ contentHash,
2772
+ ...passthrough
2773
+ });
2774
+ if (probe.result !== "definitionRequired") {
2775
+ memoize(memo, memoKey, probe);
2776
+ return convergeInlineEvals(client, definition, probe);
2777
+ }
2778
+ const converged = await request(client, {
2779
+ name: definition.name,
2780
+ definition: wireDefinition,
2781
+ ...passthrough
2782
+ });
2783
+ if (converged.result === "definitionRequired") {
2784
+ throw new Error("Server reported definitionRequired for a full-definition request");
2785
+ }
2786
+ memoize(memo, memoKey, converged);
2787
+ return convergeInlineEvals(client, definition, converged);
2788
+ }
2789
+ async function convergeInlineEvals(client, definition, result) {
2790
+ if (result.result === "plan" || !definition.evals?.length) {
2791
+ return result;
2792
+ }
2793
+ const evals = [];
2794
+ for (const evalDef of definition.evals) {
2795
+ evals.push(await ensureEval(client, evalDef));
2796
+ }
2797
+ return { ...result, evals };
2798
+ }
2799
+ async function pullFlow(client, name) {
2800
+ return client.get("/flows/pull", { name });
2801
+ }
2802
+
2803
+ // src/flows-namespace.ts
2804
+ var FlowsNamespace = class {
2805
+ constructor(getClient) {
2806
+ this.getClient = getClient;
2807
+ }
2808
+ /**
2809
+ * Create or update a flow by name (upsert mode)
2810
+ *
2811
+ * The recommended pattern for code-first flow management when you want to
2812
+ * save AND run in one dispatch. For a deploy-time, non-executing converge
2813
+ * (CI/CD config-as-code), use {@link ensure} instead — upsert and ensure
2814
+ * are siblings, not versions of each other: upsert is the runtime verb
2815
+ * (save-and-run), ensure is the deploy verb (converge only).
2816
+ *
2817
+ * @example
2818
+ * ```typescript
2819
+ * const result = await Runtype.flows.upsert({
2820
+ * name: 'My Flow',
2821
+ * createVersionOnChange: true
2822
+ * })
2823
+ * .prompt({ name: 'Analyze', model: 'gpt-4o', userPrompt: '...' })
2824
+ * .stream()
2376
2825
  * ```
2377
2826
  */
2378
2827
  upsert(config) {
@@ -2384,11 +2833,19 @@ var FlowsNamespace = class {
2384
2833
  * the steady state is one tiny probe request. Creates an immutable version
2385
2834
  * snapshot on every change; never deletes; never executes the flow.
2386
2835
  *
2836
+ * When the definition carries inline `evals`, each suite is converged via
2837
+ * `/eval/ensure` after the flow itself (real converge path only — not on
2838
+ * dryRun/`expectNoChanges`), and the outcomes are returned as `result.evals`.
2839
+ *
2387
2840
  * @example
2388
2841
  * ```typescript
2389
- * const def = defineFlow({ name: 'Onboarding Digest', steps: [...] })
2842
+ * const def = defineFlow({
2843
+ * name: 'Onboarding Digest',
2844
+ * steps: [...],
2845
+ * evals: [{ cases: [{ name: 'smoke', input: {...}, expect: [contains('ok')] }] }],
2846
+ * })
2390
2847
  *
2391
- * // Converge (CI/deploy).
2848
+ * // Converge the flow AND its inline eval suites (CI/deploy).
2392
2849
  * const result = await Runtype.flows.ensure(def)
2393
2850
  *
2394
2851
  * // PR drift gate.
@@ -3222,530 +3679,236 @@ var RuntypeFlowBuilder = class {
3222
3679
  const flowMode = this.mode === "existing" ? "existing" : this.mode;
3223
3680
  const flow = this.existingFlowId ? { id: this.existingFlowId } : { name: this.flowConfig.name, steps: this.steps };
3224
3681
  const request6 = { flow };
3225
- if (this.recordConfig) {
3226
- request6.record = this.recordConfig;
3227
- }
3228
- if (this.messagesConfig) {
3229
- request6.messages = this.messagesConfig;
3230
- }
3231
- if (this.inputsConfig) {
3232
- request6.inputs = this.inputsConfig;
3233
- }
3234
- const options = {
3235
- flowMode,
3236
- ...this.dispatchOptions
3237
- };
3238
- if (this.recordConfig && !this.dispatchOptions.recordMode) {
3239
- if (this.recordConfig.id) {
3240
- options.recordMode = "existing";
3241
- } else if (this.recordConfig.name || this.recordConfig.type) {
3242
- options.recordMode = "create";
3243
- } else {
3244
- options.recordMode = "virtual";
3245
- }
3246
- }
3247
- if (this.mode === "upsert" && Object.keys(this.upsertOptions).length > 0) {
3248
- options.upsertOptions = this.upsertOptions;
3249
- }
3250
- request6.options = options;
3251
- return request6;
3252
- }
3253
- /**
3254
- * Validate this prospective flow against the public validation endpoint
3255
- * (`POST /v1/public/flows/validate`) WITHOUT creating it, using the bound
3256
- * client. Returns the same `errors` / `warnings` / `recommendations` envelope
3257
- * the API, dashboard, and MCP `validate_flow` tool use, so structural issues,
3258
- * the upsert-record JSON foot-gun, undeclared-variable warnings, and
3259
- * sub-optimal model selections surface at author time. The bound client
3260
- * carries authentication; an authenticated client additionally runs
3261
- * account-scoped checks (`result.context` reports whether they ran). Mirrors
3262
- * {@link FlowBuilder.validate}.
3263
- *
3264
- * Only valid for prospective flows (`Runtype.flows.virtual(...)` /
3265
- * `Runtype.flows.upsert(...)`). An existing-flow builder
3266
- * (`Runtype.flows.use(id)`) has no inline steps to validate, so this throws —
3267
- * the validation endpoint validates a `{ name, steps }` payload, not a saved
3268
- * flow by id (which was already validated at create time).
3269
- *
3270
- * @example
3271
- * ```typescript
3272
- * const result = await Runtype.flows.virtual({ name: 'Temp Flow' })
3273
- * .prompt({ name: 'Process', model: 'gpt-5-mini', userPrompt: '...' })
3274
- * .validate()
3275
- *
3276
- * if (!result.valid) console.error(result.errors)
3277
- * ```
3278
- */
3279
- async validate() {
3280
- return validateInlineFlow(
3281
- this.getClient(),
3282
- { name: this.flowConfig.name, steps: this.steps, existingFlowId: this.existingFlowId },
3283
- "Use Runtype.flows.virtual(...) or Runtype.flows.upsert(...) with inline steps to validate a flow before saving."
3284
- );
3285
- }
3286
- // ============================================================================
3287
- // Private Helpers
3288
- // ============================================================================
3289
- /**
3290
- * Persisted flow protocol (APQ-style): send hash-only first, retry with
3291
- * full definition on FLOW_DEFINITION_REQUIRED. For non-upsert modes,
3292
- * dispatches directly.
3293
- */
3294
- async dispatchWithPersistedFlow(client, config) {
3295
- if (this.mode !== "upsert" || !this.steps.length) {
3296
- return client.dispatch(config);
3297
- }
3298
- const contentHash = await this.computeContentHash();
3299
- const hashOnlyConfig = {
3300
- ...config,
3301
- flow: { name: config.flow.name, contentHash }
3302
- };
3303
- try {
3304
- return await client.dispatch(hashOnlyConfig);
3305
- } catch (err) {
3306
- const is422 = err != null && typeof err === "object" && "statusCode" in err && err.statusCode === 422 || err instanceof Error && /\b422\b/.test(err.message);
3307
- if (!is422) {
3308
- throw err;
3309
- }
3310
- }
3311
- const fullConfig = {
3312
- ...config,
3313
- flow: { ...config.flow, contentHash }
3314
- };
3315
- return client.dispatch(fullConfig);
3316
- }
3317
- async computeContentHash() {
3318
- return computeFlowContentHash(this.steps);
3319
- }
3320
- addRawStep(type, config) {
3321
- const { name, enabled, when, ...stepConfig } = config;
3322
- this.addStep(type, name, stepConfig, enabled, when);
3323
- return this;
3324
- }
3325
- addStep(type, name, config, enabled = true, when) {
3326
- this.stepCounter++;
3327
- const cleanConfig = {};
3328
- for (const [key, value] of Object.entries(config)) {
3329
- if (value !== void 0) {
3330
- cleanConfig[key] = value;
3331
- }
3332
- }
3333
- this.steps.push({
3334
- id: `step-${this.stepCounter}`,
3335
- type,
3336
- name,
3337
- order: this.stepCounter,
3338
- enabled,
3339
- ...when ? { when } : {},
3340
- config: cleanConfig
3341
- });
3342
- }
3343
- };
3344
-
3345
- // src/batches-namespace.ts
3346
- var BatchesNamespace = class {
3347
- constructor(getClient) {
3348
- this.getClient = getClient;
3349
- }
3350
- /**
3351
- * Schedule a batch operation
3352
- *
3353
- * Creates and schedules a batch to run a flow on all records of a type.
3354
- * By default, runs immediately. Use `at` to schedule for a specific time.
3355
- *
3356
- * @example
3357
- * ```typescript
3358
- * // Run immediately
3359
- * const batch = await Runtype.batches.schedule({
3360
- * flowId: 'flow_123',
3361
- * recordType: 'customers',
3362
- * })
3363
- *
3364
- * // Schedule for later
3365
- * const batch = await Runtype.batches.schedule({
3366
- * flowId: 'flow_123',
3367
- * recordType: 'customers',
3368
- * at: new Date('2024-01-15T09:00:00Z'),
3369
- * })
3370
- *
3371
- * // With options
3372
- * const batch = await Runtype.batches.schedule({
3373
- * flowId: 'flow_123',
3374
- * recordType: 'customers',
3375
- * concurrency: 5,
3376
- * continueOnError: true,
3377
- * filter: { status: 'active' },
3378
- * limit: 100,
3379
- * })
3380
- * ```
3381
- */
3382
- async schedule(config) {
3383
- const client = this.getClient();
3384
- const payload = {
3385
- flowId: config.flowId,
3386
- recordType: config.recordType
3387
- };
3388
- if (config.at) {
3389
- payload.scheduledAt = config.at.toISOString();
3682
+ if (this.recordConfig) {
3683
+ request6.record = this.recordConfig;
3390
3684
  }
3391
- const options = {};
3392
- if (config.async !== void 0) options.async = config.async;
3393
- if (config.concurrency !== void 0) options.concurrency = config.concurrency;
3394
- if (config.continueOnError !== void 0) options.continueOnError = config.continueOnError;
3395
- if (config.storeResults !== void 0) options.storeResults = config.storeResults;
3396
- if (config.modelOverride !== void 0) options.modelOverride = config.modelOverride;
3397
- if (Object.keys(options).length > 0) {
3398
- payload.options = options;
3685
+ if (this.messagesConfig) {
3686
+ request6.messages = this.messagesConfig;
3399
3687
  }
3400
- if (config.filter) {
3401
- payload.filter = config.filter;
3688
+ if (this.inputsConfig) {
3689
+ request6.inputs = this.inputsConfig;
3402
3690
  }
3403
- if (config.limit !== void 0) {
3404
- payload.limit = config.limit;
3691
+ const options = {
3692
+ flowMode,
3693
+ ...this.dispatchOptions
3694
+ };
3695
+ if (this.recordConfig && !this.dispatchOptions.recordMode) {
3696
+ if (this.recordConfig.id) {
3697
+ options.recordMode = "existing";
3698
+ } else if (this.recordConfig.name || this.recordConfig.type) {
3699
+ options.recordMode = "create";
3700
+ } else {
3701
+ options.recordMode = "virtual";
3702
+ }
3405
3703
  }
3406
- return client.post("/batches", payload);
3407
- }
3408
- /**
3409
- * Get batch status by ID
3410
- *
3411
- * @example
3412
- * ```typescript
3413
- * const status = await Runtype.batches.get('batch_456')
3414
- * console.log(status.status, status.processedRecords, '/', status.totalRecords)
3415
- * ```
3416
- */
3417
- async get(batchId) {
3418
- const client = this.getClient();
3419
- return client.get(`/batches/${batchId}`);
3704
+ if (this.mode === "upsert" && Object.keys(this.upsertOptions).length > 0) {
3705
+ options.upsertOptions = this.upsertOptions;
3706
+ }
3707
+ request6.options = options;
3708
+ return request6;
3420
3709
  }
3421
3710
  /**
3422
- * Cancel a batch operation
3423
- *
3424
- * Cancels a queued or running batch. Records already processed are not rolled back.
3711
+ * Validate this prospective flow against the public validation endpoint
3712
+ * (`POST /v1/public/flows/validate`) WITHOUT creating it, using the bound
3713
+ * client. Returns the same `errors` / `warnings` / `recommendations` envelope
3714
+ * the API, dashboard, and MCP `validate_flow` tool use, so structural issues,
3715
+ * the upsert-record JSON foot-gun, undeclared-variable warnings, and
3716
+ * sub-optimal model selections surface at author time. The bound client
3717
+ * carries authentication; an authenticated client additionally runs
3718
+ * account-scoped checks (`result.context` reports whether they ran). Mirrors
3719
+ * {@link FlowBuilder.validate}.
3425
3720
  *
3426
- * @example
3427
- * ```typescript
3428
- * await Runtype.batches.cancel('batch_456')
3429
- * ```
3430
- */
3431
- async cancel(batchId) {
3432
- const client = this.getClient();
3433
- return client.post(`/batches/${batchId}/cancel`);
3434
- }
3435
- /**
3436
- * List batch operations
3721
+ * Only valid for prospective flows (`Runtype.flows.virtual(...)` /
3722
+ * `Runtype.flows.upsert(...)`). An existing-flow builder
3723
+ * (`Runtype.flows.use(id)`) has no inline steps to validate, so this throws —
3724
+ * the validation endpoint validates a `{ name, steps }` payload, not a saved
3725
+ * flow by id (which was already validated at create time).
3437
3726
  *
3438
3727
  * @example
3439
3728
  * ```typescript
3440
- * // List all batches
3441
- * const batches = await Runtype.batches.list()
3442
- *
3443
- * // Filter by status
3444
- * const running = await Runtype.batches.list({ status: 'running' })
3729
+ * const result = await Runtype.flows.virtual({ name: 'Temp Flow' })
3730
+ * .prompt({ name: 'Process', model: 'gpt-5-mini', userPrompt: '...' })
3731
+ * .validate()
3445
3732
  *
3446
- * // Filter by flow
3447
- * const flowBatches = await Runtype.batches.list({ flowId: 'flow_123' })
3733
+ * if (!result.valid) console.error(result.errors)
3448
3734
  * ```
3449
3735
  */
3450
- async list(params) {
3451
- const client = this.getClient();
3452
- return client.get("/batches", params);
3453
- }
3454
- };
3455
-
3456
- // src/evals-ensure.ts
3457
- var CHECK_GRADER_KINDS = /* @__PURE__ */ new Set([
3458
- "contains",
3459
- "not_contains",
3460
- "matches_expected",
3461
- "regex",
3462
- "valid_json",
3463
- "json_field",
3464
- "length",
3465
- "latency",
3466
- "no_error"
3467
- ]);
3468
- function contains(value, opts) {
3469
- return { kind: "contains", value, ...opts?.caseSensitive ? { caseSensitive: true } : {} };
3470
- }
3471
- function notContains(value, opts) {
3472
- return { kind: "not_contains", value, ...opts?.caseSensitive ? { caseSensitive: true } : {} };
3473
- }
3474
- function matchesExpected() {
3475
- return { kind: "matches_expected" };
3476
- }
3477
- function regex(pattern, flags) {
3478
- return { kind: "regex", pattern, ...flags ? { flags } : {} };
3479
- }
3480
- function validJson() {
3481
- return { kind: "valid_json" };
3482
- }
3483
- function jsonField(path, opts) {
3484
- return {
3485
- kind: "json_field",
3486
- path,
3487
- ...opts && "equals" in opts && opts.equals !== void 0 ? { equals: opts.equals } : {},
3488
- ...opts && typeof opts.exists === "boolean" ? { exists: opts.exists } : {}
3489
- };
3490
- }
3491
- function length(opts) {
3492
- if (!opts || opts.minChars === void 0 && opts.maxChars === void 0) {
3493
- throw new Error("length() requires at least one of minChars or maxChars");
3494
- }
3495
- return {
3496
- kind: "length",
3497
- ...opts.minChars !== void 0 ? { minChars: opts.minChars } : {},
3498
- ...opts.maxChars !== void 0 ? { maxChars: opts.maxChars } : {}
3499
- };
3500
- }
3501
- function latency(maxMs) {
3502
- if (!Number.isFinite(maxMs) || maxMs <= 0) {
3503
- throw new Error("latency() requires a positive maxMs");
3504
- }
3505
- return { kind: "latency", maxMs };
3506
- }
3507
- function noError() {
3508
- return { kind: "no_error" };
3509
- }
3510
- function judge(criteria, opts) {
3511
- if (typeof criteria !== "string" || criteria.trim().length === 0) {
3512
- throw new Error("judge() requires non-empty criteria");
3513
- }
3514
- return {
3515
- kind: "ai",
3516
- criteria,
3517
- ...opts?.preset ? { preset: opts.preset } : {},
3518
- ...opts?.useExpected ? { useExpected: true } : {},
3519
- ...opts?.model ? { model: opts.model } : {},
3520
- ...opts?.threshold !== void 0 ? { threshold: opts.threshold } : {}
3521
- };
3522
- }
3523
- var judges = {
3524
- answersQuestion: () => judge(
3525
- "The response directly addresses what the user asked, without dodging or answering a different question.",
3526
- { preset: "answersQuestion" }
3527
- ),
3528
- matchesExpected: () => judge(
3529
- "The response conveys the same facts and conclusion as the expected answer. Wording may differ.",
3530
- { preset: "matchesExpected", useExpected: true }
3531
- ),
3532
- followsInstructions: () => judge(
3533
- "The response obeys every instruction in the system prompt (format, tone, constraints, refusals).",
3534
- { preset: "followsInstructions" }
3535
- ),
3536
- grounded: () => judge(
3537
- "Every factual claim in the response is supported by the provided context or the expected answer. Flag anything invented.",
3538
- { preset: "grounded" }
3539
- ),
3540
- rightTone: (voice = "{describe the voice you want}") => judge(`The response matches this voice: ${voice}.`, { preset: "rightTone" }),
3541
- safeToSend: () => judge(
3542
- "The response contains nothing embarrassing to show a customer: no leaked internals, no hostile tone, no policy violations.",
3543
- { preset: "safeToSend" }
3544
- )
3545
- };
3546
- var DEFINE_EVAL_TOP_LEVEL_KEYS = /* @__PURE__ */ new Set([
3547
- "name",
3548
- "target",
3549
- "graders",
3550
- "cases",
3551
- "virtual"
3552
- ]);
3553
- var DEFINE_EVAL_CASE_KEYS = /* @__PURE__ */ new Set(["name", "input", "expected", "expect"]);
3554
- function isPlainObject2(value) {
3555
- return value !== null && typeof value === "object" && !Array.isArray(value);
3556
- }
3557
- function normalizeTarget(target) {
3558
- if (!isPlainObject2(target)) {
3559
- throw new Error('defineEval requires a "target" object: { flow: name } or { agent: name }');
3560
- }
3561
- const hasFlow = typeof target.flow === "string" && target.flow.length > 0;
3562
- const hasAgent = typeof target.agent === "string" && target.agent.length > 0;
3563
- if (hasFlow === hasAgent) {
3564
- throw new Error(
3565
- 'defineEval "target" must name exactly one of flow or agent: { flow: "name" } XOR { agent: "name" }'
3566
- );
3567
- }
3568
- const extraKeys = Object.keys(target).filter((k) => k !== "flow" && k !== "agent");
3569
- if (extraKeys.length > 0) {
3570
- throw new Error(`defineEval "target" has unknown field(s): ${extraKeys.join(", ")}`);
3571
- }
3572
- return hasFlow ? { flow: target.flow } : { agent: target.agent };
3573
- }
3574
- function validateGrader(grader, where) {
3575
- if (!isPlainObject2(grader) || typeof grader.kind !== "string") {
3576
- throw new Error(`defineEval: ${where} must be a grader object with a string "kind"`);
3577
- }
3578
- if (grader.kind === "ai") {
3579
- if (typeof grader.criteria !== "string" || grader.criteria.trim().length === 0) {
3580
- throw new Error(`defineEval: ${where} is an AI grader and requires non-empty "criteria"`);
3581
- }
3582
- return grader;
3583
- }
3584
- if (!CHECK_GRADER_KINDS.has(grader.kind)) {
3585
- throw new Error(
3586
- `defineEval: ${where} has unknown grader kind "${grader.kind}". Known kinds: ${[...CHECK_GRADER_KINDS].join(", ")}, ai. (Trace graders such as called_tool/step_order are not available yet.)`
3736
+ async validate() {
3737
+ return validateInlineFlow(
3738
+ this.getClient(),
3739
+ { name: this.flowConfig.name, steps: this.steps, existingFlowId: this.existingFlowId },
3740
+ "Use Runtype.flows.virtual(...) or Runtype.flows.upsert(...) with inline steps to validate a flow before saving."
3587
3741
  );
3588
3742
  }
3589
- return grader;
3590
- }
3591
- function normalizeCaseInput(input, where) {
3592
- if (input === void 0) return {};
3593
- if (!isPlainObject2(input)) {
3594
- throw new Error(`defineEval: ${where} "input" must be an object`);
3595
- }
3596
- const out = {};
3597
- if (input.variables !== void 0) {
3598
- if (!isPlainObject2(input.variables)) {
3599
- throw new Error(`defineEval: ${where} "input.variables" must be an object`);
3600
- }
3601
- out.variables = input.variables;
3602
- }
3603
- if (input.messages !== void 0) {
3604
- if (!Array.isArray(input.messages)) {
3605
- throw new Error(`defineEval: ${where} "input.messages" must be an array`);
3743
+ // ============================================================================
3744
+ // Private Helpers
3745
+ // ============================================================================
3746
+ /**
3747
+ * Persisted flow protocol (APQ-style): send hash-only first, retry with
3748
+ * full definition on FLOW_DEFINITION_REQUIRED. For non-upsert modes,
3749
+ * dispatches directly.
3750
+ */
3751
+ async dispatchWithPersistedFlow(client, config) {
3752
+ if (this.mode !== "upsert" || !this.steps.length) {
3753
+ return client.dispatch(config);
3606
3754
  }
3607
- out.messages = input.messages.map((m, i) => {
3608
- if (!isPlainObject2(m) || typeof m.role !== "string" || typeof m.content !== "string") {
3609
- throw new Error(`defineEval: ${where} "input.messages[${i}]" must be { role, content }`);
3755
+ const contentHash = await this.computeContentHash();
3756
+ const hashOnlyConfig = {
3757
+ ...config,
3758
+ flow: { name: config.flow.name, contentHash }
3759
+ };
3760
+ try {
3761
+ return await client.dispatch(hashOnlyConfig);
3762
+ } catch (err) {
3763
+ const is422 = err != null && typeof err === "object" && "statusCode" in err && err.statusCode === 422 || err instanceof Error && /\b422\b/.test(err.message);
3764
+ if (!is422) {
3765
+ throw err;
3610
3766
  }
3611
- return { role: m.role, content: m.content };
3612
- });
3767
+ }
3768
+ const fullConfig = {
3769
+ ...config,
3770
+ flow: { ...config.flow, contentHash }
3771
+ };
3772
+ return client.dispatch(fullConfig);
3613
3773
  }
3614
- return out;
3615
- }
3616
- function defineEval(input) {
3617
- if (!input || typeof input !== "object") {
3618
- throw new Error("defineEval requires a definition object");
3774
+ async computeContentHash() {
3775
+ return computeFlowContentHash(this.steps);
3619
3776
  }
3620
- const unknownKeys = Object.keys(input).filter((k) => !DEFINE_EVAL_TOP_LEVEL_KEYS.has(k));
3621
- if (unknownKeys.length > 0) {
3622
- throw new Error(
3623
- `defineEval: unknown field(s): ${unknownKeys.join(", ")}. Allowed fields are target, graders, cases, virtual.`
3624
- );
3777
+ addRawStep(type, config) {
3778
+ const { name, enabled, when, ...stepConfig } = config;
3779
+ this.addStep(type, name, stepConfig, enabled, when);
3780
+ return this;
3625
3781
  }
3626
- const target = normalizeTarget(input.target);
3627
- if (input.name !== void 0 && (typeof input.name !== "string" || input.name.length === 0)) {
3628
- throw new Error('defineEval "name" must be a non-empty string when provided');
3782
+ addStep(type, name, config, enabled = true, when) {
3783
+ this.stepCounter++;
3784
+ const cleanConfig = {};
3785
+ for (const [key, value] of Object.entries(config)) {
3786
+ if (value !== void 0) {
3787
+ cleanConfig[key] = value;
3788
+ }
3789
+ }
3790
+ this.steps.push({
3791
+ id: `step-${this.stepCounter}`,
3792
+ type,
3793
+ name,
3794
+ order: this.stepCounter,
3795
+ enabled,
3796
+ ...when ? { when } : {},
3797
+ config: cleanConfig
3798
+ });
3629
3799
  }
3630
- const name = input.name ?? ("flow" in target ? `flow:${target.flow}` : `agent:${target.agent}`);
3631
- const suiteGraders = (input.graders ?? []).map((g, i) => validateGrader(g, `graders[${i}]`));
3632
- if (!Array.isArray(input.cases) || input.cases.length === 0) {
3633
- throw new Error('defineEval requires a non-empty "cases" array');
3800
+ };
3801
+
3802
+ // src/batches-namespace.ts
3803
+ var BatchesNamespace = class {
3804
+ constructor(getClient) {
3805
+ this.getClient = getClient;
3634
3806
  }
3635
- const seenNames = /* @__PURE__ */ new Set();
3636
- const cases = input.cases.map((c, index) => {
3637
- if (!isPlainObject2(c)) {
3638
- throw new Error(`defineEval: cases[${index}] must be an object`);
3639
- }
3640
- if (typeof c.name !== "string" || c.name.length === 0) {
3641
- throw new Error(`defineEval: cases[${index}] requires a non-empty string "name"`);
3642
- }
3643
- if (seenNames.has(c.name)) {
3644
- throw new Error(`defineEval: duplicate case name "${c.name}" (case names are the identity)`);
3645
- }
3646
- seenNames.add(c.name);
3647
- const unknownCaseKeys = Object.keys(c).filter((k) => !DEFINE_EVAL_CASE_KEYS.has(k));
3648
- if (unknownCaseKeys.length > 0) {
3649
- throw new Error(
3650
- `defineEval: cases[${index}] ("${c.name}") has unknown field(s): ${unknownCaseKeys.join(
3651
- ", "
3652
- )}. Allowed case fields are name, input, expected, expect.`
3653
- );
3807
+ /**
3808
+ * Schedule a batch operation
3809
+ *
3810
+ * Creates and schedules a batch to run a flow on all records of a type.
3811
+ * By default, runs immediately. Use `at` to schedule for a specific time.
3812
+ *
3813
+ * @example
3814
+ * ```typescript
3815
+ * // Run immediately
3816
+ * const batch = await Runtype.batches.schedule({
3817
+ * flowId: 'flow_123',
3818
+ * recordType: 'customers',
3819
+ * })
3820
+ *
3821
+ * // Schedule for later
3822
+ * const batch = await Runtype.batches.schedule({
3823
+ * flowId: 'flow_123',
3824
+ * recordType: 'customers',
3825
+ * at: new Date('2024-01-15T09:00:00Z'),
3826
+ * })
3827
+ *
3828
+ * // With options
3829
+ * const batch = await Runtype.batches.schedule({
3830
+ * flowId: 'flow_123',
3831
+ * recordType: 'customers',
3832
+ * concurrency: 5,
3833
+ * continueOnError: true,
3834
+ * filter: { status: 'active' },
3835
+ * limit: 100,
3836
+ * })
3837
+ * ```
3838
+ */
3839
+ async schedule(config) {
3840
+ const client = this.getClient();
3841
+ const payload = {
3842
+ flowId: config.flowId,
3843
+ recordType: config.recordType
3844
+ };
3845
+ if (config.at) {
3846
+ payload.scheduledAt = config.at.toISOString();
3654
3847
  }
3655
- const caseGraders = (c.expect ?? []).map(
3656
- (g, i) => validateGrader(g, `cases[${index}].expect[${i}]`)
3657
- );
3658
- const expect = [...suiteGraders, ...caseGraders];
3659
- if (expect.length === 0) {
3660
- throw new Error(
3661
- `defineEval: cases[${index}] ("${c.name}") has no graders. Add suite-level "graders" or case-level "expect" so there is something to score.`
3662
- );
3848
+ const options = {};
3849
+ if (config.async !== void 0) options.async = config.async;
3850
+ if (config.concurrency !== void 0) options.concurrency = config.concurrency;
3851
+ if (config.continueOnError !== void 0) options.continueOnError = config.continueOnError;
3852
+ if (config.storeResults !== void 0) options.storeResults = config.storeResults;
3853
+ if (config.modelOverride !== void 0) options.modelOverride = config.modelOverride;
3854
+ if (Object.keys(options).length > 0) {
3855
+ payload.options = options;
3663
3856
  }
3664
- if (c.expected !== void 0 && !isPlainObject2(c.expected)) {
3665
- throw new Error(`defineEval: cases[${index}] ("${c.name}") "expected" must be an object`);
3857
+ if (config.filter) {
3858
+ payload.filter = config.filter;
3666
3859
  }
3667
- return {
3668
- name: c.name,
3669
- input: normalizeCaseInput(c.input, `cases[${index}] ("${c.name}")`),
3670
- ...c.expected !== void 0 ? { expected: c.expected } : {},
3671
- expect
3672
- };
3673
- });
3674
- return { name, target, cases, virtual: input.virtual === true };
3675
- }
3676
- function normalizeForHash(value) {
3677
- if (Array.isArray(value)) return value.map(normalizeForHash);
3678
- if (isPlainObject2(value)) {
3679
- const out = {};
3680
- for (const key of Object.keys(value).sort()) {
3681
- const v = value[key];
3682
- if (v === void 0) continue;
3683
- out[key] = normalizeForHash(v);
3860
+ if (config.limit !== void 0) {
3861
+ payload.limit = config.limit;
3684
3862
  }
3685
- return out;
3686
- }
3687
- return value;
3688
- }
3689
- async function computeEvalContentHash(definition) {
3690
- const canonical = {
3691
- target: normalizeForHash(definition.target),
3692
- virtual: definition.virtual,
3693
- cases: [...definition.cases].sort((a, b) => a.name < b.name ? -1 : a.name > b.name ? 1 : 0).map((c) => ({
3694
- name: c.name,
3695
- input: normalizeForHash(c.input),
3696
- ...c.expected !== void 0 ? { expected: normalizeForHash(c.expected) } : {},
3697
- // Grader order preserved on purpose (it maps to the result index).
3698
- expect: c.expect.map((g) => normalizeForHash(g))
3699
- }))
3700
- };
3701
- const serialized = JSON.stringify(canonical);
3702
- const encoded = new TextEncoder().encode(serialized);
3703
- const hashBuffer = await crypto.subtle.digest("SHA-256", encoded);
3704
- return Array.from(new Uint8Array(hashBuffer)).map((b) => b.toString(16).padStart(2, "0")).join("");
3705
- }
3706
- var serverHashMemo2 = /* @__PURE__ */ new WeakMap();
3707
- function memoFor2(client) {
3708
- let memo = serverHashMemo2.get(client);
3709
- if (!memo) {
3710
- memo = /* @__PURE__ */ new Map();
3711
- serverHashMemo2.set(client, memo);
3863
+ return client.post("/batches", payload);
3712
3864
  }
3713
- return memo;
3714
- }
3715
- async function ensureEval(client, definition) {
3716
- if (definition.virtual) {
3717
- throw new Error(
3718
- "Cannot ensure a virtual eval: virtual evals are ephemeral (nothing is persisted to converge). Remove `virtual: true` to converge a durable suite, or run it directly."
3719
- );
3865
+ /**
3866
+ * Get batch status by ID
3867
+ *
3868
+ * @example
3869
+ * ```typescript
3870
+ * const status = await Runtype.batches.get('batch_456')
3871
+ * console.log(status.status, status.processedRecords, '/', status.totalRecords)
3872
+ * ```
3873
+ */
3874
+ async get(batchId) {
3875
+ const client = this.getClient();
3876
+ return client.get(`/batches/${batchId}`);
3720
3877
  }
3721
- const memo = memoFor2(client);
3722
- const localHash = await computeEvalContentHash(definition);
3723
- const memoKey = `${definition.name} ${localHash}`;
3724
- const contentHash = memo.get(memoKey) ?? localHash;
3725
- const probe = await client.post(
3726
- "/eval/ensure",
3727
- { name: definition.name, contentHash }
3728
- );
3729
- if (probe.result !== "definitionRequired") {
3730
- memo.set(memoKey, probe.contentHash);
3731
- return probe;
3878
+ /**
3879
+ * Cancel a batch operation
3880
+ *
3881
+ * Cancels a queued or running batch. Records already processed are not rolled back.
3882
+ *
3883
+ * @example
3884
+ * ```typescript
3885
+ * await Runtype.batches.cancel('batch_456')
3886
+ * ```
3887
+ */
3888
+ async cancel(batchId) {
3889
+ const client = this.getClient();
3890
+ return client.post(`/batches/${batchId}/cancel`);
3732
3891
  }
3733
- const converged = await client.post(
3734
- "/eval/ensure",
3735
- { name: definition.name, definition }
3736
- );
3737
- if (converged.result === "definitionRequired") {
3738
- throw new Error("Server reported definitionRequired for a full-definition request");
3892
+ /**
3893
+ * List batch operations
3894
+ *
3895
+ * @example
3896
+ * ```typescript
3897
+ * // List all batches
3898
+ * const batches = await Runtype.batches.list()
3899
+ *
3900
+ * // Filter by status
3901
+ * const running = await Runtype.batches.list({ status: 'running' })
3902
+ *
3903
+ * // Filter by flow
3904
+ * const flowBatches = await Runtype.batches.list({ flowId: 'flow_123' })
3905
+ * ```
3906
+ */
3907
+ async list(params) {
3908
+ const client = this.getClient();
3909
+ return client.get("/batches", params);
3739
3910
  }
3740
- memo.set(memoKey, converged.contentHash);
3741
- return converged;
3742
- }
3743
- async function pullEval(client, name) {
3744
- return client.get("/eval/pull", { name });
3745
- }
3746
- async function runEvalSuite(client, input) {
3747
- return client.post("/eval/run", input);
3748
- }
3911
+ };
3749
3912
 
3750
3913
  // src/evals-namespace.ts
3751
3914
  var EvalRunner = class {
@@ -6025,7 +6188,7 @@ var Runtype = class {
6025
6188
 
6026
6189
  // src/version.ts
6027
6190
  var FALLBACK_VERSION = "0.0.0";
6028
- var SDK_VERSION = "5.4.0".length > 0 ? "5.4.0" : FALLBACK_VERSION;
6191
+ var SDK_VERSION = "5.6.0".length > 0 ? "5.6.0" : FALLBACK_VERSION;
6029
6192
  var RUNTYPE_CLIENT_KIND = "sdk";
6030
6193
  var SDK_USER_AGENT = `runtype-sdk/${SDK_VERSION} (typescript)`;
6031
6194
 
@@ -13736,7 +13899,9 @@ var STEP_TYPE_TO_METHOD = {
13736
13899
  buildLedgerOffloadReference,
13737
13900
  buildPolicyGuidance,
13738
13901
  buildSendViewOffloadMarker,
13902
+ calledTool,
13739
13903
  compileWorkflowConfig,
13904
+ completed,
13740
13905
  computeAgentContentHash,
13741
13906
  computeEvalContentHash,
13742
13907
  computeFlowContentHash,
@@ -13746,6 +13911,7 @@ var STEP_TYPE_TO_METHOD = {
13746
13911
  computeSurfaceContentHash,
13747
13912
  computeToolContentHash,
13748
13913
  contains,
13914
+ cost,
13749
13915
  createAgentEventTranslator,
13750
13916
  createClient,
13751
13917
  createExternalTool,
@@ -13783,6 +13949,7 @@ var STEP_TYPE_TO_METHOD = {
13783
13949
  length,
13784
13950
  listWorkflowHooks,
13785
13951
  matchesExpected,
13952
+ maxToolCalls,
13786
13953
  noError,
13787
13954
  normalizeAgentDefinition,
13788
13955
  normalizeCandidatePath,
@@ -13791,6 +13958,7 @@ var STEP_TYPE_TO_METHOD = {
13791
13958
  normalizeSkillDefinition,
13792
13959
  normalizeSurfaceDefinition,
13793
13960
  normalizeToolDefinition,
13961
+ notCalledTool,
13794
13962
  notContains,
13795
13963
  parseFinalBuffer,
13796
13964
  parseLedgerArtifactRelativePath,
@@ -13799,6 +13967,7 @@ var STEP_TYPE_TO_METHOD = {
13799
13967
  processStream,
13800
13968
  pullEval,
13801
13969
  pullFpo,
13970
+ ranStep,
13802
13971
  regex,
13803
13972
  registerWorkflowHook,
13804
13973
  resolveStallStopAfter,
@@ -13807,8 +13976,11 @@ var STEP_TYPE_TO_METHOD = {
13807
13976
  sanitizeTaskSlug,
13808
13977
  shouldInjectEmptySessionNudge,
13809
13978
  shouldRequestModelEscalation,
13979
+ stepOrder,
13810
13980
  streamEvents,
13981
+ toolOrder,
13811
13982
  unregisterWorkflowHook,
13983
+ usedNoTools,
13812
13984
  validJson,
13813
13985
  withUnifiedEvents
13814
13986
  });