@modelrelay/sdk 0.23.0 → 0.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -25,17 +25,21 @@ __export(index_exports, {
25
25
  ChatClient: () => ChatClient,
26
26
  ChatCompletionsStream: () => ChatCompletionsStream,
27
27
  ConfigError: () => ConfigError,
28
+ CustomerChatClient: () => CustomerChatClient,
28
29
  CustomersClient: () => CustomersClient,
29
30
  DEFAULT_BASE_URL: () => DEFAULT_BASE_URL,
30
31
  DEFAULT_CLIENT_HEADER: () => DEFAULT_CLIENT_HEADER,
31
32
  DEFAULT_CONNECT_TIMEOUT_MS: () => DEFAULT_CONNECT_TIMEOUT_MS,
32
33
  DEFAULT_REQUEST_TIMEOUT_MS: () => DEFAULT_REQUEST_TIMEOUT_MS,
33
34
  ErrorCodes: () => ErrorCodes,
35
+ MessageRoles: () => MessageRoles,
34
36
  ModelRelay: () => ModelRelay,
35
37
  ModelRelayError: () => ModelRelayError,
36
38
  ResponseFormatTypes: () => ResponseFormatTypes,
37
39
  SDK_VERSION: () => SDK_VERSION,
38
40
  StopReasons: () => StopReasons,
41
+ StructuredDecodeError: () => StructuredDecodeError,
42
+ StructuredExhaustedError: () => StructuredExhaustedError,
39
43
  StructuredJSONStream: () => StructuredJSONStream,
40
44
  TiersClient: () => TiersClient,
41
45
  ToolArgsError: () => ToolArgsError,
@@ -58,6 +62,7 @@ __export(index_exports, {
58
62
  createUsage: () => createUsage,
59
63
  createUserMessage: () => createUserMessage,
60
64
  createWebTool: () => createWebTool,
65
+ defaultRetryHandler: () => defaultRetryHandler,
61
66
  executeWithRetry: () => executeWithRetry,
62
67
  firstToolCall: () => firstToolCall,
63
68
  formatToolErrorForModel: () => formatToolErrorForModel,
@@ -78,12 +83,14 @@ __export(index_exports, {
78
83
  parseToolArgs: () => parseToolArgs,
79
84
  parseToolArgsRaw: () => parseToolArgsRaw,
80
85
  respondToToolCall: () => respondToToolCall,
86
+ responseFormatFromZod: () => responseFormatFromZod,
81
87
  stopReasonToString: () => stopReasonToString,
82
88
  toolChoiceAuto: () => toolChoiceAuto,
83
89
  toolChoiceNone: () => toolChoiceNone,
84
90
  toolChoiceRequired: () => toolChoiceRequired,
85
91
  toolResultMessage: () => toolResultMessage,
86
92
  tryParseToolArgs: () => tryParseToolArgs,
93
+ validateWithZod: () => validateWithZod,
87
94
  zodToJsonSchema: () => zodToJsonSchema
88
95
  });
89
96
  module.exports = __toCommonJS(index_exports);
@@ -430,7 +437,7 @@ function isTokenReusable(token) {
430
437
  // package.json
431
438
  var package_default = {
432
439
  name: "@modelrelay/sdk",
433
- version: "0.22.0",
440
+ version: "0.25.1",
434
441
  description: "TypeScript SDK for the ModelRelay API",
435
442
  type: "module",
436
443
  main: "dist/index.cjs",
@@ -496,6 +503,12 @@ function createUsage(inputTokens, outputTokens, totalTokens) {
496
503
  totalTokens: totalTokens ?? inputTokens + outputTokens
497
504
  };
498
505
  }
506
/**
 * Role identifiers for chat messages (`"user"`, `"assistant"`, `"system"`, `"tool"`).
 *
 * The object is frozen because it is exported and shared by every consumer of
 * the module: without the freeze, one caller reassigning `MessageRoles.User`
 * would silently change the role value seen by all other callers.
 */
var MessageRoles = Object.freeze({
  User: "user",
  Assistant: "assistant",
  System: "system",
  Tool: "tool"
});
499
512
  var ToolTypes = {
500
513
  Function: "function",
501
514
  Web: "web",
@@ -1184,10 +1197,68 @@ async function executeWithRetry(registry, toolCalls, options = {}) {
1184
1197
  return Array.from(successfulResults.values());
1185
1198
  }
1186
1199
 
1200
+ // src/structured.ts
1201
/**
 * Error describing a structured-output response that could not be decoded.
 *
 * Instances expose the offending text as `rawJson` and the 1-based attempt
 * number as `attempt`; the attempt number is also embedded in `message`.
 */
var StructuredDecodeError = class extends Error {
  constructor(message, rawJson, attempt) {
    const text = `structured output decode error (attempt ${attempt}): ${message}`;
    super(text);
    Object.assign(this, {
      name: "StructuredDecodeError",
      rawJson,
      attempt
    });
  }
};
1209
/**
 * Error raised once every structured-output attempt has failed.
 *
 * `lastRawJson` is the text of the final response, `allAttempts` the list of
 * recorded attempts (its length appears in the message), and `finalError` the
 * last failure: `{ kind: "decode", message }` or a record whose `issues` array
 * holds `{ message }` entries, which are joined with "; ".
 */
var StructuredExhaustedError = class extends Error {
  constructor(lastRawJson, allAttempts, finalError) {
    let detail;
    if (finalError.kind === "decode") {
      detail = finalError.message;
    } else {
      detail = finalError.issues.map(({ message }) => message).join("; ");
    }
    super(`structured output failed after ${allAttempts.length} attempts: ${detail}`);
    this.name = "StructuredExhaustedError";
    this.lastRawJson = lastRawJson;
    this.allAttempts = allAttempts;
    this.finalError = finalError;
  }
};
1221
/**
 * Default retry handler for structured output.
 *
 * `onValidationError` turns the failure into a single follow-up user message
 * asking for a response that matches the schema.
 *
 * - Decode failures (`error.kind === "decode"`) report `error.message`.
 * - Any other kind reports each entry of `error.issues`, joined with "; ".
 *   An issue is rendered as `path: message` only when it actually carries a
 *   non-empty `path`; issues without one are rendered as the bare message so
 *   the prompt does not contain a dangling ": " prefix.
 *
 * The attempt number, raw JSON and original messages are accepted for
 * interface compatibility but are not used.
 */
var defaultRetryHandler = {
  onValidationError(_attempt, _rawJson, error, _originalMessages) {
    const describeIssue = (issue) => {
      const label = issue.path == null ? "" : String(issue.path);
      return label ? `${label}: ${issue.message}` : String(issue.message);
    };
    const errorMsg = error.kind === "decode" ? error.message : error.issues.map(describeIssue).join("; ");
    return [
      {
        role: "user",
        content: `The previous response did not match the expected schema. Error: ${errorMsg}. Please provide a response that matches the schema exactly.`
      }
    ];
  }
};
1232
/**
 * Build a strict `json_schema` response format from a schema.
 *
 * The schema is converted with `zodToJsonSchema` and wrapped together with
 * `name` (default "response") and `strict: true`.
 */
function responseFormatFromZod(schema, name = "response") {
  const json_schema = {
    name,
    schema: zodToJsonSchema(schema),
    strict: true
  };
  return { type: "json_schema", json_schema };
}
1243
/**
 * Validate `data` with `schema.safeParse` and return a plain result record.
 *
 * Returns `{ success: true, data }` with the parsed data on success. On
 * failure returns `{ success: false, error }`, where `error` is the stringified
 * `message` of the reported error object, or "validation failed" when the
 * error is missing, not an object, or has no `message`.
 */
function validateWithZod(schema, data) {
  const outcome = schema.safeParse(data);
  if (!outcome.success) {
    const failure = outcome.error;
    const hasMessage = Boolean(failure) && typeof failure === "object" && "message" in failure;
    return {
      success: false,
      error: hasMessage ? String(failure.message) : "validation failed"
    };
  }
  return { success: true, data: outcome.data };
}
1251
+
1187
1252
  // src/chat.ts
1253
+ var CUSTOMER_ID_HEADER = "X-ModelRelay-Customer-Id";
1188
1254
  var REQUEST_ID_HEADER = "X-ModelRelay-Chat-Request-Id";
1189
1255
  var ChatClient = class {
1190
1256
  constructor(http, auth, cfg = {}) {
1257
+ this.http = http;
1258
+ this.auth = auth;
1259
+ this.defaultMetadata = cfg.defaultMetadata;
1260
+ this.metrics = cfg.metrics;
1261
+ this.trace = cfg.trace;
1191
1262
  this.completions = new ChatCompletionsClient(
1192
1263
  http,
1193
1264
  auth,
@@ -1196,6 +1267,30 @@ var ChatClient = class {
1196
1267
  cfg.trace
1197
1268
  );
1198
1269
  }
1270
+ /**
1271
+ * Create a customer-attributed chat client for the given customer ID.
1272
+ * The customer's tier determines the model - no model parameter is needed or allowed.
1273
+ *
1274
+ * @example
1275
+ * ```typescript
1276
+ * const stream = await client.chat.forCustomer("user-123").create({
1277
+ * messages: [{ role: "user", content: "Hello!" }],
1278
+ * });
1279
+ * ```
1280
+ */
1281
+ forCustomer(customerId) {
1282
+ if (!customerId?.trim()) {
1283
+ throw new ConfigError("customerId is required");
1284
+ }
1285
+ return new CustomerChatClient(
1286
+ this.http,
1287
+ this.auth,
1288
+ customerId,
1289
+ this.defaultMetadata,
1290
+ this.metrics,
1291
+ this.trace
1292
+ );
1293
+ }
1199
1294
  };
1200
1295
  var ChatCompletionsClient = class {
1201
1296
  constructor(http, auth, defaultMetadata, metrics, trace) {
@@ -1215,7 +1310,7 @@ var ChatCompletionsClient = class {
1215
1310
  if (!hasUserMessage(params.messages)) {
1216
1311
  throw new ConfigError("at least one user message is required");
1217
1312
  }
1218
- const authHeaders = await this.auth.authForChat(params.customerId);
1313
+ const authHeaders = await this.auth.authForChat();
1219
1314
  const body = buildProxyBody(
1220
1315
  params,
1221
1316
  mergeMetadata(this.defaultMetadata, params.metadata, options.metadata)
@@ -1295,7 +1390,7 @@ var ChatCompletionsClient = class {
1295
1390
  "responseFormat with type=json_object or json_schema is required for structured streaming"
1296
1391
  );
1297
1392
  }
1298
- const authHeaders = await this.auth.authForChat(params.customerId);
1393
+ const authHeaders = await this.auth.authForChat();
1299
1394
  const body = buildProxyBody(
1300
1395
  params,
1301
1396
  mergeMetadata(this.defaultMetadata, params.metadata, options.metadata)
@@ -1351,6 +1446,439 @@ var ChatCompletionsClient = class {
1351
1446
  trace
1352
1447
  );
1353
1448
  }
1449
+ /**
1450
+ * Send a structured output request with a Zod schema.
1451
+ *
1452
+ * Auto-generates JSON schema from the Zod schema, validates the response,
1453
+ * and retries on validation failure if configured.
1454
+ *
1455
+ * @param schema - A Zod schema defining the expected response structure
1456
+ * @param params - Chat completion parameters (excluding responseFormat)
1457
+ * @param options - Request options including retry configuration
1458
+ * @returns A typed result with the parsed value
1459
+ *
1460
+ * @example
1461
+ * ```typescript
1462
+ * import { z } from 'zod';
1463
+ *
1464
+ * const PersonSchema = z.object({
1465
+ * name: z.string(),
1466
+ * age: z.number(),
1467
+ * });
1468
+ *
1469
+ * const result = await client.chat.completions.structured(
1470
+ * PersonSchema,
1471
+ * { model: "claude-sonnet-4-20250514", messages: [...] },
1472
+ * { maxRetries: 2 }
1473
+ * );
1474
+ * ```
1475
+ */
1476
+ async structured(schema, params, options = {}) {
1477
+ const {
1478
+ maxRetries = 0,
1479
+ retryHandler = defaultRetryHandler,
1480
+ schemaName,
1481
+ ...requestOptions
1482
+ } = options;
1483
+ const responseFormat = responseFormatFromZod(schema, schemaName);
1484
+ const fullParams = {
1485
+ ...params,
1486
+ responseFormat,
1487
+ stream: false
1488
+ };
1489
+ let messages = [...params.messages];
1490
+ const attempts = [];
1491
+ const maxAttempts = maxRetries + 1;
1492
+ for (let attempt = 1; attempt <= maxAttempts; attempt++) {
1493
+ const response = await this.create(
1494
+ { ...fullParams, messages },
1495
+ { ...requestOptions, stream: false }
1496
+ );
1497
+ const rawJson = response.content.join("");
1498
+ const requestId = response.requestId;
1499
+ try {
1500
+ const parsed = JSON.parse(rawJson);
1501
+ const validated = validateWithZod(schema, parsed);
1502
+ if (validated.success) {
1503
+ return {
1504
+ value: validated.data,
1505
+ attempts: attempt,
1506
+ requestId
1507
+ };
1508
+ }
1509
+ const error = {
1510
+ kind: "validation",
1511
+ issues: [{ message: validated.error }]
1512
+ };
1513
+ attempts.push({ attempt, rawJson, error });
1514
+ if (attempt >= maxAttempts) {
1515
+ throw new StructuredExhaustedError(rawJson, attempts, error);
1516
+ }
1517
+ const retryMessages = retryHandler.onValidationError(
1518
+ attempt,
1519
+ rawJson,
1520
+ error,
1521
+ params.messages
1522
+ );
1523
+ if (!retryMessages) {
1524
+ throw new StructuredExhaustedError(rawJson, attempts, error);
1525
+ }
1526
+ messages = [
1527
+ ...params.messages,
1528
+ { role: "assistant", content: rawJson },
1529
+ ...retryMessages
1530
+ ];
1531
+ } catch (e) {
1532
+ if (e instanceof StructuredExhaustedError) {
1533
+ throw e;
1534
+ }
1535
+ const error = {
1536
+ kind: "decode",
1537
+ message: e instanceof Error ? e.message : String(e)
1538
+ };
1539
+ attempts.push({ attempt, rawJson, error });
1540
+ if (attempt >= maxAttempts) {
1541
+ throw new StructuredExhaustedError(rawJson, attempts, error);
1542
+ }
1543
+ const retryMessages = retryHandler.onValidationError(
1544
+ attempt,
1545
+ rawJson,
1546
+ error,
1547
+ params.messages
1548
+ );
1549
+ if (!retryMessages) {
1550
+ throw new StructuredExhaustedError(rawJson, attempts, error);
1551
+ }
1552
+ messages = [
1553
+ ...params.messages,
1554
+ { role: "assistant", content: rawJson },
1555
+ ...retryMessages
1556
+ ];
1557
+ }
1558
+ }
1559
+ throw new Error(
1560
+ `Internal error: structured output loop exited unexpectedly after ${maxAttempts} attempts (this is a bug, please report it)`
1561
+ );
1562
+ }
1563
+ /**
1564
+ * Stream structured output with a Zod schema.
1565
+ *
1566
+ * Auto-generates JSON schema from the Zod schema. Note that streaming
1567
+ * does not support retries - for retry behavior, use `structured()`.
1568
+ *
1569
+ * @param schema - A Zod schema defining the expected response structure
1570
+ * @param params - Chat completion parameters (excluding responseFormat)
1571
+ * @param options - Request options
1572
+ * @returns A structured JSON stream
1573
+ *
1574
+ * @example
1575
+ * ```typescript
1576
+ * import { z } from 'zod';
1577
+ *
1578
+ * const PersonSchema = z.object({
1579
+ * name: z.string(),
1580
+ * age: z.number(),
1581
+ * });
1582
+ *
1583
+ * const stream = await client.chat.completions.streamStructured(
1584
+ * PersonSchema,
1585
+ * { model: "claude-sonnet-4-20250514", messages: [...] },
1586
+ * );
1587
+ *
1588
+ * for await (const event of stream) {
1589
+ * console.log(event.type, event.payload);
1590
+ * }
1591
+ * ```
1592
+ */
1593
+ async streamStructured(schema, params, options = {}) {
1594
+ const { schemaName, ...requestOptions } = options;
1595
+ const responseFormat = responseFormatFromZod(schema, schemaName);
1596
+ return this.streamJSON(
1597
+ { ...params, responseFormat },
1598
+ requestOptions
1599
+ );
1600
+ }
1601
+ };
1602
/**
 * Chat client bound to a single customer ID.
 *
 * Every request is authenticated with `auth.authForChat(customerId)`, carries
 * the customer ID in the `CUSTOMER_ID_HEADER` header, and is posted to
 * `/llm/proxy` with a body that contains no model (the model is determined by
 * the customer's tier).
 */
var CustomerChatClient = class {
  constructor(http, auth, customerId, defaultMetadata, metrics, trace) {
    this.http = http;
    this.auth = auth;
    this.customerId = customerId;
    this.defaultMetadata = defaultMetadata;
    this.metrics = metrics;
    this.trace = trace;
  }
  /**
   * Send a chat request for this customer.
   *
   * Streams by default; pass `stream: false` in `options` or `params` to get
   * the normalized response object instead of a ChatCompletionsStream.
   *
   * @throws ConfigError when `params` has no messages or no user message.
   */
  async create(params, options = {}) {
    // `params?.` keeps missing params on the ConfigError path below instead of
    // failing here with a TypeError.
    const stream = options.stream ?? params?.stream ?? true;
    const metrics = mergeMetrics(this.metrics, options.metrics);
    const trace = mergeTrace(this.trace, options.trace);
    if (!params?.messages?.length) {
      throw new ConfigError("at least one message is required");
    }
    if (!hasUserMessage(params.messages)) {
      throw new ConfigError("at least one user message is required");
    }
    const authHeaders = await this.auth.authForChat(this.customerId);
    const body = buildCustomerProxyBody(
      params,
      mergeMetadata(this.defaultMetadata, params.metadata, options.metadata)
    );
    const requestId = params.requestId || options.requestId;
    // The customer header is spread last so caller-supplied headers cannot override it.
    const headers = {
      ...options.headers || {},
      [CUSTOMER_ID_HEADER]: this.customerId
    };
    if (requestId) {
      headers[REQUEST_ID_HEADER] = requestId;
    }
    const baseContext = {
      method: "POST",
      path: "/llm/proxy",
      model: void 0,
      // Model is determined by tier
      requestId
    };
    const response = await this.http.request("/llm/proxy", {
      method: "POST",
      body,
      headers,
      apiKey: authHeaders.apiKey,
      accessToken: authHeaders.accessToken,
      accept: stream ? "text/event-stream" : "application/json",
      raw: true,
      signal: options.signal,
      timeoutMs: options.timeoutMs ?? (stream ? 0 : void 0),
      useDefaultTimeout: !stream,
      connectTimeoutMs: options.connectTimeoutMs,
      retry: options.retry,
      metrics,
      trace,
      context: baseContext
    });
    // Prefer the request ID from the response headers over the one that was sent.
    const resolvedRequestId = requestIdFromHeaders(response.headers) || requestId || void 0;
    if (!response.ok) {
      throw await parseErrorResponse(response);
    }
    if (!stream) {
      const payload = await response.json();
      const result = normalizeChatResponse(payload, resolvedRequestId);
      if (metrics?.usage) {
        const ctx = {
          ...baseContext,
          requestId: resolvedRequestId ?? baseContext.requestId,
          responseId: result.id
        };
        metrics.usage({ usage: result.usage, context: ctx });
      }
      return result;
    }
    const streamContext = {
      ...baseContext,
      requestId: resolvedRequestId ?? baseContext.requestId
    };
    return new ChatCompletionsStream(
      response,
      resolvedRequestId,
      streamContext,
      metrics,
      trace
    );
  }
  /**
   * Stream structured JSON responses using the NDJSON contract.
   * The request must include a structured responseFormat.
   *
   * @throws ConfigError when messages are missing, no user message is present,
   *   or `responseFormat.type` is neither "json_object" nor "json_schema".
   * @throws TransportError when the response Content-Type is not NDJSON.
   */
  async streamJSON(params, options = {}) {
    const metrics = mergeMetrics(this.metrics, options.metrics);
    const trace = mergeTrace(this.trace, options.trace);
    if (!params?.messages?.length) {
      throw new ConfigError("at least one message is required");
    }
    if (!hasUserMessage(params.messages)) {
      throw new ConfigError("at least one user message is required");
    }
    if (!params.responseFormat || params.responseFormat.type !== "json_object" && params.responseFormat.type !== "json_schema") {
      throw new ConfigError(
        "responseFormat with type=json_object or json_schema is required for structured streaming"
      );
    }
    const authHeaders = await this.auth.authForChat(this.customerId);
    const body = buildCustomerProxyBody(
      params,
      mergeMetadata(this.defaultMetadata, params.metadata, options.metadata)
    );
    const requestId = params.requestId || options.requestId;
    // The customer header is spread last so caller-supplied headers cannot override it.
    const headers = {
      ...options.headers || {},
      [CUSTOMER_ID_HEADER]: this.customerId
    };
    if (requestId) {
      headers[REQUEST_ID_HEADER] = requestId;
    }
    const baseContext = {
      method: "POST",
      path: "/llm/proxy",
      model: void 0,
      // Model is determined by tier
      requestId
    };
    const response = await this.http.request("/llm/proxy", {
      method: "POST",
      body,
      headers,
      apiKey: authHeaders.apiKey,
      accessToken: authHeaders.accessToken,
      accept: "application/x-ndjson",
      raw: true,
      signal: options.signal,
      timeoutMs: options.timeoutMs ?? 0,
      useDefaultTimeout: false,
      connectTimeoutMs: options.connectTimeoutMs,
      retry: options.retry,
      metrics,
      trace,
      context: baseContext
    });
    const resolvedRequestId = requestIdFromHeaders(response.headers) || requestId || void 0;
    if (!response.ok) {
      throw await parseErrorResponse(response);
    }
    const contentType = response.headers.get("Content-Type") || "";
    if (!/application\/(x-)?ndjson/i.test(contentType)) {
      throw new TransportError(
        `expected NDJSON structured stream, got Content-Type ${contentType || "missing"}`,
        { kind: "request" }
      );
    }
    const streamContext = {
      ...baseContext,
      requestId: resolvedRequestId ?? baseContext.requestId
    };
    return new StructuredJSONStream(
      response,
      resolvedRequestId,
      streamContext,
      metrics,
      trace
    );
  }
  /**
   * Send a structured output request with a Zod schema for customer-attributed calls.
   *
   * Auto-generates JSON schema from the Zod schema, validates the response,
   * and retries on validation failure if configured. After a failed attempt the
   * next request is the original messages, the rejected response as an
   * assistant message, and the retry handler's follow-up messages.
   *
   * @param schema - A Zod schema defining the expected response structure
   * @param params - Customer chat parameters (excluding responseFormat)
   * @param options - Request options plus `maxRetries` (default 0),
   *   `retryHandler` (default `defaultRetryHandler`) and `schemaName`
   * @returns `{ value, attempts, requestId }` for the first response that validates
   * @throws StructuredExhaustedError when the last allowed attempt fails or the
   *   retry handler returns a falsy value. Errors thrown by `create()`, by the
   *   schema, or by the retry handler propagate unchanged.
   */
  async structured(schema, params, options = {}) {
    const {
      maxRetries = 0,
      retryHandler = defaultRetryHandler,
      schemaName,
      ...requestOptions
    } = options;
    const responseFormat = responseFormatFromZod(schema, schemaName);
    const fullParams = {
      ...params,
      responseFormat,
      stream: false
    };
    let messages = [...params.messages];
    const attempts = [];
    const maxAttempts = maxRetries + 1;
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      const response = await this.create(
        { ...fullParams, messages },
        { ...requestOptions, stream: false }
      );
      const rawJson = response.content.join("");
      const requestId = response.requestId;
      let error;
      let parsed;
      // Only JSON.parse is guarded. Wrapping validation and the retry handler
      // in the same try block would record their exceptions as a bogus
      // "decode" failure, log the attempt twice, and call the handler again.
      try {
        parsed = JSON.parse(rawJson);
      } catch (e) {
        error = {
          kind: "decode",
          message: e instanceof Error ? e.message : String(e)
        };
      }
      if (!error) {
        const validated = validateWithZod(schema, parsed);
        if (validated.success) {
          return {
            value: validated.data,
            attempts: attempt,
            requestId
          };
        }
        error = {
          kind: "validation",
          issues: [{ message: validated.error }]
        };
      }
      attempts.push({ attempt, rawJson, error });
      if (attempt >= maxAttempts) {
        throw new StructuredExhaustedError(rawJson, attempts, error);
      }
      // `await` accepts both a plain return value and a promise from the handler.
      const retryMessages = await retryHandler.onValidationError(
        attempt,
        rawJson,
        error,
        params.messages
      );
      if (!retryMessages) {
        throw new StructuredExhaustedError(rawJson, attempts, error);
      }
      messages = [
        ...params.messages,
        { role: "assistant", content: rawJson },
        ...retryMessages
      ];
    }
    throw new Error(
      `Internal error: structured output loop exited unexpectedly after ${maxAttempts} attempts (this is a bug, please report it)`
    );
  }
  /**
   * Stream structured output with a Zod schema for customer-attributed calls.
   *
   * Auto-generates JSON schema from the Zod schema. Note that streaming
   * does not support retries - for retry behavior, use `structured()`.
   *
   * @param schema - A Zod schema defining the expected response structure
   * @param params - Customer chat parameters (excluding responseFormat)
   * @param options - Request options; `schemaName` is consumed here, the rest
   *   is passed through to `streamJSON`
   * @returns A structured JSON stream
   */
  async streamStructured(schema, params, options = {}) {
    const { schemaName, ...requestOptions } = options;
    const responseFormat = responseFormatFromZod(schema, schemaName);
    return this.streamJSON(
      { ...params, responseFormat },
      requestOptions
    );
  }
};
1355
1883
  var ChatCompletionsStream = class {
1356
1884
  constructor(response, requestId, context, metrics, trace) {
@@ -1370,7 +1898,13 @@ var ChatCompletionsStream = class {
1370
1898
  this.closed = true;
1371
1899
  try {
1372
1900
  await this.response.body?.cancel(reason);
1373
- } catch {
1901
+ } catch (err) {
1902
+ if (this.trace?.streamError) {
1903
+ this.trace.streamError({
1904
+ context: this.context,
1905
+ error: err instanceof Error ? err : new Error(String(err))
1906
+ });
1907
+ }
1374
1908
  }
1375
1909
  }
1376
1910
  async *[Symbol.asyncIterator]() {
@@ -1469,7 +2003,13 @@ var StructuredJSONStream = class {
1469
2003
  this.closed = true;
1470
2004
  try {
1471
2005
  await this.response.body?.cancel(reason);
1472
- } catch {
2006
+ } catch (err) {
2007
+ if (this.trace?.streamError) {
2008
+ this.trace.streamError({
2009
+ context: this.context,
2010
+ error: err instanceof Error ? err : new Error(String(err))
2011
+ });
2012
+ }
1473
2013
  }
1474
2014
  }
1475
2015
  async *[Symbol.asyncIterator]() {
@@ -1670,7 +2210,7 @@ function mapChatEvent(raw, requestId) {
1670
2210
  if (raw.data) {
1671
2211
  try {
1672
2212
  parsed = JSON.parse(raw.data);
1673
- } catch {
2213
+ } catch (err) {
1674
2214
  parsed = raw.data;
1675
2215
  }
1676
2216
  }
@@ -1842,10 +2382,25 @@ function buildProxyBody(params, metadata) {
1842
2382
  if (params.responseFormat) body.response_format = params.responseFormat;
1843
2383
  return body;
1844
2384
  }
2385
/**
 * Build the `/llm/proxy` request body for a customer-attributed call.
 *
 * The body always contains the normalized messages and never a model. Each
 * optional field is copied (under its snake_case wire name) only when set:
 * numeric `maxTokens` / `temperature`, non-empty `metadata`, non-empty `stop`,
 * `stopSequences` and `tools`, and truthy `toolChoice` / `responseFormat`.
 */
function buildCustomerProxyBody(params, metadata) {
  const payload = { messages: normalizeMessages(params.messages) };
  const {
    maxTokens,
    temperature,
    stop,
    stopSequences,
    tools,
    toolChoice,
    responseFormat
  } = params;
  if (typeof maxTokens === "number") {
    payload.max_tokens = maxTokens;
  }
  if (typeof temperature === "number") {
    payload.temperature = temperature;
  }
  if (metadata && Object.keys(metadata).length > 0) {
    payload.metadata = metadata;
  }
  if (stop?.length) {
    payload.stop = stop;
  }
  if (stopSequences?.length) {
    payload.stop_sequences = stopSequences;
  }
  if (tools?.length) {
    payload.tools = normalizeTools(tools);
  }
  if (toolChoice) {
    payload.tool_choice = normalizeToolChoice(toolChoice);
  }
  if (responseFormat) {
    payload.response_format = responseFormat;
  }
  return payload;
}
1845
2400
  function normalizeMessages(messages) {
1846
2401
  return messages.map((msg) => {
1847
2402
  const normalized = {
1848
- role: msg.role || "user",
2403
+ role: msg.role,
1849
2404
  content: msg.content
1850
2405
  };
1851
2406
  if (msg.toolCalls?.length) {
@@ -2555,17 +3110,21 @@ function resolveBaseUrl(override) {
2555
3110
  ChatClient,
2556
3111
  ChatCompletionsStream,
2557
3112
  ConfigError,
3113
+ CustomerChatClient,
2558
3114
  CustomersClient,
2559
3115
  DEFAULT_BASE_URL,
2560
3116
  DEFAULT_CLIENT_HEADER,
2561
3117
  DEFAULT_CONNECT_TIMEOUT_MS,
2562
3118
  DEFAULT_REQUEST_TIMEOUT_MS,
2563
3119
  ErrorCodes,
3120
+ MessageRoles,
2564
3121
  ModelRelay,
2565
3122
  ModelRelayError,
2566
3123
  ResponseFormatTypes,
2567
3124
  SDK_VERSION,
2568
3125
  StopReasons,
3126
+ StructuredDecodeError,
3127
+ StructuredExhaustedError,
2569
3128
  StructuredJSONStream,
2570
3129
  TiersClient,
2571
3130
  ToolArgsError,
@@ -2588,6 +3147,7 @@ function resolveBaseUrl(override) {
2588
3147
  createUsage,
2589
3148
  createUserMessage,
2590
3149
  createWebTool,
3150
+ defaultRetryHandler,
2591
3151
  executeWithRetry,
2592
3152
  firstToolCall,
2593
3153
  formatToolErrorForModel,
@@ -2608,11 +3168,13 @@ function resolveBaseUrl(override) {
2608
3168
  parseToolArgs,
2609
3169
  parseToolArgsRaw,
2610
3170
  respondToToolCall,
3171
+ responseFormatFromZod,
2611
3172
  stopReasonToString,
2612
3173
  toolChoiceAuto,
2613
3174
  toolChoiceNone,
2614
3175
  toolChoiceRequired,
2615
3176
  toolResultMessage,
2616
3177
  tryParseToolArgs,
3178
+ validateWithZod,
2617
3179
  zodToJsonSchema
2618
3180
  });