veryfront 0.1.283 → 0.1.285

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/esm/deno.d.ts +0 -8
  2. package/esm/deno.js +8 -11
  3. package/esm/src/agent/agent-service.d.ts +28 -6
  4. package/esm/src/agent/agent-service.d.ts.map +1 -1
  5. package/esm/src/agent/agent-service.js +23 -1
  6. package/esm/src/agent/index.d.ts +1 -1
  7. package/esm/src/agent/index.d.ts.map +1 -1
  8. package/esm/src/embedding/resolve.d.ts.map +1 -1
  9. package/esm/src/embedding/resolve.js +15 -2
  10. package/esm/src/embedding/veryfront-cloud/provider.d.ts.map +1 -1
  11. package/esm/src/embedding/veryfront-cloud/provider.js +2 -7
  12. package/esm/src/extensions/contracts.js +3 -1
  13. package/esm/src/extensions/interfaces/ai-provider.d.ts +50 -0
  14. package/esm/src/extensions/interfaces/ai-provider.d.ts.map +1 -0
  15. package/esm/src/extensions/interfaces/ai-provider.js +13 -0
  16. package/esm/src/extensions/interfaces/code-parser.d.ts +11 -0
  17. package/esm/src/extensions/interfaces/code-parser.d.ts.map +1 -1
  18. package/esm/src/extensions/interfaces/css-processor.d.ts +41 -22
  19. package/esm/src/extensions/interfaces/css-processor.d.ts.map +1 -1
  20. package/esm/src/extensions/interfaces/css-processor.js +10 -1
  21. package/esm/src/extensions/interfaces/index.d.ts +5 -4
  22. package/esm/src/extensions/interfaces/index.d.ts.map +1 -1
  23. package/esm/src/extensions/interfaces/index.js +1 -0
  24. package/esm/src/extensions/interfaces/schema-validator.d.ts +84 -5
  25. package/esm/src/extensions/interfaces/schema-validator.d.ts.map +1 -1
  26. package/esm/src/extensions/interfaces/schema-validator.js +5 -0
  27. package/esm/src/extensions/loader.d.ts +7 -0
  28. package/esm/src/extensions/loader.d.ts.map +1 -1
  29. package/esm/src/extensions/loader.js +12 -0
  30. package/esm/src/extensions/orchestrate.d.ts +2 -0
  31. package/esm/src/extensions/orchestrate.d.ts.map +1 -1
  32. package/esm/src/extensions/orchestrate.js +3 -0
  33. package/esm/src/extensions/recommendations.d.ts.map +1 -1
  34. package/esm/src/extensions/recommendations.js +4 -1
  35. package/esm/src/extensions/registries/ai-provider-registry.d.ts +11 -0
  36. package/esm/src/extensions/registries/ai-provider-registry.d.ts.map +1 -0
  37. package/esm/src/extensions/registries/ai-provider-registry.js +40 -0
  38. package/esm/src/html/styles-builder/plugin-loader.d.ts.map +1 -1
  39. package/esm/src/html/styles-builder/plugin-loader.js +4 -16
  40. package/esm/src/html/styles-builder/tailwind-compiler-cache.d.ts +8 -2
  41. package/esm/src/html/styles-builder/tailwind-compiler-cache.d.ts.map +1 -1
  42. package/esm/src/html/styles-builder/tailwind-compiler-cache.js +20 -3
  43. package/esm/src/provider/model-registry.d.ts.map +1 -1
  44. package/esm/src/provider/model-registry.js +33 -6
  45. package/esm/src/provider/runtime-loader/provider-embedding-responses.d.ts +1 -0
  46. package/esm/src/provider/runtime-loader/provider-embedding-responses.d.ts.map +1 -1
  47. package/esm/src/provider/runtime-loader/provider-embedding-responses.js +1 -1
  48. package/esm/src/provider/runtime-loader/provider-http.d.ts +9 -0
  49. package/esm/src/provider/runtime-loader/provider-http.d.ts.map +1 -1
  50. package/esm/src/provider/runtime-loader/provider-http.js +2 -2
  51. package/esm/src/provider/runtime-loader.d.ts +120 -9
  52. package/esm/src/provider/runtime-loader.d.ts.map +1 -1
  53. package/esm/src/provider/runtime-loader.js +13 -943
  54. package/esm/src/provider/veryfront-cloud/provider.d.ts.map +1 -1
  55. package/esm/src/provider/veryfront-cloud/provider.js +30 -15
  56. package/esm/src/sandbox/index.d.ts +1 -1
  57. package/esm/src/sandbox/index.d.ts.map +1 -1
  58. package/esm/src/sandbox/index.js +1 -1
  59. package/esm/src/sandbox/lazy-sandbox.d.ts +3 -0
  60. package/esm/src/sandbox/lazy-sandbox.d.ts.map +1 -1
  61. package/esm/src/sandbox/lazy-sandbox.js +22 -1
  62. package/esm/src/schemas/define.d.ts +31 -0
  63. package/esm/src/schemas/define.d.ts.map +1 -0
  64. package/esm/src/schemas/define.js +42 -0
  65. package/esm/src/schemas/index.d.ts +7 -2
  66. package/esm/src/schemas/index.d.ts.map +1 -1
  67. package/esm/src/schemas/index.js +10 -2
  68. package/esm/src/schemas/zod-adapter.d.ts +25 -0
  69. package/esm/src/schemas/zod-adapter.d.ts.map +1 -0
  70. package/esm/src/schemas/zod-adapter.js +120 -0
  71. package/esm/src/server/bootstrap.d.ts.map +1 -1
  72. package/esm/src/server/bootstrap.js +5 -0
  73. package/esm/src/transforms/plugins/babel-node-positions.d.ts +6 -7
  74. package/esm/src/transforms/plugins/babel-node-positions.d.ts.map +1 -1
  75. package/esm/src/transforms/plugins/babel-node-positions.js +10 -123
  76. package/esm/src/utils/version-constant.d.ts +1 -1
  77. package/esm/src/utils/version-constant.js +1 -1
  78. package/package.json +1 -6
  79. package/src/deno.js +8 -11
  80. package/src/src/agent/agent-service.ts +91 -7
  81. package/src/src/agent/index.ts +4 -0
  82. package/src/src/embedding/resolve.ts +18 -7
  83. package/src/src/embedding/veryfront-cloud/provider.ts +4 -10
  84. package/src/src/extensions/contracts.ts +3 -3
  85. package/src/src/extensions/interfaces/ai-provider.ts +54 -0
  86. package/src/src/extensions/interfaces/code-parser.ts +12 -0
  87. package/src/src/extensions/interfaces/css-processor.ts +43 -22
  88. package/src/src/extensions/interfaces/index.ts +15 -11
  89. package/src/src/extensions/interfaces/schema-validator.ts +112 -5
  90. package/src/src/extensions/loader.ts +14 -0
  91. package/src/src/extensions/orchestrate.ts +5 -0
  92. package/src/src/extensions/recommendations.ts +4 -1
  93. package/src/src/extensions/registries/ai-provider-registry.ts +53 -0
  94. package/src/src/html/styles-builder/plugin-loader.ts +4 -16
  95. package/src/src/html/styles-builder/tailwind-compiler-cache.ts +27 -6
  96. package/src/src/provider/model-registry.ts +34 -15
  97. package/src/src/provider/runtime-loader/provider-embedding-responses.ts +1 -1
  98. package/src/src/provider/runtime-loader/provider-http.ts +2 -2
  99. package/src/src/provider/runtime-loader.ts +41 -1189
  100. package/src/src/provider/veryfront-cloud/provider.ts +35 -19
  101. package/src/src/sandbox/index.ts +5 -1
  102. package/src/src/sandbox/lazy-sandbox.ts +25 -1
  103. package/src/src/schemas/define.ts +48 -0
  104. package/src/src/schemas/index.ts +13 -2
  105. package/src/src/schemas/zod-adapter.ts +180 -0
  106. package/src/src/server/bootstrap.ts +5 -0
  107. package/src/src/transforms/plugins/babel-node-positions.ts +11 -173
  108. package/src/src/utils/version-constant.ts +1 -1
  109. package/esm/src/extensions/interfaces/ai-model-provider.d.ts +0 -94
  110. package/esm/src/extensions/interfaces/ai-model-provider.d.ts.map +0 -1
  111. package/esm/src/extensions/interfaces/ai-model-provider.js +0 -8
  112. package/src/src/extensions/interfaces/ai-model-provider.ts +0 -100
@@ -4,38 +4,34 @@ import {
4
4
  getGoogleEmbeddingUrl,
5
5
  getGoogleGenerateContentUrl,
6
6
  getGoogleStreamGenerateContentUrl,
7
- getOpenAIChatCompletionsUrl,
8
- getOpenAIEmbeddingUrl,
9
- getOpenAIResponsesUrl,
10
7
  } from "./runtime-loader/provider-endpoints.js";
11
8
  import {
12
9
  extractGoogleEmbedding,
13
10
  extractGoogleUsageTokens,
14
- extractOpenAIEmbeddings,
15
- extractOpenAIUsageTokens,
11
+ isNumberArray,
16
12
  } from "./runtime-loader/provider-embedding-responses.js";
17
13
  import {
18
14
  normalizeAnthropicFinishReason,
19
15
  normalizeGoogleFinishReason,
20
- normalizeOpenAIFinishReason,
21
- normalizeOpenAIResponsesFinishReason,
22
16
  } from "./runtime-loader/provider-finish-reasons.js";
23
17
  import {
24
18
  createAnthropicRequestInit,
25
19
  createGoogleRequestInit,
26
- createOpenAIRequestInit,
27
20
  } from "./runtime-loader/provider-request-init.js";
28
21
  import { parseSseChunk } from "./runtime-loader/provider-sse.js";
29
22
  import {
30
23
  extractAnthropicUsage,
31
24
  extractGoogleUsage,
32
- extractOpenAIResponsesUsage,
33
- extractOpenAIUsage,
34
25
  mergeUsage,
35
26
  type RuntimeUsage,
36
27
  } from "./runtime-loader/provider-usage.js";
37
28
  import type { ProviderKind } from "./runtime-loader/provider-http.js";
38
- import { requestJson, requestStream } from "./runtime-loader/provider-http.js";
29
+ import {
30
+ buildProviderError,
31
+ parseRetryAfterMs,
32
+ requestJson,
33
+ requestStream,
34
+ } from "./runtime-loader/provider-http.js";
39
35
  import { readRecord } from "./runtime-loader/provider-records.js";
40
36
  import {
41
37
  TOOL_INPUT_PENDING_THRESHOLD_MS,
@@ -50,13 +46,15 @@ export {
50
46
  ProviderRequestError,
51
47
  } from "./runtime-loader/provider-http.js";
52
48
  export { TOOL_INPUT_PENDING_THRESHOLD_MS, withToolInputStatusTransitions };
53
-
54
- export interface OpenAIRuntimeConfig {
55
- apiKey: string;
56
- baseURL?: string;
57
- name?: string;
58
- fetch?: typeof globalThis.fetch;
59
- }
49
+ export {
50
+ buildProviderError,
51
+ isNumberArray,
52
+ mergeUsage,
53
+ parseRetryAfterMs,
54
+ readRecord,
55
+ requestJson,
56
+ requestStream,
57
+ };
60
58
 
61
59
  export interface AnthropicRuntimeConfig {
62
60
  apiKey?: string;
@@ -73,7 +71,7 @@ export interface GoogleRuntimeConfig {
73
71
  fetch?: typeof globalThis.fetch;
74
72
  }
75
73
 
76
- type RuntimePromptMessage =
74
+ export type RuntimePromptMessage =
77
75
  | { role: "system"; content: string }
78
76
  | { role: "user"; content: Array<{ type: "text"; text: string }> }
79
77
  | {
@@ -323,7 +321,7 @@ type OpenAICompatibleLanguageOptions = {
323
321
  */
324
322
  mcpServers?: Array<Record<string, unknown>>;
325
323
  };
326
- type OpenAICompatibleChatMessage =
324
+ export type OpenAICompatibleChatMessage =
327
325
  | { role: "system"; content: string }
328
326
  | { role: "user"; content: string }
329
327
  | {
@@ -343,7 +341,7 @@ type OpenAICompatibleChatMessage =
343
341
  tool_call_id: string;
344
342
  content: string;
345
343
  };
346
- type OpenAICompatibleChatRequest = {
344
+ export type OpenAICompatibleChatRequest = {
347
345
  model: string;
348
346
  messages: OpenAICompatibleChatMessage[];
349
347
  stream?: boolean;
@@ -369,17 +367,6 @@ type OpenAICompatibleChatRequest = {
369
367
  frequency_penalty?: number;
370
368
  [key: string]: unknown;
371
369
  };
372
- type OpenAICompatibleChoice = {
373
- message?: unknown;
374
- delta?: unknown;
375
- finish_reason?: unknown;
376
- };
377
- type OpenAIStreamToolCallState = {
378
- id: string;
379
- name: string;
380
- arguments: string;
381
- started: boolean;
382
- };
383
370
  type AnthropicCompatibleMessage = {
384
371
  role: "user" | "assistant";
385
372
  content: Array<Record<string, unknown>>;
@@ -452,7 +439,7 @@ type WarningCollector = {
452
439
  drain(): ProviderWarning[];
453
440
  };
454
441
 
455
- function createWarningCollector(): WarningCollector {
442
+ export function createWarningCollector(): WarningCollector {
456
443
  const list: ProviderWarning[] = [];
457
444
  return {
458
445
  push(warning) {
@@ -464,7 +451,7 @@ function createWarningCollector(): WarningCollector {
464
451
  };
465
452
  }
466
453
 
467
- function stringifyJsonValue(value: unknown): string {
454
+ export function stringifyJsonValue(value: unknown): string {
468
455
  if (typeof value === "string") {
469
456
  return value;
470
457
  }
@@ -472,7 +459,7 @@ function stringifyJsonValue(value: unknown): string {
472
459
  return JSON.stringify(value);
473
460
  }
474
461
 
475
- function readTextParts(parts: Array<{ type: string; text?: string }>): string {
462
+ export function readTextParts(parts: Array<{ type: string; text?: string }>): string {
476
463
  let text = "";
477
464
  for (const part of parts) {
478
465
  if (part.type === "text" && typeof part.text === "string") {
@@ -482,7 +469,9 @@ function readTextParts(parts: Array<{ type: string; text?: string }>): string {
482
469
  return text;
483
470
  }
484
471
 
485
- function toOpenAICompatibleMessages(prompt: RuntimePromptMessage[]): OpenAICompatibleChatMessage[] {
472
+ export function toOpenAICompatibleMessages(
473
+ prompt: RuntimePromptMessage[],
474
+ ): OpenAICompatibleChatMessage[] {
486
475
  const messages: OpenAICompatibleChatMessage[] = [];
487
476
 
488
477
  for (const message of prompt) {
@@ -542,7 +531,7 @@ function toOpenAICompatibleMessages(prompt: RuntimePromptMessage[]): OpenAICompa
542
531
  return messages;
543
532
  }
544
533
 
545
- function toOpenAICompatibleTools(
534
+ export function toOpenAICompatibleTools(
546
535
  tools: RuntimeToolDefinition[] | undefined,
547
536
  ): OpenAICompatibleChatRequest["tools"] | undefined {
548
537
  if (!tools) {
@@ -565,7 +554,7 @@ function toOpenAICompatibleTools(
565
554
  return functions.length > 0 ? functions : undefined;
566
555
  }
567
556
 
568
- function readProviderOptions(
557
+ export function readProviderOptions(
569
558
  providerOptions: Record<string, unknown> | undefined,
570
559
  ...providerNames: string[]
571
560
  ): Record<string, unknown> {
@@ -1465,228 +1454,6 @@ async function* streamAnthropicCompatibleParts(
1465
1454
  };
1466
1455
  }
1467
1456
 
1468
- function extractOpenAIContentText(content: unknown): string {
1469
- if (typeof content === "string") {
1470
- return content;
1471
- }
1472
-
1473
- if (!Array.isArray(content)) {
1474
- return "";
1475
- }
1476
-
1477
- let text = "";
1478
- for (const part of content) {
1479
- const record = readRecord(part);
1480
- const type = record?.type;
1481
- if (type === "text" && typeof record?.text === "string") {
1482
- text += record.text;
1483
- }
1484
- }
1485
-
1486
- return text;
1487
- }
1488
-
1489
- function extractOpenAIToolCalls(message: Record<string, unknown>): Array<{
1490
- toolCallId: string;
1491
- toolName: string;
1492
- input: string;
1493
- }> {
1494
- const toolCalls = message.tool_calls;
1495
- if (!Array.isArray(toolCalls)) {
1496
- return [];
1497
- }
1498
-
1499
- const normalized: Array<{ toolCallId: string; toolName: string; input: string }> = [];
1500
- for (const entry of toolCalls) {
1501
- const record = readRecord(entry);
1502
- const id = typeof record?.id === "string" ? record.id : undefined;
1503
- const fn = readRecord(record?.function);
1504
- const name = typeof fn?.name === "string" ? fn.name : undefined;
1505
- const argumentsText = typeof fn?.arguments === "string" ? fn.arguments : undefined;
1506
- if (!id || !name || argumentsText === undefined) {
1507
- continue;
1508
- }
1509
- normalized.push({
1510
- toolCallId: id,
1511
- toolName: name,
1512
- input: argumentsText,
1513
- });
1514
- }
1515
-
1516
- return normalized;
1517
- }
1518
-
1519
- /**
1520
- * OpenAI reasoning models (o1 / o3 / o4 family) use the completion path but
1521
- * have different constraints than chat models: sampling params are rejected,
1522
- * and they accept a `reasoning_effort` field. We detect them by model id
1523
- * prefix so callers don't have to configure it per runtime.
1524
- */
1525
- function isOpenAIReasoningModel(modelId: string): boolean {
1526
- return /^o[134](-|$)/.test(modelId);
1527
- }
1528
-
1529
- /**
1530
- * Detect native OpenAI models (gpt-*, o-series, chatgpt-*) vs third-party
1531
- * OpenAI-compatible providers (Kimi, etc.). Native OpenAI models require
1532
- * `max_completion_tokens` (the old `max_tokens` is rejected by newer models
1533
- * like gpt-5.2), while third-party providers still expect `max_tokens`.
1534
- */
1535
- function isNativeOpenAIModel(modelId: string): boolean {
1536
- return /^(gpt-|o[134](-|$)|chatgpt-)/.test(modelId);
1537
- }
1538
-
1539
- /**
1540
- * Kimi K2.5 fixes sampling parameters (temperature, top_p, presence_penalty,
1541
- * frequency_penalty) to predetermined values and rejects any other values.
1542
- * See https://platform.moonshot.cn/docs/guide/kimi-k2-5-quickstart
1543
- */
1544
- function isFixedSamplingModel(modelId: string): boolean {
1545
- return /^kimi-k2\.5/.test(modelId);
1546
- }
1547
-
1548
- /**
1549
- * Map the unified reasoning effort to OpenAI's `reasoning_effort` enum.
1550
- * OpenAI doesn't accept "max" — we collapse it to "high".
1551
- */
1552
- function resolveOpenAIReasoningEffort(
1553
- option: ProviderReasoningOption | undefined,
1554
- ): "low" | "medium" | "high" | undefined {
1555
- if (!option || option.enabled !== true) {
1556
- return undefined;
1557
- }
1558
- switch (option.effort) {
1559
- case "low":
1560
- return "low";
1561
- case "high":
1562
- case "max":
1563
- return "high";
1564
- case "medium":
1565
- default:
1566
- return "medium";
1567
- }
1568
- }
1569
-
1570
- function buildOpenAIChatRequest(
1571
- modelId: string,
1572
- providerName: string,
1573
- options: OpenAICompatibleLanguageOptions,
1574
- stream: boolean,
1575
- warnings: WarningCollector,
1576
- ): OpenAICompatibleChatRequest {
1577
- const isReasoningModel = isOpenAIReasoningModel(modelId);
1578
- const reasoningEffort = resolveOpenAIReasoningEffort(options.reasoning);
1579
- const reasoningEnabled = isReasoningModel || reasoningEffort !== undefined;
1580
- const fixedSampling = isFixedSamplingModel(modelId);
1581
- const dropSamplingParams = reasoningEnabled || fixedSampling;
1582
-
1583
- // OpenAI Chat Completions has no top_k surface (it's exposed only on the
1584
- // Responses API for some reasoning models). Quietly accepting it would
1585
- // mislead callers into thinking it took effect.
1586
- if (options.topK !== undefined) {
1587
- warnings.push({
1588
- type: "unsupported-setting",
1589
- provider: "openai",
1590
- setting: "topK",
1591
- details: "OpenAI Chat Completions does not expose top_k; the value was dropped.",
1592
- });
1593
- }
1594
-
1595
- // Reasoning models (o1 / o3 / o4) and models with fixed sampling params
1596
- // (Kimi K2.5) reject sampling params outright. Emit warnings at build time
1597
- // so callers see *why* the value didn't apply rather than a 400 from the API.
1598
- if (dropSamplingParams) {
1599
- const dropped: Array<[keyof typeof options, string]> = [
1600
- ["temperature", "temperature"],
1601
- ["topP", "top_p"],
1602
- ["presencePenalty", "presence_penalty"],
1603
- ["frequencyPenalty", "frequency_penalty"],
1604
- ];
1605
- for (const [key, openaiName] of dropped) {
1606
- if (options[key] !== undefined) {
1607
- warnings.push({
1608
- type: "unsupported-setting",
1609
- provider: "openai",
1610
- setting: key,
1611
- details: fixedSampling
1612
- ? `Dropped because this model uses fixed sampling parameters.`
1613
- : `Dropped because OpenAI reasoning models reject ${openaiName}. Reasoning was active for this request.`,
1614
- });
1615
- }
1616
- }
1617
- }
1618
-
1619
- const body: OpenAICompatibleChatRequest = {
1620
- model: modelId,
1621
- messages: toOpenAICompatibleMessages(options.prompt),
1622
- ...(stream ? { stream: true, stream_options: { include_usage: true } } : {}),
1623
- ...(options.maxOutputTokens !== undefined
1624
- ? isNativeOpenAIModel(modelId)
1625
- ? { max_completion_tokens: options.maxOutputTokens }
1626
- : { max_tokens: options.maxOutputTokens }
1627
- : {}),
1628
- // Reasoning models and fixed-sampling models reject temperature / top_p /
1629
- // frequency / presence. Drop them rather than letting the API bounce.
1630
- ...(!dropSamplingParams && options.temperature !== undefined
1631
- ? { temperature: options.temperature }
1632
- : {}),
1633
- ...(!dropSamplingParams && options.topP !== undefined ? { top_p: options.topP } : {}),
1634
- ...(options.stopSequences && options.stopSequences.length > 0
1635
- ? { stop: options.stopSequences }
1636
- : {}),
1637
- ...(toOpenAICompatibleTools(options.tools)
1638
- ? { tools: toOpenAICompatibleTools(options.tools) }
1639
- : {}),
1640
- ...(options.toolChoice !== undefined ? { tool_choice: options.toolChoice } : {}),
1641
- ...(options.seed !== undefined ? { seed: options.seed } : {}),
1642
- ...(!dropSamplingParams && options.presencePenalty !== undefined
1643
- ? { presence_penalty: options.presencePenalty }
1644
- : {}),
1645
- ...(!dropSamplingParams && options.frequencyPenalty !== undefined
1646
- ? { frequency_penalty: options.frequencyPenalty }
1647
- : {}),
1648
- ...(reasoningEffort !== undefined ? { reasoning_effort: reasoningEffort } : {}),
1649
- ...(typeof options.userId === "string" && options.userId.length > 0
1650
- ? { user: options.userId }
1651
- : {}),
1652
- ...(options.serviceTier !== undefined ? { service_tier: options.serviceTier } : {}),
1653
- ...(options.parallelToolCalls !== undefined
1654
- ? { parallel_tool_calls: options.parallelToolCalls }
1655
- : {}),
1656
- ...(options.responseFormat && options.responseFormat.type !== "text"
1657
- ? {
1658
- response_format: options.responseFormat.type === "json" ? { type: "json_object" } : {
1659
- type: "json_schema",
1660
- json_schema: {
1661
- name: options.responseFormat.name,
1662
- ...(typeof options.responseFormat.description === "string"
1663
- ? { description: options.responseFormat.description }
1664
- : {}),
1665
- schema: unwrapToolInputSchema(options.responseFormat.schema),
1666
- ...(options.responseFormat.strict !== undefined
1667
- ? { strict: options.responseFormat.strict }
1668
- : {}),
1669
- },
1670
- },
1671
- }
1672
- : {}),
1673
- };
1674
-
1675
- const providerOpts = readProviderOptions(options.providerOptions, "openai", providerName);
1676
-
1677
- // Normalize max_tokens → max_completion_tokens for native OpenAI models.
1678
- // Provider options can re-introduce max_tokens which newer models reject.
1679
- if (isNativeOpenAIModel(modelId) && "max_tokens" in providerOpts) {
1680
- if (!("max_completion_tokens" in providerOpts)) {
1681
- providerOpts.max_completion_tokens = providerOpts.max_tokens;
1682
- }
1683
- delete providerOpts.max_tokens;
1684
- }
1685
-
1686
- Object.assign(body, providerOpts);
1687
- return body;
1688
- }
1689
-
1690
1457
  function toGoogleContents(
1691
1458
  prompt: RuntimePromptMessage[],
1692
1459
  ): {
@@ -2178,224 +1945,23 @@ async function* streamGoogleCompatibleParts(
2178
1945
  };
2179
1946
  }
2180
1947
 
2181
- function extractFirstChoice(payload: unknown): OpenAICompatibleChoice | undefined {
2182
- const record = readRecord(payload);
2183
- const choices = record?.choices;
2184
- if (!Array.isArray(choices) || choices.length === 0) {
2185
- return undefined;
2186
- }
2187
-
2188
- const first = readRecord(choices[0]);
2189
- if (!first) {
2190
- return undefined;
2191
- }
2192
-
2193
- return first;
2194
- }
2195
-
2196
- function buildOpenAIGenerateResult(payload: unknown): {
2197
- content: Array<
2198
- { type: "text"; text: string } | {
2199
- type: "tool-call";
2200
- toolCallId: string;
2201
- toolName: string;
2202
- input: string;
2203
- }
2204
- >;
2205
- finishReason?: string | { unified: string; raw: string } | null;
2206
- usage?: RuntimeUsage;
2207
- } {
2208
- const choice = extractFirstChoice(payload);
2209
- const message = readRecord(choice?.message);
2210
- const text = extractOpenAIContentText(message?.content);
2211
- const toolCalls = message ? extractOpenAIToolCalls(message) : [];
2212
-
2213
- return {
2214
- content: [
2215
- ...(text.length > 0 ? [{ type: "text" as const, text }] : []),
2216
- ...toolCalls.map((toolCall) => ({
2217
- type: "tool-call" as const,
2218
- toolCallId: toolCall.toolCallId,
2219
- toolName: toolCall.toolName,
2220
- input: toolCall.input,
2221
- })),
2222
- ],
2223
- finishReason: normalizeOpenAIFinishReason(choice?.finish_reason),
2224
- usage: extractOpenAIUsage(payload),
2225
- };
2226
- }
2227
-
2228
- async function* streamOpenAICompatibleParts(
2229
- stream: ReadableStream<Uint8Array>,
2230
- ): AsyncIterable<unknown> {
2231
- const decoder = new TextDecoder();
2232
- let buffer = "";
2233
- const toolCalls = new Map<number, OpenAIStreamToolCallState>();
2234
- let reasoningId: string | null = null;
2235
- let reasoningIndex = 0;
2236
- let finishReason: string | { unified: string; raw: string } | null = null;
2237
- let usage: RuntimeUsage | undefined;
2238
-
2239
- for await (const chunk of stream) {
2240
- buffer += decoder.decode(chunk, { stream: true });
2241
- const parsed = parseSseChunk(buffer);
2242
- buffer = parsed.remainder;
2243
-
2244
- for (const event of parsed.events) {
2245
- if (event === "[DONE]") {
2246
- continue;
2247
- }
2248
-
2249
- const record = readRecord(event);
2250
- usage = extractOpenAIUsage(record) ?? usage;
2251
- const choice = extractFirstChoice(record);
2252
- if (!choice) {
2253
- continue;
2254
- }
2255
-
2256
- const delta = readRecord(choice.delta);
2257
- if (typeof delta?.reasoning_content === "string" && delta.reasoning_content.length > 0) {
2258
- if (!reasoningId) {
2259
- reasoningId = `reasoning-${reasoningIndex++}`;
2260
- yield {
2261
- type: "reasoning-start",
2262
- id: reasoningId,
2263
- };
2264
- }
2265
-
2266
- yield {
2267
- type: "reasoning-delta",
2268
- id: reasoningId,
2269
- delta: delta.reasoning_content,
2270
- };
2271
- }
2272
-
2273
- const textDelta = extractOpenAIContentText(delta?.content);
2274
- if (textDelta.length > 0) {
2275
- if (reasoningId) {
2276
- yield {
2277
- type: "reasoning-end",
2278
- id: reasoningId,
2279
- };
2280
- reasoningId = null;
2281
- }
2282
- yield { type: "text-delta", delta: textDelta };
2283
- }
2284
-
2285
- const rawToolCalls = Array.isArray(delta?.tool_calls) ? delta.tool_calls : [];
2286
- for (const rawToolCall of rawToolCalls) {
2287
- if (reasoningId) {
2288
- yield {
2289
- type: "reasoning-end",
2290
- id: reasoningId,
2291
- };
2292
- reasoningId = null;
2293
- }
2294
-
2295
- const toolCallRecord = readRecord(rawToolCall);
2296
- const index = typeof toolCallRecord?.index === "number" ? toolCallRecord.index : 0;
2297
- const current = toolCalls.get(index) ?? {
2298
- id: typeof toolCallRecord?.id === "string" ? toolCallRecord.id : `tool-${index}`,
2299
- name: "",
2300
- arguments: "",
2301
- started: false,
2302
- };
2303
-
2304
- if (typeof toolCallRecord?.id === "string") {
2305
- current.id = toolCallRecord.id;
2306
- }
2307
-
2308
- const fn = readRecord(toolCallRecord?.function);
2309
- if (typeof fn?.name === "string") {
2310
- current.name = fn.name;
2311
- }
2312
-
2313
- if (!current.started && current.name.length > 0) {
2314
- current.started = true;
2315
- yield {
2316
- type: "tool-input-start",
2317
- id: current.id,
2318
- toolName: current.name,
2319
- };
2320
- }
2321
-
2322
- if (typeof fn?.arguments === "string" && fn.arguments.length > 0) {
2323
- current.arguments += fn.arguments;
2324
- yield {
2325
- type: "tool-input-delta",
2326
- id: current.id,
2327
- delta: fn.arguments,
2328
- };
2329
- }
2330
-
2331
- toolCalls.set(index, current);
2332
- }
2333
-
2334
- const normalizedFinishReason = normalizeOpenAIFinishReason(choice.finish_reason);
2335
- if (normalizedFinishReason) {
2336
- finishReason = normalizedFinishReason;
2337
- }
2338
- }
2339
- }
2340
-
2341
- if (buffer.trim().length > 0) {
2342
- const parsed = parseSseChunk(`${buffer}\n\n`);
2343
- for (const event of parsed.events) {
2344
- if (event === "[DONE]") {
2345
- continue;
2346
- }
2347
-
2348
- const record = readRecord(event);
2349
- usage = extractOpenAIUsage(record) ?? usage;
2350
- }
2351
- }
2352
-
2353
- if (reasoningId) {
2354
- yield {
2355
- type: "reasoning-end",
2356
- id: reasoningId,
2357
- };
2358
- }
2359
-
2360
- if (
2361
- finishReason &&
2362
- typeof finishReason === "object" &&
2363
- finishReason.unified === "tool-calls"
2364
- ) {
2365
- for (const toolCall of toolCalls.values()) {
2366
- yield {
2367
- type: "tool-call",
2368
- toolCallId: toolCall.id,
2369
- toolName: toolCall.name,
2370
- input: toolCall.arguments,
2371
- };
2372
- }
2373
- }
2374
-
2375
- yield {
2376
- type: "finish",
2377
- finishReason,
2378
- ...(usage ? { usage } : {}),
2379
- };
2380
- }
2381
-
2382
- export function createOpenAIModelRuntime(
2383
- config: OpenAIRuntimeConfig,
1948
+ export function createAnthropicModelRuntime(
1949
+ config: AnthropicRuntimeConfig,
2384
1950
  modelId: string,
2385
1951
  ): ModelRuntime {
2386
1952
  const fetchImpl = config.fetch ?? globalThis.fetch;
2387
1953
  return {
2388
- provider: config.name ?? "openai",
1954
+ provider: config.name ?? "anthropic",
2389
1955
  modelId,
2390
1956
  specificationVersion: "v3",
2391
1957
  supportedUrls: {},
2392
1958
  doGenerate(optionsForRuntime: unknown) {
2393
1959
  const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
2394
- const url = getOpenAIChatCompletionsUrl(config.baseURL);
1960
+ const url = getAnthropicMessagesUrl(config.baseURL);
2395
1961
  const warnings = createWarningCollector();
2396
- const body = buildOpenAIChatRequest(
1962
+ const body = buildAnthropicMessagesRequest(
2397
1963
  modelId,
2398
- config.name ?? "openai",
1964
+ config.name ?? "anthropic",
2399
1965
  options,
2400
1966
  false,
2401
1967
  warnings,
@@ -2403,10 +1969,11 @@ export function createOpenAIModelRuntime(
2403
1969
  return requestJson({
2404
1970
  url,
2405
1971
  fetchImpl,
2406
- providerLabel: config.name ?? "openai",
2407
- providerKind: "openai",
2408
- init: createOpenAIRequestInit({
1972
+ providerLabel: config.name ?? "anthropic",
1973
+ providerKind: "anthropic",
1974
+ init: createAnthropicRequestInit({
2409
1975
  apiKey: config.apiKey,
1976
+ authToken: config.authToken,
2410
1977
  extraHeaders: options.headers,
2411
1978
  body: JSON.stringify(body),
2412
1979
  signal: options.abortSignal,
@@ -2414,689 +1981,18 @@ export function createOpenAIModelRuntime(
2414
1981
  }).then((payload) => {
2415
1982
  const drained = warnings.drain();
2416
1983
  return {
2417
- ...buildOpenAIGenerateResult(payload),
1984
+ ...buildAnthropicGenerateResult(payload),
2418
1985
  ...(drained.length > 0 ? { warnings: drained } : {}),
2419
1986
  };
2420
1987
  });
2421
1988
  },
2422
1989
  doStream(optionsForRuntime: unknown) {
2423
1990
  const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
2424
- const url = getOpenAIChatCompletionsUrl(config.baseURL);
1991
+ const url = getAnthropicMessagesUrl(config.baseURL);
2425
1992
  const warnings = createWarningCollector();
2426
- const body = buildOpenAIChatRequest(
1993
+ const body = buildAnthropicMessagesRequest(
2427
1994
  modelId,
2428
- config.name ?? "openai",
2429
- options,
2430
- true,
2431
- warnings,
2432
- );
2433
- return requestStream({
2434
- url,
2435
- fetchImpl,
2436
- providerLabel: config.name ?? "openai",
2437
- providerKind: "openai",
2438
- init: createOpenAIRequestInit({
2439
- apiKey: config.apiKey,
2440
- extraHeaders: options.headers,
2441
- body: JSON.stringify(body),
2442
- signal: options.abortSignal,
2443
- }),
2444
- }).then((responseStream) => {
2445
- const drained = warnings.drain();
2446
- return {
2447
- stream: ReadableStream.from(
2448
- withToolInputStatusTransitions(streamOpenAICompatibleParts(responseStream)),
2449
- ),
2450
- ...(drained.length > 0 ? { warnings: drained } : {}),
2451
- };
2452
- });
2453
- },
2454
- };
2455
- }
2456
-
2457
- // =============================================================================
2458
- // OpenAI Responses API runtime (#1077, deferred from #1052 C4)
2459
- // =============================================================================
2460
- //
2461
- // The Responses API (/v1/responses) is a different surface than Chat
2462
- // Completions. Same provider, different request shape, different streaming
2463
- // event grammar, different response shape, and different reasoning-summary
2464
- // surface. This runtime is parallel to createOpenAIModelRuntime so each
2465
- // path stays focused on one wire format.
2466
- //
2467
- // Why parallel runtimes instead of a flag? See the rationale in #1077.
2468
- //
2469
- // docs: https://platform.openai.com/docs/api-reference/responses
2470
-
2471
- type OpenAIResponsesInputItem = Record<string, unknown>;
2472
-
2473
- type OpenAIResponsesRequest = {
2474
- model: string;
2475
- input: OpenAIResponsesInputItem[];
2476
- instructions?: string;
2477
- stream?: boolean;
2478
- max_output_tokens?: number;
2479
- temperature?: number;
2480
- top_p?: number;
2481
- tools?: Array<Record<string, unknown>>;
2482
- tool_choice?: unknown;
2483
- reasoning?: { effort?: string; summary?: string };
2484
- metadata?: Record<string, string>;
2485
- user?: string;
2486
- service_tier?: string;
2487
- parallel_tool_calls?: boolean;
2488
- text?: { format: Record<string, unknown> };
2489
- [key: string]: unknown;
2490
- };
2491
-
2492
- /**
2493
- * Convert the unified RuntimePromptMessage[] to the Responses API `input`
2494
- * array shape. Differences from Chat Completions:
2495
- * - System prompts go on the top-level `instructions` field, not inline.
2496
- * - Content parts use `input_text` / `output_text` discriminants instead
2497
- * of the Chat Completions plain-text shorthand.
2498
- * - Assistant tool calls become standalone `function_call` items in the
2499
- * input array, not nested `tool_calls` on a message.
2500
- * - Tool results become standalone `function_call_output` items.
2501
- * - Reasoning content parts roundtrip as `reasoning` items so callers can
2502
- * replay multi-turn conversations with chain-of-thought intact.
2503
- */
2504
- function toOpenAIResponsesInput(
2505
- prompt: RuntimePromptMessage[],
2506
- ): { instructions?: string; input: OpenAIResponsesInputItem[] } {
2507
- const instructionsParts: string[] = [];
2508
- const input: OpenAIResponsesInputItem[] = [];
2509
-
2510
- for (const message of prompt) {
2511
- switch (message.role) {
2512
- case "system":
2513
- if (message.content.length > 0) {
2514
- instructionsParts.push(message.content);
2515
- }
2516
- break;
2517
- case "user":
2518
- input.push({
2519
- role: "user",
2520
- content: [{ type: "input_text", text: readTextParts(message.content) }],
2521
- });
2522
- break;
2523
- case "assistant": {
2524
- const messageContent: Array<Record<string, unknown>> = [];
2525
- for (const part of message.content) {
2526
- if (part.type === "text") {
2527
- messageContent.push({ type: "output_text", text: part.text });
2528
- continue;
2529
- }
2530
- if (part.type === "reasoning") {
2531
- // Reasoning items are top-level entries in the input array,
2532
- // not nested inside the assistant message — flush whatever
2533
- // text we've accumulated first, then push the reasoning item.
2534
- if (messageContent.length > 0) {
2535
- input.push({ role: "assistant", content: [...messageContent] });
2536
- messageContent.length = 0;
2537
- }
2538
- const summary: Array<Record<string, unknown>> = [];
2539
- if (typeof part.text === "string" && part.text.length > 0) {
2540
- summary.push({ type: "summary_text", text: part.text });
2541
- }
2542
- input.push({
2543
- type: "reasoning",
2544
- ...(typeof part.signature === "string" ? { encrypted_content: part.signature } : {}),
2545
- summary,
2546
- });
2547
- continue;
2548
- }
2549
- // tool-call: flush message content, then push as standalone
2550
- // function_call item per Responses API shape.
2551
- if (messageContent.length > 0) {
2552
- input.push({ role: "assistant", content: [...messageContent] });
2553
- messageContent.length = 0;
2554
- }
2555
- input.push({
2556
- type: "function_call",
2557
- call_id: part.toolCallId,
2558
- name: part.toolName,
2559
- arguments: stringifyJsonValue(part.input),
2560
- });
2561
- }
2562
- if (messageContent.length > 0) {
2563
- input.push({ role: "assistant", content: messageContent });
2564
- }
2565
- break;
2566
- }
2567
- case "tool":
2568
- for (const part of message.content) {
2569
- input.push({
2570
- type: "function_call_output",
2571
- call_id: part.toolCallId,
2572
- output: stringifyJsonValue(part.output.value),
2573
- });
2574
- }
2575
- break;
2576
- }
2577
- }
2578
-
2579
- return {
2580
- ...(instructionsParts.length > 0 ? { instructions: instructionsParts.join("\n\n") } : {}),
2581
- input,
2582
- };
2583
- }
2584
-
2585
- /**
2586
- * Tools on the Responses API differ from Chat Completions: instead of
2587
- * `{ type: "function", function: { name, parameters } }` the function
2588
- * shape lifts the name/parameters/strict to the top of the entry. Native
2589
- * tools (web_search, file_search, computer_use, code_interpreter) live
2590
- * alongside function tools in the same array.
2591
- */
2592
- function toOpenAIResponsesTools(
2593
- tools: RuntimeToolDefinition[] | undefined,
2594
- ): Array<Record<string, unknown>> | undefined {
2595
- if (!tools) return undefined;
2596
- const normalized: Array<Record<string, unknown>> = [];
2597
- for (const tool of tools) {
2598
- if (tool.type === "function") {
2599
- normalized.push({
2600
- type: "function",
2601
- name: tool.name,
2602
- ...(typeof tool.description === "string" ? { description: tool.description } : {}),
2603
- parameters: unwrapToolInputSchema(tool.inputSchema),
2604
- });
2605
- continue;
2606
- }
2607
- if (!tool.id.startsWith("openai.")) continue;
2608
- const providerType = tool.id.slice("openai.".length);
2609
- if (providerType.length === 0) continue;
2610
- normalized.push({
2611
- type: providerType,
2612
- ...toSnakeCaseRecord(tool.args),
2613
- });
2614
- }
2615
- return normalized.length > 0 ? normalized : undefined;
2616
- }
2617
-
2618
- function buildOpenAIResponsesRequest(
2619
- modelId: string,
2620
- providerName: string,
2621
- options: OpenAICompatibleLanguageOptions,
2622
- stream: boolean,
2623
- warnings: WarningCollector,
2624
- ): OpenAIResponsesRequest {
2625
- const isReasoningModel = isOpenAIReasoningModel(modelId);
2626
- const reasoningEffort = resolveOpenAIReasoningEffort(options.reasoning);
2627
- const reasoningEnabled = isReasoningModel || reasoningEffort !== undefined;
2628
-
2629
- // Same param-sanitization rules as Chat Completions: reasoning models
2630
- // reject sampling params. Drop with a warning.
2631
- if (options.topK !== undefined) {
2632
- warnings.push({
2633
- type: "unsupported-setting",
2634
- provider: "openai",
2635
- setting: "topK",
2636
- details: "OpenAI Responses API does not expose top_k; the value was dropped.",
2637
- });
2638
- }
2639
- if (reasoningEnabled) {
2640
- const dropped: Array<[keyof typeof options, string]> = [
2641
- ["temperature", "temperature"],
2642
- ["topP", "top_p"],
2643
- ["presencePenalty", "presence_penalty"],
2644
- ["frequencyPenalty", "frequency_penalty"],
2645
- ];
2646
- for (const [key, openaiName] of dropped) {
2647
- if (options[key] !== undefined) {
2648
- warnings.push({
2649
- type: "unsupported-setting",
2650
- provider: "openai",
2651
- setting: key,
2652
- details:
2653
- `Dropped because OpenAI reasoning models reject ${openaiName}. Reasoning was active for this request.`,
2654
- });
2655
- }
2656
- }
2657
- }
2658
-
2659
- const { instructions, input } = toOpenAIResponsesInput(options.prompt);
2660
- const responsesTools = toOpenAIResponsesTools(options.tools);
2661
-
2662
- const body: OpenAIResponsesRequest = {
2663
- model: modelId,
2664
- input,
2665
- ...(instructions !== undefined ? { instructions } : {}),
2666
- ...(stream ? { stream: true } : {}),
2667
- ...(options.maxOutputTokens !== undefined
2668
- ? { max_output_tokens: options.maxOutputTokens }
2669
- : {}),
2670
- ...(!reasoningEnabled && options.temperature !== undefined
2671
- ? { temperature: options.temperature }
2672
- : {}),
2673
- ...(!reasoningEnabled && options.topP !== undefined ? { top_p: options.topP } : {}),
2674
- ...(responsesTools ? { tools: responsesTools } : {}),
2675
- ...(options.toolChoice !== undefined ? { tool_choice: options.toolChoice } : {}),
2676
- // The Responses API surfaces reasoning effort + summary verbosity
2677
- // in a structured `reasoning` object instead of a flat field. We
2678
- // request "auto" summary so callers see structured summary parts
2679
- // without having to opt into them per request.
2680
- ...(reasoningEffort !== undefined
2681
- ? { reasoning: { effort: reasoningEffort, summary: "auto" } }
2682
- : {}),
2683
- ...(typeof options.userId === "string" && options.userId.length > 0
2684
- ? { user: options.userId }
2685
- : {}),
2686
- ...(options.serviceTier !== undefined ? { service_tier: options.serviceTier } : {}),
2687
- ...(options.parallelToolCalls !== undefined
2688
- ? { parallel_tool_calls: options.parallelToolCalls }
2689
- : {}),
2690
- // Responses API uses `text.format` instead of Chat Completions'
2691
- // `response_format`. The shape is similar but nested under `text`.
2692
- ...(options.responseFormat && options.responseFormat.type !== "text"
2693
- ? {
2694
- text: {
2695
- format: options.responseFormat.type === "json" ? { type: "json_object" } : {
2696
- type: "json_schema",
2697
- name: options.responseFormat.name,
2698
- ...(typeof options.responseFormat.description === "string"
2699
- ? { description: options.responseFormat.description }
2700
- : {}),
2701
- schema: unwrapToolInputSchema(options.responseFormat.schema),
2702
- ...(options.responseFormat.strict !== undefined
2703
- ? { strict: options.responseFormat.strict }
2704
- : {}),
2705
- },
2706
- },
2707
- }
2708
- : {}),
2709
- };
2710
-
2711
- Object.assign(body, readProviderOptions(options.providerOptions, "openai", providerName));
2712
- return body;
2713
- }
2714
-
2715
- type OpenAIResponsesContentPart =
2716
- | { type: "text"; text: string }
2717
- | {
2718
- type: "reasoning";
2719
- summaries?: Array<{ id?: string; text: string }>;
2720
- signature?: string;
2721
- }
2722
- | { type: "tool-call"; toolCallId: string; toolName: string; input: string };
2723
-
2724
- function buildOpenAIResponsesGenerateResult(payload: unknown): {
2725
- content: OpenAIResponsesContentPart[];
2726
- finishReason?: string | { unified: string; raw: string } | null;
2727
- usage?: RuntimeUsage;
2728
- } {
2729
- const record = readRecord(payload);
2730
- const output = Array.isArray(record?.output) ? record.output : [];
2731
- const content: OpenAIResponsesContentPart[] = [];
2732
-
2733
- for (const item of output) {
2734
- const itemRecord = readRecord(item);
2735
- const itemType = typeof itemRecord?.type === "string" ? itemRecord.type : undefined;
2736
-
2737
- if (itemType === "message" && Array.isArray(itemRecord?.content)) {
2738
- // A message item bundles one or more output_text parts. Concat
2739
- // their texts into a single text content entry.
2740
- let text = "";
2741
- for (const part of itemRecord.content) {
2742
- const p = readRecord(part);
2743
- if (typeof p?.type === "string" && p.type === "output_text" && typeof p.text === "string") {
2744
- text += p.text;
2745
- }
2746
- }
2747
- if (text.length > 0) {
2748
- content.push({ type: "text", text });
2749
- }
2750
- continue;
2751
- }
2752
-
2753
- if (itemType === "function_call") {
2754
- content.push({
2755
- type: "tool-call",
2756
- toolCallId: typeof itemRecord?.call_id === "string"
2757
- ? itemRecord.call_id
2758
- : (typeof itemRecord?.id === "string" ? itemRecord.id : ""),
2759
- toolName: typeof itemRecord?.name === "string" ? itemRecord.name : "",
2760
- input: typeof itemRecord?.arguments === "string"
2761
- ? itemRecord.arguments
2762
- : stringifyJsonValue(itemRecord?.arguments ?? {}),
2763
- });
2764
- continue;
2765
- }
2766
-
2767
- if (itemType === "reasoning") {
2768
- const summary = Array.isArray(itemRecord?.summary) ? itemRecord.summary : [];
2769
- const summaries: Array<{ id?: string; text: string }> = [];
2770
- for (const s of summary) {
2771
- const sr = readRecord(s);
2772
- if (typeof sr?.text === "string" && sr.text.length > 0) {
2773
- summaries.push({
2774
- ...(typeof sr?.id === "string" ? { id: sr.id } : {}),
2775
- text: sr.text,
2776
- });
2777
- }
2778
- }
2779
- content.push({
2780
- type: "reasoning",
2781
- ...(summaries.length > 0 ? { summaries } : {}),
2782
- ...(typeof itemRecord?.encrypted_content === "string"
2783
- ? { signature: itemRecord.encrypted_content }
2784
- : {}),
2785
- });
2786
- continue;
2787
- }
2788
- }
2789
-
2790
- return {
2791
- content,
2792
- finishReason: normalizeOpenAIResponsesFinishReason(record?.status),
2793
- usage: extractOpenAIResponsesUsage(payload),
2794
- };
2795
- }
2796
-
2797
- type OpenAIResponsesStreamReasoningState = {
2798
- id: string;
2799
- emittedStart: boolean;
2800
- };
2801
-
2802
- type OpenAIResponsesStreamFunctionCallState = {
2803
- id: string;
2804
- toolCallId: string;
2805
- name: string;
2806
- arguments: string;
2807
- };
2808
-
2809
- /**
2810
- * Parse the Responses API streaming event grammar into the same UI part
2811
- * shapes the existing OpenAI / Anthropic / Google streams emit. The
2812
- * Responses API uses a strict event-typed protocol — every event has a
2813
- * `type` field naming the lifecycle phase — instead of the loose
2814
- * `delta`-based shape Chat Completions uses.
2815
- */
2816
- async function* streamOpenAIResponsesParts(
2817
- stream: ReadableStream<Uint8Array>,
2818
- ): AsyncIterable<unknown> {
2819
- const decoder = new TextDecoder();
2820
- let buffer = "";
2821
- const reasoningBlocks = new Map<string, OpenAIResponsesStreamReasoningState>();
2822
- const functionCalls = new Map<string, OpenAIResponsesStreamFunctionCallState>();
2823
- const startedToolCalls = new Set<string>();
2824
- let finishReason: string | { unified: string; raw: string } | null = null;
2825
- let usage: RuntimeUsage | undefined;
2826
- let reasoningCounter = 0;
2827
-
2828
- for await (const chunk of stream) {
2829
- buffer += decoder.decode(chunk, { stream: true });
2830
- const parsed = parseSseChunk(buffer);
2831
- buffer = parsed.remainder;
2832
-
2833
- for (const event of parsed.events) {
2834
- if (event === "[DONE]") continue;
2835
- const record = readRecord(event);
2836
- const type = typeof record?.type === "string" ? record.type : undefined;
2837
- if (!type) continue;
2838
-
2839
- // response.output_item.added: a new output item begins. Track
2840
- // function_call items so their argument deltas can be attributed,
2841
- // and reasoning items so summary deltas can group correctly.
2842
- if (type === "response.output_item.added") {
2843
- const item = readRecord(record?.item);
2844
- const itemType = typeof item?.type === "string" ? item.type : undefined;
2845
- const itemId = typeof item?.id === "string" ? item.id : undefined;
2846
- if (itemType === "function_call" && itemId) {
2847
- const callId = typeof item?.call_id === "string" ? item.call_id : itemId;
2848
- const name = typeof item?.name === "string" ? item.name : "";
2849
- functionCalls.set(itemId, {
2850
- id: itemId,
2851
- toolCallId: callId,
2852
- name,
2853
- arguments: "",
2854
- });
2855
- }
2856
- if (itemType === "reasoning" && itemId) {
2857
- reasoningBlocks.set(itemId, {
2858
- id: `reasoning-${reasoningCounter++}`,
2859
- emittedStart: false,
2860
- });
2861
- }
2862
- continue;
2863
- }
2864
-
2865
- // response.output_text.delta: text chunk for a message item.
2866
- if (type === "response.output_text.delta" && typeof record?.delta === "string") {
2867
- if (record.delta.length > 0) {
2868
- yield { type: "text-delta", delta: record.delta };
2869
- }
2870
- continue;
2871
- }
2872
-
2873
- // response.reasoning_summary_text.delta: reasoning summary text
2874
- // chunk. The first delta on an item lazily emits the
2875
- // reasoning-start event so callers can group deltas into a part.
2876
- if (type === "response.reasoning_summary_text.delta" && typeof record?.delta === "string") {
2877
- const itemId = typeof record?.item_id === "string" ? record.item_id : undefined;
2878
- const state = itemId ? reasoningBlocks.get(itemId) : undefined;
2879
- if (state && record.delta.length > 0) {
2880
- if (!state.emittedStart) {
2881
- yield { type: "reasoning-start", id: state.id };
2882
- state.emittedStart = true;
2883
- }
2884
- yield { type: "reasoning-delta", id: state.id, delta: record.delta };
2885
- }
2886
- continue;
2887
- }
2888
-
2889
- // response.function_call_arguments.delta: tool call argument
2890
- // chunk. The first delta lazily emits tool-input-start.
2891
- if (type === "response.function_call_arguments.delta" && typeof record?.delta === "string") {
2892
- const itemId = typeof record?.item_id === "string" ? record.item_id : undefined;
2893
- const state = itemId ? functionCalls.get(itemId) : undefined;
2894
- if (state && record.delta.length > 0) {
2895
- if (!startedToolCalls.has(state.id)) {
2896
- yield {
2897
- type: "tool-input-start",
2898
- id: state.toolCallId,
2899
- toolName: state.name,
2900
- };
2901
- startedToolCalls.add(state.id);
2902
- }
2903
- state.arguments += record.delta;
2904
- yield {
2905
- type: "tool-input-delta",
2906
- id: state.toolCallId,
2907
- delta: record.delta,
2908
- };
2909
- }
2910
- continue;
2911
- }
2912
-
2913
- // response.output_item.done: an item has finished emitting deltas.
2914
- // Close any reasoning or function-call streams that were open.
2915
- if (type === "response.output_item.done") {
2916
- const item = readRecord(record?.item);
2917
- const itemType = typeof item?.type === "string" ? item.type : undefined;
2918
- const itemId = typeof item?.id === "string" ? item.id : undefined;
2919
- if (itemType === "reasoning" && itemId) {
2920
- const state = reasoningBlocks.get(itemId);
2921
- if (state?.emittedStart) {
2922
- yield { type: "reasoning-end", id: state.id };
2923
- }
2924
- reasoningBlocks.delete(itemId);
2925
- }
2926
- if (itemType === "function_call" && itemId) {
2927
- const state = functionCalls.get(itemId);
2928
- if (state) {
2929
- yield {
2930
- type: "tool-call",
2931
- toolCallId: state.toolCallId,
2932
- toolName: state.name,
2933
- input: state.arguments,
2934
- };
2935
- }
2936
- functionCalls.delete(itemId);
2937
- }
2938
- continue;
2939
- }
2940
-
2941
- // response.completed: terminal event with the final response object
2942
- // (status + usage). Capture both for the final finish part.
2943
- if (type === "response.completed") {
2944
- usage = extractOpenAIResponsesUsage(record) ?? usage;
2945
- const responseRecord = readRecord(record?.response);
2946
- finishReason = normalizeOpenAIResponsesFinishReason(responseRecord?.status);
2947
- continue;
2948
- }
2949
-
2950
- if (type === "response.failed" || type === "response.incomplete") {
2951
- const responseRecord = readRecord(record?.response);
2952
- finishReason = normalizeOpenAIResponsesFinishReason(responseRecord?.status) ??
2953
- (type === "response.failed"
2954
- ? { unified: "error", raw: "failed" }
2955
- : { unified: "length", raw: "incomplete" });
2956
- usage = extractOpenAIResponsesUsage(record) ?? usage;
2957
- continue;
2958
- }
2959
- }
2960
- }
2961
-
2962
- // Close any reasoning streams still open at end-of-stream (defensive
2963
- // — a clean Responses API stream always closes them via output_item.done).
2964
- for (const state of reasoningBlocks.values()) {
2965
- if (state.emittedStart) {
2966
- yield { type: "reasoning-end", id: state.id };
2967
- }
2968
- }
2969
-
2970
- yield {
2971
- type: "finish",
2972
- finishReason,
2973
- ...(usage ? { usage } : {}),
2974
- };
2975
- }
2976
-
2977
- export function createOpenAIResponsesRuntime(
2978
- config: OpenAIRuntimeConfig,
2979
- modelId: string,
2980
- ): ModelRuntime {
2981
- const fetchImpl = config.fetch ?? globalThis.fetch;
2982
- return {
2983
- provider: config.name ?? "openai",
2984
- modelId,
2985
- specificationVersion: "v3",
2986
- supportedUrls: {},
2987
- doGenerate(optionsForRuntime: unknown) {
2988
- const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
2989
- const url = getOpenAIResponsesUrl(config.baseURL);
2990
- const warnings = createWarningCollector();
2991
- const body = buildOpenAIResponsesRequest(
2992
- modelId,
2993
- config.name ?? "openai",
2994
- options,
2995
- false,
2996
- warnings,
2997
- );
2998
- return requestJson({
2999
- url,
3000
- fetchImpl,
3001
- providerLabel: config.name ?? "openai",
3002
- providerKind: "openai",
3003
- init: createOpenAIRequestInit({
3004
- apiKey: config.apiKey,
3005
- extraHeaders: options.headers,
3006
- body: JSON.stringify(body),
3007
- signal: options.abortSignal,
3008
- }),
3009
- }).then((payload) => {
3010
- const drained = warnings.drain();
3011
- return {
3012
- ...buildOpenAIResponsesGenerateResult(payload),
3013
- ...(drained.length > 0 ? { warnings: drained } : {}),
3014
- };
3015
- });
3016
- },
3017
- doStream(optionsForRuntime: unknown) {
3018
- const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
3019
- const url = getOpenAIResponsesUrl(config.baseURL);
3020
- const warnings = createWarningCollector();
3021
- const body = buildOpenAIResponsesRequest(
3022
- modelId,
3023
- config.name ?? "openai",
3024
- options,
3025
- true,
3026
- warnings,
3027
- );
3028
- return requestStream({
3029
- url,
3030
- fetchImpl,
3031
- providerLabel: config.name ?? "openai",
3032
- providerKind: "openai",
3033
- init: createOpenAIRequestInit({
3034
- apiKey: config.apiKey,
3035
- extraHeaders: options.headers,
3036
- body: JSON.stringify(body),
3037
- signal: options.abortSignal,
3038
- }),
3039
- }).then((responseStream) => {
3040
- const drained = warnings.drain();
3041
- return {
3042
- stream: ReadableStream.from(
3043
- withToolInputStatusTransitions(streamOpenAIResponsesParts(responseStream)),
3044
- ),
3045
- ...(drained.length > 0 ? { warnings: drained } : {}),
3046
- };
3047
- });
3048
- },
3049
- };
3050
- }
3051
-
3052
- export function createAnthropicModelRuntime(
3053
- config: AnthropicRuntimeConfig,
3054
- modelId: string,
3055
- ): ModelRuntime {
3056
- const fetchImpl = config.fetch ?? globalThis.fetch;
3057
- return {
3058
- provider: config.name ?? "anthropic",
3059
- modelId,
3060
- specificationVersion: "v3",
3061
- supportedUrls: {},
3062
- doGenerate(optionsForRuntime: unknown) {
3063
- const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
3064
- const url = getAnthropicMessagesUrl(config.baseURL);
3065
- const warnings = createWarningCollector();
3066
- const body = buildAnthropicMessagesRequest(
3067
- modelId,
3068
- config.name ?? "anthropic",
3069
- options,
3070
- false,
3071
- warnings,
3072
- );
3073
- return requestJson({
3074
- url,
3075
- fetchImpl,
3076
- providerLabel: config.name ?? "anthropic",
3077
- providerKind: "anthropic",
3078
- init: createAnthropicRequestInit({
3079
- apiKey: config.apiKey,
3080
- authToken: config.authToken,
3081
- extraHeaders: options.headers,
3082
- body: JSON.stringify(body),
3083
- signal: options.abortSignal,
3084
- }),
3085
- }).then((payload) => {
3086
- const drained = warnings.drain();
3087
- return {
3088
- ...buildAnthropicGenerateResult(payload),
3089
- ...(drained.length > 0 ? { warnings: drained } : {}),
3090
- };
3091
- });
3092
- },
3093
- doStream(optionsForRuntime: unknown) {
3094
- const options = optionsForRuntime as OpenAICompatibleLanguageOptions;
3095
- const url = getAnthropicMessagesUrl(config.baseURL);
3096
- const warnings = createWarningCollector();
3097
- const body = buildAnthropicMessagesRequest(
3098
- modelId,
3099
- config.name ?? "anthropic",
1995
+ config.name ?? "anthropic",
3100
1996
  options,
3101
1997
  true,
3102
1998
  warnings,
@@ -3198,50 +2094,6 @@ export function createGoogleModelRuntime(
3198
2094
  };
3199
2095
  }
3200
2096
 
3201
- export function createOpenAIEmbeddingRuntime(
3202
- config: OpenAIRuntimeConfig,
3203
- modelId: string,
3204
- ): EmbeddingRuntime {
3205
- const fetchImpl = config.fetch ?? globalThis.fetch;
3206
- return {
3207
- provider: config.name ?? "openai",
3208
- modelId,
3209
- supportsParallelCalls: true,
3210
- doEmbed({ values, abortSignal }) {
3211
- if (values.length === 0) {
3212
- return Promise.resolve({
3213
- embeddings: [],
3214
- warnings: [],
3215
- rawResponse: { data: [] },
3216
- });
3217
- }
3218
-
3219
- const url = getOpenAIEmbeddingUrl(config.baseURL);
3220
- return requestJson({
3221
- url,
3222
- fetchImpl,
3223
- providerLabel: config.name ?? "openai",
3224
- providerKind: "openai",
3225
- init: createOpenAIRequestInit({
3226
- apiKey: config.apiKey,
3227
- body: JSON.stringify({
3228
- model: modelId,
3229
- input: values,
3230
- }),
3231
- signal: abortSignal,
3232
- }),
3233
- }).then((payload) => ({
3234
- embeddings: extractOpenAIEmbeddings(payload),
3235
- usage: {
3236
- tokens: extractOpenAIUsageTokens(payload),
3237
- },
3238
- rawResponse: payload,
3239
- warnings: [],
3240
- }));
3241
- },
3242
- };
3243
- }
3244
-
3245
2097
  export function createGoogleEmbeddingRuntime(
3246
2098
  config: GoogleRuntimeConfig,
3247
2099
  modelId: string,