extrait 0.7.3 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -2,6 +2,7 @@ export { DEFAULT_EXTRACTION_HEURISTICS, extractJsonCandidates } from "./extract"
2
2
  export { DEFAULT_SCHEMA_INSTRUCTION, formatPrompt, resolveSchemaInstruction, withFormat, type WithFormatOptions, } from "./format";
3
3
  export { extractFirstMarkdownCode, extractMarkdownCodeBlocks } from "./markdown";
4
4
  export { sanitizeThink } from "./think";
5
+ export { normalizeModelOutput, withoutTrailingThinkTagPrefix } from "./generate-output";
5
6
  export { createLLM, type CreateLLMOptions, type LLMClient } from "./llm";
6
7
  export { generate } from "./generate";
7
8
  export { formatZodIssues, parseLLMOutput } from "./parse";
package/dist/index.js CHANGED
@@ -1077,6 +1077,110 @@ function countHiddenChars(value) {
1077
1077
  function maskKeepingLineBreaks(value) {
1078
1078
  return value.replace(RE_NON_LINE_BREAK, " ");
1079
1079
  }
1080
+ // src/generate-output.ts
1081
+ var RE_THINK_TAGS = /<\/?think\s*>/gi;
1082
+ function normalizeModelOutput(text, dedicatedReasoning, reasoningBlocks) {
1083
+ const sanitized = sanitizeThink(text);
1084
+ const visibleText = stripThinkBlocks(text, sanitized.thinkBlocks);
1085
+ const reasoning = joinReasoningSegments([
1086
+ sanitizeReasoningText(dedicatedReasoning),
1087
+ ...sanitized.thinkBlocks.map((block) => block.content)
1088
+ ]);
1089
+ return {
1090
+ text: visibleText,
1091
+ reasoning,
1092
+ reasoningBlocks: normalizeReasoningBlocks(reasoningBlocks),
1093
+ thinkBlocks: sanitized.thinkBlocks,
1094
+ parseSource: composeParseSource(visibleText, reasoning)
1095
+ };
1096
+ }
1097
+ function normalizeReasoningBlocks(blocks) {
1098
+ if (!Array.isArray(blocks)) {
1099
+ return;
1100
+ }
1101
+ const normalized = blocks.map((block) => ({
1102
+ turnIndex: block.turnIndex,
1103
+ text: block.text.replace(RE_THINK_TAGS, "").trim()
1104
+ })).filter((block) => Number.isFinite(block.turnIndex) && block.text.length > 0);
1105
+ return normalized.length > 0 ? normalized : undefined;
1106
+ }
1107
+ function appendReasoningBlock(blocks, transition) {
1108
+ const text = transition.reasoningText?.replace(RE_THINK_TAGS, "").trim();
1109
+ if (!text) {
1110
+ return blocks;
1111
+ }
1112
+ const next = [...blocks ?? [], { turnIndex: transition.turnIndex, text }];
1113
+ return normalizeReasoningBlocks(next);
1114
+ }
1115
+ function composeParseSource(text, reasoning) {
1116
+ if (typeof reasoning !== "string" || reasoning.length === 0) {
1117
+ return text;
1118
+ }
1119
+ const sanitized = reasoning.replace(RE_THINK_TAGS, "");
1120
+ if (sanitized.length === 0) {
1121
+ return text;
1122
+ }
1123
+ return `<think>${sanitized}</think>${text}`;
1124
+ }
1125
+ function aggregateUsage(attempts) {
1126
+ let usage;
1127
+ for (const attempt of attempts) {
1128
+ usage = mergeUsage(usage, attempt.usage);
1129
+ }
1130
+ return usage;
1131
+ }
1132
+ function mergeUsage(base, next) {
1133
+ if (!base && !next) {
1134
+ return;
1135
+ }
1136
+ return {
1137
+ inputTokens: (base?.inputTokens ?? 0) + (next?.inputTokens ?? 0),
1138
+ outputTokens: (base?.outputTokens ?? 0) + (next?.outputTokens ?? 0),
1139
+ totalTokens: (base?.totalTokens ?? 0) + (next?.totalTokens ?? 0),
1140
+ cost: (base?.cost ?? 0) + (next?.cost ?? 0)
1141
+ };
1142
+ }
1143
+ function joinReasoningSegments(parts) {
1144
+ return parts.map((value) => value?.trim()).filter((value) => Boolean(value)).join(`
1145
+
1146
+ `);
1147
+ }
1148
+ function sanitizeReasoningText(value) {
1149
+ const sanitized = value?.replace(RE_THINK_TAGS, "").trim();
1150
+ return sanitized ? sanitized : undefined;
1151
+ }
1152
+ var THINK_TAG_VARIANTS = ["<think>", "</think>"];
1153
+ var MAX_THINK_TAG_PREFIX = Math.max(...THINK_TAG_VARIANTS.map((tag) => tag.length)) - 1;
1154
+ function withoutTrailingThinkTagPrefix(value) {
1155
+ const max = Math.min(value.length, MAX_THINK_TAG_PREFIX);
1156
+ for (let length = max;length > 0; length -= 1) {
1157
+ const suffix = value.slice(value.length - length);
1158
+ if (THINK_TAG_VARIANTS.some((tag) => tag.length > suffix.length && tag.startsWith(suffix))) {
1159
+ return value.slice(0, value.length - length);
1160
+ }
1161
+ }
1162
+ return value;
1163
+ }
1164
+ function stripThinkBlocks(text, thinkBlocks) {
1165
+ if (thinkBlocks.length === 0) {
1166
+ return text;
1167
+ }
1168
+ let output = "";
1169
+ let cursor = 0;
1170
+ for (const block of thinkBlocks) {
1171
+ output += text.slice(cursor, block.start);
1172
+ cursor = block.end;
1173
+ }
1174
+ output += text.slice(cursor);
1175
+ return output;
1176
+ }
1177
+ function toStreamDataFingerprint(value) {
1178
+ try {
1179
+ return JSON.stringify(value);
1180
+ } catch {
1181
+ return "__unserializable__";
1182
+ }
1183
+ }
1080
1184
  // src/providers/stream-utils.ts
1081
1185
  var RE_LINE_ENDING = /\r?\n/;
1082
1186
  async function consumeSSE(response, onEvent) {
@@ -1506,7 +1610,7 @@ function pickString(value) {
1506
1610
  function toFiniteNumber(value) {
1507
1611
  return typeof value === "number" && Number.isFinite(value) ? value : undefined;
1508
1612
  }
1509
- function mergeUsage(base, next) {
1613
+ function mergeUsage2(base, next) {
1510
1614
  if (!base && !next) {
1511
1615
  return;
1512
1616
  }
@@ -1631,6 +1735,8 @@ async function streamWithChatCompletionsPassThrough(options, fetcher, path, requ
1631
1735
  let reasoning = "";
1632
1736
  let usage;
1633
1737
  let finishReason;
1738
+ const streamedToolCalls = new Map;
1739
+ const nativeToolCalls = new NativeToolCallStreamState(requestDeclaresTools(options, request));
1634
1740
  await consumeSSE(response, (data) => {
1635
1741
  if (data === "[DONE]") {
1636
1742
  return;
@@ -1639,10 +1745,14 @@ async function streamWithChatCompletionsPassThrough(options, fetcher, path, requ
1639
1745
  if (!isRecord2(json)) {
1640
1746
  return;
1641
1747
  }
1642
- const delta = pickAssistantDelta(json);
1748
+ const rawDelta = pickAssistantDelta(json);
1643
1749
  const reasoningDelta = pickAssistantReasoningDelta(json);
1644
1750
  const chunkUsage = pickUsage(json);
1645
1751
  const chunkFinishReason = pickFinishReason(json);
1752
+ collectOpenAIStreamToolCalls(json, streamedToolCalls);
1753
+ const nativeDelta = nativeToolCalls.push(rawDelta);
1754
+ const delta = nativeDelta.textDelta;
1755
+ const chunkToolCalls = mergeToolCalls(buildOpenAIStreamToolCalls(streamedToolCalls), nativeDelta.toolCalls);
1646
1756
  usage = preferLatestUsage(usage, chunkUsage);
1647
1757
  if (chunkFinishReason) {
1648
1758
  finishReason = chunkFinishReason;
@@ -1654,13 +1764,21 @@ async function streamWithChatCompletionsPassThrough(options, fetcher, path, requ
1654
1764
  if (reasoningDelta) {
1655
1765
  reasoning += reasoningDelta;
1656
1766
  }
1657
- emitOpenAIStreamChunk(callbacks, undefined, json, delta, reasoningDelta, chunkUsage, chunkFinishReason);
1767
+ emitOpenAIStreamChunk(callbacks, undefined, json, delta, reasoningDelta, chunkUsage, chunkFinishReason, chunkToolCalls.length > 0 ? chunkToolCalls : undefined);
1658
1768
  });
1769
+ const tail = nativeToolCalls.flush();
1770
+ if (tail.textDelta) {
1771
+ text += tail.textDelta;
1772
+ callbacks.onToken?.(tail.textDelta);
1773
+ emitOpenAIStreamChunk(callbacks, undefined, {}, tail.textDelta, "", undefined, undefined);
1774
+ }
1775
+ const toolCalls = mergeToolCalls(buildOpenAIStreamToolCalls(streamedToolCalls), nativeToolCalls.calls);
1659
1776
  const out = {
1660
1777
  text,
1661
1778
  reasoning: reasoning.length > 0 ? reasoning : undefined,
1662
1779
  usage,
1663
- finishReason
1780
+ finishReason: finishReason ?? (toolCalls.length > 0 ? "tool_calls" : undefined),
1781
+ toolCalls: toolCalls.length > 0 ? toolCalls : undefined
1664
1782
  };
1665
1783
  callbacks.onComplete?.(out);
1666
1784
  return out;
@@ -1770,15 +1888,16 @@ function buildResponsesMCPResult(state, text, raw) {
1770
1888
  toolExecutions: state.toolExecutions.length > 0 ? state.toolExecutions : undefined
1771
1889
  };
1772
1890
  }
1773
- function emitOpenAIStreamChunk(callbacks, round, raw, delta, reasoningDelta, usage, finishReason) {
1774
- if (delta || reasoningDelta || usage || finishReason) {
1891
+ function emitOpenAIStreamChunk(callbacks, round, raw, delta, reasoningDelta, usage, finishReason, toolCalls) {
1892
+ if (delta || reasoningDelta || usage || finishReason || toolCalls) {
1775
1893
  callbacks.onChunk?.({
1776
1894
  textDelta: delta,
1777
1895
  reasoningDelta: reasoningDelta || undefined,
1778
1896
  ...round !== undefined ? { turnIndex: round } : {},
1779
1897
  raw,
1780
1898
  usage,
1781
- finishReason
1899
+ finishReason,
1900
+ toolCalls
1782
1901
  });
1783
1902
  }
1784
1903
  }
@@ -1845,7 +1964,7 @@ async function completeWithChatCompletionsWithMCP(options, fetcher, path, reques
1845
1964
  parallel_tool_calls: request.parallelToolCalls
1846
1965
  }));
1847
1966
  lastPayload = payload;
1848
- aggregatedUsage = mergeUsage(aggregatedUsage, pickUsage(payload));
1967
+ aggregatedUsage = mergeUsage2(aggregatedUsage, pickUsage(payload));
1849
1968
  finishReason = pickFinishReason(payload);
1850
1969
  const assistantMessage = pickAssistantMessage(payload);
1851
1970
  const calledTools = pickChatToolCalls(payload);
@@ -1925,7 +2044,7 @@ async function completeWithResponsesAPIWithMCP(options, fetcher, path, request)
1925
2044
  parallel_tool_calls: request.parallelToolCalls
1926
2045
  }));
1927
2046
  state.lastPayload = payload;
1928
- state.aggregatedUsage = mergeUsage(state.aggregatedUsage, pickUsage(payload));
2047
+ state.aggregatedUsage = mergeUsage2(state.aggregatedUsage, pickUsage(payload));
1929
2048
  state.finishReason = pickResponsesFinishReason(payload) ?? state.finishReason;
1930
2049
  pushReasoningBlock(state.reasoningBlocks, round, pickResponsesReasoning(payload));
1931
2050
  const providerToolCalls = pickResponsesToolCalls(payload);
@@ -1985,6 +2104,7 @@ async function streamWithChatCompletionsWithMCP(options, fetcher, path, request,
1985
2104
  let roundUsage;
1986
2105
  let roundFinishReason;
1987
2106
  const streamedToolCalls = new Map;
2107
+ const nativeToolCalls = new NativeToolCallStreamState;
1988
2108
  let reasoningFieldName;
1989
2109
  await consumeSSE(response, (data) => {
1990
2110
  if (data === "[DONE]") {
@@ -1995,10 +2115,12 @@ async function streamWithChatCompletionsWithMCP(options, fetcher, path, request,
1995
2115
  return;
1996
2116
  }
1997
2117
  lastPayload = json;
1998
- const delta = pickAssistantDelta(json);
2118
+ const rawDelta = pickAssistantDelta(json);
1999
2119
  const reasoningDelta = pickAssistantReasoningDelta(json);
2000
2120
  const chunkUsage = pickUsage(json);
2001
2121
  const chunkFinishReason = pickFinishReason(json);
2122
+ const nativeDelta = nativeToolCalls.push(rawDelta);
2123
+ const delta = nativeDelta.textDelta;
2002
2124
  collectOpenAIStreamToolCalls(json, streamedToolCalls);
2003
2125
  roundUsage = preferLatestUsage(roundUsage, chunkUsage);
2004
2126
  if (chunkFinishReason) {
@@ -2012,13 +2134,20 @@ async function streamWithChatCompletionsWithMCP(options, fetcher, path, request,
2012
2134
  roundReasoning += reasoningDelta;
2013
2135
  reasoningFieldName ??= pickAssistantReasoningDeltaFieldName(json);
2014
2136
  }
2015
- emitOpenAIStreamChunk(callbacks, round, json, delta, reasoningDelta, chunkUsage, chunkFinishReason);
2137
+ const chunkToolCalls = nativeDelta.toolCalls.length > 0 ? mergeToolCalls(buildOpenAIStreamToolCalls(streamedToolCalls), nativeDelta.toolCalls) : undefined;
2138
+ emitOpenAIStreamChunk(callbacks, round, json, delta, reasoningDelta, chunkUsage, chunkFinishReason, chunkToolCalls);
2016
2139
  });
2017
- aggregatedUsage = mergeUsage(aggregatedUsage, roundUsage);
2140
+ const tail = nativeToolCalls.flush();
2141
+ if (tail.textDelta) {
2142
+ roundText += tail.textDelta;
2143
+ callbacks.onToken?.(tail.textDelta);
2144
+ emitOpenAIStreamChunk(callbacks, round, {}, tail.textDelta, "", undefined, undefined);
2145
+ }
2146
+ aggregatedUsage = mergeUsage2(aggregatedUsage, roundUsage);
2018
2147
  if (roundFinishReason) {
2019
2148
  finishReason = roundFinishReason;
2020
2149
  }
2021
- const calledTools = buildOpenAIStreamToolCalls(streamedToolCalls);
2150
+ const calledTools = mergeToolCalls(buildOpenAIStreamToolCalls(streamedToolCalls), nativeToolCalls.calls);
2022
2151
  pushReasoningBlock(reasoningBlocks, round, roundReasoning);
2023
2152
  request.onTurnTransition?.({
2024
2153
  turnIndex: round,
@@ -2105,6 +2234,7 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
2105
2234
  let usage;
2106
2235
  let finishReason;
2107
2236
  let lastPayload;
2237
+ const streamedToolCalls = new Map;
2108
2238
  await consumeSSE(response, (data) => {
2109
2239
  if (data === "[DONE]") {
2110
2240
  return;
@@ -2120,6 +2250,8 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
2120
2250
  const delta = pickResponsesStreamTextDelta(json);
2121
2251
  const chunkUsage = pickResponsesStreamUsage(json);
2122
2252
  const chunkFinishReason = pickResponsesStreamFinishReason(json);
2253
+ collectResponsesStreamToolCalls(json, streamedToolCalls);
2254
+ const chunkToolCalls = buildResponsesStreamToolCalls(streamedToolCalls);
2123
2255
  usage = preferLatestUsage(usage, chunkUsage);
2124
2256
  if (chunkFinishReason) {
2125
2257
  finishReason = chunkFinishReason;
@@ -2128,14 +2260,16 @@ async function streamWithResponsesAPIPassThrough(options, fetcher, path, request
2128
2260
  text += delta;
2129
2261
  callbacks.onToken?.(delta);
2130
2262
  }
2131
- emitOpenAIStreamChunk(callbacks, undefined, json, delta, "", chunkUsage, chunkFinishReason);
2263
+ emitOpenAIStreamChunk(callbacks, undefined, json, delta, "", chunkUsage, chunkFinishReason, chunkToolCalls.length > 0 ? chunkToolCalls : undefined);
2132
2264
  });
2133
2265
  const finalPayload = lastPayload ?? {};
2266
+ const toolCalls = buildResponsesStreamToolCalls(streamedToolCalls);
2134
2267
  const out = {
2135
2268
  text: text.length > 0 ? text : pickResponsesText(finalPayload) || pickAssistantText(finalPayload),
2136
2269
  raw: finalPayload,
2137
2270
  usage: preferLatestUsage(usage, pickUsage(finalPayload)),
2138
- finishReason: finishReason ?? pickResponsesFinishReason(finalPayload) ?? pickFinishReason(finalPayload)
2271
+ finishReason: finishReason ?? pickResponsesFinishReason(finalPayload) ?? pickFinishReason(finalPayload),
2272
+ toolCalls: toolCalls.length > 0 ? toolCalls : undefined
2139
2273
  };
2140
2274
  callbacks.onComplete?.(out);
2141
2275
  return out;
@@ -2197,7 +2331,7 @@ async function streamWithResponsesAPIWithMCP(options, fetcher, path, request, ca
2197
2331
  emitOpenAIStreamChunk(callbacks, round, json, delta, reasoningDelta, chunkUsage, chunkFinishReason);
2198
2332
  });
2199
2333
  const resolvedRoundUsage = preferLatestUsage(roundUsage, roundPayload ? pickUsage(roundPayload) : undefined);
2200
- state.aggregatedUsage = mergeUsage(state.aggregatedUsage, resolvedRoundUsage);
2334
+ state.aggregatedUsage = mergeUsage2(state.aggregatedUsage, resolvedRoundUsage);
2201
2335
  if (roundFinishReason) {
2202
2336
  state.finishReason = roundFinishReason;
2203
2337
  } else if (roundPayload) {
@@ -2451,43 +2585,204 @@ function pickAssistantReasoningDeltaFieldName(payload) {
2451
2585
  }
2452
2586
  return;
2453
2587
  }
2454
- function collectOpenAIStreamToolCalls(payload, state) {
2455
- const choices = payload.choices;
2456
- if (!Array.isArray(choices) || choices.length === 0 || !isRecord2(choices[0])) {
2588
+ var NATIVE_TOOL_CALL_OPEN = "<tool_call";
2589
+ var NATIVE_TOOL_CALL_CLOSE = "</tool_call>";
2590
+
2591
+ class NativeToolCallStreamState {
2592
+ enabled;
2593
+ calls = [];
2594
+ pending = "";
2595
+ constructor(enabled = true) {
2596
+ this.enabled = enabled;
2597
+ }
2598
+ push(delta) {
2599
+ if (!delta || !this.enabled) {
2600
+ return { textDelta: delta, toolCalls: [] };
2601
+ }
2602
+ this.pending += delta;
2603
+ return this.drain(false);
2604
+ }
2605
+ flush() {
2606
+ return this.enabled ? this.drain(true) : { textDelta: "", toolCalls: [] };
2607
+ }
2608
+ drain(flush) {
2609
+ let textDelta = "";
2610
+ const toolCalls = [];
2611
+ while (this.pending.length > 0) {
2612
+ const openIndex = this.pending.indexOf(NATIVE_TOOL_CALL_OPEN);
2613
+ if (openIndex < 0) {
2614
+ const keep = flush ? 0 : nativeToolCallPrefixSuffixLength(this.pending);
2615
+ const emitLength = this.pending.length - keep;
2616
+ if (emitLength > 0) {
2617
+ textDelta += this.pending.slice(0, emitLength);
2618
+ this.pending = this.pending.slice(emitLength);
2619
+ }
2620
+ break;
2621
+ }
2622
+ if (openIndex > 0) {
2623
+ textDelta += this.pending.slice(0, openIndex);
2624
+ this.pending = this.pending.slice(openIndex);
2625
+ continue;
2626
+ }
2627
+ const closeIndex = this.pending.indexOf(NATIVE_TOOL_CALL_CLOSE);
2628
+ if (closeIndex < 0) {
2629
+ if (flush) {
2630
+ this.pending = "";
2631
+ }
2632
+ break;
2633
+ }
2634
+ const blockEnd = closeIndex + NATIVE_TOOL_CALL_CLOSE.length;
2635
+ const call = parseNativeToolCallBlock(this.pending.slice(0, blockEnd), this.calls.length);
2636
+ if (call) {
2637
+ this.calls.push(call);
2638
+ toolCalls.push(call);
2639
+ }
2640
+ this.pending = this.pending.slice(blockEnd);
2641
+ }
2642
+ return { textDelta, toolCalls };
2643
+ }
2644
+ }
2645
+ function requestDeclaresTools(options, request) {
2646
+ const hasTools = (body) => Array.isArray(body?.tools) && body.tools.length > 0;
2647
+ return hasTools(request.body) || hasTools(options.defaultBody);
2648
+ }
2649
+ var NATIVE_FUNCTION_PATTERN = /<function=([^>\s]+)\s*>([\s\S]*?)<\/function>/;
2650
+ var NATIVE_PARAMETER_PATTERN = /<parameter=([^>\s]+)\s*>([\s\S]*?)<\/parameter>/g;
2651
+ function parseNativeToolCallBlock(block, index) {
2652
+ const inner = extractNativeToolCallInner(block);
2653
+ if (inner === undefined) {
2457
2654
  return;
2458
2655
  }
2459
- const delta = choices[0].delta;
2460
- if (!isRecord2(delta) || !Array.isArray(delta.tool_calls)) {
2656
+ return parseNativeJsonToolCall(inner, index) ?? parseNativeXmlToolCall(inner, index);
2657
+ }
2658
+ function extractNativeToolCallInner(block) {
2659
+ const openEnd = block.indexOf(">");
2660
+ const closeStart = block.lastIndexOf(NATIVE_TOOL_CALL_CLOSE);
2661
+ if (openEnd < 0 || closeStart < 0 || closeStart <= openEnd) {
2461
2662
  return;
2462
2663
  }
2463
- for (const rawToolCall of delta.tool_calls) {
2464
- if (!isRecord2(rawToolCall)) {
2465
- continue;
2664
+ return block.slice(openEnd + 1, closeStart).trim();
2665
+ }
2666
+ function parseNativeXmlToolCall(inner, index) {
2667
+ const functionMatch = NATIVE_FUNCTION_PATTERN.exec(inner);
2668
+ const functionName = functionMatch?.[1];
2669
+ const functionBody = functionMatch?.[2];
2670
+ if (!functionName || functionBody === undefined) {
2671
+ return;
2672
+ }
2673
+ const args = {};
2674
+ for (const [, key, rawValue] of functionBody.matchAll(NATIVE_PARAMETER_PATTERN)) {
2675
+ if (key && rawValue !== undefined) {
2676
+ args[key] = coerceNativeParameterValue(rawValue.trim());
2466
2677
  }
2467
- const index = toFiniteNumber(rawToolCall.index);
2468
- const toolIndex = index !== undefined ? Math.floor(index) : 0;
2469
- const existing = state.get(toolIndex) ?? {
2470
- index: toolIndex,
2471
- argumentsText: ""
2472
- };
2473
- const id = pickString(rawToolCall.id);
2474
- if (id) {
2475
- existing.id = id;
2678
+ }
2679
+ return {
2680
+ id: `call_native_${index}`,
2681
+ type: "function",
2682
+ name: functionName,
2683
+ arguments: JSON.stringify(args)
2684
+ };
2685
+ }
2686
+ function coerceNativeParameterValue(value) {
2687
+ if (value.length === 0) {
2688
+ return "";
2689
+ }
2690
+ const parsed = safeJSONParse(value);
2691
+ if (parsed === null) {
2692
+ return value === "null" ? null : value;
2693
+ }
2694
+ return parsed;
2695
+ }
2696
+ function nativeToolCallPrefixSuffixLength(value) {
2697
+ const max = Math.min(value.length, NATIVE_TOOL_CALL_OPEN.length - 1);
2698
+ for (let length = max;length > 0; length -= 1) {
2699
+ if (NATIVE_TOOL_CALL_OPEN.startsWith(value.slice(-length))) {
2700
+ return length;
2701
+ }
2702
+ }
2703
+ return 0;
2704
+ }
2705
+ function parseNativeJsonToolCall(inner, index) {
2706
+ const parsed = safeJSONParse(inner);
2707
+ if (!isRecord2(parsed)) {
2708
+ return;
2709
+ }
2710
+ const name = pickString(parsed.name) ?? pickString(parsed.function);
2711
+ if (!name) {
2712
+ return;
2713
+ }
2714
+ const rawArguments = parsed.arguments ?? parsed.parameters ?? {};
2715
+ const args = typeof rawArguments === "string" ? rawArguments : JSON.stringify(rawArguments);
2716
+ return {
2717
+ id: pickString(parsed.id) ?? `call_native_${index}`,
2718
+ type: "function",
2719
+ name,
2720
+ arguments: args
2721
+ };
2722
+ }
2723
+ function mergeToolCalls(...groups) {
2724
+ const merged = [];
2725
+ const seen = new Set;
2726
+ for (const group of groups) {
2727
+ for (const call of group) {
2728
+ const key = call.id || `${call.name ?? ""}:${String(call.arguments ?? "")}`;
2729
+ if (seen.has(key)) {
2730
+ continue;
2731
+ }
2732
+ seen.add(key);
2733
+ merged.push(call);
2476
2734
  }
2477
- const type = pickString(rawToolCall.type);
2478
- if (type) {
2479
- existing.type = type;
2735
+ }
2736
+ return merged;
2737
+ }
2738
+ function collectOpenAIStreamToolCalls(payload, state) {
2739
+ const choices = payload.choices;
2740
+ if (!Array.isArray(choices) || choices.length === 0) {
2741
+ return;
2742
+ }
2743
+ for (const choice of choices) {
2744
+ if (!isRecord2(choice)) {
2745
+ continue;
2480
2746
  }
2481
- const functionCall = isRecord2(rawToolCall.function) ? rawToolCall.function : undefined;
2482
- const name = pickString(functionCall?.name);
2483
- if (name) {
2484
- existing.name = `${existing.name ?? ""}${name}`;
2747
+ const delta = isRecord2(choice.delta) ? choice.delta : undefined;
2748
+ const message = isRecord2(choice.message) ? choice.message : undefined;
2749
+ const toolCalls = Array.isArray(delta?.tool_calls) ? delta.tool_calls : Array.isArray(message?.tool_calls) ? message.tool_calls : Array.isArray(choice.tool_calls) ? choice.tool_calls : undefined;
2750
+ if (!toolCalls) {
2751
+ continue;
2485
2752
  }
2486
- const argumentsDelta = pickString(functionCall?.arguments);
2487
- if (argumentsDelta) {
2488
- existing.argumentsText += argumentsDelta;
2753
+ for (const rawToolCall of toolCalls) {
2754
+ if (!isRecord2(rawToolCall)) {
2755
+ continue;
2756
+ }
2757
+ const index = toFiniteNumber(rawToolCall.index);
2758
+ const toolIndex = index !== undefined ? Math.floor(index) : state.size;
2759
+ const existing = state.get(toolIndex) ?? {
2760
+ index: toolIndex,
2761
+ argumentsText: ""
2762
+ };
2763
+ const id = pickString(rawToolCall.id);
2764
+ if (id) {
2765
+ existing.id = id;
2766
+ }
2767
+ const type = pickString(rawToolCall.type);
2768
+ if (type) {
2769
+ existing.type = type;
2770
+ }
2771
+ const functionCall = isRecord2(rawToolCall.function) ? rawToolCall.function : undefined;
2772
+ const name = pickString(functionCall?.name);
2773
+ if (name) {
2774
+ existing.name = `${existing.name ?? ""}${name}`;
2775
+ }
2776
+ const argumentsDelta = pickString(functionCall?.arguments);
2777
+ if (argumentsDelta) {
2778
+ if (message?.tool_calls === toolCalls || choice.tool_calls === toolCalls) {
2779
+ existing.argumentsText = argumentsDelta;
2780
+ } else {
2781
+ existing.argumentsText += argumentsDelta;
2782
+ }
2783
+ }
2784
+ state.set(toolIndex, existing);
2489
2785
  }
2490
- state.set(toolIndex, existing);
2491
2786
  }
2492
2787
  }
2493
2788
  function buildOpenAIStreamToolCalls(state) {
@@ -2935,7 +3230,7 @@ async function completeWithMCPToolLoop(options, fetcher, path, request) {
2935
3230
  }
2936
3231
  const payload = await response.json();
2937
3232
  lastPayload = payload;
2938
- aggregatedUsage = mergeUsage(aggregatedUsage, pickUsage2(payload));
3233
+ aggregatedUsage = mergeUsage2(aggregatedUsage, pickUsage2(payload));
2939
3234
  finishReason = pickFinishReason2(payload);
2940
3235
  const content = Array.isArray(payload.content) ? payload.content : [];
2941
3236
  const calledTools = pickAnthropicToolCalls(payload).filter((call) => call.type === "function");
@@ -3056,7 +3351,7 @@ async function streamWithMCPToolLoop(options, fetcher, path, request, callbacks)
3056
3351
  callbacks.onChunk?.(chunk);
3057
3352
  }
3058
3353
  });
3059
- aggregatedUsage = mergeUsage(aggregatedUsage, roundUsage);
3354
+ aggregatedUsage = mergeUsage2(aggregatedUsage, roundUsage);
3060
3355
  if (roundFinishReason) {
3061
3356
  finishReason = roundFinishReason;
3062
3357
  }
@@ -3715,95 +4010,6 @@ function withToolTimeout(client, toolTimeoutMs) {
3715
4010
  function applyToolTimeout(clients, toolTimeoutMs) {
3716
4011
  return clients.map((client) => withToolTimeout(client, toolTimeoutMs));
3717
4012
  }
3718
- // src/generate-output.ts
3719
- var RE_THINK_TAGS = /<\/?think\s*>/gi;
3720
- function normalizeModelOutput(text, dedicatedReasoning, reasoningBlocks) {
3721
- const sanitized = sanitizeThink(text);
3722
- const visibleText = stripThinkBlocks(text, sanitized.thinkBlocks);
3723
- const reasoning = joinReasoningSegments([
3724
- dedicatedReasoning,
3725
- ...sanitized.thinkBlocks.map((block) => block.content)
3726
- ]);
3727
- return {
3728
- text: visibleText,
3729
- reasoning,
3730
- reasoningBlocks: normalizeReasoningBlocks(reasoningBlocks),
3731
- thinkBlocks: sanitized.thinkBlocks,
3732
- parseSource: composeParseSource(visibleText, reasoning)
3733
- };
3734
- }
3735
- function normalizeReasoningBlocks(blocks) {
3736
- if (!Array.isArray(blocks)) {
3737
- return;
3738
- }
3739
- const normalized = blocks.map((block) => ({
3740
- turnIndex: block.turnIndex,
3741
- text: block.text.replace(RE_THINK_TAGS, "").trim()
3742
- })).filter((block) => Number.isFinite(block.turnIndex) && block.text.length > 0);
3743
- return normalized.length > 0 ? normalized : undefined;
3744
- }
3745
- function appendReasoningBlock(blocks, transition) {
3746
- const text = transition.reasoningText?.replace(RE_THINK_TAGS, "").trim();
3747
- if (!text) {
3748
- return blocks;
3749
- }
3750
- const next = [...blocks ?? [], { turnIndex: transition.turnIndex, text }];
3751
- return normalizeReasoningBlocks(next);
3752
- }
3753
- function composeParseSource(text, reasoning) {
3754
- if (typeof reasoning !== "string" || reasoning.length === 0) {
3755
- return text;
3756
- }
3757
- const sanitized = reasoning.replace(RE_THINK_TAGS, "");
3758
- if (sanitized.length === 0) {
3759
- return text;
3760
- }
3761
- return `<think>${sanitized}</think>${text}`;
3762
- }
3763
- function aggregateUsage(attempts) {
3764
- let usage;
3765
- for (const attempt of attempts) {
3766
- usage = mergeUsage2(usage, attempt.usage);
3767
- }
3768
- return usage;
3769
- }
3770
- function mergeUsage2(base, next) {
3771
- if (!base && !next) {
3772
- return;
3773
- }
3774
- return {
3775
- inputTokens: (base?.inputTokens ?? 0) + (next?.inputTokens ?? 0),
3776
- outputTokens: (base?.outputTokens ?? 0) + (next?.outputTokens ?? 0),
3777
- totalTokens: (base?.totalTokens ?? 0) + (next?.totalTokens ?? 0),
3778
- cost: (base?.cost ?? 0) + (next?.cost ?? 0)
3779
- };
3780
- }
3781
- function joinReasoningSegments(parts) {
3782
- return parts.map((value) => value?.trim()).filter((value) => Boolean(value)).join(`
3783
-
3784
- `);
3785
- }
3786
- function stripThinkBlocks(text, thinkBlocks) {
3787
- if (thinkBlocks.length === 0) {
3788
- return text;
3789
- }
3790
- let output = "";
3791
- let cursor = 0;
3792
- for (const block of thinkBlocks) {
3793
- output += text.slice(cursor, block.start);
3794
- cursor = block.end;
3795
- }
3796
- output += text.slice(cursor);
3797
- return output;
3798
- }
3799
- function toStreamDataFingerprint(value) {
3800
- try {
3801
- return JSON.stringify(value);
3802
- } catch {
3803
- return "__unserializable__";
3804
- }
3805
- }
3806
-
3807
4013
  // src/utils/debug-colors.ts
3808
4014
  var ANSI = {
3809
4015
  reset: "\x1B[0m",
@@ -3965,13 +4171,15 @@ async function callModel(adapter, options) {
3965
4171
  if (!done && fingerprint === lastSnapshotFingerprint) {
3966
4172
  return;
3967
4173
  }
4174
+ const stableText = done ? normalized2.text : withoutTrailingThinkTagPrefix(normalized2.text);
4175
+ const stableReasoning = done ? normalized2.reasoning : withoutTrailingThinkTagPrefix(normalized2.reasoning);
3968
4176
  const delta = {
3969
- text: normalized2.text.startsWith(previousSnapshotText) ? normalized2.text.slice(previousSnapshotText.length) : "",
3970
- reasoning: normalized2.reasoning.startsWith(previousSnapshotReasoning) ? normalized2.reasoning.slice(previousSnapshotReasoning.length) : ""
4177
+ text: stableText.startsWith(previousSnapshotText) ? stableText.slice(previousSnapshotText.length) : "",
4178
+ reasoning: stableReasoning.startsWith(previousSnapshotReasoning) ? stableReasoning.slice(previousSnapshotReasoning.length) : ""
3971
4179
  };
3972
4180
  lastSnapshotFingerprint = fingerprint;
3973
- previousSnapshotText = normalized2.text;
3974
- previousSnapshotReasoning = normalized2.reasoning;
4181
+ previousSnapshotText = stableText;
4182
+ previousSnapshotReasoning = stableReasoning;
3975
4183
  options.stream.onData?.({
3976
4184
  delta,
3977
4185
  snapshot,
@@ -5816,6 +6024,7 @@ function unwrap2(schema) {
5816
6024
  }
5817
6025
  export {
5818
6026
  wrapMCPClient,
6027
+ withoutTrailingThinkTagPrefix,
5819
6028
  withFormat,
5820
6029
  structured,
5821
6030
  sanitizeThink,
@@ -5825,6 +6034,7 @@ export {
5825
6034
  registerBuiltinProviders,
5826
6035
  prompt,
5827
6036
  parseLLMOutput,
6037
+ normalizeModelOutput,
5828
6038
  inspectSchemaMetadata,
5829
6039
  inferSchemaExample,
5830
6040
  images,