@dex-ai/openai 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,386 @@
1
+ /**
2
+ * Responses API streaming: POST /v1/responses → StreamPart async iterable.
3
+ *
4
+ * Supports streamed reasoning deltas (reasoning-delta StreamPart) that the
5
+ * Chat Completions endpoint does not expose.
6
+ */
7
+
8
+ import type {
9
+ StreamPart,
10
+ ModelRequest,
11
+ FinishReason,
12
+ Usage,
13
+ ResponseMeta,
14
+ Content,
15
+ Message,
16
+ } from "@dex-ai/sdk";
17
+ import { parseSSE } from "./sse";
18
+ import type { SSEFrame } from "./sse";
19
+ import { OpenAIError } from "./errors";
20
+ import { messagesToOAI, toolToOAI, toolChoiceToOAI } from "./translate";
21
+ import { thinkingToReasoningEffort, modelSupportsReasoning } from "./thinking";
22
+
23
+ /* ------------------------------------------------------------------ */
24
+ /* Types */
25
+ /* ------------------------------------------------------------------ */
26
+
27
+ interface ToolCallAccumulator { // Running state for one function call while its stream events arrive.
28
+ id: string; // Output item id (`item_id` / `item.id`); also the key in the toolCalls map.
29
+ callId: string; // `call_id` from the event/item; preferred over `id` as the emitted toolCallId when non-empty.
30
+ name: string; // Function name taken from the event/item; "" until one is seen.
31
+ arguments: string; // Argument text: deltas are concatenated, then JSON-parsed when the tool-call is emitted.
32
+ }
33
+
34
+ interface ResponsesStreamOptions { // Static configuration captured by createResponsesStream.
35
+ baseUrl: string; // Prefix of the request URL; requests go to `${baseUrl}/responses`.
36
+ apiKey: string; // Sent as `Authorization: Bearer <apiKey>`.
37
+ modelId: string; // Sent as `model` in the request body and checked for reasoning support.
38
+ providerName: string; // Copied into ResponseMeta and into raw-chunk parts.
39
+ rawChunks: boolean; // When true, every SSE data payload is also yielded as a raw-chunk part.
40
+ doFetch: (url: string, init: RequestInit) => Promise<Response>; // Performs the POST request.
41
+ headers?: Record<string, string> | undefined; // Spread after the default headers, so entries here override them.
42
+ /** Idle timeout in ms — if no SSE frame arrives within this window, the stream yields a non-recoverable error part and ends. Default: 120_000 (2min). */
43
+ streamIdleTimeoutMs?: number | undefined;
44
+ }
45
+
46
+ /* ------------------------------------------------------------------ */
47
+ /* createResponsesStream */
48
+ /* ------------------------------------------------------------------ */
49
+
50
/**
 * Event types after which the assistant message is considered started.
 * A `message-start` part is emitted (once) before any of these is processed.
 */
const MESSAGE_OPENING_EVENTS: ReadonlySet<string> = new Set([
  "response.created",
  "response.output_text.delta",
  "response.reasoning.delta",
  "response.reasoning_text.delta",
  "response.reasoning_summary_text.delta",
  "response.output_item.added",
  "response.function_call_arguments.delta",
]);

/** Narrow an unknown JSON value to an object record, or `undefined`. */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  return typeof value === "object" && value !== null
    ? (value as Record<string, unknown>)
    : undefined;
}

/** Return `value` when it is a string, otherwise `undefined`. */
function asString(value: unknown): string | undefined {
  return typeof value === "string" ? value : undefined;
}

/**
 * Decode accumulated tool-call arguments.
 * Empty text becomes `{}`; text that is not valid JSON is passed through as the raw string.
 */
function parseToolInput(rawArguments: string): unknown {
  if (!rawArguments) return {};
  try {
    return JSON.parse(rawArguments);
  } catch {
    return rawArguments;
  }
}

/** Map a Responses API `usage` object onto the SDK `Usage` shape. */
function toUsage(raw: Record<string, unknown>): Usage {
  const cached = asRecord(raw.input_tokens_details)?.cached_tokens;
  const reasoning = asRecord(raw.output_tokens_details)?.reasoning_tokens;
  return {
    inputTokens: typeof raw.input_tokens === "number" ? raw.input_tokens : 0,
    outputTokens: typeof raw.output_tokens === "number" ? raw.output_tokens : 0,
    ...(typeof raw.total_tokens === "number"
      ? { totalTokens: raw.total_tokens }
      : {}),
    ...(typeof cached === "number" ? { cachedInputTokens: cached } : {}),
    ...(typeof reasoning === "number" ? { reasoningTokens: reasoning } : {}),
  };
}

/**
 * Await the next item of `iter`, rejecting if nothing arrives within `idleTimeoutMs`.
 * The timer is always cleared, whichever side of the race settles first.
 */
async function nextWithIdleTimeout<T>(
  iter: AsyncIterator<T>,
  idleTimeoutMs: number,
): Promise<IteratorResult<T>> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(
      () =>
        reject(
          new Error(
            `Stream idle timeout: no data received for ${idleTimeoutMs}ms`,
          ),
        ),
      idleTimeoutMs,
    );
  });
  try {
    return await Promise.race([iter.next(), timeout]);
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Build a streaming function for the Responses API (`POST {baseUrl}/responses`).
 *
 * The returned async generator sends the request and translates the SSE event
 * stream into `StreamPart`s: `response-start`, `message-start`, text /
 * reasoning / tool-call deltas, finalized `tool-call`s, `message-stop`,
 * `finish` and `response-stop`. HTTP failures, stream-level `error` events and
 * exceptions raised while reading are reported as a non-recoverable `error`
 * part; an `AbortError` is reported as an `abort` part. Errors thrown by
 * `doFetch` other than `AbortError` are rethrown.
 *
 * @param opts Endpoint, credentials and streaming options.
 * @param _modelMaxTokens Currently unused; kept for signature compatibility.
 * @returns A function that turns a `ModelRequest` into an async iterable of `StreamPart`.
 */
export function createResponsesStream(
  opts: ResponsesStreamOptions,
  _modelMaxTokens?: number,
): (req: ModelRequest) => AsyncIterable<StreamPart> {
  const { baseUrl, apiKey, modelId, providerName, rawChunks, doFetch } = opts;
  const url = `${baseUrl}/responses`;
  const idleTimeoutMs = opts.streamIdleTimeoutMs ?? 120_000; // 2 min default

  return async function* stream(req: ModelRequest): AsyncIterable<StreamPart> {
    const startedAt = Date.now();

    // Build request body — Responses API uses `input` instead of `messages`
    const body: Record<string, unknown> = {
      model: modelId,
      input: messagesToOAI(req.messages),
      stream: true,
    };

    if (req.maxTokens !== undefined) body.max_output_tokens = req.maxTokens;
    if (req.temperature !== undefined) body.temperature = req.temperature;
    if (req.topP !== undefined) body.top_p = req.topP;

    // Tools
    if (req.tools?.length) {
      body.tools = req.tools.map((t) => toolToOAI(t));
      const tc = toolChoiceToOAI(req.toolChoice);
      if (tc !== undefined) body.tool_choice = tc;
    }

    // Reasoning
    if (modelSupportsReasoning(modelId)) {
      const effort = thinkingToReasoningEffort(req.thinking, "responses");
      if (effort !== undefined) {
        body.reasoning = { effort };
      }
    }

    // Provider options pass-through (applied last, so they can override the above)
    if (req.providerOptions) Object.assign(body, req.providerOptions);

    const init: RequestInit = {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`,
        ...opts.headers,
      },
      body: JSON.stringify(body),
    };
    if (req.signal) (init as { signal: AbortSignal }).signal = req.signal;

    let res: Response;
    try {
      res = await doFetch(url, init);
    } catch (err: unknown) {
      if (err instanceof Error && err.name === "AbortError") {
        yield { type: "abort", reason: err };
        return;
      }
      throw err;
    }

    if (!res.ok) {
      const error = await OpenAIError.fromResponse(res);
      yield { type: "error", error, recoverable: false };
      return;
    }

    if (!res.body) {
      yield {
        type: "error",
        error: new Error("openai responses stream: empty body"),
        recoverable: false,
      };
      return;
    }

    const meta: ResponseMeta = { providerName, modelId, startedAt };
    yield { type: "response-start", meta };

    // State
    const textParts: string[] = [];
    const reasoningParts: string[] = [];
    const toolCalls: Map<string, ToolCallAccumulator> = new Map();
    let messageStarted = false;
    let finishReason: FinishReason = "stop";
    let usage: Usage = { inputTokens: 0, outputTokens: 0 };
    let responseId: string | undefined;

    const sseIter = parseSSE(res.body)[Symbol.asyncIterator]();
    try {
      while (true) {
        // Race next SSE frame against idle timeout
        const result = await nextWithIdleTimeout(sseIter, idleTimeoutMs);
        if (result.done) break;
        const frame = result.value;
        if (frame.data === "[DONE]") break;

        if (rawChunks) {
          yield { type: "raw-chunk", providerName, data: frame.data };
        }

        let parsed: unknown;
        try {
          parsed = JSON.parse(frame.data);
        } catch {
          continue; // not JSON — skip the frame
        }
        const event = asRecord(parsed);
        if (!event) continue; // JSON but not an object (e.g. null) — nothing to dispatch on

        const eventType = asString(event.type);

        if (
          !messageStarted &&
          eventType !== undefined &&
          MESSAGE_OPENING_EVENTS.has(eventType)
        ) {
          messageStarted = true;
          yield { type: "message-start", role: "assistant" };
        }

        switch (eventType) {
          case "response.created": {
            const id = asString(asRecord(event.response)?.id);
            if (id) responseId = id;
            break;
          }

          case "response.output_text.delta": {
            const text = asString(event.delta);
            if (text) {
              textParts.push(text);
              yield { type: "text-delta", delta: text };
            }
            break;
          }

          // Reasoning text is streamed under several event names depending on
          // whether it is raw reasoning or a reasoning summary.
          case "response.reasoning.delta":
          case "response.reasoning_text.delta":
          case "response.reasoning_summary_text.delta": {
            const reasoning = asString(event.delta);
            if (reasoning) {
              reasoningParts.push(reasoning);
              yield { type: "reasoning-delta", delta: reasoning };
            }
            break;
          }

          case "response.output_item.added": {
            // A function_call item announces its call_id and name here, before
            // any argument deltas; seed the accumulator so the deltas carry them.
            const item = asRecord(event.item);
            if (item && item.type === "function_call") {
              const id = asString(item.id) ?? "";
              if (!toolCalls.has(id)) {
                toolCalls.set(id, {
                  id,
                  callId: asString(item.call_id) ?? "",
                  name: asString(item.name) ?? "",
                  arguments: "",
                });
              }
            }
            break;
          }

          case "response.function_call_arguments.delta": {
            const itemId = asString(event.item_id) ?? "";
            let acc = toolCalls.get(itemId);
            if (!acc) {
              acc = { id: itemId, callId: "", name: "", arguments: "" };
              toolCalls.set(itemId, acc);
            }
            const name = asString(event.name);
            if (name) acc.name = name;
            const callId = asString(event.call_id);
            if (callId) acc.callId = callId;
            const argDelta = asString(event.delta);
            if (argDelta) {
              acc.arguments += argDelta;
              yield {
                type: "tool-call-delta",
                toolCallId: acc.callId || acc.id,
                toolName: acc.name,
                inputDelta: argDelta,
              };
            }
            break;
          }

          case "response.function_call_arguments.done": {
            const acc = toolCalls.get(asString(event.item_id) ?? "");
            if (acc) {
              const callId = asString(event.call_id);
              if (callId) acc.callId = callId;
              const name = asString(event.name);
              if (name) acc.name = name;
              // The done event carries the full argument text; prefer it over the deltas.
              const args = asString(event.arguments);
              if (args) acc.arguments = args;
            }
            break;
          }

          case "response.output_item.done": {
            // Tool call items complete
            const item = asRecord(event.item);
            if (item && item.type === "function_call") {
              const id = asString(item.id) ?? "";
              const acc = toolCalls.get(id) ?? {
                id,
                callId: asString(item.call_id) ?? id,
                name: asString(item.name) ?? "",
                arguments: asString(item.arguments) ?? "",
              };
              toolCalls.set(id, acc);
              const callId = asString(item.call_id);
              if (callId) acc.callId = callId;
              const name = asString(item.name);
              if (name) acc.name = name;
              const args = asString(item.arguments);
              if (args) acc.arguments = args;
            }
            break;
          }

          // All three terminal events carry the final response object (id, status, usage).
          case "response.completed":
          case "response.incomplete":
          case "response.failed": {
            const r = asRecord(event.response);
            const status = asString(r?.status);
            if (eventType === "response.incomplete" || status === "incomplete") {
              finishReason = "length";
            } else if (eventType === "response.failed" || status === "failed") {
              finishReason = "error";
            }
            const id = asString(r?.id);
            if (id) responseId = id;
            const u = asRecord(r?.usage);
            if (u) usage = toUsage(u);
            break;
          }

          case "error": {
            // Stream-level error event: surface it the same way as a read failure.
            const message =
              asString(event.message) ??
              asString(asRecord(event.error)?.message) ??
              "openai responses stream: error event";
            yield {
              type: "error",
              error: new Error(message),
              recoverable: false,
            };
            return;
          }

          default:
            // Ignore unknown event types
            break;
        }
      }
    } catch (err: unknown) {
      if (err instanceof Error && err.name === "AbortError") {
        yield { type: "abort", reason: err };
        return;
      }
      yield { type: "error", error: err, recoverable: false };
      return;
    } finally {
      // Close the SSE generator so its cleanup runs on every exit path
      // ([DONE], error, or the consumer abandoning this generator). Not awaited:
      // after an idle timeout a read is still pending and the close request is
      // queued behind it.
      void sseIter.return(undefined).catch(() => undefined);
    }

    // Determine finish reason from tool calls
    if (toolCalls.size > 0 && finishReason === "stop") {
      finishReason = "tool-calls";
    }

    // Emit finalized tool-calls
    for (const acc of toolCalls.values()) {
      yield {
        type: "tool-call",
        toolCallId: acc.callId || acc.id,
        toolName: acc.name,
        input: parseToolInput(acc.arguments),
      };
    }

    // Assemble final message: reasoning, then text, then tool calls
    const contentParts: Content[] = [];
    if (reasoningParts.length > 0) {
      contentParts.push({ type: "reasoning", text: reasoningParts.join("") });
    }
    if (textParts.length > 0) {
      contentParts.push({ type: "text", text: textParts.join("") });
    }
    for (const acc of toolCalls.values()) {
      contentParts.push({
        type: "tool-call",
        toolCallId: acc.callId || acc.id,
        toolName: acc.name,
        // Parsed again so the message does not share an object with the stream part.
        input: parseToolInput(acc.arguments),
      });
    }

    const message: Message = { role: "assistant", content: contentParts };
    yield { type: "message-stop", message };
    yield { type: "finish", reason: finishReason, usage };
    yield {
      type: "response-stop",
      meta: {
        ...meta,
        ...(responseId !== undefined ? { id: responseId } : {}),
        endedAt: Date.now(),
      },
      usage,
      finishReason,
    };
  };
}
package/src/sse.ts ADDED
@@ -0,0 +1,35 @@
1
+ /** Minimal SSE line parser for streaming API responses. */
2
+
3
+ export interface SSEFrame { // One frame produced by parseSSE.
4
+ data: string; // Text of a `data:` line with the `data:` / `data: ` prefix removed.
5
+ }
6
+
7
/**
 * Parse a ReadableStream<Uint8Array> into SSE frames.
 *
 * Yields one frame per `data:` line, with the `data:` prefix (and one
 * following space, if present) removed. Blank lines, comments and other SSE
 * fields are ignored. A final line that is not newline-terminated is still
 * yielded when the stream ends.
 *
 * When the generator finishes — including when the consumer stops iterating
 * early — the underlying stream is cancelled and the reader lock released.
 *
 * @param body Byte stream containing UTF-8 encoded SSE text.
 * @returns An async generator of frames, in arrival order.
 */
export async function* parseSSE(
  body: ReadableStream<Uint8Array>,
): AsyncGenerator<SSEFrame> {
  // Extract the payload of a `data:` line, or undefined for any other line.
  const dataPayload = (rawLine: string): string | undefined => {
    const line = rawLine.replace(/\r$/, ""); // tolerate CRLF line endings
    if (line.startsWith("data: ")) return line.slice(6);
    if (line.startsWith("data:")) return line.slice(5);
    return undefined; // blank line, comment, or another field
  };

  const reader = body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      // stream: true keeps multi-byte characters split across chunks intact
      buffer += decoder.decode(value, { stream: true });
      let nl: number;
      while ((nl = buffer.indexOf("\n")) !== -1) {
        const payload = dataPayload(buffer.slice(0, nl));
        buffer = buffer.slice(nl + 1);
        if (payload !== undefined) yield { data: payload };
      }
    }
    // End of stream: flush the decoder and emit a last line that had no trailing newline.
    buffer += decoder.decode();
    const tail = dataPayload(buffer);
    if (tail !== undefined) yield { data: tail };
  } finally {
    // Cancel first so an early exit closes the underlying source; this is a
    // no-op on a stream that already ended and may reject on an errored one.
    await reader.cancel().catch(() => undefined);
    reader.releaseLock();
  }
}