npm - @galdor/provider-openai - Versions diffs - 0.3.0 - Mend

@galdor/provider-openai 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/src/openai.test.ts ADDED Viewed

@@ -0,0 +1,184 @@
+import { afterEach, describe, expect, test } from "bun:test";
+import { collectStream, RateLimitError } from "@galdor/core/provider";
+import { messageText } from "@galdor/core/schema";
+import { newOpenAI } from "./index.ts";
+// Handle to the HTTP stub started by the running test, if any; reset after every test.
+let server: { stop(): void; url: string } | undefined;
+afterEach(() => {
+  // Stop whatever stub the finished test left behind, then forget it.
+  if (server !== undefined) server.stop();
+  server = undefined;
+});
+/**
+ * Start a throwaway HTTP stub that answers every request with `handler`.
+ *
+ * Binds with `port: 0` and reads the actual port back from the returned server,
+ * stores a stop/url handle in the module-level `server` variable (which the
+ * `afterEach` hook stops), and returns the stub's base URL.
+ */
+function serve(handler: (req: Request) => Response | Promise<Response>): string {
+  const listener = Bun.serve({ port: 0, fetch: handler });
+  const url = `http://localhost:${listener.port}`;
+  server = {
+    url,
+    stop: () => listener.stop(true),
+  };
+  return url;
+}
+// Non-streaming path: wire-request shaping, response decoding and HTTP error mapping.
+describe("OpenAIProvider.generate", () => {
+  test("shapes the wire request and parses text + tool calls + usage", async () => {
+    // Canned completion the stub replies with: text, one tool call, and usage.
+    const completion = {
+      id: "chatcmpl-1",
+      object: "chat.completion",
+      model: "gpt-4o-mini",
+      choices: [
+        {
+          index: 0,
+          message: {
+            role: "assistant",
+            content: "the answer",
+            tool_calls: [{ id: "t1", type: "function", function: { name: "add", arguments: '{"a":1,"b":2}' } }],
+          },
+          finish_reason: "tool_calls",
+        },
+      ],
+      usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 },
+    };
+    // Filled in by the stub so the outgoing request can be inspected afterwards.
+    let requestPath = "";
+    let wireBody: any;
+    const baseURL = serve(async (req) => {
+      requestPath = new URL(req.url).pathname;
+      wireBody = await req.json();
+      return Response.json(completion);
+    });
+    const provider = newOpenAI({ apiKey: "sk-test", baseURL });
+    const result = await provider.generate({
+      model: "gpt-4o-mini",
+      messages: [
+        { role: "system", content: [{ type: "text", text: "be terse" }] },
+        { role: "user", content: [{ type: "text", text: "hi" }] },
+      ],
+    });
+    // Outgoing request: the system prompt remains a regular message, the model is
+    // passed through, and text parts are flattened to plain strings.
+    expect(requestPath).toBe("/chat/completions");
+    expect(wireBody.model).toBe("gpt-4o-mini");
+    expect(wireBody.messages[0]).toEqual({ role: "system", content: "be terse" });
+    expect(wireBody.messages[1]).toEqual({ role: "user", content: "hi" });
+    // Decoded response: text, parsed tool-call arguments, stop reason, usage, model.
+    expect(messageText(result.message)).toBe("the answer");
+    expect(result.message.toolCalls?.[0]).toEqual({ id: "t1", name: "add", arguments: { a: 1, b: 2 } });
+    expect(result.stopReason).toBe("tool_use");
+    expect(result.usage.inputTokens).toBe(10);
+    expect(result.usage.outputTokens).toBe(5);
+    expect(result.model).toBe("gpt-4o-mini");
+  });
+  test("omits `stop` when stopSequences is empty, includes it when non-empty", async () => {
+    // Always holds the JSON body of the most recent request.
+    let wireBody: any;
+    const baseURL = serve(async (req) => {
+      wireBody = await req.json();
+      return Response.json({ model: "m", choices: [{ index: 0, message: { role: "assistant", content: "ok" } }] });
+    });
+    const provider = newOpenAI({ apiKey: "sk-test", baseURL });
+    // Empty list: the key must be absent from the wire body, not merely empty.
+    await provider.generate({
+      model: "m",
+      messages: [{ role: "user", content: [{ type: "text", text: "x" }] }],
+      stopSequences: [],
+    });
+    expect("stop" in wireBody).toBe(false);
+    // Non-empty list: forwarded as `stop`.
+    await provider.generate({
+      model: "m",
+      messages: [{ role: "user", content: [{ type: "text", text: "x" }] }],
+      stopSequences: ["END"],
+    });
+    expect(wireBody.stop).toEqual(["END"]);
+  });
+  test("maps a 429 to a typed RateLimitError with retryAfter", async () => {
+    // Stub that always answers 429 with a `retry-after` header of 7.
+    const rateLimited = () =>
+      new Response(JSON.stringify({ error: { type: "rate_limit_error", message: "slow down" } }), {
+        status: 429,
+        headers: { "retry-after": "7" },
+      });
+    const provider = newOpenAI({ apiKey: "sk-test", baseURL: serve(rateLimited) });
+    try {
+      await provider.generate({ model: "m", messages: [{ role: "user", content: [{ type: "text", text: "x" }] }] });
+      // Reaching this line means no error was raised; the sentinel then fails the
+      // instanceof assertion in the catch block.
+      throw new Error("should have thrown");
+    } catch (err) {
+      expect(err).toBeInstanceOf(RateLimitError);
+      expect((err as RateLimitError).retryAfter).toBe(7);
+    }
+  });
+});
+// Streaming path: SSE framing variants folded back into a single response.
+describe("OpenAIProvider.stream", () => {
+  /** Frame one JSON payload as an SSE `data:` event closed by a blank line built from `eol`. */
+  const frame = (payload: unknown, eol: string) => `data: ${JSON.stringify(payload)}${eol}${eol}`;
+  /** The "hello world" exchange (role, two text deltas, finish, usage, [DONE]) under the given line ending. */
+  const helloWorldStream = (eol: string) =>
+    [
+      frame({ id: "1", model: "gpt-4o-mini", choices: [{ index: 0, delta: { role: "assistant", content: "" } }] }, eol),
+      frame({ id: "1", model: "gpt-4o-mini", choices: [{ index: 0, delta: { content: "hello " } }] }, eol),
+      frame({ id: "1", model: "gpt-4o-mini", choices: [{ index: 0, delta: { content: "world" } }] }, eol),
+      frame({ id: "1", model: "gpt-4o-mini", choices: [{ index: 0, delta: {}, finish_reason: "stop" }] }, eol),
+      frame({ id: "1", model: "gpt-4o-mini", choices: [], usage: { prompt_tokens: 3, completion_tokens: 2 } }, eol),
+      `data: [DONE]${eol}${eol}`,
+    ].join("");
+  /** Serve `sse` as an event stream, issue a one-message streaming request, and fold the events. */
+  const streamAndCollect = (sse: string) => {
+    const baseURL = serve(() => new Response(sse, { headers: { "content-type": "text/event-stream" } }));
+    const provider = newOpenAI({ apiKey: "sk-test", baseURL });
+    return collectStream(
+      provider.stream({ model: "gpt-4o-mini", messages: [{ role: "user", content: [{ type: "text", text: "hi" }] }] }),
+    );
+  };
+  /** Expectations shared by both framings of the "hello world" exchange. */
+  const expectHelloWorld = (resp: Awaited<ReturnType<typeof collectStream>>) => {
+    expect(messageText(resp.message)).toBe("hello world");
+    expect(resp.stopReason).toBe("end_turn");
+    expect(resp.usage.inputTokens).toBe(3);
+    expect(resp.usage.outputTokens).toBe(2);
+    expect(resp.model).toBe("gpt-4o-mini");
+  };
+  test("parses an SSE sequence into events that collectStream reassembles", async () => {
+    expectHelloWorld(await streamAndCollect(helloWorldStream("\n")));
+  });
+  test("parses a CRLF-framed SSE stream (OpenAI-compatible backends)", async () => {
+    expectHelloWorld(await streamAndCollect(helloWorldStream("\r\n")));
+  });
+  test("captures a final chunk not terminated by a blank line", async () => {
+    // The last data line has neither a trailing blank line nor a [DONE] marker:
+    // the connection simply ends. Its finish reason and usage must still count.
+    const unterminatedTail = `data: ${JSON.stringify({
+      id: "1",
+      model: "gpt-4o-mini",
+      choices: [{ index: 0, delta: {}, finish_reason: "length" }],
+      usage: { prompt_tokens: 7, completion_tokens: 4 },
+    })}`;
+    const sse =
+      frame({ id: "1", model: "gpt-4o-mini", choices: [{ index: 0, delta: { role: "assistant", content: "hi" } }] }, "\n") +
+      frame({ id: "1", model: "gpt-4o-mini", choices: [{ index: 0, delta: { content: " there" } }] }, "\n") +
+      unterminatedTail;
+    const resp = await streamAndCollect(sse);
+    expect(messageText(resp.message)).toBe("hi there");
+    expect(resp.stopReason).toBe("max_tokens");
+    expect(resp.usage.inputTokens).toBe(7);
+    expect(resp.usage.outputTokens).toBe(4);
+    expect(resp.model).toBe("gpt-4o-mini");
+  });
+});

package/src/stream.ts ADDED Viewed

@@ -0,0 +1,245 @@
+/**
+ * OpenAI streaming over Server-Sent Events.
+ *
+ * Decodes each `data: {...}` chunk of the /chat/completions stream into galdor
+ * provider {@link Event}s (MessageStart / ContentDelta / ToolCallDelta /
+ * MessageStop). The OpenAI stream carries no dedicated opening frame, so
+ * MessageStart is synthesized from the first chunk, and MessageStop is deferred
+ * to the end: with `stream_options.include_usage = true` the final usage chunk
+ * arrives after the `finish_reason` chunk. Some OpenAI-compatible backends close
+ * the connection rather than emitting `data: [DONE]`, so the terminal
+ * MessageStop is always synthesized from accumulated state, regardless of how
+ * the stream ends. Consume the generator with `for await`, or fold it into a
+ * single {@link Response} via `collectStream`.
+ */
+import { APIError, type Event, EventType, fetchWithHeaderTimeout } from "@galdor/core/provider";
+import { Role, type StopReason, thinkingPart, type Usage } from "@galdor/core/schema";
+import { normalizeFinishReason, usageFromWire, type WireUsage } from "./convert.ts";
+import { kindForType, normalizeHTTPError } from "./errors.ts";
+const PROVIDER_NAME = "openai"; // provider label set on the APIError that handleChunk raises for in-stream error frames
+interface ChunkFuncCall { // function part of one streamed tool-call fragment; both fields may be absent
+  name?: string; // when non-empty, recorded in ToolState and forwarded as the delta's `name`
+  arguments?: string; // forwarded as `argumentsDelta` ("" when absent)
+}
+interface ChunkToolCall { // one entry of `delta.tool_calls`
+  id?: string; // may be missing; touchToolState then falls back to `call_<index>`
+  index?: number; // key for per-call state; treated as 0 when absent
+  function?: ChunkFuncCall;
+}
+interface ChunkDelta { // incremental payload carried by a choice
+  role?: string; // part of the wire shape; not read by the code visible in this module
+  content?: string; // emitted as a ContentDelta event when non-empty
+  reasoning_content?: string; // appended to StreamState.reasoning; not forwarded live
+  tool_calls?: ChunkToolCall[]; // each entry produces one ToolCallDelta event
+}
+interface ChunkChoice {
+  index?: number;
+  delta?: ChunkDelta;
+  finish_reason?: string; // when set, mapped through normalizeFinishReason into StreamState.stopReason
+}
+interface ChunkError { // body of an in-stream error frame
+  type?: string; // passed together with `code` to kindForType to choose the APIError kind
+  code?: string;
+  message?: string; // APIError message; "stream error" is used when absent
+}
+interface ChatChunk { // decoded JSON payload of a single SSE `data:` event
+  model?: string; // when non-empty, overwrites StreamState.model
+  choices?: ChunkChoice[]; // handleChunk only looks at choices[0]
+  usage?: WireUsage; // when present, replaces StreamState.usage via usageFromWire
+  error?: ChunkError; // when present, handleChunk throws instead of emitting events
+}
+/** Build a fresh {@link Usage} record with every counter at zero; each call returns a new object. */
+function emptyUsage(): Usage {
+  const zeroed: Usage = {
+    inputTokens: 0,
+    outputTokens: 0,
+    cacheCreationTokens: 0,
+    cacheReadTokens: 0,
+  };
+  return zeroed;
+}
+interface ToolState { // identity remembered for one tool call across its deltas
+  id: string; // id taken from the wire, or the synthesized `call_<index>` fallback
+  name: string; // last non-empty function name seen for this call
+}
+interface StreamState { // mutable accumulator shared by every chunk of one stream
+  started: boolean; // true once MessageStart has been emitted
+  model: string; // "" until a chunk carries a model
+  usage: Usage; // starts zeroed; replaced whenever a chunk carries usage
+  stopReason: StopReason; // initialised to "end_turn"; overwritten when a finish_reason arrives
+  reasoning: string; // concatenation of all reasoning_content deltas
+  toolByIdx: Map<number, ToolState>; // tool-call index -> remembered ToolState
+}
+/**
+ * POST a streaming /chat/completions request and yield galdor provider events.
+ *
+ * Synthesizes a MessageStart from the first chunk, forwards content and tool-call
+ * deltas as they arrive, accumulates reasoning and usage, and emits a terminal
+ * MessageStop once the upstream stream ends, whether or not `data: [DONE]` or a
+ * closing blank line was received.
+ *
+ * @param url - The fully-qualified /chat/completions endpoint to POST to.
+ * @param headers - Request headers (auth, content-type, etc.); an SSE `Accept`
+ * header is added automatically.
+ * @param body - The request payload, serialized to JSON.
+ * @param signal - Optional abort signal to cancel the in-flight request.
+ * @param timeoutMs - Passed straight through to `fetchWithHeaderTimeout`;
+ * defaults to 0. NOTE(review): 0 presumably means "no timeout", to be confirmed
+ * against `@galdor/core/provider`.
+ * @returns An async generator of provider {@link Event}s ending in MessageStop.
+ * @throws {APIError} When the HTTP response is non-2xx, or when an in-stream
+ * error frame is received.
+ * @throws {Error} When a 2xx response unexpectedly carries no body.
+ * @example
+ * ```ts
+ * for await (const ev of streamChat(url, headers, wire, signal)) {
+ *   if (ev.type === EventType.ContentDelta) process.stdout.write(ev.contentDelta);
+ * }
+ * ```
+ */
+export async function* streamChat(
+  url: string,
+  headers: Record<string, string>,
+  body: unknown,
+  signal: AbortSignal | undefined,
+  timeoutMs = 0,
+): AsyncGenerator<Event> {
+  const res = await fetchWithHeaderTimeout(
+    url,
+    { method: "POST", headers: { ...headers, accept: "text/event-stream" }, body: JSON.stringify(body) },
+    timeoutMs,
+    signal,
+  );
+  // Anything outside the 2xx class is turned into a typed provider error.
+  if (Math.floor(res.status / 100) !== 2) throw await normalizeHTTPError(res);
+  if (!res.body) throw new Error("openai: streaming response had no body");
+  const state: StreamState = {
+    started: false,
+    model: "",
+    usage: emptyUsage(),
+    stopReason: "end_turn",
+    reasoning: "",
+    toolByIdx: new Map(),
+  };
+  const decoder = new TextDecoder();
+  let buffer = "";
+  for await (const chunk of res.body as unknown as AsyncIterable<Uint8Array>) {
+    // `stream: true` keeps a multi-byte character split across network chunks
+    // pending inside the decoder instead of emitting a replacement character.
+    buffer += decoder.decode(chunk, { stream: true });
+    // SSE events are separated by a blank line. Accept both LF and CRLF framing
+    // so OpenAI-compatible backends that emit \r\n boundaries parse cleanly.
+    let m: RegExpExecArray | null;
+    while ((m = FRAME_BOUNDARY.exec(buffer)) !== null) {
+      const rawEvent = buffer.slice(0, m.index);
+      buffer = buffer.slice(m.index + m[0].length);
+      // The regex is global and the buffer was just re-sliced, so the next
+      // search must restart from the beginning of the new buffer.
+      FRAME_BOUNDARY.lastIndex = 0;
+      const payload = parseDataLine(rawEvent);
+      if (payload === undefined) continue;
+      yield* handleChunk(payload, state);
+    }
+  }
+  // Flush the streaming decoder: bytes of an incomplete trailing sequence are
+  // otherwise held back forever and silently missing from the tail below.
+  buffer += decoder.decode();
+  // Some backends close the connection without a blank-line-terminated final
+  // frame; if the leftover buffer still holds a data line, process it so the
+  // closing usage/finish chunk is not dropped.
+  const tail = parseDataLine(buffer);
+  if (tail !== undefined) yield* handleChunk(tail, state);
+  yield terminalStop(state);
+}
+/**
+ * Matches an SSE blank-line frame boundary under either LF or CRLF framing.
+ * The pattern carries the `g` flag, so `exec` advances `lastIndex`; streamChat
+ * resets `lastIndex` to 0 after each match because it re-slices the buffer.
+ */
+const FRAME_BOUNDARY = /\r?\n\r?\n/g;
+/**
+ * Extract and JSON-parse the `data:` payload of one SSE event block.
+ *
+ * Lines are split on LF with a trailing CR stripped, `:` comment lines are
+ * skipped, and multiple `data:` lines are joined with "\n" before parsing.
+ *
+ * @param rawEvent - Text of a single event, without its terminating blank line.
+ * @returns The decoded chunk, or `undefined` when the block has no `data:` line,
+ * the payload is empty or the `[DONE]` sentinel, the JSON is malformed, or the
+ * JSON value is not a plain object (e.g. `null`, a number, a string, an array).
+ */
+function parseDataLine(rawEvent: string): ChatChunk | undefined {
+  const dataParts: string[] = [];
+  for (const raw of rawEvent.split("\n")) {
+    const line = raw.endsWith("\r") ? raw.slice(0, -1) : raw; // strip CR under CRLF framing
+    if (line.startsWith(":")) continue; // comment
+    if (line.startsWith("data:")) dataParts.push(line.slice(5).trimStart());
+  }
+  if (dataParts.length === 0) return undefined;
+  const payload = dataParts.join("\n");
+  if (payload === "" || payload === "[DONE]") return undefined;
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(payload);
+  } catch {
+    // Skip lines that fail to parse — be permissive about transport hiccups.
+    return undefined;
+  }
+  // Valid JSON is not necessarily a chunk object: `data: null` would otherwise
+  // reach handleChunk and crash on its first property access.
+  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) return undefined;
+  return parsed as ChatChunk;
+}
+/**
+ * Build the closing MessageStop event from the accumulated stream state.
+ *
+ * Carries the final stop reason, usage and model. A `message` holding a single
+ * thinking part is attached only when some reasoning text was accumulated.
+ */
+function terminalStop(state: StreamState): Event {
+  const stop: Event = {
+    type: EventType.MessageStop,
+    stopReason: state.stopReason,
+    usage: state.usage,
+    model: state.model,
+  };
+  // No reasoning collected: the event goes out without a `message` key at all.
+  if (state.reasoning === "") return stop;
+  return {
+    ...stop,
+    message: { role: Role.Assistant, content: [thinkingPart(state.reasoning)] },
+  };
+}
+/**
+ * Translate one decoded chunk into zero or more provider events, updating `state`.
+ *
+ * Order of effects: an error frame throws immediately; otherwise model and usage
+ * are recorded, a MessageStart is emitted once (on the first chunk that carries a
+ * model or any choice), and then the first choice's reasoning, content, tool-call
+ * deltas and finish reason are handled. Only `choices[0]` is considered.
+ *
+ * @throws {APIError} When the chunk carries an `error` object.
+ */
+function* handleChunk(c: ChatChunk, state: StreamState): Generator<Event> {
+  const { error, model, usage, choices } = c;
+  // An in-stream error frame aborts the stream rather than ending it silently.
+  if (error) {
+    const kind = kindForType(error.type, error.code) ?? "server";
+    throw new APIError({ kind, provider: PROVIDER_NAME, statusCode: 0, message: error.message ?? "stream error" });
+  }
+  if (model) state.model = model;
+  if (usage) state.usage = usageFromWire(usage);
+  // The wire format has no opening frame, so the start event is synthesized here.
+  const hasChoices = (choices?.length ?? 0) > 0;
+  if (!state.started && (state.model !== "" || hasChoices)) {
+    state.started = true;
+    yield { type: EventType.MessageStart, model: state.model };
+  }
+  const first = choices?.[0];
+  if (!first) return;
+  const delta = first.delta;
+  // Reasoning text is only accumulated; it is not emitted as a live event.
+  if (delta?.reasoning_content) state.reasoning += delta.reasoning_content;
+  if (delta?.content) {
+    yield { type: EventType.ContentDelta, contentDelta: delta.content };
+  }
+  for (const call of delta?.tool_calls ?? []) {
+    const { id } = touchToolState(call, state);
+    const fn = call.function;
+    yield {
+      type: EventType.ToolCallDelta,
+      toolCallDelta: { id, name: fn?.name ?? "", argumentsDelta: fn?.arguments ?? "" },
+    };
+  }
+  if (first.finish_reason) state.stopReason = normalizeFinishReason(first.finish_reason);
+}
+/**
+ * Look up, creating if necessary, the ToolState for `td.index` (0 when the index
+ * is absent) and merge any non-empty id or function name from this delta into it.
+ * If the state still has no id afterwards, `call_<index>` is assigned, because
+ * some OpenAI-compatible backends omit `tool_call` ids and collectStream discards
+ * id-less tool deltas.
+ */
+function touchToolState(td: ChunkToolCall, state: StreamState): ToolState {
+  const slot = td.index ?? 0;
+  const known = state.toolByIdx.get(slot);
+  const entry: ToolState = known ?? { id: "", name: "" };
+  if (!known) state.toolByIdx.set(slot, entry);
+  if (td.id) entry.id = td.id;
+  const fnName = td.function?.name;
+  if (fnName) entry.name = fnName;
+  // Still anonymous after merging: derive a stable id from the slot number.
+  if (entry.id === "") entry.id = `call_${slot}`;
+  return entry;
+}