@nebulaos/llm-gateway 0.1.8 → 0.2.0

package/dist/index.mjs CHANGED
@@ -57,7 +57,7 @@ var LLMGateway = class {
     };
     return Tracing.withSpan(
       {
-        kind: SpanType.llm,
+        kind: SpanType.llm_wrapper,
         name: `llm:${this.modelName}`,
         data: startData
       },
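
Both code paths now tag their spans `SpanType.llm_wrapper` instead of `SpanType.llm`. The non-streaming path above keeps the `Tracing.withSpan` wrapper; the streaming path in the next hunk switches to an explicit `startSpan` so the span can outlive the frame that created it. A minimal sketch of the wrapper shape, assuming `withSpan` takes a span descriptor plus an async callback (the callback falls outside this hunk's context, so its exact signature is inferred):

```js
// Sketch only: the descriptor fields come from the diff; the callback
// signature is an assumption, not the package's documented API.
const result = await Tracing.withSpan(
  { kind: SpanType.llm_wrapper, name: `llm:${this.modelName}`, data: startData },
  async () => {
    // run the non-streaming completion and return its result
  }
);
```
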
@@ -153,130 +153,141 @@ var LLMGateway = class {
       messages,
       tools
     };
-    const llmSpan = await Tracing.startSpan({
-      kind: SpanType.llm,
+    const llmSpan = Tracing.startSpan({
+      kind: SpanType.llm_wrapper,
       name: `llm:${this.modelName}`,
       data: startData
     });
-    const headers = this.buildGatewayHeaders();
-    this.logger.debug("LLM Gateway stream request", {
-      model,
-      baseUrl: this.baseUrl,
-      stream: true,
-      messageCount: messages.length,
-      toolCount: tools?.length ?? 0
-    });
-    let stream;
-    try {
-      stream = await this.client.chat.completions.create(
-        {
-          model,
-          messages: this.convertMessages(messages),
-          tools: this.convertTools(tools),
-          stream: true,
-          stream_options: { include_usage: true },
-          response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
-            type: "json_schema",
-            json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
-          } : { type: "json_object" } : void 0,
-          ...this.extractExtraOptions(mergedOptions)
-        },
-        { headers }
-      );
-    } catch (error) {
-      this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
-      const gatewayError = this.handleError(error);
-      if (llmSpan) {
-        const errorEndData = {
-          error: {
-            message: gatewayError.message,
-            code: gatewayError.code,
-            status: gatewayError.status
-          }
-        };
-        await llmSpan.end({
-          status: "error",
-          data: errorEndData
-        });
+    const queue = [];
+    let pendingResolve = null;
+    const abortController = new AbortController();
+    let consumerAborted = false;
+    const push = (item) => {
+      if (pendingResolve) {
+        const resolve = pendingResolve;
+        pendingResolve = null;
+        resolve(item);
+      } else {
+        queue.push(item);
       }
-      throw gatewayError;
-    }
-    let finalUsage;
-    let finalFinishReason;
-    let toolCallsCount = 0;
-    let outputPreview = "";
-    let finalContent = "";
-    const toolCallsAccumulator = /* @__PURE__ */ new Map();
-    try {
-      for await (const chunk of stream) {
-        if (chunk.usage) {
-          finalUsage = this.mapUsage(chunk.usage);
-          yield {
-            type: "finish",
-            reason: "stop",
-            usage: finalUsage
-          };
-        }
-        const choice = chunk.choices?.[0];
-        if (!choice) continue;
-        if (choice.finish_reason) {
-          finalFinishReason = this.mapFinishReason(choice.finish_reason);
-          yield {
-            type: "finish",
-            reason: finalFinishReason
-          };
-        }
-        const delta = choice.delta;
-        if (!delta) continue;
-        if (delta.content) {
-          finalContent += delta.content;
-          if (outputPreview.length < 200) {
-            outputPreview += delta.content.slice(0, 200 - outputPreview.length);
+    };
+    const pull = () => {
+      if (queue.length > 0) return Promise.resolve(queue.shift());
+      return new Promise((resolve) => {
+        pendingResolve = resolve;
+      });
+    };
+    const producer = Tracing.runWithSpan(llmSpan, async () => {
+      const headers = this.buildGatewayHeaders();
+      this.logger.debug("LLM Gateway stream request", {
+        model,
+        baseUrl: this.baseUrl,
+        stream: true,
+        messageCount: messages.length,
+        toolCount: tools?.length ?? 0
+      });
+      let stream;
+      try {
+        stream = await this.client.chat.completions.create(
+          {
+            model,
+            messages: this.convertMessages(messages),
+            tools: this.convertTools(tools),
+            stream: true,
+            stream_options: { include_usage: true },
+            response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
+              type: "json_schema",
+              json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
+            } : { type: "json_object" } : void 0,
+            ...this.extractExtraOptions(mergedOptions)
+          },
+          { headers, signal: abortController.signal }
+        );
+      } catch (error) {
+        this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
+        throw this.handleError(error);
+      }
+      let finalUsage;
+      let finalFinishReason;
+      let toolCallsCount = 0;
+      let outputPreview = "";
+      let finalContent = "";
+      const toolCallsAccumulator = /* @__PURE__ */ new Map();
+      try {
+        for await (const chunk of stream) {
+          if (abortController.signal.aborted) break;
+          if (chunk.usage) {
+            finalUsage = this.mapUsage(chunk.usage);
+            push({
+              kind: "chunk",
+              value: { type: "finish", reason: "stop", usage: finalUsage }
+            });
          }
-          yield { type: "content_delta", delta: delta.content };
-        }
-        if (delta.tool_calls) {
-          for (const tc of delta.tool_calls) {
-            const idx = tc.index;
-            if (tc.id && tc.function?.name) {
-              toolCallsCount++;
-              toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
-              yield {
-                type: "tool_call_start",
-                index: idx,
-                id: tc.id,
-                name: tc.function.name
-              };
+          const choice = chunk.choices?.[0];
+          if (!choice) continue;
+          if (choice.finish_reason) {
+            finalFinishReason = this.mapFinishReason(choice.finish_reason);
+            push({
+              kind: "chunk",
+              value: { type: "finish", reason: finalFinishReason }
+            });
+          }
+          const delta = choice.delta;
+          if (!delta) continue;
+          if (delta.content) {
+            finalContent += delta.content;
+            if (outputPreview.length < 200) {
+              outputPreview += delta.content.slice(0, 200 - outputPreview.length);
            }
-            if (tc.function?.arguments) {
-              const existing = toolCallsAccumulator.get(idx);
-              if (existing) {
-                existing.arguments += tc.function.arguments;
+            push({ kind: "chunk", value: { type: "content_delta", delta: delta.content } });
+          }
+          if (delta.tool_calls) {
+            for (const tc of delta.tool_calls) {
+              const idx = tc.index;
+              if (tc.id && tc.function?.name) {
+                toolCallsCount++;
+                toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
+                push({
+                  kind: "chunk",
+                  value: {
+                    type: "tool_call_start",
+                    index: idx,
+                    id: tc.id,
+                    name: tc.function.name
+                  }
+                });
+              }
+              if (tc.function?.arguments) {
+                const existing = toolCallsAccumulator.get(idx);
+                if (existing) {
+                  existing.arguments += tc.function.arguments;
+                }
+                push({
+                  kind: "chunk",
+                  value: {
+                    type: "tool_call_delta",
+                    index: idx,
+                    args: tc.function.arguments
+                  }
+                });
              }
-              yield {
-                type: "tool_call_delta",
-                index: idx,
-                args: tc.function.arguments
-              };
            }
          }
        }
-      }
-      const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
-        id: tc.id,
-        type: "function",
-        function: { name: tc.name, arguments: tc.arguments }
-      }));
-      const choices = [{
-        index: 0,
-        message: {
-          role: "assistant",
-          content: finalContent || null,
-          tool_calls: toolCalls.length > 0 ? toolCalls : void 0
-        },
-        finish_reason: finalFinishReason
-      }];
-      if (llmSpan) {
+        const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
+          id: tc.id,
+          type: "function",
+          function: { name: tc.name, arguments: tc.arguments }
+        }));
+        const choices = [{
+          index: 0,
+          message: {
+            role: "assistant",
+            content: finalContent || null,
+            tool_calls: toolCalls.length > 0 ? toolCalls : void 0
+          },
+          finish_reason: finalFinishReason
+        }];
        const endData = {
          usage: finalUsage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
          finishReason: finalFinishReason ?? "stop",
@@ -284,28 +295,56 @@ var LLMGateway = class {
          outputPreview,
          choices: this.sanitizeChoices(choices)
        };
-        await llmSpan.end({
-          status: "success",
-          data: endData
-        });
+        await llmSpan.end({ status: "success", data: endData });
+      } catch (error) {
+        this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
+        throw this.handleError(error);
      }
-    } catch (error) {
-      this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
-      const gatewayError = this.handleError(error);
-      if (llmSpan) {
-        const errorEndData = {
-          error: {
-            message: gatewayError.message,
-            code: gatewayError.code,
-            status: gatewayError.status
+    }).then(
+      () => push({ kind: "done" }),
+      (error) => push({ kind: "error", error })
+    );
+    let completedNormally = false;
+    try {
+      while (true) {
+        const item = await pull();
+        if (item.kind === "chunk") {
+          yield item.value;
+        } else if (item.kind === "done") {
+          completedNormally = true;
+          return;
+        } else {
+          completedNormally = true;
+          const gatewayError = item.error instanceof LLMGatewayError ? item.error : this.handleError(item.error);
+          if (!llmSpan.isEnded) {
+            const errorEndData = {
+              error: {
+                message: gatewayError.message,
+                code: gatewayError.code,
+                status: gatewayError.status
+              }
+            };
+            await llmSpan.end({ status: "error", data: errorEndData });
          }
-        };
-        await llmSpan.end({
-          status: "error",
-          data: errorEndData
-        });
+          throw gatewayError;
+        }
+      }
+    } finally {
+      if (!completedNormally) {
+        consumerAborted = true;
+        abortController.abort();
+        if (!llmSpan.isEnded) {
+          try {
+            await llmSpan.end({ status: "cancelled" });
+          } catch {
+          }
+        }
+      }
+      try {
+        await producer;
+      } catch {
      }
-      throw gatewayError;
+      void consumerAborted;
    }
  }
  // ==========================================================================
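
An async generator cannot `yield` from inside a callback, so the new streaming path runs the network read as a detached producer inside `Tracing.runWithSpan` (keeping every chunk under the span's async context) and hands results to the generator through a pull queue. A self-contained sketch of that bridge pattern, with illustrative names (`makeBridge` and `consume` are not package exports):

```js
// Minimal producer/consumer bridge for an async generator.
// `push` hands an item to a waiting consumer immediately, or buffers it;
// `pull` drains the buffer, or parks the consumer until the next push.
// A single pendingResolve slot suffices because a generator loop only
// ever has one outstanding pull at a time.
function makeBridge() {
  const queue = [];
  let pendingResolve = null;
  const push = (item) => {
    if (pendingResolve) {
      const resolve = pendingResolve;
      pendingResolve = null;
      resolve(item);
    } else {
      queue.push(item);
    }
  };
  const pull = () => {
    if (queue.length > 0) return Promise.resolve(queue.shift());
    return new Promise((resolve) => {
      pendingResolve = resolve;
    });
  };
  return { push, pull };
}

async function* consume(producerFn) {
  const { push, pull } = makeBridge();
  // Completion and failure also flow through the queue, so the consumer
  // observes every event strictly in production order.
  producerFn(push).then(
    () => push({ kind: "done" }),
    (error) => push({ kind: "error", error })
  );
  while (true) {
    const item = await pull();
    if (item.kind === "chunk") yield item.value;
    else if (item.kind === "done") return;
    else throw item.error;
  }
}

// Usage:
//   for await (const v of consume(async (push) => {
//     push({ kind: "chunk", value: 1 });
//     push({ kind: "chunk", value: 2 });
//   })) console.log(v); // 1, 2
```
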
@@ -484,21 +523,44 @@ var LLMGateway = class {
     const { responseFormat, ...rest } = options;
     return rest;
   }
+  /**
+   * Builds the outbound headers for a call to the NebulaOS LLM Gateway.
+   *
+   * Under ADR-0002, correlation with the NebulaOS backend is carried on
+   * domain-scoped `x-nebula-*` headers that APMs of the host process do not
+   * touch. The standard W3C `traceparent` is still emitted (same trace-id /
+   * span-id) for compatibility with caches, proxies, and log correlation —
+   * but the backend treats `x-nebula-traceparent` as the authoritative source.
+   * If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
+   * unaffected.
+   *
+   * Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
+   * no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
+   * Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
+   * plus the compat `traceparent`.
+   */
   buildGatewayHeaders() {
     const headers = {
-      "x-request-id": randomUUID()
+      "x-nebula-request-id": randomUUID()
     };
     const ctx = Tracing.getContext();
+    const traceId = ctx?.traceId ?? randomBytes(16).toString("hex");
+    const spanId = ctx?.spanId ?? randomBytes(8).toString("hex");
+    const traceparent = `00-${traceId}-${spanId}-01`;
+    headers["x-nebula-traceparent"] = traceparent;
+    headers.traceparent = traceparent;
     const executionId = ctx?.executionId ?? ExecutionContext.getOrUndefined()?.executionId;
     if (executionId) {
-      headers["x-execution-id"] = executionId;
+      headers["x-nebula-execution-id"] = executionId;
     }
-    if (ctx) {
-      headers.traceparent = `00-${ctx.traceId}-${ctx.spanId}-01`;
-    } else {
-      const traceId = randomBytes(16).toString("hex");
-      const spanId = randomBytes(8).toString("hex");
-      headers.traceparent = `00-${traceId}-${spanId}-01`;
+    if (ctx?.resourceName) {
+      headers["x-nebula-resource-name"] = ctx.resourceName;
+    }
+    if (ctx?.resourceType) {
+      headers["x-nebula-resource-type"] = ctx.resourceType;
+    }
+    if (ctx?.workspaceId) {
+      headers["x-nebula-workspace-id"] = ctx.workspaceId;
     }
     return headers;
   }
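
For illustration, the header set `buildGatewayHeaders` now produces when a full tracing context is present looks like the sketch below. Every value is made up (the trace-id/span-id pair is the W3C specification's example), and the conditional headers appear only when the corresponding context fields are set:

```js
// Hypothetical output of buildGatewayHeaders() — illustrative values only.
const headers = {
  "x-nebula-request-id": "8c9f6d1e-2b4a-4c7d-9e0f-1a2b3c4d5e6f",
  // Same trace-id/span-id on both; the backend trusts the x-nebula- copy.
  "x-nebula-traceparent": "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01",
  "traceparent": "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01",
  // Present only when the tracing/execution context carries these fields:
  "x-nebula-execution-id": "exec-42",
  "x-nebula-resource-name": "support-agent",
  "x-nebula-resource-type": "agent",
  "x-nebula-workspace-id": "ws-7"
};
```
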
@@ -654,18 +716,38 @@ var LLMGateway = class {
   convertContentPart(part) {
     if (part.type === "text") return { type: "text", text: part.text };
     if (part.type === "file") {
-      const { mimeType, source } = part.file;
-      if (!mimeType.startsWith("image/")) {
-        throw new Error(`LLM Gateway: file mimeType '${mimeType}' is not supported yet`);
+      const { data, mediaType, filename } = part;
+      const isImage = mediaType.startsWith("image/");
+      const isPdf = mediaType === "application/pdf";
+      const isText = mediaType.startsWith("text/");
+      if (!isImage && !isPdf && !isText) {
+        throw new Error(`LLM Gateway: file mediaType '${mediaType}' is not supported yet`);
      }
-      const url = source.type === "url" ? source.url : `data:${mimeType};base64,${source.base64}`;
-      return { type: "image_url", image_url: { url } };
-    }
-    if (part.type === "image_url") {
-      return { type: "image_url", image_url: { url: part.image_url.url } };
+      let url;
+      if (data instanceof Uint8Array) {
+        const base64 = Buffer.from(data).toString("base64");
+        url = `data:${mediaType};base64,${base64}`;
+      } else if (typeof data === "string") {
+        if (data.startsWith("data:") || data.includes("://")) {
+          url = data;
+        } else {
+          url = `data:${mediaType};base64,${data}`;
+        }
+      } else {
+        throw new Error(`LLM Gateway: unsupported file data type`);
+      }
+      if (isImage) {
+        return { type: "image_url", image_url: { url } };
+      }
+      return {
+        type: "file",
+        file: {
+          file_data: url,
+          filename: filename ?? (isPdf ? "document.pdf" : "document.txt")
+        }
+      };
    }
-    const _exhaustive = part;
-    throw new Error(`Unsupported content type: ${_exhaustive.type}`);
+    throw new Error(`Unsupported content type: ${part.type}`);
  }
  /**
   * Sanitize choices for observability storage.
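
The new file handling normalizes `part.data` to a URL before routing: binary payloads and bare base64 strings become `data:` URLs, while strings that already look like URLs pass through untouched. A standalone sketch of that normalization rule (the helper name `toUrl` is illustrative; it assumes Node's `Buffer`):

```js
// Illustrative restatement of the data-normalization logic in the diff.
function toUrl(data, mediaType) {
  if (data instanceof Uint8Array) {
    // Binary payload: base64-encode into a data URL.
    return `data:${mediaType};base64,${Buffer.from(data).toString("base64")}`;
  }
  if (typeof data === "string") {
    // Already a data: URL or scheme-qualified URL: pass through.
    if (data.startsWith("data:") || data.includes("://")) return data;
    // Otherwise treat the string as a bare base64 payload.
    return `data:${mediaType};base64,${data}`;
  }
  throw new Error("unsupported file data type");
}

toUrl(new Uint8Array([137, 80, 78, 71]), "image/png");
// → "data:image/png;base64,iVBORw=="
toUrl("https://example.com/a.pdf", "application/pdf");
// → "https://example.com/a.pdf" (passed through; the part gets filename "document.pdf")
toUrl("SGVsbG8=", "text/plain");
// → "data:text/plain;base64,SGVsbG8="
```
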