@nebulaos/llm-gateway 0.1.8 → 0.2.0

package/dist/index.d.mts CHANGED
@@ -75,6 +75,22 @@ declare class LLMGateway implements IModel {
   */
  private extractErrorSource;
  private extractExtraOptions;
+ /**
+ * Builds the outbound headers for a call to the NebulaOS LLM Gateway.
+ *
+ * Under ADR-0002, correlation with the NebulaOS backend is carried on
+ * domain-scoped `x-nebula-*` headers that APMs of the host process do not
+ * touch. The standard W3C `traceparent` is still emitted (same trace-id /
+ * span-id) for compatibility with caches, proxies, and log correlation —
+ * but the backend treats `x-nebula-traceparent` as the authoritative source.
+ * If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
+ * unaffected.
+ *
+ * Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
+ * no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
+ * Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
+ * plus the compat `traceparent`.
+ */
  private buildGatewayHeaders;
  /**
   * Extracts enrichment data from backend HTTP headers.
package/dist/index.d.ts CHANGED
@@ -75,6 +75,22 @@ declare class LLMGateway implements IModel {
   */
  private extractErrorSource;
  private extractExtraOptions;
+ /**
+ * Builds the outbound headers for a call to the NebulaOS LLM Gateway.
+ *
+ * Under ADR-0002, correlation with the NebulaOS backend is carried on
+ * domain-scoped `x-nebula-*` headers that APMs of the host process do not
+ * touch. The standard W3C `traceparent` is still emitted (same trace-id /
+ * span-id) for compatibility with caches, proxies, and log correlation —
+ * but the backend treats `x-nebula-traceparent` as the authoritative source.
+ * If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
+ * unaffected.
+ *
+ * Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
+ * no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
+ * Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
+ * plus the compat `traceparent`.
+ */
  private buildGatewayHeaders;
  /**
   * Extracts enrichment data from backend HTTP headers.
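
Note on the migration contract in the doc comment above: it implies a Phase 1B reader on the backend that prefers the new `x-nebula-*` names and falls back to the legacy ones while both are accepted. A minimal TypeScript sketch of that fallback, assuming only a standard Fetch-style `Headers` lookup; `resolveCorrelation` is an illustrative name, not part of this package:

// Illustrative Phase 1B reader (not from the package): prefer the
// domain-scoped header, fall back to the legacy name while both are accepted.
function resolveCorrelation(headers: Headers) {
  const pick = (nebula: string, legacy: string): string | null =>
    headers.get(nebula) ?? headers.get(legacy);
  return {
    requestId: pick("x-nebula-request-id", "x-request-id"),
    executionId: pick("x-nebula-execution-id", "x-execution-id"),
    // `x-nebula-traceparent` is authoritative; plain `traceparent` may have
    // been rewritten by a host APM on egress.
    traceparent: pick("x-nebula-traceparent", "traceparent"),
  };
}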
package/dist/index.js CHANGED
@@ -88,7 +88,7 @@ var LLMGateway = class {
  };
  return import_core.Tracing.withSpan(
  {
- kind: import_types.SpanType.llm,
+ kind: import_types.SpanType.llm_wrapper,
  name: `llm:${this.modelName}`,
  data: startData
  },
@@ -184,130 +184,141 @@ var LLMGateway = class {
  messages,
  tools
  };
- const llmSpan = await import_core.Tracing.startSpan({
- kind: import_types.SpanType.llm,
+ const llmSpan = import_core.Tracing.startSpan({
+ kind: import_types.SpanType.llm_wrapper,
  name: `llm:${this.modelName}`,
  data: startData
  });
- const headers = this.buildGatewayHeaders();
- this.logger.debug("LLM Gateway stream request", {
- model,
- baseUrl: this.baseUrl,
- stream: true,
- messageCount: messages.length,
- toolCount: tools?.length ?? 0
- });
- let stream;
- try {
- stream = await this.client.chat.completions.create(
- {
- model,
- messages: this.convertMessages(messages),
- tools: this.convertTools(tools),
- stream: true,
- stream_options: { include_usage: true },
- response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
- type: "json_schema",
- json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
- } : { type: "json_object" } : void 0,
- ...this.extractExtraOptions(mergedOptions)
- },
- { headers }
- );
- } catch (error) {
- this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
- const gatewayError = this.handleError(error);
- if (llmSpan) {
- const errorEndData = {
- error: {
- message: gatewayError.message,
- code: gatewayError.code,
- status: gatewayError.status
- }
- };
- await llmSpan.end({
- status: "error",
- data: errorEndData
- });
+ const queue = [];
+ let pendingResolve = null;
+ const abortController = new AbortController();
+ let consumerAborted = false;
+ const push = (item) => {
+ if (pendingResolve) {
+ const resolve = pendingResolve;
+ pendingResolve = null;
+ resolve(item);
+ } else {
+ queue.push(item);
  }
- throw gatewayError;
- }
- let finalUsage;
- let finalFinishReason;
- let toolCallsCount = 0;
- let outputPreview = "";
- let finalContent = "";
- const toolCallsAccumulator = /* @__PURE__ */ new Map();
- try {
- for await (const chunk of stream) {
- if (chunk.usage) {
- finalUsage = this.mapUsage(chunk.usage);
- yield {
- type: "finish",
- reason: "stop",
- usage: finalUsage
- };
- }
- const choice = chunk.choices?.[0];
- if (!choice) continue;
- if (choice.finish_reason) {
- finalFinishReason = this.mapFinishReason(choice.finish_reason);
- yield {
- type: "finish",
- reason: finalFinishReason
- };
- }
- const delta = choice.delta;
- if (!delta) continue;
- if (delta.content) {
- finalContent += delta.content;
- if (outputPreview.length < 200) {
- outputPreview += delta.content.slice(0, 200 - outputPreview.length);
+ };
+ const pull = () => {
+ if (queue.length > 0) return Promise.resolve(queue.shift());
+ return new Promise((resolve) => {
+ pendingResolve = resolve;
+ });
+ };
+ const producer = import_core.Tracing.runWithSpan(llmSpan, async () => {
+ const headers = this.buildGatewayHeaders();
+ this.logger.debug("LLM Gateway stream request", {
+ model,
+ baseUrl: this.baseUrl,
+ stream: true,
+ messageCount: messages.length,
+ toolCount: tools?.length ?? 0
+ });
+ let stream;
+ try {
+ stream = await this.client.chat.completions.create(
+ {
+ model,
+ messages: this.convertMessages(messages),
+ tools: this.convertTools(tools),
+ stream: true,
+ stream_options: { include_usage: true },
+ response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
+ type: "json_schema",
+ json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
+ } : { type: "json_object" } : void 0,
+ ...this.extractExtraOptions(mergedOptions)
+ },
+ { headers, signal: abortController.signal }
+ );
+ } catch (error) {
+ this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
+ throw this.handleError(error);
+ }
+ let finalUsage;
+ let finalFinishReason;
+ let toolCallsCount = 0;
+ let outputPreview = "";
+ let finalContent = "";
+ const toolCallsAccumulator = /* @__PURE__ */ new Map();
+ try {
+ for await (const chunk of stream) {
+ if (abortController.signal.aborted) break;
+ if (chunk.usage) {
+ finalUsage = this.mapUsage(chunk.usage);
+ push({
+ kind: "chunk",
+ value: { type: "finish", reason: "stop", usage: finalUsage }
+ });
  }
- yield { type: "content_delta", delta: delta.content };
- }
- if (delta.tool_calls) {
- for (const tc of delta.tool_calls) {
- const idx = tc.index;
- if (tc.id && tc.function?.name) {
- toolCallsCount++;
- toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
- yield {
- type: "tool_call_start",
- index: idx,
- id: tc.id,
- name: tc.function.name
- };
+ const choice = chunk.choices?.[0];
+ if (!choice) continue;
+ if (choice.finish_reason) {
+ finalFinishReason = this.mapFinishReason(choice.finish_reason);
+ push({
+ kind: "chunk",
+ value: { type: "finish", reason: finalFinishReason }
+ });
+ }
+ const delta = choice.delta;
+ if (!delta) continue;
+ if (delta.content) {
+ finalContent += delta.content;
+ if (outputPreview.length < 200) {
+ outputPreview += delta.content.slice(0, 200 - outputPreview.length);
  }
- if (tc.function?.arguments) {
- const existing = toolCallsAccumulator.get(idx);
- if (existing) {
- existing.arguments += tc.function.arguments;
+ push({ kind: "chunk", value: { type: "content_delta", delta: delta.content } });
+ }
+ if (delta.tool_calls) {
+ for (const tc of delta.tool_calls) {
+ const idx = tc.index;
+ if (tc.id && tc.function?.name) {
+ toolCallsCount++;
+ toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
+ push({
+ kind: "chunk",
+ value: {
+ type: "tool_call_start",
+ index: idx,
+ id: tc.id,
+ name: tc.function.name
+ }
+ });
+ }
+ if (tc.function?.arguments) {
+ const existing = toolCallsAccumulator.get(idx);
+ if (existing) {
+ existing.arguments += tc.function.arguments;
+ }
+ push({
+ kind: "chunk",
+ value: {
+ type: "tool_call_delta",
+ index: idx,
+ args: tc.function.arguments
+ }
+ });
  }
- yield {
- type: "tool_call_delta",
- index: idx,
- args: tc.function.arguments
- };
  }
  }
  }
- }
- const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
- id: tc.id,
- type: "function",
- function: { name: tc.name, arguments: tc.arguments }
- }));
- const choices = [{
- index: 0,
- message: {
- role: "assistant",
- content: finalContent || null,
- tool_calls: toolCalls.length > 0 ? toolCalls : void 0
- },
- finish_reason: finalFinishReason
- }];
- if (llmSpan) {
+ const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
+ id: tc.id,
+ type: "function",
+ function: { name: tc.name, arguments: tc.arguments }
+ }));
+ const choices = [{
+ index: 0,
+ message: {
+ role: "assistant",
+ content: finalContent || null,
+ tool_calls: toolCalls.length > 0 ? toolCalls : void 0
+ },
+ finish_reason: finalFinishReason
+ }];
  const endData = {
  usage: finalUsage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
  finishReason: finalFinishReason ?? "stop",
@@ -315,28 +326,56 @@ var LLMGateway = class {
  outputPreview,
  choices: this.sanitizeChoices(choices)
  };
- await llmSpan.end({
- status: "success",
- data: endData
- });
+ await llmSpan.end({ status: "success", data: endData });
+ } catch (error) {
+ this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
+ throw this.handleError(error);
  }
- } catch (error) {
- this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
- const gatewayError = this.handleError(error);
- if (llmSpan) {
- const errorEndData = {
- error: {
- message: gatewayError.message,
- code: gatewayError.code,
- status: gatewayError.status
+ }).then(
+ () => push({ kind: "done" }),
+ (error) => push({ kind: "error", error })
+ );
+ let completedNormally = false;
+ try {
+ while (true) {
+ const item = await pull();
+ if (item.kind === "chunk") {
+ yield item.value;
+ } else if (item.kind === "done") {
+ completedNormally = true;
+ return;
+ } else {
+ completedNormally = true;
+ const gatewayError = item.error instanceof LLMGatewayError ? item.error : this.handleError(item.error);
+ if (!llmSpan.isEnded) {
+ const errorEndData = {
+ error: {
+ message: gatewayError.message,
+ code: gatewayError.code,
+ status: gatewayError.status
+ }
+ };
+ await llmSpan.end({ status: "error", data: errorEndData });
  }
- };
- await llmSpan.end({
- status: "error",
- data: errorEndData
- });
+ throw gatewayError;
+ }
+ }
+ } finally {
+ if (!completedNormally) {
+ consumerAborted = true;
+ abortController.abort();
+ if (!llmSpan.isEnded) {
+ try {
+ await llmSpan.end({ status: "cancelled" });
+ } catch {
+ }
+ }
+ }
+ try {
+ await producer;
+ } catch {
  }
- throw gatewayError;
+ void consumerAborted;
  }
  }
  // ==========================================================================
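
The streaming rewrite above replaces direct yields from the OpenAI stream with a producer/consumer bridge: the producer runs inside Tracing.runWithSpan so span context stays attached to the network work, pushes items into a queue, and the generator pulls them back out, aborting the HTTP request via AbortController when the consumer stops early. A minimal self-contained sketch of that queue bridge, with simplified types; the makeChannel helper is illustrative, since the compiled output inlines the same logic rather than calling a function:

// Minimal sketch of the queue bridge used above. At most one consumer waits
// at a time, matching the single `pendingResolve` slot in the compiled code.
type Item<T> =
  | { kind: "chunk"; value: T }
  | { kind: "done" }
  | { kind: "error"; error: unknown };

function makeChannel<T>() {
  const queue: Item<T>[] = [];
  let pendingResolve: ((item: Item<T>) => void) | null = null;
  const push = (item: Item<T>): void => {
    if (pendingResolve) {
      const resolve = pendingResolve;
      pendingResolve = null;
      resolve(item); // hand off directly to the waiting pull()
    } else {
      queue.push(item); // nobody waiting; buffer in arrival order
    }
  };
  const pull = (): Promise<Item<T>> =>
    queue.length > 0
      ? Promise.resolve(queue.shift()!)
      : new Promise((resolve) => {
          pendingResolve = resolve;
        });
  return { push, pull };
}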
@@ -515,21 +554,44 @@ var LLMGateway = class {
  const { responseFormat, ...rest } = options;
  return rest;
  }
+ /**
+ * Builds the outbound headers for a call to the NebulaOS LLM Gateway.
+ *
+ * Under ADR-0002, correlation with the NebulaOS backend is carried on
+ * domain-scoped `x-nebula-*` headers that APMs of the host process do not
+ * touch. The standard W3C `traceparent` is still emitted (same trace-id /
+ * span-id) for compatibility with caches, proxies, and log correlation —
+ * but the backend treats `x-nebula-traceparent` as the authoritative source.
+ * If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
+ * unaffected.
+ *
+ * Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
+ * no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
+ * Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
+ * plus the compat `traceparent`.
+ */
  buildGatewayHeaders() {
  const headers = {
- "x-request-id": (0, import_node_crypto.randomUUID)()
+ "x-nebula-request-id": (0, import_node_crypto.randomUUID)()
  };
  const ctx = import_core.Tracing.getContext();
+ const traceId = ctx?.traceId ?? (0, import_node_crypto.randomBytes)(16).toString("hex");
+ const spanId = ctx?.spanId ?? (0, import_node_crypto.randomBytes)(8).toString("hex");
+ const traceparent = `00-${traceId}-${spanId}-01`;
+ headers["x-nebula-traceparent"] = traceparent;
+ headers.traceparent = traceparent;
  const executionId = ctx?.executionId ?? import_core.ExecutionContext.getOrUndefined()?.executionId;
  if (executionId) {
- headers["x-execution-id"] = executionId;
+ headers["x-nebula-execution-id"] = executionId;
  }
- if (ctx) {
- headers.traceparent = `00-${ctx.traceId}-${ctx.spanId}-01`;
- } else {
- const traceId = (0, import_node_crypto.randomBytes)(16).toString("hex");
- const spanId = (0, import_node_crypto.randomBytes)(8).toString("hex");
- headers.traceparent = `00-${traceId}-${spanId}-01`;
+ if (ctx?.resourceName) {
+ headers["x-nebula-resource-name"] = ctx.resourceName;
+ }
+ if (ctx?.resourceType) {
+ headers["x-nebula-resource-type"] = ctx.resourceType;
+ }
+ if (ctx?.workspaceId) {
+ headers["x-nebula-workspace-id"] = ctx.workspaceId;
  }
  return headers;
  }
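
For reference, the traceparent values built above follow the W3C Trace Context format: version `00`, a 16-byte trace-id and an 8-byte parent/span-id as lowercase hex, then the trace flags, which this code hardcodes to `01` (sampled). A standalone sketch of the no-context fallback path:

import { randomBytes } from "node:crypto";

// W3C Trace Context: version-traceid-spanid-flags, all lowercase hex.
const traceId = randomBytes(16).toString("hex"); // 32 hex chars
const spanId = randomBytes(8).toString("hex");   // 16 hex chars
const traceparent = `00-${traceId}-${spanId}-01`; // 01 = sampled
// e.g. "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01"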
@@ -685,18 +747,38 @@ var LLMGateway = class {
  convertContentPart(part) {
  if (part.type === "text") return { type: "text", text: part.text };
  if (part.type === "file") {
- const { mimeType, source } = part.file;
- if (!mimeType.startsWith("image/")) {
- throw new Error(`LLM Gateway: file mimeType '${mimeType}' is not supported yet`);
+ const { data, mediaType, filename } = part;
+ const isImage = mediaType.startsWith("image/");
+ const isPdf = mediaType === "application/pdf";
+ const isText = mediaType.startsWith("text/");
+ if (!isImage && !isPdf && !isText) {
+ throw new Error(`LLM Gateway: file mediaType '${mediaType}' is not supported yet`);
  }
- const url = source.type === "url" ? source.url : `data:${mimeType};base64,${source.base64}`;
- return { type: "image_url", image_url: { url } };
- }
- if (part.type === "image_url") {
- return { type: "image_url", image_url: { url: part.image_url.url } };
+ let url;
+ if (data instanceof Uint8Array) {
+ const base64 = Buffer.from(data).toString("base64");
+ url = `data:${mediaType};base64,${base64}`;
+ } else if (typeof data === "string") {
+ if (data.startsWith("data:") || data.includes("://")) {
+ url = data;
+ } else {
+ url = `data:${mediaType};base64,${data}`;
+ }
+ } else {
+ throw new Error(`LLM Gateway: unsupported file data type`);
+ }
+ if (isImage) {
+ return { type: "image_url", image_url: { url } };
+ }
+ return {
+ type: "file",
+ file: {
+ file_data: url,
+ filename: filename ?? (isPdf ? "document.pdf" : "document.txt")
+ }
+ };
  }
- const _exhaustive = part;
- throw new Error(`Unsupported content type: ${_exhaustive.type}`);
+ throw new Error(`Unsupported content type: ${part.type}`);
  }
  /**
  * Extracts enrichment data from backend HTTP headers.