@nebulaos/llm-gateway 0.1.9 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -75,6 +75,22 @@ declare class LLMGateway implements IModel {
75
75
  */
76
76
  private extractErrorSource;
77
77
  private extractExtraOptions;
78
+ /**
79
+ * Builds the outbound headers for a call to the NebulaOS LLM Gateway.
80
+ *
81
+ * Under ADR-0002, correlation with the NebulaOS backend is carried on
82
+ * domain-scoped `x-nebula-*` headers that APMs of the host process do not
83
+ * touch. The standard W3C `traceparent` is still emitted (same trace-id /
84
+ * span-id) for compatibility with caches, proxies, and log correlation —
85
+ * but the backend treats `x-nebula-traceparent` as the authoritative source.
86
+ * If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
87
+ * unaffected.
88
+ *
89
+ * Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
90
+ * no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
91
+ * Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
92
+ * plus the compat `traceparent`.
93
+ */
78
94
  private buildGatewayHeaders;
79
95
  /**
80
96
  * Extracts enrichment data from backend HTTP headers.
package/dist/index.d.ts CHANGED
@@ -75,6 +75,22 @@ declare class LLMGateway implements IModel {
75
75
  */
76
76
  private extractErrorSource;
77
77
  private extractExtraOptions;
78
+ /**
79
+ * Builds the outbound headers for a call to the NebulaOS LLM Gateway.
80
+ *
81
+ * Under ADR-0002, correlation with the NebulaOS backend is carried on
82
+ * domain-scoped `x-nebula-*` headers that APMs of the host process do not
83
+ * touch. The standard W3C `traceparent` is still emitted (same trace-id /
84
+ * span-id) for compatibility with caches, proxies, and log correlation —
85
+ * but the backend treats `x-nebula-traceparent` as the authoritative source.
86
+ * If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
87
+ * unaffected.
88
+ *
89
+ * Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
90
+ * no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
91
+ * Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
92
+ * plus the compat `traceparent`.
93
+ */
78
94
  private buildGatewayHeaders;
79
95
  /**
80
96
  * Extracts enrichment data from backend HTTP headers.
package/dist/index.js CHANGED
@@ -82,9 +82,7 @@ var LLMGateway = class {
82
82
  messagesCount: messages.length,
83
83
  toolsCount: tools?.length ?? 0,
84
84
  llmConfig: Object.keys(llmConfig).length > 0 ? llmConfig : void 0,
85
- responseFormat,
86
- messages,
87
- tools
85
+ responseFormat
88
86
  };
89
87
  return import_core.Tracing.withSpan(
90
88
  {
@@ -180,134 +178,143 @@ var LLMGateway = class {
180
178
  messagesCount: messages.length,
181
179
  toolsCount: tools?.length ?? 0,
182
180
  llmConfig: Object.keys(llmConfig).length > 0 ? llmConfig : void 0,
183
- responseFormat,
184
- messages,
185
- tools
181
+ responseFormat
186
182
  };
187
- const llmSpan = await import_core.Tracing.startSpan({
183
+ const llmSpan = import_core.Tracing.startSpan({
188
184
  kind: import_types.SpanType.llm_wrapper,
189
185
  name: `llm:${this.modelName}`,
190
186
  data: startData
191
187
  });
192
- const headers = this.buildGatewayHeaders();
193
- this.logger.debug("LLM Gateway stream request", {
194
- model,
195
- baseUrl: this.baseUrl,
196
- stream: true,
197
- messageCount: messages.length,
198
- toolCount: tools?.length ?? 0
199
- });
200
- let stream;
201
- try {
202
- stream = await this.client.chat.completions.create(
203
- {
204
- model,
205
- messages: this.convertMessages(messages),
206
- tools: this.convertTools(tools),
207
- stream: true,
208
- stream_options: { include_usage: true },
209
- response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
210
- type: "json_schema",
211
- json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
212
- } : { type: "json_object" } : void 0,
213
- ...this.extractExtraOptions(mergedOptions)
214
- },
215
- { headers }
216
- );
217
- } catch (error) {
218
- this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
219
- const gatewayError = this.handleError(error);
220
- if (llmSpan) {
221
- const errorEndData = {
222
- error: {
223
- message: gatewayError.message,
224
- code: gatewayError.code,
225
- status: gatewayError.status
226
- }
227
- };
228
- await llmSpan.end({
229
- status: "error",
230
- data: errorEndData
231
- });
188
+ const queue = [];
189
+ let pendingResolve = null;
190
+ const abortController = new AbortController();
191
+ let consumerAborted = false;
192
+ const push = (item) => {
193
+ if (pendingResolve) {
194
+ const resolve = pendingResolve;
195
+ pendingResolve = null;
196
+ resolve(item);
197
+ } else {
198
+ queue.push(item);
232
199
  }
233
- throw gatewayError;
234
- }
235
- let finalUsage;
236
- let finalFinishReason;
237
- let toolCallsCount = 0;
238
- let outputPreview = "";
239
- let finalContent = "";
240
- const toolCallsAccumulator = /* @__PURE__ */ new Map();
241
- try {
242
- for await (const chunk of stream) {
243
- if (chunk.usage) {
244
- finalUsage = this.mapUsage(chunk.usage);
245
- yield {
246
- type: "finish",
247
- reason: "stop",
248
- usage: finalUsage
249
- };
250
- }
251
- const choice = chunk.choices?.[0];
252
- if (!choice) continue;
253
- if (choice.finish_reason) {
254
- finalFinishReason = this.mapFinishReason(choice.finish_reason);
255
- yield {
256
- type: "finish",
257
- reason: finalFinishReason
258
- };
259
- }
260
- const delta = choice.delta;
261
- if (!delta) continue;
262
- if (delta.content) {
263
- finalContent += delta.content;
264
- if (outputPreview.length < 200) {
265
- outputPreview += delta.content.slice(0, 200 - outputPreview.length);
200
+ };
201
+ const pull = () => {
202
+ if (queue.length > 0) return Promise.resolve(queue.shift());
203
+ return new Promise((resolve) => {
204
+ pendingResolve = resolve;
205
+ });
206
+ };
207
+ const producer = import_core.Tracing.runWithSpan(llmSpan, async () => {
208
+ const headers = this.buildGatewayHeaders();
209
+ this.logger.debug("LLM Gateway stream request", {
210
+ model,
211
+ baseUrl: this.baseUrl,
212
+ stream: true,
213
+ messageCount: messages.length,
214
+ toolCount: tools?.length ?? 0
215
+ });
216
+ let stream;
217
+ try {
218
+ stream = await this.client.chat.completions.create(
219
+ {
220
+ model,
221
+ messages: this.convertMessages(messages),
222
+ tools: this.convertTools(tools),
223
+ stream: true,
224
+ stream_options: { include_usage: true },
225
+ response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
226
+ type: "json_schema",
227
+ json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
228
+ } : { type: "json_object" } : void 0,
229
+ ...this.extractExtraOptions(mergedOptions)
230
+ },
231
+ { headers, signal: abortController.signal }
232
+ );
233
+ } catch (error) {
234
+ this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
235
+ throw this.handleError(error);
236
+ }
237
+ let finalUsage;
238
+ let finalFinishReason;
239
+ let toolCallsCount = 0;
240
+ let outputPreview = "";
241
+ let finalContent = "";
242
+ const toolCallsAccumulator = /* @__PURE__ */ new Map();
243
+ try {
244
+ for await (const chunk of stream) {
245
+ if (abortController.signal.aborted) break;
246
+ if (chunk.usage) {
247
+ finalUsage = this.mapUsage(chunk.usage);
248
+ push({
249
+ kind: "chunk",
250
+ value: { type: "finish", reason: "stop", usage: finalUsage }
251
+ });
266
252
  }
267
- yield { type: "content_delta", delta: delta.content };
268
- }
269
- if (delta.tool_calls) {
270
- for (const tc of delta.tool_calls) {
271
- const idx = tc.index;
272
- if (tc.id && tc.function?.name) {
273
- toolCallsCount++;
274
- toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
275
- yield {
276
- type: "tool_call_start",
277
- index: idx,
278
- id: tc.id,
279
- name: tc.function.name
280
- };
253
+ const choice = chunk.choices?.[0];
254
+ if (!choice) continue;
255
+ if (choice.finish_reason) {
256
+ finalFinishReason = this.mapFinishReason(choice.finish_reason);
257
+ push({
258
+ kind: "chunk",
259
+ value: { type: "finish", reason: finalFinishReason }
260
+ });
261
+ }
262
+ const delta = choice.delta;
263
+ if (!delta) continue;
264
+ if (delta.content) {
265
+ finalContent += delta.content;
266
+ if (outputPreview.length < 200) {
267
+ outputPreview += delta.content.slice(0, 200 - outputPreview.length);
281
268
  }
282
- if (tc.function?.arguments) {
283
- const existing = toolCallsAccumulator.get(idx);
284
- if (existing) {
285
- existing.arguments += tc.function.arguments;
269
+ push({ kind: "chunk", value: { type: "content_delta", delta: delta.content } });
270
+ }
271
+ if (delta.tool_calls) {
272
+ for (const tc of delta.tool_calls) {
273
+ const idx = tc.index;
274
+ if (tc.id && tc.function?.name) {
275
+ toolCallsCount++;
276
+ toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
277
+ push({
278
+ kind: "chunk",
279
+ value: {
280
+ type: "tool_call_start",
281
+ index: idx,
282
+ id: tc.id,
283
+ name: tc.function.name
284
+ }
285
+ });
286
+ }
287
+ if (tc.function?.arguments) {
288
+ const existing = toolCallsAccumulator.get(idx);
289
+ if (existing) {
290
+ existing.arguments += tc.function.arguments;
291
+ }
292
+ push({
293
+ kind: "chunk",
294
+ value: {
295
+ type: "tool_call_delta",
296
+ index: idx,
297
+ args: tc.function.arguments
298
+ }
299
+ });
286
300
  }
287
- yield {
288
- type: "tool_call_delta",
289
- index: idx,
290
- args: tc.function.arguments
291
- };
292
301
  }
293
302
  }
294
303
  }
295
- }
296
- const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
297
- id: tc.id,
298
- type: "function",
299
- function: { name: tc.name, arguments: tc.arguments }
300
- }));
301
- const choices = [{
302
- index: 0,
303
- message: {
304
- role: "assistant",
305
- content: finalContent || null,
306
- tool_calls: toolCalls.length > 0 ? toolCalls : void 0
307
- },
308
- finish_reason: finalFinishReason
309
- }];
310
- if (llmSpan) {
304
+ const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
305
+ id: tc.id,
306
+ type: "function",
307
+ function: { name: tc.name, arguments: tc.arguments }
308
+ }));
309
+ const choices = [{
310
+ index: 0,
311
+ message: {
312
+ role: "assistant",
313
+ content: finalContent || null,
314
+ tool_calls: toolCalls.length > 0 ? toolCalls : void 0
315
+ },
316
+ finish_reason: finalFinishReason
317
+ }];
311
318
  const endData = {
312
319
  usage: finalUsage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
313
320
  finishReason: finalFinishReason ?? "stop",
@@ -315,28 +322,56 @@ var LLMGateway = class {
315
322
  outputPreview,
316
323
  choices: this.sanitizeChoices(choices)
317
324
  };
318
- await llmSpan.end({
319
- status: "success",
320
- data: endData
321
- });
325
+ await llmSpan.end({ status: "success", data: endData });
326
+ } catch (error) {
327
+ this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
328
+ throw this.handleError(error);
322
329
  }
323
- } catch (error) {
324
- this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
325
- const gatewayError = this.handleError(error);
326
- if (llmSpan) {
327
- const errorEndData = {
328
- error: {
329
- message: gatewayError.message,
330
- code: gatewayError.code,
331
- status: gatewayError.status
330
+ }).then(
331
+ () => push({ kind: "done" }),
332
+ (error) => push({ kind: "error", error })
333
+ );
334
+ let completedNormally = false;
335
+ try {
336
+ while (true) {
337
+ const item = await pull();
338
+ if (item.kind === "chunk") {
339
+ yield item.value;
340
+ } else if (item.kind === "done") {
341
+ completedNormally = true;
342
+ return;
343
+ } else {
344
+ completedNormally = true;
345
+ const gatewayError = item.error instanceof LLMGatewayError ? item.error : this.handleError(item.error);
346
+ if (!llmSpan.isEnded) {
347
+ const errorEndData = {
348
+ error: {
349
+ message: gatewayError.message,
350
+ code: gatewayError.code,
351
+ status: gatewayError.status
352
+ }
353
+ };
354
+ await llmSpan.end({ status: "error", data: errorEndData });
332
355
  }
333
- };
334
- await llmSpan.end({
335
- status: "error",
336
- data: errorEndData
337
- });
356
+ throw gatewayError;
357
+ }
338
358
  }
339
- throw gatewayError;
359
+ } finally {
360
+ if (!completedNormally) {
361
+ consumerAborted = true;
362
+ abortController.abort();
363
+ if (!llmSpan.isEnded) {
364
+ try {
365
+ await llmSpan.end({ status: "cancelled" });
366
+ } catch {
367
+ }
368
+ }
369
+ }
370
+ try {
371
+ await producer;
372
+ } catch {
373
+ }
374
+ void consumerAborted;
340
375
  }
341
376
  }
342
377
  // ==========================================================================
@@ -515,24 +550,44 @@ var LLMGateway = class {
515
550
  const { responseFormat, ...rest } = options;
516
551
  return rest;
517
552
  }
553
+ /**
554
+ * Builds the outbound headers for a call to the NebulaOS LLM Gateway.
555
+ *
556
+ * Under ADR-0002, correlation with the NebulaOS backend is carried on
557
+ * domain-scoped `x-nebula-*` headers that APMs of the host process do not
558
+ * touch. The standard W3C `traceparent` is still emitted (same trace-id /
559
+ * span-id) for compatibility with caches, proxies, and log correlation —
560
+ * but the backend treats `x-nebula-traceparent` as the authoritative source.
561
+ * If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
562
+ * unaffected.
563
+ *
564
+ * Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
565
+ * no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
566
+ * Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
567
+ * plus the compat `traceparent`.
568
+ */
518
569
  buildGatewayHeaders() {
519
570
  const headers = {
520
- "x-request-id": (0, import_node_crypto.randomUUID)()
571
+ "x-nebula-request-id": (0, import_node_crypto.randomUUID)()
521
572
  };
522
573
  const ctx = import_core.Tracing.getContext();
574
+ const traceId = ctx?.traceId ?? (0, import_node_crypto.randomBytes)(16).toString("hex");
575
+ const spanId = ctx?.spanId ?? (0, import_node_crypto.randomBytes)(8).toString("hex");
576
+ const traceparent = `00-${traceId}-${spanId}-01`;
577
+ headers["x-nebula-traceparent"] = traceparent;
578
+ headers.traceparent = traceparent;
523
579
  const executionId = ctx?.executionId ?? import_core.ExecutionContext.getOrUndefined()?.executionId;
524
580
  if (executionId) {
525
- headers["x-execution-id"] = executionId;
581
+ headers["x-nebula-execution-id"] = executionId;
526
582
  }
527
583
  if (ctx?.resourceName) {
528
- headers["x-resource-name"] = ctx.resourceName;
584
+ headers["x-nebula-resource-name"] = ctx.resourceName;
585
+ }
586
+ if (ctx?.resourceType) {
587
+ headers["x-nebula-resource-type"] = ctx.resourceType;
529
588
  }
530
- if (ctx) {
531
- headers.traceparent = `00-${ctx.traceId}-${ctx.spanId}-01`;
532
- } else {
533
- const traceId = (0, import_node_crypto.randomBytes)(16).toString("hex");
534
- const spanId = (0, import_node_crypto.randomBytes)(8).toString("hex");
535
- headers.traceparent = `00-${traceId}-${spanId}-01`;
589
+ if (ctx?.workspaceId) {
590
+ headers["x-nebula-workspace-id"] = ctx.workspaceId;
536
591
  }
537
592
  return headers;
538
593
  }
@@ -688,8 +743,11 @@ var LLMGateway = class {
688
743
  convertContentPart(part) {
689
744
  if (part.type === "text") return { type: "text", text: part.text };
690
745
  if (part.type === "file") {
691
- const { data, mediaType } = part;
692
- if (!mediaType.startsWith("image/")) {
746
+ const { data, mediaType, filename } = part;
747
+ const isImage = mediaType.startsWith("image/");
748
+ const isPdf = mediaType === "application/pdf";
749
+ const isText = mediaType.startsWith("text/");
750
+ if (!isImage && !isPdf && !isText) {
693
751
  throw new Error(`LLM Gateway: file mediaType '${mediaType}' is not supported yet`);
694
752
  }
695
753
  let url;
@@ -705,7 +763,16 @@ var LLMGateway = class {
705
763
  } else {
706
764
  throw new Error(`LLM Gateway: unsupported file data type`);
707
765
  }
708
- return { type: "image_url", image_url: { url } };
766
+ if (isImage) {
767
+ return { type: "image_url", image_url: { url } };
768
+ }
769
+ return {
770
+ type: "file",
771
+ file: {
772
+ file_data: url,
773
+ filename: filename ?? (isPdf ? "document.pdf" : "document.txt")
774
+ }
775
+ };
709
776
  }
710
777
  throw new Error(`Unsupported content type: ${part.type}`);
711
778
  }