@nebulaos/llm-gateway 0.1.9 → 0.2.0

This diff shows the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -75,6 +75,22 @@ declare class LLMGateway implements IModel {
75
75
  */
76
76
  private extractErrorSource;
77
77
  private extractExtraOptions;
78
+ /**
79
+ * Builds the outbound headers for a call to the NebulaOS LLM Gateway.
80
+ *
81
+ * Under ADR-0002, correlation with the NebulaOS backend is carried on
82
+ * domain-scoped `x-nebula-*` headers that APMs of the host process do not
83
+ * touch. The standard W3C `traceparent` is still emitted (same trace-id /
84
+ * span-id) for compatibility with caches, proxies, and log correlation —
85
+ * but the backend treats `x-nebula-traceparent` as the authoritative source.
86
+ * If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
87
+ * unaffected.
88
+ *
89
+ * Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
90
+ * no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
91
+ * Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
92
+ * plus the compat `traceparent`.
93
+ */
78
94
  private buildGatewayHeaders;
79
95
  /**
80
96
  * Extracts enrichment data from backend HTTP headers.
package/dist/index.d.ts CHANGED
@@ -75,6 +75,22 @@ declare class LLMGateway implements IModel {
75
75
  */
76
76
  private extractErrorSource;
77
77
  private extractExtraOptions;
78
+ /**
79
+ * Builds the outbound headers for a call to the NebulaOS LLM Gateway.
80
+ *
81
+ * Under ADR-0002, correlation with the NebulaOS backend is carried on
82
+ * domain-scoped `x-nebula-*` headers that APMs of the host process do not
83
+ * touch. The standard W3C `traceparent` is still emitted (same trace-id /
84
+ * span-id) for compatibility with caches, proxies, and log correlation —
85
+ * but the backend treats `x-nebula-traceparent` as the authoritative source.
86
+ * If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
87
+ * unaffected.
88
+ *
89
+ * Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
90
+ * no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
91
+ * Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
92
+ * plus the compat `traceparent`.
93
+ */
78
94
  private buildGatewayHeaders;
79
95
  /**
80
96
  * Extracts enrichment data from backend HTTP headers.
package/dist/index.js CHANGED
@@ -184,130 +184,141 @@ var LLMGateway = class {
184
184
  messages,
185
185
  tools
186
186
  };
187
- const llmSpan = await import_core.Tracing.startSpan({
187
+ const llmSpan = import_core.Tracing.startSpan({
188
188
  kind: import_types.SpanType.llm_wrapper,
189
189
  name: `llm:${this.modelName}`,
190
190
  data: startData
191
191
  });
192
- const headers = this.buildGatewayHeaders();
193
- this.logger.debug("LLM Gateway stream request", {
194
- model,
195
- baseUrl: this.baseUrl,
196
- stream: true,
197
- messageCount: messages.length,
198
- toolCount: tools?.length ?? 0
199
- });
200
- let stream;
201
- try {
202
- stream = await this.client.chat.completions.create(
203
- {
204
- model,
205
- messages: this.convertMessages(messages),
206
- tools: this.convertTools(tools),
207
- stream: true,
208
- stream_options: { include_usage: true },
209
- response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
210
- type: "json_schema",
211
- json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
212
- } : { type: "json_object" } : void 0,
213
- ...this.extractExtraOptions(mergedOptions)
214
- },
215
- { headers }
216
- );
217
- } catch (error) {
218
- this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
219
- const gatewayError = this.handleError(error);
220
- if (llmSpan) {
221
- const errorEndData = {
222
- error: {
223
- message: gatewayError.message,
224
- code: gatewayError.code,
225
- status: gatewayError.status
226
- }
227
- };
228
- await llmSpan.end({
229
- status: "error",
230
- data: errorEndData
231
- });
192
+ const queue = [];
193
+ let pendingResolve = null;
194
+ const abortController = new AbortController();
195
+ let consumerAborted = false;
196
+ const push = (item) => {
197
+ if (pendingResolve) {
198
+ const resolve = pendingResolve;
199
+ pendingResolve = null;
200
+ resolve(item);
201
+ } else {
202
+ queue.push(item);
232
203
  }
233
- throw gatewayError;
234
- }
235
- let finalUsage;
236
- let finalFinishReason;
237
- let toolCallsCount = 0;
238
- let outputPreview = "";
239
- let finalContent = "";
240
- const toolCallsAccumulator = /* @__PURE__ */ new Map();
241
- try {
242
- for await (const chunk of stream) {
243
- if (chunk.usage) {
244
- finalUsage = this.mapUsage(chunk.usage);
245
- yield {
246
- type: "finish",
247
- reason: "stop",
248
- usage: finalUsage
249
- };
250
- }
251
- const choice = chunk.choices?.[0];
252
- if (!choice) continue;
253
- if (choice.finish_reason) {
254
- finalFinishReason = this.mapFinishReason(choice.finish_reason);
255
- yield {
256
- type: "finish",
257
- reason: finalFinishReason
258
- };
259
- }
260
- const delta = choice.delta;
261
- if (!delta) continue;
262
- if (delta.content) {
263
- finalContent += delta.content;
264
- if (outputPreview.length < 200) {
265
- outputPreview += delta.content.slice(0, 200 - outputPreview.length);
204
+ };
205
+ const pull = () => {
206
+ if (queue.length > 0) return Promise.resolve(queue.shift());
207
+ return new Promise((resolve) => {
208
+ pendingResolve = resolve;
209
+ });
210
+ };
211
+ const producer = import_core.Tracing.runWithSpan(llmSpan, async () => {
212
+ const headers = this.buildGatewayHeaders();
213
+ this.logger.debug("LLM Gateway stream request", {
214
+ model,
215
+ baseUrl: this.baseUrl,
216
+ stream: true,
217
+ messageCount: messages.length,
218
+ toolCount: tools?.length ?? 0
219
+ });
220
+ let stream;
221
+ try {
222
+ stream = await this.client.chat.completions.create(
223
+ {
224
+ model,
225
+ messages: this.convertMessages(messages),
226
+ tools: this.convertTools(tools),
227
+ stream: true,
228
+ stream_options: { include_usage: true },
229
+ response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
230
+ type: "json_schema",
231
+ json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
232
+ } : { type: "json_object" } : void 0,
233
+ ...this.extractExtraOptions(mergedOptions)
234
+ },
235
+ { headers, signal: abortController.signal }
236
+ );
237
+ } catch (error) {
238
+ this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
239
+ throw this.handleError(error);
240
+ }
241
+ let finalUsage;
242
+ let finalFinishReason;
243
+ let toolCallsCount = 0;
244
+ let outputPreview = "";
245
+ let finalContent = "";
246
+ const toolCallsAccumulator = /* @__PURE__ */ new Map();
247
+ try {
248
+ for await (const chunk of stream) {
249
+ if (abortController.signal.aborted) break;
250
+ if (chunk.usage) {
251
+ finalUsage = this.mapUsage(chunk.usage);
252
+ push({
253
+ kind: "chunk",
254
+ value: { type: "finish", reason: "stop", usage: finalUsage }
255
+ });
266
256
  }
267
- yield { type: "content_delta", delta: delta.content };
268
- }
269
- if (delta.tool_calls) {
270
- for (const tc of delta.tool_calls) {
271
- const idx = tc.index;
272
- if (tc.id && tc.function?.name) {
273
- toolCallsCount++;
274
- toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
275
- yield {
276
- type: "tool_call_start",
277
- index: idx,
278
- id: tc.id,
279
- name: tc.function.name
280
- };
257
+ const choice = chunk.choices?.[0];
258
+ if (!choice) continue;
259
+ if (choice.finish_reason) {
260
+ finalFinishReason = this.mapFinishReason(choice.finish_reason);
261
+ push({
262
+ kind: "chunk",
263
+ value: { type: "finish", reason: finalFinishReason }
264
+ });
265
+ }
266
+ const delta = choice.delta;
267
+ if (!delta) continue;
268
+ if (delta.content) {
269
+ finalContent += delta.content;
270
+ if (outputPreview.length < 200) {
271
+ outputPreview += delta.content.slice(0, 200 - outputPreview.length);
281
272
  }
282
- if (tc.function?.arguments) {
283
- const existing = toolCallsAccumulator.get(idx);
284
- if (existing) {
285
- existing.arguments += tc.function.arguments;
273
+ push({ kind: "chunk", value: { type: "content_delta", delta: delta.content } });
274
+ }
275
+ if (delta.tool_calls) {
276
+ for (const tc of delta.tool_calls) {
277
+ const idx = tc.index;
278
+ if (tc.id && tc.function?.name) {
279
+ toolCallsCount++;
280
+ toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
281
+ push({
282
+ kind: "chunk",
283
+ value: {
284
+ type: "tool_call_start",
285
+ index: idx,
286
+ id: tc.id,
287
+ name: tc.function.name
288
+ }
289
+ });
290
+ }
291
+ if (tc.function?.arguments) {
292
+ const existing = toolCallsAccumulator.get(idx);
293
+ if (existing) {
294
+ existing.arguments += tc.function.arguments;
295
+ }
296
+ push({
297
+ kind: "chunk",
298
+ value: {
299
+ type: "tool_call_delta",
300
+ index: idx,
301
+ args: tc.function.arguments
302
+ }
303
+ });
286
304
  }
287
- yield {
288
- type: "tool_call_delta",
289
- index: idx,
290
- args: tc.function.arguments
291
- };
292
305
  }
293
306
  }
294
307
  }
295
- }
296
- const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
297
- id: tc.id,
298
- type: "function",
299
- function: { name: tc.name, arguments: tc.arguments }
300
- }));
301
- const choices = [{
302
- index: 0,
303
- message: {
304
- role: "assistant",
305
- content: finalContent || null,
306
- tool_calls: toolCalls.length > 0 ? toolCalls : void 0
307
- },
308
- finish_reason: finalFinishReason
309
- }];
310
- if (llmSpan) {
308
+ const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
309
+ id: tc.id,
310
+ type: "function",
311
+ function: { name: tc.name, arguments: tc.arguments }
312
+ }));
313
+ const choices = [{
314
+ index: 0,
315
+ message: {
316
+ role: "assistant",
317
+ content: finalContent || null,
318
+ tool_calls: toolCalls.length > 0 ? toolCalls : void 0
319
+ },
320
+ finish_reason: finalFinishReason
321
+ }];
311
322
  const endData = {
312
323
  usage: finalUsage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
313
324
  finishReason: finalFinishReason ?? "stop",
@@ -315,28 +326,56 @@ var LLMGateway = class {
315
326
  outputPreview,
316
327
  choices: this.sanitizeChoices(choices)
317
328
  };
318
- await llmSpan.end({
319
- status: "success",
320
- data: endData
321
- });
329
+ await llmSpan.end({ status: "success", data: endData });
330
+ } catch (error) {
331
+ this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
332
+ throw this.handleError(error);
322
333
  }
323
- } catch (error) {
324
- this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
325
- const gatewayError = this.handleError(error);
326
- if (llmSpan) {
327
- const errorEndData = {
328
- error: {
329
- message: gatewayError.message,
330
- code: gatewayError.code,
331
- status: gatewayError.status
334
+ }).then(
335
+ () => push({ kind: "done" }),
336
+ (error) => push({ kind: "error", error })
337
+ );
338
+ let completedNormally = false;
339
+ try {
340
+ while (true) {
341
+ const item = await pull();
342
+ if (item.kind === "chunk") {
343
+ yield item.value;
344
+ } else if (item.kind === "done") {
345
+ completedNormally = true;
346
+ return;
347
+ } else {
348
+ completedNormally = true;
349
+ const gatewayError = item.error instanceof LLMGatewayError ? item.error : this.handleError(item.error);
350
+ if (!llmSpan.isEnded) {
351
+ const errorEndData = {
352
+ error: {
353
+ message: gatewayError.message,
354
+ code: gatewayError.code,
355
+ status: gatewayError.status
356
+ }
357
+ };
358
+ await llmSpan.end({ status: "error", data: errorEndData });
332
359
  }
333
- };
334
- await llmSpan.end({
335
- status: "error",
336
- data: errorEndData
337
- });
360
+ throw gatewayError;
361
+ }
338
362
  }
339
- throw gatewayError;
363
+ } finally {
364
+ if (!completedNormally) {
365
+ consumerAborted = true;
366
+ abortController.abort();
367
+ if (!llmSpan.isEnded) {
368
+ try {
369
+ await llmSpan.end({ status: "cancelled" });
370
+ } catch {
371
+ }
372
+ }
373
+ }
374
+ try {
375
+ await producer;
376
+ } catch {
377
+ }
378
+ void consumerAborted;
340
379
  }
341
380
  }
342
381
  // ==========================================================================
@@ -515,24 +554,44 @@ var LLMGateway = class {
515
554
  const { responseFormat, ...rest } = options;
516
555
  return rest;
517
556
  }
557
+ /**
558
+ * Builds the outbound headers for a call to the NebulaOS LLM Gateway.
559
+ *
560
+ * Under ADR-0002, correlation with the NebulaOS backend is carried on
561
+ * domain-scoped `x-nebula-*` headers that APMs of the host process do not
562
+ * touch. The standard W3C `traceparent` is still emitted (same trace-id /
563
+ * span-id) for compatibility with caches, proxies, and log correlation —
564
+ * but the backend treats `x-nebula-traceparent` as the authoritative source.
565
+ * If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
566
+ * unaffected.
567
+ *
568
+ * Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
569
+ * no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
570
+ * Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
571
+ * plus the compat `traceparent`.
572
+ */
518
573
  buildGatewayHeaders() {
519
574
  const headers = {
520
- "x-request-id": (0, import_node_crypto.randomUUID)()
575
+ "x-nebula-request-id": (0, import_node_crypto.randomUUID)()
521
576
  };
522
577
  const ctx = import_core.Tracing.getContext();
578
+ const traceId = ctx?.traceId ?? (0, import_node_crypto.randomBytes)(16).toString("hex");
579
+ const spanId = ctx?.spanId ?? (0, import_node_crypto.randomBytes)(8).toString("hex");
580
+ const traceparent = `00-${traceId}-${spanId}-01`;
581
+ headers["x-nebula-traceparent"] = traceparent;
582
+ headers.traceparent = traceparent;
523
583
  const executionId = ctx?.executionId ?? import_core.ExecutionContext.getOrUndefined()?.executionId;
524
584
  if (executionId) {
525
- headers["x-execution-id"] = executionId;
585
+ headers["x-nebula-execution-id"] = executionId;
526
586
  }
527
587
  if (ctx?.resourceName) {
528
- headers["x-resource-name"] = ctx.resourceName;
588
+ headers["x-nebula-resource-name"] = ctx.resourceName;
589
+ }
590
+ if (ctx?.resourceType) {
591
+ headers["x-nebula-resource-type"] = ctx.resourceType;
529
592
  }
530
- if (ctx) {
531
- headers.traceparent = `00-${ctx.traceId}-${ctx.spanId}-01`;
532
- } else {
533
- const traceId = (0, import_node_crypto.randomBytes)(16).toString("hex");
534
- const spanId = (0, import_node_crypto.randomBytes)(8).toString("hex");
535
- headers.traceparent = `00-${traceId}-${spanId}-01`;
593
+ if (ctx?.workspaceId) {
594
+ headers["x-nebula-workspace-id"] = ctx.workspaceId;
536
595
  }
537
596
  return headers;
538
597
  }
@@ -688,8 +747,11 @@ var LLMGateway = class {
688
747
  convertContentPart(part) {
689
748
  if (part.type === "text") return { type: "text", text: part.text };
690
749
  if (part.type === "file") {
691
- const { data, mediaType } = part;
692
- if (!mediaType.startsWith("image/")) {
750
+ const { data, mediaType, filename } = part;
751
+ const isImage = mediaType.startsWith("image/");
752
+ const isPdf = mediaType === "application/pdf";
753
+ const isText = mediaType.startsWith("text/");
754
+ if (!isImage && !isPdf && !isText) {
693
755
  throw new Error(`LLM Gateway: file mediaType '${mediaType}' is not supported yet`);
694
756
  }
695
757
  let url;
@@ -705,7 +767,16 @@ var LLMGateway = class {
705
767
  } else {
706
768
  throw new Error(`LLM Gateway: unsupported file data type`);
707
769
  }
708
- return { type: "image_url", image_url: { url } };
770
+ if (isImage) {
771
+ return { type: "image_url", image_url: { url } };
772
+ }
773
+ return {
774
+ type: "file",
775
+ file: {
776
+ file_data: url,
777
+ filename: filename ?? (isPdf ? "document.pdf" : "document.txt")
778
+ }
779
+ };
709
780
  }
710
781
  throw new Error(`Unsupported content type: ${part.type}`);
711
782
  }