@nebulaos/llm-gateway 0.1.9 → 0.2.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
package/dist/index.mjs CHANGED
@@ -51,9 +51,7 @@ var LLMGateway = class {
51
51
  messagesCount: messages.length,
52
52
  toolsCount: tools?.length ?? 0,
53
53
  llmConfig: Object.keys(llmConfig).length > 0 ? llmConfig : void 0,
54
- responseFormat,
55
- messages,
56
- tools
54
+ responseFormat
57
55
  };
58
56
  return Tracing.withSpan(
59
57
  {
@@ -149,134 +147,143 @@ var LLMGateway = class {
149
147
  messagesCount: messages.length,
150
148
  toolsCount: tools?.length ?? 0,
151
149
  llmConfig: Object.keys(llmConfig).length > 0 ? llmConfig : void 0,
152
- responseFormat,
153
- messages,
154
- tools
150
+ responseFormat
155
151
  };
156
- const llmSpan = await Tracing.startSpan({
152
+ const llmSpan = Tracing.startSpan({
157
153
  kind: SpanType.llm_wrapper,
158
154
  name: `llm:${this.modelName}`,
159
155
  data: startData
160
156
  });
161
- const headers = this.buildGatewayHeaders();
162
- this.logger.debug("LLM Gateway stream request", {
163
- model,
164
- baseUrl: this.baseUrl,
165
- stream: true,
166
- messageCount: messages.length,
167
- toolCount: tools?.length ?? 0
168
- });
169
- let stream;
170
- try {
171
- stream = await this.client.chat.completions.create(
172
- {
173
- model,
174
- messages: this.convertMessages(messages),
175
- tools: this.convertTools(tools),
176
- stream: true,
177
- stream_options: { include_usage: true },
178
- response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
179
- type: "json_schema",
180
- json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
181
- } : { type: "json_object" } : void 0,
182
- ...this.extractExtraOptions(mergedOptions)
183
- },
184
- { headers }
185
- );
186
- } catch (error) {
187
- this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
188
- const gatewayError = this.handleError(error);
189
- if (llmSpan) {
190
- const errorEndData = {
191
- error: {
192
- message: gatewayError.message,
193
- code: gatewayError.code,
194
- status: gatewayError.status
195
- }
196
- };
197
- await llmSpan.end({
198
- status: "error",
199
- data: errorEndData
200
- });
157
+ const queue = [];
158
+ let pendingResolve = null;
159
+ const abortController = new AbortController();
160
+ let consumerAborted = false;
161
+ const push = (item) => {
162
+ if (pendingResolve) {
163
+ const resolve = pendingResolve;
164
+ pendingResolve = null;
165
+ resolve(item);
166
+ } else {
167
+ queue.push(item);
201
168
  }
202
- throw gatewayError;
203
- }
204
- let finalUsage;
205
- let finalFinishReason;
206
- let toolCallsCount = 0;
207
- let outputPreview = "";
208
- let finalContent = "";
209
- const toolCallsAccumulator = /* @__PURE__ */ new Map();
210
- try {
211
- for await (const chunk of stream) {
212
- if (chunk.usage) {
213
- finalUsage = this.mapUsage(chunk.usage);
214
- yield {
215
- type: "finish",
216
- reason: "stop",
217
- usage: finalUsage
218
- };
219
- }
220
- const choice = chunk.choices?.[0];
221
- if (!choice) continue;
222
- if (choice.finish_reason) {
223
- finalFinishReason = this.mapFinishReason(choice.finish_reason);
224
- yield {
225
- type: "finish",
226
- reason: finalFinishReason
227
- };
228
- }
229
- const delta = choice.delta;
230
- if (!delta) continue;
231
- if (delta.content) {
232
- finalContent += delta.content;
233
- if (outputPreview.length < 200) {
234
- outputPreview += delta.content.slice(0, 200 - outputPreview.length);
169
+ };
170
+ const pull = () => {
171
+ if (queue.length > 0) return Promise.resolve(queue.shift());
172
+ return new Promise((resolve) => {
173
+ pendingResolve = resolve;
174
+ });
175
+ };
176
+ const producer = Tracing.runWithSpan(llmSpan, async () => {
177
+ const headers = this.buildGatewayHeaders();
178
+ this.logger.debug("LLM Gateway stream request", {
179
+ model,
180
+ baseUrl: this.baseUrl,
181
+ stream: true,
182
+ messageCount: messages.length,
183
+ toolCount: tools?.length ?? 0
184
+ });
185
+ let stream;
186
+ try {
187
+ stream = await this.client.chat.completions.create(
188
+ {
189
+ model,
190
+ messages: this.convertMessages(messages),
191
+ tools: this.convertTools(tools),
192
+ stream: true,
193
+ stream_options: { include_usage: true },
194
+ response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
195
+ type: "json_schema",
196
+ json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
197
+ } : { type: "json_object" } : void 0,
198
+ ...this.extractExtraOptions(mergedOptions)
199
+ },
200
+ { headers, signal: abortController.signal }
201
+ );
202
+ } catch (error) {
203
+ this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
204
+ throw this.handleError(error);
205
+ }
206
+ let finalUsage;
207
+ let finalFinishReason;
208
+ let toolCallsCount = 0;
209
+ let outputPreview = "";
210
+ let finalContent = "";
211
+ const toolCallsAccumulator = /* @__PURE__ */ new Map();
212
+ try {
213
+ for await (const chunk of stream) {
214
+ if (abortController.signal.aborted) break;
215
+ if (chunk.usage) {
216
+ finalUsage = this.mapUsage(chunk.usage);
217
+ push({
218
+ kind: "chunk",
219
+ value: { type: "finish", reason: "stop", usage: finalUsage }
220
+ });
235
221
  }
236
- yield { type: "content_delta", delta: delta.content };
237
- }
238
- if (delta.tool_calls) {
239
- for (const tc of delta.tool_calls) {
240
- const idx = tc.index;
241
- if (tc.id && tc.function?.name) {
242
- toolCallsCount++;
243
- toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
244
- yield {
245
- type: "tool_call_start",
246
- index: idx,
247
- id: tc.id,
248
- name: tc.function.name
249
- };
222
+ const choice = chunk.choices?.[0];
223
+ if (!choice) continue;
224
+ if (choice.finish_reason) {
225
+ finalFinishReason = this.mapFinishReason(choice.finish_reason);
226
+ push({
227
+ kind: "chunk",
228
+ value: { type: "finish", reason: finalFinishReason }
229
+ });
230
+ }
231
+ const delta = choice.delta;
232
+ if (!delta) continue;
233
+ if (delta.content) {
234
+ finalContent += delta.content;
235
+ if (outputPreview.length < 200) {
236
+ outputPreview += delta.content.slice(0, 200 - outputPreview.length);
250
237
  }
251
- if (tc.function?.arguments) {
252
- const existing = toolCallsAccumulator.get(idx);
253
- if (existing) {
254
- existing.arguments += tc.function.arguments;
238
+ push({ kind: "chunk", value: { type: "content_delta", delta: delta.content } });
239
+ }
240
+ if (delta.tool_calls) {
241
+ for (const tc of delta.tool_calls) {
242
+ const idx = tc.index;
243
+ if (tc.id && tc.function?.name) {
244
+ toolCallsCount++;
245
+ toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
246
+ push({
247
+ kind: "chunk",
248
+ value: {
249
+ type: "tool_call_start",
250
+ index: idx,
251
+ id: tc.id,
252
+ name: tc.function.name
253
+ }
254
+ });
255
+ }
256
+ if (tc.function?.arguments) {
257
+ const existing = toolCallsAccumulator.get(idx);
258
+ if (existing) {
259
+ existing.arguments += tc.function.arguments;
260
+ }
261
+ push({
262
+ kind: "chunk",
263
+ value: {
264
+ type: "tool_call_delta",
265
+ index: idx,
266
+ args: tc.function.arguments
267
+ }
268
+ });
255
269
  }
256
- yield {
257
- type: "tool_call_delta",
258
- index: idx,
259
- args: tc.function.arguments
260
- };
261
270
  }
262
271
  }
263
272
  }
264
- }
265
- const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
266
- id: tc.id,
267
- type: "function",
268
- function: { name: tc.name, arguments: tc.arguments }
269
- }));
270
- const choices = [{
271
- index: 0,
272
- message: {
273
- role: "assistant",
274
- content: finalContent || null,
275
- tool_calls: toolCalls.length > 0 ? toolCalls : void 0
276
- },
277
- finish_reason: finalFinishReason
278
- }];
279
- if (llmSpan) {
273
+ const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
274
+ id: tc.id,
275
+ type: "function",
276
+ function: { name: tc.name, arguments: tc.arguments }
277
+ }));
278
+ const choices = [{
279
+ index: 0,
280
+ message: {
281
+ role: "assistant",
282
+ content: finalContent || null,
283
+ tool_calls: toolCalls.length > 0 ? toolCalls : void 0
284
+ },
285
+ finish_reason: finalFinishReason
286
+ }];
280
287
  const endData = {
281
288
  usage: finalUsage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
282
289
  finishReason: finalFinishReason ?? "stop",
@@ -284,28 +291,56 @@ var LLMGateway = class {
284
291
  outputPreview,
285
292
  choices: this.sanitizeChoices(choices)
286
293
  };
287
- await llmSpan.end({
288
- status: "success",
289
- data: endData
290
- });
294
+ await llmSpan.end({ status: "success", data: endData });
295
+ } catch (error) {
296
+ this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
297
+ throw this.handleError(error);
291
298
  }
292
- } catch (error) {
293
- this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
294
- const gatewayError = this.handleError(error);
295
- if (llmSpan) {
296
- const errorEndData = {
297
- error: {
298
- message: gatewayError.message,
299
- code: gatewayError.code,
300
- status: gatewayError.status
299
+ }).then(
300
+ () => push({ kind: "done" }),
301
+ (error) => push({ kind: "error", error })
302
+ );
303
+ let completedNormally = false;
304
+ try {
305
+ while (true) {
306
+ const item = await pull();
307
+ if (item.kind === "chunk") {
308
+ yield item.value;
309
+ } else if (item.kind === "done") {
310
+ completedNormally = true;
311
+ return;
312
+ } else {
313
+ completedNormally = true;
314
+ const gatewayError = item.error instanceof LLMGatewayError ? item.error : this.handleError(item.error);
315
+ if (!llmSpan.isEnded) {
316
+ const errorEndData = {
317
+ error: {
318
+ message: gatewayError.message,
319
+ code: gatewayError.code,
320
+ status: gatewayError.status
321
+ }
322
+ };
323
+ await llmSpan.end({ status: "error", data: errorEndData });
301
324
  }
302
- };
303
- await llmSpan.end({
304
- status: "error",
305
- data: errorEndData
306
- });
325
+ throw gatewayError;
326
+ }
307
327
  }
308
- throw gatewayError;
328
+ } finally {
329
+ if (!completedNormally) {
330
+ consumerAborted = true;
331
+ abortController.abort();
332
+ if (!llmSpan.isEnded) {
333
+ try {
334
+ await llmSpan.end({ status: "cancelled" });
335
+ } catch {
336
+ }
337
+ }
338
+ }
339
+ try {
340
+ await producer;
341
+ } catch {
342
+ }
343
+ void consumerAborted;
309
344
  }
310
345
  }
311
346
  // ==========================================================================
@@ -484,24 +519,44 @@ var LLMGateway = class {
484
519
  const { responseFormat, ...rest } = options;
485
520
  return rest;
486
521
  }
522
+ /**
523
+ * Builds the outbound headers for a call to the NebulaOS LLM Gateway.
524
+ *
525
+ * Under ADR-0002, correlation with the NebulaOS backend is carried on
526
+ * domain-scoped `x-nebula-*` headers that APMs of the host process do not
527
+ * touch. The standard W3C `traceparent` is still emitted (same trace-id /
528
+ * span-id) for compatibility with caches, proxies, and log correlation —
529
+ * but the backend treats `x-nebula-traceparent` as the authoritative source.
530
+ * If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
531
+ * unaffected.
532
+ *
533
+ * Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
534
+ * no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
535
+ * Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
536
+ * plus the compat `traceparent`.
537
+ */
487
538
  buildGatewayHeaders() {
488
539
  const headers = {
489
- "x-request-id": randomUUID()
540
+ "x-nebula-request-id": randomUUID()
490
541
  };
491
542
  const ctx = Tracing.getContext();
543
+ const traceId = ctx?.traceId ?? randomBytes(16).toString("hex");
544
+ const spanId = ctx?.spanId ?? randomBytes(8).toString("hex");
545
+ const traceparent = `00-${traceId}-${spanId}-01`;
546
+ headers["x-nebula-traceparent"] = traceparent;
547
+ headers.traceparent = traceparent;
492
548
  const executionId = ctx?.executionId ?? ExecutionContext.getOrUndefined()?.executionId;
493
549
  if (executionId) {
494
- headers["x-execution-id"] = executionId;
550
+ headers["x-nebula-execution-id"] = executionId;
495
551
  }
496
552
  if (ctx?.resourceName) {
497
- headers["x-resource-name"] = ctx.resourceName;
553
+ headers["x-nebula-resource-name"] = ctx.resourceName;
554
+ }
555
+ if (ctx?.resourceType) {
556
+ headers["x-nebula-resource-type"] = ctx.resourceType;
498
557
  }
499
- if (ctx) {
500
- headers.traceparent = `00-${ctx.traceId}-${ctx.spanId}-01`;
501
- } else {
502
- const traceId = randomBytes(16).toString("hex");
503
- const spanId = randomBytes(8).toString("hex");
504
- headers.traceparent = `00-${traceId}-${spanId}-01`;
558
+ if (ctx?.workspaceId) {
559
+ headers["x-nebula-workspace-id"] = ctx.workspaceId;
505
560
  }
506
561
  return headers;
507
562
  }
@@ -657,8 +712,11 @@ var LLMGateway = class {
657
712
  convertContentPart(part) {
658
713
  if (part.type === "text") return { type: "text", text: part.text };
659
714
  if (part.type === "file") {
660
- const { data, mediaType } = part;
661
- if (!mediaType.startsWith("image/")) {
715
+ const { data, mediaType, filename } = part;
716
+ const isImage = mediaType.startsWith("image/");
717
+ const isPdf = mediaType === "application/pdf";
718
+ const isText = mediaType.startsWith("text/");
719
+ if (!isImage && !isPdf && !isText) {
662
720
  throw new Error(`LLM Gateway: file mediaType '${mediaType}' is not supported yet`);
663
721
  }
664
722
  let url;
@@ -674,7 +732,16 @@ var LLMGateway = class {
674
732
  } else {
675
733
  throw new Error(`LLM Gateway: unsupported file data type`);
676
734
  }
677
- return { type: "image_url", image_url: { url } };
735
+ if (isImage) {
736
+ return { type: "image_url", image_url: { url } };
737
+ }
738
+ return {
739
+ type: "file",
740
+ file: {
741
+ file_data: url,
742
+ filename: filename ?? (isPdf ? "document.pdf" : "document.txt")
743
+ }
744
+ };
678
745
  }
679
746
  throw new Error(`Unsupported content type: ${part.type}`);
680
747
  }