@nebulaos/llm-gateway 0.1.8 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +16 -0
- package/dist/index.d.ts +16 -0
- package/dist/index.js +234 -152
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +234 -152
- package/dist/index.mjs.map +1 -1
- package/package.json +5 -5
package/dist/index.d.mts
CHANGED
|
@@ -75,6 +75,22 @@ declare class LLMGateway implements IModel {
|
|
|
75
75
|
*/
|
|
76
76
|
private extractErrorSource;
|
|
77
77
|
private extractExtraOptions;
|
|
78
|
+
/**
|
|
79
|
+
* Builds the outbound headers for a call to the NebulaOS LLM Gateway.
|
|
80
|
+
*
|
|
81
|
+
* Under ADR-0002, correlation with the NebulaOS backend is carried on
|
|
82
|
+
* domain-scoped `x-nebula-*` headers that APMs of the host process do not
|
|
83
|
+
* touch. The standard W3C `traceparent` is still emitted (same trace-id /
|
|
84
|
+
* span-id) for compatibility with caches, proxies, and log correlation —
|
|
85
|
+
* but the backend treats `x-nebula-traceparent` as the authoritative source.
|
|
86
|
+
* If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
|
|
87
|
+
* unaffected.
|
|
88
|
+
*
|
|
89
|
+
* Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
|
|
90
|
+
* no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
|
|
91
|
+
* Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
|
|
92
|
+
* plus the compat `traceparent`.
|
|
93
|
+
*/
|
|
78
94
|
private buildGatewayHeaders;
|
|
79
95
|
/**
|
|
80
96
|
* Extracts enrichment data from backend HTTP headers.
|
package/dist/index.d.ts
CHANGED
|
@@ -75,6 +75,22 @@ declare class LLMGateway implements IModel {
|
|
|
75
75
|
*/
|
|
76
76
|
private extractErrorSource;
|
|
77
77
|
private extractExtraOptions;
|
|
78
|
+
/**
|
|
79
|
+
* Builds the outbound headers for a call to the NebulaOS LLM Gateway.
|
|
80
|
+
*
|
|
81
|
+
* Under ADR-0002, correlation with the NebulaOS backend is carried on
|
|
82
|
+
* domain-scoped `x-nebula-*` headers that APMs of the host process do not
|
|
83
|
+
* touch. The standard W3C `traceparent` is still emitted (same trace-id /
|
|
84
|
+
* span-id) for compatibility with caches, proxies, and log correlation —
|
|
85
|
+
* but the backend treats `x-nebula-traceparent` as the authoritative source.
|
|
86
|
+
* If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
|
|
87
|
+
* unaffected.
|
|
88
|
+
*
|
|
89
|
+
* Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
|
|
90
|
+
* no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
|
|
91
|
+
* Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
|
|
92
|
+
* plus the compat `traceparent`.
|
|
93
|
+
*/
|
|
78
94
|
private buildGatewayHeaders;
|
|
79
95
|
/**
|
|
80
96
|
* Extracts enrichment data from backend HTTP headers.
|
package/dist/index.js
CHANGED
|
@@ -88,7 +88,7 @@ var LLMGateway = class {
|
|
|
88
88
|
};
|
|
89
89
|
return import_core.Tracing.withSpan(
|
|
90
90
|
{
|
|
91
|
-
kind: import_types.SpanType.
|
|
91
|
+
kind: import_types.SpanType.llm_wrapper,
|
|
92
92
|
name: `llm:${this.modelName}`,
|
|
93
93
|
data: startData
|
|
94
94
|
},
|
|
@@ -184,130 +184,141 @@ var LLMGateway = class {
|
|
|
184
184
|
messages,
|
|
185
185
|
tools
|
|
186
186
|
};
|
|
187
|
-
const llmSpan =
|
|
188
|
-
kind: import_types.SpanType.
|
|
187
|
+
const llmSpan = import_core.Tracing.startSpan({
|
|
188
|
+
kind: import_types.SpanType.llm_wrapper,
|
|
189
189
|
name: `llm:${this.modelName}`,
|
|
190
190
|
data: startData
|
|
191
191
|
});
|
|
192
|
-
const
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
{
|
|
204
|
-
model,
|
|
205
|
-
messages: this.convertMessages(messages),
|
|
206
|
-
tools: this.convertTools(tools),
|
|
207
|
-
stream: true,
|
|
208
|
-
stream_options: { include_usage: true },
|
|
209
|
-
response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
|
|
210
|
-
type: "json_schema",
|
|
211
|
-
json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
|
|
212
|
-
} : { type: "json_object" } : void 0,
|
|
213
|
-
...this.extractExtraOptions(mergedOptions)
|
|
214
|
-
},
|
|
215
|
-
{ headers }
|
|
216
|
-
);
|
|
217
|
-
} catch (error) {
|
|
218
|
-
this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
|
|
219
|
-
const gatewayError = this.handleError(error);
|
|
220
|
-
if (llmSpan) {
|
|
221
|
-
const errorEndData = {
|
|
222
|
-
error: {
|
|
223
|
-
message: gatewayError.message,
|
|
224
|
-
code: gatewayError.code,
|
|
225
|
-
status: gatewayError.status
|
|
226
|
-
}
|
|
227
|
-
};
|
|
228
|
-
await llmSpan.end({
|
|
229
|
-
status: "error",
|
|
230
|
-
data: errorEndData
|
|
231
|
-
});
|
|
192
|
+
const queue = [];
|
|
193
|
+
let pendingResolve = null;
|
|
194
|
+
const abortController = new AbortController();
|
|
195
|
+
let consumerAborted = false;
|
|
196
|
+
const push = (item) => {
|
|
197
|
+
if (pendingResolve) {
|
|
198
|
+
const resolve = pendingResolve;
|
|
199
|
+
pendingResolve = null;
|
|
200
|
+
resolve(item);
|
|
201
|
+
} else {
|
|
202
|
+
queue.push(item);
|
|
232
203
|
}
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
const
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
204
|
+
};
|
|
205
|
+
const pull = () => {
|
|
206
|
+
if (queue.length > 0) return Promise.resolve(queue.shift());
|
|
207
|
+
return new Promise((resolve) => {
|
|
208
|
+
pendingResolve = resolve;
|
|
209
|
+
});
|
|
210
|
+
};
|
|
211
|
+
const producer = import_core.Tracing.runWithSpan(llmSpan, async () => {
|
|
212
|
+
const headers = this.buildGatewayHeaders();
|
|
213
|
+
this.logger.debug("LLM Gateway stream request", {
|
|
214
|
+
model,
|
|
215
|
+
baseUrl: this.baseUrl,
|
|
216
|
+
stream: true,
|
|
217
|
+
messageCount: messages.length,
|
|
218
|
+
toolCount: tools?.length ?? 0
|
|
219
|
+
});
|
|
220
|
+
let stream;
|
|
221
|
+
try {
|
|
222
|
+
stream = await this.client.chat.completions.create(
|
|
223
|
+
{
|
|
224
|
+
model,
|
|
225
|
+
messages: this.convertMessages(messages),
|
|
226
|
+
tools: this.convertTools(tools),
|
|
227
|
+
stream: true,
|
|
228
|
+
stream_options: { include_usage: true },
|
|
229
|
+
response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
|
|
230
|
+
type: "json_schema",
|
|
231
|
+
json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
|
|
232
|
+
} : { type: "json_object" } : void 0,
|
|
233
|
+
...this.extractExtraOptions(mergedOptions)
|
|
234
|
+
},
|
|
235
|
+
{ headers, signal: abortController.signal }
|
|
236
|
+
);
|
|
237
|
+
} catch (error) {
|
|
238
|
+
this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
|
|
239
|
+
throw this.handleError(error);
|
|
240
|
+
}
|
|
241
|
+
let finalUsage;
|
|
242
|
+
let finalFinishReason;
|
|
243
|
+
let toolCallsCount = 0;
|
|
244
|
+
let outputPreview = "";
|
|
245
|
+
let finalContent = "";
|
|
246
|
+
const toolCallsAccumulator = /* @__PURE__ */ new Map();
|
|
247
|
+
try {
|
|
248
|
+
for await (const chunk of stream) {
|
|
249
|
+
if (abortController.signal.aborted) break;
|
|
250
|
+
if (chunk.usage) {
|
|
251
|
+
finalUsage = this.mapUsage(chunk.usage);
|
|
252
|
+
push({
|
|
253
|
+
kind: "chunk",
|
|
254
|
+
value: { type: "finish", reason: "stop", usage: finalUsage }
|
|
255
|
+
});
|
|
266
256
|
}
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
257
|
+
const choice = chunk.choices?.[0];
|
|
258
|
+
if (!choice) continue;
|
|
259
|
+
if (choice.finish_reason) {
|
|
260
|
+
finalFinishReason = this.mapFinishReason(choice.finish_reason);
|
|
261
|
+
push({
|
|
262
|
+
kind: "chunk",
|
|
263
|
+
value: { type: "finish", reason: finalFinishReason }
|
|
264
|
+
});
|
|
265
|
+
}
|
|
266
|
+
const delta = choice.delta;
|
|
267
|
+
if (!delta) continue;
|
|
268
|
+
if (delta.content) {
|
|
269
|
+
finalContent += delta.content;
|
|
270
|
+
if (outputPreview.length < 200) {
|
|
271
|
+
outputPreview += delta.content.slice(0, 200 - outputPreview.length);
|
|
281
272
|
}
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
273
|
+
push({ kind: "chunk", value: { type: "content_delta", delta: delta.content } });
|
|
274
|
+
}
|
|
275
|
+
if (delta.tool_calls) {
|
|
276
|
+
for (const tc of delta.tool_calls) {
|
|
277
|
+
const idx = tc.index;
|
|
278
|
+
if (tc.id && tc.function?.name) {
|
|
279
|
+
toolCallsCount++;
|
|
280
|
+
toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
|
|
281
|
+
push({
|
|
282
|
+
kind: "chunk",
|
|
283
|
+
value: {
|
|
284
|
+
type: "tool_call_start",
|
|
285
|
+
index: idx,
|
|
286
|
+
id: tc.id,
|
|
287
|
+
name: tc.function.name
|
|
288
|
+
}
|
|
289
|
+
});
|
|
290
|
+
}
|
|
291
|
+
if (tc.function?.arguments) {
|
|
292
|
+
const existing = toolCallsAccumulator.get(idx);
|
|
293
|
+
if (existing) {
|
|
294
|
+
existing.arguments += tc.function.arguments;
|
|
295
|
+
}
|
|
296
|
+
push({
|
|
297
|
+
kind: "chunk",
|
|
298
|
+
value: {
|
|
299
|
+
type: "tool_call_delta",
|
|
300
|
+
index: idx,
|
|
301
|
+
args: tc.function.arguments
|
|
302
|
+
}
|
|
303
|
+
});
|
|
286
304
|
}
|
|
287
|
-
yield {
|
|
288
|
-
type: "tool_call_delta",
|
|
289
|
-
index: idx,
|
|
290
|
-
args: tc.function.arguments
|
|
291
|
-
};
|
|
292
305
|
}
|
|
293
306
|
}
|
|
294
307
|
}
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
}];
|
|
310
|
-
if (llmSpan) {
|
|
308
|
+
const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
|
|
309
|
+
id: tc.id,
|
|
310
|
+
type: "function",
|
|
311
|
+
function: { name: tc.name, arguments: tc.arguments }
|
|
312
|
+
}));
|
|
313
|
+
const choices = [{
|
|
314
|
+
index: 0,
|
|
315
|
+
message: {
|
|
316
|
+
role: "assistant",
|
|
317
|
+
content: finalContent || null,
|
|
318
|
+
tool_calls: toolCalls.length > 0 ? toolCalls : void 0
|
|
319
|
+
},
|
|
320
|
+
finish_reason: finalFinishReason
|
|
321
|
+
}];
|
|
311
322
|
const endData = {
|
|
312
323
|
usage: finalUsage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
|
|
313
324
|
finishReason: finalFinishReason ?? "stop",
|
|
@@ -315,28 +326,56 @@ var LLMGateway = class {
|
|
|
315
326
|
outputPreview,
|
|
316
327
|
choices: this.sanitizeChoices(choices)
|
|
317
328
|
};
|
|
318
|
-
await llmSpan.end({
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
329
|
+
await llmSpan.end({ status: "success", data: endData });
|
|
330
|
+
} catch (error) {
|
|
331
|
+
this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
|
|
332
|
+
throw this.handleError(error);
|
|
322
333
|
}
|
|
323
|
-
}
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
334
|
+
}).then(
|
|
335
|
+
() => push({ kind: "done" }),
|
|
336
|
+
(error) => push({ kind: "error", error })
|
|
337
|
+
);
|
|
338
|
+
let completedNormally = false;
|
|
339
|
+
try {
|
|
340
|
+
while (true) {
|
|
341
|
+
const item = await pull();
|
|
342
|
+
if (item.kind === "chunk") {
|
|
343
|
+
yield item.value;
|
|
344
|
+
} else if (item.kind === "done") {
|
|
345
|
+
completedNormally = true;
|
|
346
|
+
return;
|
|
347
|
+
} else {
|
|
348
|
+
completedNormally = true;
|
|
349
|
+
const gatewayError = item.error instanceof LLMGatewayError ? item.error : this.handleError(item.error);
|
|
350
|
+
if (!llmSpan.isEnded) {
|
|
351
|
+
const errorEndData = {
|
|
352
|
+
error: {
|
|
353
|
+
message: gatewayError.message,
|
|
354
|
+
code: gatewayError.code,
|
|
355
|
+
status: gatewayError.status
|
|
356
|
+
}
|
|
357
|
+
};
|
|
358
|
+
await llmSpan.end({ status: "error", data: errorEndData });
|
|
332
359
|
}
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
360
|
+
throw gatewayError;
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
} finally {
|
|
364
|
+
if (!completedNormally) {
|
|
365
|
+
consumerAborted = true;
|
|
366
|
+
abortController.abort();
|
|
367
|
+
if (!llmSpan.isEnded) {
|
|
368
|
+
try {
|
|
369
|
+
await llmSpan.end({ status: "cancelled" });
|
|
370
|
+
} catch {
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
try {
|
|
375
|
+
await producer;
|
|
376
|
+
} catch {
|
|
338
377
|
}
|
|
339
|
-
|
|
378
|
+
void consumerAborted;
|
|
340
379
|
}
|
|
341
380
|
}
|
|
342
381
|
// ==========================================================================
|
|
@@ -515,21 +554,44 @@ var LLMGateway = class {
|
|
|
515
554
|
const { responseFormat, ...rest } = options;
|
|
516
555
|
return rest;
|
|
517
556
|
}
|
|
557
|
+
/**
|
|
558
|
+
* Builds the outbound headers for a call to the NebulaOS LLM Gateway.
|
|
559
|
+
*
|
|
560
|
+
* Under ADR-0002, correlation with the NebulaOS backend is carried on
|
|
561
|
+
* domain-scoped `x-nebula-*` headers that APMs of the host process do not
|
|
562
|
+
* touch. The standard W3C `traceparent` is still emitted (same trace-id /
|
|
563
|
+
* span-id) for compatibility with caches, proxies, and log correlation —
|
|
564
|
+
* but the backend treats `x-nebula-traceparent` as the authoritative source.
|
|
565
|
+
* If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
|
|
566
|
+
* unaffected.
|
|
567
|
+
*
|
|
568
|
+
* Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
|
|
569
|
+
* no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
|
|
570
|
+
* Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
|
|
571
|
+
* plus the compat `traceparent`.
|
|
572
|
+
*/
|
|
518
573
|
buildGatewayHeaders() {
|
|
519
574
|
const headers = {
|
|
520
|
-
"x-request-id": (0, import_node_crypto.randomUUID)()
|
|
575
|
+
"x-nebula-request-id": (0, import_node_crypto.randomUUID)()
|
|
521
576
|
};
|
|
522
577
|
const ctx = import_core.Tracing.getContext();
|
|
578
|
+
const traceId = ctx?.traceId ?? (0, import_node_crypto.randomBytes)(16).toString("hex");
|
|
579
|
+
const spanId = ctx?.spanId ?? (0, import_node_crypto.randomBytes)(8).toString("hex");
|
|
580
|
+
const traceparent = `00-${traceId}-${spanId}-01`;
|
|
581
|
+
headers["x-nebula-traceparent"] = traceparent;
|
|
582
|
+
headers.traceparent = traceparent;
|
|
523
583
|
const executionId = ctx?.executionId ?? import_core.ExecutionContext.getOrUndefined()?.executionId;
|
|
524
584
|
if (executionId) {
|
|
525
|
-
headers["x-execution-id"] = executionId;
|
|
585
|
+
headers["x-nebula-execution-id"] = executionId;
|
|
526
586
|
}
|
|
527
|
-
if (ctx) {
|
|
528
|
-
headers
|
|
529
|
-
}
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
587
|
+
if (ctx?.resourceName) {
|
|
588
|
+
headers["x-nebula-resource-name"] = ctx.resourceName;
|
|
589
|
+
}
|
|
590
|
+
if (ctx?.resourceType) {
|
|
591
|
+
headers["x-nebula-resource-type"] = ctx.resourceType;
|
|
592
|
+
}
|
|
593
|
+
if (ctx?.workspaceId) {
|
|
594
|
+
headers["x-nebula-workspace-id"] = ctx.workspaceId;
|
|
533
595
|
}
|
|
534
596
|
return headers;
|
|
535
597
|
}
|
|
@@ -685,18 +747,38 @@ var LLMGateway = class {
|
|
|
685
747
|
convertContentPart(part) {
|
|
686
748
|
if (part.type === "text") return { type: "text", text: part.text };
|
|
687
749
|
if (part.type === "file") {
|
|
688
|
-
const {
|
|
689
|
-
|
|
690
|
-
|
|
750
|
+
const { data, mediaType, filename } = part;
|
|
751
|
+
const isImage = mediaType.startsWith("image/");
|
|
752
|
+
const isPdf = mediaType === "application/pdf";
|
|
753
|
+
const isText = mediaType.startsWith("text/");
|
|
754
|
+
if (!isImage && !isPdf && !isText) {
|
|
755
|
+
throw new Error(`LLM Gateway: file mediaType '${mediaType}' is not supported yet`);
|
|
691
756
|
}
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
757
|
+
let url;
|
|
758
|
+
if (data instanceof Uint8Array) {
|
|
759
|
+
const base64 = Buffer.from(data).toString("base64");
|
|
760
|
+
url = `data:${mediaType};base64,${base64}`;
|
|
761
|
+
} else if (typeof data === "string") {
|
|
762
|
+
if (data.startsWith("data:") || data.includes("://")) {
|
|
763
|
+
url = data;
|
|
764
|
+
} else {
|
|
765
|
+
url = `data:${mediaType};base64,${data}`;
|
|
766
|
+
}
|
|
767
|
+
} else {
|
|
768
|
+
throw new Error(`LLM Gateway: unsupported file data type`);
|
|
769
|
+
}
|
|
770
|
+
if (isImage) {
|
|
771
|
+
return { type: "image_url", image_url: { url } };
|
|
772
|
+
}
|
|
773
|
+
return {
|
|
774
|
+
type: "file",
|
|
775
|
+
file: {
|
|
776
|
+
file_data: url,
|
|
777
|
+
filename: filename ?? (isPdf ? "document.pdf" : "document.txt")
|
|
778
|
+
}
|
|
779
|
+
};
|
|
697
780
|
}
|
|
698
|
-
|
|
699
|
-
throw new Error(`Unsupported content type: ${_exhaustive.type}`);
|
|
781
|
+
throw new Error(`Unsupported content type: ${part.type}`);
|
|
700
782
|
}
|
|
701
783
|
/**
|
|
702
784
|
* Sanitize choices for observability storage.
|