@nebulaos/llm-gateway 0.1.9 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +16 -0
- package/dist/index.d.ts +16 -0
- package/dist/index.js +217 -150
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +217 -150
- package/dist/index.mjs.map +1 -1
- package/package.json +5 -5
package/dist/index.d.mts
CHANGED
|
@@ -75,6 +75,22 @@ declare class LLMGateway implements IModel {
|
|
|
75
75
|
*/
|
|
76
76
|
private extractErrorSource;
|
|
77
77
|
private extractExtraOptions;
|
|
78
|
+
/**
|
|
79
|
+
* Builds the outbound headers for a call to the NebulaOS LLM Gateway.
|
|
80
|
+
*
|
|
81
|
+
* Under ADR-0002, correlation with the NebulaOS backend is carried on
|
|
82
|
+
* domain-scoped `x-nebula-*` headers that APMs of the host process do not
|
|
83
|
+
* touch. The standard W3C `traceparent` is still emitted (same trace-id /
|
|
84
|
+
* span-id) for compatibility with caches, proxies, and log correlation —
|
|
85
|
+
* but the backend treats `x-nebula-traceparent` as the authoritative source.
|
|
86
|
+
* If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
|
|
87
|
+
* unaffected.
|
|
88
|
+
*
|
|
89
|
+
* Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
|
|
90
|
+
* no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
|
|
91
|
+
* Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
|
|
92
|
+
* plus the compat `traceparent`.
|
|
93
|
+
*/
|
|
78
94
|
private buildGatewayHeaders;
|
|
79
95
|
/**
|
|
80
96
|
* Extracts enrichment data from backend HTTP headers.
|
package/dist/index.d.ts
CHANGED
|
@@ -75,6 +75,22 @@ declare class LLMGateway implements IModel {
|
|
|
75
75
|
*/
|
|
76
76
|
private extractErrorSource;
|
|
77
77
|
private extractExtraOptions;
|
|
78
|
+
/**
|
|
79
|
+
* Builds the outbound headers for a call to the NebulaOS LLM Gateway.
|
|
80
|
+
*
|
|
81
|
+
* Under ADR-0002, correlation with the NebulaOS backend is carried on
|
|
82
|
+
* domain-scoped `x-nebula-*` headers that APMs of the host process do not
|
|
83
|
+
* touch. The standard W3C `traceparent` is still emitted (same trace-id /
|
|
84
|
+
* span-id) for compatibility with caches, proxies, and log correlation —
|
|
85
|
+
* but the backend treats `x-nebula-traceparent` as the authoritative source.
|
|
86
|
+
* If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
|
|
87
|
+
* unaffected.
|
|
88
|
+
*
|
|
89
|
+
* Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
|
|
90
|
+
* no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
|
|
91
|
+
* Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
|
|
92
|
+
* plus the compat `traceparent`.
|
|
93
|
+
*/
|
|
78
94
|
private buildGatewayHeaders;
|
|
79
95
|
/**
|
|
80
96
|
* Extracts enrichment data from backend HTTP headers.
|
package/dist/index.js
CHANGED
|
@@ -82,9 +82,7 @@ var LLMGateway = class {
|
|
|
82
82
|
messagesCount: messages.length,
|
|
83
83
|
toolsCount: tools?.length ?? 0,
|
|
84
84
|
llmConfig: Object.keys(llmConfig).length > 0 ? llmConfig : void 0,
|
|
85
|
-
responseFormat
|
|
86
|
-
messages,
|
|
87
|
-
tools
|
|
85
|
+
responseFormat
|
|
88
86
|
};
|
|
89
87
|
return import_core.Tracing.withSpan(
|
|
90
88
|
{
|
|
@@ -180,134 +178,143 @@ var LLMGateway = class {
|
|
|
180
178
|
messagesCount: messages.length,
|
|
181
179
|
toolsCount: tools?.length ?? 0,
|
|
182
180
|
llmConfig: Object.keys(llmConfig).length > 0 ? llmConfig : void 0,
|
|
183
|
-
responseFormat
|
|
184
|
-
messages,
|
|
185
|
-
tools
|
|
181
|
+
responseFormat
|
|
186
182
|
};
|
|
187
|
-
const llmSpan =
|
|
183
|
+
const llmSpan = import_core.Tracing.startSpan({
|
|
188
184
|
kind: import_types.SpanType.llm_wrapper,
|
|
189
185
|
name: `llm:${this.modelName}`,
|
|
190
186
|
data: startData
|
|
191
187
|
});
|
|
192
|
-
const
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
{
|
|
204
|
-
model,
|
|
205
|
-
messages: this.convertMessages(messages),
|
|
206
|
-
tools: this.convertTools(tools),
|
|
207
|
-
stream: true,
|
|
208
|
-
stream_options: { include_usage: true },
|
|
209
|
-
response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
|
|
210
|
-
type: "json_schema",
|
|
211
|
-
json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
|
|
212
|
-
} : { type: "json_object" } : void 0,
|
|
213
|
-
...this.extractExtraOptions(mergedOptions)
|
|
214
|
-
},
|
|
215
|
-
{ headers }
|
|
216
|
-
);
|
|
217
|
-
} catch (error) {
|
|
218
|
-
this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
|
|
219
|
-
const gatewayError = this.handleError(error);
|
|
220
|
-
if (llmSpan) {
|
|
221
|
-
const errorEndData = {
|
|
222
|
-
error: {
|
|
223
|
-
message: gatewayError.message,
|
|
224
|
-
code: gatewayError.code,
|
|
225
|
-
status: gatewayError.status
|
|
226
|
-
}
|
|
227
|
-
};
|
|
228
|
-
await llmSpan.end({
|
|
229
|
-
status: "error",
|
|
230
|
-
data: errorEndData
|
|
231
|
-
});
|
|
188
|
+
const queue = [];
|
|
189
|
+
let pendingResolve = null;
|
|
190
|
+
const abortController = new AbortController();
|
|
191
|
+
let consumerAborted = false;
|
|
192
|
+
const push = (item) => {
|
|
193
|
+
if (pendingResolve) {
|
|
194
|
+
const resolve = pendingResolve;
|
|
195
|
+
pendingResolve = null;
|
|
196
|
+
resolve(item);
|
|
197
|
+
} else {
|
|
198
|
+
queue.push(item);
|
|
232
199
|
}
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
const
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
200
|
+
};
|
|
201
|
+
const pull = () => {
|
|
202
|
+
if (queue.length > 0) return Promise.resolve(queue.shift());
|
|
203
|
+
return new Promise((resolve) => {
|
|
204
|
+
pendingResolve = resolve;
|
|
205
|
+
});
|
|
206
|
+
};
|
|
207
|
+
const producer = import_core.Tracing.runWithSpan(llmSpan, async () => {
|
|
208
|
+
const headers = this.buildGatewayHeaders();
|
|
209
|
+
this.logger.debug("LLM Gateway stream request", {
|
|
210
|
+
model,
|
|
211
|
+
baseUrl: this.baseUrl,
|
|
212
|
+
stream: true,
|
|
213
|
+
messageCount: messages.length,
|
|
214
|
+
toolCount: tools?.length ?? 0
|
|
215
|
+
});
|
|
216
|
+
let stream;
|
|
217
|
+
try {
|
|
218
|
+
stream = await this.client.chat.completions.create(
|
|
219
|
+
{
|
|
220
|
+
model,
|
|
221
|
+
messages: this.convertMessages(messages),
|
|
222
|
+
tools: this.convertTools(tools),
|
|
223
|
+
stream: true,
|
|
224
|
+
stream_options: { include_usage: true },
|
|
225
|
+
response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
|
|
226
|
+
type: "json_schema",
|
|
227
|
+
json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
|
|
228
|
+
} : { type: "json_object" } : void 0,
|
|
229
|
+
...this.extractExtraOptions(mergedOptions)
|
|
230
|
+
},
|
|
231
|
+
{ headers, signal: abortController.signal }
|
|
232
|
+
);
|
|
233
|
+
} catch (error) {
|
|
234
|
+
this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
|
|
235
|
+
throw this.handleError(error);
|
|
236
|
+
}
|
|
237
|
+
let finalUsage;
|
|
238
|
+
let finalFinishReason;
|
|
239
|
+
let toolCallsCount = 0;
|
|
240
|
+
let outputPreview = "";
|
|
241
|
+
let finalContent = "";
|
|
242
|
+
const toolCallsAccumulator = /* @__PURE__ */ new Map();
|
|
243
|
+
try {
|
|
244
|
+
for await (const chunk of stream) {
|
|
245
|
+
if (abortController.signal.aborted) break;
|
|
246
|
+
if (chunk.usage) {
|
|
247
|
+
finalUsage = this.mapUsage(chunk.usage);
|
|
248
|
+
push({
|
|
249
|
+
kind: "chunk",
|
|
250
|
+
value: { type: "finish", reason: "stop", usage: finalUsage }
|
|
251
|
+
});
|
|
266
252
|
}
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
253
|
+
const choice = chunk.choices?.[0];
|
|
254
|
+
if (!choice) continue;
|
|
255
|
+
if (choice.finish_reason) {
|
|
256
|
+
finalFinishReason = this.mapFinishReason(choice.finish_reason);
|
|
257
|
+
push({
|
|
258
|
+
kind: "chunk",
|
|
259
|
+
value: { type: "finish", reason: finalFinishReason }
|
|
260
|
+
});
|
|
261
|
+
}
|
|
262
|
+
const delta = choice.delta;
|
|
263
|
+
if (!delta) continue;
|
|
264
|
+
if (delta.content) {
|
|
265
|
+
finalContent += delta.content;
|
|
266
|
+
if (outputPreview.length < 200) {
|
|
267
|
+
outputPreview += delta.content.slice(0, 200 - outputPreview.length);
|
|
281
268
|
}
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
269
|
+
push({ kind: "chunk", value: { type: "content_delta", delta: delta.content } });
|
|
270
|
+
}
|
|
271
|
+
if (delta.tool_calls) {
|
|
272
|
+
for (const tc of delta.tool_calls) {
|
|
273
|
+
const idx = tc.index;
|
|
274
|
+
if (tc.id && tc.function?.name) {
|
|
275
|
+
toolCallsCount++;
|
|
276
|
+
toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
|
|
277
|
+
push({
|
|
278
|
+
kind: "chunk",
|
|
279
|
+
value: {
|
|
280
|
+
type: "tool_call_start",
|
|
281
|
+
index: idx,
|
|
282
|
+
id: tc.id,
|
|
283
|
+
name: tc.function.name
|
|
284
|
+
}
|
|
285
|
+
});
|
|
286
|
+
}
|
|
287
|
+
if (tc.function?.arguments) {
|
|
288
|
+
const existing = toolCallsAccumulator.get(idx);
|
|
289
|
+
if (existing) {
|
|
290
|
+
existing.arguments += tc.function.arguments;
|
|
291
|
+
}
|
|
292
|
+
push({
|
|
293
|
+
kind: "chunk",
|
|
294
|
+
value: {
|
|
295
|
+
type: "tool_call_delta",
|
|
296
|
+
index: idx,
|
|
297
|
+
args: tc.function.arguments
|
|
298
|
+
}
|
|
299
|
+
});
|
|
286
300
|
}
|
|
287
|
-
yield {
|
|
288
|
-
type: "tool_call_delta",
|
|
289
|
-
index: idx,
|
|
290
|
-
args: tc.function.arguments
|
|
291
|
-
};
|
|
292
301
|
}
|
|
293
302
|
}
|
|
294
303
|
}
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
}];
|
|
310
|
-
if (llmSpan) {
|
|
304
|
+
const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
|
|
305
|
+
id: tc.id,
|
|
306
|
+
type: "function",
|
|
307
|
+
function: { name: tc.name, arguments: tc.arguments }
|
|
308
|
+
}));
|
|
309
|
+
const choices = [{
|
|
310
|
+
index: 0,
|
|
311
|
+
message: {
|
|
312
|
+
role: "assistant",
|
|
313
|
+
content: finalContent || null,
|
|
314
|
+
tool_calls: toolCalls.length > 0 ? toolCalls : void 0
|
|
315
|
+
},
|
|
316
|
+
finish_reason: finalFinishReason
|
|
317
|
+
}];
|
|
311
318
|
const endData = {
|
|
312
319
|
usage: finalUsage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
|
|
313
320
|
finishReason: finalFinishReason ?? "stop",
|
|
@@ -315,28 +322,56 @@ var LLMGateway = class {
|
|
|
315
322
|
outputPreview,
|
|
316
323
|
choices: this.sanitizeChoices(choices)
|
|
317
324
|
};
|
|
318
|
-
await llmSpan.end({
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
325
|
+
await llmSpan.end({ status: "success", data: endData });
|
|
326
|
+
} catch (error) {
|
|
327
|
+
this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
|
|
328
|
+
throw this.handleError(error);
|
|
322
329
|
}
|
|
323
|
-
}
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
330
|
+
}).then(
|
|
331
|
+
() => push({ kind: "done" }),
|
|
332
|
+
(error) => push({ kind: "error", error })
|
|
333
|
+
);
|
|
334
|
+
let completedNormally = false;
|
|
335
|
+
try {
|
|
336
|
+
while (true) {
|
|
337
|
+
const item = await pull();
|
|
338
|
+
if (item.kind === "chunk") {
|
|
339
|
+
yield item.value;
|
|
340
|
+
} else if (item.kind === "done") {
|
|
341
|
+
completedNormally = true;
|
|
342
|
+
return;
|
|
343
|
+
} else {
|
|
344
|
+
completedNormally = true;
|
|
345
|
+
const gatewayError = item.error instanceof LLMGatewayError ? item.error : this.handleError(item.error);
|
|
346
|
+
if (!llmSpan.isEnded) {
|
|
347
|
+
const errorEndData = {
|
|
348
|
+
error: {
|
|
349
|
+
message: gatewayError.message,
|
|
350
|
+
code: gatewayError.code,
|
|
351
|
+
status: gatewayError.status
|
|
352
|
+
}
|
|
353
|
+
};
|
|
354
|
+
await llmSpan.end({ status: "error", data: errorEndData });
|
|
332
355
|
}
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
status: "error",
|
|
336
|
-
data: errorEndData
|
|
337
|
-
});
|
|
356
|
+
throw gatewayError;
|
|
357
|
+
}
|
|
338
358
|
}
|
|
339
|
-
|
|
359
|
+
} finally {
|
|
360
|
+
if (!completedNormally) {
|
|
361
|
+
consumerAborted = true;
|
|
362
|
+
abortController.abort();
|
|
363
|
+
if (!llmSpan.isEnded) {
|
|
364
|
+
try {
|
|
365
|
+
await llmSpan.end({ status: "cancelled" });
|
|
366
|
+
} catch {
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
try {
|
|
371
|
+
await producer;
|
|
372
|
+
} catch {
|
|
373
|
+
}
|
|
374
|
+
void consumerAborted;
|
|
340
375
|
}
|
|
341
376
|
}
|
|
342
377
|
// ==========================================================================
|
|
@@ -515,24 +550,44 @@ var LLMGateway = class {
|
|
|
515
550
|
const { responseFormat, ...rest } = options;
|
|
516
551
|
return rest;
|
|
517
552
|
}
|
|
553
|
+
/**
|
|
554
|
+
* Builds the outbound headers for a call to the NebulaOS LLM Gateway.
|
|
555
|
+
*
|
|
556
|
+
* Under ADR-0002, correlation with the NebulaOS backend is carried on
|
|
557
|
+
* domain-scoped `x-nebula-*` headers that APMs of the host process do not
|
|
558
|
+
* touch. The standard W3C `traceparent` is still emitted (same trace-id /
|
|
559
|
+
* span-id) for compatibility with caches, proxies, and log correlation —
|
|
560
|
+
* but the backend treats `x-nebula-traceparent` as the authoritative source.
|
|
561
|
+
* If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
|
|
562
|
+
* unaffected.
|
|
563
|
+
*
|
|
564
|
+
* Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
|
|
565
|
+
* no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
|
|
566
|
+
* Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
|
|
567
|
+
* plus the compat `traceparent`.
|
|
568
|
+
*/
|
|
518
569
|
buildGatewayHeaders() {
|
|
519
570
|
const headers = {
|
|
520
|
-
"x-request-id": (0, import_node_crypto.randomUUID)()
|
|
571
|
+
"x-nebula-request-id": (0, import_node_crypto.randomUUID)()
|
|
521
572
|
};
|
|
522
573
|
const ctx = import_core.Tracing.getContext();
|
|
574
|
+
const traceId = ctx?.traceId ?? (0, import_node_crypto.randomBytes)(16).toString("hex");
|
|
575
|
+
const spanId = ctx?.spanId ?? (0, import_node_crypto.randomBytes)(8).toString("hex");
|
|
576
|
+
const traceparent = `00-${traceId}-${spanId}-01`;
|
|
577
|
+
headers["x-nebula-traceparent"] = traceparent;
|
|
578
|
+
headers.traceparent = traceparent;
|
|
523
579
|
const executionId = ctx?.executionId ?? import_core.ExecutionContext.getOrUndefined()?.executionId;
|
|
524
580
|
if (executionId) {
|
|
525
|
-
headers["x-execution-id"] = executionId;
|
|
581
|
+
headers["x-nebula-execution-id"] = executionId;
|
|
526
582
|
}
|
|
527
583
|
if (ctx?.resourceName) {
|
|
528
|
-
headers["x-resource-name"] = ctx.resourceName;
|
|
584
|
+
headers["x-nebula-resource-name"] = ctx.resourceName;
|
|
585
|
+
}
|
|
586
|
+
if (ctx?.resourceType) {
|
|
587
|
+
headers["x-nebula-resource-type"] = ctx.resourceType;
|
|
529
588
|
}
|
|
530
|
-
if (ctx) {
|
|
531
|
-
headers
|
|
532
|
-
} else {
|
|
533
|
-
const traceId = (0, import_node_crypto.randomBytes)(16).toString("hex");
|
|
534
|
-
const spanId = (0, import_node_crypto.randomBytes)(8).toString("hex");
|
|
535
|
-
headers.traceparent = `00-${traceId}-${spanId}-01`;
|
|
589
|
+
if (ctx?.workspaceId) {
|
|
590
|
+
headers["x-nebula-workspace-id"] = ctx.workspaceId;
|
|
536
591
|
}
|
|
537
592
|
return headers;
|
|
538
593
|
}
|
|
@@ -688,8 +743,11 @@ var LLMGateway = class {
|
|
|
688
743
|
convertContentPart(part) {
|
|
689
744
|
if (part.type === "text") return { type: "text", text: part.text };
|
|
690
745
|
if (part.type === "file") {
|
|
691
|
-
const { data, mediaType } = part;
|
|
692
|
-
|
|
746
|
+
const { data, mediaType, filename } = part;
|
|
747
|
+
const isImage = mediaType.startsWith("image/");
|
|
748
|
+
const isPdf = mediaType === "application/pdf";
|
|
749
|
+
const isText = mediaType.startsWith("text/");
|
|
750
|
+
if (!isImage && !isPdf && !isText) {
|
|
693
751
|
throw new Error(`LLM Gateway: file mediaType '${mediaType}' is not supported yet`);
|
|
694
752
|
}
|
|
695
753
|
let url;
|
|
@@ -705,7 +763,16 @@ var LLMGateway = class {
|
|
|
705
763
|
} else {
|
|
706
764
|
throw new Error(`LLM Gateway: unsupported file data type`);
|
|
707
765
|
}
|
|
708
|
-
|
|
766
|
+
if (isImage) {
|
|
767
|
+
return { type: "image_url", image_url: { url } };
|
|
768
|
+
}
|
|
769
|
+
return {
|
|
770
|
+
type: "file",
|
|
771
|
+
file: {
|
|
772
|
+
file_data: url,
|
|
773
|
+
filename: filename ?? (isPdf ? "document.pdf" : "document.txt")
|
|
774
|
+
}
|
|
775
|
+
};
|
|
709
776
|
}
|
|
710
777
|
throw new Error(`Unsupported content type: ${part.type}`);
|
|
711
778
|
}
|