@nebulaos/llm-gateway 0.1.8 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +16 -0
- package/dist/index.d.ts +16 -0
- package/dist/index.js +234 -152
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +234 -152
- package/dist/index.mjs.map +1 -1
- package/package.json +5 -5
package/dist/index.mjs
CHANGED
|
@@ -57,7 +57,7 @@ var LLMGateway = class {
|
|
|
57
57
|
};
|
|
58
58
|
return Tracing.withSpan(
|
|
59
59
|
{
|
|
60
|
-
kind: SpanType.
|
|
60
|
+
kind: SpanType.llm_wrapper,
|
|
61
61
|
name: `llm:${this.modelName}`,
|
|
62
62
|
data: startData
|
|
63
63
|
},
|
|
@@ -153,130 +153,141 @@ var LLMGateway = class {
|
|
|
153
153
|
messages,
|
|
154
154
|
tools
|
|
155
155
|
};
|
|
156
|
-
const llmSpan =
|
|
157
|
-
kind: SpanType.
|
|
156
|
+
const llmSpan = Tracing.startSpan({
|
|
157
|
+
kind: SpanType.llm_wrapper,
|
|
158
158
|
name: `llm:${this.modelName}`,
|
|
159
159
|
data: startData
|
|
160
160
|
});
|
|
161
|
-
const
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
{
|
|
173
|
-
model,
|
|
174
|
-
messages: this.convertMessages(messages),
|
|
175
|
-
tools: this.convertTools(tools),
|
|
176
|
-
stream: true,
|
|
177
|
-
stream_options: { include_usage: true },
|
|
178
|
-
response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
|
|
179
|
-
type: "json_schema",
|
|
180
|
-
json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
|
|
181
|
-
} : { type: "json_object" } : void 0,
|
|
182
|
-
...this.extractExtraOptions(mergedOptions)
|
|
183
|
-
},
|
|
184
|
-
{ headers }
|
|
185
|
-
);
|
|
186
|
-
} catch (error) {
|
|
187
|
-
this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
|
|
188
|
-
const gatewayError = this.handleError(error);
|
|
189
|
-
if (llmSpan) {
|
|
190
|
-
const errorEndData = {
|
|
191
|
-
error: {
|
|
192
|
-
message: gatewayError.message,
|
|
193
|
-
code: gatewayError.code,
|
|
194
|
-
status: gatewayError.status
|
|
195
|
-
}
|
|
196
|
-
};
|
|
197
|
-
await llmSpan.end({
|
|
198
|
-
status: "error",
|
|
199
|
-
data: errorEndData
|
|
200
|
-
});
|
|
161
|
+
const queue = [];
|
|
162
|
+
let pendingResolve = null;
|
|
163
|
+
const abortController = new AbortController();
|
|
164
|
+
let consumerAborted = false;
|
|
165
|
+
const push = (item) => {
|
|
166
|
+
if (pendingResolve) {
|
|
167
|
+
const resolve = pendingResolve;
|
|
168
|
+
pendingResolve = null;
|
|
169
|
+
resolve(item);
|
|
170
|
+
} else {
|
|
171
|
+
queue.push(item);
|
|
201
172
|
}
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
const
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
173
|
+
};
|
|
174
|
+
const pull = () => {
|
|
175
|
+
if (queue.length > 0) return Promise.resolve(queue.shift());
|
|
176
|
+
return new Promise((resolve) => {
|
|
177
|
+
pendingResolve = resolve;
|
|
178
|
+
});
|
|
179
|
+
};
|
|
180
|
+
const producer = Tracing.runWithSpan(llmSpan, async () => {
|
|
181
|
+
const headers = this.buildGatewayHeaders();
|
|
182
|
+
this.logger.debug("LLM Gateway stream request", {
|
|
183
|
+
model,
|
|
184
|
+
baseUrl: this.baseUrl,
|
|
185
|
+
stream: true,
|
|
186
|
+
messageCount: messages.length,
|
|
187
|
+
toolCount: tools?.length ?? 0
|
|
188
|
+
});
|
|
189
|
+
let stream;
|
|
190
|
+
try {
|
|
191
|
+
stream = await this.client.chat.completions.create(
|
|
192
|
+
{
|
|
193
|
+
model,
|
|
194
|
+
messages: this.convertMessages(messages),
|
|
195
|
+
tools: this.convertTools(tools),
|
|
196
|
+
stream: true,
|
|
197
|
+
stream_options: { include_usage: true },
|
|
198
|
+
response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
|
|
199
|
+
type: "json_schema",
|
|
200
|
+
json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
|
|
201
|
+
} : { type: "json_object" } : void 0,
|
|
202
|
+
...this.extractExtraOptions(mergedOptions)
|
|
203
|
+
},
|
|
204
|
+
{ headers, signal: abortController.signal }
|
|
205
|
+
);
|
|
206
|
+
} catch (error) {
|
|
207
|
+
this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
|
|
208
|
+
throw this.handleError(error);
|
|
209
|
+
}
|
|
210
|
+
let finalUsage;
|
|
211
|
+
let finalFinishReason;
|
|
212
|
+
let toolCallsCount = 0;
|
|
213
|
+
let outputPreview = "";
|
|
214
|
+
let finalContent = "";
|
|
215
|
+
const toolCallsAccumulator = /* @__PURE__ */ new Map();
|
|
216
|
+
try {
|
|
217
|
+
for await (const chunk of stream) {
|
|
218
|
+
if (abortController.signal.aborted) break;
|
|
219
|
+
if (chunk.usage) {
|
|
220
|
+
finalUsage = this.mapUsage(chunk.usage);
|
|
221
|
+
push({
|
|
222
|
+
kind: "chunk",
|
|
223
|
+
value: { type: "finish", reason: "stop", usage: finalUsage }
|
|
224
|
+
});
|
|
235
225
|
}
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
226
|
+
const choice = chunk.choices?.[0];
|
|
227
|
+
if (!choice) continue;
|
|
228
|
+
if (choice.finish_reason) {
|
|
229
|
+
finalFinishReason = this.mapFinishReason(choice.finish_reason);
|
|
230
|
+
push({
|
|
231
|
+
kind: "chunk",
|
|
232
|
+
value: { type: "finish", reason: finalFinishReason }
|
|
233
|
+
});
|
|
234
|
+
}
|
|
235
|
+
const delta = choice.delta;
|
|
236
|
+
if (!delta) continue;
|
|
237
|
+
if (delta.content) {
|
|
238
|
+
finalContent += delta.content;
|
|
239
|
+
if (outputPreview.length < 200) {
|
|
240
|
+
outputPreview += delta.content.slice(0, 200 - outputPreview.length);
|
|
250
241
|
}
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
242
|
+
push({ kind: "chunk", value: { type: "content_delta", delta: delta.content } });
|
|
243
|
+
}
|
|
244
|
+
if (delta.tool_calls) {
|
|
245
|
+
for (const tc of delta.tool_calls) {
|
|
246
|
+
const idx = tc.index;
|
|
247
|
+
if (tc.id && tc.function?.name) {
|
|
248
|
+
toolCallsCount++;
|
|
249
|
+
toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
|
|
250
|
+
push({
|
|
251
|
+
kind: "chunk",
|
|
252
|
+
value: {
|
|
253
|
+
type: "tool_call_start",
|
|
254
|
+
index: idx,
|
|
255
|
+
id: tc.id,
|
|
256
|
+
name: tc.function.name
|
|
257
|
+
}
|
|
258
|
+
});
|
|
259
|
+
}
|
|
260
|
+
if (tc.function?.arguments) {
|
|
261
|
+
const existing = toolCallsAccumulator.get(idx);
|
|
262
|
+
if (existing) {
|
|
263
|
+
existing.arguments += tc.function.arguments;
|
|
264
|
+
}
|
|
265
|
+
push({
|
|
266
|
+
kind: "chunk",
|
|
267
|
+
value: {
|
|
268
|
+
type: "tool_call_delta",
|
|
269
|
+
index: idx,
|
|
270
|
+
args: tc.function.arguments
|
|
271
|
+
}
|
|
272
|
+
});
|
|
255
273
|
}
|
|
256
|
-
yield {
|
|
257
|
-
type: "tool_call_delta",
|
|
258
|
-
index: idx,
|
|
259
|
-
args: tc.function.arguments
|
|
260
|
-
};
|
|
261
274
|
}
|
|
262
275
|
}
|
|
263
276
|
}
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
}];
|
|
279
|
-
if (llmSpan) {
|
|
277
|
+
const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
|
|
278
|
+
id: tc.id,
|
|
279
|
+
type: "function",
|
|
280
|
+
function: { name: tc.name, arguments: tc.arguments }
|
|
281
|
+
}));
|
|
282
|
+
const choices = [{
|
|
283
|
+
index: 0,
|
|
284
|
+
message: {
|
|
285
|
+
role: "assistant",
|
|
286
|
+
content: finalContent || null,
|
|
287
|
+
tool_calls: toolCalls.length > 0 ? toolCalls : void 0
|
|
288
|
+
},
|
|
289
|
+
finish_reason: finalFinishReason
|
|
290
|
+
}];
|
|
280
291
|
const endData = {
|
|
281
292
|
usage: finalUsage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
|
|
282
293
|
finishReason: finalFinishReason ?? "stop",
|
|
@@ -284,28 +295,56 @@ var LLMGateway = class {
|
|
|
284
295
|
outputPreview,
|
|
285
296
|
choices: this.sanitizeChoices(choices)
|
|
286
297
|
};
|
|
287
|
-
await llmSpan.end({
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
298
|
+
await llmSpan.end({ status: "success", data: endData });
|
|
299
|
+
} catch (error) {
|
|
300
|
+
this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
|
|
301
|
+
throw this.handleError(error);
|
|
291
302
|
}
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
303
|
+
}).then(
|
|
304
|
+
() => push({ kind: "done" }),
|
|
305
|
+
(error) => push({ kind: "error", error })
|
|
306
|
+
);
|
|
307
|
+
let completedNormally = false;
|
|
308
|
+
try {
|
|
309
|
+
while (true) {
|
|
310
|
+
const item = await pull();
|
|
311
|
+
if (item.kind === "chunk") {
|
|
312
|
+
yield item.value;
|
|
313
|
+
} else if (item.kind === "done") {
|
|
314
|
+
completedNormally = true;
|
|
315
|
+
return;
|
|
316
|
+
} else {
|
|
317
|
+
completedNormally = true;
|
|
318
|
+
const gatewayError = item.error instanceof LLMGatewayError ? item.error : this.handleError(item.error);
|
|
319
|
+
if (!llmSpan.isEnded) {
|
|
320
|
+
const errorEndData = {
|
|
321
|
+
error: {
|
|
322
|
+
message: gatewayError.message,
|
|
323
|
+
code: gatewayError.code,
|
|
324
|
+
status: gatewayError.status
|
|
325
|
+
}
|
|
326
|
+
};
|
|
327
|
+
await llmSpan.end({ status: "error", data: errorEndData });
|
|
301
328
|
}
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
329
|
+
throw gatewayError;
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
} finally {
|
|
333
|
+
if (!completedNormally) {
|
|
334
|
+
consumerAborted = true;
|
|
335
|
+
abortController.abort();
|
|
336
|
+
if (!llmSpan.isEnded) {
|
|
337
|
+
try {
|
|
338
|
+
await llmSpan.end({ status: "cancelled" });
|
|
339
|
+
} catch {
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
try {
|
|
344
|
+
await producer;
|
|
345
|
+
} catch {
|
|
307
346
|
}
|
|
308
|
-
|
|
347
|
+
void consumerAborted;
|
|
309
348
|
}
|
|
310
349
|
}
|
|
311
350
|
// ==========================================================================
|
|
@@ -484,21 +523,44 @@ var LLMGateway = class {
|
|
|
484
523
|
const { responseFormat, ...rest } = options;
|
|
485
524
|
return rest;
|
|
486
525
|
}
|
|
526
|
+
/**
|
|
527
|
+
* Builds the outbound headers for a call to the NebulaOS LLM Gateway.
|
|
528
|
+
*
|
|
529
|
+
* Under ADR-0002, correlation with the NebulaOS backend is carried on
|
|
530
|
+
* domain-scoped `x-nebula-*` headers that APMs of the host process do not
|
|
531
|
+
* touch. The standard W3C `traceparent` is still emitted (same trace-id /
|
|
532
|
+
* span-id) for compatibility with caches, proxies, and log correlation —
|
|
533
|
+
* but the backend treats `x-nebula-traceparent` as the authoritative source.
|
|
534
|
+
* If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
|
|
535
|
+
* unaffected.
|
|
536
|
+
*
|
|
537
|
+
* Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
|
|
538
|
+
* no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
|
|
539
|
+
* Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
|
|
540
|
+
* plus the compat `traceparent`.
|
|
541
|
+
*/
|
|
487
542
|
buildGatewayHeaders() {
|
|
488
543
|
const headers = {
|
|
489
|
-
"x-request-id": randomUUID()
|
|
544
|
+
"x-nebula-request-id": randomUUID()
|
|
490
545
|
};
|
|
491
546
|
const ctx = Tracing.getContext();
|
|
547
|
+
const traceId = ctx?.traceId ?? randomBytes(16).toString("hex");
|
|
548
|
+
const spanId = ctx?.spanId ?? randomBytes(8).toString("hex");
|
|
549
|
+
const traceparent = `00-${traceId}-${spanId}-01`;
|
|
550
|
+
headers["x-nebula-traceparent"] = traceparent;
|
|
551
|
+
headers.traceparent = traceparent;
|
|
492
552
|
const executionId = ctx?.executionId ?? ExecutionContext.getOrUndefined()?.executionId;
|
|
493
553
|
if (executionId) {
|
|
494
|
-
headers["x-execution-id"] = executionId;
|
|
554
|
+
headers["x-nebula-execution-id"] = executionId;
|
|
495
555
|
}
|
|
496
|
-
if (ctx) {
|
|
497
|
-
headers
|
|
498
|
-
}
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
556
|
+
if (ctx?.resourceName) {
|
|
557
|
+
headers["x-nebula-resource-name"] = ctx.resourceName;
|
|
558
|
+
}
|
|
559
|
+
if (ctx?.resourceType) {
|
|
560
|
+
headers["x-nebula-resource-type"] = ctx.resourceType;
|
|
561
|
+
}
|
|
562
|
+
if (ctx?.workspaceId) {
|
|
563
|
+
headers["x-nebula-workspace-id"] = ctx.workspaceId;
|
|
502
564
|
}
|
|
503
565
|
return headers;
|
|
504
566
|
}
|
|
@@ -654,18 +716,38 @@ var LLMGateway = class {
|
|
|
654
716
|
convertContentPart(part) {
|
|
655
717
|
if (part.type === "text") return { type: "text", text: part.text };
|
|
656
718
|
if (part.type === "file") {
|
|
657
|
-
const {
|
|
658
|
-
|
|
659
|
-
|
|
719
|
+
const { data, mediaType, filename } = part;
|
|
720
|
+
const isImage = mediaType.startsWith("image/");
|
|
721
|
+
const isPdf = mediaType === "application/pdf";
|
|
722
|
+
const isText = mediaType.startsWith("text/");
|
|
723
|
+
if (!isImage && !isPdf && !isText) {
|
|
724
|
+
throw new Error(`LLM Gateway: file mediaType '${mediaType}' is not supported yet`);
|
|
660
725
|
}
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
726
|
+
let url;
|
|
727
|
+
if (data instanceof Uint8Array) {
|
|
728
|
+
const base64 = Buffer.from(data).toString("base64");
|
|
729
|
+
url = `data:${mediaType};base64,${base64}`;
|
|
730
|
+
} else if (typeof data === "string") {
|
|
731
|
+
if (data.startsWith("data:") || data.includes("://")) {
|
|
732
|
+
url = data;
|
|
733
|
+
} else {
|
|
734
|
+
url = `data:${mediaType};base64,${data}`;
|
|
735
|
+
}
|
|
736
|
+
} else {
|
|
737
|
+
throw new Error(`LLM Gateway: unsupported file data type`);
|
|
738
|
+
}
|
|
739
|
+
if (isImage) {
|
|
740
|
+
return { type: "image_url", image_url: { url } };
|
|
741
|
+
}
|
|
742
|
+
return {
|
|
743
|
+
type: "file",
|
|
744
|
+
file: {
|
|
745
|
+
file_data: url,
|
|
746
|
+
filename: filename ?? (isPdf ? "document.pdf" : "document.txt")
|
|
747
|
+
}
|
|
748
|
+
};
|
|
666
749
|
}
|
|
667
|
-
|
|
668
|
-
throw new Error(`Unsupported content type: ${_exhaustive.type}`);
|
|
750
|
+
throw new Error(`Unsupported content type: ${part.type}`);
|
|
669
751
|
}
|
|
670
752
|
/**
|
|
671
753
|
* Sanitize choices for observability storage.
|