@nebulaos/llm-gateway 0.1.9 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +16 -0
- package/dist/index.d.ts +16 -0
- package/dist/index.js +217 -150
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +217 -150
- package/dist/index.mjs.map +1 -1
- package/package.json +5 -5
package/dist/index.mjs
CHANGED
|
@@ -51,9 +51,7 @@ var LLMGateway = class {
|
|
|
51
51
|
messagesCount: messages.length,
|
|
52
52
|
toolsCount: tools?.length ?? 0,
|
|
53
53
|
llmConfig: Object.keys(llmConfig).length > 0 ? llmConfig : void 0,
|
|
54
|
-
responseFormat
|
|
55
|
-
messages,
|
|
56
|
-
tools
|
|
54
|
+
responseFormat
|
|
57
55
|
};
|
|
58
56
|
return Tracing.withSpan(
|
|
59
57
|
{
|
|
@@ -149,134 +147,143 @@ var LLMGateway = class {
|
|
|
149
147
|
messagesCount: messages.length,
|
|
150
148
|
toolsCount: tools?.length ?? 0,
|
|
151
149
|
llmConfig: Object.keys(llmConfig).length > 0 ? llmConfig : void 0,
|
|
152
|
-
responseFormat
|
|
153
|
-
messages,
|
|
154
|
-
tools
|
|
150
|
+
responseFormat
|
|
155
151
|
};
|
|
156
|
-
const llmSpan =
|
|
152
|
+
const llmSpan = Tracing.startSpan({
|
|
157
153
|
kind: SpanType.llm_wrapper,
|
|
158
154
|
name: `llm:${this.modelName}`,
|
|
159
155
|
data: startData
|
|
160
156
|
});
|
|
161
|
-
const
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
{
|
|
173
|
-
model,
|
|
174
|
-
messages: this.convertMessages(messages),
|
|
175
|
-
tools: this.convertTools(tools),
|
|
176
|
-
stream: true,
|
|
177
|
-
stream_options: { include_usage: true },
|
|
178
|
-
response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
|
|
179
|
-
type: "json_schema",
|
|
180
|
-
json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
|
|
181
|
-
} : { type: "json_object" } : void 0,
|
|
182
|
-
...this.extractExtraOptions(mergedOptions)
|
|
183
|
-
},
|
|
184
|
-
{ headers }
|
|
185
|
-
);
|
|
186
|
-
} catch (error) {
|
|
187
|
-
this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
|
|
188
|
-
const gatewayError = this.handleError(error);
|
|
189
|
-
if (llmSpan) {
|
|
190
|
-
const errorEndData = {
|
|
191
|
-
error: {
|
|
192
|
-
message: gatewayError.message,
|
|
193
|
-
code: gatewayError.code,
|
|
194
|
-
status: gatewayError.status
|
|
195
|
-
}
|
|
196
|
-
};
|
|
197
|
-
await llmSpan.end({
|
|
198
|
-
status: "error",
|
|
199
|
-
data: errorEndData
|
|
200
|
-
});
|
|
157
|
+
const queue = [];
|
|
158
|
+
let pendingResolve = null;
|
|
159
|
+
const abortController = new AbortController();
|
|
160
|
+
let consumerAborted = false;
|
|
161
|
+
const push = (item) => {
|
|
162
|
+
if (pendingResolve) {
|
|
163
|
+
const resolve = pendingResolve;
|
|
164
|
+
pendingResolve = null;
|
|
165
|
+
resolve(item);
|
|
166
|
+
} else {
|
|
167
|
+
queue.push(item);
|
|
201
168
|
}
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
const
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
169
|
+
};
|
|
170
|
+
const pull = () => {
|
|
171
|
+
if (queue.length > 0) return Promise.resolve(queue.shift());
|
|
172
|
+
return new Promise((resolve) => {
|
|
173
|
+
pendingResolve = resolve;
|
|
174
|
+
});
|
|
175
|
+
};
|
|
176
|
+
const producer = Tracing.runWithSpan(llmSpan, async () => {
|
|
177
|
+
const headers = this.buildGatewayHeaders();
|
|
178
|
+
this.logger.debug("LLM Gateway stream request", {
|
|
179
|
+
model,
|
|
180
|
+
baseUrl: this.baseUrl,
|
|
181
|
+
stream: true,
|
|
182
|
+
messageCount: messages.length,
|
|
183
|
+
toolCount: tools?.length ?? 0
|
|
184
|
+
});
|
|
185
|
+
let stream;
|
|
186
|
+
try {
|
|
187
|
+
stream = await this.client.chat.completions.create(
|
|
188
|
+
{
|
|
189
|
+
model,
|
|
190
|
+
messages: this.convertMessages(messages),
|
|
191
|
+
tools: this.convertTools(tools),
|
|
192
|
+
stream: true,
|
|
193
|
+
stream_options: { include_usage: true },
|
|
194
|
+
response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
|
|
195
|
+
type: "json_schema",
|
|
196
|
+
json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
|
|
197
|
+
} : { type: "json_object" } : void 0,
|
|
198
|
+
...this.extractExtraOptions(mergedOptions)
|
|
199
|
+
},
|
|
200
|
+
{ headers, signal: abortController.signal }
|
|
201
|
+
);
|
|
202
|
+
} catch (error) {
|
|
203
|
+
this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
|
|
204
|
+
throw this.handleError(error);
|
|
205
|
+
}
|
|
206
|
+
let finalUsage;
|
|
207
|
+
let finalFinishReason;
|
|
208
|
+
let toolCallsCount = 0;
|
|
209
|
+
let outputPreview = "";
|
|
210
|
+
let finalContent = "";
|
|
211
|
+
const toolCallsAccumulator = /* @__PURE__ */ new Map();
|
|
212
|
+
try {
|
|
213
|
+
for await (const chunk of stream) {
|
|
214
|
+
if (abortController.signal.aborted) break;
|
|
215
|
+
if (chunk.usage) {
|
|
216
|
+
finalUsage = this.mapUsage(chunk.usage);
|
|
217
|
+
push({
|
|
218
|
+
kind: "chunk",
|
|
219
|
+
value: { type: "finish", reason: "stop", usage: finalUsage }
|
|
220
|
+
});
|
|
235
221
|
}
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
222
|
+
const choice = chunk.choices?.[0];
|
|
223
|
+
if (!choice) continue;
|
|
224
|
+
if (choice.finish_reason) {
|
|
225
|
+
finalFinishReason = this.mapFinishReason(choice.finish_reason);
|
|
226
|
+
push({
|
|
227
|
+
kind: "chunk",
|
|
228
|
+
value: { type: "finish", reason: finalFinishReason }
|
|
229
|
+
});
|
|
230
|
+
}
|
|
231
|
+
const delta = choice.delta;
|
|
232
|
+
if (!delta) continue;
|
|
233
|
+
if (delta.content) {
|
|
234
|
+
finalContent += delta.content;
|
|
235
|
+
if (outputPreview.length < 200) {
|
|
236
|
+
outputPreview += delta.content.slice(0, 200 - outputPreview.length);
|
|
250
237
|
}
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
238
|
+
push({ kind: "chunk", value: { type: "content_delta", delta: delta.content } });
|
|
239
|
+
}
|
|
240
|
+
if (delta.tool_calls) {
|
|
241
|
+
for (const tc of delta.tool_calls) {
|
|
242
|
+
const idx = tc.index;
|
|
243
|
+
if (tc.id && tc.function?.name) {
|
|
244
|
+
toolCallsCount++;
|
|
245
|
+
toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
|
|
246
|
+
push({
|
|
247
|
+
kind: "chunk",
|
|
248
|
+
value: {
|
|
249
|
+
type: "tool_call_start",
|
|
250
|
+
index: idx,
|
|
251
|
+
id: tc.id,
|
|
252
|
+
name: tc.function.name
|
|
253
|
+
}
|
|
254
|
+
});
|
|
255
|
+
}
|
|
256
|
+
if (tc.function?.arguments) {
|
|
257
|
+
const existing = toolCallsAccumulator.get(idx);
|
|
258
|
+
if (existing) {
|
|
259
|
+
existing.arguments += tc.function.arguments;
|
|
260
|
+
}
|
|
261
|
+
push({
|
|
262
|
+
kind: "chunk",
|
|
263
|
+
value: {
|
|
264
|
+
type: "tool_call_delta",
|
|
265
|
+
index: idx,
|
|
266
|
+
args: tc.function.arguments
|
|
267
|
+
}
|
|
268
|
+
});
|
|
255
269
|
}
|
|
256
|
-
yield {
|
|
257
|
-
type: "tool_call_delta",
|
|
258
|
-
index: idx,
|
|
259
|
-
args: tc.function.arguments
|
|
260
|
-
};
|
|
261
270
|
}
|
|
262
271
|
}
|
|
263
272
|
}
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
}];
|
|
279
|
-
if (llmSpan) {
|
|
273
|
+
const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
|
|
274
|
+
id: tc.id,
|
|
275
|
+
type: "function",
|
|
276
|
+
function: { name: tc.name, arguments: tc.arguments }
|
|
277
|
+
}));
|
|
278
|
+
const choices = [{
|
|
279
|
+
index: 0,
|
|
280
|
+
message: {
|
|
281
|
+
role: "assistant",
|
|
282
|
+
content: finalContent || null,
|
|
283
|
+
tool_calls: toolCalls.length > 0 ? toolCalls : void 0
|
|
284
|
+
},
|
|
285
|
+
finish_reason: finalFinishReason
|
|
286
|
+
}];
|
|
280
287
|
const endData = {
|
|
281
288
|
usage: finalUsage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
|
|
282
289
|
finishReason: finalFinishReason ?? "stop",
|
|
@@ -284,28 +291,56 @@ var LLMGateway = class {
|
|
|
284
291
|
outputPreview,
|
|
285
292
|
choices: this.sanitizeChoices(choices)
|
|
286
293
|
};
|
|
287
|
-
await llmSpan.end({
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
294
|
+
await llmSpan.end({ status: "success", data: endData });
|
|
295
|
+
} catch (error) {
|
|
296
|
+
this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
|
|
297
|
+
throw this.handleError(error);
|
|
291
298
|
}
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
299
|
+
}).then(
|
|
300
|
+
() => push({ kind: "done" }),
|
|
301
|
+
(error) => push({ kind: "error", error })
|
|
302
|
+
);
|
|
303
|
+
let completedNormally = false;
|
|
304
|
+
try {
|
|
305
|
+
while (true) {
|
|
306
|
+
const item = await pull();
|
|
307
|
+
if (item.kind === "chunk") {
|
|
308
|
+
yield item.value;
|
|
309
|
+
} else if (item.kind === "done") {
|
|
310
|
+
completedNormally = true;
|
|
311
|
+
return;
|
|
312
|
+
} else {
|
|
313
|
+
completedNormally = true;
|
|
314
|
+
const gatewayError = item.error instanceof LLMGatewayError ? item.error : this.handleError(item.error);
|
|
315
|
+
if (!llmSpan.isEnded) {
|
|
316
|
+
const errorEndData = {
|
|
317
|
+
error: {
|
|
318
|
+
message: gatewayError.message,
|
|
319
|
+
code: gatewayError.code,
|
|
320
|
+
status: gatewayError.status
|
|
321
|
+
}
|
|
322
|
+
};
|
|
323
|
+
await llmSpan.end({ status: "error", data: errorEndData });
|
|
301
324
|
}
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
status: "error",
|
|
305
|
-
data: errorEndData
|
|
306
|
-
});
|
|
325
|
+
throw gatewayError;
|
|
326
|
+
}
|
|
307
327
|
}
|
|
308
|
-
|
|
328
|
+
} finally {
|
|
329
|
+
if (!completedNormally) {
|
|
330
|
+
consumerAborted = true;
|
|
331
|
+
abortController.abort();
|
|
332
|
+
if (!llmSpan.isEnded) {
|
|
333
|
+
try {
|
|
334
|
+
await llmSpan.end({ status: "cancelled" });
|
|
335
|
+
} catch {
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
try {
|
|
340
|
+
await producer;
|
|
341
|
+
} catch {
|
|
342
|
+
}
|
|
343
|
+
void consumerAborted;
|
|
309
344
|
}
|
|
310
345
|
}
|
|
311
346
|
// ==========================================================================
|
|
@@ -484,24 +519,44 @@ var LLMGateway = class {
|
|
|
484
519
|
const { responseFormat, ...rest } = options;
|
|
485
520
|
return rest;
|
|
486
521
|
}
|
|
522
|
+
/**
|
|
523
|
+
* Builds the outbound headers for a call to the NebulaOS LLM Gateway.
|
|
524
|
+
*
|
|
525
|
+
* Under ADR-0002, correlation with the NebulaOS backend is carried on
|
|
526
|
+
* domain-scoped `x-nebula-*` headers that APMs of the host process do not
|
|
527
|
+
* touch. The standard W3C `traceparent` is still emitted (same trace-id /
|
|
528
|
+
* span-id) for compatibility with caches, proxies, and log correlation —
|
|
529
|
+
* but the backend treats `x-nebula-traceparent` as the authoritative source.
|
|
530
|
+
* If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
|
|
531
|
+
* unaffected.
|
|
532
|
+
*
|
|
533
|
+
* Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
|
|
534
|
+
* no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
|
|
535
|
+
* Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
|
|
536
|
+
* plus the compat `traceparent`.
|
|
537
|
+
*/
|
|
487
538
|
buildGatewayHeaders() {
|
|
488
539
|
const headers = {
|
|
489
|
-
"x-request-id": randomUUID()
|
|
540
|
+
"x-nebula-request-id": randomUUID()
|
|
490
541
|
};
|
|
491
542
|
const ctx = Tracing.getContext();
|
|
543
|
+
const traceId = ctx?.traceId ?? randomBytes(16).toString("hex");
|
|
544
|
+
const spanId = ctx?.spanId ?? randomBytes(8).toString("hex");
|
|
545
|
+
const traceparent = `00-${traceId}-${spanId}-01`;
|
|
546
|
+
headers["x-nebula-traceparent"] = traceparent;
|
|
547
|
+
headers.traceparent = traceparent;
|
|
492
548
|
const executionId = ctx?.executionId ?? ExecutionContext.getOrUndefined()?.executionId;
|
|
493
549
|
if (executionId) {
|
|
494
|
-
headers["x-execution-id"] = executionId;
|
|
550
|
+
headers["x-nebula-execution-id"] = executionId;
|
|
495
551
|
}
|
|
496
552
|
if (ctx?.resourceName) {
|
|
497
|
-
headers["x-resource-name"] = ctx.resourceName;
|
|
553
|
+
headers["x-nebula-resource-name"] = ctx.resourceName;
|
|
554
|
+
}
|
|
555
|
+
if (ctx?.resourceType) {
|
|
556
|
+
headers["x-nebula-resource-type"] = ctx.resourceType;
|
|
498
557
|
}
|
|
499
|
-
if (ctx) {
|
|
500
|
-
headers
|
|
501
|
-
} else {
|
|
502
|
-
const traceId = randomBytes(16).toString("hex");
|
|
503
|
-
const spanId = randomBytes(8).toString("hex");
|
|
504
|
-
headers.traceparent = `00-${traceId}-${spanId}-01`;
|
|
558
|
+
if (ctx?.workspaceId) {
|
|
559
|
+
headers["x-nebula-workspace-id"] = ctx.workspaceId;
|
|
505
560
|
}
|
|
506
561
|
return headers;
|
|
507
562
|
}
|
|
@@ -657,8 +712,11 @@ var LLMGateway = class {
|
|
|
657
712
|
convertContentPart(part) {
|
|
658
713
|
if (part.type === "text") return { type: "text", text: part.text };
|
|
659
714
|
if (part.type === "file") {
|
|
660
|
-
const { data, mediaType } = part;
|
|
661
|
-
|
|
715
|
+
const { data, mediaType, filename } = part;
|
|
716
|
+
const isImage = mediaType.startsWith("image/");
|
|
717
|
+
const isPdf = mediaType === "application/pdf";
|
|
718
|
+
const isText = mediaType.startsWith("text/");
|
|
719
|
+
if (!isImage && !isPdf && !isText) {
|
|
662
720
|
throw new Error(`LLM Gateway: file mediaType '${mediaType}' is not supported yet`);
|
|
663
721
|
}
|
|
664
722
|
let url;
|
|
@@ -674,7 +732,16 @@ var LLMGateway = class {
|
|
|
674
732
|
} else {
|
|
675
733
|
throw new Error(`LLM Gateway: unsupported file data type`);
|
|
676
734
|
}
|
|
677
|
-
|
|
735
|
+
if (isImage) {
|
|
736
|
+
return { type: "image_url", image_url: { url } };
|
|
737
|
+
}
|
|
738
|
+
return {
|
|
739
|
+
type: "file",
|
|
740
|
+
file: {
|
|
741
|
+
file_data: url,
|
|
742
|
+
filename: filename ?? (isPdf ? "document.pdf" : "document.txt")
|
|
743
|
+
}
|
|
744
|
+
};
|
|
678
745
|
}
|
|
679
746
|
throw new Error(`Unsupported content type: ${part.type}`);
|
|
680
747
|
}
|