@nebulaos/llm-gateway 0.1.9 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +16 -0
- package/dist/index.d.ts +16 -0
- package/dist/index.js +215 -144
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +215 -144
- package/dist/index.mjs.map +1 -1
- package/package.json +5 -5
package/dist/index.mjs
CHANGED
|
@@ -153,130 +153,141 @@ var LLMGateway = class {
|
|
|
153
153
|
messages,
|
|
154
154
|
tools
|
|
155
155
|
};
|
|
156
|
-
const llmSpan =
|
|
156
|
+
const llmSpan = Tracing.startSpan({
|
|
157
157
|
kind: SpanType.llm_wrapper,
|
|
158
158
|
name: `llm:${this.modelName}`,
|
|
159
159
|
data: startData
|
|
160
160
|
});
|
|
161
|
-
const
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
{
|
|
173
|
-
model,
|
|
174
|
-
messages: this.convertMessages(messages),
|
|
175
|
-
tools: this.convertTools(tools),
|
|
176
|
-
stream: true,
|
|
177
|
-
stream_options: { include_usage: true },
|
|
178
|
-
response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
|
|
179
|
-
type: "json_schema",
|
|
180
|
-
json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
|
|
181
|
-
} : { type: "json_object" } : void 0,
|
|
182
|
-
...this.extractExtraOptions(mergedOptions)
|
|
183
|
-
},
|
|
184
|
-
{ headers }
|
|
185
|
-
);
|
|
186
|
-
} catch (error) {
|
|
187
|
-
this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
|
|
188
|
-
const gatewayError = this.handleError(error);
|
|
189
|
-
if (llmSpan) {
|
|
190
|
-
const errorEndData = {
|
|
191
|
-
error: {
|
|
192
|
-
message: gatewayError.message,
|
|
193
|
-
code: gatewayError.code,
|
|
194
|
-
status: gatewayError.status
|
|
195
|
-
}
|
|
196
|
-
};
|
|
197
|
-
await llmSpan.end({
|
|
198
|
-
status: "error",
|
|
199
|
-
data: errorEndData
|
|
200
|
-
});
|
|
161
|
+
const queue = [];
|
|
162
|
+
let pendingResolve = null;
|
|
163
|
+
const abortController = new AbortController();
|
|
164
|
+
let consumerAborted = false;
|
|
165
|
+
const push = (item) => {
|
|
166
|
+
if (pendingResolve) {
|
|
167
|
+
const resolve = pendingResolve;
|
|
168
|
+
pendingResolve = null;
|
|
169
|
+
resolve(item);
|
|
170
|
+
} else {
|
|
171
|
+
queue.push(item);
|
|
201
172
|
}
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
const
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
173
|
+
};
|
|
174
|
+
const pull = () => {
|
|
175
|
+
if (queue.length > 0) return Promise.resolve(queue.shift());
|
|
176
|
+
return new Promise((resolve) => {
|
|
177
|
+
pendingResolve = resolve;
|
|
178
|
+
});
|
|
179
|
+
};
|
|
180
|
+
const producer = Tracing.runWithSpan(llmSpan, async () => {
|
|
181
|
+
const headers = this.buildGatewayHeaders();
|
|
182
|
+
this.logger.debug("LLM Gateway stream request", {
|
|
183
|
+
model,
|
|
184
|
+
baseUrl: this.baseUrl,
|
|
185
|
+
stream: true,
|
|
186
|
+
messageCount: messages.length,
|
|
187
|
+
toolCount: tools?.length ?? 0
|
|
188
|
+
});
|
|
189
|
+
let stream;
|
|
190
|
+
try {
|
|
191
|
+
stream = await this.client.chat.completions.create(
|
|
192
|
+
{
|
|
193
|
+
model,
|
|
194
|
+
messages: this.convertMessages(messages),
|
|
195
|
+
tools: this.convertTools(tools),
|
|
196
|
+
stream: true,
|
|
197
|
+
stream_options: { include_usage: true },
|
|
198
|
+
response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
|
|
199
|
+
type: "json_schema",
|
|
200
|
+
json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
|
|
201
|
+
} : { type: "json_object" } : void 0,
|
|
202
|
+
...this.extractExtraOptions(mergedOptions)
|
|
203
|
+
},
|
|
204
|
+
{ headers, signal: abortController.signal }
|
|
205
|
+
);
|
|
206
|
+
} catch (error) {
|
|
207
|
+
this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
|
|
208
|
+
throw this.handleError(error);
|
|
209
|
+
}
|
|
210
|
+
let finalUsage;
|
|
211
|
+
let finalFinishReason;
|
|
212
|
+
let toolCallsCount = 0;
|
|
213
|
+
let outputPreview = "";
|
|
214
|
+
let finalContent = "";
|
|
215
|
+
const toolCallsAccumulator = /* @__PURE__ */ new Map();
|
|
216
|
+
try {
|
|
217
|
+
for await (const chunk of stream) {
|
|
218
|
+
if (abortController.signal.aborted) break;
|
|
219
|
+
if (chunk.usage) {
|
|
220
|
+
finalUsage = this.mapUsage(chunk.usage);
|
|
221
|
+
push({
|
|
222
|
+
kind: "chunk",
|
|
223
|
+
value: { type: "finish", reason: "stop", usage: finalUsage }
|
|
224
|
+
});
|
|
235
225
|
}
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
226
|
+
const choice = chunk.choices?.[0];
|
|
227
|
+
if (!choice) continue;
|
|
228
|
+
if (choice.finish_reason) {
|
|
229
|
+
finalFinishReason = this.mapFinishReason(choice.finish_reason);
|
|
230
|
+
push({
|
|
231
|
+
kind: "chunk",
|
|
232
|
+
value: { type: "finish", reason: finalFinishReason }
|
|
233
|
+
});
|
|
234
|
+
}
|
|
235
|
+
const delta = choice.delta;
|
|
236
|
+
if (!delta) continue;
|
|
237
|
+
if (delta.content) {
|
|
238
|
+
finalContent += delta.content;
|
|
239
|
+
if (outputPreview.length < 200) {
|
|
240
|
+
outputPreview += delta.content.slice(0, 200 - outputPreview.length);
|
|
250
241
|
}
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
242
|
+
push({ kind: "chunk", value: { type: "content_delta", delta: delta.content } });
|
|
243
|
+
}
|
|
244
|
+
if (delta.tool_calls) {
|
|
245
|
+
for (const tc of delta.tool_calls) {
|
|
246
|
+
const idx = tc.index;
|
|
247
|
+
if (tc.id && tc.function?.name) {
|
|
248
|
+
toolCallsCount++;
|
|
249
|
+
toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
|
|
250
|
+
push({
|
|
251
|
+
kind: "chunk",
|
|
252
|
+
value: {
|
|
253
|
+
type: "tool_call_start",
|
|
254
|
+
index: idx,
|
|
255
|
+
id: tc.id,
|
|
256
|
+
name: tc.function.name
|
|
257
|
+
}
|
|
258
|
+
});
|
|
259
|
+
}
|
|
260
|
+
if (tc.function?.arguments) {
|
|
261
|
+
const existing = toolCallsAccumulator.get(idx);
|
|
262
|
+
if (existing) {
|
|
263
|
+
existing.arguments += tc.function.arguments;
|
|
264
|
+
}
|
|
265
|
+
push({
|
|
266
|
+
kind: "chunk",
|
|
267
|
+
value: {
|
|
268
|
+
type: "tool_call_delta",
|
|
269
|
+
index: idx,
|
|
270
|
+
args: tc.function.arguments
|
|
271
|
+
}
|
|
272
|
+
});
|
|
255
273
|
}
|
|
256
|
-
yield {
|
|
257
|
-
type: "tool_call_delta",
|
|
258
|
-
index: idx,
|
|
259
|
-
args: tc.function.arguments
|
|
260
|
-
};
|
|
261
274
|
}
|
|
262
275
|
}
|
|
263
276
|
}
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
}];
|
|
279
|
-
if (llmSpan) {
|
|
277
|
+
const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
|
|
278
|
+
id: tc.id,
|
|
279
|
+
type: "function",
|
|
280
|
+
function: { name: tc.name, arguments: tc.arguments }
|
|
281
|
+
}));
|
|
282
|
+
const choices = [{
|
|
283
|
+
index: 0,
|
|
284
|
+
message: {
|
|
285
|
+
role: "assistant",
|
|
286
|
+
content: finalContent || null,
|
|
287
|
+
tool_calls: toolCalls.length > 0 ? toolCalls : void 0
|
|
288
|
+
},
|
|
289
|
+
finish_reason: finalFinishReason
|
|
290
|
+
}];
|
|
280
291
|
const endData = {
|
|
281
292
|
usage: finalUsage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
|
|
282
293
|
finishReason: finalFinishReason ?? "stop",
|
|
@@ -284,28 +295,56 @@ var LLMGateway = class {
|
|
|
284
295
|
outputPreview,
|
|
285
296
|
choices: this.sanitizeChoices(choices)
|
|
286
297
|
};
|
|
287
|
-
await llmSpan.end({
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
298
|
+
await llmSpan.end({ status: "success", data: endData });
|
|
299
|
+
} catch (error) {
|
|
300
|
+
this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
|
|
301
|
+
throw this.handleError(error);
|
|
291
302
|
}
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
303
|
+
}).then(
|
|
304
|
+
() => push({ kind: "done" }),
|
|
305
|
+
(error) => push({ kind: "error", error })
|
|
306
|
+
);
|
|
307
|
+
let completedNormally = false;
|
|
308
|
+
try {
|
|
309
|
+
while (true) {
|
|
310
|
+
const item = await pull();
|
|
311
|
+
if (item.kind === "chunk") {
|
|
312
|
+
yield item.value;
|
|
313
|
+
} else if (item.kind === "done") {
|
|
314
|
+
completedNormally = true;
|
|
315
|
+
return;
|
|
316
|
+
} else {
|
|
317
|
+
completedNormally = true;
|
|
318
|
+
const gatewayError = item.error instanceof LLMGatewayError ? item.error : this.handleError(item.error);
|
|
319
|
+
if (!llmSpan.isEnded) {
|
|
320
|
+
const errorEndData = {
|
|
321
|
+
error: {
|
|
322
|
+
message: gatewayError.message,
|
|
323
|
+
code: gatewayError.code,
|
|
324
|
+
status: gatewayError.status
|
|
325
|
+
}
|
|
326
|
+
};
|
|
327
|
+
await llmSpan.end({ status: "error", data: errorEndData });
|
|
301
328
|
}
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
status: "error",
|
|
305
|
-
data: errorEndData
|
|
306
|
-
});
|
|
329
|
+
throw gatewayError;
|
|
330
|
+
}
|
|
307
331
|
}
|
|
308
|
-
|
|
332
|
+
} finally {
|
|
333
|
+
if (!completedNormally) {
|
|
334
|
+
consumerAborted = true;
|
|
335
|
+
abortController.abort();
|
|
336
|
+
if (!llmSpan.isEnded) {
|
|
337
|
+
try {
|
|
338
|
+
await llmSpan.end({ status: "cancelled" });
|
|
339
|
+
} catch {
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
try {
|
|
344
|
+
await producer;
|
|
345
|
+
} catch {
|
|
346
|
+
}
|
|
347
|
+
void consumerAborted;
|
|
309
348
|
}
|
|
310
349
|
}
|
|
311
350
|
// ==========================================================================
|
|
@@ -484,24 +523,44 @@ var LLMGateway = class {
|
|
|
484
523
|
const { responseFormat, ...rest } = options;
|
|
485
524
|
return rest;
|
|
486
525
|
}
|
|
526
|
+
/**
|
|
527
|
+
* Builds the outbound headers for a call to the NebulaOS LLM Gateway.
|
|
528
|
+
*
|
|
529
|
+
* Under ADR-0002, correlation with the NebulaOS backend is carried on
|
|
530
|
+
* domain-scoped `x-nebula-*` headers that APMs of the host process do not
|
|
531
|
+
* touch. The standard W3C `traceparent` is still emitted (same trace-id /
|
|
532
|
+
* span-id) for compatibility with caches, proxies, and log correlation —
|
|
533
|
+
* but the backend treats `x-nebula-traceparent` as the authoritative source.
|
|
534
|
+
* If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
|
|
535
|
+
* unaffected.
|
|
536
|
+
*
|
|
537
|
+
* Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
|
|
538
|
+
* no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
|
|
539
|
+
* Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
|
|
540
|
+
* plus the compat `traceparent`.
|
|
541
|
+
*/
|
|
487
542
|
buildGatewayHeaders() {
|
|
488
543
|
const headers = {
|
|
489
|
-
"x-request-id": randomUUID()
|
|
544
|
+
"x-nebula-request-id": randomUUID()
|
|
490
545
|
};
|
|
491
546
|
const ctx = Tracing.getContext();
|
|
547
|
+
const traceId = ctx?.traceId ?? randomBytes(16).toString("hex");
|
|
548
|
+
const spanId = ctx?.spanId ?? randomBytes(8).toString("hex");
|
|
549
|
+
const traceparent = `00-${traceId}-${spanId}-01`;
|
|
550
|
+
headers["x-nebula-traceparent"] = traceparent;
|
|
551
|
+
headers.traceparent = traceparent;
|
|
492
552
|
const executionId = ctx?.executionId ?? ExecutionContext.getOrUndefined()?.executionId;
|
|
493
553
|
if (executionId) {
|
|
494
|
-
headers["x-execution-id"] = executionId;
|
|
554
|
+
headers["x-nebula-execution-id"] = executionId;
|
|
495
555
|
}
|
|
496
556
|
if (ctx?.resourceName) {
|
|
497
|
-
headers["x-resource-name"] = ctx.resourceName;
|
|
557
|
+
headers["x-nebula-resource-name"] = ctx.resourceName;
|
|
558
|
+
}
|
|
559
|
+
if (ctx?.resourceType) {
|
|
560
|
+
headers["x-nebula-resource-type"] = ctx.resourceType;
|
|
498
561
|
}
|
|
499
|
-
if (ctx) {
|
|
500
|
-
headers
|
|
501
|
-
} else {
|
|
502
|
-
const traceId = randomBytes(16).toString("hex");
|
|
503
|
-
const spanId = randomBytes(8).toString("hex");
|
|
504
|
-
headers.traceparent = `00-${traceId}-${spanId}-01`;
|
|
562
|
+
if (ctx?.workspaceId) {
|
|
563
|
+
headers["x-nebula-workspace-id"] = ctx.workspaceId;
|
|
505
564
|
}
|
|
506
565
|
return headers;
|
|
507
566
|
}
|
|
@@ -657,8 +716,11 @@ var LLMGateway = class {
|
|
|
657
716
|
convertContentPart(part) {
|
|
658
717
|
if (part.type === "text") return { type: "text", text: part.text };
|
|
659
718
|
if (part.type === "file") {
|
|
660
|
-
const { data, mediaType } = part;
|
|
661
|
-
|
|
719
|
+
const { data, mediaType, filename } = part;
|
|
720
|
+
const isImage = mediaType.startsWith("image/");
|
|
721
|
+
const isPdf = mediaType === "application/pdf";
|
|
722
|
+
const isText = mediaType.startsWith("text/");
|
|
723
|
+
if (!isImage && !isPdf && !isText) {
|
|
662
724
|
throw new Error(`LLM Gateway: file mediaType '${mediaType}' is not supported yet`);
|
|
663
725
|
}
|
|
664
726
|
let url;
|
|
@@ -674,7 +736,16 @@ var LLMGateway = class {
|
|
|
674
736
|
} else {
|
|
675
737
|
throw new Error(`LLM Gateway: unsupported file data type`);
|
|
676
738
|
}
|
|
677
|
-
|
|
739
|
+
if (isImage) {
|
|
740
|
+
return { type: "image_url", image_url: { url } };
|
|
741
|
+
}
|
|
742
|
+
return {
|
|
743
|
+
type: "file",
|
|
744
|
+
file: {
|
|
745
|
+
file_data: url,
|
|
746
|
+
filename: filename ?? (isPdf ? "document.pdf" : "document.txt")
|
|
747
|
+
}
|
|
748
|
+
};
|
|
678
749
|
}
|
|
679
750
|
throw new Error(`Unsupported content type: ${part.type}`);
|
|
680
751
|
}
|