npm - @nebulaos/llm-gateway - Versions diffs - 0.1.8 → 0.2.0 - Mend

@nebulaos/llm-gateway 0.1.8 → 0.2.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/index.mjs CHANGED Viewed

@@ -57,7 +57,7 @@ var LLMGateway = class {
     };
     return Tracing.withSpan(
       {
-        kind: SpanType.llm,
+        kind: SpanType.llm_wrapper,
         name: `llm:${this.modelName}`,
         data: startData
       },
@@ -153,130 +153,141 @@ var LLMGateway = class {
       messages,
       tools
     };
-    const llmSpan = await Tracing.startSpan({
-      kind: SpanType.llm,
+    const llmSpan = Tracing.startSpan({
+      kind: SpanType.llm_wrapper,
       name: `llm:${this.modelName}`,
       data: startData
     });
-    const headers = this.buildGatewayHeaders();
-    this.logger.debug("LLM Gateway stream request", {
-      model,
-      baseUrl: this.baseUrl,
-      stream: true,
-      messageCount: messages.length,
-      toolCount: tools?.length ?? 0
-    });
-    let stream;
-    try {
-      stream = await this.client.chat.completions.create(
-        {
-          model,
-          messages: this.convertMessages(messages),
-          tools: this.convertTools(tools),
-          stream: true,
-          stream_options: { include_usage: true },
-          response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
-            type: "json_schema",
-            json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
-          } : { type: "json_object" } : void 0,
-          ...this.extractExtraOptions(mergedOptions)
-        },
-        { headers }
-      );
-    } catch (error) {
-      this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
-      const gatewayError = this.handleError(error);
-      if (llmSpan) {
-        const errorEndData = {
-          error: {
-            message: gatewayError.message,
-            code: gatewayError.code,
-            status: gatewayError.status
-          }
-        };
-        await llmSpan.end({
-          status: "error",
-          data: errorEndData
-        });
+    const queue = [];
+    let pendingResolve = null;
+    const abortController = new AbortController();
+    let consumerAborted = false;
+    const push = (item) => {
+      if (pendingResolve) {
+        const resolve = pendingResolve;
+        pendingResolve = null;
+        resolve(item);
+      } else {
+        queue.push(item);
       }
-      throw gatewayError;
-    }
-    let finalUsage;
-    let finalFinishReason;
-    let toolCallsCount = 0;
-    let outputPreview = "";
-    let finalContent = "";
-    const toolCallsAccumulator = /* @__PURE__ */ new Map();
-    try {
-      for await (const chunk of stream) {
-        if (chunk.usage) {
-          finalUsage = this.mapUsage(chunk.usage);
-          yield {
-            type: "finish",
-            reason: "stop",
-            usage: finalUsage
-          };
-        }
-        const choice = chunk.choices?.[0];
-        if (!choice) continue;
-        if (choice.finish_reason) {
-          finalFinishReason = this.mapFinishReason(choice.finish_reason);
-          yield {
-            type: "finish",
-            reason: finalFinishReason
-          };
-        }
-        const delta = choice.delta;
-        if (!delta) continue;
-        if (delta.content) {
-          finalContent += delta.content;
-          if (outputPreview.length < 200) {
-            outputPreview += delta.content.slice(0, 200 - outputPreview.length);
+    };
+    const pull = () => {
+      if (queue.length > 0) return Promise.resolve(queue.shift());
+      return new Promise((resolve) => {
+        pendingResolve = resolve;
+      });
+    };
+    const producer = Tracing.runWithSpan(llmSpan, async () => {
+      const headers = this.buildGatewayHeaders();
+      this.logger.debug("LLM Gateway stream request", {
+        model,
+        baseUrl: this.baseUrl,
+        stream: true,
+        messageCount: messages.length,
+        toolCount: tools?.length ?? 0
+      });
+      let stream;
+      try {
+        stream = await this.client.chat.completions.create(
+          {
+            model,
+            messages: this.convertMessages(messages),
+            tools: this.convertTools(tools),
+            stream: true,
+            stream_options: { include_usage: true },
+            response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
+              type: "json_schema",
+              json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
+            } : { type: "json_object" } : void 0,
+            ...this.extractExtraOptions(mergedOptions)
+          },
+          { headers, signal: abortController.signal }
+        );
+      } catch (error) {
+        this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
+        throw this.handleError(error);
+      }
+      let finalUsage;
+      let finalFinishReason;
+      let toolCallsCount = 0;
+      let outputPreview = "";
+      let finalContent = "";
+      const toolCallsAccumulator = /* @__PURE__ */ new Map();
+      try {
+        for await (const chunk of stream) {
+          if (abortController.signal.aborted) break;
+          if (chunk.usage) {
+            finalUsage = this.mapUsage(chunk.usage);
+            push({
+              kind: "chunk",
+              value: { type: "finish", reason: "stop", usage: finalUsage }
+            });
           }
-          yield { type: "content_delta", delta: delta.content };
-        }
-        if (delta.tool_calls) {
-          for (const tc of delta.tool_calls) {
-            const idx = tc.index;
-            if (tc.id && tc.function?.name) {
-              toolCallsCount++;
-              toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
-              yield {
-                type: "tool_call_start",
-                index: idx,
-                id: tc.id,
-                name: tc.function.name
-              };
+          const choice = chunk.choices?.[0];
+          if (!choice) continue;
+          if (choice.finish_reason) {
+            finalFinishReason = this.mapFinishReason(choice.finish_reason);
+            push({
+              kind: "chunk",
+              value: { type: "finish", reason: finalFinishReason }
+            });
+          }
+          const delta = choice.delta;
+          if (!delta) continue;
+          if (delta.content) {
+            finalContent += delta.content;
+            if (outputPreview.length < 200) {
+              outputPreview += delta.content.slice(0, 200 - outputPreview.length);
             }
-            if (tc.function?.arguments) {
-              const existing = toolCallsAccumulator.get(idx);
-              if (existing) {
-                existing.arguments += tc.function.arguments;
+            push({ kind: "chunk", value: { type: "content_delta", delta: delta.content } });
+          }
+          if (delta.tool_calls) {
+            for (const tc of delta.tool_calls) {
+              const idx = tc.index;
+              if (tc.id && tc.function?.name) {
+                toolCallsCount++;
+                toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
+                push({
+                  kind: "chunk",
+                  value: {
+                    type: "tool_call_start",
+                    index: idx,
+                    id: tc.id,
+                    name: tc.function.name
+                  }
+                });
+              }
+              if (tc.function?.arguments) {
+                const existing = toolCallsAccumulator.get(idx);
+                if (existing) {
+                  existing.arguments += tc.function.arguments;
+                }
+                push({
+                  kind: "chunk",
+                  value: {
+                    type: "tool_call_delta",
+                    index: idx,
+                    args: tc.function.arguments
+                  }
+                });
               }
-              yield {
-                type: "tool_call_delta",
-                index: idx,
-                args: tc.function.arguments
-              };
             }
           }
         }
-      }
-      const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
-        id: tc.id,
-        type: "function",
-        function: { name: tc.name, arguments: tc.arguments }
-      }));
-      const choices = [{
-        index: 0,
-        message: {
-          role: "assistant",
-          content: finalContent || null,
-          tool_calls: toolCalls.length > 0 ? toolCalls : void 0
-        },
-        finish_reason: finalFinishReason
-      }];
-      if (llmSpan) {
+        const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
+          id: tc.id,
+          type: "function",
+          function: { name: tc.name, arguments: tc.arguments }
+        }));
+        const choices = [{
+          index: 0,
+          message: {
+            role: "assistant",
+            content: finalContent || null,
+            tool_calls: toolCalls.length > 0 ? toolCalls : void 0
+          },
+          finish_reason: finalFinishReason
+        }];
         const endData = {
           usage: finalUsage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
           finishReason: finalFinishReason ?? "stop",
@@ -284,28 +295,56 @@ var LLMGateway = class {
           outputPreview,
           choices: this.sanitizeChoices(choices)
         };
-        await llmSpan.end({
-          status: "success",
-          data: endData
-        });
+        await llmSpan.end({ status: "success", data: endData });
+      } catch (error) {
+        this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
+        throw this.handleError(error);
       }
-    } catch (error) {
-      this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
-      const gatewayError = this.handleError(error);
-      if (llmSpan) {
-        const errorEndData = {
-          error: {
-            message: gatewayError.message,
-            code: gatewayError.code,
-            status: gatewayError.status
+    }).then(
+      () => push({ kind: "done" }),
+      (error) => push({ kind: "error", error })
+    );
+    let completedNormally = false;
+    try {
+      while (true) {
+        const item = await pull();
+        if (item.kind === "chunk") {
+          yield item.value;
+        } else if (item.kind === "done") {
+          completedNormally = true;
+          return;
+        } else {
+          completedNormally = true;
+          const gatewayError = item.error instanceof LLMGatewayError ? item.error : this.handleError(item.error);
+          if (!llmSpan.isEnded) {
+            const errorEndData = {
+              error: {
+                message: gatewayError.message,
+                code: gatewayError.code,
+                status: gatewayError.status
+              }
+            };
+            await llmSpan.end({ status: "error", data: errorEndData });
           }
-        };
-        await llmSpan.end({
-          status: "error",
-          data: errorEndData
-        });
+          throw gatewayError;
+        }
+      }
+    } finally {
+      if (!completedNormally) {
+        consumerAborted = true;
+        abortController.abort();
+        if (!llmSpan.isEnded) {
+          try {
+            await llmSpan.end({ status: "cancelled" });
+          } catch {
+          }
+        }
+      }
+      try {
+        await producer;
+      } catch {
       }
-      throw gatewayError;
+      void consumerAborted;
     }
   }
   // ==========================================================================
@@ -484,21 +523,44 @@ var LLMGateway = class {
     const { responseFormat, ...rest } = options;
     return rest;
   }
+  /**
+   * Builds the outbound headers for a call to the NebulaOS LLM Gateway.
+   *
+   * Under ADR-0002, correlation with the NebulaOS backend is carried on
+   * domain-scoped `x-nebula-*` headers that APMs of the host process do not
+   * touch. The standard W3C `traceparent` is still emitted (same trace-id /
+   * span-id) for compatibility with caches, proxies, and log correlation —
+   * but the backend treats `x-nebula-traceparent` as the authoritative source.
+   * If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
+   * unaffected.
+   *
+   * Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
+   * no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
+   * Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
+   * plus the compat `traceparent`.
+   */
   buildGatewayHeaders() {
     const headers = {
-      "x-request-id": randomUUID()
+      "x-nebula-request-id": randomUUID()
     };
     const ctx = Tracing.getContext();
+    const traceId = ctx?.traceId ?? randomBytes(16).toString("hex");
+    const spanId = ctx?.spanId ?? randomBytes(8).toString("hex");
+    const traceparent = `00-${traceId}-${spanId}-01`;
+    headers["x-nebula-traceparent"] = traceparent;
+    headers.traceparent = traceparent;
     const executionId = ctx?.executionId ?? ExecutionContext.getOrUndefined()?.executionId;
     if (executionId) {
-      headers["x-execution-id"] = executionId;
+      headers["x-nebula-execution-id"] = executionId;
     }
-    if (ctx) {
-      headers.traceparent = `00-${ctx.traceId}-${ctx.spanId}-01`;
-    } else {
-      const traceId = randomBytes(16).toString("hex");
-      const spanId = randomBytes(8).toString("hex");
-      headers.traceparent = `00-${traceId}-${spanId}-01`;
+    if (ctx?.resourceName) {
+      headers["x-nebula-resource-name"] = ctx.resourceName;
+    }
+    if (ctx?.resourceType) {
+      headers["x-nebula-resource-type"] = ctx.resourceType;
+    }
+    if (ctx?.workspaceId) {
+      headers["x-nebula-workspace-id"] = ctx.workspaceId;
     }
     return headers;
   }
@@ -654,18 +716,38 @@ var LLMGateway = class {
   convertContentPart(part) {
     if (part.type === "text") return { type: "text", text: part.text };
     if (part.type === "file") {
-      const { mimeType, source } = part.file;
-      if (!mimeType.startsWith("image/")) {
-        throw new Error(`LLM Gateway: file mimeType '${mimeType}' is not supported yet`);
+      const { data, mediaType, filename } = part;
+      const isImage = mediaType.startsWith("image/");
+      const isPdf = mediaType === "application/pdf";
+      const isText = mediaType.startsWith("text/");
+      if (!isImage && !isPdf && !isText) {
+        throw new Error(`LLM Gateway: file mediaType '${mediaType}' is not supported yet`);
       }
-      const url = source.type === "url" ? source.url : `data:${mimeType};base64,${source.base64}`;
-      return { type: "image_url", image_url: { url } };
-    }
-    if (part.type === "image_url") {
-      return { type: "image_url", image_url: { url: part.image_url.url } };
+      let url;
+      if (data instanceof Uint8Array) {
+        const base64 = Buffer.from(data).toString("base64");
+        url = `data:${mediaType};base64,${base64}`;
+      } else if (typeof data === "string") {
+        if (data.startsWith("data:") || data.includes("://")) {
+          url = data;
+        } else {
+          url = `data:${mediaType};base64,${data}`;
+        }
+      } else {
+        throw new Error(`LLM Gateway: unsupported file data type`);
+      }
+      if (isImage) {
+        return { type: "image_url", image_url: { url } };
+      }
+      return {
+        type: "file",
+        file: {
+          file_data: url,
+          filename: filename ?? (isPdf ? "document.pdf" : "document.txt")
+        }
+      };
     }
-    const _exhaustive = part;
-    throw new Error(`Unsupported content type: ${_exhaustive.type}`);
+    throw new Error(`Unsupported content type: ${part.type}`);
   }
   /**
    * Sanitize choices for observability storage.