@nebulaos/llm-gateway 0.1.9 → 0.2.0

This diff shows the changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -75,6 +75,22 @@ declare class LLMGateway implements IModel {
75
75
  */
76
76
  private extractErrorSource;
77
77
  private extractExtraOptions;
78
+ /**
79
+ * Builds the outbound headers for a call to the NebulaOS LLM Gateway.
80
+ *
81
+ * Under ADR-0002, correlation with the NebulaOS backend is carried on
82
+ * domain-scoped `x-nebula-*` headers that APMs of the host process do not
83
+ * touch. The standard W3C `traceparent` is still emitted (same trace-id /
84
+ * span-id) for compatibility with caches, proxies, and log correlation —
85
+ * but the backend treats `x-nebula-traceparent` as the authoritative source.
86
+ * If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
87
+ * unaffected.
88
+ *
89
+ * Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
90
+ * no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
91
+ * Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
92
+ * plus the compat `traceparent`.
93
+ */
78
94
  private buildGatewayHeaders;
79
95
  /**
80
96
  * Extracts enrichment data from backend HTTP headers.
package/dist/index.d.ts CHANGED
@@ -75,6 +75,22 @@ declare class LLMGateway implements IModel {
75
75
  */
76
76
  private extractErrorSource;
77
77
  private extractExtraOptions;
78
+ /**
79
+ * Builds the outbound headers for a call to the NebulaOS LLM Gateway.
80
+ *
81
+ * Under ADR-0002, correlation with the NebulaOS backend is carried on
82
+ * domain-scoped `x-nebula-*` headers that APMs of the host process do not
83
+ * touch. The standard W3C `traceparent` is still emitted (same trace-id /
84
+ * span-id) for compatibility with caches, proxies, and log correlation —
85
+ * but the backend treats `x-nebula-traceparent` as the authoritative source.
86
+ * If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
87
+ * unaffected.
88
+ *
89
+ * Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
90
+ * no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
91
+ * Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
92
+ * plus the compat `traceparent`.
93
+ */
78
94
  private buildGatewayHeaders;
79
95
  /**
80
96
  * Extracts enrichment data from backend HTTP headers.
package/dist/index.js CHANGED
@@ -184,130 +184,141 @@ var LLMGateway = class {
184
184
  messages,
185
185
  tools
186
186
  };
187
- const llmSpan = await import_core.Tracing.startSpan({
187
+ const llmSpan = import_core.Tracing.startSpan({
188
188
  kind: import_types.SpanType.llm_wrapper,
189
189
  name: `llm:${this.modelName}`,
190
190
  data: startData
191
191
  });
192
- const headers = this.buildGatewayHeaders();
193
- this.logger.debug("LLM Gateway stream request", {
194
- model,
195
- baseUrl: this.baseUrl,
196
- stream: true,
197
- messageCount: messages.length,
198
- toolCount: tools?.length ?? 0
199
- });
200
- let stream;
201
- try {
202
- stream = await this.client.chat.completions.create(
203
- {
204
- model,
205
- messages: this.convertMessages(messages),
206
- tools: this.convertTools(tools),
207
- stream: true,
208
- stream_options: { include_usage: true },
209
- response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
210
- type: "json_schema",
211
- json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
212
- } : { type: "json_object" } : void 0,
213
- ...this.extractExtraOptions(mergedOptions)
214
- },
215
- { headers }
216
- );
217
- } catch (error) {
218
- this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
219
- const gatewayError = this.handleError(error);
220
- if (llmSpan) {
221
- const errorEndData = {
222
- error: {
223
- message: gatewayError.message,
224
- code: gatewayError.code,
225
- status: gatewayError.status
226
- }
227
- };
228
- await llmSpan.end({
229
- status: "error",
230
- data: errorEndData
231
- });
192
+ const queue = [];
193
+ let pendingResolve = null;
194
+ const abortController = new AbortController();
195
+ let consumerAborted = false;
196
+ const push = (item) => {
197
+ if (pendingResolve) {
198
+ const resolve = pendingResolve;
199
+ pendingResolve = null;
200
+ resolve(item);
201
+ } else {
202
+ queue.push(item);
232
203
  }
233
- throw gatewayError;
234
- }
235
- let finalUsage;
236
- let finalFinishReason;
237
- let toolCallsCount = 0;
238
- let outputPreview = "";
239
- let finalContent = "";
240
- const toolCallsAccumulator = /* @__PURE__ */ new Map();
241
- try {
242
- for await (const chunk of stream) {
243
- if (chunk.usage) {
244
- finalUsage = this.mapUsage(chunk.usage);
245
- yield {
246
- type: "finish",
247
- reason: "stop",
248
- usage: finalUsage
249
- };
250
- }
251
- const choice = chunk.choices?.[0];
252
- if (!choice) continue;
253
- if (choice.finish_reason) {
254
- finalFinishReason = this.mapFinishReason(choice.finish_reason);
255
- yield {
256
- type: "finish",
257
- reason: finalFinishReason
258
- };
259
- }
260
- const delta = choice.delta;
261
- if (!delta) continue;
262
- if (delta.content) {
263
- finalContent += delta.content;
264
- if (outputPreview.length < 200) {
265
- outputPreview += delta.content.slice(0, 200 - outputPreview.length);
204
+ };
205
+ const pull = () => {
206
+ if (queue.length > 0) return Promise.resolve(queue.shift());
207
+ return new Promise((resolve) => {
208
+ pendingResolve = resolve;
209
+ });
210
+ };
211
+ const producer = import_core.Tracing.runWithSpan(llmSpan, async () => {
212
+ const headers = this.buildGatewayHeaders();
213
+ this.logger.debug("LLM Gateway stream request", {
214
+ model,
215
+ baseUrl: this.baseUrl,
216
+ stream: true,
217
+ messageCount: messages.length,
218
+ toolCount: tools?.length ?? 0
219
+ });
220
+ let stream;
221
+ try {
222
+ stream = await this.client.chat.completions.create(
223
+ {
224
+ model,
225
+ messages: this.convertMessages(messages),
226
+ tools: this.convertTools(tools),
227
+ stream: true,
228
+ stream_options: { include_usage: true },
229
+ response_format: mergedOptions?.responseFormat?.type === "json" ? mergedOptions.responseFormat.schema ? {
230
+ type: "json_schema",
231
+ json_schema: { name: "response", schema: mergedOptions.responseFormat.schema }
232
+ } : { type: "json_object" } : void 0,
233
+ ...this.extractExtraOptions(mergedOptions)
234
+ },
235
+ { headers, signal: abortController.signal }
236
+ );
237
+ } catch (error) {
238
+ this.logger.error("LLM Gateway stream request failed", error, void 0, void 0);
239
+ throw this.handleError(error);
240
+ }
241
+ let finalUsage;
242
+ let finalFinishReason;
243
+ let toolCallsCount = 0;
244
+ let outputPreview = "";
245
+ let finalContent = "";
246
+ const toolCallsAccumulator = /* @__PURE__ */ new Map();
247
+ try {
248
+ for await (const chunk of stream) {
249
+ if (abortController.signal.aborted) break;
250
+ if (chunk.usage) {
251
+ finalUsage = this.mapUsage(chunk.usage);
252
+ push({
253
+ kind: "chunk",
254
+ value: { type: "finish", reason: "stop", usage: finalUsage }
255
+ });
266
256
  }
267
- yield { type: "content_delta", delta: delta.content };
268
- }
269
- if (delta.tool_calls) {
270
- for (const tc of delta.tool_calls) {
271
- const idx = tc.index;
272
- if (tc.id && tc.function?.name) {
273
- toolCallsCount++;
274
- toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
275
- yield {
276
- type: "tool_call_start",
277
- index: idx,
278
- id: tc.id,
279
- name: tc.function.name
280
- };
257
+ const choice = chunk.choices?.[0];
258
+ if (!choice) continue;
259
+ if (choice.finish_reason) {
260
+ finalFinishReason = this.mapFinishReason(choice.finish_reason);
261
+ push({
262
+ kind: "chunk",
263
+ value: { type: "finish", reason: finalFinishReason }
264
+ });
265
+ }
266
+ const delta = choice.delta;
267
+ if (!delta) continue;
268
+ if (delta.content) {
269
+ finalContent += delta.content;
270
+ if (outputPreview.length < 200) {
271
+ outputPreview += delta.content.slice(0, 200 - outputPreview.length);
281
272
  }
282
- if (tc.function?.arguments) {
283
- const existing = toolCallsAccumulator.get(idx);
284
- if (existing) {
285
- existing.arguments += tc.function.arguments;
273
+ push({ kind: "chunk", value: { type: "content_delta", delta: delta.content } });
274
+ }
275
+ if (delta.tool_calls) {
276
+ for (const tc of delta.tool_calls) {
277
+ const idx = tc.index;
278
+ if (tc.id && tc.function?.name) {
279
+ toolCallsCount++;
280
+ toolCallsAccumulator.set(idx, { id: tc.id, name: tc.function.name, arguments: "" });
281
+ push({
282
+ kind: "chunk",
283
+ value: {
284
+ type: "tool_call_start",
285
+ index: idx,
286
+ id: tc.id,
287
+ name: tc.function.name
288
+ }
289
+ });
290
+ }
291
+ if (tc.function?.arguments) {
292
+ const existing = toolCallsAccumulator.get(idx);
293
+ if (existing) {
294
+ existing.arguments += tc.function.arguments;
295
+ }
296
+ push({
297
+ kind: "chunk",
298
+ value: {
299
+ type: "tool_call_delta",
300
+ index: idx,
301
+ args: tc.function.arguments
302
+ }
303
+ });
286
304
  }
287
- yield {
288
- type: "tool_call_delta",
289
- index: idx,
290
- args: tc.function.arguments
291
- };
292
305
  }
293
306
  }
294
307
  }
295
- }
296
- const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
297
- id: tc.id,
298
- type: "function",
299
- function: { name: tc.name, arguments: tc.arguments }
300
- }));
301
- const choices = [{
302
- index: 0,
303
- message: {
304
- role: "assistant",
305
- content: finalContent || null,
306
- tool_calls: toolCalls.length > 0 ? toolCalls : void 0
307
- },
308
- finish_reason: finalFinishReason
309
- }];
310
- if (llmSpan) {
308
+ const toolCalls = Array.from(toolCallsAccumulator.values()).map((tc) => ({
309
+ id: tc.id,
310
+ type: "function",
311
+ function: { name: tc.name, arguments: tc.arguments }
312
+ }));
313
+ const choices = [{
314
+ index: 0,
315
+ message: {
316
+ role: "assistant",
317
+ content: finalContent || null,
318
+ tool_calls: toolCalls.length > 0 ? toolCalls : void 0
319
+ },
320
+ finish_reason: finalFinishReason
321
+ }];
311
322
  const endData = {
312
323
  usage: finalUsage ?? { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
313
324
  finishReason: finalFinishReason ?? "stop",
@@ -315,28 +326,56 @@ var LLMGateway = class {
315
326
  outputPreview,
316
327
  choices: this.sanitizeChoices(choices)
317
328
  };
318
- await llmSpan.end({
319
- status: "success",
320
- data: endData
321
- });
329
+ await llmSpan.end({ status: "success", data: endData });
330
+ } catch (error) {
331
+ this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
332
+ throw this.handleError(error);
322
333
  }
323
- } catch (error) {
324
- this.logger.error("LLM Gateway stream failed", error, void 0, void 0);
325
- const gatewayError = this.handleError(error);
326
- if (llmSpan) {
327
- const errorEndData = {
328
- error: {
329
- message: gatewayError.message,
330
- code: gatewayError.code,
331
- status: gatewayError.status
334
+ }).then(
335
+ () => push({ kind: "done" }),
336
+ (error) => push({ kind: "error", error })
337
+ );
338
+ let completedNormally = false;
339
+ try {
340
+ while (true) {
341
+ const item = await pull();
342
+ if (item.kind === "chunk") {
343
+ yield item.value;
344
+ } else if (item.kind === "done") {
345
+ completedNormally = true;
346
+ return;
347
+ } else {
348
+ completedNormally = true;
349
+ const gatewayError = item.error instanceof LLMGatewayError ? item.error : this.handleError(item.error);
350
+ if (!llmSpan.isEnded) {
351
+ const errorEndData = {
352
+ error: {
353
+ message: gatewayError.message,
354
+ code: gatewayError.code,
355
+ status: gatewayError.status
356
+ }
357
+ };
358
+ await llmSpan.end({ status: "error", data: errorEndData });
332
359
  }
333
- };
334
- await llmSpan.end({
335
- status: "error",
336
- data: errorEndData
337
- });
360
+ throw gatewayError;
361
+ }
338
362
  }
339
- throw gatewayError;
363
+ } finally {
364
+ if (!completedNormally) {
365
+ consumerAborted = true;
366
+ abortController.abort();
367
+ if (!llmSpan.isEnded) {
368
+ try {
369
+ await llmSpan.end({ status: "cancelled" });
370
+ } catch {
371
+ }
372
+ }
373
+ }
374
+ try {
375
+ await producer;
376
+ } catch {
377
+ }
378
+ void consumerAborted;
340
379
  }
341
380
  }
342
381
  // ==========================================================================
@@ -515,24 +554,44 @@ var LLMGateway = class {
515
554
  const { responseFormat, ...rest } = options;
516
555
  return rest;
517
556
  }
557
+ /**
558
+ * Builds the outbound headers for a call to the NebulaOS LLM Gateway.
559
+ *
560
+ * Under ADR-0002, correlation with the NebulaOS backend is carried on
561
+ * domain-scoped `x-nebula-*` headers that APMs of the host process do not
562
+ * touch. The standard W3C `traceparent` is still emitted (same trace-id /
563
+ * span-id) for compatibility with caches, proxies, and log correlation —
564
+ * but the backend treats `x-nebula-traceparent` as the authoritative source.
565
+ * If a host APM rewrites `traceparent` on egress, NebulaOS correlation is
566
+ * unaffected.
567
+ *
568
+ * Legacy headers (`x-request-id`, `x-execution-id`, `x-resource-name`) are
569
+ * no longer emitted; `nebulaos-cloud` accepts both sets temporarily via a
570
+ * Phase 1B fallback, but new SDK releases emit only the `x-nebula-*` set
571
+ * plus the compat `traceparent`.
572
+ */
518
573
  buildGatewayHeaders() {
519
574
  const headers = {
520
- "x-request-id": (0, import_node_crypto.randomUUID)()
575
+ "x-nebula-request-id": (0, import_node_crypto.randomUUID)()
521
576
  };
522
577
  const ctx = import_core.Tracing.getContext();
578
+ const traceId = ctx?.traceId ?? (0, import_node_crypto.randomBytes)(16).toString("hex");
579
+ const spanId = ctx?.spanId ?? (0, import_node_crypto.randomBytes)(8).toString("hex");
580
+ const traceparent = `00-${traceId}-${spanId}-01`;
581
+ headers["x-nebula-traceparent"] = traceparent;
582
+ headers.traceparent = traceparent;
523
583
  const executionId = ctx?.executionId ?? import_core.ExecutionContext.getOrUndefined()?.executionId;
524
584
  if (executionId) {
525
- headers["x-execution-id"] = executionId;
585
+ headers["x-nebula-execution-id"] = executionId;
526
586
  }
527
587
  if (ctx?.resourceName) {
528
- headers["x-resource-name"] = ctx.resourceName;
588
+ headers["x-nebula-resource-name"] = ctx.resourceName;
589
+ }
590
+ if (ctx?.resourceType) {
591
+ headers["x-nebula-resource-type"] = ctx.resourceType;
529
592
  }
530
- if (ctx) {
531
- headers.traceparent = `00-${ctx.traceId}-${ctx.spanId}-01`;
532
- } else {
533
- const traceId = (0, import_node_crypto.randomBytes)(16).toString("hex");
534
- const spanId = (0, import_node_crypto.randomBytes)(8).toString("hex");
535
- headers.traceparent = `00-${traceId}-${spanId}-01`;
593
+ if (ctx?.workspaceId) {
594
+ headers["x-nebula-workspace-id"] = ctx.workspaceId;
536
595
  }
537
596
  return headers;
538
597
  }
@@ -688,8 +747,11 @@ var LLMGateway = class {
688
747
  convertContentPart(part) {
689
748
  if (part.type === "text") return { type: "text", text: part.text };
690
749
  if (part.type === "file") {
691
- const { data, mediaType } = part;
692
- if (!mediaType.startsWith("image/")) {
750
+ const { data, mediaType, filename } = part;
751
+ const isImage = mediaType.startsWith("image/");
752
+ const isPdf = mediaType === "application/pdf";
753
+ const isText = mediaType.startsWith("text/");
754
+ if (!isImage && !isPdf && !isText) {
693
755
  throw new Error(`LLM Gateway: file mediaType '${mediaType}' is not supported yet`);
694
756
  }
695
757
  let url;
@@ -705,7 +767,16 @@ var LLMGateway = class {
705
767
  } else {
706
768
  throw new Error(`LLM Gateway: unsupported file data type`);
707
769
  }
708
- return { type: "image_url", image_url: { url } };
770
+ if (isImage) {
771
+ return { type: "image_url", image_url: { url } };
772
+ }
773
+ return {
774
+ type: "file",
775
+ file: {
776
+ file_data: url,
777
+ filename: filename ?? (isPdf ? "document.pdf" : "document.txt")
778
+ }
779
+ };
709
780
  }
710
781
  throw new Error(`Unsupported content type: ${part.type}`);
711
782
  }