npm - @juspay/neurolink - Versions diffs - 9.70.6 → 9.71.0 - Mend

@juspay/neurolink 9.70.6 → 9.71.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

package/CHANGELOG.md +12 -0
package/dist/browser/neurolink.min.js +344 -344
package/dist/lib/neurolink.js +53 -16
package/dist/lib/providers/googleVertex.js +257 -30
package/dist/lib/services/server/ai/observability/instrumentation.d.ts +10 -1
package/dist/lib/services/server/ai/observability/instrumentation.js +36 -1
package/dist/lib/telemetry/attributes.d.ts +31 -0
package/dist/lib/telemetry/attributes.js +46 -0
package/dist/lib/telemetry/index.d.ts +1 -1
package/dist/lib/telemetry/index.js +1 -1
package/dist/lib/utils/anthropicTraceSanitizer.d.ts +7 -0
package/dist/lib/utils/anthropicTraceSanitizer.js +26 -0
package/dist/lib/utils/json/coerce.js +85 -0
package/dist/lib/utils/mcpErrorText.d.ts +16 -0
package/dist/lib/utils/mcpErrorText.js +36 -0
package/dist/neurolink.js +53 -16
package/dist/providers/googleVertex.js +257 -30
package/dist/services/server/ai/observability/instrumentation.d.ts +10 -1
package/dist/services/server/ai/observability/instrumentation.js +36 -1
package/dist/telemetry/attributes.d.ts +31 -0
package/dist/telemetry/attributes.js +46 -0
package/dist/telemetry/index.d.ts +1 -1
package/dist/telemetry/index.js +1 -1
package/dist/utils/anthropicTraceSanitizer.d.ts +7 -0
package/dist/utils/anthropicTraceSanitizer.js +25 -0
package/dist/utils/json/coerce.js +85 -0
package/dist/utils/mcpErrorText.d.ts +16 -0
package/dist/utils/mcpErrorText.js +36 -0
package/package.json +3 -2

package/dist/lib/telemetry/attributes.d.ts CHANGED Viewed

@@ -103,3 +103,34 @@ export declare const ATTR: {
     readonly AR_DESCRIPTION: "autoresearch.description";
     readonly AR_ERROR_CODE: "autoresearch.error_code";
 };
+/**
+ * Langfuse observation/trace attribute names recognised by `@langfuse/otel`'s
+ * LangfuseSpanProcessor (already registered on the global TracerProvider). They
+ * let native (non-AI-SDK) provider paths emit spans that render as proper
+ * generation / tool observations — the same data the Vercel AI SDK's
+ * `experimental_telemetry` produced before providers moved to native SDKs.
+ *
+ * Keys fall into two groups: `TRACE_*` (values under `langfuse.trace.`) and
+ * `OBSERVATION_*` (values under `langfuse.observation.`). Every member is
+ * declared `readonly` with a string-literal type, so the exact attribute name
+ * is visible to the type checker at each use site.
+ */
+export declare const LANGFUSE_ATTR: {
+    readonly TRACE_NAME: "langfuse.trace.name";
+    readonly TRACE_INPUT: "langfuse.trace.input";
+    readonly TRACE_OUTPUT: "langfuse.trace.output";
+    readonly OBSERVATION_TYPE: "langfuse.observation.type";
+    readonly OBSERVATION_INPUT: "langfuse.observation.input";
+    readonly OBSERVATION_OUTPUT: "langfuse.observation.output";
+    readonly OBSERVATION_METADATA: "langfuse.observation.metadata";
+    readonly OBSERVATION_MODEL_NAME: "langfuse.observation.model.name";
+    readonly OBSERVATION_MODEL_PARAMETERS: "langfuse.observation.model.parameters";
+    readonly OBSERVATION_USAGE_DETAILS: "langfuse.observation.usage_details";
+    readonly OBSERVATION_LEVEL: "langfuse.observation.level";
+    readonly OBSERVATION_STATUS_MESSAGE: "langfuse.observation.status_message";
+    readonly OBSERVATION_COMPLETION_START_TIME: "langfuse.observation.completion_start_time";
+};
+/** Default ceiling (in UTF-16 code units) for serialized span attribute values. */
+export declare const SPAN_ATTRIBUTE_MAX_CHARS = 40000;
+/**
+ * Serialize an arbitrary value for a span attribute, keeping at most
+ * `maxChars` characters of the serialized text so a pathological prompt or
+ * tool result can't put megabytes on a single span. Strings pass through
+ * unserialized; everything else is JSON-stringified, with a String() fallback
+ * when JSON.stringify throws (e.g. circular structures) or yields `undefined`.
+ *
+ * When the text is cut, a `...[truncated N chars]` marker is appended after
+ * the retained prefix, so the returned string can be slightly longer than
+ * `maxChars`.
+ *
+ * @param value - Value to serialize.
+ * @param maxChars - Number of characters to keep; defaults to
+ *   `SPAN_ATTRIBUTE_MAX_CHARS`.
+ * @returns The serialized (and possibly truncated) string.
+ */
+export declare function spanJsonAttribute(value: unknown, maxChars?: number): string;

package/dist/lib/telemetry/attributes.js CHANGED Viewed

@@ -114,4 +114,50 @@ export const ATTR = {
     AR_DESCRIPTION: "autoresearch.description",
     AR_ERROR_CODE: "autoresearch.error_code",
 };
+/**
+ * Langfuse observation/trace attribute names recognised by `@langfuse/otel`'s
+ * LangfuseSpanProcessor (already registered on the global TracerProvider). They
+ * let native (non-AI-SDK) provider paths emit spans that render as proper
+ * generation / tool observations — the same data the Vercel AI SDK's
+ * `experimental_telemetry` produced before providers moved to native SDKs.
+ *
+ * Keys fall into two groups: `TRACE_*` (values under `langfuse.trace.`) and
+ * `OBSERVATION_*` (values under `langfuse.observation.`). This is a plain
+ * object literal of string constants; it is not frozen at runtime.
+ */
+export const LANGFUSE_ATTR = {
+    TRACE_NAME: "langfuse.trace.name",
+    TRACE_INPUT: "langfuse.trace.input",
+    TRACE_OUTPUT: "langfuse.trace.output",
+    OBSERVATION_TYPE: "langfuse.observation.type",
+    OBSERVATION_INPUT: "langfuse.observation.input",
+    OBSERVATION_OUTPUT: "langfuse.observation.output",
+    OBSERVATION_METADATA: "langfuse.observation.metadata",
+    OBSERVATION_MODEL_NAME: "langfuse.observation.model.name",
+    OBSERVATION_MODEL_PARAMETERS: "langfuse.observation.model.parameters",
+    OBSERVATION_USAGE_DETAILS: "langfuse.observation.usage_details",
+    OBSERVATION_LEVEL: "langfuse.observation.level",
+    OBSERVATION_STATUS_MESSAGE: "langfuse.observation.status_message",
+    OBSERVATION_COMPLETION_START_TIME: "langfuse.observation.completion_start_time",
+};
+/** Default ceiling for serialized span attribute values. */
+export const SPAN_ATTRIBUTE_MAX_CHARS = 40_000;
+/**
+ * Serialize an arbitrary value for a span attribute, hard-capped at
+ * `maxChars` so a pathological prompt or tool result can't put megabytes
+ * on a single span. Strings pass through unserialized; everything else is
+ * JSON-stringified with a String() fallback for circular structures.
+ */
+export function spanJsonAttribute(value, maxChars = SPAN_ATTRIBUTE_MAX_CHARS) {
+    let serialized;
+    try {
+        serialized =
+            typeof value === "string"
+                ? value
+                : (JSON.stringify(value) ?? String(value));
+    }
+    catch {
+        serialized = String(value);
+    }
+    if (serialized.length > maxChars) {
+        return `${serialized.slice(0, maxChars)}...[truncated ${serialized.length - maxChars} chars]`;
+    }
+    return serialized;
+}
 //# sourceMappingURL=attributes.js.map

package/dist/lib/telemetry/index.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 export { TelemetryService } from "./telemetryService.js";
 export { tracers } from "./tracers.js";
 export { withSpan, withClientSpan, withStreamSpan, withClientStreamSpan, } from "./withSpan.js";
-export { ATTR } from "./attributes.js";
+export { ATTR, LANGFUSE_ATTR, SPAN_ATTRIBUTE_MAX_CHARS, spanJsonAttribute, } from "./attributes.js";
 /**
  * Initialize telemetry for NeuroLink
  * Reuses an existing global TracerProvider when one is already registered,

package/dist/lib/telemetry/index.js CHANGED Viewed

@@ -2,7 +2,7 @@
 export { TelemetryService } from "./telemetryService.js";
 export { tracers } from "./tracers.js";
 export { withSpan, withClientSpan, withStreamSpan, withClientStreamSpan, } from "./withSpan.js";
-export { ATTR } from "./attributes.js";
+export { ATTR, LANGFUSE_ATTR, SPAN_ATTRIBUTE_MAX_CHARS, spanJsonAttribute, } from "./attributes.js";
 import { logger } from "../utils/logger.js";
 /**
  * Initialize telemetry for NeuroLink

package/dist/lib/utils/anthropicTraceSanitizer.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+import type { VertexAnthropicMessage } from "../types/index.js";
+/**
+ * Strips base64 image/PDF payloads from Anthropic messages before they go on a
+ * trace attribute — one screenshot would otherwise be megabytes on a span.
+ * Other block types pass through; the serializer still applies its length cap.
+ *
+ * @param messages - Messages whose `content` is either a string or a list of
+ *   content blocks.
+ * @returns One `{ role, content }` record per input message. `image` and
+ *   `document` blocks are replaced by `{ type, media_type, base64_chars }`,
+ *   i.e. the payload's length is recorded instead of the payload itself.
+ */
+export declare function sanitizeAnthropicMessagesForTrace(messages: VertexAnthropicMessage[]): Array<Record<string, unknown>>;

package/dist/lib/utils/anthropicTraceSanitizer.js ADDED Viewed

@@ -0,0 +1,26 @@
+/**
+ * Strips base64 image/PDF payloads from Anthropic messages before they go on a
+ * trace attribute — one screenshot would otherwise be megabytes on a span.
+ * Other block types pass through; the serializer still applies its length cap.
+ */
+export function sanitizeAnthropicMessagesForTrace(messages) {
+    return messages.map((message) => {
+        if (typeof message.content === "string") {
+            return { role: message.role, content: message.content };
+        }
+        return {
+            role: message.role,
+            content: message.content.map((block) => {
+                if (block.type === "image" || block.type === "document") {
+                    return {
+                        type: block.type,
+                        media_type: block.source.media_type,
+                        base64_chars: block.source.data.length,
+                    };
+                }
+                return block;
+            }),
+        };
+    });
+}
+//# sourceMappingURL=anthropicTraceSanitizer.js.map

package/dist/lib/utils/json/coerce.js CHANGED Viewed

@@ -49,6 +49,73 @@ function parseOrRepair(candidate) {
         return undefined;
     }
 }
+/** Bounds the recursive nested-string unwrap against pathological inputs. */
+const MAX_NESTED_UNWRAP_DEPTH = 6;
+/**
+ * Recursively replace any string-valued field whose content is itself a JSON
+ * object/array with the parsed value. Models sometimes double-encode a NESTED
+ * field — e.g. `{ "attachment": "{\"k\":1}" }` instead of
+ * `{ "attachment": { "k": 1 } }` — which fails schema validation even though the
+ * intended object is right there. (`coerceJsonToSchema` already unwraps a
+ * stringified TOP-LEVEL object; this handles the nested case.)
+ *
+ * A parsed string is NOT re-descended into: its own string fields (e.g. an
+ * attachment's `content`) are the model's intended values and must be left
+ * alone. Recursion only walks already-structural objects/arrays to find
+ * stringified fields anywhere in the tree. Returns a NEW value (never mutates
+ * the input) plus whether anything changed, so the caller can skip a redundant
+ * re-validation when nothing was unwrapped. Callers MUST re-validate the result
+ * against the schema — that gate is what keeps an over-eager unwrap (a field
+ * that should stay a string) from being accepted.
+ */
+function deepUnwrapJsonStrings(value, depth = 0) {
+    if (depth > MAX_NESTED_UNWRAP_DEPTH) {
+        return { value, changed: false };
+    }
+    if (typeof value === "string") {
+        const s = value.trim();
+        const looksJson = (s.startsWith("{") && s.endsWith("}")) ||
+            (s.startsWith("[") && s.endsWith("]"));
+        if (looksJson) {
+            try {
+                const parsed = JSON.parse(s);
+                if (parsed !== null && typeof parsed === "object") {
+                    // Parsed one stringified layer. Do NOT descend into `parsed` — its
+                    // own string fields are intended values, not double-encodings.
+                    return { value: parsed, changed: true };
+                }
+            }
+            catch {
+                // not JSON — leave the string as-is
+            }
+        }
+        return { value, changed: false };
+    }
+    if (Array.isArray(value)) {
+        let changed = false;
+        const out = value.map((item) => {
+            const r = deepUnwrapJsonStrings(item, depth + 1);
+            if (r.changed) {
+                changed = true;
+            }
+            return r.value;
+        });
+        return { value: changed ? out : value, changed };
+    }
+    if (value !== null && typeof value === "object") {
+        let changed = false;
+        const out = {};
+        for (const [k, v] of Object.entries(value)) {
+            const r = deepUnwrapJsonStrings(v, depth + 1);
+            if (r.changed) {
+                changed = true;
+            }
+            out[k] = r.value;
+        }
+        return { value: changed ? out : value, changed };
+    }
+    return { value, changed: false };
+}
 /**
  * Try to produce canonical JSON from `text`. Returns null when no JSON object
  * could be recovered (caller should then keep the raw text).
@@ -147,6 +214,24 @@ export function coerceJsonToSchema(text, schema) {
         if (safeParseable.safeParse(outcome.value).success) {
             schemaValid.push(record);
         }
+        else {
+            // The model may have double-encoded a NESTED field as a JSON string
+            // (e.g. `{"attachment":"{...}"}` instead of `{"attachment":{...}}`),
+            // which fails validation even though the intended object is present.
+            // Unwrap stringified object/array fields and re-validate before giving
+            // up — the safeParse gate rejects any over-eager unwrap.
+            const unwrapped = deepUnwrapJsonStrings(outcome.value);
+            if (unwrapped.changed &&
+                unwrapped.value !== null &&
+                typeof unwrapped.value === "object" &&
+                safeParseable.safeParse(unwrapped.value).success) {
+                schemaValid.push({
+                    value: unwrapped.value,
+                    repaired: true,
+                    truncated: candidate.truncated,
+                });
+            }
+        }
     }
     // Among schema-valid candidates prefer the MOST COMPLETE one. With nullable
     // fields a lean object (e.g. `{summary, attachment: null}`) validates

package/dist/lib/utils/mcpErrorText.d.ts CHANGED Viewed

@@ -8,3 +8,19 @@
  * must happen here and propagate to all three surfaces.
  */
 export declare function extractMcpErrorText(raw: unknown): string;
+/**
+ * MCP tools signal failure by RETURNING `{ isError: true, ... }`, not throwing,
+ * so execute()'s try/catch never sees it. Returns a capped status message for
+ * failures (undefined for success) for the caller to set the span error level.
+ *
+ * Generic over input shape: accepts either a result object or a JSON-stringified
+ * envelope (different providers hand back different shapes), mirroring
+ * `extractMcpErrorText`. A non-JSON string has no `isError` field, so it is
+ * correctly treated as "not an error" (→ undefined).
+ *
+ * Layered on `extractMcpErrorText`: this adds the `isError === true` gate and
+ * the human-readable "MCP tool returned isError: …" prefix, while the shared
+ * helper owns the content parsing and the 500-char cap. When `isError` is set
+ * but no readable text is present, falls back to a generic message.
+ *
+ * @param result - Tool result object, or a string holding its JSON encoding.
+ * @returns A status message starting with "MCP tool returned isError: " when
+ *   the result has `isError` strictly equal to `true`; otherwise `undefined`.
+ */
+export declare function extractMcpToolErrorMessage(result: unknown): string | undefined;

package/dist/lib/utils/mcpErrorText.js CHANGED Viewed

@@ -33,4 +33,40 @@ export function extractMcpErrorText(raw) {
         .map((c) => c.text);
     return texts.join(" ").substring(0, 500);
 }
+/**
+ * MCP tools signal failure by RETURNING `{ isError: true, ... }`, not throwing,
+ * so execute()'s try/catch never sees it. Returns a capped status message for
+ * failures (undefined for success) for the caller to set the span error level.
+ *
+ * Generic over input shape: accepts either a result object or a JSON-stringified
+ * envelope (different providers hand back different shapes), mirroring
+ * `extractMcpErrorText`. A non-JSON string has no `isError` field, so it is
+ * correctly treated as "not an error" (→ undefined).
+ *
+ * Layered on `extractMcpErrorText`: this adds the `isError === true` gate and
+ * the human-readable "MCP tool returned isError: …" prefix, while the shared
+ * helper owns the content parsing and the 500-char cap. When `isError` is set
+ * but no readable text is present, falls back to a generic message.
+ */
+export function extractMcpToolErrorMessage(result) {
+    let resultObj = result;
+    if (typeof resultObj === "string") {
+        try {
+            resultObj = JSON.parse(resultObj);
+        }
+        catch {
+            return undefined;
+        }
+    }
+    if (!resultObj || typeof resultObj !== "object") {
+        return undefined;
+    }
+    if (resultObj.isError !== true) {
+        return undefined;
+    }
+    const text = extractMcpErrorText(resultObj);
+    return text
+        ? `MCP tool returned isError: ${text}`
+        : "MCP tool returned isError: true";
+}
 //# sourceMappingURL=mcpErrorText.js.map

package/dist/neurolink.js CHANGED Viewed

@@ -55,7 +55,7 @@ import { createMemoryRetrievalTools } from "./memory/memoryRetrievalTools.js";
 import { getMetricsAggregator, MetricsAggregator, } from "./observability/metricsAggregator.js";
 import { SpanStatus, SpanType, CircuitBreakerOpenError, ConversationMemoryError, AuthenticationError, AuthorizationError, InvalidModelError, ModelAccessDeniedError, } from "./types/index.js";
 import { SpanSerializer } from "./observability/utils/spanSerializer.js";
-import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, runWithCurrentLangfuseContext, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
+import { flushOpenTelemetry, getLangfuseContext, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, runWithCurrentLangfuseContext, setLangfuseContext, shutdownOpenTelemetry, stampGuestRescueIdentity, } from "./services/server/ai/observability/instrumentation.js";
 import { TaskManager } from "./tasks/taskManager.js";
 import { createTaskTools } from "./tasks/tools/taskTools.js";
 import { ATTR } from "./telemetry/attributes.js";
@@ -1378,11 +1378,8 @@ Current user's request: ${currentInput}`;
      * Calls add(userId, content) which internally condenses old + new via LLM.
      * Supports additional users with per-user prompt and maxWords overrides.
      */
-    storeMemoryInBackground(originalPrompt, responseContent, userId, additionalUsers) {
-        // Preserve AsyncLocalStorage context across setImmediate boundary so that
-        // memory writes appear under the originating Langfuse trace instead of
-        // becoming orphan spans.
-        const wrappedMemoryWrite = runWithCurrentLangfuseContext(async () => {
+    storeMemoryInBackground(originalPrompt, responseContent, userId, additionalUsers, langfuseIdentity) {
+        const memoryWrite = async () => {
             try {
                 const client = this.ensureMemoryReady();
                 if (!client) {
@@ -1408,7 +1405,21 @@ Current user's request: ${currentInput}`;
             catch (error) {
                 logger.warn("Memory storage failed:", error);
             }
-        });
+        };
+        // Carry the turn's identity across the setImmediate boundary so the
+        // condensation generate + redis spans don't orphan to "guest". Keep the
+        // ambient store when it survived (generate path — carries conversationId,
+        // metadata, …); re-establish from the caller only when it was lost (stream
+        // path, which fires after the caller consumed the stream).
+        const ambient = getLangfuseContext();
+        const wrappedMemoryWrite = !(ambient?.traceName || ambient?.userId) &&
+            (langfuseIdentity?.traceName || langfuseIdentity?.sessionId)
+            ? () => setLangfuseContext({
+                userId,
+                sessionId: langfuseIdentity.sessionId ?? null,
+                traceName: langfuseIdentity.traceName ?? null,
+            }, memoryWrite)
+            : runWithCurrentLangfuseContext(memoryWrite);
         setImmediate(wrappedMemoryWrite);
     }
     /**
@@ -2801,7 +2812,15 @@ Current user's request: ${currentInput}`;
         }
         const startedAt = Date.now();
         try {
-            return await this.runWithFallbackOrchestration(optionsOrPrompt, "generate", (opts) => tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(opts, generateSpan)));
+            return await this.runWithFallbackOrchestration(optionsOrPrompt, "generate", (opts) => {
+                // Capture root-ness before startActiveSpan makes generateSpan active.
+                // The actual guest-rescue stamp is deferred to executeGenerateRequest,
+                // AFTER prepareGenerateRequest merges auth/requestContext-derived
+                // identity into options.context — otherwise an auth:{token} caller
+                // with no pre-set context.userId would stamp the root span as guest.
+                const generateIsRoot = !trace.getSpan(context.active());
+                return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(opts, generateSpan, generateIsRoot));
+            });
         }
         catch (error) {
             // Lifecycle middleware (wrapGenerate.catch in builtin/lifecycle.ts)
@@ -2973,14 +2992,17 @@ Current user's request: ${currentInput}`;
             return { error };
         }
     }
-    async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan) {
-        return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan));
+    async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan, isRootSpan) {
+        return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan, isRootSpan));
     }
-    async executeGenerateRequest(optionsOrPrompt, generateSpan) {
+    async executeGenerateRequest(optionsOrPrompt, generateSpan, isRootSpan) {
         let resolvedOptions;
         try {
             const { options, originalPrompt } = await this.prepareGenerateRequest(optionsOrPrompt, generateSpan);
             resolvedOptions = options;
+            // Stamp now that prepareGenerateRequest has merged any auth/requestContext
+            // identity into options.context (see capture of isRootSpan in generate()).
+            stampGuestRescueIdentity(generateSpan, options.context, isRootSpan);
             const earlyResult = await this.maybeHandleEarlyGenerateResult(options, generateSpan);
             if (earlyResult) {
                 generateSpan.setStatus({ code: SpanStatusCode.OK });
@@ -3545,7 +3567,7 @@ Current user's request: ${currentInput}`;
         // Memory storage
         if (this.shouldWriteMemory(options.memory, options.context?.userId, generateResult.content) &&
             options.context?.userId) {
-            this.storeMemoryInBackground(originalPrompt ?? "", generateResult.content.trim(), options.context.userId, options.memory?.additionalUsers);
+            this.storeMemoryInBackground(originalPrompt ?? "", generateResult.content.trim(), options.context.userId, options.memory?.additionalUsers, options.context);
         }
     }
     /**
@@ -5531,10 +5553,20 @@ Current user's request: ${currentInput}`;
                 [ATTR.NL_PROVIDER]: options.provider || "default",
                 [ATTR.GEN_AI_MODEL]: options.model || "default",
                 [ATTR.NL_INPUT_LENGTH]: options.input?.text?.length || 0,
-                [ATTR.NL_HAS_TOOLS]: !!(options.tools && Object.keys(options.tools).length > 0),
+                // Count registered custom tools too — chat hosts put their MCP tools
+                // in the registry, so options.tools alone under-reports.
+                [ATTR.NL_HAS_TOOLS]: !options.disableTools &&
+                    (!!(options.tools && Object.keys(options.tools).length > 0) ||
+                        this.getCustomTools().size > 0),
                 [ATTR.NL_STREAM_MODE]: true,
             },
         });
+        // streamSpan isn't active yet, so context.active() is its parent — empty =
+        // root. Capture root-ness here, but defer the actual guest-rescue stamp to
+        // after validateStreamRequestOptions merges auth/requestContext identity
+        // into options.context (below) — otherwise an auth:{token} caller with no
+        // pre-set context.userId would stamp the root span as guest.
+        const streamIsRoot = !trace.getSpan(context.active());
         const spanStartTime = Date.now();
         this._disableToolCacheForCurrentRequest = !!options.disableToolCache;
         try {
@@ -5576,6 +5608,8 @@ Current user's request: ${currentInput}`;
             const originalPrompt = options.input?.text ?? "";
             options.fileRegistry = this.fileRegistry;
             await this.validateStreamRequestOptions(options, startTime);
+            // options.context now carries any auth/requestContext-derived identity.
+            stampGuestRescueIdentity(streamSpan, options.context, streamIsRoot);
             const workflowResult = await this.maybeHandleWorkflowStreamRequest({
                 options,
                 startTime,
@@ -5585,6 +5619,9 @@ Current user's request: ${currentInput}`;
             if (workflowResult) {
                 return workflowResult;
             }
+            // Make neurolink.stream the active span so every provider span (generations,
+            // tool calls) parents under it — one Langfuse trace per turn, not a forest.
+            const streamSpanContext = trace.setSpan(context.active(), streamSpan);
             // TTS Mode 2 deferred: stream() emits text first, then synthesizes the
             // accumulated response into a single audio chunk at end-of-stream and
             // resolves `streamResult.audio` with the same TTSResult. The resolver is
@@ -5599,7 +5636,7 @@ Current user's request: ${currentInput}`;
                     resolveStreamTtsAudio = resolve;
                 })
                 : undefined;
-            const streamResult = await this.setLangfuseContextFromOptions(options, () => this.runStandardStreamRequest({
+            const streamResult = await context.with(streamSpanContext, () => this.setLangfuseContextFromOptions(options, () => this.runStandardStreamRequest({
                 options,
                 streamSpan,
                 spanStartTime,
@@ -5608,7 +5645,7 @@ Current user's request: ${currentInput}`;
                 streamId,
                 originalPrompt,
                 ttsResolver: resolveStreamTtsAudio,
-            }));
+            })));
             if (streamSttTranscription) {
                 streamResult.transcription = streamSttTranscription;
             }
@@ -6512,7 +6549,7 @@ Current user's request: ${currentInput}`;
             }
         }
         if (this.shouldWriteMemory(enhancedOptions.memory, enhancedOptions.context?.userId, accumulatedContent)) {
-            this.storeMemoryInBackground(originalPrompt ?? "", accumulatedContent.trim(), enhancedOptions.context?.userId, enhancedOptions.memory?.additionalUsers);
+            this.storeMemoryInBackground(originalPrompt ?? "", accumulatedContent.trim(), enhancedOptions.context?.userId, enhancedOptions.memory?.additionalUsers, enhancedOptions.context);
         }
     }
     /**