npm - @juspay/neurolink - Versions diffs - 9.70.6 → 9.71.0 - Mend

@juspay/neurolink 9.70.6 → 9.71.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

package/CHANGELOG.md +12 -0
package/dist/browser/neurolink.min.js +344 -344
package/dist/lib/neurolink.js +53 -16
package/dist/lib/providers/googleVertex.js +257 -30
package/dist/lib/services/server/ai/observability/instrumentation.d.ts +10 -1
package/dist/lib/services/server/ai/observability/instrumentation.js +36 -1
package/dist/lib/telemetry/attributes.d.ts +31 -0
package/dist/lib/telemetry/attributes.js +46 -0
package/dist/lib/telemetry/index.d.ts +1 -1
package/dist/lib/telemetry/index.js +1 -1
package/dist/lib/utils/anthropicTraceSanitizer.d.ts +7 -0
package/dist/lib/utils/anthropicTraceSanitizer.js +26 -0
package/dist/lib/utils/json/coerce.js +85 -0
package/dist/lib/utils/mcpErrorText.d.ts +16 -0
package/dist/lib/utils/mcpErrorText.js +36 -0
package/dist/neurolink.js +53 -16
package/dist/providers/googleVertex.js +257 -30
package/dist/services/server/ai/observability/instrumentation.d.ts +10 -1
package/dist/services/server/ai/observability/instrumentation.js +36 -1
package/dist/telemetry/attributes.d.ts +31 -0
package/dist/telemetry/attributes.js +46 -0
package/dist/telemetry/index.d.ts +1 -1
package/dist/telemetry/index.js +1 -1
package/dist/utils/anthropicTraceSanitizer.d.ts +7 -0
package/dist/utils/anthropicTraceSanitizer.js +25 -0
package/dist/utils/json/coerce.js +85 -0
package/dist/utils/mcpErrorText.d.ts +16 -0
package/dist/utils/mcpErrorText.js +36 -0
package/package.json +3 -2

package/dist/lib/neurolink.js CHANGED

@@ -55,7 +55,7 @@ import { createMemoryRetrievalTools } from "./memory/memoryRetrievalTools.js";
 import { getMetricsAggregator, MetricsAggregator, } from "./observability/metricsAggregator.js";
 import { SpanStatus, SpanType, CircuitBreakerOpenError, ConversationMemoryError, AuthenticationError, AuthorizationError, InvalidModelError, ModelAccessDeniedError, } from "./types/index.js";
 import { SpanSerializer } from "./observability/utils/spanSerializer.js";
-import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, runWithCurrentLangfuseContext, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
+import { flushOpenTelemetry, getLangfuseContext, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, runWithCurrentLangfuseContext, setLangfuseContext, shutdownOpenTelemetry, stampGuestRescueIdentity, } from "./services/server/ai/observability/instrumentation.js";
 import { TaskManager } from "./tasks/taskManager.js";
 import { createTaskTools } from "./tasks/tools/taskTools.js";
 import { ATTR } from "./telemetry/attributes.js";
@@ -1378,11 +1378,8 @@ Current user's request: ${currentInput}`;
      * Calls add(userId, content) which internally condenses old + new via LLM.
      * Supports additional users with per-user prompt and maxWords overrides.
      */
-    storeMemoryInBackground(originalPrompt, responseContent, userId, additionalUsers) {
-        // Preserve AsyncLocalStorage context across setImmediate boundary so that
-        // memory writes appear under the originating Langfuse trace instead of
-        // becoming orphan spans.
-        const wrappedMemoryWrite = runWithCurrentLangfuseContext(async () => {
+    storeMemoryInBackground(originalPrompt, responseContent, userId, additionalUsers, langfuseIdentity) {
+        const memoryWrite = async () => {
             try {
                 const client = this.ensureMemoryReady();
                 if (!client) {
@@ -1408,7 +1405,21 @@ Current user's request: ${currentInput}`;
             catch (error) {
                 logger.warn("Memory storage failed:", error);
             }
-        });
+        };
+        // Carry the turn's identity across the setImmediate boundary so the
+        // condensation generate + redis spans don't orphan to "guest". Keep the
+        // ambient store when it survived (generate path — carries conversationId,
+        // metadata, …); re-establish from the caller only when it was lost (stream
+        // path, which fires after the caller consumed the stream).
+        const ambient = getLangfuseContext();
+        const wrappedMemoryWrite = !(ambient?.traceName || ambient?.userId) &&
+            (langfuseIdentity?.traceName || langfuseIdentity?.sessionId)
+            ? () => setLangfuseContext({
+                userId,
+                sessionId: langfuseIdentity.sessionId ?? null,
+                traceName: langfuseIdentity.traceName ?? null,
+            }, memoryWrite)
+            : runWithCurrentLangfuseContext(memoryWrite);
         setImmediate(wrappedMemoryWrite);
     }
     /**
@@ -2801,7 +2812,15 @@ Current user's request: ${currentInput}`;
         }
         const startedAt = Date.now();
         try {
-            return await this.runWithFallbackOrchestration(optionsOrPrompt, "generate", (opts) => tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(opts, generateSpan)));
+            return await this.runWithFallbackOrchestration(optionsOrPrompt, "generate", (opts) => {
+                // Capture root-ness before startActiveSpan makes generateSpan active.
+                // The actual guest-rescue stamp is deferred to executeGenerateRequest,
+                // AFTER prepareGenerateRequest merges auth/requestContext-derived
+                // identity into options.context — otherwise an auth:{token} caller
+                // with no pre-set context.userId would stamp the root span as guest.
+                const generateIsRoot = !trace.getSpan(context.active());
+                return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(opts, generateSpan, generateIsRoot));
+            });
         }
         catch (error) {
             // Lifecycle middleware (wrapGenerate.catch in builtin/lifecycle.ts)
@@ -2973,14 +2992,17 @@ Current user's request: ${currentInput}`;
             return { error };
         }
     }
-    async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan) {
-        return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan));
+    async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan, isRootSpan) {
+        return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan, isRootSpan));
     }
-    async executeGenerateRequest(optionsOrPrompt, generateSpan) {
+    async executeGenerateRequest(optionsOrPrompt, generateSpan, isRootSpan) {
         let resolvedOptions;
         try {
             const { options, originalPrompt } = await this.prepareGenerateRequest(optionsOrPrompt, generateSpan);
             resolvedOptions = options;
+            // Stamp now that prepareGenerateRequest has merged any auth/requestContext
+            // identity into options.context (see capture of isRootSpan in generate()).
+            stampGuestRescueIdentity(generateSpan, options.context, isRootSpan);
             const earlyResult = await this.maybeHandleEarlyGenerateResult(options, generateSpan);
             if (earlyResult) {
                 generateSpan.setStatus({ code: SpanStatusCode.OK });
@@ -3545,7 +3567,7 @@ Current user's request: ${currentInput}`;
         // Memory storage
         if (this.shouldWriteMemory(options.memory, options.context?.userId, generateResult.content) &&
             options.context?.userId) {
-            this.storeMemoryInBackground(originalPrompt ?? "", generateResult.content.trim(), options.context.userId, options.memory?.additionalUsers);
+            this.storeMemoryInBackground(originalPrompt ?? "", generateResult.content.trim(), options.context.userId, options.memory?.additionalUsers, options.context);
         }
     }
     /**
@@ -5531,10 +5553,20 @@ Current user's request: ${currentInput}`;
                 [ATTR.NL_PROVIDER]: options.provider || "default",
                 [ATTR.GEN_AI_MODEL]: options.model || "default",
                 [ATTR.NL_INPUT_LENGTH]: options.input?.text?.length || 0,
-                [ATTR.NL_HAS_TOOLS]: !!(options.tools && Object.keys(options.tools).length > 0),
+                // Count registered custom tools too — chat hosts put their MCP tools
+                // in the registry, so options.tools alone under-reports.
+                [ATTR.NL_HAS_TOOLS]: !options.disableTools &&
+                    (!!(options.tools && Object.keys(options.tools).length > 0) ||
+                        this.getCustomTools().size > 0),
                 [ATTR.NL_STREAM_MODE]: true,
             },
         });
+        // streamSpan isn't active yet, so context.active() is its parent — empty =
+        // root. Capture root-ness here, but defer the actual guest-rescue stamp to
+        // after validateStreamRequestOptions merges auth/requestContext identity
+        // into options.context (below) — otherwise an auth:{token} caller with no
+        // pre-set context.userId would stamp the root span as guest.
+        const streamIsRoot = !trace.getSpan(context.active());
         const spanStartTime = Date.now();
         this._disableToolCacheForCurrentRequest = !!options.disableToolCache;
         try {
@@ -5576,6 +5608,8 @@ Current user's request: ${currentInput}`;
             const originalPrompt = options.input?.text ?? "";
             options.fileRegistry = this.fileRegistry;
             await this.validateStreamRequestOptions(options, startTime);
+            // options.context now carries any auth/requestContext-derived identity.
+            stampGuestRescueIdentity(streamSpan, options.context, streamIsRoot);
             const workflowResult = await this.maybeHandleWorkflowStreamRequest({
                 options,
                 startTime,
@@ -5585,6 +5619,9 @@ Current user's request: ${currentInput}`;
             if (workflowResult) {
                 return workflowResult;
             }
+            // Make neurolink.stream the active span so every provider span (generations,
+            // tool calls) parents under it — one Langfuse trace per turn, not a forest.
+            const streamSpanContext = trace.setSpan(context.active(), streamSpan);
             // TTS Mode 2 deferred: stream() emits text first, then synthesizes the
             // accumulated response into a single audio chunk at end-of-stream and
             // resolves `streamResult.audio` with the same TTSResult. The resolver is
@@ -5599,7 +5636,7 @@ Current user's request: ${currentInput}`;
                     resolveStreamTtsAudio = resolve;
                 })
                 : undefined;
-            const streamResult = await this.setLangfuseContextFromOptions(options, () => this.runStandardStreamRequest({
+            const streamResult = await context.with(streamSpanContext, () => this.setLangfuseContextFromOptions(options, () => this.runStandardStreamRequest({
                 options,
                 streamSpan,
                 spanStartTime,
@@ -5608,7 +5645,7 @@ Current user's request: ${currentInput}`;
                 streamId,
                 originalPrompt,
                 ttsResolver: resolveStreamTtsAudio,
-            }));
+            })));
             if (streamSttTranscription) {
                 streamResult.transcription = streamSttTranscription;
             }
@@ -6512,7 +6549,7 @@ Current user's request: ${currentInput}`;
             }
         }
         if (this.shouldWriteMemory(enhancedOptions.memory, enhancedOptions.context?.userId, accumulatedContent)) {
-            this.storeMemoryInBackground(originalPrompt ?? "", accumulatedContent.trim(), enhancedOptions.context?.userId, enhancedOptions.memory?.additionalUsers);
+            this.storeMemoryInBackground(originalPrompt ?? "", accumulatedContent.trim(), enhancedOptions.context?.userId, enhancedOptions.memory?.additionalUsers, enhancedOptions.context);
         }
     }
     /**

package/dist/lib/providers/googleVertex.js CHANGED

@@ -22,9 +22,12 @@ import { createNativeThinkingConfig } from "../utils/thinkingConfig.js";
 import { TimeoutError, withTimeout } from "../utils/async/index.js";
 import { parseTimeout } from "../utils/timeout.js";
 import { createTextChannel, extractThoughtSignature, prependConversationMessages, } from "./googleNativeGemini3.js";
-import { ATTR, tracers, withClientSpan, withClientStreamSpan, withSpan, } from "../telemetry/index.js";
+import { ATTR, LANGFUSE_ATTR, spanJsonAttribute, tracers, withClientSpan, withClientStreamSpan, withSpan, } from "../telemetry/index.js";
+import { SpanKind, SpanStatusCode, context as otelContext, trace as otelTrace, } from "@opentelemetry/api";
 import { calculateCost } from "../utils/pricing.js";
 import { transformToolExecutions } from "../utils/transformationUtils.js";
+import { sanitizeAnthropicMessagesForTrace } from "../utils/anthropicTraceSanitizer.js";
+import { extractMcpToolErrorMessage } from "../utils/mcpErrorText.js";
 // Import proper types for multimodal message handling
 // Dynamic import helper for native Anthropic Vertex SDK
 let anthropicVertexModule = null;
@@ -2390,6 +2393,49 @@ export class GoogleVertexProvider extends BaseProvider {
         };
         const toolsUsedRef = [];
         const structuredOutputRef = {};
+        // Langfuse/OTel: the native SDK bypasses the Vercel AI SDK's
+        // experimental_telemetry, so emit spans manually — one turn span, one
+        // generation span per API call, one tool span per execution — all carrying
+        // langfuse.* attributes the LangfuseSpanProcessor maps to observations.
+        // Usage lives ONLY on the generation spans (Langfuse sums usage across
+        // observations for trace totals, so repeating it on the turn double-counts).
+        const offeredToolNames = (tools ?? []).map((anthropicTool) => anthropicTool.name);
+        const turnInputAttribute = spanJsonAttribute({
+            system: systemPromptWithSchema,
+            messages: sanitizeAnthropicMessagesForTrace(messages),
+        });
+        const turnSpan = tracers.provider.startSpan("anthropic.vertex.stream", {
+            kind: SpanKind.CLIENT,
+            attributes: {
+                // Mark as span, not generation — without it Langfuse infers "generation"
+                // from the gen_ai.* attributes; the model calls live in child spans.
+                [LANGFUSE_ATTR.OBSERVATION_TYPE]: "span",
+                [ATTR.GEN_AI_SYSTEM]: "anthropic",
+                [ATTR.GEN_AI_MODEL]: modelName,
+                [ATTR.GEN_AI_OPERATION]: "stream",
+                [ATTR.NL_PROVIDER]: this.providerName,
+                [ATTR.NL_TOOL_COUNT]: offeredToolNames.length,
+                [LANGFUSE_ATTR.OBSERVATION_INPUT]: turnInputAttribute,
+                // Also lift IO to the trace — Langfuse reads trace input/output from
+                // langfuse.trace.* and the trace list is unreadable without it.
+                [LANGFUSE_ATTR.TRACE_INPUT]: turnInputAttribute,
+                [LANGFUSE_ATTR.OBSERVATION_METADATA]: spanJsonAttribute({
+                    toolsOffered: offeredToolNames,
+                    toolCount: offeredToolNames.length,
+                    maxSteps,
+                    structuredOutput: useFinalResultTool,
+                }),
+            },
+        });
+        const turnContext = otelTrace.setSpan(otelContext.active(), turnSpan);
+        let aggregatedTurnText = "";
+        // Anthropic prompt-cache token accounting, aggregated across loop steps.
+        const turnCacheUsage = {
+            read: 0,
+            creation: 0,
+            creation5m: 0,
+            creation1h: 0,
+        };
         // Track the active Anthropic stream so options.abortSignal can cancel it
         // mid-flight (pre-rewrite code had no abort handling — fixed for free).
         let activeStream;
@@ -2419,29 +2465,116 @@ export class GoogleVertexProvider extends BaseProvider {
                         throw new Error("Stream aborted by caller");
                     }
                     step++;
-                    const stream = await client.messages.stream({
-                        ...requestParams,
-                        messages: currentMessages,
-                    });
-                    activeStream = stream;
-                    // Forward each text delta to the consumer as it arrives. The
-                    // Anthropic SDK fires this listener synchronously for every
-                    // content_block_delta SSE event, so the channel sees bytes at
-                    // the same cadence the wire delivers them.
-                    stream.on("text", (delta) => {
-                        if (delta.length > 0) {
-                            channel.push(delta);
+                    // One generation observation per API call: request in, content + usage out.
+                    const generationSpan = tracers.generation.startSpan("anthropic.messages.stream", {
+                        kind: SpanKind.CLIENT,
+                        attributes: {
+                            [LANGFUSE_ATTR.OBSERVATION_TYPE]: "generation",
+                            [LANGFUSE_ATTR.OBSERVATION_MODEL_NAME]: modelName,
+                            [LANGFUSE_ATTR.OBSERVATION_MODEL_PARAMETERS]: spanJsonAttribute({
+                                max_tokens: requestParams.max_tokens,
+                                temperature: requestParams.temperature,
+                                top_p: requestParams.top_p,
+                            }),
+                            [LANGFUSE_ATTR.OBSERVATION_INPUT]: spanJsonAttribute({
+                                system: systemPromptWithSchema,
+                                messages: sanitizeAnthropicMessagesForTrace(currentMessages),
+                            }),
+                            [LANGFUSE_ATTR.OBSERVATION_METADATA]: spanJsonAttribute({
+                                step,
+                                toolsOffered: offeredToolNames.length,
+                            }),
+                            [ATTR.GEN_AI_SYSTEM]: "anthropic",
+                            [ATTR.GEN_AI_MODEL]: modelName,
+                            [ATTR.GEN_AI_OPERATION]: "chat",
+                        },
+                    }, turnContext);
+                    let response;
+                    try {
+                        const stream = await client.messages.stream({
+                            ...requestParams,
+                            messages: currentMessages,
+                        });
+                        activeStream = stream;
+                        // Forward each text delta as it arrives — the Anthropic SDK fires
+                        // this synchronously per content_block_delta, so the channel streams
+                        // at wire cadence. The first delta stamps completion_start_time,
+                        // giving Langfuse the generation's time-to-first-token.
+                        let firstDeltaSeen = false;
+                        stream.on("text", (delta) => {
+                            if (delta.length > 0) {
+                                if (!firstDeltaSeen) {
+                                    firstDeltaSeen = true;
+                                    generationSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_COMPLETION_START_TIME, new Date().toISOString());
+                                }
+                                channel.push(delta);
+                            }
+                        });
+                        // finalMessage() resolves AFTER message_stop. By then the listener
+                        // has already fired for every delta — awaiting here doesn't block
+                        // visible streaming, it just gives us the structured response
+                        // shape needed for tool_use block extraction.
+                        response = await stream.finalMessage();
+                    }
+                    catch (modelCallError) {
+                        generationSpan.setStatus({
+                            code: SpanStatusCode.ERROR,
+                            message: modelCallError instanceof Error
+                                ? modelCallError.message
+                                : String(modelCallError),
+                        });
+                        if (modelCallError instanceof Error) {
+                            generationSpan.recordException(modelCallError);
                         }
-                    });
-                    // finalMessage() resolves AFTER message_stop. By then the listener
-                    // has already fired for every delta — awaiting here doesn't block
-                    // visible streaming, it just gives us the structured response
-                    // shape needed for tool_use block extraction.
-                    const response = await stream.finalMessage();
+                        generationSpan.end();
+                        throw modelCallError;
+                    }
                     activeStream = undefined;
-                    usage.input += response.usage?.input_tokens || 0;
-                    usage.output += response.usage?.output_tokens || 0;
-                    usage.total = usage.input + usage.output;
+                    // End the generation span even if the bookkeeping below throws (else
+                    // it leaks). The model-call error path already ended it — no double-end.
+                    try {
+                        const stepCacheRead = response.usage?.cache_read_input_tokens ?? 0;
+                        const stepCacheCreation = response.usage?.cache_creation_input_tokens ?? 0;
+                        const stepCacheCreation5m = response.usage?.cache_creation?.ephemeral_5m_input_tokens ?? 0;
+                        const stepCacheCreation1h = response.usage?.cache_creation?.ephemeral_1h_input_tokens ?? 0;
+                        turnCacheUsage.read += stepCacheRead;
+                        turnCacheUsage.creation += stepCacheCreation;
+                        turnCacheUsage.creation5m += stepCacheCreation5m;
+                        turnCacheUsage.creation1h += stepCacheCreation1h;
+                        usage.input += response.usage?.input_tokens || 0;
+                        usage.output += response.usage?.output_tokens || 0;
+                        usage.total = usage.input + usage.output;
+                        for (const block of response.content) {
+                            if (block.type === "text" && typeof block.text === "string") {
+                                aggregatedTurnText += block.text;
+                            }
+                        }
+                        generationSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_OUTPUT, spanJsonAttribute(response.content));
+                        // 5m and 1h cache-creation are priced differently, so keep both;
+                        // drop the aggregate input_cache_creation (= 5m + 1h) that would
+                        // double-count. total sums the per-TTL keys shown here to match them.
+                        generationSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_USAGE_DETAILS, spanJsonAttribute({
+                            input: response.usage?.input_tokens ?? 0,
+                            output: response.usage?.output_tokens ?? 0,
+                            input_cached_tokens: stepCacheRead,
+                            input_cache_creation_5m: stepCacheCreation5m,
+                            input_cache_creation_1h: stepCacheCreation1h,
+                            total: (response.usage?.input_tokens ?? 0) +
+                                (response.usage?.output_tokens ?? 0) +
+                                stepCacheRead +
+                                stepCacheCreation5m +
+                                stepCacheCreation1h,
+                        }));
+                        generationSpan.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, response.usage?.input_tokens ?? 0);
+                        generationSpan.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, response.usage?.output_tokens ?? 0);
+                        if (response.stop_reason) {
+                            generationSpan.setAttribute(ATTR.GEN_AI_FINISH_REASON, response.stop_reason);
+                        }
+                        generationSpan.setStatus({ code: SpanStatusCode.OK });
+                    }
+                    finally {
+                        generationSpan.end();
+                    }
                     const toolUseBlocks = response.content.filter((block) => block.type === "tool_use");
                     // Structured-output pattern: when the model returns the
                     // final_result tool call, push its arguments as JSON and stop.
@@ -2481,6 +2614,38 @@ export class GoogleVertexProvider extends BaseProvider {
                             toolName: toolUse.name,
                             args: toolUse.input,
                         });
+                        // One tool observation per execution. ai.toolCall.* names follow the
+                        // Vercel AI SDK convention so existing tooling keeps working.
+                        const toolSpan = tracers.mcp.startSpan("ai.toolCall", {
+                            kind: SpanKind.INTERNAL,
+                            attributes: {
+                                [LANGFUSE_ATTR.OBSERVATION_TYPE]: "tool",
+                                [ATTR.GEN_AI_TOOL_NAME]: toolUse.name,
+                                "ai.toolCall.name": toolUse.name,
+                                "ai.toolCall.id": toolUse.id,
+                                "ai.toolCall.args": spanJsonAttribute(toolUse.input, 20_000),
+                                [LANGFUSE_ATTR.OBSERVATION_INPUT]: spanJsonAttribute(toolUse.input, 20_000),
+                                [LANGFUSE_ATTR.OBSERVATION_METADATA]: spanJsonAttribute({
+                                    step,
+                                }),
+                            },
+                        }, turnContext);
+                        const endToolSpan = (output, errorMessage) => {
+                            toolSpan.setAttribute("ai.toolCall.result", spanJsonAttribute(output));
+                            toolSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_OUTPUT, spanJsonAttribute(output));
+                            if (errorMessage) {
+                                toolSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_LEVEL, "ERROR");
+                                toolSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_STATUS_MESSAGE, errorMessage);
+                                toolSpan.setStatus({
+                                    code: SpanStatusCode.ERROR,
+                                    message: errorMessage,
+                                });
+                            }
+                            else {
+                                toolSpan.setStatus({ code: SpanStatusCode.OK });
+                            }
+                            toolSpan.end();
+                        };
                         const execute = executeMap.get(toolUse.name);
                         if (execute) {
                             try {
@@ -2489,7 +2654,13 @@ export class GoogleVertexProvider extends BaseProvider {
                                     messages: [],
                                     abortSignal: options.abortSignal,
                                 };
-                                const result = await execute(toolUse.input, toolOptions);
+                                // Run with toolSpan active so spans inside execute
+                                // (neurolink.tool.execute) nest under this observation instead
+                                // of becoming disconnected siblings.
+                                const result = await otelContext.with(otelTrace.setSpan(turnContext, toolSpan), () => execute(toolUse.input, toolOptions));
+                                // MCP failures are returned, not thrown — surface them on
+                                // the span so failed calls show as ERROR in Langfuse.
+                                endToolSpan(result, extractMcpToolErrorMessage(result));
                                 toolExecutions.push({
                                     name: toolUse.name,
                                     input: toolUse.input,
@@ -2515,6 +2686,7 @@ export class GoogleVertexProvider extends BaseProvider {
                             catch (err) {
                                 const errMsg = `Error executing tool "${toolUse.name}": ${err instanceof Error ? err.message : String(err)}`;
                                 const errorPayload = { error: errMsg };
+                                endToolSpan(errorPayload, errMsg);
                                 toolExecutions.push({
                                     name: toolUse.name,
                                     input: toolUse.input,
@@ -2535,6 +2707,7 @@ export class GoogleVertexProvider extends BaseProvider {
                         else {
                             const errMsg = `TOOL_NOT_FOUND: The tool "${toolUse.name}" does not exist.`;
                             const errorPayload = { error: errMsg };
+                            endToolSpan(errorPayload, errMsg);
                             toolExecutions.push({
                                 name: toolUse.name,
                                 input: toolUse.input,
@@ -2577,13 +2750,49 @@ export class GoogleVertexProvider extends BaseProvider {
                 }
                 metadata.responseTime = Date.now() - startTime;
                 metadata.totalToolExecutions = allToolCalls.filter((tc) => tc.toolName !== "final_result").length;
+                const turnOutputAttribute = spanJsonAttribute({
+                    text: aggregatedTurnText,
+                    ...(structuredOutputRef.value
+                        ? { structuredOutput: structuredOutputRef.value }
+                        : {}),
+                });
+                turnSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_OUTPUT, turnOutputAttribute);
+                turnSpan.setAttribute(LANGFUSE_ATTR.TRACE_OUTPUT, turnOutputAttribute);
+                // Turn usage is metadata-only (not usage_details) — see the note at the
+                // top of this method on why it must not contribute to the cost rollup.
+                turnSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_METADATA, spanJsonAttribute({
+                    toolsOffered: offeredToolNames,
+                    toolCount: offeredToolNames.length,
+                    maxSteps,
+                    steps: step,
+                    toolCallCount: metadata.totalToolExecutions,
+                    toolsCalled: toolsUsedRef.filter((name) => name !== "final_result"),
+                    structuredOutput: useFinalResultTool,
+                    usage: {
+                        input: usage.input,
+                        output: usage.output,
+                        input_cached_tokens: turnCacheUsage.read,
+                        input_cache_creation: turnCacheUsage.creation,
+                        input_cache_creation_5m: turnCacheUsage.creation5m,
+                        input_cache_creation_1h: turnCacheUsage.creation1h,
+                    },
+                }));
+                turnSpan.setStatus({ code: SpanStatusCode.OK });
                 channel.close();
             }
             catch (err) {
+                turnSpan.setStatus({
+                    code: SpanStatusCode.ERROR,
+                    message: err instanceof Error ? err.message : String(err),
+                });
+                if (err instanceof Error) {
+                    turnSpan.recordException(err);
+                }
                 logger.error("[GoogleVertex] Native Anthropic SDK stream error", err);
                 channel.error(this.handleProviderError(err));
             }
             finally {
+                turnSpan.end();
                 options.abortSignal?.removeEventListener("abort", abortHandler);
                 clearTimeout(streamTimeoutHandle);
             }
@@ -3316,6 +3525,15 @@ export class GoogleVertexProvider extends BaseProvider {
             const inputPrompt = mergedOptions.input?.text ||
                 mergedOptions.prompt ||
                 "";
+            // Set generation input before the call so error paths still carry the
+            // request; output is set after the native call resolves.
+            const generationInputAttribute = spanJsonAttribute({
+                ...(mergedOptions.systemPrompt
+                    ? { system: mergedOptions.systemPrompt }
+                    : {}),
+                prompt: inputPrompt,
+            });
+            generateSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_INPUT, generationInputAttribute);
             try {
                 let result;
                 // Wrap the actual native generate call in `neurolink.executeGeneration`
@@ -3332,20 +3550,28 @@ export class GoogleVertexProvider extends BaseProvider {
                         "neurolink.path": isAnthropicModel(modelName)
                             ? "native.anthropic"
                             : "native.google-genai",
+                        [LANGFUSE_ATTR.OBSERVATION_INPUT]: generationInputAttribute,
                     },
-                }, async () => {
+                }, async (executionSpan) => {
+                    let nativeResult;
                     if (isAnthropicModel(modelName)) {
                         logger.info("[GoogleVertex] Routing Claude generate to native @anthropic-ai/vertex-sdk", {
                             model: modelName,
                             totalToolCount: Object.keys(mergedOptions.tools).length,
                         });
-                        return this.executeNativeAnthropicGenerate(mergedOptions);
+                        nativeResult =
+                            await this.executeNativeAnthropicGenerate(mergedOptions);
                     }
-                    logger.info("[GoogleVertex] Routing Gemini generate to native @google/genai", {
-                        model: modelName,
-                        totalToolCount: Object.keys(mergedOptions.tools).length,
-                    });
-                    return this.executeNativeGemini3Generate(mergedOptions);
+                    else {
+                        logger.info("[GoogleVertex] Routing Gemini generate to native @google/genai", {
+                            model: modelName,
+                            totalToolCount: Object.keys(mergedOptions.tools).length,
+                        });
+                        nativeResult =
+                            await this.executeNativeGemini3Generate(mergedOptions);
+                    }
+                    executionSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_OUTPUT, spanJsonAttribute(nativeResult?.content ?? ""));
+                    return nativeResult;
                 });
                 this.attachUsageAndCostAttributes(generateSpan, modelName, result?.usage);
                 // Pipe through TTS-of-AI-response when caller asks for it. The
@@ -3353,6 +3579,7 @@ export class GoogleVertexProvider extends BaseProvider {
                 // enabled / useAiResponse is false, so the cost is zero on
                 // non-TTS paths.
                 result = await this.synthesizeAIResponseIfNeeded(result, options);
+                generateSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_OUTPUT, spanJsonAttribute(result?.content ?? ""));
                 // Fire onFinish lifecycle callback for the native generate path.
                 // Pipeline A providers get this for free via the AI SDK middleware
                 // wrapper (LifecycleMiddleware); native @google/genai bypasses

package/dist/lib/services/server/ai/observability/instrumentation.d.ts CHANGED Viewed

@@ -6,7 +6,7 @@
  *
  * Flow: Vercel AI SDK → OpenTelemetry Spans → LangfuseSpanProcessor → Langfuse Platform
  */
-import { trace } from "@opentelemetry/api";
+import { trace, type Span as ApiSpan } from "@opentelemetry/api";
 import { LoggerProvider } from "@opentelemetry/sdk-logs";
 import { type SpanProcessor } from "@opentelemetry/sdk-trace-base";
 import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
@@ -146,6 +146,15 @@ export declare function setLangfuseContext<T = void>(context: {
  * console.log(context?.userId, context?.sessionId);
  */
 export declare function getLangfuseContext(): LangfuseContext | undefined;
+/**
+ * Fill a span's Langfuse identity when the caller's context would otherwise
+ * fall back to "guest". Identity (user.id / session.id) is set additively —
+ * only fields ambient context didn't already provide, so a host's own context
+ * is never overridden. trace.name (the title) is rescued only when no ambient
+ * name source exists AND this span is the trace root, mirroring
+ * ContextEnricher.onStart so a host wrapper span isn't relabelled.
+ *
+ * "Ambient context" is whatever getLangfuseContext() returns at call time.
+ *
+ * @param span - Span that receives the rescued attributes.
+ * @param callContext - Per-call context. Typed `unknown`; only non-empty
+ *   string `userId`, `sessionId` and `traceName` properties are used, and any
+ *   other shape results in no attributes being written.
+ * @param isRootSpan - Whether `span` is the trace root; the trace name is
+ *   only written when this is true.
+ * @returns Nothing; the function only sets attributes on `span`.
+ */
+export declare function stampGuestRescueIdentity(span: ApiSpan, callContext: unknown, isRootSpan: boolean): void;
 /**
  * Capture the current Langfuse AsyncLocalStorage context and return a wrapper
  * that re-enters that context when executing the provided callback.

package/dist/lib/services/server/ai/observability/instrumentation.js CHANGED Viewed

@@ -6,7 +6,7 @@
  *
  * Flow: Vercel AI SDK → OpenTelemetry Spans → LangfuseSpanProcessor → Langfuse Platform
  */
-import { metrics, SpanStatusCode, trace } from "@opentelemetry/api";
+import { metrics, SpanStatusCode, trace, } from "@opentelemetry/api";
 import { W3CTraceContextPropagator } from "@opentelemetry/core";
 import { OTLPLogExporter } from "@opentelemetry/exporter-logs-otlp-http";
 import { OTLPMetricExporter } from "@opentelemetry/exporter-metrics-otlp-http";
@@ -20,6 +20,7 @@ import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION, } from "@opentelemetry/semanti
 import { AsyncLocalStorage } from "async_hooks";
 import { extractMcpErrorText } from "../../../../utils/mcpErrorText.js";
 import { logger } from "../../../../utils/logger.js";
+import { LANGFUSE_ATTR } from "../../../../telemetry/attributes.js";
 const LOG_PREFIX = "[OpenTelemetry]";
 function createOtelResource(config, serviceName) {
     return resourceFromAttributes({
@@ -1117,6 +1118,40 @@ export async function setLangfuseContext(context, callback) {
 export function getLangfuseContext() {
     return contextStorage.getStore();
 }
+/**
+ * Fill a span's Langfuse identity when the caller's context would otherwise
+ * fall back to "guest". Identity (user.id / session.id) is set additively —
+ * only fields ambient context didn't already provide, so a host's own context
+ * is never overridden. trace.name (the title) is rescued only when no ambient
+ * name source exists AND this span is the trace root, mirroring
+ * ContextEnricher.onStart so a host wrapper span isn't relabelled.
+ */
+export function stampGuestRescueIdentity(span, callContext, isRootSpan) {
+    const ambient = getLangfuseContext();
+    const ctx = callContext;
+    const userId = typeof ctx?.userId === "string" && ctx.userId ? ctx.userId : undefined;
+    const sessionId = typeof ctx?.sessionId === "string" && ctx.sessionId
+        ? ctx.sessionId
+        : undefined;
+    // Title: the trace name comes from traceName ?? userId, so only rescue it
+    // from "guest" when ambient has neither, and only on the trace root.
+    if (isRootSpan && !ambient?.traceName && !ambient?.userId) {
+        const traceName = typeof ctx?.traceName === "string" && ctx.traceName
+            ? ctx.traceName
+            : userId;
+        if (traceName) {
+            span.setAttribute(LANGFUSE_ATTR.TRACE_NAME, traceName);
+            span.setAttribute("trace.name", traceName);
+        }
+    }
+    // Identity: additive — set each field only where ambient didn't.
+    if (userId && !ambient?.userId) {
+        span.setAttribute("user.id", userId);
+    }
+    if (sessionId && !ambient?.sessionId) {
+        span.setAttribute("session.id", sessionId);
+    }
+}
 /**
  * Capture the current Langfuse AsyncLocalStorage context and return a wrapper
  * that re-enters that context when executing the provided callback.