@juspay/neurolink 9.70.7 → 9.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/dist/browser/neurolink.min.js +362 -344
  3. package/dist/core/constants.d.ts +1 -0
  4. package/dist/core/constants.js +2 -0
  5. package/dist/core/toolRouting.d.ts +59 -0
  6. package/dist/core/toolRouting.js +232 -0
  7. package/dist/lib/core/constants.d.ts +1 -0
  8. package/dist/lib/core/constants.js +2 -0
  9. package/dist/lib/core/toolRouting.d.ts +59 -0
  10. package/dist/lib/core/toolRouting.js +233 -0
  11. package/dist/lib/neurolink.d.ts +31 -1
  12. package/dist/lib/neurolink.js +241 -17
  13. package/dist/lib/providers/googleVertex.js +257 -30
  14. package/dist/lib/services/server/ai/observability/instrumentation.d.ts +10 -1
  15. package/dist/lib/services/server/ai/observability/instrumentation.js +36 -1
  16. package/dist/lib/telemetry/attributes.d.ts +31 -0
  17. package/dist/lib/telemetry/attributes.js +48 -0
  18. package/dist/lib/telemetry/index.d.ts +1 -1
  19. package/dist/lib/telemetry/index.js +1 -1
  20. package/dist/lib/types/config.d.ts +8 -0
  21. package/dist/lib/types/index.d.ts +1 -0
  22. package/dist/lib/types/index.js +1 -0
  23. package/dist/lib/types/toolRouting.d.ts +91 -0
  24. package/dist/lib/types/toolRouting.js +19 -0
  25. package/dist/lib/utils/anthropicTraceSanitizer.d.ts +7 -0
  26. package/dist/lib/utils/anthropicTraceSanitizer.js +26 -0
  27. package/dist/lib/utils/mcpErrorText.d.ts +16 -0
  28. package/dist/lib/utils/mcpErrorText.js +36 -0
  29. package/dist/neurolink.d.ts +31 -1
  30. package/dist/neurolink.js +241 -17
  31. package/dist/providers/googleVertex.js +257 -30
  32. package/dist/services/server/ai/observability/instrumentation.d.ts +10 -1
  33. package/dist/services/server/ai/observability/instrumentation.js +36 -1
  34. package/dist/telemetry/attributes.d.ts +31 -0
  35. package/dist/telemetry/attributes.js +48 -0
  36. package/dist/telemetry/index.d.ts +1 -1
  37. package/dist/telemetry/index.js +1 -1
  38. package/dist/types/config.d.ts +8 -0
  39. package/dist/types/index.d.ts +1 -0
  40. package/dist/types/index.js +1 -0
  41. package/dist/types/toolRouting.d.ts +91 -0
  42. package/dist/types/toolRouting.js +18 -0
  43. package/dist/utils/anthropicTraceSanitizer.d.ts +7 -0
  44. package/dist/utils/anthropicTraceSanitizer.js +25 -0
  45. package/dist/utils/mcpErrorText.d.ts +16 -0
  46. package/dist/utils/mcpErrorText.js +36 -0
  47. package/package.json +2 -1
@@ -22,9 +22,12 @@ import { createNativeThinkingConfig } from "../utils/thinkingConfig.js";
22
22
  import { TimeoutError, withTimeout } from "../utils/async/index.js";
23
23
  import { parseTimeout } from "../utils/timeout.js";
24
24
  import { createTextChannel, extractThoughtSignature, prependConversationMessages, } from "./googleNativeGemini3.js";
25
- import { ATTR, tracers, withClientSpan, withClientStreamSpan, withSpan, } from "../telemetry/index.js";
25
+ import { ATTR, LANGFUSE_ATTR, spanJsonAttribute, tracers, withClientSpan, withClientStreamSpan, withSpan, } from "../telemetry/index.js";
26
+ import { SpanKind, SpanStatusCode, context as otelContext, trace as otelTrace, } from "@opentelemetry/api";
26
27
  import { calculateCost } from "../utils/pricing.js";
27
28
  import { transformToolExecutions } from "../utils/transformationUtils.js";
29
+ import { sanitizeAnthropicMessagesForTrace } from "../utils/anthropicTraceSanitizer.js";
30
+ import { extractMcpToolErrorMessage } from "../utils/mcpErrorText.js";
28
31
  // Import proper types for multimodal message handling
29
32
  // Dynamic import helper for native Anthropic Vertex SDK
30
33
  let anthropicVertexModule = null;
@@ -2390,6 +2393,49 @@ export class GoogleVertexProvider extends BaseProvider {
2390
2393
  };
2391
2394
  const toolsUsedRef = [];
2392
2395
  const structuredOutputRef = {};
2396
+ // Langfuse/OTel: the native SDK bypasses the Vercel AI SDK's
2397
+ // experimental_telemetry, so emit spans manually — one turn span, one
2398
+ // generation span per API call, one tool span per execution — all carrying
2399
+ // langfuse.* attributes the LangfuseSpanProcessor maps to observations.
2400
+ // Usage lives ONLY on the generation spans (Langfuse sums usage across
2401
+ // observations for trace totals, so repeating it on the turn double-counts).
2402
+ const offeredToolNames = (tools ?? []).map((anthropicTool) => anthropicTool.name);
2403
+ const turnInputAttribute = spanJsonAttribute({
2404
+ system: systemPromptWithSchema,
2405
+ messages: sanitizeAnthropicMessagesForTrace(messages),
2406
+ });
2407
+ const turnSpan = tracers.provider.startSpan("anthropic.vertex.stream", {
2408
+ kind: SpanKind.CLIENT,
2409
+ attributes: {
2410
+ // Mark as span, not generation — without it Langfuse infers "generation"
2411
+ // from the gen_ai.* attributes; the model calls live in child spans.
2412
+ [LANGFUSE_ATTR.OBSERVATION_TYPE]: "span",
2413
+ [ATTR.GEN_AI_SYSTEM]: "anthropic",
2414
+ [ATTR.GEN_AI_MODEL]: modelName,
2415
+ [ATTR.GEN_AI_OPERATION]: "stream",
2416
+ [ATTR.NL_PROVIDER]: this.providerName,
2417
+ [ATTR.NL_TOOL_COUNT]: offeredToolNames.length,
2418
+ [LANGFUSE_ATTR.OBSERVATION_INPUT]: turnInputAttribute,
2419
+ // Also lift IO to the trace — Langfuse reads trace input/output from
2420
+ // langfuse.trace.* and the trace list is unreadable without it.
2421
+ [LANGFUSE_ATTR.TRACE_INPUT]: turnInputAttribute,
2422
+ [LANGFUSE_ATTR.OBSERVATION_METADATA]: spanJsonAttribute({
2423
+ toolsOffered: offeredToolNames,
2424
+ toolCount: offeredToolNames.length,
2425
+ maxSteps,
2426
+ structuredOutput: useFinalResultTool,
2427
+ }),
2428
+ },
2429
+ });
2430
+ const turnContext = otelTrace.setSpan(otelContext.active(), turnSpan);
2431
+ let aggregatedTurnText = "";
2432
+ // Anthropic prompt-cache token accounting, aggregated across loop steps.
2433
+ const turnCacheUsage = {
2434
+ read: 0,
2435
+ creation: 0,
2436
+ creation5m: 0,
2437
+ creation1h: 0,
2438
+ };
2393
2439
  // Track the active Anthropic stream so options.abortSignal can cancel it
2394
2440
  // mid-flight (pre-rewrite code had no abort handling — fixed for free).
2395
2441
  let activeStream;
@@ -2419,29 +2465,116 @@ export class GoogleVertexProvider extends BaseProvider {
2419
2465
  throw new Error("Stream aborted by caller");
2420
2466
  }
2421
2467
  step++;
2422
- const stream = await client.messages.stream({
2423
- ...requestParams,
2424
- messages: currentMessages,
2425
- });
2426
- activeStream = stream;
2427
- // Forward each text delta to the consumer as it arrives. The
2428
- // Anthropic SDK fires this listener synchronously for every
2429
- // content_block_delta SSE event, so the channel sees bytes at
2430
- // the same cadence the wire delivers them.
2431
- stream.on("text", (delta) => {
2432
- if (delta.length > 0) {
2433
- channel.push(delta);
2468
+ // One generation observation per API call: request in, content + usage out.
2469
+ const generationSpan = tracers.generation.startSpan("anthropic.messages.stream", {
2470
+ kind: SpanKind.CLIENT,
2471
+ attributes: {
2472
+ [LANGFUSE_ATTR.OBSERVATION_TYPE]: "generation",
2473
+ [LANGFUSE_ATTR.OBSERVATION_MODEL_NAME]: modelName,
2474
+ [LANGFUSE_ATTR.OBSERVATION_MODEL_PARAMETERS]: spanJsonAttribute({
2475
+ max_tokens: requestParams.max_tokens,
2476
+ temperature: requestParams.temperature,
2477
+ top_p: requestParams.top_p,
2478
+ }),
2479
+ [LANGFUSE_ATTR.OBSERVATION_INPUT]: spanJsonAttribute({
2480
+ system: systemPromptWithSchema,
2481
+ messages: sanitizeAnthropicMessagesForTrace(currentMessages),
2482
+ }),
2483
+ [LANGFUSE_ATTR.OBSERVATION_METADATA]: spanJsonAttribute({
2484
+ step,
2485
+ toolsOffered: offeredToolNames.length,
2486
+ }),
2487
+ [ATTR.GEN_AI_SYSTEM]: "anthropic",
2488
+ [ATTR.GEN_AI_MODEL]: modelName,
2489
+ [ATTR.GEN_AI_OPERATION]: "chat",
2490
+ },
2491
+ }, turnContext);
2492
+ let response;
2493
+ try {
2494
+ const stream = await client.messages.stream({
2495
+ ...requestParams,
2496
+ messages: currentMessages,
2497
+ });
2498
+ activeStream = stream;
2499
+ // Forward each text delta as it arrives — the Anthropic SDK fires
2500
+ // this synchronously per content_block_delta, so the channel streams
2501
+ // at wire cadence. The first delta stamps completion_start_time,
2502
+ // giving Langfuse the generation's time-to-first-token.
2503
+ let firstDeltaSeen = false;
2504
+ stream.on("text", (delta) => {
2505
+ if (delta.length > 0) {
2506
+ if (!firstDeltaSeen) {
2507
+ firstDeltaSeen = true;
2508
+ generationSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_COMPLETION_START_TIME, new Date().toISOString());
2509
+ }
2510
+ channel.push(delta);
2511
+ }
2512
+ });
2513
+ // finalMessage() resolves AFTER message_stop. By then the listener
2514
+ // has already fired for every delta — awaiting here doesn't block
2515
+ // visible streaming, it just gives us the structured response
2516
+ // shape needed for tool_use block extraction.
2517
+ response = await stream.finalMessage();
2518
+ }
2519
+ catch (modelCallError) {
2520
+ generationSpan.setStatus({
2521
+ code: SpanStatusCode.ERROR,
2522
+ message: modelCallError instanceof Error
2523
+ ? modelCallError.message
2524
+ : String(modelCallError),
2525
+ });
2526
+ if (modelCallError instanceof Error) {
2527
+ generationSpan.recordException(modelCallError);
2434
2528
  }
2435
- });
2436
- // finalMessage() resolves AFTER message_stop. By then the listener
2437
- // has already fired for every delta — awaiting here doesn't block
2438
- // visible streaming, it just gives us the structured response
2439
- // shape needed for tool_use block extraction.
2440
- const response = await stream.finalMessage();
2529
+ generationSpan.end();
2530
+ throw modelCallError;
2531
+ }
2441
2532
  activeStream = undefined;
2442
- usage.input += response.usage?.input_tokens || 0;
2443
- usage.output += response.usage?.output_tokens || 0;
2444
- usage.total = usage.input + usage.output;
2533
+ // End the generation span even if the bookkeeping below throws (else
2534
+ // it leaks). The model-call error path already ended it — no double-end.
2535
+ try {
2536
+ const stepCacheRead = response.usage?.cache_read_input_tokens ?? 0;
2537
+ const stepCacheCreation = response.usage?.cache_creation_input_tokens ?? 0;
2538
+ const stepCacheCreation5m = response.usage?.cache_creation?.ephemeral_5m_input_tokens ?? 0;
2539
+ const stepCacheCreation1h = response.usage?.cache_creation?.ephemeral_1h_input_tokens ?? 0;
2540
+ turnCacheUsage.read += stepCacheRead;
2541
+ turnCacheUsage.creation += stepCacheCreation;
2542
+ turnCacheUsage.creation5m += stepCacheCreation5m;
2543
+ turnCacheUsage.creation1h += stepCacheCreation1h;
2544
+ usage.input += response.usage?.input_tokens || 0;
2545
+ usage.output += response.usage?.output_tokens || 0;
2546
+ usage.total = usage.input + usage.output;
2547
+ for (const block of response.content) {
2548
+ if (block.type === "text" && typeof block.text === "string") {
2549
+ aggregatedTurnText += block.text;
2550
+ }
2551
+ }
2552
+ generationSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_OUTPUT, spanJsonAttribute(response.content));
2553
+ // 5m and 1h cache-creation are priced differently, so keep both;
2554
+ // drop the aggregate input_cache_creation (= 5m + 1h) that would
2555
+ // double-count. total sums the per-TTL keys shown here to match them.
2556
+ generationSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_USAGE_DETAILS, spanJsonAttribute({
2557
+ input: response.usage?.input_tokens ?? 0,
2558
+ output: response.usage?.output_tokens ?? 0,
2559
+ input_cached_tokens: stepCacheRead,
2560
+ input_cache_creation_5m: stepCacheCreation5m,
2561
+ input_cache_creation_1h: stepCacheCreation1h,
2562
+ total: (response.usage?.input_tokens ?? 0) +
2563
+ (response.usage?.output_tokens ?? 0) +
2564
+ stepCacheRead +
2565
+ stepCacheCreation5m +
2566
+ stepCacheCreation1h,
2567
+ }));
2568
+ generationSpan.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, response.usage?.input_tokens ?? 0);
2569
+ generationSpan.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, response.usage?.output_tokens ?? 0);
2570
+ if (response.stop_reason) {
2571
+ generationSpan.setAttribute(ATTR.GEN_AI_FINISH_REASON, response.stop_reason);
2572
+ }
2573
+ generationSpan.setStatus({ code: SpanStatusCode.OK });
2574
+ }
2575
+ finally {
2576
+ generationSpan.end();
2577
+ }
2445
2578
  const toolUseBlocks = response.content.filter((block) => block.type === "tool_use");
2446
2579
  // Structured-output pattern: when the model returns the
2447
2580
  // final_result tool call, push its arguments as JSON and stop.
@@ -2481,6 +2614,38 @@ export class GoogleVertexProvider extends BaseProvider {
2481
2614
  toolName: toolUse.name,
2482
2615
  args: toolUse.input,
2483
2616
  });
2617
+ // One tool observation per execution. ai.toolCall.* names follow the
2618
+ // Vercel AI SDK convention so existing tooling keeps working.
2619
+ const toolSpan = tracers.mcp.startSpan("ai.toolCall", {
2620
+ kind: SpanKind.INTERNAL,
2621
+ attributes: {
2622
+ [LANGFUSE_ATTR.OBSERVATION_TYPE]: "tool",
2623
+ [ATTR.GEN_AI_TOOL_NAME]: toolUse.name,
2624
+ "ai.toolCall.name": toolUse.name,
2625
+ "ai.toolCall.id": toolUse.id,
2626
+ "ai.toolCall.args": spanJsonAttribute(toolUse.input, 20_000),
2627
+ [LANGFUSE_ATTR.OBSERVATION_INPUT]: spanJsonAttribute(toolUse.input, 20_000),
2628
+ [LANGFUSE_ATTR.OBSERVATION_METADATA]: spanJsonAttribute({
2629
+ step,
2630
+ }),
2631
+ },
2632
+ }, turnContext);
2633
+ const endToolSpan = (output, errorMessage) => {
2634
+ toolSpan.setAttribute("ai.toolCall.result", spanJsonAttribute(output));
2635
+ toolSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_OUTPUT, spanJsonAttribute(output));
2636
+ if (errorMessage) {
2637
+ toolSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_LEVEL, "ERROR");
2638
+ toolSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_STATUS_MESSAGE, errorMessage);
2639
+ toolSpan.setStatus({
2640
+ code: SpanStatusCode.ERROR,
2641
+ message: errorMessage,
2642
+ });
2643
+ }
2644
+ else {
2645
+ toolSpan.setStatus({ code: SpanStatusCode.OK });
2646
+ }
2647
+ toolSpan.end();
2648
+ };
2484
2649
  const execute = executeMap.get(toolUse.name);
2485
2650
  if (execute) {
2486
2651
  try {
@@ -2489,7 +2654,13 @@ export class GoogleVertexProvider extends BaseProvider {
2489
2654
  messages: [],
2490
2655
  abortSignal: options.abortSignal,
2491
2656
  };
2492
- const result = await execute(toolUse.input, toolOptions);
2657
+ // Run with toolSpan active so spans inside execute
2658
+ // (neurolink.tool.execute) nest under this observation instead
2659
+ // of becoming disconnected siblings.
2660
+ const result = await otelContext.with(otelTrace.setSpan(turnContext, toolSpan), () => execute(toolUse.input, toolOptions));
2661
+ // MCP failures are returned, not thrown — surface them on
2662
+ // the span so failed calls show as ERROR in Langfuse.
2663
+ endToolSpan(result, extractMcpToolErrorMessage(result));
2493
2664
  toolExecutions.push({
2494
2665
  name: toolUse.name,
2495
2666
  input: toolUse.input,
@@ -2515,6 +2686,7 @@ export class GoogleVertexProvider extends BaseProvider {
2515
2686
  catch (err) {
2516
2687
  const errMsg = `Error executing tool "${toolUse.name}": ${err instanceof Error ? err.message : String(err)}`;
2517
2688
  const errorPayload = { error: errMsg };
2689
+ endToolSpan(errorPayload, errMsg);
2518
2690
  toolExecutions.push({
2519
2691
  name: toolUse.name,
2520
2692
  input: toolUse.input,
@@ -2535,6 +2707,7 @@ export class GoogleVertexProvider extends BaseProvider {
2535
2707
  else {
2536
2708
  const errMsg = `TOOL_NOT_FOUND: The tool "${toolUse.name}" does not exist.`;
2537
2709
  const errorPayload = { error: errMsg };
2710
+ endToolSpan(errorPayload, errMsg);
2538
2711
  toolExecutions.push({
2539
2712
  name: toolUse.name,
2540
2713
  input: toolUse.input,
@@ -2577,13 +2750,49 @@ export class GoogleVertexProvider extends BaseProvider {
2577
2750
  }
2578
2751
  metadata.responseTime = Date.now() - startTime;
2579
2752
  metadata.totalToolExecutions = allToolCalls.filter((tc) => tc.toolName !== "final_result").length;
2753
+ const turnOutputAttribute = spanJsonAttribute({
2754
+ text: aggregatedTurnText,
2755
+ ...(structuredOutputRef.value
2756
+ ? { structuredOutput: structuredOutputRef.value }
2757
+ : {}),
2758
+ });
2759
+ turnSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_OUTPUT, turnOutputAttribute);
2760
+ turnSpan.setAttribute(LANGFUSE_ATTR.TRACE_OUTPUT, turnOutputAttribute);
2761
+ // Turn usage is metadata-only (not usage_details) — see the note at the
2762
+ // top of this method on why it must not contribute to the cost rollup.
2763
+ turnSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_METADATA, spanJsonAttribute({
2764
+ toolsOffered: offeredToolNames,
2765
+ toolCount: offeredToolNames.length,
2766
+ maxSteps,
2767
+ steps: step,
2768
+ toolCallCount: metadata.totalToolExecutions,
2769
+ toolsCalled: toolsUsedRef.filter((name) => name !== "final_result"),
2770
+ structuredOutput: useFinalResultTool,
2771
+ usage: {
2772
+ input: usage.input,
2773
+ output: usage.output,
2774
+ input_cached_tokens: turnCacheUsage.read,
2775
+ input_cache_creation: turnCacheUsage.creation,
2776
+ input_cache_creation_5m: turnCacheUsage.creation5m,
2777
+ input_cache_creation_1h: turnCacheUsage.creation1h,
2778
+ },
2779
+ }));
2780
+ turnSpan.setStatus({ code: SpanStatusCode.OK });
2580
2781
  channel.close();
2581
2782
  }
2582
2783
  catch (err) {
2784
+ turnSpan.setStatus({
2785
+ code: SpanStatusCode.ERROR,
2786
+ message: err instanceof Error ? err.message : String(err),
2787
+ });
2788
+ if (err instanceof Error) {
2789
+ turnSpan.recordException(err);
2790
+ }
2583
2791
  logger.error("[GoogleVertex] Native Anthropic SDK stream error", err);
2584
2792
  channel.error(this.handleProviderError(err));
2585
2793
  }
2586
2794
  finally {
2795
+ turnSpan.end();
2587
2796
  options.abortSignal?.removeEventListener("abort", abortHandler);
2588
2797
  clearTimeout(streamTimeoutHandle);
2589
2798
  }
@@ -3316,6 +3525,15 @@ export class GoogleVertexProvider extends BaseProvider {
3316
3525
  const inputPrompt = mergedOptions.input?.text ||
3317
3526
  mergedOptions.prompt ||
3318
3527
  "";
3528
+ // Set generation input before the call so error paths still carry the
3529
+ // request; output is set after the native call resolves.
3530
+ const generationInputAttribute = spanJsonAttribute({
3531
+ ...(mergedOptions.systemPrompt
3532
+ ? { system: mergedOptions.systemPrompt }
3533
+ : {}),
3534
+ prompt: inputPrompt,
3535
+ });
3536
+ generateSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_INPUT, generationInputAttribute);
3319
3537
  try {
3320
3538
  let result;
3321
3539
  // Wrap the actual native generate call in `neurolink.executeGeneration`
@@ -3332,20 +3550,28 @@ export class GoogleVertexProvider extends BaseProvider {
3332
3550
  "neurolink.path": isAnthropicModel(modelName)
3333
3551
  ? "native.anthropic"
3334
3552
  : "native.google-genai",
3553
+ [LANGFUSE_ATTR.OBSERVATION_INPUT]: generationInputAttribute,
3335
3554
  },
3336
- }, async () => {
3555
+ }, async (executionSpan) => {
3556
+ let nativeResult;
3337
3557
  if (isAnthropicModel(modelName)) {
3338
3558
  logger.info("[GoogleVertex] Routing Claude generate to native @anthropic-ai/vertex-sdk", {
3339
3559
  model: modelName,
3340
3560
  totalToolCount: Object.keys(mergedOptions.tools).length,
3341
3561
  });
3342
- return this.executeNativeAnthropicGenerate(mergedOptions);
3562
+ nativeResult =
3563
+ await this.executeNativeAnthropicGenerate(mergedOptions);
3343
3564
  }
3344
- logger.info("[GoogleVertex] Routing Gemini generate to native @google/genai", {
3345
- model: modelName,
3346
- totalToolCount: Object.keys(mergedOptions.tools).length,
3347
- });
3348
- return this.executeNativeGemini3Generate(mergedOptions);
3565
+ else {
3566
+ logger.info("[GoogleVertex] Routing Gemini generate to native @google/genai", {
3567
+ model: modelName,
3568
+ totalToolCount: Object.keys(mergedOptions.tools).length,
3569
+ });
3570
+ nativeResult =
3571
+ await this.executeNativeGemini3Generate(mergedOptions);
3572
+ }
3573
+ executionSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_OUTPUT, spanJsonAttribute(nativeResult?.content ?? ""));
3574
+ return nativeResult;
3349
3575
  });
3350
3576
  this.attachUsageAndCostAttributes(generateSpan, modelName, result?.usage);
3351
3577
  // Pipe through TTS-of-AI-response when caller asks for it. The
@@ -3353,6 +3579,7 @@ export class GoogleVertexProvider extends BaseProvider {
3353
3579
  // enabled / useAiResponse is false, so the cost is zero on
3354
3580
  // non-TTS paths.
3355
3581
  result = await this.synthesizeAIResponseIfNeeded(result, options);
3582
+ generateSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_OUTPUT, spanJsonAttribute(result?.content ?? ""));
3356
3583
  // Fire onFinish lifecycle callback for the native generate path.
3357
3584
  // Pipeline A providers get this for free via the AI SDK middleware
3358
3585
  // wrapper (LifecycleMiddleware); native @google/genai bypasses
@@ -6,7 +6,7 @@
6
6
  *
7
7
  * Flow: Vercel AI SDK → OpenTelemetry Spans → LangfuseSpanProcessor → Langfuse Platform
8
8
  */
9
- import { trace } from "@opentelemetry/api";
9
+ import { trace, type Span as ApiSpan } from "@opentelemetry/api";
10
10
  import { LoggerProvider } from "@opentelemetry/sdk-logs";
11
11
  import { type SpanProcessor } from "@opentelemetry/sdk-trace-base";
12
12
  import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
@@ -146,6 +146,15 @@ export declare function setLangfuseContext<T = void>(context: {
146
146
  * console.log(context?.userId, context?.sessionId);
147
147
  */
148
148
  export declare function getLangfuseContext(): LangfuseContext | undefined;
149
+ /**
150
+ * Fill a span's Langfuse identity when the caller's context would otherwise
151
+ * fall back to "guest". Identity (user.id / session.id) is set additively —
152
+ * only fields ambient context didn't already provide, so a host's own context
153
+ * is never overridden. trace.name (the title) is rescued only when no ambient
154
+ * name source exists AND this span is the trace root, mirroring
155
+ * ContextEnricher.onStart so a host wrapper span isn't relabelled.
156
+ */
157
+ export declare function stampGuestRescueIdentity(span: ApiSpan, callContext: unknown, isRootSpan: boolean): void;
149
158
  /**
150
159
  * Capture the current Langfuse AsyncLocalStorage context and return a wrapper
151
160
  * that re-enters that context when executing the provided callback.
@@ -6,7 +6,7 @@
6
6
  *
7
7
  * Flow: Vercel AI SDK → OpenTelemetry Spans → LangfuseSpanProcessor → Langfuse Platform
8
8
  */
9
- import { metrics, SpanStatusCode, trace } from "@opentelemetry/api";
9
+ import { metrics, SpanStatusCode, trace, } from "@opentelemetry/api";
10
10
  import { W3CTraceContextPropagator } from "@opentelemetry/core";
11
11
  import { OTLPLogExporter } from "@opentelemetry/exporter-logs-otlp-http";
12
12
  import { OTLPMetricExporter } from "@opentelemetry/exporter-metrics-otlp-http";
@@ -20,6 +20,7 @@ import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION, } from "@opentelemetry/semanti
20
20
  import { AsyncLocalStorage } from "async_hooks";
21
21
  import { extractMcpErrorText } from "../../../../utils/mcpErrorText.js";
22
22
  import { logger } from "../../../../utils/logger.js";
23
+ import { LANGFUSE_ATTR } from "../../../../telemetry/attributes.js";
23
24
  const LOG_PREFIX = "[OpenTelemetry]";
24
25
  function createOtelResource(config, serviceName) {
25
26
  return resourceFromAttributes({
@@ -1117,6 +1118,40 @@ export async function setLangfuseContext(context, callback) {
1117
1118
  export function getLangfuseContext() {
1118
1119
  return contextStorage.getStore();
1119
1120
  }
1121
/**
 * Stamp caller-supplied Langfuse identity onto `span` for the fields the
 * ambient Langfuse context (from `getLangfuseContext()`) does not provide.
 *
 * - `user.id` / `session.id` are written only when the call context carries a
 *   non-empty string AND the ambient context has no value for that field, so
 *   an ambient value is never overwritten.
 * - The trace title (`LANGFUSE_ATTR.TRACE_NAME` and `trace.name`) is written
 *   only when `isRootSpan` is true and the ambient context has neither a
 *   `traceName` nor a `userId`. The title is the call context's `traceName`,
 *   falling back to its `userId`.
 *
 * NOTE(review): the original comment says this mirrors
 * `ContextEnricher.onStart` and exists to avoid a "guest" fallback; neither is
 * visible from this function — confirm against that code.
 *
 * @param span - Span to receive the attributes.
 * @param callContext - Per-call context; only string `userId`, `sessionId`
 *   and `traceName` properties are read, anything else is ignored.
 * @param isRootSpan - Whether `span` is the root of its trace.
 */
export function stampGuestRescueIdentity(span, callContext, isRootSpan) {
    // Accept only non-empty strings; every other shape counts as "absent".
    const nonEmptyString = (candidate) => typeof candidate === "string" && candidate ? candidate : undefined;
    const ambient = getLangfuseContext();
    const callerUserId = nonEmptyString(callContext?.userId);
    const callerSessionId = nonEmptyString(callContext?.sessionId);
    // Title: only on the trace root, and only when ambient context offers no
    // name source of its own (neither traceName nor userId).
    if (isRootSpan && !(ambient?.traceName || ambient?.userId)) {
        const title = nonEmptyString(callContext?.traceName) ?? callerUserId;
        if (title) {
            span.setAttribute(LANGFUSE_ATTR.TRACE_NAME, title);
            span.setAttribute("trace.name", title);
        }
    }
    // Identity: fill each field independently, never replacing an ambient one.
    if (callerUserId && !ambient?.userId) {
        span.setAttribute("user.id", callerUserId);
    }
    if (callerSessionId && !ambient?.sessionId) {
        span.setAttribute("session.id", callerSessionId);
    }
}
1120
1155
  /**
1121
1156
  * Capture the current Langfuse AsyncLocalStorage context and return a wrapper
1122
1157
  * that re-enters that context when executing the provided callback.
@@ -103,3 +103,34 @@ export declare const ATTR: {
103
103
  readonly AR_DESCRIPTION: "autoresearch.description";
104
104
  readonly AR_ERROR_CODE: "autoresearch.error_code";
105
105
  };
106
+ /**
107
+ * Langfuse observation/trace attribute names recognised by `@langfuse/otel`'s
108
+ * LangfuseSpanProcessor (already registered on the global TracerProvider). They
109
+ * let native (non-AI-SDK) provider paths emit spans that render as proper
110
+ * generation / tool observations — the same data the Vercel AI SDK's
111
+ * `experimental_telemetry` produced before providers moved to native SDKs.
112
+ */
113
+ export declare const LANGFUSE_ATTR: {
114
+ readonly TRACE_NAME: "langfuse.trace.name";
115
+ readonly TRACE_INPUT: "langfuse.trace.input";
116
+ readonly TRACE_OUTPUT: "langfuse.trace.output";
117
+ readonly OBSERVATION_TYPE: "langfuse.observation.type";
118
+ readonly OBSERVATION_INPUT: "langfuse.observation.input";
119
+ readonly OBSERVATION_OUTPUT: "langfuse.observation.output";
120
+ readonly OBSERVATION_METADATA: "langfuse.observation.metadata";
121
+ readonly OBSERVATION_MODEL_NAME: "langfuse.observation.model.name";
122
+ readonly OBSERVATION_MODEL_PARAMETERS: "langfuse.observation.model.parameters";
123
+ readonly OBSERVATION_USAGE_DETAILS: "langfuse.observation.usage_details";
124
+ readonly OBSERVATION_LEVEL: "langfuse.observation.level";
125
+ readonly OBSERVATION_STATUS_MESSAGE: "langfuse.observation.status_message";
126
+ readonly OBSERVATION_COMPLETION_START_TIME: "langfuse.observation.completion_start_time";
127
+ };
128
+ /** Default ceiling for serialized span attribute values. */
129
+ export declare const SPAN_ATTRIBUTE_MAX_CHARS = 40000;
130
+ /**
131
+ * Serialize an arbitrary value for a span attribute, hard-capped at
132
+ * `maxChars` so a pathological prompt or tool result can't put megabytes
133
+ * on a single span. Strings pass through unserialized; everything else is
134
+ * JSON-stringified with a String() fallback for circular structures.
135
+ */
136
+ export declare function spanJsonAttribute(value: unknown, maxChars?: number): string;
@@ -114,4 +114,52 @@ export const ATTR = {
114
114
  AR_DESCRIPTION: "autoresearch.description",
115
115
  AR_ERROR_CODE: "autoresearch.error_code",
116
116
  };
117
+ /**
118
+ * Langfuse observation/trace attribute names recognised by `@langfuse/otel`'s
119
+ * LangfuseSpanProcessor (already registered on the global TracerProvider). They
120
+ * let native (non-AI-SDK) provider paths emit spans that render as proper
121
+ * generation / tool observations — the same data the Vercel AI SDK's
122
+ * `experimental_telemetry` produced before providers moved to native SDKs.
123
+ */
124
+ export const LANGFUSE_ATTR = {
125
+ TRACE_NAME: "langfuse.trace.name",
126
+ TRACE_INPUT: "langfuse.trace.input",
127
+ TRACE_OUTPUT: "langfuse.trace.output",
128
+ OBSERVATION_TYPE: "langfuse.observation.type",
129
+ OBSERVATION_INPUT: "langfuse.observation.input",
130
+ OBSERVATION_OUTPUT: "langfuse.observation.output",
131
+ OBSERVATION_METADATA: "langfuse.observation.metadata",
132
+ OBSERVATION_MODEL_NAME: "langfuse.observation.model.name",
133
+ OBSERVATION_MODEL_PARAMETERS: "langfuse.observation.model.parameters",
134
+ OBSERVATION_USAGE_DETAILS: "langfuse.observation.usage_details",
135
+ OBSERVATION_LEVEL: "langfuse.observation.level",
136
+ OBSERVATION_STATUS_MESSAGE: "langfuse.observation.status_message",
137
+ OBSERVATION_COMPLETION_START_TIME: "langfuse.observation.completion_start_time",
138
+ };
139
/** Default ceiling for serialized span attribute values. */
export const SPAN_ATTRIBUTE_MAX_CHARS = 40_000;
/**
 * Serialize an arbitrary value for a span attribute, hard-capped at
 * `maxChars` so a pathological prompt or tool result can't put megabytes
 * on a single span.
 *
 * Strings pass through unserialized; everything else is JSON-stringified,
 * with a `String()` fallback for values JSON cannot represent (circular
 * structures, BigInt, `undefined`, functions, symbols).
 *
 * When the serialized text exceeds `maxChars` it is cut and a
 * `...[truncated N chars]` marker is appended, where N is the number of
 * characters of the serialized text that were actually dropped. The returned
 * string never exceeds `maxChars`: if the marker itself does not fit, the
 * text is cut to `maxChars` with no marker.
 *
 * @param value - Value to serialize.
 * @param maxChars - Maximum length of the returned string. Defaults to
 *   `SPAN_ATTRIBUTE_MAX_CHARS`.
 * @returns The serialized, possibly truncated, attribute value.
 */
export function spanJsonAttribute(value, maxChars = SPAN_ATTRIBUTE_MAX_CHARS) {
    let serialized;
    try {
        serialized =
            typeof value === "string"
                ? value
                : (JSON.stringify(value) ?? String(value));
    }
    catch {
        try {
            serialized = String(value);
        }
        catch {
            // String() itself throws for e.g. a circular object without a
            // prototype; a telemetry helper must not take the caller down.
            serialized = Object.prototype.toString.call(value);
        }
    }
    if (!(serialized.length > maxChars)) {
        return serialized;
    }
    const limit = Math.max(0, Math.floor(maxChars));
    // The marker displaces kept characters, so the dropped count depends on
    // the marker's own length. The count only ever grows while settling and
    // is bounded by the text length, so this loop terminates.
    let droppedChars = serialized.length - limit;
    let marker = `...[truncated ${droppedChars} chars]`;
    for (;;) {
        const keptChars = Math.max(0, limit - marker.length);
        const actuallyDropped = serialized.length - keptChars;
        if (actuallyDropped === droppedChars) {
            break;
        }
        droppedChars = actuallyDropped;
        marker = `...[truncated ${droppedChars} chars]`;
    }
    if (marker.length > limit) {
        // No room for the marker: honour the cap rather than overshoot it.
        return serialized.slice(0, limit);
    }
    return `${serialized.slice(0, limit - marker.length)}${marker}`;
}
117
165
  //# sourceMappingURL=attributes.js.map
@@ -1,7 +1,7 @@
1
1
  export { TelemetryService } from "./telemetryService.js";
2
2
  export { tracers } from "./tracers.js";
3
3
  export { withSpan, withClientSpan, withStreamSpan, withClientStreamSpan, } from "./withSpan.js";
4
- export { ATTR } from "./attributes.js";
4
+ export { ATTR, LANGFUSE_ATTR, SPAN_ATTRIBUTE_MAX_CHARS, spanJsonAttribute, } from "./attributes.js";
5
5
  /**
6
6
  * Initialize telemetry for NeuroLink
7
7
  * Reuses an existing global TracerProvider when one is already registered,
@@ -2,7 +2,7 @@
2
2
  export { TelemetryService } from "./telemetryService.js";
3
3
  export { tracers } from "./tracers.js";
4
4
  export { withSpan, withClientSpan, withStreamSpan, withClientStreamSpan, } from "./withSpan.js";
5
- export { ATTR } from "./attributes.js";
5
+ export { ATTR, LANGFUSE_ATTR, SPAN_ATTRIBUTE_MAX_CHARS, spanJsonAttribute, } from "./attributes.js";
6
6
  import { logger } from "../utils/logger.js";
7
7
  /**
8
8
  * Initialize telemetry for NeuroLink
@@ -9,6 +9,7 @@ import type { ConversationMemoryConfig } from "./conversation.js";
9
9
  import type { ObservabilityConfig } from "./observability.js";
10
10
  import type { AuthProvider, AuthProviderType, AuthProviderConfig, Auth0Config, ClerkConfig, FirebaseConfig, SupabaseConfig, WorkOSConfig, BetterAuthConfig, JWTConfig, OAuth2Config, CognitoConfig, KeycloakConfig, AuthenticatedContext } from "./auth.js";
11
11
  import type { NeurolinkCredentials } from "./providers.js";
12
+ import type { ToolRoutingConfig } from "./toolRouting.js";
12
13
  /**
13
14
  * Main NeuroLink configuration type
14
15
  */
@@ -66,6 +67,13 @@ export type NeurolinkConstructorConfig = {
66
67
  * provider is preserved across the chain; only the model name changes.
67
68
  */
68
69
  modelChain?: string[];
70
+ /**
71
+ * Pre-call tool routing: a cheap router LLM picks the tool servers
72
+ * relevant to each stream() turn and the unpicked servers' tools are
73
+ * dropped from the request via `excludeTools`. Fails open (all tools) on
74
+ * any router failure. See {@link ToolRoutingConfig}.
75
+ */
76
+ toolRouting?: ToolRoutingConfig;
69
77
  };
70
78
  /**
71
79
  * Configuration for MCP enhancement modules wired into generate()/stream() paths.
@@ -50,6 +50,7 @@ export * from "./stream.js";
50
50
  export * from "./subscription.js";
51
51
  export * from "./task.js";
52
52
  export * from "./taskClassification.js";
53
+ export * from "./toolRouting.js";
53
54
  export * from "./tools.js";
54
55
  export * from "./voice.js";
55
56
  export * from "./universalProviderOptions.js";
@@ -51,6 +51,7 @@ export * from "./stream.js";
51
51
  export * from "./subscription.js";
52
52
  export * from "./task.js";
53
53
  export * from "./taskClassification.js";
54
+ export * from "./toolRouting.js";
54
55
  export * from "./tools.js";
55
56
  export * from "./voice.js";
56
57
  export * from "./universalProviderOptions.js";