@juspay/neurolink 9.70.6 → 9.71.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/browser/neurolink.min.js +344 -344
- package/dist/lib/neurolink.js +53 -16
- package/dist/lib/providers/googleVertex.js +257 -30
- package/dist/lib/services/server/ai/observability/instrumentation.d.ts +10 -1
- package/dist/lib/services/server/ai/observability/instrumentation.js +36 -1
- package/dist/lib/telemetry/attributes.d.ts +31 -0
- package/dist/lib/telemetry/attributes.js +46 -0
- package/dist/lib/telemetry/index.d.ts +1 -1
- package/dist/lib/telemetry/index.js +1 -1
- package/dist/lib/utils/anthropicTraceSanitizer.d.ts +7 -0
- package/dist/lib/utils/anthropicTraceSanitizer.js +26 -0
- package/dist/lib/utils/json/coerce.js +85 -0
- package/dist/lib/utils/mcpErrorText.d.ts +16 -0
- package/dist/lib/utils/mcpErrorText.js +36 -0
- package/dist/neurolink.js +53 -16
- package/dist/providers/googleVertex.js +257 -30
- package/dist/services/server/ai/observability/instrumentation.d.ts +10 -1
- package/dist/services/server/ai/observability/instrumentation.js +36 -1
- package/dist/telemetry/attributes.d.ts +31 -0
- package/dist/telemetry/attributes.js +46 -0
- package/dist/telemetry/index.d.ts +1 -1
- package/dist/telemetry/index.js +1 -1
- package/dist/utils/anthropicTraceSanitizer.d.ts +7 -0
- package/dist/utils/anthropicTraceSanitizer.js +25 -0
- package/dist/utils/json/coerce.js +85 -0
- package/dist/utils/mcpErrorText.d.ts +16 -0
- package/dist/utils/mcpErrorText.js +36 -0
- package/package.json +3 -2
package/dist/lib/neurolink.js
CHANGED
|
@@ -55,7 +55,7 @@ import { createMemoryRetrievalTools } from "./memory/memoryRetrievalTools.js";
|
|
|
55
55
|
import { getMetricsAggregator, MetricsAggregator, } from "./observability/metricsAggregator.js";
|
|
56
56
|
import { SpanStatus, SpanType, CircuitBreakerOpenError, ConversationMemoryError, AuthenticationError, AuthorizationError, InvalidModelError, ModelAccessDeniedError, } from "./types/index.js";
|
|
57
57
|
import { SpanSerializer } from "./observability/utils/spanSerializer.js";
|
|
58
|
-
import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, runWithCurrentLangfuseContext, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
|
|
58
|
+
import { flushOpenTelemetry, getLangfuseContext, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, runWithCurrentLangfuseContext, setLangfuseContext, shutdownOpenTelemetry, stampGuestRescueIdentity, } from "./services/server/ai/observability/instrumentation.js";
|
|
59
59
|
import { TaskManager } from "./tasks/taskManager.js";
|
|
60
60
|
import { createTaskTools } from "./tasks/tools/taskTools.js";
|
|
61
61
|
import { ATTR } from "./telemetry/attributes.js";
|
|
@@ -1378,11 +1378,8 @@ Current user's request: ${currentInput}`;
|
|
|
1378
1378
|
* Calls add(userId, content) which internally condenses old + new via LLM.
|
|
1379
1379
|
* Supports additional users with per-user prompt and maxWords overrides.
|
|
1380
1380
|
*/
|
|
1381
|
-
storeMemoryInBackground(originalPrompt, responseContent, userId, additionalUsers) {
|
|
1382
|
-
|
|
1383
|
-
// memory writes appear under the originating Langfuse trace instead of
|
|
1384
|
-
// becoming orphan spans.
|
|
1385
|
-
const wrappedMemoryWrite = runWithCurrentLangfuseContext(async () => {
|
|
1381
|
+
storeMemoryInBackground(originalPrompt, responseContent, userId, additionalUsers, langfuseIdentity) {
|
|
1382
|
+
const memoryWrite = async () => {
|
|
1386
1383
|
try {
|
|
1387
1384
|
const client = this.ensureMemoryReady();
|
|
1388
1385
|
if (!client) {
|
|
@@ -1408,7 +1405,21 @@ Current user's request: ${currentInput}`;
|
|
|
1408
1405
|
catch (error) {
|
|
1409
1406
|
logger.warn("Memory storage failed:", error);
|
|
1410
1407
|
}
|
|
1411
|
-
}
|
|
1408
|
+
};
|
|
1409
|
+
// Carry the turn's identity across the setImmediate boundary so the
|
|
1410
|
+
// condensation generate + redis spans don't orphan to "guest". Keep the
|
|
1411
|
+
// ambient store when it survived (generate path — carries conversationId,
|
|
1412
|
+
// metadata, …); re-establish from the caller only when it was lost (stream
|
|
1413
|
+
// path, which fires after the caller consumed the stream).
|
|
1414
|
+
const ambient = getLangfuseContext();
|
|
1415
|
+
const wrappedMemoryWrite = !(ambient?.traceName || ambient?.userId) &&
|
|
1416
|
+
(langfuseIdentity?.traceName || langfuseIdentity?.sessionId)
|
|
1417
|
+
? () => setLangfuseContext({
|
|
1418
|
+
userId,
|
|
1419
|
+
sessionId: langfuseIdentity.sessionId ?? null,
|
|
1420
|
+
traceName: langfuseIdentity.traceName ?? null,
|
|
1421
|
+
}, memoryWrite)
|
|
1422
|
+
: runWithCurrentLangfuseContext(memoryWrite);
|
|
1412
1423
|
setImmediate(wrappedMemoryWrite);
|
|
1413
1424
|
}
|
|
1414
1425
|
/**
|
|
@@ -2801,7 +2812,15 @@ Current user's request: ${currentInput}`;
|
|
|
2801
2812
|
}
|
|
2802
2813
|
const startedAt = Date.now();
|
|
2803
2814
|
try {
|
|
2804
|
-
return await this.runWithFallbackOrchestration(optionsOrPrompt, "generate", (opts) =>
|
|
2815
|
+
return await this.runWithFallbackOrchestration(optionsOrPrompt, "generate", (opts) => {
|
|
2816
|
+
// Capture root-ness before startActiveSpan makes generateSpan active.
|
|
2817
|
+
// The actual guest-rescue stamp is deferred to executeGenerateRequest,
|
|
2818
|
+
// AFTER prepareGenerateRequest merges auth/requestContext-derived
|
|
2819
|
+
// identity into options.context — otherwise an auth:{token} caller
|
|
2820
|
+
// with no pre-set context.userId would stamp the root span as guest.
|
|
2821
|
+
const generateIsRoot = !trace.getSpan(context.active());
|
|
2822
|
+
return tracers.sdk.startActiveSpan("neurolink.generate", { kind: SpanKind.INTERNAL }, (generateSpan) => this.executeGenerateWithMetricsContext(opts, generateSpan, generateIsRoot));
|
|
2823
|
+
});
|
|
2805
2824
|
}
|
|
2806
2825
|
catch (error) {
|
|
2807
2826
|
// Lifecycle middleware (wrapGenerate.catch in builtin/lifecycle.ts)
|
|
@@ -2973,14 +2992,17 @@ Current user's request: ${currentInput}`;
|
|
|
2973
2992
|
return { error };
|
|
2974
2993
|
}
|
|
2975
2994
|
}
|
|
2976
|
-
async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan) {
|
|
2977
|
-
return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan));
|
|
2995
|
+
async executeGenerateWithMetricsContext(optionsOrPrompt, generateSpan, isRootSpan) {
|
|
2996
|
+
return metricsTraceContextStorage.run(this.createMetricsTraceContext(), () => this.executeGenerateRequest(optionsOrPrompt, generateSpan, isRootSpan));
|
|
2978
2997
|
}
|
|
2979
|
-
async executeGenerateRequest(optionsOrPrompt, generateSpan) {
|
|
2998
|
+
async executeGenerateRequest(optionsOrPrompt, generateSpan, isRootSpan) {
|
|
2980
2999
|
let resolvedOptions;
|
|
2981
3000
|
try {
|
|
2982
3001
|
const { options, originalPrompt } = await this.prepareGenerateRequest(optionsOrPrompt, generateSpan);
|
|
2983
3002
|
resolvedOptions = options;
|
|
3003
|
+
// Stamp now that prepareGenerateRequest has merged any auth/requestContext
|
|
3004
|
+
// identity into options.context (see capture of isRootSpan in generate()).
|
|
3005
|
+
stampGuestRescueIdentity(generateSpan, options.context, isRootSpan);
|
|
2984
3006
|
const earlyResult = await this.maybeHandleEarlyGenerateResult(options, generateSpan);
|
|
2985
3007
|
if (earlyResult) {
|
|
2986
3008
|
generateSpan.setStatus({ code: SpanStatusCode.OK });
|
|
@@ -3545,7 +3567,7 @@ Current user's request: ${currentInput}`;
|
|
|
3545
3567
|
// Memory storage
|
|
3546
3568
|
if (this.shouldWriteMemory(options.memory, options.context?.userId, generateResult.content) &&
|
|
3547
3569
|
options.context?.userId) {
|
|
3548
|
-
this.storeMemoryInBackground(originalPrompt ?? "", generateResult.content.trim(), options.context.userId, options.memory?.additionalUsers);
|
|
3570
|
+
this.storeMemoryInBackground(originalPrompt ?? "", generateResult.content.trim(), options.context.userId, options.memory?.additionalUsers, options.context);
|
|
3549
3571
|
}
|
|
3550
3572
|
}
|
|
3551
3573
|
/**
|
|
@@ -5531,10 +5553,20 @@ Current user's request: ${currentInput}`;
|
|
|
5531
5553
|
[ATTR.NL_PROVIDER]: options.provider || "default",
|
|
5532
5554
|
[ATTR.GEN_AI_MODEL]: options.model || "default",
|
|
5533
5555
|
[ATTR.NL_INPUT_LENGTH]: options.input?.text?.length || 0,
|
|
5534
|
-
|
|
5556
|
+
// Count registered custom tools too — chat hosts put their MCP tools
|
|
5557
|
+
// in the registry, so options.tools alone under-reports.
|
|
5558
|
+
[ATTR.NL_HAS_TOOLS]: !options.disableTools &&
|
|
5559
|
+
(!!(options.tools && Object.keys(options.tools).length > 0) ||
|
|
5560
|
+
this.getCustomTools().size > 0),
|
|
5535
5561
|
[ATTR.NL_STREAM_MODE]: true,
|
|
5536
5562
|
},
|
|
5537
5563
|
});
|
|
5564
|
+
// streamSpan isn't active yet, so context.active() is its parent — empty =
|
|
5565
|
+
// root. Capture root-ness here, but defer the actual guest-rescue stamp to
|
|
5566
|
+
// after validateStreamRequestOptions merges auth/requestContext identity
|
|
5567
|
+
// into options.context (below) — otherwise an auth:{token} caller with no
|
|
5568
|
+
// pre-set context.userId would stamp the root span as guest.
|
|
5569
|
+
const streamIsRoot = !trace.getSpan(context.active());
|
|
5538
5570
|
const spanStartTime = Date.now();
|
|
5539
5571
|
this._disableToolCacheForCurrentRequest = !!options.disableToolCache;
|
|
5540
5572
|
try {
|
|
@@ -5576,6 +5608,8 @@ Current user's request: ${currentInput}`;
|
|
|
5576
5608
|
const originalPrompt = options.input?.text ?? "";
|
|
5577
5609
|
options.fileRegistry = this.fileRegistry;
|
|
5578
5610
|
await this.validateStreamRequestOptions(options, startTime);
|
|
5611
|
+
// options.context now carries any auth/requestContext-derived identity.
|
|
5612
|
+
stampGuestRescueIdentity(streamSpan, options.context, streamIsRoot);
|
|
5579
5613
|
const workflowResult = await this.maybeHandleWorkflowStreamRequest({
|
|
5580
5614
|
options,
|
|
5581
5615
|
startTime,
|
|
@@ -5585,6 +5619,9 @@ Current user's request: ${currentInput}`;
|
|
|
5585
5619
|
if (workflowResult) {
|
|
5586
5620
|
return workflowResult;
|
|
5587
5621
|
}
|
|
5622
|
+
// Make neurolink.stream the active span so every provider span (generations,
|
|
5623
|
+
// tool calls) parents under it — one Langfuse trace per turn, not a forest.
|
|
5624
|
+
const streamSpanContext = trace.setSpan(context.active(), streamSpan);
|
|
5588
5625
|
// TTS Mode 2 deferred: stream() emits text first, then synthesizes the
|
|
5589
5626
|
// accumulated response into a single audio chunk at end-of-stream and
|
|
5590
5627
|
// resolves `streamResult.audio` with the same TTSResult. The resolver is
|
|
@@ -5599,7 +5636,7 @@ Current user's request: ${currentInput}`;
|
|
|
5599
5636
|
resolveStreamTtsAudio = resolve;
|
|
5600
5637
|
})
|
|
5601
5638
|
: undefined;
|
|
5602
|
-
const streamResult = await this.setLangfuseContextFromOptions(options, () => this.runStandardStreamRequest({
|
|
5639
|
+
const streamResult = await context.with(streamSpanContext, () => this.setLangfuseContextFromOptions(options, () => this.runStandardStreamRequest({
|
|
5603
5640
|
options,
|
|
5604
5641
|
streamSpan,
|
|
5605
5642
|
spanStartTime,
|
|
@@ -5608,7 +5645,7 @@ Current user's request: ${currentInput}`;
|
|
|
5608
5645
|
streamId,
|
|
5609
5646
|
originalPrompt,
|
|
5610
5647
|
ttsResolver: resolveStreamTtsAudio,
|
|
5611
|
-
}));
|
|
5648
|
+
})));
|
|
5612
5649
|
if (streamSttTranscription) {
|
|
5613
5650
|
streamResult.transcription = streamSttTranscription;
|
|
5614
5651
|
}
|
|
@@ -6512,7 +6549,7 @@ Current user's request: ${currentInput}`;
|
|
|
6512
6549
|
}
|
|
6513
6550
|
}
|
|
6514
6551
|
if (this.shouldWriteMemory(enhancedOptions.memory, enhancedOptions.context?.userId, accumulatedContent)) {
|
|
6515
|
-
this.storeMemoryInBackground(originalPrompt ?? "", accumulatedContent.trim(), enhancedOptions.context?.userId, enhancedOptions.memory?.additionalUsers);
|
|
6552
|
+
this.storeMemoryInBackground(originalPrompt ?? "", accumulatedContent.trim(), enhancedOptions.context?.userId, enhancedOptions.memory?.additionalUsers, enhancedOptions.context);
|
|
6516
6553
|
}
|
|
6517
6554
|
}
|
|
6518
6555
|
/**
|
|
@@ -22,9 +22,12 @@ import { createNativeThinkingConfig } from "../utils/thinkingConfig.js";
|
|
|
22
22
|
import { TimeoutError, withTimeout } from "../utils/async/index.js";
|
|
23
23
|
import { parseTimeout } from "../utils/timeout.js";
|
|
24
24
|
import { createTextChannel, extractThoughtSignature, prependConversationMessages, } from "./googleNativeGemini3.js";
|
|
25
|
-
import { ATTR, tracers, withClientSpan, withClientStreamSpan, withSpan, } from "../telemetry/index.js";
|
|
25
|
+
import { ATTR, LANGFUSE_ATTR, spanJsonAttribute, tracers, withClientSpan, withClientStreamSpan, withSpan, } from "../telemetry/index.js";
|
|
26
|
+
import { SpanKind, SpanStatusCode, context as otelContext, trace as otelTrace, } from "@opentelemetry/api";
|
|
26
27
|
import { calculateCost } from "../utils/pricing.js";
|
|
27
28
|
import { transformToolExecutions } from "../utils/transformationUtils.js";
|
|
29
|
+
import { sanitizeAnthropicMessagesForTrace } from "../utils/anthropicTraceSanitizer.js";
|
|
30
|
+
import { extractMcpToolErrorMessage } from "../utils/mcpErrorText.js";
|
|
28
31
|
// Import proper types for multimodal message handling
|
|
29
32
|
// Dynamic import helper for native Anthropic Vertex SDK
|
|
30
33
|
let anthropicVertexModule = null;
|
|
@@ -2390,6 +2393,49 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2390
2393
|
};
|
|
2391
2394
|
const toolsUsedRef = [];
|
|
2392
2395
|
const structuredOutputRef = {};
|
|
2396
|
+
// Langfuse/OTel: the native SDK bypasses the Vercel AI SDK's
|
|
2397
|
+
// experimental_telemetry, so emit spans manually — one turn span, one
|
|
2398
|
+
// generation span per API call, one tool span per execution — all carrying
|
|
2399
|
+
// langfuse.* attributes the LangfuseSpanProcessor maps to observations.
|
|
2400
|
+
// Usage lives ONLY on the generation spans (Langfuse sums usage across
|
|
2401
|
+
// observations for trace totals, so repeating it on the turn double-counts).
|
|
2402
|
+
const offeredToolNames = (tools ?? []).map((anthropicTool) => anthropicTool.name);
|
|
2403
|
+
const turnInputAttribute = spanJsonAttribute({
|
|
2404
|
+
system: systemPromptWithSchema,
|
|
2405
|
+
messages: sanitizeAnthropicMessagesForTrace(messages),
|
|
2406
|
+
});
|
|
2407
|
+
const turnSpan = tracers.provider.startSpan("anthropic.vertex.stream", {
|
|
2408
|
+
kind: SpanKind.CLIENT,
|
|
2409
|
+
attributes: {
|
|
2410
|
+
// Mark as span, not generation — without it Langfuse infers "generation"
|
|
2411
|
+
// from the gen_ai.* attributes; the model calls live in child spans.
|
|
2412
|
+
[LANGFUSE_ATTR.OBSERVATION_TYPE]: "span",
|
|
2413
|
+
[ATTR.GEN_AI_SYSTEM]: "anthropic",
|
|
2414
|
+
[ATTR.GEN_AI_MODEL]: modelName,
|
|
2415
|
+
[ATTR.GEN_AI_OPERATION]: "stream",
|
|
2416
|
+
[ATTR.NL_PROVIDER]: this.providerName,
|
|
2417
|
+
[ATTR.NL_TOOL_COUNT]: offeredToolNames.length,
|
|
2418
|
+
[LANGFUSE_ATTR.OBSERVATION_INPUT]: turnInputAttribute,
|
|
2419
|
+
// Also lift IO to the trace — Langfuse reads trace input/output from
|
|
2420
|
+
// langfuse.trace.* and the trace list is unreadable without it.
|
|
2421
|
+
[LANGFUSE_ATTR.TRACE_INPUT]: turnInputAttribute,
|
|
2422
|
+
[LANGFUSE_ATTR.OBSERVATION_METADATA]: spanJsonAttribute({
|
|
2423
|
+
toolsOffered: offeredToolNames,
|
|
2424
|
+
toolCount: offeredToolNames.length,
|
|
2425
|
+
maxSteps,
|
|
2426
|
+
structuredOutput: useFinalResultTool,
|
|
2427
|
+
}),
|
|
2428
|
+
},
|
|
2429
|
+
});
|
|
2430
|
+
const turnContext = otelTrace.setSpan(otelContext.active(), turnSpan);
|
|
2431
|
+
let aggregatedTurnText = "";
|
|
2432
|
+
// Anthropic prompt-cache token accounting, aggregated across loop steps.
|
|
2433
|
+
const turnCacheUsage = {
|
|
2434
|
+
read: 0,
|
|
2435
|
+
creation: 0,
|
|
2436
|
+
creation5m: 0,
|
|
2437
|
+
creation1h: 0,
|
|
2438
|
+
};
|
|
2393
2439
|
// Track the active Anthropic stream so options.abortSignal can cancel it
|
|
2394
2440
|
// mid-flight (pre-rewrite code had no abort handling — fixed for free).
|
|
2395
2441
|
let activeStream;
|
|
@@ -2419,29 +2465,116 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2419
2465
|
throw new Error("Stream aborted by caller");
|
|
2420
2466
|
}
|
|
2421
2467
|
step++;
|
|
2422
|
-
|
|
2423
|
-
|
|
2424
|
-
|
|
2425
|
-
|
|
2426
|
-
|
|
2427
|
-
|
|
2428
|
-
|
|
2429
|
-
|
|
2430
|
-
|
|
2431
|
-
|
|
2432
|
-
|
|
2433
|
-
|
|
2468
|
+
// One generation observation per API call: request in, content + usage out.
|
|
2469
|
+
const generationSpan = tracers.generation.startSpan("anthropic.messages.stream", {
|
|
2470
|
+
kind: SpanKind.CLIENT,
|
|
2471
|
+
attributes: {
|
|
2472
|
+
[LANGFUSE_ATTR.OBSERVATION_TYPE]: "generation",
|
|
2473
|
+
[LANGFUSE_ATTR.OBSERVATION_MODEL_NAME]: modelName,
|
|
2474
|
+
[LANGFUSE_ATTR.OBSERVATION_MODEL_PARAMETERS]: spanJsonAttribute({
|
|
2475
|
+
max_tokens: requestParams.max_tokens,
|
|
2476
|
+
temperature: requestParams.temperature,
|
|
2477
|
+
top_p: requestParams.top_p,
|
|
2478
|
+
}),
|
|
2479
|
+
[LANGFUSE_ATTR.OBSERVATION_INPUT]: spanJsonAttribute({
|
|
2480
|
+
system: systemPromptWithSchema,
|
|
2481
|
+
messages: sanitizeAnthropicMessagesForTrace(currentMessages),
|
|
2482
|
+
}),
|
|
2483
|
+
[LANGFUSE_ATTR.OBSERVATION_METADATA]: spanJsonAttribute({
|
|
2484
|
+
step,
|
|
2485
|
+
toolsOffered: offeredToolNames.length,
|
|
2486
|
+
}),
|
|
2487
|
+
[ATTR.GEN_AI_SYSTEM]: "anthropic",
|
|
2488
|
+
[ATTR.GEN_AI_MODEL]: modelName,
|
|
2489
|
+
[ATTR.GEN_AI_OPERATION]: "chat",
|
|
2490
|
+
},
|
|
2491
|
+
}, turnContext);
|
|
2492
|
+
let response;
|
|
2493
|
+
try {
|
|
2494
|
+
const stream = await client.messages.stream({
|
|
2495
|
+
...requestParams,
|
|
2496
|
+
messages: currentMessages,
|
|
2497
|
+
});
|
|
2498
|
+
activeStream = stream;
|
|
2499
|
+
// Forward each text delta as it arrives — the Anthropic SDK fires
|
|
2500
|
+
// this synchronously per content_block_delta, so the channel streams
|
|
2501
|
+
// at wire cadence. The first delta stamps completion_start_time,
|
|
2502
|
+
// giving Langfuse the generation's time-to-first-token.
|
|
2503
|
+
let firstDeltaSeen = false;
|
|
2504
|
+
stream.on("text", (delta) => {
|
|
2505
|
+
if (delta.length > 0) {
|
|
2506
|
+
if (!firstDeltaSeen) {
|
|
2507
|
+
firstDeltaSeen = true;
|
|
2508
|
+
generationSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_COMPLETION_START_TIME, new Date().toISOString());
|
|
2509
|
+
}
|
|
2510
|
+
channel.push(delta);
|
|
2511
|
+
}
|
|
2512
|
+
});
|
|
2513
|
+
// finalMessage() resolves AFTER message_stop. By then the listener
|
|
2514
|
+
// has already fired for every delta — awaiting here doesn't block
|
|
2515
|
+
// visible streaming, it just gives us the structured response
|
|
2516
|
+
// shape needed for tool_use block extraction.
|
|
2517
|
+
response = await stream.finalMessage();
|
|
2518
|
+
}
|
|
2519
|
+
catch (modelCallError) {
|
|
2520
|
+
generationSpan.setStatus({
|
|
2521
|
+
code: SpanStatusCode.ERROR,
|
|
2522
|
+
message: modelCallError instanceof Error
|
|
2523
|
+
? modelCallError.message
|
|
2524
|
+
: String(modelCallError),
|
|
2525
|
+
});
|
|
2526
|
+
if (modelCallError instanceof Error) {
|
|
2527
|
+
generationSpan.recordException(modelCallError);
|
|
2434
2528
|
}
|
|
2435
|
-
|
|
2436
|
-
|
|
2437
|
-
|
|
2438
|
-
// visible streaming, it just gives us the structured response
|
|
2439
|
-
// shape needed for tool_use block extraction.
|
|
2440
|
-
const response = await stream.finalMessage();
|
|
2529
|
+
generationSpan.end();
|
|
2530
|
+
throw modelCallError;
|
|
2531
|
+
}
|
|
2441
2532
|
activeStream = undefined;
|
|
2442
|
-
|
|
2443
|
-
|
|
2444
|
-
|
|
2533
|
+
// End the generation span even if the bookkeeping below throws (else
|
|
2534
|
+
// it leaks). The model-call error path already ended it — no double-end.
|
|
2535
|
+
try {
|
|
2536
|
+
const stepCacheRead = response.usage?.cache_read_input_tokens ?? 0;
|
|
2537
|
+
const stepCacheCreation = response.usage?.cache_creation_input_tokens ?? 0;
|
|
2538
|
+
const stepCacheCreation5m = response.usage?.cache_creation?.ephemeral_5m_input_tokens ?? 0;
|
|
2539
|
+
const stepCacheCreation1h = response.usage?.cache_creation?.ephemeral_1h_input_tokens ?? 0;
|
|
2540
|
+
turnCacheUsage.read += stepCacheRead;
|
|
2541
|
+
turnCacheUsage.creation += stepCacheCreation;
|
|
2542
|
+
turnCacheUsage.creation5m += stepCacheCreation5m;
|
|
2543
|
+
turnCacheUsage.creation1h += stepCacheCreation1h;
|
|
2544
|
+
usage.input += response.usage?.input_tokens || 0;
|
|
2545
|
+
usage.output += response.usage?.output_tokens || 0;
|
|
2546
|
+
usage.total = usage.input + usage.output;
|
|
2547
|
+
for (const block of response.content) {
|
|
2548
|
+
if (block.type === "text" && typeof block.text === "string") {
|
|
2549
|
+
aggregatedTurnText += block.text;
|
|
2550
|
+
}
|
|
2551
|
+
}
|
|
2552
|
+
generationSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_OUTPUT, spanJsonAttribute(response.content));
|
|
2553
|
+
// 5m and 1h cache-creation are priced differently, so keep both;
|
|
2554
|
+
// drop the aggregate input_cache_creation (= 5m + 1h) that would
|
|
2555
|
+
// double-count. total sums the per-TTL keys shown here to match them.
|
|
2556
|
+
generationSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_USAGE_DETAILS, spanJsonAttribute({
|
|
2557
|
+
input: response.usage?.input_tokens ?? 0,
|
|
2558
|
+
output: response.usage?.output_tokens ?? 0,
|
|
2559
|
+
input_cached_tokens: stepCacheRead,
|
|
2560
|
+
input_cache_creation_5m: stepCacheCreation5m,
|
|
2561
|
+
input_cache_creation_1h: stepCacheCreation1h,
|
|
2562
|
+
total: (response.usage?.input_tokens ?? 0) +
|
|
2563
|
+
(response.usage?.output_tokens ?? 0) +
|
|
2564
|
+
stepCacheRead +
|
|
2565
|
+
stepCacheCreation5m +
|
|
2566
|
+
stepCacheCreation1h,
|
|
2567
|
+
}));
|
|
2568
|
+
generationSpan.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, response.usage?.input_tokens ?? 0);
|
|
2569
|
+
generationSpan.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, response.usage?.output_tokens ?? 0);
|
|
2570
|
+
if (response.stop_reason) {
|
|
2571
|
+
generationSpan.setAttribute(ATTR.GEN_AI_FINISH_REASON, response.stop_reason);
|
|
2572
|
+
}
|
|
2573
|
+
generationSpan.setStatus({ code: SpanStatusCode.OK });
|
|
2574
|
+
}
|
|
2575
|
+
finally {
|
|
2576
|
+
generationSpan.end();
|
|
2577
|
+
}
|
|
2445
2578
|
const toolUseBlocks = response.content.filter((block) => block.type === "tool_use");
|
|
2446
2579
|
// Structured-output pattern: when the model returns the
|
|
2447
2580
|
// final_result tool call, push its arguments as JSON and stop.
|
|
@@ -2481,6 +2614,38 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2481
2614
|
toolName: toolUse.name,
|
|
2482
2615
|
args: toolUse.input,
|
|
2483
2616
|
});
|
|
2617
|
+
// One tool observation per execution. ai.toolCall.* names follow the
|
|
2618
|
+
// Vercel AI SDK convention so existing tooling keeps working.
|
|
2619
|
+
const toolSpan = tracers.mcp.startSpan("ai.toolCall", {
|
|
2620
|
+
kind: SpanKind.INTERNAL,
|
|
2621
|
+
attributes: {
|
|
2622
|
+
[LANGFUSE_ATTR.OBSERVATION_TYPE]: "tool",
|
|
2623
|
+
[ATTR.GEN_AI_TOOL_NAME]: toolUse.name,
|
|
2624
|
+
"ai.toolCall.name": toolUse.name,
|
|
2625
|
+
"ai.toolCall.id": toolUse.id,
|
|
2626
|
+
"ai.toolCall.args": spanJsonAttribute(toolUse.input, 20_000),
|
|
2627
|
+
[LANGFUSE_ATTR.OBSERVATION_INPUT]: spanJsonAttribute(toolUse.input, 20_000),
|
|
2628
|
+
[LANGFUSE_ATTR.OBSERVATION_METADATA]: spanJsonAttribute({
|
|
2629
|
+
step,
|
|
2630
|
+
}),
|
|
2631
|
+
},
|
|
2632
|
+
}, turnContext);
|
|
2633
|
+
const endToolSpan = (output, errorMessage) => {
|
|
2634
|
+
toolSpan.setAttribute("ai.toolCall.result", spanJsonAttribute(output));
|
|
2635
|
+
toolSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_OUTPUT, spanJsonAttribute(output));
|
|
2636
|
+
if (errorMessage) {
|
|
2637
|
+
toolSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_LEVEL, "ERROR");
|
|
2638
|
+
toolSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_STATUS_MESSAGE, errorMessage);
|
|
2639
|
+
toolSpan.setStatus({
|
|
2640
|
+
code: SpanStatusCode.ERROR,
|
|
2641
|
+
message: errorMessage,
|
|
2642
|
+
});
|
|
2643
|
+
}
|
|
2644
|
+
else {
|
|
2645
|
+
toolSpan.setStatus({ code: SpanStatusCode.OK });
|
|
2646
|
+
}
|
|
2647
|
+
toolSpan.end();
|
|
2648
|
+
};
|
|
2484
2649
|
const execute = executeMap.get(toolUse.name);
|
|
2485
2650
|
if (execute) {
|
|
2486
2651
|
try {
|
|
@@ -2489,7 +2654,13 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2489
2654
|
messages: [],
|
|
2490
2655
|
abortSignal: options.abortSignal,
|
|
2491
2656
|
};
|
|
2492
|
-
|
|
2657
|
+
// Run with toolSpan active so spans inside execute
|
|
2658
|
+
// (neurolink.tool.execute) nest under this observation instead
|
|
2659
|
+
// of becoming disconnected siblings.
|
|
2660
|
+
const result = await otelContext.with(otelTrace.setSpan(turnContext, toolSpan), () => execute(toolUse.input, toolOptions));
|
|
2661
|
+
// MCP failures are returned, not thrown — surface them on
|
|
2662
|
+
// the span so failed calls show as ERROR in Langfuse.
|
|
2663
|
+
endToolSpan(result, extractMcpToolErrorMessage(result));
|
|
2493
2664
|
toolExecutions.push({
|
|
2494
2665
|
name: toolUse.name,
|
|
2495
2666
|
input: toolUse.input,
|
|
@@ -2515,6 +2686,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2515
2686
|
catch (err) {
|
|
2516
2687
|
const errMsg = `Error executing tool "${toolUse.name}": ${err instanceof Error ? err.message : String(err)}`;
|
|
2517
2688
|
const errorPayload = { error: errMsg };
|
|
2689
|
+
endToolSpan(errorPayload, errMsg);
|
|
2518
2690
|
toolExecutions.push({
|
|
2519
2691
|
name: toolUse.name,
|
|
2520
2692
|
input: toolUse.input,
|
|
@@ -2535,6 +2707,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2535
2707
|
else {
|
|
2536
2708
|
const errMsg = `TOOL_NOT_FOUND: The tool "${toolUse.name}" does not exist.`;
|
|
2537
2709
|
const errorPayload = { error: errMsg };
|
|
2710
|
+
endToolSpan(errorPayload, errMsg);
|
|
2538
2711
|
toolExecutions.push({
|
|
2539
2712
|
name: toolUse.name,
|
|
2540
2713
|
input: toolUse.input,
|
|
@@ -2577,13 +2750,49 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2577
2750
|
}
|
|
2578
2751
|
metadata.responseTime = Date.now() - startTime;
|
|
2579
2752
|
metadata.totalToolExecutions = allToolCalls.filter((tc) => tc.toolName !== "final_result").length;
|
|
2753
|
+
const turnOutputAttribute = spanJsonAttribute({
|
|
2754
|
+
text: aggregatedTurnText,
|
|
2755
|
+
...(structuredOutputRef.value
|
|
2756
|
+
? { structuredOutput: structuredOutputRef.value }
|
|
2757
|
+
: {}),
|
|
2758
|
+
});
|
|
2759
|
+
turnSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_OUTPUT, turnOutputAttribute);
|
|
2760
|
+
turnSpan.setAttribute(LANGFUSE_ATTR.TRACE_OUTPUT, turnOutputAttribute);
|
|
2761
|
+
// Turn usage is metadata-only (not usage_details) — see the note at the
|
|
2762
|
+
// top of this method on why it must not contribute to the cost rollup.
|
|
2763
|
+
turnSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_METADATA, spanJsonAttribute({
|
|
2764
|
+
toolsOffered: offeredToolNames,
|
|
2765
|
+
toolCount: offeredToolNames.length,
|
|
2766
|
+
maxSteps,
|
|
2767
|
+
steps: step,
|
|
2768
|
+
toolCallCount: metadata.totalToolExecutions,
|
|
2769
|
+
toolsCalled: toolsUsedRef.filter((name) => name !== "final_result"),
|
|
2770
|
+
structuredOutput: useFinalResultTool,
|
|
2771
|
+
usage: {
|
|
2772
|
+
input: usage.input,
|
|
2773
|
+
output: usage.output,
|
|
2774
|
+
input_cached_tokens: turnCacheUsage.read,
|
|
2775
|
+
input_cache_creation: turnCacheUsage.creation,
|
|
2776
|
+
input_cache_creation_5m: turnCacheUsage.creation5m,
|
|
2777
|
+
input_cache_creation_1h: turnCacheUsage.creation1h,
|
|
2778
|
+
},
|
|
2779
|
+
}));
|
|
2780
|
+
turnSpan.setStatus({ code: SpanStatusCode.OK });
|
|
2580
2781
|
channel.close();
|
|
2581
2782
|
}
|
|
2582
2783
|
catch (err) {
|
|
2784
|
+
turnSpan.setStatus({
|
|
2785
|
+
code: SpanStatusCode.ERROR,
|
|
2786
|
+
message: err instanceof Error ? err.message : String(err),
|
|
2787
|
+
});
|
|
2788
|
+
if (err instanceof Error) {
|
|
2789
|
+
turnSpan.recordException(err);
|
|
2790
|
+
}
|
|
2583
2791
|
logger.error("[GoogleVertex] Native Anthropic SDK stream error", err);
|
|
2584
2792
|
channel.error(this.handleProviderError(err));
|
|
2585
2793
|
}
|
|
2586
2794
|
finally {
|
|
2795
|
+
turnSpan.end();
|
|
2587
2796
|
options.abortSignal?.removeEventListener("abort", abortHandler);
|
|
2588
2797
|
clearTimeout(streamTimeoutHandle);
|
|
2589
2798
|
}
|
|
@@ -3316,6 +3525,15 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
3316
3525
|
const inputPrompt = mergedOptions.input?.text ||
|
|
3317
3526
|
mergedOptions.prompt ||
|
|
3318
3527
|
"";
|
|
3528
|
+
// Set generation input before the call so error paths still carry the
|
|
3529
|
+
// request; output is set after the native call resolves.
|
|
3530
|
+
const generationInputAttribute = spanJsonAttribute({
|
|
3531
|
+
...(mergedOptions.systemPrompt
|
|
3532
|
+
? { system: mergedOptions.systemPrompt }
|
|
3533
|
+
: {}),
|
|
3534
|
+
prompt: inputPrompt,
|
|
3535
|
+
});
|
|
3536
|
+
generateSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_INPUT, generationInputAttribute);
|
|
3319
3537
|
try {
|
|
3320
3538
|
let result;
|
|
3321
3539
|
// Wrap the actual native generate call in `neurolink.executeGeneration`
|
|
@@ -3332,20 +3550,28 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
3332
3550
|
"neurolink.path": isAnthropicModel(modelName)
|
|
3333
3551
|
? "native.anthropic"
|
|
3334
3552
|
: "native.google-genai",
|
|
3553
|
+
[LANGFUSE_ATTR.OBSERVATION_INPUT]: generationInputAttribute,
|
|
3335
3554
|
},
|
|
3336
|
-
}, async () => {
|
|
3555
|
+
}, async (executionSpan) => {
|
|
3556
|
+
let nativeResult;
|
|
3337
3557
|
if (isAnthropicModel(modelName)) {
|
|
3338
3558
|
logger.info("[GoogleVertex] Routing Claude generate to native @anthropic-ai/vertex-sdk", {
|
|
3339
3559
|
model: modelName,
|
|
3340
3560
|
totalToolCount: Object.keys(mergedOptions.tools).length,
|
|
3341
3561
|
});
|
|
3342
|
-
|
|
3562
|
+
nativeResult =
|
|
3563
|
+
await this.executeNativeAnthropicGenerate(mergedOptions);
|
|
3343
3564
|
}
|
|
3344
|
-
|
|
3345
|
-
|
|
3346
|
-
|
|
3347
|
-
|
|
3348
|
-
|
|
3565
|
+
else {
|
|
3566
|
+
logger.info("[GoogleVertex] Routing Gemini generate to native @google/genai", {
|
|
3567
|
+
model: modelName,
|
|
3568
|
+
totalToolCount: Object.keys(mergedOptions.tools).length,
|
|
3569
|
+
});
|
|
3570
|
+
nativeResult =
|
|
3571
|
+
await this.executeNativeGemini3Generate(mergedOptions);
|
|
3572
|
+
}
|
|
3573
|
+
executionSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_OUTPUT, spanJsonAttribute(nativeResult?.content ?? ""));
|
|
3574
|
+
return nativeResult;
|
|
3349
3575
|
});
|
|
3350
3576
|
this.attachUsageAndCostAttributes(generateSpan, modelName, result?.usage);
|
|
3351
3577
|
// Pipe through TTS-of-AI-response when caller asks for it. The
|
|
@@ -3353,6 +3579,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
3353
3579
|
// enabled / useAiResponse is false, so the cost is zero on
|
|
3354
3580
|
// non-TTS paths.
|
|
3355
3581
|
result = await this.synthesizeAIResponseIfNeeded(result, options);
|
|
3582
|
+
generateSpan.setAttribute(LANGFUSE_ATTR.OBSERVATION_OUTPUT, spanJsonAttribute(result?.content ?? ""));
|
|
3356
3583
|
// Fire onFinish lifecycle callback for the native generate path.
|
|
3357
3584
|
// Pipeline A providers get this for free via the AI SDK middleware
|
|
3358
3585
|
// wrapper (LifecycleMiddleware); native @google/genai bypasses
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*
|
|
7
7
|
* Flow: Vercel AI SDK → OpenTelemetry Spans → LangfuseSpanProcessor → Langfuse Platform
|
|
8
8
|
*/
|
|
9
|
-
import { trace } from "@opentelemetry/api";
|
|
9
|
+
import { trace, type Span as ApiSpan } from "@opentelemetry/api";
|
|
10
10
|
import { LoggerProvider } from "@opentelemetry/sdk-logs";
|
|
11
11
|
import { type SpanProcessor } from "@opentelemetry/sdk-trace-base";
|
|
12
12
|
import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
|
|
@@ -146,6 +146,15 @@ export declare function setLangfuseContext<T = void>(context: {
|
|
|
146
146
|
* console.log(context?.userId, context?.sessionId);
|
|
147
147
|
*/
|
|
148
148
|
export declare function getLangfuseContext(): LangfuseContext | undefined;
|
|
149
|
+
/**
 * Fill a span's Langfuse identity when the caller's context would otherwise
 * fall back to "guest". Identity (user.id / session.id) is set additively —
 * only fields ambient context didn't already provide, so a host's own context
 * is never overridden. trace.name (the title) is rescued only when no ambient
 * name source exists AND this span is the trace root, mirroring
 * ContextEnricher.onStart so a host wrapper span isn't relabelled.
 *
 * @param span - Span that receives the rescued attributes.
 * @param callContext - Per-call context. Typed `unknown`; the implementation
 *   only honours `userId`, `sessionId` and `traceName` when they are
 *   non-empty strings and ignores everything else.
 * @param isRootSpan - Whether `span` is the trace root. The trace name is
 *   only written when this is true; user and session ids do not depend on it.
 */
|
|
157
|
+
export declare function stampGuestRescueIdentity(span: ApiSpan, callContext: unknown, isRootSpan: boolean): void;
|
|
149
158
|
/**
|
|
150
159
|
* Capture the current Langfuse AsyncLocalStorage context and return a wrapper
|
|
151
160
|
* that re-enters that context when executing the provided callback.
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*
|
|
7
7
|
* Flow: Vercel AI SDK → OpenTelemetry Spans → LangfuseSpanProcessor → Langfuse Platform
|
|
8
8
|
*/
|
|
9
|
-
import { metrics, SpanStatusCode, trace } from "@opentelemetry/api";
|
|
9
|
+
import { metrics, SpanStatusCode, trace, } from "@opentelemetry/api";
|
|
10
10
|
import { W3CTraceContextPropagator } from "@opentelemetry/core";
|
|
11
11
|
import { OTLPLogExporter } from "@opentelemetry/exporter-logs-otlp-http";
|
|
12
12
|
import { OTLPMetricExporter } from "@opentelemetry/exporter-metrics-otlp-http";
|
|
@@ -20,6 +20,7 @@ import { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION, } from "@opentelemetry/semanti
|
|
|
20
20
|
import { AsyncLocalStorage } from "async_hooks";
|
|
21
21
|
import { extractMcpErrorText } from "../../../../utils/mcpErrorText.js";
|
|
22
22
|
import { logger } from "../../../../utils/logger.js";
|
|
23
|
+
import { LANGFUSE_ATTR } from "../../../../telemetry/attributes.js";
|
|
23
24
|
const LOG_PREFIX = "[OpenTelemetry]";
|
|
24
25
|
function createOtelResource(config, serviceName) {
|
|
25
26
|
return resourceFromAttributes({
|
|
@@ -1117,6 +1118,40 @@ export async function setLangfuseContext(context, callback) {
|
|
|
1117
1118
|
export function getLangfuseContext() {
    // Hand back whatever contextStorage currently holds. The declared return
    // type is `LangfuseContext | undefined`, so callers must tolerate the
    // absence of an active context.
    const activeContext = contextStorage.getStore();
    return activeContext;
}
|
|
1121
|
+
/**
 * Rescue a span's Langfuse identity from the per-call context so the trace
 * does not fall back to "guest".
 *
 * User and session ids are applied additively: each one is written only when
 * the ambient Langfuse context did not already supply that field, so a host's
 * own context is never overridden. The trace name (title) is written only
 * when the ambient context has neither a trace name nor a user id AND the
 * span is the trace root, mirroring ContextEnricher.onStart so a host wrapper
 * span isn't relabelled.
 *
 * @param span - Span that receives the attributes (via `setAttribute`).
 * @param callContext - Per-call context of unknown shape; only non-empty
 *   string `userId`, `sessionId` and `traceName` fields are used.
 * @param isRootSpan - True when `span` is the root of its trace.
 * @returns Nothing; the span is updated in place.
 */
export function stampGuestRescueIdentity(span, callContext, isRootSpan) {
    const ambientContext = getLangfuseContext();
    // Accept a call-context field only when it is a non-empty string.
    const readString = (field) => {
        const candidate = callContext?.[field];
        return typeof candidate === "string" && candidate ? candidate : undefined;
    };
    const callerUserId = readString("userId");
    const callerSessionId = readString("sessionId");
    const ambientHasName = Boolean(ambientContext?.traceName);
    const ambientHasUser = Boolean(ambientContext?.userId);
    // Title: the trace name is derived from traceName ?? userId, so it only
    // needs rescuing when ambient offers neither, and only on the trace root.
    if (isRootSpan && !(ambientHasName || ambientHasUser)) {
        const title = readString("traceName") ?? callerUserId;
        if (title) {
            span.setAttribute(LANGFUSE_ATTR.TRACE_NAME, title);
            span.setAttribute("trace.name", title);
        }
    }
    // Identity: fill each field independently, never replacing ambient values.
    if (callerUserId && !ambientHasUser) {
        span.setAttribute("user.id", callerUserId);
    }
    if (callerSessionId && !ambientContext?.sessionId) {
        span.setAttribute("session.id", callerSessionId);
    }
}
|
|
1120
1155
|
/**
|
|
1121
1156
|
* Capture the current Langfuse AsyncLocalStorage context and return a wrapper
|
|
1122
1157
|
* that re-enters that context when executing the provided callback.
|