mantisdk 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mantisdk might be problematic. Click here for more details.
- mantisdk/__init__.py +1 -1
- mantisdk/tracing/__init__.py +57 -0
- mantisdk/tracing/api.py +546 -0
- mantisdk/tracing/attributes.py +191 -0
- mantisdk/tracing/exporters/__init__.py +10 -0
- mantisdk/tracing/exporters/insight.py +202 -0
- mantisdk/tracing/init.py +371 -0
- mantisdk/tracing/instrumentors/__init__.py +15 -0
- mantisdk/tracing/instrumentors/claude_agent_sdk.py +591 -0
- mantisdk/tracing/instrumentors/instrumentation_principles.md +289 -0
- mantisdk/tracing/instrumentors/registry.py +313 -0
- {mantisdk-0.1.0.dist-info → mantisdk-0.1.2.dist-info}/METADATA +1 -1
- {mantisdk-0.1.0.dist-info → mantisdk-0.1.2.dist-info}/RECORD +16 -6
- {mantisdk-0.1.0.dist-info → mantisdk-0.1.2.dist-info}/WHEEL +0 -0
- {mantisdk-0.1.0.dist-info → mantisdk-0.1.2.dist-info}/entry_points.txt +0 -0
- {mantisdk-0.1.0.dist-info → mantisdk-0.1.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,591 @@
|
|
|
1
|
+
# Copyright (c) Metis. All rights reserved.
|
|
2
|
+
|
|
3
|
+
"""Instrumentor for claude-agent-sdk.
|
|
4
|
+
|
|
5
|
+
This module provides automatic instrumentation for the Claude Agent SDK,
|
|
6
|
+
capturing conversation turns, tool calls, and response metadata.
|
|
7
|
+
|
|
8
|
+
Spans are annotated with semantic attributes following Mantis Insight conventions,
|
|
9
|
+
ensuring proper processing and display in the Insight dashboard.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import functools
|
|
15
|
+
import json
|
|
16
|
+
import logging
|
|
17
|
+
from typing import Any, AsyncIterator, Optional, TYPE_CHECKING
|
|
18
|
+
|
|
19
|
+
from opentelemetry import trace
|
|
20
|
+
from opentelemetry.trace import Span, SpanKind, Status, StatusCode
|
|
21
|
+
from opentelemetry.context import attach, detach, Context
|
|
22
|
+
|
|
23
|
+
from .registry import BaseInstrumentor, _is_package_available
|
|
24
|
+
from ..attributes import (
|
|
25
|
+
# Langfuse observation attributes (MUST use these for OtelIngestionProcessor to extract I/O)
|
|
26
|
+
LANGFUSE_OBSERVATION_INPUT,
|
|
27
|
+
LANGFUSE_OBSERVATION_OUTPUT,
|
|
28
|
+
LANGFUSE_OBSERVATION_MODEL,
|
|
29
|
+
# OpenTelemetry GenAI attributes (widely supported)
|
|
30
|
+
GEN_AI_SYSTEM,
|
|
31
|
+
GEN_AI_REQUEST_MODEL,
|
|
32
|
+
GEN_AI_RESPONSE_MODEL,
|
|
33
|
+
GEN_AI_USAGE_INPUT_TOKENS,
|
|
34
|
+
GEN_AI_USAGE_OUTPUT_TOKENS,
|
|
35
|
+
GEN_AI_USAGE_COST,
|
|
36
|
+
GEN_AI_OPERATION_NAME,
|
|
37
|
+
GEN_AI_TOOL_NAME,
|
|
38
|
+
GEN_AI_TOOL_CALL_ARGUMENTS,
|
|
39
|
+
GEN_AI_TOOL_CALL_RESULT,
|
|
40
|
+
# OpenInference attributes
|
|
41
|
+
OPENINFERENCE_SPAN_KIND,
|
|
42
|
+
SPAN_KIND_LLM,
|
|
43
|
+
SPAN_KIND_TOOL,
|
|
44
|
+
INPUT_VALUE,
|
|
45
|
+
INPUT_MIME_TYPE,
|
|
46
|
+
OUTPUT_VALUE,
|
|
47
|
+
OUTPUT_MIME_TYPE,
|
|
48
|
+
MIME_TYPE_TEXT,
|
|
49
|
+
MIME_TYPE_JSON,
|
|
50
|
+
LLM_MODEL_NAME,
|
|
51
|
+
LLM_SYSTEM,
|
|
52
|
+
LLM_PROVIDER,
|
|
53
|
+
LLM_TOKEN_COUNT_PROMPT,
|
|
54
|
+
LLM_TOKEN_COUNT_COMPLETION,
|
|
55
|
+
LLM_TOKEN_COUNT_TOTAL,
|
|
56
|
+
TOOL_NAME,
|
|
57
|
+
SESSION_ID,
|
|
58
|
+
# MantisDK custom attributes
|
|
59
|
+
MANTIS_LLM_THINKING,
|
|
60
|
+
MANTIS_LLM_COST_USD,
|
|
61
|
+
MANTIS_DURATION_MS,
|
|
62
|
+
MANTIS_DURATION_API_MS,
|
|
63
|
+
MANTIS_NUM_TURNS,
|
|
64
|
+
MANTIS_TOOL_IS_ERROR,
|
|
65
|
+
# Claude-specific attributes
|
|
66
|
+
CLAUDE_API_ERROR,
|
|
67
|
+
CLAUDE_PARENT_TOOL_USE_ID,
|
|
68
|
+
CLAUDE_STRUCTURED_OUTPUT,
|
|
69
|
+
CLAUDE_THINKING_SIGNATURE,
|
|
70
|
+
CLAUDE_MESSAGE_UUID,
|
|
71
|
+
CLAUDE_SYSTEM_SUBTYPE,
|
|
72
|
+
CLAUDE_SYSTEM_DATA,
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
if TYPE_CHECKING:
|
|
76
|
+
from openinference.instrumentation import TraceConfig
|
|
77
|
+
|
|
78
|
+
logger = logging.getLogger(__name__)
|
|
79
|
+
|
|
80
|
+
# Tracer name for claude-agent-sdk spans
|
|
81
|
+
TRACER_NAME = "mantisdk.instrumentation.claude_agent_sdk"
|
|
82
|
+
|
|
83
|
+
# Attribute to store span on client instance
|
|
84
|
+
_CLIENT_SPAN_ATTR = "_mantisdk_conversation_span"
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class ClaudeAgentSDKInstrumentor(BaseInstrumentor):
    """Instrumentor for claude-agent-sdk.

    This instrumentor wraps the ClaudeSDKClient to automatically create
    OpenTelemetry spans for:
    - Conversation turns (query calls)
    - Tool executions
    - Response processing

    Example::

        import mantisdk.tracing as tracing

        # Auto-instruments claude-agent-sdk if installed
        tracing.init()

        # Now all ClaudeSDKClient usage is traced
        from claude_agent_sdk import ClaudeSDKClient
        client = ClaudeSDKClient()
        # ...
    """

    def __init__(self):
        # Whether instrument() has been applied and not yet undone.
        self._instrumented = False
        # Original (unwrapped) ClaudeSDKClient.query, kept so
        # uninstrument() can restore it; None while not instrumented.
        self._original_query: Optional[Any] = None
        # Original (unwrapped) ClaudeSDKClient.receive_messages, kept so
        # uninstrument() can restore it; None while not instrumented.
        self._original_receive_messages: Optional[Any] = None
|
|
113
|
+
|
|
114
|
+
@property
def name(self) -> str:
    """Registry identifier under which this instrumentor is listed."""
    instrumentor_name = "claude_agent_sdk"
    return instrumentor_name
|
|
117
|
+
|
|
118
|
+
@property
def package_name(self) -> str:
    """Importable package this instrumentor targets."""
    target_package = "claude_agent_sdk"
    return target_package
|
|
121
|
+
|
|
122
|
+
def is_available(self) -> bool:
    """Report whether the claude-agent-sdk package can be imported."""
    available = _is_package_available("claude_agent_sdk")
    return available
|
|
125
|
+
|
|
126
|
+
def instrument(self, trace_config: Optional["TraceConfig"] = None) -> None:
    """Activate instrumentation for claude-agent-sdk.

    Monkey-patches ClaudeSDKClient.query() and receive_messages() with
    wrapped versions that create spans for each conversation turn.
    Idempotent: a second call is a no-op. Re-raises any failure after
    logging it.
    """
    # Guard clauses: nothing to do if already patched or SDK missing.
    if self._instrumented:
        logger.debug("claude_agent_sdk already instrumented")
        return
    if not self.is_available():
        logger.debug("claude_agent_sdk not available, skipping instrumentation")
        return

    try:
        from claude_agent_sdk import ClaudeSDKClient

        # Keep the unwrapped methods so uninstrument() can restore them.
        self._original_query = ClaudeSDKClient.query
        self._original_receive_messages = ClaudeSDKClient.receive_messages

        # Replace the SDK methods with instrumented wrappers.
        ClaudeSDKClient.query = self._create_instrumented_query(
            self._original_query
        )
        ClaudeSDKClient.receive_messages = self._create_instrumented_receive_messages(
            self._original_receive_messages
        )

        self._instrumented = True
        logger.info("Instrumented claude_agent_sdk")
    except Exception as e:
        logger.warning("Failed to instrument claude_agent_sdk: %s", e)
        raise
|
|
163
|
+
|
|
164
|
+
def uninstrument(self) -> None:
    """Deactivate instrumentation, restoring the original SDK methods."""
    if not self._instrumented:
        return

    try:
        from claude_agent_sdk import ClaudeSDKClient

        # Put back whichever originals were captured during instrument().
        saved_query = self._original_query
        if saved_query is not None:
            ClaudeSDKClient.query = saved_query
        saved_receive = self._original_receive_messages
        if saved_receive is not None:
            ClaudeSDKClient.receive_messages = saved_receive

        # Reset bookkeeping so instrument() can run again cleanly.
        self._instrumented = False
        self._original_query = None
        self._original_receive_messages = None
        logger.info("Uninstrumented claude_agent_sdk")
    except Exception as e:
        # Best-effort: log and continue rather than raise during teardown.
        logger.warning("Failed to uninstrument claude_agent_sdk: %s", e)
|
|
185
|
+
|
|
186
|
+
def _create_instrumented_query(self, original_query):
    """Create an instrumented version of ClaudeSDKClient.query().

    This creates a span and stores it on the client instance so that
    receive_messages() can add output attributes to it later.

    Fix over the previous version: if query() is called again before the
    prior turn's receive_messages() finished, the span stored on the
    client used to be silently overwritten and never ended — a span leak
    (the unended span is never exported). We now end any stale span
    before overwriting it.

    Returns:
        An async function with the same signature as ``query``, suitable
        for assignment onto ClaudeSDKClient.
    """

    @functools.wraps(original_query)
    async def instrumented_query(
        client_self,
        prompt: str | Any,
        session_id: str = "default",
    ) -> None:
        tracer = trace.get_tracer(TRACER_NAME)

        # End any span left behind by a previous query() whose
        # receive_messages() was never consumed to completion, so it is
        # exported instead of leaking when we overwrite the reference.
        stale_span: Optional[Span] = getattr(client_self, _CLIENT_SPAN_ATTR, None)
        if stale_span is not None and stale_span.is_recording():
            stale_span.set_status(
                Status(StatusCode.ERROR, "Superseded by a new query before completion")
            )
            stale_span.end()

        # Get model from client options; fall back to a generic label.
        model = getattr(client_self.options, "model", None) or "claude"

        # Create span as a NEW TRACE (fresh context = each turn is its own trace)
        span = tracer.start_span(
            "llm.conversation_turn",
            kind=SpanKind.CLIENT,
            context=Context(),  # Empty context creates a new root trace
        )

        # Record input using multiple conventions for compatibility
        input_value = prompt if isinstance(prompt, str) else str(prompt)

        # Langfuse attributes (REQUIRED for OtelIngestionProcessor to extract I/O)
        span.set_attribute(LANGFUSE_OBSERVATION_INPUT, input_value)
        span.set_attribute(LANGFUSE_OBSERVATION_MODEL, model)

        # OpenInference attributes
        span.set_attribute(OPENINFERENCE_SPAN_KIND, SPAN_KIND_LLM)
        span.set_attribute(INPUT_VALUE, input_value)
        span.set_attribute(INPUT_MIME_TYPE, MIME_TYPE_TEXT)
        span.set_attribute(LLM_MODEL_NAME, model)
        span.set_attribute(LLM_SYSTEM, "anthropic")
        span.set_attribute(LLM_PROVIDER, "anthropic")

        # GenAI attributes
        span.set_attribute(GEN_AI_REQUEST_MODEL, model)
        span.set_attribute(GEN_AI_SYSTEM, "anthropic")
        span.set_attribute(GEN_AI_OPERATION_NAME, "chat")

        # Session ID (updated with the final session_id by receive_messages)
        span.set_attribute(SESSION_ID, session_id)

        # Store span on client instance for receive_messages to use;
        # ownership of ending it transfers to receive_messages on success.
        setattr(client_self, _CLIENT_SPAN_ATTR, span)

        try:
            # Call original method
            return await original_query(client_self, prompt, session_id)
        except Exception as e:
            # On error, receive_messages will never run — end the span here.
            span.set_status(Status(StatusCode.ERROR, str(e)))
            span.record_exception(e)
            span.end()
            # Clear the stored span
            if hasattr(client_self, _CLIENT_SPAN_ATTR):
                delattr(client_self, _CLIENT_SPAN_ATTR)
            raise

    return instrumented_query
|
|
252
|
+
|
|
253
|
+
def _create_instrumented_receive_messages(self, original_receive_messages):
    """Create an instrumented version of ClaudeSDKClient.receive_messages().

    This uses the span created by query() to record output attributes,
    then ends the span when ResultMessage is received.

    The output is structured as a sequence of blocks to preserve the order:
    text → tool_call → text (if that's how the response came)

    Tool calls appear as child spans under the conversation turn span.
    """

    @functools.wraps(original_receive_messages)
    async def instrumented_receive_messages(client_self) -> AsyncIterator[Any]:
        tracer = trace.get_tracer(TRACER_NAME)

        # Import message types lazily — the SDK is only guaranteed present
        # once instrumentation has been activated.
        from claude_agent_sdk import (
            AssistantMessage,
            ResultMessage,
            SystemMessage,
            TextBlock,
            ThinkingBlock,
            ToolUseBlock,
            ToolResultBlock,
            UserMessage,
        )

        # Get the span created by query(); None if receive_messages() is
        # called without a preceding instrumented query().
        conversation_span: Optional[Span] = getattr(client_self, _CLIENT_SPAN_ATTR, None)

        # Create and ACTIVATE context for proper parent-child span relationships
        # This ensures other instrumented code (MCP, Snowflake, etc.) creates child spans
        parent_ctx = None
        context_token = None
        if conversation_span:
            parent_ctx = trace.set_span_in_context(conversation_span)
            context_token = attach(parent_ctx)  # Activate so other instrumentation sees it

        # Track state for this conversation turn
        output_blocks = []  # Sequence of blocks: text, tool_call, text...
        collected_thinking = []  # ThinkingBlock text, joined at the end
        thinking_signatures = []  # ThinkingBlock signatures
        tool_spans = {}  # Map tool_use_id to span (open until result arrives)
        current_text_buffer = []  # TextBlock text awaiting flush into output_blocks
        system_events = []  # SystemMessage events (collected; also emitted as span events)

        try:
            async for message in original_receive_messages(client_self):
                if isinstance(message, AssistantMessage):
                    model = getattr(message, "model", None)
                    if conversation_span and conversation_span.is_recording():
                        # Update model from response (GenAI convention)
                        if model:
                            conversation_span.set_attribute(GEN_AI_RESPONSE_MODEL, model)
                            conversation_span.set_attribute(LANGFUSE_OBSERVATION_MODEL, model)
                            conversation_span.set_attribute(LLM_MODEL_NAME, model)

                        # Capture API error if present
                        if message.error:
                            conversation_span.set_attribute(CLAUDE_API_ERROR, message.error)

                        # Capture parent_tool_use_id for sub-agent tracking
                        if message.parent_tool_use_id:
                            conversation_span.set_attribute(CLAUDE_PARENT_TOOL_USE_ID, message.parent_tool_use_id)

                    # Process content blocks even when no conversation span exists,
                    # so output_blocks stays a faithful record of the turn.
                    for block in message.content:
                        if isinstance(block, TextBlock):
                            current_text_buffer.append(block.text)

                        elif isinstance(block, ThinkingBlock):
                            collected_thinking.append(block.thinking)
                            # Capture thinking signature if present
                            if hasattr(block, 'signature') and block.signature:
                                thinking_signatures.append(block.signature)

                        elif isinstance(block, ToolUseBlock):
                            # Flush any buffered text before tool call
                            if current_text_buffer:
                                output_blocks.append({
                                    "type": "text",
                                    "content": "\n".join(current_text_buffer)
                                })
                                current_text_buffer = []

                            # Add tool call block (will be updated with result)
                            output_blocks.append({
                                "type": "tool_call",
                                "name": block.name,
                                "id": block.id,
                                "input": block.input,
                                "output": None
                            })

                            # Create a child span for this tool call
                            # Note: Span timing is based on when we receive ToolUseBlock/ToolResultBlock messages,
                            # not when the tool actually executes in Claude CLI. This may cause timing inaccuracies.
                            tool_span = tracer.start_span(
                                f"tool.{block.name}",
                                kind=SpanKind.INTERNAL,
                                context=parent_ctx,
                            )

                            # OpenInference attributes
                            tool_span.set_attribute(OPENINFERENCE_SPAN_KIND, SPAN_KIND_TOOL)
                            tool_span.set_attribute(TOOL_NAME, block.name)

                            # GenAI attributes
                            tool_span.set_attribute(GEN_AI_TOOL_NAME, block.name)
                            tool_span.set_attribute(GEN_AI_OPERATION_NAME, "tool_call")

                            # Prefer JSON for tool input; fall back to str() when
                            # the input is not JSON-serializable.
                            try:
                                input_json = json.dumps(block.input)
                                # GenAI
                                tool_span.set_attribute(GEN_AI_TOOL_CALL_ARGUMENTS, input_json)
                                # Langfuse
                                tool_span.set_attribute(LANGFUSE_OBSERVATION_INPUT, input_json)
                                # OpenInference
                                tool_span.set_attribute(INPUT_VALUE, input_json)
                                tool_span.set_attribute(INPUT_MIME_TYPE, MIME_TYPE_JSON)
                            except (TypeError, ValueError):
                                input_str = str(block.input)
                                tool_span.set_attribute(GEN_AI_TOOL_CALL_ARGUMENTS, input_str)
                                tool_span.set_attribute(LANGFUSE_OBSERVATION_INPUT, input_str)
                                tool_span.set_attribute(INPUT_VALUE, input_str)
                                tool_span.set_attribute(INPUT_MIME_TYPE, MIME_TYPE_TEXT)

                            # Keep the span open until its ToolResultBlock arrives.
                            tool_spans[block.id] = tool_span

                elif isinstance(message, UserMessage):
                    # Capture UserMessage uuid and parent_tool_use_id if present
                    if conversation_span and conversation_span.is_recording():
                        if hasattr(message, 'uuid') and message.uuid:
                            conversation_span.set_attribute(CLAUDE_MESSAGE_UUID, message.uuid)
                        if hasattr(message, 'parent_tool_use_id') and message.parent_tool_use_id:
                            conversation_span.set_attribute(CLAUDE_PARENT_TOOL_USE_ID, message.parent_tool_use_id)

                    # Check for tool results
                    if isinstance(message.content, list):
                        for block in message.content:
                            if isinstance(block, ToolResultBlock):
                                tool_span = tool_spans.get(block.tool_use_id)
                                content_str = str(block.content) if block.content else ""
                                is_error = getattr(block, "is_error", False) or False

                                # Update the tool_call block in output_blocks
                                for ob in output_blocks:
                                    if ob.get("type") == "tool_call" and ob.get("id") == block.tool_use_id:
                                        ob["output"] = content_str
                                        ob["is_error"] = is_error
                                        break

                                if tool_span:
                                    # GenAI
                                    tool_span.set_attribute(GEN_AI_TOOL_CALL_RESULT, content_str)
                                    # Langfuse
                                    tool_span.set_attribute(LANGFUSE_OBSERVATION_OUTPUT, content_str)
                                    # OpenInference
                                    tool_span.set_attribute(OUTPUT_VALUE, content_str)
                                    # Determine output mime type based on content
                                    # (heuristic: leading '{' or '[' means JSON)
                                    if content_str.strip().startswith('{') or content_str.strip().startswith('['):
                                        tool_span.set_attribute(OUTPUT_MIME_TYPE, MIME_TYPE_JSON)
                                    else:
                                        tool_span.set_attribute(OUTPUT_MIME_TYPE, MIME_TYPE_TEXT)
                                    # MantisDK
                                    tool_span.set_attribute(MANTIS_TOOL_IS_ERROR, is_error)

                                    if is_error:
                                        tool_span.set_status(Status(StatusCode.ERROR, content_str))
                                    else:
                                        tool_span.set_status(Status(StatusCode.OK))

                                    tool_span.end()
                                    del tool_spans[block.tool_use_id]

                elif isinstance(message, SystemMessage):
                    # Handle SystemMessage - contains system events with subtype and data
                    system_events.append({
                        "subtype": message.subtype,
                        "data": message.data
                    })

                    # Add as event on the conversation span
                    if conversation_span and conversation_span.is_recording():
                        event_attrs = {CLAUDE_SYSTEM_SUBTYPE: message.subtype}
                        if message.data:
                            try:
                                event_attrs[CLAUDE_SYSTEM_DATA] = json.dumps(message.data)
                            except (TypeError, ValueError):
                                event_attrs[CLAUDE_SYSTEM_DATA] = str(message.data)
                        conversation_span.add_event(
                            f"system.{message.subtype}",
                            attributes=event_attrs
                        )

                elif isinstance(message, ResultMessage):
                    # Flush any remaining text buffer
                    if current_text_buffer:
                        output_blocks.append({
                            "type": "text",
                            "content": "\n".join(current_text_buffer)
                        })
                        current_text_buffer = []

                    # Record final metrics on the conversation span
                    if conversation_span and conversation_span.is_recording():
                        # Output attributes (multiple conventions)
                        if output_blocks:
                            text_only = all(b["type"] == "text" for b in output_blocks)
                            if text_only:
                                full_response = "\n".join(b["content"] for b in output_blocks)
                                # Langfuse
                                conversation_span.set_attribute(LANGFUSE_OBSERVATION_OUTPUT, full_response)
                                # OpenInference
                                conversation_span.set_attribute(OUTPUT_VALUE, full_response)
                                conversation_span.set_attribute(OUTPUT_MIME_TYPE, MIME_TYPE_TEXT)
                            else:
                                output_json = json.dumps(output_blocks, indent=2)
                                # Langfuse
                                conversation_span.set_attribute(LANGFUSE_OBSERVATION_OUTPUT, output_json)
                                # OpenInference
                                conversation_span.set_attribute(OUTPUT_VALUE, output_json)
                                conversation_span.set_attribute(OUTPUT_MIME_TYPE, MIME_TYPE_JSON)

                        # Thinking content
                        if collected_thinking:
                            conversation_span.set_attribute(
                                MANTIS_LLM_THINKING, "\n".join(collected_thinking)
                            )

                        # Thinking signatures (Claude-specific)
                        if thinking_signatures:
                            conversation_span.set_attribute(
                                CLAUDE_THINKING_SIGNATURE, "\n".join(thinking_signatures)
                            )

                        # Structured output (Claude-specific)
                        if hasattr(message, 'structured_output') and message.structured_output is not None:
                            try:
                                conversation_span.set_attribute(
                                    CLAUDE_STRUCTURED_OUTPUT, json.dumps(message.structured_output)
                                )
                            except (TypeError, ValueError):
                                conversation_span.set_attribute(
                                    CLAUDE_STRUCTURED_OUTPUT, str(message.structured_output)
                                )

                        # Cost (GenAI and MantisDK)
                        if message.total_cost_usd is not None:
                            conversation_span.set_attribute(GEN_AI_USAGE_COST, message.total_cost_usd)
                            conversation_span.set_attribute(MANTIS_LLM_COST_USD, message.total_cost_usd)

                        # Token usage (GenAI and OpenInference)
                        # NOTE(review): assumes message.usage is a dict-like with
                        # "input_tokens"/"output_tokens" keys — confirm against SDK.
                        if message.usage:
                            usage = message.usage
                            input_tokens = usage.get("input_tokens")
                            output_tokens = usage.get("output_tokens")

                            if input_tokens is not None:
                                # GenAI
                                conversation_span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, input_tokens)
                                # OpenInference
                                conversation_span.set_attribute(LLM_TOKEN_COUNT_PROMPT, input_tokens)

                            if output_tokens is not None:
                                # GenAI
                                conversation_span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens)
                                # OpenInference
                                conversation_span.set_attribute(LLM_TOKEN_COUNT_COMPLETION, output_tokens)

                            if input_tokens is not None and output_tokens is not None:
                                conversation_span.set_attribute(
                                    LLM_TOKEN_COUNT_TOTAL, input_tokens + output_tokens
                                )

                        # Duration (MantisDK)
                        conversation_span.set_attribute(MANTIS_DURATION_MS, message.duration_ms)
                        conversation_span.set_attribute(MANTIS_DURATION_API_MS, message.duration_api_ms)

                        # Turns (MantisDK)
                        conversation_span.set_attribute(MANTIS_NUM_TURNS, message.num_turns)

                        # Session ID (update with final session_id)
                        conversation_span.set_attribute(SESSION_ID, message.session_id)

                        # Error status
                        if message.is_error:
                            conversation_span.set_status(
                                Status(StatusCode.ERROR, message.result or "Unknown error")
                            )
                        else:
                            conversation_span.set_status(Status(StatusCode.OK))

                        # End the conversation span
                        conversation_span.end()

                    # Clear the stored span
                    if hasattr(client_self, _CLIENT_SPAN_ATTR):
                        delattr(client_self, _CLIENT_SPAN_ATTR)

                # Yield the original message unchanged
                yield message

        finally:
            # Detach context first so cleanup doesn't create orphan spans
            if context_token is not None:
                detach(context_token)

            # Clean up any unclosed tool spans
            for tool_id, tool_span in tool_spans.items():
                tool_span.set_status(Status(StatusCode.ERROR, "Tool span not properly closed"))
                tool_span.end()

            # Ensure conversation span is ended if not already (e.g. the
            # consumer stopped iterating before ResultMessage arrived).
            # NOTE(review): this path sets StatusCode.OK even when the loop
            # was interrupted by an exception — confirm this is intended.
            if hasattr(client_self, _CLIENT_SPAN_ATTR):
                remaining_span = getattr(client_self, _CLIENT_SPAN_ATTR)
                if remaining_span and remaining_span.is_recording():
                    if current_text_buffer:
                        output_blocks.append({
                            "type": "text",
                            "content": "\n".join(current_text_buffer)
                        })
                    if output_blocks:
                        text_only = all(b["type"] == "text" for b in output_blocks)
                        if text_only:
                            remaining_span.set_attribute(
                                LANGFUSE_OBSERVATION_OUTPUT,
                                "\n".join(b["content"] for b in output_blocks)
                            )
                        else:
                            remaining_span.set_attribute(
                                LANGFUSE_OBSERVATION_OUTPUT,
                                json.dumps(output_blocks, indent=2)
                            )
                    remaining_span.set_status(Status(StatusCode.OK))
                    remaining_span.end()
                delattr(client_self, _CLIENT_SPAN_ATTR)

    return instrumented_receive_messages
|