monocle-apptrace 0.5.3__py3-none-any.whl → 0.6.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of monocle-apptrace might be problematic. Click here for more details.
- monocle_apptrace/exporters/file_exporter.py +7 -1
- monocle_apptrace/instrumentation/common/constants.py +8 -0
- monocle_apptrace/instrumentation/common/instrumentor.py +1 -1
- monocle_apptrace/instrumentation/common/span_handler.py +75 -24
- monocle_apptrace/instrumentation/common/utils.py +63 -6
- monocle_apptrace/instrumentation/common/wrapper.py +111 -42
- monocle_apptrace/instrumentation/common/wrapper_method.py +6 -2
- monocle_apptrace/instrumentation/metamodel/a2a/methods.py +1 -1
- monocle_apptrace/instrumentation/metamodel/adk/_helper.py +7 -4
- monocle_apptrace/instrumentation/metamodel/adk/entities/agent.py +6 -1
- monocle_apptrace/instrumentation/metamodel/agents/_helper.py +8 -8
- monocle_apptrace/instrumentation/metamodel/agents/entities/inference.py +9 -2
- monocle_apptrace/instrumentation/metamodel/aiohttp/_helper.py +1 -1
- monocle_apptrace/instrumentation/metamodel/anthropic/entities/inference.py +1 -4
- monocle_apptrace/instrumentation/metamodel/azfunc/_helper.py +1 -1
- monocle_apptrace/instrumentation/metamodel/botocore/_helper.py +5 -0
- monocle_apptrace/instrumentation/metamodel/botocore/entities/inference.py +4 -0
- monocle_apptrace/instrumentation/metamodel/fastapi/_helper.py +4 -4
- monocle_apptrace/instrumentation/metamodel/fastapi/methods.py +4 -4
- monocle_apptrace/instrumentation/metamodel/finish_types.py +32 -1
- monocle_apptrace/instrumentation/metamodel/flask/_helper.py +3 -3
- monocle_apptrace/instrumentation/metamodel/hugging_face/__init__.py +0 -0
- monocle_apptrace/instrumentation/metamodel/hugging_face/_helper.py +138 -0
- monocle_apptrace/instrumentation/metamodel/hugging_face/entities/__init__.py +0 -0
- monocle_apptrace/instrumentation/metamodel/hugging_face/entities/inference.py +94 -0
- monocle_apptrace/instrumentation/metamodel/hugging_face/methods.py +23 -0
- monocle_apptrace/instrumentation/metamodel/lambdafunc/_helper.py +1 -1
- monocle_apptrace/instrumentation/metamodel/langchain/entities/inference.py +1 -4
- monocle_apptrace/instrumentation/metamodel/langgraph/_helper.py +34 -8
- monocle_apptrace/instrumentation/metamodel/langgraph/entities/inference.py +8 -3
- monocle_apptrace/instrumentation/metamodel/langgraph/langgraph_processor.py +88 -19
- monocle_apptrace/instrumentation/metamodel/langgraph/methods.py +22 -6
- monocle_apptrace/instrumentation/metamodel/llamaindex/_helper.py +30 -10
- monocle_apptrace/instrumentation/metamodel/llamaindex/entities/agent.py +4 -3
- monocle_apptrace/instrumentation/metamodel/llamaindex/llamaindex_processor.py +15 -7
- monocle_apptrace/instrumentation/metamodel/llamaindex/methods.py +1 -8
- monocle_apptrace/instrumentation/metamodel/mcp/_helper.py +7 -6
- monocle_apptrace/instrumentation/metamodel/mistral/_helper.py +98 -49
- monocle_apptrace/instrumentation/metamodel/mistral/entities/inference.py +15 -9
- monocle_apptrace/instrumentation/metamodel/mistral/entities/retrieval.py +41 -0
- monocle_apptrace/instrumentation/metamodel/mistral/methods.py +10 -1
- monocle_apptrace/instrumentation/metamodel/openai/_helper.py +47 -7
- monocle_apptrace/instrumentation/metamodel/openai/entities/inference.py +20 -4
- monocle_apptrace/instrumentation/metamodel/openai/methods.py +1 -1
- monocle_apptrace/instrumentation/metamodel/strands/_helper.py +44 -0
- monocle_apptrace/instrumentation/metamodel/strands/entities/agent.py +179 -0
- monocle_apptrace/instrumentation/metamodel/strands/entities/tool.py +62 -0
- monocle_apptrace/instrumentation/metamodel/strands/methods.py +20 -0
- {monocle_apptrace-0.5.3.dist-info → monocle_apptrace-0.6.6.dist-info}/METADATA +23 -79
- {monocle_apptrace-0.5.3.dist-info → monocle_apptrace-0.6.6.dist-info}/RECORD +53 -46
- monocle_apptrace/README.md +0 -101
- monocle_apptrace/mcp_server.py +0 -94
- monocle_apptrace-0.5.3.dist-info/licenses/NOTICE +0 -4
- {monocle_apptrace-0.5.3.dist-info → monocle_apptrace-0.6.6.dist-info}/WHEEL +0 -0
- {monocle_apptrace-0.5.3.dist-info → monocle_apptrace-0.6.6.dist-info}/entry_points.txt +0 -0
- {monocle_apptrace-0.5.3.dist-info → monocle_apptrace-0.6.6.dist-info}/licenses/LICENSE +0 -0
|
@@ -20,6 +20,11 @@ from monocle_apptrace.instrumentation.common.utils import (
|
|
|
20
20
|
from monocle_apptrace.instrumentation.metamodel.finish_types import map_llamaindex_finish_reason_to_finish_type
|
|
21
21
|
|
|
22
22
|
LLAMAINDEX_AGENT_NAME_KEY = "_active_agent_name"
|
|
23
|
+
|
|
24
|
+
# Thread-local storage for current agent context
|
|
25
|
+
import threading
|
|
26
|
+
_thread_local = threading.local()
|
|
27
|
+
|
|
23
28
|
logger = logging.getLogger(__name__)
|
|
24
29
|
|
|
25
30
|
def get_status(result):
|
|
@@ -64,18 +69,18 @@ def get_tool_description(arguments):
|
|
|
64
69
|
return ""
|
|
65
70
|
|
|
66
71
|
def extract_tool_args(arguments):
|
|
67
|
-
tool_args =
|
|
72
|
+
tool_args = {}
|
|
68
73
|
if len(arguments['args']) > 1:
|
|
69
74
|
for key, value in arguments['args'][2].items():
|
|
70
75
|
# check if value is builtin type or a string
|
|
71
76
|
if value is not None and isinstance(value, (str, int, float, bool)):
|
|
72
|
-
tool_args
|
|
77
|
+
tool_args[key] = value
|
|
73
78
|
else:
|
|
74
79
|
for key, value in arguments['kwargs'].items():
|
|
75
80
|
# check if value is builtin type or a string
|
|
76
81
|
if value is not None and isinstance(value, (str, int, float, bool)):
|
|
77
|
-
tool_args
|
|
78
|
-
return
|
|
82
|
+
tool_args[key] = value
|
|
83
|
+
return get_json_dumps(tool_args)
|
|
79
84
|
|
|
80
85
|
def extract_tool_response(response):
|
|
81
86
|
if hasattr(response, 'raw_output'):
|
|
@@ -96,12 +101,27 @@ def get_agent_description(instance) -> str:
|
|
|
96
101
|
return instance.description
|
|
97
102
|
return ""
|
|
98
103
|
|
|
99
|
-
def
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
104
|
+
def get_name(instance):
|
|
105
|
+
return instance.name if hasattr(instance, 'name') else ""
|
|
106
|
+
|
|
107
|
+
def set_current_agent(agent_name: str):
|
|
108
|
+
"""Set the current agent name in thread-local storage."""
|
|
109
|
+
_thread_local.current_agent = agent_name
|
|
110
|
+
|
|
111
|
+
def get_current_agent() -> str:
|
|
112
|
+
"""Get the current agent name from thread-local storage."""
|
|
113
|
+
return getattr(_thread_local, 'current_agent', '')
|
|
114
|
+
|
|
115
|
+
def get_source_agent() -> str:
|
|
116
|
+
"""Get the name of the agent that initiated the request."""
|
|
117
|
+
source_agent = get_value(LLAMAINDEX_AGENT_NAME_KEY)
|
|
118
|
+
if source_agent is not None and isinstance(source_agent,str) and source_agent != "":
|
|
119
|
+
return source_agent
|
|
120
|
+
|
|
121
|
+
source_agent = get_current_agent()
|
|
122
|
+
if source_agent:
|
|
123
|
+
return source_agent
|
|
124
|
+
return ""
|
|
105
125
|
|
|
106
126
|
def get_target_agent(results) -> str:
|
|
107
127
|
if hasattr(results, 'raw_input'):
|
|
@@ -111,7 +111,7 @@ TOOLS = {
|
|
|
111
111
|
{
|
|
112
112
|
"_comment": "name of the agent",
|
|
113
113
|
"attribute": "name",
|
|
114
|
-
"accessor": lambda arguments: _helper.get_source_agent(
|
|
114
|
+
"accessor": lambda arguments: _helper.get_source_agent()
|
|
115
115
|
},
|
|
116
116
|
{
|
|
117
117
|
"_comment": "agent type",
|
|
@@ -157,12 +157,13 @@ AGENT_DELEGATION = {
|
|
|
157
157
|
{
|
|
158
158
|
"_comment": "name of the agent",
|
|
159
159
|
"attribute": "from_agent",
|
|
160
|
-
"accessor": lambda arguments: _helper.get_source_agent(
|
|
160
|
+
"accessor": lambda arguments: _helper.get_source_agent()
|
|
161
161
|
},
|
|
162
162
|
{
|
|
163
163
|
"_comment": "name of the agent called",
|
|
164
164
|
"attribute": "to_agent",
|
|
165
|
-
"accessor": lambda arguments: _helper.get_target_agent(arguments['result'])
|
|
165
|
+
"accessor": lambda arguments: _helper.get_target_agent(arguments['result']),
|
|
166
|
+
"phase": "post_execution"
|
|
166
167
|
}
|
|
167
168
|
]
|
|
168
169
|
]
|
|
@@ -2,7 +2,7 @@ from opentelemetry.context import attach, detach, get_current, get_value, set_va
|
|
|
2
2
|
from monocle_apptrace.instrumentation.common.constants import AGENT_PREFIX_KEY
|
|
3
3
|
from monocle_apptrace.instrumentation.common.span_handler import SpanHandler
|
|
4
4
|
from monocle_apptrace.instrumentation.metamodel.llamaindex._helper import (
|
|
5
|
-
is_delegation_tool, LLAMAINDEX_AGENT_NAME_KEY, get_agent_name
|
|
5
|
+
is_delegation_tool, LLAMAINDEX_AGENT_NAME_KEY, get_agent_name, get_name, set_current_agent
|
|
6
6
|
)
|
|
7
7
|
from monocle_apptrace.instrumentation.metamodel.llamaindex.entities.agent import (
|
|
8
8
|
AGENT_DELEGATION
|
|
@@ -13,14 +13,14 @@ TOOL_INVOCATION_STARTED:str = "llamaindex.tool_invocation_started"
|
|
|
13
13
|
class DelegationHandler(SpanHandler):
|
|
14
14
|
# LlamaIndex uses an internal tool to initiate delegation to other agents. The method is tool invoke() with the tool name `transfer_to_<agent_name>`.
|
|
15
15
|
# Hence we use a different output processor for tool invoke() to format the span as agentic.delegation.
|
|
16
|
-
def hydrate_span(self, to_wrap, wrapped, instance, args, kwargs, result, span, parent_span = None, ex:Exception = None) -> bool:
|
|
16
|
+
def hydrate_span(self, to_wrap, wrapped, instance, args, kwargs, result, span, parent_span = None, ex:Exception = None, is_post_exec:bool= False) -> bool:
|
|
17
17
|
if is_delegation_tool(args, instance):
|
|
18
18
|
agent_request_wrapper = to_wrap.copy()
|
|
19
19
|
agent_request_wrapper["output_processor"] = AGENT_DELEGATION
|
|
20
20
|
else:
|
|
21
21
|
agent_request_wrapper = to_wrap
|
|
22
22
|
|
|
23
|
-
return super().hydrate_span(agent_request_wrapper, wrapped, instance, args, kwargs, result, span, parent_span, ex)
|
|
23
|
+
return super().hydrate_span(agent_request_wrapper, wrapped, instance, args, kwargs, result, span, parent_span, ex, is_post_exec)
|
|
24
24
|
|
|
25
25
|
# There are two different APIs for tool calling: FunctionTool.call() and AgentWorkflow.tool_call(). When a single agent calls a tool, only FunctionTool.call() is used. In the multi-agent case,
|
|
26
26
|
# the AgentWorkflow.tool_call() is used, which in turn calls FunctionTool.call(). We can't rely entirely on FunctionTool.call() to extract tool span details; in particular, the agent delegation details are not available there.
|
|
@@ -29,7 +29,10 @@ class LlamaIndexToolHandler(DelegationHandler):
|
|
|
29
29
|
def pre_tracing(self, to_wrap, wrapped, instance, args, kwargs):
|
|
30
30
|
cur_context = get_current()
|
|
31
31
|
cur_context = set_value(TOOL_INVOCATION_STARTED, True, cur_context)
|
|
32
|
-
|
|
32
|
+
current_agent = get_value(LLAMAINDEX_AGENT_NAME_KEY)
|
|
33
|
+
if current_agent is not None:
|
|
34
|
+
cur_context = set_value(LLAMAINDEX_AGENT_NAME_KEY, current_agent, cur_context)
|
|
35
|
+
return attach(cur_context), None
|
|
33
36
|
|
|
34
37
|
def post_tracing(self, to_wrap, wrapped, instance, args, kwargs, return_value, token=None):
|
|
35
38
|
if token:
|
|
@@ -44,8 +47,13 @@ class LlamaIndexSingleAgenttToolHandlerWrapper(DelegationHandler):
|
|
|
44
47
|
class LlamaIndexAgentHandler(SpanHandler):
|
|
45
48
|
def pre_tracing(self, to_wrap, wrapped, instance, args, kwargs):
|
|
46
49
|
cur_context = get_current()
|
|
50
|
+
agent_name = get_name(instance)
|
|
51
|
+
|
|
52
|
+
# Set both OpenTelemetry context and thread-local storage
|
|
53
|
+
set_current_agent(agent_name)
|
|
54
|
+
cur_context = set_value(LLAMAINDEX_AGENT_NAME_KEY, agent_name, cur_context)
|
|
47
55
|
cur_context = set_value(AGENT_PREFIX_KEY, "handoff", cur_context)
|
|
48
|
-
return attach(cur_context)
|
|
56
|
+
return attach(cur_context), None
|
|
49
57
|
|
|
50
58
|
def post_tracing(self, to_wrap, wrapped, instance, args, kwargs, return_value, token=None):
|
|
51
59
|
if token:
|
|
@@ -53,8 +61,8 @@ class LlamaIndexAgentHandler(SpanHandler):
|
|
|
53
61
|
|
|
54
62
|
# LlamaIndex uses direct OpenAI call for agent inferences. Given that the workflow type is set to llamaindex, the openAI inference does not record the input/output events.
|
|
55
63
|
# To avoid this, we set the workflow type to generic for agent inference spans so we can capture the prompts and responses.
|
|
56
|
-
def hydrate_span(self, to_wrap, wrapped, instance, args, kwargs, result, span, parent_span = None, ex:Exception = None) -> bool:
|
|
57
|
-
retval = super().hydrate_span(to_wrap, wrapped, instance, args, kwargs, result, span, parent_span, ex)
|
|
64
|
+
def hydrate_span(self, to_wrap, wrapped, instance, args, kwargs, result, span, parent_span = None, ex:Exception = None, is_post_exec:bool= False) -> bool:
|
|
65
|
+
retval = super().hydrate_span(to_wrap, wrapped, instance, args, kwargs, result, span, parent_span, ex, is_post_exec)
|
|
58
66
|
if SpanHandler.is_root_span(parent_span):
|
|
59
67
|
span.set_attribute(LLAMAINDEX_AGENT_NAME_KEY, "")
|
|
60
68
|
else:
|
|
@@ -89,17 +89,10 @@ LLAMAINDEX_METHODS = [
|
|
|
89
89
|
{
|
|
90
90
|
"package": "llama_index.core.agent",
|
|
91
91
|
"object": "ReActAgent",
|
|
92
|
-
"method": "
|
|
92
|
+
"method": "run",
|
|
93
93
|
"wrapper_method": task_wrapper,
|
|
94
94
|
"output_processor": AGENT
|
|
95
95
|
},
|
|
96
|
-
{
|
|
97
|
-
"package": "llama_index.core.agent",
|
|
98
|
-
"object": "ReActAgent",
|
|
99
|
-
"method": "achat",
|
|
100
|
-
"wrapper_method": atask_wrapper,
|
|
101
|
-
"output_processor": AGENT
|
|
102
|
-
},
|
|
103
96
|
{
|
|
104
97
|
"package": "llama_index.core.agent.workflow.function_agent",
|
|
105
98
|
"object": "FunctionAgent",
|
|
@@ -22,7 +22,7 @@ def get_output_text(arguments):
|
|
|
22
22
|
for tool in arguments["result"].tools:
|
|
23
23
|
if hasattr(tool, "name"):
|
|
24
24
|
tools.append(tool.name)
|
|
25
|
-
return tools
|
|
25
|
+
return ", ".join(tools)
|
|
26
26
|
if (
|
|
27
27
|
"result" in arguments
|
|
28
28
|
and hasattr(arguments["result"], "content")
|
|
@@ -32,12 +32,12 @@ def get_output_text(arguments):
|
|
|
32
32
|
for content in arguments["result"].content:
|
|
33
33
|
if hasattr(content, "text"):
|
|
34
34
|
ret_val.append(content.text)
|
|
35
|
-
return ret_val
|
|
35
|
+
return " ".join(ret_val)
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
def get_name(arguments):
|
|
39
39
|
"""Get the name of the tool from the instance."""
|
|
40
|
-
if 'parent_span' in arguments:
|
|
40
|
+
if 'parent_span' in arguments and arguments['parent_span']:
|
|
41
41
|
arguments['parent_span'].set_attribute("is_mcp", True)
|
|
42
42
|
args = arguments["args"]
|
|
43
43
|
if (
|
|
@@ -63,17 +63,18 @@ def get_params_arguments(arguments):
|
|
|
63
63
|
|
|
64
64
|
args = arguments["args"]
|
|
65
65
|
if (
|
|
66
|
-
|
|
66
|
+
args
|
|
67
67
|
and hasattr(args[0], "root")
|
|
68
68
|
and hasattr(args[0].root, "params")
|
|
69
69
|
and hasattr(args[0].root.params, "arguments")
|
|
70
70
|
):
|
|
71
71
|
# If the first argument has a root with params and arguments, return those arguments
|
|
72
72
|
try:
|
|
73
|
-
return json.dumps(args[0].root.params.arguments)
|
|
73
|
+
return [json.dumps(args[0].root.params.arguments)]
|
|
74
74
|
except (TypeError, ValueError) as e:
|
|
75
75
|
logger.error(f"Error serializing arguments: {e}")
|
|
76
|
-
return str(args[0].root.params.arguments)
|
|
76
|
+
return [str(args[0].root.params.arguments)]
|
|
77
|
+
return []
|
|
77
78
|
|
|
78
79
|
|
|
79
80
|
def get_url(arguments):
|
|
@@ -15,16 +15,43 @@ from monocle_apptrace.instrumentation.common.utils import (
|
|
|
15
15
|
try_option,
|
|
16
16
|
get_exception_message,
|
|
17
17
|
)
|
|
18
|
-
from monocle_apptrace.instrumentation.metamodel.finish_types import
|
|
18
|
+
from monocle_apptrace.instrumentation.metamodel.finish_types import map_mistral_finish_reason_to_finish_type
|
|
19
19
|
from monocle_apptrace.instrumentation.common.constants import AGENT_PREFIX_KEY, INFERENCE_AGENT_DELEGATION, INFERENCE_TURN_END, INFERENCE_TOOL_CALL
|
|
20
20
|
|
|
21
|
-
|
|
22
21
|
logger = logging.getLogger(__name__)
|
|
23
22
|
|
|
23
|
+
|
|
24
24
|
def extract_provider_name(instance):
|
|
25
25
|
provider_url: Option[str] = try_option(getattr, instance._client.base_url, 'host')
|
|
26
26
|
return provider_url.unwrap_or(None)
|
|
27
27
|
|
|
28
|
+
def update_input_span_events(kwargs):
|
|
29
|
+
"""Extract embedding input for spans"""
|
|
30
|
+
if "inputs" in kwargs and isinstance(kwargs["inputs"], list):
|
|
31
|
+
# Join multiple strings into one
|
|
32
|
+
return " | ".join(kwargs["inputs"])
|
|
33
|
+
elif "inputs" in kwargs and isinstance(kwargs["inputs"], str):
|
|
34
|
+
return kwargs["inputs"]
|
|
35
|
+
return ""
|
|
36
|
+
|
|
37
|
+
def update_output_span_events(results):
|
|
38
|
+
"""Extract embedding output for spans"""
|
|
39
|
+
try:
|
|
40
|
+
if hasattr(results, "data") and isinstance(results.data, list):
|
|
41
|
+
embeddings = results.data
|
|
42
|
+
# just return the indices, not full vectors
|
|
43
|
+
embedding_summaries = [
|
|
44
|
+
f"index={e.index}, dim={len(e.embedding)}"
|
|
45
|
+
for e in embeddings
|
|
46
|
+
]
|
|
47
|
+
output = "\n".join(embedding_summaries)
|
|
48
|
+
if len(output) > 200:
|
|
49
|
+
output = output[:200] + "..."
|
|
50
|
+
return output
|
|
51
|
+
except Exception as e:
|
|
52
|
+
logger.warning("Error in update_output_span_events: %s", str(e))
|
|
53
|
+
return ""
|
|
54
|
+
|
|
28
55
|
def extract_inference_endpoint(instance):
|
|
29
56
|
inference_endpoint: Option[str] = try_option(getattr, instance._client, 'base_url').map(str)
|
|
30
57
|
if inference_endpoint.is_none() and "meta" in instance.client.__dict__:
|
|
@@ -32,16 +59,18 @@ def extract_inference_endpoint(instance):
|
|
|
32
59
|
|
|
33
60
|
return inference_endpoint.unwrap_or(extract_provider_name(instance))
|
|
34
61
|
|
|
35
|
-
|
|
62
|
+
|
|
63
|
+
def dummy_method(arguments):
|
|
36
64
|
pass
|
|
37
65
|
|
|
66
|
+
|
|
38
67
|
def extract_messages(kwargs):
|
|
39
68
|
"""Extract system and user messages"""
|
|
40
69
|
try:
|
|
41
70
|
messages = []
|
|
42
71
|
if "system" in kwargs and isinstance(kwargs["system"], str):
|
|
43
72
|
messages.append({"system": kwargs["system"]})
|
|
44
|
-
if 'messages' in kwargs and
|
|
73
|
+
if 'messages' in kwargs and kwargs['messages']:
|
|
45
74
|
for msg in kwargs['messages']:
|
|
46
75
|
if msg.get('content') and msg.get('role'):
|
|
47
76
|
messages.append({msg['role']: msg['content']})
|
|
@@ -50,6 +79,7 @@ def extract_messages(kwargs):
|
|
|
50
79
|
logger.warning("Warning: Error occurred in extract_messages: %s", str(e))
|
|
51
80
|
return []
|
|
52
81
|
|
|
82
|
+
|
|
53
83
|
def get_exception_status_code(arguments):
|
|
54
84
|
exc = arguments.get("exception")
|
|
55
85
|
if exc is not None and hasattr(exc, "status_code"):
|
|
@@ -73,7 +103,7 @@ def extract_assistant_message(arguments):
|
|
|
73
103
|
Returns a JSON string like {"assistant": "<text>"}.
|
|
74
104
|
"""
|
|
75
105
|
try:
|
|
76
|
-
result = arguments.get("result")
|
|
106
|
+
result = arguments.get("result") if isinstance(arguments, dict) else arguments
|
|
77
107
|
if result is None:
|
|
78
108
|
return ""
|
|
79
109
|
|
|
@@ -86,9 +116,10 @@ def extract_assistant_message(arguments):
|
|
|
86
116
|
if isinstance(result, list):
|
|
87
117
|
content = []
|
|
88
118
|
for chunk in result:
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
119
|
+
if hasattr(chunk, "data") and hasattr(chunk.data, "choices") and chunk.data.choices:
|
|
120
|
+
choice = chunk.data.choices[0]
|
|
121
|
+
if hasattr(choice, "delta") and hasattr(choice.delta, "content"):
|
|
122
|
+
content.append(choice.delta.content or "")
|
|
92
123
|
return get_json_dumps({"assistant": "".join(content)})
|
|
93
124
|
|
|
94
125
|
return ""
|
|
@@ -98,77 +129,95 @@ def extract_assistant_message(arguments):
|
|
|
98
129
|
return ""
|
|
99
130
|
|
|
100
131
|
|
|
101
|
-
|
|
102
|
-
def update_span_from_llm_response(response):
|
|
132
|
+
'''def update_span_from_llm_response(response):
|
|
103
133
|
meta_dict = {}
|
|
104
134
|
if response is not None and hasattr(response, "usage"):
|
|
105
|
-
|
|
106
|
-
token_usage = response.usage
|
|
107
|
-
else:
|
|
108
|
-
response_metadata = response.response_metadata
|
|
109
|
-
token_usage = response_metadata.get("token_usage")
|
|
135
|
+
token_usage = getattr(response, "usage", None) or getattr(response, "response_metadata", {}).get("token_usage")
|
|
110
136
|
if token_usage is not None:
|
|
111
137
|
meta_dict.update({"completion_tokens": getattr(response.usage, "output_tokens", 0)})
|
|
112
138
|
meta_dict.update({"prompt_tokens": getattr(response.usage, "input_tokens", 0)})
|
|
113
|
-
meta_dict.update({"total_tokens": getattr(response.usage, "input_tokens", 0)+getattr(response.usage, "output_tokens", 0)})
|
|
114
|
-
return meta_dict
|
|
139
|
+
meta_dict.update({"total_tokens": getattr(response.usage, "input_tokens", 0) + getattr(response.usage, "output_tokens", 0)})
|
|
140
|
+
return meta_dict'''
|
|
141
|
+
|
|
142
|
+
def update_span_from_llm_response(result, include_token_counts=False):
|
|
143
|
+
tokens = {
|
|
144
|
+
"completion_tokens": getattr(result, "completion_tokens", 0),
|
|
145
|
+
"prompt_tokens": getattr(result, "prompt_tokens", 0),
|
|
146
|
+
"total_tokens": getattr(result, "total_tokens", 0),
|
|
147
|
+
} if include_token_counts else {}
|
|
148
|
+
# Add other metadata fields like finish_reason, etc.
|
|
149
|
+
return {**tokens}
|
|
150
|
+
|
|
115
151
|
|
|
116
152
|
def extract_finish_reason(arguments):
|
|
117
|
-
"""
|
|
153
|
+
"""
|
|
154
|
+
Extract stop_reason from a Mistral response or stream chunks.
|
|
155
|
+
Works for both streaming (list of chunks) and full responses.
|
|
156
|
+
"""
|
|
118
157
|
try:
|
|
119
|
-
# Arguments may be a dict with 'result' or just the response object
|
|
120
158
|
response = arguments.get("result") if isinstance(arguments, dict) else arguments
|
|
121
|
-
if response is
|
|
159
|
+
if response is None:
|
|
160
|
+
return None
|
|
161
|
+
|
|
162
|
+
# Handle full response: single object with stop_reason
|
|
163
|
+
if hasattr(response, "stop_reason") and response.stop_reason:
|
|
122
164
|
return response.stop_reason
|
|
165
|
+
|
|
166
|
+
# Handle streaming: list of chunks, last chunk may have finish_reason
|
|
167
|
+
if isinstance(response, list):
|
|
168
|
+
for chunk in reversed(response):
|
|
169
|
+
if hasattr(chunk, "data") and hasattr(chunk.data, "choices") and chunk.data.choices:
|
|
170
|
+
fr = getattr(chunk.data.choices[0], "finish_reason", None)
|
|
171
|
+
if fr is not None:
|
|
172
|
+
return fr
|
|
173
|
+
|
|
123
174
|
except Exception as e:
|
|
124
175
|
logger.warning("Warning: Error occurred in extract_finish_reason: %s", str(e))
|
|
125
176
|
return None
|
|
177
|
+
|
|
126
178
|
return None
|
|
127
179
|
|
|
180
|
+
|
|
128
181
|
def map_finish_reason_to_finish_type(finish_reason):
|
|
129
|
-
"""Map
|
|
130
|
-
return
|
|
182
|
+
"""Map Mistral stop_reason to finish_type, similar to OpenAI mapping."""
|
|
183
|
+
return map_mistral_finish_reason_to_finish_type(finish_reason)
|
|
184
|
+
|
|
131
185
|
|
|
132
186
|
def agent_inference_type(arguments):
|
|
133
|
-
"""Extract agent inference type from
|
|
187
|
+
"""Extract agent inference type from Mistral response"""
|
|
134
188
|
try:
|
|
135
189
|
status = get_status_code(arguments)
|
|
136
|
-
if status
|
|
137
|
-
response = arguments
|
|
138
|
-
|
|
190
|
+
if status in ('success', 'completed'):
|
|
191
|
+
response = arguments.get("result")
|
|
192
|
+
if response is None:
|
|
193
|
+
return INFERENCE_TURN_END
|
|
194
|
+
|
|
139
195
|
# Check if stop_reason indicates tool use
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
if
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
if agent_prefix and tool_name.startswith(agent_prefix):
|
|
150
|
-
return INFERENCE_AGENT_DELEGATION
|
|
151
|
-
# If we found tool use but no agent delegation, it's a regular tool call
|
|
152
|
-
return INFERENCE_TOOL_CALL
|
|
153
|
-
|
|
196
|
+
stop_reason = getattr(response, "stop_reason", None)
|
|
197
|
+
if stop_reason == "tool_use" and hasattr(response, "content") and response.content:
|
|
198
|
+
agent_prefix = get_value(AGENT_PREFIX_KEY)
|
|
199
|
+
for content_block in response.content:
|
|
200
|
+
if getattr(content_block, "type", None) == "tool_use" and hasattr(content_block, "name"):
|
|
201
|
+
if agent_prefix and content_block.name.startswith(agent_prefix):
|
|
202
|
+
return INFERENCE_AGENT_DELEGATION
|
|
203
|
+
return INFERENCE_TOOL_CALL
|
|
204
|
+
|
|
154
205
|
# Fallback: check the extracted message for tool content
|
|
155
206
|
assistant_message = extract_assistant_message(arguments)
|
|
156
207
|
if assistant_message:
|
|
157
208
|
try:
|
|
158
209
|
message = json.loads(assistant_message)
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
if agent_prefix and agent_prefix in assistant_content:
|
|
164
|
-
return INFERENCE_AGENT_DELEGATION
|
|
210
|
+
assistant_content = message.get("assistant", "") if isinstance(message, dict) else ""
|
|
211
|
+
agent_prefix = get_value(AGENT_PREFIX_KEY)
|
|
212
|
+
if agent_prefix and agent_prefix in assistant_content:
|
|
213
|
+
return INFERENCE_AGENT_DELEGATION
|
|
165
214
|
except (json.JSONDecodeError, TypeError):
|
|
166
|
-
# If JSON parsing fails, fall back to string analysis
|
|
167
215
|
agent_prefix = get_value(AGENT_PREFIX_KEY)
|
|
168
216
|
if agent_prefix and agent_prefix in assistant_message:
|
|
169
217
|
return INFERENCE_AGENT_DELEGATION
|
|
170
|
-
|
|
218
|
+
|
|
171
219
|
return INFERENCE_TURN_END
|
|
220
|
+
|
|
172
221
|
except Exception as e:
|
|
173
222
|
logger.warning("Warning: Error occurred in agent_inference_type: %s", str(e))
|
|
174
|
-
return INFERENCE_TURN_END
|
|
223
|
+
return INFERENCE_TURN_END
|
|
@@ -4,13 +4,13 @@ from monocle_apptrace.instrumentation.common.utils import get_error_message, res
|
|
|
4
4
|
|
|
5
5
|
MISTRAL_INFERENCE = {
|
|
6
6
|
"type": SPAN_TYPES.INFERENCE,
|
|
7
|
+
"subtype": lambda arguments: _helper.agent_inference_type(arguments),
|
|
7
8
|
"attributes": [
|
|
8
9
|
[
|
|
9
10
|
{
|
|
10
11
|
"_comment": "provider type ,name , deployment , inference_endpoint",
|
|
11
12
|
"attribute": "type",
|
|
12
13
|
"accessor": lambda arguments: 'inference.mistral'
|
|
13
|
-
|
|
14
14
|
},
|
|
15
15
|
{
|
|
16
16
|
"attribute": "provider_name",
|
|
@@ -52,9 +52,16 @@ MISTRAL_INFERENCE = {
|
|
|
52
52
|
"accessor": lambda arguments: get_error_message(arguments)
|
|
53
53
|
},
|
|
54
54
|
{
|
|
55
|
-
"_comment": "this is result from LLM",
|
|
55
|
+
"_comment": "this is result from LLM, works for streaming and non-streaming",
|
|
56
56
|
"attribute": "response",
|
|
57
|
-
"accessor": lambda arguments:
|
|
57
|
+
"accessor": lambda arguments: (
|
|
58
|
+
# Handle streaming: combine chunks if result is iterable and doesn't have 'choices'
|
|
59
|
+
_helper.extract_assistant_message(
|
|
60
|
+
{"result": list(arguments["result"])}
|
|
61
|
+
if hasattr(arguments.get("result"), "__iter__") and not hasattr(arguments.get("result"), "choices")
|
|
62
|
+
else arguments
|
|
63
|
+
)
|
|
64
|
+
)
|
|
58
65
|
}
|
|
59
66
|
]
|
|
60
67
|
},
|
|
@@ -62,8 +69,11 @@ MISTRAL_INFERENCE = {
|
|
|
62
69
|
"name": "metadata",
|
|
63
70
|
"attributes": [
|
|
64
71
|
{
|
|
65
|
-
"_comment": "this is metadata usage from LLM",
|
|
66
|
-
"accessor": lambda arguments: _helper.update_span_from_llm_response(
|
|
72
|
+
"_comment": "this is metadata usage from LLM, includes token counts",
|
|
73
|
+
"accessor": lambda arguments: _helper.update_span_from_llm_response(
|
|
74
|
+
arguments.get("result"),
|
|
75
|
+
include_token_counts=True # new flag for streaming handling
|
|
76
|
+
)
|
|
67
77
|
},
|
|
68
78
|
{
|
|
69
79
|
"_comment": "finish reason from Anthropic response",
|
|
@@ -74,10 +84,6 @@ MISTRAL_INFERENCE = {
|
|
|
74
84
|
"_comment": "finish type mapped from finish reason",
|
|
75
85
|
"attribute": "finish_type",
|
|
76
86
|
"accessor": lambda arguments: _helper.map_finish_reason_to_finish_type(_helper.extract_finish_reason(arguments))
|
|
77
|
-
},
|
|
78
|
-
{
|
|
79
|
-
"attribute": "inference_sub_type",
|
|
80
|
-
"accessor": lambda arguments: _helper.agent_inference_type(arguments)
|
|
81
87
|
}
|
|
82
88
|
]
|
|
83
89
|
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from monocle_apptrace.instrumentation.metamodel.mistral import _helper
|
|
2
|
+
from monocle_apptrace.instrumentation.common.utils import resolve_from_alias
|
|
3
|
+
|
|
4
|
+
MISTRAL_RETRIEVAL = {
|
|
5
|
+
"type": "embedding",
|
|
6
|
+
"attributes": [
|
|
7
|
+
[
|
|
8
|
+
{
|
|
9
|
+
"_comment": "LLM Model",
|
|
10
|
+
"attribute": "name",
|
|
11
|
+
"accessor": lambda arguments: resolve_from_alias(arguments['kwargs'], ['model'])
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"attribute": "type",
|
|
15
|
+
"accessor": lambda arguments: 'model.embedding.' + resolve_from_alias(arguments['kwargs'], ['model'])
|
|
16
|
+
}
|
|
17
|
+
]
|
|
18
|
+
],
|
|
19
|
+
"events": [
|
|
20
|
+
{
|
|
21
|
+
"name": "data.input",
|
|
22
|
+
"attributes": [
|
|
23
|
+
{
|
|
24
|
+
"_comment": "embedding input",
|
|
25
|
+
"attribute": "input",
|
|
26
|
+
"accessor": lambda arguments: _helper.update_input_span_events(arguments["kwargs"])
|
|
27
|
+
}
|
|
28
|
+
]
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"name": "data.output",
|
|
32
|
+
"attributes": [
|
|
33
|
+
{
|
|
34
|
+
"_comment": "embedding output summary",
|
|
35
|
+
"attribute": "response",
|
|
36
|
+
"accessor": lambda arguments: _helper.update_output_span_events(arguments["result"])
|
|
37
|
+
}
|
|
38
|
+
]
|
|
39
|
+
}
|
|
40
|
+
]
|
|
41
|
+
}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from monocle_apptrace.instrumentation.common.wrapper import task_wrapper, atask_wrapper
|
|
2
2
|
from monocle_apptrace.instrumentation.metamodel.mistral.entities.inference import MISTRAL_INFERENCE
|
|
3
|
+
from monocle_apptrace.instrumentation.metamodel.mistral.entities.retrieval import MISTRAL_RETRIEVAL
|
|
3
4
|
|
|
4
5
|
MISTRAL_METHODS = [
|
|
5
6
|
{
|
|
@@ -33,7 +34,15 @@ MISTRAL_METHODS = [
|
|
|
33
34
|
"span_handler": "non_framework_handler",
|
|
34
35
|
"wrapper_method": atask_wrapper,
|
|
35
36
|
"output_processor": MISTRAL_INFERENCE,
|
|
36
|
-
}
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"package": "mistralai.embeddings", # where Embeddings is defined
|
|
40
|
+
"object": "Embeddings", # sync embeddings client
|
|
41
|
+
"method": "create", # sync create
|
|
42
|
+
"span_handler": "non_framework_handler",
|
|
43
|
+
"wrapper_method": task_wrapper,
|
|
44
|
+
"output_processor": MISTRAL_RETRIEVAL
|
|
45
|
+
},
|
|
37
46
|
]
|
|
38
47
|
|
|
39
48
|
|