holmesgpt 0.13.3a0__py3-none-any.whl → 0.14.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +15 -4
- holmes/common/env_vars.py +8 -1
- holmes/config.py +66 -139
- holmes/core/investigation.py +1 -2
- holmes/core/llm.py +295 -52
- holmes/core/models.py +2 -0
- holmes/core/safeguards.py +4 -4
- holmes/core/supabase_dal.py +14 -8
- holmes/core/tool_calling_llm.py +202 -177
- holmes/core/tools.py +260 -25
- holmes/core/tools_utils/data_types.py +81 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +33 -0
- holmes/core/tools_utils/tool_executor.py +2 -2
- holmes/core/toolset_manager.py +150 -3
- holmes/core/tracing.py +6 -1
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +62 -0
- holmes/core/transformers/llm_summarize.py +174 -0
- holmes/core/transformers/registry.py +122 -0
- holmes/core/transformers/transformer.py +31 -0
- holmes/main.py +5 -0
- holmes/plugins/prompts/_fetch_logs.jinja2 +10 -1
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +17 -15
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +8 -4
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +4 -4
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +7 -3
- holmes/plugins/toolsets/bash/bash_toolset.py +6 -6
- holmes/plugins/toolsets/bash/common/bash.py +7 -7
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
- holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +345 -207
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +190 -19
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +96 -32
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +10 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +21 -22
- holmes/plugins/toolsets/git.py +22 -22
- holmes/plugins/toolsets/grafana/common.py +14 -2
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +473 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +4 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +5 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +662 -290
- holmes/plugins/toolsets/grafana/trace_parser.py +1 -1
- holmes/plugins/toolsets/internet/internet.py +3 -3
- holmes/plugins/toolsets/internet/notion.py +3 -3
- holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
- holmes/plugins/toolsets/kafka.py +18 -18
- holmes/plugins/toolsets/kubernetes.yaml +58 -0
- holmes/plugins/toolsets/kubernetes_logs.py +6 -6
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +1 -1
- holmes/plugins/toolsets/mcp/toolset_mcp.py +4 -4
- holmes/plugins/toolsets/newrelic.py +8 -8
- holmes/plugins/toolsets/opensearch/opensearch.py +5 -5
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +10 -10
- holmes/plugins/toolsets/prometheus/prometheus.py +841 -351
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +39 -2
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +6 -4
- holmes/plugins/toolsets/robusta/robusta.py +10 -10
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -4
- holmes/plugins/toolsets/servicenow/servicenow.py +6 -6
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/env.py +7 -0
- holmes/utils/holmes_status.py +2 -1
- holmes/utils/sentry_helper.py +41 -0
- holmes/utils/stream.py +9 -0
- {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1.dist-info}/METADATA +11 -15
- {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1.dist-info}/RECORD +85 -75
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1.dist-info}/entry_points.txt +0 -0
holmes/core/tool_calling_llm.py
CHANGED
@@ -2,7 +2,7 @@ import concurrent.futures
 import json
 import logging
 import textwrap
-from typing import Dict, List, Optional, Type, Union, Callable
+from typing import Dict, List, Optional, Type, Union, Callable, Any
 
 
 import sentry_sdk
@@ -27,19 +27,28 @@ from holmes.core.investigation_structured_output import (
     is_response_an_incorrect_tool_call,
 )
 from holmes.core.issue import Issue
-from holmes.core.llm import LLM
+from holmes.core.llm import LLM, get_llm_usage
 from holmes.core.performance_timing import PerformanceTiming
 from holmes.core.resource_instruction import ResourceInstructions
 from holmes.core.runbooks import RunbookManager
 from holmes.core.safeguards import prevent_overly_repeated_tool_call
-from holmes.core.tools import StructuredToolResult,
+from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
+from holmes.core.tools_utils.tool_context_window_limiter import (
+    prevent_overly_big_tool_response,
+)
 from holmes.plugins.prompts import load_and_render_prompt
+from holmes.utils import sentry_helper
 from holmes.utils.global_instructions import (
     Instructions,
     add_global_instructions_to_user_prompt,
 )
 from holmes.utils.tags import format_tags_in_string, parse_messages_tags
 from holmes.core.tools_utils.tool_executor import ToolExecutor
+from holmes.core.tools_utils.data_types import (
+    TruncationResult,
+    ToolCallResult,
+    TruncationMetadata,
+)
 from holmes.core.tracing import DummySpan
 from holmes.utils.colors import AI_COLOR
 from holmes.utils.stream import StreamEvents, StreamMessage
@@ -48,6 +57,9 @@ from holmes.utils.stream import StreamEvents, StreamMessage
 cost_logger = logging.getLogger("holmes.costs")
 
 
+TRUNCATION_NOTICE = "\n\n[TRUNCATED]"
+
+
 class LLMCosts(BaseModel):
     """Tracks cost and token usage for LLM calls."""
 
@@ -119,23 +131,6 @@ def _process_cost_info(
         logging.debug(f"Could not extract cost information: {e}")
 
 
-def format_tool_result_data(tool_result: StructuredToolResult) -> str:
-    tool_response = tool_result.data
-    if isinstance(tool_result.data, str):
-        tool_response = tool_result.data
-    else:
-        try:
-            if isinstance(tool_result.data, BaseModel):
-                tool_response = tool_result.data.model_dump_json(indent=2)
-            else:
-                tool_response = json.dumps(tool_result.data, indent=2)
-        except Exception:
-            tool_response = str(tool_result.data)
-    if tool_result.status == ToolResultStatus.ERROR:
-        tool_response = f"{tool_result.error or 'Tool execution failed'}:\n\n{tool_result.data or ''}".strip()
-    return tool_response
-
-
 # TODO: I think there's a bug here because we don't account for the 'role' or json structure like '{...}' when counting tokens
 # However, in practice it works because we reserve enough space for the output tokens that the minor inconsistency does not matter
 # We should fix this in the future
@@ -143,7 +138,7 @@ def format_tool_result_data(tool_result: StructuredToolResult) -> str:
 # token truncation and not character truncation
 def truncate_messages_to_fit_context(
     messages: list, max_context_size: int, maximum_output_token: int, count_tokens_fn
-) ->
+) -> TruncationResult:
     """
     Helper function to truncate tool messages to fit within context limits.
 
@@ -176,13 +171,17 @@ def truncate_messages_to_fit_context(
     )
 
     if len(tool_call_messages) == 0:
-        return messages
+        return TruncationResult(truncated_messages=messages, truncations=[])
 
     available_space = (
-        max_context_size - message_size_without_tools -
+        max_context_size - message_size_without_tools - reserved_for_output_tokens
    )
     remaining_space = available_space
-    tool_call_messages.sort(
+    tool_call_messages.sort(
+        key=lambda x: count_tokens_fn([{"role": "tool", "content": x["content"]}])
+    )
+
+    truncations = []
 
     # Allocate space starting with small tools and going to larger tools, while maintaining fairness
     # Small tools can often get exactly what they need, while larger tools may need to be truncated
@@ -190,75 +189,49 @@ def truncate_messages_to_fit_context(
     for i, msg in enumerate(tool_call_messages):
         remaining_tools = len(tool_call_messages) - i
         max_allocation = remaining_space // remaining_tools
-        needed_space =
+        needed_space = count_tokens_fn([{"role": "tool", "content": msg["content"]}])
         allocated_space = min(needed_space, max_allocation)
 
         if needed_space > allocated_space:
-
-
-
-
-                msg["content"][: allocated_space - len(truncation_notice)]
-                + truncation_notice
-            )
-            logging.info(
-                f"Truncating tool message '{msg['name']}' from {needed_space} to {allocated_space-len(truncation_notice)} tokens"
-            )
-        else:
-            msg["content"] = truncation_notice[:allocated_space]
-            logging.info(
-                f"Truncating tool message '{msg['name']}' from {needed_space} to {allocated_space} tokens"
-            )
-        msg.pop("token_count", None)  # Remove token_count if present
+            truncation_metadata = _truncate_tool_message(
+                msg, allocated_space, needed_space
+            )
+            truncations.append(truncation_metadata)
 
         remaining_space -= allocated_space
-    return messages
-
-
-
-
-
-
-
-
-
-
-    if
-
-
-
-
-
-
-
-
-
-
-
-
-            "result": result_dump,
-        }
-
-    def as_streaming_tool_result_response(self):
-        result_dump = self.result.model_dump()
-        result_dump["data"] = self.result.get_stringified_data()
-
-        return {
-            "tool_call_id": self.tool_call_id,
-            "role": "tool",
-            "description": self.description,
-            "name": self.tool_name,
-            "result": result_dump,
-        }
+    return TruncationResult(truncated_messages=messages, truncations=truncations)
+
+
+def _truncate_tool_message(
+    msg: dict, allocated_space: int, needed_space: int
+) -> TruncationMetadata:
+    msg_content = msg["content"]
+    tool_call_id = msg["tool_call_id"]
+    tool_name = msg["name"]
+
+    # Ensure the indicator fits in the allocated space
+    if allocated_space > len(TRUNCATION_NOTICE):
+        original = msg_content if isinstance(msg_content, str) else str(msg_content)
+        msg["content"] = (
+            original[: allocated_space - len(TRUNCATION_NOTICE)] + TRUNCATION_NOTICE
+        )
+        end_index = allocated_space - len(TRUNCATION_NOTICE)
+    else:
+        msg["content"] = TRUNCATION_NOTICE[:allocated_space]
+        end_index = allocated_space
+
+    msg.pop("token_count", None)  # Remove token_count if present
+    logging.info(
+        f"Truncating tool message '{tool_name}' from {needed_space} to {allocated_space} tokens"
+    )
+    truncation_metadata = TruncationMetadata(
+        tool_call_id=tool_call_id,
+        start_index=0,
+        end_index=end_index,
+        tool_name=tool_name,
+        original_token_count=needed_space,
+    )
+    return truncation_metadata
 
 
 class LLMResult(LLMCosts):
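The rewritten truncation path above hands out the remaining context budget to tool messages smallest-first, so small results usually survive intact and only oversized ones get cut. Below is a minimal sketch of that allocation strategy with a toy whitespace tokenizer standing in for the real count_tokens_for_message; the message shapes and tool names are illustrative assumptions, not holmes APIs.

# Sketch of the smallest-first allocation loop used above.
def count_tokens_fn(messages: list) -> int:
    # Toy counter: one token per whitespace-separated word.
    return sum(len(str(m.get("content", "")).split()) for m in messages)

def names_of_truncated_tools(tool_messages: list, available_space: int) -> list:
    # Sort ascending by size: small tools get exactly what they need,
    # and their unused budget rolls over to the larger tools behind them.
    tool_messages.sort(
        key=lambda m: count_tokens_fn([{"role": "tool", "content": m["content"]}])
    )
    truncated, remaining = [], available_space
    for i, msg in enumerate(tool_messages):
        max_allocation = remaining // (len(tool_messages) - i)
        needed = count_tokens_fn([{"role": "tool", "content": msg["content"]}])
        allocated = min(needed, max_allocation)
        if needed > allocated:
            truncated.append(msg["name"])
        remaining -= allocated
    return truncated

msgs = [
    {"name": "kubectl_logs", "content": "word " * 500},  # hypothetical large result
    {"name": "kubectl_get", "content": "pod-a Running"},  # hypothetical small result
]
print(names_of_truncated_tools(msgs, available_space=100))  # ['kubectl_logs']

Note the TODO retained in the diff: allocation is computed in tokens but the slice in _truncate_tool_message is character-based, which the authors accept because output-token headroom absorbs the discrepancy.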
@@ -269,6 +242,7 @@ class LLMResult(LLMCosts):
     # TODO: clean up these two
     prompt: Optional[str] = None
     messages: Optional[List[dict]] = None
+    metadata: Optional[Dict[Any, Any]] = None
 
     def get_tool_usage_summary(self):
         return "AI used info from issue and " + ",".join(
@@ -344,7 +318,7 @@ class ToolCallingLLM:
         perf_timing.measure("get_all_tools_openai_format")
         max_steps = self.max_steps
         i = 0
-
+        metadata: Dict[Any, Any] = {}
         while i < max_steps:
             i += 1
             perf_timing.measure(f"start iteration {i}")
@@ -360,9 +334,13 @@ class ToolCallingLLM:
 
             if (total_tokens + maximum_output_token) > max_context_size:
                 logging.warning("Token limit exceeded. Truncating tool responses.")
-
+                truncated_res = self.truncate_messages_to_fit_context(
                     messages, max_context_size, maximum_output_token
                 )
+                metadata["truncations"] = [
+                    t.model_dump() for t in truncated_res.truncations
+                ]
+                messages = truncated_res.truncated_messages
                 perf_timing.measure("truncate_messages_to_fit_context")
 
             logging.debug(f"sending messages={messages}\n\ntools={tools}")
@@ -408,6 +386,7 @@ class ToolCallingLLM:
                     "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
                 )
                 # disable structured output going forward and and retry
+                sentry_helper.capture_structured_output_incorrect_tool_call()
                 response_format = None
                 max_steps = max_steps + 1
                 continue
@@ -443,7 +422,11 @@ class ToolCallingLLM:
             )
             costs.total_cost += post_processing_cost
 
+            self.llm.count_tokens_for_message(messages)
             perf_timing.end(f"- completed in {i} iterations -")
+            metadata["usage"] = get_llm_usage(full_response)
+            metadata["max_tokens"] = max_context_size
+            metadata["max_output_tokens"] = maximum_output_token
             return LLMResult(
                 result=post_processed_response,
                 unprocessed_result=raw_response,
@@ -451,6 +434,7 @@ class ToolCallingLLM:
                 prompt=json.dumps(messages, indent=2),
                 messages=messages,
                 **costs.model_dump(),  # Include all cost fields
+                metadata=metadata,
             )
 
         perf_timing.end(f"- completed in {i} iterations -")
@@ -460,6 +444,7 @@ class ToolCallingLLM:
             prompt=json.dumps(messages, indent=2),
             messages=messages,
             **costs.model_dump(),  # Include all cost fields
+            metadata=metadata,
         )
 
         if text_response and text_response.strip():
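With these hunks, both the normal and fallback LLMResult paths now carry a metadata dict. Based on the keys assigned above, a populated instance would look roughly like the sketch below; every concrete value is invented for illustration, and the truncation entry fields mirror TruncationMetadata as constructed earlier in this diff.

# Approximate shape of LLMResult.metadata after this change (values invented).
example_metadata = {
    "truncations": [
        {   # one TruncationMetadata.model_dump() per truncated tool message
            "tool_call_id": "call_abc123",   # hypothetical id
            "tool_name": "kubectl_logs",     # hypothetical tool
            "start_index": 0,
            "end_index": 3988,
            "original_token_count": 15321,
        }
    ],
    "usage": None,               # populated from get_llm_usage(full_response)
    "max_tokens": 128000,        # max_context_size of the model
    "max_output_tokens": 4096,   # maximum_output_token reserved for the reply
}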
@@ -495,9 +480,19 @@ class ToolCallingLLM:
                         if future in futures_tool_numbers
                         else None
                     )
-
-
-
+
+                    if (
+                        tool_call_result.result.status
+                        == StructuredToolResultStatus.APPROVAL_REQUIRED
+                    ):
+                        with trace_span.start_span(type="tool") as tool_span:
+                            tool_call_result = self._handle_tool_call_approval(
+                                tool_call_result=tool_call_result,
+                                tool_number=tool_number,
+                            )
+                            ToolCallingLLM._log_tool_call_result(
+                                tool_span, tool_call_result
+                            )
 
                     tool_calls.append(tool_call_result.as_tool_result_response())
                     messages.append(tool_call_result.as_tool_call_message())
@@ -513,91 +508,47 @@ class ToolCallingLLM:
 
         raise Exception(f"Too many LLM calls - exceeded max_steps: {i}/{max_steps}")
 
-    def
+    def _directly_invoke_tool_call(
         self,
         tool_name: str,
         tool_params: dict,
         user_approved: bool,
-        trace_span=DummySpan(),
         tool_number: Optional[int] = None,
     ) -> StructuredToolResult:
-        tool_span = trace_span.start_span(name=tool_name, type="tool")
         tool = self.tool_executor.get_tool_by_name(tool_name)
-
+        if not tool:
+            logging.warning(
+                f"Skipping tool execution for {tool_name}: args: {tool_params}"
+            )
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.ERROR,
+                error=f"Failed to find tool {tool_name}",
+                params=tool_params,
+            )
+
         try:
-
-
-
-            )
-            tool_response = StructuredToolResult(
-                status=ToolResultStatus.ERROR,
-                error=f"Failed to find tool {tool_name}",
-                params=tool_params,
-            )
-            else:
-                tool_response = tool.invoke(
-                    tool_params, tool_number=tool_number, user_approved=user_approved
-                )
+            tool_response = tool.invoke(
+                tool_params, tool_number=tool_number, user_approved=user_approved
+            )
         except Exception as e:
             logging.error(
                 f"Tool call to {tool_name} failed with an Exception", exc_info=True
             )
             tool_response = StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Tool call failed: {e}",
                 params=tool_params,
             )
-
-            # Log error to trace span
-            tool_span.log(
-                input=tool_params, output=str(e), metadata={"status": "ERROR"}
-            )
-
-        tool_span.log(
-            input=tool_params,
-            output=tool_response.data,
-            metadata={
-                "status": tool_response.status.value,
-                "error": tool_response.error,
-                "description": tool.get_parameterized_one_liner(tool_params)
-                if tool
-                else "",
-                "structured_tool_result": tool_response,
-            },
-        )
-        tool_span.end()
-
         return tool_response
 
-    def
+    def _get_tool_call_result(
         self,
-
+        tool_call_id: str,
+        tool_name: str,
+        tool_arguments: str,
         previous_tool_calls: list[dict],
-
-        tool_number=None,
+        tool_number: Optional[int] = None,
     ) -> ToolCallResult:
-        # Handle the union type - ChatCompletionMessageToolCall can be either
-        # ChatCompletionMessageFunctionToolCall (with 'function' field and type='function')
-        # or ChatCompletionMessageCustomToolCall (with 'custom' field and type='custom').
-        # We use hasattr to check for the 'function' attribute as it's more flexible
-        # and doesn't require importing the specific type.
-        if hasattr(tool_to_call, "function"):
-            tool_name = tool_to_call.function.name
-            tool_arguments = tool_to_call.function.arguments
-        else:
-            # This is a custom tool call - we don't support these currently
-            logging.error(f"Unsupported custom tool call: {tool_to_call}")
-            return ToolCallResult(
-                tool_call_id=tool_to_call.id,
-                tool_name="unknown",
-                description="NA",
-                result=StructuredToolResult(
-                    status=ToolResultStatus.ERROR,
-                    error="Custom tool calls are not supported",
-                    params=None,
-                ),
-            )
-
         tool_params = {}
         try:
             tool_params = json.loads(tool_arguments)
@@ -606,8 +557,6 @@ class ToolCallingLLM:
                 f"Failed to parse arguments for tool: {tool_name}. args: {tool_arguments}"
             )
 
-        tool_call_id = tool_to_call.id
-
         tool_response = prevent_overly_repeated_tool_call(
             tool_name=tool_name,
             tool_params=tool_params,
@@ -615,11 +564,10 @@ class ToolCallingLLM:
         )
 
         if not tool_response:
-            tool_response = self.
+            tool_response = self._directly_invoke_tool_call(
                 tool_name=tool_name,
                 tool_params=tool_params,
                 user_approved=False,
-                trace_span=trace_span,
                 tool_number=tool_number,
            )
 
@@ -629,12 +577,13 @@ class ToolCallingLLM:
                 f"Tool {tool_name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
             )
             tool_response = StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.SUCCESS,
                 data=tool_response,
                 params=tool_params,
             )
 
         tool = self.tool_executor.get_tool_by_name(tool_name)
+
         return ToolCallResult(
             tool_call_id=tool_call_id,
             tool_name=tool_name,
@@ -642,25 +591,85 @@ class ToolCallingLLM:
             result=tool_response,
         )
 
-
-
+    @staticmethod
+    def _log_tool_call_result(tool_span, tool_call_result: ToolCallResult):
+        tool_span.set_attributes(name=tool_call_result.tool_name)
+        tool_span.log(
+            input=tool_call_result.result.params,
+            output=tool_call_result.result.data,
+            error=tool_call_result.result.error,
+            metadata={
+                "status": tool_call_result.result.status,
+                "description": tool_call_result.description,
+            },
+        )
+
+    def _invoke_llm_tool_call(
+        self,
+        tool_to_call: ChatCompletionMessageToolCall,
+        previous_tool_calls: list[dict],
+        trace_span=None,
+        tool_number=None,
+    ) -> ToolCallResult:
+        if trace_span is None:
+            trace_span = DummySpan()
+        with trace_span.start_span(type="tool") as tool_span:
+            if not hasattr(tool_to_call, "function"):
+                # Handle the union type - ChatCompletionMessageToolCall can be either
+                # ChatCompletionMessageFunctionToolCall (with 'function' field and type='function')
+                # or ChatCompletionMessageCustomToolCall (with 'custom' field and type='custom').
+                # We use hasattr to check for the 'function' attribute as it's more flexible
+                # and doesn't require importing the specific type.
+                tool_name = "Unknown_Custom_Tool"
+                logging.error(f"Unsupported custom tool call: {tool_to_call}")
+                tool_call_result = ToolCallResult(
+                    tool_call_id=tool_to_call.id,
+                    tool_name=tool_name,
+                    description="NA",
+                    result=StructuredToolResult(
+                        status=StructuredToolResultStatus.ERROR,
+                        error="Custom tool calls are not supported",
+                        params=None,
+                    ),
+                )
+            else:
+                tool_name = tool_to_call.function.name
+                tool_arguments = tool_to_call.function.arguments
+                tool_id = tool_to_call.id
+                tool_call_result = self._get_tool_call_result(
+                    tool_id,
+                    tool_name,
+                    tool_arguments,
+                    previous_tool_calls=previous_tool_calls,
+                    tool_number=tool_number,
+                )
+
+            prevent_overly_big_tool_response(
+                tool_call_result=tool_call_result, llm=self.llm
+            )
+
+            ToolCallingLLM._log_tool_call_result(tool_span, tool_call_result)
+            return tool_call_result
+
+    def _handle_tool_call_approval(
+        self,
+        tool_call_result: ToolCallResult,
+        tool_number: Optional[int],
    ) -> ToolCallResult:
        """
        Handle approval for a single tool call if required.
 
        Args:
            tool_call_result: A single tool call result that may require approval
+            tool_number: The tool call number
 
        Returns:
            Updated tool call result with approved/denied status
        """
 
-        if tool_call_result.result.status != ToolResultStatus.APPROVAL_REQUIRED:
-            return tool_call_result
-
        # If no approval callback, convert to ERROR because it is assumed the client may not be able to handle approvals
        if not self.approval_callback:
-            tool_call_result.result.status =
+            tool_call_result.result.status = StructuredToolResultStatus.ERROR
            return tool_call_result
 
        # Get approval from user
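One rename runs through this whole file: ToolResultStatus becomes StructuredToolResultStatus. A related behavior shown above is that tools returning plain data (not a StructuredToolResult) get wrapped as SUCCESS. A small sketch of that wrapping, using the import and constructor shape from this diff; the payload and params are hypothetical:

# Sketch of the SUCCESS wrapping applied to plain tool return values.
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus

raw_output = {"pods": ["api-0", "api-1"]}   # hypothetical tool output
wrapped = StructuredToolResult(
    status=StructuredToolResultStatus.SUCCESS,
    data=raw_output,
    params={"namespace": "default"},        # hypothetical call params
)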
@@ -670,19 +679,17 @@ class ToolCallingLLM:
             logging.debug(
                 f"User approved command: {tool_call_result.result.invocation}"
             )
-
-            new_response = self._directly_invoke_tool(
+            new_response = self._directly_invoke_tool_call(
                 tool_name=tool_call_result.tool_name,
                 tool_params=tool_call_result.result.params or {},
                 user_approved=True,
-                trace_span=DummySpan(),
                 tool_number=tool_number,
             )
             tool_call_result.result = new_response
         else:
             # User denied - update to error
             feedback_text = f" User feedback: {feedback}" if feedback else ""
-            tool_call_result.result.status =
+            tool_call_result.result.status = StructuredToolResultStatus.ERROR
             tool_call_result.result.error = (
                 f"User denied command execution.{feedback_text}"
             )
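The approval flow above only runs when self.approval_callback is set; without one, APPROVAL_REQUIRED results are downgraded to ERROR. The approved/denied branches suggest the callback returns an (approved, feedback) pair, so a console implementation might look like the sketch below. The exact callback signature is an assumption inferred from this diff, not confirmed by holmes documentation.

# Hypothetical console approval callback; signature inferred from the
# approved/feedback usage above.
from typing import Optional, Tuple

def console_approval_callback(result) -> Tuple[bool, Optional[str]]:
    # `result.invocation` is the pending command, per the log line above.
    answer = input(f"Run `{result.invocation}`? [y/N] ").strip().lower()
    if answer == "y":
        return True, None                     # approved, no feedback
    return False, "operator declined at the console prompt"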
@@ -740,13 +747,16 @@ class ToolCallingLLM:
     @sentry_sdk.trace
     def truncate_messages_to_fit_context(
         self, messages: list, max_context_size: int, maximum_output_token: int
-    ) ->
-
+    ) -> TruncationResult:
+        truncated_res = truncate_messages_to_fit_context(
             messages,
             max_context_size,
             maximum_output_token,
             self.llm.count_tokens_for_message,
         )
+        if truncated_res.truncations:
+            sentry_helper.capture_tool_truncations(truncated_res.truncations)
+        return truncated_res
 
     def call_stream(
         self,
@@ -774,6 +784,7 @@ class ToolCallingLLM:
         )
         perf_timing.measure("get_all_tools_openai_format")
         max_steps = self.max_steps
+        metadata: Dict[Any, Any] = {}
         i = 0
         tool_number_offset = 0
 
@@ -792,10 +803,16 @@ class ToolCallingLLM:
 
             if (total_tokens + maximum_output_token) > max_context_size:
                 logging.warning("Token limit exceeded. Truncating tool responses.")
-
+                truncated_res = self.truncate_messages_to_fit_context(
                     messages, max_context_size, maximum_output_token
                 )
+                metadata["truncations"] = [
+                    t.model_dump() for t in truncated_res.truncations
+                ]
+                messages = truncated_res.truncated_messages
                 perf_timing.measure("truncate_messages_to_fit_context")
+            else:
+                metadata["truncations"] = []
 
             logging.debug(f"sending messages={messages}\n\ntools={tools}")
             try:
@@ -837,6 +854,7 @@ class ToolCallingLLM:
                     "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
                 )
                 # disable structured output going forward and and retry
+                sentry_helper.capture_structured_output_incorrect_tool_call()
                 response_format = None
                 max_steps = max_steps + 1
                 continue
@@ -849,9 +867,17 @@ class ToolCallingLLM:
 
             tools_to_call = getattr(response_message, "tool_calls", None)
             if not tools_to_call:
+                self.llm.count_tokens_for_message(messages)
+                metadata["usage"] = get_llm_usage(full_response)
+                metadata["max_tokens"] = max_context_size
+                metadata["max_output_tokens"] = maximum_output_token
                 yield StreamMessage(
                     event=StreamEvents.ANSWER_END,
-                    data={
+                    data={
+                        "content": response_message.content,
+                        "messages": messages,
+                        "metadata": metadata,
+                    },
                 )
                 return
 
@@ -883,7 +909,6 @@ class ToolCallingLLM:
 
                 for future in concurrent.futures.as_completed(futures):
                     tool_call_result: ToolCallResult = future.result()
-
                    tool_calls.append(tool_call_result.as_tool_result_response())
                    messages.append(tool_call_result.as_tool_call_message())
 
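In the streaming path, the same metadata (truncations, usage, max_tokens, max_output_tokens) now rides on the ANSWER_END event. A minimal consumer sketch, assuming StreamMessage exposes event and data attributes as the constructor calls above suggest:

# Sketch of reading the enriched ANSWER_END payload from call_stream.
from holmes.utils.stream import StreamEvents

def print_final_metadata(stream):
    for message in stream:  # StreamMessage objects yielded by call_stream
        if message.event == StreamEvents.ANSWER_END:
            meta = message.data["metadata"]
            print("truncated tool results:", len(meta.get("truncations", [])))
            print("context window:", meta.get("max_tokens"))
            print("reserved output tokens:", meta.get("max_output_tokens"))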