holmesgpt 0.14.0a0__py3-none-any.whl → 0.14.1a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic. Click here for more details.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +10 -2
- holmes/common/env_vars.py +8 -1
- holmes/config.py +66 -139
- holmes/core/investigation.py +1 -2
- holmes/core/llm.py +256 -51
- holmes/core/models.py +2 -0
- holmes/core/safeguards.py +4 -4
- holmes/core/supabase_dal.py +14 -8
- holmes/core/tool_calling_llm.py +101 -101
- holmes/core/tools.py +260 -25
- holmes/core/tools_utils/data_types.py +81 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +33 -0
- holmes/core/tools_utils/tool_executor.py +2 -2
- holmes/core/toolset_manager.py +150 -3
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +62 -0
- holmes/core/transformers/llm_summarize.py +174 -0
- holmes/core/transformers/registry.py +122 -0
- holmes/core/transformers/transformer.py +31 -0
- holmes/main.py +5 -0
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +17 -15
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +8 -4
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +4 -4
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +7 -3
- holmes/plugins/toolsets/bash/bash_toolset.py +6 -6
- holmes/plugins/toolsets/bash/common/bash.py +7 -7
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +15 -15
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +8 -8
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +20 -20
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +8 -8
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +17 -17
- holmes/plugins/toolsets/git.py +21 -21
- holmes/plugins/toolsets/grafana/common.py +2 -2
- holmes/plugins/toolsets/grafana/toolset_grafana.py +4 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +3 -3
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +123 -23
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +165 -307
- holmes/plugins/toolsets/internet/internet.py +3 -3
- holmes/plugins/toolsets/internet/notion.py +3 -3
- holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
- holmes/plugins/toolsets/kafka.py +18 -18
- holmes/plugins/toolsets/kubernetes.yaml +58 -0
- holmes/plugins/toolsets/kubernetes_logs.py +6 -6
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/mcp/toolset_mcp.py +4 -4
- holmes/plugins/toolsets/newrelic.py +5 -5
- holmes/plugins/toolsets/opensearch/opensearch.py +5 -5
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +10 -10
- holmes/plugins/toolsets/prometheus/prometheus.py +172 -39
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +25 -0
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +6 -4
- holmes/plugins/toolsets/robusta/robusta.py +10 -10
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -4
- holmes/plugins/toolsets/servicenow/servicenow.py +6 -6
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/env.py +7 -0
- holmes/utils/holmes_status.py +2 -1
- holmes/utils/sentry_helper.py +41 -0
- holmes/utils/stream.py +9 -0
- {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1a0.dist-info}/METADATA +9 -13
- {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1a0.dist-info}/RECORD +78 -68
- {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1a0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1a0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1a0.dist-info}/entry_points.txt +0 -0
holmes/core/tool_calling_llm.py
CHANGED
|
@@ -2,7 +2,7 @@ import concurrent.futures
|
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
4
|
import textwrap
|
|
5
|
-
from typing import Dict, List, Optional, Type, Union, Callable
|
|
5
|
+
from typing import Dict, List, Optional, Type, Union, Callable, Any
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
import sentry_sdk
|
|
@@ -32,14 +32,23 @@ from holmes.core.performance_timing import PerformanceTiming
|
|
|
32
32
|
from holmes.core.resource_instruction import ResourceInstructions
|
|
33
33
|
from holmes.core.runbooks import RunbookManager
|
|
34
34
|
from holmes.core.safeguards import prevent_overly_repeated_tool_call
|
|
35
|
-
from holmes.core.tools import StructuredToolResult,
|
|
35
|
+
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
|
|
36
|
+
from holmes.core.tools_utils.tool_context_window_limiter import (
|
|
37
|
+
prevent_overly_big_tool_response,
|
|
38
|
+
)
|
|
36
39
|
from holmes.plugins.prompts import load_and_render_prompt
|
|
40
|
+
from holmes.utils import sentry_helper
|
|
37
41
|
from holmes.utils.global_instructions import (
|
|
38
42
|
Instructions,
|
|
39
43
|
add_global_instructions_to_user_prompt,
|
|
40
44
|
)
|
|
41
45
|
from holmes.utils.tags import format_tags_in_string, parse_messages_tags
|
|
42
46
|
from holmes.core.tools_utils.tool_executor import ToolExecutor
|
|
47
|
+
from holmes.core.tools_utils.data_types import (
|
|
48
|
+
TruncationResult,
|
|
49
|
+
ToolCallResult,
|
|
50
|
+
TruncationMetadata,
|
|
51
|
+
)
|
|
43
52
|
from holmes.core.tracing import DummySpan
|
|
44
53
|
from holmes.utils.colors import AI_COLOR
|
|
45
54
|
from holmes.utils.stream import StreamEvents, StreamMessage
|
|
@@ -48,6 +57,9 @@ from holmes.utils.stream import StreamEvents, StreamMessage
|
|
|
48
57
|
cost_logger = logging.getLogger("holmes.costs")
|
|
49
58
|
|
|
50
59
|
|
|
60
|
+
TRUNCATION_NOTICE = "\n\n[TRUNCATED]"
|
|
61
|
+
|
|
62
|
+
|
|
51
63
|
class LLMCosts(BaseModel):
|
|
52
64
|
"""Tracks cost and token usage for LLM calls."""
|
|
53
65
|
|
|
@@ -119,23 +131,6 @@ def _process_cost_info(
|
|
|
119
131
|
logging.debug(f"Could not extract cost information: {e}")
|
|
120
132
|
|
|
121
133
|
|
|
122
|
-
def format_tool_result_data(tool_result: StructuredToolResult) -> str:
|
|
123
|
-
tool_response = tool_result.data
|
|
124
|
-
if isinstance(tool_result.data, str):
|
|
125
|
-
tool_response = tool_result.data
|
|
126
|
-
else:
|
|
127
|
-
try:
|
|
128
|
-
if isinstance(tool_result.data, BaseModel):
|
|
129
|
-
tool_response = tool_result.data.model_dump_json(indent=2)
|
|
130
|
-
else:
|
|
131
|
-
tool_response = json.dumps(tool_result.data, indent=2)
|
|
132
|
-
except Exception:
|
|
133
|
-
tool_response = str(tool_result.data)
|
|
134
|
-
if tool_result.status == ToolResultStatus.ERROR:
|
|
135
|
-
tool_response = f"{tool_result.error or 'Tool execution failed'}:\n\n{tool_result.data or ''}".strip()
|
|
136
|
-
return tool_response
|
|
137
|
-
|
|
138
|
-
|
|
139
134
|
# TODO: I think there's a bug here because we don't account for the 'role' or json structure like '{...}' when counting tokens
|
|
140
135
|
# However, in practice it works because we reserve enough space for the output tokens that the minor inconsistency does not matter
|
|
141
136
|
# We should fix this in the future
|
|
@@ -143,7 +138,7 @@ def format_tool_result_data(tool_result: StructuredToolResult) -> str:
|
|
|
143
138
|
# token truncation and not character truncation
|
|
144
139
|
def truncate_messages_to_fit_context(
|
|
145
140
|
messages: list, max_context_size: int, maximum_output_token: int, count_tokens_fn
|
|
146
|
-
) ->
|
|
141
|
+
) -> TruncationResult:
|
|
147
142
|
"""
|
|
148
143
|
Helper function to truncate tool messages to fit within context limits.
|
|
149
144
|
|
|
@@ -176,13 +171,17 @@ def truncate_messages_to_fit_context(
|
|
|
176
171
|
)
|
|
177
172
|
|
|
178
173
|
if len(tool_call_messages) == 0:
|
|
179
|
-
return messages
|
|
174
|
+
return TruncationResult(truncated_messages=messages, truncations=[])
|
|
180
175
|
|
|
181
176
|
available_space = (
|
|
182
|
-
max_context_size - message_size_without_tools -
|
|
177
|
+
max_context_size - message_size_without_tools - reserved_for_output_tokens
|
|
183
178
|
)
|
|
184
179
|
remaining_space = available_space
|
|
185
|
-
tool_call_messages.sort(
|
|
180
|
+
tool_call_messages.sort(
|
|
181
|
+
key=lambda x: count_tokens_fn([{"role": "tool", "content": x["content"]}])
|
|
182
|
+
)
|
|
183
|
+
|
|
184
|
+
truncations = []
|
|
186
185
|
|
|
187
186
|
# Allocate space starting with small tools and going to larger tools, while maintaining fairness
|
|
188
187
|
# Small tools can often get exactly what they need, while larger tools may need to be truncated
|
|
@@ -190,75 +189,49 @@ def truncate_messages_to_fit_context(
|
|
|
190
189
|
for i, msg in enumerate(tool_call_messages):
|
|
191
190
|
remaining_tools = len(tool_call_messages) - i
|
|
192
191
|
max_allocation = remaining_space // remaining_tools
|
|
193
|
-
needed_space =
|
|
192
|
+
needed_space = count_tokens_fn([{"role": "tool", "content": msg["content"]}])
|
|
194
193
|
allocated_space = min(needed_space, max_allocation)
|
|
195
194
|
|
|
196
195
|
if needed_space > allocated_space:
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
msg["content"][: allocated_space - len(truncation_notice)]
|
|
202
|
-
+ truncation_notice
|
|
203
|
-
)
|
|
204
|
-
logging.info(
|
|
205
|
-
f"Truncating tool message '{msg['name']}' from {needed_space} to {allocated_space-len(truncation_notice)} tokens"
|
|
206
|
-
)
|
|
207
|
-
else:
|
|
208
|
-
msg["content"] = truncation_notice[:allocated_space]
|
|
209
|
-
logging.info(
|
|
210
|
-
f"Truncating tool message '{msg['name']}' from {needed_space} to {allocated_space} tokens"
|
|
211
|
-
)
|
|
212
|
-
msg.pop("token_count", None) # Remove token_count if present
|
|
196
|
+
truncation_metadata = _truncate_tool_message(
|
|
197
|
+
msg, allocated_space, needed_space
|
|
198
|
+
)
|
|
199
|
+
truncations.append(truncation_metadata)
|
|
213
200
|
|
|
214
201
|
remaining_space -= allocated_space
|
|
215
|
-
return messages
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
if
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
"result": result_dump,
|
|
249
|
-
}
|
|
250
|
-
|
|
251
|
-
def as_streaming_tool_result_response(self):
|
|
252
|
-
result_dump = self.result.model_dump()
|
|
253
|
-
result_dump["data"] = self.result.get_stringified_data()
|
|
254
|
-
|
|
255
|
-
return {
|
|
256
|
-
"tool_call_id": self.tool_call_id,
|
|
257
|
-
"role": "tool",
|
|
258
|
-
"description": self.description,
|
|
259
|
-
"name": self.tool_name,
|
|
260
|
-
"result": result_dump,
|
|
261
|
-
}
|
|
202
|
+
return TruncationResult(truncated_messages=messages, truncations=truncations)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _truncate_tool_message(
|
|
206
|
+
msg: dict, allocated_space: int, needed_space: int
|
|
207
|
+
) -> TruncationMetadata:
|
|
208
|
+
msg_content = msg["content"]
|
|
209
|
+
tool_call_id = msg["tool_call_id"]
|
|
210
|
+
tool_name = msg["name"]
|
|
211
|
+
|
|
212
|
+
# Ensure the indicator fits in the allocated space
|
|
213
|
+
if allocated_space > len(TRUNCATION_NOTICE):
|
|
214
|
+
original = msg_content if isinstance(msg_content, str) else str(msg_content)
|
|
215
|
+
msg["content"] = (
|
|
216
|
+
original[: allocated_space - len(TRUNCATION_NOTICE)] + TRUNCATION_NOTICE
|
|
217
|
+
)
|
|
218
|
+
end_index = allocated_space - len(TRUNCATION_NOTICE)
|
|
219
|
+
else:
|
|
220
|
+
msg["content"] = TRUNCATION_NOTICE[:allocated_space]
|
|
221
|
+
end_index = allocated_space
|
|
222
|
+
|
|
223
|
+
msg.pop("token_count", None) # Remove token_count if present
|
|
224
|
+
logging.info(
|
|
225
|
+
f"Truncating tool message '{tool_name}' from {needed_space} to {allocated_space} tokens"
|
|
226
|
+
)
|
|
227
|
+
truncation_metadata = TruncationMetadata(
|
|
228
|
+
tool_call_id=tool_call_id,
|
|
229
|
+
start_index=0,
|
|
230
|
+
end_index=end_index,
|
|
231
|
+
tool_name=tool_name,
|
|
232
|
+
original_token_count=needed_space,
|
|
233
|
+
)
|
|
234
|
+
return truncation_metadata
|
|
262
235
|
|
|
263
236
|
|
|
264
237
|
class LLMResult(LLMCosts):
|
|
@@ -269,6 +242,7 @@ class LLMResult(LLMCosts):
|
|
|
269
242
|
# TODO: clean up these two
|
|
270
243
|
prompt: Optional[str] = None
|
|
271
244
|
messages: Optional[List[dict]] = None
|
|
245
|
+
metadata: Optional[Dict[Any, Any]] = None
|
|
272
246
|
|
|
273
247
|
def get_tool_usage_summary(self):
|
|
274
248
|
return "AI used info from issue and " + ",".join(
|
|
@@ -344,7 +318,7 @@ class ToolCallingLLM:
|
|
|
344
318
|
perf_timing.measure("get_all_tools_openai_format")
|
|
345
319
|
max_steps = self.max_steps
|
|
346
320
|
i = 0
|
|
347
|
-
|
|
321
|
+
metadata: Dict[Any, Any] = {}
|
|
348
322
|
while i < max_steps:
|
|
349
323
|
i += 1
|
|
350
324
|
perf_timing.measure(f"start iteration {i}")
|
|
@@ -360,9 +334,13 @@ class ToolCallingLLM:
|
|
|
360
334
|
|
|
361
335
|
if (total_tokens + maximum_output_token) > max_context_size:
|
|
362
336
|
logging.warning("Token limit exceeded. Truncating tool responses.")
|
|
363
|
-
|
|
337
|
+
truncated_res = self.truncate_messages_to_fit_context(
|
|
364
338
|
messages, max_context_size, maximum_output_token
|
|
365
339
|
)
|
|
340
|
+
metadata["truncations"] = [
|
|
341
|
+
t.model_dump() for t in truncated_res.truncations
|
|
342
|
+
]
|
|
343
|
+
messages = truncated_res.truncated_messages
|
|
366
344
|
perf_timing.measure("truncate_messages_to_fit_context")
|
|
367
345
|
|
|
368
346
|
logging.debug(f"sending messages={messages}\n\ntools={tools}")
|
|
@@ -408,6 +386,7 @@ class ToolCallingLLM:
|
|
|
408
386
|
"Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
|
|
409
387
|
)
|
|
410
388
|
# disable structured output going forward and retry
|
|
389
|
+
sentry_helper.capture_structured_output_incorrect_tool_call()
|
|
411
390
|
response_format = None
|
|
412
391
|
max_steps = max_steps + 1
|
|
413
392
|
continue
|
|
@@ -451,6 +430,7 @@ class ToolCallingLLM:
|
|
|
451
430
|
prompt=json.dumps(messages, indent=2),
|
|
452
431
|
messages=messages,
|
|
453
432
|
**costs.model_dump(), # Include all cost fields
|
|
433
|
+
metadata=metadata,
|
|
454
434
|
)
|
|
455
435
|
|
|
456
436
|
perf_timing.end(f"- completed in {i} iterations -")
|
|
@@ -460,6 +440,7 @@ class ToolCallingLLM:
|
|
|
460
440
|
prompt=json.dumps(messages, indent=2),
|
|
461
441
|
messages=messages,
|
|
462
442
|
**costs.model_dump(), # Include all cost fields
|
|
443
|
+
metadata=metadata,
|
|
463
444
|
)
|
|
464
445
|
|
|
465
446
|
if text_response and text_response.strip():
|
|
@@ -498,7 +479,7 @@ class ToolCallingLLM:
|
|
|
498
479
|
|
|
499
480
|
if (
|
|
500
481
|
tool_call_result.result.status
|
|
501
|
-
==
|
|
482
|
+
== StructuredToolResultStatus.APPROVAL_REQUIRED
|
|
502
483
|
):
|
|
503
484
|
with trace_span.start_span(type="tool") as tool_span:
|
|
504
485
|
tool_call_result = self._handle_tool_call_approval(
|
|
@@ -536,7 +517,7 @@ class ToolCallingLLM:
|
|
|
536
517
|
f"Skipping tool execution for {tool_name}: args: {tool_params}"
|
|
537
518
|
)
|
|
538
519
|
return StructuredToolResult(
|
|
539
|
-
status=
|
|
520
|
+
status=StructuredToolResultStatus.ERROR,
|
|
540
521
|
error=f"Failed to find tool {tool_name}",
|
|
541
522
|
params=tool_params,
|
|
542
523
|
)
|
|
@@ -550,7 +531,7 @@ class ToolCallingLLM:
|
|
|
550
531
|
f"Tool call to {tool_name} failed with an Exception", exc_info=True
|
|
551
532
|
)
|
|
552
533
|
tool_response = StructuredToolResult(
|
|
553
|
-
status=
|
|
534
|
+
status=StructuredToolResultStatus.ERROR,
|
|
554
535
|
error=f"Tool call failed: {e}",
|
|
555
536
|
params=tool_params,
|
|
556
537
|
)
|
|
@@ -592,7 +573,7 @@ class ToolCallingLLM:
|
|
|
592
573
|
f"Tool {tool_name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
|
|
593
574
|
)
|
|
594
575
|
tool_response = StructuredToolResult(
|
|
595
|
-
status=
|
|
576
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
596
577
|
data=tool_response,
|
|
597
578
|
params=tool_params,
|
|
598
579
|
)
|
|
@@ -642,7 +623,7 @@ class ToolCallingLLM:
|
|
|
642
623
|
tool_name=tool_name,
|
|
643
624
|
description="NA",
|
|
644
625
|
result=StructuredToolResult(
|
|
645
|
-
status=
|
|
626
|
+
status=StructuredToolResultStatus.ERROR,
|
|
646
627
|
error="Custom tool calls are not supported",
|
|
647
628
|
params=None,
|
|
648
629
|
),
|
|
@@ -658,6 +639,11 @@ class ToolCallingLLM:
|
|
|
658
639
|
previous_tool_calls=previous_tool_calls,
|
|
659
640
|
tool_number=tool_number,
|
|
660
641
|
)
|
|
642
|
+
|
|
643
|
+
prevent_overly_big_tool_response(
|
|
644
|
+
tool_call_result=tool_call_result, llm=self.llm
|
|
645
|
+
)
|
|
646
|
+
|
|
661
647
|
ToolCallingLLM._log_tool_call_result(tool_span, tool_call_result)
|
|
662
648
|
return tool_call_result
|
|
663
649
|
|
|
@@ -679,7 +665,7 @@ class ToolCallingLLM:
|
|
|
679
665
|
|
|
680
666
|
# If no approval callback, convert to ERROR because it is assumed the client may not be able to handle approvals
|
|
681
667
|
if not self.approval_callback:
|
|
682
|
-
tool_call_result.result.status =
|
|
668
|
+
tool_call_result.result.status = StructuredToolResultStatus.ERROR
|
|
683
669
|
return tool_call_result
|
|
684
670
|
|
|
685
671
|
# Get approval from user
|
|
@@ -699,7 +685,7 @@ class ToolCallingLLM:
|
|
|
699
685
|
else:
|
|
700
686
|
# User denied - update to error
|
|
701
687
|
feedback_text = f" User feedback: {feedback}" if feedback else ""
|
|
702
|
-
tool_call_result.result.status =
|
|
688
|
+
tool_call_result.result.status = StructuredToolResultStatus.ERROR
|
|
703
689
|
tool_call_result.result.error = (
|
|
704
690
|
f"User denied command execution.{feedback_text}"
|
|
705
691
|
)
|
|
@@ -757,13 +743,16 @@ class ToolCallingLLM:
|
|
|
757
743
|
@sentry_sdk.trace
|
|
758
744
|
def truncate_messages_to_fit_context(
|
|
759
745
|
self, messages: list, max_context_size: int, maximum_output_token: int
|
|
760
|
-
) ->
|
|
761
|
-
|
|
746
|
+
) -> TruncationResult:
|
|
747
|
+
truncated_res = truncate_messages_to_fit_context(
|
|
762
748
|
messages,
|
|
763
749
|
max_context_size,
|
|
764
750
|
maximum_output_token,
|
|
765
751
|
self.llm.count_tokens_for_message,
|
|
766
752
|
)
|
|
753
|
+
if truncated_res.truncations:
|
|
754
|
+
sentry_helper.capture_tool_truncations(truncated_res.truncations)
|
|
755
|
+
return truncated_res
|
|
767
756
|
|
|
768
757
|
def call_stream(
|
|
769
758
|
self,
|
|
@@ -791,6 +780,7 @@ class ToolCallingLLM:
|
|
|
791
780
|
)
|
|
792
781
|
perf_timing.measure("get_all_tools_openai_format")
|
|
793
782
|
max_steps = self.max_steps
|
|
783
|
+
metadata: Dict[Any, Any] = {}
|
|
794
784
|
i = 0
|
|
795
785
|
tool_number_offset = 0
|
|
796
786
|
|
|
@@ -809,10 +799,16 @@ class ToolCallingLLM:
|
|
|
809
799
|
|
|
810
800
|
if (total_tokens + maximum_output_token) > max_context_size:
|
|
811
801
|
logging.warning("Token limit exceeded. Truncating tool responses.")
|
|
812
|
-
|
|
802
|
+
truncated_res = self.truncate_messages_to_fit_context(
|
|
813
803
|
messages, max_context_size, maximum_output_token
|
|
814
804
|
)
|
|
805
|
+
metadata["truncations"] = [
|
|
806
|
+
t.model_dump() for t in truncated_res.truncations
|
|
807
|
+
]
|
|
808
|
+
messages = truncated_res.truncated_messages
|
|
815
809
|
perf_timing.measure("truncate_messages_to_fit_context")
|
|
810
|
+
else:
|
|
811
|
+
metadata["truncations"] = []
|
|
816
812
|
|
|
817
813
|
logging.debug(f"sending messages={messages}\n\ntools={tools}")
|
|
818
814
|
try:
|
|
@@ -854,6 +850,7 @@ class ToolCallingLLM:
|
|
|
854
850
|
"Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
|
|
855
851
|
)
|
|
856
852
|
# disable structured output going forward and retry
|
|
853
|
+
sentry_helper.capture_structured_output_incorrect_tool_call()
|
|
857
854
|
response_format = None
|
|
858
855
|
max_steps = max_steps + 1
|
|
859
856
|
continue
|
|
@@ -868,7 +865,11 @@ class ToolCallingLLM:
|
|
|
868
865
|
if not tools_to_call:
|
|
869
866
|
yield StreamMessage(
|
|
870
867
|
event=StreamEvents.ANSWER_END,
|
|
871
|
-
data={
|
|
868
|
+
data={
|
|
869
|
+
"content": response_message.content,
|
|
870
|
+
"messages": messages,
|
|
871
|
+
"metadata": metadata,
|
|
872
|
+
},
|
|
872
873
|
)
|
|
873
874
|
return
|
|
874
875
|
|
|
@@ -900,7 +901,6 @@ class ToolCallingLLM:
|
|
|
900
901
|
|
|
901
902
|
for future in concurrent.futures.as_completed(futures):
|
|
902
903
|
tool_call_result: ToolCallResult = future.result()
|
|
903
|
-
|
|
904
904
|
tool_calls.append(tool_call_result.as_tool_result_response())
|
|
905
905
|
messages.append(tool_call_result.as_tool_call_message())
|
|
906
906
|
|