holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
- holmes/__init__.py +3 -5
- holmes/clients/robusta_client.py +20 -6
- holmes/common/env_vars.py +58 -3
- holmes/common/openshift.py +1 -1
- holmes/config.py +123 -148
- holmes/core/conversations.py +71 -15
- holmes/core/feedback.py +191 -0
- holmes/core/investigation.py +31 -39
- holmes/core/investigation_structured_output.py +3 -3
- holmes/core/issue.py +1 -1
- holmes/core/llm.py +508 -88
- holmes/core/models.py +108 -4
- holmes/core/openai_formatting.py +14 -1
- holmes/core/prompt.py +48 -3
- holmes/core/runbooks.py +1 -0
- holmes/core/safeguards.py +8 -6
- holmes/core/supabase_dal.py +295 -100
- holmes/core/tool_calling_llm.py +489 -428
- holmes/core/tools.py +325 -56
- holmes/core/tools_utils/token_counting.py +21 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
- holmes/core/tools_utils/tool_executor.py +0 -13
- holmes/core/tools_utils/toolset_utils.py +1 -0
- holmes/core/toolset_manager.py +191 -5
- holmes/core/tracing.py +19 -3
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +63 -0
- holmes/core/transformers/llm_summarize.py +175 -0
- holmes/core/transformers/registry.py +123 -0
- holmes/core/transformers/transformer.py +32 -0
- holmes/core/truncation/compaction.py +94 -0
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/core/truncation/input_context_window_limiter.py +219 -0
- holmes/interactive.py +228 -31
- holmes/main.py +23 -40
- holmes/plugins/interfaces.py +2 -1
- holmes/plugins/prompts/__init__.py +2 -1
- holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
- holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
- holmes/plugins/prompts/generic_ask.jinja2 +0 -4
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
- holmes/plugins/runbooks/__init__.py +145 -17
- holmes/plugins/runbooks/catalog.json +2 -0
- holmes/plugins/sources/github/__init__.py +4 -2
- holmes/plugins/sources/prometheus/models.py +1 -0
- holmes/plugins/toolsets/__init__.py +44 -27
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/utils.py +0 -32
- holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
- holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
- holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
- holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
- holmes/plugins/toolsets/bash/common/bash.py +23 -13
- holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
- holmes/plugins/toolsets/bash/common/stringify.py +1 -1
- holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
- holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
- holmes/plugins/toolsets/bash/parse_command.py +12 -13
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/connectivity_check.py +124 -0
- holmes/plugins/toolsets/coralogix/api.py +132 -119
- holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
- holmes/plugins/toolsets/coralogix/utils.py +15 -79
- holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
- holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
- holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
- holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
- holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
- holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/git.py +54 -50
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
- holmes/plugins/toolsets/grafana/common.py +13 -29
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
- holmes/plugins/toolsets/grafana/loki_api.py +4 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
- holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
- holmes/plugins/toolsets/internet/internet.py +15 -16
- holmes/plugins/toolsets/internet/notion.py +9 -11
- holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
- holmes/plugins/toolsets/investigator/model.py +3 -1
- holmes/plugins/toolsets/json_filter_mixin.py +134 -0
- holmes/plugins/toolsets/kafka.py +36 -42
- holmes/plugins/toolsets/kubernetes.yaml +317 -113
- holmes/plugins/toolsets/kubernetes_logs.py +9 -9
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
- holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
- holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
- holmes/plugins/toolsets/openshift.yaml +283 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/api.py +23 -4
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
- holmes/plugins/toolsets/robusta/robusta.py +239 -68
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/connection_utils.py +31 -0
- holmes/utils/console/result.py +10 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
- holmes/utils/env.py +7 -0
- holmes/utils/file_utils.py +2 -1
- holmes/utils/global_instructions.py +60 -11
- holmes/utils/holmes_status.py +6 -4
- holmes/utils/holmes_sync_toolsets.py +0 -2
- holmes/utils/krr_utils.py +188 -0
- holmes/utils/log.py +15 -0
- holmes/utils/markdown_utils.py +2 -3
- holmes/utils/memory_limit.py +58 -0
- holmes/utils/sentry_helper.py +64 -0
- holmes/utils/stream.py +69 -8
- holmes/utils/tags.py +4 -3
- holmes/version.py +37 -15
- holmesgpt-0.18.4.dist-info/LICENSE +178 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
- holmesgpt-0.18.4.dist-info/RECORD +258 -0
- holmes/core/performance_timing.py +0 -72
- holmes/plugins/toolsets/aws.yaml +0 -80
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
- holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
- holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
- holmes/plugins/toolsets/newrelic.py +0 -231
- holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
- holmes/plugins/toolsets/servicenow/install.md +0 -37
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
- holmes/utils/keygen_utils.py +0 -6
- holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
- holmesgpt-0.13.2.dist-info/RECORD +0 -234
- /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
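The largest single rewrite below is holmes/core/tool_calling_llm.py. Among other things, it drops the module-level truncate_messages_to_fit_context helper in favor of limit_input_context_window from the new holmes/core/truncation package, and threads tool approvals and token-count metadata through both call() and call_stream(). For reference, the removed helper allocated the remaining context budget across tool messages smallest-first, so small tool outputs usually kept their full content and only large ones were cut. A minimal standalone sketch of that allocation strategy follows; it is illustrative only, uses simplified message dicts, and, like the removed code, counts characters rather than tokens:

TRUNCATION_NOTICE = "\n\n[TRUNCATED]"

def allocate_tool_budgets(tool_messages: list, available_space: int) -> None:
    # Truncate tool messages in place so their combined size fits available_space.
    # Smaller messages are handled first so they usually keep their full content;
    # any budget they do not use is redistributed to the larger messages that follow.
    tool_messages.sort(key=lambda m: len(m["content"]))
    remaining = available_space
    for i, msg in enumerate(tool_messages):
        remaining_tools = len(tool_messages) - i
        max_allocation = remaining // remaining_tools  # fair share of what is left
        needed = len(msg["content"])
        allocated = min(needed, max_allocation)
        if needed > allocated:
            if allocated > len(TRUNCATION_NOTICE):
                msg["content"] = (
                    msg["content"][: allocated - len(TRUNCATION_NOTICE)]
                    + TRUNCATION_NOTICE
                )
            else:
                msg["content"] = TRUNCATION_NOTICE[:allocated]
        remaining -= allocated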
holmes/core/tool_calling_llm.py
CHANGED

@@ -2,8 +2,7 @@ import concurrent.futures
 import json
 import logging
 import textwrap
-from typing import Dict, List, Optional, Type, Union
-
+from typing import Any, Callable, Dict, List, Optional, Type, Union

 import sentry_sdk
 from openai import BadRequestError
@@ -14,11 +13,10 @@ from pydantic import BaseModel, Field
 from rich.console import Console

 from holmes.common.env_vars import (
-    TEMPERATURE,
-    MAX_OUTPUT_TOKEN_RESERVATION,
     LOG_LLM_USAGE_RESPONSE,
+    RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION,
+    TEMPERATURE,
 )
-
 from holmes.core.investigation_structured_output import (
     DEFAULT_SECTIONS,
     REQUEST_STRUCTURED_OUTPUT_FROM_LLM,
@@ -28,21 +26,42 @@ from holmes.core.investigation_structured_output import (
 )
 from holmes.core.issue import Issue
 from holmes.core.llm import LLM
-from holmes.core.
-
+from holmes.core.models import (
+    PendingToolApproval,
+    ToolApprovalDecision,
+    ToolCallResult,
+)
+from holmes.core.prompt import generate_user_prompt
 from holmes.core.runbooks import RunbookManager
 from holmes.core.safeguards import prevent_overly_repeated_tool_call
-from holmes.core.tools import
-
-
-
-
+from holmes.core.tools import (
+    StructuredToolResult,
+    StructuredToolResultStatus,
+    ToolInvokeContext,
+)
+from holmes.core.tools_utils.tool_context_window_limiter import (
+    prevent_overly_big_tool_response,
 )
-from holmes.utils.tags import format_tags_in_string, parse_messages_tags
 from holmes.core.tools_utils.tool_executor import ToolExecutor
 from holmes.core.tracing import DummySpan
+from holmes.core.truncation.input_context_window_limiter import (
+    limit_input_context_window,
+)
+from holmes.plugins.prompts import load_and_render_prompt
+from holmes.plugins.runbooks import RunbookCatalog
+from holmes.utils import sentry_helper
 from holmes.utils.colors import AI_COLOR
-from holmes.utils.
+from holmes.utils.global_instructions import (
+    Instructions,
+    generate_runbooks_args,
+)
+from holmes.utils.stream import (
+    StreamEvents,
+    StreamMessage,
+    add_token_count_to_metadata,
+    build_stream_event_token_count,
+)
+from holmes.utils.tags import parse_messages_tags

 # Create a named logger for cost tracking
 cost_logger = logging.getLogger("holmes.costs")
@@ -119,156 +138,16 @@ def _process_cost_info(
         logging.debug(f"Could not extract cost information: {e}")


-def format_tool_result_data(tool_result: StructuredToolResult) -> str:
-    tool_response = tool_result.data
-    if isinstance(tool_result.data, str):
-        tool_response = tool_result.data
-    else:
-        try:
-            if isinstance(tool_result.data, BaseModel):
-                tool_response = tool_result.data.model_dump_json(indent=2)
-            else:
-                tool_response = json.dumps(tool_result.data, indent=2)
-        except Exception:
-            tool_response = str(tool_result.data)
-    if tool_result.status == ToolResultStatus.ERROR:
-        tool_response = f"{tool_result.error or 'Tool execution failed'}:\n\n{tool_result.data or ''}".strip()
-    return tool_response
-
-
-# TODO: I think there's a bug here because we don't account for the 'role' or json structure like '{...}' when counting tokens
-# However, in practice it works because we reserve enough space for the output tokens that the minor inconsistency does not matter
-# We should fix this in the future
-# TODO: we truncate using character counts not token counts - this means we're overly agressive with truncation - improve it by considering
-# token truncation and not character truncation
-def truncate_messages_to_fit_context(
-    messages: list, max_context_size: int, maximum_output_token: int, count_tokens_fn
-) -> list:
-    """
-    Helper function to truncate tool messages to fit within context limits.
-
-    Args:
-        messages: List of message dictionaries with roles and content
-        max_context_size: Maximum context window size for the model
-        maximum_output_token: Maximum tokens reserved for model output
-        count_tokens_fn: Function to count tokens for a list of messages
-
-    Returns:
-        Modified list of messages with truncated tool responses
-
-    Raises:
-        Exception: If non-tool messages exceed available context space
-    """
-    messages_except_tools = [
-        message for message in messages if message["role"] != "tool"
-    ]
-    message_size_without_tools = count_tokens_fn(messages_except_tools)
-
-    tool_call_messages = [message for message in messages if message["role"] == "tool"]
-
-    reserved_for_output_tokens = min(maximum_output_token, MAX_OUTPUT_TOKEN_RESERVATION)
-    if message_size_without_tools >= (max_context_size - reserved_for_output_tokens):
-        logging.error(
-            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input."
-        )
-        raise Exception(
-            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - reserved_for_output_tokens} tokens available for input."
-        )
-
-    if len(tool_call_messages) == 0:
-        return messages
-
-    available_space = (
-        max_context_size - message_size_without_tools - maximum_output_token
-    )
-    remaining_space = available_space
-    tool_call_messages.sort(key=lambda x: len(x["content"]))
-
-    # Allocate space starting with small tools and going to larger tools, while maintaining fairness
-    # Small tools can often get exactly what they need, while larger tools may need to be truncated
-    # We ensure fairness (no tool gets more than others that need it) and also maximize utilization (we don't leave space unused)
-    for i, msg in enumerate(tool_call_messages):
-        remaining_tools = len(tool_call_messages) - i
-        max_allocation = remaining_space // remaining_tools
-        needed_space = len(msg["content"])
-        allocated_space = min(needed_space, max_allocation)
-
-        if needed_space > allocated_space:
-            truncation_notice = "\n\n[TRUNCATED]"
-            # Ensure the indicator fits in the allocated space
-            if allocated_space > len(truncation_notice):
-                msg["content"] = (
-                    msg["content"][: allocated_space - len(truncation_notice)]
-                    + truncation_notice
-                )
-                logging.info(
-                    f"Truncating tool message '{msg['name']}' from {needed_space} to {allocated_space-len(truncation_notice)} tokens"
-                )
-            else:
-                msg["content"] = truncation_notice[:allocated_space]
-                logging.info(
-                    f"Truncating tool message '{msg['name']}' from {needed_space} to {allocated_space} tokens"
-                )
-            msg.pop("token_count", None)  # Remove token_count if present
-
-        remaining_space -= allocated_space
-    return messages
-
-
-class ToolCallResult(BaseModel):
-    tool_call_id: str
-    tool_name: str
-    description: str
-    result: StructuredToolResult
-    size: Optional[int] = None
-
-    def as_tool_call_message(self):
-        content = format_tool_result_data(self.result)
-        if self.result.params:
-            content = (
-                f"Params used for the tool call: {json.dumps(self.result.params)}. The tool call output follows on the next line.\n"
-                + content
-            )
-        return {
-            "tool_call_id": self.tool_call_id,
-            "role": "tool",
-            "name": self.tool_name,
-            "content": content,
-        }
-
-    def as_tool_result_response(self):
-        result_dump = self.result.model_dump()
-        result_dump["data"] = self.result.get_stringified_data()
-
-        return {
-            "tool_call_id": self.tool_call_id,
-            "tool_name": self.tool_name,
-            "description": self.description,
-            "role": "tool",
-            "result": result_dump,
-        }
-
-    def as_streaming_tool_result_response(self):
-        result_dump = self.result.model_dump()
-        result_dump["data"] = self.result.get_stringified_data()
-
-        return {
-            "tool_call_id": self.tool_call_id,
-            "role": "tool",
-            "description": self.description,
-            "name": self.tool_name,
-            "result": result_dump,
-        }
-
-
 class LLMResult(LLMCosts):
     tool_calls: Optional[List[ToolCallResult]] = None
+    num_llm_calls: Optional[int] = None  # Number of LLM API calls (turns)
     result: Optional[str] = None
     unprocessed_result: Optional[str] = None
     instructions: List[str] = Field(default_factory=list)
     # TODO: clean up these two
     prompt: Optional[str] = None
     messages: Optional[List[dict]] = None
+    metadata: Optional[Dict[Any, Any]] = None

     def get_tool_usage_summary(self):
         return "AI used info from issue and " + ",".join(
@@ -276,6 +155,12 @@ class LLMResult(LLMCosts):
         )


+class ToolCallWithDecision(BaseModel):
+    message_index: int
+    tool_call: ChatCompletionMessageToolCall
+    decision: Optional[ToolApprovalDecision]
+
+
 class ToolCallingLLM:
     llm: LLM

@@ -290,11 +175,99 @@ class ToolCallingLLM:
         Callable[[StructuredToolResult], tuple[bool, Optional[str]]]
     ] = None

+    def process_tool_decisions(
+        self, messages: List[Dict[str, Any]], tool_decisions: List[ToolApprovalDecision]
+    ) -> tuple[List[Dict[str, Any]], list[StreamMessage]]:
+        """
+        Process tool approval decisions and execute approved tools.
+
+        Args:
+            messages: Current conversation messages
+            tool_decisions: List of ToolApprovalDecision objects
+
+        Returns:
+            Updated messages list with tool execution results
+        """
+        events: list[StreamMessage] = []
+        if not tool_decisions:
+            return messages, events
+
+        # Create decision lookup
+        decisions_by_tool_call_id = {
+            decision.tool_call_id: decision for decision in tool_decisions
+        }
+
+        pending_tool_calls: list[ToolCallWithDecision] = []
+
+        for i in reversed(range(len(messages))):
+            msg = messages[i]
+            if msg.get("role") == "assistant" and msg.get("tool_calls"):
+                message_tool_calls = msg.get("tool_calls", [])
+                for tool_call in message_tool_calls:
+                    decision = decisions_by_tool_call_id.get(tool_call.get("id"), None)
+                    if tool_call.get("pending_approval"):
+                        del tool_call[
+                            "pending_approval"
+                        ]  # Cleanup so that a pending approval is not tagged on message in a future response
+                        pending_tool_calls.append(
+                            ToolCallWithDecision(
+                                tool_call=ChatCompletionMessageToolCall(**tool_call),
+                                decision=decision,
+                                message_index=i,
+                            )
+                        )
+
+        if not pending_tool_calls:
+            error_message = f"Received {len(tool_decisions)} tool decisions but no pending approvals found"
+            logging.error(error_message)
+            raise Exception(error_message)
+        for tool_call_with_decision in pending_tool_calls:
+            tool_call_message: dict
+            tool_call = tool_call_with_decision.tool_call
+            decision = tool_call_with_decision.decision
+            tool_result: Optional[ToolCallResult] = None
+            if decision and decision.approved:
+                tool_result = self._invoke_llm_tool_call(
+                    tool_to_call=tool_call,
+                    previous_tool_calls=[],
+                    trace_span=DummySpan(),  # TODO: replace with proper span
+                    tool_number=None,
+                    user_approved=True,
+                )
+            else:
+                # Tool was rejected or no decision found, add rejection message
+                tool_result = ToolCallResult(
+                    tool_call_id=tool_call.id,
+                    tool_name=tool_call.function.name,
+                    description=tool_call.function.name,
+                    result=StructuredToolResult(
+                        status=StructuredToolResultStatus.ERROR,
+                        error="Tool execution was denied by the user.",
+                    ),
+                )
+
+            events.append(
+                StreamMessage(
+                    event=StreamEvents.TOOL_RESULT,
+                    data=tool_result.as_streaming_tool_result_response(),
+                )
+            )
+
+            tool_call_message = tool_result.as_tool_call_message()
+
+            # It is expected that the tool call result directly follows the tool call request from the LLM
+            # The API call may contain a user ask which is appended to the messages so we can't just append
+            # tool call results; they need to be inserted right after the llm's message requesting tool calls
+            messages.insert(
+                tool_call_with_decision.message_index + 1, tool_call_message
+            )
+
+        return messages, events
+
     def prompt_call(
         self,
         system_prompt: str,
         user_prompt: str,
-        post_process_prompt: Optional[str] = None,
         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
         sections: Optional[InputSectionsDataType] = None,
         trace_span=DummySpan(),
@@ -305,8 +278,7 @@ class ToolCallingLLM:
         ]
         return self.call(
             messages,
-
-            response_format,
+            response_format=response_format,
             user_prompt=user_prompt,
             sections=sections,
             trace_span=trace_span,
@@ -315,55 +287,52 @@ class ToolCallingLLM:
     def messages_call(
         self,
         messages: List[Dict[str, str]],
-        post_process_prompt: Optional[str] = None,
         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
         trace_span=DummySpan(),
     ) -> LLMResult:
         return self.call(
-            messages,
+            messages, response_format=response_format, trace_span=trace_span
         )

     @sentry_sdk.trace
     def call(  # type: ignore
         self,
         messages: List[Dict[str, str]],
-        post_process_prompt: Optional[str] = None,
         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
         user_prompt: Optional[str] = None,
         sections: Optional[InputSectionsDataType] = None,
         trace_span=DummySpan(),
         tool_number_offset: int = 0,
     ) -> LLMResult:
-
-
+        tool_calls: list[
+            dict
+        ] = []  # Used for preventing repeated tool calls. potentially reset after compaction
+        all_tool_calls = []  # type: ignore
         costs = LLMCosts()
-
         tools = self.tool_executor.get_all_tools_openai_format(
             target_model=self.llm.model
         )
-        perf_timing.measure("get_all_tools_openai_format")
         max_steps = self.max_steps
         i = 0
-
+        metadata: Dict[Any, Any] = {}
         while i < max_steps:
             i += 1
-            perf_timing.measure(f"start iteration {i}")
             logging.debug(f"running iteration {i}")
             # on the last step we don't allow tools - we want to force a reply, not a request to run another tool
             tools = None if i == max_steps else tools
             tool_choice = "auto" if tools else None

-
-
-
-
+            limit_result = limit_input_context_window(
+                llm=self.llm, messages=messages, tools=tools
+            )
+            messages = limit_result.messages
+            metadata = metadata | limit_result.metadata

-            if (
-
-
-
-
-                perf_timing.measure("truncate_messages_to_fit_context")
+            if (
+                limit_result.conversation_history_compacted
+                and RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION
+            ):
+                tool_calls = []

             logging.debug(f"sending messages={messages}\n\ntools={tools}")

@@ -381,7 +350,6 @@
             # Extract and accumulate cost information
             _process_cost_info(full_response, costs, "LLM call")

-            perf_timing.measure("llm.completion")
             # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
             except BadRequestError as e:
                 if "Unrecognized request arguments supplied: tool_choice, tools" in str(
@@ -405,9 +373,10 @@

             if incorrect_tool_call:
                 logging.warning(
-                    "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-
+                    "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4.1' or other structured output compatible models. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
                 )
                 # disable structured output going forward and and retry
+                sentry_helper.capture_structured_output_incorrect_tool_call()
                 response_format = None
                 max_steps = max_steps + 1
                 continue
@@ -424,42 +393,29 @@
                 hasattr(response_message, "reasoning_content")
                 and response_message.reasoning_content
             ):
-                logging.
-                    f"[
+                logging.info(
+                    f"[italic dim]AI reasoning:\n\n{response_message.reasoning_content}[/italic dim]\n"
                 )

             if not tools_to_call:
-
-
-
-
-
-
-
-
-
-                            user_prompt=post_process_prompt,
-                        )
-                    )
-                    costs.total_cost += post_processing_cost
-
-                    perf_timing.end(f"- completed in {i} iterations -")
-                    return LLMResult(
-                        result=post_processed_response,
-                        unprocessed_result=raw_response,
-                        tool_calls=tool_calls,
-                        prompt=json.dumps(messages, indent=2),
-                        messages=messages,
-                        **costs.model_dump(),  # Include all cost fields
-                    )
+                tokens = self.llm.count_tokens(messages=messages, tools=tools)
+
+                add_token_count_to_metadata(
+                    tokens=tokens,
+                    full_llm_response=full_response,
+                    max_context_size=limit_result.max_context_size,
+                    maximum_output_token=limit_result.maximum_output_token,
+                    metadata=metadata,
+                )

-                perf_timing.end(f"- completed in {i} iterations -")
                 return LLMResult(
                     result=text_response,
-                    tool_calls=
+                    tool_calls=all_tool_calls,
+                    num_llm_calls=i,
                     prompt=json.dumps(messages, indent=2),
                     messages=messages,
                     **costs.model_dump(),  # Include all cost fields
+                    metadata=metadata,
                 )

             if text_response and text_response.strip():
@@ -467,7 +423,6 @@
                 logging.info(
                     f"The AI requested [bold]{len(tools_to_call) if tools_to_call else 0}[/bold] tool call(s)."
                 )
-                perf_timing.measure("pre-tool-calls")
             with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
                 futures = []
                 futures_tool_numbers: dict[
@@ -477,6 +432,7 @@
                 for tool_index, t in enumerate(tools_to_call, 1):
                     logging.debug(f"Tool to call: {t}")
                     tool_number = tool_number_offset + tool_index
+
                     future = executor.submit(
                         self._invoke_llm_tool_call,
                         tool_to_call=t,
@@ -495,14 +451,24 @@
                         if future in futures_tool_numbers
                         else None
                     )
-                    tool_call_result = self.handle_tool_call_approval(
-                        tool_call_result=tool_call_result, tool_number=tool_number
-                    )

-
-
+                    if (
+                        tool_call_result.result.status
+                        == StructuredToolResultStatus.APPROVAL_REQUIRED
+                    ):
+                        tool_call_result = self._handle_tool_call_approval(
+                            tool_call_result=tool_call_result,
+                            tool_number=tool_number,
+                            trace_span=trace_span,
+                        )

-
+                    tool_result_response_dict = (
+                        tool_call_result.as_tool_result_response()
+                    )
+                    tool_calls.append(tool_result_response_dict)
+                    all_tool_calls.append(tool_result_response_dict)
+                    messages.append(tool_call_result.as_tool_call_message())
+                    tokens = self.llm.count_tokens(messages=messages, tools=tools)

             # Update the tool number offset for the next iteration
             tool_number_offset += len(tools_to_call)
@@ -513,91 +479,55 @@

         raise Exception(f"Too many LLM calls - exceeded max_steps: {i}/{max_steps}")

-    def
+    def _directly_invoke_tool_call(
         self,
         tool_name: str,
         tool_params: dict,
         user_approved: bool,
-
+        tool_call_id: str,
         tool_number: Optional[int] = None,
     ) -> StructuredToolResult:
-        tool_span = trace_span.start_span(name=tool_name, type="tool")
         tool = self.tool_executor.get_tool_by_name(tool_name)
-
+        if not tool:
+            logging.warning(
+                f"Skipping tool execution for {tool_name}: args: {tool_params}"
+            )
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.ERROR,
+                error=f"Failed to find tool {tool_name}",
+                params=tool_params,
+            )
+
         try:
-
-
-
-
-
-
-
-
-
-            else:
-                tool_response = tool.invoke(
-                    tool_params, tool_number=tool_number, user_approved=user_approved
-                )
+            invoke_context = ToolInvokeContext(
+                tool_number=tool_number,
+                user_approved=user_approved,
+                llm=self.llm,
+                max_token_count=self.llm.get_max_token_count_for_single_tool(),
+                tool_name=tool_name,
+                tool_call_id=tool_call_id,
+            )
+            tool_response = tool.invoke(tool_params, context=invoke_context)
         except Exception as e:
             logging.error(
                 f"Tool call to {tool_name} failed with an Exception", exc_info=True
             )
             tool_response = StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Tool call failed: {e}",
                 params=tool_params,
             )
-
-            # Log error to trace span
-            tool_span.log(
-                input=tool_params, output=str(e), metadata={"status": "ERROR"}
-            )
-
-        tool_span.log(
-            input=tool_params,
-            output=tool_response.data,
-            metadata={
-                "status": tool_response.status.value,
-                "error": tool_response.error,
-                "description": tool.get_parameterized_one_liner(tool_params)
-                if tool
-                else "",
-                "structured_tool_result": tool_response,
-            },
-        )
-        tool_span.end()
-
         return tool_response

-    def
+    def _get_tool_call_result(
         self,
-
+        tool_call_id: str,
+        tool_name: str,
+        tool_arguments: str,
+        user_approved: bool,
         previous_tool_calls: list[dict],
-
-        tool_number=None,
+        tool_number: Optional[int] = None,
     ) -> ToolCallResult:
-        # Handle the union type - ChatCompletionMessageToolCall can be either
-        # ChatCompletionMessageFunctionToolCall (with 'function' field and type='function')
-        # or ChatCompletionMessageCustomToolCall (with 'custom' field and type='custom').
-        # We use hasattr to check for the 'function' attribute as it's more flexible
-        # and doesn't require importing the specific type.
-        if hasattr(tool_to_call, "function"):
-            tool_name = tool_to_call.function.name
-            tool_arguments = tool_to_call.function.arguments
-        else:
-            # This is a custom tool call - we don't support these currently
-            logging.error(f"Unsupported custom tool call: {tool_to_call}")
-            return ToolCallResult(
-                tool_call_id=tool_to_call.id,
-                tool_name="unknown",
-                description="NA",
-                result=StructuredToolResult(
-                    status=ToolResultStatus.ERROR,
-                    error="Custom tool calls are not supported",
-                    params=None,
-                ),
-            )
-
         tool_params = {}
         try:
             tool_params = json.loads(tool_arguments)
@@ -606,21 +536,21 @@
                 f"Failed to parse arguments for tool: {tool_name}. args: {tool_arguments}"
             )

-
-
-
-
-
-
-
+        tool_response = None
+        if not user_approved:
+            tool_response = prevent_overly_repeated_tool_call(
+                tool_name=tool_name,
+                tool_params=tool_params,
+                tool_calls=previous_tool_calls,
+            )

         if not tool_response:
-            tool_response = self.
+            tool_response = self._directly_invoke_tool_call(
                 tool_name=tool_name,
                 tool_params=tool_params,
-                user_approved=
-                trace_span=trace_span,
+                user_approved=user_approved,
                 tool_number=tool_number,
+                tool_call_id=tool_call_id,
             )

         if not isinstance(tool_response, StructuredToolResult):
@@ -629,124 +559,165 @@
                 f"Tool {tool_name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
             )
             tool_response = StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.SUCCESS,
                 data=tool_response,
                 params=tool_params,
             )

         tool = self.tool_executor.get_tool_by_name(tool_name)
+
         return ToolCallResult(
             tool_call_id=tool_call_id,
             tool_name=tool_name,
-            description=tool.get_parameterized_one_liner(tool_params)
+            description=str(tool.get_parameterized_one_liner(tool_params))
+            if tool
+            else "",
             result=tool_response,
         )

-
-
+    @staticmethod
+    def _log_tool_call_result(
+        tool_span,
+        tool_call_result: ToolCallResult,
+        approval_possible=True,
+        original_token_count=None,
+    ):
+        tool_span.set_attributes(name=tool_call_result.tool_name)
+        status = tool_call_result.result.status
+
+        if (
+            status == StructuredToolResultStatus.APPROVAL_REQUIRED
+            and not approval_possible
+        ):
+            status = StructuredToolResultStatus.ERROR
+
+        if status == StructuredToolResultStatus.ERROR:
+            error = (
+                tool_call_result.result.error
+                if tool_call_result.result.error
+                else "Unspecified error"
+            )
+        else:
+            error = None
+        tool_span.log(
+            input=tool_call_result.result.params,
+            output=tool_call_result.result.data,
+            error=error,
+            metadata={
+                "status": status,
+                "description": tool_call_result.description,
+                "return_code": tool_call_result.result.return_code,
+                "error": tool_call_result.result.error,
+                "original_token_count": original_token_count,
+            },
+        )
+
+    def _invoke_llm_tool_call(
+        self,
+        tool_to_call: ChatCompletionMessageToolCall,
+        previous_tool_calls: list[dict],
+        trace_span=None,
+        tool_number=None,
+        user_approved: bool = False,
+    ) -> ToolCallResult:
+        if trace_span is None:
+            trace_span = DummySpan()
+        with trace_span.start_span(type="tool") as tool_span:
+            if not hasattr(tool_to_call, "function"):
+                # Handle the union type - ChatCompletionMessageToolCall can be either
+                # ChatCompletionMessageFunctionToolCall (with 'function' field and type='function')
+                # or ChatCompletionMessageCustomToolCall (with 'custom' field and type='custom').
+                # We use hasattr to check for the 'function' attribute as it's more flexible
+                # and doesn't require importing the specific type.
+                tool_name = "Unknown_Custom_Tool"
+                logging.error(f"Unsupported custom tool call: {tool_to_call}")
+                tool_call_result = ToolCallResult(
+                    tool_call_id=tool_to_call.id,
+                    tool_name=tool_name,
+                    description="NA",
+                    result=StructuredToolResult(
+                        status=StructuredToolResultStatus.ERROR,
+                        error="Custom tool calls are not supported",
+                        params=None,
+                    ),
+                )
+            else:
+                tool_name = tool_to_call.function.name
+                tool_arguments = tool_to_call.function.arguments
+                tool_id = tool_to_call.id
+                tool_call_result = self._get_tool_call_result(
+                    tool_id,
+                    tool_name,
+                    tool_arguments,
+                    previous_tool_calls=previous_tool_calls,
+                    tool_number=tool_number,
+                    user_approved=user_approved,
+                )
+
+            original_token_count = prevent_overly_big_tool_response(
+                tool_call_result=tool_call_result, llm=self.llm
+            )
+
+            ToolCallingLLM._log_tool_call_result(
+                tool_span,
+                tool_call_result,
+                self.approval_callback is not None,
+                original_token_count,
+            )
+            return tool_call_result
+
+    def _handle_tool_call_approval(
+        self,
+        tool_call_result: ToolCallResult,
+        tool_number: Optional[int],
+        trace_span: Any,
     ) -> ToolCallResult:
         """
         Handle approval for a single tool call if required.

         Args:
             tool_call_result: A single tool call result that may require approval
+            tool_number: The tool call number

         Returns:
             Updated tool call result with approved/denied status
         """

-        if tool_call_result.result.status != ToolResultStatus.APPROVAL_REQUIRED:
-            return tool_call_result
-
         # If no approval callback, convert to ERROR because it is assumed the client may not be able to handle approvals
         if not self.approval_callback:
-            tool_call_result.result.status =
+            tool_call_result.result.status = StructuredToolResultStatus.ERROR
             return tool_call_result

         # Get approval from user
-
-
-
-
-
-
-
-
-
-
-                user_approved=True,
-                trace_span=DummySpan(),
-                tool_number=tool_number,
-            )
-            tool_call_result.result = new_response
-        else:
-            # User denied - update to error
-            feedback_text = f" User feedback: {feedback}" if feedback else ""
-            tool_call_result.result.status = ToolResultStatus.ERROR
-            tool_call_result.result.error = (
-                f"User denied command execution.{feedback_text}"
-            )
-
-        return tool_call_result
-
-    @staticmethod
-    def __load_post_processing_user_prompt(
-        input_prompt, investigation, user_prompt: Optional[str] = None
-    ) -> str:
-        if not user_prompt:
-            user_prompt = "builtin://generic_post_processing.jinja2"
-        return load_and_render_prompt(
-            user_prompt, {"investigation": investigation, "prompt": input_prompt}
-        )
-
-    def _post_processing_call(
-        self,
-        prompt,
-        investigation,
-        user_prompt: Optional[str] = None,
-        system_prompt: str = "You are an AI assistant summarizing Kubernetes issues.",
-    ) -> tuple[Optional[str], float]:
-        try:
-            user_prompt = ToolCallingLLM.__load_post_processing_user_prompt(
-                prompt, investigation, user_prompt
-            )
-
-            logging.debug(f'Post processing prompt:\n"""\n{user_prompt}\n"""')
-            messages = [
-                {
-                    "role": "system",
-                    "content": system_prompt,
-                },
-                {
-                    "role": "user",
-                    "content": format_tags_in_string(user_prompt),
-                },
-            ]
-            full_response = self.llm.completion(messages=messages, temperature=0)
-            logging.debug(f"Post processing response {full_response}")
-
-            # Extract and log cost information for post-processing
-            post_processing_cost = _extract_cost_from_response(full_response)
-            if post_processing_cost > 0:
-                cost_logger.debug(
-                    f"Post-processing LLM cost: ${post_processing_cost:.6f}"
+        with trace_span.start_span(
+            type="task", name=f"Ask approval for {tool_call_result.tool_name}"
+        ):
+            approved, feedback = self.approval_callback(tool_call_result.result)
+
+        # Note - Tool calls are currently logged twice, once when returning APPROVAL_REQUIRED and once here
+        with trace_span.start_span(type="tool") as tool_span:
+            if approved:
+                logging.debug(
+                    f"User approved command: {tool_call_result.result.invocation}"
                 )
+                new_response = self._directly_invoke_tool_call(
+                    tool_name=tool_call_result.tool_name,
+                    tool_params=tool_call_result.result.params or {},
+                    user_approved=True,
+                    tool_number=tool_number,
+                    tool_call_id=tool_call_result.tool_call_id,
+                )
+                tool_call_result.result = new_response
+            else:
+                # User denied - update to error
+                feedback_text = f" User feedback: {feedback}" if feedback else ""
+                tool_call_result.result.status = StructuredToolResultStatus.ERROR
+                tool_call_result.result.error = (
+                    f"User denied command execution.{feedback_text}"
+                )
+            ToolCallingLLM._log_tool_call_result(tool_span, tool_call_result)

-
-        except Exception:
-            logging.exception("Failed to run post processing", exc_info=True)
-            return investigation, 0.0
-
-    @sentry_sdk.trace
-    def truncate_messages_to_fit_context(
-        self, messages: list, max_context_size: int, maximum_output_token: int
-    ) -> list:
-        return truncate_messages_to_fit_context(
-            messages,
-            max_context_size,
-            maximum_output_token,
-            self.llm.count_tokens_for_message,
-        )
+        return tool_call_result

     def call_stream(
         self,
@@ -755,47 +726,55 @@
         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
         sections: Optional[InputSectionsDataType] = None,
         msgs: Optional[list[dict]] = None,
+        enable_tool_approval: bool = False,
+        tool_decisions: List[ToolApprovalDecision] | None = None,
     ):
         """
         This function DOES NOT call llm.completion(stream=true).
         This function streams holmes one iteration at a time instead of waiting for all iterations to complete.
         """
-
+
+        # Process tool decisions if provided
+        if msgs and tool_decisions:
+            logging.info(f"Processing {len(tool_decisions)} tool decisions")
+            msgs, events = self.process_tool_decisions(msgs, tool_decisions)
+            yield from events
+
+        messages: list[dict] = []
         if system_prompt:
             messages.append({"role": "system", "content": system_prompt})
         if user_prompt:
             messages.append({"role": "user", "content": user_prompt})
         if msgs:
             messages.extend(msgs)
-        perf_timing = PerformanceTiming("tool_calling_llm.call")
         tool_calls: list[dict] = []
         tools = self.tool_executor.get_all_tools_openai_format(
             target_model=self.llm.model
         )
-        perf_timing.measure("get_all_tools_openai_format")
         max_steps = self.max_steps
+        metadata: Dict[Any, Any] = {}
         i = 0
         tool_number_offset = 0

         while i < max_steps:
             i += 1
-            perf_timing.measure(f"start iteration {i}")
             logging.debug(f"running iteration {i}")

             tools = None if i == max_steps else tools
             tool_choice = "auto" if tools else None

-
-
-
-
+            limit_result = limit_input_context_window(
+                llm=self.llm, messages=messages, tools=tools
+            )
+            yield from limit_result.events
+            messages = limit_result.messages
+            metadata = metadata | limit_result.metadata

-            if (
-
-
-
-
-                perf_timing.measure("truncate_messages_to_fit_context")
+            if (
+                limit_result.conversation_history_compacted
+                and RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION
+            ):
+                tool_calls = []

             logging.debug(f"sending messages={messages}\n\ntools={tools}")
             try:
@@ -812,7 +791,6 @@
             # Log cost information for this iteration (no accumulation in streaming)
             _process_cost_info(full_response, log_prefix="LLM iteration")

-            perf_timing.measure("llm.completion")
             # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
             except BadRequestError as e:
                 if "Unrecognized request arguments supplied: tool_choice, tools" in str(
@@ -834,9 +812,10 @@

             if incorrect_tool_call:
                 logging.warning(
-                    "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-
+                    "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4.1' or other structured output compatible models. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
                 )
                 # disable structured output going forward and and retry
+                sentry_helper.capture_structured_output_incorrect_tool_call()
                 response_format = None
                 max_steps = max_steps + 1
                 continue
@@ -847,11 +826,25 @@
                 )
             )

+            tokens = self.llm.count_tokens(messages=messages, tools=tools)
+            add_token_count_to_metadata(
+                tokens=tokens,
+                full_llm_response=full_response,
+                max_context_size=limit_result.max_context_size,
+                maximum_output_token=limit_result.maximum_output_token,
+                metadata=metadata,
+            )
+            yield build_stream_event_token_count(metadata=metadata)
+
             tools_to_call = getattr(response_message, "tool_calls", None)
             if not tools_to_call:
                 yield StreamMessage(
                     event=StreamEvents.ANSWER_END,
-                    data={
+                    data={
+                        "content": response_message.content,
+                        "messages": messages,
+                        "metadata": metadata,
+                    },
                 )
                 return

@@ -860,14 +853,22 @@
             if reasoning or message:
                 yield StreamMessage(
                     event=StreamEvents.AI_MESSAGE,
-                    data={
+                    data={
+                        "content": message,
+                        "reasoning": reasoning,
+                        "metadata": metadata,
+                    },
                 )

-
+            # Check if any tools require approval first
+            pending_approvals = []
+            approval_required_tools = []
+
             with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
                 futures = []
                 for tool_index, t in enumerate(tools_to_call, 1):  # type: ignore
                     tool_number = tool_number_offset + tool_index
+
                     future = executor.submit(
                         self._invoke_llm_tool_call,
                         tool_to_call=t,  # type: ignore
@@ -884,15 +885,72 @@
                 for future in concurrent.futures.as_completed(futures):
                     tool_call_result: ToolCallResult = future.result()

-
-
+                    if (
+                        tool_call_result.result.status
+                        == StructuredToolResultStatus.APPROVAL_REQUIRED
+                    ):
+                        if enable_tool_approval:
+                            pending_approvals.append(
+                                PendingToolApproval(
+                                    tool_call_id=tool_call_result.tool_call_id,
+                                    tool_name=tool_call_result.tool_name,
+                                    description=tool_call_result.description,
+                                    params=tool_call_result.result.params or {},
+                                )
+                            )
+                            approval_required_tools.append(tool_call_result)
+
+                            yield StreamMessage(
+                                event=StreamEvents.TOOL_RESULT,
+                                data=tool_call_result.as_streaming_tool_result_response(),
+                            )
+                        else:
+                            tool_call_result.result.status = (
+                                StructuredToolResultStatus.ERROR
+                            )
+                            tool_call_result.result.error = f"Tool call rejected for security reasons: {tool_call_result.result.error}"
+
+                            tool_calls.append(
+                                tool_call_result.as_tool_result_response()
+                            )
+                            messages.append(tool_call_result.as_tool_call_message())
+
+                            yield StreamMessage(
+                                event=StreamEvents.TOOL_RESULT,
+                                data=tool_call_result.as_streaming_tool_result_response(),
+                            )
+
+                    else:
+                        tool_calls.append(tool_call_result.as_tool_result_response())
+                        messages.append(tool_call_result.as_tool_call_message())
+
+                        yield StreamMessage(
+                            event=StreamEvents.TOOL_RESULT,
+                            data=tool_call_result.as_streaming_tool_result_response(),
+                        )

-
+            # If we have approval required tools, end the stream with pending approvals
+            if pending_approvals:
+                # Add assistant message with pending tool calls
+                for result in approval_required_tools:
+                    tool_call = self.find_assistant_tool_call_request(
+                        tool_call_id=result.tool_call_id, messages=messages
+                    )
+                    tool_call["pending_approval"] = True

+                # End stream with approvals required
                 yield StreamMessage(
-                    event=StreamEvents.
-                    data=
+                    event=StreamEvents.APPROVAL_REQUIRED,
+                    data={
+                        "content": None,
+                        "messages": messages,
+                        "pending_approvals": [
+                            approval.model_dump() for approval in pending_approvals
+                        ],
+                        "requires_approval": True,
+                    },
                 )
+                return

             # Update the tool number offset for the next iteration
             tool_number_offset += len(tools_to_call)
@@ -901,6 +959,21 @@
                 f"Too many LLM calls - exceeded max_steps: {i}/{self.max_steps}"
             )

+    def find_assistant_tool_call_request(
+        self, tool_call_id: str, messages: list[dict[str, Any]]
+    ) -> dict[str, Any]:
+        for message in messages:
+            if message.get("role") == "assistant":
+                for tool_call in message.get("tool_calls", []):
+                    if tool_call.get("id") == tool_call_id:
+                        return tool_call
+
+        # Should not happen unless there is a bug.
+        # If we are here
+        raise Exception(
+            f"Failed to find assistant request for a tool_call in conversation history. tool_call_id={tool_call_id}"
+        )
+

 # TODO: consider getting rid of this entirely and moving templating into the cmds in holmes_cli.py
 class IssueInvestigator(ToolCallingLLM):
@@ -927,14 +1000,13 @@
         self,
         issue: Issue,
         prompt: str,
-        instructions: Optional[ResourceInstructions],
         console: Optional[Console] = None,
         global_instructions: Optional[Instructions] = None,
-        post_processing_prompt: Optional[str] = None,
         sections: Optional[InputSectionsDataType] = None,
         trace_span=DummySpan(),
+        runbooks: Optional[RunbookCatalog] = None,
     ) -> LLMResult:
-
+        issue_runbooks = self.runbook_manager.get_instructions_for_issue(issue)

         request_structured_output_from_llm = True
         response_format = None
@@ -962,12 +1034,9 @@
         else:
             logging.info("Structured output is disabled for this request")

-        if instructions is not None and instructions.instructions:
-            runbooks.extend(instructions.instructions)
-
         if console and runbooks:
             console.print(
-                f"[bold]Analyzing with {len(
+                f"[bold]Analyzing with {len(issue_runbooks)} runbooks: {issue_runbooks}[/bold]"
             )
         elif console:
             console.print(
@@ -982,29 +1051,22 @@
                 "structured_output": request_structured_output_from_llm,
                 "toolsets": self.tool_executor.toolsets,
                 "cluster_name": self.cluster_name,
+                "runbooks_enabled": True if runbooks else False,
             },
         )

-
-
-            for document in instructions.documents:
-                docPrompts.append(
-                    f"* fetch information from this URL: {document.url}\n"
-                )
-            runbooks.extend(docPrompts)
-
-        user_prompt = ""
-        if runbooks:
-            for runbook_str in runbooks:
-                user_prompt += f"* {runbook_str}\n"
+        base_user = ""
+        base_user = f"{base_user}\n #This is context from the issue:\n{issue.raw}"

-
-
-
-
+        runbooks_ctx = generate_runbooks_args(
+            runbook_catalog=runbooks,
+            global_instructions=global_instructions,
+            issue_instructions=issue_runbooks,
+        )
+        user_prompt = generate_user_prompt(
+            base_user,
+            runbooks_ctx,
         )
-        user_prompt = f"{user_prompt}\n This is context from the issue {issue.raw}"
-
         logging.debug(
             "Rendered system prompt:\n%s", textwrap.indent(system_prompt, " ")
         )
@@ -1013,10 +1075,9 @@
         res = self.prompt_call(
             system_prompt,
             user_prompt,
-            post_processing_prompt,
             response_format=response_format,
             sections=sections,
             trace_span=trace_span,
         )
-        res.instructions =
+        res.instructions = issue_runbooks
         return res