holmesgpt-0.11.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of holmesgpt has been flagged as potentially problematic.

Files changed (183)
  1. holmes/.git_archival.json +7 -0
  2. holmes/__init__.py +76 -0
  3. holmes/__init__.py.bak +76 -0
  4. holmes/clients/robusta_client.py +24 -0
  5. holmes/common/env_vars.py +47 -0
  6. holmes/config.py +526 -0
  7. holmes/core/__init__.py +0 -0
  8. holmes/core/conversations.py +578 -0
  9. holmes/core/investigation.py +152 -0
  10. holmes/core/investigation_structured_output.py +264 -0
  11. holmes/core/issue.py +54 -0
  12. holmes/core/llm.py +250 -0
  13. holmes/core/models.py +157 -0
  14. holmes/core/openai_formatting.py +51 -0
  15. holmes/core/performance_timing.py +72 -0
  16. holmes/core/prompt.py +42 -0
  17. holmes/core/resource_instruction.py +17 -0
  18. holmes/core/runbooks.py +26 -0
  19. holmes/core/safeguards.py +120 -0
  20. holmes/core/supabase_dal.py +540 -0
  21. holmes/core/tool_calling_llm.py +798 -0
  22. holmes/core/tools.py +566 -0
  23. holmes/core/tools_utils/__init__.py +0 -0
  24. holmes/core/tools_utils/tool_executor.py +65 -0
  25. holmes/core/tools_utils/toolset_utils.py +52 -0
  26. holmes/core/toolset_manager.py +418 -0
  27. holmes/interactive.py +229 -0
  28. holmes/main.py +1041 -0
  29. holmes/plugins/__init__.py +0 -0
  30. holmes/plugins/destinations/__init__.py +6 -0
  31. holmes/plugins/destinations/slack/__init__.py +2 -0
  32. holmes/plugins/destinations/slack/plugin.py +163 -0
  33. holmes/plugins/interfaces.py +32 -0
  34. holmes/plugins/prompts/__init__.py +48 -0
  35. holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
  36. holmes/plugins/prompts/_default_log_prompt.jinja2 +11 -0
  37. holmes/plugins/prompts/_fetch_logs.jinja2 +36 -0
  38. holmes/plugins/prompts/_general_instructions.jinja2 +86 -0
  39. holmes/plugins/prompts/_global_instructions.jinja2 +12 -0
  40. holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -0
  41. holmes/plugins/prompts/_toolsets_instructions.jinja2 +56 -0
  42. holmes/plugins/prompts/generic_ask.jinja2 +36 -0
  43. holmes/plugins/prompts/generic_ask_conversation.jinja2 +32 -0
  44. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +50 -0
  45. holmes/plugins/prompts/generic_investigation.jinja2 +42 -0
  46. holmes/plugins/prompts/generic_post_processing.jinja2 +13 -0
  47. holmes/plugins/prompts/generic_ticket.jinja2 +12 -0
  48. holmes/plugins/prompts/investigation_output_format.jinja2 +32 -0
  49. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +84 -0
  50. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +39 -0
  51. holmes/plugins/runbooks/README.md +22 -0
  52. holmes/plugins/runbooks/__init__.py +100 -0
  53. holmes/plugins/runbooks/catalog.json +14 -0
  54. holmes/plugins/runbooks/jira.yaml +12 -0
  55. holmes/plugins/runbooks/kube-prometheus-stack.yaml +10 -0
  56. holmes/plugins/runbooks/networking/dns_troubleshooting_instructions.md +66 -0
  57. holmes/plugins/runbooks/upgrade/upgrade_troubleshooting_instructions.md +44 -0
  58. holmes/plugins/sources/github/__init__.py +77 -0
  59. holmes/plugins/sources/jira/__init__.py +123 -0
  60. holmes/plugins/sources/opsgenie/__init__.py +93 -0
  61. holmes/plugins/sources/pagerduty/__init__.py +147 -0
  62. holmes/plugins/sources/prometheus/__init__.py +0 -0
  63. holmes/plugins/sources/prometheus/models.py +104 -0
  64. holmes/plugins/sources/prometheus/plugin.py +154 -0
  65. holmes/plugins/toolsets/__init__.py +171 -0
  66. holmes/plugins/toolsets/aks-node-health.yaml +65 -0
  67. holmes/plugins/toolsets/aks.yaml +86 -0
  68. holmes/plugins/toolsets/argocd.yaml +70 -0
  69. holmes/plugins/toolsets/atlas_mongodb/instructions.jinja2 +8 -0
  70. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +307 -0
  71. holmes/plugins/toolsets/aws.yaml +76 -0
  72. holmes/plugins/toolsets/azure_sql/__init__.py +0 -0
  73. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +600 -0
  74. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +309 -0
  75. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +445 -0
  76. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +251 -0
  77. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +317 -0
  78. holmes/plugins/toolsets/azure_sql/azure_base_toolset.py +55 -0
  79. holmes/plugins/toolsets/azure_sql/azure_sql_instructions.jinja2 +137 -0
  80. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +183 -0
  81. holmes/plugins/toolsets/azure_sql/install.md +66 -0
  82. holmes/plugins/toolsets/azure_sql/tools/__init__.py +1 -0
  83. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +324 -0
  84. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +243 -0
  85. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +205 -0
  86. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +249 -0
  87. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +373 -0
  88. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +237 -0
  89. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +172 -0
  90. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +170 -0
  91. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +188 -0
  92. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +180 -0
  93. holmes/plugins/toolsets/azure_sql/utils.py +83 -0
  94. holmes/plugins/toolsets/bash/__init__.py +0 -0
  95. holmes/plugins/toolsets/bash/bash_instructions.jinja2 +14 -0
  96. holmes/plugins/toolsets/bash/bash_toolset.py +208 -0
  97. holmes/plugins/toolsets/bash/common/bash.py +52 -0
  98. holmes/plugins/toolsets/bash/common/config.py +14 -0
  99. holmes/plugins/toolsets/bash/common/stringify.py +25 -0
  100. holmes/plugins/toolsets/bash/common/validators.py +24 -0
  101. holmes/plugins/toolsets/bash/grep/__init__.py +52 -0
  102. holmes/plugins/toolsets/bash/kubectl/__init__.py +100 -0
  103. holmes/plugins/toolsets/bash/kubectl/constants.py +96 -0
  104. holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +66 -0
  105. holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +88 -0
  106. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +108 -0
  107. holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +20 -0
  108. holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +46 -0
  109. holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +81 -0
  110. holmes/plugins/toolsets/bash/parse_command.py +103 -0
  111. holmes/plugins/toolsets/confluence.yaml +19 -0
  112. holmes/plugins/toolsets/consts.py +5 -0
  113. holmes/plugins/toolsets/coralogix/api.py +158 -0
  114. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +103 -0
  115. holmes/plugins/toolsets/coralogix/utils.py +181 -0
  116. holmes/plugins/toolsets/datadog.py +153 -0
  117. holmes/plugins/toolsets/docker.yaml +46 -0
  118. holmes/plugins/toolsets/git.py +756 -0
  119. holmes/plugins/toolsets/grafana/__init__.py +0 -0
  120. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +54 -0
  121. holmes/plugins/toolsets/grafana/common.py +68 -0
  122. holmes/plugins/toolsets/grafana/grafana_api.py +31 -0
  123. holmes/plugins/toolsets/grafana/loki_api.py +89 -0
  124. holmes/plugins/toolsets/grafana/tempo_api.py +124 -0
  125. holmes/plugins/toolsets/grafana/toolset_grafana.py +102 -0
  126. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +102 -0
  127. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +10 -0
  128. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -0
  129. holmes/plugins/toolsets/grafana/trace_parser.py +195 -0
  130. holmes/plugins/toolsets/helm.yaml +42 -0
  131. holmes/plugins/toolsets/internet/internet.py +275 -0
  132. holmes/plugins/toolsets/internet/notion.py +137 -0
  133. holmes/plugins/toolsets/kafka.py +638 -0
  134. holmes/plugins/toolsets/kubernetes.yaml +255 -0
  135. holmes/plugins/toolsets/kubernetes_logs.py +426 -0
  136. holmes/plugins/toolsets/kubernetes_logs.yaml +42 -0
  137. holmes/plugins/toolsets/logging_utils/__init__.py +0 -0
  138. holmes/plugins/toolsets/logging_utils/logging_api.py +217 -0
  139. holmes/plugins/toolsets/logging_utils/types.py +0 -0
  140. holmes/plugins/toolsets/mcp/toolset_mcp.py +135 -0
  141. holmes/plugins/toolsets/newrelic.py +222 -0
  142. holmes/plugins/toolsets/opensearch/__init__.py +0 -0
  143. holmes/plugins/toolsets/opensearch/opensearch.py +245 -0
  144. holmes/plugins/toolsets/opensearch/opensearch_logs.py +151 -0
  145. holmes/plugins/toolsets/opensearch/opensearch_traces.py +211 -0
  146. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +12 -0
  147. holmes/plugins/toolsets/opensearch/opensearch_utils.py +166 -0
  148. holmes/plugins/toolsets/prometheus/prometheus.py +818 -0
  149. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +38 -0
  150. holmes/plugins/toolsets/rabbitmq/api.py +398 -0
  151. holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2 +37 -0
  152. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +222 -0
  153. holmes/plugins/toolsets/robusta/__init__.py +0 -0
  154. holmes/plugins/toolsets/robusta/robusta.py +235 -0
  155. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +24 -0
  156. holmes/plugins/toolsets/runbook/__init__.py +0 -0
  157. holmes/plugins/toolsets/runbook/runbook_fetcher.py +78 -0
  158. holmes/plugins/toolsets/service_discovery.py +92 -0
  159. holmes/plugins/toolsets/servicenow/install.md +37 -0
  160. holmes/plugins/toolsets/servicenow/instructions.jinja2 +3 -0
  161. holmes/plugins/toolsets/servicenow/servicenow.py +198 -0
  162. holmes/plugins/toolsets/slab.yaml +20 -0
  163. holmes/plugins/toolsets/utils.py +137 -0
  164. holmes/plugins/utils.py +14 -0
  165. holmes/utils/__init__.py +0 -0
  166. holmes/utils/cache.py +84 -0
  167. holmes/utils/cert_utils.py +40 -0
  168. holmes/utils/default_toolset_installation_guide.jinja2 +44 -0
  169. holmes/utils/definitions.py +13 -0
  170. holmes/utils/env.py +53 -0
  171. holmes/utils/file_utils.py +56 -0
  172. holmes/utils/global_instructions.py +20 -0
  173. holmes/utils/holmes_status.py +22 -0
  174. holmes/utils/holmes_sync_toolsets.py +80 -0
  175. holmes/utils/markdown_utils.py +55 -0
  176. holmes/utils/pydantic_utils.py +54 -0
  177. holmes/utils/robusta.py +10 -0
  178. holmes/utils/tags.py +97 -0
  179. holmesgpt-0.11.5.dist-info/LICENSE.txt +21 -0
  180. holmesgpt-0.11.5.dist-info/METADATA +400 -0
  181. holmesgpt-0.11.5.dist-info/RECORD +183 -0
  182. holmesgpt-0.11.5.dist-info/WHEEL +4 -0
  183. holmesgpt-0.11.5.dist-info/entry_points.txt +3 -0
holmes/core/tool_calling_llm.py
@@ -0,0 +1,798 @@
+ import concurrent.futures
+ import json
+ import logging
+ import textwrap
+ from typing import Dict, List, Optional, Type, Union
+
+ import requests  # type: ignore
+ import sentry_sdk
+ from litellm.types.utils import Message
+ from openai import BadRequestError
+ from openai.types.chat.chat_completion_message_tool_call import (
+     ChatCompletionMessageToolCall,
+ )
+ from pydantic import BaseModel
+ from pydantic_core import from_json
+ from rich.console import Console
+
+ from holmes.common.env_vars import ROBUSTA_API_ENDPOINT, STREAM_CHUNKS_PER_PARSE
+ from holmes.core.investigation_structured_output import (
+     DEFAULT_SECTIONS,
+     REQUEST_STRUCTURED_OUTPUT_FROM_LLM,
+     InputSectionsDataType,
+     get_output_format_for_investigation,
+     is_response_an_incorrect_tool_call,
+     parse_markdown_into_sections_from_hash_sign,
+     process_response_into_sections,
+ )
+ from holmes.core.issue import Issue
+ from holmes.core.llm import LLM
+ from holmes.core.performance_timing import PerformanceTiming
+ from holmes.core.resource_instruction import ResourceInstructions
+ from holmes.core.runbooks import RunbookManager
+ from holmes.core.safeguards import prevent_overly_repeated_tool_call
+ from holmes.core.tools import StructuredToolResult, ToolResultStatus
+ from holmes.plugins.prompts import load_and_render_prompt
+ from holmes.utils.global_instructions import (
+     Instructions,
+     add_global_instructions_to_user_prompt,
+ )
+ from holmes.utils.tags import format_tags_in_string, parse_messages_tags
+ from holmes.core.tools_utils.tool_executor import ToolExecutor
+
+
+ def format_tool_result_data(tool_result: StructuredToolResult) -> str:
+     tool_response = tool_result.data
+     if isinstance(tool_result.data, str):
+         tool_response = tool_result.data
+     else:
+         try:
+             if isinstance(tool_result.data, BaseModel):
+                 tool_response = tool_result.data.model_dump_json(indent=2)
+             else:
+                 tool_response = json.dumps(tool_result.data, indent=2)
+         except Exception:
+             tool_response = str(tool_result.data)
+     if tool_result.status == ToolResultStatus.ERROR:
+         tool_response = f"{tool_result.error or 'Tool execution failed'}:\n\n{tool_result.data or ''}".strip()
+     return tool_response
+
+
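For orientation, a minimal sketch of the three data shapes format_tool_result_data handles: a plain string passes through, a Pydantic model is pretty-printed as JSON, and an error result is prefixed with its error text. The Pod model below is a hypothetical stand-in, not part of the package:

    from pydantic import BaseModel
    from holmes.core.tools import StructuredToolResult, ToolResultStatus

    class Pod(BaseModel):  # hypothetical model, for illustration only
        name: str
        phase: str

    ok = StructuredToolResult(status=ToolResultStatus.SUCCESS, data=Pod(name="api-0", phase="Running"))
    print(format_tool_result_data(ok))   # pretty JSON via model_dump_json(indent=2)

    err = StructuredToolResult(status=ToolResultStatus.ERROR, error="kubectl timed out", data=None)
    print(format_tool_result_data(err))  # -> 'kubectl timed out:'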
+ # TODO: I think there's a bug here because we don't account for the 'role' or json structure like '{...}' when counting tokens
+ # However, in practice it works because we reserve enough space for the output tokens that the minor inconsistency does not matter
+ # We should fix this in the future
+ # TODO: we truncate using character counts not token counts - this means we're overly aggressive with truncation - improve it by considering
+ # token truncation and not character truncation
+ def truncate_messages_to_fit_context(
+     messages: list, max_context_size: int, maximum_output_token: int, count_tokens_fn
+ ) -> list:
+     """
+     Helper function to truncate tool messages to fit within context limits.
+
+     Args:
+         messages: List of message dictionaries with roles and content
+         max_context_size: Maximum context window size for the model
+         maximum_output_token: Maximum tokens reserved for model output
+         count_tokens_fn: Function to count tokens for a list of messages
+
+     Returns:
+         Modified list of messages with truncated tool responses
+
+     Raises:
+         Exception: If non-tool messages exceed available context space
+     """
+     messages_except_tools = [
+         message for message in messages if message["role"] != "tool"
+     ]
+     message_size_without_tools = count_tokens_fn(messages_except_tools)
+
+     tool_call_messages = [message for message in messages if message["role"] == "tool"]
+
+     if message_size_without_tools >= (max_context_size - maximum_output_token):
+         logging.error(
+             f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input."
+         )
+         raise Exception(
+             f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - maximum_output_token} tokens available for input."
+         )
+
+     if len(tool_call_messages) == 0:
+         return messages
+
+     available_space = (
+         max_context_size - message_size_without_tools - maximum_output_token
+     )
+     remaining_space = available_space
+     tool_call_messages.sort(key=lambda x: len(x["content"]))
+
+     # Allocate space starting with small tools and going to larger tools, while maintaining fairness
+     # Small tools can often get exactly what they need, while larger tools may need to be truncated
+     # We ensure fairness (no tool gets more than others that need it) and also maximize utilization (we don't leave space unused)
+     for i, msg in enumerate(tool_call_messages):
+         remaining_tools = len(tool_call_messages) - i
+         max_allocation = remaining_space // remaining_tools
+         needed_space = len(msg["content"])
+         allocated_space = min(needed_space, max_allocation)
+
+         if needed_space > allocated_space:
+             truncation_notice = "\n\n[TRUNCATED]"
+             # Ensure the indicator fits in the allocated space
+             if allocated_space > len(truncation_notice):
+                 msg["content"] = (
+                     msg["content"][: allocated_space - len(truncation_notice)]
+                     + truncation_notice
+                 )
+                 logging.info(
+                     f"Truncating tool message '{msg['name']}' from {needed_space} to {allocated_space-len(truncation_notice)} characters"
+                 )
+             else:
+                 msg["content"] = truncation_notice[:allocated_space]
+                 logging.info(
+                     f"Truncating tool message '{msg['name']}' from {needed_space} to {allocated_space} characters"
+                 )
+             msg.pop("token_count", None)  # Remove token_count if present
+
+         remaining_space -= allocated_space
+     return messages
+
+
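As a worked example of the allocation loop above: with max_context_size=1000, maximum_output_token=200, and non-tool messages counting 300 tokens, available_space is 1000 - 300 - 200 = 500. Three tool messages of content lengths 50, 200, and 600 (sorted ascending) are allocated 500//3=166 -> 50, then 450//2=225 -> 200, then 250//1=250, so only the largest message is truncated. A sketch (the stub count_tokens_fn is an assumption; real token counting lives on the LLM object):

    messages = [
        {"role": "system", "content": "sys"},
        {"role": "user", "content": "user"},
        {"role": "tool", "name": "t1", "content": "a" * 50},
        {"role": "tool", "name": "t2", "content": "b" * 200},
        {"role": "tool", "name": "t3", "content": "c" * 600},
    ]
    truncated = truncate_messages_to_fit_context(
        messages,
        max_context_size=1000,
        maximum_output_token=200,
        count_tokens_fn=lambda msgs: 300,  # stub: pretend the non-tool messages total 300 tokens
    )
    # t3's content is now 237 chars + "\n\n[TRUNCATED]" (13 chars) = 250 chars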
+ class ToolCallResult(BaseModel):
+     tool_call_id: str
+     tool_name: str
+     description: str
+     result: StructuredToolResult
+     size: Optional[int] = None
+
+     def as_tool_call_message(self):
+         content = format_tool_result_data(self.result)
+         if self.result.params:
+             content = (
+                 f"Params used for the tool call: {json.dumps(self.result.params)}. The tool call output follows on the next line.\n"
+                 + content
+             )
+         return {
+             "tool_call_id": self.tool_call_id,
+             "role": "tool",
+             "name": self.tool_name,
+             "content": content,
+         }
+
+     def as_tool_result_response(self):
+         result_dump = self.result.model_dump()
+         result_dump["data"] = self.result.get_stringified_data()
+
+         return {
+             "tool_call_id": self.tool_call_id,
+             "tool_name": self.tool_name,
+             "description": self.description,
+             "role": "tool",
+             "result": result_dump,
+         }
+
+     def as_streaming_tool_result_response(self):
+         result_dump = self.result.model_dump()
+         result_dump["data"] = self.result.get_stringified_data()
+
+         return {
+             "tool_call_id": self.tool_call_id,
+             "role": "tool",
+             "description": self.description,
+             "name": self.tool_name,
+             "result": result_dump,
+         }
+
+
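A quick sketch of the conversation message as_tool_call_message produces (values illustrative); note how params, when present, are prepended to the tool output:

    tc = ToolCallResult(
        tool_call_id="call_1",
        tool_name="kubectl_get_pods",
        description="kubectl get pods -n default",
        result=StructuredToolResult(
            status=ToolResultStatus.SUCCESS,
            data="NAME   READY   STATUS\napi-0  1/1     Running",
            params={"namespace": "default"},
        ),
    )
    tc.as_tool_call_message()
    # {"tool_call_id": "call_1", "role": "tool", "name": "kubectl_get_pods",
    #  "content": 'Params used for the tool call: {"namespace": "default"}. The tool call output follows on the next line.\nNAME   READY   STATUS\napi-0  1/1     Running'}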
+ class LLMResult(BaseModel):
+     tool_calls: Optional[List[ToolCallResult]] = None
+     result: Optional[str] = None
+     unprocessed_result: Optional[str] = None
+     instructions: List[str] = []
+     # TODO: clean up these two
+     prompt: Optional[str] = None
+     messages: Optional[List[dict]] = None
+
+     def get_tool_usage_summary(self):
+         return "AI used info from issue and " + ",".join(
+             [f"`{tool_call.description}`" for tool_call in self.tool_calls]
+         )
+
+
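A small sketch of the summary string get_tool_usage_summary builds (note it assumes tool_calls is populated):

    summary = LLMResult(
        result="Pod api-0 is crash-looping due to OOMKilled.",
        tool_calls=[
            ToolCallResult(
                tool_call_id="call_1",
                tool_name="kubectl_describe_pod",
                description="kubectl describe pod api-0",
                result=StructuredToolResult(status=ToolResultStatus.SUCCESS, data="..."),
            )
        ],
    ).get_tool_usage_summary()
    # 'AI used info from issue and `kubectl describe pod api-0`'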
+ class ToolCallingLLM:
+     llm: LLM
+
+     def __init__(self, tool_executor: ToolExecutor, max_steps: int, llm: LLM):
+         self.tool_executor = tool_executor
+         self.max_steps = max_steps
+         self.llm = llm
+
+     def prompt_call(
+         self,
+         system_prompt: str,
+         user_prompt: str,
+         post_process_prompt: Optional[str] = None,
+         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
+         sections: Optional[InputSectionsDataType] = None,
+     ) -> LLMResult:
+         messages = [
+             {"role": "system", "content": system_prompt},
+             {"role": "user", "content": user_prompt},
+         ]
+         return self.call(
+             messages,
+             post_process_prompt,
+             response_format,
+             user_prompt=user_prompt,
+             sections=sections,
+         )
+
+     def messages_call(
+         self,
+         messages: List[Dict[str, str]],
+         post_process_prompt: Optional[str] = None,
+         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
+     ) -> LLMResult:
+         return self.call(messages, post_process_prompt, response_format)
+
+     @sentry_sdk.trace
+     def call(  # type: ignore
+         self,
+         messages: List[Dict[str, str]],
+         post_process_prompt: Optional[str] = None,
+         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
+         user_prompt: Optional[str] = None,
+         sections: Optional[InputSectionsDataType] = None,
+     ) -> LLMResult:
+         perf_timing = PerformanceTiming("tool_calling_llm.call")
+         tool_calls = []  # type: ignore
+         tools = self.tool_executor.get_all_tools_openai_format()
+         perf_timing.measure("get_all_tools_openai_format")
+         max_steps = self.max_steps
+         i = 0
+
+         while i < max_steps:
+             i += 1
+             perf_timing.measure(f"start iteration {i}")
+             logging.debug(f"running iteration {i}")
+             # on the last step we don't allow tools - we want to force a reply, not a request to run another tool
+             tools = None if i == max_steps else tools
+             tool_choice = "auto" if tools else None
+
+             total_tokens = self.llm.count_tokens_for_message(messages)
+             max_context_size = self.llm.get_context_window_size()
+             maximum_output_token = self.llm.get_maximum_output_token()
+             perf_timing.measure("count tokens")
+
+             if (total_tokens + maximum_output_token) > max_context_size:
+                 logging.warning("Token limit exceeded. Truncating tool responses.")
+                 messages = self.truncate_messages_to_fit_context(
+                     messages, max_context_size, maximum_output_token
+                 )
+                 perf_timing.measure("truncate_messages_to_fit_context")
+
+             logging.debug(f"sending messages={messages}\n\ntools={tools}")
+             try:
+                 full_response = self.llm.completion(
+                     messages=parse_messages_tags(messages),
+                     tools=tools,
+                     tool_choice=tool_choice,
+                     response_format=response_format,
+                     drop_params=True,
+                 )
+                 logging.debug(f"got response {full_response.to_json()}")  # type: ignore
+
+                 perf_timing.measure("llm.completion")
+             # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
+             except BadRequestError as e:
+                 if "Unrecognized request arguments supplied: tool_choice, tools" in str(
+                     e
+                 ):
+                     raise Exception(
+                         "The Azure model you chose is not supported. Model version 1106 or higher is required."
+                     )
+                 else:
+                     raise
+             response = full_response.choices[0]  # type: ignore
+
+             response_message = response.message  # type: ignore
+             if response_message and response_format:
+                 # The litellm API is buggy here. Stringifying and re-parsing ensures all attrs of the choice are available.
+                 dict_response = json.loads(full_response.to_json())  # type: ignore
+                 incorrect_tool_call = is_response_an_incorrect_tool_call(
+                     sections, dict_response.get("choices", [{}])[0]
+                 )
+
+                 if incorrect_tool_call:
+                     logging.warning(
+                         "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this Holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
+                     )
+                     # disable structured output going forward and retry
+                     response_format = None
+                     max_steps = max_steps + 1
+                     continue
+
+             new_message = response_message.model_dump(
+                 exclude_defaults=True, exclude_unset=True, exclude_none=True
+             )
+             messages.append(new_message)
+
+             tools_to_call = getattr(response_message, "tool_calls", None)
+             text_response = response_message.content
+             if not tools_to_call:
+                 # For chatty models, post-process and summarize the result.
+                 # This only works for calls where the user prompt is explicitly passed through.
+                 if post_process_prompt and user_prompt:
+                     logging.info("Running post processing on investigation.")
+                     raw_response = text_response
+                     post_processed_response = self._post_processing_call(
+                         prompt=user_prompt,
+                         investigation=raw_response,
+                         user_prompt=post_process_prompt,
+                     )
+
+                     perf_timing.end(f"- completed in {i} iterations -")
+                     return LLMResult(
+                         result=post_processed_response,
+                         unprocessed_result=raw_response,
+                         tool_calls=tool_calls,
+                         prompt=json.dumps(messages, indent=2),
+                         messages=messages,
+                     )
+
+                 perf_timing.end(f"- completed in {i} iterations -")
+                 return LLMResult(
+                     result=text_response,
+                     tool_calls=tool_calls,
+                     prompt=json.dumps(messages, indent=2),
+                     messages=messages,
+                 )
+
+             perf_timing.measure("pre-tool-calls")
+             with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
+                 futures = []
+                 for t in tools_to_call:
+                     logging.debug(f"Tool to call: {t}")
+                     futures.append(
+                         executor.submit(
+                             self._invoke_tool,
+                             tool_to_call=t,
+                             previous_tool_calls=tool_calls,
+                         )
+                     )
+
+                 for future in concurrent.futures.as_completed(futures):
+                     tool_call_result: ToolCallResult = future.result()
+
+                     tool_calls.append(tool_call_result.as_tool_result_response())
+                     messages.append(tool_call_result.as_tool_call_message())
+
+                     perf_timing.measure(f"tool completed {tool_call_result.tool_name}")
+
+     def _invoke_tool(
+         self,
+         tool_to_call: ChatCompletionMessageToolCall,
+         previous_tool_calls: list[dict],
+     ) -> ToolCallResult:
+         tool_name = tool_to_call.function.name
+         tool_params = None
+         try:
+             tool_params = json.loads(tool_to_call.function.arguments)
+         except Exception:
+             logging.warning(
+                 f"Failed to parse arguments for tool: {tool_name}. args: {tool_to_call.function.arguments}"
+             )
+         tool_call_id = tool_to_call.id
+         tool = self.tool_executor.get_tool_by_name(tool_name)
+
+         if (not tool) or (tool_params is None):
+             logging.warning(
+                 f"Skipping tool execution for {tool_name}: args: {tool_to_call.function.arguments}"
+             )
+             return ToolCallResult(
+                 tool_call_id=tool_call_id,
+                 tool_name=tool_name,
+                 description="NA",
+                 result=StructuredToolResult(
+                     status=ToolResultStatus.ERROR,
+                     error=f"Failed to find tool {tool_name} or parse its arguments",
+                     params=tool_params,
+                 ),
+             )
+
+         tool_response = None
+         try:
+             tool_response = prevent_overly_repeated_tool_call(
+                 tool_name=tool.name,
+                 tool_params=tool_params,
+                 tool_calls=previous_tool_calls,
+             )
+             if not tool_response:
+                 tool_response = tool.invoke(tool_params)
+
+             if not isinstance(tool_response, StructuredToolResult):
+                 # Should never be needed, but ensures Holmes does not crash if one of the tools does not return the right type
+                 logging.error(
+                     f"Tool {tool.name} did not return a StructuredToolResult. Wrapping the tool result in a StructuredToolResult..."
+                 )
+                 tool_response = StructuredToolResult(
+                     status=ToolResultStatus.SUCCESS,
+                     data=tool_response,
+                     params=tool_params,
+                 )
+
+         except Exception as e:
+             logging.error(
+                 f"Tool call to {tool_name} failed with an Exception", exc_info=True
+             )
+             tool_response = StructuredToolResult(
+                 status=ToolResultStatus.ERROR,
+                 error=f"Tool call failed: {e}",
+                 params=tool_params,
+             )
+         return ToolCallResult(
+             tool_call_id=tool_call_id,
+             tool_name=tool_name,
+             description=tool.get_parameterized_one_liner(tool_params),
+             result=tool_response,
+         )
+
+     @staticmethod
+     def __load_post_processing_user_prompt(
+         input_prompt, investigation, user_prompt: Optional[str] = None
+     ) -> str:
+         if not user_prompt:
+             user_prompt = "builtin://generic_post_processing.jinja2"
+         return load_and_render_prompt(
+             user_prompt, {"investigation": investigation, "prompt": input_prompt}
+         )
+
+     def _post_processing_call(
+         self,
+         prompt,
+         investigation,
+         user_prompt: Optional[str] = None,
+         system_prompt: str = "You are an AI assistant summarizing Kubernetes issues.",
+     ) -> Optional[str]:
+         try:
+             user_prompt = ToolCallingLLM.__load_post_processing_user_prompt(
+                 prompt, investigation, user_prompt
+             )
+
+             logging.debug(f'Post processing prompt:\n"""\n{user_prompt}\n"""')
+             messages = [
+                 {
+                     "role": "system",
+                     "content": system_prompt,
+                 },
+                 {
+                     "role": "user",
+                     "content": format_tags_in_string(user_prompt),
+                 },
+             ]
+             full_response = self.llm.completion(messages=messages, temperature=0)
+             logging.debug(f"Post processing response {full_response}")
+             return full_response.choices[0].message.content  # type: ignore
+         except Exception:
+             logging.exception("Failed to run post processing")
+             return investigation
+
+     @sentry_sdk.trace
+     def truncate_messages_to_fit_context(
+         self, messages: list, max_context_size: int, maximum_output_token: int
+     ) -> list:
+         return truncate_messages_to_fit_context(
+             messages,
+             max_context_size,
+             maximum_output_token,
+             self.llm.count_tokens_for_message,
+         )
+
+     def call_stream(
+         self,
+         system_prompt: str,
+         user_prompt: Optional[str] = None,
+         stream: bool = False,
+         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
+         sections: Optional[InputSectionsDataType] = None,
+         runbooks: Optional[List[str]] = None,
+     ):
+         def stream_analysis(it, peek_chunk):
+             buffer = peek_chunk.get("data", "")
+             yield create_sse_message(peek_chunk.get("event"), peek_chunk.get("data"))
+             chunk_counter = 0
+
+             for chunk in it:
+                 buffer += chunk
+                 chunk_counter += 1
+                 if chunk_counter == STREAM_CHUNKS_PER_PARSE:
+                     chunk_counter = 0
+                     yield create_sse_message(
+                         "ai_answer",
+                         {
+                             "sections": parse_markdown_into_sections_from_hash_sign(
+                                 buffer
+                             )
+                             or {},
+                             "analysis": buffer,
+                             "instructions": runbooks or [],
+                         },
+                     )
+
+             yield create_sse_message(
+                 "ai_answer_end",
+                 {
+                     "sections": parse_markdown_into_sections_from_hash_sign(buffer)
+                     or {},
+                     "analysis": buffer,
+                     "instructions": runbooks or [],
+                 },
+             )
+
+         messages = [
+             {"role": "system", "content": system_prompt},
+             {"role": "user", "content": user_prompt},
+         ]
+         perf_timing = PerformanceTiming("tool_calling_llm.call")
+         tools = self.tool_executor.get_all_tools_openai_format()
+         perf_timing.measure("get_all_tools_openai_format")
+         i = 0
+         tool_calls: list[dict] = []
+         while i < self.max_steps:
+             i += 1
+             perf_timing.measure(f"start iteration {i}")
+             logging.debug(f"running iteration {i}")
+
+             tools = [] if i == self.max_steps - 1 else tools
+             tool_choice = None if tools == [] else "auto"
+
+             total_tokens = self.llm.count_tokens_for_message(messages)  # type: ignore
+             max_context_size = self.llm.get_context_window_size()
+             maximum_output_token = self.llm.get_maximum_output_token()
+             perf_timing.measure("count tokens")
+
+             if (total_tokens + maximum_output_token) > max_context_size:
+                 logging.warning("Token limit exceeded. Truncating tool responses.")
+                 messages = self.truncate_messages_to_fit_context(
+                     messages, max_context_size, maximum_output_token
+                 )
+                 perf_timing.measure("truncate_messages_to_fit_context")
+
+             logging.debug(f"sending messages={messages}\n\ntools={tools}")
+             try:
+                 if stream:
+                     response = requests.post(
+                         f"{ROBUSTA_API_ENDPOINT}/chat/completions",
+                         json={
+                             "messages": parse_messages_tags(messages),  # type: ignore
+                             "tools": tools,
+                             "tool_choice": tool_choice,
+                             "response_format": response_format,
+                             "stream": True,
+                             "drop_param": True,
+                         },
+                         headers={"Authorization": f"Bearer {self.llm.api_key}"},  # type: ignore
+                         stream=True,
+                     )
+                     response.raise_for_status()
+                     it = response.iter_content(chunk_size=None, decode_unicode=True)
+                     peek_chunk = from_json(next(it))
+                     tools = peek_chunk.get("tool_calls")
+
+                     if not tools:
+                         yield from stream_analysis(it, peek_chunk)
+                         perf_timing.measure("llm.completion")
+                         return
+
+                     response_message = Message(**peek_chunk)
+                     tools_to_call = response_message.tool_calls
+                 else:
+                     full_response = self.llm.completion(
+                         messages=parse_messages_tags(messages),  # type: ignore
+                         tools=tools,
+                         tool_choice=tool_choice,
+                         response_format=response_format,
+                         stream=False,
+                         drop_params=True,
+                     )
+                     perf_timing.measure("llm.completion")
+
+                     response_message = full_response.choices[0].message  # type: ignore
+                     if response_message and response_format:
+                         # The litellm API is buggy here. Stringifying and re-parsing ensures all attrs of the choice are available.
+                         dict_response = json.loads(full_response.to_json())  # type: ignore
+                         incorrect_tool_call = is_response_an_incorrect_tool_call(
+                             sections, dict_response.get("choices", [{}])[0]
+                         )
+
+                         if incorrect_tool_call:
+                             logging.warning(
+                                 "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this Holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
+                             )
+                             # disable structured output going forward and retry
+                             response_format = None
+                             i -= 1
+                             continue
+
+                     tools_to_call = getattr(response_message, "tool_calls", None)
+                     if not tools_to_call:
+                         (text_response, sections) = process_response_into_sections(  # type: ignore
+                             response_message.content
+                         )
+
+                         yield create_sse_message(
+                             "ai_answer_end",
+                             {
+                                 "sections": sections or {},
+                                 "analysis": text_response,
+                                 "instructions": runbooks or [],
+                             },
+                         )
+                         return
+             # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
+             except BadRequestError as e:
+                 logging.exception("Bad completion request")
+                 if "Unrecognized request arguments supplied: tool_choice, tools" in str(
+                     e
+                 ):
+                     raise Exception(
+                         "The Azure model you chose is not supported. Model version 1106 or higher is required."
+                     )
+                 raise e
+             except Exception:
+                 logging.exception("Completion request exception")
+                 raise
+
+             messages.append(
+                 response_message.model_dump(
+                     exclude_defaults=True, exclude_unset=True, exclude_none=True
+                 )
+             )
+
+             perf_timing.measure("pre-tool-calls")
+             with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
+                 futures = []
+                 for t in tools_to_call:  # type: ignore
+                     futures.append(
+                         executor.submit(
+                             self._invoke_tool,
+                             tool_to_call=t,  # type: ignore
+                             previous_tool_calls=tool_calls,
+                         )
+                     )
+                     yield create_sse_message(
+                         "start_tool_calling", {"tool_name": t.function.name, "id": t.id}
+                     )
+
+                 for future in concurrent.futures.as_completed(futures):
+                     tool_call_result: ToolCallResult = future.result()
+
+                     tool_calls.append(tool_call_result.as_tool_result_response())
+                     messages.append(tool_call_result.as_tool_call_message())
+
+                     perf_timing.measure(f"tool completed {tool_call_result.tool_name}")
+
+                     streaming_result_dict = (
+                         tool_call_result.as_streaming_tool_result_response()
+                     )
+
+                     yield create_sse_message(
+                         "tool_calling_result", streaming_result_dict
+                     )
+
+
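Putting the class together, a minimal sketch of driving the tool-calling loop (assumes an already-configured ToolExecutor and LLM built from this package's own config; the prompts are illustrative):

    # a sketch, not runnable standalone: tool_executor and llm must be constructed
    # from holmes' own configuration (see holmes/config.py in the file list above)
    agent = ToolCallingLLM(tool_executor=tool_executor, max_steps=10, llm=llm)

    result = agent.prompt_call(
        system_prompt="You are a Kubernetes troubleshooting assistant.",
        user_prompt="Why is pod api-0 in CrashLoopBackOff?",
    )
    print(result.result)               # final answer once the model stops requesting tools
    print(len(result.messages or []))  # full transcript: system, user, assistant, and tool messages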
+ # TODO: consider getting rid of this entirely and moving templating into the cmds in holmes_cli.py
+ class IssueInvestigator(ToolCallingLLM):
+     """
+     Thin wrapper around ToolCallingLLM which:
+     1) Provides a default prompt for RCA
+     2) Accepts Issue objects
+     3) Looks up and attaches runbooks
+     """
+
+     def __init__(
+         self,
+         tool_executor: ToolExecutor,
+         runbook_manager: RunbookManager,
+         max_steps: int,
+         llm: LLM,
+     ):
+         super().__init__(tool_executor, max_steps, llm)
+         self.runbook_manager = runbook_manager
+
+     def investigate(
+         self,
+         issue: Issue,
+         prompt: str,
+         instructions: Optional[ResourceInstructions],
+         console: Optional[Console] = None,
+         global_instructions: Optional[Instructions] = None,
+         post_processing_prompt: Optional[str] = None,
+         sections: Optional[InputSectionsDataType] = None,
+     ) -> LLMResult:
+         runbooks = self.runbook_manager.get_instructions_for_issue(issue)
+
+         request_structured_output_from_llm = True
+         response_format = None
+
+         # This section is about setting vars to request the LLM to return structured output.
+         # It does not mean that Holmes will not return structured sections for investigation, as it is
+         # capable of splitting the markdown into sections
+         if not sections or len(sections) == 0:
+             # If no sections are passed, we will not ask the LLM for structured output
+             sections = DEFAULT_SECTIONS
+             request_structured_output_from_llm = False
+             logging.info(
+                 "No section received from the client. Default sections will be used."
+             )
+         elif self.llm.model and self.llm.model.startswith("bedrock"):
+             # Structured output does not work well with Bedrock Anthropic Sonnet 3.5 through litellm
+             request_structured_output_from_llm = False
+
+         if not REQUEST_STRUCTURED_OUTPUT_FROM_LLM:
+             request_structured_output_from_llm = False
+
+         if request_structured_output_from_llm:
+             response_format = get_output_format_for_investigation(sections)
+             logging.info("Structured output is enabled for this request")
+         else:
+             logging.info("Structured output is disabled for this request")
+
+         if instructions is not None and instructions.instructions:
+             runbooks.extend(instructions.instructions)
+
+         if console and runbooks:
+             console.print(
+                 f"[bold]Analyzing with {len(runbooks)} runbooks: {runbooks}[/bold]"
+             )
+         elif console:
+             console.print(
+                 "[bold]No runbooks found for this issue. Using default behaviour. (Add runbooks to guide the investigation.)[/bold]"
+             )
+
+         system_prompt = load_and_render_prompt(
+             prompt,
+             {
+                 "issue": issue,
+                 "sections": sections,
+                 "structured_output": request_structured_output_from_llm,
+                 "toolsets": self.tool_executor.toolsets,
+             },
+         )
+
+         if instructions is not None and len(instructions.documents) > 0:
+             docPrompts = []
+             for document in instructions.documents:
+                 docPrompts.append(
+                     f"* fetch information from this URL: {document.url}\n"
+                 )
+             runbooks.extend(docPrompts)
+
+         user_prompt = ""
+         if runbooks:
+             for runbook_str in runbooks:
+                 user_prompt += f"* {runbook_str}\n"
+
+             user_prompt = f'My instructions to check \n"""{user_prompt}"""'
+
+         user_prompt = add_global_instructions_to_user_prompt(
+             user_prompt, global_instructions
+         )
+         user_prompt = f"{user_prompt}\n This is context from the issue {issue.raw}"
+
+         logging.debug(
+             "Rendered system prompt:\n%s", textwrap.indent(system_prompt, " ")
+         )
+         logging.debug("Rendered user prompt:\n%s", textwrap.indent(user_prompt, " "))
+
+         res = self.prompt_call(
+             system_prompt,
+             user_prompt,
+             post_processing_prompt,
+             response_format=response_format,
+             sections=sections,
+         )
+         res.instructions = runbooks
+         return res
+
+
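To make the prompt assembly concrete: for a toy runbook list ["check recent deploys"], investigate() builds a user_prompt of roughly this shape (illustrative; global instructions, when configured, are appended by add_global_instructions_to_user_prompt before the issue context line):

    My instructions to check
    """* check recent deploys
    """
     This is context from the issue <issue.raw>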
+ def create_sse_message(event_type: str, data: dict = {}):
+     return f"event: {event_type}\ndata: {json.dumps(data)}\n\n"
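For reference, create_sse_message emits standard Server-Sent Events framing, which is what the call_stream generator yields to its consumer:

    create_sse_message("start_tool_calling", {"tool_name": "kubectl_get_pods", "id": "call_1"})
    # 'event: start_tool_calling\ndata: {"tool_name": "kubectl_get_pods", "id": "call_1"}\n\n'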