holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. holmes/__init__.py +3 -5
  2. holmes/clients/robusta_client.py +20 -6
  3. holmes/common/env_vars.py +58 -3
  4. holmes/common/openshift.py +1 -1
  5. holmes/config.py +123 -148
  6. holmes/core/conversations.py +71 -15
  7. holmes/core/feedback.py +191 -0
  8. holmes/core/investigation.py +31 -39
  9. holmes/core/investigation_structured_output.py +3 -3
  10. holmes/core/issue.py +1 -1
  11. holmes/core/llm.py +508 -88
  12. holmes/core/models.py +108 -4
  13. holmes/core/openai_formatting.py +14 -1
  14. holmes/core/prompt.py +48 -3
  15. holmes/core/runbooks.py +1 -0
  16. holmes/core/safeguards.py +8 -6
  17. holmes/core/supabase_dal.py +295 -100
  18. holmes/core/tool_calling_llm.py +489 -428
  19. holmes/core/tools.py +325 -56
  20. holmes/core/tools_utils/token_counting.py +21 -0
  21. holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
  22. holmes/core/tools_utils/tool_executor.py +0 -13
  23. holmes/core/tools_utils/toolset_utils.py +1 -0
  24. holmes/core/toolset_manager.py +191 -5
  25. holmes/core/tracing.py +19 -3
  26. holmes/core/transformers/__init__.py +23 -0
  27. holmes/core/transformers/base.py +63 -0
  28. holmes/core/transformers/llm_summarize.py +175 -0
  29. holmes/core/transformers/registry.py +123 -0
  30. holmes/core/transformers/transformer.py +32 -0
  31. holmes/core/truncation/compaction.py +94 -0
  32. holmes/core/truncation/dal_truncation_utils.py +23 -0
  33. holmes/core/truncation/input_context_window_limiter.py +219 -0
  34. holmes/interactive.py +228 -31
  35. holmes/main.py +23 -40
  36. holmes/plugins/interfaces.py +2 -1
  37. holmes/plugins/prompts/__init__.py +2 -1
  38. holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
  39. holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
  40. holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
  41. holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
  42. holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
  43. holmes/plugins/prompts/generic_ask.jinja2 +0 -4
  44. holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
  45. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
  46. holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
  47. holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
  48. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
  49. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
  50. holmes/plugins/runbooks/__init__.py +145 -17
  51. holmes/plugins/runbooks/catalog.json +2 -0
  52. holmes/plugins/sources/github/__init__.py +4 -2
  53. holmes/plugins/sources/prometheus/models.py +1 -0
  54. holmes/plugins/toolsets/__init__.py +44 -27
  55. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  56. holmes/plugins/toolsets/aks.yaml +64 -0
  57. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
  58. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
  59. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
  60. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
  61. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
  62. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
  63. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
  64. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
  65. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
  66. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
  67. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
  68. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
  69. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
  70. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
  71. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
  72. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
  73. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
  74. holmes/plugins/toolsets/azure_sql/utils.py +0 -32
  75. holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
  76. holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
  77. holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
  78. holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
  79. holmes/plugins/toolsets/bash/common/bash.py +23 -13
  80. holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
  81. holmes/plugins/toolsets/bash/common/stringify.py +1 -1
  82. holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
  83. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
  84. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
  85. holmes/plugins/toolsets/bash/parse_command.py +12 -13
  86. holmes/plugins/toolsets/cilium.yaml +284 -0
  87. holmes/plugins/toolsets/connectivity_check.py +124 -0
  88. holmes/plugins/toolsets/coralogix/api.py +132 -119
  89. holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
  90. holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
  91. holmes/plugins/toolsets/coralogix/utils.py +15 -79
  92. holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
  93. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
  94. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
  95. holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
  96. holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
  97. holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
  98. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
  99. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
  100. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
  101. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
  102. holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
  103. holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
  104. holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
  105. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
  106. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
  107. holmes/plugins/toolsets/git.py +54 -50
  108. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
  109. holmes/plugins/toolsets/grafana/common.py +13 -29
  110. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
  111. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
  112. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
  113. holmes/plugins/toolsets/grafana/loki_api.py +4 -0
  114. holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
  115. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
  116. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
  117. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
  118. holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
  119. holmes/plugins/toolsets/internet/internet.py +15 -16
  120. holmes/plugins/toolsets/internet/notion.py +9 -11
  121. holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
  122. holmes/plugins/toolsets/investigator/model.py +3 -1
  123. holmes/plugins/toolsets/json_filter_mixin.py +134 -0
  124. holmes/plugins/toolsets/kafka.py +36 -42
  125. holmes/plugins/toolsets/kubernetes.yaml +317 -113
  126. holmes/plugins/toolsets/kubernetes_logs.py +9 -9
  127. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  128. holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
  129. holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
  130. holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
  131. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
  132. holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
  133. holmes/plugins/toolsets/openshift.yaml +283 -0
  134. holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
  135. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
  136. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  137. holmes/plugins/toolsets/rabbitmq/api.py +23 -4
  138. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
  139. holmes/plugins/toolsets/robusta/robusta.py +239 -68
  140. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
  141. holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
  142. holmes/plugins/toolsets/service_discovery.py +1 -1
  143. holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
  144. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
  145. holmes/plugins/toolsets/utils.py +88 -0
  146. holmes/utils/config_utils.py +91 -0
  147. holmes/utils/connection_utils.py +31 -0
  148. holmes/utils/console/result.py +10 -0
  149. holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
  150. holmes/utils/env.py +7 -0
  151. holmes/utils/file_utils.py +2 -1
  152. holmes/utils/global_instructions.py +60 -11
  153. holmes/utils/holmes_status.py +6 -4
  154. holmes/utils/holmes_sync_toolsets.py +0 -2
  155. holmes/utils/krr_utils.py +188 -0
  156. holmes/utils/log.py +15 -0
  157. holmes/utils/markdown_utils.py +2 -3
  158. holmes/utils/memory_limit.py +58 -0
  159. holmes/utils/sentry_helper.py +64 -0
  160. holmes/utils/stream.py +69 -8
  161. holmes/utils/tags.py +4 -3
  162. holmes/version.py +37 -15
  163. holmesgpt-0.18.4.dist-info/LICENSE +178 -0
  164. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
  165. holmesgpt-0.18.4.dist-info/RECORD +258 -0
  166. holmes/core/performance_timing.py +0 -72
  167. holmes/plugins/toolsets/aws.yaml +0 -80
  168. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
  169. holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
  170. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
  171. holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
  172. holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
  173. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
  174. holmes/plugins/toolsets/newrelic.py +0 -231
  175. holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
  176. holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
  177. holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
  178. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
  179. holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
  180. holmes/plugins/toolsets/servicenow/install.md +0 -37
  181. holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
  182. holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
  183. holmes/utils/keygen_utils.py +0 -6
  184. holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
  185. holmesgpt-0.13.2.dist-info/RECORD +0 -234
  186. /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
  187. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
  188. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
holmes/core/tools.py CHANGED
@@ -5,43 +5,73 @@ import re
5
5
  import shlex
6
6
  import subprocess
7
7
  import tempfile
8
+ import time
8
9
  from abc import ABC, abstractmethod
9
10
  from datetime import datetime
10
11
  from enum import Enum
11
- from typing import Any, Callable, Dict, List, Optional, OrderedDict, Tuple, Union
12
+ from typing import (
13
+ TYPE_CHECKING,
14
+ Any,
15
+ Callable,
16
+ Dict,
17
+ List,
18
+ Optional,
19
+ OrderedDict,
20
+ Tuple,
21
+ Union,
22
+ )
12
23
 
13
24
  from jinja2 import Template
14
- from pydantic import BaseModel, ConfigDict, Field, FilePath, model_validator
25
+ from pydantic import (
26
+ BaseModel,
27
+ ConfigDict,
28
+ Field,
29
+ FilePath,
30
+ PrivateAttr,
31
+ model_validator,
32
+ )
15
33
  from rich.console import Console
34
+ from rich.table import Table
16
35
 
36
+ from holmes.core.llm import LLM
17
37
  from holmes.core.openai_formatting import format_tool_to_open_ai_standard
38
+ from holmes.core.transformers import (
39
+ Transformer,
40
+ TransformerError,
41
+ registry,
42
+ )
18
43
  from holmes.plugins.prompts import load_and_render_prompt
19
- import time
20
- from rich.table import Table
44
+ from holmes.utils.config_utils import merge_transformers
45
+ from holmes.utils.memory_limit import check_oom_and_append_hint, get_ulimit_prefix
21
46
 
47
+ if TYPE_CHECKING:
48
+ from holmes.core.transformers import BaseTransformer
22
49
 
23
- class ToolResultStatus(str, Enum):
50
+ logger = logging.getLogger(__name__)
51
+
52
+
53
+ class StructuredToolResultStatus(str, Enum):
24
54
  SUCCESS = "success"
25
55
  ERROR = "error"
26
56
  NO_DATA = "no_data"
27
57
  APPROVAL_REQUIRED = "approval_required"
28
58
 
29
59
  def to_color(self) -> str:
30
- if self == ToolResultStatus.SUCCESS:
60
+ if self == StructuredToolResultStatus.SUCCESS:
31
61
  return "green"
32
- elif self == ToolResultStatus.ERROR:
62
+ elif self == StructuredToolResultStatus.ERROR:
33
63
  return "red"
34
- elif self == ToolResultStatus.APPROVAL_REQUIRED:
64
+ elif self == StructuredToolResultStatus.APPROVAL_REQUIRED:
35
65
  return "yellow"
36
66
  else:
37
67
  return "white"
38
68
 
39
69
  def to_emoji(self) -> str:
40
- if self == ToolResultStatus.SUCCESS:
70
+ if self == StructuredToolResultStatus.SUCCESS:
41
71
  return "✔"
42
- elif self == ToolResultStatus.ERROR:
72
+ elif self == StructuredToolResultStatus.ERROR:
43
73
  return "❌"
44
- elif self == ToolResultStatus.APPROVAL_REQUIRED:
74
+ elif self == StructuredToolResultStatus.APPROVAL_REQUIRED:
45
75
  return "⚠️"
46
76
  else:
47
77
  return "⚪️"
@@ -49,7 +79,7 @@ class ToolResultStatus(str, Enum):
49
79
 
50
80
  class StructuredToolResult(BaseModel):
51
81
  schema_version: str = "robusta:v1.0.0"
52
- status: ToolResultStatus
82
+ status: StructuredToolResultStatus
53
83
  error: Optional[str] = None
54
84
  return_code: Optional[int] = None
55
85
  data: Optional[Any] = None
@@ -67,9 +97,11 @@ class StructuredToolResult(BaseModel):
67
97
  else:
68
98
  try:
69
99
  if isinstance(self.data, BaseModel):
70
- return self.data.model_dump_json(indent=2)
100
+ return self.data.model_dump_json()
71
101
  else:
72
- return json.dumps(self.data, indent=2)
102
+ return json.dumps(
103
+ self.data, separators=(",", ":"), ensure_ascii=False
104
+ )
73
105
  except Exception:
74
106
  return str(self.data)
75
107
 
@@ -88,23 +120,6 @@ def sanitize_params(params):
88
120
  return {k: sanitize(str(v)) for k, v in params.items()}
89
121
 
90
122
 
91
- def format_tool_output(tool_result: Union[str, StructuredToolResult]) -> str:
92
- if isinstance(tool_result, StructuredToolResult):
93
- if tool_result.data and isinstance(tool_result.data, str):
94
- # Display logs and other string outputs in a way that is readable to humans.
95
- # To do this, we extract them from the result and print them as-is below.
96
- # The metadata is printed on a single line to
97
- data = tool_result.data
98
- tool_result.data = "The raw tool data is printed below this JSON"
99
- result_str = tool_result.model_dump_json(indent=2, exclude_none=True)
100
- result_str += f"\n{data}"
101
- return result_str
102
- else:
103
- return tool_result.model_dump_json(indent=2)
104
- else:
105
- return tool_result
106
-
107
-
108
123
  class ToolsetStatusEnum(str, Enum):
109
124
  ENABLED = "enabled"
110
125
  DISABLED = "disabled"
@@ -129,6 +144,18 @@ class ToolParameter(BaseModel):
129
144
  required: bool = True
130
145
  properties: Optional[Dict[str, "ToolParameter"]] = None # For object types
131
146
  items: Optional["ToolParameter"] = None # For array item schemas
147
+ enum: Optional[List[str]] = None # For restricting to specific values
148
+
149
+
150
+ class ToolInvokeContext(BaseModel):
151
+ model_config = ConfigDict(arbitrary_types_allowed=True)
152
+
153
+ tool_number: Optional[int] = None
154
+ user_approved: bool = False
155
+ llm: LLM
156
+ max_token_count: int
157
+ tool_call_id: str
158
+ tool_name: str
132
159
 
133
160
 
134
161
  class Tool(ABC, BaseModel):
@@ -143,6 +170,48 @@ class Tool(ABC, BaseModel):
143
170
  default=None,
144
171
  description="The URL of the icon for the tool, if None will get toolset icon",
145
172
  )
173
+ transformers: Optional[List[Transformer]] = None
174
+
175
+ # Private attribute to store initialized transformer instances for performance
176
+ _transformer_instances: Optional[List["BaseTransformer"]] = PrivateAttr(
177
+ default=None
178
+ )
179
+
180
+ def model_post_init(self, __context) -> None:
181
+ """Initialize transformer instances once during tool creation for better performance."""
182
+ logger.debug(
183
+ f"Tool '{self.name}' model_post_init: creating transformer instances"
184
+ )
185
+
186
+ if self.transformers:
187
+ logger.debug(
188
+ f"Tool '{self.name}' has {len(self.transformers)} transformers to initialize"
189
+ )
190
+ self._transformer_instances = []
191
+ for transformer in self.transformers:
192
+ if not transformer:
193
+ continue
194
+ logger.debug(
195
+ f" Initializing transformer '{transformer.name}' with config: {transformer.config}"
196
+ )
197
+ try:
198
+ # Create transformer instance once and cache it
199
+ transformer_instance = registry.create_transformer(
200
+ transformer.name, transformer.config
201
+ )
202
+ self._transformer_instances.append(transformer_instance)
203
+ logger.debug(
204
+ f"Initialized transformer '{transformer.name}' for tool '{self.name}'"
205
+ )
206
+ except Exception as e:
207
+ logger.warning(
208
+ f"Failed to initialize transformer '{transformer.name}' for tool '{self.name}': {e}"
209
+ )
210
+ # Continue with other transformers, don't fail the entire initialization
211
+ continue
212
+ else:
213
+ logger.debug(f"Tool '{self.name}' has no transformers")
214
+ self._transformer_instances = None
146
215
 
147
216
  def get_openai_format(self, target_model: str):
148
217
  return format_tool_to_open_ai_standard(
@@ -155,32 +224,123 @@ class Tool(ABC, BaseModel):
155
224
  def invoke(
156
225
  self,
157
226
  params: Dict,
158
- tool_number: Optional[int] = None,
159
- user_approved: bool = False,
227
+ context: ToolInvokeContext,
160
228
  ) -> StructuredToolResult:
161
- tool_number_str = f"#{tool_number} " if tool_number else ""
162
- logging.info(
229
+ tool_number_str = f"#{context.tool_number} " if context.tool_number else ""
230
+ logger.info(
163
231
  f"Running tool {tool_number_str}[bold]{self.name}[/bold]: {self.get_parameterized_one_liner(params)}"
164
232
  )
165
233
  start_time = time.time()
166
- result = self._invoke(params=params, user_approved=user_approved)
234
+ result = self._invoke(params=params, context=context)
167
235
  result.icon_url = self.icon_url
236
+
237
+ # Apply transformers to the result
238
+ transformed_result = self._apply_transformers(result)
168
239
  elapsed = time.time() - start_time
169
240
  output_str = (
170
- result.get_stringified_data()
171
- if hasattr(result, "get_stringified_data")
172
- else str(result)
241
+ transformed_result.get_stringified_data()
242
+ if hasattr(transformed_result, "get_stringified_data")
243
+ else str(transformed_result)
173
244
  )
174
- show_hint = f"/show {tool_number}" if tool_number else "/show"
245
+ show_hint = f"/show {context.tool_number}" if context.tool_number else "/show"
175
246
  line_count = output_str.count("\n") + 1 if output_str else 0
176
- logging.info(
247
+ logger.info(
177
248
  f" [dim]Finished {tool_number_str}in {elapsed:.2f}s, output length: {len(output_str):,} characters ({line_count:,} lines) - {show_hint} to view contents[/dim]"
178
249
  )
250
+ return transformed_result
251
+
252
+ def _apply_transformers(self, result: StructuredToolResult) -> StructuredToolResult:
253
+ """
254
+ Apply configured transformers to the tool result.
255
+
256
+ Args:
257
+ result: The original tool result
258
+
259
+ Returns:
260
+ The tool result with transformed data, or original result if transformation fails
261
+ """
262
+ if (
263
+ not self._transformer_instances
264
+ or result.status != StructuredToolResultStatus.SUCCESS
265
+ ):
266
+ return result
267
+
268
+ # Get the output string to transform
269
+ original_data = result.get_stringified_data()
270
+ if not original_data:
271
+ return result
272
+
273
+ transformed_data = original_data
274
+ transformers_applied = []
275
+
276
+ # Use cached transformer instances instead of creating new ones
277
+ for transformer_instance in self._transformer_instances:
278
+ try:
279
+ # Check if transformer should be applied
280
+ if not transformer_instance.should_apply(transformed_data):
281
+ logger.debug(
282
+ f"Transformer '{transformer_instance.name}' skipped for tool '{self.name}' (conditions not met)"
283
+ )
284
+ continue
285
+
286
+ # Apply transformation
287
+ pre_transform_size = len(transformed_data)
288
+ transform_start_time = time.time()
289
+ original_data = transformed_data # Keep a copy for potential reversion
290
+ transformed_data = transformer_instance.transform(transformed_data)
291
+ transform_elapsed = time.time() - transform_start_time
292
+
293
+ # Check if this is llm_summarize and revert if summary is not smaller
294
+ post_transform_size = len(transformed_data)
295
+ if (
296
+ transformer_instance.name == "llm_summarize"
297
+ and post_transform_size >= pre_transform_size
298
+ ):
299
+ # Revert to original data if summary is not smaller
300
+ transformed_data = original_data
301
+ logger.debug(
302
+ f"Transformer '{transformer_instance.name}' reverted for tool '{self.name}' "
303
+ f"(output size {post_transform_size:,} >= input size {pre_transform_size:,})"
304
+ )
305
+ continue # Don't mark as applied
306
+
307
+ transformers_applied.append(transformer_instance.name)
308
+
309
+ # Generic logging - transformers can override this with their own specific metrics
310
+ size_change = post_transform_size - pre_transform_size
311
+ logger.info(
312
+ f"Applied transformer '{transformer_instance.name}' to tool '{self.name}' output "
313
+ f"in {transform_elapsed:.2f}s (size: {pre_transform_size:,} → {post_transform_size:,} chars, "
314
+ f"change: {size_change:+,})"
315
+ )
316
+
317
+ except TransformerError as e:
318
+ logger.warning(
319
+ f"Transformer '{transformer_instance.name}' failed for tool '{self.name}': {e}"
320
+ )
321
+ # Continue with other transformers, don't fail the entire chain
322
+ continue
323
+ except Exception as e:
324
+ logger.error(
325
+ f"Unexpected error applying transformer '{transformer_instance.name}' to tool '{self.name}': {e}"
326
+ )
327
+ # Continue with other transformers
328
+ continue
329
+
330
+ # If any transformers were applied, update the result
331
+ if transformers_applied:
332
+ # Create a copy of the result with transformed data
333
+ result_dict = result.model_dump(exclude={"data"})
334
+ result_dict["data"] = transformed_data
335
+ return StructuredToolResult(**result_dict)
336
+
179
337
  return result
180
338
 
181
339
  @abstractmethod
182
340
  def _invoke(
183
- self, params: dict, user_approved: bool = False
341
+ self,
342
+ params: dict,
343
+ context: ToolInvokeContext,
184
344
  ) -> StructuredToolResult:
185
345
  """
186
346
  params: the tool params
@@ -230,15 +390,19 @@ class YAMLTool(Tool, BaseModel):
230
390
  context = {**params}
231
391
  return context
232
392
 
233
- def _get_status(self, return_code: int, raw_output: str) -> ToolResultStatus:
393
+ def _get_status(
394
+ self, return_code: int, raw_output: str
395
+ ) -> StructuredToolResultStatus:
234
396
  if return_code != 0:
235
- return ToolResultStatus.ERROR
397
+ return StructuredToolResultStatus.ERROR
236
398
  if raw_output == "":
237
- return ToolResultStatus.NO_DATA
238
- return ToolResultStatus.SUCCESS
399
+ return StructuredToolResultStatus.NO_DATA
400
+ return StructuredToolResultStatus.SUCCESS
239
401
 
240
402
  def _invoke(
241
- self, params: dict, user_approved: bool = False
403
+ self,
404
+ params: dict,
405
+ context: ToolInvokeContext,
242
406
  ) -> StructuredToolResult:
243
407
  if self.command is not None:
244
408
  raw_output, return_code, invocation = self.__invoke_command(params)
@@ -246,7 +410,7 @@ class YAMLTool(Tool, BaseModel):
246
410
  raw_output, return_code, invocation = self.__invoke_script(params) # type: ignore
247
411
 
248
412
  if self.additional_instructions and return_code == 0:
249
- logging.info(
413
+ logger.info(
250
414
  f"Applying additional instructions: {self.additional_instructions}"
251
415
  )
252
416
  output_with_instructions = self.__apply_additional_instructions(raw_output)
@@ -281,7 +445,7 @@ class YAMLTool(Tool, BaseModel):
281
445
  )
282
446
  return result.stdout.strip()
283
447
  except subprocess.CalledProcessError as e:
284
- logging.error(
448
+ logger.error(
285
449
  f"Failed to apply additional instructions: {self.additional_instructions}. "
286
450
  f"Error: {e.stderr}"
287
451
  )
@@ -316,9 +480,10 @@ class YAMLTool(Tool, BaseModel):
316
480
 
317
481
  def __execute_subprocess(self, cmd) -> Tuple[str, int]:
318
482
  try:
319
- logging.debug(f"Running `{cmd}`")
483
+ logger.debug(f"Running `{cmd}`")
484
+ protected_cmd = get_ulimit_prefix() + cmd
320
485
  result = subprocess.run(
321
- cmd,
486
+ protected_cmd,
322
487
  shell=True,
323
488
  text=True,
324
489
  check=False, # do not throw error, we just return the error code
@@ -327,9 +492,11 @@ class YAMLTool(Tool, BaseModel):
327
492
  stderr=subprocess.STDOUT,
328
493
  )
329
494
 
330
- return result.stdout.strip(), result.returncode
495
+ output = result.stdout.strip()
496
+ output = check_oom_and_append_hint(output, result.returncode)
497
+ return output, result.returncode
331
498
  except Exception as e:
332
- logging.error(
499
+ logger.error(
333
500
  f"An unexpected error occurred while running '{cmd}': {e}",
334
501
  exc_info=True,
335
502
  )
@@ -381,6 +548,7 @@ class Toolset(BaseModel):
381
548
  config: Optional[Any] = None
382
549
  is_default: bool = False
383
550
  llm_instructions: Optional[str] = None
551
+ transformers: Optional[List[Transformer]] = None
384
552
 
385
553
  # warning! private attributes are not copied, which can lead to subtle bugs.
386
554
  # e.g. l.extend([some_tool]) will reset these private attribute to None
@@ -406,13 +574,85 @@ class Toolset(BaseModel):
406
574
  @model_validator(mode="before")
407
575
  def preprocess_tools(cls, values):
408
576
  additional_instructions = values.get("additional_instructions", "")
577
+ transformers = values.get("transformers", None)
409
578
  tools_data = values.get("tools", [])
579
+
580
+ # Convert raw dict transformers to Transformer objects BEFORE merging
581
+ if transformers:
582
+ converted_transformers = []
583
+ for t in transformers:
584
+ if isinstance(t, dict):
585
+ try:
586
+ transformer_obj = Transformer(**t)
587
+ # Check if transformer is registered
588
+ from holmes.core.transformers import registry
589
+
590
+ if not registry.is_registered(transformer_obj.name):
591
+ logger.warning(
592
+ f"Invalid toolset transformer configuration: Transformer '{transformer_obj.name}' is not registered"
593
+ )
594
+ continue # Skip invalid transformer
595
+ converted_transformers.append(transformer_obj)
596
+ except Exception as e:
597
+ # Log warning and skip invalid transformer
598
+ logger.warning(
599
+ f"Invalid toolset transformer configuration: {e}"
600
+ )
601
+ continue
602
+ else:
603
+ # Already a Transformer object
604
+ converted_transformers.append(t)
605
+ transformers = converted_transformers if converted_transformers else None
606
+
410
607
  tools = []
411
608
  for tool in tools_data:
412
609
  if isinstance(tool, dict):
413
610
  tool["additional_instructions"] = additional_instructions
611
+
612
+ # Convert tool-level transformers to Transformer objects
613
+ tool_transformers = tool.get("transformers")
614
+ if tool_transformers:
615
+ converted_tool_transformers = []
616
+ for t in tool_transformers:
617
+ if isinstance(t, dict):
618
+ try:
619
+ transformer_obj = Transformer(**t)
620
+ # Check if transformer is registered
621
+ from holmes.core.transformers import registry
622
+
623
+ if not registry.is_registered(transformer_obj.name):
624
+ logger.warning(
625
+ f"Invalid tool transformer configuration: Transformer '{transformer_obj.name}' is not registered"
626
+ )
627
+ continue # Skip invalid transformer
628
+ converted_tool_transformers.append(transformer_obj)
629
+ except Exception as e:
630
+ # Log warning and skip invalid transformer
631
+ logger.warning(
632
+ f"Invalid tool transformer configuration: {e}"
633
+ )
634
+ continue
635
+ else:
636
+ # Already a Transformer object
637
+ converted_tool_transformers.append(t)
638
+ tool_transformers = (
639
+ converted_tool_transformers
640
+ if converted_tool_transformers
641
+ else None
642
+ )
643
+
644
+ # Merge toolset-level transformers with tool-level configs
645
+ tool["transformers"] = merge_transformers(
646
+ base_transformers=transformers,
647
+ override_transformers=tool_transformers,
648
+ )
414
649
  if isinstance(tool, Tool):
415
650
  tool.additional_instructions = additional_instructions
651
+ # Merge toolset-level transformers with tool-level configs
652
+ tool.transformers = merge_transformers( # type: ignore
653
+ base_transformers=transformers,
654
+ override_transformers=tool.transformers,
655
+ )
416
656
  tools.append(tool)
417
657
  values["tools"] = tools
418
658
 
@@ -434,7 +674,26 @@ class Toolset(BaseModel):
434
674
  def check_prerequisites(self):
435
675
  self.status = ToolsetStatusEnum.ENABLED
436
676
 
437
- for prereq in self.prerequisites:
677
+ # Sort prerequisites by type to fail fast on missing env vars before
678
+ # running slow commands (e.g., ArgoCD checks that timeout):
679
+ # 1. Static checks (instant)
680
+ # 2. Environment variable checks (instant, often required by commands)
681
+ # 3. Callable checks (variable speed)
682
+ # 4. Command checks (slowest - may timeout or hang)
683
+ def prereq_priority(prereq):
684
+ if isinstance(prereq, StaticPrerequisite):
685
+ return 0
686
+ elif isinstance(prereq, ToolsetEnvironmentPrerequisite):
687
+ return 1
688
+ elif isinstance(prereq, CallablePrerequisite):
689
+ return 2
690
+ elif isinstance(prereq, ToolsetCommandPrerequisite):
691
+ return 3
692
+ return 4 # Unknown types go last
693
+
694
+ sorted_prereqs = sorted(self.prerequisites, key=prereq_priority)
695
+
696
+ for prereq in sorted_prereqs:
438
697
  if isinstance(prereq, ToolsetCommandPrerequisite):
439
698
  try:
440
699
  command = self.interpolate_command(prereq.command)
@@ -482,11 +741,11 @@ class Toolset(BaseModel):
482
741
  self.status == ToolsetStatusEnum.DISABLED
483
742
  or self.status == ToolsetStatusEnum.FAILED
484
743
  ):
485
- logging.info(f"❌ Toolset {self.name}: {self.error}")
744
+ logger.info(f"❌ Toolset {self.name}: {self.error}")
486
745
  # no point checking further prerequisites if one failed
487
746
  return
488
747
 
489
- logging.info(f"✅ Toolset {self.name}")
748
+ logger.info(f"✅ Toolset {self.name}")
490
749
 
491
750
  @abstractmethod
492
751
  def get_example_config(self) -> Dict[str, Any]:
@@ -499,6 +758,16 @@ class Toolset(BaseModel):
499
758
  context={"tool_names": tool_names, "config": self.config},
500
759
  )
501
760
 
761
+ def _load_llm_instructions_from_file(self, file_dir: str, filename: str) -> None:
762
+ """Helper method to load LLM instructions from a jinja2 template file.
763
+
764
+ Args:
765
+ file_dir: Directory where the template file is located (typically os.path.dirname(__file__))
766
+ filename: Name of the jinja2 template file (e.g., "toolset_grafana_dashboard.jinja2")
767
+ """
768
+ template_file_path = os.path.abspath(os.path.join(file_dir, filename))
769
+ self._load_llm_instructions(jinja_template=f"file://{template_file_path}")
770
+
502
771
 
503
772
  class YAMLToolset(Toolset):
504
773
  tools: List[YAMLTool] # type: ignore
@@ -0,0 +1,21 @@
1
+ from holmes.core.llm import LLM
2
+ from holmes.core.models import format_tool_result_data
3
+ from holmes.core.tools import StructuredToolResult
4
+
5
+
6
def count_tool_response_tokens(
    llm: LLM,
    structured_tool_result: StructuredToolResult,
    tool_call_id: str,
    tool_name: str,
) -> int:
    """Return how many tokens a tool response consumes as a chat message.

    Builds the same "tool"-role message that would be appended to the
    conversation and delegates token counting to the LLM implementation,
    so the count matches what the model will actually be charged.

    Args:
        llm: LLM used to count tokens (its tokenizer defines the result).
        structured_tool_result: The tool's structured output to measure.
        tool_call_id: Identifier of the originating tool call.
        tool_name: Name of the tool that produced the result.

    Returns:
        Total token count of the formatted tool message.
    """
    content = format_tool_result_data(
        tool_result=structured_tool_result,
        tool_call_id=tool_call_id,
        tool_name=tool_name,
    )
    tool_message = {"role": "tool", "content": content}
    return llm.count_tokens([tool_message]).total_tokens
@@ -0,0 +1,40 @@
1
+ from pydantic import BaseModel
2
+
3
+ from holmes.core.llm import LLM
4
+ from holmes.core.models import ToolCallResult
5
+ from holmes.core.tools import StructuredToolResultStatus
6
+ from holmes.utils import sentry_helper
7
+
8
+
9
class ToolCallSizeMetadata(BaseModel):
    """Token-size bookkeeping for a single tool call response."""

    # Token count of the tool message(s) as measured by the LLM's tokenizer.
    messages_token: int
    # Maximum number of tokens a single tool response may occupy —
    # presumably llm.get_max_token_count_for_single_tool(); confirm at call sites.
    max_tokens_allowed: int
12
+
13
+
14
def get_pct_token_count(percent_of_total_context_window: float, llm: LLM) -> int:
    """Convert a percentage of the LLM's context window to an absolute token count.

    Args:
        percent_of_total_context_window: Percentage in the half-open range
            (0, 100]. Values outside this range are treated as "no limit".
        llm: LLM whose context window size is used as the base.

    Returns:
        Token count for the requested percentage (floor division), or the full
        context window size when the percentage is out of range.
    """
    context_window_size = llm.get_context_window_size()

    # Chained comparison is the idiomatic form of the original
    # `0 < p and p <= 100` check; out-of-range values fall back to the
    # whole context window.
    if 0 < percent_of_total_context_window <= 100:
        return int(context_window_size * percent_of_total_context_window // 100)
    return context_window_size
21
+
22
+
23
def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM):
    """Guard against tool responses that would exceed the per-tool token budget.

    Measures how many tokens the tool result occupies as a chat message. A
    SUCCESS result that exceeds the allowed budget has its payload discarded
    and is converted in place into an ERROR instructing the LLM to retry with
    a narrower query; the oversized call is also reported to Sentry.

    Args:
        tool_call_result: The tool call result to check (mutated in place
            when it is too large).
        llm: LLM used for token counting and budget lookup.

    Returns:
        The token count of the tool-call message (measured before any mutation).
    """
    as_message = tool_call_result.as_tool_call_message()
    token_count = llm.count_tokens(messages=[as_message]).total_tokens
    budget = llm.get_max_token_count_for_single_tool()

    succeeded = tool_call_result.result.status == StructuredToolResultStatus.SUCCESS
    if succeeded and token_count > budget:
        # How much smaller (relative to the actual size) the response must become.
        overshoot_pct = ((token_count - budget) / token_count) * 100
        tool_call_result.result.status = StructuredToolResultStatus.ERROR
        tool_call_result.result.data = None
        tool_call_result.result.error = (
            f"The tool call result is too large to return: {token_count} tokens.\n"
            f"The maximum allowed tokens is {budget} which is {overshoot_pct:.1f}% smaller.\n"
            "Instructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
        )

        sentry_helper.capture_toolcall_contains_too_many_tokens(
            tool_call_result, token_count, budget
        )
    return token_count
@@ -4,9 +4,7 @@ from typing import List, Optional
4
4
  import sentry_sdk
5
5
 
6
6
  from holmes.core.tools import (
7
- StructuredToolResult,
8
7
  Tool,
9
- ToolResultStatus,
10
8
  Toolset,
11
9
  ToolsetStatusEnum,
12
10
  )
@@ -46,17 +44,6 @@ class ToolExecutor:
46
44
  )
47
45
  self.tools_by_name[tool.name] = tool
48
46
 
49
- def invoke(self, tool_name: str, params: dict) -> StructuredToolResult:
50
- tool = self.get_tool_by_name(tool_name)
51
- return (
52
- tool.invoke(params)
53
- if tool
54
- else StructuredToolResult(
55
- status=ToolResultStatus.ERROR,
56
- error=f"Could not find tool named {tool_name}",
57
- )
58
- )
59
-
60
47
  def get_tool_by_name(self, name: str) -> Optional[Tool]:
61
48
  if name in self.tools_by_name:
62
49
  return self.tools_by_name[name]
@@ -1,4 +1,5 @@
1
1
  import logging
2
+
2
3
  from holmes.core.tools import Toolset, ToolsetStatusEnum
3
4
  from holmes.plugins.toolsets.logging_utils.logging_api import BasePodLoggingToolset
4
5