holmesgpt 0.16.2a0__py3-none-any.whl → 0.18.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. holmes/__init__.py +3 -5
  2. holmes/clients/robusta_client.py +4 -3
  3. holmes/common/env_vars.py +18 -2
  4. holmes/common/openshift.py +1 -1
  5. holmes/config.py +11 -6
  6. holmes/core/conversations.py +30 -13
  7. holmes/core/investigation.py +21 -25
  8. holmes/core/investigation_structured_output.py +3 -3
  9. holmes/core/issue.py +1 -1
  10. holmes/core/llm.py +50 -31
  11. holmes/core/models.py +19 -17
  12. holmes/core/openai_formatting.py +1 -1
  13. holmes/core/prompt.py +47 -2
  14. holmes/core/runbooks.py +1 -0
  15. holmes/core/safeguards.py +4 -2
  16. holmes/core/supabase_dal.py +4 -2
  17. holmes/core/tool_calling_llm.py +102 -141
  18. holmes/core/tools.py +19 -28
  19. holmes/core/tools_utils/token_counting.py +9 -2
  20. holmes/core/tools_utils/tool_context_window_limiter.py +13 -30
  21. holmes/core/tools_utils/tool_executor.py +0 -18
  22. holmes/core/tools_utils/toolset_utils.py +1 -0
  23. holmes/core/toolset_manager.py +37 -2
  24. holmes/core/tracing.py +13 -2
  25. holmes/core/transformers/__init__.py +1 -1
  26. holmes/core/transformers/base.py +1 -0
  27. holmes/core/transformers/llm_summarize.py +3 -2
  28. holmes/core/transformers/registry.py +2 -1
  29. holmes/core/transformers/transformer.py +1 -0
  30. holmes/core/truncation/compaction.py +37 -2
  31. holmes/core/truncation/input_context_window_limiter.py +3 -2
  32. holmes/interactive.py +52 -8
  33. holmes/main.py +17 -37
  34. holmes/plugins/interfaces.py +2 -1
  35. holmes/plugins/prompts/__init__.py +2 -1
  36. holmes/plugins/prompts/_fetch_logs.jinja2 +5 -5
  37. holmes/plugins/prompts/_runbook_instructions.jinja2 +2 -1
  38. holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
  39. holmes/plugins/prompts/conversation_history_compaction.jinja2 +2 -1
  40. holmes/plugins/prompts/generic_ask.jinja2 +0 -2
  41. holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -2
  42. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -2
  43. holmes/plugins/prompts/generic_investigation.jinja2 +0 -2
  44. holmes/plugins/prompts/investigation_procedure.jinja2 +2 -1
  45. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -2
  46. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -2
  47. holmes/plugins/runbooks/__init__.py +32 -3
  48. holmes/plugins/sources/github/__init__.py +4 -2
  49. holmes/plugins/sources/prometheus/models.py +1 -0
  50. holmes/plugins/toolsets/__init__.py +30 -26
  51. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +13 -12
  52. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
  53. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
  54. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
  55. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
  56. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
  57. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -12
  58. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +7 -7
  59. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -7
  60. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -5
  61. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
  62. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -7
  63. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +6 -8
  64. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +3 -3
  65. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +3 -3
  66. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +3 -3
  67. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +3 -3
  68. holmes/plugins/toolsets/azure_sql/utils.py +0 -32
  69. holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
  70. holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
  71. holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
  72. holmes/plugins/toolsets/bash/bash_toolset.py +2 -3
  73. holmes/plugins/toolsets/bash/common/bash.py +19 -9
  74. holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
  75. holmes/plugins/toolsets/bash/common/stringify.py +1 -1
  76. holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
  77. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
  78. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
  79. holmes/plugins/toolsets/bash/parse_command.py +12 -13
  80. holmes/plugins/toolsets/connectivity_check.py +124 -0
  81. holmes/plugins/toolsets/coralogix/api.py +132 -119
  82. holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
  83. holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
  84. holmes/plugins/toolsets/coralogix/utils.py +15 -79
  85. holmes/plugins/toolsets/datadog/datadog_api.py +36 -3
  86. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +34 -1
  87. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
  88. holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
  89. holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
  90. holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
  91. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +71 -28
  92. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +224 -375
  93. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +67 -36
  94. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +360 -343
  95. holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
  96. holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
  97. holmes/plugins/toolsets/git.py +7 -8
  98. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
  99. holmes/plugins/toolsets/grafana/common.py +2 -30
  100. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +2 -1
  101. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +18 -2
  102. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +92 -18
  103. holmes/plugins/toolsets/grafana/loki_api.py +4 -0
  104. holmes/plugins/toolsets/grafana/toolset_grafana.py +109 -25
  105. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +22 -0
  106. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +201 -33
  107. holmes/plugins/toolsets/grafana/trace_parser.py +3 -2
  108. holmes/plugins/toolsets/internet/internet.py +10 -10
  109. holmes/plugins/toolsets/internet/notion.py +5 -6
  110. holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
  111. holmes/plugins/toolsets/investigator/model.py +3 -1
  112. holmes/plugins/toolsets/json_filter_mixin.py +134 -0
  113. holmes/plugins/toolsets/kafka.py +12 -7
  114. holmes/plugins/toolsets/kubernetes.yaml +260 -30
  115. holmes/plugins/toolsets/kubernetes_logs.py +3 -3
  116. holmes/plugins/toolsets/logging_utils/logging_api.py +16 -6
  117. holmes/plugins/toolsets/mcp/toolset_mcp.py +88 -60
  118. holmes/plugins/toolsets/newrelic/new_relic_api.py +41 -1
  119. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +24 -0
  120. holmes/plugins/toolsets/newrelic/newrelic.py +212 -55
  121. holmes/plugins/toolsets/prometheus/prometheus.py +358 -102
  122. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +11 -3
  123. holmes/plugins/toolsets/rabbitmq/api.py +23 -4
  124. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +5 -5
  125. holmes/plugins/toolsets/robusta/robusta.py +5 -5
  126. holmes/plugins/toolsets/runbook/runbook_fetcher.py +25 -6
  127. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +1 -1
  128. holmes/plugins/toolsets/utils.py +1 -1
  129. holmes/utils/config_utils.py +1 -1
  130. holmes/utils/connection_utils.py +31 -0
  131. holmes/utils/console/result.py +10 -0
  132. holmes/utils/file_utils.py +2 -1
  133. holmes/utils/global_instructions.py +10 -26
  134. holmes/utils/holmes_status.py +4 -3
  135. holmes/utils/log.py +15 -0
  136. holmes/utils/markdown_utils.py +2 -3
  137. holmes/utils/memory_limit.py +58 -0
  138. holmes/utils/sentry_helper.py +23 -0
  139. holmes/utils/stream.py +12 -5
  140. holmes/utils/tags.py +4 -3
  141. holmes/version.py +3 -1
  142. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +12 -10
  143. holmesgpt-0.18.4.dist-info/RECORD +258 -0
  144. holmes/plugins/toolsets/aws.yaml +0 -80
  145. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -114
  146. holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
  147. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -736
  148. holmes/plugins/toolsets/grafana/grafana_api.py +0 -64
  149. holmes/plugins/toolsets/opensearch/__init__.py +0 -0
  150. holmes/plugins/toolsets/opensearch/opensearch.py +0 -250
  151. holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
  152. holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -215
  153. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
  154. holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
  155. holmes/utils/keygen_utils.py +0 -6
  156. holmesgpt-0.16.2a0.dist-info/RECORD +0 -258
  157. holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_ppl_query_docs.jinja2 +0 -0
  158. holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_query_assist.py +2 -2
  159. holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_query_assist_instructions.jinja2 +0 -0
  160. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/LICENSE +0 -0
  161. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
  162. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
holmes/core/runbooks.py CHANGED
@@ -1,4 +1,5 @@
 from typing import List
+
 from holmes.core.issue import Issue
 from holmes.plugins.runbooks import Runbook
 
holmes/core/safeguards.py CHANGED
@@ -4,9 +4,11 @@ from typing import Optional
 from pydantic import ValidationError
 
 from holmes.common.env_vars import TOOL_CALL_SAFEGUARDS_ENABLED
-from holmes.plugins.toolsets.logging_utils.logging_api import POD_LOGGING_TOOL_NAME
 from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
-from holmes.plugins.toolsets.logging_utils.logging_api import FetchPodLogsParams
+from holmes.plugins.toolsets.logging_utils.logging_api import (
+    POD_LOGGING_TOOL_NAME,
+    FetchPodLogsParams,
+)
 
 
 def _is_redundant_fetch_pod_logs(
holmes/core/supabase_dal.py CHANGED
@@ -10,10 +10,12 @@ from enum import Enum
 from typing import Dict, List, Optional, Tuple
 from uuid import uuid4
 
-from postgrest.base_request_builder import QueryArgs
+import sentry_sdk
 import yaml  # type: ignore
 from cachetools import TTLCache  # type: ignore
+from postgrest._sync import request_builder as supabase_request_builder
 from postgrest._sync.request_builder import SyncQueryRequestBuilder
+from postgrest.base_request_builder import QueryArgs
 from postgrest.exceptions import APIError as PGAPIError
 from postgrest.types import ReturnMethod
 from pydantic import BaseModel
@@ -40,7 +42,6 @@ from holmes.utils.definitions import RobustaConfig
 from holmes.utils.env import get_env_replacement
 from holmes.utils.global_instructions import Instructions
 from holmes.utils.krr_utils import calculate_krr_savings
-from postgrest._sync import request_builder as supabase_request_builder
 
 SUPABASE_TIMEOUT_SECONDS = int(os.getenv("SUPABASE_TIMEOUT_SECONDS", 3600))
 
@@ -112,6 +113,7 @@ class SupabaseDal:
             f"Initializing Robusta platform connection for account {self.account_id}"
         )
         options = ClientOptions(postgrest_client_timeout=SUPABASE_TIMEOUT_SECONDS)
+        sentry_sdk.set_tag("db_url", self.url)
         self.client = create_client(self.url, self.api_key, options)  # type: ignore
         self.user_id = self.sign_in()
         ttl = int(os.environ.get("SAAS_SESSION_TOKEN_TTL_SEC", "82800"))  # 23 hours
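The only behavioral change in this file is the new sentry_sdk.set_tag call during client initialization. A minimal standalone sketch of the pattern (the init call and function name here are hypothetical, not the package's code):

    import sentry_sdk

    sentry_sdk.init(dsn="")  # empty DSN disables sending; fine for a local sketch

    def connect(url: str) -> None:
        # Tag the Sentry scope before creating the client so every error
        # reported afterwards carries the target database URL as metadata.
        sentry_sdk.set_tag("db_url", url)
        # ...client creation would follow here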
holmes/core/tool_calling_llm.py CHANGED
@@ -2,13 +2,7 @@ import concurrent.futures
 import json
 import logging
 import textwrap
-from typing import Dict, List, Optional, Type, Union, Callable, Any
-
-from holmes.core.models import (
-    ToolApprovalDecision,
-    ToolCallResult,
-    PendingToolApproval,
-)
+from typing import Any, Callable, Dict, List, Optional, Type, Union
 
 import sentry_sdk
 from openai import BadRequestError
@@ -19,11 +13,10 @@ from pydantic import BaseModel, Field
 from rich.console import Console
 
 from holmes.common.env_vars import (
+    LOG_LLM_USAGE_RESPONSE,
     RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION,
     TEMPERATURE,
-    LOG_LLM_USAGE_RESPONSE,
 )
-
 from holmes.core.investigation_structured_output import (
     DEFAULT_SECTIONS,
     REQUEST_STRUCTURED_OUTPUT_FROM_LLM,
@@ -33,7 +26,12 @@ from holmes.core.investigation_structured_output import (
 )
 from holmes.core.issue import Issue
 from holmes.core.llm import LLM
-from holmes.core.resource_instruction import ResourceInstructions
+from holmes.core.models import (
+    PendingToolApproval,
+    ToolApprovalDecision,
+    ToolCallResult,
+)
+from holmes.core.prompt import generate_user_prompt
 from holmes.core.runbooks import RunbookManager
 from holmes.core.safeguards import prevent_overly_repeated_tool_call
 from holmes.core.tools import (
@@ -44,26 +42,26 @@ from holmes.core.tools import (
 from holmes.core.tools_utils.tool_context_window_limiter import (
     prevent_overly_big_tool_response,
 )
+from holmes.core.tools_utils.tool_executor import ToolExecutor
+from holmes.core.tracing import DummySpan
 from holmes.core.truncation.input_context_window_limiter import (
     limit_input_context_window,
 )
 from holmes.plugins.prompts import load_and_render_prompt
 from holmes.plugins.runbooks import RunbookCatalog
 from holmes.utils import sentry_helper
+from holmes.utils.colors import AI_COLOR
 from holmes.utils.global_instructions import (
     Instructions,
-    add_runbooks_to_user_prompt,
+    generate_runbooks_args,
 )
-from holmes.utils.tags import format_tags_in_string, parse_messages_tags
-from holmes.core.tools_utils.tool_executor import ToolExecutor
-from holmes.core.tracing import DummySpan
-from holmes.utils.colors import AI_COLOR
 from holmes.utils.stream import (
     StreamEvents,
     StreamMessage,
     add_token_count_to_metadata,
     build_stream_event_token_count,
 )
+from holmes.utils.tags import parse_messages_tags
 
 # Create a named logger for cost tracking
 cost_logger = logging.getLogger("holmes.costs")
@@ -142,6 +140,7 @@ def _process_cost_info(
 
 class LLMResult(LLMCosts):
     tool_calls: Optional[List[ToolCallResult]] = None
+    num_llm_calls: Optional[int] = None  # Number of LLM API calls (turns)
     result: Optional[str] = None
     unprocessed_result: Optional[str] = None
     instructions: List[str] = Field(default_factory=list)
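The new num_llm_calls field is additive and optional, so existing constructors of LLMResult keep working. A self-contained sketch of the same Pydantic pattern (abbreviated model with a hypothetical name):

    from typing import Optional
    from pydantic import BaseModel

    class ResultSketch(BaseModel):
        result: Optional[str] = None
        num_llm_calls: Optional[int] = None  # number of LLM API calls (turns)

    # Old call sites omit the field and get None; new ones can report turns.
    assert ResultSketch(result="ok").num_llm_calls is None
    assert ResultSketch(result="ok", num_llm_calls=3).num_llm_calls == 3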
@@ -269,7 +268,6 @@ class ToolCallingLLM:
         self,
         system_prompt: str,
         user_prompt: str,
-        post_process_prompt: Optional[str] = None,
         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
         sections: Optional[InputSectionsDataType] = None,
         trace_span=DummySpan(),
@@ -280,8 +278,7 @@
         ]
         return self.call(
             messages,
-            post_process_prompt,
-            response_format,
+            response_format=response_format,
             user_prompt=user_prompt,
             sections=sections,
             trace_span=trace_span,
@@ -290,19 +287,17 @@
     def messages_call(
         self,
         messages: List[Dict[str, str]],
-        post_process_prompt: Optional[str] = None,
         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
         trace_span=DummySpan(),
     ) -> LLMResult:
         return self.call(
-            messages, post_process_prompt, response_format, trace_span=trace_span
+            messages, response_format=response_format, trace_span=trace_span
         )
 
     @sentry_sdk.trace
     def call(  # type: ignore
         self,
         messages: List[Dict[str, str]],
-        post_process_prompt: Optional[str] = None,
         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
         user_prompt: Optional[str] = None,
         sections: Optional[InputSectionsDataType] = None,
@@ -403,43 +398,20 @@
                 )
 
             if not tools_to_call:
-                # For chatty models post process and summarize the result
-                # this only works for calls where user prompt is explicitly passed through
-                if post_process_prompt and user_prompt:
-                    logging.info("Running post processing on investigation.")
-                    raw_response = text_response
-                    post_processed_response, post_processing_cost = (
-                        self._post_processing_call(
-                            prompt=user_prompt,
-                            investigation=raw_response,
-                            user_prompt=post_process_prompt,
-                        )
-                    )
-                    costs.total_cost += post_processing_cost
-
-                    tokens = self.llm.count_tokens(messages=messages, tools=tools)
-
-                    add_token_count_to_metadata(
-                        tokens=tokens,
-                        full_llm_response=full_response,
-                        max_context_size=limit_result.max_context_size,
-                        maximum_output_token=limit_result.maximum_output_token,
-                        metadata=metadata,
-                    )
+                tokens = self.llm.count_tokens(messages=messages, tools=tools)
 
-                    return LLMResult(
-                        result=post_processed_response,
-                        unprocessed_result=raw_response,
-                        tool_calls=all_tool_calls,
-                        prompt=json.dumps(messages, indent=2),
-                        messages=messages,
-                        **costs.model_dump(),  # Include all cost fields
-                        metadata=metadata,
-                    )
+                add_token_count_to_metadata(
+                    tokens=tokens,
+                    full_llm_response=full_response,
+                    max_context_size=limit_result.max_context_size,
+                    maximum_output_token=limit_result.maximum_output_token,
+                    metadata=metadata,
+                )
 
                 return LLMResult(
                     result=text_response,
                     tool_calls=all_tool_calls,
+                    num_llm_calls=i,
                     prompt=json.dumps(messages, indent=2),
                     messages=messages,
                     **costs.model_dump(),  # Include all cost fields
@@ -484,14 +456,11 @@
                         tool_call_result.result.status
                         == StructuredToolResultStatus.APPROVAL_REQUIRED
                     ):
-                        with trace_span.start_span(type="tool") as tool_span:
-                            tool_call_result = self._handle_tool_call_approval(
-                                tool_call_result=tool_call_result,
-                                tool_number=tool_number,
-                            )
-                            ToolCallingLLM._log_tool_call_result(
-                                tool_span, tool_call_result
-                            )
+                        tool_call_result = self._handle_tool_call_approval(
+                            tool_call_result=tool_call_result,
+                            tool_number=tool_number,
+                            trace_span=trace_span,
+                        )
 
                         tool_result_response_dict = (
                             tool_call_result.as_tool_result_response()
@@ -515,6 +484,7 @@
         tool_name: str,
         tool_params: dict,
         user_approved: bool,
+        tool_call_id: str,
         tool_number: Optional[int] = None,
     ) -> StructuredToolResult:
         tool = self.tool_executor.get_tool_by_name(tool_name)
@@ -534,6 +504,8 @@
                 user_approved=user_approved,
                 llm=self.llm,
                 max_token_count=self.llm.get_max_token_count_for_single_tool(),
+                tool_name=tool_name,
+                tool_call_id=tool_call_id,
             )
             tool_response = tool.invoke(tool_params, context=invoke_context)
         except Exception as e:
@@ -578,6 +550,7 @@
             tool_params=tool_params,
             user_approved=user_approved,
             tool_number=tool_number,
+            tool_call_id=tool_call_id,
         )
 
         if not isinstance(tool_response, StructuredToolResult):
@@ -603,15 +576,39 @@
         )
 
     @staticmethod
-    def _log_tool_call_result(tool_span, tool_call_result: ToolCallResult):
+    def _log_tool_call_result(
+        tool_span,
+        tool_call_result: ToolCallResult,
+        approval_possible=True,
+        original_token_count=None,
+    ):
         tool_span.set_attributes(name=tool_call_result.tool_name)
+        status = tool_call_result.result.status
+
+        if (
+            status == StructuredToolResultStatus.APPROVAL_REQUIRED
+            and not approval_possible
+        ):
+            status = StructuredToolResultStatus.ERROR
+
+        if status == StructuredToolResultStatus.ERROR:
+            error = (
+                tool_call_result.result.error
+                if tool_call_result.result.error
+                else "Unspecified error"
+            )
+        else:
+            error = None
         tool_span.log(
             input=tool_call_result.result.params,
             output=tool_call_result.result.data,
-            error=tool_call_result.result.error,
+            error=error,
             metadata={
-                "status": tool_call_result.result.status,
+                "status": status,
                 "description": tool_call_result.description,
+                "return_code": tool_call_result.result.return_code,
+                "error": tool_call_result.result.error,
+                "original_token_count": original_token_count,
             },
         )
 
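In words: when no approval callback is wired up, a tool result stuck in APPROVAL_REQUIRED is recorded as an error, and a missing error string falls back to a placeholder. A condensed restatement with plain strings standing in for the enum (a sketch, not the package's types):

    def effective_status_and_error(status, error, approval_possible):
        # APPROVAL_REQUIRED with nobody able to approve is effectively an error.
        if status == "approval_required" and not approval_possible:
            status = "error"
        # Errors always log a message; non-errors log no error at all.
        if status == "error":
            return status, error or "Unspecified error"
        return status, None

    assert effective_status_and_error("approval_required", None, False) == ("error", "Unspecified error")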
@@ -657,17 +654,23 @@
                 user_approved=user_approved,
             )
 
-            prevent_overly_big_tool_response(
+            original_token_count = prevent_overly_big_tool_response(
                 tool_call_result=tool_call_result, llm=self.llm
             )
 
-            ToolCallingLLM._log_tool_call_result(tool_span, tool_call_result)
+            ToolCallingLLM._log_tool_call_result(
+                tool_span,
+                tool_call_result,
+                self.approval_callback is not None,
+                original_token_count,
+            )
             return tool_call_result
 
     def _handle_tool_call_approval(
         self,
         tool_call_result: ToolCallResult,
         tool_number: Optional[int],
+        trace_span: Any,
     ) -> ToolCallResult:
         """
         Handle approval for a single tool call if required.
@@ -686,76 +689,35 @@
            return tool_call_result
 
         # Get approval from user
-        approved, feedback = self.approval_callback(tool_call_result.result)
-
-        if approved:
-            logging.debug(
-                f"User approved command: {tool_call_result.result.invocation}"
-            )
-            new_response = self._directly_invoke_tool_call(
-                tool_name=tool_call_result.tool_name,
-                tool_params=tool_call_result.result.params or {},
-                user_approved=True,
-                tool_number=tool_number,
-            )
-            tool_call_result.result = new_response
-        else:
-            # User denied - update to error
-            feedback_text = f" User feedback: {feedback}" if feedback else ""
-            tool_call_result.result.status = StructuredToolResultStatus.ERROR
-            tool_call_result.result.error = (
-                f"User denied command execution.{feedback_text}"
-            )
-
-        return tool_call_result
-
-    @staticmethod
-    def __load_post_processing_user_prompt(
-        input_prompt, investigation, user_prompt: Optional[str] = None
-    ) -> str:
-        if not user_prompt:
-            user_prompt = "builtin://generic_post_processing.jinja2"
-        return load_and_render_prompt(
-            user_prompt, {"investigation": investigation, "prompt": input_prompt}
-        )
+        with trace_span.start_span(
+            type="task", name=f"Ask approval for {tool_call_result.tool_name}"
+        ):
+            approved, feedback = self.approval_callback(tool_call_result.result)
 
-    def _post_processing_call(
-        self,
-        prompt,
-        investigation,
-        user_prompt: Optional[str] = None,
-        system_prompt: str = "You are an AI assistant summarizing Kubernetes issues.",
-    ) -> tuple[Optional[str], float]:
-        try:
-            user_prompt = ToolCallingLLM.__load_post_processing_user_prompt(
-                prompt, investigation, user_prompt
-            )
-
-            logging.debug(f'Post processing prompt:\n"""\n{user_prompt}\n"""')
-            messages = [
-                {
-                    "role": "system",
-                    "content": system_prompt,
-                },
-                {
-                    "role": "user",
-                    "content": format_tags_in_string(user_prompt),
-                },
-            ]
-            full_response = self.llm.completion(messages=messages, temperature=0)
-            logging.debug(f"Post processing response {full_response}")
-
-            # Extract and log cost information for post-processing
-            post_processing_cost = _extract_cost_from_response(full_response)
-            if post_processing_cost > 0:
-                cost_logger.debug(
-                    f"Post-processing LLM cost: ${post_processing_cost:.6f}"
+        # Note - Tool calls are currently logged twice, once when returning APPROVAL_REQUIRED and once here
+        with trace_span.start_span(type="tool") as tool_span:
+            if approved:
+                logging.debug(
+                    f"User approved command: {tool_call_result.result.invocation}"
                 )
+                new_response = self._directly_invoke_tool_call(
+                    tool_name=tool_call_result.tool_name,
+                    tool_params=tool_call_result.result.params or {},
+                    user_approved=True,
+                    tool_number=tool_number,
+                    tool_call_id=tool_call_result.tool_call_id,
+                )
+                tool_call_result.result = new_response
+            else:
+                # User denied - update to error
+                feedback_text = f" User feedback: {feedback}" if feedback else ""
+                tool_call_result.result.status = StructuredToolResultStatus.ERROR
+                tool_call_result.result.error = (
+                    f"User denied command execution.{feedback_text}"
+                )
+            ToolCallingLLM._log_tool_call_result(tool_span, tool_call_result)
 
-            return full_response.choices[0].message.content, post_processing_cost  # type: ignore
-        except Exception:
-            logging.exception("Failed to run post processing", exc_info=True)
-            return investigation, 0.0
+        return tool_call_result
 
     def call_stream(
         self,
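The approval callback's contract, as used above: it receives the pending StructuredToolResult and returns an (approved, feedback) pair. A hypothetical interactive implementation (everything except the tuple shape and the .invocation attribute is a stand-in):

    from typing import Optional, Tuple

    def prompt_user_for_approval(pending_result) -> Tuple[bool, Optional[str]]:
        # pending_result.invocation is the command awaiting approval.
        answer = input(f"Run `{pending_result.invocation}`? [y/N] ").strip().lower()
        if answer == "y":
            return True, None
        return False, "denied at the interactive prompt"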
@@ -1038,10 +1000,8 @@ class IssueInvestigator(ToolCallingLLM):
         self,
         issue: Issue,
         prompt: str,
-        instructions: Optional[ResourceInstructions],
         console: Optional[Console] = None,
         global_instructions: Optional[Instructions] = None,
-        post_processing_prompt: Optional[str] = None,
         sections: Optional[InputSectionsDataType] = None,
         trace_span=DummySpan(),
         runbooks: Optional[RunbookCatalog] = None,
@@ -1095,16 +1055,18 @@
             },
         )
 
-        user_prompt = ""
+        base_user = ""
+        base_user = f"{base_user}\n #This is context from the issue:\n{issue.raw}"
 
-        user_prompt = add_runbooks_to_user_prompt(
-            user_prompt,
+        runbooks_ctx = generate_runbooks_args(
             runbook_catalog=runbooks,
             global_instructions=global_instructions,
             issue_instructions=issue_runbooks,
-            resource_instructions=instructions,
         )
-        user_prompt = f"{user_prompt}\n #This is context from the issue:\n{issue.raw}"
+        user_prompt = generate_user_prompt(
+            base_user,
+            runbooks_ctx,
+        )
         logging.debug(
             "Rendered system prompt:\n%s", textwrap.indent(system_prompt, " ")
         )
@@ -1113,7 +1075,6 @@
         res = self.prompt_call(
             system_prompt,
             user_prompt,
-            post_processing_prompt,
             response_format=response_format,
             sections=sections,
             trace_span=trace_span,
holmes/core/tools.py CHANGED
@@ -5,6 +5,7 @@ import re
 import shlex
 import subprocess
 import tempfile
+import time
 from abc import ABC, abstractmethod
 from datetime import datetime
 from enum import Enum
@@ -26,25 +27,25 @@ from pydantic import (
     ConfigDict,
     Field,
     FilePath,
-    model_validator,
     PrivateAttr,
+    model_validator,
 )
 from rich.console import Console
+from rich.table import Table
 
 from holmes.core.llm import LLM
 from holmes.core.openai_formatting import format_tool_to_open_ai_standard
-from holmes.plugins.prompts import load_and_render_prompt
 from holmes.core.transformers import (
-    registry,
-    TransformerError,
     Transformer,
+    TransformerError,
+    registry,
 )
+from holmes.plugins.prompts import load_and_render_prompt
+from holmes.utils.config_utils import merge_transformers
+from holmes.utils.memory_limit import check_oom_and_append_hint, get_ulimit_prefix
 
 if TYPE_CHECKING:
     from holmes.core.transformers import BaseTransformer
-from holmes.utils.config_utils import merge_transformers
-import time
-from rich.table import Table
 
 logger = logging.getLogger(__name__)
 
@@ -96,9 +97,11 @@ class StructuredToolResult(BaseModel):
         else:
             try:
                 if isinstance(self.data, BaseModel):
-                    return self.data.model_dump_json(indent=2)
+                    return self.data.model_dump_json()
                 else:
-                    return json.dumps(self.data, indent=2)
+                    return json.dumps(
+                        self.data, separators=(",", ":"), ensure_ascii=False
+                    )
             except Exception:
                 return str(self.data)
 
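The serializer now favors token economy over human readability: compact separators drop the whitespace that indent=2 inserts, and ensure_ascii=False keeps non-ASCII characters literal instead of expanding them to \uXXXX escapes. A quick standalone comparison:

    import json

    data = {"pod": "api-7f9c4", "message": "réessayer plus tard"}
    pretty = json.dumps(data, indent=2)
    compact = json.dumps(data, separators=(",", ":"), ensure_ascii=False)
    # The compact form is strictly shorter, so it costs fewer LLM tokens.
    assert len(compact) < len(pretty)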
@@ -117,23 +120,6 @@ def sanitize_params(params):
     return {k: sanitize(str(v)) for k, v in params.items()}
 
 
-def format_tool_output(tool_result: Union[str, StructuredToolResult]) -> str:
-    if isinstance(tool_result, StructuredToolResult):
-        if tool_result.data and isinstance(tool_result.data, str):
-            # Display logs and other string outputs in a way that is readable to humans.
-            # To do this, we extract them from the result and print them as-is below.
-            # The metadata is printed on a single line to
-            data = tool_result.data
-            tool_result.data = "The raw tool data is printed below this JSON"
-            result_str = tool_result.model_dump_json(indent=2, exclude_none=True)
-            result_str += f"\n{data}"
-            return result_str
-        else:
-            return tool_result.model_dump_json(indent=2)
-    else:
-        return tool_result
-
-
 class ToolsetStatusEnum(str, Enum):
     ENABLED = "enabled"
     DISABLED = "disabled"
@@ -168,6 +154,8 @@ class ToolInvokeContext(BaseModel):
     user_approved: bool = False
     llm: LLM
     max_token_count: int
+    tool_call_id: str
+    tool_name: str
 
 
 class Tool(ABC, BaseModel):
@@ -493,8 +481,9 @@ class YAMLTool(Tool, BaseModel):
     def __execute_subprocess(self, cmd) -> Tuple[str, int]:
         try:
             logger.debug(f"Running `{cmd}`")
+            protected_cmd = get_ulimit_prefix() + cmd
             result = subprocess.run(
-                cmd,
+                protected_cmd,
                 shell=True,
                 text=True,
                 check=False,  # do not throw error, we just return the error code
@@ -503,7 +492,9 @@
                 stderr=subprocess.STDOUT,
             )
 
-            return result.stdout.strip(), result.returncode
+            output = result.stdout.strip()
+            output = check_oom_and_append_hint(output, result.returncode)
+            return output, result.returncode
         except Exception as e:
             logger.error(
                 f"An unexpected error occurred while running '{cmd}': {e}",
holmes/core/tools_utils/token_counting.py CHANGED
@@ -4,11 +4,18 @@ from holmes.core.tools import StructuredToolResult
 
 
 def count_tool_response_tokens(
-    llm: LLM, structured_tool_result: StructuredToolResult
+    llm: LLM,
+    structured_tool_result: StructuredToolResult,
+    tool_call_id: str,
+    tool_name: str,
 ) -> int:
     message = {
         "role": "tool",
-        "content": format_tool_result_data(structured_tool_result),
+        "content": format_tool_result_data(
+            tool_result=structured_tool_result,
+            tool_call_id=tool_call_id,
+            tool_name=tool_name,
+        ),
     }
     tokens = llm.count_tokens([message])
     return tokens.total_tokens
holmes/core/tools_utils/tool_context_window_limiter.py CHANGED
@@ -1,8 +1,8 @@
-from typing import Optional
 from pydantic import BaseModel
+
 from holmes.core.llm import LLM
-from holmes.core.tools import StructuredToolResultStatus
 from holmes.core.models import ToolCallResult
+from holmes.core.tools import StructuredToolResultStatus
 from holmes.utils import sentry_helper
 
 
@@ -20,38 +20,21 @@ def get_pct_token_count(percent_of_total_context_window: float, llm: LLM) -> int
     return context_window_size
 
 
-def is_tool_call_too_big(
-    tool_call_result: ToolCallResult, llm: LLM
-) -> tuple[bool, Optional[ToolCallSizeMetadata]]:
-    if tool_call_result.result.status == StructuredToolResultStatus.SUCCESS:
-        message = tool_call_result.as_tool_call_message()
-
-        tokens = llm.count_tokens(messages=[message])
-        max_tokens_allowed = llm.get_max_token_count_for_single_tool()
-        return (
-            tokens.total_tokens > max_tokens_allowed,
-            ToolCallSizeMetadata(
-                messages_token=tokens.total_tokens,
-                max_tokens_allowed=max_tokens_allowed,
-            ),
-        )
-    return False, None
-
-
 def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM):
-    tool_call_result_is_too_big, metadata = is_tool_call_too_big(
-        tool_call_result=tool_call_result, llm=llm
-    )
-    if tool_call_result_is_too_big and metadata:
-        relative_pct = (
-            (metadata.messages_token - metadata.max_tokens_allowed)
-            / metadata.messages_token
-        ) * 100
-        error_message = f"The tool call result is too large to return: {metadata.messages_token} tokens.\nThe maximum allowed tokens is {metadata.max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
+    message = tool_call_result.as_tool_call_message()
+    messages_token = llm.count_tokens(messages=[message]).total_tokens
+    max_tokens_allowed = llm.get_max_token_count_for_single_tool()
+    if (
+        tool_call_result.result.status == StructuredToolResultStatus.SUCCESS
+        and messages_token > max_tokens_allowed
+    ):
+        relative_pct = ((messages_token - max_tokens_allowed) / messages_token) * 100
+        error_message = f"The tool call result is too large to return: {messages_token} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
         tool_call_result.result.status = StructuredToolResultStatus.ERROR
         tool_call_result.result.data = None
         tool_call_result.result.error = error_message
 
         sentry_helper.capture_toolcall_contains_too_many_tokens(
-            tool_call_result, metadata.messages_token, metadata.max_tokens_allowed
+            tool_call_result, messages_token, max_tokens_allowed
         )
+    return messages_token
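A worked instance of the oversize arithmetic above: a 12,000-token tool response against an 8,000-token budget reports that the budget is 33.3% smaller than the response.

    messages_token = 12_000
    max_tokens_allowed = 8_000
    relative_pct = ((messages_token - max_tokens_allowed) / messages_token) * 100
    assert format(relative_pct, ".1f") == "33.3"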