holmesgpt 0.14.1a0__py3-none-any.whl → 0.14.3a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of holmesgpt might be problematic. See the package registry's advisory page for more details.

Files changed (73)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +5 -2
  3. holmes/common/env_vars.py +8 -2
  4. holmes/config.py +4 -7
  5. holmes/core/conversations.py +12 -2
  6. holmes/core/feedback.py +191 -0
  7. holmes/core/llm.py +52 -10
  8. holmes/core/models.py +101 -1
  9. holmes/core/supabase_dal.py +23 -9
  10. holmes/core/tool_calling_llm.py +206 -16
  11. holmes/core/tools.py +20 -7
  12. holmes/core/tools_utils/token_counting.py +13 -0
  13. holmes/core/tools_utils/tool_context_window_limiter.py +45 -23
  14. holmes/core/tools_utils/tool_executor.py +11 -6
  15. holmes/core/toolset_manager.py +7 -3
  16. holmes/core/truncation/dal_truncation_utils.py +23 -0
  17. holmes/interactive.py +146 -14
  18. holmes/plugins/prompts/_fetch_logs.jinja2 +13 -1
  19. holmes/plugins/runbooks/__init__.py +6 -1
  20. holmes/plugins/toolsets/__init__.py +11 -4
  21. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +9 -20
  22. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -3
  23. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -3
  24. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +6 -4
  25. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +6 -4
  26. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -3
  27. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +6 -4
  28. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -3
  29. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -3
  30. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -3
  31. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -3
  32. holmes/plugins/toolsets/bash/bash_toolset.py +4 -7
  33. holmes/plugins/toolsets/cilium.yaml +284 -0
  34. holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
  35. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
  36. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +333 -199
  37. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +181 -9
  38. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +80 -22
  39. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +5 -8
  40. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +7 -12
  41. holmes/plugins/toolsets/git.py +14 -12
  42. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +23 -42
  43. holmes/plugins/toolsets/grafana/toolset_grafana.py +2 -3
  44. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +2 -1
  45. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +21 -39
  46. holmes/plugins/toolsets/internet/internet.py +2 -3
  47. holmes/plugins/toolsets/internet/notion.py +2 -3
  48. holmes/plugins/toolsets/investigator/core_investigation.py +7 -9
  49. holmes/plugins/toolsets/kafka.py +7 -18
  50. holmes/plugins/toolsets/logging_utils/logging_api.py +80 -4
  51. holmes/plugins/toolsets/mcp/toolset_mcp.py +2 -3
  52. holmes/plugins/toolsets/newrelic/__init__.py +0 -0
  53. holmes/plugins/toolsets/newrelic/new_relic_api.py +125 -0
  54. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +41 -0
  55. holmes/plugins/toolsets/newrelic/newrelic.py +211 -0
  56. holmes/plugins/toolsets/opensearch/opensearch.py +5 -12
  57. holmes/plugins/toolsets/opensearch/opensearch_traces.py +3 -6
  58. holmes/plugins/toolsets/prometheus/prometheus.py +808 -419
  59. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +27 -11
  60. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +3 -6
  61. holmes/plugins/toolsets/robusta/robusta.py +4 -9
  62. holmes/plugins/toolsets/runbook/runbook_fetcher.py +93 -13
  63. holmes/plugins/toolsets/servicenow/servicenow.py +5 -10
  64. holmes/utils/sentry_helper.py +1 -1
  65. holmes/utils/stream.py +22 -7
  66. holmes/version.py +34 -14
  67. {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/METADATA +7 -9
  68. {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/RECORD +71 -65
  69. holmes/core/tools_utils/data_types.py +0 -81
  70. holmes/plugins/toolsets/newrelic.py +0 -231
  71. {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/LICENSE.txt +0 -0
  72. {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/WHEEL +0 -0
  73. {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/entry_points.txt +0 -0
@@ -1,5 +1,6 @@
1
1
  import base64
2
2
  import binascii
3
+ import gzip
3
4
  import json
4
5
  import logging
5
6
  import os
@@ -7,7 +8,6 @@ import threading
7
8
  from datetime import datetime, timedelta
8
9
  from typing import Dict, List, Optional, Tuple
9
10
  from uuid import uuid4
10
- import gzip
11
11
 
12
12
  import yaml # type: ignore
13
13
  from cachetools import TTLCache # type: ignore
@@ -30,6 +30,9 @@ from holmes.core.resource_instruction import (
30
30
  ResourceInstructionDocument,
31
31
  ResourceInstructions,
32
32
  )
33
+ from holmes.core.truncation.dal_truncation_utils import (
34
+ truncate_evidences_entities_if_necessary,
35
+ )
33
36
  from holmes.utils.definitions import RobustaConfig
34
37
  from holmes.utils.env import get_env_replacement
35
38
  from holmes.utils.global_instructions import Instructions
@@ -46,6 +49,9 @@ HOLMES_TOOLSET = "HolmesToolsStatus"
46
49
  SCANS_META_TABLE = "ScansMeta"
47
50
  SCANS_RESULTS_TABLE = "ScansResults"
48
51
 
52
+ ENRICHMENT_BLACKLIST = ["text_file", "graph", "ai_analysis", "holmes"]
53
+ ENRICHMENT_BLACKLIST_SET = set(ENRICHMENT_BLACKLIST)
54
+
49
55
 
50
56
  class RobustaToken(BaseModel):
51
57
  store_url: str
@@ -60,7 +66,7 @@ class SupabaseDal:
60
66
  self.enabled = self.__init_config()
61
67
  self.cluster = cluster
62
68
  if not self.enabled:
63
- logging.info(
69
+ logging.debug(
64
70
  "Not connecting to Robusta platform - robusta token not provided - using ROBUSTA_AI will not be possible"
65
71
  )
66
72
  return
@@ -118,7 +124,7 @@ class SupabaseDal:
118
124
  )
119
125
 
120
126
  if not os.path.exists(config_file_path):
121
- logging.info(f"No robusta config in {config_file_path}")
127
+ logging.debug(f"No robusta config in {config_file_path}")
122
128
  return None
123
129
 
124
130
  logging.info(f"loading config {config_file_path}")
@@ -262,11 +268,14 @@ class SupabaseDal:
262
268
  .select("*")
263
269
  .eq("account_id", self.account_id)
264
270
  .in_("issue_id", changes_ids)
271
+ .not_.in_("enrichment_type", ENRICHMENT_BLACKLIST)
265
272
  .execute()
266
273
  )
267
274
  if not len(change_data_response.data):
268
275
  return None
269
276
 
277
+ truncate_evidences_entities_if_necessary(change_data_response.data)
278
+
270
279
  except Exception:
271
280
  logging.exception("Supabase error while retrieving change content")
272
281
  return None
@@ -323,17 +332,17 @@ class SupabaseDal:
323
332
  return data
324
333
 
325
334
  def extract_relevant_issues(self, evidence):
326
- enrichment_blacklist = {"text_file", "graph", "ai_analysis", "holmes"}
327
335
  data = [
328
336
  enrich
329
337
  for enrich in evidence.data
330
- if enrich.get("enrichment_type") not in enrichment_blacklist
338
+ if enrich.get("enrichment_type") not in ENRICHMENT_BLACKLIST_SET
331
339
  ]
332
340
 
333
341
  unzipped_files = [
334
342
  self.unzip_evidence_file(enrich)
335
343
  for enrich in evidence.data
336
344
  if enrich.get("enrichment_type") == "text_file"
345
+ or enrich.get("enrichment_type") == "alert_raw_data"
337
346
  ]
338
347
 
339
348
  data.extend(unzipped_files)
@@ -370,12 +379,14 @@ class SupabaseDal:
370
379
  evidence = (
371
380
  self.client.table(EVIDENCE_TABLE)
372
381
  .select("*")
373
- .filter("issue_id", "eq", issue_id)
382
+ .eq("issue_id", issue_id)
383
+ .not_.in_("enrichment_type", ENRICHMENT_BLACKLIST)
374
384
  .execute()
375
385
  )
376
- data = self.extract_relevant_issues(evidence)
386
+ relevant_evidence = self.extract_relevant_issues(evidence)
387
+ truncate_evidences_entities_if_necessary(relevant_evidence)
377
388
 
378
- issue_data["evidence"] = data
389
+ issue_data["evidence"] = relevant_evidence
379
390
 
380
391
  # build issue investigation dates
381
392
  started_at = issue_data.get("starts_at")
@@ -518,10 +529,13 @@ class SupabaseDal:
518
529
  self.client.table(EVIDENCE_TABLE)
519
530
  .select("data, enrichment_type")
520
531
  .in_("issue_id", unique_issues)
532
+ .not_.in_("enrichment_type", ENRICHMENT_BLACKLIST)
521
533
  .execute()
522
534
  )
523
535
 
524
- return self.extract_relevant_issues(res)
536
+ relevant_issues = self.extract_relevant_issues(res)
537
+ truncate_evidences_entities_if_necessary(relevant_issues)
538
+ return relevant_issues
525
539
 
526
540
  except Exception:
527
541
  logging.exception("failed to fetch workload issues data", exc_info=True)
@@ -4,6 +4,13 @@ import logging
4
4
  import textwrap
5
5
  from typing import Dict, List, Optional, Type, Union, Callable, Any
6
6
 
7
+ from holmes.core.models import (
8
+ ToolApprovalDecision,
9
+ ToolCallResult,
10
+ TruncationResult,
11
+ TruncationMetadata,
12
+ PendingToolApproval,
13
+ )
7
14
 
8
15
  import sentry_sdk
9
16
  from openai import BadRequestError
@@ -27,13 +34,18 @@ from holmes.core.investigation_structured_output import (
27
34
  is_response_an_incorrect_tool_call,
28
35
  )
29
36
  from holmes.core.issue import Issue
30
- from holmes.core.llm import LLM
37
+ from holmes.core.llm import LLM, get_llm_usage
31
38
  from holmes.core.performance_timing import PerformanceTiming
32
39
  from holmes.core.resource_instruction import ResourceInstructions
33
40
  from holmes.core.runbooks import RunbookManager
34
41
  from holmes.core.safeguards import prevent_overly_repeated_tool_call
35
- from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
42
+ from holmes.core.tools import (
43
+ StructuredToolResult,
44
+ StructuredToolResultStatus,
45
+ ToolInvokeContext,
46
+ )
36
47
  from holmes.core.tools_utils.tool_context_window_limiter import (
48
+ get_max_token_count_for_single_tool,
37
49
  prevent_overly_big_tool_response,
38
50
  )
39
51
  from holmes.plugins.prompts import load_and_render_prompt
@@ -44,11 +56,6 @@ from holmes.utils.global_instructions import (
44
56
  )
45
57
  from holmes.utils.tags import format_tags_in_string, parse_messages_tags
46
58
  from holmes.core.tools_utils.tool_executor import ToolExecutor
47
- from holmes.core.tools_utils.data_types import (
48
- TruncationResult,
49
- ToolCallResult,
50
- TruncationMetadata,
51
- )
52
59
  from holmes.core.tracing import DummySpan
53
60
  from holmes.utils.colors import AI_COLOR
54
61
  from holmes.utils.stream import StreamEvents, StreamMessage
@@ -264,6 +271,99 @@ class ToolCallingLLM:
264
271
  Callable[[StructuredToolResult], tuple[bool, Optional[str]]]
265
272
  ] = None
266
273
 
274
+ def process_tool_decisions(
275
+ self, messages: List[Dict[str, Any]], tool_decisions: List[ToolApprovalDecision]
276
+ ) -> List[Dict[str, Any]]:
277
+ """
278
+ Process tool approval decisions and execute approved tools.
279
+
280
+ Args:
281
+ messages: Current conversation messages
282
+ tool_decisions: List of ToolApprovalDecision objects
283
+
284
+ Returns:
285
+ Updated messages list with tool execution results
286
+ """
287
+ # Import here to avoid circular imports
288
+
289
+ # Find the last message with pending approvals
290
+ pending_message_idx = None
291
+ pending_tool_calls = None
292
+
293
+ for i in reversed(range(len(messages))):
294
+ msg = messages[i]
295
+ if msg.get("role") == "assistant" and msg.get("pending_approval"):
296
+ pending_message_idx = i
297
+ pending_tool_calls = msg.get("tool_calls", [])
298
+ break
299
+
300
+ if pending_message_idx is None or not pending_tool_calls:
301
+ # No pending approvals found
302
+ if tool_decisions:
303
+ logging.warning(
304
+ f"Received {len(tool_decisions)} tool decisions but no pending approvals found"
305
+ )
306
+ return messages
307
+
308
+ # Create decision lookup
309
+ decisions_by_id = {
310
+ decision.tool_call_id: decision for decision in tool_decisions
311
+ }
312
+
313
+ # Validate that all decisions have corresponding pending tool calls
314
+ pending_tool_ids = {tool_call["id"] for tool_call in pending_tool_calls}
315
+ invalid_decisions = [
316
+ decision.tool_call_id
317
+ for decision in tool_decisions
318
+ if decision.tool_call_id not in pending_tool_ids
319
+ ]
320
+
321
+ if invalid_decisions:
322
+ logging.warning(
323
+ f"Received decisions for non-pending tool calls: {invalid_decisions}"
324
+ )
325
+
326
+ # Process each tool call
327
+ for tool_call in pending_tool_calls:
328
+ tool_call_id = tool_call["id"]
329
+ decision = decisions_by_id.get(tool_call_id)
330
+
331
+ if decision and decision.approved:
332
+ try:
333
+ tool_call_obj = ChatCompletionMessageToolCall(**tool_call)
334
+ llm_tool_result = self._invoke_llm_tool_call(
335
+ tool_to_call=tool_call_obj,
336
+ previous_tool_calls=[],
337
+ trace_span=DummySpan(),
338
+ tool_number=None,
339
+ )
340
+ messages.append(llm_tool_result.as_tool_call_message())
341
+
342
+ except Exception as e:
343
+ logging.error(
344
+ f"Failed to execute approved tool {tool_call_id}: {e}"
345
+ )
346
+ messages.append(
347
+ {
348
+ "tool_call_id": tool_call_id,
349
+ "role": "tool",
350
+ "name": tool_call["function"]["name"],
351
+ "content": f"Tool execution failed: {str(e)}",
352
+ }
353
+ )
354
+ else:
355
+ # Tool was rejected or no decision found, add rejection message
356
+ messages.append(
357
+ {
358
+ "tool_call_id": tool_call_id,
359
+ "role": "tool",
360
+ "name": tool_call["function"]["name"],
361
+ "content": "Tool execution was denied by the user.",
362
+ }
363
+ )
364
+
365
+ return messages
366
+
267
367
  def prompt_call(
268
368
  self,
269
369
  system_prompt: str,
@@ -422,7 +522,11 @@ class ToolCallingLLM:
422
522
  )
423
523
  costs.total_cost += post_processing_cost
424
524
 
525
+ self.llm.count_tokens_for_message(messages)
425
526
  perf_timing.end(f"- completed in {i} iterations -")
527
+ metadata["usage"] = get_llm_usage(full_response)
528
+ metadata["max_tokens"] = max_context_size
529
+ metadata["max_output_tokens"] = maximum_output_token
426
530
  return LLMResult(
427
531
  result=post_processed_response,
428
532
  unprocessed_result=raw_response,
@@ -523,9 +627,13 @@ class ToolCallingLLM:
523
627
  )
524
628
 
525
629
  try:
526
- tool_response = tool.invoke(
527
- tool_params, tool_number=tool_number, user_approved=user_approved
630
+ invoke_context = ToolInvokeContext(
631
+ tool_number=tool_number,
632
+ user_approved=user_approved,
633
+ llm=self.llm,
634
+ max_token_count=get_max_token_count_for_single_tool(self.llm),
528
635
  )
636
+ tool_response = tool.invoke(tool_params, context=invoke_context)
529
637
  except Exception as e:
530
638
  logging.error(
531
639
  f"Tool call to {tool_name} failed with an Exception", exc_info=True
@@ -583,7 +691,9 @@ class ToolCallingLLM:
583
691
  return ToolCallResult(
584
692
  tool_call_id=tool_call_id,
585
693
  tool_name=tool_name,
586
- description=tool.get_parameterized_one_liner(tool_params) if tool else "",
694
+ description=str(tool.get_parameterized_one_liner(tool_params))
695
+ if tool
696
+ else "",
587
697
  result=tool_response,
588
698
  )
589
699
 
@@ -761,12 +871,13 @@ class ToolCallingLLM:
761
871
  response_format: Optional[Union[dict, Type[BaseModel]]] = None,
762
872
  sections: Optional[InputSectionsDataType] = None,
763
873
  msgs: Optional[list[dict]] = None,
874
+ enable_tool_approval: bool = False,
764
875
  ):
765
876
  """
766
877
  This function DOES NOT call llm.completion(stream=true).
767
878
  This function streams holmes one iteration at a time instead of waiting for all iterations to complete.
768
879
  """
769
- messages = []
880
+ messages: list[dict] = []
770
881
  if system_prompt:
771
882
  messages.append({"role": "system", "content": system_prompt})
772
883
  if user_prompt:
@@ -863,6 +974,10 @@ class ToolCallingLLM:
863
974
 
864
975
  tools_to_call = getattr(response_message, "tool_calls", None)
865
976
  if not tools_to_call:
977
+ self.llm.count_tokens_for_message(messages)
978
+ metadata["usage"] = get_llm_usage(full_response)
979
+ metadata["max_tokens"] = max_context_size
980
+ metadata["max_output_tokens"] = maximum_output_token
866
981
  yield StreamMessage(
867
982
  event=StreamEvents.ANSWER_END,
868
983
  data={
@@ -882,6 +997,11 @@ class ToolCallingLLM:
882
997
  )
883
998
 
884
999
  perf_timing.measure("pre-tool-calls")
1000
+
1001
+ # Check if any tools require approval first
1002
+ pending_approvals = []
1003
+ approval_required_tools = []
1004
+
885
1005
  with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
886
1006
  futures = []
887
1007
  for tool_index, t in enumerate(tools_to_call, 1): # type: ignore
@@ -901,15 +1021,85 @@ class ToolCallingLLM:
901
1021
 
902
1022
  for future in concurrent.futures.as_completed(futures):
903
1023
  tool_call_result: ToolCallResult = future.result()
904
- tool_calls.append(tool_call_result.as_tool_result_response())
905
- messages.append(tool_call_result.as_tool_call_message())
906
1024
 
907
- perf_timing.measure(f"tool completed {tool_call_result.tool_name}")
1025
+ if (
1026
+ tool_call_result.result.status
1027
+ == StructuredToolResultStatus.APPROVAL_REQUIRED
1028
+ ):
1029
+ if enable_tool_approval:
1030
+ pending_approvals.append(
1031
+ PendingToolApproval(
1032
+ tool_call_id=tool_call_result.tool_call_id,
1033
+ tool_name=tool_call_result.tool_name,
1034
+ description=tool_call_result.description,
1035
+ params=tool_call_result.result.params or {},
1036
+ )
1037
+ )
1038
+ approval_required_tools.append(tool_call_result)
1039
+
1040
+ yield StreamMessage(
1041
+ event=StreamEvents.TOOL_RESULT,
1042
+ data=tool_call_result.as_streaming_tool_result_response(),
1043
+ )
1044
+ else:
1045
+ tool_call_result.result.status = (
1046
+ StructuredToolResultStatus.ERROR
1047
+ )
1048
+ tool_call_result.result.error = f"Tool call rejected for security reasons: {tool_call_result.result.error}"
1049
+
1050
+ tool_calls.append(
1051
+ tool_call_result.as_tool_result_response()
1052
+ )
1053
+ messages.append(tool_call_result.as_tool_call_message())
908
1054
 
1055
+ yield StreamMessage(
1056
+ event=StreamEvents.TOOL_RESULT,
1057
+ data=tool_call_result.as_streaming_tool_result_response(),
1058
+ )
1059
+
1060
+ else:
1061
+ tool_calls.append(tool_call_result.as_tool_result_response())
1062
+ messages.append(tool_call_result.as_tool_call_message())
1063
+
1064
+ yield StreamMessage(
1065
+ event=StreamEvents.TOOL_RESULT,
1066
+ data=tool_call_result.as_streaming_tool_result_response(),
1067
+ )
1068
+
1069
+ # If we have approval required tools, end the stream with pending approvals
1070
+ if pending_approvals:
1071
+ # Add assistant message with pending tool calls
1072
+ assistant_msg = {
1073
+ "role": "assistant",
1074
+ "content": response_message.content,
1075
+ "tool_calls": [
1076
+ {
1077
+ "id": result.tool_call_id,
1078
+ "type": "function",
1079
+ "function": {
1080
+ "name": result.tool_name,
1081
+ "arguments": json.dumps(result.result.params or {}),
1082
+ },
1083
+ }
1084
+ for result in approval_required_tools
1085
+ ],
1086
+ "pending_approval": True,
1087
+ }
1088
+ messages.append(assistant_msg)
1089
+
1090
+ # End stream with approvals required
909
1091
  yield StreamMessage(
910
- event=StreamEvents.TOOL_RESULT,
911
- data=tool_call_result.as_streaming_tool_result_response(),
1092
+ event=StreamEvents.APPROVAL_REQUIRED,
1093
+ data={
1094
+ "content": None,
1095
+ "messages": messages,
1096
+ "pending_approvals": [
1097
+ approval.model_dump() for approval in pending_approvals
1098
+ ],
1099
+ "requires_approval": True,
1100
+ },
912
1101
  )
1102
+ return
913
1103
 
914
1104
  # Update the tool number offset for the next iteration
915
1105
  tool_number_offset += len(tools_to_call)
holmes/core/tools.py CHANGED
@@ -31,6 +31,7 @@ from pydantic import (
31
31
  )
32
32
  from rich.console import Console
33
33
 
34
+ from holmes.core.llm import LLM
34
35
  from holmes.core.openai_formatting import format_tool_to_open_ai_standard
35
36
  from holmes.plugins.prompts import load_and_render_prompt
36
37
  from holmes.core.transformers import (
@@ -159,6 +160,15 @@ class ToolParameter(BaseModel):
159
160
  items: Optional["ToolParameter"] = None # For array item schemas
160
161
 
161
162
 
163
+ class ToolInvokeContext(BaseModel):
164
+ model_config = ConfigDict(arbitrary_types_allowed=True)
165
+
166
+ tool_number: Optional[int] = None
167
+ user_approved: bool = False
168
+ llm: LLM
169
+ max_token_count: int
170
+
171
+
162
172
  class Tool(ABC, BaseModel):
163
173
  name: str
164
174
  description: str
@@ -225,15 +235,14 @@ class Tool(ABC, BaseModel):
225
235
  def invoke(
226
236
  self,
227
237
  params: Dict,
228
- tool_number: Optional[int] = None,
229
- user_approved: bool = False,
238
+ context: ToolInvokeContext,
230
239
  ) -> StructuredToolResult:
231
- tool_number_str = f"#{tool_number} " if tool_number else ""
240
+ tool_number_str = f"#{context.tool_number} " if context.tool_number else ""
232
241
  logger.info(
233
242
  f"Running tool {tool_number_str}[bold]{self.name}[/bold]: {self.get_parameterized_one_liner(params)}"
234
243
  )
235
244
  start_time = time.time()
236
- result = self._invoke(params=params, user_approved=user_approved)
245
+ result = self._invoke(params=params, context=context)
237
246
  result.icon_url = self.icon_url
238
247
 
239
248
  # Apply transformers to the result
@@ -244,7 +253,7 @@ class Tool(ABC, BaseModel):
244
253
  if hasattr(transformed_result, "get_stringified_data")
245
254
  else str(transformed_result)
246
255
  )
247
- show_hint = f"/show {tool_number}" if tool_number else "/show"
256
+ show_hint = f"/show {context.tool_number}" if context.tool_number else "/show"
248
257
  line_count = output_str.count("\n") + 1 if output_str else 0
249
258
  logger.info(
250
259
  f" [dim]Finished {tool_number_str}in {elapsed:.2f}s, output length: {len(output_str):,} characters ({line_count:,} lines) - {show_hint} to view contents[/dim]"
@@ -340,7 +349,9 @@ class Tool(ABC, BaseModel):
340
349
 
341
350
  @abstractmethod
342
351
  def _invoke(
343
- self, params: dict, user_approved: bool = False
352
+ self,
353
+ params: dict,
354
+ context: ToolInvokeContext,
344
355
  ) -> StructuredToolResult:
345
356
  """
346
357
  params: the tool params
@@ -400,7 +411,9 @@ class YAMLTool(Tool, BaseModel):
400
411
  return StructuredToolResultStatus.SUCCESS
401
412
 
402
413
  def _invoke(
403
- self, params: dict, user_approved: bool = False
414
+ self,
415
+ params: dict,
416
+ context: ToolInvokeContext,
404
417
  ) -> StructuredToolResult:
405
418
  if self.command is not None:
406
419
  raw_output, return_code, invocation = self.__invoke_command(params)
@@ -0,0 +1,13 @@
1
+ from holmes.core.llm import LLM
2
+ from holmes.core.models import format_tool_result_data
3
+ from holmes.core.tools import StructuredToolResult
4
+
5
+
6
+ def count_tool_response_tokens(
7
+ llm: LLM, structured_tool_result: StructuredToolResult
8
+ ) -> int:
9
+ message = {
10
+ "role": "tool",
11
+ "content": format_tool_result_data(structured_tool_result),
12
+ }
13
+ return llm.count_tokens_for_message([message])
@@ -1,33 +1,55 @@
1
+ from typing import Optional
1
2
  from holmes.common.env_vars import TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
2
3
  from holmes.core.llm import LLM
3
4
  from holmes.core.tools import StructuredToolResultStatus
4
- from holmes.core.tools_utils.data_types import ToolCallResult
5
+ from holmes.core.models import ToolCallResult
5
6
  from holmes.utils import sentry_helper
6
7
 
7
8
 
9
+ def get_pct_token_count(percent_of_total_context_window: float, llm: LLM) -> int:
10
+ context_window_size = llm.get_context_window_size()
11
+
12
+ if 0 < percent_of_total_context_window and percent_of_total_context_window <= 100:
13
+ return int(context_window_size * percent_of_total_context_window // 100)
14
+ else:
15
+ return context_window_size
16
+
17
+
18
+ def get_max_token_count_for_single_tool(llm: LLM) -> int:
19
+ return get_pct_token_count(
20
+ percent_of_total_context_window=TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT, llm=llm
21
+ )
22
+
23
+
8
24
  def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM):
9
- if (
10
- tool_call_result.result.status == StructuredToolResultStatus.SUCCESS
11
- and 0 < TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
12
- and TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT <= 100
13
- ):
14
- message = tool_call_result.as_tool_call_message()
15
-
16
- messages_token = llm.count_tokens_for_message(messages=[message])
17
- context_window_size = llm.get_context_window_size()
18
- max_tokens_allowed: int = int(
19
- context_window_size * TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT // 100
25
+ max_tokens_allowed = get_max_token_count_for_single_tool(llm)
26
+
27
+ message = tool_call_result.as_tool_call_message()
28
+ messages_token = llm.count_tokens_for_message(messages=[message])
29
+
30
+ if messages_token > max_tokens_allowed:
31
+ relative_pct = ((messages_token - max_tokens_allowed) / messages_token) * 100
32
+
33
+ error_message: Optional[str] = (
34
+ f"The tool call result is too large to return: {messages_token} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
20
35
  )
21
36
 
22
- if messages_token > max_tokens_allowed:
23
- relative_pct = (
24
- (messages_token - max_tokens_allowed) / messages_token
25
- ) * 100
26
- error_message = f"The tool call result is too large to return: {messages_token} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
27
- tool_call_result.result.status = StructuredToolResultStatus.ERROR
28
- tool_call_result.result.data = None
29
- tool_call_result.result.error = error_message
30
-
31
- sentry_helper.capture_toolcall_contains_too_many_tokens(
32
- tool_call_result, messages_token, max_tokens_allowed
37
+ if tool_call_result.result.status == StructuredToolResultStatus.NO_DATA:
38
+ error_message = None
39
+ # tool_call_result.result.data is set to None below which is expected to fix the issue
40
+ elif tool_call_result.result.status == StructuredToolResultStatus.ERROR:
41
+ original_error = (
42
+ tool_call_result.result.error
43
+ or tool_call_result.result.data
44
+ or "Unknown error"
33
45
  )
46
+ truncated_error = str(original_error)[:100]
47
+ error_message = f"The tool call returned an error it is too large to return\nThe following original error is truncated:\n{truncated_error}"
48
+
49
+ tool_call_result.result.status = StructuredToolResultStatus.ERROR
50
+ tool_call_result.result.data = None
51
+ tool_call_result.result.error = error_message
52
+
53
+ sentry_helper.capture_toolcall_contains_too_many_tokens(
54
+ tool_call_result, messages_token, max_tokens_allowed
55
+ )
@@ -9,6 +9,7 @@ from holmes.core.tools import (
9
9
  StructuredToolResultStatus,
10
10
  Toolset,
11
11
  ToolsetStatusEnum,
12
+ ToolInvokeContext,
12
13
  )
13
14
  from holmes.core.tools_utils.toolset_utils import filter_out_default_logging_toolset
14
15
 
@@ -46,16 +47,20 @@ class ToolExecutor:
46
47
  )
47
48
  self.tools_by_name[tool.name] = tool
48
49
 
49
- def invoke(self, tool_name: str, params: dict) -> StructuredToolResult:
50
+ def invoke(
51
+ self, tool_name: str, params: dict, context: ToolInvokeContext
52
+ ) -> StructuredToolResult:
53
+ """TODO: remove this function as it seems unused.
54
+ We call tool_executor.get_tool_by_name() and then tool.invoke() directly instead of this invoke function
55
+ """
50
56
  tool = self.get_tool_by_name(tool_name)
51
- return (
52
- tool.invoke(params)
53
- if tool
54
- else StructuredToolResult(
57
+ if not tool:
58
+ return StructuredToolResult(
55
59
  status=StructuredToolResultStatus.ERROR,
56
60
  error=f"Could not find tool named {tool_name}",
57
61
  )
58
- )
62
+
63
+ return tool.invoke(params, context)
59
64
 
60
65
  def get_tool_by_name(self, name: str) -> Optional[Tool]:
61
66
  if name in self.tools_by_name:
@@ -275,7 +275,11 @@ class ToolsetManager:
275
275
  toolset.path = cached_status.get("path", None)
276
276
  # check prerequisites for only enabled toolset when the toolset is loaded from cache. When the toolset is
277
277
  # not loaded from cache, the prerequisites are checked in the refresh_toolset_status method.
278
- if toolset.enabled and toolset.status == ToolsetStatusEnum.ENABLED:
278
+ if toolset.enabled and (
279
+ toolset.status == ToolsetStatusEnum.ENABLED
280
+ or toolset.type == ToolsetType.MCP
281
+ ):
282
+ # MCP servers need to reload their tools even if previously failed, so rerun prerequisites
279
283
  enabled_toolsets_from_cache.append(toolset)
280
284
  self.check_toolset_prerequisites(enabled_toolsets_from_cache)
281
285
 
@@ -464,12 +468,12 @@ class ToolsetManager:
464
468
 
465
469
  logger = logging.getLogger(__name__)
466
470
 
467
- logger.info(
471
+ logger.debug(
468
472
  f"Starting fast_model injection. global_fast_model={self.global_fast_model}"
469
473
  )
470
474
 
471
475
  if not self.global_fast_model:
472
- logger.info("No global_fast_model configured, skipping injection")
476
+ logger.debug("No global_fast_model configured, skipping injection")
473
477
  return
474
478
 
475
479
  injected_count = 0