holmesgpt 0.14.3a0__py3-none-any.whl → 0.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of holmesgpt might be problematic.

Files changed (30)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +12 -10
  3. holmes/common/env_vars.py +14 -0
  4. holmes/config.py +51 -4
  5. holmes/core/conversations.py +3 -2
  6. holmes/core/llm.py +198 -72
  7. holmes/core/openai_formatting.py +13 -0
  8. holmes/core/tool_calling_llm.py +129 -95
  9. holmes/core/tools.py +21 -1
  10. holmes/core/tools_utils/token_counting.py +2 -1
  11. holmes/core/tools_utils/tool_context_window_limiter.py +13 -4
  12. holmes/interactive.py +17 -7
  13. holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
  14. holmes/plugins/toolsets/__init__.py +4 -0
  15. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +0 -1
  16. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
  17. holmes/plugins/toolsets/grafana/grafana_api.py +1 -1
  18. holmes/plugins/toolsets/investigator/core_investigation.py +14 -13
  19. holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
  20. holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
  21. holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
  22. holmes/plugins/toolsets/prometheus/prometheus.py +7 -4
  23. holmes/plugins/toolsets/service_discovery.py +1 -1
  24. holmes/plugins/toolsets/servicenow/servicenow.py +0 -1
  25. holmes/utils/stream.py +30 -1
  26. {holmesgpt-0.14.3a0.dist-info → holmesgpt-0.15.0.dist-info}/METADATA +3 -1
  27. {holmesgpt-0.14.3a0.dist-info → holmesgpt-0.15.0.dist-info}/RECORD +30 -27
  28. {holmesgpt-0.14.3a0.dist-info → holmesgpt-0.15.0.dist-info}/LICENSE.txt +0 -0
  29. {holmesgpt-0.14.3a0.dist-info → holmesgpt-0.15.0.dist-info}/WHEEL +0 -0
  30. {holmesgpt-0.14.3a0.dist-info → holmesgpt-0.15.0.dist-info}/entry_points.txt +0 -0
holmes/core/tool_calling_llm.py CHANGED
@@ -34,7 +34,7 @@ from holmes.core.investigation_structured_output import (
     is_response_an_incorrect_tool_call,
 )
 from holmes.core.issue import Issue
-from holmes.core.llm import LLM, get_llm_usage
+from holmes.core.llm import LLM
 from holmes.core.performance_timing import PerformanceTiming
 from holmes.core.resource_instruction import ResourceInstructions
 from holmes.core.runbooks import RunbookManager
@@ -58,7 +58,12 @@ from holmes.utils.tags import format_tags_in_string, parse_messages_tags
 from holmes.core.tools_utils.tool_executor import ToolExecutor
 from holmes.core.tracing import DummySpan
 from holmes.utils.colors import AI_COLOR
-from holmes.utils.stream import StreamEvents, StreamMessage
+from holmes.utils.stream import (
+    StreamEvents,
+    StreamMessage,
+    add_token_count_to_metadata,
+    build_stream_event_token_count,
+)

 # Create a named logger for cost tracking
 cost_logger = logging.getLogger("holmes.costs")
@@ -164,7 +169,8 @@ def truncate_messages_to_fit_context(
     messages_except_tools = [
         message for message in messages if message["role"] != "tool"
     ]
-    message_size_without_tools = count_tokens_fn(messages_except_tools)
+    tokens = count_tokens_fn(messages_except_tools)
+    message_size_without_tools = tokens.total_tokens

     tool_call_messages = [message for message in messages if message["role"] == "tool"]

@@ -185,7 +191,9 @@ def truncate_messages_to_fit_context(
     )
     remaining_space = available_space
     tool_call_messages.sort(
-        key=lambda x: count_tokens_fn([{"role": "tool", "content": x["content"]}])
+        key=lambda x: count_tokens_fn(
+            [{"role": "tool", "content": x["content"]}]
+        ).total_tokens
     )

     truncations = []
@@ -196,7 +204,9 @@ def truncate_messages_to_fit_context(
     for i, msg in enumerate(tool_call_messages):
         remaining_tools = len(tool_call_messages) - i
         max_allocation = remaining_space // remaining_tools
-        needed_space = count_tokens_fn([{"role": "tool", "content": msg["content"]}])
+        needed_space = count_tokens_fn(
+            [{"role": "tool", "content": msg["content"]}]
+        ).total_tokens
         allocated_space = min(needed_space, max_allocation)

         if needed_space > allocated_space:
@@ -257,6 +267,12 @@ class LLMResult(LLMCosts):
     )


+class ToolCallWithDecision(BaseModel):
+    message_index: int
+    tool_call: ChatCompletionMessageToolCall
+    decision: Optional[ToolApprovalDecision]
+
+
 class ToolCallingLLM:
     llm: LLM

@@ -284,83 +300,79 @@ class ToolCallingLLM:
         Returns:
             Updated messages list with tool execution results
         """
-        # Import here to avoid circular imports
-
-        # Find the last message with pending approvals
-        pending_message_idx = None
-        pending_tool_calls = None
-
-        for i in reversed(range(len(messages))):
-            msg = messages[i]
-            if msg.get("role") == "assistant" and msg.get("pending_approval"):
-                pending_message_idx = i
-                pending_tool_calls = msg.get("tool_calls", [])
-                break
-
-        if pending_message_idx is None or not pending_tool_calls:
-            # No pending approvals found
-            if tool_decisions:
-                logging.warning(
-                    f"Received {len(tool_decisions)} tool decisions but no pending approvals found"
-                )
+        if not tool_decisions:
             return messages

         # Create decision lookup
-        decisions_by_id = {
+        decisions_by_tool_call_id = {
             decision.tool_call_id: decision for decision in tool_decisions
         }

-        # Validate that all decisions have corresponding pending tool calls
-        pending_tool_ids = {tool_call["id"] for tool_call in pending_tool_calls}
-        invalid_decisions = [
-            decision.tool_call_id
-            for decision in tool_decisions
-            if decision.tool_call_id not in pending_tool_ids
-        ]
+        pending_tool_calls: list[ToolCallWithDecision] = []

-        if invalid_decisions:
-            logging.warning(
-                f"Received decisions for non-pending tool calls: {invalid_decisions}"
-            )
+        for i in reversed(range(len(messages))):
+            msg = messages[i]
+            if msg.get("role") == "assistant" and msg.get("tool_calls"):
+                message_tool_calls = msg.get("tool_calls", [])
+                for tool_call in message_tool_calls:
+                    decision = decisions_by_tool_call_id.get(tool_call.get("id"), None)
+                    if tool_call.get("pending_approval"):
+                        del tool_call[
+                            "pending_approval"
+                        ]  # Cleanup so that a pending approval is not tagged on message in a future response
+                        pending_tool_calls.append(
+                            ToolCallWithDecision(
+                                tool_call=ChatCompletionMessageToolCall(**tool_call),
+                                decision=decision,
+                                message_index=i,
+                            )
+                        )

-        # Process each tool call
-        for tool_call in pending_tool_calls:
-            tool_call_id = tool_call["id"]
-            decision = decisions_by_id.get(tool_call_id)
+        if not pending_tool_calls:
+            error_message = f"Received {len(tool_decisions)} tool decisions but no pending approvals found"
+            logging.error(error_message)
+            raise Exception(error_message)

+        for tool_call_with_decision in pending_tool_calls:
+            tool_call_message: dict
+            tool_call = tool_call_with_decision.tool_call
+            decision = tool_call_with_decision.decision
             if decision and decision.approved:
                 try:
-                    tool_call_obj = ChatCompletionMessageToolCall(**tool_call)
                     llm_tool_result = self._invoke_llm_tool_call(
-                        tool_to_call=tool_call_obj,
+                        tool_to_call=tool_call,
                         previous_tool_calls=[],
-                        trace_span=DummySpan(),
+                        trace_span=DummySpan(),  # TODO: replace with proper span
                         tool_number=None,
+                        user_approved=True,
                     )
-                    messages.append(llm_tool_result.as_tool_call_message())
+                    tool_call_message = llm_tool_result.as_tool_call_message()

                 except Exception as e:
                     logging.error(
-                        f"Failed to execute approved tool {tool_call_id}: {e}"
+                        f"Failed to execute approved tool {tool_call.id}: {e}"
                     )
-                    messages.append(
-                        {
-                            "tool_call_id": tool_call_id,
-                            "role": "tool",
-                            "name": tool_call["function"]["name"],
-                            "content": f"Tool execution failed: {str(e)}",
-                        }
-                    )
-            else:
-                # Tool was rejected or no decision found, add rejection message
-                messages.append(
-                    {
-                        "tool_call_id": tool_call_id,
+                    tool_call_message = {
+                        "tool_call_id": tool_call.id,
                         "role": "tool",
-                        "name": tool_call["function"]["name"],
-                        "content": "Tool execution was denied by the user.",
+                        "name": tool_call.function.name,
+                        "content": f"Tool execution failed: {str(e)}",
                     }
-                )
+            else:
+                # Tool was rejected or no decision found, add rejection message
+                tool_call_message = {
+                    "tool_call_id": tool_call.id,
+                    "role": "tool",
+                    "name": tool_call.function.name,
+                    "content": "Tool execution was denied by the user.",
+                }
+
+            # It is expected that the tool call result directly follows the tool call request from the LLM
+            # The API call may contain a user ask which is appended to the messages so we can't just append
+            # tool call results; they need to be inserted right after the llm's message requesting tool calls
+            messages.insert(
+                tool_call_with_decision.message_index + 1, tool_call_message
+            )

         return messages

@@ -427,12 +439,12 @@ class ToolCallingLLM:
             tools = None if i == max_steps else tools
             tool_choice = "auto" if tools else None

-            total_tokens = self.llm.count_tokens_for_message(messages)
+            tokens = self.llm.count_tokens(messages=messages, tools=tools)
             max_context_size = self.llm.get_context_window_size()
             maximum_output_token = self.llm.get_maximum_output_token()
             perf_timing.measure("count tokens")

-            if (total_tokens + maximum_output_token) > max_context_size:
+            if (tokens.total_tokens + maximum_output_token) > max_context_size:
                 logging.warning("Token limit exceeded. Truncating tool responses.")
                 truncated_res = self.truncate_messages_to_fit_context(
                     messages, max_context_size, maximum_output_token
@@ -483,7 +495,7 @@ class ToolCallingLLM:

             if incorrect_tool_call:
                 logging.warning(
-                    "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
+                    "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4.1' or other structured output compatible models. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
                 )
                 # disable structured output going forward and and retry
                 sentry_helper.capture_structured_output_incorrect_tool_call()
@@ -522,11 +534,17 @@ class ToolCallingLLM:
                 )
                 costs.total_cost += post_processing_cost

-                self.llm.count_tokens_for_message(messages)
+                tokens = self.llm.count_tokens(messages=messages, tools=tools)
+
+                add_token_count_to_metadata(
+                    tokens=tokens,
+                    full_llm_response=full_response,
+                    max_context_size=max_context_size,
+                    maximum_output_token=maximum_output_token,
+                    metadata=metadata,
+                )
                 perf_timing.end(f"- completed in {i} iterations -")
-                metadata["usage"] = get_llm_usage(full_response)
-                metadata["max_tokens"] = max_context_size
-                metadata["max_output_tokens"] = maximum_output_token
+
                 return LLMResult(
                     result=post_processed_response,
                     unprocessed_result=raw_response,
@@ -650,6 +668,7 @@ class ToolCallingLLM:
         tool_call_id: str,
         tool_name: str,
         tool_arguments: str,
+        user_approved: bool,
         previous_tool_calls: list[dict],
         tool_number: Optional[int] = None,
     ) -> ToolCallResult:
@@ -671,7 +690,7 @@ class ToolCallingLLM:
         tool_response = self._directly_invoke_tool_call(
             tool_name=tool_name,
             tool_params=tool_params,
-            user_approved=False,
+            user_approved=user_approved,
             tool_number=tool_number,
         )

@@ -716,6 +735,7 @@ class ToolCallingLLM:
         previous_tool_calls: list[dict],
         trace_span=None,
         tool_number=None,
+        user_approved: bool = False,
     ) -> ToolCallResult:
         if trace_span is None:
             trace_span = DummySpan()
@@ -748,6 +768,7 @@ class ToolCallingLLM:
             tool_arguments,
             previous_tool_calls=previous_tool_calls,
             tool_number=tool_number,
+            user_approved=user_approved,
         )

         prevent_overly_big_tool_response(
@@ -858,7 +879,7 @@ class ToolCallingLLM:
             messages,
             max_context_size,
             maximum_output_token,
-            self.llm.count_tokens_for_message,
+            self.llm.count_tokens,
         )
         if truncated_res.truncations:
             sentry_helper.capture_tool_truncations(truncated_res.truncations)
@@ -903,12 +924,12 @@ class ToolCallingLLM:
             tools = None if i == max_steps else tools
             tool_choice = "auto" if tools else None

-            total_tokens = self.llm.count_tokens_for_message(messages)  # type: ignore
+            tokens = self.llm.count_tokens(messages=messages, tools=tools)  # type: ignore
             max_context_size = self.llm.get_context_window_size()
             maximum_output_token = self.llm.get_maximum_output_token()
             perf_timing.measure("count tokens")

-            if (total_tokens + maximum_output_token) > max_context_size:
+            if (tokens.total_tokens + maximum_output_token) > max_context_size:
                 logging.warning("Token limit exceeded. Truncating tool responses.")
                 truncated_res = self.truncate_messages_to_fit_context(
                     messages, max_context_size, maximum_output_token
@@ -958,7 +979,7 @@ class ToolCallingLLM:

             if incorrect_tool_call:
                 logging.warning(
-                    "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
+                    "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4.1' or other structured output compatible models. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
                 )
                 # disable structured output going forward and and retry
                 sentry_helper.capture_structured_output_incorrect_tool_call()
@@ -972,12 +993,18 @@ class ToolCallingLLM:
                 )
             )

+            tokens = self.llm.count_tokens(messages=messages, tools=tools)
+            add_token_count_to_metadata(
+                tokens=tokens,
+                full_llm_response=full_response,
+                max_context_size=max_context_size,
+                maximum_output_token=maximum_output_token,
+                metadata=metadata,
+            )
+            yield build_stream_event_token_count(metadata=metadata)
+
             tools_to_call = getattr(response_message, "tool_calls", None)
             if not tools_to_call:
-                self.llm.count_tokens_for_message(messages)
-                metadata["usage"] = get_llm_usage(full_response)
-                metadata["max_tokens"] = max_context_size
-                metadata["max_output_tokens"] = maximum_output_token
                 yield StreamMessage(
                     event=StreamEvents.ANSWER_END,
                     data={
@@ -993,7 +1020,11 @@ class ToolCallingLLM:
             if reasoning or message:
                 yield StreamMessage(
                     event=StreamEvents.AI_MESSAGE,
-                    data={"content": message, "reasoning": reasoning},
+                    data={
+                        "content": message,
+                        "reasoning": reasoning,
+                        "metadata": metadata,
+                    },
                 )

             perf_timing.measure("pre-tool-calls")
@@ -1069,23 +1100,11 @@ class ToolCallingLLM:
             # If we have approval required tools, end the stream with pending approvals
             if pending_approvals:
                 # Add assistant message with pending tool calls
-                assistant_msg = {
-                    "role": "assistant",
-                    "content": response_message.content,
-                    "tool_calls": [
-                        {
-                            "id": result.tool_call_id,
-                            "type": "function",
-                            "function": {
-                                "name": result.tool_name,
-                                "arguments": json.dumps(result.result.params or {}),
-                            },
-                        }
-                        for result in approval_required_tools
-                    ],
-                    "pending_approval": True,
-                }
-                messages.append(assistant_msg)
+                for result in approval_required_tools:
+                    tool_call = self.find_assistant_tool_call_request(
+                        tool_call_id=result.tool_call_id, messages=messages
+                    )
+                    tool_call["pending_approval"] = True

                 # End stream with approvals required
                 yield StreamMessage(
@@ -1108,6 +1127,21 @@ class ToolCallingLLM:
                 f"Too many LLM calls - exceeded max_steps: {i}/{self.max_steps}"
             )

+    def find_assistant_tool_call_request(
+        self, tool_call_id: str, messages: list[dict[str, Any]]
+    ) -> dict[str, Any]:
+        for message in messages:
+            if message.get("role") == "assistant":
+                for tool_call in message.get("tool_calls", []):
+                    if tool_call.get("id") == tool_call_id:
+                        return tool_call
+
+        # Should not happen unless there is a bug.
+        # If we are here
+        raise Exception(
+            f"Failed to find assistant request for a tool_call in conversation history. tool_call_id={tool_call_id}"
+        )
+

 # TODO: consider getting rid of this entirely and moving templating into the cmds in holmes_cli.py
 class IssueInvestigator(ToolCallingLLM):
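
Note on the token accounting changes above: count_tokens_for_message (which returned a plain int) is replaced throughout by llm.count_tokens(messages=..., tools=...), whose result exposes a total_tokens attribute, and the counts now flow into the response metadata via add_token_count_to_metadata and build_stream_event_token_count. A minimal sketch of the new guard, assuming a simple result object with only a total_tokens field (the real return type in holmes.core.llm may carry more detail):

from dataclasses import dataclass

@dataclass
class TokenCount:
    # Hypothetical stand-in for the object returned by llm.count_tokens()
    total_tokens: int

def fits_in_context(tokens: TokenCount, max_context_size: int, maximum_output_token: int) -> bool:
    # Same check as in the diff: input tokens plus reserved output tokens must fit the window
    return (tokens.total_tokens + maximum_output_token) <= max_context_size

print(fits_in_context(TokenCount(total_tokens=7500), 16385, 4096))   # True
print(fits_in_context(TokenCount(total_tokens=13000), 16385, 4096))  # False, triggers truncation in the diff
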
holmes/core/tools.py CHANGED
@@ -158,6 +158,7 @@ class ToolParameter(BaseModel):
     required: bool = True
     properties: Optional[Dict[str, "ToolParameter"]] = None  # For object types
     items: Optional["ToolParameter"] = None  # For array item schemas
+    enum: Optional[List[str]] = None  # For restricting to specific values


 class ToolInvokeContext(BaseModel):
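
The new enum field lets a ToolParameter restrict a string argument to a fixed set of values (the TodoWriteTool hunk further down uses it for status). A rough, illustrative sketch of how such a parameter would typically surface in a JSON-schema style tool definition; the conversion helper below is hypothetical and not holmes' actual serializer:

from typing import Dict, List, Optional
from pydantic import BaseModel

class ToolParameter(BaseModel):
    # Trimmed-down copy of the fields confirmed in the diff, for illustration only
    type: str = "string"
    required: bool = True
    enum: Optional[List[str]] = None

def to_json_schema(param: ToolParameter) -> Dict:
    # Hypothetical helper: an enum becomes an allowed-values constraint in the schema
    schema: Dict = {"type": param.type}
    if param.enum:
        schema["enum"] = param.enum
    return schema

status = ToolParameter(type="string", required=True, enum=["pending", "in_progress", "completed"])
print(to_json_schema(status))  # {'type': 'string', 'enum': ['pending', 'in_progress', 'completed']}
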
@@ -682,7 +683,26 @@ class Toolset(BaseModel):
     def check_prerequisites(self):
         self.status = ToolsetStatusEnum.ENABLED

-        for prereq in self.prerequisites:
+        # Sort prerequisites by type to fail fast on missing env vars before
+        # running slow commands (e.g., ArgoCD checks that timeout):
+        # 1. Static checks (instant)
+        # 2. Environment variable checks (instant, often required by commands)
+        # 3. Callable checks (variable speed)
+        # 4. Command checks (slowest - may timeout or hang)
+        def prereq_priority(prereq):
+            if isinstance(prereq, StaticPrerequisite):
+                return 0
+            elif isinstance(prereq, ToolsetEnvironmentPrerequisite):
+                return 1
+            elif isinstance(prereq, CallablePrerequisite):
+                return 2
+            elif isinstance(prereq, ToolsetCommandPrerequisite):
+                return 3
+            return 4  # Unknown types go last
+
+        sorted_prereqs = sorted(self.prerequisites, key=prereq_priority)
+
+        for prereq in sorted_prereqs:
             if isinstance(prereq, ToolsetCommandPrerequisite):
                 try:
                     command = self.interpolate_command(prereq.command)
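
The reordering above only changes when each prerequisite runs, not whether it runs: a toolset with missing environment variables now fails fast before any slow command check is attempted. A self-contained sketch of the same sort-by-priority idea, using dummy classes in place of the real holmes prerequisite types:

# Dummy stand-ins for the holmes prerequisite classes, for illustration only
class StaticPrerequisite: ...
class ToolsetEnvironmentPrerequisite: ...
class CallablePrerequisite: ...
class ToolsetCommandPrerequisite: ...

def prereq_priority(prereq) -> int:
    # Cheap, instant checks first; slow command checks last
    order = [
        StaticPrerequisite,
        ToolsetEnvironmentPrerequisite,
        CallablePrerequisite,
        ToolsetCommandPrerequisite,
    ]
    for priority, cls in enumerate(order):
        if isinstance(prereq, cls):
            return priority
    return len(order)  # unknown types go last

prereqs = [ToolsetCommandPrerequisite(), ToolsetEnvironmentPrerequisite(), StaticPrerequisite()]
print([type(p).__name__ for p in sorted(prereqs, key=prereq_priority)])
# ['StaticPrerequisite', 'ToolsetEnvironmentPrerequisite', 'ToolsetCommandPrerequisite']
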
holmes/core/tools_utils/token_counting.py CHANGED
@@ -10,4 +10,5 @@ def count_tool_response_tokens(
         "role": "tool",
         "content": format_tool_result_data(structured_tool_result),
     }
-    return llm.count_tokens_for_message([message])
+    tokens = llm.count_tokens([message])
+    return tokens.total_tokens
holmes/core/tools_utils/tool_context_window_limiter.py CHANGED
@@ -1,5 +1,8 @@
 from typing import Optional
-from holmes.common.env_vars import TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
+from holmes.common.env_vars import (
+    TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT,
+    TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS,
+)
 from holmes.core.llm import LLM
 from holmes.core.tools import StructuredToolResultStatus
 from holmes.core.models import ToolCallResult
@@ -16,8 +19,12 @@ def get_pct_token_count(percent_of_total_context_window: float, llm: LLM) -> int


 def get_max_token_count_for_single_tool(llm: LLM) -> int:
-    return get_pct_token_count(
-        percent_of_total_context_window=TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT, llm=llm
+    return min(
+        TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS,
+        get_pct_token_count(
+            percent_of_total_context_window=TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT,
+            llm=llm,
+        ),
     )


@@ -25,7 +32,9 @@ def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM)
     max_tokens_allowed = get_max_token_count_for_single_tool(llm)

     message = tool_call_result.as_tool_call_message()
-    messages_token = llm.count_tokens_for_message(messages=[message])
+
+    tokens = llm.count_tokens(messages=[message])
+    messages_token = tokens.total_tokens

     if messages_token > max_tokens_allowed:
         relative_pct = ((messages_token - max_tokens_allowed) / messages_token) * 100
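
With this change, the budget for a single tool response is the smaller of an absolute cap (TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS) and the percentage-based cap, so one tool output can no longer scale without bound on very large context windows. A quick sketch of the arithmetic, using made-up values rather than the actual holmes defaults:

# Illustrative numbers only, not the real environment variable defaults
pct_based_cap = 40_000   # what get_pct_token_count() might return for a large-context model
absolute_cap = 16_000    # hypothetical TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS

max_for_single_tool = min(absolute_cap, pct_based_cap)
print(max_for_single_tool)  # 16000: the absolute cap wins on large windows
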
holmes/interactive.py CHANGED
@@ -480,10 +480,14 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> No
         return

     # Calculate context statistics
-    total_tokens = ai.llm.count_tokens_for_message(messages)
+    tokens_metadata = ai.llm.count_tokens(
+        messages
+    )  # TODO: pass tools to also count tokens used by input tools
     max_context_size = ai.llm.get_context_window_size()
     max_output_tokens = ai.llm.get_maximum_output_token()
-    available_tokens = max_context_size - total_tokens - max_output_tokens
+    available_tokens = (
+        max_context_size - tokens_metadata.total_tokens - max_output_tokens
+    )

     # Analyze token distribution by role and tool calls
     role_token_usage: DefaultDict[str, int] = defaultdict(int)
@@ -492,19 +496,21 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> No

     for msg in messages:
         role = msg.get("role", "unknown")
-        msg_tokens = ai.llm.count_tokens_for_message([msg])
-        role_token_usage[role] += msg_tokens
+        message_tokens = ai.llm.count_tokens(
+            [msg]
+        )  # TODO: pass tools to also count tokens used by input tools
+        role_token_usage[role] += message_tokens.total_tokens

         # Track individual tool usage
         if role == "tool":
             tool_name = msg.get("name", "unknown_tool")
-            tool_token_usage[tool_name] += msg_tokens
+            tool_token_usage[tool_name] += message_tokens.total_tokens
             tool_call_counts[tool_name] += 1

     # Display context information
     console.print(f"[bold {STATUS_COLOR}]Conversation Context:[/bold {STATUS_COLOR}]")
     console.print(
-        f" Context used: {total_tokens:,} / {max_context_size:,} tokens ({(total_tokens / max_context_size) * 100:.1f}%)"
+        f" Context used: {tokens_metadata.total_tokens:,} / {max_context_size:,} tokens ({(tokens_metadata.total_tokens / max_context_size) * 100:.1f}%)"
     )
     console.print(
         f" Space remaining: {available_tokens:,} for input ({(available_tokens / max_context_size) * 100:.1f}%) + {max_output_tokens:,} reserved for output ({(max_output_tokens / max_context_size) * 100:.1f}%)"
@@ -515,7 +521,11 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> No
     for role in ["system", "user", "assistant", "tool"]:
         if role in role_token_usage:
             tokens = role_token_usage[role]
-            percentage = (tokens / total_tokens) * 100 if total_tokens > 0 else 0
+            percentage = (
+                (tokens / tokens_metadata.total_tokens) * 100
+                if tokens_metadata.total_tokens > 0
+                else 0
+            )
             role_name = {
                 "system": "system prompt",
                 "user": "user messages",
holmes/plugins/prompts/_general_instructions.jinja2 CHANGED
@@ -12,8 +12,7 @@
 * do not stop investigating until you are at the final root cause you are able to find.
 * use the "five whys" methodology to find the root cause.
 * for example, if you found a problem in microservice A that is due to an error in microservice B, look at microservice B too and find the error in that.
-* if you cannot find the resource/application that the user referred to, assume they made a typo or included/excluded characters like - and.
-* in this case, try to find substrings or search for the correct spellings
+* if you cannot find the resource/application that the user referred to, assume they made a typo or included/excluded characters like - and in this case, try to find substrings or search for the correct spellings
 * always provide detailed information like exact resource names, versions, labels, etc
 * even if you found the root cause, keep investigating to find other possible root causes and to gather data for the answer like exact names
 * if a runbook url is present you MUST fetch the runbook before beginning your investigation
holmes/plugins/toolsets/__init__.py CHANGED
@@ -44,6 +44,9 @@ from holmes.plugins.toolsets.mcp.toolset_mcp import RemoteMCPToolset
 from holmes.plugins.toolsets.newrelic.newrelic import NewRelicToolset
 from holmes.plugins.toolsets.opensearch.opensearch import OpenSearchToolset
 from holmes.plugins.toolsets.opensearch.opensearch_logs import OpenSearchLogsToolset
+from holmes.plugins.toolsets.opensearch.opensearch_query_assist import (
+    OpenSearchQueryAssistToolset,
+)
 from holmes.plugins.toolsets.opensearch.opensearch_traces import OpenSearchTracesToolset
 from holmes.plugins.toolsets.rabbitmq.toolset_rabbitmq import RabbitMQToolset
 from holmes.plugins.toolsets.robusta.robusta import RobustaToolset
@@ -93,6 +96,7 @@ def load_python_toolsets(dal: Optional[SupabaseDal]) -> List[Toolset]:
         DatadogRDSToolset(),
         OpenSearchLogsToolset(),
         OpenSearchTracesToolset(),
+        OpenSearchQueryAssistToolset(),
         CoralogixLogsToolset(),
         RabbitMQToolset(),
         GitToolset(),
holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py CHANGED
@@ -42,7 +42,6 @@ class MongoDBAtlasToolset(Toolset):
     def __init__(self):
         super().__init__(
             prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
-            experimental=True,
             tools=[
                 ReturnProjectAlerts(toolset=self),
                 ReturnProjectProcesses(toolset=self),
holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py CHANGED
@@ -60,7 +60,6 @@ class AzureSQLToolset(BaseAzureSQLToolset):
  docs_url="https://kagi.com/proxy/png-clipart-microsoft-sql-server-microsoft-azure-sql-database-microsoft-text-logo-thumbnail.png?c=4Sg1bvcUGOrhnDzXgoBBa0G0j27ykgskX4a8cLrZp_quzqlpVGVG02OqQtezTxy7lB6ydmTKgbVAn_F7BxofxK6LKKUZSpjJ1huIAsXPVaXyakO4sWXFiX0Wz_8WjkA0AIlO_oFfW31AKaj5RcvGcr3siy0n5kW-GcqdpeBWsmm_huxUT6RycULFCDFBwuUzHvVl5TW3cYqlMxT8ecPZfg%3D%3D",
             icon_url="https://upload.wikimedia.org/wikipedia/commons/thumb/f/f7/Azure_SQL_Database_logo.svg/1200px-Azure_SQL_Database_logo.svg.png",
             tags=[ToolsetTag.CORE],
-            experimental=True,
             tools=[
                 AnalyzeDatabaseHealthStatus(self),
                 AnalyzeDatabasePerformance(self),
holmes/plugins/toolsets/grafana/grafana_api.py CHANGED
@@ -27,7 +27,7 @@ def grafana_health_check(config: GrafanaConfig) -> Tuple[bool, str]:
         response.raise_for_status()
         return True, ""
     except Exception as e:
-        logging.error(f"Failed to fetch grafana health status at {url}", exc_info=True)
+        logging.debug(f"Failed to fetch grafana health status at {url}", exc_info=True)
         error_msg = f"Failed to fetch grafana health status at {url}. {str(e)}"

         # Add helpful hint if this looks like a common misconfiguration
holmes/plugins/toolsets/investigator/core_investigation.py CHANGED
@@ -29,7 +29,11 @@ class TodoWriteTool(Tool):
                 properties={
                     "id": ToolParameter(type="string", required=True),
                     "content": ToolParameter(type="string", required=True),
-                    "status": ToolParameter(type="string", required=True),
+                    "status": ToolParameter(
+                        type="string",
+                        required=True,
+                        enum=["pending", "in_progress", "completed"],
+                    ),
                 },
             ),
         ),
@@ -58,22 +62,20 @@ class TodoWriteTool(Tool):
         content_width = max(max_content_width, len("Content"))
         status_width = max(max_status_display_width, len("Status"))

-        # Build table
         separator = f"+{'-' * (id_width + 2)}+{'-' * (content_width + 2)}+{'-' * (status_width + 2)}+"
         header = f"| {'ID':<{id_width}} | {'Content':<{content_width}} | {'Status':<{status_width}} |"
-
-        # Log the table
-        logging.info("Updated Investigation Tasks:")
-        logging.info(separator)
-        logging.info(header)
-        logging.info(separator)
+        tasks_to_display = []

         for task in tasks:
             status_display = f"{status_icons[task.status.value]} {task.status.value}"
             row = f"| {task.id:<{id_width}} | {task.content:<{content_width}} | {status_display:<{status_width}} |"
-            logging.info(row)
+            tasks_to_display.append(row)

-        logging.info(separator)
+        logging.info(
+            f"Task List:\n{separator}\n{header}\n{separator}\n"
+            + "\n".join(tasks_to_display)
+            + f"\n{separator}"
+        )

     def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         try:
@@ -90,7 +92,7 @@ class TodoWriteTool(Tool):
                 )
                 tasks.append(task)

-            logging.info(f"Tasks: {len(tasks)}")
+            logging.debug(f"Tasks: {len(tasks)}")

             self.print_tasks_table(tasks)
             formatted_tasks = format_tasks(tasks)
@@ -116,8 +118,7 @@ class TodoWriteTool(Tool):
         )

     def get_parameterized_one_liner(self, params: Dict) -> str:
-        todos = params.get("todos", [])
-        return f"Write {todos} investigation tasks"
+        return "Update investigation tasks"


 class CoreInvestigationToolset(Toolset):