holmesgpt 0.14.4a0__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (37)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +12 -10
  3. holmes/common/env_vars.py +22 -0
  4. holmes/config.py +51 -4
  5. holmes/core/conversations.py +3 -2
  6. holmes/core/llm.py +226 -72
  7. holmes/core/openai_formatting.py +13 -0
  8. holmes/core/supabase_dal.py +33 -42
  9. holmes/core/tool_calling_llm.py +185 -282
  10. holmes/core/tools.py +21 -1
  11. holmes/core/tools_utils/token_counting.py +2 -1
  12. holmes/core/tools_utils/tool_context_window_limiter.py +32 -30
  13. holmes/core/truncation/compaction.py +59 -0
  14. holmes/core/truncation/input_context_window_limiter.py +218 -0
  15. holmes/interactive.py +17 -7
  16. holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
  17. holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
  18. holmes/plugins/toolsets/__init__.py +4 -0
  19. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +0 -1
  20. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
  21. holmes/plugins/toolsets/grafana/grafana_api.py +1 -1
  22. holmes/plugins/toolsets/investigator/core_investigation.py +34 -24
  23. holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
  24. holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
  25. holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
  26. holmes/plugins/toolsets/prometheus/prometheus.py +1 -1
  27. holmes/plugins/toolsets/robusta/robusta.py +35 -8
  28. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +4 -3
  29. holmes/plugins/toolsets/service_discovery.py +1 -1
  30. holmes/plugins/toolsets/servicenow/servicenow.py +0 -1
  31. holmes/utils/stream.py +31 -1
  32. {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/METADATA +6 -2
  33. {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/RECORD +36 -31
  34. holmes/core/performance_timing.py +0 -72
  35. {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/LICENSE.txt +0 -0
  36. {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/WHEEL +0 -0
  37. {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/entry_points.txt +0 -0
holmes/core/tools.py CHANGED
@@ -158,6 +158,7 @@ class ToolParameter(BaseModel):
     required: bool = True
     properties: Optional[Dict[str, "ToolParameter"]] = None  # For object types
     items: Optional["ToolParameter"] = None  # For array item schemas
+    enum: Optional[List[str]] = None  # For restricting to specific values


 class ToolInvokeContext(BaseModel):
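The new `enum` field presumably lets a tool restrict a parameter to a fixed set of values in the schema surfaced to the model (the related `holmes/core/openai_formatting.py` change in this release is not shown here). A minimal sketch of how such a restriction is conventionally expressed in an OpenAI-style tool schema; only "enum" corresponds to the new field, the surrounding keys follow standard JSON Schema convention and are not taken from this diff:

```python
# Illustrative JSON-Schema-style parameter with an enum restriction (assumed
# shape, not code from the package).
parameter_schema = {
    "type": "string",
    "description": "Log level to filter by",
    "enum": ["debug", "info", "warning", "error"],
}
```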
@@ -682,7 +683,26 @@ class Toolset(BaseModel):
     def check_prerequisites(self):
         self.status = ToolsetStatusEnum.ENABLED

-        for prereq in self.prerequisites:
+        # Sort prerequisites by type to fail fast on missing env vars before
+        # running slow commands (e.g., ArgoCD checks that timeout):
+        # 1. Static checks (instant)
+        # 2. Environment variable checks (instant, often required by commands)
+        # 3. Callable checks (variable speed)
+        # 4. Command checks (slowest - may timeout or hang)
+        def prereq_priority(prereq):
+            if isinstance(prereq, StaticPrerequisite):
+                return 0
+            elif isinstance(prereq, ToolsetEnvironmentPrerequisite):
+                return 1
+            elif isinstance(prereq, CallablePrerequisite):
+                return 2
+            elif isinstance(prereq, ToolsetCommandPrerequisite):
+                return 3
+            return 4  # Unknown types go last
+
+        sorted_prereqs = sorted(self.prerequisites, key=prereq_priority)
+
+        for prereq in sorted_prereqs:
             if isinstance(prereq, ToolsetCommandPrerequisite):
                 try:
                     command = self.interpolate_command(prereq.command)
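The comment block in this hunk describes a simple fail-fast ordering. A standalone sketch of the same pattern with invented check names, for readers unfamiliar with `sorted()` priority keys:

```python
# Cheap checks first, expensive ones last, unknown kinds at the end.
checks = ["run_command", "env_var", "static", "callable"]
priority = {"static": 0, "env_var": 1, "callable": 2, "run_command": 3}

for check in sorted(checks, key=lambda c: priority.get(c, 4)):
    print(f"checking {check}")
    # a failed env-var check surfaces here before any slow command check runs
```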
holmes/core/tools_utils/token_counting.py CHANGED
@@ -10,4 +10,5 @@ def count_tool_response_tokens(
         "role": "tool",
         "content": format_tool_result_data(structured_tool_result),
     }
-    return llm.count_tokens_for_message([message])
+    tokens = llm.count_tokens([message])
+    return tokens.total_tokens
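The call shape changes from the int-returning `count_tokens_for_message` to a `count_tokens` that returns a metadata object. A hedged usage sketch, assuming an `LLM` instance named `llm`; fields of the returned object other than `total_tokens` are not shown in this diff:

```python
message = {"role": "tool", "content": '{"status": "SUCCESS", "data": "..."}'}
tokens = llm.count_tokens([message])  # returns token-count metadata, not an int
print(tokens.total_tokens)            # the value the old API returned directly
```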
holmes/core/tools_utils/tool_context_window_limiter.py CHANGED
@@ -1,11 +1,16 @@
 from typing import Optional
-from holmes.common.env_vars import TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
+from pydantic import BaseModel
 from holmes.core.llm import LLM
 from holmes.core.tools import StructuredToolResultStatus
 from holmes.core.models import ToolCallResult
 from holmes.utils import sentry_helper


+class ToolCallSizeMetadata(BaseModel):
+    messages_token: int
+    max_tokens_allowed: int
+
+
 def get_pct_token_count(percent_of_total_context_window: float, llm: LLM) -> int:
     context_window_size = llm.get_context_window_size()

@@ -15,41 +20,38 @@ def get_pct_token_count(percent_of_total_context_window: float, llm: LLM) -> int
     return context_window_size


-def get_max_token_count_for_single_tool(llm: LLM) -> int:
-    return get_pct_token_count(
-        percent_of_total_context_window=TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT, llm=llm
-    )
-
-
-def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM):
-    max_tokens_allowed = get_max_token_count_for_single_tool(llm)
-
-    message = tool_call_result.as_tool_call_message()
-    messages_token = llm.count_tokens_for_message(messages=[message])
-
-    if messages_token > max_tokens_allowed:
-        relative_pct = ((messages_token - max_tokens_allowed) / messages_token) * 100
-
-        error_message: Optional[str] = (
-            f"The tool call result is too large to return: {messages_token} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
+def is_tool_call_too_big(
+    tool_call_result: ToolCallResult, llm: LLM
+) -> tuple[bool, Optional[ToolCallSizeMetadata]]:
+    if tool_call_result.result.status == StructuredToolResultStatus.SUCCESS:
+        message = tool_call_result.as_tool_call_message()
+
+        tokens = llm.count_tokens(messages=[message])
+        max_tokens_allowed = llm.get_max_token_count_for_single_tool()
+        return (
+            tokens.total_tokens > max_tokens_allowed,
+            ToolCallSizeMetadata(
+                messages_token=tokens.total_tokens,
+                max_tokens_allowed=max_tokens_allowed,
+            ),
         )
+    return False, None

-        if tool_call_result.result.status == StructuredToolResultStatus.NO_DATA:
-            error_message = None
-            # tool_call_result.result.data is set to None below which is expected to fix the issue
-        elif tool_call_result.result.status == StructuredToolResultStatus.ERROR:
-            original_error = (
-                tool_call_result.result.error
-                or tool_call_result.result.data
-                or "Unknown error"
-            )
-            truncated_error = str(original_error)[:100]
-            error_message = f"The tool call returned an error it is too large to return\nThe following original error is truncated:\n{truncated_error}"

+def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM):
+    tool_call_result_is_too_big, metadata = is_tool_call_too_big(
+        tool_call_result=tool_call_result, llm=llm
+    )
+    if tool_call_result_is_too_big and metadata:
+        relative_pct = (
+            (metadata.messages_token - metadata.max_tokens_allowed)
+            / metadata.messages_token
+        ) * 100
+        error_message = f"The tool call result is too large to return: {metadata.messages_token} tokens.\nThe maximum allowed tokens is {metadata.max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
         tool_call_result.result.status = StructuredToolResultStatus.ERROR
         tool_call_result.result.data = None
         tool_call_result.result.error = error_message

         sentry_helper.capture_toolcall_contains_too_many_tokens(
-            tool_call_result, messages_token, max_tokens_allowed
+            tool_call_result, metadata.messages_token, metadata.max_tokens_allowed
         )
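For orientation, a hedged sketch of how the reworked guard behaves, assuming a populated `ToolCallResult` named `tool_call_result` and an `LLM` instance `llm` (both placeholders, not from the diff):

```python
too_big, meta = is_tool_call_too_big(tool_call_result=tool_call_result, llm=llm)
if too_big and meta:
    # e.g. 120_000 tokens counted vs. 40_000 allowed: the limit is ~66.7% smaller
    print(meta.messages_token, meta.max_tokens_allowed)

# Or let the guard rewrite the oversized result in place:
prevent_overly_big_tool_response(tool_call_result, llm)
# Per the new code, only oversized SUCCESS results are flagged; their status
# becomes ERROR, data is cleared, and error carries the "too large to return"
# instruction aimed at the model.
```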
holmes/core/truncation/compaction.py ADDED
@@ -0,0 +1,59 @@
+import logging
+from typing import Optional
+from holmes.core.llm import LLM
+from holmes.plugins.prompts import load_and_render_prompt
+from litellm.types.utils import ModelResponse
+
+
+def strip_system_prompt(
+    conversation_history: list[dict],
+) -> tuple[list[dict], Optional[dict]]:
+    if not conversation_history:
+        return conversation_history, None
+    first_message = conversation_history[0]
+    if first_message and first_message.get("role") == "system":
+        return conversation_history[1:], first_message
+    return conversation_history[:], None
+
+
+def compact_conversation_history(
+    original_conversation_history: list[dict], llm: LLM
+) -> list[dict]:
+    conversation_history, system_prompt_message = strip_system_prompt(
+        original_conversation_history
+    )
+    compaction_instructions = load_and_render_prompt(
+        prompt="builtin://conversation_history_compaction.jinja2", context={}
+    )
+    conversation_history.append({"role": "user", "content": compaction_instructions})
+
+    response: ModelResponse = llm.completion(conversation_history)  # type: ignore
+    response_message = None
+    if (
+        response
+        and response.choices
+        and response.choices[0]
+        and response.choices[0].message  # type:ignore
+    ):
+        response_message = response.choices[0].message  # type:ignore
+    else:
+        logging.error(
+            "Failed to compact conversation history. Unexpected LLM's response for compaction"
+        )
+        return original_conversation_history
+
+    compacted_conversation_history: list[dict] = []
+    if system_prompt_message:
+        compacted_conversation_history.append(system_prompt_message)
+    compacted_conversation_history.append(
+        response_message.model_dump(
+            exclude_defaults=True, exclude_unset=True, exclude_none=True
+        )
+    )
+    compacted_conversation_history.append(
+        {
+            "role": "system",
+            "content": "The conversation history has been compacted to preserve available space in the context window. Continue.",
+        }
+    )
+    return compacted_conversation_history
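A hedged usage sketch of the new compaction helper; the `llm` instance and the message contents below are illustrative placeholders, not taken from the package:

```python
from holmes.core.truncation.compaction import compact_conversation_history

history = [
    {"role": "system", "content": "You are a Kubernetes troubleshooting assistant."},
    {"role": "user", "content": "Why is the payments deployment crash-looping?"},
    {"role": "assistant", "content": "...long investigation with many tool results..."},
]

compacted = compact_conversation_history(original_conversation_history=history, llm=llm)
# Expected shape per the code above: [original system prompt, one summarized
# assistant message, a system notice that the history was compacted]
```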
holmes/core/truncation/input_context_window_limiter.py ADDED
@@ -0,0 +1,218 @@
+import logging
+from typing import Any, Optional
+from pydantic import BaseModel
+import sentry_sdk
+from holmes.common.env_vars import (
+    ENABLE_CONVERSATION_HISTORY_COMPACTION,
+    MAX_OUTPUT_TOKEN_RESERVATION,
+)
+from holmes.core.llm import (
+    LLM,
+    TokenCountMetadata,
+    get_context_window_compaction_threshold_pct,
+)
+from holmes.core.models import TruncationMetadata, TruncationResult
+from holmes.core.truncation.compaction import compact_conversation_history
+from holmes.utils import sentry_helper
+from holmes.utils.stream import StreamEvents, StreamMessage
+
+
+TRUNCATION_NOTICE = "\n\n[TRUNCATED]"
+
+
+def _truncate_tool_message(
+    msg: dict, allocated_space: int, needed_space: int
+) -> TruncationMetadata:
+    msg_content = msg["content"]
+    tool_call_id = msg["tool_call_id"]
+    tool_name = msg["name"]
+
+    # Ensure the indicator fits in the allocated space
+    if allocated_space > len(TRUNCATION_NOTICE):
+        original = msg_content if isinstance(msg_content, str) else str(msg_content)
+        msg["content"] = (
+            original[: allocated_space - len(TRUNCATION_NOTICE)] + TRUNCATION_NOTICE
+        )
+        end_index = allocated_space - len(TRUNCATION_NOTICE)
+    else:
+        msg["content"] = TRUNCATION_NOTICE[:allocated_space]
+        end_index = allocated_space
+
+    msg.pop("token_count", None)  # Remove token_count if present
+    logging.info(
+        f"Truncating tool message '{tool_name}' from {needed_space} to {allocated_space} tokens"
+    )
+    truncation_metadata = TruncationMetadata(
+        tool_call_id=tool_call_id,
+        start_index=0,
+        end_index=end_index,
+        tool_name=tool_name,
+        original_token_count=needed_space,
+    )
+    return truncation_metadata
+
+
+# TODO: I think there's a bug here because we don't account for the 'role' or json structure like '{...}' when counting tokens
+# However, in practice it works because we reserve enough space for the output tokens that the minor inconsistency does not matter
+# We should fix this in the future
+# TODO: we truncate using character counts not token counts - this means we're overly agressive with truncation - improve it by considering
+# token truncation and not character truncation
+def truncate_messages_to_fit_context(
+    messages: list, max_context_size: int, maximum_output_token: int, count_tokens_fn
+) -> TruncationResult:
+    """
+    Helper function to truncate tool messages to fit within context limits.
+
+    Args:
+        messages: List of message dictionaries with roles and content
+        max_context_size: Maximum context window size for the model
+        maximum_output_token: Maximum tokens reserved for model output
+        count_tokens_fn: Function to count tokens for a list of messages
+
+    Returns:
+        Modified list of messages with truncated tool responses
+
+    Raises:
+        Exception: If non-tool messages exceed available context space
+    """
+    messages_except_tools = [
+        message for message in messages if message["role"] != "tool"
+    ]
+    tokens = count_tokens_fn(messages_except_tools)
+    message_size_without_tools = tokens.total_tokens
+
+    tool_call_messages = [message for message in messages if message["role"] == "tool"]
+
+    reserved_for_output_tokens = min(maximum_output_token, MAX_OUTPUT_TOKEN_RESERVATION)
+    if message_size_without_tools >= (max_context_size - reserved_for_output_tokens):
+        logging.error(
+            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input."
+        )
+        raise Exception(
+            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - reserved_for_output_tokens} tokens available for input."
+        )
+
+    if len(tool_call_messages) == 0:
+        return TruncationResult(truncated_messages=messages, truncations=[])
+
+    available_space = (
+        max_context_size - message_size_without_tools - reserved_for_output_tokens
+    )
+    remaining_space = available_space
+    tool_call_messages.sort(
+        key=lambda x: count_tokens_fn(
+            [{"role": "tool", "content": x["content"]}]
+        ).total_tokens
+    )
+
+    truncations = []
+
+    # Allocate space starting with small tools and going to larger tools, while maintaining fairness
+    # Small tools can often get exactly what they need, while larger tools may need to be truncated
+    # We ensure fairness (no tool gets more than others that need it) and also maximize utilization (we don't leave space unused)
+    for i, msg in enumerate(tool_call_messages):
+        remaining_tools = len(tool_call_messages) - i
+        max_allocation = remaining_space // remaining_tools
+        needed_space = count_tokens_fn(
+            [{"role": "tool", "content": msg["content"]}]
+        ).total_tokens
+        allocated_space = min(needed_space, max_allocation)
+
+        if needed_space > allocated_space:
+            truncation_metadata = _truncate_tool_message(
+                msg, allocated_space, needed_space
+            )
+            truncations.append(truncation_metadata)
+
+        remaining_space -= allocated_space
+
+    if truncations:
+        sentry_helper.capture_tool_truncations(truncations)
+
+    return TruncationResult(truncated_messages=messages, truncations=truncations)
+
+
+class ContextWindowLimiterOutput(BaseModel):
+    metadata: dict
+    messages: list[dict]
+    events: list[StreamMessage]
+    max_context_size: int
+    maximum_output_token: int
+    tokens: TokenCountMetadata
+    conversation_history_compacted: bool
+
+
+@sentry_sdk.trace
+def limit_input_context_window(
+    llm: LLM, messages: list[dict], tools: Optional[list[dict[str, Any]]]
+) -> ContextWindowLimiterOutput:
+    events = []
+    metadata = {}
+    initial_tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+    max_context_size = llm.get_context_window_size()
+    maximum_output_token = llm.get_maximum_output_token()
+    conversation_history_compacted = False
+    if ENABLE_CONVERSATION_HISTORY_COMPACTION and (
+        initial_tokens.total_tokens + maximum_output_token
+    ) > (max_context_size * get_context_window_compaction_threshold_pct() / 100):
+        compacted_messages = compact_conversation_history(
+            original_conversation_history=messages, llm=llm
+        )
+        compacted_tokens = llm.count_tokens(compacted_messages, tools=tools)
+        compacted_total_tokens = compacted_tokens.total_tokens
+
+        if compacted_total_tokens < initial_tokens.total_tokens:
+            messages = compacted_messages
+            compaction_message = f"The conversation history has been compacted from {initial_tokens.total_tokens} to {compacted_total_tokens} tokens"
+            logging.info(compaction_message)
+            conversation_history_compacted = True
+            events.append(
+                StreamMessage(
+                    event=StreamEvents.CONVERSATION_HISTORY_COMPACTED,
+                    data={
+                        "content": compaction_message,
+                        "messages": compacted_messages,
+                        "metadata": {
+                            "initial_tokens": initial_tokens.total_tokens,
+                            "compacted_tokens": compacted_total_tokens,
+                        },
+                    },
+                )
+            )
+            events.append(
+                StreamMessage(
+                    event=StreamEvents.AI_MESSAGE,
+                    data={"content": compaction_message},
+                )
+            )
+        else:
+            logging.debug(
+                f"Failed to reduce token count when compacting conversation history. Original tokens:{initial_tokens.total_tokens}. Compacted tokens:{compacted_total_tokens}"
+            )
+
+    tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+    if (tokens.total_tokens + maximum_output_token) > max_context_size:
+        # Compaction was not sufficient. Truncating messages.
+        truncated_res = truncate_messages_to_fit_context(
+            messages=messages,
+            max_context_size=max_context_size,
+            maximum_output_token=maximum_output_token,
+            count_tokens_fn=llm.count_tokens,
+        )
+        metadata["truncations"] = [t.model_dump() for t in truncated_res.truncations]
+        messages = truncated_res.truncated_messages
+
+        # recount after truncation
+        tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+    else:
+        metadata["truncations"] = []
+
+    return ContextWindowLimiterOutput(
+        events=events,
+        messages=messages,
+        metadata=metadata,
+        max_context_size=max_context_size,
+        maximum_output_token=maximum_output_token,
+        tokens=tokens,
+        conversation_history_compacted=conversation_history_compacted,
+    )
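The allocation loop above walks tool responses from smallest to largest, giving each at most a fair share of the remaining budget. A standalone sketch with invented numbers showing how unused share rolls over to larger messages:

```python
# Three tool responses needing 1_000, 10_000 and 50_000 tokens,
# with 30_000 tokens of space left for all tool output combined.
needs = sorted([1_000, 10_000, 50_000])
remaining = 30_000
allocations = []
for i, needed in enumerate(needs):
    fair_share = remaining // (len(needs) - i)  # max_allocation in the code above
    allocated = min(needed, fair_share)
    allocations.append((needed, allocated))
    remaining -= allocated

print(allocations)  # [(1000, 1000), (10000, 10000), (50000, 19000)]
# Small responses fit untouched; only the largest one is truncated.
```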
holmes/interactive.py CHANGED
@@ -480,10 +480,14 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> No
         return

     # Calculate context statistics
-    total_tokens = ai.llm.count_tokens_for_message(messages)
+    tokens_metadata = ai.llm.count_tokens(
+        messages
+    )  # TODO: pass tools to also count tokens used by input tools
     max_context_size = ai.llm.get_context_window_size()
     max_output_tokens = ai.llm.get_maximum_output_token()
-    available_tokens = max_context_size - total_tokens - max_output_tokens
+    available_tokens = (
+        max_context_size - tokens_metadata.total_tokens - max_output_tokens
+    )

     # Analyze token distribution by role and tool calls
     role_token_usage: DefaultDict[str, int] = defaultdict(int)
@@ -492,19 +496,21 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> No

     for msg in messages:
         role = msg.get("role", "unknown")
-        msg_tokens = ai.llm.count_tokens_for_message([msg])
-        role_token_usage[role] += msg_tokens
+        message_tokens = ai.llm.count_tokens(
+            [msg]
+        )  # TODO: pass tools to also count tokens used by input tools
+        role_token_usage[role] += message_tokens.total_tokens

         # Track individual tool usage
         if role == "tool":
             tool_name = msg.get("name", "unknown_tool")
-            tool_token_usage[tool_name] += msg_tokens
+            tool_token_usage[tool_name] += message_tokens.total_tokens
             tool_call_counts[tool_name] += 1

     # Display context information
     console.print(f"[bold {STATUS_COLOR}]Conversation Context:[/bold {STATUS_COLOR}]")
     console.print(
-        f" Context used: {total_tokens:,} / {max_context_size:,} tokens ({(total_tokens / max_context_size) * 100:.1f}%)"
+        f" Context used: {tokens_metadata.total_tokens:,} / {max_context_size:,} tokens ({(tokens_metadata.total_tokens / max_context_size) * 100:.1f}%)"
     )
     console.print(
         f" Space remaining: {available_tokens:,} for input ({(available_tokens / max_context_size) * 100:.1f}%) + {max_output_tokens:,} reserved for output ({(max_output_tokens / max_context_size) * 100:.1f}%)"
@@ -515,7 +521,11 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> No
     for role in ["system", "user", "assistant", "tool"]:
         if role in role_token_usage:
            tokens = role_token_usage[role]
-            percentage = (tokens / total_tokens) * 100 if total_tokens > 0 else 0
+            percentage = (
+                (tokens / tokens_metadata.total_tokens) * 100
+                if tokens_metadata.total_tokens > 0
+                else 0
+            )
             role_name = {
                 "system": "system prompt",
                 "user": "user messages",
holmes/plugins/prompts/_general_instructions.jinja2 CHANGED
@@ -12,8 +12,7 @@
 * do not stop investigating until you are at the final root cause you are able to find.
 * use the "five whys" methodology to find the root cause.
 * for example, if you found a problem in microservice A that is due to an error in microservice B, look at microservice B too and find the error in that.
-* if you cannot find the resource/application that the user referred to, assume they made a typo or included/excluded characters like - and.
-* in this case, try to find substrings or search for the correct spellings
+* if you cannot find the resource/application that the user referred to, assume they made a typo or included/excluded characters like - and in this case, try to find substrings or search for the correct spellings
 * always provide detailed information like exact resource names, versions, labels, etc
 * even if you found the root cause, keep investigating to find other possible root causes and to gather data for the answer like exact names
 * if a runbook url is present you MUST fetch the runbook before beginning your investigation
holmes/plugins/prompts/conversation_history_compaction.jinja2 ADDED
@@ -0,0 +1,88 @@
+Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions.
+This summary should be thorough in capturing technical details, code patterns, and architectural decisions that would be essential for continuing development work without losing context.
+
+Before providing your final summary, wrap your analysis in <analysis> tags to organize your thoughts and ensure you've covered all necessary points. In your analysis process:
+
+1. Chronologically analyze each message and section of the conversation. For each section thoroughly identify:
+   - The user's explicit requests and intents
+   - Your approach to addressing the user's requests
+   - Key decisions, technical concepts and code patterns
+   - Specific details like kubernetes resource names, namespaces, relevant logs extracts (verbatim), etc
+   - What tools were called and the outcome or analysis of the tool output
+2. Double-check for technical accuracy and completeness, addressing each required element thoroughly.
+
+Your summary should include the following sections:
+
+1. Primary Request and Intent: Capture all of the user's explicit requests and intents in detail
+2. Key Technical Concepts: List all important technical concepts, technologies, and frameworks discussed.
+3. Resources: Enumerate specific kubernetes or cloud resources and logs extract examined. Pay special attention to the most recent messages and include logs or tool outputs where applicable and include a summary of why this resource is important.
+4. Tool calls: List all tool calls that were executed and whether they failed/succeeded. Make sure to mention the full arguments used. Only summarize the arguments if they are over 200 characters long
+5. Problem Solving: Document problems solved and any ongoing troubleshooting efforts.
+6. Pending Tasks: Outline any pending tasks that you have explicitly been asked to work on.
+7. Current Work: Describe in detail precisely what was being worked on immediately before this summary request, paying special attention to the most recent messages from both user and assistant. Include resource names and their namespace and log extracts where applicable.
+8. Optional Next Step: List the next step that you will take that is related to the most recent work you were doing. IMPORTANT: ensure that this step is DIRECTLY in line with the user's explicit requests, and the task you were working on immediately before this summary request. If your last task was concluded, then only list next steps if they are explicitly in line with the users request. Do not start on tangential requests without confirming with the user first.
+   If there is a next step, include direct quotes from the most recent conversation showing exactly what task you were working on and where you left off. This should be verbatim to ensure there's no drift in task interpretation.
+
+Here's an example of how your output should be structured:
+
+<example>
+<analysis>
+[Your thought process, ensuring all points are covered thoroughly and accurately]
+</analysis>
+
+<summary>
+1. Primary Request and Intent:
+   [Detailed description]
+
+2. Key Technical Concepts:
+   - [Concept 1]
+   - [Concept 2]
+   - [...]
+
+3. Infrastructure Resources:
+   - [Deployment name 1]
+     - [Summary of why this deployment is important]
+     - [Summary of the issues identified with this deployment, if any]
+     - [List of related pods/services or otyher resources and why they are relevant]
+   - [Pod name 2]
+     - [Summary of why this pod is important]
+     - [Summary of the issues identified with this pod, if any]
+     - [List of related pods/services or otyher resources and why they are relevant]
+   - [...]
+
+4. Tool Calls:
+   - [✅ function_name {args}]
+   - [✅ function_name {args}]
+   - [❌ function_name {args} - NO DATA]
+   - [❌ function_name {args} - Error message]
+   - [...]
+
+5. Problem Solving:
+   [Description of solved problems and ongoing troubleshooting]
+
+6. Pending Tasks:
+   - [Task 1]
+   - [Task 2]
+   - [...]
+
+7. Current Work:
+   [Precise description of current work]
+
+8. Optional Next Step:
+   [Optional Next step to take]
+
+</summary>
+</example>
+
+Please provide your summary based on the conversation so far, following this structure and ensuring precision and thoroughness in your response.
+
+There may be additional summarization instructions provided in the included context. If so, remember to follow these instructions when creating the above summary. Examples of instructions include:
+<example>
+## Compact Instructions
+When summarizing the conversation focus on typescript code changes and also remember the mistakes you made and how you fixed them.
+</example>
+
+<example>
+# Summary instructions
+When you are using compact - please focus on test output and code changes. Include relevant logs verbatim.
+</example>
holmes/plugins/toolsets/__init__.py CHANGED
@@ -44,6 +44,9 @@ from holmes.plugins.toolsets.mcp.toolset_mcp import RemoteMCPToolset
 from holmes.plugins.toolsets.newrelic.newrelic import NewRelicToolset
 from holmes.plugins.toolsets.opensearch.opensearch import OpenSearchToolset
 from holmes.plugins.toolsets.opensearch.opensearch_logs import OpenSearchLogsToolset
+from holmes.plugins.toolsets.opensearch.opensearch_query_assist import (
+    OpenSearchQueryAssistToolset,
+)
 from holmes.plugins.toolsets.opensearch.opensearch_traces import OpenSearchTracesToolset
 from holmes.plugins.toolsets.rabbitmq.toolset_rabbitmq import RabbitMQToolset
 from holmes.plugins.toolsets.robusta.robusta import RobustaToolset
@@ -93,6 +96,7 @@ def load_python_toolsets(dal: Optional[SupabaseDal]) -> List[Toolset]:
         DatadogRDSToolset(),
         OpenSearchLogsToolset(),
         OpenSearchTracesToolset(),
+        OpenSearchQueryAssistToolset(),
         CoralogixLogsToolset(),
         RabbitMQToolset(),
         GitToolset(),
holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py CHANGED
@@ -42,7 +42,6 @@ class MongoDBAtlasToolset(Toolset):
     def __init__(self):
         super().__init__(
             prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
-            experimental=True,
             tools=[
                 ReturnProjectAlerts(toolset=self),
                 ReturnProjectProcesses(toolset=self),
holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py CHANGED
@@ -60,7 +60,6 @@ class AzureSQLToolset(BaseAzureSQLToolset):
             docs_url="https://kagi.com/proxy/png-clipart-microsoft-sql-server-microsoft-azure-sql-database-microsoft-text-logo-thumbnail.png?c=4Sg1bvcUGOrhnDzXgoBBa0G0j27ykgskX4a8cLrZp_quzqlpVGVG02OqQtezTxy7lB6ydmTKgbVAn_F7BxofxK6LKKUZSpjJ1huIAsXPVaXyakO4sWXFiX0Wz_8WjkA0AIlO_oFfW31AKaj5RcvGcr3siy0n5kW-GcqdpeBWsmm_huxUT6RycULFCDFBwuUzHvVl5TW3cYqlMxT8ecPZfg%3D%3D",
             icon_url="https://upload.wikimedia.org/wikipedia/commons/thumb/f/f7/Azure_SQL_Database_logo.svg/1200px-Azure_SQL_Database_logo.svg.png",
             tags=[ToolsetTag.CORE],
-            experimental=True,
             tools=[
                 AnalyzeDatabaseHealthStatus(self),
                 AnalyzeDatabasePerformance(self),
holmes/plugins/toolsets/grafana/grafana_api.py CHANGED
@@ -27,7 +27,7 @@ def grafana_health_check(config: GrafanaConfig) -> Tuple[bool, str]:
         response.raise_for_status()
         return True, ""
     except Exception as e:
-        logging.error(f"Failed to fetch grafana health status at {url}", exc_info=True)
+        logging.debug(f"Failed to fetch grafana health status at {url}", exc_info=True)
         error_msg = f"Failed to fetch grafana health status at {url}. {str(e)}"

         # Add helpful hint if this looks like a common misconfiguration