holmesgpt 0.14.4a0__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +12 -10
- holmes/common/env_vars.py +22 -0
- holmes/config.py +51 -4
- holmes/core/conversations.py +3 -2
- holmes/core/llm.py +226 -72
- holmes/core/openai_formatting.py +13 -0
- holmes/core/supabase_dal.py +33 -42
- holmes/core/tool_calling_llm.py +185 -282
- holmes/core/tools.py +21 -1
- holmes/core/tools_utils/token_counting.py +2 -1
- holmes/core/tools_utils/tool_context_window_limiter.py +32 -30
- holmes/core/truncation/compaction.py +59 -0
- holmes/core/truncation/input_context_window_limiter.py +218 -0
- holmes/interactive.py +17 -7
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
- holmes/plugins/toolsets/__init__.py +4 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +0 -1
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
- holmes/plugins/toolsets/grafana/grafana_api.py +1 -1
- holmes/plugins/toolsets/investigator/core_investigation.py +34 -24
- holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +1 -1
- holmes/plugins/toolsets/robusta/robusta.py +35 -8
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +4 -3
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -1
- holmes/utils/stream.py +31 -1
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/METADATA +6 -2
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/RECORD +36 -31
- holmes/core/performance_timing.py +0 -72
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/entry_points.txt +0 -0
holmes/core/tools.py
CHANGED
@@ -158,6 +158,7 @@ class ToolParameter(BaseModel):
     required: bool = True
     properties: Optional[Dict[str, "ToolParameter"]] = None  # For object types
     items: Optional["ToolParameter"] = None  # For array item schemas
+    enum: Optional[List[str]] = None  # For restricting to specific values


 class ToolInvokeContext(BaseModel):
@@ -682,7 +683,26 @@ class Toolset(BaseModel):
     def check_prerequisites(self):
         self.status = ToolsetStatusEnum.ENABLED

-        for prereq in self.prerequisites:
+        # Sort prerequisites by type to fail fast on missing env vars before
+        # running slow commands (e.g., ArgoCD checks that timeout):
+        # 1. Static checks (instant)
+        # 2. Environment variable checks (instant, often required by commands)
+        # 3. Callable checks (variable speed)
+        # 4. Command checks (slowest - may timeout or hang)
+        def prereq_priority(prereq):
+            if isinstance(prereq, StaticPrerequisite):
+                return 0
+            elif isinstance(prereq, ToolsetEnvironmentPrerequisite):
+                return 1
+            elif isinstance(prereq, CallablePrerequisite):
+                return 2
+            elif isinstance(prereq, ToolsetCommandPrerequisite):
+                return 3
+            return 4  # Unknown types go last
+
+        sorted_prereqs = sorted(self.prerequisites, key=prereq_priority)
+
+        for prereq in sorted_prereqs:
             if isinstance(prereq, ToolsetCommandPrerequisite):
                 try:
                     command = self.interpolate_command(prereq.command)
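The net effect of the reordering is that cheap checks gate expensive ones: a toolset missing an environment variable now fails its prerequisites before any slow command probe runs. A minimal standalone sketch of the ordering logic, using empty stand-in classes rather than the real prerequisite types from holmes.core.tools:

```python
# Standalone sketch of the fail-fast ordering added to Toolset.check_prerequisites.
# The empty classes are stand-ins; the real prerequisite classes carry configuration
# (commands, env var names, callables) and live in holmes.core.tools.
class StaticPrerequisite: ...


class ToolsetEnvironmentPrerequisite: ...


class CallablePrerequisite: ...


class ToolsetCommandPrerequisite: ...


def prereq_priority(prereq) -> int:
    # Instant checks first, slow (possibly hanging) command checks last.
    if isinstance(prereq, StaticPrerequisite):
        return 0
    if isinstance(prereq, ToolsetEnvironmentPrerequisite):
        return 1
    if isinstance(prereq, CallablePrerequisite):
        return 2
    if isinstance(prereq, ToolsetCommandPrerequisite):
        return 3
    return 4  # unknown prerequisite types go last


prereqs = [ToolsetCommandPrerequisite(), CallablePrerequisite(), StaticPrerequisite()]
ordered = sorted(prereqs, key=prereq_priority)
print([type(p).__name__ for p in ordered])
# ['StaticPrerequisite', 'CallablePrerequisite', 'ToolsetCommandPrerequisite']
```

Because sorted() is stable, prerequisites of the same type keep their original relative order, so only the cross-type ordering changes.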
holmes/core/tools_utils/tool_context_window_limiter.py
CHANGED

@@ -1,11 +1,16 @@
 from typing import Optional
-from
+from pydantic import BaseModel
 from holmes.core.llm import LLM
 from holmes.core.tools import StructuredToolResultStatus
 from holmes.core.models import ToolCallResult
 from holmes.utils import sentry_helper


+class ToolCallSizeMetadata(BaseModel):
+    messages_token: int
+    max_tokens_allowed: int
+
+
 def get_pct_token_count(percent_of_total_context_window: float, llm: LLM) -> int:
     context_window_size = llm.get_context_window_size()

@@ -15,41 +20,38 @@ def get_pct_token_count(percent_of_total_context_window: float, llm: LLM) -> int
     return context_window_size


-def
-    ...
-        error_message: Optional[str] = (
-            f"The tool call result is too large to return: {messages_token} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
+def is_tool_call_too_big(
+    tool_call_result: ToolCallResult, llm: LLM
+) -> tuple[bool, Optional[ToolCallSizeMetadata]]:
+    if tool_call_result.result.status == StructuredToolResultStatus.SUCCESS:
+        message = tool_call_result.as_tool_call_message()
+
+        tokens = llm.count_tokens(messages=[message])
+        max_tokens_allowed = llm.get_max_token_count_for_single_tool()
+        return (
+            tokens.total_tokens > max_tokens_allowed,
+            ToolCallSizeMetadata(
+                messages_token=tokens.total_tokens,
+                max_tokens_allowed=max_tokens_allowed,
+            ),
         )
+    return False, None

-    if tool_call_result.result.status == StructuredToolResultStatus.NO_DATA:
-        error_message = None
-        # tool_call_result.result.data is set to None below which is expected to fix the issue
-    elif tool_call_result.result.status == StructuredToolResultStatus.ERROR:
-        original_error = (
-            tool_call_result.result.error
-            or tool_call_result.result.data
-            or "Unknown error"
-        )
-        truncated_error = str(original_error)[:100]
-        error_message = f"The tool call returned an error it is too large to return\nThe following original error is truncated:\n{truncated_error}"

+def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM):
+    tool_call_result_is_too_big, metadata = is_tool_call_too_big(
+        tool_call_result=tool_call_result, llm=llm
+    )
+    if tool_call_result_is_too_big and metadata:
+        relative_pct = (
+            (metadata.messages_token - metadata.max_tokens_allowed)
+            / metadata.messages_token
+        ) * 100
+        error_message = f"The tool call result is too large to return: {metadata.messages_token} tokens.\nThe maximum allowed tokens is {metadata.max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
         tool_call_result.result.status = StructuredToolResultStatus.ERROR
         tool_call_result.result.data = None
         tool_call_result.result.error = error_message

         sentry_helper.capture_toolcall_contains_too_many_tokens(
-            tool_call_result, messages_token, max_tokens_allowed
+            tool_call_result, metadata.messages_token, metadata.max_tokens_allowed
         )
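For a sense of the arithmetic behind the new error message, here is a small illustrative sketch; the token counts are made up, whereas in the package they come from llm.count_tokens() and llm.get_max_token_count_for_single_tool():

```python
# Illustrative numbers only: a successful tool result that exceeds the per-tool budget.
messages_token = 50_000      # tokens in the tool call result message
max_tokens_allowed = 20_000  # per-tool budget derived from the context window

if messages_token > max_tokens_allowed:
    relative_pct = ((messages_token - max_tokens_allowed) / messages_token) * 100
    error_message = (
        f"The tool call result is too large to return: {messages_token} tokens.\n"
        f"The maximum allowed tokens is {max_tokens_allowed} "
        f"which is {relative_pct:.1f}% smaller."
    )
    print(error_message)  # ... which is 60.0% smaller.
```

The result's data is then dropped and its status flipped to ERROR, so the model is asked to retry with a narrower query instead of receiving the oversized payload.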
holmes/core/truncation/compaction.py
ADDED

@@ -0,0 +1,59 @@
+import logging
+from typing import Optional
+from holmes.core.llm import LLM
+from holmes.plugins.prompts import load_and_render_prompt
+from litellm.types.utils import ModelResponse
+
+
+def strip_system_prompt(
+    conversation_history: list[dict],
+) -> tuple[list[dict], Optional[dict]]:
+    if not conversation_history:
+        return conversation_history, None
+    first_message = conversation_history[0]
+    if first_message and first_message.get("role") == "system":
+        return conversation_history[1:], first_message
+    return conversation_history[:], None
+
+
+def compact_conversation_history(
+    original_conversation_history: list[dict], llm: LLM
+) -> list[dict]:
+    conversation_history, system_prompt_message = strip_system_prompt(
+        original_conversation_history
+    )
+    compaction_instructions = load_and_render_prompt(
+        prompt="builtin://conversation_history_compaction.jinja2", context={}
+    )
+    conversation_history.append({"role": "user", "content": compaction_instructions})
+
+    response: ModelResponse = llm.completion(conversation_history)  # type: ignore
+    response_message = None
+    if (
+        response
+        and response.choices
+        and response.choices[0]
+        and response.choices[0].message  # type:ignore
+    ):
+        response_message = response.choices[0].message  # type:ignore
+    else:
+        logging.error(
+            "Failed to compact conversation history. Unexpected LLM's response for compaction"
+        )
+        return original_conversation_history
+
+    compacted_conversation_history: list[dict] = []
+    if system_prompt_message:
+        compacted_conversation_history.append(system_prompt_message)
+    compacted_conversation_history.append(
+        response_message.model_dump(
+            exclude_defaults=True, exclude_unset=True, exclude_none=True
+        )
+    )
+    compacted_conversation_history.append(
+        {
+            "role": "system",
+            "content": "The conversation history has been compacted to preserve available space in the context window. Continue.",
+        }
+    )
+    return compacted_conversation_history
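When compaction succeeds, the returned history collapses to three messages; an illustrative sketch of that shape (placeholder contents, not real model output):

```python
# Shape of a successfully compacted history: original system prompt (if any),
# the model's summary, then a fixed notice telling the model to continue.
compacted_history = [
    {"role": "system", "content": "<original system prompt, when one was present>"},
    {"role": "assistant", "content": "<analysis and summary returned by the model>"},
    {
        "role": "system",
        "content": "The conversation history has been compacted to preserve "
        "available space in the context window. Continue.",
    },
]
```

If the model's response is malformed, the function logs an error and returns the original history unchanged.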
holmes/core/truncation/input_context_window_limiter.py
ADDED

@@ -0,0 +1,218 @@
+import logging
+from typing import Any, Optional
+from pydantic import BaseModel
+import sentry_sdk
+from holmes.common.env_vars import (
+    ENABLE_CONVERSATION_HISTORY_COMPACTION,
+    MAX_OUTPUT_TOKEN_RESERVATION,
+)
+from holmes.core.llm import (
+    LLM,
+    TokenCountMetadata,
+    get_context_window_compaction_threshold_pct,
+)
+from holmes.core.models import TruncationMetadata, TruncationResult
+from holmes.core.truncation.compaction import compact_conversation_history
+from holmes.utils import sentry_helper
+from holmes.utils.stream import StreamEvents, StreamMessage
+
+
+TRUNCATION_NOTICE = "\n\n[TRUNCATED]"
+
+
+def _truncate_tool_message(
+    msg: dict, allocated_space: int, needed_space: int
+) -> TruncationMetadata:
+    msg_content = msg["content"]
+    tool_call_id = msg["tool_call_id"]
+    tool_name = msg["name"]
+
+    # Ensure the indicator fits in the allocated space
+    if allocated_space > len(TRUNCATION_NOTICE):
+        original = msg_content if isinstance(msg_content, str) else str(msg_content)
+        msg["content"] = (
+            original[: allocated_space - len(TRUNCATION_NOTICE)] + TRUNCATION_NOTICE
+        )
+        end_index = allocated_space - len(TRUNCATION_NOTICE)
+    else:
+        msg["content"] = TRUNCATION_NOTICE[:allocated_space]
+        end_index = allocated_space
+
+    msg.pop("token_count", None)  # Remove token_count if present
+    logging.info(
+        f"Truncating tool message '{tool_name}' from {needed_space} to {allocated_space} tokens"
+    )
+    truncation_metadata = TruncationMetadata(
+        tool_call_id=tool_call_id,
+        start_index=0,
+        end_index=end_index,
+        tool_name=tool_name,
+        original_token_count=needed_space,
+    )
+    return truncation_metadata
+
+
+# TODO: I think there's a bug here because we don't account for the 'role' or json structure like '{...}' when counting tokens
+# However, in practice it works because we reserve enough space for the output tokens that the minor inconsistency does not matter
+# We should fix this in the future
+# TODO: we truncate using character counts not token counts - this means we're overly agressive with truncation - improve it by considering
+# token truncation and not character truncation
+def truncate_messages_to_fit_context(
+    messages: list, max_context_size: int, maximum_output_token: int, count_tokens_fn
+) -> TruncationResult:
+    """
+    Helper function to truncate tool messages to fit within context limits.
+
+    Args:
+        messages: List of message dictionaries with roles and content
+        max_context_size: Maximum context window size for the model
+        maximum_output_token: Maximum tokens reserved for model output
+        count_tokens_fn: Function to count tokens for a list of messages
+
+    Returns:
+        Modified list of messages with truncated tool responses
+
+    Raises:
+        Exception: If non-tool messages exceed available context space
+    """
+    messages_except_tools = [
+        message for message in messages if message["role"] != "tool"
+    ]
+    tokens = count_tokens_fn(messages_except_tools)
+    message_size_without_tools = tokens.total_tokens
+
+    tool_call_messages = [message for message in messages if message["role"] == "tool"]
+
+    reserved_for_output_tokens = min(maximum_output_token, MAX_OUTPUT_TOKEN_RESERVATION)
+    if message_size_without_tools >= (max_context_size - reserved_for_output_tokens):
+        logging.error(
+            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input."
+        )
+        raise Exception(
+            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - reserved_for_output_tokens} tokens available for input."
+        )
+
+    if len(tool_call_messages) == 0:
+        return TruncationResult(truncated_messages=messages, truncations=[])
+
+    available_space = (
+        max_context_size - message_size_without_tools - reserved_for_output_tokens
+    )
+    remaining_space = available_space
+    tool_call_messages.sort(
+        key=lambda x: count_tokens_fn(
+            [{"role": "tool", "content": x["content"]}]
+        ).total_tokens
+    )
+
+    truncations = []
+
+    # Allocate space starting with small tools and going to larger tools, while maintaining fairness
+    # Small tools can often get exactly what they need, while larger tools may need to be truncated
+    # We ensure fairness (no tool gets more than others that need it) and also maximize utilization (we don't leave space unused)
+    for i, msg in enumerate(tool_call_messages):
+        remaining_tools = len(tool_call_messages) - i
+        max_allocation = remaining_space // remaining_tools
+        needed_space = count_tokens_fn(
+            [{"role": "tool", "content": msg["content"]}]
+        ).total_tokens
+        allocated_space = min(needed_space, max_allocation)
+
+        if needed_space > allocated_space:
+            truncation_metadata = _truncate_tool_message(
+                msg, allocated_space, needed_space
+            )
+            truncations.append(truncation_metadata)
+
+        remaining_space -= allocated_space
+
+    if truncations:
+        sentry_helper.capture_tool_truncations(truncations)
+
+    return TruncationResult(truncated_messages=messages, truncations=truncations)
+
+
+class ContextWindowLimiterOutput(BaseModel):
+    metadata: dict
+    messages: list[dict]
+    events: list[StreamMessage]
+    max_context_size: int
+    maximum_output_token: int
+    tokens: TokenCountMetadata
+    conversation_history_compacted: bool
+
+
+@sentry_sdk.trace
+def limit_input_context_window(
+    llm: LLM, messages: list[dict], tools: Optional[list[dict[str, Any]]]
+) -> ContextWindowLimiterOutput:
+    events = []
+    metadata = {}
+    initial_tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+    max_context_size = llm.get_context_window_size()
+    maximum_output_token = llm.get_maximum_output_token()
+    conversation_history_compacted = False
+    if ENABLE_CONVERSATION_HISTORY_COMPACTION and (
+        initial_tokens.total_tokens + maximum_output_token
+    ) > (max_context_size * get_context_window_compaction_threshold_pct() / 100):
+        compacted_messages = compact_conversation_history(
+            original_conversation_history=messages, llm=llm
+        )
+        compacted_tokens = llm.count_tokens(compacted_messages, tools=tools)
+        compacted_total_tokens = compacted_tokens.total_tokens
+
+        if compacted_total_tokens < initial_tokens.total_tokens:
+            messages = compacted_messages
+            compaction_message = f"The conversation history has been compacted from {initial_tokens.total_tokens} to {compacted_total_tokens} tokens"
+            logging.info(compaction_message)
+            conversation_history_compacted = True
+            events.append(
+                StreamMessage(
+                    event=StreamEvents.CONVERSATION_HISTORY_COMPACTED,
+                    data={
+                        "content": compaction_message,
+                        "messages": compacted_messages,
+                        "metadata": {
+                            "initial_tokens": initial_tokens.total_tokens,
+                            "compacted_tokens": compacted_total_tokens,
+                        },
+                    },
+                )
+            )
+            events.append(
+                StreamMessage(
+                    event=StreamEvents.AI_MESSAGE,
+                    data={"content": compaction_message},
+                )
+            )
+        else:
+            logging.debug(
+                f"Failed to reduce token count when compacting conversation history. Original tokens:{initial_tokens.total_tokens}. Compacted tokens:{compacted_total_tokens}"
+            )
+
+    tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+    if (tokens.total_tokens + maximum_output_token) > max_context_size:
+        # Compaction was not sufficient. Truncating messages.
+        truncated_res = truncate_messages_to_fit_context(
+            messages=messages,
+            max_context_size=max_context_size,
+            maximum_output_token=maximum_output_token,
+            count_tokens_fn=llm.count_tokens,
+        )
+        metadata["truncations"] = [t.model_dump() for t in truncated_res.truncations]
+        messages = truncated_res.truncated_messages
+
+        # recount after truncation
+        tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+    else:
+        metadata["truncations"] = []
+
+    return ContextWindowLimiterOutput(
+        events=events,
+        messages=messages,
+        metadata=metadata,
+        max_context_size=max_context_size,
+        maximum_output_token=maximum_output_token,
+        tokens=tokens,
+        conversation_history_compacted=conversation_history_compacted,
+    )
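The compaction trigger compares the projected input-plus-output size against a percentage of the context window. A sketch with illustrative numbers (the threshold shown is an assumption; the real value comes from get_context_window_compaction_threshold_pct() in holmes.core.llm):

```python
# Illustrative values; in the package these come from the LLM wrapper.
max_context_size = 128_000
maximum_output_token = 8_000
threshold_pct = 90          # assumed threshold percentage
input_tokens = 115_000      # counted over the messages plus tool definitions

needs_compaction = (input_tokens + maximum_output_token) > (
    max_context_size * threshold_pct / 100
)
print(needs_compaction)  # True: 123,000 > 115,200, so the history gets compacted
```

If the compacted history is still too large once the reserved output tokens are added, tool messages are truncated via truncate_messages_to_fit_context() and the token count is redone.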
holmes/interactive.py
CHANGED
@@ -480,10 +480,14 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> None:
         return

     # Calculate context statistics
-
+    tokens_metadata = ai.llm.count_tokens(
+        messages
+    )  # TODO: pass tools to also count tokens used by input tools
     max_context_size = ai.llm.get_context_window_size()
     max_output_tokens = ai.llm.get_maximum_output_token()
-    available_tokens =
+    available_tokens = (
+        max_context_size - tokens_metadata.total_tokens - max_output_tokens
+    )

     # Analyze token distribution by role and tool calls
     role_token_usage: DefaultDict[str, int] = defaultdict(int)
@@ -492,19 +496,21 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> None:

     for msg in messages:
         role = msg.get("role", "unknown")
-
-
+        message_tokens = ai.llm.count_tokens(
+            [msg]
+        )  # TODO: pass tools to also count tokens used by input tools
+        role_token_usage[role] += message_tokens.total_tokens

         # Track individual tool usage
         if role == "tool":
             tool_name = msg.get("name", "unknown_tool")
-            tool_token_usage[tool_name] +=
+            tool_token_usage[tool_name] += message_tokens.total_tokens
             tool_call_counts[tool_name] += 1

     # Display context information
     console.print(f"[bold {STATUS_COLOR}]Conversation Context:[/bold {STATUS_COLOR}]")
     console.print(
-        f"  Context used: {total_tokens:,} / {max_context_size:,} tokens ({(total_tokens / max_context_size) * 100:.1f}%)"
+        f"  Context used: {tokens_metadata.total_tokens:,} / {max_context_size:,} tokens ({(tokens_metadata.total_tokens / max_context_size) * 100:.1f}%)"
     )
     console.print(
         f"  Space remaining: {available_tokens:,} for input ({(available_tokens / max_context_size) * 100:.1f}%) + {max_output_tokens:,} reserved for output ({(max_output_tokens / max_context_size) * 100:.1f}%)"
@@ -515,7 +521,11 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> None:
     for role in ["system", "user", "assistant", "tool"]:
         if role in role_token_usage:
             tokens = role_token_usage[role]
-            percentage = (
+            percentage = (
+                (tokens / tokens_metadata.total_tokens) * 100
+                if tokens_metadata.total_tokens > 0
+                else 0
+            )
             role_name = {
                 "system": "system prompt",
                 "user": "user messages",
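The /context numbers reduce to simple bookkeeping over the counted tokens; a quick sketch with illustrative values:

```python
# Illustrative values; tokens_metadata.total_tokens is what /context now reports.
max_context_size = 200_000
max_output_tokens = 16_000
used_tokens = 42_500  # total tokens counted over the conversation history

available_tokens = max_context_size - used_tokens - max_output_tokens
print(
    f"Context used: {used_tokens:,} / {max_context_size:,} tokens "
    f"({used_tokens / max_context_size * 100:.1f}%)"
)
print(
    f"Space remaining: {available_tokens:,} for input "
    f"({available_tokens / max_context_size * 100:.1f}%) "
    f"+ {max_output_tokens:,} reserved for output "
    f"({max_output_tokens / max_context_size * 100:.1f}%)"
)
```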
holmes/plugins/prompts/_general_instructions.jinja2
CHANGED

@@ -12,8 +12,7 @@
 * do not stop investigating until you are at the final root cause you are able to find.
 * use the "five whys" methodology to find the root cause.
 * for example, if you found a problem in microservice A that is due to an error in microservice B, look at microservice B too and find the error in that.
-* if you cannot find the resource/application that the user referred to, assume they made a typo or included/excluded characters like - and
-* in this case, try to find substrings or search for the correct spellings
+* if you cannot find the resource/application that the user referred to, assume they made a typo or included/excluded characters like - and in this case, try to find substrings or search for the correct spellings
 * always provide detailed information like exact resource names, versions, labels, etc
 * even if you found the root cause, keep investigating to find other possible root causes and to gather data for the answer like exact names
 * if a runbook url is present you MUST fetch the runbook before beginning your investigation
holmes/plugins/prompts/conversation_history_compaction.jinja2
ADDED

@@ -0,0 +1,88 @@
+Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions.
+This summary should be thorough in capturing technical details, code patterns, and architectural decisions that would be essential for continuing development work without losing context.
+
+Before providing your final summary, wrap your analysis in <analysis> tags to organize your thoughts and ensure you've covered all necessary points. In your analysis process:
+
+1. Chronologically analyze each message and section of the conversation. For each section thoroughly identify:
+   - The user's explicit requests and intents
+   - Your approach to addressing the user's requests
+   - Key decisions, technical concepts and code patterns
+   - Specific details like kubernetes resource names, namespaces, relevant logs extracts (verbatim), etc
+   - What tools were called and the outcome or analysis of the tool output
+2. Double-check for technical accuracy and completeness, addressing each required element thoroughly.
+
+Your summary should include the following sections:
+
+1. Primary Request and Intent: Capture all of the user's explicit requests and intents in detail
+2. Key Technical Concepts: List all important technical concepts, technologies, and frameworks discussed.
+3. Resources: Enumerate specific kubernetes or cloud resources and logs extract examined. Pay special attention to the most recent messages and include logs or tool outputs where applicable and include a summary of why this resource is important.
+4. Tool calls: List all tool calls that were executed and whether they failed/succeeded. Make sure to mention the full arguments used. Only summarize the arguments if they are over 200 characters long
+5. Problem Solving: Document problems solved and any ongoing troubleshooting efforts.
+6. Pending Tasks: Outline any pending tasks that you have explicitly been asked to work on.
+7. Current Work: Describe in detail precisely what was being worked on immediately before this summary request, paying special attention to the most recent messages from both user and assistant. Include resource names and their namespace and log extracts where applicable.
+8. Optional Next Step: List the next step that you will take that is related to the most recent work you were doing. IMPORTANT: ensure that this step is DIRECTLY in line with the user's explicit requests, and the task you were working on immediately before this summary request. If your last task was concluded, then only list next steps if they are explicitly in line with the users request. Do not start on tangential requests without confirming with the user first.
+If there is a next step, include direct quotes from the most recent conversation showing exactly what task you were working on and where you left off. This should be verbatim to ensure there's no drift in task interpretation.
+
+Here's an example of how your output should be structured:
+
+<example>
+<analysis>
+[Your thought process, ensuring all points are covered thoroughly and accurately]
+</analysis>
+
+<summary>
+1. Primary Request and Intent:
+   [Detailed description]
+
+2. Key Technical Concepts:
+   - [Concept 1]
+   - [Concept 2]
+   - [...]
+
+3. Infrastructure Resources:
+   - [Deployment name 1]
+     - [Summary of why this deployment is important]
+     - [Summary of the issues identified with this deployment, if any]
+     - [List of related pods/services or otyher resources and why they are relevant]
+   - [Pod name 2]
+     - [Summary of why this pod is important]
+     - [Summary of the issues identified with this pod, if any]
+     - [List of related pods/services or otyher resources and why they are relevant]
+   - [...]
+
+4. Tool Calls:
+   - [✅ function_name {args}]
+   - [✅ function_name {args}]
+   - [❌ function_name {args} - NO DATA]
+   - [❌ function_name {args} - Error message]
+   - [...]
+
+5. Problem Solving:
+   [Description of solved problems and ongoing troubleshooting]
+
+6. Pending Tasks:
+   - [Task 1]
+   - [Task 2]
+   - [...]
+
+7. Current Work:
+   [Precise description of current work]
+
+8. Optional Next Step:
+   [Optional Next step to take]
+
+</summary>
+</example>
+
+Please provide your summary based on the conversation so far, following this structure and ensuring precision and thoroughness in your response.
+
+There may be additional summarization instructions provided in the included context. If so, remember to follow these instructions when creating the above summary. Examples of instructions include:
+<example>
+## Compact Instructions
+When summarizing the conversation focus on typescript code changes and also remember the mistakes you made and how you fixed them.
+</example>
+
+<example>
+# Summary instructions
+When you are using compact - please focus on test output and code changes. Include relevant logs verbatim.
+</example>
holmes/plugins/toolsets/__init__.py
CHANGED

@@ -44,6 +44,9 @@ from holmes.plugins.toolsets.mcp.toolset_mcp import RemoteMCPToolset
 from holmes.plugins.toolsets.newrelic.newrelic import NewRelicToolset
 from holmes.plugins.toolsets.opensearch.opensearch import OpenSearchToolset
 from holmes.plugins.toolsets.opensearch.opensearch_logs import OpenSearchLogsToolset
+from holmes.plugins.toolsets.opensearch.opensearch_query_assist import (
+    OpenSearchQueryAssistToolset,
+)
 from holmes.plugins.toolsets.opensearch.opensearch_traces import OpenSearchTracesToolset
 from holmes.plugins.toolsets.rabbitmq.toolset_rabbitmq import RabbitMQToolset
 from holmes.plugins.toolsets.robusta.robusta import RobustaToolset
@@ -93,6 +96,7 @@ def load_python_toolsets(dal: Optional[SupabaseDal]) -> List[Toolset]:
         DatadogRDSToolset(),
         OpenSearchLogsToolset(),
         OpenSearchTracesToolset(),
+        OpenSearchQueryAssistToolset(),
         CoralogixLogsToolset(),
         RabbitMQToolset(),
         GitToolset(),
holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py
CHANGED

@@ -42,7 +42,6 @@ class MongoDBAtlasToolset(Toolset):
     def __init__(self):
         super().__init__(
             prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
-            experimental=True,
             tools=[
                 ReturnProjectAlerts(toolset=self),
                 ReturnProjectProcesses(toolset=self),
holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py
CHANGED

@@ -60,7 +60,6 @@ class AzureSQLToolset(BaseAzureSQLToolset):
             docs_url="https://kagi.com/proxy/png-clipart-microsoft-sql-server-microsoft-azure-sql-database-microsoft-text-logo-thumbnail.png?c=4Sg1bvcUGOrhnDzXgoBBa0G0j27ykgskX4a8cLrZp_quzqlpVGVG02OqQtezTxy7lB6ydmTKgbVAn_F7BxofxK6LKKUZSpjJ1huIAsXPVaXyakO4sWXFiX0Wz_8WjkA0AIlO_oFfW31AKaj5RcvGcr3siy0n5kW-GcqdpeBWsmm_huxUT6RycULFCDFBwuUzHvVl5TW3cYqlMxT8ecPZfg%3D%3D",
             icon_url="https://upload.wikimedia.org/wikipedia/commons/thumb/f/f7/Azure_SQL_Database_logo.svg/1200px-Azure_SQL_Database_logo.svg.png",
             tags=[ToolsetTag.CORE],
-            experimental=True,
             tools=[
                 AnalyzeDatabaseHealthStatus(self),
                 AnalyzeDatabasePerformance(self),
holmes/plugins/toolsets/grafana/grafana_api.py
CHANGED

@@ -27,7 +27,7 @@ def grafana_health_check(config: GrafanaConfig) -> Tuple[bool, str]:
         response.raise_for_status()
         return True, ""
     except Exception as e:
-        logging.
+        logging.debug(f"Failed to fetch grafana health status at {url}", exc_info=True)
         error_msg = f"Failed to fetch grafana health status at {url}. {str(e)}"

         # Add helpful hint if this looks like a common misconfiguration
|