holmesgpt 0.15.0__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This version of holmesgpt has been flagged as potentially problematic.
- holmes/__init__.py +1 -1
- holmes/common/env_vars.py +8 -0
- holmes/core/llm.py +28 -0
- holmes/core/supabase_dal.py +33 -42
- holmes/core/tool_calling_llm.py +92 -223
- holmes/core/tools_utils/tool_context_window_limiter.py +32 -39
- holmes/core/truncation/compaction.py +59 -0
- holmes/core/truncation/input_context_window_limiter.py +218 -0
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
- holmes/plugins/toolsets/investigator/core_investigation.py +20 -11
- holmes/plugins/toolsets/robusta/robusta.py +35 -8
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +4 -3
- holmes/utils/stream.py +1 -0
- {holmesgpt-0.15.0.dist-info → holmesgpt-0.16.0.dist-info}/METADATA +4 -2
- {holmesgpt-0.15.0.dist-info → holmesgpt-0.16.0.dist-info}/RECORD +18 -16
- holmes/core/performance_timing.py +0 -72
- {holmesgpt-0.15.0.dist-info → holmesgpt-0.16.0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.15.0.dist-info → holmesgpt-0.16.0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.15.0.dist-info → holmesgpt-0.16.0.dist-info}/entry_points.txt +0 -0
holmes/core/truncation/compaction.py ADDED

@@ -0,0 +1,59 @@
import logging
from typing import Optional
from holmes.core.llm import LLM
from holmes.plugins.prompts import load_and_render_prompt
from litellm.types.utils import ModelResponse


def strip_system_prompt(
    conversation_history: list[dict],
) -> tuple[list[dict], Optional[dict]]:
    if not conversation_history:
        return conversation_history, None
    first_message = conversation_history[0]
    if first_message and first_message.get("role") == "system":
        return conversation_history[1:], first_message
    return conversation_history[:], None


def compact_conversation_history(
    original_conversation_history: list[dict], llm: LLM
) -> list[dict]:
    conversation_history, system_prompt_message = strip_system_prompt(
        original_conversation_history
    )
    compaction_instructions = load_and_render_prompt(
        prompt="builtin://conversation_history_compaction.jinja2", context={}
    )
    conversation_history.append({"role": "user", "content": compaction_instructions})

    response: ModelResponse = llm.completion(conversation_history)  # type: ignore
    response_message = None
    if (
        response
        and response.choices
        and response.choices[0]
        and response.choices[0].message  # type:ignore
    ):
        response_message = response.choices[0].message  # type:ignore
    else:
        logging.error(
            "Failed to compact conversation history. Unexpected LLM's response for compaction"
        )
        return original_conversation_history

    compacted_conversation_history: list[dict] = []
    if system_prompt_message:
        compacted_conversation_history.append(system_prompt_message)
    compacted_conversation_history.append(
        response_message.model_dump(
            exclude_defaults=True, exclude_unset=True, exclude_none=True
        )
    )
    compacted_conversation_history.append(
        {
            "role": "system",
            "content": "The conversation history has been compacted to preserve available space in the context window. Continue.",
        }
    )
    return compacted_conversation_history
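A minimal sketch of the split-and-reattach contract of these helpers (the history content is invented for illustration): strip_system_prompt peels off a leading system message so compaction summarizes only the dialogue, and compact_conversation_history re-attaches it ahead of the LLM-generated summary.

# Illustrative sketch (example data only): strip_system_prompt splits a
# leading system message from the rest of the history.
from holmes.core.truncation.compaction import strip_system_prompt

history = [
    {"role": "system", "content": "You are a Kubernetes SRE assistant."},
    {"role": "user", "content": "Why is payment-api crashlooping?"},
]
rest, system_msg = strip_system_prompt(history)
assert system_msg == history[0]
assert rest == history[1:]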
holmes/core/truncation/input_context_window_limiter.py ADDED

@@ -0,0 +1,218 @@
import logging
from typing import Any, Optional
from pydantic import BaseModel
import sentry_sdk
from holmes.common.env_vars import (
    ENABLE_CONVERSATION_HISTORY_COMPACTION,
    MAX_OUTPUT_TOKEN_RESERVATION,
)
from holmes.core.llm import (
    LLM,
    TokenCountMetadata,
    get_context_window_compaction_threshold_pct,
)
from holmes.core.models import TruncationMetadata, TruncationResult
from holmes.core.truncation.compaction import compact_conversation_history
from holmes.utils import sentry_helper
from holmes.utils.stream import StreamEvents, StreamMessage


TRUNCATION_NOTICE = "\n\n[TRUNCATED]"


def _truncate_tool_message(
    msg: dict, allocated_space: int, needed_space: int
) -> TruncationMetadata:
    msg_content = msg["content"]
    tool_call_id = msg["tool_call_id"]
    tool_name = msg["name"]

    # Ensure the indicator fits in the allocated space
    if allocated_space > len(TRUNCATION_NOTICE):
        original = msg_content if isinstance(msg_content, str) else str(msg_content)
        msg["content"] = (
            original[: allocated_space - len(TRUNCATION_NOTICE)] + TRUNCATION_NOTICE
        )
        end_index = allocated_space - len(TRUNCATION_NOTICE)
    else:
        msg["content"] = TRUNCATION_NOTICE[:allocated_space]
        end_index = allocated_space

    msg.pop("token_count", None)  # Remove token_count if present
    logging.info(
        f"Truncating tool message '{tool_name}' from {needed_space} to {allocated_space} tokens"
    )
    truncation_metadata = TruncationMetadata(
        tool_call_id=tool_call_id,
        start_index=0,
        end_index=end_index,
        tool_name=tool_name,
        original_token_count=needed_space,
    )
    return truncation_metadata


# TODO: I think there's a bug here because we don't account for the 'role' or json structure like '{...}' when counting tokens
# However, in practice it works because we reserve enough space for the output tokens that the minor inconsistency does not matter
# We should fix this in the future
# TODO: we truncate using character counts not token counts - this means we're overly aggressive with truncation - improve it by considering
# token truncation and not character truncation
def truncate_messages_to_fit_context(
    messages: list, max_context_size: int, maximum_output_token: int, count_tokens_fn
) -> TruncationResult:
    """
    Helper function to truncate tool messages to fit within context limits.

    Args:
        messages: List of message dictionaries with roles and content
        max_context_size: Maximum context window size for the model
        maximum_output_token: Maximum tokens reserved for model output
        count_tokens_fn: Function to count tokens for a list of messages

    Returns:
        Modified list of messages with truncated tool responses

    Raises:
        Exception: If non-tool messages exceed available context space
    """
    messages_except_tools = [
        message for message in messages if message["role"] != "tool"
    ]
    tokens = count_tokens_fn(messages_except_tools)
    message_size_without_tools = tokens.total_tokens

    tool_call_messages = [message for message in messages if message["role"] == "tool"]

    reserved_for_output_tokens = min(maximum_output_token, MAX_OUTPUT_TOKEN_RESERVATION)
    if message_size_without_tools >= (max_context_size - reserved_for_output_tokens):
        logging.error(
            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input."
        )
        raise Exception(
            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - reserved_for_output_tokens} tokens available for input."
        )

    if len(tool_call_messages) == 0:
        return TruncationResult(truncated_messages=messages, truncations=[])

    available_space = (
        max_context_size - message_size_without_tools - reserved_for_output_tokens
    )
    remaining_space = available_space
    tool_call_messages.sort(
        key=lambda x: count_tokens_fn(
            [{"role": "tool", "content": x["content"]}]
        ).total_tokens
    )

    truncations = []

    # Allocate space starting with small tools and going to larger tools, while maintaining fairness
    # Small tools can often get exactly what they need, while larger tools may need to be truncated
    # We ensure fairness (no tool gets more than others that need it) and also maximize utilization (we don't leave space unused)
    for i, msg in enumerate(tool_call_messages):
        remaining_tools = len(tool_call_messages) - i
        max_allocation = remaining_space // remaining_tools
        needed_space = count_tokens_fn(
            [{"role": "tool", "content": msg["content"]}]
        ).total_tokens
        allocated_space = min(needed_space, max_allocation)

        if needed_space > allocated_space:
            truncation_metadata = _truncate_tool_message(
                msg, allocated_space, needed_space
            )
            truncations.append(truncation_metadata)

        remaining_space -= allocated_space

    if truncations:
        sentry_helper.capture_tool_truncations(truncations)

    return TruncationResult(truncated_messages=messages, truncations=truncations)


class ContextWindowLimiterOutput(BaseModel):
    metadata: dict
    messages: list[dict]
    events: list[StreamMessage]
    max_context_size: int
    maximum_output_token: int
    tokens: TokenCountMetadata
    conversation_history_compacted: bool


@sentry_sdk.trace
def limit_input_context_window(
    llm: LLM, messages: list[dict], tools: Optional[list[dict[str, Any]]]
) -> ContextWindowLimiterOutput:
    events = []
    metadata = {}
    initial_tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
    max_context_size = llm.get_context_window_size()
    maximum_output_token = llm.get_maximum_output_token()
    conversation_history_compacted = False
    if ENABLE_CONVERSATION_HISTORY_COMPACTION and (
        initial_tokens.total_tokens + maximum_output_token
    ) > (max_context_size * get_context_window_compaction_threshold_pct() / 100):
        compacted_messages = compact_conversation_history(
            original_conversation_history=messages, llm=llm
        )
        compacted_tokens = llm.count_tokens(compacted_messages, tools=tools)
        compacted_total_tokens = compacted_tokens.total_tokens

        if compacted_total_tokens < initial_tokens.total_tokens:
            messages = compacted_messages
            compaction_message = f"The conversation history has been compacted from {initial_tokens.total_tokens} to {compacted_total_tokens} tokens"
            logging.info(compaction_message)
            conversation_history_compacted = True
            events.append(
                StreamMessage(
                    event=StreamEvents.CONVERSATION_HISTORY_COMPACTED,
                    data={
                        "content": compaction_message,
                        "messages": compacted_messages,
                        "metadata": {
                            "initial_tokens": initial_tokens.total_tokens,
                            "compacted_tokens": compacted_total_tokens,
                        },
                    },
                )
            )
            events.append(
                StreamMessage(
                    event=StreamEvents.AI_MESSAGE,
                    data={"content": compaction_message},
                )
            )
        else:
            logging.debug(
                f"Failed to reduce token count when compacting conversation history. Original tokens:{initial_tokens.total_tokens}. Compacted tokens:{compacted_total_tokens}"
            )

    tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
    if (tokens.total_tokens + maximum_output_token) > max_context_size:
        # Compaction was not sufficient. Truncating messages.
        truncated_res = truncate_messages_to_fit_context(
            messages=messages,
            max_context_size=max_context_size,
            maximum_output_token=maximum_output_token,
            count_tokens_fn=llm.count_tokens,
        )
        metadata["truncations"] = [t.model_dump() for t in truncated_res.truncations]
        messages = truncated_res.truncated_messages

        # recount after truncation
        tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
    else:
        metadata["truncations"] = []

    return ContextWindowLimiterOutput(
        events=events,
        messages=messages,
        metadata=metadata,
        max_context_size=max_context_size,
        maximum_output_token=maximum_output_token,
        tokens=tokens,
        conversation_history_compacted=conversation_history_compacted,
    )
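The fair-allocation loop in truncate_messages_to_fit_context is easiest to see with concrete numbers. A self-contained sketch of the same strategy, using toy sizes in place of a real token counter:

# Toy model of the fair-allocation loop above. Sorting ascending lets small
# tool outputs take only what they need, which grows the per-message budget
# (remaining // remaining_tools) for the larger outputs that follow.
def allocate(sizes: list[int], available: int) -> list[int]:
    sizes = sorted(sizes)
    allocations = []
    remaining = available
    for i, needed in enumerate(sizes):
        max_allocation = remaining // (len(sizes) - i)
        allocated = min(needed, max_allocation)
        allocations.append(allocated)
        remaining -= allocated
    return allocations

# Tool outputs of 100, 1000 and 9000 tokens with 3000 tokens to spare: the
# two small ones fit whole and the 9000-token one is truncated to the rest.
print(allocate([100, 1000, 9000], available=3000))  # [100, 1000, 1900]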
holmes/plugins/prompts/conversation_history_compaction.jinja2 ADDED

@@ -0,0 +1,88 @@
Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions.
This summary should be thorough in capturing technical details, code patterns, and architectural decisions that would be essential for continuing development work without losing context.

Before providing your final summary, wrap your analysis in <analysis> tags to organize your thoughts and ensure you've covered all necessary points. In your analysis process:

1. Chronologically analyze each message and section of the conversation. For each section thoroughly identify:
   - The user's explicit requests and intents
   - Your approach to addressing the user's requests
   - Key decisions, technical concepts and code patterns
   - Specific details like kubernetes resource names, namespaces, relevant log extracts (verbatim), etc
   - What tools were called and the outcome or analysis of the tool output
2. Double-check for technical accuracy and completeness, addressing each required element thoroughly.

Your summary should include the following sections:

1. Primary Request and Intent: Capture all of the user's explicit requests and intents in detail
2. Key Technical Concepts: List all important technical concepts, technologies, and frameworks discussed.
3. Resources: Enumerate specific kubernetes or cloud resources and log extracts examined. Pay special attention to the most recent messages and include logs or tool outputs where applicable and include a summary of why this resource is important.
4. Tool calls: List all tool calls that were executed and whether they failed/succeeded. Make sure to mention the full arguments used. Only summarize the arguments if they are over 200 characters long
5. Problem Solving: Document problems solved and any ongoing troubleshooting efforts.
6. Pending Tasks: Outline any pending tasks that you have explicitly been asked to work on.
7. Current Work: Describe in detail precisely what was being worked on immediately before this summary request, paying special attention to the most recent messages from both user and assistant. Include resource names and their namespace and log extracts where applicable.
8. Optional Next Step: List the next step that you will take that is related to the most recent work you were doing. IMPORTANT: ensure that this step is DIRECTLY in line with the user's explicit requests, and the task you were working on immediately before this summary request. If your last task was concluded, then only list next steps if they are explicitly in line with the users request. Do not start on tangential requests without confirming with the user first.
If there is a next step, include direct quotes from the most recent conversation showing exactly what task you were working on and where you left off. This should be verbatim to ensure there's no drift in task interpretation.

Here's an example of how your output should be structured:

<example>
<analysis>
[Your thought process, ensuring all points are covered thoroughly and accurately]
</analysis>

<summary>
1. Primary Request and Intent:
[Detailed description]

2. Key Technical Concepts:
- [Concept 1]
- [Concept 2]
- [...]

3. Infrastructure Resources:
- [Deployment name 1]
  - [Summary of why this deployment is important]
  - [Summary of the issues identified with this deployment, if any]
  - [List of related pods/services or other resources and why they are relevant]
- [Pod name 2]
  - [Summary of why this pod is important]
  - [Summary of the issues identified with this pod, if any]
  - [List of related pods/services or other resources and why they are relevant]
- [...]

4. Tool Calls:
- [✅ function_name {args}]
- [✅ function_name {args}]
- [❌ function_name {args} - NO DATA]
- [❌ function_name {args} - Error message]
- [...]

5. Problem Solving:
[Description of solved problems and ongoing troubleshooting]

6. Pending Tasks:
- [Task 1]
- [Task 2]
- [...]

7. Current Work:
[Precise description of current work]

8. Optional Next Step:
[Optional Next step to take]

</summary>
</example>

Please provide your summary based on the conversation so far, following this structure and ensuring precision and thoroughness in your response.

There may be additional summarization instructions provided in the included context. If so, remember to follow these instructions when creating the above summary. Examples of instructions include:
<example>
## Compact Instructions
When summarizing the conversation focus on typescript code changes and also remember the mistakes you made and how you fixed them.
</example>

<example>
# Summary instructions
When you are using compact - please focus on test output and code changes. Include relevant logs verbatim.
</example>
holmes/plugins/toolsets/investigator/core_investigation.py CHANGED

@@ -1,6 +1,7 @@
 import logging
 import os
 from typing import Any, Dict
+
 from uuid import uuid4
 
 from holmes.core.todo_tasks_formatter import format_tasks
@@ -15,9 +16,26 @@ from holmes.core.tools import (
 )
 from holmes.plugins.toolsets.investigator.model import Task, TaskStatus
 
+TODO_WRITE_TOOL_NAME = "TodoWrite"
+
+
+def parse_tasks(todos_data: Any) -> list[Task]:
+    tasks = []
+
+    for todo_item in todos_data:
+        if isinstance(todo_item, dict):
+            task = Task(
+                id=todo_item.get("id", str(uuid4())),
+                content=todo_item.get("content", ""),
+                status=TaskStatus(todo_item.get("status", "pending")),
+            )
+            tasks.append(task)
+
+    return tasks
+
 
 class TodoWriteTool(Tool):
-    name: str =
+    name: str = TODO_WRITE_TOOL_NAME
     description: str = "Save investigation tasks to break down complex problems into manageable sub-tasks. ALWAYS provide the COMPLETE list of all tasks, not just the ones being updated."
     parameters: Dict[str, ToolParameter] = {
         "todos": ToolParameter(
@@ -81,16 +99,7 @@ class TodoWriteTool(Tool):
         try:
             todos_data = params.get("todos", [])
 
-            tasks =
-
-            for todo_item in todos_data:
-                if isinstance(todo_item, dict):
-                    task = Task(
-                        id=todo_item.get("id", str(uuid4())),
-                        content=todo_item.get("content", ""),
-                        status=TaskStatus(todo_item.get("status", "pending")),
-                    )
-                    tasks.append(task)
+            tasks = parse_tasks(todos_data=todos_data)
 
             logging.debug(f"Tasks: {len(tasks)}")
 
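The newly extracted parse_tasks helper can be exercised on its own. A minimal sketch with invented todo items:

# Illustrative sketch (example data only): parse_tasks skips non-dict entries
# and falls back to a uuid4-generated id and the "pending" status.
from holmes.plugins.toolsets.investigator.core_investigation import parse_tasks

tasks = parse_tasks([
    {"id": "1", "content": "Check pod events", "status": "pending"},
    {"content": "Inspect recent deploys"},  # id generated via uuid4()
    "not-a-dict",  # ignored: not a dict
])
assert len(tasks) == 2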
holmes/plugins/toolsets/robusta/robusta.py CHANGED

@@ -19,6 +19,8 @@ START_TIME = "start_datetime"
 END_TIME = "end_datetime"
 NAMESPACE = "namespace"
 WORKLOAD = "workload"
+DEFAULT_LIMIT_CHANGE_ROWS = 100
+MAX_LIMIT_CHANGE_ROWS = 200
 
 
 class FetchRobustaFinding(Tool):
@@ -27,7 +29,7 @@ class FetchRobustaFinding(Tool):
     def __init__(self, dal: Optional[SupabaseDal]):
         super().__init__(
             name="fetch_finding_by_id",
-            description="Fetches a robusta finding. Findings are events, like a Prometheus alert or a deployment update",
+            description="Fetches a robusta finding. Findings are events, like a Prometheus alert or a deployment update and configuration change.",
             parameters={
                 PARAM_FINDING_ID: ToolParameter(
                     description="The id of the finding to fetch",
@@ -75,7 +77,7 @@ class FetchRobustaFinding(Tool):
         )
 
     def get_parameterized_one_liner(self, params: Dict) -> str:
-        return "Robusta: Fetch
+        return f"Robusta: Fetch finding data {params}"
 
 
 class FetchResourceRecommendation(Tool):
@@ -142,13 +144,17 @@ class FetchResourceRecommendation(Tool):
         return f"Robusta: Check Historical Resource Utilization: ({str(params)})"
 
 
-class
+class FetchConfigurationChangesMetadata(Tool):
     _dal: Optional[SupabaseDal]
 
     def __init__(self, dal: Optional[SupabaseDal]):
         super().__init__(
-            name="
-            description=
+            name="fetch_configuration_changes_metadata",
+            description=(
+                "Fetch configuration changes metadata in a given time range. "
+                "By default, fetch all cluster changes. Can be filtered on a given namespace or a specific workload. "
+                "Use fetch_finding_by_id to get detailed change of one specific configuration change."
+            ),
             parameters={
                 START_TIME: ToolParameter(
                     description="The starting time boundary for the search period. String in RFC3339 format.",
@@ -160,15 +166,36 @@ class FetchConfigurationChanges(Tool):
                     type="string",
                     required=True,
                 ),
+                "namespace": ToolParameter(
+                    description="The Kubernetes namespace name for filtering configuration changes",
+                    type="string",
+                    required=False,
+                ),
+                "workload": ToolParameter(
+                    description="The kubernetes workload name for filtering configuration changes. Deployment name or Pod name for example.",
+                    type="string",
+                    required=False,
+                ),
+                "limit": ToolParameter(
+                    description=f"Maximum number of rows to return. Default is {DEFAULT_LIMIT_CHANGE_ROWS} and the maximum is 200",
+                    type="integer",
+                    required=False,
+                ),
             },
         )
         self._dal = dal
 
     def _fetch_change_history(self, params: Dict) -> Optional[List[Dict]]:
         if self._dal and self._dal.enabled:
-            return self._dal.
+            return self._dal.get_configuration_changes_metadata(
                 start_datetime=params["start_datetime"],
                 end_datetime=params["end_datetime"],
+                limit=min(
+                    params.get("limit") or DEFAULT_LIMIT_CHANGE_ROWS,
+                    MAX_LIMIT_CHANGE_ROWS,
+                ),
+                ns=params.get("namespace"),
+                workload=params.get("workload"),
             )
         return None
 
@@ -197,7 +224,7 @@ class FetchConfigurationChanges(Tool):
         )
 
     def get_parameterized_one_liner(self, params: Dict) -> str:
-        return "Robusta: Search Change History"
+        return f"Robusta: Search Change History {params}"
 
 
 class RobustaToolset(Toolset):
@@ -219,7 +246,7 @@ class RobustaToolset(Toolset):
             prerequisites=[dal_prereq],
             tools=[
                 FetchRobustaFinding(dal),
-
+                FetchConfigurationChangesMetadata(dal),
                 FetchResourceRecommendation(dal),
             ],
             tags=[
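One detail of the new limit handling worth noting: because the clamp uses "or" rather than dict.get's default argument, an explicit None (and, as a side effect, 0) also falls back to the default, while min() caps anything above MAX_LIMIT_CHANGE_ROWS. A standalone sketch of the same expression:

# Standalone sketch of the limit clamp used in _fetch_change_history.
DEFAULT_LIMIT_CHANGE_ROWS = 100
MAX_LIMIT_CHANGE_ROWS = 200

def clamp_limit(params: dict) -> int:
    return min(params.get("limit") or DEFAULT_LIMIT_CHANGE_ROWS, MAX_LIMIT_CHANGE_ROWS)

assert clamp_limit({}) == 100               # omitted -> default
assert clamp_limit({"limit": None}) == 100  # explicit None -> default
assert clamp_limit({"limit": 500}) == 200   # capped at the maximum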
holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 CHANGED

@@ -1,7 +1,8 @@
 # Configuration and alerts history
-* Use
-*
-*
+* Use fetch_configuration_changes_metadata to get historical configuration changes in a cluster or for a specific workload.
+* If a change seems important to the investigation, use fetch_finding_by_id with the configuration change ID to get full details of the change.
+* You must ALWAYS call fetch_configuration_changes_metadata when investigating an alert
+* Never respond without calling fetch_configuration_changes_metadata
 * When investigating an alert, look at historical configuration changes that happen 4 hours before the alert started
 * If you found a change that caused the alert, you MUST write: 'The issue was introduced by ...' with a short description of the change, and the date of it.
 For example:
holmes/utils/stream.py CHANGED
{holmesgpt-0.15.0.dist-info → holmesgpt-0.16.0.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: holmesgpt
-Version: 0.15.0
+Version: 0.16.0
 Summary:
 Author: Natan Yellin
 Author-email: natan@robusta.dev
@@ -58,7 +58,9 @@ Description-Content-Type: text/markdown
 
 HolmesGPT is an AI agent for investigating problems in your cloud, finding the root cause, and suggesting remediations. It has dozens of built-in integrations for cloud providers, observability tools, and on-call systems.
 
-HolmesGPT
+>🎉 **HolmesGPT is now a CNCF Sandbox Project!** We're thrilled to be part of the Cloud Native Computing Foundation. [Learn more about our journey](https://github.com/cncf/sandbox/issues/392#issuecomment-3380007501).
+
+Find more about HolmesGPT's maintainers and adopters [here](./ADOPTERS.md).
 
 <p align="center">
   <a href="#how-it-works"><strong>How it Works</strong></a> |
{holmesgpt-0.15.0.dist-info → holmesgpt-0.16.0.dist-info}/RECORD CHANGED

@@ -1,7 +1,7 @@
 holmes/.git_archival.json,sha256=PbwdO7rNhEJ4ALiO12DPPb81xNAIsVxCA0m8OrVoqsk,182
-holmes/__init__.py,sha256=
+holmes/__init__.py,sha256=0nXci66VjsOpEKFNT6utHH7QLkZ14FWaqtPQ2syHOdc,257
 holmes/clients/robusta_client.py,sha256=YZA70OXGO0WZGTqtBhKiOtP7bhsrSW_f2Ea3Qcg9aMY,1530
-holmes/common/env_vars.py,sha256=
+holmes/common/env_vars.py,sha256=IdNr-cNaTX80-h4HuFvzCM9451dddY7vKJi-1IpqTm8,4263
 holmes/common/openshift.py,sha256=akbQ0GpnmuzXOqTcotpTDQSDKIROypS9mgPOprUgkCw,407
 holmes/config.py,sha256=1t732ILkEBKxzXchupNHwxVsRy7H-v4LpYNbi5DqE8Y,23330
 holmes/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -11,21 +11,20 @@ holmes/core/feedback.py,sha256=Gu69ghRYGSCPDgFA77xOB5RPbVdQX-9Qpv4yVVegL4g,6793
 holmes/core/investigation.py,sha256=HrRi1-myPF7ndOwwZ4Sv8iUbvPkrd5M02RPhZzln7NM,5900
 holmes/core/investigation_structured_output.py,sha256=sNxyqmsElQ-B22OlzTOrJtfrlipjyidcTU07idOBO7w,10570
 holmes/core/issue.py,sha256=dbctGv8KHAXC1SeOMkEP-BudJ50u7kA8jLN5FN_d808,2426
-holmes/core/llm.py,sha256=
+holmes/core/llm.py,sha256=3qFMjMflFba1NDk64oaU2lPi7Yr8oktYPdNKxzEQ8cM,28423
 holmes/core/models.py,sha256=xFHFutZWoIaQWSeuq1PiYPw9SGkDrQsQ9qYXuk60EEU,9096
 holmes/core/openai_formatting.py,sha256=31MwVvu0v0JiXot4Y0AwDJlFYe9vx8IB6mZiyC1y_lo,4684
-holmes/core/performance_timing.py,sha256=MTbTiiX2jjPmW7PuNA2eYON40eWsHPryR1ap_KlwZ_E,2217
 holmes/core/prompt.py,sha256=YkztY4gsobXys0fHxcwgngZBR2xDtBSYryY7HRnTxCQ,3025
 holmes/core/resource_instruction.py,sha256=rduue_t8iQi1jbWc3-k3jX867W1Fvc6Tah5uOJk35Mc,483
 holmes/core/runbooks.py,sha256=Oj5ICmiGgaq57t4erPzQDvHQ0rMGj1nhiiYhl8peH3Q,939
 holmes/core/safeguards.py,sha256=XrKgmMoJxSROfoSOW6t6QEG2MFppzC20Nyn1HA5G4Go,4935
-holmes/core/supabase_dal.py,sha256=
+holmes/core/supabase_dal.py,sha256=KFkvcw_IaM88eNgOlueZI7jodAs-b62hsTGAa6g2CtI,21804
 holmes/core/todo_tasks_formatter.py,sha256=USyJZcoX6zoxID1UV-abAKdaWFYLO6QJd-UKryJAurI,1487
-holmes/core/tool_calling_llm.py,sha256=
+holmes/core/tool_calling_llm.py,sha256=JCj-hl1WjdTECaup_dYSh_PzWPe5Bf4-rGppCjAPVr0,45637
 holmes/core/tools.py,sha256=V0YZogQUzGUVg79mTaS0cfSh6nR_NS1zhOr0h6sNpZU,32730
 holmes/core/tools_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 holmes/core/tools_utils/token_counting.py,sha256=7ZXbPqEIb8ClVvG_t9z2wlujDtX7m_pTVi242-2ZmQE,427
-holmes/core/tools_utils/tool_context_window_limiter.py,sha256=
+holmes/core/tools_utils/tool_context_window_limiter.py,sha256=tGDVb0LhpACWTrM1tmOEFfTqEhdbdAx7pJ-Eoedr7y8,2432
 holmes/core/tools_utils/tool_executor.py,sha256=pUkddbm_kgYdfhR1w5IbnSmwG56kvA4VadzBV8OqG8g,2632
 holmes/core/tools_utils/toolset_utils.py,sha256=SvWzen8Fg_TB_6Idw1hK0nCPrJL40ueWVMfsv8Kh2RY,2363
 holmes/core/toolset_manager.py,sha256=UqAUfjY09SAGirOHzyQwpOu2wxQUzU0F1STw8w-1abw,25878
@@ -35,7 +34,9 @@ holmes/core/transformers/base.py,sha256=FHUocHIS_oUBLWMiibdAuKY0Lpz5xY2ICji6DbmP
 holmes/core/transformers/llm_summarize.py,sha256=ZEJn3DElzMZLCCHNIzlCozllM2CmQ-JyXiqaGO7dAw4,6407
 holmes/core/transformers/registry.py,sha256=x8kKRXJvc_tJO2RvNGyoVXt6rFgG4S_ZcTG8p_OXYH0,3771
 holmes/core/transformers/transformer.py,sha256=rfT84Oq6qJyryevZGFKEbo1VSxinK4FBWUw_HpJ72xE,1028
+holmes/core/truncation/compaction.py,sha256=tIm8hM6bLA_NKG0--LAewmdNjUnw9NTOJlcLgfntYLI,2118
 holmes/core/truncation/dal_truncation_utils.py,sha256=I69I7Jac1kTtpxDRMe8O3IPN8Au0bZJqI8gXwW-GMaI,776
+holmes/core/truncation/input_context_window_limiter.py,sha256=spTOqBGKu1yputPxMt5xv46bemkMmvOWGW9bnIQB-NQ,8897
 holmes/interactive.py,sha256=MGo5b6PKfQWrCmwMKMiISjNY02KWDxgoKdUAfcwmjTE,47102
 holmes/main.py,sha256=cz3i9YZkvXWTK8pk5O-LfAM5AsYcOcTEKYnbvpVY5ns,35001
 holmes/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -53,6 +54,7 @@ holmes/plugins/prompts/_global_instructions.jinja2,sha256=d_c-BtDhU_Rmx637TPAyzl
 holmes/plugins/prompts/_permission_errors.jinja2,sha256=gIMQx-zaTnuEv7SkQVC_GvxsR5R85fLuDZnJIKWcm5A,480
 holmes/plugins/prompts/_runbook_instructions.jinja2,sha256=ngm3rmPPvgPG-9fjtR3yVb84YQNdNWfWShDGIag1JnY,1121
 holmes/plugins/prompts/_toolsets_instructions.jinja2,sha256=MaK5HRxhJyZ3I9zsmgdLO4lU95l24CwGp70ON5xHLMc,3024
+holmes/plugins/prompts/conversation_history_compaction.jinja2,sha256=F-m7soEKD01uifWKWEczvraLBvjDAkPo3PrOJcaqyRw,4884
 holmes/plugins/prompts/generic_ask.jinja2,sha256=wHAPX8SQaF_xpaYiLkwDuhQYYaKRsBPxFkciY6A3_QY,1997
 holmes/plugins/prompts/generic_ask_conversation.jinja2,sha256=qsWIHkrLTHMJSSx62kzGaZBLWmSgOmyhpMQRYoZqj_I,1610
 holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2,sha256=ooAPhHr7yEDQWziOTnjEUtDUZrGcApLNz2h5RS-0XnA,2074
@@ -181,7 +183,7 @@ holmes/plugins/toolsets/helm.yaml,sha256=-IPDChKMHcxGbzA0z9GKczRshL-mD24cHpBizfN
 holmes/plugins/toolsets/internet/internet.py,sha256=qeV6M9302QWacFcr7bOfsZUc84v9MnlTEF_76oUOSNA,7787
 holmes/plugins/toolsets/internet/notion.py,sha256=ELDtsP8kxdU8rExEL8hq1yV5DEeWDNOljHZEwjnumJc,4795
 holmes/plugins/toolsets/investigator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-holmes/plugins/toolsets/investigator/core_investigation.py,sha256=
+holmes/plugins/toolsets/investigator/core_investigation.py,sha256=FQmQT6AkE0-oq8Alo7GfCx_tPJo0Sby1_nOlcUW9JGU,5389
 holmes/plugins/toolsets/investigator/investigator_instructions.jinja2,sha256=C6y6OaJI2dQSLSw7Zq9-D-sWmL5K_40zRItvkzVAdH4,13967
 holmes/plugins/toolsets/investigator/model.py,sha256=6AE9Iy05GaX3gC9ChTtZQOFGjSUsas_pB9_YyDaJXP0,342
 holmes/plugins/toolsets/kafka.py,sha256=kO_CKzdXG__6QmiwaGumZgPkdZbft0f1DpqhNV8ogs8,24774
@@ -212,8 +214,8 @@ holmes/plugins/toolsets/rabbitmq/api.py,sha256=-BtqF7hQWtl_OamnQ521vYHhR8E2n2wcP
 holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2,sha256=qetmtJUMkx9LIihr2fSJ2EV9h2J-b-ZdUAvMtopXZYY,3105
 holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py,sha256=k1Sq0Iw8f4ygLVdMDSWMXEGqLX5Jq98hTDcAlsjrv6A,9286
 holmes/plugins/toolsets/robusta/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-holmes/plugins/toolsets/robusta/robusta.py,sha256=
-holmes/plugins/toolsets/robusta/robusta_instructions.jinja2,sha256=
+holmes/plugins/toolsets/robusta/robusta.py,sha256=7q9zcW2k0_6JYYFnjerQobf6CWvwv95VeQ2MXbPChV4,10269
+holmes/plugins/toolsets/robusta/robusta_instructions.jinja2,sha256=EZd3jiiiipqVjkc9Ma-6aBGvn4Opa-HY-k3kv4jYPpo,2272
 holmes/plugins/toolsets/runbook/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 holmes/plugins/toolsets/runbook/runbook_fetcher.py,sha256=0WN_-T55M5CASGGf9uyUclhALazF8m7xYLk5-kKcHf0,10271
 holmes/plugins/toolsets/service_discovery.py,sha256=HqENA92SyN7Z_Kd8OZVfw-S9PwdYhn6HDFUAo1-fosw,3268
@@ -243,11 +245,11 @@ holmes/utils/llms.py,sha256=YLqq54I84wW7Kp7Z7CPVTxAFPb-Sq6xkdmVGzVf60jI,629
 holmes/utils/markdown_utils.py,sha256=_yDc_IRB5zkj9THUlZ6nzir44VfirTjPccC_DrFrBkc,1507
 holmes/utils/pydantic_utils.py,sha256=g0e0jLTa8Je8JKrhEP4N5sMxj0_hhPOqFZr0Vpd67sg,1649
 holmes/utils/sentry_helper.py,sha256=BPkyMs7Yo_0b7QLMmAQ3mKZyXTmxkgVRjr3kikr5ZX8,1328
-holmes/utils/stream.py,sha256=
+holmes/utils/stream.py,sha256=YgwCgXQjlM6gSKCjTU5ZSeHK9q_VMOoL8_-iL5hQhek,4873
 holmes/utils/tags.py,sha256=SU4EZMBtLlIb7OlHsSpguFaypczRzOcuHYxDSanV3sQ,3364
 holmes/version.py,sha256=5-3__IY_2hcIC4WQyTqcdyX1QF-e2VfkYKrI4BIrq0Q,5992
-holmesgpt-0.
-holmesgpt-0.
-holmesgpt-0.
-holmesgpt-0.
-holmesgpt-0.
+holmesgpt-0.16.0.dist-info/LICENSE.txt,sha256=RdZMj8VXRQdVslr6PMYMbAEu5pOjOdjDqt3yAmWb9Ds,1072
+holmesgpt-0.16.0.dist-info/METADATA,sha256=4a5VtcJYhIgvRGLd9WOa1oaqUzpkJoOTb5D58NOlCD8,16259
+holmesgpt-0.16.0.dist-info/WHEEL,sha256=kLuE8m1WYU0Ig0_YEGrXyTtiJvKPpLpDEiChiNyei5Y,88
+holmesgpt-0.16.0.dist-info/entry_points.txt,sha256=JdzEyZhpaYr7Boo4uy4UZgzY1VsAEbzMgGmHZtx9KFY,42
+holmesgpt-0.16.0.dist-info/RECORD,,