holmesgpt 0.12.4__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic. Click here for more details.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +19 -1
- holmes/common/env_vars.py +13 -0
- holmes/config.py +69 -9
- holmes/core/conversations.py +11 -0
- holmes/core/investigation.py +16 -3
- holmes/core/investigation_structured_output.py +12 -0
- holmes/core/llm.py +10 -0
- holmes/core/models.py +9 -1
- holmes/core/openai_formatting.py +72 -12
- holmes/core/prompt.py +13 -0
- holmes/core/supabase_dal.py +3 -0
- holmes/core/todo_manager.py +88 -0
- holmes/core/tool_calling_llm.py +121 -149
- holmes/core/tools.py +10 -1
- holmes/core/tools_utils/tool_executor.py +7 -2
- holmes/core/tools_utils/toolset_utils.py +7 -2
- holmes/core/tracing.py +8 -7
- holmes/interactive.py +1 -0
- holmes/main.py +2 -1
- holmes/plugins/prompts/__init__.py +7 -1
- holmes/plugins/prompts/_ai_safety.jinja2 +43 -0
- holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
- holmes/plugins/prompts/_default_log_prompt.jinja2 +4 -2
- holmes/plugins/prompts/_fetch_logs.jinja2 +6 -1
- holmes/plugins/prompts/_general_instructions.jinja2 +16 -0
- holmes/plugins/prompts/_permission_errors.jinja2 +1 -1
- holmes/plugins/prompts/_toolsets_instructions.jinja2 +4 -4
- holmes/plugins/prompts/generic_ask.jinja2 +4 -3
- holmes/plugins/prompts/investigation_procedure.jinja2 +210 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +4 -0
- holmes/plugins/toolsets/__init__.py +19 -6
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +27 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -2
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -1
- holmes/plugins/toolsets/coralogix/api.py +6 -6
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +7 -1
- holmes/plugins/toolsets/datadog/datadog_api.py +20 -8
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +8 -1
- holmes/plugins/toolsets/datadog/datadog_rds_instructions.jinja2 +82 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +12 -5
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +20 -11
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +735 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +18 -11
- holmes/plugins/toolsets/git.py +15 -15
- holmes/plugins/toolsets/grafana/grafana_api.py +12 -1
- holmes/plugins/toolsets/grafana/toolset_grafana.py +5 -1
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +9 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +12 -5
- holmes/plugins/toolsets/internet/internet.py +2 -1
- holmes/plugins/toolsets/internet/notion.py +2 -1
- holmes/plugins/toolsets/investigator/__init__.py +0 -0
- holmes/plugins/toolsets/investigator/core_investigation.py +157 -0
- holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +253 -0
- holmes/plugins/toolsets/investigator/model.py +15 -0
- holmes/plugins/toolsets/kafka.py +14 -7
- holmes/plugins/toolsets/kubernetes.yaml +7 -7
- holmes/plugins/toolsets/kubernetes_logs.py +454 -25
- holmes/plugins/toolsets/logging_utils/logging_api.py +115 -55
- holmes/plugins/toolsets/mcp/toolset_mcp.py +1 -1
- holmes/plugins/toolsets/newrelic.py +8 -3
- holmes/plugins/toolsets/opensearch/opensearch.py +8 -4
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +9 -2
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +6 -2
- holmes/plugins/toolsets/prometheus/prometheus.py +149 -44
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +8 -2
- holmes/plugins/toolsets/robusta/robusta.py +4 -4
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +6 -5
- holmes/plugins/toolsets/servicenow/servicenow.py +18 -3
- holmes/plugins/toolsets/utils.py +8 -1
- holmes/utils/llms.py +20 -0
- holmes/utils/stream.py +90 -0
- {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/METADATA +48 -35
- {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/RECORD +85 -75
- holmes/utils/robusta.py +0 -9
- {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
# Task Management
|
|
2
|
+
You have access to the TodoWrite tool to help you manage and plan tasks. The updated task list will always appear in the prompt. Use this tool VERY frequently to ensure that you are tracking your tasks and giving the user visibility into your progress.
|
|
3
|
+
This tool is also EXTREMELY helpful for planning tasks, and for breaking down larger complex tasks into smaller steps. If you do not use this tool when planning, you may forget to do important tasks - and that is unacceptable.
|
|
4
|
+
|
|
5
|
+
It is critical that you mark todos as completed as soon as you are done with a task. Do not batch up multiple tasks before marking them as completed.
|
|
6
|
+
|
|
7
|
+
Examples:
|
|
8
|
+
|
|
9
|
+
<example>
|
|
10
|
+
user: The invoice-reconciliation service is experiencing high latency and timeouts
|
|
11
|
+
assistant: I'll investigate the invoice-reconciliation service latency issue. Let me use the TodoWrite tool to plan this investigation:
|
|
12
|
+
- Check invoice-reconciliation service deployment status and pod health
|
|
13
|
+
- Examine recent logs for errors or performance issues
|
|
14
|
+
- Analyze resource utilization (CPU/memory) of invoice-reconciliation service pods
|
|
15
|
+
- Check service dependencies and external API response times
|
|
16
|
+
- Review recent configuration changes that might affect performance
|
|
17
|
+
|
|
18
|
+
marking the first todo as in_progress
|
|
19
|
+
|
|
20
|
+
Let me start by checking the deployment status...
|
|
21
|
+
|
|
22
|
+
I found the invoice-reconciliation service pods are experiencing CPU throttling. Let me mark this task as completed and investigate the resource limits...
|
|
23
|
+
..
|
|
24
|
+
..
|
|
25
|
+
</example>
|
|
26
|
+
In the above example, the assistant systematically investigates the latency issue by checking infrastructure, logs, metrics, and configuration.
|
|
27
|
+
|
|
28
|
+
<example>
|
|
29
|
+
user: The shipment-tracker microservice pods keep restarting and we're seeing 502 errors
|
|
30
|
+
|
|
31
|
+
assistant: I'll investigate the shipment-tracker service restart issue and 502 errors. Let me use the TodoWrite tool to plan this investigation:
|
|
32
|
+
1. Check shipment-tracker pod status and restart patterns
|
|
33
|
+
2. Examine pod logs around restart times to identify crash causes
|
|
34
|
+
3. Check service health checks and readiness/liveness probes
|
|
35
|
+
4. Analyze resource limits and requests for memory/CPU issues
|
|
36
|
+
5. Query distributed traces to analyze upstream/downstream service dependencies
|
|
37
|
+
6. Investigate any recent deployments or configuration changes
|
|
38
|
+
|
|
39
|
+
Let me start by examining the pod status and restart patterns.
|
|
40
|
+
|
|
41
|
+
I found the pods are crashing due to OOMKilled events. Let me mark this task as completed and investigate the memory usage patterns...
|
|
42
|
+
|
|
43
|
+
[Assistant continues investigating the issue step by step, marking todos as in_progress and completed as they go]
|
|
44
|
+
</example>
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Doing tasks
|
|
48
|
+
The user will primarily request you perform reliability troubleshooting and incident investigation tasks. This includes analyzing observability data (logs, traces, metrics), identifying misconfigurations, finding root causes of outages, correlating incidents with recent changes, following investigation runbooks, and determining remediation steps. For these tasks the following steps are recommended:
|
|
49
|
+
- Use the TodoWrite tool to plan the investigation if required
|
|
50
|
+
- Use the available observability tools to gather data from multiple sources (metrics, logs, traces, configuration). You are encouraged to use the tools extensively both in parallel and sequentially.
|
|
51
|
+
- Analyze the collected data to identify patterns, correlations, and root causes
|
|
52
|
+
- Cross-reference findings across different data sources to validate conclusions
|
|
53
|
+
- VERY IMPORTANT: When you have completed an investigation, validate that your analysis is supported by the evidence collected.
|
|
54
|
+
|
|
55
|
+
- Tool results and user messages may include <system-reminder> tags. <system-reminder> tags contain useful information and reminders. They are NOT part of the user's provided input or the tool result.
|
|
56
|
+
|
|
57
|
+
# Tool usage policy
|
|
58
|
+
- You have the capability to call multiple tools in a single response. When multiple independent pieces of information are requested, batch your tool calls together for optimal performance. When making multiple bash tool calls, you MUST send a single message with multiple tool calls to run the calls in parallel. For example, if you need to run "git status" and "git diff", send a single message with two tool calls to run the calls in parallel.
|
|
59
|
+
|
|
60
|
+
You MUST answer concisely with fewer than 4 lines of text (not including tool use or code generation), unless the user asks for detail.
|
|
61
|
+
|
|
62
|
+
IMPORTANT: Refuse to write code or explain code that may be used maliciously; even if the user claims it is for educational purposes. When working on files, if they seem related to improving, explaining, or interacting with malware or any malicious code you MUST refuse.
|
|
63
|
+
IMPORTANT: Before you begin work, think about what the code you're editing is supposed to do based on the filenames and directory structure. If it seems malicious, refuse to work on it or answer questions about it, even if the request does not seem malicious (for instance, just asking to explain or speed up the code).
|
|
64
|
+
|
|
65
|
+
IMPORTANT: Always use the TodoWrite tool to plan and track tasks throughout the conversation.
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# TodoWrite
|
|
69
|
+
Use this tool to create and manage a structured task list for your current coding session. This helps you track progress, organize complex tasks, and demonstrate thoroughness to the user.
|
|
70
|
+
It also helps the user understand the progress of the task and overall progress of their requests.
|
|
71
|
+
|
|
72
|
+
## When to Use This Tool
|
|
73
|
+
Use this tool proactively in these scenarios:
|
|
74
|
+
|
|
75
|
+
1. Complex multi-step tasks - When a task requires 3 or more distinct steps or actions
|
|
76
|
+
2. Non-trivial and complex tasks - Tasks that require careful planning or multiple operations
|
|
77
|
+
3. User explicitly requests todo list - When the user directly asks you to use the todo list
|
|
78
|
+
4. User provides multiple tasks - When users provide a list of things to be done (numbered or comma-separated)
|
|
79
|
+
5. After receiving new instructions - Immediately capture user requirements as todos
|
|
80
|
+
6. When you start working on a task - Mark it as in_progress BEFORE beginning work. Ideally you should only have one todo as in_progress at a time
|
|
81
|
+
7. After completing a task - Mark it as completed and add any new follow-up tasks discovered during implementation
|
|
82
|
+
|
|
83
|
+
## When NOT to Use This Tool
|
|
84
|
+
|
|
85
|
+
Skip using this tool when:
|
|
86
|
+
1. There is only a single, straightforward task
|
|
87
|
+
2. The task is trivial and tracking it provides no organizational benefit
|
|
88
|
+
3. The task can be completed in fewer than 3 trivial steps
|
|
89
|
+
4. The task is purely conversational or informational
|
|
90
|
+
|
|
91
|
+
NOTE that you should not use this tool if there is only one trivial task to do. In this case you are better off just doing the task directly.
|
|
92
|
+
|
|
93
|
+
## Examples of When to Use the Todo List
|
|
94
|
+
|
|
95
|
+
<example>
|
|
96
|
+
User: I want to add a dark mode toggle to the application settings. Make sure you run the tests and build when you're done!
|
|
97
|
+
Assistant: I'll help add a dark mode toggle to your application settings. Let me create a todo list to track this implementation.
|
|
98
|
+
*Creates todo list with the following items:*
|
|
99
|
+
1. Create dark mode toggle component in Settings page
|
|
100
|
+
2. Add dark mode state management (context/store)
|
|
101
|
+
3. Implement CSS-in-JS styles for dark theme
|
|
102
|
+
4. Update existing components to support theme switching
|
|
103
|
+
5. Run tests and build process, addressing any failures or errors that occur
|
|
104
|
+
*Begins working on the first task*
|
|
105
|
+
|
|
106
|
+
<reasoning>
|
|
107
|
+
The assistant used the todo list because:
|
|
108
|
+
1. Adding dark mode is a multi-step feature requiring UI, state management, and styling changes
|
|
109
|
+
2. The user explicitly requested tests and build be run afterward
|
|
110
|
+
3. The assistant inferred that tests and build need to pass by adding "Run tests and build process, addressing any failures or errors that occur" as the final task
|
|
111
|
+
</reasoning>
|
|
112
|
+
</example>
|
|
113
|
+
|
|
114
|
+
<example>
|
|
115
|
+
User: Help me rename the function getCwd to getCurrentWorkingDirectory across my project
|
|
116
|
+
Assistant: Let me first search through your codebase to find all occurrences of 'getCwd'.
|
|
117
|
+
*Uses grep or search tools to locate all instances of getCwd in the codebase*
|
|
118
|
+
Assistant: I've found 15 instances of 'getCwd' across 8 different files. Let me create a todo list to track these changes.
|
|
119
|
+
*Creates todo list with specific items for each file that needs updating*
|
|
120
|
+
|
|
121
|
+
<reasoning>
|
|
122
|
+
The assistant used the todo list because:
|
|
123
|
+
1. First, the assistant searched to understand the scope of the task
|
|
124
|
+
2. Upon finding multiple occurrences across different files, it determined this was a complex task with multiple steps
|
|
125
|
+
3. The todo list helps ensure every instance is tracked and updated systematically
|
|
126
|
+
4. This approach prevents missing any occurrences and maintains code consistency
|
|
127
|
+
</reasoning>
|
|
128
|
+
</example>
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
<example>
|
|
132
|
+
User: We're seeing high error rates across multiple services after the latest deployment. Can you investigate what's causing the widespread issues?
|
|
133
|
+
Assistant: I'll investigate the widespread error rates after the deployment. Let me create a todo list to systematically track this multi-service investigation:
|
|
134
|
+
*Creates a todo list breaking down the investigation across different observability sources*
|
|
135
|
+
Assistant: Let's start by checking the deployment timeline and correlating it with error rate spikes using metrics and traces.
|
|
136
|
+
|
|
137
|
+
<reasoning>
|
|
138
|
+
The assistant used the todo list because:
|
|
139
|
+
1. The user reported multiple services affected, requiring systematic investigation
|
|
140
|
+
2. The todo list helps organize investigation across different services and data sources
|
|
141
|
+
3. This approach ensures comprehensive analysis of metrics, logs, traces, and recent changes
|
|
142
|
+
</reasoning>
|
|
143
|
+
</example>
|
|
144
|
+
|
|
145
|
+
<example>
|
|
146
|
+
User: The inventory-validation service is showing high CPU usage and slow response times. Can you help troubleshoot this performance issue?
|
|
147
|
+
Assistant: I'll investigate the inventory-validation service performance issue. First, let me gather observability data to identify the root cause.
|
|
148
|
+
*Queries metrics to see CPU patterns, memory usage, and request latency trends*
|
|
149
|
+
Assistant: I found CPU spikes correlating with specific API endpoints. Let me create a todo list to systematically investigate this performance degradation.
|
|
150
|
+
*Creates todo list with items like: 1) Analyze CPU and memory metrics over time, 2) Check database query performance and connection pooling, 3) Examine distributed traces for slow operations, 4) Review recent code changes that might affect performance, 5) Check for resource contention with other services*
|
|
151
|
+
Let's start by analyzing the detailed CPU metrics and correlating with request patterns.
|
|
152
|
+
|
|
153
|
+
<reasoning>
|
|
154
|
+
The assistant used the todo list because:
|
|
155
|
+
1. First, the assistant gathered initial observability data to understand the scope
|
|
156
|
+
2. Based on this analysis, it identified multiple potential performance bottlenecks
|
|
157
|
+
3. Performance troubleshooting requires systematic investigation across multiple data sources
|
|
158
|
+
4. The todo list helps methodically track investigation across metrics, traces, logs, and configuration
|
|
159
|
+
5. This systematic approach ensures comprehensive analysis of all potential causes
|
|
160
|
+
</reasoning>
|
|
161
|
+
</example>
|
|
162
|
+
|
|
163
|
+
## Examples of When NOT to Use the Todo List
|
|
164
|
+
|
|
165
|
+
<example>
|
|
166
|
+
User: How do I print 'Hello World' in Python?
|
|
167
|
+
Assistant: In Python, you can print "Hello World" with this simple code:
|
|
168
|
+
|
|
169
|
+
python
|
|
170
|
+
print("Hello World")
|
|
171
|
+
|
|
172
|
+
This will output the text "Hello World" to the console when executed.
|
|
173
|
+
|
|
174
|
+
<reasoning>
|
|
175
|
+
The assistant did not use the todo list because this is a single, trivial task that can be completed in one step. There's no need to track multiple tasks or steps for such a straightforward request.
|
|
176
|
+
</reasoning>
|
|
177
|
+
</example>
|
|
178
|
+
|
|
179
|
+
<example>
|
|
180
|
+
User: What does the git status command do?
|
|
181
|
+
Assistant: The git status command shows the current state of your working directory and staging area. It displays which changes have been staged, which haven't, and which files aren't being tracked by Git. This is a fundamental command for understanding the current state of your repository before making commits.
|
|
182
|
+
|
|
183
|
+
<reasoning>
|
|
184
|
+
The assistant did not use the todo list because this is an informational request with no actual coding task to complete. The user is simply asking for an explanation, not for the assistant to perform multiple steps or tasks.
|
|
185
|
+
</reasoning>
|
|
186
|
+
</example>
|
|
187
|
+
|
|
188
|
+
<example>
|
|
189
|
+
User: Can you add a comment to the calculateTotal function to explain what it does?
|
|
190
|
+
Assistant: Sure, let me add a comment to the calculateTotal function to explain what it does.
|
|
191
|
+
* Uses the Edit tool to add a comment to the calculateTotal function *
|
|
192
|
+
|
|
193
|
+
<reasoning>
|
|
194
|
+
The assistant did not use the todo list because this is a single, straightforward task confined to one location in the code. Adding a comment doesn't require tracking multiple steps or systematic organization.
|
|
195
|
+
</reasoning>
|
|
196
|
+
</example>
|
|
197
|
+
|
|
198
|
+
<example>
|
|
199
|
+
User: Run npm install for me and tell me what happens.
|
|
200
|
+
Assistant: I'll run the npm install command for you.
|
|
201
|
+
|
|
202
|
+
*Executes: npm install*
|
|
203
|
+
|
|
204
|
+
The command completed successfully. Here's the output:
|
|
205
|
+
[Output of npm install command]
|
|
206
|
+
|
|
207
|
+
All dependencies have been installed according to your package.json file.
|
|
208
|
+
|
|
209
|
+
<reasoning>
|
|
210
|
+
The assistant did not use the todo list because this is a single command execution with immediate results. There are no multiple steps to track or organize, making the todo list unnecessary for this straightforward task.
|
|
211
|
+
</reasoning>
|
|
212
|
+
</example>
|
|
213
|
+
|
|
214
|
+
## Task States and Management
|
|
215
|
+
|
|
216
|
+
1. **Task States**: Use these states to track progress:
|
|
217
|
+
- pending: Task not yet started
|
|
218
|
+
- in_progress: Currently working on (limit to ONE task at a time)
|
|
219
|
+
- completed: Task finished successfully
|
|
220
|
+
|
|
221
|
+
2. **Task Management**:
|
|
222
|
+
- Update task status in real-time as you work
|
|
223
|
+
- Mark tasks complete IMMEDIATELY after finishing (don't batch completions)
|
|
224
|
+
- Only have ONE task in_progress at any time
|
|
225
|
+
|
|
226
|
+
3. **Task Completion Requirements**:
|
|
227
|
+
- ONLY mark a task as completed when you have FULLY accomplished it
|
|
228
|
+
- If you encounter errors, blockers, or cannot finish, keep the task as in_progress
|
|
229
|
+
- When blocked, create a new task describing what needs to be resolved
|
|
230
|
+
- Never mark a task as completed if:
|
|
231
|
+
- Investigation is partial
|
|
232
|
+
- You encountered unresolved errors
|
|
233
|
+
- You couldn't find necessary files or dependencies
|
|
234
|
+
|
|
235
|
+
4. **Task Breakdown**:
|
|
236
|
+
- Create specific, actionable items
|
|
237
|
+
- Break complex tasks into smaller, manageable steps
|
|
238
|
+
- Use clear, descriptive task names
|
|
239
|
+
|
|
240
|
+
When in doubt, use this tool. Being proactive with task management demonstrates attentiveness and ensures you complete all requirements successfully.
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
```typescript
|
|
244
|
+
{
|
|
245
|
+
// The updated todo list
|
|
246
|
+
todos: {
|
|
247
|
+
content: string;
|
|
248
|
+
status: "pending" | "in_progress" | "completed";
|
|
249
|
+
priority: "high" | "medium" | "low";
|
|
250
|
+
id: string;
|
|
251
|
+
}[];
|
|
252
|
+
}
|
|
253
|
+
```
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from pydantic import BaseModel, Field
|
|
3
|
+
from uuid import uuid4
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TaskStatus(str, Enum):
|
|
7
|
+
PENDING = "pending"
|
|
8
|
+
IN_PROGRESS = "in_progress"
|
|
9
|
+
COMPLETED = "completed"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Task(BaseModel):
|
|
13
|
+
id: str = Field(default_factory=lambda: str(uuid4()))
|
|
14
|
+
content: str
|
|
15
|
+
status: TaskStatus = TaskStatus.PENDING
|
holmes/plugins/toolsets/kafka.py
CHANGED
|
@@ -33,7 +33,7 @@ from holmes.core.tools import (
|
|
|
33
33
|
ToolsetTag,
|
|
34
34
|
)
|
|
35
35
|
from holmes.plugins.toolsets.consts import TOOLSET_CONFIG_MISSING_ERROR
|
|
36
|
-
from holmes.plugins.toolsets.utils import get_param_or_raise
|
|
36
|
+
from holmes.plugins.toolsets.utils import get_param_or_raise, toolset_name_for_one_liner
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
class KafkaClusterConfig(BaseModel):
|
|
@@ -202,7 +202,8 @@ class ListKafkaConsumers(BaseKafkaTool):
|
|
|
202
202
|
)
|
|
203
203
|
|
|
204
204
|
def get_parameterized_one_liner(self, params: Dict) -> str:
|
|
205
|
-
|
|
205
|
+
cluster = params.get("kafka_cluster_name", "")
|
|
206
|
+
return f"{toolset_name_for_one_liner(self.toolset.name)}: List Consumer Groups ({cluster})"
|
|
206
207
|
|
|
207
208
|
|
|
208
209
|
class DescribeConsumerGroup(BaseKafkaTool):
|
|
@@ -262,7 +263,8 @@ class DescribeConsumerGroup(BaseKafkaTool):
|
|
|
262
263
|
)
|
|
263
264
|
|
|
264
265
|
def get_parameterized_one_liner(self, params: Dict) -> str:
|
|
265
|
-
|
|
266
|
+
group_id = params.get("group_id", "")
|
|
267
|
+
return f"{toolset_name_for_one_liner(self.toolset.name)}: Describe Consumer Group ({group_id})"
|
|
266
268
|
|
|
267
269
|
|
|
268
270
|
class ListTopics(BaseKafkaTool):
|
|
@@ -307,7 +309,8 @@ class ListTopics(BaseKafkaTool):
|
|
|
307
309
|
)
|
|
308
310
|
|
|
309
311
|
def get_parameterized_one_liner(self, params: Dict) -> str:
|
|
310
|
-
|
|
312
|
+
cluster = params.get("kafka_cluster_name", "")
|
|
313
|
+
return f"{toolset_name_for_one_liner(self.toolset.name)}: List Kafka Topics ({cluster})"
|
|
311
314
|
|
|
312
315
|
|
|
313
316
|
class DescribeTopic(BaseKafkaTool):
|
|
@@ -376,7 +379,10 @@ class DescribeTopic(BaseKafkaTool):
|
|
|
376
379
|
)
|
|
377
380
|
|
|
378
381
|
def get_parameterized_one_liner(self, params: Dict) -> str:
|
|
379
|
-
|
|
382
|
+
topic = params.get("topic_name", "")
|
|
383
|
+
return (
|
|
384
|
+
f"{toolset_name_for_one_liner(self.toolset.name)}: Describe Topic ({topic})"
|
|
385
|
+
)
|
|
380
386
|
|
|
381
387
|
|
|
382
388
|
def group_has_topic(
|
|
@@ -530,7 +536,8 @@ class FindConsumerGroupsByTopic(BaseKafkaTool):
|
|
|
530
536
|
)
|
|
531
537
|
|
|
532
538
|
def get_parameterized_one_liner(self, params: Dict) -> str:
|
|
533
|
-
|
|
539
|
+
topic = params.get("topic_name", "")
|
|
540
|
+
return f"{toolset_name_for_one_liner(self.toolset.name)}: Find Topic Consumers ({topic})"
|
|
534
541
|
|
|
535
542
|
|
|
536
543
|
class ListKafkaClusters(BaseKafkaTool):
|
|
@@ -551,7 +558,7 @@ class ListKafkaClusters(BaseKafkaTool):
|
|
|
551
558
|
)
|
|
552
559
|
|
|
553
560
|
def get_parameterized_one_liner(self, params: Dict) -> str:
|
|
554
|
-
return "
|
|
561
|
+
return f"{toolset_name_for_one_liner(self.toolset.name)}: List Kafka Clusters"
|
|
555
562
|
|
|
556
563
|
|
|
557
564
|
class KafkaToolset(Toolset):
|
|
@@ -36,11 +36,11 @@ toolsets:
|
|
|
36
36
|
|
|
37
37
|
- name: "kubectl_get_yaml"
|
|
38
38
|
description: "Run `kubectl get -o yaml` on a single Kubernetes resource"
|
|
39
|
-
command: "kubectl get -o yaml {{ kind }} {{ name}}{% if namespace %} -n {{ namespace }}{% endif %}"
|
|
39
|
+
command: "kubectl get -o yaml {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %}"
|
|
40
40
|
|
|
41
41
|
- name: "kubectl_events"
|
|
42
|
-
description: "Retrieve the events for a specific Kubernetes resource. `resource_type` can be any kubernetes resource type: 'pod', 'service', 'deployment, 'job'
|
|
43
|
-
command: "kubectl events --for {{resource_type}}/{{
|
|
42
|
+
description: "Retrieve the events for a specific Kubernetes resource. `resource_type` can be any kubernetes resource type: 'pod', 'service', 'deployment', 'job', 'node', etc."
|
|
43
|
+
command: "kubectl events --for {{resource_type}}/{{ resource_name }}{% if namespace %} -n {{ namespace }}{% endif %}"
|
|
44
44
|
|
|
45
45
|
- name: "kubectl_memory_requests_all_namespaces"
|
|
46
46
|
description: "Fetch and display memory requests for all pods across all namespaces in MiB, summing requests across multiple containers where applicable and handling binary, decimal, and millibyte units correctly."
|
|
@@ -239,10 +239,10 @@ toolsets:
|
|
|
239
239
|
tools:
|
|
240
240
|
- name: "kubectl_lineage_children"
|
|
241
241
|
description: "Get all children/dependents of a Kubernetes resource, recursively, including their status"
|
|
242
|
-
command: "kubectl lineage {{ kind }} {{ name}} -n {{ namespace }}"
|
|
242
|
+
command: "kubectl lineage {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %}"
|
|
243
243
|
- name: "kubectl_lineage_parents"
|
|
244
244
|
description: "Get all parents/dependencies of a Kubernetes resource, recursively, including their status"
|
|
245
|
-
command: "kubectl lineage {{ kind }} {{ name}} -n {{ namespace }} -D"
|
|
245
|
+
command: "kubectl lineage {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %} -D"
|
|
246
246
|
|
|
247
247
|
kubernetes/kube-lineage-extras: # To make this work, build kube-lineage from source
|
|
248
248
|
description: "Fetches children/dependents and parents/dependencies resources using kube-lineage"
|
|
@@ -255,7 +255,7 @@ toolsets:
|
|
|
255
255
|
tools:
|
|
256
256
|
- name: "kubectl_lineage_children"
|
|
257
257
|
description: "Get all children/dependents of a Kubernetes resource, recursively, including their status"
|
|
258
|
-
command: "kube-lineage {{ kind }} {{ name}} -n {{ namespace }}"
|
|
258
|
+
command: "kube-lineage {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %}"
|
|
259
259
|
- name: "kubectl_lineage_parents"
|
|
260
260
|
description: "Get all parents/dependencies of a Kubernetes resource, recursively, including their status"
|
|
261
|
-
command: "kube-lineage {{ kind }} {{ name}} -n {{ namespace }} -D"
|
|
261
|
+
command: "kube-lineage {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %} -D"
|