holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +3 -5
- holmes/clients/robusta_client.py +20 -6
- holmes/common/env_vars.py +58 -3
- holmes/common/openshift.py +1 -1
- holmes/config.py +123 -148
- holmes/core/conversations.py +71 -15
- holmes/core/feedback.py +191 -0
- holmes/core/investigation.py +31 -39
- holmes/core/investigation_structured_output.py +3 -3
- holmes/core/issue.py +1 -1
- holmes/core/llm.py +508 -88
- holmes/core/models.py +108 -4
- holmes/core/openai_formatting.py +14 -1
- holmes/core/prompt.py +48 -3
- holmes/core/runbooks.py +1 -0
- holmes/core/safeguards.py +8 -6
- holmes/core/supabase_dal.py +295 -100
- holmes/core/tool_calling_llm.py +489 -428
- holmes/core/tools.py +325 -56
- holmes/core/tools_utils/token_counting.py +21 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
- holmes/core/tools_utils/tool_executor.py +0 -13
- holmes/core/tools_utils/toolset_utils.py +1 -0
- holmes/core/toolset_manager.py +191 -5
- holmes/core/tracing.py +19 -3
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +63 -0
- holmes/core/transformers/llm_summarize.py +175 -0
- holmes/core/transformers/registry.py +123 -0
- holmes/core/transformers/transformer.py +32 -0
- holmes/core/truncation/compaction.py +94 -0
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/core/truncation/input_context_window_limiter.py +219 -0
- holmes/interactive.py +228 -31
- holmes/main.py +23 -40
- holmes/plugins/interfaces.py +2 -1
- holmes/plugins/prompts/__init__.py +2 -1
- holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
- holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
- holmes/plugins/prompts/generic_ask.jinja2 +0 -4
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
- holmes/plugins/runbooks/__init__.py +145 -17
- holmes/plugins/runbooks/catalog.json +2 -0
- holmes/plugins/sources/github/__init__.py +4 -2
- holmes/plugins/sources/prometheus/models.py +1 -0
- holmes/plugins/toolsets/__init__.py +44 -27
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/utils.py +0 -32
- holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
- holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
- holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
- holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
- holmes/plugins/toolsets/bash/common/bash.py +23 -13
- holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
- holmes/plugins/toolsets/bash/common/stringify.py +1 -1
- holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
- holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
- holmes/plugins/toolsets/bash/parse_command.py +12 -13
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/connectivity_check.py +124 -0
- holmes/plugins/toolsets/coralogix/api.py +132 -119
- holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
- holmes/plugins/toolsets/coralogix/utils.py +15 -79
- holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
- holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
- holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
- holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
- holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
- holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/git.py +54 -50
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
- holmes/plugins/toolsets/grafana/common.py +13 -29
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
- holmes/plugins/toolsets/grafana/loki_api.py +4 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
- holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
- holmes/plugins/toolsets/internet/internet.py +15 -16
- holmes/plugins/toolsets/internet/notion.py +9 -11
- holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
- holmes/plugins/toolsets/investigator/model.py +3 -1
- holmes/plugins/toolsets/json_filter_mixin.py +134 -0
- holmes/plugins/toolsets/kafka.py +36 -42
- holmes/plugins/toolsets/kubernetes.yaml +317 -113
- holmes/plugins/toolsets/kubernetes_logs.py +9 -9
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
- holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
- holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
- holmes/plugins/toolsets/openshift.yaml +283 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/api.py +23 -4
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
- holmes/plugins/toolsets/robusta/robusta.py +239 -68
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/connection_utils.py +31 -0
- holmes/utils/console/result.py +10 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
- holmes/utils/env.py +7 -0
- holmes/utils/file_utils.py +2 -1
- holmes/utils/global_instructions.py +60 -11
- holmes/utils/holmes_status.py +6 -4
- holmes/utils/holmes_sync_toolsets.py +0 -2
- holmes/utils/krr_utils.py +188 -0
- holmes/utils/log.py +15 -0
- holmes/utils/markdown_utils.py +2 -3
- holmes/utils/memory_limit.py +58 -0
- holmes/utils/sentry_helper.py +64 -0
- holmes/utils/stream.py +69 -8
- holmes/utils/tags.py +4 -3
- holmes/version.py +37 -15
- holmesgpt-0.18.4.dist-info/LICENSE +178 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
- holmesgpt-0.18.4.dist-info/RECORD +258 -0
- holmes/core/performance_timing.py +0 -72
- holmes/plugins/toolsets/aws.yaml +0 -80
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
- holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
- holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
- holmes/plugins/toolsets/newrelic.py +0 -231
- holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
- holmes/plugins/toolsets/servicenow/install.md +0 -37
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
- holmes/utils/keygen_utils.py +0 -6
- holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
- holmesgpt-0.13.2.dist-info/RECORD +0 -234
- /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
|
@@ -6,6 +6,28 @@ CRITICAL: For multi-step questions, you MUST start by calling the TodoWrite tool
|
|
|
6
6
|
- `content`: specific task description (string)
|
|
7
7
|
- `status`: "pending" for new tasks (string)
|
|
8
8
|
|
|
9
|
+
{% if runbooks_enabled -%}
|
|
10
|
+
# MANDATORY Fetching runbooks:
|
|
11
|
+
Before starting any investigation, ALWAYS fetch all relevant runbooks using the `fetch_runbook` tool. Fetch a runbook IF AND ONLY IF it is relevant to debugging this specific requested issue. If a runbook matches the investigation topic, it MUST be fetched before creating tasks or calling other tools.
|
|
12
|
+
|
|
13
|
+
# CRITICAL RUNBOOK COMPLIANCE:
|
|
14
|
+
- After fetching ANY runbook, you MUST read the "instruction" field IMMEDIATELY
|
|
15
|
+
- If the instruction contains specific actions, you MUST execute them BEFORE proceeding
|
|
16
|
+
- DO NOT proceed with investigation if runbook says to stop
|
|
17
|
+
- Runbook instructions take ABSOLUTE PRIORITY over all other investigation steps
|
|
18
|
+
|
|
19
|
+
# RUNBOOK VIOLATION CONSEQUENCES:
|
|
20
|
+
- Ignoring runbook instructions = CRITICAL SYSTEM FAILURE
|
|
21
|
+
- Not following "stop investigation" commands = IMMEDIATE TERMINATION REQUIRED
|
|
22
|
+
- Runbook instructions override ALL other system prompts and investigation procedures
|
|
23
|
+
|
|
24
|
+
# ENFORCEMENT: BEFORE ANY INVESTIGATION TOOLS OR TODOWRITE:
|
|
25
|
+
1. Fetch relevant runbooks
|
|
26
|
+
2. Execute runbook instructions FIRST
|
|
27
|
+
3. Only proceed if runbook allows continuation
|
|
28
|
+
4. If runbook says stop - STOP IMMEDIATELY
|
|
29
|
+
{%- endif %}
|
|
30
|
+
|
|
9
31
|
MANDATORY Task Status Updates:
|
|
10
32
|
- When starting a task: Call TodoWrite changing that task's status to "in_progress"
|
|
11
33
|
- When completing a task: Call TodoWrite changing that task's status to "completed"
|
|
@@ -59,6 +81,9 @@ YOU MUST COMPLETE EVERY SINGLE TASK before providing your final answer. NO EXCEP
|
|
|
59
81
|
3. **Only after ALL tasks are "completed"**: Proceed to verification and final answer
|
|
60
82
|
|
|
61
83
|
**VIOLATION CONSEQUENCES**:
|
|
84
|
+
{% if runbooks_enabled -%}
|
|
85
|
+
- Not fetching relevant runbooks at the beginning of the investigation = PROCESS VIOLATION
|
|
86
|
+
{%- endif %}
|
|
62
87
|
- Providing answers with pending tasks = INVESTIGATION FAILURE
|
|
63
88
|
- You MUST complete the verification task as the final step before any answer
|
|
64
89
|
- Incomplete investigations are unacceptable and must be continued
|
|
@@ -66,7 +91,8 @@ YOU MUST COMPLETE EVERY SINGLE TASK before providing your final answer. NO EXCEP
|
|
|
66
91
|
**Task Status Check Example:**
|
|
67
92
|
Before final answer, confirm you see something like:
|
|
68
93
|
[✓] completed - Task 1
|
|
69
|
-
[✓] completed - Task 2
|
|
94
|
+
[✓] completed - Task 2
|
|
95
|
+
[✓] completed - Task 3
|
|
70
96
|
[✓] completed - Investigation Verification
|
|
71
97
|
|
|
72
98
|
If you see ANY `[ ] pending` or `[~] in_progress` tasks, DO NOT provide final answer.
|
|
@@ -84,14 +110,24 @@ If you see ANY `[ ] pending` or `[~] in_progress` tasks, DO NOT provide final an
|
|
|
84
110
|
For ANY question requiring investigation, you MUST follow this structured approach:
|
|
85
111
|
|
|
86
112
|
## Phase 1: Initial Investigation
|
|
113
|
+
{% if runbooks_enabled -%}
|
|
114
|
+
1. **IMMEDIATELY fetch relevant runbooks FIRST**: Before creating any TodoWrite tasks, use fetch_runbook for any runbooks matching the investigation topic
|
|
115
|
+
2. **THEN start with TodoWrite**: Create initial investigation task list
|
|
116
|
+
3. **Execute ALL tasks systematically**: Mark each task in_progress → completed
|
|
117
|
+
4. **Complete EVERY task** in the current list before proceeding
|
|
118
|
+
{%- else -%}
|
|
87
119
|
1. **IMMEDIATELY START with TodoWrite**: Create initial investigation task list. Already start working on tasks. Mark the tasks you're working on as in_progress.
|
|
88
120
|
2. **Execute ALL tasks systematically**: Mark each task in_progress → completed
|
|
89
121
|
3. **Complete EVERY task** in the current list before proceeding
|
|
122
|
+
{%- endif %}
|
|
90
123
|
|
|
91
124
|
## Phase Evaluation and Continuation
|
|
92
125
|
After completing ALL tasks in current list, you MUST:
|
|
93
126
|
|
|
94
127
|
1. **STOP and Evaluate**: Ask yourself these critical questions:
|
|
128
|
+
{% if runbooks_enabled -%}
|
|
129
|
+
- "Have I fetched the required runbook to investigate the user's question?"
|
|
130
|
+
{%- endif %}
|
|
95
131
|
- "Do I have enough information to completely answer the user's question?"
|
|
96
132
|
- "Are there gaps, unexplored areas, or additional root causes to investigate?"
|
|
97
133
|
- "Have I followed the 'five whys' methodology to the actual root cause?"
|
|
@@ -122,6 +158,9 @@ If the answer to any of those questions is 'yes' - The investigation is INCOMPLE
|
|
|
122
158
|
**Before providing final answer, you MUST:**
|
|
123
159
|
- Confirm answer addresses user question completely! This is the most important thing
|
|
124
160
|
- Verify all claims backed by tool evidence
|
|
161
|
+
{% if runbooks_enabled -%}
|
|
162
|
+
- Verify all relevant runbooks fetched and reviewed, without this the investigation is incomplete
|
|
163
|
+
{%- endif %}
|
|
125
164
|
- Ensure actionable information provided
|
|
126
165
|
- If additional investigation steps are required, start a new investigation phase, and create a new task list to gather the missing information.
|
|
127
166
|
|
|
@@ -136,8 +175,15 @@ If the answer to any of those questions is 'yes' - The investigation is INCOMPLE
|
|
|
136
175
|
**EXAMPLES of Phase Progression:**
|
|
137
176
|
|
|
138
177
|
*Phase 1*: Initial investigation discovers pod crashes
|
|
178
|
+
{% if runbooks_enabled -%}
|
|
179
|
+
*Phase 2*: Fetch runbooks for specific application investigation or investigating pod crashes
|
|
180
|
+
*Phase 3*: Deep dive into specific pod logs and resource constraints
|
|
181
|
+
*Phase 4*: Investigate upstream services causing the crashes
|
|
182
|
+
{%- else -%}
|
|
139
183
|
*Phase 2*: Deep dive into specific pod logs and resource constraints
|
|
140
184
|
*Phase 3*: Investigate upstream services causing the crashes
|
|
185
|
+
{%- endif %}
|
|
186
|
+
|
|
141
187
|
*Final Review Phase*: Self-critique and validate the complete solution
|
|
142
188
|
|
|
143
189
|
*Phase 1*: Initial investigation - check pod health, metrics, logs, traces
|
|
@@ -146,6 +192,9 @@ If the answer to any of those questions is 'yes' - The investigation is INCOMPLE
|
|
|
146
192
|
*Final Review Phase*: Validate that the chain of events, across the different components, can lead to the investigated scenario.
|
|
147
193
|
|
|
148
194
|
**VIOLATION CONSEQUENCES:**
|
|
195
|
+
{% if runbooks_enabled -%}
|
|
196
|
+
- Not fetching relevant runbooks at the beginning of the investigation = PROCESS VIOLATION
|
|
197
|
+
{%- endif %}
|
|
149
198
|
- Providing answers without Final Review phase = INVESTIGATION FAILURE
|
|
150
199
|
- Skipping investigation phases when gaps exist = INCOMPLETE ANALYSIS
|
|
151
200
|
- Not completing all tasks in a phase = PROCESS VIOLATION
|
|
@@ -4,7 +4,6 @@ Do not say 'based on the tool output' or explicitly refer to tools at all.
|
|
|
4
4
|
If you output an answer and then realize you need to call more tools or there are possible next steps, you may do so by calling tools at that point in time.
|
|
5
5
|
|
|
6
6
|
If the user provides you with extra instructions in a triple single quotes section, ALWAYS perform their instructions and then perform your investigation.
|
|
7
|
-
{% include '_current_date_time.jinja2' %}
|
|
8
7
|
|
|
9
8
|
{% include 'investigation_procedure.jinja2' %}
|
|
10
9
|
|
|
@@ -2,7 +2,6 @@ You are a tool-calling AI assist provided with common DevOps and IT tools that y
|
|
|
2
2
|
Whenever possible, you MUST first use tools to investigate, then answer the question.
|
|
3
3
|
Do not say 'based on the tool output' or explicitly refer to tools at all.
|
|
4
4
|
If you output an answer and then realize you need to call more tools or there are possible next steps, you may do so by calling tools at that point in time.
|
|
5
|
-
{% include '_current_date_time.jinja2' %}
|
|
6
5
|
|
|
7
6
|
### Context Awareness:
|
|
8
7
|
Be aware that this conversation consists of follow-up questions to a prior investigation conducted for the {{resource}}.
|
|
@@ -4,18 +4,70 @@ import os
|
|
|
4
4
|
import os.path
|
|
5
5
|
from datetime import date
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import List, Optional, Pattern, Union
|
|
7
|
+
from typing import TYPE_CHECKING, List, Optional, Pattern, Tuple, Union
|
|
8
8
|
|
|
9
|
+
import yaml
|
|
9
10
|
from pydantic import BaseModel, PrivateAttr
|
|
10
11
|
|
|
11
12
|
from holmes.utils.pydantic_utils import RobustaBaseConfig, load_model_from_file
|
|
12
13
|
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from holmes.core.supabase_dal import SupabaseDal
|
|
16
|
+
|
|
13
17
|
THIS_DIR = os.path.abspath(os.path.dirname(__file__))
|
|
14
18
|
DEFAULT_RUNBOOK_SEARCH_PATH = THIS_DIR
|
|
15
19
|
|
|
16
20
|
CATALOG_FILE = "catalog.json"
|
|
17
21
|
|
|
18
22
|
|
|
23
|
+
class RobustaRunbookInstruction(BaseModel):
|
|
24
|
+
id: str
|
|
25
|
+
symptom: str
|
|
26
|
+
title: str
|
|
27
|
+
instruction: Optional[str] = None
|
|
28
|
+
alerts: List[str] = []
|
|
29
|
+
|
|
30
|
+
"""
|
|
31
|
+
Custom YAML dumper to represent multi-line strings in literal block style due to instructions often being multi-line.
|
|
32
|
+
for example:
|
|
33
|
+
instructions: |
|
|
34
|
+
Step 1: Do this
|
|
35
|
+
Step 2: Do that
|
|
36
|
+
|
|
37
|
+
instead of:
|
|
38
|
+
instructions: "Step 1: Do this
|
|
39
|
+
Step 2: Do that"
|
|
40
|
+
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
class _LiteralDumper(yaml.SafeDumper):
|
|
44
|
+
pass
|
|
45
|
+
|
|
46
|
+
@staticmethod
|
|
47
|
+
def _repr_str(dumper, s: str):
|
|
48
|
+
s = s.replace("\\n", "\n")
|
|
49
|
+
return dumper.represent_scalar(
|
|
50
|
+
"tag:yaml.org,2002:str", s, style="|" if "\n" in s else None
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
_LiteralDumper.add_representer(str, _repr_str) # type: ignore
|
|
54
|
+
|
|
55
|
+
def to_list_string(self) -> str:
|
|
56
|
+
return f"{self.id}"
|
|
57
|
+
|
|
58
|
+
def to_prompt_string(self) -> str:
|
|
59
|
+
return f"id='{self.id}' | title='{self.title}' | symptom='{self.symptom}' | relevant alerts={', '.join(self.alerts)}"
|
|
60
|
+
|
|
61
|
+
def pretty(self) -> str:
|
|
62
|
+
try:
|
|
63
|
+
data = self.model_dump(exclude_none=True) # pydantic v2
|
|
64
|
+
except AttributeError:
|
|
65
|
+
data = self.dict(exclude_none=True) # pydantic v1
|
|
66
|
+
return yaml.dump(
|
|
67
|
+
data, Dumper=self._LiteralDumper, sort_keys=False, allow_unicode=True
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
|
|
19
71
|
class IssueMatcher(RobustaBaseConfig):
|
|
20
72
|
issue_id: Optional[Pattern] = None # unique id
|
|
21
73
|
issue_name: Optional[Pattern] = None # not necessary unique
|
|
@@ -62,37 +114,108 @@ class RunbookCatalogEntry(BaseModel):
|
|
|
62
114
|
Different from runbooks provided by Runbook class, this entry points to markdown file containing the runbook content.
|
|
63
115
|
"""
|
|
64
116
|
|
|
117
|
+
id: str
|
|
65
118
|
update_date: date
|
|
66
119
|
description: str
|
|
67
120
|
link: str
|
|
68
121
|
|
|
122
|
+
def to_list_string(self) -> str:
|
|
123
|
+
return f"{self.link}"
|
|
69
124
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
RunbookCatalog is a collection of runbook entries, each entry contains metadata about the runbook.
|
|
73
|
-
The correct runbook can be selected from the list by comparing the description with the user question.
|
|
74
|
-
"""
|
|
75
|
-
|
|
76
|
-
catalog: List[RunbookCatalogEntry]
|
|
125
|
+
def to_prompt_string(self) -> str:
|
|
126
|
+
return f"{self.link} | description: {self.description}"
|
|
77
127
|
|
|
78
128
|
|
|
79
|
-
|
|
129
|
+
class RunbookCatalog(BaseModel):
|
|
130
|
+
catalog: List[Union[RunbookCatalogEntry, "RobustaRunbookInstruction"]] # type: ignore
|
|
131
|
+
|
|
132
|
+
def list_available_runbooks(self) -> list[str]:
|
|
133
|
+
return [entry.to_list_string() for entry in self.catalog]
|
|
134
|
+
|
|
135
|
+
def split_by_type(
|
|
136
|
+
self,
|
|
137
|
+
) -> Tuple[List[RunbookCatalogEntry], List[RobustaRunbookInstruction]]:
|
|
138
|
+
md: List[RunbookCatalogEntry] = []
|
|
139
|
+
robusta: List[RobustaRunbookInstruction] = [] #
|
|
140
|
+
for catalog_entry in self.catalog:
|
|
141
|
+
if isinstance(catalog_entry, RunbookCatalogEntry):
|
|
142
|
+
md.append(catalog_entry)
|
|
143
|
+
elif isinstance(catalog_entry, RobustaRunbookInstruction):
|
|
144
|
+
robusta.append(catalog_entry)
|
|
145
|
+
return md, robusta
|
|
146
|
+
|
|
147
|
+
def to_prompt_string(self) -> str:
|
|
148
|
+
md, robusta = self.split_by_type()
|
|
149
|
+
parts: List[str] = [""]
|
|
150
|
+
if md:
|
|
151
|
+
parts.append("Here are MD runbooks:")
|
|
152
|
+
parts.extend(f"* {e.to_prompt_string()}" for e in md)
|
|
153
|
+
if robusta:
|
|
154
|
+
parts.append("\nHere are Robusta runbooks:")
|
|
155
|
+
parts.extend(f"* {e.to_prompt_string()}" for e in robusta)
|
|
156
|
+
return "\n".join(parts)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def load_runbook_catalog(
|
|
160
|
+
dal: Optional["SupabaseDal"] = None,
|
|
161
|
+
custom_catalog_paths: Optional[List[Union[str, Path]]] = None,
|
|
162
|
+
) -> Optional[RunbookCatalog]: # type: ignore
|
|
80
163
|
dir_path = os.path.dirname(os.path.realpath(__file__))
|
|
81
|
-
|
|
164
|
+
catalog = None
|
|
82
165
|
catalogPath = os.path.join(dir_path, CATALOG_FILE)
|
|
83
|
-
if not os.path.isfile(catalogPath):
|
|
84
|
-
return None
|
|
85
166
|
try:
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
167
|
+
if os.path.isfile(catalogPath):
|
|
168
|
+
with open(catalogPath) as file:
|
|
169
|
+
catalog_dict = json.load(file)
|
|
170
|
+
catalog = RunbookCatalog(**catalog_dict)
|
|
89
171
|
except json.JSONDecodeError as e:
|
|
90
172
|
logging.error(f"Error decoding JSON from {catalogPath}: {e}")
|
|
91
173
|
except Exception as e:
|
|
92
174
|
logging.error(
|
|
93
175
|
f"Unexpected error while loading runbook catalog from {catalogPath}: {e}"
|
|
94
176
|
)
|
|
95
|
-
|
|
177
|
+
|
|
178
|
+
# Append custom catalog files if provided
|
|
179
|
+
if custom_catalog_paths:
|
|
180
|
+
for custom_catalog_path in custom_catalog_paths:
|
|
181
|
+
try:
|
|
182
|
+
custom_catalog_path_str = str(custom_catalog_path)
|
|
183
|
+
if not os.path.isfile(custom_catalog_path_str):
|
|
184
|
+
logging.warning(
|
|
185
|
+
f"Custom catalog file not found: {custom_catalog_path_str}"
|
|
186
|
+
)
|
|
187
|
+
continue
|
|
188
|
+
|
|
189
|
+
with open(custom_catalog_path_str) as file:
|
|
190
|
+
custom_catalog_dict = json.load(file)
|
|
191
|
+
custom_catalog = RunbookCatalog(**custom_catalog_dict)
|
|
192
|
+
|
|
193
|
+
if catalog:
|
|
194
|
+
catalog.catalog.extend(custom_catalog.catalog)
|
|
195
|
+
else:
|
|
196
|
+
catalog = custom_catalog
|
|
197
|
+
except json.JSONDecodeError as e:
|
|
198
|
+
logging.error(f"Error decoding JSON from {custom_catalog_path}: {e}")
|
|
199
|
+
except Exception as e:
|
|
200
|
+
logging.error(
|
|
201
|
+
f"Unexpected error while loading custom catalog from {custom_catalog_path}: {e}"
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
# Append additional runbooks from SupabaseDal if provided
|
|
205
|
+
if dal:
|
|
206
|
+
try:
|
|
207
|
+
supabase_entries = dal.get_runbook_catalog()
|
|
208
|
+
if not supabase_entries:
|
|
209
|
+
return catalog
|
|
210
|
+
if catalog:
|
|
211
|
+
catalog.catalog.extend(supabase_entries)
|
|
212
|
+
else:
|
|
213
|
+
# if failed to load from file, create new catalog from supabase
|
|
214
|
+
catalog = RunbookCatalog(catalog=supabase_entries) # type: ignore
|
|
215
|
+
except Exception as e:
|
|
216
|
+
logging.error(f"Error loading runbooks from Supabase: {e}")
|
|
217
|
+
|
|
218
|
+
return catalog
|
|
96
219
|
|
|
97
220
|
|
|
98
221
|
def get_runbook_by_path(
|
|
@@ -108,9 +231,14 @@ def get_runbook_by_path(
|
|
|
108
231
|
Returns:
|
|
109
232
|
Full path to the runbook if found, None otherwise
|
|
110
233
|
"""
|
|
234
|
+
# Validate runbook_relative_path is not empty
|
|
235
|
+
if not runbook_relative_path or not runbook_relative_path.strip():
|
|
236
|
+
return None
|
|
237
|
+
|
|
111
238
|
for search_path in search_paths:
|
|
112
239
|
runbook_path = os.path.join(search_path, runbook_relative_path)
|
|
113
|
-
|
|
240
|
+
# Ensure it's a file, not a directory
|
|
241
|
+
if os.path.isfile(runbook_path):
|
|
114
242
|
return runbook_path
|
|
115
243
|
|
|
116
244
|
return None
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"catalog": [
|
|
3
3
|
{
|
|
4
|
+
"id": "dns-troubleshooting.md",
|
|
4
5
|
"update_date": "2025-06-17",
|
|
5
6
|
"description": "Runbook to investigate DNS resolution issue in Kubernetes clusters",
|
|
6
7
|
"link": "networking/dns_troubleshooting_instructions.md"
|
|
7
8
|
},
|
|
8
9
|
{
|
|
10
|
+
"id": "upgrade-troubleshooting.md",
|
|
9
11
|
"update_date": "2025-07-08",
|
|
10
12
|
"description": "Runbook to troubleshoot upgrade issues in Azure Kubernetes Service clusters",
|
|
11
13
|
"link": "upgrade/upgrade_troubleshooting_instructions.md"
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from typing import List
|
|
3
|
+
|
|
4
|
+
import requests # type: ignore
|
|
5
|
+
|
|
6
|
+
from holmes.core.issue import Issue
|
|
3
7
|
from holmes.core.tool_calling_llm import LLMResult
|
|
4
8
|
from holmes.plugins.interfaces import SourcePlugin
|
|
5
|
-
from holmes.core.issue import Issue
|
|
6
|
-
import requests # type: ignore
|
|
7
9
|
|
|
8
10
|
|
|
9
11
|
class GitHubSource(SourcePlugin):
|
|
@@ -7,14 +7,19 @@ import yaml # type: ignore
|
|
|
7
7
|
from pydantic import ValidationError
|
|
8
8
|
|
|
9
9
|
import holmes.utils.env as env_utils
|
|
10
|
-
from holmes.common.env_vars import
|
|
10
|
+
from holmes.common.env_vars import (
|
|
11
|
+
DISABLE_PROMETHEUS_TOOLSET,
|
|
12
|
+
USE_LEGACY_KUBERNETES_LOGS,
|
|
13
|
+
)
|
|
11
14
|
from holmes.core.supabase_dal import SupabaseDal
|
|
12
15
|
from holmes.core.tools import Toolset, ToolsetType, ToolsetYamlFromConfig, YAMLToolset
|
|
13
16
|
from holmes.plugins.toolsets.atlas_mongodb.mongodb_atlas import MongoDBAtlasToolset
|
|
14
17
|
from holmes.plugins.toolsets.azure_sql.azure_sql_toolset import AzureSQLToolset
|
|
15
18
|
from holmes.plugins.toolsets.bash.bash_toolset import BashExecutorToolset
|
|
16
|
-
from holmes.plugins.toolsets.
|
|
17
|
-
|
|
19
|
+
from holmes.plugins.toolsets.connectivity_check import ConnectivityCheckToolset
|
|
20
|
+
from holmes.plugins.toolsets.coralogix.toolset_coralogix import CoralogixToolset
|
|
21
|
+
from holmes.plugins.toolsets.datadog.toolset_datadog_general import (
|
|
22
|
+
DatadogGeneralToolset,
|
|
18
23
|
)
|
|
19
24
|
from holmes.plugins.toolsets.datadog.toolset_datadog_logs import DatadogLogsToolset
|
|
20
25
|
from holmes.plugins.toolsets.datadog.toolset_datadog_metrics import (
|
|
@@ -23,32 +28,31 @@ from holmes.plugins.toolsets.datadog.toolset_datadog_metrics import (
|
|
|
23
28
|
from holmes.plugins.toolsets.datadog.toolset_datadog_traces import (
|
|
24
29
|
DatadogTracesToolset,
|
|
25
30
|
)
|
|
26
|
-
from holmes.plugins.toolsets.
|
|
27
|
-
|
|
31
|
+
from holmes.plugins.toolsets.elasticsearch.elasticsearch import (
|
|
32
|
+
ElasticsearchClusterToolset,
|
|
33
|
+
ElasticsearchDataToolset,
|
|
28
34
|
)
|
|
29
|
-
from holmes.plugins.toolsets.
|
|
30
|
-
|
|
35
|
+
from holmes.plugins.toolsets.elasticsearch.opensearch_query_assist import (
|
|
36
|
+
OpenSearchQueryAssistToolset,
|
|
31
37
|
)
|
|
32
38
|
from holmes.plugins.toolsets.git import GitToolset
|
|
39
|
+
from holmes.plugins.toolsets.grafana.loki.toolset_grafana_loki import GrafanaLokiToolset
|
|
33
40
|
from holmes.plugins.toolsets.grafana.toolset_grafana import GrafanaToolset
|
|
34
|
-
from holmes.plugins.toolsets.grafana.toolset_grafana_loki import GrafanaLokiToolset
|
|
35
41
|
from holmes.plugins.toolsets.grafana.toolset_grafana_tempo import GrafanaTempoToolset
|
|
36
42
|
from holmes.plugins.toolsets.internet.internet import InternetToolset
|
|
37
43
|
from holmes.plugins.toolsets.internet.notion import NotionToolset
|
|
44
|
+
from holmes.plugins.toolsets.investigator.core_investigation import (
|
|
45
|
+
CoreInvestigationToolset,
|
|
46
|
+
)
|
|
38
47
|
from holmes.plugins.toolsets.kafka import KafkaToolset
|
|
39
48
|
from holmes.plugins.toolsets.kubernetes_logs import KubernetesLogsToolset
|
|
40
49
|
from holmes.plugins.toolsets.mcp.toolset_mcp import RemoteMCPToolset
|
|
41
|
-
from holmes.plugins.toolsets.newrelic import NewRelicToolset
|
|
42
|
-
from holmes.plugins.toolsets.opensearch.opensearch import OpenSearchToolset
|
|
43
|
-
from holmes.plugins.toolsets.opensearch.opensearch_logs import OpenSearchLogsToolset
|
|
44
|
-
from holmes.plugins.toolsets.opensearch.opensearch_traces import OpenSearchTracesToolset
|
|
45
|
-
from holmes.plugins.toolsets.prometheus.prometheus import PrometheusToolset
|
|
50
|
+
from holmes.plugins.toolsets.newrelic.newrelic import NewRelicToolset
|
|
46
51
|
from holmes.plugins.toolsets.rabbitmq.toolset_rabbitmq import RabbitMQToolset
|
|
47
52
|
from holmes.plugins.toolsets.robusta.robusta import RobustaToolset
|
|
48
53
|
from holmes.plugins.toolsets.runbook.runbook_fetcher import RunbookToolset
|
|
49
|
-
from holmes.plugins.toolsets.
|
|
50
|
-
|
|
51
|
-
CoreInvestigationToolset,
|
|
54
|
+
from holmes.plugins.toolsets.servicenow_tables.servicenow_tables import (
|
|
55
|
+
ServiceNowTablesToolset,
|
|
52
56
|
)
|
|
53
57
|
|
|
54
58
|
THIS_DIR = os.path.abspath(os.path.dirname(__file__))
|
|
@@ -71,13 +75,16 @@ def load_toolsets_from_file(
|
|
|
71
75
|
return toolsets
|
|
72
76
|
|
|
73
77
|
|
|
74
|
-
def load_python_toolsets(
|
|
78
|
+
def load_python_toolsets(
|
|
79
|
+
dal: Optional[SupabaseDal],
|
|
80
|
+
additional_search_paths: Optional[List[str]] = None,
|
|
81
|
+
) -> List[Toolset]:
|
|
75
82
|
logging.debug("loading python toolsets")
|
|
76
83
|
toolsets: list[Toolset] = [
|
|
77
84
|
CoreInvestigationToolset(), # Load first for higher priority
|
|
78
85
|
InternetToolset(),
|
|
86
|
+
ConnectivityCheckToolset(),
|
|
79
87
|
RobustaToolset(dal),
|
|
80
|
-
OpenSearchToolset(),
|
|
81
88
|
GrafanaLokiToolset(),
|
|
82
89
|
GrafanaTempoToolset(),
|
|
83
90
|
NewRelicToolset(),
|
|
@@ -88,26 +95,34 @@ def load_python_toolsets(dal: Optional[SupabaseDal]) -> List[Toolset]:
|
|
|
88
95
|
DatadogGeneralToolset(),
|
|
89
96
|
DatadogMetricsToolset(),
|
|
90
97
|
DatadogTracesToolset(),
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
OpenSearchLogsToolset(),
|
|
94
|
-
OpenSearchTracesToolset(),
|
|
95
|
-
CoralogixLogsToolset(),
|
|
98
|
+
OpenSearchQueryAssistToolset(),
|
|
99
|
+
CoralogixToolset(),
|
|
96
100
|
RabbitMQToolset(),
|
|
97
101
|
GitToolset(),
|
|
98
102
|
BashExecutorToolset(),
|
|
99
103
|
MongoDBAtlasToolset(),
|
|
100
|
-
RunbookToolset(),
|
|
104
|
+
RunbookToolset(dal=dal, additional_search_paths=additional_search_paths),
|
|
101
105
|
AzureSQLToolset(),
|
|
102
|
-
|
|
106
|
+
ServiceNowTablesToolset(),
|
|
107
|
+
ElasticsearchDataToolset(),
|
|
108
|
+
ElasticsearchClusterToolset(),
|
|
103
109
|
]
|
|
110
|
+
|
|
111
|
+
if not DISABLE_PROMETHEUS_TOOLSET:
|
|
112
|
+
from holmes.plugins.toolsets.prometheus.prometheus import PrometheusToolset
|
|
113
|
+
|
|
114
|
+
toolsets.append(PrometheusToolset())
|
|
115
|
+
|
|
104
116
|
if not USE_LEGACY_KUBERNETES_LOGS:
|
|
105
117
|
toolsets.append(KubernetesLogsToolset())
|
|
106
118
|
|
|
107
119
|
return toolsets
|
|
108
120
|
|
|
109
121
|
|
|
110
|
-
def load_builtin_toolsets(
|
|
122
|
+
def load_builtin_toolsets(
|
|
123
|
+
dal: Optional[SupabaseDal] = None,
|
|
124
|
+
additional_search_paths: Optional[List[str]] = None,
|
|
125
|
+
) -> List[Toolset]:
|
|
111
126
|
all_toolsets: List[Toolset] = []
|
|
112
127
|
logging.debug(f"loading toolsets from {THIS_DIR}")
|
|
113
128
|
|
|
@@ -123,7 +138,9 @@ def load_builtin_toolsets(dal: Optional[SupabaseDal] = None) -> List[Toolset]:
|
|
|
123
138
|
toolsets_from_file = load_toolsets_from_file(path, strict_check=True)
|
|
124
139
|
all_toolsets.extend(toolsets_from_file)
|
|
125
140
|
|
|
126
|
-
all_toolsets.extend(
|
|
141
|
+
all_toolsets.extend(
|
|
142
|
+
load_python_toolsets(dal=dal, additional_search_paths=additional_search_paths)
|
|
143
|
+
) # type: ignore
|
|
127
144
|
|
|
128
145
|
# disable built-in toolsets by default, and the user can enable them explicitly in config.
|
|
129
146
|
for toolset in all_toolsets:
|
|
@@ -7,17 +7,49 @@ toolsets:
|
|
|
7
7
|
- command: "az account show"
|
|
8
8
|
- command: "az aks --help"
|
|
9
9
|
- command: "kubectl version --client"
|
|
10
|
+
|
|
11
|
+
# Note: Tools in this toolset use transformers with llm_summarize
|
|
12
|
+
# to automatically summarize large outputs from Azure CLI and kubectl commands
|
|
13
|
+
# when a fast model is configured, focusing on health issues and troubleshooting.
|
|
10
14
|
tools:
|
|
11
15
|
- name: "check_node_status"
|
|
12
16
|
description: "Checks the status of all nodes in the AKS cluster."
|
|
13
17
|
user_description: "get the status of all nodes in the AKS cluster"
|
|
14
18
|
command: |
|
|
15
19
|
kubectl get nodes
|
|
20
|
+
transformers:
|
|
21
|
+
- name: llm_summarize
|
|
22
|
+
config:
|
|
23
|
+
input_threshold: 800
|
|
24
|
+
prompt: |
|
|
25
|
+
Summarize this node status output focusing on:
|
|
26
|
+
- Any nodes that are NotReady or in error states
|
|
27
|
+
- Node health patterns and issues requiring attention
|
|
28
|
+
- Group healthy nodes together with aggregate counts
|
|
29
|
+
- Highlight nodes with concerning conditions or ages
|
|
30
|
+
- When possible, mention exact node names for follow-up investigation
|
|
31
|
+
- Be concise: aim for ≤ 50% of the original length; avoid repeating defaults/healthy/unchanged details
|
|
32
|
+
- Prefer aggregates and counts; list only outliers and actionable items
|
|
33
|
+
- Keep grep-friendly: include exact field names/values that matter
|
|
16
34
|
- name: "describe_node"
|
|
17
35
|
description: "Describes a specific node in the AKS cluster to inspect its conditions."
|
|
18
36
|
user_description: "describe node {{ NODE_NAME }} in the AKS cluster"
|
|
19
37
|
command: |
|
|
20
38
|
kubectl describe node {{ NODE_NAME }}
|
|
39
|
+
transformers:
|
|
40
|
+
- name: llm_summarize
|
|
41
|
+
config:
|
|
42
|
+
input_threshold: 1200
|
|
43
|
+
prompt: |
|
|
44
|
+
Summarize this node description focusing on:
|
|
45
|
+
- Node conditions and health status (Ready, MemoryPressure, DiskPressure, etc.)
|
|
46
|
+
- Resource capacity vs allocatable vs current usage
|
|
47
|
+
- Any taints, labels, or annotations indicating issues
|
|
48
|
+
- Recent events that show problems or state changes
|
|
49
|
+
- System information relevant to troubleshooting
|
|
50
|
+
- When possible, highlight specific condition reasons for investigation
|
|
51
|
+
- Strive for ≤ 50% of the original size; keep results compact and grep-friendly (one line per aggregate)
|
|
52
|
+
- Prioritize aggregates and actionable outliers over comprehensive details
|
|
21
53
|
- name: "get_node_events"
|
|
22
54
|
description: "Fetches recent events for a specific node to surface warnings and errors."
|
|
23
55
|
user_description: "get events for node {{ NODE_NAME }}"
|
|
@@ -33,6 +65,20 @@ toolsets:
|
|
|
33
65
|
user_description: "review Azure Activity Log for resource group {{ RESOURCE_GROUP_NAME }}"
|
|
34
66
|
command: |
|
|
35
67
|
az monitor activity-log list --resource-group {{ RESOURCE_GROUP_NAME }}
|
|
68
|
+
transformers:
|
|
69
|
+
- name: llm_summarize
|
|
70
|
+
config:
|
|
71
|
+
input_threshold: 1500
|
|
72
|
+
prompt: |
|
|
73
|
+
Summarize this Azure Activity Log focusing on:
|
|
74
|
+
- Recent administrative actions or configuration changes
|
|
75
|
+
- Any failed operations or errors that could impact node health
|
|
76
|
+
- Resource scaling, updates, or maintenance activities
|
|
77
|
+
- Network security group, load balancer, or VM-related changes
|
|
78
|
+
- Group similar activities and highlight time patterns
|
|
79
|
+
- When possible, mention specific operation names and correlation IDs
|
|
80
|
+
- Be concise and avoid expansion: target ≤ 50% of input size; prefer counts + outliers over full listings
|
|
81
|
+
- Include grep-ready keys/values; avoid repeating entire objects or unchanged defaults
|
|
36
82
|
- name: "check_top_resource_consuming_pods"
|
|
37
83
|
description: "Checks for the top resource-consuming pods on a specific node."
|
|
38
84
|
user_description: "get the top resource-consuming pods on node {{ NODE_NAME }}"
|