holmesgpt 0.11.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic. Click here for more details.
- holmes/.git_archival.json +7 -0
- holmes/__init__.py +76 -0
- holmes/__init__.py.bak +76 -0
- holmes/clients/robusta_client.py +24 -0
- holmes/common/env_vars.py +47 -0
- holmes/config.py +526 -0
- holmes/core/__init__.py +0 -0
- holmes/core/conversations.py +578 -0
- holmes/core/investigation.py +152 -0
- holmes/core/investigation_structured_output.py +264 -0
- holmes/core/issue.py +54 -0
- holmes/core/llm.py +250 -0
- holmes/core/models.py +157 -0
- holmes/core/openai_formatting.py +51 -0
- holmes/core/performance_timing.py +72 -0
- holmes/core/prompt.py +42 -0
- holmes/core/resource_instruction.py +17 -0
- holmes/core/runbooks.py +26 -0
- holmes/core/safeguards.py +120 -0
- holmes/core/supabase_dal.py +540 -0
- holmes/core/tool_calling_llm.py +798 -0
- holmes/core/tools.py +566 -0
- holmes/core/tools_utils/__init__.py +0 -0
- holmes/core/tools_utils/tool_executor.py +65 -0
- holmes/core/tools_utils/toolset_utils.py +52 -0
- holmes/core/toolset_manager.py +418 -0
- holmes/interactive.py +229 -0
- holmes/main.py +1041 -0
- holmes/plugins/__init__.py +0 -0
- holmes/plugins/destinations/__init__.py +6 -0
- holmes/plugins/destinations/slack/__init__.py +2 -0
- holmes/plugins/destinations/slack/plugin.py +163 -0
- holmes/plugins/interfaces.py +32 -0
- holmes/plugins/prompts/__init__.py +48 -0
- holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
- holmes/plugins/prompts/_default_log_prompt.jinja2 +11 -0
- holmes/plugins/prompts/_fetch_logs.jinja2 +36 -0
- holmes/plugins/prompts/_general_instructions.jinja2 +86 -0
- holmes/plugins/prompts/_global_instructions.jinja2 +12 -0
- holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -0
- holmes/plugins/prompts/_toolsets_instructions.jinja2 +56 -0
- holmes/plugins/prompts/generic_ask.jinja2 +36 -0
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +32 -0
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +50 -0
- holmes/plugins/prompts/generic_investigation.jinja2 +42 -0
- holmes/plugins/prompts/generic_post_processing.jinja2 +13 -0
- holmes/plugins/prompts/generic_ticket.jinja2 +12 -0
- holmes/plugins/prompts/investigation_output_format.jinja2 +32 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +84 -0
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +39 -0
- holmes/plugins/runbooks/README.md +22 -0
- holmes/plugins/runbooks/__init__.py +100 -0
- holmes/plugins/runbooks/catalog.json +14 -0
- holmes/plugins/runbooks/jira.yaml +12 -0
- holmes/plugins/runbooks/kube-prometheus-stack.yaml +10 -0
- holmes/plugins/runbooks/networking/dns_troubleshooting_instructions.md +66 -0
- holmes/plugins/runbooks/upgrade/upgrade_troubleshooting_instructions.md +44 -0
- holmes/plugins/sources/github/__init__.py +77 -0
- holmes/plugins/sources/jira/__init__.py +123 -0
- holmes/plugins/sources/opsgenie/__init__.py +93 -0
- holmes/plugins/sources/pagerduty/__init__.py +147 -0
- holmes/plugins/sources/prometheus/__init__.py +0 -0
- holmes/plugins/sources/prometheus/models.py +104 -0
- holmes/plugins/sources/prometheus/plugin.py +154 -0
- holmes/plugins/toolsets/__init__.py +171 -0
- holmes/plugins/toolsets/aks-node-health.yaml +65 -0
- holmes/plugins/toolsets/aks.yaml +86 -0
- holmes/plugins/toolsets/argocd.yaml +70 -0
- holmes/plugins/toolsets/atlas_mongodb/instructions.jinja2 +8 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +307 -0
- holmes/plugins/toolsets/aws.yaml +76 -0
- holmes/plugins/toolsets/azure_sql/__init__.py +0 -0
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +600 -0
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +309 -0
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +445 -0
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +251 -0
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +317 -0
- holmes/plugins/toolsets/azure_sql/azure_base_toolset.py +55 -0
- holmes/plugins/toolsets/azure_sql/azure_sql_instructions.jinja2 +137 -0
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +183 -0
- holmes/plugins/toolsets/azure_sql/install.md +66 -0
- holmes/plugins/toolsets/azure_sql/tools/__init__.py +1 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +324 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +243 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +205 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +249 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +373 -0
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +237 -0
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +172 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +170 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +188 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +180 -0
- holmes/plugins/toolsets/azure_sql/utils.py +83 -0
- holmes/plugins/toolsets/bash/__init__.py +0 -0
- holmes/plugins/toolsets/bash/bash_instructions.jinja2 +14 -0
- holmes/plugins/toolsets/bash/bash_toolset.py +208 -0
- holmes/plugins/toolsets/bash/common/bash.py +52 -0
- holmes/plugins/toolsets/bash/common/config.py +14 -0
- holmes/plugins/toolsets/bash/common/stringify.py +25 -0
- holmes/plugins/toolsets/bash/common/validators.py +24 -0
- holmes/plugins/toolsets/bash/grep/__init__.py +52 -0
- holmes/plugins/toolsets/bash/kubectl/__init__.py +100 -0
- holmes/plugins/toolsets/bash/kubectl/constants.py +96 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +66 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +88 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +108 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +20 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +46 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +81 -0
- holmes/plugins/toolsets/bash/parse_command.py +103 -0
- holmes/plugins/toolsets/confluence.yaml +19 -0
- holmes/plugins/toolsets/consts.py +5 -0
- holmes/plugins/toolsets/coralogix/api.py +158 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +103 -0
- holmes/plugins/toolsets/coralogix/utils.py +181 -0
- holmes/plugins/toolsets/datadog.py +153 -0
- holmes/plugins/toolsets/docker.yaml +46 -0
- holmes/plugins/toolsets/git.py +756 -0
- holmes/plugins/toolsets/grafana/__init__.py +0 -0
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +54 -0
- holmes/plugins/toolsets/grafana/common.py +68 -0
- holmes/plugins/toolsets/grafana/grafana_api.py +31 -0
- holmes/plugins/toolsets/grafana/loki_api.py +89 -0
- holmes/plugins/toolsets/grafana/tempo_api.py +124 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +102 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +102 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +10 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -0
- holmes/plugins/toolsets/grafana/trace_parser.py +195 -0
- holmes/plugins/toolsets/helm.yaml +42 -0
- holmes/plugins/toolsets/internet/internet.py +275 -0
- holmes/plugins/toolsets/internet/notion.py +137 -0
- holmes/plugins/toolsets/kafka.py +638 -0
- holmes/plugins/toolsets/kubernetes.yaml +255 -0
- holmes/plugins/toolsets/kubernetes_logs.py +426 -0
- holmes/plugins/toolsets/kubernetes_logs.yaml +42 -0
- holmes/plugins/toolsets/logging_utils/__init__.py +0 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +217 -0
- holmes/plugins/toolsets/logging_utils/types.py +0 -0
- holmes/plugins/toolsets/mcp/toolset_mcp.py +135 -0
- holmes/plugins/toolsets/newrelic.py +222 -0
- holmes/plugins/toolsets/opensearch/__init__.py +0 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +245 -0
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +151 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +211 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +12 -0
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +166 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +818 -0
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +38 -0
- holmes/plugins/toolsets/rabbitmq/api.py +398 -0
- holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2 +37 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +222 -0
- holmes/plugins/toolsets/robusta/__init__.py +0 -0
- holmes/plugins/toolsets/robusta/robusta.py +235 -0
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +24 -0
- holmes/plugins/toolsets/runbook/__init__.py +0 -0
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +78 -0
- holmes/plugins/toolsets/service_discovery.py +92 -0
- holmes/plugins/toolsets/servicenow/install.md +37 -0
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +3 -0
- holmes/plugins/toolsets/servicenow/servicenow.py +198 -0
- holmes/plugins/toolsets/slab.yaml +20 -0
- holmes/plugins/toolsets/utils.py +137 -0
- holmes/plugins/utils.py +14 -0
- holmes/utils/__init__.py +0 -0
- holmes/utils/cache.py +84 -0
- holmes/utils/cert_utils.py +40 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +44 -0
- holmes/utils/definitions.py +13 -0
- holmes/utils/env.py +53 -0
- holmes/utils/file_utils.py +56 -0
- holmes/utils/global_instructions.py +20 -0
- holmes/utils/holmes_status.py +22 -0
- holmes/utils/holmes_sync_toolsets.py +80 -0
- holmes/utils/markdown_utils.py +55 -0
- holmes/utils/pydantic_utils.py +54 -0
- holmes/utils/robusta.py +10 -0
- holmes/utils/tags.py +97 -0
- holmesgpt-0.11.5.dist-info/LICENSE.txt +21 -0
- holmesgpt-0.11.5.dist-info/METADATA +400 -0
- holmesgpt-0.11.5.dist-info/RECORD +183 -0
- holmesgpt-0.11.5.dist-info/WHEEL +4 -0
- holmesgpt-0.11.5.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import os.path
|
|
4
|
+
from typing import Any, List, Optional, Union
|
|
5
|
+
|
|
6
|
+
from holmes.common.env_vars import USE_LEGACY_KUBERNETES_LOGS
|
|
7
|
+
import yaml # type: ignore
|
|
8
|
+
from pydantic import ValidationError
|
|
9
|
+
|
|
10
|
+
from holmes.plugins.toolsets.azure_sql.azure_sql_toolset import AzureSQLToolset
|
|
11
|
+
import holmes.utils.env as env_utils
|
|
12
|
+
from holmes.core.supabase_dal import SupabaseDal
|
|
13
|
+
from holmes.core.tools import Toolset, ToolsetType, ToolsetYamlFromConfig, YAMLToolset
|
|
14
|
+
from holmes.plugins.toolsets.coralogix.toolset_coralogix_logs import (
|
|
15
|
+
CoralogixLogsToolset,
|
|
16
|
+
)
|
|
17
|
+
from holmes.plugins.toolsets.datadog import DatadogToolset
|
|
18
|
+
from holmes.plugins.toolsets.kubernetes_logs import KubernetesLogsToolset
|
|
19
|
+
from holmes.plugins.toolsets.git import GitToolset
|
|
20
|
+
from holmes.plugins.toolsets.grafana.toolset_grafana import GrafanaToolset
|
|
21
|
+
from holmes.plugins.toolsets.bash.bash_toolset import BashExecutorToolset
|
|
22
|
+
from holmes.plugins.toolsets.grafana.toolset_grafana_loki import GrafanaLokiToolset
|
|
23
|
+
from holmes.plugins.toolsets.grafana.toolset_grafana_tempo import GrafanaTempoToolset
|
|
24
|
+
from holmes.plugins.toolsets.internet.internet import InternetToolset
|
|
25
|
+
from holmes.plugins.toolsets.internet.notion import NotionToolset
|
|
26
|
+
from holmes.plugins.toolsets.kafka import KafkaToolset
|
|
27
|
+
from holmes.plugins.toolsets.mcp.toolset_mcp import RemoteMCPToolset
|
|
28
|
+
from holmes.plugins.toolsets.newrelic import NewRelicToolset
|
|
29
|
+
from holmes.plugins.toolsets.opensearch.opensearch import OpenSearchToolset
|
|
30
|
+
from holmes.plugins.toolsets.opensearch.opensearch_logs import OpenSearchLogsToolset
|
|
31
|
+
from holmes.plugins.toolsets.opensearch.opensearch_traces import OpenSearchTracesToolset
|
|
32
|
+
from holmes.plugins.toolsets.prometheus.prometheus import PrometheusToolset
|
|
33
|
+
from holmes.plugins.toolsets.rabbitmq.toolset_rabbitmq import RabbitMQToolset
|
|
34
|
+
from holmes.plugins.toolsets.robusta.robusta import RobustaToolset
|
|
35
|
+
from holmes.plugins.toolsets.atlas_mongodb.mongodb_atlas import MongoDBAtlasToolset
|
|
36
|
+
from holmes.plugins.toolsets.runbook.runbook_fetcher import RunbookToolset
|
|
37
|
+
from holmes.plugins.toolsets.servicenow.servicenow import ServiceNowToolset
|
|
38
|
+
|
|
39
|
+
THIS_DIR = os.path.abspath(os.path.dirname(__file__))
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def load_toolsets_from_file(
    toolsets_path: str, strict_check: bool = True
) -> List[Toolset]:
    """
    Load the toolsets declared under the top-level ``toolsets`` key of a YAML file.

    :param toolsets_path: Path of the YAML file to read.
    :param strict_check: Forwarded unchanged to ``load_toolsets_from_config``.
    :return: The toolsets produced by ``load_toolsets_from_config``; an empty
        list when the file has no ``toolsets`` entries.
    :raises ValueError: If the file parses to nothing (empty file) or its
        top-level node is not a mapping.
    """
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default locale encoding.
    with open(toolsets_path, encoding="utf-8") as file:
        parsed_yaml = yaml.safe_load(file)

    if parsed_yaml is None:
        raise ValueError(
            f"Failed to load toolsets from {toolsets_path}: file is empty or invalid YAML."
        )

    # A list or scalar at the root has no ``.get``; report it with the same
    # exception type as the empty-file case instead of an AttributeError.
    if not isinstance(parsed_yaml, dict):
        raise ValueError(
            f"Failed to load toolsets from {toolsets_path}: expected a mapping at the "
            f"top level but found {type(parsed_yaml).__name__}."
        )

    toolsets_dict = parsed_yaml.get("toolsets", {})
    return list(load_toolsets_from_config(toolsets_dict, strict_check))
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def load_python_toolsets(dal: Optional[SupabaseDal]) -> List[Toolset]:
    """
    Instantiate the toolsets that are implemented as Python classes.

    :param dal: Handed to ``RobustaToolset``; no other toolset built here receives it.
    :return: Newly constructed toolsets, followed by ``KubernetesLogsToolset``
        unless ``USE_LEGACY_KUBERNETES_LOGS`` is truthy.
    """
    logging.debug("loading python toolsets")

    python_toolsets: list[Toolset] = [
        InternetToolset(),
        RobustaToolset(dal),
        OpenSearchToolset(),
        GrafanaLokiToolset(),
        GrafanaTempoToolset(),
        NewRelicToolset(),
        GrafanaToolset(),
        NotionToolset(),
        KafkaToolset(),
        DatadogToolset(),
        PrometheusToolset(),
        OpenSearchLogsToolset(),
        OpenSearchTracesToolset(),
        CoralogixLogsToolset(),
        RabbitMQToolset(),
        GitToolset(),
        BashExecutorToolset(),
        MongoDBAtlasToolset(),
        RunbookToolset(),
        AzureSQLToolset(),
        ServiceNowToolset(),
    ]

    # The Python logs toolset is skipped when the legacy flag is set.
    if USE_LEGACY_KUBERNETES_LOGS:
        return python_toolsets

    python_toolsets.append(KubernetesLogsToolset())
    return python_toolsets
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def load_builtin_toolsets(dal: Optional[SupabaseDal] = None) -> List[Toolset]:
    """
    Load every toolset shipped in this package directory.

    YAML files directly inside ``THIS_DIR`` are loaded first, then the
    Python toolsets from ``load_python_toolsets``.

    :param dal: Forwarded to ``load_python_toolsets``.
    :return: All loaded toolsets, each with ``type`` set to
        ``ToolsetType.BUILTIN`` and ``path`` set to ``None``.
    """
    all_toolsets: List[Toolset] = []
    logging.debug(f"loading toolsets from {THIS_DIR}")

    # Handle YAML toolsets. os.listdir() returns entries in arbitrary order,
    # so sort to keep the load order stable across filesystems and runs.
    for filename in sorted(os.listdir(THIS_DIR)):
        if not filename.endswith(".yaml"):
            continue

        # kubernetes_logs.yaml is only loaded when the legacy flag is set;
        # otherwise load_python_toolsets adds KubernetesLogsToolset instead.
        if filename == "kubernetes_logs.yaml" and not USE_LEGACY_KUBERNETES_LOGS:
            continue

        path = os.path.join(THIS_DIR, filename)
        all_toolsets.extend(load_toolsets_from_file(path, strict_check=True))

    all_toolsets.extend(load_python_toolsets(dal=dal))  # type: ignore

    # Mark everything loaded here as built-in.
    for toolset in all_toolsets:
        toolset.type = ToolsetType.BUILTIN
        # don't expose the built-in toolsets' path
        toolset.path = None

    return all_toolsets  # type: ignore
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def is_old_toolset_config(
    toolsets: Union[dict[str, dict[str, Any]], List[dict[str, Any]]],
) -> bool:
    """Return True when ``toolsets`` uses the old format, i.e. it is a list."""
    return isinstance(toolsets, list)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def load_toolsets_from_config(
    toolsets: dict[str, dict[str, Any]],
    strict_check: bool = True,
) -> List[Toolset]:
    """
    Build Toolset objects from a mapping of toolset name to configuration.

    :param toolsets: Mapping of toolset name to its configuration dict. A list
        (the old config format) is rejected.
    :param strict_check: If True, non-MCP entries are built with ``YAMLToolset``;
        otherwise with ``ToolsetYamlFromConfig``.
    :return: The toolsets that were built successfully. Entries that fail
        validation or raise while loading are logged and skipped.
    :raises ValueError: If ``toolsets`` is in the old list format.
    """

    if not toolsets:
        return []

    loaded_toolsets: list[Toolset] = []
    if is_old_toolset_config(toolsets):
        message = "Old toolset config format detected, please update to the new format: https://docs.robusta.dev/master/configuration/holmesgpt/custom_toolsets.html"
        logging.warning(message)
        raise ValueError(message)

    for name, config in toolsets.items():
        try:
            toolset_type = config.get("type", ToolsetType.BUILTIN.value)
            # MCP server is not a built-in toolset, so we need to set the type explicitly
            validated_toolset: Optional[Toolset] = None
            # The type read from config is normally a plain string, so an identity
            # check against the enum member would never match. Compare by equality
            # and accept either the member or its value.
            if toolset_type in (ToolsetType.MCP, ToolsetType.MCP.value):
                validated_toolset = RemoteMCPToolset(**config, name=name)
            elif strict_check:
                validated_toolset = YAMLToolset(**config, name=name)  # type: ignore
            else:
                validated_toolset = ToolsetYamlFromConfig(  # type: ignore
                    **config, name=name
                )

            if validated_toolset.config:
                validated_toolset.config = env_utils.replace_env_vars_values(
                    validated_toolset.config
                )
            loaded_toolsets.append(validated_toolset)
        except ValidationError as e:
            logging.warning(f"Toolset '{name}' is invalid: {e}")

        except Exception:
            # Best effort: one broken toolset must not prevent the others from loading.
            logging.warning("Failed to load toolset: %s", name, exc_info=True)

    return loaded_toolsets
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
toolsets:
|
|
2
|
+
aks/node-health:
|
|
3
|
+
description: "Set of tools to troubleshoot AKS node health issues"
|
|
4
|
+
tags:
|
|
5
|
+
- cli
|
|
6
|
+
prerequisites:
|
|
7
|
+
- command: "az account show"
|
|
8
|
+
- command: "az aks --help"
|
|
9
|
+
- command: "kubectl version --client"
|
|
10
|
+
tools:
|
|
11
|
+
- name: "check_node_status"
|
|
12
|
+
description: "Checks the status of all nodes in the AKS cluster."
|
|
13
|
+
user_description: "get the status of all nodes in the AKS cluster"
|
|
14
|
+
command: |
|
|
15
|
+
kubectl get nodes
|
|
16
|
+
- name: "describe_node"
|
|
17
|
+
description: "Describes a specific node in the AKS cluster to inspect its conditions."
|
|
18
|
+
user_description: "describe node {{ NODE_NAME }} in the AKS cluster"
|
|
19
|
+
command: |
|
|
20
|
+
kubectl describe node {{ NODE_NAME }}
|
|
21
|
+
- name: "get_node_events"
|
|
22
|
+
description: "Fetches recent events for a specific node to surface warnings and errors."
|
|
23
|
+
user_description: "get events for node {{ NODE_NAME }}"
|
|
24
|
+
command: |
|
|
25
|
+
kubectl get events --field-selector involvedObject.kind=Node,involvedObject.name={{ NODE_NAME }} --sort-by='.lastTimestamp'
|
|
26
|
+
- name: "check_node_resource_usage"
|
|
27
|
+
description: "Shows CPU/memory usage for a specific node (requires metrics-server)."
|
|
28
|
+
user_description: "get resource usage for node {{ NODE_NAME }}"
|
|
29
|
+
command: |
|
|
30
|
+
kubectl top node {{ NODE_NAME }}
|
|
31
|
+
- name: "review_activity_log"
|
|
32
|
+
description: "Reviews the Azure Activity Log for recent changes affecting the node."
|
|
33
|
+
user_description: "review Azure Activity Log for resource group {{ RESOURCE_GROUP_NAME }}"
|
|
34
|
+
command: |
|
|
35
|
+
az monitor activity-log list --resource-group {{ RESOURCE_GROUP_NAME }}
|
|
36
|
+
- name: "check_top_resource_consuming_pods"
|
|
37
|
+
description: "Checks for the top resource-consuming pods on a specific node."
|
|
38
|
+
user_description: "get the top resource-consuming pods on node {{ NODE_NAME }}"
|
|
39
|
+
command: |
|
|
40
|
+
kubectl top pod --all-namespaces --sort-by=cpu | grep {{ NODE_NAME }}
|
|
41
|
+
- name: "check_network_outbound"
|
|
42
|
+
description: "Checks the outbound network connectivity for an AKS cluster."
|
|
43
|
+
user_description: "check outbound network connectivity for AKS cluster {{ CLUSTER_NAME }} in resource group {{ RESOURCE_GROUP }}"
|
|
44
|
+
command: |
|
|
45
|
+
az aks check-network outbound --name {{ CLUSTER_NAME }} --resource-group {{ RESOURCE_GROUP }}
|
|
46
|
+
- name: "check_network_inbound"
|
|
47
|
+
description: "Checks the inbound network connectivity for an AKS cluster."
|
|
48
|
+
user_description: "check inbound network connectivity for AKS cluster {{ CLUSTER_NAME }} in resource group {{ RESOURCE_GROUP }}"
|
|
49
|
+
command: |
|
|
50
|
+
az aks check-network inbound --name {{ CLUSTER_NAME }} --resource-group {{ RESOURCE_GROUP }}
|
|
51
|
+
- name: "list_vmss_names"
|
|
52
|
+
description: |
|
|
53
|
+
Lists all VMSS names in the cluster node resource group.
|
|
54
|
+
Prerequisites: get_node_resource_group
|
|
55
|
+
user_description: "lists all VMSS names in {{ NODE_RESOURCE_GROUP }}"
|
|
56
|
+
command: |
|
|
57
|
+
az vmss list -g {{ NODE_RESOURCE_GROUP }} --query '[*].name' -o tsv --only-show-errors
|
|
58
|
+
- name: "vmss_run_command"
|
|
59
|
+
description: |
|
|
60
|
+
Execute a shell command on a specific VMSS VM instance using az vmss run-command.
|
|
61
|
+
VM_ID is the instance ID of the VMSS, which can be derived from node names.
|
|
62
|
+
Prerequisites: get_node_resource_group, list_vmss_names
|
|
63
|
+
user_description: "run command {{ SHELL_COMMAND }} on VM #{{ VM_ID }} of VMSS {{ VMSS_NAME }}"
|
|
64
|
+
command: |
|
|
65
|
+
az vmss run-command invoke --resource-group {{ NODE_RESOURCE_GROUP }} --name {{ VMSS_NAME }} --instance-id {{ VM_ID }} --command-id RunShellScript --scripts {{ SHELL_COMMAND }}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
toolsets:
|
|
2
|
+
aks/core:
|
|
3
|
+
description: "Set of tools to read Azure Kubernetes Service resources"
|
|
4
|
+
tags:
|
|
5
|
+
- cli
|
|
6
|
+
prerequisites:
|
|
7
|
+
- command: "az account show"
|
|
8
|
+
- command: "az aks --help"
|
|
9
|
+
- command: "kubectl version --client"
|
|
10
|
+
tools:
|
|
11
|
+
- name: "cloud_provider"
|
|
12
|
+
description: "Fetches the cloud provider of the kubernetes cluster, determined by the providerID of the nodes"
|
|
13
|
+
user_description: "get cloud provider of AKS cluster"
|
|
14
|
+
command: |
|
|
15
|
+
kubectl get nodes -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.providerID}{"\n"}{end}'
|
|
16
|
+
- name: "aks_get_cluster"
|
|
17
|
+
description: "Get the configuration details of a specific AKS cluster"
|
|
18
|
+
user_description: "get AKS cluster {{ CLUSTER_NAME }} under resource group {{ RESOURCE_GROUP_NAME }} in subscription {{ SUBSCRIPTION_ID }}"
|
|
19
|
+
command: |
|
|
20
|
+
az aks show --resource-group {{ RESOURCE_GROUP_NAME }} --name {{ CLUSTER_NAME }} --subscription {{ SUBSCRIPTION_ID }}
|
|
21
|
+
- name: "aks_list_clusters_by_rg"
|
|
22
|
+
description: "Lists all AKS clusters under a specific resource group. Only run this tool when you need to get all clusters in a resource group, rather than a specific one."
|
|
23
|
+
user_description: "list AKS clusters in resource group {{ RESOURCE_GROUP_NAME }} under subscription {{ SUBSCRIPTION_ID }}"
|
|
24
|
+
command: |
|
|
25
|
+
az aks list --resource-group {{ RESOURCE_GROUP_NAME }} --subscription {{ SUBSCRIPTION_ID }}
|
|
26
|
+
- name: "aks_list_node_pools"
|
|
27
|
+
description: "Lists node pools in an AKS cluster"
|
|
28
|
+
user_description: "list node pools for AKS cluster {{ CLUSTER_NAME }} under resource group {{ RESOURCE_GROUP_NAME }}"
|
|
29
|
+
command: |
|
|
30
|
+
az aks nodepool list --resource-group {{ RESOURCE_GROUP_NAME }} --cluster-name {{ CLUSTER_NAME }} --subscription {{ SUBSCRIPTION_ID }}
|
|
31
|
+
- name: "aks_show_node_pool"
|
|
32
|
+
description: "Shows details of a specific node pool in an AKS cluster"
|
|
33
|
+
user_description: "get node pool {{ NODE_POOL_NAME }} in AKS cluster {{ CLUSTER_NAME }} under resource group {{ RESOURCE_GROUP_NAME }}"
|
|
34
|
+
command: |
|
|
35
|
+
az aks nodepool show --resource-group {{ RESOURCE_GROUP_NAME }} --cluster-name {{ CLUSTER_NAME }} --name {{ NODE_POOL_NAME }} --subscription {{ SUBSCRIPTION_ID }}
|
|
36
|
+
- name: "aks_list_versions"
|
|
37
|
+
description: "Lists supported Kubernetes versions in a region"
|
|
38
|
+
user_description: "list supported Kubernetes versions for region {{ LOCATION }}"
|
|
39
|
+
command: |
|
|
40
|
+
az aks get-versions --location {{ LOCATION }} --subscription {{ SUBSCRIPTION_ID }}
|
|
41
|
+
- name: "aks_get_credentials"
|
|
42
|
+
description: "Downloads kubeconfig file for an AKS cluster"
|
|
43
|
+
user_description: "get kubeconfig credentials for AKS cluster {{ CLUSTER_NAME }} under resource group {{ RESOURCE_GROUP_NAME }}"
|
|
44
|
+
command: |
|
|
45
|
+
az aks get-credentials --resource-group {{ RESOURCE_GROUP_NAME }} --name {{ CLUSTER_NAME }} --subscription {{ SUBSCRIPTION_ID }}
|
|
46
|
+
- name: "aks_list_addons"
|
|
47
|
+
description: "Lists all available AKS addons"
|
|
48
|
+
user_description: "list available addons for AKS in region {{ LOCATION }}"
|
|
49
|
+
command: |
|
|
50
|
+
az aks get-versions --location {{ LOCATION }} --query "orchestrators[].addons"
|
|
51
|
+
# helper tools to auto-discover variables
|
|
52
|
+
- name: "get_default_subscription"
|
|
53
|
+
description: "Retrieves the current Azure CLI default subscription ID"
|
|
54
|
+
user_description: "get default subscription ID"
|
|
55
|
+
command: |
|
|
56
|
+
az account show --query id -o tsv
|
|
57
|
+
- name: "get_cluster_name"
|
|
58
|
+
description: "Retrieves the active Kubernetes cluster name from kubeconfig"
|
|
59
|
+
user_description: "get current AKS cluster name from kubeconfig"
|
|
60
|
+
command: |
|
|
61
|
+
kubectl config current-context
|
|
62
|
+
- name: "get_cluster_resource_group"
|
|
63
|
+
description: "Retrieves the resource group name for the AKS cluster"
|
|
64
|
+
user_description: "get resource group for cluster {{ CLUSTER_NAME }}"
|
|
65
|
+
command: |
|
|
66
|
+
az aks list --subscription {{ SUBSCRIPTION_ID }} --only-show-errors --query "[?name=='{{ CLUSTER_NAME }}'].resourceGroup | [0]" -o tsv
|
|
67
|
+
- name: "get_node_resource_group"
|
|
68
|
+
description: "Retrieves the node resource group name for the AKS cluster"
|
|
69
|
+
user_description: "get the node resource group name for cluster {{ CLUSTER_NAME }}"
|
|
70
|
+
command: |
|
|
71
|
+
az aks list --subscription {{ SUBSCRIPTION_ID }} --only-show-errors --query "[?name=='{{ CLUSTER_NAME }}'].nodeResourceGroup | [0]" -o tsv
|
|
72
|
+
- name: "get_api_server_public_ip"
|
|
73
|
+
description: "get the public IP of kube-apiserver for a public AKS cluster"
|
|
74
|
+
user_description: "get the public IP of kube-apiserver for a public AKS cluster {{ CLUSTER_NAME }}"
|
|
75
|
+
command: |
|
|
76
|
+
API_SERVER=$(kubectl cluster-info | grep 'Kubernetes control plane' | sed -E 's|.*https://([^:]+):.*|\1|') && dig +short "${API_SERVER}"
|
|
77
|
+
- name: "get_all_nsgs"
|
|
78
|
+
description: "Gets all Network Security Group (NSG) instances in a subscription"
|
|
79
|
+
user_description: "list NSG instances in current Azure subscription {{ SUBSCRIPTION_ID }} context"
|
|
80
|
+
command: |
|
|
81
|
+
az network nsg list --subscription {{ SUBSCRIPTION_ID }} -o table
|
|
82
|
+
- name: "get_nsg_rules"
|
|
83
|
+
description: "Gets all NSG rules associated with a specific NSG"
|
|
84
|
+
user_description: "list NSG rules for NSG {{ NSG_NAME }} in resource group {{ RESOURCE_GROUP_NAME }} under subscription {{ SUBSCRIPTION_ID }}"
|
|
85
|
+
command: |
|
|
86
|
+
az network nsg rule list --resource-group {{ RESOURCE_GROUP_NAME }} --nsg-name {{ NSG_NAME }} --subscription {{ SUBSCRIPTION_ID }} --include-default -o table
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
toolsets:
|
|
2
|
+
argocd/core:
|
|
3
|
+
description: "Set of tools to get argocd metadata like list of apps, repositories, projects, etc."
|
|
4
|
+
docs_url: "https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/argocd.html"
|
|
5
|
+
icon_url: "https://argo-cd.readthedocs.io/en/stable/assets/logo.png"
|
|
6
|
+
llm_instructions: |
|
|
7
|
+
You have access to a set of ArgoCD tools for debugging Kubernetes application deployments.
|
|
8
|
+
If an application's name does not exist in kubernetes, it may exist in argocd: call the tool `argocd_app_list` to find it.
|
|
9
|
+
These tools help you investigate issues with GitOps-managed applications in your Kubernetes clusters.
|
|
10
|
+
ALWAYS follow these steps:
|
|
11
|
+
1. List the applications
|
|
12
|
+
2. Retrieve the application status and its config
|
|
13
|
+
3. Retrieve the application's manifests for issues
|
|
14
|
+
4. Compare the ArgoCD config with the kubernetes status using kubectl tools
|
|
15
|
+
5. Check for resources mismatch between argocd and kubernetes
|
|
16
|
+
{% if tool_names|list|length > 0 %}
|
|
17
|
+
The following commands are available to introspect into ArgoCD: {{ ", ".join(tool_names) }}
|
|
18
|
+
{% endif %}
|
|
19
|
+
ALWAYS compare the argocd deployment with kubernetes so that you can suggest better solutions to the user.
|
|
20
|
+
DO NOT tell the user to check if a resource exists or to update the configuration without being specific, DO checks yourself on behalf of the user and then tell them the solution.
|
|
21
|
+
tags:
|
|
22
|
+
- core
|
|
23
|
+
prerequisites:
|
|
24
|
+
- command: "argocd version"
|
|
25
|
+
- env:
|
|
26
|
+
- ARGOCD_AUTH_TOKEN
|
|
27
|
+
tools:
|
|
28
|
+
- name: "argocd_app_list"
|
|
29
|
+
description: "List the applications in Argocd"
|
|
30
|
+
command: "argocd app list"
|
|
31
|
+
|
|
32
|
+
- name: "argocd_app_get"
|
|
33
|
+
description: "Retrieve information about an existing application, such as its status and configuration"
|
|
34
|
+
command: "argocd app get {{ app_name }} --show-operation -o wide"
|
|
35
|
+
|
|
36
|
+
- name: "argocd_app_diff"
|
|
37
|
+
description: "Display the differences between the current state of an application and the desired state specified in its Git repository"
|
|
38
|
+
command: "argocd app diff {{ app_name }}"
|
|
39
|
+
|
|
40
|
+
- name: "argocd_app_manifests"
|
|
41
|
+
description: "Retrieve manifests for an application"
|
|
42
|
+
command: "argocd app manifests {{app_name}}"
|
|
43
|
+
|
|
44
|
+
- name: "argocd_app_resources"
|
|
45
|
+
description: "List resources of application"
|
|
46
|
+
command: "argocd app resources {{app_name}}"
|
|
47
|
+
|
|
48
|
+
- name: "argocd_app_manifest_source_revision"
|
|
49
|
+
description: "Get manifests for a multi-source application at specific revision for specific source"
|
|
50
|
+
command: "argocd app manifests {{app_name}}{% if revision %} --revision {{ revision }}{% endif %}{% if source %} --source {{ source }}{% endif %}"
|
|
51
|
+
|
|
52
|
+
- name: "argocd_app_history"
|
|
53
|
+
description: "List the deployment history of an application in ArgoCD"
|
|
54
|
+
command: "argocd app history {{app_name}} --app-namespace {{namespace}}"
|
|
55
|
+
|
|
56
|
+
- name: "argocd_repo_list"
|
|
57
|
+
description: "List all the Git repositories that ArgoCD is currently managing"
|
|
58
|
+
command: "argocd repo list"
|
|
59
|
+
|
|
60
|
+
- name: "argocd_proj_list"
|
|
61
|
+
description: "List all available projects"
|
|
62
|
+
command: "argocd proj list"
|
|
63
|
+
|
|
64
|
+
- name: "argocd_proj_get"
|
|
65
|
+
description: "Retrieves information about an existing project, such as its applications and policies"
|
|
66
|
+
command: "argocd proj get {{ project_name }}"
|
|
67
|
+
|
|
68
|
+
- name: "argocd_cluster_list"
|
|
69
|
+
description: "List all known clusters"
|
|
70
|
+
command: "argocd cluster list"
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
When a user asks you to check for issues on an Atlas MongoDB project, follow these steps.
|
|
2
|
+
|
|
3
|
+
* Use atlas_return_project_alerts and atlas_return_events_from_project first to find any known potential issues on the project.
|
|
4
|
+
* For performance issues or collscan queries use atlas_return_project_slow_queries to see a list of slow queries. YOU MUST check this for all processes of the project. ALWAYS show the query in the result for every slow query.
|
|
5
|
+
* atlas_return_project_slow_queries currently returns data from the last 24 hours. If the user asks for a different time range, mention that it's not supported.
|
|
6
|
+
* atlas_return_events_from_project currently returns data from the last 24 hours. If the user asks for a different time range, mention that it's not supported.
|
|
7
|
+
* CHECK logs for mongodb using atlas_return_logs_for_host_in_project if the user mentions checking the logs.
|
|
8
|
+
* When asked for a specific number of slow queries (for example, the top 10 slow queries), DO NOT duplicate queries from different processes.
|