holmesgpt-0.11.5-py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
Potentially problematic release. This version of holmesgpt might be problematic.
- holmes/.git_archival.json +7 -0
- holmes/__init__.py +76 -0
- holmes/__init__.py.bak +76 -0
- holmes/clients/robusta_client.py +24 -0
- holmes/common/env_vars.py +47 -0
- holmes/config.py +526 -0
- holmes/core/__init__.py +0 -0
- holmes/core/conversations.py +578 -0
- holmes/core/investigation.py +152 -0
- holmes/core/investigation_structured_output.py +264 -0
- holmes/core/issue.py +54 -0
- holmes/core/llm.py +250 -0
- holmes/core/models.py +157 -0
- holmes/core/openai_formatting.py +51 -0
- holmes/core/performance_timing.py +72 -0
- holmes/core/prompt.py +42 -0
- holmes/core/resource_instruction.py +17 -0
- holmes/core/runbooks.py +26 -0
- holmes/core/safeguards.py +120 -0
- holmes/core/supabase_dal.py +540 -0
- holmes/core/tool_calling_llm.py +798 -0
- holmes/core/tools.py +566 -0
- holmes/core/tools_utils/__init__.py +0 -0
- holmes/core/tools_utils/tool_executor.py +65 -0
- holmes/core/tools_utils/toolset_utils.py +52 -0
- holmes/core/toolset_manager.py +418 -0
- holmes/interactive.py +229 -0
- holmes/main.py +1041 -0
- holmes/plugins/__init__.py +0 -0
- holmes/plugins/destinations/__init__.py +6 -0
- holmes/plugins/destinations/slack/__init__.py +2 -0
- holmes/plugins/destinations/slack/plugin.py +163 -0
- holmes/plugins/interfaces.py +32 -0
- holmes/plugins/prompts/__init__.py +48 -0
- holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
- holmes/plugins/prompts/_default_log_prompt.jinja2 +11 -0
- holmes/plugins/prompts/_fetch_logs.jinja2 +36 -0
- holmes/plugins/prompts/_general_instructions.jinja2 +86 -0
- holmes/plugins/prompts/_global_instructions.jinja2 +12 -0
- holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -0
- holmes/plugins/prompts/_toolsets_instructions.jinja2 +56 -0
- holmes/plugins/prompts/generic_ask.jinja2 +36 -0
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +32 -0
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +50 -0
- holmes/plugins/prompts/generic_investigation.jinja2 +42 -0
- holmes/plugins/prompts/generic_post_processing.jinja2 +13 -0
- holmes/plugins/prompts/generic_ticket.jinja2 +12 -0
- holmes/plugins/prompts/investigation_output_format.jinja2 +32 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +84 -0
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +39 -0
- holmes/plugins/runbooks/README.md +22 -0
- holmes/plugins/runbooks/__init__.py +100 -0
- holmes/plugins/runbooks/catalog.json +14 -0
- holmes/plugins/runbooks/jira.yaml +12 -0
- holmes/plugins/runbooks/kube-prometheus-stack.yaml +10 -0
- holmes/plugins/runbooks/networking/dns_troubleshooting_instructions.md +66 -0
- holmes/plugins/runbooks/upgrade/upgrade_troubleshooting_instructions.md +44 -0
- holmes/plugins/sources/github/__init__.py +77 -0
- holmes/plugins/sources/jira/__init__.py +123 -0
- holmes/plugins/sources/opsgenie/__init__.py +93 -0
- holmes/plugins/sources/pagerduty/__init__.py +147 -0
- holmes/plugins/sources/prometheus/__init__.py +0 -0
- holmes/plugins/sources/prometheus/models.py +104 -0
- holmes/plugins/sources/prometheus/plugin.py +154 -0
- holmes/plugins/toolsets/__init__.py +171 -0
- holmes/plugins/toolsets/aks-node-health.yaml +65 -0
- holmes/plugins/toolsets/aks.yaml +86 -0
- holmes/plugins/toolsets/argocd.yaml +70 -0
- holmes/plugins/toolsets/atlas_mongodb/instructions.jinja2 +8 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +307 -0
- holmes/plugins/toolsets/aws.yaml +76 -0
- holmes/plugins/toolsets/azure_sql/__init__.py +0 -0
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +600 -0
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +309 -0
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +445 -0
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +251 -0
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +317 -0
- holmes/plugins/toolsets/azure_sql/azure_base_toolset.py +55 -0
- holmes/plugins/toolsets/azure_sql/azure_sql_instructions.jinja2 +137 -0
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +183 -0
- holmes/plugins/toolsets/azure_sql/install.md +66 -0
- holmes/plugins/toolsets/azure_sql/tools/__init__.py +1 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +324 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +243 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +205 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +249 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +373 -0
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +237 -0
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +172 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +170 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +188 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +180 -0
- holmes/plugins/toolsets/azure_sql/utils.py +83 -0
- holmes/plugins/toolsets/bash/__init__.py +0 -0
- holmes/plugins/toolsets/bash/bash_instructions.jinja2 +14 -0
- holmes/plugins/toolsets/bash/bash_toolset.py +208 -0
- holmes/plugins/toolsets/bash/common/bash.py +52 -0
- holmes/plugins/toolsets/bash/common/config.py +14 -0
- holmes/plugins/toolsets/bash/common/stringify.py +25 -0
- holmes/plugins/toolsets/bash/common/validators.py +24 -0
- holmes/plugins/toolsets/bash/grep/__init__.py +52 -0
- holmes/plugins/toolsets/bash/kubectl/__init__.py +100 -0
- holmes/plugins/toolsets/bash/kubectl/constants.py +96 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +66 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +88 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +108 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +20 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +46 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +81 -0
- holmes/plugins/toolsets/bash/parse_command.py +103 -0
- holmes/plugins/toolsets/confluence.yaml +19 -0
- holmes/plugins/toolsets/consts.py +5 -0
- holmes/plugins/toolsets/coralogix/api.py +158 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +103 -0
- holmes/plugins/toolsets/coralogix/utils.py +181 -0
- holmes/plugins/toolsets/datadog.py +153 -0
- holmes/plugins/toolsets/docker.yaml +46 -0
- holmes/plugins/toolsets/git.py +756 -0
- holmes/plugins/toolsets/grafana/__init__.py +0 -0
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +54 -0
- holmes/plugins/toolsets/grafana/common.py +68 -0
- holmes/plugins/toolsets/grafana/grafana_api.py +31 -0
- holmes/plugins/toolsets/grafana/loki_api.py +89 -0
- holmes/plugins/toolsets/grafana/tempo_api.py +124 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +102 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +102 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +10 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -0
- holmes/plugins/toolsets/grafana/trace_parser.py +195 -0
- holmes/plugins/toolsets/helm.yaml +42 -0
- holmes/plugins/toolsets/internet/internet.py +275 -0
- holmes/plugins/toolsets/internet/notion.py +137 -0
- holmes/plugins/toolsets/kafka.py +638 -0
- holmes/plugins/toolsets/kubernetes.yaml +255 -0
- holmes/plugins/toolsets/kubernetes_logs.py +426 -0
- holmes/plugins/toolsets/kubernetes_logs.yaml +42 -0
- holmes/plugins/toolsets/logging_utils/__init__.py +0 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +217 -0
- holmes/plugins/toolsets/logging_utils/types.py +0 -0
- holmes/plugins/toolsets/mcp/toolset_mcp.py +135 -0
- holmes/plugins/toolsets/newrelic.py +222 -0
- holmes/plugins/toolsets/opensearch/__init__.py +0 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +245 -0
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +151 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +211 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +12 -0
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +166 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +818 -0
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +38 -0
- holmes/plugins/toolsets/rabbitmq/api.py +398 -0
- holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2 +37 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +222 -0
- holmes/plugins/toolsets/robusta/__init__.py +0 -0
- holmes/plugins/toolsets/robusta/robusta.py +235 -0
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +24 -0
- holmes/plugins/toolsets/runbook/__init__.py +0 -0
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +78 -0
- holmes/plugins/toolsets/service_discovery.py +92 -0
- holmes/plugins/toolsets/servicenow/install.md +37 -0
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +3 -0
- holmes/plugins/toolsets/servicenow/servicenow.py +198 -0
- holmes/plugins/toolsets/slab.yaml +20 -0
- holmes/plugins/toolsets/utils.py +137 -0
- holmes/plugins/utils.py +14 -0
- holmes/utils/__init__.py +0 -0
- holmes/utils/cache.py +84 -0
- holmes/utils/cert_utils.py +40 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +44 -0
- holmes/utils/definitions.py +13 -0
- holmes/utils/env.py +53 -0
- holmes/utils/file_utils.py +56 -0
- holmes/utils/global_instructions.py +20 -0
- holmes/utils/holmes_status.py +22 -0
- holmes/utils/holmes_sync_toolsets.py +80 -0
- holmes/utils/markdown_utils.py +55 -0
- holmes/utils/pydantic_utils.py +54 -0
- holmes/utils/robusta.py +10 -0
- holmes/utils/tags.py +97 -0
- holmesgpt-0.11.5.dist-info/LICENSE.txt +21 -0
- holmesgpt-0.11.5.dist-info/METADATA +400 -0
- holmesgpt-0.11.5.dist-info/RECORD +183 -0
- holmesgpt-0.11.5.dist-info/WHEEL +4 -0
- holmesgpt-0.11.5.dist-info/entry_points.txt +3 -0
holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py
@@ -0,0 +1,373 @@
import logging
from typing import Any, Dict, Tuple
from datetime import datetime, timezone

from holmes.core.tools import StructuredToolResult, ToolParameter, ToolResultStatus
from holmes.plugins.toolsets.azure_sql.azure_base_toolset import (
    BaseAzureSQLTool,
    BaseAzureSQLToolset,
    AzureSQLDatabaseConfig,
)
from holmes.plugins.toolsets.azure_sql.apis.storage_analysis_api import (
    StorageAnalysisAPI,
)
from holmes.plugins.toolsets.azure_sql.apis.azure_sql_api import AzureSQLAPIClient


class AnalyzeDatabaseStorage(BaseAzureSQLTool):
    def __init__(self, toolset: "BaseAzureSQLToolset"):
        super().__init__(
            name="analyze_database_storage",
            description="Analyzes database storage utilization including disk usage, growth trends, file-level details, and table space consumption. Use this for capacity planning and storage optimization.",
            parameters={
                "hours_back": ToolParameter(
                    description="Time window for storage metrics analysis in hours. Use 24 for daily trends, 168 for weekly analysis (default: 24)",
                    type="integer",
                    required=False,
                ),
                "top_tables": ToolParameter(
                    description="Number of largest tables to analyze for space usage. Use 20 for comprehensive view, 10 for quick overview (default: 20)",
                    type="integer",
                    required=False,
                ),
            },
            toolset=toolset,
        )

    def _build_storage_report(
        self,
        db_config: AzureSQLDatabaseConfig,
        storage_data: Dict,
        hours_back: int,
        top_tables: int,
    ) -> str:
        """Build the formatted storage report from gathered data."""
        report_sections = []

        # Header
        report_sections.append("# Azure SQL Database Storage Analysis Report")
        report_sections.append(f"**Database:** {db_config.database_name}")
        report_sections.append(f"**Server:** {db_config.server_name}")
        report_sections.append(f"**Analysis Period:** Last {hours_back} hours")
        report_sections.append(
            f"**Generated:** {datetime.now(timezone.utc).isoformat()}"
        )
        report_sections.append("")

        # Storage Summary
        report_sections.append("## Storage Summary")
        summary = storage_data.get("summary", {})
        if "error" in summary:
            report_sections.append(
                f"⚠️ **Error retrieving storage summary:** {summary['error']}"
            )
        else:
            total_size = summary.get("total_database_size_mb", 0) or 0
            used_size = summary.get("total_used_size_mb", 0) or 0
            data_size = summary.get("total_data_size_mb", 0) or 0
            log_size = summary.get("total_log_size_mb", 0) or 0

            if total_size:
                used_percent = (used_size / total_size) * 100
                free_size = total_size - used_size

                report_sections.append(
                    f"- **Total Database Size**: {total_size:,.1f} MB"
                )
                report_sections.append(
                    f"- **Used Space**: {used_size:,.1f} MB ({used_percent:.1f}%)"
                )
                report_sections.append(f"- **Free Space**: {free_size:,.1f} MB")
                report_sections.append(f"- **Data Files**: {data_size:,.1f} MB")
                report_sections.append(f"- **Log Files**: {log_size:,.1f} MB")
                report_sections.append(
                    f"- **Data Files Count**: {summary.get('data_files_count', 0)}"
                )
                report_sections.append(
                    f"- **Log Files Count**: {summary.get('log_files_count', 0)}"
                )
            else:
                report_sections.append("No storage summary data available")
        report_sections.append("")

        # File Details
        report_sections.append("## Database Files Details")
        file_details = storage_data.get("file_details", [])
        if isinstance(file_details, dict) and "error" in file_details:
            report_sections.append(
                f"⚠️ **Error retrieving file details:** {file_details['error']}"
            )
        elif file_details:
            for file_info in file_details:
                file_type = file_info.get("file_type", "Unknown")
                logical_name = file_info.get("logical_name", "Unknown")
                size_mb = file_info.get("size_mb", 0) or 0
                used_mb = file_info.get("used_mb")
                used_percent = file_info.get("used_percent")
                max_size = file_info.get("max_size", "Unknown")
                growth = file_info.get("growth_setting", "Unknown")

                # Only calculate status icon if we have used_percent data
                if used_percent is not None:
                    status_icon = (
                        "🔴"
                        if used_percent > 90
                        else "🟡"
                        if used_percent > 75
                        else "🟢"
                    )
                else:
                    status_icon = ""

                report_sections.append(f"### {file_type} File: {logical_name}")
                report_sections.append(f"- **Size**: {size_mb:,.1f} MB")
                if used_mb is not None and used_percent is not None:
                    report_sections.append(
                        f"- **Used**: {used_mb:,.1f} MB ({used_percent:.1f}%) {status_icon}"
                    )
                else:
                    report_sections.append("- **Used**: N/A (FILESTREAM file)")
                report_sections.append(f"- **Max Size**: {max_size}")
                report_sections.append(f"- **Growth**: {growth}")
                report_sections.append("")
        else:
            report_sections.append("No file details available")

        # Growth Trend Analysis
        report_sections.append("## Storage Growth Analysis")
        growth_data = storage_data.get("growth_trend", {})
        if "error" in growth_data:
            report_sections.append(
                f"⚠️ **Growth analysis unavailable:** {growth_data['error']}"
            )
        elif growth_data.get("growth_analysis"):
            analysis = growth_data["growth_analysis"]
            total_growth = analysis.get("total_growth_mb", 0) or 0
            growth_percent = analysis.get("growth_percent", 0) or 0
            days_analyzed = analysis.get("days_analyzed", 0) or 0
            daily_growth = analysis.get("avg_daily_growth_mb", 0) or 0

            growth_icon = (
                "🔴" if daily_growth > 100 else "🟡" if daily_growth > 50 else "🟢"
            )

            report_sections.append(f"- **Analysis Period**: {days_analyzed} days")
            report_sections.append(
                f"- **Total Growth**: {total_growth:,.1f} MB ({growth_percent:.1f}%)"
            )
            report_sections.append(
                f"- **Daily Average Growth**: {daily_growth:,.1f} MB {growth_icon}"
            )

            # Growth projection
            if daily_growth > 0:
                days_to_double = (
                    (summary.get("total_database_size_mb", 0) / daily_growth)
                    if daily_growth > 0
                    else 0
                )
                report_sections.append(
                    f"- **Projected to Double**: {days_to_double:,.0f} days"
                )
        else:
            report_sections.append(
                "Growth analysis requires backup history (not available)"
            )
        report_sections.append("")

        # Top Tables by Space Usage
        report_sections.append(f"## Top {top_tables} Tables by Space Usage")
        table_usage = storage_data.get("table_usage", [])
        if table_usage:
            report_sections.append("")
            for i, table in enumerate(table_usage[:top_tables], 1):
                schema_name = table.get("schema_name", "unknown")
                table_name = table.get("table_name", "unknown")
                total_space = table.get("total_space_mb", 0)
                row_count = table.get("row_count", 0)
                index_type = table.get("index_type", "unknown")

                report_sections.append(f"### {i}. {schema_name}.{table_name}")
                report_sections.append(f"- **Total Space**: {total_space:,.1f} MB")
                report_sections.append(f"- **Row Count**: {row_count:,}")
                report_sections.append(f"- **Index Type**: {index_type}")
                report_sections.append("")
        else:
            report_sections.append("No table usage data available")

        # Azure Monitor Storage Metrics
        report_sections.append("## Azure Monitor Storage Metrics")
        metrics = storage_data.get("metrics", {})
        if "error" in metrics:
            report_sections.append(f"⚠️ **Metrics unavailable:** {metrics['error']}")
        else:
            metric_found = False
            for metric_name, metric_data in metrics.items():
                if metric_data:
                    metric_found = True
                    recent_values = metric_data[-5:]  # Last 5 data points
                    if recent_values:
                        avg_value = sum(
                            point.get("average", 0) or 0 for point in recent_values
                        ) / len(recent_values)
                        max_value = max(
                            point.get("maximum", 0) or 0 for point in recent_values
                        )

                        # Format based on metric type
                        if "percent" in metric_name:
                            report_sections.append(
                                f"- **{metric_name}**: Avg {avg_value:.1f}%, Max {max_value:.1f}%"
                            )
                        else:
                            report_sections.append(
                                f"- **{metric_name}**: Avg {avg_value:,.1f}, Max {max_value:,.1f}"
                            )

            if not metric_found:
                report_sections.append("No recent storage metric data available")

        # TempDB Usage
        tempdb_data = storage_data.get("tempdb", {})
        if tempdb_data and "error" not in tempdb_data:
            report_sections.append("")
            report_sections.append("## TempDB Usage")
            for metric_type, data in tempdb_data.items():
                if isinstance(data, dict):
                    used_percent = data.get("used_percent", 0) or 0
                    status_icon = (
                        "🔴"
                        if used_percent > 90
                        else "🟡"
                        if used_percent > 75
                        else "🟢"
                    )
                    report_sections.append(
                        f"- **{metric_type}**: {data.get('used_size_mb', 0) or 0:,.1f} MB / {data.get('total_size_mb', 0) or 0:,.1f} MB ({used_percent:.1f}%) {status_icon}"
                    )

        return "\n".join(report_sections)

    def _invoke(self, params: Dict) -> StructuredToolResult:
        try:
            hours_back = params.get("hours_back", 24)
            top_tables = params.get("top_tables", 20)

            db_config = self.toolset.database_config()

            # Create storage analysis API client
            api_client = self.toolset.api_client()
            storage_api = StorageAnalysisAPI(
                credential=api_client.credential,
                subscription_id=db_config.subscription_id,
            )

            # Gather storage data
            storage_data: Dict[str, Any] = {}

            # Get storage summary
            storage_data["summary"] = storage_api.get_storage_summary(
                db_config.server_name, db_config.database_name
            )

            # Get file details
            storage_data["file_details"] = storage_api.get_database_size_details(
                db_config.server_name, db_config.database_name
            )

            # Get table space usage
            storage_data["table_usage"] = storage_api.get_table_space_usage(
                db_config.server_name, db_config.database_name, top_tables
            )

            # Get growth trend
            storage_data["growth_trend"] = storage_api.get_storage_growth_trend(
                db_config.server_name, db_config.database_name
            )

            # Get Azure Monitor storage metrics
            storage_data["metrics"] = storage_api.get_storage_metrics(
                db_config.resource_group,
                db_config.server_name,
                db_config.database_name,
                hours_back,
            )

            # Get TempDB usage
            storage_data["tempdb"] = storage_api.get_tempdb_usage(
                db_config.server_name, db_config.database_name
            )

            # Build the formatted report
            report_text = self._build_storage_report(
                db_config, storage_data, hours_back, top_tables
            )

            return StructuredToolResult(
                status=ToolResultStatus.SUCCESS,
                data=report_text,
                params=params,
            )
        except Exception as e:
            error_msg = f"Failed to generate storage report: {str(e)}"
            logging.error(error_msg)
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=error_msg,
                params=params,
            )

    def get_parameterized_one_liner(self, params: Dict) -> str:
        db_config = self.toolset.database_config()
        return f"Analyzed database storage for database {db_config.server_name}/{db_config.database_name}"

    @staticmethod
    def validate_config(
        api_client: AzureSQLAPIClient, database_config: AzureSQLDatabaseConfig
    ) -> Tuple[bool, str]:
        errors = []

        # Create storage analysis API client for validation
        storage_api = StorageAnalysisAPI(
            credential=api_client.credential,
            subscription_id=database_config.subscription_id,
        )

        # Test SQL database connection (storage queries)
        try:
            storage_api.get_storage_summary(
                database_config.server_name, database_config.database_name
            )
        except Exception as e:
            error_msg = str(e)
            if "authentication" in error_msg.lower() or "login" in error_msg.lower():
                errors.append(f"SQL database authentication failed: {error_msg}")
            elif (
                "permission" in error_msg.lower()
                or "authorization" in error_msg.lower()
            ):
                errors.append(f"SQL database permissions insufficient: {error_msg}")
            else:
                errors.append(f"SQL database connection failed: {error_msg}")

        # Test Azure Monitor API access (storage metrics)
        try:
            storage_api.get_storage_metrics(
                database_config.resource_group,
                database_config.server_name,
                database_config.database_name,
                1,  # Test with 1 hour
            )
        except Exception as e:
            error_msg = str(e)
            if (
                "authorization" in error_msg.lower()
                or "permission" in error_msg.lower()
            ):
                errors.append(f"Azure Monitor API access denied: {error_msg}")
            else:
                errors.append(f"Azure Monitor API connection failed: {error_msg}")

        if errors:
            return False, "\n".join(errors)
        return True, ""
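For orientation, a minimal usage sketch follows. It assumes a concrete `BaseAzureSQLToolset` whose `database_config()` and `api_client()` accessors are already configured; that wiring is not part of this diff, so treat the `toolset` argument as a placeholder.

```python
# Illustrative sketch only -- `toolset` is assumed to be a configured
# BaseAzureSQLToolset; its construction is outside this diff.
from holmes.core.tools import ToolResultStatus
from holmes.plugins.toolsets.azure_sql.tools.analyze_database_storage import (
    AnalyzeDatabaseStorage,
)


def print_storage_report(toolset) -> None:
    tool = AnalyzeDatabaseStorage(toolset)
    # Both parameters are optional; per the tool definition, the defaults
    # are 24 hours and 20 tables.
    result = tool._invoke({"hours_back": 24, "top_tables": 10})
    if result.status == ToolResultStatus.SUCCESS:
        print(result.data)  # markdown-formatted storage report
    else:
        print(f"Storage analysis failed: {result.error}")
```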

holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py
@@ -0,0 +1,237 @@
import logging
from typing import Dict
from datetime import datetime, timezone

from holmes.core.tools import StructuredToolResult, ToolResultStatus
from holmes.plugins.toolsets.azure_sql.azure_base_toolset import (
    BaseAzureSQLTool,
    BaseAzureSQLToolset,
    AzureSQLDatabaseConfig,
)
from holmes.plugins.toolsets.azure_sql.apis.azure_sql_api import AzureSQLAPIClient
from holmes.plugins.toolsets.azure_sql.apis.alert_monitoring_api import (
    AlertMonitoringAPI,
)
from typing import Tuple


class GetActiveAlerts(BaseAzureSQLTool):
    def __init__(self, toolset: "BaseAzureSQLToolset"):
        super().__init__(
            name="get_active_alerts",
            description="Retrieves currently active Azure Monitor alerts for the SQL database and server. Use this to identify ongoing issues, performance problems, and service health alerts that need immediate attention.",
            parameters={},
            toolset=toolset,
        )

    def _build_alerts_report(
        self, db_config: AzureSQLDatabaseConfig, alerts_data: Dict, alert_type: str
    ) -> str:
        """Build the formatted alerts report from gathered data."""
        report_sections = []

        # Header
        report_sections.append(
            f"# Azure SQL Database {alert_type.title()} Alerts Report"
        )
        report_sections.append(f"**Database:** {db_config.database_name}")
        report_sections.append(f"**Server:** {db_config.server_name}")
        report_sections.append(f"**Resource Group:** {db_config.resource_group}")
        report_sections.append(
            f"**Generated:** {datetime.now(timezone.utc).isoformat()}"
        )
        report_sections.append("")

        # Summary
        total_alerts = alerts_data.get("total_count", 0)
        active_alerts = alerts_data.get("active_alerts", [])

        report_sections.append("## Summary")
        if total_alerts == 0:
            report_sections.append("✅ **No active alerts** - System appears healthy")
        else:
            severity_counts: dict = {}
            scope_counts: dict = {}
            for alert in active_alerts:
                severity = alert.get("severity", "Unknown")
                scope = alert.get("scope", "Unknown")
                severity_counts[severity] = severity_counts.get(severity, 0) + 1
                scope_counts[scope] = scope_counts.get(scope, 0) + 1

            report_sections.append(f"🚨 **{total_alerts} active alerts detected**")

            # Severity breakdown
            if severity_counts:
                report_sections.append("### Severity Breakdown:")
                for severity, count in sorted(severity_counts.items()):
                    icon = (
                        "🔴"
                        if severity in ["Sev0", "Critical"]
                        else "🟡"
                        if severity in ["Sev1", "Error"]
                        else "🟢"
                    )
                    report_sections.append(f"- **{severity}**: {count} alerts {icon}")

            # Scope breakdown
            if scope_counts:
                report_sections.append("### Scope Breakdown:")
                for scope, count in sorted(scope_counts.items()):
                    report_sections.append(f"- **{scope.title()}**: {count} alerts")

        report_sections.append("")

        # Alert Details
        if active_alerts:
            report_sections.append("## Active Alerts Details")

            # Sort by severity (most critical first)
            severity_order = {
                "Sev0": 0,
                "Critical": 0,
                "Sev1": 1,
                "Error": 1,
                "Sev2": 2,
                "Warning": 2,
                "Sev3": 3,
                "Informational": 3,
            }
            active_alerts.sort(
                key=lambda x: severity_order.get(x.get("severity", "Unknown"), 99)
            )

            for i, alert in enumerate(active_alerts, 1):
                alert_id = alert.get("id", "Unknown")
                name = alert.get("name", "Unknown Alert")
                description = alert.get("description", "No description available")
                severity = alert.get("severity", "Unknown")
                state = alert.get("state", "Unknown")
                fired_time = alert.get("fired_time", "Unknown")
                scope = alert.get("scope", "Unknown")
                resource_type = alert.get("resource_type", "Unknown")

                # Format severity with icon
                severity_icon = (
                    "🔴"
                    if severity in ["Sev0", "Critical"]
                    else "🟡"
                    if severity in ["Sev1", "Error"]
                    else "🟢"
                )

                report_sections.append(f"### Alert #{i}: {name}")
                report_sections.append(f"- **Severity**: {severity} {severity_icon}")
                report_sections.append(f"- **State**: {state}")
                report_sections.append(f"- **Scope**: {scope.title()}")
                report_sections.append(f"- **Resource Type**: {resource_type}")
                report_sections.append(f"- **Fired Time**: {fired_time}")
                report_sections.append(f"- **Alert ID**: {alert_id}")
                report_sections.append(f"- **Description**: {description}")
                report_sections.append("")

        # Resource Information
        report_sections.append("## Resource Information")
        report_sections.append(
            f"- **Database Resource ID**: {alerts_data.get('database_resource_id', 'N/A')}"
        )
        report_sections.append(
            f"- **Server Resource ID**: {alerts_data.get('server_resource_id', 'N/A')}"
        )

        # Metadata
        method = alerts_data.get("method")
        if method:
            report_sections.append(f"- **Data Source**: {method}")

        return "\n".join(report_sections)

    def _invoke(self, params: Dict) -> StructuredToolResult:
        try:
            db_config = self.toolset.database_config()
            api_client = self.toolset.api_client()

            # Create alert monitoring API client
            alert_api = AlertMonitoringAPI(
                credential=api_client.credential,
                subscription_id=db_config.subscription_id,
            )

            # Get active alerts
            alerts_data = alert_api.get_active_alerts(
                db_config.resource_group,
                db_config.server_name,
                db_config.database_name,
            )

            # Check for errors
            if "error" in alerts_data:
                return StructuredToolResult(
                    status=ToolResultStatus.ERROR,
                    error=alerts_data["error"],
                    params=params,
                )

            # Build the formatted report
            report_text = self._build_alerts_report(db_config, alerts_data, "active")

            return StructuredToolResult(
                status=ToolResultStatus.SUCCESS,
                data=report_text,
                params=params,
            )
        except Exception as e:
            error_msg = f"Failed to retrieve active alerts: {str(e)}"
            logging.error(error_msg)
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=error_msg,
                params=params,
            )

    def get_parameterized_one_liner(self, params: Dict) -> str:
        db_config = self.toolset.database_config()
        return f"Fetch active alerts for database {db_config.server_name}/{db_config.database_name}"

    @staticmethod
    def validate_config(
        api_client: AzureSQLAPIClient, database_config: AzureSQLDatabaseConfig
    ) -> Tuple[bool, str]:
        errors = []

        try:
            # Test alert monitoring API access
            alert_api = AlertMonitoringAPI(
                credential=api_client.credential,
                subscription_id=database_config.subscription_id,
            )

            # Test getting active alerts
            alerts_data = alert_api.get_active_alerts(
                database_config.resource_group,
                database_config.server_name,
                database_config.database_name,
            )

            if "error" in alerts_data:
                error_msg = alerts_data["error"]
                if (
                    "authorization" in error_msg.lower()
                    or "permission" in error_msg.lower()
                ):
                    errors.append(f"Alert monitoring access denied: {error_msg}")
                else:
                    errors.append(f"Alert monitoring connection failed: {error_msg}")

        except Exception as e:
            error_msg = str(e)
            if (
                "authorization" in error_msg.lower()
                or "permission" in error_msg.lower()
            ):
                errors.append(f"Alert monitoring API access denied: {error_msg}")
            else:
                errors.append(f"Alert monitoring API connection failed: {error_msg}")

        if errors:
            return False, "\n".join(errors)
        return True, ""
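As with the storage tool, a hedged usage sketch: it runs the `validate_config` pre-flight check defined above before invoking the tool, and again assumes an already-configured toolset.

```python
# Illustrative sketch only -- `toolset` is assumed to be a configured
# BaseAzureSQLToolset; tool and result types come from the diff above.
from holmes.core.tools import ToolResultStatus
from holmes.plugins.toolsets.azure_sql.tools.get_active_alerts import GetActiveAlerts


def print_active_alerts(toolset) -> None:
    # Pre-flight: verify Azure Monitor alert access before invoking the tool.
    ok, error = GetActiveAlerts.validate_config(
        toolset.api_client(), toolset.database_config()
    )
    if not ok:
        raise RuntimeError(f"Alert monitoring unavailable:\n{error}")

    result = GetActiveAlerts(toolset)._invoke({})  # the tool takes no parameters
    if result.status == ToolResultStatus.SUCCESS:
        print(result.data)  # markdown-formatted alerts report
    else:
        print(f"Alert retrieval failed: {result.error}")
```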