holmesgpt-0.11.5-py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
Potentially problematic release: this version of holmesgpt has been flagged as possibly problematic.
- holmes/.git_archival.json +7 -0
- holmes/__init__.py +76 -0
- holmes/__init__.py.bak +76 -0
- holmes/clients/robusta_client.py +24 -0
- holmes/common/env_vars.py +47 -0
- holmes/config.py +526 -0
- holmes/core/__init__.py +0 -0
- holmes/core/conversations.py +578 -0
- holmes/core/investigation.py +152 -0
- holmes/core/investigation_structured_output.py +264 -0
- holmes/core/issue.py +54 -0
- holmes/core/llm.py +250 -0
- holmes/core/models.py +157 -0
- holmes/core/openai_formatting.py +51 -0
- holmes/core/performance_timing.py +72 -0
- holmes/core/prompt.py +42 -0
- holmes/core/resource_instruction.py +17 -0
- holmes/core/runbooks.py +26 -0
- holmes/core/safeguards.py +120 -0
- holmes/core/supabase_dal.py +540 -0
- holmes/core/tool_calling_llm.py +798 -0
- holmes/core/tools.py +566 -0
- holmes/core/tools_utils/__init__.py +0 -0
- holmes/core/tools_utils/tool_executor.py +65 -0
- holmes/core/tools_utils/toolset_utils.py +52 -0
- holmes/core/toolset_manager.py +418 -0
- holmes/interactive.py +229 -0
- holmes/main.py +1041 -0
- holmes/plugins/__init__.py +0 -0
- holmes/plugins/destinations/__init__.py +6 -0
- holmes/plugins/destinations/slack/__init__.py +2 -0
- holmes/plugins/destinations/slack/plugin.py +163 -0
- holmes/plugins/interfaces.py +32 -0
- holmes/plugins/prompts/__init__.py +48 -0
- holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
- holmes/plugins/prompts/_default_log_prompt.jinja2 +11 -0
- holmes/plugins/prompts/_fetch_logs.jinja2 +36 -0
- holmes/plugins/prompts/_general_instructions.jinja2 +86 -0
- holmes/plugins/prompts/_global_instructions.jinja2 +12 -0
- holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -0
- holmes/plugins/prompts/_toolsets_instructions.jinja2 +56 -0
- holmes/plugins/prompts/generic_ask.jinja2 +36 -0
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +32 -0
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +50 -0
- holmes/plugins/prompts/generic_investigation.jinja2 +42 -0
- holmes/plugins/prompts/generic_post_processing.jinja2 +13 -0
- holmes/plugins/prompts/generic_ticket.jinja2 +12 -0
- holmes/plugins/prompts/investigation_output_format.jinja2 +32 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +84 -0
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +39 -0
- holmes/plugins/runbooks/README.md +22 -0
- holmes/plugins/runbooks/__init__.py +100 -0
- holmes/plugins/runbooks/catalog.json +14 -0
- holmes/plugins/runbooks/jira.yaml +12 -0
- holmes/plugins/runbooks/kube-prometheus-stack.yaml +10 -0
- holmes/plugins/runbooks/networking/dns_troubleshooting_instructions.md +66 -0
- holmes/plugins/runbooks/upgrade/upgrade_troubleshooting_instructions.md +44 -0
- holmes/plugins/sources/github/__init__.py +77 -0
- holmes/plugins/sources/jira/__init__.py +123 -0
- holmes/plugins/sources/opsgenie/__init__.py +93 -0
- holmes/plugins/sources/pagerduty/__init__.py +147 -0
- holmes/plugins/sources/prometheus/__init__.py +0 -0
- holmes/plugins/sources/prometheus/models.py +104 -0
- holmes/plugins/sources/prometheus/plugin.py +154 -0
- holmes/plugins/toolsets/__init__.py +171 -0
- holmes/plugins/toolsets/aks-node-health.yaml +65 -0
- holmes/plugins/toolsets/aks.yaml +86 -0
- holmes/plugins/toolsets/argocd.yaml +70 -0
- holmes/plugins/toolsets/atlas_mongodb/instructions.jinja2 +8 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +307 -0
- holmes/plugins/toolsets/aws.yaml +76 -0
- holmes/plugins/toolsets/azure_sql/__init__.py +0 -0
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +600 -0
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +309 -0
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +445 -0
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +251 -0
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +317 -0
- holmes/plugins/toolsets/azure_sql/azure_base_toolset.py +55 -0
- holmes/plugins/toolsets/azure_sql/azure_sql_instructions.jinja2 +137 -0
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +183 -0
- holmes/plugins/toolsets/azure_sql/install.md +66 -0
- holmes/plugins/toolsets/azure_sql/tools/__init__.py +1 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +324 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +243 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +205 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +249 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +373 -0
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +237 -0
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +172 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +170 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +188 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +180 -0
- holmes/plugins/toolsets/azure_sql/utils.py +83 -0
- holmes/plugins/toolsets/bash/__init__.py +0 -0
- holmes/plugins/toolsets/bash/bash_instructions.jinja2 +14 -0
- holmes/plugins/toolsets/bash/bash_toolset.py +208 -0
- holmes/plugins/toolsets/bash/common/bash.py +52 -0
- holmes/plugins/toolsets/bash/common/config.py +14 -0
- holmes/plugins/toolsets/bash/common/stringify.py +25 -0
- holmes/plugins/toolsets/bash/common/validators.py +24 -0
- holmes/plugins/toolsets/bash/grep/__init__.py +52 -0
- holmes/plugins/toolsets/bash/kubectl/__init__.py +100 -0
- holmes/plugins/toolsets/bash/kubectl/constants.py +96 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +66 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +88 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +108 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +20 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +46 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +81 -0
- holmes/plugins/toolsets/bash/parse_command.py +103 -0
- holmes/plugins/toolsets/confluence.yaml +19 -0
- holmes/plugins/toolsets/consts.py +5 -0
- holmes/plugins/toolsets/coralogix/api.py +158 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +103 -0
- holmes/plugins/toolsets/coralogix/utils.py +181 -0
- holmes/plugins/toolsets/datadog.py +153 -0
- holmes/plugins/toolsets/docker.yaml +46 -0
- holmes/plugins/toolsets/git.py +756 -0
- holmes/plugins/toolsets/grafana/__init__.py +0 -0
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +54 -0
- holmes/plugins/toolsets/grafana/common.py +68 -0
- holmes/plugins/toolsets/grafana/grafana_api.py +31 -0
- holmes/plugins/toolsets/grafana/loki_api.py +89 -0
- holmes/plugins/toolsets/grafana/tempo_api.py +124 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +102 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +102 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +10 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -0
- holmes/plugins/toolsets/grafana/trace_parser.py +195 -0
- holmes/plugins/toolsets/helm.yaml +42 -0
- holmes/plugins/toolsets/internet/internet.py +275 -0
- holmes/plugins/toolsets/internet/notion.py +137 -0
- holmes/plugins/toolsets/kafka.py +638 -0
- holmes/plugins/toolsets/kubernetes.yaml +255 -0
- holmes/plugins/toolsets/kubernetes_logs.py +426 -0
- holmes/plugins/toolsets/kubernetes_logs.yaml +42 -0
- holmes/plugins/toolsets/logging_utils/__init__.py +0 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +217 -0
- holmes/plugins/toolsets/logging_utils/types.py +0 -0
- holmes/plugins/toolsets/mcp/toolset_mcp.py +135 -0
- holmes/plugins/toolsets/newrelic.py +222 -0
- holmes/plugins/toolsets/opensearch/__init__.py +0 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +245 -0
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +151 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +211 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +12 -0
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +166 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +818 -0
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +38 -0
- holmes/plugins/toolsets/rabbitmq/api.py +398 -0
- holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2 +37 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +222 -0
- holmes/plugins/toolsets/robusta/__init__.py +0 -0
- holmes/plugins/toolsets/robusta/robusta.py +235 -0
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +24 -0
- holmes/plugins/toolsets/runbook/__init__.py +0 -0
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +78 -0
- holmes/plugins/toolsets/service_discovery.py +92 -0
- holmes/plugins/toolsets/servicenow/install.md +37 -0
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +3 -0
- holmes/plugins/toolsets/servicenow/servicenow.py +198 -0
- holmes/plugins/toolsets/slab.yaml +20 -0
- holmes/plugins/toolsets/utils.py +137 -0
- holmes/plugins/utils.py +14 -0
- holmes/utils/__init__.py +0 -0
- holmes/utils/cache.py +84 -0
- holmes/utils/cert_utils.py +40 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +44 -0
- holmes/utils/definitions.py +13 -0
- holmes/utils/env.py +53 -0
- holmes/utils/file_utils.py +56 -0
- holmes/utils/global_instructions.py +20 -0
- holmes/utils/holmes_status.py +22 -0
- holmes/utils/holmes_sync_toolsets.py +80 -0
- holmes/utils/markdown_utils.py +55 -0
- holmes/utils/pydantic_utils.py +54 -0
- holmes/utils/robusta.py +10 -0
- holmes/utils/tags.py +97 -0
- holmesgpt-0.11.5.dist-info/LICENSE.txt +21 -0
- holmesgpt-0.11.5.dist-info/METADATA +400 -0
- holmesgpt-0.11.5.dist-info/RECORD +183 -0
- holmesgpt-0.11.5.dist-info/WHEEL +4 -0
- holmesgpt-0.11.5.dist-info/entry_points.txt +3 -0
holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py
@@ -0,0 +1,324 @@
+import logging
+from typing import Dict, Tuple
+from datetime import datetime, timezone
+
+from holmes.core.tools import StructuredToolResult, ToolParameter, ToolResultStatus
+from holmes.plugins.toolsets.azure_sql.azure_base_toolset import (
+    BaseAzureSQLTool,
+    BaseAzureSQLToolset,
+    AzureSQLDatabaseConfig,
+)
+from holmes.plugins.toolsets.azure_sql.apis.azure_sql_api import AzureSQLAPIClient
+from holmes.plugins.toolsets.azure_sql.apis.connection_failure_api import (
+    ConnectionFailureAPI,
+)
+
+
+class AnalyzeConnectionFailures(BaseAzureSQLTool):
+    def __init__(self, toolset: "BaseAzureSQLToolset"):
+        super().__init__(
+            name="analyze_connection_failures",
+            description="Analyzes connection failures, firewall blocks, and connection patterns for Azure SQL Database. Use this to investigate connection issues, authentication problems, and network connectivity problems.",
+            parameters={
+                "hours_back": ToolParameter(
+                    description="Number of hours to look back for connection failure analysis (default: 24, max: 168)",
+                    type="integer",
+                    required=False,
+                ),
+            },
+            toolset=toolset,
+        )
+
+    def _build_connection_failures_report(
+        self, db_config: AzureSQLDatabaseConfig, analysis_data: Dict, hours_back: int
+    ) -> str:
+        """Build the formatted connection failures report from gathered data."""
+        report_sections = []
+
+        # Header
+        report_sections.append("# Azure SQL Database Connection Failures Analysis")
+        report_sections.append(f"**Database:** {db_config.database_name}")
+        report_sections.append(f"**Server:** {db_config.server_name}")
+        report_sections.append(f"**Resource Group:** {db_config.resource_group}")
+        report_sections.append(f"**Analysis Period:** {hours_back} hours")
+        report_sections.append(
+            f"**Generated:** {datetime.now(timezone.utc).isoformat()}"
+        )
+        report_sections.append("")
+
+        # Summary
+        analysis = analysis_data.get("analysis", {})
+        summary = analysis.get("summary", {})
+        issues = analysis.get("issues_detected", [])
+        recommendations = analysis.get("recommendations", [])
+
+        report_sections.append("## Executive Summary")
+        if summary.get("status") == "healthy":
+            report_sections.append(
+                "✅ **Status: HEALTHY** - No significant connection issues detected"
+            )
+        else:
+            report_sections.append(
+                "⚠️ **Status: ISSUES DETECTED** - Connection problems identified"
+            )
+
+        if summary.get("message"):
+            report_sections.append(f"- {summary['message']}")
+        report_sections.append("")
+
+        # Issues Detected
+        if issues:
+            report_sections.append("## Issues Detected")
+            for issue in issues:
+                report_sections.append(f"- {issue}")
+            report_sections.append("")
+
+        # Metrics Analysis
+        metrics_analysis = analysis.get("metrics_analysis", {})
+        if metrics_analysis:
+            report_sections.append("## Connection Metrics Analysis")
+
+            # Connection failures
+            if "connection_failures" in metrics_analysis:
+                failures = metrics_analysis["connection_failures"]
+                report_sections.append("### Connection Failures")
+                report_sections.append(
+                    f"- **Total Failed Connections:** {int(failures.get('total_failed_connections', 0))}"
+                )
+                report_sections.append(
+                    f"- **Peak Failures (1 hour):** {int(failures.get('max_failures_per_hour', 0))}"
+                )
+                report_sections.append(
+                    f"- **Trend:** {failures.get('failure_trend', 'Unknown').title()}"
+                )
+                report_sections.append("")
+
+            # Successful connections
+            if "successful_connections" in metrics_analysis:
+                successful = metrics_analysis["successful_connections"]
+                report_sections.append("### Successful Connections")
+                report_sections.append(
+                    f"- **Total Successful Connections:** {int(successful.get('total_successful_connections', 0))}"
+                )
+                report_sections.append("")
+
+            # Failure rate
+            if "failure_rate_percent" in metrics_analysis:
+                failure_rate = metrics_analysis["failure_rate_percent"]
+                status_icon = (
+                    "🔴" if failure_rate > 5 else "🟡" if failure_rate > 1 else "🟢"
+                )
+                report_sections.append("### Overall Connection Health")
+                report_sections.append(
+                    f"- **Failure Rate:** {failure_rate}% {status_icon}"
+                )
+                report_sections.append("")
+
+        # Activity Log Events
+        activity_data = analysis_data.get("activity_events", {})
+        if activity_data.get("events"):
+            report_sections.append("## Activity Log Events")
+            report_sections.append(
+                f"- **Total Events:** {activity_data.get('total_events', 0)}"
+            )
+            report_sections.append(
+                f"- **Connection-Related Events:** {activity_data.get('connection_related_events', 0)}"
+            )
+            report_sections.append(
+                f"- **Error Events:** {activity_data.get('error_events', 0)}"
+            )
+            report_sections.append(
+                f"- **Warning Events:** {activity_data.get('warning_events', 0)}"
+            )
+
+            # Show recent critical events
+            critical_events = [
+                e
+                for e in activity_data["events"][:10]
+                if e["level"] in ["Error", "Critical"]
+            ]
+
+            if critical_events:
+                report_sections.append("")
+                report_sections.append("### Recent Critical Events")
+                for event in critical_events:
+                    report_sections.append(
+                        f"- **{event['timestamp']}** - {event['operation_name']}"
+                    )
+                    report_sections.append(f"  - Level: {event['level']}")
+                    report_sections.append(f"  - Status: {event['status']}")
+                    if (
+                        event.get("description")
+                        and event["description"] != "No description"
+                    ):
+                        report_sections.append(
+                            f"  - Description: {event['description']}"
+                        )
+            report_sections.append("")
+
+        # Detailed Metrics Data
+        connection_metrics = analysis_data.get("connection_metrics", {})
+        if connection_metrics:
+            report_sections.append("## Detailed Metrics")
+
+            for metric_name, metric_data in connection_metrics.items():
+                if metric_data.get("values") and not metric_data.get("error"):
+                    values = metric_data["values"]
+                    if values:
+                        total_value = sum(dp.get("total", 0) or 0 for dp in values)
+                        max_value = max(
+                            (dp.get("maximum", 0) or 0 for dp in values), default=0
+                        )
+                        avg_value = (
+                            sum(dp.get("average", 0) or 0 for dp in values)
+                            / len(values)
+                            if values
+                            else 0
+                        )
+
+                        report_sections.append(
+                            f"### {metric_name.replace('_', ' ').title()}"
+                        )
+                        report_sections.append(f"- **Total:** {int(total_value)}")
+                        report_sections.append(f"- **Peak (1 hour):** {int(max_value)}")
+                        report_sections.append(f"- **Average:** {avg_value:.1f}")
+                        report_sections.append(f"- **Data Points:** {len(values)}")
+                        report_sections.append("")
+
+        # Recommendations
+        if recommendations:
+            report_sections.append("## Recommendations")
+            for rec in recommendations:
+                report_sections.append(f"- {rec}")
+            report_sections.append("")
+
+        # Resource Information
+        report_sections.append("## Resource Information")
+        report_sections.append(
+            f"- **Database Resource ID:** {analysis_data.get('database_resource_id', 'N/A')}"
+        )
+        report_sections.append(
+            f"- **Server Resource ID:** {analysis_data.get('server_resource_id', 'N/A')}"
+        )
+
+        time_range = analysis_data.get("time_range", {})
+        if time_range:
+            report_sections.append(
+                f"- **Analysis Start:** {time_range.get('start', 'N/A')}"
+            )
+            report_sections.append(
+                f"- **Analysis End:** {time_range.get('end', 'N/A')}"
+            )
+
+        return "\n".join(report_sections)
+
+    def _invoke(self, params: Dict) -> StructuredToolResult:
+        try:
+            # Get configuration
+            db_config = self.toolset.database_config()
+            api_client = self.toolset.api_client()
+
+            # Parse parameters
+            hours_back = params.get("hours_back", 24)
+            hours_back = max(1, min(hours_back, 168))  # Limit between 1 and 168 hours
+
+            # Create connection failure API client
+            connection_api = ConnectionFailureAPI(
+                credential=api_client.credential,
+                subscription_id=db_config.subscription_id,
+            )
+
+            # Analyze connection failures
+            analysis_data = connection_api.analyze_connection_failures(
+                db_config.resource_group,
+                db_config.server_name,
+                db_config.database_name,
+                hours_back,
+            )
+
+            # Check for errors
+            if "error" in analysis_data:
+                return StructuredToolResult(
+                    status=ToolResultStatus.ERROR,
+                    error=analysis_data["error"],
+                    params=params,
+                )
+
+            # Build the formatted report
+            report_text = self._build_connection_failures_report(
+                db_config, analysis_data, hours_back
+            )
+
+            return StructuredToolResult(
+                status=ToolResultStatus.SUCCESS,
+                data=report_text,
+                params=params,
+            )
+
+        except Exception as e:
+            logging.error(
+                f"Error in analyze_connection_failures: {str(e)}", exc_info=True
+            )
+            return StructuredToolResult(
+                status=ToolResultStatus.ERROR,
+                error=f"Failed to analyze connection failures: {str(e)}",
+                params=params,
+            )
+
+    def get_parameterized_one_liner(self, params: Dict) -> str:
+        db_config = self.toolset.database_config()
+        hours_back = params.get("hours_back", 24)
+        return f"Analyze connection failures for {db_config.server_name}/{db_config.database_name} over {hours_back} hours"
+
+    @staticmethod
+    def validate_config(
+        api_client: AzureSQLAPIClient, database_config: AzureSQLDatabaseConfig
+    ) -> Tuple[bool, str]:
+        errors = []
+
+        try:
+            # Test connection failure API access
+            connection_api = ConnectionFailureAPI(
+                credential=api_client.credential,
+                subscription_id=database_config.subscription_id,
+            )
+
+            # Test getting connection metrics (try a minimal request)
+            test_analysis = connection_api.analyze_connection_failures(
+                database_config.resource_group,
+                database_config.server_name,
+                database_config.database_name,
+                hours_back=1,  # Minimal test
+            )
+
+            if "error" in test_analysis:
+                error_msg = test_analysis["error"]
+                if (
+                    "authorization" in error_msg.lower()
+                    or "permission" in error_msg.lower()
+                ):
+                    errors.append(
+                        f"Connection failure monitoring access denied: {error_msg}"
+                    )
+                else:
+                    errors.append(
+                        f"Connection failure monitoring API failed: {error_msg}"
+                    )
+
+        except Exception as e:
+            error_msg = str(e)
+            if (
+                "authorization" in error_msg.lower()
+                or "permission" in error_msg.lower()
+            ):
+                errors.append(
+                    f"Connection failure monitoring API access denied: {error_msg}"
+                )
+            else:
+                errors.append(
+                    f"Connection failure monitoring API connection failed: {error_msg}"
+                )
+
+        if errors:
+            return False, "\n".join(errors)
+        return True, ""
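
For orientation, a minimal usage sketch of the tool added above. It assumes `toolset` is an already-configured BaseAzureSQLToolset (credentials and database config are wired up elsewhere, outside this diff):

# Hypothetical usage sketch -- `toolset` stands in for a configured
# BaseAzureSQLToolset; nothing in this diff constructs one.
from holmes.core.tools import ToolResultStatus
from holmes.plugins.toolsets.azure_sql.tools.analyze_connection_failures import (
    AnalyzeConnectionFailures,
)

tool = AnalyzeConnectionFailures(toolset=toolset)
result = tool._invoke({"hours_back": 48})  # _invoke clamps this to the 1..168 range
if result.status == ToolResultStatus.SUCCESS:
    print(result.data)  # markdown report from _build_connection_failures_report
else:
    print(result.error)
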
holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py
@@ -0,0 +1,243 @@
+import logging
+from typing import Any, Dict, Tuple
+from datetime import datetime, timezone
+
+from holmes.core.tools import StructuredToolResult, ToolParameter, ToolResultStatus
+from holmes.plugins.toolsets.azure_sql.azure_base_toolset import (
+    BaseAzureSQLTool,
+    BaseAzureSQLToolset,
+    AzureSQLDatabaseConfig,
+)
+from holmes.plugins.toolsets.azure_sql.apis.connection_monitoring_api import (
+    ConnectionMonitoringAPI,
+)
+from holmes.plugins.toolsets.azure_sql.apis.azure_sql_api import AzureSQLAPIClient
+
+
+class AnalyzeDatabaseConnections(BaseAzureSQLTool):
+    def __init__(self, toolset: "BaseAzureSQLToolset"):
+        super().__init__(
+            name="analyze_database_connections",
+            description="Analyzes database connection patterns, active connections, and connection pool utilization. Use this to investigate connection-related issues, blocking sessions, and connection pool exhaustion.",
+            parameters={
+                "hours_back": ToolParameter(
+                    description="Time window for metrics analysis in hours. Use 2 for recent activity, 24+ for trend analysis (default: 2)",
+                    type="integer",
+                    required=False,
+                ),
+            },
+            toolset=toolset,
+        )
+
+    def _build_connection_report(
+        self, db_config: AzureSQLDatabaseConfig, connection_data: Dict, hours_back: int
+    ) -> str:
+        """Build the formatted connection report from gathered data."""
+        report_sections = []
+
+        # Header
+        report_sections.append("# Azure SQL Database Connection Report")
+        report_sections.append(f"**Database:** {db_config.database_name}")
+        report_sections.append(f"**Server:** {db_config.server_name}")
+        report_sections.append(f"**Analysis Period:** Last {hours_back} hours")
+        report_sections.append(
+            f"**Generated:** {datetime.now(timezone.utc).isoformat()}"
+        )
+        report_sections.append("")
+
+        # Connection Summary
+        report_sections.append("## Connection Summary")
+        summary = connection_data.get("summary", {})
+        if "error" in summary:
+            report_sections.append(
+                f"⚠️ **Error retrieving connection summary:** {summary['error']}"
+            )
+        else:
+            total_conn = summary.get("total_connections", 0)
+            active_conn = summary.get("active_connections", 0)
+            idle_conn = summary.get("idle_connections", 0)
+            blocked_conn = summary.get("blocked_connections", 0)
+
+            report_sections.append(f"- **Total Connections**: {total_conn}")
+            report_sections.append(f"- **Active Connections**: {active_conn}")
+            report_sections.append(f"- **Idle Connections**: {idle_conn}")
+            if blocked_conn > 0:
+                report_sections.append(f"- **🚨 Blocked Connections**: {blocked_conn}")
+            else:
+                report_sections.append(f"- **Blocked Connections**: {blocked_conn}")
+            report_sections.append(
+                f"- **Unique Users**: {summary.get('unique_users', 0)}"
+            )
+            report_sections.append(
+                f"- **Unique Hosts**: {summary.get('unique_hosts', 0)}"
+            )
+        report_sections.append("")
+
+        # Connection Pool Statistics
+        report_sections.append("## Connection Pool Statistics")
+        pool_stats = connection_data.get("pool_stats", {})
+        if "error" in pool_stats:
+            report_sections.append(
+                f"⚠️ **Error retrieving pool stats:** {pool_stats['error']}"
+            )
+        else:
+            for metric_name, metric_data in pool_stats.items():
+                if isinstance(metric_data, dict) and "value" in metric_data:
+                    value = metric_data["value"]
+                    unit = metric_data.get("unit", "")
+                    report_sections.append(f"- **{metric_name}**: {value:,} {unit}")
+        report_sections.append("")
+
+        # Active Connections Detail
+        report_sections.append("## Active Connections Detail")
+        active_connections = connection_data.get("active_connections", [])
+        if active_connections:
+            active_count = len(
+                [
+                    conn
+                    for conn in active_connections
+                    if conn.get("connection_status") == "Active"
+                ]
+            )
+            report_sections.append(f"**{active_count} active connections found:**")
+            report_sections.append("")
+
+            for i, conn in enumerate(active_connections[:10], 1):  # Show top 10
+                if conn.get("connection_status") == "Active":
+                    login_name = conn.get("login_name", "Unknown")
+                    host_name = conn.get("host_name", "Unknown")
+                    status = conn.get("status", "Unknown")
+                    cpu_time = conn.get("cpu_time", 0)
+                    wait_type = conn.get("wait_type", "")
+                    blocking_session = conn.get("blocking_session_id", 0)
+
+                    report_sections.append(f"### Connection #{i}")
+                    report_sections.append(f"- **User**: {login_name}@{host_name}")
+                    report_sections.append(f"- **Status**: {status}")
+                    report_sections.append(f"- **CPU Time**: {cpu_time:,} ms")
+                    if wait_type:
+                        report_sections.append(f"- **Wait Type**: {wait_type}")
+                    if blocking_session and blocking_session > 0:
+                        report_sections.append(
+                            f"- **🚨 Blocked by Session**: {blocking_session}"
+                        )
+                    report_sections.append("")
+        else:
+            report_sections.append("No active connections found")
+
+        # Azure Monitor Metrics (if available)
+        report_sections.append("## Azure Monitor Connection Metrics")
+        metrics = connection_data.get("metrics", {})
+        if "error" in metrics:
+            report_sections.append(f"⚠️ **Metrics unavailable:** {metrics['error']}")
+        else:
+            for metric_name, metric_data in metrics.items():
+                if metric_data:
+                    recent_values = metric_data[-5:]  # Last 5 data points
+                    if recent_values:
+                        avg_value = sum(
+                            point.get("average", 0) or 0 for point in recent_values
+                        ) / len(recent_values)
+                        max_value = max(
+                            point.get("maximum", 0) or 0 for point in recent_values
+                        )
+                        report_sections.append(
+                            f"- **{metric_name}**: Avg {avg_value:.1f}, Max {max_value:.1f}"
+                        )
+
+            if not any(metrics.values()):
+                report_sections.append("No recent metric data available")
+
+        return "\n".join(report_sections)
+
+    def _invoke(self, params: Dict) -> StructuredToolResult:
+        try:
+            hours_back = params.get("hours_back", 2)
+
+            db_config = self.toolset.database_config()
+
+            # Create connection monitoring API client
+            api_client = self.toolset.api_client()
+            connection_api = ConnectionMonitoringAPI(
+                credential=api_client.credential,
+                subscription_id=db_config.subscription_id,
+            )
+
+            # Gather connection data
+            connection_data: Dict[str, Any] = {}
+
+            # Get connection summary
+            connection_data["summary"] = connection_api.get_connection_summary(
+                db_config.server_name, db_config.database_name
+            )
+
+            # Get active connections
+            connection_data["active_connections"] = (
+                connection_api.get_active_connections(
+                    db_config.server_name, db_config.database_name
+                )
+            )
+
+            # Get connection pool stats
+            connection_data["pool_stats"] = connection_api.get_connection_pool_stats(
+                db_config.server_name, db_config.database_name
+            )
+
+            # Get Azure Monitor metrics
+            connection_data["metrics"] = connection_api.get_connection_metrics(
+                db_config.resource_group,
+                db_config.server_name,
+                db_config.database_name,
+                hours_back,
+            )
+
+            # Build the formatted report
+            report_text = self._build_connection_report(
+                db_config, connection_data, hours_back
+            )
+
+            return StructuredToolResult(
+                status=ToolResultStatus.SUCCESS,
+                data=report_text,
+                params=params,
+            )
+        except Exception as e:
+            error_msg = f"Failed to generate connection report: {str(e)}"
+            logging.error(error_msg)
+            return StructuredToolResult(
+                status=ToolResultStatus.ERROR,
+                error=error_msg,
+                params=params,
+            )
+
+    def get_parameterized_one_liner(self, params: Dict) -> str:
+        db_config = self.toolset.database_config()
+        return f"Analyze database connections for {db_config.server_name}/{db_config.database_name}"
+
+    @staticmethod
+    def validate_config(
+        api_client: AzureSQLAPIClient, database_config: AzureSQLDatabaseConfig
+    ) -> Tuple[bool, str]:
+        error = ""
+
+        try:
+            # Test database advisors API access
+            api_client.get_database_advisors(
+                database_config.subscription_id,
+                database_config.resource_group,
+                database_config.server_name,
+                database_config.database_name,
+            )
+        except Exception as e:
+            error_msg = str(e)
+            if (
+                "authorization" in error_msg.lower()
+                or "permission" in error_msg.lower()
+            ):
+                error = f"Database management API access denied: {error_msg}"
+            else:
+                error = f"Database management API connection failed: {error_msg}"
+
+        if error:
+            return False, error
+        return True, ""
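
Both tools also expose a static validate_config hook. A hedged sketch of calling it follows, where `api_client` and `db_config` stand in for an AzureSQLAPIClient and AzureSQLDatabaseConfig built by the surrounding toolset (not shown in this diff):

# Hypothetical validation sketch -- `api_client` and `db_config` are assumed
# to exist; this diff only defines the validate_config method itself.
from holmes.plugins.toolsets.azure_sql.tools.analyze_database_connections import (
    AnalyzeDatabaseConnections,
)

ok, error = AnalyzeDatabaseConnections.validate_config(api_client, db_config)
if not ok:
    print(f"Toolset misconfigured: {error}")  # access denied or connection failure
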