holmesgpt 0.11.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic. Click here for more details.
- holmes/.git_archival.json +7 -0
- holmes/__init__.py +76 -0
- holmes/__init__.py.bak +76 -0
- holmes/clients/robusta_client.py +24 -0
- holmes/common/env_vars.py +47 -0
- holmes/config.py +526 -0
- holmes/core/__init__.py +0 -0
- holmes/core/conversations.py +578 -0
- holmes/core/investigation.py +152 -0
- holmes/core/investigation_structured_output.py +264 -0
- holmes/core/issue.py +54 -0
- holmes/core/llm.py +250 -0
- holmes/core/models.py +157 -0
- holmes/core/openai_formatting.py +51 -0
- holmes/core/performance_timing.py +72 -0
- holmes/core/prompt.py +42 -0
- holmes/core/resource_instruction.py +17 -0
- holmes/core/runbooks.py +26 -0
- holmes/core/safeguards.py +120 -0
- holmes/core/supabase_dal.py +540 -0
- holmes/core/tool_calling_llm.py +798 -0
- holmes/core/tools.py +566 -0
- holmes/core/tools_utils/__init__.py +0 -0
- holmes/core/tools_utils/tool_executor.py +65 -0
- holmes/core/tools_utils/toolset_utils.py +52 -0
- holmes/core/toolset_manager.py +418 -0
- holmes/interactive.py +229 -0
- holmes/main.py +1041 -0
- holmes/plugins/__init__.py +0 -0
- holmes/plugins/destinations/__init__.py +6 -0
- holmes/plugins/destinations/slack/__init__.py +2 -0
- holmes/plugins/destinations/slack/plugin.py +163 -0
- holmes/plugins/interfaces.py +32 -0
- holmes/plugins/prompts/__init__.py +48 -0
- holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
- holmes/plugins/prompts/_default_log_prompt.jinja2 +11 -0
- holmes/plugins/prompts/_fetch_logs.jinja2 +36 -0
- holmes/plugins/prompts/_general_instructions.jinja2 +86 -0
- holmes/plugins/prompts/_global_instructions.jinja2 +12 -0
- holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -0
- holmes/plugins/prompts/_toolsets_instructions.jinja2 +56 -0
- holmes/plugins/prompts/generic_ask.jinja2 +36 -0
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +32 -0
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +50 -0
- holmes/plugins/prompts/generic_investigation.jinja2 +42 -0
- holmes/plugins/prompts/generic_post_processing.jinja2 +13 -0
- holmes/plugins/prompts/generic_ticket.jinja2 +12 -0
- holmes/plugins/prompts/investigation_output_format.jinja2 +32 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +84 -0
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +39 -0
- holmes/plugins/runbooks/README.md +22 -0
- holmes/plugins/runbooks/__init__.py +100 -0
- holmes/plugins/runbooks/catalog.json +14 -0
- holmes/plugins/runbooks/jira.yaml +12 -0
- holmes/plugins/runbooks/kube-prometheus-stack.yaml +10 -0
- holmes/plugins/runbooks/networking/dns_troubleshooting_instructions.md +66 -0
- holmes/plugins/runbooks/upgrade/upgrade_troubleshooting_instructions.md +44 -0
- holmes/plugins/sources/github/__init__.py +77 -0
- holmes/plugins/sources/jira/__init__.py +123 -0
- holmes/plugins/sources/opsgenie/__init__.py +93 -0
- holmes/plugins/sources/pagerduty/__init__.py +147 -0
- holmes/plugins/sources/prometheus/__init__.py +0 -0
- holmes/plugins/sources/prometheus/models.py +104 -0
- holmes/plugins/sources/prometheus/plugin.py +154 -0
- holmes/plugins/toolsets/__init__.py +171 -0
- holmes/plugins/toolsets/aks-node-health.yaml +65 -0
- holmes/plugins/toolsets/aks.yaml +86 -0
- holmes/plugins/toolsets/argocd.yaml +70 -0
- holmes/plugins/toolsets/atlas_mongodb/instructions.jinja2 +8 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +307 -0
- holmes/plugins/toolsets/aws.yaml +76 -0
- holmes/plugins/toolsets/azure_sql/__init__.py +0 -0
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +600 -0
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +309 -0
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +445 -0
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +251 -0
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +317 -0
- holmes/plugins/toolsets/azure_sql/azure_base_toolset.py +55 -0
- holmes/plugins/toolsets/azure_sql/azure_sql_instructions.jinja2 +137 -0
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +183 -0
- holmes/plugins/toolsets/azure_sql/install.md +66 -0
- holmes/plugins/toolsets/azure_sql/tools/__init__.py +1 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +324 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +243 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +205 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +249 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +373 -0
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +237 -0
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +172 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +170 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +188 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +180 -0
- holmes/plugins/toolsets/azure_sql/utils.py +83 -0
- holmes/plugins/toolsets/bash/__init__.py +0 -0
- holmes/plugins/toolsets/bash/bash_instructions.jinja2 +14 -0
- holmes/plugins/toolsets/bash/bash_toolset.py +208 -0
- holmes/plugins/toolsets/bash/common/bash.py +52 -0
- holmes/plugins/toolsets/bash/common/config.py +14 -0
- holmes/plugins/toolsets/bash/common/stringify.py +25 -0
- holmes/plugins/toolsets/bash/common/validators.py +24 -0
- holmes/plugins/toolsets/bash/grep/__init__.py +52 -0
- holmes/plugins/toolsets/bash/kubectl/__init__.py +100 -0
- holmes/plugins/toolsets/bash/kubectl/constants.py +96 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +66 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +88 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +108 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +20 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +46 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +81 -0
- holmes/plugins/toolsets/bash/parse_command.py +103 -0
- holmes/plugins/toolsets/confluence.yaml +19 -0
- holmes/plugins/toolsets/consts.py +5 -0
- holmes/plugins/toolsets/coralogix/api.py +158 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +103 -0
- holmes/plugins/toolsets/coralogix/utils.py +181 -0
- holmes/plugins/toolsets/datadog.py +153 -0
- holmes/plugins/toolsets/docker.yaml +46 -0
- holmes/plugins/toolsets/git.py +756 -0
- holmes/plugins/toolsets/grafana/__init__.py +0 -0
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +54 -0
- holmes/plugins/toolsets/grafana/common.py +68 -0
- holmes/plugins/toolsets/grafana/grafana_api.py +31 -0
- holmes/plugins/toolsets/grafana/loki_api.py +89 -0
- holmes/plugins/toolsets/grafana/tempo_api.py +124 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +102 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +102 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +10 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -0
- holmes/plugins/toolsets/grafana/trace_parser.py +195 -0
- holmes/plugins/toolsets/helm.yaml +42 -0
- holmes/plugins/toolsets/internet/internet.py +275 -0
- holmes/plugins/toolsets/internet/notion.py +137 -0
- holmes/plugins/toolsets/kafka.py +638 -0
- holmes/plugins/toolsets/kubernetes.yaml +255 -0
- holmes/plugins/toolsets/kubernetes_logs.py +426 -0
- holmes/plugins/toolsets/kubernetes_logs.yaml +42 -0
- holmes/plugins/toolsets/logging_utils/__init__.py +0 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +217 -0
- holmes/plugins/toolsets/logging_utils/types.py +0 -0
- holmes/plugins/toolsets/mcp/toolset_mcp.py +135 -0
- holmes/plugins/toolsets/newrelic.py +222 -0
- holmes/plugins/toolsets/opensearch/__init__.py +0 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +245 -0
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +151 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +211 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +12 -0
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +166 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +818 -0
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +38 -0
- holmes/plugins/toolsets/rabbitmq/api.py +398 -0
- holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2 +37 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +222 -0
- holmes/plugins/toolsets/robusta/__init__.py +0 -0
- holmes/plugins/toolsets/robusta/robusta.py +235 -0
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +24 -0
- holmes/plugins/toolsets/runbook/__init__.py +0 -0
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +78 -0
- holmes/plugins/toolsets/service_discovery.py +92 -0
- holmes/plugins/toolsets/servicenow/install.md +37 -0
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +3 -0
- holmes/plugins/toolsets/servicenow/servicenow.py +198 -0
- holmes/plugins/toolsets/slab.yaml +20 -0
- holmes/plugins/toolsets/utils.py +137 -0
- holmes/plugins/utils.py +14 -0
- holmes/utils/__init__.py +0 -0
- holmes/utils/cache.py +84 -0
- holmes/utils/cert_utils.py +40 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +44 -0
- holmes/utils/definitions.py +13 -0
- holmes/utils/env.py +53 -0
- holmes/utils/file_utils.py +56 -0
- holmes/utils/global_instructions.py +20 -0
- holmes/utils/holmes_status.py +22 -0
- holmes/utils/holmes_sync_toolsets.py +80 -0
- holmes/utils/markdown_utils.py +55 -0
- holmes/utils/pydantic_utils.py +54 -0
- holmes/utils/robusta.py +10 -0
- holmes/utils/tags.py +97 -0
- holmesgpt-0.11.5.dist-info/LICENSE.txt +21 -0
- holmesgpt-0.11.5.dist-info/METADATA +400 -0
- holmesgpt-0.11.5.dist-info/RECORD +183 -0
- holmesgpt-0.11.5.dist-info/WHEEL +4 -0
- holmesgpt-0.11.5.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# Azure SQL Database Troubleshooting Guidelines
|
|
2
|
+
|
|
3
|
+
## Goal
|
|
4
|
+
Your primary goal when using these tools is to diagnose Azure SQL Database performance, health, and operational issues. Use these tools systematically to identify problems and provide actionable insights.
|
|
5
|
+
|
|
6
|
+
## Diagnostic Workflow
|
|
7
|
+
|
|
8
|
+
### 1. Start with Health and Alert Assessment
|
|
9
|
+
- **Always begin** with `analyze_database_health_status` to get overall database health
|
|
10
|
+
- **Immediately follow** with `get_active_alerts` to check for any active monitoring alerts
|
|
11
|
+
- Look for:
|
|
12
|
+
- Active operations that might indicate ongoing maintenance or issues
|
|
13
|
+
- Resource usage warnings (high CPU, memory, storage usage >90%)
|
|
14
|
+
- Active Azure Monitor alerts indicating problems
|
|
15
|
+
- Any error messages in operations or usage retrieval
|
|
16
|
+
|
|
17
|
+
### 2. Performance Analysis
|
|
18
|
+
- Use `analyze_database_performance` to examine:
|
|
19
|
+
- **Automatic tuning status** - check if desired and actual states match
|
|
20
|
+
- **Performance advisors** - look for active recommendations
|
|
21
|
+
- **Performance recommendations** - pay attention to high-impact suggestions
|
|
22
|
+
- Focus on recommendations with "High" impact and "Active" or "Pending" status
|
|
23
|
+
|
|
24
|
+
### 3. Query Performance Deep Dive
|
|
25
|
+
When performance issues are detected, use these tools to identify problematic queries:
|
|
26
|
+
- `get_top_cpu_queries` - Start here for CPU performance issues
|
|
27
|
+
- `get_slow_queries` - For response time problems
|
|
28
|
+
- `get_top_data_io_queries` - For storage I/O bottlenecks
|
|
29
|
+
- `get_top_log_io_queries` - For transaction log performance issues
|
|
30
|
+
|
|
31
|
+
**Query Analysis Best Practices:**
|
|
32
|
+
- Use `hours_back=24` for trend analysis, `hours_back=2` for recent issues
|
|
33
|
+
- Look for queries with high execution counts AND high resource usage
|
|
34
|
+
- Pay attention to query patterns that might indicate inefficient application logic
|
|
35
|
+
|
|
36
|
+
### 4. Alert and Monitoring Analysis
|
|
37
|
+
- **Always check alerts** with `get_active_alerts` to understand current system health
|
|
38
|
+
- Look for:
|
|
39
|
+
- Active Azure Monitor alerts related to the database
|
|
40
|
+
- Alert patterns and frequencies
|
|
41
|
+
- Critical or error-level alerts requiring immediate attention
|
|
42
|
+
- Use alert information to prioritize further investigation
|
|
43
|
+
|
|
44
|
+
### 5. Connection Issues Investigation
|
|
45
|
+
- Use `analyze_database_connections` for general connection analysis:
|
|
46
|
+
- Connection pool exhaustion
|
|
47
|
+
- Blocking sessions
|
|
48
|
+
- High concurrent user loads
|
|
49
|
+
- Use `analyze_connection_failures` for connection failure patterns:
|
|
50
|
+
- Authentication failures
|
|
51
|
+
- Firewall blocks
|
|
52
|
+
- Connection timeout issues
|
|
53
|
+
- Failed connection trends and analysis
|
|
54
|
+
- Look for blocked connections and identify blocking sessions
|
|
55
|
+
|
|
56
|
+
### 6. Storage Problems
|
|
57
|
+
- Use `analyze_database_storage` for:
|
|
58
|
+
- Storage capacity planning
|
|
59
|
+
- Growth rate analysis
|
|
60
|
+
- File-level storage issues
|
|
61
|
+
- TempDB usage problems
|
|
62
|
+
|
|
63
|
+
## Key Indicators to Flag
|
|
64
|
+
|
|
65
|
+
### Critical Issues (🚨)
|
|
66
|
+
- **Active critical alerts** (Sev0/Critical level) in monitoring
|
|
67
|
+
- **High connection failure rates** (>5% failed connections)
|
|
68
|
+
- **Firewall blocks** indicating unauthorized access attempts
|
|
69
|
+
- **Blocked connections** with blocking_session_id > 0
|
|
70
|
+
- **Storage usage > 90%** in any file
|
|
71
|
+
- **Active performance recommendations** with High impact
|
|
72
|
+
- **Resource alarms** (memory, disk space)
|
|
73
|
+
- **Auto-tuning mismatches** (desired ≠ actual state)
|
|
74
|
+
|
|
75
|
+
### Warning Signs (⚠️)
|
|
76
|
+
- **Warning-level alerts** (Sev1/Sev2) that may indicate developing issues
|
|
77
|
+
- **Connection failure trends** showing increasing failures over time
|
|
78
|
+
- **Storage usage 70-90%**
|
|
79
|
+
- **High daily growth rates** (>100MB/day)
|
|
80
|
+
- **Many concurrent connections** without proper pooling
|
|
81
|
+
- **Long-running queries** (>10 seconds average duration)
|
|
82
|
+
|
|
83
|
+
## Recommendations Framework
|
|
84
|
+
|
|
85
|
+
### Performance Issues
|
|
86
|
+
1. **Index recommendations**: Apply suggested indexes from performance advisors
|
|
87
|
+
2. **Query optimization**: Rewrite queries identified in top CPU/IO reports
|
|
88
|
+
3. **Auto-tuning**: Enable automatic tuning options if they're disabled
|
|
89
|
+
|
|
90
|
+
### Storage Issues
|
|
91
|
+
1. **Immediate**: Address files >90% full
|
|
92
|
+
2. **Planning**: Monitor growth trends and plan capacity increases
|
|
93
|
+
3. **Optimization**: Review top space-consuming tables for archiving opportunities
|
|
94
|
+
|
|
95
|
+
### Connection Issues
|
|
96
|
+
1. **Connection failures**: Investigate and resolve authentication/firewall issues
|
|
97
|
+
2. **Connection pooling**: Implement proper connection pooling in applications
|
|
98
|
+
3. **Blocking resolution**: Identify and optimize long-running transactions
|
|
99
|
+
4. **Security**: Review firewall rules if blocked connections are detected
|
|
100
|
+
5. **Monitoring**: Set up alerts for connection count and failure rate thresholds
|
|
101
|
+
|
|
102
|
+
### Alert Management
|
|
103
|
+
1. **Critical alerts**: Address immediately - these indicate active problems
|
|
104
|
+
2. **Alert patterns**: Look for recurring alerts that indicate systemic issues
|
|
105
|
+
3. **Proactive monitoring**: Set up additional alerts based on discovered patterns
|
|
106
|
+
|
|
107
|
+
## Analysis Structure
|
|
108
|
+
Present findings in this order:
|
|
109
|
+
1. **Executive Summary** - High-level status and critical issues
|
|
110
|
+
2. **Detailed Findings** - Tool-by-tool analysis with specific metrics
|
|
111
|
+
3. **Prioritized Recommendations** - Critical first, then important
|
|
112
|
+
4. **Monitoring Suggestions** - Ongoing observability improvements
|
|
113
|
+
|
|
114
|
+
## Available Tools Reference
|
|
115
|
+
|
|
116
|
+
### Health and Monitoring Tools
|
|
117
|
+
- `analyze_database_health_status` - Overall database health and resource usage
|
|
118
|
+
- `get_active_alerts` - Active Azure Monitor alerts and alert patterns
|
|
119
|
+
- `analyze_database_performance` - Performance advisors and automatic tuning status
|
|
120
|
+
|
|
121
|
+
### Query Performance Tools
|
|
122
|
+
- `get_top_cpu_queries` - Highest CPU-consuming queries from Query Store
|
|
123
|
+
- `get_slow_queries` - Longest-running queries causing response time issues
|
|
124
|
+
- `get_top_data_io_queries` - Queries with highest data I/O (logical reads/writes)
|
|
125
|
+
- `get_top_log_io_queries` - Queries consuming most transaction log I/O
|
|
126
|
+
|
|
127
|
+
### Connection and Infrastructure Tools
|
|
128
|
+
- `analyze_database_connections` - Connection pooling, blocking sessions, concurrent users
|
|
129
|
+
- `analyze_connection_failures` - Connection failure patterns, authentication issues, firewall blocks
|
|
130
|
+
- `analyze_database_storage` - Storage capacity, growth trends, file-level analysis
|
|
131
|
+
|
|
132
|
+
## Important Notes
|
|
133
|
+
- **Time ranges**: Adjust `hours_back` based on issue timeline (2h for immediate, 24h+ for trends)
|
|
134
|
+
- **Multiple tools**: Use complementary tools together (e.g., health + alerts + performance reports)
|
|
135
|
+
- **Resource context**: Always consider database tier and limits when interpreting metrics
|
|
136
|
+
- **Correlation**: Look for patterns across different metrics (CPU high + IO high = query optimization needed)
|
|
137
|
+
- **Security focus**: Use connection failure analysis to detect potential security issues
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Any, Dict, Tuple, Union
|
|
4
|
+
|
|
5
|
+
from azure.identity import DefaultAzureCredential, ClientSecretCredential
|
|
6
|
+
|
|
7
|
+
from holmes.core.tools import (
|
|
8
|
+
CallablePrerequisite,
|
|
9
|
+
ToolsetTag,
|
|
10
|
+
)
|
|
11
|
+
from holmes.plugins.toolsets.azure_sql.apis.azure_sql_api import AzureSQLAPIClient
|
|
12
|
+
from holmes.plugins.toolsets.consts import TOOLSET_CONFIG_MISSING_ERROR
|
|
13
|
+
from holmes.plugins.toolsets.azure_sql.azure_base_toolset import (
|
|
14
|
+
BaseAzureSQLToolset,
|
|
15
|
+
AzureSQLConfig,
|
|
16
|
+
AzureSQLDatabaseConfig,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# Import all tool classes
|
|
20
|
+
from holmes.plugins.toolsets.azure_sql.tools.analyze_database_health_status import (
|
|
21
|
+
AnalyzeDatabaseHealthStatus,
|
|
22
|
+
)
|
|
23
|
+
from holmes.plugins.toolsets.azure_sql.tools.analyze_database_performance import (
|
|
24
|
+
AnalyzeDatabasePerformance,
|
|
25
|
+
)
|
|
26
|
+
from holmes.plugins.toolsets.azure_sql.tools.analyze_database_connections import (
|
|
27
|
+
AnalyzeDatabaseConnections,
|
|
28
|
+
)
|
|
29
|
+
from holmes.plugins.toolsets.azure_sql.tools.analyze_database_storage import (
|
|
30
|
+
AnalyzeDatabaseStorage,
|
|
31
|
+
)
|
|
32
|
+
from holmes.plugins.toolsets.azure_sql.tools.get_top_cpu_queries import GetTopCPUQueries
|
|
33
|
+
from holmes.plugins.toolsets.azure_sql.tools.get_slow_queries import GetSlowQueries
|
|
34
|
+
from holmes.plugins.toolsets.azure_sql.tools.get_top_data_io_queries import (
|
|
35
|
+
GetTopDataIOQueries,
|
|
36
|
+
)
|
|
37
|
+
from holmes.plugins.toolsets.azure_sql.tools.get_top_log_io_queries import (
|
|
38
|
+
GetTopLogIOQueries,
|
|
39
|
+
)
|
|
40
|
+
from holmes.plugins.toolsets.azure_sql.tools.get_active_alerts import GetActiveAlerts
|
|
41
|
+
from holmes.plugins.toolsets.azure_sql.tools.analyze_connection_failures import (
|
|
42
|
+
AnalyzeConnectionFailures,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class AzureSQLToolset(BaseAzureSQLToolset):
    """Toolset that bundles the Azure SQL Database diagnostic tools.

    The toolset registers the health, performance, connection, storage,
    query and alert tools, and validates the Azure credentials through
    :meth:`prerequisites_callable` before the tools are usable.
    """

    def __init__(self):
        """Register the Azure SQL tools and load the LLM instructions."""
        # Reduce Azure SDK HTTP logging verbosity
        for noisy_logger in (
            "azure.core.pipeline.policies.http_logging_policy",
            "azure.identity",
            "azure.mgmt",
            "azure.monitor",
        ):
            logging.getLogger(noisy_logger).setLevel(logging.WARNING)

        super().__init__(
            name="azure/sql",
            description="Analyzes Azure SQL Database performance, health, and operational issues using Azure REST APIs and Query Store data",
            prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
            # NOTE(review): this value is an image-proxy link to a PNG logo,
            # not a documentation page. It is kept unchanged here; replace it
            # with the toolset's real documentation URL once confirmed.
            docs_url="https://kagi.com/proxy/png-clipart-microsoft-sql-server-microsoft-azure-sql-database-microsoft-text-logo-thumbnail.png?c=4Sg1bvcUGOrhnDzXgoBBa0G0j27ykgskX4a8cLrZp_quzqlpVGVG02OqQtezTxy7lB6ydmTKgbVAn_F7BxofxK6LKKUZSpjJ1huIAsXPVaXyakO4sWXFiX0Wz_8WjkA0AIlO_oFfW31AKaj5RcvGcr3siy0n5kW-GcqdpeBWsmm_huxUT6RycULFCDFBwuUzHvVl5TW3cYqlMxT8ecPZfg%3D%3D",
            icon_url="https://upload.wikimedia.org/wikipedia/commons/thumb/f/f7/Azure_SQL_Database_logo.svg/1200px-Azure_SQL_Database_logo.svg.png",
            tags=[ToolsetTag.CORE],
            experimental=True,
            tools=[
                AnalyzeDatabaseHealthStatus(self),
                AnalyzeDatabasePerformance(self),
                AnalyzeDatabaseConnections(self),
                AnalyzeDatabaseStorage(self),
                GetTopCPUQueries(self),
                GetSlowQueries(self),
                GetTopDataIOQueries(self),
                GetTopLogIOQueries(self),
                GetActiveAlerts(self),
                AnalyzeConnectionFailures(self),
            ],
        )
        self._reload_llm_instructions()

    def prerequisites_callable(self, config: Dict[str, Any]) -> Tuple[bool, str]:
        """Validate the configuration and Azure credentials for this toolset.

        On success the database configuration and an ``AzureSQLAPIClient``
        are stored on the instance (``_database_config`` / ``_api_client``).

        Args:
            config: Raw toolset configuration, passed as keyword arguments
                to ``AzureSQLConfig``.

        Returns:
            ``(True, "")`` when the configuration parses and tokens for both
            the management and the SQL database scopes can be obtained;
            otherwise ``(False, message)`` describing the failure.
        """
        if not config:
            return False, TOOLSET_CONFIG_MISSING_ERROR

        try:
            azure_sql_config = AzureSQLConfig(**config)

            # Set up Azure credentials
            try:
                credential: Union[ClientSecretCredential, DefaultAzureCredential]
                if (
                    azure_sql_config.tenant_id
                    and azure_sql_config.client_id
                    and azure_sql_config.client_secret
                ):
                    logging.info(
                        "Using ClientSecretCredential for Azure authentication"
                    )
                    credential = ClientSecretCredential(
                        tenant_id=azure_sql_config.tenant_id,
                        client_id=azure_sql_config.client_id,
                        client_secret=azure_sql_config.client_secret,
                    )
                else:
                    logging.info(
                        "Using DefaultAzureCredential for Azure authentication"
                    )
                    credential = DefaultAzureCredential()

                # Test the credential by attempting to get tokens for both
                # required scopes
                mgmt_token = credential.get_token(
                    "https://management.azure.com/.default"
                )
                if not mgmt_token.token:
                    raise Exception("Failed to obtain Azure management token")

                # Test SQL database token as well
                sql_token = credential.get_token(
                    "https://database.windows.net/.default"
                )
                if not sql_token.token:
                    raise Exception("Failed to obtain Azure SQL database token")

            except Exception as e:
                # Authentication problems are reported with a dedicated
                # message rather than through the generic handler below.
                message = f"Failed to set up Azure authentication: {str(e)}"
                logging.error(message)
                return False, message

            # Store single database configuration and create API client
            self._database_config = azure_sql_config.database
            self._api_client = AzureSQLAPIClient(
                credential, azure_sql_config.database.subscription_id
            )
            logging.info(
                f"Configured Azure SQL database: {azure_sql_config.database.server_name}/{azure_sql_config.database.database_name}"
            )

            # NOTE: per-tool configuration validation is currently disabled,
            # so reaching this point means there is nothing left to report.
            return True, ""
        except Exception as e:
            logging.exception("Failed to set up Azure SQL toolset")
            return False, str(e)

    def get_example_config(self) -> Dict[str, Any]:
        """Return an example configuration as produced by ``model_dump()``.

        The credential fields hold ``{{ env.* }}`` template placeholders and
        the database fields hold illustrative dummy values.
        """
        example_config = AzureSQLConfig(
            tenant_id="{{ env.AZURE_TENANT_ID }}",
            client_id="{{ env.AZURE_CLIENT_ID }}",
            client_secret="{{ env.AZURE_CLIENT_SECRET }}",
            database=AzureSQLDatabaseConfig(
                subscription_id="12345678-1234-1234-1234-123456789012",
                resource_group="my-resource-group",
                server_name="myserver",
                database_name="mydatabase",
            ),
        )
        return example_config.model_dump()

    def _reload_llm_instructions(self):
        """Load Azure SQL specific troubleshooting instructions."""
        # The template lives next to this module; it is passed on as a
        # ``file://`` reference built from its absolute path.
        template_file_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "azure_sql_instructions.jinja2")
        )
        self._load_llm_instructions(jinja_template=f"file://{template_file_path}")
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# The Dockerfile contains the ODBC driver.
|
|
2
|
+
|
|
3
|
+
Supported authentication methods include Azure AD Workload Identity and Service Principal.
|
|
4
|
+
|
|
5
|
+
## Configuration
|
|
6
|
+
|
|
7
|
+
### Azure AD Workload Identity
|
|
8
|
+
|
|
9
|
+
```yaml
|
|
10
|
+
holmes:
|
|
11
|
+
toolsets:
|
|
12
|
+
azure/sql:
|
|
13
|
+
enabled: True
|
|
14
|
+
config:
|
|
15
|
+
database:
|
|
16
|
+
subscription_id: "2f90e3c5-xxxx-xxxx-xxxx-9783a7a5dea7"
|
|
17
|
+
resource_group: "<...azure resource group...>"
|
|
18
|
+
server_name: "<azure sql server name>"
|
|
19
|
+
database_name: "<azure sql database name>"
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
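With AD workload identity, omit `tenant_id`, `client_id`, and `client_secret`; the toolset then authenticates with `DefaultAzureCredential`.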
|
|
23
|
+
|
|
24
|
+
### Service Principal
|
|
25
|
+
|
|
26
|
+
```yaml
|
|
27
|
+
holmes:
|
|
28
|
+
toolsets:
|
|
29
|
+
azure/sql:
|
|
30
|
+
enabled: True
|
|
31
|
+
config:
|
|
32
|
+
tenant_id: e5317b2d-xxxx-xxxx-xxxx-875841d00831
|
|
33
|
+
client_id: 73bacf7a-xxxx-xxxx-xxxx-110360f79d16
|
|
34
|
+
client_secret: "xxxx"
|
|
35
|
+
database:
|
|
36
|
+
subscription_id: "2f90e3c5-xxxx-xxxx-xxxx-9783a7a5dea7"
|
|
37
|
+
resource_group: "<...azure resource group...>"
|
|
38
|
+
server_name: "<azure sql server name>"
|
|
39
|
+
database_name: "<azure sql database name>"
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
### Roles / Access controls
|
|
44
|
+
|
|
45
|
+
The service principal requires these roles:
|
|
46
|
+
|
|
47
|
+
#### 1. Azure
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
Azure Level (RBAC):
|
|
51
|
+
├── Monitoring Reader (subscription)
|
|
52
|
+
└── SQL DB Contributor (resource group)
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
#### 2. SQL
|
|
56
|
+
|
|
57
|
+
```
|
|
58
|
+
Database Level (SQL permissions):
|
|
59
|
+
├── CREATE USER [holmes-service-principal] FROM EXTERNAL PROVIDER
|
|
60
|
+
├── GRANT VIEW SERVER STATE TO [holmes-service-principal]
|
|
61
|
+
└── ALTER ROLE db_datareader ADD MEMBER [holmes-service-principal]
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
#### 3. Query Store
|
|
65
|
+
|
|
66
|
+
In addition, Query Store must be enabled on the target database, because the query performance tools read their data from it.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Azure SQL Tool Classes
|