holmesgpt 0.11.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic. Click here for more details.
- holmes/.git_archival.json +7 -0
- holmes/__init__.py +76 -0
- holmes/__init__.py.bak +76 -0
- holmes/clients/robusta_client.py +24 -0
- holmes/common/env_vars.py +47 -0
- holmes/config.py +526 -0
- holmes/core/__init__.py +0 -0
- holmes/core/conversations.py +578 -0
- holmes/core/investigation.py +152 -0
- holmes/core/investigation_structured_output.py +264 -0
- holmes/core/issue.py +54 -0
- holmes/core/llm.py +250 -0
- holmes/core/models.py +157 -0
- holmes/core/openai_formatting.py +51 -0
- holmes/core/performance_timing.py +72 -0
- holmes/core/prompt.py +42 -0
- holmes/core/resource_instruction.py +17 -0
- holmes/core/runbooks.py +26 -0
- holmes/core/safeguards.py +120 -0
- holmes/core/supabase_dal.py +540 -0
- holmes/core/tool_calling_llm.py +798 -0
- holmes/core/tools.py +566 -0
- holmes/core/tools_utils/__init__.py +0 -0
- holmes/core/tools_utils/tool_executor.py +65 -0
- holmes/core/tools_utils/toolset_utils.py +52 -0
- holmes/core/toolset_manager.py +418 -0
- holmes/interactive.py +229 -0
- holmes/main.py +1041 -0
- holmes/plugins/__init__.py +0 -0
- holmes/plugins/destinations/__init__.py +6 -0
- holmes/plugins/destinations/slack/__init__.py +2 -0
- holmes/plugins/destinations/slack/plugin.py +163 -0
- holmes/plugins/interfaces.py +32 -0
- holmes/plugins/prompts/__init__.py +48 -0
- holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
- holmes/plugins/prompts/_default_log_prompt.jinja2 +11 -0
- holmes/plugins/prompts/_fetch_logs.jinja2 +36 -0
- holmes/plugins/prompts/_general_instructions.jinja2 +86 -0
- holmes/plugins/prompts/_global_instructions.jinja2 +12 -0
- holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -0
- holmes/plugins/prompts/_toolsets_instructions.jinja2 +56 -0
- holmes/plugins/prompts/generic_ask.jinja2 +36 -0
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +32 -0
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +50 -0
- holmes/plugins/prompts/generic_investigation.jinja2 +42 -0
- holmes/plugins/prompts/generic_post_processing.jinja2 +13 -0
- holmes/plugins/prompts/generic_ticket.jinja2 +12 -0
- holmes/plugins/prompts/investigation_output_format.jinja2 +32 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +84 -0
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +39 -0
- holmes/plugins/runbooks/README.md +22 -0
- holmes/plugins/runbooks/__init__.py +100 -0
- holmes/plugins/runbooks/catalog.json +14 -0
- holmes/plugins/runbooks/jira.yaml +12 -0
- holmes/plugins/runbooks/kube-prometheus-stack.yaml +10 -0
- holmes/plugins/runbooks/networking/dns_troubleshooting_instructions.md +66 -0
- holmes/plugins/runbooks/upgrade/upgrade_troubleshooting_instructions.md +44 -0
- holmes/plugins/sources/github/__init__.py +77 -0
- holmes/plugins/sources/jira/__init__.py +123 -0
- holmes/plugins/sources/opsgenie/__init__.py +93 -0
- holmes/plugins/sources/pagerduty/__init__.py +147 -0
- holmes/plugins/sources/prometheus/__init__.py +0 -0
- holmes/plugins/sources/prometheus/models.py +104 -0
- holmes/plugins/sources/prometheus/plugin.py +154 -0
- holmes/plugins/toolsets/__init__.py +171 -0
- holmes/plugins/toolsets/aks-node-health.yaml +65 -0
- holmes/plugins/toolsets/aks.yaml +86 -0
- holmes/plugins/toolsets/argocd.yaml +70 -0
- holmes/plugins/toolsets/atlas_mongodb/instructions.jinja2 +8 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +307 -0
- holmes/plugins/toolsets/aws.yaml +76 -0
- holmes/plugins/toolsets/azure_sql/__init__.py +0 -0
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +600 -0
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +309 -0
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +445 -0
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +251 -0
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +317 -0
- holmes/plugins/toolsets/azure_sql/azure_base_toolset.py +55 -0
- holmes/plugins/toolsets/azure_sql/azure_sql_instructions.jinja2 +137 -0
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +183 -0
- holmes/plugins/toolsets/azure_sql/install.md +66 -0
- holmes/plugins/toolsets/azure_sql/tools/__init__.py +1 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +324 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +243 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +205 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +249 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +373 -0
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +237 -0
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +172 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +170 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +188 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +180 -0
- holmes/plugins/toolsets/azure_sql/utils.py +83 -0
- holmes/plugins/toolsets/bash/__init__.py +0 -0
- holmes/plugins/toolsets/bash/bash_instructions.jinja2 +14 -0
- holmes/plugins/toolsets/bash/bash_toolset.py +208 -0
- holmes/plugins/toolsets/bash/common/bash.py +52 -0
- holmes/plugins/toolsets/bash/common/config.py +14 -0
- holmes/plugins/toolsets/bash/common/stringify.py +25 -0
- holmes/plugins/toolsets/bash/common/validators.py +24 -0
- holmes/plugins/toolsets/bash/grep/__init__.py +52 -0
- holmes/plugins/toolsets/bash/kubectl/__init__.py +100 -0
- holmes/plugins/toolsets/bash/kubectl/constants.py +96 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +66 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +88 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +108 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +20 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +46 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +81 -0
- holmes/plugins/toolsets/bash/parse_command.py +103 -0
- holmes/plugins/toolsets/confluence.yaml +19 -0
- holmes/plugins/toolsets/consts.py +5 -0
- holmes/plugins/toolsets/coralogix/api.py +158 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +103 -0
- holmes/plugins/toolsets/coralogix/utils.py +181 -0
- holmes/plugins/toolsets/datadog.py +153 -0
- holmes/plugins/toolsets/docker.yaml +46 -0
- holmes/plugins/toolsets/git.py +756 -0
- holmes/plugins/toolsets/grafana/__init__.py +0 -0
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +54 -0
- holmes/plugins/toolsets/grafana/common.py +68 -0
- holmes/plugins/toolsets/grafana/grafana_api.py +31 -0
- holmes/plugins/toolsets/grafana/loki_api.py +89 -0
- holmes/plugins/toolsets/grafana/tempo_api.py +124 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +102 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +102 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +10 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -0
- holmes/plugins/toolsets/grafana/trace_parser.py +195 -0
- holmes/plugins/toolsets/helm.yaml +42 -0
- holmes/plugins/toolsets/internet/internet.py +275 -0
- holmes/plugins/toolsets/internet/notion.py +137 -0
- holmes/plugins/toolsets/kafka.py +638 -0
- holmes/plugins/toolsets/kubernetes.yaml +255 -0
- holmes/plugins/toolsets/kubernetes_logs.py +426 -0
- holmes/plugins/toolsets/kubernetes_logs.yaml +42 -0
- holmes/plugins/toolsets/logging_utils/__init__.py +0 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +217 -0
- holmes/plugins/toolsets/logging_utils/types.py +0 -0
- holmes/plugins/toolsets/mcp/toolset_mcp.py +135 -0
- holmes/plugins/toolsets/newrelic.py +222 -0
- holmes/plugins/toolsets/opensearch/__init__.py +0 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +245 -0
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +151 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +211 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +12 -0
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +166 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +818 -0
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +38 -0
- holmes/plugins/toolsets/rabbitmq/api.py +398 -0
- holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2 +37 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +222 -0
- holmes/plugins/toolsets/robusta/__init__.py +0 -0
- holmes/plugins/toolsets/robusta/robusta.py +235 -0
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +24 -0
- holmes/plugins/toolsets/runbook/__init__.py +0 -0
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +78 -0
- holmes/plugins/toolsets/service_discovery.py +92 -0
- holmes/plugins/toolsets/servicenow/install.md +37 -0
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +3 -0
- holmes/plugins/toolsets/servicenow/servicenow.py +198 -0
- holmes/plugins/toolsets/slab.yaml +20 -0
- holmes/plugins/toolsets/utils.py +137 -0
- holmes/plugins/utils.py +14 -0
- holmes/utils/__init__.py +0 -0
- holmes/utils/cache.py +84 -0
- holmes/utils/cert_utils.py +40 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +44 -0
- holmes/utils/definitions.py +13 -0
- holmes/utils/env.py +53 -0
- holmes/utils/file_utils.py +56 -0
- holmes/utils/global_instructions.py +20 -0
- holmes/utils/holmes_status.py +22 -0
- holmes/utils/holmes_sync_toolsets.py +80 -0
- holmes/utils/markdown_utils.py +55 -0
- holmes/utils/pydantic_utils.py +54 -0
- holmes/utils/robusta.py +10 -0
- holmes/utils/tags.py +97 -0
- holmesgpt-0.11.5.dist-info/LICENSE.txt +21 -0
- holmesgpt-0.11.5.dist-info/METADATA +400 -0
- holmesgpt-0.11.5.dist-info/RECORD +183 -0
- holmesgpt-0.11.5.dist-info/WHEEL +4 -0
- holmesgpt-0.11.5.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Dict, List, Tuple
|
|
3
|
+
|
|
4
|
+
from holmes.core.tools import StructuredToolResult, ToolParameter, ToolResultStatus
|
|
5
|
+
from holmes.plugins.toolsets.azure_sql.azure_base_toolset import (
|
|
6
|
+
BaseAzureSQLTool,
|
|
7
|
+
BaseAzureSQLToolset,
|
|
8
|
+
AzureSQLDatabaseConfig,
|
|
9
|
+
)
|
|
10
|
+
from holmes.plugins.toolsets.azure_sql.apis.azure_sql_api import AzureSQLAPIClient
|
|
11
|
+
from holmes.plugins.toolsets.azure_sql.utils import format_timing
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class GetSlowQueries(BaseAzureSQLTool):
    """Tool that surfaces the slowest/longest-running queries recorded in Query Store
    for a configured Azure SQL database and renders them as a markdown report."""

    def __init__(self, toolset: "BaseAzureSQLToolset"):
        # Parameter schema mirrors the other Query Store tools in this toolset.
        parameter_schema = {
            "top_count": ToolParameter(
                description="Number of top queries to return. Use 15 for detailed analysis, 5-10 for quick overview (default: 15)",
                type="integer",
                required=False,
            ),
            "hours_back": ToolParameter(
                description="Time window for analysis in hours. Use 2 for recent issues, 24+ for trend analysis (default: 2)",
                type="integer",
                required=False,
            ),
        }
        super().__init__(
            name="get_slow_queries",
            description="Identifies the slowest/longest-running queries from Query Store. Use this to find queries causing response time issues and user experience problems.",
            parameters=parameter_schema,
            toolset=toolset,
        )

    def _format_slow_queries_report(
        self,
        queries: List[Dict],
        db_config: AzureSQLDatabaseConfig,
        top_count: int,
        hours_back: int,
    ) -> str:
        """Render the slow-query rows into a human-readable markdown report.

        Args:
            queries: Rows returned by the API client; each row is a dict of
                Query Store columns (durations, counts, SQL text, ...).
            db_config: Database identity used for the report header.
            top_count: Maximum number of per-query detail sections to emit.
            hours_back: Analysis window, echoed in the header.

        Returns:
            The full report as a single newline-joined string.
        """
        lines: List[str] = [
            "# Slowest/Longest-Running Queries Report",
            f"**Database:** {db_config.database_name}",
            f"**Server:** {db_config.server_name}",
            f"**Analysis Period:** Last {hours_back} hours",
            f"**Top Queries:** {top_count}",
            "",
        ]

        if not queries:
            lines.append("No queries found for the specified time period.")
            return "\n".join(lines)

        # Aggregate across every returned row for the summary section.
        overall_duration = sum(float(q.get("total_duration", 0)) for q in queries)
        overall_executions = sum(int(q.get("execution_count", 0)) for q in queries)

        lines += [
            "## Summary",
            f"- **Total Queries Analyzed:** {len(queries)}",
            f"- **Total Duration:** {format_timing(overall_duration)}",
            f"- **Total Executions:** {overall_executions:,}",
            "",
            "## Query Details",
        ]

        for rank, row in enumerate(queries[:top_count], 1):
            sql_text = row.get("query_sql_text", "N/A")
            # Keep the report readable by truncating very long statements.
            if len(sql_text) > 200:
                sql_text = sql_text[:200] + "..."

            lines += [
                f"### Query #{rank}",
                f"- **Average Duration:** {format_timing(float(row.get('avg_duration', 0)))}",
                f"- **Total Duration:** {format_timing(float(row.get('total_duration', 0)))}",
                f"- **Max Duration:** {format_timing(float(row.get('max_duration', 0)))}",
                f"- **Execution Count:** {int(row.get('execution_count', 0)):,}",
                f"- **Average CPU Time:** {format_timing(float(row.get('avg_cpu_time', 0)))}",
                f"- **Last Execution:** {row.get('last_execution_time', 'N/A')}",
                "- **Query Text:**",
                "```sql",
                sql_text,
                "```",
                "",
            ]

        return "\n".join(lines)

    def _invoke(self, params: Dict) -> StructuredToolResult:
        """Fetch slow-query data from Query Store and return it as a report.

        Any failure (API, auth, formatting) is caught, logged, and returned as
        an ERROR result rather than raised.
        """
        try:
            top_count = params.get("top_count", 15)
            hours_back = params.get("hours_back", 2)

            db_config = self.toolset.database_config()
            api = self.toolset.api_client()

            rows = api.get_slow_queries(
                db_config.subscription_id,
                db_config.resource_group,
                db_config.server_name,
                db_config.database_name,
                top_count,
                hours_back,
            )

            return StructuredToolResult(
                status=ToolResultStatus.SUCCESS,
                data=self._format_slow_queries_report(
                    rows, db_config, top_count, hours_back
                ),
                params=params,
            )
        except Exception as e:
            error_msg = f"Failed to get slow queries: {str(e)}"
            logging.error(error_msg)
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=error_msg,
                params=params,
            )

    def get_parameterized_one_liner(self, params: Dict) -> str:
        """Return a one-line human-readable description of this invocation."""
        cfg = self.toolset.database_config()
        return f"Fetch slowest queries for database {cfg.server_name}/{cfg.database_name}"

    @staticmethod
    def validate_config(
        api_client: AzureSQLAPIClient, database_config: AzureSQLDatabaseConfig
    ) -> Tuple[bool, str]:
        """Verify Query Store is reachable by running a trivial probe query.

        Returns:
            (True, "") on success, otherwise (False, <classified reason>).
        """
        try:
            # A cheap probe that requires Query Store to exist and be readable.
            api_client.execute_query(
                database_config.server_name,
                database_config.database_name,
                "SELECT TOP 1 query_id FROM sys.query_store_query WHERE query_id > 0",
            )
        except Exception as e:
            message = str(e)
            lowered = message.lower()
            # Classify the failure so the caller gets an actionable hint.
            if "login failed" in lowered or "authentication" in lowered:
                return False, f"Database authentication failed: {message}"
            if "permission" in lowered or "denied" in lowered:
                return False, f"Query Store access denied: {message}"
            if "query store" in lowered:
                return False, f"Query Store not available or disabled: {message}"
            return False, f"Database connection failed: {message}"
        return True, ""
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Dict, List, Tuple
|
|
3
|
+
|
|
4
|
+
from holmes.core.tools import StructuredToolResult, ToolParameter, ToolResultStatus
|
|
5
|
+
from holmes.plugins.toolsets.azure_sql.azure_base_toolset import (
|
|
6
|
+
BaseAzureSQLTool,
|
|
7
|
+
BaseAzureSQLToolset,
|
|
8
|
+
AzureSQLDatabaseConfig,
|
|
9
|
+
)
|
|
10
|
+
from holmes.plugins.toolsets.azure_sql.apis.azure_sql_api import AzureSQLAPIClient
|
|
11
|
+
from holmes.plugins.toolsets.azure_sql.utils import format_timing
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class GetTopCPUQueries(BaseAzureSQLTool):
    """Tool that surfaces the top CPU-consuming queries recorded in Query Store
    for a configured Azure SQL database and renders them as a markdown report."""

    def __init__(self, toolset: "BaseAzureSQLToolset"):
        # Parameter schema mirrors the other Query Store tools in this toolset.
        parameter_schema = {
            "top_count": ToolParameter(
                description="Number of top queries to return. Use 15 for detailed analysis, 5-10 for quick overview (default: 15)",
                type="integer",
                required=False,
            ),
            "hours_back": ToolParameter(
                description="Time window for analysis in hours. Use 2 for recent issues, 24+ for trend analysis (default: 2)",
                type="integer",
                required=False,
            ),
        }
        super().__init__(
            name="get_top_cpu_queries",
            description="Identifies the top CPU-consuming queries from Query Store. Use this to find queries causing high CPU utilization and performance bottlenecks.",
            parameters=parameter_schema,
            toolset=toolset,
        )

    def _format_cpu_queries_report(
        self,
        queries: List[Dict],
        db_config: AzureSQLDatabaseConfig,
        top_count: int,
        hours_back: int,
    ) -> str:
        """Render the CPU-query rows into a human-readable markdown report.

        Args:
            queries: Rows returned by the API client; each row is a dict of
                Query Store columns (CPU times, counts, SQL text, ...).
            db_config: Database identity used for the report header.
            top_count: Maximum number of per-query detail sections to emit.
            hours_back: Analysis window, echoed in the header.

        Returns:
            The full report as a single newline-joined string.
        """
        lines: List[str] = [
            "# Top CPU Consuming Queries Report",
            f"**Database:** {db_config.database_name}",
            f"**Server:** {db_config.server_name}",
            f"**Analysis Period:** Last {hours_back} hours",
            f"**Top Queries:** {top_count}",
            "",
        ]

        if not queries:
            lines.append("No queries found for the specified time period.")
            return "\n".join(lines)

        # Aggregate across every returned row for the summary section.
        overall_cpu = sum(float(q.get("total_cpu_time", 0)) for q in queries)
        overall_executions = sum(int(q.get("execution_count", 0)) for q in queries)

        lines += [
            "## Summary",
            f"- **Total Queries Analyzed:** {len(queries)}",
            f"- **Total CPU Time:** {format_timing(overall_cpu)}",
            f"- **Total Executions:** {overall_executions:,}",
            "",
            "## Query Details",
        ]

        for rank, row in enumerate(queries[:top_count], 1):
            sql_text = row.get("query_sql_text", "N/A")
            # Keep the report readable by truncating very long statements.
            if len(sql_text) > 200:
                sql_text = sql_text[:200] + "..."

            lines += [
                f"### Query #{rank}",
                f"- **Average CPU Time:** {format_timing(float(row.get('avg_cpu_time', 0)))}",
                f"- **Total CPU Time:** {format_timing(float(row.get('total_cpu_time', 0)))}",
                f"- **Max CPU Time:** {format_timing(float(row.get('max_cpu_time', 0)))}",
                f"- **Execution Count:** {int(row.get('execution_count', 0)):,}",
                f"- **Average Duration:** {format_timing(float(row.get('avg_duration', 0)))}",
                f"- **Last Execution:** {row.get('last_execution_time', 'N/A')}",
                "- **Query Text:**",
                "```sql",
                sql_text,
                "```",
                "",
            ]

        return "\n".join(lines)

    def _invoke(self, params: Dict) -> StructuredToolResult:
        """Fetch top-CPU query data from Query Store and return it as a report.

        Any failure (API, auth, formatting) is caught, logged, and returned as
        an ERROR result rather than raised.
        """
        try:
            top_count = params.get("top_count", 15)
            hours_back = params.get("hours_back", 2)

            db_config = self.toolset.database_config()
            api = self.toolset.api_client()

            rows = api.get_top_cpu_queries(
                db_config.subscription_id,
                db_config.resource_group,
                db_config.server_name,
                db_config.database_name,
                top_count,
                hours_back,
            )

            return StructuredToolResult(
                status=ToolResultStatus.SUCCESS,
                data=self._format_cpu_queries_report(
                    rows, db_config, top_count, hours_back
                ),
                params=params,
            )
        except Exception as e:
            error_msg = f"Failed to get top CPU queries: {str(e)}"
            logging.error(error_msg)
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=error_msg,
                params=params,
            )

    def get_parameterized_one_liner(self, params: Dict) -> str:
        """Return a one-line human-readable description of this invocation."""
        cfg = self.toolset.database_config()
        return f"Fetch top CPU consuming queries for database {cfg.server_name}/{cfg.database_name}"

    @staticmethod
    def validate_config(
        api_client: AzureSQLAPIClient, database_config: AzureSQLDatabaseConfig
    ) -> Tuple[bool, str]:
        """Verify Query Store is reachable by running a trivial probe query.

        Returns:
            (True, "") on success, otherwise (False, <classified reason>).
        """
        try:
            # A cheap probe that requires Query Store to exist and be readable.
            api_client.execute_query(
                database_config.server_name,
                database_config.database_name,
                "SELECT TOP 1 query_id FROM sys.query_store_query WHERE query_id > 0",
            )
        except Exception as e:
            message = str(e)
            lowered = message.lower()
            # Classify the failure so the caller gets an actionable hint.
            if "login failed" in lowered or "authentication" in lowered:
                return False, f"Database authentication failed: {message}"
            if "permission" in lowered or "denied" in lowered:
                return False, f"Query Store access denied: {message}"
            if "query store" in lowered:
                return False, f"Query Store not available or disabled: {message}"
            return False, f"Database connection failed: {message}"
        return True, ""
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Dict, List, Tuple
|
|
3
|
+
|
|
4
|
+
from holmes.core.tools import StructuredToolResult, ToolParameter, ToolResultStatus
|
|
5
|
+
from holmes.plugins.toolsets.azure_sql.azure_base_toolset import (
|
|
6
|
+
BaseAzureSQLTool,
|
|
7
|
+
BaseAzureSQLToolset,
|
|
8
|
+
AzureSQLDatabaseConfig,
|
|
9
|
+
)
|
|
10
|
+
from holmes.plugins.toolsets.azure_sql.apis.azure_sql_api import AzureSQLAPIClient
|
|
11
|
+
from holmes.plugins.toolsets.azure_sql.utils import format_timing
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class GetTopDataIOQueries(BaseAzureSQLTool):
    """Tool that surfaces the queries with the highest data I/O (logical reads/
    writes) recorded in Query Store for a configured Azure SQL database and
    renders them as a markdown report."""

    def __init__(self, toolset: "BaseAzureSQLToolset"):
        # Parameter schema mirrors the other Query Store tools in this toolset.
        parameter_schema = {
            "top_count": ToolParameter(
                description="Number of top queries to return. Use 15 for detailed analysis, 5-10 for quick overview (default: 15)",
                type="integer",
                required=False,
            ),
            "hours_back": ToolParameter(
                description="Time window for analysis in hours. Use 2 for recent issues, 24+ for trend analysis (default: 2)",
                type="integer",
                required=False,
            ),
        }
        super().__init__(
            name="get_top_data_io_queries",
            description="Identifies queries consuming the most data I/O (logical reads/writes) from Query Store. Use this to find queries causing storage I/O bottlenecks and disk performance issues.",
            parameters=parameter_schema,
            toolset=toolset,
        )

    def _format_data_io_queries_report(
        self,
        queries: List[Dict],
        db_config: AzureSQLDatabaseConfig,
        top_count: int,
        hours_back: int,
    ) -> str:
        """Render the data-I/O query rows into a human-readable markdown report.

        Args:
            queries: Rows returned by the API client; each row is a dict of
                Query Store columns (logical reads/writes, counts, SQL text, ...).
            db_config: Database identity used for the report header.
            top_count: Maximum number of per-query detail sections to emit.
            hours_back: Analysis window, echoed in the header.

        Returns:
            The full report as a single newline-joined string.
        """
        lines: List[str] = [
            "# Top Data I/O Consuming Queries Report",
            f"**Database:** {db_config.database_name}",
            f"**Server:** {db_config.server_name}",
            f"**Analysis Period:** Last {hours_back} hours",
            f"**Top Queries:** {top_count}",
            "",
        ]

        if not queries:
            lines.append("No queries found for the specified time period.")
            return "\n".join(lines)

        # Aggregate across every returned row for the summary section.
        overall_reads = sum(float(q.get("total_logical_reads", 0)) for q in queries)
        overall_writes = sum(float(q.get("total_logical_writes", 0)) for q in queries)
        overall_executions = sum(int(q.get("execution_count", 0)) for q in queries)

        lines += [
            "## Summary",
            f"- **Total Queries Analyzed:** {len(queries)}",
            f"- **Total Logical Reads:** {overall_reads:,.0f} pages",
            f"- **Total Logical Writes:** {overall_writes:,.0f} pages",
            f"- **Total Executions:** {overall_executions:,}",
            "",
            "## Query Details",
        ]

        for rank, row in enumerate(queries[:top_count], 1):
            sql_text = row.get("query_sql_text", "N/A")
            # Keep the report readable by truncating very long statements.
            if len(sql_text) > 200:
                sql_text = sql_text[:200] + "..."

            lines += [
                f"### Query #{rank}",
                f"- **Average Logical Reads:** {float(row.get('avg_logical_reads', 0)):,.0f} pages",
                f"- **Total Logical Reads:** {float(row.get('total_logical_reads', 0)):,.0f} pages",
                f"- **Max Logical Reads:** {float(row.get('max_logical_reads', 0)):,.0f} pages",
                f"- **Average Logical Writes:** {float(row.get('avg_logical_writes', 0)):,.0f} pages",
                f"- **Total Logical Writes:** {float(row.get('total_logical_writes', 0)):,.0f} pages",
                f"- **Max Logical Writes:** {float(row.get('max_logical_writes', 0)):,.0f} pages",
                f"- **Execution Count:** {int(row.get('execution_count', 0)):,}",
                f"- **Average CPU Time:** {format_timing(float(row.get('avg_cpu_time', 0)))}",
                f"- **Average Duration:** {format_timing(float(row.get('avg_duration', 0)))}",
                f"- **Last Execution:** {row.get('last_execution_time', 'N/A')}",
                "- **Query Text:**",
                "```sql",
                sql_text,
                "```",
                "",
            ]

        return "\n".join(lines)

    def _invoke(self, params: Dict) -> StructuredToolResult:
        """Fetch top data-I/O query data from Query Store and return it as a report.

        Any failure (API, auth, formatting) is caught, logged, and returned as
        an ERROR result rather than raised.
        """
        try:
            top_count = params.get("top_count", 15)
            hours_back = params.get("hours_back", 2)

            db_config = self.toolset.database_config()
            api = self.toolset.api_client()

            rows = api.get_top_data_io_queries(
                db_config.subscription_id,
                db_config.resource_group,
                db_config.server_name,
                db_config.database_name,
                top_count,
                hours_back,
            )

            return StructuredToolResult(
                status=ToolResultStatus.SUCCESS,
                data=self._format_data_io_queries_report(
                    rows, db_config, top_count, hours_back
                ),
                params=params,
            )
        except Exception as e:
            error_msg = f"Failed to get top data I/O queries: {str(e)}"
            logging.error(error_msg)
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=error_msg,
                params=params,
            )

    def get_parameterized_one_liner(self, params: Dict) -> str:
        """Return a one-line human-readable description of this invocation."""
        cfg = self.toolset.database_config()
        return f"Fetch top data I/O consuming queries for database {cfg.server_name}/{cfg.database_name}"

    @staticmethod
    def validate_config(
        api_client: AzureSQLAPIClient, database_config: AzureSQLDatabaseConfig
    ) -> Tuple[bool, str]:
        """Verify Query Store is reachable by running a trivial probe query.

        Returns:
            (True, "") on success, otherwise (False, <classified reason>).
        """
        try:
            # A cheap probe that requires Query Store to exist and be readable.
            api_client.execute_query(
                database_config.server_name,
                database_config.database_name,
                "SELECT TOP 1 query_id FROM sys.query_store_query WHERE query_id > 0",
            )
        except Exception as e:
            message = str(e)
            lowered = message.lower()
            # Classify the failure so the caller gets an actionable hint.
            if "login failed" in lowered or "authentication" in lowered:
                return False, f"Database authentication failed: {message}"
            if "permission" in lowered or "denied" in lowered:
                return False, f"Query Store access denied: {message}"
            if "query store" in lowered:
                return False, f"Query Store not available or disabled: {message}"
            return False, f"Database connection failed: {message}"
        return True, ""
|