holmesgpt-0.11.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of holmesgpt might be problematic.

Files changed (183):
  1. holmes/.git_archival.json +7 -0
  2. holmes/__init__.py +76 -0
  3. holmes/__init__.py.bak +76 -0
  4. holmes/clients/robusta_client.py +24 -0
  5. holmes/common/env_vars.py +47 -0
  6. holmes/config.py +526 -0
  7. holmes/core/__init__.py +0 -0
  8. holmes/core/conversations.py +578 -0
  9. holmes/core/investigation.py +152 -0
  10. holmes/core/investigation_structured_output.py +264 -0
  11. holmes/core/issue.py +54 -0
  12. holmes/core/llm.py +250 -0
  13. holmes/core/models.py +157 -0
  14. holmes/core/openai_formatting.py +51 -0
  15. holmes/core/performance_timing.py +72 -0
  16. holmes/core/prompt.py +42 -0
  17. holmes/core/resource_instruction.py +17 -0
  18. holmes/core/runbooks.py +26 -0
  19. holmes/core/safeguards.py +120 -0
  20. holmes/core/supabase_dal.py +540 -0
  21. holmes/core/tool_calling_llm.py +798 -0
  22. holmes/core/tools.py +566 -0
  23. holmes/core/tools_utils/__init__.py +0 -0
  24. holmes/core/tools_utils/tool_executor.py +65 -0
  25. holmes/core/tools_utils/toolset_utils.py +52 -0
  26. holmes/core/toolset_manager.py +418 -0
  27. holmes/interactive.py +229 -0
  28. holmes/main.py +1041 -0
  29. holmes/plugins/__init__.py +0 -0
  30. holmes/plugins/destinations/__init__.py +6 -0
  31. holmes/plugins/destinations/slack/__init__.py +2 -0
  32. holmes/plugins/destinations/slack/plugin.py +163 -0
  33. holmes/plugins/interfaces.py +32 -0
  34. holmes/plugins/prompts/__init__.py +48 -0
  35. holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
  36. holmes/plugins/prompts/_default_log_prompt.jinja2 +11 -0
  37. holmes/plugins/prompts/_fetch_logs.jinja2 +36 -0
  38. holmes/plugins/prompts/_general_instructions.jinja2 +86 -0
  39. holmes/plugins/prompts/_global_instructions.jinja2 +12 -0
  40. holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -0
  41. holmes/plugins/prompts/_toolsets_instructions.jinja2 +56 -0
  42. holmes/plugins/prompts/generic_ask.jinja2 +36 -0
  43. holmes/plugins/prompts/generic_ask_conversation.jinja2 +32 -0
  44. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +50 -0
  45. holmes/plugins/prompts/generic_investigation.jinja2 +42 -0
  46. holmes/plugins/prompts/generic_post_processing.jinja2 +13 -0
  47. holmes/plugins/prompts/generic_ticket.jinja2 +12 -0
  48. holmes/plugins/prompts/investigation_output_format.jinja2 +32 -0
  49. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +84 -0
  50. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +39 -0
  51. holmes/plugins/runbooks/README.md +22 -0
  52. holmes/plugins/runbooks/__init__.py +100 -0
  53. holmes/plugins/runbooks/catalog.json +14 -0
  54. holmes/plugins/runbooks/jira.yaml +12 -0
  55. holmes/plugins/runbooks/kube-prometheus-stack.yaml +10 -0
  56. holmes/plugins/runbooks/networking/dns_troubleshooting_instructions.md +66 -0
  57. holmes/plugins/runbooks/upgrade/upgrade_troubleshooting_instructions.md +44 -0
  58. holmes/plugins/sources/github/__init__.py +77 -0
  59. holmes/plugins/sources/jira/__init__.py +123 -0
  60. holmes/plugins/sources/opsgenie/__init__.py +93 -0
  61. holmes/plugins/sources/pagerduty/__init__.py +147 -0
  62. holmes/plugins/sources/prometheus/__init__.py +0 -0
  63. holmes/plugins/sources/prometheus/models.py +104 -0
  64. holmes/plugins/sources/prometheus/plugin.py +154 -0
  65. holmes/plugins/toolsets/__init__.py +171 -0
  66. holmes/plugins/toolsets/aks-node-health.yaml +65 -0
  67. holmes/plugins/toolsets/aks.yaml +86 -0
  68. holmes/plugins/toolsets/argocd.yaml +70 -0
  69. holmes/plugins/toolsets/atlas_mongodb/instructions.jinja2 +8 -0
  70. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +307 -0
  71. holmes/plugins/toolsets/aws.yaml +76 -0
  72. holmes/plugins/toolsets/azure_sql/__init__.py +0 -0
  73. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +600 -0
  74. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +309 -0
  75. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +445 -0
  76. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +251 -0
  77. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +317 -0
  78. holmes/plugins/toolsets/azure_sql/azure_base_toolset.py +55 -0
  79. holmes/plugins/toolsets/azure_sql/azure_sql_instructions.jinja2 +137 -0
  80. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +183 -0
  81. holmes/plugins/toolsets/azure_sql/install.md +66 -0
  82. holmes/plugins/toolsets/azure_sql/tools/__init__.py +1 -0
  83. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +324 -0
  84. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +243 -0
  85. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +205 -0
  86. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +249 -0
  87. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +373 -0
  88. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +237 -0
  89. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +172 -0
  90. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +170 -0
  91. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +188 -0
  92. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +180 -0
  93. holmes/plugins/toolsets/azure_sql/utils.py +83 -0
  94. holmes/plugins/toolsets/bash/__init__.py +0 -0
  95. holmes/plugins/toolsets/bash/bash_instructions.jinja2 +14 -0
  96. holmes/plugins/toolsets/bash/bash_toolset.py +208 -0
  97. holmes/plugins/toolsets/bash/common/bash.py +52 -0
  98. holmes/plugins/toolsets/bash/common/config.py +14 -0
  99. holmes/plugins/toolsets/bash/common/stringify.py +25 -0
  100. holmes/plugins/toolsets/bash/common/validators.py +24 -0
  101. holmes/plugins/toolsets/bash/grep/__init__.py +52 -0
  102. holmes/plugins/toolsets/bash/kubectl/__init__.py +100 -0
  103. holmes/plugins/toolsets/bash/kubectl/constants.py +96 -0
  104. holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +66 -0
  105. holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +88 -0
  106. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +108 -0
  107. holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +20 -0
  108. holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +46 -0
  109. holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +81 -0
  110. holmes/plugins/toolsets/bash/parse_command.py +103 -0
  111. holmes/plugins/toolsets/confluence.yaml +19 -0
  112. holmes/plugins/toolsets/consts.py +5 -0
  113. holmes/plugins/toolsets/coralogix/api.py +158 -0
  114. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +103 -0
  115. holmes/plugins/toolsets/coralogix/utils.py +181 -0
  116. holmes/plugins/toolsets/datadog.py +153 -0
  117. holmes/plugins/toolsets/docker.yaml +46 -0
  118. holmes/plugins/toolsets/git.py +756 -0
  119. holmes/plugins/toolsets/grafana/__init__.py +0 -0
  120. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +54 -0
  121. holmes/plugins/toolsets/grafana/common.py +68 -0
  122. holmes/plugins/toolsets/grafana/grafana_api.py +31 -0
  123. holmes/plugins/toolsets/grafana/loki_api.py +89 -0
  124. holmes/plugins/toolsets/grafana/tempo_api.py +124 -0
  125. holmes/plugins/toolsets/grafana/toolset_grafana.py +102 -0
  126. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +102 -0
  127. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +10 -0
  128. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -0
  129. holmes/plugins/toolsets/grafana/trace_parser.py +195 -0
  130. holmes/plugins/toolsets/helm.yaml +42 -0
  131. holmes/plugins/toolsets/internet/internet.py +275 -0
  132. holmes/plugins/toolsets/internet/notion.py +137 -0
  133. holmes/plugins/toolsets/kafka.py +638 -0
  134. holmes/plugins/toolsets/kubernetes.yaml +255 -0
  135. holmes/plugins/toolsets/kubernetes_logs.py +426 -0
  136. holmes/plugins/toolsets/kubernetes_logs.yaml +42 -0
  137. holmes/plugins/toolsets/logging_utils/__init__.py +0 -0
  138. holmes/plugins/toolsets/logging_utils/logging_api.py +217 -0
  139. holmes/plugins/toolsets/logging_utils/types.py +0 -0
  140. holmes/plugins/toolsets/mcp/toolset_mcp.py +135 -0
  141. holmes/plugins/toolsets/newrelic.py +222 -0
  142. holmes/plugins/toolsets/opensearch/__init__.py +0 -0
  143. holmes/plugins/toolsets/opensearch/opensearch.py +245 -0
  144. holmes/plugins/toolsets/opensearch/opensearch_logs.py +151 -0
  145. holmes/plugins/toolsets/opensearch/opensearch_traces.py +211 -0
  146. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +12 -0
  147. holmes/plugins/toolsets/opensearch/opensearch_utils.py +166 -0
  148. holmes/plugins/toolsets/prometheus/prometheus.py +818 -0
  149. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +38 -0
  150. holmes/plugins/toolsets/rabbitmq/api.py +398 -0
  151. holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2 +37 -0
  152. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +222 -0
  153. holmes/plugins/toolsets/robusta/__init__.py +0 -0
  154. holmes/plugins/toolsets/robusta/robusta.py +235 -0
  155. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +24 -0
  156. holmes/plugins/toolsets/runbook/__init__.py +0 -0
  157. holmes/plugins/toolsets/runbook/runbook_fetcher.py +78 -0
  158. holmes/plugins/toolsets/service_discovery.py +92 -0
  159. holmes/plugins/toolsets/servicenow/install.md +37 -0
  160. holmes/plugins/toolsets/servicenow/instructions.jinja2 +3 -0
  161. holmes/plugins/toolsets/servicenow/servicenow.py +198 -0
  162. holmes/plugins/toolsets/slab.yaml +20 -0
  163. holmes/plugins/toolsets/utils.py +137 -0
  164. holmes/plugins/utils.py +14 -0
  165. holmes/utils/__init__.py +0 -0
  166. holmes/utils/cache.py +84 -0
  167. holmes/utils/cert_utils.py +40 -0
  168. holmes/utils/default_toolset_installation_guide.jinja2 +44 -0
  169. holmes/utils/definitions.py +13 -0
  170. holmes/utils/env.py +53 -0
  171. holmes/utils/file_utils.py +56 -0
  172. holmes/utils/global_instructions.py +20 -0
  173. holmes/utils/holmes_status.py +22 -0
  174. holmes/utils/holmes_sync_toolsets.py +80 -0
  175. holmes/utils/markdown_utils.py +55 -0
  176. holmes/utils/pydantic_utils.py +54 -0
  177. holmes/utils/robusta.py +10 -0
  178. holmes/utils/tags.py +97 -0
  179. holmesgpt-0.11.5.dist-info/LICENSE.txt +21 -0
  180. holmesgpt-0.11.5.dist-info/METADATA +400 -0
  181. holmesgpt-0.11.5.dist-info/RECORD +183 -0
  182. holmesgpt-0.11.5.dist-info/WHEEL +4 -0
  183. holmesgpt-0.11.5.dist-info/entry_points.txt +3 -0

holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py
@@ -0,0 +1,251 @@
from typing import Dict, List
import logging
from datetime import datetime, timedelta
from azure.core.credentials import TokenCredential
from azure.monitor.query import MetricsQueryClient
from .azure_sql_api import AzureSQLAPIClient


class ConnectionMonitoringAPI:
    def __init__(
        self,
        credential: TokenCredential,
        subscription_id: str,
    ):
        self.sql_api_client = AzureSQLAPIClient(credential, subscription_id)
        self.metrics_client = MetricsQueryClient(credential)
        self.subscription_id = subscription_id

    def _format_sql_error(self, error: Exception) -> str:
        """Format SQL errors with helpful permission guidance."""
        error_str = str(error)

        # Detect common permission issues
        if (
            "Login failed for user" in error_str
            and "token-identified principal" in error_str
        ):
            return (
                f"Azure AD authentication failed - the service principal lacks database permissions. "
                f"Please ensure the service principal is added as a database user with VIEW SERVER STATE permission. "
                f"Original error: {error_str}"
            )
        elif (
            "permission was denied" in error_str.lower()
            or "view server state" in error_str.lower()
        ):
            return (
                f"Insufficient database permissions - the user needs VIEW SERVER STATE permission to access system views. "
                f"Original error: {error_str}"
            )
        elif "login failed" in error_str.lower():
            return (
                f"Database login failed - check authentication credentials and database access permissions. "
                f"Original error: {error_str}"
            )
        else:
            return error_str

    def get_connection_metrics(
        self,
        resource_group: str,
        server_name: str,
        database_name: str,
        hours_back: int = 2,
    ) -> Dict:
        """Get connection-related metrics from Azure Monitor."""
        resource_id = (
            f"subscriptions/{self.subscription_id}/"
            f"resourceGroups/{resource_group}/"
            f"providers/Microsoft.Sql/servers/{server_name}/"
            f"databases/{database_name}"
        )

        end_time = datetime.now()
        # Use longer timespan for better data availability
        start_time = end_time - timedelta(hours=max(hours_back, 24))

        try:
            metrics_data = self.metrics_client.query_resource(
                resource_uri=resource_id,
                metric_names=[
                    "connection_successful",  # This exists
                    "sessions_count",  # This exists
                    "cpu_percent",  # This exists
                    "storage_percent",  # This exists
                ],
                timespan=(start_time, end_time),
                granularity=timedelta(hours=1),  # Larger granularity for better data
                aggregations=["Maximum", "Average", "Total"],
            )

            result = {}
            for metric in metrics_data.metrics:
                metric_data = []
                for timeseries in metric.timeseries:
                    for data_point in timeseries.data:
                        # Handle None values and pick the best available aggregation
                        value_data = {
                            "timestamp": data_point.timestamp.isoformat(),
                            "maximum": data_point.maximum
                            if data_point.maximum is not None
                            else 0,
                            "average": data_point.average
                            if data_point.average is not None
                            else 0,
                            "total": data_point.total
                            if data_point.total is not None
                            else 0,
                        }
                        metric_data.append(value_data)
                result[metric.name] = metric_data

            return result

        except Exception as e:
            logging.error(f"Failed to get connection metrics: {str(e)}")
            return {"error": str(e)}

    def get_active_connections(
        self, server_name: str, database_name: str
    ) -> List[Dict]:
        """Get currently active connections using DMV."""
        query = """
        SELECT
            s.session_id,
            s.login_name,
            s.host_name,
            s.program_name,
            s.login_time,
            s.last_request_start_time,
            s.last_request_end_time,
            s.status,
            s.cpu_time,
            s.memory_usage,
            s.total_scheduled_time,
            s.total_elapsed_time,
            s.reads,
            s.writes,
            s.logical_reads,
            CASE
                WHEN r.session_id IS NOT NULL THEN 'Active'
                ELSE 'Inactive'
            END as connection_status,
            r.blocking_session_id,
            r.wait_type,
            r.wait_time,
            r.wait_resource
        FROM sys.dm_exec_sessions s
        LEFT JOIN sys.dm_exec_requests r ON s.session_id = r.session_id
        WHERE s.is_user_process = 1
        ORDER BY s.login_time DESC;
        """

        try:
            return self.sql_api_client.execute_query(server_name, database_name, query)
        except Exception as e:
            formatted_error = self._format_sql_error(e)
            logging.error(f"Failed to get active connections: {formatted_error}")
            return []

    def get_connection_summary(self, server_name: str, database_name: str) -> Dict:
        """Get connection summary statistics."""
        query = """
        SELECT
            COUNT(*) as total_connections,
            COUNT(CASE WHEN r.session_id IS NOT NULL THEN 1 END) as active_connections,
            COUNT(CASE WHEN r.session_id IS NULL THEN 1 END) as idle_connections,
            COUNT(CASE WHEN r.blocking_session_id > 0 THEN 1 END) as blocked_connections,
            COUNT(DISTINCT s.login_name) as unique_users,
            COUNT(DISTINCT s.host_name) as unique_hosts,
            MAX(s.login_time) as latest_login,
            MIN(s.login_time) as earliest_login
        FROM sys.dm_exec_sessions s
        LEFT JOIN sys.dm_exec_requests r ON s.session_id = r.session_id
        WHERE s.is_user_process = 1;
        """

        try:
            result = self.sql_api_client.execute_query(
                server_name, database_name, query
            )
            return result[0] if result else {}
        except Exception as e:
            formatted_error = self._format_sql_error(e)
            logging.error(f"Failed to get connection summary: {formatted_error}")
            return {"error": formatted_error}

    def get_failed_connections(
        self, server_name: str, database_name: str, hours_back: int = 24
    ) -> List[Dict]:
        """Get failed connection attempts from extended events or system health."""
        # Note: This query looks for connectivity ring buffer events
        query = f"""
        WITH ConnectivityEvents AS (
            SELECT
                CAST(event_data AS XML) as event_xml,
                timestamp_utc
            FROM sys.fn_xe_file_target_read_file('system_health*.xel', null, null, null)
            WHERE object_name = 'connectivity_ring_buffer_recorded'
            AND timestamp_utc > DATEADD(hour, -{hours_back}, GETUTCDATE())
        )
        SELECT TOP 100
            timestamp_utc,
            event_xml.value('(/Record/ConnectivityTraceRecord/RecordType)[1]', 'varchar(50)') as record_type,
            event_xml.value('(/Record/ConnectivityTraceRecord/RecordSource)[1]', 'varchar(50)') as record_source,
            event_xml.value('(/Record/ConnectivityTraceRecord/Spid)[1]', 'int') as spid,
            event_xml.value('(/Record/ConnectivityTraceRecord/SniConsumerError)[1]', 'int') as sni_consumer_error,
            event_xml.value('(/Record/ConnectivityTraceRecord/State)[1]', 'int') as state,
            event_xml.value('(/Record/ConnectivityTraceRecord/RemoteHost)[1]', 'varchar(100)') as remote_host,
            event_xml.value('(/Record/ConnectivityTraceRecord/RemotePort)[1]', 'varchar(10)') as remote_port
        FROM ConnectivityEvents
        WHERE event_xml.value('(/Record/ConnectivityTraceRecord/RecordType)[1]', 'varchar(50)') LIKE '%Error%'
        ORDER BY timestamp_utc DESC;
        """

        try:
            return self.sql_api_client.execute_query(server_name, database_name, query)
        except Exception as e:
            logging.warning(
                f"Failed to get failed connections (extended events may not be available): {str(e)}"
            )
            # Fallback to a simpler approach using error log if available
            return []

    def get_connection_pool_stats(self, server_name: str, database_name: str) -> Dict:
        """Get connection pool related statistics."""
        query = """
        SELECT
            'Database Connections' as metric_name,
            COUNT(*) as current_value,
            'connections' as unit
        FROM sys.dm_exec_sessions
        WHERE is_user_process = 1
        UNION ALL
        SELECT
            'Active Requests' as metric_name,
            COUNT(*) as current_value,
            'requests' as unit
        FROM sys.dm_exec_requests
        WHERE session_id > 50
        UNION ALL
        SELECT
            'Waiting Tasks' as metric_name,
            COUNT(*) as current_value,
            'tasks' as unit
        FROM sys.dm_os_waiting_tasks
        WHERE session_id > 50;
        """

        try:
            results = self.sql_api_client.execute_query(
                server_name, database_name, query
            )
            return {
                row["metric_name"]: {"value": row["current_value"], "unit": row["unit"]}
                for row in results
            }
        except Exception as e:
            formatted_error = self._format_sql_error(e)
            logging.error(f"Failed to get connection pool stats: {formatted_error}")
            return {"error": formatted_error}
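
For reference, a minimal usage sketch of the ConnectionMonitoringAPI shown above (illustrative only, not part of the packaged code; DefaultAzureCredential comes from the separate azure-identity package, and the subscription and resource names are placeholders):

    from azure.identity import DefaultAzureCredential

    from holmes.plugins.toolsets.azure_sql.apis.connection_monitoring_api import (
        ConnectionMonitoringAPI,
    )

    # Hypothetical usage sketch; subscription and resource names are placeholders.
    credential = DefaultAzureCredential()
    api = ConnectionMonitoringAPI(credential, subscription_id="<subscription-id>")

    # Azure Monitor metrics for the database over the last 2 hours
    # (the method internally widens the timespan to at least 24 hours).
    metrics = api.get_connection_metrics(
        resource_group="my-resource-group",
        server_name="my-sql-server",
        database_name="my-database",
        hours_back=2,
    )

    # DMV-based snapshots; these require VIEW SERVER STATE on the database.
    summary = api.get_connection_summary("my-sql-server", "my-database")
    active_sessions = api.get_active_connections("my-sql-server", "my-database")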

holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py
@@ -0,0 +1,317 @@
from typing import Dict, List
import logging
from datetime import datetime, timedelta
from azure.core.credentials import TokenCredential
from azure.monitor.query import MetricsQueryClient
from .azure_sql_api import AzureSQLAPIClient


class StorageAnalysisAPI:
    def __init__(
        self,
        credential: TokenCredential,
        subscription_id: str,
    ):
        self.sql_api_client = AzureSQLAPIClient(credential, subscription_id)
        self.metrics_client = MetricsQueryClient(credential)
        self.subscription_id = subscription_id

    def _format_sql_error(self, error: Exception) -> str:
        """Format SQL errors with helpful permission guidance."""
        error_str = str(error)

        # Detect common permission issues
        if (
            "Login failed for user" in error_str
            and "token-identified principal" in error_str
        ):
            return (
                f"Azure AD authentication failed - the service principal lacks database permissions. "
                f"Please ensure the service principal is added as a database user with appropriate permissions. "
                f"Original error: {error_str}"
            )
        elif "permission was denied" in error_str.lower():
            return (
                f"Insufficient database permissions - check user access rights. "
                f"Original error: {error_str}"
            )
        elif "login failed" in error_str.lower():
            return (
                f"Database login failed - check authentication credentials and database access permissions. "
                f"Original error: {error_str}"
            )
        else:
            return error_str

    def get_storage_metrics(
        self,
        resource_group: str,
        server_name: str,
        database_name: str,
        hours_back: int = 24,
    ) -> Dict:
        """Get storage-related metrics from Azure Monitor."""
        resource_id = (
            f"subscriptions/{self.subscription_id}/"
            f"resourceGroups/{resource_group}/"
            f"providers/Microsoft.Sql/servers/{server_name}/"
            f"databases/{database_name}"
        )

        end_time = datetime.now()
        start_time = end_time - timedelta(hours=hours_back)

        try:
            metrics_data = self.metrics_client.query_resource(
                resource_uri=resource_id,
                metric_names=[
                    "storage_percent",
                    "storage",
                    "allocated_data_storage",
                    "log_write_percent",
                    "tempdb_data_size",
                    "tempdb_log_size",
                    "tempdb_log_used_percent",
                ],
                timespan=(start_time, end_time),
                granularity=timedelta(minutes=15),
                aggregations=["Maximum", "Average", "Minimum"],
            )

            result = {}
            for metric in metrics_data.metrics:
                metric_data = []
                for timeseries in metric.timeseries:
                    for data_point in timeseries.data:
                        metric_data.append(
                            {
                                "timestamp": data_point.timestamp.isoformat(),
                                "maximum": data_point.maximum,
                                "average": data_point.average,
                                "minimum": data_point.minimum,
                            }
                        )
                result[metric.name] = metric_data

            return result

        except Exception as e:
            logging.error(f"Failed to get storage metrics: {str(e)}")
            return {"error": str(e)}

    def get_database_size_details(
        self, server_name: str, database_name: str
    ) -> List[Dict]:
        """Get detailed database size information using DMV."""
        query = """
        SELECT
            DB_NAME() as database_name,
            CASE
                WHEN type_desc = 'ROWS' THEN 'Data'
                WHEN type_desc = 'LOG' THEN 'Log'
                ELSE type_desc
            END as file_type,
            name as logical_name,
            physical_name,
            CAST(size * 8.0 / 1024 AS DECIMAL(10,2)) as size_mb,
            CAST(FILEPROPERTY(name, 'SpaceUsed') * 8.0 / 1024 AS DECIMAL(10,2)) as used_mb,
            CAST((size - FILEPROPERTY(name, 'SpaceUsed')) * 8.0 / 1024 AS DECIMAL(10,2)) as free_mb,
            CAST(FILEPROPERTY(name, 'SpaceUsed') * 100.0 / size AS DECIMAL(5,2)) as used_percent,
            CASE
                WHEN max_size = -1 THEN 'Unlimited'
                WHEN max_size = 268435456 THEN 'Default (2TB)'
                ELSE CAST(max_size * 8.0 / 1024 AS VARCHAR(20)) + ' MB'
            END as max_size,
            is_percent_growth,
            CASE
                WHEN is_percent_growth = 1 THEN CAST(growth AS VARCHAR(10)) + '%'
                ELSE CAST(growth * 8.0 / 1024 AS VARCHAR(20)) + ' MB'
            END as growth_setting,
            state_desc as file_state
        FROM sys.database_files
        ORDER BY type_desc, file_id;
        """

        try:
            return self.sql_api_client.execute_query(server_name, database_name, query)
        except Exception as e:
            logging.error(f"Failed to get database size details: {str(e)}")
            return [{"error": str(e)}]

    def get_storage_summary(self, server_name: str, database_name: str) -> Dict:
        """Get storage summary statistics."""
        query = """
        SELECT
            DB_NAME() as database_name,
            CAST(SUM(CASE WHEN type_desc = 'ROWS' THEN size END) * 8.0 / 1024 AS DECIMAL(10,2)) as total_data_size_mb,
            CAST(SUM(CASE WHEN type_desc = 'ROWS' THEN FILEPROPERTY(name, 'SpaceUsed') END) * 8.0 / 1024 AS DECIMAL(10,2)) as used_data_size_mb,
            CAST(SUM(CASE WHEN type_desc = 'LOG' THEN size END) * 8.0 / 1024 AS DECIMAL(10,2)) as total_log_size_mb,
            CAST(SUM(CASE WHEN type_desc = 'LOG' THEN FILEPROPERTY(name, 'SpaceUsed') END) * 8.0 / 1024 AS DECIMAL(10,2)) as used_log_size_mb,
            CAST((SUM(CASE WHEN type_desc = 'ROWS' THEN size END) +
                  SUM(CASE WHEN type_desc = 'LOG' THEN size END)) * 8.0 / 1024 AS DECIMAL(10,2)) as total_database_size_mb,
            CAST((SUM(CASE WHEN type_desc = 'ROWS' THEN FILEPROPERTY(name, 'SpaceUsed') END) +
                  SUM(CASE WHEN type_desc = 'LOG' THEN FILEPROPERTY(name, 'SpaceUsed') END)) * 8.0 / 1024 AS DECIMAL(10,2)) as total_used_size_mb,
            COUNT(CASE WHEN type_desc = 'ROWS' THEN 1 END) as data_files_count,
            COUNT(CASE WHEN type_desc = 'LOG' THEN 1 END) as log_files_count
        FROM sys.database_files;
        """

        try:
            result = self.sql_api_client.execute_query(
                server_name, database_name, query
            )
            return result[0] if result else {}
        except Exception as e:
            logging.error(f"Failed to get storage summary: {str(e)}")
            return {"error": str(e)}

    def get_table_space_usage(
        self, server_name: str, database_name: str, top_count: int = 20
    ) -> List[Dict]:
        """Get space usage by table/index."""
        query = f"""
        SELECT TOP {top_count}
            SCHEMA_NAME(t.schema_id) as schema_name,
            t.name as table_name,
            i.name as index_name,
            i.type_desc as index_type,
            p.rows as row_count,
            a.total_pages,
            a.used_pages,
            a.data_pages,
            CAST(a.total_pages * 8.0 / 1024 AS DECIMAL(10,2)) as total_space_mb,
            CAST(a.used_pages * 8.0 / 1024 AS DECIMAL(10,2)) as used_space_mb,
            CAST(a.data_pages * 8.0 / 1024 AS DECIMAL(10,2)) as data_space_mb,
            CAST((a.total_pages - a.used_pages) * 8.0 / 1024 AS DECIMAL(10,2)) as unused_space_mb,
            CAST((a.used_pages - a.data_pages) * 8.0 / 1024 AS DECIMAL(10,2)) as index_space_mb
        FROM sys.tables t
        INNER JOIN sys.indexes i ON t.object_id = i.object_id
        INNER JOIN sys.partitions p ON i.object_id = p.object_id AND i.index_id = p.index_id
        INNER JOIN (
            SELECT
                object_id,
                index_id,
                SUM(total_pages) as total_pages,
                SUM(used_pages) as used_pages,
                SUM(data_pages) as data_pages
            FROM sys.allocation_units au
            INNER JOIN sys.partitions p ON
                (au.type IN (1,3) AND au.container_id = p.hobt_id) OR
                (au.type = 2 AND au.container_id = p.partition_id)
            GROUP BY object_id, index_id
        ) a ON i.object_id = a.object_id AND i.index_id = a.index_id
        WHERE t.is_ms_shipped = 0
        ORDER BY a.total_pages DESC;
        """

        try:
            return self.sql_api_client.execute_query(server_name, database_name, query)
        except Exception as e:
            logging.error(f"Failed to get table space usage: {str(e)}")
            return []

    def get_storage_growth_trend(self, server_name: str, database_name: str) -> Dict:
        """Get storage growth trends from backup history."""
        query = """
        WITH BackupSizes AS (
            SELECT
                backup_start_date,
                database_name,
                backup_size,
                compressed_backup_size,
                type as backup_type,
                ROW_NUMBER() OVER (PARTITION BY CONVERT(date, backup_start_date) ORDER BY backup_start_date DESC) as rn
            FROM msdb.dbo.backupset
            WHERE database_name = DB_NAME()
            AND type = 'D' -- Full backups only
            AND backup_start_date >= DATEADD(day, -30, GETDATE())
        )
        SELECT
            CONVERT(date, backup_start_date) as backup_date,
            database_name,
            CAST(backup_size / 1024.0 / 1024.0 AS DECIMAL(10,2)) as backup_size_mb,
            CAST(compressed_backup_size / 1024.0 / 1024.0 AS DECIMAL(10,2)) as compressed_backup_size_mb,
            CAST((backup_size - compressed_backup_size) * 100.0 / backup_size AS DECIMAL(5,2)) as compression_ratio_percent
        FROM BackupSizes
        WHERE rn = 1 -- One backup per day
        ORDER BY backup_date DESC;
        """

        try:
            results = self.sql_api_client.execute_query(
                server_name, database_name, query
            )

            # Calculate growth trend if we have multiple data points
            if len(results) >= 2:
                oldest = results[-1]
                newest = results[0]

                if oldest["backup_size_mb"] and newest["backup_size_mb"]:
                    growth_mb = newest["backup_size_mb"] - oldest["backup_size_mb"]
                    growth_percent = (growth_mb / oldest["backup_size_mb"]) * 100
                    days_diff = (
                        datetime.strptime(str(newest["backup_date"]), "%Y-%m-%d")
                        - datetime.strptime(str(oldest["backup_date"]), "%Y-%m-%d")
                    ).days

                    return {
                        "backup_history": results,
                        "growth_analysis": {
                            "total_growth_mb": round(growth_mb, 2),
                            "growth_percent": round(growth_percent, 2),
                            "days_analyzed": days_diff,
                            "avg_daily_growth_mb": round(growth_mb / days_diff, 2)
                            if days_diff > 0
                            else 0,
                        },
                    }

            return {"backup_history": results, "growth_analysis": None}

        except Exception as e:
            logging.warning(
                f"Failed to get storage growth trend (backup history may not be available): {str(e)}"
            )
            return {"error": str(e)}

    def get_tempdb_usage(self, server_name: str, database_name: str) -> Dict:
        """Get tempdb usage information."""
        query = """
        SELECT
            'TempDB Usage' as metric_type,
            CAST(SUM(size) * 8.0 / 1024 AS DECIMAL(10,2)) as total_size_mb,
            CAST(SUM(FILEPROPERTY(name, 'SpaceUsed')) * 8.0 / 1024 AS DECIMAL(10,2)) as used_size_mb,
            CAST((SUM(size) - SUM(FILEPROPERTY(name, 'SpaceUsed'))) * 8.0 / 1024 AS DECIMAL(10,2)) as free_size_mb,
            CAST(SUM(FILEPROPERTY(name, 'SpaceUsed')) * 100.0 / SUM(size) AS DECIMAL(5,2)) as used_percent
        FROM tempdb.sys.database_files
        WHERE type_desc = 'ROWS'
        UNION ALL
        SELECT
            'TempDB Log' as metric_type,
            CAST(SUM(size) * 8.0 / 1024 AS DECIMAL(10,2)) as total_size_mb,
            CAST(SUM(FILEPROPERTY(name, 'SpaceUsed')) * 8.0 / 1024 AS DECIMAL(10,2)) as used_size_mb,
            CAST((SUM(size) - SUM(FILEPROPERTY(name, 'SpaceUsed'))) * 8.0 / 1024 AS DECIMAL(10,2)) as free_size_mb,
            CAST(SUM(FILEPROPERTY(name, 'SpaceUsed')) * 100.0 / SUM(size) AS DECIMAL(5,2)) as used_percent
        FROM tempdb.sys.database_files
        WHERE type_desc = 'LOG';
        """

        try:
            results = self.sql_api_client.execute_query(
                server_name, database_name, query
            )
            return {
                row["metric_type"]: {
                    "total_size_mb": row["total_size_mb"],
                    "used_size_mb": row["used_size_mb"],
                    "free_size_mb": row["free_size_mb"],
                    "used_percent": row["used_percent"],
                }
                for row in results
            }
        except Exception as e:
            logging.warning(
                f"Failed to get tempdb usage (may not have permissions): {str(e)}"
            )
            return {"error": str(e)}
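
Likewise, a minimal sketch of how the StorageAnalysisAPI above might be driven (illustrative only, not part of the packaged code; the subscription and resource names are placeholders):

    from azure.identity import DefaultAzureCredential

    from holmes.plugins.toolsets.azure_sql.apis.storage_analysis_api import (
        StorageAnalysisAPI,
    )

    # Hypothetical usage; subscription and resource names are placeholders.
    storage_api = StorageAnalysisAPI(DefaultAzureCredential(), "<subscription-id>")

    # File-level and summary sizes via DMVs.
    summary = storage_api.get_storage_summary("my-sql-server", "my-database")
    files = storage_api.get_database_size_details("my-sql-server", "my-database")

    # Ten largest tables/indexes by allocated pages.
    largest = storage_api.get_table_space_usage(
        "my-sql-server", "my-database", top_count=10
    )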

holmes/plugins/toolsets/azure_sql/azure_base_toolset.py
@@ -0,0 +1,55 @@
from typing import Optional, Tuple

from pydantic import BaseModel, ConfigDict

from holmes.core.tools import Tool, Toolset
from holmes.plugins.toolsets.azure_sql.apis.azure_sql_api import AzureSQLAPIClient


class AzureSQLDatabaseConfig(BaseModel):
    subscription_id: str
    resource_group: str
    server_name: str
    database_name: str


class AzureSQLConfig(BaseModel):
    database: AzureSQLDatabaseConfig
    tenant_id: Optional[str]
    client_id: Optional[str]
    client_secret: Optional[str]


class BaseAzureSQLToolset(Toolset):
    model_config = ConfigDict(arbitrary_types_allowed=True)
    _api_client: Optional[AzureSQLAPIClient] = None
    _database_config: Optional[AzureSQLDatabaseConfig] = None

    def api_client(self):
        if not self._api_client:
            raise Exception(
                "Toolset is missing api_client. This is likely a code issue and not a configuration issue"
            )
        else:
            return self._api_client

    def database_config(self):
        if not self._database_config:
            raise Exception(
                "Toolset is missing database_config. This is likely a code issue and not a configuration issue"
            )
        else:
            return self._database_config


class BaseAzureSQLTool(Tool):
    toolset: BaseAzureSQLToolset

    @staticmethod
    def validate_config(
        api_client: AzureSQLAPIClient, database_config: AzureSQLDatabaseConfig
    ) -> Tuple[bool, str]:
        # Each tool is able to validate whether it can work and generate output with this config.
        # The tool should report an error if a permission is missing. e.g. return False, "The client '597a70b9-9f01-4739-ac3e-ac8a934e9ffc' with object id '597a70b9-9f01-4739-ac3e-ac8a934e9ffc' does not have authorization to perform action 'Microsoft.Insights/metricAlerts/read' over scope '/subscriptions/e7a7e3c5-ff48-4ccb-898b-83aa5d2f9097/resourceGroups/arik-aks-dev_group/providers/Microsoft.Insights' or the scope is invalid."
        # The tool should return multiple errors in the return message if there are multiple issues that prevent it from fully working
        return True, ""
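
The comments in validate_config above describe the intended contract: each concrete tool checks whether it can work with the given configuration and reports every missing permission it finds. A hypothetical sketch of that pattern (not part of the package; the class name and the SELECT 1 probe are assumptions, and the tool-execution methods required by holmes.core.tools are omitted):

    from typing import Tuple

    from holmes.plugins.toolsets.azure_sql.apis.azure_sql_api import AzureSQLAPIClient
    from holmes.plugins.toolsets.azure_sql.azure_base_toolset import (
        AzureSQLDatabaseConfig,
        BaseAzureSQLTool,
    )


    class HypotheticalConnectivityTool(BaseAzureSQLTool):
        @staticmethod
        def validate_config(
            api_client: AzureSQLAPIClient, database_config: AzureSQLDatabaseConfig
        ) -> Tuple[bool, str]:
            errors = []
            try:
                # Lightweight probe: a login or permission failure here means the
                # configured identity cannot query the target database.
                api_client.execute_query(
                    database_config.server_name,
                    database_config.database_name,
                    "SELECT 1 AS probe;",
                )
            except Exception as e:
                errors.append(f"Cannot query database: {e}")
            # Collect every problem found so the caller sees all of them at once.
            return (len(errors) == 0, "\n".join(errors))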