holmesgpt 0.11.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of holmesgpt might be problematic. Click here for more details.

Files changed (183) hide show
  1. holmes/.git_archival.json +7 -0
  2. holmes/__init__.py +76 -0
  3. holmes/__init__.py.bak +76 -0
  4. holmes/clients/robusta_client.py +24 -0
  5. holmes/common/env_vars.py +47 -0
  6. holmes/config.py +526 -0
  7. holmes/core/__init__.py +0 -0
  8. holmes/core/conversations.py +578 -0
  9. holmes/core/investigation.py +152 -0
  10. holmes/core/investigation_structured_output.py +264 -0
  11. holmes/core/issue.py +54 -0
  12. holmes/core/llm.py +250 -0
  13. holmes/core/models.py +157 -0
  14. holmes/core/openai_formatting.py +51 -0
  15. holmes/core/performance_timing.py +72 -0
  16. holmes/core/prompt.py +42 -0
  17. holmes/core/resource_instruction.py +17 -0
  18. holmes/core/runbooks.py +26 -0
  19. holmes/core/safeguards.py +120 -0
  20. holmes/core/supabase_dal.py +540 -0
  21. holmes/core/tool_calling_llm.py +798 -0
  22. holmes/core/tools.py +566 -0
  23. holmes/core/tools_utils/__init__.py +0 -0
  24. holmes/core/tools_utils/tool_executor.py +65 -0
  25. holmes/core/tools_utils/toolset_utils.py +52 -0
  26. holmes/core/toolset_manager.py +418 -0
  27. holmes/interactive.py +229 -0
  28. holmes/main.py +1041 -0
  29. holmes/plugins/__init__.py +0 -0
  30. holmes/plugins/destinations/__init__.py +6 -0
  31. holmes/plugins/destinations/slack/__init__.py +2 -0
  32. holmes/plugins/destinations/slack/plugin.py +163 -0
  33. holmes/plugins/interfaces.py +32 -0
  34. holmes/plugins/prompts/__init__.py +48 -0
  35. holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
  36. holmes/plugins/prompts/_default_log_prompt.jinja2 +11 -0
  37. holmes/plugins/prompts/_fetch_logs.jinja2 +36 -0
  38. holmes/plugins/prompts/_general_instructions.jinja2 +86 -0
  39. holmes/plugins/prompts/_global_instructions.jinja2 +12 -0
  40. holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -0
  41. holmes/plugins/prompts/_toolsets_instructions.jinja2 +56 -0
  42. holmes/plugins/prompts/generic_ask.jinja2 +36 -0
  43. holmes/plugins/prompts/generic_ask_conversation.jinja2 +32 -0
  44. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +50 -0
  45. holmes/plugins/prompts/generic_investigation.jinja2 +42 -0
  46. holmes/plugins/prompts/generic_post_processing.jinja2 +13 -0
  47. holmes/plugins/prompts/generic_ticket.jinja2 +12 -0
  48. holmes/plugins/prompts/investigation_output_format.jinja2 +32 -0
  49. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +84 -0
  50. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +39 -0
  51. holmes/plugins/runbooks/README.md +22 -0
  52. holmes/plugins/runbooks/__init__.py +100 -0
  53. holmes/plugins/runbooks/catalog.json +14 -0
  54. holmes/plugins/runbooks/jira.yaml +12 -0
  55. holmes/plugins/runbooks/kube-prometheus-stack.yaml +10 -0
  56. holmes/plugins/runbooks/networking/dns_troubleshooting_instructions.md +66 -0
  57. holmes/plugins/runbooks/upgrade/upgrade_troubleshooting_instructions.md +44 -0
  58. holmes/plugins/sources/github/__init__.py +77 -0
  59. holmes/plugins/sources/jira/__init__.py +123 -0
  60. holmes/plugins/sources/opsgenie/__init__.py +93 -0
  61. holmes/plugins/sources/pagerduty/__init__.py +147 -0
  62. holmes/plugins/sources/prometheus/__init__.py +0 -0
  63. holmes/plugins/sources/prometheus/models.py +104 -0
  64. holmes/plugins/sources/prometheus/plugin.py +154 -0
  65. holmes/plugins/toolsets/__init__.py +171 -0
  66. holmes/plugins/toolsets/aks-node-health.yaml +65 -0
  67. holmes/plugins/toolsets/aks.yaml +86 -0
  68. holmes/plugins/toolsets/argocd.yaml +70 -0
  69. holmes/plugins/toolsets/atlas_mongodb/instructions.jinja2 +8 -0
  70. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +307 -0
  71. holmes/plugins/toolsets/aws.yaml +76 -0
  72. holmes/plugins/toolsets/azure_sql/__init__.py +0 -0
  73. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +600 -0
  74. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +309 -0
  75. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +445 -0
  76. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +251 -0
  77. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +317 -0
  78. holmes/plugins/toolsets/azure_sql/azure_base_toolset.py +55 -0
  79. holmes/plugins/toolsets/azure_sql/azure_sql_instructions.jinja2 +137 -0
  80. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +183 -0
  81. holmes/plugins/toolsets/azure_sql/install.md +66 -0
  82. holmes/plugins/toolsets/azure_sql/tools/__init__.py +1 -0
  83. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +324 -0
  84. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +243 -0
  85. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +205 -0
  86. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +249 -0
  87. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +373 -0
  88. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +237 -0
  89. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +172 -0
  90. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +170 -0
  91. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +188 -0
  92. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +180 -0
  93. holmes/plugins/toolsets/azure_sql/utils.py +83 -0
  94. holmes/plugins/toolsets/bash/__init__.py +0 -0
  95. holmes/plugins/toolsets/bash/bash_instructions.jinja2 +14 -0
  96. holmes/plugins/toolsets/bash/bash_toolset.py +208 -0
  97. holmes/plugins/toolsets/bash/common/bash.py +52 -0
  98. holmes/plugins/toolsets/bash/common/config.py +14 -0
  99. holmes/plugins/toolsets/bash/common/stringify.py +25 -0
  100. holmes/plugins/toolsets/bash/common/validators.py +24 -0
  101. holmes/plugins/toolsets/bash/grep/__init__.py +52 -0
  102. holmes/plugins/toolsets/bash/kubectl/__init__.py +100 -0
  103. holmes/plugins/toolsets/bash/kubectl/constants.py +96 -0
  104. holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +66 -0
  105. holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +88 -0
  106. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +108 -0
  107. holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +20 -0
  108. holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +46 -0
  109. holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +81 -0
  110. holmes/plugins/toolsets/bash/parse_command.py +103 -0
  111. holmes/plugins/toolsets/confluence.yaml +19 -0
  112. holmes/plugins/toolsets/consts.py +5 -0
  113. holmes/plugins/toolsets/coralogix/api.py +158 -0
  114. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +103 -0
  115. holmes/plugins/toolsets/coralogix/utils.py +181 -0
  116. holmes/plugins/toolsets/datadog.py +153 -0
  117. holmes/plugins/toolsets/docker.yaml +46 -0
  118. holmes/plugins/toolsets/git.py +756 -0
  119. holmes/plugins/toolsets/grafana/__init__.py +0 -0
  120. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +54 -0
  121. holmes/plugins/toolsets/grafana/common.py +68 -0
  122. holmes/plugins/toolsets/grafana/grafana_api.py +31 -0
  123. holmes/plugins/toolsets/grafana/loki_api.py +89 -0
  124. holmes/plugins/toolsets/grafana/tempo_api.py +124 -0
  125. holmes/plugins/toolsets/grafana/toolset_grafana.py +102 -0
  126. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +102 -0
  127. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +10 -0
  128. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -0
  129. holmes/plugins/toolsets/grafana/trace_parser.py +195 -0
  130. holmes/plugins/toolsets/helm.yaml +42 -0
  131. holmes/plugins/toolsets/internet/internet.py +275 -0
  132. holmes/plugins/toolsets/internet/notion.py +137 -0
  133. holmes/plugins/toolsets/kafka.py +638 -0
  134. holmes/plugins/toolsets/kubernetes.yaml +255 -0
  135. holmes/plugins/toolsets/kubernetes_logs.py +426 -0
  136. holmes/plugins/toolsets/kubernetes_logs.yaml +42 -0
  137. holmes/plugins/toolsets/logging_utils/__init__.py +0 -0
  138. holmes/plugins/toolsets/logging_utils/logging_api.py +217 -0
  139. holmes/plugins/toolsets/logging_utils/types.py +0 -0
  140. holmes/plugins/toolsets/mcp/toolset_mcp.py +135 -0
  141. holmes/plugins/toolsets/newrelic.py +222 -0
  142. holmes/plugins/toolsets/opensearch/__init__.py +0 -0
  143. holmes/plugins/toolsets/opensearch/opensearch.py +245 -0
  144. holmes/plugins/toolsets/opensearch/opensearch_logs.py +151 -0
  145. holmes/plugins/toolsets/opensearch/opensearch_traces.py +211 -0
  146. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +12 -0
  147. holmes/plugins/toolsets/opensearch/opensearch_utils.py +166 -0
  148. holmes/plugins/toolsets/prometheus/prometheus.py +818 -0
  149. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +38 -0
  150. holmes/plugins/toolsets/rabbitmq/api.py +398 -0
  151. holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2 +37 -0
  152. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +222 -0
  153. holmes/plugins/toolsets/robusta/__init__.py +0 -0
  154. holmes/plugins/toolsets/robusta/robusta.py +235 -0
  155. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +24 -0
  156. holmes/plugins/toolsets/runbook/__init__.py +0 -0
  157. holmes/plugins/toolsets/runbook/runbook_fetcher.py +78 -0
  158. holmes/plugins/toolsets/service_discovery.py +92 -0
  159. holmes/plugins/toolsets/servicenow/install.md +37 -0
  160. holmes/plugins/toolsets/servicenow/instructions.jinja2 +3 -0
  161. holmes/plugins/toolsets/servicenow/servicenow.py +198 -0
  162. holmes/plugins/toolsets/slab.yaml +20 -0
  163. holmes/plugins/toolsets/utils.py +137 -0
  164. holmes/plugins/utils.py +14 -0
  165. holmes/utils/__init__.py +0 -0
  166. holmes/utils/cache.py +84 -0
  167. holmes/utils/cert_utils.py +40 -0
  168. holmes/utils/default_toolset_installation_guide.jinja2 +44 -0
  169. holmes/utils/definitions.py +13 -0
  170. holmes/utils/env.py +53 -0
  171. holmes/utils/file_utils.py +56 -0
  172. holmes/utils/global_instructions.py +20 -0
  173. holmes/utils/holmes_status.py +22 -0
  174. holmes/utils/holmes_sync_toolsets.py +80 -0
  175. holmes/utils/markdown_utils.py +55 -0
  176. holmes/utils/pydantic_utils.py +54 -0
  177. holmes/utils/robusta.py +10 -0
  178. holmes/utils/tags.py +97 -0
  179. holmesgpt-0.11.5.dist-info/LICENSE.txt +21 -0
  180. holmesgpt-0.11.5.dist-info/METADATA +400 -0
  181. holmesgpt-0.11.5.dist-info/RECORD +183 -0
  182. holmesgpt-0.11.5.dist-info/WHEEL +4 -0
  183. holmesgpt-0.11.5.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,445 @@
1
+ from typing import Dict, List, Any
2
+ import logging
3
+ from datetime import datetime, timezone, timedelta
4
+ from azure.core.credentials import TokenCredential
5
+ from azure.mgmt.monitor import MonitorManagementClient
6
+
7
+
8
class ConnectionFailureAPI:
    """API client for analyzing Azure SQL Database connection failures and patterns.

    Wraps Azure Monitor (metrics and activity logs) to collect connection-related
    telemetry for a single database over a lookback window and derive a summary
    of likely failure causes. All public entry points return plain dicts and
    report errors via an ``"error"`` key instead of raising.
    """

    def __init__(
        self,
        credential: TokenCredential,
        subscription_id: str,
    ):
        # Keep credential/subscription so resource IDs can be built later;
        # one MonitorManagementClient instance is reused for every query.
        self.credential = credential
        self.subscription_id = subscription_id
        self.monitor_client = MonitorManagementClient(credential, subscription_id)

    def _build_database_resource_id(
        self, resource_group: str, server_name: str, database_name: str
    ) -> str:
        """Build the full Azure resource ID for the SQL database."""
        return (
            f"/subscriptions/{self.subscription_id}/"
            f"resourceGroups/{resource_group}/"
            f"providers/Microsoft.Sql/servers/{server_name}/"
            f"databases/{database_name}"
        )

    def _build_server_resource_id(self, resource_group: str, server_name: str) -> str:
        """Build the full Azure resource ID for the SQL server."""
        return (
            f"/subscriptions/{self.subscription_id}/"
            f"resourceGroups/{resource_group}/"
            f"providers/Microsoft.Sql/servers/{server_name}"
        )

    def analyze_connection_failures(
        self,
        resource_group: str,
        server_name: str,
        database_name: str,
        hours_back: int = 24,
    ) -> Dict[str, Any]:
        """Analyze connection failures and patterns for the SQL database.

        Args:
            resource_group: Resource group containing the SQL server.
            server_name: Logical SQL server name.
            database_name: Database to analyze.
            hours_back: Size of the lookback window, in hours (default 24).

        Returns:
            Dict with the resource IDs, the time range, raw ``connection_metrics``,
            a server-level placeholder note, ``activity_events``, and a derived
            ``analysis`` section; on any exception, ``{"error": <message>}``.
        """
        try:
            database_resource_id = self._build_database_resource_id(
                resource_group, server_name, database_name
            )
            server_resource_id = self._build_server_resource_id(
                resource_group, server_name
            )

            # Lookback window anchored at "now" in UTC.
            end_time = datetime.now(timezone.utc)
            start_time = end_time - timedelta(hours=hours_back)

            # Connection-related metrics to analyze (database-level only)
            connection_metrics = [
                "connection_failed",
                "connection_successful",
                "blocked_by_firewall",
                "connection_failed_user_error",
                "sessions_count",
                "sessions_percent",
                "workers_percent",
            ]

            # Get connection metrics (only from database, not server)
            connection_data = self._get_connection_metrics(
                database_resource_id, connection_metrics, start_time, end_time
            )

            # Server-level metrics are not available for connection failures
            # Only DTU and storage metrics are available at server level
            server_connection_data = {
                "note": "Connection metrics only available at database level"
            }

            # Analyze activity logs for connection-related events
            activity_log_data = self._analyze_connection_activity_logs(
                database_resource_id, server_resource_id, start_time, end_time
            )

            # Combine and analyze all data
            analysis = self._analyze_connection_patterns(
                connection_data, server_connection_data, activity_log_data
            )

            return {
                "database_resource_id": database_resource_id,
                "server_resource_id": server_resource_id,
                "time_range": {
                    "start": start_time.isoformat(),
                    "end": end_time.isoformat(),
                    "hours": hours_back,
                },
                "connection_metrics": connection_data,
                "server_metrics": server_connection_data,
                "activity_events": activity_log_data,
                "analysis": analysis,
                "retrieved_at": datetime.now(timezone.utc).isoformat(),
            }

        except Exception as e:
            # Top-level guard: callers get an error payload rather than an exception.
            error_msg = f"Failed to analyze connection failures: {str(e)}"
            logging.error(error_msg, exc_info=True)
            return {"error": error_msg}

    def _get_connection_metrics(
        self,
        resource_id: str,
        metric_names: List[str],
        start_time: datetime,
        end_time: datetime,
    ) -> Dict[str, Any]:
        """Get connection-related metrics from Azure Monitor.

        Queries each metric individually (one API call per name) at 1-hour
        granularity; a metric that fails still yields an entry with an
        ``"error"`` key and empty ``"values"`` so callers can iterate uniformly.
        """
        try:
            metrics_data = {}

            for metric_name in metric_names:
                try:
                    # Get metric data with proper ISO 8601 format
                    timespan = f"{start_time.strftime('%Y-%m-%dT%H:%M:%S.%fZ')}/{end_time.strftime('%Y-%m-%dT%H:%M:%S.%fZ')}"
                    metrics = self.monitor_client.metrics.list(
                        resource_uri=resource_id,
                        timespan=timespan,
                        interval="PT1H",  # 1-hour intervals
                        metricnames=metric_name,
                        aggregation="Total,Average,Maximum",
                    )

                    # Flatten all timeseries into one list of per-hour samples.
                    metric_values = []
                    for metric in metrics.value:
                        if metric.timeseries:
                            for timeseries in metric.timeseries:
                                for data_point in timeseries.data:
                                    if data_point.time_stamp:
                                        metric_values.append(
                                            {
                                                "timestamp": data_point.time_stamp.isoformat(),
                                                "total": data_point.total,
                                                "average": data_point.average,
                                                "maximum": data_point.maximum,
                                            }
                                        )

                    metrics_data[metric_name] = {
                        "values": metric_values,
                        "total_data_points": len(metric_values),
                    }

                except Exception as e:
                    # Only log as warning if it's not a known metric availability issue
                    error_msg = str(e)
                    if "Failed to find metric configuration" in error_msg:
                        logging.info(
                            f"Metric {metric_name} not available for this resource type"
                        )
                    else:
                        logging.warning(f"Failed to get metric {metric_name}: {e}")

                    # Keep the entry shape consistent with the success path.
                    metrics_data[metric_name] = {
                        "error": str(e),
                        "values": [],
                        "total_data_points": 0,
                    }

            return metrics_data

        except Exception as e:
            logging.error(f"Failed to get connection metrics: {e}")
            return {"error": str(e)}

    def _get_server_connection_metrics(
        self, server_resource_id: str, start_time: datetime, end_time: datetime
    ) -> Dict[str, Any]:
        """Get server-level connection metrics - Note: Connection metrics not available at server level.

        NOTE(review): the parameters are currently unused and this helper is not
        called by analyze_connection_failures (which inlines an equivalent note);
        it returns a static placeholder only.
        """
        # Connection failure metrics are only available at database level
        # Server level only has DTU and storage metrics
        return {
            "note": "Connection failure metrics are only available at database level",
            "available_server_metrics": [
                "dtu_consumption_percent",
                "storage_used",
                "dtu_used",
            ],
            "connection_metrics_location": "database_level_only",
        }

    def _analyze_connection_activity_logs(
        self,
        database_resource_id: str,
        server_resource_id: str,
        start_time: datetime,
        end_time: datetime,
    ) -> Dict[str, Any]:
        """Analyze activity logs for connection-related events.

        Filters the activity log to the database resource and time range, then
        classifies each entry client-side as connection-related (by operation
        name keywords) and/or severe (Warning/Error/Critical). Returns the
        matching events plus summary counts; ``server_resource_id`` is accepted
        but not used in the query (only database-scoped events are fetched).
        """
        try:
            # Connection-related operation names to look for
            connection_operations = [
                "Microsoft.Sql/servers/databases/connect",
                "Microsoft.Sql/servers/connect",
                "Microsoft.Sql/servers/databases/disconnect",
                "Microsoft.Sql/servers/firewallRules/write",
                "Microsoft.Sql/servers/connectionPolicies/write",
            ]

            # Activity logs filter - remove unsupported level filter
            filter_query = (
                f"eventTimestamp ge '{start_time.strftime('%Y-%m-%dT%H:%M:%S.%fZ')}' and "
                f"eventTimestamp le '{end_time.strftime('%Y-%m-%dT%H:%M:%S.%fZ')}' and "
                f"resourceId eq '{database_resource_id}'"
            )

            activity_logs = self.monitor_client.activity_logs.list(filter=filter_query)

            connection_events = []
            for log_entry in activity_logs:
                if hasattr(log_entry, "operation_name") and log_entry.operation_name:
                    # operation_name is a localizable object on real SDK entries;
                    # prefer its .value, fall back to str() otherwise.
                    operation_name = (
                        log_entry.operation_name.value
                        if hasattr(log_entry.operation_name, "value")
                        else str(log_entry.operation_name)
                    )

                    # Check if this is a connection-related operation
                    is_connection_related = any(
                        op in operation_name for op in connection_operations
                    ) or any(
                        keyword in operation_name.lower()
                        for keyword in [
                            "connect",
                            "firewall",
                            "auth",
                            "login",
                            "security",
                        ]
                    )

                    # Filter by level after getting the data, since level filter isn't supported in query
                    if is_connection_related or (
                        hasattr(log_entry, "level")
                        and log_entry.level in ["Warning", "Error", "Critical"]
                    ):
                        event_data = {
                            "timestamp": getattr(
                                log_entry, "event_timestamp", end_time
                            ).isoformat(),
                            "operation_name": operation_name,
                            "level": getattr(log_entry, "level", "Unknown"),
                            # NOTE(review): real activity-log ``status`` objects
                            # expose ``.value`` as an attribute, not a mapping;
                            # this dict-style ``.get`` branch likely never fires,
                            # leaving "Unknown" — confirm against the SDK type.
                            "status": getattr(log_entry, "status", {}).get(
                                "value", "Unknown"
                            )
                            if hasattr(getattr(log_entry, "status", {}), "get")
                            else "Unknown",
                            "caller": getattr(log_entry, "caller", "Unknown"),
                            "description": getattr(
                                log_entry, "description", "No description"
                            ),
                            "resource_id": getattr(log_entry, "resource_id", ""),
                            "correlation_id": getattr(log_entry, "correlation_id", ""),
                            "is_connection_related": is_connection_related,
                        }
                        connection_events.append(event_data)

            # Sort by timestamp, most recent first
            connection_events.sort(key=lambda x: x["timestamp"], reverse=True)

            return {
                "events": connection_events,
                "total_events": len(connection_events),
                "connection_related_events": len(
                    [e for e in connection_events if e["is_connection_related"]]
                ),
                "error_events": len(
                    [
                        e
                        for e in connection_events
                        if e["level"] in ["Error", "Critical"]
                    ]
                ),
                "warning_events": len(
                    [e for e in connection_events if e["level"] == "Warning"]
                ),
            }

        except Exception as e:
            logging.error(f"Failed to analyze connection activity logs: {e}")
            return {"error": str(e), "events": [], "total_events": 0}

    def _analyze_connection_patterns(
        self,
        connection_data: Dict[str, Any],
        server_data: Dict[str, Any],
        activity_data: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Analyze connection patterns and identify issues.

        Aggregates the per-hour samples gathered earlier into human-readable
        findings (``issues_detected``), ``recommendations``, a computed
        ``metrics_analysis`` and an overall ``summary`` status. ``server_data``
        is accepted for symmetry but not inspected. Never raises: analysis
        errors are recorded under ``analysis["error"]``.
        """
        analysis: dict = {
            "summary": {},
            "issues_detected": [],
            "recommendations": [],
            "metrics_analysis": {},
        }

        try:
            # Analyze connection failure metrics
            if "connection_failed" in connection_data and connection_data[
                "connection_failed"
            ].get("values"):
                failed_connections = connection_data["connection_failed"]["values"]
                # ``or 0`` guards against None aggregation values from Azure.
                total_failures = sum(
                    dp.get("total", 0) or 0 for dp in failed_connections
                )
                max_failures_per_hour = max(
                    (dp.get("maximum", 0) or 0 for dp in failed_connections), default=0
                )

                analysis["metrics_analysis"]["connection_failures"] = {
                    "total_failed_connections": total_failures,
                    "max_failures_per_hour": max_failures_per_hour,
                    # Trend compares only the first vs last hourly sample.
                    "failure_trend": "increasing"
                    if len(failed_connections) > 1
                    and (failed_connections[-1].get("total", 0) or 0)
                    > (failed_connections[0].get("total", 0) or 0)
                    else "stable",
                }

                if total_failures > 0:
                    analysis["issues_detected"].append(
                        f"🔴 {int(total_failures)} connection failures detected"
                    )
                if max_failures_per_hour > 10:
                    analysis["issues_detected"].append(
                        f"⚠️ High failure rate: {int(max_failures_per_hour)} failures in single hour"
                    )

            # Analyze firewall blocks
            if "blocked_by_firewall" in connection_data and connection_data[
                "blocked_by_firewall"
            ].get("values"):
                firewall_blocks = connection_data["blocked_by_firewall"]["values"]
                total_blocks = sum(dp.get("total", 0) or 0 for dp in firewall_blocks)

                if total_blocks > 0:
                    analysis["issues_detected"].append(
                        f"🚫 {int(total_blocks)} connections blocked by firewall"
                    )
                    analysis["recommendations"].append(
                        "Review firewall rules - clients may be connecting from unauthorized IP addresses"
                    )

            # Analyze successful connections for context
            if "connection_successful" in connection_data and connection_data[
                "connection_successful"
            ].get("values"):
                successful_connections = connection_data["connection_successful"][
                    "values"
                ]
                total_successful = sum(
                    dp.get("total", 0) or 0 for dp in successful_connections
                )

                analysis["metrics_analysis"]["successful_connections"] = {
                    "total_successful_connections": total_successful
                }

                # Calculate failure rate if we have both metrics
                if "connection_failures" in analysis["metrics_analysis"]:
                    total_failures = analysis["metrics_analysis"][
                        "connection_failures"
                    ]["total_failed_connections"]
                    if total_successful + total_failures > 0:
                        failure_rate = (
                            total_failures / (total_successful + total_failures)
                        ) * 100
                        analysis["metrics_analysis"]["failure_rate_percent"] = round(
                            failure_rate, 2
                        )

                        # 5% is the hard-coded alert threshold for failure rate.
                        if failure_rate > 5:
                            analysis["issues_detected"].append(
                                f"📊 High connection failure rate: {failure_rate:.1f}%"
                            )

            # Analyze activity log events
            if "events" in activity_data and activity_data["events"]:
                error_events = [
                    e
                    for e in activity_data["events"]
                    if e["level"] in ["Error", "Critical"]
                ]
                if error_events:
                    analysis["issues_detected"].append(
                        f"📋 {len(error_events)} error-level events in activity logs"
                    )

                # Look for specific patterns
                auth_events = [
                    e
                    for e in activity_data["events"]
                    if "auth" in e["operation_name"].lower()
                    or "login" in e["operation_name"].lower()
                ]
                if auth_events:
                    analysis["issues_detected"].append(
                        f"🔐 {len(auth_events)} authentication-related events detected"
                    )

            # Generate recommendations based on findings
            if not analysis["issues_detected"]:
                analysis["summary"]["status"] = "healthy"
                analysis["summary"]["message"] = (
                    "✅ No significant connection issues detected"
                )
            else:
                analysis["summary"]["status"] = "issues_detected"
                analysis["summary"]["message"] = (
                    f"⚠️ {len(analysis['issues_detected'])} connection issues detected"
                )

                # Add general recommendations
                if any(
                    "failure" in issue.lower() for issue in analysis["issues_detected"]
                ):
                    analysis["recommendations"].extend(
                        [
                            "Monitor application connection pooling configuration",
                            "Check for network connectivity issues between client and server",
                            "Review connection timeout settings in application",
                        ]
                    )

                if any(
                    "firewall" in issue.lower() for issue in analysis["issues_detected"]
                ):
                    analysis["recommendations"].append(
                        "Validate client IP addresses against firewall rules"
                    )

        except Exception as e:
            logging.error(f"Failed to analyze connection patterns: {e}")
            analysis["error"] = str(e)

        return analysis