rootly-mcp-server 2.0.15__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- rootly_mcp_server/__init__.py +9 -5
- rootly_mcp_server/__main__.py +44 -29
- rootly_mcp_server/client.py +98 -44
- rootly_mcp_server/data/__init__.py +1 -1
- rootly_mcp_server/exceptions.py +148 -0
- rootly_mcp_server/monitoring.py +378 -0
- rootly_mcp_server/pagination.py +98 -0
- rootly_mcp_server/security.py +404 -0
- rootly_mcp_server/server.py +1002 -467
- rootly_mcp_server/smart_utils.py +294 -209
- rootly_mcp_server/utils.py +48 -33
- rootly_mcp_server/validators.py +147 -0
- {rootly_mcp_server-2.0.15.dist-info → rootly_mcp_server-2.1.0.dist-info}/METADATA +66 -13
- rootly_mcp_server-2.1.0.dist-info/RECORD +18 -0
- {rootly_mcp_server-2.0.15.dist-info → rootly_mcp_server-2.1.0.dist-info}/WHEEL +1 -1
- rootly_mcp_server-2.0.15.dist-info/RECORD +0 -13
- {rootly_mcp_server-2.0.15.dist-info → rootly_mcp_server-2.1.0.dist-info}/entry_points.txt +0 -0
- {rootly_mcp_server-2.0.15.dist-info → rootly_mcp_server-2.1.0.dist-info}/licenses/LICENSE +0 -0
rootly_mcp_server/smart_utils.py
CHANGED
```diff
@@ -5,18 +5,18 @@ This module provides text similarity, pattern matching, and intelligent analysis
 functions for implementing smart incident management features.
 """
 
-
+# Check ML library availability
+import importlib.util
 import logging
-
+import re
 from dataclasses import dataclass
 from datetime import datetime
+from typing import Any
 
-# Check ML library availability
-import importlib.util
 try:
     ML_AVAILABLE = (
-        importlib.util.find_spec("sklearn.feature_extraction.text") is not None
-        importlib.util.find_spec("sklearn.metrics.pairwise") is not None
+        importlib.util.find_spec("sklearn.feature_extraction.text") is not None
+        and importlib.util.find_spec("sklearn.metrics.pairwise") is not None
     )
 except (ImportError, ModuleNotFoundError):
     ML_AVAILABLE = False
```
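Two things stand out in this first hunk: the imports are reordered, and, as rendered here, the old `ML_AVAILABLE` expression was missing the `and` between the two `find_spec` probes. The probes sit inside `try`/`except` because `importlib.util.find_spec` raises `ModuleNotFoundError` when a parent package (here `sklearn`) is not installed at all, rather than returning `None`. A minimal standalone sketch of the same check; the `has_module` helper is our illustration, not part of the package:

```python
import importlib.util

def has_module(name: str) -> bool:
    """True if `name` resolves to an importable module, without importing it."""
    try:
        # find_spec returns None for a missing leaf module, but raises
        # ModuleNotFoundError when a parent package is absent entirely.
        return importlib.util.find_spec(name) is not None
    except (ImportError, ModuleNotFoundError):
        return False

ML_AVAILABLE = has_module("sklearn.feature_extraction.text") and has_module(
    "sklearn.metrics.pairwise"
)
```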
```diff
@@ -27,255 +27,328 @@ logger = logging.getLogger(__name__)
 @dataclass
 class IncidentSimilarity:
     """Represents similarity between two incidents."""
+
     incident_id: str
     title: str
     similarity_score: float
-    matched_services:
-    matched_keywords:
+    matched_services: list[str]
+    matched_keywords: list[str]
     resolution_summary: str = ""
-    resolution_time_hours:
+    resolution_time_hours: float | None = None
 
 
 class TextSimilarityAnalyzer:
     """Analyzes text similarity between incidents using TF-IDF and cosine similarity."""
-
+
     def __init__(self):
         if not ML_AVAILABLE:
-            logger.warning(
+            logger.warning(
+                "scikit-learn not available. Text similarity will use basic keyword matching."
+            )
         self.vectorizer = None
         self.incident_vectors = None
         self.incident_metadata = {}
-
-    def preprocess_text(self, text:
+
+    def preprocess_text(self, text: str | None) -> str:
         """Clean and normalize text for analysis."""
         if not text:
             return ""
-
+
         # Convert to lowercase
         text = text.lower()
-
+
         # Remove special characters but keep spaces and important symbols
-        text = re.sub(r
-
+        text = re.sub(r"[^\w\s\-\.]", " ", text)
+
         # Replace multiple spaces with single space
-        text = re.sub(r
-
+        text = re.sub(r"\s+", " ", text)
+
         # Remove common stopwords manually (basic set)
-        stopwords = {
+        stopwords = {
+            "the",
+            "a",
+            "an",
+            "and",
+            "or",
+            "but",
+            "in",
+            "on",
+            "at",
+            "to",
+            "for",
+            "of",
+            "with",
+            "by",
+            "is",
+            "are",
+            "was",
+            "were",
+        }
         words = text.split()
-        text =
-
+        text = " ".join([word for word in words if word not in stopwords and len(word) > 1])
+
         return text.strip()
-
-    def extract_services(self, text: str) ->
+
+    def extract_services(self, text: str) -> list[str]:
         """Extract service names from incident text."""
         services = []
-
+
         # Common service patterns
         service_patterns = [
-            r
-            r
-            r
-            r
+            r"\b(\w+)-(?:service|api|app|server|db)\b",  # service-api, auth-service
+            r"\b(\w+)(?:service|api|app|server|db)\b",  # paymentapi, authservice
+            r"\b(\w+)\.(?:service|api|app|com)\b",  # auth.service, api.com
+            r"\b(\w+)\s+(?:api|service|app|server|db)\b",  # payment api, auth service
         ]
-
+
         # Known service names (exact matches)
         known_services = [
-
-
+            "elasticsearch",
+            "elastic",
+            "kibana",
+            "redis",
+            "postgres",
+            "mysql",
+            "mongodb",
+            "kafka",
+            "rabbitmq",
+            "nginx",
+            "apache",
+            "docker",
+            "kubernetes",
         ]
-
+
         text_lower = text.lower()
-
+
         # Extract pattern-based services
         for pattern in service_patterns:
             matches = re.findall(pattern, text_lower)
             services.extend(matches)
-
+
         # Extract known services (with word boundaries to avoid false positives)
         for service in known_services:
-            if re.search(r
+            if re.search(r"\b" + re.escape(service) + r"\b", text_lower):
                 services.append(service)
-
+
         # Remove duplicates while preserving order
         return list(dict.fromkeys(services))
-
-    def extract_error_patterns(self, text: str) ->
+
+    def extract_error_patterns(self, text: str) -> list[str]:
         """Extract common error patterns from incident text."""
         patterns = []
-
+
         # HTTP status codes
-        http_codes = re.findall(r
+        http_codes = re.findall(r"\b[45]\d\d\b", text)
         patterns.extend([f"http-{code}" for code in http_codes])
-
+
         # Database errors
-        if re.search(r
+        if re.search(r"\b(?:connection|timeout|database|db)\b", text.lower()):
             patterns.append("database-error")
-
-        # Memory/resource errors
-        if re.search(r
+
+        # Memory/resource errors
+        if re.search(r"\b(?:memory|cpu|disk|resource)\b", text.lower()):
             patterns.append("resource-error")
-
+
         # Network errors
-        if re.search(r
+        if re.search(r"\b(?:network|dns|connection|unreachable)\b", text.lower()):
             patterns.append("network-error")
-
+
         return patterns
-
-    def calculate_similarity(
+
+    def calculate_similarity(
+        self, incidents: list[dict], target_incident: dict
+    ) -> list[IncidentSimilarity]:
         """Calculate similarity scores between target incident and historical incidents."""
         if not incidents:
             return []
-
+
         target_text = self._combine_incident_text(target_incident)
         target_services = self.extract_services(target_text)
         target_errors = self.extract_error_patterns(target_text)
-
+
         similarities = []
-
+
         if ML_AVAILABLE and len(incidents) > 1:
-            similarities = self._calculate_tfidf_similarity(
+            similarities = self._calculate_tfidf_similarity(
+                incidents, target_incident, target_text, target_services, target_errors
+            )
         else:
-            similarities = self._calculate_keyword_similarity(
-
+            similarities = self._calculate_keyword_similarity(
+                incidents, target_incident, target_text, target_services, target_errors
+            )
+
         # Sort by similarity score descending
         return sorted(similarities, key=lambda x: x.similarity_score, reverse=True)
-
-    def _combine_incident_text(self, incident:
+
+    def _combine_incident_text(self, incident: dict) -> str:
         """Combine incident title, description, and other text fields."""
         text_parts = []
-
+
         # Get text from incident attributes (preferred)
-        attributes = incident.get(
-        title = attributes.get(
-        summary = attributes.get(
-        description = attributes.get(
-
+        attributes = incident.get("attributes", {})
+        title = attributes.get("title", "")
+        summary = attributes.get("summary", "")
+        description = attributes.get("description", "")
+
         # Fallback to root level if attributes are empty
         if not title:
-            title = incident.get(
+            title = incident.get("title", "")
         if not summary:
-            summary = incident.get(
+            summary = incident.get("summary", "")
         if not description:
-            description = incident.get(
-
+            description = incident.get("description", "")
+
         # Add non-empty parts, avoiding duplication
         for part in [title, summary, description]:
             if part and part not in text_parts:
                 text_parts.append(part)
-
-        combined =
+
+        combined = " ".join(text_parts)
         return self.preprocess_text(combined)
-
-    def _calculate_tfidf_similarity(
-
-
+
+    def _calculate_tfidf_similarity(
+        self,
+        incidents: list[dict],
+        target_incident: dict,
+        target_text: str,
+        target_services: list[str],
+        target_errors: list[str],
+    ) -> list[IncidentSimilarity]:
         """Use TF-IDF and cosine similarity for advanced text matching."""
         if not ML_AVAILABLE:
             return []
-
+
         # Import here to avoid issues with conditional imports
         from sklearn.feature_extraction.text import TfidfVectorizer
         from sklearn.metrics.pairwise import cosine_similarity
-
+
         # Prepare texts
         incident_texts = [self._combine_incident_text(inc) for inc in incidents]
         all_texts = incident_texts + [target_text]
-
+
         # Vectorize
         vectorizer = TfidfVectorizer(max_features=1000, ngram_range=(1, 2))
         tfidf_matrix = vectorizer.fit_transform(all_texts)
-
+
         # Calculate similarities
         target_vector = tfidf_matrix[-1]
         similarities = cosine_similarity(target_vector, tfidf_matrix[:-1]).flatten()
-
+
         results = []
         for i, incident in enumerate(incidents):
             if similarities[i] > 0.1:  # Only include reasonable matches
                 incident_services = self.extract_services(incident_texts[i])
                 incident_errors = self.extract_error_patterns(incident_texts[i])
-
+
                 # Bonus for matching services and error patterns
                 service_bonus = len(set(target_services) & set(incident_services)) * 0.1
                 error_bonus = len(set(target_errors) & set(incident_errors)) * 0.15
-
+
                 # Exact match bonus for identical preprocessed text
                 exact_match_bonus = 0.0
-                if
+                if (
+                    target_text
+                    and incident_texts[i]
+                    and target_text.strip() == incident_texts[i].strip()
+                ):
                     exact_match_bonus = 0.3  # Strong bonus for exact matches
-
+
                 # Partial matching bonus using fuzzy keyword similarity
-                partial_bonus = self._calculate_partial_similarity_bonus(
-
-
-
-
-
-
-
-
-
-
-
-
-
+                partial_bonus = self._calculate_partial_similarity_bonus(
+                    target_text, incident_texts[i]
+                )
+
+                final_score = min(
+                    1.0,
+                    similarities[i]
+                    + service_bonus
+                    + error_bonus
+                    + exact_match_bonus
+                    + partial_bonus,
+                )
+
+                results.append(
+                    IncidentSimilarity(
+                        incident_id=str(incident.get("id", "")),
+                        title=incident.get("attributes", {}).get("title", "Unknown"),
+                        similarity_score=final_score,
+                        matched_services=list(set(target_services) & set(incident_services)),
+                        matched_keywords=self._extract_common_keywords(
+                            target_text, incident_texts[i]
+                        ),
+                        resolution_summary=incident.get("attributes", {}).get("summary", ""),
+                        resolution_time_hours=self._calculate_resolution_time(incident),
+                    )
+                )
+
         return results
-
-    def _calculate_keyword_similarity(
-
-
+
+    def _calculate_keyword_similarity(
+        self,
+        incidents: list[dict],
+        target_incident: dict,
+        target_text: str,
+        target_services: list[str],
+        target_errors: list[str],
+    ) -> list[IncidentSimilarity]:
         """Fallback keyword-based similarity when ML libraries not available."""
         target_words = set(target_text.split())
-
+
         results = []
         for incident in incidents:
             incident_text = self._combine_incident_text(incident)
             incident_words = set(incident_text.split())
             incident_services = self.extract_services(incident_text)
             incident_errors = self.extract_error_patterns(incident_text)
-
+
             # Calculate Jaccard similarity
             if len(target_words | incident_words) > 0:
-                word_similarity = len(target_words & incident_words) / len(
+                word_similarity = len(target_words & incident_words) / len(
+                    target_words | incident_words
+                )
             else:
                 word_similarity = 0
-
+
             # Service and error pattern bonuses
             service_bonus = len(set(target_services) & set(incident_services)) * 0.2
             error_bonus = len(set(target_errors) & set(incident_errors)) * 0.25
-
+
             # Exact match bonus for identical preprocessed text
             exact_match_bonus = 0.0
             if target_text and incident_text and target_text.strip() == incident_text.strip():
                 exact_match_bonus = 0.4  # Strong bonus for exact matches in keyword mode
-
+
             # Partial matching bonus using fuzzy keyword similarity
             partial_bonus = self._calculate_partial_similarity_bonus(target_text, incident_text)
-
-            final_score = min(
-
+
+            final_score = min(
+                1.0,
+                word_similarity + service_bonus + error_bonus + exact_match_bonus + partial_bonus,
+            )
+
             if final_score > 0.15:  # Only include reasonable matches
-                results.append(
-
-
-
-
-
-
-
-
-
+                results.append(
+                    IncidentSimilarity(
+                        incident_id=str(incident.get("id", "")),
+                        title=incident.get("attributes", {}).get("title", "Unknown"),
+                        similarity_score=final_score,
+                        matched_services=list(set(target_services) & set(incident_services)),
+                        matched_keywords=list(target_words & incident_words)[:5],  # Top 5 matches
+                        resolution_summary=incident.get("attributes", {}).get("summary", ""),
+                        resolution_time_hours=self._calculate_resolution_time(incident),
+                    )
+                )
+
         return results
-
-    def _extract_common_keywords(self, text1: str, text2: str) ->
+
+    def _extract_common_keywords(self, text1: str, text2: str) -> list[str]:
         """Extract common meaningful keywords between two texts with fuzzy matching."""
         words1 = set(text1.split())
         words2 = set(text2.split())
-
+
         # Exact matches
         exact_common = words1 & words2
-
+
         # Fuzzy matches for partial similarity
         fuzzy_common = []
         for word1 in words1:
```
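The `_calculate_tfidf_similarity` method added above follows the standard scikit-learn recipe: fit a single `TfidfVectorizer` over the historical texts plus the target so all documents share one vocabulary and IDF weighting, then compare the target's row against the others with cosine similarity. A self-contained sketch with invented incident texts:

```python
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

historical = [
    "payment api timeout errors after deploy",
    "redis cache eviction storm on checkout",
]
target = "payment api returning 504 timeouts"

# Same parameters as the diff: unigrams + bigrams, capped vocabulary.
matrix = TfidfVectorizer(max_features=1000, ngram_range=(1, 2)).fit_transform(
    historical + [target]
)

# Last row is the target; one cosine score in [0, 1] per historical incident.
scores = cosine_similarity(matrix[-1], matrix[:-1]).flatten()
print(dict(zip(historical, scores.round(3))))
```

When scikit-learn is unavailable, `_calculate_keyword_similarity` substitutes plain Jaccard overlap, `len(A & B) / len(A | B)`, over the whitespace-tokenized texts, with the same service, error-pattern, exact-match, and partial-match bonuses stacked on top.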
```diff
@@ -285,228 +358,240 @@ class TextSimilarityAnalyzer:
             # Check if words share significant substring (fuzzy matching)
             if self._words_similar(word1, word2):
                 fuzzy_common.append(f"{word1}~{word2}")
-
+
         # Combine exact and fuzzy matches
         all_matches = list(exact_common) + fuzzy_common
-        meaningful = [word for word in all_matches if len(word.split(
+        meaningful = [word for word in all_matches if len(word.split("~")[0]) > 2]
         return meaningful[:8]  # Increased to show more matches
-
+
     def _words_similar(self, word1: str, word2: str) -> bool:
         """Check if two words are similar enough to be considered related."""
         # Handle common variations
         variations = {
-
-
-
-
-
-
-
+            "elastic": ["elasticsearch", "elk"],
+            "payment": ["payments", "pay", "billing"],
+            "database": ["db", "postgres", "mysql", "mongo"],
+            "timeout": ["timeouts", "timed-out", "timing-out"],
+            "service": ["services", "svc", "api", "app"],
+            "error": ["errors", "err", "failure", "failed", "failing"],
+            "down": ["outage", "offline", "unavailable"],
         }
-
+
         # Check if words are variations of each other
         for base, variants in variations.items():
             if (word1 == base and word2 in variants) or (word2 == base and word1 in variants):
                 return True
             if word1 in variants and word2 in variants:
                 return True
-
+
         # Check substring similarity (at least 70% overlap for longer words)
         if len(word1) >= 5 and len(word2) >= 5:
             shorter = min(word1, word2, key=len)
             longer = max(word1, word2, key=len)
             if shorter in longer and len(shorter) / len(longer) >= 0.7:
                 return True
-
+
         # Check if one word starts with the other (for prefixed services)
         if len(word1) >= 4 and len(word2) >= 4:
             if word1.startswith(word2) or word2.startswith(word1):
                 return True
-
+
         return False
-
+
     def _calculate_partial_similarity_bonus(self, text1: str, text2: str) -> float:
         """Calculate bonus for partial/fuzzy keyword matches."""
         if not text1 or not text2:
             return 0.0
-
+
         words1 = set(text1.split())
         words2 = set(text2.split())
-
+
         fuzzy_matches = 0
-
+
         # Count meaningful words that could be compared
         meaningful_words1 = [w for w in words1 if len(w) > 3]
         meaningful_words2 = [w for w in words2 if len(w) > 3]
-
+
         if not meaningful_words1 or not meaningful_words2:
             return 0.0
-
+
         # Count fuzzy matches
         for word1 in meaningful_words1:
             for word2 in meaningful_words2:
                 if word1 != word2 and self._words_similar(word1, word2):
                     fuzzy_matches += 1
                     break  # Only count each target word once
-
+
         # Calculate bonus based on fuzzy match ratio
         if fuzzy_matches > 0:
             # Use the smaller meaningful word set as denominator for conservative bonus
             total_possible_matches = min(len(meaningful_words1), len(meaningful_words2))
             bonus_ratio = fuzzy_matches / total_possible_matches
             return min(0.15, bonus_ratio * 0.3)  # Max 0.15 bonus for partial matches
-
+
         return 0.0
-
-    def _calculate_resolution_time(self, incident:
+
+    def _calculate_resolution_time(self, incident: dict) -> float | None:
         """Calculate resolution time in hours if timestamps are available."""
         try:
-            attributes = incident.get(
-            created_at = attributes.get(
-            resolved_at = attributes.get(
-
+            attributes = incident.get("attributes", {})
+            created_at = attributes.get("created_at")
+            resolved_at = attributes.get("resolved_at") or attributes.get("updated_at")
+
             if created_at and resolved_at:
                 # Try to parse ISO format timestamps
-                created = datetime.fromisoformat(created_at.replace(
-                resolved = datetime.fromisoformat(resolved_at.replace(
+                created = datetime.fromisoformat(created_at.replace("Z", "+00:00"))
+                resolved = datetime.fromisoformat(resolved_at.replace("Z", "+00:00"))
                 diff = resolved - created
                 return diff.total_seconds() / 3600  # Convert to hours
-        except Exception:
+        except Exception:  # nosec B110
+            # Intentionally broad: invalid date formats should return None, not crash
             pass
-
+
         return None
 
 
 class SolutionExtractor:
     """Extract and format solution information from resolved incidents."""
-
-    def extract_solutions(self, similar_incidents:
+
+    def extract_solutions(self, similar_incidents: list[IncidentSimilarity]) -> dict[str, Any]:
         """Extract actionable solutions from similar resolved incidents."""
         if not similar_incidents:
             return {
                 "solutions": [],
                 "common_patterns": [],
                 "average_resolution_time": None,
-                "total_similar_incidents": 0
+                "total_similar_incidents": 0,
             }
-
+
         solutions = []
         resolution_times = []
         all_keywords = []
-
+
         for incident in similar_incidents[:5]:  # Top 5 most similar
             solution_info = {
                 "incident_id": incident.incident_id,
                 "title": incident.title,
                 "similarity": round(incident.similarity_score, 3),
                 "matched_services": incident.matched_services,
-                "resolution_summary": incident.resolution_summary
-                "
+                "resolution_summary": incident.resolution_summary
+                or "No resolution summary available",
+                "resolution_time_hours": incident.resolution_time_hours,
             }
-
+
             # Extract potential solution steps from resolution summary
             solution_steps = self._extract_action_items(incident.resolution_summary)
             if solution_steps:
                 solution_info["suggested_actions"] = solution_steps
-
+
             solutions.append(solution_info)
-
+
             if incident.resolution_time_hours:
                 resolution_times.append(incident.resolution_time_hours)
-
+
             all_keywords.extend(incident.matched_keywords)
-
+
         # Calculate average resolution time
         avg_resolution = sum(resolution_times) / len(resolution_times) if resolution_times else None
-
+
         # Find common patterns
         common_patterns = self._identify_common_patterns(all_keywords, similar_incidents)
-
+
         return {
             "solutions": solutions,
             "common_patterns": common_patterns,
             "average_resolution_time": round(avg_resolution, 2) if avg_resolution else None,
-            "total_similar_incidents": len(similar_incidents)
+            "total_similar_incidents": len(similar_incidents),
         }
-
-    def _extract_action_items(self, resolution_text: str) ->
+
+    def _extract_action_items(self, resolution_text: str) -> list[str]:
         """Extract potential action items from resolution text."""
         if not resolution_text:
             return []
-
+
         actions = []
        text_lower = resolution_text.lower()
-
+
         # Look for common action patterns
         action_patterns = [
-            r
-            r
-            r
-            r
-            r
-            r
-            r
+            r"restart(?:ed)?\s+(\w+(?:\s+\w+)*)",
+            r"clear(?:ed)?\s+(\w+(?:\s+\w+)*)",
+            r"update(?:d)?\s+(\w+(?:\s+\w+)*)",
+            r"fix(?:ed)?\s+(\w+(?:\s+\w+)*)",
+            r"roll(?:ed)?\s+back\s+(\w+(?:\s+\w+)*)",
+            r"scale(?:d)?\s+(\w+(?:\s+\w+)*)",
+            r"deploy(?:ed)?\s+(\w+(?:\s+\w+)*)",
         ]
-
+
         for pattern in action_patterns:
             matches = re.findall(pattern, text_lower)
             for match in matches:
                 # Extract the base action word from the pattern
-                if
+                if "roll" in pattern and "back" in pattern:
                     action = f"rollback {match}".strip()
-                elif
+                elif "restart" in pattern:
                     action = f"restart {match}".strip()
-                elif
+                elif "clear" in pattern:
                     action = f"clear {match}".strip()
-                elif
+                elif "update" in pattern:
                     action = f"update {match}".strip()
-                elif
+                elif "fix" in pattern:
                     action = f"fix {match}".strip()
-                elif
+                elif "scale" in pattern:
                     action = f"scale {match}".strip()
-                elif
+                elif "deploy" in pattern:
                     action = f"deploy {match}".strip()
                 else:
                     # Fallback to original logic
-                    base_pattern =
-
+                    base_pattern = (
+                        pattern.split("(")[0].replace("(?:ed)?", "").replace("(?:d)?", "")
+                    )
+                    # Extract replacement outside f-string for Python 3.10 compatibility
+                    cleaned_pattern = base_pattern.replace(r"\s+", " ")
+                    action = f"{cleaned_pattern} {match}".strip()
                 actions.append(action)
-
+
         # Look for explicit steps
-        if
-            sentences = resolution_text.split(
+        if "step" in text_lower or "action" in text_lower:
+            sentences = resolution_text.split(".")
             for sentence in sentences:
-                if any(word in sentence.lower() for word in [
+                if any(word in sentence.lower() for word in ["step", "action", "fix", "solution"]):
                     actions.append(sentence.strip())
-
+
         return actions[:5]  # Limit to top 5 actions
-
-    def _identify_common_patterns(
+
+    def _identify_common_patterns(
+        self, keywords: list[str], incidents: list[IncidentSimilarity]
+    ) -> list[str]:
         """Identify common patterns across similar incidents."""
         patterns = []
-
+
         # Service patterns
         all_services = []
         for incident in incidents:
             all_services.extend(incident.matched_services)
-
+
         if all_services:
-            common_services = [
+            common_services = [
+                service for service in set(all_services) if all_services.count(service) >= 2
+            ]
             if common_services:
                 patterns.append(f"Common services affected: {', '.join(common_services)}")
-
+
         # Keyword patterns
         if keywords:
             keyword_counts = {}
             for keyword in keywords:
                 keyword_counts[keyword] = keyword_counts.get(keyword, 0) + 1
-
+
             frequent_keywords = [k for k, v in keyword_counts.items() if v >= 2 and len(k) > 3]
             if frequent_keywords:
                 patterns.append(f"Common keywords: {', '.join(frequent_keywords[:3])}")
-
+
         # Resolution time patterns
-        resolution_times = [
+        resolution_times = [
+            inc.resolution_time_hours for inc in incidents if inc.resolution_time_hours is not None
+        ]
         if resolution_times:
             avg_time = sum(resolution_times) / len(resolution_times)
             if avg_time < 1:
```
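A small but load-bearing detail in `_calculate_resolution_time`: timestamps such as `2024-05-01T10:00:00Z` are normalized with `replace("Z", "+00:00")` because `datetime.fromisoformat` only accepts a trailing `Z` natively from Python 3.11 onward. The new `# nosec B110` marker tells Bandit that the broad `except Exception: pass` is deliberate. A quick sketch of the arithmetic, with invented timestamps:

```python
from datetime import datetime

created_at = "2024-05-01T10:00:00Z"
resolved_at = "2024-05-01T13:30:00Z"

# Normalize the trailing Z so fromisoformat also works on Python 3.10.
created = datetime.fromisoformat(created_at.replace("Z", "+00:00"))
resolved = datetime.fromisoformat(resolved_at.replace("Z", "+00:00"))

print((resolved - created).total_seconds() / 3600)  # 3.5 (hours)
```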
```diff
@@ -515,5 +600,5 @@ class SolutionExtractor:
                 patterns.append("These incidents typically take longer to resolve (> 4 hours)")
             else:
                 patterns.append(f"These incidents typically resolve in {avg_time:.1f} hours")
-
-        return patterns
+
+        return patterns
```
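Taken together, the reworked module is used in two steps: rank historical incidents against a live one, then mine the top matches for suggested actions. A hypothetical end-to-end run; the incident dicts mimic the JSON:API shape (`{"id": ..., "attributes": {...}}`) the methods read, but every value here is invented:

```python
from rootly_mcp_server.smart_utils import SolutionExtractor, TextSimilarityAnalyzer

history = [
    {
        "id": "101",
        "attributes": {
            "title": "Payment API 504 timeouts",
            "summary": "Restarted payment-api pods and cleared the redis cache",
            "created_at": "2024-05-01T10:00:00Z",
            "resolved_at": "2024-05-01T13:30:00Z",
        },
    },
]
target = {"id": "202", "attributes": {"title": "payment api timeout errors"}}

# With one historical incident this takes the keyword fallback path;
# with several and scikit-learn installed, the TF-IDF path runs instead.
similar = TextSimilarityAnalyzer().calculate_similarity(history, target)
report = SolutionExtractor().extract_solutions(similar)
print(report["average_resolution_time"], report["solutions"])
```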