opspilot-ai 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opspilot/__init__.py +0 -0
- opspilot/agents/fixer.py +46 -0
- opspilot/agents/planner.py +74 -0
- opspilot/agents/remediation.py +200 -0
- opspilot/agents/verifier.py +67 -0
- opspilot/cli.py +360 -0
- opspilot/config.py +22 -0
- opspilot/context/__init__.py +26 -0
- opspilot/context/deployment_history.py +347 -0
- opspilot/context/deps.py +14 -0
- opspilot/context/docker.py +17 -0
- opspilot/context/env.py +17 -0
- opspilot/context/logs.py +16 -0
- opspilot/context/production_logs.py +262 -0
- opspilot/context/project.py +19 -0
- opspilot/diffs/redis.py +23 -0
- opspilot/graph/engine.py +33 -0
- opspilot/graph/nodes.py +41 -0
- opspilot/memory.py +24 -0
- opspilot/memory_redis.py +322 -0
- opspilot/state.py +18 -0
- opspilot/tools/__init__.py +52 -0
- opspilot/tools/dep_tools.py +5 -0
- opspilot/tools/env_tools.py +5 -0
- opspilot/tools/log_tools.py +11 -0
- opspilot/tools/pattern_analysis.py +194 -0
- opspilot/utils/__init__.py +1 -0
- opspilot/utils/llm.py +23 -0
- opspilot/utils/llm_providers.py +499 -0
- opspilot_ai-0.1.0.dist-info/METADATA +408 -0
- opspilot_ai-0.1.0.dist-info/RECORD +35 -0
- opspilot_ai-0.1.0.dist-info/WHEEL +5 -0
- opspilot_ai-0.1.0.dist-info/entry_points.txt +2 -0
- opspilot_ai-0.1.0.dist-info/licenses/LICENSE +21 -0
- opspilot_ai-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""Production error pattern recognition."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import Dict, List
|
|
5
|
+
from collections import Counter
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def identify_error_patterns(logs: str) -> Dict:
    """Identify common production error patterns in raw log text.

    Args:
        logs: Raw log text.

    Returns:
        Dictionary of non-empty pattern categories, plus roll-up
        ``severity`` and ``error_count`` entries.  Falsy entries
        (empty lists/dicts, zero counts) are pruned from the result.
    """
    if not logs:
        return {}

    # Dispatch table: category name -> extractor over the raw logs.
    extractors = {
        "http_errors": _extract_http_errors,
        "exceptions": _extract_exceptions,
        "database_errors": _extract_database_errors,
        "timeout_errors": _extract_timeout_errors,
        "memory_errors": _extract_memory_errors,
        "stack_traces": _extract_stack_traces,
    }
    findings: Dict = {name: extract(logs) for name, extract in extractors.items()}

    # Roll-up metadata is computed over the unfiltered findings so the
    # severity assessment sees every category, even empty ones.
    findings["severity"] = _assess_severity(findings)
    findings["error_count"] = _count_total_errors(findings)

    return {name: value for name, value in findings.items() if value}
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _extract_http_errors(logs: str) -> Dict[str, int]:
|
|
38
|
+
"""Extract HTTP error codes (4xx, 5xx)."""
|
|
39
|
+
http_pattern = r'\b(4\d{2}|5\d{2})\b'
|
|
40
|
+
codes = re.findall(http_pattern, logs)
|
|
41
|
+
return dict(Counter(codes)) if codes else {}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _extract_exceptions(logs: str) -> List[str]:
|
|
45
|
+
"""Extract exception types."""
|
|
46
|
+
exception_patterns = [
|
|
47
|
+
r'(\w+Exception):',
|
|
48
|
+
r'(\w+Error):',
|
|
49
|
+
r'Traceback.*?(\w+Error)',
|
|
50
|
+
]
|
|
51
|
+
|
|
52
|
+
exceptions = []
|
|
53
|
+
for pattern in exception_patterns:
|
|
54
|
+
matches = re.findall(pattern, logs, re.MULTILINE)
|
|
55
|
+
exceptions.extend(matches)
|
|
56
|
+
|
|
57
|
+
# Return top 10 unique exceptions
|
|
58
|
+
return list(set(exceptions))[:10]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _extract_database_errors(logs: str) -> List[str]:
|
|
62
|
+
"""Extract database-related errors."""
|
|
63
|
+
db_patterns = [
|
|
64
|
+
r'(connection refused|connection timeout|connection lost)',
|
|
65
|
+
r'(deadlock|lock timeout)',
|
|
66
|
+
r'(too many connections)',
|
|
67
|
+
r'(database.*?error)',
|
|
68
|
+
]
|
|
69
|
+
|
|
70
|
+
db_errors = []
|
|
71
|
+
for pattern in db_patterns:
|
|
72
|
+
matches = re.findall(pattern, logs, re.IGNORECASE)
|
|
73
|
+
db_errors.extend(matches)
|
|
74
|
+
|
|
75
|
+
return list(set(db_errors))[:10]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _extract_timeout_errors(logs: str) -> int:
|
|
79
|
+
"""Count timeout-related errors."""
|
|
80
|
+
timeout_pattern = r'timeout|timed out|time out'
|
|
81
|
+
return len(re.findall(timeout_pattern, logs, re.IGNORECASE))
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _extract_memory_errors(logs: str) -> List[str]:
|
|
85
|
+
"""Extract memory-related errors."""
|
|
86
|
+
memory_patterns = [
|
|
87
|
+
r'OutOfMemoryError',
|
|
88
|
+
r'MemoryError',
|
|
89
|
+
r'out of memory',
|
|
90
|
+
r'OOM',
|
|
91
|
+
r'memory limit exceeded'
|
|
92
|
+
]
|
|
93
|
+
|
|
94
|
+
memory_errors = []
|
|
95
|
+
for pattern in memory_patterns:
|
|
96
|
+
if re.search(pattern, logs, re.IGNORECASE):
|
|
97
|
+
memory_errors.append(pattern)
|
|
98
|
+
|
|
99
|
+
return memory_errors
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _extract_stack_traces(logs: str) -> int:
|
|
103
|
+
"""Count stack traces."""
|
|
104
|
+
trace_patterns = [
|
|
105
|
+
r'Traceback \(most recent call last\)',
|
|
106
|
+
r'at \w+\.\w+\(',
|
|
107
|
+
r'^\s+at .*\(.*:\d+\)',
|
|
108
|
+
]
|
|
109
|
+
|
|
110
|
+
count = 0
|
|
111
|
+
for pattern in trace_patterns:
|
|
112
|
+
count += len(re.findall(pattern, logs, re.MULTILINE))
|
|
113
|
+
|
|
114
|
+
return count
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _assess_severity(patterns: Dict) -> str:
|
|
118
|
+
"""
|
|
119
|
+
Assess overall severity based on error patterns.
|
|
120
|
+
|
|
121
|
+
P0 = Critical (production down)
|
|
122
|
+
P1 = High (major functionality broken)
|
|
123
|
+
P2 = Medium (degraded performance)
|
|
124
|
+
P3 = Low (minor issues)
|
|
125
|
+
"""
|
|
126
|
+
http_5xx = sum(
|
|
127
|
+
count for code, count in patterns.get("http_errors", {}).items()
|
|
128
|
+
if code.startswith('5')
|
|
129
|
+
)
|
|
130
|
+
memory_errors = len(patterns.get("memory_errors", []))
|
|
131
|
+
timeout_count = patterns.get("timeout_errors", 0)
|
|
132
|
+
db_errors = len(patterns.get("database_errors", []))
|
|
133
|
+
|
|
134
|
+
# P0: Multiple 5xx errors or OOM
|
|
135
|
+
if http_5xx > 10 or memory_errors > 0:
|
|
136
|
+
return "P0"
|
|
137
|
+
|
|
138
|
+
# P1: Significant 5xx or database errors
|
|
139
|
+
if http_5xx > 5 or db_errors > 0:
|
|
140
|
+
return "P1"
|
|
141
|
+
|
|
142
|
+
# P2: Moderate errors or timeouts
|
|
143
|
+
if http_5xx > 0 or timeout_count > 5:
|
|
144
|
+
return "P2"
|
|
145
|
+
|
|
146
|
+
# P3: Minor 4xx errors only
|
|
147
|
+
return "P3"
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _count_total_errors(patterns: Dict) -> int:
|
|
151
|
+
"""Count total errors across all patterns."""
|
|
152
|
+
count = 0
|
|
153
|
+
|
|
154
|
+
# HTTP errors
|
|
155
|
+
count += sum(patterns.get("http_errors", {}).values())
|
|
156
|
+
|
|
157
|
+
# Exceptions
|
|
158
|
+
count += len(patterns.get("exceptions", []))
|
|
159
|
+
|
|
160
|
+
# Database errors
|
|
161
|
+
count += len(patterns.get("database_errors", []))
|
|
162
|
+
|
|
163
|
+
# Timeouts
|
|
164
|
+
count += patterns.get("timeout_errors", 0)
|
|
165
|
+
|
|
166
|
+
# Memory errors
|
|
167
|
+
count += len(patterns.get("memory_errors", []))
|
|
168
|
+
|
|
169
|
+
return count
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def build_error_timeline(logs: str) -> Dict:
    """Summarise when errors occurred based on log timestamps.

    Args:
        logs: Raw log text containing timestamps of the form
            ``YYYY-MM-DD[T ]HH:MM:SS`` (ISO-ish or common log formats).

    Returns:
        First/last timestamp seen, total occurrences, and a readable
        time range; empty dict when no timestamps are present.
    """
    stamps = re.findall(r'(\d{4}-\d{2}-\d{2}[T\s]\d{2}:\d{2}:\d{2})', logs)
    if not stamps:
        return {}

    first, last = stamps[0], stamps[-1]
    if len(stamps) > 1:
        span = f"{first} to {last}"
    else:
        span = "single occurrence"

    return {
        "first_seen": first,
        "last_seen": last,
        "total_occurrences": len(stamps),
        "time_range": span,
    }
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Utility modules for OpsPilot."""
|
opspilot/utils/llm.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""LLM utility functions shared across agents.
|
|
2
|
+
|
|
3
|
+
This module provides backward-compatible wrappers around the new
|
|
4
|
+
multi-provider LLM system with automatic fallback.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
# Import from new multi-provider system
|
|
8
|
+
from opspilot.utils.llm_providers import (
|
|
9
|
+
call_llama,
|
|
10
|
+
safe_json_parse,
|
|
11
|
+
check_ollama_available,
|
|
12
|
+
check_any_llm_available,
|
|
13
|
+
get_llm_router
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
# Re-export for backward compatibility
|
|
17
|
+
__all__ = [
|
|
18
|
+
'call_llama',
|
|
19
|
+
'safe_json_parse',
|
|
20
|
+
'check_ollama_available',
|
|
21
|
+
'check_any_llm_available',
|
|
22
|
+
'get_llm_router'
|
|
23
|
+
]
|