opspilot-ai 0.1.0__py3-none-any.whl

@@ -0,0 +1,23 @@
+ def redis_timeout_diff():
+     return {
+         "file": ".env",
+         "diff": """--- a/.env
+ +++ b/.env
+ @@
+ -REDIS_TIMEOUT=1
+ +REDIS_TIMEOUT=5
+ """,
+         "rationale": "Increase Redis timeout to reduce transient timeout errors under load."
+     }
+
+ def redis_pooling_diff():
+     return {
+         "file": "app/config/redis.py",
+         "diff": """--- a/app/config/redis.py
+ +++ b/app/config/redis.py
+ @@
+ -redis.Redis(host=host, port=port)
+ +redis.Redis(host=host, port=port, socket_timeout=5, max_connections=20)
+ """,
+         "rationale": "Enable connection pooling and reasonable timeouts to improve reliability."
+     }
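For reference, a minimal sketch of how one of these suggestion dicts could be applied for review. The apply_suggestion helper and its use of the system patch tool are illustrative assumptions, not part of the package:

import subprocess

def apply_suggestion(suggestion: dict, check_only: bool = True) -> None:
    # Hypothetical helper: pipe the embedded unified diff to GNU patch.
    args = ["patch", "-p1"]
    if check_only:
        args.append("--dry-run")  # validate the diff without touching files
    subprocess.run(args, input=suggestion["diff"].encode(), check=True)

apply_suggestion(redis_timeout_diff())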
@@ -0,0 +1,36 @@
+ from opspilot.state import AgentState
+ from opspilot.graph.nodes import (
+     collect_context_node,
+     planner_node,
+     verifier_node,
+     fixer_node,
+ )
+
+ CONFIDENCE_THRESHOLD = 0.6
+
+
+ def run_agent(state: AgentState) -> AgentState:
+     # START → CONTEXT
+     state = collect_context_node(state)
+
+     while not state.terminated:
+         # PLAN
+         state = planner_node(state)
+
+         # VERIFY
+         state = verifier_node(state)
+
+         # DECISION
+         if state.confidence >= CONFIDENCE_THRESHOLD:
+             state = fixer_node(state)
+             state.terminated = True
+
+         elif state.iteration >= state.max_iterations:
+             state.terminated = True
+
+         else:
+             # loop back to PLAN: clear the stale hypothesis so planner_node
+             # re-plans and advances the iteration counter on the next pass
+             state.hypothesis = None
+
+     return state
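A minimal sketch of driving the loop end to end, assuming only what the code above defines; the project path is a placeholder:

from opspilot.state import AgentState

state = AgentState(project_root="/path/to/project")  # placeholder path
final = run_agent(state)
print(final.hypothesis, final.confidence, final.suggestions)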
opspilot/graph/nodes.py ADDED
@@ -0,0 +1,40 @@
+ from opspilot.state import AgentState
+ from opspilot.context import collect_context
+ from opspilot.agents.planner import plan
+ from opspilot.agents.verifier import verify
+ from opspilot.agents.fixer import suggest
+ from opspilot.tools import collect_evidence
+
+ CONFIDENCE_THRESHOLD = 0.6
+
+
+ def collect_context_node(state: AgentState) -> AgentState:
+     state.context = collect_context(state.project_root)
+     return state
+
+
+ def planner_node(state: AgentState) -> AgentState:
+     if state.hypothesis:
+         return state  # a hypothesis is already pending; skip re-planning
+
+     result = plan(state.context)
+     state.hypothesis = result.get("hypothesis")
+     state.confidence = result.get("confidence", 0.0)
+     state.iteration += 1
+     return state
+
+
+ def verifier_node(state: AgentState) -> AgentState:
+     state.evidence = collect_evidence(state.context)
+
+     verdict = verify(state.hypothesis, state.evidence)
+     state.confidence = verdict.get("confidence", state.confidence)
+
+     return state
+
+
+ def fixer_node(state: AgentState) -> AgentState:
+     if state.confidence >= CONFIDENCE_THRESHOLD:
+         fixes = suggest(state.hypothesis, state.evidence)
+         state.suggestions = fixes.get("suggestions", [])
+     return state
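The planner, verifier, and fixer results are consumed above as plain dicts. As a sketch for stubbing these agents in tests, the shapes implied by the .get(...) calls look like this; all concrete values are invented:

def fake_plan(context: dict) -> dict:
    # shape implied by planner_node: a hypothesis plus a confidence score
    return {"hypothesis": "Redis timeouts under load", "confidence": 0.4}

def fake_verify(hypothesis: str, evidence: dict) -> dict:
    # shape implied by verifier_node: an updated confidence
    return {"confidence": 0.7}

def fake_suggest(hypothesis: str, evidence: dict) -> dict:
    # shape implied by fixer_node: a list of suggestion dicts
    return {"suggestions": [{"file": ".env", "diff": "...", "rationale": "..."}]}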
opspilot/memory.py ADDED
@@ -0,0 +1,25 @@
+ import json
+ from pathlib import Path
+ from typing import Dict, List
+
+ MEMORY_FILE = Path.home() / ".opspilot_memory.json"
+
+
+ def load_memory() -> List[Dict]:
+     if MEMORY_FILE.exists():
+         return json.loads(MEMORY_FILE.read_text())
+     return []
+
+
+ def save_memory(entry: Dict):
+     memory = load_memory()
+     memory.append(entry)
+     MEMORY_FILE.write_text(json.dumps(memory, indent=2))
+
+
+ def find_similar_issues(project_root: str, threshold: float = 0.6) -> List[Dict]:
+     memory = load_memory()
+     return [
+         m for m in memory
+         if m["project"] == project_root and m["confidence"] >= threshold
+     ]
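A short usage sketch of the file-based store; the entry fields mirror what find_similar_issues reads, and the values are illustrative:

save_memory({
    "project": "/path/to/project",  # placeholder
    "hypothesis": "Redis timeouts under load",
    "confidence": 0.8,
})
matches = find_similar_issues("/path/to/project", threshold=0.6)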
@@ -0,0 +1,324 @@
+ """Redis-based memory system for OpsPilot with user isolation."""
+
+ import json
+ import hashlib
+ import time
+ import os
+ from typing import Dict, List, Optional
+ from datetime import datetime
+
+ try:
+     import redis
+     REDIS_AVAILABLE = True
+ except ImportError:
+     REDIS_AVAILABLE = False
+
+
+ class RedisMemory:
+     """Redis-based memory with automatic TTL and user isolation."""
+
+     def __init__(
+         self,
+         host: str = "localhost",
+         port: int = 6379,
+         db: int = 0,
+         password: Optional[str] = None,
+         ttl_days: Optional[int] = None
+     ):
+         """
+         Initialize Redis connection.
+
+         Args:
+             host: Redis host
+             port: Redis port
+             db: Redis database number
+             password: Redis password (if required)
+             ttl_days: Days to keep incidents (default: from env or 30)
+         """
+         if not REDIS_AVAILABLE:
+             raise ImportError(
+                 "redis package not installed. Install with: pip install redis"
+             )
+
+         self.redis_client = redis.Redis(
+             host=host,
+             port=port,
+             db=db,
+             password=password,
+             decode_responses=True
+         )
+
+         # TTL from environment variable or default
+         if ttl_days is None:
+             ttl_days = int(os.getenv("OPSPILOT_REDIS_TTL_DAYS", "30"))
+
+         self.ttl_seconds = ttl_days * 24 * 60 * 60
+
+     @staticmethod
+     def _get_project_hash(project_root: str) -> str:
+         """
+         Generate unique hash for project (user isolation).
+
+         Args:
+             project_root: Absolute path to project
+
+         Returns:
+             16-character hash
+         """
+         return hashlib.sha256(project_root.encode()).hexdigest()[:16]
+
+     def save_incident(
+         self,
+         project_root: str,
+         hypothesis: str,
+         confidence: float,
+         severity: str,
+         error_patterns: Dict,
+         evidence: Dict,
+         remediation: Optional[Dict] = None
+     ) -> str:
+         """
+         Save incident analysis to Redis with TTL.
+
+         Args:
+             project_root: Project path (for user isolation)
+             hypothesis: Root cause hypothesis
+             confidence: Confidence score (0.0 - 1.0)
+             severity: P0/P1/P2/P3
+             error_patterns: Detected error patterns
+             evidence: Collected evidence
+             remediation: Remediation plan (optional)
+
+         Returns:
+             incident_key: Unique key for this incident
+         """
+         project_hash = self._get_project_hash(project_root)
+         timestamp = int(time.time())
+
+         # Create incident data
+         incident_data = {
+             "project": project_root,
+             "hypothesis": hypothesis,
+             "confidence": confidence,
+             "severity": severity,
+             "error_patterns": error_patterns,
+             "evidence": evidence,
+             "remediation": remediation,
+             "timestamp": timestamp,
+             "created_at": datetime.fromtimestamp(timestamp).isoformat()
+         }
+
+         # Generate incident key
+         incident_key = f"incident:{project_hash}:{timestamp}"
+
+         # Store incident with TTL
+         self.redis_client.setex(
+             incident_key,
+             self.ttl_seconds,
+             json.dumps(incident_data)
+         )
+
+         # Add to similarity index (sorted set by confidence)
+         similarity_key = f"incidents:similar:{project_hash}"
+         self.redis_client.zadd(
+             similarity_key,
+             {incident_key: confidence}
+         )
+         self.redis_client.expire(similarity_key, self.ttl_seconds)
+
+         # Add to severity index
+         severity_key = f"incidents:severity:{project_hash}:{severity}"
+         self.redis_client.sadd(severity_key, incident_key)
+         self.redis_client.expire(severity_key, self.ttl_seconds)
+
+         return incident_key
+
+     def find_similar_issues(
+         self,
+         project_root: str,
+         min_confidence: float = 0.6,
+         limit: int = 5
+     ) -> List[Dict]:
+         """
+         Find similar incidents for a project (user-isolated).
+
+         Args:
+             project_root: Project path
+             min_confidence: Minimum confidence threshold
+             limit: Max number of results
+
+         Returns:
+             List of similar incidents, sorted by confidence (desc)
+         """
+         project_hash = self._get_project_hash(project_root)
+         similarity_key = f"incidents:similar:{project_hash}"
+
+         # Get top incidents by confidence (sorted set)
+         incident_keys = self.redis_client.zrevrangebyscore(
+             similarity_key,
+             max=1.0,
+             min=min_confidence,
+             start=0,
+             num=limit
+         )
+
+         # Fetch incident data
+         incidents = []
+         for key in incident_keys:
+             data = self.redis_client.get(key)
+             if data:
+                 incidents.append(json.loads(data))
+
+         return incidents
+
+     def get_incidents_by_severity(
+         self,
+         project_root: str,
+         severity: str
+     ) -> List[Dict]:
+         """
+         Get all incidents of a specific severity.
+
+         Args:
+             project_root: Project path
+             severity: P0/P1/P2/P3
+
+         Returns:
+             List of incidents
+         """
+         project_hash = self._get_project_hash(project_root)
+         severity_key = f"incidents:severity:{project_hash}:{severity}"
+
+         incident_keys = self.redis_client.smembers(severity_key)
+
+         incidents = []
+         for key in incident_keys:
+             data = self.redis_client.get(key)
+             if data:
+                 incidents.append(json.loads(data))
+
+         return incidents
+
+     def record_llm_metrics(
+         self,
+         provider: str,
+         success: bool,
+         latency_ms: float
+     ):
+         """
+         Record LLM provider performance metrics.
+
+         Args:
+             provider: Provider name (OllamaProvider, OpenRouterProvider, etc.)
+             success: Whether call succeeded
+             latency_ms: Response time in milliseconds
+         """
+         metrics_key = f"llm:health:{provider}"
+
+         if success:
+             self.redis_client.hincrby(metrics_key, "success_count", 1)
+             self.redis_client.hset(metrics_key, "last_success", int(time.time()))
+         else:
+             self.redis_client.hincrby(metrics_key, "failure_count", 1)
+
+         # Update average latency as a running mean over all recorded calls
+         current_avg = float(self.redis_client.hget(metrics_key, "avg_latency_ms") or 0)
+         successes = int(self.redis_client.hget(metrics_key, "success_count") or 0)
+         failures = int(self.redis_client.hget(metrics_key, "failure_count") or 0)
+         total_calls = successes + failures
+
+         if total_calls > 0:
+             new_avg = ((current_avg * (total_calls - 1)) + latency_ms) / total_calls
+             self.redis_client.hset(metrics_key, "avg_latency_ms", new_avg)
+
+         # Set TTL (refresh metrics every hour)
+         self.redis_client.expire(metrics_key, 3600)
+
+     def get_llm_health(self, provider: str) -> Dict:
+         """
+         Get health metrics for LLM provider.
+
+         Args:
+             provider: Provider name
+
+         Returns:
+             Dict with success_count, failure_count, avg_latency_ms
+         """
+         metrics_key = f"llm:health:{provider}"
+         metrics = self.redis_client.hgetall(metrics_key)
+
+         return {
+             "success_count": int(metrics.get("success_count", 0)),
+             "failure_count": int(metrics.get("failure_count", 0)),
+             "avg_latency_ms": float(metrics.get("avg_latency_ms", 0)),
+             "last_success": int(metrics.get("last_success", 0))
+         }
+
+     def clear_project_memory(self, project_root: str):
+         """
+         Clear all incidents for a project (useful for testing).
+
+         Args:
+             project_root: Project path
+         """
+         project_hash = self._get_project_hash(project_root)
+
+         # Delete similarity index
+         self.redis_client.delete(f"incidents:similar:{project_hash}")
+
+         # Delete severity indexes
+         for severity in ["P0", "P1", "P2", "P3"]:
+             severity_key = f"incidents:severity:{project_hash}:{severity}"
+             incident_keys = self.redis_client.smembers(severity_key)
+
+             # Delete individual incidents
+             for key in incident_keys:
+                 self.redis_client.delete(key)
+
+             # Delete severity index
+             self.redis_client.delete(severity_key)
+
+     def health_check(self) -> bool:
+         """
+         Check if Redis is available.
+
+         Returns:
+             True if Redis is reachable
+         """
+         try:
+             return self.redis_client.ping()
+         except Exception:
+             return False
+
+
+ # Fallback to file-based memory if Redis unavailable
+ def get_memory_backend(
+     redis_host: str = "localhost",
+     redis_port: int = 6379,
+     fallback_to_file: bool = True
+ ):
+     """
+     Get memory backend (Redis or file-based fallback).
+
+     Args:
+         redis_host: Redis host
+         redis_port: Redis port
+         fallback_to_file: Use file-based storage if Redis unavailable
+
+     Returns:
+         Memory backend instance, or None to signal the file-based fallback
+     """
+     if REDIS_AVAILABLE:
+         try:
+             memory = RedisMemory(host=redis_host, port=redis_port)
+             if memory.health_check():
+                 return memory
+         except Exception as e:
+             print(f"[WARNING] Redis unavailable: {e}")
+
+     if fallback_to_file:
+         print("[INFO] Using file-based memory (Redis unavailable)")
+         # callers fall back to the file-based helpers in opspilot.memory
+         return None
+
+     raise RuntimeError("Redis unavailable and fallback disabled")
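A short usage sketch of the Redis backend, assuming a Redis server on localhost:6379; the incident fields are illustrative:

memory = get_memory_backend()
if isinstance(memory, RedisMemory):
    key = memory.save_incident(
        project_root="/path/to/project",  # placeholder
        hypothesis="Redis timeouts under load",
        confidence=0.8,
        severity="P1",
        error_patterns={"Timeout": 12},
        evidence={"uses_redis": True},
    )
    similar = memory.find_similar_issues("/path/to/project", min_confidence=0.6)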
opspilot/state.py ADDED
@@ -0,0 +1,19 @@
+ from dataclasses import dataclass, field
+ from typing import Dict, Any, List, Optional
+
+
+ @dataclass
+ class AgentState:
+     project_root: str
+     context: Dict[str, Any] = field(default_factory=dict)
+
+     hypothesis: Optional[str] = None
+     confidence: float = 0.0
+
+     evidence: Dict[str, Any] = field(default_factory=dict)
+     suggestions: List[Dict[str, Any]] = field(default_factory=list)
+
+     iteration: int = 0
+     max_iterations: int = 2
+
+     terminated: bool = False
opspilot/tools/__init__.py ADDED
@@ -0,0 +1,52 @@
+ """Tool functions for evidence collection."""
+
+ from opspilot.tools.log_tools import analyze_log_errors
+ from opspilot.tools.env_tools import find_missing_env
+ from opspilot.tools.dep_tools import has_dependency
+ from opspilot.tools.pattern_analysis import identify_error_patterns, build_error_timeline
+
+
+ def collect_evidence(context: dict) -> dict:
+     """
+     Collect evidence from project context with advanced pattern analysis.
+
+     Args:
+         context: Project context dictionary
+
+     Returns:
+         Dictionary containing collected evidence with error patterns, severity, and timeline
+     """
+     evidence = {}
+
+     logs = context.get("logs")
+     env = context.get("env", {})
+     deps = context.get("dependencies", [])
+
+     # Advanced pattern analysis
+     if logs:
+         error_patterns = identify_error_patterns(logs)
+         if error_patterns:
+             evidence["error_patterns"] = error_patterns
+             evidence["severity"] = error_patterns.get("severity", "P3")
+             evidence["error_count"] = error_patterns.get("error_count", 0)
+
+         # Timeline analysis
+         timeline = build_error_timeline(logs)
+         if timeline:
+             evidence["timeline"] = timeline
+
+         # Basic log error counting (keep for backward compatibility)
+         log_errors = analyze_log_errors(logs)
+         if log_errors:
+             evidence["log_errors"] = log_errors
+
+     # Dependency detection
+     if has_dependency(deps, "redis"):
+         evidence["uses_redis"] = True
+
+     # Environment variable validation
+     missing_env = find_missing_env(["REDIS_URL"], env)
+     if missing_env:
+         evidence["missing_env"] = missing_env
+
+     return evidence
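A sketch of the context shape collect_evidence expects, with keys inferred from the lookups above; the values are illustrative:

context = {
    "logs": "2024-01-01 12:00:01 ERROR Timeout connecting to Redis\n",
    "env": {"REDIS_TIMEOUT": "1"},  # REDIS_URL intentionally missing
    "dependencies": ["redis==5.0.1", "fastapi"],
}
evidence = collect_evidence(context)
# expect uses_redis, missing_env == ["REDIS_URL"], and log error counts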
opspilot/tools/dep_tools.py ADDED
@@ -0,0 +1,5 @@
+ from typing import List
+
+
+ def has_dependency(deps: List[str], keyword: str) -> bool:
+     return any(keyword.lower() in d.lower() for d in deps)
opspilot/tools/env_tools.py ADDED
@@ -0,0 +1,5 @@
+ from typing import Dict, List
+
+
+ def find_missing_env(required_keys: List[str], env: Dict[str, str]) -> List[str]:
+     return [k for k in required_keys if k not in env]
opspilot/tools/log_tools.py ADDED
@@ -0,0 +1,11 @@
+ from collections import Counter
+ import re
+ from typing import Dict
+
+
+ def analyze_log_errors(log_text: str) -> Dict[str, int]:
+     if not log_text:
+         return {}
+
+     errors = re.findall(r"(ERROR|Exception|Traceback|Timeout)", log_text)
+     return dict(Counter(errors))
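A quick interactive check of the three helpers above, with illustrative inputs:

>>> has_dependency(["redis==5.0.1"], "redis")
True
>>> find_missing_env(["REDIS_URL"], {"PATH": "/usr/bin"})
['REDIS_URL']
>>> analyze_log_errors("ERROR boom\nTimeout waiting\nERROR again")
{'ERROR': 2, 'Timeout': 1}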