iflow-mcp_hulupeep_ruvscan-mcp 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcp/monitoring.py ADDED
@@ -0,0 +1,126 @@
+ """
+ Monitoring and observability for RuvScan
+ """
+
+ from typing import Dict, Any, List
+ from datetime import datetime
+ import logging
+ import time
+ from dataclasses import dataclass, field
+ from collections import defaultdict
+
+ logger = logging.getLogger(__name__)
+
+ @dataclass
+ class RequestMetrics:
+     """Metrics for a single request"""
+     endpoint: str
+     method: str
+     status_code: int
+     duration_ms: float
+     timestamp: datetime = field(default_factory=datetime.utcnow)
+
+ class MetricsCollector:
+     """Collect and aggregate metrics"""
+
+     def __init__(self):
+         self.requests: List[RequestMetrics] = []
+         self.endpoint_counts = defaultdict(int)
+         self.endpoint_durations = defaultdict(list)
+         self.error_counts = defaultdict(int)
+         self.start_time = datetime.utcnow()
+
+     def record_request(
+         self,
+         endpoint: str,
+         method: str,
+         status_code: int,
+         duration_ms: float
+     ):
+         """Record a request"""
+         metrics = RequestMetrics(
+             endpoint=endpoint,
+             method=method,
+             status_code=status_code,
+             duration_ms=duration_ms
+         )
+
+         self.requests.append(metrics)
+         self.endpoint_counts[endpoint] += 1
+         self.endpoint_durations[endpoint].append(duration_ms)
+
+         if status_code >= 400:
+             self.error_counts[endpoint] += 1
+
+     def get_summary(self) -> Dict[str, Any]:
+         """Get metrics summary"""
+         total_requests = len(self.requests)
+         uptime = (datetime.utcnow() - self.start_time).total_seconds()
+
+         # Calculate average durations
+         avg_durations = {}
+         for endpoint, durations in self.endpoint_durations.items():
+             avg_durations[endpoint] = sum(durations) / len(durations) if durations else 0
+
+         # Calculate error rates
+         error_rates = {}
+         for endpoint, errors in self.error_counts.items():
+             total = self.endpoint_counts[endpoint]
+             error_rates[endpoint] = (errors / total * 100) if total > 0 else 0
+
+         return {
+             "uptime_seconds": uptime,
+             "total_requests": total_requests,
+             "requests_per_second": total_requests / uptime if uptime > 0 else 0,
+             "endpoint_counts": dict(self.endpoint_counts),
+             "average_duration_ms": avg_durations,
+             "error_rates_percent": error_rates,
+             "total_errors": sum(self.error_counts.values())
+         }
+
+     def get_recent_requests(self, limit: int = 10) -> List[Dict[str, Any]]:
+         """Get recent requests"""
+         recent = self.requests[-limit:]
+         return [
+             {
+                 "endpoint": r.endpoint,
+                 "method": r.method,
+                 "status_code": r.status_code,
+                 "duration_ms": r.duration_ms,
+                 "timestamp": r.timestamp.isoformat()
+             }
+             for r in recent
+         ]
+
+ # Global metrics collector
+ metrics_collector = MetricsCollector()
+
+ class RequestTimer:
+     """Context manager for timing requests"""
+
+     def __init__(self, endpoint: str, method: str):
+         self.endpoint = endpoint
+         self.method = method
+         self.start_time = None
+         self.status_code = 200
+
+     def __enter__(self):
+         self.start_time = time.time()
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb):
+         duration_ms = (time.time() - self.start_time) * 1000
+
+         if exc_type is not None:
+             self.status_code = 500
+
+         metrics_collector.record_request(
+             self.endpoint,
+             self.method,
+             self.status_code,
+             duration_ms
+         )
+
+     def set_status(self, status_code: int):
+         """Set the status code"""
+         self.status_code = status_code
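RequestTimer reports into the module-level metrics_collector, so wrapping a handler in the context manager is enough to feed get_summary(). A minimal usage sketch, assuming the import path matches the mcp/monitoring.py layout shown above; the handle_scan handler and run_scan call are hypothetical, for illustration only:

    from mcp.monitoring import RequestTimer, metrics_collector

    def handle_scan(payload):
        # Any exception raised inside the block is recorded as a 500 by __exit__
        with RequestTimer(endpoint="/scan", method="POST") as timer:
            result = run_scan(payload)  # hypothetical handler body
            timer.set_status(200)       # explicit status; 200 is also the default
        return result

    # Aggregate view across all recorded requests
    print(metrics_collector.get_summary())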
@@ -0,0 +1,6 @@
+ """Reasoning layer for RuvScan"""
+
+ from .fact_cache import FACTCache
+ from .safla_agent import SAFLAAgent
+
+ __all__ = ['FACTCache', 'SAFLAAgent']
@@ -0,0 +1,208 @@
+ """
+ Embedding generation service
+ Supports OpenAI; Anthropic and local providers are not yet implemented
+ """
+
+ from typing import List, Optional, Dict, Any
+ import numpy as np
+ import logging
+ import os
+ from enum import Enum
+
+ logger = logging.getLogger(__name__)
+
+ class EmbeddingProvider(Enum):
+     """Supported embedding providers"""
+     OPENAI = "openai"
+     ANTHROPIC = "anthropic"
+     LOCAL = "local"
+
+ class EmbeddingService:
+     """
+     Service for generating embeddings from text
+     """
+
+     def __init__(
+         self,
+         provider: str = "openai",
+         model: str = "text-embedding-3-small",
+         api_key: Optional[str] = None
+     ):
+         self.provider = EmbeddingProvider(provider)
+         self.model = model
+         self.api_key = api_key or os.getenv("OPENAI_API_KEY")
+         self.dimension = 1536  # Default for OpenAI
+
+         self._init_client()
+
+     def _init_client(self):
+         """Initialize the embedding client"""
+         if self.provider == EmbeddingProvider.OPENAI:
+             try:
+                 from openai import OpenAI
+                 self.client = OpenAI(api_key=self.api_key)
+                 logger.info(f"Initialized OpenAI client with model {self.model}")
+             except ImportError:
+                 logger.error("OpenAI package not installed")
+                 raise
+         elif self.provider == EmbeddingProvider.ANTHROPIC:
+             logger.warning("Anthropic embeddings not yet implemented")
+             self.client = None
+         else:
+             logger.warning("Local embeddings not yet implemented")
+             self.client = None
+
+     async def embed_text(self, text: str) -> np.ndarray:
+         """
+         Generate embedding for a single text
+
+         Args:
+             text: Input text
+
+         Returns:
+             Numpy array of embedding vector
+         """
+         if not text or not text.strip():
+             logger.warning("Empty text provided for embedding")
+             return np.zeros(self.dimension)
+
+         try:
+             if self.provider == EmbeddingProvider.OPENAI:
+                 return await self._embed_openai(text)
+             else:
+                 # Fallback to random embedding for development
+                 logger.warning(f"Using random embedding for provider {self.provider}")
+                 return np.random.randn(self.dimension)
+
+         except Exception as e:
+             logger.error(f"Error generating embedding: {e}")
+             return np.zeros(self.dimension)
+
+     async def embed_batch(self, texts: List[str]) -> List[np.ndarray]:
+         """
+         Generate embeddings for multiple texts
+
+         Args:
+             texts: List of input texts
+
+         Returns:
+             List of numpy arrays
+         """
+         embeddings = []
+
+         # Process in batches for efficiency
+         batch_size = 100
+         for i in range(0, len(texts), batch_size):
+             batch = texts[i:i + batch_size]
+
+             if self.provider == EmbeddingProvider.OPENAI:
+                 batch_embeddings = await self._embed_openai_batch(batch)
+                 embeddings.extend(batch_embeddings)
+             else:
+                 # Fallback
+                 for text in batch:
+                     embeddings.append(np.random.randn(self.dimension))
+
+         return embeddings
+
+     async def _embed_openai(self, text: str) -> np.ndarray:
+         """Generate OpenAI embedding"""
+         try:
+             response = self.client.embeddings.create(
+                 input=text,
+                 model=self.model
+             )
+
+             embedding = response.data[0].embedding
+             return np.array(embedding, dtype=np.float64)
+
+         except Exception as e:
+             logger.error(f"OpenAI embedding error: {e}")
+             raise
+
+     async def _embed_openai_batch(self, texts: List[str]) -> List[np.ndarray]:
+         """Generate OpenAI embeddings in batch"""
+         try:
+             response = self.client.embeddings.create(
+                 input=texts,
+                 model=self.model
+             )
+
+             embeddings = [
+                 np.array(data.embedding, dtype=np.float64)
+                 for data in response.data
+             ]
+
+             return embeddings
+
+         except Exception as e:
+             logger.error(f"OpenAI batch embedding error: {e}")
+             raise
+
+     def embed_repo_summary(self, repo_data: Dict[str, Any]) -> np.ndarray:
+         """
+         Generate embedding for repository summary
+
+         Args:
+             repo_data: Repository data dictionary
+
+         Returns:
+             Embedding vector
+         """
+         # Combine relevant fields
+         text_parts = []
+
+         if repo_data.get('name'):
+             text_parts.append(f"Repository: {repo_data['name']}")
+
+         if repo_data.get('description'):
+             text_parts.append(f"Description: {repo_data['description']}")
+
+         if repo_data.get('topics'):
+             topics = ', '.join(repo_data['topics'])
+             text_parts.append(f"Topics: {topics}")
+
+         if repo_data.get('readme'):
+             # Use first 1000 chars of README
+             readme_excerpt = repo_data['readme'][:1000]
+             text_parts.append(f"README: {readme_excerpt}")
+
+         combined_text = "\n".join(text_parts)
+
+         # Generate embedding synchronously for now
+         # In production, this should be async
+         import asyncio
+         try:
+             loop = asyncio.get_event_loop()
+         except RuntimeError:
+             loop = asyncio.new_event_loop()
+             asyncio.set_event_loop(loop)
+
+         return loop.run_until_complete(self.embed_text(combined_text))
+
+     def cosine_similarity(self, vec_a: np.ndarray, vec_b: np.ndarray) -> float:
+         """
+         Compute cosine similarity between two vectors
+
+         Args:
+             vec_a: First vector
+             vec_b: Second vector
+
+         Returns:
+             Similarity score in [0, 1]
+         """
+         dot_product = np.dot(vec_a, vec_b)
+         norm_a = np.linalg.norm(vec_a)
+         norm_b = np.linalg.norm(vec_b)
+
+         if norm_a == 0 or norm_b == 0:
+             return 0.0
+
+         similarity = dot_product / (norm_a * norm_b)
+
+         # Normalize to [0, 1]
+         return (similarity + 1) / 2
+
+     def get_dimension(self) -> int:
+         """Get embedding dimension"""
+         return self.dimension
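A quick sketch of how EmbeddingService would be driven end to end. The import path is assumed (this hunk does not show the file name); the OpenAI provider needs OPENAI_API_KEY, while other providers fall back to random vectors as coded above:

    import asyncio
    from ruvscan.embeddings import EmbeddingService  # assumed module path

    async def main():
        service = EmbeddingService(provider="openai", model="text-embedding-3-small")
        vec_a = await service.embed_text("vector search over code repositories")
        vec_b = await service.embed_text("semantic indexing of GitHub repos")
        # cosine_similarity rescales the raw cosine from [-1, 1] into [0, 1]
        print(service.cosine_similarity(vec_a, vec_b))

    asyncio.run(main())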
@@ -0,0 +1,212 @@
+ """
+ FACT (Framework for Autonomous Context Tracking) Cache Implementation
+ Deterministic reasoning trace and prompt replay
+ """
+
+ import hashlib
+ import json
+ from typing import Dict, Any, Optional, List
+ from datetime import datetime
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+ class FACTCache:
+     """
+     Deterministic caching layer for reproducible reasoning
+     Implements prompt replay and reasoning trace storage
+     """
+
+     def __init__(self, db_manager=None):
+         self.db = db_manager
+         self.version = "0.5.0"
+
+     def generate_hash(self, prompt: str, context: Optional[Dict] = None) -> str:
+         """
+         Generate deterministic hash for prompt + context
+
+         Args:
+             prompt: Input prompt text
+             context: Optional context dictionary
+
+         Returns:
+             SHA256 hash string
+         """
+         content = prompt
+         if context:
+             # Sort keys for deterministic hashing
+             content += json.dumps(context, sort_keys=True)
+
+         return hashlib.sha256(content.encode()).hexdigest()
+
+     def get(self, prompt: str, context: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
+         """
+         Retrieve cached response if exists
+
+         Args:
+             prompt: Input prompt
+             context: Optional context
+
+         Returns:
+             Cached entry or None
+         """
+         if not self.db:
+             return None
+
+         try:
+             cache_hash = self.generate_hash(prompt, context)
+             entry = self.db.get_fact_cache(prompt)  # lookup is keyed by the raw prompt; the hash is only used for logging
+
+             if entry:
+                 logger.info(f"FACT cache hit: {cache_hash[:16]}...")
+                 return entry
+
+             logger.info(f"FACT cache miss: {cache_hash[:16]}...")
+             return None
+
+         except Exception as e:
+             logger.error(f"FACT cache retrieval error: {e}")
+             return None
+
+     def set(
+         self,
+         prompt: str,
+         response: str,
+         context: Optional[Dict] = None,
+         metadata: Optional[Dict] = None
+     ) -> str:
+         """
+         Store response in FACT cache
+
+         Args:
+             prompt: Input prompt
+             response: Response to cache
+             context: Optional context
+             metadata: Additional metadata
+
+         Returns:
+             Cache hash
+         """
+         if not self.db:
+             return ""
+
+         try:
+             cache_hash = self.generate_hash(prompt, context)
+
+             # Prepare metadata
+             cache_metadata = {
+                 "version": self.version,
+                 "timestamp": datetime.utcnow().isoformat(),
+                 "context": context,
+                 **(metadata or {})
+             }
+
+             # Store in database
+             self.db.add_fact_cache(prompt, response, cache_metadata)
+
+             logger.info(f"FACT cache stored: {cache_hash[:16]}...")
+             return cache_hash
+
+         except Exception as e:
+             logger.error(f"FACT cache storage error: {e}")
+             return ""
+
+     def trace_reasoning(
+         self,
+         query: str,
+         steps: List[Dict[str, Any]],
+         final_result: str
+     ) -> Dict[str, Any]:
+         """
+         Create reasoning trace for deterministic replay
+
+         Args:
+             query: Original query
+             steps: List of reasoning steps
+             final_result: Final reasoning output
+
+         Returns:
+             Reasoning trace dictionary
+         """
+         trace = {
+             "query": query,
+             "steps": steps,
+             "final_result": final_result,
+             "timestamp": datetime.utcnow().isoformat(),
+             "version": self.version
+         }
+
+         # Store trace
+         trace_json = json.dumps(trace, indent=2)
+         self.set(
+             prompt=f"reasoning_trace:{query}",
+             response=trace_json,
+             metadata={"type": "reasoning_trace", "steps_count": len(steps)}
+         )
+
+         return trace
+
+     def replay_reasoning(self, query: str) -> Optional[Dict[str, Any]]:
+         """
+         Replay cached reasoning trace
+
+         Args:
+             query: Original query
+
+         Returns:
+             Reasoning trace or None
+         """
+         entry = self.get(f"reasoning_trace:{query}")
+
+         if not entry:
+             return None
+
+         try:
+             trace = json.loads(entry['response'])
+             logger.info(f"Replaying reasoning trace with {len(trace['steps'])} steps")
+             return trace
+         except Exception as e:
+             logger.error(f"Error replaying reasoning trace: {e}")
+             return None
+
+     def validate_determinism(self, prompt: str, new_response: str) -> bool:
+         """
+         Validate that new response matches cached response (determinism check)
+
+         Args:
+             prompt: Input prompt
+             new_response: New response to validate
+
+         Returns:
+             True if deterministic (matches cache)
+         """
+         cached = self.get(prompt)
+
+         if not cached:
+             # No cached version, store this one
+             self.set(prompt, new_response)
+             return True
+
+         # Check if responses match
+         cached_response = cached.get('response', '')
+         is_deterministic = cached_response == new_response
+
+         if not is_deterministic:
+             logger.warning(f"Determinism violation detected for prompt: {prompt[:50]}...")
+
+         return is_deterministic
+
+     def get_cache_stats(self) -> Dict[str, Any]:
+         """
+         Get cache statistics
+
+         Returns:
+             Statistics dictionary
+         """
+         # TODO: Implement cache statistics from database
+         return {
+             "version": self.version,
+             "total_entries": 0,
+             "hit_rate": 0.0,
+             "avg_response_size": 0
+         }
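FACTCache only does real work when a db_manager exposing get_fact_cache(prompt) and add_fact_cache(prompt, response, metadata) is supplied; with the default db_manager=None every lookup misses and every store is a no-op. A minimal sketch using a hypothetical in-memory stand-in written only to satisfy those two calls (the import path is assumed; the real database manager is not shown in this diff):

    from reasoning.fact_cache import FACTCache  # assumed import path

    class InMemoryDB:
        """Hypothetical stand-in for the real database manager."""
        def __init__(self):
            self.rows = {}

        def get_fact_cache(self, prompt):
            return self.rows.get(prompt)

        def add_fact_cache(self, prompt, response, metadata):
            self.rows[prompt] = {"response": response, "metadata": metadata}

    cache = FACTCache(db_manager=InMemoryDB())
    cache.set("summarize repo X", "Repo X is a code scanner.")
    print(cache.validate_determinism("summarize repo X", "Repo X is a code scanner."))  # True
    print(cache.validate_determinism("summarize repo X", "Something different."))       # False, logs a warning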