reverse-diagrams 1.3.4-py3-none-any.whl → 2.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
src/utils/cache.py ADDED
@@ -0,0 +1,274 @@
+ """Caching system for AWS API responses."""
+ import json
+ import hashlib
+ import logging
+ from pathlib import Path
+ from datetime import datetime, timedelta
+ from typing import Optional, Dict, Any, Union
+ import tempfile
+ import os
+
+ from ..config import get_config
+
+ logger = logging.getLogger(__name__)
+
+
+ class AWSDataCache:
+     """Cache for AWS API responses with TTL support."""
+
+     def __init__(self, cache_dir: Optional[Path] = None, ttl_hours: int = 1):
+         """
+         Initialize cache.
+
+         Args:
+             cache_dir: Cache directory path (optional, uses temp dir if None)
+             ttl_hours: Time to live in hours
+         """
+         if cache_dir is None:
+             cache_dir = Path(tempfile.gettempdir()) / "reverse_diagrams_cache"
+
+         self.cache_dir = Path(cache_dir)
+         self.ttl = timedelta(hours=ttl_hours)
+
+         # Create cache directory
+         self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+         # Set appropriate permissions
+         try:
+             os.chmod(self.cache_dir, 0o755)
+         except OSError:
+             logger.warning(f"Could not set permissions on cache directory: {self.cache_dir}")
+
+         logger.debug(f"Cache initialized at {self.cache_dir} with TTL {ttl_hours}h")
+
+     def _get_cache_key(self, operation: str, params: Dict[str, Any]) -> str:
+         """
+         Generate cache key from operation and parameters.
+
+         Args:
+             operation: AWS operation name
+             params: Operation parameters
+
+         Returns:
+             Cache key string
+         """
+         # Sort parameters for consistent hashing
+         param_str = json.dumps(params, sort_keys=True, default=str)
+         cache_string = f"{operation}:{param_str}"
+
+         # Create hash
+         return hashlib.md5(cache_string.encode()).hexdigest()
+
+     def get(self, operation: str, params: Dict[str, Any]) -> Optional[Any]:
+         """
+         Get cached data if still valid.
+
+         Args:
+             operation: AWS operation name
+             params: Operation parameters
+
+         Returns:
+             Cached data if valid, None otherwise
+         """
+         try:
+             cache_key = self._get_cache_key(operation, params)
+             cache_file = self.cache_dir / f"{cache_key}.json"
+
+             if not cache_file.exists():
+                 logger.debug(f"Cache miss for {operation}")
+                 return None
+
+             # Check if cache is still valid
+             stat = cache_file.stat()
+             cache_time = datetime.fromtimestamp(stat.st_mtime)
+
+             if cache_time + self.ttl < datetime.now():
+                 logger.debug(f"Cache expired for {operation}")
+                 # Clean up expired cache
+                 try:
+                     cache_file.unlink()
+                 except OSError:
+                     pass
+                 return None
+
+             # Load and return cached data
+             with cache_file.open('r', encoding='utf-8') as f:
+                 data = json.load(f)
+
+             logger.debug(f"Cache hit for {operation}")
+             return data
+
+         except Exception as e:
+             logger.warning(f"Error reading cache for {operation}: {e}")
+             return None
+
+     def set(self, operation: str, params: Dict[str, Any], data: Any) -> None:
+         """
+         Cache data.
+
+         Args:
+             operation: AWS operation name
+             params: Operation parameters
+             data: Data to cache
+         """
+         try:
+             cache_key = self._get_cache_key(operation, params)
+             cache_file = self.cache_dir / f"{cache_key}.json"
+
+             # Write data to cache
+             with cache_file.open('w', encoding='utf-8') as f:
+                 json.dump(data, f, indent=2, default=str)
+
+             # Set appropriate permissions
+             try:
+                 os.chmod(cache_file, 0o644)
+             except OSError:
+                 pass
+
+             logger.debug(f"Cached data for {operation}")
+
+         except Exception as e:
+             logger.warning(f"Error writing cache for {operation}: {e}")
+
+     def clear(self) -> None:
+         """Clear all cached data."""
+         try:
+             for cache_file in self.cache_dir.glob("*.json"):
+                 cache_file.unlink()
+             logger.info("Cache cleared")
+         except Exception as e:
+             logger.warning(f"Error clearing cache: {e}")
+
+     def clear_expired(self) -> int:
+         """
+         Clear expired cache entries.
+
+         Returns:
+             Number of entries cleared
+         """
+         cleared = 0
+         try:
+             current_time = datetime.now()
+
+             for cache_file in self.cache_dir.glob("*.json"):
+                 try:
+                     stat = cache_file.stat()
+                     cache_time = datetime.fromtimestamp(stat.st_mtime)
+
+                     if cache_time + self.ttl < current_time:
+                         cache_file.unlink()
+                         cleared += 1
+
+                 except OSError:
+                     continue
+
+             if cleared > 0:
+                 logger.info(f"Cleared {cleared} expired cache entries")
+
+         except Exception as e:
+             logger.warning(f"Error clearing expired cache: {e}")
+
+         return cleared
+
+     def get_cache_info(self) -> Dict[str, Any]:
+         """
+         Get cache statistics.
+
+         Returns:
+             Dictionary with cache information
+         """
+         try:
+             cache_files = list(self.cache_dir.glob("*.json"))
+             total_size = sum(f.stat().st_size for f in cache_files)
+
+             # Count expired entries
+             current_time = datetime.now()
+             expired = 0
+
+             for cache_file in cache_files:
+                 try:
+                     stat = cache_file.stat()
+                     cache_time = datetime.fromtimestamp(stat.st_mtime)
+                     if cache_time + self.ttl < current_time:
+                         expired += 1
+                 except OSError:
+                     continue
+
+             return {
+                 "cache_dir": str(self.cache_dir),
+                 "total_entries": len(cache_files),
+                 "expired_entries": expired,
+                 "total_size_bytes": total_size,
+                 "total_size_mb": round(total_size / (1024 * 1024), 2),
+                 "ttl_hours": self.ttl.total_seconds() / 3600
+             }
+
+         except Exception as e:
+             logger.warning(f"Error getting cache info: {e}")
+             return {"error": str(e)}
+
+
+ # Global cache instance
+ _cache: Optional[AWSDataCache] = None
+
+
+ def get_cache() -> AWSDataCache:
+     """Get or create global cache instance."""
+     global _cache
+
+     if _cache is None:
+         config = get_config()
+         if config.enable_caching:
+             _cache = AWSDataCache(ttl_hours=config.cache_ttl_hours)
+         else:
+             # Create a no-op cache that doesn't actually cache
+             _cache = NoOpCache()
+
+     return _cache
+
+
+ class NoOpCache:
+     """No-operation cache for when caching is disabled."""
+
+     def get(self, operation: str, params: Dict[str, Any]) -> Optional[Any]:
+         return None
+
+     def set(self, operation: str, params: Dict[str, Any], data: Any) -> None:
+         pass
+
+     def clear(self) -> None:
+         pass
+
+     def clear_expired(self) -> int:
+         return 0
+
+     def get_cache_info(self) -> Dict[str, Any]:
+         return {"caching": "disabled"}
+
+
+ def cached_aws_call(operation: str, params: Dict[str, Any], fetch_func) -> Any:
+     """
+     Decorator-like function for caching AWS API calls.
+
+     Args:
+         operation: AWS operation name
+         params: Operation parameters
+         fetch_func: Function to call if cache miss
+
+     Returns:
+         Cached or fresh data
+     """
+     cache = get_cache()
+
+     # Try to get from cache first
+     cached_data = cache.get(operation, params)
+     if cached_data is not None:
+         return cached_data
+
+     # Cache miss - fetch fresh data
+     fresh_data = fetch_func()
+
+     # Cache the result
+     cache.set(operation, params, fresh_data)
+
+     return fresh_data
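For orientation, here is a minimal usage sketch of the new caching layer. It is not part of the package: the import path is assumed from the src/utils layout shown above, and the operation label, parameters, and boto3 call are illustrative placeholders. cached_aws_call wraps this same read-through pattern around the module-level get_cache() singleton.

# Hypothetical usage sketch; the import path below is assumed from the
# src/utils layout and may differ in the installed package.
from pathlib import Path

import boto3

from reverse_diagrams.utils.cache import AWSDataCache  # assumed import path

# Cache AWS responses under a temp directory with a 2-hour TTL.
cache = AWSDataCache(cache_dir=Path("/tmp/rd-cache-demo"), ttl_hours=2)

operation = "organizations.list_accounts"  # arbitrary label for the cached call
params = {"MaxResults": 20}                 # parameters that shape the response

accounts = cache.get(operation, params)
if accounts is None:
    # Cache miss or expired entry: fetch from AWS and store the JSON-serializable result.
    client = boto3.client("organizations")
    accounts = client.list_accounts(MaxResults=20)["Accounts"]
    cache.set(operation, params, accounts)

print(cache.get_cache_info())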
@@ -0,0 +1,361 @@
+ """Concurrent processing utilities for AWS operations."""
+ import asyncio
+ import logging
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from typing import List, Dict, Any, Callable, Optional, TypeVar, Generic, Coroutine
+ from dataclasses import dataclass
+ import time
+
+ from ..config import get_config
+ from .progress import get_progress_tracker
+
+ logger = logging.getLogger(__name__)
+
+ T = TypeVar('T')
+ R = TypeVar('R')
+
+
+ @dataclass
+ class ProcessingResult(Generic[T, R]):
+     """Result of a concurrent processing operation."""
+     input_item: T
+     result: Optional[R] = None
+     error: Optional[Exception] = None
+     processing_time: float = 0.0
+
+     @property
+     def success(self) -> bool:
+         """Check if processing was successful."""
+         return self.error is None
+
+     @property
+     def failed(self) -> bool:
+         """Check if processing failed."""
+         return self.error is not None
+
+
+ class ConcurrentAWSProcessor:
+     """Concurrent processor for AWS operations with progress tracking."""
+
+     def __init__(self, max_workers: Optional[int] = None):
+         """
+         Initialize concurrent processor.
+
+         Args:
+             max_workers: Maximum number of worker threads (optional)
+         """
+         config = get_config()
+         self.max_workers = max_workers or config.max_concurrent_workers
+         self.progress = get_progress_tracker()
+
+         logger.info(f"Initialized concurrent processor with {self.max_workers} workers")
+
+     def process_items(
+         self,
+         items: List[T],
+         processor_func: Callable[[T], R],
+         description: str = "Processing items",
+         fail_fast: bool = False
+     ) -> List[ProcessingResult[T, R]]:
+         """
+         Process items concurrently with progress tracking.
+
+         Args:
+             items: List of items to process
+             processor_func: Function to process each item
+             description: Description for progress tracking
+             fail_fast: Whether to stop on first error
+
+         Returns:
+             List of processing results
+         """
+         if not items:
+             logger.info("No items to process")
+             return []
+
+         results: List[ProcessingResult[T, R]] = []
+
+         with self.progress.track_operation(f"{description} ({len(items)} items)", total=len(items)) as task_id:
+             with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+                 # Submit all tasks
+                 future_to_item = {
+                     executor.submit(self._safe_process_item, item, processor_func): item
+                     for item in items
+                 }
+
+                 # Process completed tasks
+                 for future in as_completed(future_to_item):
+                     item = future_to_item[future]
+                     result = future.result()
+                     results.append(result)
+
+                     # Update progress
+                     self.progress.update_progress(task_id)
+
+                     # Handle errors
+                     if result.failed:
+                         logger.warning(f"Failed to process item {item}: {result.error}")
+                         if fail_fast:
+                             logger.error("Stopping processing due to fail_fast=True")
+                             # Cancel remaining futures
+                             for remaining_future in future_to_item:
+                                 if not remaining_future.done():
+                                     remaining_future.cancel()
+                             break
+
+         # Log summary
+         successful = sum(1 for r in results if r.success)
+         failed = len(results) - successful
+         total_time = sum(r.processing_time for r in results)
+
+         logger.info(f"Processing complete: {successful} successful, {failed} failed, {total_time:.2f}s total")
+
+         if failed > 0:
+             self.progress.show_warning(
+                 f"Processing completed with {failed} failures",
+                 f"Successfully processed {successful}/{len(items)} items"
+             )
+         else:
+             self.progress.show_success(
+                 f"All {successful} items processed successfully",
+                 f"Total processing time: {total_time:.2f}s"
+             )
+
+         return results
+
+     def _safe_process_item(self, item: T, processor_func: Callable[[T], R]) -> ProcessingResult[T, R]:
+         """
+         Safely process a single item with error handling and timing.
+
+         Args:
+             item: Item to process
+             processor_func: Processing function
+
+         Returns:
+             Processing result
+         """
+         start_time = time.time()
+
+         try:
+             result = processor_func(item)
+             processing_time = time.time() - start_time
+
+             return ProcessingResult(
+                 input_item=item,
+                 result=result,
+                 processing_time=processing_time
+             )
+
+         except Exception as e:
+             processing_time = time.time() - start_time
+             logger.debug(f"Error processing item {item}: {e}")
+
+             return ProcessingResult(
+                 input_item=item,
+                 error=e,
+                 processing_time=processing_time
+             )
+
+     def process_accounts_concurrently(
+         self,
+         accounts: List[Dict[str, Any]],
+         processor_func: Callable[[Dict[str, Any]], Dict[str, Any]]
+     ) -> List[ProcessingResult[Dict[str, Any], Dict[str, Any]]]:
+         """
+         Process AWS accounts concurrently.
+
+         Args:
+             accounts: List of account dictionaries
+             processor_func: Function to process each account
+
+         Returns:
+             List of processing results
+         """
+         return self.process_items(
+             accounts,
+             processor_func,
+             description="Processing AWS accounts",
+             fail_fast=False
+         )
+
+     def process_organizational_units_concurrently(
+         self,
+         ous: List[Dict[str, Any]],
+         processor_func: Callable[[Dict[str, Any]], Dict[str, Any]]
+     ) -> List[ProcessingResult[Dict[str, Any], Dict[str, Any]]]:
+         """
+         Process organizational units concurrently.
+
+         Args:
+             ous: List of OU dictionaries
+             processor_func: Function to process each OU
+
+         Returns:
+             List of processing results
+         """
+         return self.process_items(
+             ous,
+             processor_func,
+             description="Processing organizational units",
+             fail_fast=False
+         )
+
+     def process_permission_sets_concurrently(
+         self,
+         permission_sets: List[str],
+         processor_func: Callable[[str], Dict[str, Any]]
+     ) -> List[ProcessingResult[str, Dict[str, Any]]]:
+         """
+         Process permission sets concurrently.
+
+         Args:
+             permission_sets: List of permission set ARNs
+             processor_func: Function to process each permission set
+
+         Returns:
+             List of processing results
+         """
+         return self.process_items(
+             permission_sets,
+             processor_func,
+             description="Processing permission sets",
+             fail_fast=False
+         )
+
+
+ class AsyncAWSProcessor:
+     """Async processor for AWS operations (for future async AWS SDK support)."""
+
+     def __init__(self, max_concurrent: Optional[int] = None):
+         """
+         Initialize async processor.
+
+         Args:
+             max_concurrent: Maximum concurrent operations
+         """
+         config = get_config()
+         self.max_concurrent = max_concurrent or config.max_concurrent_workers
+         self.semaphore = asyncio.Semaphore(self.max_concurrent)
+         self.progress = get_progress_tracker()
+
+     async def process_items_async(
+         self,
+         items: List[T],
+         processor_func: Callable[[T], Coroutine[Any, Any, R]],
+         description: str = "Processing items async"
+     ) -> List[ProcessingResult[T, R]]:
+         """
+         Process items asynchronously.
+
+         Args:
+             items: List of items to process
+             processor_func: Async function to process each item
+             description: Description for progress tracking
+
+         Returns:
+             List of processing results
+         """
+         if not items:
+             return []
+
+         async def _process_with_semaphore(item: T) -> ProcessingResult[T, R]:
+             async with self.semaphore:
+                 return await self._safe_process_item_async(item, processor_func)
+
+         # Create tasks for all items
+         tasks = [_process_with_semaphore(item) for item in items]
+
+         # Process with progress tracking
+         results = []
+         with self.progress.track_operation(f"{description} ({len(items)} items)", total=len(items)) as task_id:
+             for coro in asyncio.as_completed(tasks):
+                 result = await coro
+                 results.append(result)
+                 self.progress.update_progress(task_id)
+
+         return results
+
+     async def _safe_process_item_async(
+         self,
+         item: T,
+         processor_func: Callable[[T], Coroutine[Any, Any, R]]
+     ) -> ProcessingResult[T, R]:
+         """
+         Safely process a single item asynchronously.
+
+         Args:
+             item: Item to process
+             processor_func: Async processing function
+
+         Returns:
+             Processing result
+         """
+         start_time = time.time()
+
+         try:
+             result = await processor_func(item)
+             processing_time = time.time() - start_time
+
+             return ProcessingResult(
+                 input_item=item,
+                 result=result,
+                 processing_time=processing_time
+             )
+
+         except Exception as e:
+             processing_time = time.time() - start_time
+
+             return ProcessingResult(
+                 input_item=item,
+                 error=e,
+                 processing_time=processing_time
+             )
+
+
+ # Global processor instances
+ _concurrent_processor: Optional[ConcurrentAWSProcessor] = None
+ _async_processor: Optional[AsyncAWSProcessor] = None
+
+
+ def get_concurrent_processor() -> ConcurrentAWSProcessor:
+     """Get or create global concurrent processor instance."""
+     global _concurrent_processor
+     if _concurrent_processor is None:
+         _concurrent_processor = ConcurrentAWSProcessor()
+     return _concurrent_processor
+
+
+ def get_async_processor() -> AsyncAWSProcessor:
+     """Get or create global async processor instance."""
+     global _async_processor
+     if _async_processor is None:
+         _async_processor = AsyncAWSProcessor()
+     return _async_processor
+
+
+ def process_concurrently(
+     items: List[T],
+     processor_func: Callable[[T], R],
+     description: str = "Processing items",
+     max_workers: Optional[int] = None,
+     fail_fast: bool = False
+ ) -> List[ProcessingResult[T, R]]:
+     """
+     Convenience function for concurrent processing.
+
+     Args:
+         items: List of items to process
+         processor_func: Function to process each item
+         description: Description for progress tracking
+         max_workers: Maximum number of workers (optional)
+         fail_fast: Whether to stop on first error
+
+     Returns:
+         List of processing results
+     """
+     if max_workers:
+         processor = ConcurrentAWSProcessor(max_workers=max_workers)
+     else:
+         processor = get_concurrent_processor()
+
+     return processor.process_items(items, processor_func, description, fail_fast)
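To make the threading model concrete, the following self-contained sketch reproduces the submit/as_completed pattern that ConcurrentAWSProcessor.process_items builds on, without the package's config and progress-tracking dependencies. describe_account and the account IDs are placeholders, not package APIs.

# Standalone sketch of the submit/as_completed pattern; per-item errors are
# captured instead of aborting the whole batch, mirroring ProcessingResult.
from concurrent.futures import ThreadPoolExecutor, as_completed


def describe_account(account_id: str) -> dict:
    # Stand-in for a real per-item AWS call (e.g. a boto3 Organizations lookup).
    return {"Id": account_id, "Status": "ACTIVE"}


account_ids = ["111111111111", "222222222222", "333333333333"]
results, errors = [], []

with ThreadPoolExecutor(max_workers=4) as executor:
    future_to_id = {executor.submit(describe_account, aid): aid for aid in account_ids}
    for future in as_completed(future_to_id):
        aid = future_to_id[future]
        try:
            results.append(future.result())
        except Exception as exc:  # corresponds to ProcessingResult.error
            errors.append((aid, exc))

print(f"{len(results)} succeeded, {len(errors)} failed")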