iam-policy-validator 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of iam-policy-validator might be problematic.

Files changed (83)
  1. iam_policy_validator-1.7.0.dist-info/METADATA +1057 -0
  2. iam_policy_validator-1.7.0.dist-info/RECORD +83 -0
  3. iam_policy_validator-1.7.0.dist-info/WHEEL +4 -0
  4. iam_policy_validator-1.7.0.dist-info/entry_points.txt +2 -0
  5. iam_policy_validator-1.7.0.dist-info/licenses/LICENSE +21 -0
  6. iam_validator/__init__.py +27 -0
  7. iam_validator/__main__.py +11 -0
  8. iam_validator/__version__.py +7 -0
  9. iam_validator/checks/__init__.py +43 -0
  10. iam_validator/checks/action_condition_enforcement.py +884 -0
  11. iam_validator/checks/action_resource_matching.py +441 -0
  12. iam_validator/checks/action_validation.py +72 -0
  13. iam_validator/checks/condition_key_validation.py +92 -0
  14. iam_validator/checks/condition_type_mismatch.py +259 -0
  15. iam_validator/checks/full_wildcard.py +71 -0
  16. iam_validator/checks/mfa_condition_check.py +112 -0
  17. iam_validator/checks/policy_size.py +147 -0
  18. iam_validator/checks/policy_type_validation.py +305 -0
  19. iam_validator/checks/principal_validation.py +776 -0
  20. iam_validator/checks/resource_validation.py +138 -0
  21. iam_validator/checks/sensitive_action.py +254 -0
  22. iam_validator/checks/service_wildcard.py +107 -0
  23. iam_validator/checks/set_operator_validation.py +157 -0
  24. iam_validator/checks/sid_uniqueness.py +170 -0
  25. iam_validator/checks/utils/__init__.py +1 -0
  26. iam_validator/checks/utils/policy_level_checks.py +143 -0
  27. iam_validator/checks/utils/sensitive_action_matcher.py +294 -0
  28. iam_validator/checks/utils/wildcard_expansion.py +87 -0
  29. iam_validator/checks/wildcard_action.py +67 -0
  30. iam_validator/checks/wildcard_resource.py +135 -0
  31. iam_validator/commands/__init__.py +25 -0
  32. iam_validator/commands/analyze.py +531 -0
  33. iam_validator/commands/base.py +48 -0
  34. iam_validator/commands/cache.py +392 -0
  35. iam_validator/commands/download_services.py +255 -0
  36. iam_validator/commands/post_to_pr.py +86 -0
  37. iam_validator/commands/validate.py +600 -0
  38. iam_validator/core/__init__.py +14 -0
  39. iam_validator/core/access_analyzer.py +671 -0
  40. iam_validator/core/access_analyzer_report.py +640 -0
  41. iam_validator/core/aws_fetcher.py +940 -0
  42. iam_validator/core/check_registry.py +607 -0
  43. iam_validator/core/cli.py +134 -0
  44. iam_validator/core/condition_validators.py +626 -0
  45. iam_validator/core/config/__init__.py +81 -0
  46. iam_validator/core/config/aws_api.py +35 -0
  47. iam_validator/core/config/aws_global_conditions.py +160 -0
  48. iam_validator/core/config/category_suggestions.py +104 -0
  49. iam_validator/core/config/condition_requirements.py +155 -0
  50. iam_validator/core/config/config_loader.py +472 -0
  51. iam_validator/core/config/defaults.py +523 -0
  52. iam_validator/core/config/principal_requirements.py +421 -0
  53. iam_validator/core/config/sensitive_actions.py +672 -0
  54. iam_validator/core/config/service_principals.py +95 -0
  55. iam_validator/core/config/wildcards.py +124 -0
  56. iam_validator/core/constants.py +74 -0
  57. iam_validator/core/formatters/__init__.py +27 -0
  58. iam_validator/core/formatters/base.py +147 -0
  59. iam_validator/core/formatters/console.py +59 -0
  60. iam_validator/core/formatters/csv.py +170 -0
  61. iam_validator/core/formatters/enhanced.py +440 -0
  62. iam_validator/core/formatters/html.py +672 -0
  63. iam_validator/core/formatters/json.py +33 -0
  64. iam_validator/core/formatters/markdown.py +63 -0
  65. iam_validator/core/formatters/sarif.py +251 -0
  66. iam_validator/core/models.py +327 -0
  67. iam_validator/core/policy_checks.py +656 -0
  68. iam_validator/core/policy_loader.py +396 -0
  69. iam_validator/core/pr_commenter.py +424 -0
  70. iam_validator/core/report.py +872 -0
  71. iam_validator/integrations/__init__.py +28 -0
  72. iam_validator/integrations/github_integration.py +815 -0
  73. iam_validator/integrations/ms_teams.py +442 -0
  74. iam_validator/sdk/__init__.py +187 -0
  75. iam_validator/sdk/arn_matching.py +382 -0
  76. iam_validator/sdk/context.py +222 -0
  77. iam_validator/sdk/exceptions.py +48 -0
  78. iam_validator/sdk/helpers.py +177 -0
  79. iam_validator/sdk/policy_utils.py +425 -0
  80. iam_validator/sdk/shortcuts.py +283 -0
  81. iam_validator/utils/__init__.py +31 -0
  82. iam_validator/utils/cache.py +105 -0
  83. iam_validator/utils/regex.py +206 -0
@@ -0,0 +1,940 @@
+ """AWS Service Fetcher Module with advanced caching and performance features.
+
+ This module provides functionality to fetch AWS service information from the AWS service reference API.
+ It includes methods to retrieve a list of services, fetch detailed information for specific services,
+ and handle errors gracefully.
+
+ Features:
+ - TTL-based caching with automatic expiry
+ - LRU memory cache for frequently accessed services
+ - Service pre-fetching for common services
+ - Batch API requests support
+ - Compiled regex patterns for better performance
+ - Connection pool optimization
+ - Request coalescing for duplicate requests
+
+ Example usage:
+     async with AWSServiceFetcher() as fetcher:
+         services = await fetcher.fetch_services()
+         service_detail = await fetcher.fetch_service_by_name("S3")
+ """
+
+ import asyncio
+ import hashlib
+ import json
+ import logging
+ import os
+ import re
+ import sys
+ import time
+ from pathlib import Path
+ from typing import Any
+
+ import httpx
+
+ from iam_validator.core.config import AWS_SERVICE_REFERENCE_BASE_URL
+ from iam_validator.core.models import ServiceDetail, ServiceInfo
+ from iam_validator.utils.cache import LRUCache
+
+ logger = logging.getLogger(__name__)
+
+
+ class CompiledPatterns:
+     """Pre-compiled regex patterns for validation.
+
+     This class implements the Singleton pattern to ensure patterns are compiled only once
+     and reused across all instances for better performance.
+     """
+
+     _instance = None
+     _initialized = False
+
+     def __new__(cls) -> "CompiledPatterns":
+         if cls._instance is None:
+             cls._instance = super().__new__(cls)
+         return cls._instance
+
+     def __init__(self) -> None:
+         """Initialize compiled patterns (only once due to Singleton pattern)."""
+         # Only initialize once, even if __init__ is called multiple times
+         if CompiledPatterns._initialized:
+             return
+
+         CompiledPatterns._initialized = True
+
+         # ARN validation pattern
+         self.arn_pattern = re.compile(
+             r"^arn:(?P<partition>(aws|aws-cn|aws-us-gov|aws-eusc|aws-iso|aws-iso-b|aws-iso-e|aws-iso-f)):"
+             r"(?P<service>[a-z0-9\-]+):"
+             r"(?P<region>[a-z0-9\-]*):"
+             r"(?P<account>[0-9]*):"
+             r"(?P<resource>.+)$",
+             re.IGNORECASE,
+         )
+
+         # Action format pattern
+         self.action_pattern = re.compile(
+             r"^(?P<service>[a-zA-Z0-9_-]+):(?P<action>[a-zA-Z0-9*_-]+)$"
+         )
+
+         # Wildcard detection patterns
+         self.wildcard_pattern = re.compile(r"\*")
+         self.partial_wildcard_pattern = re.compile(r"^[^*]+\*$")
+
+
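For illustration (not part of the packaged file), a minimal sketch of how this singleton behaves, assuming the wheel is installed so the module is importable as iam_validator.core.aws_fetcher:

    from iam_validator.core.aws_fetcher import CompiledPatterns

    patterns_a = CompiledPatterns()
    patterns_b = CompiledPatterns()

    # __new__ returns the cached instance, so both names point to the same object
    # and the regexes are compiled only once per process.
    assert patterns_a is patterns_b

    match = patterns_a.action_pattern.match("s3:GetObject")
    print(match.group("service"), match.group("action"))  # s3 GetObject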
+ class AWSServiceFetcher:
+     """Fetches AWS service information from the AWS service reference API with enhanced performance features.
+
+     This class provides a comprehensive interface for retrieving AWS service metadata,
+     including actions, resources, and condition keys. It includes multiple layers of
+     caching and optimization for high-performance policy validation.
+
+     Features:
+         - Multi-layer caching (memory LRU + disk with TTL)
+         - Service pre-fetching for common AWS services
+         - Request batching and coalescing
+         - Offline mode support with local AWS service files
+         - HTTP/2 connection pooling
+         - Automatic retry with exponential backoff
+
+     Example:
+         >>> async with AWSServiceFetcher() as fetcher:
+         ...     # Fetch service list
+         ...     services = await fetcher.fetch_services()
+         ...
+         ...     # Fetch specific service details
+         ...     s3_service = await fetcher.fetch_service_by_name("s3")
+         ...
+         ...     # Validate actions
+         ...     is_valid, error, is_wildcard = await fetcher.validate_action("s3:GetObject")
+
+     Method Organization:
+         Lifecycle Management:
+             - __init__: Initialize fetcher with configuration
+             - __aenter__, __aexit__: Context manager support
+
+         Caching (Private):
+             - _get_cache_directory: Determine cache location
+             - _get_cache_path: Generate cache file path
+             - _read_from_cache: Read from disk cache
+             - _write_to_cache: Write to disk cache
+             - clear_caches: Clear all caches
+
+         HTTP Operations (Private):
+             - _make_request: Core HTTP request handler
+             - _make_request_with_batching: Request coalescing
+             - _prefetch_common_services: Pre-load common services
+
+         File I/O (Private):
+             - _load_services_from_file: Load service list from local file
+             - _load_service_from_file: Load service details from local file
+
+         Public API - Fetching:
+             - fetch_services: Get list of all AWS services
+             - fetch_service_by_name: Get details for one service
+             - fetch_multiple_services: Batch fetch multiple services
+
+         Public API - Validation:
+             - validate_action: Check if action exists in service
+             - validate_arn: Validate ARN format
+             - validate_condition_key: Check condition key validity
+
+         Public API - Parsing:
+             - parse_action: Split action into service and name
+             - _match_wildcard_action: Match wildcard patterns
+
+         Utilities:
+             - get_stats: Get cache statistics
+     """
+
+     BASE_URL = AWS_SERVICE_REFERENCE_BASE_URL
+
+     # Common AWS services to pre-fetch
+     # All other services are fetched on demand (lazy-loaded when they appear in policies)
+     COMMON_SERVICES = [
+         "acm",
+         "apigateway",
+         "autoscaling",
+         "backup",
+         "batch",
+         "bedrock",
+         "cloudformation",
+         "cloudfront",
+         "cloudtrail",
+         "cloudwatch",
+         "config",
+         "dynamodb",
+         "ec2-instance-connect",
+         "ec2",
+         "ecr",
+         "ecs",
+         "eks",
+         "elasticache",
+         "elasticloadbalancing",
+         "events",
+         "firehose",
+         "glacier",
+         "glue",
+         "guardduty",
+         "iam",
+         "imagebuilder",
+         "inspector2",
+         "kinesis",
+         "kms",
+         "lambda",
+         "logs",
+         "rds",
+         "route53",
+         "s3",
+         "scheduler",
+         "secretsmanager",
+         "securityhub",
+         "sns",
+         "sqs",
+         "sts",
+         "support",
+         "waf",
+         "wafv2",
+     ]
+
+     def __init__(
+         self,
+         timeout: float = 30.0,
+         retries: int = 3,
+         enable_cache: bool = True,
+         cache_ttl: int = 604800,
+         memory_cache_size: int = 256,
+         connection_pool_size: int = 50,
+         keepalive_connections: int = 20,
+         prefetch_common: bool = True,
+         cache_dir: Path | str | None = None,
+         aws_services_dir: Path | str | None = None,
+     ):
+         """Initialize the AWS service fetcher.
+
+         Args:
+             timeout: Request timeout in seconds
+             retries: Number of retries for failed requests
+             enable_cache: Enable persistent disk caching
+             cache_ttl: Cache time-to-live in seconds
+             memory_cache_size: Size of in-memory LRU cache
+             connection_pool_size: HTTP connection pool size
+             keepalive_connections: Number of keepalive connections
+             prefetch_common: Prefetch common AWS services
+             cache_dir: Custom cache directory path
+             aws_services_dir: Directory containing pre-downloaded AWS service JSON files.
+                 When set, the fetcher will load services from local files
+                 instead of making API calls. Directory should contain:
+                 - _services.json: List of all services
+                 - {service}.json: Individual service files (e.g., s3.json)
+         """
+         self.timeout = timeout
+         self.retries = retries
+         self.enable_cache = enable_cache
+         self.cache_ttl = cache_ttl
+         self.prefetch_common = prefetch_common
+
+         # AWS services directory for offline mode
+         self.aws_services_dir: Path | None = None
+         if aws_services_dir:
+             self.aws_services_dir = Path(aws_services_dir)
+             if not self.aws_services_dir.exists():
+                 raise ValueError(f"AWS services directory does not exist: {aws_services_dir}")
+             logger.info(f"Using local AWS services from: {self.aws_services_dir}")
+
+         self._client: httpx.AsyncClient | None = None
+         self._memory_cache = LRUCache(maxsize=memory_cache_size, ttl=cache_ttl)
+         self._cache_dir = self._get_cache_directory(cache_dir)
+         self._patterns = CompiledPatterns()
+
+         # Batch request queue
+         self._batch_queue: dict[str, asyncio.Future[Any]] = {}
+         self._batch_lock = asyncio.Lock()
+
+         # Connection pool settings
+         self.connection_pool_size = connection_pool_size
+         self.keepalive_connections = keepalive_connections
+
+         # Track prefetched services
+         self._prefetched_services: set[str] = set()
+
+         # Create cache directory if needed
+         if self.enable_cache:
+             self._cache_dir.mkdir(parents=True, exist_ok=True)
+
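For illustration (not part of the packaged file), a minimal construction sketch; the ./aws_services path is hypothetical, and for offline mode the directory must already exist and contain _services.json plus one {service}.json file per service, otherwise the constructor raises ValueError:

    from pathlib import Path

    from iam_validator.core.aws_fetcher import AWSServiceFetcher

    # Online fetcher with the week-long disk cache shown in the defaults above.
    fetcher = AWSServiceFetcher(timeout=30.0, retries=3, cache_ttl=604800)

    # Offline fetcher: "./aws_services" is a hypothetical directory holding
    # _services.json and per-service JSON files (e.g., s3.json).
    offline = AWSServiceFetcher(
        aws_services_dir=Path("./aws_services"),
        prefetch_common=False,  # skip the warm-up pass when reading local files
    )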
+     @staticmethod
+     def _get_cache_directory(cache_dir: Path | str | None = None) -> Path:
+         """Get the cache directory path, using platform-appropriate defaults.
+
+         Priority:
+             1. Provided cache_dir parameter
+             2. Platform-specific user cache directory
+                - Linux/Unix: ~/.cache/iam-validator/aws_services
+                - macOS: ~/Library/Caches/iam-validator/aws_services
+                - Windows: %LOCALAPPDATA%/iam-validator/aws_services
+
+         Args:
+             cache_dir: Optional custom cache directory path
+
+         Returns:
+             Path object for the cache directory
+         """
+         if cache_dir is not None:
+             return Path(cache_dir)
+
+         # Determine platform-specific cache directory
+         if sys.platform == "darwin":
+             # macOS
+             base_cache = Path.home() / "Library" / "Caches"
+         elif sys.platform == "win32":
+             # Windows
+             base_cache = Path(os.environ.get("LOCALAPPDATA", Path.home() / "AppData" / "Local"))
+         else:
+             # Linux and other Unix-like systems
+             base_cache = Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache"))
+
+         return base_cache / "iam-validator" / "aws_services"
+
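Because _get_cache_directory is a staticmethod, the default location can be inspected without building a fetcher; a quick sketch (not part of the packaged file):

    from iam_validator.core.aws_fetcher import AWSServiceFetcher

    # Platform default, e.g. ~/.cache/iam-validator/aws_services on Linux.
    print(AWSServiceFetcher._get_cache_directory())

    # An explicit override is returned unchanged.
    print(AWSServiceFetcher._get_cache_directory("/tmp/iam-validator-cache"))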
+     async def __aenter__(self) -> "AWSServiceFetcher":
+         """Async context manager entry with optimized settings."""
+         self._client = httpx.AsyncClient(
+             timeout=httpx.Timeout(self.timeout),
+             follow_redirects=True,
+             limits=httpx.Limits(
+                 max_keepalive_connections=self.keepalive_connections,
+                 max_connections=self.connection_pool_size,
+                 keepalive_expiry=30.0,  # Keep connections alive for 30 seconds
+             ),
+             http2=True,  # Enable HTTP/2 for multiplexing
+         )
+
+         # Pre-fetch common services if enabled
+         if self.prefetch_common:
+             await self._prefetch_common_services()
+
+         return self
+
+     async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+         """Async context manager exit."""
+         del exc_type, exc_val, exc_tb
+         if self._client:
+             await self._client.aclose()
+             self._client = None
+
+     async def _prefetch_common_services(self) -> None:
+         """Pre-fetch commonly used AWS services for better performance."""
+         logger.info(f"Pre-fetching {len(self.COMMON_SERVICES)} common AWS services...")
+
+         # First, fetch the services list once to populate the cache
+         # This prevents all concurrent calls from fetching the same list
+         await self.fetch_services()
+
+         async def fetch_service(name: str) -> None:
+             try:
+                 await self.fetch_service_by_name(name)
+                 self._prefetched_services.add(name)
+             except Exception as e:
+                 logger.warning(f"Failed to prefetch service {name}: {e}")
+
+         # Fetch in batches to avoid overwhelming the API
+         batch_size = 5
+         for i in range(0, len(self.COMMON_SERVICES), batch_size):
+             batch = self.COMMON_SERVICES[i : i + batch_size]
+             await asyncio.gather(*[fetch_service(name) for name in batch])
+
+         logger.info(f"Pre-fetched {len(self._prefetched_services)} services successfully")
+
+     def _get_cache_path(self, url: str) -> Path:
+         """Get cache file path with timestamp for TTL checking."""
+         url_hash = hashlib.md5(url.encode()).hexdigest()
+
+         # Extract service name for better organization
+         filename = f"{url_hash}.json"
+         if "/v1/" in url:
+             service_name = url.split("/v1/")[1].split("/")[0]
+             filename = f"{service_name}_{url_hash[:8]}.json"
+         elif url == self.BASE_URL:
+             filename = "services_list.json"
+
+         return self._cache_dir / filename
+
+     def _read_from_cache(self, url: str) -> Any | None:
+         """Read from disk cache with TTL checking."""
+         if not self.enable_cache:
+             return None
+
+         cache_path = self._get_cache_path(url)
+
+         if not cache_path.exists():
+             return None
+
+         try:
+             # Check file modification time for TTL
+             mtime = cache_path.stat().st_mtime
+             if time.time() - mtime > self.cache_ttl:
+                 logger.debug(f"Cache expired for {url}")
+                 cache_path.unlink()  # Remove expired cache
+                 return None
+
+             with open(cache_path, encoding="utf-8") as f:
+                 data = json.load(f)
+                 logger.debug(f"Disk cache hit for {url}")
+                 return data
+
+         except Exception as e:
+             logger.warning(f"Failed to read cache for {url}: {e}")
+             return None
+
+     def _write_to_cache(self, url: str, data: Any) -> None:
+         """Write to disk cache."""
+         if not self.enable_cache:
+             return
+
+         cache_path = self._get_cache_path(url)
+
+         try:
+             with open(cache_path, "w", encoding="utf-8") as f:
+                 json.dump(data, f, indent=2)
+             logger.debug(f"Written to disk cache: {url}")
+         except Exception as e:
+             logger.warning(f"Failed to write cache for {url}: {e}")
+
+     async def _make_request_with_batching(self, url: str) -> Any:
+         """Make request with request batching/coalescing.
+
+         Uses double-check locking pattern to avoid race conditions and deadlocks.
+         """
+         # First check: see if request is already in progress
+         existing_future = None
+         async with self._batch_lock:
+             if url in self._batch_queue:
+                 existing_future = self._batch_queue[url]
+
+         # Wait for existing request outside the lock
+         if existing_future is not None:
+             logger.debug(f"Coalescing request for {url}")
+             return await existing_future
+
+         # Create new future for this request
+         loop = asyncio.get_event_loop()
+         future: asyncio.Future[Any] = loop.create_future()
+
+         # Second check: register future or use existing one (double-check pattern)
+         async with self._batch_lock:
+             if url in self._batch_queue:
+                 # Another coroutine registered while we were creating the future
+                 existing_future = self._batch_queue[url]
+             else:
+                 # We're the first, register our future
+                 self._batch_queue[url] = future
+
+         # If we found an existing future, wait for it
+         if existing_future is not None:
+             logger.debug(f"Coalescing request for {url} (late check)")
+             return await existing_future
+
+         # We're responsible for making the request
+         try:
+             # Actually make the request
+             result = await self._make_request(url)
+             if not future.done():
+                 future.set_result(result)
+             return result
+         except Exception as e:
+             if not future.done():
+                 future.set_exception(e)
+             raise
+         finally:
+             # Remove from queue
+             async with self._batch_lock:
+                 self._batch_queue.pop(url, None)
+
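A sketch of the coalescing behaviour from the caller's side (not part of the packaged file; network access and an installed wheel are assumed): concurrent lookups of the same service funnel into a single in-flight request. Prefetching is disabled so the cache starts cold.

    import asyncio

    from iam_validator.core.aws_fetcher import AWSServiceFetcher


    async def main() -> None:
        async with AWSServiceFetcher(prefetch_common=False) as fetcher:
            # Three concurrent calls for the same service; duplicate requests
            # for the same URL are coalesced onto one pending future.
            details = await asyncio.gather(
                fetcher.fetch_service_by_name("s3"),
                fetcher.fetch_service_by_name("s3"),
                fetcher.fetch_service_by_name("s3"),
            )
            print(len(details), "results")


    asyncio.run(main())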
+     async def _make_request(self, url: str) -> Any:
+         """Make HTTP request with multi-level caching."""
+         # Check memory cache first
+         cache_key = f"url:{url}"
+         cached_data = await self._memory_cache.get(cache_key)
+         if cached_data is not None:
+             logger.debug(f"Memory cache hit for {url}")
+             return cached_data
+
+         # Check disk cache
+         cached_data = self._read_from_cache(url)
+         if cached_data is not None:
+             # Store in memory cache for faster access
+             await self._memory_cache.set(cache_key, cached_data)
+             return cached_data
+
+         if not self._client:
+             raise RuntimeError("Fetcher not initialized. Use as async context manager.")
+
+         last_exception: Exception | None = None
+
+         for attempt in range(self.retries):
+             try:
+                 logger.debug(f"Fetching URL: {url} (attempt {attempt + 1})")
+                 response = await self._client.get(url)
+                 response.raise_for_status()
+
+                 try:
+                     data = response.json()
+
+                     # Cache in both memory and disk
+                     await self._memory_cache.set(cache_key, data)
+                     self._write_to_cache(url, data)
+
+                     return data
+
+                 except Exception as json_error:
+                     logger.error(f"Failed to parse response as JSON: {json_error}")
+                     raise ValueError(f"Invalid JSON response from {url}: {json_error}")
+
+             except httpx.HTTPStatusError as e:
+                 logger.error(f"HTTP error {e.response.status_code} for {url}")
+                 if e.response.status_code == 404:
+                     raise ValueError(f"Service not found: {url}")
+                 last_exception = e
+
+             except httpx.RequestError as e:
+                 logger.error(f"Request error for {url}: {e}")
+                 last_exception = e
+
+             except Exception as e:
+                 logger.error(f"Unexpected error for {url}: {e}")
+                 last_exception = e
+
+             if attempt < self.retries - 1:
+                 wait_time = 2**attempt
+                 logger.info(f"Retrying in {wait_time} seconds...")
+                 await asyncio.sleep(wait_time)
+
+         raise last_exception or Exception(f"Failed to fetch {url} after {self.retries} attempts")
+
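The retry loop above sleeps 2**attempt seconds between attempts, and only while another attempt remains; a tiny sketch of that schedule for the default retries=3 (not part of the packaged file):

    retries = 3  # default used by AWSServiceFetcher

    # A sleep happens only when another attempt remains.
    waits = [2**attempt for attempt in range(retries - 1)]
    print(waits)            # [1, 2]
    print(sum(waits), "s")  # 3 s of total backoff before the final attempt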
+     def _load_services_from_file(self) -> list[ServiceInfo]:
+         """Load services list from local _services.json file.
+
+         Returns:
+             List of ServiceInfo objects loaded from _services.json
+
+         Raises:
+             FileNotFoundError: If _services.json doesn't exist
+             ValueError: If _services.json is invalid
+         """
+         if not self.aws_services_dir:
+             raise ValueError("aws_services_dir is not set")
+
+         services_file = self.aws_services_dir / "_services.json"
+         if not services_file.exists():
+             raise FileNotFoundError(f"_services.json not found in {self.aws_services_dir}")
+
+         try:
+             with open(services_file) as f:
+                 data = json.load(f)
+
+             if not isinstance(data, list):
+                 raise ValueError("Expected list of services from _services.json")
+
+             services: list[ServiceInfo] = []
+             for item in data:
+                 if isinstance(item, dict):
+                     service = item.get("service")
+                     url = item.get("url")
+                     if service and url:
+                         services.append(ServiceInfo(service=str(service), url=str(url)))
+
+             logger.info(f"Loaded {len(services)} services from local file: {services_file}")
+             return services
+
+         except json.JSONDecodeError as e:
+             raise ValueError(f"Invalid JSON in _services.json: {e}")
+
+     def _load_service_from_file(self, service_name: str) -> ServiceDetail:
+         """Load service detail from local JSON file.
+
+         Args:
+             service_name: Name of the service (case-insensitive)
+
+         Returns:
+             ServiceDetail object loaded from {service}.json
+
+         Raises:
+             FileNotFoundError: If service JSON file doesn't exist
+             ValueError: If service JSON is invalid
+         """
+         if not self.aws_services_dir:
+             raise ValueError("aws_services_dir is not set")
+
+         # Normalize filename (lowercase, replace spaces with underscores)
+         filename = f"{service_name.lower().replace(' ', '_')}.json"
+         service_file = self.aws_services_dir / filename
+
+         if not service_file.exists():
+             raise FileNotFoundError(f"Service file not found: {service_file}")
+
+         try:
+             with open(service_file) as f:
+                 data = json.load(f)
+
+             service_detail = ServiceDetail.model_validate(data)
+             logger.debug(f"Loaded service {service_name} from local file: {service_file}")
+             return service_detail
+
+         except json.JSONDecodeError as e:
+             raise ValueError(f"Invalid JSON in {service_file}: {e}")
+
+     async def fetch_services(self) -> list[ServiceInfo]:
+         """Fetch list of AWS services with caching.
+
+         When aws_services_dir is set, loads from the local _services.json file.
+         Otherwise, fetches from AWS API.
+         """
+         # Check if we have the parsed services list in cache
+         services_cache_key = "parsed_services_list"
+         cached_services = await self._memory_cache.get(services_cache_key)
+         if cached_services is not None and isinstance(cached_services, list):
+             logger.debug(f"Retrieved {len(cached_services)} services from parsed cache")
+             return cached_services
+
+         # Load from local file if aws_services_dir is set
+         if self.aws_services_dir:
+             services = self._load_services_from_file()
+             # Cache the loaded services
+             await self._memory_cache.set(services_cache_key, services)
+             return services
+
+         # Not in parsed cache, fetch the raw data from API
+         data = await self._make_request_with_batching(self.BASE_URL)
+
+         if not isinstance(data, list):
+             raise ValueError("Expected list of services from root endpoint")
+
+         services: list[ServiceInfo] = []
+         for item in data:
+             if isinstance(item, dict):
+                 service = item.get("service")
+                 url = item.get("url")
+                 if service and url:
+                     services.append(ServiceInfo(service=str(service), url=str(url)))
+
+         # Cache the parsed services list
+         await self._memory_cache.set(services_cache_key, services)
+
+         # Log only on first fetch (when parsed cache was empty)
+         logger.info(f"Fetched and parsed {len(services)} services from AWS API")
+         return services
+
+     async def fetch_service_by_name(self, service_name: str) -> ServiceDetail:
+         """Fetch service detail with optimized caching.
+
+         When aws_services_dir is set, loads from the local {service}.json file.
+         Otherwise, fetches from AWS API.
+         """
+         # Normalize service name
+         service_name_lower = service_name.lower()
+
+         # Check memory cache with service name as key
+         cache_key = f"service:{service_name_lower}"
+         cached_detail = await self._memory_cache.get(cache_key)
+         if isinstance(cached_detail, ServiceDetail):
+             logger.debug(f"Memory cache hit for service {service_name}")
+             return cached_detail
+
+         # Load from local file if aws_services_dir is set
+         if self.aws_services_dir:
+             try:
+                 service_detail = self._load_service_from_file(service_name_lower)
+                 # Cache the loaded service
+                 await self._memory_cache.set(cache_key, service_detail)
+                 return service_detail
+             except FileNotFoundError:
+                 # Try to find the service in _services.json to get the proper name
+                 services = await self.fetch_services()
+                 for service in services:
+                     if service.service.lower() == service_name_lower:
+                         # Try with the exact service name from _services.json
+                         try:
+                             service_detail = self._load_service_from_file(service.service)
+                             await self._memory_cache.set(cache_key, service_detail)
+                             return service_detail
+                         except FileNotFoundError:
+                             pass
+                 raise ValueError(f"Service '{service_name}' not found in {self.aws_services_dir}")
+
+         # Fetch service list and find URL from API
+         services = await self.fetch_services()
+
+         for service in services:
+             if service.service.lower() == service_name_lower:
+                 # Fetch service detail from API
+                 data = await self._make_request_with_batching(service.url)
+
+                 # Validate and parse
+                 service_detail = ServiceDetail.model_validate(data)
+
+                 # Cache with service name as key
+                 await self._memory_cache.set(cache_key, service_detail)
+
+                 return service_detail
+
+         raise ValueError(f"Service '{service_name}' not found")
+
+     async def fetch_multiple_services(self, service_names: list[str]) -> dict[str, ServiceDetail]:
+         """Fetch multiple services concurrently with optimized batching."""
+
+         async def fetch_single(name: str) -> tuple[str, ServiceDetail]:
+             try:
+                 detail = await self.fetch_service_by_name(name)
+                 return name, detail
+             except Exception as e:
+                 logger.error(f"Failed to fetch service {name}: {e}")
+                 raise
+
+         # Fetch all services concurrently
+         tasks = [fetch_single(name) for name in service_names]
+         results = await asyncio.gather(*tasks, return_exceptions=True)
+
+         services: dict[str, ServiceDetail] = {}
+         for i, result in enumerate(results):
+             if isinstance(result, Exception):
+                 logger.error(f"Failed to fetch service {service_names[i]}: {result}")
+                 raise result
+             elif isinstance(result, tuple):
+                 name, detail = result
+                 services[name] = detail
+
+         return services
+
+     def parse_action(self, action: str) -> tuple[str, str]:
+         """Parse IAM action using compiled regex for better performance."""
+         match = self._patterns.action_pattern.match(action)
+         if not match:
+             raise ValueError(f"Invalid action format: {action}")
+
+         return match.group("service").lower(), match.group("action")
+
+     def _match_wildcard_action(self, pattern: str, actions: list[str]) -> tuple[bool, list[str]]:
+         """Match wildcard pattern against list of actions.
+
+         Args:
+             pattern: Action pattern with wildcards (e.g., "Get*", "*Object", "Describe*")
+             actions: List of valid action names
+
+         Returns:
+             Tuple of (has_matches, list_of_matched_actions)
+         """
+         # Convert wildcard pattern to regex
+         # Escape special regex chars except *, then replace * with .*
+         regex_pattern = "^" + re.escape(pattern).replace(r"\*", ".*") + "$"
+         compiled_pattern = re.compile(regex_pattern, re.IGNORECASE)
+
+         matched = [a for a in actions if compiled_pattern.match(a)]
+         return len(matched) > 0, matched
+
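The wildcard-to-regex translation above can be reproduced standalone; a short sketch with a made-up action list (the helper mirrors _match_wildcard_action and is not imported from the package):

    import re


    def match_wildcard(pattern: str, actions: list[str]) -> list[str]:
        # Escape everything except '*', which becomes '.*', then anchor the pattern.
        regex = "^" + re.escape(pattern).replace(r"\*", ".*") + "$"
        compiled = re.compile(regex, re.IGNORECASE)
        return [a for a in actions if compiled.match(a)]


    actions = ["GetObject", "GetObjectTagging", "PutObject", "ListBucket"]
    print(match_wildcard("Get*", actions))       # ['GetObject', 'GetObjectTagging']
    print(match_wildcard("*Object", actions))    # ['GetObject', 'PutObject']
    print(match_wildcard("Describe*", actions))  # []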
+     async def validate_action(
+         self, action: str, allow_wildcards: bool = True
+     ) -> tuple[bool, str | None, bool]:
+         """Validate IAM action with optimized caching.
+
+         Supports:
+         - Exact actions: s3:GetObject
+         - Full wildcards: s3:*
+         - Partial wildcards: s3:Get*, s3:*Object, s3:*Get*
+
+         Returns:
+             Tuple of (is_valid, error_message, is_wildcard)
+         """
+         try:
+             service_prefix, action_name = self.parse_action(action)
+
+             # Quick wildcard check using compiled pattern
+             is_wildcard = bool(self._patterns.wildcard_pattern.search(action_name))
+
+             # Handle full wildcard
+             if action_name == "*":
+                 if allow_wildcards:
+                     # Just verify service exists
+                     await self.fetch_service_by_name(service_prefix)
+                     return True, None, True
+                 else:
+                     return False, "Wildcard actions are not allowed", True
+
+             # Fetch service details (will use cache)
+             service_detail = await self.fetch_service_by_name(service_prefix)
+             available_actions = list(service_detail.actions.keys())
+
+             # Handle partial wildcards (e.g., Get*, *Object, Describe*)
+             if is_wildcard:
+                 if not allow_wildcards:
+                     return False, "Wildcard actions are not allowed", True
+
+                 has_matches, matched_actions = self._match_wildcard_action(
+                     action_name, available_actions
+                 )
+
+                 if has_matches:
+                     # Wildcard is valid and matches at least one action
+                     match_count = len(matched_actions)
+                     sample_actions = matched_actions[:5]  # Show up to 5 examples
+                     examples = ", ".join(sample_actions)
+                     if match_count > 5:
+                         examples += f", ... ({match_count - 5} more)"
+
+                     return True, None, True
+                 else:
+                     # Wildcard doesn't match any actions
+                     return (
+                         False,
+                         f"Action pattern '{action_name}' does not match any actions in service '{service_prefix}'",
+                         True,
+                     )
+
+             # Check if exact action exists (case-insensitive)
+             action_exists = any(a.lower() == action_name.lower() for a in available_actions)
+
+             if action_exists:
+                 return True, None, False
+             else:
+                 # Suggest similar actions
+                 similar = [a for a in available_actions if action_name.lower() in a.lower()][:3]
+
+                 suggestion = f" Did you mean: {', '.join(similar)}?" if similar else ""
+                 return (
+                     False,
+                     f"Action '{action_name}' not found in service '{service_prefix}'.{suggestion}",
+                     False,
+                 )
+
+         except ValueError as e:
+             return False, str(e), False
+         except Exception as e:
+             logger.error(f"Error validating action {action}: {e}")
+             return False, f"Failed to validate action: {str(e)}", False
+
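Usage sketch for the three-part return value (not part of the packaged file; network access or a local services directory is assumed, and the action strings are only examples):

    import asyncio

    from iam_validator.core.aws_fetcher import AWSServiceFetcher


    async def main() -> None:
        async with AWSServiceFetcher() as fetcher:
            for action in ("s3:GetObject", "s3:Get*", "s3:GetObjectt"):
                is_valid, error, is_wildcard = await fetcher.validate_action(action)
                print(f"{action}: valid={is_valid} wildcard={is_wildcard} error={error}")

            # Wildcards can also be rejected outright.
            ok, error, _ = await fetcher.validate_action("s3:*", allow_wildcards=False)
            print(ok, error)  # False Wildcard actions are not allowed


    asyncio.run(main())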
+     def validate_arn(self, arn: str) -> tuple[bool, str | None]:
+         """Validate ARN format using compiled regex."""
+         if arn == "*":
+             return True, None
+
+         match = self._patterns.arn_pattern.match(arn)
+         if not match:
+             return False, f"Invalid ARN format: {arn}"
+
+         return True, None
+
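validate_arn is synchronous and only checks the ARN shape against the compiled pattern; a quick sketch (not part of the packaged file):

    from iam_validator.core.aws_fetcher import AWSServiceFetcher

    fetcher = AWSServiceFetcher(prefetch_common=False)

    print(fetcher.validate_arn("arn:aws:s3:::my-bucket/*"))  # (True, None)
    print(fetcher.validate_arn("*"))                         # (True, None)
    print(fetcher.validate_arn("not-an-arn"))                # (False, 'Invalid ARN format: not-an-arn')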
+     async def validate_condition_key(
+         self, action: str, condition_key: str, resources: list[str] | None = None
+     ) -> tuple[bool, str | None, str | None]:
+         """
+         Validate condition key against action and optionally resource types.
+
+         Args:
+             action: IAM action (e.g., "s3:GetObject")
+             condition_key: Condition key to validate (e.g., "s3:prefix")
+             resources: Optional list of resource ARNs to validate against
+
+         Returns:
+             Tuple of (is_valid, error_message, warning_message)
+             - is_valid: True if key is valid (even with warning)
+             - error_message: Error message if invalid (is_valid=False)
+             - warning_message: Warning message if valid but not recommended
+         """
+         try:
+             from iam_validator.core.config.aws_global_conditions import (
+                 get_global_conditions,
+             )
+
+             service_prefix, action_name = self.parse_action(action)
+
+             # Check if it's a global condition key
+             is_global_key = False
+             if condition_key.startswith("aws:"):
+                 global_conditions = get_global_conditions()
+                 if global_conditions.is_valid_global_key(condition_key):
+                     is_global_key = True
+                 else:
+                     return (
+                         False,
+                         f"Invalid AWS global condition key: '{condition_key}'.",
+                         None,
+                     )
+
+             # Fetch service detail (cached)
+             service_detail = await self.fetch_service_by_name(service_prefix)
+
+             # Check service-specific condition keys
+             if condition_key in service_detail.condition_keys:
+                 return True, None, None
+
+             # Check action-specific condition keys
+             if action_name in service_detail.actions:
+                 action_detail = service_detail.actions[action_name]
+                 if (
+                     action_detail.action_condition_keys
+                     and condition_key in action_detail.action_condition_keys
+                 ):
+                     return True, None, None
+
+                 # Check resource-specific condition keys
+                 # Get resource types required by this action
+                 if resources and action_detail.resources:
+                     for res_req in action_detail.resources:
+                         resource_name = res_req.get("Name", "")
+                         if not resource_name:
+                             continue
+
+                         # Look up resource type definition
+                         resource_type = service_detail.resources.get(resource_name)
+                         if resource_type and resource_type.condition_keys:
+                             if condition_key in resource_type.condition_keys:
+                                 return True, None, None
+
+                 # If it's a global key but the action has specific condition keys defined,
+                 # AWS allows it but the key may not be available in every request context
+                 if is_global_key and action_detail.action_condition_keys is not None:
+                     warning_msg = (
+                         f"Global condition key '{condition_key}' is used with action '{action}'. "
+                         f"While global condition keys can be used across all AWS services, "
+                         f"the key may not be available in every request context. "
+                         f"Verify that '{condition_key}' is available for this specific action's request context. "
+                         f"Consider using '*IfExists' operators (e.g., StringEqualsIfExists) if the key might be missing."
+                     )
+                     return True, None, warning_msg
+
+             # If it's a global key and action doesn't define specific keys, allow it
+             if is_global_key:
+                 return True, None, None
+
+             return (
+                 False,
+                 f"Condition key '{condition_key}' is not valid for action '{action}'",
+                 None,
+             )
+
+         except Exception as e:
+             logger.error(f"Error validating condition key {condition_key} for {action}: {e}")
+             return False, f"Failed to validate condition key: {str(e)}", None
+
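Usage sketch for the (is_valid, error, warning) triple (not part of the packaged file; network access is assumed and the condition keys are only examples, so actual results may differ):

    import asyncio

    from iam_validator.core.aws_fetcher import AWSServiceFetcher


    async def main() -> None:
        async with AWSServiceFetcher(prefetch_common=False) as fetcher:
            # Service-specific key paired with its usual action.
            print(await fetcher.validate_condition_key("s3:ListBucket", "s3:prefix"))

            # Global key: may validate with a warning about request context.
            ok, error, warning = await fetcher.validate_condition_key(
                "s3:GetObject", "aws:SourceIp"
            )
            print(ok, error, warning)

            # Misspelled global key: rejected outright.
            print(await fetcher.validate_condition_key("s3:GetObject", "aws:SourceIPs"))


    asyncio.run(main())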
+     async def clear_caches(self) -> None:
+         """Clear all caches (memory and disk)."""
+         # Clear memory cache
+         await self._memory_cache.clear()
+
+         # Clear disk cache
+         if self.enable_cache and self._cache_dir.exists():
+             for cache_file in self._cache_dir.glob("*.json"):
+                 try:
+                     cache_file.unlink()
+                 except Exception as e:
+                     logger.warning(f"Failed to delete cache file {cache_file}: {e}")
+
+         logger.info("Cleared all caches")
+
+     def get_stats(self) -> dict[str, Any]:
+         """Get fetcher statistics for monitoring."""
+         return {
+             "prefetched_services": len(self._prefetched_services),
+             "memory_cache_size": len(self._memory_cache.cache),
+             "batch_queue_size": len(self._batch_queue),
+             "cache_ttl": self.cache_ttl,
+             "connection_pool_size": self.connection_pool_size,
+         }
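Finally, a small end-to-end sketch that ties the pieces together and reads the statistics dictionary (not part of the packaged file; network access and an installed wheel are assumed):

    import asyncio

    from iam_validator.core.aws_fetcher import AWSServiceFetcher


    async def main() -> None:
        async with AWSServiceFetcher() as fetcher:
            services = await fetcher.fetch_services()
            s3 = await fetcher.fetch_service_by_name("s3")
            print(f"{len(services)} services, {len(s3.actions)} S3 actions")

            stats = fetcher.get_stats()
            print(stats["prefetched_services"], stats["memory_cache_size"])

            await fetcher.clear_caches()


    asyncio.run(main())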