iam-policy-validator 1.7.2__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. {iam_policy_validator-1.7.2.dist-info → iam_policy_validator-1.9.0.dist-info}/METADATA +127 -6
  2. iam_policy_validator-1.9.0.dist-info/RECORD +95 -0
  3. iam_validator/__init__.py +1 -1
  4. iam_validator/__version__.py +1 -1
  5. iam_validator/checks/__init__.py +5 -3
  6. iam_validator/checks/action_condition_enforcement.py +559 -207
  7. iam_validator/checks/action_resource_matching.py +12 -15
  8. iam_validator/checks/action_validation.py +7 -13
  9. iam_validator/checks/condition_key_validation.py +7 -13
  10. iam_validator/checks/condition_type_mismatch.py +15 -22
  11. iam_validator/checks/full_wildcard.py +9 -13
  12. iam_validator/checks/mfa_condition_check.py +8 -17
  13. iam_validator/checks/policy_size.py +6 -39
  14. iam_validator/checks/policy_structure.py +547 -0
  15. iam_validator/checks/policy_type_validation.py +61 -46
  16. iam_validator/checks/principal_validation.py +71 -148
  17. iam_validator/checks/resource_validation.py +13 -20
  18. iam_validator/checks/sensitive_action.py +15 -18
  19. iam_validator/checks/service_wildcard.py +8 -14
  20. iam_validator/checks/set_operator_validation.py +21 -28
  21. iam_validator/checks/sid_uniqueness.py +16 -42
  22. iam_validator/checks/trust_policy_validation.py +506 -0
  23. iam_validator/checks/utils/sensitive_action_matcher.py +26 -26
  24. iam_validator/checks/utils/wildcard_expansion.py +2 -2
  25. iam_validator/checks/wildcard_action.py +9 -13
  26. iam_validator/checks/wildcard_resource.py +9 -13
  27. iam_validator/commands/cache.py +4 -3
  28. iam_validator/commands/validate.py +15 -9
  29. iam_validator/core/__init__.py +2 -3
  30. iam_validator/core/access_analyzer.py +1 -1
  31. iam_validator/core/access_analyzer_report.py +2 -2
  32. iam_validator/core/aws_fetcher.py +24 -1028
  33. iam_validator/core/aws_service/__init__.py +21 -0
  34. iam_validator/core/aws_service/cache.py +108 -0
  35. iam_validator/core/aws_service/client.py +205 -0
  36. iam_validator/core/aws_service/fetcher.py +612 -0
  37. iam_validator/core/aws_service/parsers.py +149 -0
  38. iam_validator/core/aws_service/patterns.py +51 -0
  39. iam_validator/core/aws_service/storage.py +291 -0
  40. iam_validator/core/aws_service/validators.py +379 -0
  41. iam_validator/core/check_registry.py +165 -93
  42. iam_validator/core/config/condition_requirements.py +69 -17
  43. iam_validator/core/config/defaults.py +58 -52
  44. iam_validator/core/config/service_principals.py +40 -3
  45. iam_validator/core/constants.py +17 -0
  46. iam_validator/core/ignore_patterns.py +297 -0
  47. iam_validator/core/models.py +15 -5
  48. iam_validator/core/policy_checks.py +38 -475
  49. iam_validator/core/policy_loader.py +27 -4
  50. iam_validator/sdk/__init__.py +1 -1
  51. iam_validator/sdk/context.py +1 -1
  52. iam_validator/sdk/helpers.py +1 -1
  53. iam_policy_validator-1.7.2.dist-info/RECORD +0 -84
  54. {iam_policy_validator-1.7.2.dist-info → iam_policy_validator-1.9.0.dist-info}/WHEEL +0 -0
  55. {iam_policy_validator-1.7.2.dist-info → iam_policy_validator-1.9.0.dist-info}/entry_points.txt +0 -0
  56. {iam_policy_validator-1.7.2.dist-info → iam_policy_validator-1.9.0.dist-info}/licenses/LICENSE +0 -0
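The largest change in this release is the aws_fetcher refactor: iam_validator/core/aws_fetcher.py shrinks by roughly a thousand lines (+24 -1028) and its functionality moves into the new iam_validator/core/aws_service package (cache.py, client.py, fetcher.py, parsers.py, patterns.py, storage.py, validators.py), with the old module kept as a backward-compatibility facade. The single hunk shown below is that aws_fetcher.py change. As an illustrative sketch of what the migration looks like for downstream code — the async usage is taken from the removed module docstring and is assumed to carry over unchanged to the new package:

    # Illustrative migration sketch; not part of the package diff.
    import asyncio

    # New import location in 1.9.0+. The old iam_validator.core.aws_fetcher path
    # still works but now emits a DeprecationWarning via the facade shown below.
    from iam_validator.core.aws_service import AWSServiceFetcher


    async def main() -> None:
        # Usage mirrors the example from the removed 1.7.2 docstring.
        async with AWSServiceFetcher() as fetcher:
            services = await fetcher.fetch_services()
            s3 = await fetcher.fetch_service_by_name("s3")
            print(len(services), len(s3.actions))


    asyncio.run(main())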
@@ -1,1033 +1,29 @@
- """AWS Service Fetcher Module with advanced caching and performance features.
+ """AWS Service Fetcher - Backward compatibility facade.

- This module provides functionality to fetch AWS service information from the AWS service reference API.
- It includes methods to retrieve a list of services, fetch detailed information for specific services,
- and handle errors gracefully.
+ DEPRECATED: This module is kept for backward compatibility.
+ New code should import from iam_validator.core.aws_service instead:

- Features:
- - TTL-based caching with automatic expiry
- - LRU memory cache for frequently accessed services
- - Service pre-fetching for common services
- - Batch API requests support
- - Compiled regex patterns for better performance
- - Connection pool optimization
- - Request coalescing for duplicate requests
+ from iam_validator.core.aws_service import AWSServiceFetcher

- Example usage:
- async with AWSServiceFetcher() as fetcher:
- services = await fetcher.fetch_services()
- service_detail = await fetcher.fetch_service_by_name("S3")
+ This facade will be removed in a future major version.
  """

- import asyncio
- import hashlib
- import json
- import logging
- import os
- import re
- import sys
- import time
- from dataclasses import dataclass
- from pathlib import Path
- from typing import Any
-
- import httpx
-
- from iam_validator.core import constants
- from iam_validator.core.config import AWS_SERVICE_REFERENCE_BASE_URL
- from iam_validator.core.models import ServiceDetail, ServiceInfo
- from iam_validator.utils.cache import LRUCache
-
- logger = logging.getLogger(__name__)
-
-
- @dataclass
- class ConditionKeyValidationResult:
- """Result of condition key validation.
-
- Attributes:
- is_valid: True if the condition key is valid for the action
- error_message: Short error message if invalid (shown prominently)
- warning_message: Warning message if valid but not recommended
- suggestion: Detailed suggestion with valid keys (shown in collapsible section)
- """
-
- is_valid: bool
- error_message: str | None = None
- warning_message: str | None = None
- suggestion: str | None = None
-
-
- class CompiledPatterns:
- """Pre-compiled regex patterns for validation.
-
- This class implements the Singleton pattern to ensure patterns are compiled only once
- and reused across all instances for better performance.
- """
-
- _instance = None
- _initialized = False
-
- def __new__(cls) -> "CompiledPatterns":
- if cls._instance is None:
- cls._instance = super().__new__(cls)
- return cls._instance
-
- def __init__(self) -> None:
- """Initialize compiled patterns (only once due to Singleton pattern)."""
- # Only initialize once, even if __init__ is called multiple times
- if CompiledPatterns._initialized:
- return
-
- CompiledPatterns._initialized = True
-
- # ARN validation pattern
- self.arn_pattern = re.compile(
- r"^arn:(?P<partition>(aws|aws-cn|aws-us-gov|aws-eusc|aws-iso|aws-iso-b|aws-iso-e|aws-iso-f)):"
- r"(?P<service>[a-z0-9\-]+):"
- r"(?P<region>[a-z0-9\-]*):"
- r"(?P<account>[0-9]*):"
- r"(?P<resource>.+)$",
- re.IGNORECASE,
- )
-
- # Action format pattern
- self.action_pattern = re.compile(
- r"^(?P<service>[a-zA-Z0-9_-]+):(?P<action>[a-zA-Z0-9*_-]+)$"
- )
-
- # Wildcard detection patterns
- self.wildcard_pattern = re.compile(r"\*")
- self.partial_wildcard_pattern = re.compile(r"^[^*]+\*$")
-
-
- class AWSServiceFetcher:
- """Fetches AWS service information from the AWS service reference API with enhanced performance features.
-
- This class provides a comprehensive interface for retrieving AWS service metadata,
- including actions, resources, and condition keys. It includes multiple layers of
- caching and optimization for high-performance policy validation.
-
- Features:
- - Multi-layer caching (memory LRU + disk with TTL)
- - Service pre-fetching for common AWS services
- - Request batching and coalescing
- - Offline mode support with local AWS service files
- - HTTP/2 connection pooling
- - Automatic retry with exponential backoff
-
- Example:
- >>> async with AWSServiceFetcher() as fetcher:
- ... # Fetch service list
- ... services = await fetcher.fetch_services()
- ...
- ... # Fetch specific service details
- ... s3_service = await fetcher.fetch_service_by_name("s3")
- ...
- ... # Validate actions
- ... is_valid = await fetcher.validate_action("s3:GetObject", s3_service)
-
- Method Organization:
- Lifecycle Management:
- - __init__: Initialize fetcher with configuration
- - __aenter__, __aexit__: Context manager support
-
- Caching (Private):
- - _get_cache_directory: Determine cache location
- - _get_cache_path: Generate cache file path
- - _read_from_cache: Read from disk cache
- - _write_to_cache: Write to disk cache
- - clear_caches: Clear all caches
-
- HTTP Operations (Private):
- - _make_request: Core HTTP request handler
- - _make_request_with_batching: Request coalescing
- - _prefetch_common_services: Pre-load common services
-
- File I/O (Private):
- - _load_services_from_file: Load service list from local file
- - _load_service_from_file: Load service details from local file
-
- Public API - Fetching:
- - fetch_services: Get list of all AWS services
- - fetch_service_by_name: Get details for one service
- - fetch_multiple_services: Batch fetch multiple services
-
- Public API - Validation:
- - validate_action: Check if action exists in service
- - validate_arn: Validate ARN format
- - validate_condition_key: Check condition key validity
-
- Public API - Parsing:
- - parse_action: Split action into service and name
- - _match_wildcard_action: Match wildcard patterns
-
- Utilities:
- - get_stats: Get cache statistics
- """
-
- BASE_URL = AWS_SERVICE_REFERENCE_BASE_URL
-
- # Common AWS services to pre-fetch
- # All other services will be fetched on-demand (lazy loading if found in policies)
- COMMON_SERVICES = [
- "acm",
- "apigateway",
- "autoscaling",
- "backup",
- "batch",
- "bedrock",
- "cloudformation",
- "cloudfront",
- "cloudtrail",
- "cloudwatch",
- "config",
- "dynamodb",
- "ec2-instance-connect",
- "ec2",
- "ecr",
- "ecs",
- "eks",
- "elasticache",
- "elasticloadbalancing",
- "events",
- "firehose",
- "glacier",
- "glue",
- "guardduty",
- "iam",
- "imagebuilder",
- "inspector2",
- "kinesis",
- "kms",
- "lambda",
- "logs",
- "rds",
- "route53",
- "s3",
- "scheduler",
- "secretsmanager",
- "securityhub",
- "sns",
- "sqs",
- "sts",
- "support",
- "waf",
- "wafv2",
- ]
-
- def __init__(
- self,
- timeout: float = constants.DEFAULT_HTTP_TIMEOUT_SECONDS,
- retries: int = 3,
- enable_cache: bool = True,
- cache_ttl: int = constants.DEFAULT_CACHE_TTL_SECONDS,
- memory_cache_size: int = 256,
- connection_pool_size: int = 50,
- keepalive_connections: int = 20,
- prefetch_common: bool = True,
- cache_dir: Path | str | None = None,
- aws_services_dir: Path | str | None = None,
- ):
- """Initialize aws service fetcher.
-
- Args:
- timeout: Request timeout in seconds
- retries: Number of retries for failed requests
- enable_cache: Enable persistent disk caching
- cache_ttl: Cache time-to-live in seconds
- memory_cache_size: Size of in-memory LRU cache
- connection_pool_size: HTTP connection pool size
- keepalive_connections: Number of keepalive connections
- prefetch_common: Prefetch common AWS services
- cache_dir: Custom cache directory path
- aws_services_dir: Directory containing pre-downloaded AWS service JSON files.
- When set, the fetcher will load services from local files
- instead of making API calls. Directory should contain:
- - _services.json: List of all services
- - {service}.json: Individual service files (e.g., s3.json)
- """
- self.timeout = timeout
- self.retries = retries
- self.enable_cache = enable_cache
- self.cache_ttl = cache_ttl
- self.prefetch_common = prefetch_common
-
- # AWS services directory for offline mode
- self.aws_services_dir: Path | None = None
- if aws_services_dir:
- self.aws_services_dir = Path(aws_services_dir)
- if not self.aws_services_dir.exists():
- raise ValueError(f"AWS services directory does not exist: {aws_services_dir}")
- logger.info(f"Using local AWS services from: {self.aws_services_dir}")
-
- self._client: httpx.AsyncClient | None = None
- self._memory_cache = LRUCache(maxsize=memory_cache_size, ttl=cache_ttl)
- self._cache_dir = self._get_cache_directory(cache_dir)
- self._patterns = CompiledPatterns()
-
- # Batch request queue
- self._batch_queue: dict[str, asyncio.Future[Any]] = {}
- self._batch_lock = asyncio.Lock()
-
- # Connection pool settings
- self.connection_pool_size = connection_pool_size
- self.keepalive_connections = keepalive_connections
-
- # Track prefetched services
- self._prefetched_services: set[str] = set()
-
- # Create cache directory if needed
- if self.enable_cache:
- self._cache_dir.mkdir(parents=True, exist_ok=True)
-
- @staticmethod
- def _get_cache_directory(cache_dir: Path | str | None = None) -> Path:
- """Get the cache directory path, using platform-appropriate defaults.
-
- Priority:
- 1. Provided cache_dir parameter
- 2. Platform-specific user cache directory
- - Linux/Unix: ~/.cache/iam-validator/aws_services
- - macOS: ~/Library/Caches/iam-validator/aws_services
- - Windows: %LOCALAPPDATA%/iam-validator/cache/aws_services
-
- Args:
- cache_dir: Optional custom cache directory path
-
- Returns:
- Path object for the cache directory
- """
- if cache_dir is not None:
- return Path(cache_dir)
-
- # Determine platform-specific cache directory
- if sys.platform == "darwin":
- # macOS
- base_cache = Path.home() / "Library" / "Caches"
- elif sys.platform == "win32":
- # Windows
- base_cache = Path(os.environ.get("LOCALAPPDATA", Path.home() / "AppData" / "Local"))
- else:
- # Linux and other Unix-like systems
- base_cache = Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache"))
-
- return base_cache / "iam-validator" / "aws_services"
-
- async def __aenter__(self) -> "AWSServiceFetcher":
- """Async context manager entry with optimized settings."""
- self._client = httpx.AsyncClient(
- timeout=httpx.Timeout(self.timeout),
- follow_redirects=True,
- limits=httpx.Limits(
- max_keepalive_connections=self.keepalive_connections,
- max_connections=self.connection_pool_size,
- keepalive_expiry=constants.DEFAULT_HTTP_TIMEOUT_SECONDS, # Keep connections alive
- ),
- http2=True, # Enable HTTP/2 for multiplexing
- )
-
- # Pre-fetch common services if enabled
- if self.prefetch_common:
- await self._prefetch_common_services()
-
- return self
-
- async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
- """Async context manager exit."""
- del exc_type, exc_val, exc_tb
- if self._client:
- await self._client.aclose()
- self._client = None
-
- async def _prefetch_common_services(self) -> None:
- """Pre-fetch commonly used AWS services for better performance."""
- logger.info(f"Pre-fetching {len(self.COMMON_SERVICES)} common AWS services...")
-
- # First, fetch the services list once to populate the cache
- # This prevents all concurrent calls from fetching the same list
- await self.fetch_services()
-
- async def fetch_service(name: str) -> None:
- try:
- await self.fetch_service_by_name(name)
- self._prefetched_services.add(name)
- except Exception as e:
- logger.warning(f"Failed to prefetch service {name}: {e}")
-
- # Fetch in batches to avoid overwhelming the API
- batch_size = 5
- for i in range(0, len(self.COMMON_SERVICES), batch_size):
- batch = self.COMMON_SERVICES[i : i + batch_size]
- await asyncio.gather(*[fetch_service(name) for name in batch])
-
- logger.info(f"Pre-fetched {len(self._prefetched_services)} services successfully")
-
- def _get_cache_path(self, url: str) -> Path:
- """Get cache file path with timestamp for TTL checking."""
- url_hash = hashlib.md5(url.encode()).hexdigest()
-
- # Extract service name for better organization
- filename = f"{url_hash}.json"
- if "/v1/" in url:
- service_name = url.split("/v1/")[1].split("/")[0]
- filename = f"{service_name}_{url_hash[:8]}.json"
- elif url == self.BASE_URL:
- filename = "services_list.json"
-
- return self._cache_dir / filename
-
- def _read_from_cache(self, url: str) -> Any | None:
- """Read from disk cache with TTL checking."""
- if not self.enable_cache:
- return None
-
- cache_path = self._get_cache_path(url)
-
- if not cache_path.exists():
- return None
-
- try:
- # Check file modification time for TTL
- mtime = cache_path.stat().st_mtime
- if time.time() - mtime > self.cache_ttl:
- logger.debug(f"Cache expired for {url}")
- cache_path.unlink() # Remove expired cache
- return None
-
- with open(cache_path, encoding="utf-8") as f:
- data = json.load(f)
- logger.debug(f"Disk cache hit for {url}")
- return data
-
- except Exception as e:
- logger.warning(f"Failed to read cache for {url}: {e}")
- return None
-
- def _write_to_cache(self, url: str, data: Any) -> None:
- """Write to disk cache."""
- if not self.enable_cache:
- return
-
- cache_path = self._get_cache_path(url)
-
- try:
- with open(cache_path, "w", encoding="utf-8") as f:
- json.dump(data, f, indent=2)
- logger.debug(f"Written to disk cache: {url}")
- except Exception as e:
- logger.warning(f"Failed to write cache for {url}: {e}")
-
- async def _make_request_with_batching(self, url: str) -> Any:
- """Make request with request batching/coalescing.
-
- Uses double-check locking pattern to avoid race conditions and deadlocks.
- """
- # First check: see if request is already in progress
- existing_future = None
- async with self._batch_lock:
- if url in self._batch_queue:
- existing_future = self._batch_queue[url]
-
- # Wait for existing request outside the lock
- if existing_future is not None:
- logger.debug(f"Coalescing request for {url}")
- return await existing_future
-
- # Create new future for this request
- loop = asyncio.get_event_loop()
- future: asyncio.Future[Any] = loop.create_future()
-
- # Second check: register future or use existing one (double-check pattern)
- async with self._batch_lock:
- if url in self._batch_queue:
- # Another coroutine registered while we were creating the future
- existing_future = self._batch_queue[url]
- else:
- # We're the first, register our future
- self._batch_queue[url] = future
-
- # If we found an existing future, wait for it
- if existing_future is not None:
- logger.debug(f"Coalescing request for {url} (late check)")
- return await existing_future
-
- # We're responsible for making the request
- try:
- # Actually make the request
- result = await self._make_request(url)
- if not future.done():
- future.set_result(result)
- return result
- except Exception as e:
- if not future.done():
- future.set_exception(e)
- raise
- finally:
- # Remove from queue
- async with self._batch_lock:
- self._batch_queue.pop(url, None)
-
- async def _make_request(self, url: str) -> Any:
- """Make HTTP request with multi-level caching."""
- # Check memory cache first
- cache_key = f"url:{url}"
- cached_data = await self._memory_cache.get(cache_key)
- if cached_data is not None:
- logger.debug(f"Memory cache hit for {url}")
- return cached_data
-
- # Check disk cache
- cached_data = self._read_from_cache(url)
- if cached_data is not None:
- # Store in memory cache for faster access
- await self._memory_cache.set(cache_key, cached_data)
- return cached_data
-
- if not self._client:
- raise RuntimeError("Fetcher not initialized. Use as async context manager.")
-
- last_exception: Exception | None = None
-
- for attempt in range(self.retries):
- try:
- logger.debug(f"Fetching URL: {url} (attempt {attempt + 1})")
- response = await self._client.get(url)
- response.raise_for_status()
-
- try:
- data = response.json()
-
- # Cache in both memory and disk
- await self._memory_cache.set(cache_key, data)
- self._write_to_cache(url, data)
-
- return data
-
- except Exception as json_error:
- logger.error(f"Failed to parse response as JSON: {json_error}")
- raise ValueError(f"Invalid JSON response from {url}: {json_error}")
-
- except httpx.HTTPStatusError as e:
- logger.error(f"HTTP error {e.response.status_code} for {url}")
- if e.response.status_code == 404:
- raise ValueError(f"Service not found: {url}")
- last_exception = e
-
- except httpx.RequestError as e:
- logger.error(f"Request error for {url}: {e}")
- last_exception = e
-
- except Exception as e:
- logger.error(f"Unexpected error for {url}: {e}")
- last_exception = e
-
- if attempt < self.retries - 1:
- wait_time = 2**attempt
- logger.info(f"Retrying in {wait_time} seconds...")
- await asyncio.sleep(wait_time)
-
- raise last_exception or Exception(f"Failed to fetch {url} after {self.retries} attempts")
-
- def _load_services_from_file(self) -> list[ServiceInfo]:
- """Load services list from local _services.json file.
-
- Returns:
- List of ServiceInfo objects loaded from _services.json
-
- Raises:
- FileNotFoundError: If _services.json doesn't exist
- ValueError: If _services.json is invalid
- """
- if not self.aws_services_dir:
- raise ValueError("aws_services_dir is not set")
-
- services_file = self.aws_services_dir / "_services.json"
- if not services_file.exists():
- raise FileNotFoundError(f"_services.json not found in {self.aws_services_dir}")
-
- try:
- with open(services_file) as f:
- data = json.load(f)
-
- if not isinstance(data, list):
- raise ValueError("Expected list of services from _services.json")
-
- services: list[ServiceInfo] = []
- for item in data:
- if isinstance(item, dict):
- service = item.get("service")
- url = item.get("url")
- if service and url:
- services.append(ServiceInfo(service=str(service), url=str(url)))
-
- logger.info(f"Loaded {len(services)} services from local file: {services_file}")
- return services
-
- except json.JSONDecodeError as e:
- raise ValueError(f"Invalid JSON in services.json: {e}")
-
- def _load_service_from_file(self, service_name: str) -> ServiceDetail:
- """Load service detail from local JSON file.
-
- Args:
- service_name: Name of the service (case-insensitive)
-
- Returns:
- ServiceDetail object loaded from {service}.json
-
- Raises:
- FileNotFoundError: If service JSON file doesn't exist
- ValueError: If service JSON is invalid
- """
- if not self.aws_services_dir:
- raise ValueError("aws_services_dir is not set")
-
- # Normalize filename (lowercase, replace spaces with underscores)
- filename = f"{service_name.lower().replace(' ', '_')}.json"
- service_file = self.aws_services_dir / filename
-
- if not service_file.exists():
- raise FileNotFoundError(f"Service file not found: {service_file}")
-
- try:
- with open(service_file) as f:
- data = json.load(f)
-
- service_detail = ServiceDetail.model_validate(data)
- logger.debug(f"Loaded service {service_name} from local file: {service_file}")
- return service_detail
-
- except json.JSONDecodeError as e:
- raise ValueError(f"Invalid JSON in {service_file}: {e}")
-
- async def fetch_services(self) -> list[ServiceInfo]:
- """Fetch list of AWS services with caching.
-
- When aws_services_dir is set, loads from local services.json file.
- Otherwise, fetches from AWS API.
- """
- # Check if we have the parsed services list in cache
- services_cache_key = "parsed_services_list"
- cached_services = await self._memory_cache.get(services_cache_key)
- if cached_services is not None and isinstance(cached_services, list):
- logger.debug(f"Retrieved {len(cached_services)} services from parsed cache")
- return cached_services
-
- # Load from local file if aws_services_dir is set
- if self.aws_services_dir:
- services = self._load_services_from_file()
- # Cache the loaded services
- await self._memory_cache.set(services_cache_key, services)
- return services
-
- # Not in parsed cache, fetch the raw data from API
- data = await self._make_request_with_batching(self.BASE_URL)
-
- if not isinstance(data, list):
- raise ValueError("Expected list of services from root endpoint")
-
- services: list[ServiceInfo] = []
- for item in data:
- if isinstance(item, dict):
- service = item.get("service")
- url = item.get("url")
- if service and url:
- services.append(ServiceInfo(service=str(service), url=str(url)))
-
- # Cache the parsed services list
- await self._memory_cache.set(services_cache_key, services)
-
- # Log only on first fetch (when parsed cache was empty)
- logger.info(f"Fetched and parsed {len(services)} services from AWS API")
- return services
-
- async def fetch_service_by_name(self, service_name: str) -> ServiceDetail:
- """Fetch service detail with optimized caching.
-
- When aws_services_dir is set, loads from local {service}.json file.
- Otherwise, fetches from AWS API.
- """
- # Normalize service name
- service_name_lower = service_name.lower()
-
- # Check memory cache with service name as key
- cache_key = f"service:{service_name_lower}"
- cached_detail = await self._memory_cache.get(cache_key)
- if isinstance(cached_detail, ServiceDetail):
- logger.debug(f"Memory cache hit for service {service_name}")
- return cached_detail
-
- # Load from local file if aws_services_dir is set
- if self.aws_services_dir:
- try:
- service_detail = self._load_service_from_file(service_name_lower)
- # Cache the loaded service
- await self._memory_cache.set(cache_key, service_detail)
- return service_detail
- except FileNotFoundError:
- # Try to find the service in services.json to get proper name
- services = await self.fetch_services()
- for service in services:
- if service.service.lower() == service_name_lower:
- # Try with the exact service name from services.json
- try:
- service_detail = self._load_service_from_file(service.service)
- await self._memory_cache.set(cache_key, service_detail)
- return service_detail
- except FileNotFoundError:
- pass
- raise ValueError(f"Service `{service_name}` not found in {self.aws_services_dir}")
-
- # Fetch service list and find URL from API
- services = await self.fetch_services()
-
- for service in services:
- if service.service.lower() == service_name_lower:
- # Fetch service detail from API
- data = await self._make_request_with_batching(service.url)
-
- # Validate and parse
- service_detail = ServiceDetail.model_validate(data)
-
- # Cache with service name as key
- await self._memory_cache.set(cache_key, service_detail)
-
- return service_detail
-
- raise ValueError(f"Service `{service_name}` not found")
-
- async def fetch_multiple_services(self, service_names: list[str]) -> dict[str, ServiceDetail]:
- """Fetch multiple services concurrently with optimized batching."""
-
- async def fetch_single(name: str) -> tuple[str, ServiceDetail]:
- try:
- detail = await self.fetch_service_by_name(name)
- return name, detail
- except Exception as e:
- logger.error(f"Failed to fetch service {name}: {e}")
- raise
-
- # Fetch all services concurrently
- tasks = [fetch_single(name) for name in service_names]
- results = await asyncio.gather(*tasks, return_exceptions=True)
-
- services: dict[str, ServiceDetail] = {}
- for i, result in enumerate(results):
- if isinstance(result, Exception):
- logger.error(f"Failed to fetch service {service_names[i]}: {result}")
- raise result
- elif isinstance(result, tuple):
- name, detail = result
- services[name] = detail
-
- return services
-
- def parse_action(self, action: str) -> tuple[str, str]:
- """Parse IAM action using compiled regex for better performance."""
- match = self._patterns.action_pattern.match(action)
- if not match:
- raise ValueError(f"Invalid action format: {action}")
-
- return match.group("service").lower(), match.group("action")
-
- def _match_wildcard_action(self, pattern: str, actions: list[str]) -> tuple[bool, list[str]]:
- """Match wildcard pattern against list of actions.
-
- Args:
- pattern: Action pattern with wildcards (e.g., "Get*", "*Object", "Describe*")
- actions: List of valid action names
-
- Returns:
- Tuple of (has_matches, list_of_matched_actions)
- """
- # Convert wildcard pattern to regex
- # Escape special regex chars except *, then replace * with .*
- regex_pattern = "^" + re.escape(pattern).replace(r"\*", ".*") + "$"
- compiled_pattern = re.compile(regex_pattern, re.IGNORECASE)
-
- matched = [a for a in actions if compiled_pattern.match(a)]
- return len(matched) > 0, matched
-
- async def validate_action(
- self, action: str, allow_wildcards: bool = True
- ) -> tuple[bool, str | None, bool]:
- """Validate IAM action with optimized caching.
-
- Supports:
- - Exact actions: s3:GetObject
- - Full wildcards: s3:*
- - Partial wildcards: s3:Get*, s3:*Object, s3:*Get*
-
- Returns:
- Tuple of (is_valid, error_message, is_wildcard)
- """
- try:
- service_prefix, action_name = self.parse_action(action)
-
- # Quick wildcard check using compiled pattern
- is_wildcard = bool(self._patterns.wildcard_pattern.search(action_name))
-
- # Handle full wildcard
- if action_name == "*":
- if allow_wildcards:
- # Just verify service exists
- await self.fetch_service_by_name(service_prefix)
- return True, None, True
- else:
- return False, "Wildcard actions are not allowed", True
-
- # Fetch service details (will use cache)
- service_detail = await self.fetch_service_by_name(service_prefix)
- available_actions = list(service_detail.actions.keys())
-
- # Handle partial wildcards (e.g., Get*, *Object, Describe*)
- if is_wildcard:
- if not allow_wildcards:
- return False, "Wildcard actions are not allowed", True
-
- has_matches, matched_actions = self._match_wildcard_action(
- action_name, available_actions
- )
-
- if has_matches:
- # Wildcard is valid and matches at least one action
- match_count = len(matched_actions)
- sample_actions = matched_actions[:5] # Show up to 5 examples
- examples = ", ".join(sample_actions)
- if match_count > 5:
- examples += f", ... ({match_count - 5} more)"
-
- return True, None, True
- else:
- # Wildcard doesn't match any actions
- return (
- False,
- f"Action pattern '{action_name}' does not match any actions in service '{service_prefix}'",
- True,
- )
-
- # Check if exact action exists (case-insensitive)
- action_exists = any(a.lower() == action_name.lower() for a in available_actions)
-
- if action_exists:
- return True, None, False
- else:
- # Suggest similar actions
- similar = [a for a in available_actions if action_name.lower() in a.lower()][:3]
-
- suggestion = f" Did you mean: {', '.join(similar)}?" if similar else ""
- return (
- False,
- f"Action '{action_name}' not found in service '{service_prefix}'.{suggestion}",
- False,
- )
-
- except ValueError as e:
- return False, str(e), False
- except Exception as e:
- logger.error(f"Error validating action {action}: {e}")
- return False, f"Failed to validate action: {str(e)}", False
-
- def validate_arn(self, arn: str) -> tuple[bool, str | None]:
- """Validate ARN format using compiled regex."""
- if arn == "*":
- return True, None
-
- match = self._patterns.arn_pattern.match(arn)
- if not match:
- return False, f"Invalid ARN format: {arn}"
-
- return True, None
-
- async def validate_condition_key(
- self, action: str, condition_key: str, resources: list[str] | None = None
- ) -> ConditionKeyValidationResult:
- """
- Validate condition key against action and optionally resource types.
-
- Args:
- action: IAM action (e.g., "s3:GetObject")
- condition_key: Condition key to validate (e.g., "s3:prefix")
- resources: Optional list of resource ARNs to validate against
-
- Returns:
- ConditionKeyValidationResult with:
- - is_valid: True if key is valid (even with warning)
- - error_message: Short error message if invalid (shown prominently)
- - warning_message: Warning message if valid but not recommended
- - suggestion: Detailed suggestion with valid keys (shown in collapsible section)
- """
- try:
- from iam_validator.core.config.aws_global_conditions import (
- get_global_conditions,
- )
-
- service_prefix, action_name = self.parse_action(action)
-
- # Check if it's a global condition key
- is_global_key = False
- if condition_key.startswith("aws:"):
- global_conditions = get_global_conditions()
- if global_conditions.is_valid_global_key(condition_key):
- is_global_key = True
- else:
- return ConditionKeyValidationResult(
- is_valid=False,
- error_message=f"Invalid AWS global condition key: `{condition_key}`.",
- )
-
- # Fetch service detail (cached)
- service_detail = await self.fetch_service_by_name(service_prefix)
-
- # Check service-specific condition keys
- if condition_key in service_detail.condition_keys:
- return ConditionKeyValidationResult(is_valid=True)
-
- # Check action-specific condition keys
- if action_name in service_detail.actions:
- action_detail = service_detail.actions[action_name]
- if (
- action_detail.action_condition_keys
- and condition_key in action_detail.action_condition_keys
- ):
- return ConditionKeyValidationResult(is_valid=True)
-
- # Check resource-specific condition keys
- # Get resource types required by this action
- if resources and action_detail.resources:
- for res_req in action_detail.resources:
- resource_name = res_req.get("Name", "")
- if not resource_name:
- continue
-
- # Look up resource type definition
- resource_type = service_detail.resources.get(resource_name)
- if resource_type and resource_type.condition_keys:
- if condition_key in resource_type.condition_keys:
- return ConditionKeyValidationResult(is_valid=True)
-
- # If it's a global key but the action has specific condition keys defined,
- # AWS allows it but the key may not be available in every request context
- if is_global_key and action_detail.action_condition_keys is not None:
- warning_msg = (
- f"Global condition key '{condition_key}' is used with action '{action}'. "
- f"While global condition keys can be used across all AWS services, "
- f"the key may not be available in every request context. "
- f"Verify that '{condition_key}' is available for this specific action's request context. "
- f"Consider using '*IfExists' operators (e.g., StringEqualsIfExists) if the key might be missing."
- )
- return ConditionKeyValidationResult(is_valid=True, warning_message=warning_msg)
-
- # If it's a global key and action doesn't define specific keys, allow it
- if is_global_key:
- return ConditionKeyValidationResult(is_valid=True)
-
- # Short error message
- error_msg = f"Condition key `{condition_key}` is not valid for action `{action}`"
-
- # Collect valid condition keys for this action
- valid_keys = set()
-
- # Add service-level condition keys
- if service_detail.condition_keys:
- if isinstance(service_detail.condition_keys, dict):
- valid_keys.update(service_detail.condition_keys.keys())
- elif isinstance(service_detail.condition_keys, list):
- valid_keys.update(service_detail.condition_keys)
-
- # Add action-specific condition keys
- if action_name in service_detail.actions:
- action_detail = service_detail.actions[action_name]
- if action_detail.action_condition_keys:
- if isinstance(action_detail.action_condition_keys, dict):
- valid_keys.update(action_detail.action_condition_keys.keys())
- elif isinstance(action_detail.action_condition_keys, list):
- valid_keys.update(action_detail.action_condition_keys)
-
- # Add resource-specific condition keys
- if action_detail.resources:
- for res_req in action_detail.resources:
- resource_name = res_req.get("Name", "")
- if resource_name:
- resource_type = service_detail.resources.get(resource_name)
- if resource_type and resource_type.condition_keys:
- if isinstance(resource_type.condition_keys, dict):
- valid_keys.update(resource_type.condition_keys.keys())
- elif isinstance(resource_type.condition_keys, list):
- valid_keys.update(resource_type.condition_keys)
-
- # Build detailed suggestion with valid keys (goes in collapsible section)
- suggestion_parts = []
-
- if valid_keys:
- # Sort and limit to first 10 keys for readability
- sorted_keys = sorted(valid_keys)
- suggestion_parts.append("**Valid condition keys for this action:**")
- if len(sorted_keys) <= 10:
- for key in sorted_keys:
- suggestion_parts.append(f"- `{key}`")
- else:
- for key in sorted_keys[:10]:
- suggestion_parts.append(f"- `{key}`")
- suggestion_parts.append(f"- ... and {len(sorted_keys) - 10} more")
-
- suggestion_parts.append("")
- suggestion_parts.append(
- "**Global condition keys** (e.g., `aws:ResourceOrgID`, `aws:RequestedRegion`, `aws:SourceIp`, `aws:SourceVpce`) "
- "can also be used with any AWS action"
- )
- else:
- # No action-specific keys - mention global keys
- suggestion_parts.append(
- "This action does not have specific condition keys defined.\n\n"
- "However, you can use **global condition keys** such as:\n"
- "- `aws:RequestedRegion`\n"
- "- `aws:SourceIp`\n"
- "- `aws:SourceVpce`\n"
- "- `aws:UserAgent`\n"
- "- `aws:CurrentTime`\n"
- "- `aws:SecureTransport`\n"
- "- `aws:PrincipalArn`\n"
- "- And many others"
- )
-
- suggestion = "\n".join(suggestion_parts)
-
- return ConditionKeyValidationResult(
- is_valid=False,
- error_message=error_msg,
- suggestion=suggestion,
- )
-
- except Exception as e:
- logger.error(f"Error validating condition key {condition_key} for {action}: {e}")
- return ConditionKeyValidationResult(
- is_valid=False,
- error_message=f"Failed to validate condition key: {str(e)}",
- )
-
- async def clear_caches(self) -> None:
- """Clear all caches (memory and disk)."""
- # Clear memory cache
- await self._memory_cache.clear()
-
- # Clear disk cache
- if self.enable_cache and self._cache_dir.exists():
- for cache_file in self._cache_dir.glob("*.json"):
- try:
- cache_file.unlink()
- except Exception as e:
- logger.warning(f"Failed to delete cache file {cache_file}: {e}")
-
- logger.info("Cleared all caches")
-
- def get_stats(self) -> dict[str, Any]:
- """Get fetcher statistics for monitoring."""
- return {
- "prefetched_services": len(self._prefetched_services),
- "memory_cache_size": len(self._memory_cache.cache),
- "batch_queue_size": len(self._batch_queue),
- "cache_ttl": self.cache_ttl,
- "connection_pool_size": self.connection_pool_size,
- }
+ import warnings
+
+ # Re-export classes from new location
+ from iam_validator.core.aws_service import (
+ AWSServiceFetcher,
+ CompiledPatterns,
+ ConditionKeyValidationResult,
+ )
+
+ # Emit deprecation warning when this module is imported
+ warnings.warn(
+ "Importing from iam_validator.core.aws_fetcher is deprecated. "
+ "Use 'from iam_validator.core.aws_service import AWSServiceFetcher' instead. "
+ "This compatibility layer will be removed in a future major version.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+
+ __all__ = ["AWSServiceFetcher", "ConditionKeyValidationResult", "CompiledPatterns"]
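The facade keeps the public names importable from the old path while warning once, at import time. For teams tracking the migration, a small sketch of how a test suite or entry point might promote that specific warning to an error so stale imports fail fast — the filter matches the message text added in 1.9.0 and is an illustration, not something shipped in the package:

    # Illustrative only: fail fast on the deprecated import path.
    # Install the filter before iam_validator.core.aws_fetcher is first imported,
    # since the facade emits its DeprecationWarning only once, when the module loads.
    import warnings

    warnings.filterwarnings(
        "error",
        message=r"Importing from iam_validator\.core\.aws_fetcher is deprecated",
        category=DeprecationWarning,
    )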