iam-policy-validator 1.8.0-py3-none-any.whl → 1.10.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {iam_policy_validator-1.8.0.dist-info → iam_policy_validator-1.10.0.dist-info}/METADATA +106 -1
  2. iam_policy_validator-1.10.0.dist-info/RECORD +96 -0
  3. iam_validator/__init__.py +1 -1
  4. iam_validator/__version__.py +1 -1
  5. iam_validator/checks/action_condition_enforcement.py +504 -190
  6. iam_validator/checks/action_resource_matching.py +8 -15
  7. iam_validator/checks/action_validation.py +6 -12
  8. iam_validator/checks/condition_key_validation.py +6 -12
  9. iam_validator/checks/condition_type_mismatch.py +9 -16
  10. iam_validator/checks/full_wildcard.py +9 -13
  11. iam_validator/checks/mfa_condition_check.py +8 -17
  12. iam_validator/checks/policy_size.py +6 -39
  13. iam_validator/checks/policy_structure.py +10 -40
  14. iam_validator/checks/policy_type_validation.py +18 -19
  15. iam_validator/checks/principal_validation.py +11 -20
  16. iam_validator/checks/resource_validation.py +5 -12
  17. iam_validator/checks/sensitive_action.py +8 -15
  18. iam_validator/checks/service_wildcard.py +6 -12
  19. iam_validator/checks/set_operator_validation.py +11 -18
  20. iam_validator/checks/sid_uniqueness.py +8 -38
  21. iam_validator/checks/trust_policy_validation.py +8 -14
  22. iam_validator/checks/utils/wildcard_expansion.py +1 -1
  23. iam_validator/checks/wildcard_action.py +6 -12
  24. iam_validator/checks/wildcard_resource.py +6 -12
  25. iam_validator/commands/cache.py +4 -3
  26. iam_validator/commands/validate.py +26 -4
  27. iam_validator/core/__init__.py +1 -1
  28. iam_validator/core/aws_fetcher.py +24 -1030
  29. iam_validator/core/aws_service/__init__.py +21 -0
  30. iam_validator/core/aws_service/cache.py +108 -0
  31. iam_validator/core/aws_service/client.py +205 -0
  32. iam_validator/core/aws_service/fetcher.py +612 -0
  33. iam_validator/core/aws_service/parsers.py +149 -0
  34. iam_validator/core/aws_service/patterns.py +51 -0
  35. iam_validator/core/aws_service/storage.py +291 -0
  36. iam_validator/core/aws_service/validators.py +379 -0
  37. iam_validator/core/check_registry.py +82 -14
  38. iam_validator/core/config/defaults.py +10 -0
  39. iam_validator/core/constants.py +17 -0
  40. iam_validator/core/label_manager.py +197 -0
  41. iam_validator/core/policy_checks.py +7 -3
  42. iam_validator/core/pr_commenter.py +34 -7
  43. iam_validator/sdk/__init__.py +1 -1
  44. iam_validator/sdk/context.py +1 -1
  45. iam_validator/sdk/helpers.py +1 -1
  46. iam_policy_validator-1.8.0.dist-info/RECORD +0 -87
  47. {iam_policy_validator-1.8.0.dist-info → iam_policy_validator-1.10.0.dist-info}/WHEEL +0 -0
  48. {iam_policy_validator-1.8.0.dist-info → iam_policy_validator-1.10.0.dist-info}/entry_points.txt +0 -0
  49. {iam_policy_validator-1.8.0.dist-info → iam_policy_validator-1.10.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,1035 +1,29 @@
1
- """AWS Service Fetcher Module with advanced caching and performance features.
1
+ """AWS Service Fetcher - Backward compatibility facade.
2
2
 
3
- This module provides functionality to fetch AWS service information from the AWS service reference API.
4
- It includes methods to retrieve a list of services, fetch detailed information for specific services,
5
- and handle errors gracefully.
3
+ DEPRECATED: This module is kept for backward compatibility.
4
+ New code should import from iam_validator.core.aws_service instead:
6
5
 
7
- Features:
8
- - TTL-based caching with automatic expiry
9
- - LRU memory cache for frequently accessed services
10
- - Service pre-fetching for common services
11
- - Batch API requests support
12
- - Compiled regex patterns for better performance
13
- - Connection pool optimization
14
- - Request coalescing for duplicate requests
6
+ from iam_validator.core.aws_service import AWSServiceFetcher
15
7
 
16
- Example usage:
17
- async with AWSServiceFetcher() as fetcher:
18
- services = await fetcher.fetch_services()
19
- service_detail = await fetcher.fetch_service_by_name("S3")
8
+ This facade will be removed in a future major version.
20
9
  """
21
10
 
22
- import asyncio
23
- import hashlib
24
- import json
25
- import logging
26
- import os
27
- import re
28
- import sys
29
- import time
30
- from dataclasses import dataclass
31
- from pathlib import Path
32
- from typing import Any
33
-
34
- import httpx
35
-
36
- from iam_validator.core import constants
37
- from iam_validator.core.config import AWS_SERVICE_REFERENCE_BASE_URL
38
- from iam_validator.core.models import ServiceDetail, ServiceInfo
39
- from iam_validator.utils.cache import LRUCache
40
-
41
- logger = logging.getLogger(__name__)
42
-
43
-
44
- @dataclass
45
- class ConditionKeyValidationResult:
46
- """Result of condition key validation.
47
-
48
- Attributes:
49
- is_valid: True if the condition key is valid for the action
50
- error_message: Short error message if invalid (shown prominently)
51
- warning_message: Warning message if valid but not recommended
52
- suggestion: Detailed suggestion with valid keys (shown in collapsible section)
53
- """
54
-
55
- is_valid: bool
56
- error_message: str | None = None
57
- warning_message: str | None = None
58
- suggestion: str | None = None
59
-
60
-
61
- class CompiledPatterns:
62
- """Pre-compiled regex patterns for validation.
63
-
64
- This class implements the Singleton pattern to ensure patterns are compiled only once
65
- and reused across all instances for better performance.
66
- """
67
-
68
- _instance = None
69
- _initialized = False
70
-
71
- def __new__(cls) -> "CompiledPatterns":
72
- if cls._instance is None:
73
- cls._instance = super().__new__(cls)
74
- return cls._instance
75
-
76
- def __init__(self) -> None:
77
- """Initialize compiled patterns (only once due to Singleton pattern)."""
78
- # Only initialize once, even if __init__ is called multiple times
79
- if CompiledPatterns._initialized:
80
- return
81
-
82
- CompiledPatterns._initialized = True
83
-
84
- # ARN validation pattern
85
- self.arn_pattern = re.compile(
86
- r"^arn:(?P<partition>(aws|aws-cn|aws-us-gov|aws-eusc|aws-iso|aws-iso-b|aws-iso-e|aws-iso-f)):"
87
- r"(?P<service>[a-z0-9\-]+):"
88
- r"(?P<region>[a-z0-9\-]*):"
89
- r"(?P<account>[0-9]*):"
90
- r"(?P<resource>.+)$",
91
- re.IGNORECASE,
92
- )
93
-
94
- # Action format pattern
95
- self.action_pattern = re.compile(
96
- r"^(?P<service>[a-zA-Z0-9_-]+):(?P<action>[a-zA-Z0-9*_-]+)$"
97
- )
98
-
99
- # Wildcard detection patterns
100
- self.wildcard_pattern = re.compile(r"\*")
101
- self.partial_wildcard_pattern = re.compile(r"^[^*]+\*$")
102
-
103
-
104
- class AWSServiceFetcher:
105
- """Fetches AWS service information from the AWS service reference API with enhanced performance features.
106
-
107
- This class provides a comprehensive interface for retrieving AWS service metadata,
108
- including actions, resources, and condition keys. It includes multiple layers of
109
- caching and optimization for high-performance policy validation.
110
-
111
- Features:
112
- - Multi-layer caching (memory LRU + disk with TTL)
113
- - Service pre-fetching for common AWS services
114
- - Request batching and coalescing
115
- - Offline mode support with local AWS service files
116
- - HTTP/2 connection pooling
117
- - Automatic retry with exponential backoff
118
-
119
- Example:
120
- >>> async with AWSServiceFetcher() as fetcher:
121
- ... # Fetch service list
122
- ... services = await fetcher.fetch_services()
123
- ...
124
- ... # Fetch specific service details
125
- ... s3_service = await fetcher.fetch_service_by_name("s3")
126
- ...
127
- ... # Validate actions
128
- ... is_valid = await fetcher.validate_action("s3:GetObject", s3_service)
129
-
130
- Method Organization:
131
- Lifecycle Management:
132
- - __init__: Initialize fetcher with configuration
133
- - __aenter__, __aexit__: Context manager support
134
-
135
- Caching (Private):
136
- - _get_cache_directory: Determine cache location
137
- - _get_cache_path: Generate cache file path
138
- - _read_from_cache: Read from disk cache
139
- - _write_to_cache: Write to disk cache
140
- - clear_caches: Clear all caches
141
-
142
- HTTP Operations (Private):
143
- - _make_request: Core HTTP request handler
144
- - _make_request_with_batching: Request coalescing
145
- - _prefetch_common_services: Pre-load common services
146
-
147
- File I/O (Private):
148
- - _load_services_from_file: Load service list from local file
149
- - _load_service_from_file: Load service details from local file
150
-
151
- Public API - Fetching:
152
- - fetch_services: Get list of all AWS services
153
- - fetch_service_by_name: Get details for one service
154
- - fetch_multiple_services: Batch fetch multiple services
155
-
156
- Public API - Validation:
157
- - validate_action: Check if action exists in service
158
- - validate_arn: Validate ARN format
159
- - validate_condition_key: Check condition key validity
160
-
161
- Public API - Parsing:
162
- - parse_action: Split action into service and name
163
- - match_wildcard_action: Match wildcard patterns
164
-
165
- Utilities:
166
- - get_stats: Get cache statistics
167
- """
168
-
169
- BASE_URL = AWS_SERVICE_REFERENCE_BASE_URL
170
-
171
- # Common AWS services to pre-fetch
172
- # All other services will be fetched on-demand (lazy loading if found in policies)
173
- COMMON_SERVICES = [
174
- "acm",
175
- "apigateway",
176
- "autoscaling",
177
- "backup",
178
- "batch",
179
- "bedrock",
180
- "cloudformation",
181
- "cloudfront",
182
- "cloudtrail",
183
- "cloudwatch",
184
- "config",
185
- "dynamodb",
186
- "ec2-instance-connect",
187
- "ec2",
188
- "ecr",
189
- "ecs",
190
- "eks",
191
- "elasticache",
192
- "elasticloadbalancing",
193
- "events",
194
- "firehose",
195
- "glacier",
196
- "glue",
197
- "guardduty",
198
- "iam",
199
- "imagebuilder",
200
- "inspector2",
201
- "kinesis",
202
- "kms",
203
- "lambda",
204
- "logs",
205
- "rds",
206
- "route53",
207
- "s3",
208
- "scheduler",
209
- "secretsmanager",
210
- "securityhub",
211
- "sns",
212
- "sqs",
213
- "sts",
214
- "support",
215
- "waf",
216
- "wafv2",
217
- ]
218
-
219
- def __init__(
220
- self,
221
- timeout: float = constants.DEFAULT_HTTP_TIMEOUT_SECONDS,
222
- retries: int = 3,
223
- enable_cache: bool = True,
224
- cache_ttl: int = constants.DEFAULT_CACHE_TTL_SECONDS,
225
- memory_cache_size: int = 256,
226
- connection_pool_size: int = 50,
227
- keepalive_connections: int = 20,
228
- prefetch_common: bool = True,
229
- cache_dir: Path | str | None = None,
230
- aws_services_dir: Path | str | None = None,
231
- ):
232
- """Initialize aws service fetcher.
233
-
234
- Args:
235
- timeout: Request timeout in seconds
236
- retries: Number of retries for failed requests
237
- enable_cache: Enable persistent disk caching
238
- cache_ttl: Cache time-to-live in seconds
239
- memory_cache_size: Size of in-memory LRU cache
240
- connection_pool_size: HTTP connection pool size
241
- keepalive_connections: Number of keepalive connections
242
- prefetch_common: Prefetch common AWS services
243
- cache_dir: Custom cache directory path
244
- aws_services_dir: Directory containing pre-downloaded AWS service JSON files.
245
- When set, the fetcher will load services from local files
246
- instead of making API calls. Directory should contain:
247
- - _services.json: List of all services
248
- - {service}.json: Individual service files (e.g., s3.json)
249
- """
250
- self.timeout = timeout
251
- self.retries = retries
252
- self.enable_cache = enable_cache
253
- self.cache_ttl = cache_ttl
254
- self.prefetch_common = prefetch_common
255
-
256
- # AWS services directory for offline mode
257
- self.aws_services_dir: Path | None = None
258
- if aws_services_dir:
259
- self.aws_services_dir = Path(aws_services_dir)
260
- if not self.aws_services_dir.exists():
261
- raise ValueError(f"AWS services directory does not exist: {aws_services_dir}")
262
- logger.info(f"Using local AWS services from: {self.aws_services_dir}")
263
-
264
- self._client: httpx.AsyncClient | None = None
265
- self._memory_cache = LRUCache(maxsize=memory_cache_size, ttl=cache_ttl)
266
- self._cache_dir = self._get_cache_directory(cache_dir)
267
- self._patterns = CompiledPatterns()
268
-
269
- # Batch request queue
270
- self._batch_queue: dict[str, asyncio.Future[Any]] = {}
271
- self._batch_lock = asyncio.Lock()
272
-
273
- # Connection pool settings
274
- self.connection_pool_size = connection_pool_size
275
- self.keepalive_connections = keepalive_connections
276
-
277
- # Track prefetched services
278
- self._prefetched_services: set[str] = set()
279
-
280
- # Create cache directory if needed
281
- if self.enable_cache:
282
- self._cache_dir.mkdir(parents=True, exist_ok=True)
283
-
284
- @staticmethod
285
- def _get_cache_directory(cache_dir: Path | str | None = None) -> Path:
286
- """Get the cache directory path, using platform-appropriate defaults.
287
-
288
- Priority:
289
- 1. Provided cache_dir parameter
290
- 2. Platform-specific user cache directory
291
- - Linux/Unix: ~/.cache/iam-validator/aws_services
292
- - macOS: ~/Library/Caches/iam-validator/aws_services
293
- - Windows: %LOCALAPPDATA%/iam-validator/cache/aws_services
294
-
295
- Args:
296
- cache_dir: Optional custom cache directory path
297
-
298
- Returns:
299
- Path object for the cache directory
300
- """
301
- if cache_dir is not None:
302
- return Path(cache_dir)
303
-
304
- # Determine platform-specific cache directory
305
- if sys.platform == "darwin":
306
- # macOS
307
- base_cache = Path.home() / "Library" / "Caches"
308
- elif sys.platform == "win32":
309
- # Windows
310
- base_cache = Path(os.environ.get("LOCALAPPDATA", Path.home() / "AppData" / "Local"))
311
- else:
312
- # Linux and other Unix-like systems
313
- base_cache = Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache"))
314
-
315
- return base_cache / "iam-validator" / "aws_services"
316
-
317
- async def __aenter__(self) -> "AWSServiceFetcher":
318
- """Async context manager entry with optimized settings."""
319
- self._client = httpx.AsyncClient(
320
- timeout=httpx.Timeout(self.timeout),
321
- follow_redirects=True,
322
- limits=httpx.Limits(
323
- max_keepalive_connections=self.keepalive_connections,
324
- max_connections=self.connection_pool_size,
325
- keepalive_expiry=constants.DEFAULT_HTTP_TIMEOUT_SECONDS, # Keep connections alive
326
- ),
327
- http2=True, # Enable HTTP/2 for multiplexing
328
- )
329
-
330
- # Pre-fetch common services if enabled
331
- if self.prefetch_common:
332
- await self._prefetch_common_services()
333
-
334
- return self
335
-
336
- async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
337
- """Async context manager exit."""
338
- del exc_type, exc_val, exc_tb
339
- if self._client:
340
- await self._client.aclose()
341
- self._client = None
342
-
343
- async def _prefetch_common_services(self) -> None:
344
- """Pre-fetch commonly used AWS services for better performance."""
345
- logger.info(f"Pre-fetching {len(self.COMMON_SERVICES)} common AWS services...")
346
-
347
- # First, fetch the services list once to populate the cache
348
- # This prevents all concurrent calls from fetching the same list
349
- await self.fetch_services()
350
-
351
- async def fetch_service(name: str) -> None:
352
- try:
353
- await self.fetch_service_by_name(name)
354
- self._prefetched_services.add(name)
355
- except Exception as e: # pylint: disable=broad-exception-caught
356
- logger.warning(f"Failed to prefetch service {name}: {e}")
357
-
358
- # Fetch in batches to avoid overwhelming the API
359
- batch_size = 5
360
- for i in range(0, len(self.COMMON_SERVICES), batch_size):
361
- batch = self.COMMON_SERVICES[i : i + batch_size]
362
- await asyncio.gather(*[fetch_service(name) for name in batch])
363
-
364
- logger.info(f"Pre-fetched {len(self._prefetched_services)} services successfully")
365
-
366
- def _get_cache_path(self, url: str) -> Path:
367
- """Get cache file path with timestamp for TTL checking."""
368
- url_hash = hashlib.md5(url.encode()).hexdigest()
369
-
370
- # Extract service name for better organization
371
- filename = f"{url_hash}.json"
372
- if "/v1/" in url:
373
- service_name = url.split("/v1/")[1].split("/")[0]
374
- filename = f"{service_name}_{url_hash[:8]}.json"
375
- elif url == self.BASE_URL:
376
- filename = "services_list.json"
377
-
378
- return self._cache_dir / filename
379
-
380
- def _read_from_cache(self, url: str) -> Any | None:
381
- """Read from disk cache with TTL checking."""
382
- if not self.enable_cache:
383
- return None
384
-
385
- cache_path = self._get_cache_path(url)
386
-
387
- if not cache_path.exists():
388
- return None
389
-
390
- try:
391
- # Check file modification time for TTL
392
- mtime = cache_path.stat().st_mtime
393
- if time.time() - mtime > self.cache_ttl:
394
- logger.debug(f"Cache expired for {url}")
395
- cache_path.unlink() # Remove expired cache
396
- return None
397
-
398
- with open(cache_path, encoding="utf-8") as f:
399
- data = json.load(f)
400
- logger.debug(f"Disk cache hit for {url}")
401
- return data
402
-
403
- except Exception as e: # pylint: disable=broad-exception-caught
404
- logger.warning(f"Failed to read cache for {url}: {e}")
405
- return None
406
-
407
- def _write_to_cache(self, url: str, data: Any) -> None:
408
- """Write to disk cache."""
409
- if not self.enable_cache:
410
- return
411
-
412
- cache_path = self._get_cache_path(url)
413
-
414
- try:
415
- with open(cache_path, "w", encoding="utf-8") as f:
416
- json.dump(data, f, indent=2)
417
- logger.debug(f"Written to disk cache: {url}")
418
- except Exception as e: # pylint: disable=broad-exception-caught
419
- logger.warning(f"Failed to write cache for {url}: {e}")
420
-
421
- async def _make_request_with_batching(self, url: str) -> Any:
422
- """Make request with request batching/coalescing.
423
-
424
- Uses double-check locking pattern to avoid race conditions and deadlocks.
425
- """
426
- # First check: see if request is already in progress
427
- existing_future = None
428
- async with self._batch_lock:
429
- if url in self._batch_queue:
430
- existing_future = self._batch_queue[url]
431
-
432
- # Wait for existing request outside the lock
433
- if existing_future is not None:
434
- logger.debug(f"Coalescing request for {url}")
435
- return await existing_future
436
-
437
- # Create new future for this request
438
- loop = asyncio.get_event_loop()
439
- future: asyncio.Future[Any] = loop.create_future()
440
-
441
- # Second check: register future or use existing one (double-check pattern)
442
- async with self._batch_lock:
443
- if url in self._batch_queue:
444
- # Another coroutine registered while we were creating the future
445
- existing_future = self._batch_queue[url]
446
- else:
447
- # We're the first, register our future
448
- self._batch_queue[url] = future
449
-
450
- # If we found an existing future, wait for it
451
- if existing_future is not None:
452
- logger.debug(f"Coalescing request for {url} (late check)")
453
- return await existing_future
454
-
455
- # We're responsible for making the request
456
- try:
457
- # Actually make the request
458
- result = await self._make_request(url)
459
- if not future.done():
460
- future.set_result(result)
461
- return result
462
- except Exception as e: # pylint: disable=broad-exception-caught
463
- if not future.done():
464
- future.set_exception(e)
465
- raise
466
- finally:
467
- # Remove from queue
468
- async with self._batch_lock:
469
- self._batch_queue.pop(url, None)
470
-
471
- async def _make_request(self, url: str) -> Any:
472
- """Make HTTP request with multi-level caching."""
473
- # Check memory cache first
474
- cache_key = f"url:{url}"
475
- cached_data = await self._memory_cache.get(cache_key)
476
- if cached_data is not None:
477
- logger.debug(f"Memory cache hit for {url}")
478
- return cached_data
479
-
480
- # Check disk cache
481
- cached_data = self._read_from_cache(url)
482
- if cached_data is not None:
483
- # Store in memory cache for faster access
484
- await self._memory_cache.set(cache_key, cached_data)
485
- return cached_data
486
-
487
- if not self._client:
488
- raise RuntimeError("Fetcher not initialized. Use as async context manager.")
489
-
490
- last_exception: Exception | None = None
491
-
492
- for attempt in range(self.retries):
493
- try:
494
- logger.debug(f"Fetching URL: {url} (attempt {attempt + 1})")
495
- response = await self._client.get(url)
496
- response.raise_for_status()
497
-
498
- try:
499
- data = response.json()
500
-
501
- # Cache in both memory and disk
502
- await self._memory_cache.set(cache_key, data)
503
- self._write_to_cache(url, data)
504
-
505
- return data
506
-
507
- except Exception as json_error: # pylint: disable=broad-exception-caught
508
- logger.error(f"Failed to parse response as JSON: {json_error}")
509
- raise ValueError(
510
- f"Invalid JSON response from {url}: {json_error}"
511
- ) from json_error
512
-
513
- except httpx.HTTPStatusError as e:
514
- logger.error(f"HTTP error {e.response.status_code} for {url}")
515
- if e.response.status_code == 404:
516
- raise ValueError(f"Service not found: {url}") from e
517
- last_exception = e
518
-
519
- except httpx.RequestError as e:
520
- logger.error(f"Request error for {url}: {e}")
521
- last_exception = e
522
-
523
- except Exception as e: # pylint: disable=broad-exception-caught
524
- logger.error(f"Unexpected error for {url}: {e}")
525
- last_exception = e
526
-
527
- if attempt < self.retries - 1:
528
- wait_time = 2**attempt
529
- logger.info(f"Retrying in {wait_time} seconds...")
530
- await asyncio.sleep(wait_time)
531
-
532
- raise last_exception or Exception(f"Failed to fetch {url} after {self.retries} attempts")
533
-
534
- def _load_services_from_file(self) -> list[ServiceInfo]:
535
- """Load services list from local _services.json file.
536
-
537
- Returns:
538
- List of ServiceInfo objects loaded from _services.json
539
-
540
- Raises:
541
- FileNotFoundError: If _services.json doesn't exist
542
- ValueError: If _services.json is invalid
543
- """
544
- if not self.aws_services_dir:
545
- raise ValueError("aws_services_dir is not set")
546
-
547
- services_file = self.aws_services_dir / "_services.json"
548
- if not services_file.exists():
549
- raise FileNotFoundError(f"_services.json not found in {self.aws_services_dir}")
550
-
551
- try:
552
- with open(services_file, encoding="utf-8") as f:
553
- data = json.load(f)
554
-
555
- if not isinstance(data, list):
556
- raise ValueError("Expected list of services from _services.json")
557
-
558
- services: list[ServiceInfo] = []
559
- for item in data:
560
- if isinstance(item, dict):
561
- service = item.get("service")
562
- url = item.get("url")
563
- if service and url:
564
- services.append(ServiceInfo(service=str(service), url=str(url)))
565
-
566
- logger.info(f"Loaded {len(services)} services from local file: {services_file}")
567
- return services
568
-
569
- except json.JSONDecodeError as e:
570
- raise ValueError(f"Invalid JSON in services.json: {e}") from e
571
-
572
- def _load_service_from_file(self, service_name: str) -> ServiceDetail:
573
- """Load service detail from local JSON file.
574
-
575
- Args:
576
- service_name: Name of the service (case-insensitive)
577
-
578
- Returns:
579
- ServiceDetail object loaded from {service}.json
580
-
581
- Raises:
582
- FileNotFoundError: If service JSON file doesn't exist
583
- ValueError: If service JSON is invalid
584
- """
585
- if not self.aws_services_dir:
586
- raise ValueError("aws_services_dir is not set")
587
-
588
- # Normalize filename (lowercase, replace spaces with underscores)
589
- filename = f"{service_name.lower().replace(' ', '_')}.json"
590
- service_file = self.aws_services_dir / filename
591
-
592
- if not service_file.exists():
593
- raise FileNotFoundError(f"Service file not found: {service_file}")
594
-
595
- try:
596
- with open(service_file, encoding="utf-8") as f:
597
- data = json.load(f)
598
-
599
- service_detail = ServiceDetail.model_validate(data)
600
- logger.debug(f"Loaded service {service_name} from local file: {service_file}")
601
- return service_detail
602
-
603
- except json.JSONDecodeError as e:
604
- raise ValueError(f"Invalid JSON in {service_file}: {e}") from e
605
-
606
- async def fetch_services(self) -> list[ServiceInfo]:
607
- """Fetch list of AWS services with caching.
608
-
609
- When aws_services_dir is set, loads from local services.json file.
610
- Otherwise, fetches from AWS API.
611
- """
612
- # Check if we have the parsed services list in cache
613
- services_cache_key = "parsed_services_list"
614
- cached_services = await self._memory_cache.get(services_cache_key)
615
- if cached_services is not None and isinstance(cached_services, list):
616
- logger.debug(f"Retrieved {len(cached_services)} services from parsed cache")
617
- return cached_services
618
-
619
- # Load from local file if aws_services_dir is set
620
- if self.aws_services_dir:
621
- services = self._load_services_from_file()
622
- # Cache the loaded services
623
- await self._memory_cache.set(services_cache_key, services)
624
- return services
625
-
626
- # Not in parsed cache, fetch the raw data from API
627
- data = await self._make_request_with_batching(self.BASE_URL)
628
-
629
- if not isinstance(data, list):
630
- raise ValueError("Expected list of services from root endpoint")
631
-
632
- services: list[ServiceInfo] = []
633
- for item in data:
634
- if isinstance(item, dict):
635
- service = item.get("service")
636
- url = item.get("url")
637
- if service and url:
638
- services.append(ServiceInfo(service=str(service), url=str(url)))
639
-
640
- # Cache the parsed services list
641
- await self._memory_cache.set(services_cache_key, services)
642
-
643
- # Log only on first fetch (when parsed cache was empty)
644
- logger.info(f"Fetched and parsed {len(services)} services from AWS API")
645
- return services
646
-
647
- async def fetch_service_by_name(self, service_name: str) -> ServiceDetail:
648
- """Fetch service detail with optimized caching.
649
-
650
- When aws_services_dir is set, loads from local {service}.json file.
651
- Otherwise, fetches from AWS API.
652
- """
653
- # Normalize service name
654
- service_name_lower = service_name.lower()
655
-
656
- # Check memory cache with service name as key
657
- cache_key = f"service:{service_name_lower}"
658
- cached_detail = await self._memory_cache.get(cache_key)
659
- if isinstance(cached_detail, ServiceDetail):
660
- logger.debug(f"Memory cache hit for service {service_name}")
661
- return cached_detail
662
-
663
- # Load from local file if aws_services_dir is set
664
- if self.aws_services_dir:
665
- try:
666
- service_detail = self._load_service_from_file(service_name_lower)
667
- # Cache the loaded service
668
- await self._memory_cache.set(cache_key, service_detail)
669
- return service_detail
670
- except FileNotFoundError:
671
- # Try to find the service in services.json to get proper name
672
- services = await self.fetch_services()
673
- for service in services:
674
- if service.service.lower() == service_name_lower:
675
- # Try with the exact service name from services.json
676
- try:
677
- service_detail = self._load_service_from_file(service.service)
678
- await self._memory_cache.set(cache_key, service_detail)
679
- return service_detail
680
- except FileNotFoundError:
681
- pass
682
- raise ValueError(
683
- f"Service `{service_name}` not found in {self.aws_services_dir}"
684
- ) from FileNotFoundError
685
-
686
- # Fetch service list and find URL from API
687
- services = await self.fetch_services()
688
-
689
- for service in services:
690
- if service.service.lower() == service_name_lower:
691
- # Fetch service detail from API
692
- data = await self._make_request_with_batching(service.url)
693
-
694
- # Validate and parse
695
- service_detail = ServiceDetail.model_validate(data)
696
-
697
- # Cache with service name as key
698
- await self._memory_cache.set(cache_key, service_detail)
699
-
700
- return service_detail
701
-
702
- raise ValueError(f"Service `{service_name}` not found")
703
-
704
- async def fetch_multiple_services(self, service_names: list[str]) -> dict[str, ServiceDetail]:
705
- """Fetch multiple services concurrently with optimized batching."""
706
-
707
- async def fetch_single(name: str) -> tuple[str, ServiceDetail]:
708
- try:
709
- detail = await self.fetch_service_by_name(name)
710
- return name, detail
711
- except Exception as e: # pylint: disable=broad-exception-caught
712
- logger.error(f"Failed to fetch service {name}: {e}")
713
- raise
714
-
715
- # Fetch all services concurrently
716
- tasks = [fetch_single(name) for name in service_names]
717
- results = await asyncio.gather(*tasks, return_exceptions=True)
718
-
719
- services: dict[str, ServiceDetail] = {}
720
- for i, result in enumerate(results):
721
- if isinstance(result, Exception):
722
- logger.error(f"Failed to fetch service {service_names[i]}: {result}")
723
- raise result
724
- if isinstance(result, tuple):
725
- name, detail = result
726
- services[name] = detail
727
-
728
- return services
729
-
730
- def parse_action(self, action: str) -> tuple[str, str]:
731
- """Parse IAM action using compiled regex for better performance."""
732
- match = self._patterns.action_pattern.match(action)
733
- if not match:
734
- raise ValueError(f"Invalid action format: {action}")
735
-
736
- return match.group("service").lower(), match.group("action")
737
-
738
- def match_wildcard_action(self, pattern: str, actions: list[str]) -> tuple[bool, list[str]]:
739
- """Match wildcard pattern against list of actions.
740
-
741
- Args:
742
- pattern: Action pattern with wildcards (e.g., "Get*", "*Object", "Describe*")
743
- actions: List of valid action names
744
-
745
- Returns:
746
- Tuple of (has_matches, list_of_matched_actions)
747
- """
748
- # Convert wildcard pattern to regex
749
- # Escape special regex chars except *, then replace * with .*
750
- regex_pattern = "^" + re.escape(pattern).replace(r"\*", ".*") + "$"
751
- compiled_pattern = re.compile(regex_pattern, re.IGNORECASE)
752
-
753
- matched = [a for a in actions if compiled_pattern.match(a)]
754
- return len(matched) > 0, matched
755
-
756
- async def validate_action(
757
- self, action: str, allow_wildcards: bool = True
758
- ) -> tuple[bool, str | None, bool]:
759
- """Validate IAM action with optimized caching.
760
-
761
- Supports:
762
- - Exact actions: s3:GetObject
763
- - Full wildcards: s3:*
764
- - Partial wildcards: s3:Get*, s3:*Object, s3:*Get*
765
-
766
- Returns:
767
- Tuple of (is_valid, error_message, is_wildcard)
768
- """
769
- try:
770
- service_prefix, action_name = self.parse_action(action)
771
-
772
- # Quick wildcard check using compiled pattern
773
- is_wildcard = bool(self._patterns.wildcard_pattern.search(action_name))
774
-
775
- # Handle full wildcard
776
- if action_name == "*":
777
- if allow_wildcards:
778
- # Just verify service exists
779
- await self.fetch_service_by_name(service_prefix)
780
- return True, None, True
781
- return False, "Wildcard actions are not allowed", True
782
-
783
- # Fetch service details (will use cache)
784
- service_detail = await self.fetch_service_by_name(service_prefix)
785
- available_actions = list(service_detail.actions.keys())
786
-
787
- # Handle partial wildcards (e.g., Get*, *Object, Describe*)
788
- if is_wildcard:
789
- if not allow_wildcards:
790
- return False, "Wildcard actions are not allowed", True
791
-
792
- has_matches, matched_actions = self.match_wildcard_action(
793
- action_name, available_actions
794
- )
795
-
796
- if has_matches:
797
- # Wildcard is valid and matches at least one action
798
- match_count = len(matched_actions)
799
- sample_actions = matched_actions[:5] # Show up to 5 examples
800
- examples = ", ".join(sample_actions)
801
- if match_count > 5:
802
- examples += f", ... ({match_count - 5} more)"
803
-
804
- return True, None, True
805
- # Wildcard doesn't match any actions
806
- return (
807
- False,
808
- f"Action pattern `{action_name}` does not match any actions in service `{service_prefix}`",
809
- True,
810
- )
811
-
812
- # Check if exact action exists (case-insensitive)
813
- action_exists = any(a.lower() == action_name.lower() for a in available_actions)
814
-
815
- if action_exists:
816
- return True, None, False
817
-
818
- # Suggest similar actions
819
- similar = [f"`{a}`" for a in available_actions if action_name.lower() in a.lower()][:3]
820
-
821
- suggestion = f" Did you mean: {', '.join(similar)}?" if similar else ""
822
- return (
823
- False,
824
- f"Action `{action_name}` not found in service `{service_prefix}`.{suggestion}",
825
- False,
826
- )
827
-
828
- except ValueError as e:
829
- return False, str(e), False
830
- except Exception as e: # pylint: disable=broad-exception-caught
831
- logger.error(f"Error validating action {action}: {e}")
832
- return False, f"Failed to validate action: {str(e)}", False
833
-
834
- def validate_arn(self, arn: str) -> tuple[bool, str | None]:
835
- """Validate ARN format using compiled regex."""
836
- if arn == "*":
837
- return True, None
838
-
839
- match = self._patterns.arn_pattern.match(arn)
840
- if not match:
841
- return False, f"Invalid ARN format: {arn}"
842
-
843
- return True, None
844
-
845
async def validate_condition_key(
    self, action: str, condition_key: str, resources: list[str] | None = None
) -> ConditionKeyValidationResult:
    """Validate a condition key against an action and, optionally, resource types.

    Args:
        action: IAM action (e.g., "s3:GetObject")
        condition_key: Condition key to validate (e.g., "s3:prefix")
        resources: Optional list of resource ARNs to validate against

    Returns:
        ConditionKeyValidationResult with:
        - is_valid: True if key is valid (even with warning)
        - error_message: Short error message if invalid (shown prominently)
        - warning_message: Warning message if valid but not recommended
        - suggestion: Detailed suggestion with valid keys (shown in collapsible section)
    """
    try:
        from iam_validator.core.config.aws_global_conditions import (  # pylint: disable=import-outside-toplevel
            get_global_conditions,
        )

        service_prefix, action_name = self.parse_action(action)

        # Global condition keys ("aws:*") are checked against the known
        # global-key catalog; unknown "aws:" keys are rejected outright.
        is_global_key = False
        if condition_key.startswith("aws:"):
            global_conditions = get_global_conditions()
            if global_conditions.is_valid_global_key(condition_key):
                is_global_key = True
            else:
                return ConditionKeyValidationResult(
                    is_valid=False,
                    error_message=f"Invalid AWS global condition key: `{condition_key}`.",
                )

        # Fetch service detail (cached).
        service_detail = await self.fetch_service_by_name(service_prefix)

        # Fix: look the action up ONCE; previously `action_detail` was only
        # assigned inside an `if action_name in service_detail.actions:` branch
        # but referenced unconditionally afterwards, raising NameError for
        # unknown actions (masked by the broad except below as a bogus
        # "Failed to validate condition key" error).
        action_detail = service_detail.actions.get(action_name)

        # Service-level condition keys.
        if condition_key in service_detail.condition_keys:
            return ConditionKeyValidationResult(is_valid=True)

        # Action-specific condition keys.
        if (
            action_detail is not None
            and action_detail.action_condition_keys
            and condition_key in action_detail.action_condition_keys
        ):
            return ConditionKeyValidationResult(is_valid=True)

        # Resource-specific condition keys: check every resource type the
        # action declares it can operate on.
        if resources and action_detail is not None and action_detail.resources:
            for res_req in action_detail.resources:
                resource_name = res_req.get("Name", "")
                if not resource_name:
                    continue
                resource_type = service_detail.resources.get(resource_name)
                if resource_type and resource_type.condition_keys:
                    if condition_key in resource_type.condition_keys:
                        return ConditionKeyValidationResult(is_valid=True)

        # A global key used with an action that declares its own condition
        # keys is allowed by AWS, but may be absent from the request context.
        if (
            is_global_key
            and action_detail is not None
            and action_detail.action_condition_keys is not None
        ):
            warning_msg = (
                f"Global condition key `{condition_key}` is used with action `{action}`. "
                f"While global condition keys can be used across all AWS services, "
                f"the key may not be available in every request context. "
                f"Verify that `{condition_key}` is available for this specific action's request context. "
                f"Consider using `*IfExists` operators (e.g., `StringEqualsIfExists`) if the key might be missing."
            )
            return ConditionKeyValidationResult(is_valid=True, warning_message=warning_msg)

        # Global key, no action-specific keys declared: allow as-is.
        if is_global_key:
            return ConditionKeyValidationResult(is_valid=True)

        # Short error message; details go into the collapsible suggestion.
        error_msg = f"Condition key `{condition_key}` is not valid for action `{action}`"

        # Collect every key that WOULD be valid for this action.  The data
        # source sometimes exposes these as dicts and sometimes as lists,
        # hence the isinstance branching.
        valid_keys: set[str] = set()

        if service_detail.condition_keys:
            if isinstance(service_detail.condition_keys, dict):
                valid_keys.update(service_detail.condition_keys.keys())
            elif isinstance(service_detail.condition_keys, list):
                valid_keys.update(service_detail.condition_keys)

        if action_detail is not None and action_detail.action_condition_keys:
            if isinstance(action_detail.action_condition_keys, dict):
                valid_keys.update(action_detail.action_condition_keys.keys())
            elif isinstance(action_detail.action_condition_keys, list):
                valid_keys.update(action_detail.action_condition_keys)

        if action_detail is not None and action_detail.resources:
            for res_req in action_detail.resources:
                resource_name = res_req.get("Name", "")
                if resource_name:
                    resource_type = service_detail.resources.get(resource_name)
                    if resource_type and resource_type.condition_keys:
                        if isinstance(resource_type.condition_keys, dict):
                            valid_keys.update(resource_type.condition_keys.keys())
                        elif isinstance(resource_type.condition_keys, list):
                            valid_keys.update(resource_type.condition_keys)

        # Build the detailed suggestion (rendered in a collapsible section).
        suggestion_parts = []
        if valid_keys:
            # Sort and cap at 10 keys for readability.
            sorted_keys = sorted(valid_keys)
            suggestion_parts.append("**Valid condition keys for this action:**")
            for key in sorted_keys[:10]:
                suggestion_parts.append(f"- `{key}`")
            if len(sorted_keys) > 10:
                suggestion_parts.append(f"- ... and {len(sorted_keys) - 10} more")

            suggestion_parts.append("")
            suggestion_parts.append(
                "**Global condition keys** (e.g., `aws:ResourceOrgID`, `aws:RequestedRegion`, `aws:SourceIp`, `aws:SourceVpce`) "
                "can also be used with any AWS action"
            )
        else:
            # No action-specific keys - mention global keys.
            suggestion_parts.append(
                "This action does not have specific condition keys defined.\n\n"
                "However, you can use **global condition keys** such as:\n"
                "- `aws:RequestedRegion`\n"
                "- `aws:SourceIp`\n"
                "- `aws:SourceVpce`\n"
                "- `aws:UserAgent`\n"
                "- `aws:CurrentTime`\n"
                "- `aws:SecureTransport`\n"
                "- `aws:PrincipalArn`\n"
                "- And many others"
            )

        return ConditionKeyValidationResult(
            is_valid=False,
            error_message=error_msg,
            suggestion="\n".join(suggestion_parts),
        )

    except Exception as e:  # pylint: disable=broad-exception-caught
        logger.error(f"Error validating condition key {condition_key} for {action}: {e}")
        return ConditionKeyValidationResult(
            is_valid=False,
            error_message=f"Failed to validate condition key: {str(e)}",
        )
async def clear_caches(self) -> None:
    """Drop all cached data: the in-memory cache and any on-disk JSON files."""
    # In-memory cache first.
    await self._memory_cache.clear()

    # Disk cache: best-effort removal of each cached JSON file; a failed
    # unlink is logged but never aborts the sweep.
    if self.enable_cache and self._cache_dir.exists():
        for cache_file in self._cache_dir.glob("*.json"):
            try:
                cache_file.unlink()
            except Exception as e:  # pylint: disable=broad-exception-caught
                logger.warning(f"Failed to delete cache file {cache_file}: {e}")

    logger.info("Cleared all caches")
def get_stats(self) -> dict[str, Any]:
    """Return a snapshot of fetcher internals for monitoring."""
    stats: dict[str, Any] = {}
    stats["prefetched_services"] = len(self._prefetched_services)
    stats["memory_cache_size"] = len(self._memory_cache.cache)
    stats["batch_queue_size"] = len(self._batch_queue)
    stats["cache_ttl"] = self.cache_ttl
    stats["connection_pool_size"] = self.connection_pool_size
    return stats
import warnings

# Re-export classes from new location.
# This module is now a thin backward-compatibility shim: the real
# implementations live in iam_validator.core.aws_service.
from iam_validator.core.aws_service import (
    AWSServiceFetcher,
    CompiledPatterns,
    ConditionKeyValidationResult,
)

# Emit deprecation warning when this module is imported.
# stacklevel=2 points the warning at the importing module, not at this shim.
warnings.warn(
    "Importing from iam_validator.core.aws_fetcher is deprecated. "
    "Use 'from iam_validator.core.aws_service import AWSServiceFetcher' instead. "
    "This compatibility layer will be removed in a future major version.",
    DeprecationWarning,
    stacklevel=2,
)

# Explicit public API of the shim: exactly the re-exported names.
__all__ = ["AWSServiceFetcher", "ConditionKeyValidationResult", "CompiledPatterns"]