iam-policy-validator 1.8.0__py3-none-any.whl → 1.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {iam_policy_validator-1.8.0.dist-info → iam_policy_validator-1.10.0.dist-info}/METADATA +106 -1
- iam_policy_validator-1.10.0.dist-info/RECORD +96 -0
- iam_validator/__init__.py +1 -1
- iam_validator/__version__.py +1 -1
- iam_validator/checks/action_condition_enforcement.py +504 -190
- iam_validator/checks/action_resource_matching.py +8 -15
- iam_validator/checks/action_validation.py +6 -12
- iam_validator/checks/condition_key_validation.py +6 -12
- iam_validator/checks/condition_type_mismatch.py +9 -16
- iam_validator/checks/full_wildcard.py +9 -13
- iam_validator/checks/mfa_condition_check.py +8 -17
- iam_validator/checks/policy_size.py +6 -39
- iam_validator/checks/policy_structure.py +10 -40
- iam_validator/checks/policy_type_validation.py +18 -19
- iam_validator/checks/principal_validation.py +11 -20
- iam_validator/checks/resource_validation.py +5 -12
- iam_validator/checks/sensitive_action.py +8 -15
- iam_validator/checks/service_wildcard.py +6 -12
- iam_validator/checks/set_operator_validation.py +11 -18
- iam_validator/checks/sid_uniqueness.py +8 -38
- iam_validator/checks/trust_policy_validation.py +8 -14
- iam_validator/checks/utils/wildcard_expansion.py +1 -1
- iam_validator/checks/wildcard_action.py +6 -12
- iam_validator/checks/wildcard_resource.py +6 -12
- iam_validator/commands/cache.py +4 -3
- iam_validator/commands/validate.py +26 -4
- iam_validator/core/__init__.py +1 -1
- iam_validator/core/aws_fetcher.py +24 -1030
- iam_validator/core/aws_service/__init__.py +21 -0
- iam_validator/core/aws_service/cache.py +108 -0
- iam_validator/core/aws_service/client.py +205 -0
- iam_validator/core/aws_service/fetcher.py +612 -0
- iam_validator/core/aws_service/parsers.py +149 -0
- iam_validator/core/aws_service/patterns.py +51 -0
- iam_validator/core/aws_service/storage.py +291 -0
- iam_validator/core/aws_service/validators.py +379 -0
- iam_validator/core/check_registry.py +82 -14
- iam_validator/core/config/defaults.py +10 -0
- iam_validator/core/constants.py +17 -0
- iam_validator/core/label_manager.py +197 -0
- iam_validator/core/policy_checks.py +7 -3
- iam_validator/core/pr_commenter.py +34 -7
- iam_validator/sdk/__init__.py +1 -1
- iam_validator/sdk/context.py +1 -1
- iam_validator/sdk/helpers.py +1 -1
- iam_policy_validator-1.8.0.dist-info/RECORD +0 -87
- {iam_policy_validator-1.8.0.dist-info → iam_policy_validator-1.10.0.dist-info}/WHEEL +0 -0
- {iam_policy_validator-1.8.0.dist-info → iam_policy_validator-1.10.0.dist-info}/entry_points.txt +0 -0
- {iam_policy_validator-1.8.0.dist-info → iam_policy_validator-1.10.0.dist-info}/licenses/LICENSE +0 -0
iam_validator/core/aws_service/__init__.py
@@ -0,0 +1,21 @@
+"""AWS Service Fetcher - Public API.
+
+This module provides functionality to fetch AWS service information from the AWS service
+reference API with advanced caching and performance features.
+
+Example usage:
+    async with AWSServiceFetcher() as fetcher:
+        services = await fetcher.fetch_services()
+        service_detail = await fetcher.fetch_service_by_name("s3")
+"""
+
+# Re-export main classes for public API
+from iam_validator.core.aws_service.fetcher import AWSServiceFetcher
+from iam_validator.core.aws_service.patterns import CompiledPatterns
+from iam_validator.core.aws_service.validators import ConditionKeyValidationResult
+
+__all__ = [
+    "AWSServiceFetcher",
+    "ConditionKeyValidationResult",
+    "CompiledPatterns",
+]
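The module docstring above doubles as the quick-start for the new public API. A minimal, runnable sketch of that usage, assuming the 1.10.0 package is installed; the no-argument constructor comes straight from the docstring, while the printed shapes of the returned data are illustrative assumptions:

```python
import asyncio

from iam_validator.core.aws_service import AWSServiceFetcher


async def main() -> None:
    # Defaults assumed sufficient, per the docstring example.
    async with AWSServiceFetcher() as fetcher:
        services = await fetcher.fetch_services()
        detail = await fetcher.fetch_service_by_name("s3")
        # Assumes fetch_services() returns a sized collection.
        print(len(services), detail)


asyncio.run(main())
```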
iam_validator/core/aws_service/cache.py
@@ -0,0 +1,108 @@
+"""Multi-layer caching for AWS service data.
+
+This module coordinates memory (LRU) and disk caching to optimize
+AWS service data retrieval performance.
+"""
+
+import logging
+from typing import Any
+
+from iam_validator.core.aws_service.storage import ServiceFileStorage
+from iam_validator.utils.cache import LRUCache
+
+logger = logging.getLogger(__name__)
+
+
+class ServiceCacheManager:
+    """Coordinates memory and disk caching for service data.
+
+    This class implements a two-tier caching strategy:
+    1. Fast in-memory LRU cache for frequently accessed data
+    2. Disk-based cache with TTL for persistence across runs
+
+    Cache lookup order:
+    1. Check memory cache (fastest)
+    2. Check disk cache (if enabled)
+    3. Return None if not found in either
+    """
+
+    def __init__(
+        self,
+        memory_cache_size: int = 256,
+        cache_ttl: int = 86400,
+        storage: ServiceFileStorage | None = None,
+    ) -> None:
+        """Initialize cache manager.
+
+        Args:
+            memory_cache_size: Maximum number of items in memory cache
+            cache_ttl: Cache time-to-live in seconds
+            storage: Optional storage backend for disk caching
+        """
+        self._memory_cache = LRUCache(maxsize=memory_cache_size, ttl=cache_ttl)
+        self._storage = storage
+
+    async def get(self, cache_key: str, url: str | None = None, base_url: str = "") -> Any | None:
+        """Get from memory cache, fallback to disk cache.
+
+        Args:
+            cache_key: Key to lookup in memory cache
+            url: Optional URL for disk cache lookup
+            base_url: Base URL for service reference API (used for disk cache path)
+
+        Returns:
+            Cached data if found, None otherwise
+        """
+        # Check memory cache first (fastest)
+        cached = await self._memory_cache.get(cache_key)
+        if cached is not None:
+            logger.debug(f"Memory cache hit for key: {cache_key}")
+            return cached
+
+        # Check disk cache if URL provided and storage available
+        if url and self._storage:
+            cached = self._storage.read_from_cache(url, base_url)
+            if cached is not None:
+                logger.debug(f"Disk cache hit for URL: {url}")
+                # Populate memory cache for faster future access
+                await self._memory_cache.set(cache_key, cached)
+                return cached
+
+        return None
+
+    async def set(
+        self, cache_key: str, value: Any, url: str | None = None, base_url: str = ""
+    ) -> None:
+        """Store in memory and optionally disk cache.
+
+        Args:
+            cache_key: Key to store in memory cache
+            value: Value to cache
+            url: Optional URL for disk cache storage
+            base_url: Base URL for service reference API (used for disk cache path)
+        """
+        # Always store in memory cache
+        await self._memory_cache.set(cache_key, value)
+
+        # Store in disk cache if URL provided and storage available
+        if url and self._storage:
+            self._storage.write_to_cache(url, value, base_url)
+
+    async def clear(self) -> None:
+        """Clear memory cache and optionally disk cache."""
+        await self._memory_cache.clear()
+
+        if self._storage:
+            self._storage.clear_disk_cache()
+
+        logger.info("Cleared all caches")
+
+    def get_stats(self) -> dict[str, int]:
+        """Get cache statistics.
+
+        Returns:
+            Dictionary with cache statistics
+        """
+        return {
+            "memory_cache_size": len(self._memory_cache.cache),
+        }
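The two-tier lookup is easiest to see with the disk tier disabled (`storage=None`): `set()` populates only the memory LRU, a `get()` on the same key is a memory hit, and an unknown key falls through to `None`. A short sketch against the class above, with illustrative keys and values:

```python
import asyncio

from iam_validator.core.aws_service.cache import ServiceCacheManager


async def demo() -> None:
    # No storage backend: only the in-memory LRU tier is active.
    cache = ServiceCacheManager(memory_cache_size=64, cache_ttl=3600, storage=None)
    await cache.set("service:s3", {"name": "s3"})  # illustrative key/value
    print(await cache.get("service:s3"))   # memory hit -> {'name': 's3'}
    print(await cache.get("service:ec2"))  # unknown key -> None
    print(cache.get_stats())               # {'memory_cache_size': 1}
    await cache.clear()


asyncio.run(demo())
```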
iam_validator/core/aws_service/client.py
@@ -0,0 +1,205 @@
+"""HTTP client for AWS Service Reference API.
+
+This module provides an async HTTP client with advanced features including
+connection pooling, request batching/coalescing, and retry logic.
+"""
+
+import asyncio
+import logging
+from typing import Any
+
+import httpx
+
+from iam_validator.core import constants
+
+logger = logging.getLogger(__name__)
+
+
+class AWSServiceClient:
+    """Async HTTP client with connection pooling and request coalescing.
+
+    This class handles all HTTP operations for fetching AWS service data,
+    including retry logic, request batching, and connection management.
+    """
+
+    def __init__(
+        self,
+        base_url: str,
+        timeout: float = constants.DEFAULT_HTTP_TIMEOUT_SECONDS,
+        retries: int = 3,
+        connection_pool_size: int = 50,
+        keepalive_connections: int = 20,
+    ) -> None:
+        """Initialize HTTP client.
+
+        Args:
+            base_url: Base URL for AWS service reference API
+            timeout: Request timeout in seconds
+            retries: Number of retries for failed requests
+            connection_pool_size: HTTP connection pool size
+            keepalive_connections: Number of keepalive connections
+        """
+        self.base_url = base_url
+        self.timeout = timeout
+        self.retries = retries
+        self.connection_pool_size = connection_pool_size
+        self.keepalive_connections = keepalive_connections
+
+        self._client: httpx.AsyncClient | None = None
+
+        # Batch request queue for request coalescing
+        self._batch_queue: dict[str, asyncio.Future[Any]] = {}
+        self._batch_lock = asyncio.Lock()
+
+    async def __aenter__(self) -> "AWSServiceClient":
+        """Setup httpx client with HTTP/2 and connection pooling."""
+        self._client = httpx.AsyncClient(
+            timeout=httpx.Timeout(self.timeout),
+            follow_redirects=True,
+            limits=httpx.Limits(
+                max_keepalive_connections=self.keepalive_connections,
+                max_connections=self.connection_pool_size,
+                keepalive_expiry=constants.DEFAULT_HTTP_TIMEOUT_SECONDS,
+            ),
+            http2=True,  # Enable HTTP/2 for multiplexing
+        )
+        return self
+
+    async def __aexit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: Any,
+    ) -> None:
+        """Close HTTP client.
+
+        Args:
+            exc_type: Exception type (required by async context manager protocol)
+            exc_val: Exception value (required by async context manager protocol)
+            exc_tb: Exception traceback (required by async context manager protocol)
+        """
+        # Parameters required by protocol but not used in this implementation
+        del exc_type, exc_val, exc_tb
+
+        if self._client:
+            await self._client.aclose()
+            self._client = None
+
+    async def fetch(self, url: str) -> Any:
+        """Fetch data from URL with retry logic and batching.
+
+        This method implements request coalescing - if multiple coroutines
+        request the same URL simultaneously, only one HTTP request is made
+        and the result is shared.
+
+        Args:
+            url: URL to fetch
+
+        Returns:
+            Parsed JSON response data
+
+        Raises:
+            RuntimeError: If client is not initialized
+            ValueError: If response is not valid JSON or resource not found
+            Exception: If all retries fail
+        """
+        # First check: see if request is already in progress
+        existing_future = None
+        async with self._batch_lock:
+            if url in self._batch_queue:
+                existing_future = self._batch_queue[url]
+
+        # Wait for existing request outside the lock
+        if existing_future is not None:
+            logger.debug(f"Coalescing request for {url}")
+            return await existing_future
+
+        # Create new future for this request
+        loop = asyncio.get_event_loop()
+        future: asyncio.Future[Any] = loop.create_future()
+
+        # Second check: register future or use existing one (double-check pattern)
+        async with self._batch_lock:
+            if url in self._batch_queue:
+                # Another coroutine registered while we were creating the future
+                existing_future = self._batch_queue[url]
+            else:
+                # We're the first, register our future
+                self._batch_queue[url] = future
+
+        # If we found an existing future, wait for it
+        if existing_future is not None:
+            logger.debug(f"Coalescing request for {url} (late check)")
+            return await existing_future
+
+        # We're responsible for making the request
+        try:
+            # Actually make the request
+            result = await self._make_request(url)
+            if not future.done():
+                future.set_result(result)
+            return result
+        except Exception as e:
+            if not future.done():
+                future.set_exception(e)
+            raise
+        finally:
+            # Remove from queue
+            async with self._batch_lock:
+                self._batch_queue.pop(url, None)
+
+    async def _make_request(self, url: str) -> Any:
+        """Core HTTP request with exponential backoff.
+
+        Args:
+            url: URL to fetch
+
+        Returns:
+            Parsed JSON response data
+
+        Raises:
+            RuntimeError: If client is not initialized
+            ValueError: If response is not valid JSON or resource not found
+            Exception: If all retries fail
+        """
+        if not self._client:
+            raise RuntimeError("Client not initialized. Use as async context manager.")
+
+        last_exception: Exception | None = None
+
+        for attempt in range(self.retries):
+            try:
+                logger.debug(f"Fetching URL: {url} (attempt {attempt + 1})")
+                response = await self._client.get(url)
+                response.raise_for_status()
+
+                try:
+                    data = response.json()
+                    return data
+
+                except Exception as json_error:  # pylint: disable=broad-exception-caught
+                    logger.error(f"Failed to parse response as JSON: {json_error}")
+                    raise ValueError(
+                        f"Invalid JSON response from {url}: {json_error}"
+                    ) from json_error
+
+            except httpx.HTTPStatusError as e:
+                logger.error(f"HTTP error {e.response.status_code} for {url}")
+                if e.response.status_code == 404:
+                    raise ValueError(f"Service not found: {url}") from e
+                last_exception = e
+
+            except httpx.RequestError as e:
+                logger.error(f"Request error for {url}: {e}")
+                last_exception = e
+
+            except Exception as e:  # pylint: disable=broad-exception-caught
+                logger.error(f"Unexpected error for {url}: {e}")
+                last_exception = e
+
+            if attempt < self.retries - 1:
+                wait_time = 2**attempt
+                logger.info(f"Retrying in {wait_time} seconds...")
+                await asyncio.sleep(wait_time)
+
+        raise last_exception or Exception(f"Failed to fetch {url} after {self.retries} attempts")
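The coalescing path in `fetch()` means concurrent requests for one URL collapse into a single GET: the first coroutine registers a future in `_batch_queue` and performs the request, while the others await that future. A sketch of that behavior; the endpoint URL is illustrative rather than taken from this diff, and running it needs httpx's HTTP/2 extra (`h2`) since the client enables `http2=True`:

```python
import asyncio

from iam_validator.core.aws_service.client import AWSServiceClient

# Illustrative endpoint, not a value documented in this diff.
BASE_URL = "https://servicereference.us-east-1.amazonaws.com"


async def demo() -> None:
    async with AWSServiceClient(base_url=BASE_URL) as client:
        # Three concurrent awaits on the same URL; the batch queue in
        # fetch() should collapse them into one underlying request.
        results = await asyncio.gather(*(client.fetch(f"{BASE_URL}/") for _ in range(3)))
        print(len(results))  # 3 results delivered, shared from a single GET


asyncio.run(demo())
```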