aws-inventory-manager 0.17.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aws_inventory_manager-0.17.12.dist-info/LICENSE +21 -0
- aws_inventory_manager-0.17.12.dist-info/METADATA +1292 -0
- aws_inventory_manager-0.17.12.dist-info/RECORD +152 -0
- aws_inventory_manager-0.17.12.dist-info/WHEEL +5 -0
- aws_inventory_manager-0.17.12.dist-info/entry_points.txt +2 -0
- aws_inventory_manager-0.17.12.dist-info/top_level.txt +1 -0
- src/__init__.py +3 -0
- src/aws/__init__.py +11 -0
- src/aws/client.py +128 -0
- src/aws/credentials.py +191 -0
- src/aws/rate_limiter.py +177 -0
- src/cli/__init__.py +12 -0
- src/cli/config.py +130 -0
- src/cli/main.py +4046 -0
- src/cloudtrail/__init__.py +5 -0
- src/cloudtrail/query.py +642 -0
- src/config_service/__init__.py +21 -0
- src/config_service/collector.py +346 -0
- src/config_service/detector.py +256 -0
- src/config_service/resource_type_mapping.py +328 -0
- src/cost/__init__.py +5 -0
- src/cost/analyzer.py +226 -0
- src/cost/explorer.py +209 -0
- src/cost/reporter.py +237 -0
- src/delta/__init__.py +5 -0
- src/delta/calculator.py +206 -0
- src/delta/differ.py +185 -0
- src/delta/formatters.py +272 -0
- src/delta/models.py +154 -0
- src/delta/reporter.py +234 -0
- src/matching/__init__.py +6 -0
- src/matching/config.py +52 -0
- src/matching/normalizer.py +450 -0
- src/matching/prompts.py +33 -0
- src/models/__init__.py +21 -0
- src/models/config_diff.py +135 -0
- src/models/cost_report.py +87 -0
- src/models/deletion_operation.py +104 -0
- src/models/deletion_record.py +97 -0
- src/models/delta_report.py +122 -0
- src/models/efs_resource.py +80 -0
- src/models/elasticache_resource.py +90 -0
- src/models/group.py +318 -0
- src/models/inventory.py +133 -0
- src/models/protection_rule.py +123 -0
- src/models/report.py +288 -0
- src/models/resource.py +111 -0
- src/models/security_finding.py +102 -0
- src/models/snapshot.py +122 -0
- src/restore/__init__.py +20 -0
- src/restore/audit.py +175 -0
- src/restore/cleaner.py +461 -0
- src/restore/config.py +209 -0
- src/restore/deleter.py +976 -0
- src/restore/dependency.py +254 -0
- src/restore/safety.py +115 -0
- src/security/__init__.py +0 -0
- src/security/checks/__init__.py +0 -0
- src/security/checks/base.py +56 -0
- src/security/checks/ec2_checks.py +88 -0
- src/security/checks/elasticache_checks.py +149 -0
- src/security/checks/iam_checks.py +102 -0
- src/security/checks/rds_checks.py +140 -0
- src/security/checks/s3_checks.py +95 -0
- src/security/checks/secrets_checks.py +96 -0
- src/security/checks/sg_checks.py +142 -0
- src/security/cis_mapper.py +97 -0
- src/security/models.py +53 -0
- src/security/reporter.py +174 -0
- src/security/scanner.py +87 -0
- src/snapshot/__init__.py +6 -0
- src/snapshot/capturer.py +453 -0
- src/snapshot/filter.py +259 -0
- src/snapshot/inventory_storage.py +236 -0
- src/snapshot/report_formatter.py +250 -0
- src/snapshot/reporter.py +189 -0
- src/snapshot/resource_collectors/__init__.py +5 -0
- src/snapshot/resource_collectors/apigateway.py +140 -0
- src/snapshot/resource_collectors/backup.py +136 -0
- src/snapshot/resource_collectors/base.py +81 -0
- src/snapshot/resource_collectors/cloudformation.py +55 -0
- src/snapshot/resource_collectors/cloudwatch.py +109 -0
- src/snapshot/resource_collectors/codebuild.py +69 -0
- src/snapshot/resource_collectors/codepipeline.py +82 -0
- src/snapshot/resource_collectors/dynamodb.py +65 -0
- src/snapshot/resource_collectors/ec2.py +240 -0
- src/snapshot/resource_collectors/ecs.py +215 -0
- src/snapshot/resource_collectors/efs_collector.py +102 -0
- src/snapshot/resource_collectors/eks.py +200 -0
- src/snapshot/resource_collectors/elasticache_collector.py +79 -0
- src/snapshot/resource_collectors/elb.py +126 -0
- src/snapshot/resource_collectors/eventbridge.py +156 -0
- src/snapshot/resource_collectors/glue.py +199 -0
- src/snapshot/resource_collectors/iam.py +188 -0
- src/snapshot/resource_collectors/kms.py +111 -0
- src/snapshot/resource_collectors/lambda_func.py +139 -0
- src/snapshot/resource_collectors/rds.py +109 -0
- src/snapshot/resource_collectors/route53.py +86 -0
- src/snapshot/resource_collectors/s3.py +105 -0
- src/snapshot/resource_collectors/secretsmanager.py +70 -0
- src/snapshot/resource_collectors/sns.py +68 -0
- src/snapshot/resource_collectors/sqs.py +82 -0
- src/snapshot/resource_collectors/ssm.py +160 -0
- src/snapshot/resource_collectors/stepfunctions.py +74 -0
- src/snapshot/resource_collectors/vpcendpoints.py +79 -0
- src/snapshot/resource_collectors/waf.py +159 -0
- src/snapshot/storage.py +351 -0
- src/storage/__init__.py +21 -0
- src/storage/audit_store.py +419 -0
- src/storage/database.py +294 -0
- src/storage/group_store.py +763 -0
- src/storage/inventory_store.py +320 -0
- src/storage/resource_store.py +416 -0
- src/storage/schema.py +339 -0
- src/storage/snapshot_store.py +363 -0
- src/utils/__init__.py +12 -0
- src/utils/export.py +305 -0
- src/utils/hash.py +60 -0
- src/utils/logging.py +63 -0
- src/utils/pagination.py +41 -0
- src/utils/paths.py +51 -0
- src/utils/progress.py +41 -0
- src/utils/unsupported_resources.py +306 -0
- src/web/__init__.py +5 -0
- src/web/app.py +97 -0
- src/web/dependencies.py +69 -0
- src/web/routes/__init__.py +1 -0
- src/web/routes/api/__init__.py +18 -0
- src/web/routes/api/charts.py +156 -0
- src/web/routes/api/cleanup.py +186 -0
- src/web/routes/api/filters.py +253 -0
- src/web/routes/api/groups.py +305 -0
- src/web/routes/api/inventories.py +80 -0
- src/web/routes/api/queries.py +202 -0
- src/web/routes/api/resources.py +393 -0
- src/web/routes/api/snapshots.py +314 -0
- src/web/routes/api/views.py +260 -0
- src/web/routes/pages.py +198 -0
- src/web/services/__init__.py +1 -0
- src/web/templates/base.html +955 -0
- src/web/templates/components/navbar.html +31 -0
- src/web/templates/components/sidebar.html +104 -0
- src/web/templates/pages/audit_logs.html +86 -0
- src/web/templates/pages/cleanup.html +279 -0
- src/web/templates/pages/dashboard.html +227 -0
- src/web/templates/pages/diff.html +175 -0
- src/web/templates/pages/error.html +30 -0
- src/web/templates/pages/groups.html +721 -0
- src/web/templates/pages/queries.html +246 -0
- src/web/templates/pages/resources.html +2429 -0
- src/web/templates/pages/snapshot_detail.html +271 -0
- src/web/templates/pages/snapshots.html +429 -0
|
@@ -0,0 +1,450 @@
|
|
|
1
|
+
"""Resource name normalizer using rules and AI."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import re
|
|
6
|
+
import time
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
9
|
+
|
|
10
|
+
from .config import NormalizerConfig
|
|
11
|
+
from .prompts import NORMALIZATION_SYSTEM_PROMPT
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class NormalizationResult:
    """Outcome of normalizing a single resource name.

    Attributes:
        normalized_name: Semantic part of the name after stripping
            auto-generated components.
        extracted_patterns: The patterns that were stripped from the name
            (empty when nothing was removed).
        method: How normalization was determined; one of 'tag:logical-id',
            'tag:Name', 'pattern', or 'none'.
        confidence: Reliability score of the normalization, 0.0-1.0.
    """

    normalized_name: str
    # Mutable default must go through default_factory, never a shared literal.
    extracted_patterns: List[str] = field(default_factory=list)
    method: str = "none"
    confidence: float = 0.9
|
|
29
|
+
|
|
30
|
+
logger = logging.getLogger(__name__)

# openai is an optional dependency: record its absence instead of raising,
# so the rules-based path still works without the package installed.
try:
    from openai import OpenAI
except ImportError:
    OPENAI_AVAILABLE = False
    OpenAI = None
else:
    OPENAI_AVAILABLE = True
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ResourceNormalizer:
    """Normalize resource names using rules-based and AI approaches.

    The normalizer first tries rules-based normalization for obvious cases,
    then falls back to AI for ambiguous names. When AI is disabled, the
    openai package is missing, or every retry fails, a basic
    lowercase/hyphen fallback is used instead, so every input resource
    always receives a normalized name.
    """

    def __init__(self, config: Optional[NormalizerConfig] = None):
        """Initialize the normalizer.

        Args:
            config: Configuration for normalization. If None, loads from environment.
        """
        self.config = config or NormalizerConfig.from_env()
        # OpenAI client is created lazily via the `client` property so that
        # construction never fails when AI is disabled or not installed.
        self._client: Optional[Any] = None
        # Cumulative token usage across all AI calls (exposed via `tokens_used`).
        self._total_tokens: int = 0

        # Compile regex patterns once up front for performance.
        # NOTE(review): assumes config.random_patterns is an iterable of regex
        # strings — re.compile() will raise here on an invalid pattern.
        self._random_patterns = [re.compile(p) for p in self.config.random_patterns]

    @property
    def client(self) -> Optional[Any]:
        """Lazy-init OpenAI client.

        Returns None when AI is disabled in config or the openai package is
        not installed; callers treat a None client as "skip AI".
        """
        if self._client is None and self.config.is_ai_enabled:
            if not OPENAI_AVAILABLE:
                logger.warning("OpenAI package not installed. Install with: pip install openai")
                return None

            self._client = OpenAI(
                api_key=self.config.api_key,
                base_url=self.config.base_url,
                timeout=self.config.timeout_seconds,
            )
        return self._client

    @property
    def tokens_used(self) -> int:
        """Total tokens used for AI normalization."""
        return self._total_tokens

    def normalize_resources(
        self,
        resources: List[Dict[str, Any]],
        use_ai: bool = True,
    ) -> Dict[str, str]:
        """Normalize a list of resources.

        Args:
            resources: List of resource dicts with 'arn', 'name', 'tags', 'resource_type'
            use_ai: Whether to use AI for ambiguous names

        Returns:
            Dict mapping ARN -> normalized_name (one entry per input resource)
        """
        results: Dict[str, str] = {}
        needs_ai: List[Dict[str, Any]] = []

        # Phase 1: Try rules-based normalization; anything the rules cannot
        # resolve is queued for AI.
        for resource in resources:
            arn = resource.get("arn", "")
            normalized = self._try_rules_based(resource)

            if normalized:
                results[arn] = normalized
                logger.debug(f"Rules-based: {resource.get('name')} -> {normalized}")
            else:
                needs_ai.append(resource)

        logger.info(
            f"Normalization: {len(results)} via rules, {len(needs_ai)} need AI"
        )

        # Phase 2: AI normalization for ambiguous names (requires a usable
        # client — see the `client` property).
        if needs_ai and use_ai and self.client:
            ai_results = self._normalize_with_ai(needs_ai)
            results.update(ai_results)
            logger.info(f"AI normalized {len(ai_results)} resources")
        elif needs_ai:
            # Fallback: use lowercase name if AI not available
            for resource in needs_ai:
                arn = resource.get("arn", "")
                name = resource.get("name", "")
                results[arn] = self._basic_normalize(name)
                logger.debug(f"Fallback: {name} -> {results[arn]}")

        return results

    def normalize_single(
        self,
        name: str,
        resource_type: str,
        tags: Optional[Dict[str, str]] = None,
    ) -> NormalizationResult:
        """Normalize a single resource and return detailed result.

        This method is used by snapshot_store to compute normalized names
        during snapshot save. It never calls the AI; it applies the tag and
        pattern rules only, in priority order.

        Args:
            name: Physical resource name
            resource_type: AWS resource type (e.g., 'AWS::Lambda::Function')
            tags: Resource tags

        Returns:
            NormalizationResult with normalized_name, extracted_patterns, method, confidence
        """
        tags = tags or {}

        # Priority 1: CloudFormation logical ID tag (most reliable)
        logical_id = tags.get("aws:cloudformation:logical-id")
        if logical_id:
            return NormalizationResult(
                normalized_name=self._basic_normalize(logical_id),
                extracted_patterns=[],
                method="tag:logical-id",
                confidence=1.0,
            )

        # Priority 2: Name tag (user-defined, stable) — only trusted when it
        # does not itself contain random-looking patterns.
        name_tag = tags.get("Name")
        if name_tag and not self._has_random_patterns(name_tag):
            return NormalizationResult(
                normalized_name=self._basic_normalize(name_tag),
                extracted_patterns=[],
                method="tag:Name",
                confidence=0.95,
            )

        # Priority 3: Check if entirely AWS-generated ID (subnet-xxx, vpc-xxx, etc.)
        if self._is_aws_resource_id(name, resource_type):
            # Can't normalize - needs Name tag for stable matching
            return NormalizationResult(
                normalized_name=self._basic_normalize(name),
                extracted_patterns=[name],
                method="none",
                confidence=0.0,  # Low confidence - needs tag for reliable matching
            )

        # Priority 4: Try to extract patterns from physical name
        normalized, extracted = self._extract_patterns(name)
        if extracted:
            return NormalizationResult(
                normalized_name=self._basic_normalize(normalized),
                extracted_patterns=extracted,
                method="pattern",
                confidence=0.8,
            )

        # Priority 5: Clean name - no normalization needed
        return NormalizationResult(
            normalized_name=self._basic_normalize(name),
            extracted_patterns=[],
            method="none",
            confidence=0.9,  # Clean name, high confidence
        )

    def _is_aws_resource_id(self, name: str, resource_type: str) -> bool:
        """Check if name is entirely an AWS-generated resource ID.

        These IDs (subnet-xxx, vpc-xxx, vol-xxx, etc.) are stable but
        provide no semantic meaning without a Name tag.

        Resource types not in the table below always return False.
        """
        # Map resource types to their ID patterns
        aws_id_patterns = {
            "AWS::EC2::Subnet": r"^subnet-[a-f0-9]+$",
            "AWS::EC2::VPC": r"^vpc-[a-f0-9]+$",
            "AWS::EC2::SecurityGroup": r"^sg-[a-f0-9]+$",
            "AWS::EC2::Volume": r"^vol-[a-f0-9]+$",
            "AWS::EC2::Instance": r"^i-[a-f0-9]+$",
            "AWS::EC2::InternetGateway": r"^igw-[a-f0-9]+$",
            "AWS::EC2::RouteTable": r"^rtb-[a-f0-9]+$",
            "AWS::EC2::NetworkAcl": r"^acl-[a-f0-9]+$",
            "AWS::EC2::NetworkInterface": r"^eni-[a-f0-9]+$",
            "AWS::EC2::NatGateway": r"^nat-[a-f0-9]+$",
            "AWS::EC2::EIP": r"^eipalloc-[a-f0-9]+$",
        }

        pattern = aws_id_patterns.get(resource_type)
        if pattern:
            # AWS resource IDs are always lowercase hex, no IGNORECASE needed
            return bool(re.match(pattern, name))
        return False

    def _extract_patterns(self, name: str) -> Tuple[str, List[str]]:
        """Extract auto-generated patterns from name.

        Strips common patterns like CloudFormation suffixes, account IDs,
        regions, timestamps, etc. Each pattern is applied at most once
        (re.search removes only the first occurrence), in the order listed.

        Returns:
            Tuple of (cleaned_name, list_of_extracted_patterns)
        """
        extracted = []
        result = name

        # Patterns to extract (ordered by specificity)
        extraction_patterns = [
            # CloudFormation suffix (uppercase alphanumeric, 8-13 chars at end)
            (r"-[A-Z0-9]{8,13}$", "cfn_suffix"),
            # Bedrock/Kendra suffix (underscore + lowercase alphanumeric)
            (r"_[a-z0-9]{4,6}$", "bedrock_suffix"),
            # Account ID (12 digits, with optional surrounding hyphens)
            (r"-?\d{12}-?", "account_id"),
            # Region (e.g., us-east-1, eu-west-2)
            (r"-?(us|eu|ap|sa|ca|me|af)-(east|west|north|south|central|northeast|southeast)-\d-?", "region"),
            # Hex suffix (8+ lowercase hex chars at end)
            (r"-[a-f0-9]{8,}$", "hex_suffix"),
            # Timestamp suffix (8-14 digits at end)
            (r"-\d{8,14}$", "timestamp"),
        ]

        for pattern, _pattern_name in extraction_patterns:
            # Note: Don't use IGNORECASE - CloudFormation suffixes are uppercase,
            # and we want case-sensitive matching for accuracy
            match = re.search(pattern, result)
            if match:
                extracted.append(match.group().strip("-"))
                result = result[: match.start()] + result[match.end() :]

        # Clean up trailing/leading separators
        result = re.sub(r"^[-_]+|[-_]+$", "", result)
        # Collapse multiple separators
        result = re.sub(r"[-_]{2,}", "-", result)

        return result, extracted

    def _try_rules_based(self, resource: Dict[str, Any]) -> Optional[str]:
        """Try to normalize using rules.

        Priority:
        1. CloudFormation logical ID tag
        2. Name tag (if clean)
        3. Physical name (if clean)

        Returns None if name appears to have random patterns.
        """
        # `tags` may be absent or explicitly None in the resource dict.
        tags = resource.get("tags", {}) or {}
        name = resource.get("name", "")

        # 1. CloudFormation logical ID is the best canonical identifier
        logical_id = tags.get("aws:cloudformation:logical-id")
        if logical_id:
            return self._basic_normalize(logical_id)

        # 2. Name tag (if it looks clean)
        name_tag = tags.get("Name")
        if name_tag and not self._has_random_patterns(name_tag):
            return self._basic_normalize(name_tag)

        # 3. Physical name (if it looks clean)
        if name and not self._has_random_patterns(name):
            return self._basic_normalize(name)

        # Name has random patterns - needs AI
        return None

    def _has_random_patterns(self, name: str) -> bool:
        """Check if name contains random-looking patterns.

        An empty/missing name is treated as "random" so it falls through to
        the AI/fallback path rather than being accepted by the rules.
        """
        if not name:
            return True

        for pattern in self._random_patterns:
            if pattern.search(name):
                return True
        return False

    def _basic_normalize(self, name: str) -> str:
        """Basic string normalization without AI.

        - Lowercase
        - Replace underscores/spaces with hyphens
        - Strip leading/trailing hyphens
        """
        if not name:
            return ""

        result = name.lower()
        result = re.sub(r"[_\s]+", "-", result)
        result = re.sub(r"-+", "-", result)
        return result.strip("-")

    def _normalize_with_ai(
        self,
        resources: List[Dict[str, Any]],
    ) -> Dict[str, str]:
        """Normalize resources using AI.

        Batches resources (config.max_batch_size per call) and calls the AI API.

        Args:
            resources: Resources that need AI normalization

        Returns:
            Dict mapping ARN -> normalized_name
        """
        results: Dict[str, str] = {}

        # Process in batches
        for i in range(0, len(resources), self.config.max_batch_size):
            batch = resources[i : i + self.config.max_batch_size]
            batch_results = self._process_ai_batch(batch)
            results.update(batch_results)

        return results

    def _process_ai_batch(
        self,
        resources: List[Dict[str, Any]],
    ) -> Dict[str, str]:
        """Process a single batch through the AI.

        Retries with exponential backoff (2**attempt seconds) up to
        config.max_retries times; on total failure every resource falls back
        to `_basic_normalize`, so the caller always gets a full mapping.

        Args:
            resources: Batch of resources

        Returns:
            Dict mapping ARN -> normalized_name
        """
        # Build the user prompt with resource details
        resource_list = []
        for r in resources:
            item = {
                "arn": r.get("arn", ""),
                "name": r.get("name", ""),
                "type": r.get("resource_type", ""),
            }
            # Include Name tag if present
            tags = r.get("tags", {}) or {}
            if tags.get("Name"):
                item["name_tag"] = tags["Name"]
            resource_list.append(item)

        user_prompt = json.dumps({"resources": resource_list}, indent=2)

        # Call the AI with retries
        for attempt in range(self.config.max_retries):
            try:
                response = self.client.chat.completions.create(
                    model=self.config.model,
                    messages=[
                        {"role": "system", "content": NORMALIZATION_SYSTEM_PROMPT},
                        {"role": "user", "content": user_prompt},
                    ],
                    temperature=0.1,  # Low for consistency
                )

                # Track token usage
                if hasattr(response, "usage") and response.usage:
                    self._total_tokens += response.usage.total_tokens

                # Parse response
                content = response.choices[0].message.content
                return self._parse_ai_response(content, resources)

            except Exception as e:
                # Broad catch is deliberate: any API/parsing failure should
                # trigger a retry, then the basic-normalize fallback below.
                wait_time = 2**attempt
                logger.warning(
                    f"AI normalization attempt {attempt + 1} failed: {e}. "
                    f"Retrying in {wait_time}s..."
                )
                if attempt < self.config.max_retries - 1:
                    time.sleep(wait_time)

        # All retries failed - use fallback
        logger.error("AI normalization failed after all retries")
        return {
            r.get("arn", ""): self._basic_normalize(r.get("name", ""))
            for r in resources
        }

    def _parse_ai_response(
        self,
        content: str,
        resources: List[Dict[str, Any]],
    ) -> Dict[str, str]:
        """Parse AI response into ARN -> normalized_name mapping.

        Resources the AI omitted, and the entire batch when the response is
        not valid JSON, fall back to `_basic_normalize`.

        Args:
            content: AI response content (JSON string)
            resources: Original resources (for fallback)

        Returns:
            Dict mapping ARN -> normalized_name
        """
        try:
            data = json.loads(content)
            normalizations = data.get("normalizations", [])

            results = {}
            for norm in normalizations:
                arn = norm.get("arn", "")
                normalized_name = norm.get("normalized_name", "")
                if arn and normalized_name:
                    results[arn] = normalized_name

            # Fallback for any missing
            for r in resources:
                arn = r.get("arn", "")
                if arn and arn not in results:
                    results[arn] = self._basic_normalize(r.get("name", ""))

            return results

        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse AI response as JSON: {e}")
            logger.debug(f"Response content: {content[:500]}...")
            return {
                r.get("arn", ""): self._basic_normalize(r.get("name", ""))
                for r in resources
            }
|
src/matching/prompts.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""AI prompts for resource name normalization."""

# System prompt sent with every AI normalization batch. The final line pins
# the model to a strict JSON reply so ResourceNormalizer._parse_ai_response
# can json.loads() the content directly.
NORMALIZATION_SYSTEM_PROMPT = """You are an AWS resource name normalizer for cross-account infrastructure matching.

Your job is to extract the "logical identity" from AWS resource names by removing:
- Random suffixes (hex, alphanumeric, CloudFormation-generated)
- AWS account IDs (12-digit numbers)
- Region names (us-east-1, eu-west-2, etc.)
- Stack name prefixes (MyStack-, Stack-, etc.)
- AWS resource ID prefixes (subnet-, vpc-, vol-, i-, sg-, etc.)
- Timestamps and dates embedded in names

Keep the meaningful, purpose-identifying parts of the name.

Rules:
1. Output should be lowercase with hyphens (no underscores, no spaces)
2. If the name is already clean and meaningful, return it as-is (lowercase)
3. Preserve the semantic meaning - "policy-executor" not just "executor"
4. For AWS-generated IDs (subnet-xxx, vpc-xxx), use the Name tag if provided
5. Strip common AWS service prefixes that don't add meaning

Examples:
- "cloud-custodian-480738299408-policy-executor-abc123" → "cloud-custodian-policy-executor"
- "AmazonBedrockExecutionRoleForKnowledgeBase_jnwn1" → "bedrock-knowledge-base-execution-role"
- "MyStack-ProcessorLambda-XYZ789ABC" → "processor-lambda"
- "daybreak-transcribe-processor" → "daybreak-transcribe-processor" (already clean)
- "AWSServiceRoleForOrganizations" → "aws-service-role-organizations"
- "d-9067239ebb_controllers" → "directory-controllers"
- Resource with name "subnet-abc123def" and Name tag "Private-Subnet-AZ1" → "private-subnet-az1"

Respond ONLY with valid JSON in this exact format:
{"normalizations": [{"arn": "arn:aws:...", "normalized_name": "..."}]}
"""
|
src/models/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Data models for AWS Baseline Snapshot tool."""

from .cost_report import CostBreakdown, CostReport
from .delta_report import DeltaReport, ResourceChange
from .group import GroupMember, ResourceGroup, extract_resource_name
from .inventory import Inventory
from .resource import Resource
from .snapshot import Snapshot

# Public names re-exported at the package level; keep this list in sync with
# the imports above when adding a new model module.
__all__ = [
    "Snapshot",
    "Resource",
    "DeltaReport",
    "ResourceChange",
    "CostReport",
    "CostBreakdown",
    "Inventory",
    "ResourceGroup",
    "GroupMember",
    "extract_resource_name",
]
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Configuration diff model for representing field-level changes between snapshots."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ChangeCategory(Enum):
    """High-level buckets used to classify a single configuration change."""

    TAGS = "tags"
    CONFIGURATION = "configuration"
    SECURITY = "security"
    PERMISSIONS = "permissions"


# Field-name fragments that mark a change as security-relevant. Matching is a
# case-insensitive substring test against the diff's field path — see
# ConfigDiff.is_security_critical.
SECURITY_CRITICAL_FIELDS = {
    "Acl",
    "BucketPolicy",
    "HttpTokens",  # IMDSv2
    "IpPermissions",
    "IpPermissionsEgress",
    "MetadataOptions",
    "Policy",
    "PubliclyAccessible",
    "SecurityGroups",
    "encryption",
    "kms",
    "public",
}


@dataclass
class ConfigDiff:
    """One field-level configuration change between two resource snapshots.

    Attributes:
        resource_arn: AWS ARN of the resource that changed.
        field_path: Dot-notation path of the changed field (e.g. "Tags.Environment").
        old_value: Value before the change (None when the field was added).
        new_value: Value after the change (None when the field was removed).
        category: ChangeCategory bucket this change belongs to.
    """

    resource_arn: str
    field_path: str
    old_value: Any
    new_value: Any
    category: ChangeCategory

    def __post_init__(self) -> None:
        """Reject obviously malformed diffs as early as possible."""
        # Cheap sanity check only; the full ARN grammar is not enforced.
        if not self.resource_arn or not self.resource_arn.startswith("arn:"):
            raise ValueError(f"Invalid ARN format: {self.resource_arn}")

        if not self.field_path:
            raise ValueError("field_path cannot be empty")

        if not isinstance(self.category, ChangeCategory):
            raise ValueError(f"Invalid category type: {type(self.category)}. Must be ChangeCategory enum.")

    def with_path_prefix(self, prefix: str) -> ConfigDiff:
        """Return a copy of this diff with *prefix* prepended to the field path.

        Args:
            prefix: Prefix to add to the field path.

        Returns:
            New ConfigDiff instance with the prefixed field path.
        """
        return ConfigDiff(
            self.resource_arn,
            f"{prefix}.{self.field_path}",
            self.old_value,
            self.new_value,
            self.category,
        )

    def is_security_critical(self) -> bool:
        """Report whether this change touches a security-related setting.

        Returns:
            True when the field path contains any keyword from
            SECURITY_CRITICAL_FIELDS (case-insensitive), False otherwise.
        """
        haystack = self.field_path.lower()
        for keyword in SECURITY_CRITICAL_FIELDS:
            if keyword.lower() in haystack:
                return True
        return False

    def to_dict(self) -> dict[str, Any]:
        """Serialize this diff to a plain dictionary.

        Returns:
            Dictionary with all diff attributes plus a computed
            "security_critical" flag (not consumed by from_dict).
        """
        return {
            "resource_arn": self.resource_arn,
            "field_path": self.field_path,
            "old_value": self.old_value,
            "new_value": self.new_value,
            "category": self.category.value,
            "security_critical": self.is_security_critical(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ConfigDiff:
        """Build a ConfigDiff from its dictionary representation.

        Args:
            data: Dictionary with diff attributes.

        Returns:
            ConfigDiff instance.

        Raises:
            ValueError: If the category value is invalid.
        """
        raw_category = data.get("category", "").lower()
        try:
            parsed_category = ChangeCategory(raw_category)
        except ValueError:
            raise ValueError(f"Invalid category value: {raw_category}")

        return cls(
            resource_arn=data["resource_arn"],
            field_path=data["field_path"],
            old_value=data["old_value"],
            new_value=data["new_value"],
            category=parsed_category,
        )
|