aws-inventory-manager 0.17.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. aws_inventory_manager-0.17.12.dist-info/LICENSE +21 -0
  2. aws_inventory_manager-0.17.12.dist-info/METADATA +1292 -0
  3. aws_inventory_manager-0.17.12.dist-info/RECORD +152 -0
  4. aws_inventory_manager-0.17.12.dist-info/WHEEL +5 -0
  5. aws_inventory_manager-0.17.12.dist-info/entry_points.txt +2 -0
  6. aws_inventory_manager-0.17.12.dist-info/top_level.txt +1 -0
  7. src/__init__.py +3 -0
  8. src/aws/__init__.py +11 -0
  9. src/aws/client.py +128 -0
  10. src/aws/credentials.py +191 -0
  11. src/aws/rate_limiter.py +177 -0
  12. src/cli/__init__.py +12 -0
  13. src/cli/config.py +130 -0
  14. src/cli/main.py +4046 -0
  15. src/cloudtrail/__init__.py +5 -0
  16. src/cloudtrail/query.py +642 -0
  17. src/config_service/__init__.py +21 -0
  18. src/config_service/collector.py +346 -0
  19. src/config_service/detector.py +256 -0
  20. src/config_service/resource_type_mapping.py +328 -0
  21. src/cost/__init__.py +5 -0
  22. src/cost/analyzer.py +226 -0
  23. src/cost/explorer.py +209 -0
  24. src/cost/reporter.py +237 -0
  25. src/delta/__init__.py +5 -0
  26. src/delta/calculator.py +206 -0
  27. src/delta/differ.py +185 -0
  28. src/delta/formatters.py +272 -0
  29. src/delta/models.py +154 -0
  30. src/delta/reporter.py +234 -0
  31. src/matching/__init__.py +6 -0
  32. src/matching/config.py +52 -0
  33. src/matching/normalizer.py +450 -0
  34. src/matching/prompts.py +33 -0
  35. src/models/__init__.py +21 -0
  36. src/models/config_diff.py +135 -0
  37. src/models/cost_report.py +87 -0
  38. src/models/deletion_operation.py +104 -0
  39. src/models/deletion_record.py +97 -0
  40. src/models/delta_report.py +122 -0
  41. src/models/efs_resource.py +80 -0
  42. src/models/elasticache_resource.py +90 -0
  43. src/models/group.py +318 -0
  44. src/models/inventory.py +133 -0
  45. src/models/protection_rule.py +123 -0
  46. src/models/report.py +288 -0
  47. src/models/resource.py +111 -0
  48. src/models/security_finding.py +102 -0
  49. src/models/snapshot.py +122 -0
  50. src/restore/__init__.py +20 -0
  51. src/restore/audit.py +175 -0
  52. src/restore/cleaner.py +461 -0
  53. src/restore/config.py +209 -0
  54. src/restore/deleter.py +976 -0
  55. src/restore/dependency.py +254 -0
  56. src/restore/safety.py +115 -0
  57. src/security/__init__.py +0 -0
  58. src/security/checks/__init__.py +0 -0
  59. src/security/checks/base.py +56 -0
  60. src/security/checks/ec2_checks.py +88 -0
  61. src/security/checks/elasticache_checks.py +149 -0
  62. src/security/checks/iam_checks.py +102 -0
  63. src/security/checks/rds_checks.py +140 -0
  64. src/security/checks/s3_checks.py +95 -0
  65. src/security/checks/secrets_checks.py +96 -0
  66. src/security/checks/sg_checks.py +142 -0
  67. src/security/cis_mapper.py +97 -0
  68. src/security/models.py +53 -0
  69. src/security/reporter.py +174 -0
  70. src/security/scanner.py +87 -0
  71. src/snapshot/__init__.py +6 -0
  72. src/snapshot/capturer.py +453 -0
  73. src/snapshot/filter.py +259 -0
  74. src/snapshot/inventory_storage.py +236 -0
  75. src/snapshot/report_formatter.py +250 -0
  76. src/snapshot/reporter.py +189 -0
  77. src/snapshot/resource_collectors/__init__.py +5 -0
  78. src/snapshot/resource_collectors/apigateway.py +140 -0
  79. src/snapshot/resource_collectors/backup.py +136 -0
  80. src/snapshot/resource_collectors/base.py +81 -0
  81. src/snapshot/resource_collectors/cloudformation.py +55 -0
  82. src/snapshot/resource_collectors/cloudwatch.py +109 -0
  83. src/snapshot/resource_collectors/codebuild.py +69 -0
  84. src/snapshot/resource_collectors/codepipeline.py +82 -0
  85. src/snapshot/resource_collectors/dynamodb.py +65 -0
  86. src/snapshot/resource_collectors/ec2.py +240 -0
  87. src/snapshot/resource_collectors/ecs.py +215 -0
  88. src/snapshot/resource_collectors/efs_collector.py +102 -0
  89. src/snapshot/resource_collectors/eks.py +200 -0
  90. src/snapshot/resource_collectors/elasticache_collector.py +79 -0
  91. src/snapshot/resource_collectors/elb.py +126 -0
  92. src/snapshot/resource_collectors/eventbridge.py +156 -0
  93. src/snapshot/resource_collectors/glue.py +199 -0
  94. src/snapshot/resource_collectors/iam.py +188 -0
  95. src/snapshot/resource_collectors/kms.py +111 -0
  96. src/snapshot/resource_collectors/lambda_func.py +139 -0
  97. src/snapshot/resource_collectors/rds.py +109 -0
  98. src/snapshot/resource_collectors/route53.py +86 -0
  99. src/snapshot/resource_collectors/s3.py +105 -0
  100. src/snapshot/resource_collectors/secretsmanager.py +70 -0
  101. src/snapshot/resource_collectors/sns.py +68 -0
  102. src/snapshot/resource_collectors/sqs.py +82 -0
  103. src/snapshot/resource_collectors/ssm.py +160 -0
  104. src/snapshot/resource_collectors/stepfunctions.py +74 -0
  105. src/snapshot/resource_collectors/vpcendpoints.py +79 -0
  106. src/snapshot/resource_collectors/waf.py +159 -0
  107. src/snapshot/storage.py +351 -0
  108. src/storage/__init__.py +21 -0
  109. src/storage/audit_store.py +419 -0
  110. src/storage/database.py +294 -0
  111. src/storage/group_store.py +763 -0
  112. src/storage/inventory_store.py +320 -0
  113. src/storage/resource_store.py +416 -0
  114. src/storage/schema.py +339 -0
  115. src/storage/snapshot_store.py +363 -0
  116. src/utils/__init__.py +12 -0
  117. src/utils/export.py +305 -0
  118. src/utils/hash.py +60 -0
  119. src/utils/logging.py +63 -0
  120. src/utils/pagination.py +41 -0
  121. src/utils/paths.py +51 -0
  122. src/utils/progress.py +41 -0
  123. src/utils/unsupported_resources.py +306 -0
  124. src/web/__init__.py +5 -0
  125. src/web/app.py +97 -0
  126. src/web/dependencies.py +69 -0
  127. src/web/routes/__init__.py +1 -0
  128. src/web/routes/api/__init__.py +18 -0
  129. src/web/routes/api/charts.py +156 -0
  130. src/web/routes/api/cleanup.py +186 -0
  131. src/web/routes/api/filters.py +253 -0
  132. src/web/routes/api/groups.py +305 -0
  133. src/web/routes/api/inventories.py +80 -0
  134. src/web/routes/api/queries.py +202 -0
  135. src/web/routes/api/resources.py +393 -0
  136. src/web/routes/api/snapshots.py +314 -0
  137. src/web/routes/api/views.py +260 -0
  138. src/web/routes/pages.py +198 -0
  139. src/web/services/__init__.py +1 -0
  140. src/web/templates/base.html +955 -0
  141. src/web/templates/components/navbar.html +31 -0
  142. src/web/templates/components/sidebar.html +104 -0
  143. src/web/templates/pages/audit_logs.html +86 -0
  144. src/web/templates/pages/cleanup.html +279 -0
  145. src/web/templates/pages/dashboard.html +227 -0
  146. src/web/templates/pages/diff.html +175 -0
  147. src/web/templates/pages/error.html +30 -0
  148. src/web/templates/pages/groups.html +721 -0
  149. src/web/templates/pages/queries.html +246 -0
  150. src/web/templates/pages/resources.html +2429 -0
  151. src/web/templates/pages/snapshot_detail.html +271 -0
  152. src/web/templates/pages/snapshots.html +429 -0
@@ -0,0 +1,450 @@
1
+ """Resource name normalizer using rules and AI."""
2
+
3
+ import json
4
+ import logging
5
+ import re
6
+ import time
7
+ from dataclasses import dataclass, field
8
+ from typing import Any, Dict, List, Optional, Tuple
9
+
10
+ from .config import NormalizerConfig
11
+ from .prompts import NORMALIZATION_SYSTEM_PROMPT
12
+
13
+
14
@dataclass
class NormalizationResult:
    """Outcome of normalizing one resource name.

    Bundles the normalized name with the fragments that were removed, a label
    describing how the name was derived, and a reliability score.
    """

    # Semantic part of the name left after auto-generated components are stripped.
    normalized_name: str
    # Fragments stripped from the original name; each instance gets its own list.
    extracted_patterns: List[str] = field(default_factory=list)
    # How the name was derived: 'tag:logical-id', 'tag:Name', 'pattern' or 'none'.
    method: str = "none"
    # Reliability of the normalization, expressed as a score from 0.0 to 1.0.
    confidence: float = 0.9
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+ # Try to import openai, but don't fail if not installed
33
+ try:
34
+ from openai import OpenAI
35
+
36
+ OPENAI_AVAILABLE = True
37
+ except ImportError:
38
+ OPENAI_AVAILABLE = False
39
+ OpenAI = None
40
+
41
+
42
+ class ResourceNormalizer:
43
+ """Normalize resource names using rules-based and AI approaches.
44
+
45
+ The normalizer first tries rules-based normalization for obvious cases,
46
+ then falls back to AI for ambiguous names.
47
+ """
48
+
49
def __init__(self, config: Optional[NormalizerConfig] = None):
    """Set up the normalizer state.

    Args:
        config: Normalization settings. A falsy value (including None) makes
            the constructor load settings via NormalizerConfig.from_env().
    """
    if not config:
        config = NormalizerConfig.from_env()
    self.config = config

    # The AI client is created on first access of the `client` property.
    self._client: Optional[Any] = None
    # Running total of tokens reported by AI responses.
    self._total_tokens = 0

    # Compile the configured regexes once so later checks only run searches.
    self._random_patterns = list(map(re.compile, self.config.random_patterns))
61
+
62
+ @property
63
+ def client(self) -> Optional[Any]:
64
+ """Lazy-init OpenAI client."""
65
+ if self._client is None and self.config.is_ai_enabled:
66
+ if not OPENAI_AVAILABLE:
67
+ logger.warning("OpenAI package not installed. Install with: pip install openai")
68
+ return None
69
+
70
+ self._client = OpenAI(
71
+ api_key=self.config.api_key,
72
+ base_url=self.config.base_url,
73
+ timeout=self.config.timeout_seconds,
74
+ )
75
+ return self._client
76
+
77
+ @property
78
+ def tokens_used(self) -> int:
79
+ """Total tokens used for AI normalization."""
80
+ return self._total_tokens
81
+
82
def normalize_resources(
    self,
    resources: List[Dict[str, Any]],
    use_ai: bool = True,
) -> Dict[str, str]:
    """Normalize the names of a list of resources.

    Rules are tried first for every resource. Whatever the rules cannot
    resolve is sent to the AI when it is requested and a client is available;
    otherwise those names only get basic string normalization.

    Args:
        resources: Resource dicts; the keys 'arn', 'name', 'tags' and
            'resource_type' are read from them.
        use_ai: Whether the AI may be used for names the rules cannot resolve.

    Returns:
        Dict mapping ARN -> normalized_name. A resource without an 'arn' key
        is stored under the empty string.
    """
    results: Dict[str, str] = {}
    needs_ai: List[Dict[str, Any]] = []

    # Phase 1: rules-based pass; unresolved resources are queued for phase 2.
    for resource in resources:
        arn = resource.get("arn", "")
        normalized = self._try_rules_based(resource)
        if not normalized:
            needs_ai.append(resource)
            continue
        results[arn] = normalized
        logger.debug(f"Rules-based: {resource.get('name')} -> {normalized}")

    logger.info(
        f"Normalization: {len(results)} via rules, {len(needs_ai)} need AI"
    )

    if not needs_ai:
        return results

    # Phase 2: AI pass, only when allowed and a client can be obtained.
    if use_ai and self.client:
        ai_results = self._normalize_with_ai(needs_ai)
        results.update(ai_results)
        logger.info(f"AI normalized {len(ai_results)} resources")
        return results

    # No AI available: fall back to basic normalization of the physical name.
    for resource in needs_ai:
        arn = resource.get("arn", "")
        name = resource.get("name", "")
        results[arn] = self._basic_normalize(name)
        logger.debug(f"Fallback: {name} -> {results[arn]}")

    return results
128
+
129
def normalize_single(
    self,
    name: str,
    resource_type: str,
    tags: Optional[Dict[str, str]] = None,
) -> NormalizationResult:
    """Normalize one resource name and report how the result was derived.

    The sources are tried in a fixed order and the first that applies wins:
    CloudFormation logical-id tag, clean Name tag, AWS-generated ID check,
    pattern extraction from the physical name, and finally the name as-is.

    Args:
        name: Physical resource name
        resource_type: AWS resource type (e.g., 'AWS::Lambda::Function')
        tags: Resource tags; None is treated as no tags

    Returns:
        NormalizationResult with normalized_name, extracted_patterns, method, confidence
    """
    tag_map = tags or {}

    # Priority 1: the CloudFormation logical ID tag.
    cfn_logical_id = tag_map.get("aws:cloudformation:logical-id")
    if cfn_logical_id:
        return NormalizationResult(
            normalized_name=self._basic_normalize(cfn_logical_id),
            extracted_patterns=[],
            method="tag:logical-id",
            confidence=1.0,
        )

    # Priority 2: the Name tag, unless it contains random-looking patterns.
    name_tag = tag_map.get("Name")
    if name_tag and not self._has_random_patterns(name_tag):
        return NormalizationResult(
            normalized_name=self._basic_normalize(name_tag),
            extracted_patterns=[],
            method="tag:Name",
            confidence=0.95,
        )

    if self._is_aws_resource_id(name, resource_type):
        # Priority 3: the whole name is an AWS-generated ID (subnet-xxx,
        # vpc-xxx, ...). Nothing semantic to keep, so confidence is 0.0.
        base, patterns, how, score = name, [name], "none", 0.0
    else:
        stripped, patterns = self._extract_patterns(name)
        if patterns:
            # Priority 4: auto-generated fragments were stripped from the name.
            base, how, score = stripped, "pattern", 0.8
        else:
            # Priority 5: the name is already clean.
            base, patterns, how, score = name, [], "none", 0.9

    return NormalizationResult(
        normalized_name=self._basic_normalize(base),
        extracted_patterns=patterns,
        method=how,
        confidence=score,
    )
197
+
198
+ def _is_aws_resource_id(self, name: str, resource_type: str) -> bool:
199
+ """Check if name is entirely an AWS-generated resource ID.
200
+
201
+ These IDs (subnet-xxx, vpc-xxx, vol-xxx, etc.) are stable but
202
+ provide no semantic meaning without a Name tag.
203
+ """
204
+ # Map resource types to their ID patterns
205
+ aws_id_patterns = {
206
+ "AWS::EC2::Subnet": r"^subnet-[a-f0-9]+$",
207
+ "AWS::EC2::VPC": r"^vpc-[a-f0-9]+$",
208
+ "AWS::EC2::SecurityGroup": r"^sg-[a-f0-9]+$",
209
+ "AWS::EC2::Volume": r"^vol-[a-f0-9]+$",
210
+ "AWS::EC2::Instance": r"^i-[a-f0-9]+$",
211
+ "AWS::EC2::InternetGateway": r"^igw-[a-f0-9]+$",
212
+ "AWS::EC2::RouteTable": r"^rtb-[a-f0-9]+$",
213
+ "AWS::EC2::NetworkAcl": r"^acl-[a-f0-9]+$",
214
+ "AWS::EC2::NetworkInterface": r"^eni-[a-f0-9]+$",
215
+ "AWS::EC2::NatGateway": r"^nat-[a-f0-9]+$",
216
+ "AWS::EC2::EIP": r"^eipalloc-[a-f0-9]+$",
217
+ }
218
+
219
+ pattern = aws_id_patterns.get(resource_type)
220
+ if pattern:
221
+ # AWS resource IDs are always lowercase hex, no IGNORECASE needed
222
+ return bool(re.match(pattern, name))
223
+ return False
224
+
225
+ def _extract_patterns(self, name: str) -> Tuple[str, List[str]]:
226
+ """Extract auto-generated patterns from name.
227
+
228
+ Strips common patterns like CloudFormation suffixes, account IDs,
229
+ regions, timestamps, etc.
230
+
231
+ Returns:
232
+ Tuple of (cleaned_name, list_of_extracted_patterns)
233
+ """
234
+ extracted = []
235
+ result = name
236
+
237
+ # Patterns to extract (ordered by specificity)
238
+ extraction_patterns = [
239
+ # CloudFormation suffix (uppercase alphanumeric, 8-13 chars at end)
240
+ (r"-[A-Z0-9]{8,13}$", "cfn_suffix"),
241
+ # Bedrock/Kendra suffix (underscore + lowercase alphanumeric)
242
+ (r"_[a-z0-9]{4,6}$", "bedrock_suffix"),
243
+ # Account ID (12 digits, with optional surrounding hyphens)
244
+ (r"-?\d{12}-?", "account_id"),
245
+ # Region (e.g., us-east-1, eu-west-2)
246
+ (r"-?(us|eu|ap|sa|ca|me|af)-(east|west|north|south|central|northeast|southeast)-\d-?", "region"),
247
+ # Hex suffix (8+ lowercase hex chars at end)
248
+ (r"-[a-f0-9]{8,}$", "hex_suffix"),
249
+ # Timestamp suffix (8-14 digits at end)
250
+ (r"-\d{8,14}$", "timestamp"),
251
+ ]
252
+
253
+ for pattern, _pattern_name in extraction_patterns:
254
+ # Note: Don't use IGNORECASE - CloudFormation suffixes are uppercase,
255
+ # and we want case-sensitive matching for accuracy
256
+ match = re.search(pattern, result)
257
+ if match:
258
+ extracted.append(match.group().strip("-"))
259
+ result = result[: match.start()] + result[match.end() :]
260
+
261
+ # Clean up trailing/leading separators
262
+ result = re.sub(r"^[-_]+|[-_]+$", "", result)
263
+ # Collapse multiple separators
264
+ result = re.sub(r"[-_]{2,}", "-", result)
265
+
266
+ return result, extracted
267
+
268
+ def _try_rules_based(self, resource: Dict[str, Any]) -> Optional[str]:
269
+ """Try to normalize using rules.
270
+
271
+ Priority:
272
+ 1. CloudFormation logical ID tag
273
+ 2. Name tag (if clean)
274
+ 3. Physical name (if clean)
275
+
276
+ Returns None if name appears to have random patterns.
277
+ """
278
+ tags = resource.get("tags", {}) or {}
279
+ name = resource.get("name", "")
280
+
281
+ # 1. CloudFormation logical ID is the best canonical identifier
282
+ logical_id = tags.get("aws:cloudformation:logical-id")
283
+ if logical_id:
284
+ return self._basic_normalize(logical_id)
285
+
286
+ # 2. Name tag (if it looks clean)
287
+ name_tag = tags.get("Name")
288
+ if name_tag and not self._has_random_patterns(name_tag):
289
+ return self._basic_normalize(name_tag)
290
+
291
+ # 3. Physical name (if it looks clean)
292
+ if name and not self._has_random_patterns(name):
293
+ return self._basic_normalize(name)
294
+
295
+ # Name has random patterns - needs AI
296
+ return None
297
+
298
+ def _has_random_patterns(self, name: str) -> bool:
299
+ """Check if name contains random-looking patterns."""
300
+ if not name:
301
+ return True
302
+
303
+ for pattern in self._random_patterns:
304
+ if pattern.search(name):
305
+ return True
306
+ return False
307
+
308
+ def _basic_normalize(self, name: str) -> str:
309
+ """Basic string normalization without AI.
310
+
311
+ - Lowercase
312
+ - Replace underscores/spaces with hyphens
313
+ - Strip leading/trailing hyphens
314
+ """
315
+ if not name:
316
+ return ""
317
+
318
+ result = name.lower()
319
+ result = re.sub(r"[_\s]+", "-", result)
320
+ result = re.sub(r"-+", "-", result)
321
+ return result.strip("-")
322
+
323
+ def _normalize_with_ai(
324
+ self,
325
+ resources: List[Dict[str, Any]],
326
+ ) -> Dict[str, str]:
327
+ """Normalize resources using AI.
328
+
329
+ Batches resources and calls the AI API.
330
+
331
+ Args:
332
+ resources: Resources that need AI normalization
333
+
334
+ Returns:
335
+ Dict mapping ARN -> normalized_name
336
+ """
337
+ results: Dict[str, str] = {}
338
+
339
+ # Process in batches
340
+ for i in range(0, len(resources), self.config.max_batch_size):
341
+ batch = resources[i : i + self.config.max_batch_size]
342
+ batch_results = self._process_ai_batch(batch)
343
+ results.update(batch_results)
344
+
345
+ return results
346
+
347
def _process_ai_batch(
    self,
    resources: List[Dict[str, Any]],
) -> Dict[str, str]:
    """Send one batch of resources to the AI and return the normalized names.

    The request is attempted up to config.max_retries times, sleeping
    2**attempt seconds between attempts. If every attempt fails, each
    resource falls back to basic normalization of its physical name.

    Args:
        resources: Batch of resources

    Returns:
        Dict mapping ARN -> normalized_name
    """
    # Build the user prompt with resource details
    resource_list = []
    for r in resources:
        item = {
            "arn": r.get("arn", ""),
            "name": r.get("name", ""),
            "type": r.get("resource_type", ""),
        }
        # Include Name tag if present
        tags = r.get("tags", {}) or {}
        if tags.get("Name"):
            item["name_tag"] = tags["Name"]
        resource_list.append(item)

    user_prompt = json.dumps({"resources": resource_list}, indent=2)

    max_retries = self.config.max_retries

    # Call the AI with retries
    for attempt in range(max_retries):
        try:
            response = self.client.chat.completions.create(
                model=self.config.model,
                messages=[
                    {"role": "system", "content": NORMALIZATION_SYSTEM_PROMPT},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=0.1,  # Low for consistency
            )

            # Track token usage
            if hasattr(response, "usage") and response.usage:
                self._total_tokens += response.usage.total_tokens

            # Parse response
            content = response.choices[0].message.content
            return self._parse_ai_response(content, resources)

        except Exception as e:
            # Deliberately broad: any failure of the call or of parsing is
            # treated as retryable, and the fallback below keeps the batch
            # from failing outright.
            if attempt >= max_retries - 1:
                # Last attempt: nothing will be retried, so do not claim so.
                logger.warning(
                    f"AI normalization attempt {attempt + 1} failed: {e}. "
                    f"No retries left."
                )
                continue

            wait_time = 2**attempt  # exponential backoff: 1s, 2s, 4s, ...
            logger.warning(
                f"AI normalization attempt {attempt + 1} failed: {e}. "
                f"Retrying in {wait_time}s..."
            )
            time.sleep(wait_time)

    # All retries failed - use fallback
    logger.error("AI normalization failed after all retries")
    return {
        r.get("arn", ""): self._basic_normalize(r.get("name", ""))
        for r in resources
    }
410
+
411
+ def _parse_ai_response(
412
+ self,
413
+ content: str,
414
+ resources: List[Dict[str, Any]],
415
+ ) -> Dict[str, str]:
416
+ """Parse AI response into ARN -> normalized_name mapping.
417
+
418
+ Args:
419
+ content: AI response content (JSON string)
420
+ resources: Original resources (for fallback)
421
+
422
+ Returns:
423
+ Dict mapping ARN -> normalized_name
424
+ """
425
+ try:
426
+ data = json.loads(content)
427
+ normalizations = data.get("normalizations", [])
428
+
429
+ results = {}
430
+ for norm in normalizations:
431
+ arn = norm.get("arn", "")
432
+ normalized_name = norm.get("normalized_name", "")
433
+ if arn and normalized_name:
434
+ results[arn] = normalized_name
435
+
436
+ # Fallback for any missing
437
+ for r in resources:
438
+ arn = r.get("arn", "")
439
+ if arn and arn not in results:
440
+ results[arn] = self._basic_normalize(r.get("name", ""))
441
+
442
+ return results
443
+
444
+ except json.JSONDecodeError as e:
445
+ logger.error(f"Failed to parse AI response as JSON: {e}")
446
+ logger.debug(f"Response content: {content[:500]}...")
447
+ return {
448
+ r.get("arn", ""): self._basic_normalize(r.get("name", ""))
449
+ for r in resources
450
+ }
@@ -0,0 +1,33 @@
1
+ """AI prompts for resource name normalization."""
2
+
3
+ NORMALIZATION_SYSTEM_PROMPT = """You are an AWS resource name normalizer for cross-account infrastructure matching.
4
+
5
+ Your job is to extract the "logical identity" from AWS resource names by removing:
6
+ - Random suffixes (hex, alphanumeric, CloudFormation-generated)
7
+ - AWS account IDs (12-digit numbers)
8
+ - Region names (us-east-1, eu-west-2, etc.)
9
+ - Stack name prefixes (MyStack-, Stack-, etc.)
10
+ - AWS resource ID prefixes (subnet-, vpc-, vol-, i-, sg-, etc.)
11
+ - Timestamps and dates embedded in names
12
+
13
+ Keep the meaningful, purpose-identifying parts of the name.
14
+
15
+ Rules:
16
+ 1. Output should be lowercase with hyphens (no underscores, no spaces)
17
+ 2. If the name is already clean and meaningful, return it as-is (lowercase)
18
+ 3. Preserve the semantic meaning - "policy-executor" not just "executor"
19
+ 4. For AWS-generated IDs (subnet-xxx, vpc-xxx), use the Name tag if provided
20
+ 5. Strip common AWS service prefixes that don't add meaning
21
+
22
+ Examples:
23
+ - "cloud-custodian-480738299408-policy-executor-abc123" → "cloud-custodian-policy-executor"
24
+ - "AmazonBedrockExecutionRoleForKnowledgeBase_jnwn1" → "bedrock-knowledge-base-execution-role"
25
+ - "MyStack-ProcessorLambda-XYZ789ABC" → "processor-lambda"
26
+ - "daybreak-transcribe-processor" → "daybreak-transcribe-processor" (already clean)
27
+ - "AWSServiceRoleForOrganizations" → "aws-service-role-organizations"
28
+ - "d-9067239ebb_controllers" → "directory-controllers"
29
+ - Resource with name "subnet-abc123def" and Name tag "Private-Subnet-AZ1" → "private-subnet-az1"
30
+
31
+ Respond ONLY with valid JSON in this exact format:
32
+ {"normalizations": [{"arn": "arn:aws:...", "normalized_name": "..."}]}
33
+ """
src/models/__init__.py ADDED
@@ -0,0 +1,21 @@
1
+ """Data models for AWS Baseline Snapshot tool."""
2
+
3
+ from .cost_report import CostBreakdown, CostReport
4
+ from .delta_report import DeltaReport, ResourceChange
5
+ from .group import GroupMember, ResourceGroup, extract_resource_name
6
+ from .inventory import Inventory
7
+ from .resource import Resource
8
+ from .snapshot import Snapshot
9
+
10
+ __all__ = [
11
+ "Snapshot",
12
+ "Resource",
13
+ "DeltaReport",
14
+ "ResourceChange",
15
+ "CostReport",
16
+ "CostBreakdown",
17
+ "Inventory",
18
+ "ResourceGroup",
19
+ "GroupMember",
20
+ "extract_resource_name",
21
+ ]
@@ -0,0 +1,135 @@
1
+ """Configuration diff model for representing field-level changes between snapshots."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from enum import Enum
7
+ from typing import Any
8
+
9
+
10
class ChangeCategory(Enum):
    """Categories of configuration changes."""

    TAGS = "tags"
    CONFIGURATION = "configuration"
    SECURITY = "security"
    PERMISSIONS = "permissions"


# Security-critical field patterns that should be flagged.
# Matching is a case-insensitive substring test against the field path.
SECURITY_CRITICAL_FIELDS = {
    "PubliclyAccessible",
    "public",
    "encryption",
    "kms",
    "SecurityGroups",
    "IpPermissions",
    "IpPermissionsEgress",
    "Policy",
    "BucketPolicy",
    "Acl",
    "HttpTokens",  # IMDSv2
    "MetadataOptions",
}


@dataclass
class ConfigDiff:
    """Represents a field-level configuration change between two resource snapshots.

    Attributes:
        resource_arn: AWS ARN of the resource that changed
        field_path: Dot-notation path to the changed field (e.g., "Tags.Environment")
        old_value: Previous value of the field (None if field was added)
        new_value: New value of the field (None if field was removed)
        category: Category of the change (tags/configuration/security/permissions)
    """

    resource_arn: str
    field_path: str
    old_value: Any
    new_value: Any
    category: ChangeCategory

    def __post_init__(self) -> None:
        """Validate ConfigDiff fields after initialization.

        Raises:
            ValueError: If the ARN is not a string starting with "arn:", the
                field path is empty, or the category is not a ChangeCategory.
        """
        # Validate ARN format (basic check). The isinstance test keeps a
        # non-string ARN from surfacing as an AttributeError.
        if not isinstance(self.resource_arn, str) or not self.resource_arn.startswith("arn:"):
            raise ValueError(f"Invalid ARN format: {self.resource_arn}")

        # Validate field_path is not empty
        if not self.field_path:
            raise ValueError("field_path cannot be empty")

        # Validate category is ChangeCategory enum
        if not isinstance(self.category, ChangeCategory):
            raise ValueError(f"Invalid category type: {type(self.category)}. Must be ChangeCategory enum.")

    def with_path_prefix(self, prefix: str) -> ConfigDiff:
        """Create a new ConfigDiff with a prefix added to the field path.

        Args:
            prefix: Prefix to add to the field path. An empty prefix leaves
                the path unchanged instead of producing a leading dot.

        Returns:
            New ConfigDiff instance with prefixed field path
        """
        field_path = f"{prefix}.{self.field_path}" if prefix else self.field_path
        return ConfigDiff(
            resource_arn=self.resource_arn,
            field_path=field_path,
            old_value=self.old_value,
            new_value=self.new_value,
            category=self.category,
        )

    def is_security_critical(self) -> bool:
        """Check if this configuration change affects security-related settings.

        Returns:
            True if the field path contains any SECURITY_CRITICAL_FIELDS entry
            (compared case-insensitively), False otherwise
        """
        field_lower = self.field_path.lower()
        return any(keyword.lower() in field_lower for keyword in SECURITY_CRITICAL_FIELDS)

    def to_dict(self) -> dict[str, Any]:
        """Convert ConfigDiff to dictionary representation.

        Returns:
            Dictionary with all diff attributes, the category as its string
            value, plus a derived "security_critical" flag
        """
        return {
            "resource_arn": self.resource_arn,
            "field_path": self.field_path,
            "old_value": self.old_value,
            "new_value": self.new_value,
            "category": self.category.value,
            "security_critical": self.is_security_critical(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ConfigDiff:
        """Create ConfigDiff from dictionary representation.

        Args:
            data: Dictionary with diff attributes. "category" may be the
                category's string value (any letter case) or a ChangeCategory
                member. Keys other than the five attributes are ignored.

        Returns:
            ConfigDiff instance

        Raises:
            ValueError: If category value is invalid
            KeyError: If resource_arn, field_path, old_value or new_value is missing
        """
        raw_category = data.get("category", "")
        if isinstance(raw_category, ChangeCategory):
            category = raw_category
        else:
            # Only strings can be lowercased; anything else goes to the enum
            # lookup unchanged so it fails with ValueError, not AttributeError.
            category_str = raw_category.lower() if isinstance(raw_category, str) else raw_category
            try:
                category = ChangeCategory(category_str)
            except ValueError as err:
                raise ValueError(f"Invalid category value: {category_str}") from err

        return cls(
            resource_arn=data["resource_arn"],
            field_path=data["field_path"],
            old_value=data["old_value"],
            new_value=data["new_value"],
            category=category,
        )