aws-inventory-manager 0.17.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (152) hide show
  1. aws_inventory_manager-0.17.12.dist-info/LICENSE +21 -0
  2. aws_inventory_manager-0.17.12.dist-info/METADATA +1292 -0
  3. aws_inventory_manager-0.17.12.dist-info/RECORD +152 -0
  4. aws_inventory_manager-0.17.12.dist-info/WHEEL +5 -0
  5. aws_inventory_manager-0.17.12.dist-info/entry_points.txt +2 -0
  6. aws_inventory_manager-0.17.12.dist-info/top_level.txt +1 -0
  7. src/__init__.py +3 -0
  8. src/aws/__init__.py +11 -0
  9. src/aws/client.py +128 -0
  10. src/aws/credentials.py +191 -0
  11. src/aws/rate_limiter.py +177 -0
  12. src/cli/__init__.py +12 -0
  13. src/cli/config.py +130 -0
  14. src/cli/main.py +4046 -0
  15. src/cloudtrail/__init__.py +5 -0
  16. src/cloudtrail/query.py +642 -0
  17. src/config_service/__init__.py +21 -0
  18. src/config_service/collector.py +346 -0
  19. src/config_service/detector.py +256 -0
  20. src/config_service/resource_type_mapping.py +328 -0
  21. src/cost/__init__.py +5 -0
  22. src/cost/analyzer.py +226 -0
  23. src/cost/explorer.py +209 -0
  24. src/cost/reporter.py +237 -0
  25. src/delta/__init__.py +5 -0
  26. src/delta/calculator.py +206 -0
  27. src/delta/differ.py +185 -0
  28. src/delta/formatters.py +272 -0
  29. src/delta/models.py +154 -0
  30. src/delta/reporter.py +234 -0
  31. src/matching/__init__.py +6 -0
  32. src/matching/config.py +52 -0
  33. src/matching/normalizer.py +450 -0
  34. src/matching/prompts.py +33 -0
  35. src/models/__init__.py +21 -0
  36. src/models/config_diff.py +135 -0
  37. src/models/cost_report.py +87 -0
  38. src/models/deletion_operation.py +104 -0
  39. src/models/deletion_record.py +97 -0
  40. src/models/delta_report.py +122 -0
  41. src/models/efs_resource.py +80 -0
  42. src/models/elasticache_resource.py +90 -0
  43. src/models/group.py +318 -0
  44. src/models/inventory.py +133 -0
  45. src/models/protection_rule.py +123 -0
  46. src/models/report.py +288 -0
  47. src/models/resource.py +111 -0
  48. src/models/security_finding.py +102 -0
  49. src/models/snapshot.py +122 -0
  50. src/restore/__init__.py +20 -0
  51. src/restore/audit.py +175 -0
  52. src/restore/cleaner.py +461 -0
  53. src/restore/config.py +209 -0
  54. src/restore/deleter.py +976 -0
  55. src/restore/dependency.py +254 -0
  56. src/restore/safety.py +115 -0
  57. src/security/__init__.py +0 -0
  58. src/security/checks/__init__.py +0 -0
  59. src/security/checks/base.py +56 -0
  60. src/security/checks/ec2_checks.py +88 -0
  61. src/security/checks/elasticache_checks.py +149 -0
  62. src/security/checks/iam_checks.py +102 -0
  63. src/security/checks/rds_checks.py +140 -0
  64. src/security/checks/s3_checks.py +95 -0
  65. src/security/checks/secrets_checks.py +96 -0
  66. src/security/checks/sg_checks.py +142 -0
  67. src/security/cis_mapper.py +97 -0
  68. src/security/models.py +53 -0
  69. src/security/reporter.py +174 -0
  70. src/security/scanner.py +87 -0
  71. src/snapshot/__init__.py +6 -0
  72. src/snapshot/capturer.py +453 -0
  73. src/snapshot/filter.py +259 -0
  74. src/snapshot/inventory_storage.py +236 -0
  75. src/snapshot/report_formatter.py +250 -0
  76. src/snapshot/reporter.py +189 -0
  77. src/snapshot/resource_collectors/__init__.py +5 -0
  78. src/snapshot/resource_collectors/apigateway.py +140 -0
  79. src/snapshot/resource_collectors/backup.py +136 -0
  80. src/snapshot/resource_collectors/base.py +81 -0
  81. src/snapshot/resource_collectors/cloudformation.py +55 -0
  82. src/snapshot/resource_collectors/cloudwatch.py +109 -0
  83. src/snapshot/resource_collectors/codebuild.py +69 -0
  84. src/snapshot/resource_collectors/codepipeline.py +82 -0
  85. src/snapshot/resource_collectors/dynamodb.py +65 -0
  86. src/snapshot/resource_collectors/ec2.py +240 -0
  87. src/snapshot/resource_collectors/ecs.py +215 -0
  88. src/snapshot/resource_collectors/efs_collector.py +102 -0
  89. src/snapshot/resource_collectors/eks.py +200 -0
  90. src/snapshot/resource_collectors/elasticache_collector.py +79 -0
  91. src/snapshot/resource_collectors/elb.py +126 -0
  92. src/snapshot/resource_collectors/eventbridge.py +156 -0
  93. src/snapshot/resource_collectors/glue.py +199 -0
  94. src/snapshot/resource_collectors/iam.py +188 -0
  95. src/snapshot/resource_collectors/kms.py +111 -0
  96. src/snapshot/resource_collectors/lambda_func.py +139 -0
  97. src/snapshot/resource_collectors/rds.py +109 -0
  98. src/snapshot/resource_collectors/route53.py +86 -0
  99. src/snapshot/resource_collectors/s3.py +105 -0
  100. src/snapshot/resource_collectors/secretsmanager.py +70 -0
  101. src/snapshot/resource_collectors/sns.py +68 -0
  102. src/snapshot/resource_collectors/sqs.py +82 -0
  103. src/snapshot/resource_collectors/ssm.py +160 -0
  104. src/snapshot/resource_collectors/stepfunctions.py +74 -0
  105. src/snapshot/resource_collectors/vpcendpoints.py +79 -0
  106. src/snapshot/resource_collectors/waf.py +159 -0
  107. src/snapshot/storage.py +351 -0
  108. src/storage/__init__.py +21 -0
  109. src/storage/audit_store.py +419 -0
  110. src/storage/database.py +294 -0
  111. src/storage/group_store.py +763 -0
  112. src/storage/inventory_store.py +320 -0
  113. src/storage/resource_store.py +416 -0
  114. src/storage/schema.py +339 -0
  115. src/storage/snapshot_store.py +363 -0
  116. src/utils/__init__.py +12 -0
  117. src/utils/export.py +305 -0
  118. src/utils/hash.py +60 -0
  119. src/utils/logging.py +63 -0
  120. src/utils/pagination.py +41 -0
  121. src/utils/paths.py +51 -0
  122. src/utils/progress.py +41 -0
  123. src/utils/unsupported_resources.py +306 -0
  124. src/web/__init__.py +5 -0
  125. src/web/app.py +97 -0
  126. src/web/dependencies.py +69 -0
  127. src/web/routes/__init__.py +1 -0
  128. src/web/routes/api/__init__.py +18 -0
  129. src/web/routes/api/charts.py +156 -0
  130. src/web/routes/api/cleanup.py +186 -0
  131. src/web/routes/api/filters.py +253 -0
  132. src/web/routes/api/groups.py +305 -0
  133. src/web/routes/api/inventories.py +80 -0
  134. src/web/routes/api/queries.py +202 -0
  135. src/web/routes/api/resources.py +393 -0
  136. src/web/routes/api/snapshots.py +314 -0
  137. src/web/routes/api/views.py +260 -0
  138. src/web/routes/pages.py +198 -0
  139. src/web/services/__init__.py +1 -0
  140. src/web/templates/base.html +955 -0
  141. src/web/templates/components/navbar.html +31 -0
  142. src/web/templates/components/sidebar.html +104 -0
  143. src/web/templates/pages/audit_logs.html +86 -0
  144. src/web/templates/pages/cleanup.html +279 -0
  145. src/web/templates/pages/dashboard.html +227 -0
  146. src/web/templates/pages/diff.html +175 -0
  147. src/web/templates/pages/error.html +30 -0
  148. src/web/templates/pages/groups.html +721 -0
  149. src/web/templates/pages/queries.html +246 -0
  150. src/web/templates/pages/resources.html +2429 -0
  151. src/web/templates/pages/snapshot_detail.html +271 -0
  152. src/web/templates/pages/snapshots.html +429 -0
@@ -0,0 +1,5 @@
1
+ """CloudTrail query module for resource provenance tracking."""
2
+
3
+ from .query import CloudTrailQuery, ResourceCreationEvent
4
+
5
+ __all__ = ["CloudTrailQuery", "ResourceCreationEvent"]
@@ -0,0 +1,642 @@
1
+ """CloudTrail query for resource creation events."""
2
+
3
+ import json
4
+ import logging
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
6
+ from dataclasses import dataclass
7
+ from datetime import datetime, timedelta, timezone
8
+ from typing import Dict, List, Optional, Set
9
+
10
+ from ..aws.client import create_boto_client
11
+
12
logger = logging.getLogger(__name__)

# Map of CloudTrail event names to the resource types they create.
#
# NOTE: CloudTrail event names are only unique *within* a service.  Two names
# in this table are emitted by more than one of the services covered here:
#   * "CreateCluster" -> ECS and EKS
#   * "CreateTable"   -> DynamoDB and Glue
# A dict keyed by name alone can hold only one value per name, so the values
# below for those two names are merely the fallback used when the event source
# is unknown.  Always resolve types through _resolve_resource_type(), which
# consults _SOURCE_SPECIFIC_RESOURCE_TYPES first.
EVENT_TO_RESOURCE_TYPE: Dict[str, str] = {
    # EC2
    "RunInstances": "AWS::EC2::Instance",
    "CreateVolume": "AWS::EC2::Volume",
    "CreateVpc": "AWS::EC2::VPC",
    "CreateSubnet": "AWS::EC2::Subnet",
    "CreateSecurityGroup": "AWS::EC2::SecurityGroup",
    "CreateVpcEndpoint": "AWS::EC2::VPCEndpoint",
    # Lambda
    "CreateFunction20150331": "AWS::Lambda::Function",
    "CreateFunction": "AWS::Lambda::Function",
    # S3
    "CreateBucket": "AWS::S3::Bucket",
    # RDS
    "CreateDBInstance": "AWS::RDS::DBInstance",
    "CreateDBCluster": "AWS::RDS::DBCluster",
    # DynamoDB ("CreateTable" is shared with Glue; see the Glue section)
    # IAM
    "CreateRole": "AWS::IAM::Role",
    "CreateUser": "AWS::IAM::User",
    "CreateGroup": "AWS::IAM::Group",
    "CreatePolicy": "AWS::IAM::Policy",
    # CloudWatch
    "PutMetricAlarm": "AWS::CloudWatch::Alarm",
    "CreateLogGroup": "AWS::Logs::LogGroup",
    # SNS
    "CreateTopic": "AWS::SNS::Topic",
    # SQS
    "CreateQueue": "AWS::SQS::Queue",
    # ELB
    "CreateLoadBalancer": "AWS::ElasticLoadBalancingV2::LoadBalancer",
    # CloudFormation
    "CreateStack": "AWS::CloudFormation::Stack",
    # API Gateway
    "CreateRestApi": "AWS::ApiGateway::RestApi",
    "CreateApi": "AWS::ApiGatewayV2::Api",
    # EventBridge
    "CreateEventBus": "AWS::Events::EventBus",
    "PutRule": "AWS::Events::Rule",
    # Secrets Manager
    "CreateSecret": "AWS::SecretsManager::Secret",
    # KMS
    "CreateKey": "AWS::KMS::Key",
    # SSM
    "PutParameter": "AWS::SSM::Parameter",
    # Route53
    "CreateHostedZone": "AWS::Route53::HostedZone",
    # ECS ("CreateCluster" is shared with EKS; see the EKS section)
    "CreateService": "AWS::ECS::Service",
    "RegisterTaskDefinition": "AWS::ECS::TaskDefinition",
    # EKS
    "CreateCluster": "AWS::EKS::Cluster",  # fallback; ECS handled by source
    "CreateNodegroup": "AWS::EKS::Nodegroup",
    # Step Functions
    "CreateStateMachine": "AWS::StepFunctions::StateMachine",
    # WAF
    "CreateWebACL": "AWS::WAFv2::WebACL",
    # CodePipeline
    "CreatePipeline": "AWS::CodePipeline::Pipeline",
    # CodeBuild
    "CreateProject": "AWS::CodeBuild::Project",
    # Backup
    "CreateBackupPlan": "AWS::Backup::BackupPlan",
    "CreateBackupVault": "AWS::Backup::BackupVault",
    # Glue
    "CreateDatabase": "AWS::Glue::Database",
    "CreateTable": "AWS::Glue::Table",  # fallback; DynamoDB handled by source
    "CreateCrawler": "AWS::Glue::Crawler",
    "CreateJob": "AWS::Glue::Job",
    "CreateConnection": "AWS::Glue::Connection",
    # EFS
    "CreateFileSystem": "AWS::EFS::FileSystem",
    # ElastiCache
    "CreateCacheCluster": "AWS::ElastiCache::CacheCluster",
    "CreateReplicationGroup": "AWS::ElastiCache::ReplicationGroup",
}

# (eventSource, eventName) -> resource type, for event names that more than
# one service in EVENT_TO_RESOURCE_TYPE emits.  Takes precedence over the
# name-only table.
_SOURCE_SPECIFIC_RESOURCE_TYPES: Dict[tuple[str, str], str] = {
    ("ecs.amazonaws.com", "CreateCluster"): "AWS::ECS::Cluster",
    ("eks.amazonaws.com", "CreateCluster"): "AWS::EKS::Cluster",
    ("dynamodb.amazonaws.com", "CreateTable"): "AWS::DynamoDB::Table",
    ("glue.amazonaws.com", "CreateTable"): "AWS::Glue::Table",
}

# requestParameters keys that may carry the new resource's name, in priority
# order (the first key present wins).
_RESOURCE_NAME_KEYS = (
    "name",
    "bucketName",
    "functionName",
    "tableName",
    "roleName",
    "userName",
    "groupName",
    "policyName",
    "topicName",
    "queueName",
    "stackName",
    "clusterName",
    "serviceName",
    "stateMachineName",
    "projectName",
    "pipelineName",
    "dBInstanceIdentifier",
    "dBClusterIdentifier",
    "hostedZoneName",
    "fileSystemId",
    "cacheClusterId",
    "replicationGroupId",
    "webACLName",
    "eventBusName",
    "ruleName",
    "secretId",
    "parameterName",
    "databaseName",
    "crawlerName",
    "jobName",
    "connectionName",
)

# Top-level responseElements keys that may carry the new resource's ARN (or,
# for SQS, its URL), in priority order.
_RESOURCE_ARN_KEYS = (
    "functionArn",
    "roleArn",
    "topicArn",
    "queueUrl",  # SQS uses URL
    "stackId",
    "arn",
    "clusterArn",
    "serviceArn",
    "stateMachineArn",
    "webACLArn",
)


def _resolve_resource_type(event_name: str, event_source: str) -> Optional[str]:
    """Return the resource type created by an event, or None if not tracked.

    The (event_source, event_name) pair is checked first so that names shared
    by several services resolve correctly; otherwise the name-only table is
    used.
    """
    specific = _SOURCE_SPECIFIC_RESOURCE_TYPES.get((event_source, event_name))
    if specific is not None:
        return specific
    return EVENT_TO_RESOURCE_TYPE.get(event_name)


def _arn_matches_role(arn: str, role_name: str, role_arn: str) -> bool:
    """Tell whether an identity ARN from an event refers to the wanted role.

    Matches when ``arn`` equals ``role_arn`` exactly, or when ``role_name`` is
    one of the "/"-separated segments following the ARN's resource-type prefix.
    Whole segments are compared (not substrings) so that looking for role
    "Deploy" does not also match "Deployer".
    """
    if not arn:
        return False
    if role_arn and arn == role_arn:
        return True
    if not role_name:
        return False
    return role_name in arn.split("/")[1:]


@dataclass
class ResourceCreationEvent:
    """Represents a resource creation event from CloudTrail."""

    event_time: datetime
    event_name: str
    resource_type: str
    resource_name: Optional[str]
    resource_arn: Optional[str]
    created_by_arn: str
    # CloudTrail userIdentity.type, copied verbatim from the record (the
    # parsers below recognise 'AssumedRole', 'Role', 'IAMUser', 'Root' and
    # 'AWSService'; other values are passed through with an empty creator ARN).
    created_by_type: str
    region: str
    account_id: str
    raw_event: dict


class CloudTrailQuery:
    """Query CloudTrail for resource creation events."""

    def __init__(
        self,
        profile_name: Optional[str] = None,
        regions: Optional[List[str]] = None,
    ):
        """Initialize CloudTrail query.

        Args:
            profile_name: AWS profile to use
            regions: Regions to query (defaults to ["us-east-1"] when omitted
                or empty)
        """
        self.profile_name = profile_name
        self.regions = regions or ["us-east-1"]  # CloudTrail events are regional

    def get_resources_created_by_role(
        self,
        role_arn: str,
        days_back: int = 90,
        regions: Optional[List[str]] = None,
    ) -> List[ResourceCreationEvent]:
        """Get all resources created by a specific IAM role.

        A failure in one region is logged as a warning and that region is
        skipped; the remaining regions are still queried.

        Args:
            role_arn: Full ARN of the IAM role (or just role name)
            days_back: How many days to look back (max 90 for standard CloudTrail)
            regions: Regions to query (defaults to the instance's regions)

        Returns:
            List of ResourceCreationEvent objects
        """
        events: List[ResourceCreationEvent] = []
        query_regions = regions or self.regions

        # The role name is whatever follows the last "/" (full ARN or
        # "role/Name"), or the whole string when there is no "/".
        role_name = role_arn.rsplit("/", 1)[-1]

        logger.info(f"Querying CloudTrail for resources created by role: {role_name}")

        for region in query_regions:
            try:
                region_events = self._query_region(role_name, role_arn, days_back, region)
                events.extend(region_events)
                logger.debug(f"Found {len(region_events)} creation events in {region}")
            except Exception as e:
                logger.warning(f"Error querying CloudTrail in {region}: {e}")

        logger.info(f"Total creation events found: {len(events)}")
        return events

    def _query_region(
        self,
        role_name: str,
        role_arn: str,
        days_back: int,
        region: str,
    ) -> List[ResourceCreationEvent]:
        """Query CloudTrail in a specific region.

        Pages through every event in the time window and keeps those that
        _parse_event() accepts.  Errors are logged and re-raised.
        """
        client = create_boto_client(
            service_name="cloudtrail",
            region_name=region,
            profile_name=self.profile_name,
        )

        events: List[ResourceCreationEvent] = []
        end_time = datetime.now(timezone.utc)
        start_time = end_time - timedelta(days=days_back)

        paginator = client.get_paginator("lookup_events")

        try:
            # No LookupAttributes are passed: the whole window is scanned and
            # the role / event-name filtering happens client-side.
            for page in paginator.paginate(
                StartTime=start_time,
                EndTime=end_time,
                MaxResults=50,  # CloudTrail max per page
            ):
                for event in page.get("Events", []):
                    parsed = self._parse_event(event, role_name, role_arn, region)
                    if parsed:
                        events.append(parsed)

        except Exception as e:
            logger.error(f"Error querying CloudTrail: {e}")
            raise

        return events

    def _build_creation_event(
        self,
        event: dict,
        cloud_trail_event: dict,
        resource_type: str,
        created_by_arn: str,
        identity_type: str,
        region: str,
    ) -> ResourceCreationEvent:
        """Assemble a ResourceCreationEvent from an already-classified record.

        Args:
            event: The LookupEvents entry (supplies "EventTime")
            cloud_trail_event: The decoded "CloudTrailEvent" JSON document
            resource_type: Resource type resolved for this record
            created_by_arn: Creator identity extracted by the caller
            identity_type: userIdentity.type of the record
            region: Fallback region when the record has no "awsRegion"
        """
        event_name = cloud_trail_event.get("eventName", "")
        resource_name, resource_arn = self._extract_resource_info(
            cloud_trail_event, event_name
        )

        # Prefer the account that received the event; fall back to the caller's.
        account_id = cloud_trail_event.get("recipientAccountId", "") or (
            cloud_trail_event.get("userIdentity", {}).get("accountId", "")
        )

        return ResourceCreationEvent(
            event_time=event.get("EventTime", datetime.now(timezone.utc)),
            event_name=event_name,
            resource_type=resource_type,
            resource_name=resource_name,
            resource_arn=resource_arn,
            created_by_arn=created_by_arn,
            created_by_type=identity_type,
            region=cloud_trail_event.get("awsRegion", region),
            account_id=account_id,
            raw_event=cloud_trail_event,
        )

    def _parse_event(
        self,
        event: dict,
        role_name: str,
        role_arn: str,
        region: str,
    ) -> Optional[ResourceCreationEvent]:
        """Parse a CloudTrail event and check if it matches our criteria.

        Returns a ResourceCreationEvent when the record is a tracked creation
        event performed by the given role (identity type "AssumedRole" or
        "Role"); otherwise None.  Any parsing error is logged at debug level
        and also yields None.
        """
        try:
            cloud_trail_event = json.loads(event.get("CloudTrailEvent", "{}"))

            resource_type = _resolve_resource_type(
                cloud_trail_event.get("eventName", ""),
                cloud_trail_event.get("eventSource", ""),
            )
            if resource_type is None:
                return None

            user_identity = cloud_trail_event.get("userIdentity", {})
            identity_type = user_identity.get("type", "")

            if identity_type == "AssumedRole":
                # For assumed roles the role itself is the session issuer.
                session_context = user_identity.get("sessionContext", {})
                created_by_arn = session_context.get("sessionIssuer", {}).get("arn", "")
            elif identity_type == "Role":
                created_by_arn = user_identity.get("arn", "")
            else:
                return None

            if not _arn_matches_role(created_by_arn, role_name, role_arn):
                return None

            return self._build_creation_event(
                event,
                cloud_trail_event,
                resource_type,
                created_by_arn,
                identity_type,
                region,
            )

        except Exception as e:
            logger.debug(f"Error parsing CloudTrail event: {e}")
            return None

    def _extract_resource_info(
        self, event: dict, event_name: str
    ) -> tuple[Optional[str], Optional[str]]:
        """Extract resource name and ARN from CloudTrail event.

        The name is the value of the first _RESOURCE_NAME_KEYS entry present
        in requestParameters; the ARN is the value of the first
        _RESOURCE_ARN_KEYS entry present in responseElements.  For
        "RunInstances" the name is replaced by the first launched instance's
        ID when the response lists one.

        Returns:
            Tuple of (resource_name, resource_arn)
        """
        # Either section may be absent or null in the record.
        request_params = event.get("requestParameters") or {}
        response_elements = event.get("responseElements") or {}

        resource_name = next(
            (request_params[key] for key in _RESOURCE_NAME_KEYS if key in request_params),
            None,
        )
        resource_arn = next(
            (response_elements[key] for key in _RESOURCE_ARN_KEYS if key in response_elements),
            None,
        )

        if event_name == "RunInstances":
            instances = (response_elements.get("instancesSet") or {}).get("items") or []
            if instances:
                resource_name = instances[0].get("instanceId")

        return resource_name, resource_arn

    def get_created_resource_arns(
        self,
        role_arn: str,
        days_back: int = 90,
        regions: Optional[List[str]] = None,
    ) -> Set[str]:
        """Get set of ARNs for resources created by a role.

        Events for which no ARN could be extracted are skipped.

        Args:
            role_arn: IAM role ARN or name
            days_back: Days to look back
            regions: Regions to query

        Returns:
            Set of resource ARNs
        """
        events = self.get_resources_created_by_role(role_arn, days_back, regions)
        return {event.resource_arn for event in events if event.resource_arn}

    def get_created_resource_names(
        self,
        role_arn: str,
        days_back: int = 90,
        regions: Optional[List[str]] = None,
    ) -> Dict[str, Set[str]]:
        """Get resource names grouped by type for resources created by a role.

        Events for which no name could be extracted are skipped.

        Args:
            role_arn: IAM role ARN or name
            days_back: Days to look back
            regions: Regions to query

        Returns:
            Dict mapping resource_type to set of resource names
        """
        events = self.get_resources_created_by_role(role_arn, days_back, regions)

        by_type: Dict[str, Set[str]] = {}
        for event in events:
            if event.resource_name:
                by_type.setdefault(event.resource_type, set()).add(event.resource_name)

        return by_type

    def get_all_creation_events(
        self,
        days_back: int = 90,
        regions: Optional[List[str]] = None,
        progress_callback: Optional[callable] = None,
    ) -> List[ResourceCreationEvent]:
        """Get all resource creation events from CloudTrail.

        A failure in one region is logged as a warning and that region is
        skipped; the remaining regions are still queried.

        Args:
            days_back: How many days to look back (max 90 for standard CloudTrail)
            regions: Regions to query (defaults to the instance's regions)
            progress_callback: Optional callback(event_name, events_found) for progress updates

        Returns:
            List of ResourceCreationEvent objects
        """
        events: List[ResourceCreationEvent] = []
        query_regions = regions or self.regions

        logger.info(f"Querying CloudTrail for all creation events (last {days_back} days)")

        for region in query_regions:
            try:
                region_events = self._query_all_creation_events_fast(
                    days_back, region, progress_callback
                )
                events.extend(region_events)
                logger.debug(f"Found {len(region_events)} creation events in {region}")
            except Exception as e:
                logger.warning(f"Error querying CloudTrail in {region}: {e}")

        logger.info(f"Total creation events found: {len(events)}")
        return events

    def _query_single_event_type(
        self,
        client,
        event_name: str,
        start_time: datetime,
        end_time: datetime,
        region: str,
    ) -> List[ResourceCreationEvent]:
        """Query CloudTrail for a single event type.

        Best-effort: if the lookup fails part-way, the failure is logged as a
        warning and the events gathered so far are returned.
        """
        events: List[ResourceCreationEvent] = []
        try:
            paginator = client.get_paginator("lookup_events")
            for page in paginator.paginate(
                LookupAttributes=[
                    {"AttributeKey": "EventName", "AttributeValue": event_name}
                ],
                StartTime=start_time,
                EndTime=end_time,
                MaxResults=50,
            ):
                for event in page.get("Events", []):
                    parsed = self._parse_creation_event(event, region)
                    if parsed:
                        events.append(parsed)
        except Exception as e:
            # Surface this: a swallowed failure here means missing results.
            logger.warning(f"Error querying {event_name}: {e}")
        return events

    def _query_all_creation_events_fast(
        self,
        days_back: int,
        region: str,
        progress_callback: Optional[callable] = None,
    ) -> List[ResourceCreationEvent]:
        """Query CloudTrail for all creation events using parallel queries by event name.

        One lookup per key of EVENT_TO_RESOURCE_TYPE is submitted to a pool of
        10 worker threads sharing a single client.  ``progress_callback``, when
        given, is invoked as callback(event_name, events_found) after each
        lookup completes.
        """
        client = create_boto_client(
            service_name="cloudtrail",
            region_name=region,
            profile_name=self.profile_name,
        )

        events: List[ResourceCreationEvent] = []
        end_time = datetime.now(timezone.utc)
        start_time = end_time - timedelta(days=days_back)

        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = {
                executor.submit(
                    self._query_single_event_type,
                    client,
                    event_name,
                    start_time,
                    end_time,
                    region,
                ): event_name
                for event_name in EVENT_TO_RESOURCE_TYPE
            }

            for future in as_completed(futures):
                event_name = futures[future]
                try:
                    result = future.result()
                    events.extend(result)
                    if progress_callback:
                        progress_callback(event_name, len(result))
                except Exception as e:
                    logger.warning(f"Error querying {event_name}: {e}")

        return events

    def _query_all_creation_events(
        self,
        days_back: int,
        region: str,
    ) -> List[ResourceCreationEvent]:
        """Query CloudTrail for all creation events in a specific region (legacy method)."""
        return self._query_all_creation_events_fast(days_back, region)

    def _parse_creation_event(
        self,
        event: dict,
        region: str,
    ) -> Optional[ResourceCreationEvent]:
        """Parse a CloudTrail event for any creation event.

        Returns a ResourceCreationEvent for every tracked creation event,
        whoever performed it; None for untracked events.  Any parsing error is
        logged at debug level and also yields None.
        """
        try:
            cloud_trail_event = json.loads(event.get("CloudTrailEvent", "{}"))

            resource_type = _resolve_resource_type(
                cloud_trail_event.get("eventName", ""),
                cloud_trail_event.get("eventSource", ""),
            )
            if resource_type is None:
                return None

            # Extract creator identity; unrecognised types keep an empty ARN.
            user_identity = cloud_trail_event.get("userIdentity", {})
            identity_type = user_identity.get("type", "")
            created_by_arn = ""

            if identity_type == "AssumedRole":
                session_context = user_identity.get("sessionContext", {})
                created_by_arn = session_context.get("sessionIssuer", {}).get("arn", "")
            elif identity_type in ("Role", "IAMUser"):
                created_by_arn = user_identity.get("arn", "")
            elif identity_type == "Root":
                created_by_arn = "root"
            elif identity_type == "AWSService":
                invoking_service = user_identity.get("invokedBy", "")
                created_by_arn = f"service:{invoking_service}"

            return self._build_creation_event(
                event,
                cloud_trail_event,
                resource_type,
                created_by_arn,
                identity_type,
                region,
            )

        except Exception as e:
            logger.debug(f"Error parsing CloudTrail event: {e}")
            return None

    def get_resource_creators(
        self,
        days_back: int = 90,
        regions: Optional[List[str]] = None,
        progress_callback: Optional[callable] = None,
    ) -> Dict[str, Dict[str, str]]:
        """Build a mapping of resources to their creators.

        When several creation events exist for the same resource, the most
        recent one wins.  Events without a resource name are skipped.

        Args:
            days_back: Days to look back
            regions: Regions to query
            progress_callback: Optional callback(event_name, events_found) for progress updates

        Returns:
            Dict mapping "<resource_type>:<resource_name>" to creator info,
            where "created_at" is ``event_time.isoformat()``:
            {
                "AWS::S3::Bucket:my-bucket": {
                    "created_by": "arn:aws:iam::123:role/MyRole",
                    "created_by_type": "AssumedRole",
                    "created_at": "2024-01-15T10:30:00+00:00"
                }
            }
        """
        events = self.get_all_creation_events(days_back, regions, progress_callback)

        creators: Dict[str, Dict[str, str]] = {}
        # Newest event time seen per key, kept as datetimes so the stored ISO
        # string never has to be parsed back for comparison.
        newest: Dict[str, datetime] = {}
        for event in events:
            if not event.resource_name:
                continue
            key = f"{event.resource_type}:{event.resource_name}"
            previous = newest.get(key)
            if previous is None or event.event_time > previous:
                newest[key] = event.event_time
                creators[key] = {
                    "created_by": event.created_by_arn,
                    "created_by_type": event.created_by_type,
                    "created_at": event.event_time.isoformat(),
                }

        return creators
@@ -0,0 +1,21 @@
1
+ """AWS Config service integration for resource collection.
2
+
3
+ This module provides Config-first resource collection with fallback to direct API.
4
+ """
5
+
6
+ from .detector import ConfigAvailability, detect_config_availability
7
+ from .collector import ConfigResourceCollector
8
+ from .resource_type_mapping import (
9
+ CONFIG_SUPPORTED_TYPES,
10
+ DIRECT_API_ONLY_TYPES,
11
+ is_config_supported_type,
12
+ )
13
+
14
+ __all__ = [
15
+ "ConfigAvailability",
16
+ "detect_config_availability",
17
+ "ConfigResourceCollector",
18
+ "CONFIG_SUPPORTED_TYPES",
19
+ "DIRECT_API_ONLY_TYPES",
20
+ "is_config_supported_type",
21
+ ]