aws-inventory-manager 0.13.2__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aws-inventory-manager might be problematic. Click here for more details.

@@ -0,0 +1,419 @@
1
+ """CloudTrail query for resource creation events."""
2
+
3
+ import json
4
+ import logging
5
+ from dataclasses import dataclass
6
+ from datetime import datetime, timedelta, timezone
7
+ from typing import Dict, List, Optional, Set
8
+
9
+ from ..aws.client import create_boto_client
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
# Map of CloudTrail event names to resource types
# This maps creation events to the resource types they create
#
# NOTE(review): the table is keyed by event name alone, and two names appear
# twice in this literal ("CreateCluster" and "CreateTable"). In a Python dict
# literal the later entry silently replaces the earlier one, so at runtime
# "CreateCluster" resolves to "AWS::EKS::Cluster" and "CreateTable" resolves
# to "AWS::Glue::Table"; the ECS and DynamoDB values are never returned.
# Telling those apart needs more than the event name — TODO confirm whether
# lookups should also consider the event's source service.
EVENT_TO_RESOURCE_TYPE: Dict[str, str] = {
    # EC2
    "RunInstances": "AWS::EC2::Instance",
    "CreateVolume": "AWS::EC2::Volume",
    "CreateVpc": "AWS::EC2::VPC",
    "CreateSubnet": "AWS::EC2::Subnet",
    "CreateSecurityGroup": "AWS::EC2::SecurityGroup",
    "CreateVpcEndpoint": "AWS::EC2::VPCEndpoint",
    # Lambda
    "CreateFunction20150331": "AWS::Lambda::Function",
    "CreateFunction": "AWS::Lambda::Function",
    # S3
    "CreateBucket": "AWS::S3::Bucket",
    # RDS
    "CreateDBInstance": "AWS::RDS::DBInstance",
    "CreateDBCluster": "AWS::RDS::DBCluster",
    # DynamoDB
    # NOTE(review): overwritten by the Glue "CreateTable" entry further down.
    "CreateTable": "AWS::DynamoDB::Table",
    # IAM
    "CreateRole": "AWS::IAM::Role",
    "CreateUser": "AWS::IAM::User",
    "CreateGroup": "AWS::IAM::Group",
    "CreatePolicy": "AWS::IAM::Policy",
    # CloudWatch
    "PutMetricAlarm": "AWS::CloudWatch::Alarm",
    "CreateLogGroup": "AWS::Logs::LogGroup",
    # SNS
    "CreateTopic": "AWS::SNS::Topic",
    # SQS
    "CreateQueue": "AWS::SQS::Queue",
    # ELB
    "CreateLoadBalancer": "AWS::ElasticLoadBalancingV2::LoadBalancer",
    # CloudFormation
    "CreateStack": "AWS::CloudFormation::Stack",
    # API Gateway
    "CreateRestApi": "AWS::ApiGateway::RestApi",
    "CreateApi": "AWS::ApiGatewayV2::Api",
    # EventBridge
    "CreateEventBus": "AWS::Events::EventBus",
    "PutRule": "AWS::Events::Rule",
    # Secrets Manager
    "CreateSecret": "AWS::SecretsManager::Secret",
    # KMS
    "CreateKey": "AWS::KMS::Key",
    # SSM
    "PutParameter": "AWS::SSM::Parameter",
    # Route53
    "CreateHostedZone": "AWS::Route53::HostedZone",
    # ECS
    # NOTE(review): overwritten by the EKS "CreateCluster" entry just below.
    "CreateCluster": "AWS::ECS::Cluster",
    "CreateService": "AWS::ECS::Service",
    "RegisterTaskDefinition": "AWS::ECS::TaskDefinition",
    # EKS
    # NOTE(review): duplicate key; this value replaces the ECS one above.
    "CreateCluster": "AWS::EKS::Cluster",
    "CreateNodegroup": "AWS::EKS::Nodegroup",
    # Step Functions
    "CreateStateMachine": "AWS::StepFunctions::StateMachine",
    # WAF
    "CreateWebACL": "AWS::WAFv2::WebACL",
    # CodePipeline
    "CreatePipeline": "AWS::CodePipeline::Pipeline",
    # CodeBuild
    "CreateProject": "AWS::CodeBuild::Project",
    # Backup
    "CreateBackupPlan": "AWS::Backup::BackupPlan",
    "CreateBackupVault": "AWS::Backup::BackupVault",
    # Glue
    "CreateDatabase": "AWS::Glue::Database",
    # NOTE(review): duplicate key; this value replaces the DynamoDB one above.
    "CreateTable": "AWS::Glue::Table",
    "CreateCrawler": "AWS::Glue::Crawler",
    "CreateJob": "AWS::Glue::Job",
    "CreateConnection": "AWS::Glue::Connection",
    # EFS
    "CreateFileSystem": "AWS::EFS::FileSystem",
    # ElastiCache
    "CreateCacheCluster": "AWS::ElastiCache::CacheCluster",
    "CreateReplicationGroup": "AWS::ElastiCache::ReplicationGroup",
}
93
+
94
+
95
@dataclass
class ResourceCreationEvent:
    """Represents a resource creation event from CloudTrail.

    One instance describes a single creation call that passed the role
    filter: what was created, when and where, and which identity made the
    call. Instances are built by ``CloudTrailQuery._parse_event``.
    """

    # Value of the lookup result's "EventTime"; the parser substitutes the
    # current UTC time when that key is absent.
    event_time: datetime
    # CloudTrail "eventName", e.g. "RunInstances".
    event_name: str
    # Resource type looked up from EVENT_TO_RESOURCE_TYPE,
    # e.g. "AWS::EC2::Instance".
    resource_type: str
    # Name/identifier pulled from the request parameters (or the instance id
    # for RunInstances); None when nothing recognizable was found.
    resource_name: Optional[str]
    # Identifier pulled from the response elements; None when absent.
    # Not always a true ARN: the extractor also accepts "queueUrl" and
    # "stackId" values.
    resource_arn: Optional[str]
    # ARN of the identity that made the call (the session issuer ARN for
    # assumed-role sessions).
    created_by_arn: str
    # CloudTrail "userIdentity.type" of the caller.
    created_by_type: str  # 'Role', 'User', 'AssumedRole'
    # "awsRegion" from the event, falling back to the queried region.
    region: str
    # "recipientAccountId" from the event, falling back to the caller's
    # "accountId"; may be an empty string if neither is present.
    account_id: str
    # The decoded "CloudTrailEvent" JSON document, kept for callers that need
    # fields not surfaced above.
    raw_event: dict
109
+
110
+
111
class CloudTrailQuery:
    """Query CloudTrail for resource creation events.

    The query walks the CloudTrail event history of each requested region,
    keeps the events whose name is listed in ``EVENT_TO_RESOURCE_TYPE`` and
    whose caller is the requested IAM role, and turns each of them into a
    ``ResourceCreationEvent``.
    """

    # (eventSource, eventName) -> resource type, for event names that appear
    # more than once in EVENT_TO_RESOURCE_TYPE. That table is keyed by event
    # name alone, so for "CreateCluster" and "CreateTable" only the last
    # entry survives; the emitting service is needed to tell them apart.
    _SOURCE_SPECIFIC_TYPES: Dict[tuple[str, str], str] = {
        ("ecs.amazonaws.com", "CreateCluster"): "AWS::ECS::Cluster",
        ("eks.amazonaws.com", "CreateCluster"): "AWS::EKS::Cluster",
        ("dynamodb.amazonaws.com", "CreateTable"): "AWS::DynamoDB::Table",
        ("glue.amazonaws.com", "CreateTable"): "AWS::Glue::Table",
    }

    # Request-parameter keys that may carry the resource name, in priority
    # order (the first key present wins).
    _NAME_KEYS = (
        "name",
        "bucketName",
        "functionName",
        "tableName",
        "roleName",
        "userName",
        "groupName",
        "policyName",
        "topicName",
        "queueName",
        "stackName",
        "clusterName",
        "serviceName",
        "stateMachineName",
        "projectName",
        "pipelineName",
        "dBInstanceIdentifier",
        "dBClusterIdentifier",
        "hostedZoneName",
        "fileSystemId",
        "cacheClusterId",
        "replicationGroupId",
        "webACLName",
        "eventBusName",
        "ruleName",
        "secretId",
        "parameterName",
        "databaseName",
        "crawlerName",
        "jobName",
        "connectionName",
    )

    # Response-element keys that may carry the resource ARN, in priority
    # order (the first key present wins).
    _ARN_KEYS = (
        "functionArn",
        "roleArn",
        "topicArn",
        "queueUrl",  # SQS uses URL
        "stackId",
        "arn",
        "clusterArn",
        "serviceArn",
        "stateMachineArn",
        "webACLArn",
    )

    def __init__(
        self,
        profile_name: Optional[str] = None,
        regions: Optional[List[str]] = None,
    ):
        """Initialize CloudTrail query.

        Args:
            profile_name: AWS profile to use
            regions: Regions to query. When omitted or empty, only
                ``us-east-1`` is queried.
        """
        self.profile_name = profile_name
        self.regions = regions or ["us-east-1"]  # CloudTrail events are regional

    def get_resources_created_by_role(
        self,
        role_arn: str,
        days_back: int = 90,
        regions: Optional[List[str]] = None,
    ) -> List["ResourceCreationEvent"]:
        """Get all resources created by a specific IAM role.

        A failure in one region is logged as a warning and does not stop the
        remaining regions from being queried.

        Args:
            role_arn: Full ARN of the IAM role (or just role name)
            days_back: How many days to look back (max 90 for standard CloudTrail)
            regions: Regions to query (defaults to the regions given at
                construction time)

        Returns:
            List of ResourceCreationEvent objects
        """
        events: List["ResourceCreationEvent"] = []
        query_regions = regions or self.regions

        # The role name is the last path segment, whether the caller passed a
        # full ARN ("arn:aws:iam::123456789012:role/MyRole"), a path
        # ("role/MyRole") or the bare name ("MyRole").
        role_name = role_arn.rsplit("/", 1)[-1]

        logger.info(f"Querying CloudTrail for resources created by role: {role_name}")

        for region in query_regions:
            try:
                region_events = self._query_region(role_name, role_arn, days_back, region)
                events.extend(region_events)
                logger.debug(f"Found {len(region_events)} creation events in {region}")
            except Exception as e:
                # Best effort: keep going with the other regions.
                logger.warning(f"Error querying CloudTrail in {region}: {e}")

        logger.info(f"Total creation events found: {len(events)}")
        return events

    def _query_region(
        self,
        role_name: str,
        role_arn: str,
        days_back: int,
        region: str,
    ) -> List["ResourceCreationEvent"]:
        """Query CloudTrail in a specific region.

        Args:
            role_name: Role name to match callers against
            role_arn: Role ARN (or name) exactly as supplied by the caller
            days_back: How many days to look back from now
            region: Region whose event history is read

        Returns:
            Matching creation events found in ``region``

        Raises:
            Exception: Any error raised while paging through the events is
                logged and re-raised.
        """
        client = create_boto_client(
            service_name="cloudtrail",
            region_name=region,
            profile_name=self.profile_name,
        )

        events: List["ResourceCreationEvent"] = []
        end_time = datetime.now(timezone.utc)
        start_time = end_time - timedelta(days=days_back)

        paginator = client.get_paginator("lookup_events")

        try:
            # No server-side filter is applied: every event in the time
            # window is fetched and the event-name / role filtering happens
            # locally in _parse_event.
            for page in paginator.paginate(
                StartTime=start_time,
                EndTime=end_time,
                MaxResults=50,  # CloudTrail max per page
            ):
                for event in page.get("Events", []):
                    parsed = self._parse_event(event, role_name, role_arn, region)
                    if parsed:
                        events.append(parsed)

        except Exception as e:
            logger.error(f"Error querying CloudTrail: {e}")
            raise

        return events

    @staticmethod
    def _matches_role(identity_arn: str, role_name: str, role_arn: str) -> bool:
        """Return True when ``identity_arn`` designates the requested role.

        The ARN matches when it equals ``role_arn``, or when its final path
        segment equals ``role_name``. The name is compared as a whole
        segment, so a request for "Admin" does not match "AdminReadOnly".
        """
        if not identity_arn:
            return False
        if role_arn and identity_arn == role_arn:
            return True
        return bool(role_name) and identity_arn.rsplit("/", 1)[-1] == role_name

    @classmethod
    def _resolve_resource_type(cls, event_name: str, event_source: str) -> str:
        """Map an event to its resource type, using the source service when needed.

        Event names shared by several services are resolved through
        ``_SOURCE_SPECIFIC_TYPES``; everything else (including an unknown or
        missing source) falls back to ``EVENT_TO_RESOURCE_TYPE``.
        """
        specific = cls._SOURCE_SPECIFIC_TYPES.get((event_source, event_name))
        if specific is not None:
            return specific
        return EVENT_TO_RESOURCE_TYPE[event_name]

    def _parse_event(
        self,
        event: dict,
        role_name: str,
        role_arn: str,
        region: str,
    ) -> Optional["ResourceCreationEvent"]:
        """Parse a CloudTrail event and check if it matches our criteria.

        Args:
            event: One entry of a ``lookup_events`` page; its
                "CloudTrailEvent" value is a JSON string
            role_name: Role name to match the caller against
            role_arn: Role ARN (or name) exactly as supplied by the caller
            region: Region being queried, used when the event carries no
                "awsRegion"

        Returns:
            A ResourceCreationEvent, or None when the event is not a tracked
            creation event, was not made by the requested role, or cannot be
            parsed.
        """
        try:
            cloud_trail_event = json.loads(event.get("CloudTrailEvent", "{}"))

            event_name = cloud_trail_event.get("eventName", "")

            # Check if this is a creation event we care about
            if event_name not in EVENT_TO_RESOURCE_TYPE:
                return None

            # Check if the identity matches our role
            user_identity = cloud_trail_event.get("userIdentity", {})
            identity_type = user_identity.get("type", "")

            if identity_type == "AssumedRole":
                # For assumed roles the role itself is the session issuer.
                session_context = user_identity.get("sessionContext", {})
                session_issuer = session_context.get("sessionIssuer", {})
                created_by_arn = session_issuer.get("arn", "")
            elif identity_type == "Role":
                created_by_arn = user_identity.get("arn", "")
            else:
                # Other identity types (users, services, ...) are never a role.
                return None

            if not self._matches_role(created_by_arn, role_name, role_arn):
                return None

            # Extract resource information
            resource_type = self._resolve_resource_type(
                event_name, cloud_trail_event.get("eventSource", "")
            )
            resource_name, resource_arn_extracted = self._extract_resource_info(
                cloud_trail_event, event_name
            )

            # Get account ID
            account_id = cloud_trail_event.get("recipientAccountId", "")
            if not account_id:
                account_id = user_identity.get("accountId", "")

            return ResourceCreationEvent(
                event_time=event.get("EventTime", datetime.now(timezone.utc)),
                event_name=event_name,
                resource_type=resource_type,
                resource_name=resource_name,
                resource_arn=resource_arn_extracted,
                created_by_arn=created_by_arn,
                created_by_type=identity_type,
                region=cloud_trail_event.get("awsRegion", region),
                account_id=account_id,
                raw_event=cloud_trail_event,
            )

        except Exception as e:
            # A malformed event must not abort the scan of the whole region.
            logger.debug(f"Error parsing CloudTrail event: {e}")
            return None

    def _extract_resource_info(
        self, event: dict, event_name: str
    ) -> tuple[Optional[str], Optional[str]]:
        """Extract resource name and ARN from CloudTrail event.

        Args:
            event: Decoded CloudTrail event
            event_name: The event's "eventName"

        Returns:
            Tuple of (resource_name, resource_arn); either element is None
            when it cannot be found.
        """
        # Both sections may be present but null in the event.
        request_params = event.get("requestParameters", {}) or {}
        response_elements = event.get("responseElements", {}) or {}

        resource_name = None
        resource_arn = None

        # Try common patterns for resource names
        for key in self._NAME_KEYS:
            if key in request_params:
                resource_name = request_params[key]
                break

        # Try to extract ARN from response
        for key in self._ARN_KEYS:
            if key in response_elements:
                resource_arn = response_elements[key]
                break

        # For EC2 instances the identifier only exists in the response; a
        # null "instancesSet" or "items" is treated as "no instances".
        if event_name == "RunInstances" and response_elements:
            instances = (response_elements.get("instancesSet") or {}).get("items") or []
            if instances:
                resource_name = instances[0].get("instanceId")

        return resource_name, resource_arn

    def get_created_resource_arns(
        self,
        role_arn: str,
        days_back: int = 90,
        regions: Optional[List[str]] = None,
    ) -> Set[str]:
        """Get set of ARNs for resources created by a role.

        Events for which no ARN could be extracted are left out.

        Args:
            role_arn: IAM role ARN or name
            days_back: Days to look back
            regions: Regions to query

        Returns:
            Set of resource ARNs
        """
        events = self.get_resources_created_by_role(role_arn, days_back, regions)
        return {event.resource_arn for event in events if event.resource_arn}

    def get_created_resource_names(
        self,
        role_arn: str,
        days_back: int = 90,
        regions: Optional[List[str]] = None,
    ) -> Dict[str, Set[str]]:
        """Get resource names grouped by type for resources created by a role.

        Events for which no name could be extracted are left out.

        Args:
            role_arn: IAM role ARN or name
            days_back: Days to look back
            regions: Regions to query

        Returns:
            Dict mapping resource_type to set of resource names
        """
        events = self.get_resources_created_by_role(role_arn, days_back, regions)

        by_type: Dict[str, Set[str]] = {}
        for event in events:
            if event.resource_name:
                by_type.setdefault(event.resource_type, set()).add(event.resource_name)

        return by_type
@@ -0,0 +1,6 @@
1
+ """Resource name normalization for intelligent matching."""
2
+
3
+ from .config import NormalizerConfig
4
+ from .normalizer import NormalizationResult, ResourceNormalizer
5
+
6
+ __all__ = ["ResourceNormalizer", "NormalizationResult", "NormalizerConfig"]
src/matching/config.py ADDED
@@ -0,0 +1,52 @@
1
+ """Configuration for resource name normalization."""
2
+
3
+ import os
4
+ from dataclasses import dataclass, field
5
+ from typing import List, Optional
6
+
7
+
8
@dataclass
class NormalizerConfig:
    """Configuration for the resource normalizer.

    Holds the settings for an OpenAI-compatible endpoint, batching limits,
    and the regular expressions used to spot machine-generated resource
    names. Build it directly, or from the environment with ``from_env``.
    """

    # OpenAI API configuration
    api_key: Optional[str] = None
    base_url: Optional[str] = None
    model: str = "gpt-4o-mini"

    # Batch settings
    max_batch_size: int = 50
    timeout_seconds: int = 60
    max_retries: int = 3

    # Patterns to detect "random" names that need AI normalization
    random_patterns: List[str] = field(
        default_factory=lambda: [
            r"-[a-f0-9]{8,}$",  # Hex suffix: -a1b2c3d4e5
            r"-[A-Z0-9]{8,}$",  # CloudFormation suffix: -ABCD1234XYZ
            r"_[a-z0-9]{5,}$",  # Underscore suffix (Bedrock): _jnwn1
            r"-\d{10,}$",  # Timestamp suffix: -1704067200
            r"\d{12}",  # Account ID anywhere: 123456789012
            r"^(subnet|vpc|vol|sg|i|rtb|igw|nat|eni)-[a-f0-9]+$",  # AWS resource IDs
        ]
    )

    @classmethod
    def from_env(cls) -> "NormalizerConfig":
        """Load configuration from environment variables.

        A variable that is unset, empty, or only whitespace is treated as
        "not provided", so a blank ``OPENAI_MODEL`` falls back to the default
        model instead of producing an empty model name.

        Environment variables:
            OPENAI_API_KEY: API key for OpenAI-compatible endpoint
            OPENAI_BASE_URL: Custom API endpoint URL
            OPENAI_MODEL: Model name (default: gpt-4o-mini)

        Returns:
            A NormalizerConfig with the remaining fields at their defaults.
        """

        def read(name: str) -> Optional[str]:
            # Collapse missing and blank values to None.
            value = (os.getenv(name) or "").strip()
            return value or None

        return cls(
            api_key=read("OPENAI_API_KEY"),
            base_url=read("OPENAI_BASE_URL"),
            # cls.model is the dataclass default declared above, so the
            # default model name lives in exactly one place.
            model=read("OPENAI_MODEL") or cls.model,
        )

    @property
    def is_ai_enabled(self) -> bool:
        """Check if AI normalization is available (i.e. an API key is set)."""
        return bool(self.api_key)