cartography-0.109.0rc1-py3-none-any.whl → cartography-0.110.0rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of cartography might be problematic.

Files changed (58):
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +22 -0
  3. cartography/config.py +13 -0
  4. cartography/data/indexes.cypher +0 -15
  5. cartography/intel/aws/cloudtrail_management_events.py +21 -0
  6. cartography/intel/aws/eventbridge.py +91 -0
  7. cartography/intel/aws/glue.py +117 -0
  8. cartography/intel/aws/identitycenter.py +71 -23
  9. cartography/intel/aws/kms.py +160 -200
  10. cartography/intel/aws/lambda_function.py +206 -190
  11. cartography/intel/aws/rds.py +243 -458
  12. cartography/intel/aws/resources.py +4 -0
  13. cartography/intel/aws/route53.py +334 -332
  14. cartography/intel/entra/__init__.py +43 -41
  15. cartography/intel/entra/applications.py +1 -2
  16. cartography/intel/entra/ou.py +1 -1
  17. cartography/intel/entra/resources.py +20 -0
  18. cartography/intel/trivy/__init__.py +73 -13
  19. cartography/intel/trivy/scanner.py +115 -92
  20. cartography/models/aws/eventbridge/__init__.py +0 -0
  21. cartography/models/aws/eventbridge/rule.py +77 -0
  22. cartography/models/aws/glue/__init__.py +0 -0
  23. cartography/models/aws/glue/connection.py +51 -0
  24. cartography/models/aws/identitycenter/awspermissionset.py +44 -0
  25. cartography/models/aws/kms/__init__.py +0 -0
  26. cartography/models/aws/kms/aliases.py +86 -0
  27. cartography/models/aws/kms/grants.py +65 -0
  28. cartography/models/aws/kms/keys.py +88 -0
  29. cartography/models/aws/lambda_function/__init__.py +0 -0
  30. cartography/models/aws/lambda_function/alias.py +74 -0
  31. cartography/models/aws/lambda_function/event_source_mapping.py +88 -0
  32. cartography/models/aws/lambda_function/lambda_function.py +89 -0
  33. cartography/models/aws/lambda_function/layer.py +72 -0
  34. cartography/models/aws/rds/__init__.py +0 -0
  35. cartography/models/aws/rds/cluster.py +89 -0
  36. cartography/models/aws/rds/instance.py +154 -0
  37. cartography/models/aws/rds/snapshot.py +108 -0
  38. cartography/models/aws/rds/subnet_group.py +101 -0
  39. cartography/models/aws/route53/__init__.py +0 -0
  40. cartography/models/aws/route53/dnsrecord.py +214 -0
  41. cartography/models/aws/route53/nameserver.py +63 -0
  42. cartography/models/aws/route53/subzone.py +40 -0
  43. cartography/models/aws/route53/zone.py +47 -0
  44. cartography/models/snipeit/asset.py +1 -0
  45. cartography/util.py +8 -1
  46. {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/METADATA +2 -2
  47. {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/RECORD +51 -32
  48. cartography/data/jobs/cleanup/aws_dns_cleanup.json +0 -65
  49. cartography/data/jobs/cleanup/aws_import_identity_center_cleanup.json +0 -16
  50. cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +0 -50
  51. cartography/data/jobs/cleanup/aws_import_rds_clusters_cleanup.json +0 -23
  52. cartography/data/jobs/cleanup/aws_import_rds_instances_cleanup.json +0 -47
  53. cartography/data/jobs/cleanup/aws_import_rds_snapshots_cleanup.json +0 -23
  54. cartography/data/jobs/cleanup/aws_kms_details.json +0 -10
  55. {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/WHEEL +0 -0
  56. {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/entry_points.txt +0 -0
  57. {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/licenses/LICENSE +0 -0
  58. {cartography-0.109.0rc1.dist-info → cartography-0.110.0rc1.dist-info}/top_level.txt +0 -0
@@ -6,12 +6,17 @@ from typing import List
6
6
  import boto3
7
7
  import neo4j
8
8
 
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.models.aws.rds.cluster import RDSClusterSchema
12
+ from cartography.models.aws.rds.instance import RDSInstanceSchema
13
+ from cartography.models.aws.rds.snapshot import RDSSnapshotSchema
14
+ from cartography.models.aws.rds.subnet_group import DBSubnetGroupSchema
9
15
  from cartography.stats import get_stats_client
10
16
  from cartography.util import aws_handle_regions
11
17
  from cartography.util import aws_paginate
12
18
  from cartography.util import dict_value_to_str
13
19
  from cartography.util import merge_module_sync_metadata
14
- from cartography.util import run_cleanup_job
15
20
  from cartography.util import timeit
16
21
 
17
22
  logger = logging.getLogger(__name__)
@@ -39,7 +44,7 @@ def get_rds_cluster_data(
39
44
  @timeit
40
45
  def load_rds_clusters(
41
46
  neo4j_session: neo4j.Session,
42
- data: Dict,
47
+ data: List[Dict],
43
48
  region: str,
44
49
  current_aws_account_id: str,
45
50
  aws_update_tag: int,
@@ -47,91 +52,13 @@ def load_rds_clusters(
47
52
  """
48
53
  Ingest the RDS clusters to neo4j and link them to necessary nodes.
49
54
  """
50
- ingest_rds_cluster = """
51
- UNWIND $Clusters as rds_cluster
52
- MERGE (cluster:RDSCluster{id: rds_cluster.DBClusterArn})
53
- ON CREATE SET cluster.firstseen = timestamp(),
54
- cluster.arn = rds_cluster.DBClusterArn
55
- SET cluster.allocated_storage = rds_cluster.AllocatedStorage,
56
- cluster.availability_zones = rds_cluster.AvailabilityZones,
57
- cluster.backup_retention_period = rds_cluster.BackupRetentionPeriod,
58
- cluster.character_set_name = rds_cluster.CharacterSetName,
59
- cluster.database_name = rds_cluster.DatabaseName,
60
- cluster.db_cluster_identifier = rds_cluster.DBClusterIdentifier,
61
- cluster.db_parameter_group = rds_cluster.DBClusterParameterGroup,
62
- cluster.status = rds_cluster.Status,
63
- cluster.earliest_restorable_time = rds_cluster.EarliestRestorableTime,
64
- cluster.endpoint = rds_cluster.Endpoint,
65
- cluster.reader_endpoint = rds_cluster.ReaderEndpoint,
66
- cluster.multi_az = rds_cluster.MultiAZ,
67
- cluster.engine = rds_cluster.Engine,
68
- cluster.engine_version = rds_cluster.EngineVersion,
69
- cluster.latest_restorable_time = rds_cluster.LatestRestorableTime,
70
- cluster.port = rds_cluster.Port,
71
- cluster.master_username = rds_cluster.MasterUsername,
72
- cluster.preferred_backup_window = rds_cluster.PreferredBackupWindow,
73
- cluster.preferred_maintenance_window = rds_cluster.PreferredMaintenanceWindow,
74
- cluster.hosted_zone_id = rds_cluster.HostedZoneId,
75
- cluster.storage_encrypted = rds_cluster.StorageEncrypted,
76
- cluster.kms_key_id = rds_cluster.KmsKeyId,
77
- cluster.db_cluster_resource_id = rds_cluster.DbClusterResourceId,
78
- cluster.clone_group_id = rds_cluster.CloneGroupId,
79
- cluster.cluster_create_time = rds_cluster.ClusterCreateTime,
80
- cluster.earliest_backtrack_time = rds_cluster.EarliestBacktrackTime,
81
- cluster.backtrack_window = rds_cluster.BacktrackWindow,
82
- cluster.backtrack_consumed_change_records = rds_cluster.BacktrackConsumedChangeRecords,
83
- cluster.capacity = rds_cluster.Capacity,
84
- cluster.engine_mode = rds_cluster.EngineMode,
85
- cluster.scaling_configuration_info_min_capacity = rds_cluster.ScalingConfigurationInfoMinCapacity,
86
- cluster.scaling_configuration_info_max_capacity = rds_cluster.ScalingConfigurationInfoMaxCapacity,
87
- cluster.scaling_configuration_info_auto_pause = rds_cluster.ScalingConfigurationInfoAutoPause,
88
- cluster.deletion_protection = rds_cluster.DeletionProtection,
89
- cluster.region = $Region,
90
- cluster.lastupdated = $aws_update_tag
91
- WITH cluster
92
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
93
- MERGE (aa)-[r:RESOURCE]->(cluster)
94
- ON CREATE SET r.firstseen = timestamp()
95
- SET r.lastupdated = $aws_update_tag
96
- """
97
- for cluster in data:
98
- # TODO: track read replicas
99
- # TODO: track associated roles
100
- # TODO: track security groups
101
- # TODO: track subnet groups
102
-
103
- cluster["EarliestRestorableTime"] = dict_value_to_str(
104
- cluster,
105
- "EarliestRestorableTime",
106
- )
107
- cluster["LatestRestorableTime"] = dict_value_to_str(
108
- cluster,
109
- "LatestRestorableTime",
110
- )
111
- cluster["ClusterCreateTime"] = dict_value_to_str(cluster, "ClusterCreateTime")
112
- cluster["EarliestBacktrackTime"] = dict_value_to_str(
113
- cluster,
114
- "EarliestBacktrackTime",
115
- )
116
- cluster["ScalingConfigurationInfoMinCapacity"] = cluster.get(
117
- "ScalingConfigurationInfo",
118
- {},
119
- ).get("MinCapacity")
120
- cluster["ScalingConfigurationInfoMaxCapacity"] = cluster.get(
121
- "ScalingConfigurationInfo",
122
- {},
123
- ).get("MaxCapacity")
124
- cluster["ScalingConfigurationInfoAutoPause"] = cluster.get(
125
- "ScalingConfigurationInfo",
126
- {},
127
- ).get("AutoPause")
128
-
129
- neo4j_session.run(
130
- ingest_rds_cluster,
131
- Clusters=data,
55
+ load(
56
+ neo4j_session,
57
+ RDSClusterSchema(),
58
+ data,
59
+ lastupdated=aws_update_tag,
132
60
  Region=region,
133
- AWS_ACCOUNT_ID=current_aws_account_id,
134
- aws_update_tag=aws_update_tag,
61
+ AWS_ID=current_aws_account_id,
135
62
  )
136
63
 
137
64
 
@@ -156,101 +83,22 @@ def get_rds_instance_data(
156
83
  @timeit
157
84
  def load_rds_instances(
158
85
  neo4j_session: neo4j.Session,
159
- data: Dict,
86
+ data: List[Dict],
160
87
  region: str,
161
88
  current_aws_account_id: str,
162
89
  aws_update_tag: int,
163
90
  ) -> None:
164
91
  """
165
- Ingest the RDS instances to neo4j and link them to necessary nodes.
166
- """
167
- ingest_rds_instance = """
168
- UNWIND $Instances as rds_instance
169
- MERGE (rds:RDSInstance{id: rds_instance.DBInstanceArn})
170
- ON CREATE SET rds.firstseen = timestamp(),
171
- rds.arn = rds_instance.DBInstanceArn
172
- SET rds.db_instance_identifier = rds_instance.DBInstanceIdentifier,
173
- rds.db_instance_class = rds_instance.DBInstanceClass,
174
- rds.engine = rds_instance.Engine,
175
- rds.master_username = rds_instance.MasterUsername,
176
- rds.db_name = rds_instance.DBName,
177
- rds.instance_create_time = rds_instance.InstanceCreateTime,
178
- rds.availability_zone = rds_instance.AvailabilityZone,
179
- rds.multi_az = rds_instance.MultiAZ,
180
- rds.engine_version = rds_instance.EngineVersion,
181
- rds.publicly_accessible = rds_instance.PubliclyAccessible,
182
- rds.db_cluster_identifier = rds_instance.DBClusterIdentifier,
183
- rds.storage_encrypted = rds_instance.StorageEncrypted,
184
- rds.kms_key_id = rds_instance.KmsKeyId,
185
- rds.dbi_resource_id = rds_instance.DbiResourceId,
186
- rds.ca_certificate_identifier = rds_instance.CACertificateIdentifier,
187
- rds.enhanced_monitoring_resource_arn = rds_instance.EnhancedMonitoringResourceArn,
188
- rds.monitoring_role_arn = rds_instance.MonitoringRoleArn,
189
- rds.performance_insights_enabled = rds_instance.PerformanceInsightsEnabled,
190
- rds.performance_insights_kms_key_id = rds_instance.PerformanceInsightsKMSKeyId,
191
- rds.region = rds_instance.Region,
192
- rds.deletion_protection = rds_instance.DeletionProtection,
193
- rds.preferred_backup_window = rds_instance.PreferredBackupWindow,
194
- rds.latest_restorable_time = rds_instance.LatestRestorableTime,
195
- rds.preferred_maintenance_window = rds_instance.PreferredMaintenanceWindow,
196
- rds.backup_retention_period = rds_instance.BackupRetentionPeriod,
197
- rds.endpoint_address = rds_instance.EndpointAddress,
198
- rds.endpoint_hostedzoneid = rds_instance.EndpointHostedZoneId,
199
- rds.endpoint_port = rds_instance.EndpointPort,
200
- rds.iam_database_authentication_enabled = rds_instance.IAMDatabaseAuthenticationEnabled,
201
- rds.auto_minor_version_upgrade = rds_instance.AutoMinorVersionUpgrade,
202
- rds.lastupdated = $aws_update_tag
203
- WITH rds
204
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
205
- MERGE (aa)-[r:RESOURCE]->(rds)
206
- ON CREATE SET r.firstseen = timestamp()
207
- SET r.lastupdated = $aws_update_tag
208
- """
209
- read_replicas = []
210
- clusters = []
211
- secgroups = []
212
- subnets = []
213
-
214
- for rds in data:
215
- ep = _validate_rds_endpoint(rds)
216
-
217
- # Keep track of instances that are read replicas so we can attach them to their source instances later
218
- if rds.get("ReadReplicaSourceDBInstanceIdentifier"):
219
- read_replicas.append(rds)
220
-
221
- # Keep track of instances that are cluster members so we can attach them to their source clusters later
222
- if rds.get("DBClusterIdentifier"):
223
- clusters.append(rds)
224
-
225
- if rds.get("VpcSecurityGroups"):
226
- secgroups.append(rds)
227
-
228
- if rds.get("DBSubnetGroup"):
229
- subnets.append(rds)
230
-
231
- rds["InstanceCreateTime"] = dict_value_to_str(rds, "InstanceCreateTime")
232
- rds["LatestRestorableTime"] = dict_value_to_str(rds, "LatestRestorableTime")
233
- rds["EndpointAddress"] = ep.get("Address")
234
- rds["EndpointHostedZoneId"] = ep.get("HostedZoneId")
235
- rds["EndpointPort"] = ep.get("Port")
236
-
237
- neo4j_session.run(
238
- ingest_rds_instance,
239
- Instances=data,
240
- Region=region,
241
- AWS_ACCOUNT_ID=current_aws_account_id,
242
- aws_update_tag=aws_update_tag,
243
- )
244
- _attach_ec2_security_groups(neo4j_session, secgroups, aws_update_tag)
245
- _attach_ec2_subnet_groups(
92
+ Ingest the RDS instances to Neo4j and link them to necessary nodes.
93
+ """
94
+ load(
246
95
  neo4j_session,
247
- subnets,
248
- region,
249
- current_aws_account_id,
250
- aws_update_tag,
96
+ RDSInstanceSchema(),
97
+ data,
98
+ lastupdated=aws_update_tag,
99
+ Region=region,
100
+ AWS_ID=current_aws_account_id,
251
101
  )
252
- _attach_read_replicas(neo4j_session, read_replicas, aws_update_tag)
253
- _attach_clusters(neo4j_session, clusters, aws_update_tag)
254
102
 
255
103
 
256
104
  @timeit
@@ -270,7 +118,7 @@ def get_rds_snapshot_data(
270
118
  @timeit
271
119
  def load_rds_snapshots(
272
120
  neo4j_session: neo4j.Session,
273
- data: Dict,
121
+ data: List[Dict],
274
122
  region: str,
275
123
  current_aws_account_id: str,
276
124
  aws_update_tag: int,
@@ -278,266 +126,13 @@ def load_rds_snapshots(
278
126
  """
279
127
  Ingest the RDS snapshots to neo4j and link them to necessary nodes.
280
128
  """
281
- ingest_rds_snapshot = """
282
- UNWIND $Snapshots as rds_snapshot
283
- MERGE (snapshot:RDSSnapshot{id: rds_snapshot.DBSnapshotArn})
284
- ON CREATE SET snapshot.firstseen = timestamp(),
285
- snapshot.arn = rds_snapshot.DBSnapshotArn
286
- SET snapshot.db_snapshot_identifier = rds_snapshot.DBSnapshotIdentifier,
287
- snapshot.db_instance_identifier = rds_snapshot.DBInstanceIdentifier,
288
- snapshot.snapshot_create_time = rds_snapshot.SnapshotCreateTime,
289
- snapshot.engine = rds_snapshot.Engine,
290
- snapshot.allocated_storage = rds_snapshot.AllocatedStorage,
291
- snapshot.status = rds_snapshot.Status,
292
- snapshot.port = rds_snapshot.Port,
293
- snapshot.availability_zone = rds_snapshot.AvailabilityZone,
294
- snapshot.vpc_id = rds_snapshot.VpcId,
295
- snapshot.instance_create_time = rds_snapshot.InstanceCreateTime,
296
- snapshot.master_username = rds_snapshot.MasterUsername,
297
- snapshot.engine_version = rds_snapshot.EngineVersion,
298
- snapshot.license_model = rds_snapshot.LicenseModel,
299
- snapshot.snapshot_type = rds_snapshot.SnapshotType,
300
- snapshot.iops = rds_snapshot.Iops,
301
- snapshot.option_group_name = rds_snapshot.OptionGroupName,
302
- snapshot.percent_progress = rds_snapshot.PercentProgress,
303
- snapshot.source_region = rds_snapshot.SourceRegion,
304
- snapshot.source_db_snapshot_identifier = rds_snapshot.SourceDBSnapshotIdentifier,
305
- snapshot.storage_type = rds_snapshot.StorageType,
306
- snapshot.tde_credential_arn = rds_snapshot.TdeCredentialArn,
307
- snapshot.encrypted = rds_snapshot.Encrypted,
308
- snapshot.kms_key_id = rds_snapshot.KmsKeyId,
309
- snapshot.timezone = rds_snapshot.Timezone,
310
- snapshot.iam_database_authentication_enabled = rds_snapshot.IAMDatabaseAuthenticationEnabled,
311
- snapshot.processor_features = rds_snapshot.ProcessorFeatures,
312
- snapshot.dbi_resource_id = rds_snapshot.DbiResourceId,
313
- snapshot.original_snapshot_create_time = rds_snapshot.OriginalSnapshotCreateTime,
314
- snapshot.snapshot_database_time = rds_snapshot.SnapshotDatabaseTime,
315
- snapshot.snapshot_target = rds_snapshot.SnapshotTarget,
316
- snapshot.storage_throughput = rds_snapshot.StorageThroughput,
317
- snapshot.region = $Region,
318
- snapshot.lastupdated = $aws_update_tag
319
- WITH snapshot
320
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
321
- MERGE (aa)-[r:RESOURCE]->(snapshot)
322
- ON CREATE SET r.firstseen = timestamp()
323
- SET r.lastupdated = $aws_update_tag
324
- """
325
-
326
- snapshots = transform_rds_snapshots(data)
327
-
328
- neo4j_session.run(
329
- ingest_rds_snapshot,
330
- Snapshots=snapshots,
331
- Region=region,
332
- AWS_ACCOUNT_ID=current_aws_account_id,
333
- aws_update_tag=aws_update_tag,
334
- )
335
- _attach_snapshots(neo4j_session, snapshots, aws_update_tag)
336
-
337
-
338
- @timeit
339
- def _attach_snapshots(
340
- neo4j_session: neo4j.Session,
341
- snapshots: List[Dict],
342
- aws_update_tag: int,
343
- ) -> None:
344
- """
345
- Attach snapshots to their source instance
346
- """
347
- attach_member_to_source = """
348
- UNWIND $Snapshots as snapshot
349
- MATCH (rdsInstance:RDSInstance {db_instance_identifier: snapshot.DBInstanceIdentifier}),
350
- (rdsSnapshot:RDSSnapshot {arn: snapshot.DBSnapshotArn})
351
- MERGE (rdsInstance)-[r:IS_SNAPSHOT_SOURCE]->(rdsSnapshot)
352
- ON CREATE SET r.firstseen = timestamp()
353
- SET r.lastupdated = $aws_update_tag
354
- """
355
- neo4j_session.run(
356
- attach_member_to_source,
357
- Snapshots=snapshots,
358
- aws_update_tag=aws_update_tag,
359
- )
360
-
361
-
362
- @timeit
363
- def _attach_ec2_subnet_groups(
364
- neo4j_session: neo4j.Session,
365
- instances: List[Dict],
366
- region: str,
367
- current_aws_account_id: str,
368
- aws_update_tag: int,
369
- ) -> None:
370
- """
371
- Attach RDS instances to their EC2 subnet groups
372
- """
373
- attach_rds_to_subnet_group = """
374
- UNWIND $SubnetGroups as rds_sng
375
- MERGE (sng:DBSubnetGroup{id: rds_sng.arn})
376
- ON CREATE SET sng.firstseen = timestamp()
377
- SET sng.name = rds_sng.DBSubnetGroupName,
378
- sng.vpc_id = rds_sng.VpcId,
379
- sng.description = rds_sng.DBSubnetGroupDescription,
380
- sng.status = rds_sng.DBSubnetGroupStatus,
381
- sng.lastupdated = $aws_update_tag
382
- WITH sng, rds_sng.instance_arn AS instance_arn
383
- MATCH(rds:RDSInstance{id: instance_arn})
384
- MERGE(rds)-[r:MEMBER_OF_DB_SUBNET_GROUP]->(sng)
385
- ON CREATE SET r.firstseen = timestamp()
386
- SET r.lastupdated = $aws_update_tag
387
- """
388
- db_sngs = []
389
- for instance in instances:
390
- db_sng = instance["DBSubnetGroup"]
391
- db_sng["arn"] = _get_db_subnet_group_arn(
392
- region,
393
- current_aws_account_id,
394
- db_sng["DBSubnetGroupName"],
395
- )
396
- db_sng["instance_arn"] = instance["DBInstanceArn"]
397
- db_sngs.append(db_sng)
398
- neo4j_session.run(
399
- attach_rds_to_subnet_group,
400
- SubnetGroups=db_sngs,
401
- aws_update_tag=aws_update_tag,
402
- )
403
- _attach_ec2_subnets_to_subnetgroup(
129
+ load(
404
130
  neo4j_session,
405
- db_sngs,
406
- region,
407
- current_aws_account_id,
408
- aws_update_tag,
409
- )
410
-
411
-
412
- @timeit
413
- def _attach_ec2_subnets_to_subnetgroup(
414
- neo4j_session: neo4j.Session,
415
- db_subnet_groups: List[Dict],
416
- region: str,
417
- current_aws_account_id: str,
418
- aws_update_tag: int,
419
- ) -> None:
420
- """
421
- Attach EC2Subnets to their DB Subnet Group.
422
-
423
- From https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_VPC.WorkingWithRDSInstanceinaVPC.html:
424
- `Each DB subnet group should have subnets in at least two Availability Zones in a given region. When creating a DB
425
- instance in a VPC, you must select a DB subnet group. Amazon RDS uses that DB subnet group and your preferred
426
- Availability Zone to select a subnet and an IP address within that subnet to associate with your DB instance.`
427
- """
428
- attach_subnets_to_sng = """
429
- UNWIND $Subnets as rds_sn
430
- MATCH(sng:DBSubnetGroup{id: rds_sn.sng_arn})
431
- MERGE(subnet:EC2Subnet{subnetid: rds_sn.sn_id})
432
- ON CREATE SET subnet.firstseen = timestamp()
433
- MERGE(sng)-[r:RESOURCE]->(subnet)
434
- ON CREATE SET r.firstseen = timestamp()
435
- SET r.lastupdated = $aws_update_tag,
436
- subnet.availability_zone = rds_sn.az,
437
- subnet.lastupdated = $aws_update_tag
438
- """
439
- subnets = []
440
- for subnet_group in db_subnet_groups:
441
- for subnet in subnet_group.get("Subnets", []):
442
- sn_id = subnet.get("SubnetIdentifier")
443
- sng_arn = _get_db_subnet_group_arn(
444
- region,
445
- current_aws_account_id,
446
- subnet_group["DBSubnetGroupName"],
447
- )
448
- az = subnet.get("SubnetAvailabilityZone", {}).get("Name")
449
- subnets.append(
450
- {
451
- "sn_id": sn_id,
452
- "sng_arn": sng_arn,
453
- "az": az,
454
- },
455
- )
456
- neo4j_session.run(
457
- attach_subnets_to_sng,
458
- Subnets=subnets,
459
- aws_update_tag=aws_update_tag,
460
- )
461
-
462
-
463
- @timeit
464
- def _attach_ec2_security_groups(
465
- neo4j_session: neo4j.Session,
466
- instances: List[Dict],
467
- aws_update_tag: int,
468
- ) -> None:
469
- """
470
- Attach an RDS instance to its EC2SecurityGroups
471
- """
472
- attach_rds_to_group = """
473
- UNWIND $Groups as rds_sg
474
- MATCH (rds:RDSInstance{id: rds_sg.arn})
475
- MERGE (sg:EC2SecurityGroup{id: rds_sg.group_id})
476
- MERGE (rds)-[m:MEMBER_OF_EC2_SECURITY_GROUP]->(sg)
477
- ON CREATE SET m.firstseen = timestamp()
478
- SET m.lastupdated = $aws_update_tag
479
- """
480
- groups = []
481
- for instance in instances:
482
- for group in instance["VpcSecurityGroups"]:
483
- groups.append(
484
- {
485
- "arn": instance["DBInstanceArn"],
486
- "group_id": group["VpcSecurityGroupId"],
487
- },
488
- )
489
- neo4j_session.run(
490
- attach_rds_to_group,
491
- Groups=groups,
492
- aws_update_tag=aws_update_tag,
493
- )
494
-
495
-
496
- @timeit
497
- def _attach_read_replicas(
498
- neo4j_session: neo4j.Session,
499
- read_replicas: List[Dict],
500
- aws_update_tag: int,
501
- ) -> None:
502
- """
503
- Attach read replicas to their source instances
504
- """
505
- attach_replica_to_source = """
506
- UNWIND $Replicas as rds_replica
507
- MATCH (replica:RDSInstance{id: rds_replica.DBInstanceArn}),
508
- (source:RDSInstance{db_instance_identifier: rds_replica.ReadReplicaSourceDBInstanceIdentifier})
509
- MERGE (replica)-[r:IS_READ_REPLICA_OF]->(source)
510
- ON CREATE SET r.firstseen = timestamp()
511
- SET r.lastupdated = $aws_update_tag
512
- """
513
- neo4j_session.run(
514
- attach_replica_to_source,
515
- Replicas=read_replicas,
516
- aws_update_tag=aws_update_tag,
517
- )
518
-
519
-
520
- @timeit
521
- def _attach_clusters(
522
- neo4j_session: neo4j.Session,
523
- cluster_members: List[Dict],
524
- aws_update_tag: int,
525
- ) -> None:
526
- """
527
- Attach cluster members to their source clusters
528
- """
529
- attach_member_to_source = """
530
- UNWIND $Members as rds_cluster_member
531
- MATCH (member:RDSInstance{id: rds_cluster_member.DBInstanceArn}),
532
- (source:RDSCluster{db_cluster_identifier: rds_cluster_member.DBClusterIdentifier})
533
- MERGE (member)-[r:IS_CLUSTER_MEMBER_OF]->(source)
534
- ON CREATE SET r.firstseen = timestamp()
535
- SET r.lastupdated = $aws_update_tag
536
- """
537
- neo4j_session.run(
538
- attach_member_to_source,
539
- Members=cluster_members,
540
- aws_update_tag=aws_update_tag,
131
+ RDSSnapshotSchema(),
132
+ data,
133
+ lastupdated=aws_update_tag,
134
+ Region=region,
135
+ AWS_ID=current_aws_account_id,
541
136
  )
542
137
 
543
138
 
@@ -571,8 +166,48 @@ def _get_db_subnet_group_arn(
571
166
  )
572
167
 
573
168
 
574
- @timeit
575
- def transform_rds_snapshots(data: Dict) -> List[Dict]:
169
+ def transform_rds_clusters(data: List[Dict]) -> List[Dict]:
170
+ """
171
+ Transform RDS cluster data for Neo4j ingestion
172
+ """
173
+ clusters = []
174
+
175
+ for cluster in data:
176
+ # Copy the cluster data
177
+ transformed_cluster = cluster.copy()
178
+
179
+ # Convert datetime fields
180
+ transformed_cluster["EarliestRestorableTime"] = dict_value_to_str(
181
+ cluster, "EarliestRestorableTime"
182
+ )
183
+ transformed_cluster["LatestRestorableTime"] = dict_value_to_str(
184
+ cluster, "LatestRestorableTime"
185
+ )
186
+ transformed_cluster["ClusterCreateTime"] = dict_value_to_str(
187
+ cluster, "ClusterCreateTime"
188
+ )
189
+ transformed_cluster["EarliestBacktrackTime"] = dict_value_to_str(
190
+ cluster, "EarliestBacktrackTime"
191
+ )
192
+
193
+ # Extract scaling configuration info
194
+ scaling_config = cluster.get("ScalingConfigurationInfo", {})
195
+ transformed_cluster["ScalingConfigurationInfoMinCapacity"] = scaling_config.get(
196
+ "MinCapacity"
197
+ )
198
+ transformed_cluster["ScalingConfigurationInfoMaxCapacity"] = scaling_config.get(
199
+ "MaxCapacity"
200
+ )
201
+ transformed_cluster["ScalingConfigurationInfoAutoPause"] = scaling_config.get(
202
+ "AutoPause"
203
+ )
204
+
205
+ clusters.append(transformed_cluster)
206
+
207
+ return clusters
208
+
209
+
210
+ def transform_rds_snapshots(data: List[Dict]) -> List[Dict]:
576
211
  snapshots = []
577
212
 
578
213
  for snapshot in data:
@@ -599,18 +234,151 @@ def transform_rds_snapshots(data: Dict) -> List[Dict]:
599
234
  return snapshots
600
235
 
601
236
 
237
+ def transform_rds_instances(
238
+ data: List[Dict], region: str, current_aws_account_id: str
239
+ ) -> List[Dict]:
240
+ """
241
+ Transform RDS instance data for Neo4j ingestion
242
+ """
243
+ instances = []
244
+
245
+ for instance in data:
246
+ # Copy the instance data
247
+ transformed_instance = instance.copy()
248
+
249
+ # Extract security group IDs for the relationship
250
+ security_group_ids = []
251
+ if instance.get("VpcSecurityGroups"):
252
+ for group in instance["VpcSecurityGroups"]:
253
+ security_group_ids.append(group["VpcSecurityGroupId"])
254
+
255
+ transformed_instance["security_group_ids"] = security_group_ids
256
+
257
+ # Handle read replica source identifier for the relationship
258
+ if instance.get("ReadReplicaSourceDBInstanceIdentifier"):
259
+ transformed_instance["read_replica_source_identifier"] = instance[
260
+ "ReadReplicaSourceDBInstanceIdentifier"
261
+ ]
262
+
263
+ # Handle cluster identifier for the relationship
264
+ if instance.get("DBClusterIdentifier"):
265
+ transformed_instance["db_cluster_identifier"] = instance[
266
+ "DBClusterIdentifier"
267
+ ]
268
+
269
+ # Handle subnet group data for the relationship
270
+ if instance.get("DBSubnetGroup"):
271
+ db_subnet_group = instance["DBSubnetGroup"]
272
+ transformed_instance["db_subnet_group_arn"] = _get_db_subnet_group_arn(
273
+ region, current_aws_account_id, db_subnet_group["DBSubnetGroupName"]
274
+ )
275
+
276
+ # Handle endpoint data
277
+ ep = _validate_rds_endpoint(instance)
278
+ transformed_instance["EndpointAddress"] = ep.get("Address")
279
+ transformed_instance["EndpointHostedZoneId"] = ep.get("HostedZoneId")
280
+ transformed_instance["EndpointPort"] = ep.get("Port")
281
+
282
+ # Convert datetime fields
283
+ transformed_instance["InstanceCreateTime"] = dict_value_to_str(
284
+ instance, "InstanceCreateTime"
285
+ )
286
+ transformed_instance["LatestRestorableTime"] = dict_value_to_str(
287
+ instance, "LatestRestorableTime"
288
+ )
289
+
290
+ instances.append(transformed_instance)
291
+
292
+ return instances
293
+
294
+
295
+ def transform_rds_subnet_groups(
296
+ data: List[Dict], region: str, current_aws_account_id: str
297
+ ) -> List[Dict]:
298
+ """
299
+ Transform RDS subnet group data for Neo4j ingestion
300
+ """
301
+ subnet_groups_dict = {}
302
+
303
+ for instance in data:
304
+ if instance.get("DBSubnetGroup"):
305
+ db_subnet_group = instance["DBSubnetGroup"]
306
+ db_subnet_group_arn = _get_db_subnet_group_arn(
307
+ region, current_aws_account_id, db_subnet_group["DBSubnetGroupName"]
308
+ )
309
+
310
+ # If this subnet group doesn't exist yet, create it
311
+ if db_subnet_group_arn not in subnet_groups_dict:
312
+ subnet_groups_dict[db_subnet_group_arn] = {
313
+ "id": db_subnet_group_arn,
314
+ "name": db_subnet_group["DBSubnetGroupName"],
315
+ "vpc_id": db_subnet_group["VpcId"],
316
+ "description": db_subnet_group["DBSubnetGroupDescription"],
317
+ "status": db_subnet_group["SubnetGroupStatus"],
318
+ "db_instance_identifier": [],
319
+ "subnet_ids": [],
320
+ }
321
+
322
+ # Add this RDS instance to the subnet group's list
323
+ if instance.get("DBInstanceIdentifier"):
324
+ subnet_groups_dict[db_subnet_group_arn][
325
+ "db_instance_identifier"
326
+ ].append(instance["DBInstanceIdentifier"])
327
+
328
+ # Add subnet IDs from the DB subnet group
329
+ for subnet in db_subnet_group.get("Subnets", []):
330
+ subnet_id = subnet.get("SubnetIdentifier")
331
+ if (
332
+ subnet_id
333
+ and subnet_id
334
+ not in subnet_groups_dict[db_subnet_group_arn]["subnet_ids"]
335
+ ):
336
+ subnet_groups_dict[db_subnet_group_arn]["subnet_ids"].append(
337
+ subnet_id
338
+ )
339
+
340
+ return list(subnet_groups_dict.values())
341
+
342
+
343
+ @timeit
344
+ def load_rds_subnet_groups(
345
+ neo4j_session: neo4j.Session,
346
+ data: List[Dict],
347
+ region: str,
348
+ current_aws_account_id: str,
349
+ aws_update_tag: int,
350
+ ) -> None:
351
+ """
352
+ Ingest the RDS subnet groups to Neo4j and link them to necessary nodes.
353
+ """
354
+ load(
355
+ neo4j_session,
356
+ DBSubnetGroupSchema(),
357
+ data,
358
+ lastupdated=aws_update_tag,
359
+ Region=region,
360
+ AWS_ID=current_aws_account_id,
361
+ )
362
+
363
+
602
364
  @timeit
603
365
  def cleanup_rds_instances_and_db_subnet_groups(
604
366
  neo4j_session: neo4j.Session,
605
367
  common_job_parameters: Dict,
606
368
  ) -> None:
607
369
  """
608
- Remove RDS graph nodes and DBSubnetGroups that were created from other ingestion runs
370
+ Remove RDS instances and DB subnet groups that weren't updated in this sync run
609
371
  """
610
- run_cleanup_job(
611
- "aws_import_rds_instances_cleanup.json",
612
- neo4j_session,
613
- common_job_parameters,
372
+ logger.debug("Running RDS instances and DB subnet groups cleanup job")
373
+
374
+ # Clean up RDS instances
375
+ GraphJob.from_node_schema(RDSInstanceSchema(), common_job_parameters).run(
376
+ neo4j_session
377
+ )
378
+
379
+ # Clean up DB subnet groups
380
+ GraphJob.from_node_schema(DBSubnetGroupSchema(), common_job_parameters).run(
381
+ neo4j_session
614
382
  )
615
383
 
616
384
 
@@ -620,12 +388,12 @@ def cleanup_rds_clusters(
620
388
  common_job_parameters: Dict,
621
389
  ) -> None:
622
390
  """
623
- Remove RDS cluster graph nodes
391
+ Remove RDS clusters that weren't updated in this sync run
624
392
  """
625
- run_cleanup_job(
626
- "aws_import_rds_clusters_cleanup.json",
627
- neo4j_session,
628
- common_job_parameters,
393
+ logger.debug("Running RDS clusters cleanup job")
394
+
395
+ GraphJob.from_node_schema(RDSClusterSchema(), common_job_parameters).run(
396
+ neo4j_session
629
397
  )
630
398
 
631
399
 
@@ -635,12 +403,12 @@ def cleanup_rds_snapshots(
635
403
  common_job_parameters: Dict,
636
404
  ) -> None:
637
405
  """
638
- Remove RDS snapshots graph nodes
406
+ Remove RDS snapshots that weren't updated in this sync run
639
407
  """
640
- run_cleanup_job(
641
- "aws_import_rds_snapshots_cleanup.json",
642
- neo4j_session,
643
- common_job_parameters,
408
+ logger.debug("Running RDS snapshots cleanup job")
409
+
410
+ GraphJob.from_node_schema(RDSSnapshotSchema(), common_job_parameters).run(
411
+ neo4j_session
644
412
  )
645
413
 
646
414
 
@@ -654,16 +422,19 @@ def sync_rds_clusters(
654
422
  common_job_parameters: Dict,
655
423
  ) -> None:
656
424
  """
657
- Grab RDS instance data from AWS, ingest to neo4j, and run the cleanup job.
425
+ Grab RDS cluster data from AWS, ingest to neo4j, and run the cleanup job.
658
426
  """
659
427
  for region in regions:
660
428
  logger.info(
661
- "Syncing RDS for region '%s' in account '%s'.",
429
+ "Syncing RDS clusters for region '%s' in account '%s'.",
662
430
  region,
663
431
  current_aws_account_id,
664
432
  )
665
433
  data = get_rds_cluster_data(boto3_session, region)
666
- load_rds_clusters(neo4j_session, data, region, current_aws_account_id, update_tag) # type: ignore
434
+ transformed_data = transform_rds_clusters(data)
435
+ load_rds_clusters(
436
+ neo4j_session, transformed_data, region, current_aws_account_id, update_tag
437
+ )
667
438
  cleanup_rds_clusters(neo4j_session, common_job_parameters)
668
439
 
669
440
 
@@ -681,12 +452,23 @@ def sync_rds_instances(
681
452
  """
682
453
  for region in regions:
683
454
  logger.info(
684
- "Syncing RDS for region '%s' in account '%s'.",
455
+ "Syncing RDS instances for region '%s' in account '%s'.",
685
456
  region,
686
457
  current_aws_account_id,
687
458
  )
688
459
  data = get_rds_instance_data(boto3_session, region)
689
- load_rds_instances(neo4j_session, data, region, current_aws_account_id, update_tag) # type: ignore
460
+ transformed_data = transform_rds_instances(data, region, current_aws_account_id)
461
+ load_rds_instances(
462
+ neo4j_session, transformed_data, region, current_aws_account_id, update_tag
463
+ )
464
+
465
+ # Load subnet groups from RDS instances
466
+ subnet_group_data = transform_rds_subnet_groups(
467
+ data, region, current_aws_account_id
468
+ )
469
+ load_rds_subnet_groups(
470
+ neo4j_session, subnet_group_data, region, current_aws_account_id, update_tag
471
+ )
690
472
  cleanup_rds_instances_and_db_subnet_groups(neo4j_session, common_job_parameters)
691
473
 
692
474
 
@@ -704,12 +486,15 @@ def sync_rds_snapshots(
704
486
  """
705
487
  for region in regions:
706
488
  logger.info(
707
- "Syncing RDS for region '%s' in account '%s'.",
489
+ "Syncing RDS snapshots for region '%s' in account '%s'.",
708
490
  region,
709
491
  current_aws_account_id,
710
492
  )
711
493
  data = get_rds_snapshot_data(boto3_session, region)
712
- load_rds_snapshots(neo4j_session, data, region, current_aws_account_id, update_tag) # type: ignore
494
+ transformed_data = transform_rds_snapshots(data)
495
+ load_rds_snapshots(
496
+ neo4j_session, transformed_data, region, current_aws_account_id, update_tag
497
+ )
713
498
  cleanup_rds_snapshots(neo4j_session, common_job_parameters)
714
499
 
715
500