cartography 0.109.0rc1__py3-none-any.whl → 0.110.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (78)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +14 -0
  3. cartography/config.py +4 -0
  4. cartography/data/indexes.cypher +0 -15
  5. cartography/data/jobs/analysis/aws_ec2_keypair_analysis.json +2 -2
  6. cartography/intel/aws/cloudtrail_management_events.py +21 -0
  7. cartography/intel/aws/cognito.py +201 -0
  8. cartography/intel/aws/ecs.py +7 -1
  9. cartography/intel/aws/eventbridge.py +91 -0
  10. cartography/intel/aws/glue.py +181 -0
  11. cartography/intel/aws/identitycenter.py +71 -23
  12. cartography/intel/aws/kms.py +173 -201
  13. cartography/intel/aws/lambda_function.py +206 -190
  14. cartography/intel/aws/rds.py +335 -445
  15. cartography/intel/aws/resources.py +6 -0
  16. cartography/intel/aws/route53.py +336 -332
  17. cartography/intel/aws/s3.py +104 -0
  18. cartography/intel/github/__init__.py +21 -25
  19. cartography/intel/github/repos.py +4 -36
  20. cartography/intel/kubernetes/__init__.py +4 -0
  21. cartography/intel/kubernetes/rbac.py +464 -0
  22. cartography/intel/kubernetes/util.py +17 -0
  23. cartography/intel/trivy/__init__.py +73 -13
  24. cartography/intel/trivy/scanner.py +115 -92
  25. cartography/models/aws/cognito/__init__.py +0 -0
  26. cartography/models/aws/cognito/identity_pool.py +70 -0
  27. cartography/models/aws/cognito/user_pool.py +47 -0
  28. cartography/models/aws/ec2/security_groups.py +1 -1
  29. cartography/models/aws/ecs/services.py +17 -0
  30. cartography/models/aws/ecs/tasks.py +1 -0
  31. cartography/models/aws/eventbridge/__init__.py +0 -0
  32. cartography/models/aws/eventbridge/rule.py +77 -0
  33. cartography/models/aws/glue/__init__.py +0 -0
  34. cartography/models/aws/glue/connection.py +51 -0
  35. cartography/models/aws/glue/job.py +69 -0
  36. cartography/models/aws/identitycenter/awspermissionset.py +44 -0
  37. cartography/models/aws/kms/__init__.py +0 -0
  38. cartography/models/aws/kms/aliases.py +86 -0
  39. cartography/models/aws/kms/grants.py +65 -0
  40. cartography/models/aws/kms/keys.py +88 -0
  41. cartography/models/aws/lambda_function/__init__.py +0 -0
  42. cartography/models/aws/lambda_function/alias.py +74 -0
  43. cartography/models/aws/lambda_function/event_source_mapping.py +88 -0
  44. cartography/models/aws/lambda_function/lambda_function.py +89 -0
  45. cartography/models/aws/lambda_function/layer.py +72 -0
  46. cartography/models/aws/rds/__init__.py +0 -0
  47. cartography/models/aws/rds/cluster.py +89 -0
  48. cartography/models/aws/rds/event_subscription.py +146 -0
  49. cartography/models/aws/rds/instance.py +154 -0
  50. cartography/models/aws/rds/snapshot.py +108 -0
  51. cartography/models/aws/rds/subnet_group.py +101 -0
  52. cartography/models/aws/route53/__init__.py +0 -0
  53. cartography/models/aws/route53/dnsrecord.py +235 -0
  54. cartography/models/aws/route53/nameserver.py +63 -0
  55. cartography/models/aws/route53/subzone.py +40 -0
  56. cartography/models/aws/route53/zone.py +47 -0
  57. cartography/models/github/dependencies.py +1 -2
  58. cartography/models/kubernetes/clusterrolebindings.py +98 -0
  59. cartography/models/kubernetes/clusterroles.py +52 -0
  60. cartography/models/kubernetes/rolebindings.py +119 -0
  61. cartography/models/kubernetes/roles.py +76 -0
  62. cartography/models/kubernetes/serviceaccounts.py +77 -0
  63. cartography/models/snipeit/asset.py +1 -0
  64. cartography/util.py +8 -1
  65. {cartography-0.109.0rc1.dist-info → cartography-0.110.0.dist-info}/METADATA +3 -3
  66. {cartography-0.109.0rc1.dist-info → cartography-0.110.0.dist-info}/RECORD +71 -41
  67. cartography/data/jobs/cleanup/aws_dns_cleanup.json +0 -65
  68. cartography/data/jobs/cleanup/aws_import_identity_center_cleanup.json +0 -16
  69. cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +0 -50
  70. cartography/data/jobs/cleanup/aws_import_rds_clusters_cleanup.json +0 -23
  71. cartography/data/jobs/cleanup/aws_import_rds_instances_cleanup.json +0 -47
  72. cartography/data/jobs/cleanup/aws_import_rds_snapshots_cleanup.json +0 -23
  73. cartography/data/jobs/cleanup/aws_kms_details.json +0 -10
  74. cartography/data/jobs/{analysis → scoped_analysis}/aws_s3acl_analysis.json +0 -0
  75. {cartography-0.109.0rc1.dist-info → cartography-0.110.0.dist-info}/WHEEL +0 -0
  76. {cartography-0.109.0rc1.dist-info → cartography-0.110.0.dist-info}/entry_points.txt +0 -0
  77. {cartography-0.109.0rc1.dist-info → cartography-0.110.0.dist-info}/licenses/LICENSE +0 -0
  78. {cartography-0.109.0rc1.dist-info → cartography-0.110.0.dist-info}/top_level.txt +0 -0
@@ -6,12 +6,18 @@ from typing import List
6
6
  import boto3
7
7
  import neo4j
8
8
 
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.models.aws.rds.cluster import RDSClusterSchema
12
+ from cartography.models.aws.rds.event_subscription import RDSEventSubscriptionSchema
13
+ from cartography.models.aws.rds.instance import RDSInstanceSchema
14
+ from cartography.models.aws.rds.snapshot import RDSSnapshotSchema
15
+ from cartography.models.aws.rds.subnet_group import DBSubnetGroupSchema
9
16
  from cartography.stats import get_stats_client
10
17
  from cartography.util import aws_handle_regions
11
18
  from cartography.util import aws_paginate
12
19
  from cartography.util import dict_value_to_str
13
20
  from cartography.util import merge_module_sync_metadata
14
- from cartography.util import run_cleanup_job
15
21
  from cartography.util import timeit
16
22
 
17
23
  logger = logging.getLogger(__name__)
@@ -39,7 +45,7 @@ def get_rds_cluster_data(
39
45
  @timeit
40
46
  def load_rds_clusters(
41
47
  neo4j_session: neo4j.Session,
42
- data: Dict,
48
+ data: List[Dict],
43
49
  region: str,
44
50
  current_aws_account_id: str,
45
51
  aws_update_tag: int,
@@ -47,91 +53,13 @@ def load_rds_clusters(
47
53
  """
48
54
  Ingest the RDS clusters to neo4j and link them to necessary nodes.
49
55
  """
50
- ingest_rds_cluster = """
51
- UNWIND $Clusters as rds_cluster
52
- MERGE (cluster:RDSCluster{id: rds_cluster.DBClusterArn})
53
- ON CREATE SET cluster.firstseen = timestamp(),
54
- cluster.arn = rds_cluster.DBClusterArn
55
- SET cluster.allocated_storage = rds_cluster.AllocatedStorage,
56
- cluster.availability_zones = rds_cluster.AvailabilityZones,
57
- cluster.backup_retention_period = rds_cluster.BackupRetentionPeriod,
58
- cluster.character_set_name = rds_cluster.CharacterSetName,
59
- cluster.database_name = rds_cluster.DatabaseName,
60
- cluster.db_cluster_identifier = rds_cluster.DBClusterIdentifier,
61
- cluster.db_parameter_group = rds_cluster.DBClusterParameterGroup,
62
- cluster.status = rds_cluster.Status,
63
- cluster.earliest_restorable_time = rds_cluster.EarliestRestorableTime,
64
- cluster.endpoint = rds_cluster.Endpoint,
65
- cluster.reader_endpoint = rds_cluster.ReaderEndpoint,
66
- cluster.multi_az = rds_cluster.MultiAZ,
67
- cluster.engine = rds_cluster.Engine,
68
- cluster.engine_version = rds_cluster.EngineVersion,
69
- cluster.latest_restorable_time = rds_cluster.LatestRestorableTime,
70
- cluster.port = rds_cluster.Port,
71
- cluster.master_username = rds_cluster.MasterUsername,
72
- cluster.preferred_backup_window = rds_cluster.PreferredBackupWindow,
73
- cluster.preferred_maintenance_window = rds_cluster.PreferredMaintenanceWindow,
74
- cluster.hosted_zone_id = rds_cluster.HostedZoneId,
75
- cluster.storage_encrypted = rds_cluster.StorageEncrypted,
76
- cluster.kms_key_id = rds_cluster.KmsKeyId,
77
- cluster.db_cluster_resource_id = rds_cluster.DbClusterResourceId,
78
- cluster.clone_group_id = rds_cluster.CloneGroupId,
79
- cluster.cluster_create_time = rds_cluster.ClusterCreateTime,
80
- cluster.earliest_backtrack_time = rds_cluster.EarliestBacktrackTime,
81
- cluster.backtrack_window = rds_cluster.BacktrackWindow,
82
- cluster.backtrack_consumed_change_records = rds_cluster.BacktrackConsumedChangeRecords,
83
- cluster.capacity = rds_cluster.Capacity,
84
- cluster.engine_mode = rds_cluster.EngineMode,
85
- cluster.scaling_configuration_info_min_capacity = rds_cluster.ScalingConfigurationInfoMinCapacity,
86
- cluster.scaling_configuration_info_max_capacity = rds_cluster.ScalingConfigurationInfoMaxCapacity,
87
- cluster.scaling_configuration_info_auto_pause = rds_cluster.ScalingConfigurationInfoAutoPause,
88
- cluster.deletion_protection = rds_cluster.DeletionProtection,
89
- cluster.region = $Region,
90
- cluster.lastupdated = $aws_update_tag
91
- WITH cluster
92
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
93
- MERGE (aa)-[r:RESOURCE]->(cluster)
94
- ON CREATE SET r.firstseen = timestamp()
95
- SET r.lastupdated = $aws_update_tag
96
- """
97
- for cluster in data:
98
- # TODO: track read replicas
99
- # TODO: track associated roles
100
- # TODO: track security groups
101
- # TODO: track subnet groups
102
-
103
- cluster["EarliestRestorableTime"] = dict_value_to_str(
104
- cluster,
105
- "EarliestRestorableTime",
106
- )
107
- cluster["LatestRestorableTime"] = dict_value_to_str(
108
- cluster,
109
- "LatestRestorableTime",
110
- )
111
- cluster["ClusterCreateTime"] = dict_value_to_str(cluster, "ClusterCreateTime")
112
- cluster["EarliestBacktrackTime"] = dict_value_to_str(
113
- cluster,
114
- "EarliestBacktrackTime",
115
- )
116
- cluster["ScalingConfigurationInfoMinCapacity"] = cluster.get(
117
- "ScalingConfigurationInfo",
118
- {},
119
- ).get("MinCapacity")
120
- cluster["ScalingConfigurationInfoMaxCapacity"] = cluster.get(
121
- "ScalingConfigurationInfo",
122
- {},
123
- ).get("MaxCapacity")
124
- cluster["ScalingConfigurationInfoAutoPause"] = cluster.get(
125
- "ScalingConfigurationInfo",
126
- {},
127
- ).get("AutoPause")
128
-
129
- neo4j_session.run(
130
- ingest_rds_cluster,
131
- Clusters=data,
56
+ load(
57
+ neo4j_session,
58
+ RDSClusterSchema(),
59
+ data,
60
+ lastupdated=aws_update_tag,
132
61
  Region=region,
133
- AWS_ACCOUNT_ID=current_aws_account_id,
134
- aws_update_tag=aws_update_tag,
62
+ AWS_ID=current_aws_account_id,
135
63
  )
136
64
 
137
65
 
@@ -156,101 +84,22 @@ def get_rds_instance_data(
156
84
  @timeit
157
85
  def load_rds_instances(
158
86
  neo4j_session: neo4j.Session,
159
- data: Dict,
87
+ data: List[Dict],
160
88
  region: str,
161
89
  current_aws_account_id: str,
162
90
  aws_update_tag: int,
163
91
  ) -> None:
164
92
  """
165
- Ingest the RDS instances to neo4j and link them to necessary nodes.
166
- """
167
- ingest_rds_instance = """
168
- UNWIND $Instances as rds_instance
169
- MERGE (rds:RDSInstance{id: rds_instance.DBInstanceArn})
170
- ON CREATE SET rds.firstseen = timestamp(),
171
- rds.arn = rds_instance.DBInstanceArn
172
- SET rds.db_instance_identifier = rds_instance.DBInstanceIdentifier,
173
- rds.db_instance_class = rds_instance.DBInstanceClass,
174
- rds.engine = rds_instance.Engine,
175
- rds.master_username = rds_instance.MasterUsername,
176
- rds.db_name = rds_instance.DBName,
177
- rds.instance_create_time = rds_instance.InstanceCreateTime,
178
- rds.availability_zone = rds_instance.AvailabilityZone,
179
- rds.multi_az = rds_instance.MultiAZ,
180
- rds.engine_version = rds_instance.EngineVersion,
181
- rds.publicly_accessible = rds_instance.PubliclyAccessible,
182
- rds.db_cluster_identifier = rds_instance.DBClusterIdentifier,
183
- rds.storage_encrypted = rds_instance.StorageEncrypted,
184
- rds.kms_key_id = rds_instance.KmsKeyId,
185
- rds.dbi_resource_id = rds_instance.DbiResourceId,
186
- rds.ca_certificate_identifier = rds_instance.CACertificateIdentifier,
187
- rds.enhanced_monitoring_resource_arn = rds_instance.EnhancedMonitoringResourceArn,
188
- rds.monitoring_role_arn = rds_instance.MonitoringRoleArn,
189
- rds.performance_insights_enabled = rds_instance.PerformanceInsightsEnabled,
190
- rds.performance_insights_kms_key_id = rds_instance.PerformanceInsightsKMSKeyId,
191
- rds.region = rds_instance.Region,
192
- rds.deletion_protection = rds_instance.DeletionProtection,
193
- rds.preferred_backup_window = rds_instance.PreferredBackupWindow,
194
- rds.latest_restorable_time = rds_instance.LatestRestorableTime,
195
- rds.preferred_maintenance_window = rds_instance.PreferredMaintenanceWindow,
196
- rds.backup_retention_period = rds_instance.BackupRetentionPeriod,
197
- rds.endpoint_address = rds_instance.EndpointAddress,
198
- rds.endpoint_hostedzoneid = rds_instance.EndpointHostedZoneId,
199
- rds.endpoint_port = rds_instance.EndpointPort,
200
- rds.iam_database_authentication_enabled = rds_instance.IAMDatabaseAuthenticationEnabled,
201
- rds.auto_minor_version_upgrade = rds_instance.AutoMinorVersionUpgrade,
202
- rds.lastupdated = $aws_update_tag
203
- WITH rds
204
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
205
- MERGE (aa)-[r:RESOURCE]->(rds)
206
- ON CREATE SET r.firstseen = timestamp()
207
- SET r.lastupdated = $aws_update_tag
208
- """
209
- read_replicas = []
210
- clusters = []
211
- secgroups = []
212
- subnets = []
213
-
214
- for rds in data:
215
- ep = _validate_rds_endpoint(rds)
216
-
217
- # Keep track of instances that are read replicas so we can attach them to their source instances later
218
- if rds.get("ReadReplicaSourceDBInstanceIdentifier"):
219
- read_replicas.append(rds)
220
-
221
- # Keep track of instances that are cluster members so we can attach them to their source clusters later
222
- if rds.get("DBClusterIdentifier"):
223
- clusters.append(rds)
224
-
225
- if rds.get("VpcSecurityGroups"):
226
- secgroups.append(rds)
227
-
228
- if rds.get("DBSubnetGroup"):
229
- subnets.append(rds)
230
-
231
- rds["InstanceCreateTime"] = dict_value_to_str(rds, "InstanceCreateTime")
232
- rds["LatestRestorableTime"] = dict_value_to_str(rds, "LatestRestorableTime")
233
- rds["EndpointAddress"] = ep.get("Address")
234
- rds["EndpointHostedZoneId"] = ep.get("HostedZoneId")
235
- rds["EndpointPort"] = ep.get("Port")
236
-
237
- neo4j_session.run(
238
- ingest_rds_instance,
239
- Instances=data,
240
- Region=region,
241
- AWS_ACCOUNT_ID=current_aws_account_id,
242
- aws_update_tag=aws_update_tag,
243
- )
244
- _attach_ec2_security_groups(neo4j_session, secgroups, aws_update_tag)
245
- _attach_ec2_subnet_groups(
93
+ Ingest the RDS instances to Neo4j and link them to necessary nodes.
94
+ """
95
+ load(
246
96
  neo4j_session,
247
- subnets,
248
- region,
249
- current_aws_account_id,
250
- aws_update_tag,
97
+ RDSInstanceSchema(),
98
+ data,
99
+ lastupdated=aws_update_tag,
100
+ Region=region,
101
+ AWS_ID=current_aws_account_id,
251
102
  )
252
- _attach_read_replicas(neo4j_session, read_replicas, aws_update_tag)
253
- _attach_clusters(neo4j_session, clusters, aws_update_tag)
254
103
 
255
104
 
256
105
  @timeit
@@ -270,7 +119,7 @@ def get_rds_snapshot_data(
270
119
  @timeit
271
120
  def load_rds_snapshots(
272
121
  neo4j_session: neo4j.Session,
273
- data: Dict,
122
+ data: List[Dict],
274
123
  region: str,
275
124
  current_aws_account_id: str,
276
125
  aws_update_tag: int,
@@ -278,266 +127,45 @@ def load_rds_snapshots(
278
127
  """
279
128
  Ingest the RDS snapshots to neo4j and link them to necessary nodes.
280
129
  """
281
- ingest_rds_snapshot = """
282
- UNWIND $Snapshots as rds_snapshot
283
- MERGE (snapshot:RDSSnapshot{id: rds_snapshot.DBSnapshotArn})
284
- ON CREATE SET snapshot.firstseen = timestamp(),
285
- snapshot.arn = rds_snapshot.DBSnapshotArn
286
- SET snapshot.db_snapshot_identifier = rds_snapshot.DBSnapshotIdentifier,
287
- snapshot.db_instance_identifier = rds_snapshot.DBInstanceIdentifier,
288
- snapshot.snapshot_create_time = rds_snapshot.SnapshotCreateTime,
289
- snapshot.engine = rds_snapshot.Engine,
290
- snapshot.allocated_storage = rds_snapshot.AllocatedStorage,
291
- snapshot.status = rds_snapshot.Status,
292
- snapshot.port = rds_snapshot.Port,
293
- snapshot.availability_zone = rds_snapshot.AvailabilityZone,
294
- snapshot.vpc_id = rds_snapshot.VpcId,
295
- snapshot.instance_create_time = rds_snapshot.InstanceCreateTime,
296
- snapshot.master_username = rds_snapshot.MasterUsername,
297
- snapshot.engine_version = rds_snapshot.EngineVersion,
298
- snapshot.license_model = rds_snapshot.LicenseModel,
299
- snapshot.snapshot_type = rds_snapshot.SnapshotType,
300
- snapshot.iops = rds_snapshot.Iops,
301
- snapshot.option_group_name = rds_snapshot.OptionGroupName,
302
- snapshot.percent_progress = rds_snapshot.PercentProgress,
303
- snapshot.source_region = rds_snapshot.SourceRegion,
304
- snapshot.source_db_snapshot_identifier = rds_snapshot.SourceDBSnapshotIdentifier,
305
- snapshot.storage_type = rds_snapshot.StorageType,
306
- snapshot.tde_credential_arn = rds_snapshot.TdeCredentialArn,
307
- snapshot.encrypted = rds_snapshot.Encrypted,
308
- snapshot.kms_key_id = rds_snapshot.KmsKeyId,
309
- snapshot.timezone = rds_snapshot.Timezone,
310
- snapshot.iam_database_authentication_enabled = rds_snapshot.IAMDatabaseAuthenticationEnabled,
311
- snapshot.processor_features = rds_snapshot.ProcessorFeatures,
312
- snapshot.dbi_resource_id = rds_snapshot.DbiResourceId,
313
- snapshot.original_snapshot_create_time = rds_snapshot.OriginalSnapshotCreateTime,
314
- snapshot.snapshot_database_time = rds_snapshot.SnapshotDatabaseTime,
315
- snapshot.snapshot_target = rds_snapshot.SnapshotTarget,
316
- snapshot.storage_throughput = rds_snapshot.StorageThroughput,
317
- snapshot.region = $Region,
318
- snapshot.lastupdated = $aws_update_tag
319
- WITH snapshot
320
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
321
- MERGE (aa)-[r:RESOURCE]->(snapshot)
322
- ON CREATE SET r.firstseen = timestamp()
323
- SET r.lastupdated = $aws_update_tag
324
- """
325
-
326
- snapshots = transform_rds_snapshots(data)
327
-
328
- neo4j_session.run(
329
- ingest_rds_snapshot,
330
- Snapshots=snapshots,
130
+ load(
131
+ neo4j_session,
132
+ RDSSnapshotSchema(),
133
+ data,
134
+ lastupdated=aws_update_tag,
331
135
  Region=region,
332
- AWS_ACCOUNT_ID=current_aws_account_id,
333
- aws_update_tag=aws_update_tag,
334
- )
335
- _attach_snapshots(neo4j_session, snapshots, aws_update_tag)
336
-
337
-
338
- @timeit
339
- def _attach_snapshots(
340
- neo4j_session: neo4j.Session,
341
- snapshots: List[Dict],
342
- aws_update_tag: int,
343
- ) -> None:
344
- """
345
- Attach snapshots to their source instance
346
- """
347
- attach_member_to_source = """
348
- UNWIND $Snapshots as snapshot
349
- MATCH (rdsInstance:RDSInstance {db_instance_identifier: snapshot.DBInstanceIdentifier}),
350
- (rdsSnapshot:RDSSnapshot {arn: snapshot.DBSnapshotArn})
351
- MERGE (rdsInstance)-[r:IS_SNAPSHOT_SOURCE]->(rdsSnapshot)
352
- ON CREATE SET r.firstseen = timestamp()
353
- SET r.lastupdated = $aws_update_tag
354
- """
355
- neo4j_session.run(
356
- attach_member_to_source,
357
- Snapshots=snapshots,
358
- aws_update_tag=aws_update_tag,
136
+ AWS_ID=current_aws_account_id,
359
137
  )
360
138
 
361
139
 
362
140
  @timeit
363
- def _attach_ec2_subnet_groups(
364
- neo4j_session: neo4j.Session,
365
- instances: List[Dict],
141
+ @aws_handle_regions
142
+ def get_rds_event_subscription_data(
143
+ boto3_session: boto3.session.Session,
366
144
  region: str,
367
- current_aws_account_id: str,
368
- aws_update_tag: int,
369
- ) -> None:
370
- """
371
- Attach RDS instances to their EC2 subnet groups
372
- """
373
- attach_rds_to_subnet_group = """
374
- UNWIND $SubnetGroups as rds_sng
375
- MERGE (sng:DBSubnetGroup{id: rds_sng.arn})
376
- ON CREATE SET sng.firstseen = timestamp()
377
- SET sng.name = rds_sng.DBSubnetGroupName,
378
- sng.vpc_id = rds_sng.VpcId,
379
- sng.description = rds_sng.DBSubnetGroupDescription,
380
- sng.status = rds_sng.DBSubnetGroupStatus,
381
- sng.lastupdated = $aws_update_tag
382
- WITH sng, rds_sng.instance_arn AS instance_arn
383
- MATCH(rds:RDSInstance{id: instance_arn})
384
- MERGE(rds)-[r:MEMBER_OF_DB_SUBNET_GROUP]->(sng)
385
- ON CREATE SET r.firstseen = timestamp()
386
- SET r.lastupdated = $aws_update_tag
387
- """
388
- db_sngs = []
389
- for instance in instances:
390
- db_sng = instance["DBSubnetGroup"]
391
- db_sng["arn"] = _get_db_subnet_group_arn(
392
- region,
393
- current_aws_account_id,
394
- db_sng["DBSubnetGroupName"],
395
- )
396
- db_sng["instance_arn"] = instance["DBInstanceArn"]
397
- db_sngs.append(db_sng)
398
- neo4j_session.run(
399
- attach_rds_to_subnet_group,
400
- SubnetGroups=db_sngs,
401
- aws_update_tag=aws_update_tag,
402
- )
403
- _attach_ec2_subnets_to_subnetgroup(
404
- neo4j_session,
405
- db_sngs,
406
- region,
407
- current_aws_account_id,
408
- aws_update_tag,
409
- )
145
+ ) -> List[Dict[str, Any]]:
146
+ client = boto3_session.client("rds", region_name=region)
147
+ paginator = client.get_paginator("describe_event_subscriptions")
148
+ subscriptions = []
149
+ for page in paginator.paginate():
150
+ subscriptions.extend(page["EventSubscriptionsList"])
151
+ return subscriptions
410
152
 
411
153
 
412
154
  @timeit
413
- def _attach_ec2_subnets_to_subnetgroup(
155
+ def load_rds_event_subscriptions(
414
156
  neo4j_session: neo4j.Session,
415
- db_subnet_groups: List[Dict],
157
+ data: List[Dict],
416
158
  region: str,
417
159
  current_aws_account_id: str,
418
160
  aws_update_tag: int,
419
161
  ) -> None:
420
- """
421
- Attach EC2Subnets to their DB Subnet Group.
422
-
423
- From https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_VPC.WorkingWithRDSInstanceinaVPC.html:
424
- `Each DB subnet group should have subnets in at least two Availability Zones in a given region. When creating a DB
425
- instance in a VPC, you must select a DB subnet group. Amazon RDS uses that DB subnet group and your preferred
426
- Availability Zone to select a subnet and an IP address within that subnet to associate with your DB instance.`
427
- """
428
- attach_subnets_to_sng = """
429
- UNWIND $Subnets as rds_sn
430
- MATCH(sng:DBSubnetGroup{id: rds_sn.sng_arn})
431
- MERGE(subnet:EC2Subnet{subnetid: rds_sn.sn_id})
432
- ON CREATE SET subnet.firstseen = timestamp()
433
- MERGE(sng)-[r:RESOURCE]->(subnet)
434
- ON CREATE SET r.firstseen = timestamp()
435
- SET r.lastupdated = $aws_update_tag,
436
- subnet.availability_zone = rds_sn.az,
437
- subnet.lastupdated = $aws_update_tag
438
- """
439
- subnets = []
440
- for subnet_group in db_subnet_groups:
441
- for subnet in subnet_group.get("Subnets", []):
442
- sn_id = subnet.get("SubnetIdentifier")
443
- sng_arn = _get_db_subnet_group_arn(
444
- region,
445
- current_aws_account_id,
446
- subnet_group["DBSubnetGroupName"],
447
- )
448
- az = subnet.get("SubnetAvailabilityZone", {}).get("Name")
449
- subnets.append(
450
- {
451
- "sn_id": sn_id,
452
- "sng_arn": sng_arn,
453
- "az": az,
454
- },
455
- )
456
- neo4j_session.run(
457
- attach_subnets_to_sng,
458
- Subnets=subnets,
459
- aws_update_tag=aws_update_tag,
460
- )
461
-
462
-
463
- @timeit
464
- def _attach_ec2_security_groups(
465
- neo4j_session: neo4j.Session,
466
- instances: List[Dict],
467
- aws_update_tag: int,
468
- ) -> None:
469
- """
470
- Attach an RDS instance to its EC2SecurityGroups
471
- """
472
- attach_rds_to_group = """
473
- UNWIND $Groups as rds_sg
474
- MATCH (rds:RDSInstance{id: rds_sg.arn})
475
- MERGE (sg:EC2SecurityGroup{id: rds_sg.group_id})
476
- MERGE (rds)-[m:MEMBER_OF_EC2_SECURITY_GROUP]->(sg)
477
- ON CREATE SET m.firstseen = timestamp()
478
- SET m.lastupdated = $aws_update_tag
479
- """
480
- groups = []
481
- for instance in instances:
482
- for group in instance["VpcSecurityGroups"]:
483
- groups.append(
484
- {
485
- "arn": instance["DBInstanceArn"],
486
- "group_id": group["VpcSecurityGroupId"],
487
- },
488
- )
489
- neo4j_session.run(
490
- attach_rds_to_group,
491
- Groups=groups,
492
- aws_update_tag=aws_update_tag,
493
- )
494
-
495
-
496
- @timeit
497
- def _attach_read_replicas(
498
- neo4j_session: neo4j.Session,
499
- read_replicas: List[Dict],
500
- aws_update_tag: int,
501
- ) -> None:
502
- """
503
- Attach read replicas to their source instances
504
- """
505
- attach_replica_to_source = """
506
- UNWIND $Replicas as rds_replica
507
- MATCH (replica:RDSInstance{id: rds_replica.DBInstanceArn}),
508
- (source:RDSInstance{db_instance_identifier: rds_replica.ReadReplicaSourceDBInstanceIdentifier})
509
- MERGE (replica)-[r:IS_READ_REPLICA_OF]->(source)
510
- ON CREATE SET r.firstseen = timestamp()
511
- SET r.lastupdated = $aws_update_tag
512
- """
513
- neo4j_session.run(
514
- attach_replica_to_source,
515
- Replicas=read_replicas,
516
- aws_update_tag=aws_update_tag,
517
- )
518
-
519
-
520
- @timeit
521
- def _attach_clusters(
522
- neo4j_session: neo4j.Session,
523
- cluster_members: List[Dict],
524
- aws_update_tag: int,
525
- ) -> None:
526
- """
527
- Attach cluster members to their source clusters
528
- """
529
- attach_member_to_source = """
530
- UNWIND $Members as rds_cluster_member
531
- MATCH (member:RDSInstance{id: rds_cluster_member.DBInstanceArn}),
532
- (source:RDSCluster{db_cluster_identifier: rds_cluster_member.DBClusterIdentifier})
533
- MERGE (member)-[r:IS_CLUSTER_MEMBER_OF]->(source)
534
- ON CREATE SET r.firstseen = timestamp()
535
- SET r.lastupdated = $aws_update_tag
536
- """
537
- neo4j_session.run(
538
- attach_member_to_source,
539
- Members=cluster_members,
540
- aws_update_tag=aws_update_tag,
162
+ load(
163
+ neo4j_session,
164
+ RDSEventSubscriptionSchema(),
165
+ data,
166
+ lastupdated=aws_update_tag,
167
+ Region=region,
168
+ AWS_ID=current_aws_account_id,
541
169
  )
542
170
 
543
171
 
@@ -571,8 +199,48 @@ def _get_db_subnet_group_arn(
571
199
  )
572
200
 
573
201
 
574
- @timeit
575
- def transform_rds_snapshots(data: Dict) -> List[Dict]:
202
+ def transform_rds_clusters(data: List[Dict]) -> List[Dict]:
203
+ """
204
+ Transform RDS cluster data for Neo4j ingestion
205
+ """
206
+ clusters = []
207
+
208
+ for cluster in data:
209
+ # Copy the cluster data
210
+ transformed_cluster = cluster.copy()
211
+
212
+ # Convert datetime fields
213
+ transformed_cluster["EarliestRestorableTime"] = dict_value_to_str(
214
+ cluster, "EarliestRestorableTime"
215
+ )
216
+ transformed_cluster["LatestRestorableTime"] = dict_value_to_str(
217
+ cluster, "LatestRestorableTime"
218
+ )
219
+ transformed_cluster["ClusterCreateTime"] = dict_value_to_str(
220
+ cluster, "ClusterCreateTime"
221
+ )
222
+ transformed_cluster["EarliestBacktrackTime"] = dict_value_to_str(
223
+ cluster, "EarliestBacktrackTime"
224
+ )
225
+
226
+ # Extract scaling configuration info
227
+ scaling_config = cluster.get("ScalingConfigurationInfo", {})
228
+ transformed_cluster["ScalingConfigurationInfoMinCapacity"] = scaling_config.get(
229
+ "MinCapacity"
230
+ )
231
+ transformed_cluster["ScalingConfigurationInfoMaxCapacity"] = scaling_config.get(
232
+ "MaxCapacity"
233
+ )
234
+ transformed_cluster["ScalingConfigurationInfoAutoPause"] = scaling_config.get(
235
+ "AutoPause"
236
+ )
237
+
238
+ clusters.append(transformed_cluster)
239
+
240
+ return clusters
241
+
242
+
243
+ def transform_rds_snapshots(data: List[Dict]) -> List[Dict]:
576
244
  snapshots = []
577
245
 
578
246
  for snapshot in data:
@@ -599,18 +267,173 @@ def transform_rds_snapshots(data: Dict) -> List[Dict]:
599
267
  return snapshots
600
268
 
601
269
 
270
+ def transform_rds_instances(
271
+ data: List[Dict], region: str, current_aws_account_id: str
272
+ ) -> List[Dict]:
273
+ """
274
+ Transform RDS instance data for Neo4j ingestion
275
+ """
276
+ instances = []
277
+
278
+ for instance in data:
279
+ # Copy the instance data
280
+ transformed_instance = instance.copy()
281
+
282
+ # Extract security group IDs for the relationship
283
+ security_group_ids = []
284
+ if instance.get("VpcSecurityGroups"):
285
+ for group in instance["VpcSecurityGroups"]:
286
+ security_group_ids.append(group["VpcSecurityGroupId"])
287
+
288
+ transformed_instance["security_group_ids"] = security_group_ids
289
+
290
+ # Handle read replica source identifier for the relationship
291
+ if instance.get("ReadReplicaSourceDBInstanceIdentifier"):
292
+ transformed_instance["read_replica_source_identifier"] = instance[
293
+ "ReadReplicaSourceDBInstanceIdentifier"
294
+ ]
295
+
296
+ # Handle cluster identifier for the relationship
297
+ if instance.get("DBClusterIdentifier"):
298
+ transformed_instance["db_cluster_identifier"] = instance[
299
+ "DBClusterIdentifier"
300
+ ]
301
+
302
+ # Handle subnet group data for the relationship
303
+ if instance.get("DBSubnetGroup"):
304
+ db_subnet_group = instance["DBSubnetGroup"]
305
+ transformed_instance["db_subnet_group_arn"] = _get_db_subnet_group_arn(
306
+ region, current_aws_account_id, db_subnet_group["DBSubnetGroupName"]
307
+ )
308
+
309
+ # Handle endpoint data
310
+ ep = _validate_rds_endpoint(instance)
311
+ transformed_instance["EndpointAddress"] = ep.get("Address")
312
+ transformed_instance["EndpointHostedZoneId"] = ep.get("HostedZoneId")
313
+ transformed_instance["EndpointPort"] = ep.get("Port")
314
+
315
+ # Convert datetime fields
316
+ transformed_instance["InstanceCreateTime"] = dict_value_to_str(
317
+ instance, "InstanceCreateTime"
318
+ )
319
+ transformed_instance["LatestRestorableTime"] = dict_value_to_str(
320
+ instance, "LatestRestorableTime"
321
+ )
322
+
323
+ instances.append(transformed_instance)
324
+
325
+ return instances
326
+
327
+
328
+ def transform_rds_event_subscriptions(data: List[Dict]) -> List[Dict]:
329
+ subscriptions = []
330
+ for subscription in data:
331
+ transformed = {
332
+ "CustSubscriptionId": subscription.get("CustSubscriptionId"),
333
+ "EventSubscriptionArn": subscription.get("EventSubscriptionArn"),
334
+ "CustomerAwsId": subscription.get("CustomerAwsId"),
335
+ "SnsTopicArn": subscription.get("SnsTopicArn"),
336
+ "SourceType": subscription.get("SourceType"),
337
+ "Status": subscription.get("Status"),
338
+ "Enabled": subscription.get("Enabled"),
339
+ "SubscriptionCreationTime": dict_value_to_str(
340
+ subscription, "SubscriptionCreationTime"
341
+ ),
342
+ "event_categories": subscription.get("EventCategoriesList") or None,
343
+ "source_ids": subscription.get("SourceIdsList") or None,
344
+ "lastupdated": None, # This will be set by the loader
345
+ }
346
+ subscriptions.append(transformed)
347
+ return subscriptions
348
+
349
+
350
+ def transform_rds_subnet_groups(
351
+ data: List[Dict], region: str, current_aws_account_id: str
352
+ ) -> List[Dict]:
353
+ """
354
+ Transform RDS subnet group data for Neo4j ingestion
355
+ """
356
+ subnet_groups_dict = {}
357
+
358
+ for instance in data:
359
+ if instance.get("DBSubnetGroup"):
360
+ db_subnet_group = instance["DBSubnetGroup"]
361
+ db_subnet_group_arn = _get_db_subnet_group_arn(
362
+ region, current_aws_account_id, db_subnet_group["DBSubnetGroupName"]
363
+ )
364
+
365
+ # If this subnet group doesn't exist yet, create it
366
+ if db_subnet_group_arn not in subnet_groups_dict:
367
+ subnet_groups_dict[db_subnet_group_arn] = {
368
+ "id": db_subnet_group_arn,
369
+ "name": db_subnet_group["DBSubnetGroupName"],
370
+ "vpc_id": db_subnet_group["VpcId"],
371
+ "description": db_subnet_group["DBSubnetGroupDescription"],
372
+ "status": db_subnet_group["SubnetGroupStatus"],
373
+ "db_instance_identifier": [],
374
+ "subnet_ids": [],
375
+ }
376
+
377
+ # Add this RDS instance to the subnet group's list
378
+ if instance.get("DBInstanceIdentifier"):
379
+ subnet_groups_dict[db_subnet_group_arn][
380
+ "db_instance_identifier"
381
+ ].append(instance["DBInstanceIdentifier"])
382
+
383
+ # Add subnet IDs from the DB subnet group
384
+ for subnet in db_subnet_group.get("Subnets", []):
385
+ subnet_id = subnet.get("SubnetIdentifier")
386
+ if (
387
+ subnet_id
388
+ and subnet_id
389
+ not in subnet_groups_dict[db_subnet_group_arn]["subnet_ids"]
390
+ ):
391
+ subnet_groups_dict[db_subnet_group_arn]["subnet_ids"].append(
392
+ subnet_id
393
+ )
394
+
395
+ return list(subnet_groups_dict.values())
396
+
397
+
398
+ @timeit
399
+ def load_rds_subnet_groups(
400
+ neo4j_session: neo4j.Session,
401
+ data: List[Dict],
402
+ region: str,
403
+ current_aws_account_id: str,
404
+ aws_update_tag: int,
405
+ ) -> None:
406
+ """
407
+ Ingest the RDS subnet groups to Neo4j and link them to necessary nodes.
408
+ """
409
+ load(
410
+ neo4j_session,
411
+ DBSubnetGroupSchema(),
412
+ data,
413
+ lastupdated=aws_update_tag,
414
+ Region=region,
415
+ AWS_ID=current_aws_account_id,
416
+ )
417
+
418
+
602
419
  @timeit
603
420
  def cleanup_rds_instances_and_db_subnet_groups(
604
421
  neo4j_session: neo4j.Session,
605
422
  common_job_parameters: Dict,
606
423
  ) -> None:
607
424
  """
608
- Remove RDS graph nodes and DBSubnetGroups that were created from other ingestion runs
425
+ Remove RDS instances and DB subnet groups that weren't updated in this sync run
609
426
  """
610
- run_cleanup_job(
611
- "aws_import_rds_instances_cleanup.json",
612
- neo4j_session,
613
- common_job_parameters,
427
+ logger.debug("Running RDS instances and DB subnet groups cleanup job")
428
+
429
+ # Clean up RDS instances
430
+ GraphJob.from_node_schema(RDSInstanceSchema(), common_job_parameters).run(
431
+ neo4j_session
432
+ )
433
+
434
+ # Clean up DB subnet groups
435
+ GraphJob.from_node_schema(DBSubnetGroupSchema(), common_job_parameters).run(
436
+ neo4j_session
614
437
  )
615
438
 
616
439
 
@@ -620,12 +443,12 @@ def cleanup_rds_clusters(
620
443
  common_job_parameters: Dict,
621
444
  ) -> None:
622
445
  """
623
- Remove RDS cluster graph nodes
446
+ Remove RDS clusters that weren't updated in this sync run
624
447
  """
625
- run_cleanup_job(
626
- "aws_import_rds_clusters_cleanup.json",
627
- neo4j_session,
628
- common_job_parameters,
448
+ logger.debug("Running RDS clusters cleanup job")
449
+
450
+ GraphJob.from_node_schema(RDSClusterSchema(), common_job_parameters).run(
451
+ neo4j_session
629
452
  )
630
453
 
631
454
 
@@ -635,12 +458,26 @@ def cleanup_rds_snapshots(
635
458
  common_job_parameters: Dict,
636
459
  ) -> None:
637
460
  """
638
- Remove RDS snapshots graph nodes
461
+ Remove RDS snapshots that weren't updated in this sync run
639
462
  """
640
- run_cleanup_job(
641
- "aws_import_rds_snapshots_cleanup.json",
642
- neo4j_session,
643
- common_job_parameters,
463
+ logger.debug("Running RDS snapshots cleanup job")
464
+
465
+ GraphJob.from_node_schema(RDSSnapshotSchema(), common_job_parameters).run(
466
+ neo4j_session
467
+ )
468
+
469
+
470
+ @timeit
471
+ def cleanup_rds_event_subscriptions(
472
+ neo4j_session: neo4j.Session,
473
+ common_job_parameters: Dict,
474
+ ) -> None:
475
+ """
476
+ Remove RDS event subscriptions that weren't updated in this sync run
477
+ """
478
+ logger.debug("Running RDS event subscriptions cleanup job")
479
+ GraphJob.from_node_schema(RDSEventSubscriptionSchema(), common_job_parameters).run(
480
+ neo4j_session
644
481
  )
645
482
 
646
483
 
@@ -654,16 +491,19 @@ def sync_rds_clusters(
654
491
  common_job_parameters: Dict,
655
492
  ) -> None:
656
493
  """
657
- Grab RDS instance data from AWS, ingest to neo4j, and run the cleanup job.
494
+ Grab RDS cluster data from AWS, ingest to neo4j, and run the cleanup job.
658
495
  """
659
496
  for region in regions:
660
497
  logger.info(
661
- "Syncing RDS for region '%s' in account '%s'.",
498
+ "Syncing RDS clusters for region '%s' in account '%s'.",
662
499
  region,
663
500
  current_aws_account_id,
664
501
  )
665
502
  data = get_rds_cluster_data(boto3_session, region)
666
- load_rds_clusters(neo4j_session, data, region, current_aws_account_id, update_tag) # type: ignore
503
+ transformed_data = transform_rds_clusters(data)
504
+ load_rds_clusters(
505
+ neo4j_session, transformed_data, region, current_aws_account_id, update_tag
506
+ )
667
507
  cleanup_rds_clusters(neo4j_session, common_job_parameters)
668
508
 
669
509
 
@@ -681,12 +521,23 @@ def sync_rds_instances(
681
521
  """
682
522
  for region in regions:
683
523
  logger.info(
684
- "Syncing RDS for region '%s' in account '%s'.",
524
+ "Syncing RDS instances for region '%s' in account '%s'.",
685
525
  region,
686
526
  current_aws_account_id,
687
527
  )
688
528
  data = get_rds_instance_data(boto3_session, region)
689
- load_rds_instances(neo4j_session, data, region, current_aws_account_id, update_tag) # type: ignore
529
+ transformed_data = transform_rds_instances(data, region, current_aws_account_id)
530
+ load_rds_instances(
531
+ neo4j_session, transformed_data, region, current_aws_account_id, update_tag
532
+ )
533
+
534
+ # Load subnet groups from RDS instances
535
+ subnet_group_data = transform_rds_subnet_groups(
536
+ data, region, current_aws_account_id
537
+ )
538
+ load_rds_subnet_groups(
539
+ neo4j_session, subnet_group_data, region, current_aws_account_id, update_tag
540
+ )
690
541
  cleanup_rds_instances_and_db_subnet_groups(neo4j_session, common_job_parameters)
691
542
 
692
543
 
@@ -704,15 +555,44 @@ def sync_rds_snapshots(
704
555
  """
705
556
  for region in regions:
706
557
  logger.info(
707
- "Syncing RDS for region '%s' in account '%s'.",
558
+ "Syncing RDS snapshots for region '%s' in account '%s'.",
708
559
  region,
709
560
  current_aws_account_id,
710
561
  )
711
562
  data = get_rds_snapshot_data(boto3_session, region)
712
- load_rds_snapshots(neo4j_session, data, region, current_aws_account_id, update_tag) # type: ignore
563
+ transformed_data = transform_rds_snapshots(data)
564
+ load_rds_snapshots(
565
+ neo4j_session, transformed_data, region, current_aws_account_id, update_tag
566
+ )
713
567
  cleanup_rds_snapshots(neo4j_session, common_job_parameters)
714
568
 
715
569
 
570
+ @timeit
571
+ def sync_rds_event_subscriptions(
572
+ neo4j_session: neo4j.Session,
573
+ boto3_session: boto3.session.Session,
574
+ regions: List[str],
575
+ current_aws_account_id: str,
576
+ update_tag: int,
577
+ common_job_parameters: Dict,
578
+ ) -> None:
579
+ """
580
+ Grab RDS event subscription data from AWS, ingest to neo4j, and run the cleanup job.
581
+ """
582
+ for region in regions:
583
+ logger.info(
584
+ "Syncing RDS event subscriptions for region '%s' in account '%s'.",
585
+ region,
586
+ current_aws_account_id,
587
+ )
588
+ data = get_rds_event_subscription_data(boto3_session, region)
589
+ transformed = transform_rds_event_subscriptions(data)
590
+ load_rds_event_subscriptions(
591
+ neo4j_session, transformed, region, current_aws_account_id, update_tag
592
+ )
593
+ cleanup_rds_event_subscriptions(neo4j_session, common_job_parameters)
594
+
595
+
716
596
  @timeit
717
597
  def sync(
718
598
  neo4j_session: neo4j.Session,
@@ -746,6 +626,16 @@ def sync(
746
626
  update_tag,
747
627
  common_job_parameters,
748
628
  )
629
+
630
+ sync_rds_event_subscriptions(
631
+ neo4j_session,
632
+ boto3_session,
633
+ regions,
634
+ current_aws_account_id,
635
+ update_tag,
636
+ common_job_parameters,
637
+ )
638
+
749
639
  merge_module_sync_metadata(
750
640
  neo4j_session,
751
641
  group_type="AWSAccount",