cartography 0.96.0rc2__py3-none-any.whl → 0.96.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic.

Files changed (29)
  1. cartography/client/core/tx.py +1 -1
  2. cartography/config.py +2 -2
  3. cartography/data/jobs/cleanup/github_repos_cleanup.json +25 -0
  4. cartography/intel/aws/apigateway.py +3 -3
  5. cartography/intel/aws/ec2/auto_scaling_groups.py +147 -185
  6. cartography/intel/aws/ec2/instances.py +2 -0
  7. cartography/intel/aws/ec2/network_acls.py +2 -1
  8. cartography/intel/aws/ec2/subnets.py +2 -0
  9. cartography/intel/aws/iam.py +4 -3
  10. cartography/intel/cve/__init__.py +1 -1
  11. cartography/intel/cve/feed.py +10 -7
  12. cartography/intel/github/repos.py +209 -27
  13. cartography/intel/github/teams.py +160 -38
  14. cartography/models/aws/ec2/auto_scaling_groups.py +204 -0
  15. cartography/models/aws/ec2/launch_configurations.py +55 -0
  16. cartography/models/aws/ec2/network_acl_rules.py +1 -0
  17. cartography/models/aws/identitycenter/__init__.py +0 -0
  18. cartography/models/aws/identitycenter/awsidentitycenter.py +44 -0
  19. cartography/models/aws/identitycenter/awspermissionset.py +84 -0
  20. cartography/models/aws/identitycenter/awsssouser.py +68 -0
  21. cartography/models/github/teams.py +29 -0
  22. cartography/util.py +22 -0
  23. cartography-0.96.1.dist-info/METADATA +53 -0
  24. {cartography-0.96.0rc2.dist-info → cartography-0.96.1.dist-info}/RECORD +28 -22
  25. {cartography-0.96.0rc2.dist-info → cartography-0.96.1.dist-info}/WHEEL +1 -1
  26. cartography-0.96.0rc2.dist-info/METADATA +0 -53
  27. {cartography-0.96.0rc2.dist-info → cartography-0.96.1.dist-info}/LICENSE +0 -0
  28. {cartography-0.96.0rc2.dist-info → cartography-0.96.1.dist-info}/entry_points.txt +0 -0
  29. {cartography-0.96.0rc2.dist-info → cartography-0.96.1.dist-info}/top_level.txt +0 -0
cartography/client/core/tx.py CHANGED
@@ -122,7 +122,7 @@ def read_list_of_tuples_tx(tx: neo4j.Transaction, query: str, **kwargs) -> List[
  return [tuple(val) for val in values]


- def read_single_dict_tx(tx: neo4j.Transaction, query: str, **kwargs) -> Dict[str, Any]:
+ def read_single_dict_tx(tx: neo4j.Transaction, query: str, **kwargs) -> Any:
  """
  Runs the given Neo4j query in the given transaction object and returns the single dict result. This is intended to
  be run only with queries that return a single dict.
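The only change here loosens the return annotation of read_single_dict_tx from Dict[str, Any] to Any. A minimal usage sketch, assuming a typical call site (the caller shown below is not part of this diff): the helper is a transaction function, so it is handed to the Neo4j driver together with the query and its parameters.

```python
# Hypothetical usage sketch; only read_single_dict_tx itself appears in this diff.
import neo4j

from cartography.client.core.tx import read_single_dict_tx

driver = neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
query = "MATCH (a:AWSAccount{id: $AccountId}) RETURN a.id AS id, a.lastupdated AS lastupdated"

with driver.session() as session:
    # The driver opens a transaction and passes it as the first argument to the helper.
    result = session.read_transaction(read_single_dict_tx, query, AccountId="1234")
    print(result)
```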
cartography/config.py CHANGED
@@ -157,7 +157,7 @@ class Config:
  pagerduty_request_timeout=None,
  nist_cve_url=None,
  cve_enabled=False,
- cve_api_key=None,
+ cve_api_key: str | None = None,
  crowdstrike_client_id=None,
  crowdstrike_client_secret=None,
  crowdstrike_api_url=None,
@@ -215,7 +215,7 @@ class Config:
  self.pagerduty_request_timeout = pagerduty_request_timeout
  self.nist_cve_url = nist_cve_url
  self.cve_enabled = cve_enabled
- self.cve_api_key = cve_api_key
+ self.cve_api_key: str | None = cve_api_key
  self.crowdstrike_client_id = crowdstrike_client_id
  self.crowdstrike_client_secret = crowdstrike_client_secret
  self.crowdstrike_api_url = crowdstrike_api_url
cartography/data/jobs/cleanup/github_repos_cleanup.json CHANGED
@@ -63,6 +63,31 @@
  "query": "MATCH (:GitHubUser)-[r:OUTSIDE_COLLAB_WRITE]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
  "iterative": true,
  "iterationsize": 100
+ },
+ {
+ "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_ADMIN]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
+ "iterative": true,
+ "iterationsize": 100
+ },
+ {
+ "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_MAINTAIN]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
+ "iterative": true,
+ "iterationsize": 100
+ },
+ {
+ "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_READ]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
+ "iterative": true,
+ "iterationsize": 100
+ },
+ {
+ "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_TRIAGE]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
+ "iterative": true,
+ "iterationsize": 100
+ },
+ {
+ "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_WRITE]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
+ "iterative": true,
+ "iterationsize": 100
  }],
  "name": "cleanup GitHub repos data"
  }
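The five new DIRECT_COLLAB_* entries mirror the existing OUTSIDE_COLLAB_* cleanup queries: relationships whose lastupdated no longer matches the current update tag are deleted in batches of iterationsize until none remain. The sketch below illustrates that iterative pattern only; cartography actually runs these entries through its own cleanup-job machinery, and the helper name here is an assumption, not part of the package.

```python
# Illustration of the iterative cleanup pattern these JSON entries describe.
import neo4j


def run_iterative_cleanup(session: neo4j.Session, query: str, update_tag: int, iterationsize: int = 100) -> None:
    """Repeatedly delete stale relationships in small batches until none are left."""
    while True:
        summary = session.run(
            query,
            UPDATE_TAG=update_tag,
            LIMIT_SIZE=iterationsize,
        ).consume()
        if summary.counters.relationships_deleted == 0:
            break


# Example with one of the new queries from this file:
cleanup_query = (
    "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_ADMIN]->(:GitHubRepository) "
    "WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)"
)
```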
cartography/intel/aws/apigateway.py CHANGED
@@ -43,7 +43,7 @@ def get_rest_api_details(
  for api in rest_apis:
  stages = get_rest_api_stages(api, client)
  # clientcertificate id is given by the api stage
- certificate = get_rest_api_client_certificate(stages, client) # type: ignore
+ certificate = get_rest_api_client_certificate(stages, client)
  resources = get_rest_api_resources(api, client)
  policy = get_rest_api_policy(api, client)
  apis.append((api['id'], stages, certificate, resources, policy))
@@ -51,7 +51,7 @@ def get_rest_api_details(


  @timeit
- def get_rest_api_stages(api: Dict, client: botocore.client.BaseClient) -> List[Any]:
+ def get_rest_api_stages(api: Dict, client: botocore.client.BaseClient) -> Any:
  """
  Gets the REST API Stage Resources.
  """
@@ -99,7 +99,7 @@ def get_rest_api_resources(api: Dict, client: botocore.client.BaseClient) -> Lis


  @timeit
- def get_rest_api_policy(api: Dict, client: botocore.client.BaseClient) -> List[Any]:
+ def get_rest_api_policy(api: Dict, client: botocore.client.BaseClient) -> Any:
  """
  Gets the REST API policy. Returns policy string or None if no policy is present.
  """
cartography/intel/aws/ec2/auto_scaling_groups.py CHANGED
@@ -1,24 +1,37 @@
  import logging
- from typing import Dict
- from typing import List
+ from collections import namedtuple
+ from typing import Any

  import boto3
  import neo4j

  from .util import get_botocore_config
+ from cartography.client.core.tx import load
+ from cartography.graph.job import GraphJob
+ from cartography.models.aws.ec2.auto_scaling_groups import AutoScalingGroupSchema
+ from cartography.models.aws.ec2.auto_scaling_groups import EC2InstanceAutoScalingGroupSchema
+ from cartography.models.aws.ec2.auto_scaling_groups import EC2SubnetAutoScalingGroupSchema
+ from cartography.models.aws.ec2.launch_configurations import LaunchConfigurationSchema
  from cartography.util import aws_handle_regions
- from cartography.util import run_cleanup_job
  from cartography.util import timeit

  logger = logging.getLogger(__name__)

+ AsgData = namedtuple(
+ 'AsgData', [
+ "group_list",
+ "instance_list",
+ "subnet_list",
+ ],
+ )
+

  @timeit
  @aws_handle_regions
- def get_ec2_auto_scaling_groups(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
+ def get_ec2_auto_scaling_groups(boto3_session: boto3.session.Session, region: str) -> list[dict]:
  client = boto3_session.client('autoscaling', region_name=region, config=get_botocore_config())
  paginator = client.get_paginator('describe_auto_scaling_groups')
- asgs: List[Dict] = []
+ asgs: list[dict] = []
  for page in paginator.paginate():
  asgs.extend(page['AutoScalingGroups'])
  return asgs
@@ -26,218 +39,167 @@ def get_ec2_auto_scaling_groups(boto3_session: boto3.session.Session, region: st

  @timeit
  @aws_handle_regions
- def get_launch_configurations(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
+ def get_launch_configurations(boto3_session: boto3.session.Session, region: str) -> list[dict]:
  client = boto3_session.client('autoscaling', region_name=region, config=get_botocore_config())
  paginator = client.get_paginator('describe_launch_configurations')
- lcs: List[Dict] = []
+ lcs: list[dict] = []
  for page in paginator.paginate():
  lcs.extend(page['LaunchConfigurations'])
  return lcs


+ def transform_launch_configurations(configurations: list[dict[str, Any]]) -> list[dict[str, Any]]:
+ transformed_configurations = []
+ for config in configurations:
+ transformed_configurations.append({
+ 'AssociatePublicIpAddress': config.get('AssociatePublicIpAddress'),
+ 'LaunchConfigurationARN': config.get('LaunchConfigurationARN'),
+ 'LaunchConfigurationName': config.get('LaunchConfigurationName'),
+ 'CreatedTime': config.get('CreatedTime'),
+ 'ImageId': config.get('ImageId'),
+ 'KeyName': config.get('KeyName'),
+ 'SecurityGroups': config.get('SecurityGroups'),
+ 'InstanceType': config.get('InstanceType'),
+ 'KernelId': config.get('KernelId'),
+ 'RamdiskId': config.get('RamdiskId'),
+ 'InstanceMonitoring': config.get('InstanceMonitoring', {}).get('Enabled'),
+ 'SpotPrice': config.get('SpotPrice'),
+ 'IamInstanceProfile': config.get('IamInstanceProfile'),
+ 'EbsOptimized': config.get('EbsOptimized'),
+ 'PlacementTenancy': config.get('PlacementTenancy'),
+ })
+ return transformed_configurations
+
+
+ def transform_auto_scaling_groups(groups: list[dict[str, Any]]) -> AsgData:
+ transformed_groups = []
+ related_vpcs = []
+ related_instances = []
+ for group in groups:
+ transformed_groups.append({
+ 'AutoScalingGroupARN': group['AutoScalingGroupARN'],
+ 'CapacityRebalance': group.get('CapacityRebalance'),
+ 'CreatedTime': str(group.get('CreatedTime')),
+ 'DefaultCooldown': group.get('DefaultCooldown'),
+ 'DesiredCapacity': group.get('DesiredCapacity'),
+ 'HealthCheckGracePeriod': group.get('HealthCheckGracePeriod'),
+ 'HealthCheckType': group.get('HealthCheckType'),
+ 'LaunchConfigurationName': group.get('LaunchConfigurationName'),
+ 'LaunchTemplateName': group.get('LaunchTemplate', {}).get('LaunchTemplateName'),
+ 'LaunchTemplateId': group.get('LaunchTemplate', {}).get('LaunchTemplateId'),
+ 'LaunchTemplateVersion': group.get('LaunchTemplate', {}).get('Version'),
+ 'MaxInstanceLifetime': group.get('MaxInstanceLifetime'),
+ 'MaxSize': group.get('MaxSize'),
+ 'MinSize': group.get('MinSize'),
+ 'AutoScalingGroupName': group.get('AutoScalingGroupName'),
+ 'NewInstancesProtectedFromScaleIn': group.get('NewInstancesProtectedFromScaleIn'),
+ 'Status': group.get('Status'),
+ })
+
+ if group.get('VPCZoneIdentifier', None):
+ vpclist = group['VPCZoneIdentifier']
+ subnet_ids = vpclist.split(',') if ',' in vpclist else [vpclist]
+ subnets = []
+ for subnet_id in subnet_ids:
+ subnets.append({
+ 'VPCZoneIdentifier': subnet_id,
+ 'AutoScalingGroupARN': group['AutoScalingGroupARN'],
+ })
+ related_vpcs.extend(subnets)
+
+ for instance_data in group.get('Instances', []):
+ related_instances.append({
+ 'InstanceId': instance_data['InstanceId'],
+ 'AutoScalingGroupARN': group['AutoScalingGroupARN'],
+ })
+
+ return AsgData(
+ group_list=transformed_groups,
+ instance_list=related_instances,
+ subnet_list=related_vpcs,
+ )
+
+
  @timeit
  def load_launch_configurations(
- neo4j_session: neo4j.Session, data: List[Dict], region: str, current_aws_account_id: str, update_tag: int,
+ neo4j_session: neo4j.Session, data: list[dict], region: str, current_aws_account_id: str, update_tag: int,
  ) -> None:
- ingest_lc = """
- UNWIND $launch_configurations as lc
- MERGE (config:LaunchConfiguration{id: lc.LaunchConfigurationARN})
- ON CREATE SET config.firstseen = timestamp(), config.name = lc.LaunchConfigurationName,
- config.arn = lc.LaunchConfigurationARN,
- config.created_time = lc.CreatedTime
- SET config.lastupdated = $update_tag, config.image_id = lc.ImageId,
- config.key_name = lc.KeyName,
- config.security_groups = lc.SecurityGroups,
- config.instance_type = lc.InstanceType,
- config.kernel_id = lc.KernelId,
- config.ramdisk_id = lc.RamdiskId,
- config.instance_monitoring_enabled = lc.InstanceMonitoring.Enabled,
- config.spot_price = lc.SpotPrice,
- config.iam_instance_profile = lc.IamInstanceProfile,
- config.ebs_optimized = lc.EbsOptimized,
- config.associate_public_ip_address = lc.AssociatePublicIpAddress,
- config.placement_tenancy = lc.PlacementTenancy,
- config.region=$Region
- WITH config
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
- MERGE (aa)-[r:RESOURCE]->(config)
- ON CREATE SET r.firstseen = timestamp()
- SET r.lastupdated = $update_tag
- """
- for lc in data:
- lc['CreatedTime'] = str(int(lc['CreatedTime'].timestamp()))
-
- neo4j_session.run(
- ingest_lc,
- launch_configurations=data,
- AWS_ACCOUNT_ID=current_aws_account_id,
+ load(
+ neo4j_session,
+ LaunchConfigurationSchema(),
+ data,
  Region=region,
- update_tag=update_tag,
+ AWS_ID=current_aws_account_id,
+ lastupdated=update_tag,
  )


- @timeit
- def load_ec2_auto_scaling_groups(
- neo4j_session: neo4j.Session, data: List[Dict], region: str, current_aws_account_id: str, update_tag: int,
+ def load_groups(
+ neo4j_session: neo4j.Session, data: list[dict], region: str, current_aws_account_id: str, update_tag: int,
  ) -> None:
- ingest_group = """
- UNWIND $autoscaling_groups_list as ag
- MERGE (group:AutoScalingGroup{arn: ag.AutoScalingGroupARN})
- ON CREATE SET group.firstseen = timestamp(),
- group.createdtime = ag.CreatedTime
- SET group.launchconfigurationname = ag.LaunchConfigurationName,
- group.launchtemplatename = ag.LaunchTemplate.LaunchTemplateName,
- group.launchtemplateid = ag.LaunchTemplate.LaunchTemplateId,
- group.launchtemplateversion = ag.LaunchTemplate.Version,
- group.maxsize = ag.MaxSize, group.minsize = ag.MinSize, group.defaultcooldown = ag.DefaultCooldown,
- group.desiredcapacity = ag.DesiredCapacity, group.healthchecktype = ag.HealthCheckType,
- group.healthcheckgraceperiod = ag.HealthCheckGracePeriod, group.status = ag.Status,
- group.newinstancesprotectedfromscalein = ag.NewInstancesProtectedFromScaleIn,
- group.maxinstancelifetime = ag.MaxInstanceLifetime, group.capacityrebalance = ag.CapacityRebalance,
- group.name = ag.AutoScalingGroupName,
- group.lastupdated = $update_tag,
- group.region=$Region
- WITH group
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
- MERGE (aa)-[r:RESOURCE]->(group)
- ON CREATE SET r.firstseen = timestamp()
- SET r.lastupdated = $update_tag
- """
-
- ingest_vpc = """
- UNWIND $vpc_id_list as vpc_id
- MERGE (subnet:EC2Subnet{subnetid: vpc_id})
- ON CREATE SET subnet.firstseen = timestamp()
- SET subnet.lastupdated = $update_tag
- WITH subnet
- MATCH (group:AutoScalingGroup{arn: $GROUPARN})
- MERGE (subnet)<-[r:VPC_IDENTIFIER]-(group)
- ON CREATE SET r.firstseen = timestamp()
- SET r.lastupdated = $update_tag
- """
-
- ingest_instance = """
- UNWIND $instances_list as i
- MERGE (instance:Instance:EC2Instance{id: i.InstanceId})
- ON CREATE SET instance.firstseen = timestamp()
- SET instance.lastupdated = $update_tag, instance.region=$Region
- WITH instance
- MATCH (group:AutoScalingGroup{arn: $GROUPARN})
- MERGE (instance)-[r:MEMBER_AUTO_SCALE_GROUP]->(group)
- ON CREATE SET r.firstseen = timestamp()
- SET r.lastupdated = $update_tag
- WITH instance
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
- MERGE (aa)-[r:RESOURCE]->(instance)
- ON CREATE SET r.firstseen = timestamp()
- SET r.lastupdated = $update_tag
- """
-
- ingest_lts = """
- UNWIND $autoscaling_groups_list as ag
- MATCH (group:AutoScalingGroup{arn: ag.AutoScalingGroupARN})
- MATCH (template:LaunchTemplate{id: ag.LaunchTemplate.LaunchTemplateId})
- MERGE (group)-[r:HAS_LAUNCH_TEMPLATE]->(template)
- ON CREATE SET r.firstseen = timestamp()
- SET r.lastupdated = $update_tag
- """
-
- ingest_lcs = """
- UNWIND $autoscaling_groups_list as ag
- MATCH (group:AutoScalingGroup{arn: ag.AutoScalingGroupARN})
- MATCH (config:LaunchConfiguration{name: ag.LaunchConfigurationName})
- MERGE (group)-[r:HAS_LAUNCH_CONFIG]->(config)
- ON CREATE SET r.firstseen = timestamp()
- SET r.lastupdated = $update_tag
- """
-
- launch_configs = []
- launch_templates = []
- for group in data:
- if group.get('LaunchConfigurationName'):
- launch_configs.append(group)
- if group.get('LaunchTemplate'):
- launch_templates.append(group)
-
- group['CreatedTime'] = str(group['CreatedTime'])
-
- neo4j_session.run(
- ingest_group,
- autoscaling_groups_list=data,
- AWS_ACCOUNT_ID=current_aws_account_id,
+ load(
+ neo4j_session,
+ AutoScalingGroupSchema(),
+ data,
  Region=region,
- update_tag=update_tag,
+ AWS_ID=current_aws_account_id,
+ lastupdated=update_tag,
  )
- neo4j_session.run(
- ingest_lcs,
- autoscaling_groups_list=launch_configs,
- AWS_ACCOUNT_ID=current_aws_account_id,
+
+
+ def load_asg_subnets(
+ neo4j_session: neo4j.Session, data: list[dict], region: str, current_aws_account_id: str, update_tag: int,
+ ) -> None:
+ load(
+ neo4j_session,
+ EC2SubnetAutoScalingGroupSchema(),
+ data,
  Region=region,
- update_tag=update_tag,
+ AWS_ID=current_aws_account_id,
+ lastupdated=update_tag,
  )
- neo4j_session.run(
- ingest_lts,
- autoscaling_groups_list=launch_templates,
- AWS_ACCOUNT_ID=current_aws_account_id,
+
+
+ def load_asg_instances(
+ neo4j_session: neo4j.Session, data: list[dict], region: str, current_aws_account_id: str, update_tag: int,
+ ) -> None:
+ load(
+ neo4j_session,
+ EC2InstanceAutoScalingGroupSchema(),
+ data,
  Region=region,
- update_tag=update_tag,
+ AWS_ID=current_aws_account_id,
+ lastupdated=update_tag,
  )

- for group in data:
- group_arn = group["AutoScalingGroupARN"]
- if group.get('VPCZoneIdentifier'):
- vpclist = group["VPCZoneIdentifier"]
- if ',' in vpclist:
- data = vpclist.split(',')
- else:
- data = vpclist
- neo4j_session.run(
- ingest_vpc,
- vpc_id_list=data,
- GROUPARN=group_arn,
- update_tag=update_tag,
- )
-
- if group.get("Instances"):
- data = group["Instances"]
- neo4j_session.run(
- ingest_instance,
- instances_list=data,
- GROUPARN=group_arn,
- AWS_ACCOUNT_ID=current_aws_account_id,
- Region=region,
- update_tag=update_tag,
- )
-

  @timeit
- def cleanup_ec2_auto_scaling_groups(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
- run_cleanup_job(
- 'aws_ingest_ec2_auto_scaling_groups_cleanup.json',
- neo4j_session,
- common_job_parameters,
- )
+ def load_auto_scaling_groups(
+ neo4j_session: neo4j.Session, data: AsgData, region: str, current_aws_account_id: str, update_tag: int,
+ ) -> None:
+ load_groups(neo4j_session, data.group_list, region, current_aws_account_id, update_tag)
+ load_asg_instances(neo4j_session, data.instance_list, region, current_aws_account_id, update_tag)
+ load_asg_subnets(neo4j_session, data.subnet_list, region, current_aws_account_id, update_tag)


  @timeit
- def cleanup_ec2_launch_configurations(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
- run_cleanup_job(
- 'aws_import_ec2_launch_configurations_cleanup.json',
- neo4j_session,
- common_job_parameters,
- )
+ def cleanup(neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]) -> None:
+ logger.debug("Running EC2 instance cleanup")
+ GraphJob.from_node_schema(AutoScalingGroupSchema(), common_job_parameters).run(neo4j_session)
+ GraphJob.from_node_schema(LaunchConfigurationSchema(), common_job_parameters).run(neo4j_session)


  @timeit
  def sync_ec2_auto_scaling_groups(
- neo4j_session: neo4j.Session, boto3_session: boto3.session.Session, regions: List[str],
- current_aws_account_id: str, update_tag: int, common_job_parameters: Dict,
+ neo4j_session: neo4j.Session, boto3_session: boto3.session.Session, regions: list[str],
+ current_aws_account_id: str, update_tag: int, common_job_parameters: dict,
  ) -> None:
  for region in regions:
  logger.debug("Syncing auto scaling groups for region '%s' in account '%s'.", region, current_aws_account_id)
  lc_data = get_launch_configurations(boto3_session, region)
- load_launch_configurations(neo4j_session, lc_data, region, current_aws_account_id, update_tag)
- data = get_ec2_auto_scaling_groups(boto3_session, region)
- load_ec2_auto_scaling_groups(neo4j_session, data, region, current_aws_account_id, update_tag)
- cleanup_ec2_auto_scaling_groups(neo4j_session, common_job_parameters)
- cleanup_ec2_launch_configurations(neo4j_session, common_job_parameters)
+ asg_data = get_ec2_auto_scaling_groups(boto3_session, region)
+ lc_transformed = transform_launch_configurations(lc_data)
+ asg_transformed = transform_auto_scaling_groups(asg_data)
+ load_launch_configurations(neo4j_session, lc_transformed, region, current_aws_account_id, update_tag)
+ load_auto_scaling_groups(neo4j_session, asg_transformed, region, current_aws_account_id, update_tag)
+ cleanup(neo4j_session, common_job_parameters)
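The auto scaling group module now follows cartography's transform-then-load pattern: transform_auto_scaling_groups() flattens the describe_auto_scaling_groups output into the three lists carried by the AsgData namedtuple, which load_auto_scaling_groups() feeds into the schema-based load() calls. A small illustration with a made-up, abbreviated API response (all field values below are hypothetical):

```python
# Hypothetical, abbreviated describe_auto_scaling_groups output for illustration only.
from cartography.intel.aws.ec2.auto_scaling_groups import transform_auto_scaling_groups

raw_groups = [{
    'AutoScalingGroupARN': 'arn:aws:autoscaling:us-east-1:000000000000:autoScalingGroup:abc:autoScalingGroupName/example-asg',
    'AutoScalingGroupName': 'example-asg',
    'CreatedTime': '2024-01-01 00:00:00+00:00',
    'MinSize': 1,
    'MaxSize': 3,
    'VPCZoneIdentifier': 'subnet-aaa,subnet-bbb',
    'Instances': [{'InstanceId': 'i-0123456789abcdef0'}],
}]

asg_data = transform_auto_scaling_groups(raw_groups)
# asg_data.group_list    -> one flattened dict per ASG (keys absent above become None via .get())
# asg_data.instance_list -> [{'InstanceId': 'i-0123456789abcdef0', 'AutoScalingGroupARN': '...'}]
# asg_data.subnet_list   -> [{'VPCZoneIdentifier': 'subnet-aaa', 'AutoScalingGroupARN': '...'},
#                            {'VPCZoneIdentifier': 'subnet-bbb', 'AutoScalingGroupARN': '...'}]
```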
cartography/intel/aws/ec2/instances.py CHANGED
@@ -11,6 +11,7 @@ import neo4j
  from cartography.client.core.tx import load
  from cartography.graph.job import GraphJob
  from cartography.intel.aws.ec2.util import get_botocore_config
+ from cartography.models.aws.ec2.auto_scaling_groups import EC2InstanceAutoScalingGroupSchema
  from cartography.models.aws.ec2.instances import EC2InstanceSchema
  from cartography.models.aws.ec2.keypairs import EC2KeyPairSchema
  from cartography.models.aws.ec2.networkinterface_instance import EC2NetworkInterfaceInstanceSchema
@@ -308,6 +309,7 @@ def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any])
  logger.debug("Running EC2 instance cleanup")
  GraphJob.from_node_schema(EC2ReservationSchema(), common_job_parameters).run(neo4j_session)
  GraphJob.from_node_schema(EC2InstanceSchema(), common_job_parameters).run(neo4j_session)
+ GraphJob.from_node_schema(EC2InstanceAutoScalingGroupSchema(), common_job_parameters).run(neo4j_session)


  @timeit
cartography/intel/aws/ec2/network_acls.py CHANGED
@@ -69,7 +69,8 @@ def transform_network_acl_data(
  direction = 'egress' if rule['Egress'] else 'inbound'
  transformed_rule = {
  'Id': f"{network_acl['NetworkAclId']}/{direction}/{rule['RuleNumber']}",
- 'CidrBlock': rule['CidrBlock'],
+ 'CidrBlock': rule.get('CidrBlock'),
+ 'Ipv6CidrBlock': rule.get('Ipv6CidrBlock'),
  'Egress': rule['Egress'],
  'Protocol': rule['Protocol'],
  'RuleAction': rule['RuleAction'],
cartography/intel/aws/ec2/subnets.py CHANGED
@@ -7,6 +7,7 @@ import neo4j

  from .util import get_botocore_config
  from cartography.graph.job import GraphJob
+ from cartography.models.aws.ec2.auto_scaling_groups import EC2SubnetAutoScalingGroupSchema
  from cartography.models.aws.ec2.subnet_instance import EC2SubnetInstanceSchema
  from cartography.util import aws_handle_regions
  from cartography.util import run_cleanup_job
@@ -79,6 +80,7 @@ def load_subnets(
  def cleanup_subnets(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
  run_cleanup_job('aws_ingest_subnets_cleanup.json', neo4j_session, common_job_parameters)
  GraphJob.from_node_schema(EC2SubnetInstanceSchema(), common_job_parameters).run(neo4j_session)
+ GraphJob.from_node_schema(EC2SubnetAutoScalingGroupSchema(), common_job_parameters).run(neo4j_session)


  @timeit
cartography/intel/aws/iam.py CHANGED
@@ -539,11 +539,12 @@ def _transform_policy_statements(statements: Any, policy_id: str) -> List[Dict]:
  if not isinstance(statements, list):
  statements = [statements]
  for stmt in statements:
- if "Sid" not in stmt:
+ if "Sid" in stmt and stmt["Sid"]:
+ statement_id = stmt["Sid"]
+ else:
  statement_id = count
  count += 1
- else:
- statement_id = stmt["Sid"]
+
  stmt["id"] = f"{policy_id}/statement/{statement_id}"
  if "Resource" in stmt:
  stmt["Resource"] = ensure_list(stmt["Resource"])
cartography/intel/cve/__init__.py CHANGED
@@ -25,7 +25,7 @@ def start_cve_ingestion(
  """
  if not config.cve_enabled:
  return
- cve_api_key = config.cve_api_key if config.cve_api_key else None
+ cve_api_key: str | None = config.cve_api_key if config.cve_api_key else None

  # sync CVE year archives, if not yet synced
  existing_years = feed.get_cve_sync_metadata(neo4j_session)
cartography/intel/cve/feed.py CHANGED
@@ -22,9 +22,9 @@ from cartography.util import timeit

  logger = logging.getLogger(__name__)

- MAX_RETRIES = 3
- # Connect and read timeouts of 60 seconds each; see https://requests.readthedocs.io/en/master/user/advanced/#timeouts
- CONNECT_AND_READ_TIMEOUT = (60, 60)
+ MAX_RETRIES = 8
+ # Connect and read timeouts of 120 seconds each; see https://requests.readthedocs.io/en/master/user/advanced/#timeouts
+ CONNECT_AND_READ_TIMEOUT = (30, 120)
  CVE_FEED_ID = "NIST_NVD"
  BATCH_SIZE_DAYS = 120
  RESULTS_PER_PAGE = 2000
@@ -68,7 +68,7 @@ def _map_cve_dict(cve_dict: Dict[Any, Any], data: Dict[Any, Any]) -> None:
  cve_dict["startIndex"] = data["startIndex"]


- def _call_cves_api(url: str, api_key: str, params: Dict[str, Any]) -> Dict[Any, Any]:
+ def _call_cves_api(url: str, api_key: str | None, params: Dict[str, Any]) -> Dict[Any, Any]:
  totalResults = 0
  sleep_time = DEFAULT_SLEEP_TIME
  retries = 0
@@ -98,6 +98,9 @@ def _call_cves_api(url: str, api_key: str, params: Dict[str, Any]) -> Dict[Any,
  retries += 1
  if retries >= MAX_RETRIES:
  raise
+ # Exponential backoff
+ sleep_time *= 2
+ time.sleep(sleep_time)
  continue
  data = res.json()
  _map_cve_dict(results, data)
@@ -114,7 +117,7 @@ def get_cves_in_batches(
  start_date: datetime,
  end_date: datetime,
  date_param_names: Dict[str, str],
- api_key: str,
+ api_key: str | None,
  ) -> Dict[Any, Any]:
  cves: Dict[Any, Any] = dict()
  current_start_date: datetime = start_date
@@ -153,7 +156,7 @@ def get_cves_in_batches(


  def get_modified_cves(
- nist_cve_url: str, last_modified_date: str, api_key: str,
+ nist_cve_url: str, last_modified_date: str, api_key: str | None,
  ) -> Dict[Any, Any]:
  cves = dict()
  end_date = datetime.now(tz=timezone.utc)
@@ -171,7 +174,7 @@


  def get_published_cves_per_year(
- nist_cve_url: str, year: str, api_key: str,
+ nist_cve_url: str, year: str, api_key: str | None,
  ) -> Dict[Any, Any]:
  cves = {}
  start_of_year = datetime.strptime(f"{year}-01-01", "%Y-%m-%d")
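Taken together, the feed.py changes shorten the connect timeout, lengthen the read timeout, raise MAX_RETRIES from 3 to 8, and add exponential backoff before retrying a failed NVD request. Below is a simplified sketch of that retry pattern under stated assumptions: the header name and DEFAULT_SLEEP_TIME value are guesses, and the pagination and result-merging that _call_cves_api performs are omitted.

```python
# Simplified sketch of the retry/backoff pattern added in this release; constants
# mirror the new values in cartography/intel/cve/feed.py, everything else is assumed.
import time

import requests

MAX_RETRIES = 8
CONNECT_AND_READ_TIMEOUT = (30, 120)  # (connect, read) seconds
DEFAULT_SLEEP_TIME = 1  # assumed starting value; not shown in this diff


def get_with_backoff(url: str, params: dict, api_key: str | None = None) -> dict:
    headers = {"apiKey": api_key} if api_key else {}  # header name assumed
    sleep_time = DEFAULT_SLEEP_TIME
    retries = 0
    while True:
        try:
            res = requests.get(url, params=params, headers=headers, timeout=CONNECT_AND_READ_TIMEOUT)
            res.raise_for_status()
            return res.json()
        except requests.exceptions.RequestException:
            retries += 1
            if retries >= MAX_RETRIES:
                raise
            sleep_time *= 2  # exponential backoff, as in the new _call_cves_api code
            time.sleep(sleep_time)
```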