cartography 0.96.0rc3__py3-none-any.whl → 0.96.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

@@ -63,6 +63,31 @@
63
63
  "query": "MATCH (:GitHubUser)-[r:OUTSIDE_COLLAB_WRITE]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
64
64
  "iterative": true,
65
65
  "iterationsize": 100
66
+ },
67
+ {
68
+ "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_ADMIN]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
69
+ "iterative": true,
70
+ "iterationsize": 100
71
+ },
72
+ {
73
+ "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_MAINTAIN]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
74
+ "iterative": true,
75
+ "iterationsize": 100
76
+ },
77
+ {
78
+ "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_READ]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
79
+ "iterative": true,
80
+ "iterationsize": 100
81
+ },
82
+ {
83
+ "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_TRIAGE]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
84
+ "iterative": true,
85
+ "iterationsize": 100
86
+ },
87
+ {
88
+ "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_WRITE]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
89
+ "iterative": true,
90
+ "iterationsize": 100
66
91
  }],
67
92
  "name": "cleanup GitHub repos data"
68
93
  }
@@ -1,24 +1,37 @@
1
1
  import logging
2
- from typing import Dict
3
- from typing import List
2
+ from collections import namedtuple
3
+ from typing import Any
4
4
 
5
5
  import boto3
6
6
  import neo4j
7
7
 
8
8
  from .util import get_botocore_config
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.models.aws.ec2.auto_scaling_groups import AutoScalingGroupSchema
12
+ from cartography.models.aws.ec2.auto_scaling_groups import EC2InstanceAutoScalingGroupSchema
13
+ from cartography.models.aws.ec2.auto_scaling_groups import EC2SubnetAutoScalingGroupSchema
14
+ from cartography.models.aws.ec2.launch_configurations import LaunchConfigurationSchema
9
15
  from cartography.util import aws_handle_regions
10
- from cartography.util import run_cleanup_job
11
16
  from cartography.util import timeit
12
17
 
13
18
  logger = logging.getLogger(__name__)
14
19
 
20
+ AsgData = namedtuple(
21
+ 'AsgData', [
22
+ "group_list",
23
+ "instance_list",
24
+ "subnet_list",
25
+ ],
26
+ )
27
+
15
28
 
16
29
  @timeit
17
30
  @aws_handle_regions
18
- def get_ec2_auto_scaling_groups(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
31
+ def get_ec2_auto_scaling_groups(boto3_session: boto3.session.Session, region: str) -> list[dict]:
19
32
  client = boto3_session.client('autoscaling', region_name=region, config=get_botocore_config())
20
33
  paginator = client.get_paginator('describe_auto_scaling_groups')
21
- asgs: List[Dict] = []
34
+ asgs: list[dict] = []
22
35
  for page in paginator.paginate():
23
36
  asgs.extend(page['AutoScalingGroups'])
24
37
  return asgs
@@ -26,218 +39,167 @@ def get_ec2_auto_scaling_groups(boto3_session: boto3.session.Session, region: st
26
39
 
27
40
  @timeit
28
41
  @aws_handle_regions
29
- def get_launch_configurations(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
42
+ def get_launch_configurations(boto3_session: boto3.session.Session, region: str) -> list[dict]:
30
43
  client = boto3_session.client('autoscaling', region_name=region, config=get_botocore_config())
31
44
  paginator = client.get_paginator('describe_launch_configurations')
32
- lcs: List[Dict] = []
45
+ lcs: list[dict] = []
33
46
  for page in paginator.paginate():
34
47
  lcs.extend(page['LaunchConfigurations'])
35
48
  return lcs
36
49
 
37
50
 
51
+ def transform_launch_configurations(configurations: list[dict[str, Any]]) -> list[dict[str, Any]]:
52
+ transformed_configurations = []
53
+ for config in configurations:
54
+ transformed_configurations.append({
55
+ 'AssociatePublicIpAddress': config.get('AssociatePublicIpAddress'),
56
+ 'LaunchConfigurationARN': config.get('LaunchConfigurationARN'),
57
+ 'LaunchConfigurationName': config.get('LaunchConfigurationName'),
58
+ 'CreatedTime': config.get('CreatedTime'),
59
+ 'ImageId': config.get('ImageId'),
60
+ 'KeyName': config.get('KeyName'),
61
+ 'SecurityGroups': config.get('SecurityGroups'),
62
+ 'InstanceType': config.get('InstanceType'),
63
+ 'KernelId': config.get('KernelId'),
64
+ 'RamdiskId': config.get('RamdiskId'),
65
+ 'InstanceMonitoring': config.get('InstanceMonitoring', {}).get('Enabled'),
66
+ 'SpotPrice': config.get('SpotPrice'),
67
+ 'IamInstanceProfile': config.get('IamInstanceProfile'),
68
+ 'EbsOptimized': config.get('EbsOptimized'),
69
+ 'PlacementTenancy': config.get('PlacementTenancy'),
70
+ })
71
+ return transformed_configurations
72
+
73
+
74
+ def transform_auto_scaling_groups(groups: list[dict[str, Any]]) -> AsgData:
75
+ transformed_groups = []
76
+ related_vpcs = []
77
+ related_instances = []
78
+ for group in groups:
79
+ transformed_groups.append({
80
+ 'AutoScalingGroupARN': group['AutoScalingGroupARN'],
81
+ 'CapacityRebalance': group.get('CapacityRebalance'),
82
+ 'CreatedTime': str(group.get('CreatedTime')),
83
+ 'DefaultCooldown': group.get('DefaultCooldown'),
84
+ 'DesiredCapacity': group.get('DesiredCapacity'),
85
+ 'HealthCheckGracePeriod': group.get('HealthCheckGracePeriod'),
86
+ 'HealthCheckType': group.get('HealthCheckType'),
87
+ 'LaunchConfigurationName': group.get('LaunchConfigurationName'),
88
+ 'LaunchTemplateName': group.get('LaunchTemplate', {}).get('LaunchTemplateName'),
89
+ 'LaunchTemplateId': group.get('LaunchTemplate', {}).get('LaunchTemplateId'),
90
+ 'LaunchTemplateVersion': group.get('LaunchTemplate', {}).get('Version'),
91
+ 'MaxInstanceLifetime': group.get('MaxInstanceLifetime'),
92
+ 'MaxSize': group.get('MaxSize'),
93
+ 'MinSize': group.get('MinSize'),
94
+ 'AutoScalingGroupName': group.get('AutoScalingGroupName'),
95
+ 'NewInstancesProtectedFromScaleIn': group.get('NewInstancesProtectedFromScaleIn'),
96
+ 'Status': group.get('Status'),
97
+ })
98
+
99
+ if group.get('VPCZoneIdentifier', None):
100
+ vpclist = group['VPCZoneIdentifier']
101
+ subnet_ids = vpclist.split(',') if ',' in vpclist else [vpclist]
102
+ subnets = []
103
+ for subnet_id in subnet_ids:
104
+ subnets.append({
105
+ 'VPCZoneIdentifier': subnet_id,
106
+ 'AutoScalingGroupARN': group['AutoScalingGroupARN'],
107
+ })
108
+ related_vpcs.extend(subnets)
109
+
110
+ for instance_data in group.get('Instances', []):
111
+ related_instances.append({
112
+ 'InstanceId': instance_data['InstanceId'],
113
+ 'AutoScalingGroupARN': group['AutoScalingGroupARN'],
114
+ })
115
+
116
+ return AsgData(
117
+ group_list=transformed_groups,
118
+ instance_list=related_instances,
119
+ subnet_list=related_vpcs,
120
+ )
121
+
122
+
38
123
  @timeit
39
124
  def load_launch_configurations(
40
- neo4j_session: neo4j.Session, data: List[Dict], region: str, current_aws_account_id: str, update_tag: int,
125
+ neo4j_session: neo4j.Session, data: list[dict], region: str, current_aws_account_id: str, update_tag: int,
41
126
  ) -> None:
42
- ingest_lc = """
43
- UNWIND $launch_configurations as lc
44
- MERGE (config:LaunchConfiguration{id: lc.LaunchConfigurationARN})
45
- ON CREATE SET config.firstseen = timestamp(), config.name = lc.LaunchConfigurationName,
46
- config.arn = lc.LaunchConfigurationARN,
47
- config.created_time = lc.CreatedTime
48
- SET config.lastupdated = $update_tag, config.image_id = lc.ImageId,
49
- config.key_name = lc.KeyName,
50
- config.security_groups = lc.SecurityGroups,
51
- config.instance_type = lc.InstanceType,
52
- config.kernel_id = lc.KernelId,
53
- config.ramdisk_id = lc.RamdiskId,
54
- config.instance_monitoring_enabled = lc.InstanceMonitoring.Enabled,
55
- config.spot_price = lc.SpotPrice,
56
- config.iam_instance_profile = lc.IamInstanceProfile,
57
- config.ebs_optimized = lc.EbsOptimized,
58
- config.associate_public_ip_address = lc.AssociatePublicIpAddress,
59
- config.placement_tenancy = lc.PlacementTenancy,
60
- config.region=$Region
61
- WITH config
62
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
63
- MERGE (aa)-[r:RESOURCE]->(config)
64
- ON CREATE SET r.firstseen = timestamp()
65
- SET r.lastupdated = $update_tag
66
- """
67
- for lc in data:
68
- lc['CreatedTime'] = str(int(lc['CreatedTime'].timestamp()))
69
-
70
- neo4j_session.run(
71
- ingest_lc,
72
- launch_configurations=data,
73
- AWS_ACCOUNT_ID=current_aws_account_id,
127
+ load(
128
+ neo4j_session,
129
+ LaunchConfigurationSchema(),
130
+ data,
74
131
  Region=region,
75
- update_tag=update_tag,
132
+ AWS_ID=current_aws_account_id,
133
+ lastupdated=update_tag,
76
134
  )
77
135
 
78
136
 
79
- @timeit
80
- def load_ec2_auto_scaling_groups(
81
- neo4j_session: neo4j.Session, data: List[Dict], region: str, current_aws_account_id: str, update_tag: int,
137
+ def load_groups(
138
+ neo4j_session: neo4j.Session, data: list[dict], region: str, current_aws_account_id: str, update_tag: int,
82
139
  ) -> None:
83
- ingest_group = """
84
- UNWIND $autoscaling_groups_list as ag
85
- MERGE (group:AutoScalingGroup{arn: ag.AutoScalingGroupARN})
86
- ON CREATE SET group.firstseen = timestamp(),
87
- group.createdtime = ag.CreatedTime
88
- SET group.launchconfigurationname = ag.LaunchConfigurationName,
89
- group.launchtemplatename = ag.LaunchTemplate.LaunchTemplateName,
90
- group.launchtemplateid = ag.LaunchTemplate.LaunchTemplateId,
91
- group.launchtemplateversion = ag.LaunchTemplate.Version,
92
- group.maxsize = ag.MaxSize, group.minsize = ag.MinSize, group.defaultcooldown = ag.DefaultCooldown,
93
- group.desiredcapacity = ag.DesiredCapacity, group.healthchecktype = ag.HealthCheckType,
94
- group.healthcheckgraceperiod = ag.HealthCheckGracePeriod, group.status = ag.Status,
95
- group.newinstancesprotectedfromscalein = ag.NewInstancesProtectedFromScaleIn,
96
- group.maxinstancelifetime = ag.MaxInstanceLifetime, group.capacityrebalance = ag.CapacityRebalance,
97
- group.name = ag.AutoScalingGroupName,
98
- group.lastupdated = $update_tag,
99
- group.region=$Region
100
- WITH group
101
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
102
- MERGE (aa)-[r:RESOURCE]->(group)
103
- ON CREATE SET r.firstseen = timestamp()
104
- SET r.lastupdated = $update_tag
105
- """
106
-
107
- ingest_vpc = """
108
- UNWIND $vpc_id_list as vpc_id
109
- MERGE (subnet:EC2Subnet{subnetid: vpc_id})
110
- ON CREATE SET subnet.firstseen = timestamp()
111
- SET subnet.lastupdated = $update_tag
112
- WITH subnet
113
- MATCH (group:AutoScalingGroup{arn: $GROUPARN})
114
- MERGE (subnet)<-[r:VPC_IDENTIFIER]-(group)
115
- ON CREATE SET r.firstseen = timestamp()
116
- SET r.lastupdated = $update_tag
117
- """
118
-
119
- ingest_instance = """
120
- UNWIND $instances_list as i
121
- MERGE (instance:Instance:EC2Instance{id: i.InstanceId})
122
- ON CREATE SET instance.firstseen = timestamp()
123
- SET instance.lastupdated = $update_tag, instance.region=$Region
124
- WITH instance
125
- MATCH (group:AutoScalingGroup{arn: $GROUPARN})
126
- MERGE (instance)-[r:MEMBER_AUTO_SCALE_GROUP]->(group)
127
- ON CREATE SET r.firstseen = timestamp()
128
- SET r.lastupdated = $update_tag
129
- WITH instance
130
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
131
- MERGE (aa)-[r:RESOURCE]->(instance)
132
- ON CREATE SET r.firstseen = timestamp()
133
- SET r.lastupdated = $update_tag
134
- """
135
-
136
- ingest_lts = """
137
- UNWIND $autoscaling_groups_list as ag
138
- MATCH (group:AutoScalingGroup{arn: ag.AutoScalingGroupARN})
139
- MATCH (template:LaunchTemplate{id: ag.LaunchTemplate.LaunchTemplateId})
140
- MERGE (group)-[r:HAS_LAUNCH_TEMPLATE]->(template)
141
- ON CREATE SET r.firstseen = timestamp()
142
- SET r.lastupdated = $update_tag
143
- """
144
-
145
- ingest_lcs = """
146
- UNWIND $autoscaling_groups_list as ag
147
- MATCH (group:AutoScalingGroup{arn: ag.AutoScalingGroupARN})
148
- MATCH (config:LaunchConfiguration{name: ag.LaunchConfigurationName})
149
- MERGE (group)-[r:HAS_LAUNCH_CONFIG]->(config)
150
- ON CREATE SET r.firstseen = timestamp()
151
- SET r.lastupdated = $update_tag
152
- """
153
-
154
- launch_configs = []
155
- launch_templates = []
156
- for group in data:
157
- if group.get('LaunchConfigurationName'):
158
- launch_configs.append(group)
159
- if group.get('LaunchTemplate'):
160
- launch_templates.append(group)
161
-
162
- group['CreatedTime'] = str(group['CreatedTime'])
163
-
164
- neo4j_session.run(
165
- ingest_group,
166
- autoscaling_groups_list=data,
167
- AWS_ACCOUNT_ID=current_aws_account_id,
140
+ load(
141
+ neo4j_session,
142
+ AutoScalingGroupSchema(),
143
+ data,
168
144
  Region=region,
169
- update_tag=update_tag,
145
+ AWS_ID=current_aws_account_id,
146
+ lastupdated=update_tag,
170
147
  )
171
- neo4j_session.run(
172
- ingest_lcs,
173
- autoscaling_groups_list=launch_configs,
174
- AWS_ACCOUNT_ID=current_aws_account_id,
148
+
149
+
150
+ def load_asg_subnets(
151
+ neo4j_session: neo4j.Session, data: list[dict], region: str, current_aws_account_id: str, update_tag: int,
152
+ ) -> None:
153
+ load(
154
+ neo4j_session,
155
+ EC2SubnetAutoScalingGroupSchema(),
156
+ data,
175
157
  Region=region,
176
- update_tag=update_tag,
158
+ AWS_ID=current_aws_account_id,
159
+ lastupdated=update_tag,
177
160
  )
178
- neo4j_session.run(
179
- ingest_lts,
180
- autoscaling_groups_list=launch_templates,
181
- AWS_ACCOUNT_ID=current_aws_account_id,
161
+
162
+
163
+ def load_asg_instances(
164
+ neo4j_session: neo4j.Session, data: list[dict], region: str, current_aws_account_id: str, update_tag: int,
165
+ ) -> None:
166
+ load(
167
+ neo4j_session,
168
+ EC2InstanceAutoScalingGroupSchema(),
169
+ data,
182
170
  Region=region,
183
- update_tag=update_tag,
171
+ AWS_ID=current_aws_account_id,
172
+ lastupdated=update_tag,
184
173
  )
185
174
 
186
- for group in data:
187
- group_arn = group["AutoScalingGroupARN"]
188
- if group.get('VPCZoneIdentifier'):
189
- vpclist = group["VPCZoneIdentifier"]
190
- if ',' in vpclist:
191
- data = vpclist.split(',')
192
- else:
193
- data = vpclist
194
- neo4j_session.run(
195
- ingest_vpc,
196
- vpc_id_list=data,
197
- GROUPARN=group_arn,
198
- update_tag=update_tag,
199
- )
200
-
201
- if group.get("Instances"):
202
- data = group["Instances"]
203
- neo4j_session.run(
204
- ingest_instance,
205
- instances_list=data,
206
- GROUPARN=group_arn,
207
- AWS_ACCOUNT_ID=current_aws_account_id,
208
- Region=region,
209
- update_tag=update_tag,
210
- )
211
-
212
175
 
213
176
  @timeit
214
- def cleanup_ec2_auto_scaling_groups(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
215
- run_cleanup_job(
216
- 'aws_ingest_ec2_auto_scaling_groups_cleanup.json',
217
- neo4j_session,
218
- common_job_parameters,
219
- )
177
+ def load_auto_scaling_groups(
178
+ neo4j_session: neo4j.Session, data: AsgData, region: str, current_aws_account_id: str, update_tag: int,
179
+ ) -> None:
180
+ load_groups(neo4j_session, data.group_list, region, current_aws_account_id, update_tag)
181
+ load_asg_instances(neo4j_session, data.instance_list, region, current_aws_account_id, update_tag)
182
+ load_asg_subnets(neo4j_session, data.subnet_list, region, current_aws_account_id, update_tag)
220
183
 
221
184
 
222
185
  @timeit
223
- def cleanup_ec2_launch_configurations(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
224
- run_cleanup_job(
225
- 'aws_import_ec2_launch_configurations_cleanup.json',
226
- neo4j_session,
227
- common_job_parameters,
228
- )
186
+ def cleanup(neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]) -> None:
187
+ logger.debug("Running EC2 instance cleanup")
188
+ GraphJob.from_node_schema(AutoScalingGroupSchema(), common_job_parameters).run(neo4j_session)
189
+ GraphJob.from_node_schema(LaunchConfigurationSchema(), common_job_parameters).run(neo4j_session)
229
190
 
230
191
 
231
192
  @timeit
232
193
  def sync_ec2_auto_scaling_groups(
233
- neo4j_session: neo4j.Session, boto3_session: boto3.session.Session, regions: List[str],
234
- current_aws_account_id: str, update_tag: int, common_job_parameters: Dict,
194
+ neo4j_session: neo4j.Session, boto3_session: boto3.session.Session, regions: list[str],
195
+ current_aws_account_id: str, update_tag: int, common_job_parameters: dict,
235
196
  ) -> None:
236
197
  for region in regions:
237
198
  logger.debug("Syncing auto scaling groups for region '%s' in account '%s'.", region, current_aws_account_id)
238
199
  lc_data = get_launch_configurations(boto3_session, region)
239
- load_launch_configurations(neo4j_session, lc_data, region, current_aws_account_id, update_tag)
240
- data = get_ec2_auto_scaling_groups(boto3_session, region)
241
- load_ec2_auto_scaling_groups(neo4j_session, data, region, current_aws_account_id, update_tag)
242
- cleanup_ec2_auto_scaling_groups(neo4j_session, common_job_parameters)
243
- cleanup_ec2_launch_configurations(neo4j_session, common_job_parameters)
200
+ asg_data = get_ec2_auto_scaling_groups(boto3_session, region)
201
+ lc_transformed = transform_launch_configurations(lc_data)
202
+ asg_transformed = transform_auto_scaling_groups(asg_data)
203
+ load_launch_configurations(neo4j_session, lc_transformed, region, current_aws_account_id, update_tag)
204
+ load_auto_scaling_groups(neo4j_session, asg_transformed, region, current_aws_account_id, update_tag)
205
+ cleanup(neo4j_session, common_job_parameters)
@@ -11,6 +11,7 @@ import neo4j
11
11
  from cartography.client.core.tx import load
12
12
  from cartography.graph.job import GraphJob
13
13
  from cartography.intel.aws.ec2.util import get_botocore_config
14
+ from cartography.models.aws.ec2.auto_scaling_groups import EC2InstanceAutoScalingGroupSchema
14
15
  from cartography.models.aws.ec2.instances import EC2InstanceSchema
15
16
  from cartography.models.aws.ec2.keypairs import EC2KeyPairSchema
16
17
  from cartography.models.aws.ec2.networkinterface_instance import EC2NetworkInterfaceInstanceSchema
@@ -308,6 +309,7 @@ def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any])
308
309
  logger.debug("Running EC2 instance cleanup")
309
310
  GraphJob.from_node_schema(EC2ReservationSchema(), common_job_parameters).run(neo4j_session)
310
311
  GraphJob.from_node_schema(EC2InstanceSchema(), common_job_parameters).run(neo4j_session)
312
+ GraphJob.from_node_schema(EC2InstanceAutoScalingGroupSchema(), common_job_parameters).run(neo4j_session)
311
313
 
312
314
 
313
315
  @timeit
@@ -69,7 +69,8 @@ def transform_network_acl_data(
69
69
  direction = 'egress' if rule['Egress'] else 'inbound'
70
70
  transformed_rule = {
71
71
  'Id': f"{network_acl['NetworkAclId']}/{direction}/{rule['RuleNumber']}",
72
- 'CidrBlock': rule['CidrBlock'],
72
+ 'CidrBlock': rule.get('CidrBlock'),
73
+ 'Ipv6CidrBlock': rule.get('Ipv6CidrBlock'),
73
74
  'Egress': rule['Egress'],
74
75
  'Protocol': rule['Protocol'],
75
76
  'RuleAction': rule['RuleAction'],
@@ -7,6 +7,7 @@ import neo4j
7
7
 
8
8
  from .util import get_botocore_config
9
9
  from cartography.graph.job import GraphJob
10
+ from cartography.models.aws.ec2.auto_scaling_groups import EC2SubnetAutoScalingGroupSchema
10
11
  from cartography.models.aws.ec2.subnet_instance import EC2SubnetInstanceSchema
11
12
  from cartography.util import aws_handle_regions
12
13
  from cartography.util import run_cleanup_job
@@ -79,6 +80,7 @@ def load_subnets(
79
80
  def cleanup_subnets(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
80
81
  run_cleanup_job('aws_ingest_subnets_cleanup.json', neo4j_session, common_job_parameters)
81
82
  GraphJob.from_node_schema(EC2SubnetInstanceSchema(), common_job_parameters).run(neo4j_session)
83
+ GraphJob.from_node_schema(EC2SubnetAutoScalingGroupSchema(), common_job_parameters).run(neo4j_session)
82
84
 
83
85
 
84
86
  @timeit
@@ -539,11 +539,12 @@ def _transform_policy_statements(statements: Any, policy_id: str) -> List[Dict]:
539
539
  if not isinstance(statements, list):
540
540
  statements = [statements]
541
541
  for stmt in statements:
542
- if "Sid" not in stmt:
542
+ if "Sid" in stmt and stmt["Sid"]:
543
+ statement_id = stmt["Sid"]
544
+ else:
543
545
  statement_id = count
544
546
  count += 1
545
- else:
546
- statement_id = stmt["Sid"]
547
+
547
548
  stmt["id"] = f"{policy_id}/statement/{statement_id}"
548
549
  if "Resource" in stmt:
549
550
  stmt["Resource"] = ensure_list(stmt["Resource"])
@@ -22,9 +22,9 @@ from cartography.util import timeit
22
22
 
23
23
  logger = logging.getLogger(__name__)
24
24
 
25
- MAX_RETRIES = 3
26
- # Connect and read timeouts of 60 seconds each; see https://requests.readthedocs.io/en/master/user/advanced/#timeouts
27
- CONNECT_AND_READ_TIMEOUT = (60, 60)
25
+ MAX_RETRIES = 8
26
+ # Connect timeout of 30 seconds, read timeout of 120 seconds; see https://requests.readthedocs.io/en/master/user/advanced/#timeouts
27
+ CONNECT_AND_READ_TIMEOUT = (30, 120)
28
28
  CVE_FEED_ID = "NIST_NVD"
29
29
  BATCH_SIZE_DAYS = 120
30
30
  RESULTS_PER_PAGE = 2000
@@ -85,27 +85,32 @@ def _call_cves_api(url: str, api_key: str | None, params: Dict[str, Any]) -> Dic
85
85
  )
86
86
  results: Dict[Any, Any] = dict()
87
87
 
88
- while params["resultsPerPage"] > 0 or params["startIndex"] < totalResults:
89
- try:
90
- res = requests.get(
91
- url, params=params, headers=headers, timeout=CONNECT_AND_READ_TIMEOUT,
92
- )
93
- res.raise_for_status()
94
- except requests.exceptions.HTTPError:
95
- logger.error(
96
- f"Failed to get CVE data from NIST NVD API {res.status_code} : {res.text}",
97
- )
98
- retries += 1
99
- if retries >= MAX_RETRIES:
100
- raise
101
- continue
102
- data = res.json()
103
- _map_cve_dict(results, data)
104
- totalResults = data["totalResults"]
105
- params["resultsPerPage"] = data["resultsPerPage"]
106
- params["startIndex"] += data["resultsPerPage"]
107
- retries = 0
108
- time.sleep(sleep_time)
88
+ with requests.Session() as session:
89
+ while params["resultsPerPage"] > 0 or params["startIndex"] < totalResults:
90
+ logger.info(f"Calling NIST NVD API at {url} with params {params}")
91
+ try:
92
+ res = session.get(
93
+ url, params=params, headers=headers, timeout=CONNECT_AND_READ_TIMEOUT,
94
+ )
95
+ res.raise_for_status()
96
+ data = res.json()
97
+ except requests.exceptions.HTTPError:
98
+ logger.error(
99
+ f"Failed to get CVE data from NIST NVD API {res.status_code} : {res.text}",
100
+ )
101
+ retries += 1
102
+ if retries >= MAX_RETRIES:
103
+ raise
104
+ # Exponential backoff
105
+ sleep_time *= 2
106
+ time.sleep(sleep_time)
107
+ continue
108
+ _map_cve_dict(results, data)
109
+ totalResults = data["totalResults"]
110
+ params["resultsPerPage"] = data["resultsPerPage"]
111
+ params["startIndex"] += data["resultsPerPage"]
112
+ retries = 0
113
+ time.sleep(sleep_time)
109
114
  return results
110
115
 
111
116