cartography 0.95.0rc1__py3-none-any.whl → 0.96.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (44) hide show
  1. cartography/cli.py +15 -0
  2. cartography/client/core/tx.py +1 -1
  3. cartography/config.py +6 -2
  4. cartography/data/indexes.cypher +1 -2
  5. cartography/data/jobs/cleanup/aws_import_identity_center_cleanup.json +16 -0
  6. cartography/data/jobs/cleanup/{github_users_cleanup.json → github_org_and_users_cleanup.json} +5 -0
  7. cartography/data/jobs/cleanup/github_repos_cleanup.json +25 -0
  8. cartography/graph/querybuilder.py +4 -0
  9. cartography/intel/aws/apigateway.py +3 -3
  10. cartography/intel/aws/ec2/auto_scaling_groups.py +147 -185
  11. cartography/intel/aws/ec2/instances.py +2 -0
  12. cartography/intel/aws/ec2/network_acls.py +209 -0
  13. cartography/intel/aws/ec2/subnets.py +2 -0
  14. cartography/intel/aws/iam.py +4 -3
  15. cartography/intel/aws/identitycenter.py +307 -0
  16. cartography/intel/aws/resources.py +4 -0
  17. cartography/intel/cve/__init__.py +1 -1
  18. cartography/intel/cve/feed.py +10 -7
  19. cartography/intel/github/repos.py +176 -27
  20. cartography/intel/github/users.py +156 -39
  21. cartography/intel/okta/users.py +2 -1
  22. cartography/intel/semgrep/__init__.py +9 -2
  23. cartography/intel/semgrep/dependencies.py +233 -0
  24. cartography/intel/semgrep/deployment.py +67 -0
  25. cartography/intel/semgrep/findings.py +22 -53
  26. cartography/models/aws/ec2/auto_scaling_groups.py +204 -0
  27. cartography/models/aws/ec2/launch_configurations.py +55 -0
  28. cartography/models/aws/ec2/network_acl_rules.py +98 -0
  29. cartography/models/aws/ec2/network_acls.py +86 -0
  30. cartography/models/aws/identitycenter/__init__.py +0 -0
  31. cartography/models/aws/identitycenter/awsidentitycenter.py +44 -0
  32. cartography/models/aws/identitycenter/awspermissionset.py +84 -0
  33. cartography/models/aws/identitycenter/awsssouser.py +68 -0
  34. cartography/models/core/common.py +18 -1
  35. cartography/models/github/orgs.py +26 -0
  36. cartography/models/github/users.py +119 -0
  37. cartography/models/semgrep/dependencies.py +90 -0
  38. cartography-0.96.0.dist-info/METADATA +53 -0
  39. {cartography-0.95.0rc1.dist-info → cartography-0.96.0.dist-info}/RECORD +43 -27
  40. {cartography-0.95.0rc1.dist-info → cartography-0.96.0.dist-info}/WHEEL +1 -1
  41. cartography-0.95.0rc1.dist-info/METADATA +0 -53
  42. {cartography-0.95.0rc1.dist-info → cartography-0.96.0.dist-info}/LICENSE +0 -0
  43. {cartography-0.95.0rc1.dist-info → cartography-0.96.0.dist-info}/entry_points.txt +0 -0
  44. {cartography-0.95.0rc1.dist-info → cartography-0.96.0.dist-info}/top_level.txt +0 -0
cartography/cli.py CHANGED
@@ -9,6 +9,7 @@ import cartography.config
9
9
  import cartography.sync
10
10
  import cartography.util
11
11
  from cartography.intel.aws.util.common import parse_and_validate_aws_requested_syncs
12
+ from cartography.intel.semgrep.dependencies import parse_and_validate_semgrep_ecosystems
12
13
 
13
14
 
14
15
  logger = logging.getLogger(__name__)
@@ -524,6 +525,17 @@ class CLI:
524
525
  'Required if you are using the Semgrep intel module. Ignored otherwise.'
525
526
  ),
526
527
  )
528
+ parser.add_argument(
529
+ '--semgrep-dependency-ecosystems',
530
+ type=str,
531
+ default=None,
532
+ help=(
533
+ 'Comma-separated list of language ecosystems for which dependencies will be retrieved from Semgrep. '
534
+ 'For example, a value of "gomod,npm" will retrieve Go and NPM dependencies. '
535
+ 'See the full list of supported ecosystems in source code at cartography.intel.semgrep.dependencies. '
536
+ 'Required if you are using the Semgrep dependencies intel module. Ignored otherwise.'
537
+ ),
538
+ )
527
539
  parser.add_argument(
528
540
  '--snipeit-base-uri',
529
541
  type=str,
@@ -734,6 +746,9 @@ class CLI:
734
746
  config.semgrep_app_token = os.environ.get(config.semgrep_app_token_env_var)
735
747
  else:
736
748
  config.semgrep_app_token = None
749
+ if config.semgrep_dependency_ecosystems:
750
+ # No need to store the returned value; we're using this for input validation.
751
+ parse_and_validate_semgrep_ecosystems(config.semgrep_dependency_ecosystems)
737
752
 
738
753
  # CVE feed config
739
754
  if config.cve_api_key_env_var:
@@ -122,7 +122,7 @@ def read_list_of_tuples_tx(tx: neo4j.Transaction, query: str, **kwargs) -> List[
122
122
  return [tuple(val) for val in values]
123
123
 
124
124
 
125
- def read_single_dict_tx(tx: neo4j.Transaction, query: str, **kwargs) -> Dict[str, Any]:
125
+ def read_single_dict_tx(tx: neo4j.Transaction, query: str, **kwargs) -> Any:
126
126
  """
127
127
  Runs the given Neo4j query in the given transaction object and returns the single dict result. This is intended to
128
128
  be run only with queries that return a single dict.
cartography/config.py CHANGED
@@ -107,6 +107,8 @@ class Config:
107
107
  :param duo_api_hostname: The Duo api hostname, e.g. "api-abc123.duosecurity.com". Optional.
108
108
  :param semgrep_app_token: The Semgrep api token. Optional.
109
109
  :type semgrep_app_token: str
110
+ :param semgrep_dependency_ecosystems: Comma-separated list of Semgrep dependency ecosystems to fetch. Optional.
111
+ :type semgrep_dependency_ecosystems: str
110
112
  :type snipeit_base_uri: string
111
113
  :param snipeit_base_uri: SnipeIT data provider base URI. Optional.
112
114
  :type snipeit_token: string
@@ -155,7 +157,7 @@ class Config:
155
157
  pagerduty_request_timeout=None,
156
158
  nist_cve_url=None,
157
159
  cve_enabled=False,
158
- cve_api_key=None,
160
+ cve_api_key: str | None = None,
159
161
  crowdstrike_client_id=None,
160
162
  crowdstrike_client_secret=None,
161
163
  crowdstrike_api_url=None,
@@ -170,6 +172,7 @@ class Config:
170
172
  duo_api_secret=None,
171
173
  duo_api_hostname=None,
172
174
  semgrep_app_token=None,
175
+ semgrep_dependency_ecosystems=None,
173
176
  snipeit_base_uri=None,
174
177
  snipeit_token=None,
175
178
  snipeit_tenant_id=None,
@@ -212,7 +215,7 @@ class Config:
212
215
  self.pagerduty_request_timeout = pagerduty_request_timeout
213
216
  self.nist_cve_url = nist_cve_url
214
217
  self.cve_enabled = cve_enabled
215
- self.cve_api_key = cve_api_key
218
+ self.cve_api_key: str | None = cve_api_key
216
219
  self.crowdstrike_client_id = crowdstrike_client_id
217
220
  self.crowdstrike_client_secret = crowdstrike_client_secret
218
221
  self.crowdstrike_api_url = crowdstrike_api_url
@@ -227,6 +230,7 @@ class Config:
227
230
  self.duo_api_secret = duo_api_secret
228
231
  self.duo_api_hostname = duo_api_hostname
229
232
  self.semgrep_app_token = semgrep_app_token
233
+ self.semgrep_dependency_ecosystems = semgrep_dependency_ecosystems
230
234
  self.snipeit_base_uri = snipeit_base_uri
231
235
  self.snipeit_token = snipeit_token
232
236
  self.snipeit_tenant_id = snipeit_tenant_id
@@ -305,8 +305,7 @@ CREATE INDEX IF NOT EXISTS FOR (n:SpotlightVulnerability) ON (n.host_info_local_
305
305
  CREATE INDEX IF NOT EXISTS FOR (n:SpotlightVulnerability) ON (n.lastupdated);
306
306
  CREATE INDEX IF NOT EXISTS FOR (n:SQSQueue) ON (n.id);
307
307
  CREATE INDEX IF NOT EXISTS FOR (n:SQSQueue) ON (n.lastupdated);
308
- CREATE INDEX IF NOT EXISTS FOR (n:User) ON (n.arn);
309
- CREATE INDEX IF NOT EXISTS FOR (n:User) ON (n.lastupdated);
308
+ CREATE INDEX IF NOT EXISTS FOR (n:UserAccount) ON (n.id);
310
309
  CREATE INDEX IF NOT EXISTS FOR (n:AzureTenant) ON (n.id);
311
310
  CREATE INDEX IF NOT EXISTS FOR (n:AzureTenant) ON (n.lastupdated);
312
311
  CREATE INDEX IF NOT EXISTS FOR (n:AzurePrincipal) ON (n.email);
@@ -0,0 +1,16 @@
1
+ {
2
+ "statements": [
3
+
4
+ {
5
+ "query": "MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(:AWSSSOUser)<-[r:CAN_ASSUME_IDENTITY]-(:OktaUser) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r) RETURN COUNT(*) as TotalDeleted",
6
+ "iterative": true,
7
+ "iterationsize": 100
8
+ },
9
+ {
10
+ "query": "MATCH (:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(:AWSRole)-[r:ALLOWED_BY]->(:AWSSSOUser) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r) RETURN COUNT(*) as TotalDeleted",
11
+ "iterative": true,
12
+ "iterationsize": 100
13
+ }
14
+ ],
15
+ "name": "cleanup AWS Identity Center Instances and Related Data"
16
+ }
@@ -18,6 +18,11 @@
18
18
  "query": "MATCH (:GitHubUser)-[r:MEMBER_OF]->(:GitHubOrganization) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
19
19
  "iterative": true,
20
20
  "iterationsize": 100
21
+ },
22
+ {
23
+ "query": "MATCH (:GitHubUser)-[r:UNAFFILIATED]->(:GitHubOrganization) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
24
+ "iterative": true,
25
+ "iterationsize": 100
21
26
  }],
22
27
  "name": "cleanup GitHub users data"
23
28
  }
@@ -63,6 +63,31 @@
63
63
  "query": "MATCH (:GitHubUser)-[r:OUTSIDE_COLLAB_WRITE]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
64
64
  "iterative": true,
65
65
  "iterationsize": 100
66
+ },
67
+ {
68
+ "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_ADMIN]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
69
+ "iterative": true,
70
+ "iterationsize": 100
71
+ },
72
+ {
73
+ "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_MAINTAIN]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
74
+ "iterative": true,
75
+ "iterationsize": 100
76
+ },
77
+ {
78
+ "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_READ]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
79
+ "iterative": true,
80
+ "iterationsize": 100
81
+ },
82
+ {
83
+ "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_TRIAGE]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
84
+ "iterative": true,
85
+ "iterationsize": 100
86
+ },
87
+ {
88
+ "query": "MATCH (:GitHubUser)-[r:DIRECT_COLLAB_WRITE]->(:GitHubRepository) WHERE r.lastupdated <> $UPDATE_TAG WITH r LIMIT $LIMIT_SIZE DELETE (r)",
89
+ "iterative": true,
90
+ "iterationsize": 100
66
91
  }],
67
92
  "name": "cleanup GitHub repos data"
68
93
  }
@@ -118,6 +118,7 @@ def _build_where_clause_for_rel_match(node_var: str, matcher: TargetNodeMatcher)
118
118
  """
119
119
  match = Template("$node_var.$key = $prop_ref")
120
120
  case_insensitive_match = Template("toLower($node_var.$key) = toLower($prop_ref)")
121
+ fuzzy_and_ignorecase_match = Template("toLower($node_var.$key) CONTAINS toLower($prop_ref)")
121
122
 
122
123
  matcher_asdict = asdict(matcher)
123
124
 
@@ -125,7 +126,10 @@ def _build_where_clause_for_rel_match(node_var: str, matcher: TargetNodeMatcher)
125
126
  for key, prop_ref in matcher_asdict.items():
126
127
  if prop_ref.ignore_case:
127
128
  prop_line = case_insensitive_match.safe_substitute(node_var=node_var, key=key, prop_ref=prop_ref)
129
+ elif prop_ref.fuzzy_and_ignore_case:
130
+ prop_line = fuzzy_and_ignorecase_match.safe_substitute(node_var=node_var, key=key, prop_ref=prop_ref)
128
131
  else:
132
+ # Exact match (default; most efficient)
129
133
  prop_line = match.safe_substitute(node_var=node_var, key=key, prop_ref=prop_ref)
130
134
  result.append(prop_line)
131
135
  return ' AND\n'.join(result)
@@ -43,7 +43,7 @@ def get_rest_api_details(
43
43
  for api in rest_apis:
44
44
  stages = get_rest_api_stages(api, client)
45
45
  # clientcertificate id is given by the api stage
46
- certificate = get_rest_api_client_certificate(stages, client) # type: ignore
46
+ certificate = get_rest_api_client_certificate(stages, client)
47
47
  resources = get_rest_api_resources(api, client)
48
48
  policy = get_rest_api_policy(api, client)
49
49
  apis.append((api['id'], stages, certificate, resources, policy))
@@ -51,7 +51,7 @@ def get_rest_api_details(
51
51
 
52
52
 
53
53
  @timeit
54
- def get_rest_api_stages(api: Dict, client: botocore.client.BaseClient) -> List[Any]:
54
+ def get_rest_api_stages(api: Dict, client: botocore.client.BaseClient) -> Any:
55
55
  """
56
56
  Gets the REST API Stage Resources.
57
57
  """
@@ -99,7 +99,7 @@ def get_rest_api_resources(api: Dict, client: botocore.client.BaseClient) -> Lis
99
99
 
100
100
 
101
101
  @timeit
102
- def get_rest_api_policy(api: Dict, client: botocore.client.BaseClient) -> List[Any]:
102
+ def get_rest_api_policy(api: Dict, client: botocore.client.BaseClient) -> Any:
103
103
  """
104
104
  Gets the REST API policy. Returns policy string or None if no policy is present.
105
105
  """
@@ -1,24 +1,37 @@
1
1
  import logging
2
- from typing import Dict
3
- from typing import List
2
+ from collections import namedtuple
3
+ from typing import Any
4
4
 
5
5
  import boto3
6
6
  import neo4j
7
7
 
8
8
  from .util import get_botocore_config
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.models.aws.ec2.auto_scaling_groups import AutoScalingGroupSchema
12
+ from cartography.models.aws.ec2.auto_scaling_groups import EC2InstanceAutoScalingGroupSchema
13
+ from cartography.models.aws.ec2.auto_scaling_groups import EC2SubnetAutoScalingGroupSchema
14
+ from cartography.models.aws.ec2.launch_configurations import LaunchConfigurationSchema
9
15
  from cartography.util import aws_handle_regions
10
- from cartography.util import run_cleanup_job
11
16
  from cartography.util import timeit
12
17
 
13
18
  logger = logging.getLogger(__name__)
14
19
 
20
+ AsgData = namedtuple(
21
+ 'AsgData', [
22
+ "group_list",
23
+ "instance_list",
24
+ "subnet_list",
25
+ ],
26
+ )
27
+
15
28
 
16
29
  @timeit
17
30
  @aws_handle_regions
18
- def get_ec2_auto_scaling_groups(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
31
+ def get_ec2_auto_scaling_groups(boto3_session: boto3.session.Session, region: str) -> list[dict]:
19
32
  client = boto3_session.client('autoscaling', region_name=region, config=get_botocore_config())
20
33
  paginator = client.get_paginator('describe_auto_scaling_groups')
21
- asgs: List[Dict] = []
34
+ asgs: list[dict] = []
22
35
  for page in paginator.paginate():
23
36
  asgs.extend(page['AutoScalingGroups'])
24
37
  return asgs
@@ -26,218 +39,167 @@ def get_ec2_auto_scaling_groups(boto3_session: boto3.session.Session, region: st
26
39
 
27
40
  @timeit
28
41
  @aws_handle_regions
29
- def get_launch_configurations(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
42
+ def get_launch_configurations(boto3_session: boto3.session.Session, region: str) -> list[dict]:
30
43
  client = boto3_session.client('autoscaling', region_name=region, config=get_botocore_config())
31
44
  paginator = client.get_paginator('describe_launch_configurations')
32
- lcs: List[Dict] = []
45
+ lcs: list[dict] = []
33
46
  for page in paginator.paginate():
34
47
  lcs.extend(page['LaunchConfigurations'])
35
48
  return lcs
36
49
 
37
50
 
51
def transform_launch_configurations(configurations: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """
    Flatten raw launch configuration dicts into the flat field set handed to the loader.

    :param configurations: Launch configuration dicts, e.g. the output of get_launch_configurations().
    :return: One dict per input configuration, in input order. Every key below is always present;
        a field that is absent from the input is set to None. The nested ``InstanceMonitoring.Enabled``
        flag is lifted to the top-level ``InstanceMonitoring`` key.
    """
    return [
        {
            'AssociatePublicIpAddress': config.get('AssociatePublicIpAddress'),
            'LaunchConfigurationARN': config.get('LaunchConfigurationARN'),
            'LaunchConfigurationName': config.get('LaunchConfigurationName'),
            'CreatedTime': config.get('CreatedTime'),
            'ImageId': config.get('ImageId'),
            'KeyName': config.get('KeyName'),
            'SecurityGroups': config.get('SecurityGroups'),
            'InstanceType': config.get('InstanceType'),
            'KernelId': config.get('KernelId'),
            'RamdiskId': config.get('RamdiskId'),
            # `or {}` covers both a missing key and an explicit None value; a plain
            # `.get(key, {})` would return None for the latter and the chained .get() would raise.
            'InstanceMonitoring': (config.get('InstanceMonitoring') or {}).get('Enabled'),
            'SpotPrice': config.get('SpotPrice'),
            'IamInstanceProfile': config.get('IamInstanceProfile'),
            'EbsOptimized': config.get('EbsOptimized'),
            'PlacementTenancy': config.get('PlacementTenancy'),
        }
        for config in configurations
    ]
72
+
73
+
74
def transform_auto_scaling_groups(groups: list[dict[str, Any]]) -> AsgData:
    """
    Split raw auto scaling group dicts into three flat row lists.

    :param groups: Auto scaling group dicts, e.g. the output of get_ec2_auto_scaling_groups().
        Each dict must contain 'AutoScalingGroupARN'; all other fields are optional.
    :return: An AsgData whose ``group_list`` holds one flattened dict per group, whose
        ``instance_list`` holds one {InstanceId, AutoScalingGroupARN} row per member instance, and
        whose ``subnet_list`` holds one {VPCZoneIdentifier, AutoScalingGroupARN} row per subnet id.
    :raises KeyError: If a group has no 'AutoScalingGroupARN', or an instance entry has no 'InstanceId'.
    """
    transformed_groups = []
    subnet_rows = []
    instance_rows = []
    for group in groups:
        group_arn = group['AutoScalingGroupARN']
        # `or {}` covers both a missing key and an explicit None value.
        launch_template = group.get('LaunchTemplate') or {}
        transformed_groups.append({
            'AutoScalingGroupARN': group_arn,
            'CapacityRebalance': group.get('CapacityRebalance'),
            # NOTE: a missing CreatedTime is stored as the string 'None'; kept as-is for compatibility.
            'CreatedTime': str(group.get('CreatedTime')),
            'DefaultCooldown': group.get('DefaultCooldown'),
            'DesiredCapacity': group.get('DesiredCapacity'),
            'HealthCheckGracePeriod': group.get('HealthCheckGracePeriod'),
            'HealthCheckType': group.get('HealthCheckType'),
            'LaunchConfigurationName': group.get('LaunchConfigurationName'),
            'LaunchTemplateName': launch_template.get('LaunchTemplateName'),
            'LaunchTemplateId': launch_template.get('LaunchTemplateId'),
            'LaunchTemplateVersion': launch_template.get('Version'),
            'MaxInstanceLifetime': group.get('MaxInstanceLifetime'),
            'MaxSize': group.get('MaxSize'),
            'MinSize': group.get('MinSize'),
            'AutoScalingGroupName': group.get('AutoScalingGroupName'),
            'NewInstancesProtectedFromScaleIn': group.get('NewInstancesProtectedFromScaleIn'),
            'Status': group.get('Status'),
        })

        # VPCZoneIdentifier is split on commas into subnet ids. str.split() already returns a
        # single-element list when there is no comma, so no special case is needed.
        zone_identifier = group.get('VPCZoneIdentifier')
        if zone_identifier:
            subnet_rows.extend(
                {'VPCZoneIdentifier': subnet_id, 'AutoScalingGroupARN': group_arn}
                for subnet_id in zone_identifier.split(',')
            )

        instance_rows.extend(
            {'InstanceId': instance_data['InstanceId'], 'AutoScalingGroupARN': group_arn}
            for instance_data in group.get('Instances') or []
        )

    return AsgData(
        group_list=transformed_groups,
        instance_list=instance_rows,
        subnet_list=subnet_rows,
    )
121
+
122
+
38
123
  @timeit
39
124
  def load_launch_configurations(
40
- neo4j_session: neo4j.Session, data: List[Dict], region: str, current_aws_account_id: str, update_tag: int,
125
+ neo4j_session: neo4j.Session, data: list[dict], region: str, current_aws_account_id: str, update_tag: int,
41
126
  ) -> None:
42
- ingest_lc = """
43
- UNWIND $launch_configurations as lc
44
- MERGE (config:LaunchConfiguration{id: lc.LaunchConfigurationARN})
45
- ON CREATE SET config.firstseen = timestamp(), config.name = lc.LaunchConfigurationName,
46
- config.arn = lc.LaunchConfigurationARN,
47
- config.created_time = lc.CreatedTime
48
- SET config.lastupdated = $update_tag, config.image_id = lc.ImageId,
49
- config.key_name = lc.KeyName,
50
- config.security_groups = lc.SecurityGroups,
51
- config.instance_type = lc.InstanceType,
52
- config.kernel_id = lc.KernelId,
53
- config.ramdisk_id = lc.RamdiskId,
54
- config.instance_monitoring_enabled = lc.InstanceMonitoring.Enabled,
55
- config.spot_price = lc.SpotPrice,
56
- config.iam_instance_profile = lc.IamInstanceProfile,
57
- config.ebs_optimized = lc.EbsOptimized,
58
- config.associate_public_ip_address = lc.AssociatePublicIpAddress,
59
- config.placement_tenancy = lc.PlacementTenancy,
60
- config.region=$Region
61
- WITH config
62
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
63
- MERGE (aa)-[r:RESOURCE]->(config)
64
- ON CREATE SET r.firstseen = timestamp()
65
- SET r.lastupdated = $update_tag
66
- """
67
- for lc in data:
68
- lc['CreatedTime'] = str(int(lc['CreatedTime'].timestamp()))
69
-
70
- neo4j_session.run(
71
- ingest_lc,
72
- launch_configurations=data,
73
- AWS_ACCOUNT_ID=current_aws_account_id,
127
+ load(
128
+ neo4j_session,
129
+ LaunchConfigurationSchema(),
130
+ data,
74
131
  Region=region,
75
- update_tag=update_tag,
132
+ AWS_ID=current_aws_account_id,
133
+ lastupdated=update_tag,
76
134
  )
77
135
 
78
136
 
79
- @timeit
80
- def load_ec2_auto_scaling_groups(
81
- neo4j_session: neo4j.Session, data: List[Dict], region: str, current_aws_account_id: str, update_tag: int,
137
+ def load_groups(
138
+ neo4j_session: neo4j.Session, data: list[dict], region: str, current_aws_account_id: str, update_tag: int,
82
139
  ) -> None:
83
- ingest_group = """
84
- UNWIND $autoscaling_groups_list as ag
85
- MERGE (group:AutoScalingGroup{arn: ag.AutoScalingGroupARN})
86
- ON CREATE SET group.firstseen = timestamp(),
87
- group.createdtime = ag.CreatedTime
88
- SET group.launchconfigurationname = ag.LaunchConfigurationName,
89
- group.launchtemplatename = ag.LaunchTemplate.LaunchTemplateName,
90
- group.launchtemplateid = ag.LaunchTemplate.LaunchTemplateId,
91
- group.launchtemplateversion = ag.LaunchTemplate.Version,
92
- group.maxsize = ag.MaxSize, group.minsize = ag.MinSize, group.defaultcooldown = ag.DefaultCooldown,
93
- group.desiredcapacity = ag.DesiredCapacity, group.healthchecktype = ag.HealthCheckType,
94
- group.healthcheckgraceperiod = ag.HealthCheckGracePeriod, group.status = ag.Status,
95
- group.newinstancesprotectedfromscalein = ag.NewInstancesProtectedFromScaleIn,
96
- group.maxinstancelifetime = ag.MaxInstanceLifetime, group.capacityrebalance = ag.CapacityRebalance,
97
- group.name = ag.AutoScalingGroupName,
98
- group.lastupdated = $update_tag,
99
- group.region=$Region
100
- WITH group
101
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
102
- MERGE (aa)-[r:RESOURCE]->(group)
103
- ON CREATE SET r.firstseen = timestamp()
104
- SET r.lastupdated = $update_tag
105
- """
106
-
107
- ingest_vpc = """
108
- UNWIND $vpc_id_list as vpc_id
109
- MERGE (subnet:EC2Subnet{subnetid: vpc_id})
110
- ON CREATE SET subnet.firstseen = timestamp()
111
- SET subnet.lastupdated = $update_tag
112
- WITH subnet
113
- MATCH (group:AutoScalingGroup{arn: $GROUPARN})
114
- MERGE (subnet)<-[r:VPC_IDENTIFIER]-(group)
115
- ON CREATE SET r.firstseen = timestamp()
116
- SET r.lastupdated = $update_tag
117
- """
118
-
119
- ingest_instance = """
120
- UNWIND $instances_list as i
121
- MERGE (instance:Instance:EC2Instance{id: i.InstanceId})
122
- ON CREATE SET instance.firstseen = timestamp()
123
- SET instance.lastupdated = $update_tag, instance.region=$Region
124
- WITH instance
125
- MATCH (group:AutoScalingGroup{arn: $GROUPARN})
126
- MERGE (instance)-[r:MEMBER_AUTO_SCALE_GROUP]->(group)
127
- ON CREATE SET r.firstseen = timestamp()
128
- SET r.lastupdated = $update_tag
129
- WITH instance
130
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
131
- MERGE (aa)-[r:RESOURCE]->(instance)
132
- ON CREATE SET r.firstseen = timestamp()
133
- SET r.lastupdated = $update_tag
134
- """
135
-
136
- ingest_lts = """
137
- UNWIND $autoscaling_groups_list as ag
138
- MATCH (group:AutoScalingGroup{arn: ag.AutoScalingGroupARN})
139
- MATCH (template:LaunchTemplate{id: ag.LaunchTemplate.LaunchTemplateId})
140
- MERGE (group)-[r:HAS_LAUNCH_TEMPLATE]->(template)
141
- ON CREATE SET r.firstseen = timestamp()
142
- SET r.lastupdated = $update_tag
143
- """
144
-
145
- ingest_lcs = """
146
- UNWIND $autoscaling_groups_list as ag
147
- MATCH (group:AutoScalingGroup{arn: ag.AutoScalingGroupARN})
148
- MATCH (config:LaunchConfiguration{name: ag.LaunchConfigurationName})
149
- MERGE (group)-[r:HAS_LAUNCH_CONFIG]->(config)
150
- ON CREATE SET r.firstseen = timestamp()
151
- SET r.lastupdated = $update_tag
152
- """
153
-
154
- launch_configs = []
155
- launch_templates = []
156
- for group in data:
157
- if group.get('LaunchConfigurationName'):
158
- launch_configs.append(group)
159
- if group.get('LaunchTemplate'):
160
- launch_templates.append(group)
161
-
162
- group['CreatedTime'] = str(group['CreatedTime'])
163
-
164
- neo4j_session.run(
165
- ingest_group,
166
- autoscaling_groups_list=data,
167
- AWS_ACCOUNT_ID=current_aws_account_id,
140
+ load(
141
+ neo4j_session,
142
+ AutoScalingGroupSchema(),
143
+ data,
168
144
  Region=region,
169
- update_tag=update_tag,
145
+ AWS_ID=current_aws_account_id,
146
+ lastupdated=update_tag,
170
147
  )
171
- neo4j_session.run(
172
- ingest_lcs,
173
- autoscaling_groups_list=launch_configs,
174
- AWS_ACCOUNT_ID=current_aws_account_id,
148
+
149
+
150
+ def load_asg_subnets(
151
+ neo4j_session: neo4j.Session, data: list[dict], region: str, current_aws_account_id: str, update_tag: int,
152
+ ) -> None:
153
+ load(
154
+ neo4j_session,
155
+ EC2SubnetAutoScalingGroupSchema(),
156
+ data,
175
157
  Region=region,
176
- update_tag=update_tag,
158
+ AWS_ID=current_aws_account_id,
159
+ lastupdated=update_tag,
177
160
  )
178
- neo4j_session.run(
179
- ingest_lts,
180
- autoscaling_groups_list=launch_templates,
181
- AWS_ACCOUNT_ID=current_aws_account_id,
161
+
162
+
163
+ def load_asg_instances(
164
+ neo4j_session: neo4j.Session, data: list[dict], region: str, current_aws_account_id: str, update_tag: int,
165
+ ) -> None:
166
+ load(
167
+ neo4j_session,
168
+ EC2InstanceAutoScalingGroupSchema(),
169
+ data,
182
170
  Region=region,
183
- update_tag=update_tag,
171
+ AWS_ID=current_aws_account_id,
172
+ lastupdated=update_tag,
184
173
  )
185
174
 
186
- for group in data:
187
- group_arn = group["AutoScalingGroupARN"]
188
- if group.get('VPCZoneIdentifier'):
189
- vpclist = group["VPCZoneIdentifier"]
190
- if ',' in vpclist:
191
- data = vpclist.split(',')
192
- else:
193
- data = vpclist
194
- neo4j_session.run(
195
- ingest_vpc,
196
- vpc_id_list=data,
197
- GROUPARN=group_arn,
198
- update_tag=update_tag,
199
- )
200
-
201
- if group.get("Instances"):
202
- data = group["Instances"]
203
- neo4j_session.run(
204
- ingest_instance,
205
- instances_list=data,
206
- GROUPARN=group_arn,
207
- AWS_ACCOUNT_ID=current_aws_account_id,
208
- Region=region,
209
- update_tag=update_tag,
210
- )
211
-
212
175
 
213
176
  @timeit
214
- def cleanup_ec2_auto_scaling_groups(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
215
- run_cleanup_job(
216
- 'aws_ingest_ec2_auto_scaling_groups_cleanup.json',
217
- neo4j_session,
218
- common_job_parameters,
219
- )
177
def load_auto_scaling_groups(
    neo4j_session: neo4j.Session, data: AsgData, region: str, current_aws_account_id: str, update_tag: int,
) -> None:
    """
    Load every part of a transformed AsgData bundle for one region and account.

    Each row list is passed to its dedicated loader together with the same session, region,
    account id and update tag. The call order is groups, then instances, then subnets.
    """
    loaders_and_rows = (
        (load_groups, data.group_list),
        (load_asg_instances, data.instance_list),
        (load_asg_subnets, data.subnet_list),
    )
    for loader, rows in loaders_and_rows:
        loader(neo4j_session, rows, region, current_aws_account_id, update_tag)
220
183
 
221
184
 
222
185
  @timeit
223
- def cleanup_ec2_launch_configurations(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
224
- run_cleanup_job(
225
- 'aws_import_ec2_launch_configurations_cleanup.json',
226
- neo4j_session,
227
- common_job_parameters,
228
- )
186
def cleanup(neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]) -> None:
    """
    Run the schema-derived cleanup jobs for auto scaling groups and launch configurations.

    :param neo4j_session: Session the cleanup jobs are run against.
    :param common_job_parameters: Parameters forwarded unchanged to GraphJob.from_node_schema().
    """
    # The previous message ("Running EC2 instance cleanup") was copied from the instances module
    # and misdescribed what is being cleaned up here.
    logger.debug("Running EC2 auto scaling group and launch configuration cleanup")
    GraphJob.from_node_schema(AutoScalingGroupSchema(), common_job_parameters).run(neo4j_session)
    GraphJob.from_node_schema(LaunchConfigurationSchema(), common_job_parameters).run(neo4j_session)
229
190
 
230
191
 
231
192
  @timeit
232
193
  def sync_ec2_auto_scaling_groups(
233
- neo4j_session: neo4j.Session, boto3_session: boto3.session.Session, regions: List[str],
234
- current_aws_account_id: str, update_tag: int, common_job_parameters: Dict,
194
+ neo4j_session: neo4j.Session, boto3_session: boto3.session.Session, regions: list[str],
195
+ current_aws_account_id: str, update_tag: int, common_job_parameters: dict,
235
196
  ) -> None:
236
197
  for region in regions:
237
198
  logger.debug("Syncing auto scaling groups for region '%s' in account '%s'.", region, current_aws_account_id)
238
199
  lc_data = get_launch_configurations(boto3_session, region)
239
- load_launch_configurations(neo4j_session, lc_data, region, current_aws_account_id, update_tag)
240
- data = get_ec2_auto_scaling_groups(boto3_session, region)
241
- load_ec2_auto_scaling_groups(neo4j_session, data, region, current_aws_account_id, update_tag)
242
- cleanup_ec2_auto_scaling_groups(neo4j_session, common_job_parameters)
243
- cleanup_ec2_launch_configurations(neo4j_session, common_job_parameters)
200
+ asg_data = get_ec2_auto_scaling_groups(boto3_session, region)
201
+ lc_transformed = transform_launch_configurations(lc_data)
202
+ asg_transformed = transform_auto_scaling_groups(asg_data)
203
+ load_launch_configurations(neo4j_session, lc_transformed, region, current_aws_account_id, update_tag)
204
+ load_auto_scaling_groups(neo4j_session, asg_transformed, region, current_aws_account_id, update_tag)
205
+ cleanup(neo4j_session, common_job_parameters)
@@ -11,6 +11,7 @@ import neo4j
11
11
  from cartography.client.core.tx import load
12
12
  from cartography.graph.job import GraphJob
13
13
  from cartography.intel.aws.ec2.util import get_botocore_config
14
+ from cartography.models.aws.ec2.auto_scaling_groups import EC2InstanceAutoScalingGroupSchema
14
15
  from cartography.models.aws.ec2.instances import EC2InstanceSchema
15
16
  from cartography.models.aws.ec2.keypairs import EC2KeyPairSchema
16
17
  from cartography.models.aws.ec2.networkinterface_instance import EC2NetworkInterfaceInstanceSchema
@@ -308,6 +309,7 @@ def cleanup(neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any])
308
309
  logger.debug("Running EC2 instance cleanup")
309
310
  GraphJob.from_node_schema(EC2ReservationSchema(), common_job_parameters).run(neo4j_session)
310
311
  GraphJob.from_node_schema(EC2InstanceSchema(), common_job_parameters).run(neo4j_session)
312
+ GraphJob.from_node_schema(EC2InstanceAutoScalingGroupSchema(), common_job_parameters).run(neo4j_session)
311
313
 
312
314
 
313
315
  @timeit