cartography 0.101.0rc1__py3-none-any.whl → 0.101.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (32)
  1. cartography/_version.py +2 -2
  2. cartography/data/indexes.cypher +0 -6
  3. cartography/data/jobs/cleanup/crowdstrike_import_cleanup.json +0 -5
  4. cartography/data/jobs/scoped_analysis/aws_ec2_iaminstanceprofile.json +15 -0
  5. cartography/graph/querybuilder.py +9 -1
  6. cartography/intel/aws/__init__.py +5 -1
  7. cartography/intel/aws/ec2/launch_templates.py +24 -7
  8. cartography/intel/aws/ec2/load_balancers.py +126 -148
  9. cartography/intel/aws/iam_instance_profiles.py +73 -0
  10. cartography/intel/aws/resources.py +4 -1
  11. cartography/intel/crowdstrike/__init__.py +17 -5
  12. cartography/intel/crowdstrike/endpoints.py +12 -44
  13. cartography/intel/gcp/__init__.py +7 -2
  14. cartography/intel/gsuite/__init__.py +8 -0
  15. cartography/intel/kandji/devices.py +27 -3
  16. cartography/models/aws/ec2/instances.py +17 -0
  17. cartography/models/aws/ec2/load_balancer_listeners.py +68 -0
  18. cartography/models/aws/ec2/load_balancers.py +102 -0
  19. cartography/models/aws/iam/__init__.py +0 -0
  20. cartography/models/aws/iam/instanceprofile.py +67 -0
  21. cartography/models/core/common.py +37 -6
  22. cartography/models/crowdstrike/__init__.py +0 -0
  23. cartography/models/crowdstrike/hosts.py +49 -0
  24. cartography/stats.py +1 -1
  25. {cartography-0.101.0rc1.dist-info → cartography-0.101.1.dist-info}/METADATA +4 -3
  26. {cartography-0.101.0rc1.dist-info → cartography-0.101.1.dist-info}/RECORD +30 -24
  27. cartography/data/jobs/analysis/aws_ec2_iaminstance.json +0 -10
  28. cartography/data/jobs/analysis/aws_ec2_iaminstanceprofile.json +0 -10
  29. {cartography-0.101.0rc1.dist-info → cartography-0.101.1.dist-info}/WHEEL +0 -0
  30. {cartography-0.101.0rc1.dist-info → cartography-0.101.1.dist-info}/entry_points.txt +0 -0
  31. {cartography-0.101.0rc1.dist-info → cartography-0.101.1.dist-info}/licenses/LICENSE +0 -0
  32. {cartography-0.101.0rc1.dist-info → cartography-0.101.1.dist-info}/top_level.txt +0 -0
cartography/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.101.0rc1'
21
- __version_tuple__ = version_tuple = (0, 101, 0)
20
+ __version__ = version = '0.101.1'
21
+ __version_tuple__ = version_tuple = (0, 101, 1)
cartography/data/indexes.cypher CHANGED
@@ -65,9 +65,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:AccountAccessKey) ON (n.accesskeyid);
65
65
  CREATE INDEX IF NOT EXISTS FOR (n:AccountAccessKey) ON (n.lastupdated);
66
66
  CREATE INDEX IF NOT EXISTS FOR (n:AutoScalingGroup) ON (n.arn);
67
67
  CREATE INDEX IF NOT EXISTS FOR (n:AutoScalingGroup) ON (n.lastupdated);
68
- CREATE INDEX IF NOT EXISTS FOR (n:CrowdstrikeHost) ON (n.id);
69
- CREATE INDEX IF NOT EXISTS FOR (n:CrowdstrikeHost) ON (n.instance_id);
70
- CREATE INDEX IF NOT EXISTS FOR (n:CrowdstrikeHost) ON (n.lastupdated);
71
68
  CREATE INDEX IF NOT EXISTS FOR (n:CVE) ON (n.id);
72
69
  CREATE INDEX IF NOT EXISTS FOR (n:CVE) ON (n.lastupdated);
73
70
  CREATE INDEX IF NOT EXISTS FOR (n:Dependency) ON (n.id);
@@ -194,9 +191,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:KMSGrant) ON (n.lastupdated);
194
191
  CREATE INDEX IF NOT EXISTS FOR (n:LaunchConfiguration) ON (n.id);
195
192
  CREATE INDEX IF NOT EXISTS FOR (n:LaunchConfiguration) ON (n.name);
196
193
  CREATE INDEX IF NOT EXISTS FOR (n:LaunchConfiguration) ON (n.lastupdated);
197
- CREATE INDEX IF NOT EXISTS FOR (n:LoadBalancer) ON (n.dnsname);
198
- CREATE INDEX IF NOT EXISTS FOR (n:LoadBalancer) ON (n.id);
199
- CREATE INDEX IF NOT EXISTS FOR (n:LoadBalancer) ON (n.lastupdated);
200
194
  CREATE INDEX IF NOT EXISTS FOR (n:LoadBalancerV2) ON (n.dnsname);
201
195
  CREATE INDEX IF NOT EXISTS FOR (n:LoadBalancerV2) ON (n.id);
202
196
  CREATE INDEX IF NOT EXISTS FOR (n:LoadBalancerV2) ON (n.lastupdated);
cartography/data/jobs/cleanup/crowdstrike_import_cleanup.json CHANGED
@@ -5,11 +5,6 @@
5
5
  "iterative": true,
6
6
  "iterationsize": 100
7
7
  },
8
- {
9
- "query": "MATCH (h:CrowdstrikeHost) WHERE h.lastupdated <> $UPDATE_TAG WITH h LIMIT $LIMIT_SIZE DETACH DELETE (h)",
10
- "iterative": true,
11
- "iterationsize": 100
12
- },
13
8
  {
14
9
  "query": "MATCH (:CrowdstrikeFinding)<-[hc:HAS_CVE]-(:SpotlightVulnerability) WHERE hc.lastupdated <> $UPDATE_TAG WITH hc LIMIT $LIMIT_SIZE DELETE (hc)",
15
10
  "iterative": true,
cartography/data/jobs/scoped_analysis/aws_ec2_iaminstanceprofile.json ADDED
@@ -0,0 +1,15 @@
1
+ {
2
+ "name": "EC2 Instances assume IAM roles",
3
+ "statements": [
4
+ {
5
+ "__comment": "Create STS_ASSUMEROLE_ALLOW relationships from EC2 instances to the IAM roles they can assume via their iaminstanceprofiles",
6
+ "query":"MATCH (aa:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(i:EC2Instance)-[:INSTANCE_PROFILE]->(p:AWSInstanceProfile)-[:ASSOCIATED_WITH]->(r:AWSRole)\nMERGE (i)-[s:STS_ASSUMEROLE_ALLOW]->(r)\nON CREATE SET s.firstseen = timestamp(), s.lastupdated = $UPDATE_TAG",
7
+ "iterative": true
8
+ },
9
+ {
10
+ "__comment": "Cleanup",
11
+ "query":"MATCH (aa:AWSAccount{id: $AWS_ID})-[:RESOURCE]->(:EC2Instance)-[s:STS_ASSUMEROLE_ALLOW]->(:AWSRole)\nWHERE s.lastupdated <> $UPDATE_TAG\nDELETE s",
12
+ "iterative": true
13
+ }
14
+ ]
15
+ }
cartography/graph/querybuilder.py CHANGED
@@ -109,7 +109,10 @@ def _build_match_clause(matcher: TargetNodeMatcher) -> str:
109
109
  return ', '.join(match.safe_substitute(Key=key, PropRef=prop_ref) for key, prop_ref in matcher_asdict.items())
110
110
 
111
111
 
112
- def _build_where_clause_for_rel_match(node_var: str, matcher: TargetNodeMatcher) -> str:
112
+ def _build_where_clause_for_rel_match(
113
+ node_var: str,
114
+ matcher: TargetNodeMatcher,
115
+ ) -> str:
113
116
  """
114
117
  Same as _build_match_clause, but puts the matching logic in a WHERE clause.
115
118
  This is intended specifically to use for joining with relationships where we need a case-insensitive match.
@@ -119,6 +122,8 @@ def _build_where_clause_for_rel_match(node_var: str, matcher: TargetNodeMatcher)
119
122
  match = Template("$node_var.$key = $prop_ref")
120
123
  case_insensitive_match = Template("toLower($node_var.$key) = toLower($prop_ref)")
121
124
  fuzzy_and_ignorecase_match = Template("toLower($node_var.$key) CONTAINS toLower($prop_ref)")
125
+ # This assumes that item.$prop_ref points to a list available on the data object
126
+ one_to_many_match = Template("$node_var.$key IN $prop_ref")
122
127
 
123
128
  matcher_asdict = asdict(matcher)
124
129
 
@@ -128,6 +133,9 @@ def _build_where_clause_for_rel_match(node_var: str, matcher: TargetNodeMatcher)
128
133
  prop_line = case_insensitive_match.safe_substitute(node_var=node_var, key=key, prop_ref=prop_ref)
129
134
  elif prop_ref.fuzzy_and_ignore_case:
130
135
  prop_line = fuzzy_and_ignorecase_match.safe_substitute(node_var=node_var, key=key, prop_ref=prop_ref)
136
+ elif prop_ref.one_to_many:
137
+ # Allow a single node to be attached to multiple others at once using a list of IDs provided in kwargs
138
+ prop_line = one_to_many_match.safe_substitute(node_var=node_var, key=key, prop_ref=prop_ref)
131
139
  else:
132
140
  # Exact match (default; most efficient)
133
141
  prop_line = match.safe_substitute(node_var=node_var, key=key, prop_ref=prop_ref)
cartography/intel/aws/__init__.py CHANGED
@@ -20,6 +20,7 @@ from cartography.util import merge_module_sync_metadata
20
20
  from cartography.util import run_analysis_and_ensure_deps
21
21
  from cartography.util import run_analysis_job
22
22
  from cartography.util import run_cleanup_job
23
+ from cartography.util import run_scoped_analysis_job
23
24
  from cartography.util import timeit
24
25
 
25
26
 
@@ -75,7 +76,7 @@ def _sync_one_account(
75
76
  if 'resourcegroupstaggingapi' in aws_requested_syncs:
76
77
  RESOURCE_FUNCTIONS['resourcegroupstaggingapi'](**sync_args)
77
78
 
78
- run_analysis_job(
79
+ run_scoped_analysis_job(
79
80
  'aws_ec2_iaminstanceprofile.json',
80
81
  neo4j_session,
81
82
  common_job_parameters,
@@ -211,6 +212,9 @@ def _perform_aws_analysis(
211
212
  neo4j_session: neo4j.Session,
212
213
  common_job_parameters: Dict[str, Any],
213
214
  ) -> None:
215
+ """
216
+ Performs AWS analysis jobs that span multiple accounts.
217
+ """
214
218
  requested_syncs_as_set = set(requested_syncs)
215
219
 
216
220
  ec2_asset_exposure_requirements = {
cartography/intel/aws/ec2/launch_templates.py CHANGED
@@ -2,6 +2,7 @@ import logging
2
2
  from typing import Any
3
3
 
4
4
  import boto3
5
+ import botocore
5
6
  import neo4j
6
7
 
7
8
  from .util import get_botocore_config
@@ -56,15 +57,27 @@ def get_launch_template_versions_by_template(
56
57
  client = boto3_session.client('ec2', region_name=region, config=get_botocore_config())
57
58
  v_paginator = client.get_paginator('describe_launch_template_versions')
58
59
  template_versions = []
59
- for versions in v_paginator.paginate(LaunchTemplateId=launch_template_id):
60
- template_versions.extend(versions['LaunchTemplateVersions'])
60
+ try:
61
+ for versions in v_paginator.paginate(LaunchTemplateId=launch_template_id):
62
+ template_versions.extend(versions['LaunchTemplateVersions'])
63
+ except botocore.exceptions.ClientError as e:
64
+ error_code = e.response['Error']['Code']
65
+ if error_code == 'InvalidLaunchTemplateId.NotFound':
66
+ logger.warning("Launch template %s no longer exists in region %s", launch_template_id, region)
67
+ else:
68
+ raise
61
69
  return template_versions
62
70
 
63
71
 
64
- def transform_launch_templates(templates: list[dict[str, Any]]) -> list[dict[str, Any]]:
72
+ def transform_launch_templates(templates: list[dict[str, Any]], versions: list[dict[str, Any]]) -> list[dict[str, Any]]:
73
+ valid_template_ids = {v['LaunchTemplateId'] for v in versions}
65
74
  result: list[dict[str, Any]] = []
66
75
  for template in templates:
76
+ if template['LaunchTemplateId'] not in valid_template_ids:
77
+ continue
78
+
67
79
  current = template.copy()
80
+ # Convert CreateTime to timestamp string
68
81
  current['CreateTime'] = str(int(current['CreateTime'].timestamp()))
69
82
  result.append(current)
70
83
  return result
@@ -157,9 +170,13 @@ def sync_ec2_launch_templates(
157
170
  logger.info(f"Syncing launch templates for region '{region}' in account '{current_aws_account_id}'.")
158
171
  templates = get_launch_templates(boto3_session, region)
159
172
  versions = get_launch_template_versions(boto3_session, region, templates)
160
- templates = transform_launch_templates(templates)
161
- load_launch_templates(neo4j_session, templates, region, current_aws_account_id, update_tag)
162
- versions = transform_launch_template_versions(versions)
163
- load_launch_template_versions(neo4j_session, versions, region, current_aws_account_id, update_tag)
173
+
174
+ # Transform and load the templates that have versions
175
+ transformed_templates = transform_launch_templates(templates, versions)
176
+ load_launch_templates(neo4j_session, transformed_templates, region, current_aws_account_id, update_tag)
177
+
178
+ # Transform and load the versions
179
+ transformed_versions = transform_launch_template_versions(versions)
180
+ load_launch_template_versions(neo4j_session, transformed_versions, region, current_aws_account_id, update_tag)
164
181
 
165
182
  cleanup(neo4j_session, common_job_parameters)
cartography/intel/aws/ec2/load_balancers.py CHANGED
@@ -1,190 +1,168 @@
1
1
  import logging
2
- from typing import Dict
3
- from typing import List
4
2
 
5
3
  import boto3
6
4
  import neo4j
7
5
 
8
6
  from .util import get_botocore_config
7
+ from cartography.client.core.tx import load
8
+ from cartography.graph.job import GraphJob
9
+ from cartography.models.aws.ec2.load_balancer_listeners import ELBListenerSchema
10
+ from cartography.models.aws.ec2.load_balancers import LoadBalancerSchema
9
11
  from cartography.util import aws_handle_regions
10
- from cartography.util import run_cleanup_job
11
12
  from cartography.util import timeit
12
13
 
13
14
  logger = logging.getLogger(__name__)
14
15
 
15
16
 
17
+ def _get_listener_id(load_balancer_id: str, port: int, protocol: str) -> str:
18
+ """
19
+ Generate a unique ID for a load balancer listener.
20
+
21
+ Args:
22
+ load_balancer_id: The ID of the load balancer
23
+ port: The listener port
24
+ protocol: The listener protocol
25
+
26
+ Returns:
27
+ A unique ID string for the listener
28
+ """
29
+ return f"{load_balancer_id}{port}{protocol}"
30
+
31
+
32
+ def transform_load_balancer_listener_data(load_balancer_id: str, listener_data: list[dict]) -> list[dict]:
33
+ """
34
+ Transform load balancer listener data into a format suitable for cartography ingestion.
35
+
36
+ Args:
37
+ load_balancer_id: The ID of the load balancer
38
+ listener_data: List of listener data from AWS API
39
+
40
+ Returns:
41
+ List of transformed listener data
42
+ """
43
+ transformed = []
44
+ for listener in listener_data:
45
+ listener_info = listener['Listener']
46
+ transformed_listener = {
47
+ 'id': _get_listener_id(load_balancer_id, listener_info['LoadBalancerPort'], listener_info['Protocol']),
48
+ 'port': listener_info.get('LoadBalancerPort'),
49
+ 'protocol': listener_info.get('Protocol'),
50
+ 'instance_port': listener_info.get('InstancePort'),
51
+ 'instance_protocol': listener_info.get('InstanceProtocol'),
52
+ 'policy_names': listener.get('PolicyNames', []),
53
+ 'LoadBalancerId': load_balancer_id,
54
+ }
55
+ transformed.append(transformed_listener)
56
+ return transformed
57
+
58
+
59
+ def transform_load_balancer_data(load_balancers: list[dict]) -> tuple[list[dict], list[dict]]:
60
+ """
61
+ Transform load balancer data into a format suitable for cartography ingestion.
62
+
63
+ Args:
64
+ load_balancers: List of load balancer data from AWS API
65
+
66
+ Returns:
67
+ Tuple of (transformed load balancer data, transformed listener data)
68
+ """
69
+ transformed = []
70
+ listener_data = []
71
+
72
+ for lb in load_balancers:
73
+ load_balancer_id = lb['DNSName']
74
+ transformed_lb = {
75
+ 'id': load_balancer_id,
76
+ 'name': lb['LoadBalancerName'],
77
+ 'dnsname': lb['DNSName'],
78
+ 'canonicalhostedzonename': lb.get('CanonicalHostedZoneName'),
79
+ 'canonicalhostedzonenameid': lb.get('CanonicalHostedZoneNameID'),
80
+ 'scheme': lb.get('Scheme'),
81
+ 'createdtime': str(lb['CreatedTime']),
82
+ 'GROUP_NAME': lb.get('SourceSecurityGroup', {}).get('GroupName'),
83
+ 'GROUP_IDS': [str(group) for group in lb.get('SecurityGroups', [])],
84
+ 'INSTANCE_IDS': [instance['InstanceId'] for instance in lb.get('Instances', [])],
85
+ 'LISTENER_IDS': [
86
+ _get_listener_id(
87
+ load_balancer_id,
88
+ listener['Listener']['LoadBalancerPort'],
89
+ listener['Listener']['Protocol'],
90
+ ) for listener in lb.get('ListenerDescriptions', [])
91
+ ],
92
+ }
93
+ transformed.append(transformed_lb)
94
+
95
+ # Classic ELB listeners are not returned anywhere else in AWS, so we must parse them out
96
+ # of the describe_load_balancers response.
97
+ if lb.get('ListenerDescriptions'):
98
+ listener_data.extend(
99
+ transform_load_balancer_listener_data(
100
+ load_balancer_id,
101
+ lb.get('ListenerDescriptions', []),
102
+ ),
103
+ )
104
+
105
+ return transformed, listener_data
106
+
107
+
16
108
  @timeit
17
109
  @aws_handle_regions
18
- def get_loadbalancer_data(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
110
+ def get_loadbalancer_data(boto3_session: boto3.session.Session, region: str) -> list[dict]:
19
111
  client = boto3_session.client('elb', region_name=region, config=get_botocore_config())
20
112
  paginator = client.get_paginator('describe_load_balancers')
21
- elbs: List[Dict] = []
113
+ elbs: list[dict] = []
22
114
  for page in paginator.paginate():
23
115
  elbs.extend(page['LoadBalancerDescriptions'])
24
116
  return elbs
25
117
 
26
118
 
27
119
  @timeit
28
- def load_load_balancer_listeners(
29
- neo4j_session: neo4j.Session, load_balancer_id: str, listener_data: List[Dict],
120
+ def load_load_balancers(
121
+ neo4j_session: neo4j.Session, data: list[dict], region: str, current_aws_account_id: str,
30
122
  update_tag: int,
31
123
  ) -> None:
32
- ingest_listener = """
33
- MATCH (elb:LoadBalancer{id: $LoadBalancerId})
34
- WITH elb
35
- UNWIND $Listeners as data
36
- MERGE (l:Endpoint:ELBListener{id: elb.id + toString(data.Listener.LoadBalancerPort) +
37
- toString(data.Listener.Protocol)})
38
- ON CREATE SET l.port = data.Listener.LoadBalancerPort, l.protocol = data.Listener.Protocol,
39
- l.firstseen = timestamp()
40
- SET l.instance_port = data.Listener.InstancePort, l.instance_protocol = data.Listener.InstanceProtocol,
41
- l.policy_names = data.PolicyNames,
42
- l.lastupdated = $update_tag
43
- WITH l, elb
44
- MERGE (elb)-[r:ELB_LISTENER]->(l)
45
- ON CREATE SET r.firstseen = timestamp()
46
- SET r.lastupdated = $update_tag
47
- """
48
-
49
- neo4j_session.run(
50
- ingest_listener,
51
- LoadBalancerId=load_balancer_id,
52
- Listeners=listener_data,
53
- update_tag=update_tag,
124
+ load(
125
+ neo4j_session,
126
+ LoadBalancerSchema(),
127
+ data,
128
+ Region=region,
129
+ AWS_ID=current_aws_account_id,
130
+ lastupdated=update_tag,
54
131
  )
55
132
 
56
133
 
57
134
  @timeit
58
- def load_load_balancer_subnets(
59
- neo4j_session: neo4j.Session, load_balancer_id: str, subnets_data: List[Dict],
60
- update_tag: int,
61
- ) -> None:
62
- ingest_load_balancer_subnet = """
63
- MATCH (elb:LoadBalancer{id: $ID}), (subnet:EC2Subnet{subnetid: $SUBNET_ID})
64
- MERGE (elb)-[r:SUBNET]->(subnet)
65
- ON CREATE SET r.firstseen = timestamp()
66
- SET r.lastupdated = $update_tag
67
- """
68
-
69
- for subnet_id in subnets_data:
70
- neo4j_session.run(
71
- ingest_load_balancer_subnet,
72
- ID=load_balancer_id,
73
- SUBNET_ID=subnet_id,
74
- update_tag=update_tag,
75
- )
76
-
77
-
78
- @timeit
79
- def load_load_balancers(
80
- neo4j_session: neo4j.Session, data: List[Dict], region: str, current_aws_account_id: str,
135
+ def load_load_balancer_listeners(
136
+ neo4j_session: neo4j.Session, data: list[dict], region: str, current_aws_account_id: str,
81
137
  update_tag: int,
82
138
  ) -> None:
83
- ingest_load_balancer = """
84
- MERGE (elb:LoadBalancer{id: $ID})
85
- ON CREATE SET elb.firstseen = timestamp(), elb.createdtime = $CREATED_TIME
86
- SET elb.lastupdated = $update_tag, elb.name = $NAME, elb.dnsname = $DNS_NAME,
87
- elb.canonicalhostedzonename = $HOSTED_ZONE_NAME, elb.canonicalhostedzonenameid = $HOSTED_ZONE_NAME_ID,
88
- elb.scheme = $SCHEME, elb.region = $Region
89
- WITH elb
90
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
91
- MERGE (aa)-[r:RESOURCE]->(elb)
92
- ON CREATE SET r.firstseen = timestamp()
93
- SET r.lastupdated = $update_tag
94
- """
95
-
96
- ingest_load_balancersource_security_group = """
97
- MATCH (elb:LoadBalancer{id: $ID}),
98
- (group:EC2SecurityGroup{name: $GROUP_NAME})
99
- MERGE (elb)-[r:SOURCE_SECURITY_GROUP]->(group)
100
- ON CREATE SET r.firstseen = timestamp()
101
- SET r.lastupdated = $update_tag
102
- """
103
-
104
- ingest_load_balancer_security_group = """
105
- MATCH (elb:LoadBalancer{id: $ID}),
106
- (group:EC2SecurityGroup{groupid: $GROUP_ID})
107
- MERGE (elb)-[r:MEMBER_OF_EC2_SECURITY_GROUP]->(group)
108
- ON CREATE SET r.firstseen = timestamp()
109
- SET r.lastupdated = $update_tag
110
- """
111
-
112
- ingest_instances = """
113
- MATCH (elb:LoadBalancer{id: $ID}), (instance:EC2Instance{instanceid: $INSTANCE_ID})
114
- MERGE (elb)-[r:EXPOSE]->(instance)
115
- ON CREATE SET r.firstseen = timestamp()
116
- SET r.lastupdated = $update_tag
117
- WITH instance
118
- MATCH (aa:AWSAccount{id: $AWS_ACCOUNT_ID})
119
- MERGE (aa)-[r:RESOURCE]->(instance)
120
- ON CREATE SET r.firstseen = timestamp()
121
- SET r.lastupdated = $update_tag
122
- """
123
-
124
- for lb in data:
125
- load_balancer_id = lb["DNSName"]
126
-
127
- neo4j_session.run(
128
- ingest_load_balancer,
129
- ID=load_balancer_id,
130
- CREATED_TIME=str(lb["CreatedTime"]),
131
- NAME=lb["LoadBalancerName"],
132
- DNS_NAME=load_balancer_id,
133
- HOSTED_ZONE_NAME=lb.get("CanonicalHostedZoneName"),
134
- HOSTED_ZONE_NAME_ID=lb.get("CanonicalHostedZoneNameID"),
135
- SCHEME=lb.get("Scheme", ""),
136
- AWS_ACCOUNT_ID=current_aws_account_id,
137
- Region=region,
138
- update_tag=update_tag,
139
- )
140
-
141
- if lb["Subnets"]:
142
- load_load_balancer_subnets(neo4j_session, load_balancer_id, lb["Subnets"], update_tag)
143
-
144
- if lb["SecurityGroups"]:
145
- for group in lb["SecurityGroups"]:
146
- neo4j_session.run(
147
- ingest_load_balancer_security_group,
148
- ID=load_balancer_id,
149
- GROUP_ID=str(group),
150
- update_tag=update_tag,
151
- )
152
-
153
- if lb["SourceSecurityGroup"]:
154
- source_group = lb["SourceSecurityGroup"]
155
- neo4j_session.run(
156
- ingest_load_balancersource_security_group,
157
- ID=load_balancer_id,
158
- GROUP_NAME=source_group["GroupName"],
159
- update_tag=update_tag,
160
- )
161
-
162
- if lb["Instances"]:
163
- for instance in lb["Instances"]:
164
- neo4j_session.run(
165
- ingest_instances,
166
- ID=load_balancer_id,
167
- INSTANCE_ID=instance["InstanceId"],
168
- AWS_ACCOUNT_ID=current_aws_account_id,
169
- update_tag=update_tag,
170
- )
171
-
172
- if lb["ListenerDescriptions"]:
173
- load_load_balancer_listeners(neo4j_session, load_balancer_id, lb["ListenerDescriptions"], update_tag)
139
+ load(
140
+ neo4j_session,
141
+ ELBListenerSchema(),
142
+ data,
143
+ Region=region,
144
+ AWS_ID=current_aws_account_id,
145
+ lastupdated=update_tag,
146
+ )
174
147
 
175
148
 
176
149
  @timeit
177
- def cleanup_load_balancers(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
178
- run_cleanup_job('aws_ingest_load_balancers_cleanup.json', neo4j_session, common_job_parameters)
150
+ def cleanup_load_balancers(neo4j_session: neo4j.Session, common_job_parameters: dict) -> None:
151
+ GraphJob.from_node_schema(ELBListenerSchema(), common_job_parameters).run(neo4j_session)
152
+ GraphJob.from_node_schema(LoadBalancerSchema(), common_job_parameters).run(neo4j_session)
179
153
 
180
154
 
181
155
  @timeit
182
156
  def sync_load_balancers(
183
- neo4j_session: neo4j.Session, boto3_session: boto3.session.Session, regions: List[str], current_aws_account_id: str,
184
- update_tag: int, common_job_parameters: Dict,
157
+ neo4j_session: neo4j.Session, boto3_session: boto3.session.Session, regions: list[str], current_aws_account_id: str,
158
+ update_tag: int, common_job_parameters: dict,
185
159
  ) -> None:
186
160
  for region in regions:
187
161
  logger.info("Syncing EC2 load balancers for region '%s' in account '%s'.", region, current_aws_account_id)
188
162
  data = get_loadbalancer_data(boto3_session, region)
189
- load_load_balancers(neo4j_session, data, region, current_aws_account_id, update_tag)
163
+ transformed_data, listener_data = transform_load_balancer_data(data)
164
+
165
+ load_load_balancers(neo4j_session, transformed_data, region, current_aws_account_id, update_tag)
166
+ load_load_balancer_listeners(neo4j_session, listener_data, region, current_aws_account_id, update_tag)
167
+
190
168
  cleanup_load_balancers(neo4j_session, common_job_parameters)
cartography/intel/aws/iam_instance_profiles.py ADDED
@@ -0,0 +1,73 @@
1
+ from typing import Any
2
+
3
+ import boto3
4
+ import neo4j
5
+
6
+ from cartography.client.core.tx import load
7
+ from cartography.intel.aws.ec2.util import get_botocore_config
8
+ from cartography.models.aws.iam.instanceprofile import InstanceProfileSchema
9
+ from cartography.util import aws_handle_regions
10
+ from cartography.util import timeit
11
+
12
+
13
+ @timeit
14
+ @aws_handle_regions
15
+ def get_iam_instance_profiles(boto3_session: boto3.Session) -> list[dict[str, Any]]:
16
+ client = boto3_session.client('iam', config=get_botocore_config())
17
+ paginator = client.get_paginator('list_instance_profiles')
18
+ instance_profiles = []
19
+ for page in paginator.paginate():
20
+ instance_profiles.extend(page['InstanceProfiles'])
21
+ return instance_profiles
22
+
23
+
24
+ def transform_instance_profiles(data: list[dict[str, Any]]) -> list[dict[str, Any]]:
25
+ transformed = []
26
+ for profile in data:
27
+ transformed_profile = {
28
+ 'Arn': profile['Arn'],
29
+ 'CreateDate': profile['CreateDate'],
30
+ 'InstanceProfileId': profile['InstanceProfileId'],
31
+ 'InstanceProfileName': profile['InstanceProfileName'],
32
+ 'Path': profile['Path'],
33
+ 'Roles': [role['Arn'] for role in profile.get('Roles', [])],
34
+ }
35
+ transformed.append(transformed_profile)
36
+ return transformed
37
+
38
+
39
+ @timeit
40
+ def load_iam_instance_profiles(
41
+ neo4j_session: neo4j.Session,
42
+ data: list[dict[str, Any]],
43
+ current_aws_account_id: str,
44
+ update_tag: int,
45
+ common_job_parameters: dict[str, Any],
46
+ ) -> None:
47
+ load(
48
+ neo4j_session,
49
+ InstanceProfileSchema(),
50
+ data,
51
+ AWS_ID=current_aws_account_id,
52
+ lastupdated=update_tag,
53
+ )
54
+
55
+
56
+ @timeit
57
+ def sync_iam_instance_profiles(
58
+ boto3_session: boto3.Session,
59
+ neo4j_session: neo4j.Session,
60
+ current_aws_account_id: str,
61
+ update_tag: int,
62
+ regions: list[str],
63
+ common_job_parameters: dict[str, Any],
64
+ ) -> None:
65
+ profiles = get_iam_instance_profiles(boto3_session)
66
+ profiles = transform_instance_profiles(profiles)
67
+ load_iam_instance_profiles(
68
+ neo4j_session,
69
+ profiles,
70
+ common_job_parameters['AWS_ID'],
71
+ common_job_parameters['UPDATE_TAG'],
72
+ common_job_parameters,
73
+ )
cartography/intel/aws/resources.py CHANGED
@@ -1,3 +1,4 @@
1
+ from typing import Callable
1
2
  from typing import Dict
2
3
 
3
4
  from . import apigateway
@@ -43,9 +44,11 @@ from .ec2.tgw import sync_transit_gateways
43
44
  from .ec2.volumes import sync_ebs_volumes
44
45
  from .ec2.vpc import sync_vpc
45
46
  from .ec2.vpc_peerings import sync_vpc_peerings
47
+ from .iam_instance_profiles import sync_iam_instance_profiles
46
48
 
47
- RESOURCE_FUNCTIONS: Dict = {
49
+ RESOURCE_FUNCTIONS: Dict[str, Callable[..., None]] = {
48
50
  'iam': iam.sync,
51
+ 'iaminstanceprofiles': sync_iam_instance_profiles,
49
52
  's3': s3.sync,
50
53
  'dynamodb': dynamodb.sync,
51
54
  'ec2:launch_templates': sync_ec2_launch_templates,
cartography/intel/crowdstrike/__init__.py CHANGED
@@ -1,11 +1,14 @@
1
1
  import logging
2
+ from typing import Any
2
3
 
3
4
  import neo4j
4
5
 
5
6
  from cartography.config import Config
7
+ from cartography.graph.job import GraphJob
6
8
  from cartography.intel.crowdstrike.endpoints import sync_hosts
7
9
  from cartography.intel.crowdstrike.spotlight import sync_vulnerabilities
8
10
  from cartography.intel.crowdstrike.util import get_authorization
11
+ from cartography.models.crowdstrike.hosts import CrowdstrikeHostSchema
9
12
  from cartography.stats import get_stats_client
10
13
  from cartography.util import merge_module_sync_metadata
11
14
  from cartography.util import run_cleanup_job
@@ -50,11 +53,7 @@ def start_crowdstrike_ingestion(
50
53
  config.update_tag,
51
54
  authorization,
52
55
  )
53
- run_cleanup_job(
54
- "crowdstrike_import_cleanup.json",
55
- neo4j_session,
56
- common_job_parameters,
57
- )
56
+ cleanup(neo4j_session, common_job_parameters)
58
57
 
59
58
  group_id = "public"
60
59
  if config.crowdstrike_api_url:
@@ -67,3 +66,16 @@ def start_crowdstrike_ingestion(
67
66
  update_tag=config.update_tag,
68
67
  stat_handler=stat_handler,
69
68
  )
69
+
70
+
71
+ @timeit
72
+ def cleanup(neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]) -> None:
73
+ logger.info("Running Crowdstrike cleanup")
74
+ GraphJob.from_node_schema(CrowdstrikeHostSchema(), common_job_parameters).run(neo4j_session)
75
+
76
+ # Cleanup other crowdstrike assets not handled by the data model
77
+ run_cleanup_job(
78
+ "crowdstrike_import_cleanup.json",
79
+ neo4j_session,
80
+ common_job_parameters,
81
+ )