cartography 0.110.0rc1__py3-none-any.whl → 0.111.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (59)
  1. cartography/_version.py +16 -3
  2. cartography/cli.py +0 -8
  3. cartography/config.py +0 -9
  4. cartography/data/indexes.cypher +0 -2
  5. cartography/data/jobs/analysis/aws_ec2_keypair_analysis.json +2 -2
  6. cartography/graph/querybuilder.py +70 -0
  7. cartography/intel/aws/apigateway.py +111 -4
  8. cartography/intel/aws/cognito.py +201 -0
  9. cartography/intel/aws/ec2/vpc.py +140 -124
  10. cartography/intel/aws/ecs.py +7 -1
  11. cartography/intel/aws/eventbridge.py +73 -0
  12. cartography/intel/aws/glue.py +64 -0
  13. cartography/intel/aws/kms.py +13 -1
  14. cartography/intel/aws/rds.py +105 -0
  15. cartography/intel/aws/resources.py +2 -0
  16. cartography/intel/aws/route53.py +3 -1
  17. cartography/intel/aws/s3.py +104 -0
  18. cartography/intel/entra/__init__.py +41 -43
  19. cartography/intel/entra/applications.py +2 -1
  20. cartography/intel/entra/ou.py +1 -1
  21. cartography/intel/github/__init__.py +21 -25
  22. cartography/intel/github/repos.py +13 -38
  23. cartography/intel/kubernetes/__init__.py +4 -0
  24. cartography/intel/kubernetes/rbac.py +464 -0
  25. cartography/intel/kubernetes/util.py +17 -0
  26. cartography/models/aws/apigateway/apigatewaydeployment.py +74 -0
  27. cartography/models/aws/cognito/__init__.py +0 -0
  28. cartography/models/aws/cognito/identity_pool.py +70 -0
  29. cartography/models/aws/cognito/user_pool.py +47 -0
  30. cartography/models/aws/ec2/security_groups.py +1 -1
  31. cartography/models/aws/ec2/vpc.py +46 -0
  32. cartography/models/aws/ec2/vpc_cidr.py +102 -0
  33. cartography/models/aws/ecs/services.py +17 -0
  34. cartography/models/aws/ecs/tasks.py +1 -0
  35. cartography/models/aws/eventbridge/target.py +71 -0
  36. cartography/models/aws/glue/job.py +69 -0
  37. cartography/models/aws/rds/event_subscription.py +146 -0
  38. cartography/models/aws/route53/dnsrecord.py +21 -0
  39. cartography/models/github/dependencies.py +1 -2
  40. cartography/models/kubernetes/clusterrolebindings.py +98 -0
  41. cartography/models/kubernetes/clusterroles.py +52 -0
  42. cartography/models/kubernetes/rolebindings.py +119 -0
  43. cartography/models/kubernetes/roles.py +76 -0
  44. cartography/models/kubernetes/serviceaccounts.py +77 -0
  45. cartography/models/tailscale/device.py +1 -0
  46. {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/METADATA +3 -3
  47. {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/RECORD +57 -43
  48. cartography/data/jobs/cleanup/aws_import_vpc_cleanup.json +0 -23
  49. cartography/intel/entra/resources.py +0 -20
  50. /cartography/data/jobs/{analysis → scoped_analysis}/aws_s3acl_analysis.json +0 -0
  51. /cartography/models/aws/{__init__.py → apigateway/__init__.py} +0 -0
  52. /cartography/models/aws/{apigateway.py → apigateway/apigateway.py} +0 -0
  53. /cartography/models/aws/{apigatewaycertificate.py → apigateway/apigatewaycertificate.py} +0 -0
  54. /cartography/models/aws/{apigatewayresource.py → apigateway/apigatewayresource.py} +0 -0
  55. /cartography/models/aws/{apigatewaystage.py → apigateway/apigatewaystage.py} +0 -0
  56. {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/WHEEL +0 -0
  57. {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/entry_points.txt +0 -0
  58. {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/licenses/LICENSE +0 -0
  59. {cartography-0.110.0rc1.dist-info → cartography-0.111.0rc1.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,15 @@
1
1
  import logging
2
- from string import Template
3
- from typing import Dict
4
- from typing import List
2
+ from typing import Any
5
3
 
6
4
  import boto3
7
5
  import neo4j
8
6
 
7
+ from cartography.client.core.tx import load
8
+ from cartography.graph.job import GraphJob
9
+ from cartography.models.aws.ec2.vpc import AWSVpcSchema
10
+ from cartography.models.aws.ec2.vpc_cidr import AWSIPv4CidrBlockSchema
11
+ from cartography.models.aws.ec2.vpc_cidr import AWSIPv6CidrBlockSchema
9
12
  from cartography.util import aws_handle_regions
10
- from cartography.util import run_cleanup_job
11
13
  from cartography.util import timeit
12
14
 
13
15
  from .util import get_botocore_config
@@ -17,87 +19,78 @@ logger = logging.getLogger(__name__)
17
19
 
18
20
  @timeit
19
21
  @aws_handle_regions
20
- def get_ec2_vpcs(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
22
+ def get_ec2_vpcs(
23
+ boto3_session: boto3.session.Session,
24
+ region: str,
25
+ ) -> list[dict[str, Any]]:
21
26
  client = boto3_session.client(
22
27
  "ec2",
23
28
  region_name=region,
24
29
  config=get_botocore_config(),
25
30
  )
26
- return client.describe_vpcs()["Vpcs"]
27
-
28
-
29
- def _get_cidr_association_statement(block_type: str) -> str:
30
- INGEST_CIDR_TEMPLATE = Template(
31
- """
32
- MATCH (vpc:AWSVpc{id: $VpcId})
33
- WITH vpc
34
- UNWIND $CidrBlock as block_data
35
- MERGE (new_block:$block_label{id: $VpcId + '|' + block_data.$block_cidr})
36
- ON CREATE SET new_block.firstseen = timestamp()
37
- SET new_block.association_id = block_data.AssociationId,
38
- new_block.cidr_block = block_data.$block_cidr,
39
- new_block.block_state = block_data.$state_name.State,
40
- new_block.block_state_message = block_data.$state_name.StatusMessage,
41
- new_block.lastupdated = $update_tag
42
- WITH vpc, new_block
43
- MERGE (vpc)-[r:BLOCK_ASSOCIATION]->(new_block)
44
- ON CREATE SET r.firstseen = timestamp()
45
- SET r.lastupdated = $update_tag""",
46
- )
47
-
48
- BLOCK_CIDR = "CidrBlock"
49
- STATE_NAME = "CidrBlockState"
50
-
51
- # base label type. We add the AWS ipv4 or 6 depending on block type
52
- BLOCK_TYPE = "AWSCidrBlock"
53
-
54
- if block_type == "ipv6":
55
- BLOCK_CIDR = "Ipv6" + BLOCK_CIDR
56
- STATE_NAME = "Ipv6" + STATE_NAME
57
- BLOCK_TYPE = BLOCK_TYPE + ":AWSIpv6CidrBlock"
58
- elif block_type == "ipv4":
59
- BLOCK_TYPE = BLOCK_TYPE + ":AWSIpv4CidrBlock"
60
- else:
61
- raise ValueError(f"Unsupported block type specified - {block_type}")
62
-
63
- return INGEST_CIDR_TEMPLATE.safe_substitute(
64
- block_label=BLOCK_TYPE,
65
- block_cidr=BLOCK_CIDR,
66
- state_name=STATE_NAME,
67
- )
68
-
69
-
70
- @timeit
71
- def load_cidr_association_set(
72
- neo4j_session: neo4j.Session,
73
- vpc_id: str,
74
- vpc_data: Dict,
75
- block_type: str,
76
- update_tag: int,
77
- ) -> None:
78
- ingest_statement = _get_cidr_association_statement(block_type)
79
-
80
- if block_type == "ipv6":
81
- data = vpc_data.get("Ipv6CidrBlockAssociationSet", [])
82
- else:
83
- data = vpc_data.get("CidrBlockAssociationSet", [])
84
-
85
- neo4j_session.run(
86
- ingest_statement,
87
- VpcId=vpc_id,
88
- CidrBlock=data,
89
- update_tag=update_tag,
90
- )
31
+ return client.describe_vpcs().get("Vpcs", [])
32
+
33
+
34
+ def transform_vpc_data(
35
+ vpc_list: list[dict[str, Any]], region: str
36
+ ) -> tuple[list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]]]:
37
+
38
+ vpc_data: list[dict[str, Any]] = []
39
+ ipv4_cidr_blocks: list[dict[str, Any]] = []
40
+ ipv6_cidr_blocks: list[dict[str, Any]] = []
41
+
42
+ for vpc in vpc_list:
43
+ vpc_record = {
44
+ "VpcId": vpc.get("VpcId"),
45
+ "InstanceTenancy": vpc.get("InstanceTenancy"),
46
+ "State": vpc.get("State"),
47
+ "IsDefault": vpc.get("IsDefault"),
48
+ "PrimaryCIDRBlock": vpc.get("CidrBlock"),
49
+ "DhcpOptionsId": vpc.get("DhcpOptionsId"),
50
+ "lastupdated": vpc.get("lastupdated"),
51
+ }
52
+ vpc_data.append(vpc_record)
53
+
54
+ ipv4_associations = vpc.get("CidrBlockAssociationSet", [])
55
+ for association in ipv4_associations:
56
+ ipv4_block = {
57
+ "Id": vpc["VpcId"] + "|" + association.get("CidrBlock"),
58
+ "VpcId": vpc["VpcId"],
59
+ "AssociationId": association.get("AssociationId"),
60
+ "CidrBlock": association.get("CidrBlock"),
61
+ "BlockState": association.get("CidrBlockState", {}).get("State"),
62
+ "BlockStateMessage": association.get("CidrBlockState", {}).get(
63
+ "StatusMessage"
64
+ ),
65
+ }
66
+ ipv4_cidr_blocks.append(ipv4_block)
67
+
68
+ ipv6_associations = vpc.get("Ipv6CidrBlockAssociationSet", [])
69
+ for association in ipv6_associations:
70
+ ipv6_block = {
71
+ "Id": vpc["VpcId"] + "|" + association.get("Ipv6CidrBlock"),
72
+ "VpcId": vpc["VpcId"],
73
+ "AssociationId": association.get("AssociationId"),
74
+ "CidrBlock": association.get("Ipv6CidrBlock"),
75
+ "BlockState": association.get("Ipv6CidrBlockState", {}).get("State"),
76
+ "BlockStateMessage": association.get("Ipv6CidrBlockState", {}).get(
77
+ "StatusMessage"
78
+ ),
79
+ }
80
+ ipv6_cidr_blocks.append(ipv6_block)
81
+
82
+ return vpc_data, ipv4_cidr_blocks, ipv6_cidr_blocks
91
83
 
92
84
 
93
85
  @timeit
94
86
  def load_ec2_vpcs(
95
87
  neo4j_session: neo4j.Session,
96
- data: List[Dict],
88
+ vpcs: list[dict[str, Any]],
97
89
  region: str,
98
- current_aws_account_id: str,
90
+ aws_account_id: str,
99
91
  update_tag: int,
100
92
  ) -> None:
93
+ logger.info(f"Loading {len(vpcs)} EC2 VPCs for region '{region}' into graph.")
101
94
  # https://docs.aws.amazon.com/cli/latest/reference/ec2/describe-vpcs.html
102
95
  # {
103
96
  # "Vpcs": [
@@ -126,69 +119,69 @@ def load_ec2_vpcs(
126
119
  # }
127
120
  # ]
128
121
  # }
122
+ load(
123
+ neo4j_session,
124
+ AWSVpcSchema(),
125
+ vpcs,
126
+ lastupdated=update_tag,
127
+ Region=region,
128
+ AWS_ID=aws_account_id,
129
+ )
129
130
 
130
- ingest_vpc = """
131
- MERGE (new_vpc:AWSVpc{id: $VpcId})
132
- ON CREATE SET new_vpc.firstseen = timestamp(), new_vpc.vpcid =$VpcId
133
- SET new_vpc.instance_tenancy = $InstanceTenancy,
134
- new_vpc.state = $State,
135
- new_vpc.is_default = $IsDefault,
136
- new_vpc.primary_cidr_block = $PrimaryCIDRBlock,
137
- new_vpc.dhcp_options_id = $DhcpOptionsId,
138
- new_vpc.region = $Region,
139
- new_vpc.lastupdated = $update_tag
140
- WITH new_vpc
141
- MATCH (awsAccount:AWSAccount{id: $AWS_ACCOUNT_ID})
142
- MERGE (awsAccount)-[r:RESOURCE]->(new_vpc)
143
- ON CREATE SET r.firstseen = timestamp()
144
- SET r.lastupdated = $update_tag"""
145
-
146
- for vpc in data:
147
- vpc_id = vpc["VpcId"] # fail if not present
148
-
149
- neo4j_session.run(
150
- ingest_vpc,
151
- VpcId=vpc_id,
152
- InstanceTenancy=vpc.get("InstanceTenancy", None),
153
- State=vpc.get("State", None),
154
- IsDefault=vpc.get("IsDefault", None),
155
- PrimaryCIDRBlock=vpc.get("CidrBlock", None),
156
- DhcpOptionsId=vpc.get("DhcpOptionsId", None),
157
- Region=region,
158
- AWS_ACCOUNT_ID=current_aws_account_id,
159
- update_tag=update_tag,
160
- )
161
131
 
162
- load_cidr_association_set(
163
- neo4j_session,
164
- vpc_id=vpc_id,
165
- block_type="ipv4",
166
- vpc_data=vpc,
167
- update_tag=update_tag,
168
- )
132
+ @timeit
133
+ def load_ipv4_cidr_blocks(
134
+ neo4j_session: neo4j.Session,
135
+ ipv4_cidr_blocks: list[dict[str, Any]],
136
+ region: str,
137
+ aws_account_id: str,
138
+ update_tag: int,
139
+ ) -> None:
140
+ load(
141
+ neo4j_session,
142
+ AWSIPv4CidrBlockSchema(),
143
+ ipv4_cidr_blocks,
144
+ lastupdated=update_tag,
145
+ )
169
146
 
170
- load_cidr_association_set(
171
- neo4j_session,
172
- vpc_id=vpc_id,
173
- block_type="ipv6",
174
- vpc_data=vpc,
175
- update_tag=update_tag,
176
- )
147
+
148
+ @timeit
149
+ def load_ipv6_cidr_blocks(
150
+ neo4j_session: neo4j.Session,
151
+ ipv6_cidr_blocks: list[dict[str, Any]],
152
+ region: str,
153
+ aws_account_id: str,
154
+ update_tag: int,
155
+ ) -> None:
156
+ load(
157
+ neo4j_session,
158
+ AWSIPv6CidrBlockSchema(),
159
+ ipv6_cidr_blocks,
160
+ lastupdated=update_tag,
161
+ )
177
162
 
178
163
 
179
164
  @timeit
180
- def cleanup_ec2_vpcs(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
181
- run_cleanup_job("aws_import_vpc_cleanup.json", neo4j_session, common_job_parameters)
165
+ def cleanup(
166
+ neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
167
+ ) -> None:
168
+ GraphJob.from_node_schema(AWSIPv6CidrBlockSchema(), common_job_parameters).run(
169
+ neo4j_session
170
+ )
171
+ GraphJob.from_node_schema(AWSIPv4CidrBlockSchema(), common_job_parameters).run(
172
+ neo4j_session
173
+ )
174
+ GraphJob.from_node_schema(AWSVpcSchema(), common_job_parameters).run(neo4j_session)
182
175
 
183
176
 
184
177
  @timeit
185
178
  def sync_vpc(
186
179
  neo4j_session: neo4j.Session,
187
180
  boto3_session: boto3.session.Session,
188
- regions: List[str],
181
+ regions: list[str],
189
182
  current_aws_account_id: str,
190
183
  update_tag: int,
191
- common_job_parameters: Dict,
184
+ common_job_parameters: dict[str, Any],
192
185
  ) -> None:
193
186
  for region in regions:
194
187
  logger.info(
@@ -196,6 +189,29 @@ def sync_vpc(
196
189
  region,
197
190
  current_aws_account_id,
198
191
  )
199
- data = get_ec2_vpcs(boto3_session, region)
200
- load_ec2_vpcs(neo4j_session, data, region, current_aws_account_id, update_tag)
201
- cleanup_ec2_vpcs(neo4j_session, common_job_parameters)
192
+ raw_vpc_data = get_ec2_vpcs(boto3_session, region)
193
+ vpc_data, ipv4_cidr_blocks, ipv6_cidr_blocks = transform_vpc_data(
194
+ raw_vpc_data, region
195
+ )
196
+ load_ec2_vpcs(
197
+ neo4j_session,
198
+ vpc_data,
199
+ region,
200
+ current_aws_account_id,
201
+ update_tag,
202
+ )
203
+ load_ipv4_cidr_blocks(
204
+ neo4j_session,
205
+ ipv4_cidr_blocks,
206
+ region,
207
+ current_aws_account_id,
208
+ update_tag,
209
+ )
210
+ load_ipv6_cidr_blocks(
211
+ neo4j_session,
212
+ ipv6_cidr_blocks,
213
+ region,
214
+ current_aws_account_id,
215
+ update_tag,
216
+ )
217
+ cleanup(neo4j_session, common_job_parameters)
@@ -171,9 +171,15 @@ def _get_containers_from_tasks(tasks: list[dict[str, Any]]) -> list[dict[str, An
171
171
 
172
172
  def transform_ecs_tasks(tasks: list[dict[str, Any]]) -> list[dict[str, Any]]:
173
173
  """
174
- Extract network interface ID from task attachments.
174
+ Extract network interface ID from task attachments and service name from group.
175
175
  """
176
176
  for task in tasks:
177
+ # Extract serviceName from group field
178
+ group = task.get("group")
179
+ if group and group.startswith("service:"):
180
+ task["serviceName"] = group.split("service:", 1)[1]
181
+
182
+ # Extract network interface ID from task attachments
177
183
  for attachment in task.get("attachments", []):
178
184
  if attachment.get("type") == "ElasticNetworkInterface":
179
185
  details = attachment.get("details", [])
@@ -10,6 +10,7 @@ from cartography.client.core.tx import load
10
10
  from cartography.graph.job import GraphJob
11
11
  from cartography.intel.aws.ec2.util import get_botocore_config
12
12
  from cartography.models.aws.eventbridge.rule import EventBridgeRuleSchema
13
+ from cartography.models.aws.eventbridge.target import EventBridgeTargetSchema
13
14
  from cartography.util import aws_handle_regions
14
15
  from cartography.util import timeit
15
16
 
@@ -33,6 +34,44 @@ def get_eventbridge_rules(
33
34
  return rules
34
35
 
35
36
 
37
+ @timeit
38
+ @aws_handle_regions
39
+ def get_eventbridge_targets(
40
+ boto3_session: boto3.Session, region: str, rules: List[Dict[str, Any]]
41
+ ) -> List[Dict[str, Any]]:
42
+ client = boto3_session.client(
43
+ "events", region_name=region, config=get_botocore_config()
44
+ )
45
+ targets = []
46
+ for rule in rules:
47
+ paginator = client.get_paginator("list_targets_by_rule")
48
+ for page in paginator.paginate(Rule=rule["Name"]):
49
+ for target in page.get("Targets", []):
50
+ target["RuleArn"] = rule["Arn"]
51
+ targets.append(target)
52
+ return targets
53
+
54
+
55
+ def transform_eventbridge_targets(
56
+ targets: List[Dict[str, Any]],
57
+ region: str,
58
+ ) -> List[Dict[str, Any]]:
59
+ """
60
+ Transform EventBridge target data for ingestion into Neo4j.
61
+ """
62
+ transformed_data = []
63
+ for target in targets:
64
+ transformed_target = {
65
+ "Id": target["Arn"],
66
+ "Arn": target["Arn"],
67
+ "RuleArn": target["RuleArn"],
68
+ "RoleArn": target.get("RoleArn"),
69
+ "Region": region,
70
+ }
71
+ transformed_data.append(transformed_target)
72
+ return transformed_data
73
+
74
+
36
75
  @timeit
37
76
  def load_eventbridge_rules(
38
77
  neo4j_session: neo4j.Session,
@@ -54,6 +93,27 @@ def load_eventbridge_rules(
54
93
  )
55
94
 
56
95
 
96
+ @timeit
97
+ def load_eventbridge_targets(
98
+ neo4j_session: neo4j.Session,
99
+ data: List[Dict[str, Any]],
100
+ region: str,
101
+ current_aws_account_id: str,
102
+ aws_update_tag: int,
103
+ ) -> None:
104
+ logger.info(
105
+ f"Loading EventBridge {len(data)} targets for region '{region}' into graph.",
106
+ )
107
+ load(
108
+ neo4j_session,
109
+ EventBridgeTargetSchema(),
110
+ data,
111
+ lastupdated=aws_update_tag,
112
+ Region=region,
113
+ AWS_ID=current_aws_account_id,
114
+ )
115
+
116
+
57
117
  @timeit
58
118
  def cleanup(
59
119
  neo4j_session: neo4j.Session,
@@ -63,6 +123,9 @@ def cleanup(
63
123
  GraphJob.from_node_schema(EventBridgeRuleSchema(), common_job_parameters).run(
64
124
  neo4j_session
65
125
  )
126
+ GraphJob.from_node_schema(EventBridgeTargetSchema(), common_job_parameters).run(
127
+ neo4j_session
128
+ )
66
129
 
67
130
 
68
131
  @timeit
@@ -88,4 +151,14 @@ def sync(
88
151
  update_tag,
89
152
  )
90
153
 
154
+ targets = get_eventbridge_targets(boto3_session, region, rules)
155
+ transformed_targets = transform_eventbridge_targets(targets, region)
156
+ load_eventbridge_targets(
157
+ neo4j_session,
158
+ transformed_targets,
159
+ region,
160
+ current_aws_account_id,
161
+ update_tag,
162
+ )
163
+
91
164
  cleanup(neo4j_session, common_job_parameters)
@@ -10,6 +10,7 @@ from cartography.client.core.tx import load
10
10
  from cartography.graph.job import GraphJob
11
11
  from cartography.intel.aws.ec2.util import get_botocore_config
12
12
  from cartography.models.aws.glue.connection import GlueConnectionSchema
13
+ from cartography.models.aws.glue.job import GlueJobSchema
13
14
  from cartography.util import aws_handle_regions
14
15
  from cartography.util import timeit
15
16
 
@@ -32,6 +33,37 @@ def get_glue_connections(
32
33
  return connections
33
34
 
34
35
 
36
+ @timeit
37
+ @aws_handle_regions
38
+ def get_glue_jobs(boto3_session: boto3.Session, region: str) -> List[Dict[str, Any]]:
39
+ client = boto3_session.client(
40
+ "glue", region_name=region, config=get_botocore_config()
41
+ )
42
+ paginator = client.get_paginator("get_jobs")
43
+ jobs = []
44
+ for page in paginator.paginate():
45
+ jobs.extend(page.get("Jobs", []))
46
+ return jobs
47
+
48
+
49
+ def transform_glue_job(jobs: List[Dict[str, Any]], region: str) -> List[Dict[str, Any]]:
50
+ """
51
+ Transform Glue job data for ingestion
52
+ """
53
+ transformed_jobs = []
54
+ for job in jobs:
55
+ transformed_job = {
56
+ "Name": job["Name"],
57
+ "ProfileName": job.get("ProfileName"),
58
+ "JobMode": job.get("JobMode"),
59
+ "Connections": job.get("Connections", {}).get("Connections"),
60
+ "Region": region,
61
+ "Description": job.get("Description"),
62
+ }
63
+ transformed_jobs.append(transformed_job)
64
+ return transformed_jobs
65
+
66
+
35
67
  def transform_glue_connections(
36
68
  connections: List[Dict[str, Any]], region: str
37
69
  ) -> List[Dict[str, Any]]:
@@ -79,6 +111,27 @@ def load_glue_connections(
79
111
  )
80
112
 
81
113
 
114
+ @timeit
115
+ def load_glue_jobs(
116
+ neo4j_session: neo4j.Session,
117
+ data: List[Dict[str, Any]],
118
+ region: str,
119
+ current_aws_account_id: str,
120
+ aws_update_tag: int,
121
+ ) -> None:
122
+ logger.info(
123
+ f"Loading Glue {len(data)} jobs for region '{region}' into graph.",
124
+ )
125
+ load(
126
+ neo4j_session,
127
+ GlueJobSchema(),
128
+ data,
129
+ lastupdated=aws_update_tag,
130
+ Region=region,
131
+ AWS_ID=current_aws_account_id,
132
+ )
133
+
134
+
82
135
  @timeit
83
136
  def cleanup(
84
137
  neo4j_session: neo4j.Session,
@@ -88,6 +141,7 @@ def cleanup(
88
141
  GraphJob.from_node_schema(GlueConnectionSchema(), common_job_parameters).run(
89
142
  neo4j_session
90
143
  )
144
+ GraphJob.from_node_schema(GlueJobSchema(), common_job_parameters).run(neo4j_session)
91
145
 
92
146
 
93
147
  @timeit
@@ -114,4 +168,14 @@ def sync(
114
168
  update_tag,
115
169
  )
116
170
 
171
+ jobs = get_glue_jobs(boto3_session, region)
172
+ transformed_jobs = transform_glue_job(jobs, region)
173
+ load_glue_jobs(
174
+ neo4j_session,
175
+ transformed_jobs,
176
+ region,
177
+ current_aws_account_id,
178
+ update_tag,
179
+ )
180
+
117
181
  cleanup(neo4j_session, common_job_parameters)
@@ -76,8 +76,8 @@ def get_policy(key: Dict, client: botocore.client.BaseClient) -> Any:
76
76
  try:
77
77
  policy = client.get_key_policy(KeyId=key["KeyId"], PolicyName="default")
78
78
  except ClientError as e:
79
- policy = None
80
79
  if e.response["Error"]["Code"] == "AccessDeniedException":
80
+ policy = None
81
81
  logger.warning(
82
82
  f"kms:get_key_policy on key id {key['KeyId']} failed with AccessDeniedException; continuing sync.",
83
83
  exc_info=True,
@@ -187,6 +187,18 @@ def transform_kms_key_policies(
187
187
  policy_data = {}
188
188
 
189
189
  for key_id, policy, *_ in policy_alias_grants_data:
190
+ # Handle keys with null policy (access denied)
191
+ if policy is None:
192
+ logger.info(
193
+ f"Skipping KMS key {key_id} policy due to AccessDenied; policy analysis properties will be null"
194
+ )
195
+ policy_data[key_id] = {
196
+ "kms_key": key_id,
197
+ "anonymous_access": None,
198
+ "anonymous_actions": None,
199
+ }
200
+ continue
201
+
190
202
  parsed_policy = parse_policy(key_id, policy)
191
203
  policy_data[key_id] = parsed_policy
192
204