cartography 0.110.0rc2__py3-none-any.whl → 0.111.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (54)
  1. cartography/_version.py +16 -3
  2. cartography/cli.py +46 -0
  3. cartography/config.py +16 -0
  4. cartography/data/indexes.cypher +0 -2
  5. cartography/data/jobs/analysis/keycloak_inheritance.json +30 -0
  6. cartography/graph/querybuilder.py +70 -0
  7. cartography/intel/aws/apigateway.py +113 -4
  8. cartography/intel/aws/ec2/vpc.py +140 -124
  9. cartography/intel/aws/eventbridge.py +73 -0
  10. cartography/intel/github/repos.py +28 -12
  11. cartography/intel/github/util.py +12 -0
  12. cartography/intel/keycloak/__init__.py +153 -0
  13. cartography/intel/keycloak/authenticationexecutions.py +322 -0
  14. cartography/intel/keycloak/authenticationflows.py +77 -0
  15. cartography/intel/keycloak/clients.py +187 -0
  16. cartography/intel/keycloak/groups.py +126 -0
  17. cartography/intel/keycloak/identityproviders.py +94 -0
  18. cartography/intel/keycloak/organizations.py +163 -0
  19. cartography/intel/keycloak/realms.py +61 -0
  20. cartography/intel/keycloak/roles.py +202 -0
  21. cartography/intel/keycloak/scopes.py +73 -0
  22. cartography/intel/keycloak/users.py +70 -0
  23. cartography/intel/keycloak/util.py +47 -0
  24. cartography/models/aws/apigateway/apigatewaydeployment.py +74 -0
  25. cartography/models/aws/ec2/vpc.py +46 -0
  26. cartography/models/aws/ec2/vpc_cidr.py +102 -0
  27. cartography/models/aws/eventbridge/target.py +71 -0
  28. cartography/models/keycloak/__init__.py +0 -0
  29. cartography/models/keycloak/authenticationexecution.py +160 -0
  30. cartography/models/keycloak/authenticationflow.py +54 -0
  31. cartography/models/keycloak/client.py +177 -0
  32. cartography/models/keycloak/group.py +101 -0
  33. cartography/models/keycloak/identityprovider.py +89 -0
  34. cartography/models/keycloak/organization.py +116 -0
  35. cartography/models/keycloak/organizationdomain.py +73 -0
  36. cartography/models/keycloak/realm.py +173 -0
  37. cartography/models/keycloak/role.py +126 -0
  38. cartography/models/keycloak/scope.py +73 -0
  39. cartography/models/keycloak/user.py +51 -0
  40. cartography/models/tailscale/device.py +1 -0
  41. cartography/sync.py +2 -0
  42. cartography/util.py +8 -0
  43. {cartography-0.110.0rc2.dist-info → cartography-0.111.0.dist-info}/METADATA +2 -1
  44. {cartography-0.110.0rc2.dist-info → cartography-0.111.0.dist-info}/RECORD +53 -25
  45. cartography/data/jobs/cleanup/aws_import_vpc_cleanup.json +0 -23
  46. /cartography/models/aws/{__init__.py → apigateway/__init__.py} +0 -0
  47. /cartography/models/aws/{apigateway.py → apigateway/apigateway.py} +0 -0
  48. /cartography/models/aws/{apigatewaycertificate.py → apigateway/apigatewaycertificate.py} +0 -0
  49. /cartography/models/aws/{apigatewayresource.py → apigateway/apigatewayresource.py} +0 -0
  50. /cartography/models/aws/{apigatewaystage.py → apigateway/apigatewaystage.py} +0 -0
  51. {cartography-0.110.0rc2.dist-info → cartography-0.111.0.dist-info}/WHEEL +0 -0
  52. {cartography-0.110.0rc2.dist-info → cartography-0.111.0.dist-info}/entry_points.txt +0 -0
  53. {cartography-0.110.0rc2.dist-info → cartography-0.111.0.dist-info}/licenses/LICENSE +0 -0
  54. {cartography-0.110.0rc2.dist-info → cartography-0.111.0.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,15 @@
1
1
  import logging
2
- from string import Template
3
- from typing import Dict
4
- from typing import List
2
+ from typing import Any
5
3
 
6
4
  import boto3
7
5
  import neo4j
8
6
 
7
+ from cartography.client.core.tx import load
8
+ from cartography.graph.job import GraphJob
9
+ from cartography.models.aws.ec2.vpc import AWSVpcSchema
10
+ from cartography.models.aws.ec2.vpc_cidr import AWSIPv4CidrBlockSchema
11
+ from cartography.models.aws.ec2.vpc_cidr import AWSIPv6CidrBlockSchema
9
12
  from cartography.util import aws_handle_regions
10
- from cartography.util import run_cleanup_job
11
13
  from cartography.util import timeit
12
14
 
13
15
  from .util import get_botocore_config
@@ -17,87 +19,78 @@ logger = logging.getLogger(__name__)
17
19
 
18
20
  @timeit
19
21
  @aws_handle_regions
20
- def get_ec2_vpcs(boto3_session: boto3.session.Session, region: str) -> List[Dict]:
22
+ def get_ec2_vpcs(
23
+ boto3_session: boto3.session.Session,
24
+ region: str,
25
+ ) -> list[dict[str, Any]]:
21
26
  client = boto3_session.client(
22
27
  "ec2",
23
28
  region_name=region,
24
29
  config=get_botocore_config(),
25
30
  )
26
- return client.describe_vpcs()["Vpcs"]
27
-
28
-
29
- def _get_cidr_association_statement(block_type: str) -> str:
30
- INGEST_CIDR_TEMPLATE = Template(
31
- """
32
- MATCH (vpc:AWSVpc{id: $VpcId})
33
- WITH vpc
34
- UNWIND $CidrBlock as block_data
35
- MERGE (new_block:$block_label{id: $VpcId + '|' + block_data.$block_cidr})
36
- ON CREATE SET new_block.firstseen = timestamp()
37
- SET new_block.association_id = block_data.AssociationId,
38
- new_block.cidr_block = block_data.$block_cidr,
39
- new_block.block_state = block_data.$state_name.State,
40
- new_block.block_state_message = block_data.$state_name.StatusMessage,
41
- new_block.lastupdated = $update_tag
42
- WITH vpc, new_block
43
- MERGE (vpc)-[r:BLOCK_ASSOCIATION]->(new_block)
44
- ON CREATE SET r.firstseen = timestamp()
45
- SET r.lastupdated = $update_tag""",
46
- )
47
-
48
- BLOCK_CIDR = "CidrBlock"
49
- STATE_NAME = "CidrBlockState"
50
-
51
- # base label type. We add the AWS ipv4 or 6 depending on block type
52
- BLOCK_TYPE = "AWSCidrBlock"
53
-
54
- if block_type == "ipv6":
55
- BLOCK_CIDR = "Ipv6" + BLOCK_CIDR
56
- STATE_NAME = "Ipv6" + STATE_NAME
57
- BLOCK_TYPE = BLOCK_TYPE + ":AWSIpv6CidrBlock"
58
- elif block_type == "ipv4":
59
- BLOCK_TYPE = BLOCK_TYPE + ":AWSIpv4CidrBlock"
60
- else:
61
- raise ValueError(f"Unsupported block type specified - {block_type}")
62
-
63
- return INGEST_CIDR_TEMPLATE.safe_substitute(
64
- block_label=BLOCK_TYPE,
65
- block_cidr=BLOCK_CIDR,
66
- state_name=STATE_NAME,
67
- )
68
-
69
-
70
- @timeit
71
- def load_cidr_association_set(
72
- neo4j_session: neo4j.Session,
73
- vpc_id: str,
74
- vpc_data: Dict,
75
- block_type: str,
76
- update_tag: int,
77
- ) -> None:
78
- ingest_statement = _get_cidr_association_statement(block_type)
79
-
80
- if block_type == "ipv6":
81
- data = vpc_data.get("Ipv6CidrBlockAssociationSet", [])
82
- else:
83
- data = vpc_data.get("CidrBlockAssociationSet", [])
84
-
85
- neo4j_session.run(
86
- ingest_statement,
87
- VpcId=vpc_id,
88
- CidrBlock=data,
89
- update_tag=update_tag,
90
- )
31
+ return client.describe_vpcs().get("Vpcs", [])
32
+
33
+
34
+ def transform_vpc_data(
35
+ vpc_list: list[dict[str, Any]], region: str
36
+ ) -> tuple[list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]]]:
37
+
38
+ vpc_data: list[dict[str, Any]] = []
39
+ ipv4_cidr_blocks: list[dict[str, Any]] = []
40
+ ipv6_cidr_blocks: list[dict[str, Any]] = []
41
+
42
+ for vpc in vpc_list:
43
+ vpc_record = {
44
+ "VpcId": vpc.get("VpcId"),
45
+ "InstanceTenancy": vpc.get("InstanceTenancy"),
46
+ "State": vpc.get("State"),
47
+ "IsDefault": vpc.get("IsDefault"),
48
+ "PrimaryCIDRBlock": vpc.get("CidrBlock"),
49
+ "DhcpOptionsId": vpc.get("DhcpOptionsId"),
50
+ "lastupdated": vpc.get("lastupdated"),
51
+ }
52
+ vpc_data.append(vpc_record)
53
+
54
+ ipv4_associations = vpc.get("CidrBlockAssociationSet", [])
55
+ for association in ipv4_associations:
56
+ ipv4_block = {
57
+ "Id": vpc["VpcId"] + "|" + association.get("CidrBlock"),
58
+ "VpcId": vpc["VpcId"],
59
+ "AssociationId": association.get("AssociationId"),
60
+ "CidrBlock": association.get("CidrBlock"),
61
+ "BlockState": association.get("CidrBlockState", {}).get("State"),
62
+ "BlockStateMessage": association.get("CidrBlockState", {}).get(
63
+ "StatusMessage"
64
+ ),
65
+ }
66
+ ipv4_cidr_blocks.append(ipv4_block)
67
+
68
+ ipv6_associations = vpc.get("Ipv6CidrBlockAssociationSet", [])
69
+ for association in ipv6_associations:
70
+ ipv6_block = {
71
+ "Id": vpc["VpcId"] + "|" + association.get("Ipv6CidrBlock"),
72
+ "VpcId": vpc["VpcId"],
73
+ "AssociationId": association.get("AssociationId"),
74
+ "CidrBlock": association.get("Ipv6CidrBlock"),
75
+ "BlockState": association.get("Ipv6CidrBlockState", {}).get("State"),
76
+ "BlockStateMessage": association.get("Ipv6CidrBlockState", {}).get(
77
+ "StatusMessage"
78
+ ),
79
+ }
80
+ ipv6_cidr_blocks.append(ipv6_block)
81
+
82
+ return vpc_data, ipv4_cidr_blocks, ipv6_cidr_blocks
91
83
 
92
84
 
93
85
  @timeit
94
86
  def load_ec2_vpcs(
95
87
  neo4j_session: neo4j.Session,
96
- data: List[Dict],
88
+ vpcs: list[dict[str, Any]],
97
89
  region: str,
98
- current_aws_account_id: str,
90
+ aws_account_id: str,
99
91
  update_tag: int,
100
92
  ) -> None:
93
+ logger.info(f"Loading {len(vpcs)} EC2 VPCs for region '{region}' into graph.")
101
94
  # https://docs.aws.amazon.com/cli/latest/reference/ec2/describe-vpcs.html
102
95
  # {
103
96
  # "Vpcs": [
@@ -126,69 +119,69 @@ def load_ec2_vpcs(
126
119
  # }
127
120
  # ]
128
121
  # }
122
+ load(
123
+ neo4j_session,
124
+ AWSVpcSchema(),
125
+ vpcs,
126
+ lastupdated=update_tag,
127
+ Region=region,
128
+ AWS_ID=aws_account_id,
129
+ )
129
130
 
130
- ingest_vpc = """
131
- MERGE (new_vpc:AWSVpc{id: $VpcId})
132
- ON CREATE SET new_vpc.firstseen = timestamp(), new_vpc.vpcid =$VpcId
133
- SET new_vpc.instance_tenancy = $InstanceTenancy,
134
- new_vpc.state = $State,
135
- new_vpc.is_default = $IsDefault,
136
- new_vpc.primary_cidr_block = $PrimaryCIDRBlock,
137
- new_vpc.dhcp_options_id = $DhcpOptionsId,
138
- new_vpc.region = $Region,
139
- new_vpc.lastupdated = $update_tag
140
- WITH new_vpc
141
- MATCH (awsAccount:AWSAccount{id: $AWS_ACCOUNT_ID})
142
- MERGE (awsAccount)-[r:RESOURCE]->(new_vpc)
143
- ON CREATE SET r.firstseen = timestamp()
144
- SET r.lastupdated = $update_tag"""
145
-
146
- for vpc in data:
147
- vpc_id = vpc["VpcId"] # fail if not present
148
-
149
- neo4j_session.run(
150
- ingest_vpc,
151
- VpcId=vpc_id,
152
- InstanceTenancy=vpc.get("InstanceTenancy", None),
153
- State=vpc.get("State", None),
154
- IsDefault=vpc.get("IsDefault", None),
155
- PrimaryCIDRBlock=vpc.get("CidrBlock", None),
156
- DhcpOptionsId=vpc.get("DhcpOptionsId", None),
157
- Region=region,
158
- AWS_ACCOUNT_ID=current_aws_account_id,
159
- update_tag=update_tag,
160
- )
161
131
 
162
- load_cidr_association_set(
163
- neo4j_session,
164
- vpc_id=vpc_id,
165
- block_type="ipv4",
166
- vpc_data=vpc,
167
- update_tag=update_tag,
168
- )
132
+ @timeit
133
+ def load_ipv4_cidr_blocks(
134
+ neo4j_session: neo4j.Session,
135
+ ipv4_cidr_blocks: list[dict[str, Any]],
136
+ region: str,
137
+ aws_account_id: str,
138
+ update_tag: int,
139
+ ) -> None:
140
+ load(
141
+ neo4j_session,
142
+ AWSIPv4CidrBlockSchema(),
143
+ ipv4_cidr_blocks,
144
+ lastupdated=update_tag,
145
+ )
169
146
 
170
- load_cidr_association_set(
171
- neo4j_session,
172
- vpc_id=vpc_id,
173
- block_type="ipv6",
174
- vpc_data=vpc,
175
- update_tag=update_tag,
176
- )
147
+
148
+ @timeit
149
+ def load_ipv6_cidr_blocks(
150
+ neo4j_session: neo4j.Session,
151
+ ipv6_cidr_blocks: list[dict[str, Any]],
152
+ region: str,
153
+ aws_account_id: str,
154
+ update_tag: int,
155
+ ) -> None:
156
+ load(
157
+ neo4j_session,
158
+ AWSIPv6CidrBlockSchema(),
159
+ ipv6_cidr_blocks,
160
+ lastupdated=update_tag,
161
+ )
177
162
 
178
163
 
179
164
  @timeit
180
- def cleanup_ec2_vpcs(neo4j_session: neo4j.Session, common_job_parameters: Dict) -> None:
181
- run_cleanup_job("aws_import_vpc_cleanup.json", neo4j_session, common_job_parameters)
165
+ def cleanup(
166
+ neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
167
+ ) -> None:
168
+ GraphJob.from_node_schema(AWSIPv6CidrBlockSchema(), common_job_parameters).run(
169
+ neo4j_session
170
+ )
171
+ GraphJob.from_node_schema(AWSIPv4CidrBlockSchema(), common_job_parameters).run(
172
+ neo4j_session
173
+ )
174
+ GraphJob.from_node_schema(AWSVpcSchema(), common_job_parameters).run(neo4j_session)
182
175
 
183
176
 
184
177
  @timeit
185
178
  def sync_vpc(
186
179
  neo4j_session: neo4j.Session,
187
180
  boto3_session: boto3.session.Session,
188
- regions: List[str],
181
+ regions: list[str],
189
182
  current_aws_account_id: str,
190
183
  update_tag: int,
191
- common_job_parameters: Dict,
184
+ common_job_parameters: dict[str, Any],
192
185
  ) -> None:
193
186
  for region in regions:
194
187
  logger.info(
@@ -196,6 +189,29 @@ def sync_vpc(
196
189
  region,
197
190
  current_aws_account_id,
198
191
  )
199
- data = get_ec2_vpcs(boto3_session, region)
200
- load_ec2_vpcs(neo4j_session, data, region, current_aws_account_id, update_tag)
201
- cleanup_ec2_vpcs(neo4j_session, common_job_parameters)
192
+ raw_vpc_data = get_ec2_vpcs(boto3_session, region)
193
+ vpc_data, ipv4_cidr_blocks, ipv6_cidr_blocks = transform_vpc_data(
194
+ raw_vpc_data, region
195
+ )
196
+ load_ec2_vpcs(
197
+ neo4j_session,
198
+ vpc_data,
199
+ region,
200
+ current_aws_account_id,
201
+ update_tag,
202
+ )
203
+ load_ipv4_cidr_blocks(
204
+ neo4j_session,
205
+ ipv4_cidr_blocks,
206
+ region,
207
+ current_aws_account_id,
208
+ update_tag,
209
+ )
210
+ load_ipv6_cidr_blocks(
211
+ neo4j_session,
212
+ ipv6_cidr_blocks,
213
+ region,
214
+ current_aws_account_id,
215
+ update_tag,
216
+ )
217
+ cleanup(neo4j_session, common_job_parameters)
@@ -10,6 +10,7 @@ from cartography.client.core.tx import load
10
10
  from cartography.graph.job import GraphJob
11
11
  from cartography.intel.aws.ec2.util import get_botocore_config
12
12
  from cartography.models.aws.eventbridge.rule import EventBridgeRuleSchema
13
+ from cartography.models.aws.eventbridge.target import EventBridgeTargetSchema
13
14
  from cartography.util import aws_handle_regions
14
15
  from cartography.util import timeit
15
16
 
@@ -33,6 +34,44 @@ def get_eventbridge_rules(
33
34
  return rules
34
35
 
35
36
 
37
+ @timeit
38
+ @aws_handle_regions
39
+ def get_eventbridge_targets(
40
+ boto3_session: boto3.Session, region: str, rules: List[Dict[str, Any]]
41
+ ) -> List[Dict[str, Any]]:
42
+ client = boto3_session.client(
43
+ "events", region_name=region, config=get_botocore_config()
44
+ )
45
+ targets = []
46
+ for rule in rules:
47
+ paginator = client.get_paginator("list_targets_by_rule")
48
+ for page in paginator.paginate(Rule=rule["Name"]):
49
+ for target in page.get("Targets", []):
50
+ target["RuleArn"] = rule["Arn"]
51
+ targets.append(target)
52
+ return targets
53
+
54
+
55
+ def transform_eventbridge_targets(
56
+ targets: List[Dict[str, Any]],
57
+ region: str,
58
+ ) -> List[Dict[str, Any]]:
59
+ """
60
+ Transform EventBridge target data for ingestion into Neo4j.
61
+ """
62
+ transformed_data = []
63
+ for target in targets:
64
+ transformed_target = {
65
+ "Id": target["Arn"],
66
+ "Arn": target["Arn"],
67
+ "RuleArn": target["RuleArn"],
68
+ "RoleArn": target.get("RoleArn"),
69
+ "Region": region,
70
+ }
71
+ transformed_data.append(transformed_target)
72
+ return transformed_data
73
+
74
+
36
75
  @timeit
37
76
  def load_eventbridge_rules(
38
77
  neo4j_session: neo4j.Session,
@@ -54,6 +93,27 @@ def load_eventbridge_rules(
54
93
  )
55
94
 
56
95
 
96
+ @timeit
97
+ def load_eventbridge_targets(
98
+ neo4j_session: neo4j.Session,
99
+ data: List[Dict[str, Any]],
100
+ region: str,
101
+ current_aws_account_id: str,
102
+ aws_update_tag: int,
103
+ ) -> None:
104
+ logger.info(
105
+ f"Loading EventBridge {len(data)} targets for region '{region}' into graph.",
106
+ )
107
+ load(
108
+ neo4j_session,
109
+ EventBridgeTargetSchema(),
110
+ data,
111
+ lastupdated=aws_update_tag,
112
+ Region=region,
113
+ AWS_ID=current_aws_account_id,
114
+ )
115
+
116
+
57
117
  @timeit
58
118
  def cleanup(
59
119
  neo4j_session: neo4j.Session,
@@ -63,6 +123,9 @@ def cleanup(
63
123
  GraphJob.from_node_schema(EventBridgeRuleSchema(), common_job_parameters).run(
64
124
  neo4j_session
65
125
  )
126
+ GraphJob.from_node_schema(EventBridgeTargetSchema(), common_job_parameters).run(
127
+ neo4j_session
128
+ )
66
129
 
67
130
 
68
131
  @timeit
@@ -88,4 +151,14 @@ def sync(
88
151
  update_tag,
89
152
  )
90
153
 
154
+ targets = get_eventbridge_targets(boto3_session, region, rules)
155
+ transformed_targets = transform_eventbridge_targets(targets, region)
156
+ load_eventbridge_targets(
157
+ neo4j_session,
158
+ transformed_targets,
159
+ region,
160
+ current_aws_account_id,
161
+ update_tag,
162
+ )
163
+
91
164
  cleanup(neo4j_session, common_job_parameters)
@@ -41,12 +41,12 @@ UserAffiliationAndRepoPermission = namedtuple(
41
41
 
42
42
 
43
43
  GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
44
- query($login: String!, $cursor: String) {
44
+ query($login: String!, $cursor: String, $count: Int!) {
45
45
  organization(login: $login)
46
46
  {
47
47
  url
48
48
  login
49
- repositories(first: 50, after: $cursor){
49
+ repositories(first: $count, after: $cursor){
50
50
  pageInfo{
51
51
  endCursor
52
52
  hasNextPage
@@ -168,14 +168,22 @@ def _get_repo_collaborators_inner_func(
168
168
  repo_name = repo["name"]
169
169
  repo_url = repo["url"]
170
170
 
171
- if (
172
- affiliation == "OUTSIDE" and repo["outsideCollaborators"]["totalCount"] == 0
173
- ) or (
174
- affiliation == "DIRECT" and repo["directCollaborators"]["totalCount"] == 0
175
- ):
176
- # repo has no collabs of the affiliation type we're looking for, so don't waste time making an API call
177
- result[repo_url] = []
178
- continue
171
+ # Guard against None when collaborator fields are not accessible due to permissions.
172
+ direct_info = repo.get("directCollaborators")
173
+ outside_info = repo.get("outsideCollaborators")
174
+
175
+ if affiliation == "OUTSIDE":
176
+ total_outside = 0 if not outside_info else outside_info.get("totalCount", 0)
177
+ if total_outside == 0:
178
+ # No outside collaborators or not permitted to view; skip API calls for this repo.
179
+ result[repo_url] = []
180
+ continue
181
+ else: # DIRECT
182
+ total_direct = 0 if not direct_info else direct_info.get("totalCount", 0)
183
+ if total_direct == 0:
184
+ # No direct collaborators or not permitted to view; skip API calls for this repo.
185
+ result[repo_url] = []
186
+ continue
179
187
 
180
188
  logger.info(f"Loading {affiliation} collaborators for repo {repo_name}.")
181
189
  collaborators = _get_repo_collaborators(
@@ -290,6 +298,7 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
290
298
  organization,
291
299
  GITHUB_ORG_REPOS_PAGINATED_GRAPHQL,
292
300
  "repositories",
301
+ count=50,
293
302
  )
294
303
  return repos.nodes
295
304
 
@@ -405,9 +414,16 @@ def _create_default_branch_id(repo_url: str, default_branch_ref_id: str) -> str:
405
414
 
406
415
  def _create_git_url_from_ssh_url(ssh_url: str) -> str:
407
416
  """
408
- Return a git:// URL from the given ssh_url
417
+ Convert SSH URL to git:// URL.
418
+ Example:
419
+ git@github.com:cartography-cncf/cartography.git
420
+ -> git://github.com/cartography-cncf/cartography.git
409
421
  """
410
- return ssh_url.replace("/", ":").replace("git@", "git://")
422
+ # Remove the user part (e.g., "git@")
423
+ _, host_and_path = ssh_url.split("@", 1)
424
+ # Replace first ':' (separating host and repo) with '/'
425
+ host, path = host_and_path.split(":", 1)
426
+ return f"git://{host}/{path}"
411
427
 
412
428
 
413
429
  def _transform_repo_objects(input_repo_object: Dict, out_repo_list: List[Dict]) -> None:
@@ -157,6 +157,18 @@ def fetch_all(
157
157
  retry += 1
158
158
  exc = err
159
159
  except requests.exceptions.HTTPError as err:
160
+ if (
161
+ err.response is not None
162
+ and err.response.status_code == 502
163
+ and kwargs.get("count")
164
+ and kwargs["count"] > 1
165
+ ):
166
+ kwargs["count"] = max(1, kwargs["count"] // 2)
167
+ logger.warning(
168
+ "GitHub: Received 502 response. Reducing page size to %s and retrying.",
169
+ kwargs["count"],
170
+ )
171
+ continue
160
172
  retry += 1
161
173
  exc = err
162
174
  except requests.exceptions.ChunkedEncodingError as err:
@@ -0,0 +1,153 @@
1
+ import logging
2
+
3
+ import neo4j
4
+ import requests
5
+
6
+ import cartography.intel.keycloak.authenticationexecutions
7
+ import cartography.intel.keycloak.authenticationflows
8
+ import cartography.intel.keycloak.clients
9
+ import cartography.intel.keycloak.groups
10
+ import cartography.intel.keycloak.identityproviders
11
+ import cartography.intel.keycloak.organizations
12
+ import cartography.intel.keycloak.realms
13
+ import cartography.intel.keycloak.roles
14
+ import cartography.intel.keycloak.scopes
15
+ import cartography.intel.keycloak.users
16
+ from cartography.config import Config
17
+ from cartography.util import timeit
18
+
19
+ logger = logging.getLogger(__name__)
20
+ _TIMEOUT = (60, 60)
21
+
22
+
23
+ @timeit
24
+ def start_keycloak_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
25
+ """
26
+ If this module is configured, perform ingestion of Keycloak data. Otherwise warn and exit
27
+ :param neo4j_session: Neo4J session for database interface
28
+ :param config: A cartography.config object
29
+ :return: None
30
+ """
31
+ if (
32
+ not config.keycloak_client_id
33
+ or not config.keycloak_client_secret
34
+ or not config.keycloak_url
35
+ or not config.keycloak_realm
36
+ ):
37
+ logger.info(
38
+ "Keycloak import is not configured - skipping this module. "
39
+ "See docs to configure.",
40
+ )
41
+ return
42
+
43
+ # Create requests sessions
44
+ with requests.session() as api_session:
45
+ payload = {
46
+ "grant_type": "client_credentials",
47
+ "client_id": config.keycloak_client_id,
48
+ "client_secret": config.keycloak_client_secret,
49
+ }
50
+ req = api_session.post(
51
+ f"{config.keycloak_url}/realms/{config.keycloak_realm}/protocol/openid-connect/token",
52
+ data=payload,
53
+ timeout=_TIMEOUT,
54
+ )
55
+ req.raise_for_status()
56
+ api_session.headers.update(
57
+ {"Authorization": f'Bearer {req.json()["access_token"]}'}
58
+ )
59
+
60
+ common_job_parameters = {
61
+ "UPDATE_TAG": config.update_tag,
62
+ }
63
+
64
+ for realm in cartography.intel.keycloak.realms.sync(
65
+ neo4j_session, api_session, config.keycloak_url, common_job_parameters
66
+ ):
67
+ realm_scopped_job_parameters = {
68
+ "UPDATE_TAG": config.update_tag,
69
+ "REALM": realm["realm"],
70
+ "REALM_ID": realm["id"],
71
+ }
72
+ cartography.intel.keycloak.users.sync(
73
+ neo4j_session,
74
+ api_session,
75
+ config.keycloak_url,
76
+ realm_scopped_job_parameters,
77
+ )
78
+ cartography.intel.keycloak.identityproviders.sync(
79
+ neo4j_session,
80
+ api_session,
81
+ config.keycloak_url,
82
+ realm_scopped_job_parameters,
83
+ )
84
+ scopes = cartography.intel.keycloak.scopes.sync(
85
+ neo4j_session,
86
+ api_session,
87
+ config.keycloak_url,
88
+ realm_scopped_job_parameters,
89
+ )
90
+ scope_ids = [s["id"] for s in scopes]
91
+ flows = cartography.intel.keycloak.authenticationflows.sync(
92
+ neo4j_session,
93
+ api_session,
94
+ config.keycloak_url,
95
+ realm_scopped_job_parameters,
96
+ )
97
+ flow_aliases_to_id = {f["alias"]: f["id"] for f in flows}
98
+ cartography.intel.keycloak.authenticationexecutions.sync(
99
+ neo4j_session,
100
+ api_session,
101
+ config.keycloak_url,
102
+ realm_scopped_job_parameters,
103
+ list(flow_aliases_to_id.keys()),
104
+ )
105
+ realm_default_flows = {
106
+ "browser": flow_aliases_to_id.get(realm.get("browserFlow")),
107
+ "registration": flow_aliases_to_id.get(realm.get("registrationFlow")),
108
+ "direct_grant": flow_aliases_to_id.get(realm.get("directGrantFlow")),
109
+ "reset_credentials": flow_aliases_to_id.get(
110
+ realm.get("resetCredentialsFlow")
111
+ ),
112
+ "client_authentication": flow_aliases_to_id.get(
113
+ realm.get("clientAuthenticationFlow")
114
+ ),
115
+ "docker_authentication": flow_aliases_to_id.get(
116
+ realm.get("dockerAuthenticationFlow")
117
+ ),
118
+ "first_broker_login": flow_aliases_to_id.get(
119
+ realm.get("firstBrokerLoginFlow")
120
+ ),
121
+ }
122
+
123
+ clients = cartography.intel.keycloak.clients.sync(
124
+ neo4j_session,
125
+ api_session,
126
+ config.keycloak_url,
127
+ realm_scopped_job_parameters,
128
+ realm_default_flows,
129
+ )
130
+ client_ids = [c["id"] for c in clients]
131
+ cartography.intel.keycloak.roles.sync(
132
+ neo4j_session,
133
+ api_session,
134
+ config.keycloak_url,
135
+ realm_scopped_job_parameters,
136
+ client_ids,
137
+ scope_ids,
138
+ )
139
+ cartography.intel.keycloak.groups.sync(
140
+ neo4j_session,
141
+ api_session,
142
+ config.keycloak_url,
143
+ realm_scopped_job_parameters,
144
+ )
145
+
146
+ # Organizations if they are enabled
147
+ if realm.get("organizationsEnabled", False):
148
+ cartography.intel.keycloak.organizations.sync(
149
+ neo4j_session,
150
+ api_session,
151
+ config.keycloak_url,
152
+ realm_scopped_job_parameters,
153
+ )