cartography 0.110.0rc1__py3-none-any.whl → 0.111.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of cartography might be problematic.

Files changed (87)
  1. cartography/_version.py +16 -3
  2. cartography/cli.py +46 -8
  3. cartography/config.py +16 -9
  4. cartography/data/indexes.cypher +0 -2
  5. cartography/data/jobs/analysis/aws_ec2_keypair_analysis.json +2 -2
  6. cartography/data/jobs/analysis/keycloak_inheritance.json +30 -0
  7. cartography/graph/querybuilder.py +70 -0
  8. cartography/intel/aws/apigateway.py +113 -4
  9. cartography/intel/aws/cognito.py +201 -0
  10. cartography/intel/aws/ec2/vpc.py +140 -124
  11. cartography/intel/aws/ecs.py +7 -1
  12. cartography/intel/aws/eventbridge.py +73 -0
  13. cartography/intel/aws/glue.py +64 -0
  14. cartography/intel/aws/kms.py +13 -1
  15. cartography/intel/aws/rds.py +105 -0
  16. cartography/intel/aws/resources.py +2 -0
  17. cartography/intel/aws/route53.py +3 -1
  18. cartography/intel/aws/s3.py +104 -0
  19. cartography/intel/entra/__init__.py +41 -43
  20. cartography/intel/entra/applications.py +2 -1
  21. cartography/intel/entra/ou.py +1 -1
  22. cartography/intel/github/__init__.py +21 -25
  23. cartography/intel/github/repos.py +32 -48
  24. cartography/intel/github/util.py +12 -0
  25. cartography/intel/keycloak/__init__.py +153 -0
  26. cartography/intel/keycloak/authenticationexecutions.py +322 -0
  27. cartography/intel/keycloak/authenticationflows.py +77 -0
  28. cartography/intel/keycloak/clients.py +187 -0
  29. cartography/intel/keycloak/groups.py +126 -0
  30. cartography/intel/keycloak/identityproviders.py +94 -0
  31. cartography/intel/keycloak/organizations.py +163 -0
  32. cartography/intel/keycloak/realms.py +61 -0
  33. cartography/intel/keycloak/roles.py +202 -0
  34. cartography/intel/keycloak/scopes.py +73 -0
  35. cartography/intel/keycloak/users.py +70 -0
  36. cartography/intel/keycloak/util.py +47 -0
  37. cartography/intel/kubernetes/__init__.py +4 -0
  38. cartography/intel/kubernetes/rbac.py +464 -0
  39. cartography/intel/kubernetes/util.py +17 -0
  40. cartography/models/aws/apigateway/apigatewaydeployment.py +74 -0
  41. cartography/models/aws/cognito/__init__.py +0 -0
  42. cartography/models/aws/cognito/identity_pool.py +70 -0
  43. cartography/models/aws/cognito/user_pool.py +47 -0
  44. cartography/models/aws/ec2/security_groups.py +1 -1
  45. cartography/models/aws/ec2/vpc.py +46 -0
  46. cartography/models/aws/ec2/vpc_cidr.py +102 -0
  47. cartography/models/aws/ecs/services.py +17 -0
  48. cartography/models/aws/ecs/tasks.py +1 -0
  49. cartography/models/aws/eventbridge/target.py +71 -0
  50. cartography/models/aws/glue/job.py +69 -0
  51. cartography/models/aws/rds/event_subscription.py +146 -0
  52. cartography/models/aws/route53/dnsrecord.py +21 -0
  53. cartography/models/github/dependencies.py +1 -2
  54. cartography/models/keycloak/__init__.py +0 -0
  55. cartography/models/keycloak/authenticationexecution.py +160 -0
  56. cartography/models/keycloak/authenticationflow.py +54 -0
  57. cartography/models/keycloak/client.py +177 -0
  58. cartography/models/keycloak/group.py +101 -0
  59. cartography/models/keycloak/identityprovider.py +89 -0
  60. cartography/models/keycloak/organization.py +116 -0
  61. cartography/models/keycloak/organizationdomain.py +73 -0
  62. cartography/models/keycloak/realm.py +173 -0
  63. cartography/models/keycloak/role.py +126 -0
  64. cartography/models/keycloak/scope.py +73 -0
  65. cartography/models/keycloak/user.py +51 -0
  66. cartography/models/kubernetes/clusterrolebindings.py +98 -0
  67. cartography/models/kubernetes/clusterroles.py +52 -0
  68. cartography/models/kubernetes/rolebindings.py +119 -0
  69. cartography/models/kubernetes/roles.py +76 -0
  70. cartography/models/kubernetes/serviceaccounts.py +77 -0
  71. cartography/models/tailscale/device.py +1 -0
  72. cartography/sync.py +2 -0
  73. cartography/util.py +8 -0
  74. {cartography-0.110.0rc1.dist-info → cartography-0.111.0.dist-info}/METADATA +4 -3
  75. {cartography-0.110.0rc1.dist-info → cartography-0.111.0.dist-info}/RECORD +85 -46
  76. cartography/data/jobs/cleanup/aws_import_vpc_cleanup.json +0 -23
  77. cartography/intel/entra/resources.py +0 -20
  78. /cartography/data/jobs/{analysis → scoped_analysis}/aws_s3acl_analysis.json +0 -0
  79. /cartography/models/aws/{__init__.py → apigateway/__init__.py} +0 -0
  80. /cartography/models/aws/{apigateway.py → apigateway/apigateway.py} +0 -0
  81. /cartography/models/aws/{apigatewaycertificate.py → apigateway/apigatewaycertificate.py} +0 -0
  82. /cartography/models/aws/{apigatewayresource.py → apigateway/apigatewayresource.py} +0 -0
  83. /cartography/models/aws/{apigatewaystage.py → apigateway/apigatewaystage.py} +0 -0
  84. {cartography-0.110.0rc1.dist-info → cartography-0.111.0.dist-info}/WHEEL +0 -0
  85. {cartography-0.110.0rc1.dist-info → cartography-0.111.0.dist-info}/entry_points.txt +0 -0
  86. {cartography-0.110.0rc1.dist-info → cartography-0.111.0.dist-info}/licenses/LICENSE +0 -0
  87. {cartography-0.110.0rc1.dist-info → cartography-0.111.0.dist-info}/top_level.txt +0 -0
cartography/intel/github/repos.py

@@ -41,12 +41,12 @@ UserAffiliationAndRepoPermission = namedtuple(


 GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
-    query($login: String!, $cursor: String) {
+    query($login: String!, $cursor: String, $count: Int!) {
     organization(login: $login)
     {
         url
         login
-        repositories(first: 50, after: $cursor){
+        repositories(first: $count, after: $cursor){
            pageInfo{
                endCursor
                hasNextPage
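
The query above now takes its page size from a $count variable instead of the hard-coded first: 50. As a rough illustration (hypothetical values, not cartography's internal call), the variables sent alongside this query would look like:

# Illustrative GraphQL variables for GITHUB_ORG_REPOS_PAGINATED_GRAPHQL.
# "cursor" is None on the first page and set to pageInfo.endCursor afterwards;
# the organization login shown here is hypothetical.
variables = {
    "login": "example-org",
    "cursor": None,
    "count": 50,  # page size, now adjustable at call time
}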
@@ -168,14 +168,22 @@ def _get_repo_collaborators_inner_func(
        repo_name = repo["name"]
        repo_url = repo["url"]

-        if (
-            affiliation == "OUTSIDE" and repo["outsideCollaborators"]["totalCount"] == 0
-        ) or (
-            affiliation == "DIRECT" and repo["directCollaborators"]["totalCount"] == 0
-        ):
-            # repo has no collabs of the affiliation type we're looking for, so don't waste time making an API call
-            result[repo_url] = []
-            continue
+        # Guard against None when collaborator fields are not accessible due to permissions.
+        direct_info = repo.get("directCollaborators")
+        outside_info = repo.get("outsideCollaborators")
+
+        if affiliation == "OUTSIDE":
+            total_outside = 0 if not outside_info else outside_info.get("totalCount", 0)
+            if total_outside == 0:
+                # No outside collaborators or not permitted to view; skip API calls for this repo.
+                result[repo_url] = []
+                continue
+        else:  # DIRECT
+            total_direct = 0 if not direct_info else direct_info.get("totalCount", 0)
+            if total_direct == 0:
+                # No direct collaborators or not permitted to view; skip API calls for this repo.
+                result[repo_url] = []
+                continue

        logger.info(f"Loading {affiliation} collaborators for repo {repo_name}.")
        collaborators = _get_repo_collaborators(
@@ -290,6 +298,7 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
        organization,
        GITHUB_ORG_REPOS_PAGINATED_GRAPHQL,
        "repositories",
+        count=50,
    )
    return repos.nodes

@@ -405,9 +414,16 @@ def _create_default_branch_id(repo_url: str, default_branch_ref_id: str) -> str:

 def _create_git_url_from_ssh_url(ssh_url: str) -> str:
     """
-    Return a git:// URL from the given ssh_url
+    Convert SSH URL to git:// URL.
+    Example:
+        git@github.com:cartography-cncf/cartography.git
+        -> git://github.com/cartography-cncf/cartography.git
     """
-    return ssh_url.replace("/", ":").replace("git@", "git://")
+    # Remove the user part (e.g., "git@")
+    _, host_and_path = ssh_url.split("@", 1)
+    # Replace first ':' (separating host and repo) with '/'
+    host, path = host_and_path.split(":", 1)
+    return f"git://{host}/{path}"


 def _transform_repo_objects(input_repo_object: Dict, out_repo_list: List[Dict]) -> None:
@@ -647,9 +663,6 @@ def _transform_dependency_graph(
    requirements = dep.get("requirements", "")
    package_manager = dep.get("packageManager", "").upper()

-    # Extract version from requirements string if available
-    pinned_version = _extract_version_from_requirements(requirements)
-
    # Create ecosystem-specific canonical name
    canonical_name = _canonicalize_dependency_name(
        package_name, package_manager
@@ -658,11 +671,12 @@ def _transform_dependency_graph(
    # Create ecosystem identifier
    ecosystem = package_manager.lower() if package_manager else "unknown"

-    # Create simple dependency ID using canonical name and version
+    # Create simple dependency ID using canonical name and requirements
    # This allows the same dependency to be shared across multiple repos
+    requirements_for_id = (requirements or "").strip()
    dependency_id = (
-        f"{canonical_name}|{pinned_version}"
-        if pinned_version
+        f"{canonical_name}|{requirements_for_id}"
+        if requirements_for_id
        else canonical_name
    )

@@ -677,15 +691,12 @@ def _transform_dependency_graph(
        "id": dependency_id,
        "name": canonical_name,
        "original_name": package_name,  # Keep original for reference
-        "version": pinned_version,
        "requirements": normalized_requirements,
        "ecosystem": ecosystem,
        "package_manager": package_manager,
        "manifest_path": manifest_path,
        "manifest_id": manifest_id,
        "repo_url": repo_url,
-        # Add separate fields for easier querying
-        "repo_name": repo_url.split("/")[-1] if repo_url else "",
        "manifest_file": (
            manifest_path.split("/")[-1] if manifest_path else ""
        ),
@@ -698,33 +709,6 @@ def _transform_dependency_graph(
    logger.info(f"Found {dependencies_added} dependencies in {repo_name}")


-def _extract_version_from_requirements(requirements: Optional[str]) -> Optional[str]:
-    """
-    Extract a pinned version from a requirements string if it exists.
-    Examples: "1.2.3" -> "1.2.3", "^1.2.3" -> None, ">=1.0,<2.0" -> None
-    """
-    if not requirements or not requirements.strip():
-        return None
-
-    # Handle exact version specifications (no operators)
-    if requirements and not any(
-        op in requirements for op in ["^", "~", ">", "<", "=", "*"]
-    ):
-        stripped = requirements.strip()
-        return stripped if stripped else None
-
-    # Handle == specifications
-    if "==" in requirements:
-        parts = requirements.split("==")
-        if len(parts) == 2:
-            version = parts[1].strip()
-            # Remove any trailing constraints
-            version = version.split(",")[0].split(" ")[0]
-            return version if version else None
-
-    return None
-
-
 def _canonicalize_dependency_name(name: str, package_manager: Optional[str]) -> str:
    """
    Canonicalize dependency names based on ecosystem conventions.
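
With the pinned-version helper removed, the dependency node id is now derived directly from the requirements string. A small illustration of the resulting ids, mirroring the logic above with hypothetical package data:

# Hypothetical inputs showing the new dependency_id scheme.
canonical_name = "requests"
for requirements in (">=2.28,<3.0", "2.31.0", ""):
    requirements_for_id = (requirements or "").strip()
    dependency_id = (
        f"{canonical_name}|{requirements_for_id}" if requirements_for_id else canonical_name
    )
    print(dependency_id)
# -> requests|>=2.28,<3.0
# -> requests|2.31.0
# -> requests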
cartography/intel/github/util.py

@@ -157,6 +157,18 @@ def fetch_all(
            retry += 1
            exc = err
        except requests.exceptions.HTTPError as err:
+            if (
+                err.response is not None
+                and err.response.status_code == 502
+                and kwargs.get("count")
+                and kwargs["count"] > 1
+            ):
+                kwargs["count"] = max(1, kwargs["count"] // 2)
+                logger.warning(
+                    "GitHub: Received 502 response. Reducing page size to %s and retrying.",
+                    kwargs["count"],
+                )
+                continue
            retry += 1
            exc = err
        except requests.exceptions.ChunkedEncodingError as err:
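
The new HTTPError branch halves the requested page size on repeated 502 responses before retrying. A standalone sketch of that back-off sequence (illustrative only, not the library's fetch_all), starting from the default count=50 passed by get() above:

# Shows how max(1, count // 2) shrinks the page size across successive 502s.
count = 50
sizes = [count]
while count > 1:
    count = max(1, count // 2)
    sizes.append(count)
print(sizes)  # [50, 25, 12, 6, 3, 1]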
cartography/intel/keycloak/__init__.py

@@ -0,0 +1,153 @@
+import logging
+
+import neo4j
+import requests
+
+import cartography.intel.keycloak.authenticationexecutions
+import cartography.intel.keycloak.authenticationflows
+import cartography.intel.keycloak.clients
+import cartography.intel.keycloak.groups
+import cartography.intel.keycloak.identityproviders
+import cartography.intel.keycloak.organizations
+import cartography.intel.keycloak.realms
+import cartography.intel.keycloak.roles
+import cartography.intel.keycloak.scopes
+import cartography.intel.keycloak.users
+from cartography.config import Config
+from cartography.util import timeit
+
+logger = logging.getLogger(__name__)
+_TIMEOUT = (60, 60)
+
+
+@timeit
+def start_keycloak_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
+    """
+    If this module is configured, perform ingestion of Keycloak data. Otherwise warn and exit
+    :param neo4j_session: Neo4J session for database interface
+    :param config: A cartography.config object
+    :return: None
+    """
+    if (
+        not config.keycloak_client_id
+        or not config.keycloak_client_secret
+        or not config.keycloak_url
+        or not config.keycloak_realm
+    ):
+        logger.info(
+            "Keycloak import is not configured - skipping this module. "
+            "See docs to configure.",
+        )
+        return
+
+    # Create requests sessions
+    with requests.session() as api_session:
+        payload = {
+            "grant_type": "client_credentials",
+            "client_id": config.keycloak_client_id,
+            "client_secret": config.keycloak_client_secret,
+        }
+        req = api_session.post(
+            f"{config.keycloak_url}/realms/{config.keycloak_realm}/protocol/openid-connect/token",
+            data=payload,
+            timeout=_TIMEOUT,
+        )
+        req.raise_for_status()
+        api_session.headers.update(
+            {"Authorization": f'Bearer {req.json()["access_token"]}'}
+        )
+
+        common_job_parameters = {
+            "UPDATE_TAG": config.update_tag,
+        }
+
+        for realm in cartography.intel.keycloak.realms.sync(
+            neo4j_session, api_session, config.keycloak_url, common_job_parameters
+        ):
+            realm_scopped_job_parameters = {
+                "UPDATE_TAG": config.update_tag,
+                "REALM": realm["realm"],
+                "REALM_ID": realm["id"],
+            }
+            cartography.intel.keycloak.users.sync(
+                neo4j_session,
+                api_session,
+                config.keycloak_url,
+                realm_scopped_job_parameters,
+            )
+            cartography.intel.keycloak.identityproviders.sync(
+                neo4j_session,
+                api_session,
+                config.keycloak_url,
+                realm_scopped_job_parameters,
+            )
+            scopes = cartography.intel.keycloak.scopes.sync(
+                neo4j_session,
+                api_session,
+                config.keycloak_url,
+                realm_scopped_job_parameters,
+            )
+            scope_ids = [s["id"] for s in scopes]
+            flows = cartography.intel.keycloak.authenticationflows.sync(
+                neo4j_session,
+                api_session,
+                config.keycloak_url,
+                realm_scopped_job_parameters,
+            )
+            flow_aliases_to_id = {f["alias"]: f["id"] for f in flows}
+            cartography.intel.keycloak.authenticationexecutions.sync(
+                neo4j_session,
+                api_session,
+                config.keycloak_url,
+                realm_scopped_job_parameters,
+                list(flow_aliases_to_id.keys()),
+            )
+            realm_default_flows = {
+                "browser": flow_aliases_to_id.get(realm.get("browserFlow")),
+                "registration": flow_aliases_to_id.get(realm.get("registrationFlow")),
+                "direct_grant": flow_aliases_to_id.get(realm.get("directGrantFlow")),
+                "reset_credentials": flow_aliases_to_id.get(
+                    realm.get("resetCredentialsFlow")
+                ),
+                "client_authentication": flow_aliases_to_id.get(
+                    realm.get("clientAuthenticationFlow")
+                ),
+                "docker_authentication": flow_aliases_to_id.get(
+                    realm.get("dockerAuthenticationFlow")
+                ),
+                "first_broker_login": flow_aliases_to_id.get(
+                    realm.get("firstBrokerLoginFlow")
+                ),
+            }
+
+            clients = cartography.intel.keycloak.clients.sync(
+                neo4j_session,
+                api_session,
+                config.keycloak_url,
+                realm_scopped_job_parameters,
+                realm_default_flows,
+            )
+            client_ids = [c["id"] for c in clients]
+            cartography.intel.keycloak.roles.sync(
+                neo4j_session,
+                api_session,
+                config.keycloak_url,
+                realm_scopped_job_parameters,
+                client_ids,
+                scope_ids,
+            )
+            cartography.intel.keycloak.groups.sync(
+                neo4j_session,
+                api_session,
+                config.keycloak_url,
+                realm_scopped_job_parameters,
+            )
+
+            # Organizations if they are enabled
+            if realm.get("organizationsEnabled", False):
+                cartography.intel.keycloak.organizations.sync(
+                    neo4j_session,
+                    api_session,
+                    config.keycloak_url,
+                    realm_scopped_job_parameters,
+                )
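
For reference, the module only runs when all four Keycloak settings are present on the config object. A minimal sketch of invoking it directly, assuming Config accepts these keyword arguments (all values are placeholders):

import neo4j

from cartography.config import Config
from cartography.intel.keycloak import start_keycloak_ingestion

# Placeholder values; assumes Config exposes the keycloak_* attributes read above.
config = Config(
    neo4j_uri="bolt://localhost:7687",
    update_tag=1700000000,
    keycloak_url="https://keycloak.example.com",  # base URL, no trailing slash
    keycloak_realm="master",                      # realm used to obtain the token
    keycloak_client_id="cartography",             # client credentials grant
    keycloak_client_secret="...",                 # secret for that client
)

driver = neo4j.GraphDatabase.driver(config.neo4j_uri)
with driver.session() as neo4j_session:
    start_keycloak_ingestion(neo4j_session, config)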
cartography/intel/keycloak/authenticationexecutions.py

@@ -0,0 +1,322 @@
+import logging
+from collections import OrderedDict
+from typing import Any
+from urllib.parse import quote
+
+import neo4j
+import requests
+
+from cartography.client.core.tx import load
+from cartography.client.core.tx import load_matchlinks
+from cartography.graph.job import GraphJob
+from cartography.models.keycloak.authenticationexecution import (
+    ExecutionToExecutionMatchLink,
+)
+from cartography.models.keycloak.authenticationexecution import ExecutionToFlowMatchLink
+from cartography.models.keycloak.authenticationexecution import (
+    KeycloakAuthenticationExecutionSchema,
+)
+from cartography.util import timeit
+
+logger = logging.getLogger(__name__)
+# Connect and read timeouts of 60 seconds each; see https://requests.readthedocs.io/en/master/user/advanced/#timeouts
+_TIMEOUT = (60, 60)
+
+
+@timeit
+def sync(
+    neo4j_session: neo4j.Session,
+    api_session: requests.Session,
+    base_url: str,
+    common_job_parameters: dict[str, Any],
+    flow_aliases: list[str],
+) -> None:
+    exec_by_flow = get(
+        api_session,
+        base_url,
+        common_job_parameters["REALM"],
+        flow_aliases,
+    )
+    transformed_exec, flow_steps, initial_flow_steps = transform(
+        exec_by_flow, common_job_parameters["REALM"]
+    )
+    load_authenticationexecutions(
+        neo4j_session,
+        transformed_exec,
+        common_job_parameters["REALM"],
+        common_job_parameters["UPDATE_TAG"],
+    )
+    load_execution_flow(
+        neo4j_session,
+        flow_steps,
+        initial_flow_steps,
+        common_job_parameters["REALM_ID"],
+        common_job_parameters["UPDATE_TAG"],
+    )
+    cleanup(neo4j_session, common_job_parameters)
+
+
+@timeit
+def get(
+    api_session: requests.Session, base_url: str, realm: str, flow_aliases: list[str]
+) -> dict[str, list[dict[str, Any]]]:
+    """Fetch authentication execution data for each flow from Keycloak API.
+
+    Args:
+        api_session: Authenticated requests session
+        base_url: Keycloak base URL
+        realm: Target realm name
+        flow_aliases: List of authentication flow names to process
+
+    Returns:
+        Dictionary mapping flow names to their execution lists
+    """
+    results: dict[str, list[dict[str, Any]]] = {}
+    for flow_name in flow_aliases:
+        # URL-encode flow names to handle special characters safely
+        encoded_flow_name = quote(flow_name, safe="")
+        req = api_session.get(
+            f"{base_url}/admin/realms/{realm}/authentication/flows/{encoded_flow_name}/executions",
+            timeout=_TIMEOUT,
+        )
+        req.raise_for_status()
+        results[flow_name] = req.json()
+    return results
+
+
+def _recursive_transform_flow(
+    root_executions: list[dict[str, Any]],
+) -> tuple[list[str], list[tuple[str, str]], list[str]]:
+    """Recursively transforms Keycloak authentication executions into a flow graph structure.
+
+    This function processes authentication executions and builds a directed graph representation
+    suitable for Neo4j ingestion. It handles different execution requirements (REQUIRED,
+    ALTERNATIVE, CONDITIONAL, DISABLED) and nested subflows.
+
+    The function returns three components:
+    - entries: Execution IDs that serve as entry points to the flow
+    - links: Tuples representing directed edges between executions
+    - outs: Execution IDs that serve as exit points from the flow
+
+    Each execution dict must contain:
+    - id: Unique execution identifier
+    - requirement: Execution requirement type (REQUIRED/ALTERNATIVE/CONDITIONAL/DISABLED)
+    - _children: List of nested child executions (for subflows)
+
+    Args:
+        root_executions: List of execution dictionaries to process
+
+    Returns:
+        A tuple containing (entry_points, execution_links, exit_points)
+    """
+    entries: list[str] = []
+    links: list[tuple[str, str]] = []
+    outs: list[str] = []
+
+    for execution in root_executions:
+        # Skip disabled executions as they don't participate in the flow
+        if execution["requirement"] == "DISABLED":
+            continue
+
+        if execution["requirement"] == "REQUIRED":
+            # If no entry point exists, this required execution becomes the flow's starting point
+            if len(entries) == 0:
+                entries.append(execution["id"])
+
+            # Connect all current outputs to this required execution
+            for i in outs:
+                links.append((i, execution["id"]))
+
+            # Handle subflow execution: recursively process children and wire them up
+            if len(execution.get("_children", [])) > 0:
+                c_ins, c_links, c_outs = _recursive_transform_flow(
+                    execution["_children"]
+                )
+                for c_in in c_ins:
+                    links.append((execution["id"], c_in))
+                outs = c_outs
+                links.extend(c_links)
+            # For leaf executions, this becomes the sole output
+            else:
+                outs = [execution["id"]]  # Reset outs to the current execution
+
+            continue
+
+        if execution["requirement"] == "ALTERNATIVE":
+            # Alternative executions create branching paths (OR logic)
+            # This execution becomes an alternative entry point while preserving existing outputs
+            entries.append(execution["id"])
+
+            # Process subflow: wire up child inputs and aggregate child outputs
+            if len(execution.get("_children", [])) > 0:
+                c_ins, c_links, c_outs = _recursive_transform_flow(
+                    execution["_children"]
+                )
+                for c_in in c_ins:
+                    links.append((execution["id"], c_in))
+                for c_out in c_outs:
+                    outs.append(c_out)
+                links.extend(c_links)
+            else:
+                outs.append(execution["id"])
+
+            continue
+
+        if execution["requirement"] == "CONDITIONAL":
+            # Conditional executions only apply to subflows - skip if no children
+            if len(execution.get("_children", [])) == 0:
+                continue
+
+            # Conditional logic creates two possible paths:
+            # 1. Subflow evaluates to True: execution is treated as required
+            # 2. Subflow evaluates to False: execution is skipped
+
+            # Make this execution an entry point if none exist
+            if len(entries) == 0:
+                entries.append(execution["id"])
+
+            # Connect all existing outputs to this conditional execution
+            for i in outs:
+                links.append((i, execution["id"]))
+
+            # Process child executions recursively
+            c_ins, c_links, c_outs = _recursive_transform_flow(execution["_children"])
+
+            # Wire this execution to child entry points
+            for c_in in c_ins:
+                links.append((execution["id"], c_in))
+
+            # Preserve both existing outputs and child outputs to model both conditional paths
+            outs.extend(c_outs)
+
+            # Add child links to the overall link collection
+            links.extend(c_links)
+
+    return entries, links, outs
+
+
+def transform(
+    exec_by_flow: dict[str, list[dict[str, Any]]], realm: str
+) -> tuple[list[dict[str, Any]], list[dict[str, str]], list[dict[str, str]]]:
+    transformed_by_id: OrderedDict[str, dict[str, Any]] = OrderedDict()
+    initial_flow_steps: list[dict[str, str]] = []
+    flow_steps: list[dict[str, str]] = []
+
+    for flow_name, executions in exec_by_flow.items():
+        _parent_by_level: dict[int, str] = {}
+        _root_executions: list[dict[str, Any]] = []
+
+        # Transform executions to include parent flow/subflow relationships
+        # and create a hierarchical structure for graph processing
+        for execution in executions:
+            # Level 0 executions belong directly to the named flow
+            if execution["level"] == 0:
+                execution["_parent_flow"] = flow_name
+                _root_executions.append(execution)
+            else:
+                # Nested executions belong to their parent subflow
+                execution["_parent_subflow"] = _parent_by_level[execution["level"] - 1]
+                transformed_by_id[execution["_parent_subflow"]]["_children"].append(
+                    execution
+                )
+
+            # Track subflow parents for the next nesting level
+            if execution.get("authenticationFlow", True):
+                _parent_by_level[execution["level"]] = execution["id"]
+
+            execution["_children"] = []
+            execution["is_terminal_step"] = False  # Placeholder for terminal step flag
+            transformed_by_id[execution["id"]] = execution
+
+        # Process authentication flow structure and build execution graph
+        # Reference: https://www.keycloak.org/docs/latest/server_admin/index.html#_execution-requirements
+        entries, links, terminals = _recursive_transform_flow(_root_executions)
+
+        for entry in entries:
+            initial_flow_steps.append(
+                {
+                    "flow_name": flow_name,
+                    "execution_id": entry,
+                    "realm": realm,
+                }
+            )
+
+        for link in links:
+            flow_steps.append(
+                {
+                    "source": link[0],
+                    "target": link[1],
+                }
+            )
+
+        for node_id in terminals:
+            transformed_by_id[node_id]["is_terminal_step"] = True
+
+    return list(transformed_by_id.values()), flow_steps, initial_flow_steps
+
+
+@timeit
+def load_authenticationexecutions(
+    neo4j_session: neo4j.Session,
+    data: list[dict[str, Any]],
+    realm: str,
+    update_tag: int,
+) -> None:
+    logger.info(
+        "Loading %d Keycloak AuthenticationExecutions (%s) into Neo4j.",
+        len(data),
+        realm,
+    )
+    load(
+        neo4j_session,
+        KeycloakAuthenticationExecutionSchema(),
+        data,
+        LASTUPDATED=update_tag,
+        REALM=realm,
+    )
+
+
+def load_execution_flow(
+    neo4j_session: neo4j.Session,
+    flow_steps: list[dict[str, Any]],
+    initial_flow_steps: list[dict[str, str]],
+    realm_id: str,
+    update_tag: int,
+) -> None:
+    load_matchlinks(
+        neo4j_session,
+        ExecutionToExecutionMatchLink(),
+        flow_steps,
+        LASTUPDATED=update_tag,
+        _sub_resource_label="KeycloakRealm",
+        _sub_resource_id=realm_id,
+    )
+    load_matchlinks(
+        neo4j_session,
+        ExecutionToFlowMatchLink(),
+        initial_flow_steps,
+        LASTUPDATED=update_tag,
+        _sub_resource_label="KeycloakRealm",
+        _sub_resource_id=realm_id,
+    )
+
+
+@timeit
+def cleanup(
+    neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
+) -> None:
+    GraphJob.from_node_schema(
+        KeycloakAuthenticationExecutionSchema(), common_job_parameters
+    ).run(neo4j_session)
+    GraphJob.from_matchlink(
+        ExecutionToExecutionMatchLink(),
+        "KeycloakRealm",
+        common_job_parameters["REALM_ID"],
+        common_job_parameters["UPDATE_TAG"],
+    ).run(neo4j_session)
+    GraphJob.from_matchlink(
+        ExecutionToFlowMatchLink(),
+        "KeycloakRealm",
+        common_job_parameters["REALM_ID"],
+        common_job_parameters["UPDATE_TAG"],
+    ).run(neo4j_session)
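
To make the flow-graph construction above concrete, here is what _recursive_transform_flow returns for two sequential REQUIRED executions, using hypothetical execution ids:

from cartography.intel.keycloak.authenticationexecutions import _recursive_transform_flow

# Hypothetical executions from a flow with two REQUIRED steps and no subflows.
executions = [
    {"id": "exec-username", "requirement": "REQUIRED", "_children": []},
    {"id": "exec-password", "requirement": "REQUIRED", "_children": []},
]
entries, links, outs = _recursive_transform_flow(executions)
# entries == ["exec-username"]                        entry point of the flow
# links   == [("exec-username", "exec-password")]     directed edge between the steps
# outs    == ["exec-password"]                        exit point; transform() marks it is_terminal_step
print(entries, links, outs)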