cartography 0.110.0rc1__py3-none-any.whl → 0.111.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cartography/_version.py +16 -3
- cartography/cli.py +46 -8
- cartography/config.py +16 -9
- cartography/data/indexes.cypher +0 -2
- cartography/data/jobs/analysis/aws_ec2_keypair_analysis.json +2 -2
- cartography/data/jobs/analysis/keycloak_inheritance.json +30 -0
- cartography/graph/querybuilder.py +70 -0
- cartography/intel/aws/apigateway.py +113 -4
- cartography/intel/aws/cognito.py +201 -0
- cartography/intel/aws/ec2/vpc.py +140 -124
- cartography/intel/aws/ecs.py +7 -1
- cartography/intel/aws/eventbridge.py +73 -0
- cartography/intel/aws/glue.py +64 -0
- cartography/intel/aws/kms.py +13 -1
- cartography/intel/aws/rds.py +105 -0
- cartography/intel/aws/resources.py +2 -0
- cartography/intel/aws/route53.py +3 -1
- cartography/intel/aws/s3.py +104 -0
- cartography/intel/entra/__init__.py +41 -43
- cartography/intel/entra/applications.py +2 -1
- cartography/intel/entra/ou.py +1 -1
- cartography/intel/github/__init__.py +21 -25
- cartography/intel/github/repos.py +32 -48
- cartography/intel/github/util.py +12 -0
- cartography/intel/keycloak/__init__.py +153 -0
- cartography/intel/keycloak/authenticationexecutions.py +322 -0
- cartography/intel/keycloak/authenticationflows.py +77 -0
- cartography/intel/keycloak/clients.py +187 -0
- cartography/intel/keycloak/groups.py +126 -0
- cartography/intel/keycloak/identityproviders.py +94 -0
- cartography/intel/keycloak/organizations.py +163 -0
- cartography/intel/keycloak/realms.py +61 -0
- cartography/intel/keycloak/roles.py +202 -0
- cartography/intel/keycloak/scopes.py +73 -0
- cartography/intel/keycloak/users.py +70 -0
- cartography/intel/keycloak/util.py +47 -0
- cartography/intel/kubernetes/__init__.py +4 -0
- cartography/intel/kubernetes/rbac.py +464 -0
- cartography/intel/kubernetes/util.py +17 -0
- cartography/models/aws/apigateway/apigatewaydeployment.py +74 -0
- cartography/models/aws/cognito/__init__.py +0 -0
- cartography/models/aws/cognito/identity_pool.py +70 -0
- cartography/models/aws/cognito/user_pool.py +47 -0
- cartography/models/aws/ec2/security_groups.py +1 -1
- cartography/models/aws/ec2/vpc.py +46 -0
- cartography/models/aws/ec2/vpc_cidr.py +102 -0
- cartography/models/aws/ecs/services.py +17 -0
- cartography/models/aws/ecs/tasks.py +1 -0
- cartography/models/aws/eventbridge/target.py +71 -0
- cartography/models/aws/glue/job.py +69 -0
- cartography/models/aws/rds/event_subscription.py +146 -0
- cartography/models/aws/route53/dnsrecord.py +21 -0
- cartography/models/github/dependencies.py +1 -2
- cartography/models/keycloak/__init__.py +0 -0
- cartography/models/keycloak/authenticationexecution.py +160 -0
- cartography/models/keycloak/authenticationflow.py +54 -0
- cartography/models/keycloak/client.py +177 -0
- cartography/models/keycloak/group.py +101 -0
- cartography/models/keycloak/identityprovider.py +89 -0
- cartography/models/keycloak/organization.py +116 -0
- cartography/models/keycloak/organizationdomain.py +73 -0
- cartography/models/keycloak/realm.py +173 -0
- cartography/models/keycloak/role.py +126 -0
- cartography/models/keycloak/scope.py +73 -0
- cartography/models/keycloak/user.py +51 -0
- cartography/models/kubernetes/clusterrolebindings.py +98 -0
- cartography/models/kubernetes/clusterroles.py +52 -0
- cartography/models/kubernetes/rolebindings.py +119 -0
- cartography/models/kubernetes/roles.py +76 -0
- cartography/models/kubernetes/serviceaccounts.py +77 -0
- cartography/models/tailscale/device.py +1 -0
- cartography/sync.py +2 -0
- cartography/util.py +8 -0
- {cartography-0.110.0rc1.dist-info → cartography-0.111.0.dist-info}/METADATA +4 -3
- {cartography-0.110.0rc1.dist-info → cartography-0.111.0.dist-info}/RECORD +85 -46
- cartography/data/jobs/cleanup/aws_import_vpc_cleanup.json +0 -23
- cartography/intel/entra/resources.py +0 -20
- /cartography/data/jobs/{analysis → scoped_analysis}/aws_s3acl_analysis.json +0 -0
- /cartography/models/aws/{__init__.py → apigateway/__init__.py} +0 -0
- /cartography/models/aws/{apigateway.py → apigateway/apigateway.py} +0 -0
- /cartography/models/aws/{apigatewaycertificate.py → apigateway/apigatewaycertificate.py} +0 -0
- /cartography/models/aws/{apigatewayresource.py → apigateway/apigatewayresource.py} +0 -0
- /cartography/models/aws/{apigatewaystage.py → apigateway/apigatewaystage.py} +0 -0
- {cartography-0.110.0rc1.dist-info → cartography-0.111.0.dist-info}/WHEEL +0 -0
- {cartography-0.110.0rc1.dist-info → cartography-0.111.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.110.0rc1.dist-info → cartography-0.111.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.110.0rc1.dist-info → cartography-0.111.0.dist-info}/top_level.txt +0 -0

cartography/intel/github/repos.py CHANGED
@@ -41,12 +41,12 @@ UserAffiliationAndRepoPermission = namedtuple(
 
 
 GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
-    query($login: String!, $cursor: String) {
+    query($login: String!, $cursor: String, $count: Int!) {
     organization(login: $login)
     {
         url
         login
-        repositories(first:
+        repositories(first: $count, after: $cursor){
             pageInfo{
                 endCursor
                 hasNextPage
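
Note: the new $count variable replaces a previously fixed page size, which lets the pagination helper shrink pages when GitHub struggles (see the 502 handling in util.py below). A minimal sketch of how a variable like this is supplied alongside the query; the helper here is hypothetical, not cartography's actual client code:

    import requests

    def run_graphql(token: str, api_url: str, query: str, variables: dict) -> dict:
        # Hypothetical helper: POST the query plus its variables to the GraphQL endpoint.
        resp = requests.post(
            api_url,
            json={"query": query, "variables": variables},
            headers={"Authorization": f"token {token}"},
            timeout=(60, 60),
        )
        resp.raise_for_status()
        return resp.json()

    # run_graphql(token, "https://api.github.com/graphql", GITHUB_ORG_REPOS_PAGINATED_GRAPHQL,
    #             {"login": "my-org", "cursor": None, "count": 50})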

@@ -168,14 +168,22 @@ def _get_repo_collaborators_inner_func(
         repo_name = repo["name"]
         repo_url = repo["url"]
 
-
-
-
-
-
-
-
-
+        # Guard against None when collaborator fields are not accessible due to permissions.
+        direct_info = repo.get("directCollaborators")
+        outside_info = repo.get("outsideCollaborators")
+
+        if affiliation == "OUTSIDE":
+            total_outside = 0 if not outside_info else outside_info.get("totalCount", 0)
+            if total_outside == 0:
+                # No outside collaborators or not permitted to view; skip API calls for this repo.
+                result[repo_url] = []
+                continue
+        else:  # DIRECT
+            total_direct = 0 if not direct_info else direct_info.get("totalCount", 0)
+            if total_direct == 0:
+                # No direct collaborators or not permitted to view; skip API calls for this repo.
+                result[repo_url] = []
+                continue
 
         logger.info(f"Loading {affiliation} collaborators for repo {repo_name}.")
         collaborators = _get_repo_collaborators(
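
This guard matters because the GraphQL response carries None for directCollaborators / outsideCollaborators when the token lacks permission to view them, so indexing into the field would raise. A small illustration with hypothetical repo payloads:

    # Hypothetical GraphQL repo nodes: one readable, one permission-restricted.
    readable = {"outsideCollaborators": {"totalCount": 2}}
    restricted = {"outsideCollaborators": None}

    for repo in (readable, restricted):
        outside_info = repo.get("outsideCollaborators")
        # Same expression as above: None and totalCount == 0 both mean "nothing to fetch".
        total_outside = 0 if not outside_info else outside_info.get("totalCount", 0)
        print(total_outside)  # 2, then 0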

@@ -290,6 +298,7 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
         organization,
         GITHUB_ORG_REPOS_PAGINATED_GRAPHQL,
         "repositories",
+        count=50,
     )
     return repos.nodes
 
@@ -405,9 +414,16 @@ def _create_default_branch_id(repo_url: str, default_branch_ref_id: str) -> str:
 
 def _create_git_url_from_ssh_url(ssh_url: str) -> str:
     """
-
+    Convert SSH URL to git:// URL.
+    Example:
+        git@github.com:cartography-cncf/cartography.git
+        -> git://github.com/cartography-cncf/cartography.git
     """
-
+    # Remove the user part (e.g., "git@")
+    _, host_and_path = ssh_url.split("@", 1)
+    # Replace first ':' (separating host and repo) with '/'
+    host, path = host_and_path.split(":", 1)
+    return f"git://{host}/{path}"
 
 
 def _transform_repo_objects(input_repo_object: Dict, out_repo_list: List[Dict]) -> None:
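
A quick usage check of the new implementation, taken directly from its docstring:

    assert _create_git_url_from_ssh_url(
        "git@github.com:cartography-cncf/cartography.git"
    ) == "git://github.com/cartography-cncf/cartography.git"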

@@ -647,9 +663,6 @@ def _transform_dependency_graph(
         requirements = dep.get("requirements", "")
         package_manager = dep.get("packageManager", "").upper()
 
-        # Extract version from requirements string if available
-        pinned_version = _extract_version_from_requirements(requirements)
-
         # Create ecosystem-specific canonical name
         canonical_name = _canonicalize_dependency_name(
             package_name, package_manager

@@ -658,11 +671,12 @@ def _transform_dependency_graph(
         # Create ecosystem identifier
         ecosystem = package_manager.lower() if package_manager else "unknown"
 
-        # Create simple dependency ID using canonical name and
+        # Create simple dependency ID using canonical name and requirements
         # This allows the same dependency to be shared across multiple repos
+        requirements_for_id = (requirements or "").strip()
         dependency_id = (
-            f"{canonical_name}|{
-            if
+            f"{canonical_name}|{requirements_for_id}"
+            if requirements_for_id
             else canonical_name
         )
 
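
Restated as a standalone sketch to show the resulting ID scheme; the helper name here is illustrative, not part of the module:

    def make_dependency_id(canonical_name: str, requirements: str | None) -> str:
        # Mirrors the change above: fall back to the bare canonical name when requirements are empty.
        requirements_for_id = (requirements or "").strip()
        return (
            f"{canonical_name}|{requirements_for_id}"
            if requirements_for_id
            else canonical_name
        )

    assert make_dependency_id("requests", ">=2.0") == "requests|>=2.0"
    assert make_dependency_id("requests", None) == "requests"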

@@ -677,15 +691,12 @@ def _transform_dependency_graph(
             "id": dependency_id,
             "name": canonical_name,
             "original_name": package_name,  # Keep original for reference
-            "version": pinned_version,
             "requirements": normalized_requirements,
             "ecosystem": ecosystem,
             "package_manager": package_manager,
             "manifest_path": manifest_path,
             "manifest_id": manifest_id,
             "repo_url": repo_url,
-            # Add separate fields for easier querying
-            "repo_name": repo_url.split("/")[-1] if repo_url else "",
             "manifest_file": (
                 manifest_path.split("/")[-1] if manifest_path else ""
             ),
@@ -698,33 +709,6 @@ def _transform_dependency_graph(
     logger.info(f"Found {dependencies_added} dependencies in {repo_name}")
 
 
-def _extract_version_from_requirements(requirements: Optional[str]) -> Optional[str]:
-    """
-    Extract a pinned version from a requirements string if it exists.
-    Examples: "1.2.3" -> "1.2.3", "^1.2.3" -> None, ">=1.0,<2.0" -> None
-    """
-    if not requirements or not requirements.strip():
-        return None
-
-    # Handle exact version specifications (no operators)
-    if requirements and not any(
-        op in requirements for op in ["^", "~", ">", "<", "=", "*"]
-    ):
-        stripped = requirements.strip()
-        return stripped if stripped else None
-
-    # Handle == specifications
-    if "==" in requirements:
-        parts = requirements.split("==")
-        if len(parts) == 2:
-            version = parts[1].strip()
-            # Remove any trailing constraints
-            version = version.split(",")[0].split(" ")[0]
-            return version if version else None
-
-    return None
-
-
 def _canonicalize_dependency_name(name: str, package_manager: Optional[str]) -> str:
     """
     Canonicalize dependency names based on ecosystem conventions.

cartography/intel/github/util.py CHANGED
@@ -157,6 +157,18 @@ def fetch_all(
             retry += 1
             exc = err
         except requests.exceptions.HTTPError as err:
+            if (
+                err.response is not None
+                and err.response.status_code == 502
+                and kwargs.get("count")
+                and kwargs["count"] > 1
+            ):
+                kwargs["count"] = max(1, kwargs["count"] // 2)
+                logger.warning(
+                    "GitHub: Received 502 response. Reducing page size to %s and retrying.",
+                    kwargs["count"],
+                )
+                continue
             retry += 1
             exc = err
         except requests.exceptions.ChunkedEncodingError as err:
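
Together with the new count=50 default in repos.py, this makes the page size self-tuning: each 502 halves it until requests succeed or the size reaches 1, after which further 502s fall through to the ordinary retry path. The resulting backoff sequence:

    count = 50
    while count > 1:
        count = max(1, count // 2)
        print(count)  # 25, 12, 6, 3, 1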

cartography/intel/keycloak/__init__.py ADDED
@@ -0,0 +1,153 @@
+import logging
+
+import neo4j
+import requests
+
+import cartography.intel.keycloak.authenticationexecutions
+import cartography.intel.keycloak.authenticationflows
+import cartography.intel.keycloak.clients
+import cartography.intel.keycloak.groups
+import cartography.intel.keycloak.identityproviders
+import cartography.intel.keycloak.organizations
+import cartography.intel.keycloak.realms
+import cartography.intel.keycloak.roles
+import cartography.intel.keycloak.scopes
+import cartography.intel.keycloak.users
+from cartography.config import Config
+from cartography.util import timeit
+
+logger = logging.getLogger(__name__)
+_TIMEOUT = (60, 60)
+
+
+@timeit
+def start_keycloak_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
+    """
+    If this module is configured, perform ingestion of Keycloak data. Otherwise warn and exit
+    :param neo4j_session: Neo4J session for database interface
+    :param config: A cartography.config object
+    :return: None
+    """
+    if (
+        not config.keycloak_client_id
+        or not config.keycloak_client_secret
+        or not config.keycloak_url
+        or not config.keycloak_realm
+    ):
+        logger.info(
+            "Keycloak import is not configured - skipping this module. "
+            "See docs to configure.",
+        )
+        return
+
+    # Create requests sessions
+    with requests.session() as api_session:
+        payload = {
+            "grant_type": "client_credentials",
+            "client_id": config.keycloak_client_id,
+            "client_secret": config.keycloak_client_secret,
+        }
+        req = api_session.post(
+            f"{config.keycloak_url}/realms/{config.keycloak_realm}/protocol/openid-connect/token",
+            data=payload,
+            timeout=_TIMEOUT,
+        )
+        req.raise_for_status()
+        api_session.headers.update(
+            {"Authorization": f'Bearer {req.json()["access_token"]}'}
+        )
+
+        common_job_parameters = {
+            "UPDATE_TAG": config.update_tag,
+        }
+
+        for realm in cartography.intel.keycloak.realms.sync(
+            neo4j_session, api_session, config.keycloak_url, common_job_parameters
+        ):
+            realm_scopped_job_parameters = {
+                "UPDATE_TAG": config.update_tag,
+                "REALM": realm["realm"],
+                "REALM_ID": realm["id"],
+            }
+            cartography.intel.keycloak.users.sync(
+                neo4j_session,
+                api_session,
+                config.keycloak_url,
+                realm_scopped_job_parameters,
+            )
+            cartography.intel.keycloak.identityproviders.sync(
+                neo4j_session,
+                api_session,
+                config.keycloak_url,
+                realm_scopped_job_parameters,
+            )
+            scopes = cartography.intel.keycloak.scopes.sync(
+                neo4j_session,
+                api_session,
+                config.keycloak_url,
+                realm_scopped_job_parameters,
+            )
+            scope_ids = [s["id"] for s in scopes]
+            flows = cartography.intel.keycloak.authenticationflows.sync(
+                neo4j_session,
+                api_session,
+                config.keycloak_url,
+                realm_scopped_job_parameters,
+            )
+            flow_aliases_to_id = {f["alias"]: f["id"] for f in flows}
+            cartography.intel.keycloak.authenticationexecutions.sync(
+                neo4j_session,
+                api_session,
+                config.keycloak_url,
+                realm_scopped_job_parameters,
+                list(flow_aliases_to_id.keys()),
+            )
+            realm_default_flows = {
+                "browser": flow_aliases_to_id.get(realm.get("browserFlow")),
+                "registration": flow_aliases_to_id.get(realm.get("registrationFlow")),
+                "direct_grant": flow_aliases_to_id.get(realm.get("directGrantFlow")),
+                "reset_credentials": flow_aliases_to_id.get(
+                    realm.get("resetCredentialsFlow")
+                ),
+                "client_authentication": flow_aliases_to_id.get(
+                    realm.get("clientAuthenticationFlow")
+                ),
+                "docker_authentication": flow_aliases_to_id.get(
+                    realm.get("dockerAuthenticationFlow")
+                ),
+                "first_broker_login": flow_aliases_to_id.get(
+                    realm.get("firstBrokerLoginFlow")
+                ),
+            }
+
+            clients = cartography.intel.keycloak.clients.sync(
+                neo4j_session,
+                api_session,
+                config.keycloak_url,
+                realm_scopped_job_parameters,
+                realm_default_flows,
+            )
+            client_ids = [c["id"] for c in clients]
+            cartography.intel.keycloak.roles.sync(
+                neo4j_session,
+                api_session,
+                config.keycloak_url,
+                realm_scopped_job_parameters,
+                client_ids,
+                scope_ids,
+            )
+            cartography.intel.keycloak.groups.sync(
+                neo4j_session,
+                api_session,
+                config.keycloak_url,
+                realm_scopped_job_parameters,
+            )
+
+            # Organizations if they are enabled
+            if realm.get("organizationsEnabled", False):
+                cartography.intel.keycloak.organizations.sync(
+                    neo4j_session,
+                    api_session,
+                    config.keycloak_url,
+                    realm_scopped_job_parameters,
+                )
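
The module authenticates once per run with the OAuth2 client-credentials grant before any realm sync starts. A minimal standalone sketch of that exchange, assuming a local Keycloak at http://localhost:8080 and a hypothetical client in the "master" realm:

    import requests

    resp = requests.post(
        "http://localhost:8080/realms/master/protocol/openid-connect/token",  # assumed URL and realm
        data={
            "grant_type": "client_credentials",
            "client_id": "cartography",   # hypothetical client
            "client_secret": "changeme",  # hypothetical secret
        },
        timeout=(60, 60),
    )
    resp.raise_for_status()
    # Subsequent admin API calls reuse this token via the session's Authorization header.
    token = resp.json()["access_token"]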

cartography/intel/keycloak/authenticationexecutions.py ADDED
@@ -0,0 +1,322 @@
+import logging
+from collections import OrderedDict
+from typing import Any
+from urllib.parse import quote
+
+import neo4j
+import requests
+
+from cartography.client.core.tx import load
+from cartography.client.core.tx import load_matchlinks
+from cartography.graph.job import GraphJob
+from cartography.models.keycloak.authenticationexecution import (
+    ExecutionToExecutionMatchLink,
+)
+from cartography.models.keycloak.authenticationexecution import ExecutionToFlowMatchLink
+from cartography.models.keycloak.authenticationexecution import (
+    KeycloakAuthenticationExecutionSchema,
+)
+from cartography.util import timeit
+
+logger = logging.getLogger(__name__)
+# Connect and read timeouts of 60 seconds each; see https://requests.readthedocs.io/en/master/user/advanced/#timeouts
+_TIMEOUT = (60, 60)
+
+
+@timeit
+def sync(
+    neo4j_session: neo4j.Session,
+    api_session: requests.Session,
+    base_url: str,
+    common_job_parameters: dict[str, Any],
+    flow_aliases: list[str],
+) -> None:
+    exec_by_flow = get(
+        api_session,
+        base_url,
+        common_job_parameters["REALM"],
+        flow_aliases,
+    )
+    transformed_exec, flow_steps, initial_flow_steps = transform(
+        exec_by_flow, common_job_parameters["REALM"]
+    )
+    load_authenticationexecutions(
+        neo4j_session,
+        transformed_exec,
+        common_job_parameters["REALM"],
+        common_job_parameters["UPDATE_TAG"],
+    )
+    load_execution_flow(
+        neo4j_session,
+        flow_steps,
+        initial_flow_steps,
+        common_job_parameters["REALM_ID"],
+        common_job_parameters["UPDATE_TAG"],
+    )
+    cleanup(neo4j_session, common_job_parameters)
+
+
+@timeit
+def get(
+    api_session: requests.Session, base_url: str, realm: str, flow_aliases: list[str]
+) -> dict[str, list[dict[str, Any]]]:
+    """Fetch authentication execution data for each flow from Keycloak API.
+
+    Args:
+        api_session: Authenticated requests session
+        base_url: Keycloak base URL
+        realm: Target realm name
+        flow_aliases: List of authentication flow names to process
+
+    Returns:
+        Dictionary mapping flow names to their execution lists
+    """
+    results: dict[str, list[dict[str, Any]]] = {}
+    for flow_name in flow_aliases:
+        # URL-encode flow names to handle special characters safely
+        encoded_flow_name = quote(flow_name, safe="")
+        req = api_session.get(
+            f"{base_url}/admin/realms/{realm}/authentication/flows/{encoded_flow_name}/executions",
+            timeout=_TIMEOUT,
+        )
+        req.raise_for_status()
+        results[flow_name] = req.json()
+    return results
+
+
+def _recursive_transform_flow(
+    root_executions: list[dict[str, Any]],
+) -> tuple[list[str], list[tuple[str, str]], list[str]]:
+    """Recursively transforms Keycloak authentication executions into a flow graph structure.
+
+    This function processes authentication executions and builds a directed graph representation
+    suitable for Neo4j ingestion. It handles different execution requirements (REQUIRED,
+    ALTERNATIVE, CONDITIONAL, DISABLED) and nested subflows.
+
+    The function returns three components:
+    - entries: Execution IDs that serve as entry points to the flow
+    - links: Tuples representing directed edges between executions
+    - outs: Execution IDs that serve as exit points from the flow
+
+    Each execution dict must contain:
+    - id: Unique execution identifier
+    - requirement: Execution requirement type (REQUIRED/ALTERNATIVE/CONDITIONAL/DISABLED)
+    - _children: List of nested child executions (for subflows)
+
+    Args:
+        root_executions: List of execution dictionaries to process
+
+    Returns:
+        A tuple containing (entry_points, execution_links, exit_points)
+    """
+    entries: list[str] = []
+    links: list[tuple[str, str]] = []
+    outs: list[str] = []
+
+    for execution in root_executions:
+        # Skip disabled executions as they don't participate in the flow
+        if execution["requirement"] == "DISABLED":
+            continue
+
+        if execution["requirement"] == "REQUIRED":
+            # If no entry point exists, this required execution becomes the flow's starting point
+            if len(entries) == 0:
+                entries.append(execution["id"])
+
+            # Connect all current outputs to this required execution
+            for i in outs:
+                links.append((i, execution["id"]))
+
+            # Handle subflow execution: recursively process children and wire them up
+            if len(execution.get("_children", [])) > 0:
+                c_ins, c_links, c_outs = _recursive_transform_flow(
+                    execution["_children"]
+                )
+                for c_in in c_ins:
+                    links.append((execution["id"], c_in))
+                outs = c_outs
+                links.extend(c_links)
+            # For leaf executions, this becomes the sole output
+            else:
+                outs = [execution["id"]]  # Reset outs to the current execution
+
+            continue
+
+        if execution["requirement"] == "ALTERNATIVE":
+            # Alternative executions create branching paths (OR logic)
+            # This execution becomes an alternative entry point while preserving existing outputs
+            entries.append(execution["id"])
+
+            # Process subflow: wire up child inputs and aggregate child outputs
+            if len(execution.get("_children", [])) > 0:
+                c_ins, c_links, c_outs = _recursive_transform_flow(
+                    execution["_children"]
+                )
+                for c_in in c_ins:
+                    links.append((execution["id"], c_in))
+                for c_out in c_outs:
+                    outs.append(c_out)
+                links.extend(c_links)
+            else:
+                outs.append(execution["id"])
+
+            continue
+
+        if execution["requirement"] == "CONDITIONAL":
+            # Conditional executions only apply to subflows - skip if no children
+            if len(execution.get("_children", [])) == 0:
+                continue
+
+            # Conditional logic creates two possible paths:
+            # 1. Subflow evaluates to True: execution is treated as required
+            # 2. Subflow evaluates to False: execution is skipped
+
+            # Make this execution an entry point if none exist
+            if len(entries) == 0:
+                entries.append(execution["id"])
+
+            # Connect all existing outputs to this conditional execution
+            for i in outs:
+                links.append((i, execution["id"]))
+
+            # Process child executions recursively
+            c_ins, c_links, c_outs = _recursive_transform_flow(execution["_children"])
+
+            # Wire this execution to child entry points
+            for c_in in c_ins:
+                links.append((execution["id"], c_in))
+
+            # Preserve both existing outputs and child outputs to model both conditional paths
+            outs.extend(c_outs)
+
+            # Add child links to the overall link collection
+            links.extend(c_links)
+
+    return entries, links, outs
+
+
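
A small worked example of how these rules compose (execution IDs are hypothetical): two ALTERNATIVE leaves followed by a REQUIRED leaf yield two entry points that fan in to a single required exit.

    executions = [
        {"id": "password", "requirement": "ALTERNATIVE", "_children": []},
        {"id": "otp", "requirement": "ALTERNATIVE", "_children": []},
        {"id": "review", "requirement": "REQUIRED", "_children": []},
    ]
    entries, links, outs = _recursive_transform_flow(executions)
    assert entries == ["password", "otp"]                        # either alternative can start the flow
    assert links == [("password", "review"), ("otp", "review")]  # both branches converge on the required step
    assert outs == ["review"]                                    # single exit / terminal step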
+def transform(
+    exec_by_flow: dict[str, list[dict[str, Any]]], realm: str
+) -> tuple[list[dict[str, Any]], list[dict[str, str]], list[dict[str, str]]]:
+    transformed_by_id: OrderedDict[str, dict[str, Any]] = OrderedDict()
+    initial_flow_steps: list[dict[str, str]] = []
+    flow_steps: list[dict[str, str]] = []
+
+    for flow_name, executions in exec_by_flow.items():
+        _parent_by_level: dict[int, str] = {}
+        _root_executions: list[dict[str, Any]] = []
+
+        # Transform executions to include parent flow/subflow relationships
+        # and create a hierarchical structure for graph processing
+        for execution in executions:
+            # Level 0 executions belong directly to the named flow
+            if execution["level"] == 0:
+                execution["_parent_flow"] = flow_name
+                _root_executions.append(execution)
+            else:
+                # Nested executions belong to their parent subflow
+                execution["_parent_subflow"] = _parent_by_level[execution["level"] - 1]
+                transformed_by_id[execution["_parent_subflow"]]["_children"].append(
+                    execution
+                )
+
+            # Track subflow parents for the next nesting level
+            if execution.get("authenticationFlow", True):
+                _parent_by_level[execution["level"]] = execution["id"]
+
+            execution["_children"] = []
+            execution["is_terminal_step"] = False  # Placeholder for terminal step flag
+            transformed_by_id[execution["id"]] = execution
+
+        # Process authentication flow structure and build execution graph
+        # Reference: https://www.keycloak.org/docs/latest/server_admin/index.html#_execution-requirements
+        entries, links, terminals = _recursive_transform_flow(_root_executions)
+
+        for entry in entries:
+            initial_flow_steps.append(
+                {
+                    "flow_name": flow_name,
+                    "execution_id": entry,
+                    "realm": realm,
+                }
+            )
+
+        for link in links:
+            flow_steps.append(
+                {
+                    "source": link[0],
+                    "target": link[1],
+                }
+            )
+
+        for node_id in terminals:
+            transformed_by_id[node_id]["is_terminal_step"] = True
+
+    return list(transformed_by_id.values()), flow_steps, initial_flow_steps
+
+
+@timeit
+def load_authenticationexecutions(
+    neo4j_session: neo4j.Session,
+    data: list[dict[str, Any]],
+    realm: str,
+    update_tag: int,
+) -> None:
+    logger.info(
+        "Loading %d Keycloak AuthenticationExecutions (%s) into Neo4j.",
+        len(data),
+        realm,
+    )
+    load(
+        neo4j_session,
+        KeycloakAuthenticationExecutionSchema(),
+        data,
+        LASTUPDATED=update_tag,
+        REALM=realm,
+    )
+
+
+def load_execution_flow(
+    neo4j_session: neo4j.Session,
+    flow_steps: list[dict[str, Any]],
+    initial_flow_steps: list[dict[str, str]],
+    realm_id: str,
+    update_tag: int,
+) -> None:
+    load_matchlinks(
+        neo4j_session,
+        ExecutionToExecutionMatchLink(),
+        flow_steps,
+        LASTUPDATED=update_tag,
+        _sub_resource_label="KeycloakRealm",
+        _sub_resource_id=realm_id,
+    )
+    load_matchlinks(
+        neo4j_session,
+        ExecutionToFlowMatchLink(),
+        initial_flow_steps,
+        LASTUPDATED=update_tag,
+        _sub_resource_label="KeycloakRealm",
+        _sub_resource_id=realm_id,
+    )
+
+
+@timeit
+def cleanup(
+    neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
+) -> None:
+    GraphJob.from_node_schema(
+        KeycloakAuthenticationExecutionSchema(), common_job_parameters
+    ).run(neo4j_session)
+    GraphJob.from_matchlink(
+        ExecutionToExecutionMatchLink(),
+        "KeycloakRealm",
+        common_job_parameters["REALM_ID"],
+        common_job_parameters["UPDATE_TAG"],
+    ).run(neo4j_session)
+    GraphJob.from_matchlink(
+        ExecutionToFlowMatchLink(),
+        "KeycloakRealm",
+        common_job_parameters["REALM_ID"],
+        common_job_parameters["UPDATE_TAG"],
+    ).run(neo4j_session)
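
End to end, a single flow with one top-level REQUIRED execution illustrates the three values transform() returns; the IDs and realm below are hypothetical:

    exec_by_flow = {
        "browser": [
            {"id": "e1", "level": 0, "requirement": "REQUIRED"},
        ],
    }
    nodes, flow_steps, initial_flow_steps = transform(exec_by_flow, "master")
    assert nodes[0]["is_terminal_step"] is True   # the sole execution is also the exit point
    assert flow_steps == []                       # no execution-to-execution edges
    assert initial_flow_steps == [
        {"flow_name": "browser", "execution_id": "e1", "realm": "master"}
    ]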