cartography 0.113.0__py3-none-any.whl → 0.114.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +8 -0
- cartography/config.py +4 -0
- cartography/data/indexes.cypher +0 -27
- cartography/intel/aws/iam.py +741 -492
- cartography/intel/aws/organizations.py +7 -8
- cartography/intel/aws/permission_relationships.py +4 -16
- cartography/intel/azure/__init__.py +16 -0
- cartography/intel/azure/app_service.py +105 -0
- cartography/intel/azure/functions.py +124 -0
- cartography/intel/entra/__init__.py +31 -0
- cartography/intel/entra/app_role_assignments.py +277 -0
- cartography/intel/entra/applications.py +4 -238
- cartography/intel/entra/federation/__init__.py +0 -0
- cartography/intel/entra/federation/aws_identity_center.py +77 -0
- cartography/intel/entra/service_principals.py +217 -0
- cartography/intel/gcp/__init__.py +136 -440
- cartography/intel/gcp/clients.py +65 -0
- cartography/intel/gcp/compute.py +18 -44
- cartography/intel/gcp/crm/__init__.py +0 -0
- cartography/intel/gcp/crm/folders.py +108 -0
- cartography/intel/gcp/crm/orgs.py +65 -0
- cartography/intel/gcp/crm/projects.py +109 -0
- cartography/intel/gcp/gke.py +72 -113
- cartography/intel/github/__init__.py +41 -0
- cartography/intel/github/commits.py +423 -0
- cartography/intel/github/repos.py +73 -39
- cartography/models/aws/iam/access_key.py +103 -0
- cartography/models/aws/iam/account_role.py +24 -0
- cartography/models/aws/iam/federated_principal.py +60 -0
- cartography/models/aws/iam/group.py +60 -0
- cartography/models/aws/iam/group_membership.py +26 -0
- cartography/models/aws/iam/inline_policy.py +78 -0
- cartography/models/aws/iam/managed_policy.py +51 -0
- cartography/models/aws/iam/policy_statement.py +57 -0
- cartography/models/aws/iam/role.py +83 -0
- cartography/models/aws/iam/root_principal.py +52 -0
- cartography/models/aws/iam/service_principal.py +30 -0
- cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
- cartography/models/aws/iam/user.py +54 -0
- cartography/models/azure/__init__.py +0 -0
- cartography/models/azure/app_service.py +59 -0
- cartography/models/azure/function_app.py +59 -0
- cartography/models/entra/entra_user_to_aws_sso.py +41 -0
- cartography/models/entra/service_principal.py +104 -0
- cartography/models/gcp/compute/subnet.py +74 -0
- cartography/models/gcp/crm/__init__.py +0 -0
- cartography/models/gcp/crm/folders.py +98 -0
- cartography/models/gcp/crm/organizations.py +21 -0
- cartography/models/gcp/crm/projects.py +100 -0
- cartography/models/gcp/gke.py +69 -0
- cartography/models/github/commits.py +63 -0
- {cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/METADATA +7 -5
- {cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/RECORD +58 -32
- cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
- cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
- cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
- cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
- cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
- cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
- cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
- cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
- cartography/intel/gcp/crm.py +0 -355
- {cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/WHEEL +0 -0
- {cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/top_level.txt +0 -0
|
@@ -5,6 +5,7 @@ import boto3
|
|
|
5
5
|
import botocore.exceptions
|
|
6
6
|
import neo4j
|
|
7
7
|
|
|
8
|
+
from cartography.intel.aws.iam import sync_root_principal
|
|
8
9
|
from cartography.util import timeit
|
|
9
10
|
|
|
10
11
|
logger = logging.getLogger(__name__)
|
|
@@ -110,14 +111,6 @@ def load_aws_accounts(
|
|
|
110
111
|
ON CREATE SET aa.firstseen = timestamp()
|
|
111
112
|
SET aa.lastupdated = $aws_update_tag, aa.name = $ACCOUNT_NAME, aa.inscope=true
|
|
112
113
|
REMOVE aa.foreign
|
|
113
|
-
WITH aa
|
|
114
|
-
MERGE (root:AWSPrincipal{arn: $RootArn})
|
|
115
|
-
ON CREATE SET root.firstseen = timestamp(), root.type = 'AWS'
|
|
116
|
-
SET root.lastupdated = $aws_update_tag
|
|
117
|
-
WITH aa, root
|
|
118
|
-
MERGE (aa)-[r:RESOURCE]->(root)
|
|
119
|
-
ON CREATE SET r.firstseen = timestamp()
|
|
120
|
-
SET r.lastupdated = $aws_update_tag;
|
|
121
114
|
"""
|
|
122
115
|
for account_name, account_id in aws_accounts.items():
|
|
123
116
|
root_arn = f"arn:aws:iam::{account_id}:root"
|
|
@@ -128,6 +121,12 @@ def load_aws_accounts(
|
|
|
128
121
|
RootArn=root_arn,
|
|
129
122
|
aws_update_tag=aws_update_tag,
|
|
130
123
|
)
|
|
124
|
+
# Every AWS account has a root principal
|
|
125
|
+
sync_root_principal(
|
|
126
|
+
neo4j_session,
|
|
127
|
+
account_id,
|
|
128
|
+
aws_update_tag,
|
|
129
|
+
)
|
|
131
130
|
|
|
132
131
|
|
|
133
132
|
@timeit
|
|
@@ -12,6 +12,7 @@ import boto3
|
|
|
12
12
|
import neo4j
|
|
13
13
|
import yaml
|
|
14
14
|
|
|
15
|
+
from cartography.client.core.tx import read_list_of_dicts_tx
|
|
15
16
|
from cartography.graph.statement import GraphStatement
|
|
16
17
|
from cartography.util import timeit
|
|
17
18
|
|
|
@@ -210,18 +211,6 @@ def calculate_permission_relationships(
|
|
|
210
211
|
return allowed_mappings
|
|
211
212
|
|
|
212
213
|
|
|
213
|
-
def parse_statement_node(node_group: List[Any]) -> List[Any]:
|
|
214
|
-
"""Parse a dict from group of Neo4J node
|
|
215
|
-
|
|
216
|
-
Arguments:
|
|
217
|
-
node_group {[Neo4j.Node]} -- the node to parse
|
|
218
|
-
|
|
219
|
-
Returns:
|
|
220
|
-
[list] -- A list of statements from the node
|
|
221
|
-
"""
|
|
222
|
-
return [n._properties for n in node_group]
|
|
223
|
-
|
|
224
|
-
|
|
225
214
|
def compile_regex(item: str) -> Pattern:
|
|
226
215
|
r"""Compile a clause into a regex. Clause checking in AWS is case insensitive
|
|
227
216
|
The following regex symbols will be replaced to make AWS * and ? matching a regex
|
|
@@ -280,7 +269,8 @@ def get_principals_for_account(neo4j_session: neo4j.Session, account_id: str) ->
|
|
|
280
269
|
RETURN
|
|
281
270
|
DISTINCT principal.arn as principal_arn, policy.id as policy_id, collect(statements) as statements
|
|
282
271
|
"""
|
|
283
|
-
results = neo4j_session.
|
|
272
|
+
results = neo4j_session.execute_read(
|
|
273
|
+
read_list_of_dicts_tx,
|
|
284
274
|
get_policy_query,
|
|
285
275
|
AccountId=account_id,
|
|
286
276
|
)
|
|
@@ -291,9 +281,7 @@ def get_principals_for_account(neo4j_session: neo4j.Session, account_id: str) ->
|
|
|
291
281
|
statements = r["statements"]
|
|
292
282
|
if principal_arn not in principals:
|
|
293
283
|
principals[principal_arn] = {}
|
|
294
|
-
principals[principal_arn][policy_id] = compile_statement(
|
|
295
|
-
parse_statement_node(statements),
|
|
296
|
-
)
|
|
284
|
+
principals[principal_arn][policy_id] = compile_statement(statements)
|
|
297
285
|
return principals
|
|
298
286
|
|
|
299
287
|
|
|
@@ -7,8 +7,10 @@ import neo4j
|
|
|
7
7
|
from cartography.config import Config
|
|
8
8
|
from cartography.util import timeit
|
|
9
9
|
|
|
10
|
+
from . import app_service
|
|
10
11
|
from . import compute
|
|
11
12
|
from . import cosmosdb
|
|
13
|
+
from . import functions
|
|
12
14
|
from . import sql
|
|
13
15
|
from . import storage
|
|
14
16
|
from . import subscription
|
|
@@ -40,6 +42,20 @@ def _sync_one_subscription(
|
|
|
40
42
|
update_tag,
|
|
41
43
|
common_job_parameters,
|
|
42
44
|
)
|
|
45
|
+
app_service.sync(
|
|
46
|
+
neo4j_session,
|
|
47
|
+
credentials,
|
|
48
|
+
subscription_id,
|
|
49
|
+
update_tag,
|
|
50
|
+
common_job_parameters,
|
|
51
|
+
)
|
|
52
|
+
functions.sync(
|
|
53
|
+
neo4j_session,
|
|
54
|
+
credentials,
|
|
55
|
+
subscription_id,
|
|
56
|
+
update_tag,
|
|
57
|
+
common_job_parameters,
|
|
58
|
+
)
|
|
43
59
|
sql.sync(
|
|
44
60
|
neo4j_session,
|
|
45
61
|
credentials.credential,
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any
|
|
3
|
+
from typing import Dict
|
|
4
|
+
from typing import List
|
|
5
|
+
|
|
6
|
+
import neo4j
|
|
7
|
+
from azure.core.exceptions import ClientAuthenticationError
|
|
8
|
+
from azure.core.exceptions import HttpResponseError
|
|
9
|
+
from azure.mgmt.web import WebSiteManagementClient
|
|
10
|
+
|
|
11
|
+
from cartography.client.core.tx import load
|
|
12
|
+
from cartography.graph.job import GraphJob
|
|
13
|
+
from cartography.models.azure.app_service import AzureAppServiceSchema
|
|
14
|
+
from cartography.util import timeit
|
|
15
|
+
|
|
16
|
+
from .util.credentials import Credentials
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@timeit
def get_app_services(credentials: Credentials, subscription_id: str) -> List[Dict]:
    """
    Get a list of App Services from the given Azure subscription.

    :param credentials: Object whose ``credential`` attribute is handed to the
        ``WebSiteManagementClient``.
    :param subscription_id: The Azure subscription whose web apps are listed.
    :return: One ``as_dict()`` dictionary per web app returned by the API, or an
        empty list when authentication or the API call fails.
    """
    try:
        client = WebSiteManagementClient(credentials.credential, subscription_id)
        # NOTE: This is the same API call as Functions. We get all web apps
        # and then filter them in the transform stage.
        return [app.as_dict() for app in client.web_apps.list()]
    except (ClientAuthenticationError, HttpResponseError) as e:
        # Best effort: a failing subscription is logged and yields no data
        # instead of raising. Lazy %-style arguments defer the string
        # formatting to the logging framework; the rendered message is unchanged.
        logger.warning(
            "Failed to get app services for subscription %s: %s",
            subscription_id,
            e,
        )
        return []
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@timeit
|
|
39
|
+
def transform_app_services(app_services_response: List[Dict]) -> List[Dict]:
    """
    Transform the raw API response to the dictionary structure that the model expects.

    Entries whose ``kind`` contains ``"functionapp"`` are dropped; every other
    entry is reduced to the fields copied below.

    :param app_services_response: Raw web app dictionaries.
    :return: A list of dictionaries with the keys ``id``, ``name``, ``kind``,
        ``location``, ``state``, ``default_host_name`` and ``https_only``.
        Keys missing from the input are set to ``None``.
    """
    transformed_apps: List[Dict[str, Any]] = []
    for app in app_services_response:
        # ``kind`` can be absent *or* present with a None value; ``.get("kind", "")``
        # only covers the first case and ``"x" in None`` raises TypeError.
        kind = app.get("kind") or ""
        if "functionapp" in kind:
            continue
        transformed_apps.append(
            {
                "id": app.get("id"),
                "name": app.get("name"),
                # Keep the original value (including None) rather than the
                # normalized string used for the filter above.
                "kind": app.get("kind"),
                "location": app.get("location"),
                "state": app.get("state"),
                "default_host_name": app.get("default_host_name"),
                "https_only": app.get("https_only"),
            }
        )
    return transformed_apps
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@timeit
def load_app_services(
    neo4j_session: neo4j.Session,
    data: List[Dict[str, Any]],
    subscription_id: str,
    update_tag: int,
) -> None:
    """
    Write the transformed Azure App Service records to Neo4j.

    :param neo4j_session: Session used by ``load``.
    :param data: Transformed App Service dictionaries.
    :param subscription_id: Passed to ``load`` as ``AZURE_SUBSCRIPTION_ID``.
    :param update_tag: Passed to ``load`` as ``lastupdated``.
    """
    app_service_schema = AzureAppServiceSchema()
    load(
        neo4j_session,
        app_service_schema,
        data,
        lastupdated=update_tag,
        AZURE_SUBSCRIPTION_ID=subscription_id,
    )
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@timeit
def cleanup_app_services(
    neo4j_session: neo4j.Session, common_job_parameters: Dict
) -> None:
    """
    Build the cleanup job for the Azure App Service schema and run it.

    :param neo4j_session: Session the cleanup job runs against.
    :param common_job_parameters: Parameters handed to ``GraphJob.from_node_schema``.
    """
    cleanup_job = GraphJob.from_node_schema(
        AzureAppServiceSchema(),
        common_job_parameters,
    )
    cleanup_job.run(neo4j_session)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@timeit
def sync(
    neo4j_session: neo4j.Session,
    credentials: Credentials,
    subscription_id: str,
    update_tag: int,
    common_job_parameters: Dict,
) -> None:
    """
    Entry point for syncing Azure App Services: get, transform, load, cleanup.

    :param neo4j_session: Session used for loading and cleanup.
    :param credentials: Credentials used to list the web apps.
    :param subscription_id: The Azure subscription being synced.
    :param update_tag: Update tag written with the loaded data.
    :param common_job_parameters: Parameters for the cleanup job.
    """
    logger.info(f"Syncing Azure App Services for subscription {subscription_id}.")
    # Fetch and transform feed straight into the load step.
    load_app_services(
        neo4j_session,
        transform_app_services(get_app_services(credentials, subscription_id)),
        subscription_id,
        update_tag,
    )
    cleanup_app_services(neo4j_session, common_job_parameters)
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any
|
|
3
|
+
from typing import Dict
|
|
4
|
+
from typing import List
|
|
5
|
+
|
|
6
|
+
import neo4j
|
|
7
|
+
from azure.core.exceptions import ClientAuthenticationError
|
|
8
|
+
from azure.core.exceptions import HttpResponseError
|
|
9
|
+
from azure.mgmt.web import WebSiteManagementClient
|
|
10
|
+
|
|
11
|
+
from cartography.client.core.tx import load
|
|
12
|
+
from cartography.graph.job import GraphJob
|
|
13
|
+
from cartography.models.azure.function_app import AzureFunctionAppSchema
|
|
14
|
+
from cartography.util import timeit
|
|
15
|
+
|
|
16
|
+
from .util.credentials import Credentials
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@timeit
def get_function_apps(credentials: Credentials, subscription_id: str) -> List[Dict]:
    """
    List the web apps of the given Azure subscription as dictionaries.

    :param credentials: Object whose ``credential`` attribute is handed to the
        ``WebSiteManagementClient``.
    :param subscription_id: The Azure subscription whose web apps are listed.
    :return: One ``as_dict()`` dictionary per web app, or an empty list when
        authentication fails or the API returns an error.
    """
    collected: List[Dict] = []
    try:
        web_client = WebSiteManagementClient(credentials.credential, subscription_id)
        # Function Apps are a type of Web App, so every web app is listed here
        # and the filtering happens in the transform stage.
        for web_app in web_client.web_apps.list():
            collected.append(web_app.as_dict())
    except ClientAuthenticationError as auth_error:
        logger.warning(
            (
                "Failed to authenticate to get function apps for subscription '%s'. "
                "Please check your credentials. Error: %s"
            ),
            subscription_id,
            auth_error,
        )
        # Discard anything gathered before the failure.
        return []
    except HttpResponseError as api_error:
        logger.warning(
            (
                "Failed to get function apps for subscription '%s' due to an API error. "
                "Status code: %s. Message: %s"
            ),
            subscription_id,
            api_error.status_code,
            str(api_error),
        )
        return []
    return collected
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@timeit
|
|
57
|
+
def transform_function_apps(function_apps_response: List[Dict]) -> List[Dict]:
    """
    Transform the raw API response to the dictionary structure that the model expects.

    Only entries whose ``kind`` contains ``"functionapp"`` are kept.

    :param function_apps_response: Raw web app dictionaries.
    :return: A list of dictionaries with the keys ``id``, ``name``, ``kind``,
        ``location``, ``state``, ``default_host_name`` and ``https_only``.
        Keys missing from the input are set to ``None``.
    """
    transformed_apps: List[Dict[str, Any]] = []
    for app in function_apps_response:
        # ``kind`` can be absent *or* present with a None value; ``.get("kind", "")``
        # only covers the first case and ``"x" in None`` raises TypeError.
        kind = app.get("kind") or ""
        # We only want to ingest resources that are explicitly function apps.
        if "functionapp" not in kind:
            continue
        transformed_apps.append(
            {
                "id": app.get("id"),
                "name": app.get("name"),
                "kind": app.get("kind"),
                "location": app.get("location"),
                "state": app.get("state"),
                "default_host_name": app.get("default_host_name"),
                "https_only": app.get("https_only"),
            }
        )
    return transformed_apps
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@timeit
def load_function_apps(
    neo4j_session: neo4j.Session,
    data: List[Dict[str, Any]],
    subscription_id: str,
    update_tag: int,
) -> None:
    """
    Write the transformed Azure Function App records to Neo4j.

    :param neo4j_session: Session used by ``load``.
    :param data: Transformed Function App dictionaries.
    :param subscription_id: Passed to ``load`` as ``AZURE_SUBSCRIPTION_ID``.
    :param update_tag: Passed to ``load`` as ``lastupdated``.
    """
    function_app_schema = AzureFunctionAppSchema()
    load(
        neo4j_session,
        function_app_schema,
        data,
        lastupdated=update_tag,
        AZURE_SUBSCRIPTION_ID=subscription_id,
    )
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@timeit
def cleanup_function_apps(
    neo4j_session: neo4j.Session, common_job_parameters: Dict
) -> None:
    """
    Build the cleanup job for the Azure Function App schema and run it.

    :param neo4j_session: Session the cleanup job runs against.
    :param common_job_parameters: Parameters handed to ``GraphJob.from_node_schema``.
    """
    cleanup_job = GraphJob.from_node_schema(
        AzureFunctionAppSchema(),
        common_job_parameters,
    )
    cleanup_job.run(neo4j_session)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@timeit
def sync(
    neo4j_session: neo4j.Session,
    credentials: Credentials,
    subscription_id: str,
    update_tag: int,
    common_job_parameters: Dict,
) -> None:
    """
    Entry point for syncing Azure Function Apps: get, transform, load, cleanup.

    :param neo4j_session: Session used for loading and cleanup.
    :param credentials: Credentials used to list the web apps.
    :param subscription_id: The Azure subscription being synced.
    :param update_tag: Update tag written with the loaded data.
    :param common_job_parameters: Parameters for the cleanup job.
    """
    logger.info(f"Syncing Azure Function Apps for subscription {subscription_id}.")
    # Fetch and transform feed straight into the load step.
    load_function_apps(
        neo4j_session,
        transform_function_apps(get_function_apps(credentials, subscription_id)),
        subscription_id,
        update_tag,
    )
    cleanup_function_apps(neo4j_session, common_job_parameters)
|
|
@@ -6,9 +6,12 @@ from azure.identity import ClientSecretCredential
|
|
|
6
6
|
from msgraph import GraphServiceClient
|
|
7
7
|
|
|
8
8
|
from cartography.config import Config
|
|
9
|
+
from cartography.intel.entra.app_role_assignments import sync_app_role_assignments
|
|
9
10
|
from cartography.intel.entra.applications import sync_entra_applications
|
|
11
|
+
from cartography.intel.entra.federation.aws_identity_center import sync_entra_federation
|
|
10
12
|
from cartography.intel.entra.groups import sync_entra_groups
|
|
11
13
|
from cartography.intel.entra.ou import sync_entra_ous
|
|
14
|
+
from cartography.intel.entra.service_principals import sync_service_principals
|
|
12
15
|
from cartography.intel.entra.users import get_tenant
|
|
13
16
|
from cartography.intel.entra.users import load_tenant
|
|
14
17
|
from cartography.intel.entra.users import sync_entra_users
|
|
@@ -125,5 +128,33 @@ def start_entra_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
|
|
|
125
128
|
common_job_parameters,
|
|
126
129
|
)
|
|
127
130
|
|
|
131
|
+
# Run service principals sync
|
|
132
|
+
await sync_service_principals(
|
|
133
|
+
neo4j_session,
|
|
134
|
+
config.entra_tenant_id,
|
|
135
|
+
config.entra_client_id,
|
|
136
|
+
config.entra_client_secret,
|
|
137
|
+
config.update_tag,
|
|
138
|
+
common_job_parameters,
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
# Run app role assignments sync
|
|
142
|
+
await sync_app_role_assignments(
|
|
143
|
+
neo4j_session,
|
|
144
|
+
config.entra_tenant_id,
|
|
145
|
+
config.entra_client_id,
|
|
146
|
+
config.entra_client_secret,
|
|
147
|
+
config.update_tag,
|
|
148
|
+
common_job_parameters,
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
# Run federation sync (after all resources are synced)
|
|
152
|
+
await sync_entra_federation(
|
|
153
|
+
neo4j_session,
|
|
154
|
+
config.update_tag,
|
|
155
|
+
config.entra_tenant_id,
|
|
156
|
+
common_job_parameters,
|
|
157
|
+
)
|
|
158
|
+
|
|
128
159
|
# Execute syncs in sequence
|
|
129
160
|
asyncio.run(main())
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
import gc
|
|
2
|
+
from typing import Any
|
|
3
|
+
from typing import AsyncGenerator
|
|
4
|
+
|
|
5
|
+
import neo4j
|
|
6
|
+
from azure.identity import ClientSecretCredential
|
|
7
|
+
from msgraph import GraphServiceClient
|
|
8
|
+
from msgraph.generated.models.app_role_assignment_collection_response import (
|
|
9
|
+
AppRoleAssignmentCollectionResponse,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
from cartography.client.core.tx import load
|
|
13
|
+
from cartography.client.core.tx import read_list_of_values_tx
|
|
14
|
+
from cartography.client.core.tx import read_single_value_tx
|
|
15
|
+
from cartography.graph.job import GraphJob
|
|
16
|
+
from cartography.intel.entra.applications import APP_ROLE_ASSIGNMENTS_PAGE_SIZE
|
|
17
|
+
from cartography.intel.entra.applications import logger
|
|
18
|
+
from cartography.models.entra.app_role_assignment import EntraAppRoleAssignmentSchema
|
|
19
|
+
from cartography.util import timeit
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@timeit
async def get_app_role_assignments_for_app(
    client: GraphServiceClient, neo4j_session: neo4j.Session, app_id: str
) -> AsyncGenerator[dict[str, Any], None]:
    """
    Gets app role assignments for a single application by querying the graph for service principal ID.

    If no service principal with this ``app_id`` is found in the graph, a warning
    is logged and nothing is yielded. Assignments without a ``principal_id`` are
    counted as skipped and not yielded.

    :param client: GraphServiceClient
    :param neo4j_session: Neo4j session for querying service principal
    :param app_id: Application ID
    :return: Async generator of app role assignment data as dicts
    """
    logger.info(f"Fetching role assignments for application: {app_id}")

    # Query the graph to get the service principal ID for this application
    query = """
    MATCH (sp:EntraServicePrincipal {app_id: $app_id})
    RETURN sp.id as service_principal_id
    """
    service_principal_id = neo4j_session.execute_read(
        read_single_value_tx, query, app_id=app_id
    )

    if not service_principal_id:
        logger.warning(
            f"No service principal found in graph for application {app_id}. Continuing."
        )
        return

    # Build the appRoleAssignedTo request builder once; it is reused for the
    # request configuration classes, the first page and every following page.
    assigned_to_builder = client.service_principals.by_service_principal_id(
        service_principal_id
    ).app_role_assigned_to

    # Get assignments for this service principal with pagination and limits
    # Use maximum page size (999) to get more data per request
    # Memory is managed through streaming and batching, not page size
    request_config = assigned_to_builder.AppRoleAssignedToRequestBuilderGetRequestConfiguration(
        query_parameters=assigned_to_builder.AppRoleAssignedToRequestBuilderGetQueryParameters(
            top=APP_ROLE_ASSIGNMENTS_PAGE_SIZE  # Maximum allowed by Microsoft Graph API
        )
    )

    assignments_page: AppRoleAssignmentCollectionResponse | None = (
        await assigned_to_builder.get(request_configuration=request_config)
    )

    assignment_count = 0
    page_count = 0

    while assignments_page:
        page_count += 1

        if assignments_page.value:
            page_valid_count = 0
            page_skipped_count = 0

            # Process assignments and immediately yield to avoid accumulation
            for assignment in assignments_page.value:
                # Only yield assignments that carry a principal_id; anything
                # else is counted as skipped for the page summary below.
                if assignment.principal_id:
                    assignment_count += 1
                    page_valid_count += 1
                    yield {
                        "id": assignment.id,
                        "app_role_id": assignment.app_role_id,
                        "created_date_time": assignment.created_date_time,
                        "principal_id": assignment.principal_id,
                        "principal_display_name": assignment.principal_display_name,
                        "principal_type": assignment.principal_type,
                        "resource_display_name": assignment.resource_display_name,
                        "resource_id": assignment.resource_id,
                        "application_app_id": app_id,
                    }
                else:
                    page_skipped_count += 1

            # Log page results with details about skipped objects
            if page_skipped_count > 0:
                logger.warning(
                    f"Page {page_count} for {app_id}: {page_valid_count} valid assignments, "
                    f"{page_skipped_count} skipped objects. Total valid: {assignment_count}"
                )
            else:
                logger.debug(
                    f"Page {page_count} for {app_id}: {page_valid_count} assignments. "
                    f"Total: {assignment_count}"
                )

        # Force garbage collection after each page
        gc.collect()

        # Check if we have more pages to fetch
        if not assignments_page.odata_next_link:
            break

        # Clear previous page before fetching next
        assignments_page.value = None

        # Fetch next page
        logger.debug(f"Fetching page {page_count + 1} of assignments for {app_id}")
        next_page_url = assignments_page.odata_next_link
        # Follow the next link with the appRoleAssignedTo builder so the response
        # is parsed as app role assignments. Going through
        # ``client.service_principals.with_url(...)`` would parse the page as a
        # service principal collection instead.
        assignments_page = await assigned_to_builder.with_url(next_page_url).get()

    logger.info(
        f"Successfully retrieved {assignment_count} assignments for application {app_id} (pages: {page_count})"
    )
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def transform_app_role_assignments(
    assignments: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """
    Transform app role assignment data for graph loading.

    ``app_role_id``, ``principal_id`` and ``resource_id`` are converted to strings
    (falsy values become ``None``); all other fields are copied unchanged.

    :param assignments: Raw app role assignment data as dicts
    :return: Transformed assignment data for graph loading
    :raises KeyError: If an assignment dict lacks one of the expected keys
    """

    def _str_or_none(value: Any) -> Any:
        # Falsy IDs (None, empty string) are normalized to None; everything
        # else is stringified.
        return str(value) if value else None

    return [
        {
            "id": assign["id"],
            "app_role_id": _str_or_none(assign["app_role_id"]),
            "created_date_time": assign["created_date_time"],
            "principal_id": _str_or_none(assign["principal_id"]),
            "principal_display_name": assign["principal_display_name"],
            "principal_type": assign["principal_type"],
            "resource_display_name": assign["resource_display_name"],
            "resource_id": _str_or_none(assign["resource_id"]),
            "application_app_id": assign["application_app_id"],
        }
        for assign in assignments
    ]
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@timeit
def load_app_role_assignments(
    neo4j_session: neo4j.Session,
    assignments_data: list[dict[str, Any]],
    update_tag: int,
    tenant_id: str,
) -> None:
    """
    Write Entra app role assignments to the graph.

    :param neo4j_session: Neo4j session
    :param assignments_data: Assignment data to load
    :param update_tag: Passed to ``load`` as ``lastupdated``
    :param tenant_id: Entra tenant ID, passed to ``load`` as ``TENANT_ID``
    """
    assignment_schema = EntraAppRoleAssignmentSchema()
    load(
        neo4j_session,
        assignment_schema,
        assignments_data,
        lastupdated=update_tag,
        TENANT_ID=tenant_id,
    )
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@timeit
def cleanup_app_role_assignments(
    neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
) -> None:
    """
    Run the cleanup job built from the Entra app role assignment schema.

    :param neo4j_session: Neo4j session
    :param common_job_parameters: Parameters handed to ``GraphJob.from_node_schema``
    """
    cleanup_job = GraphJob.from_node_schema(
        EntraAppRoleAssignmentSchema(),
        common_job_parameters,
    )
    cleanup_job.run(neo4j_session)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
@timeit
async def sync_app_role_assignments(
    neo4j_session: neo4j.Session,
    tenant_id: str,
    client_id: str,
    client_secret: str,
    update_tag: int,
    common_job_parameters: dict[str, Any],
) -> None:
    """
    Sync Entra app role assignments to the graph.

    Application IDs are read from the graph, their assignments are streamed from
    the API and loaded in batches, then the cleanup job runs.

    :param neo4j_session: Neo4j session
    :param tenant_id: Entra tenant ID
    :param client_id: Azure application client ID
    :param client_secret: Azure application client secret
    :param update_tag: Update tag for tracking data freshness
    :param common_job_parameters: Common job parameters for cleanup
    """
    graph_client = GraphServiceClient(
        ClientSecretCredential(
            tenant_id=tenant_id,
            client_id=client_id,
            client_secret=client_secret,
        ),
        scopes=["https://graph.microsoft.com/.default"],
    )
    batch_limit = 200  # Number of assignments loaded per batch
    pending = []
    synced_count = 0

    def _transform_and_load():
        # Transform whatever is pending, load it, and hand back the transformed
        # rows so the caller can release them.
        rows = transform_app_role_assignments(pending)
        load_app_role_assignments(neo4j_session, rows, update_tag, tenant_id)
        return rows

    # Application IDs come from the graph rather than from the API
    app_ids = neo4j_session.execute_read(
        read_list_of_values_tx,
        "MATCH (app:EntraApplication) RETURN app.app_id",
    )

    for app_id in app_ids:
        async for assignment in get_app_role_assignments_for_app(
            graph_client, neo4j_session, app_id
        ):
            pending.append(assignment)
            synced_count += 1

            # Keep accumulating until a full batch is available
            if len(pending) < batch_limit:
                continue

            loaded_rows = _transform_and_load()
            logger.debug(f"Loaded batch of {len(pending)} assignments")
            pending.clear()
            loaded_rows.clear()

            # Force garbage collection after batch load
            gc.collect()

    # Load whatever is left over after the last full batch
    if pending:
        loaded_rows = _transform_and_load()
        pending.clear()
        loaded_rows.clear()

    cleanup_app_role_assignments(neo4j_session, common_job_parameters)
    logger.info(f"Completed syncing {synced_count} app role assignments")
    # Final garbage collection
    gc.collect()
|