cartography 0.113.0__py3-none-any.whl → 0.114.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (69) hide show
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +8 -0
  3. cartography/config.py +4 -0
  4. cartography/data/indexes.cypher +0 -27
  5. cartography/intel/aws/iam.py +741 -492
  6. cartography/intel/aws/organizations.py +7 -8
  7. cartography/intel/aws/permission_relationships.py +4 -16
  8. cartography/intel/azure/__init__.py +16 -0
  9. cartography/intel/azure/app_service.py +105 -0
  10. cartography/intel/azure/functions.py +124 -0
  11. cartography/intel/entra/__init__.py +31 -0
  12. cartography/intel/entra/app_role_assignments.py +277 -0
  13. cartography/intel/entra/applications.py +4 -238
  14. cartography/intel/entra/federation/__init__.py +0 -0
  15. cartography/intel/entra/federation/aws_identity_center.py +77 -0
  16. cartography/intel/entra/service_principals.py +217 -0
  17. cartography/intel/gcp/__init__.py +136 -440
  18. cartography/intel/gcp/clients.py +65 -0
  19. cartography/intel/gcp/compute.py +18 -44
  20. cartography/intel/gcp/crm/__init__.py +0 -0
  21. cartography/intel/gcp/crm/folders.py +108 -0
  22. cartography/intel/gcp/crm/orgs.py +65 -0
  23. cartography/intel/gcp/crm/projects.py +109 -0
  24. cartography/intel/gcp/gke.py +72 -113
  25. cartography/intel/github/__init__.py +41 -0
  26. cartography/intel/github/commits.py +423 -0
  27. cartography/intel/github/repos.py +73 -39
  28. cartography/models/aws/iam/access_key.py +103 -0
  29. cartography/models/aws/iam/account_role.py +24 -0
  30. cartography/models/aws/iam/federated_principal.py +60 -0
  31. cartography/models/aws/iam/group.py +60 -0
  32. cartography/models/aws/iam/group_membership.py +26 -0
  33. cartography/models/aws/iam/inline_policy.py +78 -0
  34. cartography/models/aws/iam/managed_policy.py +51 -0
  35. cartography/models/aws/iam/policy_statement.py +57 -0
  36. cartography/models/aws/iam/role.py +83 -0
  37. cartography/models/aws/iam/root_principal.py +52 -0
  38. cartography/models/aws/iam/service_principal.py +30 -0
  39. cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
  40. cartography/models/aws/iam/user.py +54 -0
  41. cartography/models/azure/__init__.py +0 -0
  42. cartography/models/azure/app_service.py +59 -0
  43. cartography/models/azure/function_app.py +59 -0
  44. cartography/models/entra/entra_user_to_aws_sso.py +41 -0
  45. cartography/models/entra/service_principal.py +104 -0
  46. cartography/models/gcp/compute/subnet.py +74 -0
  47. cartography/models/gcp/crm/__init__.py +0 -0
  48. cartography/models/gcp/crm/folders.py +98 -0
  49. cartography/models/gcp/crm/organizations.py +21 -0
  50. cartography/models/gcp/crm/projects.py +100 -0
  51. cartography/models/gcp/gke.py +69 -0
  52. cartography/models/github/commits.py +63 -0
  53. {cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/METADATA +7 -5
  54. {cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/RECORD +58 -32
  55. cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
  56. cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
  57. cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
  58. cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
  59. cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
  60. cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
  61. cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
  62. cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
  63. cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
  64. cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
  65. cartography/intel/gcp/crm.py +0 -355
  66. {cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/WHEEL +0 -0
  67. {cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/entry_points.txt +0 -0
  68. {cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/licenses/LICENSE +0 -0
  69. {cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/top_level.txt +0 -0
@@ -5,6 +5,7 @@ import boto3
5
5
  import botocore.exceptions
6
6
  import neo4j
7
7
 
8
+ from cartography.intel.aws.iam import sync_root_principal
8
9
  from cartography.util import timeit
9
10
 
10
11
  logger = logging.getLogger(__name__)
@@ -110,14 +111,6 @@ def load_aws_accounts(
110
111
  ON CREATE SET aa.firstseen = timestamp()
111
112
  SET aa.lastupdated = $aws_update_tag, aa.name = $ACCOUNT_NAME, aa.inscope=true
112
113
  REMOVE aa.foreign
113
- WITH aa
114
- MERGE (root:AWSPrincipal{arn: $RootArn})
115
- ON CREATE SET root.firstseen = timestamp(), root.type = 'AWS'
116
- SET root.lastupdated = $aws_update_tag
117
- WITH aa, root
118
- MERGE (aa)-[r:RESOURCE]->(root)
119
- ON CREATE SET r.firstseen = timestamp()
120
- SET r.lastupdated = $aws_update_tag;
121
114
  """
122
115
  for account_name, account_id in aws_accounts.items():
123
116
  root_arn = f"arn:aws:iam::{account_id}:root"
@@ -128,6 +121,12 @@ def load_aws_accounts(
128
121
  RootArn=root_arn,
129
122
  aws_update_tag=aws_update_tag,
130
123
  )
124
+ # Every AWS account has a root principal
125
+ sync_root_principal(
126
+ neo4j_session,
127
+ account_id,
128
+ aws_update_tag,
129
+ )
131
130
 
132
131
 
133
132
  @timeit
@@ -12,6 +12,7 @@ import boto3
12
12
  import neo4j
13
13
  import yaml
14
14
 
15
+ from cartography.client.core.tx import read_list_of_dicts_tx
15
16
  from cartography.graph.statement import GraphStatement
16
17
  from cartography.util import timeit
17
18
 
@@ -210,18 +211,6 @@ def calculate_permission_relationships(
210
211
  return allowed_mappings
211
212
 
212
213
 
213
- def parse_statement_node(node_group: List[Any]) -> List[Any]:
214
- """Parse a dict from group of Neo4J node
215
-
216
- Arguments:
217
- node_group {[Neo4j.Node]} -- the node to parse
218
-
219
- Returns:
220
- [list] -- A list of statements from the node
221
- """
222
- return [n._properties for n in node_group]
223
-
224
-
225
214
  def compile_regex(item: str) -> Pattern:
226
215
  r"""Compile a clause into a regex. Clause checking in AWS is case insensitive
227
216
  The following regex symbols will be replaced to make AWS * and ? matching a regex
@@ -280,7 +269,8 @@ def get_principals_for_account(neo4j_session: neo4j.Session, account_id: str) ->
280
269
  RETURN
281
270
  DISTINCT principal.arn as principal_arn, policy.id as policy_id, collect(statements) as statements
282
271
  """
283
- results = neo4j_session.run(
272
+ results = neo4j_session.execute_read(
273
+ read_list_of_dicts_tx,
284
274
  get_policy_query,
285
275
  AccountId=account_id,
286
276
  )
@@ -291,9 +281,7 @@ def get_principals_for_account(neo4j_session: neo4j.Session, account_id: str) ->
291
281
  statements = r["statements"]
292
282
  if principal_arn not in principals:
293
283
  principals[principal_arn] = {}
294
- principals[principal_arn][policy_id] = compile_statement(
295
- parse_statement_node(statements),
296
- )
284
+ principals[principal_arn][policy_id] = compile_statement(statements)
297
285
  return principals
298
286
 
299
287
 
@@ -7,8 +7,10 @@ import neo4j
7
7
  from cartography.config import Config
8
8
  from cartography.util import timeit
9
9
 
10
+ from . import app_service
10
11
  from . import compute
11
12
  from . import cosmosdb
13
+ from . import functions
12
14
  from . import sql
13
15
  from . import storage
14
16
  from . import subscription
@@ -40,6 +42,20 @@ def _sync_one_subscription(
40
42
  update_tag,
41
43
  common_job_parameters,
42
44
  )
45
+ app_service.sync(
46
+ neo4j_session,
47
+ credentials,
48
+ subscription_id,
49
+ update_tag,
50
+ common_job_parameters,
51
+ )
52
+ functions.sync(
53
+ neo4j_session,
54
+ credentials,
55
+ subscription_id,
56
+ update_tag,
57
+ common_job_parameters,
58
+ )
43
59
  sql.sync(
44
60
  neo4j_session,
45
61
  credentials.credential,
@@ -0,0 +1,105 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import neo4j
7
+ from azure.core.exceptions import ClientAuthenticationError
8
+ from azure.core.exceptions import HttpResponseError
9
+ from azure.mgmt.web import WebSiteManagementClient
10
+
11
+ from cartography.client.core.tx import load
12
+ from cartography.graph.job import GraphJob
13
+ from cartography.models.azure.app_service import AzureAppServiceSchema
14
+ from cartography.util import timeit
15
+
16
+ from .util.credentials import Credentials
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
@timeit
def get_app_services(credentials: Credentials, subscription_id: str) -> List[Dict]:
    """
    Fetch every web app of an Azure subscription as a list of plain dicts.

    A WebSiteManagementClient is built from ``credentials.credential`` and each
    item produced by ``web_apps.list()`` is converted with ``as_dict()``. When a
    ClientAuthenticationError or HttpResponseError is raised, a warning is
    logged and an empty list is returned.
    """
    web_apps: List[Dict] = []
    try:
        web_client = WebSiteManagementClient(credentials.credential, subscription_id)
        # NOTE: Function Apps are returned by this same API call; separating
        # them from App Services is left to the transform stage.
        web_apps = [site.as_dict() for site in web_client.web_apps.list()]
    except (ClientAuthenticationError, HttpResponseError) as err:
        logger.warning(
            f"Failed to get app services for subscription {subscription_id}: {str(err)}"
        )
    return web_apps
36
+
37
+
38
@timeit
def transform_app_services(app_services_response: List[Dict]) -> List[Dict]:
    """
    Transform the raw API response to the dictionary structure that the model expects.

    Entries whose ``kind`` contains "functionapp" are dropped; every other
    entry (including one whose ``kind`` is missing or None) is reduced to the
    fields listed in ``copied_fields``. Fields absent from an entry come out
    as None.

    :param app_services_response: Web app dicts as returned by get_app_services
    :return: One dict per non-function web app, restricted to the copied fields
    """
    copied_fields = (
        "id",
        "name",
        "kind",
        "location",
        "state",
        "default_host_name",
        "https_only",
    )
    transformed_apps: List[Dict[str, Any]] = []
    for app in app_services_response:
        # ``kind`` can be absent or explicitly None; normalise both to "" so
        # the substring test cannot raise TypeError.
        if "functionapp" in (app.get("kind") or ""):
            continue
        transformed_apps.append({field: app.get(field) for field in copied_fields})
    return transformed_apps
57
+
58
+
59
@timeit
def load_app_services(
    neo4j_session: neo4j.Session,
    data: List[Dict[str, Any]],
    subscription_id: str,
    update_tag: int,
) -> None:
    """
    Write transformed Azure App Service records to Neo4j.

    Delegates to ``load`` with an AzureAppServiceSchema instance, passing the
    update tag as ``lastupdated`` and the subscription as
    ``AZURE_SUBSCRIPTION_ID``.
    """
    load_kwargs = {
        "lastupdated": update_tag,
        "AZURE_SUBSCRIPTION_ID": subscription_id,
    }
    load(neo4j_session, AzureAppServiceSchema(), data, **load_kwargs)
76
+
77
+
78
@timeit
def cleanup_app_services(
    neo4j_session: neo4j.Session, common_job_parameters: Dict
) -> None:
    """
    Build the cleanup GraphJob for AzureAppServiceSchema and run it on the session.
    """
    cleanup_job = GraphJob.from_node_schema(
        AzureAppServiceSchema(), common_job_parameters
    )
    cleanup_job.run(neo4j_session)
88
+
89
+
90
@timeit
def sync(
    neo4j_session: neo4j.Session,
    credentials: Credentials,
    subscription_id: str,
    update_tag: int,
    common_job_parameters: Dict,
) -> None:
    """
    Entry point for the Azure App Services sync of one subscription.

    Runs the stages in order: get, transform, load, cleanup.
    """
    logger.info(f"Syncing Azure App Services for subscription {subscription_id}.")
    app_rows = transform_app_services(get_app_services(credentials, subscription_id))
    load_app_services(neo4j_session, app_rows, subscription_id, update_tag)
    cleanup_app_services(neo4j_session, common_job_parameters)
@@ -0,0 +1,124 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import neo4j
7
+ from azure.core.exceptions import ClientAuthenticationError
8
+ from azure.core.exceptions import HttpResponseError
9
+ from azure.mgmt.web import WebSiteManagementClient
10
+
11
+ from cartography.client.core.tx import load
12
+ from cartography.graph.job import GraphJob
13
+ from cartography.models.azure.function_app import AzureFunctionAppSchema
14
+ from cartography.util import timeit
15
+
16
+ from .util.credentials import Credentials
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
@timeit
def get_function_apps(credentials: Credentials, subscription_id: str) -> List[Dict]:
    """
    Fetch every web app of an Azure subscription as a list of plain dicts.

    Function Apps are a type of Web App, so all web apps are listed here and
    the filtering happens in the transform stage. Authentication failures and
    other HTTP response errors are each logged as a warning, and an empty list
    is returned in both cases.
    """
    function_apps: List[Dict] = []
    try:
        web_client = WebSiteManagementClient(credentials.credential, subscription_id)
        function_apps = [site.as_dict() for site in web_client.web_apps.list()]
    except ClientAuthenticationError as auth_err:
        # Handled before HttpResponseError so credential problems get their
        # own, more specific message.
        logger.warning(
            "Failed to authenticate to get function apps for subscription '%s'. "
            "Please check your credentials. Error: %s",
            subscription_id,
            auth_err,
        )
    except HttpResponseError as http_err:
        logger.warning(
            "Failed to get function apps for subscription '%s' due to an API error. "
            "Status code: %s. Message: %s",
            subscription_id,
            http_err.status_code,
            str(http_err),
        )
    return function_apps
54
+
55
+
56
@timeit
def transform_function_apps(function_apps_response: List[Dict]) -> List[Dict]:
    """
    Transform the raw API response to the dictionary structure that the model expects.

    Only entries whose ``kind`` contains "functionapp" are kept; an entry
    whose ``kind`` is missing or None is skipped. Each kept entry is reduced
    to the fields listed in ``copied_fields``, and fields absent from an entry
    come out as None.

    :param function_apps_response: Web app dicts as returned by get_function_apps
    :return: One dict per function app, restricted to the copied fields
    """
    copied_fields = (
        "id",
        "name",
        "kind",
        "location",
        "state",
        "default_host_name",
        "https_only",
    )
    transformed_apps: List[Dict[str, Any]] = []
    for app in function_apps_response:
        # We only want to ingest resources that are explicitly function apps.
        # ``kind`` can be absent or explicitly None; normalise both to "" so
        # the substring test cannot raise TypeError.
        if "functionapp" not in (app.get("kind") or ""):
            continue
        transformed_apps.append({field: app.get(field) for field in copied_fields})
    return transformed_apps
76
+
77
+
78
@timeit
def load_function_apps(
    neo4j_session: neo4j.Session,
    data: List[Dict[str, Any]],
    subscription_id: str,
    update_tag: int,
) -> None:
    """
    Write transformed Azure Function App records to Neo4j.

    Delegates to ``load`` with an AzureFunctionAppSchema instance, passing the
    update tag as ``lastupdated`` and the subscription as
    ``AZURE_SUBSCRIPTION_ID``.
    """
    load_kwargs = {
        "lastupdated": update_tag,
        "AZURE_SUBSCRIPTION_ID": subscription_id,
    }
    load(neo4j_session, AzureFunctionAppSchema(), data, **load_kwargs)
95
+
96
+
97
@timeit
def cleanup_function_apps(
    neo4j_session: neo4j.Session, common_job_parameters: Dict
) -> None:
    """
    Build the cleanup GraphJob for AzureFunctionAppSchema and run it on the session.
    """
    cleanup_job = GraphJob.from_node_schema(
        AzureFunctionAppSchema(), common_job_parameters
    )
    cleanup_job.run(neo4j_session)
107
+
108
+
109
@timeit
def sync(
    neo4j_session: neo4j.Session,
    credentials: Credentials,
    subscription_id: str,
    update_tag: int,
    common_job_parameters: Dict,
) -> None:
    """
    Entry point for the Azure Function Apps sync of one subscription.

    Runs the stages in order: get, transform, load, cleanup.
    """
    logger.info(f"Syncing Azure Function Apps for subscription {subscription_id}.")
    function_rows = transform_function_apps(
        get_function_apps(credentials, subscription_id)
    )
    load_function_apps(neo4j_session, function_rows, subscription_id, update_tag)
    cleanup_function_apps(neo4j_session, common_job_parameters)
@@ -6,9 +6,12 @@ from azure.identity import ClientSecretCredential
6
6
  from msgraph import GraphServiceClient
7
7
 
8
8
  from cartography.config import Config
9
+ from cartography.intel.entra.app_role_assignments import sync_app_role_assignments
9
10
  from cartography.intel.entra.applications import sync_entra_applications
11
+ from cartography.intel.entra.federation.aws_identity_center import sync_entra_federation
10
12
  from cartography.intel.entra.groups import sync_entra_groups
11
13
  from cartography.intel.entra.ou import sync_entra_ous
14
+ from cartography.intel.entra.service_principals import sync_service_principals
12
15
  from cartography.intel.entra.users import get_tenant
13
16
  from cartography.intel.entra.users import load_tenant
14
17
  from cartography.intel.entra.users import sync_entra_users
@@ -125,5 +128,33 @@ def start_entra_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
125
128
  common_job_parameters,
126
129
  )
127
130
 
131
+ # Run service principals sync
132
+ await sync_service_principals(
133
+ neo4j_session,
134
+ config.entra_tenant_id,
135
+ config.entra_client_id,
136
+ config.entra_client_secret,
137
+ config.update_tag,
138
+ common_job_parameters,
139
+ )
140
+
141
+ # Run app role assignments sync
142
+ await sync_app_role_assignments(
143
+ neo4j_session,
144
+ config.entra_tenant_id,
145
+ config.entra_client_id,
146
+ config.entra_client_secret,
147
+ config.update_tag,
148
+ common_job_parameters,
149
+ )
150
+
151
+ # Run federation sync (after all resources are synced)
152
+ await sync_entra_federation(
153
+ neo4j_session,
154
+ config.update_tag,
155
+ config.entra_tenant_id,
156
+ common_job_parameters,
157
+ )
158
+
128
159
  # Execute syncs in sequence
129
160
  asyncio.run(main())
@@ -0,0 +1,277 @@
1
+ import gc
2
+ from typing import Any
3
+ from typing import AsyncGenerator
4
+
5
+ import neo4j
6
+ from azure.identity import ClientSecretCredential
7
+ from msgraph import GraphServiceClient
8
+ from msgraph.generated.models.app_role_assignment_collection_response import (
9
+ AppRoleAssignmentCollectionResponse,
10
+ )
11
+
12
+ from cartography.client.core.tx import load
13
+ from cartography.client.core.tx import read_list_of_values_tx
14
+ from cartography.client.core.tx import read_single_value_tx
15
+ from cartography.graph.job import GraphJob
16
+ from cartography.intel.entra.applications import APP_ROLE_ASSIGNMENTS_PAGE_SIZE
17
+ from cartography.intel.entra.applications import logger
18
+ from cartography.models.entra.app_role_assignment import EntraAppRoleAssignmentSchema
19
+ from cartography.util import timeit
20
+
21
+
22
@timeit
async def get_app_role_assignments_for_app(
    client: GraphServiceClient, neo4j_session: neo4j.Session, app_id: str
) -> AsyncGenerator[dict[str, Any], None]:
    """
    Stream the app role assignments of a single application, page by page.

    The service principal ID for the application is read from the graph (an
    EntraServicePrincipal node matched on ``app_id``). If none is found, a
    warning is logged and nothing is yielded. Otherwise the service
    principal's ``app_role_assigned_to`` collection is requested with a page
    size of APP_ROLE_ASSIGNMENTS_PAGE_SIZE and each assignment that has a
    ``principal_id`` is yielded as a dict; assignments without one are counted
    as skipped and reported in a warning.

    :param client: GraphServiceClient
    :param neo4j_session: Neo4j session for querying service principal
    :param app_id: Application ID
    :return: Generator of app role assignment data as dicts
    """
    logger.info(f"Fetching role assignments for application: {app_id}")

    # Query the graph to get the service principal ID for this application
    query = """
    MATCH (sp:EntraServicePrincipal {app_id: $app_id})
    RETURN sp.id as service_principal_id
    """
    service_principal_id = neo4j_session.execute_read(
        read_single_value_tx, query, app_id=app_id
    )

    if not service_principal_id:
        logger.warning(
            f"No service principal found in graph for application {app_id}. Continuing."
        )
        return

    # One request builder is reused for the configuration classes, the first
    # page and every follow-up page, so all pages go through the same
    # appRoleAssignedTo endpoint builder.
    assigned_to_builder = client.service_principals.by_service_principal_id(
        service_principal_id
    ).app_role_assigned_to

    # Memory is managed through streaming and batching, not page size, so the
    # configured page size constant is requested for every page.
    query_parameters = (
        assigned_to_builder.AppRoleAssignedToRequestBuilderGetQueryParameters(
            top=APP_ROLE_ASSIGNMENTS_PAGE_SIZE
        )
    )
    request_config = (
        assigned_to_builder.AppRoleAssignedToRequestBuilderGetRequestConfiguration(
            query_parameters=query_parameters
        )
    )

    assignments_page: AppRoleAssignmentCollectionResponse | None = (
        await assigned_to_builder.get(request_configuration=request_config)
    )

    assignment_count = 0
    page_count = 0

    while assignments_page:
        page_count += 1

        if assignments_page.value:
            page_valid_count = 0
            page_skipped_count = 0

            # Process assignments and immediately yield to avoid accumulation
            for assignment in assignments_page.value:
                # Only yield entries that carry a principal_id; anything
                # without one is counted as skipped.
                if not assignment.principal_id:
                    page_skipped_count += 1
                    continue

                assignment_count += 1
                page_valid_count += 1
                yield {
                    "id": assignment.id,
                    "app_role_id": assignment.app_role_id,
                    "created_date_time": assignment.created_date_time,
                    "principal_id": assignment.principal_id,
                    "principal_display_name": assignment.principal_display_name,
                    "principal_type": assignment.principal_type,
                    "resource_display_name": assignment.resource_display_name,
                    "resource_id": assignment.resource_id,
                    "application_app_id": app_id,
                }

            # Log page results with details about skipped objects
            if page_skipped_count > 0:
                logger.warning(
                    f"Page {page_count} for {app_id}: {page_valid_count} valid assignments, "
                    f"{page_skipped_count} skipped objects. Total valid: {assignment_count}"
                )
            else:
                logger.debug(
                    f"Page {page_count} for {app_id}: {page_valid_count} assignments. "
                    f"Total: {assignment_count}"
                )

        # Force garbage collection after each page
        gc.collect()

        # Check if we have more pages to fetch
        next_page_url = assignments_page.odata_next_link
        if not next_page_url:
            break

        # Clear previous page before fetching next
        assignments_page.value = None

        # Fetch the next page through the appRoleAssignedTo builder rather
        # than the top-level service principals builder, so the response is
        # parsed as an app role assignment collection like the first page.
        logger.debug(f"Fetching page {page_count + 1} of assignments for {app_id}")
        assignments_page = await assigned_to_builder.with_url(next_page_url).get()

    logger.info(
        f"Successfully retrieved {assignment_count} assignments for application {app_id} (pages: {page_count})"
    )
130
+
131
+
132
def transform_app_role_assignments(
    assignments: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """
    Prepare app role assignment dicts for loading into the graph.

    The ``app_role_id``, ``principal_id`` and ``resource_id`` values are
    converted with ``str()`` when truthy and replaced by None otherwise; all
    other fields are copied unchanged.

    :param assignments: Raw app role assignment data as dicts
    :return: Transformed assignment data for graph loading
    """

    def _str_or_none(value: Any) -> Any:
        # Falsy identifiers become None; anything else is stringified.
        if value:
            return str(value)
        return None

    return [
        {
            "id": raw["id"],
            "app_role_id": _str_or_none(raw["app_role_id"]),
            "created_date_time": raw["created_date_time"],
            "principal_id": _str_or_none(raw["principal_id"]),
            "principal_display_name": raw["principal_display_name"],
            "principal_type": raw["principal_type"],
            "resource_display_name": raw["resource_display_name"],
            "resource_id": _str_or_none(raw["resource_id"]),
            "application_app_id": raw["application_app_id"],
        }
        for raw in assignments
    ]
163
+
164
+
165
@timeit
def load_app_role_assignments(
    neo4j_session: neo4j.Session,
    assignments_data: list[dict[str, Any]],
    update_tag: int,
    tenant_id: str,
) -> None:
    """
    Write Entra app role assignments to the graph.

    Delegates to ``load`` with an EntraAppRoleAssignmentSchema instance.

    :param neo4j_session: Neo4j session
    :param assignments_data: Assignment data to load
    :param update_tag: Update tag, passed to ``load`` as ``lastupdated``
    :param tenant_id: Entra tenant ID, passed to ``load`` as ``TENANT_ID``
    """
    load_kwargs = {
        "lastupdated": update_tag,
        "TENANT_ID": tenant_id,
    }
    load(
        neo4j_session,
        EntraAppRoleAssignmentSchema(),
        assignments_data,
        **load_kwargs,
    )
187
+
188
+
189
@timeit
def cleanup_app_role_assignments(
    neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
) -> None:
    """
    Run the cleanup job built from EntraAppRoleAssignmentSchema on the session.

    :param neo4j_session: Neo4j session
    :param common_job_parameters: Common job parameters handed to the cleanup job
    """
    cleanup_job = GraphJob.from_node_schema(
        EntraAppRoleAssignmentSchema(), common_job_parameters
    )
    cleanup_job.run(neo4j_session)
202
+
203
+
204
@timeit
async def sync_app_role_assignments(
    neo4j_session: neo4j.Session,
    tenant_id: str,
    client_id: str,
    client_secret: str,
    update_tag: int,
    common_job_parameters: dict[str, Any],
) -> None:
    """
    Sync Entra app role assignments to the graph.

    Application IDs are read from EntraApplication nodes already in the graph.
    For each one, assignments are streamed from Microsoft Graph and loaded in
    batches of 200; any remainder is loaded at the end, followed by the
    cleanup job.

    :param neo4j_session: Neo4j session
    :param tenant_id: Entra tenant ID
    :param client_id: Azure application client ID
    :param client_secret: Azure application client secret
    :param update_tag: Update tag for tracking data freshness
    :param common_job_parameters: Common job parameters for cleanup
    """
    # Create credentials and client
    graph_client = GraphServiceClient(
        ClientSecretCredential(
            tenant_id=tenant_id,
            client_id=client_id,
            client_secret=client_secret,
        ),
        scopes=["https://graph.microsoft.com/.default"],
    )
    assignment_batch_size = 200  # Batch size for assignments
    pending: list[dict[str, Any]] = []
    total_assignment_count = 0

    def _load_pending() -> list[dict[str, Any]]:
        # Transform and load whatever is buffered; the caller clears the lists.
        rows = transform_app_role_assignments(pending)
        load_app_role_assignments(neo4j_session, rows, update_tag, tenant_id)
        return rows

    # Get app_ids from graph instead of streaming from API again
    app_ids = neo4j_session.execute_read(
        read_list_of_values_tx,
        "MATCH (app:EntraApplication) RETURN app.app_id",
    )

    for app_id in app_ids:
        # Stream app role assignments (service principal ID comes from the graph)
        async for assignment in get_app_role_assignments_for_app(
            graph_client, neo4j_session, app_id
        ):
            pending.append(assignment)
            total_assignment_count += 1

            if len(pending) < assignment_batch_size:
                continue

            # Buffer is full: transform and load this batch, then release it
            loaded_rows = _load_pending()
            logger.debug(f"Loaded batch of {len(pending)} assignments")
            pending.clear()
            loaded_rows.clear()

            # Force garbage collection after batch load
            gc.collect()

    # Process remaining assignments
    if pending:
        loaded_rows = _load_pending()
        pending.clear()
        loaded_rows.clear()

    cleanup_app_role_assignments(neo4j_session, common_job_parameters)
    logger.info(f"Completed syncing {total_assignment_count} app role assignments")
    # Final garbage collection
    gc.collect()