cartography 0.112.0__py3-none-any.whl → 0.114.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (82)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +8 -0
  3. cartography/config.py +4 -0
  4. cartography/data/indexes.cypher +0 -31
  5. cartography/intel/aws/apigatewayv2.py +116 -0
  6. cartography/intel/aws/iam.py +741 -492
  7. cartography/intel/aws/organizations.py +7 -8
  8. cartography/intel/aws/permission_relationships.py +4 -16
  9. cartography/intel/aws/resources.py +2 -0
  10. cartography/intel/azure/__init__.py +16 -0
  11. cartography/intel/azure/app_service.py +105 -0
  12. cartography/intel/azure/functions.py +124 -0
  13. cartography/intel/entra/__init__.py +31 -0
  14. cartography/intel/entra/app_role_assignments.py +277 -0
  15. cartography/intel/entra/applications.py +4 -238
  16. cartography/intel/entra/federation/__init__.py +0 -0
  17. cartography/intel/entra/federation/aws_identity_center.py +77 -0
  18. cartography/intel/entra/service_principals.py +217 -0
  19. cartography/intel/gcp/__init__.py +136 -436
  20. cartography/intel/gcp/clients.py +65 -0
  21. cartography/intel/gcp/compute.py +18 -44
  22. cartography/intel/gcp/crm/__init__.py +0 -0
  23. cartography/intel/gcp/crm/folders.py +108 -0
  24. cartography/intel/gcp/crm/orgs.py +65 -0
  25. cartography/intel/gcp/crm/projects.py +109 -0
  26. cartography/intel/gcp/dns.py +82 -169
  27. cartography/intel/gcp/gke.py +72 -113
  28. cartography/intel/gcp/iam.py +66 -54
  29. cartography/intel/gcp/storage.py +75 -159
  30. cartography/intel/github/__init__.py +41 -0
  31. cartography/intel/github/commits.py +423 -0
  32. cartography/intel/github/repos.py +73 -39
  33. cartography/models/aws/apigatewayv2/__init__.py +0 -0
  34. cartography/models/aws/apigatewayv2/apigatewayv2.py +53 -0
  35. cartography/models/aws/iam/access_key.py +103 -0
  36. cartography/models/aws/iam/account_role.py +24 -0
  37. cartography/models/aws/iam/federated_principal.py +60 -0
  38. cartography/models/aws/iam/group.py +60 -0
  39. cartography/models/aws/iam/group_membership.py +26 -0
  40. cartography/models/aws/iam/inline_policy.py +78 -0
  41. cartography/models/aws/iam/managed_policy.py +51 -0
  42. cartography/models/aws/iam/policy_statement.py +57 -0
  43. cartography/models/aws/iam/role.py +83 -0
  44. cartography/models/aws/iam/root_principal.py +52 -0
  45. cartography/models/aws/iam/service_principal.py +30 -0
  46. cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
  47. cartography/models/aws/iam/user.py +54 -0
  48. cartography/models/azure/__init__.py +0 -0
  49. cartography/models/azure/app_service.py +59 -0
  50. cartography/models/azure/function_app.py +59 -0
  51. cartography/models/entra/entra_user_to_aws_sso.py +41 -0
  52. cartography/models/entra/service_principal.py +104 -0
  53. cartography/models/gcp/compute/subnet.py +74 -0
  54. cartography/models/gcp/crm/__init__.py +0 -0
  55. cartography/models/gcp/crm/folders.py +98 -0
  56. cartography/models/gcp/crm/organizations.py +21 -0
  57. cartography/models/gcp/crm/projects.py +100 -0
  58. cartography/models/gcp/dns.py +109 -0
  59. cartography/models/gcp/gke.py +69 -0
  60. cartography/models/gcp/iam.py +3 -0
  61. cartography/models/gcp/storage/__init__.py +0 -0
  62. cartography/models/gcp/storage/bucket.py +119 -0
  63. cartography/models/github/commits.py +63 -0
  64. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/METADATA +7 -5
  65. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/RECORD +69 -39
  66. cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
  67. cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
  68. cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
  69. cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
  70. cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
  71. cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
  72. cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
  73. cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
  74. cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
  75. cartography/data/jobs/cleanup/gcp_dns_cleanup.json +0 -29
  76. cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
  77. cartography/data/jobs/cleanup/gcp_storage_bucket_cleanup.json +0 -29
  78. cartography/intel/gcp/crm.py +0 -355
  79. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/WHEEL +0 -0
  80. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/entry_points.txt +0 -0
  81. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/licenses/LICENSE +0 -0
  82. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/top_level.txt +0 -0
@@ -6,16 +6,11 @@ from typing import Generator
6
6
 
7
7
  import neo4j
8
8
  from azure.identity import ClientSecretCredential
9
- from msgraph.generated.models.app_role_assignment_collection_response import (
10
- AppRoleAssignmentCollectionResponse,
11
- )
12
9
  from msgraph.generated.models.application import Application
13
- from msgraph.generated.models.service_principal import ServicePrincipal
14
10
  from msgraph.graph_service_client import GraphServiceClient
15
11
 
16
12
  from cartography.client.core.tx import load
17
13
  from cartography.graph.job import GraphJob
18
- from cartography.models.entra.app_role_assignment import EntraAppRoleAssignmentSchema
19
14
  from cartography.models.entra.application import EntraApplicationSchema
20
15
  from cartography.util import timeit
21
16
 
@@ -65,125 +60,6 @@ async def get_entra_applications(
65
60
  logger.info(f"Retrieved {count} Entra applications total")
66
61
 
67
62
 
68
- @timeit
69
- async def get_app_role_assignments_for_app(
70
- client: GraphServiceClient, app: Application
71
- ) -> AsyncGenerator[dict[str, Any], None]:
72
- """
73
- Gets app role assignments for a single application with safety limits.
74
-
75
- :param client: GraphServiceClient
76
- :param app: Application object
77
- :return: Generator of app role assignment data as dicts
78
- """
79
- if not app.app_id:
80
- logger.warning(f"Application {app.id} has no app_id, skipping")
81
- return
82
-
83
- logger.info(
84
- f"Fetching role assignments for application: {app.display_name} ({app.app_id})"
85
- )
86
-
87
- # First, get the service principal for this application
88
- service_principals_page = await client.service_principals.get(
89
- request_configuration=client.service_principals.ServicePrincipalsRequestBuilderGetRequestConfiguration(
90
- query_parameters=client.service_principals.ServicePrincipalsRequestBuilderGetQueryParameters(
91
- filter=f"appId eq '{app.app_id}'"
92
- )
93
- )
94
- )
95
-
96
- if not service_principals_page or not service_principals_page.value:
97
- logger.warning(
98
- f"No service principal found for application {app.app_id} ({app.display_name}). Continuing."
99
- )
100
- return
101
-
102
- service_principal: ServicePrincipal = service_principals_page.value[0]
103
-
104
- # Get assignments for this service principal with pagination and limits
105
- # Use maximum page size (999) to get more data per request
106
- # Memory is managed through streaming and batching, not page size
107
- request_config = client.service_principals.by_service_principal_id(
108
- service_principal.id
109
- ).app_role_assigned_to.AppRoleAssignedToRequestBuilderGetRequestConfiguration(
110
- query_parameters=client.service_principals.by_service_principal_id(
111
- service_principal.id
112
- ).app_role_assigned_to.AppRoleAssignedToRequestBuilderGetQueryParameters(
113
- top=APP_ROLE_ASSIGNMENTS_PAGE_SIZE # Maximum allowed by Microsoft Graph API
114
- )
115
- )
116
-
117
- assignments_page: AppRoleAssignmentCollectionResponse | None = (
118
- await client.service_principals.by_service_principal_id(
119
- service_principal.id
120
- ).app_role_assigned_to.get(request_configuration=request_config)
121
- )
122
-
123
- assignment_count = 0
124
- page_count = 0
125
-
126
- while assignments_page:
127
- page_count += 1
128
-
129
- if assignments_page.value:
130
- page_valid_count = 0
131
- page_skipped_count = 0
132
-
133
- # Process assignments and immediately yield to avoid accumulation
134
- for assignment in assignments_page.value:
135
- # Only yield if we have valid data since it's possible (but unlikely) for assignment.id to be None
136
- if assignment.principal_id:
137
- assignment_count += 1
138
- page_valid_count += 1
139
- yield {
140
- "id": assignment.id,
141
- "app_role_id": assignment.app_role_id,
142
- "created_date_time": assignment.created_date_time,
143
- "principal_id": assignment.principal_id,
144
- "principal_display_name": assignment.principal_display_name,
145
- "principal_type": assignment.principal_type,
146
- "resource_display_name": assignment.resource_display_name,
147
- "resource_id": assignment.resource_id,
148
- "application_app_id": app.app_id,
149
- }
150
- else:
151
- page_skipped_count += 1
152
-
153
- # Log page results with details about skipped objects
154
- if page_skipped_count > 0:
155
- logger.warning(
156
- f"Page {page_count} for {app.display_name}: {page_valid_count} valid assignments, "
157
- f"{page_skipped_count} skipped objects. Total valid: {assignment_count}"
158
- )
159
- else:
160
- logger.debug(
161
- f"Page {page_count} for {app.display_name}: {page_valid_count} assignments. "
162
- f"Total: {assignment_count}"
163
- )
164
-
165
- # Force garbage collection after each page
166
- gc.collect()
167
-
168
- # Check if we have more pages to fetch
169
- if not assignments_page.odata_next_link:
170
- break
171
-
172
- # Clear previous page before fetching next
173
- assignments_page.value = None
174
-
175
- # Fetch next page
176
- logger.debug(
177
- f"Fetching page {page_count + 1} of assignments for {app.display_name}"
178
- )
179
- next_page_url = assignments_page.odata_next_link
180
- assignments_page = await client.service_principals.with_url(next_page_url).get()
181
-
182
- logger.info(
183
- f"Successfully retrieved {assignment_count} assignments for application {app.display_name} (pages: {page_count})"
184
- )
185
-
186
-
187
63
  def transform_applications(
188
64
  applications: list[Application],
189
65
  ) -> Generator[dict[str, Any], None, None]:
@@ -203,39 +79,6 @@ def transform_applications(
203
79
  }
204
80
 
205
81
 
206
- def transform_app_role_assignments(
207
- assignments: list[dict[str, Any]],
208
- ) -> list[dict[str, Any]]:
209
- """
210
- Transform app role assignment data for graph loading.
211
-
212
- :param assignments: Raw app role assignment data as dicts
213
- :return: Transformed assignment data for graph loading
214
- """
215
- transformed = []
216
- for assign in assignments:
217
- transformed.append(
218
- {
219
- "id": assign["id"],
220
- "app_role_id": (
221
- str(assign["app_role_id"]) if assign["app_role_id"] else None
222
- ),
223
- "created_date_time": assign["created_date_time"],
224
- "principal_id": (
225
- str(assign["principal_id"]) if assign["principal_id"] else None
226
- ),
227
- "principal_display_name": assign["principal_display_name"],
228
- "principal_type": assign["principal_type"],
229
- "resource_display_name": assign["resource_display_name"],
230
- "resource_id": (
231
- str(assign["resource_id"]) if assign["resource_id"] else None
232
- ),
233
- "application_app_id": assign["application_app_id"],
234
- }
235
- )
236
- return transformed
237
-
238
-
239
82
  @timeit
240
83
  def load_applications(
241
84
  neo4j_session: neo4j.Session,
@@ -260,30 +103,6 @@ def load_applications(
260
103
  )
261
104
 
262
105
 
263
- @timeit
264
- def load_app_role_assignments(
265
- neo4j_session: neo4j.Session,
266
- assignments_data: list[dict[str, Any]],
267
- update_tag: int,
268
- tenant_id: str,
269
- ) -> None:
270
- """
271
- Load Entra app role assignments to the graph.
272
-
273
- :param neo4j_session: Neo4j session
274
- :param assignments_data: Assignment data to load
275
- :param update_tag: Update tag for tracking data freshness
276
- :param tenant_id: Entra tenant ID
277
- """
278
- load(
279
- neo4j_session,
280
- EntraAppRoleAssignmentSchema(),
281
- assignments_data,
282
- lastupdated=update_tag,
283
- TENANT_ID=tenant_id,
284
- )
285
-
286
-
287
106
  @timeit
288
107
  def cleanup_applications(
289
108
  neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
@@ -299,21 +118,6 @@ def cleanup_applications(
299
118
  )
300
119
 
301
120
 
302
- @timeit
303
- def cleanup_app_role_assignments(
304
- neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
305
- ) -> None:
306
- """
307
- Delete Entra app role assignments and their relationships from the graph if they were not updated in the last sync.
308
-
309
- :param neo4j_session: Neo4j session
310
- :param common_job_parameters: Common job parameters containing UPDATE_TAG and TENANT_ID
311
- """
312
- GraphJob.from_node_schema(
313
- EntraAppRoleAssignmentSchema(), common_job_parameters
314
- ).run(neo4j_session)
315
-
316
-
317
121
  @timeit
318
122
  async def sync_entra_applications(
319
123
  neo4j_session: neo4j.Session,
@@ -345,18 +149,12 @@ async def sync_entra_applications(
345
149
  scopes=["https://graph.microsoft.com/.default"],
346
150
  )
347
151
 
348
- # Process applications and their assignments in batches
152
+ # Step 1: Sync applications
349
153
  app_batch_size = 10 # Batch size for applications
350
- assignment_batch_size = (
351
- 200 # Batch size for assignments (increased since we handle memory better now)
352
- )
353
-
354
154
  apps_batch = []
355
- assignments_batch = []
356
- total_assignment_count = 0
357
155
  total_app_count = 0
358
156
 
359
- # Stream apps
157
+ # Stream and load applications
360
158
  async for app in get_entra_applications(client):
361
159
  total_app_count += 1
362
160
  apps_batch.append(app)
@@ -372,45 +170,13 @@ async def sync_entra_applications(
372
170
  transformed_apps.clear()
373
171
  gc.collect() # Force garbage collection
374
172
 
375
- # Stream app role assignments
376
- async for assignment in get_app_role_assignments_for_app(client, app):
377
- assignments_batch.append(assignment)
378
- total_assignment_count += 1
379
-
380
- # Transform and load assignments in batches
381
- if len(assignments_batch) >= assignment_batch_size:
382
- transformed_assignments = transform_app_role_assignments(
383
- assignments_batch
384
- )
385
- load_app_role_assignments(
386
- neo4j_session, transformed_assignments, update_tag, tenant_id
387
- )
388
- logger.debug(f"Loaded batch of {len(assignments_batch)} assignments")
389
- assignments_batch.clear()
390
- transformed_assignments.clear()
391
-
392
- # Force garbage collection after batch load
393
- gc.collect()
394
-
395
173
  # Process remaining applications
396
174
  if apps_batch:
397
175
  transformed_apps = list(transform_applications(apps_batch))
398
176
  load_applications(neo4j_session, transformed_apps, update_tag, tenant_id)
399
177
  apps_batch.clear()
400
178
  transformed_apps.clear()
401
-
402
- # Process remaining assignments
403
- if assignments_batch:
404
- transformed_assignments = transform_app_role_assignments(assignments_batch)
405
- load_app_role_assignments(
406
- neo4j_session, transformed_assignments, update_tag, tenant_id
407
- )
408
- assignments_batch.clear()
409
- transformed_assignments.clear()
410
-
179
+ cleanup_applications(neo4j_session, common_job_parameters)
180
+ logger.info(f"Completed syncing {total_app_count} applications")
411
181
  # Final garbage collection
412
182
  gc.collect()
413
-
414
- # Cleanup stale data
415
- cleanup_applications(neo4j_session, common_job_parameters)
416
- cleanup_app_role_assignments(neo4j_session, common_job_parameters)
File without changes
@@ -0,0 +1,77 @@
1
+ from typing import Any
2
+
3
+ import neo4j
4
+
5
+ from cartography.client.core.tx import load_matchlinks
6
+ from cartography.client.core.tx import read_list_of_dicts_tx
7
+ from cartography.graph.job import GraphJob
8
+ from cartography.models.entra.entra_user_to_aws_sso import (
9
+ EntraUserToAWSSSOUserMatchLink,
10
+ )
11
+ from cartography.util import timeit
12
+
13
+
14
+ @timeit
15
+ def sync_entra_to_aws_identity_center(
16
+ neo4j_session: neo4j.Session,
17
+ update_tag: int,
18
+ tenant_id: str,
19
+ common_job_parameters: dict[str, Any],
20
+ ) -> None:
21
+ query = """
22
+ MATCH (:EntraTenant{id: $TENANT_ID})-[:RESOURCE]->(e:EntraUser)
23
+ -[:HAS_APP_ROLE]->(ar:EntraAppRoleAssignment)
24
+ -[:ASSIGNED_TO]->(n:EntraApplication)
25
+ -[:SERVICE_PRINCIPAL]->(spn:EntraServicePrincipal)
26
+ -[:FEDERATES_TO]->(ic:AWSIdentityCenter)
27
+ MATCH (sso:AWSSSOUser{identity_store_id:ic.identity_store_id})
28
+ WHERE e.user_principal_name = sso.user_name
29
+ RETURN e.user_principal_name as entra_user_principal_name, sso.user_name as aws_user_name, sso.identity_store_id as identity_store_id
30
+ """
31
+ entrauser_to_awssso_users = neo4j_session.execute_read(
32
+ read_list_of_dicts_tx, query, TENANT_ID=tenant_id
33
+ )
34
+
35
+ # Load MatchLink relationships from Entra users to AWS SSO users
36
+ load_matchlinks(
37
+ neo4j_session,
38
+ EntraUserToAWSSSOUserMatchLink(),
39
+ entrauser_to_awssso_users,
40
+ lastupdated=update_tag,
41
+ _sub_resource_label="EntraTenant",
42
+ _sub_resource_id=tenant_id,
43
+ )
44
+
45
+ cleanup_entra_user_to_aws_sso_user_matchlinks(neo4j_session, common_job_parameters)
46
+
47
+
48
+ @timeit
49
+ async def sync_entra_federation(
50
+ neo4j_session: neo4j.Session,
51
+ update_tag: int,
52
+ tenant_id: str,
53
+ common_job_parameters: dict[str, Any],
54
+ ) -> None:
55
+ """
56
+ Sync Entra federation relationships to the graph.
57
+
58
+ :param neo4j_session: Neo4j session
59
+ :param update_tag: Update tag for tracking data freshness
60
+ :param tenant_id: Entra tenant ID
61
+ :param common_job_parameters: Common job parameters for cleanup
62
+ """
63
+ sync_entra_to_aws_identity_center(
64
+ neo4j_session, update_tag, tenant_id, common_job_parameters
65
+ )
66
+
67
+
68
+ @timeit
69
+ def cleanup_entra_user_to_aws_sso_user_matchlinks(
70
+ neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
71
+ ) -> None:
72
+ GraphJob.from_matchlink(
73
+ EntraUserToAWSSSOUserMatchLink(),
74
+ "EntraTenant",
75
+ common_job_parameters["TENANT_ID"],
76
+ common_job_parameters["UPDATE_TAG"],
77
+ ).run(neo4j_session)
@@ -0,0 +1,217 @@
1
+ import logging
2
+ import re
3
+ from typing import Any
4
+ from typing import AsyncGenerator
5
+
6
+ import neo4j
7
+ from azure.identity import ClientSecretCredential
8
+ from msgraph import GraphServiceClient
9
+ from msgraph.generated.models.service_principal import ServicePrincipal
10
+
11
+ from cartography.client.core.tx import load
12
+ from cartography.graph.job import GraphJob
13
+ from cartography.models.entra.service_principal import EntraServicePrincipalSchema
14
+ from cartography.util import timeit
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ SERVICE_PRINCIPALS_PAGE_SIZE = 999
19
+
20
+
21
+ @timeit
22
+ async def get_entra_service_principals(
23
+ client: GraphServiceClient,
24
+ ) -> AsyncGenerator[ServicePrincipal, None]:
25
+ """
26
+ Gets Entra service principals using the Microsoft Graph API with a generator.
27
+
28
+ :param client: GraphServiceClient
29
+ :return: Generator of raw ServicePrincipal objects from Microsoft Graph
30
+ """
31
+ count = 0
32
+ # Get all service principals with pagination
33
+ request_configuration = client.service_principals.ServicePrincipalsRequestBuilderGetRequestConfiguration(
34
+ query_parameters=client.service_principals.ServicePrincipalsRequestBuilderGetQueryParameters(
35
+ top=SERVICE_PRINCIPALS_PAGE_SIZE
36
+ )
37
+ )
38
+ page = await client.service_principals.get(
39
+ request_configuration=request_configuration
40
+ )
41
+
42
+ while page:
43
+ if page.value:
44
+ for spn in page.value:
45
+ count += 1
46
+ yield spn
47
+
48
+ if not page.odata_next_link:
49
+ break
50
+ page = await client.service_principals.with_url(page.odata_next_link).get()
51
+
52
+ logger.info(f"Retrieved {count} Entra service principals total")
53
+
54
+
55
+ async def get_service_principal_by_app_id(
56
+ client: GraphServiceClient, app_id: str
57
+ ) -> ServicePrincipal | None:
58
+ """
59
+ Gets a service principal by app_id using the Microsoft Graph API.
60
+ This function is extracted from the original app_role_assignments logic.
61
+
62
+ :param client: GraphServiceClient
63
+ :param app_id: Application ID to search for
64
+ :return: ServicePrincipal object or None if not found
65
+ """
66
+ service_principals_page = await client.service_principals.get(
67
+ request_configuration=client.service_principals.ServicePrincipalsRequestBuilderGetRequestConfiguration(
68
+ query_parameters=client.service_principals.ServicePrincipalsRequestBuilderGetQueryParameters(
69
+ filter=f"appId eq '{app_id}'"
70
+ )
71
+ )
72
+ )
73
+
74
+ if not service_principals_page or not service_principals_page.value:
75
+ logger.warning(
76
+ f"No service principal found for application {app_id}. Continuing."
77
+ )
78
+ return None
79
+
80
+ return service_principals_page.value[0]
81
+
82
+
83
+ def transform_service_principals(
84
+ service_principals: list[ServicePrincipal],
85
+ ) -> list[dict[str, Any]]:
86
+ result = []
87
+ for spn in service_principals:
88
+ aws_identity_center_instance_id = None
89
+ match = re.search(r"d-[a-z0-9]{10}", spn.login_url or "")
90
+ aws_identity_center_instance_id = match.group(0) if match else None
91
+ transformed = {
92
+ "id": spn.id,
93
+ "app_id": spn.app_id,
94
+ "account_enabled": spn.account_enabled,
95
+ # uuid.UUID to string
96
+ "app_owner_organization_id": (
97
+ str(spn.app_owner_organization_id)
98
+ if spn.app_owner_organization_id
99
+ else None
100
+ ),
101
+ "aws_identity_center_instance_id": aws_identity_center_instance_id,
102
+ "display_name": spn.display_name,
103
+ "login_url": spn.login_url,
104
+ "preferred_single_sign_on_mode": spn.preferred_single_sign_on_mode,
105
+ "preferred_token_signing_key_thumbprint": spn.preferred_token_signing_key_thumbprint,
106
+ "reply_urls": spn.reply_urls,
107
+ "service_principal_type": spn.service_principal_type,
108
+ "sign_in_audience": spn.sign_in_audience,
109
+ "tags": spn.tags,
110
+ # uuid.UUID to string
111
+ "token_encryption_key_id": (
112
+ str(spn.token_encryption_key_id)
113
+ if spn.token_encryption_key_id
114
+ else None
115
+ ),
116
+ }
117
+ result.append(transformed)
118
+ return result
119
+
120
+
121
+ @timeit
122
+ def load_service_principals(
123
+ neo4j_session: neo4j.Session,
124
+ service_principal_data: list[dict[str, Any]],
125
+ update_tag: int,
126
+ tenant_id: str,
127
+ ) -> None:
128
+ load(
129
+ neo4j_session,
130
+ EntraServicePrincipalSchema(),
131
+ service_principal_data,
132
+ lastupdated=update_tag,
133
+ TENANT_ID=tenant_id,
134
+ )
135
+
136
+
137
+ @timeit
138
+ def cleanup_service_principals(
139
+ neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
140
+ ) -> None:
141
+ """
142
+ Delete Entra service principals from the graph if they were not updated in the last sync.
143
+
144
+ :param neo4j_session: Neo4j session
145
+ :param common_job_parameters: Common job parameters containing UPDATE_TAG and TENANT_ID
146
+ """
147
+ GraphJob.from_node_schema(EntraServicePrincipalSchema(), common_job_parameters).run(
148
+ neo4j_session
149
+ )
150
+
151
+
152
+ @timeit
153
+ async def sync_service_principals(
154
+ neo4j_session: neo4j.Session,
155
+ tenant_id: str,
156
+ client_id: str,
157
+ client_secret: str,
158
+ update_tag: int,
159
+ common_job_parameters: dict[str, Any],
160
+ ) -> None:
161
+ """
162
+ Sync Entra service principals to the graph.
163
+
164
+ :param neo4j_session: Neo4j session
165
+ :param tenant_id: Entra tenant ID
166
+ :param client_id: Azure application client ID
167
+ :param client_secret: Azure application client secret
168
+ :param update_tag: Update tag for tracking data freshness
169
+ :param common_job_parameters: Common job parameters for cleanup
170
+ """
171
+ # Create credentials and client
172
+ credential = ClientSecretCredential(
173
+ tenant_id=tenant_id,
174
+ client_id=client_id,
175
+ client_secret=client_secret,
176
+ )
177
+
178
+ client = GraphServiceClient(
179
+ credential,
180
+ scopes=["https://graph.microsoft.com/.default"],
181
+ )
182
+ service_principals_batch = []
183
+ batch_size = 50 # Batch size for service principals
184
+ total_count = 0
185
+
186
+ # Stream service principals and process in batches
187
+ async for spn in get_entra_service_principals(client):
188
+ service_principals_batch.append(spn)
189
+ total_count += 1
190
+
191
+ # Transform and load service principals in batches
192
+ if len(service_principals_batch) >= batch_size:
193
+ transformed_service_principals = transform_service_principals(
194
+ service_principals_batch
195
+ )
196
+ load_service_principals(
197
+ neo4j_session, transformed_service_principals, update_tag, tenant_id
198
+ )
199
+ logger.info(
200
+ f"Loaded batch of {len(service_principals_batch)} service principals (total: {total_count})"
201
+ )
202
+ service_principals_batch.clear()
203
+ transformed_service_principals.clear()
204
+
205
+ # Process remaining service principals
206
+ if service_principals_batch:
207
+ transformed_service_principals = transform_service_principals(
208
+ service_principals_batch
209
+ )
210
+ load_service_principals(
211
+ neo4j_session, transformed_service_principals, update_tag, tenant_id
212
+ )
213
+ service_principals_batch.clear()
214
+ transformed_service_principals.clear()
215
+
216
+ logger.info(f"Completed loading {total_count} service principals")
217
+ cleanup_service_principals(neo4j_session, common_job_parameters)