cartography 0.104.0rc3__py3-none-any.whl → 0.106.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (134) hide show
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +104 -3
  3. cartography/client/aws/__init__.py +19 -0
  4. cartography/client/aws/ecr.py +51 -0
  5. cartography/client/core/tx.py +62 -0
  6. cartography/config.py +32 -0
  7. cartography/data/indexes.cypher +0 -37
  8. cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +1 -1
  9. cartography/driftdetect/cli.py +3 -2
  10. cartography/graph/cleanupbuilder.py +198 -41
  11. cartography/graph/job.py +42 -0
  12. cartography/graph/querybuilder.py +136 -2
  13. cartography/graph/statement.py +1 -1
  14. cartography/intel/airbyte/__init__.py +105 -0
  15. cartography/intel/airbyte/connections.py +120 -0
  16. cartography/intel/airbyte/destinations.py +81 -0
  17. cartography/intel/airbyte/organizations.py +59 -0
  18. cartography/intel/airbyte/sources.py +78 -0
  19. cartography/intel/airbyte/tags.py +64 -0
  20. cartography/intel/airbyte/users.py +106 -0
  21. cartography/intel/airbyte/util.py +122 -0
  22. cartography/intel/airbyte/workspaces.py +63 -0
  23. cartography/intel/aws/acm.py +124 -0
  24. cartography/intel/aws/cloudtrail.py +3 -38
  25. cartography/intel/aws/codebuild.py +132 -0
  26. cartography/intel/aws/ecr.py +8 -2
  27. cartography/intel/aws/ecs.py +228 -380
  28. cartography/intel/aws/efs.py +179 -11
  29. cartography/intel/aws/iam.py +1 -1
  30. cartography/intel/aws/identitycenter.py +14 -3
  31. cartography/intel/aws/inspector.py +96 -53
  32. cartography/intel/aws/lambda_function.py +1 -1
  33. cartography/intel/aws/rds.py +2 -1
  34. cartography/intel/aws/resources.py +4 -0
  35. cartography/intel/aws/s3.py +195 -4
  36. cartography/intel/aws/sqs.py +36 -90
  37. cartography/intel/entra/__init__.py +22 -0
  38. cartography/intel/entra/applications.py +366 -0
  39. cartography/intel/entra/groups.py +151 -0
  40. cartography/intel/entra/ou.py +21 -5
  41. cartography/intel/entra/users.py +84 -42
  42. cartography/intel/kubernetes/__init__.py +30 -14
  43. cartography/intel/kubernetes/clusters.py +86 -0
  44. cartography/intel/kubernetes/namespaces.py +59 -57
  45. cartography/intel/kubernetes/pods.py +140 -77
  46. cartography/intel/kubernetes/secrets.py +95 -45
  47. cartography/intel/kubernetes/services.py +131 -67
  48. cartography/intel/kubernetes/util.py +125 -14
  49. cartography/intel/scaleway/__init__.py +127 -0
  50. cartography/intel/scaleway/iam/__init__.py +0 -0
  51. cartography/intel/scaleway/iam/apikeys.py +71 -0
  52. cartography/intel/scaleway/iam/applications.py +71 -0
  53. cartography/intel/scaleway/iam/groups.py +71 -0
  54. cartography/intel/scaleway/iam/users.py +71 -0
  55. cartography/intel/scaleway/instances/__init__.py +0 -0
  56. cartography/intel/scaleway/instances/flexibleips.py +86 -0
  57. cartography/intel/scaleway/instances/instances.py +92 -0
  58. cartography/intel/scaleway/projects.py +79 -0
  59. cartography/intel/scaleway/storage/__init__.py +0 -0
  60. cartography/intel/scaleway/storage/snapshots.py +86 -0
  61. cartography/intel/scaleway/storage/volumes.py +84 -0
  62. cartography/intel/scaleway/utils.py +37 -0
  63. cartography/intel/trivy/__init__.py +161 -0
  64. cartography/intel/trivy/scanner.py +363 -0
  65. cartography/models/airbyte/__init__.py +0 -0
  66. cartography/models/airbyte/connection.py +138 -0
  67. cartography/models/airbyte/destination.py +75 -0
  68. cartography/models/airbyte/organization.py +19 -0
  69. cartography/models/airbyte/source.py +75 -0
  70. cartography/models/airbyte/stream.py +74 -0
  71. cartography/models/airbyte/tag.py +69 -0
  72. cartography/models/airbyte/user.py +111 -0
  73. cartography/models/airbyte/workspace.py +46 -0
  74. cartography/models/aws/acm/__init__.py +0 -0
  75. cartography/models/aws/acm/certificate.py +75 -0
  76. cartography/models/aws/cloudtrail/trail.py +24 -0
  77. cartography/models/aws/codebuild/__init__.py +0 -0
  78. cartography/models/aws/codebuild/project.py +49 -0
  79. cartography/models/aws/ecs/__init__.py +0 -0
  80. cartography/models/aws/ecs/clusters.py +64 -0
  81. cartography/models/aws/ecs/container_definitions.py +93 -0
  82. cartography/models/aws/ecs/container_instances.py +84 -0
  83. cartography/models/aws/ecs/containers.py +99 -0
  84. cartography/models/aws/ecs/services.py +117 -0
  85. cartography/models/aws/ecs/task_definitions.py +135 -0
  86. cartography/models/aws/ecs/tasks.py +110 -0
  87. cartography/models/aws/efs/access_point.py +77 -0
  88. cartography/models/aws/efs/file_system.py +60 -0
  89. cartography/models/aws/efs/mount_target.py +29 -2
  90. cartography/models/aws/s3/notification.py +24 -0
  91. cartography/models/aws/secretsmanager/secret_version.py +0 -2
  92. cartography/models/aws/sqs/__init__.py +0 -0
  93. cartography/models/aws/sqs/queue.py +89 -0
  94. cartography/models/core/common.py +1 -0
  95. cartography/models/core/nodes.py +15 -2
  96. cartography/models/core/relationships.py +44 -0
  97. cartography/models/entra/app_role_assignment.py +115 -0
  98. cartography/models/entra/application.py +47 -0
  99. cartography/models/entra/group.py +91 -0
  100. cartography/models/entra/user.py +17 -51
  101. cartography/models/kubernetes/__init__.py +0 -0
  102. cartography/models/kubernetes/clusters.py +26 -0
  103. cartography/models/kubernetes/containers.py +108 -0
  104. cartography/models/kubernetes/namespaces.py +51 -0
  105. cartography/models/kubernetes/pods.py +80 -0
  106. cartography/models/kubernetes/secrets.py +79 -0
  107. cartography/models/kubernetes/services.py +108 -0
  108. cartography/models/scaleway/__init__.py +0 -0
  109. cartography/models/scaleway/iam/__init__.py +0 -0
  110. cartography/models/scaleway/iam/apikey.py +96 -0
  111. cartography/models/scaleway/iam/application.py +52 -0
  112. cartography/models/scaleway/iam/group.py +95 -0
  113. cartography/models/scaleway/iam/user.py +60 -0
  114. cartography/models/scaleway/instance/__init__.py +0 -0
  115. cartography/models/scaleway/instance/flexibleip.py +52 -0
  116. cartography/models/scaleway/instance/instance.py +118 -0
  117. cartography/models/scaleway/organization.py +19 -0
  118. cartography/models/scaleway/project.py +48 -0
  119. cartography/models/scaleway/storage/__init__.py +0 -0
  120. cartography/models/scaleway/storage/snapshot.py +78 -0
  121. cartography/models/scaleway/storage/volume.py +51 -0
  122. cartography/models/trivy/__init__.py +0 -0
  123. cartography/models/trivy/findings.py +66 -0
  124. cartography/models/trivy/fix.py +66 -0
  125. cartography/models/trivy/package.py +71 -0
  126. cartography/sync.py +10 -4
  127. cartography/util.py +15 -10
  128. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/METADATA +6 -2
  129. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/RECORD +133 -49
  130. cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
  131. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/WHEEL +0 -0
  132. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/entry_points.txt +0 -0
  133. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/licenses/LICENSE +0 -0
  134. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,366 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import httpx
7
+ import neo4j
8
+ from azure.identity import ClientSecretCredential
9
+ from kiota_abstractions.api_error import APIError
10
+ from msgraph.graph_service_client import GraphServiceClient
11
+
12
+ from cartography.client.core.tx import load
13
+ from cartography.graph.job import GraphJob
14
+ from cartography.intel.entra.users import load_tenant
15
+ from cartography.models.entra.app_role_assignment import EntraAppRoleAssignmentSchema
16
+ from cartography.models.entra.application import EntraApplicationSchema
17
+ from cartography.util import timeit
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # Configurable constants for API pagination
22
+ # Microsoft Graph API recommends page sizes up to 999 for most resources
23
+ # Set to 999 by default, but can be adjusted if needed
24
+ #
25
+ # Adjust these values if:
26
+ # - You have performance issues (decrease values)
27
+ # - You want to minimize API calls (increase values up to 999)
28
+ # - You're hitting rate limits (decrease values)
29
+ APPLICATIONS_PAGE_SIZE = 999
30
+ APP_ROLE_ASSIGNMENTS_PAGE_SIZE = (
31
+ 999 # Currently not used, but reserved for future pagination improvements
32
+ )
33
+
34
+ # Warning thresholds for potential data completeness issues
35
+ # Log a warning when a single application has at least this many role assignments
36
+ HIGH_ASSIGNMENT_COUNT_THRESHOLD = 100
37
+
38
+
39
@timeit
async def get_entra_applications(client: GraphServiceClient) -> List[Any]:
    """
    Fetch every Entra application visible to the client, following paging links.

    :param client: GraphServiceClient
    :return: List of raw Application objects from Microsoft Graph
    """
    # Build the first-page request with the configured page size
    query = client.applications.ApplicationsRequestBuilderGetQueryParameters(
        top=APPLICATIONS_PAGE_SIZE
    )
    config = client.applications.ApplicationsRequestBuilderGetRequestConfiguration(
        query_parameters=query
    )

    collected: List[Any] = []
    response = await client.applications.get(request_configuration=config)

    # Keep requesting pages until a response is empty or carries no next link
    while response:
        collected.extend(response.value or [])

        next_link = response.odata_next_link
        if not next_link:
            break
        response = await client.applications.with_url(next_link).get()

    logger.info(f"Retrieved {len(collected)} Entra applications total")
    return collected
67
+
68
+
69
@timeit
async def get_app_role_assignments(
    client: GraphServiceClient, applications: List[Any]
) -> List[Any]:
    """
    Gets app role assignments by querying each application's service principal.

    For every application with an app_id, the first service principal matching
    that app_id is looked up and all pages of its appRoleAssignedTo collection
    are read. Each returned assignment object is tagged with an
    ``application_app_id`` attribute holding the owning application's app_id.

    Errors for a single application are logged and that application is skipped;
    they never abort the whole run.

    :param client: GraphServiceClient
    :param applications: List of Application objects (from get_entra_applications)
    :return: List of raw app role assignment objects from Microsoft Graph
    """
    assignments: List[Any] = []

    for app in applications:
        if not app.app_id:
            logger.warning(f"Application {app.id} has no app_id, skipping")
            continue

        try:
            # First, get the service principal for this application
            # The service principal represents the app in the directory
            service_principals_page = await client.service_principals.get(
                request_configuration=client.service_principals.ServicePrincipalsRequestBuilderGetRequestConfiguration(
                    query_parameters=client.service_principals.ServicePrincipalsRequestBuilderGetQueryParameters(
                        filter=f"appId eq '{app.app_id}'"
                    )
                )
            )

            if not service_principals_page or not service_principals_page.value:
                logger.debug(
                    f"No service principal found for application {app.app_id} ({app.display_name})"
                )
                continue

            service_principal = service_principals_page.value[0]

            # Ensure service principal has an ID
            if not service_principal.id:
                logger.warning(
                    f"Service principal for application {app.app_id} ({app.display_name}) has no ID, skipping"
                )
                continue

            # Get all assignments for this service principal (users, groups, service principals).
            # Keep a handle on the appRoleAssignedTo builder: follow-up pages must be
            # requested through the same builder that produced the first page, so they
            # are handled as app role assignments rather than through the
            # service-principal collection builder.
            assigned_to_builder = client.service_principals.by_service_principal_id(
                service_principal.id
            ).app_role_assigned_to
            assignments_page = await assigned_to_builder.get()

            app_assignments = []
            while assignments_page:
                if assignments_page.value:
                    # Add application context to each assignment
                    for assignment in assignments_page.value:
                        # Add the application app_id to the assignment for relationship matching
                        assignment.application_app_id = app.app_id
                    app_assignments.extend(assignments_page.value)

                if not assignments_page.odata_next_link:
                    break
                assignments_page = await assigned_to_builder.with_url(
                    assignments_page.odata_next_link
                ).get()

            # Log warning if a single application has many assignments (potential pagination issues)
            if len(app_assignments) >= HIGH_ASSIGNMENT_COUNT_THRESHOLD:
                logger.warning(
                    f"Application {app.display_name} ({app.app_id}) has {len(app_assignments)} role assignments. "
                    f"If this seems unexpectedly high, there may be pagination limits affecting data completeness."
                )

            assignments.extend(app_assignments)
            logger.debug(
                f"Retrieved {len(app_assignments)} assignments for application {app.display_name}"
            )

        except APIError as e:
            # Handle Microsoft Graph API errors (403 Forbidden, 404 Not Found, etc.)
            if e.response_status_code == 403:
                logger.warning(
                    f"Access denied when fetching app role assignments for application {app.app_id} ({app.display_name}). "
                    f"This application may not have sufficient permissions or may not exist."
                )
            elif e.response_status_code == 404:
                logger.warning(
                    f"Application {app.app_id} ({app.display_name}) not found when fetching app role assignments. "
                    f"Application may have been deleted or does not exist."
                )
            elif e.response_status_code == 429:
                logger.warning(
                    f"Rate limit hit when fetching app role assignments for application {app.app_id} ({app.display_name}). "
                    f"Consider reducing APPLICATIONS_PAGE_SIZE or implementing retry logic."
                )
            else:
                logger.warning(
                    f"Microsoft Graph API error when fetching app role assignments for application {app.app_id} ({app.display_name}): "
                    f"Status {e.response_status_code}, Error: {str(e)}"
                )
            continue
        except (httpx.TimeoutException, httpx.ConnectError, httpx.NetworkError) as e:
            # Handle network-related errors
            logger.warning(
                f"Network error when fetching app role assignments for application {app.app_id} ({app.display_name}): {e}"
            )
            continue
        except Exception as e:
            # Only catch truly unexpected errors - these should be rare
            logger.error(
                f"Unexpected error when fetching app role assignments for application {app.app_id} ({app.display_name}): {e}",
                exc_info=True,
            )
            continue

    logger.info(f"Retrieved {len(assignments)} app role assignments total")
    return assignments
184
+
185
+
186
def transform_applications(applications: List[Any]) -> List[Dict[str, Any]]:
    """
    Flatten raw application objects into plain dictionaries for graph loading.

    ``publisher_domain`` is read defensively and falls back to None when the
    object does not carry that attribute; every other field is read directly.

    :param applications: Raw Application objects from Microsoft Graph API
    :return: Transformed application data for graph loading
    """
    return [
        {
            "id": application.id,
            "app_id": application.app_id,
            "display_name": application.display_name,
            "publisher_domain": getattr(application, "publisher_domain", None),
            "sign_in_audience": application.sign_in_audience,
        }
        for application in applications
    ]
204
+
205
+
206
def transform_app_role_assignments(
    assignments: List[Any],
) -> List[Dict[str, Any]]:
    """
    Flatten raw app role assignment objects into dictionaries for graph loading.

    The role, principal and resource identifiers are converted to strings when
    they are truthy and emitted as None otherwise. ``application_app_id`` is
    read defensively and defaults to None when the attribute is absent.

    :param assignments: Raw app role assignment objects from Microsoft Graph API
    :return: Transformed assignment data for graph loading
    """

    def _text_or_none(value: Any) -> Any:
        # Falsy identifiers become None; anything else is stringified
        if value:
            return str(value)
        return None

    records: List[Dict[str, Any]] = []
    for item in assignments:
        records.append(
            {
                "id": item.id,
                "app_role_id": _text_or_none(item.app_role_id),
                "created_date_time": item.created_date_time,
                "principal_id": _text_or_none(item.principal_id),
                "principal_display_name": item.principal_display_name,
                "principal_type": item.principal_type,
                "resource_display_name": item.resource_display_name,
                "resource_id": _text_or_none(item.resource_id),
                "application_app_id": getattr(item, "application_app_id", None),
            }
        )
    return records
236
+
237
+
238
@timeit
def load_applications(
    neo4j_session: neo4j.Session,
    applications_data: List[Dict[str, Any]],
    update_tag: int,
    tenant_id: str,
) -> None:
    """
    Write transformed Entra applications to the graph using EntraApplicationSchema.

    :param neo4j_session: Neo4j session
    :param applications_data: Application data to load
    :param update_tag: Update tag, passed to the loader as ``lastupdated``
    :param tenant_id: Entra tenant ID, passed to the loader as ``TENANT_ID``
    """
    schema = EntraApplicationSchema()
    load(
        neo4j_session,
        schema,
        applications_data,
        lastupdated=update_tag,
        TENANT_ID=tenant_id,
    )
260
+
261
+
262
@timeit
def load_app_role_assignments(
    neo4j_session: neo4j.Session,
    assignments_data: List[Dict[str, Any]],
    update_tag: int,
    tenant_id: str,
) -> None:
    """
    Write transformed app role assignments to the graph using EntraAppRoleAssignmentSchema.

    :param neo4j_session: Neo4j session
    :param assignments_data: Assignment data to load
    :param update_tag: Update tag, passed to the loader as ``lastupdated``
    :param tenant_id: Entra tenant ID, passed to the loader as ``TENANT_ID``
    """
    schema = EntraAppRoleAssignmentSchema()
    load(
        neo4j_session,
        schema,
        assignments_data,
        lastupdated=update_tag,
        TENANT_ID=tenant_id,
    )
284
+
285
+
286
@timeit
def cleanup_applications(
    neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]
) -> None:
    """
    Run the cleanup job derived from EntraApplicationSchema, removing Entra
    applications and their relationships that were not updated in the last sync.

    :param neo4j_session: Neo4j session
    :param common_job_parameters: Common job parameters containing UPDATE_TAG and TENANT_ID
    """
    cleanup_job = GraphJob.from_node_schema(
        EntraApplicationSchema(), common_job_parameters
    )
    cleanup_job.run(neo4j_session)
299
+
300
+
301
@timeit
def cleanup_app_role_assignments(
    neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]
) -> None:
    """
    Run the cleanup job derived from EntraAppRoleAssignmentSchema, removing app
    role assignments and their relationships that were not updated in the last sync.

    :param neo4j_session: Neo4j session
    :param common_job_parameters: Common job parameters containing UPDATE_TAG and TENANT_ID
    """
    cleanup_job = GraphJob.from_node_schema(
        EntraAppRoleAssignmentSchema(), common_job_parameters
    )
    cleanup_job.run(neo4j_session)
314
+
315
+
316
@timeit
async def sync_entra_applications(
    neo4j_session: neo4j.Session,
    tenant_id: str,
    client_id: str,
    client_secret: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    End-to-end sync of Entra applications and their app role assignments.

    Order of operations: build the Graph client, load the tenant node, fetch and
    transform applications, fetch and transform assignments, load both sets,
    then run both cleanup jobs.

    :param neo4j_session: Neo4j session
    :param tenant_id: Entra tenant ID
    :param client_id: Azure application client ID
    :param client_secret: Azure application client secret
    :param update_tag: Update tag for tracking data freshness
    :param common_job_parameters: Common job parameters containing UPDATE_TAG and TENANT_ID
    """
    # Authenticate with a client secret and build the Graph client
    graph_client = GraphServiceClient(
        ClientSecretCredential(
            tenant_id=tenant_id,
            client_id=client_id,
            client_secret=client_secret,
        ),
        scopes=["https://graph.microsoft.com/.default"],
    )

    # The tenant node is loaded before any applications or assignments
    load_tenant(neo4j_session, {"id": tenant_id}, update_tag)

    # Applications: fetch, then transform
    raw_applications = await get_entra_applications(graph_client)
    application_rows = transform_applications(raw_applications)

    # Assignments: fetched per application, then transformed
    raw_assignments = await get_app_role_assignments(graph_client, raw_applications)
    assignment_rows = transform_app_role_assignments(raw_assignments)

    # Write both data sets to the graph
    load_applications(neo4j_session, application_rows, update_tag, tenant_id)
    load_app_role_assignments(neo4j_session, assignment_rows, update_tag, tenant_id)

    # Remove whatever this run did not refresh
    cleanup_applications(neo4j_session, common_job_parameters)
    cleanup_app_role_assignments(neo4j_session, common_job_parameters)
@@ -0,0 +1,151 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ import neo4j
5
+ from azure.identity import ClientSecretCredential
6
+ from msgraph import GraphServiceClient
7
+ from msgraph.generated.models.directory_object import DirectoryObject
8
+ from msgraph.generated.models.group import Group
9
+
10
+ from cartography.client.core.tx import load
11
+ from cartography.graph.job import GraphJob
12
+ from cartography.intel.entra.users import load_tenant
13
+ from cartography.models.entra.group import EntraGroupSchema
14
+ from cartography.util import timeit
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@timeit
async def get_entra_groups(client: GraphServiceClient) -> list[Group]:
    """Fetch every group from Microsoft Graph, following paging links (page size 999)."""
    query = client.groups.GroupsRequestBuilderGetQueryParameters(top=999)
    config = client.groups.GroupsRequestBuilderGetRequestConfiguration(
        query_parameters=query
    )

    collected: list[Group] = []
    response = await client.groups.get(request_configuration=config)
    # Stop on an empty response or when no next link is provided
    while response:
        collected.extend(response.value or [])
        next_link = response.odata_next_link
        if not next_link:
            break
        response = await client.groups.with_url(next_link).get()

    return collected
36
+
37
+
38
@timeit
async def get_group_members(
    client: GraphServiceClient, group_id: str
) -> tuple[list[str], list[str]]:
    """Return (user IDs, subgroup IDs) for the members of the given group."""
    user_ids: list[str] = []
    group_ids: list[str] = []
    # Route each member to the right list by its OData type; other types are ignored
    destinations = {
        "#microsoft.graph.user": user_ids,
        "#microsoft.graph.group": group_ids,
    }

    members_builder = client.groups.by_group_id(group_id).members
    response = await members_builder.get()
    while response:
        for member in response.value or []:
            if not isinstance(member, DirectoryObject):
                continue
            target = destinations.get(getattr(member, "odata_type", ""))
            if target is not None:
                target.append(member.id)

        next_link = response.odata_next_link
        if not next_link:
            break
        response = await members_builder.with_url(next_link).get()

    return user_ids, group_ids
60
+
61
+
62
def transform_groups(
    groups: list[Group],
    user_member_map: dict[str, list[str]],
    group_member_map: dict[str, list[str]],
) -> list[dict[str, Any]]:
    """Convert group objects into ingestion dictionaries, attaching member and subgroup IDs looked up by group id (empty list when absent)."""
    return [
        {
            "id": group.id,
            "display_name": group.display_name,
            "description": group.description,
            "mail": group.mail,
            "mail_nickname": group.mail_nickname,
            "mail_enabled": group.mail_enabled,
            "security_enabled": group.security_enabled,
            "group_types": group.group_types,
            "visibility": group.visibility,
            "is_assignable_to_role": group.is_assignable_to_role,
            "created_date_time": group.created_date_time,
            "deleted_date_time": group.deleted_date_time,
            "member_ids": user_member_map.get(group.id, []),
            "member_group_ids": group_member_map.get(group.id, []),
        }
        for group in groups
    ]
88
+
89
+
90
@timeit
def load_groups(
    neo4j_session: neo4j.Session,
    groups: list[dict[str, Any]],
    update_tag: int,
    tenant_id: str,
) -> None:
    """Log the group count, then write the groups to the graph using EntraGroupSchema."""
    logger.info(f"Loading {len(groups)} Entra groups")
    schema = EntraGroupSchema()
    load(
        neo4j_session,
        schema,
        groups,
        lastupdated=update_tag,
        TENANT_ID=tenant_id,
    )
105
+
106
+
107
@timeit
def cleanup_groups(
    neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
) -> None:
    """Build the cleanup job derived from EntraGroupSchema and run it on the session."""
    cleanup_job = GraphJob.from_node_schema(EntraGroupSchema(), common_job_parameters)
    cleanup_job.run(neo4j_session)
114
+
115
+
116
@timeit
async def sync_entra_groups(
    neo4j_session: neo4j.Session,
    tenant_id: str,
    client_id: str,
    client_secret: str,
    update_tag: int,
    common_job_parameters: dict[str, Any],
) -> None:
    """Sync Entra groups: fetch groups and their members, then load tenant, load groups, and clean up."""
    client = GraphServiceClient(
        ClientSecretCredential(
            tenant_id=tenant_id, client_id=client_id, client_secret=client_secret
        ),
        scopes=["https://graph.microsoft.com/.default"],
    )

    groups = await get_entra_groups(client)

    user_member_map: dict[str, list[str]] = {}
    group_member_map: dict[str, list[str]] = {}
    for group in groups:
        # Best effort: a failed membership lookup is logged and recorded as empty
        try:
            users, subgroups = await get_group_members(client, group.id)
        except Exception as e:
            logger.error(f"Failed to fetch members for group {group.id}: {e}")
            users, subgroups = [], []
        user_member_map[group.id] = users
        group_member_map[group.id] = subgroups

    rows = transform_groups(groups, user_member_map, group_member_map)

    load_tenant(neo4j_session, {"id": tenant_id}, update_tag)
    load_groups(neo4j_session, rows, update_tag, tenant_id)
    cleanup_groups(neo4j_session, common_job_parameters)
@@ -22,12 +22,28 @@ async def get_entra_ous(client: GraphServiceClient) -> list[AdministrativeUnit]:
22
22
  Get all OUs from Microsoft Graph API with pagination support
23
23
  """
24
24
  all_units: list[AdministrativeUnit] = []
25
- request = client.directory.administrative_units.request()
26
25
 
27
- while request:
28
- response = await request.get()
29
- all_units.extend(response.value)
30
- request = response.odata_next_link if response.odata_next_link else None
26
+ # Initialize first page request
27
+ current_request = client.directory.administrative_units
28
+
29
+ while current_request:
30
+ try:
31
+ response = await current_request.get()
32
+ if response and response.value:
33
+ all_units.extend(response.value)
34
+
35
+ # Handle next page using OData link
36
+ if response.odata_next_link:
37
+ current_request = client.directory.administrative_units.with_url(
38
+ response.odata_next_link
39
+ )
40
+ else:
41
+ current_request = None
42
+ else:
43
+ current_request = None
44
+ except Exception as e:
45
+ logger.error(f"Failed to retrieve administrative units: {str(e)}")
46
+ current_request = None
31
47
 
32
48
  return all_units
33
49