cartography 0.105.0__py3-none-any.whl → 0.106.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (108) hide show
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +78 -2
  3. cartography/client/core/tx.py +62 -0
  4. cartography/config.py +24 -0
  5. cartography/data/indexes.cypher +0 -34
  6. cartography/driftdetect/cli.py +3 -2
  7. cartography/graph/cleanupbuilder.py +47 -0
  8. cartography/graph/job.py +42 -0
  9. cartography/graph/querybuilder.py +136 -2
  10. cartography/graph/statement.py +1 -1
  11. cartography/intel/airbyte/__init__.py +105 -0
  12. cartography/intel/airbyte/connections.py +120 -0
  13. cartography/intel/airbyte/destinations.py +81 -0
  14. cartography/intel/airbyte/organizations.py +59 -0
  15. cartography/intel/airbyte/sources.py +78 -0
  16. cartography/intel/airbyte/tags.py +64 -0
  17. cartography/intel/airbyte/users.py +106 -0
  18. cartography/intel/airbyte/util.py +122 -0
  19. cartography/intel/airbyte/workspaces.py +63 -0
  20. cartography/intel/aws/codebuild.py +132 -0
  21. cartography/intel/aws/ecs.py +228 -380
  22. cartography/intel/aws/efs.py +261 -0
  23. cartography/intel/aws/identitycenter.py +14 -3
  24. cartography/intel/aws/inspector.py +96 -53
  25. cartography/intel/aws/rds.py +2 -1
  26. cartography/intel/aws/resources.py +4 -0
  27. cartography/intel/entra/__init__.py +11 -0
  28. cartography/intel/entra/applications.py +366 -0
  29. cartography/intel/entra/users.py +84 -42
  30. cartography/intel/kubernetes/__init__.py +30 -14
  31. cartography/intel/kubernetes/clusters.py +86 -0
  32. cartography/intel/kubernetes/namespaces.py +59 -57
  33. cartography/intel/kubernetes/pods.py +140 -77
  34. cartography/intel/kubernetes/secrets.py +95 -45
  35. cartography/intel/kubernetes/services.py +131 -67
  36. cartography/intel/kubernetes/util.py +125 -14
  37. cartography/intel/scaleway/__init__.py +127 -0
  38. cartography/intel/scaleway/iam/__init__.py +0 -0
  39. cartography/intel/scaleway/iam/apikeys.py +71 -0
  40. cartography/intel/scaleway/iam/applications.py +71 -0
  41. cartography/intel/scaleway/iam/groups.py +71 -0
  42. cartography/intel/scaleway/iam/users.py +71 -0
  43. cartography/intel/scaleway/instances/__init__.py +0 -0
  44. cartography/intel/scaleway/instances/flexibleips.py +86 -0
  45. cartography/intel/scaleway/instances/instances.py +92 -0
  46. cartography/intel/scaleway/projects.py +79 -0
  47. cartography/intel/scaleway/storage/__init__.py +0 -0
  48. cartography/intel/scaleway/storage/snapshots.py +86 -0
  49. cartography/intel/scaleway/storage/volumes.py +84 -0
  50. cartography/intel/scaleway/utils.py +37 -0
  51. cartography/models/airbyte/__init__.py +0 -0
  52. cartography/models/airbyte/connection.py +138 -0
  53. cartography/models/airbyte/destination.py +75 -0
  54. cartography/models/airbyte/organization.py +19 -0
  55. cartography/models/airbyte/source.py +75 -0
  56. cartography/models/airbyte/stream.py +74 -0
  57. cartography/models/airbyte/tag.py +69 -0
  58. cartography/models/airbyte/user.py +111 -0
  59. cartography/models/airbyte/workspace.py +46 -0
  60. cartography/models/aws/codebuild/__init__.py +0 -0
  61. cartography/models/aws/codebuild/project.py +49 -0
  62. cartography/models/aws/ecs/__init__.py +0 -0
  63. cartography/models/aws/ecs/clusters.py +64 -0
  64. cartography/models/aws/ecs/container_definitions.py +93 -0
  65. cartography/models/aws/ecs/container_instances.py +84 -0
  66. cartography/models/aws/ecs/containers.py +99 -0
  67. cartography/models/aws/ecs/services.py +117 -0
  68. cartography/models/aws/ecs/task_definitions.py +135 -0
  69. cartography/models/aws/ecs/tasks.py +110 -0
  70. cartography/models/aws/efs/__init__.py +0 -0
  71. cartography/models/aws/efs/access_point.py +77 -0
  72. cartography/models/aws/efs/file_system.py +60 -0
  73. cartography/models/aws/efs/mount_target.py +79 -0
  74. cartography/models/core/common.py +1 -0
  75. cartography/models/core/relationships.py +44 -0
  76. cartography/models/entra/app_role_assignment.py +115 -0
  77. cartography/models/entra/application.py +47 -0
  78. cartography/models/entra/user.py +17 -51
  79. cartography/models/kubernetes/__init__.py +0 -0
  80. cartography/models/kubernetes/clusters.py +26 -0
  81. cartography/models/kubernetes/containers.py +108 -0
  82. cartography/models/kubernetes/namespaces.py +51 -0
  83. cartography/models/kubernetes/pods.py +80 -0
  84. cartography/models/kubernetes/secrets.py +79 -0
  85. cartography/models/kubernetes/services.py +108 -0
  86. cartography/models/scaleway/__init__.py +0 -0
  87. cartography/models/scaleway/iam/__init__.py +0 -0
  88. cartography/models/scaleway/iam/apikey.py +96 -0
  89. cartography/models/scaleway/iam/application.py +52 -0
  90. cartography/models/scaleway/iam/group.py +95 -0
  91. cartography/models/scaleway/iam/user.py +60 -0
  92. cartography/models/scaleway/instance/__init__.py +0 -0
  93. cartography/models/scaleway/instance/flexibleip.py +52 -0
  94. cartography/models/scaleway/instance/instance.py +118 -0
  95. cartography/models/scaleway/organization.py +19 -0
  96. cartography/models/scaleway/project.py +48 -0
  97. cartography/models/scaleway/storage/__init__.py +0 -0
  98. cartography/models/scaleway/storage/snapshot.py +78 -0
  99. cartography/models/scaleway/storage/volume.py +51 -0
  100. cartography/sync.py +8 -4
  101. cartography/util.py +15 -10
  102. {cartography-0.105.0.dist-info → cartography-0.106.0.dist-info}/METADATA +5 -2
  103. {cartography-0.105.0.dist-info → cartography-0.106.0.dist-info}/RECORD +107 -35
  104. cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
  105. {cartography-0.105.0.dist-info → cartography-0.106.0.dist-info}/WHEEL +0 -0
  106. {cartography-0.105.0.dist-info → cartography-0.106.0.dist-info}/entry_points.txt +0 -0
  107. {cartography-0.105.0.dist-info → cartography-0.106.0.dist-info}/licenses/LICENSE +0 -0
  108. {cartography-0.105.0.dist-info → cartography-0.106.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,366 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import httpx
7
+ import neo4j
8
+ from azure.identity import ClientSecretCredential
9
+ from kiota_abstractions.api_error import APIError
10
+ from msgraph.graph_service_client import GraphServiceClient
11
+
12
+ from cartography.client.core.tx import load
13
+ from cartography.graph.job import GraphJob
14
+ from cartography.intel.entra.users import load_tenant
15
+ from cartography.models.entra.app_role_assignment import EntraAppRoleAssignmentSchema
16
+ from cartography.models.entra.application import EntraApplicationSchema
17
+ from cartography.util import timeit
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # Configurable constants for API pagination
22
+ # Microsoft Graph API recommends page sizes up to 999 for most resources
23
+ # Set to 999 by default, but can be adjusted if needed
24
+ #
25
+ # Adjust these values if:
26
+ # - You have performance issues (decrease values)
27
+ # - You want to minimize API calls (increase values up to 999)
28
+ # - You're hitting rate limits (decrease values)
29
+ APPLICATIONS_PAGE_SIZE = 999
30
+ APP_ROLE_ASSIGNMENTS_PAGE_SIZE = (
31
+ 999 # Currently not used, but reserved for future pagination improvements
32
+ )
33
+
34
+ # Warning thresholds for potential data completeness issues
35
+ # Log a warning when a single application has at least this many role assignments
36
+ HIGH_ASSIGNMENT_COUNT_THRESHOLD = 100
37
+
38
+
39
@timeit
async def get_entra_applications(client: GraphServiceClient) -> List[Any]:
    """
    Fetch all Entra applications through the Microsoft Graph API.

    The first request asks for up to APPLICATIONS_PAGE_SIZE items; further
    pages are retrieved by following each page's ``odata_next_link`` until a
    page without one (or an empty response) is reached.

    :param client: GraphServiceClient
    :return: List of raw Application objects from Microsoft Graph
    """
    builder = client.applications
    query = builder.ApplicationsRequestBuilderGetQueryParameters(
        top=APPLICATIONS_PAGE_SIZE
    )
    config = builder.ApplicationsRequestBuilderGetRequestConfiguration(
        query_parameters=query
    )

    collected: List[Any] = []
    current = await builder.get(request_configuration=config)
    while current:
        # A page may legitimately carry no items; extending with [] is a no-op.
        collected.extend(current.value or [])

        next_link = current.odata_next_link
        if not next_link:
            break
        current = await client.applications.with_url(next_link).get()

    logger.info(f"Retrieved {len(collected)} Entra applications total")
    return collected
67
+
68
+
69
@timeit
async def get_app_role_assignments(
    client: GraphServiceClient, applications: List[Any]
) -> List[Any]:
    """
    Gets app role assignments by querying each application's service principal.

    For every application the function looks up the service principal whose
    ``appId`` matches, then pages through that service principal's
    ``appRoleAssignedTo`` collection. Each returned assignment object is tagged
    with an ``application_app_id`` attribute so it can later be matched to its
    application.

    Errors are handled per application: the failure is logged and the loop
    moves on to the next application. Assignments are only added to the result
    once all pages for an application were fetched, so an application that
    fails mid-pagination contributes nothing.

    :param client: GraphServiceClient
    :param applications: List of Application objects (from get_entra_applications)
    :return: List of raw app role assignment objects from Microsoft Graph
    """
    assignments: List[Any] = []

    for app in applications:
        if not app.app_id:
            logger.warning(f"Application {app.id} has no app_id, skipping")
            continue

        try:
            # First, get the service principal for this application.
            # The service principal represents the app in the directory.
            service_principals_page = await client.service_principals.get(
                request_configuration=client.service_principals.ServicePrincipalsRequestBuilderGetRequestConfiguration(
                    query_parameters=client.service_principals.ServicePrincipalsRequestBuilderGetQueryParameters(
                        filter=f"appId eq '{app.app_id}'"
                    )
                )
            )

            if not service_principals_page or not service_principals_page.value:
                logger.debug(
                    f"No service principal found for application {app.app_id} ({app.display_name})"
                )
                continue

            service_principal = service_principals_page.value[0]

            # Ensure service principal has an ID
            if not service_principal.id:
                logger.warning(
                    f"Service principal for application {app.app_id} ({app.display_name}) has no ID, skipping"
                )
                continue

            # Keep a handle on the appRoleAssignedTo builder: follow-up pages
            # must be requested through the same builder so that responses are
            # deserialized as app role assignments. Going through
            # `client.service_principals.with_url(...)` would parse them as a
            # service principal collection instead.
            assigned_to_builder = client.service_principals.by_service_principal_id(
                service_principal.id
            ).app_role_assigned_to

            # Get all assignments for this service principal (users, groups, service principals)
            assignments_page = await assigned_to_builder.get()

            app_assignments: List[Any] = []
            while assignments_page:
                if assignments_page.value:
                    for assignment in assignments_page.value:
                        # Add the application app_id to the assignment for relationship matching
                        assignment.application_app_id = app.app_id
                    app_assignments.extend(assignments_page.value)

                if not assignments_page.odata_next_link:
                    break
                assignments_page = await assigned_to_builder.with_url(
                    assignments_page.odata_next_link
                ).get()

            # Log warning if a single application has many assignments
            if len(app_assignments) >= HIGH_ASSIGNMENT_COUNT_THRESHOLD:
                logger.warning(
                    f"Application {app.display_name} ({app.app_id}) has {len(app_assignments)} role assignments. "
                    f"If this seems unexpectedly high, there may be pagination limits affecting data completeness."
                )

            assignments.extend(app_assignments)
            logger.debug(
                f"Retrieved {len(app_assignments)} assignments for application {app.display_name}"
            )

        except APIError as e:
            # Handle Microsoft Graph API errors (403 Forbidden, 404 Not Found, etc.)
            if e.response_status_code == 403:
                logger.warning(
                    f"Access denied when fetching app role assignments for application {app.app_id} ({app.display_name}). "
                    f"This application may not have sufficient permissions or may not exist."
                )
            elif e.response_status_code == 404:
                logger.warning(
                    f"Application {app.app_id} ({app.display_name}) not found when fetching app role assignments. "
                    f"Application may have been deleted or does not exist."
                )
            elif e.response_status_code == 429:
                logger.warning(
                    f"Rate limit hit when fetching app role assignments for application {app.app_id} ({app.display_name}). "
                    f"Consider reducing APPLICATIONS_PAGE_SIZE or implementing retry logic."
                )
            else:
                logger.warning(
                    f"Microsoft Graph API error when fetching app role assignments for application {app.app_id} ({app.display_name}): "
                    f"Status {e.response_status_code}, Error: {str(e)}"
                )
            continue
        except (httpx.TimeoutException, httpx.ConnectError, httpx.NetworkError) as e:
            # Handle network-related errors
            logger.warning(
                f"Network error when fetching app role assignments for application {app.app_id} ({app.display_name}): {e}"
            )
            continue
        except Exception as e:
            # Best-effort boundary: one broken application must not abort the
            # whole sync, so log with traceback and carry on.
            logger.error(
                f"Unexpected error when fetching app role assignments for application {app.app_id} ({app.display_name}): {e}",
                exc_info=True,
            )
            continue

    logger.info(f"Retrieved {len(assignments)} app role assignments total")
    return assignments
184
+
185
+
186
def transform_applications(applications: List[Any]) -> List[Dict[str, Any]]:
    """
    Convert raw Microsoft Graph Application objects into plain dicts for loading.

    ``publisher_domain`` is read defensively and becomes ``None`` when the
    object does not expose that attribute; all other fields are read directly.

    :param applications: Raw Application objects from Microsoft Graph API
    :return: Transformed application data for graph loading
    """
    return [
        {
            "id": application.id,
            "app_id": application.app_id,
            "display_name": application.display_name,
            "publisher_domain": getattr(application, "publisher_domain", None),
            "sign_in_audience": application.sign_in_audience,
        }
        for application in applications
    ]
204
+
205
+
206
def transform_app_role_assignments(
    assignments: List[Any],
) -> List[Dict[str, Any]]:
    """
    Convert raw app role assignment objects into plain dicts for loading.

    The ``app_role_id``, ``principal_id`` and ``resource_id`` values are
    converted with ``str()`` when truthy and become ``None`` otherwise.
    ``application_app_id`` is taken from the object if present, else ``None``.

    :param assignments: Raw app role assignment objects from Microsoft Graph API
    :return: Transformed assignment data for graph loading
    """

    def _text_or_none(value: Any) -> Any:
        # Falsy identifiers (None, empty) are normalised to None.
        return str(value) if value else None

    rows: List[Dict[str, Any]] = []
    for item in assignments:
        rows.append(
            {
                "id": item.id,
                "app_role_id": _text_or_none(item.app_role_id),
                "created_date_time": item.created_date_time,
                "principal_id": _text_or_none(item.principal_id),
                "principal_display_name": item.principal_display_name,
                "principal_type": item.principal_type,
                "resource_display_name": item.resource_display_name,
                "resource_id": _text_or_none(item.resource_id),
                "application_app_id": getattr(item, "application_app_id", None),
            }
        )
    return rows
236
+
237
+
238
@timeit
def load_applications(
    neo4j_session: neo4j.Session,
    applications_data: List[Dict[str, Any]],
    update_tag: int,
    tenant_id: str,
) -> None:
    """
    Write transformed Entra applications to the graph.

    Delegates to ``load`` with ``EntraApplicationSchema``, passing the update
    tag as ``lastupdated`` and the tenant as ``TENANT_ID``.

    :param neo4j_session: Neo4j session
    :param applications_data: Application data to load
    :param update_tag: Update tag for tracking data freshness
    :param tenant_id: Entra tenant ID
    """
    schema = EntraApplicationSchema()
    load(
        neo4j_session,
        schema,
        applications_data,
        lastupdated=update_tag,
        TENANT_ID=tenant_id,
    )
260
+
261
+
262
@timeit
def load_app_role_assignments(
    neo4j_session: neo4j.Session,
    assignments_data: List[Dict[str, Any]],
    update_tag: int,
    tenant_id: str,
) -> None:
    """
    Write transformed Entra app role assignments to the graph.

    Delegates to ``load`` with ``EntraAppRoleAssignmentSchema``, passing the
    update tag as ``lastupdated`` and the tenant as ``TENANT_ID``.

    :param neo4j_session: Neo4j session
    :param assignments_data: Assignment data to load
    :param update_tag: Update tag for tracking data freshness
    :param tenant_id: Entra tenant ID
    """
    schema = EntraAppRoleAssignmentSchema()
    load(
        neo4j_session,
        schema,
        assignments_data,
        lastupdated=update_tag,
        TENANT_ID=tenant_id,
    )
284
+
285
+
286
@timeit
def cleanup_applications(
    neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]
) -> None:
    """
    Run the cleanup job derived from ``EntraApplicationSchema``.

    Intended to remove Entra applications (and their relationships) that were
    not refreshed by the latest sync.

    :param neo4j_session: Neo4j session
    :param common_job_parameters: Common job parameters containing UPDATE_TAG and TENANT_ID
    """
    cleanup_job = GraphJob.from_node_schema(
        EntraApplicationSchema(), common_job_parameters
    )
    cleanup_job.run(neo4j_session)
299
+
300
+
301
@timeit
def cleanup_app_role_assignments(
    neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]
) -> None:
    """
    Run the cleanup job derived from ``EntraAppRoleAssignmentSchema``.

    Intended to remove Entra app role assignments (and their relationships)
    that were not refreshed by the latest sync.

    :param neo4j_session: Neo4j session
    :param common_job_parameters: Common job parameters containing UPDATE_TAG and TENANT_ID
    """
    cleanup_job = GraphJob.from_node_schema(
        EntraAppRoleAssignmentSchema(), common_job_parameters
    )
    cleanup_job.run(neo4j_session)
314
+
315
+
316
@timeit
async def sync_entra_applications(
    neo4j_session: neo4j.Session,
    tenant_id: str,
    client_id: str,
    client_secret: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Sync Entra applications and their app role assignments to the graph.

    Steps, in order: build a Graph client from the client-secret credential,
    load the tenant node, fetch and transform applications, fetch and
    transform their app role assignments, load both data sets, then run the
    two cleanup jobs.

    :param neo4j_session: Neo4j session
    :param tenant_id: Entra tenant ID
    :param client_id: Azure application client ID
    :param client_secret: Azure application client secret
    :param update_tag: Update tag for tracking data freshness
    :param common_job_parameters: Common job parameters containing UPDATE_TAG and TENANT_ID
    """
    # Authenticate with the app registration's client secret.
    graph_credential = ClientSecretCredential(
        tenant_id=tenant_id,
        client_id=client_id,
        client_secret=client_secret,
    )
    graph_client = GraphServiceClient(
        graph_credential,
        scopes=["https://graph.microsoft.com/.default"],
    )

    # The tenant node is loaded before anything else.
    load_tenant(neo4j_session, {"id": tenant_id}, update_tag)

    # Applications: fetch, then shape for loading.
    raw_applications = await get_entra_applications(graph_client)
    application_rows = transform_applications(raw_applications)

    # Assignments are looked up per application, so they need the raw objects.
    raw_assignments = await get_app_role_assignments(graph_client, raw_applications)
    assignment_rows = transform_app_role_assignments(raw_assignments)

    # Write applications first, then the assignments.
    load_applications(neo4j_session, application_rows, update_tag, tenant_id)
    load_app_role_assignments(
        neo4j_session, assignment_rows, update_tag, tenant_id
    )

    # Finally drop anything not touched by this run.
    cleanup_applications(neo4j_session, common_job_parameters)
    cleanup_app_role_assignments(neo4j_session, common_job_parameters)
@@ -15,6 +15,51 @@ from cartography.util import timeit
15
15
 
16
16
  logger = logging.getLogger(__name__)
17
17
 
18
+ # NOTE:
19
+ # Microsoft Graph imposes limits on the length of the $select clause as well as
20
+ # the number of properties that can be selected in a single request. In
21
+ # practice we have seen 400 Bad Request responses that bubble up as
22
+ # `Microsoft.SharePoint.Client.InvalidClientQueryException` once that limit is
23
+ # breached (Graph internally rewrites the next-link using a SharePoint style
24
+ # `id in (…)` filter which is then rejected).
25
+ #
26
+ # To avoid tripping this bug we only request a *core* subset of user attributes
27
+ # that are most commonly used in downstream analysis. The transform() function
28
+ # tolerates missing attributes (the generated MS Graph SDK simply returns
29
+ # `None` for properties that are not present in the payload), so fetching fewer
30
+ # fields is safe – we merely get more `null` values in the graph.
31
+ #
32
+ # If you need additional attributes in the future, append them here but keep the
33
+ # total character count of the comma-separated list comfortably below 500 and
34
+ # stay within the official v1.0 contract (beta-only fields cause similar
35
+ # failures). 20–25 fields is a good rule-of-thumb.
36
+ #
37
+ # References:
38
+ # • https://learn.microsoft.com/graph/query-parameters#select-parameter
39
+ # • https://learn.microsoft.com/graph/api/user-list?view=graph-rest-1.0
40
+ #
41
# Properties requested through `$select` when listing users. Every attribute
# that transform_users() reads from the user object should appear here;
# anything not selected is left as `None` by the SDK.
USER_SELECT_FIELDS = [
    "id",
    "userPrincipalName",
    "displayName",
    "givenName",
    "surname",
    "mail",
    "mobilePhone",
    "businessPhones",
    "jobTitle",
    "department",
    "officeLocation",
    "city",
    # transform_users() emits `state`; without selecting it the value would
    # always be null in the graph.
    "state",
    "country",
    "companyName",
    "preferredLanguage",
    "employeeId",
    "employeeType",
    "accountEnabled",
    "ageGroup",
]
62
+
18
63
 
19
64
  @timeit
20
65
  async def get_tenant(client: GraphServiceClient) -> Organization:
@@ -27,14 +72,20 @@ async def get_tenant(client: GraphServiceClient) -> Organization:
27
72
 
28
73
  @timeit
29
74
  async def get_users(client: GraphServiceClient) -> list[User]:
75
+ """Fetch all users with their manager reference in as few requests as possible.
76
+
77
+ We leverage `$expand=manager($select=id)` so the manager's *id* is hydrated
78
+ alongside every user record. This avoids making a second round-trip per
79
+ user – vastly reducing latency and eliminating the noisy 404s that occur
80
+ when a user has no manager assigned.
30
81
  """
31
- Get all users from Microsoft Graph API with pagination support
32
- """
82
+
33
83
  all_users: list[User] = []
34
84
  request_configuration = client.users.UsersRequestBuilderGetRequestConfiguration(
35
85
  query_parameters=client.users.UsersRequestBuilderGetQueryParameters(
36
- # Request more items per page to reduce number of API calls
37
86
  top=999,
87
+ select=USER_SELECT_FIELDS,
88
+ expand=["manager($select=id)"],
38
89
  ),
39
90
  )
40
91
 
@@ -43,18 +94,32 @@ async def get_users(client: GraphServiceClient) -> list[User]:
43
94
  all_users.extend(page.value)
44
95
  if not page.odata_next_link:
45
96
  break
46
- page = await client.users.with_url(page.odata_next_link).get()
97
+
98
+ try:
99
+ page = await client.users.with_url(page.odata_next_link).get()
100
+ except Exception as e:
101
+ logger.error(
102
+ "Failed to fetch next page of Entra ID users – stopping pagination early: %s",
103
+ e,
104
+ )
105
+ break
47
106
 
48
107
  return all_users
49
108
 
50
109
 
51
110
  @timeit
111
+ # The manager reference is now embedded in the user objects courtesy of the
112
+ # `$expand` we added above, so we no longer need a separate `manager_map`.
52
113
  def transform_users(users: list[User]) -> list[dict[str, Any]]:
53
- """
54
- Transform the API response into the format expected by our schema
55
- """
114
+ """Convert MS Graph SDK `User` models into dicts matching our schema."""
115
+
56
116
  result: list[dict[str, Any]] = []
57
117
  for user in users:
118
+ manager_id: str | None = None
119
+ if getattr(user, "manager", None) is not None:
120
+ # The SDK materialises `manager` as a DirectoryObject (or subclass)
121
+ manager_id = getattr(user.manager, "id", None)
122
+
58
123
  transformed_user = {
59
124
  "id": user.id,
60
125
  "user_principal_name": user.user_principal_name,
@@ -62,47 +127,24 @@ def transform_users(users: list[User]) -> list[dict[str, Any]]:
62
127
  "given_name": user.given_name,
63
128
  "surname": user.surname,
64
129
  "mail": user.mail,
65
- "other_mails": user.other_mails,
66
- "preferred_language": user.preferred_language,
67
- "preferred_name": user.preferred_name,
68
- "state": user.state,
69
- "usage_location": user.usage_location,
70
- "user_type": user.user_type,
71
- "show_in_address_list": user.show_in_address_list,
72
- "sign_in_sessions_valid_from_date_time": user.sign_in_sessions_valid_from_date_time,
73
- "security_identifier": user.on_premises_security_identifier,
74
- "account_enabled": user.account_enabled,
75
- "age_group": user.age_group,
130
+ "mobile_phone": user.mobile_phone,
76
131
  "business_phones": user.business_phones,
132
+ "job_title": user.job_title,
133
+ "department": user.department,
134
+ "office_location": user.office_location,
77
135
  "city": user.city,
78
- "company_name": user.company_name,
79
- "consent_provided_for_minor": user.consent_provided_for_minor,
136
+ "state": user.state,
80
137
  "country": user.country,
81
- "created_date_time": user.created_date_time,
82
- "creation_type": user.creation_type,
83
- "deleted_date_time": user.deleted_date_time,
84
- "department": user.department,
138
+ "company_name": user.company_name,
139
+ "preferred_language": user.preferred_language,
85
140
  "employee_id": user.employee_id,
86
141
  "employee_type": user.employee_type,
87
- "external_user_state": user.external_user_state,
88
- "external_user_state_change_date_time": user.external_user_state_change_date_time,
89
- "hire_date": user.hire_date,
90
- "is_management_restricted": user.is_management_restricted,
91
- "is_resource_account": user.is_resource_account,
92
- "job_title": user.job_title,
93
- "last_password_change_date_time": user.last_password_change_date_time,
94
- "mail_nickname": user.mail_nickname,
95
- "office_location": user.office_location,
96
- "on_premises_distinguished_name": user.on_premises_distinguished_name,
97
- "on_premises_domain_name": user.on_premises_domain_name,
98
- "on_premises_immutable_id": user.on_premises_immutable_id,
99
- "on_premises_last_sync_date_time": user.on_premises_last_sync_date_time,
100
- "on_premises_sam_account_name": user.on_premises_sam_account_name,
101
- "on_premises_security_identifier": user.on_premises_security_identifier,
102
- "on_premises_sync_enabled": user.on_premises_sync_enabled,
103
- "on_premises_user_principal_name": user.on_premises_user_principal_name,
142
+ "account_enabled": user.account_enabled,
143
+ "age_group": user.age_group,
144
+ "manager_id": manager_id,
104
145
  }
105
146
  result.append(transformed_user)
147
+
106
148
  return result
107
149
 
108
150
 
@@ -198,7 +240,7 @@ async def sync_entra_users(
198
240
  credential, scopes=["https://graph.microsoft.com/.default"]
199
241
  )
200
242
 
201
- # Get tenant information
243
+ # Fetch tenant and users (with manager reference already populated by `$expand`)
202
244
  tenant = await get_tenant(client)
203
245
  users = await get_users(client)
204
246
 
@@ -3,12 +3,12 @@ import logging
3
3
  from neo4j import Session
4
4
 
5
5
  from cartography.config import Config
6
+ from cartography.intel.kubernetes.clusters import sync_kubernetes_cluster
6
7
  from cartography.intel.kubernetes.namespaces import sync_namespaces
7
8
  from cartography.intel.kubernetes.pods import sync_pods
8
9
  from cartography.intel.kubernetes.secrets import sync_secrets
9
10
  from cartography.intel.kubernetes.services import sync_services
10
11
  from cartography.intel.kubernetes.util import get_k8s_clients
11
- from cartography.util import run_cleanup_job
12
12
  from cartography.util import timeit
13
13
 
14
14
  logger = logging.getLogger(__name__)
@@ -16,26 +16,42 @@ logger = logging.getLogger(__name__)
16
16
 
17
17
  @timeit
18
18
  def start_k8s_ingestion(session: Session, config: Config) -> None:
19
+ if not config.update_tag:
20
+ logger.error("Cartography update tag not provided.")
21
+ return
19
22
 
20
- common_job_parameters = {"UPDATE_TAG": config.update_tag}
21
23
  if not config.k8s_kubeconfig:
22
- logger.error("kubeconfig not found.")
24
+ logger.error("Kubernetes kubeconfig not provided.")
23
25
  return
24
26
 
27
+ common_job_parameters = {"UPDATE_TAG": config.update_tag}
28
+
25
29
  for client in get_k8s_clients(config.k8s_kubeconfig):
26
30
  logger.info(f"Syncing data for k8s cluster {client.name}...")
27
31
  try:
28
- cluster = sync_namespaces(session, client, config.update_tag)
29
- pods = sync_pods(session, client, config.update_tag, cluster)
30
- sync_services(session, client, config.update_tag, cluster, pods)
31
- sync_secrets(session, client, config.update_tag, cluster)
32
+ cluster_info = sync_kubernetes_cluster(
33
+ session,
34
+ client,
35
+ config.update_tag,
36
+ common_job_parameters,
37
+ )
38
+ common_job_parameters["CLUSTER_ID"] = cluster_info.get("id")
39
+
40
+ sync_namespaces(session, client, config.update_tag, common_job_parameters)
41
+ all_pods = sync_pods(
42
+ session,
43
+ client,
44
+ config.update_tag,
45
+ common_job_parameters,
46
+ )
47
+ sync_secrets(session, client, config.update_tag, common_job_parameters)
48
+ sync_services(
49
+ session,
50
+ client,
51
+ all_pods,
52
+ config.update_tag,
53
+ common_job_parameters,
54
+ )
32
55
  except Exception:
33
56
  logger.exception(f"Failed to sync data for k8s cluster {client.name}...")
34
57
  raise
35
-
36
- run_cleanup_job(
37
- "kubernetes_import_cleanup.json",
38
- session,
39
- common_job_parameters,
40
- package="cartography.data.jobs.cleanup",
41
- )