cartography 0.113.0__py3-none-any.whl → 0.115.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (96)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +10 -2
  3. cartography/client/core/tx.py +11 -0
  4. cartography/config.py +4 -0
  5. cartography/data/indexes.cypher +0 -27
  6. cartography/intel/aws/config.py +7 -3
  7. cartography/intel/aws/ecr.py +9 -9
  8. cartography/intel/aws/iam.py +741 -492
  9. cartography/intel/aws/identitycenter.py +240 -13
  10. cartography/intel/aws/lambda_function.py +69 -2
  11. cartography/intel/aws/organizations.py +10 -9
  12. cartography/intel/aws/permission_relationships.py +7 -17
  13. cartography/intel/aws/redshift.py +9 -4
  14. cartography/intel/aws/route53.py +53 -3
  15. cartography/intel/aws/securityhub.py +3 -1
  16. cartography/intel/azure/__init__.py +24 -0
  17. cartography/intel/azure/app_service.py +105 -0
  18. cartography/intel/azure/functions.py +124 -0
  19. cartography/intel/azure/logic_apps.py +101 -0
  20. cartography/intel/create_indexes.py +2 -1
  21. cartography/intel/dns.py +5 -2
  22. cartography/intel/entra/__init__.py +31 -0
  23. cartography/intel/entra/app_role_assignments.py +277 -0
  24. cartography/intel/entra/applications.py +4 -238
  25. cartography/intel/entra/federation/__init__.py +0 -0
  26. cartography/intel/entra/federation/aws_identity_center.py +77 -0
  27. cartography/intel/entra/service_principals.py +217 -0
  28. cartography/intel/gcp/__init__.py +136 -440
  29. cartography/intel/gcp/clients.py +65 -0
  30. cartography/intel/gcp/compute.py +18 -44
  31. cartography/intel/gcp/crm/__init__.py +0 -0
  32. cartography/intel/gcp/crm/folders.py +108 -0
  33. cartography/intel/gcp/crm/orgs.py +65 -0
  34. cartography/intel/gcp/crm/projects.py +109 -0
  35. cartography/intel/gcp/dns.py +2 -1
  36. cartography/intel/gcp/gke.py +72 -113
  37. cartography/intel/github/__init__.py +41 -0
  38. cartography/intel/github/commits.py +423 -0
  39. cartography/intel/github/repos.py +76 -45
  40. cartography/intel/gsuite/api.py +17 -4
  41. cartography/intel/okta/applications.py +9 -4
  42. cartography/intel/okta/awssaml.py +5 -2
  43. cartography/intel/okta/factors.py +3 -1
  44. cartography/intel/okta/groups.py +5 -2
  45. cartography/intel/okta/organization.py +3 -1
  46. cartography/intel/okta/origins.py +3 -1
  47. cartography/intel/okta/roles.py +5 -2
  48. cartography/intel/okta/users.py +3 -1
  49. cartography/models/aws/iam/access_key.py +103 -0
  50. cartography/models/aws/iam/account_role.py +24 -0
  51. cartography/models/aws/iam/federated_principal.py +60 -0
  52. cartography/models/aws/iam/group.py +60 -0
  53. cartography/models/aws/iam/group_membership.py +26 -0
  54. cartography/models/aws/iam/inline_policy.py +78 -0
  55. cartography/models/aws/iam/managed_policy.py +51 -0
  56. cartography/models/aws/iam/policy_statement.py +57 -0
  57. cartography/models/aws/iam/role.py +83 -0
  58. cartography/models/aws/iam/root_principal.py +52 -0
  59. cartography/models/aws/iam/service_principal.py +30 -0
  60. cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
  61. cartography/models/aws/iam/user.py +54 -0
  62. cartography/models/aws/identitycenter/awspermissionset.py +24 -1
  63. cartography/models/aws/identitycenter/awssogroup.py +70 -0
  64. cartography/models/aws/identitycenter/awsssouser.py +37 -1
  65. cartography/models/aws/lambda_function/lambda_function.py +2 -0
  66. cartography/models/azure/__init__.py +0 -0
  67. cartography/models/azure/app_service.py +59 -0
  68. cartography/models/azure/function_app.py +59 -0
  69. cartography/models/azure/logic_apps.py +56 -0
  70. cartography/models/entra/entra_user_to_aws_sso.py +41 -0
  71. cartography/models/entra/service_principal.py +104 -0
  72. cartography/models/entra/user.py +18 -0
  73. cartography/models/gcp/compute/subnet.py +74 -0
  74. cartography/models/gcp/crm/__init__.py +0 -0
  75. cartography/models/gcp/crm/folders.py +98 -0
  76. cartography/models/gcp/crm/organizations.py +21 -0
  77. cartography/models/gcp/crm/projects.py +100 -0
  78. cartography/models/gcp/gke.py +69 -0
  79. cartography/models/github/commits.py +63 -0
  80. {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/METADATA +8 -5
  81. {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/RECORD +85 -56
  82. cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
  83. cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
  84. cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
  85. cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
  86. cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
  87. cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
  88. cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
  89. cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
  90. cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
  91. cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
  92. cartography/intel/gcp/crm.py +0 -355
  93. {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/WHEEL +0 -0
  94. {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/entry_points.txt +0 -0
  95. {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/licenses/LICENSE +0 -0
  96. {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,277 @@
1
+ import gc
2
+ from typing import Any
3
+ from typing import AsyncGenerator
4
+
5
+ import neo4j
6
+ from azure.identity import ClientSecretCredential
7
+ from msgraph import GraphServiceClient
8
+ from msgraph.generated.models.app_role_assignment_collection_response import (
9
+ AppRoleAssignmentCollectionResponse,
10
+ )
11
+
12
+ from cartography.client.core.tx import load
13
+ from cartography.client.core.tx import read_list_of_values_tx
14
+ from cartography.client.core.tx import read_single_value_tx
15
+ from cartography.graph.job import GraphJob
16
+ from cartography.intel.entra.applications import APP_ROLE_ASSIGNMENTS_PAGE_SIZE
17
+ from cartography.intel.entra.applications import logger
18
+ from cartography.models.entra.app_role_assignment import EntraAppRoleAssignmentSchema
19
+ from cartography.util import timeit
20
+
21
+
22
@timeit
async def get_app_role_assignments_for_app(
    client: GraphServiceClient, neo4j_session: neo4j.Session, app_id: str
) -> AsyncGenerator[dict[str, Any], None]:
    """
    Stream the app role assignments of a single application.

    The service principal backing the application is looked up in the graph
    (by ``app_id``), then its ``app_role_assigned_to`` collection is fetched
    page by page. Each assignment that carries a ``principal_id`` is yielded
    as a plain dict; assignments without one are skipped and counted in the
    per-page log line. Nothing is yielded when the graph holds no service
    principal for the application.

    :param client: GraphServiceClient
    :param neo4j_session: Neo4j session for querying service principal
    :param app_id: Application ID
    :return: Generator of app role assignment data as dicts
    """
    logger.info(f"Fetching role assignments for application: {app_id}")

    # Query the graph to get the service principal ID for this application
    query = """
    MATCH (sp:EntraServicePrincipal {app_id: $app_id})
    RETURN sp.id as service_principal_id
    """
    service_principal_id = neo4j_session.execute_read(
        read_single_value_tx, query, app_id=app_id
    )

    if not service_principal_id:
        logger.warning(
            f"No service principal found in graph for application {app_id}. Continuing."
        )
        return

    # Build the appRoleAssignedTo request builder once; it is reused for the
    # request configuration, the first page and every follow-up page.
    assigned_to_builder = client.service_principals.by_service_principal_id(
        service_principal_id
    ).app_role_assigned_to

    # Request the page size configured in APP_ROLE_ASSIGNMENTS_PAGE_SIZE.
    # Memory is managed through streaming and batching, not page size.
    request_config = (
        assigned_to_builder.AppRoleAssignedToRequestBuilderGetRequestConfiguration(
            query_parameters=assigned_to_builder.AppRoleAssignedToRequestBuilderGetQueryParameters(
                top=APP_ROLE_ASSIGNMENTS_PAGE_SIZE
            )
        )
    )

    assignments_page: AppRoleAssignmentCollectionResponse | None = (
        await assigned_to_builder.get(request_configuration=request_config)
    )

    assignment_count = 0
    page_count = 0

    while assignments_page:
        page_count += 1

        if assignments_page.value:
            page_valid_count = 0
            page_skipped_count = 0

            # Process assignments and immediately yield to avoid accumulation
            for assignment in assignments_page.value:
                # Only yield assignments that name a principal; an entry
                # without principal_id is skipped and counted below.
                if assignment.principal_id:
                    assignment_count += 1
                    page_valid_count += 1
                    yield {
                        "id": assignment.id,
                        "app_role_id": assignment.app_role_id,
                        "created_date_time": assignment.created_date_time,
                        "principal_id": assignment.principal_id,
                        "principal_display_name": assignment.principal_display_name,
                        "principal_type": assignment.principal_type,
                        "resource_display_name": assignment.resource_display_name,
                        "resource_id": assignment.resource_id,
                        "application_app_id": app_id,
                    }
                else:
                    page_skipped_count += 1

            # Log page results with details about skipped objects
            if page_skipped_count > 0:
                logger.warning(
                    f"Page {page_count} for {app_id}: {page_valid_count} valid assignments, "
                    f"{page_skipped_count} skipped objects. Total valid: {assignment_count}"
                )
            else:
                logger.debug(
                    f"Page {page_count} for {app_id}: {page_valid_count} assignments. "
                    f"Total: {assignment_count}"
                )

        # Force garbage collection after each page
        gc.collect()

        # Check if we have more pages to fetch
        next_page_url = assignments_page.odata_next_link
        if not next_page_url:
            break

        # Clear previous page before fetching next
        assignments_page.value = None

        # Fetch the next page through the appRoleAssignedTo builder so the
        # response is parsed as an app role assignment collection. The
        # collection-level ``client.service_principals`` builder is typed for
        # service principal listings, not for assignments.
        logger.debug(f"Fetching page {page_count + 1} of assignments for {app_id}")
        assignments_page = await assigned_to_builder.with_url(next_page_url).get()

    logger.info(
        f"Successfully retrieved {assignment_count} assignments for application {app_id} (pages: {page_count})"
    )
130
+
131
+
132
+ def transform_app_role_assignments(
133
+ assignments: list[dict[str, Any]],
134
+ ) -> list[dict[str, Any]]:
135
+ """
136
+ Transform app role assignment data for graph loading.
137
+
138
+ :param assignments: Raw app role assignment data as dicts
139
+ :return: Transformed assignment data for graph loading
140
+ """
141
+ transformed = []
142
+ for assign in assignments:
143
+ transformed.append(
144
+ {
145
+ "id": assign["id"],
146
+ "app_role_id": (
147
+ str(assign["app_role_id"]) if assign["app_role_id"] else None
148
+ ),
149
+ "created_date_time": assign["created_date_time"],
150
+ "principal_id": (
151
+ str(assign["principal_id"]) if assign["principal_id"] else None
152
+ ),
153
+ "principal_display_name": assign["principal_display_name"],
154
+ "principal_type": assign["principal_type"],
155
+ "resource_display_name": assign["resource_display_name"],
156
+ "resource_id": (
157
+ str(assign["resource_id"]) if assign["resource_id"] else None
158
+ ),
159
+ "application_app_id": assign["application_app_id"],
160
+ }
161
+ )
162
+ return transformed
163
+
164
+
165
@timeit
def load_app_role_assignments(
    neo4j_session: neo4j.Session,
    assignments_data: list[dict[str, Any]],
    update_tag: int,
    tenant_id: str,
) -> None:
    """
    Write a batch of Entra app role assignments to the graph.

    Delegates to ``load`` with the ``EntraAppRoleAssignmentSchema`` node
    schema, passing the update tag as ``lastupdated`` and the tenant as
    ``TENANT_ID``.

    :param neo4j_session: Neo4j session
    :param assignments_data: Assignment data to load
    :param update_tag: Update tag for tracking data freshness
    :param tenant_id: Entra tenant ID
    """
    schema = EntraAppRoleAssignmentSchema()
    load_kwargs = {"lastupdated": update_tag, "TENANT_ID": tenant_id}
    load(neo4j_session, schema, assignments_data, **load_kwargs)
187
+
188
+
189
@timeit
def cleanup_app_role_assignments(
    neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
) -> None:
    """
    Run the schema-derived cleanup job for Entra app role assignments.

    The job is built from ``EntraAppRoleAssignmentSchema`` and is intended to
    remove assignments (and their relationships) that were not updated in the
    last sync.

    :param neo4j_session: Neo4j session
    :param common_job_parameters: Common job parameters containing UPDATE_TAG and TENANT_ID
    """
    cleanup_job = GraphJob.from_node_schema(
        EntraAppRoleAssignmentSchema(),
        common_job_parameters,
    )
    cleanup_job.run(neo4j_session)
202
+
203
+
204
@timeit
async def sync_app_role_assignments(
    neo4j_session: neo4j.Session,
    tenant_id: str,
    client_id: str,
    client_secret: str,
    update_tag: int,
    common_job_parameters: dict[str, Any],
) -> None:
    """
    Sync Entra app role assignments to the graph.

    Application IDs are read from the ``EntraApplication`` nodes already in
    the graph. For each one, its assignments are streamed from Microsoft
    Graph and loaded in batches of 200, and finally stale assignments are
    cleaned up.

    :param neo4j_session: Neo4j session
    :param tenant_id: Entra tenant ID
    :param client_id: Azure application client ID
    :param client_secret: Azure application client secret
    :param update_tag: Update tag for tracking data freshness
    :param common_job_parameters: Common job parameters for cleanup
    """
    # Authenticate against Microsoft Graph with the app's client secret
    credential = ClientSecretCredential(
        tenant_id=tenant_id,
        client_id=client_id,
        client_secret=client_secret,
    )
    client = GraphServiceClient(
        credential,
        scopes=["https://graph.microsoft.com/.default"],
    )

    batch_limit = 200  # Number of assignments buffered before each load
    pending: list[dict[str, Any]] = []
    synced_total = 0

    def _load_pending() -> list[dict[str, Any]]:
        """Transform and load the buffered assignments; return the transformed list."""
        ready = transform_app_role_assignments(pending)
        load_app_role_assignments(neo4j_session, ready, update_tag, tenant_id)
        return ready

    # Take the app_ids from the graph rather than listing applications via the API again
    app_ids = neo4j_session.execute_read(
        read_list_of_values_tx,
        "MATCH (app:EntraApplication) RETURN app.app_id",
    )

    for app_id in app_ids:
        async for assignment in get_app_role_assignments_for_app(
            client, neo4j_session, app_id
        ):
            pending.append(assignment)
            synced_total += 1

            if len(pending) < batch_limit:
                continue

            # Buffer is full: load it, then release both lists
            ready = _load_pending()
            logger.debug(f"Loaded batch of {len(pending)} assignments")
            pending.clear()
            ready.clear()

            # Force garbage collection after batch load
            gc.collect()

    # Load whatever is left over after the last full batch
    if pending:
        ready = _load_pending()
        pending.clear()
        ready.clear()

    cleanup_app_role_assignments(neo4j_session, common_job_parameters)
    logger.info(f"Completed syncing {synced_total} app role assignments")
    # Final garbage collection
    gc.collect()
@@ -6,16 +6,11 @@ from typing import Generator
6
6
 
7
7
  import neo4j
8
8
  from azure.identity import ClientSecretCredential
9
- from msgraph.generated.models.app_role_assignment_collection_response import (
10
- AppRoleAssignmentCollectionResponse,
11
- )
12
9
  from msgraph.generated.models.application import Application
13
- from msgraph.generated.models.service_principal import ServicePrincipal
14
10
  from msgraph.graph_service_client import GraphServiceClient
15
11
 
16
12
  from cartography.client.core.tx import load
17
13
  from cartography.graph.job import GraphJob
18
- from cartography.models.entra.app_role_assignment import EntraAppRoleAssignmentSchema
19
14
  from cartography.models.entra.application import EntraApplicationSchema
20
15
  from cartography.util import timeit
21
16
 
@@ -65,125 +60,6 @@ async def get_entra_applications(
65
60
  logger.info(f"Retrieved {count} Entra applications total")
66
61
 
67
62
 
68
- @timeit
69
- async def get_app_role_assignments_for_app(
70
- client: GraphServiceClient, app: Application
71
- ) -> AsyncGenerator[dict[str, Any], None]:
72
- """
73
- Gets app role assignments for a single application with safety limits.
74
-
75
- :param client: GraphServiceClient
76
- :param app: Application object
77
- :return: Generator of app role assignment data as dicts
78
- """
79
- if not app.app_id:
80
- logger.warning(f"Application {app.id} has no app_id, skipping")
81
- return
82
-
83
- logger.info(
84
- f"Fetching role assignments for application: {app.display_name} ({app.app_id})"
85
- )
86
-
87
- # First, get the service principal for this application
88
- service_principals_page = await client.service_principals.get(
89
- request_configuration=client.service_principals.ServicePrincipalsRequestBuilderGetRequestConfiguration(
90
- query_parameters=client.service_principals.ServicePrincipalsRequestBuilderGetQueryParameters(
91
- filter=f"appId eq '{app.app_id}'"
92
- )
93
- )
94
- )
95
-
96
- if not service_principals_page or not service_principals_page.value:
97
- logger.warning(
98
- f"No service principal found for application {app.app_id} ({app.display_name}). Continuing."
99
- )
100
- return
101
-
102
- service_principal: ServicePrincipal = service_principals_page.value[0]
103
-
104
- # Get assignments for this service principal with pagination and limits
105
- # Use maximum page size (999) to get more data per request
106
- # Memory is managed through streaming and batching, not page size
107
- request_config = client.service_principals.by_service_principal_id(
108
- service_principal.id
109
- ).app_role_assigned_to.AppRoleAssignedToRequestBuilderGetRequestConfiguration(
110
- query_parameters=client.service_principals.by_service_principal_id(
111
- service_principal.id
112
- ).app_role_assigned_to.AppRoleAssignedToRequestBuilderGetQueryParameters(
113
- top=APP_ROLE_ASSIGNMENTS_PAGE_SIZE # Maximum allowed by Microsoft Graph API
114
- )
115
- )
116
-
117
- assignments_page: AppRoleAssignmentCollectionResponse | None = (
118
- await client.service_principals.by_service_principal_id(
119
- service_principal.id
120
- ).app_role_assigned_to.get(request_configuration=request_config)
121
- )
122
-
123
- assignment_count = 0
124
- page_count = 0
125
-
126
- while assignments_page:
127
- page_count += 1
128
-
129
- if assignments_page.value:
130
- page_valid_count = 0
131
- page_skipped_count = 0
132
-
133
- # Process assignments and immediately yield to avoid accumulation
134
- for assignment in assignments_page.value:
135
- # Only yield if we have valid data since it's possible (but unlikely) for assignment.id to be None
136
- if assignment.principal_id:
137
- assignment_count += 1
138
- page_valid_count += 1
139
- yield {
140
- "id": assignment.id,
141
- "app_role_id": assignment.app_role_id,
142
- "created_date_time": assignment.created_date_time,
143
- "principal_id": assignment.principal_id,
144
- "principal_display_name": assignment.principal_display_name,
145
- "principal_type": assignment.principal_type,
146
- "resource_display_name": assignment.resource_display_name,
147
- "resource_id": assignment.resource_id,
148
- "application_app_id": app.app_id,
149
- }
150
- else:
151
- page_skipped_count += 1
152
-
153
- # Log page results with details about skipped objects
154
- if page_skipped_count > 0:
155
- logger.warning(
156
- f"Page {page_count} for {app.display_name}: {page_valid_count} valid assignments, "
157
- f"{page_skipped_count} skipped objects. Total valid: {assignment_count}"
158
- )
159
- else:
160
- logger.debug(
161
- f"Page {page_count} for {app.display_name}: {page_valid_count} assignments. "
162
- f"Total: {assignment_count}"
163
- )
164
-
165
- # Force garbage collection after each page
166
- gc.collect()
167
-
168
- # Check if we have more pages to fetch
169
- if not assignments_page.odata_next_link:
170
- break
171
-
172
- # Clear previous page before fetching next
173
- assignments_page.value = None
174
-
175
- # Fetch next page
176
- logger.debug(
177
- f"Fetching page {page_count + 1} of assignments for {app.display_name}"
178
- )
179
- next_page_url = assignments_page.odata_next_link
180
- assignments_page = await client.service_principals.with_url(next_page_url).get()
181
-
182
- logger.info(
183
- f"Successfully retrieved {assignment_count} assignments for application {app.display_name} (pages: {page_count})"
184
- )
185
-
186
-
187
63
  def transform_applications(
188
64
  applications: list[Application],
189
65
  ) -> Generator[dict[str, Any], None, None]:
@@ -203,39 +79,6 @@ def transform_applications(
203
79
  }
204
80
 
205
81
 
206
- def transform_app_role_assignments(
207
- assignments: list[dict[str, Any]],
208
- ) -> list[dict[str, Any]]:
209
- """
210
- Transform app role assignment data for graph loading.
211
-
212
- :param assignments: Raw app role assignment data as dicts
213
- :return: Transformed assignment data for graph loading
214
- """
215
- transformed = []
216
- for assign in assignments:
217
- transformed.append(
218
- {
219
- "id": assign["id"],
220
- "app_role_id": (
221
- str(assign["app_role_id"]) if assign["app_role_id"] else None
222
- ),
223
- "created_date_time": assign["created_date_time"],
224
- "principal_id": (
225
- str(assign["principal_id"]) if assign["principal_id"] else None
226
- ),
227
- "principal_display_name": assign["principal_display_name"],
228
- "principal_type": assign["principal_type"],
229
- "resource_display_name": assign["resource_display_name"],
230
- "resource_id": (
231
- str(assign["resource_id"]) if assign["resource_id"] else None
232
- ),
233
- "application_app_id": assign["application_app_id"],
234
- }
235
- )
236
- return transformed
237
-
238
-
239
82
  @timeit
240
83
  def load_applications(
241
84
  neo4j_session: neo4j.Session,
@@ -260,30 +103,6 @@ def load_applications(
260
103
  )
261
104
 
262
105
 
263
- @timeit
264
- def load_app_role_assignments(
265
- neo4j_session: neo4j.Session,
266
- assignments_data: list[dict[str, Any]],
267
- update_tag: int,
268
- tenant_id: str,
269
- ) -> None:
270
- """
271
- Load Entra app role assignments to the graph.
272
-
273
- :param neo4j_session: Neo4j session
274
- :param assignments_data: Assignment data to load
275
- :param update_tag: Update tag for tracking data freshness
276
- :param tenant_id: Entra tenant ID
277
- """
278
- load(
279
- neo4j_session,
280
- EntraAppRoleAssignmentSchema(),
281
- assignments_data,
282
- lastupdated=update_tag,
283
- TENANT_ID=tenant_id,
284
- )
285
-
286
-
287
106
  @timeit
288
107
  def cleanup_applications(
289
108
  neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
@@ -299,21 +118,6 @@ def cleanup_applications(
299
118
  )
300
119
 
301
120
 
302
- @timeit
303
- def cleanup_app_role_assignments(
304
- neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
305
- ) -> None:
306
- """
307
- Delete Entra app role assignments and their relationships from the graph if they were not updated in the last sync.
308
-
309
- :param neo4j_session: Neo4j session
310
- :param common_job_parameters: Common job parameters containing UPDATE_TAG and TENANT_ID
311
- """
312
- GraphJob.from_node_schema(
313
- EntraAppRoleAssignmentSchema(), common_job_parameters
314
- ).run(neo4j_session)
315
-
316
-
317
121
  @timeit
318
122
  async def sync_entra_applications(
319
123
  neo4j_session: neo4j.Session,
@@ -345,18 +149,12 @@ async def sync_entra_applications(
345
149
  scopes=["https://graph.microsoft.com/.default"],
346
150
  )
347
151
 
348
- # Process applications and their assignments in batches
152
+ # Step 1: Sync applications
349
153
  app_batch_size = 10 # Batch size for applications
350
- assignment_batch_size = (
351
- 200 # Batch size for assignments (increased since we handle memory better now)
352
- )
353
-
354
154
  apps_batch = []
355
- assignments_batch = []
356
- total_assignment_count = 0
357
155
  total_app_count = 0
358
156
 
359
- # Stream apps
157
+ # Stream and load applications
360
158
  async for app in get_entra_applications(client):
361
159
  total_app_count += 1
362
160
  apps_batch.append(app)
@@ -372,45 +170,13 @@ async def sync_entra_applications(
372
170
  transformed_apps.clear()
373
171
  gc.collect() # Force garbage collection
374
172
 
375
- # Stream app role assignments
376
- async for assignment in get_app_role_assignments_for_app(client, app):
377
- assignments_batch.append(assignment)
378
- total_assignment_count += 1
379
-
380
- # Transform and load assignments in batches
381
- if len(assignments_batch) >= assignment_batch_size:
382
- transformed_assignments = transform_app_role_assignments(
383
- assignments_batch
384
- )
385
- load_app_role_assignments(
386
- neo4j_session, transformed_assignments, update_tag, tenant_id
387
- )
388
- logger.debug(f"Loaded batch of {len(assignments_batch)} assignments")
389
- assignments_batch.clear()
390
- transformed_assignments.clear()
391
-
392
- # Force garbage collection after batch load
393
- gc.collect()
394
-
395
173
  # Process remaining applications
396
174
  if apps_batch:
397
175
  transformed_apps = list(transform_applications(apps_batch))
398
176
  load_applications(neo4j_session, transformed_apps, update_tag, tenant_id)
399
177
  apps_batch.clear()
400
178
  transformed_apps.clear()
401
-
402
- # Process remaining assignments
403
- if assignments_batch:
404
- transformed_assignments = transform_app_role_assignments(assignments_batch)
405
- load_app_role_assignments(
406
- neo4j_session, transformed_assignments, update_tag, tenant_id
407
- )
408
- assignments_batch.clear()
409
- transformed_assignments.clear()
410
-
179
+ cleanup_applications(neo4j_session, common_job_parameters)
180
+ logger.info(f"Completed syncing {total_app_count} applications")
411
181
  # Final garbage collection
412
182
  gc.collect()
413
-
414
- # Cleanup stale data
415
- cleanup_applications(neo4j_session, common_job_parameters)
416
- cleanup_app_role_assignments(neo4j_session, common_job_parameters)
File without changes
@@ -0,0 +1,77 @@
1
+ from typing import Any
2
+
3
+ import neo4j
4
+
5
+ from cartography.client.core.tx import load_matchlinks
6
+ from cartography.client.core.tx import read_list_of_dicts_tx
7
+ from cartography.graph.job import GraphJob
8
+ from cartography.models.entra.entra_user_to_aws_sso import (
9
+ EntraUserToAWSSSOUserMatchLink,
10
+ )
11
+ from cartography.util import timeit
12
+
13
+
14
@timeit
def sync_entra_to_aws_identity_center(
    neo4j_session: neo4j.Session,
    update_tag: int,
    tenant_id: str,
    common_job_parameters: dict[str, Any],
) -> None:
    """
    Link Entra users to their matching AWS SSO users.

    Reads from the graph every Entra user of the tenant that reaches an
    AWSIdentityCenter through an app role assignment, application and
    service principal, and pairs it with the AWSSSOUser of the same identity
    store whose ``user_name`` equals the Entra ``user_principal_name``. The
    pairs are loaded as MatchLink relationships scoped to the tenant, then
    stale links are cleaned up.

    :param neo4j_session: Neo4j session
    :param update_tag: Update tag for tracking data freshness
    :param tenant_id: Entra tenant ID
    :param common_job_parameters: Common job parameters for cleanup
    """
    query = """
    MATCH (:EntraTenant{id: $TENANT_ID})-[:RESOURCE]->(e:EntraUser)
    -[:HAS_APP_ROLE]->(ar:EntraAppRoleAssignment)
    -[:ASSIGNED_TO]->(n:EntraApplication)
    -[:SERVICE_PRINCIPAL]->(spn:EntraServicePrincipal)
    -[:FEDERATES_TO]->(ic:AWSIdentityCenter)
    MATCH (sso:AWSSSOUser{identity_store_id:ic.identity_store_id})
    WHERE e.user_principal_name = sso.user_name
    RETURN e.user_principal_name as entra_user_principal_name, sso.user_name as aws_user_name, sso.identity_store_id as identity_store_id
    """
    matched_users = neo4j_session.execute_read(
        read_list_of_dicts_tx,
        query,
        TENANT_ID=tenant_id,
    )

    # Load MatchLink relationships from Entra users to AWS SSO users,
    # scoped to the tenant so cleanup can later find them.
    matchlink_kwargs = {
        "lastupdated": update_tag,
        "_sub_resource_label": "EntraTenant",
        "_sub_resource_id": tenant_id,
    }
    load_matchlinks(
        neo4j_session,
        EntraUserToAWSSSOUserMatchLink(),
        matched_users,
        **matchlink_kwargs,
    )

    cleanup_entra_user_to_aws_sso_user_matchlinks(neo4j_session, common_job_parameters)
46
+
47
+
48
@timeit
async def sync_entra_federation(
    neo4j_session: neo4j.Session,
    update_tag: int,
    tenant_id: str,
    common_job_parameters: dict[str, Any],
) -> None:
    """
    Sync Entra federation relationships to the graph.

    Currently this runs a single step: the Entra-to-AWS-Identity-Center
    user matching.

    :param neo4j_session: Neo4j session
    :param update_tag: Update tag for tracking data freshness
    :param tenant_id: Entra tenant ID
    :param common_job_parameters: Common job parameters for cleanup
    """
    sync_entra_to_aws_identity_center(
        neo4j_session=neo4j_session,
        update_tag=update_tag,
        tenant_id=tenant_id,
        common_job_parameters=common_job_parameters,
    )
66
+
67
+
68
@timeit
def cleanup_entra_user_to_aws_sso_user_matchlinks(
    neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
) -> None:
    """
    Run the MatchLink cleanup job for Entra-user-to-AWS-SSO-user links.

    The job is scoped to the ``EntraTenant`` sub-resource identified by
    ``common_job_parameters["TENANT_ID"]`` and uses
    ``common_job_parameters["UPDATE_TAG"]`` as the current update tag.

    :param neo4j_session: Neo4j session
    :param common_job_parameters: Common job parameters containing UPDATE_TAG and TENANT_ID
    """
    matchlink_schema = EntraUserToAWSSSOUserMatchLink()
    cleanup_job = GraphJob.from_matchlink(
        matchlink_schema,
        "EntraTenant",
        common_job_parameters["TENANT_ID"],
        common_job_parameters["UPDATE_TAG"],
    )
    cleanup_job.run(neo4j_session)