cartography 0.113.0__py3-none-any.whl → 0.115.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +10 -2
- cartography/client/core/tx.py +11 -0
- cartography/config.py +4 -0
- cartography/data/indexes.cypher +0 -27
- cartography/intel/aws/config.py +7 -3
- cartography/intel/aws/ecr.py +9 -9
- cartography/intel/aws/iam.py +741 -492
- cartography/intel/aws/identitycenter.py +240 -13
- cartography/intel/aws/lambda_function.py +69 -2
- cartography/intel/aws/organizations.py +10 -9
- cartography/intel/aws/permission_relationships.py +7 -17
- cartography/intel/aws/redshift.py +9 -4
- cartography/intel/aws/route53.py +53 -3
- cartography/intel/aws/securityhub.py +3 -1
- cartography/intel/azure/__init__.py +24 -0
- cartography/intel/azure/app_service.py +105 -0
- cartography/intel/azure/functions.py +124 -0
- cartography/intel/azure/logic_apps.py +101 -0
- cartography/intel/create_indexes.py +2 -1
- cartography/intel/dns.py +5 -2
- cartography/intel/entra/__init__.py +31 -0
- cartography/intel/entra/app_role_assignments.py +277 -0
- cartography/intel/entra/applications.py +4 -238
- cartography/intel/entra/federation/__init__.py +0 -0
- cartography/intel/entra/federation/aws_identity_center.py +77 -0
- cartography/intel/entra/service_principals.py +217 -0
- cartography/intel/gcp/__init__.py +136 -440
- cartography/intel/gcp/clients.py +65 -0
- cartography/intel/gcp/compute.py +18 -44
- cartography/intel/gcp/crm/__init__.py +0 -0
- cartography/intel/gcp/crm/folders.py +108 -0
- cartography/intel/gcp/crm/orgs.py +65 -0
- cartography/intel/gcp/crm/projects.py +109 -0
- cartography/intel/gcp/dns.py +2 -1
- cartography/intel/gcp/gke.py +72 -113
- cartography/intel/github/__init__.py +41 -0
- cartography/intel/github/commits.py +423 -0
- cartography/intel/github/repos.py +76 -45
- cartography/intel/gsuite/api.py +17 -4
- cartography/intel/okta/applications.py +9 -4
- cartography/intel/okta/awssaml.py +5 -2
- cartography/intel/okta/factors.py +3 -1
- cartography/intel/okta/groups.py +5 -2
- cartography/intel/okta/organization.py +3 -1
- cartography/intel/okta/origins.py +3 -1
- cartography/intel/okta/roles.py +5 -2
- cartography/intel/okta/users.py +3 -1
- cartography/models/aws/iam/access_key.py +103 -0
- cartography/models/aws/iam/account_role.py +24 -0
- cartography/models/aws/iam/federated_principal.py +60 -0
- cartography/models/aws/iam/group.py +60 -0
- cartography/models/aws/iam/group_membership.py +26 -0
- cartography/models/aws/iam/inline_policy.py +78 -0
- cartography/models/aws/iam/managed_policy.py +51 -0
- cartography/models/aws/iam/policy_statement.py +57 -0
- cartography/models/aws/iam/role.py +83 -0
- cartography/models/aws/iam/root_principal.py +52 -0
- cartography/models/aws/iam/service_principal.py +30 -0
- cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
- cartography/models/aws/iam/user.py +54 -0
- cartography/models/aws/identitycenter/awspermissionset.py +24 -1
- cartography/models/aws/identitycenter/awssogroup.py +70 -0
- cartography/models/aws/identitycenter/awsssouser.py +37 -1
- cartography/models/aws/lambda_function/lambda_function.py +2 -0
- cartography/models/azure/__init__.py +0 -0
- cartography/models/azure/app_service.py +59 -0
- cartography/models/azure/function_app.py +59 -0
- cartography/models/azure/logic_apps.py +56 -0
- cartography/models/entra/entra_user_to_aws_sso.py +41 -0
- cartography/models/entra/service_principal.py +104 -0
- cartography/models/entra/user.py +18 -0
- cartography/models/gcp/compute/subnet.py +74 -0
- cartography/models/gcp/crm/__init__.py +0 -0
- cartography/models/gcp/crm/folders.py +98 -0
- cartography/models/gcp/crm/organizations.py +21 -0
- cartography/models/gcp/crm/projects.py +100 -0
- cartography/models/gcp/gke.py +69 -0
- cartography/models/github/commits.py +63 -0
- {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/METADATA +8 -5
- {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/RECORD +85 -56
- cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
- cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
- cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
- cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
- cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
- cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
- cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
- cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
- cartography/intel/gcp/crm.py +0 -355
- {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/WHEEL +0 -0
- {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
import gc
|
|
2
|
+
from typing import Any
|
|
3
|
+
from typing import AsyncGenerator
|
|
4
|
+
|
|
5
|
+
import neo4j
|
|
6
|
+
from azure.identity import ClientSecretCredential
|
|
7
|
+
from msgraph import GraphServiceClient
|
|
8
|
+
from msgraph.generated.models.app_role_assignment_collection_response import (
|
|
9
|
+
AppRoleAssignmentCollectionResponse,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
from cartography.client.core.tx import load
|
|
13
|
+
from cartography.client.core.tx import read_list_of_values_tx
|
|
14
|
+
from cartography.client.core.tx import read_single_value_tx
|
|
15
|
+
from cartography.graph.job import GraphJob
|
|
16
|
+
from cartography.intel.entra.applications import APP_ROLE_ASSIGNMENTS_PAGE_SIZE
|
|
17
|
+
from cartography.intel.entra.applications import logger
|
|
18
|
+
from cartography.models.entra.app_role_assignment import EntraAppRoleAssignmentSchema
|
|
19
|
+
from cartography.util import timeit
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@timeit
async def get_app_role_assignments_for_app(
    client: GraphServiceClient, neo4j_session: neo4j.Session, app_id: str
) -> AsyncGenerator[dict[str, Any], None]:
    """
    Stream the app role assignments of a single application.

    The service principal ID for the application is looked up in the graph
    (``EntraServicePrincipal`` node matched on ``app_id``) and then used to page
    through that service principal's ``appRoleAssignedTo`` collection. If no
    service principal is found, a warning is logged and nothing is yielded.

    :param client: GraphServiceClient
    :param neo4j_session: Neo4j session for querying service principal
    :param app_id: Application ID
    :return: Generator of app role assignment data as dicts
    """
    logger.info(f"Fetching role assignments for application: {app_id}")

    # Query the graph to get the service principal ID for this application
    query = """
    MATCH (sp:EntraServicePrincipal {app_id: $app_id})
    RETURN sp.id as service_principal_id
    """
    service_principal_id = neo4j_session.execute_read(
        read_single_value_tx, query, app_id=app_id
    )

    if not service_principal_id:
        logger.warning(
            f"No service principal found in graph for application {app_id}. Continuing."
        )
        return

    # Build the appRoleAssignedTo request builder once; it is reused for the
    # request configuration, the first page, and every follow-up page.
    assigned_to_builder = client.service_principals.by_service_principal_id(
        service_principal_id
    ).app_role_assigned_to

    # Page size comes from APP_ROLE_ASSIGNMENTS_PAGE_SIZE (imported from the
    # applications module). Memory is managed through streaming and batching,
    # not page size.
    request_config = (
        assigned_to_builder.AppRoleAssignedToRequestBuilderGetRequestConfiguration(
            query_parameters=assigned_to_builder.AppRoleAssignedToRequestBuilderGetQueryParameters(
                top=APP_ROLE_ASSIGNMENTS_PAGE_SIZE
            )
        )
    )

    assignments_page: AppRoleAssignmentCollectionResponse | None = (
        await assigned_to_builder.get(request_configuration=request_config)
    )

    assignment_count = 0
    page_count = 0

    while assignments_page:
        page_count += 1

        if assignments_page.value:
            page_valid_count = 0
            page_skipped_count = 0

            # Process assignments and immediately yield to avoid accumulation
            for assignment in assignments_page.value:
                # Only assignments that carry a principal_id are yielded; the
                # rest are counted as skipped.
                # NOTE(review): assignment.id itself is not checked before being
                # yielded - confirm that is intended.
                if assignment.principal_id:
                    assignment_count += 1
                    page_valid_count += 1
                    yield {
                        "id": assignment.id,
                        "app_role_id": assignment.app_role_id,
                        "created_date_time": assignment.created_date_time,
                        "principal_id": assignment.principal_id,
                        "principal_display_name": assignment.principal_display_name,
                        "principal_type": assignment.principal_type,
                        "resource_display_name": assignment.resource_display_name,
                        "resource_id": assignment.resource_id,
                        "application_app_id": app_id,
                    }
                else:
                    page_skipped_count += 1

            # Log page results with details about skipped objects
            if page_skipped_count > 0:
                logger.warning(
                    f"Page {page_count} for {app_id}: {page_valid_count} valid assignments, "
                    f"{page_skipped_count} skipped objects. Total valid: {assignment_count}"
                )
            else:
                logger.debug(
                    f"Page {page_count} for {app_id}: {page_valid_count} assignments. "
                    f"Total: {assignment_count}"
                )

        # Force garbage collection after each page
        gc.collect()

        # Check if we have more pages to fetch
        next_page_url = assignments_page.odata_next_link
        if not next_page_url:
            break

        # Clear previous page before fetching next
        assignments_page.value = None

        # Fetch the next page through the same appRoleAssignedTo builder so the
        # response is deserialized as an app role assignment collection. Using
        # the top-level service_principals builder here would parse the page as
        # a service principal collection instead.
        logger.debug(f"Fetching page {page_count + 1} of assignments for {app_id}")
        assignments_page = await assigned_to_builder.with_url(next_page_url).get()

    logger.info(
        f"Successfully retrieved {assignment_count} assignments for application {app_id} (pages: {page_count})"
    )
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def transform_app_role_assignments(
    assignments: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """
    Shape raw app role assignment dicts into the records loaded into the graph.

    ``app_role_id``, ``principal_id`` and ``resource_id`` are converted to strings
    when truthy and replaced by ``None`` otherwise; all other fields are copied
    through unchanged.

    :param assignments: Raw app role assignment data as dicts
    :return: Transformed assignment data for graph loading
    """

    def _as_str_or_none(value: Any) -> Any:
        # Falsy values (None, empty string, 0, ...) collapse to None.
        if value:
            return str(value)
        return None

    return [
        {
            "id": raw["id"],
            "app_role_id": _as_str_or_none(raw["app_role_id"]),
            "created_date_time": raw["created_date_time"],
            "principal_id": _as_str_or_none(raw["principal_id"]),
            "principal_display_name": raw["principal_display_name"],
            "principal_type": raw["principal_type"],
            "resource_display_name": raw["resource_display_name"],
            "resource_id": _as_str_or_none(raw["resource_id"]),
            "application_app_id": raw["application_app_id"],
        }
        for raw in assignments
    ]
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@timeit
def load_app_role_assignments(
    neo4j_session: neo4j.Session,
    assignments_data: list[dict[str, Any]],
    update_tag: int,
    tenant_id: str,
) -> None:
    """
    Write Entra app role assignments to the graph using EntraAppRoleAssignmentSchema.

    :param neo4j_session: Neo4j session
    :param assignments_data: Assignment data to load
    :param update_tag: Update tag for tracking data freshness
    :param tenant_id: Entra tenant ID
    """
    # Keyword arguments passed alongside the records to the generic loader.
    load_kwargs = {"lastupdated": update_tag, "TENANT_ID": tenant_id}
    schema = EntraAppRoleAssignmentSchema()
    load(neo4j_session, schema, assignments_data, **load_kwargs)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
@timeit
def cleanup_app_role_assignments(
    neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
) -> None:
    """
    Run the cleanup job built from EntraAppRoleAssignmentSchema, removing app role
    assignments and their relationships that were not updated in the last sync.

    :param neo4j_session: Neo4j session
    :param common_job_parameters: Common job parameters containing UPDATE_TAG and TENANT_ID
    """
    schema = EntraAppRoleAssignmentSchema()
    cleanup_job = GraphJob.from_node_schema(schema, common_job_parameters)
    cleanup_job.run(neo4j_session)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
@timeit
async def sync_app_role_assignments(
    neo4j_session: neo4j.Session,
    tenant_id: str,
    client_id: str,
    client_secret: str,
    update_tag: int,
    common_job_parameters: dict[str, Any],
) -> None:
    """
    Sync Entra app role assignments to the graph.

    Application IDs are read from the graph, assignments for each application are
    streamed from Microsoft Graph, and records are transformed and loaded in
    batches. Stale assignments are cleaned up once all applications are processed.

    :param neo4j_session: Neo4j session
    :param tenant_id: Entra tenant ID
    :param client_id: Azure application client ID
    :param client_secret: Azure application client secret
    :param update_tag: Update tag for tracking data freshness
    :param common_job_parameters: Common job parameters for cleanup
    """
    # Create credentials and client
    graph_client = GraphServiceClient(
        ClientSecretCredential(
            tenant_id=tenant_id,
            client_id=client_id,
            client_secret=client_secret,
        ),
        scopes=["https://graph.microsoft.com/.default"],
    )

    flush_threshold = 200  # Batch size for assignments
    pending: list[dict[str, Any]] = []
    synced_total = 0

    def _load_pending() -> None:
        # Transform and load whatever is currently buffered in `pending`.
        rows = transform_app_role_assignments(pending)
        load_app_role_assignments(neo4j_session, rows, update_tag, tenant_id)
        rows.clear()

    # Get app_ids from graph instead of streaming from API again
    app_ids = neo4j_session.execute_read(
        read_list_of_values_tx,
        "MATCH (app:EntraApplication) RETURN app.app_id",
    )

    for app_id in app_ids:
        # Stream app role assignments (service principal ID is resolved from the graph)
        async for assignment in get_app_role_assignments_for_app(
            graph_client, neo4j_session, app_id
        ):
            pending.append(assignment)
            synced_total += 1

            if len(pending) < flush_threshold:
                continue

            # Buffer is full: transform and load it, then release the memory.
            _load_pending()
            logger.debug(f"Loaded batch of {len(pending)} assignments")
            pending.clear()

            # Force garbage collection after batch load
            gc.collect()

    # Process remaining assignments
    if pending:
        _load_pending()
        pending.clear()

    cleanup_app_role_assignments(neo4j_session, common_job_parameters)
    logger.info(f"Completed syncing {synced_total} app role assignments")
    # Final garbage collection
    gc.collect()
|
|
@@ -6,16 +6,11 @@ from typing import Generator
|
|
|
6
6
|
|
|
7
7
|
import neo4j
|
|
8
8
|
from azure.identity import ClientSecretCredential
|
|
9
|
-
from msgraph.generated.models.app_role_assignment_collection_response import (
|
|
10
|
-
AppRoleAssignmentCollectionResponse,
|
|
11
|
-
)
|
|
12
9
|
from msgraph.generated.models.application import Application
|
|
13
|
-
from msgraph.generated.models.service_principal import ServicePrincipal
|
|
14
10
|
from msgraph.graph_service_client import GraphServiceClient
|
|
15
11
|
|
|
16
12
|
from cartography.client.core.tx import load
|
|
17
13
|
from cartography.graph.job import GraphJob
|
|
18
|
-
from cartography.models.entra.app_role_assignment import EntraAppRoleAssignmentSchema
|
|
19
14
|
from cartography.models.entra.application import EntraApplicationSchema
|
|
20
15
|
from cartography.util import timeit
|
|
21
16
|
|
|
@@ -65,125 +60,6 @@ async def get_entra_applications(
|
|
|
65
60
|
logger.info(f"Retrieved {count} Entra applications total")
|
|
66
61
|
|
|
67
62
|
|
|
68
|
-
@timeit
|
|
69
|
-
async def get_app_role_assignments_for_app(
|
|
70
|
-
client: GraphServiceClient, app: Application
|
|
71
|
-
) -> AsyncGenerator[dict[str, Any], None]:
|
|
72
|
-
"""
|
|
73
|
-
Gets app role assignments for a single application with safety limits.
|
|
74
|
-
|
|
75
|
-
:param client: GraphServiceClient
|
|
76
|
-
:param app: Application object
|
|
77
|
-
:return: Generator of app role assignment data as dicts
|
|
78
|
-
"""
|
|
79
|
-
if not app.app_id:
|
|
80
|
-
logger.warning(f"Application {app.id} has no app_id, skipping")
|
|
81
|
-
return
|
|
82
|
-
|
|
83
|
-
logger.info(
|
|
84
|
-
f"Fetching role assignments for application: {app.display_name} ({app.app_id})"
|
|
85
|
-
)
|
|
86
|
-
|
|
87
|
-
# First, get the service principal for this application
|
|
88
|
-
service_principals_page = await client.service_principals.get(
|
|
89
|
-
request_configuration=client.service_principals.ServicePrincipalsRequestBuilderGetRequestConfiguration(
|
|
90
|
-
query_parameters=client.service_principals.ServicePrincipalsRequestBuilderGetQueryParameters(
|
|
91
|
-
filter=f"appId eq '{app.app_id}'"
|
|
92
|
-
)
|
|
93
|
-
)
|
|
94
|
-
)
|
|
95
|
-
|
|
96
|
-
if not service_principals_page or not service_principals_page.value:
|
|
97
|
-
logger.warning(
|
|
98
|
-
f"No service principal found for application {app.app_id} ({app.display_name}). Continuing."
|
|
99
|
-
)
|
|
100
|
-
return
|
|
101
|
-
|
|
102
|
-
service_principal: ServicePrincipal = service_principals_page.value[0]
|
|
103
|
-
|
|
104
|
-
# Get assignments for this service principal with pagination and limits
|
|
105
|
-
# Use maximum page size (999) to get more data per request
|
|
106
|
-
# Memory is managed through streaming and batching, not page size
|
|
107
|
-
request_config = client.service_principals.by_service_principal_id(
|
|
108
|
-
service_principal.id
|
|
109
|
-
).app_role_assigned_to.AppRoleAssignedToRequestBuilderGetRequestConfiguration(
|
|
110
|
-
query_parameters=client.service_principals.by_service_principal_id(
|
|
111
|
-
service_principal.id
|
|
112
|
-
).app_role_assigned_to.AppRoleAssignedToRequestBuilderGetQueryParameters(
|
|
113
|
-
top=APP_ROLE_ASSIGNMENTS_PAGE_SIZE # Maximum allowed by Microsoft Graph API
|
|
114
|
-
)
|
|
115
|
-
)
|
|
116
|
-
|
|
117
|
-
assignments_page: AppRoleAssignmentCollectionResponse | None = (
|
|
118
|
-
await client.service_principals.by_service_principal_id(
|
|
119
|
-
service_principal.id
|
|
120
|
-
).app_role_assigned_to.get(request_configuration=request_config)
|
|
121
|
-
)
|
|
122
|
-
|
|
123
|
-
assignment_count = 0
|
|
124
|
-
page_count = 0
|
|
125
|
-
|
|
126
|
-
while assignments_page:
|
|
127
|
-
page_count += 1
|
|
128
|
-
|
|
129
|
-
if assignments_page.value:
|
|
130
|
-
page_valid_count = 0
|
|
131
|
-
page_skipped_count = 0
|
|
132
|
-
|
|
133
|
-
# Process assignments and immediately yield to avoid accumulation
|
|
134
|
-
for assignment in assignments_page.value:
|
|
135
|
-
# Only yield if we have valid data since it's possible (but unlikely) for assignment.id to be None
|
|
136
|
-
if assignment.principal_id:
|
|
137
|
-
assignment_count += 1
|
|
138
|
-
page_valid_count += 1
|
|
139
|
-
yield {
|
|
140
|
-
"id": assignment.id,
|
|
141
|
-
"app_role_id": assignment.app_role_id,
|
|
142
|
-
"created_date_time": assignment.created_date_time,
|
|
143
|
-
"principal_id": assignment.principal_id,
|
|
144
|
-
"principal_display_name": assignment.principal_display_name,
|
|
145
|
-
"principal_type": assignment.principal_type,
|
|
146
|
-
"resource_display_name": assignment.resource_display_name,
|
|
147
|
-
"resource_id": assignment.resource_id,
|
|
148
|
-
"application_app_id": app.app_id,
|
|
149
|
-
}
|
|
150
|
-
else:
|
|
151
|
-
page_skipped_count += 1
|
|
152
|
-
|
|
153
|
-
# Log page results with details about skipped objects
|
|
154
|
-
if page_skipped_count > 0:
|
|
155
|
-
logger.warning(
|
|
156
|
-
f"Page {page_count} for {app.display_name}: {page_valid_count} valid assignments, "
|
|
157
|
-
f"{page_skipped_count} skipped objects. Total valid: {assignment_count}"
|
|
158
|
-
)
|
|
159
|
-
else:
|
|
160
|
-
logger.debug(
|
|
161
|
-
f"Page {page_count} for {app.display_name}: {page_valid_count} assignments. "
|
|
162
|
-
f"Total: {assignment_count}"
|
|
163
|
-
)
|
|
164
|
-
|
|
165
|
-
# Force garbage collection after each page
|
|
166
|
-
gc.collect()
|
|
167
|
-
|
|
168
|
-
# Check if we have more pages to fetch
|
|
169
|
-
if not assignments_page.odata_next_link:
|
|
170
|
-
break
|
|
171
|
-
|
|
172
|
-
# Clear previous page before fetching next
|
|
173
|
-
assignments_page.value = None
|
|
174
|
-
|
|
175
|
-
# Fetch next page
|
|
176
|
-
logger.debug(
|
|
177
|
-
f"Fetching page {page_count + 1} of assignments for {app.display_name}"
|
|
178
|
-
)
|
|
179
|
-
next_page_url = assignments_page.odata_next_link
|
|
180
|
-
assignments_page = await client.service_principals.with_url(next_page_url).get()
|
|
181
|
-
|
|
182
|
-
logger.info(
|
|
183
|
-
f"Successfully retrieved {assignment_count} assignments for application {app.display_name} (pages: {page_count})"
|
|
184
|
-
)
|
|
185
|
-
|
|
186
|
-
|
|
187
63
|
def transform_applications(
|
|
188
64
|
applications: list[Application],
|
|
189
65
|
) -> Generator[dict[str, Any], None, None]:
|
|
@@ -203,39 +79,6 @@ def transform_applications(
|
|
|
203
79
|
}
|
|
204
80
|
|
|
205
81
|
|
|
206
|
-
def transform_app_role_assignments(
|
|
207
|
-
assignments: list[dict[str, Any]],
|
|
208
|
-
) -> list[dict[str, Any]]:
|
|
209
|
-
"""
|
|
210
|
-
Transform app role assignment data for graph loading.
|
|
211
|
-
|
|
212
|
-
:param assignments: Raw app role assignment data as dicts
|
|
213
|
-
:return: Transformed assignment data for graph loading
|
|
214
|
-
"""
|
|
215
|
-
transformed = []
|
|
216
|
-
for assign in assignments:
|
|
217
|
-
transformed.append(
|
|
218
|
-
{
|
|
219
|
-
"id": assign["id"],
|
|
220
|
-
"app_role_id": (
|
|
221
|
-
str(assign["app_role_id"]) if assign["app_role_id"] else None
|
|
222
|
-
),
|
|
223
|
-
"created_date_time": assign["created_date_time"],
|
|
224
|
-
"principal_id": (
|
|
225
|
-
str(assign["principal_id"]) if assign["principal_id"] else None
|
|
226
|
-
),
|
|
227
|
-
"principal_display_name": assign["principal_display_name"],
|
|
228
|
-
"principal_type": assign["principal_type"],
|
|
229
|
-
"resource_display_name": assign["resource_display_name"],
|
|
230
|
-
"resource_id": (
|
|
231
|
-
str(assign["resource_id"]) if assign["resource_id"] else None
|
|
232
|
-
),
|
|
233
|
-
"application_app_id": assign["application_app_id"],
|
|
234
|
-
}
|
|
235
|
-
)
|
|
236
|
-
return transformed
|
|
237
|
-
|
|
238
|
-
|
|
239
82
|
@timeit
|
|
240
83
|
def load_applications(
|
|
241
84
|
neo4j_session: neo4j.Session,
|
|
@@ -260,30 +103,6 @@ def load_applications(
|
|
|
260
103
|
)
|
|
261
104
|
|
|
262
105
|
|
|
263
|
-
@timeit
|
|
264
|
-
def load_app_role_assignments(
|
|
265
|
-
neo4j_session: neo4j.Session,
|
|
266
|
-
assignments_data: list[dict[str, Any]],
|
|
267
|
-
update_tag: int,
|
|
268
|
-
tenant_id: str,
|
|
269
|
-
) -> None:
|
|
270
|
-
"""
|
|
271
|
-
Load Entra app role assignments to the graph.
|
|
272
|
-
|
|
273
|
-
:param neo4j_session: Neo4j session
|
|
274
|
-
:param assignments_data: Assignment data to load
|
|
275
|
-
:param update_tag: Update tag for tracking data freshness
|
|
276
|
-
:param tenant_id: Entra tenant ID
|
|
277
|
-
"""
|
|
278
|
-
load(
|
|
279
|
-
neo4j_session,
|
|
280
|
-
EntraAppRoleAssignmentSchema(),
|
|
281
|
-
assignments_data,
|
|
282
|
-
lastupdated=update_tag,
|
|
283
|
-
TENANT_ID=tenant_id,
|
|
284
|
-
)
|
|
285
|
-
|
|
286
|
-
|
|
287
106
|
@timeit
|
|
288
107
|
def cleanup_applications(
|
|
289
108
|
neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
|
|
@@ -299,21 +118,6 @@ def cleanup_applications(
|
|
|
299
118
|
)
|
|
300
119
|
|
|
301
120
|
|
|
302
|
-
@timeit
|
|
303
|
-
def cleanup_app_role_assignments(
|
|
304
|
-
neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
|
|
305
|
-
) -> None:
|
|
306
|
-
"""
|
|
307
|
-
Delete Entra app role assignments and their relationships from the graph if they were not updated in the last sync.
|
|
308
|
-
|
|
309
|
-
:param neo4j_session: Neo4j session
|
|
310
|
-
:param common_job_parameters: Common job parameters containing UPDATE_TAG and TENANT_ID
|
|
311
|
-
"""
|
|
312
|
-
GraphJob.from_node_schema(
|
|
313
|
-
EntraAppRoleAssignmentSchema(), common_job_parameters
|
|
314
|
-
).run(neo4j_session)
|
|
315
|
-
|
|
316
|
-
|
|
317
121
|
@timeit
|
|
318
122
|
async def sync_entra_applications(
|
|
319
123
|
neo4j_session: neo4j.Session,
|
|
@@ -345,18 +149,12 @@ async def sync_entra_applications(
|
|
|
345
149
|
scopes=["https://graph.microsoft.com/.default"],
|
|
346
150
|
)
|
|
347
151
|
|
|
348
|
-
#
|
|
152
|
+
# Step 1: Sync applications
|
|
349
153
|
app_batch_size = 10 # Batch size for applications
|
|
350
|
-
assignment_batch_size = (
|
|
351
|
-
200 # Batch size for assignments (increased since we handle memory better now)
|
|
352
|
-
)
|
|
353
|
-
|
|
354
154
|
apps_batch = []
|
|
355
|
-
assignments_batch = []
|
|
356
|
-
total_assignment_count = 0
|
|
357
155
|
total_app_count = 0
|
|
358
156
|
|
|
359
|
-
# Stream
|
|
157
|
+
# Stream and load applications
|
|
360
158
|
async for app in get_entra_applications(client):
|
|
361
159
|
total_app_count += 1
|
|
362
160
|
apps_batch.append(app)
|
|
@@ -372,45 +170,13 @@ async def sync_entra_applications(
|
|
|
372
170
|
transformed_apps.clear()
|
|
373
171
|
gc.collect() # Force garbage collection
|
|
374
172
|
|
|
375
|
-
# Stream app role assignments
|
|
376
|
-
async for assignment in get_app_role_assignments_for_app(client, app):
|
|
377
|
-
assignments_batch.append(assignment)
|
|
378
|
-
total_assignment_count += 1
|
|
379
|
-
|
|
380
|
-
# Transform and load assignments in batches
|
|
381
|
-
if len(assignments_batch) >= assignment_batch_size:
|
|
382
|
-
transformed_assignments = transform_app_role_assignments(
|
|
383
|
-
assignments_batch
|
|
384
|
-
)
|
|
385
|
-
load_app_role_assignments(
|
|
386
|
-
neo4j_session, transformed_assignments, update_tag, tenant_id
|
|
387
|
-
)
|
|
388
|
-
logger.debug(f"Loaded batch of {len(assignments_batch)} assignments")
|
|
389
|
-
assignments_batch.clear()
|
|
390
|
-
transformed_assignments.clear()
|
|
391
|
-
|
|
392
|
-
# Force garbage collection after batch load
|
|
393
|
-
gc.collect()
|
|
394
|
-
|
|
395
173
|
# Process remaining applications
|
|
396
174
|
if apps_batch:
|
|
397
175
|
transformed_apps = list(transform_applications(apps_batch))
|
|
398
176
|
load_applications(neo4j_session, transformed_apps, update_tag, tenant_id)
|
|
399
177
|
apps_batch.clear()
|
|
400
178
|
transformed_apps.clear()
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
if assignments_batch:
|
|
404
|
-
transformed_assignments = transform_app_role_assignments(assignments_batch)
|
|
405
|
-
load_app_role_assignments(
|
|
406
|
-
neo4j_session, transformed_assignments, update_tag, tenant_id
|
|
407
|
-
)
|
|
408
|
-
assignments_batch.clear()
|
|
409
|
-
transformed_assignments.clear()
|
|
410
|
-
|
|
179
|
+
cleanup_applications(neo4j_session, common_job_parameters)
|
|
180
|
+
logger.info(f"Completed syncing {total_app_count} applications")
|
|
411
181
|
# Final garbage collection
|
|
412
182
|
gc.collect()
|
|
413
|
-
|
|
414
|
-
# Cleanup stale data
|
|
415
|
-
cleanup_applications(neo4j_session, common_job_parameters)
|
|
416
|
-
cleanup_app_role_assignments(neo4j_session, common_job_parameters)
|
|
File without changes
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import neo4j
|
|
4
|
+
|
|
5
|
+
from cartography.client.core.tx import load_matchlinks
|
|
6
|
+
from cartography.client.core.tx import read_list_of_dicts_tx
|
|
7
|
+
from cartography.graph.job import GraphJob
|
|
8
|
+
from cartography.models.entra.entra_user_to_aws_sso import (
|
|
9
|
+
EntraUserToAWSSSOUserMatchLink,
|
|
10
|
+
)
|
|
11
|
+
from cartography.util import timeit
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@timeit
def sync_entra_to_aws_identity_center(
    neo4j_session: neo4j.Session,
    update_tag: int,
    tenant_id: str,
    common_job_parameters: dict[str, Any],
) -> None:
    """
    Link Entra users to the AWS SSO users they federate to.

    Reads from the graph every Entra user of the tenant whose app role assignment
    leads, via application and service principal, to an AWS Identity Center, and
    pairs it with the AWSSSOUser in the same identity store whose ``user_name``
    equals the Entra ``user_principal_name``. The pairs are loaded as MatchLink
    relationships and stale links are cleaned up afterwards.

    :param neo4j_session: Neo4j session
    :param update_tag: Update tag for tracking data freshness
    :param tenant_id: Entra tenant ID
    :param common_job_parameters: Common job parameters for cleanup
    """
    query = """
    MATCH (:EntraTenant{id: $TENANT_ID})-[:RESOURCE]->(e:EntraUser)
    -[:HAS_APP_ROLE]->(ar:EntraAppRoleAssignment)
    -[:ASSIGNED_TO]->(n:EntraApplication)
    -[:SERVICE_PRINCIPAL]->(spn:EntraServicePrincipal)
    -[:FEDERATES_TO]->(ic:AWSIdentityCenter)
    MATCH (sso:AWSSSOUser{identity_store_id:ic.identity_store_id})
    WHERE e.user_principal_name = sso.user_name
    RETURN e.user_principal_name as entra_user_principal_name, sso.user_name as aws_user_name, sso.identity_store_id as identity_store_id
    """
    matched_pairs = neo4j_session.execute_read(
        read_list_of_dicts_tx,
        query,
        TENANT_ID=tenant_id,
    )

    # Load MatchLink relationships from Entra users to AWS SSO users,
    # scoped to the Entra tenant as the sub-resource.
    matchlink_schema = EntraUserToAWSSSOUserMatchLink()
    load_matchlinks(
        neo4j_session,
        matchlink_schema,
        matched_pairs,
        lastupdated=update_tag,
        _sub_resource_label="EntraTenant",
        _sub_resource_id=tenant_id,
    )

    cleanup_entra_user_to_aws_sso_user_matchlinks(neo4j_session, common_job_parameters)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@timeit
async def sync_entra_federation(
    neo4j_session: neo4j.Session,
    update_tag: int,
    tenant_id: str,
    common_job_parameters: dict[str, Any],
) -> None:
    """
    Sync Entra federation relationships to the graph.

    Currently this delegates to the Entra -> AWS Identity Center sync only.

    :param neo4j_session: Neo4j session
    :param update_tag: Update tag for tracking data freshness
    :param tenant_id: Entra tenant ID
    :param common_job_parameters: Common job parameters for cleanup
    """
    sync_entra_to_aws_identity_center(
        neo4j_session=neo4j_session,
        update_tag=update_tag,
        tenant_id=tenant_id,
        common_job_parameters=common_job_parameters,
    )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@timeit
def cleanup_entra_user_to_aws_sso_user_matchlinks(
    neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
) -> None:
    """
    Run the MatchLink cleanup job for Entra user -> AWS SSO user relationships.

    :param neo4j_session: Neo4j session
    :param common_job_parameters: Common job parameters; TENANT_ID and UPDATE_TAG are read from it
    """
    matchlink_schema = EntraUserToAWSSSOUserMatchLink()
    tenant_id = common_job_parameters["TENANT_ID"]
    update_tag = common_job_parameters["UPDATE_TAG"]
    cleanup_job = GraphJob.from_matchlink(
        matchlink_schema,
        "EntraTenant",
        tenant_id,
        update_tag,
    )
    cleanup_job.run(neo4j_session)
|