cartography 0.111.0rc1__py3-none-any.whl → 0.112.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +57 -0
- cartography/config.py +24 -0
- cartography/data/indexes.cypher +0 -2
- cartography/data/jobs/analysis/keycloak_inheritance.json +30 -0
- cartography/intel/aws/apigateway.py +128 -17
- cartography/intel/aws/ec2/instances.py +3 -1
- cartography/intel/aws/ec2/network_interfaces.py +1 -1
- cartography/intel/aws/ec2/vpc_peerings.py +262 -125
- cartography/intel/azure/__init__.py +35 -32
- cartography/intel/azure/subscription.py +2 -2
- cartography/intel/azure/tenant.py +39 -30
- cartography/intel/azure/util/credentials.py +49 -174
- cartography/intel/entra/__init__.py +47 -1
- cartography/intel/entra/applications.py +220 -170
- cartography/intel/entra/groups.py +41 -22
- cartography/intel/entra/ou.py +28 -20
- cartography/intel/entra/users.py +24 -18
- cartography/intel/gcp/__init__.py +25 -8
- cartography/intel/gcp/compute.py +47 -12
- cartography/intel/github/repos.py +19 -10
- cartography/intel/github/util.py +12 -0
- cartography/intel/keycloak/__init__.py +153 -0
- cartography/intel/keycloak/authenticationexecutions.py +322 -0
- cartography/intel/keycloak/authenticationflows.py +77 -0
- cartography/intel/keycloak/clients.py +187 -0
- cartography/intel/keycloak/groups.py +126 -0
- cartography/intel/keycloak/identityproviders.py +94 -0
- cartography/intel/keycloak/organizations.py +163 -0
- cartography/intel/keycloak/realms.py +61 -0
- cartography/intel/keycloak/roles.py +202 -0
- cartography/intel/keycloak/scopes.py +73 -0
- cartography/intel/keycloak/users.py +70 -0
- cartography/intel/keycloak/util.py +47 -0
- cartography/intel/kubernetes/__init__.py +26 -0
- cartography/intel/kubernetes/eks.py +402 -0
- cartography/intel/kubernetes/rbac.py +133 -0
- cartography/models/aws/apigateway/apigatewayintegration.py +79 -0
- cartography/models/aws/apigateway/apigatewaymethod.py +74 -0
- cartography/models/aws/ec2/vpc_peering.py +157 -0
- cartography/models/azure/principal.py +44 -0
- cartography/models/azure/tenant.py +20 -0
- cartography/models/keycloak/__init__.py +0 -0
- cartography/models/keycloak/authenticationexecution.py +160 -0
- cartography/models/keycloak/authenticationflow.py +54 -0
- cartography/models/keycloak/client.py +177 -0
- cartography/models/keycloak/group.py +101 -0
- cartography/models/keycloak/identityprovider.py +89 -0
- cartography/models/keycloak/organization.py +116 -0
- cartography/models/keycloak/organizationdomain.py +73 -0
- cartography/models/keycloak/realm.py +173 -0
- cartography/models/keycloak/role.py +126 -0
- cartography/models/keycloak/scope.py +73 -0
- cartography/models/keycloak/user.py +51 -0
- cartography/models/kubernetes/clusterrolebindings.py +40 -0
- cartography/models/kubernetes/groups.py +107 -0
- cartography/models/kubernetes/oidc.py +51 -0
- cartography/models/kubernetes/rolebindings.py +40 -0
- cartography/models/kubernetes/users.py +105 -0
- cartography/sync.py +2 -0
- cartography/util.py +10 -0
- {cartography-0.111.0rc1.dist-info → cartography-0.112.0.dist-info}/METADATA +9 -5
- {cartography-0.111.0rc1.dist-info → cartography-0.112.0.dist-info}/RECORD +67 -34
- cartography/data/jobs/cleanup/aws_import_vpc_peering_cleanup.json +0 -45
- {cartography-0.111.0rc1.dist-info → cartography-0.112.0.dist-info}/WHEEL +0 -0
- {cartography-0.111.0rc1.dist-info → cartography-0.112.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.111.0rc1.dist-info → cartography-0.112.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.111.0rc1.dist-info → cartography-0.112.0.dist-info}/top_level.txt +0 -0
|
@@ -1,17 +1,20 @@
|
|
|
1
|
+
import gc
|
|
1
2
|
import logging
|
|
2
3
|
from typing import Any
|
|
3
|
-
from typing import
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import AsyncGenerator
|
|
5
|
+
from typing import Generator
|
|
5
6
|
|
|
6
|
-
import httpx
|
|
7
7
|
import neo4j
|
|
8
8
|
from azure.identity import ClientSecretCredential
|
|
9
|
-
from
|
|
9
|
+
from msgraph.generated.models.app_role_assignment_collection_response import (
|
|
10
|
+
AppRoleAssignmentCollectionResponse,
|
|
11
|
+
)
|
|
12
|
+
from msgraph.generated.models.application import Application
|
|
13
|
+
from msgraph.generated.models.service_principal import ServicePrincipal
|
|
10
14
|
from msgraph.graph_service_client import GraphServiceClient
|
|
11
15
|
|
|
12
16
|
from cartography.client.core.tx import load
|
|
13
17
|
from cartography.graph.job import GraphJob
|
|
14
|
-
from cartography.intel.entra.users import load_tenant
|
|
15
18
|
from cartography.models.entra.app_role_assignment import EntraAppRoleAssignmentSchema
|
|
16
19
|
from cartography.models.entra.application import EntraApplicationSchema
|
|
17
20
|
from cartography.util import timeit
|
|
@@ -27,25 +30,20 @@ logger = logging.getLogger(__name__)
|
|
|
27
30
|
# - You want to minimize API calls (increase values up to 999)
|
|
28
31
|
# - You're hitting rate limits (decrease values)
|
|
29
32
|
APPLICATIONS_PAGE_SIZE = 999
|
|
30
|
-
APP_ROLE_ASSIGNMENTS_PAGE_SIZE =
|
|
31
|
-
999 # Currently not used, but reserved for future pagination improvements
|
|
32
|
-
)
|
|
33
|
-
|
|
34
|
-
# Warning thresholds for potential data completeness issues
|
|
35
|
-
# Log warnings when individual users/groups have more assignments than this threshold
|
|
36
|
-
HIGH_ASSIGNMENT_COUNT_THRESHOLD = 100
|
|
33
|
+
APP_ROLE_ASSIGNMENTS_PAGE_SIZE = 999
|
|
37
34
|
|
|
38
35
|
|
|
39
36
|
@timeit
|
|
40
|
-
async def get_entra_applications(
|
|
37
|
+
async def get_entra_applications(
|
|
38
|
+
client: GraphServiceClient,
|
|
39
|
+
) -> AsyncGenerator[Application, None]:
|
|
41
40
|
"""
|
|
42
|
-
Gets Entra applications using the Microsoft Graph API.
|
|
41
|
+
Gets Entra applications using the Microsoft Graph API with a generator.
|
|
43
42
|
|
|
44
43
|
:param client: GraphServiceClient
|
|
45
|
-
:return:
|
|
44
|
+
:return: Generator of raw Application objects from Microsoft Graph
|
|
46
45
|
"""
|
|
47
|
-
|
|
48
|
-
|
|
46
|
+
count = 0
|
|
49
47
|
# Get all applications with pagination
|
|
50
48
|
request_configuration = client.applications.ApplicationsRequestBuilderGetRequestConfiguration(
|
|
51
49
|
query_parameters=client.applications.ApplicationsRequestBuilderGetQueryParameters(
|
|
@@ -56,189 +54,192 @@ async def get_entra_applications(client: GraphServiceClient) -> List[Any]:
|
|
|
56
54
|
|
|
57
55
|
while page:
|
|
58
56
|
if page.value:
|
|
59
|
-
|
|
57
|
+
for app in page.value:
|
|
58
|
+
count += 1
|
|
59
|
+
yield app
|
|
60
60
|
|
|
61
61
|
if not page.odata_next_link:
|
|
62
62
|
break
|
|
63
63
|
page = await client.applications.with_url(page.odata_next_link).get()
|
|
64
64
|
|
|
65
|
-
logger.info(f"Retrieved {
|
|
66
|
-
return applications
|
|
65
|
+
logger.info(f"Retrieved {count} Entra applications total")
|
|
67
66
|
|
|
68
67
|
|
|
69
68
|
@timeit
|
|
70
|
-
async def
|
|
71
|
-
client: GraphServiceClient,
|
|
72
|
-
) ->
|
|
69
|
+
async def get_app_role_assignments_for_app(
|
|
70
|
+
client: GraphServiceClient, app: Application
|
|
71
|
+
) -> AsyncGenerator[dict[str, Any], None]:
|
|
73
72
|
"""
|
|
74
|
-
Gets app role assignments
|
|
73
|
+
Gets app role assignments for a single application with safety limits.
|
|
75
74
|
|
|
76
75
|
:param client: GraphServiceClient
|
|
77
|
-
:param
|
|
78
|
-
:return:
|
|
76
|
+
:param app: Application object
|
|
77
|
+
:return: Generator of app role assignment data as dicts
|
|
79
78
|
"""
|
|
80
|
-
|
|
79
|
+
if not app.app_id:
|
|
80
|
+
logger.warning(f"Application {app.id} has no app_id, skipping")
|
|
81
|
+
return
|
|
81
82
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
request_configuration=client.service_principals.ServicePrincipalsRequestBuilderGetRequestConfiguration(
|
|
92
|
-
query_parameters=client.service_principals.ServicePrincipalsRequestBuilderGetQueryParameters(
|
|
93
|
-
filter=f"appId eq '{app.app_id}'"
|
|
94
|
-
)
|
|
95
|
-
)
|
|
83
|
+
logger.info(
|
|
84
|
+
f"Fetching role assignments for application: {app.display_name} ({app.app_id})"
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
# First, get the service principal for this application
|
|
88
|
+
service_principals_page = await client.service_principals.get(
|
|
89
|
+
request_configuration=client.service_principals.ServicePrincipalsRequestBuilderGetRequestConfiguration(
|
|
90
|
+
query_parameters=client.service_principals.ServicePrincipalsRequestBuilderGetQueryParameters(
|
|
91
|
+
filter=f"appId eq '{app.app_id}'"
|
|
96
92
|
)
|
|
93
|
+
)
|
|
94
|
+
)
|
|
97
95
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
96
|
+
if not service_principals_page or not service_principals_page.value:
|
|
97
|
+
logger.warning(
|
|
98
|
+
f"No service principal found for application {app.app_id} ({app.display_name}). Continuing."
|
|
99
|
+
)
|
|
100
|
+
return
|
|
101
|
+
|
|
102
|
+
service_principal: ServicePrincipal = service_principals_page.value[0]
|
|
103
|
+
|
|
104
|
+
# Get assignments for this service principal with pagination and limits
|
|
105
|
+
# Use maximum page size (999) to get more data per request
|
|
106
|
+
# Memory is managed through streaming and batching, not page size
|
|
107
|
+
request_config = client.service_principals.by_service_principal_id(
|
|
108
|
+
service_principal.id
|
|
109
|
+
).app_role_assigned_to.AppRoleAssignedToRequestBuilderGetRequestConfiguration(
|
|
110
|
+
query_parameters=client.service_principals.by_service_principal_id(
|
|
111
|
+
service_principal.id
|
|
112
|
+
).app_role_assigned_to.AppRoleAssignedToRequestBuilderGetQueryParameters(
|
|
113
|
+
top=APP_ROLE_ASSIGNMENTS_PAGE_SIZE # Maximum allowed by Microsoft Graph API
|
|
114
|
+
)
|
|
115
|
+
)
|
|
103
116
|
|
|
104
|
-
|
|
117
|
+
assignments_page: AppRoleAssignmentCollectionResponse | None = (
|
|
118
|
+
await client.service_principals.by_service_principal_id(
|
|
119
|
+
service_principal.id
|
|
120
|
+
).app_role_assigned_to.get(request_configuration=request_config)
|
|
121
|
+
)
|
|
105
122
|
|
|
106
|
-
|
|
107
|
-
|
|
123
|
+
assignment_count = 0
|
|
124
|
+
page_count = 0
|
|
125
|
+
|
|
126
|
+
while assignments_page:
|
|
127
|
+
page_count += 1
|
|
128
|
+
|
|
129
|
+
if assignments_page.value:
|
|
130
|
+
page_valid_count = 0
|
|
131
|
+
page_skipped_count = 0
|
|
132
|
+
|
|
133
|
+
# Process assignments and immediately yield to avoid accumulation
|
|
134
|
+
for assignment in assignments_page.value:
|
|
135
|
+
# Only yield if we have valid data since it's possible (but unlikely) for assignment.id to be None
|
|
136
|
+
if assignment.principal_id:
|
|
137
|
+
assignment_count += 1
|
|
138
|
+
page_valid_count += 1
|
|
139
|
+
yield {
|
|
140
|
+
"id": assignment.id,
|
|
141
|
+
"app_role_id": assignment.app_role_id,
|
|
142
|
+
"created_date_time": assignment.created_date_time,
|
|
143
|
+
"principal_id": assignment.principal_id,
|
|
144
|
+
"principal_display_name": assignment.principal_display_name,
|
|
145
|
+
"principal_type": assignment.principal_type,
|
|
146
|
+
"resource_display_name": assignment.resource_display_name,
|
|
147
|
+
"resource_id": assignment.resource_id,
|
|
148
|
+
"application_app_id": app.app_id,
|
|
149
|
+
}
|
|
150
|
+
else:
|
|
151
|
+
page_skipped_count += 1
|
|
152
|
+
|
|
153
|
+
# Log page results with details about skipped objects
|
|
154
|
+
if page_skipped_count > 0:
|
|
108
155
|
logger.warning(
|
|
109
|
-
f"
|
|
156
|
+
f"Page {page_count} for {app.display_name}: {page_valid_count} valid assignments, "
|
|
157
|
+
f"{page_skipped_count} skipped objects. Total valid: {assignment_count}"
|
|
110
158
|
)
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
service_principal.id
|
|
116
|
-
).app_role_assigned_to.get()
|
|
117
|
-
|
|
118
|
-
app_assignments = []
|
|
119
|
-
while assignments_page:
|
|
120
|
-
if assignments_page.value:
|
|
121
|
-
# Add application context to each assignment
|
|
122
|
-
for assignment in assignments_page.value:
|
|
123
|
-
# Add the application app_id to the assignment for relationship matching
|
|
124
|
-
assignment.application_app_id = app.app_id
|
|
125
|
-
app_assignments.extend(assignments_page.value)
|
|
126
|
-
|
|
127
|
-
if not assignments_page.odata_next_link:
|
|
128
|
-
break
|
|
129
|
-
assignments_page = await client.service_principals.with_url(
|
|
130
|
-
assignments_page.odata_next_link
|
|
131
|
-
).get()
|
|
132
|
-
|
|
133
|
-
# Log warning if a single application has many assignments (potential pagination issues)
|
|
134
|
-
if len(app_assignments) >= HIGH_ASSIGNMENT_COUNT_THRESHOLD:
|
|
135
|
-
logger.warning(
|
|
136
|
-
f"Application {app.display_name} ({app.app_id}) has {len(app_assignments)} role assignments. "
|
|
137
|
-
f"If this seems unexpectedly high, there may be pagination limits affecting data completeness."
|
|
159
|
+
else:
|
|
160
|
+
logger.debug(
|
|
161
|
+
f"Page {page_count} for {app.display_name}: {page_valid_count} assignments. "
|
|
162
|
+
f"Total: {assignment_count}"
|
|
138
163
|
)
|
|
139
164
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
f"Retrieved {len(app_assignments)} assignments for application {app.display_name}"
|
|
143
|
-
)
|
|
165
|
+
# Force garbage collection after each page
|
|
166
|
+
gc.collect()
|
|
144
167
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
logger.warning(
|
|
159
|
-
f"Rate limit hit when fetching app role assignments for application {app.app_id} ({app.display_name}). "
|
|
160
|
-
f"Consider reducing APPLICATIONS_PAGE_SIZE or implementing retry logic."
|
|
161
|
-
)
|
|
162
|
-
else:
|
|
163
|
-
logger.warning(
|
|
164
|
-
f"Microsoft Graph API error when fetching app role assignments for application {app.app_id} ({app.display_name}): "
|
|
165
|
-
f"Status {e.response_status_code}, Error: {str(e)}"
|
|
166
|
-
)
|
|
167
|
-
continue
|
|
168
|
-
except (httpx.TimeoutException, httpx.ConnectError, httpx.NetworkError) as e:
|
|
169
|
-
# Handle network-related errors
|
|
170
|
-
logger.warning(
|
|
171
|
-
f"Network error when fetching app role assignments for application {app.app_id} ({app.display_name}): {e}"
|
|
172
|
-
)
|
|
173
|
-
continue
|
|
174
|
-
except Exception as e:
|
|
175
|
-
# Only catch truly unexpected errors - these should be rare
|
|
176
|
-
logger.error(
|
|
177
|
-
f"Unexpected error when fetching app role assignments for application {app.app_id} ({app.display_name}): {e}",
|
|
178
|
-
exc_info=True,
|
|
179
|
-
)
|
|
180
|
-
continue
|
|
168
|
+
# Check if we have more pages to fetch
|
|
169
|
+
if not assignments_page.odata_next_link:
|
|
170
|
+
break
|
|
171
|
+
|
|
172
|
+
# Clear previous page before fetching next
|
|
173
|
+
assignments_page.value = None
|
|
174
|
+
|
|
175
|
+
# Fetch next page
|
|
176
|
+
logger.debug(
|
|
177
|
+
f"Fetching page {page_count + 1} of assignments for {app.display_name}"
|
|
178
|
+
)
|
|
179
|
+
next_page_url = assignments_page.odata_next_link
|
|
180
|
+
assignments_page = await client.service_principals.with_url(next_page_url).get()
|
|
181
181
|
|
|
182
|
-
logger.info(
|
|
183
|
-
|
|
182
|
+
logger.info(
|
|
183
|
+
f"Successfully retrieved {assignment_count} assignments for application {app.display_name} (pages: {page_count})"
|
|
184
|
+
)
|
|
184
185
|
|
|
185
186
|
|
|
186
|
-
def transform_applications(
|
|
187
|
+
def transform_applications(
|
|
188
|
+
applications: list[Application],
|
|
189
|
+
) -> Generator[dict[str, Any], None, None]:
|
|
187
190
|
"""
|
|
188
|
-
Transform application data for graph loading.
|
|
191
|
+
Transform application data for graph loading using a generator.
|
|
189
192
|
|
|
190
193
|
:param applications: Raw Application objects from Microsoft Graph API
|
|
191
|
-
:return:
|
|
194
|
+
:return: Generator of transformed application data for graph loading
|
|
192
195
|
"""
|
|
193
|
-
result = []
|
|
194
196
|
for app in applications:
|
|
195
|
-
|
|
197
|
+
yield {
|
|
196
198
|
"id": app.id,
|
|
197
199
|
"app_id": app.app_id,
|
|
198
200
|
"display_name": app.display_name,
|
|
199
|
-
"publisher_domain":
|
|
201
|
+
"publisher_domain": app.publisher_domain,
|
|
200
202
|
"sign_in_audience": app.sign_in_audience,
|
|
201
203
|
}
|
|
202
|
-
result.append(transformed)
|
|
203
|
-
return result
|
|
204
204
|
|
|
205
205
|
|
|
206
206
|
def transform_app_role_assignments(
|
|
207
|
-
assignments:
|
|
208
|
-
) ->
|
|
207
|
+
assignments: list[dict[str, Any]],
|
|
208
|
+
) -> list[dict[str, Any]]:
|
|
209
209
|
"""
|
|
210
210
|
Transform app role assignment data for graph loading.
|
|
211
211
|
|
|
212
|
-
:param assignments: Raw app role assignment
|
|
212
|
+
:param assignments: Raw app role assignment data as dicts
|
|
213
213
|
:return: Transformed assignment data for graph loading
|
|
214
214
|
"""
|
|
215
|
-
|
|
216
|
-
for
|
|
217
|
-
transformed
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
215
|
+
transformed = []
|
|
216
|
+
for assign in assignments:
|
|
217
|
+
transformed.append(
|
|
218
|
+
{
|
|
219
|
+
"id": assign["id"],
|
|
220
|
+
"app_role_id": (
|
|
221
|
+
str(assign["app_role_id"]) if assign["app_role_id"] else None
|
|
222
|
+
),
|
|
223
|
+
"created_date_time": assign["created_date_time"],
|
|
224
|
+
"principal_id": (
|
|
225
|
+
str(assign["principal_id"]) if assign["principal_id"] else None
|
|
226
|
+
),
|
|
227
|
+
"principal_display_name": assign["principal_display_name"],
|
|
228
|
+
"principal_type": assign["principal_type"],
|
|
229
|
+
"resource_display_name": assign["resource_display_name"],
|
|
230
|
+
"resource_id": (
|
|
231
|
+
str(assign["resource_id"]) if assign["resource_id"] else None
|
|
232
|
+
),
|
|
233
|
+
"application_app_id": assign["application_app_id"],
|
|
234
|
+
}
|
|
235
|
+
)
|
|
236
|
+
return transformed
|
|
236
237
|
|
|
237
238
|
|
|
238
239
|
@timeit
|
|
239
240
|
def load_applications(
|
|
240
241
|
neo4j_session: neo4j.Session,
|
|
241
|
-
applications_data:
|
|
242
|
+
applications_data: list[dict[str, Any]],
|
|
242
243
|
update_tag: int,
|
|
243
244
|
tenant_id: str,
|
|
244
245
|
) -> None:
|
|
@@ -262,7 +263,7 @@ def load_applications(
|
|
|
262
263
|
@timeit
|
|
263
264
|
def load_app_role_assignments(
|
|
264
265
|
neo4j_session: neo4j.Session,
|
|
265
|
-
assignments_data:
|
|
266
|
+
assignments_data: list[dict[str, Any]],
|
|
266
267
|
update_tag: int,
|
|
267
268
|
tenant_id: str,
|
|
268
269
|
) -> None:
|
|
@@ -285,7 +286,7 @@ def load_app_role_assignments(
|
|
|
285
286
|
|
|
286
287
|
@timeit
|
|
287
288
|
def cleanup_applications(
|
|
288
|
-
neo4j_session: neo4j.Session, common_job_parameters:
|
|
289
|
+
neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
|
|
289
290
|
) -> None:
|
|
290
291
|
"""
|
|
291
292
|
Delete Entra applications and their relationships from the graph if they were not updated in the last sync.
|
|
@@ -300,7 +301,7 @@ def cleanup_applications(
|
|
|
300
301
|
|
|
301
302
|
@timeit
|
|
302
303
|
def cleanup_app_role_assignments(
|
|
303
|
-
neo4j_session: neo4j.Session, common_job_parameters:
|
|
304
|
+
neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
|
|
304
305
|
) -> None:
|
|
305
306
|
"""
|
|
306
307
|
Delete Entra app role assignments and their relationships from the graph if they were not updated in the last sync.
|
|
@@ -320,7 +321,7 @@ async def sync_entra_applications(
|
|
|
320
321
|
client_id: str,
|
|
321
322
|
client_secret: str,
|
|
322
323
|
update_tag: int,
|
|
323
|
-
common_job_parameters:
|
|
324
|
+
common_job_parameters: dict[str, Any],
|
|
324
325
|
) -> None:
|
|
325
326
|
"""
|
|
326
327
|
Sync Entra applications and their app role assignments to the graph.
|
|
@@ -344,22 +345,71 @@ async def sync_entra_applications(
|
|
|
344
345
|
scopes=["https://graph.microsoft.com/.default"],
|
|
345
346
|
)
|
|
346
347
|
|
|
347
|
-
#
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
transformed_applications = transform_applications(applications_data)
|
|
348
|
+
# Process applications and their assignments in batches
|
|
349
|
+
app_batch_size = 10 # Batch size for applications
|
|
350
|
+
assignment_batch_size = (
|
|
351
|
+
200 # Batch size for assignments (increased since we handle memory better now)
|
|
352
|
+
)
|
|
353
353
|
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
354
|
+
apps_batch = []
|
|
355
|
+
assignments_batch = []
|
|
356
|
+
total_assignment_count = 0
|
|
357
|
+
total_app_count = 0
|
|
358
|
+
|
|
359
|
+
# Stream apps
|
|
360
|
+
async for app in get_entra_applications(client):
|
|
361
|
+
total_app_count += 1
|
|
362
|
+
apps_batch.append(app)
|
|
363
|
+
|
|
364
|
+
# Transform and load applications in batches
|
|
365
|
+
if len(apps_batch) >= app_batch_size:
|
|
366
|
+
transformed_apps = list(transform_applications(apps_batch))
|
|
367
|
+
load_applications(neo4j_session, transformed_apps, update_tag, tenant_id)
|
|
368
|
+
logger.info(
|
|
369
|
+
f"Loaded batch of {len(apps_batch)} applications (total: {total_app_count})"
|
|
370
|
+
)
|
|
371
|
+
apps_batch.clear()
|
|
372
|
+
transformed_apps.clear()
|
|
373
|
+
gc.collect() # Force garbage collection
|
|
374
|
+
|
|
375
|
+
# Stream app role assignments
|
|
376
|
+
async for assignment in get_app_role_assignments_for_app(client, app):
|
|
377
|
+
assignments_batch.append(assignment)
|
|
378
|
+
total_assignment_count += 1
|
|
379
|
+
|
|
380
|
+
# Transform and load assignments in batches
|
|
381
|
+
if len(assignments_batch) >= assignment_batch_size:
|
|
382
|
+
transformed_assignments = transform_app_role_assignments(
|
|
383
|
+
assignments_batch
|
|
384
|
+
)
|
|
385
|
+
load_app_role_assignments(
|
|
386
|
+
neo4j_session, transformed_assignments, update_tag, tenant_id
|
|
387
|
+
)
|
|
388
|
+
logger.debug(f"Loaded batch of {len(assignments_batch)} assignments")
|
|
389
|
+
assignments_batch.clear()
|
|
390
|
+
transformed_assignments.clear()
|
|
391
|
+
|
|
392
|
+
# Force garbage collection after batch load
|
|
393
|
+
gc.collect()
|
|
394
|
+
|
|
395
|
+
# Process remaining applications
|
|
396
|
+
if apps_batch:
|
|
397
|
+
transformed_apps = list(transform_applications(apps_batch))
|
|
398
|
+
load_applications(neo4j_session, transformed_apps, update_tag, tenant_id)
|
|
399
|
+
apps_batch.clear()
|
|
400
|
+
transformed_apps.clear()
|
|
401
|
+
|
|
402
|
+
# Process remaining assignments
|
|
403
|
+
if assignments_batch:
|
|
404
|
+
transformed_assignments = transform_app_role_assignments(assignments_batch)
|
|
405
|
+
load_app_role_assignments(
|
|
406
|
+
neo4j_session, transformed_assignments, update_tag, tenant_id
|
|
407
|
+
)
|
|
408
|
+
assignments_batch.clear()
|
|
409
|
+
transformed_assignments.clear()
|
|
357
410
|
|
|
358
|
-
#
|
|
359
|
-
|
|
360
|
-
load_app_role_assignments(
|
|
361
|
-
neo4j_session, transformed_assignments, update_tag, tenant_id
|
|
362
|
-
)
|
|
411
|
+
# Final garbage collection
|
|
412
|
+
gc.collect()
|
|
363
413
|
|
|
364
414
|
# Cleanup stale data
|
|
365
415
|
cleanup_applications(neo4j_session, common_job_parameters)
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from typing import Any
|
|
3
|
+
from typing import AsyncGenerator
|
|
4
|
+
from typing import Generator
|
|
3
5
|
|
|
4
6
|
import neo4j
|
|
5
7
|
from azure.identity import ClientSecretCredential
|
|
@@ -9,7 +11,6 @@ from msgraph.generated.models.group import Group
|
|
|
9
11
|
|
|
10
12
|
from cartography.client.core.tx import load
|
|
11
13
|
from cartography.graph.job import GraphJob
|
|
12
|
-
from cartography.intel.entra.users import load_tenant
|
|
13
14
|
from cartography.models.entra.group import EntraGroupSchema
|
|
14
15
|
from cartography.util import timeit
|
|
15
16
|
|
|
@@ -17,23 +18,20 @@ logger = logging.getLogger(__name__)
|
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
@timeit
|
|
20
|
-
async def get_entra_groups(client: GraphServiceClient) ->
|
|
21
|
-
"""Get all groups from Microsoft Graph API with pagination."""
|
|
22
|
-
all_groups: list[Group] = []
|
|
23
|
-
|
|
21
|
+
async def get_entra_groups(client: GraphServiceClient) -> AsyncGenerator[Group, None]:
|
|
22
|
+
"""Get all groups from Microsoft Graph API with pagination using a generator."""
|
|
24
23
|
request_configuration = client.groups.GroupsRequestBuilderGetRequestConfiguration(
|
|
25
24
|
query_parameters=client.groups.GroupsRequestBuilderGetQueryParameters(top=999)
|
|
26
25
|
)
|
|
27
26
|
page = await client.groups.get(request_configuration=request_configuration)
|
|
28
27
|
while page:
|
|
29
28
|
if page.value:
|
|
30
|
-
|
|
29
|
+
for group in page.value:
|
|
30
|
+
yield group
|
|
31
31
|
if not page.odata_next_link:
|
|
32
32
|
break
|
|
33
33
|
page = await client.groups.with_url(page.odata_next_link).get()
|
|
34
34
|
|
|
35
|
-
return all_groups
|
|
36
|
-
|
|
37
35
|
|
|
38
36
|
@timeit
|
|
39
37
|
async def get_group_members(
|
|
@@ -82,11 +80,10 @@ def transform_groups(
|
|
|
82
80
|
user_member_map: dict[str, list[str]],
|
|
83
81
|
group_member_map: dict[str, list[str]],
|
|
84
82
|
group_owner_map: dict[str, list[str]],
|
|
85
|
-
) ->
|
|
86
|
-
"""Transform API responses into dictionaries for ingestion."""
|
|
87
|
-
result: list[dict[str, Any]] = []
|
|
83
|
+
) -> Generator[dict[str, Any], None, None]:
|
|
84
|
+
"""Transform API responses into dictionaries for ingestion using a generator."""
|
|
88
85
|
for g in groups:
|
|
89
|
-
|
|
86
|
+
yield {
|
|
90
87
|
"id": g.id,
|
|
91
88
|
"display_name": g.display_name,
|
|
92
89
|
"description": g.description,
|
|
@@ -103,8 +100,6 @@ def transform_groups(
|
|
|
103
100
|
"member_group_ids": group_member_map.get(g.id, []),
|
|
104
101
|
"owner_ids": group_owner_map.get(g.id, []),
|
|
105
102
|
}
|
|
106
|
-
result.append(transformed)
|
|
107
|
-
return result
|
|
108
103
|
|
|
109
104
|
|
|
110
105
|
@timeit
|
|
@@ -150,17 +145,22 @@ async def sync_entra_groups(
|
|
|
150
145
|
credential, scopes=["https://graph.microsoft.com/.default"]
|
|
151
146
|
)
|
|
152
147
|
|
|
153
|
-
groups
|
|
148
|
+
# Collect groups in batches to avoid loading all at once
|
|
149
|
+
groups_batch = []
|
|
150
|
+
batch_size = 100 # Process groups in batches
|
|
154
151
|
|
|
155
152
|
user_member_map: dict[str, list[str]] = {}
|
|
156
153
|
group_member_map: dict[str, list[str]] = {}
|
|
157
154
|
group_owner_map: dict[str, list[str]] = {}
|
|
158
155
|
|
|
159
|
-
|
|
156
|
+
# First pass: collect groups and their owners/members
|
|
157
|
+
async for group in get_entra_groups(client):
|
|
158
|
+
groups_batch.append(group)
|
|
159
|
+
|
|
160
|
+
# Fetch owners and members for this group
|
|
160
161
|
owners = await get_group_owners(client, group.id)
|
|
161
162
|
group_owner_map[group.id] = owners
|
|
162
163
|
|
|
163
|
-
for group in groups:
|
|
164
164
|
try:
|
|
165
165
|
users, subgroups = await get_group_members(client, group.id)
|
|
166
166
|
user_member_map[group.id] = users
|
|
@@ -170,10 +170,29 @@ async def sync_entra_groups(
|
|
|
170
170
|
user_member_map[group.id] = []
|
|
171
171
|
group_member_map[group.id] = []
|
|
172
172
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
173
|
+
# Process batch when it reaches the size limit
|
|
174
|
+
if len(groups_batch) >= batch_size:
|
|
175
|
+
transformed_groups = list(
|
|
176
|
+
transform_groups(
|
|
177
|
+
groups_batch, user_member_map, group_member_map, group_owner_map
|
|
178
|
+
)
|
|
179
|
+
)
|
|
180
|
+
load_groups(neo4j_session, transformed_groups, update_tag, tenant_id)
|
|
181
|
+
|
|
182
|
+
# Clear the batch and maps for processed groups
|
|
183
|
+
for g in groups_batch:
|
|
184
|
+
user_member_map.pop(g.id, None)
|
|
185
|
+
group_member_map.pop(g.id, None)
|
|
186
|
+
group_owner_map.pop(g.id, None)
|
|
187
|
+
groups_batch.clear()
|
|
188
|
+
|
|
189
|
+
# Process any remaining groups
|
|
190
|
+
if groups_batch:
|
|
191
|
+
transformed_groups = list(
|
|
192
|
+
transform_groups(
|
|
193
|
+
groups_batch, user_member_map, group_member_map, group_owner_map
|
|
194
|
+
)
|
|
195
|
+
)
|
|
196
|
+
load_groups(neo4j_session, transformed_groups, update_tag, tenant_id)
|
|
176
197
|
|
|
177
|
-
load_tenant(neo4j_session, {"id": tenant_id}, update_tag)
|
|
178
|
-
load_groups(neo4j_session, transformed_groups, update_tag, tenant_id)
|
|
179
198
|
cleanup_groups(neo4j_session, common_job_parameters)
|