cartography 0.105.0__py3-none-any.whl → 0.106.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (53)
  1. cartography/_version.py +2 -2
  2. cartography/client/core/tx.py +62 -0
  3. cartography/data/indexes.cypher +0 -34
  4. cartography/graph/cleanupbuilder.py +47 -0
  5. cartography/graph/job.py +42 -0
  6. cartography/graph/querybuilder.py +136 -2
  7. cartography/graph/statement.py +1 -1
  8. cartography/intel/aws/ecs.py +228 -380
  9. cartography/intel/aws/efs.py +261 -0
  10. cartography/intel/aws/identitycenter.py +14 -3
  11. cartography/intel/aws/inspector.py +96 -53
  12. cartography/intel/aws/rds.py +2 -1
  13. cartography/intel/aws/resources.py +2 -0
  14. cartography/intel/entra/__init__.py +11 -0
  15. cartography/intel/entra/applications.py +366 -0
  16. cartography/intel/kubernetes/__init__.py +30 -14
  17. cartography/intel/kubernetes/clusters.py +86 -0
  18. cartography/intel/kubernetes/namespaces.py +59 -57
  19. cartography/intel/kubernetes/pods.py +140 -77
  20. cartography/intel/kubernetes/secrets.py +95 -45
  21. cartography/intel/kubernetes/services.py +131 -67
  22. cartography/intel/kubernetes/util.py +125 -14
  23. cartography/models/aws/ecs/__init__.py +0 -0
  24. cartography/models/aws/ecs/clusters.py +64 -0
  25. cartography/models/aws/ecs/container_definitions.py +93 -0
  26. cartography/models/aws/ecs/container_instances.py +84 -0
  27. cartography/models/aws/ecs/containers.py +80 -0
  28. cartography/models/aws/ecs/services.py +117 -0
  29. cartography/models/aws/ecs/task_definitions.py +97 -0
  30. cartography/models/aws/ecs/tasks.py +110 -0
  31. cartography/models/aws/efs/__init__.py +0 -0
  32. cartography/models/aws/efs/access_point.py +77 -0
  33. cartography/models/aws/efs/file_system.py +60 -0
  34. cartography/models/aws/efs/mount_target.py +79 -0
  35. cartography/models/core/common.py +1 -0
  36. cartography/models/core/relationships.py +44 -0
  37. cartography/models/entra/app_role_assignment.py +115 -0
  38. cartography/models/entra/application.py +47 -0
  39. cartography/models/kubernetes/__init__.py +0 -0
  40. cartography/models/kubernetes/clusters.py +26 -0
  41. cartography/models/kubernetes/containers.py +108 -0
  42. cartography/models/kubernetes/namespaces.py +51 -0
  43. cartography/models/kubernetes/pods.py +80 -0
  44. cartography/models/kubernetes/secrets.py +79 -0
  45. cartography/models/kubernetes/services.py +108 -0
  46. cartography/util.py +15 -10
  47. {cartography-0.105.0.dist-info → cartography-0.106.0rc2.dist-info}/METADATA +1 -1
  48. {cartography-0.105.0.dist-info → cartography-0.106.0rc2.dist-info}/RECORD +52 -29
  49. cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
  50. {cartography-0.105.0.dist-info → cartography-0.106.0rc2.dist-info}/WHEEL +0 -0
  51. {cartography-0.105.0.dist-info → cartography-0.106.0rc2.dist-info}/entry_points.txt +0 -0
  52. {cartography-0.105.0.dist-info → cartography-0.106.0rc2.dist-info}/licenses/LICENSE +0 -0
  53. {cartography-0.105.0.dist-info → cartography-0.106.0rc2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,366 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import httpx
7
+ import neo4j
8
+ from azure.identity import ClientSecretCredential
9
+ from kiota_abstractions.api_error import APIError
10
+ from msgraph.graph_service_client import GraphServiceClient
11
+
12
+ from cartography.client.core.tx import load
13
+ from cartography.graph.job import GraphJob
14
+ from cartography.intel.entra.users import load_tenant
15
+ from cartography.models.entra.app_role_assignment import EntraAppRoleAssignmentSchema
16
+ from cartography.models.entra.application import EntraApplicationSchema
17
+ from cartography.util import timeit
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # Configurable constants for API pagination
22
+ # Microsoft Graph API recommends page sizes up to 999 for most resources
23
+ # Set to 999 by default, but can be adjusted if needed
24
+ #
25
+ # Adjust these values if:
26
+ # - You have performance issues (decrease values)
27
+ # - You want to minimize API calls (increase values up to 999)
28
+ # - You're hitting rate limits (decrease values)
29
+ APPLICATIONS_PAGE_SIZE = 999
30
+ APP_ROLE_ASSIGNMENTS_PAGE_SIZE = (
31
+ 999 # Currently not used, but reserved for future pagination improvements
32
+ )
33
+
34
+ # Warning thresholds for potential data completeness issues
35
+ # Log warnings when individual users/groups have more assignments than this threshold
36
+ HIGH_ASSIGNMENT_COUNT_THRESHOLD = 100
37
+
38
+
39
+ @timeit
40
+ async def get_entra_applications(client: GraphServiceClient) -> List[Any]:
41
+ """
42
+ Gets Entra applications using the Microsoft Graph API.
43
+
44
+ :param client: GraphServiceClient
45
+ :return: List of raw Application objects from Microsoft Graph
46
+ """
47
+ applications = []
48
+
49
+ # Get all applications with pagination
50
+ request_configuration = client.applications.ApplicationsRequestBuilderGetRequestConfiguration(
51
+ query_parameters=client.applications.ApplicationsRequestBuilderGetQueryParameters(
52
+ top=APPLICATIONS_PAGE_SIZE
53
+ )
54
+ )
55
+ page = await client.applications.get(request_configuration=request_configuration)
56
+
57
+ while page:
58
+ if page.value:
59
+ applications.extend(page.value)
60
+
61
+ if not page.odata_next_link:
62
+ break
63
+ page = await client.applications.with_url(page.odata_next_link).get()
64
+
65
+ logger.info(f"Retrieved {len(applications)} Entra applications total")
66
+ return applications
67
+
68
+
69
+ @timeit
70
+ async def get_app_role_assignments(
71
+ client: GraphServiceClient, applications: List[Any]
72
+ ) -> List[Any]:
73
+ """
74
+ Gets app role assignments efficiently by querying each application's service principal.
75
+
76
+ :param client: GraphServiceClient
77
+ :param applications: List of Application objects (from get_entra_applications)
78
+ :return: List of raw app role assignment objects from Microsoft Graph
79
+ """
80
+ assignments = []
81
+
82
+ for app in applications:
83
+ if not app.app_id:
84
+ logger.warning(f"Application {app.id} has no app_id, skipping")
85
+ continue
86
+
87
+ try:
88
+ # First, get the service principal for this application
89
+ # The service principal represents the app in the directory
90
+ service_principals_page = await client.service_principals.get(
91
+ request_configuration=client.service_principals.ServicePrincipalsRequestBuilderGetRequestConfiguration(
92
+ query_parameters=client.service_principals.ServicePrincipalsRequestBuilderGetQueryParameters(
93
+ filter=f"appId eq '{app.app_id}'"
94
+ )
95
+ )
96
+ )
97
+
98
+ if not service_principals_page or not service_principals_page.value:
99
+ logger.debug(
100
+ f"No service principal found for application {app.app_id} ({app.display_name})"
101
+ )
102
+ continue
103
+
104
+ service_principal = service_principals_page.value[0]
105
+
106
+ # Ensure service principal has an ID
107
+ if not service_principal.id:
108
+ logger.warning(
109
+ f"Service principal for application {app.app_id} ({app.display_name}) has no ID, skipping"
110
+ )
111
+ continue
112
+
113
+ # Get all assignments for this service principal (users, groups, service principals)
114
+ assignments_page = await client.service_principals.by_service_principal_id(
115
+ service_principal.id
116
+ ).app_role_assigned_to.get()
117
+
118
+ app_assignments = []
119
+ while assignments_page:
120
+ if assignments_page.value:
121
+ # Add application context to each assignment
122
+ for assignment in assignments_page.value:
123
+ # Add the application app_id to the assignment for relationship matching
124
+ assignment.application_app_id = app.app_id
125
+ app_assignments.extend(assignments_page.value)
126
+
127
+ if not assignments_page.odata_next_link:
128
+ break
129
+ assignments_page = await client.service_principals.with_url(
130
+ assignments_page.odata_next_link
131
+ ).get()
132
+
133
+ # Log warning if a single application has many assignments (potential pagination issues)
134
+ if len(app_assignments) >= HIGH_ASSIGNMENT_COUNT_THRESHOLD:
135
+ logger.warning(
136
+ f"Application {app.display_name} ({app.app_id}) has {len(app_assignments)} role assignments. "
137
+ f"If this seems unexpectedly high, there may be pagination limits affecting data completeness."
138
+ )
139
+
140
+ assignments.extend(app_assignments)
141
+ logger.debug(
142
+ f"Retrieved {len(app_assignments)} assignments for application {app.display_name}"
143
+ )
144
+
145
+ except APIError as e:
146
+ # Handle Microsoft Graph API errors (403 Forbidden, 404 Not Found, etc.)
147
+ if e.response_status_code == 403:
148
+ logger.warning(
149
+ f"Access denied when fetching app role assignments for application {app.app_id} ({app.display_name}). "
150
+ f"This application may not have sufficient permissions or may not exist."
151
+ )
152
+ elif e.response_status_code == 404:
153
+ logger.warning(
154
+ f"Application {app.app_id} ({app.display_name}) not found when fetching app role assignments. "
155
+ f"Application may have been deleted or does not exist."
156
+ )
157
+ elif e.response_status_code == 429:
158
+ logger.warning(
159
+ f"Rate limit hit when fetching app role assignments for application {app.app_id} ({app.display_name}). "
160
+ f"Consider reducing APPLICATIONS_PAGE_SIZE or implementing retry logic."
161
+ )
162
+ else:
163
+ logger.warning(
164
+ f"Microsoft Graph API error when fetching app role assignments for application {app.app_id} ({app.display_name}): "
165
+ f"Status {e.response_status_code}, Error: {str(e)}"
166
+ )
167
+ continue
168
+ except (httpx.TimeoutException, httpx.ConnectError, httpx.NetworkError) as e:
169
+ # Handle network-related errors
170
+ logger.warning(
171
+ f"Network error when fetching app role assignments for application {app.app_id} ({app.display_name}): {e}"
172
+ )
173
+ continue
174
+ except Exception as e:
175
+ # Only catch truly unexpected errors - these should be rare
176
+ logger.error(
177
+ f"Unexpected error when fetching app role assignments for application {app.app_id} ({app.display_name}): {e}",
178
+ exc_info=True,
179
+ )
180
+ continue
181
+
182
+ logger.info(f"Retrieved {len(assignments)} app role assignments total")
183
+ return assignments
184
+
185
+
186
+ def transform_applications(applications: List[Any]) -> List[Dict[str, Any]]:
187
+ """
188
+ Transform application data for graph loading.
189
+
190
+ :param applications: Raw Application objects from Microsoft Graph API
191
+ :return: Transformed application data for graph loading
192
+ """
193
+ result = []
194
+ for app in applications:
195
+ transformed = {
196
+ "id": app.id,
197
+ "app_id": app.app_id,
198
+ "display_name": app.display_name,
199
+ "publisher_domain": getattr(app, "publisher_domain", None),
200
+ "sign_in_audience": app.sign_in_audience,
201
+ }
202
+ result.append(transformed)
203
+ return result
204
+
205
+
206
+ def transform_app_role_assignments(
207
+ assignments: List[Any],
208
+ ) -> List[Dict[str, Any]]:
209
+ """
210
+ Transform app role assignment data for graph loading.
211
+
212
+ :param assignments: Raw app role assignment objects from Microsoft Graph API
213
+ :return: Transformed assignment data for graph loading
214
+ """
215
+ result = []
216
+ for assignment in assignments:
217
+ transformed = {
218
+ "id": assignment.id,
219
+ "app_role_id": (
220
+ str(assignment.app_role_id) if assignment.app_role_id else None
221
+ ),
222
+ "created_date_time": assignment.created_date_time,
223
+ "principal_id": (
224
+ str(assignment.principal_id) if assignment.principal_id else None
225
+ ),
226
+ "principal_display_name": assignment.principal_display_name,
227
+ "principal_type": assignment.principal_type,
228
+ "resource_display_name": assignment.resource_display_name,
229
+ "resource_id": (
230
+ str(assignment.resource_id) if assignment.resource_id else None
231
+ ),
232
+ "application_app_id": getattr(assignment, "application_app_id", None),
233
+ }
234
+ result.append(transformed)
235
+ return result
236
+
237
+
238
+ @timeit
239
+ def load_applications(
240
+ neo4j_session: neo4j.Session,
241
+ applications_data: List[Dict[str, Any]],
242
+ update_tag: int,
243
+ tenant_id: str,
244
+ ) -> None:
245
+ """
246
+ Load Entra applications to the graph.
247
+
248
+ :param neo4j_session: Neo4j session
249
+ :param applications_data: Application data to load
250
+ :param update_tag: Update tag for tracking data freshness
251
+ :param tenant_id: Entra tenant ID
252
+ """
253
+ load(
254
+ neo4j_session,
255
+ EntraApplicationSchema(),
256
+ applications_data,
257
+ lastupdated=update_tag,
258
+ TENANT_ID=tenant_id,
259
+ )
260
+
261
+
262
+ @timeit
263
+ def load_app_role_assignments(
264
+ neo4j_session: neo4j.Session,
265
+ assignments_data: List[Dict[str, Any]],
266
+ update_tag: int,
267
+ tenant_id: str,
268
+ ) -> None:
269
+ """
270
+ Load Entra app role assignments to the graph.
271
+
272
+ :param neo4j_session: Neo4j session
273
+ :param assignments_data: Assignment data to load
274
+ :param update_tag: Update tag for tracking data freshness
275
+ :param tenant_id: Entra tenant ID
276
+ """
277
+ load(
278
+ neo4j_session,
279
+ EntraAppRoleAssignmentSchema(),
280
+ assignments_data,
281
+ lastupdated=update_tag,
282
+ TENANT_ID=tenant_id,
283
+ )
284
+
285
+
286
+ @timeit
287
+ def cleanup_applications(
288
+ neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]
289
+ ) -> None:
290
+ """
291
+ Delete Entra applications and their relationships from the graph if they were not updated in the last sync.
292
+
293
+ :param neo4j_session: Neo4j session
294
+ :param common_job_parameters: Common job parameters containing UPDATE_TAG and TENANT_ID
295
+ """
296
+ GraphJob.from_node_schema(EntraApplicationSchema(), common_job_parameters).run(
297
+ neo4j_session
298
+ )
299
+
300
+
301
+ @timeit
302
+ def cleanup_app_role_assignments(
303
+ neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]
304
+ ) -> None:
305
+ """
306
+ Delete Entra app role assignments and their relationships from the graph if they were not updated in the last sync.
307
+
308
+ :param neo4j_session: Neo4j session
309
+ :param common_job_parameters: Common job parameters containing UPDATE_TAG and TENANT_ID
310
+ """
311
+ GraphJob.from_node_schema(
312
+ EntraAppRoleAssignmentSchema(), common_job_parameters
313
+ ).run(neo4j_session)
314
+
315
+
316
+ @timeit
317
+ async def sync_entra_applications(
318
+ neo4j_session: neo4j.Session,
319
+ tenant_id: str,
320
+ client_id: str,
321
+ client_secret: str,
322
+ update_tag: int,
323
+ common_job_parameters: Dict[str, Any],
324
+ ) -> None:
325
+ """
326
+ Sync Entra applications and their app role assignments to the graph.
327
+
328
+ :param neo4j_session: Neo4j session
329
+ :param tenant_id: Entra tenant ID
330
+ :param client_id: Azure application client ID
331
+ :param client_secret: Azure application client secret
332
+ :param update_tag: Update tag for tracking data freshness
333
+ :param common_job_parameters: Common job parameters containing UPDATE_TAG and TENANT_ID
334
+ """
335
+ # Create credentials and client
336
+ credential = ClientSecretCredential(
337
+ tenant_id=tenant_id,
338
+ client_id=client_id,
339
+ client_secret=client_secret,
340
+ )
341
+
342
+ client = GraphServiceClient(
343
+ credential,
344
+ scopes=["https://graph.microsoft.com/.default"],
345
+ )
346
+
347
+ # Load tenant (prerequisite)
348
+ load_tenant(neo4j_session, {"id": tenant_id}, update_tag)
349
+
350
+ # Get and transform applications data
351
+ applications_data = await get_entra_applications(client)
352
+ transformed_applications = transform_applications(applications_data)
353
+
354
+ # Get and transform app role assignments data
355
+ assignments_data = await get_app_role_assignments(client, applications_data)
356
+ transformed_assignments = transform_app_role_assignments(assignments_data)
357
+
358
+ # Load applications and assignments
359
+ load_applications(neo4j_session, transformed_applications, update_tag, tenant_id)
360
+ load_app_role_assignments(
361
+ neo4j_session, transformed_assignments, update_tag, tenant_id
362
+ )
363
+
364
+ # Cleanup stale data
365
+ cleanup_applications(neo4j_session, common_job_parameters)
366
+ cleanup_app_role_assignments(neo4j_session, common_job_parameters)
@@ -3,12 +3,12 @@ import logging
3
3
  from neo4j import Session
4
4
 
5
5
  from cartography.config import Config
6
+ from cartography.intel.kubernetes.clusters import sync_kubernetes_cluster
6
7
  from cartography.intel.kubernetes.namespaces import sync_namespaces
7
8
  from cartography.intel.kubernetes.pods import sync_pods
8
9
  from cartography.intel.kubernetes.secrets import sync_secrets
9
10
  from cartography.intel.kubernetes.services import sync_services
10
11
  from cartography.intel.kubernetes.util import get_k8s_clients
11
- from cartography.util import run_cleanup_job
12
12
  from cartography.util import timeit
13
13
 
14
14
  logger = logging.getLogger(__name__)
@@ -16,26 +16,42 @@ logger = logging.getLogger(__name__)
16
16
 
17
17
  @timeit
18
18
  def start_k8s_ingestion(session: Session, config: Config) -> None:
19
+ if not config.update_tag:
20
+ logger.error("Cartography update tag not provided.")
21
+ return
19
22
 
20
- common_job_parameters = {"UPDATE_TAG": config.update_tag}
21
23
  if not config.k8s_kubeconfig:
22
- logger.error("kubeconfig not found.")
24
+ logger.error("Kubernetes kubeconfig not provided.")
23
25
  return
24
26
 
27
+ common_job_parameters = {"UPDATE_TAG": config.update_tag}
28
+
25
29
  for client in get_k8s_clients(config.k8s_kubeconfig):
26
30
  logger.info(f"Syncing data for k8s cluster {client.name}...")
27
31
  try:
28
- cluster = sync_namespaces(session, client, config.update_tag)
29
- pods = sync_pods(session, client, config.update_tag, cluster)
30
- sync_services(session, client, config.update_tag, cluster, pods)
31
- sync_secrets(session, client, config.update_tag, cluster)
32
+ cluster_info = sync_kubernetes_cluster(
33
+ session,
34
+ client,
35
+ config.update_tag,
36
+ common_job_parameters,
37
+ )
38
+ common_job_parameters["CLUSTER_ID"] = cluster_info.get("id")
39
+
40
+ sync_namespaces(session, client, config.update_tag, common_job_parameters)
41
+ all_pods = sync_pods(
42
+ session,
43
+ client,
44
+ config.update_tag,
45
+ common_job_parameters,
46
+ )
47
+ sync_secrets(session, client, config.update_tag, common_job_parameters)
48
+ sync_services(
49
+ session,
50
+ client,
51
+ all_pods,
52
+ config.update_tag,
53
+ common_job_parameters,
54
+ )
32
55
  except Exception:
33
56
  logger.exception(f"Failed to sync data for k8s cluster {client.name}...")
34
57
  raise
35
-
36
- run_cleanup_job(
37
- "kubernetes_import_cleanup.json",
38
- session,
39
- common_job_parameters,
40
- package="cartography.data.jobs.cleanup",
41
- )
@@ -0,0 +1,86 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ import neo4j
5
+ from kubernetes.client.models import V1Namespace
6
+ from kubernetes.client.models import VersionInfo
7
+
8
+ from cartography.client.core.tx import load
9
+ from cartography.intel.kubernetes.util import get_epoch
10
+ from cartography.intel.kubernetes.util import K8sClient
11
+ from cartography.models.kubernetes.clusters import KubernetesClusterSchema
12
+ from cartography.util import timeit
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ @timeit
18
+ def get_kubernetes_cluster_namespace(client: K8sClient) -> V1Namespace:
19
+ return client.core.read_namespace("kube-system")
20
+
21
+
22
+ @timeit
23
+ def get_kubernetes_cluster_version(client: K8sClient) -> VersionInfo:
24
+ return client.version.get_code()
25
+
26
+
27
+ def transform_kubernetes_cluster(
28
+ client: K8sClient,
29
+ namespace: V1Namespace,
30
+ version: VersionInfo,
31
+ ) -> list[dict[str, Any]]:
32
+ cluster = {
33
+ "id": namespace.metadata.uid,
34
+ "creation_timestamp": get_epoch(namespace.metadata.creation_timestamp),
35
+ "external_id": client.external_id,
36
+ "name": client.name,
37
+ "git_version": version.git_version,
38
+ "version_major": version.major,
39
+ "version_minor": version.minor,
40
+ "go_version": version.go_version,
41
+ "compiler": version.compiler,
42
+ "platform": version.platform,
43
+ }
44
+
45
+ return [cluster]
46
+
47
+
48
+ def load_kubernetes_cluster(
49
+ neo4j_session: neo4j.Session,
50
+ cluster_data: list[dict[str, Any]],
51
+ update_tag: int,
52
+ ) -> None:
53
+ logger.info(
54
+ "Loading '{}' Kubernetes cluster into graph".format(cluster_data[0].get("name"))
55
+ )
56
+ load(
57
+ neo4j_session,
58
+ KubernetesClusterSchema(),
59
+ cluster_data,
60
+ lastupdated=update_tag,
61
+ )
62
+
63
+
64
+ # cleaning up the kubernetes cluster node is currently not supported
65
+ # def cleanup(
66
+ # neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]
67
+ # ) -> None:
68
+ # logger.debug("Running cleanup job for KubernetesCluster")
69
+ # run_cleanup_job(
70
+ # "kubernetes_cluster_cleanup.json", neo4j_session, common_job_parameters
71
+ # )
72
+
73
+
74
+ @timeit
75
+ def sync_kubernetes_cluster(
76
+ neo4j_session: neo4j.Session,
77
+ client: K8sClient,
78
+ update_tag: int,
79
+ common_job_parameters: dict[str, Any],
80
+ ) -> dict[str, Any]:
81
+ namespace = get_kubernetes_cluster_namespace(client)
82
+ version = get_kubernetes_cluster_version(client)
83
+ cluster_info = transform_kubernetes_cluster(client, namespace, version)
84
+
85
+ load_kubernetes_cluster(neo4j_session, cluster_info, update_tag)
86
+ return cluster_info[0]
@@ -1,82 +1,84 @@
1
1
  import logging
2
- from typing import Dict
3
- from typing import List
4
- from typing import Tuple
2
+ from typing import Any
5
3
 
6
- from neo4j import Session
4
+ import neo4j
5
+ from kubernetes.client.models import V1Namespace
7
6
 
7
+ from cartography.client.core.tx import load
8
+ from cartography.graph.job import GraphJob
8
9
  from cartography.intel.kubernetes.util import get_epoch
10
+ from cartography.intel.kubernetes.util import k8s_paginate
9
11
  from cartography.intel.kubernetes.util import K8sClient
10
- from cartography.stats import get_stats_client
11
- from cartography.util import merge_module_sync_metadata
12
+ from cartography.models.kubernetes.namespaces import KubernetesNamespaceSchema
12
13
  from cartography.util import timeit
13
14
 
14
15
  logger = logging.getLogger(__name__)
15
- stat_handler = get_stats_client(__name__)
16
16
 
17
17
 
18
18
  @timeit
19
- def sync_namespaces(session: Session, client: K8sClient, update_tag: int) -> Dict:
20
- cluster, namespaces = get_namespaces(client)
21
- load_namespaces(session, cluster, namespaces, update_tag)
22
- merge_module_sync_metadata(
23
- session,
24
- group_type="KubernetesCluster",
25
- group_id=cluster["uid"],
26
- synced_type="KubernetesCluster",
27
- update_tag=update_tag,
28
- stat_handler=stat_handler,
29
- )
30
- return cluster
19
+ def get_namespaces(client: K8sClient) -> list[V1Namespace]:
20
+ items = k8s_paginate(client.core.list_namespace)
21
+ return items
31
22
 
32
23
 
33
- @timeit
34
- def get_namespaces(client: K8sClient) -> Tuple[Dict, List[Dict]]:
35
- cluster = dict()
36
- namespaces = list()
37
- for namespace in client.core.list_namespace().items:
38
- namespaces.append(
24
+ def transform_namespaces(namespaces: list[V1Namespace]) -> list[dict[str, Any]]:
25
+ transformed_namespaces = []
26
+ for namespace in namespaces:
27
+ transformed_namespaces.append(
39
28
  {
40
29
  "uid": namespace.metadata.uid,
41
30
  "name": namespace.metadata.name,
42
31
  "creation_timestamp": get_epoch(namespace.metadata.creation_timestamp),
43
32
  "deletion_timestamp": get_epoch(namespace.metadata.deletion_timestamp),
44
- },
33
+ "status_phase": namespace.status.phase if namespace.status else None,
34
+ }
45
35
  )
46
- if namespace.metadata.name == "kube-system":
47
- cluster = {"uid": namespace.metadata.uid, "name": client.name}
48
- return cluster, namespaces
36
+ return transformed_namespaces
49
37
 
50
38
 
51
39
  def load_namespaces(
52
- session: Session,
53
- cluster: Dict,
54
- data: List[Dict],
40
+ session: neo4j.Session,
41
+ namespaces: list[dict[str, Any]],
55
42
  update_tag: int,
43
+ cluster_name: str,
44
+ cluster_id: str,
45
+ ) -> None:
46
+ logger.info(f"Loading {len(namespaces)} kubernetes namespaces.")
47
+ load(
48
+ session,
49
+ KubernetesNamespaceSchema(),
50
+ namespaces,
51
+ lastupdated=update_tag,
52
+ cluster_name=cluster_name,
53
+ CLUSTER_ID=cluster_id,
54
+ )
55
+
56
+
57
+ def cleanup(
58
+ neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
56
59
  ) -> None:
57
- ingestion_cypher_query = """
58
- MERGE (cluster:KubernetesCluster {id: $cluster_id})
59
- ON CREATE SET cluster.firstseen = timestamp()
60
- SET cluster.name = $cluster_name,
61
- cluster.lastupdated = $update_tag
62
- WITH cluster
63
- UNWIND $namespaces as namespace
64
- MERGE (space:KubernetesNamespace {id: namespace.uid})
65
- ON CREATE SET space.firstseen = timestamp()
66
- SET space.lastupdated = $update_tag,
67
- space.name = namespace.name,
68
- space.created_at = namespace.creation_timestamp,
69
- space.deleted_at = namespace.deletion_timestamp
70
- WITH cluster, space
71
- MERGE (cluster)-[rel1:HAS_NAMESPACE]->(space)
72
- ON CREATE SET rel1.firstseen = timestamp()
73
- SET rel1.lastupdated = $update_tag
74
- """
75
- logger.info(f"Loading {len(data)} kubernetes namespaces.")
76
- session.run(
77
- ingestion_cypher_query,
78
- namespaces=data,
79
- cluster_id=cluster["uid"],
80
- cluster_name=cluster["name"],
81
- update_tag=update_tag,
60
+ logger.debug("Running cleanup job for KubernetesNamespace")
61
+ cleanup_job = GraphJob.from_node_schema(
62
+ KubernetesNamespaceSchema(), common_job_parameters
63
+ )
64
+ cleanup_job.run(neo4j_session)
65
+
66
+
67
+ @timeit
68
+ def sync_namespaces(
69
+ session: neo4j.Session,
70
+ client: K8sClient,
71
+ update_tag: int,
72
+ common_job_parameters: dict[str, Any],
73
+ ) -> None:
74
+ namespaces = get_namespaces(client)
75
+ transformed_namespaces = transform_namespaces(namespaces)
76
+ cluster_id: str = common_job_parameters["CLUSTER_ID"]
77
+ load_namespaces(
78
+ session,
79
+ transformed_namespaces,
80
+ update_tag,
81
+ client.name,
82
+ cluster_id,
82
83
  )
84
+ cleanup(session, common_job_parameters)