cartography-0.104.0rc3-py3-none-any.whl → cartography-0.106.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography has been flagged as potentially problematic. (The original page linked to further details; the link target is not preserved in this text.)

Files changed (134)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +104 -3
  3. cartography/client/aws/__init__.py +19 -0
  4. cartography/client/aws/ecr.py +51 -0
  5. cartography/client/core/tx.py +62 -0
  6. cartography/config.py +32 -0
  7. cartography/data/indexes.cypher +0 -37
  8. cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +1 -1
  9. cartography/driftdetect/cli.py +3 -2
  10. cartography/graph/cleanupbuilder.py +198 -41
  11. cartography/graph/job.py +42 -0
  12. cartography/graph/querybuilder.py +136 -2
  13. cartography/graph/statement.py +1 -1
  14. cartography/intel/airbyte/__init__.py +105 -0
  15. cartography/intel/airbyte/connections.py +120 -0
  16. cartography/intel/airbyte/destinations.py +81 -0
  17. cartography/intel/airbyte/organizations.py +59 -0
  18. cartography/intel/airbyte/sources.py +78 -0
  19. cartography/intel/airbyte/tags.py +64 -0
  20. cartography/intel/airbyte/users.py +106 -0
  21. cartography/intel/airbyte/util.py +122 -0
  22. cartography/intel/airbyte/workspaces.py +63 -0
  23. cartography/intel/aws/acm.py +124 -0
  24. cartography/intel/aws/cloudtrail.py +3 -38
  25. cartography/intel/aws/codebuild.py +132 -0
  26. cartography/intel/aws/ecr.py +8 -2
  27. cartography/intel/aws/ecs.py +228 -380
  28. cartography/intel/aws/efs.py +179 -11
  29. cartography/intel/aws/iam.py +1 -1
  30. cartography/intel/aws/identitycenter.py +14 -3
  31. cartography/intel/aws/inspector.py +96 -53
  32. cartography/intel/aws/lambda_function.py +1 -1
  33. cartography/intel/aws/rds.py +2 -1
  34. cartography/intel/aws/resources.py +4 -0
  35. cartography/intel/aws/s3.py +195 -4
  36. cartography/intel/aws/sqs.py +36 -90
  37. cartography/intel/entra/__init__.py +22 -0
  38. cartography/intel/entra/applications.py +366 -0
  39. cartography/intel/entra/groups.py +151 -0
  40. cartography/intel/entra/ou.py +21 -5
  41. cartography/intel/entra/users.py +84 -42
  42. cartography/intel/kubernetes/__init__.py +30 -14
  43. cartography/intel/kubernetes/clusters.py +86 -0
  44. cartography/intel/kubernetes/namespaces.py +59 -57
  45. cartography/intel/kubernetes/pods.py +140 -77
  46. cartography/intel/kubernetes/secrets.py +95 -45
  47. cartography/intel/kubernetes/services.py +131 -67
  48. cartography/intel/kubernetes/util.py +125 -14
  49. cartography/intel/scaleway/__init__.py +127 -0
  50. cartography/intel/scaleway/iam/__init__.py +0 -0
  51. cartography/intel/scaleway/iam/apikeys.py +71 -0
  52. cartography/intel/scaleway/iam/applications.py +71 -0
  53. cartography/intel/scaleway/iam/groups.py +71 -0
  54. cartography/intel/scaleway/iam/users.py +71 -0
  55. cartography/intel/scaleway/instances/__init__.py +0 -0
  56. cartography/intel/scaleway/instances/flexibleips.py +86 -0
  57. cartography/intel/scaleway/instances/instances.py +92 -0
  58. cartography/intel/scaleway/projects.py +79 -0
  59. cartography/intel/scaleway/storage/__init__.py +0 -0
  60. cartography/intel/scaleway/storage/snapshots.py +86 -0
  61. cartography/intel/scaleway/storage/volumes.py +84 -0
  62. cartography/intel/scaleway/utils.py +37 -0
  63. cartography/intel/trivy/__init__.py +161 -0
  64. cartography/intel/trivy/scanner.py +363 -0
  65. cartography/models/airbyte/__init__.py +0 -0
  66. cartography/models/airbyte/connection.py +138 -0
  67. cartography/models/airbyte/destination.py +75 -0
  68. cartography/models/airbyte/organization.py +19 -0
  69. cartography/models/airbyte/source.py +75 -0
  70. cartography/models/airbyte/stream.py +74 -0
  71. cartography/models/airbyte/tag.py +69 -0
  72. cartography/models/airbyte/user.py +111 -0
  73. cartography/models/airbyte/workspace.py +46 -0
  74. cartography/models/aws/acm/__init__.py +0 -0
  75. cartography/models/aws/acm/certificate.py +75 -0
  76. cartography/models/aws/cloudtrail/trail.py +24 -0
  77. cartography/models/aws/codebuild/__init__.py +0 -0
  78. cartography/models/aws/codebuild/project.py +49 -0
  79. cartography/models/aws/ecs/__init__.py +0 -0
  80. cartography/models/aws/ecs/clusters.py +64 -0
  81. cartography/models/aws/ecs/container_definitions.py +93 -0
  82. cartography/models/aws/ecs/container_instances.py +84 -0
  83. cartography/models/aws/ecs/containers.py +99 -0
  84. cartography/models/aws/ecs/services.py +117 -0
  85. cartography/models/aws/ecs/task_definitions.py +135 -0
  86. cartography/models/aws/ecs/tasks.py +110 -0
  87. cartography/models/aws/efs/access_point.py +77 -0
  88. cartography/models/aws/efs/file_system.py +60 -0
  89. cartography/models/aws/efs/mount_target.py +29 -2
  90. cartography/models/aws/s3/notification.py +24 -0
  91. cartography/models/aws/secretsmanager/secret_version.py +0 -2
  92. cartography/models/aws/sqs/__init__.py +0 -0
  93. cartography/models/aws/sqs/queue.py +89 -0
  94. cartography/models/core/common.py +1 -0
  95. cartography/models/core/nodes.py +15 -2
  96. cartography/models/core/relationships.py +44 -0
  97. cartography/models/entra/app_role_assignment.py +115 -0
  98. cartography/models/entra/application.py +47 -0
  99. cartography/models/entra/group.py +91 -0
  100. cartography/models/entra/user.py +17 -51
  101. cartography/models/kubernetes/__init__.py +0 -0
  102. cartography/models/kubernetes/clusters.py +26 -0
  103. cartography/models/kubernetes/containers.py +108 -0
  104. cartography/models/kubernetes/namespaces.py +51 -0
  105. cartography/models/kubernetes/pods.py +80 -0
  106. cartography/models/kubernetes/secrets.py +79 -0
  107. cartography/models/kubernetes/services.py +108 -0
  108. cartography/models/scaleway/__init__.py +0 -0
  109. cartography/models/scaleway/iam/__init__.py +0 -0
  110. cartography/models/scaleway/iam/apikey.py +96 -0
  111. cartography/models/scaleway/iam/application.py +52 -0
  112. cartography/models/scaleway/iam/group.py +95 -0
  113. cartography/models/scaleway/iam/user.py +60 -0
  114. cartography/models/scaleway/instance/__init__.py +0 -0
  115. cartography/models/scaleway/instance/flexibleip.py +52 -0
  116. cartography/models/scaleway/instance/instance.py +118 -0
  117. cartography/models/scaleway/organization.py +19 -0
  118. cartography/models/scaleway/project.py +48 -0
  119. cartography/models/scaleway/storage/__init__.py +0 -0
  120. cartography/models/scaleway/storage/snapshot.py +78 -0
  121. cartography/models/scaleway/storage/volume.py +51 -0
  122. cartography/models/trivy/__init__.py +0 -0
  123. cartography/models/trivy/findings.py +66 -0
  124. cartography/models/trivy/fix.py +66 -0
  125. cartography/models/trivy/package.py +71 -0
  126. cartography/sync.py +10 -4
  127. cartography/util.py +15 -10
  128. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/METADATA +6 -2
  129. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/RECORD +133 -49
  130. cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
  131. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/WHEEL +0 -0
  132. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/entry_points.txt +0 -0
  133. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/licenses/LICENSE +0 -0
  134. {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/top_level.txt +0 -0
@@ -15,6 +15,51 @@ from cartography.util import timeit
15
15
 
16
16
  logger = logging.getLogger(__name__)
17
17
 
18
+ # NOTE:
19
+ # Microsoft Graph imposes limits on the length of the $select clause as well as
20
+ # the number of properties that can be selected in a single request. In
21
+ # practice we have seen 400 Bad Request responses that bubble up as
22
+ # `Microsoft.SharePoint.Client.InvalidClientQueryException` once that limit is
23
+ # breached (Graph internally rewrites the next-link using a SharePoint style
24
+ # `id in (…)` filter which is then rejected).
25
+ #
26
+ # To avoid tripping this bug we only request a *core* subset of user attributes
27
+ # that are most commonly used in downstream analysis. The transform() function
28
+ # tolerates missing attributes (the generated MS Graph SDK simply returns
29
+ # `None` for properties that are not present in the payload), so fetching fewer
30
+ # fields is safe – we merely get more `null` values in the graph.
31
+ #
32
+ # If you need additional attributes in the future, append them here but keep the
33
+ # total character count of the comma-separated list comfortably below 500 and
34
+ # stay within the official v1.0 contract (beta-only fields cause similar
35
+ # failures). 20–25 fields is a good rule-of-thumb.
36
+ #
37
+ # References:
38
+ # • https://learn.microsoft.com/graph/query-parameters#select-parameter
39
+ # • https://learn.microsoft.com/graph/api/user-list?view=graph-rest-1.0
40
+ #
41
+ USER_SELECT_FIELDS = [
42
+ "id",
43
+ "userPrincipalName",
44
+ "displayName",
45
+ "givenName",
46
+ "surname",
47
+ "mail",
48
+ "mobilePhone",
49
+ "businessPhones",
50
+ "jobTitle",
51
+ "department",
52
+ "officeLocation",
53
+ "city",
54
+ "country",
55
+ "companyName",
56
+ "preferredLanguage",
57
+ "employeeId",
58
+ "employeeType",
59
+ "accountEnabled",
60
+ "ageGroup",
61
+ ]
62
+
18
63
 
19
64
  @timeit
20
65
  async def get_tenant(client: GraphServiceClient) -> Organization:
@@ -27,14 +72,20 @@ async def get_tenant(client: GraphServiceClient) -> Organization:
27
72
 
28
73
  @timeit
29
74
  async def get_users(client: GraphServiceClient) -> list[User]:
75
+ """Fetch all users with their manager reference in as few requests as possible.
76
+
77
+ We leverage `$expand=manager($select=id)` so the manager's *id* is hydrated
78
+ alongside every user record. This avoids making a second round-trip per
79
+ user – vastly reducing latency and eliminating the noisy 404s that occur
80
+ when a user has no manager assigned.
30
81
  """
31
- Get all users from Microsoft Graph API with pagination support
32
- """
82
+
33
83
  all_users: list[User] = []
34
84
  request_configuration = client.users.UsersRequestBuilderGetRequestConfiguration(
35
85
  query_parameters=client.users.UsersRequestBuilderGetQueryParameters(
36
- # Request more items per page to reduce number of API calls
37
86
  top=999,
87
+ select=USER_SELECT_FIELDS,
88
+ expand=["manager($select=id)"],
38
89
  ),
39
90
  )
40
91
 
@@ -43,18 +94,32 @@ async def get_users(client: GraphServiceClient) -> list[User]:
43
94
  all_users.extend(page.value)
44
95
  if not page.odata_next_link:
45
96
  break
46
- page = await client.users.with_url(page.odata_next_link).get()
97
+
98
+ try:
99
+ page = await client.users.with_url(page.odata_next_link).get()
100
+ except Exception as e:
101
+ logger.error(
102
+ "Failed to fetch next page of Entra ID users – stopping pagination early: %s",
103
+ e,
104
+ )
105
+ break
47
106
 
48
107
  return all_users
49
108
 
50
109
 
51
110
  @timeit
111
+ # The manager reference is now embedded in the user objects courtesy of the
112
+ # `$expand` we added above, so we no longer need a separate `manager_map`.
52
113
  def transform_users(users: list[User]) -> list[dict[str, Any]]:
53
- """
54
- Transform the API response into the format expected by our schema
55
- """
114
+ """Convert MS Graph SDK `User` models into dicts matching our schema."""
115
+
56
116
  result: list[dict[str, Any]] = []
57
117
  for user in users:
118
+ manager_id: str | None = None
119
+ if getattr(user, "manager", None) is not None:
120
+ # The SDK materialises `manager` as a DirectoryObject (or subclass)
121
+ manager_id = getattr(user.manager, "id", None)
122
+
58
123
  transformed_user = {
59
124
  "id": user.id,
60
125
  "user_principal_name": user.user_principal_name,
@@ -62,47 +127,24 @@ def transform_users(users: list[User]) -> list[dict[str, Any]]:
62
127
  "given_name": user.given_name,
63
128
  "surname": user.surname,
64
129
  "mail": user.mail,
65
- "other_mails": user.other_mails,
66
- "preferred_language": user.preferred_language,
67
- "preferred_name": user.preferred_name,
68
- "state": user.state,
69
- "usage_location": user.usage_location,
70
- "user_type": user.user_type,
71
- "show_in_address_list": user.show_in_address_list,
72
- "sign_in_sessions_valid_from_date_time": user.sign_in_sessions_valid_from_date_time,
73
- "security_identifier": user.on_premises_security_identifier,
74
- "account_enabled": user.account_enabled,
75
- "age_group": user.age_group,
130
+ "mobile_phone": user.mobile_phone,
76
131
  "business_phones": user.business_phones,
132
+ "job_title": user.job_title,
133
+ "department": user.department,
134
+ "office_location": user.office_location,
77
135
  "city": user.city,
78
- "company_name": user.company_name,
79
- "consent_provided_for_minor": user.consent_provided_for_minor,
136
+ "state": user.state,
80
137
  "country": user.country,
81
- "created_date_time": user.created_date_time,
82
- "creation_type": user.creation_type,
83
- "deleted_date_time": user.deleted_date_time,
84
- "department": user.department,
138
+ "company_name": user.company_name,
139
+ "preferred_language": user.preferred_language,
85
140
  "employee_id": user.employee_id,
86
141
  "employee_type": user.employee_type,
87
- "external_user_state": user.external_user_state,
88
- "external_user_state_change_date_time": user.external_user_state_change_date_time,
89
- "hire_date": user.hire_date,
90
- "is_management_restricted": user.is_management_restricted,
91
- "is_resource_account": user.is_resource_account,
92
- "job_title": user.job_title,
93
- "last_password_change_date_time": user.last_password_change_date_time,
94
- "mail_nickname": user.mail_nickname,
95
- "office_location": user.office_location,
96
- "on_premises_distinguished_name": user.on_premises_distinguished_name,
97
- "on_premises_domain_name": user.on_premises_domain_name,
98
- "on_premises_immutable_id": user.on_premises_immutable_id,
99
- "on_premises_last_sync_date_time": user.on_premises_last_sync_date_time,
100
- "on_premises_sam_account_name": user.on_premises_sam_account_name,
101
- "on_premises_security_identifier": user.on_premises_security_identifier,
102
- "on_premises_sync_enabled": user.on_premises_sync_enabled,
103
- "on_premises_user_principal_name": user.on_premises_user_principal_name,
142
+ "account_enabled": user.account_enabled,
143
+ "age_group": user.age_group,
144
+ "manager_id": manager_id,
104
145
  }
105
146
  result.append(transformed_user)
147
+
106
148
  return result
107
149
 
108
150
 
@@ -198,7 +240,7 @@ async def sync_entra_users(
198
240
  credential, scopes=["https://graph.microsoft.com/.default"]
199
241
  )
200
242
 
201
- # Get tenant information
243
+ # Fetch tenant and users (with manager reference already populated by `$expand`)
202
244
  tenant = await get_tenant(client)
203
245
  users = await get_users(client)
204
246
 
@@ -3,12 +3,12 @@ import logging
3
3
  from neo4j import Session
4
4
 
5
5
  from cartography.config import Config
6
+ from cartography.intel.kubernetes.clusters import sync_kubernetes_cluster
6
7
  from cartography.intel.kubernetes.namespaces import sync_namespaces
7
8
  from cartography.intel.kubernetes.pods import sync_pods
8
9
  from cartography.intel.kubernetes.secrets import sync_secrets
9
10
  from cartography.intel.kubernetes.services import sync_services
10
11
  from cartography.intel.kubernetes.util import get_k8s_clients
11
- from cartography.util import run_cleanup_job
12
12
  from cartography.util import timeit
13
13
 
14
14
  logger = logging.getLogger(__name__)
@@ -16,26 +16,42 @@ logger = logging.getLogger(__name__)
16
16
 
17
17
  @timeit
18
18
  def start_k8s_ingestion(session: Session, config: Config) -> None:
19
+ if not config.update_tag:
20
+ logger.error("Cartography update tag not provided.")
21
+ return
19
22
 
20
- common_job_parameters = {"UPDATE_TAG": config.update_tag}
21
23
  if not config.k8s_kubeconfig:
22
- logger.error("kubeconfig not found.")
24
+ logger.error("Kubernetes kubeconfig not provided.")
23
25
  return
24
26
 
27
+ common_job_parameters = {"UPDATE_TAG": config.update_tag}
28
+
25
29
  for client in get_k8s_clients(config.k8s_kubeconfig):
26
30
  logger.info(f"Syncing data for k8s cluster {client.name}...")
27
31
  try:
28
- cluster = sync_namespaces(session, client, config.update_tag)
29
- pods = sync_pods(session, client, config.update_tag, cluster)
30
- sync_services(session, client, config.update_tag, cluster, pods)
31
- sync_secrets(session, client, config.update_tag, cluster)
32
+ cluster_info = sync_kubernetes_cluster(
33
+ session,
34
+ client,
35
+ config.update_tag,
36
+ common_job_parameters,
37
+ )
38
+ common_job_parameters["CLUSTER_ID"] = cluster_info.get("id")
39
+
40
+ sync_namespaces(session, client, config.update_tag, common_job_parameters)
41
+ all_pods = sync_pods(
42
+ session,
43
+ client,
44
+ config.update_tag,
45
+ common_job_parameters,
46
+ )
47
+ sync_secrets(session, client, config.update_tag, common_job_parameters)
48
+ sync_services(
49
+ session,
50
+ client,
51
+ all_pods,
52
+ config.update_tag,
53
+ common_job_parameters,
54
+ )
32
55
  except Exception:
33
56
  logger.exception(f"Failed to sync data for k8s cluster {client.name}...")
34
57
  raise
35
-
36
- run_cleanup_job(
37
- "kubernetes_import_cleanup.json",
38
- session,
39
- common_job_parameters,
40
- package="cartography.data.jobs.cleanup",
41
- )
@@ -0,0 +1,86 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ import neo4j
5
+ from kubernetes.client.models import V1Namespace
6
+ from kubernetes.client.models import VersionInfo
7
+
8
+ from cartography.client.core.tx import load
9
+ from cartography.intel.kubernetes.util import get_epoch
10
+ from cartography.intel.kubernetes.util import K8sClient
11
+ from cartography.models.kubernetes.clusters import KubernetesClusterSchema
12
+ from cartography.util import timeit
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ @timeit
18
+ def get_kubernetes_cluster_namespace(client: K8sClient) -> V1Namespace:
19
+ return client.core.read_namespace("kube-system")
20
+
21
+
22
+ @timeit
23
+ def get_kubernetes_cluster_version(client: K8sClient) -> VersionInfo:
24
+ return client.version.get_code()
25
+
26
+
27
+ def transform_kubernetes_cluster(
28
+ client: K8sClient,
29
+ namespace: V1Namespace,
30
+ version: VersionInfo,
31
+ ) -> list[dict[str, Any]]:
32
+ cluster = {
33
+ "id": namespace.metadata.uid,
34
+ "creation_timestamp": get_epoch(namespace.metadata.creation_timestamp),
35
+ "external_id": client.external_id,
36
+ "name": client.name,
37
+ "git_version": version.git_version,
38
+ "version_major": version.major,
39
+ "version_minor": version.minor,
40
+ "go_version": version.go_version,
41
+ "compiler": version.compiler,
42
+ "platform": version.platform,
43
+ }
44
+
45
+ return [cluster]
46
+
47
+
48
+ def load_kubernetes_cluster(
49
+ neo4j_session: neo4j.Session,
50
+ cluster_data: list[dict[str, Any]],
51
+ update_tag: int,
52
+ ) -> None:
53
+ logger.info(
54
+ "Loading '{}' Kubernetes cluster into graph".format(cluster_data[0].get("name"))
55
+ )
56
+ load(
57
+ neo4j_session,
58
+ KubernetesClusterSchema(),
59
+ cluster_data,
60
+ lastupdated=update_tag,
61
+ )
62
+
63
+
64
+ # cleaning up the kubernetes cluster node is currently not supported
65
+ # def cleanup(
66
+ # neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]
67
+ # ) -> None:
68
+ # logger.debug("Running cleanup job for KubernetesCluster")
69
+ # run_cleanup_job(
70
+ # "kubernetes_cluster_cleanup.json", neo4j_session, common_job_parameters
71
+ # )
72
+
73
+
74
+ @timeit
75
+ def sync_kubernetes_cluster(
76
+ neo4j_session: neo4j.Session,
77
+ client: K8sClient,
78
+ update_tag: int,
79
+ common_job_parameters: dict[str, Any],
80
+ ) -> dict[str, Any]:
81
+ namespace = get_kubernetes_cluster_namespace(client)
82
+ version = get_kubernetes_cluster_version(client)
83
+ cluster_info = transform_kubernetes_cluster(client, namespace, version)
84
+
85
+ load_kubernetes_cluster(neo4j_session, cluster_info, update_tag)
86
+ return cluster_info[0]
@@ -1,82 +1,84 @@
1
1
  import logging
2
- from typing import Dict
3
- from typing import List
4
- from typing import Tuple
2
+ from typing import Any
5
3
 
6
- from neo4j import Session
4
+ import neo4j
5
+ from kubernetes.client.models import V1Namespace
7
6
 
7
+ from cartography.client.core.tx import load
8
+ from cartography.graph.job import GraphJob
8
9
  from cartography.intel.kubernetes.util import get_epoch
10
+ from cartography.intel.kubernetes.util import k8s_paginate
9
11
  from cartography.intel.kubernetes.util import K8sClient
10
- from cartography.stats import get_stats_client
11
- from cartography.util import merge_module_sync_metadata
12
+ from cartography.models.kubernetes.namespaces import KubernetesNamespaceSchema
12
13
  from cartography.util import timeit
13
14
 
14
15
  logger = logging.getLogger(__name__)
15
- stat_handler = get_stats_client(__name__)
16
16
 
17
17
 
18
18
  @timeit
19
- def sync_namespaces(session: Session, client: K8sClient, update_tag: int) -> Dict:
20
- cluster, namespaces = get_namespaces(client)
21
- load_namespaces(session, cluster, namespaces, update_tag)
22
- merge_module_sync_metadata(
23
- session,
24
- group_type="KubernetesCluster",
25
- group_id=cluster["uid"],
26
- synced_type="KubernetesCluster",
27
- update_tag=update_tag,
28
- stat_handler=stat_handler,
29
- )
30
- return cluster
19
+ def get_namespaces(client: K8sClient) -> list[V1Namespace]:
20
+ items = k8s_paginate(client.core.list_namespace)
21
+ return items
31
22
 
32
23
 
33
- @timeit
34
- def get_namespaces(client: K8sClient) -> Tuple[Dict, List[Dict]]:
35
- cluster = dict()
36
- namespaces = list()
37
- for namespace in client.core.list_namespace().items:
38
- namespaces.append(
24
+ def transform_namespaces(namespaces: list[V1Namespace]) -> list[dict[str, Any]]:
25
+ transformed_namespaces = []
26
+ for namespace in namespaces:
27
+ transformed_namespaces.append(
39
28
  {
40
29
  "uid": namespace.metadata.uid,
41
30
  "name": namespace.metadata.name,
42
31
  "creation_timestamp": get_epoch(namespace.metadata.creation_timestamp),
43
32
  "deletion_timestamp": get_epoch(namespace.metadata.deletion_timestamp),
44
- },
33
+ "status_phase": namespace.status.phase if namespace.status else None,
34
+ }
45
35
  )
46
- if namespace.metadata.name == "kube-system":
47
- cluster = {"uid": namespace.metadata.uid, "name": client.name}
48
- return cluster, namespaces
36
+ return transformed_namespaces
49
37
 
50
38
 
51
39
  def load_namespaces(
52
- session: Session,
53
- cluster: Dict,
54
- data: List[Dict],
40
+ session: neo4j.Session,
41
+ namespaces: list[dict[str, Any]],
55
42
  update_tag: int,
43
+ cluster_name: str,
44
+ cluster_id: str,
45
+ ) -> None:
46
+ logger.info(f"Loading {len(namespaces)} kubernetes namespaces.")
47
+ load(
48
+ session,
49
+ KubernetesNamespaceSchema(),
50
+ namespaces,
51
+ lastupdated=update_tag,
52
+ cluster_name=cluster_name,
53
+ CLUSTER_ID=cluster_id,
54
+ )
55
+
56
+
57
+ def cleanup(
58
+ neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
56
59
  ) -> None:
57
- ingestion_cypher_query = """
58
- MERGE (cluster:KubernetesCluster {id: $cluster_id})
59
- ON CREATE SET cluster.firstseen = timestamp()
60
- SET cluster.name = $cluster_name,
61
- cluster.lastupdated = $update_tag
62
- WITH cluster
63
- UNWIND $namespaces as namespace
64
- MERGE (space:KubernetesNamespace {id: namespace.uid})
65
- ON CREATE SET space.firstseen = timestamp()
66
- SET space.lastupdated = $update_tag,
67
- space.name = namespace.name,
68
- space.created_at = namespace.creation_timestamp,
69
- space.deleted_at = namespace.deletion_timestamp
70
- WITH cluster, space
71
- MERGE (cluster)-[rel1:HAS_NAMESPACE]->(space)
72
- ON CREATE SET rel1.firstseen = timestamp()
73
- SET rel1.lastupdated = $update_tag
74
- """
75
- logger.info(f"Loading {len(data)} kubernetes namespaces.")
76
- session.run(
77
- ingestion_cypher_query,
78
- namespaces=data,
79
- cluster_id=cluster["uid"],
80
- cluster_name=cluster["name"],
81
- update_tag=update_tag,
60
+ logger.debug("Running cleanup job for KubernetesNamespace")
61
+ cleanup_job = GraphJob.from_node_schema(
62
+ KubernetesNamespaceSchema(), common_job_parameters
63
+ )
64
+ cleanup_job.run(neo4j_session)
65
+
66
+
67
+ @timeit
68
+ def sync_namespaces(
69
+ session: neo4j.Session,
70
+ client: K8sClient,
71
+ update_tag: int,
72
+ common_job_parameters: dict[str, Any],
73
+ ) -> None:
74
+ namespaces = get_namespaces(client)
75
+ transformed_namespaces = transform_namespaces(namespaces)
76
+ cluster_id: str = common_job_parameters["CLUSTER_ID"]
77
+ load_namespaces(
78
+ session,
79
+ transformed_namespaces,
80
+ update_tag,
81
+ client.name,
82
+ cluster_id,
82
83
  )
84
+ cleanup(session, common_job_parameters)