cartography-0.113.0-py3-none-any.whl → cartography-0.115.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of cartography might be problematic.
Files changed (96)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +10 -2
  3. cartography/client/core/tx.py +11 -0
  4. cartography/config.py +4 -0
  5. cartography/data/indexes.cypher +0 -27
  6. cartography/intel/aws/config.py +7 -3
  7. cartography/intel/aws/ecr.py +9 -9
  8. cartography/intel/aws/iam.py +741 -492
  9. cartography/intel/aws/identitycenter.py +240 -13
  10. cartography/intel/aws/lambda_function.py +69 -2
  11. cartography/intel/aws/organizations.py +10 -9
  12. cartography/intel/aws/permission_relationships.py +7 -17
  13. cartography/intel/aws/redshift.py +9 -4
  14. cartography/intel/aws/route53.py +53 -3
  15. cartography/intel/aws/securityhub.py +3 -1
  16. cartography/intel/azure/__init__.py +24 -0
  17. cartography/intel/azure/app_service.py +105 -0
  18. cartography/intel/azure/functions.py +124 -0
  19. cartography/intel/azure/logic_apps.py +101 -0
  20. cartography/intel/create_indexes.py +2 -1
  21. cartography/intel/dns.py +5 -2
  22. cartography/intel/entra/__init__.py +31 -0
  23. cartography/intel/entra/app_role_assignments.py +277 -0
  24. cartography/intel/entra/applications.py +4 -238
  25. cartography/intel/entra/federation/__init__.py +0 -0
  26. cartography/intel/entra/federation/aws_identity_center.py +77 -0
  27. cartography/intel/entra/service_principals.py +217 -0
  28. cartography/intel/gcp/__init__.py +136 -440
  29. cartography/intel/gcp/clients.py +65 -0
  30. cartography/intel/gcp/compute.py +18 -44
  31. cartography/intel/gcp/crm/__init__.py +0 -0
  32. cartography/intel/gcp/crm/folders.py +108 -0
  33. cartography/intel/gcp/crm/orgs.py +65 -0
  34. cartography/intel/gcp/crm/projects.py +109 -0
  35. cartography/intel/gcp/dns.py +2 -1
  36. cartography/intel/gcp/gke.py +72 -113
  37. cartography/intel/github/__init__.py +41 -0
  38. cartography/intel/github/commits.py +423 -0
  39. cartography/intel/github/repos.py +76 -45
  40. cartography/intel/gsuite/api.py +17 -4
  41. cartography/intel/okta/applications.py +9 -4
  42. cartography/intel/okta/awssaml.py +5 -2
  43. cartography/intel/okta/factors.py +3 -1
  44. cartography/intel/okta/groups.py +5 -2
  45. cartography/intel/okta/organization.py +3 -1
  46. cartography/intel/okta/origins.py +3 -1
  47. cartography/intel/okta/roles.py +5 -2
  48. cartography/intel/okta/users.py +3 -1
  49. cartography/models/aws/iam/access_key.py +103 -0
  50. cartography/models/aws/iam/account_role.py +24 -0
  51. cartography/models/aws/iam/federated_principal.py +60 -0
  52. cartography/models/aws/iam/group.py +60 -0
  53. cartography/models/aws/iam/group_membership.py +26 -0
  54. cartography/models/aws/iam/inline_policy.py +78 -0
  55. cartography/models/aws/iam/managed_policy.py +51 -0
  56. cartography/models/aws/iam/policy_statement.py +57 -0
  57. cartography/models/aws/iam/role.py +83 -0
  58. cartography/models/aws/iam/root_principal.py +52 -0
  59. cartography/models/aws/iam/service_principal.py +30 -0
  60. cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
  61. cartography/models/aws/iam/user.py +54 -0
  62. cartography/models/aws/identitycenter/awspermissionset.py +24 -1
  63. cartography/models/aws/identitycenter/awssogroup.py +70 -0
  64. cartography/models/aws/identitycenter/awsssouser.py +37 -1
  65. cartography/models/aws/lambda_function/lambda_function.py +2 -0
  66. cartography/models/azure/__init__.py +0 -0
  67. cartography/models/azure/app_service.py +59 -0
  68. cartography/models/azure/function_app.py +59 -0
  69. cartography/models/azure/logic_apps.py +56 -0
  70. cartography/models/entra/entra_user_to_aws_sso.py +41 -0
  71. cartography/models/entra/service_principal.py +104 -0
  72. cartography/models/entra/user.py +18 -0
  73. cartography/models/gcp/compute/subnet.py +74 -0
  74. cartography/models/gcp/crm/__init__.py +0 -0
  75. cartography/models/gcp/crm/folders.py +98 -0
  76. cartography/models/gcp/crm/organizations.py +21 -0
  77. cartography/models/gcp/crm/projects.py +100 -0
  78. cartography/models/gcp/gke.py +69 -0
  79. cartography/models/github/commits.py +63 -0
  80. {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/METADATA +8 -5
  81. {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/RECORD +85 -56
  82. cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
  83. cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
  84. cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
  85. cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
  86. cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
  87. cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
  88. cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
  89. cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
  90. cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
  91. cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
  92. cartography/intel/gcp/crm.py +0 -355
  93. {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/WHEEL +0 -0
  94. {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/entry_points.txt +0 -0
  95. {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/licenses/LICENSE +0 -0
  96. {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/top_level.txt +0 -0
cartography/intel/gcp/clients.py (new file)
@@ -0,0 +1,65 @@
+ import logging
+ from typing import Optional
+
+ import googleapiclient.discovery
+ import httplib2
+ from google.auth import default
+ from google.auth.credentials import Credentials as GoogleCredentials
+ from google.auth.exceptions import DefaultCredentialsError
+ from google_auth_httplib2 import AuthorizedHttp
+ from googleapiclient.discovery import Resource
+
+ logger = logging.getLogger(__name__)
+
+ # Default HTTP timeout (seconds) for Google API clients built via discovery.build
+ _GCP_HTTP_TIMEOUT = 120
+
+
+ def _authorized_http_with_timeout(
+     credentials: GoogleCredentials,
+     timeout: int = _GCP_HTTP_TIMEOUT,
+ ) -> AuthorizedHttp:
+     """
+     Build an AuthorizedHttp with a per-request timeout, avoiding global socket timeouts.
+     """
+     return AuthorizedHttp(credentials, http=httplib2.Http(timeout=timeout))
+
+
+ def build_client(service: str, version: str = "v1") -> Resource:
+     credentials = get_gcp_credentials()
+     if credentials is None:
+         raise RuntimeError("GCP credentials are not available; cannot build client.")
+     client = googleapiclient.discovery.build(
+         service,
+         version,
+         http=_authorized_http_with_timeout(credentials),
+         cache_discovery=False,
+     )
+     return client
+
+
+ def get_gcp_credentials() -> Optional[GoogleCredentials]:
+     """
+     Gets access tokens for GCP API access.
+     """
+     try:
+         # Explicitly use Application Default Credentials with the cloud-platform scope.
+         credentials, _ = default(
+             scopes=["https://www.googleapis.com/auth/cloud-platform"],
+         )
+         return credentials
+     except DefaultCredentialsError as e:
+         logger.debug(
+             "Error occurred calling google.auth.default().",
+             exc_info=True,
+         )
+         logger.error(
+             (
+                 "Unable to initialize Google Compute Platform creds. If you don't have GCP data or don't want to load "
+                 "GCP data then you can ignore this message. Otherwise, the error code is: %s "
+                 "Make sure your GCP credentials are configured correctly, your credentials file (if any) is valid, and "
+                 "that the identity you are authenticating to has the securityReviewer role attached."
+             ),
+             e,
+         )
+         return None
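A minimal usage sketch of the new helpers (not part of the diff): the Cloud DNS service name and the calling code below are illustrative assumptions, not cartography code.

    # Hypothetical caller of cartography.intel.gcp.clients, shown for illustration only.
    from cartography.intel.gcp.clients import build_client, get_gcp_credentials

    def list_managed_zones(project_id: str) -> list:
        # get_gcp_credentials() returns None when Application Default Credentials are
        # missing, which build_client() would turn into a RuntimeError; checking first
        # lets a caller skip GCP ingestion gracefully instead.
        if get_gcp_credentials() is None:
            return []
        dns = build_client("dns", "v1")  # discovery client with the 120s per-request timeout
        response = dns.managedZones().list(project=project_id).execute()
        return response.get("managedZones", [])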
cartography/intel/gcp/compute.py
@@ -656,51 +656,25 @@ def load_gcp_subnets(
      neo4j_session: neo4j.Session,
      subnets: List[Dict],
      gcp_update_tag: int,
+     project_id: str,
  ) -> None:
      """
-     Ingest GCP subnet data to Neo4j
+     Ingest GCP subnet data to Neo4j using the data model
      :param neo4j_session: The Neo4j session
      :param subnets: List of the subnets
      :param gcp_update_tag: The timestamp to set these Neo4j nodes with
+     :param project_id: The project ID
      :return: Nothing
      """
-     query = """
-     MERGE(vpc:GCPVpc{id:$VpcPartialUri})
-     ON CREATE SET vpc.firstseen = timestamp(),
-     vpc.partial_uri = $VpcPartialUri
+     from cartography.models.gcp.compute.subnet import GCPSubnetSchema

-     MERGE(subnet:GCPSubnet{id:$PartialUri})
-     ON CREATE SET subnet.firstseen = timestamp(),
-     subnet.partial_uri = $PartialUri
-     SET subnet.self_link = $SubnetSelfLink,
-     subnet.project_id = $ProjectId,
-     subnet.name = $SubnetName,
-     subnet.region = $Region,
-     subnet.gateway_address = $GatewayAddress,
-     subnet.ip_cidr_range = $IpCidrRange,
-     subnet.private_ip_google_access = $PrivateIpGoogleAccess,
-     subnet.vpc_partial_uri = $VpcPartialUri,
-     subnet.lastupdated = $gcp_update_tag
-
-     MERGE (vpc)-[r:RESOURCE]->(subnet)
-     ON CREATE SET r.firstseen = timestamp()
-     SET r.lastupdated = $gcp_update_tag
-     """
-     for s in subnets:
-         neo4j_session.run(
-             query,
-             VpcPartialUri=s["vpc_partial_uri"],
-             VpcSelfLink=s["vpc_self_link"],
-             PartialUri=s["partial_uri"],
-             SubnetSelfLink=s["self_link"],
-             ProjectId=s["project_id"],
-             SubnetName=s["name"],
-             Region=s["region"],
-             GatewayAddress=s["gateway_address"],
-             IpCidrRange=s["ip_cidr_range"],
-             PrivateIpGoogleAccess=s["private_ip_google_access"],
-             gcp_update_tag=gcp_update_tag,
-         )
+     load(
+         neo4j_session,
+         GCPSubnetSchema(),
+         subnets,
+         lastupdated=gcp_update_tag,
+         PROJECT_ID=project_id,
+     )


  @timeit
@@ -981,7 +955,7 @@ def _attach_gcp_vpc(
      """
      query = """
      MATCH (i:GCPInstance{id:$InstanceId})-[:NETWORK_INTERFACE]->(nic:GCPNetworkInterface)
-     -[p:PART_OF_SUBNET]->(sn:GCPSubnet)<-[r:RESOURCE]-(vpc:GCPVpc)
+     -[p:PART_OF_SUBNET]->(sn:GCPSubnet)<-[r:HAS]-(vpc:GCPVpc)
      MERGE (i)-[m:MEMBER_OF_GCP_VPC]->(vpc)
      ON CREATE SET m.firstseen = timestamp()
      SET m.lastupdated = $gcp_update_tag
@@ -1185,15 +1159,15 @@ def cleanup_gcp_subnets(
      common_job_parameters: Dict,
  ) -> None:
      """
-     Delete out-of-date GCP VPC subnet nodes and relationships
+     Delete out-of-date GCP VPC subnet nodes and relationships using data model
      :param neo4j_session: The Neo4j session
      :param common_job_parameters: dict of other job parameters to pass to Neo4j
      :return: Nothing
      """
-     run_cleanup_job(
-         "gcp_compute_vpc_subnet_cleanup.json",
-         neo4j_session,
-         common_job_parameters,
+     from cartography.models.gcp.compute.subnet import GCPSubnetSchema
+
+     GraphJob.from_node_schema(GCPSubnetSchema(), common_job_parameters).run(
+         neo4j_session
      )


@@ -1296,7 +1270,7 @@ def sync_gcp_subnets(
      for r in regions:
          subnet_res = get_gcp_subnets(project_id, r, compute)
          subnets = transform_gcp_subnets(subnet_res)
-         load_gcp_subnets(neo4j_session, subnets, gcp_update_tag)
+         load_gcp_subnets(neo4j_session, subnets, gcp_update_tag, project_id)
      # TODO scope the cleanup to the current project - https://github.com/cartography-cncf/cartography/issues/381
      cleanup_gcp_subnets(neo4j_session, common_job_parameters)
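A hedged sketch of driving the refactored subnet loader directly: the Neo4j connection details are placeholders, and the property names in the sample record are inferred from the parameters of the removed Cypher query, so the exact set expected by GCPSubnetSchema may differ.

    # Illustrative only; assumes a reachable Neo4j instance and inferred field names.
    import neo4j
    from cartography.client.core.tx import load
    from cartography.models.gcp.compute.subnet import GCPSubnetSchema

    subnets = [
        {
            "id": "projects/my-proj/regions/us-east1/subnetworks/default",
            "partial_uri": "projects/my-proj/regions/us-east1/subnetworks/default",
            "self_link": "https://www.googleapis.com/compute/v1/projects/my-proj/regions/us-east1/subnetworks/default",
            "project_id": "my-proj",
            "name": "default",
            "region": "us-east1",
            "gateway_address": "10.142.0.1",
            "ip_cidr_range": "10.142.0.0/20",
            "private_ip_google_access": False,
            "vpc_partial_uri": "projects/my-proj/global/networks/default",
        },
    ]

    driver = neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
    with driver.session() as session:
        # lastupdated and PROJECT_ID are passed through as load() kwargs,
        # exactly as the new load_gcp_subnets() does.
        load(session, GCPSubnetSchema(), subnets, lastupdated=1700000000, PROJECT_ID="my-proj")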
cartography/intel/gcp/crm/__init__.py: new empty file, no content to show.
cartography/intel/gcp/crm/folders.py (new file)
@@ -0,0 +1,108 @@
+ import logging
+ from typing import Dict
+ from typing import List
+
+ import neo4j
+ from google.cloud import resourcemanager_v3
+
+ from cartography.client.core.tx import load
+ from cartography.models.gcp.crm.folders import GCPFolderSchema
+ from cartography.util import timeit
+
+ logger = logging.getLogger(__name__)
+
+
+ @timeit
+ def get_gcp_folders(org_resource_name: str) -> List[Dict]:
+     """
+     Return a list of all descendant GCP folders under the specified organization by traversing the folder tree.
+
+     :param org_resource_name: Full organization resource name (e.g., "organizations/123456789012")
+     :return: List of folder dicts with 'name' field containing full resource names (e.g., "folders/123456")
+     """
+     results: List[Dict] = []
+     client = resourcemanager_v3.FoldersClient()
+     # BFS over folders starting at the org root
+     queue: List[str] = [org_resource_name]
+     seen: set[str] = set()
+     while queue:
+         parent = queue.pop(0)
+         if parent in seen:
+             continue
+         seen.add(parent)
+
+         for folder in client.list_folders(parent=parent):
+             results.append(
+                 {
+                     "name": folder.name,
+                     "parent": parent,
+                     "displayName": folder.display_name,
+                     "lifecycleState": folder.state.name,
+                 }
+             )
+             if folder.name:
+                 queue.append(folder.name)
+     return results
+
+
+ @timeit
+ def transform_gcp_folders(data: List[Dict]) -> List[Dict]:
+     """
+     Transform GCP folder data to add parent_org or parent_folder fields based on parent type.
+
+     :param data: List of folder dicts
+     :return: List of transformed folder dicts with parent_org and parent_folder fields
+     """
+     for folder in data:
+         folder["parent_org"] = None
+         folder["parent_folder"] = None
+
+         if folder["parent"].startswith("organizations"):
+             folder["parent_org"] = folder["parent"]
+         elif folder["parent"].startswith("folders"):
+             folder["parent_folder"] = folder["parent"]
+         else:
+             logger.warning(
+                 f"Folder {folder['name']} has unexpected parent type: {folder['parent']}"
+             )
+
+     return data
+
+
+ @timeit
+ def load_gcp_folders(
+     neo4j_session: neo4j.Session,
+     data: List[Dict],
+     gcp_update_tag: int,
+     org_resource_name: str,
+ ) -> None:
+     """
+     Load GCP folders into the graph.
+     :param org_resource_name: Full organization resource name (e.g., "organizations/123456789012")
+     """
+     transformed_data = transform_gcp_folders(data)
+     load(
+         neo4j_session,
+         GCPFolderSchema(),
+         transformed_data,
+         lastupdated=gcp_update_tag,
+         ORG_RESOURCE_NAME=org_resource_name,
+     )
+
+
+ @timeit
+ def sync_gcp_folders(
+     neo4j_session: neo4j.Session,
+     gcp_update_tag: int,
+     common_job_parameters: Dict,
+     org_resource_name: str,
+ ) -> List[Dict]:
+     """
+     Get GCP folder data using the CRM v2 resource object and load the data to Neo4j.
+     :param org_resource_name: Full organization resource name (e.g., "organizations/123456789012")
+     :return: List of folders synced
+     """
+     logger.debug("Syncing GCP folders")
+     folders = get_gcp_folders(org_resource_name)
+     load_gcp_folders(neo4j_session, folders, gcp_update_tag, org_resource_name)
+     return folders
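Because transform_gcp_folders() is a pure function, its parent-splitting behavior can be checked directly. A small runnable sketch with fabricated resource names (assumes the google-cloud-resource-manager dependency is installed so the module imports cleanly):

    # Fabricated folder records shaped like the get_gcp_folders() output above.
    from cartography.intel.gcp.crm.folders import transform_gcp_folders

    folders = [
        {
            "name": "folders/111111111111",
            "parent": "organizations/123456789012",
            "displayName": "engineering",
            "lifecycleState": "ACTIVE",
        },
        {
            "name": "folders/222222222222",
            "parent": "folders/111111111111",
            "displayName": "platform",
            "lifecycleState": "ACTIVE",
        },
    ]

    transformed = transform_gcp_folders(folders)
    assert transformed[0]["parent_org"] == "organizations/123456789012"
    assert transformed[0]["parent_folder"] is None
    assert transformed[1]["parent_folder"] == "folders/111111111111"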
cartography/intel/gcp/crm/orgs.py (new file)
@@ -0,0 +1,65 @@
+ import logging
+ from typing import Dict
+ from typing import List
+
+ import neo4j
+ from google.cloud import resourcemanager_v3
+
+ from cartography.client.core.tx import load
+ from cartography.models.gcp.crm.organizations import GCPOrganizationSchema
+ from cartography.util import timeit
+
+ logger = logging.getLogger(__name__)
+
+
+ @timeit
+ def get_gcp_organizations() -> List[Dict]:
+     """
+     Return list of GCP organizations that the authenticated principal can access using the high-level client.
+     Returns empty list on error.
+     :return: List of org dicts with keys: name, displayName, lifecycleState.
+     """
+     client = resourcemanager_v3.OrganizationsClient()
+     orgs = []
+     for org in client.search_organizations():
+         orgs.append(
+             {
+                 "name": org.name,
+                 "displayName": org.display_name,
+                 "lifecycleState": org.state.name,
+             }
+         )
+     return orgs
+
+
+ @timeit
+ def load_gcp_organizations(
+     neo4j_session: neo4j.Session,
+     data: List[Dict],
+     gcp_update_tag: int,
+ ) -> None:
+     for org in data:
+         org["id"] = org["name"]
+
+     load(
+         neo4j_session,
+         GCPOrganizationSchema(),
+         data,
+         lastupdated=gcp_update_tag,
+     )
+
+
+ @timeit
+ def sync_gcp_organizations(
+     neo4j_session: neo4j.Session,
+     gcp_update_tag: int,
+     common_job_parameters: Dict,
+ ) -> List[Dict]:
+     """
+     Get GCP organization data using the CRM v1 resource object and load the data to Neo4j.
+     Returns the list of organizations synced.
+     """
+     logger.debug("Syncing GCP organizations")
+     data = get_gcp_organizations()
+     load_gcp_organizations(neo4j_session, data, gcp_update_tag)
+     return data
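For reference, a tiny runnable illustration (values fabricated) of the record shape this module emits and the id assignment load_gcp_organizations() performs before calling load():

    # Org dicts use full resource names; "name" is copied into "id" before loading.
    orgs = [
        {
            "name": "organizations/123456789012",
            "displayName": "example.com",
            "lifecycleState": "ACTIVE",
        },
    ]
    for org in orgs:
        org["id"] = org["name"]  # same assignment the loader performs
    assert orgs[0]["id"] == "organizations/123456789012"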
cartography/intel/gcp/crm/projects.py (new file)
@@ -0,0 +1,109 @@
+ import logging
+ from typing import Dict
+ from typing import List
+
+ import neo4j
+ from google.cloud import resourcemanager_v3
+
+ from cartography.client.core.tx import load
+ from cartography.models.gcp.crm.projects import GCPProjectSchema
+ from cartography.util import timeit
+
+ logger = logging.getLogger(__name__)
+
+
+ @timeit
+ def get_gcp_projects(org_resource_name: str, folders: List[Dict]) -> List[Dict]:
+     """
+     Return list of ACTIVE GCP projects under the specified organization
+     and within the specified folders.
+     :param org_resource_name: Full organization resource name (e.g., "organizations/123456789012")
+     :param folders: List of folder dictionaries containing 'name' field with full resource names
+     """
+     folder_names = [folder["name"] for folder in folders] if folders else []
+     # Build list of parent resources to check (org and all folders)
+     parents = set([org_resource_name] + folder_names)
+     results: List[Dict] = []
+     for parent in parents:
+         client = resourcemanager_v3.ProjectsClient()
+         for proj in client.list_projects(parent=parent):
+             # list_projects returns ACTIVE projects by default
+             name_field = proj.name  # "projects/<number>"
+             project_number = name_field.split("/")[-1] if name_field else None
+             project_parent = proj.parent
+             results.append(
+                 {
+                     "projectId": getattr(proj, "project_id", None),
+                     "projectNumber": project_number,
+                     "name": getattr(proj, "display_name", None),
+                     "lifecycleState": proj.state.name,
+                     "parent": project_parent,
+                 }
+             )
+     return results
+
+
+ @timeit
+ def transform_gcp_projects(data: List[Dict]) -> List[Dict]:
+     """
+     Transform GCP project data to add parent_org or parent_folder fields based on parent type.
+
+     :param data: List of project dicts
+     :return: List of transformed project dicts with parent_org and parent_folder fields
+     """
+     for project in data:
+         project["parent_org"] = None
+         project["parent_folder"] = None
+
+         # Set parent fields based on parent type
+         if project["parent"].startswith("organizations"):
+             project["parent_org"] = project["parent"]
+         elif project["parent"].startswith("folders"):
+             project["parent_folder"] = project["parent"]
+         else:
+             logger.warning(
+                 f"Project {project['projectId']} has unexpected parent type: {project['parent']}"
+             )
+
+     return data
+
+
+ @timeit
+ def load_gcp_projects(
+     neo4j_session: neo4j.Session,
+     data: List[Dict],
+     gcp_update_tag: int,
+     org_resource_name: str,
+ ) -> None:
+     """
+     Load GCP projects into the graph.
+     :param org_resource_name: Full organization resource name (e.g., "organizations/123456789012")
+     """
+     transformed_data = transform_gcp_projects(data)
+     load(
+         neo4j_session,
+         GCPProjectSchema(),
+         transformed_data,
+         lastupdated=gcp_update_tag,
+         ORG_RESOURCE_NAME=org_resource_name,
+     )
+
+
+ @timeit
+ def sync_gcp_projects(
+     neo4j_session: neo4j.Session,
+     org_resource_name: str,
+     folders: List[Dict],
+     gcp_update_tag: int,
+     common_job_parameters: Dict,
+ ) -> List[Dict]:
+     """
+     Get and sync GCP project data to Neo4j.
+     :param org_resource_name: Full organization resource name (e.g., "organizations/123456789012")
+     :param folders: List of folder dictionaries containing 'name' field with full resource names
+     :return: List of projects synced
+     """
+     logger.debug("Syncing GCP projects")
+     projects = get_gcp_projects(org_resource_name, folders)
+     load_gcp_projects(neo4j_session, projects, gcp_update_tag, org_resource_name)
+     return projects
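A hedged sketch of how the three new crm submodules compose (organizations, then folders, then projects): the real orchestration lives in cartography/intel/gcp/__init__.py and may differ, and the connection details and update tag below are placeholders.

    # Illustrative orchestration only; assumes a reachable Neo4j instance and GCP ADC.
    import time
    import neo4j
    from cartography.intel.gcp.crm.folders import sync_gcp_folders
    from cartography.intel.gcp.crm.orgs import sync_gcp_organizations
    from cartography.intel.gcp.crm.projects import sync_gcp_projects

    driver = neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
    update_tag = int(time.time())
    common_job_parameters = {"UPDATE_TAG": update_tag}

    with driver.session() as session:
        orgs = sync_gcp_organizations(session, update_tag, common_job_parameters)
        for org in orgs:
            # org["name"] is the full resource name, e.g. "organizations/123456789012".
            folders = sync_gcp_folders(session, update_tag, common_job_parameters, org["name"])
            # Folder resource names feed back in so projects nested under folders are found.
            sync_gcp_projects(session, org["name"], folders, update_tag, common_job_parameters)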
cartography/intel/gcp/dns.py
@@ -116,7 +116,8 @@ def transform_dns_rrs(dns_rrs: List[Dict]) -> List[Dict]:
      for r in dns_rrs:
          records.append(
              {
-                 "id": r["name"],
+                 # Compose a unique ID to avoid collisions across types and zones
+                 "id": f"{r['name']}|{r.get('type')}|{r.get('zone')}",
                  "name": r["name"],
                  "type": r.get("type"),
                  "ttl": r.get("ttl"),
cartography/intel/gcp/gke.py
@@ -1,12 +1,16 @@
  import json
  import logging
+ from typing import Any
  from typing import Dict
+ from typing import List

  import neo4j
  from googleapiclient.discovery import HttpError
  from googleapiclient.discovery import Resource

- from cartography.util import run_cleanup_job
+ from cartography.client.core.tx import load
+ from cartography.graph.job import GraphJob
+ from cartography.models.gcp.gke import GCPGKEClusterSchema
  from cartography.util import timeit

  logger = logging.getLogger(__name__)
@@ -56,105 +60,20 @@ def load_gke_clusters(
      gcp_update_tag: int,
  ) -> None:
      """
-     Ingest GCP GKE Clusters to Neo4j
-
-     :type neo4j_session: Neo4j session object
-     :param neo4j session: The Neo4j session object
-
-     :type cluster_resp: Dict
-     :param cluster_resp: A cluster response object from the GKE API
-
-     :type gcp_update_tag: timestamp
-     :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
-
-     :rtype: NoneType
-     :return: Nothing
+     Ingest GCP GKE clusters using the data model loader.
      """
+     clusters: List[Dict[str, Any]] = transform_gke_clusters(cluster_resp)

-     query = """
-     MERGE(cluster:GKECluster{id:$ClusterSelfLink})
-     ON CREATE SET
-     cluster.firstseen = timestamp(),
-     cluster.created_at = $ClusterCreateTime
-     SET
-     cluster.name = $ClusterName,
-     cluster.self_link = $ClusterSelfLink,
-     cluster.description = $ClusterDescription,
-     cluster.logging_service = $ClusterLoggingService,
-     cluster.monitoring_service = $ClusterMonitoringService,
-     cluster.network = $ClusterNetwork,
-     cluster.subnetwork = $ClusterSubnetwork,
-     cluster.cluster_ipv4cidr = $ClusterIPv4Cidr,
-     cluster.zone = $ClusterZone,
-     cluster.location = $ClusterLocation,
-     cluster.endpoint = $ClusterEndpoint,
-     cluster.initial_version = $ClusterInitialVersion,
-     cluster.current_master_version = $ClusterMasterVersion,
-     cluster.status = $ClusterStatus,
-     cluster.services_ipv4cidr = $ClusterServicesIPv4Cidr,
-     cluster.database_encryption = $ClusterDatabaseEncryption,
-     cluster.network_policy = $ClusterNetworkPolicy,
-     cluster.master_authorized_networks = $ClusterMasterAuthorizedNetworks,
-     cluster.legacy_abac = $ClusterAbac,
-     cluster.shielded_nodes = $ClusterShieldedNodes,
-     cluster.private_nodes = $ClusterPrivateNodes,
-     cluster.private_endpoint_enabled = $ClusterPrivateEndpointEnabled,
-     cluster.private_endpoint = $ClusterPrivateEndpoint,
-     cluster.public_endpoint = $ClusterPublicEndpoint,
-     cluster.masterauth_username = $ClusterMasterUsername,
-     cluster.masterauth_password = $ClusterMasterPassword
-     WITH cluster
-     MATCH (owner:GCPProject{id:$ProjectId})
-     MERGE (owner)-[r:RESOURCE]->(cluster)
-     ON CREATE SET r.firstseen = timestamp()
-     SET r.lastupdated = $gcp_update_tag
-     """
-     for cluster in cluster_resp.get("clusters", []):
-         neo4j_session.run(
-             query,
-             ProjectId=project_id,
-             ClusterSelfLink=cluster["selfLink"],
-             ClusterCreateTime=cluster["createTime"],
-             ClusterName=cluster["name"],
-             ClusterDescription=cluster.get("description"),
-             ClusterLoggingService=cluster.get("loggingService"),
-             ClusterMonitoringService=cluster.get("monitoringService"),
-             ClusterNetwork=cluster.get("network"),
-             ClusterSubnetwork=cluster.get("subnetwork"),
-             ClusterIPv4Cidr=cluster.get("clusterIpv4Cidr"),
-             ClusterZone=cluster.get("zone"),
-             ClusterLocation=cluster.get("location"),
-             ClusterEndpoint=cluster.get("endpoint"),
-             ClusterInitialVersion=cluster.get("initialClusterVersion"),
-             ClusterMasterVersion=cluster.get("currentMasterVersion"),
-             ClusterStatus=cluster.get("status"),
-             ClusterServicesIPv4Cidr=cluster.get("servicesIpv4Cidr"),
-             ClusterDatabaseEncryption=cluster.get("databaseEncryption", {}).get(
-                 "state",
-             ),
-             ClusterNetworkPolicy=_process_network_policy(cluster),
-             ClusterMasterAuthorizedNetworks=cluster.get(
-                 "masterAuthorizedNetworksConfig",
-                 {},
-             ).get("enabled"),
-             ClusterAbac=cluster.get("legacyAbac", {}).get("enabled"),
-             ClusterShieldedNodes=cluster.get("shieldedNodes", {}).get("enabled"),
-             ClusterPrivateNodes=cluster.get("privateClusterConfig", {}).get(
-                 "enablePrivateNodes",
-             ),
-             ClusterPrivateEndpointEnabled=cluster.get("privateClusterConfig", {}).get(
-                 "enablePrivateEndpoint",
-             ),
-             ClusterPrivateEndpoint=cluster.get("privateClusterConfig", {}).get(
-                 "privateEndpoint",
-             ),
-             ClusterPublicEndpoint=cluster.get("privateClusterConfig", {}).get(
-                 "publicEndpoint",
-             ),
-             ClusterMasterUsername=cluster.get("masterAuth", {}).get("username"),
-             ClusterMasterPassword=cluster.get("masterAuth", {}).get("password"),
-             gcp_update_tag=gcp_update_tag,
-         )
+     if not clusters:
+         return
+
+     load(
+         neo4j_session,
+         GCPGKEClusterSchema(),
+         clusters,
+         lastupdated=gcp_update_tag,
+         PROJECT_ID=project_id,
+     )


  def _process_network_policy(cluster: Dict) -> bool:
@@ -175,21 +94,10 @@ def cleanup_gke_clusters(
      common_job_parameters: Dict,
  ) -> None:
      """
-     Delete out-of-date GCP GKE Clusters nodes and relationships
-
-     :type neo4j_session: The Neo4j session object
-     :param neo4j_session: The Neo4j session
-
-     :type common_job_parameters: dict
-     :param common_job_parameters: Dictionary of other job parameters to pass to Neo4j
-
-     :rtype: NoneType
-     :return: Nothing
+     Scoped cleanup for GKE clusters based on the project sub-resource relationship.
      """
-     run_cleanup_job(
-         "gcp_gke_cluster_cleanup.json",
+     GraphJob.from_node_schema(GCPGKEClusterSchema(), common_job_parameters).run(
          neo4j_session,
-         common_job_parameters,
      )

@@ -222,8 +130,59 @@ def sync_gke_clusters(
      :rtype: NoneType
      :return: Nothing
      """
-     logger.info("Syncing Compute objects for project %s.", project_id)
+     logger.info("Syncing GKE clusters for project %s.", project_id)
      gke_res = get_gke_clusters(container, project_id)
      load_gke_clusters(neo4j_session, gke_res, project_id, gcp_update_tag)
-     # TODO scope the cleanup to the current project - https://github.com/cartography-cncf/cartography/issues/381
      cleanup_gke_clusters(neo4j_session, common_job_parameters)
+
+
+ def transform_gke_clusters(api_result: Dict[str, Any]) -> List[Dict[str, Any]]:
+     """
+     Transform GKE API response into a list of dicts suitable for the data model loader.
+     """
+     result: List[Dict[str, Any]] = []
+     for c in api_result.get("clusters", []):
+         transformed: Dict[str, Any] = {
+             # Required fields
+             "id": c["selfLink"],
+             "self_link": c["selfLink"],
+             "name": c["name"],
+             "created_at": c.get("createTime"),
+             # Optional fields
+             "description": c.get("description"),
+             "logging_service": c.get("loggingService"),
+             "monitoring_service": c.get("monitoringService"),
+             "network": c.get("network"),
+             "subnetwork": c.get("subnetwork"),
+             "cluster_ipv4cidr": c.get("clusterIpv4Cidr"),
+             "zone": c.get("zone"),
+             "location": c.get("location"),
+             "endpoint": c.get("endpoint"),
+             "initial_version": c.get("initialClusterVersion"),
+             "current_master_version": c.get("currentMasterVersion"),
+             "status": c.get("status"),
+             "services_ipv4cidr": c.get("servicesIpv4Cidr"),
+             "database_encryption": (c.get("databaseEncryption", {}) or {}).get("state"),
+             "network_policy": _process_network_policy(c),
+             "master_authorized_networks": (
+                 c.get("masterAuthorizedNetworksConfig", {}) or {}
+             ).get("enabled"),
+             "legacy_abac": (c.get("legacyAbac", {}) or {}).get("enabled"),
+             "shielded_nodes": (c.get("shieldedNodes", {}) or {}).get("enabled"),
+             "private_nodes": (c.get("privateClusterConfig", {}) or {}).get(
+                 "enablePrivateNodes"
+             ),
+             "private_endpoint_enabled": (c.get("privateClusterConfig", {}) or {}).get(
+                 "enablePrivateEndpoint"
+             ),
+             "private_endpoint": (c.get("privateClusterConfig", {}) or {}).get(
+                 "privateEndpoint"
+             ),
+             "public_endpoint": (c.get("privateClusterConfig", {}) or {}).get(
+                 "publicEndpoint"
+             ),
+             "masterauth_username": (c.get("masterAuth", {}) or {}).get("username"),
+             "masterauth_password": (c.get("masterAuth", {}) or {}).get("password"),
+         }
+         result.append(transformed)
+     return result
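A runnable sketch of transform_gke_clusters() on a minimal, fabricated API payload; _process_network_policy() is not shown in this hunk, so its output is not asserted here.

    from cartography.intel.gcp.gke import transform_gke_clusters

    api_result = {
        "clusters": [
            {
                "selfLink": "https://container.googleapis.com/v1/projects/my-proj/locations/us-east1/clusters/demo",
                "name": "demo",
                "createTime": "2024-01-01T00:00:00+00:00",
                "status": "RUNNING",
                "networkPolicy": {"enabled": True, "provider": "CALICO"},
                "privateClusterConfig": {"enablePrivateNodes": True},
            },
        ],
    }

    rows = transform_gke_clusters(api_result)
    assert rows[0]["id"] == rows[0]["self_link"]
    assert rows[0]["private_nodes"] is True
    assert rows[0]["zone"] is None  # absent keys simply come through as None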