cartography 0.113.0__py3-none-any.whl → 0.115.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic.
- cartography/_version.py +2 -2
- cartography/cli.py +10 -2
- cartography/client/core/tx.py +11 -0
- cartography/config.py +4 -0
- cartography/data/indexes.cypher +0 -27
- cartography/intel/aws/config.py +7 -3
- cartography/intel/aws/ecr.py +9 -9
- cartography/intel/aws/iam.py +741 -492
- cartography/intel/aws/identitycenter.py +240 -13
- cartography/intel/aws/lambda_function.py +69 -2
- cartography/intel/aws/organizations.py +10 -9
- cartography/intel/aws/permission_relationships.py +7 -17
- cartography/intel/aws/redshift.py +9 -4
- cartography/intel/aws/route53.py +53 -3
- cartography/intel/aws/securityhub.py +3 -1
- cartography/intel/azure/__init__.py +24 -0
- cartography/intel/azure/app_service.py +105 -0
- cartography/intel/azure/functions.py +124 -0
- cartography/intel/azure/logic_apps.py +101 -0
- cartography/intel/create_indexes.py +2 -1
- cartography/intel/dns.py +5 -2
- cartography/intel/entra/__init__.py +31 -0
- cartography/intel/entra/app_role_assignments.py +277 -0
- cartography/intel/entra/applications.py +4 -238
- cartography/intel/entra/federation/__init__.py +0 -0
- cartography/intel/entra/federation/aws_identity_center.py +77 -0
- cartography/intel/entra/service_principals.py +217 -0
- cartography/intel/gcp/__init__.py +136 -440
- cartography/intel/gcp/clients.py +65 -0
- cartography/intel/gcp/compute.py +18 -44
- cartography/intel/gcp/crm/__init__.py +0 -0
- cartography/intel/gcp/crm/folders.py +108 -0
- cartography/intel/gcp/crm/orgs.py +65 -0
- cartography/intel/gcp/crm/projects.py +109 -0
- cartography/intel/gcp/dns.py +2 -1
- cartography/intel/gcp/gke.py +72 -113
- cartography/intel/github/__init__.py +41 -0
- cartography/intel/github/commits.py +423 -0
- cartography/intel/github/repos.py +76 -45
- cartography/intel/gsuite/api.py +17 -4
- cartography/intel/okta/applications.py +9 -4
- cartography/intel/okta/awssaml.py +5 -2
- cartography/intel/okta/factors.py +3 -1
- cartography/intel/okta/groups.py +5 -2
- cartography/intel/okta/organization.py +3 -1
- cartography/intel/okta/origins.py +3 -1
- cartography/intel/okta/roles.py +5 -2
- cartography/intel/okta/users.py +3 -1
- cartography/models/aws/iam/access_key.py +103 -0
- cartography/models/aws/iam/account_role.py +24 -0
- cartography/models/aws/iam/federated_principal.py +60 -0
- cartography/models/aws/iam/group.py +60 -0
- cartography/models/aws/iam/group_membership.py +26 -0
- cartography/models/aws/iam/inline_policy.py +78 -0
- cartography/models/aws/iam/managed_policy.py +51 -0
- cartography/models/aws/iam/policy_statement.py +57 -0
- cartography/models/aws/iam/role.py +83 -0
- cartography/models/aws/iam/root_principal.py +52 -0
- cartography/models/aws/iam/service_principal.py +30 -0
- cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
- cartography/models/aws/iam/user.py +54 -0
- cartography/models/aws/identitycenter/awspermissionset.py +24 -1
- cartography/models/aws/identitycenter/awssogroup.py +70 -0
- cartography/models/aws/identitycenter/awsssouser.py +37 -1
- cartography/models/aws/lambda_function/lambda_function.py +2 -0
- cartography/models/azure/__init__.py +0 -0
- cartography/models/azure/app_service.py +59 -0
- cartography/models/azure/function_app.py +59 -0
- cartography/models/azure/logic_apps.py +56 -0
- cartography/models/entra/entra_user_to_aws_sso.py +41 -0
- cartography/models/entra/service_principal.py +104 -0
- cartography/models/entra/user.py +18 -0
- cartography/models/gcp/compute/subnet.py +74 -0
- cartography/models/gcp/crm/__init__.py +0 -0
- cartography/models/gcp/crm/folders.py +98 -0
- cartography/models/gcp/crm/organizations.py +21 -0
- cartography/models/gcp/crm/projects.py +100 -0
- cartography/models/gcp/gke.py +69 -0
- cartography/models/github/commits.py +63 -0
- {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/METADATA +8 -5
- {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/RECORD +85 -56
- cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
- cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
- cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
- cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
- cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
- cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
- cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
- cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
- cartography/intel/gcp/crm.py +0 -355
- {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/WHEEL +0 -0
- {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/top_level.txt +0 -0
cartography/intel/gcp/clients.py
ADDED
@@ -0,0 +1,65 @@
+import logging
+from typing import Optional
+
+import googleapiclient.discovery
+import httplib2
+from google.auth import default
+from google.auth.credentials import Credentials as GoogleCredentials
+from google.auth.exceptions import DefaultCredentialsError
+from google_auth_httplib2 import AuthorizedHttp
+from googleapiclient.discovery import Resource
+
+logger = logging.getLogger(__name__)
+
+# Default HTTP timeout (seconds) for Google API clients built via discovery.build
+_GCP_HTTP_TIMEOUT = 120
+
+
+def _authorized_http_with_timeout(
+    credentials: GoogleCredentials,
+    timeout: int = _GCP_HTTP_TIMEOUT,
+) -> AuthorizedHttp:
+    """
+    Build an AuthorizedHttp with a per-request timeout, avoiding global socket timeouts.
+    """
+    return AuthorizedHttp(credentials, http=httplib2.Http(timeout=timeout))
+
+
+def build_client(service: str, version: str = "v1") -> Resource:
+    credentials = get_gcp_credentials()
+    if credentials is None:
+        raise RuntimeError("GCP credentials are not available; cannot build client.")
+    client = googleapiclient.discovery.build(
+        service,
+        version,
+        http=_authorized_http_with_timeout(credentials),
+        cache_discovery=False,
+    )
+    return client
+
+
+def get_gcp_credentials() -> Optional[GoogleCredentials]:
+    """
+    Gets access tokens for GCP API access.
+    """
+    try:
+        # Explicitly use Application Default Credentials with the cloud-platform scope.
+        credentials, _ = default(
+            scopes=["https://www.googleapis.com/auth/cloud-platform"],
+        )
+        return credentials
+    except DefaultCredentialsError as e:
+        logger.debug(
+            "Error occurred calling google.auth.default().",
+            exc_info=True,
+        )
+        logger.error(
+            (
+                "Unable to initialize Google Compute Platform creds. If you don't have GCP data or don't want to load "
+                "GCP data then you can ignore this message. Otherwise, the error code is: %s "
+                "Make sure your GCP credentials are configured correctly, your credentials file (if any) is valid, and "
+                "that the identity you are authenticating to has the securityReviewer role attached."
+            ),
+            e,
+        )
+        return None
cartography/intel/gcp/compute.py
CHANGED
@@ -656,51 +656,25 @@ def load_gcp_subnets(
     neo4j_session: neo4j.Session,
     subnets: List[Dict],
     gcp_update_tag: int,
+    project_id: str,
 ) -> None:
     """
-    Ingest GCP subnet data to Neo4j
+    Ingest GCP subnet data to Neo4j using the data model
     :param neo4j_session: The Neo4j session
     :param subnets: List of the subnets
     :param gcp_update_tag: The timestamp to set these Neo4j nodes with
+    :param project_id: The project ID
     :return: Nothing
     """
-
-    MERGE(vpc:GCPVpc{id:$VpcPartialUri})
-    ON CREATE SET vpc.firstseen = timestamp(),
-    vpc.partial_uri = $VpcPartialUri
+    from cartography.models.gcp.compute.subnet import GCPSubnetSchema

-
-
-
-
-
-
-
-    subnet.gateway_address = $GatewayAddress,
-    subnet.ip_cidr_range = $IpCidrRange,
-    subnet.private_ip_google_access = $PrivateIpGoogleAccess,
-    subnet.vpc_partial_uri = $VpcPartialUri,
-    subnet.lastupdated = $gcp_update_tag
-
-    MERGE (vpc)-[r:RESOURCE]->(subnet)
-    ON CREATE SET r.firstseen = timestamp()
-    SET r.lastupdated = $gcp_update_tag
-    """
-    for s in subnets:
-        neo4j_session.run(
-            query,
-            VpcPartialUri=s["vpc_partial_uri"],
-            VpcSelfLink=s["vpc_self_link"],
-            PartialUri=s["partial_uri"],
-            SubnetSelfLink=s["self_link"],
-            ProjectId=s["project_id"],
-            SubnetName=s["name"],
-            Region=s["region"],
-            GatewayAddress=s["gateway_address"],
-            IpCidrRange=s["ip_cidr_range"],
-            PrivateIpGoogleAccess=s["private_ip_google_access"],
-            gcp_update_tag=gcp_update_tag,
-        )
+    load(
+        neo4j_session,
+        GCPSubnetSchema(),
+        subnets,
+        lastupdated=gcp_update_tag,
+        PROJECT_ID=project_id,
+    )


 @timeit
@@ -981,7 +955,7 @@ def _attach_gcp_vpc(
     """
     query = """
     MATCH (i:GCPInstance{id:$InstanceId})-[:NETWORK_INTERFACE]->(nic:GCPNetworkInterface)
-        -[p:PART_OF_SUBNET]->(sn:GCPSubnet)<-[r:
+        -[p:PART_OF_SUBNET]->(sn:GCPSubnet)<-[r:HAS]-(vpc:GCPVpc)
     MERGE (i)-[m:MEMBER_OF_GCP_VPC]->(vpc)
     ON CREATE SET m.firstseen = timestamp()
     SET m.lastupdated = $gcp_update_tag
@@ -1185,15 +1159,15 @@ def cleanup_gcp_subnets(
     common_job_parameters: Dict,
 ) -> None:
     """
-    Delete out-of-date GCP VPC subnet nodes and relationships
+    Delete out-of-date GCP VPC subnet nodes and relationships using data model
     :param neo4j_session: The Neo4j session
     :param common_job_parameters: dict of other job parameters to pass to Neo4j
     :return: Nothing
     """
-
-
-
-
+    from cartography.models.gcp.compute.subnet import GCPSubnetSchema
+
+    GraphJob.from_node_schema(GCPSubnetSchema(), common_job_parameters).run(
+        neo4j_session
     )


@@ -1296,7 +1270,7 @@ def sync_gcp_subnets(
     for r in regions:
         subnet_res = get_gcp_subnets(project_id, r, compute)
         subnets = transform_gcp_subnets(subnet_res)
-        load_gcp_subnets(neo4j_session, subnets, gcp_update_tag)
+        load_gcp_subnets(neo4j_session, subnets, gcp_update_tag, project_id)
     # TODO scope the cleanup to the current project - https://github.com/cartography-cncf/cartography/issues/381
     cleanup_gcp_subnets(neo4j_session, common_job_parameters)

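A hedged sketch of the updated loader call follows. The sample field names mirror the `s["..."]` lookups removed above; the authoritative field set is defined by `GCPSubnetSchema` in `cartography/models/gcp/compute/subnet.py`, which is not shown in this section.

```python
# Sketch only: one already-transformed subnet record passed to the new signature.
import neo4j

from cartography.intel.gcp.compute import load_gcp_subnets


def load_example_subnet(neo4j_session: neo4j.Session, gcp_update_tag: int) -> None:
    sample_subnets = [
        {
            "partial_uri": "projects/my-project/regions/us-east1/subnetworks/default",
            "self_link": "https://www.googleapis.com/compute/v1/projects/my-project/regions/us-east1/subnetworks/default",
            "vpc_partial_uri": "projects/my-project/global/networks/default",
            "vpc_self_link": "https://www.googleapis.com/compute/v1/projects/my-project/global/networks/default",
            "project_id": "my-project",
            "name": "default",
            "region": "us-east1",
            "gateway_address": "10.142.0.1",
            "ip_cidr_range": "10.142.0.0/20",
            "private_ip_google_access": False,
        },
    ]
    # project_id is now required so the loader can attach subnets to the owning
    # GCPProject through the data model's sub-resource relationship.
    load_gcp_subnets(neo4j_session, sample_subnets, gcp_update_tag, "my-project")
```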
cartography/intel/gcp/crm/__init__.py
File without changes
cartography/intel/gcp/crm/folders.py
ADDED
@@ -0,0 +1,108 @@
+import logging
+from typing import Dict
+from typing import List
+
+import neo4j
+from google.cloud import resourcemanager_v3
+
+from cartography.client.core.tx import load
+from cartography.models.gcp.crm.folders import GCPFolderSchema
+from cartography.util import timeit
+
+logger = logging.getLogger(__name__)
+
+
+@timeit
+def get_gcp_folders(org_resource_name: str) -> List[Dict]:
+    """
+    Return a list of all descendant GCP folders under the specified organization by traversing the folder tree.
+
+    :param org_resource_name: Full organization resource name (e.g., "organizations/123456789012")
+    :return: List of folder dicts with 'name' field containing full resource names (e.g., "folders/123456")
+    """
+    results: List[Dict] = []
+    client = resourcemanager_v3.FoldersClient()
+    # BFS over folders starting at the org root
+    queue: List[str] = [org_resource_name]
+    seen: set[str] = set()
+    while queue:
+        parent = queue.pop(0)
+        if parent in seen:
+            continue
+        seen.add(parent)
+
+        for folder in client.list_folders(parent=parent):
+            results.append(
+                {
+                    "name": folder.name,
+                    "parent": parent,
+                    "displayName": folder.display_name,
+                    "lifecycleState": folder.state.name,
+                }
+            )
+            if folder.name:
+                queue.append(folder.name)
+    return results
+
+
+@timeit
+def transform_gcp_folders(data: List[Dict]) -> List[Dict]:
+    """
+    Transform GCP folder data to add parent_org or parent_folder fields based on parent type.
+
+    :param data: List of folder dicts
+    :return: List of transformed folder dicts with parent_org and parent_folder fields
+    """
+    for folder in data:
+        folder["parent_org"] = None
+        folder["parent_folder"] = None
+
+        if folder["parent"].startswith("organizations"):
+            folder["parent_org"] = folder["parent"]
+        elif folder["parent"].startswith("folders"):
+            folder["parent_folder"] = folder["parent"]
+        else:
+            logger.warning(
+                f"Folder {folder['name']} has unexpected parent type: {folder['parent']}"
+            )
+
+    return data
+
+
+@timeit
+def load_gcp_folders(
+    neo4j_session: neo4j.Session,
+    data: List[Dict],
+    gcp_update_tag: int,
+    org_resource_name: str,
+) -> None:
+    """
+    Load GCP folders into the graph.
+    :param org_resource_name: Full organization resource name (e.g., "organizations/123456789012")
+    """
+    transformed_data = transform_gcp_folders(data)
+    load(
+        neo4j_session,
+        GCPFolderSchema(),
+        transformed_data,
+        lastupdated=gcp_update_tag,
+        ORG_RESOURCE_NAME=org_resource_name,
+    )
+
+
+@timeit
+def sync_gcp_folders(
+    neo4j_session: neo4j.Session,
+    gcp_update_tag: int,
+    common_job_parameters: Dict,
+    org_resource_name: str,
+) -> List[Dict]:
+    """
+    Get GCP folder data using the CRM v2 resource object and load the data to Neo4j.
+    :param org_resource_name: Full organization resource name (e.g., "organizations/123456789012")
+    :return: List of folders synced
+    """
+    logger.debug("Syncing GCP folders")
+    folders = get_gcp_folders(org_resource_name)
+    load_gcp_folders(neo4j_session, folders, gcp_update_tag, org_resource_name)
+    return folders
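A small illustration of what transform_gcp_folders does to the BFS output, using made-up resource names:

```python
# Illustrative input/output for transform_gcp_folders (made-up IDs).
from cartography.intel.gcp.crm.folders import transform_gcp_folders

sample = [
    {
        "name": "folders/111",
        "parent": "organizations/123456789012",
        "displayName": "prod",
        "lifecycleState": "ACTIVE",
    },
    {
        "name": "folders/222",
        "parent": "folders/111",
        "displayName": "prod-eu",
        "lifecycleState": "ACTIVE",
    },
]
transformed = transform_gcp_folders(sample)

# Folders directly under the org get parent_org; nested folders get parent_folder.
assert transformed[0]["parent_org"] == "organizations/123456789012"
assert transformed[0]["parent_folder"] is None
assert transformed[1]["parent_folder"] == "folders/111"
```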
cartography/intel/gcp/crm/orgs.py
ADDED
@@ -0,0 +1,65 @@
+import logging
+from typing import Dict
+from typing import List
+
+import neo4j
+from google.cloud import resourcemanager_v3
+
+from cartography.client.core.tx import load
+from cartography.models.gcp.crm.organizations import GCPOrganizationSchema
+from cartography.util import timeit
+
+logger = logging.getLogger(__name__)
+
+
+@timeit
+def get_gcp_organizations() -> List[Dict]:
+    """
+    Return list of GCP organizations that the authenticated principal can access using the high-level client.
+    Returns empty list on error.
+    :return: List of org dicts with keys: name, displayName, lifecycleState.
+    """
+    client = resourcemanager_v3.OrganizationsClient()
+    orgs = []
+    for org in client.search_organizations():
+        orgs.append(
+            {
+                "name": org.name,
+                "displayName": org.display_name,
+                "lifecycleState": org.state.name,
+            }
+        )
+    return orgs
+
+
+@timeit
+def load_gcp_organizations(
+    neo4j_session: neo4j.Session,
+    data: List[Dict],
+    gcp_update_tag: int,
+) -> None:
+    for org in data:
+        org["id"] = org["name"]
+
+    load(
+        neo4j_session,
+        GCPOrganizationSchema(),
+        data,
+        lastupdated=gcp_update_tag,
+    )
+
+
+@timeit
+def sync_gcp_organizations(
+    neo4j_session: neo4j.Session,
+    gcp_update_tag: int,
+    common_job_parameters: Dict,
+) -> List[Dict]:
+    """
+    Get GCP organization data using the CRM v1 resource object and load the data to Neo4j.
+    Returns the list of organizations synced.
+    """
+    logger.debug("Syncing GCP organizations")
+    data = get_gcp_organizations()
+    load_gcp_organizations(neo4j_session, data, gcp_update_tag)
+    return data
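The loader above keys each GCPOrganization node by its full resource name; a tiny illustration with a made-up org:

```python
# Illustration only: load_gcp_organizations copies the full resource name into "id"
# before handing the records to load().
org = {
    "name": "organizations/123456789012",
    "displayName": "example.com",
    "lifecycleState": "ACTIVE",
}
org["id"] = org["name"]  # same assignment the loader performs
assert org["id"] == "organizations/123456789012"
```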
cartography/intel/gcp/crm/projects.py
ADDED
@@ -0,0 +1,109 @@
+import logging
+from typing import Dict
+from typing import List
+
+import neo4j
+from google.cloud import resourcemanager_v3
+
+from cartography.client.core.tx import load
+from cartography.models.gcp.crm.projects import GCPProjectSchema
+from cartography.util import timeit
+
+logger = logging.getLogger(__name__)
+
+
+@timeit
+def get_gcp_projects(org_resource_name: str, folders: List[Dict]) -> List[Dict]:
+    """
+    Return list of ACTIVE GCP projects under the specified organization
+    and within the specified folders.
+    :param org_resource_name: Full organization resource name (e.g., "organizations/123456789012")
+    :param folders: List of folder dictionaries containing 'name' field with full resource names
+    """
+    folder_names = [folder["name"] for folder in folders] if folders else []
+    # Build list of parent resources to check (org and all folders)
+    parents = set([org_resource_name] + folder_names)
+    results: List[Dict] = []
+    for parent in parents:
+        client = resourcemanager_v3.ProjectsClient()
+        for proj in client.list_projects(parent=parent):
+            # list_projects returns ACTIVE projects by default
+            name_field = proj.name  # "projects/<number>"
+            project_number = name_field.split("/")[-1] if name_field else None
+            project_parent = proj.parent
+            results.append(
+                {
+                    "projectId": getattr(proj, "project_id", None),
+                    "projectNumber": project_number,
+                    "name": getattr(proj, "display_name", None),
+                    "lifecycleState": proj.state.name,
+                    "parent": project_parent,
+                }
+            )
+    return results
+
+
+@timeit
+def transform_gcp_projects(data: List[Dict]) -> List[Dict]:
+    """
+    Transform GCP project data to add parent_org or parent_folder fields based on parent type.
+
+    :param data: List of project dicts
+    :return: List of transformed project dicts with parent_org and parent_folder fields
+    """
+    for project in data:
+        project["parent_org"] = None
+        project["parent_folder"] = None
+
+        # Set parent fields based on parent type
+        if project["parent"].startswith("organizations"):
+            project["parent_org"] = project["parent"]
+        elif project["parent"].startswith("folders"):
+            project["parent_folder"] = project["parent"]
+        else:
+            logger.warning(
+                f"Project {project['projectId']} has unexpected parent type: {project['parent']}"
+            )
+
+    return data
+
+
+@timeit
+def load_gcp_projects(
+    neo4j_session: neo4j.Session,
+    data: List[Dict],
+    gcp_update_tag: int,
+    org_resource_name: str,
+) -> None:
+    """
+    Load GCP projects into the graph.
+    :param org_resource_name: Full organization resource name (e.g., "organizations/123456789012")
+    """
+    transformed_data = transform_gcp_projects(data)
+    load(
+        neo4j_session,
+        GCPProjectSchema(),
+        transformed_data,
+        lastupdated=gcp_update_tag,
+        ORG_RESOURCE_NAME=org_resource_name,
+    )
+
+
+@timeit
+def sync_gcp_projects(
+    neo4j_session: neo4j.Session,
+    org_resource_name: str,
+    folders: List[Dict],
+    gcp_update_tag: int,
+    common_job_parameters: Dict,
+) -> List[Dict]:
+    """
+    Get and sync GCP project data to Neo4j.
+    :param org_resource_name: Full organization resource name (e.g., "organizations/123456789012")
+    :param folders: List of folder dictionaries containing 'name' field with full resource names
+    :return: List of projects synced
+    """
+    logger.debug("Syncing GCP projects")
+    projects = get_gcp_projects(org_resource_name, folders)
+    load_gcp_projects(neo4j_session, projects, gcp_update_tag, org_resource_name)
+    return projects
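Taken together, the three new CRM modules compose roughly as sketched below, based only on the signatures shown above. The real orchestration lives in cartography/intel/gcp/__init__.py, which also changed in this release but is not shown in this section, so treat this as illustrative.

```python
# Illustrative composition of the new org -> folders -> projects sync steps.
from typing import Dict

import neo4j

from cartography.intel.gcp.crm.folders import sync_gcp_folders
from cartography.intel.gcp.crm.orgs import sync_gcp_organizations
from cartography.intel.gcp.crm.projects import sync_gcp_projects


def sync_crm_sketch(
    neo4j_session: neo4j.Session,
    gcp_update_tag: int,
    common_job_parameters: Dict,
) -> None:
    orgs = sync_gcp_organizations(neo4j_session, gcp_update_tag, common_job_parameters)
    for org in orgs:
        # org["name"] is the full resource name, e.g. "organizations/123456789012".
        folders = sync_gcp_folders(
            neo4j_session, gcp_update_tag, common_job_parameters, org["name"],
        )
        sync_gcp_projects(
            neo4j_session, org["name"], folders, gcp_update_tag, common_job_parameters,
        )
```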
cartography/intel/gcp/dns.py
CHANGED
@@ -116,7 +116,8 @@ def transform_dns_rrs(dns_rrs: List[Dict]) -> List[Dict]:
     for r in dns_rrs:
         records.append(
             {
-
+                # Compose a unique ID to avoid collisions across types and zones
+                "id": f"{r['name']}|{r.get('type')}|{r.get('zone')}",
                 "name": r["name"],
                 "type": r.get("type"),
                 "ttl": r.get("ttl"),
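The new composite ID is the pipe-joined name, type, and zone; for a made-up record set:

```python
# Made-up record set showing the composite ID format introduced above.
r = {"name": "www.example.com.", "type": "A", "ttl": 300, "zone": "example-zone"}
record_id = f"{r['name']}|{r.get('type')}|{r.get('zone')}"
assert record_id == "www.example.com.|A|example-zone"
```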
cartography/intel/gcp/gke.py
CHANGED
@@ -1,12 +1,16 @@
 import json
 import logging
+from typing import Any
 from typing import Dict
+from typing import List

 import neo4j
 from googleapiclient.discovery import HttpError
 from googleapiclient.discovery import Resource

-from cartography.
+from cartography.client.core.tx import load
+from cartography.graph.job import GraphJob
+from cartography.models.gcp.gke import GCPGKEClusterSchema
 from cartography.util import timeit

 logger = logging.getLogger(__name__)
@@ -56,105 +60,20 @@ def load_gke_clusters(
     gcp_update_tag: int,
 ) -> None:
     """
-    Ingest GCP GKE
-
-    :type neo4j_session: Neo4j session object
-    :param neo4j session: The Neo4j session object
-
-    :type cluster_resp: Dict
-    :param cluster_resp: A cluster response object from the GKE API
-
-    :type gcp_update_tag: timestamp
-    :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
-
-    :rtype: NoneType
-    :return: Nothing
+    Ingest GCP GKE clusters using the data model loader.
     """
+    clusters: List[Dict[str, Any]] = transform_gke_clusters(cluster_resp)

-
-
-
-
-
-
-
-
-
-
-        cluster.monitoring_service = $ClusterMonitoringService,
-        cluster.network = $ClusterNetwork,
-        cluster.subnetwork = $ClusterSubnetwork,
-        cluster.cluster_ipv4cidr = $ClusterIPv4Cidr,
-        cluster.zone = $ClusterZone,
-        cluster.location = $ClusterLocation,
-        cluster.endpoint = $ClusterEndpoint,
-        cluster.initial_version = $ClusterInitialVersion,
-        cluster.current_master_version = $ClusterMasterVersion,
-        cluster.status = $ClusterStatus,
-        cluster.services_ipv4cidr = $ClusterServicesIPv4Cidr,
-        cluster.database_encryption = $ClusterDatabaseEncryption,
-        cluster.network_policy = $ClusterNetworkPolicy,
-        cluster.master_authorized_networks = $ClusterMasterAuthorizedNetworks,
-        cluster.legacy_abac = $ClusterAbac,
-        cluster.shielded_nodes = $ClusterShieldedNodes,
-        cluster.private_nodes = $ClusterPrivateNodes,
-        cluster.private_endpoint_enabled = $ClusterPrivateEndpointEnabled,
-        cluster.private_endpoint = $ClusterPrivateEndpoint,
-        cluster.public_endpoint = $ClusterPublicEndpoint,
-        cluster.masterauth_username = $ClusterMasterUsername,
-        cluster.masterauth_password = $ClusterMasterPassword
-    WITH cluster
-    MATCH (owner:GCPProject{id:$ProjectId})
-    MERGE (owner)-[r:RESOURCE]->(cluster)
-    ON CREATE SET r.firstseen = timestamp()
-    SET r.lastupdated = $gcp_update_tag
-    """
-    for cluster in cluster_resp.get("clusters", []):
-        neo4j_session.run(
-            query,
-            ProjectId=project_id,
-            ClusterSelfLink=cluster["selfLink"],
-            ClusterCreateTime=cluster["createTime"],
-            ClusterName=cluster["name"],
-            ClusterDescription=cluster.get("description"),
-            ClusterLoggingService=cluster.get("loggingService"),
-            ClusterMonitoringService=cluster.get("monitoringService"),
-            ClusterNetwork=cluster.get("network"),
-            ClusterSubnetwork=cluster.get("subnetwork"),
-            ClusterIPv4Cidr=cluster.get("clusterIpv4Cidr"),
-            ClusterZone=cluster.get("zone"),
-            ClusterLocation=cluster.get("location"),
-            ClusterEndpoint=cluster.get("endpoint"),
-            ClusterInitialVersion=cluster.get("initialClusterVersion"),
-            ClusterMasterVersion=cluster.get("currentMasterVersion"),
-            ClusterStatus=cluster.get("status"),
-            ClusterServicesIPv4Cidr=cluster.get("servicesIpv4Cidr"),
-            ClusterDatabaseEncryption=cluster.get("databaseEncryption", {}).get(
-                "state",
-            ),
-            ClusterNetworkPolicy=_process_network_policy(cluster),
-            ClusterMasterAuthorizedNetworks=cluster.get(
-                "masterAuthorizedNetworksConfig",
-                {},
-            ).get("enabled"),
-            ClusterAbac=cluster.get("legacyAbac", {}).get("enabled"),
-            ClusterShieldedNodes=cluster.get("shieldedNodes", {}).get("enabled"),
-            ClusterPrivateNodes=cluster.get("privateClusterConfig", {}).get(
-                "enablePrivateNodes",
-            ),
-            ClusterPrivateEndpointEnabled=cluster.get("privateClusterConfig", {}).get(
-                "enablePrivateEndpoint",
-            ),
-            ClusterPrivateEndpoint=cluster.get("privateClusterConfig", {}).get(
-                "privateEndpoint",
-            ),
-            ClusterPublicEndpoint=cluster.get("privateClusterConfig", {}).get(
-                "publicEndpoint",
-            ),
-            ClusterMasterUsername=cluster.get("masterAuth", {}).get("username"),
-            ClusterMasterPassword=cluster.get("masterAuth", {}).get("password"),
-            gcp_update_tag=gcp_update_tag,
-        )
+    if not clusters:
+        return
+
+    load(
+        neo4j_session,
+        GCPGKEClusterSchema(),
+        clusters,
+        lastupdated=gcp_update_tag,
+        PROJECT_ID=project_id,
+    )


 def _process_network_policy(cluster: Dict) -> bool:
@@ -175,21 +94,10 @@ def cleanup_gke_clusters(
     common_job_parameters: Dict,
 ) -> None:
     """
-
-
-    :type neo4j_session: The Neo4j session object
-    :param neo4j_session: The Neo4j session
-
-    :type common_job_parameters: dict
-    :param common_job_parameters: Dictionary of other job parameters to pass to Neo4j
-
-    :rtype: NoneType
-    :return: Nothing
+    Scoped cleanup for GKE clusters based on the project sub-resource relationship.
     """
-
-        "gcp_gke_cluster_cleanup.json",
+    GraphJob.from_node_schema(GCPGKEClusterSchema(), common_job_parameters).run(
         neo4j_session,
-        common_job_parameters,
     )


@@ -222,8 +130,59 @@ def sync_gke_clusters(
     :rtype: NoneType
     :return: Nothing
     """
-    logger.info("Syncing
+    logger.info("Syncing GKE clusters for project %s.", project_id)
     gke_res = get_gke_clusters(container, project_id)
     load_gke_clusters(neo4j_session, gke_res, project_id, gcp_update_tag)
-    # TODO scope the cleanup to the current project - https://github.com/cartography-cncf/cartography/issues/381
     cleanup_gke_clusters(neo4j_session, common_job_parameters)
+
+
+def transform_gke_clusters(api_result: Dict[str, Any]) -> List[Dict[str, Any]]:
+    """
+    Transform GKE API response into a list of dicts suitable for the data model loader.
+    """
+    result: List[Dict[str, Any]] = []
+    for c in api_result.get("clusters", []):
+        transformed: Dict[str, Any] = {
+            # Required fields
+            "id": c["selfLink"],
+            "self_link": c["selfLink"],
+            "name": c["name"],
+            "created_at": c.get("createTime"),
+            # Optional fields
+            "description": c.get("description"),
+            "logging_service": c.get("loggingService"),
+            "monitoring_service": c.get("monitoringService"),
+            "network": c.get("network"),
+            "subnetwork": c.get("subnetwork"),
+            "cluster_ipv4cidr": c.get("clusterIpv4Cidr"),
+            "zone": c.get("zone"),
+            "location": c.get("location"),
+            "endpoint": c.get("endpoint"),
+            "initial_version": c.get("initialClusterVersion"),
+            "current_master_version": c.get("currentMasterVersion"),
+            "status": c.get("status"),
+            "services_ipv4cidr": c.get("servicesIpv4Cidr"),
+            "database_encryption": (c.get("databaseEncryption", {}) or {}).get("state"),
+            "network_policy": _process_network_policy(c),
+            "master_authorized_networks": (
+                c.get("masterAuthorizedNetworksConfig", {}) or {}
+            ).get("enabled"),
+            "legacy_abac": (c.get("legacyAbac", {}) or {}).get("enabled"),
+            "shielded_nodes": (c.get("shieldedNodes", {}) or {}).get("enabled"),
+            "private_nodes": (c.get("privateClusterConfig", {}) or {}).get(
+                "enablePrivateNodes"
+            ),
+            "private_endpoint_enabled": (c.get("privateClusterConfig", {}) or {}).get(
+                "enablePrivateEndpoint"
+            ),
+            "private_endpoint": (c.get("privateClusterConfig", {}) or {}).get(
+                "privateEndpoint"
+            ),
+            "public_endpoint": (c.get("privateClusterConfig", {}) or {}).get(
+                "publicEndpoint"
+            ),
+            "masterauth_username": (c.get("masterAuth", {}) or {}).get("username"),
+            "masterauth_password": (c.get("masterAuth", {}) or {}).get("password"),
+        }
+        result.append(transformed)
+    return result