cartography 0.112.0__py3-none-any.whl → 0.114.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +8 -0
- cartography/config.py +4 -0
- cartography/data/indexes.cypher +0 -31
- cartography/intel/aws/apigatewayv2.py +116 -0
- cartography/intel/aws/iam.py +741 -492
- cartography/intel/aws/organizations.py +7 -8
- cartography/intel/aws/permission_relationships.py +4 -16
- cartography/intel/aws/resources.py +2 -0
- cartography/intel/azure/__init__.py +16 -0
- cartography/intel/azure/app_service.py +105 -0
- cartography/intel/azure/functions.py +124 -0
- cartography/intel/entra/__init__.py +31 -0
- cartography/intel/entra/app_role_assignments.py +277 -0
- cartography/intel/entra/applications.py +4 -238
- cartography/intel/entra/federation/__init__.py +0 -0
- cartography/intel/entra/federation/aws_identity_center.py +77 -0
- cartography/intel/entra/service_principals.py +217 -0
- cartography/intel/gcp/__init__.py +136 -436
- cartography/intel/gcp/clients.py +65 -0
- cartography/intel/gcp/compute.py +18 -44
- cartography/intel/gcp/crm/__init__.py +0 -0
- cartography/intel/gcp/crm/folders.py +108 -0
- cartography/intel/gcp/crm/orgs.py +65 -0
- cartography/intel/gcp/crm/projects.py +109 -0
- cartography/intel/gcp/dns.py +82 -169
- cartography/intel/gcp/gke.py +72 -113
- cartography/intel/gcp/iam.py +66 -54
- cartography/intel/gcp/storage.py +75 -159
- cartography/intel/github/__init__.py +41 -0
- cartography/intel/github/commits.py +423 -0
- cartography/intel/github/repos.py +73 -39
- cartography/models/aws/apigatewayv2/__init__.py +0 -0
- cartography/models/aws/apigatewayv2/apigatewayv2.py +53 -0
- cartography/models/aws/iam/access_key.py +103 -0
- cartography/models/aws/iam/account_role.py +24 -0
- cartography/models/aws/iam/federated_principal.py +60 -0
- cartography/models/aws/iam/group.py +60 -0
- cartography/models/aws/iam/group_membership.py +26 -0
- cartography/models/aws/iam/inline_policy.py +78 -0
- cartography/models/aws/iam/managed_policy.py +51 -0
- cartography/models/aws/iam/policy_statement.py +57 -0
- cartography/models/aws/iam/role.py +83 -0
- cartography/models/aws/iam/root_principal.py +52 -0
- cartography/models/aws/iam/service_principal.py +30 -0
- cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
- cartography/models/aws/iam/user.py +54 -0
- cartography/models/azure/__init__.py +0 -0
- cartography/models/azure/app_service.py +59 -0
- cartography/models/azure/function_app.py +59 -0
- cartography/models/entra/entra_user_to_aws_sso.py +41 -0
- cartography/models/entra/service_principal.py +104 -0
- cartography/models/gcp/compute/subnet.py +74 -0
- cartography/models/gcp/crm/__init__.py +0 -0
- cartography/models/gcp/crm/folders.py +98 -0
- cartography/models/gcp/crm/organizations.py +21 -0
- cartography/models/gcp/crm/projects.py +100 -0
- cartography/models/gcp/dns.py +109 -0
- cartography/models/gcp/gke.py +69 -0
- cartography/models/gcp/iam.py +3 -0
- cartography/models/gcp/storage/__init__.py +0 -0
- cartography/models/gcp/storage/bucket.py +119 -0
- cartography/models/github/commits.py +63 -0
- {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/METADATA +7 -5
- {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/RECORD +69 -39
- cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
- cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
- cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
- cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
- cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
- cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
- cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
- cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gcp_dns_cleanup.json +0 -29
- cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
- cartography/data/jobs/cleanup/gcp_storage_bucket_cleanup.json +0 -29
- cartography/intel/gcp/crm.py +0 -355
- {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/WHEEL +0 -0
- {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/top_level.txt +0 -0
|
@@ -3,32 +3,30 @@ import logging
|
|
|
3
3
|
from collections import namedtuple
|
|
4
4
|
from typing import Dict
|
|
5
5
|
from typing import List
|
|
6
|
-
from typing import Optional
|
|
7
6
|
from typing import Set
|
|
8
7
|
|
|
9
|
-
import googleapiclient.discovery
|
|
10
|
-
import httplib2
|
|
11
8
|
import neo4j
|
|
12
|
-
from google.auth import default
|
|
13
|
-
from google.auth.credentials import Credentials as GoogleCredentials
|
|
14
|
-
from google.auth.exceptions import DefaultCredentialsError
|
|
15
|
-
from google_auth_httplib2 import AuthorizedHttp
|
|
9
|
+
from googleapiclient.discovery import HttpError
|
|
16
10
|
from googleapiclient.discovery import Resource
|
|
17
11
|
|
|
18
12
|
from cartography.config import Config
|
|
13
|
+
from cartography.graph.job import GraphJob
|
|
19
14
|
from cartography.intel.gcp import compute
|
|
20
|
-
from cartography.intel.gcp import crm
|
|
21
15
|
from cartography.intel.gcp import dns
|
|
22
16
|
from cartography.intel.gcp import gke
|
|
23
17
|
from cartography.intel.gcp import iam
|
|
24
18
|
from cartography.intel.gcp import storage
|
|
19
|
+
from cartography.intel.gcp.clients import build_client
|
|
20
|
+
from cartography.intel.gcp.crm.folders import sync_gcp_folders
|
|
21
|
+
from cartography.intel.gcp.crm.orgs import sync_gcp_organizations
|
|
22
|
+
from cartography.intel.gcp.crm.projects import sync_gcp_projects
|
|
23
|
+
from cartography.models.gcp.crm.folders import GCPFolderSchema
|
|
24
|
+
from cartography.models.gcp.crm.organizations import GCPOrganizationSchema
|
|
25
|
+
from cartography.models.gcp.crm.projects import GCPProjectSchema
|
|
25
26
|
from cartography.util import run_analysis_job
|
|
26
27
|
from cartography.util import timeit
|
|
27
28
|
|
|
28
29
|
logger = logging.getLogger(__name__)
|
|
29
|
-
Resources = namedtuple(
|
|
30
|
-
"Resources", "compute container crm_v1 crm_v2 dns storage serviceusage iam"
|
|
31
|
-
)
|
|
32
30
|
|
|
33
31
|
# Mapping of service short names to their full names as in docs. See https://developers.google.com/apis-explorer,
|
|
34
32
|
# and https://cloud.google.com/service-usage/docs/reference/rest/v1/services#ServiceConfig
|
|
@@ -41,160 +39,6 @@ service_names = Services(
|
|
|
41
39
|
iam="iam.googleapis.com",
|
|
42
40
|
)
|
|
43
41
|
|
|
44
|
-
# Default HTTP timeout (seconds) for Google API clients built via discovery.build
|
|
45
|
-
_GCP_HTTP_TIMEOUT = 120
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def _authorized_http_with_timeout(
|
|
49
|
-
credentials: GoogleCredentials, timeout: int = _GCP_HTTP_TIMEOUT
|
|
50
|
-
) -> AuthorizedHttp:
|
|
51
|
-
"""
|
|
52
|
-
Build an AuthorizedHttp with a per-request timeout, avoiding global socket timeouts.
|
|
53
|
-
"""
|
|
54
|
-
return AuthorizedHttp(credentials, http=httplib2.Http(timeout=timeout))
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def _get_crm_resource_v1(credentials: GoogleCredentials) -> Resource:
|
|
58
|
-
"""
|
|
59
|
-
Instantiates a Google Compute Resource Manager v1 resource object to call the Resource Manager API.
|
|
60
|
-
See https://cloud.google.com/resource-manager/reference/rest/.
|
|
61
|
-
:param credentials: The GoogleCredentials object
|
|
62
|
-
:return: A CRM v1 resource object
|
|
63
|
-
"""
|
|
64
|
-
# cache_discovery=False to suppress extra warnings.
|
|
65
|
-
# See https://github.com/googleapis/google-api-python-client/issues/299#issuecomment-268915510 and related issues
|
|
66
|
-
return googleapiclient.discovery.build(
|
|
67
|
-
"cloudresourcemanager",
|
|
68
|
-
"v1",
|
|
69
|
-
http=_authorized_http_with_timeout(credentials),
|
|
70
|
-
cache_discovery=False,
|
|
71
|
-
)
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
def _get_crm_resource_v2(credentials: GoogleCredentials) -> Resource:
|
|
75
|
-
"""
|
|
76
|
-
Instantiates a Google Compute Resource Manager v2 resource object to call the Resource Manager API.
|
|
77
|
-
We need a v2 resource object to query for GCP folders.
|
|
78
|
-
:param credentials: The GoogleCredentials object
|
|
79
|
-
:return: A CRM v2 resource object
|
|
80
|
-
"""
|
|
81
|
-
return googleapiclient.discovery.build(
|
|
82
|
-
"cloudresourcemanager",
|
|
83
|
-
"v2",
|
|
84
|
-
http=_authorized_http_with_timeout(credentials),
|
|
85
|
-
cache_discovery=False,
|
|
86
|
-
)
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
def _get_compute_resource(credentials: GoogleCredentials) -> Resource:
|
|
90
|
-
"""
|
|
91
|
-
Instantiates a Google Compute resource object to call the Compute API. This is used to pull zone, instance, and
|
|
92
|
-
networking data. See https://cloud.google.com/compute/docs/reference/rest/v1/.
|
|
93
|
-
:param credentials: The GoogleCredentials object
|
|
94
|
-
:return: A Compute resource object
|
|
95
|
-
"""
|
|
96
|
-
return googleapiclient.discovery.build(
|
|
97
|
-
"compute",
|
|
98
|
-
"v1",
|
|
99
|
-
http=_authorized_http_with_timeout(credentials),
|
|
100
|
-
cache_discovery=False,
|
|
101
|
-
)
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
def _get_storage_resource(credentials: GoogleCredentials) -> Resource:
|
|
105
|
-
"""
|
|
106
|
-
Instantiates a Google Cloud Storage resource object to call the Storage API.
|
|
107
|
-
This is used to pull bucket metadata and IAM Policies
|
|
108
|
-
as well as list buckets in a specified project.
|
|
109
|
-
See https://cloud.google.com/storage/docs/json_api/.
|
|
110
|
-
:param credentials: The GoogleCredentials object
|
|
111
|
-
:return: A Storage resource object
|
|
112
|
-
"""
|
|
113
|
-
return googleapiclient.discovery.build(
|
|
114
|
-
"storage",
|
|
115
|
-
"v1",
|
|
116
|
-
http=_authorized_http_with_timeout(credentials),
|
|
117
|
-
cache_discovery=False,
|
|
118
|
-
)
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
def _get_container_resource(credentials: GoogleCredentials) -> Resource:
|
|
122
|
-
"""
|
|
123
|
-
Instantiates a Google Cloud Container resource object to call the
|
|
124
|
-
Container API. See: https://cloud.google.com/kubernetes-engine/docs/reference/rest/v1/.
|
|
125
|
-
|
|
126
|
-
:param credentials: The GoogleCredentials object
|
|
127
|
-
:return: A Container resource object
|
|
128
|
-
"""
|
|
129
|
-
return googleapiclient.discovery.build(
|
|
130
|
-
"container",
|
|
131
|
-
"v1",
|
|
132
|
-
http=_authorized_http_with_timeout(credentials),
|
|
133
|
-
cache_discovery=False,
|
|
134
|
-
)
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
def _get_dns_resource(credentials: GoogleCredentials) -> Resource:
|
|
138
|
-
"""
|
|
139
|
-
Instantiates a Google Cloud DNS resource object to call the
|
|
140
|
-
Container API. See: https://cloud.google.com/dns/docs/reference/v1/.
|
|
141
|
-
|
|
142
|
-
:param credentials: The GoogleCredentials object
|
|
143
|
-
:return: A DNS resource object
|
|
144
|
-
"""
|
|
145
|
-
return googleapiclient.discovery.build(
|
|
146
|
-
"dns",
|
|
147
|
-
"v1",
|
|
148
|
-
http=_authorized_http_with_timeout(credentials),
|
|
149
|
-
cache_discovery=False,
|
|
150
|
-
)
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
def _get_serviceusage_resource(credentials: GoogleCredentials) -> Resource:
|
|
154
|
-
"""
|
|
155
|
-
Instantiates a serviceusage resource object.
|
|
156
|
-
See: https://cloud.google.com/service-usage/docs/reference/rest/v1/operations/list.
|
|
157
|
-
|
|
158
|
-
:param credentials: The GoogleCredentials object
|
|
159
|
-
:return: A serviceusage resource object
|
|
160
|
-
"""
|
|
161
|
-
return googleapiclient.discovery.build(
|
|
162
|
-
"serviceusage",
|
|
163
|
-
"v1",
|
|
164
|
-
http=_authorized_http_with_timeout(credentials),
|
|
165
|
-
cache_discovery=False,
|
|
166
|
-
)
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
def _get_iam_resource(credentials: GoogleCredentials) -> Resource:
|
|
170
|
-
"""
|
|
171
|
-
Instantiates a Google IAM resource object to call the IAM API.
|
|
172
|
-
"""
|
|
173
|
-
return googleapiclient.discovery.build(
|
|
174
|
-
"iam",
|
|
175
|
-
"v1",
|
|
176
|
-
http=_authorized_http_with_timeout(credentials),
|
|
177
|
-
cache_discovery=False,
|
|
178
|
-
)
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
def _initialize_resources(credentials: GoogleCredentials) -> Resource:
|
|
182
|
-
"""
|
|
183
|
-
Create namedtuple of all resource objects necessary for GCP data gathering.
|
|
184
|
-
:param credentials: The GoogleCredentials object
|
|
185
|
-
:return: namedtuple of all resource objects
|
|
186
|
-
"""
|
|
187
|
-
return Resources(
|
|
188
|
-
crm_v1=_get_crm_resource_v1(credentials),
|
|
189
|
-
crm_v2=_get_crm_resource_v2(credentials),
|
|
190
|
-
serviceusage=_get_serviceusage_resource(credentials),
|
|
191
|
-
compute=None,
|
|
192
|
-
container=None,
|
|
193
|
-
dns=None,
|
|
194
|
-
storage=None,
|
|
195
|
-
iam=_get_iam_resource(credentials),
|
|
196
|
-
)
|
|
197
|
-
|
|
198
42
|
|
|
199
43
|
def _services_enabled_on_project(serviceusage: Resource, project_id: str) -> Set:
|
|
200
44
|
"""
|
|
@@ -220,7 +64,7 @@ def _services_enabled_on_project(serviceusage: Resource, project_id: str) -> Set
|
|
|
220
64
|
previous_response=res,
|
|
221
65
|
)
|
|
222
66
|
return services
|
|
223
|
-
except googleapiclient.discovery.HttpError as http_error:
|
|
67
|
+
except HttpError as http_error:
|
|
224
68
|
http_error = json.loads(http_error.content.decode("utf-8"))
|
|
225
69
|
# This is set to log-level `info` because Google creates many projects under the hood that cartography cannot
|
|
226
70
|
# audit (e.g. adding a script to a Google spreadsheet causes a project to get created) and we don't need to emit
|
|
@@ -233,318 +77,174 @@ def _services_enabled_on_project(serviceusage: Resource, project_id: str) -> Set
|
|
|
233
77
|
return set()
|
|
234
78
|
|
|
235
79
|
|
|
236
|
-
def _sync_single_project_compute(
|
|
80
|
+
def _sync_project_resources(
|
|
237
81
|
neo4j_session: neo4j.Session,
|
|
238
|
-
|
|
239
|
-
project_id: str,
|
|
82
|
+
projects: List[Dict],
|
|
240
83
|
gcp_update_tag: int,
|
|
241
84
|
common_job_parameters: Dict,
|
|
242
85
|
) -> None:
|
|
243
86
|
"""
|
|
244
|
-
|
|
87
|
+
Syncs GCP service-specific resources (Compute, Storage, GKE, DNS, IAM) for each project.
|
|
245
88
|
:param neo4j_session: The Neo4j session
|
|
246
|
-
:param
|
|
247
|
-
:param project_id: The project ID number to sync. See the `projectId` field in
|
|
248
|
-
https://cloud.google.com/resource-manager/reference/rest/v1/projects
|
|
89
|
+
:param projects: A list of projects containing at minimum a "projectId" field.
|
|
249
90
|
:param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
|
|
250
91
|
:param common_job_parameters: Other parameters sent to Neo4j
|
|
251
92
|
:return: Nothing
|
|
252
93
|
"""
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
project_id,
|
|
261
|
-
gcp_update_tag,
|
|
262
|
-
common_job_parameters,
|
|
94
|
+
logger.info("Syncing resources for %d GCP projects.", len(projects))
|
|
95
|
+
# Per-project sync across services
|
|
96
|
+
for project in projects:
|
|
97
|
+
project_id = project["projectId"]
|
|
98
|
+
common_job_parameters["PROJECT_ID"] = project_id
|
|
99
|
+
enabled_services = _services_enabled_on_project(
|
|
100
|
+
build_client("serviceusage", "v1"), project_id
|
|
263
101
|
)
|
|
264
102
|
|
|
103
|
+
if service_names.compute in enabled_services:
|
|
104
|
+
logger.info("Syncing GCP project %s for Compute.", project_id)
|
|
105
|
+
compute_cred = build_client("compute", "v1")
|
|
106
|
+
compute.sync(
|
|
107
|
+
neo4j_session,
|
|
108
|
+
compute_cred,
|
|
109
|
+
project_id,
|
|
110
|
+
gcp_update_tag,
|
|
111
|
+
common_job_parameters,
|
|
112
|
+
)
|
|
265
113
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
:param resources: namedtuple of the GCP resource objects
|
|
277
|
-
:param project_id: The project ID number to sync. See the `projectId` field in
|
|
278
|
-
https://cloud.google.com/resource-manager/reference/rest/v1/projects
|
|
279
|
-
:param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
|
|
280
|
-
:param common_job_parameters: Other parameters sent to Neo4j
|
|
281
|
-
:return: Nothing
|
|
282
|
-
"""
|
|
283
|
-
# Determine the resources available on the project.
|
|
284
|
-
enabled_services = _services_enabled_on_project(resources.serviceusage, project_id)
|
|
285
|
-
storage_cred = _get_storage_resource(get_gcp_credentials())
|
|
286
|
-
if service_names.storage in enabled_services:
|
|
287
|
-
storage.sync_gcp_buckets(
|
|
288
|
-
neo4j_session,
|
|
289
|
-
storage_cred,
|
|
290
|
-
project_id,
|
|
291
|
-
gcp_update_tag,
|
|
292
|
-
common_job_parameters,
|
|
293
|
-
)
|
|
114
|
+
if service_names.storage in enabled_services:
|
|
115
|
+
logger.info("Syncing GCP project %s for Storage.", project_id)
|
|
116
|
+
storage_cred = build_client("storage", "v1")
|
|
117
|
+
storage.sync_gcp_buckets(
|
|
118
|
+
neo4j_session,
|
|
119
|
+
storage_cred,
|
|
120
|
+
project_id,
|
|
121
|
+
gcp_update_tag,
|
|
122
|
+
common_job_parameters,
|
|
123
|
+
)
|
|
294
124
|
|
|
125
|
+
if service_names.gke in enabled_services:
|
|
126
|
+
logger.info("Syncing GCP project %s for GKE.", project_id)
|
|
127
|
+
container_cred = build_client("container", "v1")
|
|
128
|
+
gke.sync_gke_clusters(
|
|
129
|
+
neo4j_session,
|
|
130
|
+
container_cred,
|
|
131
|
+
project_id,
|
|
132
|
+
gcp_update_tag,
|
|
133
|
+
common_job_parameters,
|
|
134
|
+
)
|
|
295
135
|
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
:param resources: namedtuple of the GCP resource objects
|
|
307
|
-
:param project_id: The project ID number to sync. See the `projectId` field in
|
|
308
|
-
https://cloud.google.com/resource-manager/reference/rest/v1/projects
|
|
309
|
-
:param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
|
|
310
|
-
:param common_job_parameters: Other parameters sent to Neo4j
|
|
311
|
-
:return: Nothing
|
|
312
|
-
"""
|
|
313
|
-
# Determine the resources available on the project.
|
|
314
|
-
enabled_services = _services_enabled_on_project(resources.serviceusage, project_id)
|
|
315
|
-
container_cred = _get_container_resource(get_gcp_credentials())
|
|
316
|
-
if service_names.gke in enabled_services:
|
|
317
|
-
gke.sync_gke_clusters(
|
|
318
|
-
neo4j_session,
|
|
319
|
-
container_cred,
|
|
320
|
-
project_id,
|
|
321
|
-
gcp_update_tag,
|
|
322
|
-
common_job_parameters,
|
|
323
|
-
)
|
|
136
|
+
if service_names.dns in enabled_services:
|
|
137
|
+
logger.info("Syncing GCP project %s for DNS.", project_id)
|
|
138
|
+
dns_cred = build_client("dns", "v1")
|
|
139
|
+
dns.sync(
|
|
140
|
+
neo4j_session,
|
|
141
|
+
dns_cred,
|
|
142
|
+
project_id,
|
|
143
|
+
gcp_update_tag,
|
|
144
|
+
common_job_parameters,
|
|
145
|
+
)
|
|
324
146
|
|
|
147
|
+
if service_names.iam in enabled_services:
|
|
148
|
+
logger.info("Syncing GCP project %s for IAM.", project_id)
|
|
149
|
+
iam_cred = build_client("iam", "v1")
|
|
150
|
+
iam.sync(
|
|
151
|
+
neo4j_session,
|
|
152
|
+
iam_cred,
|
|
153
|
+
project_id,
|
|
154
|
+
gcp_update_tag,
|
|
155
|
+
common_job_parameters,
|
|
156
|
+
)
|
|
325
157
|
|
|
326
|
-
|
|
327
|
-
neo4j_session: neo4j.Session,
|
|
328
|
-
resources: Resource,
|
|
329
|
-
project_id: str,
|
|
330
|
-
gcp_update_tag: int,
|
|
331
|
-
common_job_parameters: Dict,
|
|
332
|
-
) -> None:
|
|
333
|
-
"""
|
|
334
|
-
Handles graph sync for a single GCP project DNS resources.
|
|
335
|
-
:param neo4j_session: The Neo4j session
|
|
336
|
-
:param resources: namedtuple of the GCP resource objects
|
|
337
|
-
:param project_id: The project ID number to sync. See the `projectId` field in
|
|
338
|
-
https://cloud.google.com/resource-manager/reference/rest/v1/projects
|
|
339
|
-
:param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
|
|
340
|
-
:param common_job_parameters: Other parameters sent to Neo4j
|
|
341
|
-
:return: Nothing
|
|
342
|
-
"""
|
|
343
|
-
# Determine the resources available on the project.
|
|
344
|
-
enabled_services = _services_enabled_on_project(resources.serviceusage, project_id)
|
|
345
|
-
dns_cred = _get_dns_resource(get_gcp_credentials())
|
|
346
|
-
if service_names.dns in enabled_services:
|
|
347
|
-
dns.sync(
|
|
348
|
-
neo4j_session,
|
|
349
|
-
dns_cred,
|
|
350
|
-
project_id,
|
|
351
|
-
gcp_update_tag,
|
|
352
|
-
common_job_parameters,
|
|
353
|
-
)
|
|
158
|
+
del common_job_parameters["PROJECT_ID"]
|
|
354
159
|
|
|
355
160
|
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
resources: Resource,
|
|
359
|
-
project_id: str,
|
|
360
|
-
gcp_update_tag: int,
|
|
361
|
-
common_job_parameters: Dict,
|
|
362
|
-
) -> None:
|
|
161
|
+
@timeit
|
|
162
|
+
def start_gcp_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
|
|
363
163
|
"""
|
|
364
|
-
|
|
164
|
+
Starts the GCP ingestion process by initializing Google Application Default Credentials, creating the necessary
|
|
165
|
+
resource objects, listing all GCP organizations and projects available to the GCP identity, and supplying that
|
|
166
|
+
context to all intel modules.
|
|
365
167
|
:param neo4j_session: The Neo4j session
|
|
366
|
-
:param
|
|
367
|
-
:param project_id: The project ID number to sync. See the `projectId` field in
|
|
368
|
-
https://cloud.google.com/resource-manager/reference/rest/v1/projects
|
|
369
|
-
:param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
|
|
370
|
-
:param common_job_parameters: Other parameters sent to Neo4j
|
|
168
|
+
:param config: A `cartography.config` object
|
|
371
169
|
:return: Nothing
|
|
372
170
|
"""
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
if service_names.iam in enabled_services:
|
|
377
|
-
iam.sync(
|
|
378
|
-
neo4j_session, iam_cred, project_id, gcp_update_tag, common_job_parameters
|
|
379
|
-
)
|
|
380
|
-
|
|
171
|
+
common_job_parameters = {
|
|
172
|
+
"UPDATE_TAG": config.update_tag,
|
|
173
|
+
}
|
|
381
174
|
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
This
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
:return: Nothing
|
|
400
|
-
"""
|
|
401
|
-
logger.info("Syncing %d GCP projects.", len(projects))
|
|
402
|
-
crm.sync_gcp_projects(
|
|
403
|
-
neo4j_session,
|
|
404
|
-
projects,
|
|
405
|
-
gcp_update_tag,
|
|
406
|
-
common_job_parameters,
|
|
175
|
+
# IMPORTANT: We defer cleanup for hierarchical resources (orgs, folders, projects) and run them
|
|
176
|
+
# in reverse order. This prevents orphaned nodes when a parent is deleted.
|
|
177
|
+
# Without this, deleting an org would break its relationships to projects/folders, leaving them
|
|
178
|
+
# disconnected and unable to be cleaned up by their own cleanup jobs.
|
|
179
|
+
#
|
|
180
|
+
# Order of operations:
|
|
181
|
+
# 1. Sync all orgs
|
|
182
|
+
# 2. For each org:
|
|
183
|
+
# a. Sync folders and projects
|
|
184
|
+
# b. Sync project resources (with immediate cleanup)
|
|
185
|
+
# c. Clean up projects and folders for this org
|
|
186
|
+
# 3. Clean up all orgs at the end
|
|
187
|
+
#
|
|
188
|
+
# This ensures children are cleaned up before their parents.
|
|
189
|
+
|
|
190
|
+
orgs = sync_gcp_organizations(
|
|
191
|
+
neo4j_session, config.update_tag, common_job_parameters
|
|
407
192
|
)
|
|
408
|
-
# Compute data sync
|
|
409
|
-
for project in projects:
|
|
410
|
-
project_id = project["projectId"]
|
|
411
|
-
common_job_parameters["PROJECT_ID"] = project_id
|
|
412
|
-
logger.info("Syncing GCP project %s for Compute.", project_id)
|
|
413
|
-
_sync_single_project_compute(
|
|
414
|
-
neo4j_session,
|
|
415
|
-
resources,
|
|
416
|
-
project_id,
|
|
417
|
-
gcp_update_tag,
|
|
418
|
-
common_job_parameters,
|
|
419
|
-
)
|
|
420
|
-
del common_job_parameters["PROJECT_ID"]
|
|
421
193
|
|
|
422
|
-
#
|
|
423
|
-
|
|
424
|
-
project_id = project["projectId"]
|
|
425
|
-
common_job_parameters["PROJECT_ID"] = project_id
|
|
426
|
-
logger.info("Syncing GCP project %s for Storage", project_id)
|
|
427
|
-
_sync_single_project_storage(
|
|
428
|
-
neo4j_session,
|
|
429
|
-
resources,
|
|
430
|
-
project_id,
|
|
431
|
-
gcp_update_tag,
|
|
432
|
-
common_job_parameters,
|
|
433
|
-
)
|
|
434
|
-
del common_job_parameters["PROJECT_ID"]
|
|
194
|
+
# Track org cleanup jobs to run at the very end
|
|
195
|
+
org_cleanup_jobs = []
|
|
435
196
|
|
|
436
|
-
#
|
|
437
|
-
for
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
197
|
+
# For each org, sync its folders and projects (as sub-resources), then ingest per-project services
|
|
198
|
+
for org in orgs:
|
|
199
|
+
org_resource_name = org.get("name", "") # e.g., organizations/123456789012
|
|
200
|
+
if not org_resource_name or "/" not in org_resource_name:
|
|
201
|
+
logger.error(f"Invalid org resource name: {org_resource_name}")
|
|
202
|
+
continue
|
|
203
|
+
|
|
204
|
+
# Store the full resource name for cleanup operations
|
|
205
|
+
common_job_parameters["ORG_RESOURCE_NAME"] = org_resource_name
|
|
206
|
+
|
|
207
|
+
# Sync folders under org
|
|
208
|
+
folders = sync_gcp_folders(
|
|
442
209
|
neo4j_session,
|
|
443
|
-
|
|
444
|
-
project_id,
|
|
445
|
-
gcp_update_tag,
|
|
210
|
+
config.update_tag,
|
|
446
211
|
common_job_parameters,
|
|
212
|
+
org_resource_name,
|
|
447
213
|
)
|
|
448
|
-
del common_job_parameters["PROJECT_ID"]
|
|
449
214
|
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
project_id = project["projectId"]
|
|
453
|
-
common_job_parameters["PROJECT_ID"] = project_id
|
|
454
|
-
logger.info("Syncing GCP project %s for DNS", project_id)
|
|
455
|
-
_sync_single_project_dns(
|
|
215
|
+
# Sync projects under org and each folder
|
|
216
|
+
projects = sync_gcp_projects(
|
|
456
217
|
neo4j_session,
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
218
|
+
org_resource_name,
|
|
219
|
+
folders,
|
|
220
|
+
config.update_tag,
|
|
460
221
|
common_job_parameters,
|
|
461
222
|
)
|
|
462
|
-
del common_job_parameters["PROJECT_ID"]
|
|
463
223
|
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
common_job_parameters["PROJECT_ID"] = project_id
|
|
468
|
-
logger.info("Syncing GCP project %s for IAM", project_id)
|
|
469
|
-
_sync_single_project_iam(
|
|
470
|
-
neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters
|
|
224
|
+
# Ingest per-project resources (these run their own cleanup immediately since they're leaf nodes)
|
|
225
|
+
_sync_project_resources(
|
|
226
|
+
neo4j_session, projects, config.update_tag, common_job_parameters
|
|
471
227
|
)
|
|
472
|
-
del common_job_parameters["PROJECT_ID"]
|
|
473
228
|
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
Gets access tokens for GCP API access.
|
|
479
|
-
:param: None
|
|
480
|
-
:return: GoogleCredentials
|
|
481
|
-
"""
|
|
482
|
-
try:
|
|
483
|
-
# Explicitly use Application Default Credentials.
|
|
484
|
-
# See https://google-auth.readthedocs.io/en/master/user-guide.html#application-default-credentials
|
|
485
|
-
credentials, project_id = default()
|
|
486
|
-
return credentials
|
|
487
|
-
except DefaultCredentialsError as e:
|
|
488
|
-
logger.debug(
|
|
489
|
-
"Error occurred calling GoogleCredentials.get_application_default().",
|
|
490
|
-
exc_info=True,
|
|
229
|
+
# Clean up projects and folders for this org (children before parents)
|
|
230
|
+
logger.debug(f"Running cleanup for projects and folders in {org_resource_name}")
|
|
231
|
+
GraphJob.from_node_schema(GCPProjectSchema(), common_job_parameters).run(
|
|
232
|
+
neo4j_session
|
|
491
233
|
)
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
"Unable to initialize Google Compute Platform creds. If you don't have GCP data or don't want to load "
|
|
495
|
-
"GCP data then you can ignore this message. Otherwise, the error code is: %s "
|
|
496
|
-
"Make sure your GCP credentials are configured correctly, your credentials file (if any) is valid, and "
|
|
497
|
-
"that the identity you are authenticating to has the securityReviewer role attached."
|
|
498
|
-
),
|
|
499
|
-
e,
|
|
234
|
+
GraphJob.from_node_schema(GCPFolderSchema(), common_job_parameters).run(
|
|
235
|
+
neo4j_session
|
|
500
236
|
)
|
|
501
|
-
return None
|
|
502
237
|
|
|
238
|
+
# Save org cleanup job for later
|
|
239
|
+
org_cleanup_jobs.append((GCPOrganizationSchema, dict(common_job_parameters)))
|
|
503
240
|
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
"""
|
|
507
|
-
Starts the GCP ingestion process by initializing Google Application Default Credentials, creating the necessary
|
|
508
|
-
resource objects, listing all GCP organizations and projects available to the GCP identity, and supplying that
|
|
509
|
-
context to all intel modules.
|
|
510
|
-
:param neo4j_session: The Neo4j session
|
|
511
|
-
:param config: A `cartography.config` object
|
|
512
|
-
:return: Nothing
|
|
513
|
-
"""
|
|
514
|
-
common_job_parameters = {
|
|
515
|
-
"UPDATE_TAG": config.update_tag,
|
|
516
|
-
}
|
|
517
|
-
|
|
518
|
-
credentials = get_gcp_credentials()
|
|
519
|
-
if credentials is None:
|
|
520
|
-
logger.warning("Unable to initialize GCP credentials. Skipping module.")
|
|
521
|
-
return
|
|
241
|
+
# Remove org ID from common job parameters after processing
|
|
242
|
+
del common_job_parameters["ORG_RESOURCE_NAME"]
|
|
522
243
|
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
neo4j_session,
|
|
528
|
-
resources.crm_v1,
|
|
529
|
-
config.update_tag,
|
|
530
|
-
common_job_parameters,
|
|
531
|
-
)
|
|
532
|
-
crm.sync_gcp_folders(
|
|
533
|
-
neo4j_session,
|
|
534
|
-
resources.crm_v2,
|
|
535
|
-
config.update_tag,
|
|
536
|
-
common_job_parameters,
|
|
537
|
-
)
|
|
538
|
-
|
|
539
|
-
projects = crm.get_gcp_projects(resources.crm_v1)
|
|
540
|
-
|
|
541
|
-
_sync_multiple_projects(
|
|
542
|
-
neo4j_session,
|
|
543
|
-
resources,
|
|
544
|
-
projects,
|
|
545
|
-
config.update_tag,
|
|
546
|
-
common_job_parameters,
|
|
547
|
-
)
|
|
244
|
+
# Run all org cleanup jobs at the very end, after all children have been cleaned up
|
|
245
|
+
logger.info("Running cleanup for GCP organizations")
|
|
246
|
+
for schema_class, params in org_cleanup_jobs:
|
|
247
|
+
GraphJob.from_node_schema(schema_class(), params).run(neo4j_session)
|
|
548
248
|
|
|
549
249
|
run_analysis_job(
|
|
550
250
|
"gcp_compute_asset_inet_exposure.json",
|