cartography 0.118.0__py3-none-any.whl → 0.119.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +20 -0
- cartography/client/core/tx.py +19 -3
- cartography/config.py +9 -0
- cartography/data/indexes.cypher +0 -6
- cartography/graph/job.py +7 -5
- cartography/intel/aws/__init__.py +21 -9
- cartography/intel/aws/ecr.py +7 -0
- cartography/intel/aws/ecr_image_layers.py +143 -42
- cartography/intel/aws/inspector.py +65 -33
- cartography/intel/aws/resourcegroupstaggingapi.py +1 -1
- cartography/intel/gcp/compute.py +3 -3
- cartography/intel/github/repos.py +23 -5
- cartography/intel/gsuite/__init__.py +12 -8
- cartography/intel/gsuite/groups.py +291 -0
- cartography/intel/gsuite/users.py +142 -0
- cartography/intel/okta/awssaml.py +1 -1
- cartography/intel/okta/users.py +1 -1
- cartography/intel/ontology/__init__.py +44 -0
- cartography/intel/ontology/devices.py +54 -0
- cartography/intel/ontology/users.py +54 -0
- cartography/intel/ontology/utils.py +121 -0
- cartography/models/airbyte/user.py +4 -0
- cartography/models/anthropic/user.py +4 -0
- cartography/models/aws/ecr/image.py +47 -0
- cartography/models/aws/iam/group_membership.py +3 -2
- cartography/models/aws/identitycenter/awsssouser.py +3 -1
- cartography/models/bigfix/bigfix_computer.py +1 -1
- cartography/models/cloudflare/member.py +4 -0
- cartography/models/crowdstrike/hosts.py +1 -1
- cartography/models/duo/endpoint.py +1 -1
- cartography/models/duo/phone.py +2 -2
- cartography/models/duo/user.py +4 -0
- cartography/models/entra/user.py +2 -1
- cartography/models/github/users.py +4 -0
- cartography/models/gsuite/__init__.py +0 -0
- cartography/models/gsuite/group.py +218 -0
- cartography/models/gsuite/tenant.py +29 -0
- cartography/models/gsuite/user.py +107 -0
- cartography/models/kandji/device.py +1 -2
- cartography/models/keycloak/user.py +4 -0
- cartography/models/lastpass/user.py +4 -0
- cartography/models/ontology/__init__.py +0 -0
- cartography/models/ontology/device.py +125 -0
- cartography/models/ontology/mapping/__init__.py +16 -0
- cartography/models/ontology/mapping/data/__init__.py +1 -0
- cartography/models/ontology/mapping/data/devices.py +160 -0
- cartography/models/ontology/mapping/data/users.py +239 -0
- cartography/models/ontology/mapping/specs.py +65 -0
- cartography/models/ontology/user.py +52 -0
- cartography/models/openai/user.py +4 -0
- cartography/models/scaleway/iam/user.py +4 -0
- cartography/models/snipeit/asset.py +1 -0
- cartography/models/snipeit/user.py +4 -0
- cartography/models/tailscale/device.py +1 -1
- cartography/models/tailscale/user.py +6 -1
- cartography/rules/data/frameworks/mitre_attack/requirements/t1098_account_manipulation/__init__.py +176 -89
- cartography/sync.py +3 -0
- cartography/util.py +44 -17
- {cartography-0.118.0.dist-info → cartography-0.119.0.dist-info}/METADATA +1 -1
- {cartography-0.118.0.dist-info → cartography-0.119.0.dist-info}/RECORD +65 -50
- cartography/data/jobs/cleanup/gsuite_ingest_groups_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gsuite_ingest_users_cleanup.json +0 -11
- cartography/intel/gsuite/api.py +0 -355
- {cartography-0.118.0.dist-info → cartography-0.119.0.dist-info}/WHEEL +0 -0
- {cartography-0.118.0.dist-info → cartography-0.119.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.118.0.dist-info → cartography-0.119.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.118.0.dist-info → cartography-0.119.0.dist-info}/top_level.txt +0 -0
|
@@ -7,6 +7,7 @@ from typing import Set
|
|
|
7
7
|
from typing import Tuple
|
|
8
8
|
|
|
9
9
|
import boto3
|
|
10
|
+
import botocore
|
|
10
11
|
import neo4j
|
|
11
12
|
|
|
12
13
|
from cartography.client.core.tx import load
|
|
@@ -17,7 +18,9 @@ from cartography.models.aws.inspector.findings import InspectorFindingToPackageM
|
|
|
17
18
|
from cartography.models.aws.inspector.packages import AWSInspectorPackageSchema
|
|
18
19
|
from cartography.util import aws_handle_regions
|
|
19
20
|
from cartography.util import aws_paginate
|
|
21
|
+
from cartography.util import AWS_REGION_ACCESS_DENIED_ERROR_CODES
|
|
20
22
|
from cartography.util import batch
|
|
23
|
+
from cartography.util import is_service_control_policy_explicit_deny
|
|
21
24
|
from cartography.util import timeit
|
|
22
25
|
|
|
23
26
|
logger = logging.getLogger(__name__)
|
|
@@ -70,7 +73,6 @@ def get_member_accounts(
|
|
|
70
73
|
|
|
71
74
|
|
|
72
75
|
@timeit
|
|
73
|
-
@aws_handle_regions
|
|
74
76
|
def get_inspector_findings(
|
|
75
77
|
session: boto3.session.Session,
|
|
76
78
|
region: str,
|
|
@@ -83,6 +85,10 @@ def get_inspector_findings(
|
|
|
83
85
|
only fetch those in ACTIVE or SUPPRESSED statuses.
|
|
84
86
|
Run the query in batches and return an iterator to fetch the results.
|
|
85
87
|
"""
|
|
88
|
+
# Note: We can't use @aws_handle_regions decorator here because this function returns a generator.
|
|
89
|
+
# The decorator would only catch exceptions during function call, not during iteration.
|
|
90
|
+
# Instead, we rely on aws_handle_regions being applied at get_member_accounts level,
|
|
91
|
+
# and the paginate operation itself will raise errors that bubble up naturally.
|
|
86
92
|
client = session.client("inspector2", region_name=region)
|
|
87
93
|
logger.info(
|
|
88
94
|
f"Getting findings in batches of {batch_size} for account {account_id} in region {region}"
|
|
@@ -308,38 +314,64 @@ def _sync_findings_for_account(
|
|
|
308
314
|
"""
|
|
309
315
|
Syncs the findings for a given account in a given region.
|
|
310
316
|
"""
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
317
|
+
try:
|
|
318
|
+
findings = get_inspector_findings(boto3_session, region, account_id, batch_size)
|
|
319
|
+
if not findings:
|
|
320
|
+
logger.info(
|
|
321
|
+
f"No findings to sync for account {account_id} in region {region}"
|
|
322
|
+
)
|
|
323
|
+
return
|
|
324
|
+
for f_batch in findings:
|
|
325
|
+
finding_data, package_data, finding_to_package_map = (
|
|
326
|
+
transform_inspector_findings(f_batch)
|
|
327
|
+
)
|
|
328
|
+
logger.info(
|
|
329
|
+
f"Loading {len(finding_data)} findings from account {account_id}"
|
|
330
|
+
)
|
|
331
|
+
load_inspector_findings(
|
|
332
|
+
neo4j_session,
|
|
333
|
+
finding_data,
|
|
334
|
+
region,
|
|
335
|
+
update_tag,
|
|
336
|
+
current_aws_account_id,
|
|
337
|
+
)
|
|
338
|
+
logger.info(f"Loading {len(package_data)} packages")
|
|
339
|
+
load_inspector_packages(
|
|
340
|
+
neo4j_session,
|
|
341
|
+
package_data,
|
|
342
|
+
update_tag,
|
|
343
|
+
current_aws_account_id,
|
|
344
|
+
)
|
|
345
|
+
logger.info(
|
|
346
|
+
f"Loading {len(finding_to_package_map)} finding to package relationships"
|
|
347
|
+
)
|
|
348
|
+
load_inspector_finding_to_package_match_links(
|
|
349
|
+
neo4j_session,
|
|
350
|
+
finding_to_package_map,
|
|
351
|
+
update_tag,
|
|
352
|
+
current_aws_account_id,
|
|
353
|
+
)
|
|
354
|
+
except botocore.exceptions.ClientError as e:
|
|
355
|
+
error_code = e.response.get("Error", {}).get("Code")
|
|
356
|
+
# Handle the same error codes as aws_handle_regions decorator
|
|
357
|
+
if error_code in AWS_REGION_ACCESS_DENIED_ERROR_CODES:
|
|
358
|
+
error_message = e.response.get("Error", {}).get("Message")
|
|
359
|
+
if is_service_control_policy_explicit_deny(e):
|
|
360
|
+
logger.warning(
|
|
361
|
+
"Service control policy denied access to Inspector findings for account %s in region %s: %s",
|
|
362
|
+
account_id,
|
|
363
|
+
region,
|
|
364
|
+
error_message,
|
|
365
|
+
)
|
|
366
|
+
else:
|
|
367
|
+
logger.warning(
|
|
368
|
+
"Access denied to Inspector findings for account %s in region %s. Skipping...",
|
|
369
|
+
account_id,
|
|
370
|
+
region,
|
|
371
|
+
)
|
|
372
|
+
return
|
|
373
|
+
else:
|
|
374
|
+
raise
|
|
343
375
|
|
|
344
376
|
|
|
345
377
|
@timeit
|
cartography/intel/gcp/compute.py
CHANGED
|
@@ -1032,7 +1032,7 @@ def _load_gcp_ingress_firewalls_tx(
|
|
|
1032
1032
|
VpcPartialUri=fw["vpc_partial_uri"],
|
|
1033
1033
|
HasTargetServiceAccounts=fw["has_target_service_accounts"],
|
|
1034
1034
|
gcp_update_tag=gcp_update_tag,
|
|
1035
|
-
)
|
|
1035
|
+
).consume()
|
|
1036
1036
|
_attach_firewall_rules(tx, fw, gcp_update_tag)
|
|
1037
1037
|
_attach_target_tags(tx, fw, gcp_update_tag)
|
|
1038
1038
|
|
|
@@ -1095,7 +1095,7 @@ def _attach_firewall_rules(
|
|
|
1095
1095
|
ToPort=rule.get("toport"),
|
|
1096
1096
|
Range=ip_range,
|
|
1097
1097
|
gcp_update_tag=gcp_update_tag,
|
|
1098
|
-
)
|
|
1098
|
+
).consume()
|
|
1099
1099
|
|
|
1100
1100
|
|
|
1101
1101
|
@timeit
|
|
@@ -1132,7 +1132,7 @@ def _attach_target_tags(
|
|
|
1132
1132
|
TagId=tag_id,
|
|
1133
1133
|
TagValue=tag,
|
|
1134
1134
|
gcp_update_tag=gcp_update_tag,
|
|
1135
|
-
)
|
|
1135
|
+
).consume()
|
|
1136
1136
|
|
|
1137
1137
|
|
|
1138
1138
|
@timeit
|
|
@@ -4,6 +4,7 @@ from collections import defaultdict
|
|
|
4
4
|
from collections import namedtuple
|
|
5
5
|
from string import Template
|
|
6
6
|
from typing import Any
|
|
7
|
+
from typing import cast
|
|
7
8
|
from typing import Dict
|
|
8
9
|
from typing import List
|
|
9
10
|
from typing import Optional
|
|
@@ -157,12 +158,19 @@ def _get_repo_collaborators_inner_func(
|
|
|
157
158
|
org: str,
|
|
158
159
|
api_url: str,
|
|
159
160
|
token: str,
|
|
160
|
-
repo_raw_data: list[dict[str, Any]],
|
|
161
|
+
repo_raw_data: list[dict[str, Any] | None],
|
|
161
162
|
affiliation: str,
|
|
162
163
|
) -> dict[str, list[UserAffiliationAndRepoPermission]]:
|
|
163
164
|
result: dict[str, list[UserAffiliationAndRepoPermission]] = {}
|
|
164
165
|
|
|
165
166
|
for repo in repo_raw_data:
|
|
167
|
+
# GitHub can return null repo entries. See issues #1334 and #1404.
|
|
168
|
+
if repo is None:
|
|
169
|
+
logger.info(
|
|
170
|
+
"Skipping null repository entry while fetching %s collaborators.",
|
|
171
|
+
affiliation,
|
|
172
|
+
)
|
|
173
|
+
continue
|
|
166
174
|
repo_name = repo["name"]
|
|
167
175
|
repo_url = repo["url"]
|
|
168
176
|
|
|
@@ -212,7 +220,7 @@ def _get_repo_collaborators_inner_func(
|
|
|
212
220
|
|
|
213
221
|
|
|
214
222
|
def _get_repo_collaborators_for_multiple_repos(
|
|
215
|
-
repo_raw_data: list[dict[str, Any]],
|
|
223
|
+
repo_raw_data: list[dict[str, Any] | None],
|
|
216
224
|
affiliation: str,
|
|
217
225
|
org: str,
|
|
218
226
|
api_url: str,
|
|
@@ -279,7 +287,7 @@ def _get_repo_collaborators(
|
|
|
279
287
|
|
|
280
288
|
|
|
281
289
|
@timeit
|
|
282
|
-
def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
290
|
+
def get(token: str, api_url: str, organization: str) -> List[Optional[Dict]]:
|
|
283
291
|
"""
|
|
284
292
|
Retrieve a list of repos from a Github organization as described in
|
|
285
293
|
https://docs.github.com/en/graphql/reference/objects#repository.
|
|
@@ -287,6 +295,8 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
|
287
295
|
:param api_url: The Github v4 API endpoint as string.
|
|
288
296
|
:param organization: The name of the target Github organization as string.
|
|
289
297
|
:return: A list of dicts representing repos. See tests.data.github.repos for data shape.
|
|
298
|
+
Note: The list may contain None entries per GraphQL spec when resolvers error
|
|
299
|
+
(permissions, rate limits, transient issues). See issues #1334 and #1404.
|
|
290
300
|
"""
|
|
291
301
|
# TODO: link the Github organization to the repositories
|
|
292
302
|
repos, _ = fetch_all(
|
|
@@ -297,11 +307,15 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
|
297
307
|
"repositories",
|
|
298
308
|
count=50,
|
|
299
309
|
)
|
|
300
|
-
|
|
310
|
+
# Cast is needed because GitHub's GraphQL RepositoryConnection.nodes is typed [Repository] (not [Repository!])
|
|
311
|
+
# per GraphQL spec, allowing null entries when resolvers error (permissions, rate limits, transient issues).
|
|
312
|
+
# See https://github.com/cartography-cncf/cartography/issues/1334
|
|
313
|
+
# and https://github.com/cartography-cncf/cartography/issues/1404
|
|
314
|
+
return cast(List[Optional[Dict]], repos.nodes)
|
|
301
315
|
|
|
302
316
|
|
|
303
317
|
def transform(
|
|
304
|
-
repos_json: List[Dict],
|
|
318
|
+
repos_json: List[Optional[Dict]],
|
|
305
319
|
direct_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
|
|
306
320
|
outside_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
|
|
307
321
|
) -> Dict:
|
|
@@ -340,6 +354,10 @@ def transform(
|
|
|
340
354
|
transformed_dependencies: List[Dict] = []
|
|
341
355
|
transformed_manifests: List[Dict] = []
|
|
342
356
|
for repo_object in repos_json:
|
|
357
|
+
# GitHub can return null repo entries. See issues #1334 and #1404.
|
|
358
|
+
if repo_object is None:
|
|
359
|
+
logger.debug("Skipping null repository entry during transformation.")
|
|
360
|
+
continue
|
|
343
361
|
_transform_repo_languages(
|
|
344
362
|
repo_object["url"],
|
|
345
363
|
repo_object,
|
|
@@ -16,7 +16,8 @@ from google.oauth2.service_account import Credentials as ServiceAccountCredentia
|
|
|
16
16
|
from googleapiclient.discovery import Resource
|
|
17
17
|
|
|
18
18
|
from cartography.config import Config
|
|
19
|
-
from cartography.intel.gsuite import
|
|
19
|
+
from cartography.intel.gsuite import groups
|
|
20
|
+
from cartography.intel.gsuite import users
|
|
20
21
|
from cartography.util import timeit
|
|
21
22
|
|
|
22
23
|
OAUTH_SCOPES = [
|
|
@@ -148,15 +149,18 @@ def start_gsuite_ingestion(neo4j_session: neo4j.Session, config: Config) -> None
|
|
|
148
149
|
return
|
|
149
150
|
|
|
150
151
|
resources = _initialize_resources(creds)
|
|
151
|
-
|
|
152
|
-
neo4j_session,
|
|
153
|
-
resources.admin,
|
|
154
|
-
config.update_tag,
|
|
155
|
-
common_job_parameters,
|
|
156
|
-
)
|
|
157
|
-
api.sync_gsuite_groups(
|
|
152
|
+
customer_ids = users.sync_gsuite_users(
|
|
158
153
|
neo4j_session,
|
|
159
154
|
resources.admin,
|
|
160
155
|
config.update_tag,
|
|
161
156
|
common_job_parameters,
|
|
162
157
|
)
|
|
158
|
+
for customer_id in customer_ids:
|
|
159
|
+
scoped_job_parameters = common_job_parameters.copy()
|
|
160
|
+
scoped_job_parameters["CUSTOMER_ID"] = customer_id
|
|
161
|
+
groups.sync_gsuite_groups(
|
|
162
|
+
neo4j_session,
|
|
163
|
+
resources.admin,
|
|
164
|
+
config.update_tag,
|
|
165
|
+
scoped_job_parameters,
|
|
166
|
+
)
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
import neo4j
|
|
5
|
+
from googleapiclient.discovery import Resource
|
|
6
|
+
from googleapiclient.errors import HttpError
|
|
7
|
+
|
|
8
|
+
from cartography.client.core.tx import load
|
|
9
|
+
from cartography.client.core.tx import load_matchlinks
|
|
10
|
+
from cartography.graph.job import GraphJob
|
|
11
|
+
from cartography.models.gsuite.group import GSuiteGroupSchema
|
|
12
|
+
from cartography.models.gsuite.group import GSuiteGroupToGroupMemberRel
|
|
13
|
+
from cartography.models.gsuite.group import GSuiteGroupToGroupOwnerRel
|
|
14
|
+
from cartography.models.gsuite.tenant import GSuiteTenantSchema
|
|
15
|
+
from cartography.util import timeit
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
GOOGLE_API_NUM_RETRIES = 5
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@timeit
def get_all_groups(
    admin: Resource, customer_id: str = "my_customer"
) -> list[dict[str, Any]]:
    """
    Collect every Google Group returned by the Directory API for a customer.

    The listing is requested ordered by email, 20 results per page, and is
    followed page by page until ``list_next`` reports that nothing is left.

    :param admin: google's apiclient discovery resource object. From googleapiclient.discovery.build
    See https://googleapis.github.io/google-api-python-client/docs/epy/googleapiclient.discovery-module.html#build.
    :param customer_id: value sent as the ``customer`` argument of the list call.
    :return: the ``groups`` entries of all pages, concatenated in page order.
    :raises HttpError: any API error is re-raised to the caller. A 403 caused by
        insufficient authentication scopes is logged with remediation steps first.
    """
    collected: list[dict[str, Any]] = []
    pending = admin.groups().list(
        customer=customer_id,
        maxResults=20,
        orderBy="email",
    )
    while pending is not None:
        try:
            page = pending.execute(num_retries=GOOGLE_API_NUM_RETRIES)
            collected.extend(page.get("groups", []))
            pending = admin.groups().list_next(pending, page)
        except HttpError as e:
            # Only the missing-scope case gets a dedicated hint; every error
            # (this one included) still propagates.
            missing_scopes = (
                e.resp.status == 403
                and "Request had insufficient authentication scopes" in str(e)
            )
            if missing_scopes:
                logger.error(
                    "Missing required GSuite scopes. If using the gcloud CLI, "
                    "run: gcloud auth application-default login --scopes="
                    '"https://www.googleapis.com/auth/admin.directory.user.readonly,'
                    "https://www.googleapis.com/auth/admin.directory.group.readonly,"
                    "https://www.googleapis.com/auth/admin.directory.group.member.readonly,"
                    'https://www.googleapis.com/auth/cloud-platform"'
                )
            raise
    return collected
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@timeit
def get_members_for_groups(
    admin: Resource, groups_email: list[str]
) -> dict[str, list[dict[str, Any]]]:
    """Get all members for the given group emails.

    Args:
        admin (Resource): google's apiclient discovery resource object. From googleapiclient.discovery.build
        See https://googleapis.github.io/google-api-python-client/docs/epy/googleapiclient.discovery-module.html#build.
        groups_email (list[str]): List of group email addresses to get members for

    :return: mapping of group email to the ``members`` entries collected from
        every page of that group's member listing (empty list when a group has none)
    """
    results: dict[str, list[dict[str, Any]]] = {}
    for group_email in groups_email:
        request = admin.members().list(
            groupKey=group_email,
            maxResults=500,
        )
        members: list[dict[str, Any]] = []
        while request is not None:
            resp = request.execute(num_retries=GOOGLE_API_NUM_RETRIES)
            # Extend in place: rebuilding the list with `+` on every page
            # re-copies everything gathered so far.
            members.extend(resp.get("members", []))
            request = admin.members().list_next(request, resp)
        results[group_email] = members

    return results
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@timeit
def transform_groups(
    groups: list[dict], group_memberships: dict[str, list[dict[str, Any]]]
) -> tuple[list[dict], list[dict], list[dict]]:
    """Attach user membership ids to each group and extract group-to-group relationships.

    Members whose ``type`` is ``"GROUP"`` become relationship records
    (``parent_group_id``, ``subgroup_id``, ``role``); an ``OWNER`` role goes to
    the owner list, any other role to the member list. Every other member
    (including one without a ``type``) is treated as a user: its id is always
    added to the group's ``member_ids`` and, for an ``OWNER``, also to ``owner_ids``.

    NOTE: the dicts in ``groups`` are annotated in place and the same objects
    are returned.

    :param groups: Raw groups from Google API; each needs ``id`` and ``email``
    :param group_memberships: Group memberships data keyed by group email
    :return: tuple of (groups, group_member_relationships, group_owner_relationships)
    """
    transformed_groups: list[dict] = []
    group_member_relationships: list[dict] = []
    group_owner_relationships: list[dict] = []

    for group in groups:
        group_id = group["id"]
        group_email = group["email"]
        member_ids: list[Any] = []
        owner_ids: list[Any] = []
        group["member_ids"] = member_ids
        group["owner_ids"] = owner_ids

        for member in group_memberships.get(group_email, []):
            member_id = member.get("id")
            role = member.get("role")
            is_owner = role == "OWNER"

            # `.get` keeps a record without "type" from aborting the whole
            # transform; it falls through to the user branch below.
            if member.get("type") == "GROUP":
                # Create group-to-group relationships
                relationship_data = {
                    "parent_group_id": group_id,
                    "subgroup_id": member_id,
                    "role": role,
                }
                if is_owner:
                    group_owner_relationships.append(relationship_data)
                else:
                    group_member_relationships.append(relationship_data)
                continue

            # Handle user memberships: owners are recorded as members as well
            if is_owner:
                owner_ids.append(member_id)
            member_ids.append(member_id)

        transformed_groups.append(group)

    return transformed_groups, group_member_relationships, group_owner_relationships
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@timeit
def load_gsuite_groups(
    neo4j_session: neo4j.Session,
    groups: list[dict],
    customer_id: str,
    gsuite_update_tag: int,
) -> None:
    """
    Write the tenant record and then the group records through the data model.

    :param neo4j_session: session handed to both ``load`` calls
    :param groups: group dicts to ingest with ``GSuiteGroupSchema``
    :param customer_id: id of the tenant record; also passed as ``CUSTOMER_ID`` for the groups
    :param gsuite_update_tag: value passed as ``lastupdated`` to both loads
    """
    logger.info("Ingesting %d gsuite groups", len(groups))

    # The tenant goes in first so the groups loaded next can be tied to it.
    load(
        neo4j_session,
        GSuiteTenantSchema(),
        [{"id": customer_id}],
        lastupdated=gsuite_update_tag,
    )

    load(
        neo4j_session,
        GSuiteGroupSchema(),
        groups,
        lastupdated=gsuite_update_tag,
        CUSTOMER_ID=customer_id,
    )
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@timeit
def load_gsuite_group_to_group_relationships(
    neo4j_session: neo4j.Session,
    group_member_relationships: list[dict],
    group_owner_relationships: list[dict],
    customer_id: str,
    gsuite_update_tag: int,
) -> None:
    """
    Ingest group-to-group MEMBER and OWNER relationships as MatchLinks.

    Both counts are logged up front; a relationship kind with no rows is
    skipped entirely. Each load is scoped to the ``GSuiteTenant`` sub-resource
    identified by ``customer_id``.
    """
    logger.info(
        "Ingesting %d group member relationships", len(group_member_relationships)
    )
    logger.info(
        "Ingesting %d group owner relationships", len(group_owner_relationships)
    )

    # Member links first, then owner links.
    batches = (
        (GSuiteGroupToGroupMemberRel, group_member_relationships),
        (GSuiteGroupToGroupOwnerRel, group_owner_relationships),
    )
    for rel_schema_cls, rel_rows in batches:
        if not rel_rows:
            continue
        load_matchlinks(
            neo4j_session,
            rel_schema_cls(),
            rel_rows,
            lastupdated=gsuite_update_tag,
            _sub_resource_label="GSuiteTenant",
            _sub_resource_id=customer_id,
        )
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
@timeit
def cleanup_gsuite_groups(
    neo4j_session: neo4j.Session,
    common_job_parameters: dict[str, Any],
    customer_id: str,
    gsuite_update_tag: int,
) -> None:
    """
    Run the cleanup jobs for GSuite groups and their group-to-group MatchLinks.

    :param neo4j_session: session each job is run against
    :param common_job_parameters: parameters for the node-schema cleanup job
    :param customer_id: ``GSuiteTenant`` id the MatchLink cleanups are scoped to
    :param gsuite_update_tag: update tag handed to the MatchLink cleanup jobs
    """
    logger.debug("Running GSuite groups cleanup job")

    # Group nodes
    node_job = GraphJob.from_node_schema(GSuiteGroupSchema(), common_job_parameters)
    node_job.run(neo4j_session)

    # Group-to-group relationships: member links, then owner links
    for rel_schema_cls in (GSuiteGroupToGroupMemberRel, GSuiteGroupToGroupOwnerRel):
        GraphJob.from_matchlink(
            rel_schema_cls(),
            "GSuiteTenant",
            customer_id,
            gsuite_update_tag,
        ).run(neo4j_session)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
@timeit
def sync_gsuite_groups(
    neo4j_session: neo4j.Session,
    admin: Resource,
    gsuite_update_tag: int,
    common_job_parameters: dict[str, Any],
) -> None:
    """
    Fetch GSuite groups and memberships, load them into Neo4j, then clean up stale data.

    :param neo4j_session: The Neo4j session
    :param admin: Google admin resource object created by `googleapiclient.discovery.build()`.
    See https://googleapis.github.io/google-api-python-client/docs/epy/googleapiclient.discovery-module.html#build.
    :param gsuite_update_tag: The timestamp value to set our new Neo4j nodes with
    :param common_job_parameters: Parameters to carry to the Neo4j jobs; its
        ``CUSTOMER_ID`` entry selects the customer ("my_customer" when absent)
    :return: Nothing
    """
    logger.debug("Syncing GSuite Groups")

    # Default to "my_customer" for backward compatibility
    customer_id = common_job_parameters.get("CUSTOMER_ID", "my_customer")

    # 1. GET - groups first, then the members of each group by email
    raw_groups = get_all_groups(admin, customer_id)
    group_emails = [raw_group["email"] for raw_group in raw_groups]
    memberships = get_members_for_groups(admin, group_emails)

    # 2. TRANSFORM
    groups, member_rels, owner_rels = transform_groups(raw_groups, memberships)

    # 3. LOAD - group nodes must exist before the group-to-group links
    load_gsuite_groups(neo4j_session, groups, customer_id, gsuite_update_tag)
    load_gsuite_group_to_group_relationships(
        neo4j_session,
        member_rels,
        owner_rels,
        customer_id,
        gsuite_update_tag,
    )

    # 4. CLEANUP - make sure the job parameters carry the resolved customer id
    cleanup_params = dict(common_job_parameters, CUSTOMER_ID=customer_id)
    cleanup_gsuite_groups(neo4j_session, cleanup_params, customer_id, gsuite_update_tag)
|