cartography 0.118.0__py3-none-any.whl → 0.119.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (68) hide show
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +20 -0
  3. cartography/client/core/tx.py +19 -3
  4. cartography/config.py +9 -0
  5. cartography/data/indexes.cypher +0 -6
  6. cartography/graph/job.py +7 -5
  7. cartography/intel/aws/__init__.py +21 -9
  8. cartography/intel/aws/ecr.py +7 -0
  9. cartography/intel/aws/ecr_image_layers.py +143 -42
  10. cartography/intel/aws/inspector.py +65 -33
  11. cartography/intel/aws/resourcegroupstaggingapi.py +1 -1
  12. cartography/intel/gcp/compute.py +3 -3
  13. cartography/intel/github/repos.py +23 -5
  14. cartography/intel/gsuite/__init__.py +12 -8
  15. cartography/intel/gsuite/groups.py +291 -0
  16. cartography/intel/gsuite/users.py +142 -0
  17. cartography/intel/okta/awssaml.py +1 -1
  18. cartography/intel/okta/users.py +1 -1
  19. cartography/intel/ontology/__init__.py +44 -0
  20. cartography/intel/ontology/devices.py +54 -0
  21. cartography/intel/ontology/users.py +54 -0
  22. cartography/intel/ontology/utils.py +121 -0
  23. cartography/models/airbyte/user.py +4 -0
  24. cartography/models/anthropic/user.py +4 -0
  25. cartography/models/aws/ecr/image.py +47 -0
  26. cartography/models/aws/iam/group_membership.py +3 -2
  27. cartography/models/aws/identitycenter/awsssouser.py +3 -1
  28. cartography/models/bigfix/bigfix_computer.py +1 -1
  29. cartography/models/cloudflare/member.py +4 -0
  30. cartography/models/crowdstrike/hosts.py +1 -1
  31. cartography/models/duo/endpoint.py +1 -1
  32. cartography/models/duo/phone.py +2 -2
  33. cartography/models/duo/user.py +4 -0
  34. cartography/models/entra/user.py +2 -1
  35. cartography/models/github/users.py +4 -0
  36. cartography/models/gsuite/__init__.py +0 -0
  37. cartography/models/gsuite/group.py +218 -0
  38. cartography/models/gsuite/tenant.py +29 -0
  39. cartography/models/gsuite/user.py +107 -0
  40. cartography/models/kandji/device.py +1 -2
  41. cartography/models/keycloak/user.py +4 -0
  42. cartography/models/lastpass/user.py +4 -0
  43. cartography/models/ontology/__init__.py +0 -0
  44. cartography/models/ontology/device.py +125 -0
  45. cartography/models/ontology/mapping/__init__.py +16 -0
  46. cartography/models/ontology/mapping/data/__init__.py +1 -0
  47. cartography/models/ontology/mapping/data/devices.py +160 -0
  48. cartography/models/ontology/mapping/data/users.py +239 -0
  49. cartography/models/ontology/mapping/specs.py +65 -0
  50. cartography/models/ontology/user.py +52 -0
  51. cartography/models/openai/user.py +4 -0
  52. cartography/models/scaleway/iam/user.py +4 -0
  53. cartography/models/snipeit/asset.py +1 -0
  54. cartography/models/snipeit/user.py +4 -0
  55. cartography/models/tailscale/device.py +1 -1
  56. cartography/models/tailscale/user.py +6 -1
  57. cartography/rules/data/frameworks/mitre_attack/requirements/t1098_account_manipulation/__init__.py +176 -89
  58. cartography/sync.py +3 -0
  59. cartography/util.py +44 -17
  60. {cartography-0.118.0.dist-info → cartography-0.119.0.dist-info}/METADATA +1 -1
  61. {cartography-0.118.0.dist-info → cartography-0.119.0.dist-info}/RECORD +65 -50
  62. cartography/data/jobs/cleanup/gsuite_ingest_groups_cleanup.json +0 -23
  63. cartography/data/jobs/cleanup/gsuite_ingest_users_cleanup.json +0 -11
  64. cartography/intel/gsuite/api.py +0 -355
  65. {cartography-0.118.0.dist-info → cartography-0.119.0.dist-info}/WHEEL +0 -0
  66. {cartography-0.118.0.dist-info → cartography-0.119.0.dist-info}/entry_points.txt +0 -0
  67. {cartography-0.118.0.dist-info → cartography-0.119.0.dist-info}/licenses/LICENSE +0 -0
  68. {cartography-0.118.0.dist-info → cartography-0.119.0.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,7 @@ from typing import Set
7
7
  from typing import Tuple
8
8
 
9
9
  import boto3
10
+ import botocore
10
11
  import neo4j
11
12
 
12
13
  from cartography.client.core.tx import load
@@ -17,7 +18,9 @@ from cartography.models.aws.inspector.findings import InspectorFindingToPackageM
17
18
  from cartography.models.aws.inspector.packages import AWSInspectorPackageSchema
18
19
  from cartography.util import aws_handle_regions
19
20
  from cartography.util import aws_paginate
21
+ from cartography.util import AWS_REGION_ACCESS_DENIED_ERROR_CODES
20
22
  from cartography.util import batch
23
+ from cartography.util import is_service_control_policy_explicit_deny
21
24
  from cartography.util import timeit
22
25
 
23
26
  logger = logging.getLogger(__name__)
@@ -70,7 +73,6 @@ def get_member_accounts(
70
73
 
71
74
 
72
75
  @timeit
73
- @aws_handle_regions
74
76
  def get_inspector_findings(
75
77
  session: boto3.session.Session,
76
78
  region: str,
@@ -83,6 +85,10 @@ def get_inspector_findings(
83
85
  only fetch those in ACTIVE or SUPPRESSED statuses.
84
86
  Run the query in batches and return an iterator to fetch the results.
85
87
  """
88
+ # Note: We can't use @aws_handle_regions decorator here because this function returns a generator.
89
+ # The decorator would only catch exceptions during function call, not during iteration.
90
+ # Instead, we rely on aws_handle_regions being applied at get_member_accounts level,
91
+ # and the paginate operation itself will raise errors that bubble up naturally.
86
92
  client = session.client("inspector2", region_name=region)
87
93
  logger.info(
88
94
  f"Getting findings in batches of {batch_size} for account {account_id} in region {region}"
@@ -308,38 +314,64 @@ def _sync_findings_for_account(
308
314
  """
309
315
  Syncs the findings for a given account in a given region.
310
316
  """
311
- findings = get_inspector_findings(boto3_session, region, account_id, batch_size)
312
- if not findings:
313
- logger.info(f"No findings to sync for account {account_id} in region {region}")
314
- return
315
- for f_batch in findings:
316
- finding_data, package_data, finding_to_package_map = (
317
- transform_inspector_findings(f_batch)
318
- )
319
- logger.info(f"Loading {len(finding_data)} findings from account {account_id}")
320
- load_inspector_findings(
321
- neo4j_session,
322
- finding_data,
323
- region,
324
- update_tag,
325
- current_aws_account_id,
326
- )
327
- logger.info(f"Loading {len(package_data)} packages")
328
- load_inspector_packages(
329
- neo4j_session,
330
- package_data,
331
- update_tag,
332
- current_aws_account_id,
333
- )
334
- logger.info(
335
- f"Loading {len(finding_to_package_map)} finding to package relationships"
336
- )
337
- load_inspector_finding_to_package_match_links(
338
- neo4j_session,
339
- finding_to_package_map,
340
- update_tag,
341
- current_aws_account_id,
342
- )
317
+ try:
318
+ findings = get_inspector_findings(boto3_session, region, account_id, batch_size)
319
+ if not findings:
320
+ logger.info(
321
+ f"No findings to sync for account {account_id} in region {region}"
322
+ )
323
+ return
324
+ for f_batch in findings:
325
+ finding_data, package_data, finding_to_package_map = (
326
+ transform_inspector_findings(f_batch)
327
+ )
328
+ logger.info(
329
+ f"Loading {len(finding_data)} findings from account {account_id}"
330
+ )
331
+ load_inspector_findings(
332
+ neo4j_session,
333
+ finding_data,
334
+ region,
335
+ update_tag,
336
+ current_aws_account_id,
337
+ )
338
+ logger.info(f"Loading {len(package_data)} packages")
339
+ load_inspector_packages(
340
+ neo4j_session,
341
+ package_data,
342
+ update_tag,
343
+ current_aws_account_id,
344
+ )
345
+ logger.info(
346
+ f"Loading {len(finding_to_package_map)} finding to package relationships"
347
+ )
348
+ load_inspector_finding_to_package_match_links(
349
+ neo4j_session,
350
+ finding_to_package_map,
351
+ update_tag,
352
+ current_aws_account_id,
353
+ )
354
+ except botocore.exceptions.ClientError as e:
355
+ error_code = e.response.get("Error", {}).get("Code")
356
+ # Handle the same error codes as aws_handle_regions decorator
357
+ if error_code in AWS_REGION_ACCESS_DENIED_ERROR_CODES:
358
+ error_message = e.response.get("Error", {}).get("Message")
359
+ if is_service_control_policy_explicit_deny(e):
360
+ logger.warning(
361
+ "Service control policy denied access to Inspector findings for account %s in region %s: %s",
362
+ account_id,
363
+ region,
364
+ error_message,
365
+ )
366
+ else:
367
+ logger.warning(
368
+ "Access denied to Inspector findings for account %s in region %s. Skipping...",
369
+ account_id,
370
+ region,
371
+ )
372
+ return
373
+ else:
374
+ raise
343
375
 
344
376
 
345
377
  @timeit
@@ -253,7 +253,7 @@ def _load_tags_tx(
253
253
  UpdateTag=aws_update_tag,
254
254
  Region=region,
255
255
  Account=current_aws_account_id,
256
- )
256
+ ).consume()
257
257
 
258
258
 
259
259
  @timeit
@@ -1032,7 +1032,7 @@ def _load_gcp_ingress_firewalls_tx(
1032
1032
  VpcPartialUri=fw["vpc_partial_uri"],
1033
1033
  HasTargetServiceAccounts=fw["has_target_service_accounts"],
1034
1034
  gcp_update_tag=gcp_update_tag,
1035
- )
1035
+ ).consume()
1036
1036
  _attach_firewall_rules(tx, fw, gcp_update_tag)
1037
1037
  _attach_target_tags(tx, fw, gcp_update_tag)
1038
1038
 
@@ -1095,7 +1095,7 @@ def _attach_firewall_rules(
1095
1095
  ToPort=rule.get("toport"),
1096
1096
  Range=ip_range,
1097
1097
  gcp_update_tag=gcp_update_tag,
1098
- )
1098
+ ).consume()
1099
1099
 
1100
1100
 
1101
1101
  @timeit
@@ -1132,7 +1132,7 @@ def _attach_target_tags(
1132
1132
  TagId=tag_id,
1133
1133
  TagValue=tag,
1134
1134
  gcp_update_tag=gcp_update_tag,
1135
- )
1135
+ ).consume()
1136
1136
 
1137
1137
 
1138
1138
  @timeit
@@ -4,6 +4,7 @@ from collections import defaultdict
4
4
  from collections import namedtuple
5
5
  from string import Template
6
6
  from typing import Any
7
+ from typing import cast
7
8
  from typing import Dict
8
9
  from typing import List
9
10
  from typing import Optional
@@ -157,12 +158,19 @@ def _get_repo_collaborators_inner_func(
157
158
  org: str,
158
159
  api_url: str,
159
160
  token: str,
160
- repo_raw_data: list[dict[str, Any]],
161
+ repo_raw_data: list[dict[str, Any] | None],
161
162
  affiliation: str,
162
163
  ) -> dict[str, list[UserAffiliationAndRepoPermission]]:
163
164
  result: dict[str, list[UserAffiliationAndRepoPermission]] = {}
164
165
 
165
166
  for repo in repo_raw_data:
167
+ # GitHub can return null repo entries. See issues #1334 and #1404.
168
+ if repo is None:
169
+ logger.info(
170
+ "Skipping null repository entry while fetching %s collaborators.",
171
+ affiliation,
172
+ )
173
+ continue
166
174
  repo_name = repo["name"]
167
175
  repo_url = repo["url"]
168
176
 
@@ -212,7 +220,7 @@ def _get_repo_collaborators_inner_func(
212
220
 
213
221
 
214
222
  def _get_repo_collaborators_for_multiple_repos(
215
- repo_raw_data: list[dict[str, Any]],
223
+ repo_raw_data: list[dict[str, Any] | None],
216
224
  affiliation: str,
217
225
  org: str,
218
226
  api_url: str,
@@ -279,7 +287,7 @@ def _get_repo_collaborators(
279
287
 
280
288
 
281
289
  @timeit
282
- def get(token: str, api_url: str, organization: str) -> List[Dict]:
290
+ def get(token: str, api_url: str, organization: str) -> List[Optional[Dict]]:
283
291
  """
284
292
  Retrieve a list of repos from a Github organization as described in
285
293
  https://docs.github.com/en/graphql/reference/objects#repository.
@@ -287,6 +295,8 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
287
295
  :param api_url: The Github v4 API endpoint as string.
288
296
  :param organization: The name of the target Github organization as string.
289
297
  :return: A list of dicts representing repos. See tests.data.github.repos for data shape.
298
+ Note: The list may contain None entries per GraphQL spec when resolvers error
299
+ (permissions, rate limits, transient issues). See issues #1334 and #1404.
290
300
  """
291
301
  # TODO: link the Github organization to the repositories
292
302
  repos, _ = fetch_all(
@@ -297,11 +307,15 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
297
307
  "repositories",
298
308
  count=50,
299
309
  )
300
- return repos.nodes
310
+ # Cast is needed because GitHub's GraphQL RepositoryConnection.nodes is typed [Repository] (not [Repository!])
311
+ # per GraphQL spec, allowing null entries when resolvers error (permissions, rate limits, transient issues).
312
+ # See https://github.com/cartography-cncf/cartography/issues/1334
313
+ # and https://github.com/cartography-cncf/cartography/issues/1404
314
+ return cast(List[Optional[Dict]], repos.nodes)
301
315
 
302
316
 
303
317
  def transform(
304
- repos_json: List[Dict],
318
+ repos_json: List[Optional[Dict]],
305
319
  direct_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
306
320
  outside_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
307
321
  ) -> Dict:
@@ -340,6 +354,10 @@ def transform(
340
354
  transformed_dependencies: List[Dict] = []
341
355
  transformed_manifests: List[Dict] = []
342
356
  for repo_object in repos_json:
357
+ # GitHub can return null repo entries. See issues #1334 and #1404.
358
+ if repo_object is None:
359
+ logger.debug("Skipping null repository entry during transformation.")
360
+ continue
343
361
  _transform_repo_languages(
344
362
  repo_object["url"],
345
363
  repo_object,
@@ -16,7 +16,8 @@ from google.oauth2.service_account import Credentials as ServiceAccountCredentia
16
16
  from googleapiclient.discovery import Resource
17
17
 
18
18
  from cartography.config import Config
19
- from cartography.intel.gsuite import api
19
+ from cartography.intel.gsuite import groups
20
+ from cartography.intel.gsuite import users
20
21
  from cartography.util import timeit
21
22
 
22
23
  OAUTH_SCOPES = [
@@ -148,15 +149,18 @@ def start_gsuite_ingestion(neo4j_session: neo4j.Session, config: Config) -> None
148
149
  return
149
150
 
150
151
  resources = _initialize_resources(creds)
151
- api.sync_gsuite_users(
152
- neo4j_session,
153
- resources.admin,
154
- config.update_tag,
155
- common_job_parameters,
156
- )
157
- api.sync_gsuite_groups(
152
+ customer_ids = users.sync_gsuite_users(
158
153
  neo4j_session,
159
154
  resources.admin,
160
155
  config.update_tag,
161
156
  common_job_parameters,
162
157
  )
158
+ for customer_id in customer_ids:
159
+ scoped_job_parameters = common_job_parameters.copy()
160
+ scoped_job_parameters["CUSTOMER_ID"] = customer_id
161
+ groups.sync_gsuite_groups(
162
+ neo4j_session,
163
+ resources.admin,
164
+ config.update_tag,
165
+ scoped_job_parameters,
166
+ )
@@ -0,0 +1,291 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ import neo4j
5
+ from googleapiclient.discovery import Resource
6
+ from googleapiclient.errors import HttpError
7
+
8
+ from cartography.client.core.tx import load
9
+ from cartography.client.core.tx import load_matchlinks
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.models.gsuite.group import GSuiteGroupSchema
12
+ from cartography.models.gsuite.group import GSuiteGroupToGroupMemberRel
13
+ from cartography.models.gsuite.group import GSuiteGroupToGroupOwnerRel
14
+ from cartography.models.gsuite.tenant import GSuiteTenantSchema
15
+ from cartography.util import timeit
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ GOOGLE_API_NUM_RETRIES = 5
20
+
21
+
@timeit
def get_all_groups(
    admin: Resource, customer_id: str = "my_customer"
) -> list[dict[str, Any]]:
    """
    Fetch every Google Group of a customer, following pagination until exhausted.

    :param admin: google's apiclient discovery resource object. From googleapiclient.discovery.build
    See https://googleapis.github.io/google-api-python-client/docs/epy/googleapiclient.discovery-module.html#build.
    :param customer_id: value passed as the `customer` argument of the groups list call
    :return: list of raw group dicts collected from the "groups" key of each response page
    :raises HttpError: every API error is re-raised. A 403 caused by insufficient
    authentication scopes is logged with a remediation hint before being re-raised.
    """
    collected: list[dict[str, Any]] = []
    page_request = admin.groups().list(
        customer=customer_id,
        maxResults=20,
        orderBy="email",
    )
    try:
        # list_next() returns None once there are no more pages, which ends the loop.
        while page_request is not None:
            page = page_request.execute(num_retries=GOOGLE_API_NUM_RETRIES)
            collected.extend(page.get("groups", []))
            page_request = admin.groups().list_next(page_request, page)
    except HttpError as e:
        missing_scopes = (
            e.resp.status == 403
            and "Request had insufficient authentication scopes" in str(e)
        )
        if missing_scopes:
            logger.error(
                "Missing required GSuite scopes. If using the gcloud CLI, "
                "run: gcloud auth application-default login --scopes="
                '"https://www.googleapis.com/auth/admin.directory.user.readonly,'
                "https://www.googleapis.com/auth/admin.directory.group.readonly,"
                "https://www.googleapis.com/auth/admin.directory.group.member.readonly,"
                'https://www.googleapis.com/auth/cloud-platform"'
            )
        # The error is never swallowed: callers always see the original exception.
        raise
    return collected
62
+
63
+
@timeit
def get_members_for_groups(
    admin: Resource, groups_email: list[str]
) -> dict[str, list[dict[str, Any]]]:
    """
    Get all members for the given group emails.

    :param admin: google's apiclient discovery resource object. From googleapiclient.discovery.build
    See https://googleapis.github.io/google-api-python-client/docs/epy/googleapiclient.discovery-module.html#build.
    :param groups_email: list of group email addresses to get members for
    :return: dict keyed by group email; each value is the list of raw member dicts
    collected from the "members" key of every response page for that group.
    A group with no members maps to an empty list.
    """
    results: dict[str, list[dict[str, Any]]] = {}
    for group_email in groups_email:
        members: list[dict[str, Any]] = []
        request = admin.members().list(
            groupKey=group_email,
            maxResults=500,
        )
        # list_next() returns None once there are no more pages, which ends the loop.
        while request is not None:
            resp = request.execute(num_retries=GOOGLE_API_NUM_RETRIES)
            # Extend in place: rebuilding the list with `+` on every page would
            # re-copy all previously fetched members each time.
            members.extend(resp.get("members", []))
            request = admin.members().list_next(request, resp)
        results[group_email] = members

    return results
92
+
93
+
@timeit
def transform_groups(
    groups: list[dict], group_memberships: dict[str, list[dict[str, Any]]]
) -> tuple[list[dict], list[dict], list[dict]]:
    """
    Shape raw groups for ingestion and derive the group-to-group relationships.

    Each group dict is updated in place with two new keys, `member_ids` and
    `owner_ids`, holding the ids of its non-group members. Members whose type
    is "GROUP" are not added to those lists; they are emitted as separate
    relationship dicts with keys `parent_group_id`, `subgroup_id` and `role`.

    :param groups: Raw groups from Google API
    :param group_memberships: Group memberships data, keyed by group email
    :return: tuple of (groups, group_member_relationships, group_owner_relationships)
    """
    transformed_groups: list[dict] = []
    group_member_relationships: list[dict] = []
    group_owner_relationships: list[dict] = []

    for group in groups:
        group_id = group["id"]
        group_email = group["email"]
        group["member_ids"] = []
        group["owner_ids"] = []

        for member in group_memberships.get(group_email, []):
            role = member.get("role")
            member_id = member.get("id")

            # Use .get() like every other member field so that a record
            # without "type" is treated as a non-group member instead of
            # aborting the whole transform with a KeyError.
            if member.get("type") == "GROUP":
                # Create group-to-group relationships
                relationship_data = {
                    "parent_group_id": group_id,
                    "subgroup_id": member_id,
                    "role": role,
                }
                if role == "OWNER":
                    group_owner_relationships.append(relationship_data)
                else:
                    group_member_relationships.append(relationship_data)
                continue

            # Handle user memberships: an owner is recorded in both lists.
            if role == "OWNER":
                group["owner_ids"].append(member_id)
            group["member_ids"].append(member_id)

        transformed_groups.append(group)

    return transformed_groups, group_member_relationships, group_owner_relationships
137
+
138
+
@timeit
def load_gsuite_groups(
    neo4j_session: neo4j.Session,
    groups: list[dict],
    customer_id: str,
    gsuite_update_tag: int,
) -> None:
    """
    Write the tenant node and then the group nodes through the data-model loader.

    :param neo4j_session: The Neo4j session
    :param groups: group dicts to load with GSuiteGroupSchema
    :param customer_id: id of the tenant node; also passed to the group load as CUSTOMER_ID
    :param gsuite_update_tag: value passed to both loads as `lastupdated`
    :return: Nothing
    """
    logger.info("Ingesting %d gsuite groups", len(groups))

    # The tenant is loaded before the groups, which are loaded with its CUSTOMER_ID.
    load(
        neo4j_session,
        GSuiteTenantSchema(),
        [{"id": customer_id}],
        lastupdated=gsuite_update_tag,
    )
    load(
        neo4j_session,
        GSuiteGroupSchema(),
        groups,
        lastupdated=gsuite_update_tag,
        CUSTOMER_ID=customer_id,
    )
168
+
169
+
@timeit
def load_gsuite_group_to_group_relationships(
    neo4j_session: neo4j.Session,
    group_member_relationships: list[dict],
    group_owner_relationships: list[dict],
    customer_id: str,
    gsuite_update_tag: int,
) -> None:
    """
    Write group-to-group MEMBER and OWNER links as MatchLinks under the tenant.

    :param neo4j_session: The Neo4j session
    :param group_member_relationships: relationship dicts loaded with GSuiteGroupToGroupMemberRel
    :param group_owner_relationships: relationship dicts loaded with GSuiteGroupToGroupOwnerRel
    :param customer_id: passed to load_matchlinks as `_sub_resource_id`
    :param gsuite_update_tag: value passed to load_matchlinks as `lastupdated`
    :return: Nothing
    """
    logger.info(
        "Ingesting %d group member relationships", len(group_member_relationships)
    )
    logger.info(
        "Ingesting %d group owner relationships", len(group_owner_relationships)
    )

    # Member links first, then owner links; an empty list skips its load entirely.
    pending = (
        (GSuiteGroupToGroupMemberRel, group_member_relationships),
        (GSuiteGroupToGroupOwnerRel, group_owner_relationships),
    )
    for rel_schema_cls, relationships in pending:
        if not relationships:
            continue
        load_matchlinks(
            neo4j_session,
            rel_schema_cls(),
            relationships,
            lastupdated=gsuite_update_tag,
            _sub_resource_label="GSuiteTenant",
            _sub_resource_id=customer_id,
        )
209
+
210
+
@timeit
def cleanup_gsuite_groups(
    neo4j_session: neo4j.Session,
    common_job_parameters: dict[str, Any],
    customer_id: str,
    gsuite_update_tag: int,
) -> None:
    """
    Run the cleanup jobs for group nodes and for both group-to-group MatchLinks.

    :param neo4j_session: The Neo4j session
    :param common_job_parameters: parameters handed to the node-schema cleanup job
    :param customer_id: tenant id handed to the MatchLink cleanup jobs
    :param gsuite_update_tag: update tag handed to the MatchLink cleanup jobs
    :return: Nothing
    """
    logger.debug("Running GSuite groups cleanup job")

    # Group nodes are cleaned first.
    node_cleanup = GraphJob.from_node_schema(GSuiteGroupSchema(), common_job_parameters)
    node_cleanup.run(neo4j_session)

    # Then the member links, followed by the owner links.
    for rel_schema_cls in (GSuiteGroupToGroupMemberRel, GSuiteGroupToGroupOwnerRel):
        link_cleanup = GraphJob.from_matchlink(
            rel_schema_cls(),
            "GSuiteTenant",
            customer_id,
            gsuite_update_tag,
        )
        link_cleanup.run(neo4j_session)
243
+
244
+
@timeit
def sync_gsuite_groups(
    neo4j_session: neo4j.Session,
    admin: Resource,
    gsuite_update_tag: int,
    common_job_parameters: dict[str, Any],
) -> None:
    """
    Fetch GSuite groups and their members, load them into Neo4j, then clean up stale data.

    :param neo4j_session: The Neo4j session
    :param admin: Google admin resource object created by `googleapiclient.discovery.build()`.
    See https://googleapis.github.io/google-api-python-client/docs/epy/googleapiclient.discovery-module.html#build.
    :param gsuite_update_tag: The timestamp value to set our new Neo4j nodes with
    :param common_job_parameters: Parameters to carry to the Neo4j jobs. The optional
    "CUSTOMER_ID" key selects the customer to sync.
    :return: Nothing
    """
    logger.debug("Syncing GSuite Groups")

    # Default to "my_customer" for backward compatibility
    customer_id = common_job_parameters.get("CUSTOMER_ID", "my_customer")

    # 1. GET - groups first, then the members of each group by email
    raw_groups = get_all_groups(admin, customer_id)
    group_emails = [raw_group["email"] for raw_group in raw_groups]
    memberships = get_members_for_groups(admin, group_emails)

    # 2. TRANSFORM - shape the data for ingestion
    groups, member_rels, owner_rels = transform_groups(raw_groups, memberships)

    # 3. LOAD - group nodes must exist before the links between them are written
    load_gsuite_groups(neo4j_session, groups, customer_id, gsuite_update_tag)
    load_gsuite_group_to_group_relationships(
        neo4j_session,
        member_rels,
        owner_rels,
        customer_id,
        gsuite_update_tag,
    )

    # 4. CLEANUP - the cleanup parameters always carry the resolved customer id
    cleanup_params = {**common_job_parameters, "CUSTOMER_ID": customer_id}
    cleanup_gsuite_groups(neo4j_session, cleanup_params, customer_id, gsuite_update_tag)