cartography 0.117.0__py3-none-any.whl → 0.119.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (107) hide show
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +31 -0
  3. cartography/client/core/tx.py +19 -3
  4. cartography/config.py +14 -0
  5. cartography/data/indexes.cypher +0 -6
  6. cartography/graph/job.py +13 -7
  7. cartography/graph/statement.py +4 -0
  8. cartography/intel/aws/__init__.py +22 -9
  9. cartography/intel/aws/apigateway.py +18 -5
  10. cartography/intel/aws/ec2/elastic_ip_addresses.py +3 -1
  11. cartography/intel/aws/ec2/internet_gateways.py +4 -2
  12. cartography/intel/aws/ec2/load_balancer_v2s.py +11 -5
  13. cartography/intel/aws/ec2/network_interfaces.py +4 -0
  14. cartography/intel/aws/ec2/reserved_instances.py +3 -1
  15. cartography/intel/aws/ec2/tgw.py +11 -5
  16. cartography/intel/aws/ec2/volumes.py +1 -1
  17. cartography/intel/aws/ecr.py +209 -26
  18. cartography/intel/aws/ecr_image_layers.py +143 -42
  19. cartography/intel/aws/elasticsearch.py +13 -4
  20. cartography/intel/aws/identitycenter.py +93 -54
  21. cartography/intel/aws/inspector.py +90 -46
  22. cartography/intel/aws/permission_relationships.py +3 -3
  23. cartography/intel/aws/resourcegroupstaggingapi.py +1 -1
  24. cartography/intel/aws/s3.py +26 -13
  25. cartography/intel/aws/ssm.py +3 -5
  26. cartography/intel/azure/compute.py +9 -4
  27. cartography/intel/azure/cosmosdb.py +31 -15
  28. cartography/intel/azure/sql.py +25 -12
  29. cartography/intel/azure/storage.py +19 -9
  30. cartography/intel/azure/subscription.py +3 -1
  31. cartography/intel/crowdstrike/spotlight.py +5 -2
  32. cartography/intel/entra/app_role_assignments.py +9 -2
  33. cartography/intel/gcp/__init__.py +26 -9
  34. cartography/intel/gcp/clients.py +8 -4
  35. cartography/intel/gcp/compute.py +42 -21
  36. cartography/intel/gcp/crm/folders.py +9 -3
  37. cartography/intel/gcp/crm/orgs.py +8 -3
  38. cartography/intel/gcp/crm/projects.py +14 -3
  39. cartography/intel/github/repos.py +23 -5
  40. cartography/intel/gsuite/__init__.py +12 -8
  41. cartography/intel/gsuite/groups.py +291 -0
  42. cartography/intel/gsuite/users.py +142 -0
  43. cartography/intel/jamf/computers.py +7 -1
  44. cartography/intel/oci/iam.py +23 -9
  45. cartography/intel/oci/organizations.py +3 -1
  46. cartography/intel/oci/utils.py +28 -5
  47. cartography/intel/okta/awssaml.py +9 -8
  48. cartography/intel/okta/users.py +1 -1
  49. cartography/intel/ontology/__init__.py +44 -0
  50. cartography/intel/ontology/devices.py +54 -0
  51. cartography/intel/ontology/users.py +54 -0
  52. cartography/intel/ontology/utils.py +121 -0
  53. cartography/intel/pagerduty/escalation_policies.py +13 -6
  54. cartography/intel/pagerduty/schedules.py +9 -4
  55. cartography/intel/pagerduty/services.py +7 -3
  56. cartography/intel/pagerduty/teams.py +5 -2
  57. cartography/intel/pagerduty/users.py +3 -1
  58. cartography/intel/pagerduty/vendors.py +3 -1
  59. cartography/intel/trivy/__init__.py +109 -58
  60. cartography/models/airbyte/user.py +4 -0
  61. cartography/models/anthropic/user.py +4 -0
  62. cartography/models/aws/ec2/networkinterfaces.py +2 -0
  63. cartography/models/aws/ecr/image.py +55 -0
  64. cartography/models/aws/ecr/repository_image.py +1 -1
  65. cartography/models/aws/iam/group_membership.py +3 -2
  66. cartography/models/aws/identitycenter/awsssouser.py +3 -1
  67. cartography/models/bigfix/bigfix_computer.py +1 -1
  68. cartography/models/cloudflare/member.py +4 -0
  69. cartography/models/crowdstrike/hosts.py +1 -1
  70. cartography/models/duo/endpoint.py +1 -1
  71. cartography/models/duo/phone.py +2 -2
  72. cartography/models/duo/user.py +4 -0
  73. cartography/models/entra/user.py +2 -1
  74. cartography/models/github/users.py +4 -0
  75. cartography/models/gsuite/__init__.py +0 -0
  76. cartography/models/gsuite/group.py +218 -0
  77. cartography/models/gsuite/tenant.py +29 -0
  78. cartography/models/gsuite/user.py +107 -0
  79. cartography/models/kandji/device.py +1 -2
  80. cartography/models/keycloak/user.py +4 -0
  81. cartography/models/lastpass/user.py +4 -0
  82. cartography/models/ontology/__init__.py +0 -0
  83. cartography/models/ontology/device.py +125 -0
  84. cartography/models/ontology/mapping/__init__.py +16 -0
  85. cartography/models/ontology/mapping/data/__init__.py +1 -0
  86. cartography/models/ontology/mapping/data/devices.py +160 -0
  87. cartography/models/ontology/mapping/data/users.py +239 -0
  88. cartography/models/ontology/mapping/specs.py +65 -0
  89. cartography/models/ontology/user.py +52 -0
  90. cartography/models/openai/user.py +4 -0
  91. cartography/models/scaleway/iam/user.py +4 -0
  92. cartography/models/snipeit/asset.py +1 -0
  93. cartography/models/snipeit/user.py +4 -0
  94. cartography/models/tailscale/device.py +1 -1
  95. cartography/models/tailscale/user.py +6 -1
  96. cartography/rules/data/frameworks/mitre_attack/requirements/t1098_account_manipulation/__init__.py +176 -89
  97. cartography/sync.py +4 -1
  98. cartography/util.py +49 -18
  99. {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/METADATA +3 -3
  100. {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/RECORD +104 -89
  101. cartography/data/jobs/cleanup/gsuite_ingest_groups_cleanup.json +0 -23
  102. cartography/data/jobs/cleanup/gsuite_ingest_users_cleanup.json +0 -11
  103. cartography/intel/gsuite/api.py +0 -355
  104. {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/WHEEL +0 -0
  105. {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/entry_points.txt +0 -0
  106. {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/licenses/LICENSE +0 -0
  107. {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/top_level.txt +0 -0
@@ -15,6 +15,7 @@ from googleapiclient.discovery import Resource
15
15
  from googleapiclient.errors import HttpError
16
16
 
17
17
  from cartography.client.core.tx import load
18
+ from cartography.client.core.tx import run_write_query
18
19
  from cartography.graph.job import GraphJob
19
20
  from cartography.models.gcp.compute.vpc import GCPVpcSchema
20
21
  from cartography.util import run_cleanup_job
@@ -619,7 +620,8 @@ def load_gcp_instances(
619
620
  SET r.lastupdated = $gcp_update_tag
620
621
  """
621
622
  for instance in data:
622
- neo4j_session.run(
623
+ run_write_query(
624
+ neo4j_session,
623
625
  query,
624
626
  ProjectId=instance["project_id"],
625
627
  PartialUri=instance["partial_uri"],
@@ -714,7 +716,8 @@ def load_gcp_forwarding_rules(
714
716
  network = fwd.get("network", None)
715
717
  subnetwork = fwd.get("subnetwork", None)
716
718
 
717
- neo4j_session.run(
719
+ run_write_query(
720
+ neo4j_session,
718
721
  query,
719
722
  PartialUri=fwd["partial_uri"],
720
723
  IPAddress=fwd["ip_address"],
@@ -760,7 +763,8 @@ def _attach_fwd_rule_to_subnet(
760
763
  SET p.lastupdated = $gcp_update_tag
761
764
  """
762
765
 
763
- neo4j_session.run(
766
+ run_write_query(
767
+ neo4j_session,
764
768
  query,
765
769
  PartialUri=fwd["partial_uri"],
766
770
  SubNetworkPartialUri=fwd.get("subnetwork_partial_uri", None),
@@ -787,7 +791,8 @@ def _attach_fwd_rule_to_vpc(
787
791
  SET r.lastupdated = $gcp_update_tag
788
792
  """
789
793
 
790
- neo4j_session.run(
794
+ run_write_query(
795
+ neo4j_session,
791
796
  query,
792
797
  PartialUri=fwd["partial_uri"],
793
798
  NetworkPartialUri=fwd.get("network_partial_uri", None),
@@ -831,7 +836,8 @@ def _attach_instance_tags(
831
836
  for tag in instance.get("tags", {}).get("items", []):
832
837
  for nic in instance.get("networkInterfaces", []):
833
838
  tag_id = _create_gcp_network_tag_id(nic["vpc_partial_uri"], tag)
834
- neo4j_session.run(
839
+ run_write_query(
840
+ neo4j_session,
835
841
  query,
836
842
  InstanceId=instance["partial_uri"],
837
843
  TagId=tag_id,
@@ -880,7 +886,8 @@ def _attach_gcp_nics(
880
886
  for nic in instance.get("networkInterfaces", []):
881
887
  # Make an ID for GCPNetworkInterface nodes because GCP doesn't define one but we need to uniquely identify them
882
888
  nic_id = f"{instance['partial_uri']}/networkinterfaces/{nic['name']}"
883
- neo4j_session.run(
889
+ run_write_query(
890
+ neo4j_session,
884
891
  query,
885
892
  InstanceId=instance["partial_uri"],
886
893
  NicId=nic_id,
@@ -926,7 +933,8 @@ def _attach_gcp_nic_access_configs(
926
933
  for ac in nic.get("accessConfigs", []):
927
934
  # Make an ID for GCPNicAccessConfig nodes because GCP doesn't define one but we need to uniquely identify them
928
935
  access_config_id = f"{nic_id}/accessconfigs/{ac['type']}"
929
- neo4j_session.run(
936
+ run_write_query(
937
+ neo4j_session,
930
938
  query,
931
939
  NicId=nic_id,
932
940
  AccessConfigId=access_config_id,
@@ -960,7 +968,8 @@ def _attach_gcp_vpc(
960
968
  ON CREATE SET m.firstseen = timestamp()
961
969
  SET m.lastupdated = $gcp_update_tag
962
970
  """
963
- neo4j_session.run(
971
+ run_write_query(
972
+ neo4j_session,
964
973
  query,
965
974
  InstanceId=instance_id,
966
975
  gcp_update_tag=gcp_update_tag,
@@ -974,10 +983,22 @@ def load_gcp_ingress_firewalls(
974
983
  gcp_update_tag: int,
975
984
  ) -> None:
976
985
  """
977
- Load the firewall list to Neo4j
986
+ Load the firewall list to Neo4j.
978
987
  :param fw_list: The transformed list of firewalls
979
988
  :return: Nothing
980
989
  """
990
+ neo4j_session.execute_write(
991
+ _load_gcp_ingress_firewalls_tx,
992
+ fw_list,
993
+ gcp_update_tag,
994
+ )
995
+
996
+
997
+ def _load_gcp_ingress_firewalls_tx(
998
+ tx: neo4j.Transaction,
999
+ fw_list: List[Resource],
1000
+ gcp_update_tag: int,
1001
+ ) -> None:
981
1002
  query = """
982
1003
  MERGE (fw:GCPFirewall{id:$FwPartialUri})
983
1004
  ON CREATE SET fw.firstseen = timestamp(),
@@ -1000,7 +1021,7 @@ def load_gcp_ingress_firewalls(
1000
1021
  SET r.lastupdated = $gcp_update_tag
1001
1022
  """
1002
1023
  for fw in fw_list:
1003
- neo4j_session.run(
1024
+ tx.run(
1004
1025
  query,
1005
1026
  FwPartialUri=fw["id"],
1006
1027
  Direction=fw["direction"],
@@ -1011,20 +1032,20 @@ def load_gcp_ingress_firewalls(
1011
1032
  VpcPartialUri=fw["vpc_partial_uri"],
1012
1033
  HasTargetServiceAccounts=fw["has_target_service_accounts"],
1013
1034
  gcp_update_tag=gcp_update_tag,
1014
- )
1015
- _attach_firewall_rules(neo4j_session, fw, gcp_update_tag)
1016
- _attach_target_tags(neo4j_session, fw, gcp_update_tag)
1035
+ ).consume()
1036
+ _attach_firewall_rules(tx, fw, gcp_update_tag)
1037
+ _attach_target_tags(tx, fw, gcp_update_tag)
1017
1038
 
1018
1039
 
1019
1040
  @timeit
1020
1041
  def _attach_firewall_rules(
1021
- neo4j_session: neo4j.Session,
1042
+ tx: neo4j.Transaction,
1022
1043
  fw: Resource,
1023
1044
  gcp_update_tag: int,
1024
1045
  ) -> None:
1025
1046
  """
1026
1047
  Attach the allow_rules to the Firewall object
1027
- :param neo4j_session: The Neo4j session
1048
+ :param tx: The Neo4j transaction
1028
1049
  :param fw: The Firewall object
1029
1050
  :param gcp_update_tag: The timestamp
1030
1051
  :return: Nothing
@@ -1065,7 +1086,7 @@ def _attach_firewall_rules(
1065
1086
  # If sourceRanges is not specified then the rule must specify sourceTags.
1066
1087
  # Since an IP range cannot have a tag applied to it, it is ok if we don't ingest this rule.
1067
1088
  for ip_range in fw.get("sourceRanges", []):
1068
- neo4j_session.run(
1089
+ tx.run(
1069
1090
  template.safe_substitute(fw_rule_relationship_label=label),
1070
1091
  FwPartialUri=fw["id"],
1071
1092
  RuleId=rule["ruleid"],
@@ -1074,18 +1095,18 @@ def _attach_firewall_rules(
1074
1095
  ToPort=rule.get("toport"),
1075
1096
  Range=ip_range,
1076
1097
  gcp_update_tag=gcp_update_tag,
1077
- )
1098
+ ).consume()
1078
1099
 
1079
1100
 
1080
1101
  @timeit
1081
1102
  def _attach_target_tags(
1082
- neo4j_session: neo4j.Session,
1103
+ tx: neo4j.Transaction,
1083
1104
  fw: Resource,
1084
1105
  gcp_update_tag: int,
1085
1106
  ) -> None:
1086
1107
  """
1087
1108
  Attach target tags to the firewall object
1088
- :param neo4j_session: The neo4j session
1109
+ :param tx: The neo4j transaction
1089
1110
  :param fw: The firewall object
1090
1111
  :param gcp_update_tag: The timestamp
1091
1112
  :return: Nothing
@@ -1105,13 +1126,13 @@ def _attach_target_tags(
1105
1126
  """
1106
1127
  for tag in fw.get("targetTags", []):
1107
1128
  tag_id = _create_gcp_network_tag_id(fw["vpc_partial_uri"], tag)
1108
- neo4j_session.run(
1129
+ tx.run(
1109
1130
  query,
1110
1131
  FwPartialUri=fw["id"],
1111
1132
  TagId=tag_id,
1112
1133
  TagValue=tag,
1113
1134
  gcp_update_tag=gcp_update_tag,
1114
- )
1135
+ ).consume()
1115
1136
 
1116
1137
 
1117
1138
  @timeit
@@ -1,8 +1,10 @@
1
1
  import logging
2
2
  from typing import Dict
3
3
  from typing import List
4
+ from typing import Optional
4
5
 
5
6
  import neo4j
7
+ from google.auth.credentials import Credentials as GoogleCredentials
6
8
  from google.cloud import resourcemanager_v3
7
9
 
8
10
  from cartography.client.core.tx import load
@@ -13,7 +15,10 @@ logger = logging.getLogger(__name__)
13
15
 
14
16
 
15
17
  @timeit
16
- def get_gcp_folders(org_resource_name: str) -> List[Dict]:
18
+ def get_gcp_folders(
19
+ org_resource_name: str,
20
+ credentials: Optional[GoogleCredentials] = None,
21
+ ) -> List[Dict]:
17
22
  """
18
23
  Return a list of all descendant GCP folders under the specified organization by traversing the folder tree.
19
24
 
@@ -21,7 +26,7 @@ def get_gcp_folders(org_resource_name: str) -> List[Dict]:
21
26
  :return: List of folder dicts with 'name' field containing full resource names (e.g., "folders/123456")
22
27
  """
23
28
  results: List[Dict] = []
24
- client = resourcemanager_v3.FoldersClient()
29
+ client = resourcemanager_v3.FoldersClient(credentials=credentials)
25
30
  # BFS over folders starting at the org root
26
31
  queue: List[str] = [org_resource_name]
27
32
  seen: set[str] = set()
@@ -96,6 +101,7 @@ def sync_gcp_folders(
96
101
  gcp_update_tag: int,
97
102
  common_job_parameters: Dict,
98
103
  org_resource_name: str,
104
+ credentials: Optional[GoogleCredentials] = None,
99
105
  ) -> List[Dict]:
100
106
  """
101
107
  Get GCP folder data using the CRM v2 resource object and load the data to Neo4j.
@@ -103,6 +109,6 @@ def sync_gcp_folders(
103
109
  :return: List of folders synced
104
110
  """
105
111
  logger.debug("Syncing GCP folders")
106
- folders = get_gcp_folders(org_resource_name)
112
+ folders = get_gcp_folders(org_resource_name, credentials=credentials)
107
113
  load_gcp_folders(neo4j_session, folders, gcp_update_tag, org_resource_name)
108
114
  return folders
@@ -1,8 +1,10 @@
1
1
  import logging
2
2
  from typing import Dict
3
3
  from typing import List
4
+ from typing import Optional
4
5
 
5
6
  import neo4j
7
+ from google.auth.credentials import Credentials as GoogleCredentials
6
8
  from google.cloud import resourcemanager_v3
7
9
 
8
10
  from cartography.client.core.tx import load
@@ -13,13 +15,15 @@ logger = logging.getLogger(__name__)
13
15
 
14
16
 
15
17
  @timeit
16
- def get_gcp_organizations() -> List[Dict]:
18
+ def get_gcp_organizations(
19
+ credentials: Optional[GoogleCredentials] = None,
20
+ ) -> List[Dict]:
17
21
  """
18
22
  Return list of GCP organizations that the authenticated principal can access using the high-level client.
19
23
  Returns empty list on error.
20
24
  :return: List of org dicts with keys: name, displayName, lifecycleState.
21
25
  """
22
- client = resourcemanager_v3.OrganizationsClient()
26
+ client = resourcemanager_v3.OrganizationsClient(credentials=credentials)
23
27
  orgs = []
24
28
  for org in client.search_organizations():
25
29
  orgs.append(
@@ -54,12 +58,13 @@ def sync_gcp_organizations(
54
58
  neo4j_session: neo4j.Session,
55
59
  gcp_update_tag: int,
56
60
  common_job_parameters: Dict,
61
+ credentials: Optional[GoogleCredentials] = None,
57
62
  ) -> List[Dict]:
58
63
  """
59
64
  Get GCP organization data using the CRM v1 resource object and load the data to Neo4j.
60
65
  Returns the list of organizations synced.
61
66
  """
62
67
  logger.debug("Syncing GCP organizations")
63
- data = get_gcp_organizations()
68
+ data = get_gcp_organizations(credentials=credentials)
64
69
  load_gcp_organizations(neo4j_session, data, gcp_update_tag)
65
70
  return data
@@ -1,8 +1,10 @@
1
1
  import logging
2
2
  from typing import Dict
3
3
  from typing import List
4
+ from typing import Optional
4
5
 
5
6
  import neo4j
7
+ from google.auth.credentials import Credentials as GoogleCredentials
6
8
  from google.cloud import resourcemanager_v3
7
9
 
8
10
  from cartography.client.core.tx import load
@@ -13,7 +15,11 @@ logger = logging.getLogger(__name__)
13
15
 
14
16
 
15
17
  @timeit
16
- def get_gcp_projects(org_resource_name: str, folders: List[Dict]) -> List[Dict]:
18
+ def get_gcp_projects(
19
+ org_resource_name: str,
20
+ folders: List[Dict],
21
+ credentials: Optional[GoogleCredentials] = None,
22
+ ) -> List[Dict]:
17
23
  """
18
24
  Return list of ACTIVE GCP projects under the specified organization
19
25
  and within the specified folders.
@@ -25,7 +31,7 @@ def get_gcp_projects(org_resource_name: str, folders: List[Dict]) -> List[Dict]:
25
31
  parents = set([org_resource_name] + folder_names)
26
32
  results: List[Dict] = []
27
33
  for parent in parents:
28
- client = resourcemanager_v3.ProjectsClient()
34
+ client = resourcemanager_v3.ProjectsClient(credentials=credentials)
29
35
  for proj in client.list_projects(parent=parent):
30
36
  # list_projects returns ACTIVE projects by default
31
37
  name_field = proj.name # "projects/<number>"
@@ -96,6 +102,7 @@ def sync_gcp_projects(
96
102
  folders: List[Dict],
97
103
  gcp_update_tag: int,
98
104
  common_job_parameters: Dict,
105
+ credentials: Optional[GoogleCredentials] = None,
99
106
  ) -> List[Dict]:
100
107
  """
101
108
  Get and sync GCP project data to Neo4j.
@@ -104,6 +111,10 @@ def sync_gcp_projects(
104
111
  :return: List of projects synced
105
112
  """
106
113
  logger.debug("Syncing GCP projects")
107
- projects = get_gcp_projects(org_resource_name, folders)
114
+ projects = get_gcp_projects(
115
+ org_resource_name,
116
+ folders,
117
+ credentials=credentials,
118
+ )
108
119
  load_gcp_projects(neo4j_session, projects, gcp_update_tag, org_resource_name)
109
120
  return projects
@@ -4,6 +4,7 @@ from collections import defaultdict
4
4
  from collections import namedtuple
5
5
  from string import Template
6
6
  from typing import Any
7
+ from typing import cast
7
8
  from typing import Dict
8
9
  from typing import List
9
10
  from typing import Optional
@@ -157,12 +158,19 @@ def _get_repo_collaborators_inner_func(
157
158
  org: str,
158
159
  api_url: str,
159
160
  token: str,
160
- repo_raw_data: list[dict[str, Any]],
161
+ repo_raw_data: list[dict[str, Any] | None],
161
162
  affiliation: str,
162
163
  ) -> dict[str, list[UserAffiliationAndRepoPermission]]:
163
164
  result: dict[str, list[UserAffiliationAndRepoPermission]] = {}
164
165
 
165
166
  for repo in repo_raw_data:
167
+ # GitHub can return null repo entries. See issues #1334 and #1404.
168
+ if repo is None:
169
+ logger.info(
170
+ "Skipping null repository entry while fetching %s collaborators.",
171
+ affiliation,
172
+ )
173
+ continue
166
174
  repo_name = repo["name"]
167
175
  repo_url = repo["url"]
168
176
 
@@ -212,7 +220,7 @@ def _get_repo_collaborators_inner_func(
212
220
 
213
221
 
214
222
  def _get_repo_collaborators_for_multiple_repos(
215
- repo_raw_data: list[dict[str, Any]],
223
+ repo_raw_data: list[dict[str, Any] | None],
216
224
  affiliation: str,
217
225
  org: str,
218
226
  api_url: str,
@@ -279,7 +287,7 @@ def _get_repo_collaborators(
279
287
 
280
288
 
281
289
  @timeit
282
- def get(token: str, api_url: str, organization: str) -> List[Dict]:
290
+ def get(token: str, api_url: str, organization: str) -> List[Optional[Dict]]:
283
291
  """
284
292
  Retrieve a list of repos from a Github organization as described in
285
293
  https://docs.github.com/en/graphql/reference/objects#repository.
@@ -287,6 +295,8 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
287
295
  :param api_url: The Github v4 API endpoint as string.
288
296
  :param organization: The name of the target Github organization as string.
289
297
  :return: A list of dicts representing repos. See tests.data.github.repos for data shape.
298
+ Note: The list may contain None entries per GraphQL spec when resolvers error
299
+ (permissions, rate limits, transient issues). See issues #1334 and #1404.
290
300
  """
291
301
  # TODO: link the Github organization to the repositories
292
302
  repos, _ = fetch_all(
@@ -297,11 +307,15 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
297
307
  "repositories",
298
308
  count=50,
299
309
  )
300
- return repos.nodes
310
+ # Cast is needed because GitHub's GraphQL RepositoryConnection.nodes is typed [Repository] (not [Repository!])
311
+ # per GraphQL spec, allowing null entries when resolvers error (permissions, rate limits, transient issues).
312
+ # See https://github.com/cartography-cncf/cartography/issues/1334
313
+ # and https://github.com/cartography-cncf/cartography/issues/1404
314
+ return cast(List[Optional[Dict]], repos.nodes)
301
315
 
302
316
 
303
317
  def transform(
304
- repos_json: List[Dict],
318
+ repos_json: List[Optional[Dict]],
305
319
  direct_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
306
320
  outside_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
307
321
  ) -> Dict:
@@ -340,6 +354,10 @@ def transform(
340
354
  transformed_dependencies: List[Dict] = []
341
355
  transformed_manifests: List[Dict] = []
342
356
  for repo_object in repos_json:
357
+ # GitHub can return null repo entries. See issues #1334 and #1404.
358
+ if repo_object is None:
359
+ logger.debug("Skipping null repository entry during transformation.")
360
+ continue
343
361
  _transform_repo_languages(
344
362
  repo_object["url"],
345
363
  repo_object,
@@ -16,7 +16,8 @@ from google.oauth2.service_account import Credentials as ServiceAccountCredentia
16
16
  from googleapiclient.discovery import Resource
17
17
 
18
18
  from cartography.config import Config
19
- from cartography.intel.gsuite import api
19
+ from cartography.intel.gsuite import groups
20
+ from cartography.intel.gsuite import users
20
21
  from cartography.util import timeit
21
22
 
22
23
  OAUTH_SCOPES = [
@@ -148,15 +149,18 @@ def start_gsuite_ingestion(neo4j_session: neo4j.Session, config: Config) -> None
148
149
  return
149
150
 
150
151
  resources = _initialize_resources(creds)
151
- api.sync_gsuite_users(
152
- neo4j_session,
153
- resources.admin,
154
- config.update_tag,
155
- common_job_parameters,
156
- )
157
- api.sync_gsuite_groups(
152
+ customer_ids = users.sync_gsuite_users(
158
153
  neo4j_session,
159
154
  resources.admin,
160
155
  config.update_tag,
161
156
  common_job_parameters,
162
157
  )
158
+ for customer_id in customer_ids:
159
+ scoped_job_parameters = common_job_parameters.copy()
160
+ scoped_job_parameters["CUSTOMER_ID"] = customer_id
161
+ groups.sync_gsuite_groups(
162
+ neo4j_session,
163
+ resources.admin,
164
+ config.update_tag,
165
+ scoped_job_parameters,
166
+ )