cartography 0.117.0__py3-none-any.whl → 0.119.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +31 -0
- cartography/client/core/tx.py +19 -3
- cartography/config.py +14 -0
- cartography/data/indexes.cypher +0 -6
- cartography/graph/job.py +13 -7
- cartography/graph/statement.py +4 -0
- cartography/intel/aws/__init__.py +22 -9
- cartography/intel/aws/apigateway.py +18 -5
- cartography/intel/aws/ec2/elastic_ip_addresses.py +3 -1
- cartography/intel/aws/ec2/internet_gateways.py +4 -2
- cartography/intel/aws/ec2/load_balancer_v2s.py +11 -5
- cartography/intel/aws/ec2/network_interfaces.py +4 -0
- cartography/intel/aws/ec2/reserved_instances.py +3 -1
- cartography/intel/aws/ec2/tgw.py +11 -5
- cartography/intel/aws/ec2/volumes.py +1 -1
- cartography/intel/aws/ecr.py +209 -26
- cartography/intel/aws/ecr_image_layers.py +143 -42
- cartography/intel/aws/elasticsearch.py +13 -4
- cartography/intel/aws/identitycenter.py +93 -54
- cartography/intel/aws/inspector.py +90 -46
- cartography/intel/aws/permission_relationships.py +3 -3
- cartography/intel/aws/resourcegroupstaggingapi.py +1 -1
- cartography/intel/aws/s3.py +26 -13
- cartography/intel/aws/ssm.py +3 -5
- cartography/intel/azure/compute.py +9 -4
- cartography/intel/azure/cosmosdb.py +31 -15
- cartography/intel/azure/sql.py +25 -12
- cartography/intel/azure/storage.py +19 -9
- cartography/intel/azure/subscription.py +3 -1
- cartography/intel/crowdstrike/spotlight.py +5 -2
- cartography/intel/entra/app_role_assignments.py +9 -2
- cartography/intel/gcp/__init__.py +26 -9
- cartography/intel/gcp/clients.py +8 -4
- cartography/intel/gcp/compute.py +42 -21
- cartography/intel/gcp/crm/folders.py +9 -3
- cartography/intel/gcp/crm/orgs.py +8 -3
- cartography/intel/gcp/crm/projects.py +14 -3
- cartography/intel/github/repos.py +23 -5
- cartography/intel/gsuite/__init__.py +12 -8
- cartography/intel/gsuite/groups.py +291 -0
- cartography/intel/gsuite/users.py +142 -0
- cartography/intel/jamf/computers.py +7 -1
- cartography/intel/oci/iam.py +23 -9
- cartography/intel/oci/organizations.py +3 -1
- cartography/intel/oci/utils.py +28 -5
- cartography/intel/okta/awssaml.py +9 -8
- cartography/intel/okta/users.py +1 -1
- cartography/intel/ontology/__init__.py +44 -0
- cartography/intel/ontology/devices.py +54 -0
- cartography/intel/ontology/users.py +54 -0
- cartography/intel/ontology/utils.py +121 -0
- cartography/intel/pagerduty/escalation_policies.py +13 -6
- cartography/intel/pagerduty/schedules.py +9 -4
- cartography/intel/pagerduty/services.py +7 -3
- cartography/intel/pagerduty/teams.py +5 -2
- cartography/intel/pagerduty/users.py +3 -1
- cartography/intel/pagerduty/vendors.py +3 -1
- cartography/intel/trivy/__init__.py +109 -58
- cartography/models/airbyte/user.py +4 -0
- cartography/models/anthropic/user.py +4 -0
- cartography/models/aws/ec2/networkinterfaces.py +2 -0
- cartography/models/aws/ecr/image.py +55 -0
- cartography/models/aws/ecr/repository_image.py +1 -1
- cartography/models/aws/iam/group_membership.py +3 -2
- cartography/models/aws/identitycenter/awsssouser.py +3 -1
- cartography/models/bigfix/bigfix_computer.py +1 -1
- cartography/models/cloudflare/member.py +4 -0
- cartography/models/crowdstrike/hosts.py +1 -1
- cartography/models/duo/endpoint.py +1 -1
- cartography/models/duo/phone.py +2 -2
- cartography/models/duo/user.py +4 -0
- cartography/models/entra/user.py +2 -1
- cartography/models/github/users.py +4 -0
- cartography/models/gsuite/__init__.py +0 -0
- cartography/models/gsuite/group.py +218 -0
- cartography/models/gsuite/tenant.py +29 -0
- cartography/models/gsuite/user.py +107 -0
- cartography/models/kandji/device.py +1 -2
- cartography/models/keycloak/user.py +4 -0
- cartography/models/lastpass/user.py +4 -0
- cartography/models/ontology/__init__.py +0 -0
- cartography/models/ontology/device.py +125 -0
- cartography/models/ontology/mapping/__init__.py +16 -0
- cartography/models/ontology/mapping/data/__init__.py +1 -0
- cartography/models/ontology/mapping/data/devices.py +160 -0
- cartography/models/ontology/mapping/data/users.py +239 -0
- cartography/models/ontology/mapping/specs.py +65 -0
- cartography/models/ontology/user.py +52 -0
- cartography/models/openai/user.py +4 -0
- cartography/models/scaleway/iam/user.py +4 -0
- cartography/models/snipeit/asset.py +1 -0
- cartography/models/snipeit/user.py +4 -0
- cartography/models/tailscale/device.py +1 -1
- cartography/models/tailscale/user.py +6 -1
- cartography/rules/data/frameworks/mitre_attack/requirements/t1098_account_manipulation/__init__.py +176 -89
- cartography/sync.py +4 -1
- cartography/util.py +49 -18
- {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/METADATA +3 -3
- {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/RECORD +104 -89
- cartography/data/jobs/cleanup/gsuite_ingest_groups_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gsuite_ingest_users_cleanup.json +0 -11
- cartography/intel/gsuite/api.py +0 -355
- {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/WHEEL +0 -0
- {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/top_level.txt +0 -0
cartography/intel/gcp/compute.py
CHANGED
|
@@ -15,6 +15,7 @@ from googleapiclient.discovery import Resource
|
|
|
15
15
|
from googleapiclient.errors import HttpError
|
|
16
16
|
|
|
17
17
|
from cartography.client.core.tx import load
|
|
18
|
+
from cartography.client.core.tx import run_write_query
|
|
18
19
|
from cartography.graph.job import GraphJob
|
|
19
20
|
from cartography.models.gcp.compute.vpc import GCPVpcSchema
|
|
20
21
|
from cartography.util import run_cleanup_job
|
|
@@ -619,7 +620,8 @@ def load_gcp_instances(
|
|
|
619
620
|
SET r.lastupdated = $gcp_update_tag
|
|
620
621
|
"""
|
|
621
622
|
for instance in data:
|
|
622
|
-
|
|
623
|
+
run_write_query(
|
|
624
|
+
neo4j_session,
|
|
623
625
|
query,
|
|
624
626
|
ProjectId=instance["project_id"],
|
|
625
627
|
PartialUri=instance["partial_uri"],
|
|
@@ -714,7 +716,8 @@ def load_gcp_forwarding_rules(
|
|
|
714
716
|
network = fwd.get("network", None)
|
|
715
717
|
subnetwork = fwd.get("subnetwork", None)
|
|
716
718
|
|
|
717
|
-
|
|
719
|
+
run_write_query(
|
|
720
|
+
neo4j_session,
|
|
718
721
|
query,
|
|
719
722
|
PartialUri=fwd["partial_uri"],
|
|
720
723
|
IPAddress=fwd["ip_address"],
|
|
@@ -760,7 +763,8 @@ def _attach_fwd_rule_to_subnet(
|
|
|
760
763
|
SET p.lastupdated = $gcp_update_tag
|
|
761
764
|
"""
|
|
762
765
|
|
|
763
|
-
|
|
766
|
+
run_write_query(
|
|
767
|
+
neo4j_session,
|
|
764
768
|
query,
|
|
765
769
|
PartialUri=fwd["partial_uri"],
|
|
766
770
|
SubNetworkPartialUri=fwd.get("subnetwork_partial_uri", None),
|
|
@@ -787,7 +791,8 @@ def _attach_fwd_rule_to_vpc(
|
|
|
787
791
|
SET r.lastupdated = $gcp_update_tag
|
|
788
792
|
"""
|
|
789
793
|
|
|
790
|
-
|
|
794
|
+
run_write_query(
|
|
795
|
+
neo4j_session,
|
|
791
796
|
query,
|
|
792
797
|
PartialUri=fwd["partial_uri"],
|
|
793
798
|
NetworkPartialUri=fwd.get("network_partial_uri", None),
|
|
@@ -831,7 +836,8 @@ def _attach_instance_tags(
|
|
|
831
836
|
for tag in instance.get("tags", {}).get("items", []):
|
|
832
837
|
for nic in instance.get("networkInterfaces", []):
|
|
833
838
|
tag_id = _create_gcp_network_tag_id(nic["vpc_partial_uri"], tag)
|
|
834
|
-
|
|
839
|
+
run_write_query(
|
|
840
|
+
neo4j_session,
|
|
835
841
|
query,
|
|
836
842
|
InstanceId=instance["partial_uri"],
|
|
837
843
|
TagId=tag_id,
|
|
@@ -880,7 +886,8 @@ def _attach_gcp_nics(
|
|
|
880
886
|
for nic in instance.get("networkInterfaces", []):
|
|
881
887
|
# Make an ID for GCPNetworkInterface nodes because GCP doesn't define one but we need to uniquely identify them
|
|
882
888
|
nic_id = f"{instance['partial_uri']}/networkinterfaces/{nic['name']}"
|
|
883
|
-
|
|
889
|
+
run_write_query(
|
|
890
|
+
neo4j_session,
|
|
884
891
|
query,
|
|
885
892
|
InstanceId=instance["partial_uri"],
|
|
886
893
|
NicId=nic_id,
|
|
@@ -926,7 +933,8 @@ def _attach_gcp_nic_access_configs(
|
|
|
926
933
|
for ac in nic.get("accessConfigs", []):
|
|
927
934
|
# Make an ID for GCPNicAccessConfig nodes because GCP doesn't define one but we need to uniquely identify them
|
|
928
935
|
access_config_id = f"{nic_id}/accessconfigs/{ac['type']}"
|
|
929
|
-
|
|
936
|
+
run_write_query(
|
|
937
|
+
neo4j_session,
|
|
930
938
|
query,
|
|
931
939
|
NicId=nic_id,
|
|
932
940
|
AccessConfigId=access_config_id,
|
|
@@ -960,7 +968,8 @@ def _attach_gcp_vpc(
|
|
|
960
968
|
ON CREATE SET m.firstseen = timestamp()
|
|
961
969
|
SET m.lastupdated = $gcp_update_tag
|
|
962
970
|
"""
|
|
963
|
-
|
|
971
|
+
run_write_query(
|
|
972
|
+
neo4j_session,
|
|
964
973
|
query,
|
|
965
974
|
InstanceId=instance_id,
|
|
966
975
|
gcp_update_tag=gcp_update_tag,
|
|
@@ -974,10 +983,22 @@ def load_gcp_ingress_firewalls(
|
|
|
974
983
|
gcp_update_tag: int,
|
|
975
984
|
) -> None:
|
|
976
985
|
"""
|
|
977
|
-
Load the firewall list to Neo4j
|
|
986
|
+
Load the firewall list to Neo4j.
|
|
978
987
|
:param fw_list: The transformed list of firewalls
|
|
979
988
|
:return: Nothing
|
|
980
989
|
"""
|
|
990
|
+
neo4j_session.execute_write(
|
|
991
|
+
_load_gcp_ingress_firewalls_tx,
|
|
992
|
+
fw_list,
|
|
993
|
+
gcp_update_tag,
|
|
994
|
+
)
|
|
995
|
+
|
|
996
|
+
|
|
997
|
+
def _load_gcp_ingress_firewalls_tx(
|
|
998
|
+
tx: neo4j.Transaction,
|
|
999
|
+
fw_list: List[Resource],
|
|
1000
|
+
gcp_update_tag: int,
|
|
1001
|
+
) -> None:
|
|
981
1002
|
query = """
|
|
982
1003
|
MERGE (fw:GCPFirewall{id:$FwPartialUri})
|
|
983
1004
|
ON CREATE SET fw.firstseen = timestamp(),
|
|
@@ -1000,7 +1021,7 @@ def load_gcp_ingress_firewalls(
|
|
|
1000
1021
|
SET r.lastupdated = $gcp_update_tag
|
|
1001
1022
|
"""
|
|
1002
1023
|
for fw in fw_list:
|
|
1003
|
-
|
|
1024
|
+
tx.run(
|
|
1004
1025
|
query,
|
|
1005
1026
|
FwPartialUri=fw["id"],
|
|
1006
1027
|
Direction=fw["direction"],
|
|
@@ -1011,20 +1032,20 @@ def load_gcp_ingress_firewalls(
|
|
|
1011
1032
|
VpcPartialUri=fw["vpc_partial_uri"],
|
|
1012
1033
|
HasTargetServiceAccounts=fw["has_target_service_accounts"],
|
|
1013
1034
|
gcp_update_tag=gcp_update_tag,
|
|
1014
|
-
)
|
|
1015
|
-
_attach_firewall_rules(
|
|
1016
|
-
_attach_target_tags(
|
|
1035
|
+
).consume()
|
|
1036
|
+
_attach_firewall_rules(tx, fw, gcp_update_tag)
|
|
1037
|
+
_attach_target_tags(tx, fw, gcp_update_tag)
|
|
1017
1038
|
|
|
1018
1039
|
|
|
1019
1040
|
@timeit
|
|
1020
1041
|
def _attach_firewall_rules(
|
|
1021
|
-
|
|
1042
|
+
tx: neo4j.Transaction,
|
|
1022
1043
|
fw: Resource,
|
|
1023
1044
|
gcp_update_tag: int,
|
|
1024
1045
|
) -> None:
|
|
1025
1046
|
"""
|
|
1026
1047
|
Attach the allow_rules to the Firewall object
|
|
1027
|
-
:param
|
|
1048
|
+
:param tx: The Neo4j transaction
|
|
1028
1049
|
:param fw: The Firewall object
|
|
1029
1050
|
:param gcp_update_tag: The timestamp
|
|
1030
1051
|
:return: Nothing
|
|
@@ -1065,7 +1086,7 @@ def _attach_firewall_rules(
|
|
|
1065
1086
|
# If sourceRanges is not specified then the rule must specify sourceTags.
|
|
1066
1087
|
# Since an IP range cannot have a tag applied to it, it is ok if we don't ingest this rule.
|
|
1067
1088
|
for ip_range in fw.get("sourceRanges", []):
|
|
1068
|
-
|
|
1089
|
+
tx.run(
|
|
1069
1090
|
template.safe_substitute(fw_rule_relationship_label=label),
|
|
1070
1091
|
FwPartialUri=fw["id"],
|
|
1071
1092
|
RuleId=rule["ruleid"],
|
|
@@ -1074,18 +1095,18 @@ def _attach_firewall_rules(
|
|
|
1074
1095
|
ToPort=rule.get("toport"),
|
|
1075
1096
|
Range=ip_range,
|
|
1076
1097
|
gcp_update_tag=gcp_update_tag,
|
|
1077
|
-
)
|
|
1098
|
+
).consume()
|
|
1078
1099
|
|
|
1079
1100
|
|
|
1080
1101
|
@timeit
|
|
1081
1102
|
def _attach_target_tags(
|
|
1082
|
-
|
|
1103
|
+
tx: neo4j.Transaction,
|
|
1083
1104
|
fw: Resource,
|
|
1084
1105
|
gcp_update_tag: int,
|
|
1085
1106
|
) -> None:
|
|
1086
1107
|
"""
|
|
1087
1108
|
Attach target tags to the firewall object
|
|
1088
|
-
:param
|
|
1109
|
+
:param tx: The neo4j transaction
|
|
1089
1110
|
:param fw: The firewall object
|
|
1090
1111
|
:param gcp_update_tag: The timestamp
|
|
1091
1112
|
:return: Nothing
|
|
@@ -1105,13 +1126,13 @@ def _attach_target_tags(
|
|
|
1105
1126
|
"""
|
|
1106
1127
|
for tag in fw.get("targetTags", []):
|
|
1107
1128
|
tag_id = _create_gcp_network_tag_id(fw["vpc_partial_uri"], tag)
|
|
1108
|
-
|
|
1129
|
+
tx.run(
|
|
1109
1130
|
query,
|
|
1110
1131
|
FwPartialUri=fw["id"],
|
|
1111
1132
|
TagId=tag_id,
|
|
1112
1133
|
TagValue=tag,
|
|
1113
1134
|
gcp_update_tag=gcp_update_tag,
|
|
1114
|
-
)
|
|
1135
|
+
).consume()
|
|
1115
1136
|
|
|
1116
1137
|
|
|
1117
1138
|
@timeit
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from typing import Dict
|
|
3
3
|
from typing import List
|
|
4
|
+
from typing import Optional
|
|
4
5
|
|
|
5
6
|
import neo4j
|
|
7
|
+
from google.auth.credentials import Credentials as GoogleCredentials
|
|
6
8
|
from google.cloud import resourcemanager_v3
|
|
7
9
|
|
|
8
10
|
from cartography.client.core.tx import load
|
|
@@ -13,7 +15,10 @@ logger = logging.getLogger(__name__)
|
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
@timeit
|
|
16
|
-
def get_gcp_folders(org_resource_name: str) -> List[Dict]:
|
|
18
|
+
def get_gcp_folders(
|
|
19
|
+
org_resource_name: str,
|
|
20
|
+
credentials: Optional[GoogleCredentials] = None,
|
|
21
|
+
) -> List[Dict]:
|
|
17
22
|
"""
|
|
18
23
|
Return a list of all descendant GCP folders under the specified organization by traversing the folder tree.
|
|
19
24
|
|
|
@@ -21,7 +26,7 @@ def get_gcp_folders(org_resource_name: str) -> List[Dict]:
|
|
|
21
26
|
:return: List of folder dicts with 'name' field containing full resource names (e.g., "folders/123456")
|
|
22
27
|
"""
|
|
23
28
|
results: List[Dict] = []
|
|
24
|
-
client = resourcemanager_v3.FoldersClient()
|
|
29
|
+
client = resourcemanager_v3.FoldersClient(credentials=credentials)
|
|
25
30
|
# BFS over folders starting at the org root
|
|
26
31
|
queue: List[str] = [org_resource_name]
|
|
27
32
|
seen: set[str] = set()
|
|
@@ -96,6 +101,7 @@ def sync_gcp_folders(
|
|
|
96
101
|
gcp_update_tag: int,
|
|
97
102
|
common_job_parameters: Dict,
|
|
98
103
|
org_resource_name: str,
|
|
104
|
+
credentials: Optional[GoogleCredentials] = None,
|
|
99
105
|
) -> List[Dict]:
|
|
100
106
|
"""
|
|
101
107
|
Get GCP folder data using the CRM v2 resource object and load the data to Neo4j.
|
|
@@ -103,6 +109,6 @@ def sync_gcp_folders(
|
|
|
103
109
|
:return: List of folders synced
|
|
104
110
|
"""
|
|
105
111
|
logger.debug("Syncing GCP folders")
|
|
106
|
-
folders = get_gcp_folders(org_resource_name)
|
|
112
|
+
folders = get_gcp_folders(org_resource_name, credentials=credentials)
|
|
107
113
|
load_gcp_folders(neo4j_session, folders, gcp_update_tag, org_resource_name)
|
|
108
114
|
return folders
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from typing import Dict
|
|
3
3
|
from typing import List
|
|
4
|
+
from typing import Optional
|
|
4
5
|
|
|
5
6
|
import neo4j
|
|
7
|
+
from google.auth.credentials import Credentials as GoogleCredentials
|
|
6
8
|
from google.cloud import resourcemanager_v3
|
|
7
9
|
|
|
8
10
|
from cartography.client.core.tx import load
|
|
@@ -13,13 +15,15 @@ logger = logging.getLogger(__name__)
|
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
@timeit
|
|
16
|
-
def get_gcp_organizations() -> List[Dict]:
|
|
18
|
+
def get_gcp_organizations(
|
|
19
|
+
credentials: Optional[GoogleCredentials] = None,
|
|
20
|
+
) -> List[Dict]:
|
|
17
21
|
"""
|
|
18
22
|
Return list of GCP organizations that the authenticated principal can access using the high-level client.
|
|
19
23
|
Returns empty list on error.
|
|
20
24
|
:return: List of org dicts with keys: name, displayName, lifecycleState.
|
|
21
25
|
"""
|
|
22
|
-
client = resourcemanager_v3.OrganizationsClient()
|
|
26
|
+
client = resourcemanager_v3.OrganizationsClient(credentials=credentials)
|
|
23
27
|
orgs = []
|
|
24
28
|
for org in client.search_organizations():
|
|
25
29
|
orgs.append(
|
|
@@ -54,12 +58,13 @@ def sync_gcp_organizations(
|
|
|
54
58
|
neo4j_session: neo4j.Session,
|
|
55
59
|
gcp_update_tag: int,
|
|
56
60
|
common_job_parameters: Dict,
|
|
61
|
+
credentials: Optional[GoogleCredentials] = None,
|
|
57
62
|
) -> List[Dict]:
|
|
58
63
|
"""
|
|
59
64
|
Get GCP organization data using the CRM v1 resource object and load the data to Neo4j.
|
|
60
65
|
Returns the list of organizations synced.
|
|
61
66
|
"""
|
|
62
67
|
logger.debug("Syncing GCP organizations")
|
|
63
|
-
data = get_gcp_organizations()
|
|
68
|
+
data = get_gcp_organizations(credentials=credentials)
|
|
64
69
|
load_gcp_organizations(neo4j_session, data, gcp_update_tag)
|
|
65
70
|
return data
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from typing import Dict
|
|
3
3
|
from typing import List
|
|
4
|
+
from typing import Optional
|
|
4
5
|
|
|
5
6
|
import neo4j
|
|
7
|
+
from google.auth.credentials import Credentials as GoogleCredentials
|
|
6
8
|
from google.cloud import resourcemanager_v3
|
|
7
9
|
|
|
8
10
|
from cartography.client.core.tx import load
|
|
@@ -13,7 +15,11 @@ logger = logging.getLogger(__name__)
|
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
@timeit
|
|
16
|
-
def get_gcp_projects(org_resource_name: str, folders: List[Dict]) -> List[Dict]:
|
|
18
|
+
def get_gcp_projects(
|
|
19
|
+
org_resource_name: str,
|
|
20
|
+
folders: List[Dict],
|
|
21
|
+
credentials: Optional[GoogleCredentials] = None,
|
|
22
|
+
) -> List[Dict]:
|
|
17
23
|
"""
|
|
18
24
|
Return list of ACTIVE GCP projects under the specified organization
|
|
19
25
|
and within the specified folders.
|
|
@@ -25,7 +31,7 @@ def get_gcp_projects(org_resource_name: str, folders: List[Dict]) -> List[Dict]:
|
|
|
25
31
|
parents = set([org_resource_name] + folder_names)
|
|
26
32
|
results: List[Dict] = []
|
|
27
33
|
for parent in parents:
|
|
28
|
-
client = resourcemanager_v3.ProjectsClient()
|
|
34
|
+
client = resourcemanager_v3.ProjectsClient(credentials=credentials)
|
|
29
35
|
for proj in client.list_projects(parent=parent):
|
|
30
36
|
# list_projects returns ACTIVE projects by default
|
|
31
37
|
name_field = proj.name # "projects/<number>"
|
|
@@ -96,6 +102,7 @@ def sync_gcp_projects(
|
|
|
96
102
|
folders: List[Dict],
|
|
97
103
|
gcp_update_tag: int,
|
|
98
104
|
common_job_parameters: Dict,
|
|
105
|
+
credentials: Optional[GoogleCredentials] = None,
|
|
99
106
|
) -> List[Dict]:
|
|
100
107
|
"""
|
|
101
108
|
Get and sync GCP project data to Neo4j.
|
|
@@ -104,6 +111,10 @@ def sync_gcp_projects(
|
|
|
104
111
|
:return: List of projects synced
|
|
105
112
|
"""
|
|
106
113
|
logger.debug("Syncing GCP projects")
|
|
107
|
-
projects = get_gcp_projects(org_resource_name, folders)
|
|
114
|
+
projects = get_gcp_projects(
|
|
115
|
+
org_resource_name,
|
|
116
|
+
folders,
|
|
117
|
+
credentials=credentials,
|
|
118
|
+
)
|
|
108
119
|
load_gcp_projects(neo4j_session, projects, gcp_update_tag, org_resource_name)
|
|
109
120
|
return projects
|
|
@@ -4,6 +4,7 @@ from collections import defaultdict
|
|
|
4
4
|
from collections import namedtuple
|
|
5
5
|
from string import Template
|
|
6
6
|
from typing import Any
|
|
7
|
+
from typing import cast
|
|
7
8
|
from typing import Dict
|
|
8
9
|
from typing import List
|
|
9
10
|
from typing import Optional
|
|
@@ -157,12 +158,19 @@ def _get_repo_collaborators_inner_func(
|
|
|
157
158
|
org: str,
|
|
158
159
|
api_url: str,
|
|
159
160
|
token: str,
|
|
160
|
-
repo_raw_data: list[dict[str, Any]],
|
|
161
|
+
repo_raw_data: list[dict[str, Any] | None],
|
|
161
162
|
affiliation: str,
|
|
162
163
|
) -> dict[str, list[UserAffiliationAndRepoPermission]]:
|
|
163
164
|
result: dict[str, list[UserAffiliationAndRepoPermission]] = {}
|
|
164
165
|
|
|
165
166
|
for repo in repo_raw_data:
|
|
167
|
+
# GitHub can return null repo entries. See issues #1334 and #1404.
|
|
168
|
+
if repo is None:
|
|
169
|
+
logger.info(
|
|
170
|
+
"Skipping null repository entry while fetching %s collaborators.",
|
|
171
|
+
affiliation,
|
|
172
|
+
)
|
|
173
|
+
continue
|
|
166
174
|
repo_name = repo["name"]
|
|
167
175
|
repo_url = repo["url"]
|
|
168
176
|
|
|
@@ -212,7 +220,7 @@ def _get_repo_collaborators_inner_func(
|
|
|
212
220
|
|
|
213
221
|
|
|
214
222
|
def _get_repo_collaborators_for_multiple_repos(
|
|
215
|
-
repo_raw_data: list[dict[str, Any]],
|
|
223
|
+
repo_raw_data: list[dict[str, Any] | None],
|
|
216
224
|
affiliation: str,
|
|
217
225
|
org: str,
|
|
218
226
|
api_url: str,
|
|
@@ -279,7 +287,7 @@ def _get_repo_collaborators(
|
|
|
279
287
|
|
|
280
288
|
|
|
281
289
|
@timeit
|
|
282
|
-
def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
290
|
+
def get(token: str, api_url: str, organization: str) -> List[Optional[Dict]]:
|
|
283
291
|
"""
|
|
284
292
|
Retrieve a list of repos from a Github organization as described in
|
|
285
293
|
https://docs.github.com/en/graphql/reference/objects#repository.
|
|
@@ -287,6 +295,8 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
|
287
295
|
:param api_url: The Github v4 API endpoint as string.
|
|
288
296
|
:param organization: The name of the target Github organization as string.
|
|
289
297
|
:return: A list of dicts representing repos. See tests.data.github.repos for data shape.
|
|
298
|
+
Note: The list may contain None entries per GraphQL spec when resolvers error
|
|
299
|
+
(permissions, rate limits, transient issues). See issues #1334 and #1404.
|
|
290
300
|
"""
|
|
291
301
|
# TODO: link the Github organization to the repositories
|
|
292
302
|
repos, _ = fetch_all(
|
|
@@ -297,11 +307,15 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
|
297
307
|
"repositories",
|
|
298
308
|
count=50,
|
|
299
309
|
)
|
|
300
|
-
|
|
310
|
+
# Cast is needed because GitHub's GraphQL RepositoryConnection.nodes is typed [Repository] (not [Repository!])
|
|
311
|
+
# per GraphQL spec, allowing null entries when resolvers error (permissions, rate limits, transient issues).
|
|
312
|
+
# See https://github.com/cartography-cncf/cartography/issues/1334
|
|
313
|
+
# and https://github.com/cartography-cncf/cartography/issues/1404
|
|
314
|
+
return cast(List[Optional[Dict]], repos.nodes)
|
|
301
315
|
|
|
302
316
|
|
|
303
317
|
def transform(
|
|
304
|
-
repos_json: List[Dict],
|
|
318
|
+
repos_json: List[Optional[Dict]],
|
|
305
319
|
direct_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
|
|
306
320
|
outside_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
|
|
307
321
|
) -> Dict:
|
|
@@ -340,6 +354,10 @@ def transform(
|
|
|
340
354
|
transformed_dependencies: List[Dict] = []
|
|
341
355
|
transformed_manifests: List[Dict] = []
|
|
342
356
|
for repo_object in repos_json:
|
|
357
|
+
# GitHub can return null repo entries. See issues #1334 and #1404.
|
|
358
|
+
if repo_object is None:
|
|
359
|
+
logger.debug("Skipping null repository entry during transformation.")
|
|
360
|
+
continue
|
|
343
361
|
_transform_repo_languages(
|
|
344
362
|
repo_object["url"],
|
|
345
363
|
repo_object,
|
|
@@ -16,7 +16,8 @@ from google.oauth2.service_account import Credentials as ServiceAccountCredentia
|
|
|
16
16
|
from googleapiclient.discovery import Resource
|
|
17
17
|
|
|
18
18
|
from cartography.config import Config
|
|
19
|
-
from cartography.intel.gsuite import api
|
|
19
|
+
from cartography.intel.gsuite import groups
|
|
20
|
+
from cartography.intel.gsuite import users
|
|
20
21
|
from cartography.util import timeit
|
|
21
22
|
|
|
22
23
|
OAUTH_SCOPES = [
|
|
@@ -148,15 +149,18 @@ def start_gsuite_ingestion(neo4j_session: neo4j.Session, config: Config) -> None
|
|
|
148
149
|
return
|
|
149
150
|
|
|
150
151
|
resources = _initialize_resources(creds)
|
|
151
|
-
|
|
152
|
-
neo4j_session,
|
|
153
|
-
resources.admin,
|
|
154
|
-
config.update_tag,
|
|
155
|
-
common_job_parameters,
|
|
156
|
-
)
|
|
157
|
-
api.sync_gsuite_groups(
|
|
152
|
+
customer_ids = users.sync_gsuite_users(
|
|
158
153
|
neo4j_session,
|
|
159
154
|
resources.admin,
|
|
160
155
|
config.update_tag,
|
|
161
156
|
common_job_parameters,
|
|
162
157
|
)
|
|
158
|
+
for customer_id in customer_ids:
|
|
159
|
+
scoped_job_parameters = common_job_parameters.copy()
|
|
160
|
+
scoped_job_parameters["CUSTOMER_ID"] = customer_id
|
|
161
|
+
groups.sync_gsuite_groups(
|
|
162
|
+
neo4j_session,
|
|
163
|
+
resources.admin,
|
|
164
|
+
config.update_tag,
|
|
165
|
+
scoped_job_parameters,
|
|
166
|
+
)
|