cartography 0.112.0__py3-none-any.whl → 0.114.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +8 -0
- cartography/config.py +4 -0
- cartography/data/indexes.cypher +0 -31
- cartography/intel/aws/apigatewayv2.py +116 -0
- cartography/intel/aws/iam.py +741 -492
- cartography/intel/aws/organizations.py +7 -8
- cartography/intel/aws/permission_relationships.py +4 -16
- cartography/intel/aws/resources.py +2 -0
- cartography/intel/azure/__init__.py +16 -0
- cartography/intel/azure/app_service.py +105 -0
- cartography/intel/azure/functions.py +124 -0
- cartography/intel/entra/__init__.py +31 -0
- cartography/intel/entra/app_role_assignments.py +277 -0
- cartography/intel/entra/applications.py +4 -238
- cartography/intel/entra/federation/__init__.py +0 -0
- cartography/intel/entra/federation/aws_identity_center.py +77 -0
- cartography/intel/entra/service_principals.py +217 -0
- cartography/intel/gcp/__init__.py +136 -436
- cartography/intel/gcp/clients.py +65 -0
- cartography/intel/gcp/compute.py +18 -44
- cartography/intel/gcp/crm/__init__.py +0 -0
- cartography/intel/gcp/crm/folders.py +108 -0
- cartography/intel/gcp/crm/orgs.py +65 -0
- cartography/intel/gcp/crm/projects.py +109 -0
- cartography/intel/gcp/dns.py +82 -169
- cartography/intel/gcp/gke.py +72 -113
- cartography/intel/gcp/iam.py +66 -54
- cartography/intel/gcp/storage.py +75 -159
- cartography/intel/github/__init__.py +41 -0
- cartography/intel/github/commits.py +423 -0
- cartography/intel/github/repos.py +73 -39
- cartography/models/aws/apigatewayv2/__init__.py +0 -0
- cartography/models/aws/apigatewayv2/apigatewayv2.py +53 -0
- cartography/models/aws/iam/access_key.py +103 -0
- cartography/models/aws/iam/account_role.py +24 -0
- cartography/models/aws/iam/federated_principal.py +60 -0
- cartography/models/aws/iam/group.py +60 -0
- cartography/models/aws/iam/group_membership.py +26 -0
- cartography/models/aws/iam/inline_policy.py +78 -0
- cartography/models/aws/iam/managed_policy.py +51 -0
- cartography/models/aws/iam/policy_statement.py +57 -0
- cartography/models/aws/iam/role.py +83 -0
- cartography/models/aws/iam/root_principal.py +52 -0
- cartography/models/aws/iam/service_principal.py +30 -0
- cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
- cartography/models/aws/iam/user.py +54 -0
- cartography/models/azure/__init__.py +0 -0
- cartography/models/azure/app_service.py +59 -0
- cartography/models/azure/function_app.py +59 -0
- cartography/models/entra/entra_user_to_aws_sso.py +41 -0
- cartography/models/entra/service_principal.py +104 -0
- cartography/models/gcp/compute/subnet.py +74 -0
- cartography/models/gcp/crm/__init__.py +0 -0
- cartography/models/gcp/crm/folders.py +98 -0
- cartography/models/gcp/crm/organizations.py +21 -0
- cartography/models/gcp/crm/projects.py +100 -0
- cartography/models/gcp/dns.py +109 -0
- cartography/models/gcp/gke.py +69 -0
- cartography/models/gcp/iam.py +3 -0
- cartography/models/gcp/storage/__init__.py +0 -0
- cartography/models/gcp/storage/bucket.py +119 -0
- cartography/models/github/commits.py +63 -0
- {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/METADATA +7 -5
- {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/RECORD +69 -39
- cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
- cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
- cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
- cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
- cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
- cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
- cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
- cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
- cartography/data/jobs/cleanup/gcp_dns_cleanup.json +0 -29
- cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
- cartography/data/jobs/cleanup/gcp_storage_bucket_cleanup.json +0 -29
- cartography/intel/gcp/crm.py +0 -355
- {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/WHEEL +0 -0
- {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/top_level.txt +0 -0
cartography/intel/gcp/iam.py
CHANGED
|
@@ -90,6 +90,29 @@ def get_gcp_roles(iam_client: Resource, project_id: str) -> List[Dict]:
|
|
|
90
90
|
return []
|
|
91
91
|
|
|
92
92
|
|
|
93
|
+
def transform_gcp_service_accounts(
|
|
94
|
+
raw_accounts: List[Dict[str, Any]],
|
|
95
|
+
project_id: str,
|
|
96
|
+
) -> List[Dict[str, Any]]:
|
|
97
|
+
"""
|
|
98
|
+
Transform raw GCP service accounts into loader-friendly dicts.
|
|
99
|
+
"""
|
|
100
|
+
result: List[Dict[str, Any]] = []
|
|
101
|
+
for sa in raw_accounts:
|
|
102
|
+
result.append(
|
|
103
|
+
{
|
|
104
|
+
"id": sa["uniqueId"],
|
|
105
|
+
"email": sa.get("email"),
|
|
106
|
+
"displayName": sa.get("displayName"),
|
|
107
|
+
"oauth2ClientId": sa.get("oauth2ClientId"),
|
|
108
|
+
"uniqueId": sa.get("uniqueId"),
|
|
109
|
+
"disabled": sa.get("disabled", False),
|
|
110
|
+
"projectId": project_id,
|
|
111
|
+
},
|
|
112
|
+
)
|
|
113
|
+
return result
|
|
114
|
+
|
|
115
|
+
|
|
93
116
|
@timeit
|
|
94
117
|
def load_gcp_service_accounts(
|
|
95
118
|
neo4j_session: neo4j.Session,
|
|
@@ -99,38 +122,55 @@ def load_gcp_service_accounts(
|
|
|
99
122
|
) -> None:
|
|
100
123
|
"""
|
|
101
124
|
Load GCP service account data into Neo4j.
|
|
102
|
-
|
|
103
|
-
:param neo4j_session: The Neo4j session.
|
|
104
|
-
:param service_accounts: A list of service account data to load.
|
|
105
|
-
:param project_id: The GCP Project ID associated with the service accounts.
|
|
106
|
-
:param gcp_update_tag: The timestamp of the current sync run.
|
|
107
125
|
"""
|
|
108
126
|
logger.debug(
|
|
109
127
|
f"Loading {len(service_accounts)} service accounts for project {project_id}"
|
|
110
128
|
)
|
|
111
|
-
transformed_service_accounts = []
|
|
112
|
-
for sa in service_accounts:
|
|
113
|
-
transformed_sa = {
|
|
114
|
-
"id": sa["uniqueId"],
|
|
115
|
-
"email": sa.get("email"),
|
|
116
|
-
"displayName": sa.get("displayName"),
|
|
117
|
-
"oauth2ClientId": sa.get("oauth2ClientId"),
|
|
118
|
-
"uniqueId": sa.get("uniqueId"),
|
|
119
|
-
"disabled": sa.get("disabled", False),
|
|
120
|
-
"projectId": project_id,
|
|
121
|
-
}
|
|
122
|
-
transformed_service_accounts.append(transformed_sa)
|
|
123
129
|
|
|
124
130
|
load(
|
|
125
131
|
neo4j_session,
|
|
126
132
|
GCPServiceAccountSchema(),
|
|
127
|
-
|
|
133
|
+
service_accounts,
|
|
128
134
|
lastupdated=gcp_update_tag,
|
|
129
135
|
projectId=project_id,
|
|
130
|
-
additional_labels=["GCPPrincipal"],
|
|
131
136
|
)
|
|
132
137
|
|
|
133
138
|
|
|
139
|
+
def transform_gcp_roles(
|
|
140
|
+
raw_roles: List[Dict[str, Any]],
|
|
141
|
+
project_id: str,
|
|
142
|
+
) -> List[Dict[str, Any]]:
|
|
143
|
+
"""
|
|
144
|
+
Transform raw GCP roles into loader-friendly dicts.
|
|
145
|
+
"""
|
|
146
|
+
result: List[Dict[str, Any]] = []
|
|
147
|
+
for role in raw_roles:
|
|
148
|
+
role_name = role["name"]
|
|
149
|
+
if role_name.startswith("roles/"):
|
|
150
|
+
role_type = (
|
|
151
|
+
"BASIC"
|
|
152
|
+
if role_name in ["roles/owner", "roles/editor", "roles/viewer"]
|
|
153
|
+
else "PREDEFINED"
|
|
154
|
+
)
|
|
155
|
+
else:
|
|
156
|
+
role_type = "CUSTOM"
|
|
157
|
+
|
|
158
|
+
result.append(
|
|
159
|
+
{
|
|
160
|
+
"id": role_name,
|
|
161
|
+
"name": role_name,
|
|
162
|
+
"title": role.get("title"),
|
|
163
|
+
"description": role.get("description"),
|
|
164
|
+
"deleted": role.get("deleted", False),
|
|
165
|
+
"etag": role.get("etag"),
|
|
166
|
+
"includedPermissions": role.get("includedPermissions", []),
|
|
167
|
+
"roleType": role_type,
|
|
168
|
+
"projectId": project_id,
|
|
169
|
+
},
|
|
170
|
+
)
|
|
171
|
+
return result
|
|
172
|
+
|
|
173
|
+
|
|
134
174
|
@timeit
|
|
135
175
|
def load_gcp_roles(
|
|
136
176
|
neo4j_session: neo4j.Session,
|
|
@@ -140,41 +180,13 @@ def load_gcp_roles(
|
|
|
140
180
|
) -> None:
|
|
141
181
|
"""
|
|
142
182
|
Load GCP role data into Neo4j.
|
|
143
|
-
|
|
144
|
-
:param neo4j_session: The Neo4j session.
|
|
145
|
-
:param roles: A list of role data to load.
|
|
146
|
-
:param project_id: The GCP Project ID associated with the roles.
|
|
147
|
-
:param gcp_update_tag: The timestamp of the current sync run.
|
|
148
183
|
"""
|
|
149
184
|
logger.debug(f"Loading {len(roles)} roles for project {project_id}")
|
|
150
|
-
transformed_roles = []
|
|
151
|
-
for role in roles:
|
|
152
|
-
role_name = role["name"]
|
|
153
|
-
if role_name.startswith("roles/"):
|
|
154
|
-
if role_name in ["roles/owner", "roles/editor", "roles/viewer"]:
|
|
155
|
-
role_type = "BASIC"
|
|
156
|
-
else:
|
|
157
|
-
role_type = "PREDEFINED"
|
|
158
|
-
else:
|
|
159
|
-
role_type = "CUSTOM"
|
|
160
|
-
|
|
161
|
-
transformed_role = {
|
|
162
|
-
"id": role_name,
|
|
163
|
-
"name": role_name,
|
|
164
|
-
"title": role.get("title"),
|
|
165
|
-
"description": role.get("description"),
|
|
166
|
-
"deleted": role.get("deleted", False),
|
|
167
|
-
"etag": role.get("etag"),
|
|
168
|
-
"includedPermissions": role.get("includedPermissions", []),
|
|
169
|
-
"roleType": role_type,
|
|
170
|
-
"projectId": project_id,
|
|
171
|
-
}
|
|
172
|
-
transformed_roles.append(transformed_role)
|
|
173
185
|
|
|
174
186
|
load(
|
|
175
187
|
neo4j_session,
|
|
176
188
|
GCPRoleSchema(),
|
|
177
|
-
|
|
189
|
+
roles,
|
|
178
190
|
lastupdated=gcp_update_tag,
|
|
179
191
|
projectId=project_id,
|
|
180
192
|
)
|
|
@@ -224,18 +236,18 @@ def sync(
|
|
|
224
236
|
"""
|
|
225
237
|
logger.info(f"Syncing GCP IAM for project {project_id}")
|
|
226
238
|
|
|
227
|
-
|
|
228
|
-
service_accounts = get_gcp_service_accounts(iam_client, project_id)
|
|
239
|
+
service_accounts_raw = get_gcp_service_accounts(iam_client, project_id)
|
|
229
240
|
logger.info(
|
|
230
|
-
f"Found {len(service_accounts)} service accounts in project {project_id}"
|
|
241
|
+
f"Found {len(service_accounts_raw)} service accounts in project {project_id}"
|
|
231
242
|
)
|
|
243
|
+
service_accounts = transform_gcp_service_accounts(service_accounts_raw, project_id)
|
|
232
244
|
load_gcp_service_accounts(
|
|
233
245
|
neo4j_session, service_accounts, project_id, gcp_update_tag
|
|
234
246
|
)
|
|
235
247
|
|
|
236
|
-
|
|
237
|
-
roles = get_gcp_roles(iam_client, project_id)
|
|
238
|
-
|
|
248
|
+
roles_raw = get_gcp_roles(iam_client, project_id)
|
|
249
|
+
logger.info(f"Found {len(roles_raw)} roles in project {project_id}")
|
|
250
|
+
roles = transform_gcp_roles(roles_raw, project_id)
|
|
239
251
|
load_gcp_roles(neo4j_session, roles, project_id, gcp_update_tag)
|
|
240
252
|
|
|
241
253
|
# Run cleanup
|
cartography/intel/gcp/storage.py
CHANGED
|
@@ -1,13 +1,17 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from typing import Dict
|
|
3
3
|
from typing import List
|
|
4
|
+
from typing import Tuple
|
|
4
5
|
|
|
5
6
|
import neo4j
|
|
6
7
|
from googleapiclient.discovery import HttpError
|
|
7
8
|
from googleapiclient.discovery import Resource
|
|
8
9
|
|
|
10
|
+
from cartography.client.core.tx import load
|
|
11
|
+
from cartography.graph.job import GraphJob
|
|
9
12
|
from cartography.intel.gcp import compute
|
|
10
|
-
from cartography.util import run_cleanup_job
|
|
13
|
+
from cartography.models.gcp.storage.bucket import GCPBucketLabelSchema
|
|
14
|
+
from cartography.models.gcp.storage.bucket import GCPBucketSchema
|
|
11
15
|
from cartography.util import timeit
|
|
12
16
|
|
|
13
17
|
logger = logging.getLogger(__name__)
|
|
@@ -58,165 +62,85 @@ def get_gcp_buckets(storage: Resource, project_id: str) -> Dict:
|
|
|
58
62
|
|
|
59
63
|
|
|
60
64
|
@timeit
|
|
61
|
-
def transform_gcp_buckets(bucket_res: Dict) -> List[Dict]:
|
|
65
|
+
def transform_gcp_buckets_and_labels(bucket_res: Dict) -> Tuple[List[Dict], List[Dict]]:
|
|
62
66
|
"""
|
|
63
|
-
Transform the GCP Storage Bucket response object for Neo4j ingestion
|
|
67
|
+
Transform the GCP Storage Bucket response object for Neo4j ingestion.
|
|
64
68
|
|
|
65
|
-
:
|
|
66
|
-
:
|
|
67
|
-
|
|
68
|
-
:rtype: list
|
|
69
|
-
:return: List of buckets ready for ingestion to Neo4j
|
|
69
|
+
:param bucket_res: The raw GCP bucket response.
|
|
70
|
+
:return: A tuple of (buckets, bucket_labels) ready for ingestion to Neo4j.
|
|
70
71
|
"""
|
|
71
72
|
|
|
72
|
-
|
|
73
|
+
buckets: List[Dict] = []
|
|
74
|
+
labels: List[Dict] = []
|
|
73
75
|
for b in bucket_res.get("items", []):
|
|
74
|
-
bucket = {
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
76
|
+
bucket = {
|
|
77
|
+
"iam_config_bucket_policy_only": (
|
|
78
|
+
b.get("iamConfiguration", {}).get("bucketPolicyOnly", {}).get("enabled")
|
|
79
|
+
),
|
|
80
|
+
"id": b["id"],
|
|
81
|
+
# Preserve legacy bucket_id field for compatibility
|
|
82
|
+
"bucket_id": b["id"],
|
|
83
|
+
"owner_entity": b.get("owner", {}).get("entity"),
|
|
84
|
+
"owner_entity_id": b.get("owner", {}).get("entityId"),
|
|
85
|
+
"kind": b.get("kind"),
|
|
86
|
+
"location": b.get("location"),
|
|
87
|
+
"location_type": b.get("locationType"),
|
|
88
|
+
"meta_generation": b.get("metageneration"),
|
|
89
|
+
"project_number": b.get("projectNumber"),
|
|
90
|
+
"self_link": b.get("selfLink"),
|
|
91
|
+
"storage_class": b.get("storageClass"),
|
|
92
|
+
"time_created": b.get("timeCreated"),
|
|
93
|
+
"versioning_enabled": b.get("versioning", {}).get("enabled"),
|
|
94
|
+
"retention_period": b.get("retentionPolicy", {}).get("retentionPeriod"),
|
|
95
|
+
"default_kms_key_name": b.get("encryption", {}).get("defaultKmsKeyName"),
|
|
96
|
+
"log_bucket": b.get("logging", {}).get("logBucket"),
|
|
97
|
+
"requester_pays": b.get("billing", {}).get("requesterPays"),
|
|
98
|
+
}
|
|
99
|
+
buckets.append(bucket)
|
|
100
|
+
for key, val in b.get("labels", {}).items():
|
|
101
|
+
labels.append(
|
|
102
|
+
{
|
|
103
|
+
"id": f"GCPBucket_{key}",
|
|
104
|
+
"key": key,
|
|
105
|
+
"value": val,
|
|
106
|
+
"bucket_id": b["id"],
|
|
107
|
+
}
|
|
108
|
+
)
|
|
109
|
+
return buckets, labels
|
|
107
110
|
|
|
108
111
|
|
|
109
112
|
@timeit
|
|
110
113
|
def load_gcp_buckets(
|
|
111
114
|
neo4j_session: neo4j.Session,
|
|
112
115
|
buckets: List[Dict],
|
|
116
|
+
project_id: str,
|
|
113
117
|
gcp_update_tag: int,
|
|
114
118
|
) -> None:
|
|
115
|
-
"""
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
:type gcp_update_tag: timestamp
|
|
125
|
-
:param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
|
|
126
|
-
|
|
127
|
-
:rtype: NoneType
|
|
128
|
-
:return: Nothing
|
|
129
|
-
"""
|
|
130
|
-
|
|
131
|
-
query = """
|
|
132
|
-
MERGE(p:GCPProject{projectnumber:$ProjectNumber})
|
|
133
|
-
ON CREATE SET p.firstseen = timestamp()
|
|
134
|
-
SET p.lastupdated = $gcp_update_tag
|
|
135
|
-
|
|
136
|
-
MERGE(bucket:GCPBucket{id:$BucketId})
|
|
137
|
-
ON CREATE SET bucket.firstseen = timestamp(),
|
|
138
|
-
bucket.bucket_id = $BucketId
|
|
139
|
-
SET bucket.self_link = $SelfLink,
|
|
140
|
-
bucket.project_number = $ProjectNumber,
|
|
141
|
-
bucket.kind = $Kind,
|
|
142
|
-
bucket.location = $Location,
|
|
143
|
-
bucket.location_type = $LocationType,
|
|
144
|
-
bucket.meta_generation = $MetaGeneration,
|
|
145
|
-
bucket.storage_class = $StorageClass,
|
|
146
|
-
bucket.time_created = $TimeCreated,
|
|
147
|
-
bucket.retention_period = $RetentionPeriod,
|
|
148
|
-
bucket.iam_config_bucket_policy_only = $IamConfigBucketPolicyOnly,
|
|
149
|
-
bucket.owner_entity = $OwnerEntity,
|
|
150
|
-
bucket.owner_entity_id = $OwnerEntityId,
|
|
151
|
-
bucket.lastupdated = $gcp_update_tag,
|
|
152
|
-
bucket.versioning_enabled = $VersioningEnabled,
|
|
153
|
-
bucket.log_bucket = $LogBucket,
|
|
154
|
-
bucket.requester_pays = $RequesterPays,
|
|
155
|
-
bucket.default_kms_key_name = $DefaultKmsKeyName
|
|
156
|
-
|
|
157
|
-
MERGE (p)-[r:RESOURCE]->(bucket)
|
|
158
|
-
ON CREATE SET r.firstseen = timestamp()
|
|
159
|
-
SET r.lastupdated = $gcp_update_tag
|
|
160
|
-
"""
|
|
161
|
-
for bucket in buckets:
|
|
162
|
-
neo4j_session.run(
|
|
163
|
-
query,
|
|
164
|
-
ProjectNumber=bucket["project_number"],
|
|
165
|
-
BucketId=bucket["id"],
|
|
166
|
-
SelfLink=bucket["self_link"],
|
|
167
|
-
Kind=bucket["kind"],
|
|
168
|
-
Location=bucket["location"],
|
|
169
|
-
LocationType=bucket["location_type"],
|
|
170
|
-
MetaGeneration=bucket["meta_generation"],
|
|
171
|
-
StorageClass=bucket["storage_class"],
|
|
172
|
-
TimeCreated=bucket["time_created"],
|
|
173
|
-
RetentionPeriod=bucket["retention_period"],
|
|
174
|
-
IamConfigBucketPolicyOnly=bucket["iam_config_bucket_policy_only"],
|
|
175
|
-
OwnerEntity=bucket["owner_entity"],
|
|
176
|
-
OwnerEntityId=bucket["owner_entity_id"],
|
|
177
|
-
VersioningEnabled=bucket["versioning_enabled"],
|
|
178
|
-
LogBucket=bucket["log_bucket"],
|
|
179
|
-
RequesterPays=bucket["requester_pays"],
|
|
180
|
-
DefaultKmsKeyName=bucket["default_kms_key_name"],
|
|
181
|
-
gcp_update_tag=gcp_update_tag,
|
|
182
|
-
)
|
|
183
|
-
_attach_gcp_bucket_labels(neo4j_session, bucket, gcp_update_tag)
|
|
119
|
+
"""Ingest GCP Storage Buckets to Neo4j."""
|
|
120
|
+
load(
|
|
121
|
+
neo4j_session,
|
|
122
|
+
GCPBucketSchema(),
|
|
123
|
+
buckets,
|
|
124
|
+
lastupdated=gcp_update_tag,
|
|
125
|
+
PROJECT_ID=project_id,
|
|
126
|
+
)
|
|
184
127
|
|
|
185
128
|
|
|
186
129
|
@timeit
|
|
187
|
-
def _attach_gcp_bucket_labels(
|
|
130
|
+
def load_gcp_bucket_labels(
|
|
188
131
|
neo4j_session: neo4j.Session,
|
|
189
|
-
|
|
132
|
+
bucket_labels: List[Dict],
|
|
133
|
+
project_id: str,
|
|
190
134
|
gcp_update_tag: int,
|
|
191
135
|
) -> None:
|
|
192
|
-
"""
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
MERGE (l:Label:GCPBucketLabel{id: $BucketLabelId})
|
|
201
|
-
ON CREATE SET l.firstseen = timestamp(),
|
|
202
|
-
l.key = $Key
|
|
203
|
-
SET l.value = $Value,
|
|
204
|
-
l.lastupdated = $gcp_update_tag
|
|
205
|
-
WITH l
|
|
206
|
-
MATCH (bucket:GCPBucket{id:$BucketId})
|
|
207
|
-
MERGE (l)<-[r:LABELED]-(bucket)
|
|
208
|
-
ON CREATE SET r.firstseen = timestamp()
|
|
209
|
-
SET r.lastupdated = $gcp_update_tag
|
|
210
|
-
"""
|
|
211
|
-
for key, val in bucket.get("labels", []):
|
|
212
|
-
neo4j_session.run(
|
|
213
|
-
query,
|
|
214
|
-
BucketLabelId=f"GCPBucket_{key}",
|
|
215
|
-
Key=key,
|
|
216
|
-
Value=val,
|
|
217
|
-
BucketId=bucket["id"],
|
|
218
|
-
gcp_update_tag=gcp_update_tag,
|
|
219
|
-
)
|
|
136
|
+
"""Ingest GCP Storage Bucket labels and attach them to buckets."""
|
|
137
|
+
load(
|
|
138
|
+
neo4j_session,
|
|
139
|
+
GCPBucketLabelSchema(),
|
|
140
|
+
bucket_labels,
|
|
141
|
+
lastupdated=gcp_update_tag,
|
|
142
|
+
PROJECT_ID=project_id,
|
|
143
|
+
)
|
|
220
144
|
|
|
221
145
|
|
|
222
146
|
@timeit
|
|
@@ -224,22 +148,14 @@ def cleanup_gcp_buckets(
|
|
|
224
148
|
neo4j_session: neo4j.Session,
|
|
225
149
|
common_job_parameters: Dict,
|
|
226
150
|
) -> None:
|
|
227
|
-
"""
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
:param common_job_parameters: Dictionary of other job parameters to pass to Neo4j
|
|
235
|
-
|
|
236
|
-
:rtype: NoneType
|
|
237
|
-
:return: Nothing
|
|
238
|
-
"""
|
|
239
|
-
run_cleanup_job(
|
|
240
|
-
"gcp_storage_bucket_cleanup.json",
|
|
151
|
+
"""Delete out-of-date GCP Storage Bucket nodes and relationships."""
|
|
152
|
+
# Bucket labels depend on buckets, so we must remove labels first to avoid
|
|
153
|
+
# dangling references before deleting the buckets themselves.
|
|
154
|
+
GraphJob.from_node_schema(GCPBucketLabelSchema(), common_job_parameters).run(
|
|
155
|
+
neo4j_session,
|
|
156
|
+
)
|
|
157
|
+
GraphJob.from_node_schema(GCPBucketSchema(), common_job_parameters).run(
|
|
241
158
|
neo4j_session,
|
|
242
|
-
common_job_parameters,
|
|
243
159
|
)
|
|
244
160
|
|
|
245
161
|
|
|
@@ -274,7 +190,7 @@ def sync_gcp_buckets(
|
|
|
274
190
|
"""
|
|
275
191
|
logger.info("Syncing Storage objects for project %s.", project_id)
|
|
276
192
|
storage_res = get_gcp_buckets(storage, project_id)
|
|
277
|
-
|
|
278
|
-
load_gcp_buckets(neo4j_session,
|
|
279
|
-
|
|
193
|
+
buckets, bucket_labels = transform_gcp_buckets_and_labels(storage_res)
|
|
194
|
+
load_gcp_buckets(neo4j_session, buckets, project_id, gcp_update_tag)
|
|
195
|
+
load_gcp_bucket_labels(neo4j_session, bucket_labels, project_id, gcp_update_tag)
|
|
280
196
|
cleanup_gcp_buckets(neo4j_session, common_job_parameters)
|
|
@@ -1,18 +1,45 @@
|
|
|
1
1
|
import base64
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
|
+
from typing import cast
|
|
4
5
|
|
|
5
6
|
import neo4j
|
|
6
7
|
|
|
8
|
+
import cartography.intel.github.commits
|
|
7
9
|
import cartography.intel.github.repos
|
|
8
10
|
import cartography.intel.github.teams
|
|
9
11
|
import cartography.intel.github.users
|
|
12
|
+
from cartography.client.core.tx import read_list_of_values_tx
|
|
10
13
|
from cartography.config import Config
|
|
11
14
|
from cartography.util import timeit
|
|
12
15
|
|
|
13
16
|
logger = logging.getLogger(__name__)
|
|
14
17
|
|
|
15
18
|
|
|
19
|
+
def _get_repos_from_graph(neo4j_session: neo4j.Session, organization: str) -> list[str]:
|
|
20
|
+
"""
|
|
21
|
+
Get repository names for an organization from the graph instead of making an API call.
|
|
22
|
+
|
|
23
|
+
:param neo4j_session: Neo4j session for database interface
|
|
24
|
+
:param organization: GitHub organization name
|
|
25
|
+
:return: List of repository names
|
|
26
|
+
"""
|
|
27
|
+
org_url = f"https://github.com/{organization}"
|
|
28
|
+
query = """
|
|
29
|
+
MATCH (org:GitHubOrganization {id: $org_url})<-[:OWNER]-(repo:GitHubRepository)
|
|
30
|
+
RETURN repo.name
|
|
31
|
+
ORDER BY repo.name
|
|
32
|
+
"""
|
|
33
|
+
return cast(
|
|
34
|
+
list[str],
|
|
35
|
+
neo4j_session.execute_read(
|
|
36
|
+
read_list_of_values_tx,
|
|
37
|
+
query,
|
|
38
|
+
org_url=org_url,
|
|
39
|
+
),
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
16
43
|
@timeit
|
|
17
44
|
def start_github_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
|
|
18
45
|
"""
|
|
@@ -54,3 +81,17 @@ def start_github_ingestion(neo4j_session: neo4j.Session, config: Config) -> None
|
|
|
54
81
|
auth_data["url"],
|
|
55
82
|
auth_data["name"],
|
|
56
83
|
)
|
|
84
|
+
|
|
85
|
+
# Sync commit relationships for the configured lookback period
|
|
86
|
+
# Get repo names from the graph instead of making another API call
|
|
87
|
+
repo_names = _get_repos_from_graph(neo4j_session, auth_data["name"])
|
|
88
|
+
|
|
89
|
+
cartography.intel.github.commits.sync_github_commits(
|
|
90
|
+
neo4j_session,
|
|
91
|
+
auth_data["token"],
|
|
92
|
+
auth_data["url"],
|
|
93
|
+
auth_data["name"],
|
|
94
|
+
repo_names,
|
|
95
|
+
common_job_parameters["UPDATE_TAG"],
|
|
96
|
+
config.github_commit_lookback_days,
|
|
97
|
+
)
|