cartography 0.112.0__py3-none-any.whl → 0.114.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (82)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +8 -0
  3. cartography/config.py +4 -0
  4. cartography/data/indexes.cypher +0 -31
  5. cartography/intel/aws/apigatewayv2.py +116 -0
  6. cartography/intel/aws/iam.py +741 -492
  7. cartography/intel/aws/organizations.py +7 -8
  8. cartography/intel/aws/permission_relationships.py +4 -16
  9. cartography/intel/aws/resources.py +2 -0
  10. cartography/intel/azure/__init__.py +16 -0
  11. cartography/intel/azure/app_service.py +105 -0
  12. cartography/intel/azure/functions.py +124 -0
  13. cartography/intel/entra/__init__.py +31 -0
  14. cartography/intel/entra/app_role_assignments.py +277 -0
  15. cartography/intel/entra/applications.py +4 -238
  16. cartography/intel/entra/federation/__init__.py +0 -0
  17. cartography/intel/entra/federation/aws_identity_center.py +77 -0
  18. cartography/intel/entra/service_principals.py +217 -0
  19. cartography/intel/gcp/__init__.py +136 -436
  20. cartography/intel/gcp/clients.py +65 -0
  21. cartography/intel/gcp/compute.py +18 -44
  22. cartography/intel/gcp/crm/__init__.py +0 -0
  23. cartography/intel/gcp/crm/folders.py +108 -0
  24. cartography/intel/gcp/crm/orgs.py +65 -0
  25. cartography/intel/gcp/crm/projects.py +109 -0
  26. cartography/intel/gcp/dns.py +82 -169
  27. cartography/intel/gcp/gke.py +72 -113
  28. cartography/intel/gcp/iam.py +66 -54
  29. cartography/intel/gcp/storage.py +75 -159
  30. cartography/intel/github/__init__.py +41 -0
  31. cartography/intel/github/commits.py +423 -0
  32. cartography/intel/github/repos.py +73 -39
  33. cartography/models/aws/apigatewayv2/__init__.py +0 -0
  34. cartography/models/aws/apigatewayv2/apigatewayv2.py +53 -0
  35. cartography/models/aws/iam/access_key.py +103 -0
  36. cartography/models/aws/iam/account_role.py +24 -0
  37. cartography/models/aws/iam/federated_principal.py +60 -0
  38. cartography/models/aws/iam/group.py +60 -0
  39. cartography/models/aws/iam/group_membership.py +26 -0
  40. cartography/models/aws/iam/inline_policy.py +78 -0
  41. cartography/models/aws/iam/managed_policy.py +51 -0
  42. cartography/models/aws/iam/policy_statement.py +57 -0
  43. cartography/models/aws/iam/role.py +83 -0
  44. cartography/models/aws/iam/root_principal.py +52 -0
  45. cartography/models/aws/iam/service_principal.py +30 -0
  46. cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
  47. cartography/models/aws/iam/user.py +54 -0
  48. cartography/models/azure/__init__.py +0 -0
  49. cartography/models/azure/app_service.py +59 -0
  50. cartography/models/azure/function_app.py +59 -0
  51. cartography/models/entra/entra_user_to_aws_sso.py +41 -0
  52. cartography/models/entra/service_principal.py +104 -0
  53. cartography/models/gcp/compute/subnet.py +74 -0
  54. cartography/models/gcp/crm/__init__.py +0 -0
  55. cartography/models/gcp/crm/folders.py +98 -0
  56. cartography/models/gcp/crm/organizations.py +21 -0
  57. cartography/models/gcp/crm/projects.py +100 -0
  58. cartography/models/gcp/dns.py +109 -0
  59. cartography/models/gcp/gke.py +69 -0
  60. cartography/models/gcp/iam.py +3 -0
  61. cartography/models/gcp/storage/__init__.py +0 -0
  62. cartography/models/gcp/storage/bucket.py +119 -0
  63. cartography/models/github/commits.py +63 -0
  64. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/METADATA +7 -5
  65. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/RECORD +69 -39
  66. cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
  67. cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
  68. cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
  69. cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
  70. cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
  71. cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
  72. cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
  73. cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
  74. cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
  75. cartography/data/jobs/cleanup/gcp_dns_cleanup.json +0 -29
  76. cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
  77. cartography/data/jobs/cleanup/gcp_storage_bucket_cleanup.json +0 -29
  78. cartography/intel/gcp/crm.py +0 -355
  79. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/WHEEL +0 -0
  80. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/entry_points.txt +0 -0
  81. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/licenses/LICENSE +0 -0
  82. {cartography-0.112.0.dist-info → cartography-0.114.0.dist-info}/top_level.txt +0 -0
@@ -90,6 +90,29 @@ def get_gcp_roles(iam_client: Resource, project_id: str) -> List[Dict]:
90
90
  return []
91
91
 
92
92
 
93
+ def transform_gcp_service_accounts(
94
+ raw_accounts: List[Dict[str, Any]],
95
+ project_id: str,
96
+ ) -> List[Dict[str, Any]]:
97
+ """
98
+ Transform raw GCP service accounts into loader-friendly dicts.
99
+ """
100
+ result: List[Dict[str, Any]] = []
101
+ for sa in raw_accounts:
102
+ result.append(
103
+ {
104
+ "id": sa["uniqueId"],
105
+ "email": sa.get("email"),
106
+ "displayName": sa.get("displayName"),
107
+ "oauth2ClientId": sa.get("oauth2ClientId"),
108
+ "uniqueId": sa.get("uniqueId"),
109
+ "disabled": sa.get("disabled", False),
110
+ "projectId": project_id,
111
+ },
112
+ )
113
+ return result
114
+
115
+
93
116
  @timeit
94
117
  def load_gcp_service_accounts(
95
118
  neo4j_session: neo4j.Session,
@@ -99,38 +122,55 @@ def load_gcp_service_accounts(
99
122
  ) -> None:
100
123
  """
101
124
  Load GCP service account data into Neo4j.
102
-
103
- :param neo4j_session: The Neo4j session.
104
- :param service_accounts: A list of service account data to load.
105
- :param project_id: The GCP Project ID associated with the service accounts.
106
- :param gcp_update_tag: The timestamp of the current sync run.
107
125
  """
108
126
  logger.debug(
109
127
  f"Loading {len(service_accounts)} service accounts for project {project_id}"
110
128
  )
111
- transformed_service_accounts = []
112
- for sa in service_accounts:
113
- transformed_sa = {
114
- "id": sa["uniqueId"],
115
- "email": sa.get("email"),
116
- "displayName": sa.get("displayName"),
117
- "oauth2ClientId": sa.get("oauth2ClientId"),
118
- "uniqueId": sa.get("uniqueId"),
119
- "disabled": sa.get("disabled", False),
120
- "projectId": project_id,
121
- }
122
- transformed_service_accounts.append(transformed_sa)
123
129
 
124
130
  load(
125
131
  neo4j_session,
126
132
  GCPServiceAccountSchema(),
127
- transformed_service_accounts,
133
+ service_accounts,
128
134
  lastupdated=gcp_update_tag,
129
135
  projectId=project_id,
130
- additional_labels=["GCPPrincipal"],
131
136
  )
132
137
 
133
138
 
139
+ def transform_gcp_roles(
140
+ raw_roles: List[Dict[str, Any]],
141
+ project_id: str,
142
+ ) -> List[Dict[str, Any]]:
143
+ """
144
+ Transform raw GCP roles into loader-friendly dicts.
145
+ """
146
+ result: List[Dict[str, Any]] = []
147
+ for role in raw_roles:
148
+ role_name = role["name"]
149
+ if role_name.startswith("roles/"):
150
+ role_type = (
151
+ "BASIC"
152
+ if role_name in ["roles/owner", "roles/editor", "roles/viewer"]
153
+ else "PREDEFINED"
154
+ )
155
+ else:
156
+ role_type = "CUSTOM"
157
+
158
+ result.append(
159
+ {
160
+ "id": role_name,
161
+ "name": role_name,
162
+ "title": role.get("title"),
163
+ "description": role.get("description"),
164
+ "deleted": role.get("deleted", False),
165
+ "etag": role.get("etag"),
166
+ "includedPermissions": role.get("includedPermissions", []),
167
+ "roleType": role_type,
168
+ "projectId": project_id,
169
+ },
170
+ )
171
+ return result
172
+
173
+
134
174
  @timeit
135
175
  def load_gcp_roles(
136
176
  neo4j_session: neo4j.Session,
@@ -140,41 +180,13 @@ def load_gcp_roles(
140
180
  ) -> None:
141
181
  """
142
182
  Load GCP role data into Neo4j.
143
-
144
- :param neo4j_session: The Neo4j session.
145
- :param roles: A list of role data to load.
146
- :param project_id: The GCP Project ID associated with the roles.
147
- :param gcp_update_tag: The timestamp of the current sync run.
148
183
  """
149
184
  logger.debug(f"Loading {len(roles)} roles for project {project_id}")
150
- transformed_roles = []
151
- for role in roles:
152
- role_name = role["name"]
153
- if role_name.startswith("roles/"):
154
- if role_name in ["roles/owner", "roles/editor", "roles/viewer"]:
155
- role_type = "BASIC"
156
- else:
157
- role_type = "PREDEFINED"
158
- else:
159
- role_type = "CUSTOM"
160
-
161
- transformed_role = {
162
- "id": role_name,
163
- "name": role_name,
164
- "title": role.get("title"),
165
- "description": role.get("description"),
166
- "deleted": role.get("deleted", False),
167
- "etag": role.get("etag"),
168
- "includedPermissions": role.get("includedPermissions", []),
169
- "roleType": role_type,
170
- "projectId": project_id,
171
- }
172
- transformed_roles.append(transformed_role)
173
185
 
174
186
  load(
175
187
  neo4j_session,
176
188
  GCPRoleSchema(),
177
- transformed_roles,
189
+ roles,
178
190
  lastupdated=gcp_update_tag,
179
191
  projectId=project_id,
180
192
  )
@@ -224,18 +236,18 @@ def sync(
224
236
  """
225
237
  logger.info(f"Syncing GCP IAM for project {project_id}")
226
238
 
227
- # Get and load service accounts
228
- service_accounts = get_gcp_service_accounts(iam_client, project_id)
239
+ service_accounts_raw = get_gcp_service_accounts(iam_client, project_id)
229
240
  logger.info(
230
- f"Found {len(service_accounts)} service accounts in project {project_id}"
241
+ f"Found {len(service_accounts_raw)} service accounts in project {project_id}"
231
242
  )
243
+ service_accounts = transform_gcp_service_accounts(service_accounts_raw, project_id)
232
244
  load_gcp_service_accounts(
233
245
  neo4j_session, service_accounts, project_id, gcp_update_tag
234
246
  )
235
247
 
236
- # Get and load roles
237
- roles = get_gcp_roles(iam_client, project_id)
238
- logger.info(f"Found {len(roles)} roles in project {project_id}")
248
+ roles_raw = get_gcp_roles(iam_client, project_id)
249
+ logger.info(f"Found {len(roles_raw)} roles in project {project_id}")
250
+ roles = transform_gcp_roles(roles_raw, project_id)
239
251
  load_gcp_roles(neo4j_session, roles, project_id, gcp_update_tag)
240
252
 
241
253
  # Run cleanup
@@ -1,13 +1,17 @@
1
1
  import logging
2
2
  from typing import Dict
3
3
  from typing import List
4
+ from typing import Tuple
4
5
 
5
6
  import neo4j
6
7
  from googleapiclient.discovery import HttpError
7
8
  from googleapiclient.discovery import Resource
8
9
 
10
+ from cartography.client.core.tx import load
11
+ from cartography.graph.job import GraphJob
9
12
  from cartography.intel.gcp import compute
10
- from cartography.util import run_cleanup_job
13
+ from cartography.models.gcp.storage.bucket import GCPBucketLabelSchema
14
+ from cartography.models.gcp.storage.bucket import GCPBucketSchema
11
15
  from cartography.util import timeit
12
16
 
13
17
  logger = logging.getLogger(__name__)
@@ -58,165 +62,85 @@ def get_gcp_buckets(storage: Resource, project_id: str) -> Dict:
58
62
 
59
63
 
60
64
  @timeit
61
- def transform_gcp_buckets(bucket_res: Dict) -> List[Dict]:
65
+ def transform_gcp_buckets_and_labels(bucket_res: Dict) -> Tuple[List[Dict], List[Dict]]:
62
66
  """
63
- Transform the GCP Storage Bucket response object for Neo4j ingestion
67
+ Transform the GCP Storage Bucket response object for Neo4j ingestion.
64
68
 
65
- :type bucket_res: The GCP storage resource object (https://cloud.google.com/storage/docs/json_api/v1/buckets)
66
- :param bucket_res: The return data
67
-
68
- :rtype: list
69
- :return: List of buckets ready for ingestion to Neo4j
69
+ :param bucket_res: The raw GCP bucket response.
70
+ :return: A tuple of (buckets, bucket_labels) ready for ingestion to Neo4j.
70
71
  """
71
72
 
72
- bucket_list = []
73
+ buckets: List[Dict] = []
74
+ labels: List[Dict] = []
73
75
  for b in bucket_res.get("items", []):
74
- bucket = {}
75
- bucket["etag"] = b.get("etag")
76
- bucket["iam_config_bucket_policy_only"] = (
77
- b.get("iamConfiguration", {})
78
- .get("bucketPolicyOnly", {})
79
- .get("enabled", None)
80
- )
81
- bucket["id"] = b["id"]
82
- bucket["labels"] = [(key, val) for (key, val) in b.get("labels", {}).items()]
83
- bucket["owner_entity"] = b.get("owner", {}).get("entity")
84
- bucket["owner_entity_id"] = b.get("owner", {}).get("entityId")
85
- bucket["kind"] = b.get("kind")
86
- bucket["location"] = b.get("location")
87
- bucket["location_type"] = b.get("locationType")
88
- bucket["meta_generation"] = b.get("metageneration", None)
89
- bucket["project_number"] = b["projectNumber"]
90
- bucket["self_link"] = b.get("selfLink")
91
- bucket["storage_class"] = b.get("storageClass")
92
- bucket["time_created"] = b.get("timeCreated")
93
- bucket["updated"] = b.get("updated")
94
- bucket["versioning_enabled"] = b.get("versioning", {}).get("enabled", None)
95
- bucket["default_event_based_hold"] = b.get("defaultEventBasedHold", None)
96
- bucket["retention_period"] = b.get("retentionPolicy", {}).get(
97
- "retentionPeriod",
98
- None,
99
- )
100
- bucket["default_kms_key_name"] = b.get("encryption", {}).get(
101
- "defaultKmsKeyName",
102
- )
103
- bucket["log_bucket"] = b.get("logging", {}).get("logBucket")
104
- bucket["requester_pays"] = b.get("billing", {}).get("requesterPays", None)
105
- bucket_list.append(bucket)
106
- return bucket_list
76
+ bucket = {
77
+ "iam_config_bucket_policy_only": (
78
+ b.get("iamConfiguration", {}).get("bucketPolicyOnly", {}).get("enabled")
79
+ ),
80
+ "id": b["id"],
81
+ # Preserve legacy bucket_id field for compatibility
82
+ "bucket_id": b["id"],
83
+ "owner_entity": b.get("owner", {}).get("entity"),
84
+ "owner_entity_id": b.get("owner", {}).get("entityId"),
85
+ "kind": b.get("kind"),
86
+ "location": b.get("location"),
87
+ "location_type": b.get("locationType"),
88
+ "meta_generation": b.get("metageneration"),
89
+ "project_number": b.get("projectNumber"),
90
+ "self_link": b.get("selfLink"),
91
+ "storage_class": b.get("storageClass"),
92
+ "time_created": b.get("timeCreated"),
93
+ "versioning_enabled": b.get("versioning", {}).get("enabled"),
94
+ "retention_period": b.get("retentionPolicy", {}).get("retentionPeriod"),
95
+ "default_kms_key_name": b.get("encryption", {}).get("defaultKmsKeyName"),
96
+ "log_bucket": b.get("logging", {}).get("logBucket"),
97
+ "requester_pays": b.get("billing", {}).get("requesterPays"),
98
+ }
99
+ buckets.append(bucket)
100
+ for key, val in b.get("labels", {}).items():
101
+ labels.append(
102
+ {
103
+ "id": f"GCPBucket_{key}",
104
+ "key": key,
105
+ "value": val,
106
+ "bucket_id": b["id"],
107
+ }
108
+ )
109
+ return buckets, labels
107
110
 
108
111
 
109
112
  @timeit
110
113
  def load_gcp_buckets(
111
114
  neo4j_session: neo4j.Session,
112
115
  buckets: List[Dict],
116
+ project_id: str,
113
117
  gcp_update_tag: int,
114
118
  ) -> None:
115
- """
116
- Ingest GCP Storage Buckets to Neo4j
117
-
118
- :type neo4j_session: Neo4j session object
119
- :param neo4j session: The Neo4j session object
120
-
121
- :type buckets: list
122
- :param buckets: List of GCP Storage Buckets to injest
123
-
124
- :type gcp_update_tag: timestamp
125
- :param gcp_update_tag: The timestamp value to set our new Neo4j nodes with
126
-
127
- :rtype: NoneType
128
- :return: Nothing
129
- """
130
-
131
- query = """
132
- MERGE(p:GCPProject{projectnumber:$ProjectNumber})
133
- ON CREATE SET p.firstseen = timestamp()
134
- SET p.lastupdated = $gcp_update_tag
135
-
136
- MERGE(bucket:GCPBucket{id:$BucketId})
137
- ON CREATE SET bucket.firstseen = timestamp(),
138
- bucket.bucket_id = $BucketId
139
- SET bucket.self_link = $SelfLink,
140
- bucket.project_number = $ProjectNumber,
141
- bucket.kind = $Kind,
142
- bucket.location = $Location,
143
- bucket.location_type = $LocationType,
144
- bucket.meta_generation = $MetaGeneration,
145
- bucket.storage_class = $StorageClass,
146
- bucket.time_created = $TimeCreated,
147
- bucket.retention_period = $RetentionPeriod,
148
- bucket.iam_config_bucket_policy_only = $IamConfigBucketPolicyOnly,
149
- bucket.owner_entity = $OwnerEntity,
150
- bucket.owner_entity_id = $OwnerEntityId,
151
- bucket.lastupdated = $gcp_update_tag,
152
- bucket.versioning_enabled = $VersioningEnabled,
153
- bucket.log_bucket = $LogBucket,
154
- bucket.requester_pays = $RequesterPays,
155
- bucket.default_kms_key_name = $DefaultKmsKeyName
156
-
157
- MERGE (p)-[r:RESOURCE]->(bucket)
158
- ON CREATE SET r.firstseen = timestamp()
159
- SET r.lastupdated = $gcp_update_tag
160
- """
161
- for bucket in buckets:
162
- neo4j_session.run(
163
- query,
164
- ProjectNumber=bucket["project_number"],
165
- BucketId=bucket["id"],
166
- SelfLink=bucket["self_link"],
167
- Kind=bucket["kind"],
168
- Location=bucket["location"],
169
- LocationType=bucket["location_type"],
170
- MetaGeneration=bucket["meta_generation"],
171
- StorageClass=bucket["storage_class"],
172
- TimeCreated=bucket["time_created"],
173
- RetentionPeriod=bucket["retention_period"],
174
- IamConfigBucketPolicyOnly=bucket["iam_config_bucket_policy_only"],
175
- OwnerEntity=bucket["owner_entity"],
176
- OwnerEntityId=bucket["owner_entity_id"],
177
- VersioningEnabled=bucket["versioning_enabled"],
178
- LogBucket=bucket["log_bucket"],
179
- RequesterPays=bucket["requester_pays"],
180
- DefaultKmsKeyName=bucket["default_kms_key_name"],
181
- gcp_update_tag=gcp_update_tag,
182
- )
183
- _attach_gcp_bucket_labels(neo4j_session, bucket, gcp_update_tag)
119
+ """Ingest GCP Storage Buckets to Neo4j."""
120
+ load(
121
+ neo4j_session,
122
+ GCPBucketSchema(),
123
+ buckets,
124
+ lastupdated=gcp_update_tag,
125
+ PROJECT_ID=project_id,
126
+ )
184
127
 
185
128
 
186
129
  @timeit
187
- def _attach_gcp_bucket_labels(
130
+ def load_gcp_bucket_labels(
188
131
  neo4j_session: neo4j.Session,
189
- bucket: Resource,
132
+ bucket_labels: List[Dict],
133
+ project_id: str,
190
134
  gcp_update_tag: int,
191
135
  ) -> None:
192
- """
193
- Attach GCP bucket labels to the bucket.
194
- :param neo4j_session: The neo4j session
195
- :param bucket: The GCP bucket object
196
- :param gcp_update_tag: The update tag for this sync
197
- :return: Nothing
198
- """
199
- query = """
200
- MERGE (l:Label:GCPBucketLabel{id: $BucketLabelId})
201
- ON CREATE SET l.firstseen = timestamp(),
202
- l.key = $Key
203
- SET l.value = $Value,
204
- l.lastupdated = $gcp_update_tag
205
- WITH l
206
- MATCH (bucket:GCPBucket{id:$BucketId})
207
- MERGE (l)<-[r:LABELED]-(bucket)
208
- ON CREATE SET r.firstseen = timestamp()
209
- SET r.lastupdated = $gcp_update_tag
210
- """
211
- for key, val in bucket.get("labels", []):
212
- neo4j_session.run(
213
- query,
214
- BucketLabelId=f"GCPBucket_{key}",
215
- Key=key,
216
- Value=val,
217
- BucketId=bucket["id"],
218
- gcp_update_tag=gcp_update_tag,
219
- )
136
+ """Ingest GCP Storage Bucket labels and attach them to buckets."""
137
+ load(
138
+ neo4j_session,
139
+ GCPBucketLabelSchema(),
140
+ bucket_labels,
141
+ lastupdated=gcp_update_tag,
142
+ PROJECT_ID=project_id,
143
+ )
220
144
 
221
145
 
222
146
  @timeit
@@ -224,22 +148,14 @@ def cleanup_gcp_buckets(
224
148
  neo4j_session: neo4j.Session,
225
149
  common_job_parameters: Dict,
226
150
  ) -> None:
227
- """
228
- Delete out-of-date GCP Storage Bucket nodes and relationships
229
-
230
- :type neo4j_session: The Neo4j session object
231
- :param neo4j_session: The Neo4j session
232
-
233
- :type common_job_parameters: dict
234
- :param common_job_parameters: Dictionary of other job parameters to pass to Neo4j
235
-
236
- :rtype: NoneType
237
- :return: Nothing
238
- """
239
- run_cleanup_job(
240
- "gcp_storage_bucket_cleanup.json",
151
+ """Delete out-of-date GCP Storage Bucket nodes and relationships."""
152
+ # Bucket labels depend on buckets, so we must remove labels first to avoid
153
+ # dangling references before deleting the buckets themselves.
154
+ GraphJob.from_node_schema(GCPBucketLabelSchema(), common_job_parameters).run(
155
+ neo4j_session,
156
+ )
157
+ GraphJob.from_node_schema(GCPBucketSchema(), common_job_parameters).run(
241
158
  neo4j_session,
242
- common_job_parameters,
243
159
  )
244
160
 
245
161
 
@@ -274,7 +190,7 @@ def sync_gcp_buckets(
274
190
  """
275
191
  logger.info("Syncing Storage objects for project %s.", project_id)
276
192
  storage_res = get_gcp_buckets(storage, project_id)
277
- bucket_list = transform_gcp_buckets(storage_res)
278
- load_gcp_buckets(neo4j_session, bucket_list, gcp_update_tag)
279
- # TODO scope the cleanup to the current project - https://github.com/cartography-cncf/cartography/issues/381
193
+ buckets, bucket_labels = transform_gcp_buckets_and_labels(storage_res)
194
+ load_gcp_buckets(neo4j_session, buckets, project_id, gcp_update_tag)
195
+ load_gcp_bucket_labels(neo4j_session, bucket_labels, project_id, gcp_update_tag)
280
196
  cleanup_gcp_buckets(neo4j_session, common_job_parameters)
@@ -1,18 +1,45 @@
1
1
  import base64
2
2
  import json
3
3
  import logging
4
+ from typing import cast
4
5
 
5
6
  import neo4j
6
7
 
8
+ import cartography.intel.github.commits
7
9
  import cartography.intel.github.repos
8
10
  import cartography.intel.github.teams
9
11
  import cartography.intel.github.users
12
+ from cartography.client.core.tx import read_list_of_values_tx
10
13
  from cartography.config import Config
11
14
  from cartography.util import timeit
12
15
 
13
16
  logger = logging.getLogger(__name__)
14
17
 
15
18
 
19
+ def _get_repos_from_graph(neo4j_session: neo4j.Session, organization: str) -> list[str]:
20
+ """
21
+ Get repository names for an organization from the graph instead of making an API call.
22
+
23
+ :param neo4j_session: Neo4j session for database interface
24
+ :param organization: GitHub organization name
25
+ :return: List of repository names
26
+ """
27
+ org_url = f"https://github.com/{organization}"
28
+ query = """
29
+ MATCH (org:GitHubOrganization {id: $org_url})<-[:OWNER]-(repo:GitHubRepository)
30
+ RETURN repo.name
31
+ ORDER BY repo.name
32
+ """
33
+ return cast(
34
+ list[str],
35
+ neo4j_session.execute_read(
36
+ read_list_of_values_tx,
37
+ query,
38
+ org_url=org_url,
39
+ ),
40
+ )
41
+
42
+
16
43
  @timeit
17
44
  def start_github_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
18
45
  """
@@ -54,3 +81,17 @@ def start_github_ingestion(neo4j_session: neo4j.Session, config: Config) -> None
54
81
  auth_data["url"],
55
82
  auth_data["name"],
56
83
  )
84
+
85
+ # Sync commit relationships for the configured lookback period
86
+ # Get repo names from the graph instead of making another API call
87
+ repo_names = _get_repos_from_graph(neo4j_session, auth_data["name"])
88
+
89
+ cartography.intel.github.commits.sync_github_commits(
90
+ neo4j_session,
91
+ auth_data["token"],
92
+ auth_data["url"],
93
+ auth_data["name"],
94
+ repo_names,
95
+ common_job_parameters["UPDATE_TAG"],
96
+ config.github_commit_lookback_days,
97
+ )