cartography 0.113.0__py3-none-any.whl → 0.115.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cartography might be problematic.

Files changed (96)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +10 -2
  3. cartography/client/core/tx.py +11 -0
  4. cartography/config.py +4 -0
  5. cartography/data/indexes.cypher +0 -27
  6. cartography/intel/aws/config.py +7 -3
  7. cartography/intel/aws/ecr.py +9 -9
  8. cartography/intel/aws/iam.py +741 -492
  9. cartography/intel/aws/identitycenter.py +240 -13
  10. cartography/intel/aws/lambda_function.py +69 -2
  11. cartography/intel/aws/organizations.py +10 -9
  12. cartography/intel/aws/permission_relationships.py +7 -17
  13. cartography/intel/aws/redshift.py +9 -4
  14. cartography/intel/aws/route53.py +53 -3
  15. cartography/intel/aws/securityhub.py +3 -1
  16. cartography/intel/azure/__init__.py +24 -0
  17. cartography/intel/azure/app_service.py +105 -0
  18. cartography/intel/azure/functions.py +124 -0
  19. cartography/intel/azure/logic_apps.py +101 -0
  20. cartography/intel/create_indexes.py +2 -1
  21. cartography/intel/dns.py +5 -2
  22. cartography/intel/entra/__init__.py +31 -0
  23. cartography/intel/entra/app_role_assignments.py +277 -0
  24. cartography/intel/entra/applications.py +4 -238
  25. cartography/intel/entra/federation/__init__.py +0 -0
  26. cartography/intel/entra/federation/aws_identity_center.py +77 -0
  27. cartography/intel/entra/service_principals.py +217 -0
  28. cartography/intel/gcp/__init__.py +136 -440
  29. cartography/intel/gcp/clients.py +65 -0
  30. cartography/intel/gcp/compute.py +18 -44
  31. cartography/intel/gcp/crm/__init__.py +0 -0
  32. cartography/intel/gcp/crm/folders.py +108 -0
  33. cartography/intel/gcp/crm/orgs.py +65 -0
  34. cartography/intel/gcp/crm/projects.py +109 -0
  35. cartography/intel/gcp/dns.py +2 -1
  36. cartography/intel/gcp/gke.py +72 -113
  37. cartography/intel/github/__init__.py +41 -0
  38. cartography/intel/github/commits.py +423 -0
  39. cartography/intel/github/repos.py +76 -45
  40. cartography/intel/gsuite/api.py +17 -4
  41. cartography/intel/okta/applications.py +9 -4
  42. cartography/intel/okta/awssaml.py +5 -2
  43. cartography/intel/okta/factors.py +3 -1
  44. cartography/intel/okta/groups.py +5 -2
  45. cartography/intel/okta/organization.py +3 -1
  46. cartography/intel/okta/origins.py +3 -1
  47. cartography/intel/okta/roles.py +5 -2
  48. cartography/intel/okta/users.py +3 -1
  49. cartography/models/aws/iam/access_key.py +103 -0
  50. cartography/models/aws/iam/account_role.py +24 -0
  51. cartography/models/aws/iam/federated_principal.py +60 -0
  52. cartography/models/aws/iam/group.py +60 -0
  53. cartography/models/aws/iam/group_membership.py +26 -0
  54. cartography/models/aws/iam/inline_policy.py +78 -0
  55. cartography/models/aws/iam/managed_policy.py +51 -0
  56. cartography/models/aws/iam/policy_statement.py +57 -0
  57. cartography/models/aws/iam/role.py +83 -0
  58. cartography/models/aws/iam/root_principal.py +52 -0
  59. cartography/models/aws/iam/service_principal.py +30 -0
  60. cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
  61. cartography/models/aws/iam/user.py +54 -0
  62. cartography/models/aws/identitycenter/awspermissionset.py +24 -1
  63. cartography/models/aws/identitycenter/awssogroup.py +70 -0
  64. cartography/models/aws/identitycenter/awsssouser.py +37 -1
  65. cartography/models/aws/lambda_function/lambda_function.py +2 -0
  66. cartography/models/azure/__init__.py +0 -0
  67. cartography/models/azure/app_service.py +59 -0
  68. cartography/models/azure/function_app.py +59 -0
  69. cartography/models/azure/logic_apps.py +56 -0
  70. cartography/models/entra/entra_user_to_aws_sso.py +41 -0
  71. cartography/models/entra/service_principal.py +104 -0
  72. cartography/models/entra/user.py +18 -0
  73. cartography/models/gcp/compute/subnet.py +74 -0
  74. cartography/models/gcp/crm/__init__.py +0 -0
  75. cartography/models/gcp/crm/folders.py +98 -0
  76. cartography/models/gcp/crm/organizations.py +21 -0
  77. cartography/models/gcp/crm/projects.py +100 -0
  78. cartography/models/gcp/gke.py +69 -0
  79. cartography/models/github/commits.py +63 -0
  80. {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/METADATA +8 -5
  81. {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/RECORD +85 -56
  82. cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
  83. cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
  84. cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
  85. cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
  86. cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
  87. cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
  88. cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
  89. cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
  90. cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
  91. cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
  92. cartography/intel/gcp/crm.py +0 -355
  93. {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/WHEEL +0 -0
  94. {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/entry_points.txt +0 -0
  95. {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/licenses/LICENSE +0 -0
  96. {cartography-0.113.0.dist-info → cartography-0.115.0.dist-info}/top_level.txt +0 -0
cartography/intel/github/__init__.py
@@ -1,18 +1,45 @@
  import base64
  import json
  import logging
+ from typing import cast

  import neo4j

+ import cartography.intel.github.commits
  import cartography.intel.github.repos
  import cartography.intel.github.teams
  import cartography.intel.github.users
+ from cartography.client.core.tx import read_list_of_values_tx
  from cartography.config import Config
  from cartography.util import timeit

  logger = logging.getLogger(__name__)


+ def _get_repos_from_graph(neo4j_session: neo4j.Session, organization: str) -> list[str]:
+     """
+     Get repository names for an organization from the graph instead of making an API call.
+
+     :param neo4j_session: Neo4j session for database interface
+     :param organization: GitHub organization name
+     :return: List of repository names
+     """
+     org_url = f"https://github.com/{organization}"
+     query = """
+     MATCH (org:GitHubOrganization {id: $org_url})<-[:OWNER]-(repo:GitHubRepository)
+     RETURN repo.name
+     ORDER BY repo.name
+     """
+     return cast(
+         list[str],
+         neo4j_session.execute_read(
+             read_list_of_values_tx,
+             query,
+             org_url=org_url,
+         ),
+     )
+
+
  @timeit
  def start_github_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
      """
@@ -54,3 +81,17 @@ def start_github_ingestion(neo4j_session: neo4j.Session, config: Config) -> None
              auth_data["url"],
              auth_data["name"],
          )
+
+         # Sync commit relationships for the configured lookback period
+         # Get repo names from the graph instead of making another API call
+         repo_names = _get_repos_from_graph(neo4j_session, auth_data["name"])
+
+         cartography.intel.github.commits.sync_github_commits(
+             neo4j_session,
+             auth_data["token"],
+             auth_data["url"],
+             auth_data["name"],
+             repo_names,
+             common_job_parameters["UPDATE_TAG"],
+             config.github_commit_lookback_days,
+         )
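
Note on the wiring above: repository names are now read back out of the graph (via cartography's read_list_of_values_tx helper) rather than re-fetched from the GitHub API. For readers unfamiliar with the managed-read pattern in the neo4j 5.x Python driver, here is a minimal sketch of an equivalent standalone read; the connection details and organization URL are hypothetical, and the helper name _read_repo_names is made up for the example.

# Minimal sketch of the graph-read pattern using only the neo4j 5.x Python driver.
# cartography's read_list_of_values_tx helper plays a similar role in the diff above.
import neo4j

def _read_repo_names(tx: neo4j.ManagedTransaction, org_url: str) -> list[str]:
    query = """
    MATCH (org:GitHubOrganization {id: $org_url})<-[:OWNER]-(repo:GitHubRepository)
    RETURN repo.name AS name
    ORDER BY repo.name
    """
    return [record["name"] for record in tx.run(query, org_url=org_url)]

driver = neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
with driver.session() as session:
    # execute_read hands a managed transaction to the function and may retry it.
    repo_names = session.execute_read(
        _read_repo_names, org_url="https://github.com/example-org"
    )
driver.close()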
cartography/intel/github/commits.py (new file)
@@ -0,0 +1,423 @@
+ import logging
+ from datetime import datetime
+ from datetime import timedelta
+ from datetime import timezone
+ from typing import Any
+
+ import neo4j
+
+ from cartography.client.core.tx import load_matchlinks
+ from cartography.graph.job import GraphJob
+ from cartography.intel.github.util import fetch_page
+ from cartography.models.github.commits import GitHubUserCommittedToRepoRel
+ from cartography.util import timeit
+
+ logger = logging.getLogger(__name__)
+
+
+ GITHUB_REPO_COMMITS_PAGINATED_GRAPHQL = """
+ query($login: String!, $repo: String!, $since: GitTimestamp!, $cursor: String) {
+     organization(login: $login) {
+         repository(name: $repo) {
+             name
+             url
+             defaultBranchRef {
+                 target {
+                     ... on Commit {
+                         history(first: 100, since: $since, after: $cursor) {
+                             pageInfo {
+                                 endCursor
+                                 hasNextPage
+                             }
+                             nodes {
+                                 committedDate
+                                 author {
+                                     user {
+                                         url
+                                     }
+                                 }
+                             }
+                         }
+                     }
+                 }
+             }
+         }
+     }
+     rateLimit {
+         limit
+         cost
+         remaining
+         resetAt
+     }
+ }
+ """
+
+
+ @timeit
+ def get_repo_commits(
+     token: str,
+     api_url: str,
+     organization: str,
+     repo_name: str,
+     since_date: datetime,
+ ) -> list[dict[str, Any]]:
+     """
+     Retrieve commits from a GitHub repository since a specific date.
+
+     :param token: The Github API token as string.
+     :param api_url: The Github v4 API endpoint as string.
+     :param organization: The name of the target Github organization as string.
+     :param repo_name: The name of the target Github repository as string.
+     :param since_date: The datetime to fetch commits since.
+     :return: A list of commits from the repository.
+     """
+     # Convert datetime to ISO format for GraphQL (GitTimestamp requires 'Z' suffix for UTC)
+     since_iso = since_date.strftime("%Y-%m-%dT%H:%M:%SZ")
+
+     logger.debug(f"Fetching commits for {organization}/{repo_name} since {since_iso}")
+
+     all_commits = []
+     cursor = None
+     has_next_page = True
+
+     while has_next_page:
+         response = fetch_page(
+             token,
+             api_url,
+             organization,
+             GITHUB_REPO_COMMITS_PAGINATED_GRAPHQL,
+             cursor,
+             repo=repo_name,
+             since=since_iso,
+         )
+
+         # Navigate to the nested commit history
+         repo_data = response.get("data", {}).get("organization", {}).get("repository")
+         if not repo_data:
+             logger.warning(f"No repository data found for {organization}/{repo_name}")
+             break
+
+         default_branch = repo_data.get("defaultBranchRef")
+         if not default_branch:
+             logger.debug(f"Repository {organization}/{repo_name} has no default branch")
+             break
+
+         target = default_branch.get("target")
+         if not target:
+             logger.debug(
+                 f"Repository {organization}/{repo_name} default branch has no target"
+             )
+             break
+
+         history = target.get("history")
+         if not history:
+             logger.debug(f"Repository {organization}/{repo_name} has no commit history")
+             break
+
+         # Add commits from this page
+         commits = history.get("nodes", [])
+         all_commits.extend(commits)
+
+         # Check pagination
+         page_info = history.get("pageInfo", {})
+         has_next_page = page_info.get("hasNextPage", False)
+         cursor = page_info.get("endCursor")
+
+     return all_commits
+
+
+ def process_repo_commits_batch(
+     neo4j_session: neo4j.Session,
+     token: str,
+     api_url: str,
+     organization: str,
+     repo_names: list[str],
+     update_tag: int,
+     lookback_days: int = 30,
+     batch_size: int = 10,
+ ) -> None:
+     """
+     Process repository commits in batches to save memory and API quota.
+
+     :param neo4j_session: Neo4j session for database interface.
+     :param token: The Github API token as string.
+     :param api_url: The Github v4 API endpoint as string.
+     :param organization: The name of the target Github organization as string.
+     :param repo_names: List of repository names to process.
+     :param update_tag: Timestamp used to determine data freshness.
+     :param lookback_days: Number of days to look back for commits.
+     :param batch_size: Number of repositories to process in each batch.
+     """
+     # Calculate lookback date based on configured days
+     lookback_date = datetime.now(timezone.utc) - timedelta(days=lookback_days)
+
+     logger.info(f"Processing {len(repo_names)} repositories in batches of {batch_size}")
+
+     # Process repositories in batches
+     for i in range(0, len(repo_names), batch_size):
+         batch = repo_names[i : i + batch_size]
+         logger.info(
+             f"Processing batch {i // batch_size + 1}: {len(batch)} repositories"
+         )
+
+         # Process each repository in the batch
+         batch_relationships = []
+
+         for repo_name in batch:
+             try:
+                 commits = get_repo_commits(
+                     token,
+                     api_url,
+                     organization,
+                     repo_name,
+                     lookback_date,
+                 )
+
+                 # Transform commits for this single repo immediately
+                 repo_relationships = transform_single_repo_commits_to_relationships(
+                     repo_name,
+                     commits,
+                     organization,
+                 )
+                 batch_relationships.extend(repo_relationships)
+
+                 logger.debug(
+                     f"Found {len(commits)} commits in {repo_name}, created {len(repo_relationships)} relationships"
+                 )
+
+             except Exception:
+                 logger.warning(
+                     f"Failed to fetch commits for {repo_name}", exc_info=True
+                 )
+                 continue
+
+         # Load this batch of relationships
+         if batch_relationships:
+             logger.info(f"Loading {len(batch_relationships)} relationships for batch")
+             load_github_commit_relationships(
+                 neo4j_session,
+                 batch_relationships,
+                 organization,
+                 update_tag,
+             )
+
+             # Clear memory for next batch
+             batch_relationships.clear()
+
+
+ def transform_single_repo_commits_to_relationships(
+     repo_name: str,
+     commits: list[dict[str, Any]],
+     organization: str,
+ ) -> list[dict[str, Any]]:
+     """
+     Transform commits from a single repository into user-repository relationships.
+     Optimized for memory efficiency by processing one repo at a time.
+
+     :param repo_name: The repository name.
+     :param commits: List of commit data from the repository.
+     :param organization: The Github organization name.
+     :return: List of user-repository relationship records for this repo.
+     """
+     if not commits:
+         return []
+
+     repo_url = f"https://github.com/{organization}/{repo_name}"
+
+     # Count commits and track date ranges per user for this repo
+     user_commit_data: dict[str, dict[str, Any]] = {}
+
+     for commit in commits:
+         # Get user URL from author, skip if not available
+         author_user = commit.get("author", {}).get("user")
+         if not author_user or not author_user.get("url"):
+             continue
+
+         user_url = author_user["url"]
+         commit_date = datetime.fromisoformat(
+             commit["committedDate"].replace("Z", "+00:00")
+         )
+
+         if user_url not in user_commit_data:
+             user_commit_data[user_url] = {"commit_count": 0, "commit_dates": []}
+
+         user_commit_data[user_url]["commit_count"] += 1
+         user_commit_data[user_url]["commit_dates"].append(commit_date)
+
+     # Transform to relationship records
+     relationships = []
+     for user_url, data in user_commit_data.items():
+         commit_dates = data["commit_dates"]
+         relationships.append(
+             {
+                 "user_url": user_url,
+                 "repo_url": repo_url,
+                 "commit_count": data["commit_count"],
+                 "last_commit_date": max(commit_dates).isoformat(),
+                 "first_commit_date": min(commit_dates).isoformat(),
+             }
+         )
+
+     return relationships
+
+
+ def transform_commits_to_user_repo_relationships(
+     commits_by_repo: dict[str, list[dict[str, Any]]],
+     organization: str,
+ ) -> list[dict[str, Any]]:
+     """
+     Transform commit data into user-repository relationship data.
+
+     :param commits_by_repo: Dict mapping repo names to commit lists.
+     :param organization: The Github organization name.
+     :return: List of user-repository relationship records.
+     """
+     logger.info("Transforming commit data into user-repository relationships")
+
+     # Group commits by user and repository
+     user_repo_commits: dict[tuple[str, str], list[dict[str, Any]]] = {}
+
+     for repo_name, commits in commits_by_repo.items():
+         repo_url = f"https://github.com/{organization}/{repo_name}"
+
+         for commit in commits:
+             # Use author if available, otherwise use committer
+             commit_user = commit.get("author", {}).get("user") or commit.get(
+                 "committer", {}
+             ).get("user")
+
+             if not commit_user or not commit_user.get("url"):
+                 continue
+
+             user_url = commit_user["url"]
+             key = (user_url, repo_url)
+
+             if key not in user_repo_commits:
+                 user_repo_commits[key] = []
+
+             user_repo_commits[key].append(commit)
+
+     # Transform to relationship records
+     relationships = []
+     for (user_url, repo_url), commits in user_repo_commits.items():
+         commit_dates = [
+             datetime.fromisoformat(commit["committedDate"].replace("Z", "+00:00"))
+             for commit in commits
+         ]
+
+         relationships.append(
+             {
+                 "user_url": user_url,
+                 "repo_url": repo_url,
+                 "commit_count": len(commits),
+                 "last_commit_date": max(commit_dates).isoformat(),
+                 "first_commit_date": min(commit_dates).isoformat(),
+             }
+         )
+
+     logger.info(f"Created {len(relationships)} user-repository relationships")
+     return relationships
+
+
+ @timeit
+ def load_github_commit_relationships(
+     neo4j_session: neo4j.Session,
+     commit_relationships: list[dict[str, Any]],
+     organization: str,
+     update_tag: int,
+ ) -> None:
+     """
+     Load GitHub user-repository commit relationships using MatchLinks.
+
+     :param neo4j_session: Neo4j session for database interface.
+     :param commit_relationships: List of user-repository relationship records.
+     :param organization: The Github organization name for sub-resource scoping.
+     :param update_tag: Timestamp used to determine data freshness.
+     """
+     if not commit_relationships:
+         logger.info("No commit relationships to load")
+         return
+
+     logger.info(
+         f"Loading {len(commit_relationships)} user-repository commit relationships"
+     )
+
+     # Use organization URL as the sub-resource identifier
+     org_url = f"https://github.com/{organization}"
+
+     load_matchlinks(
+         neo4j_session,
+         GitHubUserCommittedToRepoRel(),
+         commit_relationships,
+         lastupdated=update_tag,
+         _sub_resource_label="GitHubOrganization",
+         _sub_resource_id=org_url,
+     )
+
+
+ @timeit
+ def cleanup_github_commit_relationships(
+     neo4j_session: neo4j.Session,
+     organization: str,
+     update_tag: int,
+ ) -> None:
+     """
+     Clean up stale GitHub user-repository commit relationships.
+
+     :param neo4j_session: Neo4j session for database interface.
+     :param organization: The Github organization name.
+     :param update_tag: Timestamp used to determine data freshness.
+     """
+     logger.debug("Cleaning up GitHub user-repository commit relationships")
+
+     org_url = f"https://github.com/{organization}"
+
+     GraphJob.from_matchlink(
+         GitHubUserCommittedToRepoRel(),
+         "GitHubOrganization",
+         org_url,
+         update_tag,
+     ).run(neo4j_session)
+
+
+ @timeit
+ def sync_github_commits(
+     neo4j_session: neo4j.Session,
+     token: str,
+     api_url: str,
+     organization: str,
+     repo_names: list[str],
+     update_tag: int,
+     lookback_days: int = 30,
+ ) -> None:
+     """
+     Sync GitHub commit relationships for the specified lookback period.
+     Uses batch processing to minimize memory usage and API quota consumption.
+
+     :param neo4j_session: Neo4j session for database interface.
+     :param token: The Github API token as string.
+     :param api_url: The Github v4 API endpoint as string.
+     :param organization: The name of the target Github organization as string.
+     :param repo_names: List of repository names to sync commits for.
+     :param update_tag: Timestamp used to determine data freshness.
+     :param lookback_days: Number of days to look back for commits.
+     """
+     logger.info(f"Starting GitHub commits sync for organization: {organization}")
+
+     # Process repositories in batches to save memory and API quota
+     # This approach processes repos in batches, transforms immediately, and loads in batches
+     process_repo_commits_batch(
+         neo4j_session,
+         token,
+         api_url,
+         organization,
+         repo_names,
+         update_tag,
+         lookback_days=lookback_days,
+         batch_size=10,  # Process 10 repos at a time
+     )
+
+     # Cleanup stale relationships after all batches are processed
+     cleanup_github_commit_relationships(neo4j_session, organization, update_tag)
+
+     logger.info("Completed GitHub commits sync")
cartography/intel/github/repos.py
@@ -159,8 +159,6 @@ def _get_repo_collaborators_inner_func(
      token: str,
      repo_raw_data: list[dict[str, Any]],
      affiliation: str,
-     collab_users: list[dict[str, Any]],
-     collab_permission: list[str],
  ) -> dict[str, list[UserAffiliationAndRepoPermission]]:
      result: dict[str, list[UserAffiliationAndRepoPermission]] = {}

@@ -194,6 +192,9 @@ def _get_repo_collaborators_inner_func(
              affiliation,
          )

+         collab_users: List[dict[str, Any]] = []
+         collab_permission: List[str] = []
+
          # nodes and edges are expected to always be present given that we only call for them if totalCount is > 0
          # however sometimes GitHub returns None, as in issue 1334 and 1404.
          for collab in collaborators.nodes or []:
@@ -230,8 +231,6 @@ def _get_repo_collaborators_for_multiple_repos(
      logger.info(
          f'Retrieving repo collaborators for affiliation "{affiliation}" on org "{org}".',
      )
-     collab_users: List[dict[str, Any]] = []
-     collab_permission: List[str] = []

      result: dict[str, list[UserAffiliationAndRepoPermission]] = retries_with_backoff(
          _get_repo_collaborators_inner_func,
@@ -244,8 +243,6 @@ def _get_repo_collaborators_for_multiple_repos(
          token=token,
          repo_raw_data=repo_raw_data,
          affiliation=affiliation,
-         collab_users=collab_users,
-         collab_permission=collab_permission,
      )
      return result

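The collaborator changes above move the collab_users / collab_permission accumulators from the caller into the inner function that retries_with_backoff re-invokes, so each retry starts from fresh lists rather than appending to state left over from a failed attempt. A small hypothetical illustration of why that matters (the retry helper and flaky functions below are made up for the sketch):

# Hypothetical illustration of why mutable accumulators belong inside a retried
# function. The retry() helper and flaky functions are invented for this sketch.
def retry(func, attempts: int = 3):
    for attempt in range(attempts):
        try:
            return func()
        except RuntimeError:
            if attempt == attempts - 1:
                raise

calls = {"n": 0}

# Buggy pattern: the accumulator lives outside the retried function, so items
# appended before a failure survive into the next attempt and end up duplicated.
outer_acc: list[int] = []
def flaky_append_outer() -> list[int]:
    outer_acc.append(1)
    calls["n"] += 1
    if calls["n"] < 2:
        raise RuntimeError("transient error")
    return outer_acc

print(retry(flaky_append_outer))  # [1, 1] - duplicate from the failed attempt

# Fixed pattern (what the diff does): each attempt builds its own fresh lists.
calls["n"] = 0
def flaky_append_inner() -> list[int]:
    acc: list[int] = []
    acc.append(1)
    calls["n"] += 1
    if calls["n"] < 2:
        raise RuntimeError("transient error")
    return acc

print(retry(flaky_append_inner))  # [1]
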
@@ -864,11 +861,15 @@ def load_github_repos(
      ON CREATE SET r.firstseen = timestamp()
      SET r.lastupdated = r.UpdateTag
      """
-     neo4j_session.run(
-         ingest_repo,
-         RepoData=repo_data,
-         UpdateTag=update_tag,
-     )
+
+     def _ingest_repos_tx(tx: neo4j.Transaction) -> None:
+         tx.run(
+             ingest_repo,
+             RepoData=repo_data,
+             UpdateTag=update_tag,
+         ).consume()
+
+     neo4j_session.execute_write(_ingest_repos_tx)


  @timeit
@@ -898,11 +899,14 @@ def load_github_languages(
      ON CREATE SET r.firstseen = timestamp()
      SET r.lastupdated = $UpdateTag"""

-     neo4j_session.run(
-         ingest_languages,
-         Languages=repo_languages,
-         UpdateTag=update_tag,
-     )
+     def _ingest_languages_tx(tx: neo4j.Transaction) -> None:
+         tx.run(
+             ingest_languages,
+             Languages=repo_languages,
+             UpdateTag=update_tag,
+         ).consume()
+
+     neo4j_session.execute_write(_ingest_languages_tx)


  @timeit
@@ -918,31 +922,42 @@ def load_github_owners(
      :param repo_owners: list of owner to repo mappings
      :return: Nothing
      """
-     for owner in repo_owners:
-         ingest_owner_template = Template(
-             """
-             MERGE (user:$account_type{id: $Id})
-             ON CREATE SET user.firstseen = timestamp()
-             SET user.username = $UserName,
-             user.lastupdated = $UpdateTag
-             WITH user
-
-             MATCH (repo:GitHubRepository{id: $RepoId})
-             MERGE (user)<-[r:OWNER]-(repo)
-             ON CREATE SET r.firstseen = timestamp()
-             SET r.lastupdated = $UpdateTag""",
-         )
+     ingest_owner_template = Template(
+         """
+         MERGE (user:$account_type{id: $Id})
+         ON CREATE SET user.firstseen = timestamp()
+         SET user.username = $UserName,
+         user.lastupdated = $UpdateTag
+         WITH user
+
+         MATCH (repo:GitHubRepository{id: $RepoId})
+         MERGE (user)<-[r:OWNER]-(repo)
+         ON CREATE SET r.firstseen = timestamp()
+         SET r.lastupdated = $UpdateTag""",
+     )

-         account_type = {"User": "GitHubUser", "Organization": "GitHubOrganization"}
+     account_type = {"User": "GitHubUser", "Organization": "GitHubOrganization"}

-         neo4j_session.run(
+     def _ingest_owner_tx(
+         tx: neo4j.Transaction,
+         owner_record: Dict,
+         owner_label: str,
+     ) -> None:
+         tx.run(
              ingest_owner_template.safe_substitute(
-                 account_type=account_type[owner["type"]],
+                 account_type=owner_label,
              ),
-             Id=owner["owner_id"],
-             UserName=owner["owner"],
-             RepoId=owner["repo_id"],
+             Id=owner_record["owner_id"],
+             UserName=owner_record["owner"],
+             RepoId=owner_record["repo_id"],
              UpdateTag=update_tag,
+         ).consume()
+
+     for owner in repo_owners:
+         neo4j_session.execute_write(
+             _ingest_owner_tx,
+             owner,
+             account_type[owner["type"]],
          )


@@ -973,12 +988,24 @@ def load_collaborators(
          SET o.lastupdated = $UpdateTag
          """,
      )
-     for collab_type in collaborators.keys():
-         relationship_label = f"{affiliation}_COLLAB_{collab_type}"
-         neo4j_session.run(
+
+     def _ingest_collaborators_tx(
+         tx: neo4j.Transaction,
+         relationship_label: str,
+         collaborator_data: List[Dict],
+     ) -> None:
+         tx.run(
              query.safe_substitute(rel_label=relationship_label),
-             UserData=collaborators[collab_type],
+             UserData=collaborator_data,
              UpdateTag=update_tag,
+         ).consume()
+
+     for collab_type, collab_data in collaborators.items():
+         relationship_label = f"{affiliation}_COLLAB_{collab_type}"
+         neo4j_session.execute_write(
+             _ingest_collaborators_tx,
+             relationship_label,
+             collab_data,
          )


@@ -1003,11 +1030,15 @@ def load_python_requirements(
      SET r.lastupdated = $UpdateTag,
      r.specifier = req.specifier
      """
-     neo4j_session.run(
-         query,
-         Requirements=requirements_objects,
-         UpdateTag=update_tag,
-     )
+
+     def _ingest_requirements_tx(tx: neo4j.Transaction) -> None:
+         tx.run(
+             query,
+             Requirements=requirements_objects,
+             UpdateTag=update_tag,
+         ).consume()
+
+     neo4j_session.execute_write(_ingest_requirements_tx)


  @timeit
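
The remaining repos.py hunks replace bare neo4j_session.run(...) calls with transaction functions passed to Session.execute_write, so each load runs as an explicit managed transaction that the driver can retry on transient failures. A minimal sketch of that pattern with the stock neo4j 5.x Python driver follows; the Bolt URI, credentials, node label, and Cypher are hypothetical.

# Minimal sketch of the managed write-transaction pattern the diff migrates to.
# Connection details, the Item label, and the Cypher below are made up.
import neo4j

def _ingest_items_tx(tx: neo4j.ManagedTransaction, items: list[dict], update_tag: int) -> None:
    # consume() ensures the result is fully processed inside the transaction.
    tx.run(
        "UNWIND $Items AS item "
        "MERGE (n:Item {id: item.id}) "
        "SET n.lastupdated = $UpdateTag",
        Items=items,
        UpdateTag=update_tag,
    ).consume()

driver = neo4j.GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
with driver.session() as session:
    # execute_write may re-run the transaction function on transient errors,
    # which a bare session.run() call does not do.
    session.execute_write(_ingest_items_tx, [{"id": "a"}, {"id": "b"}], 1234567890)
driver.close()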