cartography 0.113.0__py3-none-any.whl → 0.114.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of cartography has been flagged as possibly problematic; see the registry advisory for details.

Files changed (69)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +8 -0
  3. cartography/config.py +4 -0
  4. cartography/data/indexes.cypher +0 -27
  5. cartography/intel/aws/iam.py +741 -492
  6. cartography/intel/aws/organizations.py +7 -8
  7. cartography/intel/aws/permission_relationships.py +4 -16
  8. cartography/intel/azure/__init__.py +16 -0
  9. cartography/intel/azure/app_service.py +105 -0
  10. cartography/intel/azure/functions.py +124 -0
  11. cartography/intel/entra/__init__.py +31 -0
  12. cartography/intel/entra/app_role_assignments.py +277 -0
  13. cartography/intel/entra/applications.py +4 -238
  14. cartography/intel/entra/federation/__init__.py +0 -0
  15. cartography/intel/entra/federation/aws_identity_center.py +77 -0
  16. cartography/intel/entra/service_principals.py +217 -0
  17. cartography/intel/gcp/__init__.py +136 -440
  18. cartography/intel/gcp/clients.py +65 -0
  19. cartography/intel/gcp/compute.py +18 -44
  20. cartography/intel/gcp/crm/__init__.py +0 -0
  21. cartography/intel/gcp/crm/folders.py +108 -0
  22. cartography/intel/gcp/crm/orgs.py +65 -0
  23. cartography/intel/gcp/crm/projects.py +109 -0
  24. cartography/intel/gcp/gke.py +72 -113
  25. cartography/intel/github/__init__.py +41 -0
  26. cartography/intel/github/commits.py +423 -0
  27. cartography/intel/github/repos.py +73 -39
  28. cartography/models/aws/iam/access_key.py +103 -0
  29. cartography/models/aws/iam/account_role.py +24 -0
  30. cartography/models/aws/iam/federated_principal.py +60 -0
  31. cartography/models/aws/iam/group.py +60 -0
  32. cartography/models/aws/iam/group_membership.py +26 -0
  33. cartography/models/aws/iam/inline_policy.py +78 -0
  34. cartography/models/aws/iam/managed_policy.py +51 -0
  35. cartography/models/aws/iam/policy_statement.py +57 -0
  36. cartography/models/aws/iam/role.py +83 -0
  37. cartography/models/aws/iam/root_principal.py +52 -0
  38. cartography/models/aws/iam/service_principal.py +30 -0
  39. cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
  40. cartography/models/aws/iam/user.py +54 -0
  41. cartography/models/azure/__init__.py +0 -0
  42. cartography/models/azure/app_service.py +59 -0
  43. cartography/models/azure/function_app.py +59 -0
  44. cartography/models/entra/entra_user_to_aws_sso.py +41 -0
  45. cartography/models/entra/service_principal.py +104 -0
  46. cartography/models/gcp/compute/subnet.py +74 -0
  47. cartography/models/gcp/crm/__init__.py +0 -0
  48. cartography/models/gcp/crm/folders.py +98 -0
  49. cartography/models/gcp/crm/organizations.py +21 -0
  50. cartography/models/gcp/crm/projects.py +100 -0
  51. cartography/models/gcp/gke.py +69 -0
  52. cartography/models/github/commits.py +63 -0
  53. {cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/METADATA +7 -5
  54. {cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/RECORD +58 -32
  55. cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
  56. cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
  57. cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
  58. cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
  59. cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
  60. cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
  61. cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
  62. cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
  63. cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
  64. cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
  65. cartography/intel/gcp/crm.py +0 -355
  66. {cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/WHEEL +0 -0
  67. {cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/entry_points.txt +0 -0
  68. {cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/licenses/LICENSE +0 -0
  69. {cartography-0.113.0.dist-info → cartography-0.114.0.dist-info}/top_level.txt +0 -0
@@ -1,18 +1,45 @@
1
1
  import base64
2
2
  import json
3
3
  import logging
4
+ from typing import cast
4
5
 
5
6
  import neo4j
6
7
 
8
+ import cartography.intel.github.commits
7
9
  import cartography.intel.github.repos
8
10
  import cartography.intel.github.teams
9
11
  import cartography.intel.github.users
12
+ from cartography.client.core.tx import read_list_of_values_tx
10
13
  from cartography.config import Config
11
14
  from cartography.util import timeit
12
15
 
13
16
  logger = logging.getLogger(__name__)
14
17
 
15
18
 
19
def _get_repos_from_graph(neo4j_session: neo4j.Session, organization: str) -> list[str]:
    """
    Look up the repository names owned by a GitHub organization directly in the
    graph, avoiding an extra round-trip to the GitHub API.

    :param neo4j_session: Neo4j session for database interface
    :param organization: GitHub organization name
    :return: List of repository names, sorted alphabetically
    """
    query = """
    MATCH (org:GitHubOrganization {id: $org_url})<-[:OWNER]-(repo:GitHubRepository)
    RETURN repo.name
    ORDER BY repo.name
    """
    # The organization node's id is its GitHub URL.
    repo_names = neo4j_session.execute_read(
        read_list_of_values_tx,
        query,
        org_url=f"https://github.com/{organization}",
    )
    return cast(list[str], repo_names)
41
+
42
+
16
43
  @timeit
17
44
  def start_github_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
18
45
  """
@@ -54,3 +81,17 @@ def start_github_ingestion(neo4j_session: neo4j.Session, config: Config) -> None
54
81
  auth_data["url"],
55
82
  auth_data["name"],
56
83
  )
84
+
85
+ # Sync commit relationships for the configured lookback period
86
+ # Get repo names from the graph instead of making another API call
87
+ repo_names = _get_repos_from_graph(neo4j_session, auth_data["name"])
88
+
89
+ cartography.intel.github.commits.sync_github_commits(
90
+ neo4j_session,
91
+ auth_data["token"],
92
+ auth_data["url"],
93
+ auth_data["name"],
94
+ repo_names,
95
+ common_job_parameters["UPDATE_TAG"],
96
+ config.github_commit_lookback_days,
97
+ )
@@ -0,0 +1,423 @@
1
+ import logging
2
+ from datetime import datetime
3
+ from datetime import timedelta
4
+ from datetime import timezone
5
+ from typing import Any
6
+
7
+ import neo4j
8
+
9
+ from cartography.client.core.tx import load_matchlinks
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.intel.github.util import fetch_page
12
+ from cartography.models.github.commits import GitHubUserCommittedToRepoRel
13
+ from cartography.util import timeit
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
# Paginated GraphQL query for the default-branch commit history of one
# repository. `since` (a GitTimestamp) bounds the lookback window; the
# rateLimit footer reports API quota consumption for each page fetched.
GITHUB_REPO_COMMITS_PAGINATED_GRAPHQL = """
query($login: String!, $repo: String!, $since: GitTimestamp!, $cursor: String) {
    organization(login: $login) {
        repository(name: $repo) {
            name
            url
            defaultBranchRef {
                target {
                    ... on Commit {
                        history(first: 100, since: $since, after: $cursor) {
                            pageInfo {
                                endCursor
                                hasNextPage
                            }
                            nodes {
                                committedDate
                                author {
                                    user {
                                        url
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    rateLimit {
        limit
        cost
        remaining
        resetAt
    }
}
"""
54
+
55
+
56
@timeit
def get_repo_commits(
    token: str,
    api_url: str,
    organization: str,
    repo_name: str,
    since_date: datetime,
) -> list[dict[str, Any]]:
    """
    Retrieve commits on a repository's default branch made since a given date,
    walking GraphQL pagination until all pages are consumed.

    :param token: The Github API token as string.
    :param api_url: The Github v4 API endpoint as string.
    :param organization: The name of the target Github organization as string.
    :param repo_name: The name of the target Github repository as string.
    :param since_date: The datetime to fetch commits since.
    :return: A list of commits from the repository.
    """
    # GraphQL's GitTimestamp type requires an ISO-8601 string with a 'Z' suffix for UTC.
    since_iso = since_date.strftime("%Y-%m-%dT%H:%M:%SZ")
    logger.debug(f"Fetching commits for {organization}/{repo_name} since {since_iso}")

    collected: list[dict[str, Any]] = []
    cursor = None

    while True:
        response = fetch_page(
            token,
            api_url,
            organization,
            GITHUB_REPO_COMMITS_PAGINATED_GRAPHQL,
            cursor,
            repo=repo_name,
            since=since_iso,
        )

        # Descend into the nested commit history, returning whatever was
        # collected so far if any level of the response is missing.
        repo_data = response.get("data", {}).get("organization", {}).get("repository")
        if not repo_data:
            logger.warning(f"No repository data found for {organization}/{repo_name}")
            break

        default_branch = repo_data.get("defaultBranchRef")
        if not default_branch:
            logger.debug(f"Repository {organization}/{repo_name} has no default branch")
            break

        target = default_branch.get("target")
        if not target:
            logger.debug(
                f"Repository {organization}/{repo_name} default branch has no target"
            )
            break

        history = target.get("history")
        if not history:
            logger.debug(f"Repository {organization}/{repo_name} has no commit history")
            break

        collected.extend(history.get("nodes", []))

        page_info = history.get("pageInfo", {})
        if not page_info.get("hasNextPage", False):
            break
        cursor = page_info.get("endCursor")

    return collected
127
+
128
+
129
def process_repo_commits_batch(
    neo4j_session: neo4j.Session,
    token: str,
    api_url: str,
    organization: str,
    repo_names: list[str],
    update_tag: int,
    lookback_days: int = 30,
    batch_size: int = 10,
) -> None:
    """
    Fetch, transform, and load repository commit relationships in fixed-size
    batches so that memory use and API quota consumption stay bounded.

    :param neo4j_session: Neo4j session for database interface.
    :param token: The Github API token as string.
    :param api_url: The Github v4 API endpoint as string.
    :param organization: The name of the target Github organization as string.
    :param repo_names: List of repository names to process.
    :param update_tag: Timestamp used to determine data freshness.
    :param lookback_days: Number of days to look back for commits.
    :param batch_size: Number of repositories to process in each batch.
    """
    # Only commits newer than this cutoff are fetched.
    lookback_date = datetime.now(timezone.utc) - timedelta(days=lookback_days)

    logger.info(f"Processing {len(repo_names)} repositories in batches of {batch_size}")

    batch_number = 0
    for start in range(0, len(repo_names), batch_size):
        batch = repo_names[start : start + batch_size]
        batch_number += 1
        logger.info(f"Processing batch {batch_number}: {len(batch)} repositories")

        # Relationships are accumulated per batch, loaded, then discarded so
        # memory does not grow with the total number of repositories.
        batch_relationships: list[dict[str, Any]] = []

        for repo_name in batch:
            try:
                commits = get_repo_commits(
                    token,
                    api_url,
                    organization,
                    repo_name,
                    lookback_date,
                )
                repo_relationships = transform_single_repo_commits_to_relationships(
                    repo_name,
                    commits,
                    organization,
                )
                batch_relationships.extend(repo_relationships)
                logger.debug(
                    f"Found {len(commits)} commits in {repo_name}, created {len(repo_relationships)} relationships"
                )
            except Exception:
                # Best-effort: one failing repository must not abort the sync.
                logger.warning(
                    f"Failed to fetch commits for {repo_name}", exc_info=True
                )
                continue

        if batch_relationships:
            logger.info(f"Loading {len(batch_relationships)} relationships for batch")
            load_github_commit_relationships(
                neo4j_session,
                batch_relationships,
                organization,
                update_tag,
            )
206
+
207
+
208
def transform_single_repo_commits_to_relationships(
    repo_name: str,
    commits: list[dict[str, Any]],
    organization: str,
) -> list[dict[str, Any]]:
    """
    Aggregate one repository's commits into per-user relationship records
    (commit count plus first/last commit timestamps). Processing a single repo
    at a time keeps peak memory low.

    NOTE(review): only the commit *author* identity is used here — commits
    whose author has no linked GitHub user are skipped (no committer
    fallback, unlike transform_commits_to_user_repo_relationships); confirm
    this asymmetry is intentional.

    :param repo_name: The repository name.
    :param commits: List of commit data from the repository.
    :param organization: The Github organization name.
    :return: List of user-repository relationship records for this repo.
    """
    if not commits:
        return []

    repo_url = f"https://github.com/{organization}/{repo_name}"

    # Bucket commit timestamps by author URL.
    dates_by_user: dict[str, list[datetime]] = {}
    for commit in commits:
        author_user = commit.get("author", {}).get("user")
        if not author_user or not author_user.get("url"):
            continue
        # committedDate arrives as an ISO string with a 'Z' UTC suffix.
        parsed = datetime.fromisoformat(commit["committedDate"].replace("Z", "+00:00"))
        dates_by_user.setdefault(author_user["url"], []).append(parsed)

    return [
        {
            "user_url": user_url,
            "repo_url": repo_url,
            "commit_count": len(dates),
            "last_commit_date": max(dates).isoformat(),
            "first_commit_date": min(dates).isoformat(),
        }
        for user_url, dates in dates_by_user.items()
    ]
262
+
263
+
264
def transform_commits_to_user_repo_relationships(
    commits_by_repo: dict[str, list[dict[str, Any]]],
    organization: str,
) -> list[dict[str, Any]]:
    """
    Aggregate commits across many repositories into user-repository
    relationship records (commit count plus first/last commit timestamps).

    :param commits_by_repo: Dict mapping repo names to commit lists.
    :param organization: The Github organization name.
    :return: List of user-repository relationship records.
    """
    logger.info("Transforming commit data into user-repository relationships")

    # Group commits under a (user URL, repo URL) key.
    grouped: dict[tuple[str, str], list[dict[str, Any]]] = {}
    for repo_name, commits in commits_by_repo.items():
        repo_url = f"https://github.com/{organization}/{repo_name}"
        for commit in commits:
            # Prefer the author identity; fall back to the committer. Commits
            # with no resolvable GitHub user are skipped.
            commit_user = commit.get("author", {}).get("user") or commit.get(
                "committer", {}
            ).get("user")
            if not commit_user or not commit_user.get("url"):
                continue
            grouped.setdefault((commit_user["url"], repo_url), []).append(commit)

    relationships = []
    for (user_url, repo_url), repo_commits in grouped.items():
        # committedDate arrives as an ISO string with a 'Z' UTC suffix.
        stamps = [
            datetime.fromisoformat(c["committedDate"].replace("Z", "+00:00"))
            for c in repo_commits
        ]
        relationships.append(
            {
                "user_url": user_url,
                "repo_url": repo_url,
                "commit_count": len(repo_commits),
                "last_commit_date": max(stamps).isoformat(),
                "first_commit_date": min(stamps).isoformat(),
            }
        )

    logger.info(f"Created {len(relationships)} user-repository relationships")
    return relationships
320
+
321
+
322
@timeit
def load_github_commit_relationships(
    neo4j_session: neo4j.Session,
    commit_relationships: list[dict[str, Any]],
    organization: str,
    update_tag: int,
) -> None:
    """
    Load GitHub user-repository commit relationships using MatchLinks.

    :param neo4j_session: Neo4j session for database interface.
    :param commit_relationships: List of user-repository relationship records.
    :param organization: The Github organization name for sub-resource scoping.
    :param update_tag: Timestamp used to determine data freshness.
    """
    if not commit_relationships:
        logger.info("No commit relationships to load")
        return

    logger.info(
        f"Loading {len(commit_relationships)} user-repository commit relationships"
    )

    # The organization's GitHub URL is the sub-resource identifier that scopes
    # these relationships to one GitHubOrganization node.
    load_matchlinks(
        neo4j_session,
        GitHubUserCommittedToRepoRel(),
        commit_relationships,
        lastupdated=update_tag,
        _sub_resource_label="GitHubOrganization",
        _sub_resource_id=f"https://github.com/{organization}",
    )
356
+
357
+
358
@timeit
def cleanup_github_commit_relationships(
    neo4j_session: neo4j.Session,
    organization: str,
    update_tag: int,
) -> None:
    """
    Remove stale GitHub user-repository commit relationships, i.e. those not
    refreshed by the current update_tag.

    :param neo4j_session: Neo4j session for database interface.
    :param organization: The Github organization name.
    :param update_tag: Timestamp used to determine data freshness.
    """
    logger.debug("Cleaning up GitHub user-repository commit relationships")

    cleanup_job = GraphJob.from_matchlink(
        GitHubUserCommittedToRepoRel(),
        "GitHubOrganization",
        f"https://github.com/{organization}",
        update_tag,
    )
    cleanup_job.run(neo4j_session)
381
+
382
+
383
@timeit
def sync_github_commits(
    neo4j_session: neo4j.Session,
    token: str,
    api_url: str,
    organization: str,
    repo_names: list[str],
    update_tag: int,
    lookback_days: int = 30,
) -> None:
    """
    Sync GitHub commit relationships for the specified lookback period, then
    clean up relationships that were not refreshed. Repositories are processed
    in batches to minimize memory usage and API quota consumption.

    :param neo4j_session: Neo4j session for database interface.
    :param token: The Github API token as string.
    :param api_url: The Github v4 API endpoint as string.
    :param organization: The name of the target Github organization as string.
    :param repo_names: List of repository names to sync commits for.
    :param update_tag: Timestamp used to determine data freshness.
    :param lookback_days: Number of days to look back for commits.
    """
    logger.info(f"Starting GitHub commits sync for organization: {organization}")

    # Fetch/transform/load happens batch-by-batch inside this call.
    process_repo_commits_batch(
        neo4j_session,
        token,
        api_url,
        organization,
        repo_names,
        update_tag,
        lookback_days=lookback_days,
        batch_size=10,  # Process 10 repos at a time
    )

    # Cleanup runs once, after every batch has been loaded.
    cleanup_github_commit_relationships(neo4j_session, organization, update_tag)

    logger.info("Completed GitHub commits sync")
@@ -864,11 +864,15 @@ def load_github_repos(
864
864
  ON CREATE SET r.firstseen = timestamp()
865
865
  SET r.lastupdated = r.UpdateTag
866
866
  """
867
- neo4j_session.run(
868
- ingest_repo,
869
- RepoData=repo_data,
870
- UpdateTag=update_tag,
871
- )
867
+
868
+ def _ingest_repos_tx(tx: neo4j.Transaction) -> None:
869
+ tx.run(
870
+ ingest_repo,
871
+ RepoData=repo_data,
872
+ UpdateTag=update_tag,
873
+ ).consume()
874
+
875
+ neo4j_session.execute_write(_ingest_repos_tx)
872
876
 
873
877
 
874
878
  @timeit
@@ -898,11 +902,14 @@ def load_github_languages(
898
902
  ON CREATE SET r.firstseen = timestamp()
899
903
  SET r.lastupdated = $UpdateTag"""
900
904
 
901
- neo4j_session.run(
902
- ingest_languages,
903
- Languages=repo_languages,
904
- UpdateTag=update_tag,
905
- )
905
+ def _ingest_languages_tx(tx: neo4j.Transaction) -> None:
906
+ tx.run(
907
+ ingest_languages,
908
+ Languages=repo_languages,
909
+ UpdateTag=update_tag,
910
+ ).consume()
911
+
912
+ neo4j_session.execute_write(_ingest_languages_tx)
906
913
 
907
914
 
908
915
  @timeit
@@ -918,31 +925,42 @@ def load_github_owners(
918
925
  :param repo_owners: list of owner to repo mappings
919
926
  :return: Nothing
920
927
  """
921
- for owner in repo_owners:
922
- ingest_owner_template = Template(
923
- """
924
- MERGE (user:$account_type{id: $Id})
925
- ON CREATE SET user.firstseen = timestamp()
926
- SET user.username = $UserName,
927
- user.lastupdated = $UpdateTag
928
- WITH user
929
-
930
- MATCH (repo:GitHubRepository{id: $RepoId})
931
- MERGE (user)<-[r:OWNER]-(repo)
932
- ON CREATE SET r.firstseen = timestamp()
933
- SET r.lastupdated = $UpdateTag""",
934
- )
928
+ ingest_owner_template = Template(
929
+ """
930
+ MERGE (user:$account_type{id: $Id})
931
+ ON CREATE SET user.firstseen = timestamp()
932
+ SET user.username = $UserName,
933
+ user.lastupdated = $UpdateTag
934
+ WITH user
935
+
936
+ MATCH (repo:GitHubRepository{id: $RepoId})
937
+ MERGE (user)<-[r:OWNER]-(repo)
938
+ ON CREATE SET r.firstseen = timestamp()
939
+ SET r.lastupdated = $UpdateTag""",
940
+ )
935
941
 
936
- account_type = {"User": "GitHubUser", "Organization": "GitHubOrganization"}
942
+ account_type = {"User": "GitHubUser", "Organization": "GitHubOrganization"}
937
943
 
938
- neo4j_session.run(
944
+ def _ingest_owner_tx(
945
+ tx: neo4j.Transaction,
946
+ owner_record: Dict,
947
+ owner_label: str,
948
+ ) -> None:
949
+ tx.run(
939
950
  ingest_owner_template.safe_substitute(
940
- account_type=account_type[owner["type"]],
951
+ account_type=owner_label,
941
952
  ),
942
- Id=owner["owner_id"],
943
- UserName=owner["owner"],
944
- RepoId=owner["repo_id"],
953
+ Id=owner_record["owner_id"],
954
+ UserName=owner_record["owner"],
955
+ RepoId=owner_record["repo_id"],
945
956
  UpdateTag=update_tag,
957
+ ).consume()
958
+
959
+ for owner in repo_owners:
960
+ neo4j_session.execute_write(
961
+ _ingest_owner_tx,
962
+ owner,
963
+ account_type[owner["type"]],
946
964
  )
947
965
 
948
966
 
@@ -973,12 +991,24 @@ def load_collaborators(
973
991
  SET o.lastupdated = $UpdateTag
974
992
  """,
975
993
  )
976
- for collab_type in collaborators.keys():
977
- relationship_label = f"{affiliation}_COLLAB_{collab_type}"
978
- neo4j_session.run(
994
+
995
+ def _ingest_collaborators_tx(
996
+ tx: neo4j.Transaction,
997
+ relationship_label: str,
998
+ collaborator_data: List[Dict],
999
+ ) -> None:
1000
+ tx.run(
979
1001
  query.safe_substitute(rel_label=relationship_label),
980
- UserData=collaborators[collab_type],
1002
+ UserData=collaborator_data,
981
1003
  UpdateTag=update_tag,
1004
+ ).consume()
1005
+
1006
+ for collab_type, collab_data in collaborators.items():
1007
+ relationship_label = f"{affiliation}_COLLAB_{collab_type}"
1008
+ neo4j_session.execute_write(
1009
+ _ingest_collaborators_tx,
1010
+ relationship_label,
1011
+ collab_data,
982
1012
  )
983
1013
 
984
1014
 
@@ -1003,11 +1033,15 @@ def load_python_requirements(
1003
1033
  SET r.lastupdated = $UpdateTag,
1004
1034
  r.specifier = req.specifier
1005
1035
  """
1006
- neo4j_session.run(
1007
- query,
1008
- Requirements=requirements_objects,
1009
- UpdateTag=update_tag,
1010
- )
1036
+
1037
+ def _ingest_requirements_tx(tx: neo4j.Transaction) -> None:
1038
+ tx.run(
1039
+ query,
1040
+ Requirements=requirements_objects,
1041
+ UpdateTag=update_tag,
1042
+ ).consume()
1043
+
1044
+ neo4j_session.execute_write(_ingest_requirements_tx)
1011
1045
 
1012
1046
 
1013
1047
  @timeit