cartography 0.102.0rc2__py3-none-any.whl → 0.103.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (297) hide show
  1. cartography/__main__.py +1 -2
  2. cartography/_version.py +2 -2
  3. cartography/cli.py +376 -249
  4. cartography/client/core/tx.py +39 -18
  5. cartography/config.py +28 -0
  6. cartography/driftdetect/__main__.py +1 -2
  7. cartography/driftdetect/add_shortcut.py +10 -2
  8. cartography/driftdetect/cli.py +71 -75
  9. cartography/driftdetect/detect_deviations.py +7 -3
  10. cartography/driftdetect/get_states.py +20 -8
  11. cartography/driftdetect/model.py +5 -5
  12. cartography/driftdetect/serializers.py +8 -6
  13. cartography/driftdetect/storage.py +2 -2
  14. cartography/graph/cleanupbuilder.py +35 -15
  15. cartography/graph/job.py +46 -17
  16. cartography/graph/querybuilder.py +165 -80
  17. cartography/graph/statement.py +35 -26
  18. cartography/intel/analysis.py +4 -1
  19. cartography/intel/aws/__init__.py +114 -55
  20. cartography/intel/aws/apigateway.py +134 -63
  21. cartography/intel/aws/cloudtrail.py +127 -0
  22. cartography/intel/aws/cloudwatch.py +93 -0
  23. cartography/intel/aws/config.py +56 -20
  24. cartography/intel/aws/dynamodb.py +108 -40
  25. cartography/intel/aws/ec2/__init__.py +2 -2
  26. cartography/intel/aws/ec2/auto_scaling_groups.py +181 -78
  27. cartography/intel/aws/ec2/elastic_ip_addresses.py +41 -13
  28. cartography/intel/aws/ec2/images.py +49 -20
  29. cartography/intel/aws/ec2/instances.py +234 -136
  30. cartography/intel/aws/ec2/internet_gateways.py +40 -11
  31. cartography/intel/aws/ec2/key_pairs.py +44 -20
  32. cartography/intel/aws/ec2/launch_templates.py +101 -59
  33. cartography/intel/aws/ec2/load_balancer_v2s.py +104 -39
  34. cartography/intel/aws/ec2/load_balancers.py +82 -42
  35. cartography/intel/aws/ec2/network_acls.py +89 -65
  36. cartography/intel/aws/ec2/network_interfaces.py +146 -87
  37. cartography/intel/aws/ec2/reserved_instances.py +45 -16
  38. cartography/intel/aws/ec2/route_tables.py +138 -98
  39. cartography/intel/aws/ec2/security_groups.py +71 -21
  40. cartography/intel/aws/ec2/snapshots.py +61 -22
  41. cartography/intel/aws/ec2/subnets.py +54 -18
  42. cartography/intel/aws/ec2/tgw.py +100 -34
  43. cartography/intel/aws/ec2/util.py +1 -1
  44. cartography/intel/aws/ec2/volumes.py +69 -41
  45. cartography/intel/aws/ec2/vpc.py +37 -12
  46. cartography/intel/aws/ec2/vpc_peerings.py +83 -24
  47. cartography/intel/aws/ecr.py +88 -32
  48. cartography/intel/aws/ecs.py +83 -47
  49. cartography/intel/aws/efs.py +93 -0
  50. cartography/intel/aws/eks.py +55 -29
  51. cartography/intel/aws/elasticache.py +42 -18
  52. cartography/intel/aws/elasticsearch.py +57 -20
  53. cartography/intel/aws/emr.py +61 -23
  54. cartography/intel/aws/iam.py +401 -145
  55. cartography/intel/aws/iam_instance_profiles.py +22 -22
  56. cartography/intel/aws/identitycenter.py +71 -37
  57. cartography/intel/aws/inspector.py +159 -89
  58. cartography/intel/aws/kms.py +92 -38
  59. cartography/intel/aws/lambda_function.py +103 -34
  60. cartography/intel/aws/organizations.py +30 -10
  61. cartography/intel/aws/permission_relationships.py +133 -51
  62. cartography/intel/aws/rds.py +249 -85
  63. cartography/intel/aws/redshift.py +107 -46
  64. cartography/intel/aws/resourcegroupstaggingapi.py +120 -66
  65. cartography/intel/aws/resources.py +57 -46
  66. cartography/intel/aws/route53.py +108 -61
  67. cartography/intel/aws/s3.py +168 -83
  68. cartography/intel/aws/s3accountpublicaccessblock.py +157 -0
  69. cartography/intel/aws/secretsmanager.py +24 -12
  70. cartography/intel/aws/securityhub.py +20 -9
  71. cartography/intel/aws/sns.py +166 -0
  72. cartography/intel/aws/sqs.py +60 -28
  73. cartography/intel/aws/ssm.py +70 -30
  74. cartography/intel/aws/util/arns.py +7 -7
  75. cartography/intel/aws/util/common.py +31 -4
  76. cartography/intel/azure/__init__.py +78 -19
  77. cartography/intel/azure/compute.py +101 -27
  78. cartography/intel/azure/cosmosdb.py +496 -170
  79. cartography/intel/azure/sql.py +296 -105
  80. cartography/intel/azure/storage.py +322 -113
  81. cartography/intel/azure/subscription.py +39 -23
  82. cartography/intel/azure/tenant.py +13 -4
  83. cartography/intel/azure/util/credentials.py +95 -55
  84. cartography/intel/bigfix/__init__.py +2 -2
  85. cartography/intel/bigfix/computers.py +93 -65
  86. cartography/intel/cloudflare/__init__.py +74 -0
  87. cartography/intel/cloudflare/accounts.py +57 -0
  88. cartography/intel/cloudflare/dnsrecords.py +64 -0
  89. cartography/intel/cloudflare/members.py +75 -0
  90. cartography/intel/cloudflare/roles.py +65 -0
  91. cartography/intel/cloudflare/zones.py +64 -0
  92. cartography/intel/create_indexes.py +3 -2
  93. cartography/intel/crowdstrike/__init__.py +11 -9
  94. cartography/intel/crowdstrike/endpoints.py +5 -1
  95. cartography/intel/crowdstrike/spotlight.py +8 -3
  96. cartography/intel/cve/__init__.py +46 -13
  97. cartography/intel/cve/feed.py +48 -12
  98. cartography/intel/digitalocean/__init__.py +22 -13
  99. cartography/intel/digitalocean/compute.py +75 -108
  100. cartography/intel/digitalocean/management.py +44 -80
  101. cartography/intel/digitalocean/platform.py +48 -43
  102. cartography/intel/dns.py +36 -10
  103. cartography/intel/duo/__init__.py +21 -16
  104. cartography/intel/duo/api_host.py +14 -9
  105. cartography/intel/duo/endpoints.py +50 -45
  106. cartography/intel/duo/groups.py +18 -14
  107. cartography/intel/duo/phones.py +37 -34
  108. cartography/intel/duo/tokens.py +26 -23
  109. cartography/intel/duo/users.py +54 -50
  110. cartography/intel/duo/web_authn_credentials.py +30 -25
  111. cartography/intel/entra/__init__.py +25 -7
  112. cartography/intel/entra/ou.py +112 -0
  113. cartography/intel/entra/users.py +69 -63
  114. cartography/intel/gcp/__init__.py +185 -49
  115. cartography/intel/gcp/compute.py +418 -231
  116. cartography/intel/gcp/crm.py +96 -43
  117. cartography/intel/gcp/dns.py +60 -19
  118. cartography/intel/gcp/gke.py +72 -38
  119. cartography/intel/gcp/iam.py +61 -41
  120. cartography/intel/gcp/storage.py +84 -55
  121. cartography/intel/github/__init__.py +13 -11
  122. cartography/intel/github/repos.py +270 -137
  123. cartography/intel/github/teams.py +170 -88
  124. cartography/intel/github/users.py +70 -39
  125. cartography/intel/github/util.py +36 -34
  126. cartography/intel/gsuite/__init__.py +47 -26
  127. cartography/intel/gsuite/api.py +73 -30
  128. cartography/intel/jamf/__init__.py +19 -1
  129. cartography/intel/jamf/computers.py +30 -7
  130. cartography/intel/jamf/util.py +7 -2
  131. cartography/intel/kandji/__init__.py +6 -3
  132. cartography/intel/kandji/devices.py +14 -8
  133. cartography/intel/kubernetes/namespaces.py +7 -4
  134. cartography/intel/kubernetes/pods.py +7 -4
  135. cartography/intel/kubernetes/services.py +8 -4
  136. cartography/intel/lastpass/__init__.py +2 -2
  137. cartography/intel/lastpass/users.py +23 -12
  138. cartography/intel/oci/__init__.py +44 -11
  139. cartography/intel/oci/iam.py +134 -38
  140. cartography/intel/oci/organizations.py +13 -6
  141. cartography/intel/oci/utils.py +43 -20
  142. cartography/intel/okta/__init__.py +66 -15
  143. cartography/intel/okta/applications.py +42 -20
  144. cartography/intel/okta/awssaml.py +93 -33
  145. cartography/intel/okta/factors.py +16 -4
  146. cartography/intel/okta/groups.py +56 -29
  147. cartography/intel/okta/organization.py +5 -1
  148. cartography/intel/okta/origins.py +6 -2
  149. cartography/intel/okta/roles.py +15 -5
  150. cartography/intel/okta/users.py +20 -8
  151. cartography/intel/okta/utils.py +6 -4
  152. cartography/intel/openai/__init__.py +86 -0
  153. cartography/intel/openai/adminapikeys.py +90 -0
  154. cartography/intel/openai/apikeys.py +96 -0
  155. cartography/intel/openai/projects.py +94 -0
  156. cartography/intel/openai/serviceaccounts.py +82 -0
  157. cartography/intel/openai/users.py +78 -0
  158. cartography/intel/openai/util.py +29 -0
  159. cartography/intel/pagerduty/__init__.py +8 -7
  160. cartography/intel/pagerduty/escalation_policies.py +18 -6
  161. cartography/intel/pagerduty/schedules.py +12 -4
  162. cartography/intel/pagerduty/services.py +11 -4
  163. cartography/intel/pagerduty/teams.py +8 -3
  164. cartography/intel/pagerduty/users.py +3 -1
  165. cartography/intel/pagerduty/vendors.py +3 -1
  166. cartography/intel/semgrep/__init__.py +24 -6
  167. cartography/intel/semgrep/dependencies.py +50 -28
  168. cartography/intel/semgrep/deployment.py +3 -1
  169. cartography/intel/semgrep/findings.py +42 -18
  170. cartography/intel/snipeit/__init__.py +17 -3
  171. cartography/intel/snipeit/asset.py +12 -6
  172. cartography/intel/snipeit/user.py +8 -5
  173. cartography/intel/snipeit/util.py +9 -4
  174. cartography/intel/tailscale/__init__.py +77 -0
  175. cartography/intel/tailscale/acls.py +146 -0
  176. cartography/intel/tailscale/devices.py +127 -0
  177. cartography/intel/tailscale/postureintegrations.py +81 -0
  178. cartography/intel/tailscale/tailnets.py +76 -0
  179. cartography/intel/tailscale/users.py +80 -0
  180. cartography/intel/tailscale/utils.py +132 -0
  181. cartography/models/aws/apigateway.py +21 -17
  182. cartography/models/aws/apigatewaycertificate.py +28 -22
  183. cartography/models/aws/apigatewayresource.py +28 -20
  184. cartography/models/aws/apigatewaystage.py +33 -25
  185. cartography/models/aws/cloudtrail/__init__.py +0 -0
  186. cartography/models/aws/cloudtrail/trail.py +61 -0
  187. cartography/models/aws/cloudwatch/__init__.py +0 -0
  188. cartography/models/aws/cloudwatch/loggroup.py +52 -0
  189. cartography/models/aws/dynamodb/gsi.py +30 -22
  190. cartography/models/aws/dynamodb/tables.py +25 -17
  191. cartography/models/aws/ec2/auto_scaling_groups.py +102 -82
  192. cartography/models/aws/ec2/images.py +36 -34
  193. cartography/models/aws/ec2/instances.py +51 -45
  194. cartography/models/aws/ec2/keypair.py +21 -16
  195. cartography/models/aws/ec2/keypair_instance.py +28 -21
  196. cartography/models/aws/ec2/launch_configurations.py +30 -26
  197. cartography/models/aws/ec2/launch_template_versions.py +48 -38
  198. cartography/models/aws/ec2/launch_templates.py +21 -17
  199. cartography/models/aws/ec2/load_balancer_listeners.py +27 -23
  200. cartography/models/aws/ec2/load_balancers.py +47 -37
  201. cartography/models/aws/ec2/network_acl_rules.py +38 -30
  202. cartography/models/aws/ec2/network_acls.py +38 -29
  203. cartography/models/aws/ec2/networkinterface_instance.py +52 -39
  204. cartography/models/aws/ec2/networkinterfaces.py +53 -37
  205. cartography/models/aws/ec2/privateip_networkinterface.py +32 -22
  206. cartography/models/aws/ec2/reservations.py +18 -14
  207. cartography/models/aws/ec2/route_table_associations.py +44 -34
  208. cartography/models/aws/ec2/route_tables.py +50 -43
  209. cartography/models/aws/ec2/routes.py +45 -37
  210. cartography/models/aws/ec2/securitygroup_instance.py +29 -20
  211. cartography/models/aws/ec2/securitygroup_networkinterface.py +24 -15
  212. cartography/models/aws/ec2/subnet_instance.py +24 -19
  213. cartography/models/aws/ec2/subnet_networkinterface.py +40 -31
  214. cartography/models/aws/ec2/volumes.py +47 -40
  215. cartography/models/aws/efs/__init__.py +0 -0
  216. cartography/models/aws/efs/mount_target.py +52 -0
  217. cartography/models/aws/eks/clusters.py +23 -21
  218. cartography/models/aws/emr.py +32 -30
  219. cartography/models/aws/iam/instanceprofile.py +33 -24
  220. cartography/models/aws/identitycenter/awsidentitycenter.py +18 -14
  221. cartography/models/aws/identitycenter/awspermissionset.py +37 -29
  222. cartography/models/aws/identitycenter/awsssouser.py +23 -21
  223. cartography/models/aws/inspector/findings.py +77 -65
  224. cartography/models/aws/inspector/packages.py +35 -29
  225. cartography/models/aws/s3/__init__.py +0 -0
  226. cartography/models/aws/s3/account_public_access_block.py +51 -0
  227. cartography/models/aws/sns/__init__.py +0 -0
  228. cartography/models/aws/sns/topic.py +50 -0
  229. cartography/models/aws/ssm/instance_information.py +51 -39
  230. cartography/models/aws/ssm/instance_patch.py +32 -26
  231. cartography/models/bigfix/bigfix_computer.py +42 -38
  232. cartography/models/bigfix/bigfix_root.py +3 -3
  233. cartography/models/cloudflare/__init__.py +0 -0
  234. cartography/models/cloudflare/account.py +25 -0
  235. cartography/models/cloudflare/dnsrecord.py +55 -0
  236. cartography/models/cloudflare/member.py +82 -0
  237. cartography/models/cloudflare/role.py +44 -0
  238. cartography/models/cloudflare/zone.py +59 -0
  239. cartography/models/core/common.py +12 -10
  240. cartography/models/core/nodes.py +5 -2
  241. cartography/models/core/relationships.py +14 -6
  242. cartography/models/crowdstrike/hosts.py +37 -35
  243. cartography/models/cve/cve.py +34 -32
  244. cartography/models/cve/cve_feed.py +6 -6
  245. cartography/models/digitalocean/__init__.py +0 -0
  246. cartography/models/digitalocean/account.py +21 -0
  247. cartography/models/digitalocean/droplet.py +56 -0
  248. cartography/models/digitalocean/project.py +48 -0
  249. cartography/models/duo/api_host.py +3 -3
  250. cartography/models/duo/endpoint.py +43 -41
  251. cartography/models/duo/group.py +14 -14
  252. cartography/models/duo/phone.py +27 -27
  253. cartography/models/duo/token.py +16 -16
  254. cartography/models/duo/user.py +46 -44
  255. cartography/models/duo/web_authn_credential.py +27 -19
  256. cartography/models/entra/ou.py +48 -0
  257. cartography/models/entra/tenant.py +24 -18
  258. cartography/models/entra/user.py +64 -48
  259. cartography/models/gcp/iam.py +23 -23
  260. cartography/models/github/orgs.py +5 -4
  261. cartography/models/github/teams.py +37 -31
  262. cartography/models/github/users.py +34 -23
  263. cartography/models/kandji/device.py +22 -16
  264. cartography/models/kandji/tenant.py +6 -4
  265. cartography/models/lastpass/tenant.py +3 -3
  266. cartography/models/lastpass/user.py +32 -28
  267. cartography/models/openai/__init__.py +0 -0
  268. cartography/models/openai/adminapikey.py +90 -0
  269. cartography/models/openai/apikey.py +84 -0
  270. cartography/models/openai/organization.py +17 -0
  271. cartography/models/openai/project.py +70 -0
  272. cartography/models/openai/serviceaccount.py +50 -0
  273. cartography/models/openai/user.py +49 -0
  274. cartography/models/semgrep/dependencies.py +36 -24
  275. cartography/models/semgrep/deployment.py +5 -5
  276. cartography/models/semgrep/findings.py +58 -42
  277. cartography/models/semgrep/locations.py +27 -21
  278. cartography/models/snipeit/asset.py +30 -21
  279. cartography/models/snipeit/tenant.py +6 -4
  280. cartography/models/snipeit/user.py +19 -12
  281. cartography/models/tailscale/__init__.py +0 -0
  282. cartography/models/tailscale/device.py +95 -0
  283. cartography/models/tailscale/group.py +86 -0
  284. cartography/models/tailscale/postureintegration.py +58 -0
  285. cartography/models/tailscale/tag.py +102 -0
  286. cartography/models/tailscale/tailnet.py +29 -0
  287. cartography/models/tailscale/user.py +52 -0
  288. cartography/stats.py +3 -3
  289. cartography/sync.py +113 -31
  290. cartography/util.py +84 -62
  291. {cartography-0.102.0rc2.dist-info → cartography-0.103.0.dist-info}/METADATA +8 -15
  292. cartography-0.103.0.dist-info/RECORD +442 -0
  293. {cartography-0.102.0rc2.dist-info → cartography-0.103.0.dist-info}/WHEEL +1 -1
  294. cartography-0.102.0rc2.dist-info/RECORD +0 -381
  295. {cartography-0.102.0rc2.dist-info → cartography-0.103.0.dist-info}/entry_points.txt +0 -0
  296. {cartography-0.102.0rc2.dist-info → cartography-0.103.0.dist-info}/licenses/LICENSE +0 -0
  297. {cartography-0.102.0rc2.dist-info → cartography-0.103.0.dist-info}/top_level.txt +0 -0
@@ -26,11 +26,11 @@ logger = logging.getLogger(__name__)
26
26
  # - Permission: https://docs.github.com/en/graphql/reference/enums#repositorypermission
27
27
  # - Affiliation: https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
28
28
  UserAffiliationAndRepoPermission = namedtuple(
29
- 'UserAffiliationAndRepoPermission',
29
+ "UserAffiliationAndRepoPermission",
30
30
  [
31
- 'user', # Dict
32
- 'permission', # 'WRITE', 'MAINTAIN', 'ADMIN', etc
33
- 'affiliation', # 'OUTSIDE', 'DIRECT'
31
+ "user", # Dict
32
+ "permission", # 'WRITE', 'MAINTAIN', 'ADMIN', etc
33
+ "affiliation", # 'OUTSIDE', 'DIRECT'
34
34
  ],
35
35
  )
36
36
 
@@ -137,28 +137,37 @@ GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL = """
137
137
 
138
138
 
139
139
  def _get_repo_collaborators_inner_func(
140
- org: str,
141
- api_url: str,
142
- token: str,
143
- repo_raw_data: list[dict[str, Any]],
144
- affiliation: str,
145
- collab_users: list[dict[str, Any]],
146
- collab_permission: list[str],
140
+ org: str,
141
+ api_url: str,
142
+ token: str,
143
+ repo_raw_data: list[dict[str, Any]],
144
+ affiliation: str,
145
+ collab_users: list[dict[str, Any]],
146
+ collab_permission: list[str],
147
147
  ) -> dict[str, list[UserAffiliationAndRepoPermission]]:
148
148
  result: dict[str, list[UserAffiliationAndRepoPermission]] = {}
149
149
 
150
150
  for repo in repo_raw_data:
151
- repo_name = repo['name']
152
- repo_url = repo['url']
153
-
154
- if ((affiliation == 'OUTSIDE' and repo['outsideCollaborators']['totalCount'] == 0) or
155
- (affiliation == 'DIRECT' and repo['directCollaborators']['totalCount'] == 0)):
151
+ repo_name = repo["name"]
152
+ repo_url = repo["url"]
153
+
154
+ if (
155
+ affiliation == "OUTSIDE" and repo["outsideCollaborators"]["totalCount"] == 0
156
+ ) or (
157
+ affiliation == "DIRECT" and repo["directCollaborators"]["totalCount"] == 0
158
+ ):
156
159
  # repo has no collabs of the affiliation type we're looking for, so don't waste time making an API call
157
160
  result[repo_url] = []
158
161
  continue
159
162
 
160
163
  logger.info(f"Loading {affiliation} collaborators for repo {repo_name}.")
161
- collaborators = _get_repo_collaborators(token, api_url, org, repo_name, affiliation)
164
+ collaborators = _get_repo_collaborators(
165
+ token,
166
+ api_url,
167
+ org,
168
+ repo_name,
169
+ affiliation,
170
+ )
162
171
 
163
172
  # nodes and edges are expected to always be present given that we only call for them if totalCount is > 0
164
173
  # however sometimes GitHub returns None, as in issue 1334 and 1404.
@@ -167,7 +176,7 @@ def _get_repo_collaborators_inner_func(
167
176
 
168
177
  # The `or []` is because `.edges` can be None.
169
178
  for perm in collaborators.edges or []:
170
- collab_permission.append(perm['permission'])
179
+ collab_permission.append(perm["permission"])
171
180
 
172
181
  result[repo_url] = [
173
182
  UserAffiliationAndRepoPermission(user, permission, affiliation)
@@ -177,11 +186,11 @@ def _get_repo_collaborators_inner_func(
177
186
 
178
187
 
179
188
  def _get_repo_collaborators_for_multiple_repos(
180
- repo_raw_data: list[dict[str, Any]],
181
- affiliation: str,
182
- org: str,
183
- api_url: str,
184
- token: str,
189
+ repo_raw_data: list[dict[str, Any]],
190
+ affiliation: str,
191
+ org: str,
192
+ api_url: str,
193
+ token: str,
185
194
  ) -> dict[str, list[UserAffiliationAndRepoPermission]]:
186
195
  """
187
196
  For every repo in the given list, retrieve the collaborators.
@@ -193,7 +202,9 @@ def _get_repo_collaborators_for_multiple_repos(
193
202
  :param token: The Github API token as string.
194
203
  :return: A dictionary of repo URL to list of UserAffiliationAndRepoPermission
195
204
  """
196
- logger.info(f'Retrieving repo collaborators for affiliation "{affiliation}" on org "{org}".')
205
+ logger.info(
206
+ f'Retrieving repo collaborators for affiliation "{affiliation}" on org "{org}".',
207
+ )
197
208
  collab_users: List[dict[str, Any]] = []
198
209
  collab_permission: List[str] = []
199
210
 
@@ -215,7 +226,11 @@ def _get_repo_collaborators_for_multiple_repos(
215
226
 
216
227
 
217
228
  def _get_repo_collaborators(
218
- token: str, api_url: str, organization: str, repo: str, affiliation: str,
229
+ token: str,
230
+ api_url: str,
231
+ organization: str,
232
+ repo: str,
233
+ affiliation: str,
219
234
  ) -> PaginatedGraphqlData:
220
235
  """
221
236
  Retrieve a list of collaborators for a given repository, as described in
@@ -233,8 +248,8 @@ def _get_repo_collaborators(
233
248
  api_url,
234
249
  organization,
235
250
  GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL,
236
- 'repository',
237
- resource_inner_type='collaborators',
251
+ "repository",
252
+ resource_inner_type="collaborators",
238
253
  repo=repo,
239
254
  affiliation=affiliation,
240
255
  )
@@ -257,15 +272,15 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
257
272
  api_url,
258
273
  organization,
259
274
  GITHUB_ORG_REPOS_PAGINATED_GRAPHQL,
260
- 'repositories',
275
+ "repositories",
261
276
  )
262
277
  return repos.nodes
263
278
 
264
279
 
265
280
  def transform(
266
- repos_json: List[Dict],
267
- direct_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
268
- outside_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
281
+ repos_json: List[Dict],
282
+ direct_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
283
+ outside_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
269
284
  ) -> Dict:
270
285
  """
271
286
  Parses the JSON returned from GitHub API to create data for graph ingestion
@@ -283,41 +298,65 @@ def transform(
283
298
  transformed_repo_owners: List[Dict] = []
284
299
  # See https://docs.github.com/en/graphql/reference/enums#repositorypermission
285
300
  transformed_outside_collaborators: Dict[str, List[Any]] = {
286
- 'ADMIN': [], 'MAINTAIN': [], 'READ': [], 'TRIAGE': [], 'WRITE': [],
301
+ "ADMIN": [],
302
+ "MAINTAIN": [],
303
+ "READ": [],
304
+ "TRIAGE": [],
305
+ "WRITE": [],
287
306
  }
288
307
  transformed_direct_collaborators: Dict[str, List[Any]] = {
289
- 'ADMIN': [], 'MAINTAIN': [], 'READ': [], 'TRIAGE': [], 'WRITE': [],
308
+ "ADMIN": [],
309
+ "MAINTAIN": [],
310
+ "READ": [],
311
+ "TRIAGE": [],
312
+ "WRITE": [],
290
313
  }
291
314
  transformed_requirements_files: List[Dict] = []
292
315
  for repo_object in repos_json:
293
- _transform_repo_languages(repo_object['url'], repo_object, transformed_repo_languages)
316
+ _transform_repo_languages(
317
+ repo_object["url"],
318
+ repo_object,
319
+ transformed_repo_languages,
320
+ )
294
321
  _transform_repo_objects(repo_object, transformed_repo_list)
295
- _transform_repo_owners(repo_object['owner']['url'], repo_object, transformed_repo_owners)
322
+ _transform_repo_owners(
323
+ repo_object["owner"]["url"],
324
+ repo_object,
325
+ transformed_repo_owners,
326
+ )
296
327
 
297
328
  # Allow sync to continue if we didn't have permissions to list collaborators
298
- repo_url = repo_object['url']
329
+ repo_url = repo_object["url"]
299
330
  if repo_url in outside_collaborators:
300
331
  _transform_collaborators(
301
- repo_object['url'],
302
- outside_collaborators[repo_object['url']],
332
+ repo_object["url"],
333
+ outside_collaborators[repo_object["url"]],
303
334
  transformed_outside_collaborators,
304
335
  )
305
336
  if repo_url in direct_collaborators:
306
337
  _transform_collaborators(
307
- repo_object['url'],
308
- direct_collaborators[repo_object['url']],
338
+ repo_object["url"],
339
+ direct_collaborators[repo_object["url"]],
309
340
  transformed_direct_collaborators,
310
341
  )
311
342
 
312
- _transform_requirements_txt(repo_object['requirements'], repo_url, transformed_requirements_files)
313
- _transform_setup_cfg_requirements(repo_object['setupCfg'], repo_url, transformed_requirements_files)
343
+ _transform_requirements_txt(
344
+ repo_object["requirements"],
345
+ repo_url,
346
+ transformed_requirements_files,
347
+ )
348
+ _transform_setup_cfg_requirements(
349
+ repo_object["setupCfg"],
350
+ repo_url,
351
+ transformed_requirements_files,
352
+ )
314
353
  results = {
315
- 'repos': transformed_repo_list,
316
- 'repo_languages': transformed_repo_languages,
317
- 'repo_owners': transformed_repo_owners,
318
- 'repo_outside_collaborators': transformed_outside_collaborators,
319
- 'repo_direct_collaborators': transformed_direct_collaborators,
320
- 'python_requirements': transformed_requirements_files,
354
+ "repos": transformed_repo_list,
355
+ "repo_languages": transformed_repo_languages,
356
+ "repo_owners": transformed_repo_owners,
357
+ "repo_outside_collaborators": transformed_outside_collaborators,
358
+ "repo_direct_collaborators": transformed_direct_collaborators,
359
+ "python_requirements": transformed_requirements_files,
321
360
  }
322
361
  return results
323
362
 
@@ -346,33 +385,37 @@ def _transform_repo_objects(input_repo_object: Dict, out_repo_list: List[Dict])
346
385
  :return: Nothing
347
386
  """
348
387
  # Create a unique ID for a GitHubBranch node representing the default branch of this repo object.
349
- dbr = input_repo_object['defaultBranchRef']
350
- default_branch_name = dbr['name'] if dbr else None
351
- default_branch_id = _create_default_branch_id(input_repo_object['url'], dbr['id']) if dbr else None
388
+ dbr = input_repo_object["defaultBranchRef"]
389
+ default_branch_name = dbr["name"] if dbr else None
390
+ default_branch_id = (
391
+ _create_default_branch_id(input_repo_object["url"], dbr["id"]) if dbr else None
392
+ )
352
393
 
353
394
  # Create a git:// URL from the given SSH URL, if it exists.
354
- ssh_url = input_repo_object.get('sshUrl')
395
+ ssh_url = input_repo_object.get("sshUrl")
355
396
  git_url = _create_git_url_from_ssh_url(ssh_url) if ssh_url else None
356
397
 
357
- out_repo_list.append({
358
- 'id': input_repo_object['url'],
359
- 'createdat': input_repo_object['createdAt'],
360
- 'name': input_repo_object['name'],
361
- 'fullname': input_repo_object['nameWithOwner'],
362
- 'description': input_repo_object['description'],
363
- 'primarylanguage': input_repo_object['primaryLanguage'],
364
- 'homepage': input_repo_object['homepageUrl'],
365
- 'defaultbranch': default_branch_name,
366
- 'defaultbranchid': default_branch_id,
367
- 'private': input_repo_object['isPrivate'],
368
- 'disabled': input_repo_object['isDisabled'],
369
- 'archived': input_repo_object['isArchived'],
370
- 'locked': input_repo_object['isLocked'],
371
- 'giturl': git_url,
372
- 'url': input_repo_object['url'],
373
- 'sshurl': ssh_url,
374
- 'updatedat': input_repo_object['updatedAt'],
375
- })
398
+ out_repo_list.append(
399
+ {
400
+ "id": input_repo_object["url"],
401
+ "createdat": input_repo_object["createdAt"],
402
+ "name": input_repo_object["name"],
403
+ "fullname": input_repo_object["nameWithOwner"],
404
+ "description": input_repo_object["description"],
405
+ "primarylanguage": input_repo_object["primaryLanguage"],
406
+ "homepage": input_repo_object["homepageUrl"],
407
+ "defaultbranch": default_branch_name,
408
+ "defaultbranchid": default_branch_id,
409
+ "private": input_repo_object["isPrivate"],
410
+ "disabled": input_repo_object["isDisabled"],
411
+ "archived": input_repo_object["isArchived"],
412
+ "locked": input_repo_object["isLocked"],
413
+ "giturl": git_url,
414
+ "url": input_repo_object["url"],
415
+ "sshurl": ssh_url,
416
+ "updatedat": input_repo_object["updatedAt"],
417
+ },
418
+ )
376
419
 
377
420
 
378
421
  def _transform_repo_owners(owner_id: str, repo: Dict, repo_owners: List[Dict]) -> None:
@@ -383,15 +426,21 @@ def _transform_repo_owners(owner_id: str, repo: Dict, repo_owners: List[Dict]) -
383
426
  :param repo_owners: Output array to append transformed results to.
384
427
  :return: Nothing.
385
428
  """
386
- repo_owners.append({
387
- 'repo_id': repo['url'],
388
- 'owner': repo['owner']['login'],
389
- 'owner_id': owner_id,
390
- 'type': repo['owner']['__typename'],
391
- })
429
+ repo_owners.append(
430
+ {
431
+ "repo_id": repo["url"],
432
+ "owner": repo["owner"]["login"],
433
+ "owner_id": owner_id,
434
+ "type": repo["owner"]["__typename"],
435
+ },
436
+ )
392
437
 
393
438
 
394
- def _transform_repo_languages(repo_url: str, repo: Dict, repo_languages: List[Dict]) -> None:
439
+ def _transform_repo_languages(
440
+ repo_url: str,
441
+ repo: Dict,
442
+ repo_languages: List[Dict],
443
+ ) -> None:
395
444
  """
396
445
  Helper function to transform the languages in a GitHub repo.
397
446
  :param repo_url: The URL of the repo.
@@ -399,16 +448,20 @@ def _transform_repo_languages(repo_url: str, repo: Dict, repo_languages: List[Di
399
448
  :param repo_languages: Output array to append transformed results to.
400
449
  :return: Nothing.
401
450
  """
402
- if repo['languages']['totalCount'] > 0:
403
- for language in repo['languages']['nodes']:
404
- repo_languages.append({
405
- 'repo_id': repo_url,
406
- 'language_name': language['name'],
407
- })
451
+ if repo["languages"]["totalCount"] > 0:
452
+ for language in repo["languages"]["nodes"]:
453
+ repo_languages.append(
454
+ {
455
+ "repo_id": repo_url,
456
+ "language_name": language["name"],
457
+ },
458
+ )
408
459
 
409
460
 
410
461
  def _transform_collaborators(
411
- repo_url: str, collaborators: List[UserAffiliationAndRepoPermission], transformed_collaborators: Dict,
462
+ repo_url: str,
463
+ collaborators: List[UserAffiliationAndRepoPermission],
464
+ transformed_collaborators: Dict,
412
465
  ) -> None:
413
466
  """
414
467
  Performs data adjustments for collaborators in a GitHub repo.
@@ -425,8 +478,8 @@ def _transform_collaborators(
425
478
  if collaborators:
426
479
  for collaborator in collaborators:
427
480
  user = collaborator.user
428
- user['repo_url'] = repo_url
429
- user['affiliation'] = collaborator.affiliation
481
+ user["repo_url"] = repo_url
482
+ user["affiliation"] = collaborator.affiliation
430
483
  transformed_collaborators[collaborator.permission].append(user)
431
484
 
432
485
 
@@ -442,10 +495,14 @@ def _transform_requirements_txt(
442
495
  :param out_requirements_files: Output array to append transformed results to.
443
496
  :return: Nothing.
444
497
  """
445
- if req_file_contents and req_file_contents.get('text'):
446
- text_contents = req_file_contents['text']
498
+ if req_file_contents and req_file_contents.get("text"):
499
+ text_contents = req_file_contents["text"]
447
500
  requirements_list = text_contents.split("\n")
448
- _transform_python_requirements(requirements_list, repo_url, out_requirements_files)
501
+ _transform_python_requirements(
502
+ requirements_list,
503
+ repo_url,
504
+ out_requirements_files,
505
+ )
449
506
 
450
507
 
451
508
  def _transform_setup_cfg_requirements(
@@ -460,9 +517,9 @@ def _transform_setup_cfg_requirements(
460
517
  :param out_requirements_files: Output array to append transformed results to.
461
518
  :return: Nothing.
462
519
  """
463
- if not setup_cfg_contents or not setup_cfg_contents.get('text'):
520
+ if not setup_cfg_contents or not setup_cfg_contents.get("text"):
464
521
  return
465
- text_contents = setup_cfg_contents['text']
522
+ text_contents = setup_cfg_contents["text"]
466
523
  setup_cfg = configparser.ConfigParser()
467
524
  try:
468
525
  setup_cfg.read_string(text_contents)
@@ -490,8 +547,8 @@ def _transform_python_requirements(
490
547
  """
491
548
  parsed_list = []
492
549
  for line in requirements_list:
493
- stripped_line = line.partition('#')[0].strip()
494
- if stripped_line == '':
550
+ stripped_line = line.partition("#")[0].strip()
551
+ if stripped_line == "":
495
552
  continue
496
553
  try:
497
554
  req = Requirement(stripped_line)
@@ -499,7 +556,7 @@ def _transform_python_requirements(
499
556
  except InvalidRequirement:
500
557
  # INFO and not WARN/ERROR as we intentionally don't support all ways to specify Python requirements
501
558
  logger.info(
502
- f"Failed to parse line \"{line}\" in repo {repo_url}'s requirements.txt; skipping line.",
559
+ f'Failed to parse line "{line}" in repo {repo_url}\'s requirements.txt; skipping line.',
503
560
  exc_info=True,
504
561
  )
505
562
 
@@ -507,32 +564,44 @@ def _transform_python_requirements(
507
564
  pinned_version = None
508
565
  if len(req.specifier) == 1:
509
566
  specifier = next(iter(req.specifier))
510
- if specifier.operator == '==':
567
+ if specifier.operator == "==":
511
568
  pinned_version = specifier.version
512
569
 
513
570
  # Set `spec` to a default value. Example values for str(req.specifier): "<4.0,>=3.0" or "==1.0.0".
514
571
  spec: Optional[str] = str(req.specifier)
515
572
  # Set spec to `None` instead of empty string so that the Neo4j driver will leave the library.specifier field
516
573
  # undefined. As convention, we prefer undefined values over empty strings in the graph.
517
- if spec == '':
574
+ if spec == "":
518
575
  spec = None
519
576
 
520
577
  canon_name = canonicalize_name(req.name)
521
- requirement_id = f"{canon_name}|{pinned_version}" if pinned_version else canon_name
578
+ requirement_id = (
579
+ f"{canon_name}|{pinned_version}" if pinned_version else canon_name
580
+ )
522
581
 
523
- out_requirements_files.append({
524
- "id": requirement_id,
525
- "name": canon_name,
526
- "specifier": spec,
527
- "version": pinned_version,
528
- "repo_url": repo_url,
529
- })
582
+ out_requirements_files.append(
583
+ {
584
+ "id": requirement_id,
585
+ "name": canon_name,
586
+ "specifier": spec,
587
+ "version": pinned_version,
588
+ "repo_url": repo_url,
589
+ },
590
+ )
530
591
 
531
592
 
532
593
  def parse_setup_cfg(config: configparser.ConfigParser) -> List[str]:
533
594
  reqs: List[str] = []
534
- reqs.extend(_parse_setup_cfg_requirements(config.get("options", "install_requires", fallback="")))
535
- reqs.extend(_parse_setup_cfg_requirements(config.get("options", "setup_requires", fallback="")))
595
+ reqs.extend(
596
+ _parse_setup_cfg_requirements(
597
+ config.get("options", "install_requires", fallback=""),
598
+ ),
599
+ )
600
+ reqs.extend(
601
+ _parse_setup_cfg_requirements(
602
+ config.get("options", "setup_requires", fallback=""),
603
+ ),
604
+ )
536
605
  if config.has_section("options.extras_require"):
537
606
  for _, val in config.items("options.extras_require"):
538
607
  reqs.extend(_parse_setup_cfg_requirements(val))
@@ -551,7 +620,11 @@ def _parse_setup_cfg_requirements(reqs: str, separator: str = ";") -> List[str]:
551
620
 
552
621
 
553
622
  @timeit
554
- def load_github_repos(neo4j_session: neo4j.Session, update_tag: int, repo_data: List[Dict]) -> None:
623
+ def load_github_repos(
624
+ neo4j_session: neo4j.Session,
625
+ update_tag: int,
626
+ repo_data: List[Dict],
627
+ ) -> None:
555
628
  """
556
629
  Ingest the GitHub repository information
557
630
  :param neo4j_session: Neo4J session object for server communication
@@ -602,7 +675,11 @@ def load_github_repos(neo4j_session: neo4j.Session, update_tag: int, repo_data:
602
675
 
603
676
 
604
677
  @timeit
605
- def load_github_languages(neo4j_session: neo4j.Session, update_tag: int, repo_languages: List[Dict]) -> None:
678
+ def load_github_languages(
679
+ neo4j_session: neo4j.Session,
680
+ update_tag: int,
681
+ repo_languages: List[Dict],
682
+ ) -> None:
606
683
  """
607
684
  Ingest the relationships for repo languages
608
685
  :param neo4j_session: Neo4J session object for server communication
@@ -632,7 +709,11 @@ def load_github_languages(neo4j_session: neo4j.Session, update_tag: int, repo_la
632
709
 
633
710
 
634
711
  @timeit
635
- def load_github_owners(neo4j_session: neo4j.Session, update_tag: int, repo_owners: List[Dict]) -> None:
712
+ def load_github_owners(
713
+ neo4j_session: neo4j.Session,
714
+ update_tag: int,
715
+ repo_owners: List[Dict],
716
+ ) -> None:
636
717
  """
637
718
  Ingest the relationships for repo owners
638
719
  :param neo4j_session: Neo4J session object for server communication
@@ -641,7 +722,8 @@ def load_github_owners(neo4j_session: neo4j.Session, update_tag: int, repo_owner
641
722
  :return: Nothing
642
723
  """
643
724
  for owner in repo_owners:
644
- ingest_owner_template = Template("""
725
+ ingest_owner_template = Template(
726
+ """
645
727
  MERGE (user:$account_type{id: $Id})
646
728
  ON CREATE SET user.firstseen = timestamp()
647
729
  SET user.username = $UserName,
@@ -651,22 +733,31 @@ def load_github_owners(neo4j_session: neo4j.Session, update_tag: int, repo_owner
651
733
  MATCH (repo:GitHubRepository{id: $RepoId})
652
734
  MERGE (user)<-[r:OWNER]-(repo)
653
735
  ON CREATE SET r.firstseen = timestamp()
654
- SET r.lastupdated = $UpdateTag""")
736
+ SET r.lastupdated = $UpdateTag""",
737
+ )
655
738
 
656
- account_type = {'User': "GitHubUser", 'Organization': "GitHubOrganization"}
739
+ account_type = {"User": "GitHubUser", "Organization": "GitHubOrganization"}
657
740
 
658
741
  neo4j_session.run(
659
- ingest_owner_template.safe_substitute(account_type=account_type[owner['type']]),
660
- Id=owner['owner_id'],
661
- UserName=owner['owner'],
662
- RepoId=owner['repo_id'],
742
+ ingest_owner_template.safe_substitute(
743
+ account_type=account_type[owner["type"]],
744
+ ),
745
+ Id=owner["owner_id"],
746
+ UserName=owner["owner"],
747
+ RepoId=owner["repo_id"],
663
748
  UpdateTag=update_tag,
664
749
  )
665
750
 
666
751
 
667
752
  @timeit
668
- def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborators: Dict, affiliation: str) -> None:
669
- query = Template("""
753
+ def load_collaborators(
754
+ neo4j_session: neo4j.Session,
755
+ update_tag: int,
756
+ collaborators: Dict,
757
+ affiliation: str,
758
+ ) -> None:
759
+ query = Template(
760
+ """
670
761
  UNWIND $UserData as user
671
762
 
672
763
  MERGE (u:GitHubUser{id: user.url})
@@ -683,7 +774,8 @@ def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborat
683
774
  MERGE (repo)<-[o:$rel_label]-(u)
684
775
  ON CREATE SET o.firstseen = timestamp()
685
776
  SET o.lastupdated = $UpdateTag
686
- """)
777
+ """,
778
+ )
687
779
  for collab_type in collaborators.keys():
688
780
  relationship_label = f"{affiliation}_COLLAB_{collab_type}"
689
781
  neo4j_session.run(
@@ -694,21 +786,51 @@ def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborat
694
786
 
695
787
 
696
788
  @timeit
697
- def load(neo4j_session: neo4j.Session, common_job_parameters: Dict, repo_data: Dict) -> None:
698
- load_github_repos(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repos'])
699
- load_github_owners(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_owners'])
700
- load_github_languages(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_languages'])
789
+ def load(
790
+ neo4j_session: neo4j.Session,
791
+ common_job_parameters: Dict,
792
+ repo_data: Dict,
793
+ ) -> None:
794
+ load_github_repos(
795
+ neo4j_session,
796
+ common_job_parameters["UPDATE_TAG"],
797
+ repo_data["repos"],
798
+ )
799
+ load_github_owners(
800
+ neo4j_session,
801
+ common_job_parameters["UPDATE_TAG"],
802
+ repo_data["repo_owners"],
803
+ )
804
+ load_github_languages(
805
+ neo4j_session,
806
+ common_job_parameters["UPDATE_TAG"],
807
+ repo_data["repo_languages"],
808
+ )
701
809
  load_collaborators(
702
- neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_direct_collaborators'], 'DIRECT',
810
+ neo4j_session,
811
+ common_job_parameters["UPDATE_TAG"],
812
+ repo_data["repo_direct_collaborators"],
813
+ "DIRECT",
703
814
  )
704
815
  load_collaborators(
705
- neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_outside_collaborators'], 'OUTSIDE',
816
+ neo4j_session,
817
+ common_job_parameters["UPDATE_TAG"],
818
+ repo_data["repo_outside_collaborators"],
819
+ "OUTSIDE",
820
+ )
821
+ load_python_requirements(
822
+ neo4j_session,
823
+ common_job_parameters["UPDATE_TAG"],
824
+ repo_data["python_requirements"],
706
825
  )
707
- load_python_requirements(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['python_requirements'])
708
826
 
709
827
 
710
828
  @timeit
711
- def load_python_requirements(neo4j_session: neo4j.Session, update_tag: int, requirements_objects: List[Dict]) -> None:
829
+ def load_python_requirements(
830
+ neo4j_session: neo4j.Session,
831
+ update_tag: int,
832
+ requirements_objects: List[Dict],
833
+ ) -> None:
712
834
  query = """
713
835
  UNWIND $Requirements AS req
714
836
  MERGE (lib:PythonLibrary:Dependency{id: req.id})
@@ -732,11 +854,11 @@ def load_python_requirements(neo4j_session: neo4j.Session, update_tag: int, requ
732
854
 
733
855
 
734
856
  def sync(
735
- neo4j_session: neo4j.Session,
736
- common_job_parameters: Dict[str, Any],
737
- github_api_key: str,
738
- github_url: str,
739
- organization: str,
857
+ neo4j_session: neo4j.Session,
858
+ common_job_parameters: Dict[str, Any],
859
+ github_api_key: str,
860
+ github_url: str,
861
+ organization: str,
740
862
  ) -> None:
741
863
  """
742
864
  Performs the sequential tasks to collect, transform, and sync github data
@@ -753,14 +875,25 @@ def sync(
753
875
  outside_collabs: dict[str, list[UserAffiliationAndRepoPermission]] = {}
754
876
  try:
755
877
  direct_collabs = _get_repo_collaborators_for_multiple_repos(
756
- repos_json, "DIRECT", organization, github_url, github_api_key,
878
+ repos_json,
879
+ "DIRECT",
880
+ organization,
881
+ github_url,
882
+ github_api_key,
757
883
  )
758
884
  outside_collabs = _get_repo_collaborators_for_multiple_repos(
759
- repos_json, "OUTSIDE", organization, github_url, github_api_key,
885
+ repos_json,
886
+ "OUTSIDE",
887
+ organization,
888
+ github_url,
889
+ github_api_key,
760
890
  )
761
891
  except TypeError:
762
892
  # due to permission errors or transient network error or some other nonsense
763
- logger.warning('Unable to list repo collaborators due to permission errors; continuing on.', exc_info=True)
893
+ logger.warning(
894
+ "Unable to list repo collaborators due to permission errors; continuing on.",
895
+ exc_info=True,
896
+ )
764
897
  repo_data = transform(repos_json, direct_collabs, outside_collabs)
765
898
  load(neo4j_session, common_job_parameters, repo_data)
766
- run_cleanup_job('github_repos_cleanup.json', neo4j_session, common_job_parameters)
899
+ run_cleanup_job("github_repos_cleanup.json", neo4j_session, common_job_parameters)