cartography 0.93.0rc1__py3-none-any.whl → 0.123.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (822)
  1. cartography/__main__.py +1 -2
  2. cartography/_version.py +34 -0
  3. cartography/cli.py +903 -225
  4. cartography/client/aws/__init__.py +19 -0
  5. cartography/client/aws/ecr.py +51 -0
  6. cartography/client/core/tx.py +400 -27
  7. cartography/config.py +215 -10
  8. cartography/data/azure_permission_relationships.yaml +20 -0
  9. cartography/data/gcp_permission_relationships.yaml +21 -0
  10. cartography/data/indexes.cypher +1 -200
  11. cartography/data/jobs/analysis/aws_ec2_asset_exposure.json +17 -2
  12. cartography/data/jobs/analysis/aws_ec2_keypair_analysis.json +2 -2
  13. cartography/data/jobs/analysis/gcp_compute_asset_inet_exposure.json +1 -1
  14. cartography/data/jobs/analysis/keycloak_inheritance.json +30 -0
  15. cartography/data/jobs/cleanup/crowdstrike_import_cleanup.json +0 -5
  16. cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json +0 -12
  17. cartography/data/jobs/cleanup/github_repos_cleanup.json +27 -0
  18. cartography/data/jobs/scoped_analysis/aws_ec2_iaminstanceprofile.json +15 -0
  19. cartography/data/jobs/scoped_analysis/semgrep_sca_risk_analysis.json +13 -13
  20. cartography/driftdetect/__main__.py +1 -2
  21. cartography/driftdetect/add_shortcut.py +10 -2
  22. cartography/driftdetect/cli.py +72 -75
  23. cartography/driftdetect/detect_deviations.py +7 -3
  24. cartography/driftdetect/get_states.py +20 -8
  25. cartography/driftdetect/model.py +5 -5
  26. cartography/driftdetect/serializers.py +8 -6
  27. cartography/driftdetect/storage.py +2 -2
  28. cartography/graph/cleanupbuilder.py +255 -35
  29. cartography/graph/job.py +104 -20
  30. cartography/graph/querybuilder.py +689 -91
  31. cartography/graph/statement.py +49 -36
  32. cartography/intel/airbyte/__init__.py +105 -0
  33. cartography/intel/airbyte/connections.py +120 -0
  34. cartography/intel/airbyte/destinations.py +81 -0
  35. cartography/intel/airbyte/organizations.py +59 -0
  36. cartography/intel/airbyte/sources.py +78 -0
  37. cartography/intel/airbyte/tags.py +64 -0
  38. cartography/intel/airbyte/users.py +106 -0
  39. cartography/intel/airbyte/util.py +122 -0
  40. cartography/intel/airbyte/workspaces.py +63 -0
  41. cartography/intel/analysis.py +4 -1
  42. cartography/intel/anthropic/__init__.py +62 -0
  43. cartography/intel/anthropic/apikeys.py +72 -0
  44. cartography/intel/anthropic/users.py +75 -0
  45. cartography/intel/anthropic/util.py +51 -0
  46. cartography/intel/anthropic/workspaces.py +95 -0
  47. cartography/intel/aws/__init__.py +137 -59
  48. cartography/intel/aws/acm.py +124 -0
  49. cartography/intel/aws/apigateway.py +482 -217
  50. cartography/intel/aws/apigatewayv2.py +116 -0
  51. cartography/intel/aws/cloudtrail.py +105 -0
  52. cartography/intel/aws/cloudtrail_management_events.py +962 -0
  53. cartography/intel/aws/cloudwatch.py +239 -0
  54. cartography/intel/aws/codebuild.py +132 -0
  55. cartography/intel/aws/cognito.py +201 -0
  56. cartography/intel/aws/config.py +63 -23
  57. cartography/intel/aws/dynamodb.py +108 -40
  58. cartography/intel/aws/ec2/__init__.py +2 -2
  59. cartography/intel/aws/ec2/auto_scaling_groups.py +254 -189
  60. cartography/intel/aws/ec2/elastic_ip_addresses.py +44 -14
  61. cartography/intel/aws/ec2/images.py +74 -39
  62. cartography/intel/aws/ec2/instances.py +262 -137
  63. cartography/intel/aws/ec2/internet_gateways.py +44 -13
  64. cartography/intel/aws/ec2/key_pairs.py +72 -39
  65. cartography/intel/aws/ec2/launch_templates.py +143 -66
  66. cartography/intel/aws/ec2/load_balancer_v2s.py +119 -45
  67. cartography/intel/aws/ec2/load_balancers.py +165 -147
  68. cartography/intel/aws/ec2/network_acls.py +233 -0
  69. cartography/intel/aws/ec2/network_interfaces.py +150 -87
  70. cartography/intel/aws/ec2/reserved_instances.py +48 -17
  71. cartography/intel/aws/ec2/route_tables.py +327 -0
  72. cartography/intel/aws/ec2/security_groups.py +189 -121
  73. cartography/intel/aws/ec2/snapshots.py +93 -91
  74. cartography/intel/aws/ec2/subnets.py +70 -58
  75. cartography/intel/aws/ec2/tgw.py +111 -39
  76. cartography/intel/aws/ec2/util.py +1 -1
  77. cartography/intel/aws/ec2/volumes.py +69 -41
  78. cartography/intel/aws/ec2/vpc.py +157 -116
  79. cartography/intel/aws/ec2/vpc_peerings.py +317 -121
  80. cartography/intel/aws/ecr.py +336 -93
  81. cartography/intel/aws/ecr_image_layers.py +923 -0
  82. cartography/intel/aws/ecs.py +310 -403
  83. cartography/intel/aws/efs.py +261 -0
  84. cartography/intel/aws/eks.py +55 -29
  85. cartography/intel/aws/elasticache.py +130 -83
  86. cartography/intel/aws/elasticsearch.py +70 -24
  87. cartography/intel/aws/emr.py +61 -23
  88. cartography/intel/aws/eventbridge.py +164 -0
  89. cartography/intel/aws/glue.py +181 -0
  90. cartography/intel/aws/guardduty.py +443 -0
  91. cartography/intel/aws/iam.py +978 -464
  92. cartography/intel/aws/iam_instance_profiles.py +73 -0
  93. cartography/intel/aws/identitycenter.py +847 -0
  94. cartography/intel/aws/inspector.py +330 -133
  95. cartography/intel/aws/kms.py +235 -209
  96. cartography/intel/aws/lambda_function.py +328 -176
  97. cartography/intel/aws/organizations.py +40 -19
  98. cartography/intel/aws/permission_relationships.py +144 -68
  99. cartography/intel/aws/rds.py +467 -412
  100. cartography/intel/aws/redshift.py +116 -50
  101. cartography/intel/aws/resourcegroupstaggingapi.py +198 -82
  102. cartography/intel/aws/resources.py +80 -42
  103. cartography/intel/aws/route53.py +419 -318
  104. cartography/intel/aws/s3.py +489 -96
  105. cartography/intel/aws/s3accountpublicaccessblock.py +157 -0
  106. cartography/intel/aws/secretsmanager.py +217 -40
  107. cartography/intel/aws/securityhub.py +23 -10
  108. cartography/intel/aws/sns.py +226 -0
  109. cartography/intel/aws/sqs.py +74 -96
  110. cartography/intel/aws/ssm.py +142 -33
  111. cartography/intel/aws/util/arns.py +7 -7
  112. cartography/intel/aws/util/common.py +31 -4
  113. cartography/intel/azure/__init__.py +259 -46
  114. cartography/intel/azure/aks.py +175 -0
  115. cartography/intel/azure/app_service.py +105 -0
  116. cartography/intel/azure/compute.py +141 -120
  117. cartography/intel/azure/container_instances.py +95 -0
  118. cartography/intel/azure/cosmosdb.py +706 -519
  119. cartography/intel/azure/data_factory.py +85 -0
  120. cartography/intel/azure/data_factory_dataset.py +128 -0
  121. cartography/intel/azure/data_factory_linked_service.py +119 -0
  122. cartography/intel/azure/data_factory_pipeline.py +142 -0
  123. cartography/intel/azure/data_lake.py +124 -0
  124. cartography/intel/azure/event_grid.py +94 -0
  125. cartography/intel/azure/functions.py +124 -0
  126. cartography/intel/azure/load_balancers.py +263 -0
  127. cartography/intel/azure/logic_apps.py +101 -0
  128. cartography/intel/azure/monitor.py +105 -0
  129. cartography/intel/azure/network.py +467 -0
  130. cartography/intel/azure/permission_relationships.py +466 -0
  131. cartography/intel/azure/rbac.py +309 -0
  132. cartography/intel/azure/resource_groups.py +82 -0
  133. cartography/intel/azure/security_center.py +106 -0
  134. cartography/intel/azure/sql.py +436 -392
  135. cartography/intel/azure/storage.py +467 -335
  136. cartography/intel/azure/subscription.py +49 -55
  137. cartography/intel/azure/tenant.py +46 -28
  138. cartography/intel/azure/util/common.py +13 -0
  139. cartography/intel/azure/util/credentials.py +58 -143
  140. cartography/intel/azure/util/tag.py +41 -0
  141. cartography/intel/bigfix/__init__.py +2 -2
  142. cartography/intel/bigfix/computers.py +93 -65
  143. cartography/intel/cloudflare/__init__.py +74 -0
  144. cartography/intel/cloudflare/accounts.py +57 -0
  145. cartography/intel/cloudflare/dnsrecords.py +64 -0
  146. cartography/intel/cloudflare/members.py +75 -0
  147. cartography/intel/cloudflare/roles.py +65 -0
  148. cartography/intel/cloudflare/zones.py +64 -0
  149. cartography/intel/create_indexes.py +5 -3
  150. cartography/intel/crowdstrike/__init__.py +26 -12
  151. cartography/intel/crowdstrike/endpoints.py +17 -45
  152. cartography/intel/crowdstrike/spotlight.py +13 -5
  153. cartography/intel/cve/__init__.py +91 -26
  154. cartography/intel/cve/feed.py +77 -56
  155. cartography/intel/digitalocean/__init__.py +22 -13
  156. cartography/intel/digitalocean/compute.py +75 -108
  157. cartography/intel/digitalocean/management.py +44 -80
  158. cartography/intel/digitalocean/platform.py +48 -43
  159. cartography/intel/dns.py +41 -12
  160. cartography/intel/duo/__init__.py +21 -16
  161. cartography/intel/duo/api_host.py +14 -9
  162. cartography/intel/duo/endpoints.py +50 -45
  163. cartography/intel/duo/groups.py +18 -14
  164. cartography/intel/duo/phones.py +37 -34
  165. cartography/intel/duo/tokens.py +26 -23
  166. cartography/intel/duo/users.py +54 -50
  167. cartography/intel/duo/web_authn_credentials.py +30 -25
  168. cartography/intel/entra/__init__.py +160 -0
  169. cartography/intel/entra/app_role_assignments.py +284 -0
  170. cartography/intel/entra/applications.py +182 -0
  171. cartography/intel/entra/federation/__init__.py +0 -0
  172. cartography/intel/entra/federation/aws_identity_center.py +77 -0
  173. cartography/intel/entra/groups.py +198 -0
  174. cartography/intel/entra/ou.py +136 -0
  175. cartography/intel/entra/service_principals.py +217 -0
  176. cartography/intel/entra/users.py +259 -0
  177. cartography/intel/gcp/__init__.py +381 -175
  178. cartography/intel/gcp/bigtable_app_profile.py +101 -0
  179. cartography/intel/gcp/bigtable_backup.py +91 -0
  180. cartography/intel/gcp/bigtable_cluster.py +93 -0
  181. cartography/intel/gcp/bigtable_instance.py +86 -0
  182. cartography/intel/gcp/bigtable_table.py +87 -0
  183. cartography/intel/gcp/cai.py +292 -0
  184. cartography/intel/gcp/clients.py +112 -0
  185. cartography/intel/gcp/compute.py +521 -325
  186. cartography/intel/gcp/crm/__init__.py +0 -0
  187. cartography/intel/gcp/crm/folders.py +114 -0
  188. cartography/intel/gcp/crm/orgs.py +70 -0
  189. cartography/intel/gcp/crm/projects.py +120 -0
  190. cartography/intel/gcp/dns.py +134 -179
  191. cartography/intel/gcp/gke.py +100 -107
  192. cartography/intel/gcp/iam.py +262 -0
  193. cartography/intel/gcp/permission_relationships.py +394 -0
  194. cartography/intel/gcp/policy_bindings.py +225 -0
  195. cartography/intel/gcp/storage.py +103 -158
  196. cartography/intel/github/__init__.py +66 -27
  197. cartography/intel/github/commits.py +423 -0
  198. cartography/intel/github/repos.py +871 -160
  199. cartography/intel/github/teams.py +386 -53
  200. cartography/intel/github/users.py +214 -49
  201. cartography/intel/github/util.py +50 -35
  202. cartography/intel/googleworkspace/__init__.py +193 -0
  203. cartography/intel/googleworkspace/devices.py +254 -0
  204. cartography/intel/googleworkspace/groups.py +568 -0
  205. cartography/intel/googleworkspace/oauth_apps.py +259 -0
  206. cartography/intel/googleworkspace/tenant.py +85 -0
  207. cartography/intel/googleworkspace/users.py +138 -0
  208. cartography/intel/gsuite/__init__.py +101 -42
  209. cartography/intel/gsuite/groups.py +291 -0
  210. cartography/intel/gsuite/users.py +142 -0
  211. cartography/intel/jamf/__init__.py +19 -1
  212. cartography/intel/jamf/computers.py +37 -8
  213. cartography/intel/jamf/util.py +7 -2
  214. cartography/intel/kandji/__init__.py +6 -3
  215. cartography/intel/kandji/devices.py +40 -10
  216. cartography/intel/keycloak/__init__.py +153 -0
  217. cartography/intel/keycloak/authenticationexecutions.py +322 -0
  218. cartography/intel/keycloak/authenticationflows.py +77 -0
  219. cartography/intel/keycloak/clients.py +187 -0
  220. cartography/intel/keycloak/groups.py +126 -0
  221. cartography/intel/keycloak/identityproviders.py +94 -0
  222. cartography/intel/keycloak/organizations.py +163 -0
  223. cartography/intel/keycloak/realms.py +61 -0
  224. cartography/intel/keycloak/roles.py +202 -0
  225. cartography/intel/keycloak/scopes.py +73 -0
  226. cartography/intel/keycloak/users.py +70 -0
  227. cartography/intel/keycloak/util.py +47 -0
  228. cartography/intel/kubernetes/__init__.py +60 -14
  229. cartography/intel/kubernetes/clusters.py +86 -0
  230. cartography/intel/kubernetes/eks.py +402 -0
  231. cartography/intel/kubernetes/namespaces.py +60 -55
  232. cartography/intel/kubernetes/pods.py +171 -75
  233. cartography/intel/kubernetes/rbac.py +597 -0
  234. cartography/intel/kubernetes/secrets.py +95 -45
  235. cartography/intel/kubernetes/services.py +131 -63
  236. cartography/intel/kubernetes/util.py +142 -14
  237. cartography/intel/lastpass/__init__.py +2 -2
  238. cartography/intel/lastpass/users.py +23 -12
  239. cartography/intel/oci/__init__.py +44 -11
  240. cartography/intel/oci/iam.py +157 -47
  241. cartography/intel/oci/organizations.py +16 -7
  242. cartography/intel/oci/utils.py +71 -25
  243. cartography/intel/okta/__init__.py +66 -15
  244. cartography/intel/okta/applications.py +57 -25
  245. cartography/intel/okta/awssaml.py +105 -41
  246. cartography/intel/okta/factors.py +19 -5
  247. cartography/intel/okta/groups.py +61 -31
  248. cartography/intel/okta/organization.py +8 -2
  249. cartography/intel/okta/origins.py +9 -3
  250. cartography/intel/okta/roles.py +20 -7
  251. cartography/intel/okta/users.py +31 -10
  252. cartography/intel/okta/utils.py +6 -4
  253. cartography/intel/ontology/__init__.py +44 -0
  254. cartography/intel/ontology/devices.py +54 -0
  255. cartography/intel/ontology/users.py +54 -0
  256. cartography/intel/ontology/utils.py +176 -0
  257. cartography/intel/openai/__init__.py +86 -0
  258. cartography/intel/openai/adminapikeys.py +89 -0
  259. cartography/intel/openai/apikeys.py +96 -0
  260. cartography/intel/openai/projects.py +97 -0
  261. cartography/intel/openai/serviceaccounts.py +82 -0
  262. cartography/intel/openai/users.py +75 -0
  263. cartography/intel/openai/util.py +45 -0
  264. cartography/intel/pagerduty/__init__.py +8 -7
  265. cartography/intel/pagerduty/escalation_policies.py +31 -12
  266. cartography/intel/pagerduty/schedules.py +21 -8
  267. cartography/intel/pagerduty/services.py +18 -7
  268. cartography/intel/pagerduty/teams.py +13 -5
  269. cartography/intel/pagerduty/users.py +6 -2
  270. cartography/intel/pagerduty/vendors.py +6 -2
  271. cartography/intel/scaleway/__init__.py +127 -0
  272. cartography/intel/scaleway/iam/__init__.py +0 -0
  273. cartography/intel/scaleway/iam/apikeys.py +71 -0
  274. cartography/intel/scaleway/iam/applications.py +71 -0
  275. cartography/intel/scaleway/iam/groups.py +71 -0
  276. cartography/intel/scaleway/iam/users.py +71 -0
  277. cartography/intel/scaleway/instances/__init__.py +0 -0
  278. cartography/intel/scaleway/instances/flexibleips.py +86 -0
  279. cartography/intel/scaleway/instances/instances.py +92 -0
  280. cartography/intel/scaleway/projects.py +79 -0
  281. cartography/intel/scaleway/storage/__init__.py +0 -0
  282. cartography/intel/scaleway/storage/snapshots.py +86 -0
  283. cartography/intel/scaleway/storage/volumes.py +84 -0
  284. cartography/intel/scaleway/utils.py +37 -0
  285. cartography/intel/semgrep/__init__.py +30 -5
  286. cartography/intel/semgrep/dependencies.py +255 -0
  287. cartography/intel/semgrep/deployment.py +69 -0
  288. cartography/intel/semgrep/findings.py +157 -117
  289. cartography/intel/sentinelone/__init__.py +75 -0
  290. cartography/intel/sentinelone/account.py +140 -0
  291. cartography/intel/sentinelone/agent.py +139 -0
  292. cartography/intel/sentinelone/api.py +124 -0
  293. cartography/intel/sentinelone/application.py +248 -0
  294. cartography/intel/sentinelone/cve.py +119 -0
  295. cartography/intel/sentinelone/utils.py +28 -0
  296. cartography/intel/slack/__init__.py +78 -0
  297. cartography/intel/slack/channels.py +80 -0
  298. cartography/intel/slack/groups.py +90 -0
  299. cartography/intel/slack/teams.py +65 -0
  300. cartography/intel/slack/users.py +57 -0
  301. cartography/intel/slack/utils.py +29 -0
  302. cartography/intel/snipeit/__init__.py +44 -0
  303. cartography/intel/snipeit/asset.py +80 -0
  304. cartography/intel/snipeit/user.py +78 -0
  305. cartography/intel/snipeit/util.py +40 -0
  306. cartography/intel/spacelift/__init__.py +161 -0
  307. cartography/intel/spacelift/account.py +73 -0
  308. cartography/intel/spacelift/ec2_ownership.py +280 -0
  309. cartography/intel/spacelift/runs.py +463 -0
  310. cartography/intel/spacelift/spaces.py +112 -0
  311. cartography/intel/spacelift/stacks.py +119 -0
  312. cartography/intel/spacelift/util.py +122 -0
  313. cartography/intel/spacelift/workerpools.py +131 -0
  314. cartography/intel/spacelift/workers.py +128 -0
  315. cartography/intel/tailscale/__init__.py +77 -0
  316. cartography/intel/tailscale/acls.py +146 -0
  317. cartography/intel/tailscale/devices.py +127 -0
  318. cartography/intel/tailscale/postureintegrations.py +81 -0
  319. cartography/intel/tailscale/tailnets.py +76 -0
  320. cartography/intel/tailscale/users.py +80 -0
  321. cartography/intel/tailscale/utils.py +132 -0
  322. cartography/intel/trivy/__init__.py +272 -0
  323. cartography/intel/trivy/scanner.py +386 -0
  324. cartography/models/airbyte/__init__.py +0 -0
  325. cartography/models/airbyte/connection.py +138 -0
  326. cartography/models/airbyte/destination.py +75 -0
  327. cartography/models/airbyte/organization.py +19 -0
  328. cartography/models/airbyte/source.py +75 -0
  329. cartography/models/airbyte/stream.py +74 -0
  330. cartography/models/airbyte/tag.py +69 -0
  331. cartography/models/airbyte/user.py +115 -0
  332. cartography/models/airbyte/workspace.py +46 -0
  333. cartography/models/anthropic/__init__.py +0 -0
  334. cartography/models/anthropic/apikey.py +94 -0
  335. cartography/models/anthropic/organization.py +19 -0
  336. cartography/models/anthropic/user.py +52 -0
  337. cartography/models/anthropic/workspace.py +90 -0
  338. cartography/models/aws/acm/__init__.py +0 -0
  339. cartography/models/aws/acm/certificate.py +75 -0
  340. cartography/models/aws/apigateway/__init__.py +0 -0
  341. cartography/models/aws/apigateway/apigateway.py +51 -0
  342. cartography/models/aws/apigateway/apigatewaycertificate.py +72 -0
  343. cartography/models/aws/apigateway/apigatewaydeployment.py +74 -0
  344. cartography/models/aws/apigateway/apigatewayintegration.py +79 -0
  345. cartography/models/aws/apigateway/apigatewaymethod.py +74 -0
  346. cartography/models/aws/apigateway/apigatewayresource.py +70 -0
  347. cartography/models/aws/apigateway/apigatewaystage.py +75 -0
  348. cartography/models/aws/apigatewayv2/__init__.py +0 -0
  349. cartography/models/aws/apigatewayv2/apigatewayv2.py +53 -0
  350. cartography/models/aws/cloudtrail/__init__.py +0 -0
  351. cartography/models/aws/cloudtrail/management_events.py +153 -0
  352. cartography/models/aws/cloudtrail/trail.py +106 -0
  353. cartography/models/aws/cloudwatch/__init__.py +0 -0
  354. cartography/models/aws/cloudwatch/log_metric_filter.py +79 -0
  355. cartography/models/aws/cloudwatch/loggroup.py +52 -0
  356. cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
  357. cartography/models/aws/codebuild/__init__.py +0 -0
  358. cartography/models/aws/codebuild/project.py +49 -0
  359. cartography/models/aws/cognito/__init__.py +0 -0
  360. cartography/models/aws/cognito/identity_pool.py +70 -0
  361. cartography/models/aws/cognito/user_pool.py +47 -0
  362. cartography/models/aws/dynamodb/gsi.py +30 -22
  363. cartography/models/aws/dynamodb/tables.py +27 -17
  364. cartography/models/aws/ec2/auto_scaling_groups.py +224 -0
  365. cartography/models/aws/ec2/images.py +36 -34
  366. cartography/models/aws/ec2/instances.py +85 -38
  367. cartography/models/aws/ec2/keypair.py +59 -0
  368. cartography/models/aws/ec2/keypair_instance.py +76 -0
  369. cartography/models/aws/ec2/launch_configurations.py +59 -0
  370. cartography/models/aws/ec2/launch_template_versions.py +48 -38
  371. cartography/models/aws/ec2/launch_templates.py +21 -17
  372. cartography/models/aws/ec2/load_balancer_listeners.py +72 -0
  373. cartography/models/aws/ec2/load_balancers.py +112 -0
  374. cartography/models/aws/ec2/network_acl_rules.py +106 -0
  375. cartography/models/aws/ec2/network_acls.py +95 -0
  376. cartography/models/aws/ec2/networkinterface_instance.py +52 -39
  377. cartography/models/aws/ec2/networkinterfaces.py +57 -37
  378. cartography/models/aws/ec2/privateip_networkinterface.py +32 -22
  379. cartography/models/aws/ec2/reservations.py +18 -14
  380. cartography/models/aws/ec2/route_table_associations.py +97 -0
  381. cartography/models/aws/ec2/route_tables.py +128 -0
  382. cartography/models/aws/ec2/routes.py +85 -0
  383. cartography/models/aws/ec2/security_group_rules.py +109 -0
  384. cartography/models/aws/ec2/security_groups.py +90 -0
  385. cartography/models/aws/ec2/securitygroup_instance.py +29 -20
  386. cartography/models/aws/ec2/securitygroup_networkinterface.py +24 -15
  387. cartography/models/aws/ec2/snapshots.py +58 -0
  388. cartography/models/aws/ec2/subnet_instance.py +26 -19
  389. cartography/models/aws/ec2/subnet_networkinterface.py +42 -31
  390. cartography/models/aws/ec2/subnets.py +65 -0
  391. cartography/models/aws/ec2/volumes.py +67 -40
  392. cartography/models/aws/ec2/vpc.py +46 -0
  393. cartography/models/aws/ec2/vpc_cidr.py +102 -0
  394. cartography/models/aws/ec2/vpc_peering.py +157 -0
  395. cartography/models/aws/ecr/__init__.py +0 -0
  396. cartography/models/aws/ecr/image.py +146 -0
  397. cartography/models/aws/ecr/image_layer.py +107 -0
  398. cartography/models/aws/ecr/repository.py +72 -0
  399. cartography/models/aws/ecr/repository_image.py +95 -0
  400. cartography/models/aws/ecs/__init__.py +0 -0
  401. cartography/models/aws/ecs/clusters.py +64 -0
  402. cartography/models/aws/ecs/container_definitions.py +93 -0
  403. cartography/models/aws/ecs/container_instances.py +84 -0
  404. cartography/models/aws/ecs/containers.py +101 -0
  405. cartography/models/aws/ecs/services.py +134 -0
  406. cartography/models/aws/ecs/task_definitions.py +135 -0
  407. cartography/models/aws/ecs/tasks.py +134 -0
  408. cartography/models/aws/efs/__init__.py +0 -0
  409. cartography/models/aws/efs/access_point.py +77 -0
  410. cartography/models/aws/efs/file_system.py +60 -0
  411. cartography/models/aws/efs/mount_target.py +79 -0
  412. cartography/models/aws/eks/clusters.py +23 -21
  413. cartography/models/aws/elasticache/__init__.py +0 -0
  414. cartography/models/aws/elasticache/cluster.py +65 -0
  415. cartography/models/aws/elasticache/topic.py +67 -0
  416. cartography/models/aws/emr.py +32 -30
  417. cartography/models/aws/eventbridge/__init__.py +0 -0
  418. cartography/models/aws/eventbridge/rule.py +77 -0
  419. cartography/models/aws/eventbridge/target.py +71 -0
  420. cartography/models/aws/glue/__init__.py +0 -0
  421. cartography/models/aws/glue/connection.py +51 -0
  422. cartography/models/aws/glue/job.py +69 -0
  423. cartography/models/aws/guardduty/__init__.py +1 -0
  424. cartography/models/aws/guardduty/detectors.py +50 -0
  425. cartography/models/aws/guardduty/findings.py +121 -0
  426. cartography/models/aws/iam/__init__.py +0 -0
  427. cartography/models/aws/iam/access_key.py +103 -0
  428. cartography/models/aws/iam/account_role.py +24 -0
  429. cartography/models/aws/iam/federated_principal.py +60 -0
  430. cartography/models/aws/iam/group.py +60 -0
  431. cartography/models/aws/iam/group_membership.py +27 -0
  432. cartography/models/aws/iam/inline_policy.py +78 -0
  433. cartography/models/aws/iam/instanceprofile.py +76 -0
  434. cartography/models/aws/iam/managed_policy.py +51 -0
  435. cartography/models/aws/iam/policy_statement.py +57 -0
  436. cartography/models/aws/iam/role.py +83 -0
  437. cartography/models/aws/iam/root_principal.py +52 -0
  438. cartography/models/aws/iam/service_principal.py +30 -0
  439. cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
  440. cartography/models/aws/iam/user.py +59 -0
  441. cartography/models/aws/identitycenter/__init__.py +0 -0
  442. cartography/models/aws/identitycenter/awsidentitycenter.py +49 -0
  443. cartography/models/aws/identitycenter/awspermissionset.py +162 -0
  444. cartography/models/aws/identitycenter/awssogroup.py +70 -0
  445. cartography/models/aws/identitycenter/awsssouser.py +110 -0
  446. cartography/models/aws/inspector/findings.py +124 -58
  447. cartography/models/aws/inspector/packages.py +18 -42
  448. cartography/models/aws/kms/__init__.py +0 -0
  449. cartography/models/aws/kms/aliases.py +86 -0
  450. cartography/models/aws/kms/grants.py +65 -0
  451. cartography/models/aws/kms/keys.py +88 -0
  452. cartography/models/aws/lambda_function/__init__.py +0 -0
  453. cartography/models/aws/lambda_function/alias.py +74 -0
  454. cartography/models/aws/lambda_function/event_source_mapping.py +88 -0
  455. cartography/models/aws/lambda_function/lambda_function.py +91 -0
  456. cartography/models/aws/lambda_function/layer.py +72 -0
  457. cartography/models/aws/rds/__init__.py +0 -0
  458. cartography/models/aws/rds/cluster.py +91 -0
  459. cartography/models/aws/rds/event_subscription.py +146 -0
  460. cartography/models/aws/rds/instance.py +156 -0
  461. cartography/models/aws/rds/snapshot.py +108 -0
  462. cartography/models/aws/rds/subnet_group.py +101 -0
  463. cartography/models/aws/route53/__init__.py +0 -0
  464. cartography/models/aws/route53/dnsrecord.py +235 -0
  465. cartography/models/aws/route53/nameserver.py +63 -0
  466. cartography/models/aws/route53/subzone.py +40 -0
  467. cartography/models/aws/route53/zone.py +47 -0
  468. cartography/models/aws/s3/__init__.py +0 -0
  469. cartography/models/aws/s3/account_public_access_block.py +51 -0
  470. cartography/models/aws/s3/notification.py +24 -0
  471. cartography/models/aws/secretsmanager/__init__.py +0 -0
  472. cartography/models/aws/secretsmanager/secret.py +106 -0
  473. cartography/models/aws/secretsmanager/secret_version.py +114 -0
  474. cartography/models/aws/sns/__init__.py +0 -0
  475. cartography/models/aws/sns/topic.py +50 -0
  476. cartography/models/aws/sns/topic_subscription.py +74 -0
  477. cartography/models/aws/sqs/__init__.py +0 -0
  478. cartography/models/aws/sqs/queue.py +89 -0
  479. cartography/models/aws/ssm/instance_information.py +51 -39
  480. cartography/models/aws/ssm/instance_patch.py +32 -26
  481. cartography/models/aws/ssm/parameters.py +84 -0
  482. cartography/models/azure/__init__.py +0 -0
  483. cartography/models/azure/aks_cluster.py +54 -0
  484. cartography/models/azure/aks_nodepool.py +54 -0
  485. cartography/models/azure/app_service.py +59 -0
  486. cartography/models/azure/container_instance.py +57 -0
  487. cartography/models/azure/cosmosdb/__init__.py +0 -0
  488. cartography/models/azure/cosmosdb/account.py +77 -0
  489. cartography/models/azure/cosmosdb/accountfailoverpolicy.py +77 -0
  490. cartography/models/azure/cosmosdb/cassandrakeyspace.py +82 -0
  491. cartography/models/azure/cosmosdb/cassandratable.py +81 -0
  492. cartography/models/azure/cosmosdb/corspolicy.py +74 -0
  493. cartography/models/azure/cosmosdb/dblocation.py +120 -0
  494. cartography/models/azure/cosmosdb/mongodbcollection.py +82 -0
  495. cartography/models/azure/cosmosdb/mongodbdatabase.py +78 -0
  496. cartography/models/azure/cosmosdb/privateendpointconnection.py +81 -0
  497. cartography/models/azure/cosmosdb/sqlcontainer.py +88 -0
  498. cartography/models/azure/cosmosdb/sqldatabase.py +78 -0
  499. cartography/models/azure/cosmosdb/tableresource.py +76 -0
  500. cartography/models/azure/cosmosdb/virtualnetworkrule.py +78 -0
  501. cartography/models/azure/data_factory/__init__.py +0 -0
  502. cartography/models/azure/data_factory/data_factory.py +51 -0
  503. cartography/models/azure/data_factory/data_factory_dataset.py +94 -0
  504. cartography/models/azure/data_factory/data_factory_linked_service.py +78 -0
  505. cartography/models/azure/data_factory/data_factory_pipeline.py +93 -0
  506. cartography/models/azure/data_lake_filesystem.py +51 -0
  507. cartography/models/azure/event_grid_topic.py +57 -0
  508. cartography/models/azure/function_app.py +59 -0
  509. cartography/models/azure/load_balancer/__init__.py +0 -0
  510. cartography/models/azure/load_balancer/load_balancer.py +49 -0
  511. cartography/models/azure/load_balancer/load_balancer_backend_pool.py +73 -0
  512. cartography/models/azure/load_balancer/load_balancer_frontend_ip.py +75 -0
  513. cartography/models/azure/load_balancer/load_balancer_inbound_nat_rule.py +78 -0
  514. cartography/models/azure/load_balancer/load_balancer_rule.py +108 -0
  515. cartography/models/azure/logic_apps.py +56 -0
  516. cartography/models/azure/monitor.py +54 -0
  517. cartography/models/azure/network_interface.py +112 -0
  518. cartography/models/azure/network_security_group.py +50 -0
  519. cartography/models/azure/permission_relationships.py +60 -0
  520. cartography/models/azure/principal.py +41 -0
  521. cartography/models/azure/public_ip_address.py +50 -0
  522. cartography/models/azure/rbac.py +268 -0
  523. cartography/models/azure/resource_groups.py +52 -0
  524. cartography/models/azure/security_center.py +50 -0
  525. cartography/models/azure/sql/__init__.py +0 -0
  526. cartography/models/azure/sql/databasethreatdetectionpolicy.py +85 -0
  527. cartography/models/azure/sql/elasticpool.py +77 -0
  528. cartography/models/azure/sql/failovergroup.py +73 -0
  529. cartography/models/azure/sql/recoverabledatabase.py +75 -0
  530. cartography/models/azure/sql/replicationlink.py +81 -0
  531. cartography/models/azure/sql/restorabledroppeddatabase.py +82 -0
  532. cartography/models/azure/sql/restorepoint.py +74 -0
  533. cartography/models/azure/sql/serveradadministrator.py +74 -0
  534. cartography/models/azure/sql/serverdnsalias.py +71 -0
  535. cartography/models/azure/sql/sqldatabase.py +85 -0
  536. cartography/models/azure/sql/sqlserver.py +50 -0
  537. cartography/models/azure/sql/transparentdataencryption.py +76 -0
  538. cartography/models/azure/storage/__init__.py +0 -0
  539. cartography/models/azure/storage/account.py +59 -0
  540. cartography/models/azure/storage/blobcontainer.py +85 -0
  541. cartography/models/azure/storage/blobservice.py +71 -0
  542. cartography/models/azure/storage/fileservice.py +71 -0
  543. cartography/models/azure/storage/fileshare.py +82 -0
  544. cartography/models/azure/storage/queue.py +71 -0
  545. cartography/models/azure/storage/queueservice.py +73 -0
  546. cartography/models/azure/storage/table.py +72 -0
  547. cartography/models/azure/storage/tableservice.py +73 -0
  548. cartography/models/azure/subnet.py +101 -0
  549. cartography/models/azure/subscription.py +47 -0
  550. cartography/models/azure/tags/__init__.py +0 -0
  551. cartography/models/azure/tags/storage_tag.py +40 -0
  552. cartography/models/azure/tags/tag.py +37 -0
  553. cartography/models/azure/tenant.py +17 -0
  554. cartography/models/azure/virtual_network.py +49 -0
  555. cartography/models/azure/vm/__init__.py +0 -0
  556. cartography/models/azure/vm/datadisk.py +80 -0
  557. cartography/models/azure/vm/disk.py +55 -0
  558. cartography/models/azure/vm/snapshot.py +56 -0
  559. cartography/models/azure/vm/virtualmachine.py +59 -0
  560. cartography/models/bigfix/bigfix_computer.py +42 -38
  561. cartography/models/bigfix/bigfix_root.py +3 -3
  562. cartography/models/cloudflare/__init__.py +0 -0
  563. cartography/models/cloudflare/account.py +25 -0
  564. cartography/models/cloudflare/dnsrecord.py +55 -0
  565. cartography/models/cloudflare/member.py +86 -0
  566. cartography/models/cloudflare/role.py +44 -0
  567. cartography/models/cloudflare/zone.py +59 -0
  568. cartography/models/core/common.py +53 -2
  569. cartography/models/core/nodes.py +20 -4
  570. cartography/models/core/relationships.py +58 -6
  571. cartography/models/crowdstrike/__init__.py +0 -0
  572. cartography/models/crowdstrike/hosts.py +51 -0
  573. cartography/models/cve/cve.py +34 -32
  574. cartography/models/cve/cve_feed.py +6 -6
  575. cartography/models/digitalocean/__init__.py +0 -0
  576. cartography/models/digitalocean/account.py +21 -0
  577. cartography/models/digitalocean/droplet.py +58 -0
  578. cartography/models/digitalocean/project.py +48 -0
  579. cartography/models/duo/api_host.py +3 -3
  580. cartography/models/duo/endpoint.py +43 -41
  581. cartography/models/duo/group.py +14 -14
  582. cartography/models/duo/phone.py +27 -27
  583. cartography/models/duo/token.py +16 -16
  584. cartography/models/duo/user.py +50 -44
  585. cartography/models/duo/web_authn_credential.py +27 -19
  586. cartography/models/entra/__init__.py +0 -0
  587. cartography/models/entra/app_role_assignment.py +115 -0
  588. cartography/models/entra/application.py +49 -0
  589. cartography/models/entra/entra_user_to_aws_sso.py +41 -0
  590. cartography/models/entra/group.py +117 -0
  591. cartography/models/entra/ou.py +48 -0
  592. cartography/models/entra/service_principal.py +104 -0
  593. cartography/models/entra/tenant.py +39 -0
  594. cartography/models/entra/user.py +90 -0
  595. cartography/models/gcp/__init__.py +0 -0
  596. cartography/models/gcp/bigtable/__init__.py +0 -0
  597. cartography/models/gcp/bigtable/app_profile.py +94 -0
  598. cartography/models/gcp/bigtable/backup.py +91 -0
  599. cartography/models/gcp/bigtable/cluster.py +73 -0
  600. cartography/models/gcp/bigtable/instance.py +52 -0
  601. cartography/models/gcp/bigtable/table.py +69 -0
  602. cartography/models/gcp/compute/__init__.py +0 -0
  603. cartography/models/gcp/compute/subnet.py +74 -0
  604. cartography/models/gcp/compute/vpc.py +50 -0
  605. cartography/models/gcp/crm/__init__.py +0 -0
  606. cartography/models/gcp/crm/folders.py +98 -0
  607. cartography/models/gcp/crm/organizations.py +21 -0
  608. cartography/models/gcp/crm/projects.py +100 -0
  609. cartography/models/gcp/dns.py +109 -0
  610. cartography/models/gcp/gke.py +69 -0
  611. cartography/models/gcp/iam.py +73 -0
  612. cartography/models/gcp/permission_relationships.py +61 -0
  613. cartography/models/gcp/policy_bindings.py +93 -0
  614. cartography/models/gcp/storage/__init__.py +0 -0
  615. cartography/models/gcp/storage/bucket.py +119 -0
  616. cartography/models/github/commits.py +63 -0
  617. cartography/models/github/dependencies.py +73 -0
  618. cartography/models/github/manifests.py +49 -0
  619. cartography/models/github/orgs.py +27 -0
  620. cartography/models/github/teams.py +74 -22
  621. cartography/models/github/users.py +149 -0
  622. cartography/models/googleworkspace/__init__.py +0 -0
  623. cartography/models/googleworkspace/device.py +132 -0
  624. cartography/models/googleworkspace/group.py +382 -0
  625. cartography/models/googleworkspace/oauth_app.py +124 -0
  626. cartography/models/googleworkspace/tenant.py +30 -0
  627. cartography/models/googleworkspace/user.py +113 -0
  628. cartography/models/gsuite/__init__.py +0 -0
  629. cartography/models/gsuite/group.py +218 -0
  630. cartography/models/gsuite/tenant.py +29 -0
  631. cartography/models/gsuite/user.py +107 -0
  632. cartography/models/kandji/device.py +22 -17
  633. cartography/models/kandji/tenant.py +6 -4
  634. cartography/models/keycloak/__init__.py +0 -0
  635. cartography/models/keycloak/authenticationexecution.py +160 -0
  636. cartography/models/keycloak/authenticationflow.py +54 -0
  637. cartography/models/keycloak/client.py +179 -0
  638. cartography/models/keycloak/group.py +101 -0
  639. cartography/models/keycloak/identityprovider.py +89 -0
  640. cartography/models/keycloak/organization.py +116 -0
  641. cartography/models/keycloak/organizationdomain.py +73 -0
  642. cartography/models/keycloak/realm.py +173 -0
  643. cartography/models/keycloak/role.py +126 -0
  644. cartography/models/keycloak/scope.py +73 -0
  645. cartography/models/keycloak/user.py +55 -0
  646. cartography/models/kubernetes/__init__.py +0 -0
  647. cartography/models/kubernetes/clusterrolebindings.py +138 -0
  648. cartography/models/kubernetes/clusterroles.py +52 -0
  649. cartography/models/kubernetes/clusters.py +26 -0
  650. cartography/models/kubernetes/containers.py +133 -0
  651. cartography/models/kubernetes/groups.py +107 -0
  652. cartography/models/kubernetes/namespaces.py +51 -0
  653. cartography/models/kubernetes/oidc.py +51 -0
  654. cartography/models/kubernetes/pods.py +80 -0
  655. cartography/models/kubernetes/rolebindings.py +159 -0
  656. cartography/models/kubernetes/roles.py +76 -0
  657. cartography/models/kubernetes/secrets.py +79 -0
  658. cartography/models/kubernetes/serviceaccounts.py +77 -0
  659. cartography/models/kubernetes/services.py +108 -0
  660. cartography/models/kubernetes/users.py +105 -0
  661. cartography/models/lastpass/tenant.py +3 -3
  662. cartography/models/lastpass/user.py +36 -28
  663. cartography/models/ontology/__init__.py +0 -0
  664. cartography/models/ontology/device.py +137 -0
  665. cartography/models/ontology/mapping/__init__.py +76 -0
  666. cartography/models/ontology/mapping/data/__init__.py +0 -0
  667. cartography/models/ontology/mapping/data/apikeys.py +93 -0
  668. cartography/models/ontology/mapping/data/computeinstance.py +95 -0
  669. cartography/models/ontology/mapping/data/containers.py +88 -0
  670. cartography/models/ontology/mapping/data/databases.py +182 -0
  671. cartography/models/ontology/mapping/data/devices.py +194 -0
  672. cartography/models/ontology/mapping/data/thirdpartyapps.py +140 -0
  673. cartography/models/ontology/mapping/data/useraccounts.py +416 -0
  674. cartography/models/ontology/mapping/data/users.py +63 -0
  675. cartography/models/ontology/mapping/specs.py +85 -0
  676. cartography/models/ontology/user.py +51 -0
  677. cartography/models/openai/__init__.py +0 -0
  678. cartography/models/openai/adminapikey.py +94 -0
  679. cartography/models/openai/apikey.py +88 -0
  680. cartography/models/openai/organization.py +17 -0
  681. cartography/models/openai/project.py +89 -0
  682. cartography/models/openai/serviceaccount.py +50 -0
  683. cartography/models/openai/user.py +53 -0
  684. cartography/models/scaleway/__init__.py +0 -0
  685. cartography/models/scaleway/iam/__init__.py +0 -0
  686. cartography/models/scaleway/iam/apikey.py +100 -0
  687. cartography/models/scaleway/iam/application.py +52 -0
  688. cartography/models/scaleway/iam/group.py +95 -0
  689. cartography/models/scaleway/iam/user.py +64 -0
  690. cartography/models/scaleway/instance/__init__.py +0 -0
  691. cartography/models/scaleway/instance/flexibleip.py +52 -0
  692. cartography/models/scaleway/instance/instance.py +120 -0
  693. cartography/models/scaleway/organization.py +19 -0
  694. cartography/models/scaleway/project.py +48 -0
  695. cartography/models/scaleway/storage/__init__.py +0 -0
  696. cartography/models/scaleway/storage/snapshot.py +78 -0
  697. cartography/models/scaleway/storage/volume.py +51 -0
  698. cartography/models/semgrep/dependencies.py +102 -0
  699. cartography/models/semgrep/deployment.py +5 -5
  700. cartography/models/semgrep/findings.py +58 -40
  701. cartography/models/semgrep/locations.py +27 -21
  702. cartography/models/sentinelone/__init__.py +1 -0
  703. cartography/models/sentinelone/account.py +40 -0
  704. cartography/models/sentinelone/agent.py +50 -0
  705. cartography/models/sentinelone/application.py +44 -0
  706. cartography/models/sentinelone/application_version.py +96 -0
  707. cartography/models/sentinelone/cve.py +73 -0
  708. cartography/models/slack/__init__.py +0 -0
  709. cartography/models/slack/channels.py +92 -0
  710. cartography/models/slack/group.py +129 -0
  711. cartography/models/slack/team.py +22 -0
  712. cartography/models/slack/user.py +62 -0
  713. cartography/models/snipeit/__init__.py +0 -0
  714. cartography/models/snipeit/asset.py +92 -0
  715. cartography/models/snipeit/tenant.py +19 -0
  716. cartography/models/snipeit/user.py +60 -0
  717. cartography/models/spacelift/__init__.py +0 -0
  718. cartography/models/spacelift/cloudtrailevent.py +120 -0
  719. cartography/models/spacelift/run.py +162 -0
  720. cartography/models/spacelift/space.py +131 -0
  721. cartography/models/spacelift/spaceliftaccount.py +31 -0
  722. cartography/models/spacelift/spaceliftgitcommit.py +157 -0
  723. cartography/models/spacelift/stack.py +96 -0
  724. cartography/models/spacelift/user.py +63 -0
  725. cartography/models/spacelift/worker.py +97 -0
  726. cartography/models/spacelift/workerpool.py +90 -0
  727. cartography/models/tailscale/__init__.py +0 -0
  728. cartography/models/tailscale/device.py +96 -0
  729. cartography/models/tailscale/group.py +86 -0
  730. cartography/models/tailscale/postureintegration.py +58 -0
  731. cartography/models/tailscale/tag.py +102 -0
  732. cartography/models/tailscale/tailnet.py +29 -0
  733. cartography/models/tailscale/user.py +57 -0
  734. cartography/models/trivy/__init__.py +0 -0
  735. cartography/models/trivy/findings.py +66 -0
  736. cartography/models/trivy/fix.py +66 -0
  737. cartography/models/trivy/package.py +71 -0
  738. cartography/rules/README.md +1 -0
  739. cartography/rules/__init__.py +0 -0
  740. cartography/rules/cli.py +261 -0
  741. cartography/rules/data/__init__.py +0 -0
  742. cartography/rules/data/rules/__init__.py +46 -0
  743. cartography/rules/data/rules/cloud_security_product_deactivated.py +49 -0
  744. cartography/rules/data/rules/compute_instance_exposed.py +51 -0
  745. cartography/rules/data/rules/database_instance_exposed.py +53 -0
  746. cartography/rules/data/rules/delegation_boundary_modifiable.py +90 -0
  747. cartography/rules/data/rules/identity_administration_privileges.py +100 -0
  748. cartography/rules/data/rules/inactive_user_active_accounts.py +48 -0
  749. cartography/rules/data/rules/malicious_npm_dependencies_shai_hulud.py +2222 -0
  750. cartography/rules/data/rules/mfa_missing.py +46 -0
  751. cartography/rules/data/rules/object_storage_public.py +100 -0
  752. cartography/rules/data/rules/policy_administration_privileges.py +104 -0
  753. cartography/rules/data/rules/unmanaged_accounts.py +43 -0
  754. cartography/rules/data/rules/workload_identity_admin_capabilities.py +193 -0
  755. cartography/rules/formatters.py +108 -0
  756. cartography/rules/runners.py +216 -0
  757. cartography/rules/spec/__init__.py +0 -0
  758. cartography/rules/spec/model.py +267 -0
  759. cartography/rules/spec/result.py +38 -0
  760. cartography/stats.py +4 -4
  761. cartography/sync.py +137 -31
  762. cartography/util.py +187 -77
  763. cartography-0.123.0.dist-info/METADATA +230 -0
  764. cartography-0.123.0.dist-info/RECORD +856 -0
  765. {cartography-0.93.0rc1.dist-info → cartography-0.123.0.dist-info}/WHEEL +1 -1
  766. {cartography-0.93.0rc1.dist-info → cartography-0.123.0.dist-info}/entry_points.txt +1 -0
  767. {cartography-0.93.0rc1.dist-info → cartography-0.123.0.dist-info/licenses}/LICENSE +1 -1
  768. cartography/data/jobs/analysis/aws_ec2_iaminstance.json +0 -10
  769. cartography/data/jobs/analysis/aws_ec2_iaminstanceprofile.json +0 -10
  770. cartography/data/jobs/cleanup/aws_apigateway_details.json +0 -10
  771. cartography/data/jobs/cleanup/aws_dns_cleanup.json +0 -65
  772. cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
  773. cartography/data/jobs/cleanup/aws_import_apigateway_cleanup.json +0 -45
  774. cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json +0 -24
  775. cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
  776. cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +0 -50
  777. cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
  778. cartography/data/jobs/cleanup/aws_import_rds_clusters_cleanup.json +0 -23
  779. cartography/data/jobs/cleanup/aws_import_rds_instances_cleanup.json +0 -47
  780. cartography/data/jobs/cleanup/aws_import_rds_snapshots_cleanup.json +0 -23
  781. cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
  782. cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
  783. cartography/data/jobs/cleanup/aws_import_snapshots_cleanup.json +0 -30
  784. cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
  785. cartography/data/jobs/cleanup/aws_import_vpc_cleanup.json +0 -23
  786. cartography/data/jobs/cleanup/aws_import_vpc_peering_cleanup.json +0 -45
  787. cartography/data/jobs/cleanup/aws_kms_details.json +0 -10
  788. cartography/data/jobs/cleanup/azure_cosmosdb_cassandra_keyspace_cleanup.json +0 -25
  789. cartography/data/jobs/cleanup/azure_cosmosdb_cors_details.json +0 -15
  790. cartography/data/jobs/cleanup/azure_cosmosdb_mongodb_database_cleanup.json +0 -25
  791. cartography/data/jobs/cleanup/azure_cosmosdb_sql_database_cleanup.json +0 -25
  792. cartography/data/jobs/cleanup/azure_cosmosdb_table_resources_cleanup.json +0 -15
  793. cartography/data/jobs/cleanup/azure_database_account_cleanup.json +0 -85
  794. cartography/data/jobs/cleanup/azure_import_disks_cleanup.json +0 -15
  795. cartography/data/jobs/cleanup/azure_import_snapshots_cleanup.json +0 -15
  796. cartography/data/jobs/cleanup/azure_import_virtual_machines_cleanup.json +0 -25
  797. cartography/data/jobs/cleanup/azure_sql_server_cleanup.json +0 -125
  798. cartography/data/jobs/cleanup/azure_storage_account_cleanup.json +0 -95
  799. cartography/data/jobs/cleanup/azure_subscriptions_cleanup.json +0 -14
  800. cartography/data/jobs/cleanup/azure_tenant_cleanup.json +0 -9
  801. cartography/data/jobs/cleanup/crxcavator_import_cleanup.json +0 -18
  802. cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
  803. cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
  804. cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
  805. cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
  806. cartography/data/jobs/cleanup/gcp_dns_cleanup.json +0 -29
  807. cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
  808. cartography/data/jobs/cleanup/gcp_storage_bucket_cleanup.json +0 -29
  809. cartography/data/jobs/cleanup/github_users_cleanup.json +0 -23
  810. cartography/data/jobs/cleanup/gsuite_ingest_groups_cleanup.json +0 -23
  811. cartography/data/jobs/cleanup/gsuite_ingest_users_cleanup.json +0 -11
  812. cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
  813. cartography/intel/crxcavator/__init__.py +0 -44
  814. cartography/intel/crxcavator/crxcavator.py +0 -329
  815. cartography/intel/gcp/crm.py +0 -302
  816. cartography/intel/gsuite/api.py +0 -284
  817. cartography/models/aws/ec2/keypairs.py +0 -64
  818. cartography-0.93.0rc1.dist-info/METADATA +0 -55
  819. cartography-0.93.0rc1.dist-info/NOTICE +0 -4
  820. cartography-0.93.0rc1.dist-info/RECORD +0 -341
  821. /cartography/data/jobs/{analysis → scoped_analysis}/aws_s3acl_analysis.json +0 -0
  822. {cartography-0.93.0rc1.dist-info → cartography-0.123.0.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,10 @@
1
1
  import configparser
2
2
  import logging
3
+ from collections import defaultdict
4
+ from collections import namedtuple
3
5
  from string import Template
4
6
  from typing import Any
7
+ from typing import cast
5
8
  from typing import Dict
6
9
  from typing import List
7
10
  from typing import Optional
@@ -11,19 +14,41 @@ from packaging.requirements import InvalidRequirement
11
14
  from packaging.requirements import Requirement
12
15
  from packaging.utils import canonicalize_name
13
16
 
17
+ from cartography.client.core.tx import execute_write_with_retry
18
+ from cartography.client.core.tx import load as load_data
19
+ from cartography.graph.job import GraphJob
14
20
  from cartography.intel.github.util import fetch_all
21
+ from cartography.intel.github.util import PaginatedGraphqlData
22
+ from cartography.models.github.dependencies import GitHubDependencySchema
23
+ from cartography.models.github.manifests import DependencyGraphManifestSchema
24
+ from cartography.util import backoff_handler
25
+ from cartography.util import retries_with_backoff
15
26
  from cartography.util import run_cleanup_job
16
27
  from cartography.util import timeit
17
28
 
18
29
  logger = logging.getLogger(__name__)
19
30
 
31
+
32
+ # Representation of a user's permission level and affiliation to a GitHub repo. See:
33
+ # - Permission: https://docs.github.com/en/graphql/reference/enums#repositorypermission
34
+ # - Affiliation: https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
35
+ UserAffiliationAndRepoPermission = namedtuple(
36
+ "UserAffiliationAndRepoPermission",
37
+ [
38
+ "user", # Dict
39
+ "permission", # 'WRITE', 'MAINTAIN', 'ADMIN', etc
40
+ "affiliation", # 'OUTSIDE', 'DIRECT'
41
+ ],
42
+ )
43
+
44
+
20
45
  GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
21
- query($login: String!, $cursor: String) {
46
+ query($login: String!, $cursor: String, $count: Int!) {
22
47
  organization(login: $login)
23
48
  {
24
49
  url
25
50
  login
26
- repositories(first: 50, after: $cursor){
51
+ repositories(first: $count, after: $cursor){
27
52
  pageInfo{
28
53
  endCursor
29
54
  hasNextPage
@@ -59,17 +84,11 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
59
84
  login
60
85
  __typename
61
86
  }
62
- collaborators(affiliation: OUTSIDE, first: 50) {
63
- edges {
64
- permission
65
- }
66
- nodes {
67
- url
68
- login
69
- name
70
- email
71
- company
72
- }
87
+ directCollaborators: collaborators(first: 100, affiliation: DIRECT) {
88
+ totalCount
89
+ }
90
+ outsideCollaborators: collaborators(first: 100, affiliation: OUTSIDE) {
91
+ totalCount
73
92
  }
74
93
  requirements:object(expression: "HEAD:requirements.txt") {
75
94
  ... on Blob {
@@ -81,6 +100,18 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
81
100
  text
82
101
  }
83
102
  }
103
+ dependencyGraphManifests(first: 20) {
104
+ nodes {
105
+ blobPath
106
+ dependencies(first: 100) {
107
+ nodes {
108
+ packageName
109
+ requirements
110
+ packageManager
111
+ }
112
+ }
113
+ }
114
+ }
84
115
  }
85
116
  }
86
117
  }
@@ -89,9 +120,175 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
89
120
  # Note: In the above query, `HEAD` references the default branch.
90
121
  # See https://stackoverflow.com/questions/48935381/github-graphql-api-default-branch-in-repository
91
122
 
123
+ GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL = """
124
+ query($login: String!, $repo: String!, $affiliation: CollaboratorAffiliation!, $cursor: String) {
125
+ organization(login: $login) {
126
+ url
127
+ login
128
+ repository(name: $repo){
129
+ name
130
+ collaborators(first: 50, affiliation: $affiliation, after: $cursor) {
131
+ edges {
132
+ permission
133
+ }
134
+ nodes {
135
+ url
136
+ login
137
+ name
138
+ email
139
+ company
140
+ }
141
+ pageInfo{
142
+ endCursor
143
+ hasNextPage
144
+ }
145
+ }
146
+ }
147
+ }
148
+ rateLimit {
149
+ limit
150
+ cost
151
+ remaining
152
+ resetAt
153
+ }
154
+ }
155
+ """
156
+
157
+
158
+ def _get_repo_collaborators_inner_func(
159
+ org: str,
160
+ api_url: str,
161
+ token: str,
162
+ repo_raw_data: list[dict[str, Any] | None],
163
+ affiliation: str,
164
+ ) -> dict[str, list[UserAffiliationAndRepoPermission]]:
165
+ result: dict[str, list[UserAffiliationAndRepoPermission]] = {}
166
+
167
+ for repo in repo_raw_data:
168
+ # GitHub can return null repo entries. See issues #1334 and #1404.
169
+ if repo is None:
170
+ logger.info(
171
+ "Skipping null repository entry while fetching %s collaborators.",
172
+ affiliation,
173
+ )
174
+ continue
175
+ repo_name = repo["name"]
176
+ repo_url = repo["url"]
177
+
178
+ # Guard against None when collaborator fields are not accessible due to permissions.
179
+ direct_info = repo.get("directCollaborators")
180
+ outside_info = repo.get("outsideCollaborators")
181
+
182
+ if affiliation == "OUTSIDE":
183
+ total_outside = 0 if not outside_info else outside_info.get("totalCount", 0)
184
+ if total_outside == 0:
185
+ # No outside collaborators or not permitted to view; skip API calls for this repo.
186
+ result[repo_url] = []
187
+ continue
188
+ else: # DIRECT
189
+ total_direct = 0 if not direct_info else direct_info.get("totalCount", 0)
190
+ if total_direct == 0:
191
+ # No direct collaborators or not permitted to view; skip API calls for this repo.
192
+ result[repo_url] = []
193
+ continue
194
+
195
+ logger.info(f"Loading {affiliation} collaborators for repo {repo_name}.")
196
+ collaborators = _get_repo_collaborators(
197
+ token,
198
+ api_url,
199
+ org,
200
+ repo_name,
201
+ affiliation,
202
+ )
203
+
204
+ collab_users: List[dict[str, Any]] = []
205
+ collab_permission: List[str] = []
206
+
207
+ # nodes and edges are expected to always be present given that we only call for them if totalCount is > 0
208
+ # however sometimes GitHub returns None, as in issue 1334 and 1404.
209
+ for collab in collaborators.nodes or []:
210
+ collab_users.append(collab)
211
+
212
+ # The `or []` is because `.edges` can be None.
213
+ for perm in collaborators.edges or []:
214
+ collab_permission.append(perm["permission"])
215
+
216
+ result[repo_url] = [
217
+ UserAffiliationAndRepoPermission(user, permission, affiliation)
218
+ for user, permission in zip(collab_users, collab_permission)
219
+ ]
220
+ return result
221
+
222
+
223
+ def _get_repo_collaborators_for_multiple_repos(
224
+ repo_raw_data: list[dict[str, Any] | None],
225
+ affiliation: str,
226
+ org: str,
227
+ api_url: str,
228
+ token: str,
229
+ ) -> dict[str, list[UserAffiliationAndRepoPermission]]:
230
+ """
231
+ For every repo in the given list, retrieve the collaborators.
232
+ :param repo_raw_data: A list of dicts representing repos. See tests.data.github.repos.GET_REPOS for data shape.
233
+ :param affiliation: The type of affiliation to retrieve collaborators for. Either 'DIRECT' or 'OUTSIDE'.
234
+ See https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
235
+ :param org: The name of the target Github organization as string.
236
+ :param api_url: The Github v4 API endpoint as string.
237
+ :param token: The Github API token as string.
238
+ :return: A dictionary of repo URL to list of UserAffiliationAndRepoPermission
239
+ """
240
+ logger.info(
241
+ f'Retrieving repo collaborators for affiliation "{affiliation}" on org "{org}".',
242
+ )
243
+
244
+ result: dict[str, list[UserAffiliationAndRepoPermission]] = retries_with_backoff(
245
+ _get_repo_collaborators_inner_func,
246
+ TypeError,
247
+ 5,
248
+ backoff_handler,
249
+ )(
250
+ org=org,
251
+ api_url=api_url,
252
+ token=token,
253
+ repo_raw_data=repo_raw_data,
254
+ affiliation=affiliation,
255
+ )
256
+ return result
257
+
258
+
259
+ def _get_repo_collaborators(
260
+ token: str,
261
+ api_url: str,
262
+ organization: str,
263
+ repo: str,
264
+ affiliation: str,
265
+ ) -> PaginatedGraphqlData:
266
+ """
267
+ Retrieve a list of collaborators for a given repository, as described in
268
+ https://docs.github.com/en/graphql/reference/objects#repositorycollaboratorconnection.
269
+ :param token: The Github API token as string.
270
+ :param api_url: The Github v4 API endpoint as string.
271
+ :param organization: The name of the target Github organization as string.
272
+ :param repo: The name of the target Github repository as string.
273
+ :param affiliation: The type of affiliation to retrieve collaborators for. Either 'DIRECT' or 'OUTSIDE'.
274
+ See https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
275
+ :return: A PaginatedGraphqlData object for the repo's collaborators; callers read the users from `nodes` and their permissions from `edges`.
276
+ """
277
+ collaborators, _ = fetch_all(
278
+ token,
279
+ api_url,
280
+ organization,
281
+ GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL,
282
+ "repository",
283
+ resource_inner_type="collaborators",
284
+ repo=repo,
285
+ affiliation=affiliation,
286
+ )
287
+ return collaborators
288
+
92
289
 
93
290
  @timeit
94
- def get(token: str, api_url: str, organization: str) -> List[Dict]:
291
+ def get(token: str, api_url: str, organization: str) -> List[Optional[Dict]]:
95
292
  """
96
293
  Retrieve a list of repos from a Github organization as described in
97
294
  https://docs.github.com/en/graphql/reference/objects#repository.
@@ -99,6 +296,8 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
99
296
  :param api_url: The Github v4 API endpoint as string.
100
297
  :param organization: The name of the target Github organization as string.
101
298
  :return: A list of dicts representing repos. See tests.data.github.repos for data shape.
299
+ Note: The list may contain None entries per GraphQL spec when resolvers error
300
+ (permissions, rate limits, transient issues). See issues #1334 and #1404.
102
301
  """
103
302
  # TODO: link the Github organization to the repositories
104
303
  repos, _ = fetch_all(
@@ -106,41 +305,118 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
106
305
  api_url,
107
306
  organization,
108
307
  GITHUB_ORG_REPOS_PAGINATED_GRAPHQL,
109
- 'repositories',
308
+ "repositories",
309
+ count=50,
110
310
  )
111
- return repos.nodes
112
-
113
-
114
- def transform(repos_json: List[Dict]) -> Dict:
311
+ # Cast is needed because GitHub's GraphQL RepositoryConnection.nodes is typed [Repository] (not [Repository!])
312
+ # per GraphQL spec, allowing null entries when resolvers error (permissions, rate limits, transient issues).
313
+ # See https://github.com/cartography-cncf/cartography/issues/1334
314
+ # and https://github.com/cartography-cncf/cartography/issues/1404
315
+ return cast(List[Optional[Dict]], repos.nodes)
316
+
317
+
318
+ def transform(
319
+ repos_json: List[Optional[Dict]],
320
+ direct_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
321
+ outside_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
322
+ ) -> Dict:
115
323
  """
116
324
  Parses the JSON returned from GitHub API to create data for graph ingestion
117
- :param repos_json: the list of individual repository nodes from GitHub. See tests.data.github.repos.GET_REPOS for
118
- data shape.
325
+ :param repos_json: the list of individual repository nodes from GitHub.
326
+ See tests.data.github.repos.GET_REPOS for data shape.
327
+ :param direct_collaborators: dict of repo URL to list of direct collaborators.
328
+ See tests.data.github.repos.DIRECT_COLLABORATORS for data shape.
329
+ :param outside_collaborators: dict of repo URL to list of outside collaborators.
330
+ See tests.data.github.repos.OUTSIDE_COLLABORATORS for data shape.
119
331
  :return: Dict containing the repos, repo->language mapping, owners->repo mapping, outside collaborators->repo
120
- mapping, and Python requirements files (if any) in a repo.
332
+ mapping, direct collaborators->repo mapping, Python requirements files (if any) in a repo, manifests from GitHub's dependency graph, and all
333
+ dependencies from GitHub's dependency graph.
121
334
  """
335
+ logger.info(f"Processing {len(repos_json)} GitHub repositories")
122
336
  transformed_repo_list: List[Dict] = []
123
337
  transformed_repo_languages: List[Dict] = []
124
338
  transformed_repo_owners: List[Dict] = []
125
339
  # See https://docs.github.com/en/graphql/reference/enums#repositorypermission
126
- transformed_collaborators: Dict[str, List[Any]] = {
127
- 'ADMIN': [], 'MAINTAIN': [], 'READ': [], 'TRIAGE': [], 'WRITE': [],
340
+ transformed_outside_collaborators: Dict[str, List[Any]] = {
341
+ "ADMIN": [],
342
+ "MAINTAIN": [],
343
+ "READ": [],
344
+ "TRIAGE": [],
345
+ "WRITE": [],
346
+ }
347
+ transformed_direct_collaborators: Dict[str, List[Any]] = {
348
+ "ADMIN": [],
349
+ "MAINTAIN": [],
350
+ "READ": [],
351
+ "TRIAGE": [],
352
+ "WRITE": [],
128
353
  }
129
354
  transformed_requirements_files: List[Dict] = []
355
+ transformed_dependencies: List[Dict] = []
356
+ transformed_manifests: List[Dict] = []
130
357
  for repo_object in repos_json:
131
- _transform_repo_languages(repo_object['url'], repo_object, transformed_repo_languages)
358
+ # GitHub can return null repo entries. See issues #1334 and #1404.
359
+ if repo_object is None:
360
+ logger.debug("Skipping null repository entry during transformation.")
361
+ continue
362
+ _transform_repo_languages(
363
+ repo_object["url"],
364
+ repo_object,
365
+ transformed_repo_languages,
366
+ )
132
367
  _transform_repo_objects(repo_object, transformed_repo_list)
133
- _transform_repo_owners(repo_object['owner']['url'], repo_object, transformed_repo_owners)
134
- _transform_collaborators(repo_object['collaborators'], repo_object['url'], transformed_collaborators)
135
- _transform_requirements_txt(repo_object['requirements'], repo_object['url'], transformed_requirements_files)
136
- _transform_setup_cfg_requirements(repo_object['setupCfg'], repo_object['url'], transformed_requirements_files)
368
+ _transform_repo_owners(
369
+ repo_object["owner"]["url"],
370
+ repo_object,
371
+ transformed_repo_owners,
372
+ )
373
+
374
+ # Allow sync to continue if we didn't have permissions to list collaborators
375
+ repo_url = repo_object["url"]
376
+ if repo_url in outside_collaborators:
377
+ _transform_collaborators(
378
+ repo_object["url"],
379
+ outside_collaborators[repo_object["url"]],
380
+ transformed_outside_collaborators,
381
+ )
382
+ if repo_url in direct_collaborators:
383
+ _transform_collaborators(
384
+ repo_object["url"],
385
+ direct_collaborators[repo_object["url"]],
386
+ transformed_direct_collaborators,
387
+ )
388
+
389
+ _transform_requirements_txt(
390
+ repo_object["requirements"],
391
+ repo_url,
392
+ transformed_requirements_files,
393
+ )
394
+ _transform_setup_cfg_requirements(
395
+ repo_object["setupCfg"],
396
+ repo_url,
397
+ transformed_requirements_files,
398
+ )
399
+ _transform_dependency_manifests(
400
+ repo_object.get("dependencyGraphManifests"),
401
+ repo_url,
402
+ transformed_manifests,
403
+ )
404
+ _transform_dependency_graph(
405
+ repo_object.get("dependencyGraphManifests"),
406
+ repo_url,
407
+ transformed_dependencies,
408
+ )
137
409
  results = {
138
- 'repos': transformed_repo_list,
139
- 'repo_languages': transformed_repo_languages,
140
- 'repo_owners': transformed_repo_owners,
141
- 'repo_collaborators': transformed_collaborators,
142
- 'python_requirements': transformed_requirements_files,
410
+ "repos": transformed_repo_list,
411
+ "repo_languages": transformed_repo_languages,
412
+ "repo_owners": transformed_repo_owners,
413
+ "repo_outside_collaborators": transformed_outside_collaborators,
414
+ "repo_direct_collaborators": transformed_direct_collaborators,
415
+ "python_requirements": transformed_requirements_files,
416
+ "dependencies": transformed_dependencies,
417
+ "manifests": transformed_manifests,
143
418
  }
419
+
144
420
  return results
145
421
 
146
422
 
@@ -154,9 +430,16 @@ def _create_default_branch_id(repo_url: str, default_branch_ref_id: str) -> str:
154
430
 
155
431
  def _create_git_url_from_ssh_url(ssh_url: str) -> str:
156
432
  """
157
- Return a git:// URL from the given ssh_url
433
+ Convert SSH URL to git:// URL.
434
+ Example:
435
+ git@github.com:cartography-cncf/cartography.git
436
+ -> git://github.com/cartography-cncf/cartography.git
158
437
  """
159
- return ssh_url.replace("/", ":").replace("git@", "git://")
438
+ # Remove the user part (e.g., "git@")
439
+ _, host_and_path = ssh_url.split("@", 1)
440
+ # Replace first ':' (separating host and repo) with '/'
441
+ host, path = host_and_path.split(":", 1)
442
+ return f"git://{host}/{path}"
160
443
 
161
444
 
162
445
  def _transform_repo_objects(input_repo_object: Dict, out_repo_list: List[Dict]) -> None:
@@ -168,33 +451,37 @@ def _transform_repo_objects(input_repo_object: Dict, out_repo_list: List[Dict])
168
451
  :return: Nothing
169
452
  """
170
453
  # Create a unique ID for a GitHubBranch node representing the default branch of this repo object.
171
- dbr = input_repo_object['defaultBranchRef']
172
- default_branch_name = dbr['name'] if dbr else None
173
- default_branch_id = _create_default_branch_id(input_repo_object['url'], dbr['id']) if dbr else None
454
+ dbr = input_repo_object["defaultBranchRef"]
455
+ default_branch_name = dbr["name"] if dbr else None
456
+ default_branch_id = (
457
+ _create_default_branch_id(input_repo_object["url"], dbr["id"]) if dbr else None
458
+ )
174
459
 
175
460
  # Create a git:// URL from the given SSH URL, if it exists.
176
- ssh_url = input_repo_object.get('sshUrl')
461
+ ssh_url = input_repo_object.get("sshUrl")
177
462
  git_url = _create_git_url_from_ssh_url(ssh_url) if ssh_url else None
178
463
 
179
- out_repo_list.append({
180
- 'id': input_repo_object['url'],
181
- 'createdat': input_repo_object['createdAt'],
182
- 'name': input_repo_object['name'],
183
- 'fullname': input_repo_object['nameWithOwner'],
184
- 'description': input_repo_object['description'],
185
- 'primarylanguage': input_repo_object['primaryLanguage'],
186
- 'homepage': input_repo_object['homepageUrl'],
187
- 'defaultbranch': default_branch_name,
188
- 'defaultbranchid': default_branch_id,
189
- 'private': input_repo_object['isPrivate'],
190
- 'disabled': input_repo_object['isDisabled'],
191
- 'archived': input_repo_object['isArchived'],
192
- 'locked': input_repo_object['isLocked'],
193
- 'giturl': git_url,
194
- 'url': input_repo_object['url'],
195
- 'sshurl': ssh_url,
196
- 'updatedat': input_repo_object['updatedAt'],
197
- })
464
+ out_repo_list.append(
465
+ {
466
+ "id": input_repo_object["url"],
467
+ "createdat": input_repo_object["createdAt"],
468
+ "name": input_repo_object["name"],
469
+ "fullname": input_repo_object["nameWithOwner"],
470
+ "description": input_repo_object["description"],
471
+ "primarylanguage": input_repo_object["primaryLanguage"],
472
+ "homepage": input_repo_object["homepageUrl"],
473
+ "defaultbranch": default_branch_name,
474
+ "defaultbranchid": default_branch_id,
475
+ "private": input_repo_object["isPrivate"],
476
+ "disabled": input_repo_object["isDisabled"],
477
+ "archived": input_repo_object["isArchived"],
478
+ "locked": input_repo_object["isLocked"],
479
+ "giturl": git_url,
480
+ "url": input_repo_object["url"],
481
+ "sshurl": ssh_url,
482
+ "updatedat": input_repo_object["updatedAt"],
483
+ },
484
+ )
198
485
 
199
486
 
200
487
  def _transform_repo_owners(owner_id: str, repo: Dict, repo_owners: List[Dict]) -> None:
@@ -205,15 +492,21 @@ def _transform_repo_owners(owner_id: str, repo: Dict, repo_owners: List[Dict]) -
205
492
  :param repo_owners: Output array to append transformed results to.
206
493
  :return: Nothing.
207
494
  """
208
- repo_owners.append({
209
- 'repo_id': repo['url'],
210
- 'owner': repo['owner']['login'],
211
- 'owner_id': owner_id,
212
- 'type': repo['owner']['__typename'],
213
- })
495
+ repo_owners.append(
496
+ {
497
+ "repo_id": repo["url"],
498
+ "owner": repo["owner"]["login"],
499
+ "owner_id": owner_id,
500
+ "type": repo["owner"]["__typename"],
501
+ },
502
+ )
214
503
 
215
504
 
216
- def _transform_repo_languages(repo_url: str, repo: Dict, repo_languages: List[Dict]) -> None:
505
+ def _transform_repo_languages(
506
+ repo_url: str,
507
+ repo: Dict,
508
+ repo_languages: List[Dict],
509
+ ) -> None:
217
510
  """
218
511
  Helper function to transform the languages in a GitHub repo.
219
512
  :param repo_url: The URL of the repo.
@@ -221,19 +514,27 @@ def _transform_repo_languages(repo_url: str, repo: Dict, repo_languages: List[Di
221
514
  :param repo_languages: Output array to append transformed results to.
222
515
  :return: Nothing.
223
516
  """
224
- if repo['languages']['totalCount'] > 0:
225
- for language in repo['languages']['nodes']:
226
- repo_languages.append({
227
- 'repo_id': repo_url,
228
- 'language_name': language['name'],
229
- })
517
+ if repo["languages"]["totalCount"] > 0:
518
+ for language in repo["languages"]["nodes"]:
519
+ repo_languages.append(
520
+ {
521
+ "repo_id": repo_url,
522
+ "language_name": language["name"],
523
+ },
524
+ )
230
525
 
231
526
 
232
- def _transform_collaborators(collaborators: Dict, repo_url: str, transformed_collaborators: Dict) -> None:
527
+ def _transform_collaborators(
528
+ repo_url: str,
529
+ collaborators: List[UserAffiliationAndRepoPermission],
530
+ transformed_collaborators: Dict,
531
+ ) -> None:
233
532
  """
234
- Performs data adjustments for outside collaborators in a GitHub repo.
533
+ Performs data adjustments for collaborators in a GitHub repo.
235
534
  Output data shape = [{permission, repo_url, url (the user's URL), login, name}, ...]
236
- :param collaborators: See cartography.tests.data.github.repos for data shape.
535
+ :param collaborators: For data shape, see
536
+ cartography.tests.data.github.repos.DIRECT_COLLABORATORS
537
+ cartography.tests.data.github.repos.OUTSIDE_COLLABORATORS
237
538
  :param repo_url: The URL of the GitHub repo.
238
539
  :param transformed_collaborators: Output dict. Data shape =
239
540
  {'ADMIN': [{ user }, ...], 'MAINTAIN': [{ user }, ...], 'READ': [ ... ], 'TRIAGE': [ ... ], 'WRITE': [ ... ]}
@@ -241,10 +542,11 @@ def _transform_collaborators(collaborators: Dict, repo_url: str, transformed_col
241
542
  """
242
543
  # `collaborators` is sometimes None
243
544
  if collaborators:
244
- for idx, user in enumerate(collaborators['nodes']):
245
- user_permission = collaborators['edges'][idx]['permission']
246
- user['repo_url'] = repo_url
247
- transformed_collaborators[user_permission].append(user)
545
+ for collaborator in collaborators:
546
+ user = collaborator.user
547
+ user["repo_url"] = repo_url
548
+ user["affiliation"] = collaborator.affiliation
549
+ transformed_collaborators[collaborator.permission].append(user)
248
550
 
249
551
 
250
552
  def _transform_requirements_txt(
@@ -259,10 +561,14 @@ def _transform_requirements_txt(
259
561
  :param out_requirements_files: Output array to append transformed results to.
260
562
  :return: Nothing.
261
563
  """
262
- if req_file_contents and req_file_contents.get('text'):
263
- text_contents = req_file_contents['text']
564
+ if req_file_contents and req_file_contents.get("text"):
565
+ text_contents = req_file_contents["text"]
264
566
  requirements_list = text_contents.split("\n")
265
- _transform_python_requirements(requirements_list, repo_url, out_requirements_files)
567
+ _transform_python_requirements(
568
+ requirements_list,
569
+ repo_url,
570
+ out_requirements_files,
571
+ )
266
572
 
267
573
 
268
574
  def _transform_setup_cfg_requirements(
@@ -277,9 +583,9 @@ def _transform_setup_cfg_requirements(
277
583
  :param out_requirements_files: Output array to append transformed results to.
278
584
  :return: Nothing.
279
585
  """
280
- if not setup_cfg_contents or not setup_cfg_contents.get('text'):
586
+ if not setup_cfg_contents or not setup_cfg_contents.get("text"):
281
587
  return
282
- text_contents = setup_cfg_contents['text']
588
+ text_contents = setup_cfg_contents["text"]
283
589
  setup_cfg = configparser.ConfigParser()
284
590
  try:
285
591
  setup_cfg.read_string(text_contents)
@@ -293,6 +599,153 @@ def _transform_setup_cfg_requirements(
293
599
  _transform_python_requirements(requirements_list, repo_url, out_requirements_files)
294
600
 
295
601
 
602
+ def _transform_dependency_manifests(
603
+ dependency_manifests: Optional[Dict],
604
+ repo_url: str,
605
+ out_manifests_list: List[Dict],
606
+ ) -> None:
607
+ """
608
+ Transform GitHub dependency graph manifests into cartography manifest format.
609
+ :param dependency_manifests: dependencyGraphManifests from GitHub GraphQL API
610
+ :param repo_url: The URL of the GitHub repo
611
+ :param out_manifests_list: Output array to append transformed results to
612
+ :return: Nothing
613
+ """
614
+ if not dependency_manifests or not dependency_manifests.get("nodes"):
615
+ return
616
+
617
+ manifests_added = 0
618
+
619
+ for manifest in dependency_manifests["nodes"]:
620
+ blob_path = manifest.get("blobPath", "")
621
+ if not blob_path:
622
+ continue
623
+
624
+ # Count dependencies in this manifest
625
+ dependencies = manifest.get("dependencies", {})
626
+ dependencies_count = len(dependencies.get("nodes", []) if dependencies else [])
627
+
628
+ # Create unique manifest ID by combining repo URL and blob path
629
+ manifest_id = f"{repo_url}#{blob_path}"
630
+
631
+ # Extract filename from blob path
632
+ filename = blob_path.split("/")[-1] if blob_path else "None"
633
+
634
+ out_manifests_list.append(
635
+ {
636
+ "id": manifest_id,
637
+ "blob_path": blob_path,
638
+ "filename": filename,
639
+ "dependencies_count": dependencies_count,
640
+ "repo_url": repo_url,
641
+ }
642
+ )
643
+ manifests_added += 1
644
+
645
+ if manifests_added > 0:
646
+ repo_name = repo_url.split("/")[-1] if repo_url else "repository"
647
+ logger.info(f"Found {manifests_added} dependency manifests in {repo_name}")
648
+
649
+
650
+ def _transform_dependency_graph(
651
+ dependency_manifests: Optional[Dict],
652
+ repo_url: str,
653
+ out_dependencies_list: List[Dict],
654
+ ) -> None:
655
+ """
656
+ Transform GitHub dependency graph manifests into cartography dependency format.
657
+ :param dependency_manifests: dependencyGraphManifests from GitHub GraphQL API
658
+ :param repo_url: The URL of the GitHub repo
659
+ :param out_dependencies_list: Output array to append transformed results to
660
+ :return: Nothing
661
+ """
662
+ if not dependency_manifests or not dependency_manifests.get("nodes"):
663
+ return
664
+
665
+ dependencies_added = 0
666
+
667
+ for manifest in dependency_manifests["nodes"]:
668
+ dependencies = manifest.get("dependencies", {})
669
+ if not dependencies or not dependencies.get("nodes"):
670
+ continue
671
+
672
+ manifest_path = manifest.get("blobPath", "")
673
+
674
+ for dep in dependencies["nodes"]:
675
+ package_name = dep.get("packageName")
676
+ if not package_name:
677
+ continue
678
+
679
+ requirements = dep.get("requirements", "")
680
+ package_manager = dep.get("packageManager", "").upper()
681
+
682
+ # Create ecosystem-specific canonical name
683
+ canonical_name = _canonicalize_dependency_name(
684
+ package_name, package_manager
685
+ )
686
+
687
+ # Create ecosystem identifier
688
+ ecosystem = package_manager.lower() if package_manager else "unknown"
689
+
690
+ # Create simple dependency ID using canonical name and requirements
691
+ # This allows the same dependency to be shared across multiple repos
692
+ requirements_for_id = (requirements or "").strip()
693
+ dependency_id = (
694
+ f"{canonical_name}|{requirements_for_id}"
695
+ if requirements_for_id
696
+ else canonical_name
697
+ )
698
+
699
+ # Normalize requirements field (prefer None over empty string)
700
+ normalized_requirements = requirements if requirements else None
701
+
702
+ # Create manifest ID for the HAS_DEP relationship
703
+ manifest_id = f"{repo_url}#{manifest_path}"
704
+
705
+ out_dependencies_list.append(
706
+ {
707
+ "id": dependency_id,
708
+ "name": canonical_name,
709
+ "original_name": package_name, # Keep original for reference
710
+ "requirements": normalized_requirements,
711
+ "ecosystem": ecosystem,
712
+ "package_manager": package_manager,
713
+ "manifest_path": manifest_path,
714
+ "manifest_id": manifest_id,
715
+ "repo_url": repo_url,
716
+ "manifest_file": (
717
+ manifest_path.split("/")[-1] if manifest_path else ""
718
+ ),
719
+ }
720
+ )
721
+ dependencies_added += 1
722
+
723
+ if dependencies_added > 0:
724
+ repo_name = repo_url.split("/")[-1] if repo_url else "repository"
725
+ logger.info(f"Found {dependencies_added} dependencies in {repo_name}")
726
+
727
+
728
+ def _canonicalize_dependency_name(name: str, package_manager: Optional[str]) -> str:
729
+ """
730
+ Canonicalize dependency names based on ecosystem conventions.
731
+ """
732
+ if not name:
733
+ return name
734
+
735
+ # For Python packages, use existing canonicalization
736
+ if package_manager in ["PIP", "CONDA"]:
737
+ try:
738
+ from packaging.utils import canonicalize_name
739
+
740
+ return str(canonicalize_name(name))
741
+ except ImportError:
742
+ # Fallback if packaging not available
743
+ return name.lower().replace("_", "-")
744
+
745
+ # For other ecosystems, use lowercase
746
+ return name.lower()
747
+
748
+
296
749
  def _transform_python_requirements(
297
750
  requirements_list: List[str],
298
751
  repo_url: str,
@@ -307,8 +760,8 @@ def _transform_python_requirements(
307
760
  """
308
761
  parsed_list = []
309
762
  for line in requirements_list:
310
- stripped_line = line.partition('#')[0].strip()
311
- if stripped_line == '':
763
+ stripped_line = line.partition("#")[0].strip()
764
+ if stripped_line == "":
312
765
  continue
313
766
  try:
314
767
  req = Requirement(stripped_line)
@@ -316,7 +769,7 @@ def _transform_python_requirements(
316
769
  except InvalidRequirement:
317
770
  # INFO and not WARN/ERROR as we intentionally don't support all ways to specify Python requirements
318
771
  logger.info(
319
- f"Failed to parse line \"{line}\" in repo {repo_url}'s requirements.txt; skipping line.",
772
+ f'Failed to parse line "{line}" in repo {repo_url}\'s requirements.txt; skipping line.',
320
773
  exc_info=True,
321
774
  )
322
775
 
@@ -324,32 +777,44 @@ def _transform_python_requirements(
324
777
  pinned_version = None
325
778
  if len(req.specifier) == 1:
326
779
  specifier = next(iter(req.specifier))
327
- if specifier.operator == '==':
780
+ if specifier.operator == "==":
328
781
  pinned_version = specifier.version
329
782
 
330
783
  # Set `spec` to a default value. Example values for str(req.specifier): "<4.0,>=3.0" or "==1.0.0".
331
784
  spec: Optional[str] = str(req.specifier)
332
785
  # Set spec to `None` instead of empty string so that the Neo4j driver will leave the library.specifier field
333
786
  # undefined. As convention, we prefer undefined values over empty strings in the graph.
334
- if spec == '':
787
+ if spec == "":
335
788
  spec = None
336
789
 
337
790
  canon_name = canonicalize_name(req.name)
338
- requirement_id = f"{canon_name}|{pinned_version}" if pinned_version else canon_name
791
+ requirement_id = (
792
+ f"{canon_name}|{pinned_version}" if pinned_version else canon_name
793
+ )
339
794
 
340
- out_requirements_files.append({
341
- "id": requirement_id,
342
- "name": canon_name,
343
- "specifier": spec,
344
- "version": pinned_version,
345
- "repo_url": repo_url,
346
- })
795
+ out_requirements_files.append(
796
+ {
797
+ "id": requirement_id,
798
+ "name": canon_name,
799
+ "specifier": spec,
800
+ "version": pinned_version,
801
+ "repo_url": repo_url,
802
+ },
803
+ )
347
804
 
348
805
 
349
806
  def parse_setup_cfg(config: configparser.ConfigParser) -> List[str]:
350
807
  reqs: List[str] = []
351
- reqs.extend(_parse_setup_cfg_requirements(config.get("options", "install_requires", fallback="")))
352
- reqs.extend(_parse_setup_cfg_requirements(config.get("options", "setup_requires", fallback="")))
808
+ reqs.extend(
809
+ _parse_setup_cfg_requirements(
810
+ config.get("options", "install_requires", fallback=""),
811
+ ),
812
+ )
813
+ reqs.extend(
814
+ _parse_setup_cfg_requirements(
815
+ config.get("options", "setup_requires", fallback=""),
816
+ ),
817
+ )
353
818
  if config.has_section("options.extras_require"):
354
819
  for _, val in config.items("options.extras_require"):
355
820
  reqs.extend(_parse_setup_cfg_requirements(val))
@@ -368,7 +833,11 @@ def _parse_setup_cfg_requirements(reqs: str, separator: str = ";") -> List[str]:
368
833
 
369
834
 
370
835
  @timeit
371
- def load_github_repos(neo4j_session: neo4j.Session, update_tag: int, repo_data: List[Dict]) -> None:
836
+ def load_github_repos(
837
+ neo4j_session: neo4j.Session,
838
+ update_tag: int,
839
+ repo_data: List[Dict],
840
+ ) -> None:
372
841
  """
373
842
  Ingest the GitHub repository information
374
843
  :param neo4j_session: Neo4J session object for server communication
@@ -411,15 +880,23 @@ def load_github_repos(neo4j_session: neo4j.Session, update_tag: int, repo_data:
411
880
  ON CREATE SET r.firstseen = timestamp()
412
881
  SET r.lastupdated = r.UpdateTag
413
882
  """
414
- neo4j_session.run(
415
- ingest_repo,
416
- RepoData=repo_data,
417
- UpdateTag=update_tag,
418
- )
883
+
884
+ def _ingest_repos_tx(tx: neo4j.Transaction) -> None:
885
+ tx.run(
886
+ ingest_repo,
887
+ RepoData=repo_data,
888
+ UpdateTag=update_tag,
889
+ ).consume()
890
+
891
+ execute_write_with_retry(neo4j_session, _ingest_repos_tx)
419
892
 
420
893
 
421
894
  @timeit
422
- def load_github_languages(neo4j_session: neo4j.Session, update_tag: int, repo_languages: List[Dict]) -> None:
895
+ def load_github_languages(
896
+ neo4j_session: neo4j.Session,
897
+ update_tag: int,
898
+ repo_languages: List[Dict],
899
+ ) -> None:
423
900
  """
424
901
  Ingest the relationships for repo languages
425
902
  :param neo4j_session: Neo4J session object for server communication
@@ -441,15 +918,22 @@ def load_github_languages(neo4j_session: neo4j.Session, update_tag: int, repo_la
441
918
  ON CREATE SET r.firstseen = timestamp()
442
919
  SET r.lastupdated = $UpdateTag"""
443
920
 
444
- neo4j_session.run(
445
- ingest_languages,
446
- Languages=repo_languages,
447
- UpdateTag=update_tag,
448
- )
921
+ def _ingest_languages_tx(tx: neo4j.Transaction) -> None:
922
+ tx.run(
923
+ ingest_languages,
924
+ Languages=repo_languages,
925
+ UpdateTag=update_tag,
926
+ ).consume()
927
+
928
+ execute_write_with_retry(neo4j_session, _ingest_languages_tx)
449
929
 
450
930
 
451
931
  @timeit
452
- def load_github_owners(neo4j_session: neo4j.Session, update_tag: int, repo_owners: List[Dict]) -> None:
932
+ def load_github_owners(
933
+ neo4j_session: neo4j.Session,
934
+ update_tag: int,
935
+ repo_owners: List[Dict],
936
+ ) -> None:
453
937
  """
454
938
  Ingest the relationships for repo owners
455
939
  :param neo4j_session: Neo4J session object for server communication
@@ -457,33 +941,55 @@ def load_github_owners(neo4j_session: neo4j.Session, update_tag: int, repo_owner
457
941
  :param repo_owners: list of owner to repo mappings
458
942
  :return: Nothing
459
943
  """
460
- for owner in repo_owners:
461
- ingest_owner_template = Template("""
462
- MERGE (user:$account_type{id: $Id})
463
- ON CREATE SET user.firstseen = timestamp()
464
- SET user.username = $UserName,
465
- user.lastupdated = $UpdateTag
466
- WITH user
467
-
468
- MATCH (repo:GitHubRepository{id: $RepoId})
469
- MERGE (user)<-[r:OWNER]-(repo)
470
- ON CREATE SET r.firstseen = timestamp()
471
- SET r.lastupdated = $UpdateTag""")
472
-
473
- account_type = {'User': "GitHubUser", 'Organization': "GitHubOrganization"}
474
-
475
- neo4j_session.run(
476
- ingest_owner_template.safe_substitute(account_type=account_type[owner['type']]),
477
- Id=owner['owner_id'],
478
- UserName=owner['owner'],
479
- RepoId=owner['repo_id'],
944
+ ingest_owner_template = Template(
945
+ """
946
+ MERGE (user:$account_type{id: $Id})
947
+ ON CREATE SET user.firstseen = timestamp()
948
+ SET user.username = $UserName,
949
+ user.lastupdated = $UpdateTag
950
+ WITH user
951
+
952
+ MATCH (repo:GitHubRepository{id: $RepoId})
953
+ MERGE (user)<-[r:OWNER]-(repo)
954
+ ON CREATE SET r.firstseen = timestamp()
955
+ SET r.lastupdated = $UpdateTag""",
956
+ )
957
+
958
+ account_type = {"User": "GitHubUser", "Organization": "GitHubOrganization"}
959
+
960
+ def _ingest_owner_tx(
961
+ tx: neo4j.Transaction,
962
+ owner_record: Dict,
963
+ owner_label: str,
964
+ ) -> None:
965
+ tx.run(
966
+ ingest_owner_template.safe_substitute(
967
+ account_type=owner_label,
968
+ ),
969
+ Id=owner_record["owner_id"],
970
+ UserName=owner_record["owner"],
971
+ RepoId=owner_record["repo_id"],
480
972
  UpdateTag=update_tag,
973
+ ).consume()
974
+
975
+ for owner in repo_owners:
976
+ execute_write_with_retry(
977
+ neo4j_session,
978
+ _ingest_owner_tx,
979
+ owner,
980
+ account_type[owner["type"]],
481
981
  )
482
982
 
483
983
 
484
984
  @timeit
485
- def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborators: Dict) -> None:
486
- query = Template("""
985
+ def load_collaborators(
986
+ neo4j_session: neo4j.Session,
987
+ update_tag: int,
988
+ collaborators: Dict,
989
+ affiliation: str,
990
+ ) -> None:
991
+ query = Template(
992
+ """
487
993
  UNWIND $UserData as user
488
994
 
489
995
  MERGE (u:GitHubUser{id: user.url})
@@ -500,27 +1006,36 @@ def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborat
500
1006
  MERGE (repo)<-[o:$rel_label]-(u)
501
1007
  ON CREATE SET o.firstseen = timestamp()
502
1008
  SET o.lastupdated = $UpdateTag
503
- """)
504
- for collab_type in collaborators.keys():
505
- relationship_label = f"OUTSIDE_COLLAB_{collab_type}"
506
- neo4j_session.run(
1009
+ """,
1010
+ )
1011
+
1012
+ def _ingest_collaborators_tx(
1013
+ tx: neo4j.Transaction,
1014
+ relationship_label: str,
1015
+ collaborator_data: List[Dict],
1016
+ ) -> None:
1017
+ tx.run(
507
1018
  query.safe_substitute(rel_label=relationship_label),
508
- UserData=collaborators[collab_type],
1019
+ UserData=collaborator_data,
509
1020
  UpdateTag=update_tag,
1021
+ ).consume()
1022
+
1023
+ for collab_type, collab_data in collaborators.items():
1024
+ relationship_label = f"{affiliation}_COLLAB_{collab_type}"
1025
+ execute_write_with_retry(
1026
+ neo4j_session,
1027
+ _ingest_collaborators_tx,
1028
+ relationship_label,
1029
+ collab_data,
510
1030
  )
511
1031
 
512
1032
 
513
1033
  @timeit
514
- def load(neo4j_session: neo4j.Session, common_job_parameters: Dict, repo_data: Dict) -> None:
515
- load_github_repos(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repos'])
516
- load_github_owners(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_owners'])
517
- load_github_languages(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_languages'])
518
- load_collaborators(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_collaborators'])
519
- load_python_requirements(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['python_requirements'])
520
-
521
-
522
- @timeit
523
- def load_python_requirements(neo4j_session: neo4j.Session, update_tag: int, requirements_objects: List[Dict]) -> None:
1034
+ def load_python_requirements(
1035
+ neo4j_session: neo4j.Session,
1036
+ update_tag: int,
1037
+ requirements_objects: List[Dict],
1038
+ ) -> None:
524
1039
  query = """
525
1040
  UNWIND $Requirements AS req
526
1041
  MERGE (lib:PythonLibrary:Dependency{id: req.id})
@@ -536,19 +1051,175 @@ def load_python_requirements(neo4j_session: neo4j.Session, update_tag: int, requ
536
1051
  SET r.lastupdated = $UpdateTag,
537
1052
  r.specifier = req.specifier
538
1053
  """
539
- neo4j_session.run(
540
- query,
541
- Requirements=requirements_objects,
542
- UpdateTag=update_tag,
1054
+
1055
+ def _ingest_requirements_tx(tx: neo4j.Transaction) -> None:
1056
+ tx.run(
1057
+ query,
1058
+ Requirements=requirements_objects,
1059
+ UpdateTag=update_tag,
1060
+ ).consume()
1061
+
1062
+ execute_write_with_retry(neo4j_session, _ingest_requirements_tx)
1063
+
1064
+
1065
+ @timeit
1066
+ def load_github_dependencies(
1067
+ neo4j_session: neo4j.Session,
1068
+ update_tag: int,
1069
+ dependencies: List[Dict],
1070
+ ) -> None:
1071
+ """
1072
+ Ingest GitHub dependency data into Neo4j using the new data model
1073
+ :param neo4j_session: Neo4J session object for server communication
1074
+ :param update_tag: Timestamp used to determine data freshness
1075
+ :param dependencies: List of dependency objects from GitHub's dependency graph
1076
+ :return: Nothing
1077
+ """
1078
+ # Group dependencies by both repo_url and manifest_id for schema-based loading
1079
+ dependencies_by_repo_and_manifest = defaultdict(list)
1080
+
1081
+ for dep in dependencies:
1082
+ repo_url = dep["repo_url"]
1083
+ manifest_id = dep["manifest_id"]
1084
+ # Create a key combining both repo_url and manifest_id
1085
+ group_key = (repo_url, manifest_id)
1086
+ # Remove repo_url and manifest_id from the dependency object since we'll pass them as kwargs
1087
+ dep_without_kwargs = {
1088
+ k: v for k, v in dep.items() if k not in ["repo_url", "manifest_id"]
1089
+ }
1090
+ dependencies_by_repo_and_manifest[group_key].append(dep_without_kwargs)
1091
+
1092
+ # Load dependencies for each repository/manifest combination separately
1093
+ for (
1094
+ repo_url,
1095
+ manifest_id,
1096
+ ), group_dependencies in dependencies_by_repo_and_manifest.items():
1097
+ load_data(
1098
+ neo4j_session,
1099
+ GitHubDependencySchema(),
1100
+ group_dependencies,
1101
+ lastupdated=update_tag,
1102
+ repo_url=repo_url,
1103
+ manifest_id=manifest_id,
1104
+ )
1105
+
1106
+
1107
+ @timeit
1108
+ def load_github_dependency_manifests(
1109
+ neo4j_session: neo4j.Session,
1110
+ update_tag: int,
1111
+ manifests: List[Dict],
1112
+ ) -> None:
1113
+ """
1114
+ Ingest GitHub dependency manifests into Neo4j
1115
+ """
1116
+ manifests_by_repo = defaultdict(list)
1117
+
1118
+ for manifest in manifests:
1119
+ repo_url = manifest["repo_url"]
1120
+ manifests_by_repo[repo_url].append(manifest)
1121
+
1122
+ # Load manifests for each repository separately
1123
+ for repo_url, repo_manifests in manifests_by_repo.items():
1124
+ load_data(
1125
+ neo4j_session,
1126
+ DependencyGraphManifestSchema(),
1127
+ repo_manifests,
1128
+ lastupdated=update_tag,
1129
+ repo_url=repo_url,
1130
+ )
1131
+
1132
+
1133
+ @timeit
1134
+ def cleanup_github_dependencies(
1135
+ neo4j_session: neo4j.Session,
1136
+ common_job_parameters: Dict[str, Any],
1137
+ repo_urls: List[str],
1138
+ ) -> None:
1139
+ # Run cleanup for each repository separately
1140
+ for repo_url in repo_urls:
1141
+ cleanup_params = {**common_job_parameters, "repo_url": repo_url}
1142
+ GraphJob.from_node_schema(GitHubDependencySchema(), cleanup_params).run(
1143
+ neo4j_session
1144
+ )
1145
+
1146
+
1147
+ @timeit
1148
+ def cleanup_github_manifests(
1149
+ neo4j_session: neo4j.Session,
1150
+ common_job_parameters: Dict[str, Any],
1151
+ repo_urls: List[str],
1152
+ ) -> None:
1153
+ """
1154
+ Delete GitHub dependency manifests and their relationships from the graph if they were not updated in the last sync.
1155
+ :param neo4j_session: Neo4j session
1156
+ :param common_job_parameters: Common job parameters containing UPDATE_TAG
1157
+ :param repo_urls: List of repository URLs to clean up manifests for
1158
+ """
1159
+ # Run cleanup for each repository separately
1160
+ for repo_url in repo_urls:
1161
+ cleanup_params = {**common_job_parameters, "repo_url": repo_url}
1162
+ GraphJob.from_node_schema(DependencyGraphManifestSchema(), cleanup_params).run(
1163
+ neo4j_session
1164
+ )
1165
+
1166
+
1167
+ @timeit
1168
+ def load(
1169
+ neo4j_session: neo4j.Session,
1170
+ common_job_parameters: Dict,
1171
+ repo_data: Dict,
1172
+ ) -> None:
1173
+ load_github_repos(
1174
+ neo4j_session,
1175
+ common_job_parameters["UPDATE_TAG"],
1176
+ repo_data["repos"],
1177
+ )
1178
+ load_github_owners(
1179
+ neo4j_session,
1180
+ common_job_parameters["UPDATE_TAG"],
1181
+ repo_data["repo_owners"],
1182
+ )
1183
+ load_github_languages(
1184
+ neo4j_session,
1185
+ common_job_parameters["UPDATE_TAG"],
1186
+ repo_data["repo_languages"],
1187
+ )
1188
+ load_collaborators(
1189
+ neo4j_session,
1190
+ common_job_parameters["UPDATE_TAG"],
1191
+ repo_data["repo_direct_collaborators"],
1192
+ "DIRECT",
1193
+ )
1194
+ load_collaborators(
1195
+ neo4j_session,
1196
+ common_job_parameters["UPDATE_TAG"],
1197
+ repo_data["repo_outside_collaborators"],
1198
+ "OUTSIDE",
1199
+ )
1200
+ load_python_requirements(
1201
+ neo4j_session,
1202
+ common_job_parameters["UPDATE_TAG"],
1203
+ repo_data["python_requirements"],
1204
+ )
1205
+ load_github_dependency_manifests(
1206
+ neo4j_session,
1207
+ common_job_parameters["UPDATE_TAG"],
1208
+ repo_data["manifests"],
1209
+ )
1210
+ load_github_dependencies(
1211
+ neo4j_session,
1212
+ common_job_parameters["UPDATE_TAG"],
1213
+ repo_data["dependencies"],
543
1214
  )
544
1215
 
545
1216
 
546
1217
  def sync(
547
- neo4j_session: neo4j.Session,
548
- common_job_parameters: Dict[str, Any],
549
- github_api_key: str,
550
- github_url: str,
551
- organization: str,
1218
+ neo4j_session: neo4j.Session,
1219
+ common_job_parameters: Dict[str, Any],
1220
+ github_api_key: str,
1221
+ github_url: str,
1222
+ organization: str,
552
1223
  ) -> None:
553
1224
  """
554
1225
  Performs the sequential tasks to collect, transform, and sync github data
@@ -561,6 +1232,46 @@ def sync(
561
1232
  """
562
1233
  logger.info("Syncing GitHub repos")
563
1234
  repos_json = get(github_api_key, github_url, organization)
564
- repo_data = transform(repos_json)
1235
+ direct_collabs: dict[str, list[UserAffiliationAndRepoPermission]] = {}
1236
+ outside_collabs: dict[str, list[UserAffiliationAndRepoPermission]] = {}
1237
+ try:
1238
+ direct_collabs = _get_repo_collaborators_for_multiple_repos(
1239
+ repos_json,
1240
+ "DIRECT",
1241
+ organization,
1242
+ github_url,
1243
+ github_api_key,
1244
+ )
1245
+ outside_collabs = _get_repo_collaborators_for_multiple_repos(
1246
+ repos_json,
1247
+ "OUTSIDE",
1248
+ organization,
1249
+ github_url,
1250
+ github_api_key,
1251
+ )
1252
+ except TypeError:
1253
+ # due to permission errors or transient network error or some other nonsense
1254
+ logger.warning(
1255
+ "Unable to list repo collaborators due to permission errors; continuing on.",
1256
+ exc_info=True,
1257
+ )
1258
+ repo_data = transform(repos_json, direct_collabs, outside_collabs)
565
1259
  load(neo4j_session, common_job_parameters, repo_data)
566
- run_cleanup_job('github_repos_cleanup.json', neo4j_session, common_job_parameters)
1260
+
1261
+ # Collect repository URLs that have dependencies for cleanup
1262
+ repo_urls_with_dependencies = list(
1263
+ {dep["repo_url"] for dep in repo_data["dependencies"]}
1264
+ )
1265
+ cleanup_github_dependencies(
1266
+ neo4j_session, common_job_parameters, repo_urls_with_dependencies
1267
+ )
1268
+
1269
+ # Collect repository URLs that have manifests for cleanup
1270
+ repo_urls_with_manifests = list(
1271
+ {manifest["repo_url"] for manifest in repo_data["manifests"]}
1272
+ )
1273
+ cleanup_github_manifests(
1274
+ neo4j_session, common_job_parameters, repo_urls_with_manifests
1275
+ )
1276
+
1277
+ run_cleanup_job("github_repos_cleanup.json", neo4j_session, common_job_parameters)