cartography 0.104.0rc2__py3-none-any.whl → 0.123.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (642):
  1. cartography/_version.py +16 -3
  2. cartography/cli.py +466 -5
  3. cartography/client/aws/__init__.py +19 -0
  4. cartography/client/aws/ecr.py +51 -0
  5. cartography/client/core/tx.py +357 -8
  6. cartography/config.py +153 -0
  7. cartography/data/azure_permission_relationships.yaml +20 -0
  8. cartography/data/gcp_permission_relationships.yaml +21 -0
  9. cartography/data/indexes.cypher +0 -186
  10. cartography/data/jobs/analysis/aws_ec2_keypair_analysis.json +2 -2
  11. cartography/data/jobs/analysis/keycloak_inheritance.json +30 -0
  12. cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json +0 -12
  13. cartography/data/jobs/cleanup/github_repos_cleanup.json +2 -0
  14. cartography/driftdetect/cli.py +3 -2
  15. cartography/graph/cleanupbuilder.py +198 -41
  16. cartography/graph/job.py +54 -6
  17. cartography/graph/querybuilder.py +528 -27
  18. cartography/graph/statement.py +5 -1
  19. cartography/intel/airbyte/__init__.py +105 -0
  20. cartography/intel/airbyte/connections.py +120 -0
  21. cartography/intel/airbyte/destinations.py +81 -0
  22. cartography/intel/airbyte/organizations.py +59 -0
  23. cartography/intel/airbyte/sources.py +78 -0
  24. cartography/intel/airbyte/tags.py +64 -0
  25. cartography/intel/airbyte/users.py +106 -0
  26. cartography/intel/airbyte/util.py +122 -0
  27. cartography/intel/airbyte/workspaces.py +63 -0
  28. cartography/intel/aws/__init__.py +24 -9
  29. cartography/intel/aws/acm.py +124 -0
  30. cartography/intel/aws/apigateway.py +253 -22
  31. cartography/intel/aws/apigatewayv2.py +116 -0
  32. cartography/intel/aws/cloudtrail.py +17 -39
  33. cartography/intel/aws/cloudtrail_management_events.py +962 -0
  34. cartography/intel/aws/cloudwatch.py +150 -4
  35. cartography/intel/aws/codebuild.py +132 -0
  36. cartography/intel/aws/cognito.py +201 -0
  37. cartography/intel/aws/config.py +7 -3
  38. cartography/intel/aws/ec2/elastic_ip_addresses.py +3 -1
  39. cartography/intel/aws/ec2/instances.py +25 -1
  40. cartography/intel/aws/ec2/internet_gateways.py +4 -2
  41. cartography/intel/aws/ec2/load_balancer_v2s.py +11 -5
  42. cartography/intel/aws/ec2/network_interfaces.py +5 -1
  43. cartography/intel/aws/ec2/reserved_instances.py +3 -1
  44. cartography/intel/aws/ec2/security_groups.py +140 -122
  45. cartography/intel/aws/ec2/snapshots.py +47 -84
  46. cartography/intel/aws/ec2/subnets.py +37 -63
  47. cartography/intel/aws/ec2/tgw.py +11 -5
  48. cartography/intel/aws/ec2/volumes.py +1 -1
  49. cartography/intel/aws/ec2/vpc.py +140 -124
  50. cartography/intel/aws/ec2/vpc_peerings.py +262 -125
  51. cartography/intel/aws/ecr.py +269 -98
  52. cartography/intel/aws/ecr_image_layers.py +923 -0
  53. cartography/intel/aws/ecs.py +251 -380
  54. cartography/intel/aws/efs.py +179 -11
  55. cartography/intel/aws/elasticache.py +102 -79
  56. cartography/intel/aws/elasticsearch.py +13 -4
  57. cartography/intel/aws/eventbridge.py +164 -0
  58. cartography/intel/aws/glue.py +181 -0
  59. cartography/intel/aws/guardduty.py +443 -0
  60. cartography/intel/aws/iam.py +750 -493
  61. cartography/intel/aws/identitycenter.py +605 -83
  62. cartography/intel/aws/inspector.py +221 -105
  63. cartography/intel/aws/kms.py +173 -201
  64. cartography/intel/aws/lambda_function.py +272 -189
  65. cartography/intel/aws/organizations.py +10 -9
  66. cartography/intel/aws/permission_relationships.py +10 -20
  67. cartography/intel/aws/rds.py +337 -446
  68. cartography/intel/aws/redshift.py +9 -4
  69. cartography/intel/aws/resourcegroupstaggingapi.py +78 -19
  70. cartography/intel/aws/resources.py +18 -0
  71. cartography/intel/aws/route53.py +386 -332
  72. cartography/intel/aws/s3.py +322 -14
  73. cartography/intel/aws/secretsmanager.py +81 -49
  74. cartography/intel/aws/securityhub.py +3 -1
  75. cartography/intel/aws/sns.py +62 -2
  76. cartography/intel/aws/sqs.py +36 -90
  77. cartography/intel/aws/ssm.py +3 -5
  78. cartography/intel/azure/__init__.py +202 -48
  79. cartography/intel/azure/aks.py +175 -0
  80. cartography/intel/azure/app_service.py +105 -0
  81. cartography/intel/azure/compute.py +59 -112
  82. cartography/intel/azure/container_instances.py +95 -0
  83. cartography/intel/azure/cosmosdb.py +222 -361
  84. cartography/intel/azure/data_factory.py +85 -0
  85. cartography/intel/azure/data_factory_dataset.py +128 -0
  86. cartography/intel/azure/data_factory_linked_service.py +119 -0
  87. cartography/intel/azure/data_factory_pipeline.py +142 -0
  88. cartography/intel/azure/data_lake.py +124 -0
  89. cartography/intel/azure/event_grid.py +94 -0
  90. cartography/intel/azure/functions.py +124 -0
  91. cartography/intel/azure/load_balancers.py +263 -0
  92. cartography/intel/azure/logic_apps.py +101 -0
  93. cartography/intel/azure/monitor.py +105 -0
  94. cartography/intel/azure/network.py +467 -0
  95. cartography/intel/azure/permission_relationships.py +466 -0
  96. cartography/intel/azure/rbac.py +309 -0
  97. cartography/intel/azure/resource_groups.py +82 -0
  98. cartography/intel/azure/security_center.py +106 -0
  99. cartography/intel/azure/sql.py +145 -292
  100. cartography/intel/azure/storage.py +185 -262
  101. cartography/intel/azure/subscription.py +21 -43
  102. cartography/intel/azure/tenant.py +39 -30
  103. cartography/intel/azure/util/common.py +13 -0
  104. cartography/intel/azure/util/credentials.py +49 -174
  105. cartography/intel/azure/util/tag.py +41 -0
  106. cartography/intel/create_indexes.py +2 -1
  107. cartography/intel/crowdstrike/spotlight.py +5 -2
  108. cartography/intel/dns.py +5 -2
  109. cartography/intel/entra/__init__.py +100 -1
  110. cartography/intel/entra/app_role_assignments.py +284 -0
  111. cartography/intel/entra/applications.py +182 -0
  112. cartography/intel/entra/federation/__init__.py +0 -0
  113. cartography/intel/entra/federation/aws_identity_center.py +77 -0
  114. cartography/intel/entra/groups.py +198 -0
  115. cartography/intel/entra/ou.py +48 -24
  116. cartography/intel/entra/service_principals.py +217 -0
  117. cartography/intel/entra/users.py +105 -57
  118. cartography/intel/gcp/__init__.py +334 -396
  119. cartography/intel/gcp/bigtable_app_profile.py +101 -0
  120. cartography/intel/gcp/bigtable_backup.py +91 -0
  121. cartography/intel/gcp/bigtable_cluster.py +93 -0
  122. cartography/intel/gcp/bigtable_instance.py +86 -0
  123. cartography/intel/gcp/bigtable_table.py +87 -0
  124. cartography/intel/gcp/cai.py +292 -0
  125. cartography/intel/gcp/clients.py +112 -0
  126. cartography/intel/gcp/compute.py +128 -119
  127. cartography/intel/gcp/crm/__init__.py +0 -0
  128. cartography/intel/gcp/crm/folders.py +114 -0
  129. cartography/intel/gcp/crm/orgs.py +70 -0
  130. cartography/intel/gcp/crm/projects.py +120 -0
  131. cartography/intel/gcp/dns.py +83 -169
  132. cartography/intel/gcp/gke.py +72 -113
  133. cartography/intel/gcp/iam.py +111 -91
  134. cartography/intel/gcp/permission_relationships.py +394 -0
  135. cartography/intel/gcp/policy_bindings.py +225 -0
  136. cartography/intel/gcp/storage.py +75 -159
  137. cartography/intel/github/__init__.py +62 -25
  138. cartography/intel/github/commits.py +423 -0
  139. cartography/intel/github/repos.py +463 -85
  140. cartography/intel/github/teams.py +3 -3
  141. cartography/intel/github/users.py +5 -0
  142. cartography/intel/github/util.py +12 -0
  143. cartography/intel/googleworkspace/__init__.py +193 -0
  144. cartography/intel/googleworkspace/devices.py +254 -0
  145. cartography/intel/googleworkspace/groups.py +568 -0
  146. cartography/intel/googleworkspace/oauth_apps.py +259 -0
  147. cartography/intel/googleworkspace/tenant.py +85 -0
  148. cartography/intel/googleworkspace/users.py +138 -0
  149. cartography/intel/gsuite/__init__.py +17 -9
  150. cartography/intel/gsuite/groups.py +291 -0
  151. cartography/intel/gsuite/users.py +142 -0
  152. cartography/intel/jamf/computers.py +7 -1
  153. cartography/intel/keycloak/__init__.py +153 -0
  154. cartography/intel/keycloak/authenticationexecutions.py +322 -0
  155. cartography/intel/keycloak/authenticationflows.py +77 -0
  156. cartography/intel/keycloak/clients.py +187 -0
  157. cartography/intel/keycloak/groups.py +126 -0
  158. cartography/intel/keycloak/identityproviders.py +94 -0
  159. cartography/intel/keycloak/organizations.py +163 -0
  160. cartography/intel/keycloak/realms.py +61 -0
  161. cartography/intel/keycloak/roles.py +202 -0
  162. cartography/intel/keycloak/scopes.py +73 -0
  163. cartography/intel/keycloak/users.py +70 -0
  164. cartography/intel/keycloak/util.py +47 -0
  165. cartography/intel/kubernetes/__init__.py +60 -14
  166. cartography/intel/kubernetes/clusters.py +86 -0
  167. cartography/intel/kubernetes/eks.py +402 -0
  168. cartography/intel/kubernetes/namespaces.py +59 -57
  169. cartography/intel/kubernetes/pods.py +168 -75
  170. cartography/intel/kubernetes/rbac.py +597 -0
  171. cartography/intel/kubernetes/secrets.py +95 -45
  172. cartography/intel/kubernetes/services.py +131 -67
  173. cartography/intel/kubernetes/util.py +142 -14
  174. cartography/intel/oci/iam.py +23 -9
  175. cartography/intel/oci/organizations.py +3 -1
  176. cartography/intel/oci/utils.py +28 -5
  177. cartography/intel/okta/applications.py +15 -5
  178. cartography/intel/okta/awssaml.py +14 -10
  179. cartography/intel/okta/factors.py +3 -1
  180. cartography/intel/okta/groups.py +5 -2
  181. cartography/intel/okta/organization.py +3 -1
  182. cartography/intel/okta/origins.py +3 -1
  183. cartography/intel/okta/roles.py +5 -2
  184. cartography/intel/okta/users.py +10 -2
  185. cartography/intel/ontology/__init__.py +44 -0
  186. cartography/intel/ontology/devices.py +54 -0
  187. cartography/intel/ontology/users.py +54 -0
  188. cartography/intel/ontology/utils.py +176 -0
  189. cartography/intel/pagerduty/escalation_policies.py +13 -6
  190. cartography/intel/pagerduty/schedules.py +9 -4
  191. cartography/intel/pagerduty/services.py +7 -3
  192. cartography/intel/pagerduty/teams.py +5 -2
  193. cartography/intel/pagerduty/users.py +3 -1
  194. cartography/intel/pagerduty/vendors.py +3 -1
  195. cartography/intel/scaleway/__init__.py +127 -0
  196. cartography/intel/scaleway/iam/__init__.py +0 -0
  197. cartography/intel/scaleway/iam/apikeys.py +71 -0
  198. cartography/intel/scaleway/iam/applications.py +71 -0
  199. cartography/intel/scaleway/iam/groups.py +71 -0
  200. cartography/intel/scaleway/iam/users.py +71 -0
  201. cartography/intel/scaleway/instances/__init__.py +0 -0
  202. cartography/intel/scaleway/instances/flexibleips.py +86 -0
  203. cartography/intel/scaleway/instances/instances.py +92 -0
  204. cartography/intel/scaleway/projects.py +79 -0
  205. cartography/intel/scaleway/storage/__init__.py +0 -0
  206. cartography/intel/scaleway/storage/snapshots.py +86 -0
  207. cartography/intel/scaleway/storage/volumes.py +84 -0
  208. cartography/intel/scaleway/utils.py +37 -0
  209. cartography/intel/sentinelone/__init__.py +75 -0
  210. cartography/intel/sentinelone/account.py +140 -0
  211. cartography/intel/sentinelone/agent.py +139 -0
  212. cartography/intel/sentinelone/api.py +124 -0
  213. cartography/intel/sentinelone/application.py +248 -0
  214. cartography/intel/sentinelone/cve.py +119 -0
  215. cartography/intel/sentinelone/utils.py +28 -0
  216. cartography/intel/slack/__init__.py +78 -0
  217. cartography/intel/slack/channels.py +80 -0
  218. cartography/intel/slack/groups.py +90 -0
  219. cartography/intel/slack/teams.py +65 -0
  220. cartography/intel/slack/users.py +57 -0
  221. cartography/intel/slack/utils.py +29 -0
  222. cartography/intel/spacelift/__init__.py +161 -0
  223. cartography/intel/spacelift/account.py +73 -0
  224. cartography/intel/spacelift/ec2_ownership.py +280 -0
  225. cartography/intel/spacelift/runs.py +463 -0
  226. cartography/intel/spacelift/spaces.py +112 -0
  227. cartography/intel/spacelift/stacks.py +119 -0
  228. cartography/intel/spacelift/util.py +122 -0
  229. cartography/intel/spacelift/workerpools.py +131 -0
  230. cartography/intel/spacelift/workers.py +128 -0
  231. cartography/intel/trivy/__init__.py +272 -0
  232. cartography/intel/trivy/scanner.py +386 -0
  233. cartography/models/airbyte/__init__.py +0 -0
  234. cartography/models/airbyte/connection.py +138 -0
  235. cartography/models/airbyte/destination.py +75 -0
  236. cartography/models/airbyte/organization.py +19 -0
  237. cartography/models/airbyte/source.py +75 -0
  238. cartography/models/airbyte/stream.py +74 -0
  239. cartography/models/airbyte/tag.py +69 -0
  240. cartography/models/airbyte/user.py +115 -0
  241. cartography/models/airbyte/workspace.py +46 -0
  242. cartography/models/anthropic/apikey.py +4 -0
  243. cartography/models/anthropic/user.py +4 -0
  244. cartography/models/aws/acm/__init__.py +0 -0
  245. cartography/models/aws/acm/certificate.py +75 -0
  246. cartography/models/aws/apigateway/__init__.py +0 -0
  247. cartography/models/aws/apigateway/apigatewaydeployment.py +74 -0
  248. cartography/models/aws/apigateway/apigatewayintegration.py +79 -0
  249. cartography/models/aws/apigateway/apigatewaymethod.py +74 -0
  250. cartography/models/aws/apigatewayv2/__init__.py +0 -0
  251. cartography/models/aws/apigatewayv2/apigatewayv2.py +53 -0
  252. cartography/models/aws/cloudtrail/management_events.py +153 -0
  253. cartography/models/aws/cloudtrail/trail.py +45 -0
  254. cartography/models/aws/cloudwatch/log_metric_filter.py +79 -0
  255. cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
  256. cartography/models/aws/codebuild/__init__.py +0 -0
  257. cartography/models/aws/codebuild/project.py +49 -0
  258. cartography/models/aws/cognito/__init__.py +0 -0
  259. cartography/models/aws/cognito/identity_pool.py +70 -0
  260. cartography/models/aws/cognito/user_pool.py +47 -0
  261. cartography/models/aws/dynamodb/tables.py +2 -0
  262. cartography/models/aws/ec2/instances.py +25 -1
  263. cartography/models/aws/ec2/networkinterfaces.py +4 -0
  264. cartography/models/aws/ec2/security_group_rules.py +109 -0
  265. cartography/models/aws/ec2/security_groups.py +90 -0
  266. cartography/models/aws/ec2/snapshots.py +58 -0
  267. cartography/models/aws/ec2/subnet_instance.py +2 -0
  268. cartography/models/aws/ec2/subnet_networkinterface.py +2 -0
  269. cartography/models/aws/ec2/subnets.py +65 -0
  270. cartography/models/aws/ec2/volumes.py +20 -0
  271. cartography/models/aws/ec2/vpc.py +46 -0
  272. cartography/models/aws/ec2/vpc_cidr.py +102 -0
  273. cartography/models/aws/ec2/vpc_peering.py +157 -0
  274. cartography/models/aws/ecr/__init__.py +0 -0
  275. cartography/models/aws/ecr/image.py +146 -0
  276. cartography/models/aws/ecr/image_layer.py +107 -0
  277. cartography/models/aws/ecr/repository.py +72 -0
  278. cartography/models/aws/ecr/repository_image.py +95 -0
  279. cartography/models/aws/ecs/__init__.py +0 -0
  280. cartography/models/aws/ecs/clusters.py +64 -0
  281. cartography/models/aws/ecs/container_definitions.py +93 -0
  282. cartography/models/aws/ecs/container_instances.py +84 -0
  283. cartography/models/aws/ecs/containers.py +101 -0
  284. cartography/models/aws/ecs/services.py +134 -0
  285. cartography/models/aws/ecs/task_definitions.py +135 -0
  286. cartography/models/aws/ecs/tasks.py +134 -0
  287. cartography/models/aws/efs/access_point.py +77 -0
  288. cartography/models/aws/efs/file_system.py +60 -0
  289. cartography/models/aws/efs/mount_target.py +29 -2
  290. cartography/models/aws/elasticache/__init__.py +0 -0
  291. cartography/models/aws/elasticache/cluster.py +65 -0
  292. cartography/models/aws/elasticache/topic.py +67 -0
  293. cartography/models/aws/eventbridge/__init__.py +0 -0
  294. cartography/models/aws/eventbridge/rule.py +77 -0
  295. cartography/models/aws/eventbridge/target.py +71 -0
  296. cartography/models/aws/glue/__init__.py +0 -0
  297. cartography/models/aws/glue/connection.py +51 -0
  298. cartography/models/aws/glue/job.py +69 -0
  299. cartography/models/aws/guardduty/__init__.py +1 -0
  300. cartography/models/aws/guardduty/detectors.py +50 -0
  301. cartography/models/aws/guardduty/findings.py +121 -0
  302. cartography/models/aws/iam/access_key.py +103 -0
  303. cartography/models/aws/iam/account_role.py +24 -0
  304. cartography/models/aws/iam/federated_principal.py +60 -0
  305. cartography/models/aws/iam/group.py +60 -0
  306. cartography/models/aws/iam/group_membership.py +27 -0
  307. cartography/models/aws/iam/inline_policy.py +78 -0
  308. cartography/models/aws/iam/managed_policy.py +51 -0
  309. cartography/models/aws/iam/policy_statement.py +57 -0
  310. cartography/models/aws/iam/role.py +83 -0
  311. cartography/models/aws/iam/root_principal.py +52 -0
  312. cartography/models/aws/iam/service_principal.py +30 -0
  313. cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
  314. cartography/models/aws/iam/user.py +59 -0
  315. cartography/models/aws/identitycenter/awsidentitycenter.py +1 -0
  316. cartography/models/aws/identitycenter/awspermissionset.py +70 -0
  317. cartography/models/aws/identitycenter/awssogroup.py +70 -0
  318. cartography/models/aws/identitycenter/awsssouser.py +49 -9
  319. cartography/models/aws/inspector/findings.py +37 -0
  320. cartography/models/aws/inspector/packages.py +1 -31
  321. cartography/models/aws/kms/__init__.py +0 -0
  322. cartography/models/aws/kms/aliases.py +86 -0
  323. cartography/models/aws/kms/grants.py +65 -0
  324. cartography/models/aws/kms/keys.py +88 -0
  325. cartography/models/aws/lambda_function/__init__.py +0 -0
  326. cartography/models/aws/lambda_function/alias.py +74 -0
  327. cartography/models/aws/lambda_function/event_source_mapping.py +88 -0
  328. cartography/models/aws/lambda_function/lambda_function.py +91 -0
  329. cartography/models/aws/lambda_function/layer.py +72 -0
  330. cartography/models/aws/rds/__init__.py +0 -0
  331. cartography/models/aws/rds/cluster.py +91 -0
  332. cartography/models/aws/rds/event_subscription.py +146 -0
  333. cartography/models/aws/rds/instance.py +156 -0
  334. cartography/models/aws/rds/snapshot.py +108 -0
  335. cartography/models/aws/rds/subnet_group.py +101 -0
  336. cartography/models/aws/route53/__init__.py +0 -0
  337. cartography/models/aws/route53/dnsrecord.py +235 -0
  338. cartography/models/aws/route53/nameserver.py +63 -0
  339. cartography/models/aws/route53/subzone.py +40 -0
  340. cartography/models/aws/route53/zone.py +47 -0
  341. cartography/models/aws/s3/notification.py +24 -0
  342. cartography/models/aws/secretsmanager/secret.py +106 -0
  343. cartography/models/aws/secretsmanager/secret_version.py +0 -2
  344. cartography/models/aws/sns/topic_subscription.py +74 -0
  345. cartography/models/aws/sqs/__init__.py +0 -0
  346. cartography/models/aws/sqs/queue.py +89 -0
  347. cartography/models/azure/__init__.py +0 -0
  348. cartography/models/azure/aks_cluster.py +54 -0
  349. cartography/models/azure/aks_nodepool.py +54 -0
  350. cartography/models/azure/app_service.py +59 -0
  351. cartography/models/azure/container_instance.py +57 -0
  352. cartography/models/azure/cosmosdb/__init__.py +0 -0
  353. cartography/models/azure/cosmosdb/account.py +77 -0
  354. cartography/models/azure/cosmosdb/accountfailoverpolicy.py +77 -0
  355. cartography/models/azure/cosmosdb/cassandrakeyspace.py +82 -0
  356. cartography/models/azure/cosmosdb/cassandratable.py +81 -0
  357. cartography/models/azure/cosmosdb/corspolicy.py +74 -0
  358. cartography/models/azure/cosmosdb/dblocation.py +120 -0
  359. cartography/models/azure/cosmosdb/mongodbcollection.py +82 -0
  360. cartography/models/azure/cosmosdb/mongodbdatabase.py +78 -0
  361. cartography/models/azure/cosmosdb/privateendpointconnection.py +81 -0
  362. cartography/models/azure/cosmosdb/sqlcontainer.py +88 -0
  363. cartography/models/azure/cosmosdb/sqldatabase.py +78 -0
  364. cartography/models/azure/cosmosdb/tableresource.py +76 -0
  365. cartography/models/azure/cosmosdb/virtualnetworkrule.py +78 -0
  366. cartography/models/azure/data_factory/__init__.py +0 -0
  367. cartography/models/azure/data_factory/data_factory.py +51 -0
  368. cartography/models/azure/data_factory/data_factory_dataset.py +94 -0
  369. cartography/models/azure/data_factory/data_factory_linked_service.py +78 -0
  370. cartography/models/azure/data_factory/data_factory_pipeline.py +93 -0
  371. cartography/models/azure/data_lake_filesystem.py +51 -0
  372. cartography/models/azure/event_grid_topic.py +57 -0
  373. cartography/models/azure/function_app.py +59 -0
  374. cartography/models/azure/load_balancer/__init__.py +0 -0
  375. cartography/models/azure/load_balancer/load_balancer.py +49 -0
  376. cartography/models/azure/load_balancer/load_balancer_backend_pool.py +73 -0
  377. cartography/models/azure/load_balancer/load_balancer_frontend_ip.py +75 -0
  378. cartography/models/azure/load_balancer/load_balancer_inbound_nat_rule.py +78 -0
  379. cartography/models/azure/load_balancer/load_balancer_rule.py +108 -0
  380. cartography/models/azure/logic_apps.py +56 -0
  381. cartography/models/azure/monitor.py +54 -0
  382. cartography/models/azure/network_interface.py +112 -0
  383. cartography/models/azure/network_security_group.py +50 -0
  384. cartography/models/azure/permission_relationships.py +60 -0
  385. cartography/models/azure/principal.py +41 -0
  386. cartography/models/azure/public_ip_address.py +50 -0
  387. cartography/models/azure/rbac.py +268 -0
  388. cartography/models/azure/resource_groups.py +52 -0
  389. cartography/models/azure/security_center.py +50 -0
  390. cartography/models/azure/sql/__init__.py +0 -0
  391. cartography/models/azure/sql/databasethreatdetectionpolicy.py +85 -0
  392. cartography/models/azure/sql/elasticpool.py +77 -0
  393. cartography/models/azure/sql/failovergroup.py +73 -0
  394. cartography/models/azure/sql/recoverabledatabase.py +75 -0
  395. cartography/models/azure/sql/replicationlink.py +81 -0
  396. cartography/models/azure/sql/restorabledroppeddatabase.py +82 -0
  397. cartography/models/azure/sql/restorepoint.py +74 -0
  398. cartography/models/azure/sql/serveradadministrator.py +74 -0
  399. cartography/models/azure/sql/serverdnsalias.py +71 -0
  400. cartography/models/azure/sql/sqldatabase.py +85 -0
  401. cartography/models/azure/sql/sqlserver.py +50 -0
  402. cartography/models/azure/sql/transparentdataencryption.py +76 -0
  403. cartography/models/azure/storage/__init__.py +0 -0
  404. cartography/models/azure/storage/account.py +59 -0
  405. cartography/models/azure/storage/blobcontainer.py +85 -0
  406. cartography/models/azure/storage/blobservice.py +71 -0
  407. cartography/models/azure/storage/fileservice.py +71 -0
  408. cartography/models/azure/storage/fileshare.py +82 -0
  409. cartography/models/azure/storage/queue.py +71 -0
  410. cartography/models/azure/storage/queueservice.py +73 -0
  411. cartography/models/azure/storage/table.py +72 -0
  412. cartography/models/azure/storage/tableservice.py +73 -0
  413. cartography/models/azure/subnet.py +101 -0
  414. cartography/models/azure/subscription.py +47 -0
  415. cartography/models/azure/tags/__init__.py +0 -0
  416. cartography/models/azure/tags/storage_tag.py +40 -0
  417. cartography/models/azure/tags/tag.py +37 -0
  418. cartography/models/azure/tenant.py +17 -0
  419. cartography/models/azure/virtual_network.py +49 -0
  420. cartography/models/azure/vm/__init__.py +0 -0
  421. cartography/models/azure/vm/datadisk.py +80 -0
  422. cartography/models/azure/vm/disk.py +55 -0
  423. cartography/models/azure/vm/snapshot.py +56 -0
  424. cartography/models/azure/vm/virtualmachine.py +59 -0
  425. cartography/models/bigfix/bigfix_computer.py +1 -1
  426. cartography/models/cloudflare/member.py +4 -0
  427. cartography/models/core/common.py +1 -0
  428. cartography/models/core/nodes.py +15 -2
  429. cartography/models/core/relationships.py +44 -0
  430. cartography/models/crowdstrike/hosts.py +1 -1
  431. cartography/models/digitalocean/droplet.py +2 -0
  432. cartography/models/duo/endpoint.py +1 -1
  433. cartography/models/duo/phone.py +2 -2
  434. cartography/models/duo/user.py +4 -0
  435. cartography/models/entra/app_role_assignment.py +115 -0
  436. cartography/models/entra/application.py +49 -0
  437. cartography/models/entra/entra_user_to_aws_sso.py +41 -0
  438. cartography/models/entra/group.py +117 -0
  439. cartography/models/entra/service_principal.py +104 -0
  440. cartography/models/entra/user.py +42 -51
  441. cartography/models/gcp/__init__.py +0 -0
  442. cartography/models/gcp/bigtable/__init__.py +0 -0
  443. cartography/models/gcp/bigtable/app_profile.py +94 -0
  444. cartography/models/gcp/bigtable/backup.py +91 -0
  445. cartography/models/gcp/bigtable/cluster.py +73 -0
  446. cartography/models/gcp/bigtable/instance.py +52 -0
  447. cartography/models/gcp/bigtable/table.py +69 -0
  448. cartography/models/gcp/compute/__init__.py +0 -0
  449. cartography/models/gcp/compute/subnet.py +74 -0
  450. cartography/models/gcp/compute/vpc.py +50 -0
  451. cartography/models/gcp/crm/__init__.py +0 -0
  452. cartography/models/gcp/crm/folders.py +98 -0
  453. cartography/models/gcp/crm/organizations.py +21 -0
  454. cartography/models/gcp/crm/projects.py +100 -0
  455. cartography/models/gcp/dns.py +109 -0
  456. cartography/models/gcp/gke.py +69 -0
  457. cartography/models/gcp/iam.py +3 -0
  458. cartography/models/gcp/permission_relationships.py +61 -0
  459. cartography/models/gcp/policy_bindings.py +93 -0
  460. cartography/models/gcp/storage/__init__.py +0 -0
  461. cartography/models/gcp/storage/bucket.py +119 -0
  462. cartography/models/github/commits.py +63 -0
  463. cartography/models/github/dependencies.py +73 -0
  464. cartography/models/github/manifests.py +49 -0
  465. cartography/models/github/users.py +10 -0
  466. cartography/models/googleworkspace/__init__.py +0 -0
  467. cartography/models/googleworkspace/device.py +132 -0
  468. cartography/models/googleworkspace/group.py +382 -0
  469. cartography/models/googleworkspace/oauth_app.py +124 -0
  470. cartography/models/googleworkspace/tenant.py +30 -0
  471. cartography/models/googleworkspace/user.py +113 -0
  472. cartography/models/gsuite/__init__.py +0 -0
  473. cartography/models/gsuite/group.py +218 -0
  474. cartography/models/gsuite/tenant.py +29 -0
  475. cartography/models/gsuite/user.py +107 -0
  476. cartography/models/kandji/device.py +1 -2
  477. cartography/models/keycloak/__init__.py +0 -0
  478. cartography/models/keycloak/authenticationexecution.py +160 -0
  479. cartography/models/keycloak/authenticationflow.py +54 -0
  480. cartography/models/keycloak/client.py +179 -0
  481. cartography/models/keycloak/group.py +101 -0
  482. cartography/models/keycloak/identityprovider.py +89 -0
  483. cartography/models/keycloak/organization.py +116 -0
  484. cartography/models/keycloak/organizationdomain.py +73 -0
  485. cartography/models/keycloak/realm.py +173 -0
  486. cartography/models/keycloak/role.py +126 -0
  487. cartography/models/keycloak/scope.py +73 -0
  488. cartography/models/keycloak/user.py +55 -0
  489. cartography/models/kubernetes/__init__.py +0 -0
  490. cartography/models/kubernetes/clusterrolebindings.py +138 -0
  491. cartography/models/kubernetes/clusterroles.py +52 -0
  492. cartography/models/kubernetes/clusters.py +26 -0
  493. cartography/models/kubernetes/containers.py +133 -0
  494. cartography/models/kubernetes/groups.py +107 -0
  495. cartography/models/kubernetes/namespaces.py +51 -0
  496. cartography/models/kubernetes/oidc.py +51 -0
  497. cartography/models/kubernetes/pods.py +80 -0
  498. cartography/models/kubernetes/rolebindings.py +159 -0
  499. cartography/models/kubernetes/roles.py +76 -0
  500. cartography/models/kubernetes/secrets.py +79 -0
  501. cartography/models/kubernetes/serviceaccounts.py +77 -0
  502. cartography/models/kubernetes/services.py +108 -0
  503. cartography/models/kubernetes/users.py +105 -0
  504. cartography/models/lastpass/user.py +4 -0
  505. cartography/models/ontology/__init__.py +0 -0
  506. cartography/models/ontology/device.py +137 -0
  507. cartography/models/ontology/mapping/__init__.py +76 -0
  508. cartography/models/ontology/mapping/data/__init__.py +0 -0
  509. cartography/models/ontology/mapping/data/apikeys.py +93 -0
  510. cartography/models/ontology/mapping/data/computeinstance.py +95 -0
  511. cartography/models/ontology/mapping/data/containers.py +88 -0
  512. cartography/models/ontology/mapping/data/databases.py +182 -0
  513. cartography/models/ontology/mapping/data/devices.py +194 -0
  514. cartography/models/ontology/mapping/data/thirdpartyapps.py +140 -0
  515. cartography/models/ontology/mapping/data/useraccounts.py +416 -0
  516. cartography/models/ontology/mapping/data/users.py +63 -0
  517. cartography/models/ontology/mapping/specs.py +85 -0
  518. cartography/models/ontology/user.py +51 -0
  519. cartography/models/openai/adminapikey.py +4 -0
  520. cartography/models/openai/apikey.py +4 -0
  521. cartography/models/openai/user.py +4 -0
  522. cartography/models/scaleway/__init__.py +0 -0
  523. cartography/models/scaleway/iam/__init__.py +0 -0
  524. cartography/models/scaleway/iam/apikey.py +100 -0
  525. cartography/models/scaleway/iam/application.py +52 -0
  526. cartography/models/scaleway/iam/group.py +95 -0
  527. cartography/models/scaleway/iam/user.py +64 -0
  528. cartography/models/scaleway/instance/__init__.py +0 -0
  529. cartography/models/scaleway/instance/flexibleip.py +52 -0
  530. cartography/models/scaleway/instance/instance.py +120 -0
  531. cartography/models/scaleway/organization.py +19 -0
  532. cartography/models/scaleway/project.py +48 -0
  533. cartography/models/scaleway/storage/__init__.py +0 -0
  534. cartography/models/scaleway/storage/snapshot.py +78 -0
  535. cartography/models/scaleway/storage/volume.py +51 -0
  536. cartography/models/sentinelone/__init__.py +1 -0
  537. cartography/models/sentinelone/account.py +40 -0
  538. cartography/models/sentinelone/agent.py +50 -0
  539. cartography/models/sentinelone/application.py +44 -0
  540. cartography/models/sentinelone/application_version.py +96 -0
  541. cartography/models/sentinelone/cve.py +73 -0
  542. cartography/models/slack/__init__.py +0 -0
  543. cartography/models/slack/channels.py +92 -0
  544. cartography/models/slack/group.py +129 -0
  545. cartography/models/slack/team.py +22 -0
  546. cartography/models/slack/user.py +62 -0
  547. cartography/models/snipeit/asset.py +2 -0
  548. cartography/models/snipeit/user.py +4 -0
  549. cartography/models/spacelift/__init__.py +0 -0
  550. cartography/models/spacelift/cloudtrailevent.py +120 -0
  551. cartography/models/spacelift/run.py +162 -0
  552. cartography/models/spacelift/space.py +131 -0
  553. cartography/models/spacelift/spaceliftaccount.py +31 -0
  554. cartography/models/spacelift/spaceliftgitcommit.py +157 -0
  555. cartography/models/spacelift/stack.py +96 -0
  556. cartography/models/spacelift/user.py +63 -0
  557. cartography/models/spacelift/worker.py +97 -0
  558. cartography/models/spacelift/workerpool.py +90 -0
  559. cartography/models/tailscale/device.py +2 -1
  560. cartography/models/tailscale/user.py +6 -1
  561. cartography/models/trivy/__init__.py +0 -0
  562. cartography/models/trivy/findings.py +66 -0
  563. cartography/models/trivy/fix.py +66 -0
  564. cartography/models/trivy/package.py +71 -0
  565. cartography/rules/README.md +1 -0
  566. cartography/rules/__init__.py +0 -0
  567. cartography/rules/cli.py +261 -0
  568. cartography/rules/data/__init__.py +0 -0
  569. cartography/rules/data/rules/__init__.py +46 -0
  570. cartography/rules/data/rules/cloud_security_product_deactivated.py +49 -0
  571. cartography/rules/data/rules/compute_instance_exposed.py +51 -0
  572. cartography/rules/data/rules/database_instance_exposed.py +53 -0
  573. cartography/rules/data/rules/delegation_boundary_modifiable.py +90 -0
  574. cartography/rules/data/rules/identity_administration_privileges.py +100 -0
  575. cartography/rules/data/rules/inactive_user_active_accounts.py +48 -0
  576. cartography/rules/data/rules/malicious_npm_dependencies_shai_hulud.py +2222 -0
  577. cartography/rules/data/rules/mfa_missing.py +46 -0
  578. cartography/rules/data/rules/object_storage_public.py +100 -0
  579. cartography/rules/data/rules/policy_administration_privileges.py +104 -0
  580. cartography/rules/data/rules/unmanaged_accounts.py +43 -0
  581. cartography/rules/data/rules/workload_identity_admin_capabilities.py +193 -0
  582. cartography/rules/formatters.py +108 -0
  583. cartography/rules/runners.py +216 -0
  584. cartography/rules/spec/__init__.py +0 -0
  585. cartography/rules/spec/model.py +267 -0
  586. cartography/rules/spec/result.py +38 -0
  587. cartography/sync.py +25 -5
  588. cartography/util.py +101 -31
  589. {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/METADATA +61 -22
  590. cartography-0.123.0.dist-info/RECORD +856 -0
  591. {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/entry_points.txt +1 -0
  592. cartography/data/jobs/cleanup/aws_dns_cleanup.json +0 -65
  593. cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
  594. cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json +0 -24
  595. cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
  596. cartography/data/jobs/cleanup/aws_import_identity_center_cleanup.json +0 -16
  597. cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +0 -50
  598. cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
  599. cartography/data/jobs/cleanup/aws_import_rds_clusters_cleanup.json +0 -23
  600. cartography/data/jobs/cleanup/aws_import_rds_instances_cleanup.json +0 -47
  601. cartography/data/jobs/cleanup/aws_import_rds_snapshots_cleanup.json +0 -23
  602. cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
  603. cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
  604. cartography/data/jobs/cleanup/aws_import_snapshots_cleanup.json +0 -30
  605. cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
  606. cartography/data/jobs/cleanup/aws_import_vpc_cleanup.json +0 -23
  607. cartography/data/jobs/cleanup/aws_import_vpc_peering_cleanup.json +0 -45
  608. cartography/data/jobs/cleanup/aws_kms_details.json +0 -10
  609. cartography/data/jobs/cleanup/azure_cosmosdb_cassandra_keyspace_cleanup.json +0 -25
  610. cartography/data/jobs/cleanup/azure_cosmosdb_cors_details.json +0 -15
  611. cartography/data/jobs/cleanup/azure_cosmosdb_mongodb_database_cleanup.json +0 -25
  612. cartography/data/jobs/cleanup/azure_cosmosdb_sql_database_cleanup.json +0 -25
  613. cartography/data/jobs/cleanup/azure_cosmosdb_table_resources_cleanup.json +0 -15
  614. cartography/data/jobs/cleanup/azure_database_account_cleanup.json +0 -85
  615. cartography/data/jobs/cleanup/azure_import_disks_cleanup.json +0 -15
  616. cartography/data/jobs/cleanup/azure_import_snapshots_cleanup.json +0 -15
  617. cartography/data/jobs/cleanup/azure_import_virtual_machines_cleanup.json +0 -25
  618. cartography/data/jobs/cleanup/azure_sql_server_cleanup.json +0 -125
  619. cartography/data/jobs/cleanup/azure_storage_account_cleanup.json +0 -95
  620. cartography/data/jobs/cleanup/azure_subscriptions_cleanup.json +0 -14
  621. cartography/data/jobs/cleanup/azure_tenant_cleanup.json +0 -9
  622. cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
  623. cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
  624. cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
  625. cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
  626. cartography/data/jobs/cleanup/gcp_dns_cleanup.json +0 -29
  627. cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
  628. cartography/data/jobs/cleanup/gcp_storage_bucket_cleanup.json +0 -29
  629. cartography/data/jobs/cleanup/gsuite_ingest_groups_cleanup.json +0 -23
  630. cartography/data/jobs/cleanup/gsuite_ingest_users_cleanup.json +0 -11
  631. cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
  632. cartography/intel/gcp/crm.py +0 -355
  633. cartography/intel/gsuite/api.py +0 -342
  634. cartography-0.104.0rc2.dist-info/RECORD +0 -455
  635. /cartography/data/jobs/{analysis → scoped_analysis}/aws_s3acl_analysis.json +0 -0
  636. /cartography/models/aws/{apigateway.py → apigateway/apigateway.py} +0 -0
  637. /cartography/models/aws/{apigatewaycertificate.py → apigateway/apigatewaycertificate.py} +0 -0
  638. /cartography/models/aws/{apigatewayresource.py → apigateway/apigatewayresource.py} +0 -0
  639. /cartography/models/aws/{apigatewaystage.py → apigateway/apigatewaystage.py} +0 -0
  640. {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/WHEEL +0 -0
  641. {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/licenses/LICENSE +0 -0
  642. {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,10 @@
1
1
  import configparser
2
2
  import logging
3
+ from collections import defaultdict
3
4
  from collections import namedtuple
4
5
  from string import Template
5
6
  from typing import Any
7
+ from typing import cast
6
8
  from typing import Dict
7
9
  from typing import List
8
10
  from typing import Optional
@@ -12,8 +14,13 @@ from packaging.requirements import InvalidRequirement
12
14
  from packaging.requirements import Requirement
13
15
  from packaging.utils import canonicalize_name
14
16
 
17
+ from cartography.client.core.tx import execute_write_with_retry
18
+ from cartography.client.core.tx import load as load_data
19
+ from cartography.graph.job import GraphJob
15
20
  from cartography.intel.github.util import fetch_all
16
21
  from cartography.intel.github.util import PaginatedGraphqlData
22
+ from cartography.models.github.dependencies import GitHubDependencySchema
23
+ from cartography.models.github.manifests import DependencyGraphManifestSchema
17
24
  from cartography.util import backoff_handler
18
25
  from cartography.util import retries_with_backoff
19
26
  from cartography.util import run_cleanup_job
@@ -36,12 +43,12 @@ UserAffiliationAndRepoPermission = namedtuple(
36
43
 
37
44
 
38
45
  GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
39
- query($login: String!, $cursor: String) {
46
+ query($login: String!, $cursor: String, $count: Int!) {
40
47
  organization(login: $login)
41
48
  {
42
49
  url
43
50
  login
44
- repositories(first: 50, after: $cursor){
51
+ repositories(first: $count, after: $cursor){
45
52
  pageInfo{
46
53
  endCursor
47
54
  hasNextPage
@@ -93,6 +100,18 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
93
100
  text
94
101
  }
95
102
  }
103
+ dependencyGraphManifests(first: 20) {
104
+ nodes {
105
+ blobPath
106
+ dependencies(first: 100) {
107
+ nodes {
108
+ packageName
109
+ requirements
110
+ packageManager
111
+ }
112
+ }
113
+ }
114
+ }
96
115
  }
97
116
  }
98
117
  }
@@ -140,25 +159,38 @@ def _get_repo_collaborators_inner_func(
140
159
  org: str,
141
160
  api_url: str,
142
161
  token: str,
143
- repo_raw_data: list[dict[str, Any]],
162
+ repo_raw_data: list[dict[str, Any] | None],
144
163
  affiliation: str,
145
- collab_users: list[dict[str, Any]],
146
- collab_permission: list[str],
147
164
  ) -> dict[str, list[UserAffiliationAndRepoPermission]]:
148
165
  result: dict[str, list[UserAffiliationAndRepoPermission]] = {}
149
166
 
150
167
  for repo in repo_raw_data:
168
+ # GitHub can return null repo entries. See issues #1334 and #1404.
169
+ if repo is None:
170
+ logger.info(
171
+ "Skipping null repository entry while fetching %s collaborators.",
172
+ affiliation,
173
+ )
174
+ continue
151
175
  repo_name = repo["name"]
152
176
  repo_url = repo["url"]
153
177
 
154
- if (
155
- affiliation == "OUTSIDE" and repo["outsideCollaborators"]["totalCount"] == 0
156
- ) or (
157
- affiliation == "DIRECT" and repo["directCollaborators"]["totalCount"] == 0
158
- ):
159
- # repo has no collabs of the affiliation type we're looking for, so don't waste time making an API call
160
- result[repo_url] = []
161
- continue
178
+ # Guard against None when collaborator fields are not accessible due to permissions.
179
+ direct_info = repo.get("directCollaborators")
180
+ outside_info = repo.get("outsideCollaborators")
181
+
182
+ if affiliation == "OUTSIDE":
183
+ total_outside = 0 if not outside_info else outside_info.get("totalCount", 0)
184
+ if total_outside == 0:
185
+ # No outside collaborators or not permitted to view; skip API calls for this repo.
186
+ result[repo_url] = []
187
+ continue
188
+ else: # DIRECT
189
+ total_direct = 0 if not direct_info else direct_info.get("totalCount", 0)
190
+ if total_direct == 0:
191
+ # No direct collaborators or not permitted to view; skip API calls for this repo.
192
+ result[repo_url] = []
193
+ continue
162
194
 
163
195
  logger.info(f"Loading {affiliation} collaborators for repo {repo_name}.")
164
196
  collaborators = _get_repo_collaborators(
@@ -169,6 +201,9 @@ def _get_repo_collaborators_inner_func(
169
201
  affiliation,
170
202
  )
171
203
 
204
+ collab_users: List[dict[str, Any]] = []
205
+ collab_permission: List[str] = []
206
+
172
207
  # nodes and edges are expected to always be present given that we only call for them if totalCount is > 0
173
208
  # however sometimes GitHub returns None, as in issue 1334 and 1404.
174
209
  for collab in collaborators.nodes or []:
@@ -186,7 +221,7 @@ def _get_repo_collaborators_inner_func(
186
221
 
187
222
 
188
223
  def _get_repo_collaborators_for_multiple_repos(
189
- repo_raw_data: list[dict[str, Any]],
224
+ repo_raw_data: list[dict[str, Any] | None],
190
225
  affiliation: str,
191
226
  org: str,
192
227
  api_url: str,
@@ -205,8 +240,6 @@ def _get_repo_collaborators_for_multiple_repos(
205
240
  logger.info(
206
241
  f'Retrieving repo collaborators for affiliation "{affiliation}" on org "{org}".',
207
242
  )
208
- collab_users: List[dict[str, Any]] = []
209
- collab_permission: List[str] = []
210
243
 
211
244
  result: dict[str, list[UserAffiliationAndRepoPermission]] = retries_with_backoff(
212
245
  _get_repo_collaborators_inner_func,
@@ -219,8 +252,6 @@ def _get_repo_collaborators_for_multiple_repos(
219
252
  token=token,
220
253
  repo_raw_data=repo_raw_data,
221
254
  affiliation=affiliation,
222
- collab_users=collab_users,
223
- collab_permission=collab_permission,
224
255
  )
225
256
  return result
226
257
 
@@ -257,7 +288,7 @@ def _get_repo_collaborators(
257
288
 
258
289
 
259
290
  @timeit
260
- def get(token: str, api_url: str, organization: str) -> List[Dict]:
291
+ def get(token: str, api_url: str, organization: str) -> List[Optional[Dict]]:
261
292
  """
262
293
  Retrieve a list of repos from a Github organization as described in
263
294
  https://docs.github.com/en/graphql/reference/objects#repository.
@@ -265,6 +296,8 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
265
296
  :param api_url: The Github v4 API endpoint as string.
266
297
  :param organization: The name of the target Github organization as string.
267
298
  :return: A list of dicts representing repos. See tests.data.github.repos for data shape.
299
+ Note: The list may contain None entries per GraphQL spec when resolvers error
300
+ (permissions, rate limits, transient issues). See issues #1334 and #1404.
268
301
  """
269
302
  # TODO: link the Github organization to the repositories
270
303
  repos, _ = fetch_all(
@@ -273,12 +306,17 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
273
306
  organization,
274
307
  GITHUB_ORG_REPOS_PAGINATED_GRAPHQL,
275
308
  "repositories",
309
+ count=50,
276
310
  )
277
- return repos.nodes
311
+ # Cast is needed because GitHub's GraphQL RepositoryConnection.nodes is typed [Repository] (not [Repository!])
312
+ # per GraphQL spec, allowing null entries when resolvers error (permissions, rate limits, transient issues).
313
+ # See https://github.com/cartography-cncf/cartography/issues/1334
314
+ # and https://github.com/cartography-cncf/cartography/issues/1404
315
+ return cast(List[Optional[Dict]], repos.nodes)
278
316
 
279
317
 
280
318
  def transform(
281
- repos_json: List[Dict],
319
+ repos_json: List[Optional[Dict]],
282
320
  direct_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
283
321
  outside_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
284
322
  ) -> Dict:
@@ -291,8 +329,10 @@ def transform(
291
329
  :param outside_collaborators: dict of repo URL to list of outside collaborators.
292
330
  See tests.data.github.repos.OUTSIDE_COLLABORATORS for data shape.
293
331
  :return: Dict containing the repos, repo->language mapping, owners->repo mapping, outside collaborators->repo
294
- mapping, and Python requirements files (if any) in a repo.
332
+ mapping, Python requirements files (if any) in a repo, manifests from GitHub's dependency graph, and all
333
+ dependencies from GitHub's dependency graph.
295
334
  """
335
+ logger.info(f"Processing {len(repos_json)} GitHub repositories")
296
336
  transformed_repo_list: List[Dict] = []
297
337
  transformed_repo_languages: List[Dict] = []
298
338
  transformed_repo_owners: List[Dict] = []
@@ -312,7 +352,13 @@ def transform(
312
352
  "WRITE": [],
313
353
  }
314
354
  transformed_requirements_files: List[Dict] = []
355
+ transformed_dependencies: List[Dict] = []
356
+ transformed_manifests: List[Dict] = []
315
357
  for repo_object in repos_json:
358
+ # GitHub can return null repo entries. See issues #1334 and #1404.
359
+ if repo_object is None:
360
+ logger.debug("Skipping null repository entry during transformation.")
361
+ continue
316
362
  _transform_repo_languages(
317
363
  repo_object["url"],
318
364
  repo_object,
@@ -350,6 +396,16 @@ def transform(
350
396
  repo_url,
351
397
  transformed_requirements_files,
352
398
  )
399
+ _transform_dependency_manifests(
400
+ repo_object.get("dependencyGraphManifests"),
401
+ repo_url,
402
+ transformed_manifests,
403
+ )
404
+ _transform_dependency_graph(
405
+ repo_object.get("dependencyGraphManifests"),
406
+ repo_url,
407
+ transformed_dependencies,
408
+ )
353
409
  results = {
354
410
  "repos": transformed_repo_list,
355
411
  "repo_languages": transformed_repo_languages,
@@ -357,7 +413,10 @@ def transform(
357
413
  "repo_outside_collaborators": transformed_outside_collaborators,
358
414
  "repo_direct_collaborators": transformed_direct_collaborators,
359
415
  "python_requirements": transformed_requirements_files,
416
+ "dependencies": transformed_dependencies,
417
+ "manifests": transformed_manifests,
360
418
  }
419
+
361
420
  return results
362
421
 
363
422
 
@@ -371,9 +430,16 @@ def _create_default_branch_id(repo_url: str, default_branch_ref_id: str) -> str:
371
430
 
372
431
  def _create_git_url_from_ssh_url(ssh_url: str) -> str:
373
432
  """
374
- Return a git:// URL from the given ssh_url
433
+ Convert SSH URL to git:// URL.
434
+ Example:
435
+ git@github.com:cartography-cncf/cartography.git
436
+ -> git://github.com/cartography-cncf/cartography.git
375
437
  """
376
- return ssh_url.replace("/", ":").replace("git@", "git://")
438
+ # Remove the user part (e.g., "git@")
439
+ _, host_and_path = ssh_url.split("@", 1)
440
+ # Replace first ':' (separating host and repo) with '/'
441
+ host, path = host_and_path.split(":", 1)
442
+ return f"git://{host}/{path}"
377
443
 
378
444
 
379
445
  def _transform_repo_objects(input_repo_object: Dict, out_repo_list: List[Dict]) -> None:
@@ -533,6 +599,153 @@ def _transform_setup_cfg_requirements(
533
599
  _transform_python_requirements(requirements_list, repo_url, out_requirements_files)
534
600
 
535
601
 
602
+ def _transform_dependency_manifests(
603
+ dependency_manifests: Optional[Dict],
604
+ repo_url: str,
605
+ out_manifests_list: List[Dict],
606
+ ) -> None:
607
+ """
608
+ Transform GitHub dependency graph manifests into cartography manifest format.
609
+ :param dependency_manifests: dependencyGraphManifests from GitHub GraphQL API
610
+ :param repo_url: The URL of the GitHub repo
611
+ :param out_manifests_list: Output array to append transformed results to
612
+ :return: Nothing
613
+ """
614
+ if not dependency_manifests or not dependency_manifests.get("nodes"):
615
+ return
616
+
617
+ manifests_added = 0
618
+
619
+ for manifest in dependency_manifests["nodes"]:
620
+ blob_path = manifest.get("blobPath", "")
621
+ if not blob_path:
622
+ continue
623
+
624
+ # Count dependencies in this manifest
625
+ dependencies = manifest.get("dependencies", {})
626
+ dependencies_count = len(dependencies.get("nodes", []) if dependencies else [])
627
+
628
+ # Create unique manifest ID by combining repo URL and blob path
629
+ manifest_id = f"{repo_url}#{blob_path}"
630
+
631
+ # Extract filename from blob path
632
+ filename = blob_path.split("/")[-1] if blob_path else "None"
633
+
634
+ out_manifests_list.append(
635
+ {
636
+ "id": manifest_id,
637
+ "blob_path": blob_path,
638
+ "filename": filename,
639
+ "dependencies_count": dependencies_count,
640
+ "repo_url": repo_url,
641
+ }
642
+ )
643
+ manifests_added += 1
644
+
645
+ if manifests_added > 0:
646
+ repo_name = repo_url.split("/")[-1] if repo_url else "repository"
647
+ logger.info(f"Found {manifests_added} dependency manifests in {repo_name}")
648
+
649
+
650
+ def _transform_dependency_graph(
651
+ dependency_manifests: Optional[Dict],
652
+ repo_url: str,
653
+ out_dependencies_list: List[Dict],
654
+ ) -> None:
655
+ """
656
+ Transform GitHub dependency graph manifests into cartography dependency format.
657
+ :param dependency_manifests: dependencyGraphManifests from GitHub GraphQL API
658
+ :param repo_url: The URL of the GitHub repo
659
+ :param out_dependencies_list: Output array to append transformed results to
660
+ :return: Nothing
661
+ """
662
+ if not dependency_manifests or not dependency_manifests.get("nodes"):
663
+ return
664
+
665
+ dependencies_added = 0
666
+
667
+ for manifest in dependency_manifests["nodes"]:
668
+ dependencies = manifest.get("dependencies", {})
669
+ if not dependencies or not dependencies.get("nodes"):
670
+ continue
671
+
672
+ manifest_path = manifest.get("blobPath", "")
673
+
674
+ for dep in dependencies["nodes"]:
675
+ package_name = dep.get("packageName")
676
+ if not package_name:
677
+ continue
678
+
679
+ requirements = dep.get("requirements", "")
680
+ package_manager = dep.get("packageManager", "").upper()
681
+
682
+ # Create ecosystem-specific canonical name
683
+ canonical_name = _canonicalize_dependency_name(
684
+ package_name, package_manager
685
+ )
686
+
687
+ # Create ecosystem identifier
688
+ ecosystem = package_manager.lower() if package_manager else "unknown"
689
+
690
+ # Create simple dependency ID using canonical name and requirements
691
+ # This allows the same dependency to be shared across multiple repos
692
+ requirements_for_id = (requirements or "").strip()
693
+ dependency_id = (
694
+ f"{canonical_name}|{requirements_for_id}"
695
+ if requirements_for_id
696
+ else canonical_name
697
+ )
698
+
699
+ # Normalize requirements field (prefer None over empty string)
700
+ normalized_requirements = requirements if requirements else None
701
+
702
+ # Create manifest ID for the HAS_DEP relationship
703
+ manifest_id = f"{repo_url}#{manifest_path}"
704
+
705
+ out_dependencies_list.append(
706
+ {
707
+ "id": dependency_id,
708
+ "name": canonical_name,
709
+ "original_name": package_name, # Keep original for reference
710
+ "requirements": normalized_requirements,
711
+ "ecosystem": ecosystem,
712
+ "package_manager": package_manager,
713
+ "manifest_path": manifest_path,
714
+ "manifest_id": manifest_id,
715
+ "repo_url": repo_url,
716
+ "manifest_file": (
717
+ manifest_path.split("/")[-1] if manifest_path else ""
718
+ ),
719
+ }
720
+ )
721
+ dependencies_added += 1
722
+
723
+ if dependencies_added > 0:
724
+ repo_name = repo_url.split("/")[-1] if repo_url else "repository"
725
+ logger.info(f"Found {dependencies_added} dependencies in {repo_name}")
726
+
727
+
728
+ def _canonicalize_dependency_name(name: str, package_manager: Optional[str]) -> str:
729
+ """
730
+ Canonicalize dependency names based on ecosystem conventions.
731
+ """
732
+ if not name:
733
+ return name
734
+
735
+ # For Python packages, use existing canonicalization
736
+ if package_manager in ["PIP", "CONDA"]:
737
+ try:
738
+ from packaging.utils import canonicalize_name
739
+
740
+ return str(canonicalize_name(name))
741
+ except ImportError:
742
+ # Fallback if packaging not available
743
+ return name.lower().replace("_", "-")
744
+
745
+ # For other ecosystems, use lowercase
746
+ return name.lower()
747
+
748
+
536
749
  def _transform_python_requirements(
537
750
  requirements_list: List[str],
538
751
  repo_url: str,
@@ -667,11 +880,15 @@ def load_github_repos(
667
880
  ON CREATE SET r.firstseen = timestamp()
668
881
  SET r.lastupdated = r.UpdateTag
669
882
  """
670
- neo4j_session.run(
671
- ingest_repo,
672
- RepoData=repo_data,
673
- UpdateTag=update_tag,
674
- )
883
+
884
+ def _ingest_repos_tx(tx: neo4j.Transaction) -> None:
885
+ tx.run(
886
+ ingest_repo,
887
+ RepoData=repo_data,
888
+ UpdateTag=update_tag,
889
+ ).consume()
890
+
891
+ execute_write_with_retry(neo4j_session, _ingest_repos_tx)
675
892
 
676
893
 
677
894
  @timeit
@@ -701,11 +918,14 @@ def load_github_languages(
701
918
  ON CREATE SET r.firstseen = timestamp()
702
919
  SET r.lastupdated = $UpdateTag"""
703
920
 
704
- neo4j_session.run(
705
- ingest_languages,
706
- Languages=repo_languages,
707
- UpdateTag=update_tag,
708
- )
921
+ def _ingest_languages_tx(tx: neo4j.Transaction) -> None:
922
+ tx.run(
923
+ ingest_languages,
924
+ Languages=repo_languages,
925
+ UpdateTag=update_tag,
926
+ ).consume()
927
+
928
+ execute_write_with_retry(neo4j_session, _ingest_languages_tx)
709
929
 
710
930
 
711
931
  @timeit
@@ -721,31 +941,43 @@ def load_github_owners(
721
941
  :param repo_owners: list of owner to repo mappings
722
942
  :return: Nothing
723
943
  """
724
- for owner in repo_owners:
725
- ingest_owner_template = Template(
726
- """
727
- MERGE (user:$account_type{id: $Id})
728
- ON CREATE SET user.firstseen = timestamp()
729
- SET user.username = $UserName,
730
- user.lastupdated = $UpdateTag
731
- WITH user
732
-
733
- MATCH (repo:GitHubRepository{id: $RepoId})
734
- MERGE (user)<-[r:OWNER]-(repo)
735
- ON CREATE SET r.firstseen = timestamp()
736
- SET r.lastupdated = $UpdateTag""",
737
- )
944
+ ingest_owner_template = Template(
945
+ """
946
+ MERGE (user:$account_type{id: $Id})
947
+ ON CREATE SET user.firstseen = timestamp()
948
+ SET user.username = $UserName,
949
+ user.lastupdated = $UpdateTag
950
+ WITH user
951
+
952
+ MATCH (repo:GitHubRepository{id: $RepoId})
953
+ MERGE (user)<-[r:OWNER]-(repo)
954
+ ON CREATE SET r.firstseen = timestamp()
955
+ SET r.lastupdated = $UpdateTag""",
956
+ )
738
957
 
739
- account_type = {"User": "GitHubUser", "Organization": "GitHubOrganization"}
958
+ account_type = {"User": "GitHubUser", "Organization": "GitHubOrganization"}
740
959
 
741
- neo4j_session.run(
960
+ def _ingest_owner_tx(
961
+ tx: neo4j.Transaction,
962
+ owner_record: Dict,
963
+ owner_label: str,
964
+ ) -> None:
965
+ tx.run(
742
966
  ingest_owner_template.safe_substitute(
743
- account_type=account_type[owner["type"]],
967
+ account_type=owner_label,
744
968
  ),
745
- Id=owner["owner_id"],
746
- UserName=owner["owner"],
747
- RepoId=owner["repo_id"],
969
+ Id=owner_record["owner_id"],
970
+ UserName=owner_record["owner"],
971
+ RepoId=owner_record["repo_id"],
748
972
  UpdateTag=update_tag,
973
+ ).consume()
974
+
975
+ for owner in repo_owners:
976
+ execute_write_with_retry(
977
+ neo4j_session,
978
+ _ingest_owner_tx,
979
+ owner,
980
+ account_type[owner["type"]],
749
981
  )
750
982
 
751
983
 
@@ -776,12 +1008,159 @@ def load_collaborators(
776
1008
  SET o.lastupdated = $UpdateTag
777
1009
  """,
778
1010
  )
779
- for collab_type in collaborators.keys():
780
- relationship_label = f"{affiliation}_COLLAB_{collab_type}"
781
- neo4j_session.run(
1011
+
1012
+ def _ingest_collaborators_tx(
1013
+ tx: neo4j.Transaction,
1014
+ relationship_label: str,
1015
+ collaborator_data: List[Dict],
1016
+ ) -> None:
1017
+ tx.run(
782
1018
  query.safe_substitute(rel_label=relationship_label),
783
- UserData=collaborators[collab_type],
1019
+ UserData=collaborator_data,
784
1020
  UpdateTag=update_tag,
1021
+ ).consume()
1022
+
1023
+ for collab_type, collab_data in collaborators.items():
1024
+ relationship_label = f"{affiliation}_COLLAB_{collab_type}"
1025
+ execute_write_with_retry(
1026
+ neo4j_session,
1027
+ _ingest_collaborators_tx,
1028
+ relationship_label,
1029
+ collab_data,
1030
+ )
1031
+
1032
+
1033
+ @timeit
1034
+ def load_python_requirements(
1035
+ neo4j_session: neo4j.Session,
1036
+ update_tag: int,
1037
+ requirements_objects: List[Dict],
1038
+ ) -> None:
1039
+ query = """
1040
+ UNWIND $Requirements AS req
1041
+ MERGE (lib:PythonLibrary:Dependency{id: req.id})
1042
+ ON CREATE SET lib.firstseen = timestamp(),
1043
+ lib.name = req.name
1044
+ SET lib.lastupdated = $UpdateTag,
1045
+ lib.version = req.version
1046
+
1047
+ WITH lib, req
1048
+ MATCH (repo:GitHubRepository{id: req.repo_url})
1049
+ MERGE (repo)-[r:REQUIRES]->(lib)
1050
+ ON CREATE SET r.firstseen = timestamp()
1051
+ SET r.lastupdated = $UpdateTag,
1052
+ r.specifier = req.specifier
1053
+ """
1054
+
1055
+ def _ingest_requirements_tx(tx: neo4j.Transaction) -> None:
1056
+ tx.run(
1057
+ query,
1058
+ Requirements=requirements_objects,
1059
+ UpdateTag=update_tag,
1060
+ ).consume()
1061
+
1062
+ execute_write_with_retry(neo4j_session, _ingest_requirements_tx)
1063
+
1064
+
1065
+ @timeit
1066
+ def load_github_dependencies(
1067
+ neo4j_session: neo4j.Session,
1068
+ update_tag: int,
1069
+ dependencies: List[Dict],
1070
+ ) -> None:
1071
+ """
1072
+ Ingest GitHub dependency data into Neo4j using the new data model
1073
+ :param neo4j_session: Neo4J session object for server communication
1074
+ :param update_tag: Timestamp used to determine data freshness
1075
+ :param dependencies: List of dependency objects from GitHub's dependency graph
1076
+ :return: Nothing
1077
+ """
1078
+ # Group dependencies by both repo_url and manifest_id for schema-based loading
1079
+ dependencies_by_repo_and_manifest = defaultdict(list)
1080
+
1081
+ for dep in dependencies:
1082
+ repo_url = dep["repo_url"]
1083
+ manifest_id = dep["manifest_id"]
1084
+ # Create a key combining both repo_url and manifest_id
1085
+ group_key = (repo_url, manifest_id)
1086
+ # Remove repo_url and manifest_id from the dependency object since we'll pass them as kwargs
1087
+ dep_without_kwargs = {
1088
+ k: v for k, v in dep.items() if k not in ["repo_url", "manifest_id"]
1089
+ }
1090
+ dependencies_by_repo_and_manifest[group_key].append(dep_without_kwargs)
1091
+
1092
+ # Load dependencies for each repository/manifest combination separately
1093
+ for (
1094
+ repo_url,
1095
+ manifest_id,
1096
+ ), group_dependencies in dependencies_by_repo_and_manifest.items():
1097
+ load_data(
1098
+ neo4j_session,
1099
+ GitHubDependencySchema(),
1100
+ group_dependencies,
1101
+ lastupdated=update_tag,
1102
+ repo_url=repo_url,
1103
+ manifest_id=manifest_id,
1104
+ )
1105
+
1106
+
1107
+ @timeit
1108
+ def load_github_dependency_manifests(
1109
+ neo4j_session: neo4j.Session,
1110
+ update_tag: int,
1111
+ manifests: List[Dict],
1112
+ ) -> None:
1113
+ """
1114
+ Ingest GitHub dependency manifests into Neo4j
1115
+ """
1116
+ manifests_by_repo = defaultdict(list)
1117
+
1118
+ for manifest in manifests:
1119
+ repo_url = manifest["repo_url"]
1120
+ manifests_by_repo[repo_url].append(manifest)
1121
+
1122
+ # Load manifests for each repository separately
1123
+ for repo_url, repo_manifests in manifests_by_repo.items():
1124
+ load_data(
1125
+ neo4j_session,
1126
+ DependencyGraphManifestSchema(),
1127
+ repo_manifests,
1128
+ lastupdated=update_tag,
1129
+ repo_url=repo_url,
1130
+ )
1131
+
1132
+
1133
+ @timeit
1134
+ def cleanup_github_dependencies(
1135
+ neo4j_session: neo4j.Session,
1136
+ common_job_parameters: Dict[str, Any],
1137
+ repo_urls: List[str],
1138
+ ) -> None:
1139
+ # Run cleanup for each repository separately
1140
+ for repo_url in repo_urls:
1141
+ cleanup_params = {**common_job_parameters, "repo_url": repo_url}
1142
+ GraphJob.from_node_schema(GitHubDependencySchema(), cleanup_params).run(
1143
+ neo4j_session
1144
+ )
1145
+
1146
+
1147
+ @timeit
1148
+ def cleanup_github_manifests(
1149
+ neo4j_session: neo4j.Session,
1150
+ common_job_parameters: Dict[str, Any],
1151
+ repo_urls: List[str],
1152
+ ) -> None:
1153
+ """
1154
+ Delete GitHub dependency manifests and their relationships from the graph if they were not updated in the last sync.
1155
+ :param neo4j_session: Neo4j session
1156
+ :param common_job_parameters: Common job parameters containing UPDATE_TAG
1157
+ :param repo_urls: List of repository URLs to clean up manifests for
1158
+ """
1159
+ # Run cleanup for each repository separately
1160
+ for repo_url in repo_urls:
1161
+ cleanup_params = {**common_job_parameters, "repo_url": repo_url}
1162
+ GraphJob.from_node_schema(DependencyGraphManifestSchema(), cleanup_params).run(
1163
+ neo4j_session
785
1164
  )
786
1165
 
787
1166
 
@@ -823,33 +1202,15 @@ def load(
823
1202
  common_job_parameters["UPDATE_TAG"],
824
1203
  repo_data["python_requirements"],
825
1204
  )
826
-
827
-
828
- @timeit
829
- def load_python_requirements(
830
- neo4j_session: neo4j.Session,
831
- update_tag: int,
832
- requirements_objects: List[Dict],
833
- ) -> None:
834
- query = """
835
- UNWIND $Requirements AS req
836
- MERGE (lib:PythonLibrary:Dependency{id: req.id})
837
- ON CREATE SET lib.firstseen = timestamp(),
838
- lib.name = req.name
839
- SET lib.lastupdated = $UpdateTag,
840
- lib.version = req.version
841
-
842
- WITH lib, req
843
- MATCH (repo:GitHubRepository{id: req.repo_url})
844
- MERGE (repo)-[r:REQUIRES]->(lib)
845
- ON CREATE SET r.firstseen = timestamp()
846
- SET r.lastupdated = $UpdateTag,
847
- r.specifier = req.specifier
848
- """
849
- neo4j_session.run(
850
- query,
851
- Requirements=requirements_objects,
852
- UpdateTag=update_tag,
1205
+ load_github_dependency_manifests(
1206
+ neo4j_session,
1207
+ common_job_parameters["UPDATE_TAG"],
1208
+ repo_data["manifests"],
1209
+ )
1210
+ load_github_dependencies(
1211
+ neo4j_session,
1212
+ common_job_parameters["UPDATE_TAG"],
1213
+ repo_data["dependencies"],
853
1214
  )
854
1215
 
855
1216
 
@@ -896,4 +1257,21 @@ def sync(
896
1257
  )
897
1258
  repo_data = transform(repos_json, direct_collabs, outside_collabs)
898
1259
  load(neo4j_session, common_job_parameters, repo_data)
1260
+
1261
+ # Collect repository URLs that have dependencies for cleanup
1262
+ repo_urls_with_dependencies = list(
1263
+ {dep["repo_url"] for dep in repo_data["dependencies"]}
1264
+ )
1265
+ cleanup_github_dependencies(
1266
+ neo4j_session, common_job_parameters, repo_urls_with_dependencies
1267
+ )
1268
+
1269
+ # Collect repository URLs that have manifests for cleanup
1270
+ repo_urls_with_manifests = list(
1271
+ {manifest["repo_url"] for manifest in repo_data["manifests"]}
1272
+ )
1273
+ cleanup_github_manifests(
1274
+ neo4j_session, common_job_parameters, repo_urls_with_manifests
1275
+ )
1276
+
899
1277
  run_cleanup_job("github_repos_cleanup.json", neo4j_session, common_job_parameters)