cartography 0.104.0rc2__py3-none-any.whl → 0.123.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (642) hide show
  1. cartography/_version.py +16 -3
  2. cartography/cli.py +466 -5
  3. cartography/client/aws/__init__.py +19 -0
  4. cartography/client/aws/ecr.py +51 -0
  5. cartography/client/core/tx.py +357 -8
  6. cartography/config.py +153 -0
  7. cartography/data/azure_permission_relationships.yaml +20 -0
  8. cartography/data/gcp_permission_relationships.yaml +21 -0
  9. cartography/data/indexes.cypher +0 -186
  10. cartography/data/jobs/analysis/aws_ec2_keypair_analysis.json +2 -2
  11. cartography/data/jobs/analysis/keycloak_inheritance.json +30 -0
  12. cartography/data/jobs/cleanup/gcp_compute_vpc_cleanup.json +0 -12
  13. cartography/data/jobs/cleanup/github_repos_cleanup.json +2 -0
  14. cartography/driftdetect/cli.py +3 -2
  15. cartography/graph/cleanupbuilder.py +198 -41
  16. cartography/graph/job.py +54 -6
  17. cartography/graph/querybuilder.py +528 -27
  18. cartography/graph/statement.py +5 -1
  19. cartography/intel/airbyte/__init__.py +105 -0
  20. cartography/intel/airbyte/connections.py +120 -0
  21. cartography/intel/airbyte/destinations.py +81 -0
  22. cartography/intel/airbyte/organizations.py +59 -0
  23. cartography/intel/airbyte/sources.py +78 -0
  24. cartography/intel/airbyte/tags.py +64 -0
  25. cartography/intel/airbyte/users.py +106 -0
  26. cartography/intel/airbyte/util.py +122 -0
  27. cartography/intel/airbyte/workspaces.py +63 -0
  28. cartography/intel/aws/__init__.py +24 -9
  29. cartography/intel/aws/acm.py +124 -0
  30. cartography/intel/aws/apigateway.py +253 -22
  31. cartography/intel/aws/apigatewayv2.py +116 -0
  32. cartography/intel/aws/cloudtrail.py +17 -39
  33. cartography/intel/aws/cloudtrail_management_events.py +962 -0
  34. cartography/intel/aws/cloudwatch.py +150 -4
  35. cartography/intel/aws/codebuild.py +132 -0
  36. cartography/intel/aws/cognito.py +201 -0
  37. cartography/intel/aws/config.py +7 -3
  38. cartography/intel/aws/ec2/elastic_ip_addresses.py +3 -1
  39. cartography/intel/aws/ec2/instances.py +25 -1
  40. cartography/intel/aws/ec2/internet_gateways.py +4 -2
  41. cartography/intel/aws/ec2/load_balancer_v2s.py +11 -5
  42. cartography/intel/aws/ec2/network_interfaces.py +5 -1
  43. cartography/intel/aws/ec2/reserved_instances.py +3 -1
  44. cartography/intel/aws/ec2/security_groups.py +140 -122
  45. cartography/intel/aws/ec2/snapshots.py +47 -84
  46. cartography/intel/aws/ec2/subnets.py +37 -63
  47. cartography/intel/aws/ec2/tgw.py +11 -5
  48. cartography/intel/aws/ec2/volumes.py +1 -1
  49. cartography/intel/aws/ec2/vpc.py +140 -124
  50. cartography/intel/aws/ec2/vpc_peerings.py +262 -125
  51. cartography/intel/aws/ecr.py +269 -98
  52. cartography/intel/aws/ecr_image_layers.py +923 -0
  53. cartography/intel/aws/ecs.py +251 -380
  54. cartography/intel/aws/efs.py +179 -11
  55. cartography/intel/aws/elasticache.py +102 -79
  56. cartography/intel/aws/elasticsearch.py +13 -4
  57. cartography/intel/aws/eventbridge.py +164 -0
  58. cartography/intel/aws/glue.py +181 -0
  59. cartography/intel/aws/guardduty.py +443 -0
  60. cartography/intel/aws/iam.py +750 -493
  61. cartography/intel/aws/identitycenter.py +605 -83
  62. cartography/intel/aws/inspector.py +221 -105
  63. cartography/intel/aws/kms.py +173 -201
  64. cartography/intel/aws/lambda_function.py +272 -189
  65. cartography/intel/aws/organizations.py +10 -9
  66. cartography/intel/aws/permission_relationships.py +10 -20
  67. cartography/intel/aws/rds.py +337 -446
  68. cartography/intel/aws/redshift.py +9 -4
  69. cartography/intel/aws/resourcegroupstaggingapi.py +78 -19
  70. cartography/intel/aws/resources.py +18 -0
  71. cartography/intel/aws/route53.py +386 -332
  72. cartography/intel/aws/s3.py +322 -14
  73. cartography/intel/aws/secretsmanager.py +81 -49
  74. cartography/intel/aws/securityhub.py +3 -1
  75. cartography/intel/aws/sns.py +62 -2
  76. cartography/intel/aws/sqs.py +36 -90
  77. cartography/intel/aws/ssm.py +3 -5
  78. cartography/intel/azure/__init__.py +202 -48
  79. cartography/intel/azure/aks.py +175 -0
  80. cartography/intel/azure/app_service.py +105 -0
  81. cartography/intel/azure/compute.py +59 -112
  82. cartography/intel/azure/container_instances.py +95 -0
  83. cartography/intel/azure/cosmosdb.py +222 -361
  84. cartography/intel/azure/data_factory.py +85 -0
  85. cartography/intel/azure/data_factory_dataset.py +128 -0
  86. cartography/intel/azure/data_factory_linked_service.py +119 -0
  87. cartography/intel/azure/data_factory_pipeline.py +142 -0
  88. cartography/intel/azure/data_lake.py +124 -0
  89. cartography/intel/azure/event_grid.py +94 -0
  90. cartography/intel/azure/functions.py +124 -0
  91. cartography/intel/azure/load_balancers.py +263 -0
  92. cartography/intel/azure/logic_apps.py +101 -0
  93. cartography/intel/azure/monitor.py +105 -0
  94. cartography/intel/azure/network.py +467 -0
  95. cartography/intel/azure/permission_relationships.py +466 -0
  96. cartography/intel/azure/rbac.py +309 -0
  97. cartography/intel/azure/resource_groups.py +82 -0
  98. cartography/intel/azure/security_center.py +106 -0
  99. cartography/intel/azure/sql.py +145 -292
  100. cartography/intel/azure/storage.py +185 -262
  101. cartography/intel/azure/subscription.py +21 -43
  102. cartography/intel/azure/tenant.py +39 -30
  103. cartography/intel/azure/util/common.py +13 -0
  104. cartography/intel/azure/util/credentials.py +49 -174
  105. cartography/intel/azure/util/tag.py +41 -0
  106. cartography/intel/create_indexes.py +2 -1
  107. cartography/intel/crowdstrike/spotlight.py +5 -2
  108. cartography/intel/dns.py +5 -2
  109. cartography/intel/entra/__init__.py +100 -1
  110. cartography/intel/entra/app_role_assignments.py +284 -0
  111. cartography/intel/entra/applications.py +182 -0
  112. cartography/intel/entra/federation/__init__.py +0 -0
  113. cartography/intel/entra/federation/aws_identity_center.py +77 -0
  114. cartography/intel/entra/groups.py +198 -0
  115. cartography/intel/entra/ou.py +48 -24
  116. cartography/intel/entra/service_principals.py +217 -0
  117. cartography/intel/entra/users.py +105 -57
  118. cartography/intel/gcp/__init__.py +334 -396
  119. cartography/intel/gcp/bigtable_app_profile.py +101 -0
  120. cartography/intel/gcp/bigtable_backup.py +91 -0
  121. cartography/intel/gcp/bigtable_cluster.py +93 -0
  122. cartography/intel/gcp/bigtable_instance.py +86 -0
  123. cartography/intel/gcp/bigtable_table.py +87 -0
  124. cartography/intel/gcp/cai.py +292 -0
  125. cartography/intel/gcp/clients.py +112 -0
  126. cartography/intel/gcp/compute.py +128 -119
  127. cartography/intel/gcp/crm/__init__.py +0 -0
  128. cartography/intel/gcp/crm/folders.py +114 -0
  129. cartography/intel/gcp/crm/orgs.py +70 -0
  130. cartography/intel/gcp/crm/projects.py +120 -0
  131. cartography/intel/gcp/dns.py +83 -169
  132. cartography/intel/gcp/gke.py +72 -113
  133. cartography/intel/gcp/iam.py +111 -91
  134. cartography/intel/gcp/permission_relationships.py +394 -0
  135. cartography/intel/gcp/policy_bindings.py +225 -0
  136. cartography/intel/gcp/storage.py +75 -159
  137. cartography/intel/github/__init__.py +62 -25
  138. cartography/intel/github/commits.py +423 -0
  139. cartography/intel/github/repos.py +463 -85
  140. cartography/intel/github/teams.py +3 -3
  141. cartography/intel/github/users.py +5 -0
  142. cartography/intel/github/util.py +12 -0
  143. cartography/intel/googleworkspace/__init__.py +193 -0
  144. cartography/intel/googleworkspace/devices.py +254 -0
  145. cartography/intel/googleworkspace/groups.py +568 -0
  146. cartography/intel/googleworkspace/oauth_apps.py +259 -0
  147. cartography/intel/googleworkspace/tenant.py +85 -0
  148. cartography/intel/googleworkspace/users.py +138 -0
  149. cartography/intel/gsuite/__init__.py +17 -9
  150. cartography/intel/gsuite/groups.py +291 -0
  151. cartography/intel/gsuite/users.py +142 -0
  152. cartography/intel/jamf/computers.py +7 -1
  153. cartography/intel/keycloak/__init__.py +153 -0
  154. cartography/intel/keycloak/authenticationexecutions.py +322 -0
  155. cartography/intel/keycloak/authenticationflows.py +77 -0
  156. cartography/intel/keycloak/clients.py +187 -0
  157. cartography/intel/keycloak/groups.py +126 -0
  158. cartography/intel/keycloak/identityproviders.py +94 -0
  159. cartography/intel/keycloak/organizations.py +163 -0
  160. cartography/intel/keycloak/realms.py +61 -0
  161. cartography/intel/keycloak/roles.py +202 -0
  162. cartography/intel/keycloak/scopes.py +73 -0
  163. cartography/intel/keycloak/users.py +70 -0
  164. cartography/intel/keycloak/util.py +47 -0
  165. cartography/intel/kubernetes/__init__.py +60 -14
  166. cartography/intel/kubernetes/clusters.py +86 -0
  167. cartography/intel/kubernetes/eks.py +402 -0
  168. cartography/intel/kubernetes/namespaces.py +59 -57
  169. cartography/intel/kubernetes/pods.py +168 -75
  170. cartography/intel/kubernetes/rbac.py +597 -0
  171. cartography/intel/kubernetes/secrets.py +95 -45
  172. cartography/intel/kubernetes/services.py +131 -67
  173. cartography/intel/kubernetes/util.py +142 -14
  174. cartography/intel/oci/iam.py +23 -9
  175. cartography/intel/oci/organizations.py +3 -1
  176. cartography/intel/oci/utils.py +28 -5
  177. cartography/intel/okta/applications.py +15 -5
  178. cartography/intel/okta/awssaml.py +14 -10
  179. cartography/intel/okta/factors.py +3 -1
  180. cartography/intel/okta/groups.py +5 -2
  181. cartography/intel/okta/organization.py +3 -1
  182. cartography/intel/okta/origins.py +3 -1
  183. cartography/intel/okta/roles.py +5 -2
  184. cartography/intel/okta/users.py +10 -2
  185. cartography/intel/ontology/__init__.py +44 -0
  186. cartography/intel/ontology/devices.py +54 -0
  187. cartography/intel/ontology/users.py +54 -0
  188. cartography/intel/ontology/utils.py +176 -0
  189. cartography/intel/pagerduty/escalation_policies.py +13 -6
  190. cartography/intel/pagerduty/schedules.py +9 -4
  191. cartography/intel/pagerduty/services.py +7 -3
  192. cartography/intel/pagerduty/teams.py +5 -2
  193. cartography/intel/pagerduty/users.py +3 -1
  194. cartography/intel/pagerduty/vendors.py +3 -1
  195. cartography/intel/scaleway/__init__.py +127 -0
  196. cartography/intel/scaleway/iam/__init__.py +0 -0
  197. cartography/intel/scaleway/iam/apikeys.py +71 -0
  198. cartography/intel/scaleway/iam/applications.py +71 -0
  199. cartography/intel/scaleway/iam/groups.py +71 -0
  200. cartography/intel/scaleway/iam/users.py +71 -0
  201. cartography/intel/scaleway/instances/__init__.py +0 -0
  202. cartography/intel/scaleway/instances/flexibleips.py +86 -0
  203. cartography/intel/scaleway/instances/instances.py +92 -0
  204. cartography/intel/scaleway/projects.py +79 -0
  205. cartography/intel/scaleway/storage/__init__.py +0 -0
  206. cartography/intel/scaleway/storage/snapshots.py +86 -0
  207. cartography/intel/scaleway/storage/volumes.py +84 -0
  208. cartography/intel/scaleway/utils.py +37 -0
  209. cartography/intel/sentinelone/__init__.py +75 -0
  210. cartography/intel/sentinelone/account.py +140 -0
  211. cartography/intel/sentinelone/agent.py +139 -0
  212. cartography/intel/sentinelone/api.py +124 -0
  213. cartography/intel/sentinelone/application.py +248 -0
  214. cartography/intel/sentinelone/cve.py +119 -0
  215. cartography/intel/sentinelone/utils.py +28 -0
  216. cartography/intel/slack/__init__.py +78 -0
  217. cartography/intel/slack/channels.py +80 -0
  218. cartography/intel/slack/groups.py +90 -0
  219. cartography/intel/slack/teams.py +65 -0
  220. cartography/intel/slack/users.py +57 -0
  221. cartography/intel/slack/utils.py +29 -0
  222. cartography/intel/spacelift/__init__.py +161 -0
  223. cartography/intel/spacelift/account.py +73 -0
  224. cartography/intel/spacelift/ec2_ownership.py +280 -0
  225. cartography/intel/spacelift/runs.py +463 -0
  226. cartography/intel/spacelift/spaces.py +112 -0
  227. cartography/intel/spacelift/stacks.py +119 -0
  228. cartography/intel/spacelift/util.py +122 -0
  229. cartography/intel/spacelift/workerpools.py +131 -0
  230. cartography/intel/spacelift/workers.py +128 -0
  231. cartography/intel/trivy/__init__.py +272 -0
  232. cartography/intel/trivy/scanner.py +386 -0
  233. cartography/models/airbyte/__init__.py +0 -0
  234. cartography/models/airbyte/connection.py +138 -0
  235. cartography/models/airbyte/destination.py +75 -0
  236. cartography/models/airbyte/organization.py +19 -0
  237. cartography/models/airbyte/source.py +75 -0
  238. cartography/models/airbyte/stream.py +74 -0
  239. cartography/models/airbyte/tag.py +69 -0
  240. cartography/models/airbyte/user.py +115 -0
  241. cartography/models/airbyte/workspace.py +46 -0
  242. cartography/models/anthropic/apikey.py +4 -0
  243. cartography/models/anthropic/user.py +4 -0
  244. cartography/models/aws/acm/__init__.py +0 -0
  245. cartography/models/aws/acm/certificate.py +75 -0
  246. cartography/models/aws/apigateway/__init__.py +0 -0
  247. cartography/models/aws/apigateway/apigatewaydeployment.py +74 -0
  248. cartography/models/aws/apigateway/apigatewayintegration.py +79 -0
  249. cartography/models/aws/apigateway/apigatewaymethod.py +74 -0
  250. cartography/models/aws/apigatewayv2/__init__.py +0 -0
  251. cartography/models/aws/apigatewayv2/apigatewayv2.py +53 -0
  252. cartography/models/aws/cloudtrail/management_events.py +153 -0
  253. cartography/models/aws/cloudtrail/trail.py +45 -0
  254. cartography/models/aws/cloudwatch/log_metric_filter.py +79 -0
  255. cartography/models/aws/cloudwatch/metric_alarm.py +53 -0
  256. cartography/models/aws/codebuild/__init__.py +0 -0
  257. cartography/models/aws/codebuild/project.py +49 -0
  258. cartography/models/aws/cognito/__init__.py +0 -0
  259. cartography/models/aws/cognito/identity_pool.py +70 -0
  260. cartography/models/aws/cognito/user_pool.py +47 -0
  261. cartography/models/aws/dynamodb/tables.py +2 -0
  262. cartography/models/aws/ec2/instances.py +25 -1
  263. cartography/models/aws/ec2/networkinterfaces.py +4 -0
  264. cartography/models/aws/ec2/security_group_rules.py +109 -0
  265. cartography/models/aws/ec2/security_groups.py +90 -0
  266. cartography/models/aws/ec2/snapshots.py +58 -0
  267. cartography/models/aws/ec2/subnet_instance.py +2 -0
  268. cartography/models/aws/ec2/subnet_networkinterface.py +2 -0
  269. cartography/models/aws/ec2/subnets.py +65 -0
  270. cartography/models/aws/ec2/volumes.py +20 -0
  271. cartography/models/aws/ec2/vpc.py +46 -0
  272. cartography/models/aws/ec2/vpc_cidr.py +102 -0
  273. cartography/models/aws/ec2/vpc_peering.py +157 -0
  274. cartography/models/aws/ecr/__init__.py +0 -0
  275. cartography/models/aws/ecr/image.py +146 -0
  276. cartography/models/aws/ecr/image_layer.py +107 -0
  277. cartography/models/aws/ecr/repository.py +72 -0
  278. cartography/models/aws/ecr/repository_image.py +95 -0
  279. cartography/models/aws/ecs/__init__.py +0 -0
  280. cartography/models/aws/ecs/clusters.py +64 -0
  281. cartography/models/aws/ecs/container_definitions.py +93 -0
  282. cartography/models/aws/ecs/container_instances.py +84 -0
  283. cartography/models/aws/ecs/containers.py +101 -0
  284. cartography/models/aws/ecs/services.py +134 -0
  285. cartography/models/aws/ecs/task_definitions.py +135 -0
  286. cartography/models/aws/ecs/tasks.py +134 -0
  287. cartography/models/aws/efs/access_point.py +77 -0
  288. cartography/models/aws/efs/file_system.py +60 -0
  289. cartography/models/aws/efs/mount_target.py +29 -2
  290. cartography/models/aws/elasticache/__init__.py +0 -0
  291. cartography/models/aws/elasticache/cluster.py +65 -0
  292. cartography/models/aws/elasticache/topic.py +67 -0
  293. cartography/models/aws/eventbridge/__init__.py +0 -0
  294. cartography/models/aws/eventbridge/rule.py +77 -0
  295. cartography/models/aws/eventbridge/target.py +71 -0
  296. cartography/models/aws/glue/__init__.py +0 -0
  297. cartography/models/aws/glue/connection.py +51 -0
  298. cartography/models/aws/glue/job.py +69 -0
  299. cartography/models/aws/guardduty/__init__.py +1 -0
  300. cartography/models/aws/guardduty/detectors.py +50 -0
  301. cartography/models/aws/guardduty/findings.py +121 -0
  302. cartography/models/aws/iam/access_key.py +103 -0
  303. cartography/models/aws/iam/account_role.py +24 -0
  304. cartography/models/aws/iam/federated_principal.py +60 -0
  305. cartography/models/aws/iam/group.py +60 -0
  306. cartography/models/aws/iam/group_membership.py +27 -0
  307. cartography/models/aws/iam/inline_policy.py +78 -0
  308. cartography/models/aws/iam/managed_policy.py +51 -0
  309. cartography/models/aws/iam/policy_statement.py +57 -0
  310. cartography/models/aws/iam/role.py +83 -0
  311. cartography/models/aws/iam/root_principal.py +52 -0
  312. cartography/models/aws/iam/service_principal.py +30 -0
  313. cartography/models/aws/iam/sts_assumerole_allow.py +38 -0
  314. cartography/models/aws/iam/user.py +59 -0
  315. cartography/models/aws/identitycenter/awsidentitycenter.py +1 -0
  316. cartography/models/aws/identitycenter/awspermissionset.py +70 -0
  317. cartography/models/aws/identitycenter/awssogroup.py +70 -0
  318. cartography/models/aws/identitycenter/awsssouser.py +49 -9
  319. cartography/models/aws/inspector/findings.py +37 -0
  320. cartography/models/aws/inspector/packages.py +1 -31
  321. cartography/models/aws/kms/__init__.py +0 -0
  322. cartography/models/aws/kms/aliases.py +86 -0
  323. cartography/models/aws/kms/grants.py +65 -0
  324. cartography/models/aws/kms/keys.py +88 -0
  325. cartography/models/aws/lambda_function/__init__.py +0 -0
  326. cartography/models/aws/lambda_function/alias.py +74 -0
  327. cartography/models/aws/lambda_function/event_source_mapping.py +88 -0
  328. cartography/models/aws/lambda_function/lambda_function.py +91 -0
  329. cartography/models/aws/lambda_function/layer.py +72 -0
  330. cartography/models/aws/rds/__init__.py +0 -0
  331. cartography/models/aws/rds/cluster.py +91 -0
  332. cartography/models/aws/rds/event_subscription.py +146 -0
  333. cartography/models/aws/rds/instance.py +156 -0
  334. cartography/models/aws/rds/snapshot.py +108 -0
  335. cartography/models/aws/rds/subnet_group.py +101 -0
  336. cartography/models/aws/route53/__init__.py +0 -0
  337. cartography/models/aws/route53/dnsrecord.py +235 -0
  338. cartography/models/aws/route53/nameserver.py +63 -0
  339. cartography/models/aws/route53/subzone.py +40 -0
  340. cartography/models/aws/route53/zone.py +47 -0
  341. cartography/models/aws/s3/notification.py +24 -0
  342. cartography/models/aws/secretsmanager/secret.py +106 -0
  343. cartography/models/aws/secretsmanager/secret_version.py +0 -2
  344. cartography/models/aws/sns/topic_subscription.py +74 -0
  345. cartography/models/aws/sqs/__init__.py +0 -0
  346. cartography/models/aws/sqs/queue.py +89 -0
  347. cartography/models/azure/__init__.py +0 -0
  348. cartography/models/azure/aks_cluster.py +54 -0
  349. cartography/models/azure/aks_nodepool.py +54 -0
  350. cartography/models/azure/app_service.py +59 -0
  351. cartography/models/azure/container_instance.py +57 -0
  352. cartography/models/azure/cosmosdb/__init__.py +0 -0
  353. cartography/models/azure/cosmosdb/account.py +77 -0
  354. cartography/models/azure/cosmosdb/accountfailoverpolicy.py +77 -0
  355. cartography/models/azure/cosmosdb/cassandrakeyspace.py +82 -0
  356. cartography/models/azure/cosmosdb/cassandratable.py +81 -0
  357. cartography/models/azure/cosmosdb/corspolicy.py +74 -0
  358. cartography/models/azure/cosmosdb/dblocation.py +120 -0
  359. cartography/models/azure/cosmosdb/mongodbcollection.py +82 -0
  360. cartography/models/azure/cosmosdb/mongodbdatabase.py +78 -0
  361. cartography/models/azure/cosmosdb/privateendpointconnection.py +81 -0
  362. cartography/models/azure/cosmosdb/sqlcontainer.py +88 -0
  363. cartography/models/azure/cosmosdb/sqldatabase.py +78 -0
  364. cartography/models/azure/cosmosdb/tableresource.py +76 -0
  365. cartography/models/azure/cosmosdb/virtualnetworkrule.py +78 -0
  366. cartography/models/azure/data_factory/__init__.py +0 -0
  367. cartography/models/azure/data_factory/data_factory.py +51 -0
  368. cartography/models/azure/data_factory/data_factory_dataset.py +94 -0
  369. cartography/models/azure/data_factory/data_factory_linked_service.py +78 -0
  370. cartography/models/azure/data_factory/data_factory_pipeline.py +93 -0
  371. cartography/models/azure/data_lake_filesystem.py +51 -0
  372. cartography/models/azure/event_grid_topic.py +57 -0
  373. cartography/models/azure/function_app.py +59 -0
  374. cartography/models/azure/load_balancer/__init__.py +0 -0
  375. cartography/models/azure/load_balancer/load_balancer.py +49 -0
  376. cartography/models/azure/load_balancer/load_balancer_backend_pool.py +73 -0
  377. cartography/models/azure/load_balancer/load_balancer_frontend_ip.py +75 -0
  378. cartography/models/azure/load_balancer/load_balancer_inbound_nat_rule.py +78 -0
  379. cartography/models/azure/load_balancer/load_balancer_rule.py +108 -0
  380. cartography/models/azure/logic_apps.py +56 -0
  381. cartography/models/azure/monitor.py +54 -0
  382. cartography/models/azure/network_interface.py +112 -0
  383. cartography/models/azure/network_security_group.py +50 -0
  384. cartography/models/azure/permission_relationships.py +60 -0
  385. cartography/models/azure/principal.py +41 -0
  386. cartography/models/azure/public_ip_address.py +50 -0
  387. cartography/models/azure/rbac.py +268 -0
  388. cartography/models/azure/resource_groups.py +52 -0
  389. cartography/models/azure/security_center.py +50 -0
  390. cartography/models/azure/sql/__init__.py +0 -0
  391. cartography/models/azure/sql/databasethreatdetectionpolicy.py +85 -0
  392. cartography/models/azure/sql/elasticpool.py +77 -0
  393. cartography/models/azure/sql/failovergroup.py +73 -0
  394. cartography/models/azure/sql/recoverabledatabase.py +75 -0
  395. cartography/models/azure/sql/replicationlink.py +81 -0
  396. cartography/models/azure/sql/restorabledroppeddatabase.py +82 -0
  397. cartography/models/azure/sql/restorepoint.py +74 -0
  398. cartography/models/azure/sql/serveradadministrator.py +74 -0
  399. cartography/models/azure/sql/serverdnsalias.py +71 -0
  400. cartography/models/azure/sql/sqldatabase.py +85 -0
  401. cartography/models/azure/sql/sqlserver.py +50 -0
  402. cartography/models/azure/sql/transparentdataencryption.py +76 -0
  403. cartography/models/azure/storage/__init__.py +0 -0
  404. cartography/models/azure/storage/account.py +59 -0
  405. cartography/models/azure/storage/blobcontainer.py +85 -0
  406. cartography/models/azure/storage/blobservice.py +71 -0
  407. cartography/models/azure/storage/fileservice.py +71 -0
  408. cartography/models/azure/storage/fileshare.py +82 -0
  409. cartography/models/azure/storage/queue.py +71 -0
  410. cartography/models/azure/storage/queueservice.py +73 -0
  411. cartography/models/azure/storage/table.py +72 -0
  412. cartography/models/azure/storage/tableservice.py +73 -0
  413. cartography/models/azure/subnet.py +101 -0
  414. cartography/models/azure/subscription.py +47 -0
  415. cartography/models/azure/tags/__init__.py +0 -0
  416. cartography/models/azure/tags/storage_tag.py +40 -0
  417. cartography/models/azure/tags/tag.py +37 -0
  418. cartography/models/azure/tenant.py +17 -0
  419. cartography/models/azure/virtual_network.py +49 -0
  420. cartography/models/azure/vm/__init__.py +0 -0
  421. cartography/models/azure/vm/datadisk.py +80 -0
  422. cartography/models/azure/vm/disk.py +55 -0
  423. cartography/models/azure/vm/snapshot.py +56 -0
  424. cartography/models/azure/vm/virtualmachine.py +59 -0
  425. cartography/models/bigfix/bigfix_computer.py +1 -1
  426. cartography/models/cloudflare/member.py +4 -0
  427. cartography/models/core/common.py +1 -0
  428. cartography/models/core/nodes.py +15 -2
  429. cartography/models/core/relationships.py +44 -0
  430. cartography/models/crowdstrike/hosts.py +1 -1
  431. cartography/models/digitalocean/droplet.py +2 -0
  432. cartography/models/duo/endpoint.py +1 -1
  433. cartography/models/duo/phone.py +2 -2
  434. cartography/models/duo/user.py +4 -0
  435. cartography/models/entra/app_role_assignment.py +115 -0
  436. cartography/models/entra/application.py +49 -0
  437. cartography/models/entra/entra_user_to_aws_sso.py +41 -0
  438. cartography/models/entra/group.py +117 -0
  439. cartography/models/entra/service_principal.py +104 -0
  440. cartography/models/entra/user.py +42 -51
  441. cartography/models/gcp/__init__.py +0 -0
  442. cartography/models/gcp/bigtable/__init__.py +0 -0
  443. cartography/models/gcp/bigtable/app_profile.py +94 -0
  444. cartography/models/gcp/bigtable/backup.py +91 -0
  445. cartography/models/gcp/bigtable/cluster.py +73 -0
  446. cartography/models/gcp/bigtable/instance.py +52 -0
  447. cartography/models/gcp/bigtable/table.py +69 -0
  448. cartography/models/gcp/compute/__init__.py +0 -0
  449. cartography/models/gcp/compute/subnet.py +74 -0
  450. cartography/models/gcp/compute/vpc.py +50 -0
  451. cartography/models/gcp/crm/__init__.py +0 -0
  452. cartography/models/gcp/crm/folders.py +98 -0
  453. cartography/models/gcp/crm/organizations.py +21 -0
  454. cartography/models/gcp/crm/projects.py +100 -0
  455. cartography/models/gcp/dns.py +109 -0
  456. cartography/models/gcp/gke.py +69 -0
  457. cartography/models/gcp/iam.py +3 -0
  458. cartography/models/gcp/permission_relationships.py +61 -0
  459. cartography/models/gcp/policy_bindings.py +93 -0
  460. cartography/models/gcp/storage/__init__.py +0 -0
  461. cartography/models/gcp/storage/bucket.py +119 -0
  462. cartography/models/github/commits.py +63 -0
  463. cartography/models/github/dependencies.py +73 -0
  464. cartography/models/github/manifests.py +49 -0
  465. cartography/models/github/users.py +10 -0
  466. cartography/models/googleworkspace/__init__.py +0 -0
  467. cartography/models/googleworkspace/device.py +132 -0
  468. cartography/models/googleworkspace/group.py +382 -0
  469. cartography/models/googleworkspace/oauth_app.py +124 -0
  470. cartography/models/googleworkspace/tenant.py +30 -0
  471. cartography/models/googleworkspace/user.py +113 -0
  472. cartography/models/gsuite/__init__.py +0 -0
  473. cartography/models/gsuite/group.py +218 -0
  474. cartography/models/gsuite/tenant.py +29 -0
  475. cartography/models/gsuite/user.py +107 -0
  476. cartography/models/kandji/device.py +1 -2
  477. cartography/models/keycloak/__init__.py +0 -0
  478. cartography/models/keycloak/authenticationexecution.py +160 -0
  479. cartography/models/keycloak/authenticationflow.py +54 -0
  480. cartography/models/keycloak/client.py +179 -0
  481. cartography/models/keycloak/group.py +101 -0
  482. cartography/models/keycloak/identityprovider.py +89 -0
  483. cartography/models/keycloak/organization.py +116 -0
  484. cartography/models/keycloak/organizationdomain.py +73 -0
  485. cartography/models/keycloak/realm.py +173 -0
  486. cartography/models/keycloak/role.py +126 -0
  487. cartography/models/keycloak/scope.py +73 -0
  488. cartography/models/keycloak/user.py +55 -0
  489. cartography/models/kubernetes/__init__.py +0 -0
  490. cartography/models/kubernetes/clusterrolebindings.py +138 -0
  491. cartography/models/kubernetes/clusterroles.py +52 -0
  492. cartography/models/kubernetes/clusters.py +26 -0
  493. cartography/models/kubernetes/containers.py +133 -0
  494. cartography/models/kubernetes/groups.py +107 -0
  495. cartography/models/kubernetes/namespaces.py +51 -0
  496. cartography/models/kubernetes/oidc.py +51 -0
  497. cartography/models/kubernetes/pods.py +80 -0
  498. cartography/models/kubernetes/rolebindings.py +159 -0
  499. cartography/models/kubernetes/roles.py +76 -0
  500. cartography/models/kubernetes/secrets.py +79 -0
  501. cartography/models/kubernetes/serviceaccounts.py +77 -0
  502. cartography/models/kubernetes/services.py +108 -0
  503. cartography/models/kubernetes/users.py +105 -0
  504. cartography/models/lastpass/user.py +4 -0
  505. cartography/models/ontology/__init__.py +0 -0
  506. cartography/models/ontology/device.py +137 -0
  507. cartography/models/ontology/mapping/__init__.py +76 -0
  508. cartography/models/ontology/mapping/data/__init__.py +0 -0
  509. cartography/models/ontology/mapping/data/apikeys.py +93 -0
  510. cartography/models/ontology/mapping/data/computeinstance.py +95 -0
  511. cartography/models/ontology/mapping/data/containers.py +88 -0
  512. cartography/models/ontology/mapping/data/databases.py +182 -0
  513. cartography/models/ontology/mapping/data/devices.py +194 -0
  514. cartography/models/ontology/mapping/data/thirdpartyapps.py +140 -0
  515. cartography/models/ontology/mapping/data/useraccounts.py +416 -0
  516. cartography/models/ontology/mapping/data/users.py +63 -0
  517. cartography/models/ontology/mapping/specs.py +85 -0
  518. cartography/models/ontology/user.py +51 -0
  519. cartography/models/openai/adminapikey.py +4 -0
  520. cartography/models/openai/apikey.py +4 -0
  521. cartography/models/openai/user.py +4 -0
  522. cartography/models/scaleway/__init__.py +0 -0
  523. cartography/models/scaleway/iam/__init__.py +0 -0
  524. cartography/models/scaleway/iam/apikey.py +100 -0
  525. cartography/models/scaleway/iam/application.py +52 -0
  526. cartography/models/scaleway/iam/group.py +95 -0
  527. cartography/models/scaleway/iam/user.py +64 -0
  528. cartography/models/scaleway/instance/__init__.py +0 -0
  529. cartography/models/scaleway/instance/flexibleip.py +52 -0
  530. cartography/models/scaleway/instance/instance.py +120 -0
  531. cartography/models/scaleway/organization.py +19 -0
  532. cartography/models/scaleway/project.py +48 -0
  533. cartography/models/scaleway/storage/__init__.py +0 -0
  534. cartography/models/scaleway/storage/snapshot.py +78 -0
  535. cartography/models/scaleway/storage/volume.py +51 -0
  536. cartography/models/sentinelone/__init__.py +1 -0
  537. cartography/models/sentinelone/account.py +40 -0
  538. cartography/models/sentinelone/agent.py +50 -0
  539. cartography/models/sentinelone/application.py +44 -0
  540. cartography/models/sentinelone/application_version.py +96 -0
  541. cartography/models/sentinelone/cve.py +73 -0
  542. cartography/models/slack/__init__.py +0 -0
  543. cartography/models/slack/channels.py +92 -0
  544. cartography/models/slack/group.py +129 -0
  545. cartography/models/slack/team.py +22 -0
  546. cartography/models/slack/user.py +62 -0
  547. cartography/models/snipeit/asset.py +2 -0
  548. cartography/models/snipeit/user.py +4 -0
  549. cartography/models/spacelift/__init__.py +0 -0
  550. cartography/models/spacelift/cloudtrailevent.py +120 -0
  551. cartography/models/spacelift/run.py +162 -0
  552. cartography/models/spacelift/space.py +131 -0
  553. cartography/models/spacelift/spaceliftaccount.py +31 -0
  554. cartography/models/spacelift/spaceliftgitcommit.py +157 -0
  555. cartography/models/spacelift/stack.py +96 -0
  556. cartography/models/spacelift/user.py +63 -0
  557. cartography/models/spacelift/worker.py +97 -0
  558. cartography/models/spacelift/workerpool.py +90 -0
  559. cartography/models/tailscale/device.py +2 -1
  560. cartography/models/tailscale/user.py +6 -1
  561. cartography/models/trivy/__init__.py +0 -0
  562. cartography/models/trivy/findings.py +66 -0
  563. cartography/models/trivy/fix.py +66 -0
  564. cartography/models/trivy/package.py +71 -0
  565. cartography/rules/README.md +1 -0
  566. cartography/rules/__init__.py +0 -0
  567. cartography/rules/cli.py +261 -0
  568. cartography/rules/data/__init__.py +0 -0
  569. cartography/rules/data/rules/__init__.py +46 -0
  570. cartography/rules/data/rules/cloud_security_product_deactivated.py +49 -0
  571. cartography/rules/data/rules/compute_instance_exposed.py +51 -0
  572. cartography/rules/data/rules/database_instance_exposed.py +53 -0
  573. cartography/rules/data/rules/delegation_boundary_modifiable.py +90 -0
  574. cartography/rules/data/rules/identity_administration_privileges.py +100 -0
  575. cartography/rules/data/rules/inactive_user_active_accounts.py +48 -0
  576. cartography/rules/data/rules/malicious_npm_dependencies_shai_hulud.py +2222 -0
  577. cartography/rules/data/rules/mfa_missing.py +46 -0
  578. cartography/rules/data/rules/object_storage_public.py +100 -0
  579. cartography/rules/data/rules/policy_administration_privileges.py +104 -0
  580. cartography/rules/data/rules/unmanaged_accounts.py +43 -0
  581. cartography/rules/data/rules/workload_identity_admin_capabilities.py +193 -0
  582. cartography/rules/formatters.py +108 -0
  583. cartography/rules/runners.py +216 -0
  584. cartography/rules/spec/__init__.py +0 -0
  585. cartography/rules/spec/model.py +267 -0
  586. cartography/rules/spec/result.py +38 -0
  587. cartography/sync.py +25 -5
  588. cartography/util.py +101 -31
  589. {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/METADATA +61 -22
  590. cartography-0.123.0.dist-info/RECORD +856 -0
  591. {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/entry_points.txt +1 -0
  592. cartography/data/jobs/cleanup/aws_dns_cleanup.json +0 -65
  593. cartography/data/jobs/cleanup/aws_import_account_access_key_cleanup.json +0 -17
  594. cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json +0 -24
  595. cartography/data/jobs/cleanup/aws_import_groups_cleanup.json +0 -13
  596. cartography/data/jobs/cleanup/aws_import_identity_center_cleanup.json +0 -16
  597. cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +0 -50
  598. cartography/data/jobs/cleanup/aws_import_principals_cleanup.json +0 -30
  599. cartography/data/jobs/cleanup/aws_import_rds_clusters_cleanup.json +0 -23
  600. cartography/data/jobs/cleanup/aws_import_rds_instances_cleanup.json +0 -47
  601. cartography/data/jobs/cleanup/aws_import_rds_snapshots_cleanup.json +0 -23
  602. cartography/data/jobs/cleanup/aws_import_roles_cleanup.json +0 -13
  603. cartography/data/jobs/cleanup/aws_import_secrets_cleanup.json +0 -8
  604. cartography/data/jobs/cleanup/aws_import_snapshots_cleanup.json +0 -30
  605. cartography/data/jobs/cleanup/aws_import_users_cleanup.json +0 -8
  606. cartography/data/jobs/cleanup/aws_import_vpc_cleanup.json +0 -23
  607. cartography/data/jobs/cleanup/aws_import_vpc_peering_cleanup.json +0 -45
  608. cartography/data/jobs/cleanup/aws_kms_details.json +0 -10
  609. cartography/data/jobs/cleanup/azure_cosmosdb_cassandra_keyspace_cleanup.json +0 -25
  610. cartography/data/jobs/cleanup/azure_cosmosdb_cors_details.json +0 -15
  611. cartography/data/jobs/cleanup/azure_cosmosdb_mongodb_database_cleanup.json +0 -25
  612. cartography/data/jobs/cleanup/azure_cosmosdb_sql_database_cleanup.json +0 -25
  613. cartography/data/jobs/cleanup/azure_cosmosdb_table_resources_cleanup.json +0 -15
  614. cartography/data/jobs/cleanup/azure_database_account_cleanup.json +0 -85
  615. cartography/data/jobs/cleanup/azure_import_disks_cleanup.json +0 -15
  616. cartography/data/jobs/cleanup/azure_import_snapshots_cleanup.json +0 -15
  617. cartography/data/jobs/cleanup/azure_import_virtual_machines_cleanup.json +0 -25
  618. cartography/data/jobs/cleanup/azure_sql_server_cleanup.json +0 -125
  619. cartography/data/jobs/cleanup/azure_storage_account_cleanup.json +0 -95
  620. cartography/data/jobs/cleanup/azure_subscriptions_cleanup.json +0 -14
  621. cartography/data/jobs/cleanup/azure_tenant_cleanup.json +0 -9
  622. cartography/data/jobs/cleanup/gcp_compute_vpc_subnet_cleanup.json +0 -35
  623. cartography/data/jobs/cleanup/gcp_crm_folder_cleanup.json +0 -23
  624. cartography/data/jobs/cleanup/gcp_crm_organization_cleanup.json +0 -17
  625. cartography/data/jobs/cleanup/gcp_crm_project_cleanup.json +0 -23
  626. cartography/data/jobs/cleanup/gcp_dns_cleanup.json +0 -29
  627. cartography/data/jobs/cleanup/gcp_gke_cluster_cleanup.json +0 -17
  628. cartography/data/jobs/cleanup/gcp_storage_bucket_cleanup.json +0 -29
  629. cartography/data/jobs/cleanup/gsuite_ingest_groups_cleanup.json +0 -23
  630. cartography/data/jobs/cleanup/gsuite_ingest_users_cleanup.json +0 -11
  631. cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
  632. cartography/intel/gcp/crm.py +0 -355
  633. cartography/intel/gsuite/api.py +0 -342
  634. cartography-0.104.0rc2.dist-info/RECORD +0 -455
  635. /cartography/data/jobs/{analysis → scoped_analysis}/aws_s3acl_analysis.json +0 -0
  636. /cartography/models/aws/{apigateway.py → apigateway/apigateway.py} +0 -0
  637. /cartography/models/aws/{apigatewaycertificate.py → apigateway/apigatewaycertificate.py} +0 -0
  638. /cartography/models/aws/{apigatewayresource.py → apigateway/apigatewayresource.py} +0 -0
  639. /cartography/models/aws/{apigatewaystage.py → apigateway/apigatewaystage.py} +0 -0
  640. {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/WHEEL +0 -0
  641. {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/licenses/LICENSE +0 -0
  642. {cartography-0.104.0rc2.dist-info → cartography-0.123.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,423 @@
1
+ import logging
2
+ from datetime import datetime
3
+ from datetime import timedelta
4
+ from datetime import timezone
5
+ from typing import Any
6
+
7
+ import neo4j
8
+
9
+ from cartography.client.core.tx import load_matchlinks
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.intel.github.util import fetch_page
12
+ from cartography.models.github.commits import GitHubUserCommittedToRepoRel
13
+ from cartography.util import timeit
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ GITHUB_REPO_COMMITS_PAGINATED_GRAPHQL = """
19
+ query($login: String!, $repo: String!, $since: GitTimestamp!, $cursor: String) {
20
+ organization(login: $login) {
21
+ repository(name: $repo) {
22
+ name
23
+ url
24
+ defaultBranchRef {
25
+ target {
26
+ ... on Commit {
27
+ history(first: 100, since: $since, after: $cursor) {
28
+ pageInfo {
29
+ endCursor
30
+ hasNextPage
31
+ }
32
+ nodes {
33
+ committedDate
34
+ author {
35
+ user {
36
+ url
37
+ }
38
+ }
39
+ }
40
+ }
41
+ }
42
+ }
43
+ }
44
+ }
45
+ }
46
+ rateLimit {
47
+ limit
48
+ cost
49
+ remaining
50
+ resetAt
51
+ }
52
+ }
53
+ """
54
+
55
+
56
+ @timeit
57
+ def get_repo_commits(
58
+ token: str,
59
+ api_url: str,
60
+ organization: str,
61
+ repo_name: str,
62
+ since_date: datetime,
63
+ ) -> list[dict[str, Any]]:
64
+ """
65
+ Retrieve commits from a GitHub repository since a specific date.
66
+
67
+ :param token: The Github API token as string.
68
+ :param api_url: The Github v4 API endpoint as string.
69
+ :param organization: The name of the target Github organization as string.
70
+ :param repo_name: The name of the target Github repository as string.
71
+ :param since_date: The datetime to fetch commits since.
72
+ :return: A list of commits from the repository.
73
+ """
74
+ # Convert datetime to ISO format for GraphQL (GitTimestamp requires 'Z' suffix for UTC)
75
+ since_iso = since_date.strftime("%Y-%m-%dT%H:%M:%SZ")
76
+
77
+ logger.debug(f"Fetching commits for {organization}/{repo_name} since {since_iso}")
78
+
79
+ all_commits = []
80
+ cursor = None
81
+ has_next_page = True
82
+
83
+ while has_next_page:
84
+ response = fetch_page(
85
+ token,
86
+ api_url,
87
+ organization,
88
+ GITHUB_REPO_COMMITS_PAGINATED_GRAPHQL,
89
+ cursor,
90
+ repo=repo_name,
91
+ since=since_iso,
92
+ )
93
+
94
+ # Navigate to the nested commit history
95
+ repo_data = response.get("data", {}).get("organization", {}).get("repository")
96
+ if not repo_data:
97
+ logger.warning(f"No repository data found for {organization}/{repo_name}")
98
+ break
99
+
100
+ default_branch = repo_data.get("defaultBranchRef")
101
+ if not default_branch:
102
+ logger.debug(f"Repository {organization}/{repo_name} has no default branch")
103
+ break
104
+
105
+ target = default_branch.get("target")
106
+ if not target:
107
+ logger.debug(
108
+ f"Repository {organization}/{repo_name} default branch has no target"
109
+ )
110
+ break
111
+
112
+ history = target.get("history")
113
+ if not history:
114
+ logger.debug(f"Repository {organization}/{repo_name} has no commit history")
115
+ break
116
+
117
+ # Add commits from this page
118
+ commits = history.get("nodes", [])
119
+ all_commits.extend(commits)
120
+
121
+ # Check pagination
122
+ page_info = history.get("pageInfo", {})
123
+ has_next_page = page_info.get("hasNextPage", False)
124
+ cursor = page_info.get("endCursor")
125
+
126
+ return all_commits
127
+
128
+
129
+ def process_repo_commits_batch(
130
+ neo4j_session: neo4j.Session,
131
+ token: str,
132
+ api_url: str,
133
+ organization: str,
134
+ repo_names: list[str],
135
+ update_tag: int,
136
+ lookback_days: int = 30,
137
+ batch_size: int = 10,
138
+ ) -> None:
139
+ """
140
+ Process repository commits in batches to save memory and API quota.
141
+
142
+ :param neo4j_session: Neo4j session for database interface.
143
+ :param token: The Github API token as string.
144
+ :param api_url: The Github v4 API endpoint as string.
145
+ :param organization: The name of the target Github organization as string.
146
+ :param repo_names: List of repository names to process.
147
+ :param update_tag: Timestamp used to determine data freshness.
148
+ :param lookback_days: Number of days to look back for commits.
149
+ :param batch_size: Number of repositories to process in each batch.
150
+ """
151
+ # Calculate lookback date based on configured days
152
+ lookback_date = datetime.now(timezone.utc) - timedelta(days=lookback_days)
153
+
154
+ logger.info(f"Processing {len(repo_names)} repositories in batches of {batch_size}")
155
+
156
+ # Process repositories in batches
157
+ for i in range(0, len(repo_names), batch_size):
158
+ batch = repo_names[i : i + batch_size]
159
+ logger.info(
160
+ f"Processing batch {i // batch_size + 1}: {len(batch)} repositories"
161
+ )
162
+
163
+ # Process each repository in the batch
164
+ batch_relationships = []
165
+
166
+ for repo_name in batch:
167
+ try:
168
+ commits = get_repo_commits(
169
+ token,
170
+ api_url,
171
+ organization,
172
+ repo_name,
173
+ lookback_date,
174
+ )
175
+
176
+ # Transform commits for this single repo immediately
177
+ repo_relationships = transform_single_repo_commits_to_relationships(
178
+ repo_name,
179
+ commits,
180
+ organization,
181
+ )
182
+ batch_relationships.extend(repo_relationships)
183
+
184
+ logger.debug(
185
+ f"Found {len(commits)} commits in {repo_name}, created {len(repo_relationships)} relationships"
186
+ )
187
+
188
+ except Exception:
189
+ logger.warning(
190
+ f"Failed to fetch commits for {repo_name}", exc_info=True
191
+ )
192
+ continue
193
+
194
+ # Load this batch of relationships
195
+ if batch_relationships:
196
+ logger.info(f"Loading {len(batch_relationships)} relationships for batch")
197
+ load_github_commit_relationships(
198
+ neo4j_session,
199
+ batch_relationships,
200
+ organization,
201
+ update_tag,
202
+ )
203
+
204
+ # Clear memory for next batch
205
+ batch_relationships.clear()
206
+
207
+
208
+ def transform_single_repo_commits_to_relationships(
209
+ repo_name: str,
210
+ commits: list[dict[str, Any]],
211
+ organization: str,
212
+ ) -> list[dict[str, Any]]:
213
+ """
214
+ Transform commits from a single repository into user-repository relationships.
215
+ Optimized for memory efficiency by processing one repo at a time.
216
+
217
+ :param repo_name: The repository name.
218
+ :param commits: List of commit data from the repository.
219
+ :param organization: The Github organization name.
220
+ :return: List of user-repository relationship records for this repo.
221
+ """
222
+ if not commits:
223
+ return []
224
+
225
+ repo_url = f"https://github.com/{organization}/{repo_name}"
226
+
227
+ # Count commits and track date ranges per user for this repo
228
+ user_commit_data: dict[str, dict[str, Any]] = {}
229
+
230
+ for commit in commits:
231
+ # Get user URL from author, skip if not available
232
+ author_user = commit.get("author", {}).get("user")
233
+ if not author_user or not author_user.get("url"):
234
+ continue
235
+
236
+ user_url = author_user["url"]
237
+ commit_date = datetime.fromisoformat(
238
+ commit["committedDate"].replace("Z", "+00:00")
239
+ )
240
+
241
+ if user_url not in user_commit_data:
242
+ user_commit_data[user_url] = {"commit_count": 0, "commit_dates": []}
243
+
244
+ user_commit_data[user_url]["commit_count"] += 1
245
+ user_commit_data[user_url]["commit_dates"].append(commit_date)
246
+
247
+ # Transform to relationship records
248
+ relationships = []
249
+ for user_url, data in user_commit_data.items():
250
+ commit_dates = data["commit_dates"]
251
+ relationships.append(
252
+ {
253
+ "user_url": user_url,
254
+ "repo_url": repo_url,
255
+ "commit_count": data["commit_count"],
256
+ "last_commit_date": max(commit_dates).isoformat(),
257
+ "first_commit_date": min(commit_dates).isoformat(),
258
+ }
259
+ )
260
+
261
+ return relationships
262
+
263
+
264
+ def transform_commits_to_user_repo_relationships(
265
+ commits_by_repo: dict[str, list[dict[str, Any]]],
266
+ organization: str,
267
+ ) -> list[dict[str, Any]]:
268
+ """
269
+ Transform commit data into user-repository relationship data.
270
+
271
+ :param commits_by_repo: Dict mapping repo names to commit lists.
272
+ :param organization: The Github organization name.
273
+ :return: List of user-repository relationship records.
274
+ """
275
+ logger.info("Transforming commit data into user-repository relationships")
276
+
277
+ # Group commits by user and repository
278
+ user_repo_commits: dict[tuple[str, str], list[dict[str, Any]]] = {}
279
+
280
+ for repo_name, commits in commits_by_repo.items():
281
+ repo_url = f"https://github.com/{organization}/{repo_name}"
282
+
283
+ for commit in commits:
284
+ # Use author if available, otherwise use committer
285
+ commit_user = commit.get("author", {}).get("user") or commit.get(
286
+ "committer", {}
287
+ ).get("user")
288
+
289
+ if not commit_user or not commit_user.get("url"):
290
+ continue
291
+
292
+ user_url = commit_user["url"]
293
+ key = (user_url, repo_url)
294
+
295
+ if key not in user_repo_commits:
296
+ user_repo_commits[key] = []
297
+
298
+ user_repo_commits[key].append(commit)
299
+
300
+ # Transform to relationship records
301
+ relationships = []
302
+ for (user_url, repo_url), commits in user_repo_commits.items():
303
+ commit_dates = [
304
+ datetime.fromisoformat(commit["committedDate"].replace("Z", "+00:00"))
305
+ for commit in commits
306
+ ]
307
+
308
+ relationships.append(
309
+ {
310
+ "user_url": user_url,
311
+ "repo_url": repo_url,
312
+ "commit_count": len(commits),
313
+ "last_commit_date": max(commit_dates).isoformat(),
314
+ "first_commit_date": min(commit_dates).isoformat(),
315
+ }
316
+ )
317
+
318
+ logger.info(f"Created {len(relationships)} user-repository relationships")
319
+ return relationships
320
+
321
+
322
+ @timeit
323
+ def load_github_commit_relationships(
324
+ neo4j_session: neo4j.Session,
325
+ commit_relationships: list[dict[str, Any]],
326
+ organization: str,
327
+ update_tag: int,
328
+ ) -> None:
329
+ """
330
+ Load GitHub user-repository commit relationships using MatchLinks.
331
+
332
+ :param neo4j_session: Neo4j session for database interface.
333
+ :param commit_relationships: List of user-repository relationship records.
334
+ :param organization: The Github organization name for sub-resource scoping.
335
+ :param update_tag: Timestamp used to determine data freshness.
336
+ """
337
+ if not commit_relationships:
338
+ logger.info("No commit relationships to load")
339
+ return
340
+
341
+ logger.info(
342
+ f"Loading {len(commit_relationships)} user-repository commit relationships"
343
+ )
344
+
345
+ # Use organization URL as the sub-resource identifier
346
+ org_url = f"https://github.com/{organization}"
347
+
348
+ load_matchlinks(
349
+ neo4j_session,
350
+ GitHubUserCommittedToRepoRel(),
351
+ commit_relationships,
352
+ lastupdated=update_tag,
353
+ _sub_resource_label="GitHubOrganization",
354
+ _sub_resource_id=org_url,
355
+ )
356
+
357
+
358
+ @timeit
359
+ def cleanup_github_commit_relationships(
360
+ neo4j_session: neo4j.Session,
361
+ organization: str,
362
+ update_tag: int,
363
+ ) -> None:
364
+ """
365
+ Clean up stale GitHub user-repository commit relationships.
366
+
367
+ :param neo4j_session: Neo4j session for database interface.
368
+ :param organization: The Github organization name.
369
+ :param update_tag: Timestamp used to determine data freshness.
370
+ """
371
+ logger.debug("Cleaning up GitHub user-repository commit relationships")
372
+
373
+ org_url = f"https://github.com/{organization}"
374
+
375
+ GraphJob.from_matchlink(
376
+ GitHubUserCommittedToRepoRel(),
377
+ "GitHubOrganization",
378
+ org_url,
379
+ update_tag,
380
+ ).run(neo4j_session)
381
+
382
+
383
+ @timeit
384
+ def sync_github_commits(
385
+ neo4j_session: neo4j.Session,
386
+ token: str,
387
+ api_url: str,
388
+ organization: str,
389
+ repo_names: list[str],
390
+ update_tag: int,
391
+ lookback_days: int = 30,
392
+ ) -> None:
393
+ """
394
+ Sync GitHub commit relationships for the specified lookback period.
395
+ Uses batch processing to minimize memory usage and API quota consumption.
396
+
397
+ :param neo4j_session: Neo4j session for database interface.
398
+ :param token: The Github API token as string.
399
+ :param api_url: The Github v4 API endpoint as string.
400
+ :param organization: The name of the target Github organization as string.
401
+ :param repo_names: List of repository names to sync commits for.
402
+ :param update_tag: Timestamp used to determine data freshness.
403
+ :param lookback_days: Number of days to look back for commits.
404
+ """
405
+ logger.info(f"Starting GitHub commits sync for organization: {organization}")
406
+
407
+ # Process repositories in batches to save memory and API quota
408
+ # This approach processes repos in batches, transforms immediately, and loads in batches
409
+ process_repo_commits_batch(
410
+ neo4j_session,
411
+ token,
412
+ api_url,
413
+ organization,
414
+ repo_names,
415
+ update_tag,
416
+ lookback_days=lookback_days,
417
+ batch_size=10, # Process 10 repos at a time
418
+ )
419
+
420
+ # Cleanup stale relationships after all batches are processed
421
+ cleanup_github_commit_relationships(neo4j_session, organization, update_tag)
422
+
423
+ logger.info("Completed GitHub commits sync")