rucio 37.0.0rc1__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (487)
  1. rucio/__init__.py +17 -0
  2. rucio/alembicrevision.py +15 -0
  3. rucio/cli/__init__.py +14 -0
  4. rucio/cli/account.py +216 -0
  5. rucio/cli/bin_legacy/__init__.py +13 -0
  6. rucio/cli/bin_legacy/rucio.py +2825 -0
  7. rucio/cli/bin_legacy/rucio_admin.py +2500 -0
  8. rucio/cli/command.py +272 -0
  9. rucio/cli/config.py +72 -0
  10. rucio/cli/did.py +191 -0
  11. rucio/cli/download.py +128 -0
  12. rucio/cli/lifetime_exception.py +33 -0
  13. rucio/cli/replica.py +162 -0
  14. rucio/cli/rse.py +293 -0
  15. rucio/cli/rule.py +158 -0
  16. rucio/cli/scope.py +40 -0
  17. rucio/cli/subscription.py +73 -0
  18. rucio/cli/upload.py +60 -0
  19. rucio/cli/utils.py +226 -0
  20. rucio/client/__init__.py +15 -0
  21. rucio/client/accountclient.py +432 -0
  22. rucio/client/accountlimitclient.py +183 -0
  23. rucio/client/baseclient.py +983 -0
  24. rucio/client/client.py +120 -0
  25. rucio/client/configclient.py +126 -0
  26. rucio/client/credentialclient.py +59 -0
  27. rucio/client/didclient.py +868 -0
  28. rucio/client/diracclient.py +56 -0
  29. rucio/client/downloadclient.py +1783 -0
  30. rucio/client/exportclient.py +44 -0
  31. rucio/client/fileclient.py +50 -0
  32. rucio/client/importclient.py +42 -0
  33. rucio/client/lifetimeclient.py +90 -0
  34. rucio/client/lockclient.py +109 -0
  35. rucio/client/metaconventionsclient.py +140 -0
  36. rucio/client/pingclient.py +44 -0
  37. rucio/client/replicaclient.py +452 -0
  38. rucio/client/requestclient.py +125 -0
  39. rucio/client/richclient.py +317 -0
  40. rucio/client/rseclient.py +746 -0
  41. rucio/client/ruleclient.py +294 -0
  42. rucio/client/scopeclient.py +90 -0
  43. rucio/client/subscriptionclient.py +173 -0
  44. rucio/client/touchclient.py +82 -0
  45. rucio/client/uploadclient.py +969 -0
  46. rucio/common/__init__.py +13 -0
  47. rucio/common/bittorrent.py +234 -0
  48. rucio/common/cache.py +111 -0
  49. rucio/common/checksum.py +168 -0
  50. rucio/common/client.py +122 -0
  51. rucio/common/config.py +788 -0
  52. rucio/common/constants.py +217 -0
  53. rucio/common/constraints.py +17 -0
  54. rucio/common/didtype.py +237 -0
  55. rucio/common/dumper/__init__.py +342 -0
  56. rucio/common/dumper/consistency.py +497 -0
  57. rucio/common/dumper/data_models.py +362 -0
  58. rucio/common/dumper/path_parsing.py +75 -0
  59. rucio/common/exception.py +1208 -0
  60. rucio/common/extra.py +31 -0
  61. rucio/common/logging.py +420 -0
  62. rucio/common/pcache.py +1409 -0
  63. rucio/common/plugins.py +185 -0
  64. rucio/common/policy.py +93 -0
  65. rucio/common/schema/__init__.py +200 -0
  66. rucio/common/schema/generic.py +416 -0
  67. rucio/common/schema/generic_multi_vo.py +395 -0
  68. rucio/common/stomp_utils.py +423 -0
  69. rucio/common/stopwatch.py +55 -0
  70. rucio/common/test_rucio_server.py +154 -0
  71. rucio/common/types.py +483 -0
  72. rucio/common/utils.py +1688 -0
  73. rucio/core/__init__.py +13 -0
  74. rucio/core/account.py +496 -0
  75. rucio/core/account_counter.py +236 -0
  76. rucio/core/account_limit.py +425 -0
  77. rucio/core/authentication.py +620 -0
  78. rucio/core/config.py +437 -0
  79. rucio/core/credential.py +224 -0
  80. rucio/core/did.py +3004 -0
  81. rucio/core/did_meta_plugins/__init__.py +252 -0
  82. rucio/core/did_meta_plugins/did_column_meta.py +331 -0
  83. rucio/core/did_meta_plugins/did_meta_plugin_interface.py +165 -0
  84. rucio/core/did_meta_plugins/elasticsearch_meta.py +407 -0
  85. rucio/core/did_meta_plugins/filter_engine.py +672 -0
  86. rucio/core/did_meta_plugins/json_meta.py +240 -0
  87. rucio/core/did_meta_plugins/mongo_meta.py +229 -0
  88. rucio/core/did_meta_plugins/postgres_meta.py +352 -0
  89. rucio/core/dirac.py +237 -0
  90. rucio/core/distance.py +187 -0
  91. rucio/core/exporter.py +59 -0
  92. rucio/core/heartbeat.py +363 -0
  93. rucio/core/identity.py +301 -0
  94. rucio/core/importer.py +260 -0
  95. rucio/core/lifetime_exception.py +377 -0
  96. rucio/core/lock.py +577 -0
  97. rucio/core/message.py +288 -0
  98. rucio/core/meta_conventions.py +203 -0
  99. rucio/core/monitor.py +448 -0
  100. rucio/core/naming_convention.py +195 -0
  101. rucio/core/nongrid_trace.py +136 -0
  102. rucio/core/oidc.py +1463 -0
  103. rucio/core/permission/__init__.py +161 -0
  104. rucio/core/permission/generic.py +1124 -0
  105. rucio/core/permission/generic_multi_vo.py +1144 -0
  106. rucio/core/quarantined_replica.py +224 -0
  107. rucio/core/replica.py +4483 -0
  108. rucio/core/replica_sorter.py +362 -0
  109. rucio/core/request.py +3091 -0
  110. rucio/core/rse.py +2079 -0
  111. rucio/core/rse_counter.py +185 -0
  112. rucio/core/rse_expression_parser.py +459 -0
  113. rucio/core/rse_selector.py +304 -0
  114. rucio/core/rule.py +4484 -0
  115. rucio/core/rule_grouping.py +1620 -0
  116. rucio/core/scope.py +181 -0
  117. rucio/core/subscription.py +362 -0
  118. rucio/core/topology.py +490 -0
  119. rucio/core/trace.py +375 -0
  120. rucio/core/transfer.py +1531 -0
  121. rucio/core/vo.py +169 -0
  122. rucio/core/volatile_replica.py +151 -0
  123. rucio/daemons/__init__.py +13 -0
  124. rucio/daemons/abacus/__init__.py +13 -0
  125. rucio/daemons/abacus/account.py +116 -0
  126. rucio/daemons/abacus/collection_replica.py +124 -0
  127. rucio/daemons/abacus/rse.py +117 -0
  128. rucio/daemons/atropos/__init__.py +13 -0
  129. rucio/daemons/atropos/atropos.py +242 -0
  130. rucio/daemons/auditor/__init__.py +289 -0
  131. rucio/daemons/auditor/hdfs.py +97 -0
  132. rucio/daemons/auditor/srmdumps.py +355 -0
  133. rucio/daemons/automatix/__init__.py +13 -0
  134. rucio/daemons/automatix/automatix.py +304 -0
  135. rucio/daemons/badreplicas/__init__.py +13 -0
  136. rucio/daemons/badreplicas/minos.py +322 -0
  137. rucio/daemons/badreplicas/minos_temporary_expiration.py +171 -0
  138. rucio/daemons/badreplicas/necromancer.py +196 -0
  139. rucio/daemons/bb8/__init__.py +13 -0
  140. rucio/daemons/bb8/bb8.py +353 -0
  141. rucio/daemons/bb8/common.py +759 -0
  142. rucio/daemons/bb8/nuclei_background_rebalance.py +153 -0
  143. rucio/daemons/bb8/t2_background_rebalance.py +153 -0
  144. rucio/daemons/cache/__init__.py +13 -0
  145. rucio/daemons/cache/consumer.py +133 -0
  146. rucio/daemons/common.py +405 -0
  147. rucio/daemons/conveyor/__init__.py +13 -0
  148. rucio/daemons/conveyor/common.py +562 -0
  149. rucio/daemons/conveyor/finisher.py +529 -0
  150. rucio/daemons/conveyor/poller.py +394 -0
  151. rucio/daemons/conveyor/preparer.py +205 -0
  152. rucio/daemons/conveyor/receiver.py +179 -0
  153. rucio/daemons/conveyor/stager.py +133 -0
  154. rucio/daemons/conveyor/submitter.py +403 -0
  155. rucio/daemons/conveyor/throttler.py +532 -0
  156. rucio/daemons/follower/__init__.py +13 -0
  157. rucio/daemons/follower/follower.py +101 -0
  158. rucio/daemons/hermes/__init__.py +13 -0
  159. rucio/daemons/hermes/hermes.py +534 -0
  160. rucio/daemons/judge/__init__.py +13 -0
  161. rucio/daemons/judge/cleaner.py +159 -0
  162. rucio/daemons/judge/evaluator.py +185 -0
  163. rucio/daemons/judge/injector.py +162 -0
  164. rucio/daemons/judge/repairer.py +154 -0
  165. rucio/daemons/oauthmanager/__init__.py +13 -0
  166. rucio/daemons/oauthmanager/oauthmanager.py +198 -0
  167. rucio/daemons/reaper/__init__.py +13 -0
  168. rucio/daemons/reaper/dark_reaper.py +282 -0
  169. rucio/daemons/reaper/reaper.py +739 -0
  170. rucio/daemons/replicarecoverer/__init__.py +13 -0
  171. rucio/daemons/replicarecoverer/suspicious_replica_recoverer.py +626 -0
  172. rucio/daemons/rsedecommissioner/__init__.py +13 -0
  173. rucio/daemons/rsedecommissioner/config.py +81 -0
  174. rucio/daemons/rsedecommissioner/profiles/__init__.py +24 -0
  175. rucio/daemons/rsedecommissioner/profiles/atlas.py +60 -0
  176. rucio/daemons/rsedecommissioner/profiles/generic.py +452 -0
  177. rucio/daemons/rsedecommissioner/profiles/types.py +93 -0
  178. rucio/daemons/rsedecommissioner/rse_decommissioner.py +280 -0
  179. rucio/daemons/storage/__init__.py +13 -0
  180. rucio/daemons/storage/consistency/__init__.py +13 -0
  181. rucio/daemons/storage/consistency/actions.py +848 -0
  182. rucio/daemons/tracer/__init__.py +13 -0
  183. rucio/daemons/tracer/kronos.py +511 -0
  184. rucio/daemons/transmogrifier/__init__.py +13 -0
  185. rucio/daemons/transmogrifier/transmogrifier.py +762 -0
  186. rucio/daemons/undertaker/__init__.py +13 -0
  187. rucio/daemons/undertaker/undertaker.py +137 -0
  188. rucio/db/__init__.py +13 -0
  189. rucio/db/sqla/__init__.py +52 -0
  190. rucio/db/sqla/constants.py +206 -0
  191. rucio/db/sqla/migrate_repo/__init__.py +13 -0
  192. rucio/db/sqla/migrate_repo/env.py +110 -0
  193. rucio/db/sqla/migrate_repo/versions/01eaf73ab656_add_new_rule_notification_state_progress.py +70 -0
  194. rucio/db/sqla/migrate_repo/versions/0437a40dbfd1_add_eol_at_in_rules.py +47 -0
  195. rucio/db/sqla/migrate_repo/versions/0f1adb7a599a_create_transfer_hops_table.py +59 -0
  196. rucio/db/sqla/migrate_repo/versions/102efcf145f4_added_stuck_at_column_to_rules.py +43 -0
  197. rucio/db/sqla/migrate_repo/versions/13d4f70c66a9_introduce_transfer_limits.py +91 -0
  198. rucio/db/sqla/migrate_repo/versions/140fef722e91_cleanup_distances_table.py +76 -0
  199. rucio/db/sqla/migrate_repo/versions/14ec5aeb64cf_add_request_external_host.py +43 -0
  200. rucio/db/sqla/migrate_repo/versions/156fb5b5a14_add_request_type_to_requests_idx.py +50 -0
  201. rucio/db/sqla/migrate_repo/versions/1677d4d803c8_split_rse_availability_into_multiple.py +68 -0
  202. rucio/db/sqla/migrate_repo/versions/16a0aca82e12_create_index_on_table_replicas_path.py +40 -0
  203. rucio/db/sqla/migrate_repo/versions/1803333ac20f_adding_provenance_and_phys_group.py +45 -0
  204. rucio/db/sqla/migrate_repo/versions/1a29d6a9504c_add_didtype_chck_to_requests.py +60 -0
  205. rucio/db/sqla/migrate_repo/versions/1a80adff031a_create_index_on_rules_hist_recent.py +40 -0
  206. rucio/db/sqla/migrate_repo/versions/1c45d9730ca6_increase_identity_length.py +140 -0
  207. rucio/db/sqla/migrate_repo/versions/1d1215494e95_add_quarantined_replicas_table.py +73 -0
  208. rucio/db/sqla/migrate_repo/versions/1d96f484df21_asynchronous_rules_and_rule_approval.py +74 -0
  209. rucio/db/sqla/migrate_repo/versions/1f46c5f240ac_add_bytes_column_to_bad_replicas.py +43 -0
  210. rucio/db/sqla/migrate_repo/versions/1fc15ab60d43_add_message_history_table.py +50 -0
  211. rucio/db/sqla/migrate_repo/versions/2190e703eb6e_move_rse_settings_to_rse_attributes.py +134 -0
  212. rucio/db/sqla/migrate_repo/versions/21d6b9dc9961_add_mismatch_scheme_state_to_requests.py +64 -0
  213. rucio/db/sqla/migrate_repo/versions/22cf51430c78_add_availability_column_to_table_rses.py +39 -0
  214. rucio/db/sqla/migrate_repo/versions/22d887e4ec0a_create_sources_table.py +64 -0
  215. rucio/db/sqla/migrate_repo/versions/25821a8a45a3_remove_unique_constraint_on_requests.py +51 -0
  216. rucio/db/sqla/migrate_repo/versions/25fc855625cf_added_unique_constraint_to_rules.py +41 -0
  217. rucio/db/sqla/migrate_repo/versions/269fee20dee9_add_repair_cnt_to_locks.py +43 -0
  218. rucio/db/sqla/migrate_repo/versions/271a46ea6244_add_ignore_availability_column_to_rules.py +44 -0
  219. rucio/db/sqla/migrate_repo/versions/277b5fbb41d3_switch_heartbeats_executable.py +53 -0
  220. rucio/db/sqla/migrate_repo/versions/27e3a68927fb_remove_replicas_tombstone_and_replicas_.py +38 -0
  221. rucio/db/sqla/migrate_repo/versions/2854cd9e168_added_rule_id_column.py +47 -0
  222. rucio/db/sqla/migrate_repo/versions/295289b5a800_processed_by_and__at_in_requests.py +45 -0
  223. rucio/db/sqla/migrate_repo/versions/2962ece31cf4_add_nbaccesses_column_in_the_did_table.py +45 -0
  224. rucio/db/sqla/migrate_repo/versions/2af3291ec4c_added_replicas_history_table.py +57 -0
  225. rucio/db/sqla/migrate_repo/versions/2b69addda658_add_columns_for_third_party_copy_read_.py +45 -0
  226. rucio/db/sqla/migrate_repo/versions/2b8e7bcb4783_add_config_table.py +69 -0
  227. rucio/db/sqla/migrate_repo/versions/2ba5229cb54c_add_submitted_at_to_requests_table.py +43 -0
  228. rucio/db/sqla/migrate_repo/versions/2cbee484dcf9_added_column_volume_to_rse_transfer_.py +42 -0
  229. rucio/db/sqla/migrate_repo/versions/2edee4a83846_add_source_to_requests_and_requests_.py +47 -0
  230. rucio/db/sqla/migrate_repo/versions/2eef46be23d4_change_tokens_pk.py +46 -0
  231. rucio/db/sqla/migrate_repo/versions/2f648fc909f3_index_in_rule_history_on_scope_name.py +40 -0
  232. rucio/db/sqla/migrate_repo/versions/3082b8cef557_add_naming_convention_table_and_closed_.py +67 -0
  233. rucio/db/sqla/migrate_repo/versions/30d5206e9cad_increase_oauthrequest_redirect_msg_.py +37 -0
  234. rucio/db/sqla/migrate_repo/versions/30fa38b6434e_add_index_on_service_column_in_the_message_table.py +44 -0
  235. rucio/db/sqla/migrate_repo/versions/3152492b110b_added_staging_area_column.py +77 -0
  236. rucio/db/sqla/migrate_repo/versions/32c7d2783f7e_create_bad_replicas_table.py +60 -0
  237. rucio/db/sqla/migrate_repo/versions/3345511706b8_replicas_table_pk_definition_is_in_.py +72 -0
  238. rucio/db/sqla/migrate_repo/versions/35ef10d1e11b_change_index_on_table_requests.py +42 -0
  239. rucio/db/sqla/migrate_repo/versions/379a19b5332d_create_rse_limits_table.py +65 -0
  240. rucio/db/sqla/migrate_repo/versions/384b96aa0f60_created_rule_history_tables.py +133 -0
  241. rucio/db/sqla/migrate_repo/versions/3ac1660a1a72_extend_distance_table.py +55 -0
  242. rucio/db/sqla/migrate_repo/versions/3ad36e2268b0_create_collection_replicas_updates_table.py +76 -0
  243. rucio/db/sqla/migrate_repo/versions/3c9df354071b_extend_waiting_request_state.py +60 -0
  244. rucio/db/sqla/migrate_repo/versions/3d9813fab443_add_a_new_state_lost_in_badfilesstatus.py +44 -0
  245. rucio/db/sqla/migrate_repo/versions/40ad39ce3160_add_transferred_at_to_requests_table.py +43 -0
  246. rucio/db/sqla/migrate_repo/versions/4207be2fd914_add_notification_column_to_rules.py +64 -0
  247. rucio/db/sqla/migrate_repo/versions/42db2617c364_create_index_on_requests_external_id.py +40 -0
  248. rucio/db/sqla/migrate_repo/versions/436827b13f82_added_column_activity_to_table_requests.py +43 -0
  249. rucio/db/sqla/migrate_repo/versions/44278720f774_update_requests_typ_sta_upd_idx_index.py +44 -0
  250. rucio/db/sqla/migrate_repo/versions/45378a1e76a8_create_collection_replica_table.py +78 -0
  251. rucio/db/sqla/migrate_repo/versions/469d262be19_removing_created_at_index.py +41 -0
  252. rucio/db/sqla/migrate_repo/versions/4783c1f49cb4_create_distance_table.py +59 -0
  253. rucio/db/sqla/migrate_repo/versions/49a21b4d4357_create_index_on_table_tokens.py +44 -0
  254. rucio/db/sqla/migrate_repo/versions/4a2cbedda8b9_add_source_replica_expression_column_to_.py +43 -0
  255. rucio/db/sqla/migrate_repo/versions/4a7182d9578b_added_bytes_length_accessed_at_columns.py +49 -0
  256. rucio/db/sqla/migrate_repo/versions/4bab9edd01fc_create_index_on_requests_rule_id.py +40 -0
  257. rucio/db/sqla/migrate_repo/versions/4c3a4acfe006_new_attr_account_table.py +63 -0
  258. rucio/db/sqla/migrate_repo/versions/4cf0a2e127d4_adding_transient_metadata.py +43 -0
  259. rucio/db/sqla/migrate_repo/versions/4df2c5ddabc0_remove_temporary_dids.py +55 -0
  260. rucio/db/sqla/migrate_repo/versions/50280c53117c_add_qos_class_to_rse.py +45 -0
  261. rucio/db/sqla/migrate_repo/versions/52153819589c_add_rse_id_to_replicas_table.py +43 -0
  262. rucio/db/sqla/migrate_repo/versions/52fd9f4916fa_added_activity_to_rules.py +43 -0
  263. rucio/db/sqla/migrate_repo/versions/53b479c3cb0f_fix_did_meta_table_missing_updated_at_.py +45 -0
  264. rucio/db/sqla/migrate_repo/versions/5673b4b6e843_add_wfms_metadata_to_rule_tables.py +47 -0
  265. rucio/db/sqla/migrate_repo/versions/575767d9f89_added_source_history_table.py +58 -0
  266. rucio/db/sqla/migrate_repo/versions/58bff7008037_add_started_at_to_requests.py +45 -0
  267. rucio/db/sqla/migrate_repo/versions/58c8b78301ab_rename_callback_to_message.py +106 -0
  268. rucio/db/sqla/migrate_repo/versions/5f139f77382a_added_child_rule_id_column.py +55 -0
  269. rucio/db/sqla/migrate_repo/versions/688ef1840840_adding_did_meta_table.py +50 -0
  270. rucio/db/sqla/migrate_repo/versions/6e572a9bfbf3_add_new_split_container_column_to_rules.py +47 -0
  271. rucio/db/sqla/migrate_repo/versions/70587619328_add_comment_column_for_subscriptions.py +43 -0
  272. rucio/db/sqla/migrate_repo/versions/739064d31565_remove_history_table_pks.py +41 -0
  273. rucio/db/sqla/migrate_repo/versions/7541902bf173_add_didsfollowed_and_followevents_table.py +91 -0
  274. rucio/db/sqla/migrate_repo/versions/7ec22226cdbf_new_replica_state_for_temporary_.py +72 -0
  275. rucio/db/sqla/migrate_repo/versions/810a41685bc1_added_columns_rse_transfer_limits.py +49 -0
  276. rucio/db/sqla/migrate_repo/versions/83f991c63a93_correct_rse_expression_length.py +43 -0
  277. rucio/db/sqla/migrate_repo/versions/8523998e2e76_increase_size_of_extended_attributes_.py +43 -0
  278. rucio/db/sqla/migrate_repo/versions/8ea9122275b1_adding_missing_function_based_indices.py +53 -0
  279. rucio/db/sqla/migrate_repo/versions/90f47792bb76_add_clob_payload_to_messages.py +45 -0
  280. rucio/db/sqla/migrate_repo/versions/914b8f02df38_new_table_for_lifetime_model_exceptions.py +68 -0
  281. rucio/db/sqla/migrate_repo/versions/94a5961ddbf2_add_estimator_columns.py +45 -0
  282. rucio/db/sqla/migrate_repo/versions/9a1b149a2044_add_saml_identity_type.py +94 -0
  283. rucio/db/sqla/migrate_repo/versions/9a45bc4ea66d_add_vp_table.py +54 -0
  284. rucio/db/sqla/migrate_repo/versions/9eb936a81eb1_true_is_true.py +72 -0
  285. rucio/db/sqla/migrate_repo/versions/a08fa8de1545_transfer_stats_table.py +55 -0
  286. rucio/db/sqla/migrate_repo/versions/a118956323f8_added_vo_table_and_vo_col_to_rse.py +76 -0
  287. rucio/db/sqla/migrate_repo/versions/a193a275255c_add_status_column_in_messages.py +47 -0
  288. rucio/db/sqla/migrate_repo/versions/a5f6f6e928a7_1_7_0.py +121 -0
  289. rucio/db/sqla/migrate_repo/versions/a616581ee47_added_columns_to_table_requests.py +59 -0
  290. rucio/db/sqla/migrate_repo/versions/a6eb23955c28_state_idx_non_functional.py +52 -0
  291. rucio/db/sqla/migrate_repo/versions/a74275a1ad30_added_global_quota_table.py +54 -0
  292. rucio/db/sqla/migrate_repo/versions/a93e4e47bda_heartbeats.py +64 -0
  293. rucio/db/sqla/migrate_repo/versions/ae2a56fcc89_added_comment_column_to_rules.py +49 -0
  294. rucio/db/sqla/migrate_repo/versions/b0070f3695c8_add_deletedidmeta_table.py +57 -0
  295. rucio/db/sqla/migrate_repo/versions/b4293a99f344_added_column_identity_to_table_tokens.py +43 -0
  296. rucio/db/sqla/migrate_repo/versions/b5493606bbf5_fix_primary_key_for_subscription_history.py +41 -0
  297. rucio/db/sqla/migrate_repo/versions/b7d287de34fd_removal_of_replicastate_source.py +91 -0
  298. rucio/db/sqla/migrate_repo/versions/b818052fa670_add_index_to_quarantined_replicas.py +40 -0
  299. rucio/db/sqla/migrate_repo/versions/b8caac94d7f0_add_comments_column_for_subscriptions_.py +43 -0
  300. rucio/db/sqla/migrate_repo/versions/b96a1c7e1cc4_new_bad_pfns_table_and_bad_replicas_.py +143 -0
  301. rucio/db/sqla/migrate_repo/versions/bb695f45c04_extend_request_state.py +76 -0
  302. rucio/db/sqla/migrate_repo/versions/bc68e9946deb_add_staging_timestamps_to_request.py +50 -0
  303. rucio/db/sqla/migrate_repo/versions/bf3baa1c1474_correct_pk_and_idx_for_history_tables.py +72 -0
  304. rucio/db/sqla/migrate_repo/versions/c0937668555f_add_qos_policy_map_table.py +55 -0
  305. rucio/db/sqla/migrate_repo/versions/c129ccdb2d5_add_lumiblocknr_to_dids.py +43 -0
  306. rucio/db/sqla/migrate_repo/versions/ccdbcd48206e_add_did_type_column_index_on_did_meta_.py +65 -0
  307. rucio/db/sqla/migrate_repo/versions/cebad904c4dd_new_payload_column_for_heartbeats.py +47 -0
  308. rucio/db/sqla/migrate_repo/versions/d1189a09c6e0_oauth2_0_and_jwt_feature_support_adding_.py +146 -0
  309. rucio/db/sqla/migrate_repo/versions/d23453595260_extend_request_state_for_preparer.py +104 -0
  310. rucio/db/sqla/migrate_repo/versions/d6dceb1de2d_added_purge_column_to_rules.py +44 -0
  311. rucio/db/sqla/migrate_repo/versions/d6e2c3b2cf26_remove_third_party_copy_column_from_rse.py +43 -0
  312. rucio/db/sqla/migrate_repo/versions/d91002c5841_new_account_limits_table.py +103 -0
  313. rucio/db/sqla/migrate_repo/versions/e138c364ebd0_extending_columns_for_filter_and_.py +49 -0
  314. rucio/db/sqla/migrate_repo/versions/e59300c8b179_support_for_archive.py +104 -0
  315. rucio/db/sqla/migrate_repo/versions/f1b14a8c2ac1_postgres_use_check_constraints.py +29 -0
  316. rucio/db/sqla/migrate_repo/versions/f41ffe206f37_oracle_global_temporary_tables.py +74 -0
  317. rucio/db/sqla/migrate_repo/versions/f85a2962b021_adding_transfertool_column_to_requests_.py +47 -0
  318. rucio/db/sqla/migrate_repo/versions/fa7a7d78b602_increase_refresh_token_size.py +43 -0
  319. rucio/db/sqla/migrate_repo/versions/fb28a95fe288_add_replicas_rse_id_tombstone_idx.py +37 -0
  320. rucio/db/sqla/migrate_repo/versions/fe1a65b176c9_set_third_party_copy_read_and_write_.py +43 -0
  321. rucio/db/sqla/migrate_repo/versions/fe8ea2fa9788_added_third_party_copy_column_to_rse_.py +43 -0
  322. rucio/db/sqla/models.py +1743 -0
  323. rucio/db/sqla/sautils.py +55 -0
  324. rucio/db/sqla/session.py +529 -0
  325. rucio/db/sqla/types.py +206 -0
  326. rucio/db/sqla/util.py +543 -0
  327. rucio/gateway/__init__.py +13 -0
  328. rucio/gateway/account.py +345 -0
  329. rucio/gateway/account_limit.py +363 -0
  330. rucio/gateway/authentication.py +381 -0
  331. rucio/gateway/config.py +227 -0
  332. rucio/gateway/credential.py +70 -0
  333. rucio/gateway/did.py +987 -0
  334. rucio/gateway/dirac.py +83 -0
  335. rucio/gateway/exporter.py +60 -0
  336. rucio/gateway/heartbeat.py +76 -0
  337. rucio/gateway/identity.py +189 -0
  338. rucio/gateway/importer.py +46 -0
  339. rucio/gateway/lifetime_exception.py +121 -0
  340. rucio/gateway/lock.py +153 -0
  341. rucio/gateway/meta_conventions.py +98 -0
  342. rucio/gateway/permission.py +74 -0
  343. rucio/gateway/quarantined_replica.py +79 -0
  344. rucio/gateway/replica.py +538 -0
  345. rucio/gateway/request.py +330 -0
  346. rucio/gateway/rse.py +632 -0
  347. rucio/gateway/rule.py +437 -0
  348. rucio/gateway/scope.py +100 -0
  349. rucio/gateway/subscription.py +280 -0
  350. rucio/gateway/vo.py +126 -0
  351. rucio/rse/__init__.py +96 -0
  352. rucio/rse/protocols/__init__.py +13 -0
  353. rucio/rse/protocols/bittorrent.py +194 -0
  354. rucio/rse/protocols/cache.py +111 -0
  355. rucio/rse/protocols/dummy.py +100 -0
  356. rucio/rse/protocols/gfal.py +708 -0
  357. rucio/rse/protocols/globus.py +243 -0
  358. rucio/rse/protocols/http_cache.py +82 -0
  359. rucio/rse/protocols/mock.py +123 -0
  360. rucio/rse/protocols/ngarc.py +209 -0
  361. rucio/rse/protocols/posix.py +250 -0
  362. rucio/rse/protocols/protocol.py +361 -0
  363. rucio/rse/protocols/rclone.py +365 -0
  364. rucio/rse/protocols/rfio.py +145 -0
  365. rucio/rse/protocols/srm.py +338 -0
  366. rucio/rse/protocols/ssh.py +414 -0
  367. rucio/rse/protocols/storm.py +195 -0
  368. rucio/rse/protocols/webdav.py +594 -0
  369. rucio/rse/protocols/xrootd.py +302 -0
  370. rucio/rse/rsemanager.py +881 -0
  371. rucio/rse/translation.py +260 -0
  372. rucio/tests/__init__.py +13 -0
  373. rucio/tests/common.py +280 -0
  374. rucio/tests/common_server.py +149 -0
  375. rucio/transfertool/__init__.py +13 -0
  376. rucio/transfertool/bittorrent.py +200 -0
  377. rucio/transfertool/bittorrent_driver.py +50 -0
  378. rucio/transfertool/bittorrent_driver_qbittorrent.py +134 -0
  379. rucio/transfertool/fts3.py +1600 -0
  380. rucio/transfertool/fts3_plugins.py +152 -0
  381. rucio/transfertool/globus.py +201 -0
  382. rucio/transfertool/globus_library.py +181 -0
  383. rucio/transfertool/mock.py +89 -0
  384. rucio/transfertool/transfertool.py +221 -0
  385. rucio/vcsversion.py +11 -0
  386. rucio/version.py +45 -0
  387. rucio/web/__init__.py +13 -0
  388. rucio/web/rest/__init__.py +13 -0
  389. rucio/web/rest/flaskapi/__init__.py +13 -0
  390. rucio/web/rest/flaskapi/authenticated_bp.py +27 -0
  391. rucio/web/rest/flaskapi/v1/__init__.py +13 -0
  392. rucio/web/rest/flaskapi/v1/accountlimits.py +236 -0
  393. rucio/web/rest/flaskapi/v1/accounts.py +1103 -0
  394. rucio/web/rest/flaskapi/v1/archives.py +102 -0
  395. rucio/web/rest/flaskapi/v1/auth.py +1644 -0
  396. rucio/web/rest/flaskapi/v1/common.py +426 -0
  397. rucio/web/rest/flaskapi/v1/config.py +304 -0
  398. rucio/web/rest/flaskapi/v1/credentials.py +213 -0
  399. rucio/web/rest/flaskapi/v1/dids.py +2340 -0
  400. rucio/web/rest/flaskapi/v1/dirac.py +116 -0
  401. rucio/web/rest/flaskapi/v1/export.py +75 -0
  402. rucio/web/rest/flaskapi/v1/heartbeats.py +127 -0
  403. rucio/web/rest/flaskapi/v1/identities.py +285 -0
  404. rucio/web/rest/flaskapi/v1/import.py +132 -0
  405. rucio/web/rest/flaskapi/v1/lifetime_exceptions.py +312 -0
  406. rucio/web/rest/flaskapi/v1/locks.py +358 -0
  407. rucio/web/rest/flaskapi/v1/main.py +91 -0
  408. rucio/web/rest/flaskapi/v1/meta_conventions.py +241 -0
  409. rucio/web/rest/flaskapi/v1/metrics.py +36 -0
  410. rucio/web/rest/flaskapi/v1/nongrid_traces.py +97 -0
  411. rucio/web/rest/flaskapi/v1/ping.py +88 -0
  412. rucio/web/rest/flaskapi/v1/redirect.py +366 -0
  413. rucio/web/rest/flaskapi/v1/replicas.py +1894 -0
  414. rucio/web/rest/flaskapi/v1/requests.py +998 -0
  415. rucio/web/rest/flaskapi/v1/rses.py +2250 -0
  416. rucio/web/rest/flaskapi/v1/rules.py +854 -0
  417. rucio/web/rest/flaskapi/v1/scopes.py +159 -0
  418. rucio/web/rest/flaskapi/v1/subscriptions.py +650 -0
  419. rucio/web/rest/flaskapi/v1/templates/auth_crash.html +80 -0
  420. rucio/web/rest/flaskapi/v1/templates/auth_granted.html +82 -0
  421. rucio/web/rest/flaskapi/v1/traces.py +137 -0
  422. rucio/web/rest/flaskapi/v1/types.py +20 -0
  423. rucio/web/rest/flaskapi/v1/vos.py +278 -0
  424. rucio/web/rest/main.py +18 -0
  425. rucio/web/rest/metrics.py +27 -0
  426. rucio/web/rest/ping.py +27 -0
  427. rucio-37.0.0rc1.data/data/rucio/etc/alembic.ini.template +71 -0
  428. rucio-37.0.0rc1.data/data/rucio/etc/alembic_offline.ini.template +74 -0
  429. rucio-37.0.0rc1.data/data/rucio/etc/globus-config.yml.template +5 -0
  430. rucio-37.0.0rc1.data/data/rucio/etc/ldap.cfg.template +30 -0
  431. rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_approval_request.tmpl +38 -0
  432. rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_approved_admin.tmpl +4 -0
  433. rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_approved_user.tmpl +17 -0
  434. rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_denied_admin.tmpl +6 -0
  435. rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_denied_user.tmpl +17 -0
  436. rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_ok_notification.tmpl +19 -0
  437. rucio-37.0.0rc1.data/data/rucio/etc/rse-accounts.cfg.template +25 -0
  438. rucio-37.0.0rc1.data/data/rucio/etc/rucio.cfg.atlas.client.template +43 -0
  439. rucio-37.0.0rc1.data/data/rucio/etc/rucio.cfg.template +241 -0
  440. rucio-37.0.0rc1.data/data/rucio/etc/rucio_multi_vo.cfg.template +217 -0
  441. rucio-37.0.0rc1.data/data/rucio/requirements.server.txt +297 -0
  442. rucio-37.0.0rc1.data/data/rucio/tools/bootstrap.py +34 -0
  443. rucio-37.0.0rc1.data/data/rucio/tools/merge_rucio_configs.py +144 -0
  444. rucio-37.0.0rc1.data/data/rucio/tools/reset_database.py +40 -0
  445. rucio-37.0.0rc1.data/scripts/rucio +133 -0
  446. rucio-37.0.0rc1.data/scripts/rucio-abacus-account +74 -0
  447. rucio-37.0.0rc1.data/scripts/rucio-abacus-collection-replica +46 -0
  448. rucio-37.0.0rc1.data/scripts/rucio-abacus-rse +78 -0
  449. rucio-37.0.0rc1.data/scripts/rucio-admin +97 -0
  450. rucio-37.0.0rc1.data/scripts/rucio-atropos +60 -0
  451. rucio-37.0.0rc1.data/scripts/rucio-auditor +206 -0
  452. rucio-37.0.0rc1.data/scripts/rucio-automatix +50 -0
  453. rucio-37.0.0rc1.data/scripts/rucio-bb8 +57 -0
  454. rucio-37.0.0rc1.data/scripts/rucio-cache-client +141 -0
  455. rucio-37.0.0rc1.data/scripts/rucio-cache-consumer +42 -0
  456. rucio-37.0.0rc1.data/scripts/rucio-conveyor-finisher +58 -0
  457. rucio-37.0.0rc1.data/scripts/rucio-conveyor-poller +66 -0
  458. rucio-37.0.0rc1.data/scripts/rucio-conveyor-preparer +37 -0
  459. rucio-37.0.0rc1.data/scripts/rucio-conveyor-receiver +44 -0
  460. rucio-37.0.0rc1.data/scripts/rucio-conveyor-stager +76 -0
  461. rucio-37.0.0rc1.data/scripts/rucio-conveyor-submitter +139 -0
  462. rucio-37.0.0rc1.data/scripts/rucio-conveyor-throttler +104 -0
  463. rucio-37.0.0rc1.data/scripts/rucio-dark-reaper +53 -0
  464. rucio-37.0.0rc1.data/scripts/rucio-dumper +160 -0
  465. rucio-37.0.0rc1.data/scripts/rucio-follower +44 -0
  466. rucio-37.0.0rc1.data/scripts/rucio-hermes +54 -0
  467. rucio-37.0.0rc1.data/scripts/rucio-judge-cleaner +89 -0
  468. rucio-37.0.0rc1.data/scripts/rucio-judge-evaluator +137 -0
  469. rucio-37.0.0rc1.data/scripts/rucio-judge-injector +44 -0
  470. rucio-37.0.0rc1.data/scripts/rucio-judge-repairer +44 -0
  471. rucio-37.0.0rc1.data/scripts/rucio-kronos +44 -0
  472. rucio-37.0.0rc1.data/scripts/rucio-minos +53 -0
  473. rucio-37.0.0rc1.data/scripts/rucio-minos-temporary-expiration +50 -0
  474. rucio-37.0.0rc1.data/scripts/rucio-necromancer +120 -0
  475. rucio-37.0.0rc1.data/scripts/rucio-oauth-manager +63 -0
  476. rucio-37.0.0rc1.data/scripts/rucio-reaper +83 -0
  477. rucio-37.0.0rc1.data/scripts/rucio-replica-recoverer +248 -0
  478. rucio-37.0.0rc1.data/scripts/rucio-rse-decommissioner +66 -0
  479. rucio-37.0.0rc1.data/scripts/rucio-storage-consistency-actions +74 -0
  480. rucio-37.0.0rc1.data/scripts/rucio-transmogrifier +77 -0
  481. rucio-37.0.0rc1.data/scripts/rucio-undertaker +76 -0
  482. rucio-37.0.0rc1.dist-info/METADATA +92 -0
  483. rucio-37.0.0rc1.dist-info/RECORD +487 -0
  484. rucio-37.0.0rc1.dist-info/WHEEL +5 -0
  485. rucio-37.0.0rc1.dist-info/licenses/AUTHORS.rst +100 -0
  486. rucio-37.0.0rc1.dist-info/licenses/LICENSE +201 -0
  487. rucio-37.0.0rc1.dist-info/top_level.txt +1 -0
rucio/core/request.py ADDED
@@ -0,0 +1,3091 @@
+ # Copyright European Organization for Nuclear Research (CERN) since 2012
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import datetime
+ import itertools
+ import json
+ import logging
+ import math
+ import random
+ import threading
+ import traceback
+ from abc import ABCMeta, abstractmethod
+ from collections import defaultdict, namedtuple
+ from dataclasses import dataclass
+ from typing import TYPE_CHECKING, Any, Optional, Union
+
+ from sqlalchemy import and_, delete, exists, insert, or_, select, update
+ from sqlalchemy.exc import IntegrityError
+ from sqlalchemy.orm import aliased
+ from sqlalchemy.sql.expression import asc, false, func, null, true
+
+ from rucio.common.config import config_get_bool, config_get_int
+ from rucio.common.constants import RseAttr
+ from rucio.common.exception import InvalidRSEExpression, RequestNotFound, RucioException, UnsupportedOperation
+ from rucio.common.types import FilterDict, InternalAccount, InternalScope, LoggerFunction, RequestDict
+ from rucio.common.utils import chunks, generate_uuid
+ from rucio.core.distance import get_distances
+ from rucio.core.message import add_message, add_messages
+ from rucio.core.monitor import MetricManager
+ from rucio.core.rse import RseCollection, RseData, get_rse_attribute, get_rse_name, get_rse_vo
+ from rucio.core.rse_expression_parser import parse_expression
+ from rucio.db.sqla import filter_thread_work, models
+ from rucio.db.sqla.constants import LockState, ReplicaState, RequestErrMsg, RequestState, RequestType, TransferLimitDirection
+ from rucio.db.sqla.session import read_session, stream_session, transactional_session
+ from rucio.db.sqla.util import temp_table_mngr
+
+ RequestAndState = namedtuple('RequestAndState', ['request_id', 'request_state'])
+
+ if TYPE_CHECKING:
+     import uuid
+     from collections.abc import Iterable, Iterator, Mapping, Sequence
+
+     from sqlalchemy.engine import Row
+     from sqlalchemy.orm import Session
+     from sqlalchemy.sql.selectable import Subquery
+
+     from rucio.rse.protocols.protocol import RSEProtocol
+
+ """
+ The core request.py is specifically for handling requests.
+ Requests accessed by external_id (so-called transfers) are covered in core/transfer.py.
+ """
+
+ METRICS = MetricManager(module=__name__)
+
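+ # Bucket edges, in seconds, ranging from 10 s up to 30 days with a final +inf overflow bucket;
+ # presumably used as histogram buckets for transfer-duration metrics.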
+ TRANSFER_TIME_BUCKETS = (
+     10, 30, 60, 5 * 60, 10 * 60, 20 * 60, 40 * 60, 60 * 60, 1.5 * 60 * 60, 3 * 60 * 60, 6 * 60 * 60,
+     12 * 60 * 60, 24 * 60 * 60, 3 * 24 * 60 * 60, 4 * 24 * 60 * 60, 5 * 24 * 60 * 60,
+     6 * 24 * 60 * 60, 7 * 24 * 60 * 60, 10 * 24 * 60 * 60, 14 * 24 * 60 * 60, 30 * 24 * 60 * 60,
+     float('inf')
+ )
+
+
+ class RequestSource:
+     def __init__(
+             self,
+             rse: RseData,
+             ranking: Optional[int] = None,
+             distance: Optional[int] = None,
+             file_path: Optional[str] = None,
+             scheme: Optional[str] = None,
+             url: Optional[str] = None
+     ):
+         self.rse = rse
+         self.distance = distance if distance is not None else 9999
+         self.ranking = ranking if ranking is not None else 0
+         self.file_path = file_path
+         self.scheme = scheme
+         self.url = url
+
+     def __str__(self) -> str:
+         return "src_rse={}".format(self.rse)
+
+
+ class TransferDestination:
+     def __init__(
+             self,
+             rse: RseData,
+             scheme: str
+     ):
+         self.rse = rse
+         self.scheme = scheme
+
+     def __str__(self) -> str:
+         return "dst_rse={}".format(self.rse)
+
+
+ class RequestWithSources:
+     def __init__(
+             self,
+             id_: Optional[str],
+             request_type: RequestType,
+             rule_id: Optional[str],
+             scope: InternalScope,
+             name: str,
+             md5: str,
+             adler32: str,
+             byte_count: int,
+             activity: str,
+             attributes: Optional[Union[str, dict[str, Any]]],
+             previous_attempt_id: Optional[str],
+             dest_rse: RseData,
+             account: InternalAccount,
+             retry_count: int,
+             priority: int,
+             transfertool: str,
+             requested_at: Optional[datetime.datetime] = None,
+     ):
+         self.request_id = id_
+         self.request_type = request_type
+         self.rule_id = rule_id
+         self.scope = scope
+         self.name = name
+         self.md5 = md5
+         self.adler32 = adler32
+         self.byte_count = byte_count
+         self.activity = activity
+         self._dict_attributes = None
+         self._db_attributes = attributes
+         self.previous_attempt_id = previous_attempt_id
+         self.dest_rse = dest_rse
+         self.account = account
+         self.retry_count = retry_count or 0
+         self.priority = priority if priority is not None else 3
+         self.transfertool = transfertool
+         self.requested_at = requested_at if requested_at else datetime.datetime.utcnow()
+
+         self.sources: list[RequestSource] = []
+         self.requested_source: Optional[RequestSource] = None
+
+     def __str__(self) -> str:
+         return "{}({}:{})".format(self.request_id, self.scope, self.name)
+
+     @property
+     def attributes(self) -> dict[str, Any]:
+         if self._dict_attributes is None:
+             self._dict_attributes = self._parse_db_attributes(self._db_attributes)
+         return self._dict_attributes
+
+     @attributes.setter
+     def attributes(self, db_attributes: dict[str, Any]) -> None:
+         self._dict_attributes = self._parse_db_attributes(db_attributes)
+
+     @staticmethod
+     def _parse_db_attributes(db_attributes: Optional[Union[str, dict[str, Any]]]) -> dict[str, Any]:
+         attr = {}
+         if db_attributes:
+             if isinstance(db_attributes, dict):
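+                 # round-tripping through JSON deep-copies the dict and coerces values to plain JSON types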
+                 attr = json.loads(json.dumps(db_attributes))
+             else:
+                 attr = json.loads(str(db_attributes))
+         # parse source expression
+         attr['source_replica_expression'] = attr["source_replica_expression"] if (attr and "source_replica_expression" in attr) else None
+         attr['allow_tape_source'] = attr["allow_tape_source"] if (attr and "allow_tape_source" in attr) else True
+         attr['dsn'] = attr["ds_name"] if (attr and "ds_name" in attr) else None
+         attr['lifetime'] = attr.get('lifetime', -1)
+         return attr
+
+
+ class DirectTransfer(metaclass=ABCMeta):
+     """
+     The configuration for a direct (non-multi-hop) transfer. It can be a multi-source transfer.
+     """
+
+     def __init__(self, sources: list[RequestSource], rws: RequestWithSources) -> None:
+         self.sources: list[RequestSource] = sources
+         self.rws: RequestWithSources = rws
+
+     @property
+     @abstractmethod
+     def src(self) -> RequestSource:
+         pass
+
+     @property
+     @abstractmethod
+     def dst(self) -> TransferDestination:
+         pass
+
+     @property
+     @abstractmethod
+     def dest_url(self) -> str:
+         pass
+
+     @abstractmethod
+     def source_url(self, source: RequestSource) -> str:
+         pass
+
+     @abstractmethod
+     def dest_protocol(self) -> "RSEProtocol":
+         pass
+
+     @abstractmethod
+     def source_protocol(self, source: RequestSource) -> "RSEProtocol":
+         pass
+
+
+ def should_retry_request(
+         req: RequestDict,
+         retry_protocol_mismatches: bool
+ ) -> bool:
+     """
+     Whether this request should be retried.
+
+     :param req: Request as a dictionary.
+     :param retry_protocol_mismatches: Boolean to retry the transfer in case of protocol mismatch.
+     :returns: True if the request should be retried; False if not.
+     """
+     if is_intermediate_hop(req):
+         # This is an intermediate request in a multi-hop transfer. It must not be re-scheduled on its own.
+         # If needed, it will be re-scheduled via the creation of a new multi-hop transfer.
+         return False
+     if req['state'] == RequestState.SUBMITTING:
+         return True
+     if req['state'] == RequestState.NO_SOURCES or req['state'] == RequestState.ONLY_TAPE_SOURCES:
+         return False
+     # hardcoded for now - only requeue a couple of times
+     if req['retry_count'] is None or req['retry_count'] < 3:
+         if req['state'] == RequestState.MISMATCH_SCHEME:
+             return retry_protocol_mismatches
+         return True
+     return False
+
+
+ @METRICS.time_it
+ @transactional_session
+ def requeue_and_archive(
+         request: RequestDict,
+         source_ranking_update: bool = True,
+         retry_protocol_mismatches: bool = False,
+         *,
+         session: "Session",
+         logger: LoggerFunction = logging.log
+ ) -> Optional[RequestDict]:
+     """
+     Requeue and archive a failed request.
+     TODO: Multiple requeue.
+
+     :param request: Original request.
+     :param source_ranking_update: If True, the source ranking is decreased (making the sources less likely to be used).
+     :param retry_protocol_mismatches: Boolean to retry the transfer in case of protocol mismatch.
+     :param session: Database session to use.
+     :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
+     """
+
+     # Probably not needed anymore
+     request_id = request['request_id']
+     new_req = get_request(request_id, session=session)
+
+     if new_req:
+         new_req['sources'] = get_sources(request_id, session=session)
+         archive_request(request_id, session=session)
+
+         if should_retry_request(new_req, retry_protocol_mismatches):
+             new_req['request_id'] = generate_uuid()
+             new_req['previous_attempt_id'] = request_id
+             if new_req['retry_count'] is None:
+                 new_req['retry_count'] = 1
+             elif new_req['state'] != RequestState.SUBMITTING:
+                 new_req['retry_count'] += 1
+
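+             # demote every source that was actually tried (is_using) so it is less likely to be used on the next attempt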
+             if source_ranking_update and new_req['sources']:
+                 for i in range(len(new_req['sources'])):
+                     if new_req['sources'][i]['is_using']:
+                         if new_req['sources'][i]['ranking'] is None:
+                             new_req['sources'][i]['ranking'] = -1
+                         else:
+                             new_req['sources'][i]['ranking'] -= 1
+                         new_req['sources'][i]['is_using'] = False
+             new_req.pop('state', None)
+             queue_requests([new_req], session=session, logger=logger)
+             return new_req
+     else:
+         raise RequestNotFound
+     return None
+
+
+ @METRICS.count_it
+ @transactional_session
+ def queue_requests(
+         requests: 'Iterable[RequestDict]',
+         *,
+         session: "Session",
+         logger: LoggerFunction = logging.log
+ ) -> list[str]:
+     """
+     Submit transfer requests on destination RSEs for data identifiers.
+
+     :param requests: List of dictionaries containing request metadata.
+     :param session: Database session to use.
+     :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
+     :returns: List of Request-IDs as 32 character hex strings.
+     """
+     logger(logging.DEBUG, "queue requests")
+
+     request_clause = []
+     rses = {}
+     preparer_enabled = config_get_bool('conveyor', 'use_preparer', raise_exception=False, default=False)
+     for req in requests:
+
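+         # attributes may arrive JSON-encoded, occasionally double-encoded, so decode up to twice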
+         if isinstance(req['attributes'], str):
+             req['attributes'] = json.loads(req['attributes'] or '{}')
+             if isinstance(req['attributes'], str):
+                 req['attributes'] = json.loads(req['attributes'] or '{}')
+
+         if req['request_type'] == RequestType.TRANSFER:
+             request_clause.append(and_(models.Request.scope == req['scope'],
+                                        models.Request.name == req['name'],
+                                        models.Request.dest_rse_id == req['dest_rse_id'],
+                                        models.Request.request_type == RequestType.TRANSFER))
+
+         if req['dest_rse_id'] not in rses:
+             rses[req['dest_rse_id']] = get_rse_name(req['dest_rse_id'], session=session)
+
+     # Check existing requests
+     existing_requests = []
+     if request_clause:
+         for requests_condition in chunks(request_clause, 1000):
+             stmt = select(
+                 models.Request.scope,
+                 models.Request.name,
+                 models.Request.dest_rse_id
+             ).with_hint(
+                 models.Request,
+                 'INDEX(REQUESTS REQUESTS_SC_NA_RS_TY_UQ_IDX)',
+                 'oracle'
+             ).where(
+                 or_(*requests_condition)
+             )
+             existing_requests.extend(session.execute(stmt))
+
+     new_requests, sources, messages = [], [], []
+     for request in requests:
+         dest_rse_name = get_rse_name(rse_id=request['dest_rse_id'], session=session)
+         if request['request_type'] == RequestType.TRANSFER and (request['scope'], request['name'], request['dest_rse_id']) in existing_requests:
+             logger(logging.WARNING, 'Request TYPE %s for DID %s:%s at RSE %s exists - ignoring' % (request['request_type'],
+                                                                                                    request['scope'],
+                                                                                                    request['name'],
+                                                                                                    dest_rse_name))
+             continue
+
+         def temp_serializer(obj):
+             if isinstance(obj, (InternalAccount, InternalScope)):
+                 return obj.internal
+             raise TypeError('Could not serialise object %r' % obj)
+
+         if 'state' not in request:
+             request['state'] = RequestState.PREPARING if preparer_enabled else RequestState.QUEUED
+
+         new_request = {'request_type': request['request_type'],
+                        'scope': request['scope'],
+                        'name': request['name'],
+                        'dest_rse_id': request['dest_rse_id'],
+                        'source_rse_id': request.get('source_rse_id', None),
+                        'attributes': json.dumps(request['attributes'], default=temp_serializer),
+                        'state': request['state'],
+                        'rule_id': request['rule_id'],
+                        'activity': request['attributes']['activity'],
+                        'bytes': request['attributes']['bytes'],
+                        'md5': request['attributes']['md5'],
+                        'adler32': request['attributes']['adler32'],
+                        'account': request.get('account', None),
+                        'priority': request['attributes'].get('priority', None),
+                        'requested_at': request.get('requested_at', None),
+                        'retry_count': request['retry_count']}
+         if 'transfertool' in request:
+             new_request['transfertool'] = request['transfertool']
+         if 'previous_attempt_id' in request and 'retry_count' in request:
+             new_request['previous_attempt_id'] = request['previous_attempt_id']
+             new_request['id'] = request['request_id']
+         else:
+             new_request['id'] = generate_uuid()
+         new_requests.append(new_request)
+
+         if 'sources' in request and request['sources']:
+             for source in request['sources']:
+                 sources.append({'request_id': new_request['id'],
+                                 'scope': request['scope'],
+                                 'name': request['name'],
+                                 'rse_id': source['rse_id'],
+                                 'dest_rse_id': request['dest_rse_id'],
+                                 'ranking': source['ranking'],
+                                 'bytes': source['bytes'],
+                                 'url': source['url'],
+                                 'is_using': source['is_using']})
+
+         if request['request_type']:
+             transfer_status = '%s-%s' % (request['request_type'].name, request['state'].name)
+         else:
+             transfer_status = 'transfer-%s' % request['state'].name
+         transfer_status = transfer_status.lower()
+
+         payload = {'request-id': new_request['id'],
+                    'request-type': request['request_type'].name.lower(),
+                    'scope': request['scope'].external,
+                    'name': request['name'],
+                    'dst-rse-id': request['dest_rse_id'],
+                    'dst-rse': dest_rse_name,
+                    'state': request['state'].name.lower(),
+                    'retry-count': request['retry_count'],
+                    'rule-id': str(request['rule_id']),
+                    'activity': request['attributes']['activity'],
+                    'file-size': request['attributes']['bytes'],
+                    'bytes': request['attributes']['bytes'],
+                    'checksum-md5': request['attributes']['md5'],
+                    'checksum-adler': request['attributes']['adler32'],
+                    'queued_at': str(datetime.datetime.utcnow())}
+
+         messages.append({'event_type': transfer_status,
+                          'payload': payload})
+
+     for requests_chunk in chunks(new_requests, 1000):
+         stmt = insert(
+             models.Request
+         )
+         session.execute(stmt, requests_chunk)
+
+     for sources_chunk in chunks(sources, 1000):
+         stmt = insert(
+             models.Source
+         )
+         session.execute(stmt, sources_chunk)
+
+     add_messages(messages, session=session)
+
+     return new_requests
+
+
+ @transactional_session
+ def list_and_mark_transfer_requests_and_source_replicas(
+         rse_collection: "RseCollection",
+         processed_by: Optional[str] = None,
+         processed_at_delay: int = 600,
+         total_workers: int = 0,
+         worker_number: int = 0,
+         partition_hash_var: Optional[str] = None,
+         limit: Optional[int] = None,
+         activity: Optional[str] = None,
+         older_than: Optional[datetime.datetime] = None,
+         rses: Optional['Sequence[str]'] = None,
+         request_type: Optional[list[RequestType]] = None,
+         request_state: Optional[RequestState] = None,
+         required_source_rse_attrs: Optional[list[str]] = None,
+         ignore_availability: bool = False,
+         transfertool: Optional[str] = None,
+         *,
+         session: "Session",
+ ) -> dict[str, RequestWithSources]:
+     """
+     List requests with source replicas
+     :param rse_collection: the RSE collection being used
+     :param processed_by: the daemon/executable running this query
+     :param processed_at_delay: how many seconds to ignore a request if it's already being processed by the same daemon
+     :param total_workers: Number of total workers.
+     :param worker_number: Id of the executing worker.
+     :param partition_hash_var: The hash variable used for partitioning thread work
+     :param limit: Integer of requests to retrieve.
+     :param activity: Activity to be selected.
+     :param older_than: Only select requests older than this DateTime.
+     :param rses: List of rse_id to select requests.
+     :param request_type: Filter on the given request type.
+     :param request_state: Filter on the given request state
+     :param transfertool: The transfer tool as specified in rucio.cfg.
+     :param required_source_rse_attrs: Only select source RSEs having these attributes set
+     :param ignore_availability: Ignore blocklisted RSEs
+     :param session: Database session to use.
+     :returns: Dict mapping request ID to RequestWithSources objects.
+     """
+
+     if partition_hash_var is None:
+         partition_hash_var = 'requests.id'
+     if request_state is None:
+         request_state = RequestState.QUEUED
+     if request_type is None:
+         request_type = [RequestType.TRANSFER]
+
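+     # the query runs in two stages: this subquery picks the candidate requests;
+     # available replicas, sources and distances are outer-joined onto it below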
+     sub_requests = select(
+         models.Request.id,
+         models.Request.request_type,
+         models.Request.rule_id,
+         models.Request.scope,
+         models.Request.name,
+         models.Request.md5,
+         models.Request.adler32,
+         models.Request.bytes,
+         models.Request.activity,
+         models.Request.attributes,
+         models.Request.previous_attempt_id,
+         models.Request.source_rse_id,
+         models.Request.dest_rse_id,
+         models.Request.retry_count,
+         models.Request.account,
+         models.Request.created_at,
+         models.Request.requested_at,
+         models.Request.priority,
+         models.Request.transfertool
+     ).with_hint(
+         models.Request,
+         'INDEX(REQUESTS REQUESTS_TYP_STA_UPD_IDX)',
+         'oracle'
+     ).where(
+         and_(models.Request.state == request_state,
+              models.Request.request_type.in_(request_type))
+     ).outerjoin(
+         models.ReplicationRule,
+         models.Request.rule_id == models.ReplicationRule.id
+     ).where(
+         or_(models.ReplicationRule.child_rule_id != null(),
+             and_(models.ReplicationRule.child_rule_id == null(),
+                  models.ReplicationRule.expires_at == null()),
+             and_(models.ReplicationRule.child_rule_id == null(),
+                  models.ReplicationRule.expires_at > datetime.datetime.utcnow()))
+     ).join(
+         models.RSE,
+         models.RSE.id == models.Request.dest_rse_id
+     ).where(
+         models.RSE.deleted == false()
+     ).outerjoin(
+         models.TransferHop,
+         models.TransferHop.next_hop_request_id == models.Request.id
+     ).where(
+         models.TransferHop.next_hop_request_id == null()
+     ).order_by(
+         models.Request.created_at
+     )
+
+     if processed_by:
+         sub_requests = sub_requests.where(
+             or_(models.Request.last_processed_by.is_(null()),
+                 models.Request.last_processed_by != processed_by,
+                 models.Request.last_processed_at < datetime.datetime.utcnow() - datetime.timedelta(seconds=processed_at_delay))
+         )
+
+     if not ignore_availability:
+         sub_requests = sub_requests.where(models.RSE.availability_write == true())
+
+     if isinstance(older_than, datetime.datetime):
+         sub_requests = sub_requests.where(models.Request.requested_at < older_than)
+
+     if activity:
+         sub_requests = sub_requests.where(models.Request.activity == activity)
+
+     # if a transfertool is specified make sure to filter for those requests and apply related index
+     if transfertool:
+         sub_requests = sub_requests.where(models.Request.transfertool == transfertool)
+         sub_requests = sub_requests.with_hint(models.Request, 'INDEX(REQUESTS REQUESTS_TYP_STA_TRA_ACT_IDX)', 'oracle')
+     else:
+         sub_requests = sub_requests.with_hint(models.Request, 'INDEX(REQUESTS REQUESTS_TYP_STA_UPD_IDX)', 'oracle')
+
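+     # stage the requested RSE ids in a temporary table and filter via a join rather than a long IN-list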
+     if rses:
+         temp_table_cls = temp_table_mngr(session).create_id_table()
+
+         values = [{'id': rse_id} for rse_id in rses]
+         stmt = insert(
+             temp_table_cls
+         )
+         session.execute(stmt, values)
+
+         sub_requests = sub_requests.join(temp_table_cls, temp_table_cls.id == models.RSE.id)
+
+     sub_requests = filter_thread_work(session=session, query=sub_requests, total_threads=total_workers, thread_id=worker_number, hash_variable=partition_hash_var)
+
+     if limit:
+         sub_requests = sub_requests.limit(limit)
+
+     sub_requests = sub_requests.subquery()
+
+     stmt = select(
+         sub_requests.c.id,
+         sub_requests.c.request_type,
+         sub_requests.c.rule_id,
+         sub_requests.c.scope,
+         sub_requests.c.name,
+         sub_requests.c.md5,
+         sub_requests.c.adler32,
+         sub_requests.c.bytes,
+         sub_requests.c.activity,
+         sub_requests.c.attributes,
+         sub_requests.c.previous_attempt_id,
+         sub_requests.c.source_rse_id,
+         sub_requests.c.dest_rse_id,
+         sub_requests.c.account,
+         sub_requests.c.retry_count,
+         sub_requests.c.priority,
+         sub_requests.c.transfertool,
+         sub_requests.c.requested_at,
+         models.RSE.id.label("replica_rse_id"),
+         models.RSE.rse.label("replica_rse_name"),
+         models.RSEFileAssociation.path,
+         models.Source.ranking.label("source_ranking"),
+         models.Source.url.label("source_url"),
+         models.Distance.distance
+     ).order_by(
+         sub_requests.c.created_at
+     ).outerjoin(
+         models.RSEFileAssociation,
+         and_(sub_requests.c.scope == models.RSEFileAssociation.scope,
+              sub_requests.c.name == models.RSEFileAssociation.name,
+              models.RSEFileAssociation.state == ReplicaState.AVAILABLE,
+              sub_requests.c.dest_rse_id != models.RSEFileAssociation.rse_id)
+     ).with_hint(
+         models.RSEFileAssociation,
+         'INDEX(REPLICAS REPLICAS_PK)',
+         'oracle'
+     ).outerjoin(
+         models.RSE,
+         and_(models.RSE.id == models.RSEFileAssociation.rse_id,
+              models.RSE.deleted == false())
+     ).outerjoin(
+         models.Source,
+         and_(sub_requests.c.id == models.Source.request_id,
+              models.RSE.id == models.Source.rse_id)
+     ).with_hint(
+         models.Source,
+         'INDEX(SOURCES SOURCES_PK)',
+         'oracle'
+     ).outerjoin(
+         models.Distance,
+         and_(sub_requests.c.dest_rse_id == models.Distance.dest_rse_id,
+              models.RSEFileAssociation.rse_id == models.Distance.src_rse_id)
+     ).with_hint(
+         models.Distance,
+         'INDEX(DISTANCES DISTANCES_PK)',
+         'oracle'
+     )
+
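+     # keep only source RSEs that have every required attribute set (one EXISTS subquery per attribute)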
+     for attribute in required_source_rse_attrs or ():
+         rse_attr_alias = aliased(models.RSEAttrAssociation)
+         stmt = stmt.where(
+             exists(
+                 select(
+                     1
+                 ).where(
+                     and_(rse_attr_alias.rse_id == models.RSE.id,
+                          rse_attr_alias.key == attribute)
+                 )
+             )
+         )
+
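+     # assemble one RequestWithSources per request, attaching a RequestSource for every joined replica row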
+     requests_by_id = {}
+     for (request_id, req_type, rule_id, scope, name, md5, adler32, byte_count, activity, attributes, previous_attempt_id, source_rse_id, dest_rse_id, account, retry_count,
+          priority, transfertool, requested_at, replica_rse_id, replica_rse_name, file_path, source_ranking, source_url, distance) in session.execute(stmt):
+
+         request = requests_by_id.get(request_id)
+         if not request:
+             request = RequestWithSources(id_=request_id, request_type=req_type, rule_id=rule_id, scope=scope, name=name,
+                                          md5=md5, adler32=adler32, byte_count=byte_count, activity=activity, attributes=attributes,
+                                          previous_attempt_id=previous_attempt_id, dest_rse=rse_collection[dest_rse_id],
+                                          account=account, retry_count=retry_count, priority=priority, transfertool=transfertool,
+                                          requested_at=requested_at)
+             requests_by_id[request_id] = request
+             # if STAGEIN and destination RSE is QoS make sure the source is included
+             if request.request_type == RequestType.STAGEIN and get_rse_attribute(rse_id=dest_rse_id, key=RseAttr.STAGING_REQUIRED, session=session):
+                 source = RequestSource(rse=rse_collection[dest_rse_id])
+                 request.sources.append(source)
+
+         if replica_rse_id is not None:
+             replica_rse = rse_collection[replica_rse_id]
+             replica_rse.name = replica_rse_name
+             source = RequestSource(rse=replica_rse, file_path=file_path,
+                                    ranking=source_ranking, distance=distance, url=source_url)
+             request.sources.append(source)
+             if source_rse_id == replica_rse_id:
+                 request.requested_source = source
+
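+     # record which daemon picked these requests up; the selection above skips requests
+     # recently marked by the same daemon within processed_at_delay seconds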
685
+ if processed_by:
686
+ for chunk in chunks(requests_by_id, 100):
687
+ stmt = update(
688
+ models.Request
689
+ ).where(
690
+ models.Request.id.in_(chunk)
691
+ ).execution_options(
692
+ synchronize_session=False
693
+ ).values({
694
+ models.Request.last_processed_by: processed_by,
695
+ models.Request.last_processed_at: datetime.datetime.now()
696
+ })
697
+ session.execute(stmt)
698
+
699
+ return requests_by_id
700
+
701
+
702
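The `.with_hint(...)` calls above attach Oracle-only INDEX hints: SQLAlchemy emits the hint text only when the statement is compiled for the named dialect and drops it everywhere else. A minimal self-contained sketch of the same pattern, using a hypothetical `requests` table rather than Rucio's models:

from sqlalchemy import Column, Integer, MetaData, Table, select

requests = Table('requests', MetaData(), Column('id', Integer, primary_key=True))
stmt = select(requests).with_hint(requests, 'INDEX(REQUESTS REQUESTS_PK)', 'oracle')
# The hint appears only when compiled for Oracle; the default compilation below omits it.
print(stmt)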
+ @read_session
+ def fetch_paths(
+ request_id: str,
+ *,
+ session: "Session"
+ ) -> dict[str, list[str]]:
+ """
+ Find the paths for which the provided request is a constituent hop.
+
+ Returns a dict: {initial_request_id1: path1, ...}. Each path is an ordered list of request_ids.
+ """
+ transfer_hop_alias = aliased(models.TransferHop)
+ stmt = select(
+ models.TransferHop,
+ ).join(
+ transfer_hop_alias,
+ and_(transfer_hop_alias.initial_request_id == models.TransferHop.initial_request_id,
+ or_(transfer_hop_alias.request_id == request_id,
+ transfer_hop_alias.initial_request_id == request_id))
+ )
+
+ parents_by_initial_request = {}
+ for hop, in session.execute(stmt):
+ parents_by_initial_request.setdefault(hop.initial_request_id, {})[hop.next_hop_request_id] = hop.request_id
+
+ paths = {}
+ for initial_request_id, parents in parents_by_initial_request.items():
+ path = []
+ cur_request = initial_request_id
+ path.append(cur_request)
+ while parents.get(cur_request):
+ cur_request = parents[cur_request]
+ path.append(cur_request)
+ paths[initial_request_id] = list(reversed(path))
+ return paths
+
+
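fetch_paths reconstructs each multi-hop path from a parent-pointer map: for every hop row, the predecessor of next_hop_request_id is request_id, so walking the map from the initial request id yields the path in reverse. A minimal sketch of that walk, with hypothetical request ids:

# parents maps next_hop_request_id -> request_id (its predecessor in the path)
parents = {'hop2': 'hop1', 'initial': 'hop2'}  # hypothetical ids
path = ['initial']
while parents.get(path[-1]):
    path.append(parents[path[-1]])
print(list(reversed(path)))  # ['hop1', 'hop2', 'initial']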
+ @METRICS.time_it
+ @transactional_session
+ def get_and_mark_next(
+ rse_collection: "RseCollection",
+ request_type: Union[list[RequestType], RequestType],
+ state: Union[list[RequestState], RequestState],
+ processed_by: Optional[str] = None,
+ processed_at_delay: int = 600,
+ limit: int = 100,
+ older_than: "Optional[datetime.datetime]" = None,
+ rse_id: Optional[str] = None,
+ activity: Optional[str] = None,
+ total_workers: int = 0,
+ worker_number: int = 0,
+ mode_all: bool = False,
+ hash_variable: str = 'id',
+ activity_shares: Optional[dict[str, Any]] = None,
+ include_dependent: bool = True,
+ transfertool: Optional[str] = None,
+ *,
+ session: "Session"
+ ) -> list[dict[str, Any]]:
+ """
+ Retrieve the next requests matching the request type and state.
+ Workers are balanced via hashing to reduce concurrency on the database.
+
+ :param rse_collection: the RSE collection being used
+ :param request_type: Type of the request as a string or list of strings.
+ :param state: State of the request as a string or list of strings.
+ :param processed_by: the daemon/executable running this query
+ :param processed_at_delay: how many seconds to ignore a request if it's already being processed by the same daemon
+ :param limit: Integer of requests to retrieve.
+ :param older_than: Only select requests older than this DateTime.
+ :param rse_id: The RSE to filter on.
+ :param activity: The activity to filter on.
+ :param total_workers: Number of total workers.
+ :param worker_number: Id of the executing worker.
+ :param mode_all: If set to True the function returns full request dictionaries; if set to False it returns a list of dictionaries {'request_id': x, 'external_host': y, 'external_id': z}.
+ :param hash_variable: The variable to use to perform the partitioning. By default it uses the request id.
+ :param activity_shares: Activity shares dictionary, with the number of requests per activity.
+ :param include_dependent: If true, includes transfers which have a previous-hop dependency on other transfers.
+ :param transfertool: The transfer tool as specified in rucio.cfg.
+ :param session: Database session to use.
+ :returns: List of requests as dictionaries.
+ """
+ request_type_metric_label = '.'.join(a.name for a in request_type) if isinstance(request_type, list) else request_type.name
+ state_metric_label = '.'.join(s.name for s in state) if isinstance(state, list) else state.name
+ METRICS.counter('get_next.requests.{request_type}.{state}').labels(request_type=request_type_metric_label, state=state_metric_label).inc()
+
+ # lists of one element are not allowed by SQLA, so just duplicate the item
+ if not isinstance(request_type, list):
+ request_type = [request_type, request_type]
+ elif len(request_type) == 1:
+ request_type = [request_type[0], request_type[0]]
+ if not isinstance(state, list):
+ state = [state, state]
+ elif len(state) == 1:
+ state = [state[0], state[0]]
+
+ result = []
+ if not activity_shares:
+ activity_shares = [None] # type: ignore
+
+ for share in activity_shares: # type: ignore
+
+ query = select(
+ models.Request.id
+ ).where(
+ and_(models.Request.state.in_(state),
+ models.Request.request_type.in_(request_type))
+ ).order_by(
+ asc(models.Request.updated_at)
+ )
+ if processed_by:
+ query = query.where(
+ or_(models.Request.last_processed_by.is_(null()),
+ models.Request.last_processed_by != processed_by,
+ models.Request.last_processed_at < datetime.datetime.utcnow() - datetime.timedelta(seconds=processed_at_delay))
+ )
+ if transfertool:
+ query = query.with_hint(
+ models.Request,
+ 'INDEX(REQUESTS REQUESTS_TYP_STA_TRA_ACT_IDX)',
+ 'oracle'
+ ).where(
+ models.Request.transfertool == transfertool
+ )
+ else:
+ query = query.with_hint(
+ models.Request,
+ 'INDEX(REQUESTS REQUESTS_TYP_STA_UPD_IDX)',
+ 'oracle'
+ )
+
+ if not include_dependent:
+ # filter out transfers which depend on some other "previous hop" requests.
+ # In particular, this is used to avoid multiple finishers trying to archive different
+ # transfers from the same path and thus having concurrent deletion of the same rows from
+ # the transfer_hop table.
+ query = query.outerjoin(
+ models.TransferHop,
+ models.TransferHop.next_hop_request_id == models.Request.id
+ ).where(
+ models.TransferHop.next_hop_request_id == null()
+ )
+
+ if isinstance(older_than, datetime.datetime):
+ query = query.where(models.Request.updated_at < older_than)
+
+ if rse_id:
+ query = query.where(models.Request.dest_rse_id == rse_id)
+
+ if share:
+ query = query.where(models.Request.activity == share)
+ elif activity:
+ query = query.where(models.Request.activity == activity)
+
+ query = filter_thread_work(session=session, query=query, total_threads=total_workers, thread_id=worker_number, hash_variable=hash_variable)
+
+ if share:
+ query = query.limit(activity_shares[share]) # type: ignore
+ else:
+ query = query.limit(limit)
+
+ if session.bind.dialect.name == 'oracle': # type: ignore
+ query = select(
+ models.Request
+ ).where(
+ models.Request.id.in_(query)
+ ).with_for_update(
+ skip_locked=True
+ )
+ else:
+ query = query.with_only_columns(
+ models.Request
+ ).with_for_update(
+ skip_locked=True,
+ of=models.Request.last_processed_by
+ )
+ query_result = session.execute(query).scalars()
+ if query_result:
+ if mode_all:
+ for res in query_result:
+ res_dict = res.to_dict()
+ res_dict['request_id'] = res_dict['id']
+ res_dict['attributes'] = json.loads(str(res_dict['attributes'] or '{}'))
+
+ dst_id = res_dict['dest_rse_id']
+ src_id = res_dict['source_rse_id']
+ res_dict['dst_rse'] = rse_collection[dst_id].ensure_loaded(load_name=True, load_attributes=True)
+ res_dict['src_rse'] = rse_collection[src_id].ensure_loaded(load_name=True, load_attributes=True) if src_id is not None else None
+
+ result.append(res_dict)
+ else:
+ for res in query_result:
+ result.append({'request_id': res.id, 'external_host': res.external_host, 'external_id': res.external_id})
+
+ request_ids = {r['request_id'] for r in result}
+ if processed_by and request_ids:
+ for chunk in chunks(request_ids, 100):
+ stmt = update(
+ models.Request
+ ).where(
+ models.Request.id.in_(chunk)
+ ).execution_options(
+ synchronize_session=False
+ ).values({
+ models.Request.last_processed_by: processed_by,
+ models.Request.last_processed_at: datetime.datetime.now()
+ })
+ session.execute(stmt)
+
+ return result
+
+
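Worker balancing in get_and_mark_next is delegated to filter_thread_work, which partitions candidate rows by hashing `hash_variable` across the workers. As an illustration only (not Rucio's actual implementation), hash-based partitioning looks roughly like this:

import hashlib

def belongs_to_worker(request_id: str, worker_number: int, total_workers: int) -> bool:
    # Stable hash of the id; each worker keeps only its own slot, so
    # concurrent workers rarely compete for the same rows.
    digest = int(hashlib.md5(request_id.encode()).hexdigest(), 16)
    return digest % total_workers == worker_number

ids = ['aa01', 'bb02', 'cc03', 'dd04']
print([i for i in ids if belongs_to_worker(i, worker_number=0, total_workers=2)])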
+ @transactional_session
+ def update_request(
+ request_id: str,
+ state: Optional[RequestState] = None,
+ transferred_at: Optional[datetime.datetime] = None,
+ started_at: Optional[datetime.datetime] = None,
+ staging_started_at: Optional[datetime.datetime] = None,
+ staging_finished_at: Optional[datetime.datetime] = None,
+ source_rse_id: Optional[str] = None,
+ err_msg: Optional[str] = None,
+ attributes: Optional[dict[str, str]] = None,
+ priority: Optional[int] = None,
+ transfertool: Optional[str] = None,
+ *,
+ raise_on_missing: bool = False,
+ session: "Session",
+ ) -> bool:
+
+ rowcount = 0
+ try:
+ update_items: dict[Any, Any] = {
+ models.Request.updated_at: datetime.datetime.utcnow()
+ }
+ if state is not None:
+ update_items[models.Request.state] = state
+ if transferred_at is not None:
+ update_items[models.Request.transferred_at] = transferred_at
+ if started_at is not None:
+ update_items[models.Request.started_at] = started_at
+ if staging_started_at is not None:
+ update_items[models.Request.staging_started_at] = staging_started_at
+ if staging_finished_at is not None:
+ update_items[models.Request.staging_finished_at] = staging_finished_at
+ if source_rse_id is not None:
+ update_items[models.Request.source_rse_id] = source_rse_id
+ if err_msg is not None:
+ update_items[models.Request.err_msg] = err_msg
+ if attributes is not None:
+ update_items[models.Request.attributes] = json.dumps(attributes)
+ if priority is not None:
+ update_items[models.Request.priority] = priority
+ if transfertool is not None:
+ update_items[models.Request.transfertool] = transfertool
+
+ stmt = update(
+ models.Request
+ ).where(
+ models.Request.id == request_id
+ ).execution_options(
+ synchronize_session=False
+ ).values(
+ update_items
+ )
+ rowcount = session.execute(stmt).rowcount
+
+ except IntegrityError as error:
+ raise RucioException(error.args)
+
+ if not rowcount and raise_on_missing:
+ raise UnsupportedOperation("Request %s state cannot be updated." % request_id)
+
+ if rowcount:
+ return True
+ return False
+
+
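update_request builds the SET clause only from the keyword arguments that were actually supplied, so a single UPDATE statement serves many call sites. The same pattern in isolation (a sketch, not Rucio code; the helper name is hypothetical):

import datetime
from typing import Any, Optional

def build_update_items(state: Optional[str] = None, err_msg: Optional[str] = None) -> dict[str, Any]:
    items: dict[str, Any] = {'updated_at': datetime.datetime.utcnow()}  # always refreshed
    if state is not None:
        items['state'] = state
    if err_msg is not None:
        items['err_msg'] = err_msg
    return items

print(build_update_items(state='FAILED'))  # only 'updated_at' and 'state' are set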
+ @METRICS.count_it
+ @transactional_session
+ def transition_request_state(
+ request_id: str,
+ state: Optional[RequestState] = None,
+ external_id: Optional[str] = None,
+ transferred_at: Optional[datetime.datetime] = None,
+ started_at: Optional[datetime.datetime] = None,
+ staging_started_at: Optional[datetime.datetime] = None,
+ staging_finished_at: Optional[datetime.datetime] = None,
+ source_rse_id: Optional[str] = None,
+ err_msg: Optional[str] = None,
+ attributes: Optional[dict[str, str]] = None,
+ *,
+ request: Optional[dict[str, Any]] = None,
+ session: "Session",
+ logger: LoggerFunction = logging.log
+ ) -> bool:
+ """
+ Update the request if its state changed. Return a boolean showing whether the request was actually updated.
+ """
+
+ # TODO: Should this be a private method?
+
+ if request is None:
+ request = get_request(request_id, session=session)
+
+ if not request:
+ # The request was deleted in the meantime. Ignore it.
+ logger(logging.WARNING, "Request %s not found. Cannot set its state to %s", request_id, state)
+ return False
+
+ if request['state'] == state:
+ logger(logging.INFO, "Request %s state is already %s. Will skip the update.", request_id, state)
+ return False
+
+ if state in [RequestState.FAILED, RequestState.DONE, RequestState.LOST] and (request["external_id"] != external_id):
+ logger(logging.ERROR, "Request %s should not be updated to 'Failed', 'Done' or 'Lost' without a matching external transfer id", request_id)
+ return False
+
+ update_request(
+ request_id=request_id,
+ state=state,
+ transferred_at=transferred_at,
+ started_at=started_at,
+ staging_started_at=staging_started_at,
+ staging_finished_at=staging_finished_at,
+ source_rse_id=source_rse_id,
+ err_msg=err_msg,
+ attributes=attributes,
+ raise_on_missing=True,
+ session=session,
+ )
+ return True
+
+
+ @METRICS.count_it
+ @transactional_session
+ def transition_requests_state_if_possible(
+ request_ids: 'Iterable[str]',
+ new_state: str,
+ *,
+ session: "Session",
+ logger: LoggerFunction = logging.log
+ ) -> None:
+ """
+ Bulk update the state of requests. Skips silently if the request_id does not exist.
+
+ :param request_ids: List of (Request-ID as a 32 character hex string).
+ :param new_state: New state as string.
+ :param session: Database session to use.
+ :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
+ """
+
+ try:
+ for request_id in request_ids:
+ try:
+ transition_request_state(request_id, new_state, session=session, logger=logger)
+ except UnsupportedOperation:
+ continue
+ except IntegrityError as error:
+ raise RucioException(error.args)
+
+
+ @METRICS.count_it
+ @transactional_session
+ def touch_requests_by_rule(
+ rule_id: str,
+ *,
+ session: "Session"
+ ) -> None:
+ """
+ Update the update time of requests in a rule. Fails silently if there are no requests for this rule.
+
+ :param rule_id: Rule-ID as a 32 character hex string.
+ :param session: Database session to use.
+ """
+
+ try:
+ stmt = update(
+ models.Request
+ ).prefix_with(
+ '/*+ INDEX(REQUESTS REQUESTS_RULEID_IDX) */',
+ dialect='oracle'
+ ).where(
+ and_(models.Request.rule_id == rule_id,
+ models.Request.state.in_([RequestState.FAILED, RequestState.DONE, RequestState.LOST, RequestState.NO_SOURCES, RequestState.ONLY_TAPE_SOURCES]),
+ models.Request.updated_at < datetime.datetime.utcnow())
+ ).execution_options(
+ synchronize_session=False
+ ).values({
+ models.Request.updated_at: datetime.datetime.utcnow() + datetime.timedelta(minutes=20)
+ })
+ session.execute(stmt)
+ except IntegrityError as error:
+ raise RucioException(error.args)
+
+
+ @read_session
+ def get_request(
+ request_id: str,
+ *,
+ session: "Session"
+ ) -> Optional[dict[str, Any]]:
+ """
+ Retrieve a request by its ID.
+
+ :param request_id: Request-ID as a 32 character hex string.
+ :param session: Database session to use.
+ :returns: Request as a dictionary.
+ """
+
+ try:
+ stmt = select(
+ models.Request
+ ).where(
+ models.Request.id == request_id
+ )
+ tmp = session.execute(stmt).scalar()
+
+ if not tmp:
+ return
+ else:
+ tmp = tmp.to_dict()
+ tmp['attributes'] = json.loads(str(tmp['attributes'] or '{}'))
+ return tmp
+ except IntegrityError as error:
+ raise RucioException(error.args)
+
+
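The attributes column can hold NULL or a JSON string; the `str(... or '{}')` idiom above normalises both before parsing, so callers always get a dictionary back. The same idiom in isolation:

import json

for raw in (None, '{"activity": "User Subscriptions"}'):
    print(json.loads(str(raw or '{}')))  # {} then {'activity': 'User Subscriptions'}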
+ @METRICS.count_it
+ @read_session
+ def get_request_by_did(
+ scope: InternalScope,
+ name: str,
+ rse_id: str,
+ request_type: Optional[RequestType] = None,
+ *,
+ session: "Session"
+ ) -> dict[str, Any]:
+ """
+ Retrieve a request by its DID for a destination RSE.
+
+ :param scope: The scope of the data identifier.
+ :param name: The name of the data identifier.
+ :param rse_id: The destination RSE ID of the request.
+ :param request_type: The type of request as rucio.db.sqla.constants.RequestType.
+ :param session: Database session to use.
+ :returns: Request as a dictionary.
+ """
+
+ try:
+ stmt = select(
+ models.Request
+ ).where(
+ and_(models.Request.scope == scope,
+ models.Request.name == name,
+ models.Request.dest_rse_id == rse_id)
+ )
+ if request_type:
+ stmt = stmt.where(
+ models.Request.request_type == request_type
+ )
+
+ tmp = session.execute(stmt).scalar()
+ if not tmp:
+ raise RequestNotFound(f'No request found for DID {scope}:{name} at RSE {rse_id}')
+ else:
+ tmp = tmp.to_dict()
+
+ tmp['source_rse'] = get_rse_name(rse_id=tmp['source_rse_id'], session=session) if tmp['source_rse_id'] is not None else None
+ tmp['dest_rse'] = get_rse_name(rse_id=tmp['dest_rse_id'], session=session) if tmp['dest_rse_id'] is not None else None
+ tmp['attributes'] = json.loads(str(tmp['attributes'] or '{}'))
+
+ return tmp
+ except IntegrityError as error:
+ raise RucioException(error.args)
+
+
+ @METRICS.count_it
+ @read_session
+ def get_request_history_by_did(
+ scope: InternalScope,
+ name: str,
+ rse_id: str,
+ request_type: Optional[RequestType] = None,
+ *,
+ session: "Session"
+ ) -> dict[str, Any]:
+ """
+ Retrieve a historical request by its DID for a destination RSE.
+
+ :param scope: The scope of the data identifier.
+ :param name: The name of the data identifier.
+ :param rse_id: The destination RSE ID of the request.
+ :param request_type: The type of request as rucio.db.sqla.constants.RequestType.
+ :param session: Database session to use.
+ :returns: Request as a dictionary.
+ """
+
+ try:
+ stmt = select(
+ models.RequestHistory
+ ).where(
+ and_(models.RequestHistory.scope == scope,
+ models.RequestHistory.name == name,
+ models.RequestHistory.dest_rse_id == rse_id)
+ )
+ if request_type:
+ stmt = stmt.where(
+ models.RequestHistory.request_type == request_type
+ )
+
+ tmp = session.execute(stmt).scalar()
+ if not tmp:
+ raise RequestNotFound(f'No request found for DID {scope}:{name} at RSE {rse_id}')
+ else:
+ tmp = tmp.to_dict()
+
+ tmp['source_rse'] = get_rse_name(rse_id=tmp['source_rse_id'], session=session) if tmp['source_rse_id'] is not None else None
+ tmp['dest_rse'] = get_rse_name(rse_id=tmp['dest_rse_id'], session=session) if tmp['dest_rse_id'] is not None else None
+
+ return tmp
+ except IntegrityError as error:
+ raise RucioException(error.args)
+
+
+ def is_intermediate_hop(request: RequestDict) -> bool:
+ """
+ Check if the request is an intermediate hop in a multi-hop transfer.
+ """
+ if (request['attributes'] or {}).get('is_intermediate_hop'):
+ return True
+ return False
+
+
+ @transactional_session
+ def handle_failed_intermediate_hop(
+ request: RequestDict,
+ *,
+ session: "Session"
+ ) -> int:
+ """
+ Perform housekeeping behind a failed intermediate hop.
+ Returns the number of updated requests.
+ """
+ # mark all hops following this one (in any multihop path) as Failed
+ new_state = RequestState.FAILED
+ reason = 'Unused hop in multi-hop'
+
+ paths = fetch_paths(request['id'], session=session)
+ dependent_requests = []
+ for path in paths.values():
+ idx = path.index(request['id'])
+ dependent_requests.extend(path[idx + 1:])
+
+ if dependent_requests:
+ stmt = update(
+ models.Request
+ ).where(
+ and_(models.Request.id.in_(dependent_requests),
+ models.Request.state.in_([RequestState.QUEUED, RequestState.SUBMITTED]))
+ ).execution_options(
+ synchronize_session=False
+ ).values({
+ models.Request.state: new_state,
+ models.Request.err_msg: get_transfer_error(new_state, reason=reason)
+ })
+ session.execute(stmt)
+ return len(dependent_requests)
+
+
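Everything that comes after the failed hop in a path depends on it, which is what the `path[idx + 1:]` slice above captures. A sketch with hypothetical request ids:

path = ['hop1', 'hop2', 'hop3', 'initial']  # ordered first hop -> initial request
failed = 'hop2'
idx = path.index(failed)
print(path[idx + 1:])  # ['hop3', 'initial'] -> these get marked FAILED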
+ @METRICS.count_it
+ @transactional_session
+ def archive_request(
+ request_id: str,
+ *,
+ session: "Session"
+ ) -> None:
+ """
+ Move a request to the history table.
+
+ :param request_id: Request-ID as a 32 character hex string.
+ :param session: Database session to use.
+ """
+
+ req = get_request(request_id=request_id, session=session)
+
+ if req:
+ hist_request = models.RequestHistory(id=req['id'],
+ created_at=req['created_at'],
+ request_type=req['request_type'],
+ scope=req['scope'],
+ name=req['name'],
+ dest_rse_id=req['dest_rse_id'],
+ source_rse_id=req['source_rse_id'],
+ attributes=json.dumps(req['attributes']) if isinstance(req['attributes'], dict) else req['attributes'],
+ state=req['state'],
+ account=req['account'],
+ external_id=req['external_id'],
+ retry_count=req['retry_count'],
+ err_msg=req['err_msg'],
+ previous_attempt_id=req['previous_attempt_id'],
+ external_host=req['external_host'],
+ rule_id=req['rule_id'],
+ activity=req['activity'],
+ bytes=req['bytes'],
+ md5=req['md5'],
+ adler32=req['adler32'],
+ dest_url=req['dest_url'],
+ requested_at=req['requested_at'],
+ submitted_at=req['submitted_at'],
+ staging_started_at=req['staging_started_at'],
+ staging_finished_at=req['staging_finished_at'],
+ started_at=req['started_at'],
+ estimated_started_at=req['estimated_started_at'],
+ estimated_at=req['estimated_at'],
+ transferred_at=req['transferred_at'],
+ estimated_transferred_at=req['estimated_transferred_at'],
+ transfertool=req['transfertool'])
+ hist_request.save(session=session)
+ try:
+ time_diff = req['updated_at'] - req['created_at']
+ time_diff_s = time_diff.seconds + time_diff.days * 24 * 3600
+ METRICS.timer('archive_request_per_activity.{activity}').labels(activity=req['activity'].replace(' ', '_')).observe(time_diff_s)
+ stmt = delete(
+ models.Source
+ ).where(
+ models.Source.request_id == request_id
+ )
+ session.execute(stmt)
+
+ stmt = delete(
+ models.TransferHop
+ ).where(
+ or_(models.TransferHop.request_id == request_id,
+ models.TransferHop.next_hop_request_id == request_id,
+ models.TransferHop.initial_request_id == request_id)
+ )
+ session.execute(stmt)
+
+ stmt = delete(
+ models.Request
+ ).where(
+ models.Request.id == request_id
+ )
+ session.execute(stmt)
+ except IntegrityError as error:
+ raise RucioException(error.args)
+
+
+ @METRICS.count_it
+ @transactional_session
+ def cancel_request_did(
+ scope: InternalScope,
+ name: str,
+ dest_rse_id: str,
+ request_type: RequestType = RequestType.TRANSFER,
+ *,
+ session: "Session",
+ logger: LoggerFunction = logging.log
+ ) -> dict[str, Any]:
+ """
+ Cancel a request based on a DID and request type.
+
+ :param scope: Data identifier scope as a string.
+ :param name: Data identifier name as a string.
+ :param dest_rse_id: RSE id as a string.
+ :param request_type: Type of the request.
+ :param session: Database session to use.
+ :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
+ :returns: Dictionary mapping external hosts to the set of external transfer ids to cancel.
+ """
+
+ reqs = None
+ try:
+ stmt = select(
+ models.Request.id,
+ models.Request.external_id,
+ models.Request.external_host
+ ).where(
+ and_(models.Request.scope == scope,
+ models.Request.name == name,
+ models.Request.dest_rse_id == dest_rse_id,
+ models.Request.request_type == request_type)
+ )
+ reqs = session.execute(stmt).all()
+ if not reqs:
+ logger(logging.WARNING, 'Tried to cancel non-existent request for DID %s:%s at RSE %s' % (scope, name, get_rse_name(rse_id=dest_rse_id, session=session)))
+ except IntegrityError as error:
+ raise RucioException(error.args)
+
+ transfers_to_cancel = {}
+ for req in reqs:
+ # if a transfer for this request already exists in the transfertool, schedule it for cancellation
+ if req[1] is not None:
+ transfers_to_cancel.setdefault(req[2], set()).add(req[1])
+ archive_request(request_id=req[0], session=session)
+ return transfers_to_cancel
+
+
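The returned mapping groups external transfer ids by external host, so a caller can issue one bulk cancellation per transfertool endpoint. With hypothetical values:

transfers_to_cancel = {'https://fts.example.org:8446': {'extid-1', 'extid-2'}}  # hypothetical host/ids
for external_host, external_ids in transfers_to_cancel.items():
    print(f'cancel {sorted(external_ids)} on {external_host}')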
+ @read_session
+ def get_sources(
+ request_id: str,
+ rse_id: Optional[str] = None,
+ *,
+ session: "Session"
+ ) -> Optional[list[dict[str, Any]]]:
+ """
+ Retrieve the sources of a request by its ID.
+
+ :param request_id: Request-ID as a 32 character hex string.
+ :param rse_id: RSE ID as a 32 character hex string.
+ :param session: Database session to use.
+ :returns: List of sources as dictionaries, or None if no sources were found.
+ """
+
+ try:
+ stmt = select(
+ models.Source
+ ).where(
+ models.Source.request_id == request_id
+ )
+ if rse_id:
+ stmt = stmt.where(
+ models.Source.rse_id == rse_id
+ )
+ tmp = session.execute(stmt).scalars().all()
+ if not tmp:
+ return
+ else:
+ result = []
+ for t in tmp:
+ t2 = t.to_dict()
+ result.append(t2)
+
+ return result
+ except IntegrityError as error:
+ raise RucioException(error.args)
+
+
+ @read_session
+ def get_heavy_load_rses(
+ threshold: int,
+ *,
+ session: "Session"
+ ) -> Optional[list[dict[str, Any]]]:
+ """
+ Retrieve heavy-load RSEs.
+
+ :param threshold: Threshold as an int.
+ :param session: Database session to use.
+ :returns: List of dictionaries {'rse_id': ..., 'load': ...} for RSEs at or above the threshold, or None if no sources are in use.
+ """
+ try:
+ stmt = select(
+ models.Source.rse_id,
+ func.count(models.Source.rse_id).label('load')
+ ).where(
+ models.Source.is_using == true()
+ ).group_by(
+ models.Source.rse_id
+ )
+ results = session.execute(stmt).all()
+
+ if not results:
+ return
+
+ result = []
+ for t in results:
+ if t[1] >= threshold:
+ t2 = {'rse_id': t[0], 'load': t[1]}
+ result.append(t2)
+
+ return result
+ except IntegrityError as error:
+ raise RucioException(error.args)
+
+
+ class TransferStatsManager:
+
+ @dataclass
+ class _StatsRecord:
+ files_failed: int = 0
+ files_done: int = 0
+ bytes_done: int = 0
+
+ def __init__(self):
+ self.lock = threading.Lock()
+
+ retentions = sorted([
+ # resolution, retention
+ (datetime.timedelta(minutes=5), datetime.timedelta(hours=1)),
+ (datetime.timedelta(hours=1), datetime.timedelta(days=1)),
+ (datetime.timedelta(days=1), datetime.timedelta(days=30)),
+ ])
+
+ self.retentions = retentions
+ self.raw_resolution, raw_retention = self.retentions[0]
+
+ self.current_timestamp = datetime.datetime(year=1970, month=1, day=1)
+ self.current_samples = defaultdict()
+ self._rollover_samples(rollover_time=datetime.datetime.utcnow())
+
+ self.record_stats = True
+ self.save_timer = None
+ self.downsample_timer = None
+ self.downsample_period = math.ceil(raw_retention.total_seconds())
+
+ def __enter__(self) -> "TransferStatsManager":
+ self.record_stats = config_get_bool('transfers', 'stats_enabled', default=self.record_stats)
+ downsample_period = config_get_int('transfers', 'stats_downsample_period', default=self.downsample_period)
+ # Introduce some voluntary jitter to reduce the likelihood of performing this database
+ # operation multiple times in parallel.
+ self.downsample_period = random.randint(downsample_period * 3 // 4, math.ceil(downsample_period * 5 / 4)) # noqa: S311
+ if self.record_stats:
+ self.save_timer = threading.Timer(self.raw_resolution.total_seconds(), self.periodic_save)
+ self.save_timer.start()
+ self.downsample_timer = threading.Timer(self.downsample_period, self.periodic_downsample_and_cleanup)
+ self.downsample_timer.start()
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+ if self.save_timer is not None:
+ self.save_timer.cancel()
+ if self.downsample_timer is not None:
+ self.downsample_timer.cancel()
+ if self.record_stats:
+ self.force_save()
+
+ def observe(
+ self,
+ src_rse_id: str,
+ dst_rse_id: str,
+ activity: str,
+ state: RequestState,
+ file_size: int,
+ *,
+ submitted_at: Optional[datetime.datetime] = None,
+ started_at: Optional[datetime.datetime] = None,
+ transferred_at: Optional[datetime.datetime] = None,
+ session: "Optional[Session]" = None
+ ) -> None:
+ """
+ Increment counters for the given (source_rse, destination_rse, activity) as a result of
+ a successful or failed transfer.
+ """
+ if not self.record_stats:
+ return
+ now = datetime.datetime.utcnow()
+ with self.lock:
+ save_timestamp, save_samples = now, {}
+ if now >= self.current_timestamp + self.raw_resolution:
+ save_timestamp, save_samples = self._rollover_samples(now)
+
+ if state in (RequestState.DONE, RequestState.FAILED):
+ record = self.current_samples[dst_rse_id, src_rse_id, activity]
+ if state == RequestState.DONE:
+ record.files_done += 1
+ record.bytes_done += file_size
+
+ if submitted_at is not None and started_at is not None:
+ wait_time = (started_at - submitted_at).total_seconds()
+ METRICS.timer(name='wait_time', buckets=TRANSFER_TIME_BUCKETS).observe(wait_time)
+ if transferred_at is not None:
+ transfer_time = (transferred_at - started_at).total_seconds()
+ METRICS.timer(name='transfer_time', buckets=TRANSFER_TIME_BUCKETS).observe(transfer_time)
+ else:
+ record.files_failed += 1
+ if save_samples:
+ self._save_samples(timestamp=save_timestamp, samples=save_samples, session=session)
+
+ def periodic_save(self) -> None:
+ """
+ Save samples to the database if the end of the current recording interval was reached.
+ """
+ self.save_timer = threading.Timer(self.raw_resolution.total_seconds(), self.periodic_save)
+ self.save_timer.start()
+
+ now = datetime.datetime.utcnow()
+ with self.lock:
+ save_timestamp, save_samples = now, {}
+ if now >= self.current_timestamp + self.raw_resolution:
+ save_timestamp, save_samples = self._rollover_samples(now)
+ if save_samples:
+ self._save_samples(timestamp=save_timestamp, samples=save_samples)
+
+ @transactional_session
+ def force_save(self, *, session: "Session") -> None:
+ """
+ Commit to the database everything without ensuring that
+ the end of the currently recorded time interval is reached.
+
+ Only to be used for the final save operation on shutdown.
+ """
+ with self.lock:
+ save_timestamp, save_samples = self._rollover_samples(datetime.datetime.utcnow())
+ if save_samples:
+ self._save_samples(timestamp=save_timestamp, samples=save_samples, session=session)
+
+ def _rollover_samples(self, rollover_time: datetime.datetime) -> "tuple[datetime.datetime, Mapping[tuple[str, str, str], TransferStatsManager._StatsRecord]]":
+ previous_samples = (self.current_timestamp, self.current_samples)
+ self.current_samples = defaultdict(lambda: self._StatsRecord())
+ _, self.current_timestamp = next(self.slice_time(self.raw_resolution, start_time=rollover_time + self.raw_resolution))
+ return previous_samples
+
+
+ @transactional_session
+ def _save_samples(
+ self,
+ timestamp: "datetime.datetime",
+ samples: "Mapping[tuple[str, str, str], TransferStatsManager._StatsRecord]",
+ *,
+ session: "Session"
+ ) -> None:
+ """
+ Commit the provided samples to the database.
+ """
+ rows_to_insert = []
+ for (dst_rse_id, src_rse_id, activity), record in samples.items():
+ rows_to_insert.append({
+ models.TransferStats.timestamp.name: timestamp,
+ models.TransferStats.resolution.name: self.raw_resolution.total_seconds(),
+ models.TransferStats.src_rse_id.name: src_rse_id,
+ models.TransferStats.dest_rse_id.name: dst_rse_id,
+ models.TransferStats.activity.name: activity,
+ models.TransferStats.files_failed.name: record.files_failed,
+ models.TransferStats.files_done.name: record.files_done,
+ models.TransferStats.bytes_done.name: record.bytes_done,
+ })
+ if rows_to_insert:
+ stmt = insert(
+ models.TransferStats
+ )
+ session.execute(stmt, rows_to_insert)
+
+ def periodic_downsample_and_cleanup(self) -> None:
+ """
+ Periodically create lower resolution samples from higher resolution ones.
+ """
+ self.downsample_timer = threading.Timer(self.downsample_period, self.periodic_downsample_and_cleanup)
+ self.downsample_timer.start()
+
+ while self.downsample_and_cleanup():
+ continue
+
+ @read_session
+ def _db_time_ranges(self, *, session: "Session") -> "dict[datetime.timedelta, tuple[datetime.datetime, datetime.datetime]]":
+
+ stmt = select(
+ models.TransferStats.resolution,
+ func.max(models.TransferStats.timestamp),
+ func.min(models.TransferStats.timestamp),
+ ).group_by(
+ models.TransferStats.resolution,
+ )
+ db_time_ranges = {
+ datetime.timedelta(seconds=res): (newest_t, oldest_t)
+ for res, newest_t, oldest_t in session.execute(stmt)
+ }
+ return db_time_ranges
+
+
+ @transactional_session
+ def downsample_and_cleanup(self, *, session: "Session") -> bool:
+ """
+ Housekeeping of samples in the database:
+ - create lower-resolution (but higher-retention) samples from higher-resolution ones;
+ - delete the samples which are older than the desired retention time.
+ Return True if it thinks there is still more to clean up.
+
+ This function is safe to execute in parallel from multiple daemons at the same
+ time. However, this is achieved at the cost of possibly introducing duplicate samples at
+ lower resolutions into the database. The possibility of duplicates at lower resolutions must
+ be considered when working with those samples: code must tolerate duplicates and avoid double-counting.
+ """
+
+ # Delay processing to leave time for all raw metrics to be correctly saved to the database
+ now = datetime.datetime.utcnow() - 4 * self.raw_resolution
+
+ db_time_ranges = self._db_time_ranges(session=session)
+
+ more_to_delete = False
+ id_temp_table = temp_table_mngr(session).create_id_table()
+ for i in range(1, len(self.retentions)):
+ src_resolution, desired_src_retention = self.retentions[i - 1]
+ dst_resolution, desired_dst_retention = self.retentions[i]
+
+ # Always keep samples at source resolution aligned to the destination resolution interval.
+ # Keep, at least, the amount of samples needed to cover the first interval at
+ # destination resolution, but keep more samples if explicitly configured to do so.
+ oldest_desired_src_timestamp, _ = next(self.slice_time(dst_resolution, start_time=now - desired_src_retention))
+
+ _, oldest_available_src_timestamp = db_time_ranges.get(src_resolution, (None, None))
+ newest_available_dst_timestamp, oldest_available_dst_timestamp = db_time_ranges.get(dst_resolution, (None, None))
+ # Only generate down-samples at destination resolution for intervals which:
+ # - are within the desired retention window
+ oldest_time_to_handle = now - desired_dst_retention - dst_resolution
+ # - don't already have a corresponding sample at destination resolution
+ if newest_available_dst_timestamp:
+ oldest_time_to_handle = max(oldest_time_to_handle, newest_available_dst_timestamp + datetime.timedelta(seconds=1))
+ # - have samples at source resolution to generate from
+ if oldest_available_src_timestamp:
+ oldest_time_to_handle = max(oldest_time_to_handle, oldest_available_src_timestamp)
+ else:
+ oldest_time_to_handle = now
+
+ # Create samples at lower resolution from samples at higher resolution
+ for recent_t, older_t in self.slice_time(dst_resolution, start_time=now, end_time=oldest_time_to_handle):
+ additional_fields = {
+ models.TransferStats.timestamp.name: older_t,
+ models.TransferStats.resolution.name: dst_resolution.total_seconds(),
+ }
+ src_totals = self._load_totals(resolution=src_resolution, recent_t=recent_t, older_t=older_t, session=session)
+ downsample_stats = [stat | additional_fields for stat in src_totals]
+ if downsample_stats:
+ session.execute(insert(models.TransferStats), downsample_stats)
+ if not oldest_available_dst_timestamp or older_t < oldest_available_dst_timestamp:
+ oldest_available_dst_timestamp = older_t
+ if not newest_available_dst_timestamp or older_t > newest_available_dst_timestamp:
+ newest_available_dst_timestamp = older_t
+
+ if oldest_available_dst_timestamp and newest_available_dst_timestamp:
+ db_time_ranges[dst_resolution] = (newest_available_dst_timestamp, oldest_available_dst_timestamp)
+
+ # Delete from the database the samples which are older than desired
+ more_to_delete |= self._cleanup(
+ id_temp_table=id_temp_table,
+ resolution=src_resolution,
+ timestamp=oldest_desired_src_timestamp,
+ session=session
+ )
+
+ # Cleanup samples at the lowest resolution, which were not handled by the previous loop
+ last_resolution, last_retention = self.retentions[-1]
+ _, oldest_desired_timestamp = next(self.slice_time(last_resolution, start_time=now - last_retention))
+ if db_time_ranges.get(last_resolution, (now, now))[1] < oldest_desired_timestamp:
+ more_to_delete |= self._cleanup(
+ id_temp_table=id_temp_table,
+ resolution=last_resolution,
+ timestamp=oldest_desired_timestamp,
+ session=session
+ )
+
+ # Cleanup all resolutions which exist in the database but are not desired by rucio anymore
+ # (probably due to configuration changes).
+ for resolution_to_cleanup in set(db_time_ranges).difference(r[0] for r in self.retentions):
+ more_to_delete |= self._cleanup(
+ id_temp_table=id_temp_table,
+ resolution=resolution_to_cleanup,
+ timestamp=now,
+ session=session
+ )
+ return more_to_delete
+
+
+ @stream_session
+ def load_totals(
+ self,
+ older_t: "datetime.datetime",
+ dest_rse_id: Optional[str] = None,
+ src_rse_id: Optional[str] = None,
+ activity: Optional[str] = None,
+ by_activity: bool = True,
+ *,
+ session: "Session"
+ ) -> "Iterator[Mapping[str, str | int]]":
+ """
+ Load totals from now back to older_t in the past, automatically picking the best resolution.
+
+ The results will not necessarily be uniquely grouped by src_rse/dest_rse/activity. The caller
+ is responsible for summing identical src_rse/dest_rse/activity rows to get the actual result.
+ """
+
+ db_time_ranges = self._db_time_ranges(session=session)
+
+ oldest_fetched = older_t
+ for resolution, retention in reversed(self.retentions):
+ newest_available_db_timestamp, oldest_available_db_timestamp = db_time_ranges.get(resolution, (None, None))
+
+ if not (newest_available_db_timestamp and oldest_available_db_timestamp):
+ continue
+
+ if newest_available_db_timestamp < oldest_fetched:
+ continue
+
+ yield from self._load_totals(
+ resolution=resolution,
+ recent_t=newest_available_db_timestamp + datetime.timedelta(seconds=1),
+ older_t=oldest_fetched + datetime.timedelta(seconds=1),
+ dest_rse_id=dest_rse_id,
+ src_rse_id=src_rse_id,
+ activity=activity,
+ by_activity=by_activity,
+ session=session,
+ )
+ oldest_fetched = newest_available_db_timestamp + resolution
+
+
+ @stream_session
+ def _load_totals(
+ self,
+ resolution: "datetime.timedelta",
+ recent_t: "datetime.datetime",
+ older_t: "datetime.datetime",
+ dest_rse_id: Optional[str] = None,
+ src_rse_id: Optional[str] = None,
+ activity: Optional[str] = None,
+ by_activity: bool = True,
+ *,
+ session: "Session"
+ ) -> "Iterator[Mapping[str, Union[str, int]]]":
+ """
+ Load aggregated totals for the given resolution and time interval.
+
+ Ignore multiple values for the same timestamp at downsampled resolutions.
+ They are the result of concurrent downsample operations (two different
+ daemons performing downsampling at the same time). Very probably,
+ the values are identical. Even if not, these values must not be counted twice.
+ This is to gracefully handle multiple parallel downsample operations.
+ """
+ grouping: "list[Any]" = [
+ models.TransferStats.src_rse_id,
+ models.TransferStats.dest_rse_id,
+ ]
+ if by_activity:
+ grouping.append(models.TransferStats.activity)
+
+ if resolution == self.raw_resolution:
+ sub_query = select(
+ models.TransferStats.timestamp,
+ *grouping,
+ models.TransferStats.files_failed,
+ models.TransferStats.files_done,
+ models.TransferStats.bytes_done
+ )
+ else:
+ sub_query = select(
+ models.TransferStats.timestamp,
+ *grouping,
+ func.max(models.TransferStats.files_failed).label(models.TransferStats.files_failed.name),
+ func.max(models.TransferStats.files_done).label(models.TransferStats.files_done.name),
+ func.max(models.TransferStats.bytes_done).label(models.TransferStats.bytes_done.name),
+ ).group_by(
+ models.TransferStats.timestamp,
+ *grouping,
+ )
+
+ sub_query = sub_query.where(
+ models.TransferStats.resolution == resolution.total_seconds(),
+ models.TransferStats.timestamp >= older_t,
+ models.TransferStats.timestamp < recent_t
+ )
+ if dest_rse_id:
+ sub_query = sub_query.where(
+ models.TransferStats.dest_rse_id == dest_rse_id
+ )
+ if src_rse_id:
+ sub_query = sub_query.where(
+ models.TransferStats.src_rse_id == src_rse_id
+ )
+ if activity:
+ sub_query = sub_query.where(
+ models.TransferStats.activity == activity
+ )
+
+ sub_query = sub_query.subquery()
+
+ grouping = [
+ sub_query.c.src_rse_id,
+ sub_query.c.dest_rse_id,
+ ]
+ if by_activity:
+ grouping.append(sub_query.c.activity)
+
+ stmt = select(
+ *grouping,
+ func.sum(sub_query.c.files_failed).label(models.TransferStats.files_failed.name),
+ func.sum(sub_query.c.files_done).label(models.TransferStats.files_done.name),
+ func.sum(sub_query.c.bytes_done).label(models.TransferStats.bytes_done.name),
+ ).group_by(
+ *grouping,
+ )
+
+ for row in session.execute(stmt):
+ yield row._asdict()
+
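The max-then-sum shape of _load_totals is what makes duplicate down-samples harmless: duplicates share a timestamp and (very probably) identical values, so taking max() per timestamp before summing avoids double-counting. The same idea in plain Python:

samples = [('12:00', 5), ('12:00', 5), ('13:00', 7)]  # (timestamp, files_done), one duplicated
per_timestamp: dict[str, int] = {}
for ts, files_done in samples:
    per_timestamp[ts] = max(per_timestamp.get(ts, 0), files_done)
print(sum(per_timestamp.values()))  # 12, not 17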
+ @staticmethod
+ def _cleanup(
+ id_temp_table: Any,
+ resolution: "datetime.timedelta",
+ timestamp: "datetime.datetime",
+ limit: "Optional[int]" = 10000,
+ *,
+ session: "Session"
+ ) -> bool:
+ """
+ Delete, from the database, the stats older than the given time.
+ Skip locked rows, to tolerate parallel executions by multiple daemons.
+ """
+ stmt = select(
+ models.TransferStats.id
+ ).where(
+ and_(models.TransferStats.resolution == resolution.total_seconds(),
+ models.TransferStats.timestamp < timestamp)
+ )
+
+ if limit is not None:
+ stmt = stmt.limit(limit)
+
+ # Oracle does not support chaining order_by(), limit(), and
+ # with_for_update(). Use a nested query to overcome this.
+ if session.bind.dialect.name == 'oracle': # type: ignore
+ stmt = select(
+ models.TransferStats.id
+ ).where(
+ models.TransferStats.id.in_(stmt)
+ ).with_for_update(
+ skip_locked=True
+ )
+ else:
+ stmt = stmt.with_for_update(skip_locked=True)
+
+ del_stmt = delete(
+ id_temp_table
+ )
+ session.execute(del_stmt)
+ insert_stmt = insert(
+ id_temp_table
+ ).from_select(
+ ['id'],
+ stmt
+ )
+ session.execute(insert_stmt)
+
+ stmt = delete(
+ models.TransferStats
+ ).where(
+ exists(select(1).where(models.TransferStats.id == id_temp_table.id))
+ ).execution_options(
+ synchronize_session=False
+ )
+ res = session.execute(stmt)
+ return res.rowcount > 0
+
+
+ @staticmethod
+ def slice_time(
+ resolution: datetime.timedelta,
+ start_time: "Optional[datetime.datetime]" = None,
+ end_time: "Optional[datetime.datetime]" = None
+ ) -> 'Iterator[tuple[datetime.datetime, datetime.datetime]]':
+ """
+ Iterates, back in time, over time intervals of length `resolution` which are fully
+ included within the input interval (start_time, end_time).
+ Intervals are aligned on boundaries divisible by resolution.
+
+ For example: for start_time=17:09:59, end_time=16:20:01 and resolution=10 minutes, it will yield
+ (17:00:00, 16:50:00), (16:50:00, 16:40:00), (16:40:00, 16:30:00)
+ """
+
+ if start_time is None:
+ start_time = datetime.datetime.utcnow()
+ newer_t = datetime.datetime.fromtimestamp(int(start_time.timestamp()) // resolution.total_seconds() * resolution.total_seconds())
+ older_t = newer_t - resolution
+ while not end_time or older_t >= end_time:
+ yield newer_t, older_t
+ newer_t = older_t
+ older_t = older_t - resolution
+
+
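Assuming the module is importable, slice_time can be exercised directly to reproduce the docstring's example; intervals are snapped to multiples of the resolution and only fully covered intervals are yielded (the alignment round-trips through timestamp(), so exact boundaries can shift with the local timezone offset):

import datetime

start = datetime.datetime(2024, 1, 1, 17, 9, 59)
end = datetime.datetime(2024, 1, 1, 16, 20, 1)
for newer, older in TransferStatsManager.slice_time(datetime.timedelta(minutes=10), start_time=start, end_time=end):
    print(newer.time(), older.time())
# 17:00:00 16:50:00
# 16:50:00 16:40:00
# 16:40:00 16:30:00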
+ @read_session
+ def get_request_metrics(
+ dest_rse_id: Optional[str] = None,
+ src_rse_id: Optional[str] = None,
+ activity: Optional[str] = None,
+ group_by_rse_attribute: Optional[str] = None,
+ *,
+ session: "Session"
+ ) -> dict[str, Any]:
+ metrics = {}
+ now = datetime.datetime.utcnow()
+
+ # Add the current queues
+ db_stats = get_request_stats(
+ state=[
+ RequestState.QUEUED,
+ ],
+ src_rse_id=src_rse_id,
+ dest_rse_id=dest_rse_id,
+ activity=activity,
+ session=session,
+ )
+ for stat in db_stats:
+ if not stat.source_rse_id:
+ continue
+
+ resp_elem = metrics.setdefault((stat.source_rse_id, stat.dest_rse_id), {})
+
+ files_elem = resp_elem.setdefault('files', {})
+ files_elem.setdefault('queued', {})[stat.activity] = stat.counter
+ files_elem['queued-total'] = files_elem.get('queued-total', 0) + stat.counter
+
+ bytes_elem = resp_elem.setdefault('bytes', {})
+ bytes_elem.setdefault('queued', {})[stat.activity] = stat.bytes
+ bytes_elem['queued-total'] = bytes_elem.get('queued-total', 0) + stat.bytes
+
+ # Add the historical data
+ for duration, duration_label in (
+ (datetime.timedelta(hours=1), '1h'),
+ (datetime.timedelta(hours=6), '6h')
+ ):
+ db_stats = TransferStatsManager().load_totals(
+ older_t=now - duration,
+ dest_rse_id=dest_rse_id,
+ src_rse_id=src_rse_id,
+ activity=activity,
+ session=session,
+ )
+
+ for stat in db_stats:
+ resp_elem = metrics.setdefault((stat['src_rse_id'], stat['dest_rse_id']), {})
+
+ files_elem = resp_elem.setdefault('files', {})
+ if stat['files_done']:
+ activity_elem = files_elem.setdefault('done', {}).setdefault(stat['activity'], {})
+ activity_elem[duration_label] = activity_elem.get(duration_label, 0) + stat['files_done']
+ files_elem[f'done-total-{duration_label}'] = files_elem.get(f'done-total-{duration_label}', 0) + stat['files_done']
+ if stat['files_failed']:
+ activity_elem = files_elem.setdefault('failed', {}).setdefault(stat['activity'], {})
+ activity_elem[duration_label] = activity_elem.get(duration_label, 0) + stat['files_failed']
+ files_elem[f'failed-total-{duration_label}'] = files_elem.get(f'failed-total-{duration_label}', 0) + stat['files_failed']
+
+ bytes_elem = resp_elem.setdefault('bytes', {})
+ if stat['bytes_done']:
+ activity_elem = bytes_elem.setdefault('done', {}).setdefault(stat['activity'], {})
+ activity_elem[duration_label] = activity_elem.get(duration_label, 0) + stat['bytes_done']
+ bytes_elem[f'done-total-{duration_label}'] = bytes_elem.get(f'done-total-{duration_label}', 0) + stat['bytes_done']
+
+ # Add distances
+ for distance in get_distances(dest_rse_id=dest_rse_id, src_rse_id=src_rse_id):
+ resp_elem = metrics.setdefault((distance['src_rse_id'], distance['dest_rse_id']), {})
+
+ resp_elem['distance'] = distance['distance']
+
+ # Fill RSE names
+ rses = RseCollection(rse_ids=itertools.chain.from_iterable(metrics))
+ rses.ensure_loaded(load_name=True, include_deleted=True)
+ response = {}
+ for (src_id, dst_id), metric in metrics.items():
+ src_rse = rses[src_id]
+ dst_rse = rses[dst_id]
+ metric['src_rse'] = src_rse.name
+ metric['dst_rse'] = dst_rse.name
+
+ if group_by_rse_attribute:
+ src_rse_group = src_rse.attributes.get(group_by_rse_attribute, 'UNKNOWN')
+ dst_rse_group = dst_rse.attributes.get(group_by_rse_attribute, 'UNKNOWN')
+ if src_rse_group is not None and dst_rse_group is not None:
+ response[f'{src_rse_group}:{dst_rse_group}'] = metric
+ else:
+ response[f'{src_rse.name}:{dst_rse.name}'] = metric
+
+ return response
+
+
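For orientation, the shape of one entry produced above, with hypothetical RSE names and numbers: the 'queued' counters come from the live requests table, the duration-suffixed totals from the downsampled transfer statistics, and 'distance' from the distances table:

example_metric = {
    'src_rse': 'SITE_A_DISK',  # hypothetical
    'dst_rse': 'SITE_B_DISK',  # hypothetical
    'distance': 10,
    'files': {'queued': {'User Subscriptions': 3}, 'queued-total': 3, 'done-total-1h': 12},
    'bytes': {'queued': {'User Subscriptions': 3 * 10**9}, 'queued-total': 3 * 10**9},
}
print(example_metric['files']['queued-total'])  # 3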
+ @read_session
+ def get_request_stats(
+ state: Union[RequestState, list[RequestState]],
+ dest_rse_id: Optional[str] = None,
+ src_rse_id: Optional[str] = None,
+ activity: Optional[str] = None,
+ *,
+ session: "Session"
+ ) -> """Sequence[
+ Row[tuple[
+ Optional[InternalAccount],
+ RequestState,
+ uuid.UUID,
+ Optional[uuid.UUID],
+ Optional[str],
+ int,
+ Optional[int]
+ ]
+ ]
+ ]""":
+ """
+ Retrieve statistics about requests grouped by account, state, source/destination RSE and activity.
+ """
+
+ if not isinstance(state, list):
+ state = [state]
+
+ try:
+ stmt = select(
+ models.Request.account,
+ models.Request.state,
+ models.Request.dest_rse_id,
+ models.Request.source_rse_id,
+ models.Request.activity,
+ func.count(1).label('counter'),
+ func.sum(models.Request.bytes).label('bytes')
+ ).with_hint(
+ models.Request,
+ 'INDEX(REQUESTS REQUESTS_TYP_STA_UPD_IDX)',
+ 'oracle'
+ ).where(
+ and_(models.Request.state.in_(state),
+ models.Request.request_type.in_([RequestType.TRANSFER, RequestType.STAGEIN, RequestType.STAGEOUT]))
+ ).group_by(
+ models.Request.account,
+ models.Request.state,
+ models.Request.dest_rse_id,
+ models.Request.source_rse_id,
+ models.Request.activity,
+ )
+ if src_rse_id:
+ stmt = stmt.where(
+ models.Request.source_rse_id == src_rse_id
+ )
+ if dest_rse_id:
+ stmt = stmt.where(
+ models.Request.dest_rse_id == dest_rse_id
+ )
+ if activity:
+ stmt = stmt.where(
+ models.Request.activity == activity
+ )
+
+ return session.execute(stmt).all()
+
+ except IntegrityError as error:
+ raise RucioException(error.args)
+
+
+ @transactional_session
+ def release_waiting_requests_per_deadline(
+ dest_rse_id: Optional[str] = None,
+ source_rse_id: Optional[str] = None,
+ deadline: int = 1,
+ *,
+ session: "Session",
+ ) -> int:
+ """
+ Release waiting requests that have exceeded the maximum waiting time.
+ If the DID of a request is attached to a dataset, the oldest requested_at date of all requests related to the dataset is used for the check, and all requests of this dataset are released together.
+ :param dest_rse_id: The destination RSE id.
+ :param source_rse_id: The source RSE id.
+ :param deadline: Maximal waiting time in hours until a dataset gets released.
+ :param session: The database session.
+ """
+ amount_released_requests = 0
+ if deadline:
+ grouped_requests_subquery, filtered_requests_subquery = create_base_query_grouped_fifo(dest_rse_id=dest_rse_id, source_rse_id=source_rse_id, session=session)
+ old_requests_subquery = select(
+ grouped_requests_subquery.c.name,
+ grouped_requests_subquery.c.scope,
+ grouped_requests_subquery.c.oldest_requested_at
+ ).where(
+ grouped_requests_subquery.c.oldest_requested_at < datetime.datetime.utcnow() - datetime.timedelta(hours=deadline)
+ ).subquery()
+
+ old_requests_subquery = select(
+ filtered_requests_subquery.c.id
+ ).join(
+ old_requests_subquery,
+ and_(filtered_requests_subquery.c.dataset_name == old_requests_subquery.c.name,
+ filtered_requests_subquery.c.dataset_scope == old_requests_subquery.c.scope)
+ )
+
+ amount_released_requests = update(
+ models.Request
+ ).where(
+ models.Request.id.in_(old_requests_subquery) # type: ignore
+ ).execution_options(
+ synchronize_session=False
+ ).values({
+ models.Request.state: RequestState.QUEUED
+ })
+ return session.execute(amount_released_requests).rowcount # type: ignore
+
+
+ @transactional_session
+ def release_waiting_requests_per_free_volume(
+         dest_rse_id: Optional[str] = None,
+         source_rse_id: Optional[str] = None,
+         volume: int = 0,
+         *,
+         session: "Session"
+ ) -> int:
+     """
+     Release waiting requests if they fit into the available transfer volume. If the DID of a request is attached to a dataset, the volume is checked for the whole dataset, as all requests related to this dataset are released together.
+
+     :param dest_rse_id: The destination RSE id.
+     :param source_rse_id: The source RSE id.
+     :param volume: The maximum volume in bytes that should be transferred.
+     :param session: The database session.
+     """
+
+     dialect = session.bind.dialect.name  # type: ignore
+     if dialect == 'mysql' or dialect == 'sqlite':
+         coalesce_func = func.ifnull
+     elif dialect == 'oracle':
+         coalesce_func = func.nvl
+     else:  # dialect == 'postgresql'
+         coalesce_func = func.coalesce
+
+     sum_volume_active_subquery = select(
+         coalesce_func(func.sum(models.Request.bytes), 0).label('sum_bytes')
+     ).where(
+         models.Request.state.in_([RequestState.SUBMITTED, RequestState.QUEUED]),
+     )
+     if dest_rse_id is not None:
+         sum_volume_active_subquery = sum_volume_active_subquery.where(
+             models.Request.dest_rse_id == dest_rse_id
+         )
+     if source_rse_id is not None:
+         sum_volume_active_subquery = sum_volume_active_subquery.where(
+             models.Request.source_rse_id == source_rse_id
+         )
+     sum_volume_active_subquery = sum_volume_active_subquery.subquery()
+
+     grouped_requests_subquery, filtered_requests_subquery = create_base_query_grouped_fifo(dest_rse_id=dest_rse_id, source_rse_id=source_rse_id, session=session)
+
+     cumulated_volume_subquery = select(
+         grouped_requests_subquery.c.name,
+         grouped_requests_subquery.c.scope,
+         func.sum(grouped_requests_subquery.c.volume).over(order_by=grouped_requests_subquery.c.oldest_requested_at).label('cum_volume')
+     ).where(
+         grouped_requests_subquery.c.volume <= volume - sum_volume_active_subquery.c.sum_bytes
+     ).subquery()
+
+     cumulated_volume_subquery = select(
+         filtered_requests_subquery.c.id
+     ).join(
+         cumulated_volume_subquery,
+         and_(filtered_requests_subquery.c.dataset_name == cumulated_volume_subquery.c.name,
+              filtered_requests_subquery.c.dataset_scope == cumulated_volume_subquery.c.scope)
+     ).where(
+         cumulated_volume_subquery.c.cum_volume <= volume - sum_volume_active_subquery.c.sum_bytes
+     )
+
+     amount_released_requests = update(
+         models.Request
+     ).where(
+         models.Request.id.in_(cumulated_volume_subquery)  # type: ignore
+     ).execution_options(
+         synchronize_session=False
+     ).values({
+         models.Request.state: RequestState.QUEUED
+     })
+     return session.execute(amount_released_requests).rowcount
+
+
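To make the volume accounting concrete, here is a toy rendering of the cumulative headroom check that the subqueries above encode (all numbers invented; the real query also pre-filters datasets that would never fit on their own):

    volume = 10 * 10**12        # configured limit: 10 TB
    active = 4 * 10**12         # bytes already SUBMITTED or QUEUED
    headroom = volume - active  # 6 TB left to hand out
    # Datasets are considered oldest-first; each one is released while the
    # cumulative volume still fits into the headroom.
    datasets = [('ds_a', 2 * 10**12), ('ds_b', 3 * 10**12), ('ds_c', 4 * 10**12)]
    cum_volume, released = 0, []
    for name, size in datasets:
        cum_volume += size
        if cum_volume <= headroom:
            released.append(name)
    # released == ['ds_a', 'ds_b']; ds_c (cumulative 9 TB) no longer fits.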
+ @read_session
+ def create_base_query_grouped_fifo(
+         dest_rse_id: Optional[str] = None,
+         source_rse_id: Optional[str] = None,
+         *,
+         session: "Session"
+ ) -> tuple["Subquery", "Subquery"]:
+     """
+     Build the sqlalchemy queries to filter relevant requests and to group them into datasets.
+     Requests are grouped either by the same destination RSE or the same source RSE.
+
+     :param dest_rse_id: The destination RSE id to filter on.
+     :param source_rse_id: The source RSE id to filter on.
+     :param session: The database session.
+     """
+     dialect = session.bind.dialect.name  # type: ignore
+     if dialect == 'mysql' or dialect == 'sqlite':
+         coalesce_func = func.ifnull
+     elif dialect == 'oracle':
+         coalesce_func = func.nvl
+     else:  # dialect == 'postgresql'
+         coalesce_func = func.coalesce
+
+     # query DIDs that are attached to a collection and add a column indicating the order of attachment in case of multiple attachments
+     attachment_order_subquery = select(
+         models.DataIdentifierAssociation.child_name,
+         models.DataIdentifierAssociation.child_scope,
+         models.DataIdentifierAssociation.name,
+         models.DataIdentifierAssociation.scope,
+         func.row_number().over(
+             partition_by=(models.DataIdentifierAssociation.child_name,
+                           models.DataIdentifierAssociation.child_scope),
+             order_by=models.DataIdentifierAssociation.created_at
+         ).label('order_of_attachment')
+     ).subquery()
+
+     # query transfer requests and join with the corresponding datasets
+     requests_subquery_stmt = select(
+         # Will be filled using add_columns() later
+     ).outerjoin(
+         attachment_order_subquery,
+         and_(models.Request.name == attachment_order_subquery.c.child_name,
+              models.Request.scope == attachment_order_subquery.c.child_scope,
+              attachment_order_subquery.c.order_of_attachment == 1),
+     ).where(
+         models.Request.state == RequestState.WAITING,
+     )
+     if source_rse_id is not None:
+         requests_subquery_stmt = requests_subquery_stmt.where(
+             models.Request.source_rse_id == source_rse_id
+         )
+     if dest_rse_id is not None:
+         requests_subquery_stmt = requests_subquery_stmt.where(
+             models.Request.dest_rse_id == dest_rse_id
+         )
+
+     filtered_requests_subquery = requests_subquery_stmt.add_columns(
+         coalesce_func(attachment_order_subquery.c.scope, models.Request.scope).label('dataset_scope'),
+         coalesce_func(attachment_order_subquery.c.name, models.Request.name).label('dataset_name'),
+         models.Request.id.label('id')
+     ).subquery()
+
+     combined_attached_unattached_requests = requests_subquery_stmt.add_columns(
+         coalesce_func(attachment_order_subquery.c.scope, models.Request.scope).label('scope'),
+         coalesce_func(attachment_order_subquery.c.name, models.Request.name).label('name'),
+         models.Request.bytes,
+         models.Request.requested_at
+     ).subquery()
+
+     # group requests and calculate properties like oldest requested_at, number of children, volume
+     grouped_requests_subquery = select(
+         func.sum(combined_attached_unattached_requests.c.bytes).label('volume'),
+         func.min(combined_attached_unattached_requests.c.requested_at).label('oldest_requested_at'),
+         func.count().label('amount_childs'),
+         combined_attached_unattached_requests.c.name,
+         combined_attached_unattached_requests.c.scope
+     ).group_by(
+         combined_attached_unattached_requests.c.scope,
+         combined_attached_unattached_requests.c.name
+     ).subquery()
+     return grouped_requests_subquery, filtered_requests_subquery
+
+
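For orientation, a sketch of how the callers above consume the two returned subqueries (hedged; `some_rse_id` and `session` are placeholders, and the column names follow the labels defined in this function):

    grouped, filtered = create_base_query_grouped_fifo(dest_rse_id=some_rse_id, session=session)
    # 'grouped'  : one row per dataset -> volume, oldest_requested_at, amount_childs, name, scope
    # 'filtered' : one row per WAITING request -> id, dataset_name, dataset_scope
    # Callers rank the datasets in 'grouped' by oldest_requested_at, decide which
    # ones fit their budget, then join back on 'filtered' to collect the request
    # ids to flip from WAITING to QUEUED.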
+ @transactional_session
+ def release_waiting_requests_fifo(
+         dest_rse_id: Optional[str] = None,
+         source_rse_id: Optional[str] = None,
+         activity: Optional[str] = None,
+         count: int = 0,
+         account: Optional[InternalAccount] = None,
+         *,
+         session: "Session"
+ ) -> int:
+     """
+     Release waiting requests. Transfer requests that were requested first get released first (FIFO).
+
+     :param dest_rse_id: The destination RSE id.
+     :param source_rse_id: The source RSE id.
+     :param activity: The activity.
+     :param count: The number of requests to release.
+     :param account: The account name whose requests to release.
+     :param session: The database session.
+     """
+
+     dialect = session.bind.dialect.name  # type: ignore
+     rowcount = 0
+
+     subquery = select(
+         models.Request.id
+     ).where(
+         models.Request.state == RequestState.WAITING
+     ).order_by(
+         asc(models.Request.requested_at)
+     ).limit(
+         count
+     )
+     if source_rse_id is not None:
+         subquery = subquery.where(models.Request.source_rse_id == source_rse_id)
+     if dest_rse_id is not None:
+         subquery = subquery.where(models.Request.dest_rse_id == dest_rse_id)
+
+     if activity is not None:
+         subquery = subquery.where(models.Request.activity == activity)
+     if account is not None:
+         subquery = subquery.where(models.Request.account == account)
+
+     if dialect == 'mysql':
+         # TODO: check if the logic from this `if` is still needed on modern mysql
+
+         # join because IN and LIMIT cannot be used together
+         subquery = subquery.subquery()
+         subquery = select(
+             models.Request.id
+         ).join(
+             subquery,
+             models.Request.id == subquery.c.id
+         ).subquery()
+         # wrap the select to allow updating and selecting from the same table
+         subquery = select(subquery.c.id)
+
+     stmt = update(
+         models.Request
+     ).where(
+         models.Request.id.in_(subquery)  # type: ignore
+     ).execution_options(
+         synchronize_session=False
+     ).values({
+         models.Request.state: RequestState.QUEUED
+     })
+     rowcount = session.execute(stmt).rowcount
+     return rowcount
+
+
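A hedged usage sketch for the plain FIFO strategy (the id, activity, and count are invented):

    # Release up to 100 of the oldest WAITING requests of one activity
    # towards a given destination.
    released = release_waiting_requests_fifo(
        dest_rse_id='0b2c0bcd42dd4ba3bacae2f4b6ab6c52',  # hypothetical id
        activity='User Subscriptions',
        count=100,
    )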
+ @transactional_session
+ def release_waiting_requests_grouped_fifo(
+         dest_rse_id: Optional[str] = None,
+         source_rse_id: Optional[str] = None,
+         count: int = 0,
+         deadline: int = 1,
+         volume: int = 0,
+         *,
+         session: "Session"
+ ) -> int:
+     """
+     Release waiting requests. Transfer requests that were requested first get released first (FIFO).
+     Additionally, all requests for DIDs attached to the same dataset get released if one child of the dataset is chosen to be released (grouped FIFO).
+
+     :param dest_rse_id: The destination RSE id.
+     :param source_rse_id: The source RSE id.
+     :param count: The number of requests to release. If None, release all waiting requests.
+     :param deadline: Maximal waiting time in hours until a dataset gets released.
+     :param volume: The maximum volume in bytes that should be transferred.
+     :param session: The database session.
+     """
+
+     amount_updated_requests = 0
+
+     # Release requests that exceeded their waiting time
+     if deadline and source_rse_id is not None:
+         amount_updated_requests = release_waiting_requests_per_deadline(dest_rse_id=dest_rse_id, source_rse_id=source_rse_id, deadline=deadline, session=session)
+         count = count - amount_updated_requests
+
+     grouped_requests_subquery, filtered_requests_subquery = create_base_query_grouped_fifo(dest_rse_id=dest_rse_id, source_rse_id=source_rse_id, session=session)
+
+     # cumulate the number of children per dataset, combine with each request, and keep only requests that don't exceed the limit
+     cumulated_children_subquery = select(
+         grouped_requests_subquery.c.name,
+         grouped_requests_subquery.c.scope,
+         grouped_requests_subquery.c.amount_childs,
+         grouped_requests_subquery.c.oldest_requested_at,
+         func.sum(grouped_requests_subquery.c.amount_childs).over(order_by=(grouped_requests_subquery.c.oldest_requested_at)).label('cum_amount_childs')
+     ).subquery()
+     cumulated_children_subquery = select(
+         filtered_requests_subquery.c.id
+     ).join(
+         cumulated_children_subquery,
+         and_(filtered_requests_subquery.c.dataset_name == cumulated_children_subquery.c.name,
+              filtered_requests_subquery.c.dataset_scope == cumulated_children_subquery.c.scope)
+     ).where(
+         cumulated_children_subquery.c.cum_amount_childs - cumulated_children_subquery.c.amount_childs < count
+     ).subquery()
+
+     # needed for mysql to update and select from the same table
+     cumulated_children_subquery = select(cumulated_children_subquery.c.id)
+
+     stmt = update(
+         models.Request
+     ).where(
+         models.Request.id.in_(cumulated_children_subquery)  # type: ignore
+     ).execution_options(
+         synchronize_session=False
+     ).values({
+         models.Request.state: RequestState.QUEUED
+     })
+     amount_updated_requests += session.execute(stmt).rowcount
+
+     # release requests whose whole dataset volume fits into the available volume space
+     if volume and dest_rse_id is not None:
+         amount_updated_requests += release_waiting_requests_per_free_volume(dest_rse_id=dest_rse_id, volume=volume, session=session)
+
+     return amount_updated_requests
+
+
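The window condition `cum_amount_childs - amount_childs < count` admits a dataset as long as releasing everything older than it has not yet exhausted the budget, so the budget can be overshot by at most one dataset. A toy rendering with invented numbers:

    count = 10
    datasets = [('ds_a', 4), ('ds_b', 5), ('ds_c', 3)]  # (name, amount_childs), oldest first
    cum_childs, released = 0, []
    for name, childs in datasets:
        cum_childs += childs
        if cum_childs - childs < count:  # same predicate as the SQL window function
            released.append(name)
    # released == ['ds_a', 'ds_b', 'ds_c']: 12 requests for a budget of 10,
    # because ds_c still started below the limit.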
+ @transactional_session
+ def release_all_waiting_requests(
+         dest_rse_id: Optional[str] = None,
+         source_rse_id: Optional[str] = None,
+         activity: Optional[str] = None,
+         account: Optional[InternalAccount] = None,
+         *,
+         session: "Session"
+ ) -> int:
+     """
+     Release all waiting requests that match the given filters.
+
+     :param dest_rse_id: The destination RSE id.
+     :param source_rse_id: The source RSE id.
+     :param activity: The activity.
+     :param account: The account name whose requests to release.
+     :param session: The database session.
+     """
+     try:
+         query = update(
+             models.Request
+         ).where(
+             models.Request.state == RequestState.WAITING,
+         ).execution_options(
+             synchronize_session=False
+         ).values({
+             models.Request.state: RequestState.QUEUED
+         })
+         if source_rse_id is not None:
+             query = query.where(
+                 models.Request.source_rse_id == source_rse_id
+             )
+         if dest_rse_id is not None:
+             query = query.where(
+                 models.Request.dest_rse_id == dest_rse_id
+             )
+         if activity is not None:
+             query = query.where(
+                 models.Request.activity == activity
+             )
+         if account is not None:
+             query = query.where(
+                 models.Request.account == account
+             )
+         rowcount = session.execute(query).rowcount
+         return rowcount
+     except IntegrityError as error:
+         raise RucioException(error.args)
+
+
+ @stream_session
+ def list_transfer_limits(
+         *,
+         session: "Session",
+ ) -> 'Iterator[dict[str, Any]]':
+     stmt = select(
+         models.TransferLimit
+     )
+     for limit in session.execute(stmt).scalars():
+         dict_resp = limit.to_dict()
+         yield dict_resp
+
+
+ def _sync_rse_transfer_limit(
+         limit_id: Union[str, 'uuid.UUID'],
+         desired_rse_ids: set[str],
+         *,
+         session: "Session",
+ ) -> None:
+     """
+     Ensure that an RSETransferLimit exists in the database for each of the given rses (and only for these rses).
+     """
+
+     stmt = select(
+         models.RSETransferLimit.rse_id,
+     ).where(
+         models.RSETransferLimit.limit_id == limit_id
+     )
+     existing_rse_ids = set(session.execute(stmt).scalars())
+
+     rse_limits_to_add = desired_rse_ids.difference(existing_rse_ids)
+     rse_limits_to_delete = existing_rse_ids.difference(desired_rse_ids)
+
+     if rse_limits_to_add:
+         values = [
+             {'rse_id': rse_id, 'limit_id': limit_id}
+             for rse_id in rse_limits_to_add
+         ]
+         stmt = insert(
+             models.RSETransferLimit
+         )
+         session.execute(stmt, values)
+
+     if rse_limits_to_delete:
+         stmt = delete(
+             models.RSETransferLimit
+         ).where(
+             and_(models.RSETransferLimit.limit_id == limit_id,
+                  models.RSETransferLimit.rse_id.in_(rse_limits_to_delete))
+         )
+         session.execute(stmt)
+
+
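The body is a plain set reconciliation; the same pattern in miniature (toy ids):

    existing = {'rse_a', 'rse_b'}
    desired = {'rse_b', 'rse_c'}
    to_add = desired - existing      # {'rse_c'}  -> INSERT
    to_delete = existing - desired   # {'rse_a'}  -> DELETE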
+ @transactional_session
+ def re_sync_all_transfer_limits(
+         delete_empty: bool = False,
+         *,
+         session: "Session",
+ ) -> None:
+     """
+     For each TransferLimit in the database, re-evaluate the rse expression and ensure that the
+     correct RSETransferLimits are in the database.
+
+     :param delete_empty: if True, when rse_expression evaluates to an empty set or is invalid, the limit is completely removed
+     """
+     stmt = select(
+         models.TransferLimit,
+     )
+     for limit in session.execute(stmt).scalars():
+         try:
+             desired_rse_ids = {rse['id'] for rse in parse_expression(expression=limit.rse_expression, session=session)}
+         except InvalidRSEExpression:
+             desired_rse_ids = set()
+
+         if not desired_rse_ids and delete_empty:
+             delete_transfer_limit_by_id(limit_id=limit.id, session=session)
+         else:
+             _sync_rse_transfer_limit(limit_id=limit.id, desired_rse_ids=desired_rse_ids, session=session)
+
+
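A plausible periodic invocation, e.g. from a janitor-style daemon (hedged; the calling context is invented):

    # Re-evaluate every expression once per cycle; drop limits whose
    # expression matches no RSE any more.
    re_sync_all_transfer_limits(delete_empty=True)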
+ @transactional_session
+ def set_transfer_limit(
+         rse_expression: str,
+         activity: Optional[str] = None,
+         direction: TransferLimitDirection = TransferLimitDirection.DESTINATION,
+         max_transfers: Optional[int] = None,
+         volume: Optional[int] = None,
+         deadline: Optional[int] = None,
+         strategy: Optional[str] = None,
+         transfers: Optional[int] = None,
+         waitings: Optional[int] = None,
+         *,
+         session: "Session",
+ ) -> 'uuid.UUID':
+     """
+     Create or update a transfer limit.
+
+     :param rse_expression: RSE expression string.
+     :param activity: The activity.
+     :param direction: The direction in which this limit applies (source/destination).
+     :param max_transfers: Maximum number of transfers.
+     :param volume: Maximum transfer volume in bytes.
+     :param deadline: Maximum waiting time in hours until a dataset gets released.
+     :param strategy: Defines how to handle datasets: `fifo` (each file released separately) or `grouped_fifo` (wait for the entire dataset to fit).
+     :param transfers: Current number of active transfers.
+     :param waitings: Current number of waiting transfers.
+     :param session: The database session in use.
+
+     :returns: The limit id.
+     """
+     if activity is None:
+         activity = 'all_activities'
+
+     stmt = select(
+         models.TransferLimit
+     ).where(
+         and_(models.TransferLimit.rse_expression == rse_expression,
+              models.TransferLimit.activity == activity,
+              models.TransferLimit.direction == direction)
+     )
+     limit = session.execute(stmt).scalar_one_or_none()
+
+     if not limit:
+         if max_transfers is None:
+             max_transfers = 0
+         if volume is None:
+             volume = 0
+         if deadline is None:
+             deadline = 1
+         if strategy is None:
+             strategy = 'fifo'
+         limit = models.TransferLimit(
+             rse_expression=rse_expression,
+             activity=activity,
+             direction=direction,
+             max_transfers=max_transfers,
+             volume=volume,
+             deadline=deadline,
+             strategy=strategy,
+             transfers=transfers,
+             waitings=waitings
+         )
+         limit.save(session=session)
+     else:
+         changed = False
+         if max_transfers is not None and limit.max_transfers != max_transfers:
+             limit.max_transfers = max_transfers
+             changed = True
+         if volume is not None and limit.volume != volume:
+             limit.volume = volume
+             changed = True
+         if deadline is not None and limit.deadline != deadline:
+             limit.deadline = deadline
+             changed = True
+         if strategy is not None and limit.strategy != strategy:
+             limit.strategy = strategy
+             changed = True
+         if transfers is not None and limit.transfers != transfers:
+             limit.transfers = transfers
+             changed = True
+         if waitings is not None and limit.waitings != waitings:
+             limit.waitings = waitings
+             changed = True
+         if changed:
+             limit.save(session=session)
+
+     desired_rse_ids = {rse['id'] for rse in parse_expression(expression=rse_expression, session=session)}
+     _sync_rse_transfer_limit(limit_id=limit.id, desired_rse_ids=desired_rse_ids, session=session)
+     return limit.id
+
+
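A hedged configuration sketch (the expression, activity, and numbers are invented; the function resolves the expression and creates the per-RSE rows itself):

    limit_id = set_transfer_limit(
        rse_expression='tier=1',             # hypothetical RSE expression
        activity='Data rebalancing',
        direction=TransferLimitDirection.DESTINATION,
        max_transfers=500,
        volume=20 * 10**12,                  # 20 TB
        strategy='grouped_fifo',
    )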
+ @transactional_session
+ def set_transfer_limit_stats(
+         limit_id: str,
+         waitings: int,
+         transfers: int,
+         *,
+         session: "Session",
+ ) -> None:
+     """
+     Set the statistics of the TransferLimit.
+     """
+     stmt = update(
+         models.TransferLimit
+     ).where(
+         models.TransferLimit.id == limit_id
+     ).values({
+         models.TransferLimit.waitings: waitings,
+         models.TransferLimit.transfers: transfers
+     })
+     session.execute(stmt)
+
+
+ @transactional_session
+ def delete_transfer_limit(
+         rse_expression: str,
+         activity: Optional[str] = None,
+         direction: TransferLimitDirection = TransferLimitDirection.DESTINATION,
+         *,
+         session: "Session",
+ ) -> None:
+
+     if activity is None:
+         activity = 'all_activities'
+
+     stmt = delete(
+         models.RSETransferLimit
+     ).where(
+         exists(
+             select(1)
+         ).where(
+             and_(models.RSETransferLimit.limit_id == models.TransferLimit.id,
+                  models.TransferLimit.rse_expression == rse_expression,
+                  models.TransferLimit.activity == activity,
+                  models.TransferLimit.direction == direction)
+         )
+     ).execution_options(
+         synchronize_session=False
+     )
+     session.execute(stmt)
+
+     stmt = delete(
+         models.TransferLimit
+     ).where(
+         and_(models.TransferLimit.rse_expression == rse_expression,
+              models.TransferLimit.activity == activity,
+              models.TransferLimit.direction == direction)
+     )
+     session.execute(stmt)
+
+
+ @transactional_session
+ def delete_transfer_limit_by_id(
+         limit_id: str,
+         *,
+         session: "Session",
+ ) -> None:
+     stmt = delete(
+         models.RSETransferLimit
+     ).where(
+         models.RSETransferLimit.limit_id == limit_id
+     )
+     session.execute(stmt)
+
+     stmt = delete(
+         models.TransferLimit
+     ).where(
+         models.TransferLimit.id == limit_id
+     )
+     session.execute(stmt)
+
+
+ @transactional_session
+ def update_requests_priority(
+         priority: int,
+         filter_: FilterDict,
+         *,
+         session: "Session",
+         logger: LoggerFunction = logging.log
+ ) -> dict[str, Any]:
+     """
+     Update the priority of requests.
+
+     :param priority: The priority as an integer from 1 to 5.
+     :param filter_: Dictionary such as {'rule_id': rule_id, 'request_id': request_id, 'older_than': time_stamp, 'activities': [activities]}.
+     :param session: The database session in use.
+     :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
+     :returns: The transfers which must be updated in the transfertool.
+     """
+     try:
+         query = select(
+             models.Request.id,
+             models.Request.external_id,
+             models.Request.external_host,
+             models.Request.state.label('request_state'),
+             models.ReplicaLock.state.label('lock_state')
+         ).join(
+             models.ReplicaLock,
+             and_(models.ReplicaLock.scope == models.Request.scope,
+                  models.ReplicaLock.name == models.Request.name,
+                  models.ReplicaLock.rse_id == models.Request.dest_rse_id)
+         )
+         if 'rule_id' in filter_:
+             query = query.where(models.ReplicaLock.rule_id == filter_['rule_id'])
+         if 'request_id' in filter_:
+             query = query.where(models.Request.id == filter_['request_id'])
+         if 'older_than' in filter_:
+             query = query.where(models.Request.created_at < filter_['older_than'])
+         if 'activities' in filter_:
+             if not isinstance(filter_['activities'], list):
+                 filter_['activities'] = filter_['activities'].split(',')
+             query = query.where(models.Request.activity.in_(filter_['activities']))
+
+         transfers_to_update = {}
+         for item in session.execute(query).all():
+             try:
+                 update_request(item.id, priority=priority, session=session)
+                 logger(logging.DEBUG, "Updated request %s priority to %s in rucio." % (item.id, priority))
+                 if item.request_state == RequestState.SUBMITTED and item.lock_state == LockState.REPLICATING:
+                     transfers_to_update.setdefault(item.external_host, {})[item.external_id] = priority
+             except Exception:
+                 logger(logging.DEBUG, "Failed to boost request %s priority: %s" % (item.id, traceback.format_exc()))
+         return transfers_to_update
+     except IntegrityError as error:
+         raise RucioException(error.args)
+
+
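A hedged sketch of a caller boosting one rule and forwarding the result to the transfertool (the rule id and host are invented):

    transfers_to_update = update_requests_priority(
        priority=4,
        filter_={'rule_id': 'b8e7d3ff0e0e4a1b9c36c48eb2a125c0'},  # hypothetical
    )
    # e.g. {'https://fts3.example.org:8446': {'<external_id>': 4}}
    # -> one bulk priority update per external host in the transfertool.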
+ @read_session
+ def add_monitor_message(
+         new_state: RequestState,
+         request: RequestDict,
+         additional_fields: "Mapping[str, Any]",
+         *,
+         session: "Session"
+ ) -> None:
+     """
+     Create a message for hermes from a request.
+
+     :param new_state: The new state of the transfer request.
+     :param request: The request to create the message for.
+     :param additional_fields: Additional custom fields to be added to the message.
+     :param session: The database session to use.
+     """
+
+     if request['request_type']:
+         transfer_status = '%s-%s' % (request['request_type'].name, new_state.name)
+     else:
+         transfer_status = 'transfer-%s' % new_state.name
+     transfer_status = transfer_status.lower()
+
+     stmt = select(
+         models.DataIdentifier.datatype
+     ).where(
+         and_(models.DataIdentifier.scope == request['scope'],
+              models.DataIdentifier.name == request['name'])
+     )
+     datatype = session.execute(stmt).scalar_one_or_none()
+
+     # Start by filling up fields from the database request or with defaults.
+     message = {'activity': request.get('activity', None),
+                'request-id': request['id'],
+                'duration': -1,
+                'checksum-adler': request.get('adler32', None),
+                'checksum-md5': request.get('md5', None),
+                'file-size': request.get('bytes', None),
+                'bytes': request.get('bytes', None),
+                'guid': None,
+                'previous-request-id': request['previous_attempt_id'],
+                'protocol': None,
+                'scope': request['scope'],
+                'name': request['name'],
+                'dataset': None,
+                'datasetScope': None,
+                'src-type': None,
+                'src-rse': request.get('source_rse', None),
+                'src-url': None,
+                'dst-type': None,
+                'dst-rse': request.get('dest_rse', None),
+                'dst-url': request.get('dest_url', None),
+                'reason': request.get('err_msg', None),
+                'transfer-endpoint': request['external_host'],
+                'transfer-id': request['external_id'],
+                'transfer-link': None,
+                'created_at': request.get('created_at', None),
+                'submitted_at': request.get('submitted_at', None),
+                'started_at': request.get('started_at', None),
+                'transferred_at': request.get('transferred_at', None),
+                'tool-id': 'rucio-conveyor',
+                'account': request.get('account', None),
+                'datatype': datatype}
+
+     # Add (or override) existing fields
+     message.update(additional_fields)
+
+     if message['started_at'] and message['transferred_at']:
+         message['duration'] = (message['transferred_at'] - message['started_at']).seconds
+     ds_scope = request['attributes'].get('ds_scope')
+     if not message['datasetScope'] and ds_scope:
+         message['datasetScope'] = ds_scope
+     ds_name = request['attributes'].get('ds_name')
+     if not message['dataset'] and ds_name:
+         message['dataset'] = ds_name
+     if not message.get('protocol'):
+         dst_url = message['dst-url']
+         if dst_url and ':' in dst_url:
+             message['protocol'] = dst_url.split(':')[0]
+         elif request.get('transfertool'):
+             message['protocol'] = request['transfertool']
+     if not message.get('src-rse'):
+         src_rse_id = request.get('source_rse_id', None)
+         if src_rse_id:
+             src_rse = get_rse_name(src_rse_id, session=session)
+             message['src-rse'] = src_rse
+     if not message.get('dst-rse'):
+         dst_rse_id = request.get('dest_rse_id', None)
+         if dst_rse_id:
+             dst_rse = get_rse_name(dst_rse_id, session=session)
+             message['dst-rse'] = dst_rse
+     if not message.get('vo') and request.get('source_rse_id'):
+         src_id = request['source_rse_id']
+         vo = get_rse_vo(rse_id=src_id, session=session)
+         if vo != 'def':
+             message['vo'] = vo
+     for time_field in ('created_at', 'submitted_at', 'started_at', 'transferred_at'):
+         field_value = message[time_field]
+         message[time_field] = str(field_value) if field_value else None
+
+     add_message(transfer_status, message, session=session)
+
+
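For illustration, the event-type derivation at the top of this function yields names like the following (hedged; the exact members depend on rucio's RequestType/RequestState enums):

    # request_type present:        '%s-%s' % ('TRANSFER', 'DONE') -> lowered to 'transfer-done'
    # request_type absent (falsy): 'transfer-%s' % 'FAILED'       -> lowered to 'transfer-failed'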
+ def get_transfer_error(
+         state: RequestState,
+         reason: Optional[str] = None
+ ) -> Optional[str]:
+     """
+     Transform a specific RequestState to an error message.
+
+     :param state: State of the request.
+     :param reason: Reason of the state.
+     :returns: Error message.
+     """
+     err_msg = None
+     if state in [RequestState.NO_SOURCES, RequestState.ONLY_TAPE_SOURCES]:
+         err_msg = '%s:%s' % (RequestErrMsg.NO_SOURCES, state)
+     elif state in [RequestState.SUBMISSION_FAILED]:
+         err_msg = '%s:%s' % (RequestErrMsg.SUBMISSION_FAILED, state)
+     elif state in [RequestState.SUBMITTING]:
+         err_msg = '%s:%s' % (RequestErrMsg.SUBMISSION_FAILED, "Too long time in submitting state")
+     elif state in [RequestState.LOST]:
+         err_msg = '%s:%s' % (RequestErrMsg.TRANSFER_FAILED, "Transfer job on FTS is lost")
+     elif state in [RequestState.FAILED]:
+         err_msg = '%s:%s' % (RequestErrMsg.TRANSFER_FAILED, reason)
+     elif state in [RequestState.MISMATCH_SCHEME]:
+         err_msg = '%s:%s' % (RequestErrMsg.MISMATCH_SCHEME, state)
+     return err_msg
+
+
+ @read_session
+ def get_source_rse(
+         request_id: str,
+         src_url: str,
+         *,
+         session: "Session",
+         logger: LoggerFunction = logging.log
+ ) -> tuple[Optional[str], Optional[str]]:
+     """
+     Based on a request id and src_url, extract the source RSE name and id.
+
+     :param request_id: The request_id of the request.
+     :param src_url: The src_url of the request.
+     :param session: The database session to use.
+     :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
+     """
+
+     try:
+         if not request_id:
+             return None, None
+
+         sources = get_sources(request_id, session=session)
+         sources = sources or []
+         for source in sources:
+             if source['url'] == src_url:
+                 src_rse_id = source['rse_id']
+                 src_rse_name = get_rse_name(src_rse_id, session=session)
+                 logger(logging.DEBUG, "Found RSE name %s for %s" % (src_rse_name, src_url))
+                 return src_rse_name, src_rse_id
+         # cannot find a matching source url
+         logger(logging.WARNING, 'Cannot get correct RSE for source url: %s' % (src_url))
+         return None, None
+     except Exception:
+         logger(logging.ERROR, 'Cannot get correct RSE for source url: %s' % (src_url), exc_info=True)
+         return None, None
+
+
+ @stream_session
+ def list_requests(
+         src_rse_ids: 'Sequence[str]',
+         dst_rse_ids: 'Sequence[str]',
+         states: Optional['Sequence[RequestState]'] = None,
+         *,
+         session: "Session"
+ ) -> 'Iterator[models.Request]':
+     """
+     List all requests in the given states from the given source RSEs to the given destination RSEs.
+
+     :param src_rse_ids: source RSE ids.
+     :param dst_rse_ids: destination RSE ids.
+     :param states: list of request states.
+     :param session: The database session in use.
+     """
+     if not states:
+         states = [RequestState.WAITING]
+
+     stmt = select(
+         models.Request
+     ).where(
+         and_(models.Request.state.in_(states),
+              models.Request.source_rse_id.in_(src_rse_ids),
+              models.Request.dest_rse_id.in_(dst_rse_ids))
+     )
+     for request in session.execute(stmt).yield_per(500).scalars():
+         yield request
+
+
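A hedged iteration sketch (the ids are invented; the decorator injects the session):

    for request in list_requests(
            src_rse_ids=['4aa0b0c9a3a643268c2f213f3273100c'],
            dst_rse_ids=['0b2c0bcd42dd4ba3bacae2f4b6ab6c52'],
            states=[RequestState.QUEUED]):
        print(request.id, request.scope, request.name)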
+ @stream_session
+ def list_requests_history(
+         src_rse_ids: 'Sequence[str]',
+         dst_rse_ids: 'Sequence[str]',
+         states: Optional['Sequence[RequestState]'] = None,
+         offset: Optional[int] = None,
+         limit: Optional[int] = None,
+         *,
+         session: "Session"
+ ) -> 'Iterator[models.RequestHistory]':
+     """
+     List all historical requests in the given states from the given source RSEs to the given destination RSEs.
+
+     :param src_rse_ids: source RSE ids.
+     :param dst_rse_ids: destination RSE ids.
+     :param states: list of request states.
+     :param offset: offset (for paging).
+     :param limit: limit number of results.
+     :param session: The database session in use.
+     """
+     if not states:
+         states = [RequestState.WAITING]
+
+     stmt = select(
+         models.RequestHistory
+     ).where(
+         and_(models.RequestHistory.state.in_(states),
+              models.RequestHistory.source_rse_id.in_(src_rse_ids),
+              models.RequestHistory.dest_rse_id.in_(dst_rse_ids))
+     )
+     if offset:
+         stmt = stmt.offset(offset)
+     if limit:
+         stmt = stmt.limit(limit)
+     for request in session.execute(stmt).yield_per(500).scalars():
+         yield request
+
+
+ @transactional_session
+ def reset_stale_waiting_requests(
+         time_limit: datetime.timedelta = datetime.timedelta(days=1),
+         *,
+         session: "Session"
+ ) -> None:
+     """
+     Clear source_rse_id for requests that have been in the waiting state for more than time_limit and
+     transition them back to the preparing state (default time limit = 1 day).
+     This allows stale requests that have been waiting for a long time to react to
+     source changes that have occurred in the meantime.
+
+     :param time_limit: The amount of time a request must be in the waiting state to be reset.
+     :param session: The database session in use.
+     """
+     try:
+         # Cutoff timestamp based on the time limit
+         time_limit_timestamp = datetime.datetime.utcnow() - time_limit
+
+         # Select all waiting requests that precede the time limit, then clear source_rse_id and reset the state to preparing
+         stmt = update(
+             models.Request
+         ).where(
+             and_(models.Request.state == RequestState.WAITING,
+                  models.Request.last_processed_at < time_limit_timestamp)
+         ).execution_options(
+             synchronize_session=False
+         ).values({
+             models.Request.source_rse_id: None,
+             models.Request.state: RequestState.PREPARING
+         })
+         session.execute(stmt)
+
+     except IntegrityError as error:
+         raise RucioException(error.args)
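A hedged maintenance-style call (the interval is invented):

    # Send WAITING requests untouched for two days back to PREPARING so the
    # preparer re-evaluates their sources.
    reset_stale_waiting_requests(time_limit=datetime.timedelta(days=2))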