rucio 35.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of rucio might be problematic.

Files changed (493)
  1. rucio/__init__.py +17 -0
  2. rucio/alembicrevision.py +15 -0
  3. rucio/client/__init__.py +15 -0
  4. rucio/client/accountclient.py +433 -0
  5. rucio/client/accountlimitclient.py +183 -0
  6. rucio/client/baseclient.py +974 -0
  7. rucio/client/client.py +76 -0
  8. rucio/client/configclient.py +126 -0
  9. rucio/client/credentialclient.py +59 -0
  10. rucio/client/didclient.py +866 -0
  11. rucio/client/diracclient.py +56 -0
  12. rucio/client/downloadclient.py +1785 -0
  13. rucio/client/exportclient.py +44 -0
  14. rucio/client/fileclient.py +50 -0
  15. rucio/client/importclient.py +42 -0
  16. rucio/client/lifetimeclient.py +90 -0
  17. rucio/client/lockclient.py +109 -0
  18. rucio/client/metaconventionsclient.py +140 -0
  19. rucio/client/pingclient.py +44 -0
  20. rucio/client/replicaclient.py +454 -0
  21. rucio/client/requestclient.py +125 -0
  22. rucio/client/rseclient.py +746 -0
  23. rucio/client/ruleclient.py +294 -0
  24. rucio/client/scopeclient.py +90 -0
  25. rucio/client/subscriptionclient.py +173 -0
  26. rucio/client/touchclient.py +82 -0
  27. rucio/client/uploadclient.py +955 -0
  28. rucio/common/__init__.py +13 -0
  29. rucio/common/cache.py +74 -0
  30. rucio/common/config.py +801 -0
  31. rucio/common/constants.py +159 -0
  32. rucio/common/constraints.py +17 -0
  33. rucio/common/didtype.py +189 -0
  34. rucio/common/dumper/__init__.py +335 -0
  35. rucio/common/dumper/consistency.py +452 -0
  36. rucio/common/dumper/data_models.py +318 -0
  37. rucio/common/dumper/path_parsing.py +64 -0
  38. rucio/common/exception.py +1151 -0
  39. rucio/common/extra.py +36 -0
  40. rucio/common/logging.py +420 -0
  41. rucio/common/pcache.py +1408 -0
  42. rucio/common/plugins.py +153 -0
  43. rucio/common/policy.py +84 -0
  44. rucio/common/schema/__init__.py +150 -0
  45. rucio/common/schema/atlas.py +413 -0
  46. rucio/common/schema/belleii.py +408 -0
  47. rucio/common/schema/domatpc.py +401 -0
  48. rucio/common/schema/escape.py +426 -0
  49. rucio/common/schema/generic.py +433 -0
  50. rucio/common/schema/generic_multi_vo.py +412 -0
  51. rucio/common/schema/icecube.py +406 -0
  52. rucio/common/stomp_utils.py +159 -0
  53. rucio/common/stopwatch.py +55 -0
  54. rucio/common/test_rucio_server.py +148 -0
  55. rucio/common/types.py +403 -0
  56. rucio/common/utils.py +2238 -0
  57. rucio/core/__init__.py +13 -0
  58. rucio/core/account.py +496 -0
  59. rucio/core/account_counter.py +236 -0
  60. rucio/core/account_limit.py +423 -0
  61. rucio/core/authentication.py +620 -0
  62. rucio/core/config.py +456 -0
  63. rucio/core/credential.py +225 -0
  64. rucio/core/did.py +3000 -0
  65. rucio/core/did_meta_plugins/__init__.py +252 -0
  66. rucio/core/did_meta_plugins/did_column_meta.py +331 -0
  67. rucio/core/did_meta_plugins/did_meta_plugin_interface.py +165 -0
  68. rucio/core/did_meta_plugins/filter_engine.py +613 -0
  69. rucio/core/did_meta_plugins/json_meta.py +240 -0
  70. rucio/core/did_meta_plugins/mongo_meta.py +216 -0
  71. rucio/core/did_meta_plugins/postgres_meta.py +316 -0
  72. rucio/core/dirac.py +237 -0
  73. rucio/core/distance.py +187 -0
  74. rucio/core/exporter.py +59 -0
  75. rucio/core/heartbeat.py +363 -0
  76. rucio/core/identity.py +300 -0
  77. rucio/core/importer.py +259 -0
  78. rucio/core/lifetime_exception.py +377 -0
  79. rucio/core/lock.py +576 -0
  80. rucio/core/message.py +282 -0
  81. rucio/core/meta_conventions.py +203 -0
  82. rucio/core/monitor.py +447 -0
  83. rucio/core/naming_convention.py +195 -0
  84. rucio/core/nongrid_trace.py +136 -0
  85. rucio/core/oidc.py +1461 -0
  86. rucio/core/permission/__init__.py +119 -0
  87. rucio/core/permission/atlas.py +1348 -0
  88. rucio/core/permission/belleii.py +1077 -0
  89. rucio/core/permission/escape.py +1078 -0
  90. rucio/core/permission/generic.py +1130 -0
  91. rucio/core/permission/generic_multi_vo.py +1150 -0
  92. rucio/core/quarantined_replica.py +223 -0
  93. rucio/core/replica.py +4158 -0
  94. rucio/core/replica_sorter.py +366 -0
  95. rucio/core/request.py +3089 -0
  96. rucio/core/rse.py +1875 -0
  97. rucio/core/rse_counter.py +186 -0
  98. rucio/core/rse_expression_parser.py +459 -0
  99. rucio/core/rse_selector.py +302 -0
  100. rucio/core/rule.py +4483 -0
  101. rucio/core/rule_grouping.py +1618 -0
  102. rucio/core/scope.py +180 -0
  103. rucio/core/subscription.py +364 -0
  104. rucio/core/topology.py +490 -0
  105. rucio/core/trace.py +375 -0
  106. rucio/core/transfer.py +1517 -0
  107. rucio/core/vo.py +169 -0
  108. rucio/core/volatile_replica.py +150 -0
  109. rucio/daemons/__init__.py +13 -0
  110. rucio/daemons/abacus/__init__.py +13 -0
  111. rucio/daemons/abacus/account.py +116 -0
  112. rucio/daemons/abacus/collection_replica.py +124 -0
  113. rucio/daemons/abacus/rse.py +117 -0
  114. rucio/daemons/atropos/__init__.py +13 -0
  115. rucio/daemons/atropos/atropos.py +242 -0
  116. rucio/daemons/auditor/__init__.py +289 -0
  117. rucio/daemons/auditor/hdfs.py +97 -0
  118. rucio/daemons/auditor/srmdumps.py +355 -0
  119. rucio/daemons/automatix/__init__.py +13 -0
  120. rucio/daemons/automatix/automatix.py +293 -0
  121. rucio/daemons/badreplicas/__init__.py +13 -0
  122. rucio/daemons/badreplicas/minos.py +322 -0
  123. rucio/daemons/badreplicas/minos_temporary_expiration.py +171 -0
  124. rucio/daemons/badreplicas/necromancer.py +196 -0
  125. rucio/daemons/bb8/__init__.py +13 -0
  126. rucio/daemons/bb8/bb8.py +353 -0
  127. rucio/daemons/bb8/common.py +759 -0
  128. rucio/daemons/bb8/nuclei_background_rebalance.py +153 -0
  129. rucio/daemons/bb8/t2_background_rebalance.py +153 -0
  130. rucio/daemons/c3po/__init__.py +13 -0
  131. rucio/daemons/c3po/algorithms/__init__.py +13 -0
  132. rucio/daemons/c3po/algorithms/simple.py +134 -0
  133. rucio/daemons/c3po/algorithms/t2_free_space.py +128 -0
  134. rucio/daemons/c3po/algorithms/t2_free_space_only_pop.py +130 -0
  135. rucio/daemons/c3po/algorithms/t2_free_space_only_pop_with_network.py +294 -0
  136. rucio/daemons/c3po/c3po.py +371 -0
  137. rucio/daemons/c3po/collectors/__init__.py +13 -0
  138. rucio/daemons/c3po/collectors/agis.py +108 -0
  139. rucio/daemons/c3po/collectors/free_space.py +81 -0
  140. rucio/daemons/c3po/collectors/jedi_did.py +57 -0
  141. rucio/daemons/c3po/collectors/mock_did.py +51 -0
  142. rucio/daemons/c3po/collectors/network_metrics.py +71 -0
  143. rucio/daemons/c3po/collectors/workload.py +112 -0
  144. rucio/daemons/c3po/utils/__init__.py +13 -0
  145. rucio/daemons/c3po/utils/dataset_cache.py +50 -0
  146. rucio/daemons/c3po/utils/expiring_dataset_cache.py +56 -0
  147. rucio/daemons/c3po/utils/expiring_list.py +62 -0
  148. rucio/daemons/c3po/utils/popularity.py +85 -0
  149. rucio/daemons/c3po/utils/timeseries.py +89 -0
  150. rucio/daemons/cache/__init__.py +13 -0
  151. rucio/daemons/cache/consumer.py +197 -0
  152. rucio/daemons/common.py +415 -0
  153. rucio/daemons/conveyor/__init__.py +13 -0
  154. rucio/daemons/conveyor/common.py +562 -0
  155. rucio/daemons/conveyor/finisher.py +529 -0
  156. rucio/daemons/conveyor/poller.py +404 -0
  157. rucio/daemons/conveyor/preparer.py +205 -0
  158. rucio/daemons/conveyor/receiver.py +249 -0
  159. rucio/daemons/conveyor/stager.py +132 -0
  160. rucio/daemons/conveyor/submitter.py +403 -0
  161. rucio/daemons/conveyor/throttler.py +532 -0
  162. rucio/daemons/follower/__init__.py +13 -0
  163. rucio/daemons/follower/follower.py +101 -0
  164. rucio/daemons/hermes/__init__.py +13 -0
  165. rucio/daemons/hermes/hermes.py +774 -0
  166. rucio/daemons/judge/__init__.py +13 -0
  167. rucio/daemons/judge/cleaner.py +159 -0
  168. rucio/daemons/judge/evaluator.py +185 -0
  169. rucio/daemons/judge/injector.py +162 -0
  170. rucio/daemons/judge/repairer.py +154 -0
  171. rucio/daemons/oauthmanager/__init__.py +13 -0
  172. rucio/daemons/oauthmanager/oauthmanager.py +198 -0
  173. rucio/daemons/reaper/__init__.py +13 -0
  174. rucio/daemons/reaper/dark_reaper.py +278 -0
  175. rucio/daemons/reaper/reaper.py +743 -0
  176. rucio/daemons/replicarecoverer/__init__.py +13 -0
  177. rucio/daemons/replicarecoverer/suspicious_replica_recoverer.py +626 -0
  178. rucio/daemons/rsedecommissioner/__init__.py +13 -0
  179. rucio/daemons/rsedecommissioner/config.py +81 -0
  180. rucio/daemons/rsedecommissioner/profiles/__init__.py +24 -0
  181. rucio/daemons/rsedecommissioner/profiles/atlas.py +60 -0
  182. rucio/daemons/rsedecommissioner/profiles/generic.py +451 -0
  183. rucio/daemons/rsedecommissioner/profiles/types.py +92 -0
  184. rucio/daemons/rsedecommissioner/rse_decommissioner.py +280 -0
  185. rucio/daemons/storage/__init__.py +13 -0
  186. rucio/daemons/storage/consistency/__init__.py +13 -0
  187. rucio/daemons/storage/consistency/actions.py +846 -0
  188. rucio/daemons/tracer/__init__.py +13 -0
  189. rucio/daemons/tracer/kronos.py +536 -0
  190. rucio/daemons/transmogrifier/__init__.py +13 -0
  191. rucio/daemons/transmogrifier/transmogrifier.py +762 -0
  192. rucio/daemons/undertaker/__init__.py +13 -0
  193. rucio/daemons/undertaker/undertaker.py +137 -0
  194. rucio/db/__init__.py +13 -0
  195. rucio/db/sqla/__init__.py +52 -0
  196. rucio/db/sqla/constants.py +201 -0
  197. rucio/db/sqla/migrate_repo/__init__.py +13 -0
  198. rucio/db/sqla/migrate_repo/env.py +110 -0
  199. rucio/db/sqla/migrate_repo/versions/01eaf73ab656_add_new_rule_notification_state_progress.py +70 -0
  200. rucio/db/sqla/migrate_repo/versions/0437a40dbfd1_add_eol_at_in_rules.py +47 -0
  201. rucio/db/sqla/migrate_repo/versions/0f1adb7a599a_create_transfer_hops_table.py +59 -0
  202. rucio/db/sqla/migrate_repo/versions/102efcf145f4_added_stuck_at_column_to_rules.py +43 -0
  203. rucio/db/sqla/migrate_repo/versions/13d4f70c66a9_introduce_transfer_limits.py +91 -0
  204. rucio/db/sqla/migrate_repo/versions/140fef722e91_cleanup_distances_table.py +76 -0
  205. rucio/db/sqla/migrate_repo/versions/14ec5aeb64cf_add_request_external_host.py +43 -0
  206. rucio/db/sqla/migrate_repo/versions/156fb5b5a14_add_request_type_to_requests_idx.py +50 -0
  207. rucio/db/sqla/migrate_repo/versions/1677d4d803c8_split_rse_availability_into_multiple.py +68 -0
  208. rucio/db/sqla/migrate_repo/versions/16a0aca82e12_create_index_on_table_replicas_path.py +40 -0
  209. rucio/db/sqla/migrate_repo/versions/1803333ac20f_adding_provenance_and_phys_group.py +45 -0
  210. rucio/db/sqla/migrate_repo/versions/1a29d6a9504c_add_didtype_chck_to_requests.py +60 -0
  211. rucio/db/sqla/migrate_repo/versions/1a80adff031a_create_index_on_rules_hist_recent.py +40 -0
  212. rucio/db/sqla/migrate_repo/versions/1c45d9730ca6_increase_identity_length.py +140 -0
  213. rucio/db/sqla/migrate_repo/versions/1d1215494e95_add_quarantined_replicas_table.py +73 -0
  214. rucio/db/sqla/migrate_repo/versions/1d96f484df21_asynchronous_rules_and_rule_approval.py +74 -0
  215. rucio/db/sqla/migrate_repo/versions/1f46c5f240ac_add_bytes_column_to_bad_replicas.py +43 -0
  216. rucio/db/sqla/migrate_repo/versions/1fc15ab60d43_add_message_history_table.py +50 -0
  217. rucio/db/sqla/migrate_repo/versions/2190e703eb6e_move_rse_settings_to_rse_attributes.py +134 -0
  218. rucio/db/sqla/migrate_repo/versions/21d6b9dc9961_add_mismatch_scheme_state_to_requests.py +64 -0
  219. rucio/db/sqla/migrate_repo/versions/22cf51430c78_add_availability_column_to_table_rses.py +39 -0
  220. rucio/db/sqla/migrate_repo/versions/22d887e4ec0a_create_sources_table.py +64 -0
  221. rucio/db/sqla/migrate_repo/versions/25821a8a45a3_remove_unique_constraint_on_requests.py +51 -0
  222. rucio/db/sqla/migrate_repo/versions/25fc855625cf_added_unique_constraint_to_rules.py +41 -0
  223. rucio/db/sqla/migrate_repo/versions/269fee20dee9_add_repair_cnt_to_locks.py +43 -0
  224. rucio/db/sqla/migrate_repo/versions/271a46ea6244_add_ignore_availability_column_to_rules.py +44 -0
  225. rucio/db/sqla/migrate_repo/versions/277b5fbb41d3_switch_heartbeats_executable.py +53 -0
  226. rucio/db/sqla/migrate_repo/versions/27e3a68927fb_remove_replicas_tombstone_and_replicas_.py +38 -0
  227. rucio/db/sqla/migrate_repo/versions/2854cd9e168_added_rule_id_column.py +47 -0
  228. rucio/db/sqla/migrate_repo/versions/295289b5a800_processed_by_and__at_in_requests.py +45 -0
  229. rucio/db/sqla/migrate_repo/versions/2962ece31cf4_add_nbaccesses_column_in_the_did_table.py +45 -0
  230. rucio/db/sqla/migrate_repo/versions/2af3291ec4c_added_replicas_history_table.py +57 -0
  231. rucio/db/sqla/migrate_repo/versions/2b69addda658_add_columns_for_third_party_copy_read_.py +45 -0
  232. rucio/db/sqla/migrate_repo/versions/2b8e7bcb4783_add_config_table.py +69 -0
  233. rucio/db/sqla/migrate_repo/versions/2ba5229cb54c_add_submitted_at_to_requests_table.py +43 -0
  234. rucio/db/sqla/migrate_repo/versions/2cbee484dcf9_added_column_volume_to_rse_transfer_.py +42 -0
  235. rucio/db/sqla/migrate_repo/versions/2edee4a83846_add_source_to_requests_and_requests_.py +47 -0
  236. rucio/db/sqla/migrate_repo/versions/2eef46be23d4_change_tokens_pk.py +46 -0
  237. rucio/db/sqla/migrate_repo/versions/2f648fc909f3_index_in_rule_history_on_scope_name.py +40 -0
  238. rucio/db/sqla/migrate_repo/versions/3082b8cef557_add_naming_convention_table_and_closed_.py +67 -0
  239. rucio/db/sqla/migrate_repo/versions/30fa38b6434e_add_index_on_service_column_in_the_message_table.py +44 -0
  240. rucio/db/sqla/migrate_repo/versions/3152492b110b_added_staging_area_column.py +77 -0
  241. rucio/db/sqla/migrate_repo/versions/32c7d2783f7e_create_bad_replicas_table.py +60 -0
  242. rucio/db/sqla/migrate_repo/versions/3345511706b8_replicas_table_pk_definition_is_in_.py +72 -0
  243. rucio/db/sqla/migrate_repo/versions/35ef10d1e11b_change_index_on_table_requests.py +42 -0
  244. rucio/db/sqla/migrate_repo/versions/379a19b5332d_create_rse_limits_table.py +65 -0
  245. rucio/db/sqla/migrate_repo/versions/384b96aa0f60_created_rule_history_tables.py +133 -0
  246. rucio/db/sqla/migrate_repo/versions/3ac1660a1a72_extend_distance_table.py +55 -0
  247. rucio/db/sqla/migrate_repo/versions/3ad36e2268b0_create_collection_replicas_updates_table.py +76 -0
  248. rucio/db/sqla/migrate_repo/versions/3c9df354071b_extend_waiting_request_state.py +60 -0
  249. rucio/db/sqla/migrate_repo/versions/3d9813fab443_add_a_new_state_lost_in_badfilesstatus.py +44 -0
  250. rucio/db/sqla/migrate_repo/versions/40ad39ce3160_add_transferred_at_to_requests_table.py +43 -0
  251. rucio/db/sqla/migrate_repo/versions/4207be2fd914_add_notification_column_to_rules.py +64 -0
  252. rucio/db/sqla/migrate_repo/versions/42db2617c364_create_index_on_requests_external_id.py +40 -0
  253. rucio/db/sqla/migrate_repo/versions/436827b13f82_added_column_activity_to_table_requests.py +43 -0
  254. rucio/db/sqla/migrate_repo/versions/44278720f774_update_requests_typ_sta_upd_idx_index.py +44 -0
  255. rucio/db/sqla/migrate_repo/versions/45378a1e76a8_create_collection_replica_table.py +78 -0
  256. rucio/db/sqla/migrate_repo/versions/469d262be19_removing_created_at_index.py +41 -0
  257. rucio/db/sqla/migrate_repo/versions/4783c1f49cb4_create_distance_table.py +59 -0
  258. rucio/db/sqla/migrate_repo/versions/49a21b4d4357_create_index_on_table_tokens.py +44 -0
  259. rucio/db/sqla/migrate_repo/versions/4a2cbedda8b9_add_source_replica_expression_column_to_.py +43 -0
  260. rucio/db/sqla/migrate_repo/versions/4a7182d9578b_added_bytes_length_accessed_at_columns.py +49 -0
  261. rucio/db/sqla/migrate_repo/versions/4bab9edd01fc_create_index_on_requests_rule_id.py +40 -0
  262. rucio/db/sqla/migrate_repo/versions/4c3a4acfe006_new_attr_account_table.py +63 -0
  263. rucio/db/sqla/migrate_repo/versions/4cf0a2e127d4_adding_transient_metadata.py +43 -0
  264. rucio/db/sqla/migrate_repo/versions/4df2c5ddabc0_remove_temporary_dids.py +55 -0
  265. rucio/db/sqla/migrate_repo/versions/50280c53117c_add_qos_class_to_rse.py +45 -0
  266. rucio/db/sqla/migrate_repo/versions/52153819589c_add_rse_id_to_replicas_table.py +43 -0
  267. rucio/db/sqla/migrate_repo/versions/52fd9f4916fa_added_activity_to_rules.py +43 -0
  268. rucio/db/sqla/migrate_repo/versions/53b479c3cb0f_fix_did_meta_table_missing_updated_at_.py +45 -0
  269. rucio/db/sqla/migrate_repo/versions/5673b4b6e843_add_wfms_metadata_to_rule_tables.py +47 -0
  270. rucio/db/sqla/migrate_repo/versions/575767d9f89_added_source_history_table.py +58 -0
  271. rucio/db/sqla/migrate_repo/versions/58bff7008037_add_started_at_to_requests.py +45 -0
  272. rucio/db/sqla/migrate_repo/versions/58c8b78301ab_rename_callback_to_message.py +106 -0
  273. rucio/db/sqla/migrate_repo/versions/5f139f77382a_added_child_rule_id_column.py +55 -0
  274. rucio/db/sqla/migrate_repo/versions/688ef1840840_adding_did_meta_table.py +50 -0
  275. rucio/db/sqla/migrate_repo/versions/6e572a9bfbf3_add_new_split_container_column_to_rules.py +47 -0
  276. rucio/db/sqla/migrate_repo/versions/70587619328_add_comment_column_for_subscriptions.py +43 -0
  277. rucio/db/sqla/migrate_repo/versions/739064d31565_remove_history_table_pks.py +41 -0
  278. rucio/db/sqla/migrate_repo/versions/7541902bf173_add_didsfollowed_and_followevents_table.py +91 -0
  279. rucio/db/sqla/migrate_repo/versions/7ec22226cdbf_new_replica_state_for_temporary_.py +72 -0
  280. rucio/db/sqla/migrate_repo/versions/810a41685bc1_added_columns_rse_transfer_limits.py +49 -0
  281. rucio/db/sqla/migrate_repo/versions/83f991c63a93_correct_rse_expression_length.py +43 -0
  282. rucio/db/sqla/migrate_repo/versions/8523998e2e76_increase_size_of_extended_attributes_.py +43 -0
  283. rucio/db/sqla/migrate_repo/versions/8ea9122275b1_adding_missing_function_based_indices.py +53 -0
  284. rucio/db/sqla/migrate_repo/versions/90f47792bb76_add_clob_payload_to_messages.py +45 -0
  285. rucio/db/sqla/migrate_repo/versions/914b8f02df38_new_table_for_lifetime_model_exceptions.py +68 -0
  286. rucio/db/sqla/migrate_repo/versions/94a5961ddbf2_add_estimator_columns.py +45 -0
  287. rucio/db/sqla/migrate_repo/versions/9a1b149a2044_add_saml_identity_type.py +94 -0
  288. rucio/db/sqla/migrate_repo/versions/9a45bc4ea66d_add_vp_table.py +54 -0
  289. rucio/db/sqla/migrate_repo/versions/9eb936a81eb1_true_is_true.py +72 -0
  290. rucio/db/sqla/migrate_repo/versions/a08fa8de1545_transfer_stats_table.py +55 -0
  291. rucio/db/sqla/migrate_repo/versions/a118956323f8_added_vo_table_and_vo_col_to_rse.py +76 -0
  292. rucio/db/sqla/migrate_repo/versions/a193a275255c_add_status_column_in_messages.py +47 -0
  293. rucio/db/sqla/migrate_repo/versions/a5f6f6e928a7_1_7_0.py +121 -0
  294. rucio/db/sqla/migrate_repo/versions/a616581ee47_added_columns_to_table_requests.py +59 -0
  295. rucio/db/sqla/migrate_repo/versions/a6eb23955c28_state_idx_non_functional.py +52 -0
  296. rucio/db/sqla/migrate_repo/versions/a74275a1ad30_added_global_quota_table.py +54 -0
  297. rucio/db/sqla/migrate_repo/versions/a93e4e47bda_heartbeats.py +64 -0
  298. rucio/db/sqla/migrate_repo/versions/ae2a56fcc89_added_comment_column_to_rules.py +49 -0
  299. rucio/db/sqla/migrate_repo/versions/b0070f3695c8_add_deletedidmeta_table.py +57 -0
  300. rucio/db/sqla/migrate_repo/versions/b4293a99f344_added_column_identity_to_table_tokens.py +43 -0
  301. rucio/db/sqla/migrate_repo/versions/b5493606bbf5_fix_primary_key_for_subscription_history.py +41 -0
  302. rucio/db/sqla/migrate_repo/versions/b7d287de34fd_removal_of_replicastate_source.py +91 -0
  303. rucio/db/sqla/migrate_repo/versions/b818052fa670_add_index_to_quarantined_replicas.py +40 -0
  304. rucio/db/sqla/migrate_repo/versions/b8caac94d7f0_add_comments_column_for_subscriptions_.py +43 -0
  305. rucio/db/sqla/migrate_repo/versions/b96a1c7e1cc4_new_bad_pfns_table_and_bad_replicas_.py +143 -0
  306. rucio/db/sqla/migrate_repo/versions/bb695f45c04_extend_request_state.py +76 -0
  307. rucio/db/sqla/migrate_repo/versions/bc68e9946deb_add_staging_timestamps_to_request.py +50 -0
  308. rucio/db/sqla/migrate_repo/versions/bf3baa1c1474_correct_pk_and_idx_for_history_tables.py +72 -0
  309. rucio/db/sqla/migrate_repo/versions/c0937668555f_add_qos_policy_map_table.py +55 -0
  310. rucio/db/sqla/migrate_repo/versions/c129ccdb2d5_add_lumiblocknr_to_dids.py +43 -0
  311. rucio/db/sqla/migrate_repo/versions/ccdbcd48206e_add_did_type_column_index_on_did_meta_.py +65 -0
  312. rucio/db/sqla/migrate_repo/versions/cebad904c4dd_new_payload_column_for_heartbeats.py +47 -0
  313. rucio/db/sqla/migrate_repo/versions/d1189a09c6e0_oauth2_0_and_jwt_feature_support_adding_.py +146 -0
  314. rucio/db/sqla/migrate_repo/versions/d23453595260_extend_request_state_for_preparer.py +104 -0
  315. rucio/db/sqla/migrate_repo/versions/d6dceb1de2d_added_purge_column_to_rules.py +44 -0
  316. rucio/db/sqla/migrate_repo/versions/d6e2c3b2cf26_remove_third_party_copy_column_from_rse.py +43 -0
  317. rucio/db/sqla/migrate_repo/versions/d91002c5841_new_account_limits_table.py +103 -0
  318. rucio/db/sqla/migrate_repo/versions/e138c364ebd0_extending_columns_for_filter_and_.py +49 -0
  319. rucio/db/sqla/migrate_repo/versions/e59300c8b179_support_for_archive.py +104 -0
  320. rucio/db/sqla/migrate_repo/versions/f1b14a8c2ac1_postgres_use_check_constraints.py +29 -0
  321. rucio/db/sqla/migrate_repo/versions/f41ffe206f37_oracle_global_temporary_tables.py +74 -0
  322. rucio/db/sqla/migrate_repo/versions/f85a2962b021_adding_transfertool_column_to_requests_.py +47 -0
  323. rucio/db/sqla/migrate_repo/versions/fa7a7d78b602_increase_refresh_token_size.py +43 -0
  324. rucio/db/sqla/migrate_repo/versions/fb28a95fe288_add_replicas_rse_id_tombstone_idx.py +37 -0
  325. rucio/db/sqla/migrate_repo/versions/fe1a65b176c9_set_third_party_copy_read_and_write_.py +43 -0
  326. rucio/db/sqla/migrate_repo/versions/fe8ea2fa9788_added_third_party_copy_column_to_rse_.py +43 -0
  327. rucio/db/sqla/models.py +1740 -0
  328. rucio/db/sqla/sautils.py +55 -0
  329. rucio/db/sqla/session.py +498 -0
  330. rucio/db/sqla/types.py +206 -0
  331. rucio/db/sqla/util.py +543 -0
  332. rucio/gateway/__init__.py +13 -0
  333. rucio/gateway/account.py +339 -0
  334. rucio/gateway/account_limit.py +286 -0
  335. rucio/gateway/authentication.py +375 -0
  336. rucio/gateway/config.py +217 -0
  337. rucio/gateway/credential.py +71 -0
  338. rucio/gateway/did.py +970 -0
  339. rucio/gateway/dirac.py +81 -0
  340. rucio/gateway/exporter.py +59 -0
  341. rucio/gateway/heartbeat.py +74 -0
  342. rucio/gateway/identity.py +204 -0
  343. rucio/gateway/importer.py +45 -0
  344. rucio/gateway/lifetime_exception.py +120 -0
  345. rucio/gateway/lock.py +153 -0
  346. rucio/gateway/meta_conventions.py +87 -0
  347. rucio/gateway/permission.py +71 -0
  348. rucio/gateway/quarantined_replica.py +78 -0
  349. rucio/gateway/replica.py +529 -0
  350. rucio/gateway/request.py +321 -0
  351. rucio/gateway/rse.py +600 -0
  352. rucio/gateway/rule.py +417 -0
  353. rucio/gateway/scope.py +99 -0
  354. rucio/gateway/subscription.py +277 -0
  355. rucio/gateway/vo.py +122 -0
  356. rucio/rse/__init__.py +96 -0
  357. rucio/rse/protocols/__init__.py +13 -0
  358. rucio/rse/protocols/bittorrent.py +184 -0
  359. rucio/rse/protocols/cache.py +122 -0
  360. rucio/rse/protocols/dummy.py +111 -0
  361. rucio/rse/protocols/gfal.py +703 -0
  362. rucio/rse/protocols/globus.py +243 -0
  363. rucio/rse/protocols/gsiftp.py +92 -0
  364. rucio/rse/protocols/http_cache.py +82 -0
  365. rucio/rse/protocols/mock.py +123 -0
  366. rucio/rse/protocols/ngarc.py +209 -0
  367. rucio/rse/protocols/posix.py +250 -0
  368. rucio/rse/protocols/protocol.py +594 -0
  369. rucio/rse/protocols/rclone.py +364 -0
  370. rucio/rse/protocols/rfio.py +136 -0
  371. rucio/rse/protocols/srm.py +338 -0
  372. rucio/rse/protocols/ssh.py +413 -0
  373. rucio/rse/protocols/storm.py +206 -0
  374. rucio/rse/protocols/webdav.py +550 -0
  375. rucio/rse/protocols/xrootd.py +301 -0
  376. rucio/rse/rsemanager.py +764 -0
  377. rucio/tests/__init__.py +13 -0
  378. rucio/tests/common.py +270 -0
  379. rucio/tests/common_server.py +132 -0
  380. rucio/transfertool/__init__.py +13 -0
  381. rucio/transfertool/bittorrent.py +199 -0
  382. rucio/transfertool/bittorrent_driver.py +52 -0
  383. rucio/transfertool/bittorrent_driver_qbittorrent.py +133 -0
  384. rucio/transfertool/fts3.py +1596 -0
  385. rucio/transfertool/fts3_plugins.py +152 -0
  386. rucio/transfertool/globus.py +201 -0
  387. rucio/transfertool/globus_library.py +181 -0
  388. rucio/transfertool/mock.py +90 -0
  389. rucio/transfertool/transfertool.py +221 -0
  390. rucio/vcsversion.py +11 -0
  391. rucio/version.py +38 -0
  392. rucio/web/__init__.py +13 -0
  393. rucio/web/rest/__init__.py +13 -0
  394. rucio/web/rest/flaskapi/__init__.py +13 -0
  395. rucio/web/rest/flaskapi/authenticated_bp.py +27 -0
  396. rucio/web/rest/flaskapi/v1/__init__.py +13 -0
  397. rucio/web/rest/flaskapi/v1/accountlimits.py +236 -0
  398. rucio/web/rest/flaskapi/v1/accounts.py +1089 -0
  399. rucio/web/rest/flaskapi/v1/archives.py +102 -0
  400. rucio/web/rest/flaskapi/v1/auth.py +1644 -0
  401. rucio/web/rest/flaskapi/v1/common.py +426 -0
  402. rucio/web/rest/flaskapi/v1/config.py +304 -0
  403. rucio/web/rest/flaskapi/v1/credentials.py +212 -0
  404. rucio/web/rest/flaskapi/v1/dids.py +2334 -0
  405. rucio/web/rest/flaskapi/v1/dirac.py +116 -0
  406. rucio/web/rest/flaskapi/v1/export.py +75 -0
  407. rucio/web/rest/flaskapi/v1/heartbeats.py +127 -0
  408. rucio/web/rest/flaskapi/v1/identities.py +261 -0
  409. rucio/web/rest/flaskapi/v1/import.py +132 -0
  410. rucio/web/rest/flaskapi/v1/lifetime_exceptions.py +312 -0
  411. rucio/web/rest/flaskapi/v1/locks.py +358 -0
  412. rucio/web/rest/flaskapi/v1/main.py +91 -0
  413. rucio/web/rest/flaskapi/v1/meta_conventions.py +241 -0
  414. rucio/web/rest/flaskapi/v1/metrics.py +36 -0
  415. rucio/web/rest/flaskapi/v1/nongrid_traces.py +97 -0
  416. rucio/web/rest/flaskapi/v1/ping.py +88 -0
  417. rucio/web/rest/flaskapi/v1/redirect.py +365 -0
  418. rucio/web/rest/flaskapi/v1/replicas.py +1890 -0
  419. rucio/web/rest/flaskapi/v1/requests.py +998 -0
  420. rucio/web/rest/flaskapi/v1/rses.py +2239 -0
  421. rucio/web/rest/flaskapi/v1/rules.py +854 -0
  422. rucio/web/rest/flaskapi/v1/scopes.py +159 -0
  423. rucio/web/rest/flaskapi/v1/subscriptions.py +650 -0
  424. rucio/web/rest/flaskapi/v1/templates/auth_crash.html +80 -0
  425. rucio/web/rest/flaskapi/v1/templates/auth_granted.html +82 -0
  426. rucio/web/rest/flaskapi/v1/traces.py +100 -0
  427. rucio/web/rest/flaskapi/v1/types.py +20 -0
  428. rucio/web/rest/flaskapi/v1/vos.py +278 -0
  429. rucio/web/rest/main.py +18 -0
  430. rucio/web/rest/metrics.py +27 -0
  431. rucio/web/rest/ping.py +27 -0
  432. rucio-35.7.0.data/data/rucio/etc/alembic.ini.template +71 -0
  433. rucio-35.7.0.data/data/rucio/etc/alembic_offline.ini.template +74 -0
  434. rucio-35.7.0.data/data/rucio/etc/globus-config.yml.template +5 -0
  435. rucio-35.7.0.data/data/rucio/etc/ldap.cfg.template +30 -0
  436. rucio-35.7.0.data/data/rucio/etc/mail_templates/rule_approval_request.tmpl +38 -0
  437. rucio-35.7.0.data/data/rucio/etc/mail_templates/rule_approved_admin.tmpl +4 -0
  438. rucio-35.7.0.data/data/rucio/etc/mail_templates/rule_approved_user.tmpl +17 -0
  439. rucio-35.7.0.data/data/rucio/etc/mail_templates/rule_denied_admin.tmpl +6 -0
  440. rucio-35.7.0.data/data/rucio/etc/mail_templates/rule_denied_user.tmpl +17 -0
  441. rucio-35.7.0.data/data/rucio/etc/mail_templates/rule_ok_notification.tmpl +19 -0
  442. rucio-35.7.0.data/data/rucio/etc/rse-accounts.cfg.template +25 -0
  443. rucio-35.7.0.data/data/rucio/etc/rucio.cfg.atlas.client.template +42 -0
  444. rucio-35.7.0.data/data/rucio/etc/rucio.cfg.template +257 -0
  445. rucio-35.7.0.data/data/rucio/etc/rucio_multi_vo.cfg.template +234 -0
  446. rucio-35.7.0.data/data/rucio/requirements.server.txt +268 -0
  447. rucio-35.7.0.data/data/rucio/tools/bootstrap.py +34 -0
  448. rucio-35.7.0.data/data/rucio/tools/merge_rucio_configs.py +144 -0
  449. rucio-35.7.0.data/data/rucio/tools/reset_database.py +40 -0
  450. rucio-35.7.0.data/scripts/rucio +2542 -0
  451. rucio-35.7.0.data/scripts/rucio-abacus-account +74 -0
  452. rucio-35.7.0.data/scripts/rucio-abacus-collection-replica +46 -0
  453. rucio-35.7.0.data/scripts/rucio-abacus-rse +78 -0
  454. rucio-35.7.0.data/scripts/rucio-admin +2447 -0
  455. rucio-35.7.0.data/scripts/rucio-atropos +60 -0
  456. rucio-35.7.0.data/scripts/rucio-auditor +205 -0
  457. rucio-35.7.0.data/scripts/rucio-automatix +50 -0
  458. rucio-35.7.0.data/scripts/rucio-bb8 +57 -0
  459. rucio-35.7.0.data/scripts/rucio-c3po +85 -0
  460. rucio-35.7.0.data/scripts/rucio-cache-client +134 -0
  461. rucio-35.7.0.data/scripts/rucio-cache-consumer +42 -0
  462. rucio-35.7.0.data/scripts/rucio-conveyor-finisher +58 -0
  463. rucio-35.7.0.data/scripts/rucio-conveyor-poller +66 -0
  464. rucio-35.7.0.data/scripts/rucio-conveyor-preparer +37 -0
  465. rucio-35.7.0.data/scripts/rucio-conveyor-receiver +43 -0
  466. rucio-35.7.0.data/scripts/rucio-conveyor-stager +76 -0
  467. rucio-35.7.0.data/scripts/rucio-conveyor-submitter +139 -0
  468. rucio-35.7.0.data/scripts/rucio-conveyor-throttler +104 -0
  469. rucio-35.7.0.data/scripts/rucio-dark-reaper +53 -0
  470. rucio-35.7.0.data/scripts/rucio-dumper +160 -0
  471. rucio-35.7.0.data/scripts/rucio-follower +44 -0
  472. rucio-35.7.0.data/scripts/rucio-hermes +54 -0
  473. rucio-35.7.0.data/scripts/rucio-judge-cleaner +89 -0
  474. rucio-35.7.0.data/scripts/rucio-judge-evaluator +137 -0
  475. rucio-35.7.0.data/scripts/rucio-judge-injector +44 -0
  476. rucio-35.7.0.data/scripts/rucio-judge-repairer +44 -0
  477. rucio-35.7.0.data/scripts/rucio-kronos +43 -0
  478. rucio-35.7.0.data/scripts/rucio-minos +53 -0
  479. rucio-35.7.0.data/scripts/rucio-minos-temporary-expiration +50 -0
  480. rucio-35.7.0.data/scripts/rucio-necromancer +120 -0
  481. rucio-35.7.0.data/scripts/rucio-oauth-manager +63 -0
  482. rucio-35.7.0.data/scripts/rucio-reaper +83 -0
  483. rucio-35.7.0.data/scripts/rucio-replica-recoverer +248 -0
  484. rucio-35.7.0.data/scripts/rucio-rse-decommissioner +66 -0
  485. rucio-35.7.0.data/scripts/rucio-storage-consistency-actions +74 -0
  486. rucio-35.7.0.data/scripts/rucio-transmogrifier +77 -0
  487. rucio-35.7.0.data/scripts/rucio-undertaker +76 -0
  488. rucio-35.7.0.dist-info/METADATA +72 -0
  489. rucio-35.7.0.dist-info/RECORD +493 -0
  490. rucio-35.7.0.dist-info/WHEEL +5 -0
  491. rucio-35.7.0.dist-info/licenses/AUTHORS.rst +97 -0
  492. rucio-35.7.0.dist-info/licenses/LICENSE +201 -0
  493. rucio-35.7.0.dist-info/top_level.txt +1 -0
rucio/core/request.py ADDED
@@ -0,0 +1,3089 @@
+ # Copyright European Organization for Nuclear Research (CERN) since 2012
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import datetime
+ import itertools
+ import json
+ import logging
+ import math
+ import random
+ import threading
+ import traceback
+ import uuid
+ from abc import ABCMeta, abstractmethod
+ from collections import defaultdict, namedtuple
+ from collections.abc import Iterable, Iterator, Mapping, Sequence
+ from dataclasses import dataclass
+ from typing import TYPE_CHECKING, Any, Optional, Union
+
+ from sqlalchemy import and_, delete, exists, insert, or_, select, update
+ from sqlalchemy.exc import IntegrityError
+ from sqlalchemy.orm import aliased
+ from sqlalchemy.sql.expression import asc, false, func, null, true
+ from sqlalchemy.sql.functions import coalesce
+
+ from rucio.common.config import config_get_bool, config_get_int
+ from rucio.common.constants import RseAttr
+ from rucio.common.exception import InvalidRSEExpression, RequestNotFound, RucioException, UnsupportedOperation
+ from rucio.common.types import FilterDict, InternalAccount, InternalScope, LoggerFunction, RequestDict
+ from rucio.common.utils import chunks, generate_uuid
+ from rucio.core.distance import get_distances
+ from rucio.core.message import add_message, add_messages
+ from rucio.core.monitor import MetricManager
+ from rucio.core.rse import RseCollection, RseData, get_rse_attribute, get_rse_name, get_rse_vo
+ from rucio.core.rse_expression_parser import parse_expression
+ from rucio.db.sqla import filter_thread_work, models
+ from rucio.db.sqla.constants import LockState, ReplicaState, RequestErrMsg, RequestState, RequestType, TransferLimitDirection
+ from rucio.db.sqla.session import read_session, stream_session, transactional_session
+ from rucio.db.sqla.util import temp_table_mngr
+
+ RequestAndState = namedtuple('RequestAndState', ['request_id', 'request_state'])
+
+ if TYPE_CHECKING:
+
+     from sqlalchemy.engine import Row
+     from sqlalchemy.orm import Session
+     from sqlalchemy.sql.selectable import Subquery
+
+     from rucio.rse.protocols.protocol import RSEProtocol
+
+ """
+ The core request.py is specifically for handling requests.
+ Requests accessed by external_id (so-called transfers) are covered in the core transfer.py.
+ """
+
+ METRICS = MetricManager(module=__name__)
+
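+ # histogram bucket boundaries, in seconds, for the transfer-duration metrics:
+ # from 10 seconds up to 30 days, plus a catch-all +inf bucket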
+ TRANSFER_TIME_BUCKETS = (
+     10, 30, 60, 5 * 60, 10 * 60, 20 * 60, 40 * 60, 60 * 60, 1.5 * 60 * 60, 3 * 60 * 60, 6 * 60 * 60,
+     12 * 60 * 60, 24 * 60 * 60, 3 * 24 * 60 * 60, 4 * 24 * 60 * 60, 5 * 24 * 60 * 60,
+     6 * 24 * 60 * 60, 7 * 24 * 60 * 60, 10 * 24 * 60 * 60, 14 * 24 * 60 * 60, 30 * 24 * 60 * 60,
+     float('inf')
+ )
+
+
+ class RequestSource:
+     def __init__(
+             self,
+             rse: RseData,
+             ranking: Optional[int] = None,
+             distance: Optional[int] = None,
+             file_path: Optional[str] = None,
+             scheme: Optional[str] = None,
+             url: Optional[str] = None
+     ):
+         self.rse = rse
+         self.distance = distance if distance is not None else 9999
+         self.ranking = ranking if ranking is not None else 0
+         self.file_path = file_path
+         self.scheme = scheme
+         self.url = url
+
+     def __str__(self) -> str:
+         return "src_rse={}".format(self.rse)
+
+
+ class TransferDestination:
+     def __init__(
+             self,
+             rse: RseData,
+             scheme: str
+     ):
+         self.rse = rse
+         self.scheme = scheme
+
+     def __str__(self) -> str:
+         return "dst_rse={}".format(self.rse)
+
+
+ class RequestWithSources:
+     def __init__(
+             self,
+             id_: Optional[str],
+             request_type: RequestType,
+             rule_id: Optional[str],
+             scope: InternalScope,
+             name: str,
+             md5: str,
+             adler32: str,
+             byte_count: int,
+             activity: str,
+             attributes: Optional[Union[str, dict[str, Any]]],
+             previous_attempt_id: Optional[str],
+             dest_rse: RseData,
+             account: InternalAccount,
+             retry_count: int,
+             priority: int,
+             transfertool: str,
+             requested_at: Optional[datetime.datetime] = None,
+     ):
+         self.request_id = id_
+         self.request_type = request_type
+         self.rule_id = rule_id
+         self.scope = scope
+         self.name = name
+         self.md5 = md5
+         self.adler32 = adler32
+         self.byte_count = byte_count
+         self.activity = activity
+         self._dict_attributes = None
+         self._db_attributes = attributes
+         self.previous_attempt_id = previous_attempt_id
+         self.dest_rse = dest_rse
+         self.account = account
+         self.retry_count = retry_count or 0
+         self.priority = priority if priority is not None else 3
+         self.transfertool = transfertool
+         self.requested_at = requested_at if requested_at else datetime.datetime.utcnow()
+
+         self.sources: list[RequestSource] = []
+         self.requested_source: Optional[RequestSource] = None
+
+     def __str__(self) -> str:
+         return "{}({}:{})".format(self.request_id, self.scope, self.name)
+
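+     # the attributes column is stored as JSON text in the database; it is parsed
+     # lazily on first access, with defaults filled in by _parse_db_attributes()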
+     @property
+     def attributes(self) -> dict[str, Any]:
+         if self._dict_attributes is None:
+             self._dict_attributes = self._parse_db_attributes(self._db_attributes)
+         return self._dict_attributes
+
+     @attributes.setter
+     def attributes(self, db_attributes: dict[str, Any]) -> None:
+         self._dict_attributes = self._parse_db_attributes(db_attributes)
+
+     @staticmethod
+     def _parse_db_attributes(db_attributes: Optional[Union[str, dict[str, Any]]]) -> dict[str, Any]:
+         attr = {}
+         if db_attributes:
+             if isinstance(db_attributes, dict):
+                 attr = json.loads(json.dumps(db_attributes))
+             else:
+                 attr = json.loads(str(db_attributes))
+         # parse source expression
+         attr['source_replica_expression'] = attr["source_replica_expression"] if (attr and "source_replica_expression" in attr) else None
+         attr['allow_tape_source'] = attr["allow_tape_source"] if (attr and "allow_tape_source" in attr) else True
+         attr['dsn'] = attr["ds_name"] if (attr and "ds_name" in attr) else None
+         attr['lifetime'] = attr.get('lifetime', -1)
+         return attr
+
+
+ class DirectTransfer(metaclass=ABCMeta):
+     """
+     The configuration for a direct (non-multi-hop) transfer. It can be a multi-source transfer.
+     """
+
+     def __init__(self, sources: list[RequestSource], rws: RequestWithSources) -> None:
+         self.sources: list[RequestSource] = sources
+         self.rws: RequestWithSources = rws
+
+     @property
+     @abstractmethod
+     def src(self) -> RequestSource:
+         pass
+
+     @property
+     @abstractmethod
+     def dst(self) -> TransferDestination:
+         pass
+
+     @property
+     @abstractmethod
+     def dest_url(self) -> str:
+         pass
+
+     @abstractmethod
+     def source_url(self, source: RequestSource) -> str:
+         pass
+
+     @abstractmethod
+     def dest_protocol(self) -> "RSEProtocol":
+         pass
+
+     @abstractmethod
+     def source_protocol(self, source: RequestSource) -> "RSEProtocol":
+         pass
+
+
+ def should_retry_request(
+         req: RequestDict,
+         retry_protocol_mismatches: bool
+ ) -> bool:
+     """
+     Whether this request should be retried.
+
+     :param req: Request as a dictionary.
+     :param retry_protocol_mismatches: Whether to retry the transfer in case of a protocol mismatch.
+     :returns: True if the request should be retried; False otherwise.
+     """
+     if is_intermediate_hop(req):
+         # This is an intermediate request in a multi-hop transfer. It must not be re-scheduled on its own.
+         # If needed, it will be re-scheduled via the creation of a new multi-hop transfer.
+         return False
+     if req['state'] == RequestState.SUBMITTING:
+         return True
+     if req['state'] == RequestState.NO_SOURCES or req['state'] == RequestState.ONLY_TAPE_SOURCES:
+         return False
+     # hardcoded for now - only requeue a couple of times
+     if req['retry_count'] is None or req['retry_count'] < 3:
+         if req['state'] == RequestState.MISMATCH_SCHEME:
+             return retry_protocol_mismatches
+         return True
+     return False
+
+
+ @METRICS.time_it
+ @transactional_session
+ def requeue_and_archive(
+         request: RequestDict,
+         source_ranking_update: bool = True,
+         retry_protocol_mismatches: bool = False,
+         *,
+         session: "Session",
+         logger: LoggerFunction = logging.log
+ ) -> Optional[RequestDict]:
+     """
+     Requeue and archive a failed request.
+     TODO: Multiple requeue.
+
+     :param request: Original request.
+     :param source_ranking_update: If True, the source ranking is decreased, making the sources less likely to be used again.
+     :param retry_protocol_mismatches: If True, also requeue requests that failed with a protocol mismatch.
+     :param session: Database session to use.
+     :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
+     :returns: The newly queued request dictionary, or None.
+     """
+
+     # Probably not needed anymore
+     request_id = request['request_id']
+     new_req = get_request(request_id, session=session)
+
+     if new_req:
+         new_req['sources'] = get_sources(request_id, session=session)
+         archive_request(request_id, session=session)
+
+         if should_retry_request(new_req, retry_protocol_mismatches):
+             new_req['request_id'] = generate_uuid()
+             new_req['previous_attempt_id'] = request_id
+             if new_req['retry_count'] is None:
+                 new_req['retry_count'] = 1
+             elif new_req['state'] != RequestState.SUBMITTING:
+                 new_req['retry_count'] += 1
+
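+             # demote the source used by the failed attempt so that other
+             # replicas are preferred when the request is queued again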
+             if source_ranking_update and new_req['sources']:
+                 for i in range(len(new_req['sources'])):
+                     if new_req['sources'][i]['is_using']:
+                         if new_req['sources'][i]['ranking'] is None:
+                             new_req['sources'][i]['ranking'] = -1
+                         else:
+                             new_req['sources'][i]['ranking'] -= 1
+                         new_req['sources'][i]['is_using'] = False
+             new_req.pop('state', None)
+             queue_requests([new_req], session=session, logger=logger)
+             return new_req
+     else:
+         raise RequestNotFound
+     return None
+
+
+ @METRICS.count_it
+ @transactional_session
+ def queue_requests(
+         requests: Iterable[RequestDict],
+         *,
+         session: "Session",
+         logger: LoggerFunction = logging.log
+ ) -> list[dict[str, Any]]:
+     """
+     Submit transfer requests on destination RSEs for data identifiers.
+
+     :param requests: List of dictionaries containing request metadata.
+     :param session: Database session to use.
+     :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
+     :returns: List of the newly created request dictionaries, each with a 32 character hex id.
+     """
+     logger(logging.DEBUG, "queue requests")
+
+     request_clause = []
+     rses = {}
+     preparer_enabled = config_get_bool('conveyor', 'use_preparer', raise_exception=False, default=False)
+     for req in requests:
+
+         if isinstance(req['attributes'], str):
+             req['attributes'] = json.loads(req['attributes'] or '{}')
+         if isinstance(req['attributes'], str):
+             req['attributes'] = json.loads(req['attributes'] or '{}')
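+         # the attributes may be double-encoded JSON (e.g. for requeued requests
+         # whose attributes were serialized again), hence the repeated decode above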
+
+         if req['request_type'] == RequestType.TRANSFER:
+             request_clause.append(and_(models.Request.scope == req['scope'],
+                                        models.Request.name == req['name'],
+                                        models.Request.dest_rse_id == req['dest_rse_id'],
+                                        models.Request.request_type == RequestType.TRANSFER))
+
+         if req['dest_rse_id'] not in rses:
+             rses[req['dest_rse_id']] = get_rse_name(req['dest_rse_id'], session=session)
+
+     # Check existing requests
+     existing_requests = []
+     if request_clause:
+         for requests_condition in chunks(request_clause, 1000):
+             stmt = select(
+                 models.Request.scope,
+                 models.Request.name,
+                 models.Request.dest_rse_id
+             ).with_hint(
+                 models.Request,
+                 'INDEX(REQUESTS REQUESTS_SC_NA_RS_TY_UQ_IDX)',
+                 'oracle'
+             ).where(
+                 or_(*requests_condition)
+             )
+             existing_requests.extend(session.execute(stmt))
+
+     new_requests, sources, messages = [], [], []
+     for request in requests:
+         dest_rse_name = get_rse_name(rse_id=request['dest_rse_id'], session=session)
+         if request['request_type'] == RequestType.TRANSFER and (request['scope'], request['name'], request['dest_rse_id']) in existing_requests:
+             logger(logging.WARNING, 'Request TYPE %s for DID %s:%s at RSE %s exists - ignoring' % (request['request_type'],
+                                                                                                    request['scope'],
+                                                                                                    request['name'],
+                                                                                                    dest_rse_name))
+             continue
+
+         def temp_serializer(obj):
+             if isinstance(obj, (InternalAccount, InternalScope)):
+                 return obj.internal
+             raise TypeError('Could not serialise object %r' % obj)
+
+         if 'state' not in request:
+             request['state'] = RequestState.PREPARING if preparer_enabled else RequestState.QUEUED
+
+         new_request = {'request_type': request['request_type'],
+                        'scope': request['scope'],
+                        'name': request['name'],
+                        'dest_rse_id': request['dest_rse_id'],
+                        'source_rse_id': request.get('source_rse_id', None),
+                        'attributes': json.dumps(request['attributes'], default=temp_serializer),
+                        'state': request['state'],
+                        'rule_id': request['rule_id'],
+                        'activity': request['attributes']['activity'],
+                        'bytes': request['attributes']['bytes'],
+                        'md5': request['attributes']['md5'],
+                        'adler32': request['attributes']['adler32'],
+                        'account': request.get('account', None),
+                        'priority': request['attributes'].get('priority', None),
+                        'requested_at': request.get('requested_at', None),
+                        'retry_count': request['retry_count']}
+         if 'transfertool' in request:
+             new_request['transfertool'] = request['transfertool']
+         if 'previous_attempt_id' in request and 'retry_count' in request:
+             new_request['previous_attempt_id'] = request['previous_attempt_id']
+             new_request['id'] = request['request_id']
+         else:
+             new_request['id'] = generate_uuid()
+         new_requests.append(new_request)
+
+         if 'sources' in request and request['sources']:
+             for source in request['sources']:
+                 sources.append({'request_id': new_request['id'],
+                                 'scope': request['scope'],
+                                 'name': request['name'],
+                                 'rse_id': source['rse_id'],
+                                 'dest_rse_id': request['dest_rse_id'],
+                                 'ranking': source['ranking'],
+                                 'bytes': source['bytes'],
+                                 'url': source['url'],
+                                 'is_using': source['is_using']})
+
+         if request['request_type']:
+             transfer_status = '%s-%s' % (request['request_type'].name, request['state'].name)
+         else:
+             transfer_status = 'transfer-%s' % request['state'].name
+         transfer_status = transfer_status.lower()
+
+         payload = {'request-id': new_request['id'],
+                    'request-type': request['request_type'].name.lower(),
+                    'scope': request['scope'].external,
+                    'name': request['name'],
+                    'dst-rse-id': request['dest_rse_id'],
+                    'dst-rse': dest_rse_name,
+                    'state': request['state'].name.lower(),
+                    'retry-count': request['retry_count'],
+                    'rule-id': str(request['rule_id']),
+                    'activity': request['attributes']['activity'],
+                    'file-size': request['attributes']['bytes'],
+                    'bytes': request['attributes']['bytes'],
+                    'checksum-md5': request['attributes']['md5'],
+                    'checksum-adler': request['attributes']['adler32'],
+                    'queued_at': str(datetime.datetime.utcnow())}
+
+         messages.append({'event_type': transfer_status,
+                          'payload': payload})
+
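+     # bulk-insert the new requests and their sources in chunks of 1000 rows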
+     for requests_chunk in chunks(new_requests, 1000):
+         stmt = insert(
+             models.Request
+         )
+         session.execute(stmt, requests_chunk)
+
+     for sources_chunk in chunks(sources, 1000):
+         stmt = insert(
+             models.Source
+         )
+         session.execute(stmt, sources_chunk)
+
+     add_messages(messages, session=session)
+
+     return new_requests
+
+
+ @transactional_session
+ def list_and_mark_transfer_requests_and_source_replicas(
+         rse_collection: "RseCollection",
+         processed_by: Optional[str] = None,
+         processed_at_delay: int = 600,
+         total_workers: int = 0,
+         worker_number: int = 0,
+         partition_hash_var: Optional[str] = None,
+         limit: Optional[int] = None,
+         activity: Optional[str] = None,
+         older_than: Optional[datetime.datetime] = None,
+         rses: Optional[Sequence[str]] = None,
+         request_type: Optional[list[RequestType]] = None,
+         request_state: Optional[RequestState] = None,
+         required_source_rse_attrs: Optional[list[str]] = None,
+         ignore_availability: bool = False,
+         transfertool: Optional[str] = None,
+         *,
+         session: "Session",
+ ) -> dict[str, RequestWithSources]:
+     """
+     List requests together with their source replicas.
+
+     :param rse_collection: The RSE collection being used.
+     :param processed_by: The daemon/executable running this query.
+     :param processed_at_delay: How many seconds to ignore a request if it is already being processed by the same daemon.
+     :param total_workers: Number of total workers.
+     :param worker_number: Id of the executing worker.
+     :param partition_hash_var: The hash variable used for partitioning thread work.
+     :param limit: Maximum number of requests to retrieve.
+     :param activity: Activity to be selected.
+     :param older_than: Only select requests older than this DateTime.
+     :param rses: List of RSE ids to select requests for.
+     :param request_type: Filter on the given request type.
+     :param request_state: Filter on the given request state.
+     :param transfertool: The transfer tool as specified in rucio.cfg.
+     :param required_source_rse_attrs: Only select source RSEs having these attributes set.
+     :param ignore_availability: Ignore blocklisted RSEs.
+     :param session: Database session to use.
+     :returns: Dict mapping request ids to RequestWithSources objects.
+     """
+
+     if partition_hash_var is None:
+         partition_hash_var = 'requests.id'
+     if request_state is None:
+         request_state = RequestState.QUEUED
+     if request_type is None:
+         request_type = [RequestType.TRANSFER]
+
+     now = datetime.datetime.utcnow()
+
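+     # base query: requests in the wanted state/type, skipping those whose rule
+     # has expired, whose destination RSE is deleted, or which are the next hop
+     # of a multi-hop transfer (anti-join on TransferHop)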
+     sub_requests = select(
+         models.Request.id,
+         models.Request.request_type,
+         models.Request.rule_id,
+         models.Request.scope,
+         models.Request.name,
+         models.Request.md5,
+         models.Request.adler32,
+         models.Request.bytes,
+         models.Request.activity,
+         models.Request.attributes,
+         models.Request.previous_attempt_id,
+         models.Request.source_rse_id,
+         models.Request.dest_rse_id,
+         models.Request.retry_count,
+         models.Request.account,
+         models.Request.created_at,
+         models.Request.requested_at,
+         models.Request.priority,
+         models.Request.transfertool
+     ).with_hint(
+         models.Request,
+         'INDEX(REQUESTS REQUESTS_TYP_STA_UPD_IDX)',
+         'oracle'
+     ).where(
+         and_(models.Request.state == request_state,
+              models.Request.request_type.in_(request_type))
+     ).outerjoin(
+         models.ReplicationRule,
+         models.Request.rule_id == models.ReplicationRule.id
+     ).where(
+         coalesce(models.ReplicationRule.expires_at, now) >= now
+     ).join(
+         models.RSE,
+         models.RSE.id == models.Request.dest_rse_id
+     ).where(
+         models.RSE.deleted == false()
+     ).outerjoin(
+         models.TransferHop,
+         models.TransferHop.next_hop_request_id == models.Request.id
+     ).where(
+         models.TransferHop.next_hop_request_id == null()
+     ).order_by(
+         models.Request.created_at
+     )
+
+     if processed_by:
+         sub_requests = sub_requests.where(
+             or_(models.Request.last_processed_by.is_(null()),
+                 models.Request.last_processed_by != processed_by,
+                 models.Request.last_processed_at < datetime.datetime.utcnow() - datetime.timedelta(seconds=processed_at_delay))
+         )
+
+     if not ignore_availability:
+         sub_requests = sub_requests.where(models.RSE.availability_write == true())
+
+     if isinstance(older_than, datetime.datetime):
+         sub_requests = sub_requests.where(models.Request.requested_at < older_than)
+
+     if activity:
+         sub_requests = sub_requests.where(models.Request.activity == activity)
+
+     # if a transfertool is specified, filter for those requests and apply the related index
+     if transfertool:
+         sub_requests = sub_requests.where(models.Request.transfertool == transfertool)
+         sub_requests = sub_requests.with_hint(models.Request, 'INDEX(REQUESTS REQUESTS_TYP_STA_TRA_ACT_IDX)', 'oracle')
+     else:
+         sub_requests = sub_requests.with_hint(models.Request, 'INDEX(REQUESTS REQUESTS_TYP_STA_UPD_IDX)', 'oracle')
+
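+     # stage the requested RSE ids into a temporary table and join against it,
+     # presumably cheaper than a long IN clause for large RSE lists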
+     if rses:
+         temp_table_cls = temp_table_mngr(session).create_id_table()
+
+         values = [{'id': rse_id} for rse_id in rses]
+         stmt = insert(
+             temp_table_cls
+         )
+         session.execute(stmt, values)
+
+         sub_requests = sub_requests.join(temp_table_cls, temp_table_cls.id == models.RSE.id)
+
+     sub_requests = filter_thread_work(session=session, query=sub_requests, total_threads=total_workers, thread_id=worker_number, hash_variable=partition_hash_var)
+
+     if limit:
+         sub_requests = sub_requests.limit(limit)
+
+     sub_requests = sub_requests.subquery()
+
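+     # join each pre-selected request with its AVAILABLE replicas (excluding any
+     # replica on the destination itself), the known Source rows, and the
+     # source-to-destination distance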
+     stmt = select(
+         sub_requests.c.id,
+         sub_requests.c.request_type,
+         sub_requests.c.rule_id,
+         sub_requests.c.scope,
+         sub_requests.c.name,
+         sub_requests.c.md5,
+         sub_requests.c.adler32,
+         sub_requests.c.bytes,
+         sub_requests.c.activity,
+         sub_requests.c.attributes,
+         sub_requests.c.previous_attempt_id,
+         sub_requests.c.source_rse_id,
+         sub_requests.c.dest_rse_id,
+         sub_requests.c.account,
+         sub_requests.c.retry_count,
+         sub_requests.c.priority,
+         sub_requests.c.transfertool,
+         sub_requests.c.requested_at,
+         models.RSE.id.label("replica_rse_id"),
+         models.RSE.rse.label("replica_rse_name"),
+         models.RSEFileAssociation.path,
+         models.Source.ranking.label("source_ranking"),
+         models.Source.url.label("source_url"),
+         models.Distance.distance
+     ).order_by(
+         sub_requests.c.created_at
+     ).outerjoin(
+         models.RSEFileAssociation,
+         and_(sub_requests.c.scope == models.RSEFileAssociation.scope,
+              sub_requests.c.name == models.RSEFileAssociation.name,
+              models.RSEFileAssociation.state == ReplicaState.AVAILABLE,
+              sub_requests.c.dest_rse_id != models.RSEFileAssociation.rse_id)
+     ).with_hint(
+         models.RSEFileAssociation,
+         'INDEX(REPLICAS REPLICAS_PK)',
+         'oracle'
+     ).outerjoin(
+         models.RSE,
+         and_(models.RSE.id == models.RSEFileAssociation.rse_id,
+              models.RSE.deleted == false())
+     ).outerjoin(
+         models.Source,
+         and_(sub_requests.c.id == models.Source.request_id,
+              models.RSE.id == models.Source.rse_id)
+     ).with_hint(
+         models.Source,
+         'INDEX(SOURCES SOURCES_PK)',
+         'oracle'
+     ).outerjoin(
+         models.Distance,
+         and_(sub_requests.c.dest_rse_id == models.Distance.dest_rse_id,
+              models.RSEFileAssociation.rse_id == models.Distance.src_rse_id)
+     ).with_hint(
+         models.Distance,
+         'INDEX(DISTANCES DISTANCES_PK)',
+         'oracle'
+     )
+
+     for attribute in required_source_rse_attrs or ():
+         rse_attr_alias = aliased(models.RSEAttrAssociation)
+         stmt = stmt.where(
+             exists(
+                 select(
+                     1
+                 ).where(
+                     and_(rse_attr_alias.rse_id == models.RSE.id,
+                          rse_attr_alias.key == attribute)
+                 )
+             )
+         )
+
+     requests_by_id = {}
+     for (request_id, req_type, rule_id, scope, name, md5, adler32, byte_count, activity, attributes, previous_attempt_id, source_rse_id, dest_rse_id, account, retry_count,
+          priority, transfertool, requested_at, replica_rse_id, replica_rse_name, file_path, source_ranking, source_url, distance) in session.execute(stmt):
+
+         request = requests_by_id.get(request_id)
+         if not request:
+             request = RequestWithSources(id_=request_id, request_type=req_type, rule_id=rule_id, scope=scope, name=name,
+                                          md5=md5, adler32=adler32, byte_count=byte_count, activity=activity, attributes=attributes,
+                                          previous_attempt_id=previous_attempt_id, dest_rse=rse_collection[dest_rse_id],
+                                          account=account, retry_count=retry_count, priority=priority, transfertool=transfertool,
+                                          requested_at=requested_at)
+             requests_by_id[request_id] = request
+             # if STAGEIN and destination RSE is QoS make sure the source is included
+             if request.request_type == RequestType.STAGEIN and get_rse_attribute(rse_id=dest_rse_id, key=RseAttr.STAGING_REQUIRED, session=session):
+                 source = RequestSource(rse=rse_collection[dest_rse_id])
+                 request.sources.append(source)
+
+         if replica_rse_id is not None:
+             replica_rse = rse_collection[replica_rse_id]
+             replica_rse.name = replica_rse_name
+             source = RequestSource(rse=replica_rse, file_path=file_path,
+                                    ranking=source_ranking, distance=distance, url=source_url)
+             request.sources.append(source)
+             if source_rse_id == replica_rse_id:
+                 request.requested_source = source
+
684
+ if processed_by:
685
+ for chunk in chunks(requests_by_id, 100):
686
+ stmt = update(
687
+ models.Request
688
+ ).where(
689
+ models.Request.id.in_(chunk)
690
+ ).execution_options(
691
+ synchronize_session=False
692
+ ).values({
693
+ models.Request.last_processed_by: processed_by,
694
+ models.Request.last_processed_at: datetime.datetime.now()
695
+ })
696
+ session.execute(stmt)
697
+
698
+ return requests_by_id
699
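
Note: the per-request loop above folds a joined result set, one row per (request, candidate source) pair, into one object per request. A minimal standalone sketch of that folding pattern, with simplified stand-in dataclasses (illustrative only, not the real RequestWithSources/RequestSource):

    from dataclasses import dataclass, field

    @dataclass
    class Source:
        rse: str
        distance: int

    @dataclass
    class Request:
        id_: str
        sources: list = field(default_factory=list)

    rows = [                     # one row per (request, source), as the join would return
        ('req-1', 'RSE_A', 1),
        ('req-1', 'RSE_B', 3),
        ('req-2', 'RSE_A', 2),
    ]

    requests_by_id = {}
    for request_id, rse, distance in rows:
        request = requests_by_id.get(request_id)
        if not request:
            request = Request(id_=request_id)
            requests_by_id[request_id] = request
        request.sources.append(Source(rse=rse, distance=distance))

    assert len(requests_by_id['req-1'].sources) == 2
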
+
700
+
701
+ @read_session
702
+ def fetch_paths(
703
+ request_id: str,
704
+ *,
705
+ session: "Session"
706
+ ) -> dict[str, list[str]]:
707
+ """
708
+ Find the paths for which the provided request is a constituent hop.
709
+
710
+ Returns a dict: {initial_request_id1: path1, ...}. Each path is an ordered list of request_ids.
711
+ """
712
+ transfer_hop_alias = aliased(models.TransferHop)
713
+ stmt = select(
714
+ models.TransferHop,
715
+ ).join(
716
+ transfer_hop_alias,
717
+ and_(transfer_hop_alias.initial_request_id == models.TransferHop.initial_request_id,
718
+ or_(transfer_hop_alias.request_id == request_id,
719
+ transfer_hop_alias.initial_request_id == request_id))
720
+ )
721
+
722
+ parents_by_initial_request = {}
723
+ for hop, in session.execute(stmt):
724
+ parents_by_initial_request.setdefault(hop.initial_request_id, {})[hop.next_hop_request_id] = hop.request_id
725
+
726
+ paths = {}
727
+ for initial_request_id, parents in parents_by_initial_request.items():
728
+ path = []
729
+ cur_request = initial_request_id
730
+ path.append(cur_request)
731
+ while parents.get(cur_request):
732
+ cur_request = parents[cur_request]
733
+ path.append(cur_request)
734
+ paths[initial_request_id] = list(reversed(path))
735
+ return paths
736
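
Note: fetch_paths rebuilds each multi-hop path by walking parent pointers backwards from the path's initial request and then reversing the result. A runnable toy version of the same walk, with made-up request ids:

    # TransferHop rows as (initial_request_id, request_id, next_hop_request_id)
    hops = [
        ('req-final', 'req-hop1', 'req-hop2'),
        ('req-final', 'req-hop2', 'req-final'),
    ]

    parents = {}
    for _initial, request_id, next_hop in hops:
        parents[next_hop] = request_id   # each hop points back to the hop before it

    path = ['req-final']
    while path[-1] in parents:
        path.append(parents[path[-1]])

    print(list(reversed(path)))          # ['req-hop1', 'req-hop2', 'req-final']
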
+
737
+
738
+ @METRICS.time_it
739
+ @transactional_session
740
+ def get_and_mark_next(
741
+ rse_collection: "RseCollection",
742
+ request_type: Union[list[RequestType], RequestType],
743
+ state: Union[list[RequestState], RequestState],
744
+ processed_by: Optional[str] = None,
745
+ processed_at_delay: int = 600,
746
+ limit: int = 100,
747
+ older_than: "Optional[datetime.datetime]" = None,
748
+ rse_id: Optional[str] = None,
749
+ activity: Optional[str] = None,
750
+ total_workers: int = 0,
751
+ worker_number: int = 0,
752
+ mode_all: bool = False,
753
+ hash_variable: str = 'id',
754
+ activity_shares: Optional[dict[str, Any]] = None,
755
+ include_dependent: bool = True,
756
+ transfertool: Optional[str] = None,
757
+ *,
758
+ session: "Session"
759
+ ) -> list[dict[str, Any]]:
760
+ """
761
+ Retrieve the next requests matching the request type and state.
762
+    Workers are balanced via hashing to reduce concurrency on the database.
763
+
764
+ :param rse_collection: the RSE collection being used
765
+ :param request_type: Type of the request as a string or list of strings.
766
+ :param state: State of the request as a string or list of strings.
767
+ :param processed_by: the daemon/executable running this query
768
+    :param processed_at_delay: how many seconds to ignore a request that is already being processed by the same daemon
769
+ :param limit: Integer of requests to retrieve.
770
+ :param older_than: Only select requests older than this DateTime.
771
+ :param rse_id: The RSE to filter on.
772
+ :param activity: The activity to filter on.
773
+ :param total_workers: Number of total workers.
774
+ :param worker_number: Id of the executing worker.
775
+    :param mode_all: If set to True, the function returns the full request dictionaries; if set to False, it returns a list of dictionaries {'request_id': x, 'external_host': y, 'external_id': z}.
776
+ :param hash_variable: The variable to use to perform the partitioning. By default it uses the request id.
777
+ :param activity_shares: Activity shares dictionary, with number of requests
778
+ :param include_dependent: If true, includes transfers which have a previous hop dependency on other transfers
779
+ :param transfertool: The transfer tool as specified in rucio.cfg.
780
+ :param session: Database session to use.
781
+    :returns: List of requests as dictionaries.
782
+ """
783
+ request_type_metric_label = '.'.join(a.name for a in request_type) if isinstance(request_type, list) else request_type.name
784
+ state_metric_label = '.'.join(s.name for s in state) if isinstance(state, list) else state.name
785
+ METRICS.counter('get_next.requests.{request_type}.{state}').labels(request_type=request_type_metric_label, state=state_metric_label).inc()
786
+
787
+ # lists of one element are not allowed by SQLA, so just duplicate the item
788
+ if not isinstance(request_type, list):
789
+ request_type = [request_type, request_type]
790
+ elif len(request_type) == 1:
791
+ request_type = [request_type[0], request_type[0]]
792
+ if not isinstance(state, list):
793
+ state = [state, state]
794
+ elif len(state) == 1:
795
+ state = [state[0], state[0]]
796
+
797
+ result = []
798
+ if not activity_shares:
799
+ activity_shares = [None] # type: ignore
800
+
801
+ for share in activity_shares: # type: ignore
802
+
803
+ query = select(
804
+ models.Request.id
805
+ ).where(
806
+ and_(models.Request.state.in_(state),
807
+ models.Request.request_type.in_(request_type))
808
+ ).order_by(
809
+ asc(models.Request.updated_at)
810
+ )
811
+ if processed_by:
812
+ query = query.where(
813
+ or_(models.Request.last_processed_by.is_(null()),
814
+ models.Request.last_processed_by != processed_by,
815
+ models.Request.last_processed_at < datetime.datetime.utcnow() - datetime.timedelta(seconds=processed_at_delay))
816
+ )
817
+ if transfertool:
818
+ query = query.with_hint(
819
+ models.Request,
820
+ 'INDEX(REQUESTS REQUESTS_TYP_STA_TRA_ACT_IDX)',
821
+ 'oracle'
822
+ ).where(
823
+ models.Request.transfertool == transfertool
824
+ )
825
+ else:
826
+ query = query.with_hint(
827
+ models.Request,
828
+ 'INDEX(REQUESTS REQUESTS_TYP_STA_UPD_IDX)',
829
+ 'oracle'
830
+ )
831
+
832
+ if not include_dependent:
833
+ # filter out transfers which depend on some other "previous hop" requests.
834
+ # In particular, this is used to avoid multiple finishers trying to archive different
835
+            # transfers from the same path and thus having concurrent deletion of the same rows from
836
+ # the transfer_hop table.
837
+ query = query.outerjoin(
838
+ models.TransferHop,
839
+ models.TransferHop.next_hop_request_id == models.Request.id
840
+ ).where(
841
+ models.TransferHop.next_hop_request_id == null()
842
+ )
843
+
844
+ if isinstance(older_than, datetime.datetime):
845
+ query = query.where(models.Request.updated_at < older_than)
846
+
847
+ if rse_id:
848
+ query = query.where(models.Request.dest_rse_id == rse_id)
849
+
850
+ if share:
851
+ query = query.where(models.Request.activity == share)
852
+ elif activity:
853
+ query = query.where(models.Request.activity == activity)
854
+
855
+ query = filter_thread_work(session=session, query=query, total_threads=total_workers, thread_id=worker_number, hash_variable=hash_variable)
856
+
857
+ if share:
858
+ query = query.limit(activity_shares[share]) # type: ignore
859
+ else:
860
+ query = query.limit(limit)
861
+
862
+ if session.bind.dialect.name == 'oracle': # type: ignore
863
+ query = select(
864
+ models.Request
865
+ ).where(
866
+ models.Request.id.in_(query)
867
+ ).with_for_update(
868
+ skip_locked=True
869
+ )
870
+ else:
871
+ query = query.with_only_columns(
872
+ models.Request
873
+ ).with_for_update(
874
+ skip_locked=True,
875
+ of=models.Request.last_processed_by
876
+ )
877
+ query_result = session.execute(query).scalars()
878
+ if query_result:
879
+ if mode_all:
880
+ for res in query_result:
881
+ res_dict = res.to_dict()
882
+ res_dict['request_id'] = res_dict['id']
883
+ res_dict['attributes'] = json.loads(str(res_dict['attributes'] or '{}'))
884
+
885
+ dst_id = res_dict['dest_rse_id']
886
+ src_id = res_dict['source_rse_id']
887
+ res_dict['dst_rse'] = rse_collection[dst_id].ensure_loaded(load_name=True, load_attributes=True)
888
+ res_dict['src_rse'] = rse_collection[src_id].ensure_loaded(load_name=True, load_attributes=True) if src_id is not None else None
889
+
890
+ result.append(res_dict)
891
+ else:
892
+ for res in query_result:
893
+ result.append({'request_id': res.id, 'external_host': res.external_host, 'external_id': res.external_id})
894
+
895
+ request_ids = {r['request_id'] for r in result}
896
+ if processed_by and request_ids:
897
+ for chunk in chunks(request_ids, 100):
898
+ stmt = update(
899
+ models.Request
900
+ ).where(
901
+ models.Request.id.in_(chunk)
902
+ ).execution_options(
903
+ synchronize_session=False
904
+ ).values({
905
+ models.Request.last_processed_by: processed_by,
906
+ models.Request.last_processed_at: datetime.datetime.now()
907
+ })
908
+ session.execute(stmt)
909
+
910
+ return result
911
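
Note: a hedged sketch of how a daemon might call get_and_mark_next; the worker numbers normally come from the daemon's heartbeat machinery, rse_collection is built by the caller, and the decorator injects the session. Not a drop-in daemon, just the call shape:

    from rucio.db.sqla.constants import RequestState, RequestType

    reqs = get_and_mark_next(
        rse_collection=rse_collection,       # an RseCollection provided by the caller
        request_type=RequestType.TRANSFER,
        state=RequestState.QUEUED,
        processed_by='conveyor-submitter',   # lets other workers skip these rows for a while
        limit=50,
        total_workers=total_workers,
        worker_number=worker_number,
    )
    for req in reqs:
        print(req['request_id'], req['external_host'], req['external_id'])
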
+
912
+
913
+ @transactional_session
914
+ def update_request(
915
+ request_id: str,
916
+ state: Optional[RequestState] = None,
917
+ transferred_at: Optional[datetime.datetime] = None,
918
+ started_at: Optional[datetime.datetime] = None,
919
+ staging_started_at: Optional[datetime.datetime] = None,
920
+ staging_finished_at: Optional[datetime.datetime] = None,
921
+ source_rse_id: Optional[str] = None,
922
+ err_msg: Optional[str] = None,
923
+ attributes: Optional[dict[str, str]] = None,
924
+ priority: Optional[int] = None,
925
+ transfertool: Optional[str] = None,
926
+ *,
927
+ raise_on_missing: bool = False,
928
+ session: "Session",
929
+ ) -> bool:
930
+
931
+ rowcount = 0
932
+ try:
933
+ update_items: dict[Any, Any] = {
934
+ models.Request.updated_at: datetime.datetime.utcnow()
935
+ }
936
+ if state is not None:
937
+ update_items[models.Request.state] = state
938
+ if transferred_at is not None:
939
+ update_items[models.Request.transferred_at] = transferred_at
940
+ if started_at is not None:
941
+ update_items[models.Request.started_at] = started_at
942
+ if staging_started_at is not None:
943
+ update_items[models.Request.staging_started_at] = staging_started_at
944
+ if staging_finished_at is not None:
945
+ update_items[models.Request.staging_finished_at] = staging_finished_at
946
+ if source_rse_id is not None:
947
+ update_items[models.Request.source_rse_id] = source_rse_id
948
+ if err_msg is not None:
949
+ update_items[models.Request.err_msg] = err_msg
950
+ if attributes is not None:
951
+ update_items[models.Request.attributes] = json.dumps(attributes)
952
+ if priority is not None:
953
+ update_items[models.Request.priority] = priority
954
+ if transfertool is not None:
955
+ update_items[models.Request.transfertool] = transfertool
956
+
957
+ stmt = update(
958
+ models.Request
959
+ ).where(
960
+ models.Request.id == request_id
961
+ ).execution_options(
962
+ synchronize_session=False
963
+ ).values(
964
+ update_items
965
+ )
966
+ rowcount = session.execute(stmt).rowcount
967
+
968
+ except IntegrityError as error:
969
+ raise RucioException(error.args)
970
+
971
+ if not rowcount and raise_on_missing:
972
+ raise UnsupportedOperation("Request %s state cannot be updated." % request_id)
973
+
974
+ if rowcount:
975
+ return True
976
+ return False
977
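
Note: update_request only touches the columns whose arguments are not None, so it supports partial updates. An illustrative call, assuming a request_id is at hand:

    import datetime

    from rucio.db.sqla.constants import RequestState

    updated = update_request(
        request_id=request_id,
        state=RequestState.SUBMITTED,
        started_at=datetime.datetime.utcnow(),
        raise_on_missing=False,   # return False instead of raising if the row is gone
    )
    if not updated:
        pass  # the request no longer exists, e.g. it was archived in the meantime
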
+
978
+
979
+ @METRICS.count_it
980
+ @transactional_session
981
+ def transition_request_state(
982
+ request_id: str,
983
+ state: Optional[RequestState] = None,
984
+ external_id: Optional[str] = None,
985
+ transferred_at: Optional[datetime.datetime] = None,
986
+ started_at: Optional[datetime.datetime] = None,
987
+ staging_started_at: Optional[datetime.datetime] = None,
988
+ staging_finished_at: Optional[datetime.datetime] = None,
989
+ source_rse_id: Optional[str] = None,
990
+ err_msg: Optional[str] = None,
991
+ attributes: Optional[dict[str, str]] = None,
992
+ *,
993
+ request: Optional[dict[str, Any]] = None,
994
+ session: "Session",
995
+ logger: LoggerFunction = logging.log
996
+ ) -> bool:
997
+ """
998
+    Update the request if its state changed. Return a boolean indicating whether the request was actually updated.
999
+ """
1000
+
1001
+ # TODO: Should this be a private method?
1002
+
1003
+ if request is None:
1004
+ request = get_request(request_id, session=session)
1005
+
1006
+ if not request:
1007
+ # The request was deleted in the meantime. Ignore it.
1008
+ logger(logging.WARNING, "Request %s not found. Cannot set its state to %s", request_id, state)
1009
+ return False
1010
+
1011
+ if request['state'] == state:
1012
+ logger(logging.INFO, "Request %s state is already %s. Will skip the update.", request_id, state)
1013
+ return False
1014
+
1015
+ if state in [RequestState.FAILED, RequestState.DONE, RequestState.LOST] and (request["external_id"] != external_id):
1016
+        logger(logging.ERROR, "Request %s should not be transitioned to a terminal state with a mismatched external transfer id", request_id)
1017
+ return False
1018
+
1019
+ update_request(
1020
+ request_id=request_id,
1021
+ state=state,
1022
+ transferred_at=transferred_at,
1023
+ started_at=started_at,
1024
+ staging_started_at=staging_started_at,
1025
+ staging_finished_at=staging_finished_at,
1026
+ source_rse_id=source_rse_id,
1027
+ err_msg=err_msg,
1028
+ attributes=attributes,
1029
+ raise_on_missing=True,
1030
+ session=session,
1031
+ )
1032
+ return True
1033
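
Note: transition_request_state is a guarded wrapper around update_request: it skips no-op updates and refuses terminal states when the stored external_id does not match. A hedged sketch of a poller-side call; the response fields here are assumptions, not a real transfertool payload:

    changed = transition_request_state(
        request_id=response['request_id'],
        state=RequestState.DONE,
        external_id=response['transfer_id'],   # must match the stored external_id
        transferred_at=response['finished_at'],
        session=session,
    )
    if not changed:
        pass  # request vanished, state unchanged, or external id mismatch
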
+
1034
+
1035
+ @METRICS.count_it
1036
+ @transactional_session
1037
+ def transition_requests_state_if_possible(
1038
+ request_ids: Iterable[str],
1039
+ new_state: str,
1040
+ *,
1041
+ session: "Session",
1042
+ logger: LoggerFunction = logging.log
1043
+ ) -> None:
1044
+ """
1045
+ Bulk update the state of requests. Skips silently if the request_id does not exist.
1046
+
1047
+    :param request_ids: List of Request-IDs, each a 32 character hex string.
1048
+ :param new_state: New state as string.
1049
+ :param session: Database session to use.
1050
+ :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
1051
+ """
1052
+
1053
+ try:
1054
+ for request_id in request_ids:
1055
+ try:
1056
+ transition_request_state(request_id, new_state, session=session, logger=logger)
1057
+ except UnsupportedOperation:
1058
+ continue
1059
+ except IntegrityError as error:
1060
+ raise RucioException(error.args)
1061
+
1062
+
1063
+ @METRICS.count_it
1064
+ @transactional_session
1065
+ def touch_requests_by_rule(
1066
+ rule_id: str,
1067
+ *,
1068
+ session: "Session"
1069
+ ) -> None:
1070
+ """
1071
+    Update the update time of requests belonging to a rule. Fails silently if the rule has no requests.
1072
+
1073
+ :param rule_id: Rule-ID as a 32 character hex string.
1074
+ :param session: Database session to use.
1075
+ """
1076
+
1077
+ try:
1078
+ stmt = update(
1079
+ models.Request
1080
+ ).prefix_with(
1081
+ '/*+ INDEX(REQUESTS REQUESTS_RULEID_IDX) */',
1082
+ dialect='oracle'
1083
+ ).where(
1084
+ and_(models.Request.rule_id == rule_id,
1085
+ models.Request.state.in_([RequestState.FAILED, RequestState.DONE, RequestState.LOST, RequestState.NO_SOURCES, RequestState.ONLY_TAPE_SOURCES]),
1086
+ models.Request.updated_at < datetime.datetime.utcnow())
1087
+ ).execution_options(
1088
+ synchronize_session=False
1089
+ ).values({
1090
+ models.Request.updated_at: datetime.datetime.utcnow() + datetime.timedelta(minutes=20)
1091
+ })
1092
+ session.execute(stmt)
1093
+ except IntegrityError as error:
1094
+ raise RucioException(error.args)
1095
+
1096
+
1097
+ @read_session
1098
+ def get_request(
1099
+ request_id: str,
1100
+ *,
1101
+ session: "Session"
1102
+ ) -> Optional[dict[str, Any]]:
1103
+ """
1104
+ Retrieve a request by its ID.
1105
+
1106
+ :param request_id: Request-ID as a 32 character hex string.
1107
+ :param session: Database session to use.
1108
+ :returns: Request as a dictionary.
1109
+ """
1110
+
1111
+ try:
1112
+ stmt = select(
1113
+ models.Request
1114
+ ).where(
1115
+ models.Request.id == request_id
1116
+ )
1117
+ tmp = session.execute(stmt).scalar()
1118
+
1119
+ if not tmp:
1120
+ return
1121
+ else:
1122
+ tmp = tmp.to_dict()
1123
+ tmp['attributes'] = json.loads(str(tmp['attributes'] or '{}'))
1124
+ return tmp
1125
+ except IntegrityError as error:
1126
+ raise RucioException(error.args)
1127
+
1128
+
1129
+ @METRICS.count_it
1130
+ @read_session
1131
+ def get_request_by_did(
1132
+ scope: InternalScope,
1133
+ name: str,
1134
+ rse_id: str,
1135
+ request_type: Optional[RequestType] = None,
1136
+ *,
1137
+ session: "Session"
1138
+ ) -> dict[str, Any]:
1139
+ """
1140
+ Retrieve a request by its DID for a destination RSE.
1141
+
1142
+ :param scope: The scope of the data identifier.
1143
+ :param name: The name of the data identifier.
1144
+ :param rse_id: The destination RSE ID of the request.
1145
+ :param request_type: The type of request as rucio.db.sqla.constants.RequestType.
1146
+ :param session: Database session to use.
1147
+ :returns: Request as a dictionary.
1148
+ """
1149
+
1150
+ try:
1151
+ stmt = select(
1152
+ models.Request
1153
+ ).where(
1154
+ and_(models.Request.scope == scope,
1155
+ models.Request.name == name,
1156
+ models.Request.dest_rse_id == rse_id)
1157
+ )
1158
+ if request_type:
1159
+ stmt = stmt.where(
1160
+ models.Request.request_type == request_type
1161
+ )
1162
+
1163
+ tmp = session.execute(stmt).scalar()
1164
+ if not tmp:
1165
+ raise RequestNotFound(f'No request found for DID {scope}:{name} at RSE {rse_id}')
1166
+ else:
1167
+ tmp = tmp.to_dict()
1168
+
1169
+ tmp['source_rse'] = get_rse_name(rse_id=tmp['source_rse_id'], session=session) if tmp['source_rse_id'] is not None else None
1170
+ tmp['dest_rse'] = get_rse_name(rse_id=tmp['dest_rse_id'], session=session) if tmp['dest_rse_id'] is not None else None
1171
+ tmp['attributes'] = json.loads(str(tmp['attributes'] or '{}'))
1172
+
1173
+ return tmp
1174
+ except IntegrityError as error:
1175
+ raise RucioException(error.args)
1176
+
1177
+
1178
+ @METRICS.count_it
1179
+ @read_session
1180
+ def get_request_history_by_did(
1181
+ scope: InternalScope,
1182
+ name: str,
1183
+ rse_id: str,
1184
+ request_type: Optional[RequestType] = None,
1185
+ *,
1186
+ session: "Session"
1187
+ ) -> dict[str, Any]:
1188
+ """
1189
+ Retrieve a historical request by its DID for a destination RSE.
1190
+
1191
+ :param scope: The scope of the data identifier.
1192
+ :param name: The name of the data identifier.
1193
+ :param rse_id: The destination RSE ID of the request.
1194
+ :param request_type: The type of request as rucio.db.sqla.constants.RequestType.
1195
+ :param session: Database session to use.
1196
+ :returns: Request as a dictionary.
1197
+ """
1198
+
1199
+ try:
1200
+ stmt = select(
1201
+ models.RequestHistory
1202
+ ).where(
1203
+ and_(models.RequestHistory.scope == scope,
1204
+ models.RequestHistory.name == name,
1205
+ models.RequestHistory.dest_rse_id == rse_id)
1206
+ )
1207
+ if request_type:
1208
+ stmt = stmt.where(
1209
+ models.RequestHistory.request_type == request_type
1210
+ )
1211
+
1212
+ tmp = session.execute(stmt).scalar()
1213
+ if not tmp:
1214
+ raise RequestNotFound(f'No request found for DID {scope}:{name} at RSE {rse_id}')
1215
+ else:
1216
+ tmp = tmp.to_dict()
1217
+
1218
+ tmp['source_rse'] = get_rse_name(rse_id=tmp['source_rse_id'], session=session) if tmp['source_rse_id'] is not None else None
1219
+ tmp['dest_rse'] = get_rse_name(rse_id=tmp['dest_rse_id'], session=session) if tmp['dest_rse_id'] is not None else None
1220
+
1221
+ return tmp
1222
+ except IntegrityError as error:
1223
+ raise RucioException(error.args)
1224
+
1225
+
1226
+ def is_intermediate_hop(request: RequestDict) -> bool:
1227
+ """
1228
+ Check if the request is an intermediate hop in a multi-hop transfer.
1229
+ """
1230
+ if (request['attributes'] or {}).get('is_intermediate_hop'):
1231
+ return True
1232
+ return False
1233
+
1234
+
1235
+ @transactional_session
1236
+ def handle_failed_intermediate_hop(
1237
+ request: RequestDict,
1238
+ *,
1239
+ session: "Session"
1240
+ ) -> int:
1241
+ """
1242
+    Perform housekeeping behind a failed intermediate hop.
1243
+    Returns the number of updated requests.
1244
+ """
1245
+ # mark all hops following this one (in any multihop path) as Failed
1246
+ new_state = RequestState.FAILED
1247
+ reason = 'Unused hop in multi-hop'
1248
+
1249
+ paths = fetch_paths(request['id'], session=session)
1250
+ dependent_requests = []
1251
+ for path in paths.values():
1252
+ idx = path.index(request['id'])
1253
+ dependent_requests.extend(path[idx + 1:])
1254
+
1255
+ if dependent_requests:
1256
+ stmt = update(
1257
+ models.Request
1258
+ ).where(
1259
+ and_(models.Request.id.in_(dependent_requests),
1260
+ models.Request.state.in_([RequestState.QUEUED, RequestState.SUBMITTED]))
1261
+ ).execution_options(
1262
+ synchronize_session=False
1263
+ ).values({
1264
+ models.Request.state: new_state,
1265
+ models.Request.err_msg: get_transfer_error(new_state, reason=reason)
1266
+ })
1267
+ session.execute(stmt)
1268
+ return len(dependent_requests)
1269
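
Note: everything after the failed hop in each path is what gets marked Failed. The slicing logic in isolation, with toy data:

    paths = {
        'req-final': ['req-hop1', 'req-hop2', 'req-final'],
    }
    failed_request = 'req-hop1'

    dependent_requests = []
    for path in paths.values():
        idx = path.index(failed_request)
        dependent_requests.extend(path[idx + 1:])   # hops downstream of the failure

    print(dependent_requests)   # ['req-hop2', 'req-final']
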
+
1270
+
1271
+ @METRICS.count_it
1272
+ @transactional_session
1273
+ def archive_request(
1274
+ request_id: str,
1275
+ *,
1276
+ session: "Session"
1277
+ ) -> None:
1278
+ """
1279
+ Move a request to the history table.
1280
+
1281
+ :param request_id: Request-ID as a 32 character hex string.
1282
+ :param session: Database session to use.
1283
+ """
1284
+
1285
+ req = get_request(request_id=request_id, session=session)
1286
+
1287
+ if req:
1288
+ hist_request = models.RequestHistory(id=req['id'],
1289
+ created_at=req['created_at'],
1290
+ request_type=req['request_type'],
1291
+ scope=req['scope'],
1292
+ name=req['name'],
1293
+ dest_rse_id=req['dest_rse_id'],
1294
+ source_rse_id=req['source_rse_id'],
1295
+ attributes=json.dumps(req['attributes']) if isinstance(req['attributes'], dict) else req['attributes'],
1296
+ state=req['state'],
1297
+ account=req['account'],
1298
+ external_id=req['external_id'],
1299
+ retry_count=req['retry_count'],
1300
+ err_msg=req['err_msg'],
1301
+ previous_attempt_id=req['previous_attempt_id'],
1302
+ external_host=req['external_host'],
1303
+ rule_id=req['rule_id'],
1304
+ activity=req['activity'],
1305
+ bytes=req['bytes'],
1306
+ md5=req['md5'],
1307
+ adler32=req['adler32'],
1308
+ dest_url=req['dest_url'],
1309
+ requested_at=req['requested_at'],
1310
+ submitted_at=req['submitted_at'],
1311
+ staging_started_at=req['staging_started_at'],
1312
+ staging_finished_at=req['staging_finished_at'],
1313
+ started_at=req['started_at'],
1314
+ estimated_started_at=req['estimated_started_at'],
1315
+ estimated_at=req['estimated_at'],
1316
+ transferred_at=req['transferred_at'],
1317
+ estimated_transferred_at=req['estimated_transferred_at'],
1318
+ transfertool=req['transfertool'])
1319
+ hist_request.save(session=session)
1320
+ try:
1321
+ time_diff = req['updated_at'] - req['created_at']
1322
+ time_diff_s = time_diff.seconds + time_diff.days * 24 * 3600
1323
+ METRICS.timer('archive_request_per_activity.{activity}').labels(activity=req['activity'].replace(' ', '_')).observe(time_diff_s)
1324
+ stmt = delete(
1325
+ models.Source
1326
+ ).where(
1327
+ models.Source.request_id == request_id
1328
+ )
1329
+ session.execute(stmt)
1330
+
1331
+ stmt = delete(
1332
+ models.TransferHop
1333
+ ).where(
1334
+ or_(models.TransferHop.request_id == request_id,
1335
+ models.TransferHop.next_hop_request_id == request_id,
1336
+ models.TransferHop.initial_request_id == request_id)
1337
+ )
1338
+ session.execute(stmt)
1339
+
1340
+ stmt = delete(
1341
+ models.Request
1342
+ ).where(
1343
+ models.Request.id == request_id
1344
+ )
1345
+ session.execute(stmt)
1346
+ except IntegrityError as error:
1347
+ raise RucioException(error.args)
1348
+
1349
+
1350
+ @METRICS.count_it
1351
+ @transactional_session
1352
+ def cancel_request_did(
1353
+ scope: InternalScope,
1354
+ name: str,
1355
+ dest_rse_id: str,
1356
+ request_type: RequestType = RequestType.TRANSFER,
1357
+ *,
1358
+ session: "Session",
1359
+ logger: LoggerFunction = logging.log
1360
+ ) -> dict[str, Any]:
1361
+ """
1362
+ Cancel a request based on a DID and request type.
1363
+
1364
+ :param scope: Data identifier scope as a string.
1365
+ :param name: Data identifier name as a string.
1366
+ :param dest_rse_id: RSE id as a string.
1367
+ :param request_type: Type of the request.
1368
+ :param session: Database session to use.
1369
+ :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
1370
+ """
1371
+
1372
+ reqs = None
1373
+ try:
1374
+ stmt = select(
1375
+ models.Request.id,
1376
+ models.Request.external_id,
1377
+ models.Request.external_host
1378
+ ).where(
1379
+ and_(models.Request.scope == scope,
1380
+ models.Request.name == name,
1381
+ models.Request.dest_rse_id == dest_rse_id,
1382
+ models.Request.request_type == request_type)
1383
+ )
1384
+ reqs = session.execute(stmt).all()
1385
+ if not reqs:
1386
+ logger(logging.WARNING, 'Tried to cancel non-existent request for DID %s:%s at RSE %s' % (scope, name, get_rse_name(rse_id=dest_rse_id, session=session)))
1387
+ except IntegrityError as error:
1388
+ raise RucioException(error.args)
1389
+
1390
+ transfers_to_cancel = {}
1391
+ for req in reqs:
1392
+        # if the request already has a transfer in the transfertool, schedule it for cancellation
1393
+ if req[1] is not None:
1394
+ transfers_to_cancel.setdefault(req[2], set()).add(req[1])
1395
+ archive_request(request_id=req[0], session=session)
1396
+ return transfers_to_cancel
1397
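
Note: the returned mapping associates each external host with the set of transfer ids to cancel there. A toy consumer; the FTS URL and ids are made up:

    transfers_to_cancel = {
        'https://fts.example.org:8446': {'abc-123', 'def-456'},
    }
    for external_host, transfer_ids in transfers_to_cancel.items():
        for transfer_id in sorted(transfer_ids):
            print(f'would cancel {transfer_id} on {external_host}')
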
+
1398
+
1399
+ @read_session
1400
+ def get_sources(
1401
+ request_id: str,
1402
+ rse_id: Optional[str] = None,
1403
+ *,
1404
+ session: "Session"
1405
+ ) -> Optional[list[dict[str, Any]]]:
1406
+ """
1407
+    Retrieve the sources of a request by its ID.
1408
+
1409
+ :param request_id: Request-ID as a 32 character hex string.
1410
+ :param rse_id: RSE ID as a 32 character hex string.
1411
+ :param session: Database session to use.
1412
+    :returns: List of sources as dictionaries, or None if there are none.
1413
+ """
1414
+
1415
+ try:
1416
+ stmt = select(
1417
+ models.Source
1418
+ ).where(
1419
+ models.Source.request_id == request_id
1420
+ )
1421
+ if rse_id:
1422
+ stmt = stmt.where(
1423
+ models.Source.rse_id == rse_id
1424
+ )
1425
+ tmp = session.execute(stmt).scalars().all()
1426
+ if not tmp:
1427
+ return
1428
+ else:
1429
+ result = []
1430
+ for t in tmp:
1431
+ t2 = t.to_dict()
1432
+ result.append(t2)
1433
+
1434
+ return result
1435
+ except IntegrityError as error:
1436
+ raise RucioException(error.args)
1437
+
1438
+
1439
+ @read_session
1440
+ def get_heavy_load_rses(
1441
+ threshold: int,
1442
+ *,
1443
+ session: "Session"
1444
+ ) -> Optional[list[dict[str, Any]]]:
1445
+ """
1446
+    Retrieve heavily loaded RSEs.
1447
+
1448
+ :param threshold: Threshold as an int.
1449
+ :param session: Database session to use.
1450
+    :returns: List of dictionaries {'rse_id': ..., 'load': ...} for RSEs at or above the threshold, or None.
1451
+ """
1452
+ try:
1453
+ stmt = select(
1454
+ models.Source.rse_id,
1455
+ func.count(models.Source.rse_id).label('load')
1456
+ ).where(
1457
+ models.Source.is_using == true()
1458
+ ).group_by(
1459
+ models.Source.rse_id
1460
+ )
1461
+ results = session.execute(stmt).all()
1462
+
1463
+ if not results:
1464
+ return
1465
+
1466
+ result = []
1467
+ for t in results:
1468
+ if t[1] >= threshold:
1469
+ t2 = {'rse_id': t[0], 'load': t[1]}
1470
+ result.append(t2)
1471
+
1472
+ return result
1473
+ except IntegrityError as error:
1474
+ raise RucioException(error.args)
1475
+
1476
+
1477
+ class TransferStatsManager:
1478
+
1479
+ @dataclass
1480
+ class _StatsRecord:
1481
+ files_failed: int = 0
1482
+ files_done: int = 0
1483
+ bytes_done: int = 0
1484
+
1485
+ def __init__(self):
1486
+ self.lock = threading.Lock()
1487
+
1488
+ retentions = sorted([
1489
+ # resolution, retention
1490
+ (datetime.timedelta(minutes=5), datetime.timedelta(hours=1)),
1491
+ (datetime.timedelta(hours=1), datetime.timedelta(days=1)),
1492
+ (datetime.timedelta(days=1), datetime.timedelta(days=30)),
1493
+ ])
1494
+
1495
+ self.retentions = retentions
1496
+ self.raw_resolution, raw_retention = self.retentions[0]
1497
+
1498
+ self.current_timestamp = datetime.datetime(year=1970, month=1, day=1)
1499
+ self.current_samples = defaultdict()
1500
+ self._rollover_samples(rollover_time=datetime.datetime.utcnow())
1501
+
1502
+ self.record_stats = True
1503
+ self.save_timer = None
1504
+ self.downsample_timer = None
1505
+ self.downsample_period = math.ceil(raw_retention.total_seconds())
1506
+
1507
+ def __enter__(self) -> "TransferStatsManager":
1508
+ self.record_stats = config_get_bool('transfers', 'stats_enabled', default=self.record_stats)
1509
+ downsample_period = config_get_int('transfers', 'stats_downsample_period', default=self.downsample_period)
1510
+        # Introduce some voluntary jitter to reduce the likelihood of performing this database
1511
+ # operation multiple times in parallel.
1512
+ self.downsample_period = random.randint(downsample_period * 3 // 4, math.ceil(downsample_period * 5 / 4)) # noqa: S311
1513
+ if self.record_stats:
1514
+ self.save_timer = threading.Timer(self.raw_resolution.total_seconds(), self.periodic_save)
1515
+ self.save_timer.start()
1516
+ self.downsample_timer = threading.Timer(self.downsample_period, self.periodic_downsample_and_cleanup)
1517
+ self.downsample_timer.start()
1518
+ return self
1519
+
1520
+ def __exit__(self, exc_type, exc_val, exc_tb) -> None:
1521
+ if self.save_timer is not None:
1522
+ self.save_timer.cancel()
1523
+ if self.downsample_timer is not None:
1524
+ self.downsample_timer.cancel()
1525
+ if self.record_stats:
1526
+ self.force_save()
1527
+
1528
+ def observe(
1529
+ self,
1530
+ src_rse_id: str,
1531
+ dst_rse_id: str,
1532
+ activity: str,
1533
+ state: RequestState,
1534
+ file_size: int,
1535
+ *,
1536
+ submitted_at: Optional[datetime.datetime] = None,
1537
+ started_at: Optional[datetime.datetime] = None,
1538
+ transferred_at: Optional[datetime.datetime] = None,
1539
+ session: "Optional[Session]" = None
1540
+ ) -> None:
1541
+ """
1542
+ Increment counters for the given (source_rse, destination_rse, activity) as a result of
1543
+        a successful or failed transfer.
1544
+ """
1545
+ if not self.record_stats:
1546
+ return
1547
+ now = datetime.datetime.utcnow()
1548
+ with self.lock:
1549
+ save_timestamp, save_samples = now, {}
1550
+ if now >= self.current_timestamp + self.raw_resolution:
1551
+ save_timestamp, save_samples = self._rollover_samples(now)
1552
+
1553
+ if state in (RequestState.DONE, RequestState.FAILED):
1554
+ record = self.current_samples[dst_rse_id, src_rse_id, activity]
1555
+ if state == RequestState.DONE:
1556
+ record.files_done += 1
1557
+ record.bytes_done += file_size
1558
+
1559
+ if submitted_at is not None and started_at is not None:
1560
+ wait_time = (started_at - submitted_at).total_seconds()
1561
+ METRICS.timer(name='wait_time', buckets=TRANSFER_TIME_BUCKETS).observe(wait_time)
1562
+ if transferred_at is not None:
1563
+ transfer_time = (transferred_at - started_at).total_seconds()
1564
+ METRICS.timer(name='transfer_time', buckets=TRANSFER_TIME_BUCKETS).observe(transfer_time)
1565
+ else:
1566
+ record.files_failed += 1
1567
+ if save_samples:
1568
+ self._save_samples(timestamp=save_timestamp, samples=save_samples, session=session)
1569
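
Note: a sketch of daemon-side usage. The context manager arms the save/downsample timers on entry and force-saves the still-open interval on exit; the RSE ids and activity below are placeholders:

    from rucio.db.sqla.constants import RequestState

    with TransferStatsManager() as stats:
        stats.observe(
            src_rse_id=src_rse_id,
            dst_rse_id=dst_rse_id,
            activity='User Subscriptions',
            state=RequestState.DONE,
            file_size=1024 ** 3,
        )
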
+
1570
+ def periodic_save(self) -> None:
1571
+ """
1572
+ Save samples to the database if the end of the current recording interval was reached.
1573
+ Opportunistically perform down-sampling.
1574
+ """
1575
+ self.save_timer = threading.Timer(self.raw_resolution.total_seconds(), self.periodic_save)
1576
+ self.save_timer.start()
1577
+
1578
+ now = datetime.datetime.utcnow()
1579
+ with self.lock:
1580
+ save_timestamp, save_samples = now, {}
1581
+ if now >= self.current_timestamp + self.raw_resolution:
1582
+ save_timestamp, save_samples = self._rollover_samples(now)
1583
+ if save_samples:
1584
+ self._save_samples(timestamp=save_timestamp, samples=save_samples)
1585
+
1586
+ @transactional_session
1587
+ def force_save(self, *, session: "Session") -> None:
1588
+ """
1589
+        Commit everything to the database without ensuring that
1590
+ the end of the currently recorded time interval is reached.
1591
+
1592
+ Only to be used for the final save operation on shutdown.
1593
+ """
1594
+ with self.lock:
1595
+ save_timestamp, save_samples = self._rollover_samples(datetime.datetime.utcnow())
1596
+ if save_samples:
1597
+ self._save_samples(timestamp=save_timestamp, samples=save_samples, session=session)
1598
+
1599
+ def _rollover_samples(self, rollover_time: datetime.datetime) -> "tuple[datetime.datetime, Mapping[tuple[str, str, str], TransferStatsManager._StatsRecord]]":
1600
+ previous_samples = (self.current_timestamp, self.current_samples)
1601
+ self.current_samples = defaultdict(lambda: self._StatsRecord())
1602
+ _, self.current_timestamp = next(self.slice_time(self.raw_resolution, start_time=rollover_time + self.raw_resolution))
1603
+ return previous_samples
1604
+
1605
+ @transactional_session
1606
+ def _save_samples(
1607
+ self,
1608
+ timestamp: "datetime.datetime",
1609
+ samples: "Mapping[tuple[str, str, str], TransferStatsManager._StatsRecord]",
1610
+ *,
1611
+ session: "Session"
1612
+ ) -> None:
1613
+ """
1614
+ Commit the provided samples to the database.
1615
+ """
1616
+ rows_to_insert = []
1617
+ for (dst_rse_id, src_rse_id, activity), record in samples.items():
1618
+ rows_to_insert.append({
1619
+ models.TransferStats.timestamp.name: timestamp,
1620
+ models.TransferStats.resolution.name: self.raw_resolution.total_seconds(),
1621
+ models.TransferStats.src_rse_id.name: src_rse_id,
1622
+ models.TransferStats.dest_rse_id.name: dst_rse_id,
1623
+ models.TransferStats.activity.name: activity,
1624
+ models.TransferStats.files_failed.name: record.files_failed,
1625
+ models.TransferStats.files_done.name: record.files_done,
1626
+ models.TransferStats.bytes_done.name: record.bytes_done,
1627
+ })
1628
+ if rows_to_insert:
1629
+ stmt = insert(
1630
+ models.TransferStats
1631
+ )
1632
+ session.execute(stmt, rows_to_insert)
1633
+
1634
+ def periodic_downsample_and_cleanup(self) -> None:
1635
+ """
1636
+ Periodically create lower resolution samples from higher resolution ones.
1637
+ """
1638
+ self.downsample_timer = threading.Timer(self.downsample_period, self.periodic_downsample_and_cleanup)
1639
+ self.downsample_timer.start()
1640
+
1641
+ while self.downsample_and_cleanup():
1642
+ continue
1643
+
1644
+ @read_session
1645
+ def _db_time_ranges(self, *, session: "Session") -> "dict[datetime.timedelta, tuple[datetime.datetime, datetime.datetime]]":
1646
+
1647
+ stmt = select(
1648
+ models.TransferStats.resolution,
1649
+ func.max(models.TransferStats.timestamp),
1650
+ func.min(models.TransferStats.timestamp),
1651
+ ).group_by(
1652
+ models.TransferStats.resolution,
1653
+ )
1654
+ db_time_ranges = {
1655
+ datetime.timedelta(seconds=res): (newest_t, oldest_t)
1656
+ for res, newest_t, oldest_t in session.execute(stmt)
1657
+ }
1658
+ return db_time_ranges
1659
+
1660
+ @transactional_session
1661
+ def downsample_and_cleanup(self, *, session: "Session") -> bool:
1662
+ """
1663
+ Housekeeping of samples in the database:
1664
+ - create lower-resolution (but higher-retention) samples from higher-resolution ones;
1665
+ - delete the samples which are older than the desired retention time.
1666
+        Return True if there is probably more to clean up.
1667
+
1668
+        This function is safe to execute in parallel from multiple daemons at the
1669
+        same time. However, this safety comes at the cost of possibly introducing duplicate samples at lower
1670
+        resolutions into the database. Code that works with those samples must therefore
1671
+        tolerate duplicates and avoid double-counting.
1672
+ """
1673
+
1674
+ # Delay processing to leave time for all raw metrics to be correctly saved to the database
1675
+ now = datetime.datetime.utcnow() - 4 * self.raw_resolution
1676
+
1677
+ db_time_ranges = self._db_time_ranges(session=session)
1678
+
1679
+ more_to_delete = False
1680
+ id_temp_table = temp_table_mngr(session).create_id_table()
1681
+ for i in range(1, len(self.retentions)):
1682
+ src_resolution, desired_src_retention = self.retentions[i - 1]
1683
+ dst_resolution, desired_dst_retention = self.retentions[i]
1684
+
1685
+ # Always keep samples at source resolution aligned to the destination resolution interval.
1686
+            # Keep at least the number of samples needed to cover the first interval at
1687
+ # destination resolution, but keep more samples if explicitly configured to do so.
1688
+ oldest_desired_src_timestamp, _ = next(self.slice_time(dst_resolution, start_time=now - desired_src_retention))
1689
+
1690
+ _, oldest_available_src_timestamp = db_time_ranges.get(src_resolution, (None, None))
1691
+ newest_available_dst_timestamp, oldest_available_dst_timestamp = db_time_ranges.get(dst_resolution, (None, None))
1692
+            # Only generate down-samples at destination resolution for intervals which:
1693
+ # - are within the desired retention window
1694
+ oldest_time_to_handle = now - desired_dst_retention - dst_resolution
1695
+ # - we didn't already generate the corresponding sample at destination resolution
1696
+ if newest_available_dst_timestamp:
1697
+ oldest_time_to_handle = max(oldest_time_to_handle, newest_available_dst_timestamp + datetime.timedelta(seconds=1))
1698
+ # - we have samples at source resolution to do it
1699
+ if oldest_available_src_timestamp:
1700
+ oldest_time_to_handle = max(oldest_time_to_handle, oldest_available_src_timestamp)
1701
+ else:
1702
+ oldest_time_to_handle = now
1703
+
1704
+ # Create samples at lower resolution from samples at higher resolution
1705
+ for recent_t, older_t in self.slice_time(dst_resolution, start_time=now, end_time=oldest_time_to_handle):
1706
+ additional_fields = {
1707
+ models.TransferStats.timestamp.name: older_t,
1708
+ models.TransferStats.resolution.name: dst_resolution.total_seconds(),
1709
+ }
1710
+ src_totals = self._load_totals(resolution=src_resolution, recent_t=recent_t, older_t=older_t, session=session)
1711
+ downsample_stats = [stat | additional_fields for stat in src_totals]
1712
+ if downsample_stats:
1713
+ session.execute(insert(models.TransferStats), downsample_stats)
1714
+ if not oldest_available_dst_timestamp or older_t < oldest_available_dst_timestamp:
1715
+ oldest_available_dst_timestamp = older_t
1716
+ if not newest_available_dst_timestamp or older_t > newest_available_dst_timestamp:
1717
+ newest_available_dst_timestamp = older_t
1718
+
1719
+ if oldest_available_dst_timestamp and newest_available_dst_timestamp:
1720
+ db_time_ranges[dst_resolution] = (newest_available_dst_timestamp, oldest_available_dst_timestamp)
1721
+
1722
+ # Delete from the database the samples which are older than desired
1723
+ more_to_delete |= self._cleanup(
1724
+ id_temp_table=id_temp_table,
1725
+ resolution=src_resolution,
1726
+ timestamp=oldest_desired_src_timestamp,
1727
+ session=session
1728
+ )
1729
+
1730
+ # Cleanup samples at the lowest resolution, which were not handled by the previous loop
1731
+ last_resolution, last_retention = self.retentions[-1]
1732
+ _, oldest_desired_timestamp = next(self.slice_time(last_resolution, start_time=now - last_retention))
1733
+ if db_time_ranges.get(last_resolution, (now, now))[1] < oldest_desired_timestamp:
1734
+ more_to_delete |= self._cleanup(
1735
+ id_temp_table=id_temp_table,
1736
+ resolution=last_resolution,
1737
+ timestamp=oldest_desired_timestamp,
1738
+ session=session
1739
+ )
1740
+
1741
+ # Cleanup all resolutions which exist in the database but are not desired by rucio anymore
1742
+ # (probably due to configuration changes).
1743
+ for resolution_to_cleanup in set(db_time_ranges).difference(r[0] for r in self.retentions):
1744
+ more_to_delete |= self._cleanup(
1745
+ id_temp_table=id_temp_table,
1746
+ resolution=resolution_to_cleanup,
1747
+ timestamp=now,
1748
+ session=session
1749
+ )
1750
+ return more_to_delete
1751
+
1752
+ @stream_session
1753
+ def load_totals(
1754
+ self,
1755
+ older_t: "datetime.datetime",
1756
+ dest_rse_id: Optional[str] = None,
1757
+ src_rse_id: Optional[str] = None,
1758
+ activity: Optional[str] = None,
1759
+ by_activity: bool = True,
1760
+ *,
1761
+ session: "Session"
1762
+ ) -> "Iterator[Mapping[str, str | int]]":
1763
+ """
1764
+ Load totals from now up to older_t in the past by automatically picking the best resolution.
1765
+
1766
+ The results will not necessarily be uniquely grouped by src_rse/dest_rse/activity. The caller
1767
+        is responsible for summing identical src_rse/dest_rse/activity pairs to get the actual result.
1768
+ """
1769
+
1770
+ db_time_ranges = self._db_time_ranges(session=session)
1771
+
1772
+ oldest_fetched = older_t
1773
+ for resolution, retention in reversed(self.retentions):
1774
+ newest_available_db_timestamp, oldest_available_db_timestamp = db_time_ranges.get(resolution, (None, None))
1775
+
1776
+ if not (newest_available_db_timestamp and oldest_available_db_timestamp):
1777
+ continue
1778
+
1779
+ if newest_available_db_timestamp < oldest_fetched:
1780
+ continue
1781
+
1782
+ yield from self._load_totals(
1783
+ resolution=resolution,
1784
+ recent_t=newest_available_db_timestamp + datetime.timedelta(seconds=1),
1785
+ older_t=oldest_fetched + datetime.timedelta(seconds=1),
1786
+ dest_rse_id=dest_rse_id,
1787
+ src_rse_id=src_rse_id,
1788
+ activity=activity,
1789
+ by_activity=by_activity,
1790
+ session=session,
1791
+ )
1792
+ oldest_fetched = newest_available_db_timestamp + resolution
1793
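
Note: as the docstring says, rows yielded by load_totals may repeat a (src, dst, activity) key when they come from different resolutions, so the caller sums them. A standalone aggregation sketch with toy rows shaped like the method's output:

    from collections import defaultdict

    rows = [   # deliberately repeats the same key, as two resolutions might
        {'src_rse_id': 'a', 'dest_rse_id': 'b', 'activity': 'x',
         'files_done': 3, 'files_failed': 1, 'bytes_done': 30},
        {'src_rse_id': 'a', 'dest_rse_id': 'b', 'activity': 'x',
         'files_done': 2, 'files_failed': 0, 'bytes_done': 20},
    ]

    totals = defaultdict(lambda: {'files_done': 0, 'files_failed': 0, 'bytes_done': 0})
    for row in rows:
        key = (row['src_rse_id'], row['dest_rse_id'], row['activity'])
        for counter in ('files_done', 'files_failed', 'bytes_done'):
            totals[key][counter] += row[counter]

    print(totals[('a', 'b', 'x')])   # {'files_done': 5, 'files_failed': 1, 'bytes_done': 50}
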
+
1794
+ @stream_session
1795
+ def _load_totals(
1796
+ self,
1797
+ resolution: "datetime.timedelta",
1798
+ recent_t: "datetime.datetime",
1799
+ older_t: "datetime.datetime",
1800
+ dest_rse_id: Optional[str] = None,
1801
+ src_rse_id: Optional[str] = None,
1802
+ activity: Optional[str] = None,
1803
+ by_activity: bool = True,
1804
+ *,
1805
+ session: "Session"
1806
+ ) -> "Iterator[Mapping[str, Union[str, int]]]":
1807
+ """
1808
+ Load aggregated totals for the given resolution and time interval.
1809
+
1810
+        Ignore multiple values for the same timestamp at downsampled resolutions.
1811
+        They are the result of concurrent downsample operations (two different
1812
+        daemons performing downsampling at the same time). Very probably,
1813
+        the values are identical. Even if not, these values must not be counted twice.
1814
+        This gracefully handles multiple parallel downsample operations.
1815
+ """
1816
+ grouping: "list[Any]" = [
1817
+ models.TransferStats.src_rse_id,
1818
+ models.TransferStats.dest_rse_id,
1819
+ ]
1820
+ if by_activity:
1821
+ grouping.append(models.TransferStats.activity)
1822
+
1823
+ if resolution == self.raw_resolution:
1824
+ sub_query = select(
1825
+ models.TransferStats.timestamp,
1826
+ *grouping,
1827
+ models.TransferStats.files_failed,
1828
+ models.TransferStats.files_done,
1829
+ models.TransferStats.bytes_done
1830
+ )
1831
+ else:
1832
+ sub_query = select(
1833
+ models.TransferStats.timestamp,
1834
+ *grouping,
1835
+ func.max(models.TransferStats.files_failed).label(models.TransferStats.files_failed.name),
1836
+ func.max(models.TransferStats.files_done).label(models.TransferStats.files_done.name),
1837
+ func.max(models.TransferStats.bytes_done).label(models.TransferStats.bytes_done.name),
1838
+ ).group_by(
1839
+ models.TransferStats.timestamp,
1840
+ *grouping,
1841
+ )
1842
+
1843
+ sub_query = sub_query.where(
1844
+ models.TransferStats.resolution == resolution.total_seconds(),
1845
+ models.TransferStats.timestamp >= older_t,
1846
+ models.TransferStats.timestamp < recent_t
1847
+ )
1848
+ if dest_rse_id:
1849
+ sub_query = sub_query.where(
1850
+ models.TransferStats.dest_rse_id == dest_rse_id
1851
+ )
1852
+ if src_rse_id:
1853
+ sub_query = sub_query.where(
1854
+ models.TransferStats.src_rse_id == src_rse_id
1855
+ )
1856
+ if activity:
1857
+ sub_query = sub_query.where(
1858
+ models.TransferStats.activity == activity
1859
+ )
1860
+
1861
+ sub_query = sub_query.subquery()
1862
+
1863
+ grouping = [
1864
+ sub_query.c.src_rse_id,
1865
+ sub_query.c.dest_rse_id,
1866
+ ]
1867
+ if by_activity:
1868
+ grouping.append(sub_query.c.activity)
1869
+
1870
+ stmt = select(
1871
+ *grouping,
1872
+ func.sum(sub_query.c.files_failed).label(models.TransferStats.files_failed.name),
1873
+ func.sum(sub_query.c.files_done).label(models.TransferStats.files_done.name),
1874
+ func.sum(sub_query.c.bytes_done).label(models.TransferStats.bytes_done.name),
1875
+ ).group_by(
1876
+ *grouping,
1877
+ )
1878
+
1879
+ for row in session.execute(stmt):
1880
+ yield row._asdict()
1881
+
1882
+ @staticmethod
1883
+ def _cleanup(
1884
+ id_temp_table: Any,
1885
+ resolution: "datetime.timedelta",
1886
+ timestamp: "datetime.datetime",
1887
+ limit: "Optional[int]" = 10000,
1888
+ *,
1889
+ session: "Session"
1890
+ ) -> bool:
1891
+ """
1892
+ Delete, from the database, the stats older than the given time.
1893
+ Skip locked rows, to tolerate parallel executions by multiple daemons.
1894
+ """
1895
+ stmt = select(
1896
+ models.TransferStats.id
1897
+ ).where(
1898
+ and_(models.TransferStats.resolution == resolution.total_seconds(),
1899
+ models.TransferStats.timestamp < timestamp)
1900
+ )
1901
+
1902
+ if limit is not None:
1903
+ stmt = stmt.limit(limit)
1904
+
1905
+ # Oracle does not support chaining order_by(), limit(), and
1906
+ # with_for_update(). Use a nested query to overcome this.
1907
+ if session.bind.dialect.name == 'oracle': # type: ignore
1908
+ stmt = select(
1909
+ models.TransferStats.id
1910
+ ).where(
1911
+ models.TransferStats.id.in_(stmt)
1912
+ ).with_for_update(
1913
+ skip_locked=True
1914
+ )
1915
+ else:
1916
+ stmt = stmt.with_for_update(skip_locked=True)
1917
+
1918
+ del_stmt = delete(
1919
+ id_temp_table
1920
+ )
1921
+ session.execute(del_stmt)
1922
+ insert_stmt = insert(
1923
+ id_temp_table
1924
+ ).from_select(
1925
+ ['id'],
1926
+ stmt
1927
+ )
1928
+ session.execute(insert_stmt)
1929
+
1930
+ stmt = delete(
1931
+ models.TransferStats
1932
+ ).where(
1933
+ exists(select(1).where(models.TransferStats.id == id_temp_table.id))
1934
+ ).execution_options(
1935
+ synchronize_session=False
1936
+ )
1937
+ res = session.execute(stmt)
1938
+ return res.rowcount > 0
1939
+
1940
+ @staticmethod
1941
+ def slice_time(
1942
+ resolution: datetime.timedelta,
1943
+ start_time: "Optional[datetime.datetime]" = None,
1944
+ end_time: "Optional[datetime.datetime]" = None
1945
+ ) -> Iterator[tuple[datetime.datetime, datetime.datetime]]:
1946
+ """
1947
+ Iterates, back in time, over time intervals of length `resolution` which are fully
1948
+ included within the input interval (start_time, end_time).
1949
+ Intervals are aligned on boundaries divisible by resolution.
1950
+
1951
+        For example: for start_time=17:09:59, end_time=16:20:01 and resolution = 10 minutes, it will yield
1952
+ (17:00:00, 16:50:00), (16:50:00, 16:40:00), (16:40:00, 16:30:00)
1953
+ """
1954
+
1955
+ if start_time is None:
1956
+ start_time = datetime.datetime.utcnow()
1957
+ newer_t = datetime.datetime.fromtimestamp(int(start_time.timestamp()) // resolution.total_seconds() * resolution.total_seconds())
1958
+ older_t = newer_t - resolution
1959
+ while not end_time or older_t >= end_time:
1960
+ yield newer_t, older_t
1961
+ newer_t = older_t
1962
+ older_t = older_t - resolution
1963
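
Note: slice_time is a pure static method, so it can be exercised without a database, assuming rucio is importable. Reproducing the docstring example (the exact boundaries assume a local UTC offset that is a whole multiple of the resolution, since fromtimestamp uses local time):

    import datetime

    start = datetime.datetime(2024, 1, 1, 17, 9, 59)
    end = datetime.datetime(2024, 1, 1, 16, 20, 1)
    for newer, older in TransferStatsManager.slice_time(
            datetime.timedelta(minutes=10), start_time=start, end_time=end):
        print(newer.time(), older.time())
    # 17:00:00 16:50:00
    # 16:50:00 16:40:00
    # 16:40:00 16:30:00
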
+
1964
+
1965
+ @read_session
1966
+ def get_request_metrics(
1967
+ dest_rse_id: Optional[str] = None,
1968
+ src_rse_id: Optional[str] = None,
1969
+ activity: Optional[str] = None,
1970
+ group_by_rse_attribute: Optional[str] = None,
1971
+ *,
1972
+ session: "Session"
1973
+ ) -> dict[str, Any]:
1974
+ metrics = {}
1975
+ now = datetime.datetime.utcnow()
1976
+
1977
+ # Add the current queues
1978
+ db_stats = get_request_stats(
1979
+ state=[
1980
+ RequestState.QUEUED,
1981
+ ],
1982
+ src_rse_id=src_rse_id,
1983
+ dest_rse_id=dest_rse_id,
1984
+ activity=activity,
1985
+ session=session,
1986
+ )
1987
+ for stat in db_stats:
1988
+ if not stat.source_rse_id:
1989
+ continue
1990
+
1991
+ resp_elem = metrics.setdefault((stat.source_rse_id, stat.dest_rse_id), {})
1992
+
1993
+ files_elem = resp_elem.setdefault('files', {})
1994
+ files_elem.setdefault('queued', {})[stat.activity] = stat.counter
1995
+ files_elem['queued-total'] = files_elem.get('queued-total', 0) + stat.counter
1996
+
1997
+ bytes_elem = resp_elem.setdefault('bytes', {})
1998
+ bytes_elem.setdefault('queued', {})[stat.activity] = stat.bytes
1999
+ bytes_elem['queued-total'] = bytes_elem.get('queued-total', 0) + stat.bytes
2000
+
2001
+ # Add the historical data
2002
+ for duration, duration_label in (
2003
+ (datetime.timedelta(hours=1), '1h'),
2004
+ (datetime.timedelta(hours=6), '6h')
2005
+ ):
2006
+ db_stats = TransferStatsManager().load_totals(
2007
+ older_t=now - duration,
2008
+ dest_rse_id=dest_rse_id,
2009
+ src_rse_id=src_rse_id,
2010
+ activity=activity,
2011
+ session=session,
2012
+ )
2013
+
2014
+ for stat in db_stats:
2015
+ resp_elem = metrics.setdefault((stat['src_rse_id'], stat['dest_rse_id']), {})
2016
+
2017
+ files_elem = resp_elem.setdefault('files', {})
2018
+ if stat['files_done']:
2019
+ activity_elem = files_elem.setdefault('done', {}).setdefault(stat['activity'], {})
2020
+ activity_elem[duration_label] = activity_elem.get(duration_label, 0) + stat['files_done']
2021
+ files_elem[f'done-total-{duration_label}'] = files_elem.get(f'done-total-{duration_label}', 0) + stat['files_done']
2022
+ if stat['files_failed']:
2023
+ activity_elem = files_elem.setdefault('failed', {}).setdefault(stat['activity'], {})
2024
+ activity_elem[duration_label] = activity_elem.get(duration_label, 0) + stat['files_failed']
2025
+ files_elem[f'failed-total-{duration_label}'] = files_elem.get(f'failed-total-{duration_label}', 0) + stat['files_failed']
2026
+
2027
+ bytes_elem = resp_elem.setdefault('bytes', {})
2028
+ if stat['bytes_done']:
2029
+ activity_elem = bytes_elem.setdefault('done', {}).setdefault(stat['activity'], {})
2030
+ activity_elem[duration_label] = activity_elem.get(duration_label, 0) + stat['bytes_done']
2031
+ bytes_elem[f'done-total-{duration_label}'] = bytes_elem.get(f'done-total-{duration_label}', 0) + stat['bytes_done']
2032
+
2033
+ # Add distances
2034
+ for distance in get_distances(dest_rse_id=dest_rse_id, src_rse_id=src_rse_id):
2035
+ resp_elem = metrics.setdefault((distance['src_rse_id'], distance['dest_rse_id']), {})
2036
+
2037
+ resp_elem['distance'] = distance['distance']
2038
+
2039
+ # Fill RSE names
2040
+ rses = RseCollection(rse_ids=itertools.chain.from_iterable(metrics))
2041
+ rses.ensure_loaded(load_name=True, include_deleted=True)
2042
+ response = {}
2043
+ for (src_id, dst_id), metric in metrics.items():
2044
+ src_rse = rses[src_id]
2045
+ dst_rse = rses[dst_id]
2046
+ metric['src_rse'] = src_rse.name
2047
+ metric['dst_rse'] = dst_rse.name
2048
+
2049
+ if group_by_rse_attribute:
2050
+ src_rse_group = src_rse.attributes.get(group_by_rse_attribute, 'UNKNOWN')
2051
+ dst_rse_group = dst_rse.attributes.get(group_by_rse_attribute, 'UNKNOWN')
2052
+ if src_rse_group is not None and dst_rse_group is not None:
2053
+ response[f'{src_rse_group}:{dst_rse_group}'] = metric
2054
+ else:
2055
+ response[f'{src_rse.name}:{dst_rse.name}'] = metric
2056
+
2057
+ return response
2058
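
Note: the response is keyed 'src:dst' using RSE names, or RSE-attribute values when group_by_rse_attribute is set. An illustrative entry, with made-up RSE names and numbers:

    {
        'RSE_A:RSE_B': {
            'src_rse': 'RSE_A',
            'dst_rse': 'RSE_B',
            'distance': 10,
            'files': {
                'queued': {'User Subscriptions': 4},
                'queued-total': 4,
                'done-total-1h': 120,
                'done-total-6h': 700,
            },
            'bytes': {
                'queued': {'User Subscriptions': 4096},
                'queued-total': 4096,
            },
        },
    }
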
+
2059
+
2060
+ @read_session
2061
+ def get_request_stats(
2062
+ state: Union[RequestState, list[RequestState]],
2063
+ dest_rse_id: Optional[str] = None,
2064
+ src_rse_id: Optional[str] = None,
2065
+ activity: Optional[str] = None,
2066
+ *,
2067
+ session: "Session"
2068
+ ) -> Sequence[
2069
+ """Row[tuple[
2070
+ Optional[InternalAccount],
2071
+ RequestState,
2072
+ uuid.UUID,
2073
+ Optional[uuid.UUID],
2074
+ Optional[str],
2075
+ int,
2076
+ Optional[int]
2077
+ ]]"""
2078
+ ]:
2079
+ """
2080
+    Retrieve statistics about requests, grouped by account, state, source/destination RSE and activity.
2081
+ """
2082
+
2083
+ if not isinstance(state, list):
2084
+ state = [state]
2085
+
2086
+ try:
2087
+ stmt = select(
2088
+ models.Request.account,
2089
+ models.Request.state,
2090
+ models.Request.dest_rse_id,
2091
+ models.Request.source_rse_id,
2092
+ models.Request.activity,
2093
+ func.count(1).label('counter'),
2094
+ func.sum(models.Request.bytes).label('bytes')
2095
+ ).with_hint(
2096
+ models.Request,
2097
+ 'INDEX(REQUESTS REQUESTS_TYP_STA_UPD_IDX)',
2098
+ 'oracle'
2099
+ ).where(
2100
+ and_(models.Request.state.in_(state),
2101
+ models.Request.request_type.in_([RequestType.TRANSFER, RequestType.STAGEIN, RequestType.STAGEOUT]))
2102
+ ).group_by(
2103
+ models.Request.account,
2104
+ models.Request.state,
2105
+ models.Request.dest_rse_id,
2106
+ models.Request.source_rse_id,
2107
+ models.Request.activity,
2108
+ )
2109
+ if src_rse_id:
2110
+ stmt = stmt.where(
2111
+ models.Request.source_rse_id == src_rse_id
2112
+ )
2113
+ if dest_rse_id:
2114
+ stmt = stmt.where(
2115
+ models.Request.dest_rse_id == dest_rse_id
2116
+ )
2117
+ if activity:
2118
+ stmt = stmt.where(
2119
+ models.Request.activity == activity
2120
+ )
2121
+
2122
+ return session.execute(stmt).all()
2123
+
2124
+ except IntegrityError as error:
2125
+ raise RucioException(error.args)
2126
+
2127
+
2128
+ @transactional_session
2129
+ def release_waiting_requests_per_deadline(
2130
+ dest_rse_id: Optional[str] = None,
2131
+ source_rse_id: Optional[str] = None,
2132
+ deadline: int = 1,
2133
+ *,
2134
+ session: "Session",
2135
+ ) -> int:
2136
+ """
2137
+    Release waiting requests that have exceeded the maximum allowed waiting time.
2138
+    If the DID of a request is attached to a dataset, the oldest requested_at date of all requests related to the dataset is used for the check, and all requests of this dataset are released together.
2139
+ :param dest_rse_id: The destination RSE id.
2140
+ :param source_rse_id: The source RSE id.
2141
+ :param deadline: Maximal waiting time in hours until a dataset gets released.
2142
+ :param session: The database session.
2143
+ """
2144
+ amount_released_requests = 0
2145
+ if deadline:
2146
+ grouped_requests_subquery, filtered_requests_subquery = create_base_query_grouped_fifo(dest_rse_id=dest_rse_id, source_rse_id=source_rse_id, session=session)
2147
+ old_requests_subquery = select(
2148
+ grouped_requests_subquery.c.name,
2149
+ grouped_requests_subquery.c.scope,
2150
+ grouped_requests_subquery.c.oldest_requested_at
2151
+ ).where(
2152
+ grouped_requests_subquery.c.oldest_requested_at < datetime.datetime.utcnow() - datetime.timedelta(hours=deadline)
2153
+ ).subquery()
2154
+
2155
+ old_requests_subquery = select(
2156
+ filtered_requests_subquery.c.id
2157
+ ).join(
2158
+ old_requests_subquery,
2159
+ and_(filtered_requests_subquery.c.dataset_name == old_requests_subquery.c.name,
2160
+ filtered_requests_subquery.c.dataset_scope == old_requests_subquery.c.scope)
2161
+ )
2162
+
2163
+ amount_released_requests = update(
2164
+ models.Request
2165
+ ).where(
2166
+ models.Request.id.in_(old_requests_subquery) # type: ignore
2167
+ ).execution_options(
2168
+ synchronize_session=False
2169
+ ).values({
2170
+ models.Request.state: RequestState.QUEUED
2171
+ })
2172
+ return session.execute(amount_released_requests).rowcount # type: ignore
2173
+
2174
+
2175
+ @transactional_session
2176
+ def release_waiting_requests_per_free_volume(
2177
+ dest_rse_id: Optional[str] = None,
2178
+ source_rse_id: Optional[str] = None,
2179
+ volume: int = 0,
2180
+ *,
2181
+ session: "Session"
2182
+ ) -> int:
2183
+ """
2184
+ Release waiting requests if they fit in available transfer volume. If the DID of a request is attached to a dataset, the volume will be checked for the whole dataset as all requests related to this dataset will be released.
2185
+
2186
+ :param dest_rse_id: The destination RSE id.
2187
+    :param source_rse_id: The source RSE id.
2188
+ :param volume: The maximum volume in bytes that should be transferred.
2189
+ :param session: The database session.
2190
+ """
2191
+
2192
+ dialect = session.bind.dialect.name # type: ignore
2193
+ if dialect == 'mysql' or dialect == 'sqlite':
2194
+ coalesce_func = func.ifnull
2195
+ elif dialect == 'oracle':
2196
+ coalesce_func = func.nvl
2197
+ else: # dialect == 'postgresql'
2198
+ coalesce_func = func.coalesce
2199
+
2200
+ sum_volume_active_subquery = select(
2201
+ coalesce_func(func.sum(models.Request.bytes), 0).label('sum_bytes')
2202
+ ).where(
2203
+ models.Request.state.in_([RequestState.SUBMITTED, RequestState.QUEUED]),
2204
+ )
2205
+ if dest_rse_id is not None:
2206
+ sum_volume_active_subquery = sum_volume_active_subquery.where(
2207
+ models.Request.dest_rse_id == dest_rse_id
2208
+ )
2209
+ if source_rse_id is not None:
2210
+ sum_volume_active_subquery = sum_volume_active_subquery.where(
2211
+ models.Request.source_rse_id == source_rse_id
2212
+ )
2213
+ sum_volume_active_subquery = sum_volume_active_subquery.subquery()
2214
+
2215
+ grouped_requests_subquery, filtered_requests_subquery = create_base_query_grouped_fifo(dest_rse_id=dest_rse_id, source_rse_id=source_rse_id, session=session)
2216
+
2217
+ cumulated_volume_subquery = select(
2218
+ grouped_requests_subquery.c.name,
2219
+ grouped_requests_subquery.c.scope,
2220
+ func.sum(grouped_requests_subquery.c.volume).over(order_by=grouped_requests_subquery.c.oldest_requested_at).label('cum_volume')
2221
+ ).where(
2222
+ grouped_requests_subquery.c.volume <= volume - sum_volume_active_subquery.c.sum_bytes
2223
+ ).subquery()
2224
+
2225
+ cumulated_volume_subquery = select(
2226
+ filtered_requests_subquery.c.id
2227
+ ).join(
2228
+ cumulated_volume_subquery,
2229
+ and_(filtered_requests_subquery.c.dataset_name == cumulated_volume_subquery.c.name,
2230
+ filtered_requests_subquery.c.dataset_scope == cumulated_volume_subquery.c.scope)
2231
+ ).where(
2232
+ cumulated_volume_subquery.c.cum_volume <= volume - sum_volume_active_subquery.c.sum_bytes
2233
+ )
2234
+
2235
+ amount_released_requests = update(
2236
+ models.Request
2237
+ ).where(
2238
+ models.Request.id.in_(cumulated_volume_subquery) # type: ignore
2239
+ ).execution_options(
2240
+ synchronize_session=False
2241
+ ).values({
2242
+ models.Request.state: RequestState.QUEUED
2243
+ })
2244
+ return session.execute(amount_released_requests).rowcount
2245
+
2246
+
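+ # Editorial usage sketch (illustrative only, not part of the upstream module):
+ # release waiting requests into a hypothetical 5 TB transfer budget for one
+ # destination RSE. The budget is compared against the bytes already
+ # SUBMITTED/QUEUED, so repeated calls will not overshoot it.
+ def _example_release_into_volume_budget(dest_rse_id: str) -> None:
+     released = release_waiting_requests_per_free_volume(dest_rse_id=dest_rse_id, volume=5 * 10**12)
+     logging.info('released %d waiting requests within the volume budget', released)
+ 
+ 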
+ @read_session
+ def create_base_query_grouped_fifo(
+         dest_rse_id: Optional[str] = None,
+         source_rse_id: Optional[str] = None,
+         *,
+         session: "Session"
+ ) -> tuple["Subquery", "Subquery"]:
+     """
+     Build the SQLAlchemy queries to filter relevant requests and to group them into datasets.
+     Requests can be filtered by their destination RSE, their source RSE, or both.
+ 
+     :param dest_rse_id: The destination RSE id to filter on.
+     :param source_rse_id: The source RSE id to filter on.
+     :param session: The database session.
+     """
+     dialect = session.bind.dialect.name  # type: ignore
+     if dialect == 'mysql' or dialect == 'sqlite':
+         coalesce_func = func.ifnull
+     elif dialect == 'oracle':
+         coalesce_func = func.nvl
+     else:  # dialect == 'postgresql'
+         coalesce_func = func.coalesce
+ 
+     # query DIDs that are attached to a collection and add a column indicating the order of attachment in case of multiple attachments
+     attachment_order_subquery = select(
+         models.DataIdentifierAssociation.child_name,
+         models.DataIdentifierAssociation.child_scope,
+         models.DataIdentifierAssociation.name,
+         models.DataIdentifierAssociation.scope,
+         func.row_number().over(
+             partition_by=(models.DataIdentifierAssociation.child_name,
+                           models.DataIdentifierAssociation.child_scope),
+             order_by=models.DataIdentifierAssociation.created_at
+         ).label('order_of_attachment')
+     ).subquery()
+ 
+     # query transfer requests and join them with the corresponding datasets
+     requests_subquery_stmt = select(
+         # Will be filled using add_columns() later
+     ).outerjoin(
+         attachment_order_subquery,
+         and_(models.Request.name == attachment_order_subquery.c.child_name,
+              models.Request.scope == attachment_order_subquery.c.child_scope,
+              attachment_order_subquery.c.order_of_attachment == 1),
+     ).where(
+         models.Request.state == RequestState.WAITING,
+     )
+     if source_rse_id is not None:
+         requests_subquery_stmt = requests_subquery_stmt.where(
+             models.Request.source_rse_id == source_rse_id
+         )
+     if dest_rse_id is not None:
+         requests_subquery_stmt = requests_subquery_stmt.where(
+             models.Request.dest_rse_id == dest_rse_id
+         )
+ 
+     filtered_requests_subquery = requests_subquery_stmt.add_columns(
+         coalesce_func(attachment_order_subquery.c.scope, models.Request.scope).label('dataset_scope'),
+         coalesce_func(attachment_order_subquery.c.name, models.Request.name).label('dataset_name'),
+         models.Request.id.label('id')
+     ).subquery()
+ 
+     combined_attached_unattached_requests = requests_subquery_stmt.add_columns(
+         coalesce_func(attachment_order_subquery.c.scope, models.Request.scope).label('scope'),
+         coalesce_func(attachment_order_subquery.c.name, models.Request.name).label('name'),
+         models.Request.bytes,
+         models.Request.requested_at
+     ).subquery()
+ 
+     # group requests and calculate properties like the oldest requested_at, the number of children and the volume
+     grouped_requests_subquery = select(
+         func.sum(combined_attached_unattached_requests.c.bytes).label('volume'),
+         func.min(combined_attached_unattached_requests.c.requested_at).label('oldest_requested_at'),
+         func.count().label('amount_childs'),
+         combined_attached_unattached_requests.c.name,
+         combined_attached_unattached_requests.c.scope
+     ).group_by(
+         combined_attached_unattached_requests.c.scope,
+         combined_attached_unattached_requests.c.name
+     ).subquery()
+     return grouped_requests_subquery, filtered_requests_subquery
+ 
+ 
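+ # Editorial usage sketch (illustrative only, not part of the upstream module):
+ # the helper above returns two subqueries, one row per dataset (volume,
+ # oldest_requested_at, amount_childs) and one row per waiting request tagged with
+ # its dataset. Callers join them, as the release_* functions do; a simple
+ # inspection of the per-dataset queue state could look like this:
+ def _example_inspect_grouped_queue(dest_rse_id: str, session: "Session") -> None:
+     grouped, _filtered = create_base_query_grouped_fifo(dest_rse_id=dest_rse_id, session=session)
+     stmt = select(grouped.c.scope, grouped.c.name, grouped.c.volume, grouped.c.oldest_requested_at)
+     for scope, name, dataset_volume, oldest in session.execute(stmt):
+         logging.debug('%s:%s waiting since %s (%s bytes)', scope, name, oldest, dataset_volume)
+ 
+ 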
+ @transactional_session
+ def release_waiting_requests_fifo(
+         dest_rse_id: Optional[str] = None,
+         source_rse_id: Optional[str] = None,
+         activity: Optional[str] = None,
+         count: int = 0,
+         account: Optional[InternalAccount] = None,
+         *,
+         session: "Session"
+ ) -> int:
+     """
+     Release waiting requests. Transfer requests that were requested first get released first (FIFO).
+ 
+     :param dest_rse_id: The destination RSE id.
+     :param source_rse_id: The source RSE id.
+     :param activity: The activity.
+     :param count: The maximum number of requests to release.
+     :param account: The account name whose requests to release.
+     :param session: The database session.
+     """
+ 
+     dialect = session.bind.dialect.name  # type: ignore
+     rowcount = 0
+ 
+     subquery = select(
+         models.Request.id
+     ).where(
+         models.Request.state == RequestState.WAITING
+     ).order_by(
+         asc(models.Request.requested_at)
+     ).limit(
+         count
+     )
+     if source_rse_id is not None:
+         subquery = subquery.where(models.Request.source_rse_id == source_rse_id)
+     if dest_rse_id is not None:
+         subquery = subquery.where(models.Request.dest_rse_id == dest_rse_id)
+ 
+     if activity is not None:
+         subquery = subquery.where(models.Request.activity == activity)
+     if account is not None:
+         subquery = subquery.where(models.Request.account == account)
+ 
+     if dialect == 'mysql':
+         # TODO: check if the logic from this `if` is still needed on modern mysql
+ 
+         # join because IN and LIMIT cannot be used together
+         subquery = subquery.subquery()
+         subquery = select(
+             models.Request.id
+         ).join(
+             subquery,
+             models.Request.id == subquery.c.id
+         ).subquery()
+         # wrap the select to update and select from the same table
+         subquery = select(subquery.c.id)
+ 
+     stmt = update(
+         models.Request
+     ).where(
+         models.Request.id.in_(subquery)  # type: ignore
+     ).execution_options(
+         synchronize_session=False
+     ).values({
+         models.Request.state: RequestState.QUEUED
+     })
+     rowcount = session.execute(stmt).rowcount
+     return rowcount
+ 
+ 
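+ # Editorial usage sketch (illustrative only, not part of the upstream module):
+ # plain FIFO release of up to 100 waiting requests of one activity towards a
+ # hypothetical destination RSE.
+ def _example_release_fifo(dest_rse_id: str) -> None:
+     released = release_waiting_requests_fifo(dest_rse_id=dest_rse_id, activity='User Subscriptions', count=100)
+     logging.info('released %d waiting requests in FIFO order', released)
+ 
+ 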
+ @transactional_session
+ def release_waiting_requests_grouped_fifo(
+         dest_rse_id: Optional[str] = None,
+         source_rse_id: Optional[str] = None,
+         count: int = 0,
+         deadline: int = 1,
+         volume: int = 0,
+         *,
+         session: "Session"
+ ) -> int:
+     """
+     Release waiting requests. Transfer requests that were requested first get released first (FIFO).
+     Additionally, if one child of a dataset is chosen to be released, all requests for DIDs attached to that same dataset get released as well (grouped FIFO).
+ 
+     :param dest_rse_id: The destination RSE id.
+     :param source_rse_id: The source RSE id.
+     :param count: The maximum number of requests to release.
+     :param deadline: Maximum waiting time in hours until a dataset gets released.
+     :param volume: The maximum volume in bytes that should be transferred.
+     :param session: The database session.
+     """
+ 
+     amount_updated_requests = 0
+ 
+     # Release requests that exceeded their maximum waiting time
+     if deadline and source_rse_id is not None:
+         amount_updated_requests = release_waiting_requests_per_deadline(dest_rse_id=dest_rse_id, source_rse_id=source_rse_id, deadline=deadline, session=session)
+         count = count - amount_updated_requests
+ 
+     grouped_requests_subquery, filtered_requests_subquery = create_base_query_grouped_fifo(dest_rse_id=dest_rse_id, source_rse_id=source_rse_id, session=session)
+ 
+     # cumulate the number of children per dataset, combine with each request and only keep requests that don't exceed the limit
+     cumulated_children_subquery = select(
+         grouped_requests_subquery.c.name,
+         grouped_requests_subquery.c.scope,
+         grouped_requests_subquery.c.amount_childs,
+         grouped_requests_subquery.c.oldest_requested_at,
+         func.sum(grouped_requests_subquery.c.amount_childs).over(order_by=(grouped_requests_subquery.c.oldest_requested_at)).label('cum_amount_childs')
+     ).subquery()
+     cumulated_children_subquery = select(
+         filtered_requests_subquery.c.id
+     ).join(
+         cumulated_children_subquery,
+         and_(filtered_requests_subquery.c.dataset_name == cumulated_children_subquery.c.name,
+              filtered_requests_subquery.c.dataset_scope == cumulated_children_subquery.c.scope)
+     ).where(
+         cumulated_children_subquery.c.cum_amount_childs - cumulated_children_subquery.c.amount_childs < count
+     ).subquery()
+ 
+     # needed for mysql to update and select from the same table
+     cumulated_children_subquery = select(cumulated_children_subquery.c.id)
+ 
+     stmt = update(
+         models.Request
+     ).where(
+         models.Request.id.in_(cumulated_children_subquery)  # type: ignore
+     ).execution_options(
+         synchronize_session=False
+     ).values({
+         models.Request.state: RequestState.QUEUED
+     })
+     amount_updated_requests += session.execute(stmt).rowcount
+ 
+     # release requests whose whole dataset volume fits in the available volume space
+     if volume and dest_rse_id is not None:
+         amount_updated_requests += release_waiting_requests_per_free_volume(dest_rse_id=dest_rse_id, volume=volume, session=session)
+ 
+     return amount_updated_requests
+ 
+ 
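+ # Editorial usage sketch (illustrative only, not part of the upstream module):
+ # a grouped-FIFO release that combines the deadline, count and volume policies in
+ # a single call. All values below are hypothetical.
+ def _example_release_grouped_fifo(dest_rse_id: str, source_rse_id: str) -> None:
+     released = release_waiting_requests_grouped_fifo(
+         dest_rse_id=dest_rse_id,
+         source_rse_id=source_rse_id,
+         count=500,            # at most ~500 requests, rounded up to whole datasets
+         deadline=2,           # first release datasets waiting longer than 2 hours
+         volume=10 * 10**12,   # then fill a 10 TB transfer budget
+     )
+     logging.info('released %d waiting requests (grouped FIFO)', released)
+ 
+ 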
+ @transactional_session
+ def release_all_waiting_requests(
+         dest_rse_id: Optional[str] = None,
+         source_rse_id: Optional[str] = None,
+         activity: Optional[str] = None,
+         account: Optional[InternalAccount] = None,
+         *,
+         session: "Session"
+ ) -> int:
+     """
+     Release all waiting requests that match the given filters.
+ 
+     :param dest_rse_id: The destination RSE id.
+     :param source_rse_id: The source RSE id.
+     :param activity: The activity.
+     :param account: The account name whose requests to release.
+     :param session: The database session.
+     """
+     try:
+         query = update(
+             models.Request
+         ).where(
+             models.Request.state == RequestState.WAITING,
+         ).execution_options(
+             synchronize_session=False
+         ).values({
+             models.Request.state: RequestState.QUEUED
+         })
+         if source_rse_id is not None:
+             query = query.where(
+                 models.Request.source_rse_id == source_rse_id
+             )
+         if dest_rse_id is not None:
+             query = query.where(
+                 models.Request.dest_rse_id == dest_rse_id
+             )
+         if activity is not None:
+             query = query.where(
+                 models.Request.activity == activity
+             )
+         if account is not None:
+             query = query.where(
+                 models.Request.account == account
+             )
+         rowcount = session.execute(query).rowcount
+         return rowcount
+     except IntegrityError as error:
+         raise RucioException(error.args)
+ 
+ 
+ @stream_session
+ def list_transfer_limits(
+         *,
+         session: "Session",
+ ) -> Iterator[dict[str, Any]]:
+     """
+     List all transfer limits as dictionaries.
+     """
+     stmt = select(
+         models.TransferLimit
+     )
+     for limit in session.execute(stmt).scalars():
+         yield limit.to_dict()
+ 
+ 
+ def _sync_rse_transfer_limit(
+         limit_id: Union[str, uuid.UUID],
+         desired_rse_ids: set[str],
+         *,
+         session: "Session",
+ ) -> None:
+     """
+     Ensure that an RSETransferLimit exists in the database for each of the given RSEs (and only for these RSEs).
+     """
+ 
+     stmt = select(
+         models.RSETransferLimit.rse_id,
+     ).where(
+         models.RSETransferLimit.limit_id == limit_id
+     )
+     existing_rse_ids = set(session.execute(stmt).scalars())
+ 
+     rse_limits_to_add = desired_rse_ids.difference(existing_rse_ids)
+     rse_limits_to_delete = existing_rse_ids.difference(desired_rse_ids)
+ 
+     if rse_limits_to_add:
+         values = [
+             {'rse_id': rse_id, 'limit_id': limit_id}
+             for rse_id in rse_limits_to_add
+         ]
+         stmt = insert(
+             models.RSETransferLimit
+         )
+         session.execute(stmt, values)
+ 
+     if rse_limits_to_delete:
+         stmt = delete(
+             models.RSETransferLimit
+         ).where(
+             and_(models.RSETransferLimit.limit_id == limit_id,
+                  models.RSETransferLimit.rse_id.in_(rse_limits_to_delete))
+         )
+         session.execute(stmt)
+ 
+ 
+ @transactional_session
+ def re_sync_all_transfer_limits(
+         delete_empty: bool = False,
+         *,
+         session: "Session",
+ ) -> None:
+     """
+     For each TransferLimit in the database, re-evaluate the RSE expression and ensure that the correct RSETransferLimits are in the database.
+ 
+     :param delete_empty: If True, when the rse_expression evaluates to an empty set or is invalid, the limit is completely removed.
+     """
+     stmt = select(
+         models.TransferLimit,
+     )
+     for limit in session.execute(stmt).scalars():
+         try:
+             desired_rse_ids = {rse['id'] for rse in parse_expression(expression=limit.rse_expression, session=session)}
+         except InvalidRSEExpression:
+             desired_rse_ids = set()
+ 
+         if not desired_rse_ids and delete_empty:
+             delete_transfer_limit_by_id(limit_id=limit.id, session=session)
+         else:
+             _sync_rse_transfer_limit(limit_id=limit.id, desired_rse_ids=desired_rse_ids, session=session)
+ 
+ 
+ @transactional_session
+ def set_transfer_limit(
+         rse_expression: str,
+         activity: Optional[str] = None,
+         direction: TransferLimitDirection = TransferLimitDirection.DESTINATION,
+         max_transfers: Optional[int] = None,
+         volume: Optional[int] = None,
+         deadline: Optional[int] = None,
+         strategy: Optional[str] = None,
+         transfers: Optional[int] = None,
+         waitings: Optional[int] = None,
+         *,
+         session: "Session",
+ ) -> uuid.UUID:
+     """
+     Create or update a transfer limit.
+ 
+     :param rse_expression: RSE expression string.
+     :param activity: The activity.
+     :param direction: The direction in which this limit applies (source/destination).
+     :param max_transfers: Maximum number of transfers.
+     :param volume: Maximum transfer volume in bytes.
+     :param deadline: Maximum waiting time in hours until a dataset gets released.
+     :param strategy: Defines how to handle datasets: `fifo` (each file released separately) or `grouped_fifo` (wait for the entire dataset to fit).
+     :param transfers: Current number of active transfers.
+     :param waitings: Current number of waiting transfers.
+     :param session: The database session in use.
+ 
+     :returns: The limit id.
+     """
+     if activity is None:
+         activity = 'all_activities'
+ 
+     stmt = select(
+         models.TransferLimit
+     ).where(
+         and_(models.TransferLimit.rse_expression == rse_expression,
+              models.TransferLimit.activity == activity,
+              models.TransferLimit.direction == direction)
+     )
+     limit = session.execute(stmt).scalar_one_or_none()
+ 
+     if not limit:
+         if max_transfers is None:
+             max_transfers = 0
+         if volume is None:
+             volume = 0
+         if deadline is None:
+             deadline = 1
+         if strategy is None:
+             strategy = 'fifo'
+         limit = models.TransferLimit(
+             rse_expression=rse_expression,
+             activity=activity,
+             direction=direction,
+             max_transfers=max_transfers,
+             volume=volume,
+             deadline=deadline,
+             strategy=strategy,
+             transfers=transfers,
+             waitings=waitings
+         )
+         limit.save(session=session)
+     else:
+         changed = False
+         if max_transfers is not None and limit.max_transfers != max_transfers:
+             limit.max_transfers = max_transfers
+             changed = True
+         if volume is not None and limit.volume != volume:
+             limit.volume = volume
+             changed = True
+         if deadline is not None and limit.deadline != deadline:
+             limit.deadline = deadline
+             changed = True
+         if strategy is not None and limit.strategy != strategy:
+             limit.strategy = strategy
+             changed = True
+         if transfers is not None and limit.transfers != transfers:
+             limit.transfers = transfers
+             changed = True
+         if waitings is not None and limit.waitings != waitings:
+             limit.waitings = waitings
+             changed = True
+         if changed:
+             limit.save(session=session)
+ 
+     desired_rse_ids = {rse['id'] for rse in parse_expression(expression=rse_expression, session=session)}
+     _sync_rse_transfer_limit(limit_id=limit.id, desired_rse_ids=desired_rse_ids, session=session)
+     return limit.id
+ 
+ 
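+ # Editorial usage sketch (illustrative only, not part of the upstream module):
+ # cap the number of concurrent transfers into a hypothetical tape RSE expression
+ # and let datasets be released as a whole once they fit the volume budget.
+ def _example_set_tape_limit() -> None:
+     limit_id = set_transfer_limit(
+         rse_expression='rse_type=TAPE',   # hypothetical RSE expression
+         activity='Data Consolidation',
+         max_transfers=200,
+         volume=20 * 10**12,
+         strategy='grouped_fifo',
+     )
+     logging.info('transfer limit %s is now in place', limit_id)
+ 
+ 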
2690
+ @transactional_session
2691
+ def set_transfer_limit_stats(
2692
+ limit_id: str,
2693
+ waitings: int,
2694
+ transfers: int,
2695
+ *,
2696
+ session: "Session",
2697
+ ) -> None:
2698
+ """
2699
+ Set the statistics of the TransferLimit
2700
+ """
2701
+ stmt = update(
2702
+ models.TransferLimit
2703
+ ).where(
2704
+ models.TransferLimit.id == limit_id
2705
+ ).values({
2706
+ models.TransferLimit.waitings: waitings,
2707
+ models.TransferLimit.transfers: transfers
2708
+ })
2709
+ session.execute(stmt)
2710
+
2711
+
2712
+ @transactional_session
2713
+ def delete_transfer_limit(
2714
+ rse_expression: str,
2715
+ activity: Optional[str] = None,
2716
+ direction: TransferLimitDirection = TransferLimitDirection.DESTINATION,
2717
+ *,
2718
+ session: "Session",
2719
+ ) -> None:
2720
+
2721
+ if activity is None:
2722
+ activity = 'all_activities'
2723
+
2724
+ stmt = delete(
2725
+ models.RSETransferLimit
2726
+ ).where(
2727
+ exists(
2728
+ select(1)
2729
+ ).where(
2730
+ and_(models.RSETransferLimit.limit_id == models.TransferLimit.id,
2731
+ models.TransferLimit.rse_expression == rse_expression,
2732
+ models.TransferLimit.activity == activity,
2733
+ models.TransferLimit.direction == direction)
2734
+ )
2735
+ ).execution_options(
2736
+ synchronize_session=False
2737
+ )
2738
+ session.execute(stmt)
2739
+
2740
+ stmt = delete(
2741
+ models.TransferLimit
2742
+ ).where(
2743
+ and_(models.TransferLimit.rse_expression == rse_expression,
2744
+ models.TransferLimit.activity == activity,
2745
+ models.TransferLimit.direction == direction)
2746
+ )
2747
+ session.execute(stmt)
2748
+
2749
+
2750
+ @transactional_session
2751
+ def delete_transfer_limit_by_id(
2752
+ limit_id: str,
2753
+ *,
2754
+ session: "Session",
2755
+ ) -> None:
2756
+ stmt = delete(
2757
+ models.RSETransferLimit
2758
+ ).where(
2759
+ models.RSETransferLimit.limit_id == limit_id
2760
+ )
2761
+ session.execute(stmt)
2762
+
2763
+ stmt = delete(
2764
+ models.TransferLimit
2765
+ ).where(
2766
+ models.TransferLimit.id == limit_id
2767
+ )
2768
+ session.execute(stmt)
2769
+
2770
+
2771
+ @transactional_session
2772
+ def update_requests_priority(
2773
+ priority: int,
2774
+ filter_: FilterDict,
2775
+ *,
2776
+ session: "Session",
2777
+ logger: LoggerFunction = logging.log
2778
+ ) -> dict[str, Any]:
2779
+ """
2780
+ Update priority of requests.
2781
+
2782
+ :param priority: The priority as an integer from 1 to 5.
2783
+ :param filter_: Dictionary such as {'rule_id': rule_id, 'request_id': request_id, 'older_than': time_stamp, 'activities': [activities]}.
2784
+ :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
2785
+ :return the transfers which must be updated in the transfertool
2786
+ """
2787
+ try:
2788
+ query = select(
2789
+ models.Request.id,
2790
+ models.Request.external_id,
2791
+ models.Request.external_host,
2792
+ models.Request.state.label('request_state'),
2793
+ models.ReplicaLock.state.label('lock_state')
2794
+ ).join(
2795
+ models.ReplicaLock,
2796
+ and_(models.ReplicaLock.scope == models.Request.scope,
2797
+ models.ReplicaLock.name == models.Request.name,
2798
+ models.ReplicaLock.rse_id == models.Request.dest_rse_id)
2799
+ )
2800
+ if 'rule_id' in filter_:
2801
+ query = query.where(models.ReplicaLock.rule_id == filter_['rule_id'])
2802
+ if 'request_id' in filter_:
2803
+ query = query.where(models.Request.id == filter_['request_id'])
2804
+ if 'older_than' in filter_:
2805
+ query = query.where(models.Request.created_at < filter_['older_than'])
2806
+ if 'activities' in filter_:
2807
+ if not isinstance(filter_['activities'], list):
2808
+ filter_['activities'] = filter_['activities'].split(',')
2809
+ query = query.filter(models.Request.activity.in_(filter_['activities']))
2810
+
2811
+ transfers_to_update = {}
2812
+ for item in session.execute(query).all():
2813
+ try:
2814
+ update_request(item.id, priority=priority, session=session)
2815
+ logger(logging.DEBUG, "Updated request %s priority to %s in rucio." % (item.id, priority))
2816
+ if item.request_state == RequestState.SUBMITTED and item.lock_state == LockState.REPLICATING:
2817
+ transfers_to_update.setdefault(item.external_host, {})[item.external_id] = priority
2818
+ except Exception:
2819
+ logger(logging.DEBUG, "Failed to boost request %s priority: %s" % (item.id, traceback.format_exc()))
2820
+ return transfers_to_update
2821
+ except IntegrityError as error:
2822
+ raise RucioException(error.args)
2823
+
2824
+
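+ # Editorial usage sketch (illustrative only, not part of the upstream module):
+ # boost every request of a hypothetical rule to the highest priority. The
+ # returned mapping tells the caller which already-submitted transfers still need
+ # to be re-prioritised in the transfertool (e.g. FTS).
+ def _example_boost_rule(rule_id: str) -> None:
+     transfers_to_update = update_requests_priority(priority=5, filter_={'rule_id': rule_id})
+     for external_host, transfers in transfers_to_update.items():
+         logging.info('%d transfers on %s still need a transfertool update', len(transfers), external_host)
+ 
+ 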
2825
+ @read_session
2826
+ def add_monitor_message(
2827
+ new_state: RequestState,
2828
+ request: RequestDict,
2829
+ additional_fields: "Mapping[str, Any]",
2830
+ *,
2831
+ session: "Session"
2832
+ ) -> None:
2833
+ """
2834
+ Create a message for hermes from a request
2835
+
2836
+ :param new_state: The new state of the transfer request
2837
+ :param request: The request to create the message for.
2838
+ :param additional_fields: Additional custom fields to be added to the message
2839
+ :param session: The database session to use.
2840
+ """
2841
+
2842
+ if request['request_type']:
2843
+ transfer_status = '%s-%s' % (request['request_type'].name, new_state.name)
2844
+ else:
2845
+ transfer_status = 'transfer-%s' % new_state.name
2846
+ transfer_status = transfer_status.lower()
2847
+
2848
+ stmt = select(
2849
+ models.DataIdentifier.datatype
2850
+ ).where(
2851
+ and_(models.DataIdentifier.scope == request['scope'],
2852
+ models.DataIdentifier.name == request['name'])
2853
+ )
2854
+ datatype = session.execute(stmt).scalar_one_or_none()
2855
+
2856
+ # Start by filling up fields from database request or with defaults.
2857
+ message = {'activity': request.get('activity', None),
2858
+ 'request-id': request['id'],
2859
+ 'duration': -1,
2860
+ 'checksum-adler': request.get('adler32', None),
2861
+ 'checksum-md5': request.get('md5', None),
2862
+ 'file-size': request.get('bytes', None),
2863
+ 'bytes': request.get('bytes', None),
2864
+ 'guid': None,
2865
+ 'previous-request-id': request['previous_attempt_id'],
2866
+ 'protocol': None,
2867
+ 'scope': request['scope'],
2868
+ 'name': request['name'],
2869
+ 'dataset': None,
2870
+ 'datasetScope': None,
2871
+ 'src-type': None,
2872
+ 'src-rse': request.get('source_rse', None),
2873
+ 'src-url': None,
2874
+ 'dst-type': None,
2875
+ 'dst-rse': request.get('dest_rse', None),
2876
+ 'dst-url': request.get('dest_url', None),
2877
+ 'reason': request.get('err_msg', None),
2878
+ 'transfer-endpoint': request['external_host'],
2879
+ 'transfer-id': request['external_id'],
2880
+ 'transfer-link': None,
2881
+ 'created_at': request.get('created_at', None),
2882
+ 'submitted_at': request.get('submitted_at', None),
2883
+ 'started_at': request.get('started_at', None),
2884
+ 'transferred_at': request.get('transferred_at', None),
2885
+ 'tool-id': 'rucio-conveyor',
2886
+ 'account': request.get('account', None),
2887
+ 'datatype': datatype}
2888
+
2889
+ # Add (or override) existing fields
2890
+ message.update(additional_fields)
2891
+
2892
+ if message['started_at'] and message['transferred_at']:
2893
+ message['duration'] = (message['transferred_at'] - message['started_at']).seconds
2894
+ ds_scope = request['attributes'].get('ds_scope')
2895
+ if not message['datasetScope'] and ds_scope:
2896
+ message['datasetScope'] = ds_scope
2897
+ ds_name = request['attributes'].get('ds_name')
2898
+ if not message['dataset'] and ds_name:
2899
+ message['dataset'] = ds_name
2900
+ if not message.get('protocol'):
2901
+ dst_url = message['dst-url']
2902
+ if dst_url and ':' in dst_url:
2903
+ message['protocol'] = dst_url.split(':')[0]
2904
+ elif request.get('transfertool'):
2905
+ message['protocol'] = request['transfertool']
2906
+ if not message.get('src-rse'):
2907
+ src_rse_id = request.get('source_rse_id', None)
2908
+ if src_rse_id:
2909
+ src_rse = get_rse_name(src_rse_id, session=session)
2910
+ message['src-rse'] = src_rse
2911
+ if not message.get('dst-rse'):
2912
+ dst_rse_id = request.get('dest_rse_id', None)
2913
+ if dst_rse_id:
2914
+ dst_rse = get_rse_name(dst_rse_id, session=session)
2915
+ message['dst-rse'] = dst_rse
2916
+ if not message.get('vo') and request.get('source_rse_id'):
2917
+ src_id = request['source_rse_id']
2918
+ vo = get_rse_vo(rse_id=src_id, session=session)
2919
+ if vo != 'def':
2920
+ message['vo'] = vo
2921
+ for time_field in ('created_at', 'submitted_at', 'started_at', 'transferred_at'):
2922
+ field_value = message[time_field]
2923
+ message[time_field] = str(field_value) if field_value else None
2924
+
2925
+ add_message(transfer_status, message, session=session)
2926
+
2927
+
2928
+ def get_transfer_error(
2929
+ state: RequestState,
2930
+ reason: Optional[str] = None
2931
+ ) -> Optional[str]:
2932
+ """
2933
+ Transform a specific RequestState to an error message
2934
+
2935
+ :param state: State of the request.
2936
+ :param reason: Reason of the state.
2937
+ :returns: Error message
2938
+ """
2939
+ err_msg = None
2940
+ if state in [RequestState.NO_SOURCES, RequestState.ONLY_TAPE_SOURCES]:
2941
+ err_msg = '%s:%s' % (RequestErrMsg.NO_SOURCES, state)
2942
+ elif state in [RequestState.SUBMISSION_FAILED]:
2943
+ err_msg = '%s:%s' % (RequestErrMsg.SUBMISSION_FAILED, state)
2944
+ elif state in [RequestState.SUBMITTING]:
2945
+ err_msg = '%s:%s' % (RequestErrMsg.SUBMISSION_FAILED, "Too long time in submitting state")
2946
+ elif state in [RequestState.LOST]:
2947
+ err_msg = '%s:%s' % (RequestErrMsg.TRANSFER_FAILED, "Transfer job on FTS is lost")
2948
+ elif state in [RequestState.FAILED]:
2949
+ err_msg = '%s:%s' % (RequestErrMsg.TRANSFER_FAILED, reason)
2950
+ elif state in [RequestState.MISMATCH_SCHEME]:
2951
+ err_msg = '%s:%s' % (RequestErrMsg.MISMATCH_SCHEME, state)
2952
+ return err_msg
2953
+
2954
+
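+ # Editorial usage sketch (illustrative only, not part of the upstream module):
+ # deriving the err_msg stored on a request from its terminal state; the reason
+ # string below is hypothetical.
+ def _example_transfer_error() -> None:
+     err_msg = get_transfer_error(RequestState.FAILED, reason='checksum mismatch at destination')
+     logging.info('would store err_msg=%r on the request', err_msg)
+ 
+ 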
2955
+ @read_session
2956
+ def get_source_rse(
2957
+ request_id: str,
2958
+ src_url: str,
2959
+ *,
2960
+ session: "Session",
2961
+ logger: LoggerFunction = logging.log
2962
+ ) -> tuple[Optional[str], Optional[str]]:
2963
+ """
2964
+ Based on a request, and src_url extract the source rse name and id.
2965
+
2966
+ :param request_id: The request_id of the request.
2967
+ :param src_url: The src_url of the request.
2968
+ :param session: The database session to use.
2969
+ :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
2970
+ """
2971
+
2972
+ try:
2973
+ if not request_id:
2974
+ return None, None
2975
+
2976
+ sources = get_sources(request_id, session=session)
2977
+ sources = sources or []
2978
+ for source in sources:
2979
+ if source['url'] == src_url:
2980
+ src_rse_id = source['rse_id']
2981
+ src_rse_name = get_rse_name(src_rse_id, session=session)
2982
+ logger(logging.DEBUG, "Find rse name %s for %s" % (src_rse_name, src_url))
2983
+ return src_rse_name, src_rse_id
2984
+ # cannot find matched source url
2985
+ logger(logging.WARNING, 'Cannot get correct RSE for source url: %s' % (src_url))
2986
+ return None, None
2987
+ except Exception:
2988
+ logger(logging.ERROR, 'Cannot get correct RSE for source url: %s' % (src_url), exc_info=True)
2989
+ return None, None
2990
+
2991
+
2992
+ @stream_session
2993
+ def list_requests(
2994
+ src_rse_ids: Sequence[str],
2995
+ dst_rse_ids: Sequence[str],
2996
+ states: Optional[Sequence[RequestState]] = None,
2997
+ *,
2998
+ session: "Session"
2999
+ ) -> Iterator[models.Request]:
3000
+ """
3001
+ List all requests in a specific state from a source RSE to a destination RSE.
3002
+
3003
+ :param src_rse_ids: source RSE ids.
3004
+ :param dst_rse_ids: destination RSE ids.
3005
+ :param states: list of request states.
3006
+ :param session: The database session in use.
3007
+ """
3008
+ if not states:
3009
+ states = [RequestState.WAITING]
3010
+
3011
+ stmt = select(
3012
+ models.Request
3013
+ ).where(
3014
+ and_(models.Request.state.in_(states),
3015
+ models.Request.source_rse_id.in_(src_rse_ids),
3016
+ models.Request.dest_rse_id.in_(dst_rse_ids))
3017
+ )
3018
+ for request in session.execute(stmt).yield_per(500).scalars():
3019
+ yield request
3020
+
3021
+
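+ # Editorial usage sketch (illustrative only, not part of the upstream module):
+ # stream queued requests between two hypothetical sets of RSEs without loading
+ # them all into memory; `session` is opened by @stream_session when omitted.
+ def _example_count_queued(src_rse_ids: list[str], dst_rse_ids: list[str]) -> int:
+     queued = 0
+     for _request in list_requests(src_rse_ids, dst_rse_ids, states=[RequestState.QUEUED]):
+         queued += 1
+     return queued
+ 
+ 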
+ @stream_session
+ def list_requests_history(
+         src_rse_ids: Sequence[str],
+         dst_rse_ids: Sequence[str],
+         states: Optional[Sequence[RequestState]] = None,
+         offset: Optional[int] = None,
+         limit: Optional[int] = None,
+         *,
+         session: "Session"
+ ) -> Iterator[models.RequestHistory]:
+     """
+     List all historical requests in the given states between the given source and destination RSEs.
+ 
+     :param src_rse_ids: Source RSE ids.
+     :param dst_rse_ids: Destination RSE ids.
+     :param states: List of request states.
+     :param offset: Offset (for paging).
+     :param limit: Limit the number of results.
+     :param session: The database session in use.
+     """
+     if not states:
+         states = [RequestState.WAITING]
+ 
+     stmt = select(
+         models.RequestHistory
+     ).where(
+         and_(models.RequestHistory.state.in_(states),
+              models.RequestHistory.source_rse_id.in_(src_rse_ids),
+              models.RequestHistory.dest_rse_id.in_(dst_rse_ids))
+     )
+     if offset:
+         stmt = stmt.offset(offset)
+     if limit:
+         stmt = stmt.limit(limit)
+     for request in session.execute(stmt).yield_per(500).scalars():
+         yield request
+ 
+ 
3060
+ @transactional_session
3061
+ def reset_stale_waiting_requests(time_limit: Optional[datetime.timedelta] = datetime.timedelta(days=1), *, session: "Session") -> None:
3062
+ """
3063
+ Clear source_rse_id for requests that have been in the waiting state for > time_limit amount of time and
3064
+ transition back to preparing state (default time limit = 1 day).
3065
+ This allows for stale requests that have been in the waiting state for a long time to be able to
3066
+ react to source changes that have occurred in the meantime.
3067
+ :param time_limit: The amount of time a request must be in the waiting state to be reset.
3068
+ :param session: The database session in use.
3069
+ """
3070
+ try:
3071
+ # Cutoff timestamp based on time limit
3072
+ time_limit_timestamp = datetime.datetime.utcnow() - time_limit
3073
+
3074
+ # Select all waiting requests that precede the time limit, then clear source_rse_id and reset state to preparing
3075
+ stmt = update(
3076
+ models.Request
3077
+ ).where(
3078
+ and_(models.Request.state == RequestState.WAITING,
3079
+ models.Request.last_processed_at < time_limit_timestamp)
3080
+ ).execution_options(
3081
+ synchronize_session=False
3082
+ ).values({
3083
+ models.Request.source_rse_id: None,
3084
+ models.Request.state: RequestState.PREPARING
3085
+ })
3086
+ session.execute(stmt)
3087
+
3088
+ except IntegrityError as error:
3089
+ raise RucioException(error.args)
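+ 
+ 
+ # Editorial usage sketch (illustrative only, not part of the upstream module):
+ # a janitor task that resets requests stuck in the waiting state for more than
+ # 12 hours so the preparer can re-evaluate their sources.
+ def _example_reset_stale() -> None:
+     reset_stale_waiting_requests(time_limit=datetime.timedelta(hours=12))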