rucio-37.0.0rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of rucio might be problematic.

Files changed (487)
  1. rucio/__init__.py +17 -0
  2. rucio/alembicrevision.py +15 -0
  3. rucio/cli/__init__.py +14 -0
  4. rucio/cli/account.py +216 -0
  5. rucio/cli/bin_legacy/__init__.py +13 -0
  6. rucio/cli/bin_legacy/rucio.py +2825 -0
  7. rucio/cli/bin_legacy/rucio_admin.py +2500 -0
  8. rucio/cli/command.py +272 -0
  9. rucio/cli/config.py +72 -0
  10. rucio/cli/did.py +191 -0
  11. rucio/cli/download.py +128 -0
  12. rucio/cli/lifetime_exception.py +33 -0
  13. rucio/cli/replica.py +162 -0
  14. rucio/cli/rse.py +293 -0
  15. rucio/cli/rule.py +158 -0
  16. rucio/cli/scope.py +40 -0
  17. rucio/cli/subscription.py +73 -0
  18. rucio/cli/upload.py +60 -0
  19. rucio/cli/utils.py +226 -0
  20. rucio/client/__init__.py +15 -0
  21. rucio/client/accountclient.py +432 -0
  22. rucio/client/accountlimitclient.py +183 -0
  23. rucio/client/baseclient.py +983 -0
  24. rucio/client/client.py +120 -0
  25. rucio/client/configclient.py +126 -0
  26. rucio/client/credentialclient.py +59 -0
  27. rucio/client/didclient.py +868 -0
  28. rucio/client/diracclient.py +56 -0
  29. rucio/client/downloadclient.py +1783 -0
  30. rucio/client/exportclient.py +44 -0
  31. rucio/client/fileclient.py +50 -0
  32. rucio/client/importclient.py +42 -0
  33. rucio/client/lifetimeclient.py +90 -0
  34. rucio/client/lockclient.py +109 -0
  35. rucio/client/metaconventionsclient.py +140 -0
  36. rucio/client/pingclient.py +44 -0
  37. rucio/client/replicaclient.py +452 -0
  38. rucio/client/requestclient.py +125 -0
  39. rucio/client/richclient.py +317 -0
  40. rucio/client/rseclient.py +746 -0
  41. rucio/client/ruleclient.py +294 -0
  42. rucio/client/scopeclient.py +90 -0
  43. rucio/client/subscriptionclient.py +173 -0
  44. rucio/client/touchclient.py +82 -0
  45. rucio/client/uploadclient.py +969 -0
  46. rucio/common/__init__.py +13 -0
  47. rucio/common/bittorrent.py +234 -0
  48. rucio/common/cache.py +111 -0
  49. rucio/common/checksum.py +168 -0
  50. rucio/common/client.py +122 -0
  51. rucio/common/config.py +788 -0
  52. rucio/common/constants.py +217 -0
  53. rucio/common/constraints.py +17 -0
  54. rucio/common/didtype.py +237 -0
  55. rucio/common/dumper/__init__.py +342 -0
  56. rucio/common/dumper/consistency.py +497 -0
  57. rucio/common/dumper/data_models.py +362 -0
  58. rucio/common/dumper/path_parsing.py +75 -0
  59. rucio/common/exception.py +1208 -0
  60. rucio/common/extra.py +31 -0
  61. rucio/common/logging.py +420 -0
  62. rucio/common/pcache.py +1409 -0
  63. rucio/common/plugins.py +185 -0
  64. rucio/common/policy.py +93 -0
  65. rucio/common/schema/__init__.py +200 -0
  66. rucio/common/schema/generic.py +416 -0
  67. rucio/common/schema/generic_multi_vo.py +395 -0
  68. rucio/common/stomp_utils.py +423 -0
  69. rucio/common/stopwatch.py +55 -0
  70. rucio/common/test_rucio_server.py +154 -0
  71. rucio/common/types.py +483 -0
  72. rucio/common/utils.py +1688 -0
  73. rucio/core/__init__.py +13 -0
  74. rucio/core/account.py +496 -0
  75. rucio/core/account_counter.py +236 -0
  76. rucio/core/account_limit.py +425 -0
  77. rucio/core/authentication.py +620 -0
  78. rucio/core/config.py +437 -0
  79. rucio/core/credential.py +224 -0
  80. rucio/core/did.py +3004 -0
  81. rucio/core/did_meta_plugins/__init__.py +252 -0
  82. rucio/core/did_meta_plugins/did_column_meta.py +331 -0
  83. rucio/core/did_meta_plugins/did_meta_plugin_interface.py +165 -0
  84. rucio/core/did_meta_plugins/elasticsearch_meta.py +407 -0
  85. rucio/core/did_meta_plugins/filter_engine.py +672 -0
  86. rucio/core/did_meta_plugins/json_meta.py +240 -0
  87. rucio/core/did_meta_plugins/mongo_meta.py +229 -0
  88. rucio/core/did_meta_plugins/postgres_meta.py +352 -0
  89. rucio/core/dirac.py +237 -0
  90. rucio/core/distance.py +187 -0
  91. rucio/core/exporter.py +59 -0
  92. rucio/core/heartbeat.py +363 -0
  93. rucio/core/identity.py +301 -0
  94. rucio/core/importer.py +260 -0
  95. rucio/core/lifetime_exception.py +377 -0
  96. rucio/core/lock.py +577 -0
  97. rucio/core/message.py +288 -0
  98. rucio/core/meta_conventions.py +203 -0
  99. rucio/core/monitor.py +448 -0
  100. rucio/core/naming_convention.py +195 -0
  101. rucio/core/nongrid_trace.py +136 -0
  102. rucio/core/oidc.py +1463 -0
  103. rucio/core/permission/__init__.py +161 -0
  104. rucio/core/permission/generic.py +1124 -0
  105. rucio/core/permission/generic_multi_vo.py +1144 -0
  106. rucio/core/quarantined_replica.py +224 -0
  107. rucio/core/replica.py +4483 -0
  108. rucio/core/replica_sorter.py +362 -0
  109. rucio/core/request.py +3091 -0
  110. rucio/core/rse.py +2079 -0
  111. rucio/core/rse_counter.py +185 -0
  112. rucio/core/rse_expression_parser.py +459 -0
  113. rucio/core/rse_selector.py +304 -0
  114. rucio/core/rule.py +4484 -0
  115. rucio/core/rule_grouping.py +1620 -0
  116. rucio/core/scope.py +181 -0
  117. rucio/core/subscription.py +362 -0
  118. rucio/core/topology.py +490 -0
  119. rucio/core/trace.py +375 -0
  120. rucio/core/transfer.py +1531 -0
  121. rucio/core/vo.py +169 -0
  122. rucio/core/volatile_replica.py +151 -0
  123. rucio/daemons/__init__.py +13 -0
  124. rucio/daemons/abacus/__init__.py +13 -0
  125. rucio/daemons/abacus/account.py +116 -0
  126. rucio/daemons/abacus/collection_replica.py +124 -0
  127. rucio/daemons/abacus/rse.py +117 -0
  128. rucio/daemons/atropos/__init__.py +13 -0
  129. rucio/daemons/atropos/atropos.py +242 -0
  130. rucio/daemons/auditor/__init__.py +289 -0
  131. rucio/daemons/auditor/hdfs.py +97 -0
  132. rucio/daemons/auditor/srmdumps.py +355 -0
  133. rucio/daemons/automatix/__init__.py +13 -0
  134. rucio/daemons/automatix/automatix.py +304 -0
  135. rucio/daemons/badreplicas/__init__.py +13 -0
  136. rucio/daemons/badreplicas/minos.py +322 -0
  137. rucio/daemons/badreplicas/minos_temporary_expiration.py +171 -0
  138. rucio/daemons/badreplicas/necromancer.py +196 -0
  139. rucio/daemons/bb8/__init__.py +13 -0
  140. rucio/daemons/bb8/bb8.py +353 -0
  141. rucio/daemons/bb8/common.py +759 -0
  142. rucio/daemons/bb8/nuclei_background_rebalance.py +153 -0
  143. rucio/daemons/bb8/t2_background_rebalance.py +153 -0
  144. rucio/daemons/cache/__init__.py +13 -0
  145. rucio/daemons/cache/consumer.py +133 -0
  146. rucio/daemons/common.py +405 -0
  147. rucio/daemons/conveyor/__init__.py +13 -0
  148. rucio/daemons/conveyor/common.py +562 -0
  149. rucio/daemons/conveyor/finisher.py +529 -0
  150. rucio/daemons/conveyor/poller.py +394 -0
  151. rucio/daemons/conveyor/preparer.py +205 -0
  152. rucio/daemons/conveyor/receiver.py +179 -0
  153. rucio/daemons/conveyor/stager.py +133 -0
  154. rucio/daemons/conveyor/submitter.py +403 -0
  155. rucio/daemons/conveyor/throttler.py +532 -0
  156. rucio/daemons/follower/__init__.py +13 -0
  157. rucio/daemons/follower/follower.py +101 -0
  158. rucio/daemons/hermes/__init__.py +13 -0
  159. rucio/daemons/hermes/hermes.py +534 -0
  160. rucio/daemons/judge/__init__.py +13 -0
  161. rucio/daemons/judge/cleaner.py +159 -0
  162. rucio/daemons/judge/evaluator.py +185 -0
  163. rucio/daemons/judge/injector.py +162 -0
  164. rucio/daemons/judge/repairer.py +154 -0
  165. rucio/daemons/oauthmanager/__init__.py +13 -0
  166. rucio/daemons/oauthmanager/oauthmanager.py +198 -0
  167. rucio/daemons/reaper/__init__.py +13 -0
  168. rucio/daemons/reaper/dark_reaper.py +282 -0
  169. rucio/daemons/reaper/reaper.py +739 -0
  170. rucio/daemons/replicarecoverer/__init__.py +13 -0
  171. rucio/daemons/replicarecoverer/suspicious_replica_recoverer.py +626 -0
  172. rucio/daemons/rsedecommissioner/__init__.py +13 -0
  173. rucio/daemons/rsedecommissioner/config.py +81 -0
  174. rucio/daemons/rsedecommissioner/profiles/__init__.py +24 -0
  175. rucio/daemons/rsedecommissioner/profiles/atlas.py +60 -0
  176. rucio/daemons/rsedecommissioner/profiles/generic.py +452 -0
  177. rucio/daemons/rsedecommissioner/profiles/types.py +93 -0
  178. rucio/daemons/rsedecommissioner/rse_decommissioner.py +280 -0
  179. rucio/daemons/storage/__init__.py +13 -0
  180. rucio/daemons/storage/consistency/__init__.py +13 -0
  181. rucio/daemons/storage/consistency/actions.py +848 -0
  182. rucio/daemons/tracer/__init__.py +13 -0
  183. rucio/daemons/tracer/kronos.py +511 -0
  184. rucio/daemons/transmogrifier/__init__.py +13 -0
  185. rucio/daemons/transmogrifier/transmogrifier.py +762 -0
  186. rucio/daemons/undertaker/__init__.py +13 -0
  187. rucio/daemons/undertaker/undertaker.py +137 -0
  188. rucio/db/__init__.py +13 -0
  189. rucio/db/sqla/__init__.py +52 -0
  190. rucio/db/sqla/constants.py +206 -0
  191. rucio/db/sqla/migrate_repo/__init__.py +13 -0
  192. rucio/db/sqla/migrate_repo/env.py +110 -0
  193. rucio/db/sqla/migrate_repo/versions/01eaf73ab656_add_new_rule_notification_state_progress.py +70 -0
  194. rucio/db/sqla/migrate_repo/versions/0437a40dbfd1_add_eol_at_in_rules.py +47 -0
  195. rucio/db/sqla/migrate_repo/versions/0f1adb7a599a_create_transfer_hops_table.py +59 -0
  196. rucio/db/sqla/migrate_repo/versions/102efcf145f4_added_stuck_at_column_to_rules.py +43 -0
  197. rucio/db/sqla/migrate_repo/versions/13d4f70c66a9_introduce_transfer_limits.py +91 -0
  198. rucio/db/sqla/migrate_repo/versions/140fef722e91_cleanup_distances_table.py +76 -0
  199. rucio/db/sqla/migrate_repo/versions/14ec5aeb64cf_add_request_external_host.py +43 -0
  200. rucio/db/sqla/migrate_repo/versions/156fb5b5a14_add_request_type_to_requests_idx.py +50 -0
  201. rucio/db/sqla/migrate_repo/versions/1677d4d803c8_split_rse_availability_into_multiple.py +68 -0
  202. rucio/db/sqla/migrate_repo/versions/16a0aca82e12_create_index_on_table_replicas_path.py +40 -0
  203. rucio/db/sqla/migrate_repo/versions/1803333ac20f_adding_provenance_and_phys_group.py +45 -0
  204. rucio/db/sqla/migrate_repo/versions/1a29d6a9504c_add_didtype_chck_to_requests.py +60 -0
  205. rucio/db/sqla/migrate_repo/versions/1a80adff031a_create_index_on_rules_hist_recent.py +40 -0
  206. rucio/db/sqla/migrate_repo/versions/1c45d9730ca6_increase_identity_length.py +140 -0
  207. rucio/db/sqla/migrate_repo/versions/1d1215494e95_add_quarantined_replicas_table.py +73 -0
  208. rucio/db/sqla/migrate_repo/versions/1d96f484df21_asynchronous_rules_and_rule_approval.py +74 -0
  209. rucio/db/sqla/migrate_repo/versions/1f46c5f240ac_add_bytes_column_to_bad_replicas.py +43 -0
  210. rucio/db/sqla/migrate_repo/versions/1fc15ab60d43_add_message_history_table.py +50 -0
  211. rucio/db/sqla/migrate_repo/versions/2190e703eb6e_move_rse_settings_to_rse_attributes.py +134 -0
  212. rucio/db/sqla/migrate_repo/versions/21d6b9dc9961_add_mismatch_scheme_state_to_requests.py +64 -0
  213. rucio/db/sqla/migrate_repo/versions/22cf51430c78_add_availability_column_to_table_rses.py +39 -0
  214. rucio/db/sqla/migrate_repo/versions/22d887e4ec0a_create_sources_table.py +64 -0
  215. rucio/db/sqla/migrate_repo/versions/25821a8a45a3_remove_unique_constraint_on_requests.py +51 -0
  216. rucio/db/sqla/migrate_repo/versions/25fc855625cf_added_unique_constraint_to_rules.py +41 -0
  217. rucio/db/sqla/migrate_repo/versions/269fee20dee9_add_repair_cnt_to_locks.py +43 -0
  218. rucio/db/sqla/migrate_repo/versions/271a46ea6244_add_ignore_availability_column_to_rules.py +44 -0
  219. rucio/db/sqla/migrate_repo/versions/277b5fbb41d3_switch_heartbeats_executable.py +53 -0
  220. rucio/db/sqla/migrate_repo/versions/27e3a68927fb_remove_replicas_tombstone_and_replicas_.py +38 -0
  221. rucio/db/sqla/migrate_repo/versions/2854cd9e168_added_rule_id_column.py +47 -0
  222. rucio/db/sqla/migrate_repo/versions/295289b5a800_processed_by_and__at_in_requests.py +45 -0
  223. rucio/db/sqla/migrate_repo/versions/2962ece31cf4_add_nbaccesses_column_in_the_did_table.py +45 -0
  224. rucio/db/sqla/migrate_repo/versions/2af3291ec4c_added_replicas_history_table.py +57 -0
  225. rucio/db/sqla/migrate_repo/versions/2b69addda658_add_columns_for_third_party_copy_read_.py +45 -0
  226. rucio/db/sqla/migrate_repo/versions/2b8e7bcb4783_add_config_table.py +69 -0
  227. rucio/db/sqla/migrate_repo/versions/2ba5229cb54c_add_submitted_at_to_requests_table.py +43 -0
  228. rucio/db/sqla/migrate_repo/versions/2cbee484dcf9_added_column_volume_to_rse_transfer_.py +42 -0
  229. rucio/db/sqla/migrate_repo/versions/2edee4a83846_add_source_to_requests_and_requests_.py +47 -0
  230. rucio/db/sqla/migrate_repo/versions/2eef46be23d4_change_tokens_pk.py +46 -0
  231. rucio/db/sqla/migrate_repo/versions/2f648fc909f3_index_in_rule_history_on_scope_name.py +40 -0
  232. rucio/db/sqla/migrate_repo/versions/3082b8cef557_add_naming_convention_table_and_closed_.py +67 -0
  233. rucio/db/sqla/migrate_repo/versions/30d5206e9cad_increase_oauthrequest_redirect_msg_.py +37 -0
  234. rucio/db/sqla/migrate_repo/versions/30fa38b6434e_add_index_on_service_column_in_the_message_table.py +44 -0
  235. rucio/db/sqla/migrate_repo/versions/3152492b110b_added_staging_area_column.py +77 -0
  236. rucio/db/sqla/migrate_repo/versions/32c7d2783f7e_create_bad_replicas_table.py +60 -0
  237. rucio/db/sqla/migrate_repo/versions/3345511706b8_replicas_table_pk_definition_is_in_.py +72 -0
  238. rucio/db/sqla/migrate_repo/versions/35ef10d1e11b_change_index_on_table_requests.py +42 -0
  239. rucio/db/sqla/migrate_repo/versions/379a19b5332d_create_rse_limits_table.py +65 -0
  240. rucio/db/sqla/migrate_repo/versions/384b96aa0f60_created_rule_history_tables.py +133 -0
  241. rucio/db/sqla/migrate_repo/versions/3ac1660a1a72_extend_distance_table.py +55 -0
  242. rucio/db/sqla/migrate_repo/versions/3ad36e2268b0_create_collection_replicas_updates_table.py +76 -0
  243. rucio/db/sqla/migrate_repo/versions/3c9df354071b_extend_waiting_request_state.py +60 -0
  244. rucio/db/sqla/migrate_repo/versions/3d9813fab443_add_a_new_state_lost_in_badfilesstatus.py +44 -0
  245. rucio/db/sqla/migrate_repo/versions/40ad39ce3160_add_transferred_at_to_requests_table.py +43 -0
  246. rucio/db/sqla/migrate_repo/versions/4207be2fd914_add_notification_column_to_rules.py +64 -0
  247. rucio/db/sqla/migrate_repo/versions/42db2617c364_create_index_on_requests_external_id.py +40 -0
  248. rucio/db/sqla/migrate_repo/versions/436827b13f82_added_column_activity_to_table_requests.py +43 -0
  249. rucio/db/sqla/migrate_repo/versions/44278720f774_update_requests_typ_sta_upd_idx_index.py +44 -0
  250. rucio/db/sqla/migrate_repo/versions/45378a1e76a8_create_collection_replica_table.py +78 -0
  251. rucio/db/sqla/migrate_repo/versions/469d262be19_removing_created_at_index.py +41 -0
  252. rucio/db/sqla/migrate_repo/versions/4783c1f49cb4_create_distance_table.py +59 -0
  253. rucio/db/sqla/migrate_repo/versions/49a21b4d4357_create_index_on_table_tokens.py +44 -0
  254. rucio/db/sqla/migrate_repo/versions/4a2cbedda8b9_add_source_replica_expression_column_to_.py +43 -0
  255. rucio/db/sqla/migrate_repo/versions/4a7182d9578b_added_bytes_length_accessed_at_columns.py +49 -0
  256. rucio/db/sqla/migrate_repo/versions/4bab9edd01fc_create_index_on_requests_rule_id.py +40 -0
  257. rucio/db/sqla/migrate_repo/versions/4c3a4acfe006_new_attr_account_table.py +63 -0
  258. rucio/db/sqla/migrate_repo/versions/4cf0a2e127d4_adding_transient_metadata.py +43 -0
  259. rucio/db/sqla/migrate_repo/versions/4df2c5ddabc0_remove_temporary_dids.py +55 -0
  260. rucio/db/sqla/migrate_repo/versions/50280c53117c_add_qos_class_to_rse.py +45 -0
  261. rucio/db/sqla/migrate_repo/versions/52153819589c_add_rse_id_to_replicas_table.py +43 -0
  262. rucio/db/sqla/migrate_repo/versions/52fd9f4916fa_added_activity_to_rules.py +43 -0
  263. rucio/db/sqla/migrate_repo/versions/53b479c3cb0f_fix_did_meta_table_missing_updated_at_.py +45 -0
  264. rucio/db/sqla/migrate_repo/versions/5673b4b6e843_add_wfms_metadata_to_rule_tables.py +47 -0
  265. rucio/db/sqla/migrate_repo/versions/575767d9f89_added_source_history_table.py +58 -0
  266. rucio/db/sqla/migrate_repo/versions/58bff7008037_add_started_at_to_requests.py +45 -0
  267. rucio/db/sqla/migrate_repo/versions/58c8b78301ab_rename_callback_to_message.py +106 -0
  268. rucio/db/sqla/migrate_repo/versions/5f139f77382a_added_child_rule_id_column.py +55 -0
  269. rucio/db/sqla/migrate_repo/versions/688ef1840840_adding_did_meta_table.py +50 -0
  270. rucio/db/sqla/migrate_repo/versions/6e572a9bfbf3_add_new_split_container_column_to_rules.py +47 -0
  271. rucio/db/sqla/migrate_repo/versions/70587619328_add_comment_column_for_subscriptions.py +43 -0
  272. rucio/db/sqla/migrate_repo/versions/739064d31565_remove_history_table_pks.py +41 -0
  273. rucio/db/sqla/migrate_repo/versions/7541902bf173_add_didsfollowed_and_followevents_table.py +91 -0
  274. rucio/db/sqla/migrate_repo/versions/7ec22226cdbf_new_replica_state_for_temporary_.py +72 -0
  275. rucio/db/sqla/migrate_repo/versions/810a41685bc1_added_columns_rse_transfer_limits.py +49 -0
  276. rucio/db/sqla/migrate_repo/versions/83f991c63a93_correct_rse_expression_length.py +43 -0
  277. rucio/db/sqla/migrate_repo/versions/8523998e2e76_increase_size_of_extended_attributes_.py +43 -0
  278. rucio/db/sqla/migrate_repo/versions/8ea9122275b1_adding_missing_function_based_indices.py +53 -0
  279. rucio/db/sqla/migrate_repo/versions/90f47792bb76_add_clob_payload_to_messages.py +45 -0
  280. rucio/db/sqla/migrate_repo/versions/914b8f02df38_new_table_for_lifetime_model_exceptions.py +68 -0
  281. rucio/db/sqla/migrate_repo/versions/94a5961ddbf2_add_estimator_columns.py +45 -0
  282. rucio/db/sqla/migrate_repo/versions/9a1b149a2044_add_saml_identity_type.py +94 -0
  283. rucio/db/sqla/migrate_repo/versions/9a45bc4ea66d_add_vp_table.py +54 -0
  284. rucio/db/sqla/migrate_repo/versions/9eb936a81eb1_true_is_true.py +72 -0
  285. rucio/db/sqla/migrate_repo/versions/a08fa8de1545_transfer_stats_table.py +55 -0
  286. rucio/db/sqla/migrate_repo/versions/a118956323f8_added_vo_table_and_vo_col_to_rse.py +76 -0
  287. rucio/db/sqla/migrate_repo/versions/a193a275255c_add_status_column_in_messages.py +47 -0
  288. rucio/db/sqla/migrate_repo/versions/a5f6f6e928a7_1_7_0.py +121 -0
  289. rucio/db/sqla/migrate_repo/versions/a616581ee47_added_columns_to_table_requests.py +59 -0
  290. rucio/db/sqla/migrate_repo/versions/a6eb23955c28_state_idx_non_functional.py +52 -0
  291. rucio/db/sqla/migrate_repo/versions/a74275a1ad30_added_global_quota_table.py +54 -0
  292. rucio/db/sqla/migrate_repo/versions/a93e4e47bda_heartbeats.py +64 -0
  293. rucio/db/sqla/migrate_repo/versions/ae2a56fcc89_added_comment_column_to_rules.py +49 -0
  294. rucio/db/sqla/migrate_repo/versions/b0070f3695c8_add_deletedidmeta_table.py +57 -0
  295. rucio/db/sqla/migrate_repo/versions/b4293a99f344_added_column_identity_to_table_tokens.py +43 -0
  296. rucio/db/sqla/migrate_repo/versions/b5493606bbf5_fix_primary_key_for_subscription_history.py +41 -0
  297. rucio/db/sqla/migrate_repo/versions/b7d287de34fd_removal_of_replicastate_source.py +91 -0
  298. rucio/db/sqla/migrate_repo/versions/b818052fa670_add_index_to_quarantined_replicas.py +40 -0
  299. rucio/db/sqla/migrate_repo/versions/b8caac94d7f0_add_comments_column_for_subscriptions_.py +43 -0
  300. rucio/db/sqla/migrate_repo/versions/b96a1c7e1cc4_new_bad_pfns_table_and_bad_replicas_.py +143 -0
  301. rucio/db/sqla/migrate_repo/versions/bb695f45c04_extend_request_state.py +76 -0
  302. rucio/db/sqla/migrate_repo/versions/bc68e9946deb_add_staging_timestamps_to_request.py +50 -0
  303. rucio/db/sqla/migrate_repo/versions/bf3baa1c1474_correct_pk_and_idx_for_history_tables.py +72 -0
  304. rucio/db/sqla/migrate_repo/versions/c0937668555f_add_qos_policy_map_table.py +55 -0
  305. rucio/db/sqla/migrate_repo/versions/c129ccdb2d5_add_lumiblocknr_to_dids.py +43 -0
  306. rucio/db/sqla/migrate_repo/versions/ccdbcd48206e_add_did_type_column_index_on_did_meta_.py +65 -0
  307. rucio/db/sqla/migrate_repo/versions/cebad904c4dd_new_payload_column_for_heartbeats.py +47 -0
  308. rucio/db/sqla/migrate_repo/versions/d1189a09c6e0_oauth2_0_and_jwt_feature_support_adding_.py +146 -0
  309. rucio/db/sqla/migrate_repo/versions/d23453595260_extend_request_state_for_preparer.py +104 -0
  310. rucio/db/sqla/migrate_repo/versions/d6dceb1de2d_added_purge_column_to_rules.py +44 -0
  311. rucio/db/sqla/migrate_repo/versions/d6e2c3b2cf26_remove_third_party_copy_column_from_rse.py +43 -0
  312. rucio/db/sqla/migrate_repo/versions/d91002c5841_new_account_limits_table.py +103 -0
  313. rucio/db/sqla/migrate_repo/versions/e138c364ebd0_extending_columns_for_filter_and_.py +49 -0
  314. rucio/db/sqla/migrate_repo/versions/e59300c8b179_support_for_archive.py +104 -0
  315. rucio/db/sqla/migrate_repo/versions/f1b14a8c2ac1_postgres_use_check_constraints.py +29 -0
  316. rucio/db/sqla/migrate_repo/versions/f41ffe206f37_oracle_global_temporary_tables.py +74 -0
  317. rucio/db/sqla/migrate_repo/versions/f85a2962b021_adding_transfertool_column_to_requests_.py +47 -0
  318. rucio/db/sqla/migrate_repo/versions/fa7a7d78b602_increase_refresh_token_size.py +43 -0
  319. rucio/db/sqla/migrate_repo/versions/fb28a95fe288_add_replicas_rse_id_tombstone_idx.py +37 -0
  320. rucio/db/sqla/migrate_repo/versions/fe1a65b176c9_set_third_party_copy_read_and_write_.py +43 -0
  321. rucio/db/sqla/migrate_repo/versions/fe8ea2fa9788_added_third_party_copy_column_to_rse_.py +43 -0
  322. rucio/db/sqla/models.py +1743 -0
  323. rucio/db/sqla/sautils.py +55 -0
  324. rucio/db/sqla/session.py +529 -0
  325. rucio/db/sqla/types.py +206 -0
  326. rucio/db/sqla/util.py +543 -0
  327. rucio/gateway/__init__.py +13 -0
  328. rucio/gateway/account.py +345 -0
  329. rucio/gateway/account_limit.py +363 -0
  330. rucio/gateway/authentication.py +381 -0
  331. rucio/gateway/config.py +227 -0
  332. rucio/gateway/credential.py +70 -0
  333. rucio/gateway/did.py +987 -0
  334. rucio/gateway/dirac.py +83 -0
  335. rucio/gateway/exporter.py +60 -0
  336. rucio/gateway/heartbeat.py +76 -0
  337. rucio/gateway/identity.py +189 -0
  338. rucio/gateway/importer.py +46 -0
  339. rucio/gateway/lifetime_exception.py +121 -0
  340. rucio/gateway/lock.py +153 -0
  341. rucio/gateway/meta_conventions.py +98 -0
  342. rucio/gateway/permission.py +74 -0
  343. rucio/gateway/quarantined_replica.py +79 -0
  344. rucio/gateway/replica.py +538 -0
  345. rucio/gateway/request.py +330 -0
  346. rucio/gateway/rse.py +632 -0
  347. rucio/gateway/rule.py +437 -0
  348. rucio/gateway/scope.py +100 -0
  349. rucio/gateway/subscription.py +280 -0
  350. rucio/gateway/vo.py +126 -0
  351. rucio/rse/__init__.py +96 -0
  352. rucio/rse/protocols/__init__.py +13 -0
  353. rucio/rse/protocols/bittorrent.py +194 -0
  354. rucio/rse/protocols/cache.py +111 -0
  355. rucio/rse/protocols/dummy.py +100 -0
  356. rucio/rse/protocols/gfal.py +708 -0
  357. rucio/rse/protocols/globus.py +243 -0
  358. rucio/rse/protocols/http_cache.py +82 -0
  359. rucio/rse/protocols/mock.py +123 -0
  360. rucio/rse/protocols/ngarc.py +209 -0
  361. rucio/rse/protocols/posix.py +250 -0
  362. rucio/rse/protocols/protocol.py +361 -0
  363. rucio/rse/protocols/rclone.py +365 -0
  364. rucio/rse/protocols/rfio.py +145 -0
  365. rucio/rse/protocols/srm.py +338 -0
  366. rucio/rse/protocols/ssh.py +414 -0
  367. rucio/rse/protocols/storm.py +195 -0
  368. rucio/rse/protocols/webdav.py +594 -0
  369. rucio/rse/protocols/xrootd.py +302 -0
  370. rucio/rse/rsemanager.py +881 -0
  371. rucio/rse/translation.py +260 -0
  372. rucio/tests/__init__.py +13 -0
  373. rucio/tests/common.py +280 -0
  374. rucio/tests/common_server.py +149 -0
  375. rucio/transfertool/__init__.py +13 -0
  376. rucio/transfertool/bittorrent.py +200 -0
  377. rucio/transfertool/bittorrent_driver.py +50 -0
  378. rucio/transfertool/bittorrent_driver_qbittorrent.py +134 -0
  379. rucio/transfertool/fts3.py +1600 -0
  380. rucio/transfertool/fts3_plugins.py +152 -0
  381. rucio/transfertool/globus.py +201 -0
  382. rucio/transfertool/globus_library.py +181 -0
  383. rucio/transfertool/mock.py +89 -0
  384. rucio/transfertool/transfertool.py +221 -0
  385. rucio/vcsversion.py +11 -0
  386. rucio/version.py +45 -0
  387. rucio/web/__init__.py +13 -0
  388. rucio/web/rest/__init__.py +13 -0
  389. rucio/web/rest/flaskapi/__init__.py +13 -0
  390. rucio/web/rest/flaskapi/authenticated_bp.py +27 -0
  391. rucio/web/rest/flaskapi/v1/__init__.py +13 -0
  392. rucio/web/rest/flaskapi/v1/accountlimits.py +236 -0
  393. rucio/web/rest/flaskapi/v1/accounts.py +1103 -0
  394. rucio/web/rest/flaskapi/v1/archives.py +102 -0
  395. rucio/web/rest/flaskapi/v1/auth.py +1644 -0
  396. rucio/web/rest/flaskapi/v1/common.py +426 -0
  397. rucio/web/rest/flaskapi/v1/config.py +304 -0
  398. rucio/web/rest/flaskapi/v1/credentials.py +213 -0
  399. rucio/web/rest/flaskapi/v1/dids.py +2340 -0
  400. rucio/web/rest/flaskapi/v1/dirac.py +116 -0
  401. rucio/web/rest/flaskapi/v1/export.py +75 -0
  402. rucio/web/rest/flaskapi/v1/heartbeats.py +127 -0
  403. rucio/web/rest/flaskapi/v1/identities.py +285 -0
  404. rucio/web/rest/flaskapi/v1/import.py +132 -0
  405. rucio/web/rest/flaskapi/v1/lifetime_exceptions.py +312 -0
  406. rucio/web/rest/flaskapi/v1/locks.py +358 -0
  407. rucio/web/rest/flaskapi/v1/main.py +91 -0
  408. rucio/web/rest/flaskapi/v1/meta_conventions.py +241 -0
  409. rucio/web/rest/flaskapi/v1/metrics.py +36 -0
  410. rucio/web/rest/flaskapi/v1/nongrid_traces.py +97 -0
  411. rucio/web/rest/flaskapi/v1/ping.py +88 -0
  412. rucio/web/rest/flaskapi/v1/redirect.py +366 -0
  413. rucio/web/rest/flaskapi/v1/replicas.py +1894 -0
  414. rucio/web/rest/flaskapi/v1/requests.py +998 -0
  415. rucio/web/rest/flaskapi/v1/rses.py +2250 -0
  416. rucio/web/rest/flaskapi/v1/rules.py +854 -0
  417. rucio/web/rest/flaskapi/v1/scopes.py +159 -0
  418. rucio/web/rest/flaskapi/v1/subscriptions.py +650 -0
  419. rucio/web/rest/flaskapi/v1/templates/auth_crash.html +80 -0
  420. rucio/web/rest/flaskapi/v1/templates/auth_granted.html +82 -0
  421. rucio/web/rest/flaskapi/v1/traces.py +137 -0
  422. rucio/web/rest/flaskapi/v1/types.py +20 -0
  423. rucio/web/rest/flaskapi/v1/vos.py +278 -0
  424. rucio/web/rest/main.py +18 -0
  425. rucio/web/rest/metrics.py +27 -0
  426. rucio/web/rest/ping.py +27 -0
  427. rucio-37.0.0rc1.data/data/rucio/etc/alembic.ini.template +71 -0
  428. rucio-37.0.0rc1.data/data/rucio/etc/alembic_offline.ini.template +74 -0
  429. rucio-37.0.0rc1.data/data/rucio/etc/globus-config.yml.template +5 -0
  430. rucio-37.0.0rc1.data/data/rucio/etc/ldap.cfg.template +30 -0
  431. rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_approval_request.tmpl +38 -0
  432. rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_approved_admin.tmpl +4 -0
  433. rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_approved_user.tmpl +17 -0
  434. rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_denied_admin.tmpl +6 -0
  435. rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_denied_user.tmpl +17 -0
  436. rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_ok_notification.tmpl +19 -0
  437. rucio-37.0.0rc1.data/data/rucio/etc/rse-accounts.cfg.template +25 -0
  438. rucio-37.0.0rc1.data/data/rucio/etc/rucio.cfg.atlas.client.template +43 -0
  439. rucio-37.0.0rc1.data/data/rucio/etc/rucio.cfg.template +241 -0
  440. rucio-37.0.0rc1.data/data/rucio/etc/rucio_multi_vo.cfg.template +217 -0
  441. rucio-37.0.0rc1.data/data/rucio/requirements.server.txt +297 -0
  442. rucio-37.0.0rc1.data/data/rucio/tools/bootstrap.py +34 -0
  443. rucio-37.0.0rc1.data/data/rucio/tools/merge_rucio_configs.py +144 -0
  444. rucio-37.0.0rc1.data/data/rucio/tools/reset_database.py +40 -0
  445. rucio-37.0.0rc1.data/scripts/rucio +133 -0
  446. rucio-37.0.0rc1.data/scripts/rucio-abacus-account +74 -0
  447. rucio-37.0.0rc1.data/scripts/rucio-abacus-collection-replica +46 -0
  448. rucio-37.0.0rc1.data/scripts/rucio-abacus-rse +78 -0
  449. rucio-37.0.0rc1.data/scripts/rucio-admin +97 -0
  450. rucio-37.0.0rc1.data/scripts/rucio-atropos +60 -0
  451. rucio-37.0.0rc1.data/scripts/rucio-auditor +206 -0
  452. rucio-37.0.0rc1.data/scripts/rucio-automatix +50 -0
  453. rucio-37.0.0rc1.data/scripts/rucio-bb8 +57 -0
  454. rucio-37.0.0rc1.data/scripts/rucio-cache-client +141 -0
  455. rucio-37.0.0rc1.data/scripts/rucio-cache-consumer +42 -0
  456. rucio-37.0.0rc1.data/scripts/rucio-conveyor-finisher +58 -0
  457. rucio-37.0.0rc1.data/scripts/rucio-conveyor-poller +66 -0
  458. rucio-37.0.0rc1.data/scripts/rucio-conveyor-preparer +37 -0
  459. rucio-37.0.0rc1.data/scripts/rucio-conveyor-receiver +44 -0
  460. rucio-37.0.0rc1.data/scripts/rucio-conveyor-stager +76 -0
  461. rucio-37.0.0rc1.data/scripts/rucio-conveyor-submitter +139 -0
  462. rucio-37.0.0rc1.data/scripts/rucio-conveyor-throttler +104 -0
  463. rucio-37.0.0rc1.data/scripts/rucio-dark-reaper +53 -0
  464. rucio-37.0.0rc1.data/scripts/rucio-dumper +160 -0
  465. rucio-37.0.0rc1.data/scripts/rucio-follower +44 -0
  466. rucio-37.0.0rc1.data/scripts/rucio-hermes +54 -0
  467. rucio-37.0.0rc1.data/scripts/rucio-judge-cleaner +89 -0
  468. rucio-37.0.0rc1.data/scripts/rucio-judge-evaluator +137 -0
  469. rucio-37.0.0rc1.data/scripts/rucio-judge-injector +44 -0
  470. rucio-37.0.0rc1.data/scripts/rucio-judge-repairer +44 -0
  471. rucio-37.0.0rc1.data/scripts/rucio-kronos +44 -0
  472. rucio-37.0.0rc1.data/scripts/rucio-minos +53 -0
  473. rucio-37.0.0rc1.data/scripts/rucio-minos-temporary-expiration +50 -0
  474. rucio-37.0.0rc1.data/scripts/rucio-necromancer +120 -0
  475. rucio-37.0.0rc1.data/scripts/rucio-oauth-manager +63 -0
  476. rucio-37.0.0rc1.data/scripts/rucio-reaper +83 -0
  477. rucio-37.0.0rc1.data/scripts/rucio-replica-recoverer +248 -0
  478. rucio-37.0.0rc1.data/scripts/rucio-rse-decommissioner +66 -0
  479. rucio-37.0.0rc1.data/scripts/rucio-storage-consistency-actions +74 -0
  480. rucio-37.0.0rc1.data/scripts/rucio-transmogrifier +77 -0
  481. rucio-37.0.0rc1.data/scripts/rucio-undertaker +76 -0
  482. rucio-37.0.0rc1.dist-info/METADATA +92 -0
  483. rucio-37.0.0rc1.dist-info/RECORD +487 -0
  484. rucio-37.0.0rc1.dist-info/WHEEL +5 -0
  485. rucio-37.0.0rc1.dist-info/licenses/AUTHORS.rst +100 -0
  486. rucio-37.0.0rc1.dist-info/licenses/LICENSE +201 -0
  487. rucio-37.0.0rc1.dist-info/top_level.txt +1 -0
rucio/core/replica.py ADDED
@@ -0,0 +1,4483 @@
+ # Copyright European Organization for Nuclear Research (CERN) since 2012
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #    http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import copy
+ import heapq
+ import logging
+ import math
+ import random
+ from collections import defaultdict, namedtuple
+ from curses.ascii import isprint
+ from datetime import datetime, timedelta
+ from hashlib import sha256
+ from itertools import groupby
+ from json import dumps
+ from re import match
+ from struct import unpack
+ from traceback import format_exc
+ from typing import TYPE_CHECKING, Any, Literal, Optional, Union
+
+ import requests
+ from dogpile.cache.api import NO_VALUE
+ from sqlalchemy import and_, delete, exists, func, insert, not_, or_, union, update
+ from sqlalchemy.exc import DatabaseError, IntegrityError
+ from sqlalchemy.orm import aliased
+ from sqlalchemy.orm.exc import FlushError, NoResultFound
+ from sqlalchemy.sql.expression import ColumnElement, case, false, literal, literal_column, null, select, text, true
+
+ import rucio.core.did
+ import rucio.core.lock
+ from rucio.common import exception
+ from rucio.common.cache import MemcacheRegion
+ from rucio.common.config import config_get, config_get_bool
+ from rucio.common.constants import RseAttr, SuspiciousAvailability
+ from rucio.common.types import InternalAccount, InternalScope, LFNDict, is_str_list
+ from rucio.common.utils import add_url_query, chunks, clean_pfns, str_to_date
+ from rucio.core.credential import get_signed_url
+ from rucio.core.message import add_messages
+ from rucio.core.monitor import MetricManager
+ from rucio.core.rse import get_rse, get_rse_attribute, get_rse_name, get_rse_vo, list_rses
+ from rucio.core.rse_counter import decrease, increase
+ from rucio.core.rse_expression_parser import parse_expression
+ from rucio.db.sqla import filter_thread_work, models
+ from rucio.db.sqla.constants import OBSOLETE, BadFilesStatus, BadPFNStatus, DIDAvailability, DIDType, ReplicaState, RuleState
+ from rucio.db.sqla.session import BASE, DEFAULT_SCHEMA_NAME, read_session, stream_session, transactional_session
+ from rucio.db.sqla.util import temp_table_mngr
+ from rucio.rse import rsemanager as rsemgr
+
+ if TYPE_CHECKING:
+     from collections.abc import Iterable, Iterator, Mapping, Sequence
+
+     from sqlalchemy.engine import Row
+     from sqlalchemy.orm import Session
+     from sqlalchemy.sql.selectable import Select, Subquery
+
+     from rucio.common.types import LoggerFunction
+     from rucio.rse.protocols.protocol import RSEProtocol
+
+ REGION = MemcacheRegion(expiration_time=60)
+ METRICS = MetricManager(module=__name__)
+
+
+ ScopeName = namedtuple('ScopeName', ['scope', 'name'])
+ Association = namedtuple('Association', ['scope', 'name', 'child_scope', 'child_name'])
+
+
+ @read_session
+ def get_bad_replicas_summary(
+     rse_expression: Optional[str] = None,
+     from_date: Optional[datetime] = None,
+     to_date: Optional[datetime] = None,
+     filter_: Optional[dict[str, Any]] = None,
+     *,
+     session: "Session"
+ ) -> list[dict[str, Any]]:
+     """
+     List the bad file replicas summary. Method used by the rucio-ui.
+     :param rse_expression: The RSE expression.
+     :param from_date: The start date.
+     :param to_date: The end date.
+     :param filter_: Dictionary of attributes by which the RSE results should be filtered. e.g.: {'availability_write': True}
+     :param session: The database session in use.
+     """
+     result = []
+     incidents = {}
+     rse_clause = []
+     if rse_expression:
+         for rse in parse_expression(expression=rse_expression, filter_=filter_, session=session):
+             rse_clause.append(models.BadReplica.rse_id == rse['id'])
+     elif filter_:
+         # Ensure we limit results to current VO even if we don't specify an RSE expression
+         for rse in list_rses(filters=filter_, session=session):
+             rse_clause.append(models.BadReplica.rse_id == rse['id'])
+
+     if session.bind.dialect.name == 'oracle':  # type: ignore
+         to_days = func.trunc(models.BadReplica.created_at, 'DD')
+     elif session.bind.dialect.name == 'mysql':  # type: ignore
+         to_days = func.date(models.BadReplica.created_at)
+     elif session.bind.dialect.name == 'postgresql':  # type: ignore
+         to_days = func.date_trunc('day', models.BadReplica.created_at)
+     else:
+         to_days = func.strftime(models.BadReplica.created_at, '%Y-%m-%d')
+
+     stmt = select(
+         func.count(),
+         to_days,
+         models.BadReplica.rse_id,
+         models.BadReplica.state,
+         models.BadReplica.reason
+     ).select_from(
+         models.BadReplica
+     )
+     # To be added : HINTS
+     if rse_clause != []:
+         stmt = stmt.where(or_(*rse_clause))
+     if from_date:
+         stmt = stmt.where(models.BadReplica.created_at > from_date)
+     if to_date:
+         stmt = stmt.where(models.BadReplica.created_at < to_date)
+     stmt = stmt.group_by(to_days, models.BadReplica.rse_id, models.BadReplica.reason, models.BadReplica.state)
+     for count, to_days, rse_id, state, reason in session.execute(stmt):
+         if (rse_id, to_days, reason) not in incidents:
+             incidents[(rse_id, to_days, reason)] = {}
+         incidents[(rse_id, to_days, reason)][str(state.name)] = count
+
+     for incident in incidents:
+         res = incidents[incident]
+         res['rse_id'] = incident[0]
+         res['rse'] = get_rse_name(rse_id=incident[0], session=session)
+         res['created_at'] = incident[1]
+         res['reason'] = incident[2]
+         result.append(res)
+
+     return result
+
+
+ @read_session
+ def __exist_replicas(
+     rse_id: str,
+     replicas: list[tuple[Optional[str], Optional[str], Optional[str]]],
+     *,
+     session: "Session"
+ ) -> list[
+     tuple[
+         str,
+         str,
+         str,
+         bool,
+         bool,
+         Optional[int]
+     ]
+ ]:
+     """
+     Internal method to check if a replica exists at a given site.
+     :param rse_id: The RSE id.
+     :param replicas: A list of tuples [(<scope>, <name>, <path>)] with either:
+                      - scope and name are None and path not None
+                      - scope and name are not None and path is None
+     :param session: The database session in use.
+
+     :returns: A list of tuple (<scope>, <name>, <path>, <exists>, <already_declared>, <bytes>)
+               where
+               - <exists> is a boolean that identifies if the replica exists
+               - <already_declared> is a boolean that identifies if the replica is already declared bad
+     """
+
+     return_list = []
+     path_clause: list[ColumnElement[bool]] = []
+     did_clause: list[ColumnElement[bool]] = []
+     for scope, name, path in replicas:
+         if path:
+             path_clause.append(models.RSEFileAssociation.path == path)
+             if path.startswith('/'):
+                 path_clause.append(models.RSEFileAssociation.path == path[1:])
+             else:
+                 path_clause.append(models.RSEFileAssociation.path == '/%s' % path)
+         else:
+             did_clause.append(and_(models.RSEFileAssociation.scope == scope,
+                                    models.RSEFileAssociation.name == name))
+
+     for clause in [path_clause, did_clause]:
+         if clause:
+             for chunk in chunks(clause, 10):
+                 stmt = select(
+                     models.RSEFileAssociation.path,
+                     models.RSEFileAssociation.scope,
+                     models.RSEFileAssociation.name,
+                     models.RSEFileAssociation.rse_id,
+                     models.RSEFileAssociation.bytes,
+                     func.max(
+                         case(
+                             (models.BadReplica.state == BadFilesStatus.SUSPICIOUS, 0),
+                             (models.BadReplica.state == BadFilesStatus.BAD, 1),
+                             else_=0))
+                 ).with_hint(
+                     models.RSEFileAssociation,
+                     'INDEX(REPLICAS REPLICAS_PATH_IDX)',
+                     'oracle'
+                 ).outerjoin(
+                     models.BadReplica,
+                     and_(models.RSEFileAssociation.scope == models.BadReplica.scope,
+                          models.RSEFileAssociation.name == models.BadReplica.name,
+                          models.RSEFileAssociation.rse_id == models.BadReplica.rse_id)
+                 ).where(
+                     and_(models.RSEFileAssociation.rse_id == rse_id,
+                          or_(*chunk))
+                 ).group_by(
+                     models.RSEFileAssociation.path,
+                     models.RSEFileAssociation.scope,
+                     models.RSEFileAssociation.name,
+                     models.RSEFileAssociation.rse_id,
+                     models.RSEFileAssociation.bytes
+                 )
+
+                 for path, scope, name, rse_id, size, state in session.execute(stmt).all():
+                     if (scope, name, path) in replicas:
+                         replicas.remove((scope, name, path))
+                     if (None, None, path) in replicas:
+                         replicas.remove((None, None, path))
+                     if (scope, name, None) in replicas:
+                         replicas.remove((scope, name, None))
+                     already_declared = False
+                     if state == 1:
+                         already_declared = True
+                     return_list.append((scope, name, path, True, already_declared, size))
+
+     for scope, name, path in replicas:
+         return_list.append((scope, name, path, False, False, None))
+
+     return return_list
+
+
+ @read_session
+ def list_bad_replicas_status(
+     state: BadFilesStatus = BadFilesStatus.BAD,
+     rse_id: Optional[str] = None,
+     younger_than: Optional[datetime] = None,
+     older_than: Optional[datetime] = None,
+     limit: Optional[int] = None,
+     list_pfns: Optional[bool] = False,
+     vo: str = 'def',
+     *,
+     session: "Session"
+ ) -> list[dict[str, Any]]:
+     """
+     List the bad file replicas history states. Method used by the rucio-ui.
+     :param state: The state of the file (SUSPICIOUS or BAD).
+     :param rse_id: The RSE id.
+     :param younger_than: datetime object to select bad replicas younger than this date.
+     :param older_than: datetime object to select bad replicas older than this date.
+     :param limit: The maximum number of replicas returned.
+     :param list_pfns: If True, return a list of PFNs instead of replica dictionaries.
+     :param vo: The VO to find replicas from.
+     :param session: The database session in use.
+     """
+     result = []
+     stmt = select(
+         models.BadReplica.scope,
+         models.BadReplica.name,
+         models.BadReplica.rse_id,
+         models.BadReplica.state,
+         models.BadReplica.created_at,
+         models.BadReplica.updated_at
+     )
+     if state:
+         stmt = stmt.where(models.BadReplica.state == state)
+     if rse_id:
+         stmt = stmt.where(models.BadReplica.rse_id == rse_id)
+     if younger_than:
+         stmt = stmt.where(models.BadReplica.created_at >= younger_than)
+     if older_than:
+         stmt = stmt.where(models.BadReplica.created_at <= older_than)
+     if limit:
+         stmt = stmt.limit(limit)
+
+     for badfile in session.execute(stmt).yield_per(1000):
+         if badfile.scope.vo == vo:
+             if list_pfns:
+                 result.append({'scope': badfile.scope, 'name': badfile.name, 'type': DIDType.FILE})
+             else:
+                 result.append({'scope': badfile.scope, 'name': badfile.name, 'rse': get_rse_name(rse_id=badfile.rse_id, session=session), 'rse_id': badfile.rse_id, 'state': badfile.state, 'created_at': badfile.created_at, 'updated_at': badfile.updated_at})
+     if list_pfns:
+         reps = []
+         for rep in list_replicas(result, schemes=None, unavailable=False, request_id=None, ignore_availability=True, all_states=True, session=session):
+             pfn = None
+             if rse_id in rep['rses'] and rep['rses'][rse_id]:
+                 pfn = rep['rses'][rse_id][0]
+                 if pfn and pfn not in reps:
+                     reps.append(pfn)
+             else:
+                 reps.extend([item for row in rep['rses'].values() for item in row])
+         reps = list(set(reps))  # deduplicate the collected PFNs
+         result = reps
+     return result
+
+
+ @transactional_session
+ def __declare_bad_file_replicas(
+     pfns: list[Union[str, dict[str, Any]]],
+     rse_id: str,
+     reason: str,
+     issuer: InternalAccount,
+     status: BadFilesStatus = BadFilesStatus.BAD,
+     scheme: str = 'srm',
+     force: bool = False,
+     logger: "LoggerFunction" = logging.log,
+     *,
+     session: "Session"
+ ) -> list[str]:
+     """
+     Declare a list of bad replicas.
+
+     :param pfns: Either a list of PFNs (string) or a list of replicas {'scope': <scope>, 'name': <name>, 'rse_id': <rse_id>}.
+     :param rse_id: The RSE id.
+     :param reason: The reason of the loss.
+     :param issuer: The issuer account.
+     :param status: Either BAD or SUSPICIOUS.
+     :param scheme: The scheme of the PFNs.
+     :param force: boolean, if declaring BAD replica, ignore existing replica status in the bad_replicas table. Default: False
+     :param session: The database session in use.
+     """
+     unknown_replicas: list[str] = []
+     replicas: list[dict[str, Any]] = []
+     path_pfn_dict: dict[str, str] = {}
+
+     if len(pfns) > 0 and is_str_list(pfns):
+         # If pfns is a list of PFNs, the scope and names need to be extracted from the path
+         rse_info = rsemgr.get_rse_info(rse_id=rse_id, session=session)
+         proto = rsemgr.create_protocol(rse_info, 'read', scheme=scheme)
+         if rse_info['deterministic']:
+             scope_proto = rsemgr.get_scope_protocol(vo=issuer.vo)
+             parsed_pfn = proto.parse_pfns(pfns=pfns)
+             for pfn in parsed_pfn:
+                 # Translate into a scope and name
+                 name, scope = scope_proto(parsed_pfn[pfn])
+
+                 scope = InternalScope(scope, vo=issuer.vo)
+                 replicas.append({'scope': scope, 'name': name, 'rse_id': rse_id, 'state': status})
+                 path = '%s%s' % (parsed_pfn[pfn]['path'], parsed_pfn[pfn]['name'])
+                 path_pfn_dict[path] = pfn
+                 logger(logging.DEBUG, f"Declaring replica {scope}:{name} {status} at {rse_id} with path {path}")
+
+         else:
+             # For non-deterministic RSEs use the path + rse_id to extract the scope
+             parsed_pfn = proto.parse_pfns(pfns=pfns)
+             for pfn in parsed_pfn:
+                 path = '%s%s' % (parsed_pfn[pfn]['path'], parsed_pfn[pfn]['name'])
+                 replicas.append({'scope': None, 'name': None, 'rse_id': rse_id, 'path': path, 'state': status})
+                 path_pfn_dict[path] = pfn
+
+                 logger(logging.DEBUG, f"Declaring replica with pfn: {pfn} {status} at {rse_id} with path {path}")
+
+     else:
+         # If pfns is a list of replicas, just use scope, name and rse_id
+         for pfn in pfns:
+             replicas.append({'scope': pfn['scope'], 'name': pfn['name'], 'rse_id': rse_id, 'state': status})  # type: ignore
+             logger(logging.DEBUG, f"Declaring replica {pfn['scope']}:{pfn['name']} {status} at {rse_id} without path")  # type: ignore
+
+     replicas_list = []
+     for replica in replicas:
+         scope, name, rse_id, path = replica['scope'], replica['name'], replica['rse_id'], replica.get('path', None)
+         replicas_list.append((scope, name, path))
+
+     bad_replicas_to_update = []
+
+     for scope, name, path, __exists, already_declared, size in __exist_replicas(rse_id=rse_id, replicas=replicas_list, session=session):
+         declared = False
+
+         if __exists:
+
+             if status == BadFilesStatus.BAD and (force or not already_declared):
+                 bad_replicas_to_update.append({'scope': scope, 'name': name, 'rse_id': rse_id, 'state': ReplicaState.BAD})
+                 declared = True
+
+             if status == BadFilesStatus.SUSPICIOUS or (status == BadFilesStatus.BAD and not already_declared):
+                 new_bad_replica = models.BadReplica(scope=scope, name=name, rse_id=rse_id, reason=reason, state=status, account=issuer, bytes=size)
+                 new_bad_replica.save(session=session, flush=False)
+                 declared = True
+
+         if not declared:
+             if already_declared:
+                 unknown_replicas.append('%s %s' % (path_pfn_dict.get(path, '%s:%s' % (scope, name)), 'Already declared'))
+             elif path:
+                 no_hidden_char = True
+                 for char in str(path):
+                     if not isprint(char):
+                         unknown_replicas.append('%s %s' % (path, 'PFN contains hidden chars'))
+                         no_hidden_char = False
+                         break
+                 if no_hidden_char:
+                     pfn = path_pfn_dict[path]
+                     if f"{pfn} Unknown replica" not in unknown_replicas:
+                         unknown_replicas.append('%s %s' % (pfn, 'Unknown replica'))
+             elif scope or name:
+                 unknown_replicas.append(f"{(scope, name)} Unknown replica")
+
+     if status == BadFilesStatus.BAD:
+         # For BAD file, we modify the replica state, not for suspicious
+         try:
+             # there shouldn't be any exceptions since all replicas exist
+             update_replicas_states(bad_replicas_to_update, session=session)
+         except exception.UnsupportedOperation:
+             raise exception.ReplicaNotFound("One or several replicas don't exist.")
+
+     try:
+         session.flush()
+     except IntegrityError as error:
+         raise exception.RucioException(error.args)
+     except DatabaseError as error:
+         raise exception.RucioException(error.args)
+     except FlushError as error:
+         raise exception.RucioException(error.args)
+
+     return unknown_replicas
+
+
+ @transactional_session
+ def add_bad_dids(
+     dids: "Iterable[dict[str, Any]]",
+     rse_id: str,
+     reason: str,
+     issuer: InternalAccount,
+     state: BadFilesStatus = BadFilesStatus.BAD,
+     *,
+     session: "Session"
+ ) -> list[str]:
+     """
+     Declare a list of bad replicas.
+
+     :param dids: The list of DIDs.
+     :param rse_id: The RSE id.
+     :param reason: The reason of the loss.
+     :param issuer: The issuer account.
+     :param state: BadFilesStatus.BAD
+     :param session: The database session in use.
+     """
+     unknown_replicas = []
+     replicas_for_update = []
+     replicas_list = []
+
+     for did in dids:
+         scope = InternalScope(did['scope'], vo=issuer.vo)
+         name = did['name']
+         replicas_list.append((scope, name, None))
+
+     for scope, name, _, __exists, already_declared, size in __exist_replicas(rse_id=rse_id, replicas=replicas_list, session=session):
+         if __exists and not already_declared:
+             replicas_for_update.append({'scope': scope, 'name': name, 'rse_id': rse_id, 'state': ReplicaState.BAD})
+             new_bad_replica = models.BadReplica(scope=scope, name=name, rse_id=rse_id, reason=reason, state=state,
+                                                 account=issuer, bytes=size)
+             new_bad_replica.save(session=session, flush=False)
+             stmt = delete(
+                 models.Source
+             ).where(
+                 and_(models.Source.scope == scope,
+                      models.Source.name == name,
+                      models.Source.rse_id == rse_id)
+             ).execution_options(
+                 synchronize_session=False
+             )
+             session.execute(stmt)
+         else:
+             if already_declared:
+                 unknown_replicas.append('%s:%s %s' % (did['scope'], name, 'Already declared'))
+             else:
+                 unknown_replicas.append('%s:%s %s' % (did['scope'], name, 'Unknown replica'))
+
+     if state == BadFilesStatus.BAD:
+         try:
+             update_replicas_states(replicas_for_update, session=session)
+         except exception.UnsupportedOperation:
+             raise exception.ReplicaNotFound("One or several replicas don't exist.")
+
+     try:
+         session.flush()
+     except (IntegrityError, DatabaseError, FlushError) as error:
+         raise exception.RucioException(error.args)
+
+     return unknown_replicas
+
+
+ @transactional_session
+ def declare_bad_file_replicas(
+     replicas: list[Union[str, dict[str, Any]]],
+     reason: str,
+     issuer: InternalAccount,
+     status: BadFilesStatus = BadFilesStatus.BAD,
+     force: bool = False,
+     *,
+     session: "Session"
+ ) -> dict[str, list[str]]:
+     """
+     Declare a list of bad replicas.
+
+     :param replicas: Either a list of PFNs (string) or a list of replicas {'scope': <scope>, 'name': <name>, 'rse_id': <rse_id>}.
+     :param reason: The reason of the loss.
+     :param issuer: The issuer account.
+     :param status: The status of the file (SUSPICIOUS or BAD).
+     :param force: boolean, if declaring BAD replica, ignore existing replica status in the bad_replicas table. Default: False
+     :param session: The database session in use.
+     :returns: Dictionary {rse_id -> [replicas failed to declare with errors]}
+     """
+     unknown_replicas: dict[str, list[str]] = {}
+     if replicas:
+         type_ = type(replicas[0])
+         files_to_declare = {}
+         scheme = None
+         for replica in replicas:
+             if not isinstance(replica, type_):
+                 raise exception.InvalidType('Replicas must be specified either as a list of string or a list of dicts')
+         if type_ is str:
+             scheme, files_to_declare, unknown_replicas = get_pfn_to_rse(replicas, vo=issuer.vo, session=session)
+         else:
+             for replica in replicas:
+                 rse_id = replica['rse_id']  # type: ignore
+                 files_to_declare.setdefault(rse_id, []).append(replica)
+         for rse_id in files_to_declare:
+             notdeclared = __declare_bad_file_replicas(files_to_declare[rse_id], rse_id, reason, issuer,
+                                                       status=status, scheme=scheme,
+                                                       force=force, session=session)
+             if notdeclared:
+                 unknown_replicas[rse_id] = notdeclared
+     return unknown_replicas
+
+
+ @read_session
+ def get_pfn_to_rse(
+     pfns: "Iterable[str]",
+     vo: str = 'def',
+     *,
+     session: "Session"
+ ) -> tuple[Optional[str], dict[str, Any], dict[str, list[str]]]:
+     """
+     Get the RSE associated to a list of PFNs.
+
+     :param pfns: The list of pfn.
+     :param vo: The VO to find RSEs at.
+     :param session: The database session in use.
+
+     :returns: a tuple : scheme, {rse1 : [pfn1, pfn2, ...], rse2: [pfn3, pfn4, ...]}, {'unknown': [pfn5, pfn6, ...]}.
+     """
+     unknown_replicas = {}
+     storage_elements = []
+     se_condition = []
+     dict_rse = {}
+     cleaned_pfns = clean_pfns(pfns)
+     scheme = cleaned_pfns[0].split(':')[0] if cleaned_pfns else None
+     for pfn in cleaned_pfns:
+         if pfn.split(':')[0] != scheme:
+             raise exception.InvalidType('The PFNs specified must have the same protocol')
+
+         split_se = pfn.split('/')[2].split(':')
+         storage_element = split_se[0]
+
+         if storage_element not in storage_elements:
+             storage_elements.append(storage_element)
+             se_condition.append(models.RSEProtocol.hostname == storage_element)
+     stmt = select(
+         models.RSEProtocol.rse_id,
+         models.RSEProtocol.scheme,
+         models.RSEProtocol.hostname,
+         models.RSEProtocol.port,
+         models.RSEProtocol.prefix
+     ).join(
+         models.RSE,
+         models.RSEProtocol.rse_id == models.RSE.id
+     ).where(
+         and_(or_(*se_condition),
+              models.RSEProtocol.scheme == scheme,
+              models.RSE.deleted == false(),
+              models.RSE.staging_area == false())
+     )
+
+     protocols = {}
+
+     for rse_id, protocol, hostname, port, prefix in session.execute(stmt).yield_per(10000):
+         if rse_id not in protocols:
+             protocols[rse_id] = []
+         protocols[rse_id].append('%s://%s:%s%s' % (protocol, hostname, port, prefix))
+         if '%s://%s%s' % (protocol, hostname, prefix) not in protocols[rse_id]:
+             protocols[rse_id].append('%s://%s%s' % (protocol, hostname, prefix))
+     hint = None
+     for pfn in cleaned_pfns:
+         if hint:
+             for pattern in protocols[hint]:
+                 if pfn.find(pattern) > -1:
+                     dict_rse[hint].append(pfn)
+         else:
+             mult_rse_match = 0
+             for rse_id in protocols:
+                 for pattern in protocols[rse_id]:
+                     if pfn.find(pattern) > -1 and get_rse_vo(rse_id=rse_id, session=session) == vo:
+                         mult_rse_match += 1
+                         if mult_rse_match > 1:
+                             print('ERROR, multiple matches : %s at %s' % (pfn, rse_id))
+                             raise exception.RucioException('ERROR, multiple matches : %s at %s' % (pfn, get_rse_name(rse_id=rse_id, session=session)))
+                         hint = rse_id
+                         if hint not in dict_rse:
+                             dict_rse[hint] = []
+                         dict_rse[hint].append(pfn)
+             if mult_rse_match == 0:
+                 if 'unknown' not in unknown_replicas:
+                     unknown_replicas['unknown'] = []
+                 unknown_replicas['unknown'].append(pfn)
+     return scheme, dict_rse, unknown_replicas
+
+
+ @read_session
+ def get_bad_replicas_backlog(
+     *,
+     session: "Session"
+ ) -> dict[str, int]:
+     """
+     Get the replica backlog by RSE.
+
+     :param session: The database session in use.
+
+     :returns: a dictionary {rse_id: cnt_bad_replicas}.
+     """
+     stmt = select(
+         func.count(),
+         models.RSEFileAssociation.rse_id
+     ).select_from(
+         models.RSEFileAssociation
+     ).with_hint(
+         models.RSEFileAssociation,
+         'INDEX(DIDS DIDS_PK) USE_NL(DIDS) INDEX_RS_ASC(REPLICAS ("REPLICAS"."STATE"))',
+         'oracle'
+     ).join(
+         models.DataIdentifier,
+         and_(models.RSEFileAssociation.scope == models.DataIdentifier.scope,
+              models.RSEFileAssociation.name == models.DataIdentifier.name)
+     ).where(
+         and_(models.DataIdentifier.availability != DIDAvailability.LOST,
+              models.RSEFileAssociation.state == ReplicaState.BAD)
+     ).group_by(
+         models.RSEFileAssociation.rse_id
+     )
+
+     result = dict()
+     for cnt, rse_id in session.execute(stmt).all():
+         result[rse_id] = cnt
+     return result
+
+
+ @read_session
+ def list_bad_replicas(
+     limit: int = 10000,
+     thread: Optional[int] = None,
+     total_threads: Optional[int] = None,
+     rses: Optional['Iterable[dict[str, Any]]'] = None,
+     *,
+     session: "Session"
+ ) -> list[dict[str, Any]]:
+     """
+     List RSE File replicas with no locks.
+
+     :param limit: The maximum number of replicas returned.
+     :param thread: The assigned thread for this necromancer.
+     :param total_threads: The total number of threads of all necromancers.
+     :param rses: The list of RSEs to restrict the listing to.
+     :param session: The database session in use.
+
+     :returns: a list of dictionaries {'scope': scope, 'name': name, 'rse_id': rse_id, 'rse': rse}.
+     """
+     schema_dot = '%s.' % DEFAULT_SCHEMA_NAME if DEFAULT_SCHEMA_NAME else ''
+
+     stmt = select(
+         models.RSEFileAssociation.scope,
+         models.RSEFileAssociation.name,
+         models.RSEFileAssociation.rse_id
+     ).with_hint(
+         models.RSEFileAssociation,
+         'INDEX(DIDS DIDS_PK) USE_NL(DIDS) INDEX_RS_ASC(REPLICAS ("REPLICAS"."STATE"))',
+         'oracle'
+     ).where(
+         models.RSEFileAssociation.state == ReplicaState.BAD
+     )
+
+     stmt = filter_thread_work(session=session, query=stmt, total_threads=total_threads, thread_id=thread, hash_variable='%sreplicas.name' % (schema_dot))
+
+     stmt = stmt.join(
+         models.DataIdentifier,
+         and_(models.RSEFileAssociation.scope == models.DataIdentifier.scope,
+              models.RSEFileAssociation.name == models.DataIdentifier.name)
+     ).where(
+         models.DataIdentifier.availability != DIDAvailability.LOST
+     )
+
+     if rses:
+         rse_clause = [models.RSEFileAssociation.rse_id == rse['id'] for rse in rses]
+         stmt = stmt.where(or_(*rse_clause))
+
+     stmt = stmt.limit(limit)
+     rows = []
+     for scope, name, rse_id in session.execute(stmt).yield_per(1000):
+         rows.append({'scope': scope, 'name': name, 'rse_id': rse_id, 'rse': get_rse_name(rse_id=rse_id, session=session)})
+     return rows
708
+
709
+
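The filter_thread_work call shards the bad replicas across necromancer threads by hashing the replica name on the database side. A simplified, client-side sketch of the same partitioning idea (illustration only, not the actual helper):

```python
from hashlib import md5

def belongs_to_thread(name, thread_id, total_threads):
    # Stable hash of the replica name, reduced modulo the number of threads
    return int(md5(name.encode()).hexdigest(), 16) % total_threads == thread_id

names = ['scope:file_a', 'scope:file_b', 'scope:file_c']
print([n for n in names if belongs_to_thread(n, thread_id=0, total_threads=2)])
```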
+ @stream_session
+ def get_did_from_pfns(
+     pfns: "Iterable[str]",
+     rse_id: Optional[str] = None,
+     vo: str = 'def',
+     *,
+     session: "Session"
+ ) -> 'Iterator[dict[str, dict[str, Any]]]':
+     """
+     Get the DIDs associated with a set of PFNs on one given RSE.
+
+     :param pfns: The list of PFNs.
+     :param rse_id: The RSE id.
+     :param vo: The VO to get DIDs from.
+     :param session: The database session in use.
+     :returns: An iterator of dictionaries {pfn: {'scope': scope, 'name': name}}
+     """
+     dict_rse = {}
+     if not rse_id:
+         scheme, dict_rse, unknown_replicas = get_pfn_to_rse(pfns, vo=vo, session=session)
+         if unknown_replicas:
+             raise Exception
+     else:
+         scheme = 'srm'
+         dict_rse[rse_id] = pfns
+     for rse_id in dict_rse:
+         pfns = dict_rse[rse_id]
+         rse_info = rsemgr.get_rse_info(rse_id=rse_id, session=session)
+         pfndict = {}
+         proto: RSEProtocol = rsemgr.create_protocol(rse_info, 'read', scheme=scheme)
+         if rse_info['deterministic']:
+             scope_proto = rsemgr.get_scope_protocol(vo=vo)
+             parsed_pfn = proto.parse_pfns(pfns=pfns)
+
+             for pfn in parsed_pfn:
+                 # Translate into a scope and name
+                 name, scope = scope_proto(parsed_pfn[pfn])
+                 scope = InternalScope(scope, vo)
+                 yield {pfn: {'scope': scope, 'name': name}}
+         else:
+             condition = []
+             parsed_pfn = proto.parse_pfns(pfns=pfns)
+             for pfn in parsed_pfn:
+                 path = '%s%s' % (parsed_pfn[pfn]['path'], parsed_pfn[pfn]['name'])
+                 pfndict[path] = pfn
+                 condition.append(and_(models.RSEFileAssociation.path == path,
+                                       models.RSEFileAssociation.rse_id == rse_id))
+             stmt = select(
+                 models.RSEFileAssociation.scope,
+                 models.RSEFileAssociation.name,
+                 models.RSEFileAssociation.path
+             ).where(
+                 or_(*condition)
+             )
+             for scope, name, pfn in session.execute(stmt).all():
+                 yield {pfndict[pfn]: {'scope': scope, 'name': name}}
+
+
+ def _pick_n_random(
+     nrandom: int,
+     generator: 'Iterable[Any]'
+ ) -> 'Iterator[Any]':
+     """
+     Select n random elements from the generator
+     """
+
+     if not nrandom:
+         # pass-through the data unchanged
+         yield from generator
+         return
+
+     # A "reservoir sampling" algorithm:
+     # Copy the first N elements from the generator. After that, each following element may be picked
+     # to substitute one of the previously selected elements, with a probability which decreases as
+     # the number of encountered elements grows.
+     selected = []
+     i = 0
+     iterator = iter(generator)
+     try:
+         for _ in range(nrandom):
+             selected.append(next(iterator))
+             i += 1
+
+         while True:
+             element = next(iterator)
+             i += 1
+
+             index_to_substitute = random.randint(0, i)  # noqa: S311
+             if index_to_substitute < nrandom:
+                 selected[index_to_substitute] = element
+     except StopIteration:
+         pass
+
+     for r in selected:
+         yield r
+
+
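A standalone demonstration of the reservoir idea, using the textbook Algorithm R indexing for reference (with a 0-based position i, the substitution index is drawn from [0, i]; the function above counts 1-based and also draws from [0, i], which makes replacements marginally less likely than in the textbook variant):

```python
import collections
import random

def pick_n_random(nrandom, iterable):
    # Textbook reservoir sampling (Algorithm R)
    selected = []
    for i, element in enumerate(iterable):
        if i < nrandom:
            selected.append(element)
        else:
            j = random.randint(0, i)  # uniform over all elements seen so far
            if j < nrandom:
                selected[j] = element
    yield from selected

counts = collections.Counter()
for _ in range(5000):
    counts.update(pick_n_random(3, range(50)))
print(counts.most_common(3), counts.most_common()[-3:])  # frequencies are roughly uniform
```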
+ def _list_files_wo_replicas(
+     files_wo_replica: "Iterable[dict[str, Any]]",
+     *,
+     session: "Session"
+ ) -> 'Iterator[tuple[str, str, int, str, str]]':
+     if files_wo_replica:
+         file_wo_clause = []
+         for file in sorted(files_wo_replica, key=lambda f: (f['scope'], f['name'])):
+             file_wo_clause.append(and_(models.DataIdentifier.scope == file['scope'],
+                                        models.DataIdentifier.name == file['name']))
+         stmt = select(
+             models.DataIdentifier.scope,
+             models.DataIdentifier.name,
+             models.DataIdentifier.bytes,
+             models.DataIdentifier.md5,
+             models.DataIdentifier.adler32
+         ).with_hint(
+             models.DataIdentifier,
+             'INDEX(DIDS DIDS_PK)',
+             'oracle'
+         ).where(
+             and_(models.DataIdentifier.did_type == DIDType.FILE,
+                  or_(*file_wo_clause))
+         )
+         for scope, name, bytes_, md5, adler32 in session.execute(stmt):
+             yield scope, name, bytes_, md5, adler32
+
+
+ def get_vp_endpoint() -> str:
+     """
+     VP endpoint is the Virtual Placement server.
+     Once VP is integrated in Rucio it won't be needed.
+     """
+     vp_endpoint = config_get('virtual_placement', 'vp_endpoint', default='')
+     return vp_endpoint
+
+
+ def get_multi_cache_prefix(
+     cache_site: str,
+     filename: str,
+     logger: "LoggerFunction" = logging.log
+ ) -> str:
+     """
+     For a given cache site and filename, return the address of the cache node that
+     should be prefixed.
+
+     :param cache_site: Cache site
+     :param filename: Filename
+     """
+     vp_endpoint = get_vp_endpoint()
+     if not vp_endpoint:
+         return ''
+
+     x_caches = REGION.get('CacheSites')
+     if x_caches is NO_VALUE:
+         try:
+             response = requests.get('{}/serverRanges'.format(vp_endpoint), timeout=1, verify=False)
+             if response.ok:
+                 x_caches = response.json()
+                 REGION.set('CacheSites', x_caches)
+             else:
+                 REGION.set('CacheSites', {'could not reload': ''})
+                 return ''
+         except requests.exceptions.RequestException as re:
+             REGION.set('CacheSites', {'could not reload': ''})
+             logger(logging.WARNING, 'In get_multi_cache_prefix, could not access {}. Exception: {}'.format(vp_endpoint, re))
+             return ''
+
+     if cache_site not in x_caches:  # type: ignore
+         return ''
+
+     xcache_site = x_caches[cache_site]  # type: ignore
+     h = float(
+         unpack('Q', sha256(filename.encode('utf-8')).digest()[:8])[0]) / 2**64
+     for irange in xcache_site['ranges']:
+         if h < irange[1]:
+             return xcache_site['servers'][irange[0]][0]
+     return ''
+
+
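The node selection is a hash-to-range lookup: the first 8 bytes of the SHA-256 of the filename become a float in [0, 1), and the first range whose upper bound exceeds that value wins. A minimal sketch with a made-up server map (the real layout comes from the VP server's /serverRanges response):

```python
from hashlib import sha256
from struct import unpack

# Hypothetical cache-site entry, shaped like a VP serverRanges response:
# two servers splitting the hash space 60/40.
xcache_site = {
    'servers': [['node1.example.org:1094'], ['node2.example.org:1094']],
    'ranges': [[0, 0.6], [1, 1.0]],  # [server_index, upper_bound]
}

def pick_server(filename):
    h = float(unpack('Q', sha256(filename.encode('utf-8')).digest()[:8])[0]) / 2**64
    for server_index, upper_bound in xcache_site['ranges']:
        if h < upper_bound:
            return xcache_site['servers'][server_index][0]
    return ''

print(pick_server('scope:some.file.root'))  # deterministic per filename
```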
+ def _get_list_replicas_protocols(
+     rse_id: str,
+     domain: str,
+     schemes: Optional[list[str]],
+     additional_schemes: "Iterable[str]",
+     session: "Session"
+ ) -> "list[tuple[str, RSEProtocol, int]]":
+     """
+     Select the protocols to be used by list_replicas to build the PFNs for all replicas on the given RSE
+     """
+     domains = ['wan', 'lan'] if domain == 'all' else [domain]
+
+     rse_info = rsemgr.get_rse_info(rse_id=rse_id, session=session)
+     # compute scheme priorities, and don't forget to exclude disabled protocols
+     # 0 or None in RSE protocol definition = disabled, 1 = highest priority
+     scheme_priorities = {
+         'wan': {p['scheme']: p['domains']['wan']['read'] for p in rse_info['protocols'] if p['domains']['wan']['read']},
+         'lan': {p['scheme']: p['domains']['lan']['read'] for p in rse_info['protocols'] if p['domains']['lan']['read']},
+     }
+
+     rse_schemes = copy.copy(schemes) if schemes else []
+     if not rse_schemes:
+         try:
+             for domain in domains:
+                 rse_schemes.append(rsemgr.select_protocol(rse_settings=rse_info,
+                                                           operation='read',
+                                                           domain=domain)['scheme'])
+         except exception.RSEProtocolNotSupported:
+             pass  # no need to be verbose
+         except Exception:
+             print(format_exc())
+
+     for s in additional_schemes:
+         if s not in rse_schemes:
+             rse_schemes.append(s)
+
+     protocols = []
+     for s in rse_schemes:
+         try:
+             for domain in domains:
+                 protocol = rsemgr.create_protocol(rse_settings=rse_info, operation='read', scheme=s, domain=domain)
+                 priority = scheme_priorities[domain][s]
+
+                 protocols.append((domain, protocol, priority))
+         except exception.RSEProtocolNotSupported:
+             pass  # no need to be verbose
+         except Exception:
+             print(format_exc())
+     return protocols
+
+
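The priority bookkeeping mirrors the RSE protocol table: per domain, every enabled scheme carries its 1-based read priority, and disabled protocols (read priority 0 or None) are filtered out. A small sketch of that filtering over a hypothetical protocol list:

```python
# Hypothetical protocol definitions, shaped like rse_info['protocols'] above.
protocol_defs = [
    {'scheme': 'root', 'domains': {'wan': {'read': 1}, 'lan': {'read': 1}}},
    {'scheme': 'davs', 'domains': {'wan': {'read': 2}, 'lan': {'read': 0}}},  # disabled on lan
]

scheme_priorities = {
    domain: {p['scheme']: p['domains'][domain]['read']
             for p in protocol_defs if p['domains'][domain]['read']}
    for domain in ('wan', 'lan')
}
print(scheme_priorities)
# {'wan': {'root': 1, 'davs': 2}, 'lan': {'root': 1}}
```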
+ def _build_list_replicas_pfn(
+     scope: "InternalScope",
+     name: str,
+     rse_id: str,
+     domain: str,
+     protocol: "RSEProtocol",
+     path: str,
+     sign_urls: bool,
+     signature_lifetime: Optional[int],
+     client_location: Optional[dict[str, Any]],
+     logger: "LoggerFunction" = logging.log,
+     *,
+     session: "Session",
+ ) -> str:
+     """
+     Generate the PFN for the given scope/name on the RSE.
+     If needed, sign the PFN url.
+     If relevant, add the server-side root proxy to the PFN url.
+     """
+     lfn: LFNDict = {
+         'scope': scope.external,  # type: ignore (scope.external might be None)
+         'name': name,
+         'path': path
+     }
+     pfn: str = list(protocol.lfns2pfns(lfns=lfn).values())[0]
+
+     # do we need to sign the URLs?
+     if sign_urls and protocol.attributes['scheme'] == 'https':
+         service = get_rse_attribute(rse_id, RseAttr.SIGN_URL, session=session)
+         if service:
+             pfn = get_signed_url(rse_id=rse_id, service=service, operation='read', url=pfn, lifetime=signature_lifetime)
+
+     # server-side root proxy handling if location is set.
+     # supports root and http destinations
+     # cannot be pushed into protocols because we need to look up rse attributes.
+     # ultra-conservative implementation.
+     if domain == 'wan' and protocol.attributes['scheme'] in ['root', 'http', 'https'] and client_location:
+
+         if 'site' in client_location and client_location['site']:
+             replica_site = get_rse_attribute(rse_id, RseAttr.SITE, session=session)
+
+             # does it match with the client? if not, it's an outgoing connection
+             # therefore the internal proxy must be prepended
+             if client_location['site'] != replica_site:
+                 cache_site = config_get('clientcachemap', client_location['site'], default='', session=session)
+                 if cache_site != '':
+                     # print('client', client_location['site'], 'has cache:', cache_site)
+                     # print('filename', name)
+                     selected_prefix = get_multi_cache_prefix(cache_site, name)
+                     if selected_prefix:
+                         pfn = f"root://{selected_prefix}//{pfn.replace('davs://', 'root://')}"
+                 else:
+                     # print('site:', client_location['site'], 'has no cache')
+                     # print('lets check if it has defined an internal root proxy ')
+                     root_proxy_internal = config_get('root-proxy-internal',    # section
+                                                      client_location['site'],  # option
+                                                      default='',               # empty string to circumvent exception
+                                                      session=session)
+
+                     if root_proxy_internal:
+                         # TODO: XCache does not seem to grab signed URLs. Doublecheck with XCache devs.
+                         # For now -> skip prepending XCache for GCS.
+                         if 'storage.googleapis.com' in pfn or 'atlas-google-cloud.cern.ch' in pfn or 'amazonaws.com' in pfn:
+                             pass  # ATLAS HACK
+                         else:
+                             # don't forget to mangle gfal-style davs URL into generic https URL
+                             pfn = f"root://{root_proxy_internal}//{pfn.replace('davs://', 'https://')}"
+
+     simulate_multirange = get_rse_attribute(rse_id, RseAttr.SIMULATE_MULTIRANGE, session=session)
+
+     if simulate_multirange is not None:
+         try:
+             # cover values that cannot be cast to int
+             simulate_multirange = int(simulate_multirange)
+         except ValueError:
+             simulate_multirange = 1
+             logger(logging.WARNING, 'Value encountered when retrieving RSE attribute "%s" not compatible with "int", used default value "1".', RseAttr.SIMULATE_MULTIRANGE)
+         if simulate_multirange <= 0:
+             logger(logging.WARNING, f'Value {simulate_multirange} encountered when retrieving RSE attribute "{RseAttr.SIMULATE_MULTIRANGE}" is <= 0, used default value "1".')
+             simulate_multirange = 1
+         pfn += f'&#multirange=false&nconnections={simulate_multirange}'
+
+     return pfn
+
+
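Both proxy branches are pure string surgery on the PFN: swap the gfal-style davs:// scheme for one the proxy can serve, then prepend root://&lt;proxy&gt;//. A sketch of the two rewrite shapes with hypothetical hosts:

```python
pfn = 'davs://storage.example.org:443/path/scope/file.root'

# cache-site rewrite, as in the branch with a selected xcache prefix
selected_prefix = 'cache1.example.org:1094'
print(f"root://{selected_prefix}//{pfn.replace('davs://', 'root://')}")

# internal root-proxy rewrite, as in the root_proxy_internal branch
root_proxy_internal = 'proxy.example.org:1094'
print(f"root://{root_proxy_internal}//{pfn.replace('davs://', 'https://')}")
```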
+ def _list_replicas(
+     replicas: "Iterable[tuple]",
+     show_pfns: bool,
+     schemes: Optional[list[str]],
+     files_wo_replica: "Iterable[dict[str, Any]]",
+     client_location: Optional[dict[str, Any]],
+     domain: Optional[str],
+     sign_urls: bool,
+     signature_lifetime: Optional[int],
+     resolve_parents: bool,
+     filters: dict[str, Any],
+     by_rse_name: bool,
+     *,
+     session: "Session"
+ ) -> "Iterator[dict[str, Any]]":
+
+     # the `domain` variable name will be re-used throughout the function with different values
+     input_domain = domain
+
+     # find all RSEs local to the client's location in autoselect mode (i.e., when domain is None)
+     local_rses = []
+     if input_domain is None:
+         if client_location and 'site' in client_location and client_location['site']:
+             try:
+                 local_rses = [rse['id'] for rse in parse_expression('site=%s' % client_location['site'], filter_=filters, session=session)]
+             except Exception:
+                 pass  # do not hard fail if site cannot be resolved or is empty
+
+     file, pfns_cache = {}, {}
+     protocols_cache = defaultdict(dict)
+
+     for _, replica_group in groupby(replicas, key=lambda x: (x[0], x[1])):  # Group by scope/name
+         file = {}
+         pfns = {}
+         for scope, name, archive_scope, archive_name, bytes_, md5, adler32, path, state, rse_id, rse, rse_type, volatile in replica_group:
+             if isinstance(archive_scope, str):
+                 archive_scope = InternalScope(archive_scope, from_external=False)
+
+             is_archive = bool(archive_scope and archive_name)
+
+             # it is the first row in the scope/name group
+             if not file:
+                 file['scope'], file['name'] = scope, name
+                 file['bytes'], file['md5'], file['adler32'] = bytes_, md5, adler32
+                 file['pfns'], file['rses'], file['states'] = {}, {}, {}
+                 if resolve_parents:
+                     file['parents'] = ['%s:%s' % (parent['scope'].internal, parent['name'])
+                                        for parent in rucio.core.did.list_all_parent_dids(scope, name, session=session)]
+
+             if not rse_id:
+                 continue
+
+             rse_key = rse if by_rse_name else rse_id
+             file['states'][rse_key] = str(state.name if state else state)
+
+             if not show_pfns:
+                 continue
+
+             # It's the first time we see this RSE, initialize the protocols needed for PFN generation
+             protocols = protocols_cache.get(rse_id, {}).get(is_archive)
+             if not protocols:
+                 # select the lan door in autoselect mode, otherwise use the wan door
+                 domain = input_domain
+                 if domain is None:
+                     domain = 'wan'
+                     if local_rses and rse_id in local_rses:
+                         domain = 'lan'
+
+                 protocols = _get_list_replicas_protocols(
+                     rse_id=rse_id,
+                     domain=domain,
+                     schemes=schemes,
+                     # We want 'root' for archives even if it wasn't included into 'schemes'
+                     additional_schemes=['root'] if is_archive else [],
+                     session=session,
+                 )
+                 protocols_cache[rse_id][is_archive] = protocols
+
+             # build the pfns
+             for domain, protocol, priority in protocols:
+                 # If the current "replica" is a constituent inside an archive, we must construct the pfn for the
+                 # parent (archive) file and append the xrdcl.unzip query string to it.
+                 if is_archive:
+                     t_scope = archive_scope
+                     t_name = archive_name
+                 else:
+                     t_scope = scope
+                     t_name = name
+
+                 if 'determinism_type' in protocol.attributes:  # PFN is cacheable
+                     try:
+                         path = pfns_cache['%s:%s:%s' % (protocol.attributes['determinism_type'], t_scope.internal, t_name)]
+                     except KeyError:  # No cache entry scope:name found for this protocol
+                         path = protocol._get_path(t_scope, t_name)  # type: ignore (t_scope is InternalScope instead of str)
+                         pfns_cache['%s:%s:%s' % (protocol.attributes['determinism_type'], t_scope.internal, t_name)] = path
+
+                 try:
+                     pfn = _build_list_replicas_pfn(
+                         scope=t_scope,
+                         name=t_name,
+                         rse_id=rse_id,
+                         domain=domain,
+                         protocol=protocol,
+                         path=path,
+                         sign_urls=sign_urls,
+                         signature_lifetime=signature_lifetime,
+                         client_location=client_location,
+                         session=session,
+                     )
+
+                     client_extract = False
+                     if is_archive:
+                         domain = 'zip'
+                         pfn = add_url_query(pfn, {'xrdcl.unzip': name})
+                         if protocol.attributes['scheme'] == 'root':
+                             # xroot supports downloading files directly from inside an archive. Disable client_extract and prioritize xroot.
+                             client_extract = False
+                             priority = -1
+                         else:
+                             client_extract = True
+
+                     pfns[pfn] = {
+                         'rse_id': rse_id,
+                         'rse': rse,
+                         'type': str(rse_type.name),
+                         'volatile': volatile,
+                         'domain': domain,
+                         'priority': priority,
+                         'client_extract': client_extract
+                     }
+
+                 except Exception:
+                     # never end up here
+                     print(format_exc())
+
+                 if protocol.attributes['scheme'] == 'srm':
+                     try:
+                         file['space_token'] = protocol.attributes['extended_attributes']['space_token']
+                     except KeyError:
+                         file['space_token'] = None
+
+         # fill the 'pfns' and 'rses' dicts in file
+         if pfns:
+             # set the total order for the priority
+             # --> exploit that L(AN) comes before W(AN) before Z(IP) alphabetically
+             # and use 1-indexing to be compatible with metalink
+             sorted_pfns = sorted(pfns.items(), key=lambda item: (item[1]['domain'], item[1]['priority'], item[0]))
+             for i, (pfn, pfn_value) in enumerate(list(sorted_pfns), start=1):
+                 pfn_value['priority'] = i
+                 file['pfns'][pfn] = pfn_value
+
+             sorted_pfns = sorted(file['pfns'].items(), key=lambda item: (item[1]['rse_id'], item[1]['priority'], item[0]))
+             for pfn, pfn_value in sorted_pfns:
+                 rse_key = pfn_value['rse'] if by_rse_name else pfn_value['rse_id']
+                 file['rses'].setdefault(rse_key, []).append(pfn)
+
+         if file:
+             yield file
+
+     for scope, name, bytes_, md5, adler32 in _list_files_wo_replicas(files_wo_replica, session=session):
+         yield {
+             'scope': scope,
+             'name': name,
+             'bytes': bytes_,
+             'md5': md5,
+             'adler32': adler32,
+             'pfns': {},
+             'rses': defaultdict(list)
+         }
+
+
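The total ordering at the end is worth spelling out: sorting on the tuple (domain, priority, pfn) works because 'lan' &lt; 'wan' &lt; 'zip' alphabetically, so LAN doors always outrank WAN doors, which outrank archive ('zip') access; the subsequent 1-based re-indexing keeps the priorities metalink-compatible. A quick sketch:

```python
pfns = {
    'root://wan-door.example.org/f': {'domain': 'wan', 'priority': 1},
    'root://lan-door.example.org/f': {'domain': 'lan', 'priority': 2},
    'root://door.example.org/a.zip?xrdcl.unzip=f': {'domain': 'zip', 'priority': 1},
}
sorted_pfns = sorted(pfns.items(), key=lambda item: (item[1]['domain'], item[1]['priority'], item[0]))
for i, (pfn, value) in enumerate(sorted_pfns, start=1):
    value['priority'] = i
print([(pfn, value['priority']) for pfn, value in sorted_pfns])
# LAN door first (1), then WAN (2), then archive access (3)
```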
+ @stream_session
+ def list_replicas(
+     dids: "Sequence[dict[str, Any]]",
+     schemes: Optional[list[str]] = None,
+     unavailable: bool = False,
+     request_id: Optional[str] = None,
+     ignore_availability: bool = True,
+     all_states: bool = False,
+     pfns: bool = True,
+     rse_expression: Optional[str] = None,
+     client_location: Optional[dict[str, Any]] = None,
+     domain: Optional[str] = None,
+     sign_urls: bool = False,
+     signature_lifetime: "Optional[int]" = None,
+     resolve_archives: bool = True,
+     resolve_parents: bool = False,
+     nrandom: Optional[int] = None,
+     updated_after: Optional[datetime] = None,
+     by_rse_name: bool = False,
+     *, session: "Session",
+ ) -> 'Iterator':
+     """
+     List file replicas for a list of data identifiers (DIDs).
+
+     :param dids: The list of data identifiers (DIDs).
+     :param schemes: A list of schemes to filter the replicas. (e.g. file, http, ...)
+     :param unavailable: (deprecated) Also include unavailable replicas in the list.
+     :param request_id: ID associated with the request for debugging.
+     :param ignore_availability: Ignore the RSE blocklisting.
+     :param all_states: Return all replicas whatever state they are in. Adds an extra 'states' entry in the result dictionary.
+     :param pfns: If True, include the PFNs in the result; if False, skip PFN generation.
+     :param rse_expression: The RSE expression to restrict list_replicas on a set of RSEs.
+     :param client_location: Client location dictionary for PFN modification {'ip', 'fqdn', 'site', 'latitude', 'longitude'}
+     :param domain: The network domain for the call: None, 'lan', 'wan' or 'all'. None is automatic mode; 'all' selects both ['lan', 'wan'].
+     :param sign_urls: If set, will sign the PFNs if necessary.
+     :param signature_lifetime: If supported, in seconds, restrict the lifetime of the signed PFN.
+     :param resolve_archives: When set to true, find archives which contain the replicas.
+     :param resolve_parents: When set to true, find all parent datasets which contain the replicas.
+     :param nrandom: If set, return replicas for at most this many randomly selected files.
+     :param updated_after: datetime (UTC time), only return replicas updated after this time
+     :param by_rse_name: if True, rse information will be returned in dicts indexed by rse name; otherwise: in dicts indexed by rse id
+     :param session: The database session in use.
+     """
+     # For historical reasons:
+     # - list_replicas([some_file_did]), must return the file even if it doesn't have replicas
+     # - list_replicas([some_collection_did]) must only return files with replicas
+
+     def _replicas_filter_subquery():
+         """
+         Build the sub-query used to filter replicas according to list_replicas' input arguments
+         """
+         stmt = select(
+             models.RSEFileAssociation.scope,
+             models.RSEFileAssociation.name,
+             models.RSEFileAssociation.path,
+             models.RSEFileAssociation.state,
+             models.RSEFileAssociation.bytes,
+             models.RSEFileAssociation.md5,
+             models.RSEFileAssociation.adler32,
+             models.RSE.id.label('rse_id'),
+             models.RSE.rse.label('rse_name'),
+             models.RSE.rse_type,
+             models.RSE.volatile,
+         ).join(
+             models.RSE,
+             and_(models.RSEFileAssociation.rse_id == models.RSE.id,
+                  models.RSE.deleted == false())
+         )
+
+         if not ignore_availability:
+             stmt = stmt.where(models.RSE.availability_read == true())
+
+         if updated_after:
+             stmt = stmt.where(models.RSEFileAssociation.updated_at >= updated_after)
+
+         if rse_expression:
+             rses = parse_expression(expression=rse_expression, filter_=filter_, session=session)
+             # When the number of RSEs is small, don't go through the overhead of
+             # creating and using a temporary table. Rely on a simple "in" query.
+             # The number "4" was picked without any particular reason
+             if 0 < len(rses) < 4:
+                 stmt = stmt.where(models.RSE.id.in_([rse['id'] for rse in rses]))
+             else:
+                 rses_temp_table = temp_table_mngr(session).create_id_table()
+                 values = [{'id': rse['id']} for rse in rses]
+                 insert_stmt = insert(
+                     rses_temp_table
+                 )
+                 session.execute(insert_stmt, values)
+                 stmt = stmt.join(rses_temp_table, models.RSE.id == rses_temp_table.id)
+
+         if not all_states:
+             if not unavailable:
+                 state_clause = models.RSEFileAssociation.state == ReplicaState.AVAILABLE
+             else:
+                 state_clause = or_(
+                     models.RSEFileAssociation.state == ReplicaState.AVAILABLE,
+                     models.RSEFileAssociation.state == ReplicaState.UNAVAILABLE,
+                     models.RSEFileAssociation.state == ReplicaState.COPYING
+                 )
+             stmt = stmt.where(state_clause)
+
+         return stmt.subquery()
+
+     def _resolve_collection_files(
+         temp_table: Any,
+         *,
+         session: "Session"
+     ) -> tuple[int, Any]:
+         """
+         Find all FILE dids contained in collections from temp_table and return them in a newly
+         created temporary table.
+         """
+         resolved_files_temp_table = temp_table_mngr(session).create_scope_name_table()
+         selectable = rucio.core.did.list_child_dids_stmt(temp_table, did_type=DIDType.FILE)
+
+         stmt = insert(
+             resolved_files_temp_table
+         ).from_select(
+             ['scope', 'name'],
+             selectable
+         )
+
+         return session.execute(stmt).rowcount, resolved_files_temp_table
+
+     def _list_replicas_for_collection_files_stmt(
+         temp_table: Any,
+         replicas_subquery: "Subquery"
+     ) -> "Select":
+         """
+         Build a query for listing replicas of files resolved from containers/datasets
+
+         The query assumes that temp_table only contains DIDs of type FILE.
+         """
+         return select(
+             temp_table.scope.label('scope'),
+             temp_table.name.label('name'),
+             literal(None).label('archive_scope'),
+             literal(None).label('archive_name'),
+             replicas_subquery.c.bytes,
+             replicas_subquery.c.md5,
+             replicas_subquery.c.adler32,
+             replicas_subquery.c.path,
+             replicas_subquery.c.state,
+             replicas_subquery.c.rse_id,
+             replicas_subquery.c.rse_name,
+             replicas_subquery.c.rse_type,
+             replicas_subquery.c.volatile,
+         ).join_from(
+             temp_table,
+             replicas_subquery,
+             and_(replicas_subquery.c.scope == temp_table.scope,
+                  replicas_subquery.c.name == temp_table.name),
+         )
+
+     def _list_replicas_for_constituents_stmt(
+         temp_table: Any,
+         replicas_subquery: "Subquery"
+     ) -> "Select":
+         """
+         Build a query for listing replicas of archives containing the files (constituents) given as input.
+         i.e. for a file scope:file.log which exists in scope:archive.tar.gz, it will return the replicas
+         (rse, path, state, etc.) of archive.tar.gz, but with bytes/md5/adler32 of file.log
+         """
+         return select(
+             models.ConstituentAssociation.child_scope.label('scope'),
+             models.ConstituentAssociation.child_name.label('name'),
+             models.ConstituentAssociation.scope.label('archive_scope'),
+             models.ConstituentAssociation.name.label('archive_name'),
+             models.ConstituentAssociation.bytes,
+             models.ConstituentAssociation.md5,
+             models.ConstituentAssociation.adler32,
+             replicas_subquery.c.path,
+             replicas_subquery.c.state,
+             replicas_subquery.c.rse_id,
+             replicas_subquery.c.rse_name,
+             replicas_subquery.c.rse_type,
+             replicas_subquery.c.volatile,
+         ).join_from(
+             temp_table,
+             models.DataIdentifier,
+             and_(models.DataIdentifier.scope == temp_table.scope,
+                  models.DataIdentifier.name == temp_table.name,
+                  models.DataIdentifier.did_type == DIDType.FILE,
+                  models.DataIdentifier.constituent == true()),
+         ).join(
+             models.ConstituentAssociation,
+             and_(models.ConstituentAssociation.child_scope == temp_table.scope,
+                  models.ConstituentAssociation.child_name == temp_table.name)
+         ).join(
+             replicas_subquery,
+             and_(replicas_subquery.c.scope == models.ConstituentAssociation.scope,
+                  replicas_subquery.c.name == models.ConstituentAssociation.name),
+         )
+
+     def _list_replicas_for_input_files_stmt(
+         temp_table: Any,
+         replicas_subquery: "Subquery"
+     ) -> "Select":
+         """
+         Build a query which lists the replicas of FILE DIDs from the user's input, but ignores
+         collections in the same input.
+
+         Note: These FILE dids must be returned to the user even if they don't have replicas,
+         hence the outerjoin against the replicas_subquery.
+         """
+         return select(
+             temp_table.scope.label('scope'),
+             temp_table.name.label('name'),
+             literal(None).label('archive_scope'),
+             literal(None).label('archive_name'),
+             models.DataIdentifier.bytes,
+             models.DataIdentifier.md5,
+             models.DataIdentifier.adler32,
+             replicas_subquery.c.path,
+             replicas_subquery.c.state,
+             replicas_subquery.c.rse_id,
+             replicas_subquery.c.rse_name,
+             replicas_subquery.c.rse_type,
+             replicas_subquery.c.volatile,
+         ).join_from(
+             temp_table,
+             models.DataIdentifier,
+             and_(models.DataIdentifier.scope == temp_table.scope,
+                  models.DataIdentifier.name == temp_table.name,
+                  models.DataIdentifier.did_type == DIDType.FILE),
+         ).outerjoin(
+             replicas_subquery,
+             and_(replicas_subquery.c.scope == temp_table.scope,
+                  replicas_subquery.c.name == temp_table.name),
+         )
+
+     def _inspect_dids(
+         temp_table: Any,
+         *,
+         session: "Session"
+     ) -> tuple[int, int, int]:
+         """
+         Find how many files, collections and constituents are among the dids in the temp_table
+         """
+         stmt = select(
+             func.sum(
+                 case((models.DataIdentifier.did_type == DIDType.FILE, 1), else_=0)
+             ).label('num_files'),
+             func.sum(
+                 case((models.DataIdentifier.did_type.in_([DIDType.CONTAINER, DIDType.DATASET]), 1), else_=0)
+             ).label('num_collections'),
+             func.sum(
+                 case((models.DataIdentifier.constituent == true(), 1), else_=0)
+             ).label('num_constituents'),
+         ).join_from(
+             temp_table,
+             models.DataIdentifier,
+             and_(models.DataIdentifier.scope == temp_table.scope,
+                  models.DataIdentifier.name == temp_table.name),
+         )
+         num_files, num_collections, num_constituents = session.execute(stmt).one()  # the sums are None on empty input
+         return num_files or 0, num_collections or 0, num_constituents or 0
+
+     if dids:
+         filter_ = {'vo': dids[0]['scope'].vo}
+     else:
+         filter_ = {'vo': 'def'}
+
+     dids = {(did['scope'], did['name']): did for did in dids}  # type: ignore (Deduplicate input)
+     if not dids:
+         return
+
+     input_dids_temp_table = temp_table_mngr(session).create_scope_name_table()
+     values = [{'scope': scope, 'name': name} for scope, name in dids]
+     stmt = insert(
+         input_dids_temp_table
+     )
+     session.execute(stmt, values)
+
+     num_files, num_collections, num_constituents = _inspect_dids(input_dids_temp_table, session=session)
+
+     num_files_in_collections, resolved_files_temp_table = 0, None
+     if num_collections:
+         num_files_in_collections, resolved_files_temp_table = _resolve_collection_files(input_dids_temp_table, session=session)
+
+     replicas_subquery = _replicas_filter_subquery()
+     replica_sources = []
+     if num_files:
+         replica_sources.append(
+             _list_replicas_for_input_files_stmt(input_dids_temp_table, replicas_subquery)
+         )
+     if num_constituents and resolve_archives:
+         replica_sources.append(
+             _list_replicas_for_constituents_stmt(input_dids_temp_table, replicas_subquery)
+         )
+     if num_files_in_collections:
+         replica_sources.append(
+             _list_replicas_for_collection_files_stmt(resolved_files_temp_table, replicas_subquery)
+         )
+
+     if not replica_sources:
+         return
+
+     # In the simple case that somebody calls list_replicas on big collections with nrandom set,
+     # opportunistically try to reduce the number of fetched and analyzed rows.
+     if (
+             nrandom
+             # Only try this optimisation if list_replicas was called on collection(s).
+             # The case when list_replicas is called with a mix of file/archive/collection
+             # dids is not handled: database queries in those cases are more complex,
+             # and people don't usually call list_replicas with nrandom on files/archive constituents anyway.
+             and (num_files_in_collections and not num_constituents and not num_files)
+             # The following code introduces overhead if it fails to pick n random replicas.
+             # Only execute when nrandom is much smaller than the total number of candidate files.
+             # 64 was picked without any particular reason as "seems good enough".
+             and 0 < nrandom < num_files_in_collections / 64
+     ):
+         # Randomly select a subset of file DIDs which have at least one replica matching the RSE/replica
+         # filters applied on the database side. Some filters are applied later in python code
+         # (for example: scheme; or client_location/domain). We don't have any guarantee that
+         # those python filters will not drop the replicas which we just selected randomly.
+         stmt = select(
+             resolved_files_temp_table.scope.label('scope'),  # type: ignore (resolved_files_temp_table might be None)
+             resolved_files_temp_table.name.label('name'),  # type: ignore (resolved_files_temp_table might be None)
+         ).where(
+             exists(
+                 select(1)
+             ).where(
+                 replicas_subquery.c.scope == resolved_files_temp_table.scope,  # type: ignore (resolved_files_temp_table might be None)
+                 replicas_subquery.c.name == resolved_files_temp_table.name  # type: ignore (resolved_files_temp_table might be None)
+             )
+         ).order_by(
+             literal_column('dbms_random.value') if session.bind.dialect.name == 'oracle' else func.random()  # type: ignore
+         ).limit(
+             # slightly overshoot to reduce the probability that python-side filtering will
+             # leave us with fewer than nrandom replicas.
+             nrandom * 4
+         )
+         # Reuse the input temp table. We don't need its content anymore
+         random_dids_temp_table = input_dids_temp_table
+         session.execute(delete(random_dids_temp_table))
+         stmt = insert(
+             random_dids_temp_table
+         ).from_select(
+             ['scope', 'name'],
+             stmt
+         )
+         session.execute(stmt)
+
+         # Fetch all replicas for the randomly selected dids and apply filters on the python side
+         stmt = _list_replicas_for_collection_files_stmt(random_dids_temp_table, replicas_subquery)
+         stmt = stmt.order_by('scope', 'name')
+         replica_tuples = session.execute(stmt)
+         random_replicas = list(
+             _pick_n_random(
+                 nrandom,
+                 _list_replicas(replica_tuples, pfns, schemes, [], client_location, domain,  # type: ignore (replica_tuples, pending SQLA2.1: https://github.com/rucio/rucio/discussions/6615)
+                                sign_urls, signature_lifetime, resolve_parents, filter_, by_rse_name, session=session)
+             )
+         )
+         if len(random_replicas) == nrandom:
+             yield from random_replicas
+             return
+         else:
+             # Our opportunistic attempt to pick nrandom replicas without fetching all database rows failed;
+             # continue with the normal list_replicas flow and fetch all replicas
+             pass
+
+     if len(replica_sources) == 1:
+         stmt = replica_sources[0].order_by('scope', 'name')
+         replica_tuples = session.execute(stmt)
+     else:
+         if session.bind.dialect.name == 'mysql':  # type: ignore
+             # On mysql, perform both queries independently and merge their results in python.
+             # The union query fails with "Can't reopen table"
+             replica_tuples = heapq.merge(
+                 *[session.execute(stmt.order_by('scope', 'name')) for stmt in replica_sources],
+                 key=lambda t: (t[0], t[1]),  # sort by scope, name
+             )
+         else:
+             stmt = union(*replica_sources).order_by('scope', 'name')
+             replica_tuples = session.execute(stmt)
+
+     yield from _pick_n_random(
+         nrandom,  # type: ignore (nrandom is not None)
+         _list_replicas(replica_tuples, pfns, schemes, [], client_location, domain,  # type: ignore (replica_tuples, pending SQLA2.1: https://github.com/rucio/rucio/discussions/6615)
+                        sign_urls, signature_lifetime, resolve_parents, filter_, by_rse_name, session=session)
+     )
+
+
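On MySQL the per-source result sets are merged client-side; since each statement already carries its own ORDER BY (scope, name), heapq.merge can interleave them lazily without materialising either stream. A minimal sketch with plain tuples standing in for result rows:

```python
import heapq

# Hypothetical pre-sorted result streams, as produced by the per-statement ORDER BY
files_stmt_rows = [('scope1', 'a.root'), ('scope1', 'c.root')]
collections_stmt_rows = [('scope1', 'b.root'), ('scope2', 'a.root')]

merged = heapq.merge(files_stmt_rows, collections_stmt_rows, key=lambda t: (t[0], t[1]))
print(list(merged))
# [('scope1', 'a.root'), ('scope1', 'b.root'), ('scope1', 'c.root'), ('scope2', 'a.root')]
```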
+ @transactional_session
+ def __bulk_add_new_file_dids(
+     files: "Iterable[dict[str, Any]]",
+     account: InternalAccount,
+     dataset_meta: Optional["Mapping[str, Any]"] = None,
+     *,
+     session: "Session"
+ ) -> Literal[True]:
+     """
+     Bulk add new dids.
+
+     :param files: the list of new files.
+     :param account: The account owner.
+     :param dataset_meta: Additional metadata to set on every new file.
+     :param session: The database session in use.
+     :returns: True if successful.
+     """
+     for file in files:
+         new_did = models.DataIdentifier(scope=file['scope'], name=file['name'],
+                                         account=file.get('account') or account,
+                                         did_type=DIDType.FILE, bytes=file['bytes'],
+                                         md5=file.get('md5'), adler32=file.get('adler32'),
+                                         is_new=None)
+         new_did.save(session=session, flush=False)
+
+         if 'meta' in file and file['meta']:
+             rucio.core.did.set_metadata_bulk(scope=file['scope'], name=file['name'], meta=file['meta'], recursive=False, session=session)
+         if dataset_meta:
+             rucio.core.did.set_metadata_bulk(scope=file['scope'], name=file['name'], meta=dataset_meta, recursive=False, session=session)
+     try:
+         session.flush()
+     except IntegrityError as error:
+         if match('.*IntegrityError.*02291.*integrity constraint.*DIDS_SCOPE_FK.*violated - parent key not found.*', error.args[0]) \
+                 or match('.*IntegrityError.*FOREIGN KEY constraint failed.*', error.args[0]) \
+                 or match('.*IntegrityError.*1452.*Cannot add or update a child row: a foreign key constraint fails.*', error.args[0]) \
+                 or match('.*IntegrityError.*insert or update on table.*violates foreign key constraint "DIDS_SCOPE_FK".*', error.args[0]) \
+                 or match('.*ForeignKeyViolation.*insert or update on table.*violates foreign key constraint.*', error.args[0]) \
+                 or match('.*IntegrityError.*foreign key constraints? failed.*', error.args[0]):
+             raise exception.ScopeNotFound('Scope not found!')
+
+         raise exception.RucioException(error.args)
+     except DatabaseError as error:
+         if match('.*(DatabaseError).*ORA-14400.*inserted partition key does not map to any partition.*', error.args[0]):
+             raise exception.ScopeNotFound('Scope not found!')
+
+         raise exception.RucioException(error.args)
+     except FlushError as error:
+         if match('New instance .* with identity key .* conflicts with persistent instance', error.args[0]):
+             raise exception.DataIdentifierAlreadyExists('Data Identifier already exists!')
+         raise exception.RucioException(error.args)
+     return True
+
+
+ @transactional_session
+ def __bulk_add_file_dids(
+     files: "Iterable[dict[str, Any]]",
+     account: InternalAccount,
+     dataset_meta: Optional["Mapping[str, Any]"] = None,
+     *,
+     session: "Session"
+ ) -> list[dict[str, Any]]:
+     """
+     Bulk add new dids.
+
+     :param files: the list of files.
+     :param account: The account owner.
+     :param session: The database session in use.
+     :returns: the list of new and already-existing files.
+     """
+     condition = []
+     for f in files:
+         condition.append(and_(models.DataIdentifier.scope == f['scope'],
+                               models.DataIdentifier.name == f['name'],
+                               models.DataIdentifier.did_type == DIDType.FILE))
+
+     stmt = select(
+         models.DataIdentifier.scope,
+         models.DataIdentifier.name,
+         models.DataIdentifier.bytes,
+         models.DataIdentifier.md5,
+         models.DataIdentifier.adler32,
+     ).with_hint(
+         models.DataIdentifier,
+         'INDEX(DIDS DIDS_PK)',
+         'oracle'
+     ).where(
+         or_(*condition)
+     )
+     available_files = [res._asdict() for res in session.execute(stmt).all()]
+     new_files = list()
+     for file in files:
+         found = False
+         for available_file in available_files:
+             if file['scope'] == available_file['scope'] and file['name'] == available_file['name']:
+                 found = True
+                 break
+         if not found:
+             new_files.append(file)
+     __bulk_add_new_file_dids(files=new_files, account=account,
+                              dataset_meta=dataset_meta,
+                              session=session)
+     return new_files + available_files
+
+
+ def tombstone_from_delay(tombstone_delay: Optional[Union[str, timedelta]]) -> Optional[datetime]:
+     """
+     Turn a tombstone delay (in seconds) into an absolute tombstone datetime.
+     A negative delay yields the epoch (immediately eligible); no/zero delay yields None.
+     """
+     # Tolerate None for tombstone_delay
+     if not tombstone_delay:
+         return None
+
+     if not isinstance(tombstone_delay, timedelta):
+         tombstone_delay = timedelta(seconds=int(tombstone_delay))
+
+     if not tombstone_delay:
+         return None
+
+     if tombstone_delay < timedelta(0):
+         return datetime(1970, 1, 1)
+
+     return datetime.utcnow() + tombstone_delay
+
+
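The conversion behaves as follows, sketched as a few calls (assuming the function is in scope; times are relative to utcnow at call time):

```python
from datetime import datetime, timedelta

# tombstone_from_delay(None)    -> None                  (no delay configured)
# tombstone_from_delay('0')     -> None                  (zero delay treated as unset)
# tombstone_from_delay('-10')   -> datetime(1970, 1, 1)  (epoch: immediately eligible)
# tombstone_from_delay('3600')  -> utcnow() + 1 hour
assert tombstone_from_delay('-10') == datetime(1970, 1, 1)
assert tombstone_from_delay('3600') - datetime.utcnow() <= timedelta(hours=1)
```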
+ @transactional_session
+ def __bulk_add_replicas(
+     rse_id: str,
+     files: "Iterable[dict[str, Any]]",
+     account: InternalAccount,
+     *,
+     session: "Session"
+ ) -> tuple[int, int]:
+     """
+     Bulk add new replicas.
+
+     :param rse_id: the RSE id.
+     :param files: the list of files.
+     :param account: The account owner.
+     :param session: The database session in use.
+     :returns: a tuple (number of new replicas, total bytes added).
+     """
+     nbfiles, bytes_ = 0, 0
+     # Check for the replicas already available
+     condition = []
+     for f in files:
+         condition.append(and_(models.RSEFileAssociation.scope == f['scope'],
+                               models.RSEFileAssociation.name == f['name'],
+                               models.RSEFileAssociation.rse_id == rse_id))
+
+     stmt = select(
+         models.RSEFileAssociation.scope,
+         models.RSEFileAssociation.name,
+         models.RSEFileAssociation.rse_id,
+     ).with_hint(
+         models.RSEFileAssociation,
+         'INDEX(REPLICAS REPLICAS_PK)',
+         'oracle'
+     ).where(
+         or_(*condition)
+     )
+
+     available_replicas = [res._asdict() for res in session.execute(stmt).all()]
+
+     default_tombstone_delay = get_rse_attribute(rse_id, RseAttr.TOMBSTONE_DELAY, session=session)
+     default_tombstone = tombstone_from_delay(default_tombstone_delay)
+
+     new_replicas = []
+     for file in files:
+         found = False
+         for available_replica in available_replicas:
+             if file['scope'] == available_replica['scope'] and file['name'] == available_replica['name'] and rse_id == available_replica['rse_id']:
+                 found = True
+                 break
+         if not found:
+             nbfiles += 1
+             bytes_ += file['bytes']
+             new_replicas.append({'rse_id': rse_id, 'scope': file['scope'],
+                                  'name': file['name'], 'bytes': file['bytes'],
+                                  'path': file.get('path'),
+                                  'state': ReplicaState(file.get('state', 'A')),
+                                  'md5': file.get('md5'), 'adler32': file.get('adler32'),
+                                  'lock_cnt': file.get('lock_cnt', 0),
+                                  'tombstone': file.get('tombstone') or default_tombstone})
+     try:
+         stmt = insert(
+             models.RSEFileAssociation
+         )
+         if new_replicas:
+             session.execute(stmt, new_replicas)
+         session.flush()
+         return nbfiles, bytes_
+     except IntegrityError as error:
+         if match('.*IntegrityError.*ORA-00001: unique constraint .*REPLICAS_PK.*violated.*', error.args[0]) \
+                 or match('.*IntegrityError.*1062.*Duplicate entry.*', error.args[0]) \
+                 or match('.*IntegrityError.*columns? rse_id.*scope.*name.*not unique.*', error.args[0]) \
+                 or match('.*IntegrityError.*duplicate key value violates unique constraint.*', error.args[0]):
+             raise exception.Duplicate("File replica already exists!")
+         raise exception.RucioException(error.args)
+     except DatabaseError as error:
+         raise exception.RucioException(error.args)
+
+
+ @transactional_session
+ def add_replicas(
+     rse_id: str,
+     files: "Iterable[dict[str, Any]]",
+     account: InternalAccount,
+     ignore_availability: bool = True,
+     dataset_meta: Optional["Mapping[str, Any]"] = None,
+     *,
+     session: "Session"
+ ) -> None:
+     """
+     Bulk add file replicas.
+
+     :param rse_id: The RSE id.
+     :param files: The list of files.
+     :param account: The account owner.
+     :param ignore_availability: Ignore the RSE blocklisting.
+     :param dataset_meta: Additional metadata to set on the new files.
+     :param session: The database session in use.
+     """
+
+     def _expected_pfns(lfns, rse_settings, scheme, operation='write', domain='wan', protocol_attr=None):
+         p = rsemgr.create_protocol(rse_settings=rse_settings, operation='write', scheme=scheme, domain=domain, protocol_attr=protocol_attr)
+         expected_pfns = p.lfns2pfns(lfns)
+         return clean_pfns(expected_pfns.values())
+
+     replica_rse = get_rse(rse_id=rse_id, session=session)
+
+     if replica_rse['volatile'] is True:
+         raise exception.UnsupportedOperation('Cannot add replicas on volatile RSE %s ' % (replica_rse['rse']))
+
+     if not replica_rse['availability_write'] and not ignore_availability:
+         raise exception.ResourceTemporaryUnavailable('%s is temporarily unavailable for writing' % replica_rse['rse'])
+
+     for file in files:
+         if 'pfn' not in file:
+             if not replica_rse['deterministic']:
+                 raise exception.UnsupportedOperation('PFN needed for this (non-deterministic) RSE %s ' % (replica_rse['rse']))
+
+     __bulk_add_file_dids(files=files, account=account,
+                          dataset_meta=dataset_meta,
+                          session=session)
+
+     pfns = {}  # dict[str, list[str]], {scheme: [pfns], scheme: [pfns]}
+     for file in files:
+         if 'pfn' in file:
+             scheme = file['pfn'].split(':')[0]
+             pfns.setdefault(scheme, []).append(file['pfn'])
+
+     if pfns:
+         rse_settings = rsemgr.get_rse_info(rse_id=rse_id, session=session)
+         for scheme in pfns.keys():
+             if not replica_rse['deterministic']:
+                 p = rsemgr.create_protocol(rse_settings=rse_settings, operation='write', scheme=scheme)
+                 pfns[scheme] = p.parse_pfns(pfns=pfns[scheme])
+                 for file in files:
+                     if file['pfn'].startswith(scheme):
+                         tmp = pfns[scheme][file['pfn']]
+                         file['path'] = ''.join([tmp['path'], tmp['name']])
+             else:
+                 # Check that the pfns match the expected pfns
+                 lfns = [{'scope': i['scope'].external, 'name': i['name']} for i in files if i['pfn'].startswith(scheme)]
+                 pfns[scheme] = set(clean_pfns(pfns[scheme]))
+                 expected_pfns = set()
+
+                 for protocol_attr in rsemgr.get_protocols_ordered(rse_settings=rse_settings, operation='write', scheme=scheme, domain='wan'):
+                     expected_pfns.update(_expected_pfns(lfns, rse_settings, scheme, operation='write', domain='wan',
+                                                         protocol_attr=protocol_attr))
+                 pfns[scheme] -= expected_pfns
+
+                 if len(pfns[scheme]) > 0:
+                     for protocol_attr in rsemgr.get_protocols_ordered(rse_settings=rse_settings, operation='write', scheme=scheme, domain='lan'):
+                         expected_pfns.update(_expected_pfns(lfns, rse_settings, scheme, operation='write', domain='lan',
+                                                             protocol_attr=protocol_attr))
+                     pfns[scheme] -= expected_pfns
+
+                 if len(pfns[scheme]) > 0:
+                     # pfns not found in wan or lan
+                     pfns_scheme = pfns[scheme]
+                     raise exception.InvalidPath(f"One of the PFNs provided {pfns_scheme!r} for {lfns!r} does not match the Rucio expected PFNs: {expected_pfns!r}")
+
+     nbfiles, bytes_ = __bulk_add_replicas(rse_id=rse_id, files=files, account=account, session=session)
+     increase(rse_id=rse_id, files=nbfiles, bytes_=bytes_, session=session)
+
+
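The per-scheme bucketing above hinges on one convention: the scheme is whatever precedes the first colon of the PFN, and setdefault keeps one list per scheme. Sketch:

```python
files = [
    {'pfn': 'root://host.example.org/path/f1'},
    {'pfn': 'davs://host.example.org/path/f2'},
    {'pfn': 'root://other.example.org/path/f3'},
]
pfns = {}
for file in files:
    scheme = file['pfn'].split(':')[0]
    pfns.setdefault(scheme, []).append(file['pfn'])
print(pfns)
# {'root': ['root://host.example.org/path/f1', 'root://other.example.org/path/f3'],
#  'davs': ['davs://host.example.org/path/f2']}
```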
+ @transactional_session
+ def add_replica(
+     rse_id: str,
+     scope: InternalScope,
+     name: str,
+     bytes_: int,
+     account: models.InternalAccount,
+     adler32: Optional[str] = None,
+     md5: Optional[str] = None,
+     dsn: Optional[str] = None,
+     pfn: Optional[str] = None,
+     meta: Optional[dict[str, Any]] = None,
+     rules: Optional[list[dict[str, Any]]] = None,
+     tombstone: "Optional[datetime]" = None,
+     *,
+     session: "Session"
+ ) -> None:
+     """
+     Add a file replica.
+
+     :param rse_id: the rse id.
+     :param scope: the scope name.
+     :param name: The data identifier name.
+     :param bytes_: the size of the file.
+     :param account: The account owner.
+     :param md5: The md5 checksum.
+     :param adler32: The adler32 checksum.
+     :param pfn: Physical file name (for non-deterministic RSEs).
+     :param meta: Meta-data associated with the file. Represented as key/value pairs in a dictionary.
+     :param rules: Replication rules associated with the file. A list of dictionaries, e.g., [{'copies': 2, 'rse_expression': 'TIERS1'}, ].
+     :param tombstone: If set, a tombstone datetime to create the replica with.
+     :param session: The database session in use.
+     """
+     meta = meta or {}
+     rules = rules or []
+
+     file = {'scope': scope, 'name': name, 'bytes': bytes_, 'adler32': adler32, 'md5': md5, 'meta': meta, 'rules': rules, 'tombstone': tombstone}
+     if pfn:
+         file['pfn'] = pfn
+     return add_replicas(rse_id=rse_id, files=[file, ], account=account, session=session)
+
+
+ @METRICS.time_it
+ @transactional_session
+ def delete_replicas(
+     rse_id: str,
+     files: Optional["Sequence[dict[str, Any]]"],
+     ignore_availability: bool = True,
+     *,
+     session: "Session"
+ ) -> None:
+     """
+     Delete file replicas.
+
+     :param rse_id: the rse id.
+     :param files: the list of files to delete.
+     :param ignore_availability: Ignore the RSE blocklisting.
+     :param session: The database session in use.
+     """
+     if not files:
+         return
+
+     replica_rse = get_rse(rse_id=rse_id, session=session)
+
+     if not replica_rse['availability_delete'] and not ignore_availability:
+         raise exception.ResourceTemporaryUnavailable('%s is temporarily unavailable '
+                                                      'for deleting' % replica_rse['rse'])
+     tt_mngr = temp_table_mngr(session)
+     scope_name_temp_table = tt_mngr.create_scope_name_table()
+     scope_name_temp_table2 = tt_mngr.create_scope_name_table()
+     association_temp_table = tt_mngr.create_association_table()
+
+     values = [{'scope': file['scope'], 'name': file['name']} for file in files]
+     stmt = insert(
+         scope_name_temp_table
+     )
+     session.execute(stmt, values)
+
+     # WARNING: This should not be necessary, since it would mean the replica is still used as a source.
+     stmt = delete(
+         models.Source,
+     ).where(
+         exists(select(1)
+                .where(and_(models.Source.scope == scope_name_temp_table.scope,
+                            models.Source.name == scope_name_temp_table.name,
+                            models.Source.rse_id == rse_id)))
+     ).execution_options(
+         synchronize_session=False
+     )
+     session.execute(stmt)
+
+     stmt = select(
+         func.count(),
+         func.sum(models.RSEFileAssociation.bytes),
+     ).join_from(
+         scope_name_temp_table,
+         models.RSEFileAssociation,
+         and_(models.RSEFileAssociation.scope == scope_name_temp_table.scope,
+              models.RSEFileAssociation.name == scope_name_temp_table.name,
+              models.RSEFileAssociation.rse_id == rse_id)
+     )
+     delta, bytes_ = session.execute(stmt).one()
+
+     # Delete replicas
+     stmt = delete(
+         models.RSEFileAssociation,
+     ).where(
+         exists(select(1)
+                .where(
+                    and_(models.RSEFileAssociation.scope == scope_name_temp_table.scope,
+                         models.RSEFileAssociation.name == scope_name_temp_table.name,
+                         models.RSEFileAssociation.rse_id == rse_id)))
+     ).execution_options(
+         synchronize_session=False
+     )
+     res = session.execute(stmt)
+     if res.rowcount != len(files):
+         raise exception.ReplicaNotFound("One or several replicas don't exist.")
+
+     # Update bad replicas
+     stmt = update(
+         models.BadReplica,
+     ).where(
+         exists(select(1)
+                .where(
+                    and_(models.BadReplica.scope == scope_name_temp_table.scope,
+                         models.BadReplica.name == scope_name_temp_table.name,
+                         models.BadReplica.rse_id == rse_id)))
+     ).where(
+         models.BadReplica.state == BadFilesStatus.BAD
+     ).values({
+         models.BadReplica.state: BadFilesStatus.DELETED,
+         models.BadReplica.updated_at: datetime.utcnow()
+     }).execution_options(
+         synchronize_session=False
+     )
+
+     res = session.execute(stmt)
+
+     __cleanup_after_replica_deletion(scope_name_temp_table=scope_name_temp_table,
+                                      scope_name_temp_table2=scope_name_temp_table2,
+                                      association_temp_table=association_temp_table,
+                                      rse_id=rse_id, files=files, session=session)
+
+     # Decrease the RSE counter
+     decrease(rse_id=rse_id, files=delta, bytes_=bytes_, session=session)
+
+
2008
+ @transactional_session
2009
+ def __cleanup_after_replica_deletion(
2010
+ scope_name_temp_table: Any,
2011
+ scope_name_temp_table2: Any,
2012
+ association_temp_table: Any,
2013
+ rse_id: str,
2014
+ files: "Iterable[dict[str, Any]]",
2015
+ *,
2016
+ session: "Session"
2017
+ ) -> None:
2018
+ """
2019
+ Perform update of collections/archive associations/dids after the removal of their replicas
2020
+ :param rse_id: the rse id
2021
+ :param files: list of files whose replica got deleted
2022
+ :param session: The database session in use.
2023
+ """
2024
+ clt_to_update, parents_to_analyze, affected_archives, clt_replicas_to_delete = set(), set(), set(), set()
2025
+ did_condition = []
2026
+ incomplete_dids, messages, clt_to_set_not_archive = [], [], []
2027
+ for file in files:
2028
+
2029
+ # Schedule update of all collections containing this file and having a collection replica in the RSE
2030
+ clt_to_update.add(ScopeName(scope=file['scope'], name=file['name']))
2031
+
2032
+ # If the file doesn't have any replicas anymore, we should perform cleanups of objects
2033
+ # related to this file. However, if the file is "lost", it's removal wasn't intentional,
2034
+ # so we want to skip deleting the metadata here. Perform cleanups:
2035
+
2036
+ # 1) schedule removal of this file from all parent datasets
2037
+ parents_to_analyze.add(ScopeName(scope=file['scope'], name=file['name']))
2038
+
2039
+ # 2) schedule removal of this file from the DID table
2040
+ did_condition.append(
2041
+ and_(models.DataIdentifier.scope == file['scope'],
2042
+ models.DataIdentifier.name == file['name'],
2043
+ models.DataIdentifier.availability != DIDAvailability.LOST,
2044
+ ~exists(select(1).prefix_with("/*+ INDEX(REPLICAS REPLICAS_PK) */", dialect='oracle')).where(
2045
+ and_(models.RSEFileAssociation.scope == file['scope'],
2046
+ models.RSEFileAssociation.name == file['name'])),
2047
+ ~exists(select(1).prefix_with("/*+ INDEX(ARCHIVE_CONTENTS ARCH_CONTENTS_PK) */", dialect='oracle')).where(
2048
+ and_(models.ConstituentAssociation.child_scope == file['scope'],
2049
+ models.ConstituentAssociation.child_name == file['name']))))
2050
+
2051
+ # 3) if the file is an archive, schedule cleanup on the files from inside the archive
2052
+ affected_archives.add(ScopeName(scope=file['scope'], name=file['name']))
2053
+
2054
+ if clt_to_update:
2055
+ # Get all collection_replicas at RSE, insert them into UpdatedCollectionReplica
2056
+ stmt = delete(scope_name_temp_table)
2057
+ session.execute(stmt)
2058
+ values = [sn._asdict() for sn in clt_to_update]
2059
+ stmt = insert(scope_name_temp_table)
2060
+ session.execute(stmt, values)
2061
+ stmt = select(
2062
+ models.DataIdentifierAssociation.scope,
2063
+ models.DataIdentifierAssociation.name,
2064
+ ).distinct(
2065
+ ).join_from(
2066
+ scope_name_temp_table,
2067
+ models.DataIdentifierAssociation,
2068
+ and_(scope_name_temp_table.scope == models.DataIdentifierAssociation.child_scope,
2069
+ scope_name_temp_table.name == models.DataIdentifierAssociation.child_name)
2070
+ ).join(
2071
+ models.CollectionReplica,
2072
+ and_(models.CollectionReplica.scope == models.DataIdentifierAssociation.scope,
2073
+ models.CollectionReplica.name == models.DataIdentifierAssociation.name,
2074
+ models.CollectionReplica.rse_id == rse_id)
2075
+ )
2076
+ for parent_scope, parent_name in session.execute(stmt):
2077
+ models.UpdatedCollectionReplica(scope=parent_scope,
2078
+ name=parent_name,
2079
+ did_type=DIDType.DATASET,
2080
+ rse_id=rse_id). \
2081
+ save(session=session, flush=False)
2082
+
2083
+ # Delete did from the content for the last did
2084
+ while parents_to_analyze:
2085
+ did_associations_to_remove = set()
2086
+
2087
+ stmt = delete(scope_name_temp_table)
2088
+ session.execute(stmt)
2089
+ values = [sn._asdict() for sn in parents_to_analyze]
2090
+ stmt = insert(scope_name_temp_table)
2091
+ session.execute(stmt, values)
2092
+ parents_to_analyze.clear()
2093
+
2094
+ stmt = select(
2095
+ models.DataIdentifierAssociation.scope,
2096
+ models.DataIdentifierAssociation.name,
2097
+ models.DataIdentifierAssociation.did_type,
2098
+ models.DataIdentifierAssociation.child_scope,
2099
+ models.DataIdentifierAssociation.child_name,
2100
+ ).distinct(
2101
+ ).join_from(
2102
+ scope_name_temp_table,
2103
+ models.DataIdentifierAssociation,
2104
+ and_(scope_name_temp_table.scope == models.DataIdentifierAssociation.child_scope,
2105
+ scope_name_temp_table.name == models.DataIdentifierAssociation.child_name)
2106
+ ).outerjoin(
2107
+ models.DataIdentifier,
2108
+ and_(models.DataIdentifier.availability == DIDAvailability.LOST,
2109
+ models.DataIdentifier.scope == models.DataIdentifierAssociation.child_scope,
2110
+ models.DataIdentifier.name == models.DataIdentifierAssociation.child_name)
2111
+ ).where(
2112
+ models.DataIdentifier.scope == null()
2113
+ ).outerjoin(
2114
+ models.RSEFileAssociation,
2115
+ and_(models.RSEFileAssociation.scope == models.DataIdentifierAssociation.child_scope,
2116
+ models.RSEFileAssociation.name == models.DataIdentifierAssociation.child_name)
2117
+ ).where(
2118
+ models.RSEFileAssociation.scope == null()
2119
+ ).outerjoin(
2120
+ models.ConstituentAssociation,
2121
+ and_(models.ConstituentAssociation.child_scope == models.DataIdentifierAssociation.child_scope,
2122
+ models.ConstituentAssociation.child_name == models.DataIdentifierAssociation.child_name)
2123
+ ).where(
2124
+ models.ConstituentAssociation.child_scope == null()
2125
+ )
2126
+
2127
+ clt_to_set_not_archive.append(set())
2128
+ for parent_scope, parent_name, did_type, child_scope, child_name in session.execute(stmt):
2129
+
2130
+ # Schedule removal of child file/dataset/container from the parent dataset/container
2131
+ did_associations_to_remove.add(Association(scope=parent_scope, name=parent_name,
2132
+ child_scope=child_scope, child_name=child_name))
2133
+
2134
+ # Schedule setting is_archive = False on parents which don't have any children with is_archive == True anymore
2135
+ clt_to_set_not_archive[-1].add(ScopeName(scope=parent_scope, name=parent_name))
2136
+
2137
+ # If the parent dataset/container becomes empty as a result of the child removal
2138
+ # (it was the last child), metadata cleanup has to be done:
2139
+ #
2140
+ # 1) Schedule to remove the replicas of this empty collection
2141
+ clt_replicas_to_delete.add(ScopeName(scope=parent_scope, name=parent_name))
2142
+
2143
+ # 2) Schedule removal of this empty collection from its own parent collections
2144
+ parents_to_analyze.add(ScopeName(scope=parent_scope, name=parent_name))
2145
+
2146
+ # 3) Schedule removal of the entry from the DIDs table
2147
+ remove_open_did = config_get_bool('reaper', 'remove_open_did', default=False, session=session)
2148
+ if remove_open_did:
2149
+ did_condition.append(
2150
+ and_(models.DataIdentifier.scope == parent_scope,
2151
+ models.DataIdentifier.name == parent_name,
2152
+ ~exists(1).where(
2153
+ and_(models.DataIdentifierAssociation.child_scope == parent_scope,
2154
+ models.DataIdentifierAssociation.child_name == parent_name)),
2155
+ ~exists(1).where(
2156
+ and_(models.DataIdentifierAssociation.scope == parent_scope,
2157
+ models.DataIdentifierAssociation.name == parent_name))))
2158
+ else:
2159
+ did_condition.append(
2160
+ and_(models.DataIdentifier.scope == parent_scope,
2161
+ models.DataIdentifier.name == parent_name,
2162
+ models.DataIdentifier.is_open == false(),
2163
+ ~exists(1).where(
2164
+ and_(models.DataIdentifierAssociation.child_scope == parent_scope,
2165
+ models.DataIdentifierAssociation.child_name == parent_name)),
2166
+ ~exists(1).where(
2167
+ and_(models.DataIdentifierAssociation.scope == parent_scope,
2168
+ models.DataIdentifierAssociation.name == parent_name))))
2169
+
2170
+ if did_associations_to_remove:
2171
+ stmt = delete(association_temp_table)
2172
+ session.execute(stmt)
2173
+ values = [a._asdict() for a in did_associations_to_remove]
2174
+ stmt = insert(association_temp_table)
2175
+ session.execute(stmt, values)
2176
+
2177
+ # get the list of modified parent scope, name
2178
+ stmt = select(
2179
+ models.DataIdentifier.scope,
2180
+ models.DataIdentifier.name,
2181
+ models.DataIdentifier.did_type,
2182
+ ).distinct(
2183
+ ).join_from(
2184
+ association_temp_table,
2185
+ models.DataIdentifier,
2186
+ and_(association_temp_table.scope == models.DataIdentifier.scope,
2187
+ association_temp_table.name == models.DataIdentifier.name)
2188
+ ).where(
2189
+ or_(models.DataIdentifier.complete == true(),
2190
+ models.DataIdentifier.complete.is_(None)),
2191
+ )
2192
+ for parent_scope, parent_name, parent_did_type in session.execute(stmt):
2193
+ message = {'scope': parent_scope,
2194
+ 'name': parent_name,
2195
+ 'did_type': parent_did_type,
2196
+ 'event_type': 'INCOMPLETE'}
2197
+ if message not in messages:
2198
+ messages.append(message)
2199
+ incomplete_dids.append(ScopeName(scope=parent_scope, name=parent_name))
2200
+
2201
+ content_to_delete_filter = exists(select(1)
2202
+ .where(and_(association_temp_table.scope == models.DataIdentifierAssociation.scope,
2203
+ association_temp_table.name == models.DataIdentifierAssociation.name,
2204
+ association_temp_table.child_scope == models.DataIdentifierAssociation.child_scope,
2205
+ association_temp_table.child_name == models.DataIdentifierAssociation.child_name)))
2206
+
2207
+ rucio.core.did.insert_content_history(filter_=content_to_delete_filter, did_created_at=None, session=session)
2208
+
2209
+ stmt = delete(
2210
+ models.DataIdentifierAssociation
2211
+ ).where(
2212
+ content_to_delete_filter,
2213
+ ).execution_options(
2214
+ synchronize_session=False
2215
+ )
2216
+ session.execute(stmt)
2217
+
2218
+ # Get collection replicas of collections which became empty
2219
+ if clt_replicas_to_delete:
2220
+ stmt = delete(scope_name_temp_table)
2221
+ session.execute(stmt)
2222
+ values = [sn._asdict() for sn in clt_replicas_to_delete]
2223
+ stmt = insert(scope_name_temp_table)
2224
+ session.execute(stmt, values)
2225
+ stmt = delete(scope_name_temp_table2)
2226
+ session.execute(stmt)
2227
+ stmt = select(
2228
+ models.CollectionReplica.scope,
2229
+ models.CollectionReplica.name,
2230
+ ).distinct(
2231
+ ).join_from(
2232
+ scope_name_temp_table,
2233
+ models.CollectionReplica,
2234
+ and_(scope_name_temp_table.scope == models.CollectionReplica.scope,
2235
+ scope_name_temp_table.name == models.CollectionReplica.name),
2236
+ ).join(
2237
+ models.DataIdentifier,
2238
+ and_(models.DataIdentifier.scope == models.CollectionReplica.scope,
2239
+ models.DataIdentifier.name == models.CollectionReplica.name)
2240
+ ).outerjoin(
2241
+ models.DataIdentifierAssociation,
2242
+ and_(models.DataIdentifierAssociation.scope == models.CollectionReplica.scope,
2243
+ models.DataIdentifierAssociation.name == models.CollectionReplica.name)
2244
+ ).where(
2245
+ models.DataIdentifierAssociation.scope == null()
2246
+ )
2247
+ stmt = insert(
2248
+ scope_name_temp_table2
2249
+ ).from_select(
2250
+ ['scope', 'name'],
2251
+ stmt
2252
+ )
2253
+ session.execute(stmt)
2254
+ # Delete the retrieved collection replicas of empty collections
2255
+ stmt = delete(
2256
+ models.CollectionReplica,
2257
+ ).where(
2258
+ exists(select(1)
2259
+ .where(and_(models.CollectionReplica.scope == scope_name_temp_table2.scope,
2260
+ models.CollectionReplica.name == scope_name_temp_table2.name)))
2261
+ ).execution_options(
2262
+ synchronize_session=False
2263
+ )
2264
+ session.execute(stmt)
2265
+
2266
+ # Update incomplete state
2267
+ dids_to_delete = set()
2268
+ if incomplete_dids:
2269
+ stmt = delete(scope_name_temp_table)
2270
+ session.execute(stmt)
2271
+ values = [sn._asdict() for sn in incomplete_dids]
2272
+ stmt = insert(scope_name_temp_table)
2273
+ session.execute(stmt, values)
2274
+ stmt = update(
2275
+ models.DataIdentifier
2276
+ ).where(
2277
+ exists(select(1)
2278
+ .where(and_(models.DataIdentifier.scope == scope_name_temp_table.scope,
2279
+ models.DataIdentifier.name == scope_name_temp_table.name)))
2280
+ ).where(
2281
+ models.DataIdentifier.complete != false(),
2282
+ ).values({
2283
+ models.DataIdentifier.complete: False
2284
+ }).execution_options(
2285
+ synchronize_session=False
2286
+ )
2287
+
2288
+ session.execute(stmt)
2289
+
2290
+ # delete empty dids
2291
+ if did_condition:
2292
+ for chunk in chunks(did_condition, 10):
2293
+ stmt = select(
2294
+ models.DataIdentifier.scope,
2295
+ models.DataIdentifier.name,
2296
+ models.DataIdentifier.did_type,
2297
+ ).with_hint(
2298
+ models.DataIdentifier,
2299
+ 'INDEX(DIDS DIDS_PK)',
2300
+ 'oracle'
2301
+ ).where(
2302
+ or_(*chunk)
2303
+ )
2304
+ for scope, name, did_type in session.execute(stmt):
2305
+ if did_type == DIDType.DATASET:
2306
+ messages.append({'event_type': 'ERASE',
2307
+ 'payload': dumps({'scope': scope.external,
2308
+ 'name': name,
2309
+ 'account': 'root'})})
2310
+ dids_to_delete.add(ScopeName(scope=scope, name=name))
2311
+
2312
+ # Remove Archive Constituents
2313
+ constituent_associations_to_delete = set()
2314
+ if affected_archives:
2315
+ stmt = delete(scope_name_temp_table)
2316
+ session.execute(stmt)
2317
+ values = [sn._asdict() for sn in affected_archives]
2318
+ stmt = insert(scope_name_temp_table)
2319
+ session.execute(stmt, values)
2320
+
2321
+ stmt = select(
2322
+ models.ConstituentAssociation
2323
+ ).distinct(
2324
+ ).join_from(
2325
+ scope_name_temp_table,
2326
+ models.ConstituentAssociation,
2327
+ and_(scope_name_temp_table.scope == models.ConstituentAssociation.scope,
2328
+ scope_name_temp_table.name == models.ConstituentAssociation.name),
2329
+ ).outerjoin(
2330
+ models.DataIdentifier,
2331
+ and_(models.DataIdentifier.availability == DIDAvailability.LOST,
2332
+ models.DataIdentifier.scope == models.ConstituentAssociation.scope,
2333
+ models.DataIdentifier.name == models.ConstituentAssociation.name)
2334
+ ).where(
2335
+ models.DataIdentifier.scope == null()
2336
+ ).outerjoin(
2337
+ models.RSEFileAssociation,
2338
+ and_(models.RSEFileAssociation.scope == models.ConstituentAssociation.scope,
2339
+ models.RSEFileAssociation.name == models.ConstituentAssociation.name)
2340
+ ).where(
2341
+ models.RSEFileAssociation.scope == null()
2342
+ )
2343
+
2344
+ for constituent in session.execute(stmt).scalars().all():
2345
+ constituent_associations_to_delete.add(Association(scope=constituent.scope, name=constituent.name,
2346
+ child_scope=constituent.child_scope, child_name=constituent.child_name))
2347
+ models.ConstituentAssociationHistory(
2348
+ child_scope=constituent.child_scope,
2349
+ child_name=constituent.child_name,
2350
+ scope=constituent.scope,
2351
+ name=constituent.name,
2352
+ bytes=constituent.bytes,
2353
+ adler32=constituent.adler32,
2354
+ md5=constituent.md5,
2355
+ guid=constituent.guid,
2356
+ length=constituent.length,
2357
+ updated_at=constituent.updated_at,
2358
+ created_at=constituent.created_at,
2359
+ ).save(session=session, flush=False)
2360
+
2361
+ if constituent_associations_to_delete:
2362
+ stmt = delete(association_temp_table)
2363
+ session.execute(stmt)
2364
+ values = [a._asdict() for a in constituent_associations_to_delete]
2365
+ stmt = insert(association_temp_table)
2366
+ session.execute(stmt, values)
2367
+ stmt = delete(
2368
+ models.ConstituentAssociation
2369
+ ).where(
2370
+ exists(select(1)
2371
+ .where(and_(association_temp_table.scope == models.ConstituentAssociation.scope,
2372
+ association_temp_table.name == models.ConstituentAssociation.name,
2373
+ association_temp_table.child_scope == models.ConstituentAssociation.child_scope,
2374
+ association_temp_table.child_name == models.ConstituentAssociation.child_name)))
2375
+ ).execution_options(
2376
+ synchronize_session=False
2377
+ )
2378
+ session.execute(stmt)
2379
+
2380
+ removed_constituents = {ScopeName(scope=c.child_scope, name=c.child_name) for c in constituent_associations_to_delete}
2381
+ for chunk in chunks(removed_constituents, 200):
2382
+ __cleanup_after_replica_deletion(scope_name_temp_table=scope_name_temp_table,
2383
+ scope_name_temp_table2=scope_name_temp_table2,
2384
+ association_temp_table=association_temp_table,
2385
+ rse_id=rse_id, files=[sn._asdict() for sn in chunk], session=session)
2386
+
2387
+ if dids_to_delete:
2388
+ stmt = delete(scope_name_temp_table)
2389
+ session.execute(stmt)
2390
+ values = [sn._asdict() for sn in dids_to_delete]
2391
+ stmt = insert(scope_name_temp_table)
2392
+ session.execute(stmt, values)
2393
+
2394
+ # Remove rules in Waiting for approval or Suspended
2395
+ stmt = delete(
2396
+ models.ReplicationRule,
2397
+ ).where(
2398
+ exists(select(1)
2399
+ .where(and_(models.ReplicationRule.scope == scope_name_temp_table.scope,
2400
+ models.ReplicationRule.name == scope_name_temp_table.name)))
2401
+ ).where(
2402
+ models.ReplicationRule.state.in_((RuleState.SUSPENDED, RuleState.WAITING_APPROVAL))
2403
+ ).execution_options(
2404
+ synchronize_session=False
2405
+ )
2406
+ session.execute(stmt)
2407
+
2408
+ # Remove DID Metadata
2409
+ must_delete_did_meta = True
2410
+ if session.bind.dialect.name == 'oracle':
2411
+ oracle_version = int(session.connection().connection.version.split('.')[0])
2412
+ if oracle_version < 12:
2413
+ must_delete_did_meta = False
2414
+ if must_delete_did_meta:
2415
+ stmt = delete(
2416
+ models.DidMeta,
2417
+ ).where(
2418
+ exists(select(1)
2419
+ .where(and_(models.DidMeta.scope == scope_name_temp_table.scope,
2420
+ models.DidMeta.name == scope_name_temp_table.name)))
2421
+ ).execution_options(
2422
+ synchronize_session=False
2423
+ )
2424
+ session.execute(stmt)
2425
+
2426
+ for chunk in chunks(messages, 100):
2427
+ add_messages(chunk, session=session)
2428
+
2429
+ # Delete dids
2430
+ dids_to_delete_filter = exists(select(1)
2431
+ .where(and_(models.DataIdentifier.scope == scope_name_temp_table.scope,
2432
+ models.DataIdentifier.name == scope_name_temp_table.name)))
2433
+ archive_dids = config_get_bool('deletion', 'archive_dids', default=False, session=session)
2434
+ if archive_dids:
2435
+ rucio.core.did.insert_deleted_dids(filter_=dids_to_delete_filter, session=session)
2436
+ stmt = delete(
2437
+ models.DataIdentifier,
2438
+ ).where(
2439
+ dids_to_delete_filter,
2440
+ ).execution_options(
2441
+ synchronize_session=False
2442
+ )
2443
+ session.execute(stmt)
2444
+
2445
+ # Set is_archive = false on collections which don't have archive children anymore
2446
+ while clt_to_set_not_archive:
2447
+ to_update = clt_to_set_not_archive.pop(0)
2448
+ if not to_update:
2449
+ continue
2450
+ stmt = delete(scope_name_temp_table)
2451
+ session.execute(stmt)
2452
+ values = [sn._asdict() for sn in to_update]
2453
+ stmt = insert(scope_name_temp_table)
2454
+ session.execute(stmt, values)
2455
+ stmt = delete(scope_name_temp_table2)
2456
+ session.execute(stmt)
2457
+
2458
+ data_identifier_alias = aliased(models.DataIdentifier, name='did_alias')
2459
+ # Fetch rows to be updated
2460
+ stmt = select(
2461
+ models.DataIdentifier.scope,
2462
+ models.DataIdentifier.name,
2463
+ ).distinct(
2464
+ ).where(
2465
+ models.DataIdentifier.is_archive == true()
2466
+ ).join_from(
2467
+ scope_name_temp_table,
2468
+ models.DataIdentifier,
2469
+ and_(scope_name_temp_table.scope == models.DataIdentifier.scope,
2470
+ scope_name_temp_table.name == models.DataIdentifier.name)
2471
+ ).join(
2472
+ models.DataIdentifierAssociation,
2473
+ and_(models.DataIdentifier.scope == models.DataIdentifierAssociation.scope,
2474
+ models.DataIdentifier.name == models.DataIdentifierAssociation.name)
2475
+ ).outerjoin(
2476
+ data_identifier_alias,
2477
+ and_(data_identifier_alias.scope == models.DataIdentifierAssociation.child_scope,
2478
+ data_identifier_alias.name == models.DataIdentifierAssociation.child_name,
2479
+ data_identifier_alias.is_archive == true())
2480
+ ).where(
2481
+ data_identifier_alias.scope == null()
2482
+ )
2483
+ stmt = insert(
2484
+ scope_name_temp_table2
2485
+ ).from_select(
2486
+ ['scope', 'name'],
2487
+ stmt
2488
+ )
2489
+ session.execute(stmt)
2490
+ # update the fetched rows
2491
+ stmt = update(
2492
+ models.DataIdentifier,
2493
+ ).where(
2494
+ exists(select(1)
2495
+ .where(and_(models.DataIdentifier.scope == scope_name_temp_table2.scope,
2496
+ models.DataIdentifier.name == scope_name_temp_table2.name)))
2497
+ ).values({
2498
+ models.DataIdentifier.is_archive: False
2499
+ }).execution_options(
2500
+ synchronize_session=False
2501
+ )
2502
+ session.execute(stmt)
2503
+
2504
+
2505
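The helper above relies on one recurring idiom: clear a temporary table, bulk-insert the collected (scope, name) namedtuples via their `_asdict()` forms, then JOIN subsequent statements against the temp table instead of building huge IN-lists. A minimal, database-independent sketch of that idiom (the `ScopeName` class below is a stand-in for the module's own namedtuple, not the real definition):

```python
# Minimal sketch of the temp-table pattern; ScopeName is assumed to mirror
# the namedtuple used by this module.
from typing import NamedTuple


class ScopeName(NamedTuple):
    scope: str
    name: str


pairs = {ScopeName(scope='mock', name='file_1'),
         ScopeName(scope='mock', name='file_2')}

# _asdict() yields the row dicts expected by an executemany-style insert:
#     session.execute(delete(scope_name_temp_table))
#     session.execute(insert(scope_name_temp_table), values)
values = [sn._asdict() for sn in pairs]
print(sorted(v['name'] for v in values))  # ['file_1', 'file_2']
```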
+ @transactional_session
2506
+ def get_replica(
2507
+ rse_id: str,
2508
+ scope: InternalScope,
2509
+ name: str,
2510
+ *,
2511
+ session: "Session"
2512
+ ) -> dict[str, Any]:
2513
+ """
2514
+ Get File replica.
2515
+
2516
+ :param rse_id: The RSE Id.
2517
+ :param scope: the scope name.
2518
+ :param name: The data identifier name.
2519
+ :param session: The database session in use.
2520
+
2521
+ :returns: A dictionary with the replica attributes.
2522
+ """
2523
+ try:
2524
+ stmt = select(
2525
+ models.RSEFileAssociation
2526
+ ).where(
2527
+ and_(models.RSEFileAssociation.scope == scope,
2528
+ models.RSEFileAssociation.name == name,
2529
+ models.RSEFileAssociation.rse_id == rse_id)
2530
+ )
2531
+ return session.execute(stmt).scalar_one().to_dict()
2532
+ except NoResultFound:
2533
+ raise exception.ReplicaNotFound("No row found for scope: %s name: %s rse: %s" % (scope, name, get_rse_name(rse_id=rse_id, session=session)))
2534
+
2535
+
2536
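A hedged usage sketch for `get_replica`, assuming a configured Rucio database and that this module is importable as `rucio.core.replica`; the scope, name and RSE id are placeholders:

```python
from rucio.common.exception import ReplicaNotFound
from rucio.common.types import InternalScope
from rucio.core.replica import get_replica

try:
    # 'some-rse-id' is a hypothetical identifier; the transactional_session
    # decorator opens a session automatically when none is passed.
    replica = get_replica(rse_id='some-rse-id',
                          scope=InternalScope('mock'),
                          name='file_1')
    print(replica['state'], replica['bytes'])
except ReplicaNotFound as error:
    print('no such replica:', error)
```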
+ @transactional_session
2537
+ def list_and_mark_unlocked_replicas(
2538
+ limit: int,
2539
+ bytes_: Optional[int] = None,
2540
+ rse_id: Optional[str] = None,
2541
+ delay_seconds: int = 600,
2542
+ only_delete_obsolete: bool = False,
2543
+ *,
2544
+ session: "Session"
2545
+ ) -> list[dict[str, Any]]:
2546
+ """
2547
+ List RSE File replicas with no locks.
2548
+
2549
+ :param limit: Number of replicas returned.
2550
+ :param bytes_: The amount of bytes that need to be freed.
2551
+ :param rse_id: The rse_id.
2552
+ :param delay_seconds: The delay to query replicas in BEING_DELETED state
2553
+ :param only_delete_obsolete: If set to True, only return replicas with the EPOCH tombstone.
2554
+ :param session: The database session in use.
2555
+
2556
+ :returns: A list of replica dictionaries.
2557
+ """
2558
+
2559
+ needed_space = bytes_
2560
+ total_bytes = 0
2561
+ rows = []
2562
+
2563
+ temp_table_cls = temp_table_mngr(session).create_scope_name_table()
2564
+
2565
+ replicas_alias = aliased(models.RSEFileAssociation, name='replicas_alias')
2566
+
2567
+ stmt = select(
2568
+ models.RSEFileAssociation.scope,
2569
+ models.RSEFileAssociation.name,
2570
+ ).where(
2571
+ models.RSEFileAssociation.lock_cnt == 0,
2572
+ models.RSEFileAssociation.rse_id == rse_id,
2573
+ models.RSEFileAssociation.tombstone == OBSOLETE if only_delete_obsolete else models.RSEFileAssociation.tombstone < datetime.utcnow(),
2574
+ ).where(
2575
+ or_(models.RSEFileAssociation.state.in_((ReplicaState.AVAILABLE, ReplicaState.UNAVAILABLE, ReplicaState.BAD)),
2576
+ and_(models.RSEFileAssociation.state == ReplicaState.BEING_DELETED, models.RSEFileAssociation.updated_at < datetime.utcnow() - timedelta(seconds=delay_seconds)))
2577
+ ).outerjoin(
2578
+ models.Source,
2579
+ and_(models.RSEFileAssociation.scope == models.Source.scope,
2580
+ models.RSEFileAssociation.name == models.Source.name,
2581
+ models.RSEFileAssociation.rse_id == models.Source.rse_id)
2582
+ ).where(
2583
+ models.Source.scope.is_(None) # Only try to delete replicas if they are not used as sources in any transfers
2584
+ ).order_by(
2585
+ models.RSEFileAssociation.tombstone,
2586
+ models.RSEFileAssociation.updated_at
2587
+ ).with_for_update(
2588
+ skip_locked=True,
2589
+ # oracle: we must specify a column, not a table; however, it doesn't matter which column, the lock is put on the whole row
2590
+ # postgresql/mysql: sqlalchemy driver automatically converts it to a table name
2591
+ # sqlite: this is completely ignored
2592
+ of=models.RSEFileAssociation.scope,
2593
+ )
2594
+
2595
+ for chunk in chunks(session.execute(stmt).yield_per(2 * limit), math.ceil(1.25 * limit)):
2596
+ stmt = delete(temp_table_cls)
2597
+ session.execute(stmt)
2598
+ values = [{'scope': scope, 'name': name} for scope, name in chunk]
2599
+ stmt = insert(temp_table_cls)
2600
+ session.execute(stmt, values)
2601
+
2602
+ stmt = select(
2603
+ models.RSEFileAssociation.scope,
2604
+ models.RSEFileAssociation.name,
2605
+ models.RSEFileAssociation.path,
2606
+ models.RSEFileAssociation.bytes,
2607
+ models.RSEFileAssociation.tombstone,
2608
+ models.RSEFileAssociation.state,
2609
+ models.DataIdentifier.datatype,
2610
+ ).join_from(
2611
+ temp_table_cls,
2612
+ models.RSEFileAssociation,
2613
+ and_(models.RSEFileAssociation.scope == temp_table_cls.scope,
2614
+ models.RSEFileAssociation.name == temp_table_cls.name,
2615
+ models.RSEFileAssociation.rse_id == rse_id)
2616
+ ).with_hint(
2617
+ replicas_alias,
2618
+ 'INDEX(%(name)s REPLICAS_PK)',
2619
+ 'oracle'
2620
+ ).outerjoin(
2621
+ replicas_alias,
2622
+ and_(models.RSEFileAssociation.scope == replicas_alias.scope,
2623
+ models.RSEFileAssociation.name == replicas_alias.name,
2624
+ models.RSEFileAssociation.rse_id != replicas_alias.rse_id,
2625
+ replicas_alias.state == ReplicaState.AVAILABLE)
2626
+ ).with_hint(
2627
+ models.Request,
2628
+ 'INDEX(requests REQUESTS_SCOPE_NAME_RSE_IDX)',
2629
+ 'oracle'
2630
+ ).outerjoin(
2631
+ models.Request,
2632
+ and_(models.RSEFileAssociation.scope == models.Request.scope,
2633
+ models.RSEFileAssociation.name == models.Request.name)
2634
+ ).join(
2635
+ models.DataIdentifier,
2636
+ and_(models.RSEFileAssociation.scope == models.DataIdentifier.scope,
2637
+ models.RSEFileAssociation.name == models.DataIdentifier.name)
2638
+ ).group_by(
2639
+ models.RSEFileAssociation.scope,
2640
+ models.RSEFileAssociation.name,
2641
+ models.RSEFileAssociation.path,
2642
+ models.RSEFileAssociation.bytes,
2643
+ models.RSEFileAssociation.tombstone,
2644
+ models.RSEFileAssociation.state,
2645
+ models.RSEFileAssociation.updated_at,
2646
+ models.DataIdentifier.datatype
2647
+ ).having(
2648
+ case((func.count(replicas_alias.scope) > 0, True), # Can delete this replica if it's not the last replica
2649
+ (func.count(models.Request.scope) == 0, True), # If it's the last replica, only can delete if there are no requests using it
2650
+ else_=False).label("can_delete"),
2651
+ ).order_by(
2652
+ models.RSEFileAssociation.tombstone,
2653
+ models.RSEFileAssociation.updated_at
2654
+ ).limit(
2655
+ limit - len(rows)
2656
+ )
2657
+
2658
+ for scope, name, path, bytes_, tombstone, state, datatype in session.execute(stmt):
2659
+ if len(rows) >= limit or (not only_delete_obsolete and needed_space is not None and total_bytes > needed_space):
2660
+ break
2661
+ if state != ReplicaState.UNAVAILABLE:
2662
+ total_bytes += bytes_ # type: ignore
2663
+
2664
+ rows.append({'scope': scope, 'name': name, 'path': path,
2665
+ 'bytes': bytes_, 'tombstone': tombstone,
2666
+ 'state': state, 'datatype': datatype})
2667
+ if len(rows) >= limit or (not only_delete_obsolete and needed_space is not None and total_bytes > needed_space):
2668
+ break
2669
+
2670
+ if rows:
2671
+ stmt = delete(temp_table_cls)
2672
+ session.execute(stmt)
2673
+ values = [{'scope': row['scope'], 'name': row['name']} for row in rows]
2674
+ stmt = insert(temp_table_cls)
2675
+ session.execute(stmt, values)
2676
+ stmt = update(
2677
+ models.RSEFileAssociation
2678
+ ).where(
2679
+ exists(select(1).prefix_with("/*+ INDEX(REPLICAS REPLICAS_PK) */", dialect='oracle')
2680
+ .where(and_(models.RSEFileAssociation.scope == temp_table_cls.scope,
2681
+ models.RSEFileAssociation.name == temp_table_cls.name,
2682
+ models.RSEFileAssociation.rse_id == rse_id)))
2683
+ ).values({
2684
+ models.RSEFileAssociation.updated_at: datetime.utcnow(),
2685
+ models.RSEFileAssociation.state: ReplicaState.BEING_DELETED,
2686
+ models.RSEFileAssociation.tombstone: OBSOLETE
2687
+ }).execution_options(
2688
+ synchronize_session=False
2689
+ )
2690
+
2691
+ session.execute(stmt)
2692
+
2693
+ return rows
2694
+
2695
+
2696
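A hedged sketch of a reaper-style consumer, assuming a configured database; the RSE id is a placeholder. The function marks the returned rows BEING_DELETED in the same transaction (with `skip_locked`, so competing reapers do not block each other), which means a crashed consumer merely delays re-selection by `delay_seconds`:

```python
from rucio.core.replica import list_and_mark_unlocked_replicas

rows = list_and_mark_unlocked_replicas(limit=100,
                                       bytes_=10**12,        # stop once ~1 TB is gathered
                                       rse_id='some-rse-id',  # hypothetical RSE id
                                       delay_seconds=600)
for row in rows:
    # rows come back already marked BEING_DELETED with an OBSOLETE tombstone
    print(row['scope'], row['name'], row['bytes'], row['state'])
```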
+ @transactional_session
2697
+ def update_replicas_states(
2698
+ replicas: "Iterable[dict[str, Any]]",
2699
+ nowait: bool = False,
2700
+ *,
2701
+ session: "Session"
2702
+ ) -> bool:
2703
+ """
2704
+ Update File replica information and state.
2705
+
2706
+ :param replicas: The list of replicas.
2707
+ :param nowait: Nowait parameter for the for_update queries.
2708
+ :param session: The database session in use.
2709
+ """
2710
+
2711
+ for replica in replicas:
2712
+ stmt = select(
2713
+ models.RSEFileAssociation
2714
+ ).where(
2715
+ models.RSEFileAssociation.rse_id == replica['rse_id'],
2716
+ models.RSEFileAssociation.scope == replica['scope'],
2717
+ models.RSEFileAssociation.name == replica['name']
2718
+ ).with_for_update(
2719
+ nowait=nowait
2720
+ )
2721
+
2722
+ if session.execute(stmt).scalar_one_or_none() is None:
2723
+ # remember scope, name and rse
2724
+ raise exception.ReplicaNotFound("No row found for scope: %s name: %s rse: %s" % (replica['scope'], replica['name'], get_rse_name(replica['rse_id'], session=session)))
2725
+
2726
+ if isinstance(replica['state'], str):
2727
+ replica['state'] = ReplicaState(replica['state'])
2728
+
2729
+ values = {'state': replica['state']}
2730
+ if replica['state'] == ReplicaState.BEING_DELETED:
2731
+ # Exclude replicas used as sources
2732
+ stmt = stmt.where(
2733
+ and_(models.RSEFileAssociation.lock_cnt == 0,
2734
+ not_(exists(select(1)
2735
+ .where(and_(models.RSEFileAssociation.scope == models.Source.scope,
2736
+ models.RSEFileAssociation.name == models.Source.name,
2737
+ models.RSEFileAssociation.rse_id == models.Source.rse_id)))))
2738
+ )
2739
+ values['tombstone'] = OBSOLETE
2740
+ elif replica['state'] == ReplicaState.AVAILABLE:
2741
+ rucio.core.lock.successful_transfer(scope=replica['scope'], name=replica['name'], rse_id=replica['rse_id'], nowait=nowait, session=session)
2742
+ stmt_bad_replicas = select(
2743
+ func.count()
2744
+ ).select_from(
2745
+ models.BadReplica
2746
+ ).where(
2747
+ and_(models.BadReplica.state == BadFilesStatus.BAD,
2748
+ models.BadReplica.rse_id == replica['rse_id'],
2749
+ models.BadReplica.scope == replica['scope'],
2750
+ models.BadReplica.name == replica['name'])
2751
+ )
2752
+
2753
+ if session.execute(stmt_bad_replicas).scalar():
2754
+ update_stmt = update(
2755
+ models.BadReplica
2756
+ ).where(
2757
+ and_(models.BadReplica.state == BadFilesStatus.BAD,
2758
+ models.BadReplica.rse_id == replica['rse_id'],
2759
+ models.BadReplica.scope == replica['scope'],
2760
+ models.BadReplica.name == replica['name'])
2761
+ ).values({
2762
+ models.BadReplica.state: BadFilesStatus.RECOVERED,
2763
+ models.BadReplica.updated_at: datetime.utcnow()
2764
+ }).execution_options(
2765
+ synchronize_session=False
2766
+ )
2767
+ session.execute(update_stmt)
2768
+ elif replica['state'] == ReplicaState.UNAVAILABLE:
2769
+ rucio.core.lock.failed_transfer(scope=replica['scope'], name=replica['name'], rse_id=replica['rse_id'],
2770
+ error_message=replica.get('error_message', None),
2771
+ broken_rule_id=replica.get('broken_rule_id', None),
2772
+ broken_message=replica.get('broken_message', None),
2773
+ nowait=nowait, session=session)
2774
+ elif replica['state'] == ReplicaState.TEMPORARY_UNAVAILABLE:
2775
+ stmt = stmt.where(
2776
+ models.RSEFileAssociation.state.in_([ReplicaState.AVAILABLE,
2777
+ ReplicaState.TEMPORARY_UNAVAILABLE])
2778
+ )
2779
+
2780
+ if 'path' in replica and replica['path']:
2781
+ values['path'] = replica['path']
2782
+
2783
+ update_stmt = update(
2784
+ models.RSEFileAssociation
2785
+ ).where(
2786
+ and_(models.RSEFileAssociation.rse_id == replica['rse_id'],
2787
+ models.RSEFileAssociation.scope == replica['scope'],
2788
+ models.RSEFileAssociation.name == replica['name'])
2789
+ ).values(
2790
+ values
2791
+ ).execution_options(
2792
+ synchronize_session=False
2793
+ )
2794
+
2795
+ if not session.execute(update_stmt).rowcount:
2796
+ if 'rse' not in replica:
2797
+ replica['rse'] = get_rse_name(rse_id=replica['rse_id'], session=session)
2798
+ raise exception.UnsupportedOperation('State %(state)s for replica %(scope)s:%(name)s on %(rse)s cannot be updated' % replica)
2799
+ return True
2800
+
2801
+
2802
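A hedged sketch of calling `update_replicas_states`: the state may be passed as a `ReplicaState` member or as its single-character string value, which the function coerces via `ReplicaState(...)`. All identifiers below are placeholders:

```python
from rucio.common.types import InternalScope
from rucio.core.replica import update_replicas_states
from rucio.db.sqla.constants import ReplicaState

update_replicas_states(replicas=[{'rse_id': 'some-rse-id',       # hypothetical RSE id
                                  'scope': InternalScope('mock'),
                                  'name': 'file_1',
                                  'state': ReplicaState.AVAILABLE}],
                       nowait=False)
```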
+ @transactional_session
2803
+ def touch_replica(
2804
+ replica: dict[str, Any],
2805
+ *,
2806
+ session: "Session"
2807
+ ) -> bool:
2808
+ """
2809
+ Update the accessed_at timestamp of the given file replica/did but don't wait if row is locked.
2810
+
2811
+ :param replica: a dictionary with the information of the affected replica.
2812
+ :param session: The database session in use.
2813
+
2814
+ :returns: True, if successful, False otherwise.
2815
+ """
2816
+ try:
2817
+ accessed_at, none_value = replica.get('accessed_at') or datetime.utcnow(), None
2818
+
2819
+ stmt = select(
2820
+ models.RSEFileAssociation
2821
+ ).with_hint(
2822
+ models.RSEFileAssociation,
2823
+ 'INDEX(REPLICAS REPLICAS_PK)',
2824
+ 'oracle'
2825
+ ).where(
2826
+ and_(models.RSEFileAssociation.rse_id == replica['rse_id'],
2827
+ models.RSEFileAssociation.scope == replica['scope'],
2828
+ models.RSEFileAssociation.name == replica['name'])
2829
+ ).with_for_update(
2830
+ nowait=True
2831
+ )
2832
+ session.execute(stmt).one()
2833
+
2834
+ stmt = update(
2835
+ models.RSEFileAssociation
2836
+ ).where(
2837
+ and_(models.RSEFileAssociation.rse_id == replica['rse_id'],
2838
+ models.RSEFileAssociation.scope == replica['scope'],
2839
+ models.RSEFileAssociation.name == replica['name'])
2840
+ ).prefix_with(
2841
+ '/*+ INDEX(REPLICAS REPLICAS_PK) */', dialect='oracle'
2842
+ ).values({
2843
+ models.RSEFileAssociation.accessed_at: accessed_at,
2844
+ models.RSEFileAssociation.tombstone: case(
2845
+ (models.RSEFileAssociation.tombstone.not_in([OBSOLETE, none_value]),
2846
+ accessed_at),
2847
+ else_=models.RSEFileAssociation.tombstone)
2848
+ }).execution_options(
2849
+ synchronize_session=False
2850
+ )
2851
+ session.execute(stmt)
2852
+
2853
+ stmt = select(
2854
+ models.DataIdentifier
2855
+ ).with_hint(
2856
+ models.DataIdentifier,
2857
+ 'INDEX(DIDS DIDS_PK)',
2858
+ 'oracle'
2859
+ ).where(
2860
+ and_(models.DataIdentifier.scope == replica['scope'],
2861
+ models.DataIdentifier.name == replica['name'],
2862
+ models.DataIdentifier.did_type == DIDType.FILE)
2863
+ ).with_for_update(
2864
+ nowait=True
2865
+ )
2866
+ session.execute(stmt).one()
2867
+
2868
+ stmt = update(
2869
+ models.DataIdentifier
2870
+ ).where(
2871
+ and_(models.DataIdentifier.scope == replica['scope'],
2872
+ models.DataIdentifier.name == replica['name'],
2873
+ models.DataIdentifier.did_type == DIDType.FILE)
2874
+ ).prefix_with(
2875
+ '/*+ INDEX(DIDS DIDS_PK) */', dialect='oracle'
2876
+ ).values({
2877
+ models.DataIdentifier.accessed_at: accessed_at
2878
+ }).execution_options(
2879
+ synchronize_session=False
2880
+ )
2881
+ session.execute(stmt)
2882
+
2883
+ except DatabaseError:
2884
+ return False
2885
+ except NoResultFound:
2886
+ return True
2887
+
2888
+ return True
2889
+
2890
+
2891
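A hedged usage sketch for `touch_replica`: because the row is locked with NOWAIT, the call returns `False` instead of blocking when another transaction holds the replica, so callers simply retry later. The identifiers are placeholders:

```python
from datetime import datetime

from rucio.common.types import InternalScope
from rucio.core.replica import touch_replica

ok = touch_replica({'rse_id': 'some-rse-id',           # hypothetical RSE id
                    'scope': InternalScope('mock'),
                    'name': 'file_1',
                    'accessed_at': datetime.utcnow()})
print('updated' if ok else 'row locked, try again later')
```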
+ @transactional_session
2892
+ def update_replica_state(
2893
+ rse_id: str,
2894
+ scope: InternalScope,
2895
+ name: str,
2896
+ state: ReplicaState,
2897
+ *,
2898
+ session: "Session"
2899
+ ) -> bool:
2900
+ """
2901
+ Update File replica information and state.
2902
+
2903
+ :param rse_id: the rse id.
2904
+ :param scope: The scope name.
2905
+ :param name: The data identifier name.
2906
+ :param state: The state.
2907
+ :param session: The database session in use.
2908
+ """
2909
+ return update_replicas_states(replicas=[{'scope': scope, 'name': name, 'state': state, 'rse_id': rse_id}], session=session)
2910
+
2911
+
2912
+ @transactional_session
2913
+ def get_and_lock_file_replicas(
2914
+ scope: InternalScope,
2915
+ name: str,
2916
+ nowait: bool = False,
2917
+ restrict_rses: Optional["Sequence[str]"] = None,
2918
+ *,
2919
+ session: "Session"
2920
+ ) -> "Sequence[models.RSEFileAssociation]":
2921
+ """
2922
+ Get file replicas for a specific scope:name.
2923
+
2924
+ :param scope: The scope of the did.
2925
+ :param name: The name of the did.
2926
+ :param nowait: Nowait parameter for the FOR UPDATE statement
2927
+ :param restrict_rses: Possible RSE_ids to filter on.
2928
+ :param session: The db session in use.
2929
+ :returns: List of SQLAlchemy Replica Objects
2930
+ """
2931
+
2932
+ stmt = select(
2933
+ models.RSEFileAssociation
2934
+ ).where(
2935
+ and_(models.RSEFileAssociation.scope == scope,
2936
+ models.RSEFileAssociation.name == name,
2937
+ models.RSEFileAssociation.state != ReplicaState.BEING_DELETED)
2938
+ ).with_for_update(
2939
+ nowait=nowait
2940
+ )
2941
+ if restrict_rses is not None and len(restrict_rses) < 10:
2942
+ rse_clause = [models.RSEFileAssociation.rse_id == rse_id for rse_id in restrict_rses]
2943
+ if rse_clause:
2944
+ stmt = stmt.where(or_(*rse_clause))
2945
+
2946
+ return session.execute(stmt).scalars().all()
2947
+
2948
+
2949
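Since the SELECT ... FOR UPDATE locks only live until the surrounding transaction ends, callers that want to act on the locked rows should pass an explicit session and commit it themselves. A hedged sketch, assuming `rucio.db.sqla.session.get_session` is available; the RSE id and DID are placeholders, and incrementing `lock_cnt` merely stands in for whatever the caller does with the locked rows:

```python
from rucio.common.types import InternalScope
from rucio.core.replica import get_and_lock_file_replicas
from rucio.db.sqla.session import get_session

session = get_session()
try:
    replicas = get_and_lock_file_replicas(scope=InternalScope('mock'),
                                          name='file_1',
                                          nowait=True,
                                          restrict_rses=['some-rse-id'],  # hypothetical
                                          session=session)
    for replica in replicas:
        replica.lock_cnt += 1  # placeholder action on each locked row
    session.commit()
finally:
    session.close()
```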
+ @transactional_session
2950
+ def get_source_replicas(
2951
+ scope: InternalScope,
2952
+ name: str,
2953
+ source_rses: Optional["Sequence[str]"] = None,
2954
+ *,
2955
+ session: "Session"
2956
+ ) -> "Sequence[str]":
2957
+ """
2958
+ Get source replicas for a specific scope:name.
2959
+
2960
+ :param scope: The scope of the did.
2961
+ :param name: The name of the did.
2962
+ :param source_rses: Possible RSE_ids to filter on.
2963
+ :param session: The db session in use.
2964
+ :returns: List of RSE ids hosting an AVAILABLE replica.
2965
+ """
2966
+
2967
+ stmt = select(
2968
+ models.RSEFileAssociation.rse_id
2969
+ ).where(
2970
+ and_(models.RSEFileAssociation.scope == scope,
2971
+ models.RSEFileAssociation.name == name,
2972
+ models.RSEFileAssociation.state == ReplicaState.AVAILABLE)
2973
+ )
2974
+ if source_rses:
2975
+ if len(source_rses) < 10:
2976
+ rse_clause = []
2977
+ for rse_id in source_rses:
2978
+ rse_clause.append(models.RSEFileAssociation.rse_id == rse_id)
2979
+ if rse_clause:
2980
+ stmt = stmt.where(or_(*rse_clause))
2981
+ return session.execute(stmt).scalars().all()
2982
+
2983
+
2984
+ @transactional_session
2985
+ def get_and_lock_file_replicas_for_dataset(
2986
+ scope: InternalScope,
2987
+ name: str,
2988
+ nowait: bool = False,
2989
+ restrict_rses: Optional["Sequence[str]"] = None,
2990
+ total_threads: Optional[int] = None,
2991
+ thread_id: Optional[int] = None,
2992
+ *,
2993
+ session: "Session"
2994
+ ) -> tuple[list[dict[str, Any]], dict[tuple[InternalScope, str], Any]]:
2995
+ """
2996
+ Get file replicas for all files of a dataset.
2997
+
2998
+ :param scope: The scope of the dataset.
2999
+ :param name: The name of the dataset.
3000
+ :param nowait: Nowait parameter for the FOR UPDATE statement
3001
+ :param restrict_rses: Possible RSE_ids to filter on.
3002
+ :param total_threads: Total number of threads.
3003
+ :param thread_id: Id of the executing thread.
3004
+ :param session: The db session in use.
3005
+ :returns: (files in dataset, replicas in dataset)
3006
+ """
3007
+ files, replicas = {}, {}
3008
+
3009
+ base_stmt = select(
3010
+ models.DataIdentifierAssociation.child_scope,
3011
+ models.DataIdentifierAssociation.child_name,
3012
+ models.DataIdentifierAssociation.bytes,
3013
+ models.DataIdentifierAssociation.md5,
3014
+ models.DataIdentifierAssociation.adler32,
3015
+ ).where(
3016
+ and_(models.DataIdentifierAssociation.scope == scope,
3017
+ models.DataIdentifierAssociation.name == name)
3018
+ )
3019
+
3020
+ stmt = base_stmt.add_columns(
3021
+ models.RSEFileAssociation
3022
+ ).where(
3023
+ and_(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
3024
+ models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
3025
+ models.RSEFileAssociation.state != ReplicaState.BEING_DELETED)
3026
+ )
3027
+
3028
+ rse_clause = [true()]
3029
+ if restrict_rses is not None and len(restrict_rses) < 10:
3030
+ rse_clause = [models.RSEFileAssociation.rse_id == rse_id for rse_id in restrict_rses]
3031
+
3032
+ if session.bind.dialect.name == 'postgresql': # type: ignore
3033
+ if total_threads and total_threads > 1:
3034
+ base_stmt = filter_thread_work(session=session,
3035
+ query=base_stmt,
3036
+ total_threads=total_threads,
3037
+ thread_id=thread_id,
3038
+ hash_variable='child_name')
3039
+
3040
+ for child_scope, child_name, bytes_, md5, adler32 in session.execute(base_stmt).yield_per(1000):
3041
+ files[(child_scope, child_name)] = {'scope': child_scope,
3042
+ 'name': child_name,
3043
+ 'bytes': bytes_,
3044
+ 'md5': md5,
3045
+ 'adler32': adler32}
3046
+ replicas[(child_scope, child_name)] = []
3047
+
3048
+ stmt = stmt.where(or_(*rse_clause))
3049
+ else:
3050
+ stmt = base_stmt.add_columns(
3051
+ models.RSEFileAssociation
3052
+ ).with_hint(
3053
+ models.DataIdentifierAssociation,
3054
+ 'INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)',
3055
+ 'oracle'
3056
+ ).outerjoin(
3057
+ models.RSEFileAssociation,
3058
+ and_(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
3059
+ models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
3060
+ models.RSEFileAssociation.state != ReplicaState.BEING_DELETED,
3061
+ or_(*rse_clause))
3062
+ )
3063
+
3064
+ if total_threads and total_threads > 1:
3065
+ stmt = filter_thread_work(session=session,
3066
+ query=stmt,
3067
+ total_threads=total_threads,
3068
+ thread_id=thread_id,
3069
+ hash_variable='child_name')
3070
+
3071
+ stmt = stmt.with_for_update(
3072
+ nowait=nowait,
3073
+ of=models.RSEFileAssociation.lock_cnt
3074
+ )
3075
+
3076
+ for child_scope, child_name, bytes_, md5, adler32, replica in session.execute(stmt).yield_per(1000):
3077
+ if (child_scope, child_name) not in files:
3078
+ files[(child_scope, child_name)] = {'scope': child_scope,
3079
+ 'name': child_name,
3080
+ 'bytes': bytes_,
3081
+ 'md5': md5,
3082
+ 'adler32': adler32}
3083
+
3084
+ if (child_scope, child_name) in replicas:
3085
+ if replica is not None:
3086
+ replicas[(child_scope, child_name)].append(replica)
3087
+ else:
3088
+ replicas[(child_scope, child_name)] = []
3089
+ if replica is not None:
3090
+ replicas[(child_scope, child_name)].append(replica)
3091
+
3092
+ return (list(files.values()), replicas)
3093
+
3094
+
3095
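A hedged sketch of consuming the return value: `files` is a list of file dictionaries and `replicas` maps each `(scope, name)` pair to the (possibly empty) list of locked replica objects. The dataset name is a placeholder:

```python
from rucio.common.types import InternalScope
from rucio.core.replica import get_and_lock_file_replicas_for_dataset

files, replicas = get_and_lock_file_replicas_for_dataset(scope=InternalScope('mock'),
                                                         name='dataset_1')
for file_ in files:
    locked = replicas.get((file_['scope'], file_['name']), [])
    print(file_['name'], 'has', len(locked), 'lockable replica(s)')
```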
+ @transactional_session
3096
+ def get_source_replicas_for_dataset(
3097
+ scope: InternalScope,
3098
+ name: str,
3099
+ source_rses: Optional["Sequence[str]"] = None,
3100
+ total_threads: Optional[int] = None,
3101
+ thread_id: Optional[int] = None,
3102
+ *,
3103
+ session: "Session"
3104
+ ) -> dict[tuple[InternalScope, str], Any]:
3105
+ """
3106
+ Get source replicas for all files of a dataset.
3107
+
3108
+ :param scope: The scope of the dataset.
3109
+ :param name: The name of the dataset.
3110
+ :param source_rses: Possible source RSE_ids to filter on.
3111
+ :param total_threads: Total number of threads.
3112
+ :param thread_id: Id of the executing thread.
3113
+ :param session: The db session in use.
3114
+ :returns: Dictionary mapping (child_scope, child_name) to the list of source RSE ids.
3115
+ """
3116
+ stmt = select(
3117
+ models.DataIdentifierAssociation.child_scope,
3118
+ models.DataIdentifierAssociation.child_name,
3119
+ models.RSEFileAssociation.rse_id
3120
+ ).with_hint(
3121
+ models.DataIdentifierAssociation,
3122
+ 'INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)',
3123
+ 'oracle'
3124
+ ).outerjoin(
3125
+ models.RSEFileAssociation,
3126
+ and_(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
3127
+ models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
3128
+ models.RSEFileAssociation.state == ReplicaState.AVAILABLE)
3129
+ ).where(
3130
+ and_(models.DataIdentifierAssociation.scope == scope,
3131
+ models.DataIdentifierAssociation.name == name)
3132
+ )
3133
+
3134
+ if source_rses:
3135
+ if len(source_rses) < 10:
3136
+ rse_clause = []
3137
+ for rse_id in source_rses:
3138
+ rse_clause.append(models.RSEFileAssociation.rse_id == rse_id)
3139
+ if rse_clause:
3140
+ stmt = select(
3141
+ models.DataIdentifierAssociation.child_scope,
3142
+ models.DataIdentifierAssociation.child_name,
3143
+ models.RSEFileAssociation.rse_id
3144
+ ).with_hint(
3145
+ models.DataIdentifierAssociation,
3146
+ 'INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)',
3147
+ 'oracle'
3148
+ ).outerjoin(
3149
+ models.RSEFileAssociation,
3150
+ and_(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
3151
+ models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
3152
+ models.RSEFileAssociation.state == ReplicaState.AVAILABLE,
3153
+ or_(*rse_clause))
3154
+ ).where(
3155
+ and_(models.DataIdentifierAssociation.scope == scope,
3156
+ models.DataIdentifierAssociation.name == name)
3157
+ )
3158
+ if total_threads and total_threads > 1:
3159
+ stmt = filter_thread_work(session=session,
3160
+ query=stmt,
3161
+ total_threads=total_threads,
3162
+ thread_id=thread_id,
3163
+ hash_variable='child_name')
3164
+
3165
+ replicas = {}
3166
+
3167
+ for child_scope, child_name, rse_id in session.execute(stmt):
3168
+
3169
+ if (child_scope, child_name) in replicas:
3170
+ if rse_id:
3171
+ replicas[(child_scope, child_name)].append(rse_id)
3172
+ else:
3173
+ replicas[(child_scope, child_name)] = []
3174
+ if rse_id:
3175
+ replicas[(child_scope, child_name)].append(rse_id)
3176
+
3177
+ return replicas
3178
+
3179
+
3180
+ @read_session
3181
+ def get_replica_atime(
3182
+ replica: dict[str, Any],
3183
+ *,
3184
+ session: "Session"
3185
+ ) -> Optional[datetime]:
3186
+ """
3187
+ Get the accessed_at timestamp for a replica. Just for testing.
3188
+ :param replica: A dictionary with the keys scope, name and rse_id.
3189
+ :param session: Database session to use.
3190
+
3191
+ :returns: A datetime timestamp with the last access time.
3192
+ """
3193
+ stmt = select(
3194
+ models.RSEFileAssociation.accessed_at
3195
+ ).with_hint(
3196
+ models.RSEFileAssociation,
3197
+ 'INDEX(REPLICAS REPLICAS_PK)',
3198
+ 'oracle'
3199
+ ).where(
3200
+ and_(models.RSEFileAssociation.scope == replica['scope'],
3201
+ models.RSEFileAssociation.name == replica['name'],
3202
+ models.RSEFileAssociation.rse_id == replica['rse_id'])
3203
+ )
3204
+ return session.execute(stmt).scalar_one()
3205
+
3206
+
3207
+ @transactional_session
3208
+ def touch_collection_replicas(
3209
+ collection_replicas: "Iterable[dict[str, Any]]",
3210
+ *,
3211
+ session: "Session"
3212
+ ) -> bool:
3213
+ """
3214
+ Update the accessed_at timestamp of the given collection replicas.
3215
+
3216
+ :param collection_replicas: the list of collection replicas.
3217
+ :param session: The database session in use.
3218
+
3219
+ :returns: True, if successful, False otherwise.
3220
+ """
3221
+
3222
+ now = datetime.utcnow()
3223
+ for collection_replica in collection_replicas:
3224
+ try:
3225
+ stmt = update(
3226
+ models.CollectionReplica
3227
+ ).where(
3228
+ and_(models.CollectionReplica.scope == collection_replica['scope'],
3229
+ models.CollectionReplica.name == collection_replica['name'],
3230
+ models.CollectionReplica.rse_id == collection_replica['rse_id'])
3231
+ ).values({
3232
+ models.CollectionReplica.accessed_at: collection_replica.get('accessed_at') or now
3233
+ }).execution_options(
3234
+ synchronize_session=False
3235
+ )
3236
+ session.execute(stmt)
3237
+ except DatabaseError:
3238
+ return False
3239
+
3240
+ return True
3241
+
3242
+
3243
+ @stream_session
3244
+ def list_dataset_replicas(
3245
+ scope: "InternalScope",
3246
+ name: str,
3247
+ deep: bool = False,
3248
+ *,
3249
+ session: "Session"
3250
+ ) -> "Iterator[dict[str, Any]]":
3251
+ """
3252
+ List dataset replicas for a DID (scope:name).
+
+ :param scope: The scope of the dataset.
3253
+ :param name: The name of the dataset.
3254
+ :param deep: Lookup at the file level.
3255
+ :param session: Database session to use.
3256
+
3257
+ :returns: A list of dictionaries containing the dataset replicas
3258
+ with associated metrics and timestamps
3259
+ """
3260
+
3261
+ if not deep:
3262
+ stmt = select(
3263
+ models.CollectionReplica.scope,
3264
+ models.CollectionReplica.name,
3265
+ models.RSE.rse,
3266
+ models.CollectionReplica.rse_id,
3267
+ models.CollectionReplica.bytes,
3268
+ models.CollectionReplica.length,
3269
+ models.CollectionReplica.available_bytes,
3270
+ models.CollectionReplica.available_replicas_cnt.label("available_length"),
3271
+ models.CollectionReplica.state,
3272
+ models.CollectionReplica.created_at,
3273
+ models.CollectionReplica.updated_at,
3274
+ models.CollectionReplica.accessed_at
3275
+ ).where(
3276
+ and_(models.CollectionReplica.scope == scope,
3277
+ models.CollectionReplica.name == name,
3278
+ models.CollectionReplica.did_type == DIDType.DATASET,
3279
+ models.CollectionReplica.rse_id == models.RSE.id,
3280
+ models.RSE.deleted == false())
3281
+ )
3282
+
3283
+ for row in session.execute(stmt).all():
3284
+ yield row._asdict()
3285
+
3286
+ else:
3287
+ # Find maximum values
3288
+ stmt = select(
3289
+ func.sum(models.DataIdentifierAssociation.bytes).label("bytes"),
3290
+ func.count().label("length")
3291
+ ).select_from(
3292
+ models.DataIdentifierAssociation
3293
+ ).with_hint(
3294
+ models.DataIdentifierAssociation,
3295
+ 'INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)',
3296
+ 'oracle'
3297
+ ).where(
3298
+ and_(models.DataIdentifierAssociation.scope == scope,
3299
+ models.DataIdentifierAssociation.name == name)
3300
+ )
3301
+
3302
+ bytes_, length = session.execute(stmt).one()
3303
+ bytes_ = bytes_ or 0
3304
+
3305
+ # Find archives that contain files of the requested dataset
3306
+ sub_query_stmt = select(
3307
+ models.DataIdentifierAssociation.scope.label('dataset_scope'),
3308
+ models.DataIdentifierAssociation.name.label('dataset_name'),
3309
+ models.DataIdentifierAssociation.bytes.label('file_bytes'),
3310
+ models.ConstituentAssociation.child_scope.label('file_scope'),
3311
+ models.ConstituentAssociation.child_name.label('file_name'),
3312
+ models.RSEFileAssociation.scope.label('replica_scope'),
3313
+ models.RSEFileAssociation.name.label('replica_name'),
3314
+ models.RSE.rse,
3315
+ models.RSE.id.label('rse_id'),
3316
+ models.RSEFileAssociation.created_at,
3317
+ models.RSEFileAssociation.accessed_at,
3318
+ models.RSEFileAssociation.updated_at
3319
+ ).where(
3320
+ and_(models.DataIdentifierAssociation.scope == scope,
3321
+ models.DataIdentifierAssociation.name == name,
3322
+ models.ConstituentAssociation.child_scope == models.DataIdentifierAssociation.child_scope,
3323
+ models.ConstituentAssociation.child_name == models.DataIdentifierAssociation.child_name,
3324
+ models.ConstituentAssociation.scope == models.RSEFileAssociation.scope,
3325
+ models.ConstituentAssociation.name == models.RSEFileAssociation.name,
3326
+ models.RSEFileAssociation.rse_id == models.RSE.id,
3327
+ models.RSEFileAssociation.state == ReplicaState.AVAILABLE,
3328
+ models.RSE.deleted == false())
3329
+ ).subquery()
3330
+
3331
+ # Count the metrics
3332
+ group_query_stmt = select(
3333
+ sub_query_stmt.c.dataset_scope,
3334
+ sub_query_stmt.c.dataset_name,
3335
+ sub_query_stmt.c.file_scope,
3336
+ sub_query_stmt.c.file_name,
3337
+ sub_query_stmt.c.rse_id,
3338
+ sub_query_stmt.c.rse,
3339
+ func.sum(sub_query_stmt.c.file_bytes).label('file_bytes'),
3340
+ func.min(sub_query_stmt.c.created_at).label('created_at'),
3341
+ func.max(sub_query_stmt.c.updated_at).label('updated_at'),
3342
+ func.max(sub_query_stmt.c.accessed_at).label('accessed_at')
3343
+ ).group_by(
3344
+ sub_query_stmt.c.dataset_scope,
3345
+ sub_query_stmt.c.dataset_name,
3346
+ sub_query_stmt.c.file_scope,
3347
+ sub_query_stmt.c.file_name,
3348
+ sub_query_stmt.c.rse_id,
3349
+ sub_query_stmt.c.rse
3350
+ ).subquery()
3351
+
3352
+ # Bring it in the same column state as the non-archive query
3353
+ full_query_stmt = select(
3354
+ group_query_stmt.c.dataset_scope.label('scope'),
3355
+ group_query_stmt.c.dataset_name.label('name'),
3356
+ group_query_stmt.c.rse_id,
3357
+ group_query_stmt.c.rse,
3358
+ func.sum(group_query_stmt.c.file_bytes).label('available_bytes'),
3359
+ func.count().label('available_length'),
3360
+ func.min(group_query_stmt.c.created_at).label('created_at'),
3361
+ func.max(group_query_stmt.c.updated_at).label('updated_at'),
3362
+ func.max(group_query_stmt.c.accessed_at).label('accessed_at')
3363
+ ).group_by(
3364
+ group_query_stmt.c.dataset_scope,
3365
+ group_query_stmt.c.dataset_name,
3366
+ group_query_stmt.c.rse_id,
3367
+ group_query_stmt.c.rse
3368
+ )
3369
+
3370
+ # Find the non-archive dataset replicas
3371
+ sub_query_stmt = select(
3372
+ models.DataIdentifierAssociation.scope,
3373
+ models.DataIdentifierAssociation.name,
3374
+ models.RSEFileAssociation.rse_id,
3375
+ func.sum(models.RSEFileAssociation.bytes).label("available_bytes"),
3376
+ func.count().label("available_length"),
3377
+ func.min(models.RSEFileAssociation.created_at).label("created_at"),
3378
+ func.max(models.RSEFileAssociation.updated_at).label("updated_at"),
3379
+ func.max(models.RSEFileAssociation.accessed_at).label("accessed_at")
3380
+ ).with_hint(
3381
+ models.DataIdentifierAssociation,
3382
+ 'INDEX_RS_ASC(CONTENTS CONTENTS_PK) INDEX_RS_ASC(REPLICAS REPLICAS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)',
3383
+ 'oracle'
3384
+ ).where(
3385
+ and_(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
3386
+ models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
3387
+ models.DataIdentifierAssociation.scope == scope,
3388
+ models.DataIdentifierAssociation.name == name,
3389
+ models.RSEFileAssociation.state == ReplicaState.AVAILABLE)
3390
+ ).group_by(
3391
+ models.DataIdentifierAssociation.scope,
3392
+ models.DataIdentifierAssociation.name,
3393
+ models.RSEFileAssociation.rse_id
3394
+ ).subquery()
3395
+
3396
+ stmt = select(
3397
+ sub_query_stmt.c.scope,
3398
+ sub_query_stmt.c.name,
3399
+ sub_query_stmt.c.rse_id,
3400
+ models.RSE.rse,
3401
+ sub_query_stmt.c.available_bytes,
3402
+ sub_query_stmt.c.available_length,
3403
+ sub_query_stmt.c.created_at,
3404
+ sub_query_stmt.c.updated_at,
3405
+ sub_query_stmt.c.accessed_at
3406
+ ).where(
3407
+ and_(sub_query_stmt.c.rse_id == models.RSE.id,
3408
+ models.RSE.deleted == false())
3409
+ )
3410
+
3411
+ # Join everything together
3412
+ final_stmt = stmt.union_all(full_query_stmt)
3413
+ for row in session.execute(final_stmt).all():
3414
+ replica = row._asdict()
3415
+ replica['length'], replica['bytes'] = length, bytes_
3416
+ if replica['length'] == row.available_length:
3417
+ replica['state'] = ReplicaState.AVAILABLE
3418
+ else:
3419
+ replica['state'] = ReplicaState.UNAVAILABLE
3420
+ yield replica
3421
+
3422
+
3423
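A hedged usage sketch: with `deep=False` the generator reads the pre-aggregated `collection_replicas` table, while `deep=True` recomputes availability from the file replicas, including files stored inside archives. The DID is a placeholder:

```python
from rucio.common.types import InternalScope
from rucio.core.replica import list_dataset_replicas

for replica in list_dataset_replicas(scope=InternalScope('mock'),
                                     name='dataset_1',
                                     deep=True):
    print(replica['rse'], replica['available_length'], 'of', replica['length'], 'files available')
```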
+ @stream_session
3424
+ def list_dataset_replicas_bulk(
3425
+ names_by_intscope: dict[str, Any],
3426
+ *,
3427
+ session: "Session"
3428
+ ) -> "Iterator[dict[str, Any]]":
3429
+ """
3430
+ :param names_by_intscope: The dictionary of internal scopes pointing at the list of names.
3431
+ :param session: Database session to use.
3432
+
3433
+ :returns: A list of dictionaries containing the dataset replicas
3434
+ with associated metrics and timestamps
3435
+ """
3436
+
3437
+ condition = []
3438
+ for scope in names_by_intscope:
3439
+ condition.append(and_(models.CollectionReplica.scope == scope,
3440
+ models.CollectionReplica.name.in_(names_by_intscope[scope])))
3441
+
3442
+ try:
3443
+ # chunk size refers to the number of different scopes, see above
3444
+ for chunk in chunks(condition, 10):
3445
+ stmt = select(
3446
+ models.CollectionReplica.scope,
3447
+ models.CollectionReplica.name,
3448
+ models.RSE.rse,
3449
+ models.CollectionReplica.rse_id,
3450
+ models.CollectionReplica.bytes,
3451
+ models.CollectionReplica.length,
3452
+ models.CollectionReplica.available_bytes,
3453
+ models.CollectionReplica.available_replicas_cnt.label("available_length"),
3454
+ models.CollectionReplica.state,
3455
+ models.CollectionReplica.created_at,
3456
+ models.CollectionReplica.updated_at,
3457
+ models.CollectionReplica.accessed_at
3458
+ ).where(
3459
+ and_(models.CollectionReplica.did_type == DIDType.DATASET,
3460
+ models.CollectionReplica.rse_id == models.RSE.id,
3461
+ models.RSE.deleted == false(),
3462
+ or_(*chunk))
3463
+ )
3464
+
3465
+ for row in session.execute(stmt).all():
3466
+ yield row._asdict()
3467
+ except NoResultFound:
3468
+ raise exception.DataIdentifierNotFound('No Data Identifiers found')
3469
+
3470
+
3471
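A hedged sketch showing the expected shape of `names_by_intscope`: a mapping from an `InternalScope` to the list of dataset names within that scope (placeholder names below):

```python
from rucio.common.types import InternalScope
from rucio.core.replica import list_dataset_replicas_bulk

names_by_intscope = {InternalScope('mock'): ['dataset_1', 'dataset_2']}
for replica in list_dataset_replicas_bulk(names_by_intscope=names_by_intscope):
    print(replica['scope'], replica['name'], replica['rse'], replica['state'])
```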
+ @stream_session
3472
+ def list_dataset_replicas_vp(
3473
+ scope: InternalScope,
3474
+ name: str,
3475
+ deep: bool = False,
3476
+ *,
3477
+ session: "Session",
3478
+ logger: "LoggerFunction" = logging.log
3479
+ ) -> Union[list[str], "Iterator[dict[str, Any]]"]:
3480
+ """
3481
+ List dataset replicas for a DID (scope:name) using the
3482
+ Virtual Placement service.
3483
+
3484
+ NOTICE: This is an R&D function and might change or go away at any time.
3485
+
3486
+ :param scope: The scope of the dataset.
3487
+ :param name: The name of the dataset.
3488
+ :param deep: Lookup at the file level.
3489
+ :param session: Database session to use.
3490
+
3491
+ :returns: If VP exists and there is at least one non-TAPE replica, returns a list of dicts of sites
3492
+ """
3493
+ vp_endpoint = get_vp_endpoint()
3494
+ vp_replies = ['other']
3495
+ nr_replies = 5 # force limit reply size
3496
+
3497
+ if not vp_endpoint:
3498
+ return vp_replies
3499
+
3500
+ try:
3501
+ vp_replies = requests.get('{}/ds/{}/{}:{}'.format(vp_endpoint, nr_replies, scope, name),
3502
+ verify=False,
3503
+ timeout=1)
3504
+ if vp_replies.status_code == 200:
3505
+ vp_replies = vp_replies.json()
3506
+ else:
3507
+ vp_replies = ['other']
3508
+ except requests.exceptions.RequestException as re:
3509
+ logger(logging.ERROR, 'In list_dataset_replicas_vp, could not access {}. Error:{}'.format(vp_endpoint, re))
3510
+ vp_replies = ['other']
3511
+
3512
+ if vp_replies != ['other']:
3513
+ # check that there is at least one regular replica
3514
+ # that is not on tape and has a protocol with scheme "root"
3515
+ # and can be accessed from WAN
3516
+ accessible_replica_exists = False
3517
+ for reply in list_dataset_replicas(scope=scope, name=name, deep=deep, session=session):
3518
+ if reply['state'] != ReplicaState.AVAILABLE:
3519
+ continue
3520
+ rse_info = rsemgr.get_rse_info(rse=reply['rse'], vo=scope.vo, session=session)
3521
+ if rse_info['rse_type'] == 'TAPE':
3522
+ continue
3523
+ for prot in rse_info['protocols']:
3524
+ if prot['scheme'] == 'root' and prot['domains']['wan']['read']:
3525
+ accessible_replica_exists = True
3526
+ break
3527
+ if accessible_replica_exists is True:
3528
+ break
3529
+ if accessible_replica_exists is True:
3530
+ for vp_reply in vp_replies:
3531
+ yield {'vp': True, 'site': vp_reply}
3532
+
3533
+
3534
+ @stream_session
3535
+ def list_datasets_per_rse(
3536
+ rse_id: str,
3537
+ filters: Optional[dict[str, Any]] = None,
3538
+ limit: Optional[int] = None,
3539
+ *,
3540
+ session: "Session"
3541
+ ) -> "Iterator[dict[str, Any]]":
3542
+ """
3543
+ List datasets at a RSE.
3544
+
3545
+ :param rse_id: The rse id.
3546
+ :param filters: dictionary of attributes by which the results should be filtered.
3547
+ :param limit: limit number.
3548
+ :param session: Database session to use.
3549
+
3550
+ :returns: A list of dataset replica dictionaries.
3551
+ """
3552
+ stmt = select(
3553
+ models.CollectionReplica.scope,
3554
+ models.CollectionReplica.name,
3555
+ models.RSE.id.label('rse_id'),
3556
+ models.RSE.rse,
3557
+ models.CollectionReplica.bytes,
3558
+ models.CollectionReplica.length,
3559
+ models.CollectionReplica.available_bytes,
3560
+ models.CollectionReplica.available_replicas_cnt.label("available_length"),
3561
+ models.CollectionReplica.state,
3562
+ models.CollectionReplica.created_at,
3563
+ models.CollectionReplica.updated_at,
3564
+ models.CollectionReplica.accessed_at
3565
+ ).where(
3566
+ and_(models.CollectionReplica.did_type == DIDType.DATASET,
3567
+ models.CollectionReplica.rse_id == models.RSE.id,
3568
+ models.RSE.deleted == false(),
3569
+ models.RSE.id == rse_id)
3570
+ )
3571
+
3572
+ for (k, v) in (filters or {}).items():
3573
+ if k == 'name' or k == 'scope':
3574
+ v_str = v if k != 'scope' else v.internal # type: ignore
3575
+ if '*' in v_str or '%' in v_str:
3576
+ if session.bind.dialect.name == 'postgresql': # type: ignore | PostgreSQL escapes automatically
3577
+ stmt = stmt.where(getattr(models.CollectionReplica, k).like(v_str.replace('*', '%')))
3578
+ else:
3579
+ stmt = stmt.where(getattr(models.CollectionReplica, k).like(v_str.replace('*', '%'), escape='\\'))
3580
+ else:
3581
+ stmt = stmt.where(getattr(models.CollectionReplica, k) == v)
3582
+ # hints ?
3583
+ elif k == 'created_before':
3584
+ created_before = str_to_date(v)
3585
+ stmt = stmt.where(models.CollectionReplica.created_at <= created_before)
3586
+ elif k == 'created_after':
3587
+ created_after = str_to_date(v)
3588
+ stmt = stmt.where(models.CollectionReplica.created_at >= created_after)
3589
+ else:
3590
+ stmt = stmt.where(getattr(models.CollectionReplica, k) == v)
3591
+
3592
+ if limit:
3593
+ stmt = stmt.limit(limit)
3594
+
3595
+ for row in session.execute(stmt).all():
3596
+ yield row._asdict()
3597
+
3598
+
3599
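+ # A minimal filtering sketch for list_datasets_per_rse (the RSE id and filter
+ # values are hypothetical); a '*' in a name or scope filter is translated to
+ # the SQL '%' wildcard above:
+ #
+ #     for dataset in list_datasets_per_rse(rse_id=my_rse_id,
+ #                                          filters={'name': 'data23_*'},
+ #                                          limit=100):
+ #         print(dataset['scope'], dataset['name'], dataset['available_bytes'])
+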
+ @stream_session
+ def list_replicas_per_rse(
+     rse_id: str,
+     limit: Optional[int] = None,
+     *,
+     session: "Session"
+ ) -> "Iterator[dict[str, Any]]":
+     """List all replicas at a given RSE."""
+     list_stmt = select(
+         models.RSEFileAssociation
+     ).where(
+         models.RSEFileAssociation.rse_id == rse_id
+     )
+
+     if limit:
+         list_stmt = list_stmt.limit(limit)
+
+     # Stream results in batches of 100 to keep memory bounded.
+     for replica in session.execute(list_stmt).yield_per(100).scalars():
+         yield replica.to_dict()
+
+
+ @transactional_session
+ def get_cleaned_updated_collection_replicas(
+     total_workers: int,
+     worker_number: int,
+     limit: Optional[int] = None,
+     *,
+     session: "Session"
+ ) -> list[dict[str, Any]]:
+     """
+     Get update requests for collection replicas.
+
+     :param total_workers: Number of total workers.
+     :param worker_number: id of the executing worker.
+     :param limit: Maximum number of requests to return.
+     :param session: Database session in use.
+     :returns: List of update requests for collection replicas.
+     """
+
+     # Delete update requests without an rse_id which do not have collection replicas
+     stmt = delete(
+         models.UpdatedCollectionReplica
+     ).where(
+         and_(models.UpdatedCollectionReplica.rse_id.is_(None),
+              ~exists().where(
+                  and_(models.CollectionReplica.name == models.UpdatedCollectionReplica.name,
+                       models.CollectionReplica.scope == models.UpdatedCollectionReplica.scope)))
+     ).execution_options(
+         synchronize_session=False
+     )
+     session.execute(stmt)
+
+     # Delete update requests with an rse_id which do not have collection replicas
+     stmt = delete(
+         models.UpdatedCollectionReplica
+     ).where(
+         and_(models.UpdatedCollectionReplica.rse_id.isnot(None),
+              ~exists().where(
+                  and_(models.CollectionReplica.name == models.UpdatedCollectionReplica.name,
+                       models.CollectionReplica.scope == models.UpdatedCollectionReplica.scope,
+                       models.CollectionReplica.rse_id == models.UpdatedCollectionReplica.rse_id)))
+     ).execution_options(
+         synchronize_session=False
+     )
+     session.execute(stmt)
+
+     # Delete duplicates
+     if session.bind.dialect.name == 'oracle':  # type: ignore
+         schema = ''
+         if BASE.metadata.schema:
+             schema = BASE.metadata.schema + '.'
+         session.execute(text('DELETE FROM {schema}updated_col_rep A WHERE A.rowid > ANY (SELECT B.rowid FROM {schema}updated_col_rep B WHERE A.scope = B.scope AND A.name=B.name AND A.did_type=B.did_type AND (A.rse_id=B.rse_id OR (A.rse_id IS NULL and B.rse_id IS NULL)))'.format(schema=schema)))  # NOQA: E501
+     elif session.bind.dialect.name == 'mysql':  # type: ignore
+         # Keep only the row with the highest id per (scope, name, rse_id) group.
+         subquery1 = select(
+             func.max(models.UpdatedCollectionReplica.id).label('max_id')
+         ).group_by(
+             models.UpdatedCollectionReplica.scope,
+             models.UpdatedCollectionReplica.name,
+             models.UpdatedCollectionReplica.rse_id
+         ).subquery()
+
+         subquery2 = select(
+             subquery1.c.max_id
+         )
+
+         stmt_del = delete(
+             models.UpdatedCollectionReplica
+         ).where(
+             models.UpdatedCollectionReplica.id.not_in(subquery2)
+         ).execution_options(
+             synchronize_session=False
+         )
+         session.execute(stmt_del)
+     else:
+         stmt = select(models.UpdatedCollectionReplica)
+         update_requests_with_rse_id = []
+         update_requests_without_rse_id = []
+         duplicate_request_ids = []
+         for update_request in session.execute(stmt).scalars().all():
+             if update_request.rse_id is not None:
+                 small_request = {'name': update_request.name, 'scope': update_request.scope, 'rse_id': update_request.rse_id}
+                 if small_request not in update_requests_with_rse_id:
+                     update_requests_with_rse_id.append(small_request)
+                 else:
+                     duplicate_request_ids.append(update_request.id)
+                     continue
+             else:
+                 small_request = {'name': update_request.name, 'scope': update_request.scope}
+                 if small_request not in update_requests_without_rse_id:
+                     update_requests_without_rse_id.append(small_request)
+                 else:
+                     duplicate_request_ids.append(update_request.id)
+                     continue
+         for chunk in chunks(duplicate_request_ids, 100):
+             stmt = delete(
+                 models.UpdatedCollectionReplica
+             ).where(
+                 models.UpdatedCollectionReplica.id.in_(chunk)
+             ).execution_options(
+                 synchronize_session=False
+             )
+             session.execute(stmt)
+
+     stmt = select(models.UpdatedCollectionReplica)
+     if limit:
+         stmt = stmt.limit(limit)
+     return [update_request.to_dict() for update_request in session.execute(stmt).scalars().all()]
+
+
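+ # A minimal draining sketch, assuming the per-request handler defined next
+ # (update_collection_replica); worker numbering is hypothetical:
+ #
+ #     for request in get_cleaned_updated_collection_replicas(total_workers=1,
+ #                                                            worker_number=0,
+ #                                                            limit=1000):
+ #         update_collection_replica(update_request=request)
+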
+ @transactional_session
+ def update_collection_replica(
+     update_request: dict[str, Any],
+     *,
+     session: "Session"
+ ) -> None:
+     """
+     Update a collection replica.
+
+     :param update_request: update request from the updated_col_rep table.
+     :param session: Database session in use.
+     """
+     if update_request['rse_id'] is not None:
+         # Check one specific dataset replica
+         ds_length = 0
+         old_available_replicas = 0
+         ds_bytes = 0
+         ds_replica_state = None
+         ds_available_bytes = 0
+         available_replicas = 0
+
+         try:
+             stmt = select(
+                 models.CollectionReplica
+             ).where(
+                 and_(models.CollectionReplica.scope == update_request['scope'],
+                      models.CollectionReplica.name == update_request['name'],
+                      models.CollectionReplica.rse_id == update_request['rse_id'])
+             )
+             collection_replica = session.execute(stmt).scalar_one()
+             ds_length = collection_replica.length
+             old_available_replicas = collection_replica.available_replicas_cnt
+             ds_bytes = collection_replica.bytes
+         except NoResultFound:
+             pass
+
+         try:
+             stmt = select(
+                 func.sum(models.RSEFileAssociation.bytes).label('ds_available_bytes'),
+                 func.count().label('available_replicas')
+             ).select_from(
+                 models.RSEFileAssociation
+             ).where(
+                 and_(models.RSEFileAssociation.scope == models.DataIdentifierAssociation.child_scope,
+                      models.RSEFileAssociation.name == models.DataIdentifierAssociation.child_name,
+                      models.RSEFileAssociation.rse_id == update_request['rse_id'],
+                      models.RSEFileAssociation.state == ReplicaState.AVAILABLE,
+                      models.DataIdentifierAssociation.name == update_request['name'],
+                      models.DataIdentifierAssociation.scope == update_request['scope'])
+             )
+             file_replica = session.execute(stmt).one()
+
+             available_replicas = file_replica.available_replicas
+             ds_available_bytes = file_replica.ds_available_bytes
+         except NoResultFound:
+             pass
+
+         if available_replicas >= ds_length:
+             ds_replica_state = ReplicaState.AVAILABLE
+         else:
+             ds_replica_state = ReplicaState.UNAVAILABLE
+
+         if old_available_replicas is not None and old_available_replicas > 0 and available_replicas == 0:
+             # The dataset replica lost its last available file on this RSE: drop it.
+             stmt = delete(
+                 models.CollectionReplica
+             ).where(
+                 and_(models.CollectionReplica.scope == update_request['scope'],
+                      models.CollectionReplica.name == update_request['name'],
+                      models.CollectionReplica.rse_id == update_request['rse_id'])
+             )
+             session.execute(stmt)
+         else:
+             stmt = select(
+                 models.CollectionReplica
+             ).where(
+                 and_(models.CollectionReplica.scope == update_request['scope'],
+                      models.CollectionReplica.name == update_request['name'],
+                      models.CollectionReplica.rse_id == update_request['rse_id'])
+             )
+             updated_replica = session.execute(stmt).scalar_one()
+
+             updated_replica.state = ds_replica_state
+             updated_replica.available_replicas_cnt = available_replicas
+             updated_replica.length = ds_length
+             updated_replica.bytes = ds_bytes
+             updated_replica.available_bytes = ds_available_bytes
+     else:
+         stmt = select(
+             func.count().label('ds_length'),
+             func.sum(models.DataIdentifierAssociation.bytes).label('ds_bytes')
+         ).select_from(
+             models.DataIdentifierAssociation
+         ).where(
+             and_(models.DataIdentifierAssociation.scope == update_request['scope'],
+                  models.DataIdentifierAssociation.name == update_request['name'])
+         )
+         association = session.execute(stmt).one()
+
+         # Check all dataset replicas
+         ds_length = association.ds_length
+         ds_bytes = association.ds_bytes
+         ds_replica_state = None
+
+         stmt = select(
+             models.CollectionReplica
+         ).where(
+             and_(models.CollectionReplica.scope == update_request['scope'],
+                  models.CollectionReplica.name == update_request['name'])
+         )
+         for collection_replica in session.execute(stmt).scalars().all():
+             if ds_length:
+                 collection_replica.length = ds_length
+             else:
+                 collection_replica.length = 0
+             if ds_bytes:
+                 collection_replica.bytes = ds_bytes
+             else:
+                 collection_replica.bytes = 0
+
+         stmt = select(
+             func.sum(models.RSEFileAssociation.bytes).label('ds_available_bytes'),
+             func.count().label('available_replicas'),
+             models.RSEFileAssociation.rse_id
+         ).select_from(
+             models.RSEFileAssociation
+         ).where(
+             and_(models.RSEFileAssociation.scope == models.DataIdentifierAssociation.child_scope,
+                  models.RSEFileAssociation.name == models.DataIdentifierAssociation.child_name,
+                  models.RSEFileAssociation.state == ReplicaState.AVAILABLE,
+                  models.DataIdentifierAssociation.name == update_request['name'],
+                  models.DataIdentifierAssociation.scope == update_request['scope'])
+         ).group_by(
+             models.RSEFileAssociation.rse_id
+         )
+
+         for file_replica in session.execute(stmt).all():
+             if file_replica.available_replicas >= ds_length:
+                 ds_replica_state = ReplicaState.AVAILABLE
+             else:
+                 ds_replica_state = ReplicaState.UNAVAILABLE
+
+             stmt = select(
+                 models.CollectionReplica
+             ).where(
+                 and_(models.CollectionReplica.scope == update_request['scope'],
+                      models.CollectionReplica.name == update_request['name'],
+                      models.CollectionReplica.rse_id == file_replica.rse_id)
+             )
+             collection_replica = session.execute(stmt).scalars().first()
+             if collection_replica:
+                 collection_replica.state = ds_replica_state
+                 collection_replica.available_replicas_cnt = file_replica.available_replicas
+                 collection_replica.available_bytes = file_replica.ds_available_bytes
+
+     stmt = delete(
+         models.UpdatedCollectionReplica
+     ).where(
+         models.UpdatedCollectionReplica.id == update_request['id']
+     )
+     session.execute(stmt)
+
+
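+ # Note: the update_request dict consumed above is one element of the list
+ # returned by get_cleaned_updated_collection_replicas(); the keys relied upon
+ # are 'id', 'scope', 'name' and 'rse_id' ('rse_id' may be None, in which case
+ # every dataset replica of the DID is re-evaluated).
+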
+ @read_session
+ def get_bad_pfns(
+     limit: int = 10000,
+     thread: Optional[int] = None,
+     total_threads: Optional[int] = None,
+     *,
+     session: "Session"
+ ) -> list[dict[str, Any]]:
+     """
+     Returns a list of bad PFNs.
+
+     :param limit: The maximum number of replicas returned.
+     :param thread: The assigned thread for this minos instance.
+     :param total_threads: The total number of minos threads.
+     :param session: The database session in use.
+
+     :returns: list of PFNs {'pfn': pfn, 'state': state, 'reason': reason, 'account': account, 'expires_at': expires_at}
+     """
+     result = []
+
+     stmt = select(
+         models.BadPFN.path,
+         models.BadPFN.state,
+         models.BadPFN.reason,
+         models.BadPFN.account,
+         models.BadPFN.expires_at
+     )
+     # Partition the work across threads by hashing on the PFN path.
+     stmt = filter_thread_work(session=session, query=stmt, total_threads=total_threads, thread_id=thread, hash_variable='path')
+     stmt = stmt.order_by(
+         models.BadPFN.created_at
+     ).limit(
+         limit
+     )
+
+     for path, state, reason, account, expires_at in session.execute(stmt).yield_per(1000):
+         result.append({'pfn': clean_pfns([str(path)])[0], 'state': state, 'reason': reason, 'account': account, 'expires_at': expires_at})
+     return result
+
+
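+ # A minimal sketch of splitting the bad-PFN backlog across minos workers by
+ # hashing on the path (the thread counts are hypothetical):
+ #
+ #     for entry in get_bad_pfns(limit=5000, thread=0, total_threads=3):
+ #         print(entry['pfn'], entry['state'], entry['expires_at'])
+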
+ @transactional_session
+ def bulk_add_bad_replicas(
+     replicas: "Iterable[dict[str, Any]]",
+     account: InternalAccount,
+     state: BadFilesStatus = BadFilesStatus.TEMPORARY_UNAVAILABLE,
+     reason: Optional[str] = None,
+     expires_at: Optional[datetime] = None,
+     *,
+     session: "Session"
+ ) -> bool:
+     """
+     Bulk add new bad replicas.
+
+     :param replicas: the list of bad replicas.
+     :param account: The account who declared the bad replicas.
+     :param state: The state of the file (SUSPICIOUS, BAD or TEMPORARY_UNAVAILABLE).
+     :param reason: A string describing the reason of the loss.
+     :param expires_at: Specify a timeout for the TEMPORARY_UNAVAILABLE replicas.
+     :param session: The database session in use.
+
+     :returns: True if successful.
+     """
+     for replica in replicas:
+         scope_name_rse_state = and_(models.BadReplica.scope == replica['scope'],
+                                     models.BadReplica.name == replica['name'],
+                                     models.BadReplica.rse_id == replica['rse_id'],
+                                     models.BadReplica.state == state)
+         insert_new_row = True
+         if state == BadFilesStatus.TEMPORARY_UNAVAILABLE:
+             # If the replica is already declared TEMPORARY_UNAVAILABLE,
+             # refresh the existing row instead of inserting a duplicate.
+             stmt = select(
+                 models.BadReplica
+             ).where(
+                 scope_name_rse_state
+             )
+             if session.execute(stmt).scalar_one_or_none():
+                 stmt = update(
+                     models.BadReplica
+                 ).where(
+                     scope_name_rse_state
+                 ).values({
+                     models.BadReplica.state: BadFilesStatus.TEMPORARY_UNAVAILABLE,
+                     models.BadReplica.updated_at: datetime.utcnow(),
+                     models.BadReplica.account: account,
+                     models.BadReplica.reason: reason,
+                     models.BadReplica.expires_at: expires_at
+                 }).execution_options(
+                     synchronize_session=False
+                 )
+                 session.execute(stmt)
+
+                 insert_new_row = False
+         if insert_new_row:
+             new_bad_replica = models.BadReplica(scope=replica['scope'], name=replica['name'], rse_id=replica['rse_id'], reason=reason,
+                                                 state=state, account=account, bytes=None, expires_at=expires_at)
+             new_bad_replica.save(session=session, flush=False)
+     try:
+         session.flush()
+     except IntegrityError as error:
+         raise exception.RucioException(error.args)
+     except DatabaseError as error:
+         raise exception.RucioException(error.args)
+     except FlushError as error:
+         if match('New instance .* with identity key .* conflicts with persistent instance', error.args[0]):
+             raise exception.DataIdentifierAlreadyExists('Data Identifier already exists!')
+         raise exception.RucioException(error.args)
+     return True
+
+
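+ # A minimal sketch, assuming replica dicts carrying the 'scope', 'name' and
+ # 'rse_id' keys read by the loop above (the account and values are hypothetical):
+ #
+ #     bulk_add_bad_replicas(
+ #         replicas=[{'scope': my_scope, 'name': 'bad.file', 'rse_id': my_rse_id}],
+ #         account=root_account,
+ #         state=BadFilesStatus.TEMPORARY_UNAVAILABLE,
+ #         reason='Checksum mismatch',
+ #         expires_at=datetime.utcnow() + timedelta(hours=24))
+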
+ @transactional_session
+ def bulk_delete_bad_pfns(
+     pfns: "Iterable[str]",
+     *,
+     session: "Session"
+ ) -> Literal[True]:
+     """
+     Bulk delete bad PFNs.
+
+     :param pfns: the list of PFNs to delete.
+     :param session: The database session in use.
+
+     :returns: True if successful.
+     """
+     pfn_clause = []
+     for pfn in pfns:
+         pfn_clause.append(models.BadPFN.path == pfn)
+
+     # Delete in chunks of 100 to keep the OR clause at a manageable size.
+     for chunk in chunks(pfn_clause, 100):
+         stmt = delete(
+             models.BadPFN
+         ).where(
+             or_(*chunk)
+         ).execution_options(
+             synchronize_session=False
+         )
+         session.execute(stmt)
+
+     return True
+
+
+ @transactional_session
+ def bulk_delete_bad_replicas(
+     bad_replicas: "Iterable[dict[str, Any]]",
+     *,
+     session: "Session"
+ ) -> Literal[True]:
+     """
+     Bulk delete bad replicas.
+
+     :param bad_replicas: The list of bad replicas to delete (dictionaries).
+     :param session: The database session in use.
+
+     :returns: True if successful.
+     """
+     replica_clause = []
+     for replica in bad_replicas:
+         replica_clause.append(and_(models.BadReplica.scope == replica['scope'],
+                                    models.BadReplica.name == replica['name'],
+                                    models.BadReplica.rse_id == replica['rse_id'],
+                                    models.BadReplica.state == replica['state']))
+
+     for chunk in chunks(replica_clause, 100):
+         stmt = delete(
+             models.BadReplica
+         ).where(
+             or_(*chunk)
+         ).execution_options(
+             synchronize_session=False
+         )
+         session.execute(stmt)
+     return True
+
+
+ @transactional_session
+ def add_bad_pfns(
+     pfns: "Iterable[str]",
+     account: InternalAccount,
+     state: BadFilesStatus,
+     reason: Optional[str] = None,
+     expires_at: Optional[datetime] = None,
+     *,
+     session: "Session"
+ ) -> Literal[True]:
+     """
+     Add bad PFNs.
+
+     :param pfns: the list of new files.
+     :param account: The account who declared the bad replicas.
+     :param state: One of the possible states: BAD, SUSPICIOUS, TEMPORARY_UNAVAILABLE.
+     :param reason: A string describing the reason of the loss.
+     :param expires_at: Specify a timeout for the TEMPORARY_UNAVAILABLE replicas. None for BAD files.
+     :param session: The database session in use.
+
+     :returns: True if successful.
+     """
+
+     if isinstance(state, str):
+         rep_state = BadPFNStatus[state]
+     else:
+         rep_state = state
+
+     if rep_state == BadPFNStatus.TEMPORARY_UNAVAILABLE and expires_at is None:
+         raise exception.InputValidationError("When adding a TEMPORARY UNAVAILABLE pfn the expires_at value should be set.")
+     elif rep_state == BadPFNStatus.BAD and expires_at is not None:
+         raise exception.InputValidationError("When adding a BAD pfn the expires_at value shouldn't be set.")
+
+     pfns = clean_pfns(pfns)
+     for pfn in pfns:
+         new_pfn = models.BadPFN(path=str(pfn), account=account, state=rep_state, reason=reason, expires_at=expires_at)
+         new_pfn = session.merge(new_pfn)
+         new_pfn.save(session=session, flush=False)
+
+     try:
+         session.flush()
+     except IntegrityError as error:
+         raise exception.RucioException(error.args)
+     except DatabaseError as error:
+         raise exception.RucioException(error.args)
+     except FlushError as error:
+         if match('New instance .* with identity key .* conflicts with persistent instance', error.args[0]):
+             raise exception.Duplicate('One PFN already exists!')
+         raise exception.RucioException(error.args)
+     return True
+
+
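+ # The state/expires_at contract enforced above in short: TEMPORARY_UNAVAILABLE
+ # requires an expiry, BAD forbids one. A minimal sketch (the PFN and account
+ # are hypothetical):
+ #
+ #     add_bad_pfns(pfns=['root://site.example//path/file'],
+ #                  account=root_account,
+ #                  state=BadPFNStatus.TEMPORARY_UNAVAILABLE,
+ #                  reason='Site downtime',
+ #                  expires_at=datetime.utcnow() + timedelta(days=1))
+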
+ @read_session
+ def list_expired_temporary_unavailable_replicas(
+     total_workers: int,
+     worker_number: int,
+     limit: int = 10000,
+     *,
+     session: "Session"
+ ) -> "Sequence[Row]":
+     """
+     List the expired temporary unavailable replicas.
+
+     :param total_workers: Number of total workers.
+     :param worker_number: id of the executing worker.
+     :param limit: The maximum number of replicas returned.
+     :param session: The database session in use.
+
+     :returns: List of (scope, name, rse_id) rows.
+     """
+
+     stmt = select(
+         models.BadReplica.scope,
+         models.BadReplica.name,
+         models.BadReplica.rse_id,
+     ).with_hint(
+         models.BadReplica,
+         'INDEX(bad_replicas BAD_REPLICAS_EXPIRES_AT_IDX)',
+         'oracle'
+     ).where(
+         and_(models.BadReplica.state == BadFilesStatus.TEMPORARY_UNAVAILABLE,
+              models.BadReplica.expires_at < datetime.utcnow())
+     ).order_by(
+         models.BadReplica.expires_at
+     )
+
+     stmt = filter_thread_work(session=session, query=stmt, total_threads=total_workers, thread_id=worker_number, hash_variable='name')
+     stmt = stmt.limit(limit)
+
+     return session.execute(stmt).all()
+
+
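+ # A minimal draining sketch: workers are partitioned by hashing on the replica
+ # name, so a given (worker_number, total_workers) pair always sees the same
+ # slice of the backlog (the numbers are hypothetical):
+ #
+ #     for scope, name, rse_id in list_expired_temporary_unavailable_replicas(
+ #             total_workers=4, worker_number=0, limit=1000):
+ #         ...  # e.g. restore the replica state and remove the bad_replicas row
+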
+ @read_session
+ def get_replicas_state(
+     scope: Optional[InternalScope] = None,
+     name: Optional[str] = None,
+     *,
+     session: "Session"
+ ) -> dict[ReplicaState, list[str]]:
+     """
+     Method used by the necromancer to get all the replicas of a DID.
+
+     :param scope: The scope of the file.
+     :param name: The name of the file.
+     :param session: The database session in use.
+
+     :returns: A dictionary with the states as keys and the lists of rse_ids as values.
+     """
+
+     stmt = select(
+         models.RSEFileAssociation.rse_id,
+         models.RSEFileAssociation.state
+     ).where(
+         and_(models.RSEFileAssociation.scope == scope,
+              models.RSEFileAssociation.name == name)
+     )
+     states = {}
+     for res in session.execute(stmt).all():
+         rse_id, state = res
+         if state not in states:
+             states[state] = []
+         states[state].append(rse_id)
+     return states
+
+
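+ # Example shape of the mapping returned above (the ids are hypothetical):
+ #
+ #     {ReplicaState.AVAILABLE: [rse_id_1, rse_id_2],
+ #      ReplicaState.UNAVAILABLE: [rse_id_3]}
+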
+ @read_session
+ def get_suspicious_files(
+     rse_expression: str,
+     available_elsewhere: int,
+     filter_: Optional[dict[str, Any]] = None,
+     logger: "LoggerFunction" = logging.log,
+     younger_than: Optional[datetime] = None,
+     nattempts: int = 0,
+     nattempts_exact: bool = False,
+     *,
+     session: "Session",
+     exclude_states: Optional["Iterable[str]"] = None,
+     is_suspicious: bool = False
+ ) -> list[dict[str, Any]]:
+     """
+     Gets a list of replicas from the bad_replicas table which are: declared more than <nattempts> times since the <younger_than> date,
+     present on the RSE specified by <rse_expression> and do not have a state in the <exclude_states> list.
+     Selected replicas can also be required to be <available_elsewhere> on an RSE other than the one declared in the bad_replicas table and/or
+     be declared as <is_suspicious> in the bad_replicas table.
+
+     Keyword Arguments:
+     :param younger_than: Datetime object to select the replicas which were declared since the younger_than date. Default value = 10 days ago.
+     :param nattempts: The minimum number of replica appearances in the bad_replica DB table from the younger_than date. Default value = 0.
+     :param nattempts_exact: If True, then only replicas with exactly 'nattempts' appearances in the bad_replica DB table are retrieved. Replicas with more appearances are ignored.
+     :param rse_expression: The RSE expression where the replicas are located.
+     :param filter_: Dictionary of attributes by which the RSE results should be filtered, e.g.: {'availability_write': True}.
+     :param exclude_states: List of states which eliminates replicas from the search result if any of the states in the list
+                            was declared for a replica since the younger_than date. Allowed values
+                            = ['B', 'R', 'D', 'L', 'T', 'S'] (meaning 'BAD', 'RECOVERED', 'DELETED', 'LOST', 'TEMPORARY_UNAVAILABLE', 'SUSPICIOUS').
+     :param available_elsewhere: Default: SuspiciousAvailability["ALL"].value, all suspicious replicas are returned.
+                                 If SuspiciousAvailability["EXIST_COPIES"].value, only replicas that additionally have copies declared as AVAILABLE on at least one other RSE
+                                 than the one in the bad_replicas table will be taken into account.
+                                 If SuspiciousAvailability["LAST_COPY"].value, only replicas that do not have another copy declared as AVAILABLE on another RSE will be taken into account.
+     :param is_suspicious: If True, only replicas declared as SUSPICIOUS in the bad_replicas table will be taken into account. Default value = False.
+     :param session: The database session in use.
+
+     :returns: a list of replicas:
+               [{'scope': scope, 'name': name, 'rse': rse, 'rse_id': rse_id, 'cnt': cnt, 'created_at': created_at}, ...]
+     """
+
+     exclude_states = exclude_states or ['B', 'R', 'D']
+     if available_elsewhere not in [SuspiciousAvailability["ALL"].value, SuspiciousAvailability["EXIST_COPIES"].value, SuspiciousAvailability["LAST_COPY"].value]:
+         logger(logging.WARNING, """ERROR, available_elsewhere must be set to one of the following:
+         SuspiciousAvailability["ALL"].value: (default) all suspicious replicas are returned
+         SuspiciousAvailability["EXIST_COPIES"].value: only replicas that additionally have copies declared as AVAILABLE on at least one other RSE are returned
+         SuspiciousAvailability["LAST_COPY"].value: only replicas that do not have another copy declared as AVAILABLE on another RSE are returned""")
+         raise exception.RucioException("""ERROR, available_elsewhere must be set to one of the following:
+         SuspiciousAvailability["ALL"].value: (default) all suspicious replicas are returned
+         SuspiciousAvailability["EXIST_COPIES"].value: only replicas that additionally have copies declared as AVAILABLE on at least one other RSE are returned
+         SuspiciousAvailability["LAST_COPY"].value: only replicas that do not have another copy declared as AVAILABLE on another RSE are returned""")
+
+     # Only for the two web-API parameters: check the value types and assign defaults.
+     if not isinstance(nattempts, int):
+         nattempts = 0
+     if not isinstance(younger_than, datetime):
+         younger_than = datetime.utcnow() - timedelta(days=10)
+
+     # Assemble the exclude_states clause.
+     exclude_states_clause = []
+     for state in exclude_states:
+         exclude_states_clause.append(BadFilesStatus(state))
+
+     # Aliases for the bad_replicas and replicas tables.
+     bad_replicas_alias = aliased(models.BadReplica, name='bad_replicas_alias')
+     replicas_alias = aliased(models.RSEFileAssociation, name='replicas_alias')
+
+     # Assemble the selection rse_clause.
+     rse_clause = []
+     if rse_expression:
+         parsedexp = parse_expression(expression=rse_expression, filter_=filter_, session=session)
+         for rse in parsedexp:
+             rse_clause.append(models.RSEFileAssociation.rse_id == rse['id'])
+
+     stmt = select(
+         func.count(),
+         bad_replicas_alias.scope,
+         bad_replicas_alias.name,
+         models.RSEFileAssociation.rse_id,
+         func.min(models.RSEFileAssociation.created_at)
+     ).select_from(
+         bad_replicas_alias
+     ).where(
+         models.RSEFileAssociation.rse_id == bad_replicas_alias.rse_id,
+         models.RSEFileAssociation.scope == bad_replicas_alias.scope,
+         models.RSEFileAssociation.name == bad_replicas_alias.name,
+         bad_replicas_alias.created_at >= younger_than
+     )
+     if is_suspicious:
+         stmt = stmt.where(bad_replicas_alias.state == BadFilesStatus.SUSPICIOUS)
+     if rse_clause:
+         stmt = stmt.where(or_(*rse_clause))
+
+     # Only return replicas that have at least one copy on another RSE.
+     if available_elsewhere == SuspiciousAvailability["EXIST_COPIES"].value:
+         available_replica = exists(select(1)
+                                    .where(and_(replicas_alias.state == ReplicaState.AVAILABLE,
+                                                replicas_alias.scope == bad_replicas_alias.scope,
+                                                replicas_alias.name == bad_replicas_alias.name,
+                                                replicas_alias.rse_id != bad_replicas_alias.rse_id)))
+         stmt = stmt.where(available_replica)
+
+     # Only return replicas that are the last remaining copy.
+     if available_elsewhere == SuspiciousAvailability["LAST_COPY"].value:
+         last_replica = ~exists(select(1)
+                                .where(and_(replicas_alias.state == ReplicaState.AVAILABLE,
+                                            replicas_alias.scope == bad_replicas_alias.scope,
+                                            replicas_alias.name == bad_replicas_alias.name,
+                                            replicas_alias.rse_id != bad_replicas_alias.rse_id)))
+         stmt = stmt.where(last_replica)
+
+     # The selected replicas must not occur as BAD/DELETED/LOST/RECOVERED/...
+     # in the bad_replicas table during the same time window.
+     other_states_present = exists(select(1)
+                                   .where(and_(models.BadReplica.scope == bad_replicas_alias.scope,
+                                               models.BadReplica.name == bad_replicas_alias.name,
+                                               models.BadReplica.created_at >= younger_than,
+                                               models.BadReplica.rse_id == bad_replicas_alias.rse_id,
+                                               models.BadReplica.state.in_(exclude_states_clause))))
+     stmt = stmt.where(not_(other_states_present))
+
+     # Finally, the results are grouped by RSE, scope and name, and required to have
+     # at least 'nattempts' occurrences per replica in the result of the query.
+     # If nattempts_exact, replicas are required to have exactly 'nattempts' occurrences.
+     if nattempts_exact:
+         stmt = stmt.group_by(
+             models.RSEFileAssociation.rse_id,
+             bad_replicas_alias.scope,
+             bad_replicas_alias.name
+         ).having(
+             func.count() == nattempts
+         )
+     else:
+         stmt = stmt.group_by(
+             models.RSEFileAssociation.rse_id,
+             bad_replicas_alias.scope,
+             bad_replicas_alias.name
+         ).having(
+             func.count() > nattempts
+         )
+     query_result = session.execute(stmt).all()
+
+     # Translate each rse_id to an RSE name and assemble the return list of dictionaries.
+     result = []
+     rses = {}
+     for cnt, scope, name, rse_id, created_at in query_result:
+         if rse_id not in rses:
+             rse = get_rse_name(rse_id=rse_id, session=session)
+             rses[rse_id] = rse
+         result.append({'scope': scope, 'name': name, 'rse': rses[rse_id], 'rse_id': rse_id, 'cnt': cnt, 'created_at': created_at})
+
+     return result
+
+
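+ # A minimal sketch: last-copy suspicious replicas declared at least 5 times in
+ # the past week (the RSE expression is hypothetical):
+ #
+ #     suspicious = get_suspicious_files(
+ #         rse_expression='tier=2',
+ #         available_elsewhere=SuspiciousAvailability["LAST_COPY"].value,
+ #         younger_than=datetime.utcnow() - timedelta(days=7),
+ #         nattempts=5)
+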
+ @read_session
+ def get_suspicious_reason(
+     rse_id: str,
+     scope: InternalScope,
+     name: str,
+     nattempts: int = 0,
+     logger: "LoggerFunction" = logging.log,
+     *,
+     session: "Session"
+ ) -> list[dict[str, Any]]:
+     """
+     Returns the error message(s) which led to the replica(s) being declared suspicious.
+
+     :param rse_id: ID of the RSE.
+     :param scope: Scope of the replica DID.
+     :param name: Name of the replica DID.
+     :param nattempts: The minimum number of appearances in the bad_replica DB table. Default value = 0.
+     :param session: The database session in use.
+     """
+     # Alias for the bad_replicas table.
+     bad_replicas_alias = aliased(models.BadReplica, name='bad_replicas_alias')
+
+     stmt = select(
+         bad_replicas_alias.scope,
+         bad_replicas_alias.name,
+         bad_replicas_alias.reason,
+         bad_replicas_alias.rse_id
+     ).where(
+         and_(bad_replicas_alias.rse_id == rse_id,
+              bad_replicas_alias.scope == scope,
+              bad_replicas_alias.state == 'S',
+              bad_replicas_alias.name == name,
+              ~exists(select(1).where(
+                  and_(bad_replicas_alias.rse_id == rse_id,
+                       bad_replicas_alias.name == name,
+                       bad_replicas_alias.scope == scope,
+                       bad_replicas_alias.state != 'S'))))
+     )
+
+     count_query = select(
+         func.count()
+     ).select_from(
+         stmt.subquery()
+     )
+     count = session.execute(count_query).scalar_one()
+
+     grouped_stmt = stmt.group_by(
+         bad_replicas_alias.rse_id,
+         bad_replicas_alias.scope,
+         bad_replicas_alias.name,
+         bad_replicas_alias.reason
+     ).having(
+         func.count() > nattempts
+     )
+
+     result = []
+     rses = {}
+     for scope_, name_, reason, rse_id_ in session.execute(grouped_stmt).all():
+         if rse_id_ not in rses:
+             rse = get_rse_name(rse_id=rse_id_, session=session)
+             rses[rse_id_] = rse
+         result.append({'scope': scope_, 'name': name_, 'rse': rses[rse_id_], 'rse_id': rse_id_, 'reason': reason, 'count': count})
+
+     if len(result) > 1:
+         logger(logging.WARNING, "Multiple reasons have been found. Please investigate.")
+
+     return result
+
+
+ @transactional_session
+ def set_tombstone(
+     rse_id: str,
+     scope: InternalScope,
+     name: str,
+     tombstone: datetime = OBSOLETE,
+     *,
+     session: "Session"
+ ) -> None:
+     """
+     Sets a tombstone on a replica.
+
+     :param rse_id: ID of the RSE.
+     :param scope: scope of the replica DID.
+     :param name: name of the replica DID.
+     :param tombstone: the tombstone to set. Default is OBSOLETE.
+     :param session: database session in use.
+     """
+     # Only set the tombstone if no replica lock exists for the file.
+     stmt = update(models.RSEFileAssociation).where(
+         and_(models.RSEFileAssociation.rse_id == rse_id,
+              models.RSEFileAssociation.name == name,
+              models.RSEFileAssociation.scope == scope,
+              ~exists().where(
+                  and_(models.ReplicaLock.rse_id == rse_id,
+                       models.ReplicaLock.name == name,
+                       models.ReplicaLock.scope == scope)))
+     ).prefix_with(
+         '/*+ INDEX(REPLICAS REPLICAS_PK) */', dialect='oracle'
+     ).values({
+         models.RSEFileAssociation.tombstone: tombstone
+     }).execution_options(
+         synchronize_session=False
+     )
+
+     if session.execute(stmt).rowcount == 0:
+         # Nothing was updated: either the replica is locked or it does not exist.
+         try:
+             stmt = select(
+                 models.RSEFileAssociation.tombstone
+             ).where(
+                 and_(models.RSEFileAssociation.rse_id == rse_id,
+                      models.RSEFileAssociation.name == name,
+                      models.RSEFileAssociation.scope == scope)
+             )
+             session.execute(stmt).scalar_one()
+             raise exception.ReplicaIsLocked('Replica %s:%s on RSE %s is locked.' % (scope, name, get_rse_name(rse_id=rse_id, session=session)))
+         except NoResultFound:
+             raise exception.ReplicaNotFound('Replica %s:%s on RSE %s could not be found.' % (scope, name, get_rse_name(rse_id=rse_id, session=session)))
+
+
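+ # A minimal sketch of the two failure modes of the guarded UPDATE above
+ # (the identifiers are hypothetical):
+ #
+ #     try:
+ #         set_tombstone(rse_id=my_rse_id, scope=my_scope, name='old.file')
+ #     except exception.ReplicaIsLocked:
+ #         ...  # a replica lock still protects the file
+ #     except exception.ReplicaNotFound:
+ #         ...  # no such replica on this RSE
+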
+ @read_session
+ def get_rse_coverage_of_dataset(
+     scope: "InternalScope",
+     name: str,
+     *,
+     session: "Session"
+ ) -> dict[str, int]:
+     """
+     Get the total bytes of the dataset present on each RSE.
+
+     :param scope: Scope of the dataset.
+     :param name: Name of the dataset.
+     :param session: The db session.
+     :returns: Dictionary {rse_id: <total bytes present at rse_id>}
+     """
+
+     stmt = select(
+         models.RSEFileAssociation.rse_id,
+         func.sum(models.DataIdentifierAssociation.bytes)
+     ).where(
+         and_(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
+              models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
+              models.DataIdentifierAssociation.scope == scope,
+              models.DataIdentifierAssociation.name == name,
+              models.RSEFileAssociation.state != ReplicaState.BEING_DELETED)
+     ).group_by(
+         models.RSEFileAssociation.rse_id
+     )
+
+     result = {}
+     for rse_id, total in session.execute(stmt):
+         if total:
+             result[rse_id] = total
+
+     return result
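+
+ # Example: picking the RSE holding the most bytes of a dataset from the
+ # coverage mapping above (the scope and name are hypothetical):
+ #
+ #     coverage = get_rse_coverage_of_dataset(scope=my_scope, name='my.dataset')
+ #     if coverage:
+ #         best_rse_id = max(coverage, key=coverage.get)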