rucio 35.7.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rucio might be problematic. Click here for more details.

Files changed (493)
  1. rucio/__init__.py +17 -0
  2. rucio/alembicrevision.py +15 -0
  3. rucio/client/__init__.py +15 -0
  4. rucio/client/accountclient.py +433 -0
  5. rucio/client/accountlimitclient.py +183 -0
  6. rucio/client/baseclient.py +974 -0
  7. rucio/client/client.py +76 -0
  8. rucio/client/configclient.py +126 -0
  9. rucio/client/credentialclient.py +59 -0
  10. rucio/client/didclient.py +866 -0
  11. rucio/client/diracclient.py +56 -0
  12. rucio/client/downloadclient.py +1785 -0
  13. rucio/client/exportclient.py +44 -0
  14. rucio/client/fileclient.py +50 -0
  15. rucio/client/importclient.py +42 -0
  16. rucio/client/lifetimeclient.py +90 -0
  17. rucio/client/lockclient.py +109 -0
  18. rucio/client/metaconventionsclient.py +140 -0
  19. rucio/client/pingclient.py +44 -0
  20. rucio/client/replicaclient.py +454 -0
  21. rucio/client/requestclient.py +125 -0
  22. rucio/client/rseclient.py +746 -0
  23. rucio/client/ruleclient.py +294 -0
  24. rucio/client/scopeclient.py +90 -0
  25. rucio/client/subscriptionclient.py +173 -0
  26. rucio/client/touchclient.py +82 -0
  27. rucio/client/uploadclient.py +955 -0
  28. rucio/common/__init__.py +13 -0
  29. rucio/common/cache.py +74 -0
  30. rucio/common/config.py +801 -0
  31. rucio/common/constants.py +159 -0
  32. rucio/common/constraints.py +17 -0
  33. rucio/common/didtype.py +189 -0
  34. rucio/common/dumper/__init__.py +335 -0
  35. rucio/common/dumper/consistency.py +452 -0
  36. rucio/common/dumper/data_models.py +318 -0
  37. rucio/common/dumper/path_parsing.py +64 -0
  38. rucio/common/exception.py +1151 -0
  39. rucio/common/extra.py +36 -0
  40. rucio/common/logging.py +420 -0
  41. rucio/common/pcache.py +1408 -0
  42. rucio/common/plugins.py +153 -0
  43. rucio/common/policy.py +84 -0
  44. rucio/common/schema/__init__.py +150 -0
  45. rucio/common/schema/atlas.py +413 -0
  46. rucio/common/schema/belleii.py +408 -0
  47. rucio/common/schema/domatpc.py +401 -0
  48. rucio/common/schema/escape.py +426 -0
  49. rucio/common/schema/generic.py +433 -0
  50. rucio/common/schema/generic_multi_vo.py +412 -0
  51. rucio/common/schema/icecube.py +406 -0
  52. rucio/common/stomp_utils.py +159 -0
  53. rucio/common/stopwatch.py +55 -0
  54. rucio/common/test_rucio_server.py +148 -0
  55. rucio/common/types.py +403 -0
  56. rucio/common/utils.py +2238 -0
  57. rucio/core/__init__.py +13 -0
  58. rucio/core/account.py +496 -0
  59. rucio/core/account_counter.py +236 -0
  60. rucio/core/account_limit.py +423 -0
  61. rucio/core/authentication.py +620 -0
  62. rucio/core/config.py +456 -0
  63. rucio/core/credential.py +225 -0
  64. rucio/core/did.py +3000 -0
  65. rucio/core/did_meta_plugins/__init__.py +252 -0
  66. rucio/core/did_meta_plugins/did_column_meta.py +331 -0
  67. rucio/core/did_meta_plugins/did_meta_plugin_interface.py +165 -0
  68. rucio/core/did_meta_plugins/filter_engine.py +613 -0
  69. rucio/core/did_meta_plugins/json_meta.py +240 -0
  70. rucio/core/did_meta_plugins/mongo_meta.py +216 -0
  71. rucio/core/did_meta_plugins/postgres_meta.py +316 -0
  72. rucio/core/dirac.py +237 -0
  73. rucio/core/distance.py +187 -0
  74. rucio/core/exporter.py +59 -0
  75. rucio/core/heartbeat.py +363 -0
  76. rucio/core/identity.py +300 -0
  77. rucio/core/importer.py +259 -0
  78. rucio/core/lifetime_exception.py +377 -0
  79. rucio/core/lock.py +576 -0
  80. rucio/core/message.py +282 -0
  81. rucio/core/meta_conventions.py +203 -0
  82. rucio/core/monitor.py +447 -0
  83. rucio/core/naming_convention.py +195 -0
  84. rucio/core/nongrid_trace.py +136 -0
  85. rucio/core/oidc.py +1461 -0
  86. rucio/core/permission/__init__.py +119 -0
  87. rucio/core/permission/atlas.py +1348 -0
  88. rucio/core/permission/belleii.py +1077 -0
  89. rucio/core/permission/escape.py +1078 -0
  90. rucio/core/permission/generic.py +1130 -0
  91. rucio/core/permission/generic_multi_vo.py +1150 -0
  92. rucio/core/quarantined_replica.py +223 -0
  93. rucio/core/replica.py +4158 -0
  94. rucio/core/replica_sorter.py +366 -0
  95. rucio/core/request.py +3089 -0
  96. rucio/core/rse.py +1875 -0
  97. rucio/core/rse_counter.py +186 -0
  98. rucio/core/rse_expression_parser.py +459 -0
  99. rucio/core/rse_selector.py +302 -0
  100. rucio/core/rule.py +4483 -0
  101. rucio/core/rule_grouping.py +1618 -0
  102. rucio/core/scope.py +180 -0
  103. rucio/core/subscription.py +364 -0
  104. rucio/core/topology.py +490 -0
  105. rucio/core/trace.py +375 -0
  106. rucio/core/transfer.py +1517 -0
  107. rucio/core/vo.py +169 -0
  108. rucio/core/volatile_replica.py +150 -0
  109. rucio/daemons/__init__.py +13 -0
  110. rucio/daemons/abacus/__init__.py +13 -0
  111. rucio/daemons/abacus/account.py +116 -0
  112. rucio/daemons/abacus/collection_replica.py +124 -0
  113. rucio/daemons/abacus/rse.py +117 -0
  114. rucio/daemons/atropos/__init__.py +13 -0
  115. rucio/daemons/atropos/atropos.py +242 -0
  116. rucio/daemons/auditor/__init__.py +289 -0
  117. rucio/daemons/auditor/hdfs.py +97 -0
  118. rucio/daemons/auditor/srmdumps.py +355 -0
  119. rucio/daemons/automatix/__init__.py +13 -0
  120. rucio/daemons/automatix/automatix.py +293 -0
  121. rucio/daemons/badreplicas/__init__.py +13 -0
  122. rucio/daemons/badreplicas/minos.py +322 -0
  123. rucio/daemons/badreplicas/minos_temporary_expiration.py +171 -0
  124. rucio/daemons/badreplicas/necromancer.py +196 -0
  125. rucio/daemons/bb8/__init__.py +13 -0
  126. rucio/daemons/bb8/bb8.py +353 -0
  127. rucio/daemons/bb8/common.py +759 -0
  128. rucio/daemons/bb8/nuclei_background_rebalance.py +153 -0
  129. rucio/daemons/bb8/t2_background_rebalance.py +153 -0
  130. rucio/daemons/c3po/__init__.py +13 -0
  131. rucio/daemons/c3po/algorithms/__init__.py +13 -0
  132. rucio/daemons/c3po/algorithms/simple.py +134 -0
  133. rucio/daemons/c3po/algorithms/t2_free_space.py +128 -0
  134. rucio/daemons/c3po/algorithms/t2_free_space_only_pop.py +130 -0
  135. rucio/daemons/c3po/algorithms/t2_free_space_only_pop_with_network.py +294 -0
  136. rucio/daemons/c3po/c3po.py +371 -0
  137. rucio/daemons/c3po/collectors/__init__.py +13 -0
  138. rucio/daemons/c3po/collectors/agis.py +108 -0
  139. rucio/daemons/c3po/collectors/free_space.py +81 -0
  140. rucio/daemons/c3po/collectors/jedi_did.py +57 -0
  141. rucio/daemons/c3po/collectors/mock_did.py +51 -0
  142. rucio/daemons/c3po/collectors/network_metrics.py +71 -0
  143. rucio/daemons/c3po/collectors/workload.py +112 -0
  144. rucio/daemons/c3po/utils/__init__.py +13 -0
  145. rucio/daemons/c3po/utils/dataset_cache.py +50 -0
  146. rucio/daemons/c3po/utils/expiring_dataset_cache.py +56 -0
  147. rucio/daemons/c3po/utils/expiring_list.py +62 -0
  148. rucio/daemons/c3po/utils/popularity.py +85 -0
  149. rucio/daemons/c3po/utils/timeseries.py +89 -0
  150. rucio/daemons/cache/__init__.py +13 -0
  151. rucio/daemons/cache/consumer.py +197 -0
  152. rucio/daemons/common.py +415 -0
  153. rucio/daemons/conveyor/__init__.py +13 -0
  154. rucio/daemons/conveyor/common.py +562 -0
  155. rucio/daemons/conveyor/finisher.py +529 -0
  156. rucio/daemons/conveyor/poller.py +404 -0
  157. rucio/daemons/conveyor/preparer.py +205 -0
  158. rucio/daemons/conveyor/receiver.py +249 -0
  159. rucio/daemons/conveyor/stager.py +132 -0
  160. rucio/daemons/conveyor/submitter.py +403 -0
  161. rucio/daemons/conveyor/throttler.py +532 -0
  162. rucio/daemons/follower/__init__.py +13 -0
  163. rucio/daemons/follower/follower.py +101 -0
  164. rucio/daemons/hermes/__init__.py +13 -0
  165. rucio/daemons/hermes/hermes.py +774 -0
  166. rucio/daemons/judge/__init__.py +13 -0
  167. rucio/daemons/judge/cleaner.py +159 -0
  168. rucio/daemons/judge/evaluator.py +185 -0
  169. rucio/daemons/judge/injector.py +162 -0
  170. rucio/daemons/judge/repairer.py +154 -0
  171. rucio/daemons/oauthmanager/__init__.py +13 -0
  172. rucio/daemons/oauthmanager/oauthmanager.py +198 -0
  173. rucio/daemons/reaper/__init__.py +13 -0
  174. rucio/daemons/reaper/dark_reaper.py +278 -0
  175. rucio/daemons/reaper/reaper.py +743 -0
  176. rucio/daemons/replicarecoverer/__init__.py +13 -0
  177. rucio/daemons/replicarecoverer/suspicious_replica_recoverer.py +626 -0
  178. rucio/daemons/rsedecommissioner/__init__.py +13 -0
  179. rucio/daemons/rsedecommissioner/config.py +81 -0
  180. rucio/daemons/rsedecommissioner/profiles/__init__.py +24 -0
  181. rucio/daemons/rsedecommissioner/profiles/atlas.py +60 -0
  182. rucio/daemons/rsedecommissioner/profiles/generic.py +451 -0
  183. rucio/daemons/rsedecommissioner/profiles/types.py +92 -0
  184. rucio/daemons/rsedecommissioner/rse_decommissioner.py +280 -0
  185. rucio/daemons/storage/__init__.py +13 -0
  186. rucio/daemons/storage/consistency/__init__.py +13 -0
  187. rucio/daemons/storage/consistency/actions.py +846 -0
  188. rucio/daemons/tracer/__init__.py +13 -0
  189. rucio/daemons/tracer/kronos.py +536 -0
  190. rucio/daemons/transmogrifier/__init__.py +13 -0
  191. rucio/daemons/transmogrifier/transmogrifier.py +762 -0
  192. rucio/daemons/undertaker/__init__.py +13 -0
  193. rucio/daemons/undertaker/undertaker.py +137 -0
  194. rucio/db/__init__.py +13 -0
  195. rucio/db/sqla/__init__.py +52 -0
  196. rucio/db/sqla/constants.py +201 -0
  197. rucio/db/sqla/migrate_repo/__init__.py +13 -0
  198. rucio/db/sqla/migrate_repo/env.py +110 -0
  199. rucio/db/sqla/migrate_repo/versions/01eaf73ab656_add_new_rule_notification_state_progress.py +70 -0
  200. rucio/db/sqla/migrate_repo/versions/0437a40dbfd1_add_eol_at_in_rules.py +47 -0
  201. rucio/db/sqla/migrate_repo/versions/0f1adb7a599a_create_transfer_hops_table.py +59 -0
  202. rucio/db/sqla/migrate_repo/versions/102efcf145f4_added_stuck_at_column_to_rules.py +43 -0
  203. rucio/db/sqla/migrate_repo/versions/13d4f70c66a9_introduce_transfer_limits.py +91 -0
  204. rucio/db/sqla/migrate_repo/versions/140fef722e91_cleanup_distances_table.py +76 -0
  205. rucio/db/sqla/migrate_repo/versions/14ec5aeb64cf_add_request_external_host.py +43 -0
  206. rucio/db/sqla/migrate_repo/versions/156fb5b5a14_add_request_type_to_requests_idx.py +50 -0
  207. rucio/db/sqla/migrate_repo/versions/1677d4d803c8_split_rse_availability_into_multiple.py +68 -0
  208. rucio/db/sqla/migrate_repo/versions/16a0aca82e12_create_index_on_table_replicas_path.py +40 -0
  209. rucio/db/sqla/migrate_repo/versions/1803333ac20f_adding_provenance_and_phys_group.py +45 -0
  210. rucio/db/sqla/migrate_repo/versions/1a29d6a9504c_add_didtype_chck_to_requests.py +60 -0
  211. rucio/db/sqla/migrate_repo/versions/1a80adff031a_create_index_on_rules_hist_recent.py +40 -0
  212. rucio/db/sqla/migrate_repo/versions/1c45d9730ca6_increase_identity_length.py +140 -0
  213. rucio/db/sqla/migrate_repo/versions/1d1215494e95_add_quarantined_replicas_table.py +73 -0
  214. rucio/db/sqla/migrate_repo/versions/1d96f484df21_asynchronous_rules_and_rule_approval.py +74 -0
  215. rucio/db/sqla/migrate_repo/versions/1f46c5f240ac_add_bytes_column_to_bad_replicas.py +43 -0
  216. rucio/db/sqla/migrate_repo/versions/1fc15ab60d43_add_message_history_table.py +50 -0
  217. rucio/db/sqla/migrate_repo/versions/2190e703eb6e_move_rse_settings_to_rse_attributes.py +134 -0
  218. rucio/db/sqla/migrate_repo/versions/21d6b9dc9961_add_mismatch_scheme_state_to_requests.py +64 -0
  219. rucio/db/sqla/migrate_repo/versions/22cf51430c78_add_availability_column_to_table_rses.py +39 -0
  220. rucio/db/sqla/migrate_repo/versions/22d887e4ec0a_create_sources_table.py +64 -0
  221. rucio/db/sqla/migrate_repo/versions/25821a8a45a3_remove_unique_constraint_on_requests.py +51 -0
  222. rucio/db/sqla/migrate_repo/versions/25fc855625cf_added_unique_constraint_to_rules.py +41 -0
  223. rucio/db/sqla/migrate_repo/versions/269fee20dee9_add_repair_cnt_to_locks.py +43 -0
  224. rucio/db/sqla/migrate_repo/versions/271a46ea6244_add_ignore_availability_column_to_rules.py +44 -0
  225. rucio/db/sqla/migrate_repo/versions/277b5fbb41d3_switch_heartbeats_executable.py +53 -0
  226. rucio/db/sqla/migrate_repo/versions/27e3a68927fb_remove_replicas_tombstone_and_replicas_.py +38 -0
  227. rucio/db/sqla/migrate_repo/versions/2854cd9e168_added_rule_id_column.py +47 -0
  228. rucio/db/sqla/migrate_repo/versions/295289b5a800_processed_by_and__at_in_requests.py +45 -0
  229. rucio/db/sqla/migrate_repo/versions/2962ece31cf4_add_nbaccesses_column_in_the_did_table.py +45 -0
  230. rucio/db/sqla/migrate_repo/versions/2af3291ec4c_added_replicas_history_table.py +57 -0
  231. rucio/db/sqla/migrate_repo/versions/2b69addda658_add_columns_for_third_party_copy_read_.py +45 -0
  232. rucio/db/sqla/migrate_repo/versions/2b8e7bcb4783_add_config_table.py +69 -0
  233. rucio/db/sqla/migrate_repo/versions/2ba5229cb54c_add_submitted_at_to_requests_table.py +43 -0
  234. rucio/db/sqla/migrate_repo/versions/2cbee484dcf9_added_column_volume_to_rse_transfer_.py +42 -0
  235. rucio/db/sqla/migrate_repo/versions/2edee4a83846_add_source_to_requests_and_requests_.py +47 -0
  236. rucio/db/sqla/migrate_repo/versions/2eef46be23d4_change_tokens_pk.py +46 -0
  237. rucio/db/sqla/migrate_repo/versions/2f648fc909f3_index_in_rule_history_on_scope_name.py +40 -0
  238. rucio/db/sqla/migrate_repo/versions/3082b8cef557_add_naming_convention_table_and_closed_.py +67 -0
  239. rucio/db/sqla/migrate_repo/versions/30fa38b6434e_add_index_on_service_column_in_the_message_table.py +44 -0
  240. rucio/db/sqla/migrate_repo/versions/3152492b110b_added_staging_area_column.py +77 -0
  241. rucio/db/sqla/migrate_repo/versions/32c7d2783f7e_create_bad_replicas_table.py +60 -0
  242. rucio/db/sqla/migrate_repo/versions/3345511706b8_replicas_table_pk_definition_is_in_.py +72 -0
  243. rucio/db/sqla/migrate_repo/versions/35ef10d1e11b_change_index_on_table_requests.py +42 -0
  244. rucio/db/sqla/migrate_repo/versions/379a19b5332d_create_rse_limits_table.py +65 -0
  245. rucio/db/sqla/migrate_repo/versions/384b96aa0f60_created_rule_history_tables.py +133 -0
  246. rucio/db/sqla/migrate_repo/versions/3ac1660a1a72_extend_distance_table.py +55 -0
  247. rucio/db/sqla/migrate_repo/versions/3ad36e2268b0_create_collection_replicas_updates_table.py +76 -0
  248. rucio/db/sqla/migrate_repo/versions/3c9df354071b_extend_waiting_request_state.py +60 -0
  249. rucio/db/sqla/migrate_repo/versions/3d9813fab443_add_a_new_state_lost_in_badfilesstatus.py +44 -0
  250. rucio/db/sqla/migrate_repo/versions/40ad39ce3160_add_transferred_at_to_requests_table.py +43 -0
  251. rucio/db/sqla/migrate_repo/versions/4207be2fd914_add_notification_column_to_rules.py +64 -0
  252. rucio/db/sqla/migrate_repo/versions/42db2617c364_create_index_on_requests_external_id.py +40 -0
  253. rucio/db/sqla/migrate_repo/versions/436827b13f82_added_column_activity_to_table_requests.py +43 -0
  254. rucio/db/sqla/migrate_repo/versions/44278720f774_update_requests_typ_sta_upd_idx_index.py +44 -0
  255. rucio/db/sqla/migrate_repo/versions/45378a1e76a8_create_collection_replica_table.py +78 -0
  256. rucio/db/sqla/migrate_repo/versions/469d262be19_removing_created_at_index.py +41 -0
  257. rucio/db/sqla/migrate_repo/versions/4783c1f49cb4_create_distance_table.py +59 -0
  258. rucio/db/sqla/migrate_repo/versions/49a21b4d4357_create_index_on_table_tokens.py +44 -0
  259. rucio/db/sqla/migrate_repo/versions/4a2cbedda8b9_add_source_replica_expression_column_to_.py +43 -0
  260. rucio/db/sqla/migrate_repo/versions/4a7182d9578b_added_bytes_length_accessed_at_columns.py +49 -0
  261. rucio/db/sqla/migrate_repo/versions/4bab9edd01fc_create_index_on_requests_rule_id.py +40 -0
  262. rucio/db/sqla/migrate_repo/versions/4c3a4acfe006_new_attr_account_table.py +63 -0
  263. rucio/db/sqla/migrate_repo/versions/4cf0a2e127d4_adding_transient_metadata.py +43 -0
  264. rucio/db/sqla/migrate_repo/versions/4df2c5ddabc0_remove_temporary_dids.py +55 -0
  265. rucio/db/sqla/migrate_repo/versions/50280c53117c_add_qos_class_to_rse.py +45 -0
  266. rucio/db/sqla/migrate_repo/versions/52153819589c_add_rse_id_to_replicas_table.py +43 -0
  267. rucio/db/sqla/migrate_repo/versions/52fd9f4916fa_added_activity_to_rules.py +43 -0
  268. rucio/db/sqla/migrate_repo/versions/53b479c3cb0f_fix_did_meta_table_missing_updated_at_.py +45 -0
  269. rucio/db/sqla/migrate_repo/versions/5673b4b6e843_add_wfms_metadata_to_rule_tables.py +47 -0
  270. rucio/db/sqla/migrate_repo/versions/575767d9f89_added_source_history_table.py +58 -0
  271. rucio/db/sqla/migrate_repo/versions/58bff7008037_add_started_at_to_requests.py +45 -0
  272. rucio/db/sqla/migrate_repo/versions/58c8b78301ab_rename_callback_to_message.py +106 -0
  273. rucio/db/sqla/migrate_repo/versions/5f139f77382a_added_child_rule_id_column.py +55 -0
  274. rucio/db/sqla/migrate_repo/versions/688ef1840840_adding_did_meta_table.py +50 -0
  275. rucio/db/sqla/migrate_repo/versions/6e572a9bfbf3_add_new_split_container_column_to_rules.py +47 -0
  276. rucio/db/sqla/migrate_repo/versions/70587619328_add_comment_column_for_subscriptions.py +43 -0
  277. rucio/db/sqla/migrate_repo/versions/739064d31565_remove_history_table_pks.py +41 -0
  278. rucio/db/sqla/migrate_repo/versions/7541902bf173_add_didsfollowed_and_followevents_table.py +91 -0
  279. rucio/db/sqla/migrate_repo/versions/7ec22226cdbf_new_replica_state_for_temporary_.py +72 -0
  280. rucio/db/sqla/migrate_repo/versions/810a41685bc1_added_columns_rse_transfer_limits.py +49 -0
  281. rucio/db/sqla/migrate_repo/versions/83f991c63a93_correct_rse_expression_length.py +43 -0
  282. rucio/db/sqla/migrate_repo/versions/8523998e2e76_increase_size_of_extended_attributes_.py +43 -0
  283. rucio/db/sqla/migrate_repo/versions/8ea9122275b1_adding_missing_function_based_indices.py +53 -0
  284. rucio/db/sqla/migrate_repo/versions/90f47792bb76_add_clob_payload_to_messages.py +45 -0
  285. rucio/db/sqla/migrate_repo/versions/914b8f02df38_new_table_for_lifetime_model_exceptions.py +68 -0
  286. rucio/db/sqla/migrate_repo/versions/94a5961ddbf2_add_estimator_columns.py +45 -0
  287. rucio/db/sqla/migrate_repo/versions/9a1b149a2044_add_saml_identity_type.py +94 -0
  288. rucio/db/sqla/migrate_repo/versions/9a45bc4ea66d_add_vp_table.py +54 -0
  289. rucio/db/sqla/migrate_repo/versions/9eb936a81eb1_true_is_true.py +72 -0
  290. rucio/db/sqla/migrate_repo/versions/a08fa8de1545_transfer_stats_table.py +55 -0
  291. rucio/db/sqla/migrate_repo/versions/a118956323f8_added_vo_table_and_vo_col_to_rse.py +76 -0
  292. rucio/db/sqla/migrate_repo/versions/a193a275255c_add_status_column_in_messages.py +47 -0
  293. rucio/db/sqla/migrate_repo/versions/a5f6f6e928a7_1_7_0.py +121 -0
  294. rucio/db/sqla/migrate_repo/versions/a616581ee47_added_columns_to_table_requests.py +59 -0
  295. rucio/db/sqla/migrate_repo/versions/a6eb23955c28_state_idx_non_functional.py +52 -0
  296. rucio/db/sqla/migrate_repo/versions/a74275a1ad30_added_global_quota_table.py +54 -0
  297. rucio/db/sqla/migrate_repo/versions/a93e4e47bda_heartbeats.py +64 -0
  298. rucio/db/sqla/migrate_repo/versions/ae2a56fcc89_added_comment_column_to_rules.py +49 -0
  299. rucio/db/sqla/migrate_repo/versions/b0070f3695c8_add_deletedidmeta_table.py +57 -0
  300. rucio/db/sqla/migrate_repo/versions/b4293a99f344_added_column_identity_to_table_tokens.py +43 -0
  301. rucio/db/sqla/migrate_repo/versions/b5493606bbf5_fix_primary_key_for_subscription_history.py +41 -0
  302. rucio/db/sqla/migrate_repo/versions/b7d287de34fd_removal_of_replicastate_source.py +91 -0
  303. rucio/db/sqla/migrate_repo/versions/b818052fa670_add_index_to_quarantined_replicas.py +40 -0
  304. rucio/db/sqla/migrate_repo/versions/b8caac94d7f0_add_comments_column_for_subscriptions_.py +43 -0
  305. rucio/db/sqla/migrate_repo/versions/b96a1c7e1cc4_new_bad_pfns_table_and_bad_replicas_.py +143 -0
  306. rucio/db/sqla/migrate_repo/versions/bb695f45c04_extend_request_state.py +76 -0
  307. rucio/db/sqla/migrate_repo/versions/bc68e9946deb_add_staging_timestamps_to_request.py +50 -0
  308. rucio/db/sqla/migrate_repo/versions/bf3baa1c1474_correct_pk_and_idx_for_history_tables.py +72 -0
  309. rucio/db/sqla/migrate_repo/versions/c0937668555f_add_qos_policy_map_table.py +55 -0
  310. rucio/db/sqla/migrate_repo/versions/c129ccdb2d5_add_lumiblocknr_to_dids.py +43 -0
  311. rucio/db/sqla/migrate_repo/versions/ccdbcd48206e_add_did_type_column_index_on_did_meta_.py +65 -0
  312. rucio/db/sqla/migrate_repo/versions/cebad904c4dd_new_payload_column_for_heartbeats.py +47 -0
  313. rucio/db/sqla/migrate_repo/versions/d1189a09c6e0_oauth2_0_and_jwt_feature_support_adding_.py +146 -0
  314. rucio/db/sqla/migrate_repo/versions/d23453595260_extend_request_state_for_preparer.py +104 -0
  315. rucio/db/sqla/migrate_repo/versions/d6dceb1de2d_added_purge_column_to_rules.py +44 -0
  316. rucio/db/sqla/migrate_repo/versions/d6e2c3b2cf26_remove_third_party_copy_column_from_rse.py +43 -0
  317. rucio/db/sqla/migrate_repo/versions/d91002c5841_new_account_limits_table.py +103 -0
  318. rucio/db/sqla/migrate_repo/versions/e138c364ebd0_extending_columns_for_filter_and_.py +49 -0
  319. rucio/db/sqla/migrate_repo/versions/e59300c8b179_support_for_archive.py +104 -0
  320. rucio/db/sqla/migrate_repo/versions/f1b14a8c2ac1_postgres_use_check_constraints.py +29 -0
  321. rucio/db/sqla/migrate_repo/versions/f41ffe206f37_oracle_global_temporary_tables.py +74 -0
  322. rucio/db/sqla/migrate_repo/versions/f85a2962b021_adding_transfertool_column_to_requests_.py +47 -0
  323. rucio/db/sqla/migrate_repo/versions/fa7a7d78b602_increase_refresh_token_size.py +43 -0
  324. rucio/db/sqla/migrate_repo/versions/fb28a95fe288_add_replicas_rse_id_tombstone_idx.py +37 -0
  325. rucio/db/sqla/migrate_repo/versions/fe1a65b176c9_set_third_party_copy_read_and_write_.py +43 -0
  326. rucio/db/sqla/migrate_repo/versions/fe8ea2fa9788_added_third_party_copy_column_to_rse_.py +43 -0
  327. rucio/db/sqla/models.py +1740 -0
  328. rucio/db/sqla/sautils.py +55 -0
  329. rucio/db/sqla/session.py +498 -0
  330. rucio/db/sqla/types.py +206 -0
  331. rucio/db/sqla/util.py +543 -0
  332. rucio/gateway/__init__.py +13 -0
  333. rucio/gateway/account.py +339 -0
  334. rucio/gateway/account_limit.py +286 -0
  335. rucio/gateway/authentication.py +375 -0
  336. rucio/gateway/config.py +217 -0
  337. rucio/gateway/credential.py +71 -0
  338. rucio/gateway/did.py +970 -0
  339. rucio/gateway/dirac.py +81 -0
  340. rucio/gateway/exporter.py +59 -0
  341. rucio/gateway/heartbeat.py +74 -0
  342. rucio/gateway/identity.py +204 -0
  343. rucio/gateway/importer.py +45 -0
  344. rucio/gateway/lifetime_exception.py +120 -0
  345. rucio/gateway/lock.py +153 -0
  346. rucio/gateway/meta_conventions.py +87 -0
  347. rucio/gateway/permission.py +71 -0
  348. rucio/gateway/quarantined_replica.py +78 -0
  349. rucio/gateway/replica.py +529 -0
  350. rucio/gateway/request.py +321 -0
  351. rucio/gateway/rse.py +600 -0
  352. rucio/gateway/rule.py +417 -0
  353. rucio/gateway/scope.py +99 -0
  354. rucio/gateway/subscription.py +277 -0
  355. rucio/gateway/vo.py +122 -0
  356. rucio/rse/__init__.py +96 -0
  357. rucio/rse/protocols/__init__.py +13 -0
  358. rucio/rse/protocols/bittorrent.py +184 -0
  359. rucio/rse/protocols/cache.py +122 -0
  360. rucio/rse/protocols/dummy.py +111 -0
  361. rucio/rse/protocols/gfal.py +703 -0
  362. rucio/rse/protocols/globus.py +243 -0
  363. rucio/rse/protocols/gsiftp.py +92 -0
  364. rucio/rse/protocols/http_cache.py +82 -0
  365. rucio/rse/protocols/mock.py +123 -0
  366. rucio/rse/protocols/ngarc.py +209 -0
  367. rucio/rse/protocols/posix.py +250 -0
  368. rucio/rse/protocols/protocol.py +594 -0
  369. rucio/rse/protocols/rclone.py +364 -0
  370. rucio/rse/protocols/rfio.py +136 -0
  371. rucio/rse/protocols/srm.py +338 -0
  372. rucio/rse/protocols/ssh.py +413 -0
  373. rucio/rse/protocols/storm.py +206 -0
  374. rucio/rse/protocols/webdav.py +550 -0
  375. rucio/rse/protocols/xrootd.py +301 -0
  376. rucio/rse/rsemanager.py +764 -0
  377. rucio/tests/__init__.py +13 -0
  378. rucio/tests/common.py +270 -0
  379. rucio/tests/common_server.py +132 -0
  380. rucio/transfertool/__init__.py +13 -0
  381. rucio/transfertool/bittorrent.py +199 -0
  382. rucio/transfertool/bittorrent_driver.py +52 -0
  383. rucio/transfertool/bittorrent_driver_qbittorrent.py +133 -0
  384. rucio/transfertool/fts3.py +1596 -0
  385. rucio/transfertool/fts3_plugins.py +152 -0
  386. rucio/transfertool/globus.py +201 -0
  387. rucio/transfertool/globus_library.py +181 -0
  388. rucio/transfertool/mock.py +90 -0
  389. rucio/transfertool/transfertool.py +221 -0
  390. rucio/vcsversion.py +11 -0
  391. rucio/version.py +38 -0
  392. rucio/web/__init__.py +13 -0
  393. rucio/web/rest/__init__.py +13 -0
  394. rucio/web/rest/flaskapi/__init__.py +13 -0
  395. rucio/web/rest/flaskapi/authenticated_bp.py +27 -0
  396. rucio/web/rest/flaskapi/v1/__init__.py +13 -0
  397. rucio/web/rest/flaskapi/v1/accountlimits.py +236 -0
  398. rucio/web/rest/flaskapi/v1/accounts.py +1089 -0
  399. rucio/web/rest/flaskapi/v1/archives.py +102 -0
  400. rucio/web/rest/flaskapi/v1/auth.py +1644 -0
  401. rucio/web/rest/flaskapi/v1/common.py +426 -0
  402. rucio/web/rest/flaskapi/v1/config.py +304 -0
  403. rucio/web/rest/flaskapi/v1/credentials.py +212 -0
  404. rucio/web/rest/flaskapi/v1/dids.py +2334 -0
  405. rucio/web/rest/flaskapi/v1/dirac.py +116 -0
  406. rucio/web/rest/flaskapi/v1/export.py +75 -0
  407. rucio/web/rest/flaskapi/v1/heartbeats.py +127 -0
  408. rucio/web/rest/flaskapi/v1/identities.py +261 -0
  409. rucio/web/rest/flaskapi/v1/import.py +132 -0
  410. rucio/web/rest/flaskapi/v1/lifetime_exceptions.py +312 -0
  411. rucio/web/rest/flaskapi/v1/locks.py +358 -0
  412. rucio/web/rest/flaskapi/v1/main.py +91 -0
  413. rucio/web/rest/flaskapi/v1/meta_conventions.py +241 -0
  414. rucio/web/rest/flaskapi/v1/metrics.py +36 -0
  415. rucio/web/rest/flaskapi/v1/nongrid_traces.py +97 -0
  416. rucio/web/rest/flaskapi/v1/ping.py +88 -0
  417. rucio/web/rest/flaskapi/v1/redirect.py +365 -0
  418. rucio/web/rest/flaskapi/v1/replicas.py +1890 -0
  419. rucio/web/rest/flaskapi/v1/requests.py +998 -0
  420. rucio/web/rest/flaskapi/v1/rses.py +2239 -0
  421. rucio/web/rest/flaskapi/v1/rules.py +854 -0
  422. rucio/web/rest/flaskapi/v1/scopes.py +159 -0
  423. rucio/web/rest/flaskapi/v1/subscriptions.py +650 -0
  424. rucio/web/rest/flaskapi/v1/templates/auth_crash.html +80 -0
  425. rucio/web/rest/flaskapi/v1/templates/auth_granted.html +82 -0
  426. rucio/web/rest/flaskapi/v1/traces.py +100 -0
  427. rucio/web/rest/flaskapi/v1/types.py +20 -0
  428. rucio/web/rest/flaskapi/v1/vos.py +278 -0
  429. rucio/web/rest/main.py +18 -0
  430. rucio/web/rest/metrics.py +27 -0
  431. rucio/web/rest/ping.py +27 -0
  432. rucio-35.7.0.data/data/rucio/etc/alembic.ini.template +71 -0
  433. rucio-35.7.0.data/data/rucio/etc/alembic_offline.ini.template +74 -0
  434. rucio-35.7.0.data/data/rucio/etc/globus-config.yml.template +5 -0
  435. rucio-35.7.0.data/data/rucio/etc/ldap.cfg.template +30 -0
  436. rucio-35.7.0.data/data/rucio/etc/mail_templates/rule_approval_request.tmpl +38 -0
  437. rucio-35.7.0.data/data/rucio/etc/mail_templates/rule_approved_admin.tmpl +4 -0
  438. rucio-35.7.0.data/data/rucio/etc/mail_templates/rule_approved_user.tmpl +17 -0
  439. rucio-35.7.0.data/data/rucio/etc/mail_templates/rule_denied_admin.tmpl +6 -0
  440. rucio-35.7.0.data/data/rucio/etc/mail_templates/rule_denied_user.tmpl +17 -0
  441. rucio-35.7.0.data/data/rucio/etc/mail_templates/rule_ok_notification.tmpl +19 -0
  442. rucio-35.7.0.data/data/rucio/etc/rse-accounts.cfg.template +25 -0
  443. rucio-35.7.0.data/data/rucio/etc/rucio.cfg.atlas.client.template +42 -0
  444. rucio-35.7.0.data/data/rucio/etc/rucio.cfg.template +257 -0
  445. rucio-35.7.0.data/data/rucio/etc/rucio_multi_vo.cfg.template +234 -0
  446. rucio-35.7.0.data/data/rucio/requirements.server.txt +268 -0
  447. rucio-35.7.0.data/data/rucio/tools/bootstrap.py +34 -0
  448. rucio-35.7.0.data/data/rucio/tools/merge_rucio_configs.py +144 -0
  449. rucio-35.7.0.data/data/rucio/tools/reset_database.py +40 -0
  450. rucio-35.7.0.data/scripts/rucio +2542 -0
  451. rucio-35.7.0.data/scripts/rucio-abacus-account +74 -0
  452. rucio-35.7.0.data/scripts/rucio-abacus-collection-replica +46 -0
  453. rucio-35.7.0.data/scripts/rucio-abacus-rse +78 -0
  454. rucio-35.7.0.data/scripts/rucio-admin +2447 -0
  455. rucio-35.7.0.data/scripts/rucio-atropos +60 -0
  456. rucio-35.7.0.data/scripts/rucio-auditor +205 -0
  457. rucio-35.7.0.data/scripts/rucio-automatix +50 -0
  458. rucio-35.7.0.data/scripts/rucio-bb8 +57 -0
  459. rucio-35.7.0.data/scripts/rucio-c3po +85 -0
  460. rucio-35.7.0.data/scripts/rucio-cache-client +134 -0
  461. rucio-35.7.0.data/scripts/rucio-cache-consumer +42 -0
  462. rucio-35.7.0.data/scripts/rucio-conveyor-finisher +58 -0
  463. rucio-35.7.0.data/scripts/rucio-conveyor-poller +66 -0
  464. rucio-35.7.0.data/scripts/rucio-conveyor-preparer +37 -0
  465. rucio-35.7.0.data/scripts/rucio-conveyor-receiver +43 -0
  466. rucio-35.7.0.data/scripts/rucio-conveyor-stager +76 -0
  467. rucio-35.7.0.data/scripts/rucio-conveyor-submitter +139 -0
  468. rucio-35.7.0.data/scripts/rucio-conveyor-throttler +104 -0
  469. rucio-35.7.0.data/scripts/rucio-dark-reaper +53 -0
  470. rucio-35.7.0.data/scripts/rucio-dumper +160 -0
  471. rucio-35.7.0.data/scripts/rucio-follower +44 -0
  472. rucio-35.7.0.data/scripts/rucio-hermes +54 -0
  473. rucio-35.7.0.data/scripts/rucio-judge-cleaner +89 -0
  474. rucio-35.7.0.data/scripts/rucio-judge-evaluator +137 -0
  475. rucio-35.7.0.data/scripts/rucio-judge-injector +44 -0
  476. rucio-35.7.0.data/scripts/rucio-judge-repairer +44 -0
  477. rucio-35.7.0.data/scripts/rucio-kronos +43 -0
  478. rucio-35.7.0.data/scripts/rucio-minos +53 -0
  479. rucio-35.7.0.data/scripts/rucio-minos-temporary-expiration +50 -0
  480. rucio-35.7.0.data/scripts/rucio-necromancer +120 -0
  481. rucio-35.7.0.data/scripts/rucio-oauth-manager +63 -0
  482. rucio-35.7.0.data/scripts/rucio-reaper +83 -0
  483. rucio-35.7.0.data/scripts/rucio-replica-recoverer +248 -0
  484. rucio-35.7.0.data/scripts/rucio-rse-decommissioner +66 -0
  485. rucio-35.7.0.data/scripts/rucio-storage-consistency-actions +74 -0
  486. rucio-35.7.0.data/scripts/rucio-transmogrifier +77 -0
  487. rucio-35.7.0.data/scripts/rucio-undertaker +76 -0
  488. rucio-35.7.0.dist-info/METADATA +72 -0
  489. rucio-35.7.0.dist-info/RECORD +493 -0
  490. rucio-35.7.0.dist-info/WHEEL +5 -0
  491. rucio-35.7.0.dist-info/licenses/AUTHORS.rst +97 -0
  492. rucio-35.7.0.dist-info/licenses/LICENSE +201 -0
  493. rucio-35.7.0.dist-info/top_level.txt +1 -0
rucio/core/replica.py ADDED
@@ -0,0 +1,4158 @@
1
+ # Copyright European Organization for Nuclear Research (CERN) since 2012
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import copy
16
+ import heapq
17
+ import logging
18
+ import math
19
+ import random
20
+ from collections import defaultdict, namedtuple
21
+ from curses.ascii import isprint
22
+ from datetime import datetime, timedelta
23
+ from hashlib import sha256
24
+ from itertools import groupby
25
+ from json import dumps
26
+ from re import match
27
+ from struct import unpack
28
+ from traceback import format_exc
29
+ from typing import TYPE_CHECKING
30
+
31
+ import requests
32
+ from dogpile.cache.api import NO_VALUE
33
+ from sqlalchemy import and_, delete, exists, func, insert, not_, or_, union, update
34
+ from sqlalchemy.exc import DatabaseError, IntegrityError
35
+ from sqlalchemy.orm import aliased
36
+ from sqlalchemy.orm.exc import FlushError, NoResultFound
37
+ from sqlalchemy.sql.expression import case, false, literal, literal_column, null, select, text, true
38
+
39
+ import rucio.core.did
40
+ import rucio.core.lock
41
+ from rucio.common import exception
42
+ from rucio.common.cache import make_region_memcached
43
+ from rucio.common.config import config_get, config_get_bool
44
+ from rucio.common.constants import RseAttr, SuspiciousAvailability
45
+ from rucio.common.types import InternalScope
46
+ from rucio.common.utils import add_url_query, chunks, clean_pfns, str_to_date
47
+ from rucio.core.credential import get_signed_url
48
+ from rucio.core.message import add_messages
49
+ from rucio.core.monitor import MetricManager
50
+ from rucio.core.rse import get_rse, get_rse_attribute, get_rse_name, get_rse_vo, list_rses
51
+ from rucio.core.rse_counter import decrease, increase
52
+ from rucio.core.rse_expression_parser import parse_expression
53
+ from rucio.db.sqla import filter_thread_work, models
54
+ from rucio.db.sqla.constants import OBSOLETE, BadFilesStatus, BadPFNStatus, DIDAvailability, DIDType, ReplicaState, RuleState
55
+ from rucio.db.sqla.session import BASE, DEFAULT_SCHEMA_NAME, read_session, stream_session, transactional_session
56
+ from rucio.db.sqla.util import temp_table_mngr
57
+ from rucio.rse import rsemanager as rsemgr
58
+
59
+ if TYPE_CHECKING:
60
+ from collections.abc import Iterable, Iterator, Sequence
61
+ from typing import Any, Optional
62
+
63
+ from sqlalchemy.orm import Session
64
+
65
+ from rucio.common.types import LoggerFunction
66
+ from rucio.rse.protocols.protocol import RSEProtocol
67
+
68
# Cache region created with expiration_time=60 (presumably seconds -- confirm against
# make_region_memcached); get_multi_cache_prefix keeps its 'CacheSites' lookup in it.
REGION = make_region_memcached(expiration_time=60)
# Metric manager bound to this module's name.
METRICS = MetricManager(module=__name__)


# Lightweight record carrying a scope and a name.
ScopeName = namedtuple('ScopeName', ['scope', 'name'])
# Record pairing a (scope, name) with a (child_scope, child_name); presumably a parent/child attachment.
Association = namedtuple('Association', ['scope', 'name', 'child_scope', 'child_name'])
74
+
75
+
76
@read_session
def get_bad_replicas_summary(rse_expression=None, from_date=None, to_date=None, filter_=None, *, session: "Session"):
    """
    List the bad file replicas summary. Method used by the rucio-ui.

    Entries of the bad replicas table are counted per day, RSE, state and reason,
    then merged into one dictionary per (RSE, day, reason) incident.

    :param rse_expression: The RSE expression.
    :param from_date: The start date; only entries created strictly after it are counted.
    :param to_date: The end date; only entries created strictly before it are counted.
    :param filter_: Dictionary of attributes by which the RSE results should be filtered. e.g.: {'availability_write': True}
    :param session: The database session in use.
    :returns: A list of dictionaries holding one counter per state name plus the keys
              'rse_id', 'rse', 'created_at' (the day of the incident) and 'reason'.
    """
    if rse_expression:
        rses = parse_expression(expression=rse_expression, filter_=filter_, session=session)
    elif filter_:
        # Ensure we limit results to current VO even if we don't specify an RSE expression
        rses = list_rses(filters=filter_, session=session)
    else:
        rses = []
    rse_clause = [models.BadReplica.rse_id == rse['id'] for rse in rses]

    # Truncate the creation timestamp to the day with the function each backend offers.
    dialect = session.bind.dialect.name
    if dialect == 'oracle':
        day_column = func.trunc(models.BadReplica.created_at, 'DD')
    elif dialect == 'mysql':
        day_column = func.date(models.BadReplica.created_at)
    elif dialect == 'postgresql':
        day_column = func.date_trunc('day', models.BadReplica.created_at)
    else:
        # SQLite expects strftime(format, time-value): the format string comes first.
        day_column = func.strftime('%Y-%m-%d', models.BadReplica.created_at)

    stmt = select(
        func.count(),
        day_column,
        models.BadReplica.rse_id,
        models.BadReplica.state,
        models.BadReplica.reason
    ).select_from(
        models.BadReplica
    )
    # To be added : HINTS
    if rse_clause:
        stmt = stmt.where(or_(*rse_clause))
    if from_date:
        stmt = stmt.where(models.BadReplica.created_at > from_date)
    if to_date:
        stmt = stmt.where(models.BadReplica.created_at < to_date)
    stmt = stmt.group_by(day_column, models.BadReplica.rse_id, models.BadReplica.reason, models.BadReplica.state)

    # One incident per (rse_id, day, reason); each one collects a counter per state name.
    incidents = {}
    for count, day, rse_id, state, reason in session.execute(stmt):
        incidents.setdefault((rse_id, day, reason), {})[str(state.name)] = count

    result = []
    for (rse_id, day, reason), res in incidents.items():
        res['rse_id'] = rse_id
        res['rse'] = get_rse_name(rse_id=rse_id, session=session)
        res['created_at'] = day
        res['reason'] = reason
        result.append(res)

    return result
137
+
138
+
139
@read_session
def __exist_replicas(rse_id, replicas, *, session: "Session"):
    """
    Internal method to check if a replica exists at a given site.

    :param rse_id: The RSE id.
    :param replicas: A list of tuples [(<scope>, <name>, <path>)] with either :
                     - scope and name are None and path not None
                     - scope and name are not None and path is None
                     The list is not modified.
    :param session: The database session in use.

    :returns: A list of tuple (<scope>, <name>, <path>, <exists>, <already_declared>, <bytes>)
              where
              - <exists> is a boolean that identifies if the replica exists
              - <already_declared> is a boolean that identifies if the replica is already declared bad
              Replicas found in the database come first (with the values stored in the database),
              followed by the requested entries which were not found.
    """

    return_list = []
    path_clause = []
    did_clause = []
    # Work on a private copy: entries are removed from it as soon as they are found.
    pending = list(replicas)
    for scope, name, path in pending:
        if path:
            path_clause.append(models.RSEFileAssociation.path == path)
            # The stored path may or may not carry a leading slash: search for both spellings.
            if path.startswith('/'):
                path_clause.append(models.RSEFileAssociation.path == path[1:])
            else:
                path_clause.append(models.RSEFileAssociation.path == '/%s' % path)
        else:
            did_clause.append(and_(models.RSEFileAssociation.scope == scope,
                                   models.RSEFileAssociation.name == name))

    for clause in [path_clause, did_clause]:
        if clause:
            for chunk in chunks(clause, 10):
                # NOTE(review): the Oracle hint below lacks its closing parenthesis and is also
                # attached to the scope/name lookups; left untouched on purpose -- confirm with a DBA.
                stmt = select(
                    models.RSEFileAssociation.path,
                    models.RSEFileAssociation.scope,
                    models.RSEFileAssociation.name,
                    models.RSEFileAssociation.rse_id,
                    models.RSEFileAssociation.bytes,
                    # 1 as soon as one BAD declaration exists for the replica, else 0
                    func.max(
                        case(
                            (models.BadReplica.state == BadFilesStatus.SUSPICIOUS, 0),
                            (models.BadReplica.state == BadFilesStatus.BAD, 1),
                            else_=0))
                ).with_hint(
                    models.RSEFileAssociation,
                    'INDEX(REPLICAS REPLICAS_PATH_IDX',
                    'oracle'
                ).outerjoin(
                    models.BadReplica,
                    and_(models.RSEFileAssociation.scope == models.BadReplica.scope,
                         models.RSEFileAssociation.name == models.BadReplica.name,
                         models.RSEFileAssociation.rse_id == models.BadReplica.rse_id)
                ).where(
                    and_(models.RSEFileAssociation.rse_id == rse_id,
                         or_(*chunk))
                ).group_by(
                    models.RSEFileAssociation.path,
                    models.RSEFileAssociation.scope,
                    models.RSEFileAssociation.name,
                    models.RSEFileAssociation.rse_id,
                    models.RSEFileAssociation.bytes
                )

                for path, scope, name, _, size, state in session.execute(stmt).all():
                    found_keys = [(scope, name, path), (None, None, path), (scope, name, None)]
                    if path:
                        # The request may have used the other leading-slash spelling of this path.
                        other_spelling = path[1:] if path.startswith('/') else '/%s' % path
                        found_keys.append((None, None, other_spelling))
                    for key in found_keys:
                        if key in pending:
                            pending.remove(key)
                    already_declared = state == 1
                    return_list.append((scope, name, path, True, already_declared, size))

    # Whatever is left was not found on the RSE.
    for scope, name, path in pending:
        return_list.append((scope, name, path, False, False, None))

    return return_list
219
+
220
+
221
@read_session
def list_bad_replicas_status(state=BadFilesStatus.BAD, rse_id=None, younger_than=None, older_than=None, limit=None, list_pfns=False, vo='def', *, session: "Session"):
    """
    List the bad file replicas history states. Method used by the rucio-ui.

    :param state: The state of the file (SUSPICIOUS or BAD).
    :param rse_id: The RSE id.
    :param younger_than: datetime object to select bad replicas younger than this date.
    :param older_than: datetime object to select bad replicas older than this date.
    :param limit: The maximum number of replicas returned.
    :param list_pfns: If True, return the PFNs of the selected replicas instead of their description.
    :param vo: The VO to find replicas from.
    :param session: The database session in use.
    :returns: A list of dictionaries describing the bad replicas or, with list_pfns, a list of
              distinct PFNs in order of first appearance.
    """
    result = []
    stmt = select(
        models.BadReplica.scope,
        models.BadReplica.name,
        models.BadReplica.rse_id,
        models.BadReplica.state,
        models.BadReplica.created_at,
        models.BadReplica.updated_at
    )
    if state:
        stmt = stmt.where(models.BadReplica.state == state)
    if rse_id:
        stmt = stmt.where(models.BadReplica.rse_id == rse_id)
    if younger_than:
        stmt = stmt.where(models.BadReplica.created_at >= younger_than)
    if older_than:
        stmt = stmt.where(models.BadReplica.created_at <= older_than)
    if limit:
        # NOTE: the limit is applied in the database, before the VO filtering done below.
        stmt = stmt.limit(limit)

    for badfile in session.execute(stmt).yield_per(1000):
        if badfile.scope.vo != vo:
            continue
        if list_pfns:
            result.append({'scope': badfile.scope, 'name': badfile.name, 'type': DIDType.FILE})
        else:
            result.append({'scope': badfile.scope, 'name': badfile.name, 'rse': get_rse_name(rse_id=badfile.rse_id, session=session), 'rse_id': badfile.rse_id, 'state': badfile.state, 'created_at': badfile.created_at, 'updated_at': badfile.updated_at})

    if not list_pfns:
        return result

    reps = []
    for rep in list_replicas(result, schemes=None, unavailable=False, request_id=None, ignore_availability=True, all_states=True, session=session):
        if rse_id in rep['rses'] and rep['rses'][rse_id]:
            # Only the first PFN on the requested RSE is kept
            pfn = rep['rses'][rse_id][0]
            if pfn and pfn not in reps:
                reps.append(pfn)
        else:
            reps.extend([item for row in rep['rses'].values() for item in row])
    # Drop duplicates while keeping the order of first appearance
    return list(dict.fromkeys(reps))
272
+
273
+
274
@transactional_session
def __declare_bad_file_replicas(pfns, rse_id, reason, issuer, status=BadFilesStatus.BAD, scheme='srm', force=False, logger: "LoggerFunction" = logging.log, *, session: "Session"):
    """
    Declare a list of bad replicas.

    :param pfns: Either a list of PFNs (string) or a list of replicas {'scope': <scope>, 'name': <name>, 'rse_id': <rse_id>}.
    :param rse_id: The RSE id.
    :param reason: The reason of the loss.
    :param issuer: The issuer account.
    :param status: Either BAD or SUSPICIOUS.
    :param scheme: The scheme of the PFNs.
    :param force: boolean, if declaring BAD replica, ignore existing replica status in the bad_replicas table. Default: False
    :param logger: Logging function called as logger(level, message).
    :param session: The database session in use.
    :returns: A list of strings, one per replica which could not be declared, ending with the cause
              ('Already declared', 'Unknown replica' or 'PFN contains hidden chars').
    :raises ReplicaNotFound: If the state of the replicas cannot be updated.
    :raises RucioException: If the database flush fails.
    """
    unknown_replicas = []
    replicas = []
    path_pfn_dict = {}

    if len(pfns) > 0 and isinstance(pfns[0], str):
        # If pfns is a list of PFNs, the scope and names need to be extracted from the path
        rse_info = rsemgr.get_rse_info(rse_id=rse_id, session=session)
        proto = rsemgr.create_protocol(rse_info, 'read', scheme=scheme)
        if rse_info['deterministic']:
            scope_proto = rsemgr.get_scope_protocol(vo=issuer.vo)
            parsed_pfn = proto.parse_pfns(pfns=pfns)
            for pfn in parsed_pfn:
                # Translate into a scope and name
                name, scope = scope_proto(parsed_pfn[pfn])

                scope = InternalScope(scope, vo=issuer.vo)
                replicas.append({'scope': scope, 'name': name, 'rse_id': rse_id, 'state': status})
                path = '%s%s' % (parsed_pfn[pfn]['path'], parsed_pfn[pfn]['name'])
                path_pfn_dict[path] = pfn
                logger(logging.DEBUG, f"Declaring replica {scope}:{name} {status} at {rse_id} with path {path}")

        else:
            # For non-deterministic RSEs use the path + rse_id to extract the scope
            parsed_pfn = proto.parse_pfns(pfns=pfns)
            for pfn in parsed_pfn:
                path = '%s%s' % (parsed_pfn[pfn]['path'], parsed_pfn[pfn]['name'])
                replicas.append({'scope': None, 'name': None, 'rse_id': rse_id, 'path': path, 'state': status})
                path_pfn_dict[path] = pfn

                logger(logging.DEBUG, f"Declaring replica with pfn: {pfn} {status} at {rse_id} with path {path}")

    else:
        # If pfns is a list of replicas, just use scope, name and rse_id
        for pfn in pfns:
            replicas.append({'scope': pfn['scope'], 'name': pfn['name'], 'rse_id': rse_id, 'state': status})
            logger(logging.DEBUG, f"Declaring replica {pfn['scope']}:{pfn['name']} {status} at {rse_id} without path")

    replicas_list = [(replica['scope'], replica['name'], replica.get('path', None)) for replica in replicas]

    bad_replicas_to_update = []

    for scope, name, path, exists, already_declared, size in __exist_replicas(rse_id=rse_id, replicas=replicas_list, session=session):
        declared = False

        if exists:

            # The replica state is switched to BAD for a first declaration, or again when forced
            if status == BadFilesStatus.BAD and (force or not already_declared):
                bad_replicas_to_update.append({'scope': scope, 'name': name, 'rse_id': rse_id, 'state': ReplicaState.BAD})
                declared = True

            # A SUSPICIOUS declaration is always recorded, a BAD one only once
            if status == BadFilesStatus.SUSPICIOUS or (status == BadFilesStatus.BAD and not already_declared):
                new_bad_replica = models.BadReplica(scope=scope, name=name, rse_id=rse_id, reason=reason, state=status, account=issuer, bytes=size)
                new_bad_replica.save(session=session, flush=False)
                declared = True

        if not declared:
            if already_declared:
                unknown_replicas.append('%s %s' % (path_pfn_dict.get(path, '%s:%s' % (scope, name)), 'Already declared'))
            elif path:
                if any(not isprint(char) for char in str(path)):
                    unknown_replicas.append('%s %s' % (path, 'PFN contains hidden chars'))
                else:
                    pfn = path_pfn_dict[path]
                    if f"{pfn} Unknown replica" not in unknown_replicas:
                        unknown_replicas.append('%s %s' % (pfn, 'Unknown replica'))
            elif scope or name:
                unknown_replicas.append(f"{(scope,name)} Unknown replica")

    if status == BadFilesStatus.BAD:
        # For BAD file, we modify the replica state, not for suspicious
        try:
            # there shouldn't be any exceptions since all replicas exist
            update_replicas_states(bad_replicas_to_update, session=session)
        except exception.UnsupportedOperation:
            raise exception.ReplicaNotFound("One or several replicas don't exist.")

    try:
        session.flush()
    except (IntegrityError, DatabaseError, FlushError) as error:
        raise exception.RucioException(error.args)

    return unknown_replicas
381
+
382
+
383
@transactional_session
def add_bad_dids(dids, rse_id, reason, issuer, state=BadFilesStatus.BAD, *, session: "Session"):
    """
    Declare a list of bad replicas.

    :param dids: The list of DIDs.
    :param rse_id: The RSE id.
    :param reason: The reason of the loss.
    :param issuer: The issuer account.
    :param state: BadFilesStatus.BAD
    :param session: The database session in use.
    :returns: A list of strings '<scope>:<name> <cause>' for the DIDs which could not be declared,
              the cause being 'Already declared' or 'Unknown replica'.
    :raises ReplicaNotFound: If the state of the replicas cannot be updated.
    :raises RucioException: If the database flush fails.
    """
    unknown_replicas = []
    replicas_for_update = []
    replicas_list = [(InternalScope(did['scope'], vo=issuer.vo), did['name'], None) for did in dids]

    for scope, name, _, exists, already_declared, size in __exist_replicas(rse_id=rse_id, replicas=replicas_list, session=session):
        if exists and not already_declared:
            replicas_for_update.append({'scope': scope, 'name': name, 'rse_id': rse_id, 'state': ReplicaState.BAD})
            new_bad_replica = models.BadReplica(scope=scope, name=name, rse_id=rse_id, reason=reason, state=state,
                                                account=issuer, bytes=size)
            new_bad_replica.save(session=session, flush=False)
            # Remove the entries of the sources table which point to this replica
            stmt = delete(
                models.Source
            ).where(
                and_(models.Source.scope == scope,
                     models.Source.name == name,
                     models.Source.rse_id == rse_id)
            ).execution_options(
                synchronize_session=False
            )
            session.execute(stmt)
        else:
            # Report the scope of the replica at hand, not the one of the last DID of the input list
            cause = 'Already declared' if already_declared else 'Unknown replica'
            unknown_replicas.append('%s:%s %s' % (scope.external, name, cause))

    if state == BadFilesStatus.BAD:
        try:
            update_replicas_states(replicas_for_update, session=session)
        except exception.UnsupportedOperation:
            raise exception.ReplicaNotFound("One or several replicas don't exist.")

    try:
        session.flush()
    except (IntegrityError, DatabaseError, FlushError) as error:
        raise exception.RucioException(error.args)

    return unknown_replicas
438
+
439
+
440
@transactional_session
def declare_bad_file_replicas(replicas: list, reason: str, issuer, status=BadFilesStatus.BAD, force: bool = False, *,
                              session: "Session"):
    """
    Declare a list of bad replicas.

    The replicas are grouped by RSE and each group is declared separately.

    :param replicas: Either a list of PFNs (string) or a list of replicas {'scope': <scope>, 'name': <name>, 'rse_id': <rse_id>}.
    :param reason: The reason of the loss.
    :param issuer: The issuer account.
    :param status: The status of the file (SUSPICIOUS or BAD).
    :param force: boolean, if declaring BAD replica, ignore existing replica status in the bad_replicas table. Default: False
    :param session: The database session in use.
    :returns: Dictionary {rse_id -> [replicas failed to declare with errors]}
    :raises InvalidType: If the list mixes strings and dictionaries.
    """
    unknown_replicas = {}
    if not replicas:
        return unknown_replicas

    # Every entry must be of the same type as the first one
    type_ = type(replicas[0])
    if any(not isinstance(entry, type_) for entry in replicas):
        raise exception.InvalidType('Replicas must be specified either as a list of string or a list of dicts')

    scheme = None
    if type_ is str:
        # PFNs: let the protocol definitions tell on which RSE each of them lives
        scheme, files_to_declare, unknown_replicas = get_pfn_to_rse(replicas, vo=issuer.vo, session=session)
    else:
        files_to_declare = {}
        for entry in replicas:
            files_to_declare.setdefault(entry['rse_id'], []).append(entry)

    for rse_id, rse_files in files_to_declare.items():
        notdeclared = __declare_bad_file_replicas(rse_files, rse_id, reason, issuer,
                                                  status=status, scheme=scheme,
                                                  force=force, session=session)
        if notdeclared:
            unknown_replicas[rse_id] = notdeclared
    return unknown_replicas
475
+
476
+
477
@read_session
def get_pfn_to_rse(pfns, vo='def', *, session: "Session"):
    """
    Get the RSE associated to a list of PFNs.

    The RSEs are found by matching each PFN against '<scheme>://<hostname>[:<port>]<prefix>'
    built from the protocols defined for the hostnames which appear in the PFNs.

    :param pfns: The list of pfn.
    :param vo: The VO to find RSEs at.
    :param session: The database session in use.

    :returns: a tuple : scheme, {rse1 : [pfn1, pfn2, ...], rse2: [pfn3, pfn4, ...]}, {'unknown': [pfn5, pfn6, ...]}.
    :raises InvalidType: If the PFNs do not all use the same scheme.
    :raises RucioException: If one PFN matches more than one protocol definition.
    """
    unknown_replicas = {}
    dict_rse = {}
    cleaned_pfns = clean_pfns(pfns)
    if not cleaned_pfns:
        # Nothing to resolve: do not query the database with an empty OR clause
        return None, dict_rse, unknown_replicas

    scheme = cleaned_pfns[0].split(':')[0]
    storage_elements = set()
    se_condition = []
    for pfn in cleaned_pfns:
        if pfn.split(':')[0] != scheme:
            raise exception.InvalidType('The PFNs specified must have the same protocol')

        # <scheme>://<hostname>[:<port>]/... -> hostname
        storage_element = pfn.split('/')[2].split(':')[0]

        if storage_element not in storage_elements:
            storage_elements.add(storage_element)
            se_condition.append(models.RSEProtocol.hostname == storage_element)
    stmt = select(
        models.RSEProtocol.rse_id,
        models.RSEProtocol.scheme,
        models.RSEProtocol.hostname,
        models.RSEProtocol.port,
        models.RSEProtocol.prefix
    ).join(
        models.RSE,
        models.RSEProtocol.rse_id == models.RSE.id
    ).where(
        and_(or_(*se_condition),
             models.RSEProtocol.scheme == scheme,
             models.RSE.deleted == false(),
             models.RSE.staging_area == false())
    )

    # rse_id -> URL prefixes, with and without the port
    protocols = {}
    for rse_id, protocol, hostname, port, prefix in session.execute(stmt).yield_per(10000):
        patterns = protocols.setdefault(rse_id, [])
        patterns.append('%s://%s:%s%s' % (protocol, hostname, port, prefix))
        without_port = '%s://%s%s' % (protocol, hostname, prefix)
        if without_port not in patterns:
            patterns.append(without_port)

    # The RSE of the previous PFN is tried first: consecutive PFNs usually live on the same RSE
    hint = None
    for pfn in cleaned_pfns:
        if hint is not None and any(pattern in pfn for pattern in protocols[hint]):
            dict_rse[hint].append(pfn)
            continue

        # No hint yet, or the PFN is not on the hinted RSE: look at all the candidates
        mult_rse_match = 0
        for rse_id in protocols:
            for pattern in protocols[rse_id]:
                if pattern in pfn and get_rse_vo(rse_id=rse_id, session=session) == vo:
                    mult_rse_match += 1
                    if mult_rse_match > 1:
                        print('ERROR, multiple matches : %s at %s' % (pfn, rse_id))
                        raise exception.RucioException('ERROR, multiple matches : %s at %s' % (pfn, get_rse_name(rse_id=rse_id, session=session)))
                    hint = rse_id
                    dict_rse.setdefault(hint, []).append(pfn)
        if mult_rse_match == 0:
            unknown_replicas.setdefault('unknown', []).append(pfn)
    return scheme, dict_rse, unknown_replicas
552
+
553
+
554
@read_session
def get_bad_replicas_backlog(*, session: "Session"):
    """
    Get the replica backlog by RSE.

    Counts, for each RSE, the replicas in BAD state whose DID is not flagged as LOST.

    :param session: The database session in use.

    :returns: a dictionary {rse_id: cnt_bad_replicas}.
    """
    same_did = and_(models.RSEFileAssociation.scope == models.DataIdentifier.scope,
                    models.RSEFileAssociation.name == models.DataIdentifier.name)
    bad_and_not_lost = and_(models.DataIdentifier.availability != DIDAvailability.LOST,
                            models.RSEFileAssociation.state == ReplicaState.BAD)

    stmt = select(
        func.count(),
        models.RSEFileAssociation.rse_id
    ).select_from(
        models.RSEFileAssociation
    ).with_hint(
        models.RSEFileAssociation,
        'INDEX(DIDS DIDS_PK) USE_NL(DIDS) INDEX_RS_ASC(REPLICAS ("REPLICAS"."STATE"))',
        'oracle'
    ).join(
        models.DataIdentifier,
        same_did
    ).where(
        bad_and_not_lost
    ).group_by(
        models.RSEFileAssociation.rse_id
    )

    return {rse_id: cnt for cnt, rse_id in session.execute(stmt).all()}
587
+
588
+
589
@read_session
def list_bad_replicas(limit=10000, thread=None, total_threads=None, rses=None, *, session: "Session"):
    """
    List the replicas in BAD state whose DID is not flagged as LOST.

    :param limit: The maximum number of replicas returned.
    :param thread: The assigned thread for this necromancer.
    :param total_threads: The total number of threads of all necromancers.
    :param rses: Optional list of dictionaries with an 'id' key; only replicas on these RSEs are returned.
    :param session: The database session in use.

    :returns: a list of dictionary {'scope' scope, 'name': name, 'rse_id': rse_id, 'rse': rse}.
    """
    if DEFAULT_SCHEMA_NAME:
        schema_dot = '%s.' % DEFAULT_SCHEMA_NAME
    else:
        schema_dot = ''

    stmt = select(
        models.RSEFileAssociation.scope,
        models.RSEFileAssociation.name,
        models.RSEFileAssociation.rse_id
    ).with_hint(
        models.RSEFileAssociation,
        'INDEX(DIDS DIDS_PK) USE_NL(DIDS) INDEX_RS_ASC(REPLICAS ("REPLICAS"."STATE"))',
        'oracle'
    ).where(
        models.RSEFileAssociation.state == ReplicaState.BAD
    )

    # Share the work between the threads, based on a hash of the replica name
    hash_variable = '%sreplicas.name' % (schema_dot)
    stmt = filter_thread_work(session=session, query=stmt, total_threads=total_threads, thread_id=thread, hash_variable=hash_variable)

    same_did = and_(models.RSEFileAssociation.scope == models.DataIdentifier.scope,
                    models.RSEFileAssociation.name == models.DataIdentifier.name)
    stmt = stmt.join(
        models.DataIdentifier,
        same_did
    ).where(
        models.DataIdentifier.availability != DIDAvailability.LOST
    )

    if rses:
        stmt = stmt.where(or_(*[models.RSEFileAssociation.rse_id == rse['id'] for rse in rses]))

    stmt = stmt.limit(limit)
    return [
        {'scope': scope, 'name': name, 'rse_id': rse_id, 'rse': get_rse_name(rse_id=rse_id, session=session)}
        for scope, name, rse_id in session.execute(stmt).yield_per(1000)
    ]
634
+
635
+
636
@stream_session
def get_did_from_pfns(pfns, rse_id=None, vo='def', *, session: "Session"):
    """
    Get the DIDs associated to a PFN on one given RSE

    :param pfns: The list of PFNs.
    :param rse_id: The RSE id. If not set, the RSEs are guessed from the PFNs; if set, the
                   PFNs are parsed with the 'srm' scheme.
    :param vo: The VO to get DIDs from.
    :param session: The database session in use.
    :returns: A dictionary {pfn: {'scope': scope, 'name': name}}
              (the generator yields one such single-entry dictionary per resolved PFN)
    :raises Exception: If rse_id is not set and some PFNs cannot be matched to any RSE.
    """
    dict_rse = {}
    if not rse_id:
        scheme, dict_rse, unknown_replicas = get_pfn_to_rse(pfns, vo=vo, session=session)
        if unknown_replicas:
            raise Exception('Cannot find the RSE of some PFNs: %s' % unknown_replicas)
    else:
        scheme = 'srm'
        dict_rse[rse_id] = pfns

    for current_rse_id, rse_pfns in dict_rse.items():
        rse_info = rsemgr.get_rse_info(rse_id=current_rse_id, session=session)
        proto = rsemgr.create_protocol(rse_info, 'read', scheme=scheme)
        parsed_pfn = proto.parse_pfns(pfns=rse_pfns)
        if rse_info['deterministic']:
            scope_proto = rsemgr.get_scope_protocol(vo=vo)

            for pfn in parsed_pfn:
                # Translate into a scope and name
                name, scope = scope_proto(parsed_pfn[pfn])
                scope = InternalScope(scope, vo=vo)
                yield {pfn: {'scope': scope, 'name': name}}
        else:
            # Non-deterministic RSE: the DID can only be found through the path stored with the replica
            pfndict = {}
            condition = []
            for pfn in parsed_pfn:
                path = '%s%s' % (parsed_pfn[pfn]['path'], parsed_pfn[pfn]['name'])
                pfndict[path] = pfn
                condition.append(and_(models.RSEFileAssociation.path == path,
                                      models.RSEFileAssociation.rse_id == current_rse_id))
            if not condition:
                # An empty OR clause would presumably leave the query without any filter
                continue
            stmt = select(
                models.RSEFileAssociation.scope,
                models.RSEFileAssociation.name,
                models.RSEFileAssociation.path
            ).where(
                or_(*condition)
            )
            for scope, name, path in session.execute(stmt).all():
                yield {pfndict[path]: {'scope': scope, 'name': name}}
686
+
687
+
688
+ def _pick_n_random(nrandom, generator):
689
+ """
690
+ Select n random elements from the generator
691
+ """
692
+
693
+ if not nrandom:
694
+ # pass-through the data unchanged
695
+ yield from generator
696
+ return
697
+
698
+ # A "reservoir sampling" algorithm:
699
+ # Copy the N first files from the generator. After that, following element may be picked to substitute
700
+ # one of the previously selected element with a probability which decreases as the number of encountered elements grows.
701
+ selected = []
702
+ i = 0
703
+ iterator = iter(generator)
704
+ try:
705
+ for _ in range(nrandom):
706
+ selected.append(next(iterator))
707
+ i += 1
708
+
709
+ while True:
710
+ element = next(iterator)
711
+ i += 1
712
+
713
+ index_to_substitute = random.randint(0, i) # noqa: S311
714
+ if index_to_substitute < nrandom:
715
+ selected[index_to_substitute] = element
716
+ except StopIteration:
717
+ pass
718
+
719
+ for r in selected:
720
+ yield r
721
+
722
+
723
+ def _list_files_wo_replicas(files_wo_replica, *, session: "Session"):
724
+ if files_wo_replica:
725
+ file_wo_clause = []
726
+ for file in sorted(files_wo_replica, key=lambda f: (f['scope'], f['name'])):
727
+ file_wo_clause.append(and_(models.DataIdentifier.scope == file['scope'],
728
+ models.DataIdentifier.name == file['name']))
729
+ stmt = select(
730
+ models.DataIdentifier.scope,
731
+ models.DataIdentifier.name,
732
+ models.DataIdentifier.bytes,
733
+ models.DataIdentifier.md5,
734
+ models.DataIdentifier.adler32
735
+ ).with_hint(
736
+ models.DataIdentifier,
737
+ 'INDEX(DIDS DIDS_PK)',
738
+ 'oracle'
739
+ ).where(
740
+ and_(models.DataIdentifier.did_type == DIDType.FILE,
741
+ or_(*file_wo_clause))
742
+ )
743
+ for scope, name, bytes_, md5, adler32 in session.execute(stmt):
744
+ yield scope, name, bytes_, md5, adler32
745
+
746
+
747
def get_vp_endpoint():
    """
    VP endpoint is the Virtual Placement server.
    Once VP is integrated in Rucio it won't be needed.

    :returns: The 'vp_endpoint' option of the 'virtual_placement' configuration section,
              or an empty string if it is not set.
    """
    return config_get('virtual_placement', 'vp_endpoint', default='')
754
+
755
+
756
def get_multi_cache_prefix(cache_site, filename, logger=logging.log):
    """
    For a given cache site and filename, return the address of the cache node that
    should be prefixed.

    The description of the cache sites is fetched from the Virtual Placement server and kept
    in the cache region. The cache node is chosen from a hash of the filename.

    :param cache_site: Cache site
    :param filename: Filename
    :param logger: Logging function called as logger(level, message).
    :returns: The address of the cache node, or an empty string if no VP endpoint is configured,
              if the cache sites cannot be loaded, or if the site or a matching range is not found.
    """
    vp_endpoint = get_vp_endpoint()
    if not vp_endpoint:
        return ''

    x_caches = REGION.get('CacheSites')
    if x_caches is NO_VALUE:
        try:
            # NOTE(review): verify=False switches off the verification of the TLS certificate -- confirm this is intended.
            response = requests.get('{}/serverRanges'.format(vp_endpoint), timeout=1, verify=False)
            if not response.ok:
                # Cache a placeholder so that the server is not contacted again on every call
                REGION.set('CacheSites', {'could not reload': ''})
                return ''
            x_caches = response.json()
            REGION.set('CacheSites', x_caches)
        except requests.exceptions.RequestException as error:
            REGION.set('CacheSites', {'could not reload': ''})
            logger(logging.WARNING, 'In get_multi_cache_prefix, could not access {}. Excaption:{}'.format(vp_endpoint, error))
            return ''

    if cache_site not in x_caches:
        return ''

    xcache_site = x_caches[cache_site]
    # Map the filename to a number in [0, 1) from the first 8 bytes of its sha256 digest.
    # NOTE(review): 'Q' reads them in native byte order, so the value depends on the platform -- confirm.
    digest_head = sha256(filename.encode('utf-8')).digest()[:8]
    h = float(unpack('Q', digest_head)[0]) / 2**64
    # The first range whose second item is above the hash gives the index of the server to use
    for irange in xcache_site['ranges']:
        if h < irange[1]:
            server = xcache_site['servers'][irange[0]]
            return server[0]
    return ''
793
+
794
+
795
def _get_list_replicas_protocols(
        rse_id: str,
        domain: str,
        schemes: "Sequence[str]",
        additional_schemes: "Sequence[str]",
        session: "Session"
) -> "list[tuple[str, RSEProtocol, int]]":
    """
    Select the protocols to be used by list_replicas to build the PFNs for all replicas on the given RSE

    :param rse_id: The RSE id.
    :param domain: 'wan', 'lan' or 'all' (both).
    :param schemes: The requested schemes. If empty, the scheme selected by the RSE manager for
                    read operations is used for each domain.
    :param additional_schemes: Schemes which are added to the selection if not already part of it.
    :param session: The database session in use.
    :returns: A list of tuples (domain, protocol, priority), the priority being the one defined
              in the RSE settings for read operations of the scheme in the domain.
    """
    domains = ['wan', 'lan'] if domain == 'all' else [domain]

    rse_info = rsemgr.get_rse_info(rse_id=rse_id, session=session)
    # compute scheme priorities, and don't forget to exclude disabled protocols
    # 0 or None in RSE protocol definition = disabled, 1 = highest priority
    scheme_priorities = {
        'wan': {p['scheme']: p['domains']['wan']['read'] for p in rse_info['protocols'] if p['domains']['wan']['read']},
        'lan': {p['scheme']: p['domains']['lan']['read'] for p in rse_info['protocols'] if p['domains']['lan']['read']},
    }

    # Always work on a new list: the input may be an immutable sequence and must not be modified
    rse_schemes = list(schemes) if schemes else []
    if not rse_schemes:
        try:
            for current_domain in domains:
                rse_schemes.append(rsemgr.select_protocol(rse_settings=rse_info,
                                                          operation='read',
                                                          domain=current_domain)['scheme'])
        except exception.RSEProtocolNotSupported:
            pass  # no need to be verbose
        except Exception:
            print(format_exc())

    for s in additional_schemes:
        if s not in rse_schemes:
            rse_schemes.append(s)

    protocols = []
    for s in rse_schemes:
        # A failure in one domain skips the remaining domains of the scheme
        try:
            for current_domain in domains:
                protocol = rsemgr.create_protocol(rse_settings=rse_info, operation='read', scheme=s, domain=current_domain)
                priority = scheme_priorities[current_domain][s]

                protocols.append((current_domain, protocol, priority))
        except exception.RSEProtocolNotSupported:
            pass  # no need to be verbose
        except Exception:
            print(format_exc())
    return protocols
844
+
845
+
846
def _build_list_replicas_pfn(
        scope: "InternalScope",
        name: str,
        rse_id: str,
        domain: str,
        protocol: "RSEProtocol",
        path: str,
        sign_urls: bool,
        signature_lifetime: int,
        client_location: "dict[str, Any]",
        logger=logging.log,
        *,
        session: "Session",
) -> str:
    """
    Generate the PFN for the given scope/name on the rse.
    If needed, sign the PFN url
    If relevant, add the server-side root proxy to the pfn url

    :param scope: The scope of the file; its external representation is handed to the protocol.
    :param name: The name of the file.
    :param rse_id: The id of the RSE holding the replica.
    :param domain: The network domain of the protocol; the proxy/cache handling only applies to 'wan'.
    :param protocol: The protocol object used to translate the LFN into a PFN.
    :param path: The path passed to the protocol together with scope and name.
    :param sign_urls: If set, https PFNs are signed when the RSE defines a signing service.
    :param signature_lifetime: Lifetime passed to the URL signing routine.
    :param client_location: Client location dictionary; only its 'site' entry is read here.
    :param logger: Callable used for logging, invoked as logger(level, message, ...).
    :param session: The database session in use.
    :returns: The PFN as a string.
    """
    pfn: str = list(protocol.lfns2pfns(lfns={'scope': scope.external,
                                             'name': name,
                                             'path': path}).values())[0]

    # do we need to sign the URLs?
    if sign_urls and protocol.attributes['scheme'] == 'https':
        service = get_rse_attribute(rse_id, RseAttr.SIGN_URL, session=session)
        if service:
            pfn = get_signed_url(rse_id=rse_id, service=service, operation='read', url=pfn, lifetime=signature_lifetime)

    # server side root proxy handling if location is set.
    # supports root and http destinations
    # cannot be pushed into protocols because we need to lookup rse attributes.
    # ultra-conservative implementation.
    client_site = client_location.get('site') if client_location else None
    if domain == 'wan' and protocol.attributes['scheme'] in ['root', 'http', 'https'] and client_site:
        replica_site = get_rse_attribute(rse_id, RseAttr.SITE, session=session)

        # does it match with the client? if not, it's an outgoing connection
        # therefore the internal proxy must be prepended
        if client_site != replica_site:
            cache_site = config_get('clientcachemap', client_site, default='', session=session)
            if cache_site != '':
                # the client site has a cache: route through the selected cache server, if any
                selected_prefix = get_multi_cache_prefix(cache_site, name)
                if selected_prefix:
                    pfn = f"root://{selected_prefix}//{pfn.replace('davs://', 'root://')}"
            else:
                # no cache for this site: check if it has defined an internal root proxy
                root_proxy_internal = config_get('root-proxy-internal',  # section
                                                 client_site,  # option
                                                 default='',  # empty string to circumvent exception
                                                 session=session)

                # TODO: XCache does not seem to grab signed URLs. Doublecheck with XCache devs.
                # For now -> skip prepending XCache for GCS.
                cloud_hosts = ('storage.googleapis.com', 'atlas-google-cloud.cern.ch', 'amazonaws.com')  # ATLAS HACK
                if root_proxy_internal and not any(host in pfn for host in cloud_hosts):
                    # don't forget to mangle gfal-style davs URL into generic https URL
                    pfn = f"root://{root_proxy_internal}//{pfn.replace('davs://', 'https://')}"

    # Use the caller's session, like every other attribute lookup in this function.
    simulate_multirange = get_rse_attribute(rse_id, RseAttr.SIMULATE_MULTIRANGE, session=session)

    if simulate_multirange is not None:
        try:
            # cover values that cannot be cast to int
            simulate_multirange = int(simulate_multirange)
        except (TypeError, ValueError):
            simulate_multirange = 1
            logger(logging.WARNING, 'Value encountered when retrieving RSE attribute "%s" not compatible with "int", used default value "1".', RseAttr.SIMULATE_MULTIRANGE)
        if simulate_multirange <= 0:
            logger(logging.WARNING, f'Value {simulate_multirange} encountered when retrieving RSE attribute "{RseAttr.SIMULATE_MULTIRANGE}" is <= 0, used default value "1".')
            simulate_multirange = 1
        pfn += f'&#multirange=false&nconnections={simulate_multirange}'

    return pfn
926
+
927
+
928
def _list_replicas(replicas, show_pfns, schemes, files_wo_replica, client_location, domain,
                   sign_urls, signature_lifetime, resolve_parents, filters, by_rse_name, *, session: "Session"):
    """
    Turn replica rows into one result dictionary per file and yield them.

    Rows are grouped with itertools.groupby on their first two fields (scope, name), so rows of
    the same file must be adjacent in `replicas`. Each row is unpacked as
    (scope, name, archive_scope, archive_name, bytes, md5, adler32, path, state, rse_id, rse,
    rse_type, volatile).

    :param replicas: Iterable of replica rows as described above.
    :param show_pfns: If false, only the 'states' entry is filled and no PFN is built.
    :param schemes: The schemes forwarded to the protocol selection.
    :param files_wo_replica: Passed to _list_files_wo_replicas; its results are yielded last, with empty 'pfns'/'rses'.
    :param client_location: Client location dictionary; 'site' is used to find the RSEs local to the client.
    :param domain: The network domain. None enables autoselect mode: 'lan' for RSEs at the client's site, 'wan' otherwise.
    :param sign_urls: Forwarded to the PFN builder.
    :param signature_lifetime: Forwarded to the PFN builder.
    :param resolve_parents: If set, a 'parents' entry listing all parent dids is added to each file.
    :param filters: Filter passed to parse_expression when resolving the client's site.
    :param by_rse_name: If true, 'states' and 'rses' are keyed by RSE name instead of RSE id.
    :param session: The database session in use.
    """
    # find all RSEs local to the client's location in autoselect mode (i.e., when domain is None)
    local_rses = set()
    if domain is None:
        if client_location and 'site' in client_location and client_location['site']:
            try:
                local_rses = {rse['id'] for rse in parse_expression('site=%s' % client_location['site'], filter_=filters, session=session)}
            except Exception:
                pass  # do not hard fail if site cannot be resolved or is empty

    pfns_cache = {}
    protocols_cache = defaultdict(dict)

    for _, replica_group in groupby(replicas, key=lambda x: (x[0], x[1])):  # Group by scope/name
        file = {}
        pfns = {}
        for scope, name, archive_scope, archive_name, bytes_, md5, adler32, path, state, rse_id, rse, rse_type, volatile in replica_group:
            if isinstance(archive_scope, str):
                archive_scope = InternalScope(archive_scope, fromExternal=False)

            is_archive = bool(archive_scope and archive_name)

            # it is the first row in the scope/name group
            if not file:
                file['scope'], file['name'] = scope, name
                file['bytes'], file['md5'], file['adler32'] = bytes_, md5, adler32
                file['pfns'], file['rses'], file['states'] = {}, {}, {}
                if resolve_parents:
                    file['parents'] = ['%s:%s' % (parent['scope'].internal, parent['name'])
                                       for parent in rucio.core.did.list_all_parent_dids(scope, name, session=session)]

            if not rse_id:
                continue

            rse_key = rse if by_rse_name else rse_id
            file['states'][rse_key] = str(state.name if state else state)

            if not show_pfns:
                continue

            # It's the first time we see this RSE, initialize the protocols needed for PFN generation.
            # Test against None: an empty protocol list is a valid cached result and must not
            # trigger a new protocol resolution for every single replica row of that RSE.
            protocols = protocols_cache.get(rse_id, {}).get(is_archive)
            if protocols is None:
                # select the lan door in autoselect mode, otherwise use the wan door
                if domain is None:
                    rse_domain = 'lan' if rse_id in local_rses else 'wan'
                else:
                    rse_domain = domain

                protocols = _get_list_replicas_protocols(
                    rse_id=rse_id,
                    domain=rse_domain,
                    schemes=schemes,
                    # We want 'root' for archives even if it wasn't included into 'schemes'
                    additional_schemes=['root'] if is_archive else [],
                    session=session,
                )
                protocols_cache[rse_id][is_archive] = protocols

            # If the current "replica" is a constituent inside an archive, we must construct the pfn for the
            # parent (archive) file and append the xrdcl.unzip query string to it.
            if is_archive:
                t_scope, t_name = archive_scope, archive_name
            else:
                t_scope, t_name = scope, name

            # build the pfns
            for pfn_domain, protocol, priority in protocols:
                # Start from the path stored with the replica for every protocol, so that a path
                # computed for one protocol is never re-used by the next one.
                pfn_path = path
                if 'determinism_type' in protocol.attributes:  # PFN is cacheable
                    cache_key = '%s:%s:%s' % (protocol.attributes['determinism_type'], t_scope.internal, t_name)
                    try:
                        pfn_path = pfns_cache[cache_key]
                    except KeyError:  # No cache entry scope:name found for this protocol
                        pfn_path = protocol._get_path(t_scope, t_name)
                        pfns_cache[cache_key] = pfn_path

                try:
                    pfn = _build_list_replicas_pfn(
                        scope=t_scope,
                        name=t_name,
                        rse_id=rse_id,
                        domain=pfn_domain,
                        protocol=protocol,
                        path=pfn_path,
                        sign_urls=sign_urls,
                        signature_lifetime=signature_lifetime,
                        client_location=client_location,
                        session=session,
                    )

                    client_extract = False
                    if is_archive:
                        pfn_domain = 'zip'
                        pfn = add_url_query(pfn, {'xrdcl.unzip': name})
                        if protocol.attributes['scheme'] == 'root':
                            # xroot supports downloading files directly from inside an archive. Disable client_extract and prioritize xroot.
                            client_extract = False
                            priority = -1
                        else:
                            client_extract = True

                    pfns[pfn] = {
                        'rse_id': rse_id,
                        'rse': rse,
                        'type': str(rse_type.name),
                        'volatile': volatile,
                        'domain': pfn_domain,
                        'priority': priority,
                        'client_extract': client_extract
                    }

                except Exception:
                    # never end up here
                    print(format_exc())

                if protocol.attributes['scheme'] == 'srm':
                    try:
                        file['space_token'] = protocol.attributes['extended_attributes']['space_token']
                    except (KeyError, TypeError):
                        # no 'space_token' entry, or no extended attributes mapping at all
                        file['space_token'] = None

        # fill the 'pfns' and 'rses' dicts in file
        if pfns:
            # set the total order for the priority
            # --> exploit that L(AN) comes before W(AN) before Z(IP) alphabetically
            # and use 1-indexing to be compatible with metalink
            sorted_pfns = sorted(pfns.items(), key=lambda item: (item[1]['domain'], item[1]['priority'], item[0]))
            for i, (pfn, pfn_value) in enumerate(sorted_pfns, start=1):
                pfn_value['priority'] = i
                file['pfns'][pfn] = pfn_value

            sorted_pfns = sorted(file['pfns'].items(), key=lambda item: (item[1]['rse_id'], item[1]['priority'], item[0]))
            for pfn, pfn_value in sorted_pfns:
                rse_key = pfn_value['rse'] if by_rse_name else pfn_value['rse_id']
                file['rses'].setdefault(rse_key, []).append(pfn)

        if file:
            yield file

    for scope, name, bytes_, md5, adler32 in _list_files_wo_replicas(files_wo_replica, session=session):
        yield {
            'scope': scope,
            'name': name,
            'bytes': bytes_,
            'md5': md5,
            'adler32': adler32,
            'pfns': {},
            'rses': defaultdict(list)
        }
1084
+
1085
+
1086
@stream_session
def list_replicas(
        dids: "Sequence[dict[str, Any]]",
        schemes: "Optional[list[str]]" = None,
        unavailable: bool = False,
        request_id: "Optional[str]" = None,
        ignore_availability: bool = True,
        all_states: bool = False,
        pfns: bool = True,
        rse_expression: "Optional[str]" = None,
        client_location: "Optional[dict[str, Any]]" = None,
        domain: "Optional[str]" = None,
        sign_urls: bool = False,
        signature_lifetime: "Optional[int]" = None,
        resolve_archives: bool = True,
        resolve_parents: bool = False,
        nrandom: "Optional[int]" = None,
        updated_after: "Optional[datetime]" = None,
        by_rse_name: bool = False,
        *, session: "Session",
):
    """
    List file replicas for a list of data identifiers (DIDs).

    This is a generator: it yields one dictionary per file.

    :param dids: The list of data identifiers (DIDs).
    :param schemes: A list of schemes to filter the replicas. (e.g. file, http, ...)
    :param unavailable: (deprecated) Also include unavailable replicas in the list.
    :param request_id: ID associated with the request for debugging. Not used by this function itself.
    :param ignore_availability: Ignore the RSE blocklisting.
    :param all_states: Return all replicas whatever state they are in. Adds an extra 'states' entry in the result dictionary.
    :param pfns: If false, no PFNs are generated for the listed replicas.
    :param rse_expression: The RSE expression to restrict list_replicas on a set of RSEs.
    :param client_location: Client location dictionary for PFN modification {'ip', 'fqdn', 'site', 'latitude', 'longitude'}
    :param domain: The network domain for the call, either None, 'wan' or 'lan'. None is automatic mode, 'all' is both ['lan','wan']
    :param sign_urls: If set, will sign the PFNs if necessary.
    :param signature_lifetime: If supported, in seconds, restrict the lifetime of the signed PFN.
    :param resolve_archives: When set to true, find archives which contain the replicas.
    :param resolve_parents: When set to true, find all parent datasets which contain the replicas.
    :param nrandom: Forwarded to _pick_n_random to pick this many entries from the result.
    :param updated_after: datetime (UTC time), only return replicas updated after this time
    :param by_rse_name: if True, rse information will be returned in dicts indexed by rse name; otherwise: in dicts indexed by rse id
    :param session: The database session in use.
    """
    # For historical reasons:
    # - list_replicas([some_file_did]), must return the file even if it doesn't have replicas
    # - list_replicas([some_collection_did]) must only return files with replicas

    def _location_columns(replicas_subquery):
        """
        The columns describing one physical replica, in the order expected by _list_replicas.
        """
        cols = replicas_subquery.c
        return cols.path, cols.state, cols.rse_id, cols.rse_name, cols.rse_type, cols.volatile

    def _to_files(replica_rows):
        """
        Feed database rows into _list_replicas using the arguments of the enclosing call.
        """
        return _list_replicas(replica_rows, pfns, schemes, [], client_location, domain,
                              sign_urls, signature_lifetime, resolve_parents, filter_, by_rse_name, session=session)

    def _replicas_filter_subquery():
        """
        Build the sub-query used to filter replicas according to list_replica's input arguments
        """
        replica_model, rse_model = models.RSEFileAssociation, models.RSE
        query = select(
            replica_model.scope,
            replica_model.name,
            replica_model.path,
            replica_model.state,
            replica_model.bytes,
            replica_model.md5,
            replica_model.adler32,
            rse_model.id.label('rse_id'),
            rse_model.rse.label('rse_name'),
            rse_model.rse_type,
            rse_model.volatile,
        ).join(
            rse_model,
            and_(replica_model.rse_id == rse_model.id,
                 rse_model.deleted == false())
        )

        if not ignore_availability:
            query = query.where(rse_model.availability_read == true())

        if updated_after:
            query = query.where(replica_model.updated_at >= updated_after)

        if rse_expression:
            matching_rses = parse_expression(expression=rse_expression, filter_=filter_, session=session)
            # When the number of RSEs is small, don't go through the overhead of
            # creating and using a temporary table. Rely on a simple "in" query.
            # The number "4" was picked without any particular reason
            if 0 < len(matching_rses) < 4:
                query = query.where(rse_model.id.in_([rse['id'] for rse in matching_rses]))
            else:
                rses_temp_table = temp_table_mngr(session).create_id_table()
                rows = [{'id': rse['id']} for rse in matching_rses]
                session.execute(insert(rses_temp_table), rows)
                query = query.join(rses_temp_table, rse_model.id == rses_temp_table.id)

        if not all_states:
            if unavailable:
                state_clause = or_(
                    replica_model.state == ReplicaState.AVAILABLE,
                    replica_model.state == ReplicaState.UNAVAILABLE,
                    replica_model.state == ReplicaState.COPYING
                )
            else:
                state_clause = replica_model.state == ReplicaState.AVAILABLE
            query = query.where(state_clause)

        return query.subquery()

    def _resolve_collection_files(temp_table, *, session: "Session"):
        """
        Find all FILE dids contained in collections from temp_table and return them in a newly
        created temporary table.
        """
        files_table = temp_table_mngr(session).create_scope_name_table()
        child_files = rucio.core.did.list_child_dids_stmt(temp_table, did_type=DIDType.FILE)
        fill_stmt = insert(files_table).from_select(['scope', 'name'], child_files)
        return session.execute(fill_stmt).rowcount, files_table

    def _list_replicas_for_collection_files_stmt(temp_table, replicas_subquery):
        """
        Build a query for listing replicas of files resolved from containers/datasets

        The query assumes that temp_table only contains DIDs of type FILE.
        """
        return select(
            temp_table.scope.label('scope'),
            temp_table.name.label('name'),
            literal(None).label('archive_scope'),
            literal(None).label('archive_name'),
            replicas_subquery.c.bytes,
            replicas_subquery.c.md5,
            replicas_subquery.c.adler32,
            *_location_columns(replicas_subquery),
        ).join_from(
            temp_table,
            replicas_subquery,
            and_(replicas_subquery.c.scope == temp_table.scope,
                 replicas_subquery.c.name == temp_table.name),
        )

    def _list_replicas_for_constituents_stmt(temp_table, replicas_subquery):
        """
        Build a query for listing replicas of archives containing the files(constituents) given as input.
        i.e. for a file scope:file.log which exists in scope:archive.tar.gz, it will return the replicas
        (rse, path, state, etc) of archive.tar.gz, but with bytes/md5/adler of file.log
        """
        constituent = models.ConstituentAssociation
        return select(
            constituent.child_scope.label('scope'),
            constituent.child_name.label('name'),
            constituent.scope.label('archive_scope'),
            constituent.name.label('archive_name'),
            constituent.bytes,
            constituent.md5,
            constituent.adler32,
            *_location_columns(replicas_subquery),
        ).join_from(
            temp_table,
            models.DataIdentifier,
            and_(models.DataIdentifier.scope == temp_table.scope,
                 models.DataIdentifier.name == temp_table.name,
                 models.DataIdentifier.did_type == DIDType.FILE,
                 models.DataIdentifier.constituent == true()),
        ).join(
            constituent,
            and_(constituent.child_scope == temp_table.scope,
                 constituent.child_name == temp_table.name)
        ).join(
            replicas_subquery,
            and_(replicas_subquery.c.scope == constituent.scope,
                 replicas_subquery.c.name == constituent.name),
        )

    def _list_replicas_for_input_files_stmt(temp_table, replicas_subquery):
        """
        Builds a query which list the replicas of FILEs from users input, but ignores
        collections in the same input.

        Note: These FILE dids must be returned to the user even if they don't have replicas,
        hence the outerjoin against the replicas_subquery.
        """
        return select(
            temp_table.scope.label('scope'),
            temp_table.name.label('name'),
            literal(None).label('archive_scope'),
            literal(None).label('archive_name'),
            models.DataIdentifier.bytes,
            models.DataIdentifier.md5,
            models.DataIdentifier.adler32,
            *_location_columns(replicas_subquery),
        ).join_from(
            temp_table,
            models.DataIdentifier,
            and_(models.DataIdentifier.scope == temp_table.scope,
                 models.DataIdentifier.name == temp_table.name,
                 models.DataIdentifier.did_type == DIDType.FILE),
        ).outerjoin(
            replicas_subquery,
            and_(replicas_subquery.c.scope == temp_table.scope,
                 replicas_subquery.c.name == temp_table.name),
        )

    def _inspect_dids(temp_table, *, session: "Session"):
        """
        Find how many files, collections and constituents are among the dids in the temp_table
        """
        def _count_where(condition, label):
            return func.sum(case((condition, 1), else_=0)).label(label)

        counting_stmt = select(
            _count_where(models.DataIdentifier.did_type == DIDType.FILE, 'num_files'),
            _count_where(models.DataIdentifier.did_type.in_([DIDType.CONTAINER, DIDType.DATASET]), 'num_collections'),
            _count_where(models.DataIdentifier.constituent == true(), 'num_constituents'),
        ).join_from(
            temp_table,
            models.DataIdentifier,
            and_(models.DataIdentifier.scope == temp_table.scope,
                 models.DataIdentifier.name == temp_table.name),
        )
        # the sums come back as None on empty input: normalize them to 0
        return tuple(count or 0 for count in session.execute(counting_stmt).one())

    filter_ = {'vo': dids[0]['scope'].vo} if dids else {'vo': 'def'}

    dids = {(did['scope'], did['name']): did for did in dids}  # Deduplicate input
    if not dids:
        return

    input_dids_temp_table = temp_table_mngr(session).create_scope_name_table()
    session.execute(
        insert(input_dids_temp_table),
        [{'scope': scope, 'name': name} for scope, name in dids],
    )

    num_files, num_collections, num_constituents = _inspect_dids(input_dids_temp_table, session=session)

    num_files_in_collections, resolved_files_temp_table = 0, None
    if num_collections:
        num_files_in_collections, resolved_files_temp_table = _resolve_collection_files(input_dids_temp_table, session=session)

    replicas_subquery = _replicas_filter_subquery()
    replica_sources = []
    if num_files:
        replica_sources.append(_list_replicas_for_input_files_stmt(input_dids_temp_table, replicas_subquery))
    if num_constituents and resolve_archives:
        replica_sources.append(_list_replicas_for_constituents_stmt(input_dids_temp_table, replicas_subquery))
    if num_files_in_collections:
        replica_sources.append(_list_replicas_for_collection_files_stmt(resolved_files_temp_table, replicas_subquery))

    if not replica_sources:
        return

    # In the simple case that somebody calls list_replicas on big collections with nrandom set,
    # opportunistically try to reduce the number of fetched and analyzed rows.
    #
    # Only try this optimisation if list_replicas was called on collection(s). The case of a mix of
    # file/archive/collection dids is not handled: database queries in those cases are more complex
    # and people don't usually call list_replicas with nrandom on file/archive_constituents anyway.
    only_collection_files = num_files_in_collections and not num_constituents and not num_files
    # The optimisation introduces overhead if it fails to pick n random replicas.
    # Only execute when nrandom is much smaller than the total number of candidate files.
    # 64 was picked without any particular reason as "seems good enough".
    if nrandom and only_collection_files and 0 < nrandom < num_files_in_collections / 64:
        # Randomly select a subset of file DIDs which have at least one replica matching the RSE/replica
        # filters applied on database side. Some filters are applied later in python code
        # (for example: scheme; or client_location/domain). We don't have any guarantee that
        # those, python, filters will not drop the replicas which we just selected randomly.
        random_order = literal_column('dbms_random.value') if session.bind.dialect.name == 'oracle' else func.random()
        pick_stmt = select(
            resolved_files_temp_table.scope.label('scope'),
            resolved_files_temp_table.name.label('name'),
        ).where(
            exists(
                select(1)
            ).where(
                replicas_subquery.c.scope == resolved_files_temp_table.scope,
                replicas_subquery.c.name == resolved_files_temp_table.name
            )
        ).order_by(
            random_order
        ).limit(
            # slightly overshoot to reduce the probability that python-side filtering will
            # leave us with less than nrandom replicas.
            nrandom * 4
        )
        # Reuse input temp table. We don't need its content anymore
        random_dids_temp_table = input_dids_temp_table
        session.execute(delete(random_dids_temp_table))
        session.execute(
            insert(random_dids_temp_table).from_select(['scope', 'name'], pick_stmt)
        )

        # Fetch all replicas for randomly selected dids and apply filters on python side
        random_stmt = _list_replicas_for_collection_files_stmt(random_dids_temp_table, replicas_subquery)
        random_rows = session.execute(random_stmt.order_by('scope', 'name'))
        random_replicas = list(_pick_n_random(nrandom, _to_files(random_rows)))
        if len(random_replicas) == nrandom:
            yield from random_replicas
            return
        # Otherwise our opportunistic attempt to pick nrandom replicas without fetching all database
        # rows failed: continue with the normal list_replicas flow and fetch all replicas

    if len(replica_sources) == 1:
        replica_tuples = session.execute(replica_sources[0].order_by('scope', 'name'))
    elif session.bind.dialect.name == 'mysql':
        # On mysql, perform both queries independently and merge their result in python.
        # The union query fails with "Can't reopen table"
        replica_tuples = heapq.merge(
            *[session.execute(source.order_by('scope', 'name')) for source in replica_sources],
            key=lambda t: (t[0], t[1]),  # sort by scope, name
        )
    else:
        replica_tuples = session.execute(union(*replica_sources).order_by('scope', 'name'))

    yield from _pick_n_random(nrandom, _to_files(replica_tuples))
1451
+
1452
+
1453
@transactional_session
def __bulk_add_new_file_dids(files, account, dataset_meta=None, *, session: "Session"):
    """
    Bulk add new file dids.

    :param files: the list of new files, as dictionaries with at least 'scope', 'name' and 'bytes';
                  'account', 'md5', 'adler32' and 'meta' are optional.
    :param account: The account owner, used for files which don't carry their own 'account'.
    :param dataset_meta: Optional metadata which is set on every file of the list.
    :param session: The database session in use.
    :returns: True is successful.
    :raises ScopeNotFound: if the database reports a missing scope.
    :raises DataIdentifierAlreadyExists: if one of the dids already exists in the session.
    :raises RucioException: on any other database error.
    """
    # Database (dialect specific) messages which all mean: the scope of a did doesn't exist
    missing_scope_patterns = (
        '.*IntegrityError.*02291.*integrity constraint.*DIDS_SCOPE_FK.*violated - parent key not found.*',
        '.*IntegrityError.*FOREIGN KEY constraint failed.*',
        '.*IntegrityError.*1452.*Cannot add or update a child row: a foreign key constraint fails.*',
        '.*IntegrityError.*insert or update on table.*violates foreign key constraint "DIDS_SCOPE_FK".*',
        '.*ForeignKeyViolation.*insert or update on table.*violates foreign key constraint.*',
        '.*IntegrityError.*foreign key constraints? failed.*',
    )

    for file in files:
        new_did = models.DataIdentifier(scope=file['scope'], name=file['name'],
                                        account=file.get('account') or account,
                                        did_type=DIDType.FILE, bytes=file['bytes'],
                                        md5=file.get('md5'), adler32=file.get('adler32'),
                                        is_new=None)
        # Don't flush per file: everything is flushed at once after the loop
        new_did.save(session=session, flush=False)

        if file.get('meta'):
            rucio.core.did.set_metadata_bulk(scope=file['scope'], name=file['name'], meta=file['meta'], recursive=False, session=session)
        if dataset_meta:
            rucio.core.did.set_metadata_bulk(scope=file['scope'], name=file['name'], meta=dataset_meta, recursive=False, session=session)
    try:
        session.flush()
    except IntegrityError as error:
        if any(match(pattern, error.args[0]) for pattern in missing_scope_patterns):
            raise exception.ScopeNotFound('Scope not found!') from error

        raise exception.RucioException(error.args) from error
    except DatabaseError as error:
        if match('.*(DatabaseError).*ORA-14400.*inserted partition key does not map to any partition.*', error.args[0]):
            raise exception.ScopeNotFound('Scope not found!') from error

        raise exception.RucioException(error.args) from error
    except FlushError as error:
        if match('New instance .* with identity key .* conflicts with persistent instance', error.args[0]):
            raise exception.DataIdentifierAlreadyExists('Data Identifier already exists!') from error
        raise exception.RucioException(error.args) from error
    return True
1498
+
1499
+
1500
@transactional_session
def __bulk_add_file_dids(files, account, dataset_meta=None, *, session: "Session"):
    """
    Make sure a file did exists for each of the given files, creating the missing ones.

    :param files: the list of files, as dictionaries with at least 'scope' and 'name'.
    :param account: The account owner of the newly created dids.
    :param dataset_meta: Optional metadata forwarded to the creation of the new dids.
    :param session: The database session in use.
    :returns: the input dictionaries of the newly created files, followed by one dictionary
              (scope, name, bytes, md5, adler32) for each file which already existed.
    """
    # Without any file, the or_() below would receive no clause at all and the query would
    # no longer be restricted to the requested dids. NOTE(review): SQLAlchemy documents an
    # argument-less or_() as deprecated and rendering a blank expression.
    if not files:
        return []

    condition = [
        and_(models.DataIdentifier.scope == f['scope'],
             models.DataIdentifier.name == f['name'],
             models.DataIdentifier.did_type == DIDType.FILE)
        for f in files
    ]

    stmt = select(
        models.DataIdentifier.scope,
        models.DataIdentifier.name,
        models.DataIdentifier.bytes,
        models.DataIdentifier.md5,
        models.DataIdentifier.adler32,
    ).with_hint(
        models.DataIdentifier,
        'INDEX(DIDS DIDS_PK)',
        'oracle'
    ).where(
        or_(*condition)
    )
    available_files = [res._asdict() for res in session.execute(stmt).all()]

    # Single pass lookup instead of scanning all existing files for each input file
    existing = {(available_file['scope'], available_file['name']) for available_file in available_files}
    new_files = [file for file in files if (file['scope'], file['name']) not in existing]

    __bulk_add_new_file_dids(files=new_files, account=account,
                             dataset_meta=dataset_meta,
                             session=session)
    return new_files + available_files
1543
+
1544
+
1545
def tombstone_from_delay(tombstone_delay):
    """
    Convert a tombstone delay, in seconds, into a tombstone datetime.

    :param tombstone_delay: The delay in seconds; anything accepted by int(), or None.
    :returns: None if the delay is missing or truncates to 0 seconds;
              datetime(1970, 1, 1) if the delay is negative;
              otherwise the current UTC time plus the delay, as a naive datetime.
    """
    # Imported locally: datetime.utcnow() is deprecated since Python 3.12
    from datetime import timezone

    # Tolerate None for tombstone_delay
    if not tombstone_delay:
        return None

    delay = timedelta(seconds=int(tombstone_delay))

    # int() may have truncated a fractional delay down to zero
    if not delay:
        return None

    if delay < timedelta(0):
        return datetime(1970, 1, 1)

    # Drop the timezone again: callers get the same naive UTC value as before
    return datetime.now(timezone.utc).replace(tzinfo=None) + delay
1559
+
1560
+
1561
@transactional_session
def __bulk_add_replicas(rse_id, files, account, *, session: "Session"):
    """
    Bulk add new replicas on an RSE, skipping the files which already have a replica there.

    :param rse_id: the RSE id.
    :param files: the list of files, as dictionaries with at least 'scope', 'name' and 'bytes';
                  'path', 'state', 'md5', 'adler32', 'lock_cnt' and 'tombstone' are optional.
    :param account: The account owner. Not used by this function itself.
    :param session: The database session in use.
    :returns: a tuple (number of replicas added, sum of their bytes).
    :raises Duplicate: if the database reports that a replica already exists.
    :raises RucioException: on any other database error.
    """
    # Without any file, the or_() below would receive no clause at all and the query would
    # no longer be restricted to the requested replicas. Nothing would be inserted anyway.
    if not files:
        return 0, 0

    # Check for the replicas already available
    condition = [
        and_(models.RSEFileAssociation.scope == f['scope'],
             models.RSEFileAssociation.name == f['name'],
             models.RSEFileAssociation.rse_id == rse_id)
        for f in files
    ]

    stmt = select(
        models.RSEFileAssociation.scope,
        models.RSEFileAssociation.name,
        models.RSEFileAssociation.rse_id,
    ).with_hint(
        models.RSEFileAssociation,
        'INDEX(REPLICAS REPLICAS_PK)',
        'oracle'
    ).where(
        or_(*condition)
    )

    available_replicas = [res._asdict() for res in session.execute(stmt).all()]
    # The query is already restricted to rse_id: (scope, name) is enough to identify a replica.
    # Single pass lookup instead of scanning all existing replicas for each input file.
    existing = {(replica['scope'], replica['name']) for replica in available_replicas}

    default_tombstone_delay = get_rse_attribute(rse_id, RseAttr.TOMBSTONE_DELAY, session=session)
    default_tombstone = tombstone_from_delay(default_tombstone_delay)

    new_replicas = [
        {'rse_id': rse_id, 'scope': file['scope'],
         'name': file['name'], 'bytes': file['bytes'],
         'path': file.get('path'),
         'state': ReplicaState(file.get('state', 'A')),
         'md5': file.get('md5'), 'adler32': file.get('adler32'),
         'lock_cnt': file.get('lock_cnt', 0),
         'tombstone': file.get('tombstone') or default_tombstone}
        for file in files
        if (file['scope'], file['name']) not in existing
    ]
    nbfiles = len(new_replicas)
    bytes_ = sum(replica['bytes'] for replica in new_replicas)

    try:
        if new_replicas:
            session.execute(insert(models.RSEFileAssociation), new_replicas)
        session.flush()
        return nbfiles, bytes_
    except IntegrityError as error:
        if match('.*IntegrityError.*ORA-00001: unique constraint .*REPLICAS_PK.*violated.*', error.args[0]) \
                or match('.*IntegrityError.*1062.*Duplicate entry.*', error.args[0]) \
                or match('.*IntegrityError.*columns? rse_id.*scope.*name.*not unique.*', error.args[0]) \
                or match('.*IntegrityError.*duplicate key value violates unique constraint.*', error.args[0]):
            raise exception.Duplicate("File replica already exists!") from error
        raise exception.RucioException(error.args) from error
    except DatabaseError as error:
        raise exception.RucioException(error.args) from error
1630
+
1631
+
1632
@transactional_session
def add_replicas(rse_id, files, account, ignore_availability=True,
                 dataset_meta=None, *, session: "Session"):
    """
    Bulk add file replicas.

    :param rse_id: The RSE id.
    :param files: The list of files. Each file is a dictionary with at least 'scope',
                  'name' and 'bytes'; 'pfn' is required on a non deterministic RSE.
                  On a non deterministic RSE the 'path' key of each file is set in place.
    :param account: The account owner.
    :param ignore_availability: Ignore the RSE blocklisting.
    :param dataset_meta: Passed through unchanged when registering the file DIDs.
    :param session: The database session in use.

    :returns: None.
    :raises UnsupportedOperation: if the RSE is volatile, or if a PFN is missing on a non deterministic RSE.
    :raises ResourceTemporaryUnavailable: if the RSE is not available for writing and ignore_availability is False.
    :raises InvalidPath: if a PFN given on a deterministic RSE does not match any expected PFN.
    """

    def _expected_pfns(lfns, rse_settings, scheme, operation='write', domain='wan', protocol_attr=None):
        # PFNs the given protocol would generate for these LFNs
        p = rsemgr.create_protocol(rse_settings=rse_settings, operation=operation, scheme=scheme, domain=domain, protocol_attr=protocol_attr)
        expected_pfns = p.lfns2pfns(lfns)
        return clean_pfns(expected_pfns.values())

    def _pfn_scheme(pfn):
        # Everything before the first ':' (e.g. 'https' for 'https://host/path')
        return pfn.split(':')[0]

    replica_rse = get_rse(rse_id=rse_id, session=session)

    if replica_rse['volatile'] is True:
        raise exception.UnsupportedOperation('Cannot add replicas on volatile RSE %s ' % (replica_rse['rse']))

    if not replica_rse['availability_write'] and not ignore_availability:
        raise exception.ResourceTemporaryUnavailable('%s is temporary unavailable for writing' % replica_rse['rse'])

    if any('pfn' not in file for file in files) and not replica_rse['deterministic']:
        raise exception.UnsupportedOperation('PFN needed for this (non deterministic) RSE %s ' % (replica_rse['rse']))

    __bulk_add_file_dids(files=files, account=account,
                         dataset_meta=dataset_meta,
                         session=session)

    pfns = {}  # dict[str, list[str]], {scheme: [pfns], scheme: [pfns]}
    for file in files:
        if 'pfn' in file:
            pfns.setdefault(_pfn_scheme(file['pfn']), []).append(file['pfn'])

    if pfns:
        rse_settings = rsemgr.get_rse_info(rse_id=rse_id, session=session)
        for scheme in pfns:
            # Match on the exact scheme: a prefix test would also pick up e.g. 'https'
            # PFNs while handling 'http'. Files without a PFN are left out.
            scheme_files = [file for file in files
                            if 'pfn' in file and _pfn_scheme(file['pfn']) == scheme]

            if not replica_rse['deterministic']:
                # The path cannot be derived from the LFN: take it from the parsed PFN
                p = rsemgr.create_protocol(rse_settings=rse_settings, operation='write', scheme=scheme)
                pfns[scheme] = p.parse_pfns(pfns=pfns[scheme])
                for file in scheme_files:
                    tmp = pfns[scheme][file['pfn']]
                    file['path'] = ''.join([tmp['path'], tmp['name']])
            else:
                # Check that the pfns match to the expected pfns
                lfns = [{'scope': i['scope'].external, 'name': i['name']} for i in scheme_files]
                pfns[scheme] = clean_pfns(pfns[scheme])

                # Strip every PFN explained by a wan protocol, then by a lan protocol;
                # whatever is left over matches nothing Rucio would have generated.
                for protocol_attr in rsemgr.get_protocols_ordered(rse_settings=rse_settings, operation='write', scheme=scheme, domain='wan'):
                    pfns[scheme] = list(set(pfns[scheme]) - set(_expected_pfns(lfns, rse_settings, scheme, operation='write', domain='wan', protocol_attr=protocol_attr)))

                if len(pfns[scheme]) > 0:
                    for protocol_attr in rsemgr.get_protocols_ordered(rse_settings=rse_settings, operation='write', scheme=scheme, domain='lan'):
                        pfns[scheme] = list(set(pfns[scheme]) - set(_expected_pfns(lfns, rse_settings, scheme, operation='write', domain='lan', protocol_attr=protocol_attr)))

                    if len(pfns[scheme]) > 0:
                        # pfns not found in wan or lan
                        raise exception.InvalidPath('One of the PFNs provided does not match the Rucio expected PFN : %s (%s)' % (str(pfns[scheme]), str(lfns)))

    nbfiles, bytes_ = __bulk_add_replicas(rse_id=rse_id, files=files, account=account, session=session)
    increase(rse_id=rse_id, files=nbfiles, bytes_=bytes_, session=session)
1703
+
1704
+
1705
@transactional_session
def add_replica(
    rse_id: str,
    scope: InternalScope,
    name: str,
    bytes_: int,
    account: models.InternalAccount,
    adler32: "Optional[str]" = None,
    md5: "Optional[str]" = None,
    dsn: "Optional[str]" = None,
    pfn: "Optional[str]" = None,
    meta: "Optional[dict[str, Any]]" = None,
    rules: "Optional[list[dict[str, Any]]]" = None,
    tombstone: "Optional[datetime]" = None,
    *,
    session: "Session"
) -> "list[dict[str, Any]]":
    """
    Add a single file replica by delegating to add_replicas with a one-element list.

    :param rse_id: the rse id.
    :param scope: the scope name.
    :param name: The data identifier name.
    :param bytes_: the size of the file.
    :param account: The account owner.
    :param adler32: The adler32 checksum.
    :param md5: The md5 checksum.
    :param dsn: Not used by this function.
    :param pfn: Physical file name (for nondeterministic rse); only forwarded when truthy.
    :param meta: Meta-data associated with the file. Represented as key/value pairs in a dictionary.
    :param rules: Replication rules associated with the file. A list of dictionaries, e.g., [{'copies': 2, 'rse_expression': 'TIERS1'}, ].
    :param tombstone: Tombstone date to set on the replica, if any.
    :param session: The database session in use.

    :returns: whatever add_replicas returns for the single file.
    """
    replica = {
        'scope': scope,
        'name': name,
        'bytes': bytes_,
        'adler32': adler32,
        'md5': md5,
        # Missing (or empty) meta/rules are normalised to empty containers
        'meta': meta or {},
        'rules': rules or [],
        'tombstone': tombstone,
    }
    if pfn:
        replica['pfn'] = pfn

    return add_replicas(rse_id=rse_id, files=[replica], account=account, session=session)
1747
+
1748
+
1749
@METRICS.time_it
@transactional_session
def delete_replicas(rse_id, files, ignore_availability=True, *, session: "Session"):
    """
    Delete file replicas.

    :param rse_id: the rse id.
    :param files: the list of files to delete, dictionaries with the keys 'scope' and 'name'.
    :param ignore_availability: Ignore the RSE blocklisting.
    :param session: The database session in use.

    :raises ResourceTemporaryUnavailable: if the RSE is not available for deletion and ignore_availability is False.
    :raises ReplicaNotFound: if the number of deleted replica rows differs from the number of files.
    """
    if not files:
        return

    replica_rse = get_rse(rse_id=rse_id, session=session)

    if not replica_rse['availability_delete'] and not ignore_availability:
        raise exception.ResourceTemporaryUnavailable('%s is temporary unavailable '
                                                     'for deleting' % replica_rse['rse'])
    tt_mngr = temp_table_mngr(session)
    scope_name_temp_table = tt_mngr.create_scope_name_table()
    scope_name_temp_table2 = tt_mngr.create_scope_name_table()
    association_temp_table = tt_mngr.create_association_table()

    # Stage the (scope, name) pairs once; the statements below filter against this table
    values = [{'scope': file['scope'], 'name': file['name']} for file in files]
    stmt = insert(
        scope_name_temp_table
    )
    session.execute(stmt, values)

    # WARNING : This should not be necessary since that would mean the replica is used as a source.
    stmt = delete(
        models.Source,
    ).where(
        exists(select(1)
               .where(and_(models.Source.scope == scope_name_temp_table.scope,
                           models.Source.name == scope_name_temp_table.name,
                           models.Source.rse_id == rse_id)))
    ).execution_options(
        synchronize_session=False
    )
    session.execute(stmt)

    # Number of replicas and their total size, read before the rows are deleted;
    # used at the end to decrease the RSE counter
    stmt = select(
        func.count(),
        func.sum(models.RSEFileAssociation.bytes),
    ).join_from(
        scope_name_temp_table,
        models.RSEFileAssociation,
        and_(models.RSEFileAssociation.scope == scope_name_temp_table.scope,
             models.RSEFileAssociation.name == scope_name_temp_table.name,
             models.RSEFileAssociation.rse_id == rse_id)
    )
    delta, bytes_ = session.execute(stmt).one()

    # Delete replicas
    stmt = delete(
        models.RSEFileAssociation,
    ).where(
        exists(select(1)
               .where(
                   and_(models.RSEFileAssociation.scope == scope_name_temp_table.scope,
                        models.RSEFileAssociation.name == scope_name_temp_table.name,
                        models.RSEFileAssociation.rse_id == rse_id)))
    ).execution_options(
        synchronize_session=False
    )
    res = session.execute(stmt)
    if res.rowcount != len(files):
        raise exception.ReplicaNotFound("One or several replicas don't exist.")

    # Update bad replicas
    stmt = update(
        models.BadReplica,
    ).where(
        exists(select(1)
               .where(
                   and_(models.BadReplica.scope == scope_name_temp_table.scope,
                        models.BadReplica.name == scope_name_temp_table.name,
                        models.BadReplica.rse_id == rse_id)))
    ).where(
        models.BadReplica.state == BadFilesStatus.BAD
    ).values({
        models.BadReplica.state: BadFilesStatus.DELETED,
        models.BadReplica.updated_at: datetime.utcnow()
    }).execution_options(
        synchronize_session=False
    )
    session.execute(stmt)

    __cleanup_after_replica_deletion(scope_name_temp_table=scope_name_temp_table,
                                     scope_name_temp_table2=scope_name_temp_table2,
                                     association_temp_table=association_temp_table,
                                     rse_id=rse_id, files=files, session=session)

    # Decrease RSE counter
    decrease(rse_id=rse_id, files=delta, bytes_=bytes_, session=session)
1847
+
1848
+
1849
@transactional_session
def __cleanup_after_replica_deletion(scope_name_temp_table, scope_name_temp_table2, association_temp_table, rse_id, files, *, session: "Session"):
    """
    Perform update of collections/archive associations/dids after the removal of their replicas.

    The three temporary tables are used as scratch space: every step below empties the
    table it needs and refills it, so whatever they contain on entry is overwritten.
    The function calls itself on the constituents of the archives it cleans up.

    :param scope_name_temp_table: temporary table with scope/name columns.
    :param scope_name_temp_table2: second temporary table with scope/name columns.
    :param association_temp_table: temporary table with scope/name/child_scope/child_name columns.
    :param rse_id: the rse id
    :param files: list of files whose replica got deleted (dictionaries with 'scope' and 'name')
    :param session: The database session in use.
    """
    clt_to_update, parents_to_analyze, affected_archives, clt_replicas_to_delete = set(), set(), set(), set()
    # Conditions selecting the DIDs which may be erased; evaluated in chunks further down
    did_condition = []
    # clt_to_set_not_archive holds one set of parents per level of the hierarchy walked below
    incomplete_dids, messages, clt_to_set_not_archive = [], [], []
    for file in files:

        # Schedule update of all collections containing this file and having a collection replica in the RSE
        clt_to_update.add(ScopeName(scope=file['scope'], name=file['name']))

        # If the file doesn't have any replicas anymore, we should perform cleanups of objects
        # related to this file. However, if the file is "lost", it's removal wasn't intentional,
        # so we want to skip deleting the metadata here. Perform cleanups:

        # 1) schedule removal of this file from all parent datasets
        parents_to_analyze.add(ScopeName(scope=file['scope'], name=file['name']))

        # 2) schedule removal of this file from the DID table
        # (only if it is not LOST, has no replica left and is not a constituent of an archive)
        did_condition.append(
            and_(models.DataIdentifier.scope == file['scope'],
                 models.DataIdentifier.name == file['name'],
                 models.DataIdentifier.availability != DIDAvailability.LOST,
                 ~exists(select(1).prefix_with("/*+ INDEX(REPLICAS REPLICAS_PK) */", dialect='oracle')).where(
                     and_(models.RSEFileAssociation.scope == file['scope'],
                          models.RSEFileAssociation.name == file['name'])),
                 ~exists(select(1).prefix_with("/*+ INDEX(ARCHIVE_CONTENTS ARCH_CONTENTS_PK) */", dialect='oracle')).where(
                     and_(models.ConstituentAssociation.child_scope == file['scope'],
                          models.ConstituentAssociation.child_name == file['name']))))

        # 3) if the file is an archive, schedule cleanup on the files from inside the archive
        affected_archives.add(ScopeName(scope=file['scope'], name=file['name']))

    if clt_to_update:
        # Get all collection_replicas at RSE, insert them into UpdatedCollectionReplica
        stmt = delete(scope_name_temp_table)
        session.execute(stmt)
        values = [sn._asdict() for sn in clt_to_update]
        stmt = insert(scope_name_temp_table)
        session.execute(stmt, values)
        stmt = select(
            models.DataIdentifierAssociation.scope,
            models.DataIdentifierAssociation.name,
        ).distinct(
        ).join_from(
            scope_name_temp_table,
            models.DataIdentifierAssociation,
            and_(scope_name_temp_table.scope == models.DataIdentifierAssociation.child_scope,
                 scope_name_temp_table.name == models.DataIdentifierAssociation.child_name)
        ).join(
            models.CollectionReplica,
            and_(models.CollectionReplica.scope == models.DataIdentifierAssociation.scope,
                 models.CollectionReplica.name == models.DataIdentifierAssociation.name,
                 models.CollectionReplica.rse_id == rse_id)
        )
        for parent_scope, parent_name in session.execute(stmt):
            models.UpdatedCollectionReplica(scope=parent_scope,
                                            name=parent_name,
                                            did_type=DIDType.DATASET,
                                            rse_id=rse_id). \
                save(session=session, flush=False)

    # Delete did from the content for the last did
    # Each iteration handles one level: the parents found for the current children
    # become the children analysed by the next iteration.
    while parents_to_analyze:
        did_associations_to_remove = set()

        stmt = delete(scope_name_temp_table)
        session.execute(stmt)
        values = [sn._asdict() for sn in parents_to_analyze]
        stmt = insert(scope_name_temp_table)
        session.execute(stmt, values)
        parents_to_analyze.clear()

        # Parent/child associations whose child is not a LOST DID, has no replica at all
        # and is not a constituent of an archive: each outer join is paired with an
        # "IS NULL" filter, i.e. it acts as an anti-join.
        stmt = select(
            models.DataIdentifierAssociation.scope,
            models.DataIdentifierAssociation.name,
            models.DataIdentifierAssociation.did_type,
            models.DataIdentifierAssociation.child_scope,
            models.DataIdentifierAssociation.child_name,
        ).distinct(
        ).join_from(
            scope_name_temp_table,
            models.DataIdentifierAssociation,
            and_(scope_name_temp_table.scope == models.DataIdentifierAssociation.child_scope,
                 scope_name_temp_table.name == models.DataIdentifierAssociation.child_name)
        ).outerjoin(
            models.DataIdentifier,
            and_(models.DataIdentifier.availability == DIDAvailability.LOST,
                 models.DataIdentifier.scope == models.DataIdentifierAssociation.child_scope,
                 models.DataIdentifier.name == models.DataIdentifierAssociation.child_name)
        ).where(
            models.DataIdentifier.scope == null()
        ).outerjoin(
            models.RSEFileAssociation,
            and_(models.RSEFileAssociation.scope == models.DataIdentifierAssociation.child_scope,
                 models.RSEFileAssociation.name == models.DataIdentifierAssociation.child_name)
        ).where(
            models.RSEFileAssociation.scope == null()
        ).outerjoin(
            models.ConstituentAssociation,
            and_(models.ConstituentAssociation.child_scope == models.DataIdentifierAssociation.child_scope,
                 models.ConstituentAssociation.child_name == models.DataIdentifierAssociation.child_name)
        ).where(
            models.ConstituentAssociation.child_scope == null()
        )

        clt_to_set_not_archive.append(set())
        for parent_scope, parent_name, did_type, child_scope, child_name in session.execute(stmt):

            # Schedule removal of child file/dataset/container from the parent dataset/container
            did_associations_to_remove.add(Association(scope=parent_scope, name=parent_name,
                                                       child_scope=child_scope, child_name=child_name))

            # Schedule setting is_archive = False on parents which don't have any children with is_archive == True anymore
            clt_to_set_not_archive[-1].add(ScopeName(scope=parent_scope, name=parent_name))

            # If the parent dataset/container becomes empty as a result of the child removal
            # (it was the last children), metadata cleanup has to be done:
            #
            # 1) Schedule to remove the replicas of this empty collection
            clt_replicas_to_delete.add(ScopeName(scope=parent_scope, name=parent_name))

            # 2) Schedule removal of this empty collection from its own parent collections
            parents_to_analyze.add(ScopeName(scope=parent_scope, name=parent_name))

            # 3) Schedule removal of the entry from the DIDs table
            # NOTE(review): this configuration option is read once per returned row.
            remove_open_did = config_get_bool('reaper', 'remove_open_did', default=False, session=session)
            if remove_open_did:
                # The parent qualifies when it is neither a child nor a parent in any association
                did_condition.append(
                    and_(models.DataIdentifier.scope == parent_scope,
                         models.DataIdentifier.name == parent_name,
                         ~exists(1).where(
                             and_(models.DataIdentifierAssociation.child_scope == parent_scope,
                                  models.DataIdentifierAssociation.child_name == parent_name)),
                         ~exists(1).where(
                             and_(models.DataIdentifierAssociation.scope == parent_scope,
                                  models.DataIdentifierAssociation.name == parent_name))))
            else:
                # Same condition, restricted to DIDs which are not open
                did_condition.append(
                    and_(models.DataIdentifier.scope == parent_scope,
                         models.DataIdentifier.name == parent_name,
                         models.DataIdentifier.is_open == false(),
                         ~exists(1).where(
                             and_(models.DataIdentifierAssociation.child_scope == parent_scope,
                                  models.DataIdentifierAssociation.child_name == parent_name)),
                         ~exists(1).where(
                             and_(models.DataIdentifierAssociation.scope == parent_scope,
                                  models.DataIdentifierAssociation.name == parent_name))))

        if did_associations_to_remove:
            stmt = delete(association_temp_table)
            session.execute(stmt)
            values = [a._asdict() for a in did_associations_to_remove]
            stmt = insert(association_temp_table)
            session.execute(stmt, values)

            # get the list of modified parent scope, name
            # (only the parents whose "complete" flag is true or NULL)
            stmt = select(
                models.DataIdentifier.scope,
                models.DataIdentifier.name,
                models.DataIdentifier.did_type,
            ).distinct(
            ).join_from(
                association_temp_table,
                models.DataIdentifier,
                and_(association_temp_table.scope == models.DataIdentifier.scope,
                     association_temp_table.name == models.DataIdentifier.name)
            ).where(
                or_(models.DataIdentifier.complete == true(),
                    models.DataIdentifier.complete.is_(None)),
            )
            for parent_scope, parent_name, parent_did_type in session.execute(stmt):
                message = {'scope': parent_scope,
                           'name': parent_name,
                           'did_type': parent_did_type,
                           'event_type': 'INCOMPLETE'}
                # Here "messages" only serves to record each parent once in incomplete_dids;
                # the name is rebound to a new empty list before any message is sent.
                if message not in messages:
                    messages.append(message)
                    incomplete_dids.append(ScopeName(scope=parent_scope, name=parent_name))

            # The same filter is passed to the history insert and used by the delete below
            content_to_delete_filter = exists(select(1)
                                              .where(and_(association_temp_table.scope == models.DataIdentifierAssociation.scope,
                                                          association_temp_table.name == models.DataIdentifierAssociation.name,
                                                          association_temp_table.child_scope == models.DataIdentifierAssociation.child_scope,
                                                          association_temp_table.child_name == models.DataIdentifierAssociation.child_name)))

            rucio.core.did.insert_content_history(filter_=content_to_delete_filter, did_created_at=None, session=session)

            stmt = delete(
                models.DataIdentifierAssociation
            ).where(
                content_to_delete_filter,
            ).execution_options(
                synchronize_session=False
            )
            session.execute(stmt)

    # Get collection replicas of collections which became empty
    if clt_replicas_to_delete:
        stmt = delete(scope_name_temp_table)
        session.execute(stmt)
        values = [sn._asdict() for sn in clt_replicas_to_delete]
        stmt = insert(scope_name_temp_table)
        session.execute(stmt, values)
        stmt = delete(scope_name_temp_table2)
        session.execute(stmt)
        # Candidates which still have a DID row but no content row left
        stmt = select(
            models.CollectionReplica.scope,
            models.CollectionReplica.name,
        ).distinct(
        ).join_from(
            scope_name_temp_table,
            models.CollectionReplica,
            and_(scope_name_temp_table.scope == models.CollectionReplica.scope,
                 scope_name_temp_table.name == models.CollectionReplica.name),
        ).join(
            models.DataIdentifier,
            and_(models.DataIdentifier.scope == models.CollectionReplica.scope,
                 models.DataIdentifier.name == models.CollectionReplica.name)
        ).outerjoin(
            models.DataIdentifierAssociation,
            and_(models.DataIdentifierAssociation.scope == models.CollectionReplica.scope,
                 models.DataIdentifierAssociation.name == models.CollectionReplica.name)
        ).where(
            models.DataIdentifierAssociation.scope == null()
        )
        # The selected rows go into the second temporary table, which drives the delete
        stmt = insert(
            scope_name_temp_table2
        ).from_select(
            ['scope', 'name'],
            stmt
        )
        session.execute(stmt)
        # Delete the retrieved collection replicas of empty collections
        # (the filter has no rse_id condition)
        stmt = delete(
            models.CollectionReplica,
        ).where(
            exists(select(1)
                   .where(and_(models.CollectionReplica.scope == scope_name_temp_table2.scope,
                               models.CollectionReplica.name == scope_name_temp_table2.name)))
        ).execution_options(
            synchronize_session=False
        )
        session.execute(stmt)

    # Update incomplete state
    # "messages" is rebound here: from now on it collects the ERASE messages to send
    messages, dids_to_delete = [], set()
    if incomplete_dids:
        stmt = delete(scope_name_temp_table)
        session.execute(stmt)
        values = [sn._asdict() for sn in incomplete_dids]
        stmt = insert(scope_name_temp_table)
        session.execute(stmt, values)
        stmt = update(
            models.DataIdentifier
        ).where(
            exists(select(1)
                   .where(and_(models.DataIdentifier.scope == scope_name_temp_table.scope,
                               models.DataIdentifier.name == scope_name_temp_table.name)))
        ).where(
            models.DataIdentifier.complete != false(),
        ).values({
            models.DataIdentifier.complete: False
        }).execution_options(
            synchronize_session=False
        )

        session.execute(stmt)

    # delete empty dids
    if did_condition:
        # The conditions are OR-ed together ten at a time
        for chunk in chunks(did_condition, 10):
            stmt = select(
                models.DataIdentifier.scope,
                models.DataIdentifier.name,
                models.DataIdentifier.did_type,
            ).with_hint(
                models.DataIdentifier,
                'INDEX(DIDS DIDS_PK)',
                'oracle'
            ).where(
                or_(*chunk)
            )
            for scope, name, did_type in session.execute(stmt):
                # An ERASE message is queued for datasets only
                if did_type == DIDType.DATASET:
                    messages.append({'event_type': 'ERASE',
                                     'payload': dumps({'scope': scope.external,
                                                       'name': name,
                                                       'account': 'root'})})
                dids_to_delete.add(ScopeName(scope=scope, name=name))

    # Remove Archive Constituents
    constituent_associations_to_delete = set()
    if affected_archives:
        stmt = delete(scope_name_temp_table)
        session.execute(stmt)
        values = [sn._asdict() for sn in affected_archives]
        stmt = insert(scope_name_temp_table)
        session.execute(stmt, values)

        # Constituent rows of the given archives, provided the archive is not a LOST DID
        # and has no replica left (outer joins paired with "IS NULL" filters)
        stmt = select(
            models.ConstituentAssociation
        ).distinct(
        ).join_from(
            scope_name_temp_table,
            models.ConstituentAssociation,
            and_(scope_name_temp_table.scope == models.ConstituentAssociation.scope,
                 scope_name_temp_table.name == models.ConstituentAssociation.name),
        ).outerjoin(
            models.DataIdentifier,
            and_(models.DataIdentifier.availability == DIDAvailability.LOST,
                 models.DataIdentifier.scope == models.ConstituentAssociation.scope,
                 models.DataIdentifier.name == models.ConstituentAssociation.name)
        ).where(
            models.DataIdentifier.scope == null()
        ).outerjoin(
            models.RSEFileAssociation,
            and_(models.RSEFileAssociation.scope == models.ConstituentAssociation.scope,
                 models.RSEFileAssociation.name == models.ConstituentAssociation.name)
        ).where(
            models.RSEFileAssociation.scope == null()
        )

        for constituent in session.execute(stmt).scalars().all():
            constituent_associations_to_delete.add(Association(scope=constituent.scope, name=constituent.name,
                                                               child_scope=constituent.child_scope, child_name=constituent.child_name))
            # Copy the row into the history table before it gets deleted below
            models.ConstituentAssociationHistory(
                child_scope=constituent.child_scope,
                child_name=constituent.child_name,
                scope=constituent.scope,
                name=constituent.name,
                bytes=constituent.bytes,
                adler32=constituent.adler32,
                md5=constituent.md5,
                guid=constituent.guid,
                length=constituent.length,
                updated_at=constituent.updated_at,
                created_at=constituent.created_at,
            ).save(session=session, flush=False)

    if constituent_associations_to_delete:
        stmt = delete(association_temp_table)
        session.execute(stmt)
        values = [a._asdict() for a in constituent_associations_to_delete]
        stmt = insert(association_temp_table)
        session.execute(stmt, values)
        stmt = delete(
            models.ConstituentAssociation
        ).where(
            exists(select(1)
                   .where(and_(association_temp_table.scope == models.ConstituentAssociation.scope,
                               association_temp_table.name == models.ConstituentAssociation.name,
                               association_temp_table.child_scope == models.ConstituentAssociation.child_scope,
                               association_temp_table.child_name == models.ConstituentAssociation.child_name)))
        ).execution_options(
            synchronize_session=False
        )
        session.execute(stmt)

        # Run the same cleanup on the removed constituents, 200 at a time. The nested calls
        # reuse (and overwrite) the temporary tables; the steps below refill them before use.
        removed_constituents = {ScopeName(scope=c.child_scope, name=c.child_name) for c in constituent_associations_to_delete}
        for chunk in chunks(removed_constituents, 200):
            __cleanup_after_replica_deletion(scope_name_temp_table=scope_name_temp_table,
                                             scope_name_temp_table2=scope_name_temp_table2,
                                             association_temp_table=association_temp_table,
                                             rse_id=rse_id, files=[sn._asdict() for sn in chunk], session=session)

    if dids_to_delete:
        stmt = delete(scope_name_temp_table)
        session.execute(stmt)
        values = [sn._asdict() for sn in dids_to_delete]
        stmt = insert(scope_name_temp_table)
        session.execute(stmt, values)

        # Remove rules in Waiting for approval or Suspended
        stmt = delete(
            models.ReplicationRule,
        ).where(
            exists(select(1)
                   .where(and_(models.ReplicationRule.scope == scope_name_temp_table.scope,
                               models.ReplicationRule.name == scope_name_temp_table.name)))
        ).where(
            models.ReplicationRule.state.in_((RuleState.SUSPENDED, RuleState.WAITING_APPROVAL))
        ).execution_options(
            synchronize_session=False
        )
        session.execute(stmt)

        # Remove DID Metadata
        # (skipped when the database is Oracle with a major version below 12)
        must_delete_did_meta = True
        if session.bind.dialect.name == 'oracle':
            oracle_version = int(session.connection().connection.version.split('.')[0])
            if oracle_version < 12:
                must_delete_did_meta = False
        if must_delete_did_meta:
            stmt = delete(
                models.DidMeta,
            ).where(
                exists(select(1)
                       .where(and_(models.DidMeta.scope == scope_name_temp_table.scope,
                                   models.DidMeta.name == scope_name_temp_table.name)))
            ).execution_options(
                synchronize_session=False
            )
            session.execute(stmt)

        # Queue the collected ERASE messages, 100 per call
        for chunk in chunks(messages, 100):
            add_messages(chunk, session=session)

        # Delete dids
        dids_to_delete_filter = exists(select(1)
                                       .where(and_(models.DataIdentifier.scope == scope_name_temp_table.scope,
                                                   models.DataIdentifier.name == scope_name_temp_table.name)))
        archive_dids = config_get_bool('deletion', 'archive_dids', default=False, session=session)
        if archive_dids:
            # Called with the same filter as the delete below, and before it
            rucio.core.did.insert_deleted_dids(filter_=dids_to_delete_filter, session=session)
        stmt = delete(
            models.DataIdentifier,
        ).where(
            dids_to_delete_filter,
        ).execution_options(
            synchronize_session=False
        )
        session.execute(stmt)

    # Set is_archive = false on collections which don't have archive children anymore
    # The per-level sets are processed in the order they were collected (pop from the front)
    while clt_to_set_not_archive:
        to_update = clt_to_set_not_archive.pop(0)
        if not to_update:
            continue
        stmt = delete(scope_name_temp_table)
        session.execute(stmt)
        values = [sn._asdict() for sn in to_update]
        stmt = insert(scope_name_temp_table)
        session.execute(stmt, values)
        stmt = delete(scope_name_temp_table2)
        session.execute(stmt)

        data_identifier_alias = aliased(models.DataIdentifier, name='did_alias')
        # Fetch rows to be updated
        # (the outer join on the alias keeps content rows without a child flagged is_archive)
        stmt = select(
            models.DataIdentifier.scope,
            models.DataIdentifier.name,
        ).distinct(
        ).where(
            models.DataIdentifier.is_archive == true()
        ).join_from(
            scope_name_temp_table,
            models.DataIdentifier,
            and_(scope_name_temp_table.scope == models.DataIdentifier.scope,
                 scope_name_temp_table.name == models.DataIdentifier.name)
        ).join(
            models.DataIdentifierAssociation,
            and_(models.DataIdentifier.scope == models.DataIdentifierAssociation.scope,
                 models.DataIdentifier.name == models.DataIdentifierAssociation.name)
        ).outerjoin(
            data_identifier_alias,
            and_(data_identifier_alias.scope == models.DataIdentifierAssociation.child_scope,
                 data_identifier_alias.name == models.DataIdentifierAssociation.child_name,
                 data_identifier_alias.is_archive == true())
        ).where(
            data_identifier_alias.scope == null()
        )
        stmt = insert(
            scope_name_temp_table2
        ).from_select(
            ['scope', 'name'],
            stmt
        )
        session.execute(stmt)
        # update the fetched rows
        stmt = update(
            models.DataIdentifier,
        ).where(
            exists(select(1)
                   .where(and_(models.DataIdentifier.scope == scope_name_temp_table2.scope,
                               models.DataIdentifier.name == scope_name_temp_table2.name)))
        ).values({
            models.DataIdentifier.is_archive: False
        }).execution_options(
            synchronize_session=False
        )
        session.execute(stmt)
2336
+
2337
+
2338
@transactional_session
def get_replica(rse_id, scope, name, *, session: "Session"):
    """
    Fetch one file replica, identified by RSE, scope and name.

    :param rse_id: The RSE Id.
    :param scope: the scope name.
    :param name: The data identifier name.
    :param session: The database session in use.

    :returns: The replica row converted to a dictionary.
    :raises ReplicaNotFound: if no row matches.
    """
    replica_filter = and_(models.RSEFileAssociation.scope == scope,
                          models.RSEFileAssociation.name == name,
                          models.RSEFileAssociation.rse_id == rse_id)
    query = select(models.RSEFileAssociation).where(replica_filter)

    try:
        row = session.execute(query).scalar_one()
        return row.to_dict()
    except NoResultFound:
        # The RSE name is only resolved for the error message
        rse_name = get_rse_name(rse_id=rse_id, session=session)
        raise exception.ReplicaNotFound("No row found for scope: %s name: %s rse: %s" % (scope, name, rse_name))
2361
+
2362
+
2363
@transactional_session
def list_and_mark_unlocked_replicas(limit, bytes_=None, rse_id=None, delay_seconds=600, only_delete_obsolete=False, *, session: "Session"):
    """
    Select unlocked, expired replicas on an RSE and flag them as BEING_DELETED.

    Candidates (lock_cnt == 0, tombstone expired or OBSOLETE, not used as a
    transfer source) are row-locked with SKIP LOCKED, then processed in chunks
    through a temporary scope/name table. A candidate is kept only if another
    AVAILABLE replica exists elsewhere or no request references it.

    :param limit: Number of replicas returned.
    :param bytes_: The amount of needed bytes.
    :param rse_id: The rse_id.
    :param delay_seconds: The delay to query replicas in BEING_DELETED state.
    :param only_delete_obsolete: If set to True, will only return the replicas with EPOCH tombstone.
    :param session: The database session in use.

    :returns: A list of replica dictionaries with the keys scope, name, path,
              bytes, tombstone, state and datatype.
    """
    needed_space = bytes_
    collected_bytes = 0
    selected = []

    def _quota_reached():
        # Stop on the row limit, or (unless only obsolete replicas are wanted)
        # once strictly more than the requested space has been collected.
        if len(selected) >= limit:
            return True
        return not only_delete_obsolete and needed_space is not None and collected_bytes > needed_space

    temp_table_cls = temp_table_mngr(session).create_scope_name_table()

    other_replicas = aliased(models.RSEFileAssociation, name='replicas_alias')

    if only_delete_obsolete:
        tombstone_filter = models.RSEFileAssociation.tombstone == OBSOLETE
    else:
        tombstone_filter = models.RSEFileAssociation.tombstone < datetime.utcnow()

    # BEING_DELETED replicas are retried only after they sat untouched for delay_seconds.
    state_filter = or_(
        models.RSEFileAssociation.state.in_((ReplicaState.AVAILABLE, ReplicaState.UNAVAILABLE, ReplicaState.BAD)),
        and_(models.RSEFileAssociation.state == ReplicaState.BEING_DELETED,
             models.RSEFileAssociation.updated_at < datetime.utcnow() - timedelta(seconds=delay_seconds))
    )

    candidates_stmt = select(
        models.RSEFileAssociation.scope,
        models.RSEFileAssociation.name,
    ).where(
        models.RSEFileAssociation.lock_cnt == 0,
        models.RSEFileAssociation.rse_id == rse_id,
        tombstone_filter,
    ).where(
        state_filter
    ).outerjoin(
        models.Source,
        and_(models.RSEFileAssociation.scope == models.Source.scope,
             models.RSEFileAssociation.name == models.Source.name,
             models.RSEFileAssociation.rse_id == models.Source.rse_id)
    ).where(
        models.Source.scope.is_(None)  # Only try to delete replicas if they are not used as sources in any transfers
    ).order_by(
        models.RSEFileAssociation.tombstone,
        models.RSEFileAssociation.updated_at
    ).with_for_update(
        skip_locked=True,
        # oracle: we must specify a column, not a table; however, it doesn't matter which column, the lock is put on the whole row
        # postgresql/mysql: sqlalchemy driver automatically converts it to a table name
        # sqlite: this is completely ignored
        of=models.RSEFileAssociation.scope,
    )

    candidates = session.execute(candidates_stmt).yield_per(2 * limit)
    for chunk in chunks(candidates, math.ceil(1.25 * limit)):
        # Reload the temporary table with the current chunk of candidates.
        session.execute(delete(temp_table_cls))
        chunk_keys = [{'scope': scope, 'name': name} for scope, name in chunk]
        session.execute(insert(temp_table_cls), chunk_keys)

        details_stmt = select(
            models.RSEFileAssociation.scope,
            models.RSEFileAssociation.name,
            models.RSEFileAssociation.path,
            models.RSEFileAssociation.bytes,
            models.RSEFileAssociation.tombstone,
            models.RSEFileAssociation.state,
            models.DataIdentifier.datatype,
        ).join_from(
            temp_table_cls,
            models.RSEFileAssociation,
            and_(models.RSEFileAssociation.scope == temp_table_cls.scope,
                 models.RSEFileAssociation.name == temp_table_cls.name,
                 models.RSEFileAssociation.rse_id == rse_id)
        ).with_hint(
            other_replicas,
            'INDEX(%(name)s REPLICAS_PK)',
            'oracle'
        ).outerjoin(
            other_replicas,
            and_(models.RSEFileAssociation.scope == other_replicas.scope,
                 models.RSEFileAssociation.name == other_replicas.name,
                 models.RSEFileAssociation.rse_id != other_replicas.rse_id,
                 other_replicas.state == ReplicaState.AVAILABLE)
        ).with_hint(
            models.Request,
            'INDEX(requests REQUESTS_SCOPE_NAME_RSE_IDX)',
            'oracle'
        ).outerjoin(
            models.Request,
            and_(models.RSEFileAssociation.scope == models.Request.scope,
                 models.RSEFileAssociation.name == models.Request.name)
        ).join(
            models.DataIdentifier,
            and_(models.RSEFileAssociation.scope == models.DataIdentifier.scope,
                 models.RSEFileAssociation.name == models.DataIdentifier.name)
        ).group_by(
            models.RSEFileAssociation.scope,
            models.RSEFileAssociation.name,
            models.RSEFileAssociation.path,
            models.RSEFileAssociation.bytes,
            models.RSEFileAssociation.tombstone,
            models.RSEFileAssociation.state,
            models.RSEFileAssociation.updated_at,
            models.DataIdentifier.datatype
        ).having(
            case((func.count(other_replicas.scope) > 0, True),  # Can delete this replica if it's not the last replica
                 (func.count(models.Request.scope) == 0, True),  # If it's the last replica, only can delete if there are no requests using it
                 else_=False).label("can_delete"),
        ).order_by(
            models.RSEFileAssociation.tombstone,
            models.RSEFileAssociation.updated_at
        ).limit(
            limit - len(selected)
        )

        for scope, name, path, replica_bytes, tombstone, state, datatype in session.execute(details_stmt):
            if _quota_reached():
                break
            # UNAVAILABLE replicas do not count towards the freed space.
            if state != ReplicaState.UNAVAILABLE:
                collected_bytes += replica_bytes

            selected.append({'scope': scope, 'name': name, 'path': path,
                             'bytes': replica_bytes, 'tombstone': tombstone,
                             'state': state, 'datatype': datatype})
        if _quota_reached():
            break

    if selected:
        # Reuse the temporary table to drive one bulk update of the chosen replicas.
        session.execute(delete(temp_table_cls))
        selected_keys = [{'scope': entry['scope'], 'name': entry['name']} for entry in selected]
        session.execute(insert(temp_table_cls), selected_keys)

        in_temp_table = exists(
            select(1).prefix_with("/*+ INDEX(REPLICAS REPLICAS_PK) */", dialect='oracle')
            .where(and_(models.RSEFileAssociation.scope == temp_table_cls.scope,
                        models.RSEFileAssociation.name == temp_table_cls.name,
                        models.RSEFileAssociation.rse_id == rse_id))
        )
        mark_stmt = update(
            models.RSEFileAssociation
        ).where(
            in_temp_table
        ).values({
            models.RSEFileAssociation.updated_at: datetime.utcnow(),
            models.RSEFileAssociation.state: ReplicaState.BEING_DELETED,
            models.RSEFileAssociation.tombstone: OBSOLETE
        }).execution_options(
            synchronize_session=False
        )
        session.execute(mark_stmt)

    return selected
2513
+
2514
+
2515
@transactional_session
def update_replicas_states(replicas, nowait=False, *, session: "Session"):
    """
    Update File replica information and state.

    Each replica row is first locked (SELECT ... FOR UPDATE) and then updated.
    State-specific preconditions are applied to the UPDATE itself, so a
    replica that does not satisfy them is rejected instead of silently changed:

    * BEING_DELETED: the replica must have no locks and must not be used as a
      transfer source; its tombstone is set to OBSOLETE.
    * TEMPORARY_UNAVAILABLE: the replica must currently be AVAILABLE or
      TEMPORARY_UNAVAILABLE.

    :param replicas: The list of replicas (dictionaries with at least rse_id, scope, name, state;
                     optionally path, error_message, broken_rule_id, broken_message).
    :param nowait: Nowait parameter for the for_update queries.
    :param session: The database session in use.

    :returns: True once all replicas have been updated.
    :raises ReplicaNotFound: If a replica does not exist.
    :raises UnsupportedOperation: If the state-specific preconditions are not met, so no row is updated.
    """

    for replica in replicas:
        pk_conditions = [models.RSEFileAssociation.rse_id == replica['rse_id'],
                         models.RSEFileAssociation.scope == replica['scope'],
                         models.RSEFileAssociation.name == replica['name']]

        lock_stmt = select(
            models.RSEFileAssociation
        ).where(
            *pk_conditions
        ).with_for_update(
            nowait=nowait
        )

        if session.execute(lock_stmt).scalar_one_or_none() is None:
            # remember scope, name and rse
            raise exception.ReplicaNotFound("No row found for scope: %s name: %s rse: %s" % (replica['scope'], replica['name'], get_rse_name(replica['rse_id'], session=session)))

        if isinstance(replica['state'], str):
            replica['state'] = ReplicaState(replica['state'])

        values = {'state': replica['state']}
        # Preconditions that must hold on the row for the UPDATE to apply.
        # They were previously attached to the already-executed SELECT and
        # therefore never took effect.
        extra_conditions = []
        if replica['state'] == ReplicaState.BEING_DELETED:
            # Exclude locked replicas and replicas used as sources
            extra_conditions.append(models.RSEFileAssociation.lock_cnt == 0)
            extra_conditions.append(
                not_(exists(select(1)
                            .where(and_(models.RSEFileAssociation.scope == models.Source.scope,
                                        models.RSEFileAssociation.name == models.Source.name,
                                        models.RSEFileAssociation.rse_id == models.Source.rse_id))))
            )
            values['tombstone'] = OBSOLETE
        elif replica['state'] == ReplicaState.AVAILABLE:
            rucio.core.lock.successful_transfer(scope=replica['scope'], name=replica['name'], rse_id=replica['rse_id'], nowait=nowait, session=session)
            bad_replica_conditions = and_(models.BadReplica.state == BadFilesStatus.BAD,
                                          models.BadReplica.rse_id == replica['rse_id'],
                                          models.BadReplica.scope == replica['scope'],
                                          models.BadReplica.name == replica['name'])
            stmt_bad_replicas = select(
                func.count()
            ).select_from(
                models.BadReplica
            ).where(
                bad_replica_conditions
            )

            if session.execute(stmt_bad_replicas).scalar():
                # The replica is available again: mark its BAD records as recovered.
                recover_stmt = update(
                    models.BadReplica
                ).where(
                    bad_replica_conditions
                ).values({
                    models.BadReplica.state: BadFilesStatus.RECOVERED,
                    models.BadReplica.updated_at: datetime.utcnow()
                }).execution_options(
                    synchronize_session=False
                )
                session.execute(recover_stmt)
        elif replica['state'] == ReplicaState.UNAVAILABLE:
            rucio.core.lock.failed_transfer(scope=replica['scope'], name=replica['name'], rse_id=replica['rse_id'],
                                            error_message=replica.get('error_message', None),
                                            broken_rule_id=replica.get('broken_rule_id', None),
                                            broken_message=replica.get('broken_message', None),
                                            nowait=nowait, session=session)
        elif replica['state'] == ReplicaState.TEMPORARY_UNAVAILABLE:
            extra_conditions.append(
                models.RSEFileAssociation.state.in_([ReplicaState.AVAILABLE,
                                                     ReplicaState.TEMPORARY_UNAVAILABLE])
            )

        if 'path' in replica and replica['path']:
            values['path'] = replica['path']

        update_stmt = update(
            models.RSEFileAssociation
        ).where(
            and_(*pk_conditions, *extra_conditions)
        ).values(
            values
        ).execution_options(
            synchronize_session=False
        )

        if not session.execute(update_stmt).rowcount:
            if 'rse' not in replica:
                replica['rse'] = get_rse_name(rse_id=replica['rse_id'], session=session)
            raise exception.UnsupportedOperation('State %(state)s for replica %(scope)s:%(name)s on %(rse)s cannot be updated' % replica)
    return True
2614
+
2615
+
2616
@transactional_session
def touch_replica(replica, *, session: "Session"):
    """
    Update the accessed_at timestamp of the given file replica/did but don't wait if row is locked.

    The replica's tombstone is moved to the access time as well, but only if
    a tombstone is set and is not the OBSOLETE marker.

    :param replica: a dictionary with the information of the affected replica
                    (rse_id, scope, name and optionally accessed_at).
    :param session: The database session in use.

    :returns: False if a database error occurred (e.g. the row is locked);
              True otherwise, including when the replica or file DID does not exist.
    """
    try:
        accessed_at = replica.get('accessed_at') or datetime.utcnow()

        replica_conditions = and_(models.RSEFileAssociation.rse_id == replica['rse_id'],
                                  models.RSEFileAssociation.scope == replica['scope'],
                                  models.RSEFileAssociation.name == replica['name'])

        # Lock the replica row; NOWAIT makes a concurrent lock fail fast instead of blocking.
        stmt = select(
            models.RSEFileAssociation
        ).with_hint(
            models.RSEFileAssociation,
            'INDEX(REPLICAS REPLICAS_PK)',
            'oracle'
        ).where(
            replica_conditions
        ).with_for_update(
            nowait=True
        )
        session.execute(stmt).one()

        # A NULL inside "NOT IN (...)" makes the predicate never true, so the
        # NULL check has to be an explicit IS NOT NULL.
        has_regular_tombstone = and_(models.RSEFileAssociation.tombstone.is_not(None),
                                     models.RSEFileAssociation.tombstone != OBSOLETE)

        stmt = update(
            models.RSEFileAssociation
        ).where(
            replica_conditions
        ).prefix_with(
            '/*+ INDEX(REPLICAS REPLICAS_PK) */', dialect='oracle'
        ).values({
            models.RSEFileAssociation.accessed_at: accessed_at,
            models.RSEFileAssociation.tombstone: case(
                (has_regular_tombstone, accessed_at),
                else_=models.RSEFileAssociation.tombstone)
        }).execution_options(
            synchronize_session=False
        )
        session.execute(stmt)

        did_conditions = and_(models.DataIdentifier.scope == replica['scope'],
                              models.DataIdentifier.name == replica['name'],
                              models.DataIdentifier.did_type == DIDType.FILE)

        # Same lock-then-update sequence for the file DID.
        stmt = select(
            models.DataIdentifier
        ).with_hint(
            models.DataIdentifier,
            'INDEX(DIDS DIDS_PK)',
            'oracle'
        ).where(
            did_conditions
        ).with_for_update(
            nowait=True
        )
        session.execute(stmt).one()

        stmt = update(
            models.DataIdentifier
        ).where(
            did_conditions
        ).prefix_with(
            '/*+ INDEX(DIDS DIDS_PK) */', dialect='oracle'
        ).values({
            models.DataIdentifier.accessed_at: accessed_at
        }).execution_options(
            synchronize_session=False
        )
        session.execute(stmt)

    except DatabaseError:
        return False
    except NoResultFound:
        # Nothing to touch is not treated as a failure.
        return True

    return True
2699
+
2700
+
2701
@transactional_session
def update_replica_state(rse_id, scope, name, state, *, session: "Session"):
    """
    Update the state of a single file replica.

    Convenience wrapper that delegates to update_replicas_states with a
    one-element list.

    :param rse_id: The RSE id.
    :param scope: The scope name.
    :param name: The data identifier name.
    :param state: The new replica state.
    :param session: The database session in use.

    :returns: The return value of update_replicas_states.
    """
    single_replica = {'scope': scope, 'name': name, 'state': state, 'rse_id': rse_id}
    return update_replicas_states(replicas=[single_replica], session=session)
2713
+
2714
+
2715
@transactional_session
def get_and_lock_file_replicas(scope, name, nowait=False, restrict_rses=None, *, session: "Session"):
    """
    Fetch and row-lock the replicas of one file, skipping those in BEING_DELETED state.

    :param scope: The scope of the did.
    :param name: The name of the did.
    :param nowait: Nowait parameter for the FOR UPDATE statement.
    :param restrict_rses: Possible RSE_ids to filter on. The filter is only
                          applied for a non-empty list with fewer than 10 entries.
    :param session: The db session in use.
    :returns: List of SQLAlchemy Replica Objects
    """
    not_being_deleted = and_(models.RSEFileAssociation.scope == scope,
                             models.RSEFileAssociation.name == name,
                             models.RSEFileAssociation.state != ReplicaState.BEING_DELETED)
    lock_query = select(models.RSEFileAssociation).where(not_being_deleted).with_for_update(nowait=nowait)

    use_rse_filter = restrict_rses is not None and 0 < len(restrict_rses) < 10
    if use_rse_filter:
        rse_filters = []
        for candidate_rse_id in restrict_rses:
            rse_filters.append(models.RSEFileAssociation.rse_id == candidate_rse_id)
        lock_query = lock_query.where(or_(*rse_filters))

    locked_replicas = session.execute(lock_query).scalars().all()
    return locked_replicas
2743
+
2744
+
2745
@transactional_session
def get_source_replicas(scope, name, source_rses=None, *, session: "Session"):
    """
    Get the RSEs holding an AVAILABLE replica of a specific scope:name.

    :param scope: The scope of the did.
    :param name: The name of the did.
    :param source_rses: Possible RSE_ids to filter on. The filter is only
                        applied for a non-empty collection with fewer than 10 entries.
    :param session: The db session in use.
    :returns: List of rse_ids of the matching replicas.
    """
    available_replica = and_(models.RSEFileAssociation.scope == scope,
                             models.RSEFileAssociation.name == name,
                             models.RSEFileAssociation.state == ReplicaState.AVAILABLE)
    query = select(models.RSEFileAssociation.rse_id).where(available_replica)

    if source_rses and len(source_rses) < 10:
        rse_filters = [models.RSEFileAssociation.rse_id == source_rse_id for source_rse_id in source_rses]
        if rse_filters:
            query = query.where(or_(*rse_filters))

    return session.execute(query).scalars().all()
2772
+
2773
+
2774
@transactional_session
def get_and_lock_file_replicas_for_dataset(scope, name, nowait=False, restrict_rses=None,
                                           total_threads=None, thread_id=None,
                                           *, session: "Session"):
    """
    Get file replicas for all files of a dataset and lock the replica rows.

    Replicas in BEING_DELETED state are ignored. On PostgreSQL the dataset
    content is listed with a separate (non-locking) query and the replicas
    are then fetched with an inner join; on the other dialects a single outer
    join is used, so files without replicas are still reported.

    :param scope: The scope of the dataset.
    :param name: The name of the dataset.
    :param nowait: Nowait parameter for the FOR UPDATE statement
    :param restrict_rses: Possible RSE_ids to filter on. Only applied for a
                          non-empty list with fewer than 10 entries.
    :param total_threads: Total threads
    :param thread_id: This thread
    :param session: The db session in use.
    :returns: (files in dataset, replicas in dataset), where the files are a
              list of dictionaries and the replicas a dictionary mapping
              (scope, name) to the list of replica objects.
    """
    files, replicas = {}, {}

    base_stmt = select(
        models.DataIdentifierAssociation.child_scope,
        models.DataIdentifierAssociation.child_name,
        models.DataIdentifierAssociation.bytes,
        models.DataIdentifierAssociation.md5,
        models.DataIdentifierAssociation.adler32,
    ).where(
        and_(models.DataIdentifierAssociation.scope == scope,
             models.DataIdentifierAssociation.name == name)
    )

    stmt = base_stmt.add_columns(
        models.RSEFileAssociation
    ).where(
        and_(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
             models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
             models.RSEFileAssociation.state != ReplicaState.BEING_DELETED)
    )

    rse_clause = [true()]
    if restrict_rses is not None and len(restrict_rses) < 10:
        # An empty restriction list keeps the match-all default: calling
        # or_() without arguments is deprecated in SQLAlchemy.
        rse_clause = [models.RSEFileAssociation.rse_id == rse_id for rse_id in restrict_rses] or rse_clause

    if session.bind.dialect.name == 'postgresql':
        if total_threads and total_threads > 1:
            base_stmt = filter_thread_work(session=session,
                                           query=base_stmt,
                                           total_threads=total_threads,
                                           thread_id=thread_id,
                                           hash_variable='child_name')

        # Register every file up front, so files without replicas are reported too.
        for child_scope, child_name, bytes_, md5, adler32 in session.execute(base_stmt).yield_per(1000):
            key = (child_scope, child_name)
            files[key] = {'scope': child_scope,
                          'name': child_name,
                          'bytes': bytes_,
                          'md5': md5,
                          'adler32': adler32}
            replicas[key] = []

        stmt = stmt.where(or_(*rse_clause))
    else:
        stmt = base_stmt.add_columns(
            models.RSEFileAssociation
        ).with_hint(
            models.DataIdentifierAssociation,
            'INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)',
            'oracle'
        ).outerjoin(
            models.RSEFileAssociation,
            and_(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
                 models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
                 models.RSEFileAssociation.state != ReplicaState.BEING_DELETED,
                 or_(*rse_clause))
        )

    if total_threads and total_threads > 1:
        stmt = filter_thread_work(session=session,
                                  query=stmt,
                                  total_threads=total_threads,
                                  thread_id=thread_id,
                                  hash_variable='child_name')

    stmt = stmt.with_for_update(
        nowait=nowait,
        of=models.RSEFileAssociation.lock_cnt
    )

    for child_scope, child_name, bytes_, md5, adler32, replica in session.execute(stmt).yield_per(1000):
        key = (child_scope, child_name)
        if key not in files:
            files[key] = {'scope': child_scope,
                          'name': child_name,
                          'bytes': bytes_,
                          'md5': md5,
                          'adler32': adler32}

        # The outer join yields replica=None for files without a replica.
        file_replicas = replicas.setdefault(key, [])
        if replica is not None:
            file_replicas.append(replica)

    return (list(files.values()), replicas)
2876
+
2877
+
2878
@transactional_session
def get_source_replicas_for_dataset(scope, name, source_rses=None,
                                    total_threads=None, thread_id=None,
                                    *, session: "Session"):
    """
    Get the RSEs holding AVAILABLE replicas for all files of a dataset.

    :param scope: The scope of the dataset.
    :param name: The name of the dataset.
    :param source_rses: Possible source RSE_ids to filter on. Only applied for
                        a non-empty collection with fewer than 10 entries.
    :param total_threads: Total threads
    :param thread_id: This thread
    :param session: The db session in use.
    :returns: Dictionary mapping (child_scope, child_name) to the list of
              rse_ids with an available replica (empty list if there is none).
    """
    join_conditions = [
        models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
        models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
        models.RSEFileAssociation.state == ReplicaState.AVAILABLE,
    ]
    if source_rses and len(source_rses) < 10:
        # The RSE restriction belongs to the join condition, so files without
        # a matching replica are still listed by the outer join.
        join_conditions.append(
            or_(*[models.RSEFileAssociation.rse_id == source_rse_id for source_rse_id in source_rses])
        )

    stmt = select(
        models.DataIdentifierAssociation.child_scope,
        models.DataIdentifierAssociation.child_name,
        models.RSEFileAssociation.rse_id
    ).with_hint(
        models.DataIdentifierAssociation,
        'INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)',
        'oracle'
    ).outerjoin(
        models.RSEFileAssociation,
        and_(*join_conditions)
    ).where(
        and_(models.DataIdentifierAssociation.scope == scope,
             models.DataIdentifierAssociation.name == name)
    )

    if total_threads and total_threads > 1:
        stmt = filter_thread_work(session=session,
                                  query=stmt,
                                  total_threads=total_threads,
                                  thread_id=thread_id,
                                  hash_variable='child_name')

    replicas = {}
    for child_scope, child_name, rse_id in session.execute(stmt):
        source_list = replicas.setdefault((child_scope, child_name), [])
        if rse_id:
            source_list.append(rse_id)

    return replicas
2955
+
2956
+
2957
@read_session
def get_replica_atime(replica, *, session: "Session"):
    """
    Get the accessed_at timestamp for a replica. Just for testing.

    :param replica: Dictionary with the keys scope, name and rse_id.
    :param session: Database session to use.

    :returns: A datetime timestamp with the last access time.
    """
    same_replica = and_(models.RSEFileAssociation.scope == replica['scope'],
                        models.RSEFileAssociation.name == replica['name'],
                        models.RSEFileAssociation.rse_id == replica['rse_id'])
    query = select(
        models.RSEFileAssociation.accessed_at
    ).with_hint(
        models.RSEFileAssociation,
        'INDEX(REPLICAS REPLICAS_PK)',
        'oracle'
    ).where(
        same_replica
    )
    atime = session.execute(query).scalar_one()
    return atime
2978
+
2979
+
2980
@transactional_session
def touch_collection_replicas(collection_replicas, *, session: "Session"):
    """
    Update the accessed_at timestamp of the given collection replicas.

    Each entry may carry its own 'accessed_at'; otherwise the time at which
    this function was called is used for all of them.

    :param collection_replicas: the list of collection replicas.
    :param session: The database session in use.

    :returns: True, if successful, False otherwise.
    """
    default_atime = datetime.utcnow()

    try:
        for entry in collection_replicas:
            same_collection_replica = and_(models.CollectionReplica.scope == entry['scope'],
                                           models.CollectionReplica.name == entry['name'],
                                           models.CollectionReplica.rse_id == entry['rse_id'])
            new_values = {
                models.CollectionReplica.accessed_at: entry.get('accessed_at') or default_atime
            }
            touch_stmt = update(models.CollectionReplica).where(same_collection_replica).values(new_values)
            session.execute(touch_stmt.execution_options(synchronize_session=False))
    except DatabaseError:
        # Stop at the first database failure and report it to the caller.
        return False

    return True
3010
+
3011
+
3012
@stream_session
def list_dataset_replicas(scope, name, deep=False, *, session: "Session"):
    """
    List the replicas of a dataset.

    Without ``deep`` the pre-computed collection replicas are returned. With
    ``deep`` the metrics are calculated at file level, counting both direct
    file replicas and files available through archive constituents.

    :param scope: The scope of the dataset.
    :param name: The name of the dataset.
    :param deep: Lookup at the file level.
    :param session: Database session to use.

    :returns: A list of dictionaries containing the dataset replicas
              with associated metrics and timestamps
    """

    if not deep:
        shallow_stmt = select(
            models.CollectionReplica.scope,
            models.CollectionReplica.name,
            models.RSE.rse,
            models.CollectionReplica.rse_id,
            models.CollectionReplica.bytes,
            models.CollectionReplica.length,
            models.CollectionReplica.available_bytes,
            models.CollectionReplica.available_replicas_cnt.label("available_length"),
            models.CollectionReplica.state,
            models.CollectionReplica.created_at,
            models.CollectionReplica.updated_at,
            models.CollectionReplica.accessed_at
        ).where(
            and_(models.CollectionReplica.scope == scope,
                 models.CollectionReplica.name == name,
                 models.CollectionReplica.did_type == DIDType.DATASET,
                 models.CollectionReplica.rse_id == models.RSE.id,
                 models.RSE.deleted == false())
        )

        for row in session.execute(shallow_stmt).all():
            yield row._asdict()
        return

    # Find maximum values
    totals_stmt = select(
        func.sum(models.DataIdentifierAssociation.bytes).label("bytes"),
        func.count().label("length")
    ).select_from(
        models.DataIdentifierAssociation
    ).with_hint(
        models.DataIdentifierAssociation,
        'INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)',
        'oracle'
    ).where(
        and_(models.DataIdentifierAssociation.scope == scope,
             models.DataIdentifierAssociation.name == name)
    )

    bytes_, length = session.execute(totals_stmt).one()
    bytes_ = bytes_ or 0  # the sum is NULL for an empty dataset

    # Find archives that contain files of the requested dataset
    archive_sub = select(
        models.DataIdentifierAssociation.scope.label('dataset_scope'),
        models.DataIdentifierAssociation.name.label('dataset_name'),
        models.DataIdentifierAssociation.bytes.label('file_bytes'),
        models.ConstituentAssociation.child_scope.label('file_scope'),
        models.ConstituentAssociation.child_name.label('file_name'),
        models.RSEFileAssociation.scope.label('replica_scope'),
        models.RSEFileAssociation.name.label('replica_name'),
        models.RSE.rse,
        models.RSE.id.label('rse_id'),
        models.RSEFileAssociation.created_at,
        models.RSEFileAssociation.accessed_at,
        models.RSEFileAssociation.updated_at
    ).where(
        and_(models.DataIdentifierAssociation.scope == scope,
             models.DataIdentifierAssociation.name == name,
             models.ConstituentAssociation.child_scope == models.DataIdentifierAssociation.child_scope,
             models.ConstituentAssociation.child_name == models.DataIdentifierAssociation.child_name,
             models.ConstituentAssociation.scope == models.RSEFileAssociation.scope,
             models.ConstituentAssociation.name == models.RSEFileAssociation.name,
             models.RSEFileAssociation.rse_id == models.RSE.id,
             models.RSEFileAssociation.state == ReplicaState.AVAILABLE,
             models.RSE.deleted == false())
    ).subquery()

    # Count the metrics
    per_file_keys = (
        archive_sub.c.dataset_scope,
        archive_sub.c.dataset_name,
        archive_sub.c.file_scope,
        archive_sub.c.file_name,
        archive_sub.c.rse_id,
        archive_sub.c.rse,
    )
    archive_per_file = select(
        *per_file_keys,
        func.sum(archive_sub.c.file_bytes).label('file_bytes'),
        func.min(archive_sub.c.created_at).label('created_at'),
        func.max(archive_sub.c.updated_at).label('updated_at'),
        func.max(archive_sub.c.accessed_at).label('accessed_at')
    ).group_by(
        *per_file_keys
    ).subquery()

    # Bring it in the same column state as the non-archive query
    archive_stmt = select(
        archive_per_file.c.dataset_scope.label('scope'),
        archive_per_file.c.dataset_name.label('name'),
        archive_per_file.c.rse_id,
        archive_per_file.c.rse,
        func.sum(archive_per_file.c.file_bytes).label('available_bytes'),
        func.count().label('available_length'),
        func.min(archive_per_file.c.created_at).label('created_at'),
        func.max(archive_per_file.c.updated_at).label('updated_at'),
        func.max(archive_per_file.c.accessed_at).label('accessed_at')
    ).group_by(
        archive_per_file.c.dataset_scope,
        archive_per_file.c.dataset_name,
        archive_per_file.c.rse_id,
        archive_per_file.c.rse
    )

    # Find the non-archive dataset replicas
    direct_sub = select(
        models.DataIdentifierAssociation.scope,
        models.DataIdentifierAssociation.name,
        models.RSEFileAssociation.rse_id,
        func.sum(models.RSEFileAssociation.bytes).label("available_bytes"),
        func.count().label("available_length"),
        func.min(models.RSEFileAssociation.created_at).label("created_at"),
        func.max(models.RSEFileAssociation.updated_at).label("updated_at"),
        func.max(models.RSEFileAssociation.accessed_at).label("accessed_at")
    ).with_hint(
        models.DataIdentifierAssociation,
        'INDEX_RS_ASC(CONTENTS CONTENTS_PK) INDEX_RS_ASC(REPLICAS REPLICAS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)',
        'oracle'
    ).where(
        and_(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
             models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
             models.DataIdentifierAssociation.scope == scope,
             models.DataIdentifierAssociation.name == name,
             models.RSEFileAssociation.state == ReplicaState.AVAILABLE)
    ).group_by(
        models.DataIdentifierAssociation.scope,
        models.DataIdentifierAssociation.name,
        models.RSEFileAssociation.rse_id
    ).subquery()

    direct_stmt = select(
        direct_sub.c.scope,
        direct_sub.c.name,
        direct_sub.c.rse_id,
        models.RSE.rse,
        direct_sub.c.available_bytes,
        direct_sub.c.available_length,
        direct_sub.c.created_at,
        direct_sub.c.updated_at,
        direct_sub.c.accessed_at
    ).where(
        and_(direct_sub.c.rse_id == models.RSE.id,
             models.RSE.deleted == false())
    )

    # Join everything together
    combined_stmt = direct_stmt.union_all(archive_stmt)
    for row in session.execute(combined_stmt).all():
        replica = row._asdict()
        replica['length'] = length
        replica['bytes'] = bytes_
        # The dataset replica is complete only if every file of the dataset is available.
        fully_available = length == row.available_length
        replica['state'] = ReplicaState.AVAILABLE if fully_available else ReplicaState.UNAVAILABLE
        yield replica
3184
+
3185
+
3186
@stream_session
def list_dataset_replicas_bulk(names_by_intscope, *, session: "Session"):
    """
    Stream the dataset replicas of several datasets, grouped by scope.

    Only collection replicas of type DATASET located on RSEs that are not
    flagged as deleted are considered.

    :param names_by_intscope: The dictionary of internal scopes pointing at the list of names.
    :param session: Database session to use.

    :returns: A generator of dictionaries containing the dataset replicas
              with associated metrics and timestamps
    """
    # One clause per scope: "this scope AND name is one of the requested names"
    per_scope_clauses = [
        and_(models.CollectionReplica.scope == intscope,
             models.CollectionReplica.name.in_(names_by_intscope[intscope]))
        for intscope in names_by_intscope
    ]

    selected_columns = (
        models.CollectionReplica.scope,
        models.CollectionReplica.name,
        models.RSE.rse,
        models.CollectionReplica.rse_id,
        models.CollectionReplica.bytes,
        models.CollectionReplica.length,
        models.CollectionReplica.available_bytes,
        models.CollectionReplica.available_replicas_cnt.label("available_length"),
        models.CollectionReplica.state,
        models.CollectionReplica.created_at,
        models.CollectionReplica.updated_at,
        models.CollectionReplica.accessed_at,
    )

    try:
        # The chunk size is the number of different scopes combined in one query
        for scope_clauses in chunks(per_scope_clauses, 10):
            stmt = select(
                *selected_columns
            ).where(
                and_(models.CollectionReplica.did_type == DIDType.DATASET,
                     models.CollectionReplica.rse_id == models.RSE.id,
                     models.RSE.deleted == false(),
                     or_(*scope_clauses))
            )

            rows = session.execute(stmt).all()
            for row in rows:
                yield row._asdict()
    except NoResultFound:
        raise exception.DataIdentifierNotFound('No Data Identifiers found')
3228
+
3229
+
3230
@stream_session
def list_dataset_replicas_vp(scope, name, deep=False, *, session: "Session", logger=logging.log):
    """
    List dataset replicas for a DID (scope:name) using the
    Virtual Placement service.

    NOTICE: This is an RnD function and might change or go away at any time.

    The VP sites are only yielded when the VP service answered with HTTP 200
    and the dataset has at least one AVAILABLE replica on a non-TAPE RSE that
    offers a "root" protocol readable from the WAN.

    :param scope: The scope of the dataset.
    :param name: The name of the dataset.
    :param deep: Lookup at the file level.
    :param session: Database session to use.
    :param logger: Logging callable taking a level and a message.

    :returns: A generator of dicts ``{'vp': True, 'site': <site>}``; nothing is yielded otherwise.
    """
    no_vp_answer = ['other']
    max_sites = 5  # force limit reply size

    endpoint = get_vp_endpoint()
    if not endpoint:
        # NOTE(review): this function is a generator, so the returned value is
        # not part of the iteration; callers looping over it get nothing here.
        return no_vp_answer

    try:
        # NOTE(review): certificate verification is disabled for this request.
        response = requests.get('{}/ds/{}/{}:{}'.format(endpoint, max_sites, scope, name),
                                verify=False,
                                timeout=1)
        sites = response.json() if response.status_code == 200 else no_vp_answer
    except requests.exceptions.RequestException as error:
        logger(logging.ERROR, 'In list_dataset_replicas_vp, could not access {}. Error:{}'.format(endpoint, error))
        sites = no_vp_answer

    if sites == ['other']:
        return

    # check that there is at least one regular replica
    # that is not on tape and has a protocol with scheme "root"
    # and can be accessed from WAN
    found_accessible_replica = False
    for dataset_replica in list_dataset_replicas(scope=scope, name=name, deep=deep, session=session):
        if dataset_replica['state'] != ReplicaState.AVAILABLE:
            continue
        rse_info = rsemgr.get_rse_info(rse=dataset_replica['rse'], vo=scope.vo, session=session)
        if rse_info['rse_type'] == 'TAPE':
            continue
        if any(protocol['scheme'] == 'root' and protocol['domains']['wan']['read']
               for protocol in rse_info['protocols']):
            found_accessible_replica = True
            break

    if not found_accessible_replica:
        return

    for site in sites:
        yield {'vp': True, 'site': site}
3284
+
3285
+
3286
@stream_session
def list_datasets_per_rse(rse_id, filters=None, limit=None, *, session: "Session"):
    """
    List datasets at a RSE.

    :param rse_id: the rse id.
    :param filters: dictionary of attributes by which the results should be filtered.
                    'name' and 'scope' accept '*' / '%' wildcards, 'created_before' and
                    'created_after' are parsed with str_to_date, any other key is
                    compared for equality against the collection replica attribute of that name.
    :param limit: limit number.
    :param session: Database session to use.

    :returns: A generator of dict dataset replicas
    """
    replica = models.CollectionReplica

    stmt = select(
        replica.scope,
        replica.name,
        models.RSE.id.label('rse_id'),
        models.RSE.rse,
        replica.bytes,
        replica.length,
        replica.available_bytes,
        replica.available_replicas_cnt.label("available_length"),
        replica.state,
        replica.created_at,
        replica.updated_at,
        replica.accessed_at
    ).where(
        and_(replica.did_type == DIDType.DATASET,
             replica.rse_id == models.RSE.id,
             models.RSE.deleted == false(),
             models.RSE.id == rse_id)
    )

    filter_items = filters.items() if filters else []
    for key, value in filter_items:
        if key in ('name', 'scope'):
            # Scopes are matched on their internal string representation
            text_value = value.internal if key == 'scope' else value
            column = getattr(replica, key)
            if '*' in text_value or '%' in text_value:
                like_pattern = text_value.replace('*', '%')
                if session.bind.dialect.name == 'postgresql':  # PostgreSQL escapes automatically
                    stmt = stmt.where(column.like(like_pattern))
                else:
                    stmt = stmt.where(column.like(like_pattern, escape='\\'))
            else:
                stmt = stmt.where(column == value)
        elif key == 'created_before':
            stmt = stmt.where(replica.created_at <= str_to_date(value))
        elif key == 'created_after':
            stmt = stmt.where(replica.created_at >= str_to_date(value))
        else:
            stmt = stmt.where(getattr(replica, key) == value)

    if limit:
        stmt = stmt.limit(limit)

    rows = session.execute(stmt).all()
    for row in rows:
        yield row._asdict()
3343
+
3344
+
3345
@stream_session
def list_replicas_per_rse(
        rse_id: str,
        limit: "Optional[int]" = None,
        *,
        session: "Session"
) -> "Iterator[dict[str, Any]]":
    """
    List all replicas at a given RSE.

    :param rse_id: The id of the RSE whose replicas are listed.
    :param limit: Optional maximum number of replicas; a falsy value means no limit.
    :param session: Database session to use.

    :returns: A generator of replica dictionaries (the result of ``to_dict()`` on each row).
    """
    replicas_at_rse = select(models.RSEFileAssociation).where(models.RSEFileAssociation.rse_id == rse_id)
    if limit:
        replicas_at_rse = replicas_at_rse.limit(limit)

    # Rows are fetched in batches of 100 rather than all at once
    replica_rows = session.execute(replicas_at_rse).yield_per(100).scalars()
    for replica_row in replica_rows:
        yield replica_row.to_dict()
3364
+
3365
+
3366
@transactional_session
def get_cleaned_updated_collection_replicas(total_workers, worker_number, limit=None, *, session: "Session"):
    """
    Clean up the collection replica update requests and return the remaining ones.

    Update requests that no longer match any collection replica, as well as
    duplicated requests, are deleted before the remaining requests are read.

    :param total_workers: Number of total workers (not used in this function body).
    :param worker_number: id of the executing worker (not used in this function body).
    :param limit: Maximum number of update requests to return.
    :param session: Database session in use.
    :returns: List of update requests for collection replicas, as dictionaries.
    """
    request = models.UpdatedCollectionReplica
    replica = models.CollectionReplica

    # Delete update requests without rse_id for which no collection replica exists at all
    delete_orphans_without_rse = delete(
        request
    ).where(
        and_(request.rse_id.is_(None),
             ~exists().where(
                 and_(replica.name == request.name,
                      replica.scope == request.scope)))
    ).execution_options(
        synchronize_session=False
    )
    session.execute(delete_orphans_without_rse)

    # Delete update requests with rse_id which do not have a collection replica on that RSE
    delete_orphans_with_rse = delete(
        request
    ).where(
        and_(request.rse_id.isnot(None),
             ~exists().where(
                 and_(replica.name == request.name,
                      replica.scope == request.scope,
                      replica.rse_id == request.rse_id)))
    ).execution_options(
        synchronize_session=False
    )
    session.execute(delete_orphans_with_rse)

    # Delete duplicates
    dialect = session.bind.dialect.name
    if dialect == 'oracle':
        schema = BASE.metadata.schema + '.' if BASE.metadata.schema else ''
        # Removes every row whose rowid is greater than that of another row with the
        # same scope, name, did_type and rse_id (NULL rse_ids are treated as equal)
        session.execute(text('DELETE FROM {schema}updated_col_rep A WHERE A.rowid > ANY (SELECT B.rowid FROM {schema}updated_col_rep B WHERE A.scope = B.scope AND A.name=B.name AND A.did_type=B.did_type AND (A.rse_id=B.rse_id OR (A.rse_id IS NULL and B.rse_id IS NULL)))'.format(schema=schema)))  # NOQA: E501
    elif dialect == 'mysql':
        # Keep only the request with the highest id per (scope, name, rse_id)
        newest_per_group = select(
            func.max(request.id).label('max_id')
        ).group_by(
            request.scope,
            request.name,
            request.rse_id
        ).subquery()

        ids_to_keep = select(
            newest_per_group.c.max_id
        )

        delete_older_duplicates = delete(
            request
        ).where(
            request.id.not_in(ids_to_keep)
        ).execution_options(
            synchronize_session=False
        )
        session.execute(delete_older_duplicates)
    else:
        # Generic path: load every request and keep the first one seen per key
        seen_with_rse_id = []
        seen_without_rse_id = []
        duplicate_ids = []
        for candidate in session.execute(select(request)).scalars().all():
            if candidate.rse_id is not None:
                key = {'name': candidate.name, 'scope': candidate.scope, 'rse_id': candidate.rse_id}
                seen = seen_with_rse_id
            else:
                key = {'name': candidate.name, 'scope': candidate.scope}
                seen = seen_without_rse_id

            if key in seen:
                duplicate_ids.append(candidate.id)
            else:
                seen.append(key)

        for id_batch in chunks(duplicate_ids, 100):
            delete_batch = delete(
                request
            ).where(
                request.id.in_(id_batch)
            ).execution_options(
                synchronize_session=False
            )
            session.execute(delete_batch)

    remaining = select(request)
    if limit:
        remaining = remaining.limit(limit)
    return [row.to_dict() for row in session.execute(remaining).scalars().all()]
3464
+
3465
+
3466
@transactional_session
def update_collection_replica(update_request, *, session: "Session"):
    """
    Apply a collection replica update request, then delete the request.

    With an ``rse_id`` in the request, only the collection replica on that RSE
    is refreshed (or deleted when it had available replicas before and has
    none now). Without an ``rse_id``, length and bytes of every collection
    replica of the dataset are refreshed from the dataset content, and the
    availability figures are refreshed per RSE.

    :param update_request: update request from the updated_col_rep table, as a dictionary;
                           the keys 'scope', 'name', 'rse_id' and 'id' are read.
    :param session: Database session in use.
    """
    rse_id = update_request['rse_id']
    scope = update_request['scope']
    name = update_request['name']

    if rse_id is not None:
        # Check one specific dataset replica
        this_replica = and_(models.CollectionReplica.scope == scope,
                            models.CollectionReplica.name == name,
                            models.CollectionReplica.rse_id == rse_id)
        select_this_replica = select(models.CollectionReplica).where(this_replica)

        # Defaults used when the lookups below find nothing
        ds_length = 0
        old_available_replicas = 0
        ds_bytes = 0
        ds_available_bytes = 0
        available_replicas = 0

        try:
            current_replica = session.execute(select_this_replica).scalar_one()
            ds_length = current_replica.length
            old_available_replicas = current_replica.available_replicas_cnt
            ds_bytes = current_replica.bytes
        except NoResultFound:
            pass

        try:
            # Count and sum the AVAILABLE file replicas of the dataset content on this RSE
            availability_stmt = select(
                func.sum(models.RSEFileAssociation.bytes).label('ds_available_bytes'),
                func.count().label('available_replicas')
            ).select_from(
                models.RSEFileAssociation
            ).where(
                and_(models.RSEFileAssociation.scope == models.DataIdentifierAssociation.child_scope,
                     models.RSEFileAssociation.name == models.DataIdentifierAssociation.child_name,
                     models.RSEFileAssociation.rse_id == rse_id,
                     models.RSEFileAssociation.state == ReplicaState.AVAILABLE,
                     models.DataIdentifierAssociation.name == name,
                     models.DataIdentifierAssociation.scope == scope)
            )
            availability = session.execute(availability_stmt).one()

            available_replicas = availability.available_replicas
            ds_available_bytes = availability.ds_available_bytes
        except NoResultFound:
            pass

        ds_replica_state = ReplicaState.AVAILABLE if available_replicas >= ds_length else ReplicaState.UNAVAILABLE

        had_replicas_before = old_available_replicas is not None and old_available_replicas > 0
        if had_replicas_before and available_replicas == 0:
            # The last available file replica is gone: drop the collection replica
            session.execute(delete(models.CollectionReplica).where(this_replica))
        else:
            updated_replica = session.execute(select_this_replica).scalar_one()

            updated_replica.state = ds_replica_state
            updated_replica.available_replicas_cnt = available_replicas
            updated_replica.length = ds_length
            updated_replica.bytes = ds_bytes
            updated_replica.available_bytes = ds_available_bytes
    else:
        content_stmt = select(
            func.count().label('ds_length'),
            func.sum(models.DataIdentifierAssociation.bytes).label('ds_bytes')
        ).select_from(
            models.DataIdentifierAssociation
        ).where(
            and_(models.DataIdentifierAssociation.scope == scope,
                 models.DataIdentifierAssociation.name == name)
        )
        association = session.execute(content_stmt).one()

        # Check all dataset replicas
        ds_length = association.ds_length
        ds_bytes = association.ds_bytes

        all_replicas_stmt = select(
            models.CollectionReplica
        ).where(
            and_(models.CollectionReplica.scope == scope,
                 models.CollectionReplica.name == name)
        )
        for collection_replica in session.execute(all_replicas_stmt).scalars().all():
            # Falsy values (None or 0) are stored as 0
            collection_replica.length = ds_length if ds_length else 0
            collection_replica.bytes = ds_bytes if ds_bytes else 0

        per_rse_stmt = select(
            func.sum(models.RSEFileAssociation.bytes).label('ds_available_bytes'),
            func.count().label('available_replicas'),
            models.RSEFileAssociation.rse_id
        ).select_from(
            models.RSEFileAssociation
        ).where(
            and_(models.RSEFileAssociation.scope == models.DataIdentifierAssociation.child_scope,
                 models.RSEFileAssociation.name == models.DataIdentifierAssociation.child_name,
                 models.RSEFileAssociation.state == ReplicaState.AVAILABLE,
                 models.DataIdentifierAssociation.name == name,
                 models.DataIdentifierAssociation.scope == scope)
        ).group_by(
            models.RSEFileAssociation.rse_id
        )

        for per_rse in session.execute(per_rse_stmt).all():
            if per_rse.available_replicas >= ds_length:
                ds_replica_state = ReplicaState.AVAILABLE
            else:
                ds_replica_state = ReplicaState.UNAVAILABLE

            replica_on_rse_stmt = select(
                models.CollectionReplica
            ).where(
                and_(models.CollectionReplica.scope == scope,
                     models.CollectionReplica.name == name,
                     models.CollectionReplica.rse_id == per_rse.rse_id)
            )
            replica_on_rse = session.execute(replica_on_rse_stmt).scalars().first()
            if replica_on_rse:
                replica_on_rse.state = ds_replica_state
                replica_on_rse.available_replicas_cnt = per_rse.available_replicas
                replica_on_rse.available_bytes = per_rse.ds_available_bytes

    # The request has been handled: remove it from the queue table
    remove_request = delete(
        models.UpdatedCollectionReplica
    ).where(
        models.UpdatedCollectionReplica.id == update_request['id']
    )
    session.execute(remove_request)
3620
+
3621
+
3622
@read_session
def get_bad_pfns(limit=10000, thread=None, total_threads=None, *, session: "Session"):
    """
    Returns a list of bad PFNs, ordered by creation time.

    :param limit: The maximum number of replicas returned.
    :param thread: The assigned thread for this minos instance.
    :param total_threads: The total number of minos threads.
    :param session: The database session in use.

    returns: list of PFNs {'pfn': pfn, 'state': state, 'reason': reason, 'account': account, 'expires_at': expires_at}
    """
    bad_pfn_stmt = select(
        models.BadPFN.path,
        models.BadPFN.state,
        models.BadPFN.reason,
        models.BadPFN.account,
        models.BadPFN.expires_at
    )
    # Restrict the rows to the share of this thread, hashing on the path column
    bad_pfn_stmt = filter_thread_work(session=session, query=bad_pfn_stmt, total_threads=total_threads, thread_id=thread, hash_variable='path')
    bad_pfn_stmt = bad_pfn_stmt.order_by(models.BadPFN.created_at).limit(limit)

    return [
        {'pfn': clean_pfns([str(path)])[0], 'state': state, 'reason': reason, 'account': account, 'expires_at': expires_at}
        for path, state, reason, account, expires_at in session.execute(bad_pfn_stmt).yield_per(1000)
    ]
3653
+
3654
+
3655
@transactional_session
def bulk_add_bad_replicas(replicas, account, state=BadFilesStatus.TEMPORARY_UNAVAILABLE, reason=None, expires_at=None, *, session: "Session"):
    """
    Bulk add new bad replicas.

    For the TEMPORARY_UNAVAILABLE state an already existing entry for the same
    scope, name, RSE and state is updated in place instead of inserting a new row.

    :param replicas: the list of bad replicas (dictionaries with 'scope', 'name' and 'rse_id').
    :param account: The account who declared the bad replicas.
    :param state: The state of the file (SUSPICIOUS, BAD or TEMPORARY_UNAVAILABLE).
    :param reason: The reason stored with the bad replica entries.
    :param expires_at: The expiration date stored with the bad replica entries.
    :param session: The database session in use.

    :returns: True is successful.
    """
    for replica in replicas:
        same_entry = and_(models.BadReplica.scope == replica['scope'],
                          models.BadReplica.name == replica['name'],
                          models.BadReplica.rse_id == replica['rse_id'],
                          models.BadReplica.state == state)

        if state == BadFilesStatus.TEMPORARY_UNAVAILABLE:
            existing_entry = session.execute(select(models.BadReplica).where(same_entry)).scalar_one_or_none()
            if existing_entry:
                # Refresh the existing declaration rather than adding another row
                refresh_stmt = update(
                    models.BadReplica
                ).where(
                    same_entry
                ).values({
                    models.BadReplica.state: BadFilesStatus.TEMPORARY_UNAVAILABLE,
                    models.BadReplica.updated_at: datetime.utcnow(),
                    models.BadReplica.account: account,
                    models.BadReplica.reason: reason,
                    models.BadReplica.expires_at: expires_at
                }).execution_options(
                    synchronize_session=False
                )
                session.execute(refresh_stmt)
                continue

        bad_replica = models.BadReplica(scope=replica['scope'], name=replica['name'], rse_id=replica['rse_id'], reason=reason,
                                        state=state, account=account, bytes=None, expires_at=expires_at)
        bad_replica.save(session=session, flush=False)

    try:
        session.flush()
    except (IntegrityError, DatabaseError) as error:
        raise exception.RucioException(error.args)
    except FlushError as error:
        if match('New instance .* with identity key .* conflicts with persistent instance', error.args[0]):
            raise exception.DataIdentifierAlreadyExists('Data Identifier already exists!')
        raise exception.RucioException(error.args)
    return True
3711
+
3712
+
3713
@transactional_session
def bulk_delete_bad_pfns(pfns, *, session: "Session"):
    """
    Bulk delete bad PFNs.

    :param pfns: the list of PFNs (paths) to remove from the bad PFN table.
    :param session: The database session in use.

    :returns: True is successful.
    """
    path_matches = [models.BadPFN.path == pfn for pfn in pfns]

    # Delete in batches of 100 paths per statement
    for batch in chunks(path_matches, 100):
        delete_batch = delete(models.BadPFN).where(or_(*batch)).execution_options(synchronize_session=False)
        session.execute(delete_batch)

    return True
3738
+
3739
+
3740
@transactional_session
def bulk_delete_bad_replicas(bad_replicas, *, session: "Session"):
    """
    Bulk delete bad replica.

    :param bad_replicas: The list of bad replicas to delete (Dictionaries with
                         'scope', 'name', 'rse_id' and 'state').
    :param session: The database session in use.

    :returns: True is successful.
    """
    replica_matches = [
        and_(models.BadReplica.scope == bad_replica['scope'],
             models.BadReplica.name == bad_replica['name'],
             models.BadReplica.rse_id == bad_replica['rse_id'],
             models.BadReplica.state == bad_replica['state'])
        for bad_replica in bad_replicas
    ]

    # Delete in batches of 100 replicas per statement
    for batch in chunks(replica_matches, 100):
        delete_batch = delete(models.BadReplica).where(or_(*batch)).execution_options(synchronize_session=False)
        session.execute(delete_batch)
    return True
3767
+
3768
+
3769
@transactional_session
def add_bad_pfns(pfns, account, state, reason=None, expires_at=None, *, session: "Session"):
    """
    Add bad PFNs.

    :param pfns: the list of new files.
    :param account: The account who declared the bad replicas.
    :param state: One of the possible states : BAD, SUSPICIOUS, TEMPORARY_UNAVAILABLE.
                  A string is looked up by name in BadPFNStatus.
    :param reason: A string describing the reason of the loss.
    :param expires_at: Specify a timeout for the TEMPORARY_UNAVAILABLE replicas. None for BAD files.
    :param session: The database session in use.

    :raises InputValidationError: if expires_at is missing for TEMPORARY_UNAVAILABLE or given for BAD.
    :raises Duplicate: if the flush reports a conflicting persistent instance.
    :raises RucioException: on any other integrity, database or flush error.

    :returns: True is successful.
    """
    rep_state = BadPFNStatus[state] if isinstance(state, str) else state

    if rep_state == BadPFNStatus.TEMPORARY_UNAVAILABLE and expires_at is None:
        raise exception.InputValidationError("When adding a TEMPORARY UNAVAILABLE pfn the expires_at value should be set.")
    if rep_state == BadPFNStatus.BAD and expires_at is not None:
        raise exception.InputValidationError("When adding a BAD pfn the expires_at value shouldn't be set.")

    for cleaned_pfn in clean_pfns(pfns):
        bad_pfn = models.BadPFN(path=str(cleaned_pfn), account=account, state=rep_state, reason=reason, expires_at=expires_at)
        # merge() returns the session-attached instance, which is the one to save
        bad_pfn = session.merge(bad_pfn)
        bad_pfn.save(session=session, flush=False)

    try:
        session.flush()
    except (IntegrityError, DatabaseError) as error:
        raise exception.RucioException(error.args)
    except FlushError as error:
        if match('New instance .* with identity key .* conflicts with persistent instance', error.args[0]):
            raise exception.Duplicate('One PFN already exists!')
        raise exception.RucioException(error.args)
    return True
3811
+
3812
+
3813
@read_session
def list_expired_temporary_unavailable_replicas(total_workers, worker_number, limit=10000, *, session: "Session"):
    """
    List the expired temporary unavailable replicas

    Selects the bad replica entries in state TEMPORARY_UNAVAILABLE whose
    expires_at lies before the current UTC time, ordered by expires_at.

    :param total_workers: Number of total workers.
    :param worker_number: id of the executing worker.
    :param limit: The maximum number of replicas returned.
    :param session: The database session in use.

    :returns: A list of rows (scope, name, rse_id).
    """

    stmt = select(
        models.BadReplica.scope,
        models.BadReplica.name,
        models.BadReplica.rse_id,
    ).with_hint(
        # The hint names the bad_replicas table, so it is attached to the
        # BadReplica model this query selects from (it was previously attached
        # to ReplicationRule, which is not part of this statement).
        models.BadReplica,
        'INDEX(bad_replicas BAD_REPLICAS_EXPIRES_AT_IDX)',
        'oracle'
    ).where(
        and_(models.BadReplica.state == BadFilesStatus.TEMPORARY_UNAVAILABLE,
             models.BadReplica.expires_at < datetime.utcnow())
    ).order_by(
        models.BadReplica.expires_at
    )

    # Restrict the rows to the share of this worker, hashing on the name column
    stmt = filter_thread_work(session=session, query=stmt, total_threads=total_workers, thread_id=worker_number, hash_variable='name')
    stmt = stmt.limit(limit)

    return session.execute(stmt).all()
3843
+
3844
+
3845
@read_session
def get_replicas_state(scope=None, name=None, *, session: "Session"):
    """
    Method used by the necromancer to get all the replicas of a DIDs

    :param scope: The scope of the file.
    :param name: The name of the file.
    :param session: The database session in use.

    :returns: A dictionary with the replica states as keys and the list of rse_ids in that state as value
    """
    replica_states_stmt = select(
        models.RSEFileAssociation.rse_id,
        models.RSEFileAssociation.state
    ).where(
        and_(models.RSEFileAssociation.scope == scope,
             models.RSEFileAssociation.name == name)
    )

    rse_ids_by_state = {}
    for rse_id, replica_state in session.execute(replica_states_stmt).all():
        rse_ids_by_state.setdefault(replica_state, []).append(rse_id)
    return rse_ids_by_state
3870
+
3871
+
3872
+ @read_session
3873
+ def get_suspicious_files(
3874
+ rse_expression: str,
3875
+ available_elsewhere: int,
3876
+ filter_: "Optional[dict[str, Any]]" = None,
3877
+ logger: "LoggerFunction" = logging.log,
3878
+ younger_than: "Optional[datetime]" = None,
3879
+ nattempts: int = 0,
3880
+ nattempts_exact: bool = False,
3881
+ *,
3882
+ session: "Session",
3883
+ exclude_states: "Optional[Iterable[str]]" = None,
3884
+ is_suspicious: bool = False
3885
+ ) -> "list[dict[str, Any]]":
3886
+ """
3887
+ Gets a list of replicas from bad_replicas table which are: declared more than <nattempts> times since <younger_than> date,
3888
+ present on the RSE specified by the <rse_expression> and do not have a state in <exclude_states> list.
3889
+ Selected replicas can also be required to be <available_elsewhere> on another RSE than the one declared in bad_replicas table and/or
3890
+ be declared as <is_suspicious> in the bad_replicas table.
3891
+ Keyword Arguments:
3892
+ :param younger_than: Datetime object to select the replicas which were declared since younger_than date. Default value = 10 days ago.
3893
+ :param nattempts: The minimum number of replica appearances in the bad_replica DB table from younger_than date. Default value = 0.
3894
+ :param nattempts_exact: If True, then only replicas with exactly 'nattempts' appearances in the bad_replica DB table are retrieved. Replicas with more appearances are ignored.
3895
+ :param rse_expression: The RSE expression where the replicas are located.
3896
+ :param filter_: Dictionary of attributes by which the RSE results should be filtered. e.g.: {'availability_write': True}
3897
+ :param exclude_states: List of states which eliminates replicas from search result if any of the states in the list
3898
+ was declared for a replica since younger_than date. Allowed values
3899
+ = ['B', 'R', 'D', 'L', 'T', 'S'] (meaning 'BAD', 'RECOVERED', 'DELETED', 'LOST', 'TEMPORARY_UNAVAILABLE', 'SUSPICIOUS').
3900
+ :param available_elsewhere: Default: SuspiciousAvailability["ALL"].value, all suspicious replicas are returned.
3901
+ If SuspiciousAvailability["EXIST_COPIES"].value, only replicas that additionally have copies declared as AVAILABLE on at least one other RSE
3902
+ than the one in the bad_replicas table will be taken into account.
3903
+ If SuspiciousAvailability["LAST_COPY"].value, only replicas that do not have another copy declared as AVAILABLE on another RSE will be taken into account.
3904
+ :param is_suspicious: If True, only replicas declared as SUSPICIOUS in bad replicas table will be taken into account. Default value = False.
3905
+ :param session: The database session in use. Default value = None.
3906
+
3907
+ :returns: a list of replicas:
3908
+ [{'scope': scope, 'name': name, 'rse': rse, 'rse_id': rse_id, cnt': cnt, 'created_at': created_at}, ...]
3909
+ """
3910
+
3911
+ exclude_states = exclude_states or ['B', 'R', 'D']
3912
+ if available_elsewhere not in [SuspiciousAvailability["ALL"].value, SuspiciousAvailability["EXIST_COPIES"].value, SuspiciousAvailability["LAST_COPY"].value]:
3913
+ logger(logging.WARNING, """ERROR, available_elsewhere must be set to one of the following:
3914
+ SuspiciousAvailability["ALL"].value: (default) all suspicious replicas are returned
3915
+ SuspiciousAvailability["EXIST_COPIES"].value: only replicas that additionally have copies declared as AVAILABLE on at least one other RSE are returned
3916
+ SuspiciousAvailability["LAST_COPY"].value: only replicas that do not have another copy declared as AVAILABLE on another RSE are returned""")
3917
+ raise exception.RucioException("""ERROR, available_elsewhere must be set to one of the following:
3918
+ SuspiciousAvailability["ALL"].value: (default) all suspicious replicas are returned
3919
+ SuspiciousAvailability["EXIST_COPIES"].value: only replicas that additionally have copies declared as AVAILABLE on at least one other RSE are returned
3920
+ SuspiciousAvailability["LAST_COPY"].value: only replicas that do not have another copy declared as AVAILABLE on another RSE are returned""")
3921
+
3922
+ # only for the 2 web api used parameters, checking value types and assigning the default values
3923
+ if not isinstance(nattempts, int):
3924
+ nattempts = 0
3925
+ if not isinstance(younger_than, datetime):
3926
+ younger_than = datetime.utcnow() - timedelta(days=10)
3927
+
3928
+ # assembling exclude_states_clause
3929
+ exclude_states_clause = []
3930
+ for state in exclude_states:
3931
+ exclude_states_clause.append(BadFilesStatus(state))
3932
+
3933
+ # making aliases for bad_replicas and replicas tables
3934
+ bad_replicas_alias = aliased(models.BadReplica, name='bad_replicas_alias')
3935
+ replicas_alias = aliased(models.RSEFileAssociation, name='replicas_alias')
3936
+
3937
+ # assembling the selection rse_clause
3938
+ rse_clause = []
3939
+ if rse_expression:
3940
+ parsedexp = parse_expression(expression=rse_expression, filter_=filter_, session=session)
3941
+ for rse in parsedexp:
3942
+ rse_clause.append(models.RSEFileAssociation.rse_id == rse['id'])
3943
+
3944
+ stmt = select(
3945
+ func.count(),
3946
+ bad_replicas_alias.scope,
3947
+ bad_replicas_alias.name,
3948
+ models.RSEFileAssociation.rse_id,
3949
+ func.min(models.RSEFileAssociation.created_at)
3950
+ ).select_from(
3951
+ bad_replicas_alias
3952
+ ).where(
3953
+ models.RSEFileAssociation.rse_id == bad_replicas_alias.rse_id,
3954
+ models.RSEFileAssociation.scope == bad_replicas_alias.scope,
3955
+ models.RSEFileAssociation.name == bad_replicas_alias.name,
3956
+ bad_replicas_alias.created_at >= younger_than
3957
+ )
3958
+ if is_suspicious:
3959
+ stmt = stmt.where(bad_replicas_alias.state == BadFilesStatus.SUSPICIOUS)
3960
+ if rse_clause:
3961
+ stmt = stmt.where(or_(*rse_clause))
3962
+
3963
+ # Only return replicas that have at least one copy on another RSE
3964
+ if available_elsewhere == SuspiciousAvailability["EXIST_COPIES"].value:
3965
+ available_replica = exists(select(1)
3966
+ .where(and_(replicas_alias.state == ReplicaState.AVAILABLE,
3967
+ replicas_alias.scope == bad_replicas_alias.scope,
3968
+ replicas_alias.name == bad_replicas_alias.name,
3969
+ replicas_alias.rse_id != bad_replicas_alias.rse_id)))
3970
+ stmt = stmt.where(available_replica)
3971
+
3972
+ # Only return replicas that are the last remaining copy
3973
+ if available_elsewhere == SuspiciousAvailability["LAST_COPY"].value:
3974
+ last_replica = ~exists(select(1)
3975
+ .where(and_(replicas_alias.state == ReplicaState.AVAILABLE,
3976
+ replicas_alias.scope == bad_replicas_alias.scope,
3977
+ replicas_alias.name == bad_replicas_alias.name,
3978
+ replicas_alias.rse_id != bad_replicas_alias.rse_id)))
3979
+ stmt = stmt.where(last_replica)
3980
+
3981
+ # it is required that the selected replicas
3982
+ # do not occur as BAD/DELETED/LOST/RECOVERED/...
3983
+ # in the bad_replicas table during the same time window.
3984
+ other_states_present = exists(select(1)
3985
+ .where(and_(models.BadReplica.scope == bad_replicas_alias.scope,
3986
+ models.BadReplica.name == bad_replicas_alias.name,
3987
+ models.BadReplica.created_at >= younger_than,
3988
+ models.BadReplica.rse_id == bad_replicas_alias.rse_id,
3989
+ models.BadReplica.state.in_(exclude_states_clause))))
3990
+ stmt = stmt.where(not_(other_states_present))
3991
+
3992
+ # finally, the results are grouped by RSE, scope, name and required to have
3993
+ # at least 'nattempts' occurrences in the result of the query per replica.
3994
+ # If nattempts_exact, then only replicas are required to have exactly
3995
+ # 'nattempts' occurrences.
3996
+ if nattempts_exact:
3997
+ stmt = stmt.group_by(
3998
+ models.RSEFileAssociation.rse_id,
3999
+ bad_replicas_alias.scope,
4000
+ bad_replicas_alias.name
4001
+ ).having(
4002
+ func.count() == nattempts
4003
+ )
4004
+ query_result = session.execute(stmt).all()
4005
+ else:
4006
+ stmt = stmt.group_by(
4007
+ models.RSEFileAssociation.rse_id,
4008
+ bad_replicas_alias.scope,
4009
+ bad_replicas_alias.name
4010
+ ).having(
4011
+ func.count() > nattempts
4012
+ )
4013
+ query_result = session.execute(stmt).all()
4014
+
4015
+ # translating the rse_id to RSE name and assembling the return list of dictionaries
4016
+ result = []
4017
+ rses = {}
4018
+ for cnt, scope, name, rse_id, created_at in query_result:
4019
+ if rse_id not in rses:
4020
+ rse = get_rse_name(rse_id=rse_id, session=session)
4021
+ rses[rse_id] = rse
4022
+ result.append({'scope': scope, 'name': name, 'rse': rses[rse_id], 'rse_id': rse_id, 'cnt': cnt, 'created_at': created_at})
4023
+
4024
+ return result
4025
+
4026
+
4027
@read_session
def get_suspicious_reason(rse_id, scope, name, nattempts=0, logger=logging.log, *, session: "Session"):
    """
    Collect the reason(s) recorded for a replica that was declared suspicious.

    Only bad_replicas entries in state 'S' are selected, and only when the
    query finds no entry for the same RSE/scope/name in a state other than 'S'.

    :param rse_id: ID of RSE.
    :param scope: Scope of the replica DID.
    :param name: Name of the replica DID.
    :param nattempts: A reason is only reported if it was recorded more than this many times. Default is 0.
    :param logger: Callable invoked as logger(level, message); used to warn when several distinct reasons are found.
    :param session: The database session in use.
    :returns: List of dictionaries with the keys 'scope', 'name', 'rse', 'rse_id', 'reason' and 'count',
              one per distinct reason. 'count' is the total number of selected rows (before grouping by reason).
    """
    bad_replicas_alias = aliased(models.BadReplica, name='bad_replicas_alias')

    # Entries for this replica that are in any state other than suspicious.
    in_other_state = exists(
        select(1).where(
            and_(bad_replicas_alias.rse_id == rse_id,
                 bad_replicas_alias.name == name,
                 bad_replicas_alias.scope == scope,
                 bad_replicas_alias.state != 'S')
        )
    )

    suspicious_rows = select(
        bad_replicas_alias.scope,
        bad_replicas_alias.name,
        bad_replicas_alias.reason,
        bad_replicas_alias.rse_id
    ).where(
        and_(bad_replicas_alias.rse_id == rse_id,
             bad_replicas_alias.scope == scope,
             bad_replicas_alias.state == 'S',
             bad_replicas_alias.name == name,
             ~in_other_state)
    )

    # Total number of selected rows, independent of the reason they carry.
    count = session.execute(
        select(func.count()).select_from(suspicious_rows.subquery())
    ).scalar_one()

    # One row per distinct reason, restricted to reasons seen more than 'nattempts' times.
    per_reason_stmt = suspicious_rows.group_by(
        bad_replicas_alias.rse_id,
        bad_replicas_alias.scope,
        bad_replicas_alias.name,
        bad_replicas_alias.reason
    ).having(
        func.count() > nattempts
    )

    rse_names = {}  # rse_id -> RSE name, so each id is translated only once
    reasons = []
    for _, _, reason, found_rse_id in session.execute(per_reason_stmt).all():
        if found_rse_id not in rse_names:
            rse_names[found_rse_id] = get_rse_name(rse_id=found_rse_id, session=session)
        reasons.append({'scope': scope,
                        'name': name,
                        'rse': rse_names[found_rse_id],
                        'rse_id': found_rse_id,
                        'reason': reason,
                        'count': count})

    if len(reasons) > 1:
        logger(logging.WARNING, "Multiple reasons have been found. Please investigate.")

    return reasons
4085
+
4086
+
4087
@transactional_session
def set_tombstone(rse_id, scope, name, tombstone=OBSOLETE, *, session: "Session"):
    """
    Set a tombstone on a replica, provided no replica lock exists for it.

    :param rse_id: ID of RSE.
    :param scope: scope of the replica DID.
    :param name: name of the replica DID.
    :param tombstone: the tombstone to set. Default is OBSOLETE
    :param session: database session in use.
    :raises ReplicaIsLocked: if the replica exists but the update did not touch any row.
    :raises ReplicaNotFound: if no replica row matches rse_id, scope and name.
    """
    # Conditions identifying the replica row; shared by the update and the follow-up lookup.
    replica_filter = (
        models.RSEFileAssociation.rse_id == rse_id,
        models.RSEFileAssociation.name == name,
        models.RSEFileAssociation.scope == scope,
    )
    has_lock = exists().where(
        and_(models.ReplicaLock.rse_id == rse_id,
             models.ReplicaLock.name == name,
             models.ReplicaLock.scope == scope)
    )

    update_stmt = update(models.RSEFileAssociation).where(
        and_(*replica_filter, ~has_lock)
    ).prefix_with(
        '/*+ INDEX(REPLICAS REPLICAS_PK) */', dialect='oracle'
    ).values({
        models.RSEFileAssociation.tombstone: tombstone
    }).execution_options(
        synchronize_session=False
    )

    if session.execute(update_stmt).rowcount != 0:
        return

    # Nothing was updated: find out whether the replica is missing or merely locked.
    lookup_stmt = select(
        models.RSEFileAssociation.tombstone
    ).where(
        and_(*replica_filter)
    )
    try:
        # scalar_one() raises NoResultFound when the replica row does not exist;
        # the tombstone value itself is irrelevant here (it may legitimately be NULL).
        session.execute(lookup_stmt).scalar_one()
        raise exception.ReplicaIsLocked('Replica %s:%s on RSE %s is locked.' % (scope, name, get_rse_name(rse_id=rse_id, session=session)))
    except NoResultFound:
        raise exception.ReplicaNotFound('Replica %s:%s on RSE %s could not be found.' % (scope, name, get_rse_name(rse_id=rse_id, session=session)))
4127
+
4128
+
4129
@read_session
def get_RSEcoverage_of_dataset(scope, name, *, session: "Session"):
    """
    Compute, per RSE, the summed bytes of the dataset's content that has a replica there.

    Replicas in state BEING_DELETED are not counted, and RSEs whose sum is
    empty or zero are left out of the result.

    :param scope: Scope of the dataset
    :param name: Name of the dataset
    :param session: The db session.
    :return: Dictionary { rse_id : <total bytes present at rse_id> }
    """
    coverage_stmt = select(
        models.RSEFileAssociation.rse_id,
        func.sum(models.DataIdentifierAssociation.bytes)
    ).where(
        and_(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
             models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
             models.DataIdentifierAssociation.scope == scope,
             models.DataIdentifierAssociation.name == name,
             models.RSEFileAssociation.state != ReplicaState.BEING_DELETED)
    ).group_by(
        models.RSEFileAssociation.rse_id
    )

    # Keep only RSEs with a truthy (non-NULL, non-zero) byte total.
    return {
        rse_id: total_bytes
        for rse_id, total_bytes in session.execute(coverage_stmt)
        if total_bytes
    }