rucio 32.8.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rucio might be problematic. Click here for more details.

Files changed (481)
  1. rucio/__init__.py +18 -0
  2. rucio/alembicrevision.py +16 -0
  3. rucio/api/__init__.py +14 -0
  4. rucio/api/account.py +266 -0
  5. rucio/api/account_limit.py +287 -0
  6. rucio/api/authentication.py +302 -0
  7. rucio/api/config.py +218 -0
  8. rucio/api/credential.py +60 -0
  9. rucio/api/did.py +726 -0
  10. rucio/api/dirac.py +71 -0
  11. rucio/api/exporter.py +60 -0
  12. rucio/api/heartbeat.py +62 -0
  13. rucio/api/identity.py +160 -0
  14. rucio/api/importer.py +46 -0
  15. rucio/api/lifetime_exception.py +95 -0
  16. rucio/api/lock.py +131 -0
  17. rucio/api/meta.py +85 -0
  18. rucio/api/permission.py +72 -0
  19. rucio/api/quarantined_replica.py +69 -0
  20. rucio/api/replica.py +528 -0
  21. rucio/api/request.py +220 -0
  22. rucio/api/rse.py +601 -0
  23. rucio/api/rule.py +335 -0
  24. rucio/api/scope.py +89 -0
  25. rucio/api/subscription.py +255 -0
  26. rucio/api/temporary_did.py +49 -0
  27. rucio/api/vo.py +112 -0
  28. rucio/client/__init__.py +16 -0
  29. rucio/client/accountclient.py +413 -0
  30. rucio/client/accountlimitclient.py +155 -0
  31. rucio/client/baseclient.py +929 -0
  32. rucio/client/client.py +77 -0
  33. rucio/client/configclient.py +113 -0
  34. rucio/client/credentialclient.py +54 -0
  35. rucio/client/didclient.py +691 -0
  36. rucio/client/diracclient.py +48 -0
  37. rucio/client/downloadclient.py +1674 -0
  38. rucio/client/exportclient.py +44 -0
  39. rucio/client/fileclient.py +51 -0
  40. rucio/client/importclient.py +42 -0
  41. rucio/client/lifetimeclient.py +74 -0
  42. rucio/client/lockclient.py +99 -0
  43. rucio/client/metaclient.py +137 -0
  44. rucio/client/pingclient.py +45 -0
  45. rucio/client/replicaclient.py +444 -0
  46. rucio/client/requestclient.py +109 -0
  47. rucio/client/rseclient.py +664 -0
  48. rucio/client/ruleclient.py +287 -0
  49. rucio/client/scopeclient.py +88 -0
  50. rucio/client/subscriptionclient.py +161 -0
  51. rucio/client/touchclient.py +78 -0
  52. rucio/client/uploadclient.py +871 -0
  53. rucio/common/__init__.py +14 -0
  54. rucio/common/cache.py +74 -0
  55. rucio/common/config.py +796 -0
  56. rucio/common/constants.py +92 -0
  57. rucio/common/constraints.py +18 -0
  58. rucio/common/didtype.py +187 -0
  59. rucio/common/dumper/__init__.py +306 -0
  60. rucio/common/dumper/consistency.py +449 -0
  61. rucio/common/dumper/data_models.py +325 -0
  62. rucio/common/dumper/path_parsing.py +65 -0
  63. rucio/common/exception.py +1092 -0
  64. rucio/common/extra.py +37 -0
  65. rucio/common/logging.py +404 -0
  66. rucio/common/pcache.py +1387 -0
  67. rucio/common/policy.py +84 -0
  68. rucio/common/schema/__init__.py +143 -0
  69. rucio/common/schema/atlas.py +411 -0
  70. rucio/common/schema/belleii.py +406 -0
  71. rucio/common/schema/cms.py +478 -0
  72. rucio/common/schema/domatpc.py +399 -0
  73. rucio/common/schema/escape.py +424 -0
  74. rucio/common/schema/generic.py +431 -0
  75. rucio/common/schema/generic_multi_vo.py +410 -0
  76. rucio/common/schema/icecube.py +404 -0
  77. rucio/common/schema/lsst.py +423 -0
  78. rucio/common/stomp_utils.py +160 -0
  79. rucio/common/stopwatch.py +56 -0
  80. rucio/common/test_rucio_server.py +148 -0
  81. rucio/common/types.py +158 -0
  82. rucio/common/utils.py +1946 -0
  83. rucio/core/__init__.py +14 -0
  84. rucio/core/account.py +426 -0
  85. rucio/core/account_counter.py +171 -0
  86. rucio/core/account_limit.py +357 -0
  87. rucio/core/authentication.py +563 -0
  88. rucio/core/config.py +386 -0
  89. rucio/core/credential.py +218 -0
  90. rucio/core/did.py +3102 -0
  91. rucio/core/did_meta_plugins/__init__.py +250 -0
  92. rucio/core/did_meta_plugins/did_column_meta.py +326 -0
  93. rucio/core/did_meta_plugins/did_meta_plugin_interface.py +116 -0
  94. rucio/core/did_meta_plugins/filter_engine.py +573 -0
  95. rucio/core/did_meta_plugins/json_meta.py +215 -0
  96. rucio/core/did_meta_plugins/mongo_meta.py +199 -0
  97. rucio/core/did_meta_plugins/postgres_meta.py +317 -0
  98. rucio/core/dirac.py +208 -0
  99. rucio/core/distance.py +164 -0
  100. rucio/core/exporter.py +59 -0
  101. rucio/core/heartbeat.py +263 -0
  102. rucio/core/identity.py +290 -0
  103. rucio/core/importer.py +248 -0
  104. rucio/core/lifetime_exception.py +377 -0
  105. rucio/core/lock.py +474 -0
  106. rucio/core/message.py +241 -0
  107. rucio/core/meta.py +190 -0
  108. rucio/core/monitor.py +441 -0
  109. rucio/core/naming_convention.py +154 -0
  110. rucio/core/nongrid_trace.py +124 -0
  111. rucio/core/oidc.py +1339 -0
  112. rucio/core/permission/__init__.py +107 -0
  113. rucio/core/permission/atlas.py +1333 -0
  114. rucio/core/permission/belleii.py +1076 -0
  115. rucio/core/permission/cms.py +1166 -0
  116. rucio/core/permission/escape.py +1076 -0
  117. rucio/core/permission/generic.py +1128 -0
  118. rucio/core/permission/generic_multi_vo.py +1148 -0
  119. rucio/core/quarantined_replica.py +190 -0
  120. rucio/core/replica.py +3627 -0
  121. rucio/core/replica_sorter.py +368 -0
  122. rucio/core/request.py +2241 -0
  123. rucio/core/rse.py +1835 -0
  124. rucio/core/rse_counter.py +155 -0
  125. rucio/core/rse_expression_parser.py +460 -0
  126. rucio/core/rse_selector.py +277 -0
  127. rucio/core/rule.py +3419 -0
  128. rucio/core/rule_grouping.py +1473 -0
  129. rucio/core/scope.py +152 -0
  130. rucio/core/subscription.py +316 -0
  131. rucio/core/temporary_did.py +188 -0
  132. rucio/core/topology.py +448 -0
  133. rucio/core/trace.py +361 -0
  134. rucio/core/transfer.py +1233 -0
  135. rucio/core/vo.py +151 -0
  136. rucio/core/volatile_replica.py +123 -0
  137. rucio/daemons/__init__.py +14 -0
  138. rucio/daemons/abacus/__init__.py +14 -0
  139. rucio/daemons/abacus/account.py +106 -0
  140. rucio/daemons/abacus/collection_replica.py +113 -0
  141. rucio/daemons/abacus/rse.py +107 -0
  142. rucio/daemons/atropos/__init__.py +14 -0
  143. rucio/daemons/atropos/atropos.py +243 -0
  144. rucio/daemons/auditor/__init__.py +261 -0
  145. rucio/daemons/auditor/hdfs.py +86 -0
  146. rucio/daemons/auditor/srmdumps.py +284 -0
  147. rucio/daemons/automatix/__init__.py +14 -0
  148. rucio/daemons/automatix/automatix.py +281 -0
  149. rucio/daemons/badreplicas/__init__.py +14 -0
  150. rucio/daemons/badreplicas/minos.py +311 -0
  151. rucio/daemons/badreplicas/minos_temporary_expiration.py +173 -0
  152. rucio/daemons/badreplicas/necromancer.py +200 -0
  153. rucio/daemons/bb8/__init__.py +14 -0
  154. rucio/daemons/bb8/bb8.py +356 -0
  155. rucio/daemons/bb8/common.py +762 -0
  156. rucio/daemons/bb8/nuclei_background_rebalance.py +147 -0
  157. rucio/daemons/bb8/t2_background_rebalance.py +146 -0
  158. rucio/daemons/c3po/__init__.py +14 -0
  159. rucio/daemons/c3po/algorithms/__init__.py +14 -0
  160. rucio/daemons/c3po/algorithms/simple.py +131 -0
  161. rucio/daemons/c3po/algorithms/t2_free_space.py +125 -0
  162. rucio/daemons/c3po/algorithms/t2_free_space_only_pop.py +127 -0
  163. rucio/daemons/c3po/algorithms/t2_free_space_only_pop_with_network.py +279 -0
  164. rucio/daemons/c3po/c3po.py +342 -0
  165. rucio/daemons/c3po/collectors/__init__.py +14 -0
  166. rucio/daemons/c3po/collectors/agis.py +108 -0
  167. rucio/daemons/c3po/collectors/free_space.py +62 -0
  168. rucio/daemons/c3po/collectors/jedi_did.py +48 -0
  169. rucio/daemons/c3po/collectors/mock_did.py +46 -0
  170. rucio/daemons/c3po/collectors/network_metrics.py +63 -0
  171. rucio/daemons/c3po/collectors/workload.py +110 -0
  172. rucio/daemons/c3po/utils/__init__.py +14 -0
  173. rucio/daemons/c3po/utils/dataset_cache.py +40 -0
  174. rucio/daemons/c3po/utils/expiring_dataset_cache.py +45 -0
  175. rucio/daemons/c3po/utils/expiring_list.py +63 -0
  176. rucio/daemons/c3po/utils/popularity.py +82 -0
  177. rucio/daemons/c3po/utils/timeseries.py +76 -0
  178. rucio/daemons/cache/__init__.py +14 -0
  179. rucio/daemons/cache/consumer.py +191 -0
  180. rucio/daemons/common.py +391 -0
  181. rucio/daemons/conveyor/__init__.py +14 -0
  182. rucio/daemons/conveyor/common.py +530 -0
  183. rucio/daemons/conveyor/finisher.py +492 -0
  184. rucio/daemons/conveyor/poller.py +372 -0
  185. rucio/daemons/conveyor/preparer.py +198 -0
  186. rucio/daemons/conveyor/receiver.py +206 -0
  187. rucio/daemons/conveyor/stager.py +127 -0
  188. rucio/daemons/conveyor/submitter.py +379 -0
  189. rucio/daemons/conveyor/throttler.py +468 -0
  190. rucio/daemons/follower/__init__.py +14 -0
  191. rucio/daemons/follower/follower.py +97 -0
  192. rucio/daemons/hermes/__init__.py +14 -0
  193. rucio/daemons/hermes/hermes.py +738 -0
  194. rucio/daemons/judge/__init__.py +14 -0
  195. rucio/daemons/judge/cleaner.py +149 -0
  196. rucio/daemons/judge/evaluator.py +172 -0
  197. rucio/daemons/judge/injector.py +154 -0
  198. rucio/daemons/judge/repairer.py +144 -0
  199. rucio/daemons/oauthmanager/__init__.py +14 -0
  200. rucio/daemons/oauthmanager/oauthmanager.py +199 -0
  201. rucio/daemons/reaper/__init__.py +14 -0
  202. rucio/daemons/reaper/dark_reaper.py +272 -0
  203. rucio/daemons/reaper/light_reaper.py +255 -0
  204. rucio/daemons/reaper/reaper.py +701 -0
  205. rucio/daemons/replicarecoverer/__init__.py +14 -0
  206. rucio/daemons/replicarecoverer/suspicious_replica_recoverer.py +487 -0
  207. rucio/daemons/storage/__init__.py +14 -0
  208. rucio/daemons/storage/consistency/__init__.py +14 -0
  209. rucio/daemons/storage/consistency/actions.py +753 -0
  210. rucio/daemons/tracer/__init__.py +14 -0
  211. rucio/daemons/tracer/kronos.py +513 -0
  212. rucio/daemons/transmogrifier/__init__.py +14 -0
  213. rucio/daemons/transmogrifier/transmogrifier.py +753 -0
  214. rucio/daemons/undertaker/__init__.py +14 -0
  215. rucio/daemons/undertaker/undertaker.py +137 -0
  216. rucio/db/__init__.py +14 -0
  217. rucio/db/sqla/__init__.py +38 -0
  218. rucio/db/sqla/constants.py +192 -0
  219. rucio/db/sqla/migrate_repo/__init__.py +14 -0
  220. rucio/db/sqla/migrate_repo/env.py +111 -0
  221. rucio/db/sqla/migrate_repo/versions/01eaf73ab656_add_new_rule_notification_state_progress.py +71 -0
  222. rucio/db/sqla/migrate_repo/versions/0437a40dbfd1_add_eol_at_in_rules.py +50 -0
  223. rucio/db/sqla/migrate_repo/versions/0f1adb7a599a_create_transfer_hops_table.py +61 -0
  224. rucio/db/sqla/migrate_repo/versions/102efcf145f4_added_stuck_at_column_to_rules.py +46 -0
  225. rucio/db/sqla/migrate_repo/versions/13d4f70c66a9_introduce_transfer_limits.py +93 -0
  226. rucio/db/sqla/migrate_repo/versions/140fef722e91_cleanup_distances_table.py +78 -0
  227. rucio/db/sqla/migrate_repo/versions/14ec5aeb64cf_add_request_external_host.py +46 -0
  228. rucio/db/sqla/migrate_repo/versions/156fb5b5a14_add_request_type_to_requests_idx.py +53 -0
  229. rucio/db/sqla/migrate_repo/versions/1677d4d803c8_split_rse_availability_into_multiple.py +69 -0
  230. rucio/db/sqla/migrate_repo/versions/16a0aca82e12_create_index_on_table_replicas_path.py +42 -0
  231. rucio/db/sqla/migrate_repo/versions/1803333ac20f_adding_provenance_and_phys_group.py +46 -0
  232. rucio/db/sqla/migrate_repo/versions/1a29d6a9504c_add_didtype_chck_to_requests.py +61 -0
  233. rucio/db/sqla/migrate_repo/versions/1a80adff031a_create_index_on_rules_hist_recent.py +42 -0
  234. rucio/db/sqla/migrate_repo/versions/1c45d9730ca6_increase_identity_length.py +141 -0
  235. rucio/db/sqla/migrate_repo/versions/1d1215494e95_add_quarantined_replicas_table.py +75 -0
  236. rucio/db/sqla/migrate_repo/versions/1d96f484df21_asynchronous_rules_and_rule_approval.py +75 -0
  237. rucio/db/sqla/migrate_repo/versions/1f46c5f240ac_add_bytes_column_to_bad_replicas.py +46 -0
  238. rucio/db/sqla/migrate_repo/versions/1fc15ab60d43_add_message_history_table.py +51 -0
  239. rucio/db/sqla/migrate_repo/versions/2190e703eb6e_move_rse_settings_to_rse_attributes.py +135 -0
  240. rucio/db/sqla/migrate_repo/versions/21d6b9dc9961_add_mismatch_scheme_state_to_requests.py +65 -0
  241. rucio/db/sqla/migrate_repo/versions/22cf51430c78_add_availability_column_to_table_rses.py +42 -0
  242. rucio/db/sqla/migrate_repo/versions/22d887e4ec0a_create_sources_table.py +66 -0
  243. rucio/db/sqla/migrate_repo/versions/25821a8a45a3_remove_unique_constraint_on_requests.py +54 -0
  244. rucio/db/sqla/migrate_repo/versions/25fc855625cf_added_unique_constraint_to_rules.py +43 -0
  245. rucio/db/sqla/migrate_repo/versions/269fee20dee9_add_repair_cnt_to_locks.py +46 -0
  246. rucio/db/sqla/migrate_repo/versions/271a46ea6244_add_ignore_availability_column_to_rules.py +47 -0
  247. rucio/db/sqla/migrate_repo/versions/277b5fbb41d3_switch_heartbeats_executable.py +54 -0
  248. rucio/db/sqla/migrate_repo/versions/27e3a68927fb_remove_replicas_tombstone_and_replicas_.py +39 -0
  249. rucio/db/sqla/migrate_repo/versions/2854cd9e168_added_rule_id_column.py +48 -0
  250. rucio/db/sqla/migrate_repo/versions/295289b5a800_processed_by_and__at_in_requests.py +47 -0
  251. rucio/db/sqla/migrate_repo/versions/2962ece31cf4_add_nbaccesses_column_in_the_did_table.py +48 -0
  252. rucio/db/sqla/migrate_repo/versions/2af3291ec4c_added_replicas_history_table.py +59 -0
  253. rucio/db/sqla/migrate_repo/versions/2b69addda658_add_columns_for_third_party_copy_read_.py +47 -0
  254. rucio/db/sqla/migrate_repo/versions/2b8e7bcb4783_add_config_table.py +72 -0
  255. rucio/db/sqla/migrate_repo/versions/2ba5229cb54c_add_submitted_at_to_requests_table.py +46 -0
  256. rucio/db/sqla/migrate_repo/versions/2cbee484dcf9_added_column_volume_to_rse_transfer_.py +45 -0
  257. rucio/db/sqla/migrate_repo/versions/2edee4a83846_add_source_to_requests_and_requests_.py +48 -0
  258. rucio/db/sqla/migrate_repo/versions/2eef46be23d4_change_tokens_pk.py +48 -0
  259. rucio/db/sqla/migrate_repo/versions/2f648fc909f3_index_in_rule_history_on_scope_name.py +42 -0
  260. rucio/db/sqla/migrate_repo/versions/3082b8cef557_add_naming_convention_table_and_closed_.py +69 -0
  261. rucio/db/sqla/migrate_repo/versions/30fa38b6434e_add_index_on_service_column_in_the_message_table.py +46 -0
  262. rucio/db/sqla/migrate_repo/versions/3152492b110b_added_staging_area_column.py +78 -0
  263. rucio/db/sqla/migrate_repo/versions/32c7d2783f7e_create_bad_replicas_table.py +62 -0
  264. rucio/db/sqla/migrate_repo/versions/3345511706b8_replicas_table_pk_definition_is_in_.py +74 -0
  265. rucio/db/sqla/migrate_repo/versions/35ef10d1e11b_change_index_on_table_requests.py +44 -0
  266. rucio/db/sqla/migrate_repo/versions/379a19b5332d_create_rse_limits_table.py +67 -0
  267. rucio/db/sqla/migrate_repo/versions/384b96aa0f60_created_rule_history_tables.py +134 -0
  268. rucio/db/sqla/migrate_repo/versions/3ac1660a1a72_extend_distance_table.py +58 -0
  269. rucio/db/sqla/migrate_repo/versions/3ad36e2268b0_create_collection_replicas_updates_table.py +79 -0
  270. rucio/db/sqla/migrate_repo/versions/3c9df354071b_extend_waiting_request_state.py +61 -0
  271. rucio/db/sqla/migrate_repo/versions/3d9813fab443_add_a_new_state_lost_in_badfilesstatus.py +45 -0
  272. rucio/db/sqla/migrate_repo/versions/40ad39ce3160_add_transferred_at_to_requests_table.py +46 -0
  273. rucio/db/sqla/migrate_repo/versions/4207be2fd914_add_notification_column_to_rules.py +65 -0
  274. rucio/db/sqla/migrate_repo/versions/42db2617c364_create_index_on_requests_external_id.py +42 -0
  275. rucio/db/sqla/migrate_repo/versions/436827b13f82_added_column_activity_to_table_requests.py +46 -0
  276. rucio/db/sqla/migrate_repo/versions/44278720f774_update_requests_typ_sta_upd_idx_index.py +46 -0
  277. rucio/db/sqla/migrate_repo/versions/45378a1e76a8_create_collection_replica_table.py +80 -0
  278. rucio/db/sqla/migrate_repo/versions/469d262be19_removing_created_at_index.py +43 -0
  279. rucio/db/sqla/migrate_repo/versions/4783c1f49cb4_create_distance_table.py +61 -0
  280. rucio/db/sqla/migrate_repo/versions/49a21b4d4357_create_index_on_table_tokens.py +47 -0
  281. rucio/db/sqla/migrate_repo/versions/4a2cbedda8b9_add_source_replica_expression_column_to_.py +46 -0
  282. rucio/db/sqla/migrate_repo/versions/4a7182d9578b_added_bytes_length_accessed_at_columns.py +52 -0
  283. rucio/db/sqla/migrate_repo/versions/4bab9edd01fc_create_index_on_requests_rule_id.py +42 -0
  284. rucio/db/sqla/migrate_repo/versions/4c3a4acfe006_new_attr_account_table.py +65 -0
  285. rucio/db/sqla/migrate_repo/versions/4cf0a2e127d4_adding_transient_metadata.py +46 -0
  286. rucio/db/sqla/migrate_repo/versions/50280c53117c_add_qos_class_to_rse.py +47 -0
  287. rucio/db/sqla/migrate_repo/versions/52153819589c_add_rse_id_to_replicas_table.py +45 -0
  288. rucio/db/sqla/migrate_repo/versions/52fd9f4916fa_added_activity_to_rules.py +46 -0
  289. rucio/db/sqla/migrate_repo/versions/53b479c3cb0f_fix_did_meta_table_missing_updated_at_.py +48 -0
  290. rucio/db/sqla/migrate_repo/versions/5673b4b6e843_add_wfms_metadata_to_rule_tables.py +50 -0
  291. rucio/db/sqla/migrate_repo/versions/575767d9f89_added_source_history_table.py +59 -0
  292. rucio/db/sqla/migrate_repo/versions/58bff7008037_add_started_at_to_requests.py +48 -0
  293. rucio/db/sqla/migrate_repo/versions/58c8b78301ab_rename_callback_to_message.py +108 -0
  294. rucio/db/sqla/migrate_repo/versions/5f139f77382a_added_child_rule_id_column.py +57 -0
  295. rucio/db/sqla/migrate_repo/versions/688ef1840840_adding_did_meta_table.py +51 -0
  296. rucio/db/sqla/migrate_repo/versions/6e572a9bfbf3_add_new_split_container_column_to_rules.py +50 -0
  297. rucio/db/sqla/migrate_repo/versions/70587619328_add_comment_column_for_subscriptions.py +46 -0
  298. rucio/db/sqla/migrate_repo/versions/739064d31565_remove_history_table_pks.py +42 -0
  299. rucio/db/sqla/migrate_repo/versions/7541902bf173_add_didsfollowed_and_followevents_table.py +93 -0
  300. rucio/db/sqla/migrate_repo/versions/7ec22226cdbf_new_replica_state_for_temporary_.py +73 -0
  301. rucio/db/sqla/migrate_repo/versions/810a41685bc1_added_columns_rse_transfer_limits.py +52 -0
  302. rucio/db/sqla/migrate_repo/versions/83f991c63a93_correct_rse_expression_length.py +45 -0
  303. rucio/db/sqla/migrate_repo/versions/8523998e2e76_increase_size_of_extended_attributes_.py +46 -0
  304. rucio/db/sqla/migrate_repo/versions/8ea9122275b1_adding_missing_function_based_indices.py +54 -0
  305. rucio/db/sqla/migrate_repo/versions/90f47792bb76_add_clob_payload_to_messages.py +48 -0
  306. rucio/db/sqla/migrate_repo/versions/914b8f02df38_new_table_for_lifetime_model_exceptions.py +70 -0
  307. rucio/db/sqla/migrate_repo/versions/94a5961ddbf2_add_estimator_columns.py +48 -0
  308. rucio/db/sqla/migrate_repo/versions/9a1b149a2044_add_saml_identity_type.py +95 -0
  309. rucio/db/sqla/migrate_repo/versions/9a45bc4ea66d_add_vp_table.py +55 -0
  310. rucio/db/sqla/migrate_repo/versions/9eb936a81eb1_true_is_true.py +74 -0
  311. rucio/db/sqla/migrate_repo/versions/a118956323f8_added_vo_table_and_vo_col_to_rse.py +78 -0
  312. rucio/db/sqla/migrate_repo/versions/a193a275255c_add_status_column_in_messages.py +49 -0
  313. rucio/db/sqla/migrate_repo/versions/a5f6f6e928a7_1_7_0.py +124 -0
  314. rucio/db/sqla/migrate_repo/versions/a616581ee47_added_columns_to_table_requests.py +60 -0
  315. rucio/db/sqla/migrate_repo/versions/a6eb23955c28_state_idx_non_functional.py +53 -0
  316. rucio/db/sqla/migrate_repo/versions/a74275a1ad30_added_global_quota_table.py +56 -0
  317. rucio/db/sqla/migrate_repo/versions/a93e4e47bda_heartbeats.py +67 -0
  318. rucio/db/sqla/migrate_repo/versions/ae2a56fcc89_added_comment_column_to_rules.py +50 -0
  319. rucio/db/sqla/migrate_repo/versions/b4293a99f344_added_column_identity_to_table_tokens.py +46 -0
  320. rucio/db/sqla/migrate_repo/versions/b7d287de34fd_removal_of_replicastate_source.py +92 -0
  321. rucio/db/sqla/migrate_repo/versions/b818052fa670_add_index_to_quarantined_replicas.py +42 -0
  322. rucio/db/sqla/migrate_repo/versions/b8caac94d7f0_add_comments_column_for_subscriptions_.py +46 -0
  323. rucio/db/sqla/migrate_repo/versions/b96a1c7e1cc4_new_bad_pfns_table_and_bad_replicas_.py +147 -0
  324. rucio/db/sqla/migrate_repo/versions/bb695f45c04_extend_request_state.py +78 -0
  325. rucio/db/sqla/migrate_repo/versions/bc68e9946deb_add_staging_timestamps_to_request.py +53 -0
  326. rucio/db/sqla/migrate_repo/versions/bf3baa1c1474_correct_pk_and_idx_for_history_tables.py +74 -0
  327. rucio/db/sqla/migrate_repo/versions/c0937668555f_add_qos_policy_map_table.py +56 -0
  328. rucio/db/sqla/migrate_repo/versions/c129ccdb2d5_add_lumiblocknr_to_dids.py +46 -0
  329. rucio/db/sqla/migrate_repo/versions/ccdbcd48206e_add_did_type_column_index_on_did_meta_.py +68 -0
  330. rucio/db/sqla/migrate_repo/versions/cebad904c4dd_new_payload_column_for_heartbeats.py +48 -0
  331. rucio/db/sqla/migrate_repo/versions/d1189a09c6e0_oauth2_0_and_jwt_feature_support_adding_.py +149 -0
  332. rucio/db/sqla/migrate_repo/versions/d23453595260_extend_request_state_for_preparer.py +106 -0
  333. rucio/db/sqla/migrate_repo/versions/d6dceb1de2d_added_purge_column_to_rules.py +47 -0
  334. rucio/db/sqla/migrate_repo/versions/d6e2c3b2cf26_remove_third_party_copy_column_from_rse.py +45 -0
  335. rucio/db/sqla/migrate_repo/versions/d91002c5841_new_account_limits_table.py +105 -0
  336. rucio/db/sqla/migrate_repo/versions/e138c364ebd0_extending_columns_for_filter_and_.py +52 -0
  337. rucio/db/sqla/migrate_repo/versions/e59300c8b179_support_for_archive.py +106 -0
  338. rucio/db/sqla/migrate_repo/versions/f1b14a8c2ac1_postgres_use_check_constraints.py +30 -0
  339. rucio/db/sqla/migrate_repo/versions/f41ffe206f37_oracle_global_temporary_tables.py +75 -0
  340. rucio/db/sqla/migrate_repo/versions/f85a2962b021_adding_transfertool_column_to_requests_.py +49 -0
  341. rucio/db/sqla/migrate_repo/versions/fa7a7d78b602_increase_refresh_token_size.py +45 -0
  342. rucio/db/sqla/migrate_repo/versions/fb28a95fe288_add_replicas_rse_id_tombstone_idx.py +38 -0
  343. rucio/db/sqla/migrate_repo/versions/fe1a65b176c9_set_third_party_copy_read_and_write_.py +44 -0
  344. rucio/db/sqla/migrate_repo/versions/fe8ea2fa9788_added_third_party_copy_column_to_rse_.py +46 -0
  345. rucio/db/sqla/models.py +1834 -0
  346. rucio/db/sqla/sautils.py +48 -0
  347. rucio/db/sqla/session.py +470 -0
  348. rucio/db/sqla/types.py +207 -0
  349. rucio/db/sqla/util.py +521 -0
  350. rucio/rse/__init__.py +97 -0
  351. rucio/rse/protocols/__init__.py +14 -0
  352. rucio/rse/protocols/cache.py +123 -0
  353. rucio/rse/protocols/dummy.py +112 -0
  354. rucio/rse/protocols/gfal.py +701 -0
  355. rucio/rse/protocols/globus.py +243 -0
  356. rucio/rse/protocols/gsiftp.py +93 -0
  357. rucio/rse/protocols/http_cache.py +83 -0
  358. rucio/rse/protocols/mock.py +124 -0
  359. rucio/rse/protocols/ngarc.py +210 -0
  360. rucio/rse/protocols/posix.py +251 -0
  361. rucio/rse/protocols/protocol.py +530 -0
  362. rucio/rse/protocols/rclone.py +365 -0
  363. rucio/rse/protocols/rfio.py +137 -0
  364. rucio/rse/protocols/srm.py +339 -0
  365. rucio/rse/protocols/ssh.py +414 -0
  366. rucio/rse/protocols/storm.py +207 -0
  367. rucio/rse/protocols/webdav.py +547 -0
  368. rucio/rse/protocols/xrootd.py +295 -0
  369. rucio/rse/rsemanager.py +752 -0
  370. rucio/tests/__init__.py +14 -0
  371. rucio/tests/common.py +244 -0
  372. rucio/tests/common_server.py +132 -0
  373. rucio/transfertool/__init__.py +14 -0
  374. rucio/transfertool/fts3.py +1484 -0
  375. rucio/transfertool/globus.py +200 -0
  376. rucio/transfertool/globus_library.py +182 -0
  377. rucio/transfertool/mock.py +81 -0
  378. rucio/transfertool/transfertool.py +212 -0
  379. rucio/vcsversion.py +11 -0
  380. rucio/version.py +46 -0
  381. rucio/web/__init__.py +14 -0
  382. rucio/web/rest/__init__.py +14 -0
  383. rucio/web/rest/flaskapi/__init__.py +14 -0
  384. rucio/web/rest/flaskapi/authenticated_bp.py +28 -0
  385. rucio/web/rest/flaskapi/v1/__init__.py +14 -0
  386. rucio/web/rest/flaskapi/v1/accountlimits.py +234 -0
  387. rucio/web/rest/flaskapi/v1/accounts.py +1088 -0
  388. rucio/web/rest/flaskapi/v1/archives.py +100 -0
  389. rucio/web/rest/flaskapi/v1/auth.py +1642 -0
  390. rucio/web/rest/flaskapi/v1/common.py +385 -0
  391. rucio/web/rest/flaskapi/v1/config.py +305 -0
  392. rucio/web/rest/flaskapi/v1/credentials.py +213 -0
  393. rucio/web/rest/flaskapi/v1/dids.py +2204 -0
  394. rucio/web/rest/flaskapi/v1/dirac.py +116 -0
  395. rucio/web/rest/flaskapi/v1/export.py +77 -0
  396. rucio/web/rest/flaskapi/v1/heartbeats.py +129 -0
  397. rucio/web/rest/flaskapi/v1/identities.py +263 -0
  398. rucio/web/rest/flaskapi/v1/import.py +133 -0
  399. rucio/web/rest/flaskapi/v1/lifetime_exceptions.py +315 -0
  400. rucio/web/rest/flaskapi/v1/locks.py +360 -0
  401. rucio/web/rest/flaskapi/v1/main.py +83 -0
  402. rucio/web/rest/flaskapi/v1/meta.py +226 -0
  403. rucio/web/rest/flaskapi/v1/metrics.py +37 -0
  404. rucio/web/rest/flaskapi/v1/nongrid_traces.py +97 -0
  405. rucio/web/rest/flaskapi/v1/ping.py +89 -0
  406. rucio/web/rest/flaskapi/v1/redirect.py +366 -0
  407. rucio/web/rest/flaskapi/v1/replicas.py +1866 -0
  408. rucio/web/rest/flaskapi/v1/requests.py +841 -0
  409. rucio/web/rest/flaskapi/v1/rses.py +2204 -0
  410. rucio/web/rest/flaskapi/v1/rules.py +824 -0
  411. rucio/web/rest/flaskapi/v1/scopes.py +161 -0
  412. rucio/web/rest/flaskapi/v1/subscriptions.py +646 -0
  413. rucio/web/rest/flaskapi/v1/templates/auth_crash.html +80 -0
  414. rucio/web/rest/flaskapi/v1/templates/auth_granted.html +82 -0
  415. rucio/web/rest/flaskapi/v1/tmp_dids.py +115 -0
  416. rucio/web/rest/flaskapi/v1/traces.py +100 -0
  417. rucio/web/rest/flaskapi/v1/vos.py +280 -0
  418. rucio/web/rest/main.py +19 -0
  419. rucio/web/rest/metrics.py +28 -0
  420. rucio-32.8.6.data/data/rucio/etc/alembic.ini.template +71 -0
  421. rucio-32.8.6.data/data/rucio/etc/alembic_offline.ini.template +74 -0
  422. rucio-32.8.6.data/data/rucio/etc/globus-config.yml.template +5 -0
  423. rucio-32.8.6.data/data/rucio/etc/ldap.cfg.template +30 -0
  424. rucio-32.8.6.data/data/rucio/etc/mail_templates/rule_approval_request.tmpl +38 -0
  425. rucio-32.8.6.data/data/rucio/etc/mail_templates/rule_approved_admin.tmpl +4 -0
  426. rucio-32.8.6.data/data/rucio/etc/mail_templates/rule_approved_user.tmpl +17 -0
  427. rucio-32.8.6.data/data/rucio/etc/mail_templates/rule_denied_admin.tmpl +6 -0
  428. rucio-32.8.6.data/data/rucio/etc/mail_templates/rule_denied_user.tmpl +17 -0
  429. rucio-32.8.6.data/data/rucio/etc/mail_templates/rule_ok_notification.tmpl +19 -0
  430. rucio-32.8.6.data/data/rucio/etc/rse-accounts.cfg.template +25 -0
  431. rucio-32.8.6.data/data/rucio/etc/rucio.cfg.atlas.client.template +42 -0
  432. rucio-32.8.6.data/data/rucio/etc/rucio.cfg.template +257 -0
  433. rucio-32.8.6.data/data/rucio/etc/rucio_multi_vo.cfg.template +234 -0
  434. rucio-32.8.6.data/data/rucio/requirements.txt +55 -0
  435. rucio-32.8.6.data/data/rucio/tools/bootstrap.py +34 -0
  436. rucio-32.8.6.data/data/rucio/tools/merge_rucio_configs.py +147 -0
  437. rucio-32.8.6.data/data/rucio/tools/reset_database.py +40 -0
  438. rucio-32.8.6.data/scripts/rucio +2540 -0
  439. rucio-32.8.6.data/scripts/rucio-abacus-account +75 -0
  440. rucio-32.8.6.data/scripts/rucio-abacus-collection-replica +47 -0
  441. rucio-32.8.6.data/scripts/rucio-abacus-rse +79 -0
  442. rucio-32.8.6.data/scripts/rucio-admin +2434 -0
  443. rucio-32.8.6.data/scripts/rucio-atropos +61 -0
  444. rucio-32.8.6.data/scripts/rucio-auditor +199 -0
  445. rucio-32.8.6.data/scripts/rucio-automatix +51 -0
  446. rucio-32.8.6.data/scripts/rucio-bb8 +58 -0
  447. rucio-32.8.6.data/scripts/rucio-c3po +86 -0
  448. rucio-32.8.6.data/scripts/rucio-cache-client +135 -0
  449. rucio-32.8.6.data/scripts/rucio-cache-consumer +43 -0
  450. rucio-32.8.6.data/scripts/rucio-conveyor-finisher +59 -0
  451. rucio-32.8.6.data/scripts/rucio-conveyor-poller +67 -0
  452. rucio-32.8.6.data/scripts/rucio-conveyor-preparer +38 -0
  453. rucio-32.8.6.data/scripts/rucio-conveyor-receiver +44 -0
  454. rucio-32.8.6.data/scripts/rucio-conveyor-stager +77 -0
  455. rucio-32.8.6.data/scripts/rucio-conveyor-submitter +140 -0
  456. rucio-32.8.6.data/scripts/rucio-conveyor-throttler +105 -0
  457. rucio-32.8.6.data/scripts/rucio-dark-reaper +54 -0
  458. rucio-32.8.6.data/scripts/rucio-dumper +159 -0
  459. rucio-32.8.6.data/scripts/rucio-follower +45 -0
  460. rucio-32.8.6.data/scripts/rucio-hermes +55 -0
  461. rucio-32.8.6.data/scripts/rucio-judge-cleaner +90 -0
  462. rucio-32.8.6.data/scripts/rucio-judge-evaluator +138 -0
  463. rucio-32.8.6.data/scripts/rucio-judge-injector +45 -0
  464. rucio-32.8.6.data/scripts/rucio-judge-repairer +45 -0
  465. rucio-32.8.6.data/scripts/rucio-kronos +45 -0
  466. rucio-32.8.6.data/scripts/rucio-light-reaper +53 -0
  467. rucio-32.8.6.data/scripts/rucio-minos +54 -0
  468. rucio-32.8.6.data/scripts/rucio-minos-temporary-expiration +51 -0
  469. rucio-32.8.6.data/scripts/rucio-necromancer +121 -0
  470. rucio-32.8.6.data/scripts/rucio-oauth-manager +64 -0
  471. rucio-32.8.6.data/scripts/rucio-reaper +84 -0
  472. rucio-32.8.6.data/scripts/rucio-replica-recoverer +249 -0
  473. rucio-32.8.6.data/scripts/rucio-storage-consistency-actions +75 -0
  474. rucio-32.8.6.data/scripts/rucio-transmogrifier +78 -0
  475. rucio-32.8.6.data/scripts/rucio-undertaker +77 -0
  476. rucio-32.8.6.dist-info/METADATA +83 -0
  477. rucio-32.8.6.dist-info/RECORD +481 -0
  478. rucio-32.8.6.dist-info/WHEEL +5 -0
  479. rucio-32.8.6.dist-info/licenses/AUTHORS.rst +94 -0
  480. rucio-32.8.6.dist-info/licenses/LICENSE +201 -0
  481. rucio-32.8.6.dist-info/top_level.txt +1 -0
rucio/core/replica.py ADDED
@@ -0,0 +1,3627 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright European Organization for Nuclear Research (CERN) since 2012
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ import copy
16
+ import heapq
17
+ import logging
18
+ import random
19
+ from collections import defaultdict, namedtuple
20
+ from curses.ascii import isprint
21
+ from datetime import datetime, timedelta
22
+ from hashlib import sha256
23
+ from itertools import groupby
24
+ from json import dumps
25
+ from re import match
26
+ from struct import unpack
27
+ from traceback import format_exc
28
+ from typing import TYPE_CHECKING
29
+
30
+ import math
31
+ import requests
32
+ from dogpile.cache.api import NO_VALUE
33
+ from sqlalchemy import func, and_, or_, exists, not_, update, delete, insert, union
34
+ from sqlalchemy.exc import DatabaseError, IntegrityError
35
+ from sqlalchemy.orm import aliased
36
+ from sqlalchemy.orm.exc import FlushError, NoResultFound
37
+ from sqlalchemy.sql import label
38
+ from sqlalchemy.sql.expression import case, select, text, false, true, null, literal, literal_column
39
+
40
+ import rucio.core.did
41
+ import rucio.core.lock
42
+ from rucio.common import exception
43
+ from rucio.common.cache import make_region_memcached
44
+ from rucio.common.config import config_get, config_get_bool
45
+ from rucio.common.constants import SuspiciousAvailability
46
+ from rucio.common.types import InternalScope
47
+ from rucio.common.utils import chunks, clean_surls, str_to_date, add_url_query
48
+ from rucio.core.credential import get_signed_url
49
+ from rucio.core.message import add_messages
50
+ from rucio.core.monitor import MetricManager
51
+ from rucio.core.rse import get_rse, get_rse_name, get_rse_attribute, get_rse_vo, list_rses
52
+ from rucio.core.rse_counter import decrease, increase
53
+ from rucio.core.rse_expression_parser import parse_expression
54
+ from rucio.db.sqla import models, filter_thread_work
55
+ from rucio.db.sqla.constants import (DIDType, ReplicaState, OBSOLETE, DIDAvailability,
56
+ BadFilesStatus, RuleState, BadPFNStatus)
57
+ from rucio.db.sqla.session import (read_session, stream_session, transactional_session,
58
+ DEFAULT_SCHEMA_NAME, BASE)
59
+ from rucio.db.sqla.util import temp_table_mngr
60
+ from rucio.rse import rsemanager as rsemgr
61
+
62
+ if TYPE_CHECKING:
63
+ from collections.abc import Sequence
64
+ from rucio.rse.protocols.protocol import RSEProtocol
65
+ from typing import Any, Optional
66
+ from sqlalchemy.orm import Session
67
+
68
+ REGION = make_region_memcached(expiration_time=60)
69
+ METRICS = MetricManager(module=__name__)
70
+
71
+
72
+ ScopeName = namedtuple('ScopeName', ['scope', 'name'])
73
+ Association = namedtuple('Association', ['scope', 'name', 'child_scope', 'child_name'])
74
+
75
+
76
@read_session
def get_bad_replicas_summary(rse_expression=None, from_date=None, to_date=None, filter_=None, *, session: "Session"):
    """
    List the bad file replicas summary. Method used by the rucio-ui.

    :param rse_expression: The RSE expression.
    :param from_date: The start date (exclusive lower bound on created_at).
    :param to_date: The end date (exclusive upper bound on created_at).
    :param filter_: Dictionary of attributes by which the RSE results should be filtered. e.g.: {'availability_write': True}
    :param session: The database session in use.

    :returns: A list of dictionaries, one per (rse_id, day, reason) incident. Each dictionary maps
              the name of every bad-replica state seen for that incident to its row count and also
              carries the keys 'rse_id', 'rse', 'created_at' and 'reason'.
    """
    rse_clause = []
    if rse_expression:
        for rse in parse_expression(expression=rse_expression, filter_=filter_, session=session):
            rse_clause.append(models.BadReplicas.rse_id == rse['id'])
    elif filter_:
        # Ensure we limit results to current VO even if we don't specify an RSE expression
        for rse in list_rses(filters=filter_, session=session):
            rse_clause.append(models.BadReplicas.rse_id == rse['id'])

    # Truncate created_at to the day, using whatever the backend offers.
    dialect_name = session.bind.dialect.name
    if dialect_name == 'oracle':
        to_days = func.trunc(models.BadReplicas.created_at, str('DD'))
    elif dialect_name == 'mysql':
        to_days = func.date(models.BadReplicas.created_at)
    elif dialect_name == 'postgresql':
        to_days = func.date_trunc('day', models.BadReplicas.created_at)
    else:
        # SQLite: the signature is strftime(format, time-value); the format comes first.
        to_days = func.strftime('%Y-%m-%d', models.BadReplicas.created_at)

    query = session.query(func.count(), to_days, models.BadReplicas.rse_id, models.BadReplicas.state, models.BadReplicas.reason)
    # To be added : HINTS
    if rse_clause:
        query = query.filter(or_(*rse_clause))
    if from_date:
        query = query.filter(models.BadReplicas.created_at > from_date)
    if to_date:
        query = query.filter(models.BadReplicas.created_at < to_date)
    summary = query.group_by(to_days, models.BadReplicas.rse_id, models.BadReplicas.reason, models.BadReplicas.state).all()

    # Fold the per-state rows into one dictionary per (rse_id, day, reason).
    incidents = {}
    for count, day, rse_id, state, reason in summary:
        incidents.setdefault((rse_id, day, reason), {})[str(state.name)] = count

    result = []
    for (rse_id, day, reason), res in incidents.items():
        res['rse_id'] = rse_id
        res['rse'] = get_rse_name(rse_id=rse_id, session=session)
        res['created_at'] = day
        res['reason'] = reason
        result.append(res)

    return result
128
+
129
+
130
@read_session
def __exist_replicas(rse_id, replicas, *, session: "Session"):
    """
    Internal method to check if a replica exists at a given site.

    :param rse_id: The RSE id.
    :param replicas: A list of tuples [(<scope>, <name>, <path>)] with either :
                     - scope and name are None and path not None
                     - scope and name are not None and path is None
                     The list is modified in place: every entry that is found is removed from it.
    :param session: The database session in use.

    :returns: A list of tuple (<scope>, <name>, <path>, <exists>, <already_declared>, <bytes>)
              where
              - <exists> is a boolean that identifies if the replica exists
              - <already_declared> is a boolean that identifies if the replica is already declared bad
    """
    found = []
    by_path = []
    by_did = []
    for scope, name, path in replicas:
        if not path:
            by_did.append(and_(models.RSEFileAssociation.scope == scope,
                               models.RSEFileAssociation.name == name))
            continue
        # A path is looked up both as given and with the leading slash toggled.
        by_path.append(models.RSEFileAssociation.path == path)
        if path.startswith('/'):
            by_path.append(models.RSEFileAssociation.path == path[1:])
        else:
            by_path.append(models.RSEFileAssociation.path == '/%s' % path)

    replica_columns = (models.RSEFileAssociation.path,
                       models.RSEFileAssociation.scope,
                       models.RSEFileAssociation.name,
                       models.RSEFileAssociation.rse_id,
                       models.RSEFileAssociation.bytes)

    for clause in (by_path, by_did):
        if not clause:
            continue
        for chunk in chunks(clause, 10):
            # 1 if at least one BAD declaration exists for the replica, otherwise 0.
            declared_bad = func.max(case((models.BadReplicas.state == BadFilesStatus.SUSPICIOUS, 0),
                                         (models.BadReplicas.state == BadFilesStatus.BAD, 1),
                                         else_=0))
            same_replica = and_(models.RSEFileAssociation.scope == models.BadReplicas.scope,
                                models.RSEFileAssociation.name == models.BadReplicas.name,
                                models.RSEFileAssociation.rse_id == models.BadReplicas.rse_id)
            # NOTE(review): the Oracle hint text below has no closing parenthesis; it is kept
            # exactly as it was - confirm whether that is intentional before changing it.
            query = session.query(*replica_columns, declared_bad)
            query = query.with_hint(models.RSEFileAssociation, "INDEX(REPLICAS REPLICAS_PATH_IDX", 'oracle')
            query = query.outerjoin(models.BadReplicas, same_replica)
            query = query.filter(models.RSEFileAssociation.rse_id == rse_id).filter(or_(*chunk))
            query = query.group_by(*replica_columns)

            for path, scope, name, _row_rse_id, size, state in query.all():
                # Drop whichever spelling of this replica the caller supplied.
                for candidate in ((scope, name, path), (None, None, path), (scope, name, None)):
                    if candidate in replicas:
                        replicas.remove(candidate)
                found.append((scope, name, path, True, state == 1, size))

    # Whatever is still in the list was not found on the RSE.
    found.extend((scope, name, path, False, False, None) for scope, name, path in replicas)

    return found
199
+
200
+
201
@read_session
def list_bad_replicas_status(state=BadFilesStatus.BAD, rse_id=None, younger_than=None, older_than=None, limit=None, list_pfns=False, vo='def', *, session: "Session"):
    """
    List the bad file replicas history states. Method used by the rucio-ui.

    :param state: The state of the file (SUSPICIOUS or BAD).
    :param rse_id: The RSE id.
    :param younger_than: datetime object to select bad replicas younger than this date.
    :param older_than: datetime object to select bad replicas older than this date.
    :param limit: The maximum number of replicas returned.
    :param list_pfns: If True, return a de-duplicated list of PFNs instead of a list of dictionaries.
    :param vo: The VO to find replicas from.
    :param session: The database session in use.

    :returns: If list_pfns is False, a list of dictionaries with the keys 'scope', 'name', 'rse',
              'rse_id', 'state', 'created_at' and 'updated_at'. If list_pfns is True, a list of PFNs
              without duplicates, in first-seen order.
    """
    result = []
    query = session.query(models.BadReplicas.scope, models.BadReplicas.name, models.BadReplicas.rse_id, models.BadReplicas.state, models.BadReplicas.created_at, models.BadReplicas.updated_at)
    if state:
        query = query.filter(models.BadReplicas.state == state)
    if rse_id:
        query = query.filter(models.BadReplicas.rse_id == rse_id)
    if younger_than:
        query = query.filter(models.BadReplicas.created_at >= younger_than)
    if older_than:
        query = query.filter(models.BadReplicas.created_at <= older_than)
    if limit:
        query = query.limit(limit)

    for badfile in query.yield_per(1000):
        # The VO restriction is applied here, on the rows, not in the SQL query.
        if badfile.scope.vo != vo:
            continue
        if list_pfns:
            result.append({'scope': badfile.scope, 'name': badfile.name, 'type': DIDType.FILE})
        else:
            result.append({'scope': badfile.scope, 'name': badfile.name, 'rse': get_rse_name(rse_id=badfile.rse_id, session=session), 'rse_id': badfile.rse_id, 'state': badfile.state, 'created_at': badfile.created_at, 'updated_at': badfile.updated_at})

    if list_pfns:
        reps = []
        for rep in list_replicas(result, schemes=None, unavailable=False, request_id=None, ignore_availability=True, all_states=True, session=session):
            pfn = None
            if rse_id in rep['rses'] and rep['rses'][rse_id]:
                pfn = rep['rses'][rse_id][0]
            if pfn:
                # A PFN on the requested RSE is known: keep only that one.
                reps.append(pfn)
            else:
                # No PFN for the requested RSE (or no RSE requested): keep every PFN of the file.
                reps.extend([item for row in rep['rses'].values() for item in row])
        # dict.fromkeys removes duplicates while keeping the order in which PFNs were found.
        result = list(dict.fromkeys(reps))
    return result
245
+
246
+
247
@transactional_session
def __declare_bad_file_replicas(pfns, rse_id, reason, issuer, status=BadFilesStatus.BAD, scheme='srm', force=False, *, session: "Session"):
    """
    Declare a list of bad replicas.

    :param pfns: Either a list of PFNs (string) or a list of replicas {'scope': <scope>, 'name': <name>, 'rse_id': <rse_id>}.
    :param rse_id: The RSE id.
    :param reason: The reason of the loss.
    :param issuer: The issuer account.
    :param status: Either BAD or SUSPICIOUS.
    :param scheme: The scheme of the PFNs.
    :param force: boolean, if declaring BAD replica, ignore existing replica status in the bad_replicas table. Default: False
    :param session: The database session in use.

    :returns: A list of strings, one per replica that could not be declared, each ending with the
              reason ('Already declared', 'PFN contains hidden chars' or 'Unknown replica').
    :raises ReplicaNotFound: If updating the replica states is reported as an unsupported operation.
    :raises RucioException: If flushing the session fails.
    """
    unknown_replicas = []
    replicas = []
    path_pfn_dict = {}

    if len(pfns) > 0 and isinstance(pfns[0], str):
        # If pfns is a list of PFNs, the scope and names need to be extracted from the path
        rse_info = rsemgr.get_rse_info(rse_id=rse_id, session=session)
        proto = rsemgr.create_protocol(rse_info, 'read', scheme=scheme)
        parsed_pfn = proto.parse_pfns(pfns=pfns)
        if rse_info['deterministic']:
            # TBD : In case of deterministic RSE, call the extract_scope_from_path method
            for pfn in parsed_pfn:
                # WARNING : this part is ATLAS specific and must be changed
                path = parsed_pfn[pfn]['path']
                name = parsed_pfn[pfn]['name']
                if path.startswith(('/user', '/group')):
                    path_parts = path.split('/')
                    scope = '%s.%s' % (path_parts[1], path_parts[2])
                elif path.startswith('/'):
                    scope = path.split('/')[1]
                else:
                    scope = path.split('/')[0]

                scope = InternalScope(scope, vo=issuer.vo)
                replicas.append({'scope': scope, 'name': name, 'rse_id': rse_id, 'state': status})
                path_pfn_dict['%s%s' % (path, name)] = pfn
        else:
            # For non-deterministic RSEs use the path + rse_id to extract the scope
            for pfn in parsed_pfn:
                path = '%s%s' % (parsed_pfn[pfn]['path'], parsed_pfn[pfn]['name'])
                replicas.append({'scope': None, 'name': None, 'rse_id': rse_id, 'path': path, 'state': status})
                path_pfn_dict[path] = pfn
    else:
        # If pfns is a list of replicas, just use scope, name and rse_id
        for pfn in pfns:
            replicas.append({'scope': pfn['scope'], 'name': pfn['name'], 'rse_id': rse_id, 'state': status})

    replicas_list = [(replica['scope'], replica['name'], replica.get('path', None)) for replica in replicas]

    bad_replicas_to_update = []

    for scope, name, path, __exists, already_declared, size in __exist_replicas(rse_id=rse_id, replicas=replicas_list, session=session):

        declared = False

        if __exists:

            if status == BadFilesStatus.BAD and (force or not already_declared):
                bad_replicas_to_update.append({'scope': scope, 'name': name, 'rse_id': rse_id, 'state': ReplicaState.BAD})
                declared = True

            # A new bad_replicas row is written for every SUSPICIOUS declaration, and for a BAD
            # declaration only when the replica is not already declared bad.
            if status == BadFilesStatus.SUSPICIOUS or (status == BadFilesStatus.BAD and not already_declared):
                new_bad_replica = models.BadReplicas(scope=scope, name=name, rse_id=rse_id, reason=reason, state=status, account=issuer, bytes=size)
                new_bad_replica.save(session=session, flush=False)
                declared = True

        if not declared:
            if already_declared:
                unknown_replicas.append('%s %s' % (path_pfn_dict.get(path, '%s:%s' % (scope, name)), 'Already declared'))
            elif path:
                if any(not isprint(char) for char in str(path)):
                    unknown_replicas.append('%s %s' % (path, 'PFN contains hidden chars'))
                else:
                    unknown_replicas.append('%s %s' % (path_pfn_dict[path], 'Unknown replica'))
            # NOTE(review): a replica that was not found and carries no path (given as
            # scope/name) is not reported at all here - confirm this is intended.

    if status == BadFilesStatus.BAD:
        # For BAD file, we modify the replica state, not for suspicious
        try:
            # there shouldn't be any exceptions since all replicas exist
            update_replicas_states(bad_replicas_to_update, session=session)
        except exception.UnsupportedOperation as error:
            raise exception.ReplicaNotFound("One or several replicas don't exist.") from error

    try:
        session.flush()
    except (IntegrityError, DatabaseError, FlushError) as error:
        raise exception.RucioException(error.args) from error

    return unknown_replicas
356
+
357
+
358
@transactional_session
def add_bad_dids(dids, rse_id, reason, issuer, state=BadFilesStatus.BAD, *, session: "Session"):
    """
    Declare a list of bad replicas.

    :param dids: The list of DIDs, each a dictionary with the keys 'scope' and 'name'.
    :param rse_id: The RSE id.
    :param reason: The reason of the loss.
    :param issuer: The issuer account.
    :param state: BadFilesStatus.BAD
    :param session: The database session in use.

    :returns: A list of strings '<scope>:<name> <error>', one per DID that could not be declared,
              where <error> is 'Already declared' or 'Unknown replica'.
    :raises ReplicaNotFound: If updating the replica states is reported as an unsupported operation.
    :raises RucioException: If flushing the session fails.
    """
    unknown_replicas = []
    replicas_for_update = []
    replicas_list = [(InternalScope(did['scope'], vo=issuer.vo), did['name'], None) for did in dids]

    for scope, name, _, __exists, already_declared, size in __exist_replicas(rse_id=rse_id, replicas=replicas_list, session=session):
        if __exists and not already_declared:
            replicas_for_update.append({'scope': scope, 'name': name, 'rse_id': rse_id, 'state': ReplicaState.BAD})
            new_bad_replica = models.BadReplicas(scope=scope, name=name, rse_id=rse_id, reason=reason, state=state,
                                                 account=issuer, bytes=size)
            new_bad_replica.save(session=session, flush=False)
            # The replica must no longer be used as a transfer source.
            session.query(models.Source).filter_by(scope=scope, name=name,
                                                   rse_id=rse_id).delete(synchronize_session=False)
        else:
            # Report the scope of THIS replica; the loop variable of the DID loop above would
            # always hold the last DID of the input.
            error = 'Already declared' if already_declared else 'Unknown replica'
            unknown_replicas.append('%s:%s %s' % (scope.external, name, error))

    if state == BadFilesStatus.BAD:
        try:
            update_replicas_states(replicas_for_update, session=session)
        except exception.UnsupportedOperation as error:
            raise exception.ReplicaNotFound("One or several replicas don't exist.") from error

    try:
        session.flush()
    except (IntegrityError, DatabaseError, FlushError) as error:
        raise exception.RucioException(error.args) from error

    return unknown_replicas
405
+
406
+
407
@transactional_session
def declare_bad_file_replicas(replicas: list, reason: str, issuer, status=BadFilesStatus.BAD, force: bool = False, *,
                              session: "Session"):
    """
    Declare a list of bad replicas.

    The replicas are grouped per RSE and each group is declared separately.

    :param replicas: Either a list of PFNs (string) or a list of replicas {'scope': <scope>, 'name': <name>, 'rse_id': <rse_id>}.
    :param reason: The reason of the loss.
    :param issuer: The issuer account.
    :param status: The status of the file (SUSPICIOUS or BAD).
    :param force: boolean, if declaring BAD replica, ignore existing replica status in the bad_replicas table. Default: False
    :param session: The database session in use.
    :returns: Dictionary {rse_id -> [replicas failed to declare with errors]}
    :raises InvalidType: If the elements of replicas are not all of the same type as the first one.
    """
    unknown_replicas = {}
    if not replicas:
        return unknown_replicas

    # Every element has to be of the same kind as the first one.
    type_ = type(replicas[0])
    for entry in replicas:
        if not isinstance(entry, type_):
            raise exception.InvalidType('Replicas must be specified either as a list of string or a list of dicts')

    scheme = None
    if type_ == str:
        scheme, files_to_declare, unknown_replicas = get_pfn_to_rse(replicas, vo=issuer.vo, session=session)
    else:
        files_to_declare = {}
        for entry in replicas:
            files_to_declare.setdefault(entry['rse_id'], []).append(entry)

    for rse_id, rse_files in files_to_declare.items():
        notdeclared = __declare_bad_file_replicas(rse_files, rse_id, reason, issuer,
                                                  status=status, scheme=scheme,
                                                  force=force, session=session)
        if notdeclared:
            unknown_replicas[rse_id] = notdeclared
    return unknown_replicas
442
+
443
+
444
@read_session
def get_pfn_to_rse(pfns, vo='def', *, session: "Session"):
    """
    Get the RSE associated to a list of PFNs.

    :param pfns: The list of pfn.
    :param vo: The VO to find RSEs at.
    :param session: The database session in use.

    :returns: a tuple : scheme, {rse1 : [pfn1, pfn2, ...], rse2: [pfn3, pfn4, ...]}, {'unknown': [pfn5, pfn6, ...]}.
              For an empty list of PFNs the scheme is None and both dictionaries are empty.
    :raises InvalidType: If the PFNs do not all use the same protocol.
    :raises RucioException: If a PFN matches more than one protocol pattern.
    """
    unknown_replicas = {}
    dict_rse = {}
    surls = clean_surls(pfns)
    if not surls:
        # Nothing to resolve: do not query with an empty host condition.
        return None, dict_rse, unknown_replicas

    scheme = surls[0].split(':')[0]
    storage_elements = []
    se_condition = []
    for surl in surls:
        if surl.split(':')[0] != scheme:
            raise exception.InvalidType('The PFNs specified must have the same protocol')

        # Host part of "<scheme>://<host>[:<port>]/..."
        storage_element = surl.split('/')[2].split(':')[0]

        if storage_element not in storage_elements:
            storage_elements.append(storage_element)
            se_condition.append(models.RSEProtocols.hostname == storage_element)

    # One query for all hosts, built once every host has been collected.
    query = session.query(models.RSEProtocols.rse_id,
                          models.RSEProtocols.scheme,
                          models.RSEProtocols.hostname,
                          models.RSEProtocols.port,
                          models.RSEProtocols.prefix).\
        join(models.RSE, models.RSE.id == models.RSEProtocols.rse_id).\
        filter(and_(or_(*se_condition), models.RSEProtocols.scheme == scheme)).\
        filter(models.RSE.deleted == false()).\
        filter(models.RSE.staging_area == false())

    # rse_id -> URL prefixes (with and without the port) a PFN on that RSE may contain.
    protocols = {}
    for rse_id, protocol, hostname, port, prefix in query.yield_per(10000):
        patterns = protocols.setdefault(rse_id, [])
        patterns.append('%s://%s:%s%s' % (protocol, hostname, port, prefix))
        without_port = '%s://%s%s' % (protocol, hostname, prefix)
        if without_port not in patterns:
            patterns.append(without_port)

    # The RSE of the previous match is tried first, as consecutive PFNs usually share an RSE.
    hint = None
    for surl in surls:
        if hint is not None and any(pattern in surl for pattern in protocols[hint]):
            dict_rse[hint].append(surl)
            continue

        # No hint yet, or the hinted RSE does not fit: search all candidate RSEs so that the
        # PFN is either assigned or reported as unknown, never silently dropped.
        mult_rse_match = 0
        for rse_id, patterns in protocols.items():
            for pattern in patterns:
                if pattern in surl and get_rse_vo(rse_id=rse_id, session=session) == vo:
                    mult_rse_match += 1
                    if mult_rse_match > 1:
                        print('ERROR, multiple matches : %s at %s' % (surl, rse_id))
                        raise exception.RucioException('ERROR, multiple matches : %s at %s' % (surl, get_rse_name(rse_id=rse_id, session=session)))
                    hint = rse_id
                    dict_rse.setdefault(hint, []).append(surl)
        if mult_rse_match == 0:
            unknown_replicas.setdefault('unknown', []).append(surl)
    return scheme, dict_rse, unknown_replicas
512
+
513
+
514
@read_session
def get_bad_replicas_backlog(*, session: "Session"):
    """
    Get the replica backlog by RSE.

    Counts, per RSE, the replicas in state BAD whose data identifier is not marked as LOST.

    :param session: The database session in use.

    :returns: a dictionary {rse_id: cnt_bad_replicas}.
    """
    same_did = and_(models.DataIdentifier.scope == models.RSEFileAssociation.scope,
                    models.DataIdentifier.name == models.RSEFileAssociation.name)

    backlog_query = session.query(func.count(models.RSEFileAssociation.rse_id), models.RSEFileAssociation.rse_id)
    backlog_query = backlog_query.with_hint(models.RSEFileAssociation, 'INDEX(DIDS DIDS_PK) USE_NL(DIDS) INDEX_RS_ASC(REPLICAS ("REPLICAS"."STATE"))', 'oracle')
    backlog_query = backlog_query.filter(models.RSEFileAssociation.state == ReplicaState.BAD)
    backlog_query = backlog_query.join(models.DataIdentifier, same_did)
    backlog_query = backlog_query.filter(models.DataIdentifier.availability != DIDAvailability.LOST)
    backlog_query = backlog_query.group_by(models.RSEFileAssociation.rse_id)

    return {rse_id: cnt for cnt, rse_id in backlog_query.all()}
537
+
538
+
539
@read_session
def list_bad_replicas(limit=10000, thread=None, total_threads=None, rses=None, *, session: "Session"):
    """
    List the file replicas in state BAD whose data identifier is not marked as LOST.

    :param limit: The maximum number of replicas returned.
    :param thread: The assigned thread for this necromancer.
    :param total_threads: The total number of threads of all necromancers.
    :param rses: Optional list of RSE dictionaries (each with an 'id' key) to restrict the search to.
    :param session: The database session in use.

    :returns: a list of dictionary {'scope' scope, 'name': name, 'rse_id': rse_id, 'rse': rse}.
    """
    schema_dot = '%s.' % DEFAULT_SCHEMA_NAME if DEFAULT_SCHEMA_NAME else ''

    bad_query = session.query(models.RSEFileAssociation.scope,
                              models.RSEFileAssociation.name,
                              models.RSEFileAssociation.rse_id)
    bad_query = bad_query.with_hint(models.RSEFileAssociation, 'INDEX(DIDS DIDS_PK) USE_NL(DIDS) INDEX_RS_ASC(REPLICAS ("REPLICAS"."STATE"))', 'oracle')
    bad_query = bad_query.filter(models.RSEFileAssociation.state == ReplicaState.BAD)

    # Share the work between the threads based on the replica name.
    bad_query = filter_thread_work(session=session, query=bad_query, total_threads=total_threads, thread_id=thread, hash_variable='%sreplicas.name' % (schema_dot))

    same_did = and_(models.DataIdentifier.scope == models.RSEFileAssociation.scope,
                    models.DataIdentifier.name == models.RSEFileAssociation.name)
    bad_query = bad_query.join(models.DataIdentifier, same_did)
    bad_query = bad_query.filter(models.DataIdentifier.availability != DIDAvailability.LOST)

    if rses:
        on_selected_rses = [models.RSEFileAssociation.rse_id == rse['id'] for rse in rses]
        bad_query = bad_query.filter(or_(*on_selected_rses))

    bad_query = bad_query.limit(limit)
    return [{'scope': scope, 'name': name, 'rse_id': rse_id, 'rse': get_rse_name(rse_id=rse_id, session=session)}
            for scope, name, rse_id in bad_query.yield_per(1000)]
575
+
576
+
577
@stream_session
def get_did_from_pfns(pfns, rse_id=None, vo='def', *, session: "Session"):
    """
    Get the DIDs associated to a PFN on one given RSE

    :param pfns: The list of PFNs.
    :param rse_id: The RSE id. If not given, the RSE of every PFN is looked up and the scheme is
                   taken from the PFNs; if given, the scheme 'srm' is used.
    :param vo: The VO to get DIDs from.
    :param session: The database session in use.
    :returns: A generator of dictionaries {pfn: {'scope': scope, 'name': name}}
    :raises Exception: If no RSE is given and the RSE of at least one PFN cannot be determined.
    """
    dict_rse = {}
    if not rse_id:
        scheme, dict_rse, unknown_replicas = get_pfn_to_rse(pfns, vo=vo, session=session)
        if unknown_replicas:
            # Same exception type as before, now saying which PFNs are the problem.
            raise Exception('Cannot determine the RSE of the following PFNs: %s' % unknown_replicas)
    else:
        scheme = 'srm'
        dict_rse[rse_id] = pfns

    for current_rse_id, rse_pfns in dict_rse.items():
        rse_info = rsemgr.get_rse_info(rse_id=current_rse_id, session=session)
        proto = rsemgr.create_protocol(rse_info, 'read', scheme=scheme)
        parsed_pfn = proto.parse_pfns(pfns=rse_pfns)

        if rse_info['deterministic']:
            # WARNING : this part is ATLAS specific and must be changed
            for pfn in parsed_pfn:
                path = parsed_pfn[pfn]['path']
                name = parsed_pfn[pfn]['name']
                if path.startswith(('/user', '/group')):
                    path_parts = path.split('/')
                    scope = '%s.%s' % (path_parts[1], path_parts[2])
                elif path.startswith('/'):
                    scope = path.split('/')[1]
                else:
                    scope = path.split('/')[0]
                scope = InternalScope(scope, vo)
                yield {pfn: {'scope': scope, 'name': name}}
        else:
            # Non-deterministic RSE: the DID has to be looked up by the stored path.
            pfndict = {}
            condition = []
            for pfn in parsed_pfn:
                path = '%s%s' % (parsed_pfn[pfn]['path'], parsed_pfn[pfn]['name'])
                pfndict[path] = pfn
                condition.append(and_(models.RSEFileAssociation.path == path, models.RSEFileAssociation.rse_id == current_rse_id))
            if not condition:
                # An empty OR must not be used as a filter.
                continue
            for scope, name, path in session.query(models.RSEFileAssociation.scope, models.RSEFileAssociation.name, models.RSEFileAssociation.path).filter(or_(*condition)):
                yield {pfndict[path]: {'scope': scope, 'name': name}}
627
+
628
+
629
+ def _pick_n_random(nrandom, generator):
630
+ """
631
+ Select n random elements from the generator
632
+ """
633
+
634
+ if not nrandom:
635
+ # pass-through the data unchanged
636
+ yield from generator
637
+ return
638
+
639
+ # A "reservoir sampling" algorithm:
640
+ # Copy the N first files from the generator. After that, following element may be picked to substitute
641
+ # one of the previously selected element with a probability which decreases as the number of encountered elements grows.
642
+ selected = []
643
+ i = 0
644
+ iterator = iter(generator)
645
+ try:
646
+ for _ in range(nrandom):
647
+ selected.append(next(iterator))
648
+ i += 1
649
+
650
+ while True:
651
+ element = next(iterator)
652
+ i += 1
653
+
654
+ index_to_substitute = random.randint(0, i)
655
+ if index_to_substitute < nrandom:
656
+ selected[index_to_substitute] = element
657
+ except StopIteration:
658
+ pass
659
+
660
+ for r in selected:
661
+ yield r
662
+
663
+
664
def _list_files_wo_replicas(files_wo_replica, *, session: "Session"):
    """
    Yield the file DIDs matching the given scope/name pairs.

    :param files_wo_replica: Collection of dictionaries with the keys 'scope' and 'name'.
    :param session: The database session in use.
    :returns: A generator of tuples (scope, name, bytes, md5, adler32). Nothing is yielded for an
              empty input.
    """
    if not files_wo_replica:
        return

    # The conditions are built in (scope, name) order.
    ordered_files = sorted(files_wo_replica, key=lambda f: (f['scope'], f['name']))
    file_wo_clause = [and_(models.DataIdentifier.scope == entry['scope'],
                           models.DataIdentifier.name == entry['name'])
                      for entry in ordered_files]

    did_query = session.query(models.DataIdentifier.scope,
                              models.DataIdentifier.name,
                              models.DataIdentifier.bytes,
                              models.DataIdentifier.md5,
                              models.DataIdentifier.adler32)
    did_query = did_query.filter_by(did_type=DIDType.FILE).filter(or_(*file_wo_clause))
    did_query = did_query.with_hint(models.DataIdentifier, text="INDEX(DIDS DIDS_PK)", dialect_name='oracle')

    for scope, name, bytes_, md5, adler32 in did_query:
        yield scope, name, bytes_, md5, adler32
680
+
681
+
682
def get_vp_endpoint():
    """
    Return the address of the Virtual Placement (VP) server.

    The value is read from the option 'vp_endpoint' of the configuration section
    'virtual_placement'; an empty string is returned when it is not set.
    Once VP is integrated in Rucio it won't be needed.
    """
    return config_get('virtual_placement', 'vp_endpoint', default='')
689
+
690
+
691
def get_multi_cache_prefix(cache_site, filename, logger=logging.log):
    """
    For a given cache site and filename, return the address of the cache node that
    should be prefixed.

    :param cache_site: Cache site
    :param filename: Filename
    :param logger: Callable taking a logging level and a message.
    :returns: The address of the cache node, or an empty string if no VP endpoint is configured,
              the server ranges cannot be loaded, the site is unknown or no range matches.
    """
    endpoint = get_vp_endpoint()
    if not endpoint:
        return ''

    x_caches = REGION.get('CacheSites')
    if x_caches is NO_VALUE:
        # Nothing cached: ask the VP server and remember the answer, or remember the failure.
        # NOTE(review): the request is sent with verify=False (certificate verification
        # disabled) - confirm this is acceptable for the deployment.
        try:
            response = requests.get('{}/serverRanges'.format(endpoint), timeout=1, verify=False)
            if not response.ok:
                REGION.set('CacheSites', {'could not reload': ''})
                return ''
            x_caches = response.json()
            REGION.set('CacheSites', x_caches)
        except requests.exceptions.RequestException as error:
            REGION.set('CacheSites', {'could not reload': ''})
            logger(logging.WARNING, 'In get_multi_cache_prefix, could not access {}. Excaption:{}'.format(endpoint, error))
            return ''

    if cache_site not in x_caches:
        return ''

    site_description = x_caches[cache_site]
    # Map the file name to a number in [0, 1) using the first 8 bytes of its SHA-256 digest.
    digest_head = sha256(filename.encode('utf-8')).digest()[:8]
    position = float(unpack('Q', digest_head)[0]) / 2**64
    # The first range whose upper bound lies above that number selects the server.
    for server_range in site_description['ranges']:
        if position < server_range[1]:
            return site_description['servers'][server_range[0]][0]
    return ''
728
+
729
+
730
def _get_list_replicas_protocols(
        rse_id: str,
        domain: str,
        schemes: "Sequence[str]",
        additional_schemes: "Sequence[str]",
        session: "Session"
) -> "list[tuple[str, RSEProtocol, int]]":
    """
    Select the protocols to be used by list_replicas to build the PFNs for all replicas on the given RSE

    :param rse_id: The RSE id.
    :param domain: 'wan', 'lan' or 'all' (both 'wan' and 'lan').
    :param schemes: The requested schemes. If empty, the scheme selected for reading is used for
                    each domain.
    :param additional_schemes: Schemes which are added to the ones above if not already present.
    :param session: The database session in use.
    :returns: A list of tuples (domain, protocol, priority).
    """
    domains = ['wan', 'lan'] if domain == 'all' else [domain]

    rse_info = rsemgr.get_rse_info(rse_id=rse_id, session=session)
    # compute scheme priorities, and don't forget to exclude disabled protocols
    # 0 or None in RSE protocol definition = disabled, 1 = highest priority
    scheme_priorities = {
        'wan': {p['scheme']: p['domains']['wan']['read'] for p in rse_info['protocols'] if p['domains']['wan']['read']},
        'lan': {p['scheme']: p['domains']['lan']['read'] for p in rse_info['protocols'] if p['domains']['lan']['read']},
    }

    # Always work on a private list: the caller may pass any sequence (e.g. a tuple), and
    # schemes are appended below.
    rse_schemes = list(schemes) if schemes else []
    if not rse_schemes:
        try:
            for current_domain in domains:
                rse_schemes.append(rsemgr.select_protocol(rse_settings=rse_info,
                                                          operation='read',
                                                          domain=current_domain)['scheme'])
        except exception.RSEProtocolNotSupported:
            pass  # no need to be verbose
        except Exception:
            print(format_exc())

    for s in additional_schemes:
        if s not in rse_schemes:
            rse_schemes.append(s)

    protocols = []
    for s in rse_schemes:
        try:
            for current_domain in domains:
                protocol = rsemgr.create_protocol(rse_settings=rse_info, operation='read', scheme=s, domain=current_domain)
                priority = scheme_priorities[current_domain][s]

                protocols.append((current_domain, protocol, priority))
        except exception.RSEProtocolNotSupported:
            pass  # no need to be verbose
        except Exception:
            print(format_exc())
    return protocols
779
+
780
+
781
def _build_list_replicas_pfn(
        scope: "InternalScope",
        name: str,
        rse_id: str,
        domain: str,
        protocol: "RSEProtocol",
        path: str,
        sign_urls: bool,
        signature_lifetime: int,
        client_location: "dict[str, Any]",
        logger=logging.log,
        *,
        session: "Session",
) -> str:
    """
    Generate the PFN for the given scope/name on the rse.
    If needed, sign the PFN url
    If relevant, add the server-side root proxy to the pfn url

    :param scope: Scope of the file for which the PFN is built.
    :param name: Name of the file for which the PFN is built.
    :param rse_id: Id of the RSE hosting the replica.
    :param domain: Network domain of the protocol; proxy/cache prefixes are only considered for 'wan'.
    :param protocol: Protocol object used to translate the LFN into a PFN.
    :param path: Replica path handed to the protocol together with scope and name.
    :param sign_urls: If set, https PFNs are signed when the RSE defines a 'sign_url' attribute.
    :param signature_lifetime: Lifetime passed to get_signed_url.
    :param client_location: Client location dictionary; only its 'site' entry is read here.
    :param logger: Callable used as logger(level, message).
    :param session: The database session in use.
    :returns: The (possibly signed and/or proxy-prefixed) PFN.
    """
    pfn: str = list(protocol.lfns2pfns(lfns={'scope': scope.external,
                                             'name': name,
                                             'path': path}).values())[0]

    # do we need to sign the URLs?
    if sign_urls and protocol.attributes['scheme'] == 'https':
        service = get_rse_attribute(rse_id, 'sign_url', session=session)
        if service:
            pfn = get_signed_url(rse_id=rse_id, service=service, operation='read', url=pfn, lifetime=signature_lifetime)

    # server side root proxy handling if location is set.
    # supports root and http destinations
    # cannot be pushed into protocols because we need to lookup rse attributes.
    # ultra-conservative implementation.
    if domain == 'wan' and protocol.attributes['scheme'] in ['root', 'http', 'https'] and client_location:

        if 'site' in client_location and client_location['site']:
            replica_site = get_rse_attribute(rse_id, 'site', session=session)

            # does it match with the client? if not, it's an outgoing connection
            # therefore the internal proxy must be prepended
            if client_location['site'] != replica_site:
                cache_site = config_get('clientcachemap', client_location['site'], default='', session=session)
                if cache_site != '':
                    # the client site has a cache: pick the cache server responsible for this file name
                    selected_prefix = get_multi_cache_prefix(cache_site, name)
                    if selected_prefix:
                        pfn = f"root://{selected_prefix}//{pfn.replace('davs://', 'root://')}"
                else:
                    # the client site has no cache: check if it has defined an internal root proxy
                    root_proxy_internal = config_get('root-proxy-internal',    # section
                                                     client_location['site'],  # option
                                                     default='',               # empty string to circumvent exception
                                                     session=session)

                    if root_proxy_internal:
                        # TODO: XCache does not seem to grab signed URLs. Doublecheck with XCache devs.
                        # For now -> skip prepending XCache for GCS.
                        if 'storage.googleapis.com' in pfn or 'atlas-google-cloud.cern.ch' in pfn or 'amazonaws.com' in pfn:
                            pass  # ATLAS HACK
                        else:
                            # don't forget to mangle gfal-style davs URL into generic https URL
                            pfn = f"root://{root_proxy_internal}//{pfn.replace('davs://', 'https://')}"

    # Use the caller's session, consistent with the other attribute lookups in this function.
    simulate_multirange = get_rse_attribute(rse_id, 'simulate_multirange', session=session)

    if simulate_multirange is not None:
        try:
            # cover values that cannot be cast to int
            simulate_multirange = int(simulate_multirange)
        except ValueError:
            simulate_multirange = 1
            logger(logging.WARNING, 'Value encountered when retrieving RSE attribute "simulate_multirange" not compatible with "int", used default value "1".')
        if simulate_multirange <= 0:
            logger(logging.WARNING, f'Value {simulate_multirange} encountered when retrieving RSE attribute "simulate_multirange" is <= 0, used default value "1".')
            simulate_multirange = 1
        pfn += f'&#multirange=false&nconnections={simulate_multirange}'

    return pfn
861
+
862
+
863
def _list_replicas(replicas, show_pfns, schemes, files_wo_replica, client_location, domain,
                   sign_urls, signature_lifetime, resolve_parents, filters, by_rse_name, *, session: "Session"):
    """
    Turn rows of replica information into one dictionary per file and yield them.

    Rows are grouped with itertools.groupby on (scope, name). groupby only merges consecutive
    rows, so rows belonging to the same file must be adjacent in `replicas`.

    :param replicas: Iterable of 13-element rows: scope, name, archive_scope, archive_name, bytes, md5,
                     adler32, path, state, rse_id, rse, rse_type, volatile.
    :param show_pfns: If false, only the replica states are filled in; no PFN is generated.
    :param schemes: Schemes forwarded to _get_list_replicas_protocols.
    :param files_wo_replica: Forwarded to _list_files_wo_replicas; its results are yielded last.
    :param client_location: Client location dictionary; its 'site' entry drives lan/wan auto-selection.
    :param domain: Network domain; None means auto-select ('lan' for RSEs at the client's site, else 'wan').
    :param sign_urls: Forwarded to _build_list_replicas_pfn.
    :param signature_lifetime: Forwarded to _build_list_replicas_pfn.
    :param resolve_parents: If set, add a 'parents' entry listing all parent DIDs of each file.
    :param filters: Filter passed to parse_expression when resolving the RSEs of the client's site.
    :param by_rse_name: If true, per-RSE dictionaries are keyed by RSE name, otherwise by RSE id.
    :param session: The database session in use.
    :returns: Generator of file dictionaries.
    """

    # the `domain` variable name will be re-used throughout the function with different values
    input_domain = domain

    # find all RSEs local to the client's location in autoselect mode (i.e., when domain is None)
    local_rses = []
    if input_domain is None:
        if client_location and 'site' in client_location and client_location['site']:
            try:
                local_rses = [rse['id'] for rse in parse_expression('site=%s' % client_location['site'], filter_=filters, session=session)]
            except Exception:
                pass  # do not hard fail if site cannot be resolved or is empty

    # pfns_cache: computed paths keyed by "determinism_type:scope:name"
    # protocols_cache: rse_id -> {is_archive: protocols}, filled lazily below
    file, pfns_cache = {}, {}
    protocols_cache = defaultdict(dict)

    for _, replica_group in groupby(replicas, key=lambda x: (x[0], x[1])):  # Group by scope/name
        file = {}
        pfns = {}
        for scope, name, archive_scope, archive_name, bytes_, md5, adler32, path, state, rse_id, rse, rse_type, volatile in replica_group:
            if isinstance(archive_scope, str):
                archive_scope = InternalScope(archive_scope, fromExternal=False)

            # the row describes a constituent whose replica lives inside an archive
            is_archive = bool(archive_scope and archive_name)

            # it is the first row in the scope/name group
            if not file:
                file['scope'], file['name'] = scope, name
                file['bytes'], file['md5'], file['adler32'] = bytes_, md5, adler32
                file['pfns'], file['rses'], file['states'] = {}, {}, {}
                if resolve_parents:
                    file['parents'] = ['%s:%s' % (parent['scope'].internal, parent['name'])
                                       for parent in rucio.core.did.list_all_parent_dids(scope, name, session=session)]

            # a row without rse_id carries no replica (the file entry above is still created)
            if not rse_id:
                continue

            rse_key = rse if by_rse_name else rse_id
            file['states'][rse_key] = str(state.name if state else state)

            if not show_pfns:
                continue

            # It's the first time we see this RSE, initialize the protocols needed for PFN generation
            # NOTE(review): an empty protocol list is falsy too, so it is looked up again on every row
            protocols = protocols_cache.get(rse_id, {}).get(is_archive)
            if not protocols:
                # select the lan door in autoselect mode, otherwise use the wan door
                domain = input_domain
                if domain is None:
                    domain = 'wan'
                    if local_rses and rse_id in local_rses:
                        domain = 'lan'

                protocols = _get_list_replicas_protocols(
                    rse_id=rse_id,
                    domain=domain,
                    schemes=schemes,
                    # We want 'root' for archives even if it wasn't included into 'schemes'
                    additional_schemes=['root'] if is_archive else [],
                    session=session,
                )
                protocols_cache[rse_id][is_archive] = protocols

            # build the pfns
            for domain, protocol, priority in protocols:
                # If the current "replica" is a constituent inside an archive, we must construct the pfn for the
                # parent (archive) file and append the xrdcl.unzip query string to it.
                if is_archive:
                    t_scope = archive_scope
                    t_name = archive_name
                else:
                    t_scope = scope
                    t_name = name

                # NOTE(review): `path` is overwritten here and stays overwritten for the
                # remaining protocols of this row
                if 'determinism_type' in protocol.attributes:  # PFN is cachable
                    try:
                        path = pfns_cache['%s:%s:%s' % (protocol.attributes['determinism_type'], t_scope.internal, t_name)]
                    except KeyError:  # No cache entry scope:name found for this protocol
                        path = protocol._get_path(t_scope, t_name)
                        pfns_cache['%s:%s:%s' % (protocol.attributes['determinism_type'], t_scope.internal, t_name)] = path

                try:
                    pfn = _build_list_replicas_pfn(
                        scope=t_scope,
                        name=t_name,
                        rse_id=rse_id,
                        domain=domain,
                        protocol=protocol,
                        path=path,
                        sign_urls=sign_urls,
                        signature_lifetime=signature_lifetime,
                        client_location=client_location,
                        session=session,
                    )

                    client_extract = False
                    if is_archive:
                        # 'zip' sorts after 'lan' and 'wan' in the ordering applied further down
                        domain = 'zip'
                        pfn = add_url_query(pfn, {'xrdcl.unzip': name})
                        if protocol.attributes['scheme'] == 'root':
                            # xroot supports downloading files directly from inside an archive. Disable client_extract and prioritize xroot.
                            client_extract = False
                            priority = -1
                        else:
                            client_extract = True

                    pfns[pfn] = {
                        'rse_id': rse_id,
                        'rse': rse,
                        'type': str(rse_type.name),
                        'volatile': volatile,
                        'domain': domain,
                        'priority': priority,
                        'client_extract': client_extract
                    }

                except Exception:
                    # never end up here
                    print(format_exc())

                if protocol.attributes['scheme'] == 'srm':
                    try:
                        file['space_token'] = protocol.attributes['extended_attributes']['space_token']
                    except KeyError:
                        file['space_token'] = None

        # fill the 'pfns' and 'rses' dicts in file
        if pfns:
            # set the total order for the priority
            # --> exploit that L(AN) comes before W(AN) before Z(IP) alphabetically
            # and use 1-indexing to be compatible with metalink
            sorted_pfns = sorted(pfns.items(), key=lambda item: (item[1]['domain'], item[1]['priority'], item[0]))
            for i, (pfn, pfn_value) in enumerate(list(sorted_pfns), start=1):
                pfn_value['priority'] = i
                file['pfns'][pfn] = pfn_value

            # list the pfns of each RSE in priority order
            sorted_pfns = sorted(file['pfns'].items(), key=lambda item: (item[1]['rse_id'], item[1]['priority'], item[0]))
            for pfn, pfn_value in sorted_pfns:
                rse_key = pfn_value['rse'] if by_rse_name else pfn_value['rse_id']
                file['rses'].setdefault(rse_key, []).append(pfn)

        if file:
            yield file

    # finally, report the files for which no replica exists; they get empty 'pfns' and 'rses'
    for scope, name, bytes_, md5, adler32 in _list_files_wo_replicas(files_wo_replica, session=session):
        yield {
            'scope': scope,
            'name': name,
            'bytes': bytes_,
            'md5': md5,
            'adler32': adler32,
            'pfns': {},
            'rses': defaultdict(list)
        }
1019
+
1020
+
1021
@stream_session
def list_replicas(
        dids: "Sequence[dict[str, Any]]",
        schemes: "Optional[list[str]]" = None,
        unavailable: bool = False,
        request_id: "Optional[str]" = None,
        ignore_availability: bool = True,
        all_states: bool = False,
        pfns: bool = True,
        rse_expression: "Optional[str]" = None,
        client_location: "Optional[dict[str, Any]]" = None,
        domain: "Optional[str]" = None,
        sign_urls: bool = False,
        signature_lifetime: "Optional[int]" = None,
        resolve_archives: bool = True,
        resolve_parents: bool = False,
        nrandom: "Optional[int]" = None,
        updated_after: "Optional[datetime]" = None,
        by_rse_name: bool = False,
        *, session: "Session",
):
    """
    List file replicas for a list of data identifiers (DIDs).

    This is a generator: it yields one dictionary per file, as produced by _list_replicas.

    :param dids: The list of data identifiers (DIDs).
    :param schemes: A list of schemes to filter the replicas. (e.g. file, http, ...)
    :param unavailable: (deprecated) Also include unavailable replicas in the list.
    :param request_id: ID associated with the request for debugging.
    :param ignore_availability: Ignore the RSE blocklisting.
    :param all_states: Return all replicas whatever state they are in. Adds an extra 'states' entry in the result dictionary.
    :param pfns: Passed to _list_replicas as `show_pfns`; when false, no PFN is generated.
    :param rse_expression: The RSE expression to restrict list_replicas on a set of RSEs.
    :param client_location: Client location dictionary for PFN modification {'ip', 'fqdn', 'site', 'latitude', 'longitude'}
    :param domain: The network domain for the call, either None, 'wan' or 'lan'. None is automatic mode, 'all' is both ['lan','wan']
    :param sign_urls: If set, will sign the PFNs if necessary.
    :param signature_lifetime: If supported, in seconds, restrict the lifetime of the signed PFN.
    :param resolve_archives: When set to true, find archives which contain the replicas.
    :param resolve_parents: When set to true, find all parent datasets which contain the replicas.
    :param nrandom: Passed to _pick_n_random; presumably the number of randomly picked files to return - confirm against that helper.
    :param updated_after: datetime (UTC time), only return replicas updated after this time
    :param by_rse_name: if True, rse information will be returned in dicts indexed by rse name; otherwise: in dicts indexed by rse id
    :param session: The database session in use.
    """
    # For historical reasons:
    # - list_replicas([some_file_did]), must return the file even if it doesn't have replicas
    # - list_replicas([some_collection_did]) must only return files with replicas

    def _replicas_filter_subquery():
        """
        Build the sub-query used to filter replicas according to list_replica's input arguments
        """
        stmt = select(
            models.RSEFileAssociation.scope,
            models.RSEFileAssociation.name,
            models.RSEFileAssociation.path,
            models.RSEFileAssociation.state,
            models.RSEFileAssociation.bytes,
            models.RSEFileAssociation.md5,
            models.RSEFileAssociation.adler32,
            models.RSE.id.label('rse_id'),
            models.RSE.rse.label('rse_name'),
            models.RSE.rse_type,
            models.RSE.volatile,
        ).join(
            models.RSE,
            and_(models.RSE.id == models.RSEFileAssociation.rse_id,
                 models.RSE.deleted == false())
        )

        if not ignore_availability:
            stmt = stmt.where(models.RSE.availability_read == true())

        if updated_after:
            stmt = stmt.where(models.RSEFileAssociation.updated_at >= updated_after)

        if rse_expression:
            # `filter_` is assigned in the enclosing function before this helper is called
            rses = parse_expression(expression=rse_expression, filter_=filter_, session=session)
            # When the number of RSEs is small, don't go through the overhead of
            # creating and using a temporary table. Rely on a simple "in" query.
            # The number "4" was picked without any particular reason
            if 0 < len(rses) < 4:
                stmt = stmt.where(models.RSE.id.in_([rse['id'] for rse in rses]))
            else:
                rses_temp_table = temp_table_mngr(session).create_id_table()
                session.execute(insert(rses_temp_table), [{'id': rse['id']} for rse in rses])
                stmt = stmt.join(rses_temp_table, models.RSE.id == rses_temp_table.id)

        if not all_states:
            if not unavailable:
                state_clause = models.RSEFileAssociation.state == ReplicaState.AVAILABLE
            else:
                state_clause = or_(
                    models.RSEFileAssociation.state == ReplicaState.AVAILABLE,
                    models.RSEFileAssociation.state == ReplicaState.UNAVAILABLE,
                    models.RSEFileAssociation.state == ReplicaState.COPYING
                )
            stmt = stmt.where(state_clause)

        return stmt.subquery()

    def _resolve_collection_files(temp_table, *, session: "Session"):
        """
        Find all FILE dids contained in collections from temp_table and return them in a newly
        created temporary table.

        :returns: tuple (row count reported for the insert, the new temporary table)
        """
        resolved_files_temp_table = temp_table_mngr(session).create_scope_name_table()

        stmt = insert(
            resolved_files_temp_table
        ).from_select(
            ['scope', 'name'],
            rucio.core.did.list_child_dids_stmt(temp_table, did_type=DIDType.FILE)
        )
        result = session.execute(stmt)

        return result.rowcount, resolved_files_temp_table

    def _list_replicas_for_collection_files_stmt(temp_table, replicas_subquery):
        """
        Build a query for listing replicas of files resolved from containers/datasets

        The query assumes that temp_table only contains DIDs of type FILE.
        """
        # inner join: files without a matching replica are not returned
        return select(
            temp_table.scope.label('scope'),
            temp_table.name.label('name'),
            literal(None).label('archive_scope'),
            literal(None).label('archive_name'),
            replicas_subquery.c.bytes,
            replicas_subquery.c.md5,
            replicas_subquery.c.adler32,
            replicas_subquery.c.path,
            replicas_subquery.c.state,
            replicas_subquery.c.rse_id,
            replicas_subquery.c.rse_name,
            replicas_subquery.c.rse_type,
            replicas_subquery.c.volatile,
        ).join_from(
            temp_table,
            replicas_subquery,
            and_(replicas_subquery.c.scope == temp_table.scope,
                 replicas_subquery.c.name == temp_table.name),
        )

    def _list_replicas_for_constituents_stmt(temp_table, replicas_subquery):
        """
        Build a query for listing replicas of archives containing the files(constituents) given as input.
        i.e. for a file scope:file.log which exists in scope:archive.tar.gz, it will return the replicas
        (rse, path, state, etc) of archive.tar.gz, but with bytes/md5/adler of file.log
        """
        return select(
            models.ConstituentAssociation.child_scope.label('scope'),
            models.ConstituentAssociation.child_name.label('name'),
            models.ConstituentAssociation.scope.label('archive_scope'),
            models.ConstituentAssociation.name.label('archive_name'),
            models.ConstituentAssociation.bytes,
            models.ConstituentAssociation.md5,
            models.ConstituentAssociation.adler32,
            replicas_subquery.c.path,
            replicas_subquery.c.state,
            replicas_subquery.c.rse_id,
            replicas_subquery.c.rse_name,
            replicas_subquery.c.rse_type,
            replicas_subquery.c.volatile,
        ).join_from(
            temp_table,
            models.DataIdentifier,
            and_(models.DataIdentifier.scope == temp_table.scope,
                 models.DataIdentifier.name == temp_table.name,
                 models.DataIdentifier.did_type == DIDType.FILE,
                 models.DataIdentifier.constituent == true()),
        ).join(
            models.ConstituentAssociation,
            and_(models.ConstituentAssociation.child_scope == temp_table.scope,
                 models.ConstituentAssociation.child_name == temp_table.name)
        ).join(
            replicas_subquery,
            and_(replicas_subquery.c.scope == models.ConstituentAssociation.scope,
                 replicas_subquery.c.name == models.ConstituentAssociation.name),
        )

    def _list_replicas_for_input_files_stmt(temp_table, replicas_subquery):
        """
        Builds a query which list the replicas of FILEs from users input, but ignores
        collections in the same input.

        Note: These FILE dids must be returned to the user even if they don't have replicas,
        hence the outerjoin against the replicas_subquery.
        """
        return select(
            temp_table.scope.label('scope'),
            temp_table.name.label('name'),
            literal(None).label('archive_scope'),
            literal(None).label('archive_name'),
            models.DataIdentifier.bytes,
            models.DataIdentifier.md5,
            models.DataIdentifier.adler32,
            replicas_subquery.c.path,
            replicas_subquery.c.state,
            replicas_subquery.c.rse_id,
            replicas_subquery.c.rse_name,
            replicas_subquery.c.rse_type,
            replicas_subquery.c.volatile,
        ).join_from(
            temp_table,
            models.DataIdentifier,
            and_(models.DataIdentifier.scope == temp_table.scope,
                 models.DataIdentifier.name == temp_table.name,
                 models.DataIdentifier.did_type == DIDType.FILE),
        ).outerjoin(
            replicas_subquery,
            and_(replicas_subquery.c.scope == temp_table.scope,
                 replicas_subquery.c.name == temp_table.name),
        )

    def _inspect_dids(temp_table, *, session: "Session"):
        """
        Find how many files, collections and constituents are among the dids in the temp_table

        :returns: tuple (num_files, num_collections, num_constituents), each an int (0 when nothing matched)
        """
        stmt = select(
            func.sum(
                case((models.DataIdentifier.did_type == DIDType.FILE, 1), else_=0)
            ).label('num_files'),
            func.sum(
                case((models.DataIdentifier.did_type.in_([DIDType.CONTAINER, DIDType.DATASET]), 1), else_=0)
            ).label('num_collections'),
            func.sum(
                case((models.DataIdentifier.constituent == true(), 1), else_=0)
            ).label('num_constituents'),
        ).join_from(
            temp_table,
            models.DataIdentifier,
            and_(models.DataIdentifier.scope == temp_table.scope,
                 models.DataIdentifier.name == temp_table.name),
        )
        # the sums come back as None on empty input, hence the `or 0` normalisation below
        num_files, num_collections, num_constituents = session.execute(stmt).one()  # returns None on empty input
        return num_files or 0, num_collections or 0, num_constituents or 0

    # The VO used to filter RSE expressions is taken from the first DID only
    if dids:
        filter_ = {'vo': dids[0]['scope'].vo}
    else:
        filter_ = {'vo': 'def'}

    dids = {(did['scope'], did['name']): did for did in dids}  # Deduplicate input
    if not dids:
        return

    input_dids_temp_table = temp_table_mngr(session).create_scope_name_table()
    session.execute(insert(input_dids_temp_table), [{'scope': s, 'name': n} for s, n in dids])

    num_files, num_collections, num_constituents = _inspect_dids(input_dids_temp_table, session=session)

    num_files_in_collections, resolved_files_temp_table = 0, None
    if num_collections:
        num_files_in_collections, resolved_files_temp_table = _resolve_collection_files(input_dids_temp_table, session=session)

    # Only build the queries for the kinds of DIDs actually present in the input
    replicas_subquery = _replicas_filter_subquery()
    replica_sources = []
    if num_files:
        replica_sources.append(
            _list_replicas_for_input_files_stmt(input_dids_temp_table, replicas_subquery)
        )
    if num_constituents and resolve_archives:
        replica_sources.append(
            _list_replicas_for_constituents_stmt(input_dids_temp_table, replicas_subquery)
        )
    if num_files_in_collections:
        replica_sources.append(
            _list_replicas_for_collection_files_stmt(resolved_files_temp_table, replicas_subquery)
        )

    if not replica_sources:
        return

    # In the simple case that somebody calls list_replicas on big collections with nrandom set,
    # opportunistically try to reduce the number of fetched and analyzed rows.
    if (
        nrandom
        # Only try this optimisation if list_replicas was called on collection(s).
        # I didn't consider handling the case when list_replica is called with a mix of
        # file/archive/collection dids: database queries in those cases are more complex
        # and people don't usually call list_replicas with nrandom on file/archive_constituents anyway.
        and (num_files_in_collections and not num_constituents and not num_files)
        # The following code introduces overhead if it fails to pick n random replicas.
        # Only execute when nrandom is much smaller than the total number of candidate files.
        # 64 was picked without any particular reason as "seems good enough".
        and 0 < nrandom < num_files_in_collections / 64
    ):
        # Randomly select a subset of file DIDs which have at least one replica matching the RSE/replica
        # filters applied on database side. Some filters are applied later in python code
        # (for example: scheme; or client_location/domain). We don't have any guarantee that
        # those, python, filters will not drop the replicas which we just selected randomly.
        stmt = select(
            resolved_files_temp_table.scope.label('scope'),
            resolved_files_temp_table.name.label('name'),
        ).where(
            exists(
                select(1)
            ).where(
                replicas_subquery.c.scope == resolved_files_temp_table.scope,
                replicas_subquery.c.name == resolved_files_temp_table.name
            )
        ).order_by(
            literal_column('dbms_random.value') if session.bind.dialect.name == 'oracle' else func.random()
        ).limit(
            # slightly overshoot to reduce the probability that python-side filtering will
            # leave us with less than nrandom replicas.
            nrandom * 4
        )
        # Re-use input temp table. We don't need its content anymore
        random_dids_temp_table = input_dids_temp_table
        session.execute(delete(random_dids_temp_table))
        session.execute(insert(random_dids_temp_table).from_select(['scope', 'name'], stmt))

        # Fetch all replicas for randomly selected dids and apply filters on python side
        stmt = _list_replicas_for_collection_files_stmt(random_dids_temp_table, replicas_subquery)
        stmt = stmt.order_by('scope', 'name')
        replica_tuples = session.execute(stmt)
        random_replicas = list(
            _pick_n_random(
                nrandom,
                _list_replicas(replica_tuples, pfns, schemes, [], client_location, domain,
                               sign_urls, signature_lifetime, resolve_parents, filter_, by_rse_name, session=session)
            )
        )
        if len(random_replicas) == nrandom:
            yield from random_replicas
            return
        else:
            # Our opportunistic attempt to pick nrandom replicas without fetching all database rows failed,
            # continue with the normal list_replicas flow and fetch all replicas
            pass

    # Rows are ordered by scope/name because _list_replicas groups consecutive rows per file
    if len(replica_sources) == 1:
        stmt = replica_sources[0].order_by('scope', 'name')
        replica_tuples = session.execute(stmt)
    else:
        if session.bind.dialect.name == 'mysql':
            # On mysql, perform both queries independently and merge their result in python.
            # The union query fails with "Can't reopen table"
            replica_tuples = heapq.merge(
                *[session.execute(stmt.order_by('scope', 'name')) for stmt in replica_sources],
                key=lambda t: (t[0], t[1]),  # sort by scope, name
            )
        else:
            stmt = union(*replica_sources).order_by('scope', 'name')
            replica_tuples = session.execute(stmt)

    yield from _pick_n_random(
        nrandom,
        _list_replicas(replica_tuples, pfns, schemes, [], client_location, domain,
                       sign_urls, signature_lifetime, resolve_parents, filter_, by_rse_name, session=session)
    )
1372
+
1373
+
1374
@transactional_session
def __bulk_add_new_file_dids(files, account, dataset_meta=None, *, session: "Session"):
    """
    Bulk add new file dids.

    :param files: the list of new files (dictionaries with 'scope', 'name' and 'bytes', and
                  optionally 'account', 'md5', 'adler32' and 'meta').
    :param account: The account owner, used for files which don't define their own 'account'.
    :param dataset_meta: Optional metadata set on every file in addition to its own 'meta'.
    :param session: The database session in use.
    :returns: True if successful.
    :raises ScopeNotFound: if the flush fails because the scope of a file does not exist.
    :raises DataIdentifierAlreadyExists: if one of the dids conflicts with a persistent instance.
    :raises RucioException: for any other database error raised by the flush.
    """
    # Messages of the different database back-ends when the scope foreign key is violated.
    missing_scope_patterns = (
        '.*IntegrityError.*02291.*integrity constraint.*DIDS_SCOPE_FK.*violated - parent key not found.*',
        '.*IntegrityError.*FOREIGN KEY constraint failed.*',
        '.*IntegrityError.*1452.*Cannot add or update a child row: a foreign key constraint fails.*',
        '.*IntegrityError.*insert or update on table.*violates foreign key constraint "DIDS_SCOPE_FK".*',
        '.*ForeignKeyViolation.*insert or update on table.*violates foreign key constraint.*',
        '.*IntegrityError.*foreign key constraints? failed.*',
    )

    for file in files:
        new_did = models.DataIdentifier(scope=file['scope'], name=file['name'],
                                        account=file.get('account') or account,
                                        did_type=DIDType.FILE, bytes=file['bytes'],
                                        md5=file.get('md5'), adler32=file.get('adler32'),
                                        is_new=None)
        # flushing is deferred: all dids are sent to the database in one go below
        new_did.save(session=session, flush=False)

        if 'meta' in file and file['meta']:
            rucio.core.did.set_metadata_bulk(scope=file['scope'], name=file['name'], meta=file['meta'], recursive=False, session=session)
        if dataset_meta:
            rucio.core.did.set_metadata_bulk(scope=file['scope'], name=file['name'], meta=dataset_meta, recursive=False, session=session)
    try:
        session.flush()
    except IntegrityError as error:
        if any(match(pattern, error.args[0]) for pattern in missing_scope_patterns):
            raise exception.ScopeNotFound('Scope not found!') from error

        raise exception.RucioException(error.args) from error
    except DatabaseError as error:
        if match('.*(DatabaseError).*ORA-14400.*inserted partition key does not map to any partition.*', error.args[0]):
            raise exception.ScopeNotFound('Scope not found!') from error

        raise exception.RucioException(error.args) from error
    except FlushError as error:
        if match('New instance .* with identity key .* conflicts with persistent instance', error.args[0]):
            raise exception.DataIdentifierAlreadyExists('Data Identifier already exists!') from error
        raise exception.RucioException(error.args) from error
    return True
1419
+
1420
+
1421
@transactional_session
def __bulk_add_file_dids(files, account, dataset_meta=None, *, session: "Session"):
    """
    Bulk add file dids, creating only those which don't exist yet.

    :param files: the list of files (dictionaries with at least 'scope' and 'name').
    :param account: The account owner, forwarded for the newly created dids.
    :param dataset_meta: Optional metadata forwarded for the newly created dids.
    :param session: The database session in use.
    :returns: The list of newly created files (the input dictionaries) followed by the already
              existing ones (dictionaries with scope, name, bytes, adler32 and md5 read from the database).
    """
    condition = [
        and_(models.DataIdentifier.scope == f['scope'],
             models.DataIdentifier.name == f['name'],
             models.DataIdentifier.did_type == DIDType.FILE)
        for f in files
    ]

    available_files = []
    # Only query when there is something to look for: calling or_() without any clause is
    # deprecated in SQLAlchemy and would not restrict the query.
    if condition:
        q = session.query(models.DataIdentifier.scope,
                          models.DataIdentifier.name,
                          models.DataIdentifier.bytes,
                          models.DataIdentifier.adler32,
                          models.DataIdentifier.md5).with_hint(models.DataIdentifier, "INDEX(dids DIDS_PK)", 'oracle').filter(or_(*condition))
        available_files = [{column: getattr(row, column) for column in row._fields} for row in q]

    # Constant-time membership test instead of scanning available_files once per input file
    known_dids = {(available_file['scope'], available_file['name']) for available_file in available_files}
    new_files = [file for file in files if (file['scope'], file['name']) not in known_dids]

    __bulk_add_new_file_dids(files=new_files, account=account,
                             dataset_meta=dataset_meta,
                             session=session)
    return new_files + available_files
1454
+
1455
+
1456
def tombstone_from_delay(tombstone_delay):
    """
    Convert a tombstone delay, expressed in seconds, into a tombstone date.

    :param tombstone_delay: Delay in seconds; any value accepted by int(), or a falsy value.
    :returns: None when the delay is falsy or evaluates to zero seconds,
              datetime(1970, 1, 1) when the delay is negative,
              otherwise the current UTC time (naive datetime) plus the delay.
    """
    # Tolerate None (and any other falsy value) for tombstone_delay
    if not tombstone_delay:
        return None

    offset = timedelta(seconds=int(tombstone_delay))
    no_offset = timedelta(0)

    # Truthy inputs such as "0" still end up as a zero delay after the cast
    if offset == no_offset:
        return None

    return datetime(1970, 1, 1) if offset < no_offset else datetime.utcnow() + offset
1470
+
1471
+
1472
@transactional_session
def __bulk_add_replicas(rse_id, files, account, *, session: "Session"):
    """
    Bulk add new file replicas on an RSE, skipping the replicas which already exist there.

    :param rse_id: the RSE id.
    :param files: the list of files (dictionaries with 'scope', 'name' and 'bytes', and optionally
                  'path', 'state', 'md5', 'adler32', 'lock_cnt' and 'tombstone').
    :param account: The account owner (not used by this function).
    :param session: The database session in use.
    :returns: tuple (number of replicas added, sum of their bytes).
    :raises Duplicate: if the insert violates the replica unique constraint.
    :raises RucioException: for any other database error.
    """
    nbfiles, bytes_ = 0, 0
    # Check for the replicas already available
    condition = [
        and_(models.RSEFileAssociation.scope == f['scope'],
             models.RSEFileAssociation.name == f['name'],
             models.RSEFileAssociation.rse_id == rse_id)
        for f in files
    ]

    available_replicas = set()
    # Only query when there is something to look for: calling or_() without any clause is
    # deprecated in SQLAlchemy and would not restrict the query.
    if condition:
        query = session.query(models.RSEFileAssociation.scope, models.RSEFileAssociation.name, models.RSEFileAssociation.rse_id).\
            with_hint(models.RSEFileAssociation, text="INDEX(REPLICAS REPLICAS_PK)", dialect_name='oracle').\
            filter(or_(*condition))
        # A set gives a constant-time membership test instead of one scan per input file
        available_replicas = {(row.scope, row.name, row.rse_id) for row in query}

    default_tombstone_delay = get_rse_attribute(rse_id, 'tombstone_delay', session=session)
    default_tombstone = tombstone_from_delay(default_tombstone_delay)

    new_replicas = []
    for file in files:
        if (file['scope'], file['name'], rse_id) in available_replicas:
            continue
        nbfiles += 1
        bytes_ += file['bytes']
        new_replicas.append({'rse_id': rse_id, 'scope': file['scope'],
                             'name': file['name'], 'bytes': file['bytes'],
                             'path': file.get('path'),
                             'state': ReplicaState(file.get('state', 'A')),
                             'md5': file.get('md5'), 'adler32': file.get('adler32'),
                             'lock_cnt': file.get('lock_cnt', 0),
                             'tombstone': file.get('tombstone') or default_tombstone})
    try:
        if new_replicas:
            session.execute(insert(models.RSEFileAssociation), new_replicas)
        session.flush()
        return nbfiles, bytes_
    except IntegrityError as error:
        if match('.*IntegrityError.*ORA-00001: unique constraint .*REPLICAS_PK.*violated.*', error.args[0]) \
                or match('.*IntegrityError.*1062.*Duplicate entry.*', error.args[0]) \
                or match('.*IntegrityError.*columns? rse_id.*scope.*name.*not unique.*', error.args[0]) \
                or match('.*IntegrityError.*duplicate key value violates unique constraint.*', error.args[0]):
            raise exception.Duplicate("File replica already exists!") from error
        raise exception.RucioException(error.args) from error
    except DatabaseError as error:
        raise exception.RucioException(error.args) from error
1527
+
1528
+
1529
@transactional_session
def add_replicas(rse_id, files, account, ignore_availability=True,
                 dataset_meta=None, *, session: "Session"):
    """
    Bulk add file replicas.

    :param rse_id: The RSE id.
    :param files: The list of files; each one is a dictionary with at least 'scope' and 'name',
                  and a 'pfn' when the RSE is non deterministic. On a non deterministic RSE
                  the 'path' key of each file is filled in from its parsed PFN.
    :param account: The account owner.
    :param ignore_availability: Ignore the RSE blocklisting.
    :param dataset_meta: Passed through to __bulk_add_file_dids.
    :param session: The database session in use.

    :returns: The value returned by __bulk_add_file_dids for these files.
    :raises UnsupportedOperation: if the RSE is volatile, or if a file has no PFN on a non deterministic RSE.
    :raises ResourceTemporaryUnavailable: if the RSE is not available for writing and ignore_availability is False.
    :raises InvalidPath: if a PFN given for a deterministic RSE matches none of the expected PFNs.
    """

    def _pfn_scheme(pfn):
        # The scheme is whatever precedes the first ':'
        return pfn.split(':')[0]

    def _expected_pfns(lfns, rse_settings, scheme, operation='write', domain='wan', protocol_attr=None):
        p = rsemgr.create_protocol(rse_settings=rse_settings, operation=operation, scheme=scheme, domain=domain, protocol_attr=protocol_attr)
        expected_pfns = p.lfns2pfns(lfns)
        return clean_surls(expected_pfns.values())

    replica_rse = get_rse(rse_id=rse_id, session=session)

    if replica_rse['volatile'] is True:
        raise exception.UnsupportedOperation('Cannot add replicas on volatile RSE %s ' % (replica_rse['rse']))

    if not replica_rse['availability_write'] and not ignore_availability:
        raise exception.ResourceTemporaryUnavailable('%s is temporary unavailable for writing' % replica_rse['rse'])

    for file in files:
        if 'pfn' not in file:
            if not replica_rse['deterministic']:
                raise exception.UnsupportedOperation('PFN needed for this (non deterministic) RSE %s ' % (replica_rse['rse']))

    replicas = __bulk_add_file_dids(files=files, account=account,
                                    dataset_meta=dataset_meta,
                                    session=session)

    pfns = {}  # dict[str, list[str]], {scheme: [pfns], scheme: [pfns]}
    for file in files:
        if 'pfn' in file:
            pfns.setdefault(_pfn_scheme(file['pfn']), []).append(file['pfn'])

    if pfns:
        rse_settings = rsemgr.get_rse_info(rse_id=rse_id, session=session)
        for scheme in list(pfns):
            # Select the files by exact scheme: a prefix test would also pick e.g. 'https'
            # PFNs while handling 'http'. Files without a PFN (allowed on deterministic
            # RSEs) are left out.
            files_with_scheme = [f for f in files if 'pfn' in f and _pfn_scheme(f['pfn']) == scheme]

            if not replica_rse['deterministic']:
                p = rsemgr.create_protocol(rse_settings=rse_settings, operation='write', scheme=scheme)
                pfns[scheme] = p.parse_pfns(pfns=pfns[scheme])
                for file in files_with_scheme:
                    tmp = pfns[scheme][file['pfn']]
                    file['path'] = ''.join([tmp['path'], tmp['name']])
            else:
                # Check that the pfns match to the expected pfns
                lfns = [{'scope': i['scope'].external, 'name': i['name']} for i in files_with_scheme]
                pfns[scheme] = clean_surls(pfns[scheme])

                # Whatever remains in pfns[scheme] after removing the expected wan (then lan) PFNs is unexpected
                for protocol_attr in rsemgr.get_protocols_ordered(rse_settings=rse_settings, operation='write', scheme=scheme, domain='wan'):
                    pfns[scheme] = list(set(pfns[scheme]) - set(_expected_pfns(lfns, rse_settings, scheme, operation='write', domain='wan', protocol_attr=protocol_attr)))

                if len(pfns[scheme]) > 0:
                    for protocol_attr in rsemgr.get_protocols_ordered(rse_settings=rse_settings, operation='write', scheme=scheme, domain='lan'):
                        pfns[scheme] = list(set(pfns[scheme]) - set(_expected_pfns(lfns, rse_settings, scheme, operation='write', domain='lan', protocol_attr=protocol_attr)))

                if len(pfns[scheme]) > 0:
                    # pfns not found in wan or lan
                    raise exception.InvalidPath('One of the PFNs provided does not match the Rucio expected PFN : %s (%s)' % (str(pfns[scheme]), str(lfns)))

    nbfiles, bytes_ = __bulk_add_replicas(rse_id=rse_id, files=files, account=account, session=session)
    increase(rse_id=rse_id, files=nbfiles, bytes_=bytes_, session=session)
    return replicas
1601
+
1602
+
1603
@transactional_session
def add_replica(rse_id, scope, name, bytes_, account, adler32=None, md5=None, dsn=None, pfn=None, meta=None, rules=None, tombstone=None, *, session: "Session"):
    """
    Add File replica.

    Convenience wrapper which builds a single file description and hands it to add_replicas.

    :param rse_id: the rse id.
    :param scope: the scope name.
    :param name: The data identifier name.
    :param bytes_: the size of the file.
    :param account: The account owner.
    :param md5: The md5 checksum.
    :param adler32: The adler32 checksum.
    :param dsn: Not used by this function.
    :param pfn: Physical file name (for nondeterministic rse).
    :param meta: Meta-data associated with the file. Represented as key/value pairs in a dictionary.
    :param rules: Replication rules associated with the file. A list of dictionaries, e.g., [{'copies': 2, 'rse_expression': 'TIERS1'}, ].
    :param tombstone: Value stored under the 'tombstone' key of the file description.
    :param session: The database session in use.

    :returns: The value returned by add_replicas.
    """
    # Fresh containers per call: a mutable default would be shared between calls
    if meta is None:
        meta = {}
    if rules is None:
        rules = []

    file = {'scope': scope, 'name': name, 'bytes': bytes_, 'adler32': adler32, 'md5': md5, 'meta': meta, 'rules': rules, 'tombstone': tombstone}
    if pfn:
        file['pfn'] = pfn
    return add_replicas(rse_id=rse_id, files=[file, ], account=account, session=session)
1630
+
1631
+
1632
@METRICS.time_it
@transactional_session
def delete_replicas(rse_id, files, ignore_availability=True, *, session: "Session"):
    """
    Delete file replicas.

    Besides the replicas themselves, this removes the matching rows of the sources table,
    flags the matching BAD entries of the bad replicas table as DELETED, runs
    __cleanup_after_replica_deletion and decreases the RSE counter.

    :param rse_id: the rse id.
    :param files: the list of files to delete; each one is a dictionary with 'scope' and 'name'.
    :param ignore_availability: Ignore the RSE blocklisting.
    :param session: The database session in use.
    :raises ResourceTemporaryUnavailable: if the RSE is not available for deletion and ignore_availability is False.
    :raises ReplicaNotFound: if the number of deleted rows differs from the number of files.
    """
    if not files:
        return

    replica_rse = get_rse(rse_id=rse_id, session=session)

    if not replica_rse['availability_delete'] and not ignore_availability:
        raise exception.ResourceTemporaryUnavailable('%s is temporary unavailable for deleting' % replica_rse['rse'])

    tt_mngr = temp_table_mngr(session)
    scope_name_temp_table = tt_mngr.create_scope_name_table()
    scope_name_temp_table2 = tt_mngr.create_scope_name_table()
    association_temp_table = tt_mngr.create_association_table()

    session.execute(insert(scope_name_temp_table), [{'scope': file['scope'], 'name': file['name']} for file in files])

    # WARNING : This should not be necessary since that would mean the replica is used as a source.
    stmt = delete(
        models.Source,
    ).where(
        exists(select(1)
               .where(and_(models.Source.scope == scope_name_temp_table.scope,
                           models.Source.name == scope_name_temp_table.name,
                           models.Source.rse_id == rse_id)))
    ).execution_options(
        synchronize_session=False
    )
    session.execute(stmt)

    # Count the replicas and their bytes before deleting them; used to decrease the RSE counter
    stmt = select(
        func.count(),
        func.sum(models.RSEFileAssociation.bytes),
    ).join_from(
        scope_name_temp_table,
        models.RSEFileAssociation,
        and_(models.RSEFileAssociation.scope == scope_name_temp_table.scope,
             models.RSEFileAssociation.name == scope_name_temp_table.name,
             models.RSEFileAssociation.rse_id == rse_id)
    )
    delta, bytes_ = session.execute(stmt).one()

    # Delete replicas
    stmt = delete(
        models.RSEFileAssociation,
    ).where(
        exists(select(1)
               .where(and_(models.RSEFileAssociation.scope == scope_name_temp_table.scope,
                           models.RSEFileAssociation.name == scope_name_temp_table.name,
                           models.RSEFileAssociation.rse_id == rse_id)))
    ).execution_options(
        synchronize_session=False
    )
    res = session.execute(stmt)
    if res.rowcount != len(files):
        raise exception.ReplicaNotFound("One or several replicas don't exist.")

    # Update bad replicas
    stmt = update(
        models.BadReplicas,
    ).where(
        exists(select(1)
               .where(and_(models.BadReplicas.scope == scope_name_temp_table.scope,
                           models.BadReplicas.name == scope_name_temp_table.name,
                           models.BadReplicas.rse_id == rse_id)))
    ).where(
        models.BadReplicas.state == BadFilesStatus.BAD
    ).execution_options(
        synchronize_session=False
    ).values(
        state=BadFilesStatus.DELETED,
        updated_at=datetime.utcnow()
    )
    session.execute(stmt)

    __cleanup_after_replica_deletion(scope_name_temp_table=scope_name_temp_table,
                                     scope_name_temp_table2=scope_name_temp_table2,
                                     association_temp_table=association_temp_table,
                                     rse_id=rse_id, files=files, session=session)

    # Decrease RSE counter
    decrease(rse_id=rse_id, files=delta, bytes_=bytes_, session=session)
1723
+
1724
+
1725
@transactional_session
def __cleanup_after_replica_deletion(scope_name_temp_table, scope_name_temp_table2, association_temp_table, rse_id, files, *, session: "Session"):
    """
    Perform update of collections/archive associations/dids after the removal of their replicas

    The three temporary tables are working space: their content is deleted and re-filled
    several times by this function, so callers must not rely on it afterwards.

    :param scope_name_temp_table: temporary table with 'scope' and 'name' columns
    :param scope_name_temp_table2: second temporary table with 'scope' and 'name' columns
    :param association_temp_table: temporary table with 'scope', 'name', 'child_scope' and 'child_name' columns
    :param rse_id: the rse id
    :param files: list of files whose replica got deleted
    :param session: The database session in use.
    """
    clt_to_update, parents_to_analyze, affected_archives, clt_replicas_to_delete = set(), set(), set(), set()
    did_condition = []
    incomplete_dids, messages, clt_to_set_not_archive = [], [], []
    for file in files:

        # Schedule update of all collections containing this file and having a collection replica in the RSE
        clt_to_update.add(ScopeName(scope=file['scope'], name=file['name']))

        # If the file doesn't have any replicas anymore, we should perform cleanups of objects
        # related to this file. However, if the file is "lost", it's removal wasn't intentional,
        # so we want to skip deleting the metadata here. Perform cleanups:

        # 1) schedule removal of this file from all parent datasets
        parents_to_analyze.add(ScopeName(scope=file['scope'], name=file['name']))

        # 2) schedule removal of this file from the DID table
        did_condition.append(
            and_(models.DataIdentifier.scope == file['scope'],
                 models.DataIdentifier.name == file['name'],
                 models.DataIdentifier.availability != DIDAvailability.LOST,
                 ~exists(select(1).prefix_with("/*+ INDEX(REPLICAS REPLICAS_PK) */", dialect='oracle')).where(
                     and_(models.RSEFileAssociation.scope == file['scope'],
                          models.RSEFileAssociation.name == file['name'])),
                 ~exists(select(1).prefix_with("/*+ INDEX(ARCHIVE_CONTENTS ARCH_CONTENTS_PK) */", dialect='oracle')).where(
                     and_(models.ConstituentAssociation.child_scope == file['scope'],
                          models.ConstituentAssociation.child_name == file['name']))))

        # 3) if the file is an archive, schedule cleanup on the files from inside the archive
        affected_archives.add(ScopeName(scope=file['scope'], name=file['name']))

    if clt_to_update:
        # Get all collection_replicas at RSE, insert them into UpdatedCollectionReplica
        session.query(scope_name_temp_table).delete()
        session.execute(insert(scope_name_temp_table), [sn._asdict() for sn in clt_to_update])
        stmt = select(
            models.DataIdentifierAssociation.scope,
            models.DataIdentifierAssociation.name,
        ).distinct(
        ).join_from(
            scope_name_temp_table,
            models.DataIdentifierAssociation,
            and_(scope_name_temp_table.scope == models.DataIdentifierAssociation.child_scope,
                 scope_name_temp_table.name == models.DataIdentifierAssociation.child_name)
        ).join(
            models.CollectionReplica,
            and_(models.CollectionReplica.scope == models.DataIdentifierAssociation.scope,
                 models.CollectionReplica.name == models.DataIdentifierAssociation.name,
                 models.CollectionReplica.rse_id == rse_id)
        )
        for parent_scope, parent_name in session.execute(stmt):
            models.UpdatedCollectionReplica(scope=parent_scope,
                                            name=parent_name,
                                            did_type=DIDType.DATASET,
                                            rse_id=rse_id). \
                save(session=session, flush=False)

    # Delete did from the content for the last did
    # Each pass handles one level of the hierarchy: the parents found in a pass are analyzed in the next one
    while parents_to_analyze:
        did_associations_to_remove = set()

        session.query(scope_name_temp_table).delete()
        session.execute(insert(scope_name_temp_table), [sn._asdict() for sn in parents_to_analyze])
        parents_to_analyze.clear()

        # Each outer join + "== null()" pair below is an anti-join: keep only the children which
        # are not LOST dids, have no replica left and are not constituents of an archive
        stmt = select(
            models.DataIdentifierAssociation.scope,
            models.DataIdentifierAssociation.name,
            models.DataIdentifierAssociation.did_type,
            models.DataIdentifierAssociation.child_scope,
            models.DataIdentifierAssociation.child_name,
        ).distinct(
        ).join_from(
            scope_name_temp_table,
            models.DataIdentifierAssociation,
            and_(scope_name_temp_table.scope == models.DataIdentifierAssociation.child_scope,
                 scope_name_temp_table.name == models.DataIdentifierAssociation.child_name)
        ).outerjoin(
            models.DataIdentifier,
            and_(models.DataIdentifier.availability == DIDAvailability.LOST,
                 models.DataIdentifier.scope == models.DataIdentifierAssociation.child_scope,
                 models.DataIdentifier.name == models.DataIdentifierAssociation.child_name)
        ).where(
            models.DataIdentifier.scope == null()
        ).outerjoin(
            models.RSEFileAssociation,
            and_(models.RSEFileAssociation.scope == models.DataIdentifierAssociation.child_scope,
                 models.RSEFileAssociation.name == models.DataIdentifierAssociation.child_name)
        ).where(
            models.RSEFileAssociation.scope == null()
        ).outerjoin(
            models.ConstituentAssociation,
            and_(models.ConstituentAssociation.child_scope == models.DataIdentifierAssociation.child_scope,
                 models.ConstituentAssociation.child_name == models.DataIdentifierAssociation.child_name)
        ).where(
            models.ConstituentAssociation.child_scope == null()
        )

        clt_to_set_not_archive.append(set())
        for parent_scope, parent_name, did_type, child_scope, child_name in session.execute(stmt):

            # Schedule removal of child file/dataset/container from the parent dataset/container
            did_associations_to_remove.add(Association(scope=parent_scope, name=parent_name,
                                                       child_scope=child_scope, child_name=child_name))

            # Schedule setting is_archive = False on parents which don't have any children with is_archive == True anymore
            clt_to_set_not_archive[-1].add(ScopeName(scope=parent_scope, name=parent_name))

            # If the parent dataset/container becomes empty as a result of the child removal
            # (it was the last children), metadata cleanup has to be done:
            #
            # 1) Schedule to remove the replicas of this empty collection
            clt_replicas_to_delete.add(ScopeName(scope=parent_scope, name=parent_name))

            # 2) Schedule removal of this empty collection from its own parent collections
            parents_to_analyze.add(ScopeName(scope=parent_scope, name=parent_name))

            # 3) Schedule removal of the entry from the DIDs table
            remove_open_did = config_get_bool('reaper', 'remove_open_did', default=False, session=session)
            if remove_open_did:
                did_condition.append(
                    and_(models.DataIdentifier.scope == parent_scope,
                         models.DataIdentifier.name == parent_name,
                         ~exists(1).where(
                             and_(models.DataIdentifierAssociation.child_scope == parent_scope,
                                  models.DataIdentifierAssociation.child_name == parent_name)),
                         ~exists(1).where(
                             and_(models.DataIdentifierAssociation.scope == parent_scope,
                                  models.DataIdentifierAssociation.name == parent_name))))
            else:
                did_condition.append(
                    and_(models.DataIdentifier.scope == parent_scope,
                         models.DataIdentifier.name == parent_name,
                         models.DataIdentifier.is_open == False,  # NOQA
                         ~exists(1).where(
                             and_(models.DataIdentifierAssociation.child_scope == parent_scope,
                                  models.DataIdentifierAssociation.child_name == parent_name)),
                         ~exists(1).where(
                             and_(models.DataIdentifierAssociation.scope == parent_scope,
                                  models.DataIdentifierAssociation.name == parent_name))))

        if did_associations_to_remove:
            session.query(association_temp_table).delete()
            session.execute(insert(association_temp_table), [a._asdict() for a in did_associations_to_remove])

            # get the list of modified parent scope, name
            stmt = select(
                models.DataIdentifier.scope,
                models.DataIdentifier.name,
                models.DataIdentifier.did_type,
            ).distinct(
            ).join_from(
                association_temp_table,
                models.DataIdentifier,
                and_(association_temp_table.scope == models.DataIdentifier.scope,
                     association_temp_table.name == models.DataIdentifier.name)
            ).where(
                # .is_(None) renders "IS NULL"; a Python "is None" test on the column would be
                # evaluated immediately to False and never reach the database
                or_(models.DataIdentifier.complete == true(),
                    models.DataIdentifier.complete.is_(None)),
            )
            for parent_scope, parent_name, parent_did_type in session.execute(stmt):
                message = {'scope': parent_scope,
                           'name': parent_name,
                           'did_type': parent_did_type,
                           'event_type': 'INCOMPLETE'}
                if message not in messages:
                    messages.append(message)
                    incomplete_dids.append(ScopeName(scope=parent_scope, name=parent_name))

            content_to_delete_filter = exists(select(1)
                                              .where(and_(association_temp_table.scope == models.DataIdentifierAssociation.scope,
                                                          association_temp_table.name == models.DataIdentifierAssociation.name,
                                                          association_temp_table.child_scope == models.DataIdentifierAssociation.child_scope,
                                                          association_temp_table.child_name == models.DataIdentifierAssociation.child_name)))

            rucio.core.did.insert_content_history(filter_=content_to_delete_filter, did_created_at=None, session=session)

            stmt = delete(
                models.DataIdentifierAssociation
            ).where(
                content_to_delete_filter,
            ).execution_options(
                synchronize_session=False
            )
            session.execute(stmt)

    # Get collection replicas of collections which became empty
    if clt_replicas_to_delete:
        session.query(scope_name_temp_table).delete()
        session.execute(insert(scope_name_temp_table), [sn._asdict() for sn in clt_replicas_to_delete])
        session.query(scope_name_temp_table2).delete()
        stmt = select(
            models.CollectionReplica.scope,
            models.CollectionReplica.name,
        ).distinct(
        ).join_from(
            scope_name_temp_table,
            models.CollectionReplica,
            and_(scope_name_temp_table.scope == models.CollectionReplica.scope,
                 scope_name_temp_table.name == models.CollectionReplica.name),
        ).join(
            models.DataIdentifier,
            and_(models.DataIdentifier.scope == models.CollectionReplica.scope,
                 models.DataIdentifier.name == models.CollectionReplica.name)
        ).outerjoin(
            models.DataIdentifierAssociation,
            and_(models.DataIdentifierAssociation.scope == models.CollectionReplica.scope,
                 models.DataIdentifierAssociation.name == models.CollectionReplica.name)
        ).where(
            models.DataIdentifierAssociation.scope == null()
        )
        session.execute(
            insert(scope_name_temp_table2).from_select(['scope', 'name'], stmt)
        )
        # Delete the retrieved collection replicas of empty collections
        stmt = delete(
            models.CollectionReplica,
        ).where(
            exists(select(1)
                   .where(and_(models.CollectionReplica.scope == scope_name_temp_table2.scope,
                               models.CollectionReplica.name == scope_name_temp_table2.name)))
        ).execution_options(
            synchronize_session=False
        )
        session.execute(stmt)

    # Update incomplete state
    # NOTE(review): rebinding messages here drops the INCOMPLETE messages collected above,
    # so only the ERASE messages built below are ever sent - confirm this is intended.
    messages, dids_to_delete = [], set()
    if incomplete_dids:
        session.query(scope_name_temp_table).delete()
        session.execute(insert(scope_name_temp_table), [sn._asdict() for sn in incomplete_dids])
        stmt = update(
            models.DataIdentifier
        ).where(
            exists(select(1)
                   .where(and_(models.DataIdentifier.scope == scope_name_temp_table.scope,
                               models.DataIdentifier.name == scope_name_temp_table.name)))
        ).where(
            models.DataIdentifier.complete != false(),
        ).execution_options(
            synchronize_session=False
        ).values(
            complete=False
        )
        session.execute(stmt)

    # delete empty dids
    if did_condition:
        # The conditions are OR-ed ten at a time to keep each query small
        for chunk in chunks(did_condition, 10):
            query = session.query(models.DataIdentifier.scope,
                                  models.DataIdentifier.name,
                                  models.DataIdentifier.did_type). \
                with_hint(models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle'). \
                filter(or_(*chunk))
            for scope, name, did_type in query:
                if did_type == DIDType.DATASET:
                    messages.append({'event_type': 'ERASE',
                                     'payload': dumps({'scope': scope.external,
                                                       'name': name,
                                                       'account': 'root'})})
                dids_to_delete.add(ScopeName(scope=scope, name=name))

    # Remove Archive Constituents
    constituent_associations_to_delete = set()
    if affected_archives:
        session.query(scope_name_temp_table).delete()
        session.execute(insert(scope_name_temp_table), [sn._asdict() for sn in affected_archives])

        # Anti-joins again: keep the constituents of archives which are not LOST and have no replica left
        stmt = select(
            models.ConstituentAssociation
        ).distinct(
        ).join_from(
            scope_name_temp_table,
            models.ConstituentAssociation,
            and_(scope_name_temp_table.scope == models.ConstituentAssociation.scope,
                 scope_name_temp_table.name == models.ConstituentAssociation.name),
        ).outerjoin(
            models.DataIdentifier,
            and_(models.DataIdentifier.availability == DIDAvailability.LOST,
                 models.DataIdentifier.scope == models.ConstituentAssociation.scope,
                 models.DataIdentifier.name == models.ConstituentAssociation.name)
        ).where(
            models.DataIdentifier.scope == null()
        ).outerjoin(
            models.RSEFileAssociation,
            and_(models.RSEFileAssociation.scope == models.ConstituentAssociation.scope,
                 models.RSEFileAssociation.name == models.ConstituentAssociation.name)
        ).where(
            models.RSEFileAssociation.scope == null()
        )

        for constituent in session.execute(stmt).scalars().all():
            constituent_associations_to_delete.add(Association(scope=constituent.scope, name=constituent.name,
                                                               child_scope=constituent.child_scope, child_name=constituent.child_name))
            # Keep a copy of the association in the history table before it gets deleted
            models.ConstituentAssociationHistory(
                child_scope=constituent.child_scope,
                child_name=constituent.child_name,
                scope=constituent.scope,
                name=constituent.name,
                bytes=constituent.bytes,
                adler32=constituent.adler32,
                md5=constituent.md5,
                guid=constituent.guid,
                length=constituent.length,
                updated_at=constituent.updated_at,
                created_at=constituent.created_at,
            ).save(session=session, flush=False)

    if constituent_associations_to_delete:
        session.query(association_temp_table).delete()
        session.execute(insert(association_temp_table), [a._asdict() for a in constituent_associations_to_delete])
        stmt = delete(
            models.ConstituentAssociation
        ).where(
            exists(select(1)
                   .where(and_(association_temp_table.scope == models.ConstituentAssociation.scope,
                               association_temp_table.name == models.ConstituentAssociation.name,
                               association_temp_table.child_scope == models.ConstituentAssociation.child_scope,
                               association_temp_table.child_name == models.ConstituentAssociation.child_name)))
        ).execution_options(
            synchronize_session=False
        )
        session.execute(stmt)

        # The files which were inside the removed archives go through the same cleanup.
        # The recursive call reuses (and overwrites) the temporary tables.
        removed_constituents = {ScopeName(scope=c.child_scope, name=c.child_name) for c in constituent_associations_to_delete}
        for chunk in chunks(removed_constituents, 200):
            __cleanup_after_replica_deletion(scope_name_temp_table=scope_name_temp_table,
                                             scope_name_temp_table2=scope_name_temp_table2,
                                             association_temp_table=association_temp_table,
                                             rse_id=rse_id, files=[sn._asdict() for sn in chunk], session=session)

    if dids_to_delete:
        session.query(scope_name_temp_table).delete()
        session.execute(insert(scope_name_temp_table), [sn._asdict() for sn in dids_to_delete])

        # Remove rules in Waiting for approval or Suspended
        stmt = delete(
            models.ReplicationRule,
        ).where(
            exists(select(1)
                   .where(and_(models.ReplicationRule.scope == scope_name_temp_table.scope,
                               models.ReplicationRule.name == scope_name_temp_table.name)))
        ).where(
            models.ReplicationRule.state.in_((RuleState.SUSPENDED, RuleState.WAITING_APPROVAL))
        ).execution_options(
            synchronize_session=False
        )
        session.execute(stmt)

        # Remove DID Metadata
        # (skipped on Oracle when the major version is lower than 12)
        must_delete_did_meta = True
        if session.bind.dialect.name == 'oracle':
            oracle_version = int(session.connection().connection.version.split('.')[0])
            if oracle_version < 12:
                must_delete_did_meta = False
        if must_delete_did_meta:
            stmt = delete(
                models.DidMeta,
            ).where(
                exists(select(1)
                       .where(and_(models.DidMeta.scope == scope_name_temp_table.scope,
                                   models.DidMeta.name == scope_name_temp_table.name)))
            ).execution_options(
                synchronize_session=False
            )
            session.execute(stmt)

        for chunk in chunks(messages, 100):
            add_messages(chunk, session=session)

        # Delete dids
        dids_to_delete_filter = exists(select(1)
                                       .where(and_(models.DataIdentifier.scope == scope_name_temp_table.scope,
                                                   models.DataIdentifier.name == scope_name_temp_table.name)))
        archive_dids = config_get_bool('deletion', 'archive_dids', default=False, session=session)
        if archive_dids:
            rucio.core.did.insert_deleted_dids(filter_=dids_to_delete_filter, session=session)
        stmt = delete(
            models.DataIdentifier,
        ).where(
            dids_to_delete_filter,
        ).execution_options(
            synchronize_session=False
        )
        session.execute(stmt)

    # Set is_archive = false on collections which don't have archive children anymore
    # The sets are processed in the order in which they were collected above
    while clt_to_set_not_archive:
        to_update = clt_to_set_not_archive.pop(0)
        if not to_update:
            continue
        session.query(scope_name_temp_table).delete()
        session.execute(insert(scope_name_temp_table), [sn._asdict() for sn in to_update])
        session.query(scope_name_temp_table2).delete()

        data_identifier_alias = aliased(models.DataIdentifier, name='did_alias')
        # Fetch rows to be updated
        stmt = select(
            models.DataIdentifier.scope,
            models.DataIdentifier.name,
        ).distinct(
        ).where(
            models.DataIdentifier.is_archive == true()
        ).join_from(
            scope_name_temp_table,
            models.DataIdentifier,
            and_(scope_name_temp_table.scope == models.DataIdentifier.scope,
                 scope_name_temp_table.name == models.DataIdentifier.name)
        ).join(
            models.DataIdentifierAssociation,
            and_(models.DataIdentifier.scope == models.DataIdentifierAssociation.scope,
                 models.DataIdentifier.name == models.DataIdentifierAssociation.name)
        ).outerjoin(
            data_identifier_alias,
            and_(data_identifier_alias.scope == models.DataIdentifierAssociation.child_scope,
                 data_identifier_alias.name == models.DataIdentifierAssociation.child_name,
                 data_identifier_alias.is_archive == true())
        ).where(
            data_identifier_alias.scope == null()
        )
        session.execute(insert(scope_name_temp_table2).from_select(['scope', 'name'], stmt))
        # update the fetched rows
        stmt = update(
            models.DataIdentifier,
        ).where(
            exists(select(1)
                   .where(and_(models.DataIdentifier.scope == scope_name_temp_table2.scope,
                               models.DataIdentifier.name == scope_name_temp_table2.name)))
        ).execution_options(
            synchronize_session=False
        ).values(
            is_archive=False,
        )
        session.execute(stmt)
2167
+
2168
@transactional_session
def get_replica(rse_id, scope, name, *, session: "Session"):
    """
    Fetch a single file replica.

    :param rse_id: The RSE Id.
    :param scope: the scope name.
    :param name: The data identifier name.
    :param session: The database session in use.

    :returns: The replica row converted to a dictionary.
    :raises ReplicaNotFound: if there is no replica for this scope and name on the RSE.
    """
    replica_query = session.query(models.RSEFileAssociation).filter_by(rse_id=rse_id, scope=scope, name=name)
    try:
        replica = replica_query.one()
        return replica.to_dict()
    except NoResultFound:
        # Report the RSE by name rather than by id
        rse_name = get_rse_name(rse_id=rse_id, session=session)
        raise exception.ReplicaNotFound("No row found for scope: %s name: %s rse: %s" % (scope, name, rse_name))
2185
+
2186
+
2187
@transactional_session
def list_and_mark_unlocked_replicas(limit, bytes_=None, rse_id=None, delay_seconds=600, only_delete_obsolete=False, *, session: "Session"):
    """
    List RSE File replicas with no locks and mark the returned ones as BEING_DELETED.

    Candidate replicas (lock_cnt == 0 on the given RSE, matching tombstone,
    not referenced in the sources table) are locked with FOR UPDATE SKIP LOCKED
    and processed in chunks through a temporary scope/name table. The rows
    finally returned are updated to state BEING_DELETED with an OBSOLETE
    tombstone and a fresh updated_at.

    :param limit: Number of replicas returned.
    :param bytes_: The amount of needed bytes.
    :param rse_id: The rse_id.
    :param delay_seconds: The delay to query replicas in BEING_DELETED state
    :param only_delete_obsolete: If set to True, will only return the replicas with EPOCH tombstone
    :param session: The database session in use.

    :returns: a list of dictionary replica, each with the keys
              scope, name, path, bytes, tombstone, state and datatype.
    """

    # Keep the requested amount under its own name: `bytes_` is re-bound by the result loop below.
    needed_space = bytes_
    total_bytes = 0
    rows = []

    temp_table_cls = temp_table_mngr(session).create_scope_name_table()

    replicas_alias = aliased(models.RSEFileAssociation, name='replicas_alias')

    # Stage 1: select and lock the (scope, name) of every deletion candidate on this RSE.
    stmt = select(
        models.RSEFileAssociation.scope,
        models.RSEFileAssociation.name,
    ).where(
        models.RSEFileAssociation.lock_cnt == 0,
        models.RSEFileAssociation.rse_id == rse_id,
        # Either only OBSOLETE tombstones, or any tombstone that already lies in the past
        models.RSEFileAssociation.tombstone == OBSOLETE if only_delete_obsolete else models.RSEFileAssociation.tombstone < datetime.utcnow(),
    ).where(
        # BEING_DELETED replicas are only picked up again once they have not been updated for delay_seconds
        or_(models.RSEFileAssociation.state.in_((ReplicaState.AVAILABLE, ReplicaState.UNAVAILABLE, ReplicaState.BAD)),
            and_(models.RSEFileAssociation.state == ReplicaState.BEING_DELETED, models.RSEFileAssociation.updated_at < datetime.utcnow() - timedelta(seconds=delay_seconds)))
    ).outerjoin(
        models.Source,
        and_(models.RSEFileAssociation.scope == models.Source.scope,
             models.RSEFileAssociation.name == models.Source.name,
             models.RSEFileAssociation.rse_id == models.Source.rse_id)
    ).where(
        models.Source.scope.is_(None)  # Only try to delete replicas if they are not used as sources in any transfers
    ).order_by(
        models.RSEFileAssociation.tombstone,
        models.RSEFileAssociation.updated_at
    ).with_for_update(
        skip_locked=True,
        # oracle: we must specify a column, not a table; however, it doesn't matter which column, the lock is put on the whole row
        # postgresql/mysql: sqlalchemy driver automatically converts it to a table name
        # sqlite: this is completely ignored
        of=models.RSEFileAssociation.scope,
    )

    # Stage 2: process the candidates chunk by chunk via the temporary table.
    for chunk in chunks(session.execute(stmt).yield_per(2 * limit), math.ceil(1.25 * limit)):
        # The temporary table only ever holds the current chunk
        session.query(temp_table_cls).delete()
        session.execute(insert(temp_table_cls), [{'scope': scope, 'name': name} for scope, name in chunk])

        # Fetch the details of the chunk's replicas and keep only those which can be deleted
        stmt = select(
            models.RSEFileAssociation.scope,
            models.RSEFileAssociation.name,
            models.RSEFileAssociation.path,
            models.RSEFileAssociation.bytes,
            models.RSEFileAssociation.tombstone,
            models.RSEFileAssociation.state,
            models.DataIdentifier.datatype,
        ).join_from(
            temp_table_cls,
            models.RSEFileAssociation,
            and_(models.RSEFileAssociation.scope == temp_table_cls.scope,
                 models.RSEFileAssociation.name == temp_table_cls.name,
                 models.RSEFileAssociation.rse_id == rse_id)
        ).with_hint(
            replicas_alias, "index(%(name)s REPLICAS_PK)", 'oracle'
        ).outerjoin(
            # AVAILABLE replicas of the same file on other RSEs
            replicas_alias,
            and_(models.RSEFileAssociation.scope == replicas_alias.scope,
                 models.RSEFileAssociation.name == replicas_alias.name,
                 models.RSEFileAssociation.rse_id != replicas_alias.rse_id,
                 replicas_alias.state == ReplicaState.AVAILABLE)
        ).with_hint(
            models.Request, "INDEX(requests REQUESTS_SCOPE_NAME_RSE_IDX)", 'oracle'
        ).outerjoin(
            # Requests on the same scope:name
            models.Request,
            and_(models.RSEFileAssociation.scope == models.Request.scope,
                 models.RSEFileAssociation.name == models.Request.name)
        ).join(
            models.DataIdentifier,
            and_(models.RSEFileAssociation.scope == models.DataIdentifier.scope,
                 models.RSEFileAssociation.name == models.DataIdentifier.name)
        ).group_by(
            models.RSEFileAssociation.scope,
            models.RSEFileAssociation.name,
            models.RSEFileAssociation.path,
            models.RSEFileAssociation.bytes,
            models.RSEFileAssociation.tombstone,
            models.RSEFileAssociation.state,
            models.RSEFileAssociation.updated_at,
            models.DataIdentifier.datatype
        ).having(
            case((func.count(replicas_alias.scope) > 0, True),  # Can delete this replica if it's not the last replica
                 (func.count(models.Request.scope) == 0, True),  # If it's the last replica, only can delete if there are no requests using it
                 else_=False).label("can_delete"),
        ).order_by(
            models.RSEFileAssociation.tombstone,
            models.RSEFileAssociation.updated_at
        ).limit(
            # Only ask for as many rows as are still missing
            limit - len(rows)
        )

        for scope, name, path, bytes_, tombstone, state, datatype in session.execute(stmt):
            # Stop once enough replicas, or (unless only obsolete ones are wanted) enough bytes, were collected
            if len(rows) >= limit or (not only_delete_obsolete and needed_space is not None and total_bytes > needed_space):
                break
            # UNAVAILABLE replicas do not count towards the collected bytes
            if state != ReplicaState.UNAVAILABLE:
                total_bytes += bytes_

            rows.append({'scope': scope, 'name': name, 'path': path,
                         'bytes': bytes_, 'tombstone': tombstone,
                         'state': state, 'datatype': datatype})
        # Same stop condition for the chunk loop
        if len(rows) >= limit or (not only_delete_obsolete and needed_space is not None and total_bytes > needed_space):
            break

    if rows:
        # Stage 3: load the selected replicas into the temporary table and mark them as BEING_DELETED
        session.query(temp_table_cls).delete()
        session.execute(insert(temp_table_cls), [{'scope': r['scope'], 'name': r['name']} for r in rows])
        stmt = update(
            models.RSEFileAssociation
        ).where(
            exists(select(1).prefix_with("/*+ INDEX(REPLICAS REPLICAS_PK) */", dialect='oracle')
                   .where(and_(models.RSEFileAssociation.scope == temp_table_cls.scope,
                               models.RSEFileAssociation.name == temp_table_cls.name,
                               models.RSEFileAssociation.rse_id == rse_id)))
        ).execution_options(
            synchronize_session=False
        ).values(
            updated_at=datetime.utcnow(),
            state=ReplicaState.BEING_DELETED,
            tombstone=OBSOLETE,
        )
        session.execute(stmt)

    return rows
2326
+
2327
+
2328
@transactional_session
def update_replicas_states(replicas, nowait=False, *, session: "Session"):
    """
    Apply a state change (and optionally a new path) to each given file replica.

    Every entry of ``replicas`` is a dictionary providing ``rse_id``, ``scope``,
    ``name`` and ``state``; ``path``, ``error_message``, ``broken_rule_id`` and
    ``broken_message`` are read when present. A ``state`` given as a string is
    converted to ``ReplicaState`` and written back into the dictionary.

    :param replicas: The list of replicas.
    :param nowait: Nowait parameter for the for_update queries.
    :param session: The database session in use.

    :raises ReplicaNotFound: If ``nowait`` is set and the replica row does not exist.
    :raises UnsupportedOperation: If the update did not match any row.
    :returns: True after all replicas have been processed.
    """
    for replica in replicas:
        rse_id, scope, name = replica['rse_id'], replica['scope'], replica['name']
        query = session.query(models.RSEFileAssociation).filter_by(rse_id=rse_id, scope=scope, name=name)

        if nowait:
            # Lock the row up front without waiting; a missing row is reported explicitly
            try:
                query.with_for_update(nowait=True).one()
            except NoResultFound:
                raise exception.ReplicaNotFound("No row found for scope: %s name: %s rse: %s" % (scope, name, get_rse_name(rse_id, session=session)))

        # Normalise the requested state to the enum and keep the caller's dictionary in sync
        new_state = replica['state']
        if isinstance(new_state, str):
            new_state = ReplicaState(new_state)
            replica['state'] = new_state

        values = {'state': new_state}

        if new_state == ReplicaState.BEING_DELETED:
            # Only unlocked replicas which are not used as sources may be marked for deletion
            used_as_source = exists(1).where(and_(models.RSEFileAssociation.scope == models.Source.scope,
                                                  models.RSEFileAssociation.name == models.Source.name,
                                                  models.RSEFileAssociation.rse_id == models.Source.rse_id))
            query = query.filter_by(lock_cnt=0).filter(not_(used_as_source))
            values['tombstone'] = OBSOLETE
        elif new_state == ReplicaState.AVAILABLE:
            rucio.core.lock.successful_transfer(scope=scope, name=name, rse_id=rse_id, nowait=nowait, session=session)
            # A replica previously declared BAD is flagged as RECOVERED
            bad_replicas = session.query(models.BadReplicas).filter_by(state=BadFilesStatus.BAD, rse_id=rse_id, scope=scope, name=name)
            if bad_replicas.count():
                bad_replicas.update({'state': BadFilesStatus.RECOVERED, 'updated_at': datetime.utcnow()}, synchronize_session=False)
        elif new_state == ReplicaState.UNAVAILABLE:
            rucio.core.lock.failed_transfer(scope=scope, name=name, rse_id=rse_id,
                                            error_message=replica.get('error_message', None),
                                            broken_rule_id=replica.get('broken_rule_id', None),
                                            broken_message=replica.get('broken_message', None),
                                            nowait=nowait, session=session)
        elif new_state == ReplicaState.TEMPORARY_UNAVAILABLE:
            # Only AVAILABLE or already TEMPORARY_UNAVAILABLE replicas may enter this state
            query = query.filter(or_(models.RSEFileAssociation.state == ReplicaState.AVAILABLE, models.RSEFileAssociation.state == ReplicaState.TEMPORARY_UNAVAILABLE))

        new_path = replica.get('path')
        if new_path:
            values['path'] = new_path

        updated = query.update(values, synchronize_session=False)
        if not updated:
            # The error message needs the RSE name; resolve it if the caller did not provide one
            if 'rse' not in replica:
                replica['rse'] = get_rse_name(rse_id=rse_id, session=session)
            raise exception.UnsupportedOperation('State %(state)s for replica %(scope)s:%(name)s on %(rse)s cannot be updated' % replica)

    return True
2381
+
2382
+
2383
@transactional_session
def touch_replica(replica, *, session: "Session"):
    """
    Set the accessed_at timestamp of a file replica and of its file DID, without waiting on locked rows.

    The replica's tombstone is moved to the same timestamp when it is set to
    something other than OBSOLETE; otherwise it is left untouched.

    :param replica: a dictionary with the information of the affected replica
                    (``rse_id``, ``scope``, ``name`` and optionally ``accessed_at``).
    :param session: The database session in use.

    :returns: False if a DatabaseError occurred, True otherwise
              (including when the replica or DID row does not exist).
    """
    accessed_at = replica.get('accessed_at') or datetime.utcnow()
    # Held in a variable so that the comparison below is not written against a literal None
    none_value = None

    replica_key = {'rse_id': replica['rse_id'], 'scope': replica['scope'], 'name': replica['name']}
    did_key = {'scope': replica['scope'], 'name': replica['name'], 'did_type': DIDType.FILE}

    try:
        # Lock the replica row (NOWAIT), then update it
        (
            session.query(models.RSEFileAssociation)
            .filter_by(**replica_key)
            .with_hint(models.RSEFileAssociation, "index(REPLICAS REPLICAS_PK)", 'oracle')
            .with_for_update(nowait=True)
            .one()
        )

        new_tombstone = case((and_(models.RSEFileAssociation.tombstone != none_value,
                                   models.RSEFileAssociation.tombstone != OBSOLETE),
                              accessed_at),
                             else_=models.RSEFileAssociation.tombstone)
        replica_update = (
            update(models.RSEFileAssociation)
            .filter_by(**replica_key)
            .prefix_with("/*+ index(REPLICAS REPLICAS_PK) */", dialect='oracle')
            .execution_options(synchronize_session=False)
            .values(accessed_at=accessed_at, tombstone=new_tombstone)
        )
        session.execute(replica_update)

        # Same procedure for the file DID
        (
            session.query(models.DataIdentifier)
            .filter_by(**did_key)
            .with_hint(models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle')
            .with_for_update(nowait=True)
            .one()
        )

        did_update = (
            update(models.DataIdentifier)
            .filter_by(**did_key)
            .prefix_with("/*+ INDEX(DIDS DIDS_PK) */", dialect='oracle')
            .execution_options(synchronize_session=False)
            .values(accessed_at=accessed_at)
        )
        session.execute(did_update)
    except DatabaseError:
        return False
    except NoResultFound:
        return True
    else:
        return True
2430
+
2431
+
2432
@transactional_session
def update_replica_state(rse_id, scope, name, state, *, session: "Session"):
    """
    Update the state of a single file replica.

    Convenience wrapper which delegates to update_replicas_states with a
    one-element list.

    :param rse_id: the rse id.
    :param scope: The scope of the replica.
    :param name: The data identifier name.
    :param state: The state.
    :param session: The database session in use.

    :returns: The result of update_replicas_states.
    """
    single_replica = {'scope': scope, 'name': name, 'state': state, 'rse_id': rse_id}
    return update_replicas_states(replicas=[single_replica], session=session)
2444
+
2445
+
2446
@transactional_session
def get_and_lock_file_replicas(scope, name, nowait=False, restrict_rses=None, *, session: "Session"):
    """
    Fetch and row-lock the replicas of scope:name which are not in state BEING_DELETED.

    :param scope: The scope of the did.
    :param name: The name of the did.
    :param nowait: Nowait parameter for the FOR UPDATE statement
    :param restrict_rses: Possible RSE_ids to filter on. The filter is only
                          applied in the query when fewer than 10 ids are given.
    :param session: The db session in use.
    :returns: List of SQLAlchemy Replica Objects
    """
    query = session.query(models.RSEFileAssociation).filter_by(scope=scope, name=name).filter(models.RSEFileAssociation.state != ReplicaState.BEING_DELETED)

    if restrict_rses is not None and len(restrict_rses) < 10:
        rse_clause = [models.RSEFileAssociation.rse_id == rse_id for rse_id in restrict_rses]
        if rse_clause:
            query = query.filter(or_(*rse_clause))

    locked_query = query.with_for_update(nowait=nowait)
    return locked_query.all()
2468
+
2469
+
2470
@transactional_session
def get_source_replicas(scope, name, source_rses=None, *, session: "Session"):
    """
    Get the RSEs holding an AVAILABLE replica of a specific scope:name.

    :param scope: The scope of the did.
    :param name: The name of the did.
    :param source_rses: Possible RSE_ids to filter on. The filter is only
                        applied in the query when fewer than 10 ids are given.
    :param session: The db session in use.
    :returns: List of rse_ids
    """
    query = session.query(models.RSEFileAssociation.rse_id).filter_by(scope=scope, name=name).filter(models.RSEFileAssociation.state == ReplicaState.AVAILABLE)

    if source_rses and len(source_rses) < 10:
        rse_clause = [models.RSEFileAssociation.rse_id == rse_id for rse_id in source_rses]
        if rse_clause:
            query = query.filter(or_(*rse_clause))

    # Each result row is a one-element tuple holding the rse_id
    return [row[0] for row in query.all()]
2491
+
2492
+
2493
@transactional_session
def get_and_lock_file_replicas_for_dataset(scope, name, nowait=False, restrict_rses=None,
                                           total_threads=None, thread_id=None,
                                           *, session: "Session"):
    """
    Get file replicas for all files of a dataset and lock the replica rows.

    Replicas in state BEING_DELETED are ignored. On postgresql the dataset
    content is read with a separate query and the replicas are fetched with
    an inner join; on all other dialects a single outer join is used.

    :param scope:          The scope of the dataset.
    :param name:           The name of the dataset.
    :param nowait:         Nowait parameter for the FOR UPDATE statement
    :param restrict_rses:  Possible RSE_ids to filter on. Only applied in the query
                           when fewer than 10 ids are given.
    :param total_threads:  Total threads
    :param thread_id:      This thread
    :param session:        The db session in use.
    :returns:              (files in dataset, replicas in dataset), where files is a list of
                           dictionaries (scope, name, bytes, md5, adler32) and replicas is a
                           dictionary mapping (child_scope, child_name) to a list of replica objects.
    """
    files, replicas = {}, {}
    if session.bind.dialect.name == 'postgresql':
        # Get content
        content_query = session.query(models.DataIdentifierAssociation.child_scope,
                                      models.DataIdentifierAssociation.child_name,
                                      models.DataIdentifierAssociation.bytes,
                                      models.DataIdentifierAssociation.md5,
                                      models.DataIdentifierAssociation.adler32).\
            with_hint(models.DataIdentifierAssociation,
                      "INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)",
                      'oracle').\
            filter(models.DataIdentifierAssociation.scope == scope,
                   models.DataIdentifierAssociation.name == name)

        if total_threads and total_threads > 1:
            content_query = filter_thread_work(session=session, query=content_query, total_threads=total_threads,
                                               thread_id=thread_id, hash_variable='child_name')

        # Register every file of the dataset up front, with an empty replica list
        for child_scope, child_name, bytes_, md5, adler32 in content_query.yield_per(1000):
            files[(child_scope, child_name)] = {'scope': child_scope,
                                                'name': child_name,
                                                'bytes': bytes_,
                                                'md5': md5,
                                                'adler32': adler32}
            replicas[(child_scope, child_name)] = []

        # Get replicas and lock them
        query = session.query(models.DataIdentifierAssociation.child_scope,
                              models.DataIdentifierAssociation.child_name,
                              models.DataIdentifierAssociation.bytes,
                              models.DataIdentifierAssociation.md5,
                              models.DataIdentifierAssociation.adler32,
                              models.RSEFileAssociation)\
            .with_hint(models.DataIdentifierAssociation,
                       "INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)",
                       'oracle')\
            .filter(and_(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
                         models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
                         models.RSEFileAssociation.state != ReplicaState.BEING_DELETED))\
            .filter(models.DataIdentifierAssociation.scope == scope,
                    models.DataIdentifierAssociation.name == name)

        if restrict_rses is not None:
            if len(restrict_rses) < 10:
                rse_clause = []
                for rse_id in restrict_rses:
                    rse_clause.append(models.RSEFileAssociation.rse_id == rse_id)
                if rse_clause:
                    # Rebuild the query with the RSE restriction added to the join condition
                    query = session.query(models.DataIdentifierAssociation.child_scope,
                                          models.DataIdentifierAssociation.child_name,
                                          models.DataIdentifierAssociation.bytes,
                                          models.DataIdentifierAssociation.md5,
                                          models.DataIdentifierAssociation.adler32,
                                          models.RSEFileAssociation)\
                        .with_hint(models.DataIdentifierAssociation,
                                   "INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)",
                                   'oracle')\
                        .filter(and_(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
                                     models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
                                     models.RSEFileAssociation.state != ReplicaState.BEING_DELETED,
                                     or_(*rse_clause)))\
                        .filter(models.DataIdentifierAssociation.scope == scope,
                                models.DataIdentifierAssociation.name == name)

    else:
        # Other dialects: one outer join, so files without any replica still produce a row
        query = session.query(models.DataIdentifierAssociation.child_scope,
                              models.DataIdentifierAssociation.child_name,
                              models.DataIdentifierAssociation.bytes,
                              models.DataIdentifierAssociation.md5,
                              models.DataIdentifierAssociation.adler32,
                              models.RSEFileAssociation)\
            .with_hint(models.DataIdentifierAssociation,
                       "INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)",
                       'oracle') \
            .with_hint(models.RSEFileAssociation, "INDEX(REPLICAS REPLICAS_PK)", 'oracle')\
            .outerjoin(models.RSEFileAssociation,
                       and_(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
                            models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
                            models.RSEFileAssociation.state != ReplicaState.BEING_DELETED)).\
            filter(models.DataIdentifierAssociation.scope == scope,
                   models.DataIdentifierAssociation.name == name)

        if restrict_rses is not None:
            if len(restrict_rses) < 10:
                rse_clause = []
                for rse_id in restrict_rses:
                    rse_clause.append(models.RSEFileAssociation.rse_id == rse_id)
                if rse_clause:
                    # Rebuild the query with the RSE restriction added to the join condition
                    query = session.query(models.DataIdentifierAssociation.child_scope,
                                          models.DataIdentifierAssociation.child_name,
                                          models.DataIdentifierAssociation.bytes,
                                          models.DataIdentifierAssociation.md5,
                                          models.DataIdentifierAssociation.adler32,
                                          models.RSEFileAssociation)\
                        .with_hint(models.DataIdentifierAssociation,
                                   "INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)",
                                   'oracle')\
                        .outerjoin(models.RSEFileAssociation,
                                   and_(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
                                        models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
                                        models.RSEFileAssociation.state != ReplicaState.BEING_DELETED,
                                        or_(*rse_clause)))\
                        .filter(models.DataIdentifierAssociation.scope == scope,
                                models.DataIdentifierAssociation.name == name)

    if total_threads and total_threads > 1:
        query = filter_thread_work(session=session, query=query, total_threads=total_threads,
                                   thread_id=thread_id, hash_variable='child_name')

    # Lock the replica rows only (FOR UPDATE OF a replicas column)
    query = query.with_for_update(nowait=nowait, of=models.RSEFileAssociation.lock_cnt)

    for child_scope, child_name, bytes_, md5, adler32, replica in query.yield_per(1000):
        if (child_scope, child_name) not in files:
            files[(child_scope, child_name)] = {'scope': child_scope,
                                                'name': child_name,
                                                'bytes': bytes_,
                                                'md5': md5,
                                                'adler32': adler32}

        # replica is None for outer-join rows of files which have no matching replica
        if (child_scope, child_name) in replicas:
            if replica is not None:
                replicas[(child_scope, child_name)].append(replica)
        else:
            replicas[(child_scope, child_name)] = []
            if replica is not None:
                replicas[(child_scope, child_name)].append(replica)

    return (list(files.values()), replicas)
2637
+
2638
+
2639
@transactional_session
def get_source_replicas_for_dataset(scope, name, source_rses=None,
                                    total_threads=None, thread_id=None,
                                    *, session: "Session"):
    """
    Get the RSEs holding AVAILABLE replicas for all files of a dataset.

    :param scope:          The scope of the dataset.
    :param name:           The name of the dataset.
    :param source_rses:    Possible source RSE_ids to filter on. Only applied in the query
                           when fewer than 10 ids are given.
    :param total_threads:  Total threads
    :param thread_id:      This thread
    :param session:        The db session in use.
    :returns:              Dictionary mapping (child_scope, child_name) to the list of rse_ids;
                           files without a matching replica map to an empty list.
    """
    # Join condition between the dataset content and its AVAILABLE replicas
    join_condition = [models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
                      models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
                      models.RSEFileAssociation.state == ReplicaState.AVAILABLE]

    if source_rses and len(source_rses) < 10:
        rse_clause = [models.RSEFileAssociation.rse_id == rse_id for rse_id in source_rses]
        if rse_clause:
            join_condition.append(or_(*rse_clause))

    query = (
        session.query(models.DataIdentifierAssociation.child_scope,
                      models.DataIdentifierAssociation.child_name,
                      models.RSEFileAssociation.rse_id)
        .with_hint(models.DataIdentifierAssociation, "INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)", 'oracle')
        .outerjoin(models.RSEFileAssociation, and_(*join_condition))
        .filter(models.DataIdentifierAssociation.scope == scope,
                models.DataIdentifierAssociation.name == name)
    )

    if total_threads and total_threads > 1:
        query = filter_thread_work(session=session, query=query, total_threads=total_threads,
                                   thread_id=thread_id, hash_variable='child_name')

    replicas = {}
    for child_scope, child_name, rse_id in query:
        # Every file gets an entry, even when the outer join found no replica for it
        file_rses = replicas.setdefault((child_scope, child_name), [])
        if rse_id:
            file_rses.append(rse_id)

    return replicas
2699
+
2700
+
2701
@read_session
def get_replica_atime(replica, *, session: "Session"):
    """
    Get the accessed_at timestamp for a replica. Just for testing.

    :param replica: Dictionary with the keys scope, name and rse_id.
    :param session: Database session to use.

    :returns: The accessed_at value stored for the replica.
    """
    row = (
        session.query(models.RSEFileAssociation.accessed_at)
        .filter_by(scope=replica['scope'], name=replica['name'], rse_id=replica['rse_id'])
        .with_hint(models.RSEFileAssociation, text="INDEX(REPLICAS REPLICAS_PK)", dialect_name='oracle')
        .one()
    )
    # The row only holds the accessed_at column
    return row[0]
2712
+
2713
+
2714
@transactional_session
def touch_collection_replicas(collection_replicas, *, session: "Session"):
    """
    Set the accessed_at timestamp of the given collection replicas.

    Each entry's own ``accessed_at`` is used when present and truthy; otherwise
    the time at which this function was called is used.

    :param collection_replicas: the list of collection replicas
                                (dictionaries with scope, name and rse_id).
    :param session: The database session in use.

    :returns: True, if successful, False as soon as a DatabaseError occurs.
    """
    default_atime = datetime.utcnow()

    for entry in collection_replicas:
        try:
            target = session.query(models.CollectionReplica).filter_by(scope=entry['scope'], name=entry['name'], rse_id=entry['rse_id'])
            target.update({'accessed_at': entry.get('accessed_at') or default_atime}, synchronize_session=False)
        except DatabaseError:
            return False

    return True
2734
+
2735
+
2736
@stream_session
def list_dataset_replicas(scope, name, deep=False, *, session: "Session"):
    """
    List the replicas of a dataset, one dictionary per RSE.

    Without ``deep`` the information is read from the collection replicas
    table. With ``deep`` it is computed from the file replicas of the dataset
    content, including files which are available as constituents of archives.

    :param scope: The scope of the dataset.
    :param name: The name of the dataset.
    :param deep: Lookup at the file level.
    :param session: Database session to use.

    :returns: A list of dictionaries containing the dataset replicas
              with associated metrics and timestamps
    """

    if not deep:
        # Shallow lookup: collection replicas on RSEs which are not deleted
        query = session.query(models.CollectionReplica.scope,
                              models.CollectionReplica.name,
                              models.RSE.rse,
                              models.CollectionReplica.rse_id,
                              models.CollectionReplica.bytes,
                              models.CollectionReplica.length,
                              models.CollectionReplica.available_bytes,
                              models.CollectionReplica.available_replicas_cnt.label("available_length"),
                              models.CollectionReplica.state,
                              models.CollectionReplica.created_at,
                              models.CollectionReplica.updated_at,
                              models.CollectionReplica.accessed_at)\
            .filter_by(scope=scope, name=name, did_type=DIDType.DATASET)\
            .filter(models.CollectionReplica.rse_id == models.RSE.id)\
            .filter(models.RSE.deleted == false())

        for row in query:
            yield row._asdict()

    else:

        # find maximum values
        content_query = session\
            .query(func.sum(models.DataIdentifierAssociation.bytes).label("bytes"),
                   func.count().label("length"))\
            .with_hint(models.DataIdentifierAssociation, "INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)", 'oracle')\
            .filter(models.DataIdentifierAssociation.scope == scope)\
            .filter(models.DataIdentifierAssociation.name == name)

        # Total bytes and number of files of the dataset, used as the reference for every replica
        bytes_, length = 0, 0
        for row in content_query:
            bytes_, length = row.bytes, row.length

        # find archives that contain files of the requested dataset
        sub_query_archives = session\
            .query(models.DataIdentifierAssociation.scope.label('dataset_scope'),
                   models.DataIdentifierAssociation.name.label('dataset_name'),
                   models.DataIdentifierAssociation.bytes.label('file_bytes'),
                   models.ConstituentAssociation.child_scope.label('file_scope'),
                   models.ConstituentAssociation.child_name.label('file_name'),
                   models.RSEFileAssociation.scope.label('replica_scope'),
                   models.RSEFileAssociation.name.label('replica_name'),
                   models.RSE.rse,
                   models.RSE.id.label('rse_id'),
                   models.RSEFileAssociation.created_at,
                   models.RSEFileAssociation.accessed_at,
                   models.RSEFileAssociation.updated_at)\
            .filter(models.DataIdentifierAssociation.scope == scope)\
            .filter(models.DataIdentifierAssociation.name == name)\
            .filter(models.ConstituentAssociation.child_scope == models.DataIdentifierAssociation.child_scope)\
            .filter(models.ConstituentAssociation.child_name == models.DataIdentifierAssociation.child_name)\
            .filter(models.ConstituentAssociation.scope == models.RSEFileAssociation.scope)\
            .filter(models.ConstituentAssociation.name == models.RSEFileAssociation.name)\
            .filter(models.RSEFileAssociation.rse_id == models.RSE.id)\
            .filter(models.RSEFileAssociation.state == ReplicaState.AVAILABLE)\
            .filter(models.RSE.deleted == false())\
            .subquery()

        # count the metrics
        # (one row per file and RSE, aggregated over the archive replicas which contain the file)
        group_query_archives = session\
            .query(sub_query_archives.c.dataset_scope,
                   sub_query_archives.c.dataset_name,
                   sub_query_archives.c.file_scope,
                   sub_query_archives.c.file_name,
                   sub_query_archives.c.rse_id,
                   sub_query_archives.c.rse,
                   func.sum(sub_query_archives.c.file_bytes).label('file_bytes'),
                   func.min(sub_query_archives.c.created_at).label('created_at'),
                   func.max(sub_query_archives.c.updated_at).label('updated_at'),
                   func.max(sub_query_archives.c.accessed_at).label('accessed_at'))\
            .group_by(sub_query_archives.c.dataset_scope,
                      sub_query_archives.c.dataset_name,
                      sub_query_archives.c.file_scope,
                      sub_query_archives.c.file_name,
                      sub_query_archives.c.rse_id,
                      sub_query_archives.c.rse)\
            .subquery()

        # bring it in the same column state as the non-archive query
        full_query_archives = session\
            .query(group_query_archives.c.dataset_scope.label('scope'),
                   group_query_archives.c.dataset_name.label('name'),
                   group_query_archives.c.rse_id,
                   group_query_archives.c.rse,
                   func.sum(group_query_archives.c.file_bytes).label('available_bytes'),
                   func.count().label('available_length'),
                   func.min(group_query_archives.c.created_at).label('created_at'),
                   func.max(group_query_archives.c.updated_at).label('updated_at'),
                   func.max(group_query_archives.c.accessed_at).label('accessed_at'))\
            .group_by(group_query_archives.c.dataset_scope,
                      group_query_archives.c.dataset_name,
                      group_query_archives.c.rse_id,
                      group_query_archives.c.rse)

        # find the non-archive dataset replicas
        sub_query = session\
            .query(models.DataIdentifierAssociation.scope,
                   models.DataIdentifierAssociation.name,
                   models.RSEFileAssociation.rse_id,
                   func.sum(models.RSEFileAssociation.bytes).label("available_bytes"),
                   func.count().label("available_length"),
                   func.min(models.RSEFileAssociation.created_at).label("created_at"),
                   func.max(models.RSEFileAssociation.updated_at).label("updated_at"),
                   func.max(models.RSEFileAssociation.accessed_at).label("accessed_at"))\
            .with_hint(models.DataIdentifierAssociation, "INDEX_RS_ASC(CONTENTS CONTENTS_PK) INDEX_RS_ASC(REPLICAS REPLICAS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)", 'oracle')\
            .filter(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope)\
            .filter(models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name)\
            .filter(models.DataIdentifierAssociation.scope == scope)\
            .filter(models.DataIdentifierAssociation.name == name)\
            .filter(models.RSEFileAssociation.state == ReplicaState.AVAILABLE)\
            .group_by(models.DataIdentifierAssociation.scope,
                      models.DataIdentifierAssociation.name,
                      models.RSEFileAssociation.rse_id)\
            .subquery()

        # Attach the RSE name and drop deleted RSEs
        query = session\
            .query(sub_query.c.scope,
                   sub_query.c.name,
                   sub_query.c.rse_id,
                   models.RSE.rse,
                   sub_query.c.available_bytes,
                   sub_query.c.available_length,
                   sub_query.c.created_at,
                   sub_query.c.updated_at,
                   sub_query.c.accessed_at)\
            .filter(models.RSE.id == sub_query.c.rse_id)\
            .filter(models.RSE.deleted == false())

        # join everything together
        final_query = query.union_all(full_query_archives)

        for row in final_query.all():
            replica = row._asdict()
            replica['length'], replica['bytes'] = length, bytes_
            # The dataset replica is AVAILABLE only if the available file count equals the dataset length
            if replica['length'] == row.available_length:
                replica['state'] = ReplicaState.AVAILABLE
            else:
                replica['state'] = ReplicaState.UNAVAILABLE
            yield replica
2888
+
2889
+
2890
@stream_session
def list_dataset_replicas_bulk(names_by_intscope, *, session: "Session"):
    """
    List the dataset replicas of several datasets at once.

    :param names_by_intscope: The dictionary of internal scopes pointing at the list of names.
    :param session: Database session to use.

    :returns: A list of dictionaries containing the dataset replicas
              with associated metrics and timestamps
    """
    # One condition per scope, matching any of the names requested for that scope
    per_scope_conditions = [
        and_(models.CollectionReplica.scope == scope,
             models.CollectionReplica.name.in_(names))
        for scope, names in names_by_intscope.items()
    ]

    selected_columns = (
        models.CollectionReplica.scope,
        models.CollectionReplica.name,
        models.RSE.rse,
        models.CollectionReplica.rse_id,
        models.CollectionReplica.bytes,
        models.CollectionReplica.length,
        models.CollectionReplica.available_bytes,
        models.CollectionReplica.available_replicas_cnt.label("available_length"),
        models.CollectionReplica.state,
        models.CollectionReplica.created_at,
        models.CollectionReplica.updated_at,
        models.CollectionReplica.accessed_at,
    )

    try:
        # chunk size refers to the number of different scopes, see above
        for scope_chunk in chunks(per_scope_conditions, 10):
            query = (
                session.query(*selected_columns)
                .filter(models.CollectionReplica.did_type == DIDType.DATASET)
                .filter(models.CollectionReplica.rse_id == models.RSE.id)
                .filter(or_(*scope_chunk))
                .filter(models.RSE.deleted == false())
            )
            for row in query:
                yield row._asdict()
    except NoResultFound:
        raise exception.DataIdentifierNotFound('No Data Identifiers found')
2928
+
2929
+
2930
@stream_session
def list_dataset_replicas_vp(scope, name, deep=False, *, session: "Session", logger=logging.log):
    """
    List dataset replicas for a DID (scope:name) using the
    Virtual Placement service.

    NOTICE: This is an RnD function and might change or go away at any time.

    The VP sites are only yielded when the VP service answered with HTTP 200
    and the dataset has at least one AVAILABLE replica on a non-TAPE RSE that
    exposes a "root" protocol readable from the WAN.

    :param scope: The scope of the dataset.
    :param name: The name of the dataset.
    :param deep: Lookup at the file level.
    :param session: Database session to use.
    :param logger: Callable used as ``logger(level, message)``.

    :returns: A generator of dicts ``{'vp': True, 'site': <site>}``; it yields
              nothing when VP is not configured, not reachable or not usable.
    """
    vp_endpoint = get_vp_endpoint()
    vp_replies = ['other']
    nr_replies = 5  # force limit reply size

    if not vp_endpoint:
        # This function is a generator, so this value is not yielded to the consumer.
        return vp_replies

    try:
        response = requests.get('{}/ds/{}/{}:{}'.format(vp_endpoint, nr_replies, scope, name),
                                verify=False,
                                timeout=1)
        vp_replies = response.json() if response.status_code == 200 else ['other']
    except requests.exceptions.RequestException as error:
        logger(logging.ERROR, 'In list_dataset_replicas_vp, could not access {}. Error:{}'.format(vp_endpoint, error))
        vp_replies = ['other']

    if vp_replies == ['other']:
        return

    # check that there is at least one regular replica
    # that is not on tape and has a protocol with scheme "root"
    # and can be accessed from WAN
    accessible_replica_exists = False
    for reply in list_dataset_replicas(scope=scope, name=name, deep=deep, session=session):
        if reply['state'] != ReplicaState.AVAILABLE:
            continue
        rse_info = rsemgr.get_rse_info(rse=reply['rse'], vo=scope.vo, session=session)
        if rse_info['rse_type'] == 'TAPE':
            continue
        if any(prot['scheme'] == 'root' and prot['domains']['wan']['read'] for prot in rse_info['protocols']):
            accessible_replica_exists = True
            break

    if accessible_replica_exists:
        for site in vp_replies:
            yield {'vp': True, 'site': site}
2984
+
2985
+
2986
@stream_session
def list_datasets_per_rse(rse_id, filters=None, limit=None, *, session: "Session"):
    """
    List datasets at a RSE.

    :param rse_id: the rse id.
    :param filters: dictionary of attributes by which the results should be filtered.
                    'name' and 'scope' accept '*' / '%' wildcards, 'created_before' and
                    'created_after' are parsed with str_to_date, any other key is
                    compared for equality against the collection replica attribute.
    :param limit: limit number.
    :param session: Database session to use.

    :returns: A generator of dict dataset replicas
    """
    replica = models.CollectionReplica
    query = session.query(replica.scope,
                          replica.name,
                          models.RSE.id.label('rse_id'),
                          models.RSE.rse,
                          replica.bytes,
                          replica.length,
                          replica.available_bytes,
                          replica.available_replicas_cnt.label("available_length"),
                          replica.state,
                          replica.created_at,
                          replica.updated_at,
                          replica.accessed_at)\
        .filter_by(did_type=DIDType.DATASET)\
        .filter(replica.rse_id == models.RSE.id)\
        .filter(models.RSE.id == rse_id)\
        .filter(models.RSE.deleted == false())

    for key, value in (filters.items() if filters else []):
        if key in ('name', 'scope'):
            text_value = value.internal if key == 'scope' else value
            column = getattr(replica, key)
            if '*' in text_value or '%' in text_value:
                pattern = text_value.replace('*', '%')
                if session.bind.dialect.name == 'postgresql':  # PostgreSQL escapes automatically
                    query = query.filter(column.like(pattern))
                else:
                    query = query.filter(column.like(pattern, escape='\\'))
            else:
                query = query.filter(column == value)
            # hints ?
        elif key == 'created_before':
            query = query.filter(replica.created_at <= str_to_date(value))
        elif key == 'created_after':
            query = query.filter(replica.created_at >= str_to_date(value))
        else:
            query = query.filter(getattr(replica, key) == value)

    if limit:
        query = query.limit(limit)

    for row in query:
        yield row._asdict()
3040
+
3041
+
3042
@transactional_session
def get_cleaned_updated_collection_replicas(total_workers, worker_number, limit=None, *, session: "Session"):
    """
    Get update request for collection replicas.

    Before reading, the table of update requests is cleaned: requests without a
    matching collection replica are deleted, then duplicated requests are deleted.

    :param total_workers:      Number of total workers (not used by this implementation).
    :param worker_number:      id of the executing worker (not used by this implementation).
    :param limit:              Maximum number of requests to return.
    :param session:            Database session in use.
    :returns:                  List of update requests for collection replicas.
    """
    update_req = models.UpdatedCollectionReplica
    replica = models.CollectionReplica

    # Delete update requests which do not have collection_replicas
    same_did = and_(replica.name == update_req.name,
                    replica.scope == update_req.scope)
    same_did_and_rse = and_(replica.name == update_req.name,
                            replica.scope == update_req.scope,
                            replica.rse_id == update_req.rse_id)
    session.query(update_req)\
        .filter(update_req.rse_id.is_(None) & ~exists().where(same_did))\
        .delete(synchronize_session=False)
    session.query(update_req)\
        .filter(update_req.rse_id.isnot(None) & ~exists().where(same_did_and_rse))\
        .delete(synchronize_session=False)

    # Delete duplicates
    dialect = session.bind.dialect.name
    if dialect == 'oracle':
        schema = BASE.metadata.schema + '.' if BASE.metadata.schema else ''
        session.execute(text('DELETE FROM {schema}updated_col_rep A WHERE A.rowid > ANY (SELECT B.rowid FROM {schema}updated_col_rep B WHERE A.scope = B.scope AND A.name=B.name AND A.did_type=B.did_type AND (A.rse_id=B.rse_id OR (A.rse_id IS NULL and B.rse_id IS NULL)))'.format(schema=schema)))  # NOQA: E501
    elif dialect == 'mysql':
        # Keep only the row with the highest id of every (scope, name, rse_id) group.
        newest_per_group = session.query(func.max(update_req.id).label('max_id'))\
            .group_by(update_req.scope, update_req.name, update_req.rse_id)\
            .subquery()
        ids_to_keep = session.query(newest_per_group.c.max_id).subquery()
        session.query(update_req)\
            .filter(update_req.id.notin_(ids_to_keep))\
            .delete(synchronize_session=False)
    else:
        # Generic fallback: detect duplicates in Python, keeping the first
        # request seen for every (scope, name[, rse_id]) combination.
        seen_with_rse_id = []
        seen_without_rse_id = []
        duplicate_request_ids = []
        for request in session.query(update_req).all():
            if request.rse_id is not None:
                fingerprint = {'name': request.name, 'scope': request.scope, 'rse_id': request.rse_id}
                seen = seen_with_rse_id
            else:
                fingerprint = {'name': request.name, 'scope': request.scope}
                seen = seen_without_rse_id
            if fingerprint in seen:
                duplicate_request_ids.append(request.id)
            else:
                seen.append(fingerprint)
        for id_batch in chunks(duplicate_request_ids, 100):
            session.query(update_req).filter(update_req.id.in_(id_batch)).delete(synchronize_session=False)

    remaining = session.query(update_req)
    if limit:
        remaining = remaining.limit(limit)
    return [request.to_dict() for request in remaining.all()]
3102
+
3103
+
3104
@transactional_session
def update_collection_replica(update_request, *, session: "Session"):
    """
    Update a collection replica.

    With an ``rse_id`` in the request only the collection replica on that RSE
    is refreshed, or deleted when it previously had available files and now has
    none. Without an ``rse_id`` every collection replica of the dataset is
    refreshed. The processed update request is deleted at the end.

    :param update_request: update request from the updated_col_rep table; the keys
                           'scope', 'name', 'rse_id' and 'id' are read.
    :param session: Database session in use.
    """
    rse_id = update_request['rse_id']
    scope = update_request['scope']
    name = update_request['name']
    file_assoc = models.RSEFileAssociation
    content = models.DataIdentifierAssociation

    if rse_id is not None:
        # Check one specific dataset replica
        replica_on_rse = session.query(models.CollectionReplica).filter_by(scope=scope, name=name, rse_id=rse_id)

        ds_length = 0
        old_available_replicas = 0
        ds_bytes = 0
        try:
            current = replica_on_rse.one()
        except NoResultFound:
            pass
        else:
            ds_length = current.length
            old_available_replicas = current.available_replicas_cnt
            ds_bytes = current.bytes

        ds_available_bytes = 0
        available_replicas = 0
        try:
            # NOTE: the operand order of the scope comparison is kept as in the original expression.
            totals = session.query(file_assoc, content)\
                .filter(file_assoc.scope == content.child_scope,
                        file_assoc.name == content.child_name,
                        content.name == name,
                        file_assoc.rse_id == rse_id,
                        file_assoc.state == ReplicaState.AVAILABLE,
                        scope == content.scope)\
                .with_entities(label('ds_available_bytes', func.sum(file_assoc.bytes)),
                               label('available_replicas', func.count()))\
                .one()
            available_replicas = totals.available_replicas
            ds_available_bytes = totals.ds_available_bytes
        except NoResultFound:
            pass

        ds_replica_state = ReplicaState.AVAILABLE if available_replicas >= ds_length else ReplicaState.UNAVAILABLE

        lost_all_files = old_available_replicas is not None and old_available_replicas > 0 and available_replicas == 0
        if lost_all_files:
            replica_on_rse.delete()
        else:
            updated_replica = replica_on_rse.one()
            updated_replica.state = ds_replica_state
            updated_replica.available_replicas_cnt = available_replicas
            updated_replica.length = ds_length
            updated_replica.bytes = ds_bytes
            updated_replica.available_bytes = ds_available_bytes
    else:
        # Check all dataset replicas
        association = session.query(content)\
            .filter_by(scope=scope, name=name)\
            .with_entities(label('ds_length', func.count()),
                           label('ds_bytes', func.sum(content.bytes)))\
            .one()
        ds_length = association.ds_length
        ds_bytes = association.ds_bytes

        for collection_replica in session.query(models.CollectionReplica).filter_by(scope=scope, name=name).all():
            collection_replica.length = ds_length or 0
            collection_replica.bytes = ds_bytes or 0

        # Available file count and volume of the dataset, one row per RSE.
        per_rse_totals = session.query(file_assoc, content)\
            .filter(file_assoc.scope == content.child_scope,
                    file_assoc.name == content.child_name,
                    content.name == name,
                    file_assoc.state == ReplicaState.AVAILABLE,
                    scope == content.scope)\
            .with_entities(file_assoc.rse_id,
                           label('ds_available_bytes', func.sum(file_assoc.bytes)),
                           label('available_replicas', func.count()))\
            .group_by(file_assoc.rse_id)\
            .all()
        for totals in per_rse_totals:
            if totals.available_replicas >= ds_length:
                ds_replica_state = ReplicaState.AVAILABLE
            else:
                ds_replica_state = ReplicaState.UNAVAILABLE

            collection_replica = session.query(models.CollectionReplica)\
                .filter_by(scope=scope, name=name, rse_id=totals.rse_id)\
                .first()
            if collection_replica:
                collection_replica.state = ds_replica_state
                collection_replica.available_replicas_cnt = totals.available_replicas
                collection_replica.available_bytes = totals.ds_available_bytes

    session.query(models.UpdatedCollectionReplica).filter_by(id=update_request['id']).delete()
3217
+
3218
+
3219
@read_session
def get_bad_pfns(limit=10000, thread=None, total_threads=None, *, session: "Session"):
    """
    Returns a list of bad PFNs, ordered by creation date.

    :param limit: The maximum number of replicas returned.
    :param thread: The assigned thread for this minos instance.
    :param total_threads: The total number of minos threads.
    :param session: The database session in use.

    returns: list of PFNs {'pfn': pfn, 'state': state, 'reason': reason, 'account': account, 'expires_at': expires_at}
    """
    query = session.query(models.BadPFNs.path, models.BadPFNs.state, models.BadPFNs.reason, models.BadPFNs.account, models.BadPFNs.expires_at)
    query = filter_thread_work(session=session, query=query, total_threads=total_threads, thread_id=thread, hash_variable='path')
    # Query methods are generative: the ordered query has to be re-assigned,
    # otherwise the ORDER BY is dropped and `limit` cuts an arbitrary subset.
    query = query.order_by(models.BadPFNs.created_at)
    query = query.limit(limit)

    return [
        {'pfn': clean_surls([str(path)])[0], 'state': state, 'reason': reason, 'account': account, 'expires_at': expires_at}
        for path, state, reason, account, expires_at in query.yield_per(1000)
    ]
3239
+
3240
+
3241
@transactional_session
def bulk_add_bad_replicas(replicas, account, state=BadFilesStatus.TEMPORARY_UNAVAILABLE, reason=None, expires_at=None, *, session: "Session"):
    """
    Bulk add new bad replicas.

    For the TEMPORARY_UNAVAILABLE state an already existing entry of the same
    replica in that state is refreshed instead of inserting a new row.

    :param replicas: the list of bad replicas (dicts with 'scope', 'name' and 'rse_id').
    :param account: The account who declared the bad replicas.
    :param state: The state of the file (SUSPICIOUS, BAD or TEMPORARY_UNAVAILABLE).
    :param reason: The reason stored with the bad replicas.
    :param expires_at: The expiration date stored with the bad replicas.
    :param session: The database session in use.

    :returns: True is successful.
    :raises RucioException: If the flush fails at the database level.
    :raises DataIdentifierAlreadyExists: If a new row conflicts with a persistent instance.
    """
    for replica in replicas:
        replica_key = {'scope': replica['scope'], 'name': replica['name'], 'rse_id': replica['rse_id']}

        if state == BadFilesStatus.TEMPORARY_UNAVAILABLE:
            existing = session.query(models.BadReplicas).filter_by(**replica_key, state=state)
            if existing.count():
                existing.update({'state': BadFilesStatus.TEMPORARY_UNAVAILABLE,
                                 'updated_at': datetime.utcnow(),
                                 'account': account,
                                 'reason': reason,
                                 'expires_at': expires_at},
                                synchronize_session=False)
                continue

        new_bad_replica = models.BadReplicas(**replica_key, reason=reason,
                                             state=state, account=account, bytes=None, expires_at=expires_at)
        new_bad_replica.save(session=session, flush=False)

    try:
        session.flush()
    except (IntegrityError, DatabaseError) as error:
        raise exception.RucioException(error.args)
    except FlushError as error:
        if match('New instance .* with identity key .* conflicts with persistent instance', error.args[0]):
            raise exception.DataIdentifierAlreadyExists('Data Identifier already exists!')
        raise exception.RucioException(error.args)
    return True
3275
+
3276
+
3277
@transactional_session
def bulk_delete_bad_pfns(pfns, *, session: "Session"):
    """
    Bulk delete bad PFNs.

    :param pfns: the list of PFNs to remove from the bad PFNs table.
    :param session: The database session in use.

    :returns: True is successful.
    """
    path_conditions = [models.BadPFNs.path == pfn for pfn in pfns]

    # Delete in batches of 100 OR-ed path conditions per statement.
    for condition_batch in chunks(path_conditions, 100):
        session.query(models.BadPFNs)\
            .filter(or_(*condition_batch))\
            .delete(synchronize_session=False)

    return True
3296
+
3297
+
3298
@transactional_session
def bulk_delete_bad_replicas(bad_replicas, *, session: "Session"):
    """
    Bulk delete bad replica.

    :param bad_replicas: The list of bad replicas to delete (Dictionaries with
                         'scope', 'name', 'rse_id' and 'state').
    :param session: The database session in use.

    :returns: True is successful.
    """
    bad = models.BadReplicas
    replica_conditions = [
        and_(bad.scope == entry['scope'],
             bad.name == entry['name'],
             bad.rse_id == entry['rse_id'],
             bad.state == entry['state'])
        for entry in bad_replicas
    ]

    # Delete in batches of 100 OR-ed replica conditions per statement.
    for condition_batch in chunks(replica_conditions, 100):
        session.query(bad)\
            .filter(or_(*condition_batch))\
            .delete(synchronize_session=False)
    return True
3319
+
3320
+
3321
@transactional_session
def add_bad_pfns(pfns, account, state, reason=None, expires_at=None, *, session: "Session"):
    """
    Add bad PFNs.

    :param pfns: the list of new files.
    :param account: The account who declared the bad replicas.
    :param state: One of the possible states : BAD, SUSPICIOUS, TEMPORARY_UNAVAILABLE
                  (either the BadPFNStatus member or its name as a string).
    :param reason: A string describing the reason of the loss.
    :param expires_at: Specify a timeout for the TEMPORARY_UNAVAILABLE replicas. None for BAD files.
    :param session: The database session in use.

    :returns: True is successful.
    :raises InputValidationError: If expires_at does not fit the requested state.
    :raises Duplicate: If a new row conflicts with a persistent instance.
    :raises RucioException: If the flush fails at the database level.
    """
    rep_state = BadPFNStatus[state] if isinstance(state, str) else state

    if rep_state == BadPFNStatus.TEMPORARY_UNAVAILABLE and expires_at is None:
        raise exception.InputValidationError("When adding a TEMPORARY UNAVAILABLE pfn the expires_at value should be set.")
    if rep_state == BadPFNStatus.BAD and expires_at is not None:
        raise exception.InputValidationError("When adding a BAD pfn the expires_at value shouldn't be set.")

    for cleaned_pfn in clean_surls(pfns):
        bad_pfn = models.BadPFNs(path=str(cleaned_pfn), account=account, state=rep_state, reason=reason, expires_at=expires_at)
        # merge() hands back the instance attached to the session; that one is saved.
        bad_pfn = session.merge(bad_pfn)
        bad_pfn.save(session=session, flush=False)

    try:
        session.flush()
    except (IntegrityError, DatabaseError) as error:
        raise exception.RucioException(error.args)
    except FlushError as error:
        if match('New instance .* with identity key .* conflicts with persistent instance', error.args[0]):
            raise exception.Duplicate('One PFN already exists!')
        raise exception.RucioException(error.args)
    return True
3363
+
3364
+
3365
@read_session
def list_expired_temporary_unavailable_replicas(total_workers, worker_number, limit=10000, *, session: "Session"):
    """
    List the expired temporary unavailable replicas

    :param total_workers:         Number of total workers.
    :param worker_number:         id of the executing worker.
    :param limit:                 The maximum number of replicas returned.
    :param session:               The database session in use.

    :returns: List of (scope, name, rse_id) rows, ordered by expiration date.
    """
    bad = models.BadReplicas
    now = datetime.utcnow()

    # NOTE(review): the hint is registered on models.ReplicationRule although its
    # text and the query target bad_replicas - confirm this is intended.
    expired = session.query(bad.scope, bad.name, bad.rse_id)\
        .filter(bad.state == BadFilesStatus.TEMPORARY_UNAVAILABLE)\
        .filter(bad.expires_at < now)\
        .with_hint(models.ReplicationRule, "index(bad_replicas BAD_REPLICAS_EXPIRES_AT_IDX)", 'oracle')\
        .order_by(bad.expires_at)

    expired = filter_thread_work(session=session, query=expired, total_threads=total_workers, thread_id=worker_number, hash_variable='name')
    return expired.limit(limit).all()
3385
+
3386
+
3387
@read_session
def get_replicas_state(scope=None, name=None, *, session: "Session"):
    """
    Method used by the necromancer to get all the replicas of a DIDs

    :param scope: The scope of the file.
    :param name: The name of the file.
    :param session: The database session in use.

    :returns: A dictionary with the states as keys and the list of rse_ids in that state as value
    """
    rows = session.query(models.RSEFileAssociation.rse_id, models.RSEFileAssociation.state)\
        .filter_by(scope=scope, name=name)\
        .all()

    rse_ids_by_state = {}
    for rse_id, state in rows:
        rse_ids_by_state.setdefault(state, []).append(rse_id)
    return rse_ids_by_state
3406
+
3407
+
3408
@read_session
def get_suspicious_files(rse_expression, available_elsewhere, filter_=None, logger=logging.log, younger_than=10, nattempts=0, nattempts_exact=False, *, session: "Session", exclude_states=['B', 'R', 'D'], is_suspicious=False):
    """
    Gets a list of replicas from bad_replicas table which are: declared more than <nattempts> times since <younger_than> date,
    present on the RSE specified by the <rse_expression> and do not have a state in <exclude_states> list.
    Selected replicas can also be required to be <available_elsewhere> on another RSE than the one declared in bad_replicas table and/or
    be declared as <is_suspicious> in the bad_replicas table.
    Keyword Arguments:
    :param younger_than: Datetime object to select the replicas which were declared since younger_than date.
                         Any non-datetime value (including the default) is replaced by "10 days ago".
    :param nattempts: The minimum number of replica appearances in the bad_replica DB table from younger_than date. Default value = 0.
                      Any non-int value is replaced by 0.
    :param nattempts_exact: If True, then only replicas with exactly 'nattempts' appearances in the bad_replica DB table are retrieved. Replicas with more appearances are ignored.
    :param rse_expression: The RSE expression where the replicas are located.
    :param filter_: Dictionary of attributes by which the RSE results should be filtered. e.g.: {'availability_write': True}
    :param logger: Callable used as ``logger(level, message)``.
    :param exclude_states: List of states which eliminates replicas from search result if any of the states in the list
                           was declared for a replica since younger_than date. Allowed values
                           = ['B', 'R', 'D', 'L', 'T', 'S'] (meaning 'BAD', 'RECOVERED', 'DELETED', 'LOST', 'TEMPORARY_UNAVAILABLE', 'SUSPICIOUS').
                           The default list is only read, never modified.
    :param available_elsewhere: Default: SuspiciousAvailability["ALL"].value, all suspicious replicas are returned.
                                If SuspiciousAvailability["EXIST_COPIES"].value, only replicas that additionally have copies declared as AVAILABLE on at least one other RSE
                                than the one in the bad_replicas table will be taken into account.
                                If SuspiciousAvailability["LAST_COPY"].value, only replicas that do not have another copy declared as AVAILABLE on another RSE will be taken into account.
    :param is_suspicious: If True, only replicas declared as SUSPICIOUS in bad replicas table will be taken into account. Default value = False.
    :param session: The database session in use. Default value = None.

    :returns: a list of replicas:
              [{'scope': scope, 'name': name, 'rse': rse, 'rse_id': rse_id, 'cnt': cnt, 'created_at': created_at}, ...]
    :raises RucioException: If available_elsewhere is not one of the SuspiciousAvailability values.
    """
    allowed_availabilities = [SuspiciousAvailability["ALL"].value,
                              SuspiciousAvailability["EXIST_COPIES"].value,
                              SuspiciousAvailability["LAST_COPY"].value]
    if available_elsewhere not in allowed_availabilities:
        # The same text is logged and raised, so it is built once.
        message = """ERROR, available_elsewhere must be set to one of the following:
                    SuspiciousAvailability["ALL"].value: (default) all suspicious replicas are returned
                    SuspiciousAvailability["EXIST_COPIES"].value: only replicas that additionally have copies declared as AVAILABLE on at least one other RSE are returned
                    SuspiciousAvailability["LAST_COPY"].value: only replicas that do not have another copy declared as AVAILABLE on another RSE are returned"""
        logger(logging.WARNING, message)
        raise exception.RucioException(message)

    # only for the 2 web api used parameters, checking value types and assigning the default values
    if not isinstance(nattempts, int):
        nattempts = 0
    if not isinstance(younger_than, datetime):
        younger_than = datetime.utcnow() - timedelta(days=10)

    # assembling exclude_states_clause
    exclude_states_clause = [BadFilesStatus(state) for state in exclude_states]

    # making aliases for bad_replicas and replicas tables
    bad_replicas_alias = aliased(models.BadReplicas, name='bad_replicas_alias')
    replicas_alias = aliased(models.RSEFileAssociation, name='replicas_alias')

    # assembling the selection rse_clause
    rse_clause = []
    if rse_expression:
        parsedexp = parse_expression(expression=rse_expression, filter_=filter_, session=session)
        rse_clause = [models.RSEFileAssociation.rse_id == rse['id'] for rse in parsedexp]

    # query base
    query = session.query(func.count(), bad_replicas_alias.scope, bad_replicas_alias.name, models.RSEFileAssociation.rse_id, func.min(models.RSEFileAssociation.created_at))\
        .filter(models.RSEFileAssociation.rse_id == bad_replicas_alias.rse_id,
                models.RSEFileAssociation.scope == bad_replicas_alias.scope,
                models.RSEFileAssociation.name == bad_replicas_alias.name,
                bad_replicas_alias.created_at >= younger_than)
    if is_suspicious:
        # Query.filter() returns a new query; without the re-assignment the
        # SUSPICIOUS restriction would be dropped.
        query = query.filter(bad_replicas_alias.state == BadFilesStatus.SUSPICIOUS)
    if rse_clause:
        query = query.filter(or_(*rse_clause))

    # EXISTS sub-query: an AVAILABLE copy of the same file on a different RSE.
    copy_on_other_rse = exists(select(1).where(and_(replicas_alias.state == ReplicaState.AVAILABLE,
                                                    replicas_alias.scope == bad_replicas_alias.scope,
                                                    replicas_alias.name == bad_replicas_alias.name,
                                                    replicas_alias.rse_id != bad_replicas_alias.rse_id)))
    if available_elsewhere == SuspiciousAvailability["EXIST_COPIES"].value:
        # Only return replicas that have at least one copy on another RSE
        query = query.filter(copy_on_other_rse)
    elif available_elsewhere == SuspiciousAvailability["LAST_COPY"].value:
        # Only return replicas that are the last remaining copy
        query = query.filter(~copy_on_other_rse)

    # it is required that the selected replicas
    # do not occur as BAD/DELETED/LOST/RECOVERED/...
    # in the bad_replicas table during the same time window.
    other_states_present = exists(select(1).where(and_(models.BadReplicas.scope == bad_replicas_alias.scope,
                                                       models.BadReplicas.name == bad_replicas_alias.name,
                                                       models.BadReplicas.created_at >= younger_than,
                                                       models.BadReplicas.rse_id == bad_replicas_alias.rse_id,
                                                       models.BadReplicas.state.in_(exclude_states_clause))))
    query = query.filter(not_(other_states_present))

    # finally, the results are grouped by RSE, scope, name and required to have
    # at least 'nattempts' occurrences in the result of the query per replica.
    # If nattempts_exact, then only replicas are required to have exactly
    # 'nattempts' occurrences.
    if nattempts_exact:
        occurrence_filter = func.count() == nattempts
    else:
        occurrence_filter = func.count() > nattempts
    query_result = query.group_by(models.RSEFileAssociation.rse_id, bad_replicas_alias.scope, bad_replicas_alias.name)\
        .having(occurrence_filter)\
        .all()

    # translating the rse_id to RSE name and assembling the return list of dictionaries
    result = []
    rses = {}
    for cnt, scope, name, rse_id, created_at in query_result:
        if rse_id not in rses:
            rses[rse_id] = get_rse_name(rse_id=rse_id, session=session)
        result.append({'scope': scope, 'name': name, 'rse': rses[rse_id], 'rse_id': rse_id, 'cnt': cnt, 'created_at': created_at})

    return result
3523
+
3524
+
3525
@read_session
def get_suspicious_reason(rse_id, scope, name, nattempts=0, logger=logging.log, *, session: "Session"):
    """
    Returns the error message(s) which lead to the replica(s) being declared suspicious.

    :param rse_id: ID of RSE.
    :param scope: Scope of the replica DID.
    :param name: Name of the replica DID.
    :param nattempts: Only reasons recorded more than this many times are returned.
    :param logger: Callable used as ``logger(level, message)``.
    :param session: The database session in use. Default value = None.

    :returns: A list of dicts with the keys 'scope', 'name', 'rse', 'rse_id', 'reason' and 'count'.
    """
    # Alias for bad replicas
    bad_replicas_alias = aliased(models.BadReplicas, name='bad_replicas_alias')

    # query base
    declarations = session.query(bad_replicas_alias.scope, bad_replicas_alias.name, bad_replicas_alias.reason, bad_replicas_alias.rse_id)\
        .filter(bad_replicas_alias.rse_id == rse_id,
                bad_replicas_alias.scope == scope,
                bad_replicas_alias.name == name)
    # Total number of declarations of the replica, reported with every reason.
    count = declarations.count()

    rows_per_reason = declarations\
        .group_by(bad_replicas_alias.rse_id, bad_replicas_alias.scope, bad_replicas_alias.name, bad_replicas_alias.reason)\
        .having(func.count() > nattempts)\
        .all()

    rse_names = {}
    result = []
    for _row_scope, _row_name, reason, row_rse_id in rows_per_reason:
        if row_rse_id not in rse_names:
            rse_names[row_rse_id] = get_rse_name(rse_id=row_rse_id, session=session)
        result.append({'scope': scope, 'name': name, 'rse': rse_names[row_rse_id], 'rse_id': row_rse_id, 'reason': reason, 'count': count})

    if len(result) > 1:
        logger(logging.WARNING, "Multiple reasons have been found. Please investigate.")

    return result
3559
+
3560
+
3561
@transactional_session
def set_tombstone(rse_id, scope, name, tombstone=OBSOLETE, *, session: "Session"):
    """
    Sets a tombstone on a replica.

    The tombstone is only written when no replica lock exists for the replica.

    :param rse_id: ID of RSE.
    :param scope: scope of the replica DID.
    :param name: name of the replica DID.
    :param tombstone: the tombstone to set. Default is OBSOLETE
    :param session: database session in use.

    :raises ReplicaIsLocked: If the replica exists but no row was updated.
    :raises ReplicaNotFound: If the replica does not exist on the RSE.
    """
    replica = models.RSEFileAssociation
    lock = models.ReplicaLock

    lock_exists = exists().where(
        and_(
            lock.rse_id == rse_id,
            lock.name == name,
            lock.scope == scope,
        )
    )
    unlocked_replica = and_(
        replica.rse_id == rse_id,
        replica.name == name,
        replica.scope == scope,
        ~lock_exists,
    )
    stmt = update(replica).where(unlocked_replica)\
        .prefix_with("/*+ index(REPLICAS REPLICAS_PK) */", dialect='oracle')\
        .execution_options(synchronize_session=False)\
        .values(tombstone=tombstone)

    if session.execute(stmt).rowcount != 0:
        return

    # Nothing was updated: find out whether the replica is missing or locked.
    try:
        session.query(replica).filter_by(scope=scope, name=name, rse_id=rse_id).one()
        raise exception.ReplicaIsLocked('Replica %s:%s on RSE %s is locked.' % (scope, name, get_rse_name(rse_id=rse_id, session=session)))
    except NoResultFound:
        raise exception.ReplicaNotFound('Replica %s:%s on RSE %s could not be found.' % (scope, name, get_rse_name(rse_id=rse_id, session=session)))
3597
+
3598
+
3599
@read_session
def get_RSEcoverage_of_dataset(scope, name, *, session: "Session"):
    """
    Get total bytes present on RSEs

    :param scope:             Scope of the dataset
    :param name:              Name of the dataset
    :param session:           The db session.
    :return:                  Dictionary { rse_id : <total bytes present at rse_id> };
                              RSEs with a total of zero or NULL are left out.
    """
    content = models.DataIdentifierAssociation
    replica = models.RSEFileAssociation

    # Sum the content sizes of the dataset per RSE, ignoring replicas being deleted.
    per_rse_totals = session.query(replica.rse_id, func.sum(content.bytes))\
        .filter(and_(content.child_scope == replica.scope,
                     content.child_name == replica.name,
                     content.scope == scope,
                     content.name == name,
                     replica.state != ReplicaState.BEING_DELETED,
                     ))\
        .group_by(replica.rse_id)

    return {rse_id: total for rse_id, total in per_rse_totals if total}