rucio 32.8.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rucio might be problematic. Click here for more details.

Files changed (481)
  1. rucio/__init__.py +18 -0
  2. rucio/alembicrevision.py +16 -0
  3. rucio/api/__init__.py +14 -0
  4. rucio/api/account.py +266 -0
  5. rucio/api/account_limit.py +287 -0
  6. rucio/api/authentication.py +302 -0
  7. rucio/api/config.py +218 -0
  8. rucio/api/credential.py +60 -0
  9. rucio/api/did.py +726 -0
  10. rucio/api/dirac.py +71 -0
  11. rucio/api/exporter.py +60 -0
  12. rucio/api/heartbeat.py +62 -0
  13. rucio/api/identity.py +160 -0
  14. rucio/api/importer.py +46 -0
  15. rucio/api/lifetime_exception.py +95 -0
  16. rucio/api/lock.py +131 -0
  17. rucio/api/meta.py +85 -0
  18. rucio/api/permission.py +72 -0
  19. rucio/api/quarantined_replica.py +69 -0
  20. rucio/api/replica.py +528 -0
  21. rucio/api/request.py +220 -0
  22. rucio/api/rse.py +601 -0
  23. rucio/api/rule.py +335 -0
  24. rucio/api/scope.py +89 -0
  25. rucio/api/subscription.py +255 -0
  26. rucio/api/temporary_did.py +49 -0
  27. rucio/api/vo.py +112 -0
  28. rucio/client/__init__.py +16 -0
  29. rucio/client/accountclient.py +413 -0
  30. rucio/client/accountlimitclient.py +155 -0
  31. rucio/client/baseclient.py +929 -0
  32. rucio/client/client.py +77 -0
  33. rucio/client/configclient.py +113 -0
  34. rucio/client/credentialclient.py +54 -0
  35. rucio/client/didclient.py +691 -0
  36. rucio/client/diracclient.py +48 -0
  37. rucio/client/downloadclient.py +1674 -0
  38. rucio/client/exportclient.py +44 -0
  39. rucio/client/fileclient.py +51 -0
  40. rucio/client/importclient.py +42 -0
  41. rucio/client/lifetimeclient.py +74 -0
  42. rucio/client/lockclient.py +99 -0
  43. rucio/client/metaclient.py +137 -0
  44. rucio/client/pingclient.py +45 -0
  45. rucio/client/replicaclient.py +444 -0
  46. rucio/client/requestclient.py +109 -0
  47. rucio/client/rseclient.py +664 -0
  48. rucio/client/ruleclient.py +287 -0
  49. rucio/client/scopeclient.py +88 -0
  50. rucio/client/subscriptionclient.py +161 -0
  51. rucio/client/touchclient.py +78 -0
  52. rucio/client/uploadclient.py +871 -0
  53. rucio/common/__init__.py +14 -0
  54. rucio/common/cache.py +74 -0
  55. rucio/common/config.py +796 -0
  56. rucio/common/constants.py +92 -0
  57. rucio/common/constraints.py +18 -0
  58. rucio/common/didtype.py +187 -0
  59. rucio/common/dumper/__init__.py +306 -0
  60. rucio/common/dumper/consistency.py +449 -0
  61. rucio/common/dumper/data_models.py +325 -0
  62. rucio/common/dumper/path_parsing.py +65 -0
  63. rucio/common/exception.py +1092 -0
  64. rucio/common/extra.py +37 -0
  65. rucio/common/logging.py +404 -0
  66. rucio/common/pcache.py +1387 -0
  67. rucio/common/policy.py +84 -0
  68. rucio/common/schema/__init__.py +143 -0
  69. rucio/common/schema/atlas.py +411 -0
  70. rucio/common/schema/belleii.py +406 -0
  71. rucio/common/schema/cms.py +478 -0
  72. rucio/common/schema/domatpc.py +399 -0
  73. rucio/common/schema/escape.py +424 -0
  74. rucio/common/schema/generic.py +431 -0
  75. rucio/common/schema/generic_multi_vo.py +410 -0
  76. rucio/common/schema/icecube.py +404 -0
  77. rucio/common/schema/lsst.py +423 -0
  78. rucio/common/stomp_utils.py +160 -0
  79. rucio/common/stopwatch.py +56 -0
  80. rucio/common/test_rucio_server.py +148 -0
  81. rucio/common/types.py +158 -0
  82. rucio/common/utils.py +1946 -0
  83. rucio/core/__init__.py +14 -0
  84. rucio/core/account.py +426 -0
  85. rucio/core/account_counter.py +171 -0
  86. rucio/core/account_limit.py +357 -0
  87. rucio/core/authentication.py +563 -0
  88. rucio/core/config.py +386 -0
  89. rucio/core/credential.py +218 -0
  90. rucio/core/did.py +3102 -0
  91. rucio/core/did_meta_plugins/__init__.py +250 -0
  92. rucio/core/did_meta_plugins/did_column_meta.py +326 -0
  93. rucio/core/did_meta_plugins/did_meta_plugin_interface.py +116 -0
  94. rucio/core/did_meta_plugins/filter_engine.py +573 -0
  95. rucio/core/did_meta_plugins/json_meta.py +215 -0
  96. rucio/core/did_meta_plugins/mongo_meta.py +199 -0
  97. rucio/core/did_meta_plugins/postgres_meta.py +317 -0
  98. rucio/core/dirac.py +208 -0
  99. rucio/core/distance.py +164 -0
  100. rucio/core/exporter.py +59 -0
  101. rucio/core/heartbeat.py +263 -0
  102. rucio/core/identity.py +290 -0
  103. rucio/core/importer.py +248 -0
  104. rucio/core/lifetime_exception.py +377 -0
  105. rucio/core/lock.py +474 -0
  106. rucio/core/message.py +241 -0
  107. rucio/core/meta.py +190 -0
  108. rucio/core/monitor.py +441 -0
  109. rucio/core/naming_convention.py +154 -0
  110. rucio/core/nongrid_trace.py +124 -0
  111. rucio/core/oidc.py +1339 -0
  112. rucio/core/permission/__init__.py +107 -0
  113. rucio/core/permission/atlas.py +1333 -0
  114. rucio/core/permission/belleii.py +1076 -0
  115. rucio/core/permission/cms.py +1166 -0
  116. rucio/core/permission/escape.py +1076 -0
  117. rucio/core/permission/generic.py +1128 -0
  118. rucio/core/permission/generic_multi_vo.py +1148 -0
  119. rucio/core/quarantined_replica.py +190 -0
  120. rucio/core/replica.py +3627 -0
  121. rucio/core/replica_sorter.py +368 -0
  122. rucio/core/request.py +2241 -0
  123. rucio/core/rse.py +1835 -0
  124. rucio/core/rse_counter.py +155 -0
  125. rucio/core/rse_expression_parser.py +460 -0
  126. rucio/core/rse_selector.py +277 -0
  127. rucio/core/rule.py +3419 -0
  128. rucio/core/rule_grouping.py +1473 -0
  129. rucio/core/scope.py +152 -0
  130. rucio/core/subscription.py +316 -0
  131. rucio/core/temporary_did.py +188 -0
  132. rucio/core/topology.py +448 -0
  133. rucio/core/trace.py +361 -0
  134. rucio/core/transfer.py +1233 -0
  135. rucio/core/vo.py +151 -0
  136. rucio/core/volatile_replica.py +123 -0
  137. rucio/daemons/__init__.py +14 -0
  138. rucio/daemons/abacus/__init__.py +14 -0
  139. rucio/daemons/abacus/account.py +106 -0
  140. rucio/daemons/abacus/collection_replica.py +113 -0
  141. rucio/daemons/abacus/rse.py +107 -0
  142. rucio/daemons/atropos/__init__.py +14 -0
  143. rucio/daemons/atropos/atropos.py +243 -0
  144. rucio/daemons/auditor/__init__.py +261 -0
  145. rucio/daemons/auditor/hdfs.py +86 -0
  146. rucio/daemons/auditor/srmdumps.py +284 -0
  147. rucio/daemons/automatix/__init__.py +14 -0
  148. rucio/daemons/automatix/automatix.py +281 -0
  149. rucio/daemons/badreplicas/__init__.py +14 -0
  150. rucio/daemons/badreplicas/minos.py +311 -0
  151. rucio/daemons/badreplicas/minos_temporary_expiration.py +173 -0
  152. rucio/daemons/badreplicas/necromancer.py +200 -0
  153. rucio/daemons/bb8/__init__.py +14 -0
  154. rucio/daemons/bb8/bb8.py +356 -0
  155. rucio/daemons/bb8/common.py +762 -0
  156. rucio/daemons/bb8/nuclei_background_rebalance.py +147 -0
  157. rucio/daemons/bb8/t2_background_rebalance.py +146 -0
  158. rucio/daemons/c3po/__init__.py +14 -0
  159. rucio/daemons/c3po/algorithms/__init__.py +14 -0
  160. rucio/daemons/c3po/algorithms/simple.py +131 -0
  161. rucio/daemons/c3po/algorithms/t2_free_space.py +125 -0
  162. rucio/daemons/c3po/algorithms/t2_free_space_only_pop.py +127 -0
  163. rucio/daemons/c3po/algorithms/t2_free_space_only_pop_with_network.py +279 -0
  164. rucio/daemons/c3po/c3po.py +342 -0
  165. rucio/daemons/c3po/collectors/__init__.py +14 -0
  166. rucio/daemons/c3po/collectors/agis.py +108 -0
  167. rucio/daemons/c3po/collectors/free_space.py +62 -0
  168. rucio/daemons/c3po/collectors/jedi_did.py +48 -0
  169. rucio/daemons/c3po/collectors/mock_did.py +46 -0
  170. rucio/daemons/c3po/collectors/network_metrics.py +63 -0
  171. rucio/daemons/c3po/collectors/workload.py +110 -0
  172. rucio/daemons/c3po/utils/__init__.py +14 -0
  173. rucio/daemons/c3po/utils/dataset_cache.py +40 -0
  174. rucio/daemons/c3po/utils/expiring_dataset_cache.py +45 -0
  175. rucio/daemons/c3po/utils/expiring_list.py +63 -0
  176. rucio/daemons/c3po/utils/popularity.py +82 -0
  177. rucio/daemons/c3po/utils/timeseries.py +76 -0
  178. rucio/daemons/cache/__init__.py +14 -0
  179. rucio/daemons/cache/consumer.py +191 -0
  180. rucio/daemons/common.py +391 -0
  181. rucio/daemons/conveyor/__init__.py +14 -0
  182. rucio/daemons/conveyor/common.py +530 -0
  183. rucio/daemons/conveyor/finisher.py +492 -0
  184. rucio/daemons/conveyor/poller.py +372 -0
  185. rucio/daemons/conveyor/preparer.py +198 -0
  186. rucio/daemons/conveyor/receiver.py +206 -0
  187. rucio/daemons/conveyor/stager.py +127 -0
  188. rucio/daemons/conveyor/submitter.py +379 -0
  189. rucio/daemons/conveyor/throttler.py +468 -0
  190. rucio/daemons/follower/__init__.py +14 -0
  191. rucio/daemons/follower/follower.py +97 -0
  192. rucio/daemons/hermes/__init__.py +14 -0
  193. rucio/daemons/hermes/hermes.py +738 -0
  194. rucio/daemons/judge/__init__.py +14 -0
  195. rucio/daemons/judge/cleaner.py +149 -0
  196. rucio/daemons/judge/evaluator.py +172 -0
  197. rucio/daemons/judge/injector.py +154 -0
  198. rucio/daemons/judge/repairer.py +144 -0
  199. rucio/daemons/oauthmanager/__init__.py +14 -0
  200. rucio/daemons/oauthmanager/oauthmanager.py +199 -0
  201. rucio/daemons/reaper/__init__.py +14 -0
  202. rucio/daemons/reaper/dark_reaper.py +272 -0
  203. rucio/daemons/reaper/light_reaper.py +255 -0
  204. rucio/daemons/reaper/reaper.py +701 -0
  205. rucio/daemons/replicarecoverer/__init__.py +14 -0
  206. rucio/daemons/replicarecoverer/suspicious_replica_recoverer.py +487 -0
  207. rucio/daemons/storage/__init__.py +14 -0
  208. rucio/daemons/storage/consistency/__init__.py +14 -0
  209. rucio/daemons/storage/consistency/actions.py +753 -0
  210. rucio/daemons/tracer/__init__.py +14 -0
  211. rucio/daemons/tracer/kronos.py +513 -0
  212. rucio/daemons/transmogrifier/__init__.py +14 -0
  213. rucio/daemons/transmogrifier/transmogrifier.py +753 -0
  214. rucio/daemons/undertaker/__init__.py +14 -0
  215. rucio/daemons/undertaker/undertaker.py +137 -0
  216. rucio/db/__init__.py +14 -0
  217. rucio/db/sqla/__init__.py +38 -0
  218. rucio/db/sqla/constants.py +192 -0
  219. rucio/db/sqla/migrate_repo/__init__.py +14 -0
  220. rucio/db/sqla/migrate_repo/env.py +111 -0
  221. rucio/db/sqla/migrate_repo/versions/01eaf73ab656_add_new_rule_notification_state_progress.py +71 -0
  222. rucio/db/sqla/migrate_repo/versions/0437a40dbfd1_add_eol_at_in_rules.py +50 -0
  223. rucio/db/sqla/migrate_repo/versions/0f1adb7a599a_create_transfer_hops_table.py +61 -0
  224. rucio/db/sqla/migrate_repo/versions/102efcf145f4_added_stuck_at_column_to_rules.py +46 -0
  225. rucio/db/sqla/migrate_repo/versions/13d4f70c66a9_introduce_transfer_limits.py +93 -0
  226. rucio/db/sqla/migrate_repo/versions/140fef722e91_cleanup_distances_table.py +78 -0
  227. rucio/db/sqla/migrate_repo/versions/14ec5aeb64cf_add_request_external_host.py +46 -0
  228. rucio/db/sqla/migrate_repo/versions/156fb5b5a14_add_request_type_to_requests_idx.py +53 -0
  229. rucio/db/sqla/migrate_repo/versions/1677d4d803c8_split_rse_availability_into_multiple.py +69 -0
  230. rucio/db/sqla/migrate_repo/versions/16a0aca82e12_create_index_on_table_replicas_path.py +42 -0
  231. rucio/db/sqla/migrate_repo/versions/1803333ac20f_adding_provenance_and_phys_group.py +46 -0
  232. rucio/db/sqla/migrate_repo/versions/1a29d6a9504c_add_didtype_chck_to_requests.py +61 -0
  233. rucio/db/sqla/migrate_repo/versions/1a80adff031a_create_index_on_rules_hist_recent.py +42 -0
  234. rucio/db/sqla/migrate_repo/versions/1c45d9730ca6_increase_identity_length.py +141 -0
  235. rucio/db/sqla/migrate_repo/versions/1d1215494e95_add_quarantined_replicas_table.py +75 -0
  236. rucio/db/sqla/migrate_repo/versions/1d96f484df21_asynchronous_rules_and_rule_approval.py +75 -0
  237. rucio/db/sqla/migrate_repo/versions/1f46c5f240ac_add_bytes_column_to_bad_replicas.py +46 -0
  238. rucio/db/sqla/migrate_repo/versions/1fc15ab60d43_add_message_history_table.py +51 -0
  239. rucio/db/sqla/migrate_repo/versions/2190e703eb6e_move_rse_settings_to_rse_attributes.py +135 -0
  240. rucio/db/sqla/migrate_repo/versions/21d6b9dc9961_add_mismatch_scheme_state_to_requests.py +65 -0
  241. rucio/db/sqla/migrate_repo/versions/22cf51430c78_add_availability_column_to_table_rses.py +42 -0
  242. rucio/db/sqla/migrate_repo/versions/22d887e4ec0a_create_sources_table.py +66 -0
  243. rucio/db/sqla/migrate_repo/versions/25821a8a45a3_remove_unique_constraint_on_requests.py +54 -0
  244. rucio/db/sqla/migrate_repo/versions/25fc855625cf_added_unique_constraint_to_rules.py +43 -0
  245. rucio/db/sqla/migrate_repo/versions/269fee20dee9_add_repair_cnt_to_locks.py +46 -0
  246. rucio/db/sqla/migrate_repo/versions/271a46ea6244_add_ignore_availability_column_to_rules.py +47 -0
  247. rucio/db/sqla/migrate_repo/versions/277b5fbb41d3_switch_heartbeats_executable.py +54 -0
  248. rucio/db/sqla/migrate_repo/versions/27e3a68927fb_remove_replicas_tombstone_and_replicas_.py +39 -0
  249. rucio/db/sqla/migrate_repo/versions/2854cd9e168_added_rule_id_column.py +48 -0
  250. rucio/db/sqla/migrate_repo/versions/295289b5a800_processed_by_and__at_in_requests.py +47 -0
  251. rucio/db/sqla/migrate_repo/versions/2962ece31cf4_add_nbaccesses_column_in_the_did_table.py +48 -0
  252. rucio/db/sqla/migrate_repo/versions/2af3291ec4c_added_replicas_history_table.py +59 -0
  253. rucio/db/sqla/migrate_repo/versions/2b69addda658_add_columns_for_third_party_copy_read_.py +47 -0
  254. rucio/db/sqla/migrate_repo/versions/2b8e7bcb4783_add_config_table.py +72 -0
  255. rucio/db/sqla/migrate_repo/versions/2ba5229cb54c_add_submitted_at_to_requests_table.py +46 -0
  256. rucio/db/sqla/migrate_repo/versions/2cbee484dcf9_added_column_volume_to_rse_transfer_.py +45 -0
  257. rucio/db/sqla/migrate_repo/versions/2edee4a83846_add_source_to_requests_and_requests_.py +48 -0
  258. rucio/db/sqla/migrate_repo/versions/2eef46be23d4_change_tokens_pk.py +48 -0
  259. rucio/db/sqla/migrate_repo/versions/2f648fc909f3_index_in_rule_history_on_scope_name.py +42 -0
  260. rucio/db/sqla/migrate_repo/versions/3082b8cef557_add_naming_convention_table_and_closed_.py +69 -0
  261. rucio/db/sqla/migrate_repo/versions/30fa38b6434e_add_index_on_service_column_in_the_message_table.py +46 -0
  262. rucio/db/sqla/migrate_repo/versions/3152492b110b_added_staging_area_column.py +78 -0
  263. rucio/db/sqla/migrate_repo/versions/32c7d2783f7e_create_bad_replicas_table.py +62 -0
  264. rucio/db/sqla/migrate_repo/versions/3345511706b8_replicas_table_pk_definition_is_in_.py +74 -0
  265. rucio/db/sqla/migrate_repo/versions/35ef10d1e11b_change_index_on_table_requests.py +44 -0
  266. rucio/db/sqla/migrate_repo/versions/379a19b5332d_create_rse_limits_table.py +67 -0
  267. rucio/db/sqla/migrate_repo/versions/384b96aa0f60_created_rule_history_tables.py +134 -0
  268. rucio/db/sqla/migrate_repo/versions/3ac1660a1a72_extend_distance_table.py +58 -0
  269. rucio/db/sqla/migrate_repo/versions/3ad36e2268b0_create_collection_replicas_updates_table.py +79 -0
  270. rucio/db/sqla/migrate_repo/versions/3c9df354071b_extend_waiting_request_state.py +61 -0
  271. rucio/db/sqla/migrate_repo/versions/3d9813fab443_add_a_new_state_lost_in_badfilesstatus.py +45 -0
  272. rucio/db/sqla/migrate_repo/versions/40ad39ce3160_add_transferred_at_to_requests_table.py +46 -0
  273. rucio/db/sqla/migrate_repo/versions/4207be2fd914_add_notification_column_to_rules.py +65 -0
  274. rucio/db/sqla/migrate_repo/versions/42db2617c364_create_index_on_requests_external_id.py +42 -0
  275. rucio/db/sqla/migrate_repo/versions/436827b13f82_added_column_activity_to_table_requests.py +46 -0
  276. rucio/db/sqla/migrate_repo/versions/44278720f774_update_requests_typ_sta_upd_idx_index.py +46 -0
  277. rucio/db/sqla/migrate_repo/versions/45378a1e76a8_create_collection_replica_table.py +80 -0
  278. rucio/db/sqla/migrate_repo/versions/469d262be19_removing_created_at_index.py +43 -0
  279. rucio/db/sqla/migrate_repo/versions/4783c1f49cb4_create_distance_table.py +61 -0
  280. rucio/db/sqla/migrate_repo/versions/49a21b4d4357_create_index_on_table_tokens.py +47 -0
  281. rucio/db/sqla/migrate_repo/versions/4a2cbedda8b9_add_source_replica_expression_column_to_.py +46 -0
  282. rucio/db/sqla/migrate_repo/versions/4a7182d9578b_added_bytes_length_accessed_at_columns.py +52 -0
  283. rucio/db/sqla/migrate_repo/versions/4bab9edd01fc_create_index_on_requests_rule_id.py +42 -0
  284. rucio/db/sqla/migrate_repo/versions/4c3a4acfe006_new_attr_account_table.py +65 -0
  285. rucio/db/sqla/migrate_repo/versions/4cf0a2e127d4_adding_transient_metadata.py +46 -0
  286. rucio/db/sqla/migrate_repo/versions/50280c53117c_add_qos_class_to_rse.py +47 -0
  287. rucio/db/sqla/migrate_repo/versions/52153819589c_add_rse_id_to_replicas_table.py +45 -0
  288. rucio/db/sqla/migrate_repo/versions/52fd9f4916fa_added_activity_to_rules.py +46 -0
  289. rucio/db/sqla/migrate_repo/versions/53b479c3cb0f_fix_did_meta_table_missing_updated_at_.py +48 -0
  290. rucio/db/sqla/migrate_repo/versions/5673b4b6e843_add_wfms_metadata_to_rule_tables.py +50 -0
  291. rucio/db/sqla/migrate_repo/versions/575767d9f89_added_source_history_table.py +59 -0
  292. rucio/db/sqla/migrate_repo/versions/58bff7008037_add_started_at_to_requests.py +48 -0
  293. rucio/db/sqla/migrate_repo/versions/58c8b78301ab_rename_callback_to_message.py +108 -0
  294. rucio/db/sqla/migrate_repo/versions/5f139f77382a_added_child_rule_id_column.py +57 -0
  295. rucio/db/sqla/migrate_repo/versions/688ef1840840_adding_did_meta_table.py +51 -0
  296. rucio/db/sqla/migrate_repo/versions/6e572a9bfbf3_add_new_split_container_column_to_rules.py +50 -0
  297. rucio/db/sqla/migrate_repo/versions/70587619328_add_comment_column_for_subscriptions.py +46 -0
  298. rucio/db/sqla/migrate_repo/versions/739064d31565_remove_history_table_pks.py +42 -0
  299. rucio/db/sqla/migrate_repo/versions/7541902bf173_add_didsfollowed_and_followevents_table.py +93 -0
  300. rucio/db/sqla/migrate_repo/versions/7ec22226cdbf_new_replica_state_for_temporary_.py +73 -0
  301. rucio/db/sqla/migrate_repo/versions/810a41685bc1_added_columns_rse_transfer_limits.py +52 -0
  302. rucio/db/sqla/migrate_repo/versions/83f991c63a93_correct_rse_expression_length.py +45 -0
  303. rucio/db/sqla/migrate_repo/versions/8523998e2e76_increase_size_of_extended_attributes_.py +46 -0
  304. rucio/db/sqla/migrate_repo/versions/8ea9122275b1_adding_missing_function_based_indices.py +54 -0
  305. rucio/db/sqla/migrate_repo/versions/90f47792bb76_add_clob_payload_to_messages.py +48 -0
  306. rucio/db/sqla/migrate_repo/versions/914b8f02df38_new_table_for_lifetime_model_exceptions.py +70 -0
  307. rucio/db/sqla/migrate_repo/versions/94a5961ddbf2_add_estimator_columns.py +48 -0
  308. rucio/db/sqla/migrate_repo/versions/9a1b149a2044_add_saml_identity_type.py +95 -0
  309. rucio/db/sqla/migrate_repo/versions/9a45bc4ea66d_add_vp_table.py +55 -0
  310. rucio/db/sqla/migrate_repo/versions/9eb936a81eb1_true_is_true.py +74 -0
  311. rucio/db/sqla/migrate_repo/versions/a118956323f8_added_vo_table_and_vo_col_to_rse.py +78 -0
  312. rucio/db/sqla/migrate_repo/versions/a193a275255c_add_status_column_in_messages.py +49 -0
  313. rucio/db/sqla/migrate_repo/versions/a5f6f6e928a7_1_7_0.py +124 -0
  314. rucio/db/sqla/migrate_repo/versions/a616581ee47_added_columns_to_table_requests.py +60 -0
  315. rucio/db/sqla/migrate_repo/versions/a6eb23955c28_state_idx_non_functional.py +53 -0
  316. rucio/db/sqla/migrate_repo/versions/a74275a1ad30_added_global_quota_table.py +56 -0
  317. rucio/db/sqla/migrate_repo/versions/a93e4e47bda_heartbeats.py +67 -0
  318. rucio/db/sqla/migrate_repo/versions/ae2a56fcc89_added_comment_column_to_rules.py +50 -0
  319. rucio/db/sqla/migrate_repo/versions/b4293a99f344_added_column_identity_to_table_tokens.py +46 -0
  320. rucio/db/sqla/migrate_repo/versions/b7d287de34fd_removal_of_replicastate_source.py +92 -0
  321. rucio/db/sqla/migrate_repo/versions/b818052fa670_add_index_to_quarantined_replicas.py +42 -0
  322. rucio/db/sqla/migrate_repo/versions/b8caac94d7f0_add_comments_column_for_subscriptions_.py +46 -0
  323. rucio/db/sqla/migrate_repo/versions/b96a1c7e1cc4_new_bad_pfns_table_and_bad_replicas_.py +147 -0
  324. rucio/db/sqla/migrate_repo/versions/bb695f45c04_extend_request_state.py +78 -0
  325. rucio/db/sqla/migrate_repo/versions/bc68e9946deb_add_staging_timestamps_to_request.py +53 -0
  326. rucio/db/sqla/migrate_repo/versions/bf3baa1c1474_correct_pk_and_idx_for_history_tables.py +74 -0
  327. rucio/db/sqla/migrate_repo/versions/c0937668555f_add_qos_policy_map_table.py +56 -0
  328. rucio/db/sqla/migrate_repo/versions/c129ccdb2d5_add_lumiblocknr_to_dids.py +46 -0
  329. rucio/db/sqla/migrate_repo/versions/ccdbcd48206e_add_did_type_column_index_on_did_meta_.py +68 -0
  330. rucio/db/sqla/migrate_repo/versions/cebad904c4dd_new_payload_column_for_heartbeats.py +48 -0
  331. rucio/db/sqla/migrate_repo/versions/d1189a09c6e0_oauth2_0_and_jwt_feature_support_adding_.py +149 -0
  332. rucio/db/sqla/migrate_repo/versions/d23453595260_extend_request_state_for_preparer.py +106 -0
  333. rucio/db/sqla/migrate_repo/versions/d6dceb1de2d_added_purge_column_to_rules.py +47 -0
  334. rucio/db/sqla/migrate_repo/versions/d6e2c3b2cf26_remove_third_party_copy_column_from_rse.py +45 -0
  335. rucio/db/sqla/migrate_repo/versions/d91002c5841_new_account_limits_table.py +105 -0
  336. rucio/db/sqla/migrate_repo/versions/e138c364ebd0_extending_columns_for_filter_and_.py +52 -0
  337. rucio/db/sqla/migrate_repo/versions/e59300c8b179_support_for_archive.py +106 -0
  338. rucio/db/sqla/migrate_repo/versions/f1b14a8c2ac1_postgres_use_check_constraints.py +30 -0
  339. rucio/db/sqla/migrate_repo/versions/f41ffe206f37_oracle_global_temporary_tables.py +75 -0
  340. rucio/db/sqla/migrate_repo/versions/f85a2962b021_adding_transfertool_column_to_requests_.py +49 -0
  341. rucio/db/sqla/migrate_repo/versions/fa7a7d78b602_increase_refresh_token_size.py +45 -0
  342. rucio/db/sqla/migrate_repo/versions/fb28a95fe288_add_replicas_rse_id_tombstone_idx.py +38 -0
  343. rucio/db/sqla/migrate_repo/versions/fe1a65b176c9_set_third_party_copy_read_and_write_.py +44 -0
  344. rucio/db/sqla/migrate_repo/versions/fe8ea2fa9788_added_third_party_copy_column_to_rse_.py +46 -0
  345. rucio/db/sqla/models.py +1834 -0
  346. rucio/db/sqla/sautils.py +48 -0
  347. rucio/db/sqla/session.py +470 -0
  348. rucio/db/sqla/types.py +207 -0
  349. rucio/db/sqla/util.py +521 -0
  350. rucio/rse/__init__.py +97 -0
  351. rucio/rse/protocols/__init__.py +14 -0
  352. rucio/rse/protocols/cache.py +123 -0
  353. rucio/rse/protocols/dummy.py +112 -0
  354. rucio/rse/protocols/gfal.py +701 -0
  355. rucio/rse/protocols/globus.py +243 -0
  356. rucio/rse/protocols/gsiftp.py +93 -0
  357. rucio/rse/protocols/http_cache.py +83 -0
  358. rucio/rse/protocols/mock.py +124 -0
  359. rucio/rse/protocols/ngarc.py +210 -0
  360. rucio/rse/protocols/posix.py +251 -0
  361. rucio/rse/protocols/protocol.py +530 -0
  362. rucio/rse/protocols/rclone.py +365 -0
  363. rucio/rse/protocols/rfio.py +137 -0
  364. rucio/rse/protocols/srm.py +339 -0
  365. rucio/rse/protocols/ssh.py +414 -0
  366. rucio/rse/protocols/storm.py +207 -0
  367. rucio/rse/protocols/webdav.py +547 -0
  368. rucio/rse/protocols/xrootd.py +295 -0
  369. rucio/rse/rsemanager.py +752 -0
  370. rucio/tests/__init__.py +14 -0
  371. rucio/tests/common.py +244 -0
  372. rucio/tests/common_server.py +132 -0
  373. rucio/transfertool/__init__.py +14 -0
  374. rucio/transfertool/fts3.py +1484 -0
  375. rucio/transfertool/globus.py +200 -0
  376. rucio/transfertool/globus_library.py +182 -0
  377. rucio/transfertool/mock.py +81 -0
  378. rucio/transfertool/transfertool.py +212 -0
  379. rucio/vcsversion.py +11 -0
  380. rucio/version.py +46 -0
  381. rucio/web/__init__.py +14 -0
  382. rucio/web/rest/__init__.py +14 -0
  383. rucio/web/rest/flaskapi/__init__.py +14 -0
  384. rucio/web/rest/flaskapi/authenticated_bp.py +28 -0
  385. rucio/web/rest/flaskapi/v1/__init__.py +14 -0
  386. rucio/web/rest/flaskapi/v1/accountlimits.py +234 -0
  387. rucio/web/rest/flaskapi/v1/accounts.py +1088 -0
  388. rucio/web/rest/flaskapi/v1/archives.py +100 -0
  389. rucio/web/rest/flaskapi/v1/auth.py +1642 -0
  390. rucio/web/rest/flaskapi/v1/common.py +385 -0
  391. rucio/web/rest/flaskapi/v1/config.py +305 -0
  392. rucio/web/rest/flaskapi/v1/credentials.py +213 -0
  393. rucio/web/rest/flaskapi/v1/dids.py +2204 -0
  394. rucio/web/rest/flaskapi/v1/dirac.py +116 -0
  395. rucio/web/rest/flaskapi/v1/export.py +77 -0
  396. rucio/web/rest/flaskapi/v1/heartbeats.py +129 -0
  397. rucio/web/rest/flaskapi/v1/identities.py +263 -0
  398. rucio/web/rest/flaskapi/v1/import.py +133 -0
  399. rucio/web/rest/flaskapi/v1/lifetime_exceptions.py +315 -0
  400. rucio/web/rest/flaskapi/v1/locks.py +360 -0
  401. rucio/web/rest/flaskapi/v1/main.py +83 -0
  402. rucio/web/rest/flaskapi/v1/meta.py +226 -0
  403. rucio/web/rest/flaskapi/v1/metrics.py +37 -0
  404. rucio/web/rest/flaskapi/v1/nongrid_traces.py +97 -0
  405. rucio/web/rest/flaskapi/v1/ping.py +89 -0
  406. rucio/web/rest/flaskapi/v1/redirect.py +366 -0
  407. rucio/web/rest/flaskapi/v1/replicas.py +1866 -0
  408. rucio/web/rest/flaskapi/v1/requests.py +841 -0
  409. rucio/web/rest/flaskapi/v1/rses.py +2204 -0
  410. rucio/web/rest/flaskapi/v1/rules.py +824 -0
  411. rucio/web/rest/flaskapi/v1/scopes.py +161 -0
  412. rucio/web/rest/flaskapi/v1/subscriptions.py +646 -0
  413. rucio/web/rest/flaskapi/v1/templates/auth_crash.html +80 -0
  414. rucio/web/rest/flaskapi/v1/templates/auth_granted.html +82 -0
  415. rucio/web/rest/flaskapi/v1/tmp_dids.py +115 -0
  416. rucio/web/rest/flaskapi/v1/traces.py +100 -0
  417. rucio/web/rest/flaskapi/v1/vos.py +280 -0
  418. rucio/web/rest/main.py +19 -0
  419. rucio/web/rest/metrics.py +28 -0
  420. rucio-32.8.6.data/data/rucio/etc/alembic.ini.template +71 -0
  421. rucio-32.8.6.data/data/rucio/etc/alembic_offline.ini.template +74 -0
  422. rucio-32.8.6.data/data/rucio/etc/globus-config.yml.template +5 -0
  423. rucio-32.8.6.data/data/rucio/etc/ldap.cfg.template +30 -0
  424. rucio-32.8.6.data/data/rucio/etc/mail_templates/rule_approval_request.tmpl +38 -0
  425. rucio-32.8.6.data/data/rucio/etc/mail_templates/rule_approved_admin.tmpl +4 -0
  426. rucio-32.8.6.data/data/rucio/etc/mail_templates/rule_approved_user.tmpl +17 -0
  427. rucio-32.8.6.data/data/rucio/etc/mail_templates/rule_denied_admin.tmpl +6 -0
  428. rucio-32.8.6.data/data/rucio/etc/mail_templates/rule_denied_user.tmpl +17 -0
  429. rucio-32.8.6.data/data/rucio/etc/mail_templates/rule_ok_notification.tmpl +19 -0
  430. rucio-32.8.6.data/data/rucio/etc/rse-accounts.cfg.template +25 -0
  431. rucio-32.8.6.data/data/rucio/etc/rucio.cfg.atlas.client.template +42 -0
  432. rucio-32.8.6.data/data/rucio/etc/rucio.cfg.template +257 -0
  433. rucio-32.8.6.data/data/rucio/etc/rucio_multi_vo.cfg.template +234 -0
  434. rucio-32.8.6.data/data/rucio/requirements.txt +55 -0
  435. rucio-32.8.6.data/data/rucio/tools/bootstrap.py +34 -0
  436. rucio-32.8.6.data/data/rucio/tools/merge_rucio_configs.py +147 -0
  437. rucio-32.8.6.data/data/rucio/tools/reset_database.py +40 -0
  438. rucio-32.8.6.data/scripts/rucio +2540 -0
  439. rucio-32.8.6.data/scripts/rucio-abacus-account +75 -0
  440. rucio-32.8.6.data/scripts/rucio-abacus-collection-replica +47 -0
  441. rucio-32.8.6.data/scripts/rucio-abacus-rse +79 -0
  442. rucio-32.8.6.data/scripts/rucio-admin +2434 -0
  443. rucio-32.8.6.data/scripts/rucio-atropos +61 -0
  444. rucio-32.8.6.data/scripts/rucio-auditor +199 -0
  445. rucio-32.8.6.data/scripts/rucio-automatix +51 -0
  446. rucio-32.8.6.data/scripts/rucio-bb8 +58 -0
  447. rucio-32.8.6.data/scripts/rucio-c3po +86 -0
  448. rucio-32.8.6.data/scripts/rucio-cache-client +135 -0
  449. rucio-32.8.6.data/scripts/rucio-cache-consumer +43 -0
  450. rucio-32.8.6.data/scripts/rucio-conveyor-finisher +59 -0
  451. rucio-32.8.6.data/scripts/rucio-conveyor-poller +67 -0
  452. rucio-32.8.6.data/scripts/rucio-conveyor-preparer +38 -0
  453. rucio-32.8.6.data/scripts/rucio-conveyor-receiver +44 -0
  454. rucio-32.8.6.data/scripts/rucio-conveyor-stager +77 -0
  455. rucio-32.8.6.data/scripts/rucio-conveyor-submitter +140 -0
  456. rucio-32.8.6.data/scripts/rucio-conveyor-throttler +105 -0
  457. rucio-32.8.6.data/scripts/rucio-dark-reaper +54 -0
  458. rucio-32.8.6.data/scripts/rucio-dumper +159 -0
  459. rucio-32.8.6.data/scripts/rucio-follower +45 -0
  460. rucio-32.8.6.data/scripts/rucio-hermes +55 -0
  461. rucio-32.8.6.data/scripts/rucio-judge-cleaner +90 -0
  462. rucio-32.8.6.data/scripts/rucio-judge-evaluator +138 -0
  463. rucio-32.8.6.data/scripts/rucio-judge-injector +45 -0
  464. rucio-32.8.6.data/scripts/rucio-judge-repairer +45 -0
  465. rucio-32.8.6.data/scripts/rucio-kronos +45 -0
  466. rucio-32.8.6.data/scripts/rucio-light-reaper +53 -0
  467. rucio-32.8.6.data/scripts/rucio-minos +54 -0
  468. rucio-32.8.6.data/scripts/rucio-minos-temporary-expiration +51 -0
  469. rucio-32.8.6.data/scripts/rucio-necromancer +121 -0
  470. rucio-32.8.6.data/scripts/rucio-oauth-manager +64 -0
  471. rucio-32.8.6.data/scripts/rucio-reaper +84 -0
  472. rucio-32.8.6.data/scripts/rucio-replica-recoverer +249 -0
  473. rucio-32.8.6.data/scripts/rucio-storage-consistency-actions +75 -0
  474. rucio-32.8.6.data/scripts/rucio-transmogrifier +78 -0
  475. rucio-32.8.6.data/scripts/rucio-undertaker +77 -0
  476. rucio-32.8.6.dist-info/METADATA +83 -0
  477. rucio-32.8.6.dist-info/RECORD +481 -0
  478. rucio-32.8.6.dist-info/WHEEL +5 -0
  479. rucio-32.8.6.dist-info/licenses/AUTHORS.rst +94 -0
  480. rucio-32.8.6.dist-info/licenses/LICENSE +201 -0
  481. rucio-32.8.6.dist-info/top_level.txt +1 -0
rucio/core/did.py ADDED
@@ -0,0 +1,3102 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright European Organization for Nuclear Research (CERN) since 2012
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import logging
17
+ import random
18
+ from datetime import datetime, timedelta
19
+ from enum import Enum
20
+ from hashlib import md5
21
+ from re import match
22
+ from typing import TYPE_CHECKING
23
+
24
+ from sqlalchemy import and_, or_, exists, update, delete, insert
25
+ from sqlalchemy.exc import DatabaseError, IntegrityError
26
+ from sqlalchemy.orm.exc import NoResultFound
27
+ from sqlalchemy.sql import not_, func
28
+ from sqlalchemy.sql.expression import bindparam, case, select, true, false, null
29
+
30
+ import rucio.core.replica # import add_replicas
31
+ import rucio.core.rule
32
+ from rucio.common import exception
33
+ from rucio.common.config import config_get_bool, config_get_int
34
+ from rucio.common.utils import is_archive, chunks
35
+ from rucio.core import did_meta_plugins
36
+ from rucio.core.message import add_message
37
+ from rucio.core.monitor import MetricManager
38
+ from rucio.core.naming_convention import validate_name
39
+ from rucio.db.sqla import models, filter_thread_work
40
+ from rucio.db.sqla.constants import DIDType, DIDReEvaluation, DIDAvailability, RuleState, BadFilesStatus
41
+ from rucio.db.sqla.session import read_session, transactional_session, stream_session
42
+ from rucio.db.sqla.util import temp_table_mngr
43
+
44
+ if TYPE_CHECKING:
45
+ from collections.abc import Callable, Sequence
46
+ from typing import Any, Optional, Union
47
+ from sqlalchemy.orm import Session
48
+ from sqlalchemy.schema import Table
49
+ from rucio.common.types import InternalAccount, InternalScope
50
+
51
+ LoggerFunction = Callable[..., Any]
52
+
53
+ METRICS = MetricManager(module=__name__)
54
+
55
+
56
@read_session
def list_expired_dids(
    worker_number: "Optional[int]" = None,
    total_workers: "Optional[int]" = None,
    limit: "Optional[int]" = None,
    *,
    session: "Session"
):
    """
    List expired data identifiers.

    :param worker_number: The id of this worker among [0, total_workers - 1], for work partitioning.
    :param total_workers: The total number of parallel workers.
    :param limit: limit number.
    :param session: The database session in use.
    :returns: A list of dictionaries with keys 'scope', 'name', 'did_type',
              'created_at' and 'purge_replicas' for each expired DID.
    """
    # DIDs with at least one locked replication rule must never be expired away.
    sub_query = exists(
    ).where(
        models.ReplicationRule.scope == models.DataIdentifier.scope,
        models.ReplicationRule.name == models.DataIdentifier.name,
        models.ReplicationRule.locked == true(),
    )
    list_stmt = select(
        models.DataIdentifier.scope,
        models.DataIdentifier.name,
        models.DataIdentifier.did_type,
        models.DataIdentifier.created_at,
        models.DataIdentifier.purge_replicas
    ).where(
        models.DataIdentifier.expired_at < datetime.utcnow(),
        not_(sub_query),
    ).order_by(
        models.DataIdentifier.expired_at
    ).with_hint(
        models.DataIdentifier, "index(DIDS DIDS_EXPIRED_AT_IDX)", 'oracle'
    )

    if session.bind.dialect.name in ['oracle', 'mysql', 'postgresql']:
        # These dialects support server-side hashing for thread partitioning.
        list_stmt = filter_thread_work(session=session, query=list_stmt, total_threads=total_workers, thread_id=worker_number, hash_variable='name')
    elif session.bind.dialect.name == 'sqlite' and worker_number and total_workers and total_workers > 0:
        # sqlite cannot partition server-side: hash the name client-side instead.
        row_count = 0
        dids = list()
        for scope, name, did_type, created_at, purge_replicas in session.execute(list_stmt).yield_per(10):
            # md5() requires bytes; hashing the raw str raised TypeError before.
            if int(md5(name.encode()).hexdigest(), 16) % total_workers == worker_number:
                dids.append({'scope': scope,
                             'name': name,
                             'did_type': did_type,
                             'created_at': created_at,
                             'purge_replicas': purge_replicas})
                row_count += 1
                if limit and row_count >= limit:
                    return dids
        return dids
    else:
        if worker_number and total_workers:
            raise exception.DatabaseException('The database type %s returned by SQLAlchemy is invalid.' % session.bind.dialect.name)

    if limit:
        list_stmt = list_stmt.limit(limit)

    return [{'scope': scope, 'name': name, 'did_type': did_type, 'created_at': created_at,
             'purge_replicas': purge_replicas} for scope, name, did_type, created_at, purge_replicas in session.execute(list_stmt)]
117
+
118
+
119
@transactional_session
def add_did(
    scope: "InternalScope",
    name: str,
    did_type: "Union[str, DIDType]",
    account: "InternalAccount",
    statuses: "Optional[dict[str, Any]]" = None,
    meta: "Optional[dict[str, Any]]" = None,
    rules: "Optional[Sequence[str]]" = None,
    lifetime: "Optional[int]" = None,
    dids: "Optional[Sequence[dict[str, Any]]]" = None,
    rse_id: "Optional[str]" = None,
    *,
    session: "Session",
):
    """
    Add a single data identifier by delegating to the bulk add_dids call.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param did_type: The data identifier type.
    :param account: The account owner.
    :param statuses: Dictionary with statuses, e.g. {'monotonic': True}.
    :param meta: Meta-data associated with the data identifier, as key/value pairs.
    :param rules: Replication rules associated with the data identifier,
                  e.g. [{'copies': 2, 'rse_expression': 'TIERS1'}, ].
    :param lifetime: DID's lifetime (in seconds).
    :param dids: The content.
    :param rse_id: The RSE id when registering replicas.
    :param session: The database session in use.
    """
    # Build the single-element payload expected by the bulk API.
    did_description = {
        'scope': scope,
        'name': name,
        'type': did_type,
        'statuses': statuses or {},
        'meta': meta or {},
        'rules': rules,
        'lifetime': lifetime,
        'dids': dids,
        'rse_id': rse_id,
    }
    return add_dids(dids=[did_description], account=account, session=session)
154
+
155
+
156
@transactional_session
def add_dids(
    dids: "Sequence[dict[str, Any]]",
    account: "InternalAccount",
    *,
    session: "Session",
):
    """
    Bulk add data identifiers.

    :param dids: A list of did dictionaries; each must carry at least
                 'scope', 'name' and 'type', and may carry 'statuses',
                 'meta', 'rules', 'lifetime', 'dids' and 'rse_id'.
    :param account: The account owner.
    :param session: The database session in use.
    :raises UnsupportedOperation: if a did of type FILE is passed.
    :raises DataIdentifierAlreadyExists: if a did already exists.
    :raises ScopeNotFound: if the scope of a did does not exist.
    """
    try:

        for did in dids:
            try:

                if isinstance(did['type'], str):
                    did['type'] = DIDType[did['type']]

                # Only collections can be registered through this call.
                if did['type'] == DIDType.FILE:
                    raise exception.UnsupportedOperation('Only collection (dataset/container) can be registered.')

                # Lifetime
                expired_at = None
                if did.get('lifetime'):
                    expired_at = datetime.utcnow() + timedelta(seconds=did['lifetime'])

                # Insert new data identifier
                new_did = models.DataIdentifier(scope=did['scope'], name=did['name'], account=did.get('account') or account,
                                                did_type=did['type'], monotonic=did.get('statuses', {}).get('monotonic', False),
                                                is_open=True, expired_at=expired_at)

                new_did.save(session=session, flush=False)

                if 'meta' in did and did['meta']:
                    # Add metadata
                    set_metadata_bulk(scope=did['scope'], name=did['name'], meta=did['meta'], recursive=False, session=session)

                if did.get('dids', None):
                    attach_dids(scope=did['scope'], name=did['name'], dids=did['dids'],
                                account=account, rse_id=did.get('rse_id'), session=session)

                if did.get('rules', None):
                    rucio.core.rule.add_rules(dids=[did, ], rules=did['rules'], session=session)

                # Emit a creation event for collections.
                event_type = None
                if did['type'] == DIDType.CONTAINER:
                    event_type = 'CREATE_CNT'
                if did['type'] == DIDType.DATASET:
                    event_type = 'CREATE_DTS'
                if event_type:
                    message = {'account': account.external,
                               'scope': did['scope'].external,
                               'name': did['name'],
                               'expired_at': str(expired_at) if expired_at is not None else None}
                    if account.vo != 'def':
                        message['vo'] = account.vo

                    add_message(event_type, message, session=session)

            except KeyError:
                # ToDo
                raise

        session.flush()

    except IntegrityError as error:
        # Unique-constraint violations: the did already exists.
        if match('.*IntegrityError.*ORA-00001: unique constraint.*DIDS_PK.*violated.*', error.args[0]) \
                or match('.*IntegrityError.*UNIQUE constraint failed: dids.scope, dids.name.*', error.args[0]) \
                or match('.*IntegrityError.*1062.*Duplicate entry.*for key.*', error.args[0]) \
                or match('.*IntegrityError.*duplicate key value violates unique constraint.*', error.args[0]) \
                or match('.*UniqueViolation.*duplicate key value violates unique constraint.*', error.args[0]) \
                or match('.*IntegrityError.*columns? .*not unique.*', error.args[0]):
            raise exception.DataIdentifierAlreadyExists('Data Identifier already exists!')

        # Foreign-key violations on the scope column: the scope row is missing.
        # (The duplicated ORA-02291 pattern from the original chain was removed.)
        if match('.*IntegrityError.*02291.*integrity constraint.*DIDS_SCOPE_FK.*violated - parent key not found.*', error.args[0]) \
                or match('.*IntegrityError.*FOREIGN KEY constraint failed.*', error.args[0]) \
                or match('.*IntegrityError.*1452.*Cannot add or update a child row: a foreign key constraint fails.*', error.args[0]) \
                or match('.*IntegrityError.*insert or update on table.*violates foreign key constraint.*', error.args[0]) \
                or match('.*ForeignKeyViolation.*insert or update on table.*violates foreign key constraint.*', error.args[0]) \
                or match('.*IntegrityError.*foreign key constraints? failed.*', error.args[0]):
            raise exception.ScopeNotFound('Scope not found!')

        raise exception.RucioException(error.args)
    except DatabaseError as error:
        if match('.*(DatabaseError).*ORA-14400.*inserted partition key does not map to any partition.*', error.args[0]):
            raise exception.ScopeNotFound('Scope not found!')
        raise exception.RucioException(error.args)
248
+
249
+
250
@transactional_session
def attach_dids(
    scope: "InternalScope",
    name: str,
    dids: "Sequence[dict[str, Any]]",
    account: "InternalAccount",
    rse_id: "Optional[str]" = None,
    *,
    session: "Session",
):
    """
    Append content to a single data identifier by delegating to the bulk call.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param dids: The content.
    :param account: The account owner.
    :param rse_id: The RSE id for the replicas.
    :param session: The database session in use.
    """
    # Wrap the single parent/content pair as one attachment for the bulk API.
    attachment = {
        'scope': scope,
        'name': name,
        'dids': dids,
        'rse_id': rse_id,
    }
    return attach_dids_to_dids(attachments=[attachment], account=account, session=session)
271
+
272
+
273
@transactional_session
def attach_dids_to_dids(
    attachments: "Sequence[dict[str, Any]]",
    account: "InternalAccount",
    ignore_duplicate: bool = False,
    *,
    session: "Session",
):
    """
    Append content to multiple parent data identifiers.

    :param attachments: A sequence of dictionaries, each with 'scope', 'name',
                        'dids' (the children to attach) and optionally 'rse_id'.
                        (The original annotation said dict, but the function
                        iterates it as a sequence of dicts.)
    :param account: The account owner.
    :param ignore_duplicate: If True, silently skip already-attached children.
    :param session: The database session in use.
    :raises DataIdentifierNotFound: if a parent did does not exist.
    :raises UnsupportedOperation: if a parent is a non-archive file or is closed.
    """
    children_temp_table = temp_table_mngr(session).create_scope_name_table()
    parent_dids = list()
    first_iteration = True
    for attachment in attachments:
        try:
            children = {(a['scope'], a['name']): a for a in attachment['dids']}
            cont = []
            stmt = select(
                models.DataIdentifier
            ).where(
                models.DataIdentifier.scope == attachment['scope'],
                models.DataIdentifier.name == attachment['name']
            ).with_hint(
                models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle'
            )
            parent_did = session.execute(stmt).scalar_one()
            update_parent = False

            if not first_iteration:
                # Clear leftovers from the previous attachment before re-filling.
                session.query(children_temp_table).delete()
            session.execute(insert(children_temp_table), [{'scope': s, 'name': n} for s, n in children])

            if parent_did.did_type == DIDType.FILE:
                # check if parent file has the archive extension
                if is_archive(attachment['name']):
                    __add_files_to_archive(parent_did=parent_did,
                                           files_temp_table=children_temp_table,
                                           files=children,
                                           account=account,
                                           ignore_duplicate=ignore_duplicate,
                                           session=session)
                    return
                raise exception.UnsupportedOperation("Data identifier '%(scope)s:%(name)s' is a file" % attachment)

            elif not parent_did.is_open:
                raise exception.UnsupportedOperation("Data identifier '%(scope)s:%(name)s' is closed" % attachment)

            elif parent_did.did_type == DIDType.DATASET:
                cont = __add_files_to_dataset(parent_did=parent_did,
                                              files_temp_table=children_temp_table,
                                              files=children,
                                              account=account,
                                              ignore_duplicate=ignore_duplicate,
                                              rse_id=attachment.get('rse_id'),
                                              session=session)
                update_parent = len(cont) > 0

            elif parent_did.did_type == DIDType.CONTAINER:
                __add_collections_to_container(parent_did=parent_did,
                                               collections_temp_table=children_temp_table,
                                               collections=children,
                                               account=account,
                                               session=session)
                update_parent = True

            if update_parent:
                # cont contains the parent of the files and is only filled if the files does not exist yet
                parent_dids.append({'scope': parent_did.scope,
                                    'name': parent_did.name,
                                    'rule_evaluation_action': DIDReEvaluation.ATTACH})
        except NoResultFound:
            raise exception.DataIdentifierNotFound("Data identifier '%s:%s' not found" % (attachment['scope'], attachment['name']))
        first_iteration = False

    # Remove all duplicated dictionaries from the list
    # (convert the list of dictionaries into a list of tuple, then to a set of tuple
    # to remove duplicates, then back to a list of unique dictionaries)
    parent_dids = [dict(tup) for tup in set(tuple(dictionary.items()) for dictionary in parent_dids)]
    if parent_dids:
        session.execute(insert(models.UpdatedDID), parent_dids)
351
+
352
+
353
def __add_files_to_archive(parent_did, files_temp_table, files, account, ignore_duplicate=False, *, session: "Session"):
    """
    Add files to archive.

    :param parent_did: the DataIdentifier object of the parent did
    :param files_temp_table: Temporary table holding the (scope, name) pairs of the files to add.
    :param files: archive content; dict keyed by (scope, name) with each file's metadata.
    :param account: The account owner.
    :param ignore_duplicate: If True, ignore duplicate entries.
    :param session: The database session in use.
    """
    # Outer-join the temp table against DIDS: rows whose did_scope is NULL are
    # files that do not exist yet and must be created as constituents.
    stmt = select(
        files_temp_table.scope,
        files_temp_table.name,
        models.DataIdentifier.scope.label('did_scope'),
        models.DataIdentifier.bytes,
        models.DataIdentifier.guid,
        models.DataIdentifier.events,
        models.DataIdentifier.availability,
        models.DataIdentifier.adler32,
        models.DataIdentifier.md5,
        models.DataIdentifier.is_archive,
        models.DataIdentifier.constituent,
        models.DataIdentifier.did_type,
    ).outerjoin_from(
        files_temp_table,
        models.DataIdentifier,
        and_(
            models.DataIdentifier.scope == files_temp_table.scope,
            models.DataIdentifier.name == files_temp_table.name,
        ),
    )
    if ignore_duplicate:
        # Also outer-join against the existing archive content so already-attached
        # children can be detected (archive_contents_scope is non-NULL for them).
        stmt = stmt.add_columns(
            models.ConstituentAssociation.scope.label('archive_contents_scope'),
        ).outerjoin_from(
            files_temp_table,
            models.ConstituentAssociation,
            and_(
                models.ConstituentAssociation.scope == parent_did.scope,
                models.ConstituentAssociation.name == parent_did.name,
                models.ConstituentAssociation.child_scope == files_temp_table.scope,
                models.ConstituentAssociation.child_name == files_temp_table.name,
            ),
        )

    dids_to_add = {}
    must_set_constituent = False
    archive_contents_to_add = {}
    for row in session.execute(stmt):
        file = files[row.scope, row.name]

        if ignore_duplicate and row.archive_contents_scope is not None:
            # Child already attached to this archive; skip silently.
            continue

        if (row.scope, row.name) in archive_contents_to_add:
            # Ignore duplicate input
            continue

        if row.did_scope is None:
            # File does not exist yet: build a new DID row from the caller-provided
            # metadata (flattening the 'meta' sub-dict into top-level columns).
            new_did = {}
            new_did.update((k, v) for k, v in file.items() if k != 'meta')
            for key in file.get('meta', {}):
                new_did[key] = file['meta'][key]
            new_did['constituent'] = True
            new_did['did_type'] = DIDType.FILE
            new_did['account'] = account
            dids_to_add[row.scope, row.name] = new_did

            # Content row built from the caller-provided metadata.
            new_content = {
                'child_scope': file['scope'],
                'child_name': file['name'],
                'scope': parent_did.scope,
                'name': parent_did.name,
                'bytes': file['bytes'],
                'adler32': file.get('adler32'),
                'md5': file.get('md5'),
                'guid': file.get('guid'),
                'length': file.get('events')
            }
        else:
            if row.did_type != DIDType.FILE:
                raise exception.UnsupportedOperation('Data identifier %s:%s of type %s cannot be added to an archive ' % (row.scope, row.name, row.did_type))

            if not row.constituent:
                # Existing file not yet flagged as constituent; fix in bulk below.
                must_set_constituent = True

            # Content row built from the database metadata (authoritative for existing files).
            new_content = {
                'child_scope': row.scope,
                'child_name': row.name,
                'scope': parent_did.scope,
                'name': parent_did.name,
                'bytes': row.bytes,
                'adler32': row.adler32,
                'md5': row.md5,
                'guid': row.guid,
                'length': row.events
            }

        archive_contents_to_add[row.scope, row.name] = new_content

    # insert into archive_contents
    try:
        dids_to_add and session.execute(insert(models.DataIdentifier), list(dids_to_add.values()))
        archive_contents_to_add and session.execute(insert(models.ConstituentAssociation), list(archive_contents_to_add.values()))
        if must_set_constituent:
            # Bulk-flag all pre-existing files from the temp table as constituents.
            stmt = update(
                models.DataIdentifier
            ).where(
                exists(
                    select(1)
                ).where(
                    models.DataIdentifier.scope == files_temp_table.scope,
                    models.DataIdentifier.name == files_temp_table.name
                )
            ).where(
                or_(models.DataIdentifier.constituent.is_(None),
                    models.DataIdentifier.constituent == false())
            ).execution_options(
                synchronize_session=False
            ).values(
                constituent=True
            )
            session.execute(stmt)
        session.flush()
    except IntegrityError as error:
        raise exception.RucioException(error.args)

    if not parent_did.is_archive:
        # mark the archive file as is_archive
        parent_did.is_archive = True

        # mark parent datasets as is_archive = True
        stmt = update(
            models.DataIdentifier
        ).where(
            exists(
                select(1).prefix_with("/*+ INDEX(CONTENTS CONTENTS_CHILD_SCOPE_NAME_IDX) */", dialect="oracle")
            ).where(
                models.DataIdentifierAssociation.child_scope == parent_did.scope,
                models.DataIdentifierAssociation.child_name == parent_did.name,
                models.DataIdentifierAssociation.scope == models.DataIdentifier.scope,
                models.DataIdentifierAssociation.name == models.DataIdentifier.name
            )
        ).where(
            or_(models.DataIdentifier.is_archive.is_(None),
                models.DataIdentifier.is_archive == false())
        ).execution_options(
            synchronize_session=False
        ).values(
            is_archive=True
        )
        session.execute(stmt)
505
+
506
+
507
@transactional_session
def __add_files_to_dataset(parent_did, files_temp_table, files, account, rse_id, ignore_duplicate=False, *, session: "Session"):
    """
    Add files to dataset.

    :param parent_did: the DataIdentifier object of the parent did
    :param files_temp_table: Temporary table containing the scope and name of files to add.
    :param files: dict keyed by (scope, name) with each file's metadata.
    :param account: The account owner.
    :param rse_id: The RSE id for the replicas.
    :param ignore_duplicate: If True, ignore duplicate entries.
    :param session: The database session in use.
    :returns: List of files attached (excluding the ones that were already attached to the dataset).
    """
    # Get metadata from dataset
    try:
        dataset_meta = validate_name(scope=parent_did.scope, name=parent_did.name, did_type='D')
    except Exception:
        # Naming-convention metadata is best-effort only.
        dataset_meta = None

    if rse_id:
        # Tier-0 uses this old work-around to register replicas on the RSE
        # in the same call as attaching them to a dataset
        rucio.core.replica.add_replicas(rse_id=rse_id, files=files.values(), dataset_meta=dataset_meta,
                                        account=account, session=session)

    # Outer-join the temp table against DIDS; rows with did_scope NULL are unknown files.
    stmt = select(
        files_temp_table.scope,
        files_temp_table.name,
        models.DataIdentifier.scope.label('did_scope'),
        models.DataIdentifier.bytes,
        models.DataIdentifier.guid,
        models.DataIdentifier.events,
        models.DataIdentifier.availability,
        models.DataIdentifier.adler32,
        models.DataIdentifier.md5,
        models.DataIdentifier.is_archive,
        models.DataIdentifier.did_type,
    ).outerjoin_from(
        files_temp_table,
        models.DataIdentifier,
        and_(
            models.DataIdentifier.scope == files_temp_table.scope,
            models.DataIdentifier.name == files_temp_table.name,
        ),
    )
    if ignore_duplicate:
        # Also outer-join against the existing dataset content to detect
        # already-attached files (contents_scope is non-NULL for them).
        stmt = stmt.add_columns(
            models.DataIdentifierAssociation.scope.label('contents_scope'),
        ).outerjoin_from(
            files_temp_table,
            models.DataIdentifierAssociation,
            and_(
                models.DataIdentifierAssociation.scope == parent_did.scope,
                models.DataIdentifierAssociation.name == parent_did.name,
                models.DataIdentifierAssociation.child_scope == files_temp_table.scope,
                models.DataIdentifierAssociation.child_name == files_temp_table.name,
            ),
        )

    files_to_add = {}
    for row in session.execute(stmt):
        file = files[row.scope, row.name]

        if row.did_scope is None:
            raise exception.DataIdentifierNotFound(f"Data identifier '{row.scope}:{row.name}' not found")

        if row.availability == DIDAvailability.LOST:
            raise exception.UnsupportedOperation('File %s:%s is LOST and cannot be attached' % (row.scope, row.name))

        if row.did_type != DIDType.FILE:
            raise exception.UnsupportedOperation('Data identifier %s:%s of type %s cannot be added to a dataset ' % (row.scope, row.name, row.did_type))

        # Check meta-data, if provided
        row_dict = row._asdict()
        for key in ['bytes', 'adler32', 'md5']:
            if key in file and str(file[key]) != str(row_dict[key]):
                raise exception.FileConsistencyMismatch(key + " mismatch for '%(scope)s:%(name)s': " % row_dict + str(file.get(key)) + '!=' + str(row_dict[key]))

        if ignore_duplicate and row.contents_scope is not None:
            # File already attached to this dataset; skip silently.
            continue

        if (row.scope, row.name) in files_to_add:
            # Ignore duplicate input files
            continue

        if row.is_archive and not parent_did.is_archive:
            # Attaching an archive marks the enclosing dataset as an archive holder.
            parent_did.is_archive = True

        files_to_add[(row.scope, row.name)] = {
            'scope': parent_did.scope,
            'name': parent_did.name,
            'child_scope': row.scope,
            'child_name': row.name,
            'bytes': row.bytes,
            'adler32': row.adler32,
            'md5': row.md5,
            'guid': row.guid,
            'events': row.events,
            'did_type': DIDType.DATASET,
            'child_type': DIDType.FILE,
            'rule_evaluation': True,
        }

    try:
        files_to_add and session.execute(insert(models.DataIdentifierAssociation), list(files_to_add.values()))
        session.flush()
        return files_to_add
    except IntegrityError as error:
        # Map dialect-specific foreign-key violations to DataIdentifierNotFound.
        if match('.*IntegrityError.*ORA-02291: integrity constraint .*CONTENTS_CHILD_ID_FK.*violated - parent key not found.*', error.args[0]) \
                or match('.*IntegrityError.*1452.*Cannot add or update a child row: a foreign key constraint fails.*', error.args[0]) \
                or match('.*IntegrityError.*foreign key constraints? failed.*', error.args[0]) \
                or match('.*IntegrityError.*insert or update on table.*violates foreign key constraint.*', error.args[0]):
            raise exception.DataIdentifierNotFound("Data identifier not found")
        # Map dialect-specific unique-constraint violations to FileAlreadyExists.
        elif match('.*IntegrityError.*ORA-00001: unique constraint .*CONTENTS_PK.*violated.*', error.args[0]) \
                or match('.*IntegrityError.*UNIQUE constraint failed: contents.scope, contents.name, contents.child_scope, contents.child_name.*', error.args[0]) \
                or match('.*IntegrityError.*duplicate key value violates unique constraint.*', error.args[0]) \
                or match('.*UniqueViolation.*duplicate key value violates unique constraint.*', error.args[0]) \
                or match('.*IntegrityError.*1062.*Duplicate entry .*for key.*PRIMARY.*', error.args[0]) \
                or match('.*duplicate entry.*key.*PRIMARY.*', error.args[0]) \
                or match('.*IntegrityError.*columns? .*not unique.*', error.args[0]):
            raise exception.FileAlreadyExists(error.args)
        else:
            raise exception.RucioException(error.args)
630
+
631
+
632
@transactional_session
def __add_collections_to_container(parent_did, collections_temp_table, collections, account, *, session: "Session"):
    """
    Add collections (datasets or containers) to container.

    :param parent_did: the DataIdentifier object of the parent did
    :param collections_temp_table: Temporary table holding the (scope, name) of the collections to add.
    :param collections: dict keyed by (scope, name) with each collection to attach.
    :param account: The account owner.
    :param session: The database session in use.
    """

    if (parent_did.scope, parent_did.name) in collections:
        raise exception.UnsupportedOperation('Self-append is not valid!')

    # Outer-join the temp table against DIDS; rows with did_scope NULL are unknown DIDs.
    stmt = select(
        collections_temp_table.scope,
        collections_temp_table.name,
        models.DataIdentifier.scope.label('did_scope'),
        models.DataIdentifier.did_type
    ).outerjoin_from(
        collections_temp_table,
        models.DataIdentifier,
        and_(
            models.DataIdentifier.scope == collections_temp_table.scope,
            models.DataIdentifier.name == collections_temp_table.name,
        ),
    )

    container_parents = None
    child_type = None
    for row in session.execute(stmt):

        if row.did_scope is None:
            # NOTE(review): '%' formatting with a named placeholder against a
            # SQLAlchemy Row may raise TypeError on newer SQLAlchemy — confirm.
            raise exception.DataIdentifierNotFound("Data identifier '%(scope)s:%(name)s' not found" % row)

        if row.did_type == DIDType.FILE:
            raise exception.UnsupportedOperation("Adding a file (%s:%s) to a container (%s:%s) is forbidden" % (row.scope, row.name, parent_did.scope, parent_did.name))

        # All children must share one type: the first row fixes the expected type.
        if not child_type:
            child_type = row.did_type

        if child_type != row.did_type:
            raise exception.UnsupportedOperation("Mixed collection is not allowed: '%s:%s' is a %s(expected type: %s)" % (row.scope, row.name, row.did_type, child_type))

        if child_type == DIDType.CONTAINER:
            # Lazily compute the full ancestry once, to detect attachment cycles.
            if container_parents is None:
                container_parents = {(parent['scope'], parent['name']) for parent in list_all_parent_dids(scope=parent_did.scope, name=parent_did.name, session=session)}

            if (row.scope, row.name) in container_parents:
                raise exception.UnsupportedOperation('Circular attachment detected. %s:%s is already a parent of %s:%s' % (row.scope, row.name, parent_did.scope, parent_did.name))

    messages = []
    for c in collections.values():
        did_asso = models.DataIdentifierAssociation(
            scope=parent_did.scope,
            name=parent_did.name,
            child_scope=c['scope'],
            child_name=c['name'],
            did_type=DIDType.CONTAINER,
            child_type=child_type,
            rule_evaluation=True
        )
        did_asso.save(session=session, flush=False)
        # Send AMI messages
        if child_type == DIDType.CONTAINER:
            chld_type = 'CONTAINER'
        elif child_type == DIDType.DATASET:
            chld_type = 'DATASET'
        else:
            chld_type = 'UNKNOWN'

        message = {'account': account.external,
                   'scope': parent_did.scope.external,
                   'name': parent_did.name,
                   'childscope': c['scope'].external,
                   'childname': c['name'],
                   'childtype': chld_type}
        if account.vo != 'def':
            message['vo'] = account.vo
        messages.append(message)

    try:
        for message in messages:
            add_message('REGISTER_CNT', message, session=session)
        session.flush()
    except IntegrityError as error:
        # Map dialect-specific foreign-key violations to DataIdentifierNotFound.
        if match('.*IntegrityError.*ORA-02291: integrity constraint .*CONTENTS_CHILD_ID_FK.*violated - parent key not found.*', error.args[0]) \
                or match('.*IntegrityError.*1452.*Cannot add or update a child row: a foreign key constraint fails.*', error.args[0]) \
                or match('.*IntegrityError.*foreign key constraints? failed.*', error.args[0]) \
                or match('.*IntegrityError.*insert or update on table.*violates foreign key constraint.*', error.args[0]):
            raise exception.DataIdentifierNotFound("Data identifier not found")
        # Map dialect-specific unique-constraint violations to DuplicateContent.
        elif match('.*IntegrityError.*ORA-00001: unique constraint .*CONTENTS_PK.*violated.*', error.args[0]) \
                or match('.*IntegrityError.*1062.*Duplicate entry .*for key.*PRIMARY.*', error.args[0]) \
                or match('.*IntegrityError.*columns? scope.*name.*child_scope.*child_name.*not unique.*', error.args[0]) \
                or match('.*IntegrityError.*duplicate key value violates unique constraint.*', error.args[0]) \
                or match('.*UniqueViolation.*duplicate key value violates unique constraint.*', error.args[0]) \
                or match('.*IntegrityError.* UNIQUE constraint failed: contents.scope, contents.name, contents.child_scope, contents.child_name.*', error.args[0]):
            raise exception.DuplicateContent(error.args)
        raise exception.RucioException(error.args)
731
+
732
+
733
def __add_files_to_archive_without_temp_tables(scope, name, files, account, ignore_duplicate=False, *, session: "Session"):
    """
    Add files to archive.

    Legacy variant of __add_files_to_archive for databases without temporary
    table support: existing files/content are found via large OR-condition
    lookups instead of a temp-table join.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param files: archive content.
    :param account: The account owner.
    :param ignore_duplicate: If True, ignore duplicate entries.
    :param session: The database session in use.
    """
    # lookup for existing files
    files_query = select(
        models.DataIdentifier.scope,
        models.DataIdentifier.name,
        models.DataIdentifier.bytes,
        models.DataIdentifier.guid,
        models.DataIdentifier.events,
        models.DataIdentifier.availability,
        models.DataIdentifier.adler32,
        models.DataIdentifier.md5,
    ).where(
        models.DataIdentifier.did_type == DIDType.FILE
    ).with_hint(
        models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle'
    )

    # One OR-term per requested file for the DIDS lookup.
    file_condition = []
    for file in files:
        file_condition.append(and_(models.DataIdentifier.scope == file['scope'],
                                   models.DataIdentifier.name == file['name']))

    existing_content, existing_files = [], {}
    if ignore_duplicate:
        # lookup for existing content
        content_query = select(
            models.ConstituentAssociation.scope,
            models.ConstituentAssociation.name,
            models.ConstituentAssociation.child_scope,
            models.ConstituentAssociation.child_name
        ).with_hint(
            models.ConstituentAssociation, "INDEX(ARCHIVE_CONTENTS ARCH_CONTENTS_PK)", 'oracle'
        )
        content_condition = []
        for file in files:
            content_condition.append(and_(models.ConstituentAssociation.scope == scope,
                                          models.ConstituentAssociation.name == name,
                                          models.ConstituentAssociation.child_scope == file['scope'],
                                          models.ConstituentAssociation.child_name == file['name']))
        for row in session.execute(content_query.where(or_(*content_condition))):
            existing_content.append(row)

    # Index known files by "scope:name" with content rows prebuilt from DB metadata.
    for row in session.execute(files_query.where(or_(*file_condition))):
        existing_files['%s:%s' % (row.scope.internal, row.name)] = {'child_scope': row.scope,
                                                                    'child_name': row.name,
                                                                    'scope': scope,
                                                                    'name': name,
                                                                    'bytes': row.bytes,
                                                                    'adler32': row.adler32,
                                                                    'md5': row.md5,
                                                                    'guid': row.guid,
                                                                    'length': row.events}

    contents = []
    new_files, existing_files_condition = [], []
    for file in files:
        did_tag = '%s:%s' % (file['scope'].internal, file['name'])
        if did_tag not in existing_files:
            # For non existing files
            # Add them to the content
            contents.append({'child_scope': file['scope'],
                             'child_name': file['name'],
                             'scope': scope,
                             'name': name,
                             'bytes': file['bytes'],
                             'adler32': file.get('adler32'),
                             'md5': file.get('md5'),
                             'guid': file.get('guid'),
                             'length': file.get('events')})

            # NOTE: mutates the caller-provided file dict in place (flattens 'meta').
            file['constituent'] = True
            file['did_type'] = DIDType.FILE
            file['account'] = account
            for key in file.get('meta', {}):
                file[key] = file['meta'][key]
            # Prepare new file registrations
            new_files.append(file)
        else:
            # For existing files
            # Prepare the dids updates
            existing_files_condition.append(and_(models.DataIdentifier.scope == file['scope'],
                                                 models.DataIdentifier.name == file['name']))
            # Check if they are not already in the content
            if not existing_content or (scope, name, file['scope'], file['name']) not in existing_content:
                contents.append(existing_files[did_tag])

    # insert into archive_contents
    try:
        new_files and session.execute(insert(models.DataIdentifier), new_files)
        if existing_files_condition:
            # Chunk the OR-conditions to keep statement size bounded.
            for chunk in chunks(existing_files_condition, 20):
                stmt = update(
                    models.DataIdentifier
                ).prefix_with(
                    "/*+ INDEX(DIDS DIDS_PK) */", dialect='oracle'
                ).where(
                    models.DataIdentifier.did_type == DIDType.FILE
                ).where(
                    or_(models.DataIdentifier.constituent.is_(None),
                        models.DataIdentifier.constituent == false())
                ).where(
                    or_(*chunk)
                ).values(
                    constituent=True
                )
                session.execute(stmt)
        contents and session.execute(insert(models.ConstituentAssociation), contents)
        session.flush()
    except IntegrityError as error:
        raise exception.RucioException(error.args)
853
+
854
+ stmt = select(
855
+ models.DataIdentifier
856
+ ).where(
857
+ models.DataIdentifier.did_type == DIDType.FILE,
858
+ models.DataIdentifier.scope == scope,
859
+ models.DataIdentifier.name == name,
860
+ )
861
+ archive_did = session.execute(stmt).scalar()
862
+ if not archive_did.is_archive:
863
+ # mark tha archive file as is_archive
864
+ archive_did.is_archive = True
865
+
866
+ # mark parent datasets as is_archive = True
867
+ stmt = update(
868
+ models.DataIdentifier
869
+ ).where(
870
+ exists(
871
+ select(1).prefix_with("/*+ INDEX(CONTENTS CONTENTS_CHILD_SCOPE_NAME_IDX) */", dialect="oracle")
872
+ ).where(
873
+ models.DataIdentifierAssociation.child_scope == scope,
874
+ models.DataIdentifierAssociation.child_name == name,
875
+ models.DataIdentifierAssociation.scope == models.DataIdentifier.scope,
876
+ models.DataIdentifierAssociation.name == models.DataIdentifier.name
877
+ )
878
+ ).where(
879
+ or_(models.DataIdentifier.is_archive.is_(None),
880
+ models.DataIdentifier.is_archive == false())
881
+ ).execution_options(
882
+ synchronize_session=False
883
+ ).values(
884
+ is_archive=True
885
+ )
886
+ session.execute(stmt)
887
+
888
+
889
@transactional_session
def __add_files_to_dataset_without_temp_tables(scope, name, files, account, rse_id, ignore_duplicate=False, *, session: "Session"):
    """
    Add files to dataset.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param files: The list of files.
    :param account: The account owner.
    :param rse_id: The RSE id for the replicas.
    :param ignore_duplicate: If True, ignore duplicate entries.
    :param session: The database session in use.
    :returns: List of files attached (excluding the ones that were already attached to the dataset).
    """
    # Get metadata from dataset
    try:
        dataset_meta = validate_name(scope=scope, name=name, did_type='D')
    except Exception:
        # Best effort: attachment proceeds even if the name yields no metadata
        dataset_meta = None

    if rse_id:
        # Register replicas for the files on the given RSE before attaching them
        rucio.core.replica.add_replicas(rse_id=rse_id, files=files, dataset_meta=dataset_meta,
                                        account=account, session=session)

    # Enrich the input dicts with metadata (bytes, guid, events, ...) from the dids table
    files = get_files(files=files, session=session)

    existing_content = []
    if ignore_duplicate:
        # Fetch already-attached (scope, name, child_scope, child_name) rows so
        # they can be skipped below instead of raising on the CONTENTS primary key
        content_query = select(
            models.DataIdentifierAssociation.scope,
            models.DataIdentifierAssociation.name,
            models.DataIdentifierAssociation.child_scope,
            models.DataIdentifierAssociation.child_name
        ).with_hint(
            models.DataIdentifierAssociation, "INDEX(CONTENTS CONTENTS_PK)", 'oracle'
        )
        content_condition = []
        for file in files:
            content_condition.append(and_(models.DataIdentifierAssociation.scope == scope,
                                          models.DataIdentifierAssociation.name == name,
                                          models.DataIdentifierAssociation.child_scope == file['scope'],
                                          models.DataIdentifierAssociation.child_name == file['name']))
        for row in session.execute(content_query.where(or_(*content_condition))):
            existing_content.append(row)

    contents = []
    added_archives_condition = []
    for file in files:
        # Skip rows found above (non-empty only when ignore_duplicate is set)
        if not existing_content or (scope, name, file['scope'], file['name']) not in existing_content:
            contents.append({'scope': scope, 'name': name, 'child_scope': file['scope'],
                             'child_name': file['name'], 'bytes': file['bytes'],
                             'adler32': file.get('adler32'),
                             'guid': file['guid'], 'events': file['events'],
                             'md5': file.get('md5'), 'did_type': DIDType.DATASET,
                             'child_type': DIDType.FILE, 'rule_evaluation': True})
            added_archives_condition.append(
                and_(models.DataIdentifier.scope == file['scope'],
                     models.DataIdentifier.name == file['name'],
                     models.DataIdentifier.is_archive == true()))

    # if any of the attached files is an archive, set is_archive = True on the dataset
    stmt = select(
        models.DataIdentifier
    ).with_hint(
        models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle'
    ).where(
        or_(*added_archives_condition)
    ).limit(
        1
    )
    if session.execute(stmt).scalar() is not None:
        stmt = update(
            models.DataIdentifier
        ).where(
            models.DataIdentifier.scope == scope,
            models.DataIdentifier.name == name,
        ).where(
            # Only flip the flag when it is not already set
            or_(models.DataIdentifier.is_archive.is_(None),
                models.DataIdentifier.is_archive == false())
        ).values(
            is_archive=True
        )
        session.execute(stmt)

    try:
        # Bulk-insert the new attachments; an empty list skips the statement entirely
        contents and session.execute(insert(models.DataIdentifierAssociation), contents)
        session.flush()
        return contents
    except IntegrityError as error:
        # Translate database-specific integrity violations into Rucio exceptions;
        # the regexes cover Oracle, MySQL, SQLite and PostgreSQL message formats
        if match('.*IntegrityError.*ORA-02291: integrity constraint .*CONTENTS_CHILD_ID_FK.*violated - parent key not found.*', error.args[0]) \
                or match('.*IntegrityError.*1452.*Cannot add or update a child row: a foreign key constraint fails.*', error.args[0]) \
                or match('.*IntegrityError.*foreign key constraints? failed.*', error.args[0]) \
                or match('.*IntegrityError.*insert or update on table.*violates foreign key constraint.*', error.args[0]):
            raise exception.DataIdentifierNotFound("Data identifier not found")
        elif match('.*IntegrityError.*ORA-00001: unique constraint .*CONTENTS_PK.*violated.*', error.args[0]) \
                or match('.*IntegrityError.*UNIQUE constraint failed: contents.scope, contents.name, contents.child_scope, contents.child_name.*', error.args[0])\
                or match('.*IntegrityError.*duplicate key value violates unique constraint.*', error.args[0]) \
                or match('.*UniqueViolation.*duplicate key value violates unique constraint.*', error.args[0]) \
                or match('.*IntegrityError.*1062.*Duplicate entry .*for key.*PRIMARY.*', error.args[0]) \
                or match('.*duplicate entry.*key.*PRIMARY.*', error.args[0]) \
                or match('.*IntegrityError.*columns? .*not unique.*', error.args[0]):
            raise exception.FileAlreadyExists(error.args)
        else:
            raise exception.RucioException(error.args)
993
+
994
+
995
@transactional_session
def __add_collections_to_container_without_temp_tables(scope, name, collections, account, *, session: "Session"):
    """
    Add collections (datasets or containers) to container.

    :param scope: The scope name.
    :param name: The container name.
    :param collections: List of {'scope': ..., 'name': ...} dicts for the children to attach.
    :param account: The account owner.
    :param session: The database session in use.
    :raises UnsupportedOperation: on self-append, on attaching a file, on mixing
                                  datasets and containers, or on circular attachment.
    :raises DataIdentifierNotFound: if a child data identifier does not exist.
    :raises DuplicateContent: if a child is already attached to the container.
    """
    container_parents = None
    condition = []
    for cond in collections:

        # Attaching a did to itself would create a trivial cycle
        if (scope == cond['scope']) and (name == cond['name']):
            raise exception.UnsupportedOperation('Self-append is not valid!')

        condition.append(and_(models.DataIdentifier.scope == cond['scope'],
                              models.DataIdentifier.name == cond['name']))

    available_dids = {}
    child_type = None
    stmt = select(
        models.DataIdentifier.scope,
        models.DataIdentifier.name,
        models.DataIdentifier.did_type
    ).with_hint(
        models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle'
    ).where(
        or_(*condition)
    )
    for row in session.execute(stmt):

        if row.did_type == DIDType.FILE:
            raise exception.UnsupportedOperation("Adding a file (%s:%s) to a container (%s:%s) is forbidden" % (row.scope, row.name, scope, name))

        # The first row fixes the expected child type for the whole batch
        if not child_type:
            child_type = row.did_type

        available_dids['%s:%s' % (row.scope.internal, row.name)] = row.did_type

        # A container must hold either only datasets or only containers
        if child_type != row.did_type:
            raise exception.UnsupportedOperation("Mixed collection is not allowed: '%s:%s' is a %s(expected type: %s)" % (row.scope, row.name, row.did_type, child_type))

        if child_type == DIDType.CONTAINER:
            # Lazily resolve the full ancestry of the target container once
            if container_parents is None:
                container_parents = {(parent['scope'], parent['name']) for parent in list_all_parent_dids(scope=scope, name=name, session=session)}

            if (row.scope, row.name) in container_parents:
                # BUGFIX: the format arguments were previously passed as extra
                # exception args instead of being interpolated, so the message
                # was raised with literal '%s' placeholders.
                raise exception.UnsupportedOperation('Circular attachment detected. %s:%s is already a parent of %s:%s' % (row.scope, row.name, scope, name))

    messages = []
    for c in collections:
        did_asso = models.DataIdentifierAssociation(
            scope=scope,
            name=name,
            child_scope=c['scope'],
            child_name=c['name'],
            did_type=DIDType.CONTAINER,
            child_type=available_dids.get('%s:%s' % (c['scope'].internal, c['name'])),
            rule_evaluation=True
        )
        did_asso.save(session=session, flush=False)
        # Send AMI messages
        if child_type == DIDType.CONTAINER:
            chld_type = 'CONTAINER'
        elif child_type == DIDType.DATASET:
            chld_type = 'DATASET'
        else:
            chld_type = 'UNKNOWN'

        message = {'account': account.external,
                   'scope': scope.external,
                   'name': name,
                   'childscope': c['scope'].external,
                   'childname': c['name'],
                   'childtype': chld_type}
        # Only multi-VO installations carry the vo field in messages
        if account.vo != 'def':
            message['vo'] = account.vo
        messages.append(message)

    try:
        for message in messages:
            add_message('REGISTER_CNT', message, session=session)
        session.flush()
    except IntegrityError as error:
        # Translate database-specific integrity violations into Rucio exceptions;
        # regexes cover Oracle, MySQL, SQLite and PostgreSQL message formats
        if match('.*IntegrityError.*ORA-02291: integrity constraint .*CONTENTS_CHILD_ID_FK.*violated - parent key not found.*', error.args[0]) \
                or match('.*IntegrityError.*1452.*Cannot add or update a child row: a foreign key constraint fails.*', error.args[0]) \
                or match('.*IntegrityError.*foreign key constraints? failed.*', error.args[0]) \
                or match('.*IntegrityError.*insert or update on table.*violates foreign key constraint.*', error.args[0]):
            raise exception.DataIdentifierNotFound("Data identifier not found")
        elif match('.*IntegrityError.*ORA-00001: unique constraint .*CONTENTS_PK.*violated.*', error.args[0]) \
                or match('.*IntegrityError.*1062.*Duplicate entry .*for key.*PRIMARY.*', error.args[0]) \
                or match('.*IntegrityError.*columns? scope.*name.*child_scope.*child_name.*not unique.*', error.args[0]) \
                or match('.*IntegrityError.*duplicate key value violates unique constraint.*', error.args[0]) \
                or match('.*UniqueViolation.*duplicate key value violates unique constraint.*', error.args[0]) \
                or match('.*IntegrityError.* UNIQUE constraint failed: contents.scope, contents.name, contents.child_scope, contents.child_name.*', error.args[0]):
            raise exception.DuplicateContent(error.args)
        raise exception.RucioException(error.args)
1095
+
1096
+
1097
@transactional_session
def delete_dids(
    dids: "Sequence[dict[str, Any]]",
    account: "InternalAccount",
    expire_rules: bool = False,
    *,
    session: "Session",
    logger: "LoggerFunction" = logging.log,
):
    """
    Delete data identifiers

    :param dids: The list of dids to delete.
    :param account: The account.
    :param expire_rules: Expire large rules instead of deleting them right away. This should only be used in Undertaker mode, as it can be that
                         the method returns normally, but a did was not deleted; This trusts in the fact that the undertaker will retry an
                         expired did.
    :param session: The database session in use.
    :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
    """
    if not dids:
        return

    not_purge_replicas = []

    archive_dids = config_get_bool('deletion', 'archive_dids', default=False, session=session)
    archive_content = config_get_bool('deletion', 'archive_content', default=False, session=session)

    # Split the input into files vs collections; several statements below only
    # apply to one of the two categories
    file_dids = {}
    collection_dids = {}
    all_dids = {}
    for did in dids:
        scope, name = did['scope'], did['name']
        logger(logging.INFO, 'Removing did %(scope)s:%(name)s (%(did_type)s)' % did)
        all_dids[scope, name] = {'scope': scope, 'name': name}
        if did['did_type'] == DIDType.FILE:
            file_dids[scope, name] = {'scope': scope, 'name': name}
        else:
            collection_dids[scope, name] = {'scope': scope, 'name': name}

        # ATLAS LOCALGROUPDISK Archive policy
        if did['did_type'] == DIDType.DATASET and did['scope'].external != 'archive':
            try:
                rucio.core.rule.archive_localgroupdisk_datasets(scope=did['scope'], name=did['name'], session=session)
            except exception.UndefinedPolicy:
                # no such policy configured: nothing to archive
                pass

        if did['purge_replicas'] is False:
            not_purge_replicas.append((did['scope'], did['name']))

        if archive_content:
            # Preserve the content rows in the history table before deletion
            insert_content_history(filter_=[and_(models.DataIdentifierAssociation.scope == did['scope'],
                                                 models.DataIdentifierAssociation.name == did['name'])],
                                   did_created_at=did.get('created_at'),
                                   session=session)

        # Send message
        message = {'account': account.external,
                   'scope': did['scope'].external,
                   'name': did['name']}
        if did['scope'].vo != 'def':
            message['vo'] = did['scope'].vo

        add_message('ERASE', message, session=session)

    # Pick the initial temp-table payload; when only one category is present,
    # all_dids is rebound to it so identity checks below stay cheap
    if not file_dids:
        data_in_temp_table = all_dids = collection_dids
    elif not collection_dids:
        data_in_temp_table = all_dids = file_dids
    else:
        data_in_temp_table = all_dids

    if not all_dids:
        return

    temp_table = temp_table_mngr(session).create_scope_name_table()
    session.execute(insert(temp_table), list(data_in_temp_table.values()))

    # Delete rules on did
    skip_deletion = False  # Skip deletion in case of expiration of a rule
    with METRICS.timer('delete_dids.rules'):
        stmt = select(
            models.ReplicationRule.id,
            models.ReplicationRule.scope,
            models.ReplicationRule.name,
            models.ReplicationRule.rse_expression,
            models.ReplicationRule.locks_ok_cnt,
            models.ReplicationRule.locks_replicating_cnt,
            models.ReplicationRule.locks_stuck_cnt
        ).join_from(
            temp_table,
            models.ReplicationRule,
            and_(models.ReplicationRule.scope == temp_table.scope,
                 models.ReplicationRule.name == temp_table.name)
        )
        for (rule_id, scope, name, rse_expression, locks_ok_cnt, locks_replicating_cnt, locks_stuck_cnt) in session.execute(stmt):
            logger(logging.DEBUG, 'Removing rule %s for did %s:%s on RSE-Expression %s' % (str(rule_id), scope, name, rse_expression))

            # Propagate purge_replicas from did to rules
            if (scope, name) in not_purge_replicas:
                purge_replicas = False
            else:
                purge_replicas = True
            if expire_rules and locks_ok_cnt + locks_replicating_cnt + locks_stuck_cnt > int(config_get_int('undertaker', 'expire_rules_locks_size', default=10000, session=session)):
                # Expire the rule (soft=True)
                rucio.core.rule.delete_rule(rule_id=rule_id, purge_replicas=purge_replicas, soft=True, delete_parent=True, nowait=True, session=session)
                # Update expiration of did
                set_metadata(scope=scope, name=name, key='lifetime', value=3600 * 24, session=session)
                skip_deletion = True
            else:
                rucio.core.rule.delete_rule(rule_id=rule_id, purge_replicas=purge_replicas, delete_parent=True, nowait=True, session=session)

    if skip_deletion:
        return

    # Detach from parent dids:
    existing_parent_dids = False
    with METRICS.timer('delete_dids.parent_content'):
        stmt = select(
            models.DataIdentifierAssociation
        ).join_from(
            temp_table,
            models.DataIdentifierAssociation,
            and_(models.DataIdentifierAssociation.child_scope == temp_table.scope,
                 models.DataIdentifierAssociation.child_name == temp_table.name)
        )
        for parent_did in session.execute(stmt).scalars():
            existing_parent_dids = True
            detach_dids(scope=parent_did.scope, name=parent_did.name, dids=[{'scope': parent_did.child_scope, 'name': parent_did.child_name}], session=session)

    # Remove generic did metadata
    must_delete_did_meta = True
    if session.bind.dialect.name == 'oracle':
        # NOTE(review): DidMeta deletion is skipped on Oracle < 12 — presumably
        # the table/feature is unavailable there; confirm against the models
        oracle_version = int(session.connection().connection.version.split('.')[0])
        if oracle_version < 12:
            must_delete_did_meta = False
    if must_delete_did_meta:
        stmt = delete(
            models.DidMeta
        ).where(
            exists(
                select(1)
            ).where(
                models.DidMeta.scope == temp_table.scope,
                models.DidMeta.name == temp_table.name
            )
        ).execution_options(
            synchronize_session=False
        )
        with METRICS.timer('delete_dids.did_meta'):
            session.execute(stmt)

    # Prepare the common part of the query for updating bad replicas if they exist
    bad_replica_stmt = update(
        models.BadReplicas
    ).where(
        models.BadReplicas.state == BadFilesStatus.BAD
    ).values(
        state=BadFilesStatus.DELETED,
        updated_at=datetime.utcnow(),
    ).execution_options(
        synchronize_session=False
    )

    if file_dids:
        # Reload the temp table with only the file dids if needed
        if data_in_temp_table is not file_dids:
            session.execute(delete(temp_table))
            session.execute(insert(temp_table), list(file_dids.values()))
            data_in_temp_table = file_dids

        # update bad files passed directly as input
        stmt = bad_replica_stmt.where(
            exists(
                select(1)
            ).where(
                models.BadReplicas.scope == temp_table.scope,
                models.BadReplicas.name == temp_table.name
            )
        )
        session.execute(stmt)

    if collection_dids:
        # Reload the temp table with only the collection dids if needed
        if data_in_temp_table is not collection_dids:
            session.execute(delete(temp_table))
            session.execute(insert(temp_table), list(collection_dids.values()))
            data_in_temp_table = collection_dids

        # Find files of datasets passed as input and put them in a separate temp table
        resolved_files_temp_table = temp_table_mngr(session).create_scope_name_table()
        stmt = insert(
            resolved_files_temp_table,
        ).from_select(
            ['scope', 'name'],
            select(
                models.DataIdentifierAssociation.child_scope,
                models.DataIdentifierAssociation.child_name,
            ).join_from(
                temp_table,
                models.DataIdentifierAssociation,
                and_(models.DataIdentifierAssociation.scope == temp_table.scope,
                     models.DataIdentifierAssociation.name == temp_table.name)
            ).where(
                models.DataIdentifierAssociation.child_type == DIDType.FILE
            ).distinct(
            )
        )
        session.execute(stmt)

        # update bad files from datasets
        stmt = bad_replica_stmt.where(
            exists(
                select(1)
            ).where(
                models.BadReplicas.scope == resolved_files_temp_table.scope,
                models.BadReplicas.name == resolved_files_temp_table.name
            )
        )
        session.execute(stmt)

        # Set Epoch tombstone for the files replicas inside the did
        if config_get_bool('undertaker', 'purge_all_replicas', default=False, session=session):
            with METRICS.timer('delete_dids.file_content'):
                stmt = update(
                    models.RSEFileAssociation
                ).where(
                    exists(
                        select(1)
                    ).where(
                        models.RSEFileAssociation.scope == resolved_files_temp_table.scope,
                        models.RSEFileAssociation.name == resolved_files_temp_table.name
                    )
                ).where(
                    # only unlocked replicas that already have a tombstone
                    models.RSEFileAssociation.lock_cnt == 0,
                    models.RSEFileAssociation.tombstone != null()
                ).execution_options(
                    synchronize_session=False
                ).values(
                    # epoch tombstone: eligible for immediate reaper deletion
                    tombstone=datetime(1970, 1, 1)
                )
                session.execute(stmt)

        # Remove content
        with METRICS.timer('delete_dids.content'):
            stmt = delete(
                models.DataIdentifierAssociation
            ).where(
                exists(
                    select(1)
                ).where(
                    models.DataIdentifierAssociation.scope == temp_table.scope,
                    models.DataIdentifierAssociation.name == temp_table.name
                )
            ).execution_options(
                synchronize_session=False
            )
            rowcount = session.execute(stmt).rowcount
        METRICS.counter(name='delete_dids.content_rowcount').inc(rowcount)

        # Remove CollectionReplica
        with METRICS.timer('delete_dids.collection_replicas'):
            stmt = delete(
                models.CollectionReplica
            ).where(
                exists(
                    select(1)
                ).where(
                    models.CollectionReplica.scope == temp_table.scope,
                    models.CollectionReplica.name == temp_table.name
                )
            ).execution_options(
                synchronize_session=False
            )
            session.execute(stmt)

    # remove data identifier
    if existing_parent_dids:
        # Exit method early to give Judge time to remove locks (Otherwise, due to foreign keys, did removal does not work
        logger(logging.DEBUG, 'Leaving delete_dids early for Judge-Evaluator checks')
        return

    if collection_dids:
        if data_in_temp_table is not collection_dids:
            session.execute(delete(temp_table))
            session.execute(insert(temp_table), list(collection_dids.values()))
            data_in_temp_table = collection_dids

        with METRICS.timer('delete_dids.dids_followed'):
            stmt = delete(
                models.DidsFollowed
            ).where(
                exists(
                    select(1)
                ).where(
                    models.DidsFollowed.scope == temp_table.scope,
                    models.DidsFollowed.name == temp_table.name
                )
            ).execution_options(
                synchronize_session=False
            )
            session.execute(stmt)

        with METRICS.timer('delete_dids.dids'):
            dids_to_delete_filter = exists(
                select(1)
            ).where(
                models.DataIdentifier.scope == temp_table.scope,
                models.DataIdentifier.name == temp_table.name,
                models.DataIdentifier.did_type.in_([DIDType.CONTAINER, DIDType.DATASET])
            )

            if archive_dids:
                # Copy the rows into the deleted-dids history table first
                insert_deleted_dids(filter_=dids_to_delete_filter, session=session)

            stmt = delete(
                models.DataIdentifier
            ).where(
                dids_to_delete_filter,
            ).execution_options(
                synchronize_session=False
            )
            session.execute(stmt)

    if file_dids:
        if data_in_temp_table is not file_dids:
            session.execute(delete(temp_table))
            session.execute(insert(temp_table), list(file_dids.values()))
            data_in_temp_table = file_dids
        # File dids are not deleted here: clearing expired_at leaves them for
        # the reaper, which removes the did together with its last replica
        stmt = update(
            models.DataIdentifier
        ).where(
            exists(
                select(1)
            ).where(
                models.DataIdentifier.scope == temp_table.scope,
                models.DataIdentifier.name == temp_table.name
            )
        ).where(
            models.DataIdentifier.did_type == DIDType.FILE
        ).execution_options(
            synchronize_session=False
        ).values(
            expired_at=None
        )
        session.execute(stmt)
1441
+
1442
+
1443
@transactional_session
def detach_dids(scope, name, dids, *, session: "Session"):
    """
    Detach data identifier

    :param scope: The scope name.
    :param name: The data identifier name.
    :param dids: The content.
    :param session: The database session in use.
    :raises DataIdentifierNotFound: if the parent does not exist, has no
                                    children, or a child is not attached.
    :raises UnsupportedOperation: on self-detach.
    """
    # Row Lock the parent did
    stmt = select(
        models.DataIdentifier
    ).where(
        models.DataIdentifier.scope == scope,
        models.DataIdentifier.name == name,
    ).where(
        or_(models.DataIdentifier.did_type == DIDType.CONTAINER,
            models.DataIdentifier.did_type == DIDType.DATASET)
    )
    try:
        did = session.execute(stmt).scalar_one()
        # Mark for rule re-evaluation
        models.UpdatedDID(
            scope=scope,
            name=name,
            rule_evaluation_action=DIDReEvaluation.DETACH
        ).save(session=session, flush=False)
    except NoResultFound:
        raise exception.DataIdentifierNotFound(f"Data identifier '{scope}:{name}' not found")

    # TODO: should judge target did's status: open, monotonic, close.
    stmt = select(
        models.DataIdentifierAssociation
    ).filter_by(
        scope=scope,
        name=name,
    ).limit(
        1
    )
    if session.execute(stmt).scalar() is None:
        raise exception.DataIdentifierNotFound(f"Data identifier '{scope}:{name}' has no child data identifiers.")
    for source in dids:
        if (scope == source['scope']) and (name == source['name']):
            raise exception.UnsupportedOperation('Self-detach is not valid.')
        child_scope = source['scope']
        child_name = source['name']
        # Reuse the base statement; the extra filter narrows it to this child
        associ_did = session.execute(
            stmt.filter_by(
                child_scope=child_scope,
                child_name=child_name
            ).limit(
                1
            )
        ).scalar()
        if associ_did is None:
            raise exception.DataIdentifierNotFound(f"Data identifier '{child_scope}:{child_name}' not found under '{scope}:{name}'")

        child_type = associ_did.child_type
        child_size = associ_did.bytes
        child_events = associ_did.events
        # Keep the parent's aggregated counters in sync with the removed child
        if did.length:
            did.length -= 1
        if did.bytes and child_size:
            did.bytes -= child_size
        if did.events and child_events:
            did.events -= child_events
        associ_did.delete(session=session)

        # Archive contents
        # If reattach happens, merge the latest due to primary key constraint
        new_detach = models.DataIdentifierAssociationHistory(scope=associ_did.scope,
                                                             name=associ_did.name,
                                                             child_scope=associ_did.child_scope,
                                                             child_name=associ_did.child_name,
                                                             did_type=associ_did.did_type,
                                                             child_type=associ_did.child_type,
                                                             bytes=associ_did.bytes,
                                                             adler32=associ_did.adler32,
                                                             md5=associ_did.md5,
                                                             guid=associ_did.guid,
                                                             events=associ_did.events,
                                                             rule_evaluation=associ_did.rule_evaluation,
                                                             did_created_at=did.created_at,
                                                             created_at=associ_did.created_at,
                                                             updated_at=associ_did.updated_at,
                                                             deleted_at=datetime.utcnow())
        new_detach.save(session=session, flush=False)

        # Send message for AMI. To be removed in the future when they use the DETACH messages
        if did.did_type == DIDType.CONTAINER:
            if child_type == DIDType.CONTAINER:
                chld_type = 'CONTAINER'
            elif child_type == DIDType.DATASET:
                chld_type = 'DATASET'
            else:
                chld_type = 'UNKNOWN'

            message = {'scope': scope.external,
                       'name': name,
                       'childscope': source['scope'].external,
                       'childname': source['name'],
                       'childtype': chld_type}
            if scope.vo != 'def':
                message['vo'] = scope.vo

            add_message('ERASE_CNT', message, session=session)

        message = {'scope': scope.external,
                   'name': name,
                   'did_type': str(did.did_type),
                   'child_scope': source['scope'].external,
                   'child_name': str(source['name']),
                   'child_type': str(child_type)}
        if scope.vo != 'def':
            message['vo'] = scope.vo

        add_message('DETACH', message, session=session)
1561
+
1562
+
1563
@stream_session
def list_new_dids(did_type, thread=None, total_threads=None, chunk_size=1000, *, session: "Session"):
    """
    List recent identifiers.

    :param did_type : The DID type.
    :param thread: The assigned thread for this necromancer.
    :param total_threads: The total number of threads of all necromancers.
    :param chunk_size: Number of requests to return per yield.
    :param session: The database session in use.
    """
    # Correlated subquery: the did still has a replication rule awaiting injection
    inject_rule_exists = select(
        1
    ).prefix_with(
        "/*+ INDEX(RULES RULES_SCOPE_NAME_IDX) */", dialect='oracle'
    ).where(
        models.DataIdentifier.scope == models.ReplicationRule.scope,
        models.DataIdentifier.name == models.ReplicationRule.name,
        models.ReplicationRule.state == RuleState.INJECT
    )

    # New dids without a pending injection rule
    query = select(
        models.DataIdentifier
    ).with_hint(
        models.DataIdentifier, "index(dids DIDS_IS_NEW_IDX)", 'oracle'
    ).filter_by(
        is_new=True
    ).where(
        ~exists(inject_rule_exists)
    )

    if did_type:
        # Accept both the enum member and its string name
        if isinstance(did_type, str):
            query = query.filter_by(did_type=DIDType[did_type])
        elif isinstance(did_type, Enum):
            query = query.filter_by(did_type=did_type)

    # Partition the work among the necromancer threads
    query = filter_thread_work(session=session, query=query, total_threads=total_threads, thread_id=thread, hash_variable='name')

    emitted = 0
    for new_did in session.execute(query).yield_per(10).scalars():
        if emitted >= chunk_size:
            break
        emitted += 1
        yield {'scope': new_did.scope, 'name': new_did.name, 'did_type': new_did.did_type}  # TODO Change this to the proper filebytes [RUCIO-199]
1610
+
1611
+
1612
@transactional_session
def set_new_dids(dids, new_flag, *, session: "Session"):
    """
    Set/reset the flag new

    :param dids: A list of dids
    :param new_flag: A boolean to flag new DIDs.
    :param session: The database session in use.
    :returns: True on success.
    :raises DataIdentifierNotFound: if one of the dids does not exist.
    :raises DatabaseException: when the update fails at the database level.
    """
    if session.bind.dialect.name == 'postgresql':
        # postgres enforces real booleans on boolean columns
        new_flag = bool(new_flag)

    for entry in dids:
        try:
            stmt = update(
                models.DataIdentifier
            ).filter_by(
                scope=entry['scope'],
                name=entry['name']
            ).values(
                is_new=new_flag
            ).execution_options(
                synchronize_session=False
            )
            # zero affected rows means the did does not exist
            if not session.execute(stmt).rowcount:
                raise exception.DataIdentifierNotFound("Data identifier '%s:%s' not found" % (entry['scope'], entry['name']))
        except DatabaseError as error:
            raise exception.DatabaseException('%s : Cannot update %s:%s' % (error.args[0], entry['scope'], entry['name']))

    try:
        session.flush()
    except (IntegrityError, DatabaseError) as error:
        raise exception.RucioException(error.args[0])
    return True
1647
+
1648
+
1649
@stream_session
def list_content(scope, name, *, session: "Session"):
    """
    List data identifier contents.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param session: The database session in use.
    :returns: Generator of dicts with child scope/name/type/bytes/adler32/md5.
    :raises DataIdentifierNotFound: if the did does not exist.
    """
    stmt = select(
        models.DataIdentifierAssociation
    ).with_hint(
        models.DataIdentifierAssociation, "INDEX(CONTENTS CONTENTS_PK)", 'oracle'
    ).filter_by(
        scope=scope,
        name=name
    )
    children_found = False
    for tmp_did in session.execute(stmt).yield_per(5).scalars():
        children_found = True
        yield {'scope': tmp_did.child_scope, 'name': tmp_did.child_name, 'type': tmp_did.child_type,
               'bytes': tmp_did.bytes, 'adler32': tmp_did.adler32, 'md5': tmp_did.md5}
    if not children_found:
        # An empty result can mean either "empty collection" or "no such did".
        # __get_did raises DataIdentifierNotFound in the latter case.
        __get_did(scope=scope, name=name, session=session)
1674
+
1675
+
1676
@stream_session
def list_content_history(scope, name, *, session: "Session"):
    """
    List data identifier contents history.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param session: The database session in use.
    :returns: Generator of dicts describing (possibly deleted) child dids,
              including deleted_at/created_at/updated_at timestamps.
    """
    try:
        stmt = select(
            models.DataIdentifierAssociationHistory
        ).filter_by(
            scope=scope,
            name=name
        )
        for tmp_did in session.execute(stmt).yield_per(5).scalars():
            yield {'scope': tmp_did.child_scope, 'name': tmp_did.child_name,
                   'type': tmp_did.child_type,
                   'bytes': tmp_did.bytes, 'adler32': tmp_did.adler32, 'md5': tmp_did.md5,
                   'deleted_at': tmp_did.deleted_at, 'created_at': tmp_did.created_at,
                   'updated_at': tmp_did.updated_at}
    except NoResultFound:
        # NOTE(review): iterating an empty result of session.execute() does not
        # raise NoResultFound, so this handler looks unreachable — confirm intent.
        raise exception.DataIdentifierNotFound(f"Data identifier '{scope}:{name}' not found")
1700
+
1701
+
1702
@stream_session
def list_parent_dids(scope, name, *, session: "Session"):
    """
    List the direct parent datasets and containers of a did.

    :param scope: The scope.
    :param name: The name.
    :param session: The database session.
    :returns: Generator of dicts with the parents' scope, name and type.
    """
    parents_stmt = select(
        models.DataIdentifierAssociation.scope,
        models.DataIdentifierAssociation.name,
        models.DataIdentifierAssociation.did_type
    ).where(
        models.DataIdentifierAssociation.child_scope == scope,
        models.DataIdentifierAssociation.child_name == name
    )
    for parent in session.execute(parents_stmt).yield_per(5):
        yield {'scope': parent.scope, 'name': parent.name, 'type': parent.did_type}
1724
+
1725
+
1726
@stream_session
def list_all_parent_dids(scope, name, *, session: "Session"):
    """
    List all parent datasets and containers of a did, no matter on what level.

    :param scope: The scope.
    :param name: The name.
    :param session: The database session.
    :returns: List of dids.
    :rtype: Generator.
    """

    stmt = select(
        models.DataIdentifierAssociation.scope,
        models.DataIdentifierAssociation.name,
        models.DataIdentifierAssociation.did_type
    ).filter_by(
        child_scope=scope,
        child_name=name
    )
    for did in session.execute(stmt).yield_per(5):
        yield {'scope': did.scope, 'name': did.name, 'type': did.did_type}
        # Note that only Python3 supports recursive yield, that's the reason to do the nested for.
        # NOTE(review): a parent reachable through several paths is yielded once
        # per path — callers needing uniqueness must deduplicate.
        for pdid in list_all_parent_dids(scope=did.scope, name=did.name, session=session):
            yield {'scope': pdid['scope'], 'name': pdid['name'], 'type': pdid['type']}
1751
+
1752
+
1753
def list_child_dids_stmt(
        input_dids_table: "Table",
        did_type: DIDType,
):
    """
    Build and returns a query which recursively lists children dids of type `did_type`
    for the dids given as input in a scope/name (temporary) table.

    did_type defines the desired type of DIDs in the result. If set to DIDType.Dataset,
    will only resolve containers and return datasets. If set to DIDType.File, will
    also resolve the datasets and return files.

    :param input_dids_table: temporary table holding (scope, name) pairs to resolve.
    :param did_type: the DID type to return (DATASET or FILE).
    :returns: a sqlalchemy select statement yielding (scope, name) rows.
    """
    # Only recurse into collection types that can contain the requested did_type.
    if did_type == DIDType.DATASET:
        dids_to_resolve = [DIDType.CONTAINER]
    else:
        dids_to_resolve = [DIDType.CONTAINER, DIDType.DATASET]

    # Uses a recursive SQL CTE (Common Table Expressions)
    # Anchor member: direct children of the input dids.
    initial_set = select(
        models.DataIdentifierAssociation.child_scope,
        models.DataIdentifierAssociation.child_name,
        models.DataIdentifierAssociation.child_type,
    ).join_from(
        input_dids_table,
        models.DataIdentifierAssociation,
        and_(
            models.DataIdentifierAssociation.scope == input_dids_table.scope,
            models.DataIdentifierAssociation.name == input_dids_table.name,
            models.DataIdentifierAssociation.did_type.in_(dids_to_resolve),
        ),
    ).cte(
        recursive=True,
    )

    # Oracle doesn't support union() in recursive CTEs, so use UNION ALL
    # and a "distinct" filter later
    child_datasets_cte = initial_set.union_all(
        select(
            models.DataIdentifierAssociation.child_scope,
            models.DataIdentifierAssociation.child_name,
            models.DataIdentifierAssociation.child_type,
        ).where(
            models.DataIdentifierAssociation.scope == initial_set.c.child_scope,
            models.DataIdentifierAssociation.name == initial_set.c.child_name,
            models.DataIdentifierAssociation.did_type.in_(dids_to_resolve),
        )
    )

    # Keep only rows of the requested type; distinct() deduplicates the
    # UNION ALL expansion.
    stmt = select(
        child_datasets_cte.c.child_scope.label('scope'),
        child_datasets_cte.c.child_name.label('name'),
    ).distinct(
    ).where(
        child_datasets_cte.c.child_type == did_type,
    )
    return stmt
1809
+
1810
+
1811
def list_one_did_childs_stmt(
        scope: "InternalScope",
        name: str,
        did_type: DIDType,
):
    """
    Returns the sqlalchemy query for recursively fetching the child dids of type
    'did_type' for the input did.

    did_type defines the desired type of DIDs in the result. If set to DIDType.Dataset,
    will only resolve containers and return datasets. If set to DIDType.File, will
    also resolve the datasets and return files.

    :param scope: the scope of the single input did.
    :param name: the name of the single input did.
    :param did_type: the DID type to return (DATASET or FILE).
    :returns: a sqlalchemy select statement yielding (scope, name) rows.
    """
    # Only recurse into collection types that can contain the requested did_type.
    if did_type == DIDType.DATASET:
        dids_to_resolve = [DIDType.CONTAINER]
    else:
        dids_to_resolve = [DIDType.CONTAINER, DIDType.DATASET]

    # Uses a recursive SQL CTE (Common Table Expressions)
    # Anchor member: direct children of the input did.
    initial_set = select(
        models.DataIdentifierAssociation.child_scope,
        models.DataIdentifierAssociation.child_name,
        models.DataIdentifierAssociation.child_type,
    ).where(
        models.DataIdentifierAssociation.scope == scope,
        models.DataIdentifierAssociation.name == name,
        models.DataIdentifierAssociation.did_type.in_(dids_to_resolve),
    ).cte(
        recursive=True,
    )

    # Oracle doesn't support union() in recursive CTEs, so use UNION ALL
    # and a "distinct" filter later
    child_datasets_cte = initial_set.union_all(
        select(
            models.DataIdentifierAssociation.child_scope,
            models.DataIdentifierAssociation.child_name,
            models.DataIdentifierAssociation.child_type,
        ).where(
            models.DataIdentifierAssociation.scope == initial_set.c.child_scope,
            models.DataIdentifierAssociation.name == initial_set.c.child_name,
            models.DataIdentifierAssociation.did_type.in_(dids_to_resolve),
        )
    )

    # Keep only rows of the requested type; distinct() deduplicates the
    # UNION ALL expansion.
    stmt = select(
        child_datasets_cte.c.child_scope.label('scope'),
        child_datasets_cte.c.child_name.label('name'),
    ).distinct(
    ).where(
        child_datasets_cte.c.child_type == did_type,
    )
    return stmt
1864
+
1865
+
1866
@transactional_session
def list_child_datasets(
        scope: "InternalScope",
        name: str,
        *,
        session: "Session"
):
    """
    List all child datasets of a container, at any nesting depth.

    :param scope: The scope.
    :param name: The name.
    :param session: The database session
    :returns: List of dicts with the datasets' scope and name.
    """
    stmt = list_one_did_childs_stmt(scope, name, did_type=DIDType.DATASET)
    return [{'scope': row.scope, 'name': row.name} for row in session.execute(stmt)]
1888
+
1889
+
1890
@stream_session
def list_files(scope, name, long=False, *, session: "Session"):
    """
    List data identifier file contents.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param long: A boolean to choose if more metadata are returned or not.
    :param session: The database session in use.
    :returns: Generator of file dicts (scope, name, bytes, adler32, guid,
              events, plus lumiblocknr when `long` is True).
    :raises DataIdentifierNotFound: if the did does not exist.
    """
    try:
        stmt = select(
            models.DataIdentifier.scope,
            models.DataIdentifier.name,
            models.DataIdentifier.bytes,
            models.DataIdentifier.adler32,
            models.DataIdentifier.guid,
            models.DataIdentifier.events,
            models.DataIdentifier.lumiblocknr,
            models.DataIdentifier.did_type
        ).filter_by(
            scope=scope,
            name=name
        ).with_hint(
            models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle'
        )
        # .one() raises NoResultFound when the did doesn't exist.
        did = session.execute(stmt).one()

        # did[7] is did_type; a FILE is its own (single) content.
        if did[7] == DIDType.FILE:
            if long:
                yield {'scope': did[0], 'name': did[1], 'bytes': did[2],
                       'adler32': did[3], 'guid': did[4] and did[4].upper(),
                       'events': did[5], 'lumiblocknr': did[6]}
            else:
                yield {'scope': did[0], 'name': did[1], 'bytes': did[2],
                       'adler32': did[3], 'guid': did[4] and did[4].upper(),
                       'events': did[5]}
        else:
            # Query used to expand containers into their children.
            cnt_query = select(
                models.DataIdentifierAssociation.child_scope,
                models.DataIdentifierAssociation.child_name,
                models.DataIdentifierAssociation.child_type
            ).with_hint(
                models.DataIdentifierAssociation, "INDEX(CONTENTS CONTENTS_PK)", 'oracle'
            )

            # Query used to list the files of a dataset. The `long` variant
            # joins DIDS to fetch lumiblocknr; the short variant binds a NULL
            # placeholder so both produce 8-column rows.
            if long:
                dst_cnt_query = select(
                    models.DataIdentifierAssociation.child_scope,
                    models.DataIdentifierAssociation.child_name,
                    models.DataIdentifierAssociation.child_type,
                    models.DataIdentifierAssociation.bytes,
                    models.DataIdentifierAssociation.adler32,
                    models.DataIdentifierAssociation.guid,
                    models.DataIdentifierAssociation.events,
                    models.DataIdentifier.lumiblocknr
                ).with_hint(
                    models.DataIdentifierAssociation, "INDEX_RS_ASC(DIDS DIDS_PK) INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)", "oracle"
                ).where(
                    models.DataIdentifier.scope == models.DataIdentifierAssociation.child_scope,
                    models.DataIdentifier.name == models.DataIdentifierAssociation.child_name
                )
            else:
                dst_cnt_query = select(
                    models.DataIdentifierAssociation.child_scope,
                    models.DataIdentifierAssociation.child_name,
                    models.DataIdentifierAssociation.child_type,
                    models.DataIdentifierAssociation.bytes,
                    models.DataIdentifierAssociation.adler32,
                    models.DataIdentifierAssociation.guid,
                    models.DataIdentifierAssociation.events,
                    bindparam("lumiblocknr", None)
                ).with_hint(
                    models.DataIdentifierAssociation, "INDEX(CONTENTS CONTENTS_PK)", 'oracle'
                )

            # Iterative depth-first traversal using an explicit stack of
            # (scope, name, type) tuples — avoids recursion.
            dids = [(scope, name, did[7]), ]
            while dids:
                s, n, t = dids.pop()
                if t == DIDType.DATASET:
                    # Datasets: yield their files directly.
                    stmt = dst_cnt_query.where(
                        and_(models.DataIdentifierAssociation.scope == s,
                             models.DataIdentifierAssociation.name == n)
                    )

                    for child_scope, child_name, child_type, bytes_, adler32, guid, events, lumiblocknr in session.execute(stmt).yield_per(500):
                        if long:
                            yield {'scope': child_scope, 'name': child_name,
                                   'bytes': bytes_, 'adler32': adler32,
                                   'guid': guid and guid.upper(),
                                   'events': events,
                                   'lumiblocknr': lumiblocknr}
                        else:
                            yield {'scope': child_scope, 'name': child_name,
                                   'bytes': bytes_, 'adler32': adler32,
                                   'guid': guid and guid.upper(),
                                   'events': events}
                else:
                    # Containers: push their children onto the stack.
                    stmt = cnt_query.filter_by(scope=s, name=n)
                    for child_scope, child_name, child_type in session.execute(stmt).yield_per(500):
                        dids.append((child_scope, child_name, child_type))

    except NoResultFound:
        raise exception.DataIdentifierNotFound(f"Data identifier '{scope}:{name}' not found")
1994
+
1995
+
1996
@stream_session
def scope_list(scope, name=None, recursive=False, *, session: "Session"):
    """
    List data identifiers in a scope.

    :param scope: The scope name.
    :param session: The database session in use.
    :param name: The data identifier name. If None, all top-level dids of the
                 scope are listed; otherwise listing starts at this did.
    :param recursive: boolean, True or False.
    :returns: Generator of dicts with scope/name/type/parent/level (and bytes
              for top-level entries when name is None).
    :raises DataIdentifierNotFound: if `name` is given but does not exist.
    """
    # TODO= Perf. tuning of the method
    # query = session.query(models.DataIdentifier).filter_by(scope=scope, deleted=False)
    # for did in query.yield_per(5):
    #     yield {'scope': did.scope, 'name': did.name, 'type': did.did_type, 'parent': None, 'level': 0}

    def __topdids(scope):
        # Top-level dids: those of the scope that are not a child of any
        # other did within the same scope.
        sub_stmt = select(
            models.DataIdentifierAssociation.child_name
        ).filter_by(
            scope=scope,
            child_scope=scope
        )
        stmt = select(
            models.DataIdentifier.name,
            models.DataIdentifier.did_type,
            models.DataIdentifier.bytes
        ).filter_by(
            scope=scope
        ).where(
            not_(models.DataIdentifier.name.in_(sub_stmt))
        ).order_by(
            models.DataIdentifier.name
        )
        for row in session.execute(stmt).yield_per(5):
            # bytes is only meaningful for files; collections report None.
            if row.did_type == DIDType.FILE:
                yield {'scope': scope, 'name': row.name, 'type': row.did_type, 'parent': None, 'level': 0, 'bytes': row.bytes}
            else:
                yield {'scope': scope, 'name': row.name, 'type': row.did_type, 'parent': None, 'level': 0, 'bytes': None}

    def __diddriller(pdid):
        # Yield the children of `pdid` (and recurse while `recursive` is set),
        # tagging each child with its parent and nesting level.
        stmt = select(
            models.DataIdentifierAssociation
        ).filter_by(
            scope=pdid['scope'],
            name=pdid['name']
        ).order_by(
            models.DataIdentifierAssociation.child_name
        )
        for row in session.execute(stmt).yield_per(5).scalars():
            parent = {'scope': pdid['scope'], 'name': pdid['name']}
            cdid = {'scope': row.child_scope, 'name': row.child_name, 'type': row.child_type, 'parent': parent, 'level': pdid['level'] + 1}
            yield cdid
            if cdid['type'] != DIDType.FILE and recursive:
                for did in __diddriller(cdid):
                    yield did

    if name is None:
        topdids = __topdids(scope)
    else:
        stmt = select(
            models.DataIdentifier
        ).filter_by(
            scope=scope,
            name=name
        ).limit(
            1
        )
        topdids = session.execute(stmt).scalar()
        if topdids is None:
            raise exception.DataIdentifierNotFound(f"Data identifier '{scope}:{name}' not found")
        topdids = [{'scope': topdids.scope, 'name': topdids.name, 'type': topdids.did_type, 'parent': None, 'level': 0}]

    if name is None:
        # Whole-scope listing: yield each top did, drilling down on demand.
        for topdid in topdids:
            yield topdid
            if recursive:
                for did in __diddriller(topdid):
                    yield did
    else:
        # Named did: yield only its content, not the did itself.
        for topdid in topdids:
            for did in __diddriller(topdid):
                yield did
2078
+
2079
+
2080
@read_session
def __get_did(scope, name, *, session: "Session"):
    """
    Fetch the DataIdentifier model row for scope:name.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param session: The database session in use.
    :raises DataIdentifierNotFound: if the did does not exist.
    """
    stmt = select(
        models.DataIdentifier
    ).with_hint(
        models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle'
    ).filter_by(
        scope=scope,
        name=name
    )
    did = session.execute(stmt).scalar_one_or_none()
    if did is None:
        raise exception.DataIdentifierNotFound(f"Data identifier '{scope}:{name}' not found")
    return did
2094
+
2095
+
2096
@read_session
def get_did(scope: "InternalScope", name: str, dynamic_depth: "Optional[DIDType]" = None, *, session: "Session") -> "dict[str, Any]":
    """
    Retrieve a single data identifier.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param dynamic_depth: the DID type to use as source for estimation of this DIDs length/bytes.
    If set to None, or to a value which doesn't make sense (ex: requesting depth = CONTAINER for a did of type DATASET)
    will not compute the size dynamically.
    :param session: The database session in use.
    :returns: A dict describing the did; files get md5/adler32, collections
              get open/monotonic/expired_at.
    :raises DataIdentifierNotFound: if the did does not exist.
    """
    did = __get_did(scope=scope, name=name, session=session)

    # Default to the stored (possibly stale) values; recompute if requested.
    bytes_, length = did.bytes, did.length
    if dynamic_depth:
        # NOTE(review): `events` is computed here but not returned — confirm
        # whether it should appear in the result dict.
        bytes_, length, events = __resolve_bytes_length_events_did(did=did, dynamic_depth=dynamic_depth, session=session)

    if did.did_type == DIDType.FILE:
        # A file always has length 1 by definition.
        return {'scope': did.scope, 'name': did.name, 'type': did.did_type,
                'account': did.account, 'bytes': bytes_, 'length': 1,
                'md5': did.md5, 'adler32': did.adler32}
    else:
        return {'scope': did.scope, 'name': did.name, 'type': did.did_type,
                'account': did.account, 'open': did.is_open,
                'monotonic': did.monotonic, 'expired_at': did.expired_at,
                'length': length, 'bytes': bytes_}
2123
+
2124
+
2125
@read_session
def get_files(files, *, session: "Session"):
    """
    Retrieve a list of files.

    :param files: A list of files (dictionaries with 'scope' and 'name';
                  optionally 'bytes', 'adler32', 'md5' for validation).
    :param session: The database session in use.
    :returns: List of row dicts for the found files.
    :raises UnsupportedOperation: if one of the files is LOST.
    :raises FileConsistencyMismatch: if provided metadata disagrees with the DB.
    :raises DataIdentifierNotFound: if one of the files does not exist.
    """
    # Build one OR-of-ANDs condition matching all requested (scope, name) pairs.
    file_condition = []
    for file in files:
        file_condition.append(and_(models.DataIdentifier.scope == file['scope'], models.DataIdentifier.name == file['name']))

    stmt = select(
        models.DataIdentifier.scope,
        models.DataIdentifier.name,
        models.DataIdentifier.bytes,
        models.DataIdentifier.guid,
        models.DataIdentifier.events,
        models.DataIdentifier.availability,
        models.DataIdentifier.adler32,
        models.DataIdentifier.md5
    ).where(
        models.DataIdentifier.did_type == DIDType.FILE
    ).with_hint(
        models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle'
    ).where(
        or_(*file_condition)
    )

    rows = []
    for row in session.execute(stmt):
        file = row._asdict()
        rows.append(file)
        if file['availability'] == DIDAvailability.LOST:
            raise exception.UnsupportedOperation('File %s:%s is LOST and cannot be attached' % (file['scope'], file['name']))
        # Check meta-data, if provided
        for f in files:
            if f['name'] == file['name'] and f['scope'] == file['scope']:
                for key in ['bytes', 'adler32', 'md5']:
                    # Compare as strings so e.g. int vs str bytes still match.
                    if key in f and str(f.get(key)) != str(file[key]):
                        raise exception.FileConsistencyMismatch(key + " mismatch for '%(scope)s:%(name)s': " % file + str(f.get(key)) + '!=' + str(file[key]))
                break

    # Fewer rows than requests means at least one file is missing — find it
    # to report a precise error.
    if len(rows) != len(files):
        for file in files:
            found = False
            for row in rows:
                if row['scope'] == file['scope'] and row['name'] == file['name']:
                    found = True
                    break
            if not found:
                raise exception.DataIdentifierNotFound("Data identifier '%(scope)s:%(name)s' not found" % file)
    return rows
2178
+
2179
+
2180
@transactional_session
def set_metadata(scope, name, key, value, did_type=None, did=None,
                 recursive=False, *, session: "Session"):
    """
    Attach a single metadata key/value pair to a data identifier.

    ``did_type`` and ``did`` are accepted for backward compatibility and are
    not forwarded to the metadata plugins.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param key: the key.
    :param value: the value.
    :param did: The data identifier info (unused).
    :param recursive: Option to propagate the metadata change to content.
    :param session: The database session in use.
    """
    did_meta_plugins.set_metadata(scope=scope, name=name, key=key, value=value,
                                  recursive=recursive, session=session)
2195
+
2196
+
2197
@transactional_session
def set_metadata_bulk(scope, name, meta, recursive=False, *, session: "Session"):
    """
    Attach several metadata key/value pairs to one data identifier.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param meta: the key-values.
    :param recursive: Option to propagate the metadata change to content.
    :param session: The database session in use.
    """
    did_meta_plugins.set_metadata_bulk(scope=scope, name=name, meta=meta,
                                       recursive=recursive, session=session)
2209
+
2210
+
2211
@transactional_session
def set_dids_metadata_bulk(dids, recursive=False, *, session: "Session"):
    """
    Attach metadata to several data identifiers in one call.

    :param dids: A list of dids including metadata (each with 'scope',
                 'name' and 'meta').
    :param recursive: Option to propagate the metadata change to content.
    :param session: The database session in use.
    """
    for entry in dids:
        did_meta_plugins.set_metadata_bulk(scope=entry['scope'],
                                           name=entry['name'],
                                           meta=entry['meta'],
                                           recursive=recursive,
                                           session=session)
2223
+
2224
+
2225
@read_session
def get_metadata(scope, name, plugin='DID_COLUMN', *, session: "Session"):
    """
    Get data identifier metadata through the configured metadata plugin.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param plugin: Name of the metadata plugin to query (default 'DID_COLUMN').
    :param session: The database session in use.
    :returns: Metadata for the did, as returned by the plugin.
    """
    return did_meta_plugins.get_metadata(scope, name, plugin=plugin, session=session)
2237
+
2238
+
2239
@stream_session
def list_parent_dids_bulk(dids, *, session: "Session"):
    """
    List the direct parents of a list of dids.

    :param dids: A list of dids (dicts with 'scope' and 'name').
    :param session: The database session in use.
    :returns: Generator of dicts with parent scope/name, child scope/name and type.
    :rtype: Generator.
    """
    condition = []
    for did in dids:
        condition.append(and_(models.DataIdentifierAssociation.child_scope == did['scope'],
                              models.DataIdentifierAssociation.child_name == did['name']))

    try:
        # Chunk the OR-conditions to keep each statement within DB limits.
        for chunk in chunks(condition, 50):
            stmt = select(
                models.DataIdentifierAssociation.child_scope,
                models.DataIdentifierAssociation.child_name,
                models.DataIdentifierAssociation.scope,
                models.DataIdentifierAssociation.name,
                models.DataIdentifierAssociation.did_type
            ).where(
                or_(*chunk)
            )
            for did_chunk in session.execute(stmt).yield_per(5):
                yield {'scope': did_chunk.scope, 'name': did_chunk.name, 'child_scope': did_chunk.child_scope, 'child_name': did_chunk.child_name, 'type': did_chunk.did_type}
    except NoResultFound:
        # NOTE(review): iterating an empty result does not raise NoResultFound,
        # so this handler looks unreachable — confirm intent.
        raise exception.DataIdentifierNotFound('No Data Identifiers found')
2269
+
2270
+
2271
@stream_session
def get_metadata_bulk(dids, inherit=False, *, session: "Session"):
    """
    Get metadata for a list of dids
    :param dids: A list of dids.
    :param inherit: A boolean. If set to true, the metadata of the parent are concatenated.
    :param session: The database session in use.
    :returns: Generator of metadata dicts, one per input did.
    """
    if inherit:
        # parent_list holds, per input did, the chain [(did), (parent), ...]
        # bottom-up; a child's own key wins over any ancestor's.
        parent_list = []
        unique_dids = []
        parents = [1, ]  # non-empty sentinel so the while loop starts
        depth = 0
        for did in dids:
            unique_dids.append((did['scope'], did['name']))
            parent_list.append([(did['scope'], did['name']), ])

        # Walk up the hierarchy, at most 20 levels, collecting ancestors.
        while parents and depth < 20:
            parents = []
            for did in list_parent_dids_bulk(dids, session=session):
                scope = did['scope']
                name = did['name']
                child_scope = did['child_scope']
                child_name = did['child_name']
                if (scope, name) not in unique_dids:
                    unique_dids.append((scope, name))
                if (scope, name) not in parents:
                    parents.append((scope, name))
                # Extend every chain whose current top is this parent's child.
                for entry in parent_list:
                    if entry[-1] == (child_scope, child_name):
                        entry.append((scope, name))
            dids = [{'scope': did[0], 'name': did[1]} for did in parents]
            depth += 1
        unique_dids = [{'scope': did[0], 'name': did[1]} for did in unique_dids]
        # Fetch JSON metadata once per distinct did.
        meta_dict = {}
        for did in unique_dids:
            try:
                meta = get_metadata(did['scope'], did['name'], plugin='JSON', session=session)
            except exception.DataIdentifierNotFound:
                meta = {}
            meta_dict[(did['scope'], did['name'])] = meta
        # Merge each chain bottom-up: first occurrence of a key wins.
        for dids in parent_list:
            result = {'scope': dids[0][0], 'name': dids[0][1]}
            for did in dids:
                for key in meta_dict[did]:
                    if key not in result:
                        result[key] = meta_dict[did][key]
            yield result
    else:
        # Non-inheriting path: plain bulk fetch of the DID rows.
        condition = []
        for did in dids:
            condition.append(and_(models.DataIdentifier.scope == did['scope'],
                                  models.DataIdentifier.name == did['name']))
        try:
            # Chunk the OR-conditions to keep each statement within DB limits.
            for chunk in chunks(condition, 50):
                stmt = select(
                    models.DataIdentifier
                ).with_hint(
                    models.DataIdentifier, "INDEX(DIDS DIDS_PK)", 'oracle'
                ).where(
                    or_(*chunk)
                )
                for row in session.execute(stmt).scalars():
                    yield row.to_dict()
        except NoResultFound:
            # NOTE(review): iterating an empty result does not raise
            # NoResultFound, so this handler looks unreachable — confirm intent.
            raise exception.DataIdentifierNotFound('No Data Identifiers found')
2337
+
2338
+
2339
@transactional_session
def delete_metadata(scope, name, key, *, session: "Session"):
    """
    Remove one metadata key from a data identifier.

    :param scope: the scope of did
    :param name: the name of the did
    :param key: the key to be deleted
    :param session: The database session in use.
    """
    did_meta_plugins.delete_metadata(scope, name, key, session=session)
2349
+
2350
+
2351
@transactional_session
def set_status(scope, name, *, session: "Session", **kwargs):
    """
    Set data identifier status

    Currently only the 'open' status is supported. Closing a collection
    freezes its byte/length/event counts, updates dataset locks, emits a
    CLOSE message and generates rule notifications; re-opening emits OPEN.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param session: The database session in use.
    :param kwargs: Keyword arguments of the form status_name=value.
    :raises UnsupportedStatus: for any status other than 'open'.
    :raises DataIdentifierNotFound: if the did does not exist.
    :raises UnsupportedOperation: if the status transition is not possible.
    """
    statuses = ['open', ]
    reevaluate_dids_at_close = config_get_bool('subscriptions', 'reevaluate_dids_at_close', raise_exception=False, default=False, session=session)

    # Base update: only collections (containers/datasets) can change status.
    update_stmt = update(
        models.DataIdentifier
    ).filter_by(
        scope=scope,
        name=name
    ).prefix_with(
        "/*+ INDEX(DIDS DIDS_PK) */", dialect='oracle'
    ).where(
        or_(models.DataIdentifier.did_type == DIDType.CONTAINER,
            models.DataIdentifier.did_type == DIDType.DATASET)
    ).execution_options(
        synchronize_session=False
    )
    values = {}
    for k in kwargs:
        if k not in statuses:
            raise exception.UnsupportedStatus(f'The status {k} is not a valid data identifier status.')
        if k == 'open':
            if not kwargs[k]:
                # Closing: only applies to currently-open non-file dids.
                update_stmt = update_stmt.filter_by(
                    is_open=True
                ).where(
                    models.DataIdentifier.did_type != DIDType.FILE
                )
                values['is_open'], values['closed_at'] = False, datetime.utcnow()
                # Freeze the aggregated counters at close time.
                values['bytes'], values['length'], values['events'] = __resolve_bytes_length_events_did(did=__get_did(scope=scope, name=name, session=session),
                                                                                                       session=session)
                # Update datasetlocks as well
                stmt = update(
                    models.DatasetLock
                ).filter_by(
                    scope=scope,
                    name=name
                ).values(
                    length=values['length'],
                    bytes=values['bytes']
                )
                session.execute(stmt)

                # Generate a message
                message = {'scope': scope.external,
                           'name': name,
                           'bytes': values['bytes'],
                           'length': values['length'],
                           'events': values['events']}
                if scope.vo != 'def':
                    message['vo'] = scope.vo

                add_message('CLOSE', message, session=session)
                if reevaluate_dids_at_close:
                    # Flag the did as new so subscription daemons re-evaluate it.
                    set_new_dids(dids=[{'scope': scope, 'name': name}],
                                 new_flag=True,
                                 session=session)

            else:
                # Set status to open only for privileged accounts
                update_stmt = update_stmt.filter_by(
                    is_open=False
                ).where(
                    models.DataIdentifier.did_type != DIDType.FILE
                )
                values['is_open'] = True

                message = {'scope': scope.external, 'name': name}
                if scope.vo != 'def':
                    message['vo'] = scope.vo
                add_message('OPEN', message, session=session)

    update_stmt = update_stmt.values(values)
    rowcount = session.execute(update_stmt).rowcount

    if not rowcount:
        # Nothing matched: distinguish "did missing" from "transition invalid".
        stmt = select(
            models.DataIdentifier
        ).filter_by(
            scope=scope,
            name=name
        )
        try:
            session.execute(stmt).scalar_one()
        except NoResultFound:
            raise exception.DataIdentifierNotFound(f"Data identifier '{scope}:{name}' not found")
        raise exception.UnsupportedOperation(f"The status of the data identifier '{scope}:{name}' cannot be changed")
    else:
        # Generate callbacks
        if not values['is_open']:
            # Closing a did may satisfy replication rules — notify them.
            stmt = select(
                models.ReplicationRule
            ).filter_by(
                scope=scope,
                name=name
            )
            for rule in session.execute(stmt).scalars():
                rucio.core.rule.generate_rule_notifications(rule=rule, session=session)
2458
+
2459
+
2460
@read_session
def list_dids(scope, filters, did_type='collection', ignore_case=False, limit=None,
              offset=None, long=False, recursive=False, ignore_dids=None, *, session: "Session"):
    """
    Search data identifiers, delegating to the configured metadata plugins.

    :param scope: the scope name.
    :param filters: dictionary of attributes by which the results should be filtered.
    :param did_type: the type of the did: all(container, dataset, file), collection(dataset or container), dataset, container, file.
    :param ignore_case: ignore case distinctions.
    :param limit: limit number.
    :param offset: offset number.
    :param long: Long format option to display more information for each DID.
    :param recursive: Recursively list DIDs content.
    :param ignore_dids: List of DIDs to refrain from yielding.
    :param session: The database session in use.
    """
    return did_meta_plugins.list_dids(scope, filters, did_type, ignore_case,
                                      limit, offset, long, recursive,
                                      ignore_dids, session=session)
2478
+
2479
+
2480
@read_session
def get_did_atime(scope, name, *, session: "Session"):
    """
    Get the accessed_at timestamp for a did. Just for testing.

    :param scope: the scope name.
    :param name: The data identifier name.
    :param session: Database session to use.
    :returns: A datetime timestamp with the last access time.
    """
    stmt = select(
        models.DataIdentifier.accessed_at
    ).where(
        models.DataIdentifier.scope == scope,
        models.DataIdentifier.name == name
    )
    return session.execute(stmt).scalar_one()
2497
+
2498
+
2499
@read_session
def get_did_access_cnt(scope, name, *, session: "Session"):
    """
    Get the access_cnt for a did. Just for testing.

    :param scope: the scope name.
    :param name: The data identifier name.
    :param session: Database session to use.
    :returns: The access counter of the did.
    """
    stmt = select(
        models.DataIdentifier.access_cnt
    ).where(
        models.DataIdentifier.scope == scope,
        models.DataIdentifier.name == name
    )
    return session.execute(stmt).scalar_one()
2516
+
2517
+
2518
@stream_session
def get_dataset_by_guid(guid, *, session: "Session"):
    """
    Get the parent datasets for a given GUID.

    :param guid: The GUID.
    :param session: Database session to use.
    :returns: Generator of dicts with the parent datasets' scope and name.
    :raises DataIdentifierNotFound: if no file carries this GUID.
    """
    stmt = select(
        models.DataIdentifier
    ).filter_by(
        guid=guid,
        did_type=DIDType.FILE
    ).with_hint(
        # BUGFIX: the hint was previously attached to models.ReplicaLock,
        # which is not part of this query, so Oracle silently ignored it.
        # Attach it to the queried entity so the GUID index is actually used.
        models.DataIdentifier, "INDEX(DIDS_GUIDS_IDX)", 'oracle'
    )
    try:
        r = session.execute(stmt).scalar_one()
        datasets_stmt = select(
            models.DataIdentifierAssociation.scope,
            models.DataIdentifierAssociation.name
        ).filter_by(
            child_scope=r.scope,
            child_name=r.name
        ).with_hint(
            models.DataIdentifierAssociation, "INDEX(CONTENTS CONTENTS_CHILD_SCOPE_NAME_IDX)", 'oracle'
        )

    except NoResultFound:
        raise exception.DataIdentifierNotFound("No file associated to GUID : %s" % guid)
    for tmp_did in session.execute(datasets_stmt).yield_per(5):
        yield {'scope': tmp_did.scope, 'name': tmp_did.name}
2551
+
2552
+
2553
@transactional_session
def touch_dids(dids, *, session: "Session"):
    """
    Update the accessed_at timestamp and the access_cnt of the given dids.

    :param dids: the list of dids; each dict holds 'scope', 'name', 'type'
                 and optionally 'accessed_at'.  (Docstring previously named
                 this parameter 'replicas' by mistake.)
    :param session: The database session in use.

    :returns: True, if successful, False otherwise.
    """
    now = datetime.utcnow()
    try:
        for did in dids:
            stmt = update(
                models.DataIdentifier
            ).filter_by(
                scope=did['scope'],
                name=did['name'],
                did_type=did['type']
            ).values(
                accessed_at=did.get('accessed_at') or now,
                # A NULL counter means "never accessed": start at 1, since
                # NULL + 1 would stay NULL.  .is_(None) is the idiomatic
                # SQLAlchemy IS NULL test (replaces the none_value workaround).
                access_cnt=case((models.DataIdentifier.access_cnt.is_(None), 1),
                                else_=(models.DataIdentifier.access_cnt + 1))
            ).execution_options(
                synchronize_session=False
            )
            session.execute(stmt)
    except DatabaseError:
        # Best-effort: concurrent touches can fail; signal via return value.
        return False

    return True
2586
+
2587
+
2588
@transactional_session
def create_did_sample(input_scope, input_name, output_scope, output_name, account, nbfiles, *, session: "Session"):
    """
    Create a sample from an input collection.

    :param input_scope: The scope of the input DID.
    :param input_name: The name of the input DID.
    :param output_scope: The scope of the output dataset.
    :param output_name: The name of the output dataset.
    :param account: The account.
    :param nbfiles: The number of files to register in the output dataset.
    :param session: The database session in use.
    """
    # Materialise the file list, shuffle in place, then keep the first nbfiles.
    candidates = list(list_files(scope=input_scope, name=input_name, long=False, session=session))
    random.shuffle(candidates)
    sample = candidates[:int(nbfiles)]
    add_did(scope=output_scope, name=output_name, did_type=DIDType.DATASET, account=account,
            statuses={}, meta=[], rules=[], lifetime=None, dids=sample, rse_id=None, session=session)
2605
+
2606
+
2607
@transactional_session
def __resolve_bytes_length_events_did(
    did: models.DataIdentifier,
    dynamic_depth: "DIDType" = DIDType.FILE,
    *, session: "Session",
) -> tuple[int, int, int]:
    """
    Resolve bytes, length and events of a DID.

    :param did: the DID ORM object for which we perform the resolution
    :param dynamic_depth: the DID type to use as source for estimation of this DIDs length/bytes.
        If set to None, or to a value which doesn't make sense (ex: requesting depth = DATASET
        for a did of type FILE) will not compute the size dynamically.
    :param session: The database session in use.
    """
    # Build the aggregation statement matching (did type, requested depth);
    # when no combination applies, fall back to the counters stored on the row.
    stmt = None
    if did.did_type == DIDType.DATASET and dynamic_depth == DIDType.FILE:
        # Dataset sized from its directly attached files.
        stmt = select(
            func.count(),
            func.sum(models.DataIdentifierAssociation.bytes),
            func.sum(models.DataIdentifierAssociation.events),
        ).where(
            models.DataIdentifierAssociation.scope == did.scope,
            models.DataIdentifierAssociation.name == did.name
        )
    elif did.did_type == DIDType.CONTAINER and dynamic_depth == DIDType.DATASET:
        # Container sized from the stored counters of its child datasets.
        children = list_one_did_childs_stmt(did.scope, did.name, did_type=DIDType.DATASET).subquery()
        stmt = select(
            func.sum(models.DataIdentifier.length),
            func.sum(models.DataIdentifier.bytes),
            func.sum(models.DataIdentifier.events),
        ).join_from(
            children,
            models.DataIdentifier,
            and_(models.DataIdentifier.scope == children.c.scope,
                 models.DataIdentifier.name == children.c.name),
        )
    elif did.did_type == DIDType.CONTAINER and dynamic_depth == DIDType.FILE:
        # Container sized from the file attachments of its child datasets.
        children = list_one_did_childs_stmt(did.scope, did.name, did_type=DIDType.DATASET).subquery()
        stmt = select(
            func.count(),
            func.sum(models.DataIdentifierAssociation.bytes),
            func.sum(models.DataIdentifierAssociation.events),
        ).join_from(
            children,
            models.DataIdentifierAssociation,
            and_(models.DataIdentifierAssociation.scope == children.c.scope,
                 models.DataIdentifierAssociation.name == children.c.name)
        )

    if stmt is not None:
        try:
            length, bytes_, events = session.execute(stmt).one()
            length = length or 0
            bytes_ = bytes_ or 0
            events = events or 0
        except NoResultFound:
            bytes_, length, events = 0, 0, 0
    elif did.did_type == DIDType.FILE:
        # A file always counts as one entry.
        bytes_, length, events = did.bytes or 0, 1, did.events or 0
    else:
        bytes_, length, events = did.bytes or 0, did.length or 0, did.events or 0
    return bytes_, length, events
2672
+
2673
+
2674
@transactional_session
def resurrect(dids, *, session: "Session"):
    """
    Resurrect data identifiers.

    :param dids: The list of dids to resurrect.
    :param session: The database session in use.

    :raises DataIdentifierNotFound: if a DID is neither deleted nor expired.
    """
    for did in dids:
        try:
            lookup = select(
                models.DeletedDataIdentifier
            ).with_hint(
                models.DeletedDataIdentifier, "INDEX(DELETED_DIDS DELETED_DIDS_PK)", 'oracle'
            ).filter_by(
                scope=did['scope'],
                name=did['name']
            )
            del_did = session.execute(lookup).scalar_one()
        except NoResultFound:
            # The DID might still exist but carry an expiration date; if so,
            # clearing that date is enough to resurrect it.
            unexpire = update(
                models.DataIdentifier
            ).where(
                models.DataIdentifier.scope == did['scope'],
                models.DataIdentifier.name == did['name'],
                models.DataIdentifier.expired_at < datetime.utcnow()
            ).execution_options(
                synchronize_session=False
            ).values(
                expired_at=None
            )
            if session.execute(unexpire).rowcount:
                continue
            raise exception.DataIdentifierNotFound("Deleted Data identifier '%(scope)s:%(name)s' not found" % did)

        # Check did_type
        # if del_did.did_type == DIDType.FILE:
        #     raise exception.UnsupportedOperation("File '%(scope)s:%(name)s' cannot be resurrected" % did)

        # Re-create the live row from the archived one, without an expiration.
        kargs = del_did.to_dict()
        if kargs['expired_at']:
            kargs['expired_at'] = None

        purge = delete(
            models.DeletedDataIdentifier
        ).prefix_with(
            "/*+ INDEX(DELETED_DIDS DELETED_DIDS_PK) */", dialect='oracle'
        ).filter_by(
            scope=did['scope'],
            name=did['name']
        )
        session.execute(purge)

        models.DataIdentifier(**kargs).save(session=session, flush=False)
2731
+
2732
+
2733
@stream_session
def list_archive_content(scope, name, *, session: "Session"):
    """
    List archive contents.

    :param scope: The archive scope name.
    :param name: The archive data identifier name.
    :param session: The database session in use.
    """
    try:
        stmt = select(
            models.ConstituentAssociation
        ).with_hint(
            models.ConstituentAssociation, "INDEX(ARCHIVE_CONTENTS ARCH_CONTENTS_PK)", 'oracle'
        ).filter_by(
            scope=scope,
            name=name
        )
        # Stream constituents in small batches rather than loading them all.
        for constituent in session.execute(stmt).yield_per(5).scalars():
            yield {
                'scope': constituent.child_scope,
                'name': constituent.child_name,
                'bytes': constituent.bytes,
                'adler32': constituent.adler32,
                'md5': constituent.md5,
            }
    except NoResultFound:
        raise exception.DataIdentifierNotFound(f"Data identifier '{scope}:{name}' not found")
2757
+
2758
+
2759
@transactional_session
def add_did_to_followed(scope, name, account, *, session: "Session"):
    """
    Mark a DID as followed by the given account.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param account: The account owner.
    :param session: The database session in use.
    """
    # Thin wrapper around the bulk variant for a single DID.
    return add_dids_to_followed(dids=[{'scope': scope, 'name': name}], account=account, session=session)
2771
+
2772
+
2773
@transactional_session
def add_dids_to_followed(dids, account, *, session: "Session"):
    """
    Bulk mark datasets as followed.

    :param dids: A list of dids.
    :param account: The account owner.
    :param session: The database session in use.
    """
    try:
        for did in dids:
            # Resolve the full DID row for the given scope/name (avoid
            # re-binding the loop variable while doing so).
            stmt = select(
                models.DataIdentifier
            ).filter_by(
                scope=did['scope'],
                name=did['name']
            )
            resolved = session.execute(stmt).scalar_one()
            # Record the follow relation for this account.
            models.DidsFollowed(
                scope=resolved.scope,
                name=resolved.name,
                account=account,
                did_type=resolved.did_type
            ).save(session=session, flush=False)

        session.flush()
    except IntegrityError as error:
        raise exception.RucioException(error.args)
2801
+
2802
+
2803
@stream_session
def get_users_following_did(scope, name, *, session: "Session"):
    """
    Return list of users following a DID.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param session: The database session in use.
    """
    try:
        stmt = select(
            models.DidsFollowed
        ).filter_by(
            scope=scope,
            name=name
        )
        for follower in session.execute(stmt).scalars().all():
            # One json-serialisable dict per following account.
            yield {'user': follower.account}

    except NoResultFound:
        raise exception.DataIdentifierNotFound("Data identifier '%s:%s' not found" % (scope, name))
2825
+
2826
+
2827
@transactional_session
def remove_did_from_followed(scope, name, account, *, session: "Session"):
    """
    Mark a DID as not followed.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param account: The account owner.
    :param session: The database session in use.
    """
    # Thin wrapper around the bulk variant for a single DID.
    return remove_dids_from_followed(dids=[{'scope': scope, 'name': name}], account=account, session=session)
2839
+
2840
+
2841
@transactional_session
def remove_dids_from_followed(dids, account, *, session: "Session"):
    """
    Bulk mark datasets as not followed.

    :param dids: A list of dids.
    :param account: The account owner.
    :param session: The database session in use.
    """
    try:
        for did in dids:
            # Delete the follow relation for this account, if present.
            stmt = delete(
                models.DidsFollowed
            ).filter_by(
                scope=did['scope'],
                name=did['name'],
                account=account
            ).execution_options(
                synchronize_session=False
            )
            session.execute(stmt)
    except NoResultFound:
        raise exception.DataIdentifierNotFound("Data identifier '%s:%s' not found" % (did['scope'], did['name']))
2864
+
2865
+
2866
@transactional_session
def trigger_event(scope, name, event_type, payload, *, session: "Session"):
    """
    Records changes occurring in the DID to the FollowEvents table.

    :param scope: The scope name.
    :param name: The data identifier name.
    :param event_type: The type of event affecting the did.
    :param payload: Any message to be stored along with the event.
    :param session: The database session in use.
    """
    try:
        stmt = select(
            models.DidsFollowed
        ).filter_by(
            scope=scope,
            name=name
        )
        for followed in session.execute(stmt).scalars().all():
            # Create one event per follower using the specified parameters.
            models.FollowEvents(
                scope=scope,
                name=name,
                account=followed.account,
                did_type=followed.did_type,
                event_type=event_type,
                payload=payload
            ).save(session=session, flush=False)

        session.flush()
    except IntegrityError as error:
        raise exception.RucioException(error.args)
2893
+
2894
+
2895
@read_session
def create_reports(total_workers, worker_number, *, session: "Session"):
    """
    Create a summary report of the events affecting a dataset, for its followers.

    :param total_workers: Number of parallel workers sharing the event table.
    :param worker_number: Index of this worker among total_workers.
    :param session: The database session in use.
    """
    # Query the FollowEvents table, oldest events first.
    stmt = select(
        models.FollowEvents
    ).order_by(
        models.FollowEvents.created_at
    )

    # Use the heartbeat mechanism to select a chunk of events based on the hashed account.
    stmt = filter_thread_work(session=session, query=stmt, total_threads=total_workers, thread_id=worker_number, hash_variable='account')

    try:
        events = session.execute(stmt).scalars().all()
        # If events exist for an account then create a report.
        if events:
            body = '''
                Hello,
                This is an auto-generated report of the events that have affected the datasets you follow.

                '''
            account = None
            for i, event in enumerate(events):
                # Add each event to the message body.
                body += "{}. Dataset: {} Event: {}\n".format(i + 1, event.name, event.event_type)
                if event.payload:
                    body += "Message: {}\n".format(event.payload)
                body += "\n"
                account = event.account
                # Clean up the event after creating the report.
                cleanup = delete(
                    models.FollowEvents
                ).filter_by(
                    scope=event.scope,
                    name=event.name,
                    account=event.account
                ).execution_options(
                    synchronize_session=False
                )
                session.execute(cleanup)

            body += "Thank You."
            # Get the email associated with the account.
            email_stmt = select(
                models.Account.email
            ).filter_by(
                account=account
            )
            email = session.execute(email_stmt).scalar()
            add_message('email', {'to': email,
                                  'subject': 'Report of affected dataset(s)',
                                  'body': body})

    except NoResultFound:
        raise exception.AccountNotFound("No email found for given account.")
2955
+
2956
+
2957
@transactional_session
def insert_content_history(filter_, did_created_at, *, session: "Session"):
    """
    Insert into content history a list of DIDs.

    :param filter_: Content clause of the files to archive
    :param did_created_at: Creation date of the did
    :param session: The database session in use.
    """
    # Columns copied verbatim from the live contents table into its history.
    copied_columns = (
        'scope', 'name', 'child_scope', 'child_name', 'did_type', 'child_type',
        'bytes', 'adler32', 'md5', 'guid', 'events', 'rule_evaluation',
        'created_at', 'updated_at',
    )
    stmt = select(
        *[getattr(models.DataIdentifierAssociation, column) for column in copied_columns]
    ).where(
        filter_
    )
    for row in session.execute(stmt).all():
        kwargs = {column: getattr(row, column) for column in copied_columns}
        # When no explicit DID creation date is given, reuse the content row's own.
        kwargs['did_created_at'] = did_created_at or row.created_at
        kwargs['deleted_at'] = datetime.utcnow()
        models.DataIdentifierAssociationHistory(**kwargs).save(session=session, flush=False)
3006
+
3007
+
3008
@transactional_session
def insert_deleted_dids(filter_, *, session: "Session"):
    """
    Insert into deleted_dids a list of DIDs.

    :param filter_: The database filter to retrieve dids for archival
    :param session: The database session in use.
    """
    # Columns read from the live DIDS table. 'deleted_at' is re-stamped with
    # the archival time and 'access_cnt' is selected but deliberately not
    # copied into the archived row.
    selected_columns = (
        'scope', 'name', 'account', 'did_type', 'is_open', 'monotonic',
        'hidden', 'obsolete', 'complete', 'is_new', 'availability',
        'suppressed', 'bytes', 'length', 'md5', 'adler32', 'expired_at',
        'purge_replicas', 'deleted_at', 'events', 'guid', 'project',
        'datatype', 'run_number', 'stream_name', 'prod_step', 'version',
        'campaign', 'task_id', 'panda_id', 'lumiblocknr', 'provenance',
        'phys_group', 'transient', 'accessed_at', 'closed_at', 'eol_at',
        'is_archive', 'constituent', 'access_cnt',
    )
    stmt = select(
        *[getattr(models.DataIdentifier, column) for column in selected_columns]
    ).where(
        filter_
    )

    for row in session.execute(stmt).all():
        kwargs = {
            column: getattr(row, column)
            for column in selected_columns
            if column not in ('deleted_at', 'access_cnt')
        }
        kwargs['deleted_at'] = datetime.utcnow()
        models.DeletedDataIdentifier(**kwargs).save(session=session, flush=False)