rucio-37.0.0rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of rucio might be problematic.

Files changed (487)
  1. rucio/__init__.py +17 -0
  2. rucio/alembicrevision.py +15 -0
  3. rucio/cli/__init__.py +14 -0
  4. rucio/cli/account.py +216 -0
  5. rucio/cli/bin_legacy/__init__.py +13 -0
  6. rucio/cli/bin_legacy/rucio.py +2825 -0
  7. rucio/cli/bin_legacy/rucio_admin.py +2500 -0
  8. rucio/cli/command.py +272 -0
  9. rucio/cli/config.py +72 -0
  10. rucio/cli/did.py +191 -0
  11. rucio/cli/download.py +128 -0
  12. rucio/cli/lifetime_exception.py +33 -0
  13. rucio/cli/replica.py +162 -0
  14. rucio/cli/rse.py +293 -0
  15. rucio/cli/rule.py +158 -0
  16. rucio/cli/scope.py +40 -0
  17. rucio/cli/subscription.py +73 -0
  18. rucio/cli/upload.py +60 -0
  19. rucio/cli/utils.py +226 -0
  20. rucio/client/__init__.py +15 -0
  21. rucio/client/accountclient.py +432 -0
  22. rucio/client/accountlimitclient.py +183 -0
  23. rucio/client/baseclient.py +983 -0
  24. rucio/client/client.py +120 -0
  25. rucio/client/configclient.py +126 -0
  26. rucio/client/credentialclient.py +59 -0
  27. rucio/client/didclient.py +868 -0
  28. rucio/client/diracclient.py +56 -0
  29. rucio/client/downloadclient.py +1783 -0
  30. rucio/client/exportclient.py +44 -0
  31. rucio/client/fileclient.py +50 -0
  32. rucio/client/importclient.py +42 -0
  33. rucio/client/lifetimeclient.py +90 -0
  34. rucio/client/lockclient.py +109 -0
  35. rucio/client/metaconventionsclient.py +140 -0
  36. rucio/client/pingclient.py +44 -0
  37. rucio/client/replicaclient.py +452 -0
  38. rucio/client/requestclient.py +125 -0
  39. rucio/client/richclient.py +317 -0
  40. rucio/client/rseclient.py +746 -0
  41. rucio/client/ruleclient.py +294 -0
  42. rucio/client/scopeclient.py +90 -0
  43. rucio/client/subscriptionclient.py +173 -0
  44. rucio/client/touchclient.py +82 -0
  45. rucio/client/uploadclient.py +969 -0
  46. rucio/common/__init__.py +13 -0
  47. rucio/common/bittorrent.py +234 -0
  48. rucio/common/cache.py +111 -0
  49. rucio/common/checksum.py +168 -0
  50. rucio/common/client.py +122 -0
  51. rucio/common/config.py +788 -0
  52. rucio/common/constants.py +217 -0
  53. rucio/common/constraints.py +17 -0
  54. rucio/common/didtype.py +237 -0
  55. rucio/common/dumper/__init__.py +342 -0
  56. rucio/common/dumper/consistency.py +497 -0
  57. rucio/common/dumper/data_models.py +362 -0
  58. rucio/common/dumper/path_parsing.py +75 -0
  59. rucio/common/exception.py +1208 -0
  60. rucio/common/extra.py +31 -0
  61. rucio/common/logging.py +420 -0
  62. rucio/common/pcache.py +1409 -0
  63. rucio/common/plugins.py +185 -0
  64. rucio/common/policy.py +93 -0
  65. rucio/common/schema/__init__.py +200 -0
  66. rucio/common/schema/generic.py +416 -0
  67. rucio/common/schema/generic_multi_vo.py +395 -0
  68. rucio/common/stomp_utils.py +423 -0
  69. rucio/common/stopwatch.py +55 -0
  70. rucio/common/test_rucio_server.py +154 -0
  71. rucio/common/types.py +483 -0
  72. rucio/common/utils.py +1688 -0
  73. rucio/core/__init__.py +13 -0
  74. rucio/core/account.py +496 -0
  75. rucio/core/account_counter.py +236 -0
  76. rucio/core/account_limit.py +425 -0
  77. rucio/core/authentication.py +620 -0
  78. rucio/core/config.py +437 -0
  79. rucio/core/credential.py +224 -0
  80. rucio/core/did.py +3004 -0
  81. rucio/core/did_meta_plugins/__init__.py +252 -0
  82. rucio/core/did_meta_plugins/did_column_meta.py +331 -0
  83. rucio/core/did_meta_plugins/did_meta_plugin_interface.py +165 -0
  84. rucio/core/did_meta_plugins/elasticsearch_meta.py +407 -0
  85. rucio/core/did_meta_plugins/filter_engine.py +672 -0
  86. rucio/core/did_meta_plugins/json_meta.py +240 -0
  87. rucio/core/did_meta_plugins/mongo_meta.py +229 -0
  88. rucio/core/did_meta_plugins/postgres_meta.py +352 -0
  89. rucio/core/dirac.py +237 -0
  90. rucio/core/distance.py +187 -0
  91. rucio/core/exporter.py +59 -0
  92. rucio/core/heartbeat.py +363 -0
  93. rucio/core/identity.py +301 -0
  94. rucio/core/importer.py +260 -0
  95. rucio/core/lifetime_exception.py +377 -0
  96. rucio/core/lock.py +577 -0
  97. rucio/core/message.py +288 -0
  98. rucio/core/meta_conventions.py +203 -0
  99. rucio/core/monitor.py +448 -0
  100. rucio/core/naming_convention.py +195 -0
  101. rucio/core/nongrid_trace.py +136 -0
  102. rucio/core/oidc.py +1463 -0
  103. rucio/core/permission/__init__.py +161 -0
  104. rucio/core/permission/generic.py +1124 -0
  105. rucio/core/permission/generic_multi_vo.py +1144 -0
  106. rucio/core/quarantined_replica.py +224 -0
  107. rucio/core/replica.py +4483 -0
  108. rucio/core/replica_sorter.py +362 -0
  109. rucio/core/request.py +3091 -0
  110. rucio/core/rse.py +2079 -0
  111. rucio/core/rse_counter.py +185 -0
  112. rucio/core/rse_expression_parser.py +459 -0
  113. rucio/core/rse_selector.py +304 -0
  114. rucio/core/rule.py +4484 -0
  115. rucio/core/rule_grouping.py +1620 -0
  116. rucio/core/scope.py +181 -0
  117. rucio/core/subscription.py +362 -0
  118. rucio/core/topology.py +490 -0
  119. rucio/core/trace.py +375 -0
  120. rucio/core/transfer.py +1531 -0
  121. rucio/core/vo.py +169 -0
  122. rucio/core/volatile_replica.py +151 -0
  123. rucio/daemons/__init__.py +13 -0
  124. rucio/daemons/abacus/__init__.py +13 -0
  125. rucio/daemons/abacus/account.py +116 -0
  126. rucio/daemons/abacus/collection_replica.py +124 -0
  127. rucio/daemons/abacus/rse.py +117 -0
  128. rucio/daemons/atropos/__init__.py +13 -0
  129. rucio/daemons/atropos/atropos.py +242 -0
  130. rucio/daemons/auditor/__init__.py +289 -0
  131. rucio/daemons/auditor/hdfs.py +97 -0
  132. rucio/daemons/auditor/srmdumps.py +355 -0
  133. rucio/daemons/automatix/__init__.py +13 -0
  134. rucio/daemons/automatix/automatix.py +304 -0
  135. rucio/daemons/badreplicas/__init__.py +13 -0
  136. rucio/daemons/badreplicas/minos.py +322 -0
  137. rucio/daemons/badreplicas/minos_temporary_expiration.py +171 -0
  138. rucio/daemons/badreplicas/necromancer.py +196 -0
  139. rucio/daemons/bb8/__init__.py +13 -0
  140. rucio/daemons/bb8/bb8.py +353 -0
  141. rucio/daemons/bb8/common.py +759 -0
  142. rucio/daemons/bb8/nuclei_background_rebalance.py +153 -0
  143. rucio/daemons/bb8/t2_background_rebalance.py +153 -0
  144. rucio/daemons/cache/__init__.py +13 -0
  145. rucio/daemons/cache/consumer.py +133 -0
  146. rucio/daemons/common.py +405 -0
  147. rucio/daemons/conveyor/__init__.py +13 -0
  148. rucio/daemons/conveyor/common.py +562 -0
  149. rucio/daemons/conveyor/finisher.py +529 -0
  150. rucio/daemons/conveyor/poller.py +394 -0
  151. rucio/daemons/conveyor/preparer.py +205 -0
  152. rucio/daemons/conveyor/receiver.py +179 -0
  153. rucio/daemons/conveyor/stager.py +133 -0
  154. rucio/daemons/conveyor/submitter.py +403 -0
  155. rucio/daemons/conveyor/throttler.py +532 -0
  156. rucio/daemons/follower/__init__.py +13 -0
  157. rucio/daemons/follower/follower.py +101 -0
  158. rucio/daemons/hermes/__init__.py +13 -0
  159. rucio/daemons/hermes/hermes.py +534 -0
  160. rucio/daemons/judge/__init__.py +13 -0
  161. rucio/daemons/judge/cleaner.py +159 -0
  162. rucio/daemons/judge/evaluator.py +185 -0
  163. rucio/daemons/judge/injector.py +162 -0
  164. rucio/daemons/judge/repairer.py +154 -0
  165. rucio/daemons/oauthmanager/__init__.py +13 -0
  166. rucio/daemons/oauthmanager/oauthmanager.py +198 -0
  167. rucio/daemons/reaper/__init__.py +13 -0
  168. rucio/daemons/reaper/dark_reaper.py +282 -0
  169. rucio/daemons/reaper/reaper.py +739 -0
  170. rucio/daemons/replicarecoverer/__init__.py +13 -0
  171. rucio/daemons/replicarecoverer/suspicious_replica_recoverer.py +626 -0
  172. rucio/daemons/rsedecommissioner/__init__.py +13 -0
  173. rucio/daemons/rsedecommissioner/config.py +81 -0
  174. rucio/daemons/rsedecommissioner/profiles/__init__.py +24 -0
  175. rucio/daemons/rsedecommissioner/profiles/atlas.py +60 -0
  176. rucio/daemons/rsedecommissioner/profiles/generic.py +452 -0
  177. rucio/daemons/rsedecommissioner/profiles/types.py +93 -0
  178. rucio/daemons/rsedecommissioner/rse_decommissioner.py +280 -0
  179. rucio/daemons/storage/__init__.py +13 -0
  180. rucio/daemons/storage/consistency/__init__.py +13 -0
  181. rucio/daemons/storage/consistency/actions.py +848 -0
  182. rucio/daemons/tracer/__init__.py +13 -0
  183. rucio/daemons/tracer/kronos.py +511 -0
  184. rucio/daemons/transmogrifier/__init__.py +13 -0
  185. rucio/daemons/transmogrifier/transmogrifier.py +762 -0
  186. rucio/daemons/undertaker/__init__.py +13 -0
  187. rucio/daemons/undertaker/undertaker.py +137 -0
  188. rucio/db/__init__.py +13 -0
  189. rucio/db/sqla/__init__.py +52 -0
  190. rucio/db/sqla/constants.py +206 -0
  191. rucio/db/sqla/migrate_repo/__init__.py +13 -0
  192. rucio/db/sqla/migrate_repo/env.py +110 -0
  193. rucio/db/sqla/migrate_repo/versions/01eaf73ab656_add_new_rule_notification_state_progress.py +70 -0
  194. rucio/db/sqla/migrate_repo/versions/0437a40dbfd1_add_eol_at_in_rules.py +47 -0
  195. rucio/db/sqla/migrate_repo/versions/0f1adb7a599a_create_transfer_hops_table.py +59 -0
  196. rucio/db/sqla/migrate_repo/versions/102efcf145f4_added_stuck_at_column_to_rules.py +43 -0
  197. rucio/db/sqla/migrate_repo/versions/13d4f70c66a9_introduce_transfer_limits.py +91 -0
  198. rucio/db/sqla/migrate_repo/versions/140fef722e91_cleanup_distances_table.py +76 -0
  199. rucio/db/sqla/migrate_repo/versions/14ec5aeb64cf_add_request_external_host.py +43 -0
  200. rucio/db/sqla/migrate_repo/versions/156fb5b5a14_add_request_type_to_requests_idx.py +50 -0
  201. rucio/db/sqla/migrate_repo/versions/1677d4d803c8_split_rse_availability_into_multiple.py +68 -0
  202. rucio/db/sqla/migrate_repo/versions/16a0aca82e12_create_index_on_table_replicas_path.py +40 -0
  203. rucio/db/sqla/migrate_repo/versions/1803333ac20f_adding_provenance_and_phys_group.py +45 -0
  204. rucio/db/sqla/migrate_repo/versions/1a29d6a9504c_add_didtype_chck_to_requests.py +60 -0
  205. rucio/db/sqla/migrate_repo/versions/1a80adff031a_create_index_on_rules_hist_recent.py +40 -0
  206. rucio/db/sqla/migrate_repo/versions/1c45d9730ca6_increase_identity_length.py +140 -0
  207. rucio/db/sqla/migrate_repo/versions/1d1215494e95_add_quarantined_replicas_table.py +73 -0
  208. rucio/db/sqla/migrate_repo/versions/1d96f484df21_asynchronous_rules_and_rule_approval.py +74 -0
  209. rucio/db/sqla/migrate_repo/versions/1f46c5f240ac_add_bytes_column_to_bad_replicas.py +43 -0
  210. rucio/db/sqla/migrate_repo/versions/1fc15ab60d43_add_message_history_table.py +50 -0
  211. rucio/db/sqla/migrate_repo/versions/2190e703eb6e_move_rse_settings_to_rse_attributes.py +134 -0
  212. rucio/db/sqla/migrate_repo/versions/21d6b9dc9961_add_mismatch_scheme_state_to_requests.py +64 -0
  213. rucio/db/sqla/migrate_repo/versions/22cf51430c78_add_availability_column_to_table_rses.py +39 -0
  214. rucio/db/sqla/migrate_repo/versions/22d887e4ec0a_create_sources_table.py +64 -0
  215. rucio/db/sqla/migrate_repo/versions/25821a8a45a3_remove_unique_constraint_on_requests.py +51 -0
  216. rucio/db/sqla/migrate_repo/versions/25fc855625cf_added_unique_constraint_to_rules.py +41 -0
  217. rucio/db/sqla/migrate_repo/versions/269fee20dee9_add_repair_cnt_to_locks.py +43 -0
  218. rucio/db/sqla/migrate_repo/versions/271a46ea6244_add_ignore_availability_column_to_rules.py +44 -0
  219. rucio/db/sqla/migrate_repo/versions/277b5fbb41d3_switch_heartbeats_executable.py +53 -0
  220. rucio/db/sqla/migrate_repo/versions/27e3a68927fb_remove_replicas_tombstone_and_replicas_.py +38 -0
  221. rucio/db/sqla/migrate_repo/versions/2854cd9e168_added_rule_id_column.py +47 -0
  222. rucio/db/sqla/migrate_repo/versions/295289b5a800_processed_by_and__at_in_requests.py +45 -0
  223. rucio/db/sqla/migrate_repo/versions/2962ece31cf4_add_nbaccesses_column_in_the_did_table.py +45 -0
  224. rucio/db/sqla/migrate_repo/versions/2af3291ec4c_added_replicas_history_table.py +57 -0
  225. rucio/db/sqla/migrate_repo/versions/2b69addda658_add_columns_for_third_party_copy_read_.py +45 -0
  226. rucio/db/sqla/migrate_repo/versions/2b8e7bcb4783_add_config_table.py +69 -0
  227. rucio/db/sqla/migrate_repo/versions/2ba5229cb54c_add_submitted_at_to_requests_table.py +43 -0
  228. rucio/db/sqla/migrate_repo/versions/2cbee484dcf9_added_column_volume_to_rse_transfer_.py +42 -0
  229. rucio/db/sqla/migrate_repo/versions/2edee4a83846_add_source_to_requests_and_requests_.py +47 -0
  230. rucio/db/sqla/migrate_repo/versions/2eef46be23d4_change_tokens_pk.py +46 -0
  231. rucio/db/sqla/migrate_repo/versions/2f648fc909f3_index_in_rule_history_on_scope_name.py +40 -0
  232. rucio/db/sqla/migrate_repo/versions/3082b8cef557_add_naming_convention_table_and_closed_.py +67 -0
  233. rucio/db/sqla/migrate_repo/versions/30d5206e9cad_increase_oauthrequest_redirect_msg_.py +37 -0
  234. rucio/db/sqla/migrate_repo/versions/30fa38b6434e_add_index_on_service_column_in_the_message_table.py +44 -0
  235. rucio/db/sqla/migrate_repo/versions/3152492b110b_added_staging_area_column.py +77 -0
  236. rucio/db/sqla/migrate_repo/versions/32c7d2783f7e_create_bad_replicas_table.py +60 -0
  237. rucio/db/sqla/migrate_repo/versions/3345511706b8_replicas_table_pk_definition_is_in_.py +72 -0
  238. rucio/db/sqla/migrate_repo/versions/35ef10d1e11b_change_index_on_table_requests.py +42 -0
  239. rucio/db/sqla/migrate_repo/versions/379a19b5332d_create_rse_limits_table.py +65 -0
  240. rucio/db/sqla/migrate_repo/versions/384b96aa0f60_created_rule_history_tables.py +133 -0
  241. rucio/db/sqla/migrate_repo/versions/3ac1660a1a72_extend_distance_table.py +55 -0
  242. rucio/db/sqla/migrate_repo/versions/3ad36e2268b0_create_collection_replicas_updates_table.py +76 -0
  243. rucio/db/sqla/migrate_repo/versions/3c9df354071b_extend_waiting_request_state.py +60 -0
  244. rucio/db/sqla/migrate_repo/versions/3d9813fab443_add_a_new_state_lost_in_badfilesstatus.py +44 -0
  245. rucio/db/sqla/migrate_repo/versions/40ad39ce3160_add_transferred_at_to_requests_table.py +43 -0
  246. rucio/db/sqla/migrate_repo/versions/4207be2fd914_add_notification_column_to_rules.py +64 -0
  247. rucio/db/sqla/migrate_repo/versions/42db2617c364_create_index_on_requests_external_id.py +40 -0
  248. rucio/db/sqla/migrate_repo/versions/436827b13f82_added_column_activity_to_table_requests.py +43 -0
  249. rucio/db/sqla/migrate_repo/versions/44278720f774_update_requests_typ_sta_upd_idx_index.py +44 -0
  250. rucio/db/sqla/migrate_repo/versions/45378a1e76a8_create_collection_replica_table.py +78 -0
  251. rucio/db/sqla/migrate_repo/versions/469d262be19_removing_created_at_index.py +41 -0
  252. rucio/db/sqla/migrate_repo/versions/4783c1f49cb4_create_distance_table.py +59 -0
  253. rucio/db/sqla/migrate_repo/versions/49a21b4d4357_create_index_on_table_tokens.py +44 -0
  254. rucio/db/sqla/migrate_repo/versions/4a2cbedda8b9_add_source_replica_expression_column_to_.py +43 -0
  255. rucio/db/sqla/migrate_repo/versions/4a7182d9578b_added_bytes_length_accessed_at_columns.py +49 -0
  256. rucio/db/sqla/migrate_repo/versions/4bab9edd01fc_create_index_on_requests_rule_id.py +40 -0
  257. rucio/db/sqla/migrate_repo/versions/4c3a4acfe006_new_attr_account_table.py +63 -0
  258. rucio/db/sqla/migrate_repo/versions/4cf0a2e127d4_adding_transient_metadata.py +43 -0
  259. rucio/db/sqla/migrate_repo/versions/4df2c5ddabc0_remove_temporary_dids.py +55 -0
  260. rucio/db/sqla/migrate_repo/versions/50280c53117c_add_qos_class_to_rse.py +45 -0
  261. rucio/db/sqla/migrate_repo/versions/52153819589c_add_rse_id_to_replicas_table.py +43 -0
  262. rucio/db/sqla/migrate_repo/versions/52fd9f4916fa_added_activity_to_rules.py +43 -0
  263. rucio/db/sqla/migrate_repo/versions/53b479c3cb0f_fix_did_meta_table_missing_updated_at_.py +45 -0
  264. rucio/db/sqla/migrate_repo/versions/5673b4b6e843_add_wfms_metadata_to_rule_tables.py +47 -0
  265. rucio/db/sqla/migrate_repo/versions/575767d9f89_added_source_history_table.py +58 -0
  266. rucio/db/sqla/migrate_repo/versions/58bff7008037_add_started_at_to_requests.py +45 -0
  267. rucio/db/sqla/migrate_repo/versions/58c8b78301ab_rename_callback_to_message.py +106 -0
  268. rucio/db/sqla/migrate_repo/versions/5f139f77382a_added_child_rule_id_column.py +55 -0
  269. rucio/db/sqla/migrate_repo/versions/688ef1840840_adding_did_meta_table.py +50 -0
  270. rucio/db/sqla/migrate_repo/versions/6e572a9bfbf3_add_new_split_container_column_to_rules.py +47 -0
  271. rucio/db/sqla/migrate_repo/versions/70587619328_add_comment_column_for_subscriptions.py +43 -0
  272. rucio/db/sqla/migrate_repo/versions/739064d31565_remove_history_table_pks.py +41 -0
  273. rucio/db/sqla/migrate_repo/versions/7541902bf173_add_didsfollowed_and_followevents_table.py +91 -0
  274. rucio/db/sqla/migrate_repo/versions/7ec22226cdbf_new_replica_state_for_temporary_.py +72 -0
  275. rucio/db/sqla/migrate_repo/versions/810a41685bc1_added_columns_rse_transfer_limits.py +49 -0
  276. rucio/db/sqla/migrate_repo/versions/83f991c63a93_correct_rse_expression_length.py +43 -0
  277. rucio/db/sqla/migrate_repo/versions/8523998e2e76_increase_size_of_extended_attributes_.py +43 -0
  278. rucio/db/sqla/migrate_repo/versions/8ea9122275b1_adding_missing_function_based_indices.py +53 -0
  279. rucio/db/sqla/migrate_repo/versions/90f47792bb76_add_clob_payload_to_messages.py +45 -0
  280. rucio/db/sqla/migrate_repo/versions/914b8f02df38_new_table_for_lifetime_model_exceptions.py +68 -0
  281. rucio/db/sqla/migrate_repo/versions/94a5961ddbf2_add_estimator_columns.py +45 -0
  282. rucio/db/sqla/migrate_repo/versions/9a1b149a2044_add_saml_identity_type.py +94 -0
  283. rucio/db/sqla/migrate_repo/versions/9a45bc4ea66d_add_vp_table.py +54 -0
  284. rucio/db/sqla/migrate_repo/versions/9eb936a81eb1_true_is_true.py +72 -0
  285. rucio/db/sqla/migrate_repo/versions/a08fa8de1545_transfer_stats_table.py +55 -0
  286. rucio/db/sqla/migrate_repo/versions/a118956323f8_added_vo_table_and_vo_col_to_rse.py +76 -0
  287. rucio/db/sqla/migrate_repo/versions/a193a275255c_add_status_column_in_messages.py +47 -0
  288. rucio/db/sqla/migrate_repo/versions/a5f6f6e928a7_1_7_0.py +121 -0
  289. rucio/db/sqla/migrate_repo/versions/a616581ee47_added_columns_to_table_requests.py +59 -0
  290. rucio/db/sqla/migrate_repo/versions/a6eb23955c28_state_idx_non_functional.py +52 -0
  291. rucio/db/sqla/migrate_repo/versions/a74275a1ad30_added_global_quota_table.py +54 -0
  292. rucio/db/sqla/migrate_repo/versions/a93e4e47bda_heartbeats.py +64 -0
  293. rucio/db/sqla/migrate_repo/versions/ae2a56fcc89_added_comment_column_to_rules.py +49 -0
  294. rucio/db/sqla/migrate_repo/versions/b0070f3695c8_add_deletedidmeta_table.py +57 -0
  295. rucio/db/sqla/migrate_repo/versions/b4293a99f344_added_column_identity_to_table_tokens.py +43 -0
  296. rucio/db/sqla/migrate_repo/versions/b5493606bbf5_fix_primary_key_for_subscription_history.py +41 -0
  297. rucio/db/sqla/migrate_repo/versions/b7d287de34fd_removal_of_replicastate_source.py +91 -0
  298. rucio/db/sqla/migrate_repo/versions/b818052fa670_add_index_to_quarantined_replicas.py +40 -0
  299. rucio/db/sqla/migrate_repo/versions/b8caac94d7f0_add_comments_column_for_subscriptions_.py +43 -0
  300. rucio/db/sqla/migrate_repo/versions/b96a1c7e1cc4_new_bad_pfns_table_and_bad_replicas_.py +143 -0
  301. rucio/db/sqla/migrate_repo/versions/bb695f45c04_extend_request_state.py +76 -0
  302. rucio/db/sqla/migrate_repo/versions/bc68e9946deb_add_staging_timestamps_to_request.py +50 -0
  303. rucio/db/sqla/migrate_repo/versions/bf3baa1c1474_correct_pk_and_idx_for_history_tables.py +72 -0
  304. rucio/db/sqla/migrate_repo/versions/c0937668555f_add_qos_policy_map_table.py +55 -0
  305. rucio/db/sqla/migrate_repo/versions/c129ccdb2d5_add_lumiblocknr_to_dids.py +43 -0
  306. rucio/db/sqla/migrate_repo/versions/ccdbcd48206e_add_did_type_column_index_on_did_meta_.py +65 -0
  307. rucio/db/sqla/migrate_repo/versions/cebad904c4dd_new_payload_column_for_heartbeats.py +47 -0
  308. rucio/db/sqla/migrate_repo/versions/d1189a09c6e0_oauth2_0_and_jwt_feature_support_adding_.py +146 -0
  309. rucio/db/sqla/migrate_repo/versions/d23453595260_extend_request_state_for_preparer.py +104 -0
  310. rucio/db/sqla/migrate_repo/versions/d6dceb1de2d_added_purge_column_to_rules.py +44 -0
  311. rucio/db/sqla/migrate_repo/versions/d6e2c3b2cf26_remove_third_party_copy_column_from_rse.py +43 -0
  312. rucio/db/sqla/migrate_repo/versions/d91002c5841_new_account_limits_table.py +103 -0
  313. rucio/db/sqla/migrate_repo/versions/e138c364ebd0_extending_columns_for_filter_and_.py +49 -0
  314. rucio/db/sqla/migrate_repo/versions/e59300c8b179_support_for_archive.py +104 -0
  315. rucio/db/sqla/migrate_repo/versions/f1b14a8c2ac1_postgres_use_check_constraints.py +29 -0
  316. rucio/db/sqla/migrate_repo/versions/f41ffe206f37_oracle_global_temporary_tables.py +74 -0
  317. rucio/db/sqla/migrate_repo/versions/f85a2962b021_adding_transfertool_column_to_requests_.py +47 -0
  318. rucio/db/sqla/migrate_repo/versions/fa7a7d78b602_increase_refresh_token_size.py +43 -0
  319. rucio/db/sqla/migrate_repo/versions/fb28a95fe288_add_replicas_rse_id_tombstone_idx.py +37 -0
  320. rucio/db/sqla/migrate_repo/versions/fe1a65b176c9_set_third_party_copy_read_and_write_.py +43 -0
  321. rucio/db/sqla/migrate_repo/versions/fe8ea2fa9788_added_third_party_copy_column_to_rse_.py +43 -0
  322. rucio/db/sqla/models.py +1743 -0
  323. rucio/db/sqla/sautils.py +55 -0
  324. rucio/db/sqla/session.py +529 -0
  325. rucio/db/sqla/types.py +206 -0
  326. rucio/db/sqla/util.py +543 -0
  327. rucio/gateway/__init__.py +13 -0
  328. rucio/gateway/account.py +345 -0
  329. rucio/gateway/account_limit.py +363 -0
  330. rucio/gateway/authentication.py +381 -0
  331. rucio/gateway/config.py +227 -0
  332. rucio/gateway/credential.py +70 -0
  333. rucio/gateway/did.py +987 -0
  334. rucio/gateway/dirac.py +83 -0
  335. rucio/gateway/exporter.py +60 -0
  336. rucio/gateway/heartbeat.py +76 -0
  337. rucio/gateway/identity.py +189 -0
  338. rucio/gateway/importer.py +46 -0
  339. rucio/gateway/lifetime_exception.py +121 -0
  340. rucio/gateway/lock.py +153 -0
  341. rucio/gateway/meta_conventions.py +98 -0
  342. rucio/gateway/permission.py +74 -0
  343. rucio/gateway/quarantined_replica.py +79 -0
  344. rucio/gateway/replica.py +538 -0
  345. rucio/gateway/request.py +330 -0
  346. rucio/gateway/rse.py +632 -0
  347. rucio/gateway/rule.py +437 -0
  348. rucio/gateway/scope.py +100 -0
  349. rucio/gateway/subscription.py +280 -0
  350. rucio/gateway/vo.py +126 -0
  351. rucio/rse/__init__.py +96 -0
  352. rucio/rse/protocols/__init__.py +13 -0
  353. rucio/rse/protocols/bittorrent.py +194 -0
  354. rucio/rse/protocols/cache.py +111 -0
  355. rucio/rse/protocols/dummy.py +100 -0
  356. rucio/rse/protocols/gfal.py +708 -0
  357. rucio/rse/protocols/globus.py +243 -0
  358. rucio/rse/protocols/http_cache.py +82 -0
  359. rucio/rse/protocols/mock.py +123 -0
  360. rucio/rse/protocols/ngarc.py +209 -0
  361. rucio/rse/protocols/posix.py +250 -0
  362. rucio/rse/protocols/protocol.py +361 -0
  363. rucio/rse/protocols/rclone.py +365 -0
  364. rucio/rse/protocols/rfio.py +145 -0
  365. rucio/rse/protocols/srm.py +338 -0
  366. rucio/rse/protocols/ssh.py +414 -0
  367. rucio/rse/protocols/storm.py +195 -0
  368. rucio/rse/protocols/webdav.py +594 -0
  369. rucio/rse/protocols/xrootd.py +302 -0
  370. rucio/rse/rsemanager.py +881 -0
  371. rucio/rse/translation.py +260 -0
  372. rucio/tests/__init__.py +13 -0
  373. rucio/tests/common.py +280 -0
  374. rucio/tests/common_server.py +149 -0
  375. rucio/transfertool/__init__.py +13 -0
  376. rucio/transfertool/bittorrent.py +200 -0
  377. rucio/transfertool/bittorrent_driver.py +50 -0
  378. rucio/transfertool/bittorrent_driver_qbittorrent.py +134 -0
  379. rucio/transfertool/fts3.py +1600 -0
  380. rucio/transfertool/fts3_plugins.py +152 -0
  381. rucio/transfertool/globus.py +201 -0
  382. rucio/transfertool/globus_library.py +181 -0
  383. rucio/transfertool/mock.py +89 -0
  384. rucio/transfertool/transfertool.py +221 -0
  385. rucio/vcsversion.py +11 -0
  386. rucio/version.py +45 -0
  387. rucio/web/__init__.py +13 -0
  388. rucio/web/rest/__init__.py +13 -0
  389. rucio/web/rest/flaskapi/__init__.py +13 -0
  390. rucio/web/rest/flaskapi/authenticated_bp.py +27 -0
  391. rucio/web/rest/flaskapi/v1/__init__.py +13 -0
  392. rucio/web/rest/flaskapi/v1/accountlimits.py +236 -0
  393. rucio/web/rest/flaskapi/v1/accounts.py +1103 -0
  394. rucio/web/rest/flaskapi/v1/archives.py +102 -0
  395. rucio/web/rest/flaskapi/v1/auth.py +1644 -0
  396. rucio/web/rest/flaskapi/v1/common.py +426 -0
  397. rucio/web/rest/flaskapi/v1/config.py +304 -0
  398. rucio/web/rest/flaskapi/v1/credentials.py +213 -0
  399. rucio/web/rest/flaskapi/v1/dids.py +2340 -0
  400. rucio/web/rest/flaskapi/v1/dirac.py +116 -0
  401. rucio/web/rest/flaskapi/v1/export.py +75 -0
  402. rucio/web/rest/flaskapi/v1/heartbeats.py +127 -0
  403. rucio/web/rest/flaskapi/v1/identities.py +285 -0
  404. rucio/web/rest/flaskapi/v1/import.py +132 -0
  405. rucio/web/rest/flaskapi/v1/lifetime_exceptions.py +312 -0
  406. rucio/web/rest/flaskapi/v1/locks.py +358 -0
  407. rucio/web/rest/flaskapi/v1/main.py +91 -0
  408. rucio/web/rest/flaskapi/v1/meta_conventions.py +241 -0
  409. rucio/web/rest/flaskapi/v1/metrics.py +36 -0
  410. rucio/web/rest/flaskapi/v1/nongrid_traces.py +97 -0
  411. rucio/web/rest/flaskapi/v1/ping.py +88 -0
  412. rucio/web/rest/flaskapi/v1/redirect.py +366 -0
  413. rucio/web/rest/flaskapi/v1/replicas.py +1894 -0
  414. rucio/web/rest/flaskapi/v1/requests.py +998 -0
  415. rucio/web/rest/flaskapi/v1/rses.py +2250 -0
  416. rucio/web/rest/flaskapi/v1/rules.py +854 -0
  417. rucio/web/rest/flaskapi/v1/scopes.py +159 -0
  418. rucio/web/rest/flaskapi/v1/subscriptions.py +650 -0
  419. rucio/web/rest/flaskapi/v1/templates/auth_crash.html +80 -0
  420. rucio/web/rest/flaskapi/v1/templates/auth_granted.html +82 -0
  421. rucio/web/rest/flaskapi/v1/traces.py +137 -0
  422. rucio/web/rest/flaskapi/v1/types.py +20 -0
  423. rucio/web/rest/flaskapi/v1/vos.py +278 -0
  424. rucio/web/rest/main.py +18 -0
  425. rucio/web/rest/metrics.py +27 -0
  426. rucio/web/rest/ping.py +27 -0
  427. rucio-37.0.0rc1.data/data/rucio/etc/alembic.ini.template +71 -0
  428. rucio-37.0.0rc1.data/data/rucio/etc/alembic_offline.ini.template +74 -0
  429. rucio-37.0.0rc1.data/data/rucio/etc/globus-config.yml.template +5 -0
  430. rucio-37.0.0rc1.data/data/rucio/etc/ldap.cfg.template +30 -0
  431. rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_approval_request.tmpl +38 -0
  432. rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_approved_admin.tmpl +4 -0
  433. rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_approved_user.tmpl +17 -0
  434. rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_denied_admin.tmpl +6 -0
  435. rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_denied_user.tmpl +17 -0
  436. rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_ok_notification.tmpl +19 -0
  437. rucio-37.0.0rc1.data/data/rucio/etc/rse-accounts.cfg.template +25 -0
  438. rucio-37.0.0rc1.data/data/rucio/etc/rucio.cfg.atlas.client.template +43 -0
  439. rucio-37.0.0rc1.data/data/rucio/etc/rucio.cfg.template +241 -0
  440. rucio-37.0.0rc1.data/data/rucio/etc/rucio_multi_vo.cfg.template +217 -0
  441. rucio-37.0.0rc1.data/data/rucio/requirements.server.txt +297 -0
  442. rucio-37.0.0rc1.data/data/rucio/tools/bootstrap.py +34 -0
  443. rucio-37.0.0rc1.data/data/rucio/tools/merge_rucio_configs.py +144 -0
  444. rucio-37.0.0rc1.data/data/rucio/tools/reset_database.py +40 -0
  445. rucio-37.0.0rc1.data/scripts/rucio +133 -0
  446. rucio-37.0.0rc1.data/scripts/rucio-abacus-account +74 -0
  447. rucio-37.0.0rc1.data/scripts/rucio-abacus-collection-replica +46 -0
  448. rucio-37.0.0rc1.data/scripts/rucio-abacus-rse +78 -0
  449. rucio-37.0.0rc1.data/scripts/rucio-admin +97 -0
  450. rucio-37.0.0rc1.data/scripts/rucio-atropos +60 -0
  451. rucio-37.0.0rc1.data/scripts/rucio-auditor +206 -0
  452. rucio-37.0.0rc1.data/scripts/rucio-automatix +50 -0
  453. rucio-37.0.0rc1.data/scripts/rucio-bb8 +57 -0
  454. rucio-37.0.0rc1.data/scripts/rucio-cache-client +141 -0
  455. rucio-37.0.0rc1.data/scripts/rucio-cache-consumer +42 -0
  456. rucio-37.0.0rc1.data/scripts/rucio-conveyor-finisher +58 -0
  457. rucio-37.0.0rc1.data/scripts/rucio-conveyor-poller +66 -0
  458. rucio-37.0.0rc1.data/scripts/rucio-conveyor-preparer +37 -0
  459. rucio-37.0.0rc1.data/scripts/rucio-conveyor-receiver +44 -0
  460. rucio-37.0.0rc1.data/scripts/rucio-conveyor-stager +76 -0
  461. rucio-37.0.0rc1.data/scripts/rucio-conveyor-submitter +139 -0
  462. rucio-37.0.0rc1.data/scripts/rucio-conveyor-throttler +104 -0
  463. rucio-37.0.0rc1.data/scripts/rucio-dark-reaper +53 -0
  464. rucio-37.0.0rc1.data/scripts/rucio-dumper +160 -0
  465. rucio-37.0.0rc1.data/scripts/rucio-follower +44 -0
  466. rucio-37.0.0rc1.data/scripts/rucio-hermes +54 -0
  467. rucio-37.0.0rc1.data/scripts/rucio-judge-cleaner +89 -0
  468. rucio-37.0.0rc1.data/scripts/rucio-judge-evaluator +137 -0
  469. rucio-37.0.0rc1.data/scripts/rucio-judge-injector +44 -0
  470. rucio-37.0.0rc1.data/scripts/rucio-judge-repairer +44 -0
  471. rucio-37.0.0rc1.data/scripts/rucio-kronos +44 -0
  472. rucio-37.0.0rc1.data/scripts/rucio-minos +53 -0
  473. rucio-37.0.0rc1.data/scripts/rucio-minos-temporary-expiration +50 -0
  474. rucio-37.0.0rc1.data/scripts/rucio-necromancer +120 -0
  475. rucio-37.0.0rc1.data/scripts/rucio-oauth-manager +63 -0
  476. rucio-37.0.0rc1.data/scripts/rucio-reaper +83 -0
  477. rucio-37.0.0rc1.data/scripts/rucio-replica-recoverer +248 -0
  478. rucio-37.0.0rc1.data/scripts/rucio-rse-decommissioner +66 -0
  479. rucio-37.0.0rc1.data/scripts/rucio-storage-consistency-actions +74 -0
  480. rucio-37.0.0rc1.data/scripts/rucio-transmogrifier +77 -0
  481. rucio-37.0.0rc1.data/scripts/rucio-undertaker +76 -0
  482. rucio-37.0.0rc1.dist-info/METADATA +92 -0
  483. rucio-37.0.0rc1.dist-info/RECORD +487 -0
  484. rucio-37.0.0rc1.dist-info/WHEEL +5 -0
  485. rucio-37.0.0rc1.dist-info/licenses/AUTHORS.rst +100 -0
  486. rucio-37.0.0rc1.dist-info/licenses/LICENSE +201 -0
  487. rucio-37.0.0rc1.dist-info/top_level.txt +1 -0
rucio/core/did.py ADDED
@@ -0,0 +1,3004 @@
+ # Copyright European Organization for Nuclear Research (CERN) since 2012
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import logging
+ import random
+ from datetime import datetime, timedelta
+ from enum import Enum
+ from hashlib import md5
+ from re import match
+ from typing import TYPE_CHECKING, Any, Literal, Optional, Union
+
+ from sqlalchemy import and_, delete, exists, insert, or_, update
+ from sqlalchemy.exc import DatabaseError, IntegrityError, NoResultFound
+ from sqlalchemy.sql import func, not_
+ from sqlalchemy.sql.expression import bindparam, case, false, null, select, true
+
+ import rucio.core.replica  # import add_replicas
+ import rucio.core.rule
+ from rucio.common import exception
+ from rucio.common.config import config_get_bool, config_get_int
+ from rucio.common.utils import chunks, is_archive
+ from rucio.core import did_meta_plugins
+ from rucio.core.message import add_message
+ from rucio.core.monitor import MetricManager
+ from rucio.core.naming_convention import validate_name
+ from rucio.db.sqla import filter_thread_work, models
+ from rucio.db.sqla.constants import BadFilesStatus, DIDAvailability, DIDReEvaluation, DIDType, RuleState
+ from rucio.db.sqla.session import read_session, stream_session, transactional_session
+ from rucio.db.sqla.util import temp_table_mngr
+
+ if TYPE_CHECKING:
+     from collections.abc import Iterable, Iterator, Mapping, Sequence
+
+     from sqlalchemy.orm import Session
+     from sqlalchemy.sql._typing import ColumnExpressionArgument
+     from sqlalchemy.sql.selectable import Select
+
+     from rucio.common.types import InternalAccount, InternalScope, LoggerFunction
+
+
+ METRICS = MetricManager(module=__name__)
+
+
+ @read_session
+ def list_expired_dids(
+         worker_number: Optional[int] = None,
+         total_workers: Optional[int] = None,
+         limit: Optional[int] = None,
+         *,
+         session: "Session"
+ ) -> list[dict[str, Any]]:
+     """
+     List expired data identifiers.
+
+     :param limit: limit number.
+     :param session: The database session in use.
+     """
+
+     sub_query = exists(
+     ).where(
+         models.ReplicationRule.scope == models.DataIdentifier.scope,
+         models.ReplicationRule.name == models.DataIdentifier.name,
+         models.ReplicationRule.locked == true(),
+     )
+     list_stmt = select(
+         models.DataIdentifier.scope,
+         models.DataIdentifier.name,
+         models.DataIdentifier.did_type,
+         models.DataIdentifier.created_at,
+         models.DataIdentifier.purge_replicas
+     ).with_hint(
+         models.DataIdentifier,
+         'INDEX(DIDS DIDS_EXPIRED_AT_IDX)',
+         'oracle'
+     ).where(
+         models.DataIdentifier.expired_at < datetime.utcnow(),
+         not_(sub_query),
+     ).order_by(
+         models.DataIdentifier.expired_at
+     )
+
+     if session.bind.dialect.name in ['oracle', 'mysql', 'postgresql']:
+         list_stmt = filter_thread_work(session=session, query=list_stmt, total_threads=total_workers, thread_id=worker_number, hash_variable='name')
+     elif session.bind.dialect.name == 'sqlite' and worker_number and total_workers and total_workers > 0:
+         row_count = 0
+         dids = list()
+         for scope, name, did_type, created_at, purge_replicas in session.execute(list_stmt).yield_per(10):
+             if int(md5(name).hexdigest(), 16) % total_workers == worker_number:
+                 dids.append({'scope': scope,
+                              'name': name,
+                              'did_type': did_type,
+                              'created_at': created_at,
+                              'purge_replicas': purge_replicas})
+                 row_count += 1
+             if limit and row_count >= limit:
+                 return dids
+         return dids
+     else:
+         if worker_number and total_workers:
+             raise exception.DatabaseException('The database type %s returned by SQLAlchemy is invalid.' % session.bind.dialect.name)
+
+     if limit:
+         list_stmt = list_stmt.limit(limit)
+
+     return [{'scope': scope, 'name': name, 'did_type': did_type, 'created_at': created_at,
+              'purge_replicas': purge_replicas} for scope, name, did_type, created_at, purge_replicas in session.execute(list_stmt)]
+
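For orientation, a minimal sketch (not part of the package; the worker split and limit are invented values) of how a cleanup daemon might call this helper. The @read_session decorator supplies the session when none is passed:

    # Illustrative sketch only: assumes a configured Rucio database.
    from rucio.core.did import list_expired_dids

    # Worker 0 of 4 fetches at most 100 expired DIDs.
    for did in list_expired_dids(worker_number=0, total_workers=4, limit=100):
        print(did['scope'], did['name'], did['did_type'], did['purge_replicas'])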
+
+ @transactional_session
+ def add_did(
+         scope: "InternalScope",
+         name: str,
+         did_type: Union[str, DIDType],
+         account: "InternalAccount",
+         statuses: Optional["Mapping[str, Any]"] = None,
+         meta: Optional["Mapping[str, Any]"] = None,
+         rules: Optional["Sequence[str]"] = None,
+         lifetime: Optional[int] = None,
+         dids: Optional["Sequence[Mapping[str, Any]]"] = None,
+         rse_id: Optional[str] = None,
+         *,
+         session: "Session",
+ ) -> None:
+     """
+     Add data identifier.
+
+     :param scope: The scope name.
+     :param name: The data identifier name.
+     :param did_type: The data identifier type.
+     :param account: The account owner.
+     :param statuses: Dictionary with statuses, e.g. {'monotonic': True}.
+     :param meta: Meta-data associated with the data identifier, represented using key/value pairs in a dictionary.
+     :param rules: Replication rules associated with the data identifier. A list of dictionaries, e.g., [{'copies': 2, 'rse_expression': 'TIERS1'}, ].
+     :param lifetime: DID's lifetime (in seconds).
+     :param dids: The content.
+     :param rse_id: The RSE id when registering replicas.
+     :param session: The database session in use.
+     """
+     return add_dids(dids=[{'scope': scope, 'name': name, 'type': did_type,
+                            'statuses': statuses or {}, 'meta': meta or {},
+                            'rules': rules, 'lifetime': lifetime,
+                            'dids': dids, 'rse_id': rse_id}],
+                     account=account, session=session)
+
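As a rough usage sketch (not part of the package; the scope, account, name and metadata are placeholders, and the rules payload simply follows the docstring's own example), registering a monotonic dataset with metadata, one replication rule and a one-day lifetime could look like:

    # Illustrative only: values below are placeholders.
    from rucio.common.types import InternalAccount, InternalScope
    from rucio.core.did import add_did
    from rucio.db.sqla.constants import DIDType

    add_did(scope=InternalScope('user.jdoe'),
            name='user.jdoe.dataset.2024',
            did_type=DIDType.DATASET,
            account=InternalAccount('jdoe'),
            statuses={'monotonic': True},
            meta={'project': 'test'},
            rules=[{'copies': 2, 'rse_expression': 'TIERS1'}],
            lifetime=86400)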
+
+ @transactional_session
+ def add_dids(
+         dids: "Sequence[dict[str, Any]]",
+         account: "InternalAccount",
+         *,
+         session: "Session",
+ ) -> None:
+     """
+     Bulk add data identifiers.
+
+     :param dids: A list of dids.
+     :param account: The account owner.
+     :param session: The database session in use.
+     """
+     try:
+
+         for did in dids:
+             try:
+
+                 if isinstance(did['type'], str):
+                     did['type'] = DIDType[did['type']]
+
+                 if did['type'] == DIDType.FILE:
+                     raise exception.UnsupportedOperation('Only collection (dataset/container) can be registered.')
+
+                 # Lifetime
+                 expired_at = None
+                 if did.get('lifetime'):
+                     expired_at = datetime.utcnow() + timedelta(seconds=did['lifetime'])
+
+                 # Insert new data identifier
+                 new_did = models.DataIdentifier(scope=did['scope'], name=did['name'], account=did.get('account') or account,
+                                                 did_type=did['type'], monotonic=did.get('statuses', {}).get('monotonic', False),
+                                                 is_open=True, expired_at=expired_at)
+
+                 new_did.save(session=session, flush=False)
+
+                 if 'meta' in did and did['meta']:
+                     # Add metadata
+                     set_metadata_bulk(scope=did['scope'], name=did['name'], meta=did['meta'], recursive=False, session=session)
+
+                 if did.get('dids', None):
+                     attach_dids(scope=did['scope'], name=did['name'], dids=did['dids'],
+                                 account=account, rse_id=did.get('rse_id'), session=session)
+
+                 if did.get('rules', None):
+                     rucio.core.rule.add_rules(dids=[did, ], rules=did['rules'], session=session)
+
+                 event_type = None
+                 if did['type'] == DIDType.CONTAINER:
+                     event_type = 'CREATE_CNT'
+                 if did['type'] == DIDType.DATASET:
+                     event_type = 'CREATE_DTS'
+                 if event_type:
+                     message = {'account': account.external,
+                                'scope': did['scope'].external,
+                                'name': did['name'],
+                                'expired_at': str(expired_at) if expired_at is not None else None}
+                     if account.vo != 'def':
+                         message['vo'] = account.vo
+
+                     add_message(event_type, message, session=session)
+
+             except KeyError:
+                 # ToDo
+                 raise
+
+         session.flush()
+
+     except IntegrityError as error:
+         if match('.*IntegrityError.*ORA-00001: unique constraint.*DIDS_PK.*violated.*', error.args[0]) \
+                 or match('.*IntegrityError.*UNIQUE constraint failed: dids.scope, dids.name.*', error.args[0]) \
+                 or match('.*IntegrityError.*1062.*Duplicate entry.*for key.*', error.args[0]) \
+                 or match('.*IntegrityError.*duplicate key value violates unique constraint.*', error.args[0]) \
+                 or match('.*UniqueViolation.*duplicate key value violates unique constraint.*', error.args[0]) \
+                 or match('.*IntegrityError.*columns? .*not unique.*', error.args[0]):
+             raise exception.DataIdentifierAlreadyExists('Data Identifier already exists!')
+
+         if match('.*IntegrityError.*02291.*integrity constraint.*DIDS_SCOPE_FK.*violated - parent key not found.*', error.args[0]) \
+                 or match('.*IntegrityError.*FOREIGN KEY constraint failed.*', error.args[0]) \
+                 or match('.*IntegrityError.*1452.*Cannot add or update a child row: a foreign key constraint fails.*', error.args[0]) \
+                 or match('.*IntegrityError.*02291.*integrity constraint.*DIDS_SCOPE_FK.*violated - parent key not found.*', error.args[0]) \
+                 or match('.*IntegrityError.*insert or update on table.*violates foreign key constraint.*', error.args[0]) \
+                 or match('.*ForeignKeyViolation.*insert or update on table.*violates foreign key constraint.*', error.args[0]) \
+                 or match('.*IntegrityError.*foreign key constraints? failed.*', error.args[0]):
+             raise exception.ScopeNotFound('Scope not found!')
+
+         raise exception.RucioException(error.args)
+     except DatabaseError as error:
+         if match('.*(DatabaseError).*ORA-14400.*inserted partition key does not map to any partition.*', error.args[0]):
+             raise exception.ScopeNotFound('Scope not found!')
+         raise exception.RucioException(error.args)
+
+
+ @transactional_session
+ def attach_dids(
+         scope: "InternalScope",
+         name: str,
+         dids: "Sequence[Mapping[str, Any]]",
+         account: "InternalAccount",
+         rse_id: Optional[str] = None,
+         *,
+         session: "Session",
+ ) -> None:
+     """
+     Append data identifier.
+
+     :param scope: The scope name.
+     :param name: The data identifier name.
+     :param dids: The content.
+     :param account: The account owner.
+     :param rse_id: The RSE id for the replicas.
+     :param session: The database session in use.
+     """
+     return attach_dids_to_dids(attachments=[{'scope': scope, 'name': name, 'dids': dids, 'rse_id': rse_id}], account=account, session=session)
+
+
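A similarly hedged sketch (dataset and file names are invented) of attaching already-registered files to a dataset; each child entry carries at least its scope and name, and any supplied bytes/adler32/md5 values are checked against the catalogue by __add_files_to_dataset below:

    # Illustrative only: the children must already exist as file DIDs.
    from rucio.common.types import InternalAccount, InternalScope
    from rucio.core.did import attach_dids

    scope = InternalScope('user.jdoe')
    attach_dids(scope=scope,
                name='user.jdoe.dataset.2024',
                dids=[{'scope': scope, 'name': 'file.0001.root'},
                      {'scope': scope, 'name': 'file.0002.root'}],
                account=InternalAccount('jdoe'))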
274
+ @transactional_session
275
+ def attach_dids_to_dids(
276
+ attachments: "Sequence[Mapping[str, Any]]",
277
+ account: "InternalAccount",
278
+ ignore_duplicate: bool = False,
279
+ *,
280
+ session: "Session",
281
+ ) -> None:
282
+ children_temp_table = temp_table_mngr(session).create_scope_name_table()
283
+ parent_dids = list()
284
+ first_iteration = True
285
+ for attachment in attachments:
286
+ try:
287
+ children = {(a['scope'], a['name']): a for a in attachment['dids']}
288
+ cont = []
289
+ stmt = select(
290
+ models.DataIdentifier
291
+ ).with_hint(
292
+ models.DataIdentifier,
293
+ 'INDEX(DIDS DIDS_PK)',
294
+ 'oracle'
295
+ ).where(
296
+ models.DataIdentifier.scope == attachment['scope'],
297
+ models.DataIdentifier.name == attachment['name']
298
+ )
299
+ parent_did = session.execute(stmt).scalar_one()
300
+ update_parent = False
301
+
302
+ if not first_iteration:
303
+ stmt = delete(
304
+ children_temp_table
305
+ )
306
+ session.execute(stmt)
307
+ values = [{'scope': s, 'name': n} for s, n in children]
308
+ stmt = insert(
309
+ children_temp_table
310
+ )
311
+ session.execute(stmt, values)
312
+
313
+ if parent_did.did_type == DIDType.FILE:
314
+ # check if parent file has the archive extension
315
+ if is_archive(attachment['name']):
316
+ __add_files_to_archive(parent_did=parent_did,
317
+ files_temp_table=children_temp_table,
318
+ files=children,
319
+ account=account,
320
+ ignore_duplicate=ignore_duplicate,
321
+ session=session)
322
+ return
323
+ raise exception.UnsupportedOperation("Data identifier '%(scope)s:%(name)s' is a file" % attachment)
324
+
325
+ elif not parent_did.is_open:
326
+ raise exception.UnsupportedOperation("Data identifier '%(scope)s:%(name)s' is closed" % attachment)
327
+
328
+ elif parent_did.did_type == DIDType.DATASET:
329
+ cont = __add_files_to_dataset(parent_did=parent_did,
330
+ files_temp_table=children_temp_table,
331
+ files=children,
332
+ account=account,
333
+ ignore_duplicate=ignore_duplicate,
334
+ rse_id=attachment.get('rse_id'),
335
+ session=session)
336
+ update_parent = len(cont) > 0
337
+
338
+ elif parent_did.did_type == DIDType.CONTAINER:
339
+ __add_collections_to_container(parent_did=parent_did,
340
+ collections_temp_table=children_temp_table,
341
+ collections=children,
342
+ account=account,
343
+ session=session)
344
+ update_parent = True
345
+
346
+ if update_parent:
347
+ # cont contains the parent of the files and is only filled if the files does not exist yet
348
+ parent_dids.append({'scope': parent_did.scope,
349
+ 'name': parent_did.name,
350
+ 'rule_evaluation_action': DIDReEvaluation.ATTACH})
351
+ except NoResultFound:
352
+ raise exception.DataIdentifierNotFound("Data identifier '%s:%s' not found" % (attachment['scope'], attachment['name']))
353
+ first_iteration = False
354
+
355
+ # Remove all duplicated dictionaries from the list
356
+ # (convert the list of dictionaries into a list of tuple, then to a set of tuple
357
+ # to remove duplicates, then back to a list of unique dictionaries)
358
+ parent_dids = [dict(tup) for tup in set(tuple(dictionary.items()) for dictionary in parent_dids)]
359
+ if parent_dids:
360
+ stmt = insert(
361
+ models.UpdatedDID
362
+ )
363
+ session.execute(stmt, parent_dids)
364
+
365
+
366
+ def __add_files_to_archive(
367
+ parent_did: models.DataIdentifier,
368
+ files_temp_table: Any,
369
+ files: "Mapping[tuple[InternalScope, str], Mapping[str, Any]]",
370
+ account: "InternalAccount",
371
+ ignore_duplicate: bool = False,
372
+ *,
373
+ session: "Session"
374
+ ) -> None:
375
+ """
376
+ Add files to archive.
377
+
378
+ :param parent_did: the DataIdentifier object of the parent did
379
+ :param files: archive content.
380
+ :param account: The account owner.
381
+ :param ignore_duplicate: If True, ignore duplicate entries.
382
+ :param session: The database session in use.
383
+ """
384
+ stmt = select(
385
+ files_temp_table.scope,
386
+ files_temp_table.name,
387
+ models.DataIdentifier.scope.label('did_scope'),
388
+ models.DataIdentifier.bytes,
389
+ models.DataIdentifier.guid,
390
+ models.DataIdentifier.events,
391
+ models.DataIdentifier.availability,
392
+ models.DataIdentifier.adler32,
393
+ models.DataIdentifier.md5,
394
+ models.DataIdentifier.is_archive,
395
+ models.DataIdentifier.constituent,
396
+ models.DataIdentifier.did_type,
397
+ ).outerjoin_from(
398
+ files_temp_table,
399
+ models.DataIdentifier,
400
+ and_(models.DataIdentifier.scope == files_temp_table.scope,
401
+ models.DataIdentifier.name == files_temp_table.name)
402
+ )
403
+ if ignore_duplicate:
404
+ stmt = stmt.add_columns(
405
+ models.ConstituentAssociation.scope.label('archive_contents_scope'),
406
+ ).outerjoin_from(
407
+ files_temp_table,
408
+ models.ConstituentAssociation,
409
+ and_(models.ConstituentAssociation.scope == parent_did.scope,
410
+ models.ConstituentAssociation.name == parent_did.name,
411
+ models.ConstituentAssociation.child_scope == files_temp_table.scope,
412
+ models.ConstituentAssociation.child_name == files_temp_table.name)
413
+ )
414
+
415
+ dids_to_add = {}
416
+ must_set_constituent = False
417
+ archive_contents_to_add = {}
418
+ for row in session.execute(stmt):
419
+ file = files[row.scope, row.name]
420
+
421
+ if ignore_duplicate and row.archive_contents_scope is not None:
422
+ continue
423
+
424
+ if (row.scope, row.name) in archive_contents_to_add:
425
+ # Ignore duplicate input
426
+ continue
427
+
428
+ if row.did_scope is None:
429
+ new_did = {}
430
+ new_did.update((k, v) for k, v in file.items() if k != 'meta')
431
+ for key in file.get('meta', {}):
432
+ new_did[key] = file['meta'][key]
433
+ new_did['constituent'] = True
434
+ new_did['did_type'] = DIDType.FILE
435
+ new_did['account'] = account
436
+ dids_to_add[row.scope, row.name] = new_did
437
+
438
+ new_content = {
439
+ 'child_scope': file['scope'],
440
+ 'child_name': file['name'],
441
+ 'scope': parent_did.scope,
442
+ 'name': parent_did.name,
443
+ 'bytes': file['bytes'],
444
+ 'adler32': file.get('adler32'),
445
+ 'md5': file.get('md5'),
446
+ 'guid': file.get('guid'),
447
+ 'length': file.get('events')
448
+ }
449
+ else:
450
+ if row.did_type != DIDType.FILE:
451
+ raise exception.UnsupportedOperation('Data identifier %s:%s of type %s cannot be added to an archive ' % (row.scope, row.name, row.did_type))
452
+
453
+ if not row.constituent:
454
+ must_set_constituent = True
455
+
456
+ new_content = {
457
+ 'child_scope': row.scope,
458
+ 'child_name': row.name,
459
+ 'scope': parent_did.scope,
460
+ 'name': parent_did.name,
461
+ 'bytes': row.bytes,
462
+ 'adler32': row.adler32,
463
+ 'md5': row.md5,
464
+ 'guid': row.guid,
465
+ 'length': row.events
466
+ }
467
+
468
+ archive_contents_to_add[row.scope, row.name] = new_content
469
+
470
+ # insert into archive_contents
471
+ try:
472
+ values = list(dids_to_add.values())
473
+ stmt = insert(
474
+ models.DataIdentifier
475
+ )
476
+ dids_to_add and session.execute(stmt, values)
477
+ values = list(archive_contents_to_add.values())
478
+ stmt = insert(
479
+ models.ConstituentAssociation
480
+ )
481
+ archive_contents_to_add and session.execute(stmt, values)
482
+ if must_set_constituent:
483
+ stmt = update(
484
+ models.DataIdentifier
485
+ ).where(
486
+ exists(
487
+ select(1)
488
+ ).where(
489
+ and_(models.DataIdentifier.scope == files_temp_table.scope,
490
+ models.DataIdentifier.name == files_temp_table.name)
491
+ )
492
+ ).where(
493
+ or_(models.DataIdentifier.constituent.is_(None),
494
+ models.DataIdentifier.constituent == false())
495
+ ).values({
496
+ models.DataIdentifier.constituent: True
497
+ }).execution_options(
498
+ synchronize_session=False
499
+ )
500
+ session.execute(stmt)
501
+ session.flush()
502
+ except IntegrityError as error:
503
+ raise exception.RucioException(error.args)
504
+
505
+ if not parent_did.is_archive:
506
+ # mark the archive file as is_archive
507
+ parent_did.is_archive = True
508
+
509
+ # mark parent datasets as is_archive = True
510
+ stmt = update(
511
+ models.DataIdentifier
512
+ ).where(
513
+ exists(
514
+ select(1).prefix_with("/*+ INDEX(CONTENTS CONTENTS_CHILD_SCOPE_NAME_IDX) */", dialect="oracle")
515
+ ).where(
516
+ and_(models.DataIdentifierAssociation.child_scope == parent_did.scope,
517
+ models.DataIdentifierAssociation.child_name == parent_did.name,
518
+ models.DataIdentifierAssociation.scope == models.DataIdentifier.scope,
519
+ models.DataIdentifierAssociation.name == models.DataIdentifier.name)
520
+ )
521
+ ).where(
522
+ or_(models.DataIdentifier.is_archive.is_(None),
523
+ models.DataIdentifier.is_archive == false())
524
+ ).values({
525
+ models.DataIdentifier.is_archive: True
526
+ }).execution_options(
527
+ synchronize_session=False
528
+ )
529
+ session.execute(stmt)
530
+
531
+
532
+ @transactional_session
533
+ def __add_files_to_dataset(
534
+ parent_did: models.DataIdentifier,
535
+ files_temp_table: Any,
536
+ files: "Mapping[tuple[InternalScope, str], Mapping[str, Any]]",
537
+ account: "InternalAccount",
538
+ rse_id: str,
539
+ ignore_duplicate: bool = False,
540
+ *,
541
+ session: "Session"
542
+ ) -> dict[tuple["InternalScope", str], dict[str, Any]]:
543
+ """
544
+ Add files to dataset.
545
+
546
+ :param parent_did: the DataIdentifier object of the parent did
547
+ :param files_temp_table: Temporary table containing the scope and name of files to add.
548
+ :param account: The account owner.
549
+ :param rse_id: The RSE id for the replicas.
550
+ :param ignore_duplicate: If True, ignore duplicate entries.
551
+ :param session: The database session in use.
552
+ :returns: List of files attached (excluding the ones that were already attached to the dataset).
553
+ """
554
+ # Get metadata from dataset
555
+ try:
556
+ dataset_meta = validate_name(scope=parent_did.scope, name=parent_did.name, did_type='D')
557
+ except Exception:
558
+ dataset_meta = None
559
+
560
+ if rse_id:
561
+ # Tier-0 uses this old work-around to register replicas on the RSE
562
+ # in the same call as attaching them to a dataset
563
+ rucio.core.replica.add_replicas(rse_id=rse_id, files=files.values(), dataset_meta=dataset_meta,
564
+ account=account, session=session)
565
+
566
+ stmt = select(
567
+ files_temp_table.scope,
568
+ files_temp_table.name,
569
+ models.DataIdentifier.scope.label('did_scope'),
570
+ models.DataIdentifier.bytes,
571
+ models.DataIdentifier.guid,
572
+ models.DataIdentifier.events,
573
+ models.DataIdentifier.availability,
574
+ models.DataIdentifier.adler32,
575
+ models.DataIdentifier.md5,
576
+ models.DataIdentifier.is_archive,
577
+ models.DataIdentifier.did_type,
578
+ ).outerjoin_from(
579
+ files_temp_table,
580
+ models.DataIdentifier,
581
+ and_(models.DataIdentifier.scope == files_temp_table.scope,
582
+ models.DataIdentifier.name == files_temp_table.name),
583
+ )
584
+ if ignore_duplicate:
585
+ stmt = stmt.add_columns(
586
+ models.DataIdentifierAssociation.scope.label('contents_scope'),
587
+ ).outerjoin_from(
588
+ files_temp_table,
589
+ models.DataIdentifierAssociation,
590
+ and_(models.DataIdentifierAssociation.scope == parent_did.scope,
591
+ models.DataIdentifierAssociation.name == parent_did.name,
592
+ models.DataIdentifierAssociation.child_scope == files_temp_table.scope,
593
+ models.DataIdentifierAssociation.child_name == files_temp_table.name),
594
+ )
595
+
596
+ files_to_add = {}
597
+ for row in session.execute(stmt):
598
+ file = files[row.scope, row.name]
599
+
600
+ if row.did_scope is None:
601
+ raise exception.DataIdentifierNotFound(f"Data identifier '{row.scope}:{row.name}' not found")
602
+
603
+ if row.availability == DIDAvailability.LOST:
604
+ raise exception.UnsupportedOperation('File %s:%s is LOST and cannot be attached' % (row.scope, row.name))
605
+
606
+ if row.did_type != DIDType.FILE:
607
+ raise exception.UnsupportedOperation('Data identifier %s:%s of type %s cannot be added to a dataset ' % (row.scope, row.name, row.did_type))
608
+
609
+ # Check meta-data, if provided
610
+ row_dict = row._asdict()
611
+ for key in ['bytes', 'adler32', 'md5']:
612
+ if key in file and str(file[key]) != str(row_dict[key]):
613
+ raise exception.FileConsistencyMismatch(key + " mismatch for '%(scope)s:%(name)s': " % row_dict + str(file.get(key)) + '!=' + str(row_dict[key]))
614
+
615
+ if ignore_duplicate and row.contents_scope is not None:
616
+ continue
617
+
618
+ if (row.scope, row.name) in files_to_add:
619
+ # Ignore duplicate input files
620
+ continue
621
+
622
+ if row.is_archive and not parent_did.is_archive:
623
+ parent_did.is_archive = True
624
+
625
+ files_to_add[(row.scope, row.name)] = {
626
+ 'scope': parent_did.scope,
627
+ 'name': parent_did.name,
628
+ 'child_scope': row.scope,
629
+ 'child_name': row.name,
630
+ 'bytes': row.bytes,
631
+ 'adler32': row.adler32,
632
+ 'md5': row.md5,
633
+ 'guid': row.guid,
634
+ 'events': row.events,
635
+ 'did_type': DIDType.DATASET,
636
+ 'child_type': DIDType.FILE,
637
+ 'rule_evaluation': True,
638
+ }
639
+
640
+ try:
641
+ values = list(files_to_add.values())
642
+ stmt = insert(
643
+ models.DataIdentifierAssociation
644
+ )
645
+ files_to_add and session.execute(stmt, values)
646
+ session.flush()
647
+ return files_to_add
648
+ except IntegrityError as error:
649
+ if match('.*IntegrityError.*ORA-02291: integrity constraint .*CONTENTS_CHILD_ID_FK.*violated - parent key not found.*', error.args[0]) \
650
+ or match('.*IntegrityError.*1452.*Cannot add or update a child row: a foreign key constraint fails.*', error.args[0]) \
651
+ or match('.*IntegrityError.*foreign key constraints? failed.*', error.args[0]) \
652
+ or match('.*IntegrityError.*insert or update on table.*violates foreign key constraint.*', error.args[0]):
653
+ raise exception.DataIdentifierNotFound("Data identifier not found")
654
+ elif match('.*IntegrityError.*ORA-00001: unique constraint .*CONTENTS_PK.*violated.*', error.args[0]) \
655
+ or match('.*IntegrityError.*UNIQUE constraint failed: contents.scope, contents.name, contents.child_scope, contents.child_name.*', error.args[0]) \
656
+ or match('.*IntegrityError.*duplicate key value violates unique constraint.*', error.args[0]) \
657
+ or match('.*UniqueViolation.*duplicate key value violates unique constraint.*', error.args[0]) \
658
+ or match('.*IntegrityError.*1062.*Duplicate entry .*for key.*PRIMARY.*', error.args[0]) \
659
+ or match('.*duplicate entry.*key.*PRIMARY.*', error.args[0]) \
660
+ or match('.*IntegrityError.*columns? .*not unique.*', error.args[0]):
661
+ raise exception.FileAlreadyExists(error.args)
662
+ else:
663
+ raise exception.RucioException(error.args)
664
+
665
+
666
+ @transactional_session
667
+ def __add_collections_to_container(
668
+ parent_did: models.DataIdentifier,
669
+ collections_temp_table: Any,
670
+ collections: "Mapping[tuple[InternalScope, str], Mapping[str, Any]]",
671
+ account: "InternalAccount",
672
+ *,
673
+ session: "Session"
674
+ ) -> None:
675
+ """
676
+ Add collections (datasets or containers) to container.
677
+
678
+ :param parent_did: the DataIdentifier object of the parent did
679
+ :param collections: .
680
+ :param account: The account owner.
681
+ :param session: The database session in use.
682
+ """
683
+
684
+ if (parent_did.scope, parent_did.name) in collections:
685
+ raise exception.UnsupportedOperation('Self-append is not valid!')
686
+
687
+ stmt = select(
688
+ collections_temp_table.scope,
689
+ collections_temp_table.name,
690
+ models.DataIdentifier.scope.label('did_scope'),
691
+ models.DataIdentifier.did_type
692
+ ).outerjoin_from(
693
+ collections_temp_table,
694
+ models.DataIdentifier,
695
+ and_(models.DataIdentifier.scope == collections_temp_table.scope,
696
+ models.DataIdentifier.name == collections_temp_table.name),
697
+ )
698
+
699
+ container_parents = None
700
+ child_type = None
701
+ for row in session.execute(stmt):
702
+
703
+ if row.did_scope is None:
704
+ raise exception.DataIdentifierNotFound("Data identifier '%(scope)s:%(name)s' not found" % row)
705
+
706
+ if row.did_type == DIDType.FILE:
707
+ raise exception.UnsupportedOperation("Adding a file (%s:%s) to a container (%s:%s) is forbidden" % (row.scope, row.name, parent_did.scope, parent_did.name))
708
+
709
+ if not child_type:
710
+ child_type = row.did_type
711
+
712
+ if child_type != row.did_type:
713
+ raise exception.UnsupportedOperation("Mixed collection is not allowed: '%s:%s' is a %s (expected type: %s)" % (row.scope, row.name, row.did_type, child_type))
714
+
715
+ if child_type == DIDType.CONTAINER:
716
+ if container_parents is None:
717
+ container_parents = {(parent['scope'], parent['name']) for parent in list_all_parent_dids(scope=parent_did.scope, name=parent_did.name, session=session)}
718
+
719
+ if (row.scope, row.name) in container_parents:
720
+ raise exception.UnsupportedOperation('Circular attachment detected. %s:%s is already a parent of %s:%s' % (row.scope, row.name, parent_did.scope, parent_did.name))
721
+
722
+ messages = []
723
+ for c in collections.values():
724
+ did_asso = models.DataIdentifierAssociation(
725
+ scope=parent_did.scope,
726
+ name=parent_did.name,
727
+ child_scope=c['scope'],
728
+ child_name=c['name'],
729
+ did_type=DIDType.CONTAINER,
730
+ child_type=child_type,
731
+ rule_evaluation=True
732
+ )
733
+ did_asso.save(session=session, flush=False)
734
+ # Send AMI messages
735
+ if child_type == DIDType.CONTAINER:
736
+ chld_type = 'CONTAINER'
737
+ elif child_type == DIDType.DATASET:
738
+ chld_type = 'DATASET'
739
+ else:
740
+ chld_type = 'UNKNOWN'
741
+
742
+ message = {'account': account.external,
743
+ 'scope': parent_did.scope.external,
744
+ 'name': parent_did.name,
745
+ 'childscope': c['scope'].external,
746
+ 'childname': c['name'],
747
+ 'childtype': chld_type}
748
+ if account.vo != 'def':
749
+ message['vo'] = account.vo
750
+ messages.append(message)
751
+
752
+ try:
753
+ for message in messages:
754
+ add_message('REGISTER_CNT', message, session=session)
755
+ session.flush()
756
+ except IntegrityError as error:
757
+ if match('.*IntegrityError.*ORA-02291: integrity constraint .*CONTENTS_CHILD_ID_FK.*violated - parent key not found.*', error.args[0]) \
758
+ or match('.*IntegrityError.*1452.*Cannot add or update a child row: a foreign key constraint fails.*', error.args[0]) \
759
+ or match('.*IntegrityError.*foreign key constraints? failed.*', error.args[0]) \
760
+ or match('.*IntegrityError.*insert or update on table.*violates foreign key constraint.*', error.args[0]):
761
+ raise exception.DataIdentifierNotFound("Data identifier not found")
762
+ elif match('.*IntegrityError.*ORA-00001: unique constraint .*CONTENTS_PK.*violated.*', error.args[0]) \
763
+ or match('.*IntegrityError.*1062.*Duplicate entry .*for key.*PRIMARY.*', error.args[0]) \
764
+ or match('.*IntegrityError.*columns? scope.*name.*child_scope.*child_name.*not unique.*', error.args[0]) \
765
+ or match('.*IntegrityError.*duplicate key value violates unique constraint.*', error.args[0]) \
766
+ or match('.*UniqueViolation.*duplicate key value violates unique constraint.*', error.args[0]) \
767
+ or match('.*IntegrityError.* UNIQUE constraint failed: contents.scope, contents.name, contents.child_scope, contents.child_name.*', error.args[0]):
768
+ raise exception.DuplicateContent(error.args)
769
+ raise exception.RucioException(error.args)
770
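Before attaching containers, the code above guards against self-append and against circular attachment by resolving every ancestor of the parent did and refusing children that already appear among them. A small standalone sketch of that check over an in-memory child-to-parents mapping (names and data are illustrative only):

    # child -> set of direct parents
    PARENTS = {
        ('scope', 'dataset_a'): {('scope', 'container_x')},
        ('scope', 'container_x'): {('scope', 'container_top')},
    }

    def all_ancestors(did):
        """Recursively collect every ancestor of a did (analogous to list_all_parent_dids)."""
        for parent in PARENTS.get(did, set()):
            yield parent
            yield from all_ancestors(parent)

    def check_attach(parent, child):
        if parent == child:
            raise ValueError('Self-append is not valid!')
        if child in set(all_ancestors(parent)):
            raise ValueError('Circular attachment detected')

    check_attach(('scope', 'container_x'), ('scope', 'dataset_b'))          # fine
    try:
        check_attach(('scope', 'container_x'), ('scope', 'container_top'))  # would create a cycle
    except ValueError as err:
        print(err)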
+
771
+
772
+ @transactional_session
773
+ def delete_dids(
774
+ dids: "Sequence[Mapping[str, Any]]",
775
+ account: "InternalAccount",
776
+ expire_rules: bool = False,
777
+ *,
778
+ session: "Session",
779
+ logger: "LoggerFunction" = logging.log,
780
+ ) -> None:
781
+ """
782
+ Delete data identifiers
783
+
784
+ :param dids: The list of dids to delete.
785
+ :param account: The account.
786
+ :param expire_rules: Expire large rules instead of deleting them right away. This should only be used by the Undertaker, since the method may then
787
+ return normally even though a did was not deleted; it relies on the Undertaker retrying the
788
+ expired did.
789
+ :param session: The database session in use.
790
+ :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
791
+ """
792
+ if not dids:
793
+ return
794
+
795
+ not_purge_replicas = []
796
+
797
+ archive_dids = config_get_bool('deletion', 'archive_dids', default=False, session=session)
798
+ archive_content = config_get_bool('deletion', 'archive_content', default=False, session=session)
799
+
800
+ file_dids = {}
801
+ collection_dids = {}
802
+ all_dids = {}
803
+ for did in dids:
804
+ scope, name = did['scope'], did['name']
805
+ logger(logging.INFO, 'Removing did %(scope)s:%(name)s (%(did_type)s)' % did)
806
+ all_dids[scope, name] = {'scope': scope, 'name': name}
807
+ if did['did_type'] == DIDType.FILE:
808
+ file_dids[scope, name] = {'scope': scope, 'name': name}
809
+ else:
810
+ collection_dids[scope, name] = {'scope': scope, 'name': name}
811
+
812
+ # ATLAS LOCALGROUPDISK Archive policy
813
+ if did['did_type'] == DIDType.DATASET and did['scope'].external != 'archive':
814
+ try:
815
+ rucio.core.rule.archive_localgroupdisk_datasets(scope=did['scope'], name=did['name'], session=session)
816
+ except exception.UndefinedPolicy:
817
+ pass
818
+
819
+ if did['purge_replicas'] is False:
820
+ not_purge_replicas.append((did['scope'], did['name']))
821
+
822
+ if archive_content:
823
+ insert_content_history(filter_=[and_(models.DataIdentifierAssociation.scope == did['scope'],
824
+ models.DataIdentifierAssociation.name == did['name'])],
825
+ did_created_at=did.get('created_at'),
826
+ session=session)
827
+
828
+ # Send message
829
+ message = {'account': account.external,
830
+ 'scope': did['scope'].external,
831
+ 'name': did['name']}
832
+ if did['scope'].vo != 'def':
833
+ message['vo'] = did['scope'].vo
834
+
835
+ add_message('ERASE', message, session=session)
836
+
837
+ if not file_dids:
838
+ data_in_temp_table = all_dids = collection_dids
839
+ elif not collection_dids:
840
+ data_in_temp_table = all_dids = file_dids
841
+ else:
842
+ data_in_temp_table = all_dids
843
+
844
+ if not all_dids:
845
+ return
846
+
847
+ temp_table = temp_table_mngr(session).create_scope_name_table()
848
+ values = list(data_in_temp_table.values())
849
+ stmt = insert(
850
+ temp_table
851
+ )
852
+ session.execute(stmt, values)
853
+
854
+ # Delete rules on did
855
+ skip_deletion = False # Skip deletion in case of expiration of a rule
856
+ with METRICS.timer('delete_dids.rules'):
857
+ stmt = select(
858
+ models.ReplicationRule.id,
859
+ models.ReplicationRule.scope,
860
+ models.ReplicationRule.name,
861
+ models.ReplicationRule.rse_expression,
862
+ models.ReplicationRule.locks_ok_cnt,
863
+ models.ReplicationRule.locks_replicating_cnt,
864
+ models.ReplicationRule.locks_stuck_cnt
865
+ ).join_from(
866
+ temp_table,
867
+ models.ReplicationRule,
868
+ and_(models.ReplicationRule.scope == temp_table.scope,
869
+ models.ReplicationRule.name == temp_table.name)
870
+ )
871
+ for (rule_id, scope, name, rse_expression, locks_ok_cnt, locks_replicating_cnt, locks_stuck_cnt) in session.execute(stmt):
872
+ logger(logging.DEBUG, 'Removing rule %s for did %s:%s on RSE-Expression %s' % (str(rule_id), scope, name, rse_expression))
873
+
874
+ # Propagate purge_replicas from did to rules
875
+ if (scope, name) in not_purge_replicas:
876
+ purge_replicas = False
877
+ else:
878
+ purge_replicas = True
879
+ if expire_rules and locks_ok_cnt + locks_replicating_cnt + locks_stuck_cnt > int(config_get_int('undertaker', 'expire_rules_locks_size', default=10000, session=session)):
880
+ # Expire the rule (soft=True)
881
+ rucio.core.rule.delete_rule(rule_id=rule_id, purge_replicas=purge_replicas, soft=True, delete_parent=True, nowait=True, session=session)
882
+ # Update expiration of did
883
+ set_metadata(scope=scope, name=name, key='lifetime', value=3600 * 24, session=session)
884
+ skip_deletion = True
885
+ else:
886
+ rucio.core.rule.delete_rule(rule_id=rule_id, purge_replicas=purge_replicas, delete_parent=True, nowait=True, session=session)
887
+
888
+ if skip_deletion:
889
+ return
890
+
891
+ # Detach from parent dids:
892
+ existing_parent_dids = False
893
+ with METRICS.timer('delete_dids.parent_content'):
894
+ stmt = select(
895
+ models.DataIdentifierAssociation
896
+ ).join_from(
897
+ temp_table,
898
+ models.DataIdentifierAssociation,
899
+ and_(models.DataIdentifierAssociation.child_scope == temp_table.scope,
900
+ models.DataIdentifierAssociation.child_name == temp_table.name)
901
+ )
902
+ for parent_did in session.execute(stmt).scalars():
903
+ existing_parent_dids = True
904
+ detach_dids(scope=parent_did.scope, name=parent_did.name, dids=[{'scope': parent_did.child_scope, 'name': parent_did.child_name}], session=session)
905
+
906
+ # Remove generic did metadata
907
+ must_delete_did_meta = True
908
+ if session.bind.dialect.name == 'oracle':
909
+ oracle_version = int(session.connection().connection.version.split('.')[0])
910
+ if oracle_version < 12:
911
+ must_delete_did_meta = False
912
+ if must_delete_did_meta:
913
+ stmt = delete(
914
+ models.DidMeta
915
+ ).where(
916
+ exists(
917
+ select(1)
918
+ ).where(
919
+ and_(models.DidMeta.scope == temp_table.scope,
920
+ models.DidMeta.name == temp_table.name)
921
+ )
922
+ ).execution_options(
923
+ synchronize_session=False
924
+ )
925
+ with METRICS.timer('delete_dids.did_meta'):
926
+ session.execute(stmt)
927
+
928
+ # Prepare the common part of the query for updating bad replicas if they exist
929
+ bad_replica_stmt = update(
930
+ models.BadReplica
931
+ ).where(
932
+ models.BadReplica.state == BadFilesStatus.BAD
933
+ ).values({
934
+ models.BadReplica.state: BadFilesStatus.DELETED,
935
+ models.BadReplica.updated_at: datetime.utcnow(),
936
+ }).execution_options(
937
+ synchronize_session=False
938
+ )
939
+
940
+ if file_dids:
941
+ if data_in_temp_table is not file_dids:
942
+ stmt = delete(
943
+ temp_table
944
+ )
945
+ session.execute(stmt)
946
+
947
+ values = list(file_dids.values())
948
+ stmt = insert(
949
+ temp_table
950
+ )
951
+ session.execute(stmt, values)
952
+ data_in_temp_table = file_dids
953
+
954
+ # update bad files passed directly as input
955
+ stmt = bad_replica_stmt.where(
956
+ exists(
957
+ select(1)
958
+ ).where(
959
+ and_(models.BadReplica.scope == temp_table.scope,
960
+ models.BadReplica.name == temp_table.name)
961
+ )
962
+ )
963
+ session.execute(stmt)
964
+
965
+ if collection_dids:
966
+ if data_in_temp_table is not collection_dids:
967
+ stmt = delete(
968
+ temp_table
969
+ )
970
+ session.execute(stmt)
971
+
972
+ values = list(collection_dids.values())
973
+ stmt = insert(
974
+ temp_table
975
+ )
976
+ session.execute(stmt, values)
977
+ data_in_temp_table = collection_dids
978
+
979
+ # Find files of datasets passed as input and put them in a separate temp table
980
+ resolved_files_temp_table = temp_table_mngr(session).create_scope_name_table()
981
+ stmt = insert(
982
+ resolved_files_temp_table,
983
+ ).from_select(
984
+ ['scope', 'name'],
985
+ select(
986
+ models.DataIdentifierAssociation.child_scope,
987
+ models.DataIdentifierAssociation.child_name,
988
+ ).distinct(
989
+ ).join_from(
990
+ temp_table,
991
+ models.DataIdentifierAssociation,
992
+ and_(models.DataIdentifierAssociation.scope == temp_table.scope,
993
+ models.DataIdentifierAssociation.name == temp_table.name)
994
+ ).where(
995
+ models.DataIdentifierAssociation.child_type == DIDType.FILE
996
+ )
997
+ )
998
+ session.execute(stmt)
999
+
1000
+ # update bad files from datasets
1001
+ stmt = bad_replica_stmt.where(
1002
+ exists(
1003
+ select(1)
1004
+ ).where(
1005
+ and_(models.BadReplica.scope == resolved_files_temp_table.scope,
1006
+ models.BadReplica.name == resolved_files_temp_table.name)
1007
+ )
1008
+ )
1009
+ session.execute(stmt)
1010
+
1011
+ # Set Epoch tombstone for the files replicas inside the did
1012
+ if config_get_bool('undertaker', 'purge_all_replicas', default=False, session=session):
1013
+ with METRICS.timer('delete_dids.file_content'):
1014
+ stmt = update(
1015
+ models.RSEFileAssociation
1016
+ ).where(
1017
+ exists(
1018
+ select(1)
1019
+ ).where(
1020
+ and_(models.RSEFileAssociation.scope == resolved_files_temp_table.scope,
1021
+ models.RSEFileAssociation.name == resolved_files_temp_table.name)
1022
+ )
1023
+ ).where(
1024
+ and_(models.RSEFileAssociation.lock_cnt == 0,
1025
+ models.RSEFileAssociation.tombstone != null())
1026
+ ).values({
1027
+ models.RSEFileAssociation.tombstone: datetime(1970, 1, 1)
1028
+ }).execution_options(
1029
+ synchronize_session=False
1030
+ )
1031
+ session.execute(stmt)
1032
+
1033
+ # Remove content
1034
+ with METRICS.timer('delete_dids.content'):
1035
+ stmt = delete(
1036
+ models.DataIdentifierAssociation
1037
+ ).where(
1038
+ exists(
1039
+ select(1)
1040
+ ).where(
1041
+ and_(models.DataIdentifierAssociation.scope == temp_table.scope,
1042
+ models.DataIdentifierAssociation.name == temp_table.name)
1043
+ )
1044
+ ).execution_options(
1045
+ synchronize_session=False
1046
+ )
1047
+ rowcount = session.execute(stmt).rowcount
1048
+ METRICS.counter(name='delete_dids.content_rowcount').inc(rowcount)
1049
+
1050
+ # Remove CollectionReplica
1051
+ with METRICS.timer('delete_dids.collection_replicas'):
1052
+ stmt = delete(
1053
+ models.CollectionReplica
1054
+ ).where(
1055
+ exists(
1056
+ select(1)
1057
+ ).where(
1058
+ and_(models.CollectionReplica.scope == temp_table.scope,
1059
+ models.CollectionReplica.name == temp_table.name)
1060
+ )
1061
+ ).execution_options(
1062
+ synchronize_session=False
1063
+ )
1064
+ session.execute(stmt)
1065
+
1066
+ # remove data identifier
1067
+ if existing_parent_dids:
1068
+ # Exit the method early to give the Judge time to remove locks (otherwise, due to foreign keys, did removal does not work)
1069
+ logger(logging.DEBUG, 'Leaving delete_dids early for Judge-Evaluator checks')
1070
+ return
1071
+
1072
+ if collection_dids:
1073
+ if data_in_temp_table is not collection_dids:
1074
+ stmt = delete(
1075
+ temp_table
1076
+ )
1077
+ session.execute(stmt)
1078
+
1079
+ values = list(collection_dids.values())
1080
+ stmt = insert(
1081
+ temp_table
1082
+ )
1083
+ session.execute(stmt, values)
1084
+ data_in_temp_table = collection_dids
1085
+
1086
+ with METRICS.timer('delete_dids.dids_followed'):
1087
+ stmt = delete(
1088
+ models.DidFollowed
1089
+ ).where(
1090
+ exists(
1091
+ select(1)
1092
+ ).where(
1093
+ and_(models.DidFollowed.scope == temp_table.scope,
1094
+ models.DidFollowed.name == temp_table.name)
1095
+ )
1096
+ ).execution_options(
1097
+ synchronize_session=False
1098
+ )
1099
+ session.execute(stmt)
1100
+
1101
+ with METRICS.timer('delete_dids.dids'):
1102
+ dids_to_delete_filter = exists(
1103
+ select(1)
1104
+ ).where(
1105
+ and_(models.DataIdentifier.scope == temp_table.scope,
1106
+ models.DataIdentifier.name == temp_table.name,
1107
+ models.DataIdentifier.did_type.in_([DIDType.CONTAINER, DIDType.DATASET]))
1108
+ )
1109
+
1110
+ if archive_dids:
1111
+ insert_deleted_dids(filter_=dids_to_delete_filter, session=session)
1112
+
1113
+ stmt = delete(
1114
+ models.DataIdentifier
1115
+ ).where(
1116
+ dids_to_delete_filter,
1117
+ ).execution_options(
1118
+ synchronize_session=False
1119
+ )
1120
+ session.execute(stmt)
1121
+
1122
+ if file_dids:
1123
+ if data_in_temp_table is not file_dids:
1124
+ stmt = delete(
1125
+ temp_table
1126
+ )
1127
+ session.execute(stmt)
1128
+
1129
+ values = list(file_dids.values())
1130
+ stmt = insert(
1131
+ temp_table
1132
+ )
1133
+ session.execute(stmt, values)
1134
+ data_in_temp_table = file_dids
1135
+ stmt = update(
1136
+ models.DataIdentifier
1137
+ ).where(
1138
+ exists(
1139
+ select(1)
1140
+ ).where(
1141
+ and_(models.DataIdentifier.scope == temp_table.scope,
1142
+ models.DataIdentifier.name == temp_table.name)
1143
+ )
1144
+ ).where(
1145
+ models.DataIdentifier.did_type == DIDType.FILE
1146
+ ).values({
1147
+ models.DataIdentifier.expired_at: None
1148
+ }).execution_options(
1149
+ synchronize_session=False
1150
+ )
1151
+ session.execute(stmt)
1152
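A hedged usage sketch of delete_dids as a daemon such as the undertaker might call it. The import paths, scope, account and the pre-resolved 'did_type'/'purge_replicas' keys in each input dict are assumptions based on the keys read above, and a configured Rucio database is required for this to actually run (the @transactional_session decorator supplies a session when none is passed).

    from rucio.common.types import InternalAccount, InternalScope  # assumed import paths
    from rucio.core.did import delete_dids
    from rucio.db.sqla.constants import DIDType

    dids_to_delete = [{
        'scope': InternalScope('user.jdoe'),        # hypothetical scope
        'name': 'user.jdoe.test.dataset',           # hypothetical name
        'did_type': DIDType.DATASET,
        'purge_replicas': True,
    }]

    delete_dids(dids=dids_to_delete, account=InternalAccount('root'), expire_rules=False)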
+
1153
+
1154
+ @transactional_session
1155
+ def detach_dids(
1156
+ scope: "InternalScope",
1157
+ name: str,
1158
+ dids: "Sequence[Mapping[str, Any]]",
1159
+ *,
1160
+ session: "Session"
1161
+ ) -> None:
1162
+ """
1163
+ Detach data identifier
1164
+
1165
+ :param scope: The scope name.
1166
+ :param name: The data identifier name.
1167
+ :param dids: The content.
1168
+ :param session: The database session in use.
1169
+ """
1170
+ # Row Lock the parent did
1171
+ stmt = select(
1172
+ models.DataIdentifier
1173
+ ).where(
1174
+ and_(models.DataIdentifier.scope == scope,
1175
+ models.DataIdentifier.name == name,
1176
+ or_(models.DataIdentifier.did_type == DIDType.CONTAINER,
1177
+ models.DataIdentifier.did_type == DIDType.DATASET))
1178
+ )
1179
+ try:
1180
+ did = session.execute(stmt).scalar_one()
1181
+ # Mark for rule re-evaluation
1182
+ models.UpdatedDID(
1183
+ scope=scope,
1184
+ name=name,
1185
+ rule_evaluation_action=DIDReEvaluation.DETACH
1186
+ ).save(session=session, flush=False)
1187
+ except NoResultFound:
1188
+ raise exception.DataIdentifierNotFound(f"Data identifier '{scope}:{name}' not found")
1189
+
1190
+ # TODO: should judge target did's status: open, monotonic, close.
1191
+ stmt = select(
1192
+ models.DataIdentifierAssociation
1193
+ ).where(
1194
+ and_(models.DataIdentifierAssociation.scope == scope,
1195
+ models.DataIdentifierAssociation.name == name)
1196
+ ).limit(
1197
+ 1
1198
+ )
1199
+ if session.execute(stmt).scalar() is None:
1200
+ raise exception.DataIdentifierNotFound(f"Data identifier '{scope}:{name}' has no child data identifiers.")
1201
+ for source in dids:
1202
+ if (scope == source['scope']) and (name == source['name']):
1203
+ raise exception.UnsupportedOperation('Self-detach is not valid.')
1204
+ child_scope = source['scope']
1205
+ child_name = source['name']
1206
+ curr_stmt = stmt.where(
1207
+ and_(models.DataIdentifierAssociation.child_scope == child_scope,
1208
+ models.DataIdentifierAssociation.child_name == child_name)
1209
+ ).limit(
1210
+ 1
1211
+ )
1212
+ associ_did = session.execute(curr_stmt).scalar()
1213
+ if associ_did is None:
1214
+ raise exception.DataIdentifierNotFound(f"Data identifier '{child_scope}:{child_name}' not found under '{scope}:{name}'")
1215
+
1216
+ child_type = associ_did.child_type
1217
+ child_size = associ_did.bytes
1218
+ child_events = associ_did.events
1219
+ if did.length:
1220
+ did.length -= 1
1221
+ if did.bytes and child_size:
1222
+ did.bytes -= child_size
1223
+ if did.events and child_events:
1224
+ did.events -= child_events
1225
+ associ_did.delete(session=session)
1226
+
1227
+ # Archive contents
1228
+ # If reattach happens, merge the latest due to primary key constraint
1229
+ new_detach = models.DataIdentifierAssociationHistory(scope=associ_did.scope,
1230
+ name=associ_did.name,
1231
+ child_scope=associ_did.child_scope,
1232
+ child_name=associ_did.child_name,
1233
+ did_type=associ_did.did_type,
1234
+ child_type=associ_did.child_type,
1235
+ bytes=associ_did.bytes,
1236
+ adler32=associ_did.adler32,
1237
+ md5=associ_did.md5,
1238
+ guid=associ_did.guid,
1239
+ events=associ_did.events,
1240
+ rule_evaluation=associ_did.rule_evaluation,
1241
+ did_created_at=did.created_at,
1242
+ created_at=associ_did.created_at,
1243
+ updated_at=associ_did.updated_at,
1244
+ deleted_at=datetime.utcnow())
1245
+ new_detach.save(session=session, flush=False)
1246
+
1247
+ # Send message for AMI. To be removed in the future when they use the DETACH messages
1248
+ if did.did_type == DIDType.CONTAINER:
1249
+ if child_type == DIDType.CONTAINER:
1250
+ chld_type = 'CONTAINER'
1251
+ elif child_type == DIDType.DATASET:
1252
+ chld_type = 'DATASET'
1253
+ else:
1254
+ chld_type = 'UNKNOWN'
1255
+
1256
+ message = {'scope': scope.external,
1257
+ 'name': name,
1258
+ 'childscope': source['scope'].external,
1259
+ 'childname': source['name'],
1260
+ 'childtype': chld_type}
1261
+ if scope.vo != 'def':
1262
+ message['vo'] = scope.vo
1263
+
1264
+ add_message('ERASE_CNT', message, session=session)
1265
+
1266
+ message = {'scope': scope.external,
1267
+ 'name': name,
1268
+ 'did_type': str(did.did_type),
1269
+ 'child_scope': source['scope'].external,
1270
+ 'child_name': str(source['name']),
1271
+ 'child_type': str(child_type)}
1272
+ if scope.vo != 'def':
1273
+ message['vo'] = scope.vo
1274
+
1275
+ add_message('DETACH', message, session=session)
1276
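A minimal, hedged example of detaching one file from a dataset with the function above; scope and names are placeholders, the import paths are assumptions, and a configured Rucio database session is assumed to be provided by the decorator.

    from rucio.common.types import InternalScope  # assumed import path
    from rucio.core.did import detach_dids

    scope = InternalScope('user.jdoe')  # hypothetical scope
    detach_dids(scope=scope,
                name='user.jdoe.test.dataset',
                dids=[{'scope': scope, 'name': 'user.jdoe.test.file.0001'}])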
+
1277
+
1278
+ @stream_session
1279
+ def list_new_dids(
1280
+ did_type: Union[str, "DIDType"],
1281
+ thread: Optional[int] = None,
1282
+ total_threads: Optional[int] = None,
1283
+ chunk_size: int = 1000,
1284
+ *,
1285
+ session: "Session",
1286
+ ) -> "Iterator[dict[str, Any]]":
1287
+ """
1288
+ List recent identifiers.
1289
+
1290
+ :param did_type: The DID type.
1291
+ :param thread: The assigned thread for this necromancer.
1292
+ :param total_threads: The total number of threads of all necromancers.
1293
+ :param chunk_size: Number of requests to return per yield.
1294
+ :param session: The database session in use.
1295
+ """
1296
+
1297
+ sub_query = select(
1298
+ 1
1299
+ ).prefix_with(
1300
+ "/*+ INDEX(RULES RULES_SCOPE_NAME_IDX) */", dialect='oracle'
1301
+ ).where(
1302
+ and_(models.DataIdentifier.scope == models.ReplicationRule.scope,
1303
+ models.DataIdentifier.name == models.ReplicationRule.name,
1304
+ models.ReplicationRule.state == RuleState.INJECT)
1305
+ )
1306
+
1307
+ select_stmt = select(
1308
+ models.DataIdentifier
1309
+ ).with_hint(
1310
+ models.DataIdentifier,
1311
+ 'INDEX(dids DIDS_IS_NEW_IDX)',
1312
+ 'oracle'
1313
+ ).where(
1314
+ and_(models.DataIdentifier.is_new == true(),
1315
+ ~exists(sub_query))
1316
+ )
1317
+
1318
+ if did_type:
1319
+ if isinstance(did_type, str):
1320
+ select_stmt = select_stmt.where(
1321
+ models.DataIdentifier.did_type == DIDType[did_type]
1322
+ )
1323
+ elif isinstance(did_type, Enum):
1324
+ select_stmt = select_stmt.where(
1325
+ models.DataIdentifier.did_type == did_type
1326
+ )
1327
+
1328
+ select_stmt = filter_thread_work(session=session, query=select_stmt, total_threads=total_threads, thread_id=thread, hash_variable='name')
1329
+
1330
+ row_count = 0
1331
+ for chunk in session.execute(select_stmt).yield_per(10).scalars():
1332
+ row_count += 1
1333
+ if row_count <= chunk_size:
1334
+ yield {'scope': chunk.scope, 'name': chunk.name, 'did_type': chunk.did_type} # TODO Change this to the proper filebytes [RUCIO-199]
1335
+ else:
1336
+ break
1337
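list_new_dids delegates work splitting to filter_thread_work, which restricts the query so that each worker thread only sees the dids whose hashed name falls into its slot. A standalone sketch of that kind of hash-based partitioning (an illustration of the idea only, not the actual SQL emitted by filter_thread_work):

    from zlib import crc32

    names = ['dataset_%04d' % i for i in range(10)]
    total_threads = 3

    def belongs_to(name: str, thread_id: int, total: int) -> bool:
        # Deterministic hash of the name, reduced modulo the number of workers.
        return crc32(name.encode()) % total == thread_id

    for thread_id in range(total_threads):
        mine = [n for n in names if belongs_to(n, thread_id, total_threads)]
        print(thread_id, mine)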
+
1338
+
1339
+ @transactional_session
1340
+ def set_new_dids(
1341
+ dids: "Sequence[Mapping[str, Any]]",
1342
+ new_flag: Optional[bool],
1343
+ *,
1344
+ session: "Session"
1345
+ ) -> bool:
1346
+ """
1347
+ Set/reset the flag new
1348
+
1349
+ :param dids: A list of dids
1350
+ :param new_flag: A boolean to flag new DIDs.
1351
+ :param session: The database session in use.
1352
+ """
1353
+ if session.bind.dialect.name == 'postgresql':
1354
+ new_flag = bool(new_flag)
1355
+ for did in dids:
1356
+ try:
1357
+ stmt = update(
1358
+ models.DataIdentifier
1359
+ ).where(
1360
+ and_(models.DataIdentifier.scope == did['scope'],
1361
+ models.DataIdentifier.name == did['name'])
1362
+ ).values({
1363
+ models.DataIdentifier.is_new: new_flag
1364
+ }).execution_options(
1365
+ synchronize_session=False
1366
+ )
1367
+ rowcount = session.execute(stmt).rowcount
1368
+ if not rowcount:
1369
+ raise exception.DataIdentifierNotFound("Data identifier '%s:%s' not found" % (did['scope'], did['name']))
1370
+ except DatabaseError as error:
1371
+ raise exception.DatabaseException('%s : Cannot update %s:%s' % (error.args[0], did['scope'], did['name']))
1372
+ try:
1373
+ session.flush()
1374
+ except IntegrityError as error:
1375
+ raise exception.RucioException(error.args[0])
1376
+ except DatabaseError as error:
1377
+ raise exception.RucioException(error.args[0])
1378
+ return True
1379
+
1380
+
1381
+ @stream_session
1382
+ def list_content(
1383
+ scope: "InternalScope",
1384
+ name: str,
1385
+ *,
1386
+ session: "Session"
1387
+ ) -> "Iterator[dict[str, Any]]":
1388
+ """
1389
+ List data identifier contents.
1390
+
1391
+ :param scope: The scope name.
1392
+ :param name: The data identifier name.
1393
+ :param session: The database session in use.
1394
+ """
1395
+ stmt = select(
1396
+ models.DataIdentifierAssociation
1397
+ ).with_hint(
1398
+ models.DataIdentifierAssociation,
1399
+ 'INDEX(CONTENTS CONTENTS_PK)',
1400
+ 'oracle'
1401
+ ).where(
1402
+ and_(models.DataIdentifierAssociation.scope == scope,
1403
+ models.DataIdentifierAssociation.name == name)
1404
+ )
1405
+ children_found = False
1406
+ for tmp_did in session.execute(stmt).yield_per(5).scalars():
1407
+ children_found = True
1408
+ yield {'scope': tmp_did.child_scope, 'name': tmp_did.child_name, 'type': tmp_did.child_type,
1409
+ 'bytes': tmp_did.bytes, 'adler32': tmp_did.adler32, 'md5': tmp_did.md5}
1410
+ if not children_found:
1411
+ # Raise exception if the did doesn't exist
1412
+ __get_did(scope=scope, name=name, session=session)
1413
+
1414
+
1415
+ @stream_session
1416
+ def list_content_history(
1417
+ scope: "InternalScope",
1418
+ name: str,
1419
+ *,
1420
+ session: "Session"
1421
+ ) -> "Iterator[dict[str, Any]]":
1422
+ """
1423
+ List data identifier contents history.
1424
+
1425
+ :param scope: The scope name.
1426
+ :param name: The data identifier name.
1427
+ :param session: The database session in use.
1428
+ """
1429
+ try:
1430
+ stmt = select(
1431
+ models.DataIdentifierAssociationHistory
1432
+ ).where(
1433
+ and_(models.DataIdentifierAssociationHistory.scope == scope,
1434
+ models.DataIdentifierAssociationHistory.name == name)
1435
+ )
1436
+ for tmp_did in session.execute(stmt).yield_per(5).scalars():
1437
+ yield {'scope': tmp_did.child_scope, 'name': tmp_did.child_name,
1438
+ 'type': tmp_did.child_type,
1439
+ 'bytes': tmp_did.bytes, 'adler32': tmp_did.adler32, 'md5': tmp_did.md5,
1440
+ 'deleted_at': tmp_did.deleted_at, 'created_at': tmp_did.created_at,
1441
+ 'updated_at': tmp_did.updated_at}
1442
+ except NoResultFound:
1443
+ raise exception.DataIdentifierNotFound(f"Data identifier '{scope}:{name}' not found")
1444
+
1445
+
1446
+ @stream_session
1447
+ def list_parent_dids(
1448
+ scope: "InternalScope",
1449
+ name: str,
1450
+ order_by: Optional[list[str]] = None,
1451
+ *,
1452
+ session: "Session"
1453
+ ) -> "Iterator[dict[str, Any]]":
1454
+ """
1455
+ List parent datasets and containers of a did.
1456
+
1457
+ :param scope: The scope.
1458
+ :param name: The name.
1459
+ :param order_by: List of parameters to order the query by. Possible values: ['scope', 'name', 'did_type', 'created_at'].
1460
+ :param session: The database session.
1461
+ :returns: List of dids.
1462
+ :rtype: Generator.
1463
+ """
1464
+
1465
+ if order_by is None:
1466
+ order_by = []
1467
+
1468
+ stmt = select(
1469
+ models.DataIdentifierAssociation.scope,
1470
+ models.DataIdentifierAssociation.name,
1471
+ models.DataIdentifierAssociation.did_type,
1472
+ models.DataIdentifier.created_at
1473
+ ).where(
1474
+ and_(models.DataIdentifierAssociation.child_scope == scope,
1475
+ models.DataIdentifierAssociation.child_name == name,
1476
+ models.DataIdentifier.scope == models.DataIdentifierAssociation.scope,
1477
+ models.DataIdentifier.name == models.DataIdentifierAssociation.name)
1478
+ ).order_by(
1479
+ *order_by
1480
+ )
1481
+
1482
+ for did in session.execute(stmt).yield_per(5):
1483
+ yield {'scope': did.scope, 'name': did.name, 'type': did.did_type}
1484
+
1485
+
1486
+ @stream_session
1487
+ def list_all_parent_dids(
1488
+ scope: "InternalScope",
1489
+ name: str,
1490
+ *,
1491
+ session: "Session"
1492
+ ) -> "Iterator[dict[str, Any]]":
1493
+ """
1494
+ List all parent datasets and containers of a did, no matter on what level.
1495
+
1496
+ :param scope: The scope.
1497
+ :param name: The name.
1498
+ :param session: The database session.
1499
+ :returns: List of dids.
1500
+ :rtype: Generator.
1501
+ """
1502
+
1503
+ stmt = select(
1504
+ models.DataIdentifierAssociation.scope,
1505
+ models.DataIdentifierAssociation.name,
1506
+ models.DataIdentifierAssociation.did_type
1507
+ ).where(
1508
+ and_(models.DataIdentifierAssociation.child_scope == scope,
1509
+ models.DataIdentifierAssociation.child_name == name)
1510
+ )
1511
+ for did in session.execute(stmt).yield_per(5):
1512
+ yield {'scope': did.scope, 'name': did.name, 'type': did.did_type}
1513
+ # Re-yield everything produced by the recursive call; in Python 3 this nested for-loop could equally be written with 'yield from'.
1514
+ for pdid in list_all_parent_dids(scope=did.scope, name=did.name, session=session):
1515
+ yield {'scope': pdid['scope'], 'name': pdid['name'], 'type': pdid['type']}
1516
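The nested loop above re-yields everything produced by the recursive call; in Python 3 the same recursion can be expressed with 'yield from'. A small standalone sketch over a toy parent mapping:

    PARENTS = {'file_1': ['dataset_a'], 'dataset_a': ['container_x'], 'container_x': []}

    def list_all_parents(name):
        for parent in PARENTS.get(name, []):
            yield parent
            yield from list_all_parents(parent)   # replaces the explicit nested for-loop

    print(list(list_all_parents('file_1')))       # ['dataset_a', 'container_x']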
+
1517
+
1518
+ def list_child_dids_stmt(
1519
+ input_dids_table: Any,
1520
+ did_type: DIDType,
1521
+ ) -> "Select[tuple[InternalScope, str]]":
1522
+ """
1523
+ Build and returns a query which recursively lists children dids of type `did_type`
1524
+ for the dids given as input in a scope/name (temporary) table.
1525
+
1526
+ did_type defines the desired type of DIDs in the result. If set to DIDType.DATASET,
1527
+ only containers are resolved and datasets are returned. If set to DIDType.FILE, the
1528
+ datasets are also resolved and files are returned.
1529
+ """
1530
+ if did_type == DIDType.DATASET:
1531
+ dids_to_resolve = [DIDType.CONTAINER]
1532
+ else:
1533
+ dids_to_resolve = [DIDType.CONTAINER, DIDType.DATASET]
1534
+
1535
+ # Uses a recursive SQL CTE (Common Table Expressions)
1536
+ initial_set = select(
1537
+ models.DataIdentifierAssociation.child_scope,
1538
+ models.DataIdentifierAssociation.child_name,
1539
+ models.DataIdentifierAssociation.child_type,
1540
+ ).join_from(
1541
+ input_dids_table,
1542
+ models.DataIdentifierAssociation,
1543
+ and_(models.DataIdentifierAssociation.scope == input_dids_table.scope,
1544
+ models.DataIdentifierAssociation.name == input_dids_table.name,
1545
+ models.DataIdentifierAssociation.did_type.in_(dids_to_resolve)),
1546
+ ).cte(
1547
+ recursive=True,
1548
+ )
1549
+
1550
+ # Oracle doesn't support union() in recursive CTEs, so use UNION ALL
1551
+ # and a "distinct" filter later
1552
+ child_datasets_cte = initial_set.union_all(
1553
+ select(
1554
+ models.DataIdentifierAssociation.child_scope,
1555
+ models.DataIdentifierAssociation.child_name,
1556
+ models.DataIdentifierAssociation.child_type,
1557
+ ).where(
1558
+ and_(models.DataIdentifierAssociation.scope == initial_set.c.child_scope,
1559
+ models.DataIdentifierAssociation.name == initial_set.c.child_name,
1560
+ models.DataIdentifierAssociation.did_type.in_(dids_to_resolve))
1561
+ )
1562
+ )
1563
+
1564
+ stmt = select(
1565
+ child_datasets_cte.c.child_scope.label('scope'),
1566
+ child_datasets_cte.c.child_name.label('name'),
1567
+ ).distinct(
1568
+ ).where(
1569
+ child_datasets_cte.c.child_type == did_type,
1570
+ )
1571
+ return stmt
1572
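The recursive CTE built above starts from the attachments of the input dids and repeatedly follows container (and, when files are requested, dataset) attachments until only dids of the requested type remain. A standalone sketch of the same resolution logic in plain Python over an in-memory content table (illustrative data only):

    # parent name -> list of (child_name, child_type)
    CONTENTS = {
        'container_top': [('container_x', 'CONTAINER'), ('dataset_a', 'DATASET')],
        'container_x': [('dataset_b', 'DATASET')],
        'dataset_a': [('file_1', 'FILE')],
        'dataset_b': [('file_2', 'FILE')],
    }

    def list_children(roots, wanted_type):
        # Resolve containers always; resolve datasets too only when files are wanted.
        resolve = {'CONTAINER'} if wanted_type == 'DATASET' else {'CONTAINER', 'DATASET'}
        frontier, result = list(roots), set()
        while frontier:
            name = frontier.pop()
            for child, child_type in CONTENTS.get(name, []):
                if child_type == wanted_type:
                    result.add(child)
                if child_type in resolve:
                    frontier.append(child)
        return sorted(result)

    print(list_children(['container_top'], 'DATASET'))  # ['dataset_a', 'dataset_b']
    print(list_children(['container_top'], 'FILE'))     # ['file_1', 'file_2']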
+
1573
+
1574
+ def list_one_did_childs_stmt(
1575
+ scope: "InternalScope",
1576
+ name: str,
1577
+ did_type: DIDType,
1578
+ ) -> "Select[tuple[InternalScope, str]]":
1579
+ """
1580
+ Returns the sqlalchemy query for recursively fetching the child dids of type
1581
+ 'did_type' for the input did.
1582
+
1583
+ did_type defines the desired type of DIDs in the result. If set to DIDType.DATASET,
1584
+ only containers are resolved and datasets are returned. If set to DIDType.FILE, the
1585
+ datasets are also resolved and files are returned.
1586
+ """
1587
+ if did_type == DIDType.DATASET:
1588
+ dids_to_resolve = [DIDType.CONTAINER]
1589
+ else:
1590
+ dids_to_resolve = [DIDType.CONTAINER, DIDType.DATASET]
1591
+
1592
+ # Uses a recursive SQL CTE (Common Table Expressions)
1593
+ initial_set = select(
1594
+ models.DataIdentifierAssociation.child_scope,
1595
+ models.DataIdentifierAssociation.child_name,
1596
+ models.DataIdentifierAssociation.child_type,
1597
+ ).where(
1598
+ and_(models.DataIdentifierAssociation.scope == scope,
1599
+ models.DataIdentifierAssociation.name == name,
1600
+ models.DataIdentifierAssociation.did_type.in_(dids_to_resolve))
1601
+ ).cte(
1602
+ recursive=True,
1603
+ )
1604
+
1605
+ # Oracle doesn't support union() in recursive CTEs, so use UNION ALL
1606
+ # and a "distinct" filter later
1607
+ child_datasets_cte = initial_set.union_all(
1608
+ select(
1609
+ models.DataIdentifierAssociation.child_scope,
1610
+ models.DataIdentifierAssociation.child_name,
1611
+ models.DataIdentifierAssociation.child_type,
1612
+ ).where(
1613
+ and_(models.DataIdentifierAssociation.scope == initial_set.c.child_scope,
1614
+ models.DataIdentifierAssociation.name == initial_set.c.child_name,
1615
+ models.DataIdentifierAssociation.did_type.in_(dids_to_resolve))
1616
+ )
1617
+ )
1618
+
1619
+ stmt = select(
1620
+ child_datasets_cte.c.child_scope.label('scope'),
1621
+ child_datasets_cte.c.child_name.label('name'),
1622
+ ).distinct(
1623
+ ).where(
1624
+ child_datasets_cte.c.child_type == did_type,
1625
+ )
1626
+ return stmt
1627
+
1628
+
1629
+ @transactional_session
1630
+ def list_child_datasets(
1631
+ scope: "InternalScope",
1632
+ name: str,
1633
+ *,
1634
+ session: "Session"
1635
+ ) -> list[dict[str, Union["InternalScope", str]]]:
1636
+ """
1637
+ List all child datasets of a container.
1638
+
1639
+ :param scope: The scope.
1640
+ :param name: The name.
1641
+ :param session: The database session
1642
+ :returns: List of dids
1643
+ :rtype: list
1644
+ """
1645
+ stmt = list_one_did_childs_stmt(scope, name, did_type=DIDType.DATASET)
1646
+ result = []
1647
+ for row in session.execute(stmt):
1648
+ result.append({'scope': row.scope, 'name': row.name})
1649
+
1650
+ return result
1651
+
1652
+
1653
+ @stream_session
1654
+ def bulk_list_files(
1655
+ dids: "Iterable[Mapping[str, Any]]",
1656
+ long: bool = False,
1657
+ *,
1658
+ session: "Session"
1659
+ ) -> "Optional[Iterator[dict[str, Any]]]":
1660
+ """
1661
+ List file contents of a list of data identifier.
1662
+
1663
+ :param dids: A list of DIDs.
1664
+ :param long: A boolean to choose if more metadata are returned or not.
1665
+ :param session: The database session in use.
1666
+ """
1667
+ for did in dids:
1668
+ try:
1669
+ for file_dict in list_files(scope=did['scope'], name=did['name'], long=long, session=session):
1670
+ file_dict['parent_scope'] = did['scope']
1671
+ file_dict['parent_name'] = did['name']
1672
+ yield file_dict
1673
+ except exception.DataIdentifierNotFound:
1674
+ pass
1675
+
1676
+
1677
+ @stream_session
1678
+ def list_files(scope: "InternalScope", name: str, long: bool = False, *, session: "Session") -> "Iterator[dict[str, Any]]":
1679
+ """
1680
+ List data identifier file contents.
1681
+
1682
+ :param scope: The scope name.
1683
+ :param name: The data identifier name.
1684
+ :param long: A boolean to choose if more metadata are returned or not.
1685
+ :param session: The database session in use.
1686
+ """
1687
+ try:
1688
+ stmt = select(
1689
+ models.DataIdentifier.scope,
1690
+ models.DataIdentifier.name,
1691
+ models.DataIdentifier.bytes,
1692
+ models.DataIdentifier.adler32,
1693
+ models.DataIdentifier.guid,
1694
+ models.DataIdentifier.events,
1695
+ models.DataIdentifier.lumiblocknr,
1696
+ models.DataIdentifier.did_type
1697
+ ).with_hint(
1698
+ models.DataIdentifier,
1699
+ 'INDEX(DIDS DIDS_PK)',
1700
+ 'oracle'
1701
+ ).where(
1702
+ and_(models.DataIdentifier.scope == scope,
1703
+ models.DataIdentifier.name == name)
1704
+ )
1705
+ did = session.execute(stmt).one()
1706
+
1707
+ if did[7] == DIDType.FILE:
1708
+ if long:
1709
+ yield {'scope': did[0], 'name': did[1], 'bytes': did[2],
1710
+ 'adler32': did[3], 'guid': did[4] and did[4].upper(),
1711
+ 'events': did[5], 'lumiblocknr': did[6]}
1712
+ else:
1713
+ yield {'scope': did[0], 'name': did[1], 'bytes': did[2],
1714
+ 'adler32': did[3], 'guid': did[4] and did[4].upper(),
1715
+ 'events': did[5]}
1716
+ else:
1717
+ cnt_query = select(
1718
+ models.DataIdentifierAssociation.child_scope,
1719
+ models.DataIdentifierAssociation.child_name,
1720
+ models.DataIdentifierAssociation.child_type
1721
+ ).with_hint(
1722
+ models.DataIdentifierAssociation,
1723
+ 'INDEX(CONTENTS CONTENTS_PK)',
1724
+ 'oracle'
1725
+ )
1726
+
1727
+ if long:
1728
+ dst_cnt_query = select(
1729
+ models.DataIdentifierAssociation.child_scope,
1730
+ models.DataIdentifierAssociation.child_name,
1731
+ models.DataIdentifierAssociation.child_type,
1732
+ models.DataIdentifierAssociation.bytes,
1733
+ models.DataIdentifierAssociation.adler32,
1734
+ models.DataIdentifierAssociation.guid,
1735
+ models.DataIdentifierAssociation.events,
1736
+ models.DataIdentifier.lumiblocknr
1737
+ ).with_hint(
1738
+ models.DataIdentifierAssociation,
1739
+ 'INDEX_RS_ASC(DIDS DIDS_PK) INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)',
1740
+ 'oracle'
1741
+ ).where(
1742
+ and_(models.DataIdentifier.scope == models.DataIdentifierAssociation.child_scope,
1743
+ models.DataIdentifier.name == models.DataIdentifierAssociation.child_name)
1744
+ )
1745
+ else:
1746
+ dst_cnt_query = select(
1747
+ models.DataIdentifierAssociation.child_scope,
1748
+ models.DataIdentifierAssociation.child_name,
1749
+ models.DataIdentifierAssociation.child_type,
1750
+ models.DataIdentifierAssociation.bytes,
1751
+ models.DataIdentifierAssociation.adler32,
1752
+ models.DataIdentifierAssociation.guid,
1753
+ models.DataIdentifierAssociation.events,
1754
+ bindparam("lumiblocknr", None)
1755
+ ).with_hint(
1756
+ models.DataIdentifierAssociation,
1757
+ 'INDEX(CONTENTS CONTENTS_PK)',
1758
+ 'oracle'
1759
+ )
1760
+
1761
+ dids = [(scope, name, did[7]), ]
1762
+ while dids:
1763
+ s, n, t = dids.pop()
1764
+ if t == DIDType.DATASET:
1765
+ stmt = dst_cnt_query.where(
1766
+ and_(models.DataIdentifierAssociation.scope == s,
1767
+ models.DataIdentifierAssociation.name == n)
1768
+ )
1769
+
1770
+ for child_scope, child_name, child_type, bytes_, adler32, guid, events, lumiblocknr in session.execute(stmt).yield_per(500):
1771
+ if long:
1772
+ yield {'scope': child_scope, 'name': child_name,
1773
+ 'bytes': bytes_, 'adler32': adler32,
1774
+ 'guid': guid and guid.upper(),
1775
+ 'events': events,
1776
+ 'lumiblocknr': lumiblocknr}
1777
+ else:
1778
+ yield {'scope': child_scope, 'name': child_name,
1779
+ 'bytes': bytes_, 'adler32': adler32,
1780
+ 'guid': guid and guid.upper(),
1781
+ 'events': events}
1782
+ else:
1783
+ stmt = cnt_query.where(
1784
+ and_(models.DataIdentifierAssociation.scope == s,
1785
+ models.DataIdentifierAssociation.name == n)
1786
+ )
1787
+ for child_scope, child_name, child_type in session.execute(stmt).yield_per(500):
1788
+ dids.append((child_scope, child_name, child_type))
1789
+
1790
+ except NoResultFound:
1791
+ raise exception.DataIdentifierNotFound(f"Data identifier '{scope}:{name}' not found")
1792
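A hedged usage sketch of list_files: iterating over all file entries of a dataset or container. The import paths and the scope/name are placeholders, and a configured Rucio database is assumed.

    from rucio.common.types import InternalScope  # assumed import path
    from rucio.core.did import list_files

    for entry in list_files(scope=InternalScope('user.jdoe'), name='user.jdoe.test.container', long=True):
        print(entry['name'], entry['bytes'], entry['adler32'], entry['lumiblocknr'])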
+
1793
+
1794
+ @stream_session
1795
+ def scope_list(
1796
+ scope: "InternalScope",
1797
+ name: Optional[str] = None,
1798
+ recursive: bool = False,
1799
+ *,
1800
+ session: "Session"
1801
+ ) -> "Iterator[dict[str, Any]]":
1802
+ """
1803
+ List data identifiers in a scope.
1804
+
1805
+ :param scope: The scope name.
1806
+ :param name: The data identifier name.
1807
+ :param recursive: If True, recursively list the contents of each collection.
1808
+ :param session: The database session in use.
1809
+ """
1810
+ # TODO: Performance tuning of the method
1811
+ # query = session.query(models.DataIdentifier).filter_by(scope=scope, deleted=False)
1812
+ # for did in query.yield_per(5):
1813
+ # yield {'scope': did.scope, 'name': did.name, 'type': did.did_type, 'parent': None, 'level': 0}
1814
+
1815
+ def __topdids(scope):
1816
+ sub_stmt = select(
1817
+ models.DataIdentifierAssociation.child_name
1818
+ ).where(
1819
+ and_(models.DataIdentifierAssociation.scope == scope,
1820
+ models.DataIdentifierAssociation.child_scope == scope)
1821
+ )
1822
+ stmt = select(
1823
+ models.DataIdentifier.name,
1824
+ models.DataIdentifier.did_type,
1825
+ models.DataIdentifier.bytes
1826
+ ).where(
1827
+ and_(models.DataIdentifier.scope == scope,
1828
+ not_(models.DataIdentifier.name.in_(sub_stmt)))
1829
+ ).order_by(
1830
+ models.DataIdentifier.name
1831
+ )
1832
+ for row in session.execute(stmt).yield_per(5):
1833
+ if row.did_type == DIDType.FILE:
1834
+ yield {'scope': scope, 'name': row.name, 'type': row.did_type, 'parent': None, 'level': 0, 'bytes': row.bytes}
1835
+ else:
1836
+ yield {'scope': scope, 'name': row.name, 'type': row.did_type, 'parent': None, 'level': 0, 'bytes': None}
1837
+
1838
+ def __diddriller(pdid: "Mapping[str, Any]") -> "Iterator[dict[str, Any]]":
1839
+ stmt = select(
1840
+ models.DataIdentifierAssociation
1841
+ ).where(
1842
+ and_(models.DataIdentifierAssociation.scope == pdid['scope'],
1843
+ models.DataIdentifierAssociation.name == pdid['name'])
1844
+ ).order_by(
1845
+ models.DataIdentifierAssociation.child_name
1846
+ )
1847
+ for row in session.execute(stmt).yield_per(5).scalars():
1848
+ parent = {'scope': pdid['scope'], 'name': pdid['name']}
1849
+ cdid = {'scope': row.child_scope, 'name': row.child_name, 'type': row.child_type, 'parent': parent, 'level': pdid['level'] + 1}
1850
+ yield cdid
1851
+ if cdid['type'] != DIDType.FILE and recursive:
1852
+ for did in __diddriller(cdid):
1853
+ yield did
1854
+
1855
+ if name is None:
1856
+ topdids = __topdids(scope)
1857
+ else:
1858
+ stmt = select(
1859
+ models.DataIdentifier
1860
+ ).where(
1861
+ and_(models.DataIdentifier.scope == scope,
1862
+ models.DataIdentifier.name == name)
1863
+ ).limit(
1864
+ 1
1865
+ )
1866
+ topdids = session.execute(stmt).scalar()
1867
+ if topdids is None:
1868
+ raise exception.DataIdentifierNotFound(f"Data identifier '{scope}:{name}' not found")
1869
+ topdids = [{'scope': topdids.scope, 'name': topdids.name, 'type': topdids.did_type, 'parent': None, 'level': 0}]
1870
+
1871
+ if name is None:
1872
+ for topdid in topdids:
1873
+ yield topdid
1874
+ if recursive:
1875
+ for did in __diddriller(topdid):
1876
+ yield did
1877
+ else:
1878
+ for topdid in topdids:
1879
+ for did in __diddriller(topdid):
1880
+ yield did
1881
+
1882
+
1883
+ @read_session
1884
+ def __get_did(
1885
+ scope: "InternalScope",
1886
+ name: str,
1887
+ *,
1888
+ session: "Session"
1889
+ ) -> "models.DataIdentifier":
1890
+ try:
1891
+ stmt = select(
1892
+ models.DataIdentifier
1893
+ ).with_hint(
1894
+ models.DataIdentifier,
1895
+ 'INDEX(DIDS DIDS_PK)',
1896
+ 'oracle'
1897
+ ).where(
1898
+ and_(models.DataIdentifier.scope == scope,
1899
+ models.DataIdentifier.name == name)
1900
+ )
1901
+ return session.execute(stmt).scalar_one()
1902
+ except NoResultFound:
1903
+ raise exception.DataIdentifierNotFound(f"Data identifier '{scope}:{name}' not found")
1904
+
1905
+
1906
+ @read_session
1907
+ def get_did(scope: "InternalScope", name: str, dynamic_depth: "Optional[DIDType]" = None, *, session: "Session") -> "dict[str, Any]":
1908
+ """
1909
+ Retrieve a single data identifier.
1910
+
1911
+ :param scope: The scope name.
1912
+ :param name: The data identifier name.
1913
+ :param dynamic_depth: the DID type to use as source for estimation of this DIDs length/bytes.
1914
+ If set to None, or to a value which doesn't make sense (e.g. requesting depth = CONTAINER for a did of type DATASET),
1915
+ the size will not be computed dynamically.
1916
+ :param session: The database session in use.
1917
+ """
1918
+ did = __get_did(scope=scope, name=name, session=session)
1919
+
1920
+ bytes_, length = did.bytes, did.length
1921
+ if dynamic_depth:
1922
+ bytes_, length, events = __resolve_bytes_length_events_did(did=did, dynamic_depth=dynamic_depth, session=session)
1923
+
1924
+ if did.did_type == DIDType.FILE:
1925
+ return {'scope': did.scope, 'name': did.name, 'type': did.did_type,
1926
+ 'account': did.account, 'bytes': bytes_, 'length': 1,
1927
+ 'md5': did.md5, 'adler32': did.adler32}
1928
+ else:
1929
+ return {'scope': did.scope, 'name': did.name, 'type': did.did_type,
1930
+ 'account': did.account, 'open': did.is_open,
1931
+ 'monotonic': did.monotonic, 'expired_at': did.expired_at,
1932
+ 'length': length, 'bytes': bytes_}
1933
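A hedged usage sketch of get_did: with dynamic_depth set, bytes and length are recomputed from the did's children of that type instead of being read from the stored columns. The import paths and scope/name are placeholders, and a configured Rucio database is assumed.

    from rucio.common.types import InternalScope  # assumed import path
    from rucio.core.did import get_did
    from rucio.db.sqla.constants import DIDType

    meta = get_did(scope=InternalScope('user.jdoe'),
                   name='user.jdoe.test.dataset',
                   dynamic_depth=DIDType.FILE)   # estimate size from the attached files
    print(meta['length'], meta['bytes'], meta['open'])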
+
1934
+
1935
+ @transactional_session
1936
+ def set_metadata(
1937
+ scope: "InternalScope",
1938
+ name: str,
1939
+ key: str,
1940
+ value: Any,
1941
+ did_type: Optional[DIDType] = None,
1942
+ did: Optional["Mapping[str, Any]"] = None,
1943
+ recursive: bool = False,
1944
+ *,
1945
+ session: "Session"
1946
+ ) -> None:
1947
+ """
1948
+ Add single metadata to a data identifier.
1949
+
1950
+ :param scope: The scope name.
1951
+ :param name: The data identifier name.
1952
+ :param key: the key.
1953
+ :param value: the value.
1954
+ :param did: The data identifier info.
1955
+ :param recursive: Option to propagate the metadata change to content.
1956
+ :param session: The database session in use.
1957
+ """
1958
+ did_meta_plugins.set_metadata(scope=scope, name=name, key=key, value=value, recursive=recursive, session=session)
1959
+
1960
+
1961
+ @transactional_session
1962
+ def set_metadata_bulk(
1963
+ scope: "InternalScope",
1964
+ name: str,
1965
+ meta: "Mapping[str, Any]",
1966
+ recursive: bool = False,
1967
+ *,
1968
+ session: "Session"
1969
+ ) -> None:
1970
+ """
1971
+ Add metadata to a data identifier.
1972
+
1973
+ :param scope: The scope name.
1974
+ :param name: The data identifier name.
1975
+ :param meta: the key-values.
1976
+ :param recursive: Option to propagate the metadata change to content.
1977
+ :param session: The database session in use.
1978
+ """
1979
+ did_meta_plugins.set_metadata_bulk(scope=scope, name=name, meta=meta, recursive=recursive, session=session)
1980
+
1981
+
1982
+ @transactional_session
1983
+ def set_dids_metadata_bulk(
1984
+ dids: "Iterable[Mapping[str, Any]]",
1985
+ recursive: bool = False,
1986
+ *,
1987
+ session: "Session"
1988
+ ) -> None:
1989
+ """
1990
+ Add metadata to a list of data identifiers.
1991
+
1992
+ :param dids: A list of dids including metadata.
1993
+ :param recursive: Option to propagate the metadata change to content.
1994
+ :param session: The database session in use.
1995
+ """
1996
+
1997
+ for did in dids:
1998
+ did_meta_plugins.set_metadata_bulk(scope=did['scope'], name=did['name'], meta=did['meta'], recursive=recursive, session=session)
1999
+
2000
+
2001
+ @read_session
2002
+ def get_metadata(
2003
+ scope: "InternalScope",
2004
+ name: str,
2005
+ plugin: str = 'DID_COLUMN',
2006
+ *,
2007
+ session: "Session"
2008
+ ) -> dict[str, Any]:
2009
+ """
2010
+ Get data identifier metadata
2011
+
2012
+ :param scope: The scope name.
2013
+ :param name: The data identifier name.
2014
+ :param plugin: The metadata plugin to use or 'ALL' for all.
2015
+ :param session: The database session in use.
2016
+
2017
+
2018
+ :returns: Dictionary of metadata for the did, as provided by the selected metadata plugin.
2019
+ """
2020
+ return did_meta_plugins.get_metadata(scope, name, plugin=plugin, session=session)
2021
+
2022
+
2023
+ @stream_session
2024
+ def list_parent_dids_bulk(
2025
+ dids: "Iterable[Mapping[str, Any]]",
2026
+ *,
2027
+ session: "Session"
2028
+ ) -> "Iterator[dict[str, Any]]":
2029
+ """
2030
+ List parent datasets and containers of a did.
2031
+
2032
+ :param dids: A list of dids.
2033
+ :param session: The database session in use.
2034
+ :returns: List of dids.
2035
+ :rtype: Generator.
2036
+ """
2037
+ condition = []
2038
+ for did in dids:
2039
+ condition.append(and_(models.DataIdentifierAssociation.child_scope == did['scope'],
2040
+ models.DataIdentifierAssociation.child_name == did['name']))
2041
+
2042
+ try:
2043
+ for chunk in chunks(condition, 50):
2044
+ stmt = select(
2045
+ models.DataIdentifierAssociation.child_scope,
2046
+ models.DataIdentifierAssociation.child_name,
2047
+ models.DataIdentifierAssociation.scope,
2048
+ models.DataIdentifierAssociation.name,
2049
+ models.DataIdentifierAssociation.did_type
2050
+ ).where(
2051
+ or_(*chunk)
2052
+ )
2053
+ for did_chunk in session.execute(stmt).yield_per(5):
2054
+ yield {'scope': did_chunk.scope, 'name': did_chunk.name, 'child_scope': did_chunk.child_scope, 'child_name': did_chunk.child_name, 'type': did_chunk.did_type}
2055
+ except NoResultFound:
2056
+ raise exception.DataIdentifierNotFound('No Data Identifiers found')
2057
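list_parent_dids_bulk avoids building one enormous WHERE clause by splitting the per-did conditions into chunks of 50 and issuing one query per chunk. A standalone sketch of that chunking idiom (a simplified chunks helper is defined here; the real one is imported elsewhere in this module):

    def chunks(items, size):
        """Yield successive fixed-size slices of a list (simplified stand-in)."""
        for i in range(0, len(items), size):
            yield items[i:i + size]

    conditions = ['child_scope = :s%d AND child_name = :n%d' % (i, i) for i in range(120)]
    for chunk in chunks(conditions, 50):
        where_clause = ' OR '.join(chunk)      # one bounded query per chunk of 50 dids
        print(len(chunk), 'conditions in this query')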
+
2058
+
2059
+ @stream_session
2060
+ def get_metadata_bulk(
2061
+ dids: list["Mapping[Any, Any]"],
2062
+ inherit: bool = False,
2063
+ plugin: str = 'JSON',
2064
+ *,
2065
+ session: "Session"
2066
+ ) -> "Iterator[dict[str, Any]]":
2067
+ """
2068
+ Get metadata for a list of dids
2069
+ :param dids: A list of dids.
2070
+ :param inherit: A boolean. If set to True, the metadata of all parents are merged in, with the did's own values taking precedence.
2071
+ :param plugin: A string. The metadata plugin to use or 'ALL' for all.
2072
+ :param session: The database session in use.
2073
+ """
2074
+ if inherit:
2075
+ parent_list = []
2076
+ unique_dids = []
2077
+ parents = [1, ]
2078
+ depth = 0
2079
+ for did in dids:
2080
+ unique_dids.append((did['scope'], did['name']))
2081
+ parent_list.append([(did['scope'], did['name']), ])
2082
+
2083
+ while parents and depth < 20:
2084
+ parents = []
2085
+ for did in list_parent_dids_bulk(dids, session=session):
2086
+ scope = did['scope']
2087
+ name = did['name']
2088
+ child_scope = did['child_scope']
2089
+ child_name = did['child_name']
2090
+ if (scope, name) not in unique_dids:
2091
+ unique_dids.append((scope, name))
2092
+ if (scope, name) not in parents:
2093
+ parents.append((scope, name))
2094
+ for entry in parent_list:
2095
+ if entry[-1] == (child_scope, child_name):
2096
+ entry.append((scope, name))
2097
+ dids = [{'scope': did[0], 'name': did[1]} for did in parents]
2098
+ depth += 1
2099
+ unique_dids = [{'scope': did[0], 'name': did[1]} for did in unique_dids]
2100
+ meta_dict = {}
2101
+ for did in unique_dids:
2102
+ try:
2103
+ meta = get_metadata(did['scope'], did['name'], plugin=plugin, session=session)
2104
+ except exception.DataIdentifierNotFound:
2105
+ meta = {}
2106
+ meta_dict[(did['scope'], did['name'])] = meta
2107
+ for dids in parent_list:
2108
+ result = {'scope': dids[0][0], 'name': dids[0][1]}
2109
+ for did in dids:
2110
+ for key in meta_dict[did]:
2111
+ if key not in result:
2112
+ result[key] = meta_dict[did][key]
2113
+ yield result
2114
+ else:
2115
+ condition = []
2116
+ for did in dids:
2117
+ condition.append(and_(models.DataIdentifier.scope == did['scope'],
2118
+ models.DataIdentifier.name == did['name']))
2119
+ try:
2120
+ for chunk in chunks(condition, 50):
2121
+ stmt = select(
2122
+ models.DataIdentifier
2123
+ ).with_hint(
2124
+ models.DataIdentifier,
2125
+ 'INDEX(DIDS DIDS_PK)',
2126
+ 'oracle'
2127
+ ).where(
2128
+ or_(*chunk)
2129
+ )
2130
+ for row in session.execute(stmt).scalars():
2131
+ yield row.to_dict()
2132
+ except NoResultFound:
2133
+ raise exception.DataIdentifierNotFound('No Data Identifiers found')
2134
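With inherit=True, get_metadata_bulk walks up to 20 levels of parents and fills in, for each input did, any key it does not already define, so the did's own values win over its ancestors'. A standalone sketch of that precedence rule over plain dictionaries (illustrative data only):

    # Metadata per did, ordered from the did itself up to its furthest ancestor.
    chain = [
        {'campaign': 'mc23'},                              # the did's own metadata
        {'campaign': 'mc20', 'project': 'simulation'},     # its dataset
        {'project': 'ignored', 'owner': 'prod'},           # its container
    ]

    merged = {}
    for metadata in chain:                 # nearest did first, so earlier keys take precedence
        for key, value in metadata.items():
            merged.setdefault(key, value)

    print(merged)   # {'campaign': 'mc23', 'project': 'simulation', 'owner': 'prod'}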
+
2135
+
2136
+ @transactional_session
2137
+ def delete_metadata(
2138
+ scope: "InternalScope",
2139
+ name: str,
2140
+ key: str,
2141
+ *,
2142
+ session: "Session"
2143
+ ) -> None:
2144
+ """
2145
+ Delete a key from the metadata column
2146
+
2147
+ :param scope: the scope of did
2148
+ :param name: the name of the did
2149
+ :param key: the key to be deleted
2150
+ """
2151
+ did_meta_plugins.delete_metadata(scope, name, key, session=session)
2152
+
2153
+
2154
+ @transactional_session
2155
+ def set_status(
2156
+ scope: "InternalScope",
2157
+ name: str,
2158
+ *,
2159
+ session: "Session",
2160
+ **kwargs
2161
+ ) -> None:
2162
+ """
2163
+ Set data identifier status
2164
+
2165
+ :param scope: The scope name.
2166
+ :param name: The data identifier name.
2167
+ :param session: The database session in use.
2168
+ :param kwargs: Keyword arguments of the form status_name=value.
2169
+ """
2170
+ statuses = ['open', ]
2171
+ reevaluate_dids_at_close = config_get_bool('subscriptions', 'reevaluate_dids_at_close', raise_exception=False, default=False, session=session)
2172
+
2173
+ update_stmt = update(
2174
+ models.DataIdentifier
2175
+ ).where(
2176
+ and_(models.DataIdentifier.scope == scope,
2177
+ models.DataIdentifier.name == name,
2178
+ or_(models.DataIdentifier.did_type == DIDType.CONTAINER,
2179
+ models.DataIdentifier.did_type == DIDType.DATASET))
2180
+ ).prefix_with(
2181
+ "/*+ INDEX(DIDS DIDS_PK) */", dialect='oracle'
2182
+ ).execution_options(
2183
+ synchronize_session=False
2184
+ )
2185
+ values = {}
2186
+ for k in kwargs:
2187
+ if k not in statuses:
2188
+ raise exception.UnsupportedStatus(f'The status {k} is not a valid data identifier status.')
2189
+ if k == 'open':
2190
+ if not kwargs[k]:
2191
+ update_stmt = update_stmt.where(
2192
+ and_(models.DataIdentifier.is_open == true(),
2193
+ models.DataIdentifier.did_type != DIDType.FILE)
2194
+ )
2195
+ values['is_open'], values['closed_at'] = False, datetime.utcnow()
2196
+ values['bytes'], values['length'], values['events'] = __resolve_bytes_length_events_did(did=__get_did(scope=scope, name=name, session=session),
2197
+ session=session)
2198
+ # Update datasetlocks as well
2199
+ stmt = update(
2200
+ models.DatasetLock
2201
+ ).where(
2202
+ and_(models.DatasetLock.scope == scope,
2203
+ models.DatasetLock.name == name)
2204
+ ).values({
2205
+ models.DatasetLock.length: values['length'],
2206
+ models.DatasetLock.bytes: values['bytes']
2207
+ })
2208
+ session.execute(stmt)
2209
+
2210
+ # Generate a message
2211
+ message = {'scope': scope.external,
2212
+ 'name': name,
2213
+ 'bytes': values['bytes'],
2214
+ 'length': values['length'],
2215
+ 'events': values['events']}
2216
+ if scope.vo != 'def':
2217
+ message['vo'] = scope.vo
2218
+
2219
+ add_message('CLOSE', message, session=session)
2220
+ if reevaluate_dids_at_close:
2221
+ set_new_dids(dids=[{'scope': scope, 'name': name}],
2222
+ new_flag=True,
2223
+ session=session)
2224
+
2225
+ else:
2226
+ # Set status to open only for privileged accounts
2227
+ update_stmt = update_stmt.where(
2228
+ and_(models.DataIdentifier.is_open == false(),
2229
+ models.DataIdentifier.did_type != DIDType.FILE)
2230
+ )
2231
+ values['is_open'] = True
2232
+
2233
+ message = {'scope': scope.external, 'name': name}
2234
+ if scope.vo != 'def':
2235
+ message['vo'] = scope.vo
2236
+ add_message('OPEN', message, session=session)
2237
+
2238
+ update_stmt = update_stmt.values(
2239
+ values
2240
+ )
2241
+ rowcount = session.execute(update_stmt).rowcount
2242
+
2243
+ if not rowcount:
2244
+ stmt = select(
2245
+ models.DataIdentifier
2246
+ ).where(
2247
+ and_(models.DataIdentifier.scope == scope,
2248
+ models.DataIdentifier.name == name)
2249
+ )
2250
+ try:
2251
+ session.execute(stmt).scalar_one()
2252
+ except NoResultFound:
2253
+ raise exception.DataIdentifierNotFound(f"Data identifier '{scope}:{name}' not found")
2254
+ raise exception.UnsupportedOperation(f"The status of the data identifier '{scope}:{name}' cannot be changed")
2255
+ else:
2256
+ # Generate callbacks
2257
+ if not values['is_open']:
2258
+ stmt = select(
2259
+ models.ReplicationRule
2260
+ ).where(
2261
+ and_(models.ReplicationRule.scope == scope,
2262
+ models.ReplicationRule.name == name)
2263
+ )
2264
+ for rule in session.execute(stmt).scalars():
2265
+ rucio.core.rule.generate_rule_notifications(rule=rule, session=session)
2266
+
2267
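A hedged usage sketch of set_status: closing a dataset sets is_open to False, recomputes bytes/length/events, updates the dataset locks and emits a CLOSE message. The import paths and scope/name are placeholders, and a configured Rucio database is assumed.

    from rucio.common.types import InternalScope  # assumed import path
    from rucio.core.did import set_status

    set_status(scope=InternalScope('user.jdoe'), name='user.jdoe.test.dataset', open=False)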
+
2268
+ @read_session
2269
+ def list_dids(
2270
+ scope: "InternalScope",
2271
+ filters: "Mapping[Any, Any]",
2272
+ did_type: Literal['all', 'collection', 'dataset', 'container', 'file'] = 'collection',
2273
+ ignore_case: bool = False,
2274
+ limit: Optional[int] = None,
2275
+ offset: Optional[int] = None,
2276
+ long: bool = False,
2277
+ recursive: bool = False,
2278
+ ignore_dids: Optional["Sequence[str]"] = None,
2279
+ *,
2280
+ session: "Session"
2281
+ ) -> "Iterator[dict[str, Any]]":
2282
+ """
2283
+ Search data identifiers.
2284
+
2285
+ :param scope: the scope name.
2286
+ :param filters: dictionary of attributes by which the results should be filtered.
2287
+ :param did_type: The type of the DID: all (container, dataset, file), collection (dataset or container), dataset, container, file.
2288
+ :param ignore_case: ignore case distinctions.
2289
+ :param limit: limit number.
2290
+ :param offset: offset number.
2291
+ :param long: Long format option to display more information for each DID.
2292
+ :param recursive: Recursively list DIDs content.
2293
+ :param ignore_dids: List of DIDs to exclude from the results.
2294
+ :param session: The database session in use.
2295
+ """
2296
+ return did_meta_plugins.list_dids(scope, filters, did_type, ignore_case, limit, offset, long, recursive, ignore_dids, session=session)
2297
+
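The core call above simply forwards to the metadata plugins; a hedged usage sketch under the same assumptions as the previous example (assumed module path, configured database, hypothetical names), using a plain name-wildcard filter as suggested by the Mapping type of the filters argument:

from rucio.common.types import InternalScope
from rucio.core import did as did_core  # assumed module path

scope = InternalScope('user.jdoe')  # hypothetical scope
# long=True asks the plugin to yield one dict per matching DID instead of bare names.
for entry in did_core.list_dids(scope=scope,
                                filters={'name': 'user.jdoe.test.*'},
                                did_type='dataset',
                                long=True):
    print(entry)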
2298
+
2299
+ @read_session
2300
+ def get_did_atime(
2301
+ scope: "InternalScope",
2302
+ name: str,
2303
+ *,
2304
+ session: "Session"
2305
+ ) -> datetime:
2306
+ """
2307
+ Get the accessed_at timestamp for a did. Just for testing.
2308
+ :param scope: the scope name.
2309
+ :param name: The data identifier name.
2310
+ :param session: Database session to use.
2311
+
2312
+ :returns: A datetime timestamp with the last access time.
2313
+ """
2314
+ stmt = select(
2315
+ models.DataIdentifier.accessed_at
2316
+ ).where(
2317
+ and_(models.DataIdentifier.scope == scope,
2318
+ models.DataIdentifier.name == name)
2319
+ )
2320
+ return session.execute(stmt).one()[0]
2321
+
2322
+
2323
+ @read_session
2324
+ def get_did_access_cnt(
2325
+ scope: "InternalScope",
2326
+ name: str,
2327
+ *,
2328
+ session: "Session"
2329
+ ) -> int:
2330
+ """
2331
+ Get the access_cnt for a did. Just for testing.
2332
+ :param scope: the scope name.
2333
+ :param name: The data identifier name.
2334
+ :param session: Database session to use.
2335
+
2336
+ :returns: The number of times the DID has been accessed.
2337
+ """
2338
+ stmt = select(
2339
+ models.DataIdentifier.access_cnt
2340
+ ).where(
2341
+ and_(models.DataIdentifier.scope == scope,
2342
+ models.DataIdentifier.name == name)
2343
+ )
2344
+ return session.execute(stmt).one()[0]
2345
+
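Both helpers are read-only accessors meant for tests; a short sketch under the same assumptions as the earlier examples (names hypothetical):

from rucio.common.types import InternalScope
from rucio.core import did as did_core  # assumed module path

scope = InternalScope('user.jdoe')  # hypothetical scope
print(did_core.get_did_atime(scope=scope, name='user.jdoe.test.dataset'))       # last access time or None
print(did_core.get_did_access_cnt(scope=scope, name='user.jdoe.test.dataset'))  # access counter or None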
2346
+
2347
+ @stream_session
2348
+ def get_dataset_by_guid(
2349
+ guid: str,
2350
+ *,
2351
+ session: "Session"
2352
+ ) -> "Iterator[dict[str, Union[InternalScope, str]]]":
2353
+ """
2354
+ Get the parent datasets for a given GUID.
2355
+ :param guid: The GUID.
2356
+ :param session: Database session to use.
2357
+
2358
+ :returns: An iterator over the parent datasets, as dictionaries with 'scope' and 'name'.
2359
+ """
2360
+ stmt = select(
2361
+ models.DataIdentifier
2362
+ ).with_hint(
2363
+ models.DataIdentifier,
2364
+ 'INDEX(DIDS_GUIDS_IDX)',
2365
+ 'oracle'
2366
+ ).where(
2367
+ and_(models.DataIdentifier.guid == guid,
2368
+ models.DataIdentifier.did_type == DIDType.FILE)
2369
+ )
2370
+ try:
2371
+ r = session.execute(stmt).scalar_one()
2372
+ datasets_stmt = select(
2373
+ models.DataIdentifierAssociation.scope,
2374
+ models.DataIdentifierAssociation.name
2375
+ ).with_hint(
2376
+ models.DataIdentifierAssociation,
2377
+ 'INDEX(CONTENTS CONTENTS_CHILD_SCOPE_NAME_IDX)',
2378
+ 'oracle'
2379
+ ).where(
2380
+ and_(models.DataIdentifierAssociation.child_scope == r.scope,
2381
+ models.DataIdentifierAssociation.child_name == r.name)
2382
+ )
2383
+
2384
+ except NoResultFound:
2385
+ raise exception.DataIdentifierNotFound("No file associated to GUID : %s" % guid)
2386
+ for tmp_did in session.execute(datasets_stmt).yield_per(5):
2387
+ yield {'scope': tmp_did.scope, 'name': tmp_did.name}
2388
+
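A hedged sketch of resolving a file GUID to its parent datasets (assumed module path and database as above; the GUID is made up):

from rucio.core import did as did_core  # assumed module path

for parent in did_core.get_dataset_by_guid('1b7c9a52-0000-4ccc-8888-123456789abc'):  # hypothetical GUID
    print(parent['scope'], parent['name'])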
2389
+
2390
+ @transactional_session
2391
+ def touch_dids(
2392
+ dids: "Iterable[Mapping[str, Any]]",
2393
+ *,
2394
+ session: "Session"
2395
+ ) -> bool:
2396
+ """
2397
+ Update the accessed_at timestamp and the access_cnt of the given dids.
2398
+
2399
+ :param dids: The list of DIDs to touch.
2400
+ :param session: The database session in use.
2401
+
2402
+ :returns: True, if successful, False otherwise.
2403
+ """
2404
+
2405
+ now = datetime.utcnow()
2406
+ none_value = None
2407
+ try:
2408
+ for did in dids:
2409
+ stmt = update(
2410
+ models.DataIdentifier
2411
+ ).where(
2412
+ and_(models.DataIdentifier.scope == did['scope'],
2413
+ models.DataIdentifier.name == did['name'],
2414
+ models.DataIdentifier.did_type == did['type'])
2415
+ ).values({
2416
+ models.DataIdentifier.accessed_at: did.get('accessed_at') or now,
2417
+ models.DataIdentifier.access_cnt: case((models.DataIdentifier.access_cnt == none_value, 1),
2418
+ else_=(models.DataIdentifier.access_cnt + 1)) # type: ignore
2419
+ }).execution_options(
2420
+ synchronize_session=False
2421
+ )
2422
+ session.execute(stmt)
2423
+ except DatabaseError:
2424
+ return False
2425
+
2426
+ return True
2427
+
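A sketch of bulk-updating access statistics, under the same assumptions as above and with hypothetical names. Each entry must carry scope, name and type; accessed_at is optional and defaults to now:

from datetime import datetime

from rucio.common.types import InternalScope
from rucio.core import did as did_core  # assumed module path
from rucio.db.sqla.constants import DIDType

scope = InternalScope('user.jdoe')  # hypothetical scope
ok = did_core.touch_dids(dids=[{'scope': scope,
                                'name': 'user.jdoe.test.file',
                                'type': DIDType.FILE,
                                'accessed_at': datetime.utcnow()}])
print('updated' if ok else 'database error, nothing updated')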
2428
+
2429
+ @transactional_session
2430
+ def create_did_sample(
2431
+ input_scope: "InternalScope",
2432
+ input_name: str,
2433
+ output_scope: "InternalScope",
2434
+ output_name: str,
2435
+ account: "InternalAccount",
2436
+ nbfiles: str,
2437
+ *,
2438
+ session: "Session"
2439
+ ) -> None:
2440
+ """
2441
+ Create a sample from an input collection.
2442
+
2443
+ :param input_scope: The scope of the input DID.
2444
+ :param input_name: The name of the input DID.
2445
+ :param output_scope: The scope of the output dataset.
2446
+ :param output_name: The name of the output dataset.
2447
+ :param account: The account.
2448
+ :param nbfiles: The number of files to register in the output dataset.
2449
+ :param session: The database session in use.
2450
+ """
2451
+ files = [did for did in list_files(scope=input_scope, name=input_name, long=False, session=session)]
2452
+ random.shuffle(files)
2453
+ output_files = files[:int(nbfiles)]
2454
+ add_did(scope=output_scope, name=output_name, did_type=DIDType.DATASET, account=account, statuses={}, meta=[], rules=[], lifetime=None, dids=output_files, rse_id=None, session=session)
2455
+
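A sketch of building a random sample dataset (same assumptions; names hypothetical). Note that nbfiles is declared as a string and converted internally:

from rucio.common.types import InternalAccount, InternalScope
from rucio.core import did as did_core  # assumed module path

scope = InternalScope('user.jdoe')    # hypothetical scope
account = InternalAccount('jdoe')     # hypothetical account

did_core.create_did_sample(input_scope=scope, input_name='user.jdoe.test.dataset',
                           output_scope=scope, output_name='user.jdoe.test.dataset.sample10',
                           account=account, nbfiles='10')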
2456
+
2457
+ @transactional_session
2458
+ def __resolve_bytes_length_events_did(
2459
+ did: models.DataIdentifier,
2460
+ dynamic_depth: "DIDType" = DIDType.FILE,
2461
+ *, session: "Session",
2462
+ ) -> tuple[int, int, int]:
2463
+ """
2464
+ Resolve bytes, length and events of a did
2465
+
2466
+ :param did: The DID ORM object for which the resolution is performed.
2467
+ :param dynamic_depth: The DID type to use as the source for estimating this DID's length/bytes.
2468
+ If set to None, or to a value which doesn't make sense (e.g. requesting depth = DATASET for a DID of type FILE),
2469
+ the size is not computed dynamically.
2470
+ :param session: The database session in use.
2471
+ """
2472
+
2473
+ if did.did_type == DIDType.DATASET and dynamic_depth == DIDType.FILE or \
2474
+ did.did_type == DIDType.CONTAINER and dynamic_depth in (DIDType.FILE, DIDType.DATASET):
2475
+
2476
+ if did.did_type == DIDType.DATASET and dynamic_depth == DIDType.FILE:
2477
+ stmt = select(
2478
+ func.count(),
2479
+ func.sum(models.DataIdentifierAssociation.bytes),
2480
+ func.sum(models.DataIdentifierAssociation.events),
2481
+ ).where(
2482
+ and_(models.DataIdentifierAssociation.scope == did.scope,
2483
+ models.DataIdentifierAssociation.name == did.name)
2484
+ )
2485
+ elif did.did_type == DIDType.CONTAINER and dynamic_depth == DIDType.DATASET:
2486
+ child_did_stmt = list_one_did_childs_stmt(did.scope, did.name, did_type=DIDType.DATASET).subquery()
2487
+ stmt = select(
2488
+ func.sum(models.DataIdentifier.length),
2489
+ func.sum(models.DataIdentifier.bytes),
2490
+ func.sum(models.DataIdentifier.events),
2491
+ ).join_from(
2492
+ child_did_stmt,
2493
+ models.DataIdentifier,
2494
+ and_(models.DataIdentifier.scope == child_did_stmt.c.scope,
2495
+ models.DataIdentifier.name == child_did_stmt.c.name),
2496
+ )
2497
+ else: # did.did_type == DIDType.CONTAINER and dynamic_depth == DIDType.FILE:
2498
+ child_did_stmt = list_one_did_childs_stmt(did.scope, did.name, did_type=DIDType.DATASET).subquery()
2499
+ stmt = select(
2500
+ func.count(),
2501
+ func.sum(models.DataIdentifierAssociation.bytes),
2502
+ func.sum(models.DataIdentifierAssociation.events),
2503
+ ).join_from(
2504
+ child_did_stmt,
2505
+ models.DataIdentifierAssociation,
2506
+ and_(models.DataIdentifierAssociation.scope == child_did_stmt.c.scope,
2507
+ models.DataIdentifierAssociation.name == child_did_stmt.c.name)
2508
+ )
2509
+
2510
+ try:
2511
+ length, bytes_, events = session.execute(stmt).one()
2512
+ length = length or 0
2513
+ bytes_ = bytes_ or 0
2514
+ events = events or 0
2515
+ except NoResultFound:
2516
+ bytes_, length, events = 0, 0, 0
2517
+ elif did.did_type == DIDType.FILE:
2518
+ bytes_, length, events = did.bytes or 0, 1, did.events or 0
2519
+ else:
2520
+ bytes_, length, events = did.bytes or 0, did.length or 0, did.events or 0
2521
+ return bytes_, length, events
2522
+
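The branching above decides when the size is aggregated from the content instead of read from the DID row; a small stand-alone illustration of that decision (not part of the release):

from rucio.db.sqla.constants import DIDType

def computes_dynamically(did_type, dynamic_depth):
    # Mirrors the condition above: only these combinations aggregate over the content;
    # everything else falls back to the stored bytes/length/events columns.
    return (did_type == DIDType.DATASET and dynamic_depth == DIDType.FILE) or \
           (did_type == DIDType.CONTAINER and dynamic_depth in (DIDType.FILE, DIDType.DATASET))

assert computes_dynamically(DIDType.DATASET, DIDType.FILE)
assert computes_dynamically(DIDType.CONTAINER, DIDType.DATASET)
assert not computes_dynamically(DIDType.FILE, DIDType.FILE)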
2523
+
2524
+ @transactional_session
2525
+ def resurrect(dids: "Iterable[Mapping[str, Any]]", *, session: "Session") -> None:
2526
+ """
2527
+ Resurrect data identifiers.
2528
+
2529
+ :param dids: The list of dids to resurrect.
2530
+ :param session: The database session in use.
2531
+ """
2532
+ for did in dids:
2533
+ try:
2534
+ stmt = select(
2535
+ models.DeletedDataIdentifier
2536
+ ).with_hint(
2537
+ models.DeletedDataIdentifier,
2538
+ 'INDEX(DELETED_DIDS DELETED_DIDS_PK)',
2539
+ 'oracle'
2540
+ ).where(
2541
+ and_(models.DeletedDataIdentifier.scope == did['scope'],
2542
+ models.DeletedDataIdentifier.name == did['name'])
2543
+ )
2544
+ del_did = session.execute(stmt).scalar_one()
2545
+ except NoResultFound:
2546
+ # The DID might still exist but carry an expiration date; if it does, clear the expiration
2547
+ stmt = update(
2548
+ models.DataIdentifier
2549
+ ).where(
2550
+ and_(models.DataIdentifier.scope == did['scope'],
2551
+ models.DataIdentifier.name == did['name'],
2552
+ models.DataIdentifier.expired_at < datetime.utcnow())
2553
+ ).values({
2554
+ models.DataIdentifier.expired_at: None
2555
+ }).execution_options(
2556
+ synchronize_session=False
2557
+ )
2558
+ rowcount = session.execute(stmt).rowcount
2559
+ if rowcount:
2560
+ continue
2561
+ raise exception.DataIdentifierNotFound("Deleted Data identifier '%(scope)s:%(name)s' not found" % did)
2562
+
2563
+ # Check did_type
2564
+ # if del_did.did_type == DIDType.FILE:
2565
+ # raise exception.UnsupportedOperation("File '%(scope)s:%(name)s' cannot be resurrected" % did)
2566
+
2567
+ kargs = del_did.to_dict()
2568
+ if kargs['expired_at']:
2569
+ kargs['expired_at'] = None
2570
+
2571
+ stmt = delete(
2572
+ models.DeletedDataIdentifier
2573
+ ).prefix_with(
2574
+ "/*+ INDEX(DELETED_DIDS DELETED_DIDS_PK) */", dialect='oracle'
2575
+ ).where(
2576
+ and_(models.DeletedDataIdentifier.scope == did['scope'],
2577
+ models.DeletedDataIdentifier.name == did['name'])
2578
+ )
2579
+ session.execute(stmt)
2580
+
2581
+ models.DataIdentifier(**kargs).save(session=session, flush=False)
2582
+
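A sketch of restoring a deleted DID (same assumptions; names hypothetical). If the DID still exists but is scheduled for expiration, only the expiration date is cleared:

from rucio.common.types import InternalScope
from rucio.core import did as did_core  # assumed module path

scope = InternalScope('user.jdoe')  # hypothetical scope
did_core.resurrect(dids=[{'scope': scope, 'name': 'user.jdoe.test.dataset'}])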
2583
+
2584
+ @stream_session
2585
+ def list_archive_content(
2586
+ scope: "InternalScope",
2587
+ name: str,
2588
+ *,
2589
+ session: "Session"
2590
+ ) -> "Iterator[dict[str, Any]]":
2591
+ """
2592
+ List archive contents.
2593
+
2594
+ :param scope: The archive scope name.
2595
+ :param name: The archive data identifier name.
2596
+ :param session: The database session in use.
2597
+ """
2598
+ try:
2599
+ stmt = select(
2600
+ models.ConstituentAssociation
2601
+ ).with_hint(
2602
+ models.ConstituentAssociation,
2603
+ 'INDEX(ARCHIVE_CONTENTS ARCH_CONTENTS_PK)',
2604
+ 'oracle'
2605
+ ).where(
2606
+ and_(models.ConstituentAssociation.scope == scope,
2607
+ models.ConstituentAssociation.name == name)
2608
+ )
2609
+
2610
+ for tmp_did in session.execute(stmt).yield_per(5).scalars():
2611
+ yield {'scope': tmp_did.child_scope, 'name': tmp_did.child_name,
2612
+ 'bytes': tmp_did.bytes, 'adler32': tmp_did.adler32, 'md5': tmp_did.md5}
2613
+ except NoResultFound:
2614
+ raise exception.DataIdentifierNotFound(f"Data identifier '{scope}:{name}' not found")
2615
+
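A sketch of listing the constituents of an archive file (same assumptions; names hypothetical):

from rucio.common.types import InternalScope
from rucio.core import did as did_core  # assumed module path

scope = InternalScope('user.jdoe')  # hypothetical scope
for constituent in did_core.list_archive_content(scope=scope, name='user.jdoe.test.archive.zip'):
    print(constituent['name'], constituent['bytes'], constituent['adler32'])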
2616
+
2617
+ @transactional_session
2618
+ def add_did_to_followed(
2619
+ scope: "InternalScope",
2620
+ name: str,
2621
+ account: "InternalAccount",
2622
+ *,
2623
+ session: "Session"
2624
+ ) -> None:
2625
+ """
2626
+ Mark a did as followed by the given account
2627
+
2628
+ :param scope: The scope name.
2629
+ :param name: The data identifier name.
2630
+ :param account: The account owner.
2631
+ :param session: The database session in use.
2632
+ """
2633
+ return add_dids_to_followed(dids=[{'scope': scope, 'name': name}],
2634
+ account=account, session=session)
2635
+
2636
+
2637
+ @transactional_session
2638
+ def add_dids_to_followed(
2639
+ dids: "Iterable[Mapping[str, Any]]",
2640
+ account: "InternalAccount",
2641
+ *,
2642
+ session: "Session"
2643
+ ) -> None:
2644
+ """
2645
+ Bulk mark datasets as followed
2646
+
2647
+ :param dids: A list of dids.
2648
+ :param account: The account owner.
2649
+ :param session: The database session in use.
2650
+ """
2651
+ try:
2652
+ for did in dids:
2653
+ # Get the did details corresponding to the scope and name passed.
2654
+ stmt = select(
2655
+ models.DataIdentifier
2656
+ ).where(
2657
+ and_(models.DataIdentifier.scope == did['scope'],
2658
+ models.DataIdentifier.name == did['name'])
2659
+ )
2660
+ did = session.execute(stmt).scalar_one()
2661
+ # Add the queried to the followed table.
2662
+ new_did_followed = models.DidFollowed(scope=did.scope, name=did.name, account=account,
2663
+ did_type=did.did_type)
2664
+
2665
+ new_did_followed.save(session=session, flush=False)
2666
+
2667
+ session.flush()
2668
+ except IntegrityError as error:
2669
+ raise exception.RucioException(error.args)
2670
+
2671
+
2672
+ @stream_session
2673
+ def get_users_following_did(
2674
+ scope: "InternalScope",
2675
+ name: str,
2676
+ *,
2677
+ session: "Session"
2678
+ ) -> "Iterator[dict[str, InternalAccount]]":
2679
+ """
2680
+ Return list of users following a did
2681
+
2682
+ :param scope: The scope name.
2683
+ :param name: The data identifier name.
2684
+ :param session: The database session in use.
2685
+ """
2686
+ try:
2687
+ stmt = select(
2688
+ models.DidFollowed
2689
+ ).where(
2690
+ and_(models.DidFollowed.scope == scope,
2691
+ models.DidFollowed.name == name)
2692
+ )
2693
+ for user in session.execute(stmt).scalars().all():
2694
+ # Return a dictionary of users to be rendered as json.
2695
+ yield {'user': user.account}
2696
+
2697
+ except NoResultFound:
2698
+ raise exception.DataIdentifierNotFound("Data identifier '%s:%s' not found" % (scope, name))
2699
+
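A sketch of following a dataset and then listing its followers (same assumptions; names hypothetical):

from rucio.common.types import InternalAccount, InternalScope
from rucio.core import did as did_core  # assumed module path

scope = InternalScope('user.jdoe')   # hypothetical scope
account = InternalAccount('jdoe')    # hypothetical account

did_core.add_dids_to_followed(dids=[{'scope': scope, 'name': 'user.jdoe.test.dataset'}], account=account)
for follower in did_core.get_users_following_did(scope=scope, name='user.jdoe.test.dataset'):
    print(follower['user'])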
2700
+
2701
+ @transactional_session
2702
+ def remove_did_from_followed(
2703
+ scope: "InternalScope",
2704
+ name: str,
2705
+ account: "InternalAccount",
2706
+ *,
2707
+ session: "Session"
2708
+ ) -> None:
2709
+ """
2710
+ Mark a did as not followed
2711
+
2712
+ :param scope: The scope name.
2713
+ :param name: The data identifier name.
2714
+ :param account: The account owner.
2715
+ :param session: The database session in use.
2716
+ """
2717
+ return remove_dids_from_followed(dids=[{'scope': scope, 'name': name}],
2718
+ account=account, session=session)
2719
+
2720
+
2721
+ @transactional_session
2722
+ def remove_dids_from_followed(
2723
+ dids: "Iterable[Mapping[str, Any]]",
2724
+ account: "InternalAccount",
2725
+ *,
2726
+ session: "Session"
2727
+ ) -> None:
2728
+ """
2729
+ Bulk mark datasets as not followed
2730
+
2731
+ :param dids: A list of dids.
2732
+ :param account: The account owner.
2733
+ :param session: The database session in use.
2734
+ """
2735
+ try:
2736
+ for did in dids:
2737
+ stmt = delete(
2738
+ models.DidFollowed
2739
+ ).where(
2740
+ and_(models.DidFollowed.scope == did['scope'],
2741
+ models.DidFollowed.name == did['name'],
2742
+ models.DidFollowed.account == account)
2743
+ ).execution_options(
2744
+ synchronize_session=False
2745
+ )
2746
+ session.execute(stmt)
2747
+ except NoResultFound:
2748
+ raise exception.DataIdentifierNotFound("Data identifier '%s:%s' not found" % (did['scope'], did['name']))
2749
+
2750
+
2751
+ @transactional_session
2752
+ def trigger_event(
2753
+ scope: "InternalScope",
2754
+ name: str,
2755
+ event_type: str,
2756
+ payload: str,
2757
+ *,
2758
+ session: "Session"
2759
+ ) -> None:
2760
+ """
2761
+ Record an event affecting the DID in the FollowEvent table, once per following account
2762
+
2763
+ :param scope: The scope name.
2764
+ :param name: The data identifier name.
2765
+ :param event_type: The type of event affecting the did.
2766
+ :param payload: Any message to be stored along with the event.
2767
+ :param session: The database session in use.
2768
+ """
2769
+ try:
2770
+ stmt = select(
2771
+ models.DidFollowed
2772
+ ).where(
2773
+ and_(models.DidFollowed.scope == scope,
2774
+ models.DidFollowed.name == name)
2775
+ )
2776
+ for did in session.execute(stmt).scalars().all():
2777
+ # Create a new event using the specified parameters.
2778
+ new_event = models.FollowEvent(scope=scope, name=name, account=did.account,
2779
+ did_type=did.did_type, event_type=event_type, payload=payload)
2780
+ new_event.save(session=session, flush=False)
2781
+
2782
+ session.flush()
2783
+ except IntegrityError as error:
2784
+ raise exception.RucioException(error.args)
2785
+
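A sketch of recording an event for every follower of a DID (same assumptions; names and payload hypothetical):

from rucio.common.types import InternalScope
from rucio.core import did as did_core  # assumed module path

scope = InternalScope('user.jdoe')  # hypothetical scope
# One FollowEvent row is written per account currently following the DID.
did_core.trigger_event(scope=scope, name='user.jdoe.test.dataset',
                       event_type='CLOSE', payload='dataset was frozen')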
2786
+
2787
+ @read_session
2788
+ def create_reports(
2789
+ total_workers: int,
2790
+ worker_number: int,
2791
+ *,
2792
+ session: "Session"
2793
+ ) -> None:
2794
+ """
2795
+ Create a summary report of the events affecting a dataset, for its followers.
2796
+
2797
+ :param total_workers: Number of parallel workers sharing the pending events.
+ :param worker_number: Id of the current worker.
+ :param session: The database session in use.
2798
+ """
2799
+ # Query the FollowEvent table
2800
+ stmt = select(
2801
+ models.FollowEvent
2802
+ ).order_by(
2803
+ models.FollowEvent.created_at
2804
+ )
2805
+
2806
+ # Use heartbeat mechanism to select a chunk of events based on the hashed account
2807
+ stmt = filter_thread_work(session=session, query=stmt, total_threads=total_workers, thread_id=worker_number, hash_variable='account')
2808
+
2809
+ try:
2810
+ events = session.execute(stmt).scalars().all()
2811
+ # If events exist for an account then create a report.
2812
+ if events:
2813
+ body = '''
2814
+ Hello,
2815
+ This is an auto-generated report of the events that have affected the datasets you follow.
2816
+
2817
+ '''
2818
+ account = None
2819
+ for i, event in enumerate(events):
2820
+ # Add each event to the message body.
2821
+ body += "{}. Dataset: {} Event: {}\n".format(i + 1, event.name, event.event_type)
2822
+ if event.payload:
2823
+ body += "Message: {}\n".format(event.payload)
2824
+ body += "\n"
2825
+ account = event.account
2826
+ # Clean up the event after creating the report
2827
+ stmt = delete(
2828
+ models.FollowEvent
2829
+ ).where(
2830
+ and_(models.FollowEvent.scope == event.scope,
2831
+ models.FollowEvent.name == event.name,
2832
+ models.FollowEvent.account == event.account)
2833
+ ).execution_options(
2834
+ synchronize_session=False
2835
+ )
2836
+ session.execute(stmt)
2837
+
2838
+ body += "Thank You."
2839
+ # Get the email associated with the account.
2840
+ stmt = select(
2841
+ models.Account.email
2842
+ ).where(
2843
+ models.Account.account == account
2844
+ )
2845
+ email = session.execute(stmt).scalar()
2846
+ add_message('email', {'to': email,
2847
+ 'subject': 'Report of affected dataset(s)',
2848
+ 'body': body})
2849
+
2850
+ except NoResultFound:
2851
+ raise exception.AccountNotFound("No email found for given account.")
2852
+
2853
+
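A single-worker invocation sketch (same assumptions): all pending FollowEvent rows are summarised into one email message per follower and then deleted:

from rucio.core import did as did_core  # assumed module path

did_core.create_reports(total_workers=1, worker_number=0)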
2854
+ @transactional_session
2855
+ def insert_content_history(
2856
+ filter_: "ColumnExpressionArgument[bool]",
2857
+ did_created_at: datetime,
2858
+ *,
2859
+ session: "Session"
2860
+ ) -> None:
2861
+ """
2862
+ Insert a list of DIDs into the content history table
2863
+
2864
+ :param filter_: Filter clause selecting the content rows to archive
2865
+ :param did_created_at: Creation date of the did
2866
+ :param session: The database session in use.
2867
+ """
2868
+ new_did_created_at = did_created_at
2869
+ stmt = select(
2870
+ models.DataIdentifierAssociation.scope,
2871
+ models.DataIdentifierAssociation.name,
2872
+ models.DataIdentifierAssociation.child_scope,
2873
+ models.DataIdentifierAssociation.child_name,
2874
+ models.DataIdentifierAssociation.did_type,
2875
+ models.DataIdentifierAssociation.child_type,
2876
+ models.DataIdentifierAssociation.bytes,
2877
+ models.DataIdentifierAssociation.adler32,
2878
+ models.DataIdentifierAssociation.md5,
2879
+ models.DataIdentifierAssociation.guid,
2880
+ models.DataIdentifierAssociation.events,
2881
+ models.DataIdentifierAssociation.rule_evaluation,
2882
+ models.DataIdentifierAssociation.created_at,
2883
+ models.DataIdentifierAssociation.updated_at
2884
+ ).where(
2885
+ filter_
2886
+ )
2887
+ for cont in session.execute(stmt).all():
2888
+ if not did_created_at:
2889
+ new_did_created_at = cont.created_at
2890
+ models.DataIdentifierAssociationHistory(
2891
+ scope=cont.scope,
2892
+ name=cont.name,
2893
+ child_scope=cont.child_scope,
2894
+ child_name=cont.child_name,
2895
+ did_type=cont.did_type,
2896
+ child_type=cont.child_type,
2897
+ bytes=cont.bytes,
2898
+ adler32=cont.adler32,
2899
+ md5=cont.md5,
2900
+ guid=cont.guid,
2901
+ events=cont.events,
2902
+ rule_evaluation=cont.rule_evaluation,
2903
+ updated_at=cont.updated_at,
2904
+ created_at=cont.created_at,
2905
+ did_created_at=new_did_created_at,
2906
+ deleted_at=datetime.utcnow()
2907
+ ).save(session=session, flush=False)
2908
+
2909
+
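A sketch of archiving the content rows of one dataset before deletion (same assumptions; names hypothetical). Passing did_created_at=None makes the loop above fall back to each content row's own created_at:

from sqlalchemy import and_

from rucio.common.types import InternalScope
from rucio.core import did as did_core  # assumed module path
from rucio.db.sqla import models

scope = InternalScope('user.jdoe')  # hypothetical scope
did_core.insert_content_history(
    filter_=and_(models.DataIdentifierAssociation.scope == scope,
                 models.DataIdentifierAssociation.name == 'user.jdoe.test.dataset'),
    did_created_at=None,
)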
2910
+ @transactional_session
2911
+ def insert_deleted_dids(filter_: "ColumnExpressionArgument[bool]", *, session: "Session") -> None:
2912
+ """
2913
+ Insert a list of DIDs into the deleted_dids table
2914
+
2915
+ :param filter_: The database filter to retrieve dids for archival
2916
+ :param session: The database session in use.
2917
+ """
2918
+ stmt = select(
2919
+ models.DataIdentifier.scope,
2920
+ models.DataIdentifier.name,
2921
+ models.DataIdentifier.account,
2922
+ models.DataIdentifier.did_type,
2923
+ models.DataIdentifier.is_open,
2924
+ models.DataIdentifier.monotonic,
2925
+ models.DataIdentifier.hidden,
2926
+ models.DataIdentifier.obsolete,
2927
+ models.DataIdentifier.complete,
2928
+ models.DataIdentifier.is_new,
2929
+ models.DataIdentifier.availability,
2930
+ models.DataIdentifier.suppressed,
2931
+ models.DataIdentifier.bytes,
2932
+ models.DataIdentifier.length,
2933
+ models.DataIdentifier.md5,
2934
+ models.DataIdentifier.adler32,
2935
+ models.DataIdentifier.expired_at,
2936
+ models.DataIdentifier.purge_replicas,
2937
+ models.DataIdentifier.deleted_at,
2938
+ models.DataIdentifier.events,
2939
+ models.DataIdentifier.guid,
2940
+ models.DataIdentifier.project,
2941
+ models.DataIdentifier.datatype,
2942
+ models.DataIdentifier.run_number,
2943
+ models.DataIdentifier.stream_name,
2944
+ models.DataIdentifier.prod_step,
2945
+ models.DataIdentifier.version,
2946
+ models.DataIdentifier.campaign,
2947
+ models.DataIdentifier.task_id,
2948
+ models.DataIdentifier.panda_id,
2949
+ models.DataIdentifier.lumiblocknr,
2950
+ models.DataIdentifier.provenance,
2951
+ models.DataIdentifier.phys_group,
2952
+ models.DataIdentifier.transient,
2953
+ models.DataIdentifier.accessed_at,
2954
+ models.DataIdentifier.closed_at,
2955
+ models.DataIdentifier.eol_at,
2956
+ models.DataIdentifier.is_archive,
2957
+ models.DataIdentifier.constituent,
2958
+ models.DataIdentifier.access_cnt
2959
+ ).where(
2960
+ filter_
2961
+ )
2962
+
2963
+ for did in session.execute(stmt).all():
2964
+ models.DeletedDataIdentifier(
2965
+ scope=did.scope,
2966
+ name=did.name,
2967
+ account=did.account,
2968
+ did_type=did.did_type,
2969
+ is_open=did.is_open,
2970
+ monotonic=did.monotonic,
2971
+ hidden=did.hidden,
2972
+ obsolete=did.obsolete,
2973
+ complete=did.complete,
2974
+ is_new=did.is_new,
2975
+ availability=did.availability,
2976
+ suppressed=did.suppressed,
2977
+ bytes=did.bytes,
2978
+ length=did.length,
2979
+ md5=did.md5,
2980
+ adler32=did.adler32,
2981
+ expired_at=did.expired_at,
2982
+ purge_replicas=did.purge_replicas,
2983
+ deleted_at=datetime.utcnow(),
2984
+ events=did.events,
2985
+ guid=did.guid,
2986
+ project=did.project,
2987
+ datatype=did.datatype,
2988
+ run_number=did.run_number,
2989
+ stream_name=did.stream_name,
2990
+ prod_step=did.prod_step,
2991
+ version=did.version,
2992
+ campaign=did.campaign,
2993
+ task_id=did.task_id,
2994
+ panda_id=did.panda_id,
2995
+ lumiblocknr=did.lumiblocknr,
2996
+ provenance=did.provenance,
2997
+ phys_group=did.phys_group,
2998
+ transient=did.transient,
2999
+ accessed_at=did.accessed_at,
3000
+ closed_at=did.closed_at,
3001
+ eol_at=did.eol_at,
3002
+ is_archive=did.is_archive,
3003
+ constituent=did.constituent
3004
+ ).save(session=session, flush=False)