rucio 37.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rucio might be problematic. Click here for more details.
- rucio/__init__.py +17 -0
- rucio/alembicrevision.py +15 -0
- rucio/cli/__init__.py +14 -0
- rucio/cli/account.py +216 -0
- rucio/cli/bin_legacy/__init__.py +13 -0
- rucio/cli/bin_legacy/rucio.py +2825 -0
- rucio/cli/bin_legacy/rucio_admin.py +2500 -0
- rucio/cli/command.py +272 -0
- rucio/cli/config.py +72 -0
- rucio/cli/did.py +191 -0
- rucio/cli/download.py +128 -0
- rucio/cli/lifetime_exception.py +33 -0
- rucio/cli/replica.py +162 -0
- rucio/cli/rse.py +293 -0
- rucio/cli/rule.py +158 -0
- rucio/cli/scope.py +40 -0
- rucio/cli/subscription.py +73 -0
- rucio/cli/upload.py +60 -0
- rucio/cli/utils.py +226 -0
- rucio/client/__init__.py +15 -0
- rucio/client/accountclient.py +432 -0
- rucio/client/accountlimitclient.py +183 -0
- rucio/client/baseclient.py +983 -0
- rucio/client/client.py +120 -0
- rucio/client/configclient.py +126 -0
- rucio/client/credentialclient.py +59 -0
- rucio/client/didclient.py +868 -0
- rucio/client/diracclient.py +56 -0
- rucio/client/downloadclient.py +1783 -0
- rucio/client/exportclient.py +44 -0
- rucio/client/fileclient.py +50 -0
- rucio/client/importclient.py +42 -0
- rucio/client/lifetimeclient.py +90 -0
- rucio/client/lockclient.py +109 -0
- rucio/client/metaconventionsclient.py +140 -0
- rucio/client/pingclient.py +44 -0
- rucio/client/replicaclient.py +452 -0
- rucio/client/requestclient.py +125 -0
- rucio/client/richclient.py +317 -0
- rucio/client/rseclient.py +746 -0
- rucio/client/ruleclient.py +294 -0
- rucio/client/scopeclient.py +90 -0
- rucio/client/subscriptionclient.py +173 -0
- rucio/client/touchclient.py +82 -0
- rucio/client/uploadclient.py +969 -0
- rucio/common/__init__.py +13 -0
- rucio/common/bittorrent.py +234 -0
- rucio/common/cache.py +111 -0
- rucio/common/checksum.py +168 -0
- rucio/common/client.py +122 -0
- rucio/common/config.py +788 -0
- rucio/common/constants.py +217 -0
- rucio/common/constraints.py +17 -0
- rucio/common/didtype.py +237 -0
- rucio/common/dumper/__init__.py +342 -0
- rucio/common/dumper/consistency.py +497 -0
- rucio/common/dumper/data_models.py +362 -0
- rucio/common/dumper/path_parsing.py +75 -0
- rucio/common/exception.py +1208 -0
- rucio/common/extra.py +31 -0
- rucio/common/logging.py +420 -0
- rucio/common/pcache.py +1409 -0
- rucio/common/plugins.py +185 -0
- rucio/common/policy.py +93 -0
- rucio/common/schema/__init__.py +200 -0
- rucio/common/schema/generic.py +416 -0
- rucio/common/schema/generic_multi_vo.py +395 -0
- rucio/common/stomp_utils.py +423 -0
- rucio/common/stopwatch.py +55 -0
- rucio/common/test_rucio_server.py +154 -0
- rucio/common/types.py +483 -0
- rucio/common/utils.py +1688 -0
- rucio/core/__init__.py +13 -0
- rucio/core/account.py +496 -0
- rucio/core/account_counter.py +236 -0
- rucio/core/account_limit.py +425 -0
- rucio/core/authentication.py +620 -0
- rucio/core/config.py +437 -0
- rucio/core/credential.py +224 -0
- rucio/core/did.py +3004 -0
- rucio/core/did_meta_plugins/__init__.py +252 -0
- rucio/core/did_meta_plugins/did_column_meta.py +331 -0
- rucio/core/did_meta_plugins/did_meta_plugin_interface.py +165 -0
- rucio/core/did_meta_plugins/elasticsearch_meta.py +407 -0
- rucio/core/did_meta_plugins/filter_engine.py +672 -0
- rucio/core/did_meta_plugins/json_meta.py +240 -0
- rucio/core/did_meta_plugins/mongo_meta.py +229 -0
- rucio/core/did_meta_plugins/postgres_meta.py +352 -0
- rucio/core/dirac.py +237 -0
- rucio/core/distance.py +187 -0
- rucio/core/exporter.py +59 -0
- rucio/core/heartbeat.py +363 -0
- rucio/core/identity.py +301 -0
- rucio/core/importer.py +260 -0
- rucio/core/lifetime_exception.py +377 -0
- rucio/core/lock.py +577 -0
- rucio/core/message.py +288 -0
- rucio/core/meta_conventions.py +203 -0
- rucio/core/monitor.py +448 -0
- rucio/core/naming_convention.py +195 -0
- rucio/core/nongrid_trace.py +136 -0
- rucio/core/oidc.py +1463 -0
- rucio/core/permission/__init__.py +161 -0
- rucio/core/permission/generic.py +1124 -0
- rucio/core/permission/generic_multi_vo.py +1144 -0
- rucio/core/quarantined_replica.py +224 -0
- rucio/core/replica.py +4483 -0
- rucio/core/replica_sorter.py +362 -0
- rucio/core/request.py +3091 -0
- rucio/core/rse.py +2079 -0
- rucio/core/rse_counter.py +185 -0
- rucio/core/rse_expression_parser.py +459 -0
- rucio/core/rse_selector.py +304 -0
- rucio/core/rule.py +4484 -0
- rucio/core/rule_grouping.py +1620 -0
- rucio/core/scope.py +181 -0
- rucio/core/subscription.py +362 -0
- rucio/core/topology.py +490 -0
- rucio/core/trace.py +375 -0
- rucio/core/transfer.py +1531 -0
- rucio/core/vo.py +169 -0
- rucio/core/volatile_replica.py +151 -0
- rucio/daemons/__init__.py +13 -0
- rucio/daemons/abacus/__init__.py +13 -0
- rucio/daemons/abacus/account.py +116 -0
- rucio/daemons/abacus/collection_replica.py +124 -0
- rucio/daemons/abacus/rse.py +117 -0
- rucio/daemons/atropos/__init__.py +13 -0
- rucio/daemons/atropos/atropos.py +242 -0
- rucio/daemons/auditor/__init__.py +289 -0
- rucio/daemons/auditor/hdfs.py +97 -0
- rucio/daemons/auditor/srmdumps.py +355 -0
- rucio/daemons/automatix/__init__.py +13 -0
- rucio/daemons/automatix/automatix.py +304 -0
- rucio/daemons/badreplicas/__init__.py +13 -0
- rucio/daemons/badreplicas/minos.py +322 -0
- rucio/daemons/badreplicas/minos_temporary_expiration.py +171 -0
- rucio/daemons/badreplicas/necromancer.py +196 -0
- rucio/daemons/bb8/__init__.py +13 -0
- rucio/daemons/bb8/bb8.py +353 -0
- rucio/daemons/bb8/common.py +759 -0
- rucio/daemons/bb8/nuclei_background_rebalance.py +153 -0
- rucio/daemons/bb8/t2_background_rebalance.py +153 -0
- rucio/daemons/cache/__init__.py +13 -0
- rucio/daemons/cache/consumer.py +133 -0
- rucio/daemons/common.py +405 -0
- rucio/daemons/conveyor/__init__.py +13 -0
- rucio/daemons/conveyor/common.py +562 -0
- rucio/daemons/conveyor/finisher.py +529 -0
- rucio/daemons/conveyor/poller.py +394 -0
- rucio/daemons/conveyor/preparer.py +205 -0
- rucio/daemons/conveyor/receiver.py +179 -0
- rucio/daemons/conveyor/stager.py +133 -0
- rucio/daemons/conveyor/submitter.py +403 -0
- rucio/daemons/conveyor/throttler.py +532 -0
- rucio/daemons/follower/__init__.py +13 -0
- rucio/daemons/follower/follower.py +101 -0
- rucio/daemons/hermes/__init__.py +13 -0
- rucio/daemons/hermes/hermes.py +534 -0
- rucio/daemons/judge/__init__.py +13 -0
- rucio/daemons/judge/cleaner.py +159 -0
- rucio/daemons/judge/evaluator.py +185 -0
- rucio/daemons/judge/injector.py +162 -0
- rucio/daemons/judge/repairer.py +154 -0
- rucio/daemons/oauthmanager/__init__.py +13 -0
- rucio/daemons/oauthmanager/oauthmanager.py +198 -0
- rucio/daemons/reaper/__init__.py +13 -0
- rucio/daemons/reaper/dark_reaper.py +282 -0
- rucio/daemons/reaper/reaper.py +739 -0
- rucio/daemons/replicarecoverer/__init__.py +13 -0
- rucio/daemons/replicarecoverer/suspicious_replica_recoverer.py +626 -0
- rucio/daemons/rsedecommissioner/__init__.py +13 -0
- rucio/daemons/rsedecommissioner/config.py +81 -0
- rucio/daemons/rsedecommissioner/profiles/__init__.py +24 -0
- rucio/daemons/rsedecommissioner/profiles/atlas.py +60 -0
- rucio/daemons/rsedecommissioner/profiles/generic.py +452 -0
- rucio/daemons/rsedecommissioner/profiles/types.py +93 -0
- rucio/daemons/rsedecommissioner/rse_decommissioner.py +280 -0
- rucio/daemons/storage/__init__.py +13 -0
- rucio/daemons/storage/consistency/__init__.py +13 -0
- rucio/daemons/storage/consistency/actions.py +848 -0
- rucio/daemons/tracer/__init__.py +13 -0
- rucio/daemons/tracer/kronos.py +511 -0
- rucio/daemons/transmogrifier/__init__.py +13 -0
- rucio/daemons/transmogrifier/transmogrifier.py +762 -0
- rucio/daemons/undertaker/__init__.py +13 -0
- rucio/daemons/undertaker/undertaker.py +137 -0
- rucio/db/__init__.py +13 -0
- rucio/db/sqla/__init__.py +52 -0
- rucio/db/sqla/constants.py +206 -0
- rucio/db/sqla/migrate_repo/__init__.py +13 -0
- rucio/db/sqla/migrate_repo/env.py +110 -0
- rucio/db/sqla/migrate_repo/versions/01eaf73ab656_add_new_rule_notification_state_progress.py +70 -0
- rucio/db/sqla/migrate_repo/versions/0437a40dbfd1_add_eol_at_in_rules.py +47 -0
- rucio/db/sqla/migrate_repo/versions/0f1adb7a599a_create_transfer_hops_table.py +59 -0
- rucio/db/sqla/migrate_repo/versions/102efcf145f4_added_stuck_at_column_to_rules.py +43 -0
- rucio/db/sqla/migrate_repo/versions/13d4f70c66a9_introduce_transfer_limits.py +91 -0
- rucio/db/sqla/migrate_repo/versions/140fef722e91_cleanup_distances_table.py +76 -0
- rucio/db/sqla/migrate_repo/versions/14ec5aeb64cf_add_request_external_host.py +43 -0
- rucio/db/sqla/migrate_repo/versions/156fb5b5a14_add_request_type_to_requests_idx.py +50 -0
- rucio/db/sqla/migrate_repo/versions/1677d4d803c8_split_rse_availability_into_multiple.py +68 -0
- rucio/db/sqla/migrate_repo/versions/16a0aca82e12_create_index_on_table_replicas_path.py +40 -0
- rucio/db/sqla/migrate_repo/versions/1803333ac20f_adding_provenance_and_phys_group.py +45 -0
- rucio/db/sqla/migrate_repo/versions/1a29d6a9504c_add_didtype_chck_to_requests.py +60 -0
- rucio/db/sqla/migrate_repo/versions/1a80adff031a_create_index_on_rules_hist_recent.py +40 -0
- rucio/db/sqla/migrate_repo/versions/1c45d9730ca6_increase_identity_length.py +140 -0
- rucio/db/sqla/migrate_repo/versions/1d1215494e95_add_quarantined_replicas_table.py +73 -0
- rucio/db/sqla/migrate_repo/versions/1d96f484df21_asynchronous_rules_and_rule_approval.py +74 -0
- rucio/db/sqla/migrate_repo/versions/1f46c5f240ac_add_bytes_column_to_bad_replicas.py +43 -0
- rucio/db/sqla/migrate_repo/versions/1fc15ab60d43_add_message_history_table.py +50 -0
- rucio/db/sqla/migrate_repo/versions/2190e703eb6e_move_rse_settings_to_rse_attributes.py +134 -0
- rucio/db/sqla/migrate_repo/versions/21d6b9dc9961_add_mismatch_scheme_state_to_requests.py +64 -0
- rucio/db/sqla/migrate_repo/versions/22cf51430c78_add_availability_column_to_table_rses.py +39 -0
- rucio/db/sqla/migrate_repo/versions/22d887e4ec0a_create_sources_table.py +64 -0
- rucio/db/sqla/migrate_repo/versions/25821a8a45a3_remove_unique_constraint_on_requests.py +51 -0
- rucio/db/sqla/migrate_repo/versions/25fc855625cf_added_unique_constraint_to_rules.py +41 -0
- rucio/db/sqla/migrate_repo/versions/269fee20dee9_add_repair_cnt_to_locks.py +43 -0
- rucio/db/sqla/migrate_repo/versions/271a46ea6244_add_ignore_availability_column_to_rules.py +44 -0
- rucio/db/sqla/migrate_repo/versions/277b5fbb41d3_switch_heartbeats_executable.py +53 -0
- rucio/db/sqla/migrate_repo/versions/27e3a68927fb_remove_replicas_tombstone_and_replicas_.py +38 -0
- rucio/db/sqla/migrate_repo/versions/2854cd9e168_added_rule_id_column.py +47 -0
- rucio/db/sqla/migrate_repo/versions/295289b5a800_processed_by_and__at_in_requests.py +45 -0
- rucio/db/sqla/migrate_repo/versions/2962ece31cf4_add_nbaccesses_column_in_the_did_table.py +45 -0
- rucio/db/sqla/migrate_repo/versions/2af3291ec4c_added_replicas_history_table.py +57 -0
- rucio/db/sqla/migrate_repo/versions/2b69addda658_add_columns_for_third_party_copy_read_.py +45 -0
- rucio/db/sqla/migrate_repo/versions/2b8e7bcb4783_add_config_table.py +69 -0
- rucio/db/sqla/migrate_repo/versions/2ba5229cb54c_add_submitted_at_to_requests_table.py +43 -0
- rucio/db/sqla/migrate_repo/versions/2cbee484dcf9_added_column_volume_to_rse_transfer_.py +42 -0
- rucio/db/sqla/migrate_repo/versions/2edee4a83846_add_source_to_requests_and_requests_.py +47 -0
- rucio/db/sqla/migrate_repo/versions/2eef46be23d4_change_tokens_pk.py +46 -0
- rucio/db/sqla/migrate_repo/versions/2f648fc909f3_index_in_rule_history_on_scope_name.py +40 -0
- rucio/db/sqla/migrate_repo/versions/3082b8cef557_add_naming_convention_table_and_closed_.py +67 -0
- rucio/db/sqla/migrate_repo/versions/30d5206e9cad_increase_oauthrequest_redirect_msg_.py +37 -0
- rucio/db/sqla/migrate_repo/versions/30fa38b6434e_add_index_on_service_column_in_the_message_table.py +44 -0
- rucio/db/sqla/migrate_repo/versions/3152492b110b_added_staging_area_column.py +77 -0
- rucio/db/sqla/migrate_repo/versions/32c7d2783f7e_create_bad_replicas_table.py +60 -0
- rucio/db/sqla/migrate_repo/versions/3345511706b8_replicas_table_pk_definition_is_in_.py +72 -0
- rucio/db/sqla/migrate_repo/versions/35ef10d1e11b_change_index_on_table_requests.py +42 -0
- rucio/db/sqla/migrate_repo/versions/379a19b5332d_create_rse_limits_table.py +65 -0
- rucio/db/sqla/migrate_repo/versions/384b96aa0f60_created_rule_history_tables.py +133 -0
- rucio/db/sqla/migrate_repo/versions/3ac1660a1a72_extend_distance_table.py +55 -0
- rucio/db/sqla/migrate_repo/versions/3ad36e2268b0_create_collection_replicas_updates_table.py +76 -0
- rucio/db/sqla/migrate_repo/versions/3c9df354071b_extend_waiting_request_state.py +60 -0
- rucio/db/sqla/migrate_repo/versions/3d9813fab443_add_a_new_state_lost_in_badfilesstatus.py +44 -0
- rucio/db/sqla/migrate_repo/versions/40ad39ce3160_add_transferred_at_to_requests_table.py +43 -0
- rucio/db/sqla/migrate_repo/versions/4207be2fd914_add_notification_column_to_rules.py +64 -0
- rucio/db/sqla/migrate_repo/versions/42db2617c364_create_index_on_requests_external_id.py +40 -0
- rucio/db/sqla/migrate_repo/versions/436827b13f82_added_column_activity_to_table_requests.py +43 -0
- rucio/db/sqla/migrate_repo/versions/44278720f774_update_requests_typ_sta_upd_idx_index.py +44 -0
- rucio/db/sqla/migrate_repo/versions/45378a1e76a8_create_collection_replica_table.py +78 -0
- rucio/db/sqla/migrate_repo/versions/469d262be19_removing_created_at_index.py +41 -0
- rucio/db/sqla/migrate_repo/versions/4783c1f49cb4_create_distance_table.py +59 -0
- rucio/db/sqla/migrate_repo/versions/49a21b4d4357_create_index_on_table_tokens.py +44 -0
- rucio/db/sqla/migrate_repo/versions/4a2cbedda8b9_add_source_replica_expression_column_to_.py +43 -0
- rucio/db/sqla/migrate_repo/versions/4a7182d9578b_added_bytes_length_accessed_at_columns.py +49 -0
- rucio/db/sqla/migrate_repo/versions/4bab9edd01fc_create_index_on_requests_rule_id.py +40 -0
- rucio/db/sqla/migrate_repo/versions/4c3a4acfe006_new_attr_account_table.py +63 -0
- rucio/db/sqla/migrate_repo/versions/4cf0a2e127d4_adding_transient_metadata.py +43 -0
- rucio/db/sqla/migrate_repo/versions/4df2c5ddabc0_remove_temporary_dids.py +55 -0
- rucio/db/sqla/migrate_repo/versions/50280c53117c_add_qos_class_to_rse.py +45 -0
- rucio/db/sqla/migrate_repo/versions/52153819589c_add_rse_id_to_replicas_table.py +43 -0
- rucio/db/sqla/migrate_repo/versions/52fd9f4916fa_added_activity_to_rules.py +43 -0
- rucio/db/sqla/migrate_repo/versions/53b479c3cb0f_fix_did_meta_table_missing_updated_at_.py +45 -0
- rucio/db/sqla/migrate_repo/versions/5673b4b6e843_add_wfms_metadata_to_rule_tables.py +47 -0
- rucio/db/sqla/migrate_repo/versions/575767d9f89_added_source_history_table.py +58 -0
- rucio/db/sqla/migrate_repo/versions/58bff7008037_add_started_at_to_requests.py +45 -0
- rucio/db/sqla/migrate_repo/versions/58c8b78301ab_rename_callback_to_message.py +106 -0
- rucio/db/sqla/migrate_repo/versions/5f139f77382a_added_child_rule_id_column.py +55 -0
- rucio/db/sqla/migrate_repo/versions/688ef1840840_adding_did_meta_table.py +50 -0
- rucio/db/sqla/migrate_repo/versions/6e572a9bfbf3_add_new_split_container_column_to_rules.py +47 -0
- rucio/db/sqla/migrate_repo/versions/70587619328_add_comment_column_for_subscriptions.py +43 -0
- rucio/db/sqla/migrate_repo/versions/739064d31565_remove_history_table_pks.py +41 -0
- rucio/db/sqla/migrate_repo/versions/7541902bf173_add_didsfollowed_and_followevents_table.py +91 -0
- rucio/db/sqla/migrate_repo/versions/7ec22226cdbf_new_replica_state_for_temporary_.py +72 -0
- rucio/db/sqla/migrate_repo/versions/810a41685bc1_added_columns_rse_transfer_limits.py +49 -0
- rucio/db/sqla/migrate_repo/versions/83f991c63a93_correct_rse_expression_length.py +43 -0
- rucio/db/sqla/migrate_repo/versions/8523998e2e76_increase_size_of_extended_attributes_.py +43 -0
- rucio/db/sqla/migrate_repo/versions/8ea9122275b1_adding_missing_function_based_indices.py +53 -0
- rucio/db/sqla/migrate_repo/versions/90f47792bb76_add_clob_payload_to_messages.py +45 -0
- rucio/db/sqla/migrate_repo/versions/914b8f02df38_new_table_for_lifetime_model_exceptions.py +68 -0
- rucio/db/sqla/migrate_repo/versions/94a5961ddbf2_add_estimator_columns.py +45 -0
- rucio/db/sqla/migrate_repo/versions/9a1b149a2044_add_saml_identity_type.py +94 -0
- rucio/db/sqla/migrate_repo/versions/9a45bc4ea66d_add_vp_table.py +54 -0
- rucio/db/sqla/migrate_repo/versions/9eb936a81eb1_true_is_true.py +72 -0
- rucio/db/sqla/migrate_repo/versions/a08fa8de1545_transfer_stats_table.py +55 -0
- rucio/db/sqla/migrate_repo/versions/a118956323f8_added_vo_table_and_vo_col_to_rse.py +76 -0
- rucio/db/sqla/migrate_repo/versions/a193a275255c_add_status_column_in_messages.py +47 -0
- rucio/db/sqla/migrate_repo/versions/a5f6f6e928a7_1_7_0.py +121 -0
- rucio/db/sqla/migrate_repo/versions/a616581ee47_added_columns_to_table_requests.py +59 -0
- rucio/db/sqla/migrate_repo/versions/a6eb23955c28_state_idx_non_functional.py +52 -0
- rucio/db/sqla/migrate_repo/versions/a74275a1ad30_added_global_quota_table.py +54 -0
- rucio/db/sqla/migrate_repo/versions/a93e4e47bda_heartbeats.py +64 -0
- rucio/db/sqla/migrate_repo/versions/ae2a56fcc89_added_comment_column_to_rules.py +49 -0
- rucio/db/sqla/migrate_repo/versions/b0070f3695c8_add_deletedidmeta_table.py +57 -0
- rucio/db/sqla/migrate_repo/versions/b4293a99f344_added_column_identity_to_table_tokens.py +43 -0
- rucio/db/sqla/migrate_repo/versions/b5493606bbf5_fix_primary_key_for_subscription_history.py +41 -0
- rucio/db/sqla/migrate_repo/versions/b7d287de34fd_removal_of_replicastate_source.py +91 -0
- rucio/db/sqla/migrate_repo/versions/b818052fa670_add_index_to_quarantined_replicas.py +40 -0
- rucio/db/sqla/migrate_repo/versions/b8caac94d7f0_add_comments_column_for_subscriptions_.py +43 -0
- rucio/db/sqla/migrate_repo/versions/b96a1c7e1cc4_new_bad_pfns_table_and_bad_replicas_.py +143 -0
- rucio/db/sqla/migrate_repo/versions/bb695f45c04_extend_request_state.py +76 -0
- rucio/db/sqla/migrate_repo/versions/bc68e9946deb_add_staging_timestamps_to_request.py +50 -0
- rucio/db/sqla/migrate_repo/versions/bf3baa1c1474_correct_pk_and_idx_for_history_tables.py +72 -0
- rucio/db/sqla/migrate_repo/versions/c0937668555f_add_qos_policy_map_table.py +55 -0
- rucio/db/sqla/migrate_repo/versions/c129ccdb2d5_add_lumiblocknr_to_dids.py +43 -0
- rucio/db/sqla/migrate_repo/versions/ccdbcd48206e_add_did_type_column_index_on_did_meta_.py +65 -0
- rucio/db/sqla/migrate_repo/versions/cebad904c4dd_new_payload_column_for_heartbeats.py +47 -0
- rucio/db/sqla/migrate_repo/versions/d1189a09c6e0_oauth2_0_and_jwt_feature_support_adding_.py +146 -0
- rucio/db/sqla/migrate_repo/versions/d23453595260_extend_request_state_for_preparer.py +104 -0
- rucio/db/sqla/migrate_repo/versions/d6dceb1de2d_added_purge_column_to_rules.py +44 -0
- rucio/db/sqla/migrate_repo/versions/d6e2c3b2cf26_remove_third_party_copy_column_from_rse.py +43 -0
- rucio/db/sqla/migrate_repo/versions/d91002c5841_new_account_limits_table.py +103 -0
- rucio/db/sqla/migrate_repo/versions/e138c364ebd0_extending_columns_for_filter_and_.py +49 -0
- rucio/db/sqla/migrate_repo/versions/e59300c8b179_support_for_archive.py +104 -0
- rucio/db/sqla/migrate_repo/versions/f1b14a8c2ac1_postgres_use_check_constraints.py +29 -0
- rucio/db/sqla/migrate_repo/versions/f41ffe206f37_oracle_global_temporary_tables.py +74 -0
- rucio/db/sqla/migrate_repo/versions/f85a2962b021_adding_transfertool_column_to_requests_.py +47 -0
- rucio/db/sqla/migrate_repo/versions/fa7a7d78b602_increase_refresh_token_size.py +43 -0
- rucio/db/sqla/migrate_repo/versions/fb28a95fe288_add_replicas_rse_id_tombstone_idx.py +37 -0
- rucio/db/sqla/migrate_repo/versions/fe1a65b176c9_set_third_party_copy_read_and_write_.py +43 -0
- rucio/db/sqla/migrate_repo/versions/fe8ea2fa9788_added_third_party_copy_column_to_rse_.py +43 -0
- rucio/db/sqla/models.py +1743 -0
- rucio/db/sqla/sautils.py +55 -0
- rucio/db/sqla/session.py +529 -0
- rucio/db/sqla/types.py +206 -0
- rucio/db/sqla/util.py +543 -0
- rucio/gateway/__init__.py +13 -0
- rucio/gateway/account.py +345 -0
- rucio/gateway/account_limit.py +363 -0
- rucio/gateway/authentication.py +381 -0
- rucio/gateway/config.py +227 -0
- rucio/gateway/credential.py +70 -0
- rucio/gateway/did.py +987 -0
- rucio/gateway/dirac.py +83 -0
- rucio/gateway/exporter.py +60 -0
- rucio/gateway/heartbeat.py +76 -0
- rucio/gateway/identity.py +189 -0
- rucio/gateway/importer.py +46 -0
- rucio/gateway/lifetime_exception.py +121 -0
- rucio/gateway/lock.py +153 -0
- rucio/gateway/meta_conventions.py +98 -0
- rucio/gateway/permission.py +74 -0
- rucio/gateway/quarantined_replica.py +79 -0
- rucio/gateway/replica.py +538 -0
- rucio/gateway/request.py +330 -0
- rucio/gateway/rse.py +632 -0
- rucio/gateway/rule.py +437 -0
- rucio/gateway/scope.py +100 -0
- rucio/gateway/subscription.py +280 -0
- rucio/gateway/vo.py +126 -0
- rucio/rse/__init__.py +96 -0
- rucio/rse/protocols/__init__.py +13 -0
- rucio/rse/protocols/bittorrent.py +194 -0
- rucio/rse/protocols/cache.py +111 -0
- rucio/rse/protocols/dummy.py +100 -0
- rucio/rse/protocols/gfal.py +708 -0
- rucio/rse/protocols/globus.py +243 -0
- rucio/rse/protocols/http_cache.py +82 -0
- rucio/rse/protocols/mock.py +123 -0
- rucio/rse/protocols/ngarc.py +209 -0
- rucio/rse/protocols/posix.py +250 -0
- rucio/rse/protocols/protocol.py +361 -0
- rucio/rse/protocols/rclone.py +365 -0
- rucio/rse/protocols/rfio.py +145 -0
- rucio/rse/protocols/srm.py +338 -0
- rucio/rse/protocols/ssh.py +414 -0
- rucio/rse/protocols/storm.py +195 -0
- rucio/rse/protocols/webdav.py +594 -0
- rucio/rse/protocols/xrootd.py +302 -0
- rucio/rse/rsemanager.py +881 -0
- rucio/rse/translation.py +260 -0
- rucio/tests/__init__.py +13 -0
- rucio/tests/common.py +280 -0
- rucio/tests/common_server.py +149 -0
- rucio/transfertool/__init__.py +13 -0
- rucio/transfertool/bittorrent.py +200 -0
- rucio/transfertool/bittorrent_driver.py +50 -0
- rucio/transfertool/bittorrent_driver_qbittorrent.py +134 -0
- rucio/transfertool/fts3.py +1600 -0
- rucio/transfertool/fts3_plugins.py +152 -0
- rucio/transfertool/globus.py +201 -0
- rucio/transfertool/globus_library.py +181 -0
- rucio/transfertool/mock.py +89 -0
- rucio/transfertool/transfertool.py +221 -0
- rucio/vcsversion.py +11 -0
- rucio/version.py +45 -0
- rucio/web/__init__.py +13 -0
- rucio/web/rest/__init__.py +13 -0
- rucio/web/rest/flaskapi/__init__.py +13 -0
- rucio/web/rest/flaskapi/authenticated_bp.py +27 -0
- rucio/web/rest/flaskapi/v1/__init__.py +13 -0
- rucio/web/rest/flaskapi/v1/accountlimits.py +236 -0
- rucio/web/rest/flaskapi/v1/accounts.py +1103 -0
- rucio/web/rest/flaskapi/v1/archives.py +102 -0
- rucio/web/rest/flaskapi/v1/auth.py +1644 -0
- rucio/web/rest/flaskapi/v1/common.py +426 -0
- rucio/web/rest/flaskapi/v1/config.py +304 -0
- rucio/web/rest/flaskapi/v1/credentials.py +213 -0
- rucio/web/rest/flaskapi/v1/dids.py +2340 -0
- rucio/web/rest/flaskapi/v1/dirac.py +116 -0
- rucio/web/rest/flaskapi/v1/export.py +75 -0
- rucio/web/rest/flaskapi/v1/heartbeats.py +127 -0
- rucio/web/rest/flaskapi/v1/identities.py +285 -0
- rucio/web/rest/flaskapi/v1/import.py +132 -0
- rucio/web/rest/flaskapi/v1/lifetime_exceptions.py +312 -0
- rucio/web/rest/flaskapi/v1/locks.py +358 -0
- rucio/web/rest/flaskapi/v1/main.py +91 -0
- rucio/web/rest/flaskapi/v1/meta_conventions.py +241 -0
- rucio/web/rest/flaskapi/v1/metrics.py +36 -0
- rucio/web/rest/flaskapi/v1/nongrid_traces.py +97 -0
- rucio/web/rest/flaskapi/v1/ping.py +88 -0
- rucio/web/rest/flaskapi/v1/redirect.py +366 -0
- rucio/web/rest/flaskapi/v1/replicas.py +1894 -0
- rucio/web/rest/flaskapi/v1/requests.py +998 -0
- rucio/web/rest/flaskapi/v1/rses.py +2250 -0
- rucio/web/rest/flaskapi/v1/rules.py +854 -0
- rucio/web/rest/flaskapi/v1/scopes.py +159 -0
- rucio/web/rest/flaskapi/v1/subscriptions.py +650 -0
- rucio/web/rest/flaskapi/v1/templates/auth_crash.html +80 -0
- rucio/web/rest/flaskapi/v1/templates/auth_granted.html +82 -0
- rucio/web/rest/flaskapi/v1/traces.py +137 -0
- rucio/web/rest/flaskapi/v1/types.py +20 -0
- rucio/web/rest/flaskapi/v1/vos.py +278 -0
- rucio/web/rest/main.py +18 -0
- rucio/web/rest/metrics.py +27 -0
- rucio/web/rest/ping.py +27 -0
- rucio-37.0.0rc1.data/data/rucio/etc/alembic.ini.template +71 -0
- rucio-37.0.0rc1.data/data/rucio/etc/alembic_offline.ini.template +74 -0
- rucio-37.0.0rc1.data/data/rucio/etc/globus-config.yml.template +5 -0
- rucio-37.0.0rc1.data/data/rucio/etc/ldap.cfg.template +30 -0
- rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_approval_request.tmpl +38 -0
- rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_approved_admin.tmpl +4 -0
- rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_approved_user.tmpl +17 -0
- rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_denied_admin.tmpl +6 -0
- rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_denied_user.tmpl +17 -0
- rucio-37.0.0rc1.data/data/rucio/etc/mail_templates/rule_ok_notification.tmpl +19 -0
- rucio-37.0.0rc1.data/data/rucio/etc/rse-accounts.cfg.template +25 -0
- rucio-37.0.0rc1.data/data/rucio/etc/rucio.cfg.atlas.client.template +43 -0
- rucio-37.0.0rc1.data/data/rucio/etc/rucio.cfg.template +241 -0
- rucio-37.0.0rc1.data/data/rucio/etc/rucio_multi_vo.cfg.template +217 -0
- rucio-37.0.0rc1.data/data/rucio/requirements.server.txt +297 -0
- rucio-37.0.0rc1.data/data/rucio/tools/bootstrap.py +34 -0
- rucio-37.0.0rc1.data/data/rucio/tools/merge_rucio_configs.py +144 -0
- rucio-37.0.0rc1.data/data/rucio/tools/reset_database.py +40 -0
- rucio-37.0.0rc1.data/scripts/rucio +133 -0
- rucio-37.0.0rc1.data/scripts/rucio-abacus-account +74 -0
- rucio-37.0.0rc1.data/scripts/rucio-abacus-collection-replica +46 -0
- rucio-37.0.0rc1.data/scripts/rucio-abacus-rse +78 -0
- rucio-37.0.0rc1.data/scripts/rucio-admin +97 -0
- rucio-37.0.0rc1.data/scripts/rucio-atropos +60 -0
- rucio-37.0.0rc1.data/scripts/rucio-auditor +206 -0
- rucio-37.0.0rc1.data/scripts/rucio-automatix +50 -0
- rucio-37.0.0rc1.data/scripts/rucio-bb8 +57 -0
- rucio-37.0.0rc1.data/scripts/rucio-cache-client +141 -0
- rucio-37.0.0rc1.data/scripts/rucio-cache-consumer +42 -0
- rucio-37.0.0rc1.data/scripts/rucio-conveyor-finisher +58 -0
- rucio-37.0.0rc1.data/scripts/rucio-conveyor-poller +66 -0
- rucio-37.0.0rc1.data/scripts/rucio-conveyor-preparer +37 -0
- rucio-37.0.0rc1.data/scripts/rucio-conveyor-receiver +44 -0
- rucio-37.0.0rc1.data/scripts/rucio-conveyor-stager +76 -0
- rucio-37.0.0rc1.data/scripts/rucio-conveyor-submitter +139 -0
- rucio-37.0.0rc1.data/scripts/rucio-conveyor-throttler +104 -0
- rucio-37.0.0rc1.data/scripts/rucio-dark-reaper +53 -0
- rucio-37.0.0rc1.data/scripts/rucio-dumper +160 -0
- rucio-37.0.0rc1.data/scripts/rucio-follower +44 -0
- rucio-37.0.0rc1.data/scripts/rucio-hermes +54 -0
- rucio-37.0.0rc1.data/scripts/rucio-judge-cleaner +89 -0
- rucio-37.0.0rc1.data/scripts/rucio-judge-evaluator +137 -0
- rucio-37.0.0rc1.data/scripts/rucio-judge-injector +44 -0
- rucio-37.0.0rc1.data/scripts/rucio-judge-repairer +44 -0
- rucio-37.0.0rc1.data/scripts/rucio-kronos +44 -0
- rucio-37.0.0rc1.data/scripts/rucio-minos +53 -0
- rucio-37.0.0rc1.data/scripts/rucio-minos-temporary-expiration +50 -0
- rucio-37.0.0rc1.data/scripts/rucio-necromancer +120 -0
- rucio-37.0.0rc1.data/scripts/rucio-oauth-manager +63 -0
- rucio-37.0.0rc1.data/scripts/rucio-reaper +83 -0
- rucio-37.0.0rc1.data/scripts/rucio-replica-recoverer +248 -0
- rucio-37.0.0rc1.data/scripts/rucio-rse-decommissioner +66 -0
- rucio-37.0.0rc1.data/scripts/rucio-storage-consistency-actions +74 -0
- rucio-37.0.0rc1.data/scripts/rucio-transmogrifier +77 -0
- rucio-37.0.0rc1.data/scripts/rucio-undertaker +76 -0
- rucio-37.0.0rc1.dist-info/METADATA +92 -0
- rucio-37.0.0rc1.dist-info/RECORD +487 -0
- rucio-37.0.0rc1.dist-info/WHEEL +5 -0
- rucio-37.0.0rc1.dist-info/licenses/AUTHORS.rst +100 -0
- rucio-37.0.0rc1.dist-info/licenses/LICENSE +201 -0
- rucio-37.0.0rc1.dist-info/top_level.txt +1 -0
rucio/core/replica.py
ADDED
|
@@ -0,0 +1,4483 @@
|
|
|
1
|
+
# Copyright European Organization for Nuclear Research (CERN) since 2012
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import copy
|
|
16
|
+
import heapq
|
|
17
|
+
import logging
|
|
18
|
+
import math
|
|
19
|
+
import random
|
|
20
|
+
from collections import defaultdict, namedtuple
|
|
21
|
+
from curses.ascii import isprint
|
|
22
|
+
from datetime import datetime, timedelta
|
|
23
|
+
from hashlib import sha256
|
|
24
|
+
from itertools import groupby
|
|
25
|
+
from json import dumps
|
|
26
|
+
from re import match
|
|
27
|
+
from struct import unpack
|
|
28
|
+
from traceback import format_exc
|
|
29
|
+
from typing import TYPE_CHECKING, Any, Literal, Optional, Union
|
|
30
|
+
|
|
31
|
+
import requests
|
|
32
|
+
from dogpile.cache.api import NO_VALUE
|
|
33
|
+
from sqlalchemy import and_, delete, exists, func, insert, not_, or_, union, update
|
|
34
|
+
from sqlalchemy.exc import DatabaseError, IntegrityError
|
|
35
|
+
from sqlalchemy.orm import aliased
|
|
36
|
+
from sqlalchemy.orm.exc import FlushError, NoResultFound
|
|
37
|
+
from sqlalchemy.sql.expression import ColumnElement, case, false, literal, literal_column, null, select, text, true
|
|
38
|
+
|
|
39
|
+
import rucio.core.did
|
|
40
|
+
import rucio.core.lock
|
|
41
|
+
from rucio.common import exception
|
|
42
|
+
from rucio.common.cache import MemcacheRegion
|
|
43
|
+
from rucio.common.config import config_get, config_get_bool
|
|
44
|
+
from rucio.common.constants import RseAttr, SuspiciousAvailability
|
|
45
|
+
from rucio.common.types import InternalAccount, InternalScope, LFNDict, is_str_list
|
|
46
|
+
from rucio.common.utils import add_url_query, chunks, clean_pfns, str_to_date
|
|
47
|
+
from rucio.core.credential import get_signed_url
|
|
48
|
+
from rucio.core.message import add_messages
|
|
49
|
+
from rucio.core.monitor import MetricManager
|
|
50
|
+
from rucio.core.rse import get_rse, get_rse_attribute, get_rse_name, get_rse_vo, list_rses
|
|
51
|
+
from rucio.core.rse_counter import decrease, increase
|
|
52
|
+
from rucio.core.rse_expression_parser import parse_expression
|
|
53
|
+
from rucio.db.sqla import filter_thread_work, models
|
|
54
|
+
from rucio.db.sqla.constants import OBSOLETE, BadFilesStatus, BadPFNStatus, DIDAvailability, DIDType, ReplicaState, RuleState
|
|
55
|
+
from rucio.db.sqla.session import BASE, DEFAULT_SCHEMA_NAME, read_session, stream_session, transactional_session
|
|
56
|
+
from rucio.db.sqla.util import temp_table_mngr
|
|
57
|
+
from rucio.rse import rsemanager as rsemgr
|
|
58
|
+
|
|
59
|
+
if TYPE_CHECKING:
|
|
60
|
+
from collections.abc import Iterable, Iterator, Mapping, Sequence
|
|
61
|
+
|
|
62
|
+
from sqlalchemy.engine import Row
|
|
63
|
+
from sqlalchemy.orm import Session
|
|
64
|
+
from sqlalchemy.sql.selectable import Select, Subquery
|
|
65
|
+
|
|
66
|
+
from rucio.common.types import LoggerFunction
|
|
67
|
+
from rucio.rse.protocols.protocol import RSEProtocol
|
|
68
|
+
|
|
69
|
+
REGION = MemcacheRegion(expiration_time=60)
|
|
70
|
+
METRICS = MetricManager(module=__name__)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
ScopeName = namedtuple('ScopeName', ['scope', 'name'])
|
|
74
|
+
Association = namedtuple('Association', ['scope', 'name', 'child_scope', 'child_name'])
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@read_session
def get_bad_replicas_summary(
        rse_expression: Optional[str] = None,
        from_date: Optional[datetime] = None,
        to_date: Optional[datetime] = None,
        filter_: Optional[dict[str, Any]] = None,
        *,
        session: "Session"
) -> list[dict[str, Any]]:
    """
    List the bad file replicas summary. Method used by the rucio-ui.

    :param rse_expression: The RSE expression.
    :param from_date: The start date.
    :param to_date: The end date.
    :param filter_: Dictionary of attributes by which the RSE results should be filtered. e.g.: {'availability_write': True}
    :param session: The database session in use.
    :returns: A list of dicts, one per (RSE, day, reason) incident, each carrying
              per-state counters plus 'rse_id', 'rse', 'created_at' and 'reason'.
    """
    result = []
    incidents = {}
    rse_clause = []
    if rse_expression:
        for rse in parse_expression(expression=rse_expression, filter_=filter_, session=session):
            rse_clause.append(models.BadReplica.rse_id == rse['id'])
    elif filter_:
        # Ensure we limit results to current VO even if we don't specify an RSE expression
        for rse in list_rses(filters=filter_, session=session):
            rse_clause.append(models.BadReplica.rse_id == rse['id'])

    # Truncate created_at to day granularity; the SQL function is dialect-specific.
    if session.bind.dialect.name == 'oracle':  # type: ignore
        to_days = func.trunc(models.BadReplica.created_at, 'DD')
    elif session.bind.dialect.name == 'mysql':  # type: ignore
        to_days = func.date(models.BadReplica.created_at)
    elif session.bind.dialect.name == 'postgresql':  # type: ignore
        to_days = func.date_trunc('day', models.BadReplica.created_at)
    else:
        # Fallback (e.g. sqlite): format the timestamp as 'YYYY-MM-DD'.
        to_days = func.strftime(models.BadReplica.created_at, '%Y-%m-%d')

    stmt = select(
        func.count(),
        to_days,
        models.BadReplica.rse_id,
        models.BadReplica.state,
        models.BadReplica.reason
    ).select_from(
        models.BadReplica
    )
    # To be added : HINTS
    if rse_clause != []:
        stmt = stmt.where(or_(*rse_clause))
    if from_date:
        stmt = stmt.where(models.BadReplica.created_at > from_date)
    if to_date:
        stmt = stmt.where(models.BadReplica.created_at < to_date)
    stmt = stmt.group_by(to_days, models.BadReplica.rse_id, models.BadReplica.reason, models.BadReplica.state)
    # NOTE(review): the loop variable below shadows the `to_days` SQL expression;
    # harmless because the statement is already built, but worth renaming.
    for count, to_days, rse_id, state, reason in session.execute(stmt):
        if (rse_id, to_days, reason) not in incidents:
            incidents[(rse_id, to_days, reason)] = {}
        # One counter per state name (e.g. 'BAD', 'SUSPICIOUS') within each incident.
        incidents[(rse_id, to_days, reason)][str(state.name)] = count

    # Flatten the incident mapping into the list of dicts the UI expects.
    for incident in incidents:
        res = incidents[incident]
        res['rse_id'] = incident[0]
        res['rse'] = get_rse_name(rse_id=incident[0], session=session)
        res['created_at'] = incident[1]
        res['reason'] = incident[2]
        result.append(res)

    return result
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
@read_session
def __exist_replicas(
        rse_id: str,
        replicas: list[tuple[Optional[str], Optional[str], Optional[str]]],
        *,
        session: "Session"
) -> list[
    tuple
    [
        str,
        str,
        str,
        bool,
        bool,
        Optional[int]
    ]
]:
    """
    Internal method to check if a replica exists at a given site.

    :param rse_id: The RSE id.
    :param replicas: A list of tuples [(<scope>, <name>, <path>}) with either :
                     - scope and name are None and path not None
                     - scope and name are not None and path is None
    :param session: The database session in use.

    :returns: A list of tuple (<scope>, <name>, <path>, <exists>, <already_declared>, <bytes>)
              where
              - <exists> is a boolean that identifies if the replica exists
              - <already_declared> is a boolean that identifies if the replica is already declared bad
    """

    return_list = []
    # Work on a copy so the caller's list is not mutated while matches are ticked off.
    remaining = list(replicas)
    path_clause: list[ColumnElement[bool]] = []
    did_clause: list[ColumnElement[bool]] = []
    for scope, name, path in remaining:
        if path:
            path_clause.append(models.RSEFileAssociation.path == path)
            # Paths may be stored with or without a leading slash; match both forms.
            if path.startswith('/'):
                path_clause.append(models.RSEFileAssociation.path == path[1:])
            else:
                path_clause.append(models.RSEFileAssociation.path == '/%s' % path)
        else:
            did_clause.append(and_(models.RSEFileAssociation.scope == scope,
                                   models.RSEFileAssociation.name == name))

    for clause in [path_clause, did_clause]:
        if clause:
            # Query in chunks of 10 to keep the OR-list small for the DB.
            for chunk in chunks(clause, 10):
                stmt = select(
                    models.RSEFileAssociation.path,
                    models.RSEFileAssociation.scope,
                    models.RSEFileAssociation.name,
                    models.RSEFileAssociation.rse_id,
                    models.RSEFileAssociation.bytes,
                    # 1 if any matching bad-replica row is in state BAD, else 0.
                    func.max(
                        case(
                            (models.BadReplica.state == BadFilesStatus.SUSPICIOUS, 0),
                            (models.BadReplica.state == BadFilesStatus.BAD, 1),
                            else_=0))
                ).with_hint(
                    models.RSEFileAssociation,
                    # Fixed: the hint string was missing its closing parenthesis
                    # ('INDEX(REPLICAS REPLICAS_PATH_IDX'), so Oracle silently
                    # ignored the malformed hint.
                    'INDEX(REPLICAS REPLICAS_PATH_IDX)',
                    'oracle'
                ).outerjoin(
                    models.BadReplica,
                    and_(models.RSEFileAssociation.scope == models.BadReplica.scope,
                         models.RSEFileAssociation.name == models.BadReplica.name,
                         models.RSEFileAssociation.rse_id == models.BadReplica.rse_id)
                ).where(
                    and_(models.RSEFileAssociation.rse_id == rse_id,
                         or_(*chunk))
                ).group_by(
                    models.RSEFileAssociation.path,
                    models.RSEFileAssociation.scope,
                    models.RSEFileAssociation.name,
                    models.RSEFileAssociation.rse_id,
                    models.RSEFileAssociation.bytes
                )

                for path, scope, name, rse_id, size, state in session.execute(stmt).all():
                    # Tick off whichever request form matched this row.
                    if (scope, name, path) in remaining:
                        remaining.remove((scope, name, path))
                    if (None, None, path) in remaining:
                        remaining.remove((None, None, path))
                    if (scope, name, None) in remaining:
                        remaining.remove((scope, name, None))
                    already_declared = state == 1
                    return_list.append((scope, name, path, True, already_declared, size))

    # Whatever is left was not found in the replicas table.
    for scope, name, path in remaining:
        return_list.append((scope, name, path, False, False, None))

    return return_list
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
@read_session
def list_bad_replicas_status(
        state: BadFilesStatus = BadFilesStatus.BAD,
        rse_id: Optional[str] = None,
        younger_than: Optional[datetime] = None,
        older_than: Optional[datetime] = None,
        limit: Optional[int] = None,
        list_pfns: Optional[bool] = False,
        vo: str = 'def',
        *,
        session: "Session"
) -> list[dict[str, Any]]:
    """
    List the bad file replicas history states. Method used by the rucio-ui.

    :param state: The state of the file (SUSPICIOUS or BAD).
    :param rse_id: The RSE id.
    :param younger_than: datetime object to select bad replicas younger than this date.
    :param older_than: datetime object to select bad replicas older than this date.
    :param limit: The maximum number of replicas returned.
    :param list_pfns: If True, resolve the selected DIDs and return their PFNs instead.
    :param vo: The VO to find replicas from.
    :param session: The database session in use.
    """
    result = []
    stmt = select(
        models.BadReplica.scope,
        models.BadReplica.name,
        models.BadReplica.rse_id,
        models.BadReplica.state,
        models.BadReplica.created_at,
        models.BadReplica.updated_at
    )
    if state:
        stmt = stmt.where(models.BadReplica.state == state)
    if rse_id:
        stmt = stmt.where(models.BadReplica.rse_id == rse_id)
    if younger_than:
        stmt = stmt.where(models.BadReplica.created_at >= younger_than)
    if older_than:
        stmt = stmt.where(models.BadReplica.created_at <= older_than)
    if limit:
        stmt = stmt.limit(limit)

    for badfile in session.execute(stmt).yield_per(1000):
        if badfile.scope.vo == vo:
            if list_pfns:
                result.append({'scope': badfile.scope, 'name': badfile.name, 'type': DIDType.FILE})
            else:
                result.append({'scope': badfile.scope, 'name': badfile.name, 'rse': get_rse_name(rse_id=badfile.rse_id, session=session), 'rse_id': badfile.rse_id, 'state': badfile.state, 'created_at': badfile.created_at, 'updated_at': badfile.updated_at})
    if list_pfns:
        reps = []
        for rep in list_replicas(result, schemes=None, unavailable=False, request_id=None, ignore_availability=True, all_states=True, session=session):
            pfn = None
            if rse_id in rep['rses'] and rep['rses'][rse_id]:
                pfn = rep['rses'][rse_id][0]
                if pfn and pfn not in reps:
                    reps.append(pfn)
            else:
                reps.extend([item for row in rep['rses'].values() for item in row])
        # Fixed: the de-duplicated list used to be built and immediately discarded
        # (`list(set(reps))` without assignment, followed by `result = reps`), so
        # duplicates from the extend() branch leaked into the result.
        result = list(set(reps))
    return result
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
@transactional_session
def __declare_bad_file_replicas(
        pfns: list[Union[str, dict[str, Any]]],
        rse_id: str,
        reason: str,
        issuer: InternalAccount,
        status: BadFilesStatus = BadFilesStatus.BAD,
        scheme: str = 'srm',
        force: bool = False,
        logger: "LoggerFunction" = logging.log,
        *,
        session: "Session"
) -> list[str]:
    """
    Declare a list of bad replicas.

    :param pfns: Either a list of PFNs (string) or a list of replicas {'scope': <scope>, 'name': <name>, 'rse_id': <rse_id>}.
    :param rse_id: The RSE id.
    :param reason: The reason of the loss.
    :param issuer: The issuer account.
    :param status: Either BAD or SUSPICIOUS.
    :param scheme: The scheme of the PFNs.
    :param force: boolean, if declaring BAD replica, ignore existing replica status in the bad_replicas table. Default: False
    :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
    :param session: The database session in use.
    :returns: A list of strings describing the replicas that could not be declared
              (already declared, unknown, or PFN with unprintable characters).
    """
    unknown_replicas: list[str] = []
    replicas: list[dict[str, Any]] = []
    # Maps the storage path back to the original PFN for error reporting.
    path_pfn_dict: dict[str, str] = {}

    if len(pfns) > 0 and is_str_list(pfns):
        # If pfns is a list of PFNs, the scope and names need to be extracted from the path
        rse_info = rsemgr.get_rse_info(rse_id=rse_id, session=session)
        proto = rsemgr.create_protocol(rse_info, 'read', scheme=scheme)
        if rse_info['deterministic']:
            # Deterministic RSE: the scope/name can be derived from the path itself.
            scope_proto = rsemgr.get_scope_protocol(vo=issuer.vo)
            parsed_pfn = proto.parse_pfns(pfns=pfns)
            for pfn in parsed_pfn:
                # Translate into a scope and name
                name, scope = scope_proto(parsed_pfn[pfn])

                scope = InternalScope(scope, vo=issuer.vo)
                replicas.append({'scope': scope, 'name': name, 'rse_id': rse_id, 'state': status})
                path = '%s%s' % (parsed_pfn[pfn]['path'], parsed_pfn[pfn]['name'])
                path_pfn_dict[path] = pfn
                logger(logging.DEBUG, f"Declaring replica {scope}:{name} {status} at {rse_id} with path {path}")

        else:
            # For non-deterministic RSEs use the path + rse_id to extract the scope
            parsed_pfn = proto.parse_pfns(pfns=pfns)
            for pfn in parsed_pfn:
                path = '%s%s' % (parsed_pfn[pfn]['path'], parsed_pfn[pfn]['name'])
                replicas.append({'scope': None, 'name': None, 'rse_id': rse_id, 'path': path, 'state': status})
                path_pfn_dict[path] = pfn

                logger(logging.DEBUG, f"Declaring replica with pfn: {pfn} {status} at {rse_id} with path {path}")

    else:
        # If pfns is a list of replicas, just use scope, name and rse_id
        for pfn in pfns:
            replicas.append({'scope': pfn['scope'], 'name': pfn['name'], 'rse_id': rse_id, 'state': status})  # type: ignore
            logger(logging.DEBUG, f"Declaring replica {pfn['scope']}:{pfn['name']} {status} at {rse_id} without path")  # type: ignore

    # Reduce to (scope, name, path) tuples for the existence check.
    replicas_list = []
    for replica in replicas:
        scope, name, rse_id, path = replica['scope'], replica['name'], replica['rse_id'], replica.get('path', None)
        replicas_list.append((scope, name, path))

    bad_replicas_to_update = []

    for scope, name, path, __exists, already_declared, size in __exist_replicas(rse_id=rse_id, replicas=replicas_list, session=session):
        declared = False

        if __exists:

            # A BAD declaration flips the replica state; re-declaration requires force.
            if status == BadFilesStatus.BAD and (force or not already_declared):
                bad_replicas_to_update.append({'scope': scope, 'name': name, 'rse_id': rse_id, 'state': ReplicaState.BAD})
                declared = True

            # NOTE(review): `and` binds tighter than `or`, so this reads as
            # SUSPICIOUS or (BAD and not already_declared): a SUSPICIOUS replica
            # always gets a new bad_replicas row (repeat declarations allowed),
            # a BAD one only when not already declared — presumably intentional; confirm.
            if status == BadFilesStatus.SUSPICIOUS or status == BadFilesStatus.BAD and not already_declared:
                new_bad_replica = models.BadReplica(scope=scope, name=name, rse_id=rse_id, reason=reason, state=status, account=issuer, bytes=size)
                new_bad_replica.save(session=session, flush=False)
                declared = True

        if not declared:
            # Build a human-readable reason for each replica we could not declare.
            if already_declared:
                unknown_replicas.append('%s %s' % (path_pfn_dict.get(path, '%s:%s' % (scope, name)), 'Already declared'))
            elif path:
                no_hidden_char = True
                for char in str(path):
                    if not isprint(char):
                        unknown_replicas.append('%s %s' % (path, 'PFN contains hidden chars'))
                        no_hidden_char = False
                        break
                if no_hidden_char:
                    pfn = path_pfn_dict[path]
                    if f"{pfn} Unknown replica" not in unknown_replicas:
                        unknown_replicas.append('%s %s' % (pfn, 'Unknown replica'))
            elif scope or name:
                unknown_replicas.append(f"{(scope,name)} Unknown replica")

    if status == BadFilesStatus.BAD:
        # For BAD file, we modify the replica state, not for suspicious
        try:
            # there shouldn't be any exceptions since all replicas exist
            update_replicas_states(bad_replicas_to_update, session=session)
        except exception.UnsupportedOperation:
            raise exception.ReplicaNotFound("One or several replicas don't exist.")

    try:
        session.flush()
    except IntegrityError as error:
        raise exception.RucioException(error.args)
    except DatabaseError as error:
        raise exception.RucioException(error.args)
    except FlushError as error:
        raise exception.RucioException(error.args)

    return unknown_replicas
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
@transactional_session
def add_bad_dids(
        dids: "Iterable[dict[str, Any]]",
        rse_id: str,
        reason: str,
        issuer: InternalAccount,
        state: BadFilesStatus = BadFilesStatus.BAD,
        *,
        session: "Session"
) -> list[str]:
    """
    Declare a list of bad replicas.

    :param dids: The list of DIDs.
    :param rse_id: The RSE id.
    :param reason: The reason of the loss.
    :param issuer: The issuer account.
    :param state: BadFilesStatus.BAD
    :param session: The database session in use.
    :returns: A list of strings describing replicas that could not be declared
              ('Already declared' or 'Unknown replica').
    """
    unknown_replicas = []
    replicas_for_update = []
    replicas_list = []

    # Build (scope, name, path=None) tuples for the existence check.
    for did in dids:
        scope = InternalScope(did['scope'], vo=issuer.vo)
        name = did['name']
        replicas_list.append((scope, name, None))

    # NOTE(review): inside the error branches below, `did['scope']` still refers to
    # the LAST element of the first loop, not the row being reported — the row's
    # own `scope` is what should presumably be used; confirm and fix upstream.
    for scope, name, _, __exists, already_declared, size in __exist_replicas(rse_id=rse_id, replicas=replicas_list, session=session):
        if __exists and not already_declared:
            replicas_for_update.append({'scope': scope, 'name': name, 'rse_id': rse_id, 'state': ReplicaState.BAD})
            new_bad_replica = models.BadReplica(scope=scope, name=name, rse_id=rse_id, reason=reason, state=state,
                                                account=issuer, bytes=size)
            new_bad_replica.save(session=session, flush=False)
            # Drop any transfer-source entries pointing at the now-bad replica.
            stmt = delete(
                models.Source
            ).where(
                and_(models.Source.scope == scope,
                     models.Source.name == name,
                     models.Source.rse_id == rse_id)
            ).execution_options(
                synchronize_session=False
            )
            session.execute(stmt)
        else:
            if already_declared:
                unknown_replicas.append('%s:%s %s' % (did['scope'], name, 'Already declared'))
            else:
                unknown_replicas.append('%s:%s %s' % (did['scope'], name, 'Unknown replica'))

    if state == BadFilesStatus.BAD:
        # Only BAD declarations flip the replica state in the replicas table.
        try:
            update_replicas_states(replicas_for_update, session=session)
        except exception.UnsupportedOperation:
            raise exception.ReplicaNotFound("One or several replicas don't exist.")

    try:
        session.flush()
    except (IntegrityError, DatabaseError, FlushError) as error:
        raise exception.RucioException(error.args)

    return unknown_replicas
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
@transactional_session
def declare_bad_file_replicas(
        replicas: list[Union[str, dict[str, Any]]],
        reason: str,
        issuer: InternalAccount,
        status: BadFilesStatus = BadFilesStatus.BAD,
        force: bool = False,
        *,
        session: "Session"
) -> dict[str, list[str]]:
    """
    Declare a list of bad replicas.

    :param replicas: Either a list of PFNs (string) or a list of replicas {'scope': <scope>, 'name': <name>, 'rse_id': <rse_id>}.
    :param reason: The reason of the loss.
    :param issuer: The issuer account.
    :param status: The status of the file (SUSPICIOUS or BAD).
    :param force: boolean, if declaring BAD replica, ignore existing replica status in the bad_replicas table. Default: False
    :param session: The database session in use.
    :returns: Dictionary {rse_id -> [replicas failed to declare with errors]}
    """
    unknown_replicas: dict[str, list[str]] = {}
    if not replicas:
        return unknown_replicas

    # All entries must have the type of the first one: all PFN strings or all dicts.
    first_type = type(replicas[0])
    for entry in replicas:
        if not isinstance(entry, first_type):
            raise exception.InvalidType('Replicas must be specified either as a list of string or a list of dicts')

    scheme = None
    if first_type is str:
        # PFN strings: resolve each one to the RSE its hostname/prefix belongs to.
        scheme, files_to_declare, unknown_replicas = get_pfn_to_rse(replicas, vo=issuer.vo, session=session)
    else:
        # Replica dicts: group by the explicit rse_id field.
        files_to_declare = {}
        for entry in replicas:
            files_to_declare.setdefault(entry['rse_id'], []).append(entry)  # type: ignore

    # Declare per RSE and collect whatever could not be declared.
    for rse_id, rse_replicas in files_to_declare.items():
        notdeclared = __declare_bad_file_replicas(rse_replicas, rse_id, reason, issuer,
                                                  status=status, scheme=scheme,
                                                  force=force, session=session)
        if notdeclared:
            unknown_replicas[rse_id] = notdeclared
    return unknown_replicas
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
@read_session
def get_pfn_to_rse(
        pfns: "Iterable[str]",
        vo: str = 'def',
        *,
        session: "Session"
) -> tuple[Optional[str], dict[str, Any], dict[str, list[str]]]:
    """
    Get the RSE associated to a list of PFNs.

    :param pfns: The list of pfn.
    :param vo: The VO to find RSEs at.
    :param session: The database session in use.

    :returns: a tuple : scheme, {rse1 : [pfn1, pfn2, ...], rse2: [pfn3, pfn4, ...]}, {'unknown': [pfn5, pfn6, ...]}.
    :raises InvalidType: if the PFNs do not all share one scheme.
    :raises RucioException: if a PFN matches the protocols of more than one RSE.
    """
    unknown_replicas = {}
    storage_elements = []
    se_condition = []
    dict_rse = {}
    cleaned_pfns = clean_pfns(pfns)
    # All PFNs must share the scheme of the first one.
    scheme = cleaned_pfns[0].split(':')[0] if cleaned_pfns else None
    for pfn in cleaned_pfns:
        if pfn.split(':')[0] != scheme:
            raise exception.InvalidType('The PFNs specified must have the same protocol')

        # Hostname part of the URL: '<scheme>://<host>[:<port>]/...'.
        split_se = pfn.split('/')[2].split(':')
        storage_element = split_se[0]

        if storage_element not in storage_elements:
            storage_elements.append(storage_element)
            se_condition.append(models.RSEProtocol.hostname == storage_element)
    # Fetch all protocol entries for the collected hostnames at once.
    stmt = select(
        models.RSEProtocol.rse_id,
        models.RSEProtocol.scheme,
        models.RSEProtocol.hostname,
        models.RSEProtocol.port,
        models.RSEProtocol.prefix
    ).join(
        models.RSE,
        models.RSEProtocol.rse_id == models.RSE.id
    ).where(
        and_(or_(*se_condition),
             models.RSEProtocol.scheme == scheme,
             models.RSE.deleted == false(),
             models.RSE.staging_area == false())
    )

    # rse_id -> list of URL prefixes (with and without the port) used for matching.
    protocols = {}

    for rse_id, protocol, hostname, port, prefix in session.execute(stmt).yield_per(10000):
        if rse_id not in protocols:
            protocols[rse_id] = []
        protocols[rse_id].append('%s://%s:%s%s' % (protocol, hostname, port, prefix))
        if '%s://%s%s' % (protocol, hostname, prefix) not in protocols[rse_id]:
            protocols[rse_id].append('%s://%s%s' % (protocol, hostname, prefix))
    # `hint` caches the first matched RSE: subsequent PFNs are only checked against it.
    # NOTE(review): once `hint` is set, a PFN that matches none of the hint's
    # patterns is silently dropped (neither assigned nor reported unknown) — confirm.
    hint = None
    for pfn in cleaned_pfns:
        if hint:
            for pattern in protocols[hint]:
                if pfn.find(pattern) > -1:
                    dict_rse[hint].append(pfn)
        else:
            mult_rse_match = 0
            for rse_id in protocols:
                for pattern in protocols[rse_id]:
                    if pfn.find(pattern) > -1 and get_rse_vo(rse_id=rse_id, session=session) == vo:
                        mult_rse_match += 1
                        if mult_rse_match > 1:
                            # NOTE(review): `print` in library code; a logger call
                            # would presumably be more appropriate here.
                            print('ERROR, multiple matches : %s at %s' % (pfn, rse_id))
                            raise exception.RucioException('ERROR, multiple matches : %s at %s' % (pfn, get_rse_name(rse_id=rse_id, session=session)))
                        hint = rse_id
                        if hint not in dict_rse:
                            dict_rse[hint] = []
                        dict_rse[hint].append(pfn)
            if mult_rse_match == 0:
                if 'unknown' not in unknown_replicas:
                    unknown_replicas['unknown'] = []
                unknown_replicas['unknown'].append(pfn)
    return scheme, dict_rse, unknown_replicas
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
@read_session
def get_bad_replicas_backlog(
        *,
        session: "Session"
) -> dict[str, int]:
    """
    Get the replica backlog by RSE.

    :param session: The database session in use.

    :returns: a dictionary {rse_id: cnt_bad_replicas}.
    """
    # Count BAD replicas per RSE, skipping files whose DID is already LOST.
    stmt = select(
        func.count(),
        models.RSEFileAssociation.rse_id
    ).select_from(
        models.RSEFileAssociation
    ).with_hint(
        models.RSEFileAssociation,
        'INDEX(DIDS DIDS_PK) USE_NL(DIDS) INDEX_RS_ASC(REPLICAS ("REPLICAS"."STATE"))',
        'oracle'
    ).join(
        models.DataIdentifier,
        and_(models.RSEFileAssociation.scope == models.DataIdentifier.scope,
             models.RSEFileAssociation.name == models.DataIdentifier.name)
    ).where(
        and_(models.DataIdentifier.availability != DIDAvailability.LOST,
             models.RSEFileAssociation.state == ReplicaState.BAD)
    ).group_by(
        models.RSEFileAssociation.rse_id
    )

    # Fold the aggregated rows directly into the mapping.
    return {rse_id: cnt for cnt, rse_id in session.execute(stmt).all()}
|
|
654
|
+
|
|
655
|
+
|
|
656
|
+
@read_session
def list_bad_replicas(
        limit: int = 10000,
        thread: Optional[int] = None,
        total_threads: Optional[int] = None,
        rses: Optional['Iterable[dict[str, Any]]'] = None,
        *,
        session: "Session"
) -> list[dict[str, Any]]:
    """
    List RSE File replicas with no locks.

    :param limit: The maximum number of replicas returned.
    :param thread: The assigned thread for this necromancer.
    :param total_threads: The total number of threads of all necromancers.
    :param rses: Optional iterable of RSE dicts (with an 'id' key) to restrict the search to.
    :param session: The database session in use.

    :returns: a list of dictionary {'scope' scope, 'name': name, 'rse_id': rse_id, 'rse': rse}.
    """
    # Schema prefix for the hash_variable passed to filter_thread_work (may be '').
    schema_dot = '%s.' % DEFAULT_SCHEMA_NAME if DEFAULT_SCHEMA_NAME else ''

    stmt = select(
        models.RSEFileAssociation.scope,
        models.RSEFileAssociation.name,
        models.RSEFileAssociation.rse_id
    ).with_hint(
        models.RSEFileAssociation,
        'INDEX(DIDS DIDS_PK) USE_NL(DIDS) INDEX_RS_ASC(REPLICAS ("REPLICAS"."STATE"))',
        'oracle'
    ).where(
        models.RSEFileAssociation.state == ReplicaState.BAD
    )

    # Partition the work across necromancer threads by hashing the replica name.
    stmt = filter_thread_work(session=session, query=stmt, total_threads=total_threads, thread_id=thread, hash_variable='%sreplicas.name' % (schema_dot))

    # Skip replicas whose DID was already declared LOST.
    stmt = stmt.join(
        models.DataIdentifier,
        and_(models.RSEFileAssociation.scope == models.DataIdentifier.scope,
             models.RSEFileAssociation.name == models.DataIdentifier.name)
    ).where(
        models.DataIdentifier.availability != DIDAvailability.LOST
    )

    if rses:
        rse_clause = [models.RSEFileAssociation.rse_id == rse['id'] for rse in rses]
        stmt = stmt.where(or_(*rse_clause))

    stmt = stmt.limit(limit)
    rows = []
    for scope, name, rse_id in session.execute(stmt).yield_per(1000):
        rows.append({'scope': scope, 'name': name, 'rse_id': rse_id, 'rse': get_rse_name(rse_id=rse_id, session=session)})
    return rows
|
|
708
|
+
|
|
709
|
+
|
|
710
|
+
@stream_session
def get_did_from_pfns(
        pfns: "Iterable[str]",
        rse_id: Optional[str] = None,
        vo: str = 'def',
        *,
        session: "Session"
) -> 'Iterator[dict[str, dict[str, Any]]]':
    """
    Get the DIDs associated to a PFN on one given RSE

    :param pfns: The list of PFNs.
    :param rse_id: The RSE id.
    :param vo: The VO to get DIDs from.
    :param session: The database session in use.
    :returns: A dictionary {pfn: {'scope': scope, 'name': name}}
    :raises RucioException: if any PFN cannot be resolved to an RSE.
    """
    dict_rse = {}
    if not rse_id:
        scheme, dict_rse, unknown_replicas = get_pfn_to_rse(pfns, vo=vo, session=session)
        if unknown_replicas:
            # Fixed: previously raised a bare `Exception` with no message. A
            # RucioException (subclass of Exception, so backward-compatible for
            # callers) carrying the unresolved PFNs is far more diagnosable.
            raise exception.RucioException('Unable to resolve RSE for PFNs: %s' % unknown_replicas)
    else:
        scheme = 'srm'
        dict_rse[rse_id] = pfns
    for rse_id in dict_rse:
        pfns = dict_rse[rse_id]
        rse_info = rsemgr.get_rse_info(rse_id=rse_id, session=session)
        pfndict = {}
        proto: RSEProtocol = rsemgr.create_protocol(rse_info, 'read', scheme=scheme)
        if rse_info['deterministic']:
            # Deterministic RSE: derive scope/name directly from the parsed path.
            scope_proto = rsemgr.get_scope_protocol(vo=vo)
            parsed_pfn = proto.parse_pfns(pfns=pfns)

            for pfn in parsed_pfn:
                # Translate into a scope and name
                name, scope = scope_proto(parsed_pfn[pfn])
                scope = InternalScope(scope, vo)
                yield {pfn: {'scope': scope, 'name': name}}
        else:
            # Non-deterministic RSE: look the storage paths up in the replicas table.
            condition = []
            parsed_pfn = proto.parse_pfns(pfns=pfns)
            for pfn in parsed_pfn:
                path = '%s%s' % (parsed_pfn[pfn]['path'], parsed_pfn[pfn]['name'])
                pfndict[path] = pfn
                condition.append(and_(models.RSEFileAssociation.path == path,
                                      models.RSEFileAssociation.rse_id == rse_id))
            stmt = select(
                models.RSEFileAssociation.scope,
                models.RSEFileAssociation.name,
                models.RSEFileAssociation.path
            ).where(
                or_(*condition)
            )
            for scope, name, pfn in session.execute(stmt).all():
                yield {pfndict[pfn]: {'scope': scope, 'name': name}}
|
|
766
|
+
|
|
767
|
+
|
|
768
|
+
def _pick_n_random(
|
|
769
|
+
nrandom: int,
|
|
770
|
+
generator: 'Iterable[Any]'
|
|
771
|
+
) -> 'Iterator[Any]':
|
|
772
|
+
"""
|
|
773
|
+
Select n random elements from the generator
|
|
774
|
+
"""
|
|
775
|
+
|
|
776
|
+
if not nrandom:
|
|
777
|
+
# pass-through the data unchanged
|
|
778
|
+
yield from generator
|
|
779
|
+
return
|
|
780
|
+
|
|
781
|
+
# A "reservoir sampling" algorithm:
|
|
782
|
+
# Copy the N first files from the generator. After that, following element may be picked to substitute
|
|
783
|
+
# one of the previously selected element with a probability which decreases as the number of encountered elements grows.
|
|
784
|
+
selected = []
|
|
785
|
+
i = 0
|
|
786
|
+
iterator = iter(generator)
|
|
787
|
+
try:
|
|
788
|
+
for _ in range(nrandom):
|
|
789
|
+
selected.append(next(iterator))
|
|
790
|
+
i += 1
|
|
791
|
+
|
|
792
|
+
while True:
|
|
793
|
+
element = next(iterator)
|
|
794
|
+
i += 1
|
|
795
|
+
|
|
796
|
+
index_to_substitute = random.randint(0, i) # noqa: S311
|
|
797
|
+
if index_to_substitute < nrandom:
|
|
798
|
+
selected[index_to_substitute] = element
|
|
799
|
+
except StopIteration:
|
|
800
|
+
pass
|
|
801
|
+
|
|
802
|
+
for r in selected:
|
|
803
|
+
yield r
|
|
804
|
+
|
|
805
|
+
|
|
806
|
+
def _list_files_wo_replicas(
        files_wo_replica: "Iterable[dict[str, Any]]",
        *,
        session: "Session"
) -> 'Iterator[tuple[str, str, int, str, str]]':
    """
    Yield (scope, name, bytes, md5, adler32) from the DID table for each
    input file DID which has no replica.
    """
    if not files_wo_replica:
        return

    did = models.DataIdentifier
    # Sort the input for a deterministic clause order, then build one
    # scope/name equality clause per file.
    clauses = [
        and_(did.scope == f['scope'], did.name == f['name'])
        for f in sorted(files_wo_replica, key=lambda f: (f['scope'], f['name']))
    ]
    stmt = select(
        did.scope,
        did.name,
        did.bytes,
        did.md5,
        did.adler32
    ).with_hint(
        did,
        'INDEX(DIDS DIDS_PK)',
        'oracle'
    ).where(
        and_(did.did_type == DIDType.FILE, or_(*clauses))
    )
    for did_scope, did_name, size_, md5, adler32 in session.execute(stmt):
        yield did_scope, did_name, size_, md5, adler32


def get_vp_endpoint() -> str:
    """
    Return the address of the Virtual Placement (VP) server.

    Looks up the 'vp_endpoint' option of the 'virtual_placement' config
    section; returns '' when not configured.
    Once VP is integrated in Rucio this lookup won't be needed.
    """
    return config_get('virtual_placement', 'vp_endpoint', default='')


def get_multi_cache_prefix(
        cache_site: str,
        filename: str,
        logger: "LoggerFunction" = logging.log
) -> str:
    """
    For a given cache site and filename, return the address of the cache node
    that should be prefixed to the PFN.

    :param cache_site: Cache site name, as listed by the VP server.
    :param filename: Filename, hashed to deterministically pick a cache node.
    :param logger: Optional decorated logger taking (level, message, *args).
    :returns: Address of the selected cache node, or '' when no VP endpoint is
              configured, the cache map cannot be fetched, the site is unknown,
              or no hash range matches.
    """
    vp_endpoint = get_vp_endpoint()
    if not vp_endpoint:
        return ''

    x_caches = REGION.get('CacheSites')
    if x_caches is NO_VALUE:
        try:
            # NOTE(review): verify=False disables TLS certificate validation
            # for the VP endpoint; kept as-is, but should be revisited.
            response = requests.get('{}/serverRanges'.format(vp_endpoint), timeout=1, verify=False)
            if response.ok:
                x_caches = response.json()
                REGION.set('CacheSites', x_caches)
            else:
                # Cache the failure marker so we don't hammer the VP server.
                REGION.set('CacheSites', {'could not reload': ''})
                return ''
        except requests.exceptions.RequestException as error:
            REGION.set('CacheSites', {'could not reload': ''})
            logger(logging.WARNING, 'In get_multi_cache_prefix, could not access {}. Exception:{}'.format(vp_endpoint, error))
            return ''

    if cache_site not in x_caches:  # type: ignore
        return ''

    xcache_site = x_caches[cache_site]  # type: ignore
    # Map the filename to a float in [0, 1) via the first 8 bytes of its
    # SHA-256 digest, then pick the cache node owning that hash range.
    h = float(
        unpack('Q', sha256(filename.encode('utf-8')).digest()[:8])[0]) / 2**64
    for irange in xcache_site['ranges']:
        if h < irange[1]:
            return xcache_site['servers'][irange[0]][0]
    return ''


def _get_list_replicas_protocols(
        rse_id: str,
        domain: str,
        schemes: Optional[list[str]],
        additional_schemes: "Iterable[str]",
        session: "Session"
) -> "list[tuple[str, RSEProtocol, int]]":
    """
    Select the protocols to be used by list_replicas to build the PFNs for all replicas on the given RSE.

    :returns: a list of (domain, protocol, priority) tuples.
    """
    domains = ['wan', 'lan'] if domain == 'all' else [domain]

    rse_info = rsemgr.get_rse_info(rse_id=rse_id, session=session)
    # Compute per-domain scheme priorities, excluding disabled protocols:
    # 0 or None in the RSE protocol definition = disabled, 1 = highest priority.
    scheme_priorities = {
        net_domain: {
            p['scheme']: p['domains'][net_domain]['read']
            for p in rse_info['protocols']
            if p['domains'][net_domain]['read']
        }
        for net_domain in ('wan', 'lan')
    }

    rse_schemes = list(schemes) if schemes else []
    if not rse_schemes:
        # No scheme requested by the caller: fall back to the RSE's preferred
        # read protocol for each requested domain.
        try:
            for net_domain in domains:
                preferred = rsemgr.select_protocol(rse_settings=rse_info,
                                                   operation='read',
                                                   domain=net_domain)
                rse_schemes.append(preferred['scheme'])
        except exception.RSEProtocolNotSupported:
            pass  # no need to be verbose
        except Exception:
            print(format_exc())

    for extra_scheme in additional_schemes:
        if extra_scheme not in rse_schemes:
            rse_schemes.append(extra_scheme)

    protocols = []
    for scheme in rse_schemes:
        try:
            for net_domain in domains:
                proto = rsemgr.create_protocol(rse_settings=rse_info, operation='read', scheme=scheme, domain=net_domain)
                protocols.append((net_domain, proto, scheme_priorities[net_domain][scheme]))
        except exception.RSEProtocolNotSupported:
            pass  # no need to be verbose
        except Exception:
            print(format_exc())
    return protocols


def _build_list_replicas_pfn(
        scope: "InternalScope",
        name: str,
        rse_id: str,
        domain: str,
        protocol: "RSEProtocol",
        path: str,
        sign_urls: bool,
        signature_lifetime: Optional[int],
        client_location: Optional[dict[str, Any]],
        logger: "LoggerFunction" = logging.log,
        *,
        session: "Session",
) -> str:
    """
    Generate the PFN for the given scope/name on the rse.
    If needed, sign the PFN url
    If relevant, add the server-side root proxy to the pfn url

    :param scope: internal scope of the DID.
    :param name: name of the DID.
    :param rse_id: id of the RSE hosting the replica.
    :param domain: network domain used to build the PFN; root-proxy/cache handling only applies to 'wan'.
    :param protocol: RSE protocol instance used to translate the LFN into a PFN.
    :param path: replica path forwarded into the LFN.
    :param sign_urls: if True, sign https PFNs when the RSE defines a signing service.
    :param signature_lifetime: lifetime in seconds of the signed URL, forwarded to get_signed_url.
    :param client_location: optional client location dict; only 'site' is consulted here.
    :param logger: optional decorated logger taking (level, message, *args).
    :param session: the database session in use.
    :returns: the (possibly signed/proxied/annotated) PFN.
    """
    lfn: LFNDict = {
        'scope': scope.external,  # type: ignore (scope.external might be None)
        'name': name,
        'path': path
    }
    pfn: str = list(protocol.lfns2pfns(lfns=lfn).values())[0]

    # do we need to sign the URLs?
    if sign_urls and protocol.attributes['scheme'] == 'https':
        service = get_rse_attribute(rse_id, RseAttr.SIGN_URL, session=session)
        if service:
            pfn = get_signed_url(rse_id=rse_id, service=service, operation='read', url=pfn, lifetime=signature_lifetime)

    # server side root proxy handling if location is set.
    # supports root and http destinations
    # cannot be pushed into protocols because we need to lookup rse attributes.
    # ultra-conservative implementation.
    if domain == 'wan' and protocol.attributes['scheme'] in ['root', 'http', 'https'] and client_location:

        if 'site' in client_location and client_location['site']:
            replica_site = get_rse_attribute(rse_id, RseAttr.SITE, session=session)

            # does it match with the client? if not, it's an outgoing connection
            # therefore the internal proxy must be prepended
            if client_location['site'] != replica_site:
                # first preference: a multi-node cache mapped to the client site
                cache_site = config_get('clientcachemap', client_location['site'], default='', session=session)
                if cache_site != '':
                    # print('client', client_location['site'], 'has cache:', cache_site)
                    # print('filename', name)
                    selected_prefix = get_multi_cache_prefix(cache_site, name)
                    if selected_prefix:
                        pfn = f"root://{selected_prefix}//{pfn.replace('davs://', 'root://')}"
                else:
                    # print('site:', client_location['site'], 'has no cache')
                    # print('lets check if it has defined an internal root proxy ')
                    root_proxy_internal = config_get('root-proxy-internal',    # section
                                                     client_location['site'],  # option
                                                     default='',               # empty string to circumvent exception
                                                     session=session)

                    if root_proxy_internal:
                        # TODO: XCache does not seem to grab signed URLs. Doublecheck with XCache devs.
                        # For now -> skip prepending XCache for GCS.
                        if 'storage.googleapis.com' in pfn or 'atlas-google-cloud.cern.ch' in pfn or 'amazonaws.com' in pfn:
                            pass  # ATLAS HACK
                        else:
                            # don't forget to mangle gfal-style davs URL into generic https URL
                            pfn = f"root://{root_proxy_internal}//{pfn.replace('davs://', 'https://')}"

    # Optionally append a multirange-simulation query string, driven by the
    # RSE attribute; invalid or non-positive values fall back to 1.
    simulate_multirange = get_rse_attribute(rse_id, RseAttr.SIMULATE_MULTIRANGE, session=session)

    if simulate_multirange is not None:
        try:
            # cover values that cannot be cast to int
            simulate_multirange = int(simulate_multirange)
        except ValueError:
            simulate_multirange = 1
            logger(logging.WARNING, 'Value encountered when retrieving RSE attribute "%s" not compatible with "int", used default value "1".', RseAttr.SIMULATE_MULTIRANGE)
        if simulate_multirange <= 0:
            logger(logging.WARNING, f'Value {simulate_multirange} encountered when retrieving RSE attribute "{RseAttr.SIMULATE_MULTIRANGE}" is <= 0, used default value "1".')
            simulate_multirange = 1
        pfn += f'&#multirange=false&nconnections={simulate_multirange}'

    return pfn


def _list_replicas(
        replicas: "Iterable[tuple]",
        show_pfns: bool,
        schemes: Optional[list[str]],
        files_wo_replica: "Iterable[dict[str, Any]]",
        client_location: Optional[dict[str, Any]],
        domain: Optional[str],
        sign_urls: bool,
        signature_lifetime: Optional[int],
        resolve_parents: bool,
        filters: dict[str, Any],
        by_rse_name: bool,
        *,
        session: "Session"
) -> "Iterator[dict[str, Any]]":
    """
    Transform raw replica rows into one dictionary per file and build the PFNs.

    :param replicas: iterable of raw rows
        (scope, name, archive_scope, archive_name, bytes, md5, adler32, path,
        state, rse_id, rse, rse_type, volatile). Rows are grouped with
        itertools.groupby on (scope, name), so all rows of the same file must
        be adjacent in the input.
    :param show_pfns: if False, skip PFN generation and only record replica states.
    :param schemes: optional list of protocol schemes to restrict PFNs to.
    :param files_wo_replica: file DIDs without replicas; yielded at the end
        with empty 'pfns' and 'rses' entries.
    :param client_location: client location dict; the 'site' entry is used to
        find RSEs local to the client in autoselect mode.
    :param domain: None for automatic lan/wan selection per RSE; otherwise the
        network domain forwarded to protocol selection.
    :param sign_urls: if True, sign PFNs where needed (see _build_list_replicas_pfn).
    :param signature_lifetime: lifetime in seconds of signed PFNs.
    :param resolve_parents: if True, add a 'parents' list of all parent DIDs per file.
    :param filters: filters forwarded to the RSE expression parser.
    :param by_rse_name: if True, index 'states'/'rses' by RSE name instead of RSE id.
    :param session: the database session in use.
    :returns: iterator of per-file dictionaries.
    """

    # the `domain` variable name will be re-used throughout the function with different values
    input_domain = domain

    # find all RSEs local to the client's location in autoselect mode (i.e., when domain is None)
    local_rses = []
    if input_domain is None:
        if client_location and 'site' in client_location and client_location['site']:
            try:
                local_rses = [rse['id'] for rse in parse_expression('site=%s' % client_location['site'], filter_=filters, session=session)]
            except Exception:
                pass  # do not hard fail if site cannot be resolved or is empty

    # `pfns_cache` memoizes deterministic paths; `protocols_cache` memoizes the
    # protocol selection per (rse_id, is_archive) pair across file groups.
    file, pfns_cache = {}, {}
    protocols_cache = defaultdict(dict)

    for _, replica_group in groupby(replicas, key=lambda x: (x[0], x[1])):  # Group by scope/name
        file = {}
        pfns = {}
        for scope, name, archive_scope, archive_name, bytes_, md5, adler32, path, state, rse_id, rse, rse_type, volatile in replica_group:
            if isinstance(archive_scope, str):
                archive_scope = InternalScope(archive_scope, from_external=False)

            # a non-empty archive scope/name means this row is a constituent inside an archive
            is_archive = bool(archive_scope and archive_name)

            # it is the first row in the scope/name group
            if not file:
                file['scope'], file['name'] = scope, name
                file['bytes'], file['md5'], file['adler32'] = bytes_, md5, adler32
                file['pfns'], file['rses'], file['states'] = {}, {}, {}
                if resolve_parents:
                    file['parents'] = ['%s:%s' % (parent['scope'].internal, parent['name'])
                                       for parent in rucio.core.did.list_all_parent_dids(scope, name, session=session)]

            # rows without an RSE come from the outer join for replica-less files
            if not rse_id:
                continue

            rse_key = rse if by_rse_name else rse_id
            file['states'][rse_key] = str(state.name if state else state)

            if not show_pfns:
                continue

            # It's the first time we see this RSE, initialize the protocols needed for PFN generation
            protocols = protocols_cache.get(rse_id, {}).get(is_archive)
            if not protocols:
                # select the lan door in autoselect mode, otherwise use the wan door
                domain = input_domain
                if domain is None:
                    domain = 'wan'
                    if local_rses and rse_id in local_rses:
                        domain = 'lan'

                protocols = _get_list_replicas_protocols(
                    rse_id=rse_id,
                    domain=domain,
                    schemes=schemes,
                    # We want 'root' for archives even if it wasn't included into 'schemes'
                    additional_schemes=['root'] if is_archive else [],
                    session=session,
                )
                protocols_cache[rse_id][is_archive] = protocols

            # build the pfns
            for domain, protocol, priority in protocols:
                # If the current "replica" is a constituent inside an archive, we must construct the pfn for the
                # parent (archive) file and append the xrdcl.unzip query string to it.
                if is_archive:
                    t_scope = archive_scope
                    t_name = archive_name
                else:
                    t_scope = scope
                    t_name = name

                if 'determinism_type' in protocol.attributes:  # PFN is cacheable
                    try:
                        path = pfns_cache['%s:%s:%s' % (protocol.attributes['determinism_type'], t_scope.internal, t_name)]
                    except KeyError:  # No cache entry scope:name found for this protocol
                        path = protocol._get_path(t_scope, t_name)  # type: ignore (t_scope is InternalScope instead of str)
                        pfns_cache['%s:%s:%s' % (protocol.attributes['determinism_type'], t_scope.internal, t_name)] = path

                try:
                    pfn = _build_list_replicas_pfn(
                        scope=t_scope,
                        name=t_name,
                        rse_id=rse_id,
                        domain=domain,
                        protocol=protocol,
                        path=path,
                        sign_urls=sign_urls,
                        signature_lifetime=signature_lifetime,
                        client_location=client_location,
                        session=session,
                    )

                    client_extract = False
                    if is_archive:
                        domain = 'zip'
                        pfn = add_url_query(pfn, {'xrdcl.unzip': name})
                        if protocol.attributes['scheme'] == 'root':
                            # xroot supports downloading files directly from inside an archive. Disable client_extract and prioritize xroot.
                            client_extract = False
                            priority = -1
                        else:
                            client_extract = True

                    pfns[pfn] = {
                        'rse_id': rse_id,
                        'rse': rse,
                        'type': str(rse_type.name),
                        'volatile': volatile,
                        'domain': domain,
                        'priority': priority,
                        'client_extract': client_extract
                    }

                except Exception:
                    # never end up here
                    print(format_exc())

                if protocol.attributes['scheme'] == 'srm':
                    try:
                        file['space_token'] = protocol.attributes['extended_attributes']['space_token']
                    except KeyError:
                        file['space_token'] = None

        # fill the 'pfns' and 'rses' dicts in file
        if pfns:
            # set the total order for the priority
            # --> exploit that L(AN) comes before W(AN) before Z(IP) alphabetically
            # and use 1-indexing to be compatible with metalink
            sorted_pfns = sorted(pfns.items(), key=lambda item: (item[1]['domain'], item[1]['priority'], item[0]))
            for i, (pfn, pfn_value) in enumerate(list(sorted_pfns), start=1):
                pfn_value['priority'] = i
                file['pfns'][pfn] = pfn_value

            # group the PFNs per RSE, ordered by the freshly assigned priority
            sorted_pfns = sorted(file['pfns'].items(), key=lambda item: (item[1]['rse_id'], item[1]['priority'], item[0]))
            for pfn, pfn_value in sorted_pfns:
                rse_key = pfn_value['rse'] if by_rse_name else pfn_value['rse_id']
                file['rses'].setdefault(rse_key, []).append(pfn)

        if file:
            yield file

    # historical behavior: file DIDs given directly as input are returned even
    # when they have no replicas
    for scope, name, bytes_, md5, adler32 in _list_files_wo_replicas(files_wo_replica, session=session):
        yield {
            'scope': scope,
            'name': name,
            'bytes': bytes_,
            'md5': md5,
            'adler32': adler32,
            'pfns': {},
            'rses': defaultdict(list)
        }


@stream_session
|
|
1194
|
+
def list_replicas(
|
|
1195
|
+
dids: "Sequence[dict[str, Any]]",
|
|
1196
|
+
schemes: Optional[list[str]] = None,
|
|
1197
|
+
unavailable: bool = False,
|
|
1198
|
+
request_id: Optional[str] = None,
|
|
1199
|
+
ignore_availability: bool = True,
|
|
1200
|
+
all_states: bool = False,
|
|
1201
|
+
pfns: bool = True,
|
|
1202
|
+
rse_expression: Optional[str] = None,
|
|
1203
|
+
client_location: Optional[dict[str, Any]] = None,
|
|
1204
|
+
domain: Optional[str] = None,
|
|
1205
|
+
sign_urls: bool = False,
|
|
1206
|
+
signature_lifetime: "Optional[int]" = None,
|
|
1207
|
+
resolve_archives: bool = True,
|
|
1208
|
+
resolve_parents: bool = False,
|
|
1209
|
+
nrandom: Optional[int] = None,
|
|
1210
|
+
updated_after: Optional[datetime] = None,
|
|
1211
|
+
by_rse_name: bool = False,
|
|
1212
|
+
*, session: "Session",
|
|
1213
|
+
) -> 'Iterator':
|
|
1214
|
+
"""
|
|
1215
|
+
List file replicas for a list of data identifiers (DIDs).
|
|
1216
|
+
|
|
1217
|
+
:param dids: The list of data identifiers (DIDs).
|
|
1218
|
+
:param schemes: A list of schemes to filter the replicas. (e.g. file, http, ...)
|
|
1219
|
+
:param unavailable: (deprecated) Also include unavailable replicas in the list.
|
|
1220
|
+
:param request_id: ID associated with the request for debugging.
|
|
1221
|
+
:param ignore_availability: Ignore the RSE blocklisting.
|
|
1222
|
+
:param all_states: Return all replicas whatever state they are in. Adds an extra 'states' entry in the result dictionary.
|
|
1223
|
+
:param rse_expression: The RSE expression to restrict list_replicas on a set of RSEs.
|
|
1224
|
+
:param client_location: Client location dictionary for PFN modification {'ip', 'fqdn', 'site', 'latitude', 'longitude'}
|
|
1225
|
+
:param domain: The network domain for the call, either None, 'wan' or 'lan'. None is automatic mode, 'all' is both ['lan','wan']
|
|
1226
|
+
:param sign_urls: If set, will sign the PFNs if necessary.
|
|
1227
|
+
:param signature_lifetime: If supported, in seconds, restrict the lifetime of the signed PFN.
|
|
1228
|
+
:param resolve_archives: When set to true, find archives which contain the replicas.
|
|
1229
|
+
:param resolve_parents: When set to true, find all parent datasets which contain the replicas.
|
|
1230
|
+
:param updated_after: datetime (UTC time), only return replicas updated after this time
|
|
1231
|
+
:param by_rse_name: if True, rse information will be returned in dicts indexed by rse name; otherwise: in dicts indexed by rse id
|
|
1232
|
+
:param session: The database session in use.
|
|
1233
|
+
"""
|
|
1234
|
+
# For historical reasons:
|
|
1235
|
+
# - list_replicas([some_file_did]), must return the file even if it doesn't have replicas
|
|
1236
|
+
# - list_replicas([some_collection_did]) must only return files with replicas
|
|
1237
|
+
|
|
1238
|
+
def _replicas_filter_subquery():
|
|
1239
|
+
"""
|
|
1240
|
+
Build the sub-query used to filter replicas according to list_replica's input arguments
|
|
1241
|
+
"""
|
|
1242
|
+
stmt = select(
|
|
1243
|
+
models.RSEFileAssociation.scope,
|
|
1244
|
+
models.RSEFileAssociation.name,
|
|
1245
|
+
models.RSEFileAssociation.path,
|
|
1246
|
+
models.RSEFileAssociation.state,
|
|
1247
|
+
models.RSEFileAssociation.bytes,
|
|
1248
|
+
models.RSEFileAssociation.md5,
|
|
1249
|
+
models.RSEFileAssociation.adler32,
|
|
1250
|
+
models.RSE.id.label('rse_id'),
|
|
1251
|
+
models.RSE.rse.label('rse_name'),
|
|
1252
|
+
models.RSE.rse_type,
|
|
1253
|
+
models.RSE.volatile,
|
|
1254
|
+
).join(
|
|
1255
|
+
models.RSE,
|
|
1256
|
+
and_(models.RSEFileAssociation.rse_id == models.RSE.id,
|
|
1257
|
+
models.RSE.deleted == false())
|
|
1258
|
+
)
|
|
1259
|
+
|
|
1260
|
+
if not ignore_availability:
|
|
1261
|
+
stmt = stmt.where(models.RSE.availability_read == true())
|
|
1262
|
+
|
|
1263
|
+
if updated_after:
|
|
1264
|
+
stmt = stmt.where(models.RSEFileAssociation.updated_at >= updated_after)
|
|
1265
|
+
|
|
1266
|
+
if rse_expression:
|
|
1267
|
+
rses = parse_expression(expression=rse_expression, filter_=filter_, session=session)
|
|
1268
|
+
# When the number of RSEs is small, don't go through the overhead of
|
|
1269
|
+
# creating and using a temporary table. Rely on a simple "in" query.
|
|
1270
|
+
# The number "4" was picked without any particular reason
|
|
1271
|
+
if 0 < len(rses) < 4:
|
|
1272
|
+
stmt = stmt.where(models.RSE.id.in_([rse['id'] for rse in rses]))
|
|
1273
|
+
else:
|
|
1274
|
+
rses_temp_table = temp_table_mngr(session).create_id_table()
|
|
1275
|
+
values = [{'id': rse['id']} for rse in rses]
|
|
1276
|
+
insert_stmt = insert(
|
|
1277
|
+
rses_temp_table
|
|
1278
|
+
)
|
|
1279
|
+
session.execute(insert_stmt, values)
|
|
1280
|
+
stmt = stmt.join(rses_temp_table, models.RSE.id == rses_temp_table.id)
|
|
1281
|
+
|
|
1282
|
+
if not all_states:
|
|
1283
|
+
if not unavailable:
|
|
1284
|
+
state_clause = models.RSEFileAssociation.state == ReplicaState.AVAILABLE
|
|
1285
|
+
else:
|
|
1286
|
+
state_clause = or_(
|
|
1287
|
+
models.RSEFileAssociation.state == ReplicaState.AVAILABLE,
|
|
1288
|
+
models.RSEFileAssociation.state == ReplicaState.UNAVAILABLE,
|
|
1289
|
+
models.RSEFileAssociation.state == ReplicaState.COPYING
|
|
1290
|
+
)
|
|
1291
|
+
stmt = stmt.where(state_clause)
|
|
1292
|
+
|
|
1293
|
+
return stmt.subquery()
|
|
1294
|
+
|
|
1295
|
+
def _resolve_collection_files(
|
|
1296
|
+
temp_table: Any,
|
|
1297
|
+
*,
|
|
1298
|
+
session: "Session"
|
|
1299
|
+
) -> tuple[int, Any]:
|
|
1300
|
+
"""
|
|
1301
|
+
Find all FILE dids contained in collections from temp_table and return them in a newly
|
|
1302
|
+
created temporary table.
|
|
1303
|
+
"""
|
|
1304
|
+
resolved_files_temp_table = temp_table_mngr(session).create_scope_name_table()
|
|
1305
|
+
selectable = rucio.core.did.list_child_dids_stmt(temp_table, did_type=DIDType.FILE)
|
|
1306
|
+
|
|
1307
|
+
stmt = insert(
|
|
1308
|
+
resolved_files_temp_table
|
|
1309
|
+
).from_select(
|
|
1310
|
+
['scope', 'name'],
|
|
1311
|
+
selectable
|
|
1312
|
+
)
|
|
1313
|
+
|
|
1314
|
+
return session.execute(stmt).rowcount, resolved_files_temp_table
|
|
1315
|
+
|
|
1316
|
+
def _list_replicas_for_collection_files_stmt(
|
|
1317
|
+
temp_table: Any,
|
|
1318
|
+
replicas_subquery: "Subquery"
|
|
1319
|
+
) -> "Select":
|
|
1320
|
+
"""
|
|
1321
|
+
Build a query for listing replicas of files resolved from containers/datasets
|
|
1322
|
+
|
|
1323
|
+
The query assumes that temp_table only contains DIDs of type FILE.
|
|
1324
|
+
"""
|
|
1325
|
+
return select(
|
|
1326
|
+
temp_table.scope.label('scope'),
|
|
1327
|
+
temp_table.name.label('name'),
|
|
1328
|
+
literal(None).label('archive_scope'),
|
|
1329
|
+
literal(None).label('archive_name'),
|
|
1330
|
+
replicas_subquery.c.bytes,
|
|
1331
|
+
replicas_subquery.c.md5,
|
|
1332
|
+
replicas_subquery.c.adler32,
|
|
1333
|
+
replicas_subquery.c.path,
|
|
1334
|
+
replicas_subquery.c.state,
|
|
1335
|
+
replicas_subquery.c.rse_id,
|
|
1336
|
+
replicas_subquery.c.rse_name,
|
|
1337
|
+
replicas_subquery.c.rse_type,
|
|
1338
|
+
replicas_subquery.c.volatile,
|
|
1339
|
+
).join_from(
|
|
1340
|
+
temp_table,
|
|
1341
|
+
replicas_subquery,
|
|
1342
|
+
and_(replicas_subquery.c.scope == temp_table.scope,
|
|
1343
|
+
replicas_subquery.c.name == temp_table.name),
|
|
1344
|
+
)
|
|
1345
|
+
|
|
1346
|
+
def _list_replicas_for_constituents_stmt(
|
|
1347
|
+
temp_table: Any,
|
|
1348
|
+
replicas_subquery: "Subquery"
|
|
1349
|
+
) -> "Select":
|
|
1350
|
+
"""
|
|
1351
|
+
Build a query for listing replicas of archives containing the files(constituents) given as input.
|
|
1352
|
+
i.e. for a file scope:file.log which exists in scope:archive.tar.gz, it will return the replicas
|
|
1353
|
+
(rse, path, state, etc) of archive.tar.gz, but with bytes/md5/adler of file.log
|
|
1354
|
+
"""
|
|
1355
|
+
return select(
|
|
1356
|
+
models.ConstituentAssociation.child_scope.label('scope'),
|
|
1357
|
+
models.ConstituentAssociation.child_name.label('name'),
|
|
1358
|
+
models.ConstituentAssociation.scope.label('archive_scope'),
|
|
1359
|
+
models.ConstituentAssociation.name.label('archive_name'),
|
|
1360
|
+
models.ConstituentAssociation.bytes,
|
|
1361
|
+
models.ConstituentAssociation.md5,
|
|
1362
|
+
models.ConstituentAssociation.adler32,
|
|
1363
|
+
replicas_subquery.c.path,
|
|
1364
|
+
replicas_subquery.c.state,
|
|
1365
|
+
replicas_subquery.c.rse_id,
|
|
1366
|
+
replicas_subquery.c.rse_name,
|
|
1367
|
+
replicas_subquery.c.rse_type,
|
|
1368
|
+
replicas_subquery.c.volatile,
|
|
1369
|
+
).join_from(
|
|
1370
|
+
temp_table,
|
|
1371
|
+
models.DataIdentifier,
|
|
1372
|
+
and_(models.DataIdentifier.scope == temp_table.scope,
|
|
1373
|
+
models.DataIdentifier.name == temp_table.name,
|
|
1374
|
+
models.DataIdentifier.did_type == DIDType.FILE,
|
|
1375
|
+
models.DataIdentifier.constituent == true()),
|
|
1376
|
+
).join(
|
|
1377
|
+
models.ConstituentAssociation,
|
|
1378
|
+
and_(models.ConstituentAssociation.child_scope == temp_table.scope,
|
|
1379
|
+
models.ConstituentAssociation.child_name == temp_table.name)
|
|
1380
|
+
).join(
|
|
1381
|
+
replicas_subquery,
|
|
1382
|
+
and_(replicas_subquery.c.scope == models.ConstituentAssociation.scope,
|
|
1383
|
+
replicas_subquery.c.name == models.ConstituentAssociation.name),
|
|
1384
|
+
)
|
|
1385
|
+
|
|
1386
|
+
def _list_replicas_for_input_files_stmt(
|
|
1387
|
+
temp_table: Any,
|
|
1388
|
+
replicas_subquery: "Subquery"
|
|
1389
|
+
) -> "Select":
|
|
1390
|
+
"""
|
|
1391
|
+
Builds a query which list the replicas of FILEs from users input, but ignores
|
|
1392
|
+
collections in the same input.
|
|
1393
|
+
|
|
1394
|
+
Note: These FILE dids must be returned to the user even if they don't have replicas,
|
|
1395
|
+
hence the outerjoin against the replicas_subquery.
|
|
1396
|
+
"""
|
|
1397
|
+
return select(
|
|
1398
|
+
temp_table.scope.label('scope'),
|
|
1399
|
+
temp_table.name.label('name'),
|
|
1400
|
+
literal(None).label('archive_scope'),
|
|
1401
|
+
literal(None).label('archive_name'),
|
|
1402
|
+
models.DataIdentifier.bytes,
|
|
1403
|
+
models.DataIdentifier.md5,
|
|
1404
|
+
models.DataIdentifier.adler32,
|
|
1405
|
+
replicas_subquery.c.path,
|
|
1406
|
+
replicas_subquery.c.state,
|
|
1407
|
+
replicas_subquery.c.rse_id,
|
|
1408
|
+
replicas_subquery.c.rse_name,
|
|
1409
|
+
replicas_subquery.c.rse_type,
|
|
1410
|
+
replicas_subquery.c.volatile,
|
|
1411
|
+
).join_from(
|
|
1412
|
+
temp_table,
|
|
1413
|
+
models.DataIdentifier,
|
|
1414
|
+
and_(models.DataIdentifier.scope == temp_table.scope,
|
|
1415
|
+
models.DataIdentifier.name == temp_table.name,
|
|
1416
|
+
models.DataIdentifier.did_type == DIDType.FILE),
|
|
1417
|
+
).outerjoin(
|
|
1418
|
+
replicas_subquery,
|
|
1419
|
+
and_(replicas_subquery.c.scope == temp_table.scope,
|
|
1420
|
+
replicas_subquery.c.name == temp_table.name),
|
|
1421
|
+
)
|
|
1422
|
+
|
|
1423
|
+
def _inspect_dids(
|
|
1424
|
+
temp_table: Any,
|
|
1425
|
+
*,
|
|
1426
|
+
session: "Session"
|
|
1427
|
+
) -> tuple[int, int, int]:
|
|
1428
|
+
"""
|
|
1429
|
+
Find how many files, collections and constituents are among the dids in the temp_table
|
|
1430
|
+
"""
|
|
1431
|
+
stmt = select(
|
|
1432
|
+
func.sum(
|
|
1433
|
+
case((models.DataIdentifier.did_type == DIDType.FILE, 1), else_=0)
|
|
1434
|
+
).label('num_files'),
|
|
1435
|
+
func.sum(
|
|
1436
|
+
case((models.DataIdentifier.did_type.in_([DIDType.CONTAINER, DIDType.DATASET]), 1), else_=0)
|
|
1437
|
+
).label('num_collections'),
|
|
1438
|
+
func.sum(
|
|
1439
|
+
case((models.DataIdentifier.constituent == true(), 1), else_=0)
|
|
1440
|
+
).label('num_constituents'),
|
|
1441
|
+
).join_from(
|
|
1442
|
+
temp_table,
|
|
1443
|
+
models.DataIdentifier,
|
|
1444
|
+
and_(models.DataIdentifier.scope == temp_table.scope,
|
|
1445
|
+
models.DataIdentifier.name == temp_table.name),
|
|
1446
|
+
)
|
|
1447
|
+
num_files, num_collections, num_constituents = session.execute(stmt).one() # returns None on empty input
|
|
1448
|
+
return num_files or 0, num_collections or 0, num_constituents or 0
|
|
1449
|
+
|
|
1450
|
+
if dids:
|
|
1451
|
+
filter_ = {'vo': dids[0]['scope'].vo}
|
|
1452
|
+
else:
|
|
1453
|
+
filter_ = {'vo': 'def'}
|
|
1454
|
+
|
|
1455
|
+
dids = {(did['scope'], did['name']): did for did in dids} # type: ignore (Deduplicate input)
|
|
1456
|
+
if not dids:
|
|
1457
|
+
return
|
|
1458
|
+
|
|
1459
|
+
input_dids_temp_table = temp_table_mngr(session).create_scope_name_table()
|
|
1460
|
+
values = [{'scope': scope, 'name': name} for scope, name in dids]
|
|
1461
|
+
stmt = insert(
|
|
1462
|
+
input_dids_temp_table
|
|
1463
|
+
)
|
|
1464
|
+
session.execute(stmt, values)
|
|
1465
|
+
|
|
1466
|
+
num_files, num_collections, num_constituents = _inspect_dids(input_dids_temp_table, session=session)
|
|
1467
|
+
|
|
1468
|
+
num_files_in_collections, resolved_files_temp_table = 0, None
|
|
1469
|
+
if num_collections:
|
|
1470
|
+
num_files_in_collections, resolved_files_temp_table = _resolve_collection_files(input_dids_temp_table, session=session)
|
|
1471
|
+
|
|
1472
|
+
replicas_subquery = _replicas_filter_subquery()
|
|
1473
|
+
replica_sources = []
|
|
1474
|
+
if num_files:
|
|
1475
|
+
replica_sources.append(
|
|
1476
|
+
_list_replicas_for_input_files_stmt(input_dids_temp_table, replicas_subquery)
|
|
1477
|
+
)
|
|
1478
|
+
if num_constituents and resolve_archives:
|
|
1479
|
+
replica_sources.append(
|
|
1480
|
+
_list_replicas_for_constituents_stmt(input_dids_temp_table, replicas_subquery)
|
|
1481
|
+
)
|
|
1482
|
+
if num_files_in_collections:
|
|
1483
|
+
replica_sources.append(
|
|
1484
|
+
_list_replicas_for_collection_files_stmt(resolved_files_temp_table, replicas_subquery)
|
|
1485
|
+
)
|
|
1486
|
+
|
|
1487
|
+
if not replica_sources:
|
|
1488
|
+
return
|
|
1489
|
+
|
|
1490
|
+
# In the simple case that somebody calls list_replicas on big collections with nrandom set,
|
|
1491
|
+
# opportunistically try to reduce the number of fetched and analyzed rows.
|
|
1492
|
+
if (
|
|
1493
|
+
nrandom
|
|
1494
|
+
# Only try this optimisation if list_replicas was called on collection(s).
|
|
1495
|
+
# I didn't consider handling the case when list_replica is called with a mix of
|
|
1496
|
+
# file/archive/collection dids: database queries in those cases are more complex
|
|
1497
|
+
# and people don't usually call list_replicas with nrandom on file/archive_constituents anyway.
|
|
1498
|
+
and (num_files_in_collections and not num_constituents and not num_files)
|
|
1499
|
+
# The following code introduces overhead if it fails to pick n random replicas.
|
|
1500
|
+
# Only execute when nrandom is much smaller than the total number of candidate files.
|
|
1501
|
+
# 64 was picked without any particular reason as "seems good enough".
|
|
1502
|
+
and 0 < nrandom < num_files_in_collections / 64
|
|
1503
|
+
):
|
|
1504
|
+
# Randomly select a subset of file DIDs which have at least one replica matching the RSE/replica
|
|
1505
|
+
# filters applied on database side. Some filters are applied later in python code
|
|
1506
|
+
# (for example: scheme; or client_location/domain). We don't have any guarantee that
|
|
1507
|
+
# those, python, filters will not drop the replicas which we just selected randomly.
|
|
1508
|
+
stmt = select(
|
|
1509
|
+
resolved_files_temp_table.scope.label('scope'), # type: ignore (resolved_files_temp_table might be None)
|
|
1510
|
+
resolved_files_temp_table.name.label('name'), # type: ignore (resolved_files_temp_table might be None)
|
|
1511
|
+
).where(
|
|
1512
|
+
exists(
|
|
1513
|
+
select(1)
|
|
1514
|
+
).where(
|
|
1515
|
+
replicas_subquery.c.scope == resolved_files_temp_table.scope, # type: ignore (resolved_files_temp_table might be None)
|
|
1516
|
+
replicas_subquery.c.name == resolved_files_temp_table.name # type: ignore (resolved_files_temp_table might be None)
|
|
1517
|
+
)
|
|
1518
|
+
).order_by(
|
|
1519
|
+
literal_column('dbms_random.value') if session.bind.dialect.name == 'oracle' else func.random() # type: ignore
|
|
1520
|
+
).limit(
|
|
1521
|
+
# slightly overshoot to reduce the probability that python-side filtering will
|
|
1522
|
+
# leave us with less than nrandom replicas.
|
|
1523
|
+
nrandom * 4
|
|
1524
|
+
)
|
|
1525
|
+
# Reuse input temp table. We don't need its content anymore
|
|
1526
|
+
random_dids_temp_table = input_dids_temp_table
|
|
1527
|
+
session.execute(delete(random_dids_temp_table))
|
|
1528
|
+
stmt = insert(
|
|
1529
|
+
random_dids_temp_table
|
|
1530
|
+
).from_select(
|
|
1531
|
+
['scope', 'name'],
|
|
1532
|
+
stmt
|
|
1533
|
+
)
|
|
1534
|
+
session.execute(stmt)
|
|
1535
|
+
|
|
1536
|
+
# Fetch all replicas for randomly selected dids and apply filters on python side
|
|
1537
|
+
stmt = _list_replicas_for_collection_files_stmt(random_dids_temp_table, replicas_subquery)
|
|
1538
|
+
stmt = stmt.order_by('scope', 'name')
|
|
1539
|
+
replica_tuples = session.execute(stmt)
|
|
1540
|
+
random_replicas = list(
|
|
1541
|
+
_pick_n_random(
|
|
1542
|
+
nrandom,
|
|
1543
|
+
_list_replicas(replica_tuples, pfns, schemes, [], client_location, domain, # type: ignore (replica_tuples, pending SQLA2.1: https://github.com/rucio/rucio/discussions/6615)
|
|
1544
|
+
sign_urls, signature_lifetime, resolve_parents, filter_, by_rse_name, session=session)
|
|
1545
|
+
)
|
|
1546
|
+
)
|
|
1547
|
+
if len(random_replicas) == nrandom:
|
|
1548
|
+
yield from random_replicas
|
|
1549
|
+
return
|
|
1550
|
+
else:
|
|
1551
|
+
# Our opportunistic attempt to pick nrandom replicas without fetching all database rows failed,
|
|
1552
|
+
# continue with the normal list_replicas flow and fetch all replicas
|
|
1553
|
+
pass
|
|
1554
|
+
|
|
1555
|
+
if len(replica_sources) == 1:
|
|
1556
|
+
stmt = replica_sources[0].order_by('scope', 'name')
|
|
1557
|
+
replica_tuples = session.execute(stmt)
|
|
1558
|
+
else:
|
|
1559
|
+
if session.bind.dialect.name == 'mysql': # type: ignore
|
|
1560
|
+
# On mysql, perform both queries independently and merge their result in python.
|
|
1561
|
+
# The union query fails with "Can't reopen table"
|
|
1562
|
+
replica_tuples = heapq.merge(
|
|
1563
|
+
*[session.execute(stmt.order_by('scope', 'name')) for stmt in replica_sources],
|
|
1564
|
+
key=lambda t: (t[0], t[1]), # sort by scope, name
|
|
1565
|
+
)
|
|
1566
|
+
else:
|
|
1567
|
+
stmt = union(*replica_sources).order_by('scope', 'name')
|
|
1568
|
+
replica_tuples = session.execute(stmt)
|
|
1569
|
+
|
|
1570
|
+
yield from _pick_n_random(
|
|
1571
|
+
nrandom, # type: ignore (nrandom is not None)
|
|
1572
|
+
_list_replicas(replica_tuples, pfns, schemes, [], client_location, domain, # type: ignore (replica_tuples, pending SQLA2.1: https://github.com/rucio/rucio/discussions/6615)
|
|
1573
|
+
sign_urls, signature_lifetime, resolve_parents, filter_, by_rse_name, session=session)
|
|
1574
|
+
)
|
|
1575
|
+
|
|
1576
|
+
|
|
1577
|
+
@transactional_session
def __bulk_add_new_file_dids(
        files: "Iterable[dict[str, Any]]",
        account: InternalAccount,
        dataset_meta: Optional["Mapping[str, Any]"] = None,
        *,
        session: "Session"
) -> Literal[True]:
    """
    Bulk add new file DIDs.

    :param files: the list of new files. Each dict must contain 'scope', 'name'
                  and 'bytes'; 'account', 'md5', 'adler32' and 'meta' are optional.
    :param account: The account used as owner when a file does not carry its own.
    :param dataset_meta: Optional metadata applied to every file.
    :param session: The database session in use.
    :returns: True if successful.
    :raises ScopeNotFound: if a referenced scope does not exist.
    :raises DataIdentifierAlreadyExists: if one of the DIDs already exists.
    :raises RucioException: on any other database error.
    """
    for file in files:
        new_did = models.DataIdentifier(scope=file['scope'], name=file['name'],
                                        account=file.get('account') or account,
                                        did_type=DIDType.FILE, bytes=file['bytes'],
                                        md5=file.get('md5'), adler32=file.get('adler32'),
                                        is_new=None)
        # Defer the flush: all inserts are sent to the database in one batch below.
        new_did.save(session=session, flush=False)

        if file.get('meta'):
            rucio.core.did.set_metadata_bulk(scope=file['scope'], name=file['name'], meta=file['meta'], recursive=False, session=session)
        if dataset_meta:
            rucio.core.did.set_metadata_bulk(scope=file['scope'], name=file['name'], meta=dataset_meta, recursive=False, session=session)
    try:
        session.flush()
    except IntegrityError as error:
        # Per-dialect error patterns signalling a missing scope (foreign key violation).
        # The previous chain listed the Oracle DIDS_SCOPE_FK pattern twice; deduplicated here.
        scope_fk_patterns = (
            '.*IntegrityError.*02291.*integrity constraint.*DIDS_SCOPE_FK.*violated - parent key not found.*',
            '.*IntegrityError.*FOREIGN KEY constraint failed.*',
            '.*IntegrityError.*1452.*Cannot add or update a child row: a foreign key constraint fails.*',
            '.*IntegrityError.*insert or update on table.*violates foreign key constraint "DIDS_SCOPE_FK".*',
            '.*ForeignKeyViolation.*insert or update on table.*violates foreign key constraint.*',
            '.*IntegrityError.*foreign key constraints? failed.*',
        )
        if any(match(pattern, error.args[0]) for pattern in scope_fk_patterns):
            raise exception.ScopeNotFound('Scope not found!')

        raise exception.RucioException(error.args)
    except DatabaseError as error:
        # Oracle raises ORA-14400 when the scope partition does not exist.
        if match('.*(DatabaseError).*ORA-14400.*inserted partition key does not map to any partition.*', error.args[0]):
            raise exception.ScopeNotFound('Scope not found!')

        raise exception.RucioException(error.args)
    except FlushError as error:
        if match('New instance .* with identity key .* conflicts with persistent instance', error.args[0]):
            raise exception.DataIdentifierAlreadyExists('Data Identifier already exists!')
        raise exception.RucioException(error.args)
    return True
|
|
1628
|
+
|
|
1629
|
+
|
|
1630
|
+
@transactional_session
def __bulk_add_file_dids(
        files: "Iterable[dict[str, Any]]",
        account: InternalAccount,
        dataset_meta: Optional["Mapping[str, Any]"] = None,
        *,
        session: "Session"
) -> list[dict[str, Any]]:
    """
    Bulk add new file DIDs, skipping the ones which already exist.

    :param files: the list of files. Each dict must contain 'scope' and 'name'.
    :param account: The account owner.
    :param dataset_meta: Optional metadata applied to every newly created file.
    :param session: The database session in use.
    :returns: list of file dicts: the newly created files followed by the
              already-existing ones (as fetched from the DIDs table).
    """
    # Materialize the iterable: it is traversed twice below, and a generator
    # argument would silently be empty on the second pass.
    files = list(files)
    condition = [and_(models.DataIdentifier.scope == f['scope'],
                      models.DataIdentifier.name == f['name'],
                      models.DataIdentifier.did_type == DIDType.FILE)
                 for f in files]

    stmt = select(
        models.DataIdentifier.scope,
        models.DataIdentifier.name,
        models.DataIdentifier.bytes,
        models.DataIdentifier.md5,
        models.DataIdentifier.adler32,
    ).with_hint(
        models.DataIdentifier,
        'INDEX(DIDS DIDS_PK)',
        'oracle'
    ).where(
        or_(*condition)
    )
    available_files = [res._asdict() for res in session.execute(stmt).all()]
    # Index the existing DIDs for O(1) membership tests instead of a nested scan.
    existing = {(f['scope'], f['name']) for f in available_files}
    new_files = [file for file in files if (file['scope'], file['name']) not in existing]
    __bulk_add_new_file_dids(files=new_files, account=account,
                             dataset_meta=dataset_meta,
                             session=session)
    return new_files + available_files
|
|
1679
|
+
|
|
1680
|
+
|
|
1681
|
+
def tombstone_from_delay(tombstone_delay: Optional[Union[str, timedelta]]) -> Optional[datetime]:
    """
    Compute the tombstone timestamp corresponding to a tombstone delay.

    :param tombstone_delay: the delay, either as a timedelta or as a number of
                            seconds (anything accepted by int()). Falsy values
                            (None, '', 0, timedelta(0)) mean "no tombstone".
    :returns: None if no effective delay was given; the epoch (1970-01-01) for a
              negative delay, marking the replica as immediately deletable;
              otherwise now (naive UTC) plus the delay.
    """
    # Tolerate None (and other falsy values) for tombstone_delay
    if not tombstone_delay:
        return None

    # The annotation allows a timedelta directly; int() would raise on it,
    # so only convert when the delay was given as a number of seconds.
    if not isinstance(tombstone_delay, timedelta):
        tombstone_delay = timedelta(seconds=int(tombstone_delay))

    if not tombstone_delay:
        return None

    # A negative delay means "deletable right away": use the epoch as tombstone.
    if tombstone_delay < timedelta(0):
        return datetime(1970, 1, 1)

    return datetime.utcnow() + tombstone_delay
|
|
1695
|
+
|
|
1696
|
+
|
|
1697
|
+
@transactional_session
def __bulk_add_replicas(
        rse_id: str,
        files: "Iterable[dict[str, Any]]",
        account: InternalAccount,
        *,
        session: "Session"
) -> tuple[int, int]:
    """
    Bulk add new replicas, skipping the ones which already exist on the RSE.

    :param rse_id: the RSE id.
    :param files: the list of files. Each dict must contain 'scope', 'name' and 'bytes'.
    :param account: The account owner.
    :param session: The database session in use.
    :returns: Tuple (number of replicas created, total bytes added).
    :raises Duplicate: if one of the replicas already exists.
    :raises RucioException: on any other database error.
    """
    nbfiles, bytes_ = 0, 0
    # Materialize the iterable: it is traversed twice below, and a generator
    # argument would silently be empty on the second pass.
    files = list(files)
    # Check for the replicas already available
    condition = [and_(models.RSEFileAssociation.scope == f['scope'],
                      models.RSEFileAssociation.name == f['name'],
                      models.RSEFileAssociation.rse_id == rse_id)
                 for f in files]

    stmt = select(
        models.RSEFileAssociation.scope,
        models.RSEFileAssociation.name,
        models.RSEFileAssociation.rse_id,
    ).with_hint(
        models.RSEFileAssociation,
        'INDEX(REPLICAS REPLICAS_PK)',
        'oracle'
    ).where(
        or_(*condition)
    )

    available_replicas = [res._asdict() for res in session.execute(stmt).all()]
    # Index the existing replicas for O(1) membership tests instead of a nested scan.
    existing = {(r['scope'], r['name'], r['rse_id']) for r in available_replicas}

    default_tombstone_delay = get_rse_attribute(rse_id, RseAttr.TOMBSTONE_DELAY, session=session)
    default_tombstone = tombstone_from_delay(default_tombstone_delay)

    new_replicas = []
    for file in files:
        if (file['scope'], file['name'], rse_id) in existing:
            continue
        nbfiles += 1
        bytes_ += file['bytes']
        new_replicas.append({'rse_id': rse_id, 'scope': file['scope'],
                             'name': file['name'], 'bytes': file['bytes'],
                             'path': file.get('path'),
                             'state': ReplicaState(file.get('state', 'A')),
                             'md5': file.get('md5'), 'adler32': file.get('adler32'),
                             'lock_cnt': file.get('lock_cnt', 0),
                             'tombstone': file.get('tombstone') or default_tombstone})
    try:
        if new_replicas:
            stmt = insert(
                models.RSEFileAssociation
            )
            session.execute(stmt, new_replicas)
        session.flush()
        return nbfiles, bytes_
    except IntegrityError as error:
        # Per-dialect error patterns signalling a primary-key clash (replica exists).
        if match('.*IntegrityError.*ORA-00001: unique constraint .*REPLICAS_PK.*violated.*', error.args[0]) \
                or match('.*IntegrityError.*1062.*Duplicate entry.*', error.args[0]) \
                or match('.*IntegrityError.*columns? rse_id.*scope.*name.*not unique.*', error.args[0]) \
                or match('.*IntegrityError.*duplicate key value violates unique constraint.*', error.args[0]):
            raise exception.Duplicate("File replica already exists!")
        raise exception.RucioException(error.args)
    except DatabaseError as error:
        raise exception.RucioException(error.args)
|
|
1772
|
+
|
|
1773
|
+
|
|
1774
|
+
@transactional_session
def add_replicas(
        rse_id: str,
        files: "Iterable[dict[str, Any]]",
        account: InternalAccount,
        ignore_availability: bool = True,
        dataset_meta: Optional["Mapping[str, Any]"] = None,
        *,
        session: "Session"
) -> None:
    """
    Bulk add file replicas.

    :param rse_id: The RSE id.
    :param files: The list of files. Each dict must contain 'scope', 'name' and
                  'bytes'; 'pfn' is mandatory on non-deterministic RSEs.
    :param account: The account owner.
    :param ignore_availability: Ignore the RSE blocklisting.
    :param dataset_meta: Optional metadata applied to every file.
    :param session: The database session in use.
    :raises UnsupportedOperation: on volatile RSEs, or when a PFN is missing on a
                                  non-deterministic RSE.
    :raises ResourceTemporaryUnavailable: if the RSE is blocklisted for writing.
    :raises InvalidPath: if a provided PFN does not match the expected PFNs.
    """

    def _expected_pfns(lfns, rse_settings, scheme, operation='write', domain='wan', protocol_attr=None):
        # Propagate the requested operation (was previously hard-coded to 'write').
        p = rsemgr.create_protocol(rse_settings=rse_settings, operation=operation, scheme=scheme, domain=domain, protocol_attr=protocol_attr)
        expected_pfns = p.lfns2pfns(lfns)
        return clean_pfns(expected_pfns.values())

    # Materialize the iterable: it is traversed several times below, and a
    # generator argument would silently be empty after the first pass.
    files = list(files)

    replica_rse = get_rse(rse_id=rse_id, session=session)

    if replica_rse['volatile'] is True:
        raise exception.UnsupportedOperation('Cannot add replicas on volatile RSE %s ' % (replica_rse['rse']))

    if not replica_rse['availability_write'] and not ignore_availability:
        raise exception.ResourceTemporaryUnavailable('%s is temporary unavailable for writing' % replica_rse['rse'])

    for file in files:
        if 'pfn' not in file:
            if not replica_rse['deterministic']:
                raise exception.UnsupportedOperation('PFN needed for this (non deterministic) RSE %s ' % (replica_rse['rse']))

    __bulk_add_file_dids(files=files, account=account,
                         dataset_meta=dataset_meta,
                         session=session)

    pfns = {}  # dict[str, list[str]], {scheme: [pfns], scheme: [pfns]}
    for file in files:
        if 'pfn' in file:
            scheme = file['pfn'].split(':')[0]
            pfns.setdefault(scheme, []).append(file['pfn'])

    if pfns:
        rse_settings = rsemgr.get_rse_info(rse_id=rse_id, session=session)
        for scheme in pfns.keys():
            if not replica_rse['deterministic']:
                # Non-deterministic RSE: derive the storage path from each PFN.
                # Every file carries a 'pfn' here (checked above).
                p = rsemgr.create_protocol(rse_settings=rse_settings, operation='write', scheme=scheme)
                pfns[scheme] = p.parse_pfns(pfns=pfns[scheme])
                for file in files:
                    if file['pfn'].startswith(scheme):
                        tmp = pfns[scheme][file['pfn']]
                        file['path'] = ''.join([tmp['path'], tmp['name']])
            else:
                # Check that the pfns match the expected pfns.
                # Guard with .get(): on a deterministic RSE some files may carry no PFN at all.
                lfns = [{'scope': i['scope'].external, 'name': i['name']} for i in files if i.get('pfn', '').startswith(scheme)]
                pfns[scheme] = set(clean_pfns(pfns[scheme]))
                expected_pfns = set()

                for protocol_attr in rsemgr.get_protocols_ordered(rse_settings=rse_settings, operation='write', scheme=scheme, domain='wan'):
                    expected_pfns.update(_expected_pfns(lfns, rse_settings, scheme, operation='write', domain='wan',
                                                        protocol_attr=protocol_attr))
                pfns[scheme] -= expected_pfns

                if len(pfns[scheme]) > 0:
                    # Not everything matched the WAN protocols: retry against LAN protocols.
                    for protocol_attr in rsemgr.get_protocols_ordered(rse_settings=rse_settings, operation='write', scheme=scheme, domain='lan'):
                        expected_pfns.update(_expected_pfns(lfns, rse_settings, scheme, operation='write', domain='lan',
                                                            protocol_attr=protocol_attr))
                    pfns[scheme] -= expected_pfns

                if len(pfns[scheme]) > 0:
                    # pfns not found in wan or lan
                    pfns_scheme = pfns[scheme]
                    raise exception.InvalidPath(f"One of the PFNs provided {pfns_scheme!r} for {lfns!r} does not match the Rucio expected PFNs: {expected_pfns!r}")

    nbfiles, bytes_ = __bulk_add_replicas(rse_id=rse_id, files=files, account=account, session=session)
    increase(rse_id=rse_id, files=nbfiles, bytes_=bytes_, session=session)
|
|
1856
|
+
|
|
1857
|
+
|
|
1858
|
+
@transactional_session
def add_replica(
        rse_id: str,
        scope: InternalScope,
        name: str,
        bytes_: int,
        account: models.InternalAccount,
        adler32: Optional[str] = None,
        md5: Optional[str] = None,
        dsn: Optional[str] = None,
        pfn: Optional[str] = None,
        meta: Optional[dict[str, Any]] = None,
        rules: Optional[list[dict[str, Any]]] = None,
        tombstone: "Optional[datetime]" = None,
        *,
        session: "Session"
) -> None:
    """
    Add a single file replica; thin wrapper around add_replicas.

    :param rse_id: the rse id.
    :param scope: the scope name.
    :param name: The data identifier name.
    :param bytes_: the size of the file.
    :param account: The account owner.
    :param adler32: The adler32 checksum.
    :param md5: The md5 checksum.
    :param dsn: Unused; kept for backward compatibility of the signature.
    :param pfn: Physical file name (for nondeterministic rse).
    :param meta: Meta-data associated with the file. Represented as key/value pairs in a dictionary.
    :param rules: Replication rules associated with the file. A list of dictionaries, e.g., [{'copies': 2, 'rse_expression': 'TIERS1'}, ].
    :param tombstone: Optional tombstone datetime for the replica.
    :param session: The database session in use.
    """
    # NOTE(review): the return annotation previously claimed list[dict[str, Any]],
    # but add_replicas returns None, so this function always returned None.
    meta = meta or {}
    rules = rules or []

    file = {'scope': scope, 'name': name, 'bytes': bytes_, 'adler32': adler32, 'md5': md5, 'meta': meta, 'rules': rules, 'tombstone': tombstone}
    if pfn:
        file['pfn'] = pfn
    return add_replicas(rse_id=rse_id, files=[file], account=account, session=session)
|
|
1900
|
+
|
|
1901
|
+
|
|
1902
|
+
@METRICS.time_it
@transactional_session
def delete_replicas(
        rse_id: str,
        files: Optional["Sequence[dict[str, Any]]"],
        ignore_availability: bool = True,
        *,
        session: "Session"
) -> None:
    """
    Delete file replicas.

    :param rse_id: the rse id.
    :param files: the list of files to delete. Each dict must contain 'scope' and 'name'.
    :param ignore_availability: Ignore the RSE blocklisting.
    :param session: The database session in use.
    :raises ResourceTemporaryUnavailable: if the RSE is blocklisted for deletion.
    :raises ReplicaNotFound: if one or more of the replicas do not exist.
    """
    if not files:
        return

    replica_rse = get_rse(rse_id=rse_id, session=session)

    if not replica_rse['availability_delete'] and not ignore_availability:
        # The two literals used to concatenate without a space ("unavailablefor deleting").
        raise exception.ResourceTemporaryUnavailable('%s is temporary unavailable '
                                                     'for deleting' % replica_rse['rse'])
    tt_mngr = temp_table_mngr(session)
    scope_name_temp_table = tt_mngr.create_scope_name_table()
    scope_name_temp_table2 = tt_mngr.create_scope_name_table()
    association_temp_table = tt_mngr.create_association_table()

    # Load the (scope, name) pairs into a temp table to drive the bulk statements below.
    values = [{'scope': file['scope'], 'name': file['name']} for file in files]
    stmt = insert(
        scope_name_temp_table
    )
    session.execute(stmt, values)

    # WARNING : This should not be necessary since that would mean the replica is used as a source.
    stmt = delete(
        models.Source,
    ).where(
        exists(select(1)
               .where(and_(models.Source.scope == scope_name_temp_table.scope,
                           models.Source.name == scope_name_temp_table.name,
                           models.Source.rse_id == rse_id)))
    ).execution_options(
        synchronize_session=False
    )
    session.execute(stmt)

    # Compute the counter delta (file count and total bytes) before the replicas are gone.
    stmt = select(
        func.count(),
        func.sum(models.RSEFileAssociation.bytes),
    ).join_from(
        scope_name_temp_table,
        models.RSEFileAssociation,
        and_(models.RSEFileAssociation.scope == scope_name_temp_table.scope,
             models.RSEFileAssociation.name == scope_name_temp_table.name,
             models.RSEFileAssociation.rse_id == rse_id)
    )
    delta, bytes_ = session.execute(stmt).one()

    # Delete replicas
    stmt = delete(
        models.RSEFileAssociation,
    ).where(
        exists(select(1)
               .where(
                   and_(models.RSEFileAssociation.scope == scope_name_temp_table.scope,
                        models.RSEFileAssociation.name == scope_name_temp_table.name,
                        models.RSEFileAssociation.rse_id == rse_id)))
    ).execution_options(
        synchronize_session=False
    )
    res = session.execute(stmt)
    if res.rowcount != len(files):
        raise exception.ReplicaNotFound("One or several replicas don't exist.")

    # Update bad replicas: mark BAD entries of the deleted replicas as DELETED.
    stmt = update(
        models.BadReplica,
    ).where(
        exists(select(1)
               .where(
                   and_(models.BadReplica.scope == scope_name_temp_table.scope,
                        models.BadReplica.name == scope_name_temp_table.name,
                        models.BadReplica.rse_id == rse_id)))
    ).where(
        models.BadReplica.state == BadFilesStatus.BAD
    ).values({
        models.BadReplica.state: BadFilesStatus.DELETED,
        models.BadReplica.updated_at: datetime.utcnow()
    }).execution_options(
        synchronize_session=False
    )

    res = session.execute(stmt)

    __cleanup_after_replica_deletion(scope_name_temp_table=scope_name_temp_table,
                                     scope_name_temp_table2=scope_name_temp_table2,
                                     association_temp_table=association_temp_table,
                                     rse_id=rse_id, files=files, session=session)

    # Decrease RSE counter
    decrease(rse_id=rse_id, files=delta, bytes_=bytes_, session=session)
|
|
2006
|
+
|
|
2007
|
+
|
|
2008
|
+
@transactional_session
|
|
2009
|
+
def __cleanup_after_replica_deletion(
|
|
2010
|
+
scope_name_temp_table: Any,
|
|
2011
|
+
scope_name_temp_table2: Any,
|
|
2012
|
+
association_temp_table: Any,
|
|
2013
|
+
rse_id: str,
|
|
2014
|
+
files: "Iterable[dict[str, Any]]",
|
|
2015
|
+
*,
|
|
2016
|
+
session: "Session"
|
|
2017
|
+
) -> None:
|
|
2018
|
+
"""
|
|
2019
|
+
Perform update of collections/archive associations/dids after the removal of their replicas
|
|
2020
|
+
:param rse_id: the rse id
|
|
2021
|
+
:param files: list of files whose replica got deleted
|
|
2022
|
+
:param session: The database session in use.
|
|
2023
|
+
"""
|
|
2024
|
+
clt_to_update, parents_to_analyze, affected_archives, clt_replicas_to_delete = set(), set(), set(), set()
|
|
2025
|
+
did_condition = []
|
|
2026
|
+
incomplete_dids, messages, clt_to_set_not_archive = [], [], []
|
|
2027
|
+
for file in files:
|
|
2028
|
+
|
|
2029
|
+
# Schedule update of all collections containing this file and having a collection replica in the RSE
|
|
2030
|
+
clt_to_update.add(ScopeName(scope=file['scope'], name=file['name']))
|
|
2031
|
+
|
|
2032
|
+
# If the file doesn't have any replicas anymore, we should perform cleanups of objects
|
|
2033
|
+
# related to this file. However, if the file is "lost", it's removal wasn't intentional,
|
|
2034
|
+
# so we want to skip deleting the metadata here. Perform cleanups:
|
|
2035
|
+
|
|
2036
|
+
# 1) schedule removal of this file from all parent datasets
|
|
2037
|
+
parents_to_analyze.add(ScopeName(scope=file['scope'], name=file['name']))
|
|
2038
|
+
|
|
2039
|
+
# 2) schedule removal of this file from the DID table
|
|
2040
|
+
did_condition.append(
|
|
2041
|
+
and_(models.DataIdentifier.scope == file['scope'],
|
|
2042
|
+
models.DataIdentifier.name == file['name'],
|
|
2043
|
+
models.DataIdentifier.availability != DIDAvailability.LOST,
|
|
2044
|
+
~exists(select(1).prefix_with("/*+ INDEX(REPLICAS REPLICAS_PK) */", dialect='oracle')).where(
|
|
2045
|
+
and_(models.RSEFileAssociation.scope == file['scope'],
|
|
2046
|
+
models.RSEFileAssociation.name == file['name'])),
|
|
2047
|
+
~exists(select(1).prefix_with("/*+ INDEX(ARCHIVE_CONTENTS ARCH_CONTENTS_PK) */", dialect='oracle')).where(
|
|
2048
|
+
and_(models.ConstituentAssociation.child_scope == file['scope'],
|
|
2049
|
+
models.ConstituentAssociation.child_name == file['name']))))
|
|
2050
|
+
|
|
2051
|
+
# 3) if the file is an archive, schedule cleanup on the files from inside the archive
|
|
2052
|
+
affected_archives.add(ScopeName(scope=file['scope'], name=file['name']))
|
|
2053
|
+
|
|
2054
|
+
if clt_to_update:
|
|
2055
|
+
# Get all collection_replicas at RSE, insert them into UpdatedCollectionReplica
|
|
2056
|
+
stmt = delete(scope_name_temp_table)
|
|
2057
|
+
session.execute(stmt)
|
|
2058
|
+
values = [sn._asdict() for sn in clt_to_update]
|
|
2059
|
+
stmt = insert(scope_name_temp_table)
|
|
2060
|
+
session.execute(stmt, values)
|
|
2061
|
+
stmt = select(
|
|
2062
|
+
models.DataIdentifierAssociation.scope,
|
|
2063
|
+
models.DataIdentifierAssociation.name,
|
|
2064
|
+
).distinct(
|
|
2065
|
+
).join_from(
|
|
2066
|
+
scope_name_temp_table,
|
|
2067
|
+
models.DataIdentifierAssociation,
|
|
2068
|
+
and_(scope_name_temp_table.scope == models.DataIdentifierAssociation.child_scope,
|
|
2069
|
+
scope_name_temp_table.name == models.DataIdentifierAssociation.child_name)
|
|
2070
|
+
).join(
|
|
2071
|
+
models.CollectionReplica,
|
|
2072
|
+
and_(models.CollectionReplica.scope == models.DataIdentifierAssociation.scope,
|
|
2073
|
+
models.CollectionReplica.name == models.DataIdentifierAssociation.name,
|
|
2074
|
+
models.CollectionReplica.rse_id == rse_id)
|
|
2075
|
+
)
|
|
2076
|
+
for parent_scope, parent_name in session.execute(stmt):
|
|
2077
|
+
models.UpdatedCollectionReplica(scope=parent_scope,
|
|
2078
|
+
name=parent_name,
|
|
2079
|
+
did_type=DIDType.DATASET,
|
|
2080
|
+
rse_id=rse_id). \
|
|
2081
|
+
save(session=session, flush=False)
|
|
2082
|
+
|
|
2083
|
+
# Delete did from the content for the last did
|
|
2084
|
+
while parents_to_analyze:
|
|
2085
|
+
did_associations_to_remove = set()
|
|
2086
|
+
|
|
2087
|
+
stmt = delete(scope_name_temp_table)
|
|
2088
|
+
session.execute(stmt)
|
|
2089
|
+
values = [sn._asdict() for sn in parents_to_analyze]
|
|
2090
|
+
stmt = insert(scope_name_temp_table)
|
|
2091
|
+
session.execute(stmt, values)
|
|
2092
|
+
parents_to_analyze.clear()
|
|
2093
|
+
|
|
2094
|
+
stmt = select(
|
|
2095
|
+
models.DataIdentifierAssociation.scope,
|
|
2096
|
+
models.DataIdentifierAssociation.name,
|
|
2097
|
+
models.DataIdentifierAssociation.did_type,
|
|
2098
|
+
models.DataIdentifierAssociation.child_scope,
|
|
2099
|
+
models.DataIdentifierAssociation.child_name,
|
|
2100
|
+
).distinct(
|
|
2101
|
+
).join_from(
|
|
2102
|
+
scope_name_temp_table,
|
|
2103
|
+
models.DataIdentifierAssociation,
|
|
2104
|
+
and_(scope_name_temp_table.scope == models.DataIdentifierAssociation.child_scope,
|
|
2105
|
+
scope_name_temp_table.name == models.DataIdentifierAssociation.child_name)
|
|
2106
|
+
).outerjoin(
|
|
2107
|
+
models.DataIdentifier,
|
|
2108
|
+
and_(models.DataIdentifier.availability == DIDAvailability.LOST,
|
|
2109
|
+
models.DataIdentifier.scope == models.DataIdentifierAssociation.child_scope,
|
|
2110
|
+
models.DataIdentifier.name == models.DataIdentifierAssociation.child_name)
|
|
2111
|
+
).where(
|
|
2112
|
+
models.DataIdentifier.scope == null()
|
|
2113
|
+
).outerjoin(
|
|
2114
|
+
models.RSEFileAssociation,
|
|
2115
|
+
and_(models.RSEFileAssociation.scope == models.DataIdentifierAssociation.child_scope,
|
|
2116
|
+
models.RSEFileAssociation.name == models.DataIdentifierAssociation.child_name)
|
|
2117
|
+
).where(
|
|
2118
|
+
models.RSEFileAssociation.scope == null()
|
|
2119
|
+
).outerjoin(
|
|
2120
|
+
models.ConstituentAssociation,
|
|
2121
|
+
and_(models.ConstituentAssociation.child_scope == models.DataIdentifierAssociation.child_scope,
|
|
2122
|
+
models.ConstituentAssociation.child_name == models.DataIdentifierAssociation.child_name)
|
|
2123
|
+
).where(
|
|
2124
|
+
models.ConstituentAssociation.child_scope == null()
|
|
2125
|
+
)
|
|
2126
|
+
|
|
2127
|
+
clt_to_set_not_archive.append(set())
|
|
2128
|
+
for parent_scope, parent_name, did_type, child_scope, child_name in session.execute(stmt):
|
|
2129
|
+
|
|
2130
|
+
# Schedule removal of child file/dataset/container from the parent dataset/container
|
|
2131
|
+
did_associations_to_remove.add(Association(scope=parent_scope, name=parent_name,
|
|
2132
|
+
child_scope=child_scope, child_name=child_name))
|
|
2133
|
+
|
|
2134
|
+
# Schedule setting is_archive = False on parents which don't have any children with is_archive == True anymore
|
|
2135
|
+
clt_to_set_not_archive[-1].add(ScopeName(scope=parent_scope, name=parent_name))
|
|
2136
|
+
|
|
2137
|
+
# If the parent dataset/container becomes empty as a result of the child removal
|
|
2138
|
+
# (it was the last children), metadata cleanup has to be done:
|
|
2139
|
+
#
|
|
2140
|
+
# 1) Schedule to remove the replicas of this empty collection
|
|
2141
|
+
clt_replicas_to_delete.add(ScopeName(scope=parent_scope, name=parent_name))
|
|
2142
|
+
|
|
2143
|
+
# 2) Schedule removal of this empty collection from its own parent collections
|
|
2144
|
+
parents_to_analyze.add(ScopeName(scope=parent_scope, name=parent_name))
|
|
2145
|
+
|
|
2146
|
+
# 3) Schedule removal of the entry from the DIDs table
|
|
2147
|
+
remove_open_did = config_get_bool('reaper', 'remove_open_did', default=False, session=session)
|
|
2148
|
+
if remove_open_did:
|
|
2149
|
+
did_condition.append(
|
|
2150
|
+
and_(models.DataIdentifier.scope == parent_scope,
|
|
2151
|
+
models.DataIdentifier.name == parent_name,
|
|
2152
|
+
~exists(1).where(
|
|
2153
|
+
and_(models.DataIdentifierAssociation.child_scope == parent_scope,
|
|
2154
|
+
models.DataIdentifierAssociation.child_name == parent_name)),
|
|
2155
|
+
~exists(1).where(
|
|
2156
|
+
and_(models.DataIdentifierAssociation.scope == parent_scope,
|
|
2157
|
+
models.DataIdentifierAssociation.name == parent_name))))
|
|
2158
|
+
else:
|
|
2159
|
+
did_condition.append(
|
|
2160
|
+
and_(models.DataIdentifier.scope == parent_scope,
|
|
2161
|
+
models.DataIdentifier.name == parent_name,
|
|
2162
|
+
models.DataIdentifier.is_open == false(),
|
|
2163
|
+
~exists(1).where(
|
|
2164
|
+
and_(models.DataIdentifierAssociation.child_scope == parent_scope,
|
|
2165
|
+
models.DataIdentifierAssociation.child_name == parent_name)),
|
|
2166
|
+
~exists(1).where(
|
|
2167
|
+
and_(models.DataIdentifierAssociation.scope == parent_scope,
|
|
2168
|
+
models.DataIdentifierAssociation.name == parent_name))))
|
|
2169
|
+
|
|
2170
|
+
if did_associations_to_remove:
|
|
2171
|
+
stmt = delete(association_temp_table)
|
|
2172
|
+
session.execute(stmt)
|
|
2173
|
+
values = [a._asdict() for a in did_associations_to_remove]
|
|
2174
|
+
stmt = insert(association_temp_table)
|
|
2175
|
+
session.execute(stmt, values)
|
|
2176
|
+
|
|
2177
|
+
# get the list of modified parent scope, name
|
|
2178
|
+
stmt = select(
|
|
2179
|
+
models.DataIdentifier.scope,
|
|
2180
|
+
models.DataIdentifier.name,
|
|
2181
|
+
models.DataIdentifier.did_type,
|
|
2182
|
+
).distinct(
|
|
2183
|
+
).join_from(
|
|
2184
|
+
association_temp_table,
|
|
2185
|
+
models.DataIdentifier,
|
|
2186
|
+
and_(association_temp_table.scope == models.DataIdentifier.scope,
|
|
2187
|
+
association_temp_table.name == models.DataIdentifier.name)
|
|
2188
|
+
).where(
|
|
2189
|
+
or_(models.DataIdentifier.complete == true(),
|
|
2190
|
+
models.DataIdentifier.complete.is_(None)),
|
|
2191
|
+
)
|
|
2192
|
+
for parent_scope, parent_name, parent_did_type in session.execute(stmt):
|
|
2193
|
+
message = {'scope': parent_scope,
|
|
2194
|
+
'name': parent_name,
|
|
2195
|
+
'did_type': parent_did_type,
|
|
2196
|
+
'event_type': 'INCOMPLETE'}
|
|
2197
|
+
if message not in messages:
|
|
2198
|
+
messages.append(message)
|
|
2199
|
+
incomplete_dids.append(ScopeName(scope=parent_scope, name=parent_name))
|
|
2200
|
+
|
|
2201
|
+
content_to_delete_filter = exists(select(1)
|
|
2202
|
+
.where(and_(association_temp_table.scope == models.DataIdentifierAssociation.scope,
|
|
2203
|
+
association_temp_table.name == models.DataIdentifierAssociation.name,
|
|
2204
|
+
association_temp_table.child_scope == models.DataIdentifierAssociation.child_scope,
|
|
2205
|
+
association_temp_table.child_name == models.DataIdentifierAssociation.child_name)))
|
|
2206
|
+
|
|
2207
|
+
rucio.core.did.insert_content_history(filter_=content_to_delete_filter, did_created_at=None, session=session)
|
|
2208
|
+
|
|
2209
|
+
stmt = delete(
|
|
2210
|
+
models.DataIdentifierAssociation
|
|
2211
|
+
).where(
|
|
2212
|
+
content_to_delete_filter,
|
|
2213
|
+
).execution_options(
|
|
2214
|
+
synchronize_session=False
|
|
2215
|
+
)
|
|
2216
|
+
session.execute(stmt)
|
|
2217
|
+
|
|
2218
|
+
# Get collection replicas of collections which became empty
|
|
2219
|
+
if clt_replicas_to_delete:
|
|
2220
|
+
stmt = delete(scope_name_temp_table)
|
|
2221
|
+
session.execute(stmt)
|
|
2222
|
+
values = [sn._asdict() for sn in clt_replicas_to_delete]
|
|
2223
|
+
stmt = insert(scope_name_temp_table)
|
|
2224
|
+
session.execute(stmt, values)
|
|
2225
|
+
stmt = delete(scope_name_temp_table2)
|
|
2226
|
+
session.execute(stmt)
|
|
2227
|
+
stmt = select(
|
|
2228
|
+
models.CollectionReplica.scope,
|
|
2229
|
+
models.CollectionReplica.name,
|
|
2230
|
+
).distinct(
|
|
2231
|
+
).join_from(
|
|
2232
|
+
scope_name_temp_table,
|
|
2233
|
+
models.CollectionReplica,
|
|
2234
|
+
and_(scope_name_temp_table.scope == models.CollectionReplica.scope,
|
|
2235
|
+
scope_name_temp_table.name == models.CollectionReplica.name),
|
|
2236
|
+
).join(
|
|
2237
|
+
models.DataIdentifier,
|
|
2238
|
+
and_(models.DataIdentifier.scope == models.CollectionReplica.scope,
|
|
2239
|
+
models.DataIdentifier.name == models.CollectionReplica.name)
|
|
2240
|
+
).outerjoin(
|
|
2241
|
+
models.DataIdentifierAssociation,
|
|
2242
|
+
and_(models.DataIdentifierAssociation.scope == models.CollectionReplica.scope,
|
|
2243
|
+
models.DataIdentifierAssociation.name == models.CollectionReplica.name)
|
|
2244
|
+
).where(
|
|
2245
|
+
models.DataIdentifierAssociation.scope == null()
|
|
2246
|
+
)
|
|
2247
|
+
stmt = insert(
|
|
2248
|
+
scope_name_temp_table2
|
|
2249
|
+
).from_select(
|
|
2250
|
+
['scope', 'name'],
|
|
2251
|
+
stmt
|
|
2252
|
+
)
|
|
2253
|
+
session.execute(stmt)
|
|
2254
|
+
# Delete the retrieved collection replicas of empty collections
|
|
2255
|
+
stmt = delete(
|
|
2256
|
+
models.CollectionReplica,
|
|
2257
|
+
).where(
|
|
2258
|
+
exists(select(1)
|
|
2259
|
+
.where(and_(models.CollectionReplica.scope == scope_name_temp_table2.scope,
|
|
2260
|
+
models.CollectionReplica.name == scope_name_temp_table2.name)))
|
|
2261
|
+
).execution_options(
|
|
2262
|
+
synchronize_session=False
|
|
2263
|
+
)
|
|
2264
|
+
session.execute(stmt)
|
|
2265
|
+
|
|
2266
|
+
# Update incomplete state
|
|
2267
|
+
messages, dids_to_delete = [], set()
|
|
2268
|
+
if incomplete_dids:
|
|
2269
|
+
stmt = delete(scope_name_temp_table)
|
|
2270
|
+
session.execute(stmt)
|
|
2271
|
+
values = [sn._asdict() for sn in incomplete_dids]
|
|
2272
|
+
stmt = insert(scope_name_temp_table)
|
|
2273
|
+
session.execute(stmt, values)
|
|
2274
|
+
stmt = update(
|
|
2275
|
+
models.DataIdentifier
|
|
2276
|
+
).where(
|
|
2277
|
+
exists(select(1)
|
|
2278
|
+
.where(and_(models.DataIdentifier.scope == scope_name_temp_table.scope,
|
|
2279
|
+
models.DataIdentifier.name == scope_name_temp_table.name)))
|
|
2280
|
+
).where(
|
|
2281
|
+
models.DataIdentifier.complete != false(),
|
|
2282
|
+
).values({
|
|
2283
|
+
models.DataIdentifier.complete: False
|
|
2284
|
+
}).execution_options(
|
|
2285
|
+
synchronize_session=False
|
|
2286
|
+
)
|
|
2287
|
+
|
|
2288
|
+
session.execute(stmt)
|
|
2289
|
+
|
|
2290
|
+
# delete empty dids
|
|
2291
|
+
if did_condition:
|
|
2292
|
+
for chunk in chunks(did_condition, 10):
|
|
2293
|
+
stmt = select(
|
|
2294
|
+
models.DataIdentifier.scope,
|
|
2295
|
+
models.DataIdentifier.name,
|
|
2296
|
+
models.DataIdentifier.did_type,
|
|
2297
|
+
).with_hint(
|
|
2298
|
+
models.DataIdentifier,
|
|
2299
|
+
'INDEX(DIDS DIDS_PK)',
|
|
2300
|
+
'oracle'
|
|
2301
|
+
).where(
|
|
2302
|
+
or_(*chunk)
|
|
2303
|
+
)
|
|
2304
|
+
for scope, name, did_type in session.execute(stmt):
|
|
2305
|
+
if did_type == DIDType.DATASET:
|
|
2306
|
+
messages.append({'event_type': 'ERASE',
|
|
2307
|
+
'payload': dumps({'scope': scope.external,
|
|
2308
|
+
'name': name,
|
|
2309
|
+
'account': 'root'})})
|
|
2310
|
+
dids_to_delete.add(ScopeName(scope=scope, name=name))
|
|
2311
|
+
|
|
2312
|
+
# Remove Archive Constituents
|
|
2313
|
+
constituent_associations_to_delete = set()
|
|
2314
|
+
if affected_archives:
|
|
2315
|
+
stmt = delete(scope_name_temp_table)
|
|
2316
|
+
session.execute(stmt)
|
|
2317
|
+
values = [sn._asdict() for sn in affected_archives]
|
|
2318
|
+
stmt = insert(scope_name_temp_table)
|
|
2319
|
+
session.execute(stmt, values)
|
|
2320
|
+
|
|
2321
|
+
stmt = select(
|
|
2322
|
+
models.ConstituentAssociation
|
|
2323
|
+
).distinct(
|
|
2324
|
+
).join_from(
|
|
2325
|
+
scope_name_temp_table,
|
|
2326
|
+
models.ConstituentAssociation,
|
|
2327
|
+
and_(scope_name_temp_table.scope == models.ConstituentAssociation.scope,
|
|
2328
|
+
scope_name_temp_table.name == models.ConstituentAssociation.name),
|
|
2329
|
+
).outerjoin(
|
|
2330
|
+
models.DataIdentifier,
|
|
2331
|
+
and_(models.DataIdentifier.availability == DIDAvailability.LOST,
|
|
2332
|
+
models.DataIdentifier.scope == models.ConstituentAssociation.scope,
|
|
2333
|
+
models.DataIdentifier.name == models.ConstituentAssociation.name)
|
|
2334
|
+
).where(
|
|
2335
|
+
models.DataIdentifier.scope == null()
|
|
2336
|
+
).outerjoin(
|
|
2337
|
+
models.RSEFileAssociation,
|
|
2338
|
+
and_(models.RSEFileAssociation.scope == models.ConstituentAssociation.scope,
|
|
2339
|
+
models.RSEFileAssociation.name == models.ConstituentAssociation.name)
|
|
2340
|
+
).where(
|
|
2341
|
+
models.RSEFileAssociation.scope == null()
|
|
2342
|
+
)
|
|
2343
|
+
|
|
2344
|
+
for constituent in session.execute(stmt).scalars().all():
|
|
2345
|
+
constituent_associations_to_delete.add(Association(scope=constituent.scope, name=constituent.name,
|
|
2346
|
+
child_scope=constituent.child_scope, child_name=constituent.child_name))
|
|
2347
|
+
models.ConstituentAssociationHistory(
|
|
2348
|
+
child_scope=constituent.child_scope,
|
|
2349
|
+
child_name=constituent.child_name,
|
|
2350
|
+
scope=constituent.scope,
|
|
2351
|
+
name=constituent.name,
|
|
2352
|
+
bytes=constituent.bytes,
|
|
2353
|
+
adler32=constituent.adler32,
|
|
2354
|
+
md5=constituent.md5,
|
|
2355
|
+
guid=constituent.guid,
|
|
2356
|
+
length=constituent.length,
|
|
2357
|
+
updated_at=constituent.updated_at,
|
|
2358
|
+
created_at=constituent.created_at,
|
|
2359
|
+
).save(session=session, flush=False)
|
|
2360
|
+
|
|
2361
|
+
if constituent_associations_to_delete:
|
|
2362
|
+
stmt = delete(association_temp_table)
|
|
2363
|
+
session.execute(stmt)
|
|
2364
|
+
values = [a._asdict() for a in constituent_associations_to_delete]
|
|
2365
|
+
stmt = insert(association_temp_table)
|
|
2366
|
+
session.execute(stmt, values)
|
|
2367
|
+
stmt = delete(
|
|
2368
|
+
models.ConstituentAssociation
|
|
2369
|
+
).where(
|
|
2370
|
+
exists(select(1)
|
|
2371
|
+
.where(and_(association_temp_table.scope == models.ConstituentAssociation.scope,
|
|
2372
|
+
association_temp_table.name == models.ConstituentAssociation.name,
|
|
2373
|
+
association_temp_table.child_scope == models.ConstituentAssociation.child_scope,
|
|
2374
|
+
association_temp_table.child_name == models.ConstituentAssociation.child_name)))
|
|
2375
|
+
).execution_options(
|
|
2376
|
+
synchronize_session=False
|
|
2377
|
+
)
|
|
2378
|
+
session.execute(stmt)
|
|
2379
|
+
|
|
2380
|
+
removed_constituents = {ScopeName(scope=c.child_scope, name=c.child_name) for c in constituent_associations_to_delete}
|
|
2381
|
+
for chunk in chunks(removed_constituents, 200):
|
|
2382
|
+
__cleanup_after_replica_deletion(scope_name_temp_table=scope_name_temp_table,
|
|
2383
|
+
scope_name_temp_table2=scope_name_temp_table2,
|
|
2384
|
+
association_temp_table=association_temp_table,
|
|
2385
|
+
rse_id=rse_id, files=[sn._asdict() for sn in chunk], session=session)
|
|
2386
|
+
|
|
2387
|
+
if dids_to_delete:
|
|
2388
|
+
stmt = delete(scope_name_temp_table)
|
|
2389
|
+
session.execute(stmt)
|
|
2390
|
+
values = [sn._asdict() for sn in dids_to_delete]
|
|
2391
|
+
stmt = insert(scope_name_temp_table)
|
|
2392
|
+
session.execute(stmt, values)
|
|
2393
|
+
|
|
2394
|
+
# Remove rules in Waiting for approval or Suspended
|
|
2395
|
+
stmt = delete(
|
|
2396
|
+
models.ReplicationRule,
|
|
2397
|
+
).where(
|
|
2398
|
+
exists(select(1)
|
|
2399
|
+
.where(and_(models.ReplicationRule.scope == scope_name_temp_table.scope,
|
|
2400
|
+
models.ReplicationRule.name == scope_name_temp_table.name)))
|
|
2401
|
+
).where(
|
|
2402
|
+
models.ReplicationRule.state.in_((RuleState.SUSPENDED, RuleState.WAITING_APPROVAL))
|
|
2403
|
+
).execution_options(
|
|
2404
|
+
synchronize_session=False
|
|
2405
|
+
)
|
|
2406
|
+
session.execute(stmt)
|
|
2407
|
+
|
|
2408
|
+
# Remove DID Metadata
|
|
2409
|
+
must_delete_did_meta = True
|
|
2410
|
+
if session.bind.dialect.name == 'oracle':
|
|
2411
|
+
oracle_version = int(session.connection().connection.version.split('.')[0])
|
|
2412
|
+
if oracle_version < 12:
|
|
2413
|
+
must_delete_did_meta = False
|
|
2414
|
+
if must_delete_did_meta:
|
|
2415
|
+
stmt = delete(
|
|
2416
|
+
models.DidMeta,
|
|
2417
|
+
).where(
|
|
2418
|
+
exists(select(1)
|
|
2419
|
+
.where(and_(models.DidMeta.scope == scope_name_temp_table.scope,
|
|
2420
|
+
models.DidMeta.name == scope_name_temp_table.name)))
|
|
2421
|
+
).execution_options(
|
|
2422
|
+
synchronize_session=False
|
|
2423
|
+
)
|
|
2424
|
+
session.execute(stmt)
|
|
2425
|
+
|
|
2426
|
+
for chunk in chunks(messages, 100):
|
|
2427
|
+
add_messages(chunk, session=session)
|
|
2428
|
+
|
|
2429
|
+
# Delete dids
|
|
2430
|
+
dids_to_delete_filter = exists(select(1)
|
|
2431
|
+
.where(and_(models.DataIdentifier.scope == scope_name_temp_table.scope,
|
|
2432
|
+
models.DataIdentifier.name == scope_name_temp_table.name)))
|
|
2433
|
+
archive_dids = config_get_bool('deletion', 'archive_dids', default=False, session=session)
|
|
2434
|
+
if archive_dids:
|
|
2435
|
+
rucio.core.did.insert_deleted_dids(filter_=dids_to_delete_filter, session=session)
|
|
2436
|
+
stmt = delete(
|
|
2437
|
+
models.DataIdentifier,
|
|
2438
|
+
).where(
|
|
2439
|
+
dids_to_delete_filter,
|
|
2440
|
+
).execution_options(
|
|
2441
|
+
synchronize_session=False
|
|
2442
|
+
)
|
|
2443
|
+
session.execute(stmt)
|
|
2444
|
+
|
|
2445
|
+
# Set is_archive = false on collections which don't have archive children anymore
|
|
2446
|
+
while clt_to_set_not_archive:
|
|
2447
|
+
to_update = clt_to_set_not_archive.pop(0)
|
|
2448
|
+
if not to_update:
|
|
2449
|
+
continue
|
|
2450
|
+
stmt = delete(scope_name_temp_table)
|
|
2451
|
+
session.execute(stmt)
|
|
2452
|
+
values = [sn._asdict() for sn in to_update]
|
|
2453
|
+
stmt = insert(scope_name_temp_table)
|
|
2454
|
+
session.execute(stmt, values)
|
|
2455
|
+
stmt = delete(scope_name_temp_table2)
|
|
2456
|
+
session.execute(stmt)
|
|
2457
|
+
|
|
2458
|
+
data_identifier_alias = aliased(models.DataIdentifier, name='did_alias')
|
|
2459
|
+
# Fetch rows to be updated
|
|
2460
|
+
stmt = select(
|
|
2461
|
+
models.DataIdentifier.scope,
|
|
2462
|
+
models.DataIdentifier.name,
|
|
2463
|
+
).distinct(
|
|
2464
|
+
).where(
|
|
2465
|
+
models.DataIdentifier.is_archive == true()
|
|
2466
|
+
).join_from(
|
|
2467
|
+
scope_name_temp_table,
|
|
2468
|
+
models.DataIdentifier,
|
|
2469
|
+
and_(scope_name_temp_table.scope == models.DataIdentifier.scope,
|
|
2470
|
+
scope_name_temp_table.name == models.DataIdentifier.name)
|
|
2471
|
+
).join(
|
|
2472
|
+
models.DataIdentifierAssociation,
|
|
2473
|
+
and_(models.DataIdentifier.scope == models.DataIdentifierAssociation.scope,
|
|
2474
|
+
models.DataIdentifier.name == models.DataIdentifierAssociation.name)
|
|
2475
|
+
).outerjoin(
|
|
2476
|
+
data_identifier_alias,
|
|
2477
|
+
and_(data_identifier_alias.scope == models.DataIdentifierAssociation.child_scope,
|
|
2478
|
+
data_identifier_alias.name == models.DataIdentifierAssociation.child_name,
|
|
2479
|
+
data_identifier_alias.is_archive == true())
|
|
2480
|
+
).where(
|
|
2481
|
+
data_identifier_alias.scope == null()
|
|
2482
|
+
)
|
|
2483
|
+
stmt = insert(
|
|
2484
|
+
scope_name_temp_table2
|
|
2485
|
+
).from_select(
|
|
2486
|
+
['scope', 'name'],
|
|
2487
|
+
stmt
|
|
2488
|
+
)
|
|
2489
|
+
session.execute(stmt)
|
|
2490
|
+
# update the fetched rows
|
|
2491
|
+
stmt = update(
|
|
2492
|
+
models.DataIdentifier,
|
|
2493
|
+
).where(
|
|
2494
|
+
exists(select(1)
|
|
2495
|
+
.where(and_(models.DataIdentifier.scope == scope_name_temp_table2.scope,
|
|
2496
|
+
models.DataIdentifier.name == scope_name_temp_table2.name)))
|
|
2497
|
+
).values({
|
|
2498
|
+
models.DataIdentifier.is_archive: False
|
|
2499
|
+
}).execution_options(
|
|
2500
|
+
synchronize_session=False
|
|
2501
|
+
)
|
|
2502
|
+
session.execute(stmt)
|
|
2503
|
+
|
|
2504
|
+
|
|
2505
|
+
@transactional_session
def get_replica(
    rse_id: str,
    scope: InternalScope,
    name: str,
    *,
    session: "Session"
) -> dict[str, Any]:
    """
    Fetch a single file replica and return its attributes as a dictionary.

    :param rse_id: The RSE Id.
    :param scope: the scope name.
    :param name: The data identifier name.
    :param session: The database session in use.

    :returns: A dictionary with the list of replica attributes.
    :raises exception.ReplicaNotFound: if no replica matches (rse_id, scope, name).
    """
    query = select(
        models.RSEFileAssociation
    ).where(
        and_(models.RSEFileAssociation.rse_id == rse_id,
             models.RSEFileAssociation.scope == scope,
             models.RSEFileAssociation.name == name)
    )
    try:
        # scalar_one() raises NoResultFound when the replica does not exist
        replica = session.execute(query).scalar_one()
    except NoResultFound:
        rse_name = get_rse_name(rse_id=rse_id, session=session)
        raise exception.ReplicaNotFound("No row found for scope: %s name: %s rse: %s" % (scope, name, rse_name))
    return replica.to_dict()
|
|
2534
|
+
|
|
2535
|
+
|
|
2536
|
+
@transactional_session
def list_and_mark_unlocked_replicas(
    limit: int,
    bytes_: Optional[int] = None,
    rse_id: Optional[str] = None,
    delay_seconds: int = 600,
    only_delete_obsolete: bool = False,
    *,
    session: "Session"
) -> list[dict[str, Any]]:
    """
    List RSE File replicas with no locks.

    Works in two phases: a first query selects (and row-locks) candidate
    replicas on the RSE; candidates are then staged into a temporary table in
    chunks and re-queried with joins to decide which ones are actually safe to
    delete. Selected replicas are marked BEING_DELETED with an EPOCH tombstone
    before being returned.

    :param limit: Number of replicas returned.
    :param bytes_: The amount of needed bytes.
    :param rse_id: The rse_id.
    :param delay_seconds: The delay to query replicas in BEING_DELETED state
    :param only_delete_obsolete: If set to True, will only return the replicas with EPOCH tombstone
    :param session: The database session in use.

    :returns: a list of dictionary replica.
    """

    needed_space = bytes_
    total_bytes = 0
    rows = []

    temp_table_cls = temp_table_mngr(session).create_scope_name_table()

    replicas_alias = aliased(models.RSEFileAssociation, name='replicas_alias')

    # Phase 1: candidate replicas on this RSE — unlocked, with an expired (or,
    # if only_delete_obsolete, an EPOCH) tombstone. Replicas stuck in
    # BEING_DELETED are retried once they haven't been touched for
    # delay_seconds (a previous reaper run probably died).
    stmt = select(
        models.RSEFileAssociation.scope,
        models.RSEFileAssociation.name,
    ).where(
        models.RSEFileAssociation.lock_cnt == 0,
        models.RSEFileAssociation.rse_id == rse_id,
        models.RSEFileAssociation.tombstone == OBSOLETE if only_delete_obsolete else models.RSEFileAssociation.tombstone < datetime.utcnow(),
    ).where(
        or_(models.RSEFileAssociation.state.in_((ReplicaState.AVAILABLE, ReplicaState.UNAVAILABLE, ReplicaState.BAD)),
            and_(models.RSEFileAssociation.state == ReplicaState.BEING_DELETED, models.RSEFileAssociation.updated_at < datetime.utcnow() - timedelta(seconds=delay_seconds)))
    ).outerjoin(
        models.Source,
        and_(models.RSEFileAssociation.scope == models.Source.scope,
             models.RSEFileAssociation.name == models.Source.name,
             models.RSEFileAssociation.rse_id == models.Source.rse_id)
    ).where(
        models.Source.scope.is_(None)  # Only try to delete replicas if they are not used as sources in any transfers
    ).order_by(
        models.RSEFileAssociation.tombstone,
        models.RSEFileAssociation.updated_at
    ).with_for_update(
        skip_locked=True,
        # oracle: we must specify a column, not a table; however, it doesn't matter which column, the lock is put on the whole row
        # postgresql/mysql: sqlalchemy driver automatically converts it to a table name
        # sqlite: this is completely ignored
        of=models.RSEFileAssociation.scope,
    )

    # Process candidates in chunks slightly larger than the limit so that one
    # chunk usually suffices even if some candidates are rejected below.
    for chunk in chunks(session.execute(stmt).yield_per(2 * limit), math.ceil(1.25 * limit)):
        # Stage the chunk into the temp table for the join-heavy second query
        stmt = delete(temp_table_cls)
        session.execute(stmt)
        values = [{'scope': scope, 'name': name} for scope, name in chunk]
        stmt = insert(temp_table_cls)
        session.execute(stmt, values)

        # Phase 2: enrich candidates with path/bytes/state/datatype and compute
        # the "can_delete" predicate in the HAVING clause:
        # - the replica is not the last available copy (another RSE has it), or
        # - it is the last copy but no request (transfer) references the did.
        stmt = select(
            models.RSEFileAssociation.scope,
            models.RSEFileAssociation.name,
            models.RSEFileAssociation.path,
            models.RSEFileAssociation.bytes,
            models.RSEFileAssociation.tombstone,
            models.RSEFileAssociation.state,
            models.DataIdentifier.datatype,
        ).join_from(
            temp_table_cls,
            models.RSEFileAssociation,
            and_(models.RSEFileAssociation.scope == temp_table_cls.scope,
                 models.RSEFileAssociation.name == temp_table_cls.name,
                 models.RSEFileAssociation.rse_id == rse_id)
        ).with_hint(
            replicas_alias,
            'INDEX(%(name)s REPLICAS_PK)',
            'oracle'
        ).outerjoin(
            # Other available copies of the same file on different RSEs
            replicas_alias,
            and_(models.RSEFileAssociation.scope == replicas_alias.scope,
                 models.RSEFileAssociation.name == replicas_alias.name,
                 models.RSEFileAssociation.rse_id != replicas_alias.rse_id,
                 replicas_alias.state == ReplicaState.AVAILABLE)
        ).with_hint(
            models.Request,
            'INDEX(requests REQUESTS_SCOPE_NAME_RSE_IDX)',
            'oracle'
        ).outerjoin(
            # Pending requests which may still need this file as a source
            models.Request,
            and_(models.RSEFileAssociation.scope == models.Request.scope,
                 models.RSEFileAssociation.name == models.Request.name)
        ).join(
            models.DataIdentifier,
            and_(models.RSEFileAssociation.scope == models.DataIdentifier.scope,
                 models.RSEFileAssociation.name == models.DataIdentifier.name)
        ).group_by(
            models.RSEFileAssociation.scope,
            models.RSEFileAssociation.name,
            models.RSEFileAssociation.path,
            models.RSEFileAssociation.bytes,
            models.RSEFileAssociation.tombstone,
            models.RSEFileAssociation.state,
            models.RSEFileAssociation.updated_at,
            models.DataIdentifier.datatype
        ).having(
            case((func.count(replicas_alias.scope) > 0, True),  # Can delete this replica if it's not the last replica
                 (func.count(models.Request.scope) == 0, True),  # If it's the last replica, only can delete if there are no requests using it
                 else_=False).label("can_delete"),
        ).order_by(
            models.RSEFileAssociation.tombstone,
            models.RSEFileAssociation.updated_at
        ).limit(
            limit - len(rows)
        )

        for scope, name, path, bytes_, tombstone, state, datatype in session.execute(stmt):
            # Stop once the row limit or the requested byte budget is reached
            if len(rows) >= limit or (not only_delete_obsolete and needed_space is not None and total_bytes > needed_space):
                break
            # UNAVAILABLE replicas don't count towards reclaimed bytes (they
            # presumably occupy no usable space) — TODO confirm
            if state != ReplicaState.UNAVAILABLE:
                total_bytes += bytes_  # type: ignore

            rows.append({'scope': scope, 'name': name, 'path': path,
                         'bytes': bytes_, 'tombstone': tombstone,
                         'state': state, 'datatype': datatype})
        if len(rows) >= limit or (not only_delete_obsolete and needed_space is not None and total_bytes > needed_space):
            break

    if rows:
        # Mark the selected replicas BEING_DELETED with an EPOCH tombstone so
        # concurrent reapers won't pick them up again.
        stmt = delete(temp_table_cls)
        session.execute(stmt)
        values = [{'scope': row['scope'], 'name': row['name']} for row in rows]
        stmt = insert(temp_table_cls)
        session.execute(stmt, values)
        stmt = update(
            models.RSEFileAssociation
        ).where(
            exists(select(1).prefix_with("/*+ INDEX(REPLICAS REPLICAS_PK) */", dialect='oracle')
                   .where(and_(models.RSEFileAssociation.scope == temp_table_cls.scope,
                               models.RSEFileAssociation.name == temp_table_cls.name,
                               models.RSEFileAssociation.rse_id == rse_id)))
        ).values({
            models.RSEFileAssociation.updated_at: datetime.utcnow(),
            models.RSEFileAssociation.state: ReplicaState.BEING_DELETED,
            models.RSEFileAssociation.tombstone: OBSOLETE
        }).execution_options(
            synchronize_session=False
        )

        session.execute(stmt)

    return rows
|
|
2694
|
+
|
|
2695
|
+
|
|
2696
|
+
@transactional_session
def update_replicas_states(
    replicas: "Iterable[dict[str, Any]]",
    nowait: bool = False,
    *,
    session: "Session"
) -> bool:
    """
    Update File replica information and state.

    :param replicas: The list of replicas. Each dict must contain 'rse_id',
                     'scope', 'name' and 'state'; 'path' is optional.
    :param nowait: Nowait parameter for the for_update queries.
    :param session: The database session in use.

    :returns: True when all replicas were updated.
    :raises exception.ReplicaNotFound: if a replica does not exist.
    :raises exception.UnsupportedOperation: if the requested state transition
                                            is not allowed for a replica.
    """

    for replica in replicas:
        # Lock the replica row for the rest of the transaction
        stmt = select(
            models.RSEFileAssociation
        ).where(
            models.RSEFileAssociation.rse_id == replica['rse_id'],
            models.RSEFileAssociation.scope == replica['scope'],
            models.RSEFileAssociation.name == replica['name']
        ).with_for_update(
            nowait=nowait
        )

        if session.execute(stmt).scalar_one_or_none() is None:
            # remember scope, name and rse
            raise exception.ReplicaNotFound("No row found for scope: %s name: %s rse: %s" % (replica['scope'], replica['name'], get_rse_name(replica['rse_id'], session=session)))

        if isinstance(replica['state'], str):
            replica['state'] = ReplicaState(replica['state'])

        values = {'state': replica['state']}
        # Guard conditions restricting the UPDATE below. NOTE: previously these
        # were appended to the already-executed SELECT statement, so they were
        # silently dropped and the rowcount check at the end could never fire;
        # they must be part of the UPDATE's WHERE clause to take effect.
        guard_conditions = []
        if replica['state'] == ReplicaState.BEING_DELETED:
            # Exclude replicas that are still locked or used as sources
            guard_conditions.append(models.RSEFileAssociation.lock_cnt == 0)
            guard_conditions.append(
                not_(exists(select(1)
                            .where(and_(models.RSEFileAssociation.scope == models.Source.scope,
                                        models.RSEFileAssociation.name == models.Source.name,
                                        models.RSEFileAssociation.rse_id == models.Source.rse_id)))))
            values['tombstone'] = OBSOLETE
        elif replica['state'] == ReplicaState.AVAILABLE:
            # Resolve the locks/rules waiting on this transfer
            rucio.core.lock.successful_transfer(scope=replica['scope'], name=replica['name'], rse_id=replica['rse_id'], nowait=nowait, session=session)
            stmt_bad_replicas = select(
                func.count()
            ).select_from(
                models.BadReplica
            ).where(
                and_(models.BadReplica.state == BadFilesStatus.BAD,
                     models.BadReplica.rse_id == replica['rse_id'],
                     models.BadReplica.scope == replica['scope'],
                     models.BadReplica.name == replica['name'])
            )

            # A replica becoming AVAILABLE again recovers any BAD record
            if session.execute(stmt_bad_replicas).scalar():
                bad_replica_stmt = update(
                    models.BadReplica
                ).where(
                    and_(models.BadReplica.state == BadFilesStatus.BAD,
                         models.BadReplica.rse_id == replica['rse_id'],
                         models.BadReplica.scope == replica['scope'],
                         models.BadReplica.name == replica['name'])
                ).values({
                    models.BadReplica.state: BadFilesStatus.RECOVERED,
                    models.BadReplica.updated_at: datetime.utcnow()
                }).execution_options(
                    synchronize_session=False
                )
                session.execute(bad_replica_stmt)
        elif replica['state'] == ReplicaState.UNAVAILABLE:
            # Propagate the failed transfer to the locks/rules
            rucio.core.lock.failed_transfer(scope=replica['scope'], name=replica['name'], rse_id=replica['rse_id'],
                                            error_message=replica.get('error_message', None),
                                            broken_rule_id=replica.get('broken_rule_id', None),
                                            broken_message=replica.get('broken_message', None),
                                            nowait=nowait, session=session)
        elif replica['state'] == ReplicaState.TEMPORARY_UNAVAILABLE:
            # Only AVAILABLE or already TEMPORARY_UNAVAILABLE replicas may transition
            guard_conditions.append(
                models.RSEFileAssociation.state.in_([ReplicaState.AVAILABLE,
                                                     ReplicaState.TEMPORARY_UNAVAILABLE]))

        if 'path' in replica and replica['path']:
            values['path'] = replica['path']

        update_stmt = update(
            models.RSEFileAssociation
        ).where(
            and_(models.RSEFileAssociation.rse_id == replica['rse_id'],
                 models.RSEFileAssociation.scope == replica['scope'],
                 models.RSEFileAssociation.name == replica['name'],
                 *guard_conditions)
        ).values(
            values
        ).execution_options(
            synchronize_session=False
        )

        # rowcount == 0 means a guard condition rejected the transition
        if not session.execute(update_stmt).rowcount:
            if 'rse' not in replica:
                replica['rse'] = get_rse_name(rse_id=replica['rse_id'], session=session)
            raise exception.UnsupportedOperation('State %(state)s for replica %(scope)s:%(name)s on %(rse)s cannot be updated' % replica)
    return True
|
|
2800
|
+
|
|
2801
|
+
|
|
2802
|
+
@transactional_session
def touch_replica(
    replica: dict[str, Any],
    *,
    session: "Session"
) -> bool:
    """
    Update the accessed_at timestamp of the given file replica/did but don't wait if row is locked.

    Both the replica row and its DID row are locked with FOR UPDATE NOWAIT
    first; if either row is held by another transaction the whole operation is
    abandoned (returns False) rather than blocking.

    :param replica: a dictionary with the information of the affected replica.
    :param session: The database session in use.

    :returns: True, if successful, False otherwise.
    """
    try:
        # Fall back to "now" when the caller didn't supply a timestamp;
        # none_value exists so NULL can appear in the not_in() list below.
        accessed_at, none_value = replica.get('accessed_at') or datetime.utcnow(), None

        # Lock the replica row without waiting
        stmt = select(
            models.RSEFileAssociation
        ).with_hint(
            models.RSEFileAssociation,
            'INDEX(REPLICAS REPLICAS_PK)',
            'oracle'
        ).where(
            and_(models.RSEFileAssociation.rse_id == replica['rse_id'],
                 models.RSEFileAssociation.scope == replica['scope'],
                 models.RSEFileAssociation.name == replica['name'])
        ).with_for_update(
            nowait=True
        )
        session.execute(stmt).one()

        # Touch the replica; a tombstone is pushed forward to accessed_at
        # unless it is NULL or the EPOCH (OBSOLETE) tombstone, which are kept.
        stmt = update(
            models.RSEFileAssociation
        ).where(
            and_(models.RSEFileAssociation.rse_id == replica['rse_id'],
                 models.RSEFileAssociation.scope == replica['scope'],
                 models.RSEFileAssociation.name == replica['name'])
        ).prefix_with(
            '/*+ INDEX(REPLICAS REPLICAS_PK) */', dialect='oracle'
        ).values({
            models.RSEFileAssociation.accessed_at: accessed_at,
            models.RSEFileAssociation.tombstone: case(
                (models.RSEFileAssociation.tombstone.not_in([OBSOLETE, none_value]),
                 accessed_at),
                else_=models.RSEFileAssociation.tombstone)
        }).execution_options(
            synchronize_session=False
        )
        session.execute(stmt)

        # Lock the corresponding file DID row without waiting
        stmt = select(
            models.DataIdentifier
        ).with_hint(
            models.DataIdentifier,
            'INDEX(DIDS DIDS_PK)',
            'oracle'
        ).where(
            and_(models.DataIdentifier.scope == replica['scope'],
                 models.DataIdentifier.name == replica['name'],
                 models.DataIdentifier.did_type == DIDType.FILE)
        ).with_for_update(
            nowait=True
        )
        session.execute(stmt).one()

        # Touch the DID as well
        stmt = update(
            models.DataIdentifier
        ).where(
            and_(models.DataIdentifier.scope == replica['scope'],
                 models.DataIdentifier.name == replica['name'],
                 models.DataIdentifier.did_type == DIDType.FILE)
        ).prefix_with(
            '/*+ INDEX(DIDS DIDS_PK) */', dialect='oracle'
        ).values({
            models.DataIdentifier.accessed_at: accessed_at
        }).execution_options(
            synchronize_session=False
        )
        session.execute(stmt)

    except DatabaseError:
        # Row locked by another transaction (NOWAIT failed)
        return False
    except NoResultFound:
        # Replica or DID no longer exists; nothing to touch, treat as success
        return True

    return True
|
|
2889
|
+
|
|
2890
|
+
|
|
2891
|
+
@transactional_session
def update_replica_state(
    rse_id: str,
    scope: InternalScope,
    name: str,
    state: BadFilesStatus,
    *,
    session: "Session"
) -> bool:
    """
    Update the information and state of a single file replica.

    Thin convenience wrapper that delegates to ``update_replicas_states``
    with a one-element replica list.

    :param rse_id: the rse id.
    :param scope: the tag name.
    :param name: The data identifier name.
    :param state: The state.
    :param session: The database session in use.
    """
    replica = {'scope': scope, 'name': name, 'state': state, 'rse_id': rse_id}
    return update_replicas_states(replicas=[replica], session=session)
|
|
2910
|
+
|
|
2911
|
+
|
|
2912
|
+
@transactional_session
def get_and_lock_file_replicas(
    scope: InternalScope,
    name: str,
    nowait: bool = False,
    restrict_rses: Optional["Sequence[str]"] = None,
    *,
    session: "Session"
) -> "Sequence[models.RSEFileAssociation]":
    """
    Get and row-lock the file replicas of a specific scope:name.

    :param scope: The scope of the did.
    :param name: The name of the did.
    :param nowait: Nowait parameter for the FOR UPDATE statement
    :param restrict_rses: Possible RSE_ids to filter on.
    :param session: The db session in use.
    :returns: List of SQLAlchemy Replica Objects
    """
    conditions = [
        models.RSEFileAssociation.scope == scope,
        models.RSEFileAssociation.name == name,
        models.RSEFileAssociation.state != ReplicaState.BEING_DELETED,
    ]
    stmt = select(
        models.RSEFileAssociation
    ).where(
        and_(*conditions)
    ).with_for_update(
        nowait=nowait
    )

    # Inline the RSE filter only for short, non-empty lists; larger lists are
    # deliberately not expanded into an OR-chain.
    if restrict_rses is not None and 0 < len(restrict_rses) < 10:
        stmt = stmt.where(or_(*(models.RSEFileAssociation.rse_id == rid for rid in restrict_rses)))

    return session.execute(stmt).scalars().all()
|
|
2947
|
+
|
|
2948
|
+
|
|
2949
|
+
@transactional_session
def get_source_replicas(
    scope: InternalScope,
    name: str,
    source_rses: Optional["Sequence[str]"] = None,
    *,
    session: "Session"
) -> "Sequence[str]":
    """
    Get the RSE ids of all AVAILABLE source replicas for a specific scope:name.

    :param scope: The scope of the did.
    :param name: The name of the did.
    :param source_rses: Possible RSE_ids to filter on.
    :param session: The db session in use.
    :returns: List of rse_ids hosting an AVAILABLE replica of the file.
    """
    stmt = select(
        models.RSEFileAssociation.rse_id
    ).where(
        and_(models.RSEFileAssociation.scope == scope,
             models.RSEFileAssociation.name == name,
             models.RSEFileAssociation.state == ReplicaState.AVAILABLE)
    )
    # Inline the RSE filter only for short lists (< 10) to avoid generating an
    # unwieldy OR-chain; longer lists skip the filter (original behavior).
    if source_rses and len(source_rses) < 10:
        rse_clause = [models.RSEFileAssociation.rse_id == rse_id for rse_id in source_rses]
        if rse_clause:
            stmt = stmt.where(or_(*rse_clause))
    return session.execute(stmt).scalars().all()
|
|
2982
|
+
|
|
2983
|
+
|
|
2984
|
+
@transactional_session
def get_and_lock_file_replicas_for_dataset(
    scope: InternalScope,
    name: str,
    nowait: bool = False,
    restrict_rses: Optional["Sequence[str]"] = None,
    total_threads: Optional[int] = None,
    thread_id: Optional[int] = None,
    *,
    session: "Session"
) -> tuple[list[dict[str, Any]], dict[tuple[InternalScope, str], Any]]:
    """
    Get and row-lock the file replicas for all files of a dataset.

    :param scope: The scope of the dataset.
    :param name: The name of the dataset.
    :param nowait: Nowait parameter for the FOR UPDATE statement
    :param restrict_rses: Possible RSE_ids to filter on.
    :param total_threads: Total threads
    :param thread_id: This thread
    :param session: The db session in use.
    :returns: (files in dataset, replicas in dataset)
    """
    files, replicas = {}, {}

    # Content of the dataset: one row per child file with its metadata.
    base_stmt = select(
        models.DataIdentifierAssociation.child_scope,
        models.DataIdentifierAssociation.child_name,
        models.DataIdentifierAssociation.bytes,
        models.DataIdentifierAssociation.md5,
        models.DataIdentifierAssociation.adler32,
    ).where(
        and_(models.DataIdentifierAssociation.scope == scope,
             models.DataIdentifierAssociation.name == name)
    )

    # Implicit inner join between contents and replicas (used on PostgreSQL).
    stmt = base_stmt.add_columns(
        models.RSEFileAssociation
    ).where(
        and_(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
             models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
             models.RSEFileAssociation.state != ReplicaState.BEING_DELETED)
    )

    # RSE filter: defaults to an always-true clause; expanded only for short
    # lists (< 10 entries) to keep the OR-chain small.
    rse_clause = [true()]
    if restrict_rses is not None and len(restrict_rses) < 10:
        rse_clause = [models.RSEFileAssociation.rse_id == rse_id for rse_id in restrict_rses]

    if session.bind.dialect.name == 'postgresql':  # type: ignore
        # NOTE(review): on PostgreSQL the file list is fetched in a separate
        # pass and the replicas via an INNER join — presumably because
        # FOR UPDATE OF cannot be applied to the nullable side of an OUTER
        # JOIN there; confirm against the original change.
        if total_threads and total_threads > 1:
            base_stmt = filter_thread_work(session=session,
                                           query=base_stmt,
                                           total_threads=total_threads,
                                           thread_id=thread_id,
                                           hash_variable='child_name')

        # Pre-populate the result maps so files without replicas still appear.
        for child_scope, child_name, bytes_, md5, adler32 in session.execute(base_stmt).yield_per(1000):
            files[(child_scope, child_name)] = {'scope': child_scope,
                                                'name': child_name,
                                                'bytes': bytes_,
                                                'md5': md5,
                                                'adler32': adler32}
            replicas[(child_scope, child_name)] = []

        stmt = stmt.where(or_(*rse_clause))
    else:
        # Non-PostgreSQL path: single OUTER JOIN query; the RSE filter is part
        # of the join condition so files without matching replicas are kept.
        stmt = base_stmt.add_columns(
            models.RSEFileAssociation
        ).with_hint(
            models.DataIdentifierAssociation,
            'INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)',
            'oracle'
        ).outerjoin(
            models.RSEFileAssociation,
            and_(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
                 models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
                 models.RSEFileAssociation.state != ReplicaState.BEING_DELETED,
                 or_(*rse_clause))
        )

    if total_threads and total_threads > 1:
        stmt = filter_thread_work(session=session,
                                  query=stmt,
                                  total_threads=total_threads,
                                  thread_id=thread_id,
                                  hash_variable='child_name')

    # Lock only the replica rows (OF lock_cnt), not the contents rows.
    stmt = stmt.with_for_update(
        nowait=nowait,
        of=models.RSEFileAssociation.lock_cnt
    )

    for child_scope, child_name, bytes_, md5, adler32, replica in session.execute(stmt).yield_per(1000):
        if (child_scope, child_name) not in files:
            files[(child_scope, child_name)] = {'scope': child_scope,
                                                'name': child_name,
                                                'bytes': bytes_,
                                                'md5': md5,
                                                'adler32': adler32}

        # replica is None for outer-joined files with no replica rows; such
        # files still get an (empty) entry in the replicas map.
        if (child_scope, child_name) in replicas:
            if replica is not None:
                replicas[(child_scope, child_name)].append(replica)
        else:
            replicas[(child_scope, child_name)] = []
            if replica is not None:
                replicas[(child_scope, child_name)].append(replica)

    return (list(files.values()), replicas)
|
|
3093
|
+
|
|
3094
|
+
|
|
3095
|
+
@transactional_session
def get_source_replicas_for_dataset(
    scope: InternalScope,
    name: str,
    source_rses: Optional["Sequence[str]"] = None,
    total_threads: Optional[int] = None,
    thread_id: Optional[int] = None,
    *,
    session: "Session"
) -> dict[tuple[InternalScope, str], Any]:
    """
    Get the available source replicas for all files of a dataset.

    :param scope: The scope of the dataset.
    :param name: The name of the dataset.
    :param source_rses: Possible source RSE_ids to filter on.
    :param total_threads: Total threads
    :param thread_id: This thread
    :param session: The db session in use.
    :returns: Dictionary mapping (child_scope, child_name) to the list of
              rse_ids holding an AVAILABLE replica of that file.
    """
    # Join condition between dataset contents and replicas. The optional RSE
    # filter is folded into the OUTER JOIN condition (not a WHERE) so that
    # files with no matching replica still appear with an empty rse_id list.
    join_conditions = [
        models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
        models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
        models.RSEFileAssociation.state == ReplicaState.AVAILABLE,
    ]
    # Inline the RSE filter only for short lists (< 10) to avoid generating an
    # unwieldy OR-chain; longer lists skip the filter (original behavior).
    if source_rses and len(source_rses) < 10:
        rse_clause = [models.RSEFileAssociation.rse_id == rse_id for rse_id in source_rses]
        if rse_clause:
            join_conditions.append(or_(*rse_clause))

    stmt = select(
        models.DataIdentifierAssociation.child_scope,
        models.DataIdentifierAssociation.child_name,
        models.RSEFileAssociation.rse_id
    ).with_hint(
        models.DataIdentifierAssociation,
        'INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)',
        'oracle'
    ).outerjoin(
        models.RSEFileAssociation,
        and_(*join_conditions)
    ).where(
        and_(models.DataIdentifierAssociation.scope == scope,
             models.DataIdentifierAssociation.name == name)
    )

    if total_threads and total_threads > 1:
        stmt = filter_thread_work(session=session,
                                  query=stmt,
                                  total_threads=total_threads,
                                  thread_id=thread_id,
                                  hash_variable='child_name')

    replicas = {}
    for child_scope, child_name, rse_id in session.execute(stmt):
        # Ensure every file gets an entry, even with no replica (rse_id None).
        rse_ids = replicas.setdefault((child_scope, child_name), [])
        if rse_id:
            rse_ids.append(rse_id)

    return replicas
|
|
3178
|
+
|
|
3179
|
+
|
|
3180
|
+
@read_session
def get_replica_atime(
    replica: dict[str, Any],
    *,
    session: "Session"
) -> Optional[datetime]:
    """
    Return the accessed_at timestamp of a single replica. Just for testing.

    :param replica: Dictionary with the keys scope, name and rse_id.
    :param session: Database session to use.

    :returns: A datetime timestamp with the last access time.
    """
    pk_filter = and_(
        models.RSEFileAssociation.scope == replica['scope'],
        models.RSEFileAssociation.name == replica['name'],
        models.RSEFileAssociation.rse_id == replica['rse_id'],
    )
    stmt = (
        select(models.RSEFileAssociation.accessed_at)
        .with_hint(models.RSEFileAssociation, 'INDEX(REPLICAS REPLICAS_PK)', 'oracle')
        .where(pk_filter)
    )
    return session.execute(stmt).scalar_one()
|
|
3205
|
+
|
|
3206
|
+
|
|
3207
|
+
@transactional_session
def touch_collection_replicas(
    collection_replicas: "Iterable[dict[str, Any]]",
    *,
    session: "Session"
) -> bool:
    """
    Update the accessed_at timestamp of the given collection replicas.

    :param collection_replicas: the list of collection replicas.
    :param session: The database session in use.

    :returns: True, if successful, False otherwise.
    """
    default_time = datetime.utcnow()
    for entry in collection_replicas:
        stmt = update(
            models.CollectionReplica
        ).where(
            and_(models.CollectionReplica.scope == entry['scope'],
                 models.CollectionReplica.name == entry['name'],
                 models.CollectionReplica.rse_id == entry['rse_id'])
        ).values(
            # Use the supplied timestamp when present, otherwise "now".
            {models.CollectionReplica.accessed_at: entry.get('accessed_at') or default_time}
        ).execution_options(
            synchronize_session=False
        )
        try:
            session.execute(stmt)
        except DatabaseError:
            return False

    return True
|
|
3241
|
+
|
|
3242
|
+
|
|
3243
|
+
@stream_session
def list_dataset_replicas(
    scope: "InternalScope",
    name: str,
    deep: bool = False,
    *,
    session: "Session"
) -> "Iterator[dict[str, Any]]":
    """
    List the replicas of a dataset.

    :param scope: The scope of the dataset.
    :param name: The name of the dataset.
    :param deep: Lookup at the file level.
    :param session: Database session to use.

    :returns: A list of dictionaries containing the dataset replicas
              with associated metrics and timestamps
    """

    if not deep:
        # Shallow mode: read the pre-aggregated collection_replicas rows.
        stmt = select(
            models.CollectionReplica.scope,
            models.CollectionReplica.name,
            models.RSE.rse,
            models.CollectionReplica.rse_id,
            models.CollectionReplica.bytes,
            models.CollectionReplica.length,
            models.CollectionReplica.available_bytes,
            models.CollectionReplica.available_replicas_cnt.label("available_length"),
            models.CollectionReplica.state,
            models.CollectionReplica.created_at,
            models.CollectionReplica.updated_at,
            models.CollectionReplica.accessed_at
        ).where(
            and_(models.CollectionReplica.scope == scope,
                 models.CollectionReplica.name == name,
                 models.CollectionReplica.did_type == DIDType.DATASET,
                 models.CollectionReplica.rse_id == models.RSE.id,
                 models.RSE.deleted == false())
        )

        for row in session.execute(stmt).all():
            yield row._asdict()

    else:
        # Deep mode: recompute availability per RSE at the file level.
        # Find maximum values (total size and file count of the dataset).
        stmt = select(
            func.sum(models.DataIdentifierAssociation.bytes).label("bytes"),
            func.count().label("length")
        ).select_from(
            models.DataIdentifierAssociation
        ).with_hint(
            models.DataIdentifierAssociation,
            'INDEX_RS_ASC(CONTENTS CONTENTS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)',
            'oracle'
        ).where(
            and_(models.DataIdentifierAssociation.scope == scope,
                 models.DataIdentifierAssociation.name == name)
        )

        bytes_, length = session.execute(stmt).one()
        bytes_ = bytes_ or 0  # SUM over zero rows yields NULL

        # Find archives that contain files of the requested dataset:
        # one row per (dataset file, archive constituent, replica of archive).
        sub_query_stmt = select(
            models.DataIdentifierAssociation.scope.label('dataset_scope'),
            models.DataIdentifierAssociation.name.label('dataset_name'),
            models.DataIdentifierAssociation.bytes.label('file_bytes'),
            models.ConstituentAssociation.child_scope.label('file_scope'),
            models.ConstituentAssociation.child_name.label('file_name'),
            models.RSEFileAssociation.scope.label('replica_scope'),
            models.RSEFileAssociation.name.label('replica_name'),
            models.RSE.rse,
            models.RSE.id.label('rse_id'),
            models.RSEFileAssociation.created_at,
            models.RSEFileAssociation.accessed_at,
            models.RSEFileAssociation.updated_at
        ).where(
            and_(models.DataIdentifierAssociation.scope == scope,
                 models.DataIdentifierAssociation.name == name,
                 models.ConstituentAssociation.child_scope == models.DataIdentifierAssociation.child_scope,
                 models.ConstituentAssociation.child_name == models.DataIdentifierAssociation.child_name,
                 models.ConstituentAssociation.scope == models.RSEFileAssociation.scope,
                 models.ConstituentAssociation.name == models.RSEFileAssociation.name,
                 models.RSEFileAssociation.rse_id == models.RSE.id,
                 models.RSEFileAssociation.state == ReplicaState.AVAILABLE,
                 models.RSE.deleted == false())
        ).subquery()

        # Count the metrics: collapse multiple archive replicas per file/RSE.
        group_query_stmt = select(
            sub_query_stmt.c.dataset_scope,
            sub_query_stmt.c.dataset_name,
            sub_query_stmt.c.file_scope,
            sub_query_stmt.c.file_name,
            sub_query_stmt.c.rse_id,
            sub_query_stmt.c.rse,
            func.sum(sub_query_stmt.c.file_bytes).label('file_bytes'),
            func.min(sub_query_stmt.c.created_at).label('created_at'),
            func.max(sub_query_stmt.c.updated_at).label('updated_at'),
            func.max(sub_query_stmt.c.accessed_at).label('accessed_at')
        ).group_by(
            sub_query_stmt.c.dataset_scope,
            sub_query_stmt.c.dataset_name,
            sub_query_stmt.c.file_scope,
            sub_query_stmt.c.file_name,
            sub_query_stmt.c.rse_id,
            sub_query_stmt.c.rse
        ).subquery()

        # Bring it in the same column state as the non-archive query below,
        # so both halves can be combined with UNION ALL.
        full_query_stmt = select(
            group_query_stmt.c.dataset_scope.label('scope'),
            group_query_stmt.c.dataset_name.label('name'),
            group_query_stmt.c.rse_id,
            group_query_stmt.c.rse,
            func.sum(group_query_stmt.c.file_bytes).label('available_bytes'),
            func.count().label('available_length'),
            func.min(group_query_stmt.c.created_at).label('created_at'),
            func.max(group_query_stmt.c.updated_at).label('updated_at'),
            func.max(group_query_stmt.c.accessed_at).label('accessed_at')
        ).group_by(
            group_query_stmt.c.dataset_scope,
            group_query_stmt.c.dataset_name,
            group_query_stmt.c.rse_id,
            group_query_stmt.c.rse
        )

        # Find the non-archive dataset replicas.
        # NOTE: rebinds sub_query_stmt; full_query_stmt above already
        # captured the archive subquery, so this shadowing is safe.
        sub_query_stmt = select(
            models.DataIdentifierAssociation.scope,
            models.DataIdentifierAssociation.name,
            models.RSEFileAssociation.rse_id,
            func.sum(models.RSEFileAssociation.bytes).label("available_bytes"),
            func.count().label("available_length"),
            func.min(models.RSEFileAssociation.created_at).label("created_at"),
            func.max(models.RSEFileAssociation.updated_at).label("updated_at"),
            func.max(models.RSEFileAssociation.accessed_at).label("accessed_at")
        ).with_hint(
            models.DataIdentifierAssociation,
            'INDEX_RS_ASC(CONTENTS CONTENTS_PK) INDEX_RS_ASC(REPLICAS REPLICAS_PK) NO_INDEX_FFS(CONTENTS CONTENTS_PK)',
            'oracle'
        ).where(
            and_(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
                 models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
                 models.DataIdentifierAssociation.scope == scope,
                 models.DataIdentifierAssociation.name == name,
                 models.RSEFileAssociation.state == ReplicaState.AVAILABLE)
        ).group_by(
            models.DataIdentifierAssociation.scope,
            models.DataIdentifierAssociation.name,
            models.RSEFileAssociation.rse_id
        ).subquery()

        stmt = select(
            sub_query_stmt.c.scope,
            sub_query_stmt.c.name,
            sub_query_stmt.c.rse_id,
            models.RSE.rse,
            sub_query_stmt.c.available_bytes,
            sub_query_stmt.c.available_length,
            sub_query_stmt.c.created_at,
            sub_query_stmt.c.updated_at,
            sub_query_stmt.c.accessed_at
        ).where(
            and_(sub_query_stmt.c.rse_id == models.RSE.id,
                 models.RSE.deleted == false())
        )

        # Join everything together
        final_stmt = stmt.union_all(full_query_stmt)
        for row in session.execute(final_stmt).all():
            replica = row._asdict()
            # Totals come from the dataset content, not from the per-RSE rows.
            replica['length'], replica['bytes'] = length, bytes_
            # A replica is AVAILABLE only if every file of the dataset is
            # available at that RSE.
            if replica['length'] == row.available_length:
                replica['state'] = ReplicaState.AVAILABLE
            else:
                replica['state'] = ReplicaState.UNAVAILABLE
            yield replica
|
|
3421
|
+
|
|
3422
|
+
|
|
3423
|
+
@stream_session
def list_dataset_replicas_bulk(
    names_by_intscope: dict[str, Any],
    *,
    session: "Session"
) -> "Iterator[dict[str, Any]]":
    """
    List the replicas of several datasets at once.

    :param names_by_intscope: The dictionary of internal scopes pointing at the list of names.
    :param session: Database session to use.

    :returns: A list of dictionaries containing the dataset replicas
              with associated metrics and timestamps
    """
    # One filter per scope: (scope == s AND name IN names).
    scope_filters = [
        and_(models.CollectionReplica.scope == scope,
             models.CollectionReplica.name.in_(names))
        for scope, names in names_by_intscope.items()
    ]

    try:
        # Process at most 10 scope filters per query.
        for batch in chunks(scope_filters, 10):
            stmt = select(
                models.CollectionReplica.scope,
                models.CollectionReplica.name,
                models.RSE.rse,
                models.CollectionReplica.rse_id,
                models.CollectionReplica.bytes,
                models.CollectionReplica.length,
                models.CollectionReplica.available_bytes,
                models.CollectionReplica.available_replicas_cnt.label("available_length"),
                models.CollectionReplica.state,
                models.CollectionReplica.created_at,
                models.CollectionReplica.updated_at,
                models.CollectionReplica.accessed_at
            ).where(
                and_(models.CollectionReplica.did_type == DIDType.DATASET,
                     models.CollectionReplica.rse_id == models.RSE.id,
                     models.RSE.deleted == false(),
                     or_(*batch))
            )

            for row in session.execute(stmt).all():
                yield row._asdict()
    except NoResultFound:
        raise exception.DataIdentifierNotFound('No Data Identifiers found')
|
|
3469
|
+
|
|
3470
|
+
|
|
3471
|
+
@stream_session
def list_dataset_replicas_vp(
    scope: InternalScope,
    name: str,
    deep: bool = False,
    *,
    session: "Session",
    logger: "LoggerFunction" = logging.log
) -> Union[list[str], "Iterator[dict[str, Any]]"]:
    """
    List dataset replicas for a DID (scope:name) using the
    Virtual Placement service.

    NOTICE: This is an RnD function and might change or go away at any time.

    :param scope: The scope of the dataset.
    :param name: The name of the dataset.
    :param deep: Lookup at the file level.
    :param session: Database session to use.

    :returns: If VP exists and there is at least one non-TAPE replica, returns a list of dicts of sites

    NOTE(review): this function contains ``yield`` and is therefore a
    generator; the early ``return vp_replies`` below only stops iteration —
    callers never receive the list, despite the Union return annotation.
    Confirm whether callers rely on the empty-iterator behavior.
    """
    vp_endpoint = get_vp_endpoint()
    vp_replies = ['other']
    nr_replies = 5  # force limit reply size

    if not vp_endpoint:
        return vp_replies

    try:
        # NOTE(review): verify=False disables TLS certificate checking for
        # the VP endpoint — presumably intentional for an internal RnD
        # service, but confirm.
        vp_replies = requests.get('{}/ds/{}/{}:{}'.format(vp_endpoint, nr_replies, scope, name),
                                  verify=False,
                                  timeout=1)
        if vp_replies.status_code == 200:
            vp_replies = vp_replies.json()
        else:
            vp_replies = ['other']
    except requests.exceptions.RequestException as re:
        logger(logging.ERROR, 'In list_dataset_replicas_vp, could not access {}. Error:{}'.format(vp_endpoint, re))
        vp_replies = ['other']

    if vp_replies != ['other']:
        # check that there is at least one regular replica
        # that is not on tape and has a protocol with scheme "root"
        # and can be accessed from WAN
        accessible_replica_exists = False
        for reply in list_dataset_replicas(scope=scope, name=name, deep=deep, session=session):
            if reply['state'] != ReplicaState.AVAILABLE:
                continue
            rse_info = rsemgr.get_rse_info(rse=reply['rse'], vo=scope.vo, session=session)
            if rse_info['rse_type'] == 'TAPE':
                continue
            for prot in rse_info['protocols']:
                if prot['scheme'] == 'root' and prot['domains']['wan']['read']:
                    accessible_replica_exists = True
                    break
            if accessible_replica_exists is True:
                break
        if accessible_replica_exists is True:
            for vp_reply in vp_replies:
                yield {'vp': True, 'site': vp_reply}
|
|
3532
|
+
|
|
3533
|
+
|
|
3534
|
+
@stream_session
def list_datasets_per_rse(
    rse_id: str,
    filters: Optional[dict[str, Any]] = None,
    limit: Optional[int] = None,
    *,
    session: "Session"
) -> "Iterator[dict[str, Any]]":
    """
    List datasets at a RSE.

    :param rse_id: the rse id.
    :param filters: dictionary of attributes by which the results should be filtered.
    :param limit: limit number.
    :param session: Database session to use.

    :returns: A list of dict dataset replicas
    """
    stmt = select(
        models.CollectionReplica.scope,
        models.CollectionReplica.name,
        models.RSE.id.label('rse_id'),
        models.RSE.rse,
        models.CollectionReplica.bytes,
        models.CollectionReplica.length,
        models.CollectionReplica.available_bytes,
        models.CollectionReplica.available_replicas_cnt.label("available_length"),
        models.CollectionReplica.state,
        models.CollectionReplica.created_at,
        models.CollectionReplica.updated_at,
        models.CollectionReplica.accessed_at
    ).where(
        and_(models.CollectionReplica.did_type == DIDType.DATASET,
             models.CollectionReplica.rse_id == models.RSE.id,
             models.RSE.deleted == false(),
             models.RSE.id == rse_id)
    )

    for k, v in (filters or {}).items():
        if k in ('name', 'scope'):
            # Internal scopes carry the VO prefix; filter on that form.
            v_str = v if k != 'scope' else v.internal  # type: ignore
            if '*' in v_str or '%' in v_str:
                if session.bind.dialect.name == 'postgresql':  # type: ignore  # PostgreSQL escapes automatically
                    stmt = stmt.where(getattr(models.CollectionReplica, k).like(v_str.replace('*', '%')))
                else:
                    stmt = stmt.where(getattr(models.CollectionReplica, k).like(v_str.replace('*', '%'), escape='\\'))
            else:
                stmt = stmt.where(getattr(models.CollectionReplica, k) == v)
            # hints ?
        elif k == 'created_before':
            created_before = str_to_date(v)
            stmt = stmt.where(models.CollectionReplica.created_at <= created_before)
        elif k == 'created_after':
            created_after = str_to_date(v)
            stmt = stmt.where(models.CollectionReplica.created_at >= created_after)
        else:
            stmt = stmt.where(getattr(models.CollectionReplica, k) == v)

    if limit:
        stmt = stmt.limit(limit)

    for row in session.execute(stmt).all():
        yield row._asdict()
|
|
3597
|
+
|
|
3598
|
+
|
|
3599
|
+
@stream_session
def list_replicas_per_rse(
    rse_id: str,
    limit: Optional[int] = None,
    *,
    session: "Session"
) -> "Iterator[dict[str, Any]]":
    """Yield every file replica registered at the given RSE as a dict."""
    stmt = select(
        models.RSEFileAssociation
    ).where(
        models.RSEFileAssociation.rse_id == rse_id
    )
    if limit:
        stmt = stmt.limit(limit)

    # Stream results in batches of 100 to bound memory usage.
    for row in session.execute(stmt).yield_per(100).scalars():
        yield row.to_dict()
|
|
3618
|
+
|
|
3619
|
+
|
|
3620
|
+
@transactional_session
def get_cleaned_updated_collection_replicas(
    total_workers: int,
    worker_number: int,
    limit: Optional[int] = None,
    *,
    session: "Session"
) -> list[dict[str, Any]]:
    """
    Get update requests for collection replicas, after removing stale and
    duplicate rows from the updated_col_rep table.

    :param total_workers: Number of total workers.
    :param worker_number: id of the executing worker.
    :param limit: Maximum number of update requests to return.
    :param session: Database session in use.
    :returns: List of update requests for collection replicas.
    """
    # NOTE(review): total_workers/worker_number are accepted but not used in
    # this body — confirm whether thread partitioning was intended here.

    # Drop RSE-less update requests whose collection replica no longer
    # exists (there is nothing left to update for them).
    stmt = delete(
        models.UpdatedCollectionReplica
    ).where(
        and_(models.UpdatedCollectionReplica.rse_id.is_(None),
             ~exists().where(
                 and_(models.CollectionReplica.name == models.UpdatedCollectionReplica.name,
                      models.CollectionReplica.scope == models.UpdatedCollectionReplica.scope)))
    ).execution_options(
        synchronize_session=False
    )
    session.execute(stmt)

    # Delete update requests which do not have collection_replicas
    stmt = delete(
        models.UpdatedCollectionReplica
    ).where(
        and_(models.UpdatedCollectionReplica.rse_id.isnot(None),
             ~exists().where(
                 and_(models.CollectionReplica.name == models.UpdatedCollectionReplica.name,
                      models.CollectionReplica.scope == models.UpdatedCollectionReplica.scope,
                      models.CollectionReplica.rse_id == models.UpdatedCollectionReplica.rse_id)))
    ).execution_options(
        synchronize_session=False
    )
    session.execute(stmt)

    # Delete duplicates
    if session.bind.dialect.name == 'oracle':  # type: ignore
        schema = ''
        if BASE.metadata.schema:
            schema = BASE.metadata.schema + '.'
        # Oracle path: keep one arbitrary row per (scope, name, did_type,
        # rse_id) group via rowid comparison; NULL rse_ids are treated as
        # equal so duplicates among RSE-less rows are also removed.
        session.execute(text('DELETE FROM {schema}updated_col_rep A WHERE A.rowid > ANY (SELECT B.rowid FROM {schema}updated_col_rep B WHERE A.scope = B.scope AND A.name=B.name AND A.did_type=B.did_type AND (A.rse_id=B.rse_id OR (A.rse_id IS NULL and B.rse_id IS NULL)))'.format(schema=schema)))  # NOQA: E501
    elif session.bind.dialect.name == 'mysql':  # type: ignore
        # MySQL path: keep the row with the highest id per group and delete
        # every other row in a single statement.
        subquery1 = select(
            func.max(models.UpdatedCollectionReplica.id).label('max_id')
        ).group_by(
            models.UpdatedCollectionReplica.scope,
            models.UpdatedCollectionReplica.name,
            models.UpdatedCollectionReplica.rse_id
        ).subquery()

        subquery2 = select(
            subquery1.c.max_id
        )

        stmt_del = delete(
            models.UpdatedCollectionReplica
        ).where(
            models.UpdatedCollectionReplica.id.not_in(subquery2)
        ).execution_options(
            synchronize_session=False
        )
        session.execute(stmt_del)
    else:
        # Generic fallback: scan all rows in Python and collect ids of
        # duplicates; the first occurrence of each key wins.
        stmt = select(models.UpdatedCollectionReplica)
        update_requests_with_rse_id = []
        update_requests_without_rse_id = []
        duplicate_request_ids = []
        for update_request in session.execute(stmt).scalars().all():
            if update_request.rse_id is not None:
                small_request = {'name': update_request.name, 'scope': update_request.scope, 'rse_id': update_request.rse_id}
                if small_request not in update_requests_with_rse_id:
                    update_requests_with_rse_id.append(small_request)
                else:
                    duplicate_request_ids.append(update_request.id)
                    continue
            else:
                small_request = {'name': update_request.name, 'scope': update_request.scope}
                if small_request not in update_requests_without_rse_id:
                    update_requests_without_rse_id.append(small_request)
                else:
                    duplicate_request_ids.append(update_request.id)
                    continue
        # Delete the collected duplicates in chunks of 100 to keep the
        # generated IN-lists reasonably sized.
        for chunk in chunks(duplicate_request_ids, 100):
            stmt = delete(
                models.UpdatedCollectionReplica
            ).where(
                models.UpdatedCollectionReplica.id.in_(chunk)
            ).execution_options(
                synchronize_session=False
            )
            session.execute(stmt)

    # Return the (now de-duplicated) remaining update requests.
    stmt = select(models.UpdatedCollectionReplica)
    if limit:
        stmt = stmt.limit(limit)
    return [update_request.to_dict() for update_request in session.execute(stmt).scalars().all()]
|
|
3724
|
+
|
|
3725
|
+
|
|
3726
|
+
@transactional_session
def update_collection_replica(
    update_request: dict[str, Any],
    *,
    session: "Session"
) -> None:
    """
    Update a collection replica from an update request.

    If the request carries an rse_id, only the collection replica on that
    RSE is recomputed (and deleted if no available file replicas remain).
    Otherwise, every collection replica of the DID is refreshed from the
    per-RSE availability of its file replicas. In both cases the processed
    update request row is deleted at the end.

    :param update_request: update request from the updated_col_rep table;
                           expected keys: 'id', 'scope', 'name', 'rse_id'.
    :param session: The database session in use.
    """
    if update_request['rse_id'] is not None:
        # Check one specific dataset replica
        ds_length = 0
        old_available_replicas = 0
        ds_bytes = 0
        ds_replica_state = None
        ds_available_bytes = 0
        available_replicas = 0

        # Fetch the current collection replica, if any, to seed the counters.
        try:
            stmt = select(
                models.CollectionReplica
            ).where(
                and_(models.CollectionReplica.scope == update_request['scope'],
                     models.CollectionReplica.name == update_request['name'],
                     models.CollectionReplica.rse_id == update_request['rse_id'])
            )
            collection_replica = session.execute(stmt).scalar_one()
            ds_length = collection_replica.length
            old_available_replicas = collection_replica.available_replicas_cnt
            ds_bytes = collection_replica.bytes
        except NoResultFound:
            pass

        # Count available file replicas (and their total bytes) of this DID
        # on the requested RSE.
        try:
            stmt = select(
                func.sum(models.RSEFileAssociation.bytes).label('ds_available_bytes'),
                func.count().label('available_replicas')
            ).select_from(
                models.RSEFileAssociation
            ).where(
                and_(models.RSEFileAssociation.scope == models.DataIdentifierAssociation.child_scope,
                     models.RSEFileAssociation.name == models.DataIdentifierAssociation.child_name,
                     models.RSEFileAssociation.rse_id == update_request['rse_id'],
                     models.RSEFileAssociation.state == ReplicaState.AVAILABLE,
                     models.DataIdentifierAssociation.name == update_request['name'],
                     models.DataIdentifierAssociation.scope == update_request['scope'])
            )
            file_replica = session.execute(stmt).one()

            available_replicas = file_replica.available_replicas
            ds_available_bytes = file_replica.ds_available_bytes
        except NoResultFound:
            pass

        # The collection replica is AVAILABLE only when all files are present.
        if available_replicas >= ds_length:
            ds_replica_state = ReplicaState.AVAILABLE
        else:
            ds_replica_state = ReplicaState.UNAVAILABLE

        if old_available_replicas is not None and old_available_replicas > 0 and available_replicas == 0:
            # The last available file replica disappeared: drop the
            # collection replica entirely.
            stmt = delete(
                models.CollectionReplica
            ).where(
                and_(models.CollectionReplica.scope == update_request['scope'],
                     models.CollectionReplica.name == update_request['name'],
                     models.CollectionReplica.rse_id == update_request['rse_id'])
            )
            session.execute(stmt)
        else:
            # Refresh the counters on the existing collection replica.
            stmt = select(
                models.CollectionReplica
            ).where(
                and_(models.CollectionReplica.scope == update_request['scope'],
                     models.CollectionReplica.name == update_request['name'],
                     models.CollectionReplica.rse_id == update_request['rse_id'])
            )
            updated_replica = session.execute(stmt).scalar_one()

            updated_replica.state = ds_replica_state
            updated_replica.available_replicas_cnt = available_replicas
            updated_replica.length = ds_length
            updated_replica.bytes = ds_bytes
            updated_replica.available_bytes = ds_available_bytes
    else:
        # No RSE given: recompute length/bytes of the DID from its contents.
        stmt = select(
            func.count().label('ds_length'),
            func.sum(models.DataIdentifierAssociation.bytes).label('ds_bytes')
        ).select_from(
            models.DataIdentifierAssociation
        ).where(
            and_(models.DataIdentifierAssociation.scope == update_request['scope'],
                 models.DataIdentifierAssociation.name == update_request['name'])
        )
        association = session.execute(stmt).one()

        # Check all dataset replicas
        ds_length = association.ds_length
        ds_bytes = association.ds_bytes
        ds_replica_state = None

        stmt = select(
            models.CollectionReplica
        ).where(
            and_(models.CollectionReplica.scope == update_request['scope'],
                 models.CollectionReplica.name == update_request['name'])
        )
        for collection_replica in session.execute(stmt).scalars().all():
            if ds_length:
                collection_replica.length = ds_length
            else:
                collection_replica.length = 0
            if ds_bytes:
                collection_replica.bytes = ds_bytes
            else:
                collection_replica.bytes = 0

        # Per-RSE availability of the DID's file replicas.
        stmt = select(
            func.sum(models.RSEFileAssociation.bytes).label('ds_available_bytes'),
            func.count().label('available_replicas'),
            models.RSEFileAssociation.rse_id
        ).select_from(
            models.RSEFileAssociation
        ).where(
            and_(models.RSEFileAssociation.scope == models.DataIdentifierAssociation.child_scope,
                 models.RSEFileAssociation.name == models.DataIdentifierAssociation.child_name,
                 models.RSEFileAssociation.state == ReplicaState.AVAILABLE,
                 models.DataIdentifierAssociation.name == update_request['name'],
                 models.DataIdentifierAssociation.scope == update_request['scope'])
        ).group_by(
            models.RSEFileAssociation.rse_id
        )

        # .all() materialises the rows, so rebinding `stmt` inside the loop
        # below is safe.
        for file_replica in session.execute(stmt).all():
            if file_replica.available_replicas >= ds_length:
                ds_replica_state = ReplicaState.AVAILABLE
            else:
                ds_replica_state = ReplicaState.UNAVAILABLE

            stmt = select(
                models.CollectionReplica
            ).where(
                and_(models.CollectionReplica.scope == update_request['scope'],
                     models.CollectionReplica.name == update_request['name'],
                     models.CollectionReplica.rse_id == file_replica.rse_id)
            )
            collection_replica = session.execute(stmt).scalars().first()
            if collection_replica:
                collection_replica.state = ds_replica_state
                collection_replica.available_replicas_cnt = file_replica.available_replicas
                collection_replica.available_bytes = file_replica.ds_available_bytes

    # The update request has been processed; remove it from the queue table.
    stmt = delete(
        models.UpdatedCollectionReplica
    ).where(
        models.UpdatedCollectionReplica.id == update_request['id']
    )
    session.execute(stmt)
|
|
3884
|
+
|
|
3885
|
+
|
|
3886
|
+
@read_session
def get_bad_pfns(
    limit: int = 10000,
    thread: Optional[int] = None,
    total_threads: Optional[int] = None,
    *,
    session: "Session"
) -> list[dict[str, Any]]:
    """
    Return a list of bad PFNs.

    :param limit: The maximum number of replicas returned.
    :param thread: The assigned thread for this minos instance.
    :param total_threads: The total number of minos threads.
    :param session: The database session in use.

    :returns: list of PFNs {'pfn': pfn, 'state': state, 'reason': reason, 'account': account, 'expires_at': expires_at}
    """
    stmt = select(
        models.BadPFN.path,
        models.BadPFN.state,
        models.BadPFN.reason,
        models.BadPFN.account,
        models.BadPFN.expires_at
    )
    # Partition the work between threads by hashing on the PFN path.
    stmt = filter_thread_work(session=session, query=stmt, total_threads=total_threads, thread_id=thread, hash_variable='path')
    stmt = stmt.order_by(models.BadPFN.created_at).limit(limit)

    # Stream rows in batches of 1000; normalise each path through clean_pfns.
    return [
        {'pfn': clean_pfns([str(path)])[0],
         'state': state,
         'reason': reason,
         'account': account,
         'expires_at': expires_at}
        for path, state, reason, account, expires_at in session.execute(stmt).yield_per(1000)
    ]
|
|
3923
|
+
|
|
3924
|
+
|
|
3925
|
+
@transactional_session
def bulk_add_bad_replicas(
    replicas: "Iterable[dict[str, Any]]",
    account: InternalAccount,
    state: BadFilesStatus = BadFilesStatus.TEMPORARY_UNAVAILABLE,
    reason: Optional[str] = None,
    expires_at: Optional[datetime] = None,
    *,
    session: "Session"
) -> bool:
    """
    Bulk add new bad replicas.

    :param replicas: the list of bad replicas; each dict must carry
                     'scope', 'name' and 'rse_id'.
    :param account: The account who declared the bad replicas.
    :param state: The state of the file (SUSPICIOUS, BAD or TEMPORARY_UNAVAILABLE).
    :param reason: A string describing the reason of the loss.
    :param expires_at: Expiry timestamp for TEMPORARY_UNAVAILABLE replicas.
    :param session: The database session in use.

    :returns: True if successful.
    :raises RucioException: on database integrity/flush errors.
    :raises DataIdentifierAlreadyExists: if an identical row already exists.
    """
    for replica in replicas:
        # Predicate identifying this exact (scope, name, rse, state) row.
        scope_name_rse_state = and_(models.BadReplica.scope == replica['scope'],
                                    models.BadReplica.name == replica['name'],
                                    models.BadReplica.rse_id == replica['rse_id'],
                                    models.BadReplica.state == state)
        insert_new_row = True
        if state == BadFilesStatus.TEMPORARY_UNAVAILABLE:
            # TEMPORARY_UNAVAILABLE is upserted: refresh the existing row
            # instead of inserting a duplicate.
            stmt = select(
                models.BadReplica
            ).where(
                scope_name_rse_state
            )
            # NOTE(review): scalar_one_or_none() raises if several matching
            # rows exist — this assumes at most one row per key; confirm.
            if session.execute(stmt).scalar_one_or_none():
                stmt = update(
                    models.BadReplica
                ).where(
                    scope_name_rse_state
                ).values({
                    models.BadReplica.state: BadFilesStatus.TEMPORARY_UNAVAILABLE,
                    models.BadReplica.updated_at: datetime.utcnow(),
                    models.BadReplica.account: account,
                    models.BadReplica.reason: reason,
                    models.BadReplica.expires_at: expires_at
                }).execution_options(
                    synchronize_session=False
                )
                session.execute(stmt)

                insert_new_row = False
        if insert_new_row:
            new_bad_replica = models.BadReplica(scope=replica['scope'], name=replica['name'], rse_id=replica['rse_id'], reason=reason,
                                                state=state, account=account, bytes=None, expires_at=expires_at)
            # Defer the flush so all rows are written in one batch below.
            new_bad_replica.save(session=session, flush=False)
    try:
        session.flush()
    except IntegrityError as error:
        raise exception.RucioException(error.args)
    except DatabaseError as error:
        raise exception.RucioException(error.args)
    except FlushError as error:
        # Identity-key conflicts get a dedicated, more specific exception.
        if match('New instance .* with identity key .* conflicts with persistent instance', error.args[0]):
            raise exception.DataIdentifierAlreadyExists('Data Identifier already exists!')
        raise exception.RucioException(error.args)
    return True
|
|
3989
|
+
|
|
3990
|
+
|
|
3991
|
+
@transactional_session
def bulk_delete_bad_pfns(
    pfns: "Iterable[str]",
    *,
    session: "Session"
) -> Literal[True]:
    """
    Bulk delete bad PFNs.

    :param pfns: the list of PFNs to delete.
    :param session: The database session in use.

    :returns: True if successful.
    """
    # One equality clause per PFN (comprehension instead of the previous
    # manual append loop).
    pfn_clause = [models.BadPFN.path == pfn for pfn in pfns]

    # Delete in chunks of 100 clauses so the generated OR-list stays small.
    for chunk in chunks(pfn_clause, 100):
        stmt = delete(
            models.BadPFN
        ).where(
            or_(*chunk)
        ).execution_options(
            synchronize_session=False
        )
        session.execute(stmt)

    return True
|
|
4020
|
+
|
|
4021
|
+
|
|
4022
|
+
@transactional_session
def bulk_delete_bad_replicas(
    bad_replicas: "Iterable[dict[str, Any]]",
    *,
    session: "Session"
) -> Literal[True]:
    """
    Bulk delete bad replicas.

    :param bad_replicas: The list of bad replicas to delete (Dictionaries).
    :param session: The database session in use.

    :returns: True if successful.
    """
    # One conjunction per replica, identified by scope, name, RSE and state.
    replica_clause = [
        and_(models.BadReplica.scope == bad_replica['scope'],
             models.BadReplica.name == bad_replica['name'],
             models.BadReplica.rse_id == bad_replica['rse_id'],
             models.BadReplica.state == bad_replica['state'])
        for bad_replica in bad_replicas
    ]

    # Delete in batches of 100 clauses to bound the statement size.
    for batch in chunks(replica_clause, 100):
        del_stmt = delete(
            models.BadReplica
        ).where(
            or_(*batch)
        ).execution_options(
            synchronize_session=False
        )
        session.execute(del_stmt)
    return True
|
|
4053
|
+
|
|
4054
|
+
|
|
4055
|
+
@transactional_session
def add_bad_pfns(
    pfns: "Iterable[str]",
    account: InternalAccount,
    state: BadFilesStatus,
    reason: Optional[str] = None,
    expires_at: Optional[datetime] = None,
    *,
    session: "Session"
) -> Literal[True]:
    """
    Add bad PFNs.

    :param pfns: the list of new files.
    :param account: The account who declared the bad replicas.
    :param state: One of the possible states : BAD, SUSPICIOUS, TEMPORARY_UNAVAILABLE.
    :param reason: A string describing the reason of the loss.
    :param expires_at: Specify a timeout for the TEMPORARY_UNAVAILABLE replicas. None for BAD files.
    :param session: The database session in use.

    :returns: True if successful.
    :raises InputValidationError: if expires_at is inconsistent with state.
    :raises Duplicate: if one of the PFNs already exists.
    :raises RucioException: on other database errors.
    """

    # Accept the state either as an enum member or as its string name.
    if isinstance(state, str):
        rep_state = BadPFNStatus[state]
    else:
        rep_state = state

    # TEMPORARY_UNAVAILABLE requires an expiry; BAD forbids one.
    if rep_state == BadPFNStatus.TEMPORARY_UNAVAILABLE and expires_at is None:
        raise exception.InputValidationError("When adding a TEMPORARY UNAVAILABLE pfn the expires_at value should be set.")
    elif rep_state == BadPFNStatus.BAD and expires_at is not None:
        raise exception.InputValidationError("When adding a BAD pfn the expires_at value shouldn't be set.")

    pfns = clean_pfns(pfns)
    for pfn in pfns:
        new_pfn = models.BadPFN(path=str(pfn), account=account, state=rep_state, reason=reason, expires_at=expires_at)
        # NOTE(review): merge appears intended to update an already-known
        # path instead of colliding on insert — confirm against the BadPFN
        # primary key definition.
        new_pfn = session.merge(new_pfn)
        # Defer the flush so all PFNs are written in one batch below.
        new_pfn.save(session=session, flush=False)

    try:
        session.flush()
    except IntegrityError as error:
        raise exception.RucioException(error.args)
    except DatabaseError as error:
        raise exception.RucioException(error.args)
    except FlushError as error:
        # Identity-key conflicts get a dedicated, more specific exception.
        if match('New instance .* with identity key .* conflicts with persistent instance', error.args[0]):
            raise exception.Duplicate('One PFN already exists!')
        raise exception.RucioException(error.args)
    return True
|
|
4105
|
+
|
|
4106
|
+
|
|
4107
|
+
@read_session
def list_expired_temporary_unavailable_replicas(
    total_workers: int,
    worker_number: int,
    limit: int = 10000,
    *,
    session: "Session"
) -> "Sequence[Row]":
    """
    List the expired temporary unavailable replicas.

    :param total_workers: Number of total workers.
    :param worker_number: id of the executing worker.
    :param limit: The maximum number of replicas returned.
    :param session: The database session in use.
    :returns: Sequence of (scope, name, rse_id) rows.
    """

    stmt = select(
        models.BadReplica.scope,
        models.BadReplica.name,
        models.BadReplica.rse_id,
    ).with_hint(
        # Fix: the hint must target the table actually present in this
        # statement. It was previously attached to models.ReplicationRule,
        # which is not part of the query, so SQLAlchemy never rendered the
        # Oracle index hint.
        models.BadReplica,
        'INDEX(bad_replicas BAD_REPLICAS_EXPIRES_AT_IDX)',
        'oracle'
    ).where(
        and_(models.BadReplica.state == BadFilesStatus.TEMPORARY_UNAVAILABLE,
             models.BadReplica.expires_at < datetime.utcnow())
    ).order_by(
        models.BadReplica.expires_at
    )

    # Partition the work between workers by hashing on the replica name.
    stmt = filter_thread_work(session=session, query=stmt, total_threads=total_workers, thread_id=worker_number, hash_variable='name')
    stmt = stmt.limit(limit)

    return session.execute(stmt).all()
|
|
4143
|
+
|
|
4144
|
+
|
|
4145
|
+
@read_session
def get_replicas_state(
    scope: Optional[InternalScope] = None,
    name: Optional[str] = None,
    *,
    session: "Session"
) -> dict[ReplicaState, list[str]]:
    """
    Method used by the necromancer to get all the replicas of a DID.

    :param scope: The scope of the file.
    :param name: The name of the file.
    :param session: The database session in use.

    :returns: A dictionary with the list of states as keys and the rse_ids as value
    """

    stmt = select(
        models.RSEFileAssociation.rse_id,
        models.RSEFileAssociation.state
    ).where(
        and_(models.RSEFileAssociation.scope == scope,
             models.RSEFileAssociation.name == name)
    )
    states: dict[ReplicaState, list[str]] = {}
    for rse_id, state in session.execute(stmt).all():
        # setdefault replaces the previous manual membership check + insert.
        states.setdefault(state, []).append(rse_id)
    return states
|
|
4175
|
+
|
|
4176
|
+
|
|
4177
|
+
@read_session
def get_suspicious_files(
    rse_expression: str,
    available_elsewhere: int,
    filter_: Optional[dict[str, Any]] = None,
    logger: "LoggerFunction" = logging.log,
    younger_than: Optional[datetime] = None,
    nattempts: int = 0,
    nattempts_exact: bool = False,
    *,
    session: "Session",
    exclude_states: Optional["Iterable[str]"] = None,
    is_suspicious: bool = False
) -> list[dict[str, Any]]:
    """
    Gets a list of replicas from bad_replicas table which are: declared more than <nattempts> times since <younger_than> date,
    present on the RSE specified by the <rse_expression> and do not have a state in <exclude_states> list.
    Selected replicas can also be required to be <available_elsewhere> on another RSE than the one declared in bad_replicas table and/or
    be declared as <is_suspicious> in the bad_replicas table.
    Keyword Arguments:
    :param younger_than: Datetime object to select the replicas which were declared since younger_than date. Default value = 10 days ago.
    :param nattempts: The minimum number of replica appearances in the bad_replica DB table from younger_than date. Default value = 0.
    :param nattempts_exact: If True, then only replicas with exactly 'nattempts' appearances in the bad_replica DB table are retrieved. Replicas with more appearances are ignored.
    :param rse_expression: The RSE expression where the replicas are located.
    :param filter_: Dictionary of attributes by which the RSE results should be filtered. e.g.: {'availability_write': True}
    :param exclude_states: List of states which eliminates replicas from search result if any of the states in the list
                           was declared for a replica since younger_than date. Allowed values
                           = ['B', 'R', 'D', 'L', 'T', 'S'] (meaning 'BAD', 'RECOVERED', 'DELETED', 'LOST', 'TEMPORARY_UNAVAILABLE', 'SUSPICIOUS').
    :param available_elsewhere: Default: SuspiciousAvailability["ALL"].value, all suspicious replicas are returned.
                                If SuspiciousAvailability["EXIST_COPIES"].value, only replicas that additionally have copies declared as AVAILABLE on at least one other RSE
                                than the one in the bad_replicas table will be taken into account.
                                If SuspiciousAvailability["LAST_COPY"].value, only replicas that do not have another copy declared as AVAILABLE on another RSE will be taken into account.
    :param is_suspicious: If True, only replicas declared as SUSPICIOUS in bad replicas table will be taken into account. Default value = False.
    :param session: The database session in use. Default value = None.

    :returns: a list of replicas:
              [{'scope': scope, 'name': name, 'rse': rse, 'rse_id': rse_id, cnt': cnt, 'created_at': created_at}, ...]
    """

    exclude_states = exclude_states or ['B', 'R', 'D']
    # Reject unknown availability filters loudly (log + raise).
    if available_elsewhere not in [SuspiciousAvailability["ALL"].value, SuspiciousAvailability["EXIST_COPIES"].value, SuspiciousAvailability["LAST_COPY"].value]:
        logger(logging.WARNING, """ERROR, available_elsewhere must be set to one of the following:
        SuspiciousAvailability["ALL"].value: (default) all suspicious replicas are returned
        SuspiciousAvailability["EXIST_COPIES"].value: only replicas that additionally have copies declared as AVAILABLE on at least one other RSE are returned
        SuspiciousAvailability["LAST_COPY"].value: only replicas that do not have another copy declared as AVAILABLE on another RSE are returned""")
        raise exception.RucioException("""ERROR, available_elsewhere must be set to one of the following:
        SuspiciousAvailability["ALL"].value: (default) all suspicious replicas are returned
        SuspiciousAvailability["EXIST_COPIES"].value: only replicas that additionally have copies declared as AVAILABLE on at least one other RSE are returned
        SuspiciousAvailability["LAST_COPY"].value: only replicas that do not have another copy declared as AVAILABLE on another RSE are returned""")

    # only for the 2 web api used parameters, checking value types and assigning the default values
    if not isinstance(nattempts, int):
        nattempts = 0
    if not isinstance(younger_than, datetime):
        younger_than = datetime.utcnow() - timedelta(days=10)

    # assembling exclude_states_clause
    exclude_states_clause = []
    for state in exclude_states:
        exclude_states_clause.append(BadFilesStatus(state))

    # making aliases for bad_replicas and replicas tables
    bad_replicas_alias = aliased(models.BadReplica, name='bad_replicas_alias')
    replicas_alias = aliased(models.RSEFileAssociation, name='replicas_alias')

    # assembling the selection rse_clause
    rse_clause = []
    if rse_expression:
        parsedexp = parse_expression(expression=rse_expression, filter_=filter_, session=session)
        for rse in parsedexp:
            rse_clause.append(models.RSEFileAssociation.rse_id == rse['id'])

    # Base query: join bad_replicas with the replicas table on
    # (rse_id, scope, name), restricted to the time window.
    stmt = select(
        func.count(),
        bad_replicas_alias.scope,
        bad_replicas_alias.name,
        models.RSEFileAssociation.rse_id,
        func.min(models.RSEFileAssociation.created_at)
    ).select_from(
        bad_replicas_alias
    ).where(
        models.RSEFileAssociation.rse_id == bad_replicas_alias.rse_id,
        models.RSEFileAssociation.scope == bad_replicas_alias.scope,
        models.RSEFileAssociation.name == bad_replicas_alias.name,
        bad_replicas_alias.created_at >= younger_than
    )
    if is_suspicious:
        stmt = stmt.where(bad_replicas_alias.state == BadFilesStatus.SUSPICIOUS)
    if rse_clause:
        stmt = stmt.where(or_(*rse_clause))

    # Only return replicas that have at least one copy on another RSE
    if available_elsewhere == SuspiciousAvailability["EXIST_COPIES"].value:
        available_replica = exists(select(1)
                                   .where(and_(replicas_alias.state == ReplicaState.AVAILABLE,
                                               replicas_alias.scope == bad_replicas_alias.scope,
                                               replicas_alias.name == bad_replicas_alias.name,
                                               replicas_alias.rse_id != bad_replicas_alias.rse_id)))
        stmt = stmt.where(available_replica)

    # Only return replicas that are the last remaining copy
    if available_elsewhere == SuspiciousAvailability["LAST_COPY"].value:
        last_replica = ~exists(select(1)
                               .where(and_(replicas_alias.state == ReplicaState.AVAILABLE,
                                           replicas_alias.scope == bad_replicas_alias.scope,
                                           replicas_alias.name == bad_replicas_alias.name,
                                           replicas_alias.rse_id != bad_replicas_alias.rse_id)))
        stmt = stmt.where(last_replica)

    # it is required that the selected replicas
    # do not occur as BAD/DELETED/LOST/RECOVERED/...
    # in the bad_replicas table during the same time window.
    other_states_present = exists(select(1)
                                  .where(and_(models.BadReplica.scope == bad_replicas_alias.scope,
                                              models.BadReplica.name == bad_replicas_alias.name,
                                              models.BadReplica.created_at >= younger_than,
                                              models.BadReplica.rse_id == bad_replicas_alias.rse_id,
                                              models.BadReplica.state.in_(exclude_states_clause))))
    stmt = stmt.where(not_(other_states_present))

    # finally, the results are grouped by RSE, scope, name and required to have
    # at least 'nattempts' occurrences in the result of the query per replica.
    # If nattempts_exact, then only replicas are required to have exactly
    # 'nattempts' occurrences.
    if nattempts_exact:
        stmt = stmt.group_by(
            models.RSEFileAssociation.rse_id,
            bad_replicas_alias.scope,
            bad_replicas_alias.name
        ).having(
            func.count() == nattempts
        )
        query_result = session.execute(stmt).all()
    else:
        stmt = stmt.group_by(
            models.RSEFileAssociation.rse_id,
            bad_replicas_alias.scope,
            bad_replicas_alias.name
        ).having(
            func.count() > nattempts
        )
        query_result = session.execute(stmt).all()

    # translating the rse_id to RSE name and assembling the return list of dictionaries
    result = []
    rses = {}  # cache of rse_id -> RSE name lookups
    for cnt, scope, name, rse_id, created_at in query_result:
        if rse_id not in rses:
            rse = get_rse_name(rse_id=rse_id, session=session)
            rses[rse_id] = rse
        result.append({'scope': scope, 'name': name, 'rse': rses[rse_id], 'rse_id': rse_id, 'cnt': cnt, 'created_at': created_at})

    return result
|
|
4330
|
+
|
|
4331
|
+
|
|
4332
|
+
@read_session
def get_suspicious_reason(
        rse_id: str,
        scope: InternalScope,
        name: str,
        nattempts: int = 0,
        logger: "LoggerFunction" = logging.log,
        *,
        session: "Session"
) -> list[dict[str, Any]]:
    """
    Returns the error message(s) which lead to the replica(s) being declared suspicious.

    :param rse_id: ID of RSE.
    :param scope: Scope of the replica DID.
    :param name: Name of the replica DID.
    :param nattempts: Only reasons occurring more than this many times are returned.
    :param logger: Optional decorated logger that can be passed from the calling daemon or server.
    :param session: The database session in use.
    :returns: List of dictionaries with keys scope, name, rse, rse_id, reason and count.
    """
    # Alias for bad replicas
    bad_replicas_alias = aliased(models.BadReplica, name='bad_replicas_alias')

    # Select the suspicious declarations of this replica on this RSE.
    # The NOT EXISTS clause excludes the replica if it also has a bad-replica
    # entry in any other state (e.g. it was meanwhile declared BAD or LOST).
    # NOTE: the inner query must reference the un-aliased models.BadReplica —
    # reusing bad_replicas_alias would correlate it to the outer row, whose
    # state is already constrained to 'S', making the exclusion a no-op.
    # This mirrors the exists() pattern used by the sibling suspicious-files query.
    stmt = select(
        bad_replicas_alias.scope,
        bad_replicas_alias.name,
        bad_replicas_alias.reason,
        bad_replicas_alias.rse_id
    ).where(
        and_(bad_replicas_alias.rse_id == rse_id,
             bad_replicas_alias.scope == scope,
             bad_replicas_alias.state == 'S',
             bad_replicas_alias.name == name,
             ~exists(select(1).where(
                 and_(models.BadReplica.rse_id == rse_id,
                      models.BadReplica.name == name,
                      models.BadReplica.scope == scope,
                      models.BadReplica.state != 'S'))))
    )

    # Total number of suspicious declarations, across all reasons.
    count_query = select(
        func.count()
    ).select_from(
        stmt.subquery()
    )
    count = session.execute(count_query).scalar_one()

    # Group by reason so each distinct error message appears once,
    # keeping only reasons seen more than 'nattempts' times.
    grouped_stmt = stmt.group_by(
        bad_replicas_alias.rse_id,
        bad_replicas_alias.scope,
        bad_replicas_alias.name,
        bad_replicas_alias.reason
    ).having(
        func.count() > nattempts
    )

    # Translate rse_id to RSE name and assemble the return list of dictionaries.
    result = []
    rses = {}
    for scope_, name_, reason, rse_id_ in session.execute(grouped_stmt).all():
        if rse_id_ not in rses:
            rse = get_rse_name(rse_id=rse_id_, session=session)
            rses[rse_id_] = rse
        result.append({'scope': scope, 'name': name, 'rse': rses[rse_id_], 'rse_id': rse_id_, 'reason': reason, 'count': count})

    if len(result) > 1:
        logger(logging.WARNING, "Multiple reasons have been found. Please investigate.")

    return result
|
|
4398
|
+
|
|
4399
|
+
|
|
4400
|
+
@transactional_session
def set_tombstone(
        rse_id: str,
        scope: InternalScope,
        name: str,
        tombstone: datetime = OBSOLETE,
        *,
        session: "Session"
) -> None:
    """
    Sets a tombstone on a replica.

    :param rse_id: ID of RSE.
    :param scope: scope of the replica DID.
    :param name: name of the replica DID.
    :param tombstone: the tombstone to set. Default is OBSOLETE
    :param session: database session in use.
    :raises ReplicaIsLocked: if the replica exists but a lock is held on it.
    :raises ReplicaNotFound: if the replica does not exist at all.
    """
    # A tombstone may only be set when no lock is held on the replica,
    # hence the NOT EXISTS guard against the locks table.
    no_lock_held = ~exists().where(
        and_(models.ReplicaLock.rse_id == rse_id,
             models.ReplicaLock.name == name,
             models.ReplicaLock.scope == scope))

    update_stmt = update(
        models.RSEFileAssociation
    ).where(
        and_(models.RSEFileAssociation.rse_id == rse_id,
             models.RSEFileAssociation.name == name,
             models.RSEFileAssociation.scope == scope,
             no_lock_held)
    ).prefix_with(
        '/*+ INDEX(REPLICAS REPLICAS_PK) */', dialect='oracle'
    ).values({
        models.RSEFileAssociation.tombstone: tombstone
    }).execution_options(
        synchronize_session=False
    )

    if session.execute(update_stmt).rowcount != 0:
        # The tombstone was applied; nothing more to do.
        return

    # Nothing was updated: either the replica is locked, or it does not exist.
    # Re-query it to raise the appropriate exception.
    try:
        lookup_stmt = select(
            models.RSEFileAssociation.tombstone
        ).where(
            and_(models.RSEFileAssociation.rse_id == rse_id,
                 models.RSEFileAssociation.name == name,
                 models.RSEFileAssociation.scope == scope)
        )
        session.execute(lookup_stmt).scalar_one()
        raise exception.ReplicaIsLocked('Replica %s:%s on RSE %s is locked.' % (scope, name, get_rse_name(rse_id=rse_id, session=session)))
    except NoResultFound:
        raise exception.ReplicaNotFound('Replica %s:%s on RSE %s could not be found.' % (scope, name, get_rse_name(rse_id=rse_id, session=session)))
|
|
4447
|
+
|
|
4448
|
+
|
|
4449
|
+
@read_session
def get_rse_coverage_of_dataset(
        scope: "InternalScope",
        name: str,
        *,
        session: "Session"
) -> dict[str, int]:
    """
    Get total bytes present on RSEs

    :param scope: Scope of the dataset
    :param name: Name of the dataset
    :param session: The db session.
    :return: Dictionary { rse_id : <total bytes present at rse_id> }
    """
    # Join the dataset's contents to their file replicas and sum the bytes
    # per RSE, skipping replicas that are queued for deletion.
    stmt = select(
        models.RSEFileAssociation.rse_id,
        func.sum(models.DataIdentifierAssociation.bytes)
    ).where(
        and_(models.DataIdentifierAssociation.child_scope == models.RSEFileAssociation.scope,
             models.DataIdentifierAssociation.child_name == models.RSEFileAssociation.name,
             models.DataIdentifierAssociation.scope == scope,
             models.DataIdentifierAssociation.name == name,
             models.RSEFileAssociation.state != ReplicaState.BEING_DELETED)
    ).group_by(
        models.RSEFileAssociation.rse_id
    )

    # Omit RSEs whose summed total is zero or NULL.
    return {rse_id_: total for rse_id_, total in session.execute(stmt) if total}
|