dsp-tools 9.1.0.post11__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsp_tools/__init__.py +4 -0
- dsp_tools/cli/args.py +36 -0
- dsp_tools/cli/call_action.py +51 -231
- dsp_tools/cli/call_action_files_only.py +101 -0
- dsp_tools/cli/call_action_with_network.py +207 -0
- dsp_tools/cli/create_parsers.py +156 -58
- dsp_tools/cli/entry_point.py +56 -26
- dsp_tools/cli/utils.py +87 -0
- dsp_tools/clients/CLAUDE.md +420 -0
- dsp_tools/clients/authentication_client.py +14 -0
- dsp_tools/clients/authentication_client_live.py +66 -0
- dsp_tools/{utils → clients}/connection.py +2 -18
- dsp_tools/clients/connection_live.py +233 -0
- dsp_tools/clients/fuseki_metrics.py +60 -0
- dsp_tools/clients/group_user_clients.py +35 -0
- dsp_tools/clients/group_user_clients_live.py +181 -0
- dsp_tools/clients/legal_info_client.py +23 -0
- dsp_tools/clients/legal_info_client_live.py +132 -0
- dsp_tools/clients/list_client.py +49 -0
- dsp_tools/clients/list_client_live.py +166 -0
- dsp_tools/clients/metadata_client.py +24 -0
- dsp_tools/clients/metadata_client_live.py +47 -0
- dsp_tools/clients/ontology_clients.py +49 -0
- dsp_tools/clients/ontology_create_client_live.py +166 -0
- dsp_tools/clients/ontology_get_client_live.py +80 -0
- dsp_tools/clients/permissions_client.py +68 -0
- dsp_tools/clients/project_client.py +16 -0
- dsp_tools/clients/project_client_live.py +66 -0
- dsp_tools/commands/create/communicate_problems.py +24 -0
- dsp_tools/commands/create/create.py +134 -0
- dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
- dsp_tools/commands/create/create_on_server/classes.py +99 -0
- dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
- dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
- dsp_tools/commands/create/create_on_server/group_users.py +165 -0
- dsp_tools/commands/create/create_on_server/lists.py +163 -0
- dsp_tools/commands/create/create_on_server/mappers.py +12 -0
- dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
- dsp_tools/commands/create/create_on_server/ontology.py +52 -0
- dsp_tools/commands/create/create_on_server/project.py +68 -0
- dsp_tools/commands/create/create_on_server/properties.py +119 -0
- dsp_tools/commands/create/exceptions.py +29 -0
- dsp_tools/commands/create/lists_only.py +66 -0
- dsp_tools/commands/create/models/create_problems.py +87 -0
- dsp_tools/commands/create/models/parsed_ontology.py +88 -0
- dsp_tools/commands/create/models/parsed_project.py +81 -0
- dsp_tools/commands/create/models/rdf_ontology.py +12 -0
- dsp_tools/commands/create/models/server_project_info.py +100 -0
- dsp_tools/commands/create/parsing/parse_lists.py +45 -0
- dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
- dsp_tools/commands/create/parsing/parse_project.py +149 -0
- dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
- dsp_tools/commands/create/project_validate.py +595 -0
- dsp_tools/commands/create/serialisation/ontology.py +119 -0
- dsp_tools/commands/create/serialisation/project.py +44 -0
- dsp_tools/commands/excel2json/CLAUDE.md +101 -0
- dsp_tools/commands/excel2json/json_header.py +57 -23
- dsp_tools/commands/excel2json/{new_lists → lists}/compliance_checks.py +26 -26
- dsp_tools/commands/excel2json/{new_lists/make_new_lists.py → lists/make_lists.py} +19 -18
- dsp_tools/commands/excel2json/{new_lists → lists}/models/input_error.py +1 -12
- dsp_tools/commands/excel2json/{new_lists → lists}/models/serialise.py +9 -5
- dsp_tools/commands/excel2json/{new_lists → lists}/utils.py +4 -4
- dsp_tools/commands/excel2json/models/input_error.py +31 -11
- dsp_tools/commands/excel2json/models/json_header.py +53 -15
- dsp_tools/commands/excel2json/models/ontology.py +4 -3
- dsp_tools/commands/excel2json/{lists.py → old_lists.py} +26 -112
- dsp_tools/commands/excel2json/project.py +78 -34
- dsp_tools/commands/excel2json/properties.py +57 -36
- dsp_tools/commands/excel2json/resources.py +32 -12
- dsp_tools/commands/excel2json/utils.py +20 -1
- dsp_tools/commands/excel2xml/__init__.py +2 -2
- dsp_tools/commands/excel2xml/excel2xml_cli.py +7 -15
- dsp_tools/commands/excel2xml/excel2xml_lib.py +138 -493
- dsp_tools/commands/excel2xml/propertyelement.py +5 -5
- dsp_tools/commands/{project → get}/get.py +29 -13
- dsp_tools/commands/get/get_permissions.py +257 -0
- dsp_tools/commands/get/get_permissions_legacy.py +89 -0
- dsp_tools/commands/{project/models → get/legacy_models}/context.py +6 -6
- dsp_tools/commands/{project/models → get/legacy_models}/group.py +5 -10
- dsp_tools/commands/{project/models → get/legacy_models}/listnode.py +5 -35
- dsp_tools/commands/{project/models → get/legacy_models}/model.py +1 -1
- dsp_tools/commands/{project/models → get/legacy_models}/ontology.py +9 -14
- dsp_tools/commands/{project/models → get/legacy_models}/project.py +13 -6
- dsp_tools/commands/{project/models → get/legacy_models}/propertyclass.py +9 -16
- dsp_tools/commands/{project/models → get/legacy_models}/resourceclass.py +8 -46
- dsp_tools/commands/{project/models → get/legacy_models}/user.py +19 -60
- dsp_tools/commands/get/models/permissions_models.py +10 -0
- dsp_tools/commands/id2iri.py +20 -10
- dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +81 -56
- dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +4 -10
- dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +97 -37
- dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +2 -2
- dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +9 -10
- dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +3 -3
- dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +2 -10
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +12 -2
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +8 -9
- dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +18 -18
- dsp_tools/commands/start_stack.py +126 -77
- dsp_tools/commands/update_legal/CLAUDE.md +344 -0
- dsp_tools/commands/update_legal/__init__.py +0 -0
- dsp_tools/commands/update_legal/core.py +182 -0
- dsp_tools/commands/update_legal/csv_operations.py +135 -0
- dsp_tools/commands/update_legal/models.py +87 -0
- dsp_tools/commands/update_legal/xml_operations.py +247 -0
- dsp_tools/commands/validate_data/CLAUDE.md +159 -0
- dsp_tools/commands/validate_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/constants.py +59 -0
- dsp_tools/commands/validate_data/mappers.py +143 -0
- dsp_tools/commands/validate_data/models/__init__.py +0 -0
- dsp_tools/commands/validate_data/models/api_responses.py +45 -0
- dsp_tools/commands/validate_data/models/input_problems.py +119 -0
- dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
- dsp_tools/commands/validate_data/models/validation.py +106 -0
- dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
- dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
- dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
- dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
- dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
- dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
- dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
- dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
- dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
- dsp_tools/commands/{xml_validate/sparql/resource_shacl.py → validate_data/sparql/cardinality_shacl.py} +45 -47
- dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
- dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
- dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
- dsp_tools/commands/validate_data/utils.py +59 -0
- dsp_tools/commands/validate_data/validate_data.py +283 -0
- dsp_tools/commands/validate_data/validation/__init__.py +0 -0
- dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
- dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
- dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
- dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
- dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
- dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
- dsp_tools/commands/xmlupload/models/formatted_text_value.py +0 -25
- dsp_tools/commands/xmlupload/models/ingest.py +56 -70
- dsp_tools/commands/xmlupload/models/input_problems.py +6 -14
- dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
- dsp_tools/commands/xmlupload/models/permission.py +0 -39
- dsp_tools/commands/xmlupload/models/{deserialise/xmlpermission.py → permissions_parsed.py} +2 -2
- dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
- dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
- dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
- dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
- dsp_tools/commands/xmlupload/models/upload_clients.py +3 -3
- dsp_tools/commands/xmlupload/models/upload_state.py +2 -4
- dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
- dsp_tools/commands/xmlupload/{ark2iri.py → prepare_xml_input/ark2iri.py} +1 -1
- dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
- dsp_tools/commands/xmlupload/{iiif_uri_validator.py → prepare_xml_input/iiif_uri_validator.py} +2 -14
- dsp_tools/commands/xmlupload/{list_client.py → prepare_xml_input/list_client.py} +15 -10
- dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
- dsp_tools/commands/xmlupload/resource_create_client.py +7 -468
- dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
- dsp_tools/commands/xmlupload/stash/{construct_and_analyze_graph.py → analyse_circular_reference_graph.py} +64 -157
- dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
- dsp_tools/commands/xmlupload/stash/graph_models.py +13 -8
- dsp_tools/commands/xmlupload/stash/stash_circular_references.py +48 -115
- dsp_tools/commands/xmlupload/stash/stash_models.py +4 -9
- dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +34 -40
- dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +98 -108
- dsp_tools/commands/xmlupload/upload_config.py +8 -0
- dsp_tools/commands/xmlupload/write_diagnostic_info.py +14 -9
- dsp_tools/commands/xmlupload/xmlupload.py +214 -192
- dsp_tools/config/__init__.py +0 -0
- dsp_tools/config/logger_config.py +69 -0
- dsp_tools/{utils → config}/warnings_config.py +4 -1
- dsp_tools/error/__init__.py +0 -0
- dsp_tools/error/custom_warnings.py +39 -0
- dsp_tools/error/exceptions.py +204 -0
- dsp_tools/error/problems.py +10 -0
- dsp_tools/error/xmllib_errors.py +20 -0
- dsp_tools/error/xmllib_warnings.py +54 -0
- dsp_tools/error/xmllib_warnings_util.py +159 -0
- dsp_tools/error/xsd_validation_error_msg.py +19 -0
- dsp_tools/legacy_models/__init__.py +0 -0
- dsp_tools/{models → legacy_models}/datetimestamp.py +7 -7
- dsp_tools/{models → legacy_models}/langstring.py +1 -1
- dsp_tools/{models → legacy_models}/projectContext.py +4 -4
- dsp_tools/resources/schema/data.xsd +108 -83
- dsp_tools/resources/schema/lists-only.json +4 -23
- dsp_tools/resources/schema/project.json +80 -35
- dsp_tools/resources/schema/properties-only.json +1 -4
- dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
- dsp_tools/resources/start-stack/docker-compose.yml +34 -30
- dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
- dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
- dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
- dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
- dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
- dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
- dsp_tools/utils/ansi_colors.py +32 -0
- dsp_tools/utils/data_formats/__init__.py +0 -0
- dsp_tools/utils/{date_util.py → data_formats/date_util.py} +13 -1
- dsp_tools/utils/data_formats/iri_util.py +30 -0
- dsp_tools/utils/{shared.py → data_formats/shared.py} +1 -35
- dsp_tools/utils/{uri_util.py → data_formats/uri_util.py} +12 -2
- dsp_tools/utils/fuseki_bloating.py +63 -0
- dsp_tools/utils/json_parsing.py +22 -0
- dsp_tools/utils/rdf_constants.py +42 -0
- dsp_tools/utils/rdflib_utils.py +10 -0
- dsp_tools/utils/replace_id_with_iri.py +66 -0
- dsp_tools/utils/request_utils.py +238 -0
- dsp_tools/utils/xml_parsing/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
- dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
- dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
- dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
- dsp_tools/xmllib/CLAUDE.md +302 -0
- dsp_tools/xmllib/__init__.py +49 -0
- dsp_tools/xmllib/general_functions.py +877 -0
- dsp_tools/xmllib/internal/__init__.py +0 -0
- dsp_tools/xmllib/internal/checkers.py +162 -0
- dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
- dsp_tools/xmllib/internal/constants.py +46 -0
- dsp_tools/xmllib/internal/input_converters.py +155 -0
- dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
- dsp_tools/xmllib/internal/serialise_resource.py +177 -0
- dsp_tools/xmllib/internal/serialise_values.py +152 -0
- dsp_tools/xmllib/internal/type_aliases.py +11 -0
- dsp_tools/xmllib/models/config_options.py +28 -0
- dsp_tools/xmllib/models/date_formats.py +48 -0
- dsp_tools/xmllib/models/dsp_base_resources.py +1380 -400
- dsp_tools/xmllib/models/internal/__init__.py +0 -0
- dsp_tools/xmllib/models/internal/file_values.py +172 -0
- dsp_tools/xmllib/models/internal/geometry.py +162 -0
- dsp_tools/xmllib/models/{migration_metadata.py → internal/migration_metadata.py} +14 -10
- dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
- dsp_tools/xmllib/models/internal/values.py +342 -0
- dsp_tools/xmllib/models/licenses/__init__.py +0 -0
- dsp_tools/xmllib/models/licenses/other.py +59 -0
- dsp_tools/xmllib/models/licenses/recommended.py +107 -0
- dsp_tools/xmllib/models/permissions.py +41 -0
- dsp_tools/xmllib/models/res.py +1782 -0
- dsp_tools/xmllib/models/root.py +313 -26
- dsp_tools/xmllib/value_checkers.py +310 -47
- dsp_tools/xmllib/value_converters.py +765 -8
- dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
- dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
- dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
- {dsp_tools-9.1.0.post11.dist-info → dsp_tools-18.3.0.post13.dist-info}/entry_points.txt +1 -0
- dsp_tools/commands/project/create/project_create.py +0 -1107
- dsp_tools/commands/project/create/project_create_lists.py +0 -204
- dsp_tools/commands/project/create/project_validate.py +0 -453
- dsp_tools/commands/project/models/project_definition.py +0 -12
- dsp_tools/commands/rosetta.py +0 -124
- dsp_tools/commands/template.py +0 -30
- dsp_tools/commands/xml_validate/api_connection.py +0 -122
- dsp_tools/commands/xml_validate/deserialise_input.py +0 -135
- dsp_tools/commands/xml_validate/make_data_rdf.py +0 -193
- dsp_tools/commands/xml_validate/models/data_deserialised.py +0 -108
- dsp_tools/commands/xml_validate/models/data_rdf.py +0 -214
- dsp_tools/commands/xml_validate/models/input_problems.py +0 -191
- dsp_tools/commands/xml_validate/models/validation.py +0 -29
- dsp_tools/commands/xml_validate/reformat_validaton_result.py +0 -89
- dsp_tools/commands/xml_validate/sparql/construct_shapes.py +0 -16
- dsp_tools/commands/xml_validate/xml_validate.py +0 -151
- dsp_tools/commands/xmlupload/check_consistency_with_ontology.py +0 -253
- dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py +0 -236
- dsp_tools/commands/xmlupload/models/deserialise/xmlresource.py +0 -171
- dsp_tools/commands/xmlupload/models/namespace_context.py +0 -39
- dsp_tools/commands/xmlupload/models/ontology_lookup_models.py +0 -161
- dsp_tools/commands/xmlupload/models/ontology_problem_models.py +0 -178
- dsp_tools/commands/xmlupload/models/serialise/jsonld_serialiser.py +0 -40
- dsp_tools/commands/xmlupload/models/serialise/serialise_value.py +0 -51
- dsp_tools/commands/xmlupload/ontology_client.py +0 -92
- dsp_tools/commands/xmlupload/project_client.py +0 -91
- dsp_tools/commands/xmlupload/read_validate_xml_file.py +0 -99
- dsp_tools/models/custom_warnings.py +0 -31
- dsp_tools/models/exceptions.py +0 -90
- dsp_tools/resources/0100-template-repo/template.json +0 -45
- dsp_tools/resources/0100-template-repo/template.xml +0 -27
- dsp_tools/resources/start-stack/docker-compose-validation.yml +0 -5
- dsp_tools/resources/start-stack/start-stack-config.yml +0 -4
- dsp_tools/resources/xml_validate/api-shapes.ttl +0 -411
- dsp_tools/resources/xml_validate/replace_namespace.xslt +0 -61
- dsp_tools/utils/connection_live.py +0 -383
- dsp_tools/utils/iri_util.py +0 -14
- dsp_tools/utils/logger_config.py +0 -41
- dsp_tools/utils/set_encoder.py +0 -20
- dsp_tools/utils/xml_utils.py +0 -145
- dsp_tools/utils/xml_validation.py +0 -197
- dsp_tools/utils/xml_validation_models.py +0 -68
- dsp_tools/xmllib/models/file_values.py +0 -78
- dsp_tools/xmllib/models/resource.py +0 -415
- dsp_tools/xmllib/models/values.py +0 -428
- dsp_tools-9.1.0.post11.dist-info/METADATA +0 -130
- dsp_tools-9.1.0.post11.dist-info/RECORD +0 -167
- dsp_tools-9.1.0.post11.dist-info/WHEEL +0 -4
- dsp_tools-9.1.0.post11.dist-info/licenses/LICENSE +0 -674
- /dsp_tools/{commands/excel2json/new_lists → clients}/__init__.py +0 -0
- /dsp_tools/commands/{excel2json/new_lists/models → create}/__init__.py +0 -0
- /dsp_tools/commands/{project → create/create_on_server}/__init__.py +0 -0
- /dsp_tools/commands/{project/create → create/models}/__init__.py +0 -0
- /dsp_tools/commands/{project/models → create/parsing}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate → create/serialisation}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate/models → excel2json/lists}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate/sparql → excel2json/lists/models}/__init__.py +0 -0
- /dsp_tools/commands/excel2json/{new_lists → lists}/models/deserialise.py +0 -0
- /dsp_tools/commands/{xmlupload/models/deserialise → get}/__init__.py +0 -0
- /dsp_tools/commands/{xmlupload/models/serialise → get/legacy_models}/__init__.py +0 -0
- /dsp_tools/commands/{project/models → get/legacy_models}/helpers.py +0 -0
- /dsp_tools/{models → commands/get/models}/__init__.py +0 -0
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import urllib.parse
|
|
2
|
+
from collections.abc import Iterator
|
|
2
3
|
from dataclasses import dataclass
|
|
3
4
|
from dataclasses import field
|
|
5
|
+
from http import HTTPStatus
|
|
4
6
|
from pathlib import Path
|
|
5
|
-
from typing import Iterator
|
|
6
7
|
|
|
8
|
+
import regex
|
|
7
9
|
from loguru import logger
|
|
8
10
|
from requests import JSONDecodeError
|
|
9
11
|
from requests import RequestException
|
|
@@ -11,18 +13,14 @@ from requests import Session
|
|
|
11
13
|
from requests.adapters import HTTPAdapter
|
|
12
14
|
from requests.adapters import Retry
|
|
13
15
|
|
|
16
|
+
from dsp_tools.clients.authentication_client import AuthenticationClient
|
|
14
17
|
from dsp_tools.commands.ingest_xmlupload.upload_files.upload_failures import UploadFailure
|
|
15
|
-
from dsp_tools.
|
|
16
|
-
from dsp_tools.
|
|
17
|
-
from dsp_tools.
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
STATUS_FORBIDDEN = 403
|
|
22
|
-
STATUS_NOT_FOUND = 404
|
|
23
|
-
STATUS_CONFLICT = 409
|
|
24
|
-
STATUS_INTERNAL_SERVER_ERROR = 500
|
|
25
|
-
STATUS_SERVER_UNAVAILABLE = 503
|
|
18
|
+
from dsp_tools.config.logger_config import LOGGER_SAVEPATH
|
|
19
|
+
from dsp_tools.error.exceptions import BadCredentialsError
|
|
20
|
+
from dsp_tools.error.exceptions import InputError
|
|
21
|
+
from dsp_tools.utils.request_utils import RequestParameters
|
|
22
|
+
from dsp_tools.utils.request_utils import log_request
|
|
23
|
+
from dsp_tools.utils.request_utils import log_response
|
|
26
24
|
|
|
27
25
|
|
|
28
26
|
@dataclass
|
|
@@ -30,7 +28,7 @@ class BulkIngestClient:
|
|
|
30
28
|
"""Client to upload multiple files to the ingest server and monitor the ingest process."""
|
|
31
29
|
|
|
32
30
|
dsp_ingest_url: str
|
|
33
|
-
|
|
31
|
+
authentication_client: AuthenticationClient
|
|
34
32
|
shortcode: str
|
|
35
33
|
imgdir: Path = field(default=Path.cwd())
|
|
36
34
|
session: Session = field(init=False)
|
|
@@ -45,68 +43,93 @@ class BulkIngestClient:
|
|
|
45
43
|
connect=retries,
|
|
46
44
|
backoff_factor=0.3,
|
|
47
45
|
allowed_methods=None, # means all methods
|
|
48
|
-
status_forcelist=[
|
|
46
|
+
status_forcelist=[HTTPStatus.INTERNAL_SERVER_ERROR, HTTPStatus.SERVICE_UNAVAILABLE],
|
|
49
47
|
)
|
|
50
48
|
adapter = HTTPAdapter(max_retries=retry)
|
|
51
49
|
self.session.mount("http://", adapter)
|
|
52
50
|
self.session.mount("https://", adapter)
|
|
53
|
-
self.session.headers["Authorization"] = f"Bearer {self.token}"
|
|
54
51
|
|
|
55
52
|
def upload_file(
|
|
56
53
|
self,
|
|
57
54
|
filepath: Path,
|
|
58
55
|
) -> UploadFailure | None:
|
|
59
|
-
"""
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
timeout
|
|
56
|
+
"""
|
|
57
|
+
Uploads a file to the ingest server.
|
|
58
|
+
The load balancer on DSP servers currently has a timeout of 10m,
|
|
59
|
+
so we need to use a slightly shorter timeout of 9m.
|
|
60
|
+
See https://linear.app/dasch/issue/INFRA-847/increase-traefik-readtimeout
|
|
61
|
+
# noqa: DAR101
|
|
62
|
+
# noqa: DAR201
|
|
63
|
+
"""
|
|
64
|
+
logger.debug(f"Uploading file '{filepath}'")
|
|
65
|
+
timeout = 9 * 60
|
|
66
|
+
url = self._build_url_for_bulk_ingest_ingest_route(filepath)
|
|
67
|
+
headers = {
|
|
68
|
+
"Content-Type": "application/octet-stream",
|
|
69
|
+
"Authorization": f"Bearer {self.authentication_client.get_token()}",
|
|
70
|
+
}
|
|
63
71
|
err_msg = f"Failed to upload '{filepath}' to '{url}'."
|
|
72
|
+
params = RequestParameters("POST", url, timeout, headers=headers)
|
|
73
|
+
log_request(params)
|
|
64
74
|
try:
|
|
65
75
|
with open(self.imgdir / filepath, "rb") as binary_io:
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
res
|
|
73
|
-
url=url,
|
|
74
|
-
headers=headers,
|
|
75
|
-
data=content,
|
|
76
|
-
timeout=timeout,
|
|
77
|
-
)
|
|
78
|
-
logger.debug(f"RESPONSE: {res.status_code}")
|
|
76
|
+
res = self.session.post(
|
|
77
|
+
url=params.url,
|
|
78
|
+
headers=params.headers,
|
|
79
|
+
data=binary_io, # https://requests.readthedocs.io/en/latest/user/advanced/#streaming-uploads
|
|
80
|
+
timeout=params.timeout,
|
|
81
|
+
)
|
|
82
|
+
log_response(res)
|
|
79
83
|
except RequestException as e:
|
|
80
|
-
logger.
|
|
84
|
+
logger.exception(err_msg)
|
|
81
85
|
return UploadFailure(filepath, f"Exception of requests library: {e}")
|
|
82
|
-
|
|
86
|
+
except OSError as e:
|
|
87
|
+
err_msg = f"Cannot bulk-ingest {filepath}, because the file could not be opened/read: {e.strerror}"
|
|
83
88
|
logger.error(err_msg)
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
89
|
+
return UploadFailure(filepath, err_msg)
|
|
90
|
+
if res.status_code != HTTPStatus.OK:
|
|
91
|
+
logger.error(f"{err_msg}: Response {res.status_code}: {res.text}")
|
|
92
|
+
return UploadFailure(filepath, res.reason, res.status_code, res.text)
|
|
87
93
|
return None
|
|
88
94
|
|
|
95
|
+
def _build_url_for_bulk_ingest_ingest_route(self, filepath: Path) -> str:
|
|
96
|
+
"""
|
|
97
|
+
Remove the leading slash of absolute filepaths,
|
|
98
|
+
because the `/project/<shortcode>/bulk-ingest/ingest` route only accepts relative paths.
|
|
99
|
+
The leading slash has to be added again in the "ingest-xmlupload" step, when applying the ingest ID.
|
|
100
|
+
|
|
101
|
+
Args:
|
|
102
|
+
filepath: filepath
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
url
|
|
106
|
+
"""
|
|
107
|
+
quoted = regex.sub(r"^%2F", "", urllib.parse.quote(str(filepath), safe=""))
|
|
108
|
+
return f"{self.dsp_ingest_url}/projects/{self.shortcode}/bulk-ingest/ingest/{quoted}"
|
|
109
|
+
|
|
89
110
|
def trigger_ingest_process(self) -> None:
|
|
90
111
|
"""Start the ingest process on the server."""
|
|
91
112
|
url = f"{self.dsp_ingest_url}/projects/{self.shortcode}/bulk-ingest"
|
|
92
113
|
timeout = 5
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
if res.status_code ==
|
|
99
|
-
raise
|
|
114
|
+
headers = {"Authorization": f"Bearer {self.authentication_client.get_token()}"}
|
|
115
|
+
params = RequestParameters("POST", url, timeout, headers=headers)
|
|
116
|
+
log_request(params)
|
|
117
|
+
res = self.session.post(params.url, timeout=params.timeout, headers=params.headers)
|
|
118
|
+
log_response(res)
|
|
119
|
+
if res.status_code == HTTPStatus.FORBIDDEN:
|
|
120
|
+
raise BadCredentialsError("Only ProjectAdmins or SystemAdmins can start the ingest process.")
|
|
121
|
+
if res.status_code == HTTPStatus.NOT_FOUND:
|
|
122
|
+
raise InputError(
|
|
100
123
|
f"No assets have been uploaded for project {self.shortcode}. "
|
|
101
124
|
"Before using the 'ingest-files' command, you must upload some files with the 'upload-files' command."
|
|
102
125
|
)
|
|
103
|
-
if res.status_code ==
|
|
126
|
+
if res.status_code == HTTPStatus.CONFLICT:
|
|
104
127
|
msg = f"Ingest process on the server {self.dsp_ingest_url} is already running. Wait until it completes..."
|
|
105
128
|
print(msg)
|
|
106
129
|
logger.info(msg)
|
|
107
130
|
return
|
|
108
|
-
if res.status_code in [
|
|
109
|
-
raise
|
|
131
|
+
if res.status_code in [HTTPStatus.INTERNAL_SERVER_ERROR, HTTPStatus.SERVICE_UNAVAILABLE]:
|
|
132
|
+
raise InputError("Server is unavailable. Please try again later.")
|
|
110
133
|
|
|
111
134
|
try:
|
|
112
135
|
returned_shortcode = res.json().get("id")
|
|
@@ -114,7 +137,7 @@ class BulkIngestClient:
|
|
|
114
137
|
except JSONDecodeError:
|
|
115
138
|
failed = True
|
|
116
139
|
if failed:
|
|
117
|
-
raise
|
|
140
|
+
raise InputError("Failed to trigger the ingest process. Please check the server logs, or try again later.")
|
|
118
141
|
print(f"Kicked off the ingest process on the server {self.dsp_ingest_url}. Wait until it completes...")
|
|
119
142
|
logger.info(f"Kicked off the ingest process on the server {self.dsp_ingest_url}. Wait until it completes...")
|
|
120
143
|
|
|
@@ -128,26 +151,28 @@ class BulkIngestClient:
|
|
|
128
151
|
The mapping CSV if the ingest process has completed.
|
|
129
152
|
|
|
130
153
|
Raises:
|
|
131
|
-
|
|
154
|
+
InputError: if there are too many server errors in a row.
|
|
132
155
|
"""
|
|
133
156
|
url = f"{self.dsp_ingest_url}/projects/{self.shortcode}/bulk-ingest/mapping.csv"
|
|
134
157
|
timeout = 5
|
|
135
158
|
while True:
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
159
|
+
headers = {"Authorization": f"Bearer {self.authentication_client.get_token()}"}
|
|
160
|
+
params = RequestParameters("GET", url, timeout, headers=headers)
|
|
161
|
+
log_request(params)
|
|
162
|
+
res = self.session.get(params.url, timeout=params.timeout, headers=params.headers)
|
|
163
|
+
log_response(res)
|
|
164
|
+
if res.status_code == HTTPStatus.CONFLICT:
|
|
140
165
|
self.retrieval_failures = 0
|
|
141
166
|
logger.info("Ingest process is still running. Wait until it completes...")
|
|
142
167
|
yield True
|
|
143
|
-
elif res.status_code !=
|
|
168
|
+
elif res.status_code != HTTPStatus.OK or not res.text.startswith("original,derivative"):
|
|
144
169
|
self.retrieval_failures += 1
|
|
145
170
|
if self.retrieval_failures > 15:
|
|
146
|
-
raise
|
|
171
|
+
raise InputError(f"There were too many server errors. Please check the logs at {LOGGER_SAVEPATH}.")
|
|
147
172
|
msg = "While retrieving the mapping CSV, the server responded with an unexpected status code/content."
|
|
148
173
|
logger.error(msg)
|
|
149
174
|
yield False
|
|
150
175
|
else:
|
|
151
176
|
logger.info("Ingest process completed.")
|
|
152
177
|
break
|
|
153
|
-
yield res.
|
|
178
|
+
yield res.content.decode("utf-8")
|
|
@@ -9,7 +9,7 @@ from loguru import logger
|
|
|
9
9
|
from lxml import etree
|
|
10
10
|
|
|
11
11
|
from dsp_tools.commands.ingest_xmlupload.create_resources.user_information import IngestInformation
|
|
12
|
-
from dsp_tools.
|
|
12
|
+
from dsp_tools.error.exceptions import InputError
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
def get_mapping_dict_from_file(shortcode: str) -> dict[str, str]:
|
|
@@ -56,15 +56,9 @@ def replace_filepath_with_internal_filename(
|
|
|
56
56
|
for elem in new_tree.iter():
|
|
57
57
|
if not etree.QName(elem).localname.endswith("bitstream") or not elem.text:
|
|
58
58
|
continue
|
|
59
|
-
img_path_str = elem.text
|
|
60
|
-
if img_path_str not in orig_path_2_asset_id:
|
|
61
|
-
|
|
62
|
-
img_path_str = str(img_path.relative_to(Path.cwd()) if img_path.is_absolute() else img_path)
|
|
63
|
-
if img_path_str not in orig_path_2_asset_id:
|
|
64
|
-
img_path_str = str(img_path.with_suffix(img_path.suffix.lower()))
|
|
65
|
-
if img_path_str not in orig_path_2_asset_id:
|
|
66
|
-
img_path_str = str(img_path.with_suffix(img_path.suffix.upper()))
|
|
67
|
-
|
|
59
|
+
img_path_str = elem.text.strip()
|
|
60
|
+
if img_path_str not in orig_path_2_asset_id and img_path_str.startswith("/"):
|
|
61
|
+
img_path_str = img_path_str[1:]
|
|
68
62
|
if img_path_str in orig_path_2_asset_id:
|
|
69
63
|
elem.text = orig_path_2_asset_id[img_path_str]
|
|
70
64
|
used_media_file_paths.append(img_path_str)
|
|
@@ -6,27 +6,44 @@ from loguru import logger
|
|
|
6
6
|
from lxml import etree
|
|
7
7
|
|
|
8
8
|
from dsp_tools.cli.args import ServerCredentials
|
|
9
|
+
from dsp_tools.cli.args import ValidateDataConfig
|
|
10
|
+
from dsp_tools.cli.args import ValidationSeverity
|
|
11
|
+
from dsp_tools.clients.authentication_client import AuthenticationClient
|
|
12
|
+
from dsp_tools.clients.authentication_client_live import AuthenticationClientLive
|
|
13
|
+
from dsp_tools.clients.connection import Connection
|
|
14
|
+
from dsp_tools.clients.connection_live import ConnectionLive
|
|
15
|
+
from dsp_tools.clients.legal_info_client_live import LegalInfoClientLive
|
|
16
|
+
from dsp_tools.clients.project_client_live import ProjectClientLive
|
|
9
17
|
from dsp_tools.commands.ingest_xmlupload.create_resources.apply_ingest_id import get_mapping_dict_from_file
|
|
10
18
|
from dsp_tools.commands.ingest_xmlupload.create_resources.apply_ingest_id import replace_filepath_with_internal_filename
|
|
11
|
-
from dsp_tools.commands.
|
|
19
|
+
from dsp_tools.commands.validate_data.validate_data import validate_parsed_resources
|
|
12
20
|
from dsp_tools.commands.xmlupload.models.ingest import BulkIngestedAssetClient
|
|
13
21
|
from dsp_tools.commands.xmlupload.models.upload_clients import UploadClients
|
|
14
22
|
from dsp_tools.commands.xmlupload.models.upload_state import UploadState
|
|
15
|
-
from dsp_tools.commands.xmlupload.
|
|
16
|
-
from dsp_tools.commands.xmlupload.
|
|
17
|
-
from dsp_tools.commands.xmlupload.
|
|
23
|
+
from dsp_tools.commands.xmlupload.prepare_xml_input.get_processed_resources import get_processed_resources
|
|
24
|
+
from dsp_tools.commands.xmlupload.prepare_xml_input.list_client import ListClientLive
|
|
25
|
+
from dsp_tools.commands.xmlupload.prepare_xml_input.prepare_xml_input import get_parsed_resources_and_mappers
|
|
26
|
+
from dsp_tools.commands.xmlupload.prepare_xml_input.prepare_xml_input import get_stash_and_upload_order
|
|
27
|
+
from dsp_tools.commands.xmlupload.prepare_xml_input.read_validate_xml_file import validate_iiif_uris
|
|
18
28
|
from dsp_tools.commands.xmlupload.upload_config import UploadConfig
|
|
29
|
+
from dsp_tools.commands.xmlupload.xmlupload import enable_unknown_license_if_any_are_missing
|
|
19
30
|
from dsp_tools.commands.xmlupload.xmlupload import execute_upload
|
|
20
|
-
from dsp_tools.
|
|
21
|
-
from dsp_tools.
|
|
22
|
-
from dsp_tools.utils.
|
|
23
|
-
from dsp_tools.utils.
|
|
31
|
+
from dsp_tools.error.exceptions import InputError
|
|
32
|
+
from dsp_tools.utils.ansi_colors import BOLD_RED
|
|
33
|
+
from dsp_tools.utils.ansi_colors import RESET_TO_DEFAULT
|
|
34
|
+
from dsp_tools.utils.data_formats.uri_util import is_prod_like_server
|
|
35
|
+
from dsp_tools.utils.replace_id_with_iri import use_id2iri_mapping_to_replace_ids
|
|
36
|
+
from dsp_tools.utils.xml_parsing.parse_clean_validate_xml import parse_and_clean_xml_file
|
|
24
37
|
|
|
25
38
|
|
|
26
39
|
def ingest_xmlupload(
|
|
27
40
|
xml_file: Path,
|
|
28
41
|
creds: ServerCredentials,
|
|
29
42
|
interrupt_after: int | None = None,
|
|
43
|
+
skip_validation: bool = False,
|
|
44
|
+
skip_ontology_validation: bool = False,
|
|
45
|
+
id2iri_file: str | None = None,
|
|
46
|
+
do_not_request_resource_metadata_from_db: bool = False,
|
|
30
47
|
) -> bool:
|
|
31
48
|
"""
|
|
32
49
|
This function reads an XML file
|
|
@@ -40,6 +57,11 @@ def ingest_xmlupload(
|
|
|
40
57
|
xml_file: path to XML file containing the resources
|
|
41
58
|
creds: credentials to access the DSP server
|
|
42
59
|
interrupt_after: if set, the upload will be interrupted after this number of resources
|
|
60
|
+
skip_validation: skip the SHACL validation
|
|
61
|
+
skip_ontology_validation: skip the ontology validation
|
|
62
|
+
id2iri_file: to replace internal IDs of an XML file by IRIs provided in this mapping file
|
|
63
|
+
do_not_request_resource_metadata_from_db: if true do not request metadata information from the api
|
|
64
|
+
for existing resources
|
|
43
65
|
|
|
44
66
|
Returns:
|
|
45
67
|
True if all resources could be uploaded without errors; False if one of the resources could not be
|
|
@@ -48,11 +70,12 @@ def ingest_xmlupload(
|
|
|
48
70
|
Raises:
|
|
49
71
|
InputError: if any media was not uploaded or uploaded media was not referenced.
|
|
50
72
|
"""
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
con.login(creds.user, creds.password)
|
|
73
|
+
root = parse_and_clean_xml_file(xml_file)
|
|
74
|
+
shortcode = root.attrib["shortcode"]
|
|
75
|
+
root = _replace_filepaths_with_internal_filename_from_ingest(root, shortcode)
|
|
55
76
|
|
|
77
|
+
auth = AuthenticationClientLive(server=creds.server, email=creds.user, password=creds.password)
|
|
78
|
+
con = ConnectionLive(creds.server, auth)
|
|
56
79
|
config = UploadConfig(
|
|
57
80
|
media_previously_uploaded=True,
|
|
58
81
|
interrupt_after=interrupt_after,
|
|
@@ -60,34 +83,70 @@ def ingest_xmlupload(
|
|
|
60
83
|
server=creds.server,
|
|
61
84
|
shortcode=shortcode,
|
|
62
85
|
)
|
|
86
|
+
clients = _get_live_clients(con, config, auth)
|
|
87
|
+
|
|
88
|
+
parsed_resources, lookups = get_parsed_resources_and_mappers(root, clients)
|
|
89
|
+
if id2iri_file:
|
|
90
|
+
parsed_resources = use_id2iri_mapping_to_replace_ids(parsed_resources, Path(id2iri_file))
|
|
91
|
+
|
|
92
|
+
validation_should_be_skipped = skip_validation
|
|
93
|
+
is_on_prod_like_server = is_prod_like_server(creds.server)
|
|
94
|
+
if is_on_prod_like_server and config.skip_validation:
|
|
95
|
+
msg = (
|
|
96
|
+
"You set the flag '--skip-validation' to circumvent the SHACL schema validation. "
|
|
97
|
+
"This means that the upload may fail due to undetected errors. "
|
|
98
|
+
"Do you wish to skip the validation (yes/no)? "
|
|
99
|
+
)
|
|
100
|
+
resp = ""
|
|
101
|
+
while resp not in ["yes", "no"]:
|
|
102
|
+
resp = input(BOLD_RED + msg + RESET_TO_DEFAULT)
|
|
103
|
+
if str(resp) == "no":
|
|
104
|
+
validation_should_be_skipped = False
|
|
105
|
+
if not validation_should_be_skipped:
|
|
106
|
+
v_severity = config.validation_severity
|
|
107
|
+
if is_on_prod_like_server:
|
|
108
|
+
v_severity = ValidationSeverity.INFO
|
|
109
|
+
validation_passed = validate_parsed_resources(
|
|
110
|
+
parsed_resources=parsed_resources,
|
|
111
|
+
authorship_lookup=lookups.authorships,
|
|
112
|
+
permission_ids=list(lookups.permissions.keys()),
|
|
113
|
+
shortcode=shortcode,
|
|
114
|
+
config=ValidateDataConfig(
|
|
115
|
+
xml_file,
|
|
116
|
+
save_graph_dir=None,
|
|
117
|
+
severity=v_severity,
|
|
118
|
+
ignore_duplicate_files_warning=True,
|
|
119
|
+
is_on_prod_server=is_on_prod_like_server,
|
|
120
|
+
skip_ontology_validation=skip_ontology_validation,
|
|
121
|
+
do_not_request_resource_metadata_from_db=do_not_request_resource_metadata_from_db,
|
|
122
|
+
),
|
|
123
|
+
auth=auth,
|
|
124
|
+
)
|
|
125
|
+
if not validation_passed:
|
|
126
|
+
return False
|
|
127
|
+
else:
|
|
128
|
+
logger.debug("SHACL validation was skipped.")
|
|
63
129
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
clients = _get_live_clients(con, config)
|
|
68
|
-
state = UploadState(resources, stash, config, permissions_lookup)
|
|
69
|
-
|
|
70
|
-
return execute_upload(clients, state)
|
|
130
|
+
if not config.skip_iiif_validation:
|
|
131
|
+
validate_iiif_uris(root)
|
|
71
132
|
|
|
133
|
+
if not is_on_prod_like_server:
|
|
134
|
+
enable_unknown_license_if_any_are_missing(clients.legal_info_client, parsed_resources)
|
|
72
135
|
|
|
73
|
-
|
|
74
|
-
"""
|
|
75
|
-
Validate and parse an upload XML file.
|
|
76
|
-
The bulk-ingest must already have taken place, and the mapping CSV must be in the CWD.
|
|
136
|
+
processed_resources = get_processed_resources(parsed_resources, lookups, is_on_prod_like_server)
|
|
77
137
|
|
|
78
|
-
|
|
79
|
-
xml_file: file that will be parsed
|
|
138
|
+
sorted_resources, stash = get_stash_and_upload_order(processed_resources)
|
|
80
139
|
|
|
81
|
-
|
|
82
|
-
|
|
140
|
+
state = UploadState(
|
|
141
|
+
pending_resources=sorted_resources,
|
|
142
|
+
pending_stash=stash,
|
|
143
|
+
config=config,
|
|
144
|
+
)
|
|
83
145
|
|
|
84
|
-
|
|
85
|
-
InputError: if replacing file paths with internal asset IDs failed
|
|
86
|
-
"""
|
|
87
|
-
root, shortcode, default_ontology = validate_and_parse(xml_file)
|
|
146
|
+
return execute_upload(clients, state)
|
|
88
147
|
|
|
89
|
-
logger.info(f"Validated and parsed the XML. {shortcode=:} and {default_ontology=:}")
|
|
90
148
|
|
|
149
|
+
def _replace_filepaths_with_internal_filename_from_ingest(root: etree._Element, shortcode: str) -> etree._Element:
|
|
91
150
|
orig_path_2_asset_id = get_mapping_dict_from_file(shortcode)
|
|
92
151
|
root, ingest_info = replace_filepath_with_internal_filename(root, orig_path_2_asset_id)
|
|
93
152
|
if ok := ingest_info.ok_msg():
|
|
@@ -96,11 +155,12 @@ def _parse_xml_and_replace_filepaths(xml_file: Path) -> tuple[str, etree._Elemen
|
|
|
96
155
|
else:
|
|
97
156
|
err_msg = ingest_info.execute_error_protocol()
|
|
98
157
|
raise InputError(err_msg)
|
|
99
|
-
return
|
|
158
|
+
return root
|
|
100
159
|
|
|
101
160
|
|
|
102
|
-
def _get_live_clients(con: Connection, config: UploadConfig) -> UploadClients:
|
|
161
|
+
def _get_live_clients(con: Connection, config: UploadConfig, auth: AuthenticationClient) -> UploadClients:
|
|
103
162
|
ingest_client = BulkIngestedAssetClient()
|
|
104
|
-
project_client = ProjectClientLive(
|
|
105
|
-
list_client = ListClientLive(con, project_client.get_project_iri())
|
|
106
|
-
|
|
163
|
+
project_client = ProjectClientLive(auth.server, auth)
|
|
164
|
+
list_client = ListClientLive(con, project_client.get_project_iri(config.shortcode))
|
|
165
|
+
legal_info_client = LegalInfoClientLive(config.server, config.shortcode, auth)
|
|
166
|
+
return UploadClients(ingest_client, list_client, legal_info_client)
|
|
@@ -72,7 +72,7 @@ class IngestInformation:
|
|
|
72
72
|
"The data XML file contains references to multimedia files "
|
|
73
73
|
"which were not previously uploaded through dsp-ingest:\n"
|
|
74
74
|
f" The file with the resource IDs and problematic filenames was saved at "
|
|
75
|
-
f"'{Path(self.csv_directory_path/self.mediafiles_no_id_csv)}'."
|
|
75
|
+
f"'{Path(self.csv_directory_path / self.mediafiles_no_id_csv)}'."
|
|
76
76
|
)
|
|
77
77
|
return None
|
|
78
78
|
|
|
@@ -87,7 +87,7 @@ class IngestInformation:
|
|
|
87
87
|
"The data XML file does not reference all the multimedia files which were previously "
|
|
88
88
|
"uploaded through dsp-ingest.\n"
|
|
89
89
|
f" The file with the unused filenames was saved at "
|
|
90
|
-
f"'{Path(self.csv_directory_path/self.unused_mediafiles_csv)}'."
|
|
90
|
+
f"'{Path(self.csv_directory_path / self.unused_mediafiles_csv)}'."
|
|
91
91
|
)
|
|
92
92
|
return None
|
|
93
93
|
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
from datetime import datetime
|
|
2
1
|
from pathlib import Path
|
|
3
2
|
from time import sleep
|
|
4
3
|
from typing import cast
|
|
@@ -7,9 +6,8 @@ from loguru import logger
|
|
|
7
6
|
from tqdm import tqdm
|
|
8
7
|
|
|
9
8
|
from dsp_tools.cli.args import ServerCredentials
|
|
9
|
+
from dsp_tools.clients.authentication_client_live import AuthenticationClientLive
|
|
10
10
|
from dsp_tools.commands.ingest_xmlupload.bulk_ingest_client import BulkIngestClient
|
|
11
|
-
from dsp_tools.utils.connection import Connection
|
|
12
|
-
from dsp_tools.utils.connection_live import ConnectionLive
|
|
13
11
|
|
|
14
12
|
|
|
15
13
|
def ingest_files(creds: ServerCredentials, shortcode: str) -> bool:
|
|
@@ -24,9 +22,8 @@ def ingest_files(creds: ServerCredentials, shortcode: str) -> bool:
|
|
|
24
22
|
Returns:
|
|
25
23
|
success status
|
|
26
24
|
"""
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
bulk_ingest_client = BulkIngestClient(creds.dsp_ingest_url, con.get_token(), shortcode)
|
|
25
|
+
auth = AuthenticationClientLive(creds.server, creds.user, creds.password)
|
|
26
|
+
bulk_ingest_client = BulkIngestClient(creds.dsp_ingest_url, auth, shortcode)
|
|
30
27
|
bulk_ingest_client.trigger_ingest_process()
|
|
31
28
|
sleep(5)
|
|
32
29
|
mapping = _retrieve_mapping(bulk_ingest_client)
|
|
@@ -35,7 +32,7 @@ def ingest_files(creds: ServerCredentials, shortcode: str) -> bool:
|
|
|
35
32
|
|
|
36
33
|
|
|
37
34
|
def _retrieve_mapping(bulk_ingest_client: BulkIngestClient) -> str:
|
|
38
|
-
sleeping_time =
|
|
35
|
+
sleeping_time = 60
|
|
39
36
|
desc = f"Wait until mapping CSV is ready. Ask server every {sleeping_time} seconds "
|
|
40
37
|
progress_bar = tqdm(
|
|
41
38
|
bulk_ingest_client.retrieve_mapping_generator(), desc=desc, bar_format="{desc}{elapsed}", dynamic_ncols=True
|
|
@@ -58,8 +55,10 @@ def _retrieve_mapping(bulk_ingest_client: BulkIngestClient) -> str:
|
|
|
58
55
|
def _save_mapping(mapping: str, shortcode: str) -> None:
|
|
59
56
|
filepath = Path(f"mapping-{shortcode}.csv")
|
|
60
57
|
if filepath.exists():
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
58
|
+
i = 1
|
|
59
|
+
while (new_name_for_existing := Path(f"mapping-{shortcode}-{i}.csv")).exists():
|
|
60
|
+
i += 1
|
|
61
|
+
filepath.rename(new_name_for_existing)
|
|
62
|
+
filepath.write_text(mapping, encoding="utf-8")
|
|
64
63
|
print(f"Saved mapping CSV to '{filepath}'")
|
|
65
64
|
logger.info(f"Saved mapping CSV to '{filepath}'")
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from collections.abc import Iterable
|
|
3
4
|
from pathlib import Path
|
|
4
|
-
from typing import Iterable
|
|
5
5
|
|
|
6
6
|
from dsp_tools.commands.ingest_xmlupload.upload_files.input_error import FileProblems
|
|
7
7
|
|
|
8
8
|
SUPPORTED_EXTENSIONS = (
|
|
9
|
-
"zip,tar,gz,z,tgz,gzip,7z,mp3,wav,pdf,doc,docx,xls,xlsx,ppt,pptx,"
|
|
10
|
-
"mp4,jpg,jpeg,jp2,png,tif,tiff,odd,rng,txt,xml,xsd,xsl,csv"
|
|
9
|
+
"zip,tar,gz,z,tgz,gzip,7z,mp3,wav,pdf,doc,docx,xls,xlsx,ppt,pptx,epub,"
|
|
10
|
+
"mp4,jpg,jpeg,jp2,json,png,tif,tiff,odd,rng,txt,xml,htm,html,xsd,xsl,csv"
|
|
11
11
|
).split(",")
|
|
12
12
|
|
|
13
13
|
|
|
@@ -1,22 +1,14 @@
|
|
|
1
1
|
from dataclasses import dataclass
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
from typing import Protocol
|
|
4
3
|
|
|
5
4
|
import pandas as pd
|
|
6
5
|
|
|
6
|
+
from dsp_tools.error.problems import Problem
|
|
7
|
+
|
|
7
8
|
separator = "\n\n"
|
|
8
9
|
list_separator = "\n - "
|
|
9
10
|
|
|
10
11
|
|
|
11
|
-
class Problem(Protocol):
|
|
12
|
-
"""Information about input errors."""
|
|
13
|
-
|
|
14
|
-
def execute_error_protocol(self) -> str:
|
|
15
|
-
"""
|
|
16
|
-
This function initiates all the steps for successful problem communication with the user.
|
|
17
|
-
"""
|
|
18
|
-
|
|
19
|
-
|
|
20
12
|
@dataclass(frozen=True)
|
|
21
13
|
class FileProblems(Problem):
|
|
22
14
|
"""Handle the error communication to the user in case that some files don't exist or are unsupported."""
|
|
@@ -5,7 +5,8 @@ import pandas as pd
|
|
|
5
5
|
import regex
|
|
6
6
|
|
|
7
7
|
separator = "\n\n"
|
|
8
|
-
list_separator = "\n
|
|
8
|
+
list_separator = "\n- "
|
|
9
|
+
list_separator_indented = "\n - "
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
@dataclass(frozen=True)
|
|
@@ -14,6 +15,8 @@ class UploadFailure:
|
|
|
14
15
|
|
|
15
16
|
filepath: Path
|
|
16
17
|
reason: str
|
|
18
|
+
status_code: int | None = None
|
|
19
|
+
response_text: str | None = None
|
|
17
20
|
|
|
18
21
|
|
|
19
22
|
@dataclass(frozen=True)
|
|
@@ -44,13 +47,20 @@ class UploadFailures:
|
|
|
44
47
|
msg += f"The full list of failed files has been saved to '{output_file}'."
|
|
45
48
|
else:
|
|
46
49
|
msg += f"Failed to upload the following {len(self.failures)} files:"
|
|
47
|
-
|
|
50
|
+
for f in self.failures:
|
|
51
|
+
msg += list_separator + f"{f.filepath}: {f.reason}"
|
|
52
|
+
if f.status_code:
|
|
53
|
+
msg += list_separator_indented + f"Status code: {f.status_code}"
|
|
54
|
+
if f.response_text:
|
|
55
|
+
msg += list_separator_indented + f"Response text: {f.response_text}"
|
|
48
56
|
return msg
|
|
49
57
|
|
|
50
58
|
def _save_to_csv(self, output_file: Path) -> None:
|
|
51
59
|
data = {
|
|
52
60
|
"Filepath": [failure.filepath for failure in self.failures],
|
|
53
61
|
"Reason": [failure.reason for failure in self.failures],
|
|
62
|
+
"Status code": [failure.status_code for failure in self.failures],
|
|
63
|
+
"Response text": [failure.response_text for failure in self.failures],
|
|
54
64
|
}
|
|
55
65
|
df = pd.DataFrame(data)
|
|
56
66
|
df.to_csv(output_file, index=False)
|
|
@@ -5,14 +5,13 @@ from lxml import etree
|
|
|
5
5
|
from tqdm import tqdm
|
|
6
6
|
|
|
7
7
|
from dsp_tools.cli.args import ServerCredentials
|
|
8
|
+
from dsp_tools.clients.authentication_client_live import AuthenticationClientLive
|
|
8
9
|
from dsp_tools.commands.ingest_xmlupload.bulk_ingest_client import BulkIngestClient
|
|
9
10
|
from dsp_tools.commands.ingest_xmlupload.upload_files.filechecker import check_files
|
|
10
11
|
from dsp_tools.commands.ingest_xmlupload.upload_files.upload_failures import UploadFailure
|
|
11
12
|
from dsp_tools.commands.ingest_xmlupload.upload_files.upload_failures import UploadFailures
|
|
12
|
-
from dsp_tools.
|
|
13
|
-
from dsp_tools.
|
|
14
|
-
from dsp_tools.utils.connection import Connection
|
|
15
|
-
from dsp_tools.utils.connection_live import ConnectionLive
|
|
13
|
+
from dsp_tools.error.exceptions import InputError
|
|
14
|
+
from dsp_tools.utils.xml_parsing.parse_clean_validate_xml import parse_and_clean_xml_file
|
|
16
15
|
|
|
17
16
|
|
|
18
17
|
def upload_files(
|
|
@@ -32,14 +31,14 @@ def upload_files(
|
|
|
32
31
|
Returns:
|
|
33
32
|
success status
|
|
34
33
|
"""
|
|
35
|
-
root
|
|
34
|
+
root = parse_and_clean_xml_file(xml_file)
|
|
35
|
+
shortcode = root.attrib["shortcode"]
|
|
36
36
|
paths = _get_validated_paths(root)
|
|
37
37
|
print(f"Found {len(paths)} files to upload onto server {creds.dsp_ingest_url}.")
|
|
38
38
|
logger.info(f"Found {len(paths)} files to upload onto server {creds.dsp_ingest_url}.")
|
|
39
39
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
ingest_client = BulkIngestClient(creds.dsp_ingest_url, con.get_token(), shortcode, imgdir)
|
|
40
|
+
auth = AuthenticationClientLive(creds.server, creds.user, creds.password)
|
|
41
|
+
ingest_client = BulkIngestClient(creds.dsp_ingest_url, auth, shortcode, imgdir)
|
|
43
42
|
|
|
44
43
|
failures: list[UploadFailure] = []
|
|
45
44
|
progress_bar = tqdm(paths, desc="Uploading files", unit="file(s)", dynamic_ncols=True)
|
|
@@ -61,7 +60,7 @@ def upload_files(
|
|
|
61
60
|
|
|
62
61
|
|
|
63
62
|
def _get_validated_paths(root: etree._Element) -> set[Path]:
|
|
64
|
-
paths = {Path(x.text) for x in root.xpath("//bitstream")}
|
|
63
|
+
paths = {Path(x.text.strip()) for x in root.xpath("//bitstream")}
|
|
65
64
|
if problems := check_files(paths):
|
|
66
65
|
msg = problems.execute_error_protocol()
|
|
67
66
|
raise InputError(msg)
|