dsp-tools 9.1.0.post11__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsp_tools/__init__.py +4 -0
- dsp_tools/cli/args.py +36 -0
- dsp_tools/cli/call_action.py +51 -231
- dsp_tools/cli/call_action_files_only.py +101 -0
- dsp_tools/cli/call_action_with_network.py +207 -0
- dsp_tools/cli/create_parsers.py +156 -58
- dsp_tools/cli/entry_point.py +56 -26
- dsp_tools/cli/utils.py +87 -0
- dsp_tools/clients/CLAUDE.md +420 -0
- dsp_tools/clients/authentication_client.py +14 -0
- dsp_tools/clients/authentication_client_live.py +66 -0
- dsp_tools/{utils → clients}/connection.py +2 -18
- dsp_tools/clients/connection_live.py +233 -0
- dsp_tools/clients/fuseki_metrics.py +60 -0
- dsp_tools/clients/group_user_clients.py +35 -0
- dsp_tools/clients/group_user_clients_live.py +181 -0
- dsp_tools/clients/legal_info_client.py +23 -0
- dsp_tools/clients/legal_info_client_live.py +132 -0
- dsp_tools/clients/list_client.py +49 -0
- dsp_tools/clients/list_client_live.py +166 -0
- dsp_tools/clients/metadata_client.py +24 -0
- dsp_tools/clients/metadata_client_live.py +47 -0
- dsp_tools/clients/ontology_clients.py +49 -0
- dsp_tools/clients/ontology_create_client_live.py +166 -0
- dsp_tools/clients/ontology_get_client_live.py +80 -0
- dsp_tools/clients/permissions_client.py +68 -0
- dsp_tools/clients/project_client.py +16 -0
- dsp_tools/clients/project_client_live.py +66 -0
- dsp_tools/commands/create/communicate_problems.py +24 -0
- dsp_tools/commands/create/create.py +134 -0
- dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
- dsp_tools/commands/create/create_on_server/classes.py +99 -0
- dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
- dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
- dsp_tools/commands/create/create_on_server/group_users.py +165 -0
- dsp_tools/commands/create/create_on_server/lists.py +163 -0
- dsp_tools/commands/create/create_on_server/mappers.py +12 -0
- dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
- dsp_tools/commands/create/create_on_server/ontology.py +52 -0
- dsp_tools/commands/create/create_on_server/project.py +68 -0
- dsp_tools/commands/create/create_on_server/properties.py +119 -0
- dsp_tools/commands/create/exceptions.py +29 -0
- dsp_tools/commands/create/lists_only.py +66 -0
- dsp_tools/commands/create/models/create_problems.py +87 -0
- dsp_tools/commands/create/models/parsed_ontology.py +88 -0
- dsp_tools/commands/create/models/parsed_project.py +81 -0
- dsp_tools/commands/create/models/rdf_ontology.py +12 -0
- dsp_tools/commands/create/models/server_project_info.py +100 -0
- dsp_tools/commands/create/parsing/parse_lists.py +45 -0
- dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
- dsp_tools/commands/create/parsing/parse_project.py +149 -0
- dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
- dsp_tools/commands/create/project_validate.py +595 -0
- dsp_tools/commands/create/serialisation/ontology.py +119 -0
- dsp_tools/commands/create/serialisation/project.py +44 -0
- dsp_tools/commands/excel2json/CLAUDE.md +101 -0
- dsp_tools/commands/excel2json/json_header.py +57 -23
- dsp_tools/commands/excel2json/{new_lists → lists}/compliance_checks.py +26 -26
- dsp_tools/commands/excel2json/{new_lists/make_new_lists.py → lists/make_lists.py} +19 -18
- dsp_tools/commands/excel2json/{new_lists → lists}/models/input_error.py +1 -12
- dsp_tools/commands/excel2json/{new_lists → lists}/models/serialise.py +9 -5
- dsp_tools/commands/excel2json/{new_lists → lists}/utils.py +4 -4
- dsp_tools/commands/excel2json/models/input_error.py +31 -11
- dsp_tools/commands/excel2json/models/json_header.py +53 -15
- dsp_tools/commands/excel2json/models/ontology.py +4 -3
- dsp_tools/commands/excel2json/{lists.py → old_lists.py} +26 -112
- dsp_tools/commands/excel2json/project.py +78 -34
- dsp_tools/commands/excel2json/properties.py +57 -36
- dsp_tools/commands/excel2json/resources.py +32 -12
- dsp_tools/commands/excel2json/utils.py +20 -1
- dsp_tools/commands/excel2xml/__init__.py +2 -2
- dsp_tools/commands/excel2xml/excel2xml_cli.py +7 -15
- dsp_tools/commands/excel2xml/excel2xml_lib.py +138 -493
- dsp_tools/commands/excel2xml/propertyelement.py +5 -5
- dsp_tools/commands/{project → get}/get.py +29 -13
- dsp_tools/commands/get/get_permissions.py +257 -0
- dsp_tools/commands/get/get_permissions_legacy.py +89 -0
- dsp_tools/commands/{project/models → get/legacy_models}/context.py +6 -6
- dsp_tools/commands/{project/models → get/legacy_models}/group.py +5 -10
- dsp_tools/commands/{project/models → get/legacy_models}/listnode.py +5 -35
- dsp_tools/commands/{project/models → get/legacy_models}/model.py +1 -1
- dsp_tools/commands/{project/models → get/legacy_models}/ontology.py +9 -14
- dsp_tools/commands/{project/models → get/legacy_models}/project.py +13 -6
- dsp_tools/commands/{project/models → get/legacy_models}/propertyclass.py +9 -16
- dsp_tools/commands/{project/models → get/legacy_models}/resourceclass.py +8 -46
- dsp_tools/commands/{project/models → get/legacy_models}/user.py +19 -60
- dsp_tools/commands/get/models/permissions_models.py +10 -0
- dsp_tools/commands/id2iri.py +20 -10
- dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +81 -56
- dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +4 -10
- dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +97 -37
- dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +2 -2
- dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +9 -10
- dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +3 -3
- dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +2 -10
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +12 -2
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +8 -9
- dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +18 -18
- dsp_tools/commands/start_stack.py +126 -77
- dsp_tools/commands/update_legal/CLAUDE.md +344 -0
- dsp_tools/commands/update_legal/__init__.py +0 -0
- dsp_tools/commands/update_legal/core.py +182 -0
- dsp_tools/commands/update_legal/csv_operations.py +135 -0
- dsp_tools/commands/update_legal/models.py +87 -0
- dsp_tools/commands/update_legal/xml_operations.py +247 -0
- dsp_tools/commands/validate_data/CLAUDE.md +159 -0
- dsp_tools/commands/validate_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/constants.py +59 -0
- dsp_tools/commands/validate_data/mappers.py +143 -0
- dsp_tools/commands/validate_data/models/__init__.py +0 -0
- dsp_tools/commands/validate_data/models/api_responses.py +45 -0
- dsp_tools/commands/validate_data/models/input_problems.py +119 -0
- dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
- dsp_tools/commands/validate_data/models/validation.py +106 -0
- dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
- dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
- dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
- dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
- dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
- dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
- dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
- dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
- dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
- dsp_tools/commands/{xml_validate/sparql/resource_shacl.py → validate_data/sparql/cardinality_shacl.py} +45 -47
- dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
- dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
- dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
- dsp_tools/commands/validate_data/utils.py +59 -0
- dsp_tools/commands/validate_data/validate_data.py +283 -0
- dsp_tools/commands/validate_data/validation/__init__.py +0 -0
- dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
- dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
- dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
- dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
- dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
- dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
- dsp_tools/commands/xmlupload/models/formatted_text_value.py +0 -25
- dsp_tools/commands/xmlupload/models/ingest.py +56 -70
- dsp_tools/commands/xmlupload/models/input_problems.py +6 -14
- dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
- dsp_tools/commands/xmlupload/models/permission.py +0 -39
- dsp_tools/commands/xmlupload/models/{deserialise/xmlpermission.py → permissions_parsed.py} +2 -2
- dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
- dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
- dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
- dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
- dsp_tools/commands/xmlupload/models/upload_clients.py +3 -3
- dsp_tools/commands/xmlupload/models/upload_state.py +2 -4
- dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
- dsp_tools/commands/xmlupload/{ark2iri.py → prepare_xml_input/ark2iri.py} +1 -1
- dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
- dsp_tools/commands/xmlupload/{iiif_uri_validator.py → prepare_xml_input/iiif_uri_validator.py} +2 -14
- dsp_tools/commands/xmlupload/{list_client.py → prepare_xml_input/list_client.py} +15 -10
- dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
- dsp_tools/commands/xmlupload/resource_create_client.py +7 -468
- dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
- dsp_tools/commands/xmlupload/stash/{construct_and_analyze_graph.py → analyse_circular_reference_graph.py} +64 -157
- dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
- dsp_tools/commands/xmlupload/stash/graph_models.py +13 -8
- dsp_tools/commands/xmlupload/stash/stash_circular_references.py +48 -115
- dsp_tools/commands/xmlupload/stash/stash_models.py +4 -9
- dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +34 -40
- dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +98 -108
- dsp_tools/commands/xmlupload/upload_config.py +8 -0
- dsp_tools/commands/xmlupload/write_diagnostic_info.py +14 -9
- dsp_tools/commands/xmlupload/xmlupload.py +214 -192
- dsp_tools/config/__init__.py +0 -0
- dsp_tools/config/logger_config.py +69 -0
- dsp_tools/{utils → config}/warnings_config.py +4 -1
- dsp_tools/error/__init__.py +0 -0
- dsp_tools/error/custom_warnings.py +39 -0
- dsp_tools/error/exceptions.py +204 -0
- dsp_tools/error/problems.py +10 -0
- dsp_tools/error/xmllib_errors.py +20 -0
- dsp_tools/error/xmllib_warnings.py +54 -0
- dsp_tools/error/xmllib_warnings_util.py +159 -0
- dsp_tools/error/xsd_validation_error_msg.py +19 -0
- dsp_tools/legacy_models/__init__.py +0 -0
- dsp_tools/{models → legacy_models}/datetimestamp.py +7 -7
- dsp_tools/{models → legacy_models}/langstring.py +1 -1
- dsp_tools/{models → legacy_models}/projectContext.py +4 -4
- dsp_tools/resources/schema/data.xsd +108 -83
- dsp_tools/resources/schema/lists-only.json +4 -23
- dsp_tools/resources/schema/project.json +80 -35
- dsp_tools/resources/schema/properties-only.json +1 -4
- dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
- dsp_tools/resources/start-stack/docker-compose.yml +34 -30
- dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
- dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
- dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
- dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
- dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
- dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
- dsp_tools/utils/ansi_colors.py +32 -0
- dsp_tools/utils/data_formats/__init__.py +0 -0
- dsp_tools/utils/{date_util.py → data_formats/date_util.py} +13 -1
- dsp_tools/utils/data_formats/iri_util.py +30 -0
- dsp_tools/utils/{shared.py → data_formats/shared.py} +1 -35
- dsp_tools/utils/{uri_util.py → data_formats/uri_util.py} +12 -2
- dsp_tools/utils/fuseki_bloating.py +63 -0
- dsp_tools/utils/json_parsing.py +22 -0
- dsp_tools/utils/rdf_constants.py +42 -0
- dsp_tools/utils/rdflib_utils.py +10 -0
- dsp_tools/utils/replace_id_with_iri.py +66 -0
- dsp_tools/utils/request_utils.py +238 -0
- dsp_tools/utils/xml_parsing/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
- dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
- dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
- dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
- dsp_tools/xmllib/CLAUDE.md +302 -0
- dsp_tools/xmllib/__init__.py +49 -0
- dsp_tools/xmllib/general_functions.py +877 -0
- dsp_tools/xmllib/internal/__init__.py +0 -0
- dsp_tools/xmllib/internal/checkers.py +162 -0
- dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
- dsp_tools/xmllib/internal/constants.py +46 -0
- dsp_tools/xmllib/internal/input_converters.py +155 -0
- dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
- dsp_tools/xmllib/internal/serialise_resource.py +177 -0
- dsp_tools/xmllib/internal/serialise_values.py +152 -0
- dsp_tools/xmllib/internal/type_aliases.py +11 -0
- dsp_tools/xmllib/models/config_options.py +28 -0
- dsp_tools/xmllib/models/date_formats.py +48 -0
- dsp_tools/xmllib/models/dsp_base_resources.py +1380 -400
- dsp_tools/xmllib/models/internal/__init__.py +0 -0
- dsp_tools/xmllib/models/internal/file_values.py +172 -0
- dsp_tools/xmllib/models/internal/geometry.py +162 -0
- dsp_tools/xmllib/models/{migration_metadata.py → internal/migration_metadata.py} +14 -10
- dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
- dsp_tools/xmllib/models/internal/values.py +342 -0
- dsp_tools/xmllib/models/licenses/__init__.py +0 -0
- dsp_tools/xmllib/models/licenses/other.py +59 -0
- dsp_tools/xmllib/models/licenses/recommended.py +107 -0
- dsp_tools/xmllib/models/permissions.py +41 -0
- dsp_tools/xmllib/models/res.py +1782 -0
- dsp_tools/xmllib/models/root.py +313 -26
- dsp_tools/xmllib/value_checkers.py +310 -47
- dsp_tools/xmllib/value_converters.py +765 -8
- dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
- dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
- dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
- {dsp_tools-9.1.0.post11.dist-info → dsp_tools-18.3.0.post13.dist-info}/entry_points.txt +1 -0
- dsp_tools/commands/project/create/project_create.py +0 -1107
- dsp_tools/commands/project/create/project_create_lists.py +0 -204
- dsp_tools/commands/project/create/project_validate.py +0 -453
- dsp_tools/commands/project/models/project_definition.py +0 -12
- dsp_tools/commands/rosetta.py +0 -124
- dsp_tools/commands/template.py +0 -30
- dsp_tools/commands/xml_validate/api_connection.py +0 -122
- dsp_tools/commands/xml_validate/deserialise_input.py +0 -135
- dsp_tools/commands/xml_validate/make_data_rdf.py +0 -193
- dsp_tools/commands/xml_validate/models/data_deserialised.py +0 -108
- dsp_tools/commands/xml_validate/models/data_rdf.py +0 -214
- dsp_tools/commands/xml_validate/models/input_problems.py +0 -191
- dsp_tools/commands/xml_validate/models/validation.py +0 -29
- dsp_tools/commands/xml_validate/reformat_validaton_result.py +0 -89
- dsp_tools/commands/xml_validate/sparql/construct_shapes.py +0 -16
- dsp_tools/commands/xml_validate/xml_validate.py +0 -151
- dsp_tools/commands/xmlupload/check_consistency_with_ontology.py +0 -253
- dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py +0 -236
- dsp_tools/commands/xmlupload/models/deserialise/xmlresource.py +0 -171
- dsp_tools/commands/xmlupload/models/namespace_context.py +0 -39
- dsp_tools/commands/xmlupload/models/ontology_lookup_models.py +0 -161
- dsp_tools/commands/xmlupload/models/ontology_problem_models.py +0 -178
- dsp_tools/commands/xmlupload/models/serialise/jsonld_serialiser.py +0 -40
- dsp_tools/commands/xmlupload/models/serialise/serialise_value.py +0 -51
- dsp_tools/commands/xmlupload/ontology_client.py +0 -92
- dsp_tools/commands/xmlupload/project_client.py +0 -91
- dsp_tools/commands/xmlupload/read_validate_xml_file.py +0 -99
- dsp_tools/models/custom_warnings.py +0 -31
- dsp_tools/models/exceptions.py +0 -90
- dsp_tools/resources/0100-template-repo/template.json +0 -45
- dsp_tools/resources/0100-template-repo/template.xml +0 -27
- dsp_tools/resources/start-stack/docker-compose-validation.yml +0 -5
- dsp_tools/resources/start-stack/start-stack-config.yml +0 -4
- dsp_tools/resources/xml_validate/api-shapes.ttl +0 -411
- dsp_tools/resources/xml_validate/replace_namespace.xslt +0 -61
- dsp_tools/utils/connection_live.py +0 -383
- dsp_tools/utils/iri_util.py +0 -14
- dsp_tools/utils/logger_config.py +0 -41
- dsp_tools/utils/set_encoder.py +0 -20
- dsp_tools/utils/xml_utils.py +0 -145
- dsp_tools/utils/xml_validation.py +0 -197
- dsp_tools/utils/xml_validation_models.py +0 -68
- dsp_tools/xmllib/models/file_values.py +0 -78
- dsp_tools/xmllib/models/resource.py +0 -415
- dsp_tools/xmllib/models/values.py +0 -428
- dsp_tools-9.1.0.post11.dist-info/METADATA +0 -130
- dsp_tools-9.1.0.post11.dist-info/RECORD +0 -167
- dsp_tools-9.1.0.post11.dist-info/WHEEL +0 -4
- dsp_tools-9.1.0.post11.dist-info/licenses/LICENSE +0 -674
- /dsp_tools/{commands/excel2json/new_lists → clients}/__init__.py +0 -0
- /dsp_tools/commands/{excel2json/new_lists/models → create}/__init__.py +0 -0
- /dsp_tools/commands/{project → create/create_on_server}/__init__.py +0 -0
- /dsp_tools/commands/{project/create → create/models}/__init__.py +0 -0
- /dsp_tools/commands/{project/models → create/parsing}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate → create/serialisation}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate/models → excel2json/lists}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate/sparql → excel2json/lists/models}/__init__.py +0 -0
- /dsp_tools/commands/excel2json/{new_lists → lists}/models/deserialise.py +0 -0
- /dsp_tools/commands/{xmlupload/models/deserialise → get}/__init__.py +0 -0
- /dsp_tools/commands/{xmlupload/models/serialise → get/legacy_models}/__init__.py +0 -0
- /dsp_tools/commands/{project/models → get/legacy_models}/helpers.py +0 -0
- /dsp_tools/{models → commands/get/models}/__init__.py +0 -0
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import time
|
|
6
|
+
import warnings
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from dataclasses import field
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
from http import HTTPStatus
|
|
11
|
+
from typing import Any
|
|
12
|
+
from typing import Literal
|
|
13
|
+
from typing import Never
|
|
14
|
+
from typing import Union
|
|
15
|
+
|
|
16
|
+
from loguru import logger
|
|
17
|
+
from requests import JSONDecodeError
|
|
18
|
+
from requests import ReadTimeout
|
|
19
|
+
from requests import RequestException
|
|
20
|
+
from requests import Response
|
|
21
|
+
|
|
22
|
+
from dsp_tools.commands.get.legacy_models.context import Context
|
|
23
|
+
from dsp_tools.commands.get.legacy_models.helpers import OntoIri
|
|
24
|
+
from dsp_tools.config.logger_config import LOGGER_SAVEPATH
|
|
25
|
+
from dsp_tools.error.custom_warnings import DspToolsUnexpectedStatusCodeWarning
|
|
26
|
+
from dsp_tools.error.exceptions import DspToolsRequestException
|
|
27
|
+
from dsp_tools.error.exceptions import PermanentTimeOutError
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class ResponseCodeAndText:
    """Container holding the status code and the body text of a response."""

    # numeric status code of the response
    status_code: int
    # body of the response as text
    text: str
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class PostFiles:
    """Collection of the files that are sent along with a POST request."""

    files: list[PostFile]

    def to_dict(self) -> dict[str, tuple[str, Any, str] | tuple[str, Any]]:
        """Return a mapping from each file's name to the tuple produced by its ``to_tuple()``."""
        by_name = {}
        for post_file in self.files:
            name = post_file.file_name
            # files sharing a name overwrite each other, the last one wins
            by_name[name] = post_file.to_tuple()
        return by_name
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class PostFile:
    """A single file to upload: its name, the file object, and optionally its content type."""

    file_name: str
    fileobj: Any
    content_type: str | None = None

    def to_tuple(self) -> tuple[str, Any, str] | tuple[str, Any]:
        """Return ``(file_name, fileobj)``, extended by the content type if one is set."""
        if not self.content_type:
            # None and the empty string both count as "no content type"
            return self.file_name, self.fileobj
        return self.file_name, self.fileobj, self.content_type
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class SetEncoder(json.JSONEncoder):
    """JSON encoder that can additionally serialize sets, ``Context`` objects and ``OntoIri`` objects."""

    def default(self, o: Union[set[Any], Context, OntoIri]) -> Any:
        """Return a JSON-serializable replacement for ``o``; unknown types are left to the base encoder."""
        if isinstance(o, set):
            return list(o)
        if isinstance(o, Context):
            return o.toJsonObj()
        if isinstance(o, OntoIri):
            return {"iri": o.iri, "hashtag": o.hashtag}
        # the base implementation raises TypeError for anything it cannot handle
        return super().default(o)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass
class RequestParameters:
    """Bundle of the parameters of one request, convertible into keyword arguments via ``as_kwargs()``."""

    method: Literal["POST", "GET", "PUT", "DELETE"]
    url: str
    timeout: int
    data: dict[str, Any] | None = None
    # derived from `data` in __post_init__, therefore not accepted by __init__
    data_serialized: bytes | None = field(init=False, default=None)
    headers: dict[str, str] | None = None
    files: PostFiles | None = None

    def __post_init__(self) -> None:
        """Serialize the payload once, right after construction."""
        self.data_serialized = self._serialize_payload(self.data)

    def _serialize_payload(self, payload: dict[str, Any] | None) -> bytes | None:
        """Return the payload as UTF-8 encoded JSON, or None if the payload is None or empty."""
        if not payload:
            return None
        # If data is not encoded as bytes, issues can occur with non-ASCII characters,
        # where the content-length of the request will turn out to be different from the actual length.
        as_text = json.dumps(payload, cls=SetEncoder, ensure_ascii=False)
        return as_text.encode("utf-8")

    def as_kwargs(self) -> dict[str, Any]:
        """Return method, url, timeout, serialized data and headers as a dict, plus the files if there are any."""
        kwargs: dict[str, Any] = dict(
            method=self.method,
            url=self.url,
            timeout=self.timeout,
            data=self.data_serialized,
            headers=self.headers,
        )
        if self.files:
            kwargs["files"] = self.files.to_dict()
        return kwargs
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def log_request(params: RequestParameters, extra_headers: dict[str, Any] | None = None) -> None:
    """
    Write a debug-level log entry describing a request.

    The entry contains method, URL, timeout and headers, plus the payload and the file names if present.
    Headers are passed through sanitize_headers(), and a top-level "password" entry of the payload is masked.

    Args:
        params: the request to be logged
        extra_headers: additional headers to log; on key clashes the entries of params.headers win
    """
    merged_headers = extra_headers if extra_headers else {}
    if params.headers:
        # the dict union builds a new dict, so neither of the two inputs is mutated
        merged_headers = merged_headers | params.headers
    dumpobj = {
        "method": params.method,
        "url": params.url,
        "timeout": params.timeout,
        "headers": sanitize_headers(merged_headers),
    }
    if params.data:
        # work on a shallow copy, so that masking the password does not alter the real payload
        payload = params.data.copy()
        if "password" in payload:
            payload["password"] = "***"
        dumpobj["data"] = payload
    if params.files:
        # only the file names are logged, not the file contents
        dumpobj["files"] = [post_file.file_name for post_file in params.files.files]
    logger.debug(f"REQUEST: {json.dumps(dumpobj, cls=SetEncoder)}")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def log_response(response: Response, include_response_content: bool = True) -> None:
    """
    Write a debug-level log entry with the status code, the sanitized headers and the content of a response.

    Args:
        response: the response to be logged
        include_response_content: if False, a placeholder text is logged instead of the response content
    """
    dumpobj: dict[str, Any] = {
        "status_code": response.status_code,
        "headers": sanitize_headers(dict(response.headers)) if response.headers else "",
    }
    if not include_response_content:
        dumpobj["content"] = "too big to be logged"
    else:
        try:
            dumpobj["content"] = response.json()
        except JSONDecodeError:
            # the body is not JSON, so log it as plain text
            dumpobj["content"] = response.text
    logger.debug(f"RESPONSE: {json.dumps(dumpobj)}")
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def sanitize_headers(headers: dict[str, str | bytes]) -> dict[str, str]:
    """
    Remove sensitive information from headers, so that they can be logged safely.

    Bearer tokens in the "Authorization" header and the whole value of the "Set-Cookie" header are masked.
    Header names and the "Bearer" scheme are matched case-insensitively,
    because HTTP treats both as case-insensitive, so "authorization: bearer ..." must not slip through unmasked.

    Args:
        headers: header names mapped to their values, which may be str or bytes

    Returns:
        a new dict with the same keys (original spelling preserved) and str values, sensitive values masked
    """

    def _mask(key: str, value: str | bytes) -> str:
        if isinstance(value, bytes):
            # this is a logging helper: an undecodable byte must never abort the request handling
            value = value.decode("utf-8", errors="replace")
        name = key.lower()
        if name == "authorization" and value.lower().startswith("bearer "):
            return "Bearer ***"
        if name == "set-cookie":
            return "***"
        return value

    return {k: _mask(k, v) for k, v in headers.items()}
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def log_request_failure_and_sleep(reason: str, retry_counter: int, exc_info: bool) -> None:
    """
    Print and log why a request failed, then sleep before the next attempt.

    The sleeping time doubles with every retry and is capped at 300 seconds:

    ============= ================ =============================
    retry_counter seconds to sleep cumulative waiting time (min)
    ============= ================ =============================
    0             1                0
    1             2                0
    2             4                0
    3             8                0
    4             16               0
    5             32               1
    6             64               2
    7             128              4
    8             256              9
    9             300              14
    10            300              19
    11            300              24
    12            300              29
    15            300              44
    18            300              59
    24            300              89
    30            300              119
    ============= ================ =============================

    Args:
        reason: description of the failure, used as prefix of the message
        retry_counter: number of the current retry, determines the sleeping time
        exc_info: if True, the message is logged with logger.exception(), otherwise with logger.error()
    """
    sleep_time = min(2**retry_counter, 300)
    msg = f"{reason}: Try reconnecting to DSP server, next attempt in {sleep_time} seconds..."
    print(f"{datetime.now()}: {msg}")
    write_log = logger.exception if exc_info else logger.error
    write_log(f"{msg} ({retry_counter=:})")
    time.sleep(sleep_time)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def log_and_raise_timeouts(error: TimeoutError | ReadTimeout) -> Never:
    """
    Print and log a timeout error that occurred during a request, then raise a PermanentTimeOutError.

    Args:
        error: the timeout error that was caught

    Raises:
        PermanentTimeOutError: always; the original error is not chained to it
    """
    error_name = type(error).__name__
    msg = f"A '{error_name}' occurred during the connection to the DSP server."
    print(f"{datetime.now()}: {msg}")
    logger.error(msg)
    raise PermanentTimeOutError(msg) from None
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def should_retry(response: Response) -> bool:
    """
    Decide whether it makes sense to send a request again.

    A retry is sensible if the status code is in the 500 range
    or if the response text contains "try again later" (case-insensitive).
    If the environment variable DSP_TOOLS_TESTING is "true", the answer is always False.
    """
    is_5xx = 500 <= response.status_code < 600
    asks_to_retry = "try again later" in response.text.lower()
    if os.getenv("DSP_TOOLS_TESTING") == "true":  # set in .github/workflows/tests-on-push.yml
        return False
    return asks_to_retry or is_5xx
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def log_and_raise_request_exception(error: RequestException) -> Never:
    """
    Log an exception that occurred during an API call, then raise a DspToolsRequestException.

    The message names the log file, the class of the original exception,
    and, if the error carries a request, that request's method and URL.

    Args:
        error: the exception that was caught

    Raises:
        DspToolsRequestException: always; the original error is not chained to it
    """
    parts = [
        "During an API call the following exception occurred. ",
        f"Please contact support@dasch.swiss with the log file at {LOGGER_SAVEPATH} ",
        "if you required help resolving the issue.\n",
        f"Original exception name: {type(error).__name__}\n",
    ]
    if error.request:
        parts.append(f"Original request: {error.request.method} {error.request.url}")
    msg = "".join(parts)
    logger.exception(msg)
    raise DspToolsRequestException(msg) from None
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def log_and_warn_unexpected_non_ok_response(status_code: int, response_text: str) -> None:
    """
    Log a warning and emit a DspToolsUnexpectedStatusCodeWarning about an unexpected API response.

    Args:
        status_code: status code of the response
        response_text: text of the response, of which at most the first 200 characters are reported
    """
    # slicing never fails on shorter strings, so no length check is needed
    resp_txt = response_text[:200]
    msg = "".join(
        [
            "We got an unexpected API response during the following request. ",
            "Please contact the dsp-tools development team (at support@dasch.swiss) with your log file ",
            "so that we can handle this more gracefully in the future.\n",
            f"Response status code: {status_code}\n",
            f"Original Message: {resp_txt}",
        ]
    )
    logger.warning(msg)
    warnings.warn(DspToolsUnexpectedStatusCodeWarning(msg))
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def is_server_error(response: ResponseCodeAndText) -> bool:
    """Tell whether the status code lies between 500 (Internal Server Error) and 511, both inclusive."""
    lowest = HTTPStatus.INTERNAL_SERVER_ERROR
    highest = HTTPStatus.NETWORK_AUTHENTICATION_REQUIRED
    return lowest <= response.status_code <= highest
|
|
File without changes
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import cast
|
|
4
|
+
|
|
5
|
+
import regex
|
|
6
|
+
from lxml import etree
|
|
7
|
+
|
|
8
|
+
from dsp_tools.commands.xmlupload.models.permission import Permissions
|
|
9
|
+
from dsp_tools.commands.xmlupload.models.permissions_parsed import XmlPermission
|
|
10
|
+
from dsp_tools.legacy_models.projectContext import ProjectContext
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_authorship_lookup(root: etree._Element) -> dict[str, list[str]]:
    """
    Build a lookup from the "id" attribute of every <authorship> element to the list of its authors.

    Args:
        root: element whose subtree is searched for <authorship> elements

    Returns:
        the authors per authorship id, each with runs of spaces collapsed and surrounding whitespace removed
    """

    def get_one_author(ele: etree._Element) -> str:
        # The xsd file ensures that the body of the element contains valid non-whitespace characters
        raw = cast(str, ele.text)
        return regex.sub(r" +", " ", raw).strip()

    authorship_lookup = {}
    for authorship in root.iter(tag="authorship"):
        authors = []
        for child in authorship.iterchildren():
            authors.append(get_one_author(child))
        authorship_lookup[authorship.attrib["id"]] = authors
    return authorship_lookup
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_permissions_lookup(root: etree._Element, proj_context: ProjectContext) -> dict[str, Permissions]:
    """
    Build a lookup from permission id to the permission instance of every <permissions> element.

    Args:
        root: element whose subtree is searched for <permissions> elements
        proj_context: passed on to the constructor of XmlPermission

    Returns:
        the result of get_permission_instance() per permission id; for duplicate ids the last element wins
    """
    parsed_by_id = {}
    # all XmlPermission objects are created first, their instances are only requested afterwards
    for element in list(root.iter(tag="permissions")):
        parsed = XmlPermission(element, proj_context)
        parsed_by_id[parsed.permission_id] = parsed
    permissions_lookup = {}
    for permission_id, parsed in parsed_by_id.items():
        permissions_lookup[permission_id] = parsed.get_permission_instance()
    return permissions_lookup
|
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import regex
|
|
4
|
+
from lxml import etree
|
|
5
|
+
|
|
6
|
+
from dsp_tools.commands.validate_data.mappers import XML_TAG_TO_VALUE_TYPE_MAPPER
|
|
7
|
+
from dsp_tools.error.exceptions import InputError
|
|
8
|
+
from dsp_tools.utils.data_formats.iri_util import convert_api_url_for_correct_iri_namespace_construction
|
|
9
|
+
from dsp_tools.utils.rdf_constants import KNORA_API_PREFIX
|
|
10
|
+
from dsp_tools.utils.xml_parsing.models.parsed_resource import KnoraValueType
|
|
11
|
+
from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedFileValue
|
|
12
|
+
from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedFileValueMetadata
|
|
13
|
+
from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedMigrationMetadata
|
|
14
|
+
from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedResource
|
|
15
|
+
from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedValue
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_parsed_resources(root: etree._Element, api_url: str) -> list[ParsedResource]:
    """
    Parse all resources, regions, links and video/audio segments below the root element.

    Args:
        root: root element of the XML data
        api_url: API URL used to construct the absolute IRIs of project ontologies

    Returns:
        The parsed resources, in the order: resources, regions, links, video segments, audio segments
    """
    namespace_url = convert_api_url_for_correct_iri_namespace_construction(api_url)
    iri_lookup = _create_from_local_name_to_absolute_iri_lookup(root, namespace_url)

    parsed: list[ParsedResource] = [
        _parse_one_resource(ele, iri_lookup[ele.attrib["restype"]], iri_lookup)
        for ele in root.iterdescendants(tag="resource")
    ]
    # regions and links have a fixed knora-api class instead of a `restype` attribute
    for tag, class_name in (("region", "Region"), ("link", "LinkObj")):
        knora_type = f"{KNORA_API_PREFIX}{class_name}"
        parsed.extend(_parse_one_resource(ele, knora_type, iri_lookup) for ele in root.iterdescendants(tag=tag))
    for tag, segment_type in (("video-segment", "Video"), ("audio-segment", "Audio")):
        parsed.extend(_parse_segment(ele, segment_type) for ele in root.iterdescendants(tag=tag))
    return parsed
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _create_from_local_name_to_absolute_iri_lookup(root: etree._Element, api_url: str) -> dict[str, str]:
    """
    Map every `restype` and `name` attribute value found in the tree to its absolute IRI.

    Args:
        root: root element, which carries the `shortcode` and `default-ontology` attributes
        api_url: API URL used for the namespace of project ontologies

    Returns:
        Mapping from the name as written in the XML to the absolute IRI
    """
    project_shortcode = root.attrib["shortcode"]
    default_onto = root.attrib["default-ontology"]
    resource_types = {res.attrib["restype"] for res in root.iterdescendants(tag="resource")}
    # any element carrying a `name` attribute contributes that name
    named = {ele.attrib["name"] for ele in root.iter() if "name" in ele.attrib}
    return {
        name: _get_one_absolute_iri(name, project_shortcode, default_onto, api_url)
        for name in resource_types | named
    }
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _get_one_absolute_iri(local_name: str, shortcode: str, default_ontology: str, api_url: str) -> str:
|
|
49
|
+
split_name = local_name.split(":")
|
|
50
|
+
if len(split_name) == 1:
|
|
51
|
+
return f"{KNORA_API_PREFIX}{local_name}"
|
|
52
|
+
if len(split_name) == 2:
|
|
53
|
+
if split_name[0] == "":
|
|
54
|
+
return f"{_construct_namespace(api_url, shortcode, default_ontology)}{split_name[1]}"
|
|
55
|
+
if split_name[0] == "knora-api":
|
|
56
|
+
return f"{KNORA_API_PREFIX}{split_name[1]}"
|
|
57
|
+
return f"{_construct_namespace(api_url, shortcode, split_name[0])}{split_name[1]}"
|
|
58
|
+
raise InputError(
|
|
59
|
+
f"It is not permissible to have a colon in a property or resource class name. "
|
|
60
|
+
f"Please correct the following: {local_name}"
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _construct_namespace(api_url: str, shortcode: str, onto_name: str) -> str:
|
|
65
|
+
return f"{api_url}/ontology/{shortcode}/{onto_name}/v2#"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _parse_segment(segment: etree._Element, segment_type: str) -> ParsedResource:
    """
    Turn a video or audio segment element into a `ParsedResource` without a file value.

    Args:
        segment: the segment element
        segment_type: "Video" or "Audio" as passed by the caller; used to build the class and property names

    Returns:
        The parsed segment
    """
    parsed_values = _parse_segment_values(segment, segment_type)
    metadata = _parse_migration_metadata(segment)
    attributes = segment.attrib
    return ParsedResource(
        res_id=attributes["id"],
        res_type=f"{KNORA_API_PREFIX}{segment_type}Segment",
        label=attributes["label"],
        permissions_id=attributes.get("permissions"),
        values=parsed_values,
        file_value=None,
        migration_metadata=metadata,
    )
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _parse_segment_values(segment: etree._Element, segment_type: str) -> list[ParsedValue]:
    """
    Parse the children of a video or audio segment into values.

    Args:
        segment: the segment element
        segment_type: "Video" or "Audio" as passed by the caller; used for the `isSegmentOf` property name

    Returns:
        One `ParsedValue` per child element
    """
    parsed: list[ParsedValue] = []
    content: str | tuple[str, str] | None
    for child in segment.iterchildren():
        tag = child.tag
        # by default the property carries the tag name; `isSegmentOf` overrides it below
        prop_iri = f"{KNORA_API_PREFIX}{tag!s}"
        if tag == "isSegmentOf":
            kind = KnoraValueType.LINK_VALUE
            prop_iri = f"{KNORA_API_PREFIX}is{segment_type}SegmentOf"
            content = child.text.strip() if child.text else None
        elif tag == "hasSegmentBounds":
            kind = KnoraValueType.INTERVAL_VALUE
            content = (child.attrib["segment_start"], child.attrib["segment_end"])
        elif tag in ("hasDescription", "hasComment"):
            kind = KnoraValueType.RICHTEXT_VALUE
            content = _get_richtext_as_string(child)
        elif tag == "relatesTo":
            kind = KnoraValueType.LINK_VALUE
            content = child.text.strip() if child.text else None
        else:
            # every other child is treated as a simple text
            kind = KnoraValueType.SIMPLETEXT_VALUE
            content = _get_simpletext_as_string(child)
        parsed.append(
            ParsedValue(
                prop_name=prop_iri,
                value=content,
                value_type=kind,
                permissions_id=child.attrib.get("permissions"),
                comment=child.attrib.get("comment"),
            )
        )
    return parsed
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _parse_one_resource(resource: etree._Element, res_type: str, iri_lookup: dict[str, str]) -> ParsedResource:
    """
    Turn one resource-like element into a `ParsedResource`.

    Args:
        resource: the element to parse
        res_type: absolute IRI of the resource class
        iri_lookup: mapping from names as written in the XML to absolute IRIs

    Returns:
        The parsed resource
    """
    parsed_values, asset = _parse_values(resource, iri_lookup)
    metadata = _parse_migration_metadata(resource)
    attributes = resource.attrib
    return ParsedResource(
        res_id=attributes["id"],
        res_type=res_type,
        label=attributes["label"],
        permissions_id=attributes.get("permissions"),
        values=parsed_values,
        file_value=asset,
        migration_metadata=metadata,
    )
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _parse_migration_metadata(resource: etree._Element) -> ParsedMigrationMetadata | None:
    """
    Read the `iri`, `ark` and `creation_date` attributes of an element.

    Args:
        resource: the element to inspect

    Returns:
        The metadata if at least one of the three attributes has a non-empty value, otherwise None
    """
    attributes = resource.attrib
    iri = attributes.get("iri")
    ark = attributes.get("ark")
    creation_date = attributes.get("creation_date")
    if not (iri or ark or creation_date):
        return None
    return ParsedMigrationMetadata(iri=iri, ark=ark, creation_date=creation_date)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _parse_values(
    resource: etree._Element, iri_lookup: dict[str, str]
) -> tuple[list[ParsedValue], ParsedFileValue | None]:
    """
    Split the children of a resource into property values and an optional file value.

    Args:
        resource: the resource element
        iri_lookup: mapping from names as written in the XML to absolute IRIs

    Returns:
        The parsed values and the file value; the file value is None if there is neither a
        `bitstream` nor an `iiif-uri` child. If several exist, the last one is returned.
    """
    parsed_values = []
    file_value = None
    for child in resource.iterchildren():
        tag = child.tag
        if tag == "bitstream":
            file_value = _parse_file_values(child)
        elif tag == "iiif-uri":
            file_value = _parse_iiif_uri(child)
        else:
            parsed_values.extend(_parse_one_value(child, iri_lookup))
    return parsed_values, file_value
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _parse_one_value(values: etree._Element, iri_lookup: dict[str, str]) -> list[ParsedValue]:
    """
    Parse all values of one property element, delegating to the parser that fits its tag.

    Args:
        values: the property element containing the individual values
        iri_lookup: mapping from names as written in the XML to absolute IRIs

    Returns:
        One `ParsedValue` per child of the property element
    """
    prop_iri = iri_lookup[values.attrib["name"]]
    tag = values.tag
    if tag == "list-prop":
        return _parse_list_value(values, prop_iri)
    if tag == "text-prop":
        return _parse_text_value(values, prop_iri)
    return _parse_generic_values(values, prop_iri)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _parse_generic_values(values: etree._Element, prop_name: str) -> list[ParsedValue]:
    """
    Parse the values of a property whose value type follows directly from its tag.

    Args:
        values: the property element containing the individual values
        prop_name: absolute IRI of the property

    Returns:
        One `ParsedValue` per child; the value is the stripped text, or None if the child has no text
    """
    kind = XML_TAG_TO_VALUE_TYPE_MAPPER[str(values.tag)]
    return [
        ParsedValue(
            prop_name=prop_name,
            value=child.text.strip() if child.text else None,
            value_type=kind,
            permissions_id=child.attrib.get("permissions"),
            comment=child.attrib.get("comment"),
        )
        for child in values
    ]
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _parse_list_value(values: etree._Element, prop_name: str) -> list[ParsedValue]:
    """
    Parse the values of a `list-prop` element.

    Args:
        values: the property element; its `list` attribute names the list
        prop_name: absolute IRI of the property

    Returns:
        One `ParsedValue` per child, whose value is the tuple (list name, node name or None)
    """
    name_of_list = values.attrib["list"]
    return [
        ParsedValue(
            prop_name=prop_name,
            value=(name_of_list, node.text.strip() if node.text else None),
            value_type=KnoraValueType.LIST_VALUE,
            permissions_id=node.attrib.get("permissions"),
            comment=node.attrib.get("comment"),
        )
        for node in values
    ]
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _parse_text_value(values: etree._Element, prop_name: str) -> list[ParsedValue]:
    """
    Parse the values of a `text-prop` element.

    Args:
        values: the property element containing the individual text values
        prop_name: absolute IRI of the property

    Returns:
        One `ParsedValue` per child: rich text if its `encoding` attribute is "xml", simple text otherwise
    """
    parsed: list[ParsedValue] = []
    for child in values:
        if child.attrib["encoding"] == "xml":
            kind, extract = KnoraValueType.RICHTEXT_VALUE, _get_richtext_as_string
        else:
            kind, extract = KnoraValueType.SIMPLETEXT_VALUE, _get_simpletext_as_string
        content = extract(child)
        parsed.append(
            ParsedValue(
                prop_name=prop_name,
                value=content,
                value_type=kind,
                permissions_id=child.attrib.get("permissions"),
                comment=child.attrib.get("comment"),
            )
        )
    return parsed
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def _get_richtext_as_string(value: etree._Element) -> str | None:
    """
    Serialise the inner content of a rich-text element, keeping its markup.

    Args:
        value: the element holding the rich text

    Returns:
        None if the element has neither text nor children, otherwise the cleaned inner XML as a string
    """
    # Not entering any values within the tag results in None,
    # however if only whitespaces are entered then it should return an empty string so that the user message is precise.
    if len(value) == 0 and not value.text:
        return None
    tag_name = str(value.tag)
    serialised = etree.tostring(value, encoding="unicode", method="xml").strip()
    # peel off the element's own opening and closing tag so that only the inner content remains
    without_opening_tag = regex.sub(f"^<{tag_name}.*?>", "", serialised, count=1)
    inner_content = regex.sub(f"</{tag_name}>$", "", without_opening_tag)
    return _cleanup_formatted_text(inner_content.strip())
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _cleanup_formatted_text(xmlstr_orig: str) -> str:
    """
    Normalise the whitespace of a serialised xml-encoded text value.

    The following steps are applied in this order:
        - (multiple) line breaks are replaced by a space
        - multiple spaces or tabstops are replaced by a single space (except within `<code>` or `<pre>` tags)
        - a space directly after a `<br/>` tag is removed (except within `<code>` tags)
        - leading and trailing whitespace is removed

    Args:
        xmlstr_orig: content of the tag from the XML file, in serialized form

    Returns:
        purged string, suitable to be sent to DSP-API
    """
    substitutions = (
        # replace (multiple) line breaks by a space
        ("\n+", " "),
        # replace multiple spaces or tabstops by a single space (except within <code> or <pre> tags)
        # the regex selects all spaces/tabstops not followed by </xyz> without <xyz in between.
        # credits: https://stackoverflow.com/a/46937770/14414188
        ("( {2,}|\t+)(?!(.(?!<(code|pre)))*</(code|pre)>)", " "),
        # remove spaces after <br/> tags (except within <code> tags)
        ("((?<=<br/?>) )(?!(.(?!<code))*</code>)", ""),
    )
    cleaned = xmlstr_orig
    for pattern, replacement in substitutions:
        cleaned = regex.sub(pattern, replacement, cleaned)
    # remove leading and trailing spaces
    return cleaned.strip()
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _get_simpletext_as_string(value: etree._Element) -> str | None:
    """
    Extract the content of a simple-text element as a whitespace-normalised string.

    If the element contains child elements, they are kept in serialised form,
    so that the caller sees what was actually written in the XML.

    Args:
        value: the element holding the simple text

    Returns:
        None if the element has neither text nor children,
        otherwise the cleaned string (empty if the content consisted of whitespace only)
    """
    # Not entering any values within the tag results in None,
    # however if only whitespaces are entered then it should return an empty string so that the user message is precise.
    if len(value) == 0:
        if not (found := value.text):
            return None
    else:
        # Extract the inner XML content, preserving tags.
        # Only the direct children are serialised: etree.tostring() already emits the complete subtree
        # and the tail of each element, so iterating over all descendants would repeat nested elements.
        found = "".join(etree.tostring(child, encoding="unicode") for child in value.iterchildren())
        if value.text:
            found = value.text + found
    # replace multiple spaces or tabstops by a single space
    str_val = regex.sub(r" {2,}|\t+", " ", found)
    # remove leading and trailing spaces (of every line, but also of the entire string)
    str_val = "\n".join([s.strip() for s in str_val.split("\n")])
    result = str_val.strip()
    return result
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def _parse_iiif_uri(iiif_uri: etree._Element) -> ParsedFileValue:
    """
    Turn an `iiif-uri` element into a file value of type `STILL_IMAGE_IIIF`.

    Args:
        iiif_uri: the element holding the URI as text

    Returns:
        The parsed file value; its value is None if the element has no text
    """
    raw_uri = iiif_uri.text
    uri = raw_uri.strip() if raw_uri else None
    return ParsedFileValue(
        value=uri,
        value_type=KnoraValueType.STILL_IMAGE_IIIF,
        metadata=_parse_file_metadata(iiif_uri),
    )
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def _parse_file_values(file_value: etree._Element) -> ParsedFileValue:
    """
    Turn a `bitstream` element into a file value whose type is derived from the file extension.

    Args:
        file_value: the element holding the file name as text

    Returns:
        The parsed file value; value and value type are None if the element has no text
    """
    raw_name = file_value.text
    file_name = raw_name.strip() if raw_name else None
    return ParsedFileValue(
        value=file_name,
        value_type=_get_file_value_type(file_name),
        metadata=_parse_file_metadata(file_value),
    )
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def _parse_file_metadata(file_value: etree._Element) -> ParsedFileValueMetadata:
    """
    Read the legal-info and permission attributes of a file element.

    Args:
        file_value: the `bitstream` or `iiif-uri` element

    Returns:
        The metadata; every attribute that is absent from the element is None
    """
    attributes = file_value.attrib
    return ParsedFileValueMetadata(
        license_iri=attributes.get("license"),
        copyright_holder=attributes.get("copyright-holder"),
        authorship_id=attributes.get("authorship-id"),
        permissions_id=attributes.get("permissions"),
    )
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def _get_file_value_type(file_name: str | None) -> KnoraValueType | None: # noqa:PLR0911 (Too many return statements)
|
|
307
|
+
if not file_name:
|
|
308
|
+
return None
|
|
309
|
+
file_extension = Path(file_name).suffix[1:].lower()
|
|
310
|
+
match file_extension:
|
|
311
|
+
case "zip" | "tar" | "gz" | "z" | "tgz" | "gzip" | "7z":
|
|
312
|
+
return KnoraValueType.ARCHIVE_FILE
|
|
313
|
+
case "mp3" | "wav":
|
|
314
|
+
return KnoraValueType.AUDIO_FILE
|
|
315
|
+
case "pdf" | "doc" | "docx" | "xls" | "xlsx" | "ppt" | "pptx" | "epub":
|
|
316
|
+
return KnoraValueType.DOCUMENT_FILE
|
|
317
|
+
case "mp4":
|
|
318
|
+
return KnoraValueType.MOVING_IMAGE_FILE
|
|
319
|
+
# jpx is the extension of the files returned by dsp-ingest
|
|
320
|
+
case "jpg" | "jpeg" | "jp2" | "png" | "tif" | "tiff" | "jpx":
|
|
321
|
+
return KnoraValueType.STILL_IMAGE_FILE
|
|
322
|
+
case "odd" | "rng" | "txt" | "xml" | "htm" | "html" | "xsd" | "xsl" | "csv" | "json":
|
|
323
|
+
return KnoraValueType.TEXT_FILE
|
|
324
|
+
case _:
|
|
325
|
+
return None
|
|
File without changes
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from enum import auto
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class ParsedResource:
    """A resource as read from the XML, with its values, optional file value and optional migration metadata."""

    # value of the `id` attribute
    res_id: str
    # IRI of the resource class
    res_type: str
    # value of the `label` attribute
    label: str
    # value of the `permissions` attribute, if present
    permissions_id: str | None
    values: list[ParsedValue]
    file_value: ParsedFileValue | None
    migration_metadata: ParsedMigrationMetadata | None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
|
|
20
|
+
class ParsedMigrationMetadata:
|
|
21
|
+
iri: str | None
|
|
22
|
+
ark: str | None
|
|
23
|
+
creation_date: str | None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class ParsedValue:
    """One value of a property as read from the XML."""

    # IRI of the property
    prop_name: str
    # plain string, a pair of strings (e.g. list name and node name), or None if nothing was entered
    value: str | tuple[str | None, str | None] | None
    value_type: KnoraValueType
    # value of the `permissions` attribute, if present
    permissions_id: str | None
    # value of the `comment` attribute, if present
    comment: str | None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class ParsedFileValue:
    """A file or IIIF-URI value as read from the XML, together with its metadata."""

    # file name or URI; None if nothing was entered
    value: str | None
    # None if no file type could be determined
    value_type: KnoraValueType | None
    metadata: ParsedFileValueMetadata
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
|
|
43
|
+
class ParsedFileValueMetadata:
|
|
44
|
+
license_iri: str | None
|
|
45
|
+
copyright_holder: str | None
|
|
46
|
+
authorship_id: str | None
|
|
47
|
+
permissions_id: str | None
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class KnoraValueType(Enum):
    """
    Each member stands for one knora value type,
    for example: BOOLEAN_VALUE -> knora-api:BooleanValue
    """

    # property values
    BOOLEAN_VALUE = auto()
    COLOR_VALUE = auto()
    DATE_VALUE = auto()
    DECIMAL_VALUE = auto()
    GEONAME_VALUE = auto()
    GEOM_VALUE = auto()
    INT_VALUE = auto()
    INTERVAL_VALUE = auto()
    LINK_VALUE = auto()
    LIST_VALUE = auto()
    SIMPLETEXT_VALUE = auto()
    RICHTEXT_VALUE = auto()
    TIME_VALUE = auto()
    URI_VALUE = auto()

    # file values
    ARCHIVE_FILE = auto()
    AUDIO_FILE = auto()
    DOCUMENT_FILE = auto()
    MOVING_IMAGE_FILE = auto()
    STILL_IMAGE_FILE = auto()
    STILL_IMAGE_IIIF = auto()
    TEXT_FILE = auto()
|