dsp-tools 9.1.0.post11__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsp_tools/__init__.py +4 -0
- dsp_tools/cli/args.py +36 -0
- dsp_tools/cli/call_action.py +51 -231
- dsp_tools/cli/call_action_files_only.py +101 -0
- dsp_tools/cli/call_action_with_network.py +207 -0
- dsp_tools/cli/create_parsers.py +156 -58
- dsp_tools/cli/entry_point.py +56 -26
- dsp_tools/cli/utils.py +87 -0
- dsp_tools/clients/CLAUDE.md +420 -0
- dsp_tools/clients/authentication_client.py +14 -0
- dsp_tools/clients/authentication_client_live.py +66 -0
- dsp_tools/{utils → clients}/connection.py +2 -18
- dsp_tools/clients/connection_live.py +233 -0
- dsp_tools/clients/fuseki_metrics.py +60 -0
- dsp_tools/clients/group_user_clients.py +35 -0
- dsp_tools/clients/group_user_clients_live.py +181 -0
- dsp_tools/clients/legal_info_client.py +23 -0
- dsp_tools/clients/legal_info_client_live.py +132 -0
- dsp_tools/clients/list_client.py +49 -0
- dsp_tools/clients/list_client_live.py +166 -0
- dsp_tools/clients/metadata_client.py +24 -0
- dsp_tools/clients/metadata_client_live.py +47 -0
- dsp_tools/clients/ontology_clients.py +49 -0
- dsp_tools/clients/ontology_create_client_live.py +166 -0
- dsp_tools/clients/ontology_get_client_live.py +80 -0
- dsp_tools/clients/permissions_client.py +68 -0
- dsp_tools/clients/project_client.py +16 -0
- dsp_tools/clients/project_client_live.py +66 -0
- dsp_tools/commands/create/communicate_problems.py +24 -0
- dsp_tools/commands/create/create.py +134 -0
- dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
- dsp_tools/commands/create/create_on_server/classes.py +99 -0
- dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
- dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
- dsp_tools/commands/create/create_on_server/group_users.py +165 -0
- dsp_tools/commands/create/create_on_server/lists.py +163 -0
- dsp_tools/commands/create/create_on_server/mappers.py +12 -0
- dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
- dsp_tools/commands/create/create_on_server/ontology.py +52 -0
- dsp_tools/commands/create/create_on_server/project.py +68 -0
- dsp_tools/commands/create/create_on_server/properties.py +119 -0
- dsp_tools/commands/create/exceptions.py +29 -0
- dsp_tools/commands/create/lists_only.py +66 -0
- dsp_tools/commands/create/models/create_problems.py +87 -0
- dsp_tools/commands/create/models/parsed_ontology.py +88 -0
- dsp_tools/commands/create/models/parsed_project.py +81 -0
- dsp_tools/commands/create/models/rdf_ontology.py +12 -0
- dsp_tools/commands/create/models/server_project_info.py +100 -0
- dsp_tools/commands/create/parsing/parse_lists.py +45 -0
- dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
- dsp_tools/commands/create/parsing/parse_project.py +149 -0
- dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
- dsp_tools/commands/create/project_validate.py +595 -0
- dsp_tools/commands/create/serialisation/ontology.py +119 -0
- dsp_tools/commands/create/serialisation/project.py +44 -0
- dsp_tools/commands/excel2json/CLAUDE.md +101 -0
- dsp_tools/commands/excel2json/json_header.py +57 -23
- dsp_tools/commands/excel2json/{new_lists → lists}/compliance_checks.py +26 -26
- dsp_tools/commands/excel2json/{new_lists/make_new_lists.py → lists/make_lists.py} +19 -18
- dsp_tools/commands/excel2json/{new_lists → lists}/models/input_error.py +1 -12
- dsp_tools/commands/excel2json/{new_lists → lists}/models/serialise.py +9 -5
- dsp_tools/commands/excel2json/{new_lists → lists}/utils.py +4 -4
- dsp_tools/commands/excel2json/models/input_error.py +31 -11
- dsp_tools/commands/excel2json/models/json_header.py +53 -15
- dsp_tools/commands/excel2json/models/ontology.py +4 -3
- dsp_tools/commands/excel2json/{lists.py → old_lists.py} +26 -112
- dsp_tools/commands/excel2json/project.py +78 -34
- dsp_tools/commands/excel2json/properties.py +57 -36
- dsp_tools/commands/excel2json/resources.py +32 -12
- dsp_tools/commands/excel2json/utils.py +20 -1
- dsp_tools/commands/excel2xml/__init__.py +2 -2
- dsp_tools/commands/excel2xml/excel2xml_cli.py +7 -15
- dsp_tools/commands/excel2xml/excel2xml_lib.py +138 -493
- dsp_tools/commands/excel2xml/propertyelement.py +5 -5
- dsp_tools/commands/{project → get}/get.py +29 -13
- dsp_tools/commands/get/get_permissions.py +257 -0
- dsp_tools/commands/get/get_permissions_legacy.py +89 -0
- dsp_tools/commands/{project/models → get/legacy_models}/context.py +6 -6
- dsp_tools/commands/{project/models → get/legacy_models}/group.py +5 -10
- dsp_tools/commands/{project/models → get/legacy_models}/listnode.py +5 -35
- dsp_tools/commands/{project/models → get/legacy_models}/model.py +1 -1
- dsp_tools/commands/{project/models → get/legacy_models}/ontology.py +9 -14
- dsp_tools/commands/{project/models → get/legacy_models}/project.py +13 -6
- dsp_tools/commands/{project/models → get/legacy_models}/propertyclass.py +9 -16
- dsp_tools/commands/{project/models → get/legacy_models}/resourceclass.py +8 -46
- dsp_tools/commands/{project/models → get/legacy_models}/user.py +19 -60
- dsp_tools/commands/get/models/permissions_models.py +10 -0
- dsp_tools/commands/id2iri.py +20 -10
- dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +81 -56
- dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +4 -10
- dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +97 -37
- dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +2 -2
- dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +9 -10
- dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +3 -3
- dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +2 -10
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +12 -2
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +8 -9
- dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +18 -18
- dsp_tools/commands/start_stack.py +126 -77
- dsp_tools/commands/update_legal/CLAUDE.md +344 -0
- dsp_tools/commands/update_legal/__init__.py +0 -0
- dsp_tools/commands/update_legal/core.py +182 -0
- dsp_tools/commands/update_legal/csv_operations.py +135 -0
- dsp_tools/commands/update_legal/models.py +87 -0
- dsp_tools/commands/update_legal/xml_operations.py +247 -0
- dsp_tools/commands/validate_data/CLAUDE.md +159 -0
- dsp_tools/commands/validate_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/constants.py +59 -0
- dsp_tools/commands/validate_data/mappers.py +143 -0
- dsp_tools/commands/validate_data/models/__init__.py +0 -0
- dsp_tools/commands/validate_data/models/api_responses.py +45 -0
- dsp_tools/commands/validate_data/models/input_problems.py +119 -0
- dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
- dsp_tools/commands/validate_data/models/validation.py +106 -0
- dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
- dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
- dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
- dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
- dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
- dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
- dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
- dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
- dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
- dsp_tools/commands/{xml_validate/sparql/resource_shacl.py → validate_data/sparql/cardinality_shacl.py} +45 -47
- dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
- dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
- dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
- dsp_tools/commands/validate_data/utils.py +59 -0
- dsp_tools/commands/validate_data/validate_data.py +283 -0
- dsp_tools/commands/validate_data/validation/__init__.py +0 -0
- dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
- dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
- dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
- dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
- dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
- dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
- dsp_tools/commands/xmlupload/models/formatted_text_value.py +0 -25
- dsp_tools/commands/xmlupload/models/ingest.py +56 -70
- dsp_tools/commands/xmlupload/models/input_problems.py +6 -14
- dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
- dsp_tools/commands/xmlupload/models/permission.py +0 -39
- dsp_tools/commands/xmlupload/models/{deserialise/xmlpermission.py → permissions_parsed.py} +2 -2
- dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
- dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
- dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
- dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
- dsp_tools/commands/xmlupload/models/upload_clients.py +3 -3
- dsp_tools/commands/xmlupload/models/upload_state.py +2 -4
- dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
- dsp_tools/commands/xmlupload/{ark2iri.py → prepare_xml_input/ark2iri.py} +1 -1
- dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
- dsp_tools/commands/xmlupload/{iiif_uri_validator.py → prepare_xml_input/iiif_uri_validator.py} +2 -14
- dsp_tools/commands/xmlupload/{list_client.py → prepare_xml_input/list_client.py} +15 -10
- dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
- dsp_tools/commands/xmlupload/resource_create_client.py +7 -468
- dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
- dsp_tools/commands/xmlupload/stash/{construct_and_analyze_graph.py → analyse_circular_reference_graph.py} +64 -157
- dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
- dsp_tools/commands/xmlupload/stash/graph_models.py +13 -8
- dsp_tools/commands/xmlupload/stash/stash_circular_references.py +48 -115
- dsp_tools/commands/xmlupload/stash/stash_models.py +4 -9
- dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +34 -40
- dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +98 -108
- dsp_tools/commands/xmlupload/upload_config.py +8 -0
- dsp_tools/commands/xmlupload/write_diagnostic_info.py +14 -9
- dsp_tools/commands/xmlupload/xmlupload.py +214 -192
- dsp_tools/config/__init__.py +0 -0
- dsp_tools/config/logger_config.py +69 -0
- dsp_tools/{utils → config}/warnings_config.py +4 -1
- dsp_tools/error/__init__.py +0 -0
- dsp_tools/error/custom_warnings.py +39 -0
- dsp_tools/error/exceptions.py +204 -0
- dsp_tools/error/problems.py +10 -0
- dsp_tools/error/xmllib_errors.py +20 -0
- dsp_tools/error/xmllib_warnings.py +54 -0
- dsp_tools/error/xmllib_warnings_util.py +159 -0
- dsp_tools/error/xsd_validation_error_msg.py +19 -0
- dsp_tools/legacy_models/__init__.py +0 -0
- dsp_tools/{models → legacy_models}/datetimestamp.py +7 -7
- dsp_tools/{models → legacy_models}/langstring.py +1 -1
- dsp_tools/{models → legacy_models}/projectContext.py +4 -4
- dsp_tools/resources/schema/data.xsd +108 -83
- dsp_tools/resources/schema/lists-only.json +4 -23
- dsp_tools/resources/schema/project.json +80 -35
- dsp_tools/resources/schema/properties-only.json +1 -4
- dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
- dsp_tools/resources/start-stack/docker-compose.yml +34 -30
- dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
- dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
- dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
- dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
- dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
- dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
- dsp_tools/utils/ansi_colors.py +32 -0
- dsp_tools/utils/data_formats/__init__.py +0 -0
- dsp_tools/utils/{date_util.py → data_formats/date_util.py} +13 -1
- dsp_tools/utils/data_formats/iri_util.py +30 -0
- dsp_tools/utils/{shared.py → data_formats/shared.py} +1 -35
- dsp_tools/utils/{uri_util.py → data_formats/uri_util.py} +12 -2
- dsp_tools/utils/fuseki_bloating.py +63 -0
- dsp_tools/utils/json_parsing.py +22 -0
- dsp_tools/utils/rdf_constants.py +42 -0
- dsp_tools/utils/rdflib_utils.py +10 -0
- dsp_tools/utils/replace_id_with_iri.py +66 -0
- dsp_tools/utils/request_utils.py +238 -0
- dsp_tools/utils/xml_parsing/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
- dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
- dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
- dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
- dsp_tools/xmllib/CLAUDE.md +302 -0
- dsp_tools/xmllib/__init__.py +49 -0
- dsp_tools/xmllib/general_functions.py +877 -0
- dsp_tools/xmllib/internal/__init__.py +0 -0
- dsp_tools/xmllib/internal/checkers.py +162 -0
- dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
- dsp_tools/xmllib/internal/constants.py +46 -0
- dsp_tools/xmllib/internal/input_converters.py +155 -0
- dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
- dsp_tools/xmllib/internal/serialise_resource.py +177 -0
- dsp_tools/xmllib/internal/serialise_values.py +152 -0
- dsp_tools/xmllib/internal/type_aliases.py +11 -0
- dsp_tools/xmllib/models/config_options.py +28 -0
- dsp_tools/xmllib/models/date_formats.py +48 -0
- dsp_tools/xmllib/models/dsp_base_resources.py +1380 -400
- dsp_tools/xmllib/models/internal/__init__.py +0 -0
- dsp_tools/xmllib/models/internal/file_values.py +172 -0
- dsp_tools/xmllib/models/internal/geometry.py +162 -0
- dsp_tools/xmllib/models/{migration_metadata.py → internal/migration_metadata.py} +14 -10
- dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
- dsp_tools/xmllib/models/internal/values.py +342 -0
- dsp_tools/xmllib/models/licenses/__init__.py +0 -0
- dsp_tools/xmllib/models/licenses/other.py +59 -0
- dsp_tools/xmllib/models/licenses/recommended.py +107 -0
- dsp_tools/xmllib/models/permissions.py +41 -0
- dsp_tools/xmllib/models/res.py +1782 -0
- dsp_tools/xmllib/models/root.py +313 -26
- dsp_tools/xmllib/value_checkers.py +310 -47
- dsp_tools/xmllib/value_converters.py +765 -8
- dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
- dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
- dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
- {dsp_tools-9.1.0.post11.dist-info → dsp_tools-18.3.0.post13.dist-info}/entry_points.txt +1 -0
- dsp_tools/commands/project/create/project_create.py +0 -1107
- dsp_tools/commands/project/create/project_create_lists.py +0 -204
- dsp_tools/commands/project/create/project_validate.py +0 -453
- dsp_tools/commands/project/models/project_definition.py +0 -12
- dsp_tools/commands/rosetta.py +0 -124
- dsp_tools/commands/template.py +0 -30
- dsp_tools/commands/xml_validate/api_connection.py +0 -122
- dsp_tools/commands/xml_validate/deserialise_input.py +0 -135
- dsp_tools/commands/xml_validate/make_data_rdf.py +0 -193
- dsp_tools/commands/xml_validate/models/data_deserialised.py +0 -108
- dsp_tools/commands/xml_validate/models/data_rdf.py +0 -214
- dsp_tools/commands/xml_validate/models/input_problems.py +0 -191
- dsp_tools/commands/xml_validate/models/validation.py +0 -29
- dsp_tools/commands/xml_validate/reformat_validaton_result.py +0 -89
- dsp_tools/commands/xml_validate/sparql/construct_shapes.py +0 -16
- dsp_tools/commands/xml_validate/xml_validate.py +0 -151
- dsp_tools/commands/xmlupload/check_consistency_with_ontology.py +0 -253
- dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py +0 -236
- dsp_tools/commands/xmlupload/models/deserialise/xmlresource.py +0 -171
- dsp_tools/commands/xmlupload/models/namespace_context.py +0 -39
- dsp_tools/commands/xmlupload/models/ontology_lookup_models.py +0 -161
- dsp_tools/commands/xmlupload/models/ontology_problem_models.py +0 -178
- dsp_tools/commands/xmlupload/models/serialise/jsonld_serialiser.py +0 -40
- dsp_tools/commands/xmlupload/models/serialise/serialise_value.py +0 -51
- dsp_tools/commands/xmlupload/ontology_client.py +0 -92
- dsp_tools/commands/xmlupload/project_client.py +0 -91
- dsp_tools/commands/xmlupload/read_validate_xml_file.py +0 -99
- dsp_tools/models/custom_warnings.py +0 -31
- dsp_tools/models/exceptions.py +0 -90
- dsp_tools/resources/0100-template-repo/template.json +0 -45
- dsp_tools/resources/0100-template-repo/template.xml +0 -27
- dsp_tools/resources/start-stack/docker-compose-validation.yml +0 -5
- dsp_tools/resources/start-stack/start-stack-config.yml +0 -4
- dsp_tools/resources/xml_validate/api-shapes.ttl +0 -411
- dsp_tools/resources/xml_validate/replace_namespace.xslt +0 -61
- dsp_tools/utils/connection_live.py +0 -383
- dsp_tools/utils/iri_util.py +0 -14
- dsp_tools/utils/logger_config.py +0 -41
- dsp_tools/utils/set_encoder.py +0 -20
- dsp_tools/utils/xml_utils.py +0 -145
- dsp_tools/utils/xml_validation.py +0 -197
- dsp_tools/utils/xml_validation_models.py +0 -68
- dsp_tools/xmllib/models/file_values.py +0 -78
- dsp_tools/xmllib/models/resource.py +0 -415
- dsp_tools/xmllib/models/values.py +0 -428
- dsp_tools-9.1.0.post11.dist-info/METADATA +0 -130
- dsp_tools-9.1.0.post11.dist-info/RECORD +0 -167
- dsp_tools-9.1.0.post11.dist-info/WHEEL +0 -4
- dsp_tools-9.1.0.post11.dist-info/licenses/LICENSE +0 -674
- /dsp_tools/{commands/excel2json/new_lists → clients}/__init__.py +0 -0
- /dsp_tools/commands/{excel2json/new_lists/models → create}/__init__.py +0 -0
- /dsp_tools/commands/{project → create/create_on_server}/__init__.py +0 -0
- /dsp_tools/commands/{project/create → create/models}/__init__.py +0 -0
- /dsp_tools/commands/{project/models → create/parsing}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate → create/serialisation}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate/models → excel2json/lists}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate/sparql → excel2json/lists/models}/__init__.py +0 -0
- /dsp_tools/commands/excel2json/{new_lists → lists}/models/deserialise.py +0 -0
- /dsp_tools/commands/{xmlupload/models/deserialise → get}/__init__.py +0 -0
- /dsp_tools/commands/{xmlupload/models/serialise → get/legacy_models}/__init__.py +0 -0
- /dsp_tools/commands/{project/models → get/legacy_models}/helpers.py +0 -0
- /dsp_tools/{models → commands/get/models}/__init__.py +0 -0
|
@@ -1,383 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import os
|
|
3
|
-
import time
|
|
4
|
-
from dataclasses import dataclass
|
|
5
|
-
from dataclasses import field
|
|
6
|
-
from datetime import datetime
|
|
7
|
-
from functools import partial
|
|
8
|
-
from importlib.metadata import version
|
|
9
|
-
from typing import Any
|
|
10
|
-
from typing import Literal
|
|
11
|
-
from typing import Never
|
|
12
|
-
from typing import Optional
|
|
13
|
-
from typing import cast
|
|
14
|
-
|
|
15
|
-
import regex
|
|
16
|
-
from loguru import logger
|
|
17
|
-
from requests import ReadTimeout
|
|
18
|
-
from requests import RequestException
|
|
19
|
-
from requests import Response
|
|
20
|
-
from requests import Session
|
|
21
|
-
|
|
22
|
-
from dsp_tools.models.exceptions import BadCredentialsError
|
|
23
|
-
from dsp_tools.models.exceptions import BaseError
|
|
24
|
-
from dsp_tools.models.exceptions import InvalidInputError
|
|
25
|
-
from dsp_tools.models.exceptions import PermanentConnectionError
|
|
26
|
-
from dsp_tools.models.exceptions import PermanentTimeOutError
|
|
27
|
-
from dsp_tools.models.exceptions import UserError
|
|
28
|
-
from dsp_tools.utils.connection import Connection
|
|
29
|
-
from dsp_tools.utils.logger_config import WARNINGS_SAVEPATH
|
|
30
|
-
from dsp_tools.utils.set_encoder import SetEncoder
|
|
31
|
-
|
|
32
|
-
HTTP_OK = 200
|
|
33
|
-
HTTP_UNAUTHORIZED = 401
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
@dataclass
|
|
37
|
-
class RequestParameters:
|
|
38
|
-
method: Literal["POST", "GET", "PUT", "DELETE"]
|
|
39
|
-
url: str
|
|
40
|
-
timeout: int
|
|
41
|
-
data: dict[str, Any] | None = None
|
|
42
|
-
data_serialized: bytes | None = field(init=False, default=None)
|
|
43
|
-
headers: dict[str, str] | None = None
|
|
44
|
-
files: dict[str, tuple[str, Any]] | None = None
|
|
45
|
-
|
|
46
|
-
def __post_init__(self) -> None:
|
|
47
|
-
self.data_serialized = self._serialize_payload(self.data)
|
|
48
|
-
|
|
49
|
-
def _serialize_payload(self, payload: dict[str, Any] | None) -> bytes | None:
|
|
50
|
-
# If data is not encoded as bytes, issues can occur with non-ASCII characters,
|
|
51
|
-
# where the content-length of the request will turn out to be different from the actual length.
|
|
52
|
-
return json.dumps(payload, cls=SetEncoder, ensure_ascii=False).encode("utf-8") if payload else None
|
|
53
|
-
|
|
54
|
-
def as_kwargs(self) -> dict[str, Any]:
|
|
55
|
-
return {
|
|
56
|
-
"method": self.method,
|
|
57
|
-
"url": self.url,
|
|
58
|
-
"timeout": self.timeout,
|
|
59
|
-
"data": self.data_serialized,
|
|
60
|
-
"headers": self.headers,
|
|
61
|
-
"files": self.files,
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
@dataclass
|
|
66
|
-
class ConnectionLive(Connection):
|
|
67
|
-
"""
|
|
68
|
-
A Connection instance represents a connection to a DSP server.
|
|
69
|
-
|
|
70
|
-
Attributes:
|
|
71
|
-
server: address of the server, e.g https://api.dasch.swiss
|
|
72
|
-
token: session token received by the server after login
|
|
73
|
-
"""
|
|
74
|
-
|
|
75
|
-
# address of the server, e.g https://api.dasch.swiss
server: str
# session token received by the server after login
token: Optional[str] = None
# default_factory creates one Session per instance.
# With default=Session(), the Session would be created once at class definition time
# and be shared by all instances, including its User-Agent and Authorization headers.
session: Session = field(init=False, default_factory=Session)
# downtimes of server-side services -> API still processes request
# -> retry too early has side effects (e.g. duplicated resources)
# hence the long timeout (in seconds) for PUT and POST requests
timeout_put_post: int = field(init=False, default=30 * 60)
# timeout (in seconds) for GET and DELETE requests
timeout_get_delete: int = field(init=False, default=20)
|
|
82
|
-
|
|
83
|
-
def __post_init__(self) -> None:
    """Set the User-Agent header of the session, and remove one trailing slash from the server address."""
    user_agent = f'DSP-TOOLS/{version("dsp-tools")}'
    self.session.headers["User-Agent"] = user_agent
    # removesuffix() strips at most one slash
    self.server = self.server.removesuffix("/")
|
|
87
|
-
|
|
88
|
-
def login(self, email: str, password: str) -> None:
    """
    Request a session token from the server,
    and store it both in this object and in the Authorization header of the session.

    Args:
        email: email address of the user
        password: password of the user

    Raises:
        UserError: if the credentials are rejected, if the server is permanently unreachable,
            or if the response of the server contains no token
    """
    credentials = {"email": email, "password": password}
    try:
        response = self.post(route="/v2/authentication", data=credentials, timeout=10)
    except BadCredentialsError:
        raise UserError(f"Username and/or password are not valid on server '{self.server}'") from None
    except PermanentConnectionError as e:
        raise UserError(e.message) from None

    token = response.get("token")
    if not token:
        raise UserError("Unable to retrieve a token from the server with the provided credentials.")
    self.token = token
    self.session.headers["Authorization"] = f"Bearer {self.token}"
|
|
113
|
-
|
|
114
|
-
def logout(self) -> None:
    """
    Delete the token on the server and in this object.
    Does nothing if there is no token.
    """
    if not self.token:
        return
    self.delete(route="/v2/authentication")
    self.token = None
    # The Authorization header is only set by login() and when the session is renewed.
    # If the token was handed over to the constructor, the header may not exist,
    # so it is removed with pop() instead of del, which would raise a KeyError.
    self.session.headers.pop("Authorization", None)
|
|
122
|
-
|
|
123
|
-
def get_token(self) -> str:
    """
    Hand out the token that is stored in this connection.

    Returns:
        the token

    Raises:
        BaseError: if this connection has no token (None or empty string)
    """
    if self.token:
        return self.token
    raise BaseError("No token available.")
|
|
136
|
-
|
|
137
|
-
def post(
    self,
    route: str,
    data: dict[str, Any] | None = None,
    files: dict[str, tuple[str, Any]] | None = None,
    headers: dict[str, str] | None = None,
    timeout: int | None = None,
) -> dict[str, Any]:
    """
    Make an HTTP POST request to the server to which this connection has been established.
    If there is a payload and no Content-Type header, the Content-Type is set to JSON/UTF-8.

    Args:
        route: route that will be called on the server
        data: payload of the HTTP request
        files: files to be uploaded, if any
        headers: headers for the HTTP request (the dict passed in is not modified)
        timeout: timeout of the HTTP request, or None if the default should be used

    Returns:
        response from server

    Raises:
        PermanentConnectionError: if all attempts have failed
        InvalidInputError: if the API responds with a permanent error because of invalid input data
    """
    if data:
        # work on a copy, so that the Content-Type doesn't leak into the caller's dict
        headers = headers.copy() if headers else {}
        if "Content-Type" not in headers:
            headers["Content-Type"] = "application/json; charset=UTF-8"
    params = RequestParameters(
        "POST", self._make_url(route), timeout or self.timeout_put_post, data, headers, files
    )
    response = self._try_network_action(params)
    return cast(dict[str, Any], response.json())
|
|
171
|
-
|
|
172
|
-
def get(
    self,
    route: str,
    headers: dict[str, str] | None = None,
) -> dict[str, Any]:
    """
    Send an HTTP GET request to the server of this connection, and return the parsed JSON response.

    Args:
        route: route that will be called on the server
        headers: headers for the HTTP request

    Returns:
        response from server

    Raises:
        PermanentConnectionError: if all attempts have failed
        InvalidInputError: if the API responds with a permanent error because of invalid input data
    """
    url = self._make_url(route)
    params = RequestParameters("GET", url, self.timeout_get_delete, headers=headers)
    json_response = self._try_network_action(params).json()
    return cast(dict[str, Any], json_response)
|
|
194
|
-
|
|
195
|
-
def put(
    self,
    route: str,
    data: dict[str, Any] | None = None,
    headers: dict[str, str] | None = None,
) -> dict[str, Any]:
    """
    Make an HTTP PUT request to the server to which this connection has been established.
    If there is a payload and no Content-Type header, the Content-Type is set to JSON/UTF-8.

    Args:
        route: route that will be called on the server
        data: payload of the HTTP request
        headers: headers of the HTTP request (the dict passed in is not modified)

    Returns:
        response from server

    Raises:
        PermanentConnectionError: if all attempts have failed
        InvalidInputError: if the API responds with a permanent error because of invalid input data
    """
    if data:
        # work on a copy, so that the Content-Type doesn't leak into the caller's dict
        headers = headers.copy() if headers else {}
        if "Content-Type" not in headers:
            headers["Content-Type"] = "application/json; charset=UTF-8"
    params = RequestParameters("PUT", self._make_url(route), self.timeout_put_post, data, headers)
    response = self._try_network_action(params)
    return cast(dict[str, Any], response.json())
|
|
223
|
-
|
|
224
|
-
def delete(
    self,
    route: str,
    headers: dict[str, str] | None = None,
) -> dict[str, Any]:
    """
    Send an HTTP DELETE request to the server of this connection, and return the parsed JSON response.

    Args:
        route: route that will be called on the server
        headers: headers for the HTTP request

    Returns:
        response from server

    Raises:
        PermanentConnectionError: if all attempts have failed
        InvalidInputError: if the API responds with a permanent error because of invalid input data
    """
    url = self._make_url(route)
    params = RequestParameters("DELETE", url, self.timeout_get_delete, headers=headers)
    json_response = self._try_network_action(params).json()
    return cast(dict[str, Any], json_response)
|
|
246
|
-
|
|
247
|
-
def _make_url(self, route: str) -> str:
|
|
248
|
-
if not route.startswith("/"):
|
|
249
|
-
route = f"/{route}"
|
|
250
|
-
return self.server + route
|
|
251
|
-
|
|
252
|
-
def _try_network_action(self, params: RequestParameters) -> Response:
    """
    Try up to 7 times to execute an HTTP request.
    If a ConnectionError or a requests.RequestException occurs,
    the session is renewed, and the request is retried after a waiting time.
    If the response indicates that there is a non-permanent server-side problem,
    the request is retried after a waiting time, too.
    The waiting times are 1, 2, 4, 8, 16, 32, 64 seconds.
    Timeouts are handed over to _log_and_raise_timeouts().

    Args:
        params: keyword arguments for the HTTP request

    Raises:
        BadCredentialsError: if the server returns a 401 status code on the route /v2/authentication
        PermanentConnectionError: if all attempts have failed
        InvalidInputError: if the API responds with a permanent error because of invalid input data
        unexpected exceptions: if the action fails with an unexpected exception

    Returns:
        the response of the server, if its status code is 200
    """
    request_kwargs = params.as_kwargs()
    for i in range(7):
        try:
            self._log_request(params)
            # self.session must be looked up anew in every attempt:
            # _renew_session() replaces it, and a callable that was bound before the loop
            # would keep sending the retries through the old, closed session.
            response = self.session.request(**request_kwargs)
        except (TimeoutError, ReadTimeout) as err:
            # NOTE(review): _log_and_raise_timeouts() is defined outside of this view;
            # presumably it always raises - confirm
            self._log_and_raise_timeouts(err)
        except (ConnectionError, RequestException):
            self._renew_session()
            self._log_and_sleep(reason="Connection Error raised", retry_counter=i, exc_info=True)
            continue

        self._log_response(response)
        if response.status_code == HTTP_OK:
            return response

        # either raises (permanent problem), or sleeps and returns (transient problem)
        self._handle_non_ok_responses(response, params.url, i)

    # if all attempts have failed, raise error
    msg = f"Permanently unable to execute the network action. See {WARNINGS_SAVEPATH} for more information."
    raise PermanentConnectionError(msg)
|
|
293
|
-
|
|
294
|
-
def _handle_non_ok_responses(self, response: Response, request_url: str, retry_counter: int) -> None:
    """
    Decide what to do with a response whose status code is not OK.

    Args:
        response: the non-OK response
        request_url: URL that was requested
        retry_counter: number of the current attempt, determines the waiting time

    Raises:
        BadCredentialsError: if the authentication route answered with "unauthorized"
        InvalidInputError: if the API reports an OntologyConstraintException
        PermanentConnectionError: for every other non-transient problem
    """
    if "v2/authentication" in request_url and response.status_code == HTTP_UNAUTHORIZED:
        raise BadCredentialsError("Bad credentials")

    is_server_error = 500 <= response.status_code < 600
    asks_to_retry = "try again later" in response.text.lower()
    # in the testing environment, transient errors are never retried
    if (asks_to_retry or is_server_error) and not self._in_testing_environment():
        self._log_and_sleep("Transient Error", retry_counter, exc_info=False)
        return None

    msg = "Permanently unable to execute the network action. "
    api_error = regex.search(r'{"knora-api:error":"dsp\.errors\.(.*)","@context', str(response.content))
    if api_error:
        original_message = api_error.group(1)
        msg += f"\n{' '*37}Original Message: {original_message}\n"
        if original_message.startswith("OntologyConstraintException"):
            msg += f"See {WARNINGS_SAVEPATH} for more information."
            raise InvalidInputError(msg)
    msg += f"See {WARNINGS_SAVEPATH} for more information."
    raise PermanentConnectionError(msg)
|
|
312
|
-
|
|
313
|
-
def _renew_session(self) -> None:
    """Close the current session and replace it by a new one with the same User-Agent and token headers."""
    self.session.close()
    self.session = Session()
    new_headers = self.session.headers
    new_headers["User-Agent"] = f'DSP-TOOLS/{version("dsp-tools")}'
    # only an authenticated connection gets the Authorization header
    if self.token:
        new_headers["Authorization"] = f"Bearer {self.token}"
|
|
319
|
-
|
|
320
|
-
def _log_and_sleep(self, reason: str, retry_counter: int, exc_info: bool) -> None:
    """
    Announce a retry on the terminal and in the log, then wait 2**retry_counter seconds.

    Args:
        reason: short description of why a retry is necessary
        retry_counter: number of the current attempt, used as exponent of the waiting time
        exc_info: if True, the current exception is attached to the log entry
    """
    wait_seconds = 2**retry_counter
    msg = f"{reason}: Try reconnecting to DSP server, next attempt in {wait_seconds} seconds..."
    print(f"{datetime.now()}: {msg}")
    log_entry = f"{msg} ({retry_counter=:})"
    if not exc_info:
        logger.error(log_entry)
    else:
        logger.opt(exception=True).error(log_entry)
    time.sleep(wait_seconds)
|
|
328
|
-
|
|
329
|
-
def _log_and_raise_timeouts(self, error: TimeoutError | ReadTimeout) -> Never:
    """
    Report a timeout on the terminal and in the log, then raise a PermanentTimeOutError.

    Args:
        error: the timeout that occurred

    Raises:
        PermanentTimeOutError: always, without chaining the original exception
    """
    error_name = error.__class__.__name__
    msg = f"A '{error_name}' occurred during the connection to the DSP server."
    timestamp = datetime.now()
    print(f"{timestamp}: {msg}")
    logger.exception(msg)
    raise PermanentTimeOutError(msg) from None
|
|
334
|
-
|
|
335
|
-
def _log_response(self, response: Response) -> None:
    """
    Write status code, headers, and content of a response to the debug log.

    Headers and JSON content are passed through _anonymize().
    Content that is not JSON is logged as is, unless it contains the word "token".

    Args:
        response: the response to log
    """
    dumpobj: dict[str, Any] = {
        "status_code": response.status_code,
        "headers": self._anonymize(dict(response.headers)),
    }
    body_text = response.text
    try:
        content: Any = self._anonymize(json.loads(body_text))
    except json.JSONDecodeError:
        # non-JSON body: hide it entirely if it might carry a token
        content = "***" if "token" in body_text else body_text
    dumpobj["content"] = content
    logger.debug(f"RESPONSE: {json.dumps(dumpobj)}")
|
|
345
|
-
|
|
346
|
-
def _anonymize(self, data: dict[str, Any] | None) -> dict[str, Any] | None:
|
|
347
|
-
if not data:
|
|
348
|
-
return data
|
|
349
|
-
data = data.copy()
|
|
350
|
-
if "token" in data:
|
|
351
|
-
data["token"] = self._mask(data["token"])
|
|
352
|
-
if "Set-Cookie" in data:
|
|
353
|
-
data["Set-Cookie"] = self._mask(data["Set-Cookie"])
|
|
354
|
-
if "Authorization" in data:
|
|
355
|
-
if match := regex.search(r"^Bearer (.+)", data["Authorization"]):
|
|
356
|
-
data["Authorization"] = f"Bearer {self._mask(match.group(1))}"
|
|
357
|
-
if "password" in data:
|
|
358
|
-
data["password"] = "*" * len(data["password"])
|
|
359
|
-
return data
|
|
360
|
-
|
|
361
|
-
def _mask(self, sensitive_info: str) -> str:
|
|
362
|
-
unmasked_until = 5
|
|
363
|
-
if len(sensitive_info) <= unmasked_until * 2:
|
|
364
|
-
return "*" * len(sensitive_info)
|
|
365
|
-
else:
|
|
366
|
-
return f"{sensitive_info[:unmasked_until]}[+{len(sensitive_info) - unmasked_until}]"
|
|
367
|
-
|
|
368
|
-
def _in_testing_environment(self) -> bool:
|
|
369
|
-
in_testing_env = os.getenv("DSP_TOOLS_TESTING") # set in .github/workflows/tests-on-push.yml
|
|
370
|
-
return in_testing_env == "true"
|
|
371
|
-
|
|
372
|
-
def _log_request(self, params: RequestParameters) -> None:
    """
    Write method, URL, headers, timeout, and (if present) data and file of a request to the debug log.

    Headers and data are passed through _anonymize() before they are logged.

    Args:
        params: the parameters of the request to log
    """
    # request-specific headers override the session headers
    merged_headers = {**dict(self.session.headers), **(params.headers or {})}
    dumpobj = {
        "method": params.method,
        "url": params.url,
        "headers": self._anonymize(merged_headers),
        "timeout": params.timeout,
    }
    if params.data:
        dumpobj["data"] = self._anonymize(params.data)
    if params.files:
        # only the first item of the "file" entry is logged
        dumpobj["files"] = params.files["file"][0]
    serialized = json.dumps(dumpobj, cls=SetEncoder)
    logger.debug(f"REQUEST: {serialized}")
|
dsp_tools/utils/iri_util.py
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
import regex
|
|
2
|
-
|
|
3
|
-
# Generic IRI: http(s) scheme, host, optional port, optional path segments, optional fragment.
_iri_pattern = r"^(http)s?://[\w\.\-~]*(:\d{,6})?(/[\w\-~]+)*(#[\w\-~]*)?"
# Resource IRI: host rdfh.ch, 4 hex digits, then a 22 character identifier.
# The dot of the host name is escaped, otherwise it would match any character (e.g. "rdfhXch").
_resource_iri_pattern = r"https?://rdfh\.ch/[a-fA-F0-9]{4}/[\w-]{22}"
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def is_iri(s: str) -> bool:
    """Tell whether the entire string matches the IRI pattern."""
    match = regex.fullmatch(_iri_pattern, s)
    return match is not None
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def is_resource_iri(s: str) -> bool:
    """Tell whether the entire string matches the resource IRI pattern."""
    match = regex.fullmatch(_resource_iri_pattern, s)
    return match is not None
|
dsp_tools/utils/logger_config.py
DELETED
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
|
|
3
|
-
from loguru import logger
|
|
4
|
-
|
|
5
|
-
# log file in the hidden ".dsp-tools" folder of the user's home directory
LOGGER_SAVEPATH = Path.home().joinpath(".dsp-tools", "logging.log").absolute()
# relative path, i.e. resolved against the directory from which the program is run
WARNINGS_SAVEPATH = Path("warnings.log")
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def logger_config() -> None:
    """
    Configure the log files.

    All previously registered sinks are removed, then two file sinks are added:
        - LOGGER_SAVEPATH, with backtrace and diagnose enabled, keeping 30 rotated files
        - WARNINGS_SAVEPATH, only level WARNING and higher, without backtrace and diagnose, keeping 10 rotated files

    Both files share the same line format and are rotated at 100 MB.
    """
    # If this is not removed, the default formatting is also printed out on the terminal
    logger.remove()

    shared_settings = {
        "format": "<level>{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {message}</level>",
        "rotation": "100 MB",
    }

    logger.add(
        sink=LOGGER_SAVEPATH,
        backtrace=True,
        diagnose=True,
        retention=30,
        **shared_settings,
    )

    logger.add(
        sink=WARNINGS_SAVEPATH,
        level="WARNING",
        backtrace=False,
        diagnose=False,
        retention=10,
        **shared_settings,
    )
|
dsp_tools/utils/set_encoder.py
DELETED
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
from typing import Any
|
|
3
|
-
from typing import Union
|
|
4
|
-
|
|
5
|
-
from dsp_tools.commands.project.models.context import Context
|
|
6
|
-
from dsp_tools.commands.project.models.helpers import OntoIri
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class SetEncoder(json.JSONEncoder):
    """JSON encoder that can additionally serialize sets, Context objects, and OntoIri objects."""

    def default(self, o: Union[set[Any], Context, OntoIri]) -> Any:
        """
        Convert an object that json cannot serialize by default.

        Args:
            o: the object to convert

        Returns:
            a list for a set, the result of toJsonObj() for a Context,
            a dict with the keys "iri" and "hashtag" for an OntoIri;
            everything else is delegated to the base class
        """
        if isinstance(o, set):
            return list(o)
        if isinstance(o, Context):
            return o.toJsonObj()
        if isinstance(o, OntoIri):
            return {"iri": o.iri, "hashtag": o.hashtag}
        return super().default(o)
|
dsp_tools/utils/xml_utils.py
DELETED
|
@@ -1,145 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import copy
|
|
4
|
-
from copy import deepcopy
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
from loguru import logger
|
|
8
|
-
from lxml import etree
|
|
9
|
-
|
|
10
|
-
from dsp_tools.models.exceptions import InputError
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def parse_and_clean_xml_file(input_file: Path) -> etree._Element:
    """
    Parse an XML file and clean it in three steps:
    parse it with parse_xml_file(),
    remove comments and processing instructions,
    then strip the namespace URIs from the tag names and turn the special tags
    `<annotation>`, `<region>`, `<link>`, `<video-segment>`, `<audio-segment>`
    into `<resource>` elements with the matching `restype` attribute.

    Args:
        input_file: path to the XML file

    Returns:
        the root element of the parsed and cleaned XML file

    Raises:
        InputError: if the file contains a syntax error
    """
    parsed_root = parse_xml_file(input_file)
    without_comments = remove_comments_from_element_tree(parsed_root)
    return transform_special_tags_make_localname(without_comments)
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def transform_special_tags_make_localname(input_tree: etree._Element) -> etree._Element:
    """
    Strip the namespace URIs from the tag names,
    then turn the special tags `<annotation>`, `<region>`, `<link>`, `<video-segment>`, `<audio-segment>`
    into `<resource restype="Annotation">`, `<resource restype="Region">`, `<resource restype="LinkObj">`,
    `<resource restype="VideoSegment">`, `<resource restype="AudioSegment">`.

    Args:
        input_tree: unclean tree

    Returns:
        cleaned tree
    """
    return _transform_special_tags(transform_into_localnames(input_tree))
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def _transform_special_tags(tree: etree._Element) -> etree._Element:
    """
    Return a copy of the tree in which every special tag is renamed to "resource"
    and receives the corresponding "restype" attribute. The input tree is not modified.
    """
    restype_of_special_tag = {
        "annotation": "Annotation",
        "link": "LinkObj",
        "region": "Region",
        "video-segment": "VideoSegment",
        "audio-segment": "AudioSegment",
    }
    transformed = deepcopy(tree)
    for node in transformed.iter():
        restype = restype_of_special_tag.get(node.tag)
        if restype is None:
            continue
        node.attrib["restype"] = restype
        node.tag = "resource"
    return transformed
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
def transform_into_localnames(root: etree._Element) -> etree._Element:
    """
    Return a copy of the tree in which the namespace URIs are removed from the elements' names.
    The input tree is not modified.

    Comments and processing instructions are skipped, because they have no qualified tag name.
    This mirrors the guard in remove_namespaces_from_xml().

    Args:
        root: unclean tree

    Returns:
        cleaned tree
    """
    tree = deepcopy(root)
    for elem in tree.iter():
        if isinstance(elem, (etree._Comment, etree._ProcessingInstruction)):
            continue
        elem.tag = etree.QName(elem).localname
    return tree
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
def remove_comments_from_element_tree(input_tree: etree._Element) -> etree._Element:
    """
    Return a copy of the tree without comments and processing instructions.
    The input tree is not modified.
    Commented out properties break the XMLProperty constructor.

    Args:
        input_tree: etree root that will be cleaned

    Returns:
        clean xml
    """
    cleaned = copy.deepcopy(input_tree)
    # comments are removed first, then processing instructions
    for node_selector in ("//comment()", "//processing-instruction()"):
        for node in cleaned.xpath(node_selector):
            # NOTE(review): presumably lxml's remove() also drops the tail text of the removed node
            # - confirm that this is acceptable for mixed content
            node.getparent().remove(node)
    return cleaned
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
def parse_xml_file(input_file: str | Path) -> etree._Element:
    """
    Parse an XML file with a parser that discards comments and processing instructions.

    Args:
        input_file: path to the input file

    Returns:
        the root element of the parsed file

    Raises:
        InputError: if the file contains a syntax error
    """
    xml_parser = etree.XMLParser(remove_comments=True, remove_pis=True)
    try:
        document = etree.parse(source=input_file, parser=xml_parser)
    except etree.XMLSyntaxError as err:
        problem = f"The XML file contains the following syntax error: {err.msg}"
        logger.opt(exception=True).error(problem)
        # the lxml traceback is already in the log, so it is not chained to the user-facing error
        raise InputError(problem) from None
    return document.getroot()
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
def remove_namespaces_from_xml(data_xml: etree._Element) -> etree._Element:
    """
    Return a copy of the tree in which all tag names are reduced to their local name.
    The input tree is not modified.

    Args:
        data_xml: xml with namespaces

    Returns:
        the XML without the namespaces
    """
    stripped = copy.deepcopy(data_xml)
    for node in stripped.iter():
        # comments and processing instructions are left untouched
        if isinstance(node, (etree._Comment, etree._ProcessingInstruction)):
            continue
        node.tag = etree.QName(node).localname
    return stripped
|