dsp-tools 9.1.0.post11__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsp_tools/__init__.py +4 -0
- dsp_tools/cli/args.py +36 -0
- dsp_tools/cli/call_action.py +51 -231
- dsp_tools/cli/call_action_files_only.py +101 -0
- dsp_tools/cli/call_action_with_network.py +207 -0
- dsp_tools/cli/create_parsers.py +156 -58
- dsp_tools/cli/entry_point.py +56 -26
- dsp_tools/cli/utils.py +87 -0
- dsp_tools/clients/CLAUDE.md +420 -0
- dsp_tools/clients/authentication_client.py +14 -0
- dsp_tools/clients/authentication_client_live.py +66 -0
- dsp_tools/{utils → clients}/connection.py +2 -18
- dsp_tools/clients/connection_live.py +233 -0
- dsp_tools/clients/fuseki_metrics.py +60 -0
- dsp_tools/clients/group_user_clients.py +35 -0
- dsp_tools/clients/group_user_clients_live.py +181 -0
- dsp_tools/clients/legal_info_client.py +23 -0
- dsp_tools/clients/legal_info_client_live.py +132 -0
- dsp_tools/clients/list_client.py +49 -0
- dsp_tools/clients/list_client_live.py +166 -0
- dsp_tools/clients/metadata_client.py +24 -0
- dsp_tools/clients/metadata_client_live.py +47 -0
- dsp_tools/clients/ontology_clients.py +49 -0
- dsp_tools/clients/ontology_create_client_live.py +166 -0
- dsp_tools/clients/ontology_get_client_live.py +80 -0
- dsp_tools/clients/permissions_client.py +68 -0
- dsp_tools/clients/project_client.py +16 -0
- dsp_tools/clients/project_client_live.py +66 -0
- dsp_tools/commands/create/communicate_problems.py +24 -0
- dsp_tools/commands/create/create.py +134 -0
- dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
- dsp_tools/commands/create/create_on_server/classes.py +99 -0
- dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
- dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
- dsp_tools/commands/create/create_on_server/group_users.py +165 -0
- dsp_tools/commands/create/create_on_server/lists.py +163 -0
- dsp_tools/commands/create/create_on_server/mappers.py +12 -0
- dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
- dsp_tools/commands/create/create_on_server/ontology.py +52 -0
- dsp_tools/commands/create/create_on_server/project.py +68 -0
- dsp_tools/commands/create/create_on_server/properties.py +119 -0
- dsp_tools/commands/create/exceptions.py +29 -0
- dsp_tools/commands/create/lists_only.py +66 -0
- dsp_tools/commands/create/models/create_problems.py +87 -0
- dsp_tools/commands/create/models/parsed_ontology.py +88 -0
- dsp_tools/commands/create/models/parsed_project.py +81 -0
- dsp_tools/commands/create/models/rdf_ontology.py +12 -0
- dsp_tools/commands/create/models/server_project_info.py +100 -0
- dsp_tools/commands/create/parsing/parse_lists.py +45 -0
- dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
- dsp_tools/commands/create/parsing/parse_project.py +149 -0
- dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
- dsp_tools/commands/create/project_validate.py +595 -0
- dsp_tools/commands/create/serialisation/ontology.py +119 -0
- dsp_tools/commands/create/serialisation/project.py +44 -0
- dsp_tools/commands/excel2json/CLAUDE.md +101 -0
- dsp_tools/commands/excel2json/json_header.py +57 -23
- dsp_tools/commands/excel2json/{new_lists → lists}/compliance_checks.py +26 -26
- dsp_tools/commands/excel2json/{new_lists/make_new_lists.py → lists/make_lists.py} +19 -18
- dsp_tools/commands/excel2json/{new_lists → lists}/models/input_error.py +1 -12
- dsp_tools/commands/excel2json/{new_lists → lists}/models/serialise.py +9 -5
- dsp_tools/commands/excel2json/{new_lists → lists}/utils.py +4 -4
- dsp_tools/commands/excel2json/models/input_error.py +31 -11
- dsp_tools/commands/excel2json/models/json_header.py +53 -15
- dsp_tools/commands/excel2json/models/ontology.py +4 -3
- dsp_tools/commands/excel2json/{lists.py → old_lists.py} +26 -112
- dsp_tools/commands/excel2json/project.py +78 -34
- dsp_tools/commands/excel2json/properties.py +57 -36
- dsp_tools/commands/excel2json/resources.py +32 -12
- dsp_tools/commands/excel2json/utils.py +20 -1
- dsp_tools/commands/excel2xml/__init__.py +2 -2
- dsp_tools/commands/excel2xml/excel2xml_cli.py +7 -15
- dsp_tools/commands/excel2xml/excel2xml_lib.py +138 -493
- dsp_tools/commands/excel2xml/propertyelement.py +5 -5
- dsp_tools/commands/{project → get}/get.py +29 -13
- dsp_tools/commands/get/get_permissions.py +257 -0
- dsp_tools/commands/get/get_permissions_legacy.py +89 -0
- dsp_tools/commands/{project/models → get/legacy_models}/context.py +6 -6
- dsp_tools/commands/{project/models → get/legacy_models}/group.py +5 -10
- dsp_tools/commands/{project/models → get/legacy_models}/listnode.py +5 -35
- dsp_tools/commands/{project/models → get/legacy_models}/model.py +1 -1
- dsp_tools/commands/{project/models → get/legacy_models}/ontology.py +9 -14
- dsp_tools/commands/{project/models → get/legacy_models}/project.py +13 -6
- dsp_tools/commands/{project/models → get/legacy_models}/propertyclass.py +9 -16
- dsp_tools/commands/{project/models → get/legacy_models}/resourceclass.py +8 -46
- dsp_tools/commands/{project/models → get/legacy_models}/user.py +19 -60
- dsp_tools/commands/get/models/permissions_models.py +10 -0
- dsp_tools/commands/id2iri.py +20 -10
- dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +81 -56
- dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +4 -10
- dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +97 -37
- dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +2 -2
- dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +9 -10
- dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +3 -3
- dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +2 -10
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +12 -2
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +8 -9
- dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +18 -18
- dsp_tools/commands/start_stack.py +126 -77
- dsp_tools/commands/update_legal/CLAUDE.md +344 -0
- dsp_tools/commands/update_legal/__init__.py +0 -0
- dsp_tools/commands/update_legal/core.py +182 -0
- dsp_tools/commands/update_legal/csv_operations.py +135 -0
- dsp_tools/commands/update_legal/models.py +87 -0
- dsp_tools/commands/update_legal/xml_operations.py +247 -0
- dsp_tools/commands/validate_data/CLAUDE.md +159 -0
- dsp_tools/commands/validate_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/constants.py +59 -0
- dsp_tools/commands/validate_data/mappers.py +143 -0
- dsp_tools/commands/validate_data/models/__init__.py +0 -0
- dsp_tools/commands/validate_data/models/api_responses.py +45 -0
- dsp_tools/commands/validate_data/models/input_problems.py +119 -0
- dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
- dsp_tools/commands/validate_data/models/validation.py +106 -0
- dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
- dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
- dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
- dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
- dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
- dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
- dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
- dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
- dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
- dsp_tools/commands/{xml_validate/sparql/resource_shacl.py → validate_data/sparql/cardinality_shacl.py} +45 -47
- dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
- dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
- dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
- dsp_tools/commands/validate_data/utils.py +59 -0
- dsp_tools/commands/validate_data/validate_data.py +283 -0
- dsp_tools/commands/validate_data/validation/__init__.py +0 -0
- dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
- dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
- dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
- dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
- dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
- dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
- dsp_tools/commands/xmlupload/models/formatted_text_value.py +0 -25
- dsp_tools/commands/xmlupload/models/ingest.py +56 -70
- dsp_tools/commands/xmlupload/models/input_problems.py +6 -14
- dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
- dsp_tools/commands/xmlupload/models/permission.py +0 -39
- dsp_tools/commands/xmlupload/models/{deserialise/xmlpermission.py → permissions_parsed.py} +2 -2
- dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
- dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
- dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
- dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
- dsp_tools/commands/xmlupload/models/upload_clients.py +3 -3
- dsp_tools/commands/xmlupload/models/upload_state.py +2 -4
- dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
- dsp_tools/commands/xmlupload/{ark2iri.py → prepare_xml_input/ark2iri.py} +1 -1
- dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
- dsp_tools/commands/xmlupload/{iiif_uri_validator.py → prepare_xml_input/iiif_uri_validator.py} +2 -14
- dsp_tools/commands/xmlupload/{list_client.py → prepare_xml_input/list_client.py} +15 -10
- dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
- dsp_tools/commands/xmlupload/resource_create_client.py +7 -468
- dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
- dsp_tools/commands/xmlupload/stash/{construct_and_analyze_graph.py → analyse_circular_reference_graph.py} +64 -157
- dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
- dsp_tools/commands/xmlupload/stash/graph_models.py +13 -8
- dsp_tools/commands/xmlupload/stash/stash_circular_references.py +48 -115
- dsp_tools/commands/xmlupload/stash/stash_models.py +4 -9
- dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +34 -40
- dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +98 -108
- dsp_tools/commands/xmlupload/upload_config.py +8 -0
- dsp_tools/commands/xmlupload/write_diagnostic_info.py +14 -9
- dsp_tools/commands/xmlupload/xmlupload.py +214 -192
- dsp_tools/config/__init__.py +0 -0
- dsp_tools/config/logger_config.py +69 -0
- dsp_tools/{utils → config}/warnings_config.py +4 -1
- dsp_tools/error/__init__.py +0 -0
- dsp_tools/error/custom_warnings.py +39 -0
- dsp_tools/error/exceptions.py +204 -0
- dsp_tools/error/problems.py +10 -0
- dsp_tools/error/xmllib_errors.py +20 -0
- dsp_tools/error/xmllib_warnings.py +54 -0
- dsp_tools/error/xmllib_warnings_util.py +159 -0
- dsp_tools/error/xsd_validation_error_msg.py +19 -0
- dsp_tools/legacy_models/__init__.py +0 -0
- dsp_tools/{models → legacy_models}/datetimestamp.py +7 -7
- dsp_tools/{models → legacy_models}/langstring.py +1 -1
- dsp_tools/{models → legacy_models}/projectContext.py +4 -4
- dsp_tools/resources/schema/data.xsd +108 -83
- dsp_tools/resources/schema/lists-only.json +4 -23
- dsp_tools/resources/schema/project.json +80 -35
- dsp_tools/resources/schema/properties-only.json +1 -4
- dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
- dsp_tools/resources/start-stack/docker-compose.yml +34 -30
- dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
- dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
- dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
- dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
- dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
- dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
- dsp_tools/utils/ansi_colors.py +32 -0
- dsp_tools/utils/data_formats/__init__.py +0 -0
- dsp_tools/utils/{date_util.py → data_formats/date_util.py} +13 -1
- dsp_tools/utils/data_formats/iri_util.py +30 -0
- dsp_tools/utils/{shared.py → data_formats/shared.py} +1 -35
- dsp_tools/utils/{uri_util.py → data_formats/uri_util.py} +12 -2
- dsp_tools/utils/fuseki_bloating.py +63 -0
- dsp_tools/utils/json_parsing.py +22 -0
- dsp_tools/utils/rdf_constants.py +42 -0
- dsp_tools/utils/rdflib_utils.py +10 -0
- dsp_tools/utils/replace_id_with_iri.py +66 -0
- dsp_tools/utils/request_utils.py +238 -0
- dsp_tools/utils/xml_parsing/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
- dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
- dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
- dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
- dsp_tools/xmllib/CLAUDE.md +302 -0
- dsp_tools/xmllib/__init__.py +49 -0
- dsp_tools/xmllib/general_functions.py +877 -0
- dsp_tools/xmllib/internal/__init__.py +0 -0
- dsp_tools/xmllib/internal/checkers.py +162 -0
- dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
- dsp_tools/xmllib/internal/constants.py +46 -0
- dsp_tools/xmllib/internal/input_converters.py +155 -0
- dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
- dsp_tools/xmllib/internal/serialise_resource.py +177 -0
- dsp_tools/xmllib/internal/serialise_values.py +152 -0
- dsp_tools/xmllib/internal/type_aliases.py +11 -0
- dsp_tools/xmllib/models/config_options.py +28 -0
- dsp_tools/xmllib/models/date_formats.py +48 -0
- dsp_tools/xmllib/models/dsp_base_resources.py +1380 -400
- dsp_tools/xmllib/models/internal/__init__.py +0 -0
- dsp_tools/xmllib/models/internal/file_values.py +172 -0
- dsp_tools/xmllib/models/internal/geometry.py +162 -0
- dsp_tools/xmllib/models/{migration_metadata.py → internal/migration_metadata.py} +14 -10
- dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
- dsp_tools/xmllib/models/internal/values.py +342 -0
- dsp_tools/xmllib/models/licenses/__init__.py +0 -0
- dsp_tools/xmllib/models/licenses/other.py +59 -0
- dsp_tools/xmllib/models/licenses/recommended.py +107 -0
- dsp_tools/xmllib/models/permissions.py +41 -0
- dsp_tools/xmllib/models/res.py +1782 -0
- dsp_tools/xmllib/models/root.py +313 -26
- dsp_tools/xmllib/value_checkers.py +310 -47
- dsp_tools/xmllib/value_converters.py +765 -8
- dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
- dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
- dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
- {dsp_tools-9.1.0.post11.dist-info → dsp_tools-18.3.0.post13.dist-info}/entry_points.txt +1 -0
- dsp_tools/commands/project/create/project_create.py +0 -1107
- dsp_tools/commands/project/create/project_create_lists.py +0 -204
- dsp_tools/commands/project/create/project_validate.py +0 -453
- dsp_tools/commands/project/models/project_definition.py +0 -12
- dsp_tools/commands/rosetta.py +0 -124
- dsp_tools/commands/template.py +0 -30
- dsp_tools/commands/xml_validate/api_connection.py +0 -122
- dsp_tools/commands/xml_validate/deserialise_input.py +0 -135
- dsp_tools/commands/xml_validate/make_data_rdf.py +0 -193
- dsp_tools/commands/xml_validate/models/data_deserialised.py +0 -108
- dsp_tools/commands/xml_validate/models/data_rdf.py +0 -214
- dsp_tools/commands/xml_validate/models/input_problems.py +0 -191
- dsp_tools/commands/xml_validate/models/validation.py +0 -29
- dsp_tools/commands/xml_validate/reformat_validaton_result.py +0 -89
- dsp_tools/commands/xml_validate/sparql/construct_shapes.py +0 -16
- dsp_tools/commands/xml_validate/xml_validate.py +0 -151
- dsp_tools/commands/xmlupload/check_consistency_with_ontology.py +0 -253
- dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py +0 -236
- dsp_tools/commands/xmlupload/models/deserialise/xmlresource.py +0 -171
- dsp_tools/commands/xmlupload/models/namespace_context.py +0 -39
- dsp_tools/commands/xmlupload/models/ontology_lookup_models.py +0 -161
- dsp_tools/commands/xmlupload/models/ontology_problem_models.py +0 -178
- dsp_tools/commands/xmlupload/models/serialise/jsonld_serialiser.py +0 -40
- dsp_tools/commands/xmlupload/models/serialise/serialise_value.py +0 -51
- dsp_tools/commands/xmlupload/ontology_client.py +0 -92
- dsp_tools/commands/xmlupload/project_client.py +0 -91
- dsp_tools/commands/xmlupload/read_validate_xml_file.py +0 -99
- dsp_tools/models/custom_warnings.py +0 -31
- dsp_tools/models/exceptions.py +0 -90
- dsp_tools/resources/0100-template-repo/template.json +0 -45
- dsp_tools/resources/0100-template-repo/template.xml +0 -27
- dsp_tools/resources/start-stack/docker-compose-validation.yml +0 -5
- dsp_tools/resources/start-stack/start-stack-config.yml +0 -4
- dsp_tools/resources/xml_validate/api-shapes.ttl +0 -411
- dsp_tools/resources/xml_validate/replace_namespace.xslt +0 -61
- dsp_tools/utils/connection_live.py +0 -383
- dsp_tools/utils/iri_util.py +0 -14
- dsp_tools/utils/logger_config.py +0 -41
- dsp_tools/utils/set_encoder.py +0 -20
- dsp_tools/utils/xml_utils.py +0 -145
- dsp_tools/utils/xml_validation.py +0 -197
- dsp_tools/utils/xml_validation_models.py +0 -68
- dsp_tools/xmllib/models/file_values.py +0 -78
- dsp_tools/xmllib/models/resource.py +0 -415
- dsp_tools/xmllib/models/values.py +0 -428
- dsp_tools-9.1.0.post11.dist-info/METADATA +0 -130
- dsp_tools-9.1.0.post11.dist-info/RECORD +0 -167
- dsp_tools-9.1.0.post11.dist-info/WHEEL +0 -4
- dsp_tools-9.1.0.post11.dist-info/licenses/LICENSE +0 -674
- /dsp_tools/{commands/excel2json/new_lists → clients}/__init__.py +0 -0
- /dsp_tools/commands/{excel2json/new_lists/models → create}/__init__.py +0 -0
- /dsp_tools/commands/{project → create/create_on_server}/__init__.py +0 -0
- /dsp_tools/commands/{project/create → create/models}/__init__.py +0 -0
- /dsp_tools/commands/{project/models → create/parsing}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate → create/serialisation}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate/models → excel2json/lists}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate/sparql → excel2json/lists/models}/__init__.py +0 -0
- /dsp_tools/commands/excel2json/{new_lists → lists}/models/deserialise.py +0 -0
- /dsp_tools/commands/{xmlupload/models/deserialise → get}/__init__.py +0 -0
- /dsp_tools/commands/{xmlupload/models/serialise → get/legacy_models}/__init__.py +0 -0
- /dsp_tools/commands/{project/models → get/legacy_models}/helpers.py +0 -0
- /dsp_tools/{models → commands/get/models}/__init__.py +0 -0
|
@@ -1,197 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import importlib.resources
|
|
4
|
-
import warnings
|
|
5
|
-
from datetime import datetime
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
|
|
8
|
-
import regex
|
|
9
|
-
from loguru import logger
|
|
10
|
-
from lxml import etree
|
|
11
|
-
|
|
12
|
-
from dsp_tools.models.custom_warnings import DspToolsUserWarning
|
|
13
|
-
from dsp_tools.models.exceptions import InputError
|
|
14
|
-
from dsp_tools.utils.xml_utils import parse_xml_file
|
|
15
|
-
from dsp_tools.utils.xml_utils import remove_comments_from_element_tree
|
|
16
|
-
from dsp_tools.utils.xml_utils import remove_namespaces_from_xml
|
|
17
|
-
from dsp_tools.utils.xml_validation_models import InconsistentTextValueEncodings
|
|
18
|
-
from dsp_tools.utils.xml_validation_models import TextValueData
|
|
19
|
-
|
|
20
|
-
# Layout strings used when composing the user-facing messages of this module.
# Prefix for each individual schema error line
separator = "\n "
# Prefix for each entry of a bullet list
list_separator = "\n - "
# Horizontal rule between sections of one message
medium_separator = "\n----------------------------\n"
# Horizontal rule between independent problems
grand_separator = "\n\n---------------------------------------\n\n"
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
def validate_xml_file(input_file: Path | str) -> bool:
    """
    Parse an XML file and validate its content with `validate_xml()`.

    The parsed tree is passed through `remove_comments_from_element_tree()` before it is validated.

    Args:
        input_file: path to the XML file to be validated

    Raises:
        InputError: if the XML file is invalid

    Returns:
        True if the XML file is valid
    """
    return validate_xml(remove_comments_from_element_tree(parse_xml_file(input_file)))
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def validate_xml(xml: etree._Element) -> bool:
    """
    Run the XSD schema check and the content checks on an XML element tree.

    Args:
        xml: the XML element tree to be validated

    Raises:
        InputError: if at least one of the checks reports a problem

    Returns:
        True if no problems were found
    """
    without_namespace = remove_namespaces_from_xml(xml)

    # The schema check gets the tree as passed in,
    # the content checks get the result of remove_namespaces_from_xml().
    findings = [*_validate_xml_against_schema(xml), *_validate_xml_contents(without_namespace)]

    if not findings:
        success_msg = "The XML file is syntactically correct and passed validation."
        logger.info(success_msg)
        print(f"{datetime.now()}: {success_msg}")
        return True

    err_msg = grand_separator.join(findings)
    logger.opt(exception=True).error(err_msg)
    raise InputError(err_msg)
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
def _validate_xml_against_schema(data_xml: etree._Element) -> list[str]:
    """
    Validate an XML element tree against the `resources/schema/data.xsd` file shipped with `dsp_tools`.

    Args:
        data_xml: the XML element tree to be validated

    Returns:
        An empty list if the XML is valid,
        otherwise a list with a single message that names every schema error with its line number
    """
    xsd_location = importlib.resources.files("dsp_tools").joinpath("resources/schema/data.xsd")
    with xsd_location.open(encoding="utf-8") as xsd_handle:
        schema = etree.XMLSchema(etree.parse(xsd_handle))

    if schema.validate(data_xml):
        return []

    header = "The XML file cannot be uploaded due to the following validation error(s):"
    details = "".join(f"{separator}Line {err.line}: {err.message}" for err in schema.error_log)
    # Strip the namespace in curly braces from the message, so that only the bare tag names remain.
    return [f"{header}{details}".replace("{https://dasch.swiss/schema}", "")]
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
def _validate_xml_contents(xml_no_namespace: etree._Element) -> list[str]:
    """
    Run the content checks that go beyond the XSD schema.

    Args:
        xml_no_namespace: parsed XML file with the namespaces removed

    Returns:
        The problem messages of the mixed-encodings check (empty if there are none)
    """
    # This check returns nothing, so it cannot add to the problems.
    _find_xml_tags_in_simple_text_elements(xml_no_namespace)
    encoding_problems = list(_find_mixed_encodings_in_one_text_prop(xml_no_namespace))
    _check_for_deprecated_syntax(xml_no_namespace)
    return encoding_problems
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
def _find_xml_tags_in_simple_text_elements(xml_no_namespace: etree._Element) -> None:
    """
    Warn about `<text encoding="utf8">` elements that seem to contain XML tags.

    Angle brackets in a simple text may be intended for display,
    or they may be markup that ended up in the wrong kind of text field.
    This cannot be decided here, so a warning (not an error) is emitted
    that lists the affected resources and properties.

    Args:
        xml_no_namespace: parsed XML file with the namespaces removed
    """
    tag_pattern = r'<([a-zA-Z/"]+|[^\s0-9].*[^\s0-9])>'
    suspicious = []
    for text_ele in xml_no_namespace.findall(path="resource/text-prop/text"):
        # Tags can show up as literal text, or as child elements created by the XML parser.
        looks_like_markup = bool(regex.search(tag_pattern, str(text_ele.text)))
        has_child_elements = bool(list(text_ele.iterchildren()))
        if text_ele.attrib["encoding"] != "utf8":
            continue
        if not (looks_like_markup or has_child_elements):
            continue
        prop_ele = text_ele.getparent()
        location = f"line {text_ele.sourceline}: " if text_ele.sourceline else " "
        propname = prop_ele.attrib["name"]  # type: ignore[union-attr]
        resname = prop_ele.getparent().attrib["id"]  # type: ignore[union-attr]
        suspicious.append(f"{location}resource '{resname}', property '{propname}'")

    if not suspicious:
        return

    err_msg = (
        "Angular brackets in the format of <text> were found in text properties with encoding=utf8.\n"
        "Please note that these will not be recognised as formatting in the text field, "
        "but will be displayed as-is.\n"
        f"The following resources of your XML file contain angular brackets:{list_separator}"
        f"{list_separator.join(suspicious)}"
    )
    warnings.warn(DspToolsUserWarning(err_msg))
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
def _find_mixed_encodings_in_one_text_prop(xml_no_namespace: etree._Element) -> list[str]:
    """
    Compose the error message for `<text-prop>` elements with inconsistent encodings.

    If the problem protocol hands back a dataframe, it is saved as a timestamped CSV file
    (relative path, i.e. in the current working directory), and the message points to that file.

    Args:
        xml_no_namespace: parsed XML file with the namespaces removed

    Returns:
        An empty list if no inconsistency was found, otherwise a list with a single message
    """
    inconsistent_props = check_if_only_one_encoding_is_used_per_prop_in_root(xml_no_namespace)
    if inconsistent_props:
        msg, df = InconsistentTextValueEncodings(inconsistent_props).execute_problem_protocol()
        if df is not None:
            csv_path = Path(f"XML_syntax_errors_{datetime.now().strftime('%Y-%m-%d_%H%M%S')}.csv")
            msg = f"\nAll the problems are listed in the file: '{csv_path.absolute()}'{msg}"
            df.to_csv(csv_path)
        return [msg]
    return []
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
def _check_for_deprecated_syntax(data_xml: etree._Element) -> None:
|
|
141
|
-
pass
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
def check_if_only_one_encoding_is_used_per_prop_in_root(
    root: etree._Element,
) -> list[TextValueData]:
    """
    Find the `<text-prop>` elements whose `<text>` children do not agree on one encoding.

    Consistent (not reported):
    ```
    <text-prop name=":hasSimpleText">
        <text encoding="utf8">Text 1</text>
        <text encoding="utf8">Text 2</text>
    </text-prop>
    ```

    Inconsistent (reported):
    ```
    <text-prop name=":hasSimpleText">
        <text encoding="utf8">Text 1</text>
        <text encoding="xml">Text 2</text>
    </text-prop>
    ```

    Args:
        root: root of the data xml document

    Returns:
        A list of all the inconsistent `<text-prop>` elements
    """
    return _find_all_text_props_with_multiple_encodings(_get_all_ids_and_encodings_from_root(root))
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
def _get_all_ids_and_encodings_from_root(
    root: etree._Element,
) -> list[TextValueData]:
    """Collect the encoding information of every `<text-prop>` of every `<resource>` child of the root."""
    return [
        text_value_data
        for resource in root.iterchildren(tag="resource")
        for text_value_data in _get_encodings_from_one_resource(resource)
    ]
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
def _get_encodings_from_one_resource(resource: etree._Element) -> list[TextValueData]:
    """Collect the encoding information of each `<text-prop>` child of one resource, tagged with the resource ID."""
    resource_id = resource.attrib["id"]
    return [
        _get_encodings_from_one_property(resource_id, text_prop)
        for text_prop in resource.iterchildren(tag="text-prop")
    ]
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
def _get_encodings_from_one_property(res_id: str, prop: etree._Element) -> TextValueData:
    """Collect the distinct `encoding` attribute values of all children of one property."""
    name_of_prop = prop.attrib["name"]
    found_encodings = set()
    for child in prop.iterchildren():
        found_encodings.add(child.attrib["encoding"])
    return TextValueData(res_id, name_of_prop, found_encodings)
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
def _find_all_text_props_with_multiple_encodings(text_props: list[TextValueData]) -> list[TextValueData]:
|
|
197
|
-
return [x for x in text_props if len(x.encoding) != 1]
|
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
from dataclasses import dataclass
|
|
2
|
-
|
|
3
|
-
import pandas as pd
|
|
4
|
-
|
|
5
|
-
# Prefix for each entry of a bullet list
list_separator = "\n - "
# Horizontal rule between the sections of the individual resources
medium_separator = "\n----------------------------\n"
# Horizontal rule between the general explanation and the detailed listing
grand_separator = "\n\n---------------------------------------\n\n"

# Up to this number of problems, the details are included in the message itself
maximum_prints = 50
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
@dataclass
class TextValueData:
    """Encoding information about one text property of one resource."""

    # ID of the resource that the property belongs to
    resource_id: str
    # name of the property
    property_name: str
    # the distinct encodings that occur within the property
    encoding: set[str]
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
@dataclass
class InconsistentTextValueEncodings:
    """
    Implementation of the `Problem` protocol
    for resources and properties with inconsistent text encodings.

    An inconsistent `<text-prop>` element is one that contains
    `<text encoding="utf8">`
    and
    `<text encoding="xml">`
    """

    problematic_resources: list[TextValueData]

    def execute_problem_protocol(self) -> tuple[str, pd.DataFrame | None]:
        """
        Compose an error message for the user.

        Up to `maximum_prints` problems are spelled out in the message itself.
        If there are more, the message contains only the general explanation,
        and the problems are returned as a dataframe that can be saved as a CSV file.

        Returns:
            the error message, and optionally a dataframe with the errors
        """
        intro = (
            "\nSome <text-prop> elements contain <text> elements that use both 'xml' and 'utf8' encoding.\n"
            "Only one encoding type can be used within one <text-prop> element."
        )
        problems_df = self._get_problems_as_df()
        if len(problems_df) <= maximum_prints:
            return f"{intro}{grand_separator}{_make_msg_from_df(problems_df)}", None
        return intro, problems_df

    def _get_problems_as_df(self) -> pd.DataFrame:
        """Return one row per problem, sorted by resource ID and property name, with a fresh index."""
        res_ids = [problem.resource_id for problem in self.problematic_resources]
        prop_names = [problem.property_name for problem in self.problematic_resources]
        unsorted = pd.DataFrame({"Resource ID": res_ids, "Property Name": prop_names})
        return unsorted.sort_values(by=["Resource ID", "Property Name"], ignore_index=True)
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
def _make_msg_from_df(df: pd.DataFrame) -> str:
    """Build one message section per resource ID, and join the sections with `medium_separator`."""
    sections = []
    for res_id, res_df in df.groupby(by="Resource ID"):
        sections.append(_make_msg_for_one_resource(str(res_id), res_df))
    return medium_separator.join(sections)
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
def _make_msg_for_one_resource(res_id: str, res_df: pd.DataFrame) -> str:
    """Format the resource ID, followed by a bullet list of the names in the "Property Name" column."""
    header = f"Resource ID: '{res_id}'"
    entries = [f"Property Name: '{prop_name}'" for prop_name in res_df["Property Name"].tolist()]
    return header + list_separator + list_separator.join(entries)
|
|
@@ -1,78 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import warnings
|
|
4
|
-
from dataclasses import dataclass
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
from typing import Any
|
|
7
|
-
from typing import Protocol
|
|
8
|
-
|
|
9
|
-
from lxml import etree
|
|
10
|
-
|
|
11
|
-
from dsp_tools.models.custom_warnings import DspToolsUserWarning
|
|
12
|
-
from dsp_tools.utils.uri_util import is_iiif_uri
|
|
13
|
-
from dsp_tools.xmllib.value_checkers import is_string_like
|
|
14
|
-
|
|
15
|
-
# Namespace map passed as `nsmap` to the serialised elements; the `None` key is the default namespace
XML_NAMESPACE_MAP = {None: "https://dasch.swiss/schema", "xsi": "http://www.w3.org/2001/XMLSchema-instance"}
# Namespace in curly braces, prepended to the tag name when an element is created
DASCH_SCHEMA = "{https://dasch.swiss/schema}"
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class AbstractFileValue(Protocol):
    """Protocol for file values: a value with optional permissions and comment that can be serialised."""

    # the file reference, either as string or as path
    value: str | Path
    # the permissions, or None
    permissions: str | None
    # an optional comment
    comment: str | None = None

    def serialise(self) -> etree._Element:
        """Serialise the file value to an XML element. This base version always raises."""
        raise NotImplementedError
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
@dataclass
class FileValue(AbstractFileValue):
    """
    A file value that is serialised as `<bitstream>` element.

    The `resource_id` is only used to make the warning of the input check more informative.
    """

    value: str | Path
    permissions: str | None = "prop-default"
    comment: str | None = None
    resource_id: str | None = None

    def __post_init__(self) -> None:
        """Emit a warning (without raising) if the value does not pass `is_string_like()`."""
        if is_string_like(str(self.value)):
            return
        _warn_type_mismatch(expected_type="file name", value=self.value, res_id=self.resource_id)

    def serialise(self) -> etree._Element:
        """Create the `<bitstream>` element; permissions and comment become attributes only if they are truthy."""
        optional_attribs = {"permissions": self.permissions, "comment": self.comment}
        attribs = {attr_name: content for attr_name, content in optional_attribs.items() if content}
        bitstream = etree.Element(f"{DASCH_SCHEMA}bitstream", attrib=attribs, nsmap=XML_NAMESPACE_MAP)
        bitstream.text = str(self.value)
        return bitstream
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
@dataclass
class IIIFUri(AbstractFileValue):
    """
    A IIIF URI that is serialised as `<iiif-uri>` element.

    The `resource_id` is only used to make the warning of the input check more informative.
    """

    value: str
    permissions: str | None = "prop-default"
    comment: str | None = None
    resource_id: str | None = None

    def __post_init__(self) -> None:
        """Emit a warning (without raising) if the value does not pass `is_iiif_uri()`."""
        if is_iiif_uri(self.value):
            return
        _warn_type_mismatch(expected_type="IIIF uri", value=self.value, res_id=self.resource_id)

    def serialise(self) -> etree._Element:
        """Create the `<iiif-uri>` element; permissions and comment become attributes only if they are truthy."""
        optional_attribs = {"permissions": self.permissions, "comment": self.comment}
        attribs = {attr_name: content for attr_name, content in optional_attribs.items() if content}
        iiif_ele = etree.Element(f"{DASCH_SCHEMA}iiif-uri", attrib=attribs, nsmap=XML_NAMESPACE_MAP)
        iiif_ele.text = str(self.value)
        return iiif_ele
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
def _warn_type_mismatch(expected_type: str, value: Any, res_id: str | None) -> None:
    """
    Emit a `DspToolsUserWarning` saying that a value is not in the expected format.

    Args:
        expected_type: description of the expected format, used in the message
        value: the offending value
        res_id: ID of the resource that the value belongs to; if falsy, the message does not mention a resource
    """
    complaint = f"The value '{value}' is not a valid {expected_type}."
    msg = f"The Resource '{res_id}' has an invalid input: {complaint}" if res_id else complaint
    warnings.warn(DspToolsUserWarning(msg))
|