dsp-tools 9.1.0.post11__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- dsp_tools/__init__.py +4 -0
- dsp_tools/cli/args.py +36 -0
- dsp_tools/cli/call_action.py +51 -231
- dsp_tools/cli/call_action_files_only.py +101 -0
- dsp_tools/cli/call_action_with_network.py +207 -0
- dsp_tools/cli/create_parsers.py +156 -58
- dsp_tools/cli/entry_point.py +56 -26
- dsp_tools/cli/utils.py +87 -0
- dsp_tools/clients/CLAUDE.md +420 -0
- dsp_tools/clients/authentication_client.py +14 -0
- dsp_tools/clients/authentication_client_live.py +66 -0
- dsp_tools/{utils → clients}/connection.py +2 -18
- dsp_tools/clients/connection_live.py +233 -0
- dsp_tools/clients/fuseki_metrics.py +60 -0
- dsp_tools/clients/group_user_clients.py +35 -0
- dsp_tools/clients/group_user_clients_live.py +181 -0
- dsp_tools/clients/legal_info_client.py +23 -0
- dsp_tools/clients/legal_info_client_live.py +132 -0
- dsp_tools/clients/list_client.py +49 -0
- dsp_tools/clients/list_client_live.py +166 -0
- dsp_tools/clients/metadata_client.py +24 -0
- dsp_tools/clients/metadata_client_live.py +47 -0
- dsp_tools/clients/ontology_clients.py +49 -0
- dsp_tools/clients/ontology_create_client_live.py +166 -0
- dsp_tools/clients/ontology_get_client_live.py +80 -0
- dsp_tools/clients/permissions_client.py +68 -0
- dsp_tools/clients/project_client.py +16 -0
- dsp_tools/clients/project_client_live.py +66 -0
- dsp_tools/commands/create/communicate_problems.py +24 -0
- dsp_tools/commands/create/create.py +134 -0
- dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
- dsp_tools/commands/create/create_on_server/classes.py +99 -0
- dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
- dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
- dsp_tools/commands/create/create_on_server/group_users.py +165 -0
- dsp_tools/commands/create/create_on_server/lists.py +163 -0
- dsp_tools/commands/create/create_on_server/mappers.py +12 -0
- dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
- dsp_tools/commands/create/create_on_server/ontology.py +52 -0
- dsp_tools/commands/create/create_on_server/project.py +68 -0
- dsp_tools/commands/create/create_on_server/properties.py +119 -0
- dsp_tools/commands/create/exceptions.py +29 -0
- dsp_tools/commands/create/lists_only.py +66 -0
- dsp_tools/commands/create/models/create_problems.py +87 -0
- dsp_tools/commands/create/models/parsed_ontology.py +88 -0
- dsp_tools/commands/create/models/parsed_project.py +81 -0
- dsp_tools/commands/create/models/rdf_ontology.py +12 -0
- dsp_tools/commands/create/models/server_project_info.py +100 -0
- dsp_tools/commands/create/parsing/parse_lists.py +45 -0
- dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
- dsp_tools/commands/create/parsing/parse_project.py +149 -0
- dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
- dsp_tools/commands/create/project_validate.py +595 -0
- dsp_tools/commands/create/serialisation/ontology.py +119 -0
- dsp_tools/commands/create/serialisation/project.py +44 -0
- dsp_tools/commands/excel2json/CLAUDE.md +101 -0
- dsp_tools/commands/excel2json/json_header.py +57 -23
- dsp_tools/commands/excel2json/{new_lists → lists}/compliance_checks.py +26 -26
- dsp_tools/commands/excel2json/{new_lists/make_new_lists.py → lists/make_lists.py} +19 -18
- dsp_tools/commands/excel2json/{new_lists → lists}/models/input_error.py +1 -12
- dsp_tools/commands/excel2json/{new_lists → lists}/models/serialise.py +9 -5
- dsp_tools/commands/excel2json/{new_lists → lists}/utils.py +4 -4
- dsp_tools/commands/excel2json/models/input_error.py +31 -11
- dsp_tools/commands/excel2json/models/json_header.py +53 -15
- dsp_tools/commands/excel2json/models/ontology.py +4 -3
- dsp_tools/commands/excel2json/{lists.py → old_lists.py} +26 -112
- dsp_tools/commands/excel2json/project.py +78 -34
- dsp_tools/commands/excel2json/properties.py +57 -36
- dsp_tools/commands/excel2json/resources.py +32 -12
- dsp_tools/commands/excel2json/utils.py +20 -1
- dsp_tools/commands/excel2xml/__init__.py +2 -2
- dsp_tools/commands/excel2xml/excel2xml_cli.py +7 -15
- dsp_tools/commands/excel2xml/excel2xml_lib.py +138 -493
- dsp_tools/commands/excel2xml/propertyelement.py +5 -5
- dsp_tools/commands/{project → get}/get.py +29 -13
- dsp_tools/commands/get/get_permissions.py +257 -0
- dsp_tools/commands/get/get_permissions_legacy.py +89 -0
- dsp_tools/commands/{project/models → get/legacy_models}/context.py +6 -6
- dsp_tools/commands/{project/models → get/legacy_models}/group.py +5 -10
- dsp_tools/commands/{project/models → get/legacy_models}/listnode.py +5 -35
- dsp_tools/commands/{project/models → get/legacy_models}/model.py +1 -1
- dsp_tools/commands/{project/models → get/legacy_models}/ontology.py +9 -14
- dsp_tools/commands/{project/models → get/legacy_models}/project.py +13 -6
- dsp_tools/commands/{project/models → get/legacy_models}/propertyclass.py +9 -16
- dsp_tools/commands/{project/models → get/legacy_models}/resourceclass.py +8 -46
- dsp_tools/commands/{project/models → get/legacy_models}/user.py +19 -60
- dsp_tools/commands/get/models/permissions_models.py +10 -0
- dsp_tools/commands/id2iri.py +20 -10
- dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +81 -56
- dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +4 -10
- dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +97 -37
- dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +2 -2
- dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +9 -10
- dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +3 -3
- dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +2 -10
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +12 -2
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +8 -9
- dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +18 -18
- dsp_tools/commands/start_stack.py +126 -77
- dsp_tools/commands/update_legal/CLAUDE.md +344 -0
- dsp_tools/commands/update_legal/__init__.py +0 -0
- dsp_tools/commands/update_legal/core.py +182 -0
- dsp_tools/commands/update_legal/csv_operations.py +135 -0
- dsp_tools/commands/update_legal/models.py +87 -0
- dsp_tools/commands/update_legal/xml_operations.py +247 -0
- dsp_tools/commands/validate_data/CLAUDE.md +159 -0
- dsp_tools/commands/validate_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/constants.py +59 -0
- dsp_tools/commands/validate_data/mappers.py +143 -0
- dsp_tools/commands/validate_data/models/__init__.py +0 -0
- dsp_tools/commands/validate_data/models/api_responses.py +45 -0
- dsp_tools/commands/validate_data/models/input_problems.py +119 -0
- dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
- dsp_tools/commands/validate_data/models/validation.py +106 -0
- dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
- dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
- dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
- dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
- dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
- dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
- dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
- dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
- dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
- dsp_tools/commands/{xml_validate/sparql/resource_shacl.py → validate_data/sparql/cardinality_shacl.py} +45 -47
- dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
- dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
- dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
- dsp_tools/commands/validate_data/utils.py +59 -0
- dsp_tools/commands/validate_data/validate_data.py +283 -0
- dsp_tools/commands/validate_data/validation/__init__.py +0 -0
- dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
- dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
- dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
- dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
- dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
- dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
- dsp_tools/commands/xmlupload/models/formatted_text_value.py +0 -25
- dsp_tools/commands/xmlupload/models/ingest.py +56 -70
- dsp_tools/commands/xmlupload/models/input_problems.py +6 -14
- dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
- dsp_tools/commands/xmlupload/models/permission.py +0 -39
- dsp_tools/commands/xmlupload/models/{deserialise/xmlpermission.py → permissions_parsed.py} +2 -2
- dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
- dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
- dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
- dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
- dsp_tools/commands/xmlupload/models/upload_clients.py +3 -3
- dsp_tools/commands/xmlupload/models/upload_state.py +2 -4
- dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
- dsp_tools/commands/xmlupload/{ark2iri.py → prepare_xml_input/ark2iri.py} +1 -1
- dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
- dsp_tools/commands/xmlupload/{iiif_uri_validator.py → prepare_xml_input/iiif_uri_validator.py} +2 -14
- dsp_tools/commands/xmlupload/{list_client.py → prepare_xml_input/list_client.py} +15 -10
- dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
- dsp_tools/commands/xmlupload/resource_create_client.py +7 -468
- dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
- dsp_tools/commands/xmlupload/stash/{construct_and_analyze_graph.py → analyse_circular_reference_graph.py} +64 -157
- dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
- dsp_tools/commands/xmlupload/stash/graph_models.py +13 -8
- dsp_tools/commands/xmlupload/stash/stash_circular_references.py +48 -115
- dsp_tools/commands/xmlupload/stash/stash_models.py +4 -9
- dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +34 -40
- dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +98 -108
- dsp_tools/commands/xmlupload/upload_config.py +8 -0
- dsp_tools/commands/xmlupload/write_diagnostic_info.py +14 -9
- dsp_tools/commands/xmlupload/xmlupload.py +214 -192
- dsp_tools/config/__init__.py +0 -0
- dsp_tools/config/logger_config.py +69 -0
- dsp_tools/{utils → config}/warnings_config.py +4 -1
- dsp_tools/error/__init__.py +0 -0
- dsp_tools/error/custom_warnings.py +39 -0
- dsp_tools/error/exceptions.py +204 -0
- dsp_tools/error/problems.py +10 -0
- dsp_tools/error/xmllib_errors.py +20 -0
- dsp_tools/error/xmllib_warnings.py +54 -0
- dsp_tools/error/xmllib_warnings_util.py +159 -0
- dsp_tools/error/xsd_validation_error_msg.py +19 -0
- dsp_tools/legacy_models/__init__.py +0 -0
- dsp_tools/{models → legacy_models}/datetimestamp.py +7 -7
- dsp_tools/{models → legacy_models}/langstring.py +1 -1
- dsp_tools/{models → legacy_models}/projectContext.py +4 -4
- dsp_tools/resources/schema/data.xsd +108 -83
- dsp_tools/resources/schema/lists-only.json +4 -23
- dsp_tools/resources/schema/project.json +80 -35
- dsp_tools/resources/schema/properties-only.json +1 -4
- dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
- dsp_tools/resources/start-stack/docker-compose.yml +34 -30
- dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
- dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
- dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
- dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
- dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
- dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
- dsp_tools/utils/ansi_colors.py +32 -0
- dsp_tools/utils/data_formats/__init__.py +0 -0
- dsp_tools/utils/{date_util.py → data_formats/date_util.py} +13 -1
- dsp_tools/utils/data_formats/iri_util.py +30 -0
- dsp_tools/utils/{shared.py → data_formats/shared.py} +1 -35
- dsp_tools/utils/{uri_util.py → data_formats/uri_util.py} +12 -2
- dsp_tools/utils/fuseki_bloating.py +63 -0
- dsp_tools/utils/json_parsing.py +22 -0
- dsp_tools/utils/rdf_constants.py +42 -0
- dsp_tools/utils/rdflib_utils.py +10 -0
- dsp_tools/utils/replace_id_with_iri.py +66 -0
- dsp_tools/utils/request_utils.py +238 -0
- dsp_tools/utils/xml_parsing/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
- dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
- dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
- dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
- dsp_tools/xmllib/CLAUDE.md +302 -0
- dsp_tools/xmllib/__init__.py +49 -0
- dsp_tools/xmllib/general_functions.py +877 -0
- dsp_tools/xmllib/internal/__init__.py +0 -0
- dsp_tools/xmllib/internal/checkers.py +162 -0
- dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
- dsp_tools/xmllib/internal/constants.py +46 -0
- dsp_tools/xmllib/internal/input_converters.py +155 -0
- dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
- dsp_tools/xmllib/internal/serialise_resource.py +177 -0
- dsp_tools/xmllib/internal/serialise_values.py +152 -0
- dsp_tools/xmllib/internal/type_aliases.py +11 -0
- dsp_tools/xmllib/models/config_options.py +28 -0
- dsp_tools/xmllib/models/date_formats.py +48 -0
- dsp_tools/xmllib/models/dsp_base_resources.py +1380 -400
- dsp_tools/xmllib/models/internal/__init__.py +0 -0
- dsp_tools/xmllib/models/internal/file_values.py +172 -0
- dsp_tools/xmllib/models/internal/geometry.py +162 -0
- dsp_tools/xmllib/models/{migration_metadata.py → internal/migration_metadata.py} +14 -10
- dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
- dsp_tools/xmllib/models/internal/values.py +342 -0
- dsp_tools/xmllib/models/licenses/__init__.py +0 -0
- dsp_tools/xmllib/models/licenses/other.py +59 -0
- dsp_tools/xmllib/models/licenses/recommended.py +107 -0
- dsp_tools/xmllib/models/permissions.py +41 -0
- dsp_tools/xmllib/models/res.py +1782 -0
- dsp_tools/xmllib/models/root.py +313 -26
- dsp_tools/xmllib/value_checkers.py +310 -47
- dsp_tools/xmllib/value_converters.py +765 -8
- dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
- dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
- dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
- {dsp_tools-9.1.0.post11.dist-info → dsp_tools-18.3.0.post13.dist-info}/entry_points.txt +1 -0
- dsp_tools/commands/project/create/project_create.py +0 -1107
- dsp_tools/commands/project/create/project_create_lists.py +0 -204
- dsp_tools/commands/project/create/project_validate.py +0 -453
- dsp_tools/commands/project/models/project_definition.py +0 -12
- dsp_tools/commands/rosetta.py +0 -124
- dsp_tools/commands/template.py +0 -30
- dsp_tools/commands/xml_validate/api_connection.py +0 -122
- dsp_tools/commands/xml_validate/deserialise_input.py +0 -135
- dsp_tools/commands/xml_validate/make_data_rdf.py +0 -193
- dsp_tools/commands/xml_validate/models/data_deserialised.py +0 -108
- dsp_tools/commands/xml_validate/models/data_rdf.py +0 -214
- dsp_tools/commands/xml_validate/models/input_problems.py +0 -191
- dsp_tools/commands/xml_validate/models/validation.py +0 -29
- dsp_tools/commands/xml_validate/reformat_validaton_result.py +0 -89
- dsp_tools/commands/xml_validate/sparql/construct_shapes.py +0 -16
- dsp_tools/commands/xml_validate/xml_validate.py +0 -151
- dsp_tools/commands/xmlupload/check_consistency_with_ontology.py +0 -253
- dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py +0 -236
- dsp_tools/commands/xmlupload/models/deserialise/xmlresource.py +0 -171
- dsp_tools/commands/xmlupload/models/namespace_context.py +0 -39
- dsp_tools/commands/xmlupload/models/ontology_lookup_models.py +0 -161
- dsp_tools/commands/xmlupload/models/ontology_problem_models.py +0 -178
- dsp_tools/commands/xmlupload/models/serialise/jsonld_serialiser.py +0 -40
- dsp_tools/commands/xmlupload/models/serialise/serialise_value.py +0 -51
- dsp_tools/commands/xmlupload/ontology_client.py +0 -92
- dsp_tools/commands/xmlupload/project_client.py +0 -91
- dsp_tools/commands/xmlupload/read_validate_xml_file.py +0 -99
- dsp_tools/models/custom_warnings.py +0 -31
- dsp_tools/models/exceptions.py +0 -90
- dsp_tools/resources/0100-template-repo/template.json +0 -45
- dsp_tools/resources/0100-template-repo/template.xml +0 -27
- dsp_tools/resources/start-stack/docker-compose-validation.yml +0 -5
- dsp_tools/resources/start-stack/start-stack-config.yml +0 -4
- dsp_tools/resources/xml_validate/api-shapes.ttl +0 -411
- dsp_tools/resources/xml_validate/replace_namespace.xslt +0 -61
- dsp_tools/utils/connection_live.py +0 -383
- dsp_tools/utils/iri_util.py +0 -14
- dsp_tools/utils/logger_config.py +0 -41
- dsp_tools/utils/set_encoder.py +0 -20
- dsp_tools/utils/xml_utils.py +0 -145
- dsp_tools/utils/xml_validation.py +0 -197
- dsp_tools/utils/xml_validation_models.py +0 -68
- dsp_tools/xmllib/models/file_values.py +0 -78
- dsp_tools/xmllib/models/resource.py +0 -415
- dsp_tools/xmllib/models/values.py +0 -428
- dsp_tools-9.1.0.post11.dist-info/METADATA +0 -130
- dsp_tools-9.1.0.post11.dist-info/RECORD +0 -167
- dsp_tools-9.1.0.post11.dist-info/WHEEL +0 -4
- dsp_tools-9.1.0.post11.dist-info/licenses/LICENSE +0 -674
- /dsp_tools/{commands/excel2json/new_lists → clients}/__init__.py +0 -0
- /dsp_tools/commands/{excel2json/new_lists/models → create}/__init__.py +0 -0
- /dsp_tools/commands/{project → create/create_on_server}/__init__.py +0 -0
- /dsp_tools/commands/{project/create → create/models}/__init__.py +0 -0
- /dsp_tools/commands/{project/models → create/parsing}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate → create/serialisation}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate/models → excel2json/lists}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate/sparql → excel2json/lists/models}/__init__.py +0 -0
- /dsp_tools/commands/excel2json/{new_lists → lists}/models/deserialise.py +0 -0
- /dsp_tools/commands/{xmlupload/models/deserialise → get}/__init__.py +0 -0
- /dsp_tools/commands/{xmlupload/models/serialise → get/legacy_models}/__init__.py +0 -0
- /dsp_tools/commands/{project/models → get/legacy_models}/helpers.py +0 -0
- /dsp_tools/{models → commands/get/models}/__init__.py +0 -0

Deleted file `dsp_tools/commands/xml_validate/xml_validate.py`:

```diff
@@ -1,151 +0,0 @@
-import warnings
-from copy import deepcopy
-from pathlib import Path
-
-from lxml import etree
-from rdflib import SH
-from rdflib import Graph
-from termcolor import cprint
-
-from dsp_tools.commands.xml_validate.api_connection import OntologyConnection
-from dsp_tools.commands.xml_validate.api_connection import ShaclValidator
-from dsp_tools.commands.xml_validate.deserialise_input import deserialise_xml
-from dsp_tools.commands.xml_validate.make_data_rdf import make_data_rdf
-from dsp_tools.commands.xml_validate.models.data_deserialised import ProjectDeserialised
-from dsp_tools.commands.xml_validate.models.data_rdf import DataRDF
-from dsp_tools.commands.xml_validate.models.validation import ValidationReport
-from dsp_tools.commands.xml_validate.reformat_validaton_result import reformat_validation_graph
-from dsp_tools.commands.xml_validate.sparql.construct_shapes import construct_shapes_graph
-from dsp_tools.models.custom_warnings import DspToolsUserWarning
-from dsp_tools.utils.xml_utils import parse_xml_file
-from dsp_tools.utils.xml_utils import remove_comments_from_element_tree
-from dsp_tools.utils.xml_utils import transform_into_localnames
-from dsp_tools.utils.xml_validation import validate_xml
-
-LIST_SEPARATOR = "\n - "
-
-
-def xml_validate(filepath: Path, api_url: str, dev_route: bool, save_graphs: bool) -> bool:  # noqa: ARG001 (unused argument)
-    """
-    Takes a file and project information and validates it against the ontologies on the server.
-
-    Args:
-        filepath: path to the xml data file
-        api_url: url of the api host
-        dev_route: if this flag is set features that are still in development will be used
-        save_graphs: if this flag is set, all the graphs will be saved in a folder
-
-    Returns:
-        true unless it crashed
-    """
-    _inform_about_experimental_feature()
-    data_rdf, shortcode = _get_data_info_from_file(filepath, api_url)
-    onto_con = OntologyConnection(api_url, shortcode)
-    ontologies, shapes = _get_shacl(onto_con)
-    data_graph = data_rdf.make_graph()
-    generic_filepath = Path()
-    if save_graphs:
-        generic_filepath = _save_graphs(filepath, ontologies, shapes, data_graph)
-    # data_graph += ontologies
-    val = ShaclValidator(api_url)
-    report = _validate(val, shapes, data_graph)
-    if save_graphs:
-        report.validation_graph.serialize(f"{generic_filepath}_VALIDATION_REPORT.ttl")
-    if report.conforms:
-        cprint("\n Validation passed! ", color="green", attrs=["bold", "reverse"])
-    else:
-        reformatted = reformat_validation_graph(report.validation_graph, data_graph)
-        problem_msg = reformatted.get_msg(filepath)
-        cprint("\n Validation errors found! ", color="light_red", attrs=["bold", "reverse"])
-        print(problem_msg)
-        if reformatted.unexpected_results:
-            reformatted.unexpected_results.save_inform_user(
-                results_graph=report.validation_graph,
-                shacl=report.shacl_graph,
-                data=data_graph,
-            )
-    return True
-
-
-def _inform_about_experimental_feature() -> None:
-    what_is_validated = [
-        "This is an experimental feature, it will change and be extended continuously. "
-        "The following information of your data is being validated:",
-        "Cardinalities",
-    ]
-    warnings.warn(DspToolsUserWarning(LIST_SEPARATOR.join(what_is_validated)))
-
-
-def _save_graphs(filepath: Path, onto: Graph, shacl: Graph, data: Graph) -> Path:
-    parent_directory = filepath.parent
-    new_directory = parent_directory / "graphs"
-    new_directory.mkdir(exist_ok=True)
-    cprint(f"\n Saving graphs to {new_directory} ", color="light_blue", attrs=["bold", "reverse"])
-    generic_filepath = new_directory / filepath.stem
-    onto.serialize(f"{generic_filepath}_ONTO.ttl")
-    shacl.serialize(f"{generic_filepath}_SHACL.ttl")
-    data.serialize(f"{generic_filepath}_DATA.ttl")
-    onto_data = onto + data
-    onto_data.serialize(f"{generic_filepath}_ONTO_DATA.ttl")
-    return generic_filepath
-
-
-def _validate(validator: ShaclValidator, shapes_graph: Graph, data_graph: Graph) -> ValidationReport:
-    shape_str = shapes_graph.serialize(format="ttl")
-    data_str = data_graph.serialize(format="ttl")
-    results = validator.validate(data_str, shape_str)
-    conforms = bool(next(results.objects(None, SH.conforms)))
-    return ValidationReport(
-        conforms=conforms,
-        validation_graph=results,
-        shacl_graph=shapes_graph,
-        data_graph=data_graph,
-    )
-
-
-def _get_shacl(onto_con: OntologyConnection) -> tuple[Graph, Graph]:
-    ontologies = _get_project_ontos(onto_con)
-    knora_ttl = onto_con.get_knora_api()
-    kag = Graph()
-    kag.parse(data=knora_ttl, format="ttl")
-    onto_for_construction = deepcopy(ontologies) + kag
-    shapes = construct_shapes_graph(onto_for_construction)
-    shapes += ontologies
-    return ontologies, shapes
-
-
-def _get_project_ontos(onto_con: OntologyConnection) -> Graph:
-    all_ontos = onto_con.get_ontologies()
-    onto_g = Graph()
-    for onto in all_ontos:
-        og = Graph()
-        og.parse(data=onto, format="ttl")
-        onto_g += og
-    return onto_g
-
-
-def _get_data_info_from_file(file: Path, api_url: str) -> tuple[DataRDF, str]:
-    cleaned_root = _parse_and_clean_file(file, api_url)
-    deserialised: ProjectDeserialised = deserialise_xml(cleaned_root)
-    rdf_data: DataRDF = make_data_rdf(deserialised.data)
-    return rdf_data, deserialised.info.shortcode
-
-
-def _parse_and_clean_file(file: Path, api_url: str) -> etree._Element:
-    root = parse_xml_file(file)
-    root = remove_comments_from_element_tree(root)
-    validate_xml(root)
-    root = transform_into_localnames(root)
-    return _replace_namespaces(root, api_url)
-
-
-def _replace_namespaces(root: etree._Element, api_url: str) -> etree._Element:
-    with open("src/dsp_tools/resources/xml_validate/replace_namespace.xslt", "rb") as xslt_file:
-        xslt_data = xslt_file.read()
-    shortcode = root.attrib["shortcode"]
-    default_ontology = root.attrib["default-ontology"]
-    namespace = f"{api_url}/ontology/{shortcode}/{default_ontology}/v2#"
-    xslt_root = etree.XML(xslt_data)
-    transform = etree.XSLT(xslt_root)
-    replacement_value = etree.XSLT.strparam(namespace)
-    return transform(root, replacementValue=replacement_value).getroot()
```
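
The removed `_replace_namespaces` step relies on lxml's XSLT parameter mechanism (`etree.XSLT.strparam`) to inject the computed ontology namespace into a stylesheet. Below is a minimal, self-contained sketch of that technique; the inline stylesheet, the `restype` rewrite rule, and the example namespace are simplified stand-ins for illustration, not the contents of the removed `replace_namespace.xslt`.

```python
from lxml import etree

# Simplified stand-in stylesheet: copy the document unchanged, but rebuild the
# "restype" attribute from a string parameter passed in at transformation time.
XSLT_SKETCH = b"""\
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:param name="replacementValue"/>
  <xsl:template match="@*|node()">
    <xsl:copy><xsl:apply-templates select="@*|node()"/></xsl:copy>
  </xsl:template>
  <xsl:template match="@restype">
    <xsl:attribute name="restype">
      <xsl:value-of select="concat($replacementValue, substring-after(., ':'))"/>
    </xsl:attribute>
  </xsl:template>
</xsl:stylesheet>
"""


def replace_namespace(root: etree._Element, namespace: str) -> etree._Element:
    transform = etree.XSLT(etree.XML(XSLT_SKETCH))
    # strparam() quotes the value so it can be passed safely as an XSLT string parameter
    return transform(root, replacementValue=etree.XSLT.strparam(namespace)).getroot()


doc = etree.XML('<resource restype=":Book" id="book_1"/>')
result = replace_namespace(doc, "http://api.example.org/ontology/0001/onto/v2#")
print(result.get("restype"))  # http://api.example.org/ontology/0001/onto/v2#Book
```

In the removed module, the stylesheet was loaded from `replace_namespace.xslt` and the namespace was built from the `shortcode` and `default-ontology` attributes of the XML root, as shown in `_replace_namespaces` above.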

Deleted file `dsp_tools/commands/xmlupload/check_consistency_with_ontology.py`:

````diff
@@ -1,253 +0,0 @@
-from collections import defaultdict
-from datetime import datetime
-from pathlib import Path
-from typing import cast
-
-import regex
-from lxml import etree
-from regex import Pattern
-
-from dsp_tools.commands.xmlupload.models.ontology_lookup_models import AllowedEncodings
-from dsp_tools.commands.xmlupload.models.ontology_lookup_models import ProjectOntosInformation
-from dsp_tools.commands.xmlupload.models.ontology_lookup_models import PropertyTextValueTypes
-from dsp_tools.commands.xmlupload.models.ontology_lookup_models import TextValueData
-from dsp_tools.commands.xmlupload.models.ontology_lookup_models import get_text_value_types_of_properties_from_onto
-from dsp_tools.commands.xmlupload.models.ontology_lookup_models import make_project_onto_information
-from dsp_tools.commands.xmlupload.models.ontology_problem_models import InvalidOntologyElementsInData
-from dsp_tools.commands.xmlupload.models.ontology_problem_models import InvalidTextValueEncodings
-from dsp_tools.commands.xmlupload.ontology_client import OntologyClient
-from dsp_tools.models.exceptions import InputError
-
-defaultOntologyColon: Pattern[str] = regex.compile(r"^:\w+$")
-knoraUndeclared: Pattern[str] = regex.compile(r"^\w+$")
-genericPrefixedOntology: Pattern[str] = regex.compile(r"^[\w\-]+:\w+$")
-KNORA_BASE_PROPERTIES = {
-    "bitstream",
-    "iiif-uri",
-    "isSegmentOf",
-    "hasSegmentBounds",
-    "hasTitle",
-    "hasComment",
-    "hasDescription",
-    "hasKeyword",
-    "relatesTo",
-}
-
-
-def do_xml_consistency_check_with_ontology(onto_client: OntologyClient, root: etree._Element) -> None:
-    """
-    This function takes an OntologyClient and the root of an XML.
-    It retrieves the ontologies from the server.
-    It analyses if any classes or properties are used that are not in the ontology.
-    It analyses if any properties have encodings that are not consistent with those specified in the ontology.
-
-    Args:
-        onto_client: client for the ontology retrieval
-        root: root of the XML
-
-    Raises:
-        InputError: if there are any invalid properties or classes and/or text values with wrong encodings.
-    """
-    cls_prop_lookup, text_value_encoding_lookup = _get_onto_lookups(onto_client)
-    classes_in_data, properties_in_data = _get_all_classes_and_properties_from_data(root)
-    problem_str = ""
-    problem_str += _check_all_classes_and_properties_in_onto(classes_in_data, properties_in_data, cls_prop_lookup)
-    problem_str += _check_correctness_all_text_value_encodings(root, text_value_encoding_lookup)
-    if problem_str:
-        raise InputError(problem_str)
-
-
-def _get_onto_lookups(onto_client: OntologyClient) -> tuple[ProjectOntosInformation, PropertyTextValueTypes]:
-    ontos = onto_client.get_all_project_ontologies_from_server()
-    text_value_encoding_lookup = get_text_value_types_of_properties_from_onto(ontos, onto_client.default_ontology)
-    ontos["knora-api"] = onto_client.get_knora_api_ontology_from_server()
-    return make_project_onto_information(onto_client.default_ontology, ontos), text_value_encoding_lookup
-
-
-def _check_all_classes_and_properties_in_onto(
-    classes_in_data: dict[str, list[str]],
-    properties_in_data: dict[str, list[str]],
-    onto_check_info: ProjectOntosInformation,
-) -> str:
-    class_problems = _find_all_class_types_in_onto(classes_in_data, onto_check_info)
-    property_problems = _find_all_properties_in_onto(properties_in_data, onto_check_info)
-    if not class_problems and not property_problems:
-        return ""
-    problems = InvalidOntologyElementsInData(
-        classes=class_problems, properties=property_problems, ontos_on_server=list(onto_check_info.onto_lookup.keys())
-    )
-    msg, df = problems.execute_problem_protocol()
-    if df is not None:
-        csv_file = f"XML_syntax_errors_{datetime.now().strftime('%Y-%m-%d_%H%M%S')}.csv"
-        df.to_csv(path_or_buf=Path(Path.cwd(), csv_file), index=False)
-        msg += (
-            "\n\n---------------------------------------\n\n"
-            f"\nAll the problems are listed in the file: '{Path.cwd()}/{csv_file}'\n"
-        )
-    return msg
-
-
-def _get_all_classes_and_properties_from_data(
-    root: etree._Element,
-) -> tuple[dict[str, list[str]], dict[str, list[str]]]:
-    cls_dict = _get_all_class_types_and_ids_from_data(root)
-    prop_dict: defaultdict[str, list[str]] = defaultdict(list)
-    for resource in root.iterchildren(tag="resource"):
-        prop_dict = _get_all_property_names_and_resource_ids_one_resource(resource, prop_dict)
-    return cls_dict, prop_dict
-
-
-def _get_all_class_types_and_ids_from_data(root: etree._Element) -> dict[str, list[str]]:
-    cls_dict: dict[str, list[str]] = {}
-    for resource in root.iterchildren(tag="resource"):
-        restype = resource.attrib["restype"]
-        if restype in cls_dict:
-            cls_dict[restype].append(resource.attrib["id"])
-        else:
-            cls_dict[restype] = [resource.attrib["id"]]
-    return cls_dict
-
-
-def _get_all_property_names_and_resource_ids_one_resource(
-    resource: etree._Element, prop_dict: defaultdict[str, list[str]]
-) -> defaultdict[str, list[str]]:
-    for prop in resource.iterchildren():
-        if prop.tag in KNORA_BASE_PROPERTIES:
-            continue
-        prop_name = prop.attrib["name"]
-        prop_dict[prop_name].append(resource.attrib["id"])
-    return prop_dict
-
-
-def _find_all_class_types_in_onto(
-    classes: dict[str, list[str]], onto_check_info: ProjectOntosInformation
-) -> list[tuple[str, list[str], str]]:
-    problem_list = []
-    for cls_type, ids in classes.items():
-        if problem := _find_one_class_type_in_onto(cls_type, onto_check_info):
-            problem_list.append((cls_type, ids, problem))
-    return problem_list
-
-
-def _find_one_class_type_in_onto(cls_type: str, onto_check_info: ProjectOntosInformation) -> str | None:
-    prefix, cls_ = _get_separate_prefix_and_iri_from_onto_prop_or_cls(cls_type, onto_check_info.default_ontology_prefix)
-    if not prefix:
-        return "Class name does not follow a known ontology pattern"
-    if onto := onto_check_info.onto_lookup.get(prefix):
-        return "Invalid Class Type" if cls_ not in onto.classes else None
-    else:
-        return "Unknown ontology prefix"
-
-
-def _find_all_properties_in_onto(
-    properties: dict[str, list[str]], onto_check_info: ProjectOntosInformation
-) -> list[tuple[str, list[str], str]]:
-    problem_list = []
-    for prop_name, ids in properties.items():
-        if problem := _find_one_property_in_onto(prop_name, onto_check_info):
-            problem_list.append((prop_name, ids, problem))
-    return problem_list
-
-
-def _find_one_property_in_onto(prop_name: str, onto_check_info: ProjectOntosInformation) -> str | None:
-    prefix, prop = _get_separate_prefix_and_iri_from_onto_prop_or_cls(
-        prop_name, onto_check_info.default_ontology_prefix
-    )
-    if not prefix:
-        return "Property name does not follow a known ontology pattern"
-    if onto := onto_check_info.onto_lookup.get(prefix):
-        return "Invalid Property" if prop not in onto.properties else None
-    else:
-        return "Unknown ontology prefix"
-
-
-def _get_separate_prefix_and_iri_from_onto_prop_or_cls(
-    prop_or_cls: str, default_ontology_prefix: str
-) -> tuple[str, ...] | tuple[None, None]:
-    if defaultOntologyColon.match(prop_or_cls):
-        return default_ontology_prefix, prop_or_cls.lstrip(":")
-    elif knoraUndeclared.match(prop_or_cls):
-        return "knora-api", prop_or_cls
-    elif genericPrefixedOntology.match(prop_or_cls):
-        return tuple(prop_or_cls.split(":"))
-    else:
-        return None, None
-
-
-def _check_correctness_all_text_value_encodings(root: etree._Element, text_prop_look_up: PropertyTextValueTypes) -> str:
-    """
-    This function analyses if all the encodings for the `<text>` elements are correct
-    with respect to the specification in the ontology.
-
-    For example, if the ontology specifies that `:hasSimpleText` is without mark-up,
-    the encoding has to be `utf8`.
-
-
-    This is correct:
-    ```
-    <text-prop name=":hasSimpleText">
-        <text encoding="utf8">Dies ist ein einfacher Text ohne Markup</text>
-    </text-prop>
-    ```
-
-    This is wrong:
-    ```
-    <text-prop name=":hasSimpleText">
-        <text encoding="xml">Dies ist ein einfacher Text ohne Markup</text>
-    </text-prop>
-    ```
-
-    The accepted encodings are `xml` or `utf8`
-
-    Args:
-        root: root of the data xml document
-        text_prop_look_up: a lookup containing the property names and their specified types
-
-    Returns:
-        A string communicating the problem, if there are none the string is empty.
-    """
-    text_values_in_data = _get_all_ids_and_props_and_encodings_from_root(root)
-    invalid_text_values = [x for x in text_values_in_data if not _check_correctness_of_one_prop(x, text_prop_look_up)]
-    if not invalid_text_values:
-        return ""
-    msg, df = InvalidTextValueEncodings(invalid_text_values).execute_problem_protocol()
-    if df is not None:
-        csv_file = Path(f"text_value_encoding_errors_{datetime.now().strftime('%Y-%m-%d_%H%M%S')}.csv")
-        df.to_csv(path_or_buf=csv_file, index=False)
-        msg += (
-            "\n\n---------------------------------------\n\n"
-            f"All the problems are listed in the file: '{csv_file.absolute()}'"
-        )
-    return msg
-
-
-def _get_all_ids_and_props_and_encodings_from_root(root: etree._Element) -> list[TextValueData]:
-    res_list = []
-    for res_input in root.iterchildren(tag="resource"):
-        res_list.extend(_get_id_and_props_and_encodings_from_one_resource(res_input))
-    return res_list
-
-
-def _get_id_and_props_and_encodings_from_one_resource(resource: etree._Element) -> list[TextValueData]:
-    res_id = resource.attrib["id"]
-    res_type = resource.attrib["restype"]
-    return [
-        _get_prop_and_encoding_from_one_property(res_id, res_type, child)
-        for child in resource.iterchildren(tag="text-prop")
-    ]
-
-
-def _get_prop_and_encoding_from_one_property(res_id: str, res_type: str, prop: etree._Element) -> TextValueData:
-    prop_name = prop.attrib["name"]
-    encoding = cast(AllowedEncodings, prop[0].attrib["encoding"])
-    return TextValueData(res_id, res_type, prop_name, encoding)
-
-
-def _check_correctness_of_one_prop(text_val: TextValueData, text_prop_look_up: PropertyTextValueTypes) -> bool:
-    match text_val.encoding:
-        case "xml":
-            return text_val.property_name in text_prop_look_up.formatted_text_props
-        case "utf8":
-            return text_val.property_name in text_prop_look_up.unformatted_text_props
-        case _:
-            return False
````
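
The removed consistency check classifies every class and property name in the data with the three regexes at the top of the file before looking it up in the server ontologies. The stdlib-only sketch below reproduces that prefix-splitting rule (using `re` instead of the third-party `regex` package, which behaves identically for these patterns); the example inputs and the `rosetta` prefix are illustrative only.

```python
import re

DEFAULT_ONTO_COLON = re.compile(r"^:\w+$")       # ":hasName"         -> default ontology
KNORA_UNDECLARED = re.compile(r"^\w+$")          # "hasComment"       -> knora-api
GENERIC_PREFIXED = re.compile(r"^[\w\-]+:\w+$")  # "rosetta:hasName"  -> explicit prefix


def split_prefix(prop_or_cls: str, default_prefix: str) -> tuple[str | None, str | None]:
    """Split a class or property name into (ontology prefix, local name)."""
    if DEFAULT_ONTO_COLON.match(prop_or_cls):
        return default_prefix, prop_or_cls.lstrip(":")
    if KNORA_UNDECLARED.match(prop_or_cls):
        return "knora-api", prop_or_cls
    if GENERIC_PREFIXED.match(prop_or_cls):
        prefix, name = prop_or_cls.split(":")
        return prefix, name
    return None, None


print(split_prefix(":hasName", "rosetta"))            # ('rosetta', 'hasName')
print(split_prefix("hasComment", "rosetta"))          # ('knora-api', 'hasComment')
print(split_prefix("other-onto:hasName", "rosetta"))  # ('other-onto', 'hasName')
print(split_prefix("not a prefix", "rosetta"))        # (None, None)
```

A `(None, None)` result is what the removed code reported as "does not follow a known ontology pattern"; an unknown prefix or an unknown local name was reported separately after the ontology lookup.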

Deleted file `dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py`:

```diff
@@ -1,236 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import dataclass
-from dataclasses import field
-from typing import Optional
-from typing import Union
-from typing import cast
-
-import regex
-from lxml import etree
-
-from dsp_tools.commands.xmlupload.models.formatted_text_value import FormattedTextValue
-from dsp_tools.models.exceptions import XmlUploadError
-
-
-@dataclass(frozen=True)
-class XMLProperty:
-    """
-    Represents a property of a resource in the XML used for data import.
-
-    Attributes:
-        name: The name of the property
-        valtype: The type of the property
-        values: The list of values of the property
-    """
-
-    name: str
-    valtype: str
-    values: list[XMLValue]
-
-    @staticmethod
-    def from_node(node: etree._Element, valtype: str, default_ontology: str) -> XMLProperty:
-        """
-        The factory for the DSP property
-
-        Args:
-            node: the property node, p.ex. `<decimal-prop></decimal-prop>`
-            valtype: the type of value given by the name of the property node, p.ex. decimal in `<decimal-prop>`
-            default_ontology: the name of the ontology
-
-        Raises:
-            XmlUploadError: If an upload fails
-
-        Returns:
-            The DSP property
-        """
-        name = XMLProperty._get_name(node, default_ontology)
-        if node.tag.endswith("-prop"):
-            values = XMLProperty._get_values_from_normal_props(node, valtype)
-        else:
-            values = [XMLProperty._get_value_from_knora_base_prop(node)]
-        return XMLProperty(name, valtype, values)
-
-    @staticmethod
-    def _get_name(node: etree._Element, default_ontology: str) -> str:
-        # get the property name which is in format namespace:propertyname, p.ex. rosetta:hasName
-        orig = node.attrib.get("name")
-        if not orig:  # tags like <isSegmentOf> don't have a name attribute
-            return f"knora-api:{node.tag}"
-        elif ":" not in orig:
-            return f"knora-api:{orig}"
-        elif orig.startswith(":"):
-            # replace an empty namespace with the default ontology name
-            return f"{default_ontology}{orig}"
-        else:
-            return orig
-
-    @staticmethod
-    def _get_values_from_normal_props(node: etree._Element, valtype: str) -> list[XMLValue]:
-        # parse the subnodes of the property nodes which contain the actual values of the property
-        listname = node.attrib.get("list")  # save the list name if given (only for lists)
-        values: list[XMLValue] = []
-        for subnode in node:
-            if subnode.tag == valtype:  # the subnode must correspond to the expected value type
-                values.append(XMLValue.from_node(subnode, valtype, listname))
-            else:
-                raise XmlUploadError(f"ERROR Unexpected tag: '{subnode.tag}'. Property may contain only value tags!")
-        return values
-
-    @staticmethod
-    def _get_value_from_knora_base_prop(node: etree._Element) -> XMLValue:
-        resrefs = set()
-        if node.tag.endswith("hasSegmentBounds"):
-            value: str | FormattedTextValue = f"{node.attrib["segment_start"]}:{node.attrib["segment_end"]}"
-        elif node.tag.endswith(("hasDescription", "hasComment")):
-            value = _extract_formatted_text_from_node(node)
-            resrefs = value.find_internal_ids()
-        else:
-            str_orig = "".join(node.itertext())
-            value = _cleanup_unformatted_text(str_orig)
-        comment = node.attrib.get("comment")
-        permissions = node.attrib.get("permissions")
-        link_uuid = node.attrib.get("linkUUID")
-        return XMLValue(value=value, resrefs=resrefs, comment=comment, permissions=permissions, link_uuid=link_uuid)
-
-
-@dataclass
-class XMLValue:
-    """Represents a value of a resource property in the XML used for data import"""
-
-    value: Union[str, FormattedTextValue]
-    resrefs: set[str] = field(default_factory=set)
-    comment: Optional[str] = None
-    permissions: Optional[str] = None
-    link_uuid: Optional[str] = None
-
-    @staticmethod
-    def from_node(
-        node: etree._Element,
-        val_type: str,
-        listname: Optional[str] = None,
-    ) -> XMLValue:
-        """Factory method to create an XMLValue from an XML node"""
-        value: Union[str, FormattedTextValue] = ""
-        resrefs = set()
-        comment = node.get("comment")
-        permissions = node.get("permissions")
-        if val_type == "text" and node.get("encoding") == "xml":
-            value = _extract_formatted_text_from_node(node)
-            resrefs = value.find_internal_ids()
-        elif val_type == "text" and node.get("encoding") == "utf8":
-            str_orig = "".join(node.itertext())
-            value = _cleanup_unformatted_text(str_orig)
-        elif val_type == "list":
-            listname = cast(str, listname)
-            value = f"{listname}:" + "".join(node.itertext())
-        else:
-            value = "".join(node.itertext())
-        link_uuid = node.attrib.get("linkUUID")  # not all richtexts have a link, so this attribute is optional
-        return XMLValue(value=value, resrefs=resrefs, comment=comment, permissions=permissions, link_uuid=link_uuid)
-
-
-def _extract_formatted_text_from_node(node: etree._Element) -> FormattedTextValue:
-    xmlstr = etree.tostring(node, encoding="unicode", method="xml")
-    xmlstr = regex.sub(f"<{node.tag}.*?>|</{node.tag}>", "", xmlstr)
-    xmlstr = _cleanup_formatted_text(xmlstr)
-    return FormattedTextValue(xmlstr)
-
-
-def _cleanup_formatted_text(xmlstr_orig: str) -> str:
-    """
-    In a xml-encoded text value from the XML file,
-    there may be non-text characters that must be removed.
-    This function:
-    - replaces (multiple) line breaks by a space
-    - replaces multiple spaces or tabstops by a single space (except within `<code>` or `<pre>` tags)
-
-    Args:
-        xmlstr_orig: content of the tag from the XML file, in serialized form
-
-    Returns:
-        purged string, suitable to be sent to DSP-API
-    """
-    # replace (multiple) line breaks by a space
-    xmlstr = regex.sub("\n+", " ", xmlstr_orig)
-
-    # replace multiple spaces or tabstops by a single space (except within <code> or <pre> tags)
-    # the regex selects all spaces/tabstops not followed by </xyz> without <xyz in between.
-    # credits: https://stackoverflow.com/a/46937770/14414188
-    xmlstr = regex.sub("( {2,}|\t+)(?!(.(?!<(code|pre)))*</(code|pre)>)", " ", xmlstr)
-
-    # remove spaces after <br/> tags (except within <code> tags)
-    xmlstr = regex.sub("((?<=<br/?>) )(?!(.(?!<code))*</code>)", "", xmlstr)
-
-    # remove leading and trailing spaces
-    xmlstr = xmlstr.strip()
-
-    return xmlstr
-
-
-def _cleanup_unformatted_text(string_orig: str) -> str:
-    """
-    In a utf8-encoded text value from the XML file,
-    there may be non-text characters that must be removed.
-    This function:
-    - removes the `<text>` tags
-    - replaces multiple spaces or tabstops by a single space
-
-    Args:
-        string_orig: original string from the XML file
-
-    Returns:
-        purged string, suitable to be sent to DSP-API
-    """
-    # remove the <text> tags
-    string = regex.sub("<text.*?>", "", string_orig)
-    string = regex.sub("</text>", "", string)
-
-    # replace multiple spaces or tabstops by a single space
-    string = regex.sub(r" {2,}|\t+", " ", string)
-
-    # remove leading and trailing spaces (of every line, but also of the entire string)
-    string = "\n".join([s.strip() for s in string.split("\n")])
-    return string.strip()
-
-
-@dataclass(frozen=True)
-class XMLBitstream:
-    """
-    Represents a bitstream object (file) of a resource in the XML used for data import
-
-    Attributes:
-        value: The file path of the bitstream object
-        permissions: Reference to the set of permissions for the bitstream object
-    """
-
-    value: str
-    permissions: Optional[str] = None
-
-    @staticmethod
-    def from_node(node: etree._Element) -> XMLBitstream:
-        """Factory that parses a bitstream node from the XML DOM"""
-        if not node.text:
-            raise XmlUploadError("Empty bitstream tag")
-        return XMLBitstream(node.text, node.get("permissions"))
-
-
-@dataclass(frozen=True)
-class IIIFUriInfo:
-    """
-    Represents a IIIF URI of a resource in the XML used for data import
-
-    Attributes:
-        value: The IIIF URI of the object
-        permissions: Reference to the set of permissions for the IIIF URI
-    """
-
-    value: str
-    permissions: str | None = None
-
-    @staticmethod
-    def from_node(node: etree._Element) -> IIIFUriInfo:
-        """Factory that parses an IIIF URI node from the XML DOM"""
-        if not node.text:
-            raise XmlUploadError("Empty IIIF URI tag")
-        return IIIFUriInfo(node.text, node.get("permissions"))
```