dsp-tools 9.1.0.post11__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsp_tools/__init__.py +4 -0
- dsp_tools/cli/args.py +36 -0
- dsp_tools/cli/call_action.py +51 -231
- dsp_tools/cli/call_action_files_only.py +101 -0
- dsp_tools/cli/call_action_with_network.py +207 -0
- dsp_tools/cli/create_parsers.py +156 -58
- dsp_tools/cli/entry_point.py +56 -26
- dsp_tools/cli/utils.py +87 -0
- dsp_tools/clients/CLAUDE.md +420 -0
- dsp_tools/clients/authentication_client.py +14 -0
- dsp_tools/clients/authentication_client_live.py +66 -0
- dsp_tools/{utils → clients}/connection.py +2 -18
- dsp_tools/clients/connection_live.py +233 -0
- dsp_tools/clients/fuseki_metrics.py +60 -0
- dsp_tools/clients/group_user_clients.py +35 -0
- dsp_tools/clients/group_user_clients_live.py +181 -0
- dsp_tools/clients/legal_info_client.py +23 -0
- dsp_tools/clients/legal_info_client_live.py +132 -0
- dsp_tools/clients/list_client.py +49 -0
- dsp_tools/clients/list_client_live.py +166 -0
- dsp_tools/clients/metadata_client.py +24 -0
- dsp_tools/clients/metadata_client_live.py +47 -0
- dsp_tools/clients/ontology_clients.py +49 -0
- dsp_tools/clients/ontology_create_client_live.py +166 -0
- dsp_tools/clients/ontology_get_client_live.py +80 -0
- dsp_tools/clients/permissions_client.py +68 -0
- dsp_tools/clients/project_client.py +16 -0
- dsp_tools/clients/project_client_live.py +66 -0
- dsp_tools/commands/create/communicate_problems.py +24 -0
- dsp_tools/commands/create/create.py +134 -0
- dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
- dsp_tools/commands/create/create_on_server/classes.py +99 -0
- dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
- dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
- dsp_tools/commands/create/create_on_server/group_users.py +165 -0
- dsp_tools/commands/create/create_on_server/lists.py +163 -0
- dsp_tools/commands/create/create_on_server/mappers.py +12 -0
- dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
- dsp_tools/commands/create/create_on_server/ontology.py +52 -0
- dsp_tools/commands/create/create_on_server/project.py +68 -0
- dsp_tools/commands/create/create_on_server/properties.py +119 -0
- dsp_tools/commands/create/exceptions.py +29 -0
- dsp_tools/commands/create/lists_only.py +66 -0
- dsp_tools/commands/create/models/create_problems.py +87 -0
- dsp_tools/commands/create/models/parsed_ontology.py +88 -0
- dsp_tools/commands/create/models/parsed_project.py +81 -0
- dsp_tools/commands/create/models/rdf_ontology.py +12 -0
- dsp_tools/commands/create/models/server_project_info.py +100 -0
- dsp_tools/commands/create/parsing/parse_lists.py +45 -0
- dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
- dsp_tools/commands/create/parsing/parse_project.py +149 -0
- dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
- dsp_tools/commands/create/project_validate.py +595 -0
- dsp_tools/commands/create/serialisation/ontology.py +119 -0
- dsp_tools/commands/create/serialisation/project.py +44 -0
- dsp_tools/commands/excel2json/CLAUDE.md +101 -0
- dsp_tools/commands/excel2json/json_header.py +57 -23
- dsp_tools/commands/excel2json/{new_lists → lists}/compliance_checks.py +26 -26
- dsp_tools/commands/excel2json/{new_lists/make_new_lists.py → lists/make_lists.py} +19 -18
- dsp_tools/commands/excel2json/{new_lists → lists}/models/input_error.py +1 -12
- dsp_tools/commands/excel2json/{new_lists → lists}/models/serialise.py +9 -5
- dsp_tools/commands/excel2json/{new_lists → lists}/utils.py +4 -4
- dsp_tools/commands/excel2json/models/input_error.py +31 -11
- dsp_tools/commands/excel2json/models/json_header.py +53 -15
- dsp_tools/commands/excel2json/models/ontology.py +4 -3
- dsp_tools/commands/excel2json/{lists.py → old_lists.py} +26 -112
- dsp_tools/commands/excel2json/project.py +78 -34
- dsp_tools/commands/excel2json/properties.py +57 -36
- dsp_tools/commands/excel2json/resources.py +32 -12
- dsp_tools/commands/excel2json/utils.py +20 -1
- dsp_tools/commands/excel2xml/__init__.py +2 -2
- dsp_tools/commands/excel2xml/excel2xml_cli.py +7 -15
- dsp_tools/commands/excel2xml/excel2xml_lib.py +138 -493
- dsp_tools/commands/excel2xml/propertyelement.py +5 -5
- dsp_tools/commands/{project → get}/get.py +29 -13
- dsp_tools/commands/get/get_permissions.py +257 -0
- dsp_tools/commands/get/get_permissions_legacy.py +89 -0
- dsp_tools/commands/{project/models → get/legacy_models}/context.py +6 -6
- dsp_tools/commands/{project/models → get/legacy_models}/group.py +5 -10
- dsp_tools/commands/{project/models → get/legacy_models}/listnode.py +5 -35
- dsp_tools/commands/{project/models → get/legacy_models}/model.py +1 -1
- dsp_tools/commands/{project/models → get/legacy_models}/ontology.py +9 -14
- dsp_tools/commands/{project/models → get/legacy_models}/project.py +13 -6
- dsp_tools/commands/{project/models → get/legacy_models}/propertyclass.py +9 -16
- dsp_tools/commands/{project/models → get/legacy_models}/resourceclass.py +8 -46
- dsp_tools/commands/{project/models → get/legacy_models}/user.py +19 -60
- dsp_tools/commands/get/models/permissions_models.py +10 -0
- dsp_tools/commands/id2iri.py +20 -10
- dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +81 -56
- dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +4 -10
- dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +97 -37
- dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +2 -2
- dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +9 -10
- dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +3 -3
- dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +2 -10
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +12 -2
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +8 -9
- dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +18 -18
- dsp_tools/commands/start_stack.py +126 -77
- dsp_tools/commands/update_legal/CLAUDE.md +344 -0
- dsp_tools/commands/update_legal/__init__.py +0 -0
- dsp_tools/commands/update_legal/core.py +182 -0
- dsp_tools/commands/update_legal/csv_operations.py +135 -0
- dsp_tools/commands/update_legal/models.py +87 -0
- dsp_tools/commands/update_legal/xml_operations.py +247 -0
- dsp_tools/commands/validate_data/CLAUDE.md +159 -0
- dsp_tools/commands/validate_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/constants.py +59 -0
- dsp_tools/commands/validate_data/mappers.py +143 -0
- dsp_tools/commands/validate_data/models/__init__.py +0 -0
- dsp_tools/commands/validate_data/models/api_responses.py +45 -0
- dsp_tools/commands/validate_data/models/input_problems.py +119 -0
- dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
- dsp_tools/commands/validate_data/models/validation.py +106 -0
- dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
- dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
- dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
- dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
- dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
- dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
- dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
- dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
- dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
- dsp_tools/commands/{xml_validate/sparql/resource_shacl.py → validate_data/sparql/cardinality_shacl.py} +45 -47
- dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
- dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
- dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
- dsp_tools/commands/validate_data/utils.py +59 -0
- dsp_tools/commands/validate_data/validate_data.py +283 -0
- dsp_tools/commands/validate_data/validation/__init__.py +0 -0
- dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
- dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
- dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
- dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
- dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
- dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
- dsp_tools/commands/xmlupload/models/formatted_text_value.py +0 -25
- dsp_tools/commands/xmlupload/models/ingest.py +56 -70
- dsp_tools/commands/xmlupload/models/input_problems.py +6 -14
- dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
- dsp_tools/commands/xmlupload/models/permission.py +0 -39
- dsp_tools/commands/xmlupload/models/{deserialise/xmlpermission.py → permissions_parsed.py} +2 -2
- dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
- dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
- dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
- dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
- dsp_tools/commands/xmlupload/models/upload_clients.py +3 -3
- dsp_tools/commands/xmlupload/models/upload_state.py +2 -4
- dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
- dsp_tools/commands/xmlupload/{ark2iri.py → prepare_xml_input/ark2iri.py} +1 -1
- dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
- dsp_tools/commands/xmlupload/{iiif_uri_validator.py → prepare_xml_input/iiif_uri_validator.py} +2 -14
- dsp_tools/commands/xmlupload/{list_client.py → prepare_xml_input/list_client.py} +15 -10
- dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
- dsp_tools/commands/xmlupload/resource_create_client.py +7 -468
- dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
- dsp_tools/commands/xmlupload/stash/{construct_and_analyze_graph.py → analyse_circular_reference_graph.py} +64 -157
- dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
- dsp_tools/commands/xmlupload/stash/graph_models.py +13 -8
- dsp_tools/commands/xmlupload/stash/stash_circular_references.py +48 -115
- dsp_tools/commands/xmlupload/stash/stash_models.py +4 -9
- dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +34 -40
- dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +98 -108
- dsp_tools/commands/xmlupload/upload_config.py +8 -0
- dsp_tools/commands/xmlupload/write_diagnostic_info.py +14 -9
- dsp_tools/commands/xmlupload/xmlupload.py +214 -192
- dsp_tools/config/__init__.py +0 -0
- dsp_tools/config/logger_config.py +69 -0
- dsp_tools/{utils → config}/warnings_config.py +4 -1
- dsp_tools/error/__init__.py +0 -0
- dsp_tools/error/custom_warnings.py +39 -0
- dsp_tools/error/exceptions.py +204 -0
- dsp_tools/error/problems.py +10 -0
- dsp_tools/error/xmllib_errors.py +20 -0
- dsp_tools/error/xmllib_warnings.py +54 -0
- dsp_tools/error/xmllib_warnings_util.py +159 -0
- dsp_tools/error/xsd_validation_error_msg.py +19 -0
- dsp_tools/legacy_models/__init__.py +0 -0
- dsp_tools/{models → legacy_models}/datetimestamp.py +7 -7
- dsp_tools/{models → legacy_models}/langstring.py +1 -1
- dsp_tools/{models → legacy_models}/projectContext.py +4 -4
- dsp_tools/resources/schema/data.xsd +108 -83
- dsp_tools/resources/schema/lists-only.json +4 -23
- dsp_tools/resources/schema/project.json +80 -35
- dsp_tools/resources/schema/properties-only.json +1 -4
- dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
- dsp_tools/resources/start-stack/docker-compose.yml +34 -30
- dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
- dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
- dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
- dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
- dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
- dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
- dsp_tools/utils/ansi_colors.py +32 -0
- dsp_tools/utils/data_formats/__init__.py +0 -0
- dsp_tools/utils/{date_util.py → data_formats/date_util.py} +13 -1
- dsp_tools/utils/data_formats/iri_util.py +30 -0
- dsp_tools/utils/{shared.py → data_formats/shared.py} +1 -35
- dsp_tools/utils/{uri_util.py → data_formats/uri_util.py} +12 -2
- dsp_tools/utils/fuseki_bloating.py +63 -0
- dsp_tools/utils/json_parsing.py +22 -0
- dsp_tools/utils/rdf_constants.py +42 -0
- dsp_tools/utils/rdflib_utils.py +10 -0
- dsp_tools/utils/replace_id_with_iri.py +66 -0
- dsp_tools/utils/request_utils.py +238 -0
- dsp_tools/utils/xml_parsing/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
- dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
- dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
- dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
- dsp_tools/xmllib/CLAUDE.md +302 -0
- dsp_tools/xmllib/__init__.py +49 -0
- dsp_tools/xmllib/general_functions.py +877 -0
- dsp_tools/xmllib/internal/__init__.py +0 -0
- dsp_tools/xmllib/internal/checkers.py +162 -0
- dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
- dsp_tools/xmllib/internal/constants.py +46 -0
- dsp_tools/xmllib/internal/input_converters.py +155 -0
- dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
- dsp_tools/xmllib/internal/serialise_resource.py +177 -0
- dsp_tools/xmllib/internal/serialise_values.py +152 -0
- dsp_tools/xmllib/internal/type_aliases.py +11 -0
- dsp_tools/xmllib/models/config_options.py +28 -0
- dsp_tools/xmllib/models/date_formats.py +48 -0
- dsp_tools/xmllib/models/dsp_base_resources.py +1380 -400
- dsp_tools/xmllib/models/internal/__init__.py +0 -0
- dsp_tools/xmllib/models/internal/file_values.py +172 -0
- dsp_tools/xmllib/models/internal/geometry.py +162 -0
- dsp_tools/xmllib/models/{migration_metadata.py → internal/migration_metadata.py} +14 -10
- dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
- dsp_tools/xmllib/models/internal/values.py +342 -0
- dsp_tools/xmllib/models/licenses/__init__.py +0 -0
- dsp_tools/xmllib/models/licenses/other.py +59 -0
- dsp_tools/xmllib/models/licenses/recommended.py +107 -0
- dsp_tools/xmllib/models/permissions.py +41 -0
- dsp_tools/xmllib/models/res.py +1782 -0
- dsp_tools/xmllib/models/root.py +313 -26
- dsp_tools/xmllib/value_checkers.py +310 -47
- dsp_tools/xmllib/value_converters.py +765 -8
- dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
- dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
- dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
- {dsp_tools-9.1.0.post11.dist-info → dsp_tools-18.3.0.post13.dist-info}/entry_points.txt +1 -0
- dsp_tools/commands/project/create/project_create.py +0 -1107
- dsp_tools/commands/project/create/project_create_lists.py +0 -204
- dsp_tools/commands/project/create/project_validate.py +0 -453
- dsp_tools/commands/project/models/project_definition.py +0 -12
- dsp_tools/commands/rosetta.py +0 -124
- dsp_tools/commands/template.py +0 -30
- dsp_tools/commands/xml_validate/api_connection.py +0 -122
- dsp_tools/commands/xml_validate/deserialise_input.py +0 -135
- dsp_tools/commands/xml_validate/make_data_rdf.py +0 -193
- dsp_tools/commands/xml_validate/models/data_deserialised.py +0 -108
- dsp_tools/commands/xml_validate/models/data_rdf.py +0 -214
- dsp_tools/commands/xml_validate/models/input_problems.py +0 -191
- dsp_tools/commands/xml_validate/models/validation.py +0 -29
- dsp_tools/commands/xml_validate/reformat_validaton_result.py +0 -89
- dsp_tools/commands/xml_validate/sparql/construct_shapes.py +0 -16
- dsp_tools/commands/xml_validate/xml_validate.py +0 -151
- dsp_tools/commands/xmlupload/check_consistency_with_ontology.py +0 -253
- dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py +0 -236
- dsp_tools/commands/xmlupload/models/deserialise/xmlresource.py +0 -171
- dsp_tools/commands/xmlupload/models/namespace_context.py +0 -39
- dsp_tools/commands/xmlupload/models/ontology_lookup_models.py +0 -161
- dsp_tools/commands/xmlupload/models/ontology_problem_models.py +0 -178
- dsp_tools/commands/xmlupload/models/serialise/jsonld_serialiser.py +0 -40
- dsp_tools/commands/xmlupload/models/serialise/serialise_value.py +0 -51
- dsp_tools/commands/xmlupload/ontology_client.py +0 -92
- dsp_tools/commands/xmlupload/project_client.py +0 -91
- dsp_tools/commands/xmlupload/read_validate_xml_file.py +0 -99
- dsp_tools/models/custom_warnings.py +0 -31
- dsp_tools/models/exceptions.py +0 -90
- dsp_tools/resources/0100-template-repo/template.json +0 -45
- dsp_tools/resources/0100-template-repo/template.xml +0 -27
- dsp_tools/resources/start-stack/docker-compose-validation.yml +0 -5
- dsp_tools/resources/start-stack/start-stack-config.yml +0 -4
- dsp_tools/resources/xml_validate/api-shapes.ttl +0 -411
- dsp_tools/resources/xml_validate/replace_namespace.xslt +0 -61
- dsp_tools/utils/connection_live.py +0 -383
- dsp_tools/utils/iri_util.py +0 -14
- dsp_tools/utils/logger_config.py +0 -41
- dsp_tools/utils/set_encoder.py +0 -20
- dsp_tools/utils/xml_utils.py +0 -145
- dsp_tools/utils/xml_validation.py +0 -197
- dsp_tools/utils/xml_validation_models.py +0 -68
- dsp_tools/xmllib/models/file_values.py +0 -78
- dsp_tools/xmllib/models/resource.py +0 -415
- dsp_tools/xmllib/models/values.py +0 -428
- dsp_tools-9.1.0.post11.dist-info/METADATA +0 -130
- dsp_tools-9.1.0.post11.dist-info/RECORD +0 -167
- dsp_tools-9.1.0.post11.dist-info/WHEEL +0 -4
- dsp_tools-9.1.0.post11.dist-info/licenses/LICENSE +0 -674
- /dsp_tools/{commands/excel2json/new_lists → clients}/__init__.py +0 -0
- /dsp_tools/commands/{excel2json/new_lists/models → create}/__init__.py +0 -0
- /dsp_tools/commands/{project → create/create_on_server}/__init__.py +0 -0
- /dsp_tools/commands/{project/create → create/models}/__init__.py +0 -0
- /dsp_tools/commands/{project/models → create/parsing}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate → create/serialisation}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate/models → excel2json/lists}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate/sparql → excel2json/lists/models}/__init__.py +0 -0
- /dsp_tools/commands/excel2json/{new_lists → lists}/models/deserialise.py +0 -0
- /dsp_tools/commands/{xmlupload/models/deserialise → get}/__init__.py +0 -0
- /dsp_tools/commands/{xmlupload/models/serialise → get/legacy_models}/__init__.py +0 -0
- /dsp_tools/commands/{project/models → get/legacy_models}/helpers.py +0 -0
- /dsp_tools/{models → commands/get/models}/__init__.py +0 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterable
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from dataclasses import field
|
|
6
|
+
|
|
7
|
+
from dsp_tools.error.exceptions import InvalidLicenseError
|
|
8
|
+
from dsp_tools.xmllib.general_functions import find_license_in_string
|
|
9
|
+
from dsp_tools.xmllib.models.licenses.recommended import License
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class UpdateCounter:
    """Mutable set of integer counters describing one legal-metadata update run."""

    # Every counter starts at zero; callers increment the fields in place.
    resources_updated: int = 0
    licenses_set: int = 0
    copyrights_set: int = 0
    authorships_set: int = 0
    # Incremented each time an unparseable license is replaced by the "unknown" license.
    invalid_licenses_replaced: int = 0
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(frozen=True)
class LegalProperties:
    """Property names in the XML, e.g. ':hasAuthor', ':hasCopyright', ':hasLicense'."""

    # Each name is optional; None means "no such property configured".
    authorship_prop: str | None = None
    copyright_prop: str | None = None
    license_prop: str | None = None

    def has_any_property(self) -> bool:
        """Return True if at least one of the three property names is set and non-empty."""
        return bool(self.authorship_prop or self.copyright_prop or self.license_prop)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass(frozen=True)
class LegalMetadata:
    """Represents legal metadata for a single resource, either from XML or CSV."""

    license: str | None
    copyright: str | None
    authorships: Authorships

    def any(self) -> bool:
        """Return True if a license, a copyright, or at least one non-empty author name is present."""
        if self.license or self.copyright:
            return True
        if not self.authorships:
            return False
        # Inside the method body, `any` resolves to the builtin, not to this method.
        return any(name for name in self.authorships.elems)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class LegalMetadataDefaults:
    """Default values to use when legal metadata is missing from XML."""

    # Class-level fallbacks: an instance that never assigns `license_default` reads None from here.
    authorship_default: str | None = None
    copyright_default: str | None = None
    license_default: License | None = None

    def __init__(
        self,
        authorship_default: str | None = None,
        copyright_default: str | None = None,
        license_default: str | None = None,
    ):
        """
        Store the defaults, parsing the license string into a License.

        Args:
            authorship_default: default author, stored as given
            copyright_default: default copyright holder, stored as given
            license_default: license string to be parsed; None or empty means "no default license"

        Raises:
            InvalidLicenseError: if a non-empty license string cannot be parsed
        """
        self.authorship_default = authorship_default
        self.copyright_default = copyright_default
        if not license_default:
            return
        parsed_license = find_license_in_string(license_default)
        if not parsed_license:
            raise InvalidLicenseError(license_default)
        self.license_default = parsed_license
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@dataclass(frozen=True)
class Problem:
    """Represents a problem with legal metadata for a single resource."""

    # Value of the resource's file path or IIIF URI.
    file_or_iiif_uri: str
    # ID of the resource the problem belongs to.
    res_id: str
    # Legal metadata values as plain strings.
    license: str
    copyright: str
    authorships: list[str]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass(frozen=True)
class Authorships:
    """Immutable, hashable collection of author names (usable as a dictionary key)."""

    elems: frozenset[str] = field(default_factory=frozenset)

    @classmethod
    def from_iterable(cls, iterable: Iterable[str]) -> Authorships:
        """Build an instance from author names, stripping surrounding whitespace from each one."""
        return cls(frozenset(name.strip() for name in iterable))

    def is_empty(self) -> bool:
        """Return True if no author names are stored."""
        return not self.elems
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import regex
|
|
4
|
+
from lxml import etree
|
|
5
|
+
|
|
6
|
+
from dsp_tools.commands.update_legal.csv_operations import is_fixme_value
|
|
7
|
+
from dsp_tools.commands.update_legal.models import Authorships
|
|
8
|
+
from dsp_tools.commands.update_legal.models import LegalMetadata
|
|
9
|
+
from dsp_tools.commands.update_legal.models import LegalMetadataDefaults
|
|
10
|
+
from dsp_tools.commands.update_legal.models import LegalProperties
|
|
11
|
+
from dsp_tools.commands.update_legal.models import UpdateCounter
|
|
12
|
+
from dsp_tools.xmllib.general_functions import find_license_in_string
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def collect_metadata(
    res: etree._Element,
    properties: LegalProperties,
    defaults: LegalMetadataDefaults,
    counter: UpdateCounter,
    csv_metadata: LegalMetadata | None,
    treat_invalid_licenses_as_unknown: bool = False,
) -> LegalMetadata:
    """
    Collect legal metadata from CSV corrections, XML properties, or defaults.

    Args:
        res: The resource element to read the text properties from
        properties: Configuration for property names
        defaults: Fallback values used when neither CSV nor XML provide a value
        counter: Update counter, passed on to the resolution step
        csv_metadata: CSV corrections for this resource, or None
        treat_invalid_licenses_as_unknown: passed on to the license extraction

    Returns:
        The resolved legal metadata for the resource
    """
    resolved_license, resolved_copyright, resolved_authorships = _resolve_metadata_values(
        res=res,
        properties=properties,
        defaults=defaults,
        counter=counter,
        csv_metadata=csv_metadata,
        treat_invalid_licenses_as_unknown=treat_invalid_licenses_as_unknown,
    )
    return LegalMetadata(
        license=resolved_license,
        copyright=resolved_copyright,
        authorships=resolved_authorships,
    )
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def apply_metadata_to_resource(
    res: etree._Element,
    media_elem: etree._Element,
    metadata: LegalMetadata,
    properties: LegalProperties,
    auth_text_to_id: dict[Authorships, int],
) -> None:
    """
    Write the legal metadata onto the media element, then drop the old text properties.
    The XML tree is modified in-place.

    Args:
        res: The resource element
        media_elem: The bitstream or iiif-uri element to apply attributes to
        metadata: The legal metadata to apply
        properties: Configuration for property names (used for removal)
        auth_text_to_id: Dictionary to track unique authorships (modified in-place)
    """
    # Step 1: set the attributes on the media element.
    _apply_metadata_to_element(
        media_elem=media_elem,
        license_val=metadata.license,
        copyright_val=metadata.copyright,
        authorships=metadata.authorships,
        auth_text_to_id=auth_text_to_id,
    )
    # Step 2: remove the text properties that carried the legal information.
    _remove_text_properties(res, properties)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _resolve_metadata_values(
    res: etree._Element,
    properties: LegalProperties,
    defaults: LegalMetadataDefaults,
    counter: UpdateCounter,
    csv_metadata: LegalMetadata | None,
    treat_invalid_licenses_as_unknown: bool = False,
) -> tuple[str | None, str | None, Authorships]:
    """
    Resolve each metadata value with the priority CSV > XML > defaults.

    A lower-priority source is only consulted while the value is still missing
    (None for license/copyright, an empty set for authorships).

    Returns:
        Tuple of (license_val, copyright_val, authorships)
    """
    # Highest priority: CSV corrections, if there are any for this resource.
    if csv_metadata:
        license_val, copyright_val, authorships = (
            csv_metadata.license,
            csv_metadata.copyright,
            csv_metadata.authorships,
        )
    else:
        license_val, copyright_val, authorships = None, None, Authorships()

    # License: XML property first, then the configured default.
    if license_val is None:
        if properties.license_prop:
            license_val = _extract_license_from_xml(
                res, properties.license_prop, counter, treat_invalid_licenses_as_unknown
            )
        if license_val is None and defaults.license_default:
            license_val = defaults.license_default.value

    # Copyright: XML property first, then the configured default.
    if copyright_val is None:
        if properties.copyright_prop:
            copyright_val = _extract_copyright_from_xml(res, properties.copyright_prop)
        if copyright_val is None and defaults.copyright_default:
            copyright_val = defaults.copyright_default

    # Authorship: XML property first, then the configured default.
    if authorships.is_empty():
        if properties.authorship_prop:
            authorships = _extract_authorships_from_xml(res, properties.authorship_prop)
        if authorships.is_empty() and defaults.authorship_default:
            authorships = Authorships.from_iterable([defaults.authorship_default])

    return license_val, copyright_val, authorships
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _extract_license_from_xml(
    res: etree._Element,
    license_prop: str,
    counter: UpdateCounter,
    treat_invalid_as_unknown: bool = False,
) -> str | None:
    """
    Extract license from XML property.

    - If one license is found and can be parsed, return its parsed value.
    - If the property is absent or empty, return None -> will fall back to default.
    - If multiple licenses are found, return a FIXME string.
    - If the license is invalid and treat_invalid_as_unknown is True, return 'unknown' and increment counter.
    - If the license is invalid and treat_invalid_as_unknown is False, return a FIXME string.

    Args:
        res: The resource element whose direct text-prop children are searched
        license_prop: Name of the text property holding the license
        counter: Update counter; invalid_licenses_replaced is incremented on replacement
        treat_invalid_as_unknown: Whether an unparseable license is replaced by the unknown license

    Returns:
        License value, None, or FIXME string
    """
    # The property name is passed as an XPath variable instead of being interpolated into
    # the expression, so that names containing quotes cannot break the XPath.
    license_elems: list[etree._Element] = res.xpath("./text-prop[@name=$prop_name]/text", prop_name=license_prop)
    if not license_elems:
        return None
    if len(license_elems) > 1:
        license_texts = [elem.text.strip() for elem in license_elems if elem.text and elem.text.strip()]
        return f"FIXME: Multiple licenses found. Choose one: {', '.join(license_texts)}"
    license_elem = license_elems[0]
    if not license_elem.text or not (license_text := license_elem.text.strip()):
        return None
    if not (lic := find_license_in_string(license_text)):
        if treat_invalid_as_unknown:
            counter.invalid_licenses_replaced += 1
            return "http://rdfh.ch/licenses/unknown"
        return f"FIXME: Invalid license: {license_text}"
    return lic.value
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _extract_copyright_from_xml(res: etree._Element, copy_prop: str) -> str | None:
    """
    Extract the copyright holder from an XML text property.

    Args:
        res: The resource element whose direct text-prop children are searched
        copy_prop: Name of the text property holding the copyright

    Returns:
        The stripped copyright text, None if the property is absent or empty,
        or a FIXME string if the property has more than one text element
    """
    # The property name is passed as an XPath variable instead of being interpolated into
    # the expression, so that names containing quotes cannot break the XPath.
    copy_elems: list[etree._Element] = res.xpath("./text-prop[@name=$prop_name]/text", prop_name=copy_prop)
    if not copy_elems:
        return None
    if len(copy_elems) > 1:
        copy_texts = [elem.text.strip() for elem in copy_elems if elem.text and elem.text.strip()]
        return f"FIXME: Multiple copyrights found. Choose one: {', '.join(copy_texts)}"
    copy_elem = copy_elems[0]
    if not copy_elem.text or not (copy_text := copy_elem.text.strip()):
        return None
    return copy_text
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _extract_authorships_from_xml(res: etree._Element, auth_prop: str) -> Authorships:
    """
    Extract all author names from an XML text property.

    Text elements that are empty or contain only whitespace are skipped, so that they
    do not produce an empty-string author (which would make the result non-empty
    and thereby prevent the fallback to the default authorship).

    Args:
        res: The resource element whose direct text-prop children are searched
        auth_prop: Name of the text property holding the authors

    Returns:
        The stripped author names; empty if the property is absent or has no usable text
    """
    # The property name is passed as an XPath variable instead of being interpolated into
    # the expression, so that names containing quotes cannot break the XPath.
    auth_elems: list[etree._Element] = res.xpath("./text-prop[@name=$prop_name]/text", prop_name=auth_prop)
    author_names = [stripped for elem in auth_elems if elem.text and (stripped := elem.text.strip())]
    return Authorships.from_iterable(author_names)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _apply_metadata_to_element(
    media_elem: etree._Element,
    license_val: str | None,
    copyright_val: str | None,
    authorships: Authorships,
    auth_text_to_id: dict[Authorships, int],
) -> None:
    """
    Apply legal metadata as attributes on the bitstream/iiif element.

    Values that are missing or flagged as FIXME are not written.

    Args:
        media_elem: The element that receives the attributes (modified in-place)
        license_val: Value for the "license" attribute
        copyright_val: Value for the "copyright-holder" attribute
        authorships: Authors of the media; mapped to an "authorship-id" attribute
        auth_text_to_id: Registry of the authorships seen so far (modified in-place)
    """
    if license_val and not is_fixme_value(license_val):
        media_elem.attrib["license"] = license_val
    if copyright_val and not is_fixme_value(copyright_val):
        media_elem.attrib["copyright-holder"] = copyright_val

    # Without at least one non-empty author name there is nothing to reference:
    # registering such a set would create an ID that points to an author-less definition.
    if not any(name for name in authorships.elems):
        return
    if any(is_fixme_value(name) for name in authorships.elems):
        return
    # Reuse the ID of an identical authorship set, otherwise assign the next free number.
    auth_id = auth_text_to_id.setdefault(authorships, len(auth_text_to_id))
    media_elem.attrib["authorship-id"] = f"authorship_{auth_id}"
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _remove_text_properties(res: etree._Element, properties: LegalProperties) -> None:
    """
    Remove the text properties from XML (they're now attributes on media element).

    Args:
        res: The resource element whose direct text-prop children are removed (modified in-place)
        properties: Configuration for property names; unset names are skipped
    """
    configured_props = (properties.authorship_prop, properties.copyright_prop, properties.license_prop)
    for prop_name in configured_props:
        if not prop_name:
            continue
        # The property name is passed as an XPath variable instead of being interpolated into
        # the expression, so that names containing quotes cannot break the XPath.
        for prop_elem in res.xpath("./text-prop[@name=$prop_name]", prop_name=prop_name):
            res.remove(prop_elem)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def add_authorship_definitions_to_xml(root: etree._Element, auth_text_to_id: dict[Authorships, int]) -> None:
    """
    Put one `<authorship>` definition per registered authorship at the start of the root element.

    Args:
        root: element that receives the definitions as its first children,
            in the iteration order of `auth_text_to_id`
        auth_text_to_id: maps each authorship to the number used in its `authorship_<n>` id
    """
    definitions = []
    for authorship, number in auth_text_to_id.items():
        definition = etree.Element("authorship", attrib={"id": f"authorship_{number}"})
        for author_name in authorship.elems:
            etree.SubElement(definition, "author").text = author_name
        definitions.append(definition)

    # All definitions are built before the first one is inserted.
    for position, definition in enumerate(definitions):
        root.insert(position, definition)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def write_updated_xml(
    input_file: Path,
    root: etree._Element,
    counter: UpdateCounter,
    partial: bool = False,
) -> None:
    """
    Write the XML tree next to the input file and print a summary of the update.

    The output name is the input name with a trailing `_PARTIALLY_updated` or `_updated`
    removed from its stem and the marker for this run appended.

    Args:
        input_file: path of the file that was processed; determines the output path
        root: root element of the tree to write
        counter: statistics that are printed after writing
        partial: if True, the `_PARTIALLY_updated` marker is used, otherwise `_updated`
    """
    tree = etree.ElementTree(root)

    # Strip the marker of an earlier run so that markers never pile up in the file name.
    # If the input already is a "_PARTIALLY_updated" file and this run is partial again,
    # the resulting path equals the input path, i.e. that file is overwritten.
    stem_without_marker = regex.sub(r"(_PARTIALLY_updated|_updated)$", "", input_file.stem)
    marker = "_PARTIALLY_updated" if partial else "_updated"
    output_file = input_file.with_stem(f"{stem_without_marker}{marker}")

    etree.indent(tree, space=" ")
    tree.write(output_file, pretty_print=True, encoding="utf-8", doctype='<?xml version="1.0" encoding="UTF-8"?>')

    if not partial:
        print(f"\n✓ Successfully updated all legal metadata. Output written to: {output_file}\n")
    else:
        print(f"\n⚠️ Partial update completed. Output written to: {output_file}")
        print(" Some resources still have errors - check the CSV error file.\n")

    print(f" - Resources updated: {counter.resources_updated}\n")
    print(f" - Licenses set: {counter.licenses_set}\n")
    print(f" - Copyrights set: {counter.copyrights_set}\n")
    print(f" - Authorships set: {counter.authorships_set}\n")
    # This line is only printed if at least one license was replaced.
    if counter.invalid_licenses_replaced > 0:
        print(f" - Invalid licenses replaced with 'unknown': {counter.invalid_licenses_replaced}\n")
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## What is validate_data?
|
|
6
|
+
|
|
7
|
+
The `validate_data` module is a CLI command that validates XML data files against ontologies stored on a DSP server.
|
|
8
|
+
It performs comprehensive validation using SHACL (Shapes Constraint Language) via a Docker-based CLI tool
|
|
9
|
+
to ensure data conforms to ontological constraints before upload.
|
|
10
|
+
|
|
11
|
+
## Key Components
|
|
12
|
+
|
|
13
|
+
### Core Entry Points
|
|
14
|
+
|
|
15
|
+
- **validate_data.py**: Main validation orchestrator with two public functions:
|
|
16
|
+
- `validate_data()`: Validates XML files from filesystem
|
|
17
|
+
- `validate_parsed_resources()`: Validates pre-parsed resources (used by the CLI commands `xmlupload` and `ingest-xmlupload`)
|
|
18
|
+
|
|
19
|
+
### SHACL Validation Engine
|
|
20
|
+
|
|
21
|
+
- **shacl_cli_validator.py**: Docker-based SHACL validation engine that:
|
|
22
|
+
- Runs SHACL validation using a containerized CLI tool
|
|
23
|
+
- Manages Docker communication and error handling
|
|
24
|
+
- Parses validation results back into Python objects
|
|
25
|
+
- Requires Docker Desktop to be running
|
|
26
|
+
|
|
27
|
+
### Data Preparation Pipeline (`prepare_data/`)
|
|
28
|
+
|
|
29
|
+
- **prepare_data.py**: Main data preparation coordinator with functions:
|
|
30
|
+
- `get_info_and_parsed_resources_from_file()`: Extracts resources from XML files
|
|
31
|
+
- `prepare_data_for_validation_from_parsed_resource()`: Prepares data for validation
|
|
32
|
+
- **get_rdf_like_data.py**: Converts ParsedResource objects to RDF-like data structures
|
|
33
|
+
- **make_data_graph.py**: Creates RDF graphs from RDF-like data
|
|
34
|
+
|
|
35
|
+
### Validation Pipeline (`validation/`)
|
|
36
|
+
|
|
37
|
+
- **get_validation_report.py**: Main validation orchestrator that coordinates SHACL validation
|
|
38
|
+
- **check_for_unknown_classes.py**: Validates that all classes used in data are defined in the ontology
|
|
39
|
+
- **validate_ontology.py**: Validates the ontology itself before data validation
|
|
40
|
+
- **check_duplicate_files.py**: Checks for duplicate file references in the data
|
|
41
|
+
|
|
42
|
+
### Validation Report Processing (`process_validation_report/`)
|
|
43
|
+
|
|
44
|
+
- **query_validation_result.py**: Processes SHACL validation results into user-friendly formats
|
|
45
|
+
- **get_user_validation_message.py**: Converts validation problems into user messages
|
|
46
|
+
|
|
47
|
+
### API Clients (`api_clients.py`)
|
|
48
|
+
|
|
49
|
+
- **OntologyClient**: Fetches project ontologies and knora-api ontology from DSP server
|
|
50
|
+
- **ListClient**: Retrieves and reformats project lists for validation
|
|
51
|
+
|
|
52
|
+
### Models
|
|
53
|
+
|
|
54
|
+
- **models/validation.py**: Core data structures for RDF graphs and validation results
|
|
55
|
+
- **models/api_responses.py**: API response models for ontologies, lists, and validation reports
|
|
56
|
+
- **models/input_problems.py**: User-facing error/warning message structures
|
|
57
|
+
- **models/rdf_like_data.py**: Intermediate data structures for RDF conversion
|
|
58
|
+
|
|
59
|
+
### SHACL Shape Generation (`sparql/`)
|
|
60
|
+
|
|
61
|
+
`sparql/` contains the ontology-specific shapes, which are generated at runtime.
|
|
62
|
+
|
|
63
|
+
- **construct_shacl.py**: Main SHACL shape construction coordinator
|
|
64
|
+
- **cardinality_shacl.py**: Generates cardinality constraint shapes
|
|
65
|
+
- **value_shacl.py**: Generates value content validation shapes
|
|
66
|
+
- **legal_info_shacl.py**: Generates license validation shapes
|
|
67
|
+
|
|
68
|
+
### Ontology Independent SHACL Shapes
|
|
69
|
+
|
|
70
|
+
`src/dsp_tools/resources/validate_data/` contains RDF turtle files with SHACL shapes that apply to all ontologies.
|
|
71
|
+
|
|
72
|
+
### Utility Functions
|
|
73
|
+
|
|
74
|
+
- **constants.py**: Defines file paths for validation artifacts and RDF property type information
|
|
75
|
+
- **utils.py**: Helper functions for:
|
|
76
|
+
- Temporary directory management (`get_temp_directory()`, `clean_up_temp_directory()`)
|
|
77
|
+
- IRI reformatting for user-friendly display
|
|
78
|
+
- Validation file cleanup and organization
|
|
79
|
+
- **mappers.py**: Data transformation utilities for converting between different data representations
|
|
80
|
+
|
|
81
|
+
### Validation Flow
|
|
82
|
+
|
|
83
|
+
```text
|
|
84
|
+
XML file → ParsedResource → RDF-like data → RDF Graph
|
|
85
|
+
↓
|
|
86
|
+
Project ontology → SHACL shapes ← ← ← ← ← ← ← ←
|
|
87
|
+
↓
|
|
88
|
+
Docker SHACL CLI → Validation Report → User Messages
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Architecture Patterns
|
|
92
|
+
|
|
93
|
+
### Four-Stage Validation Pipeline
|
|
94
|
+
|
|
95
|
+
The validation process follows a strict sequential pipeline:
|
|
96
|
+
|
|
97
|
+
1. **Unknown Classes Check** (`validation/check_for_unknown_classes.py`):
|
|
98
|
+
- Ensures all classes used in data are defined in the ontology
|
|
99
|
+
- If unknown classes are found, validation terminates with an error
|
|
100
|
+
2. **Ontology Validation** (`validation/validate_ontology.py`):
|
|
101
|
+
- Validates the ontology itself for correctness
|
|
102
|
+
- If ontology errors are found, validation terminates with an error
|
|
103
|
+
3. **Duplicate File Check** (`validation/check_duplicate_files.py`):
|
|
104
|
+
- Checks for duplicate file references in the data
|
|
105
|
+
- Generates warnings which will be added to potential warnings from the SHACL validation
|
|
106
|
+
4. **SHACL Validation** (`validation/get_validation_report.py`):
|
|
107
|
+
- Performs comprehensive SHACL validation using Docker CLI
|
|
108
|
+
- Validates both cardinality constraints and value content
|
|
109
|
+
- Generates detailed validation reports
|
|
110
|
+
|
|
111
|
+
### Docker-Based Validation Architecture
|
|
112
|
+
|
|
113
|
+
- **Containerized SHACL**: Uses a Docker container for SHACL validation to ensure consistency
|
|
114
|
+
- **File-based Communication**: Writes RDF files to temporary directories for Docker processing
|
|
115
|
+
- **Error Handling**: Robust error handling for Docker communication failures
|
|
116
|
+
- **Temporary File Management**: Automatic cleanup of temporary validation files
|
|
117
|
+
|
|
118
|
+
### Modular Data Processing Pipeline
|
|
119
|
+
|
|
120
|
+
- **Data Preparation** (`prepare_data/`): Converts XML to RDF-ready data structures
|
|
121
|
+
- **Validation Execution** (`validation/`): Performs validation checks in sequence
|
|
122
|
+
- **Report Processing** (`process_validation_report/`): Converts validation results to user messages
|
|
123
|
+
- **Clear Separation**: Each stage has dedicated modules with well-defined interfaces
|
|
124
|
+
|
|
125
|
+
### Problem Categorization and Severity Levels
|
|
126
|
+
|
|
127
|
+
Validation results are categorized into:
|
|
128
|
+
|
|
129
|
+
- **Violations**: Critical errors that prevent xmlupload (always displayed)
|
|
130
|
+
- **Warnings**: Issues that block upload on production servers (displayed based on severity setting)
|
|
131
|
+
- **Info**: Potential problems that don't block upload (displayed only with INFO severity)
|
|
132
|
+
- **Unexpected**: Unknown SHACL violations requiring dev team attention (always displayed)
|
|
133
|
+
|
|
134
|
+
## Key Dependencies
|
|
135
|
+
|
|
136
|
+
- **rdflib**: RDF graph manipulation and SPARQL queries
|
|
137
|
+
- **Docker**: Required for containerized SHACL validation
|
|
138
|
+
- **subprocess**: For executing Docker commands
|
|
139
|
+
- **pandas**: For handling large validation result datasets
|
|
140
|
+
- **yaml**: For parsing Docker configuration files
|
|
141
|
+
- **loguru**: Structured logging throughout validation process
|
|
142
|
+
|
|
143
|
+
## Testing Strategy
|
|
144
|
+
|
|
145
|
+
- **Unit tests**: Test individual validation components
|
|
146
|
+
- **Integration tests**: Test API client interactions
|
|
147
|
+
- **E2E tests**: Test complete validation workflows with testcontainers
|
|
148
|
+
|
|
149
|
+
## Important Notes
|
|
150
|
+
|
|
151
|
+
- **Docker Requirement**: Validation requires Docker Desktop to be running for SHACL validation
|
|
152
|
+
- **Local SHACL Validation**: Validation is performed locally using a Docker container, not server-side
|
|
153
|
+
- **Temporary File Management**: Creates temporary files for Docker communication, automatically cleaned up
|
|
154
|
+
- **Graph Saving**: Optional graph saving functionality for debugging complex validation failures
|
|
155
|
+
- **Production Behavior**: Production servers treat warnings as blockers, preventing upload
|
|
156
|
+
- **Dual Interface**: Supports both direct XML file validation and pre-parsed resource validation (for xmlupload integration)
|
|
157
|
+
- **Configuration Support**: Uses ValidateDataConfig for controlling validation behavior and output options
|
|
158
|
+
- **CSV Output**: Large validation results are saved as CSV files for better handling
|
|
159
|
+
- **Severity Levels**: Configurable severity levels control which validation messages are displayed
|
|
File without changes
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from rdflib import XSD
|
|
2
|
+
|
|
3
|
+
from dsp_tools.commands.xmlupload.models.rdf_models import RDFPropTypeInfo
|
|
4
|
+
from dsp_tools.utils.rdf_constants import KNORA_API
|
|
5
|
+
|
|
6
|
+
# File names of the validation files used by the docker container.
# Each validation step has a data file, a SHACL shapes file and a report file.

# ontology validation
ONTOLOGIES_DATA_TTL = "ONTOLOGIES_DATA.ttl"
ONTOLOGIES_SHACL_TTL = "ONTOLOGIES_SHACL.ttl"
ONTOLOGIES_REPORT_TTL = "ONTOLOGIES_REPORT.ttl"

# cardinality validation
CARDINALITY_DATA_TTL = "CARDINALITY_DATA.ttl"
CARDINALITY_SHACL_TTL = "CARDINALITY_SHACL.ttl"
CARDINALITY_REPORT_TTL = "CARDINALITY_REPORT.ttl"

# content validation
CONTENT_DATA_TTL = "CONTENT_DATA.ttl"
CONTENT_SHACL_TTL = "CONTENT_SHACL.ttl"
CONTENT_REPORT_TTL = "CONTENT_REPORT.ttl"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Type information for the file values of a resource.
# All values that reference a file share the filename property and the xsd:string datatype;
# only the IIIF-URI value uses the external URL property with xsd:anyURI.
ARCHIVE_FILE_VALUE = RDFPropTypeInfo(
    KNORA_API.ArchiveFileValue,
    KNORA_API.fileValueHasFilename,
    XSD.string,
)
AUDIO_FILE_VALUE = RDFPropTypeInfo(
    KNORA_API.AudioFileValue,
    KNORA_API.fileValueHasFilename,
    XSD.string,
)
DOCUMENT_FILE_VALUE = RDFPropTypeInfo(
    KNORA_API.DocumentFileValue,
    KNORA_API.fileValueHasFilename,
    XSD.string,
)
MOVING_IMAGE_FILE_VALUE = RDFPropTypeInfo(
    KNORA_API.MovingImageFileValue,
    KNORA_API.fileValueHasFilename,
    XSD.string,
)
STILL_IMAGE_FILE_VALUE = RDFPropTypeInfo(
    KNORA_API.StillImageFileValue,
    KNORA_API.fileValueHasFilename,
    XSD.string,
)
TEXT_FILE_VALUE = RDFPropTypeInfo(
    KNORA_API.TextFileValue,
    KNORA_API.fileValueHasFilename,
    XSD.string,
)
IIIF_URI_VALUE = RDFPropTypeInfo(
    KNORA_API.StillImageExternalFileValue,
    KNORA_API.stillImageFileValueHasExternalUrl,
    XSD.anyURI,
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# validation results

# Properties that carry the legal information of a file value.
LEGAL_INFO_PROPS = {KNORA_API.hasLicense, KNORA_API.hasCopyrightHolder, KNORA_API.hasAuthorship}

# The properties that link to a file value, together with the legal info properties.
FILE_VALUE_PROPERTIES = {
    KNORA_API.hasArchiveFileValue,
    KNORA_API.hasAudioFileValue,
    KNORA_API.hasDocumentFileValue,
    KNORA_API.hasMovingImageFileValue,
    KNORA_API.hasTextFileValue,
    KNORA_API.hasStillImageFileValue,
} | LEGAL_INFO_PROPS

# Per file value property: a tuple of (XML tag name(s), text listing the file extensions).
_BITSTREAM = "bitstream"
FILEVALUE_DETAIL_INFO = {
    KNORA_API.hasArchiveFileValue: (_BITSTREAM, "'zip', 'tar', 'gz', 'z', 'tgz', 'gzip', '7z'"),
    KNORA_API.hasAudioFileValue: (_BITSTREAM, "'mp3', 'wav'"),
    KNORA_API.hasDocumentFileValue: (_BITSTREAM, "'pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx', 'epub'"),
    KNORA_API.hasMovingImageFileValue: (_BITSTREAM, "'mp4'"),
    KNORA_API.hasTextFileValue: (_BITSTREAM, "'odd', 'rng', 'txt', 'xml', 'htm', 'html', 'xsd', 'xsl', 'csv', 'json'"),
    KNORA_API.hasStillImageFileValue: (
        "bitstream / iiif-uri",
        "'jpg', 'jpeg', 'png', 'tif', 'tiff', 'jp2' or a IIIF-URI",
    ),
}
|