dsp-tools 0.9.13__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsp_tools/__init__.py +5 -0
- dsp_tools/cli/args.py +47 -0
- dsp_tools/cli/call_action.py +85 -0
- dsp_tools/cli/call_action_files_only.py +101 -0
- dsp_tools/cli/call_action_with_network.py +207 -0
- dsp_tools/cli/create_parsers.py +479 -0
- dsp_tools/cli/entry_point.py +322 -0
- dsp_tools/cli/utils.py +87 -0
- dsp_tools/clients/CLAUDE.md +420 -0
- dsp_tools/clients/authentication_client.py +14 -0
- dsp_tools/clients/authentication_client_live.py +66 -0
- dsp_tools/clients/connection.py +35 -0
- dsp_tools/clients/connection_live.py +233 -0
- dsp_tools/clients/fuseki_metrics.py +60 -0
- dsp_tools/clients/group_user_clients.py +35 -0
- dsp_tools/clients/group_user_clients_live.py +181 -0
- dsp_tools/clients/legal_info_client.py +23 -0
- dsp_tools/clients/legal_info_client_live.py +132 -0
- dsp_tools/clients/list_client.py +49 -0
- dsp_tools/clients/list_client_live.py +166 -0
- dsp_tools/clients/metadata_client.py +24 -0
- dsp_tools/clients/metadata_client_live.py +47 -0
- dsp_tools/clients/ontology_clients.py +49 -0
- dsp_tools/clients/ontology_create_client_live.py +166 -0
- dsp_tools/clients/ontology_get_client_live.py +80 -0
- dsp_tools/clients/permissions_client.py +68 -0
- dsp_tools/clients/project_client.py +16 -0
- dsp_tools/clients/project_client_live.py +66 -0
- dsp_tools/commands/create/communicate_problems.py +24 -0
- dsp_tools/commands/create/create.py +134 -0
- dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
- dsp_tools/commands/create/create_on_server/classes.py +99 -0
- dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
- dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
- dsp_tools/commands/create/create_on_server/group_users.py +165 -0
- dsp_tools/commands/create/create_on_server/lists.py +163 -0
- dsp_tools/commands/create/create_on_server/mappers.py +12 -0
- dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
- dsp_tools/commands/create/create_on_server/ontology.py +52 -0
- dsp_tools/commands/create/create_on_server/project.py +68 -0
- dsp_tools/commands/create/create_on_server/properties.py +119 -0
- dsp_tools/commands/create/exceptions.py +29 -0
- dsp_tools/commands/create/lists_only.py +66 -0
- dsp_tools/commands/create/models/create_problems.py +87 -0
- dsp_tools/commands/create/models/parsed_ontology.py +88 -0
- dsp_tools/commands/create/models/parsed_project.py +81 -0
- dsp_tools/commands/create/models/rdf_ontology.py +12 -0
- dsp_tools/commands/create/models/server_project_info.py +100 -0
- dsp_tools/commands/create/parsing/parse_lists.py +45 -0
- dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
- dsp_tools/commands/create/parsing/parse_project.py +149 -0
- dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
- dsp_tools/commands/create/project_validate.py +595 -0
- dsp_tools/commands/create/serialisation/ontology.py +119 -0
- dsp_tools/commands/create/serialisation/project.py +44 -0
- dsp_tools/commands/excel2json/CLAUDE.md +101 -0
- dsp_tools/commands/excel2json/json_header.py +321 -0
- dsp_tools/commands/excel2json/lists/__init__.py +0 -0
- dsp_tools/commands/excel2json/lists/compliance_checks.py +292 -0
- dsp_tools/commands/excel2json/lists/make_lists.py +247 -0
- dsp_tools/commands/excel2json/lists/models/__init__.py +0 -0
- dsp_tools/commands/excel2json/lists/models/deserialise.py +30 -0
- dsp_tools/commands/excel2json/lists/models/input_error.py +216 -0
- dsp_tools/commands/excel2json/lists/models/serialise.py +57 -0
- dsp_tools/commands/excel2json/lists/utils.py +81 -0
- dsp_tools/commands/excel2json/models/__init__.py +0 -0
- dsp_tools/commands/excel2json/models/input_error.py +416 -0
- dsp_tools/commands/excel2json/models/json_header.py +175 -0
- dsp_tools/commands/excel2json/models/list_node_name.py +16 -0
- dsp_tools/commands/excel2json/models/ontology.py +76 -0
- dsp_tools/commands/excel2json/old_lists.py +328 -0
- dsp_tools/commands/excel2json/project.py +280 -0
- dsp_tools/commands/excel2json/properties.py +370 -0
- dsp_tools/commands/excel2json/resources.py +336 -0
- dsp_tools/commands/excel2json/utils.py +352 -0
- dsp_tools/commands/excel2xml/__init__.py +7 -0
- dsp_tools/commands/excel2xml/excel2xml_cli.py +523 -0
- dsp_tools/commands/excel2xml/excel2xml_lib.py +1953 -0
- dsp_tools/commands/excel2xml/propertyelement.py +47 -0
- dsp_tools/commands/get/__init__.py +0 -0
- dsp_tools/commands/get/get.py +166 -0
- dsp_tools/commands/get/get_permissions.py +257 -0
- dsp_tools/commands/get/get_permissions_legacy.py +89 -0
- dsp_tools/commands/get/legacy_models/__init__.py +0 -0
- dsp_tools/commands/get/legacy_models/context.py +318 -0
- dsp_tools/commands/get/legacy_models/group.py +241 -0
- dsp_tools/commands/get/legacy_models/helpers.py +47 -0
- dsp_tools/commands/get/legacy_models/listnode.py +390 -0
- dsp_tools/commands/get/legacy_models/model.py +12 -0
- dsp_tools/commands/get/legacy_models/ontology.py +324 -0
- dsp_tools/commands/get/legacy_models/project.py +366 -0
- dsp_tools/commands/get/legacy_models/propertyclass.py +417 -0
- dsp_tools/commands/get/legacy_models/resourceclass.py +676 -0
- dsp_tools/commands/get/legacy_models/user.py +438 -0
- dsp_tools/commands/get/models/__init__.py +0 -0
- dsp_tools/commands/get/models/permissions_models.py +10 -0
- dsp_tools/commands/id2iri.py +258 -0
- dsp_tools/commands/ingest_xmlupload/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +178 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +69 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +166 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +121 -0
- dsp_tools/commands/ingest_xmlupload/ingest_files/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +64 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +20 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +57 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +66 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +67 -0
- dsp_tools/commands/resume_xmlupload/__init__.py +0 -0
- dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +96 -0
- dsp_tools/commands/start_stack.py +428 -0
- dsp_tools/commands/update_legal/CLAUDE.md +344 -0
- dsp_tools/commands/update_legal/__init__.py +0 -0
- dsp_tools/commands/update_legal/core.py +182 -0
- dsp_tools/commands/update_legal/csv_operations.py +135 -0
- dsp_tools/commands/update_legal/models.py +87 -0
- dsp_tools/commands/update_legal/xml_operations.py +247 -0
- dsp_tools/commands/validate_data/CLAUDE.md +159 -0
- dsp_tools/commands/validate_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/constants.py +59 -0
- dsp_tools/commands/validate_data/mappers.py +143 -0
- dsp_tools/commands/validate_data/models/__init__.py +0 -0
- dsp_tools/commands/validate_data/models/api_responses.py +45 -0
- dsp_tools/commands/validate_data/models/input_problems.py +119 -0
- dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
- dsp_tools/commands/validate_data/models/validation.py +106 -0
- dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
- dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
- dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
- dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
- dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
- dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
- dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
- dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
- dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
- dsp_tools/commands/validate_data/sparql/cardinality_shacl.py +209 -0
- dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
- dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
- dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
- dsp_tools/commands/validate_data/utils.py +59 -0
- dsp_tools/commands/validate_data/validate_data.py +283 -0
- dsp_tools/commands/validate_data/validation/__init__.py +0 -0
- dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
- dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
- dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
- dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
- dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
- dsp_tools/commands/xmlupload/__init__.py +0 -0
- dsp_tools/commands/xmlupload/iri_resolver.py +21 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
- dsp_tools/commands/xmlupload/models/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
- dsp_tools/commands/xmlupload/models/formatted_text_value.py +10 -0
- dsp_tools/commands/xmlupload/models/ingest.py +143 -0
- dsp_tools/commands/xmlupload/models/input_problems.py +58 -0
- dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
- dsp_tools/commands/xmlupload/models/permission.py +45 -0
- dsp_tools/commands/xmlupload/models/permissions_parsed.py +93 -0
- dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
- dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
- dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
- dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
- dsp_tools/commands/xmlupload/models/upload_clients.py +14 -0
- dsp_tools/commands/xmlupload/models/upload_state.py +20 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/ark2iri.py +55 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/iiif_uri_validator.py +50 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/list_client.py +120 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
- dsp_tools/commands/xmlupload/resource_create_client.py +25 -0
- dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
- dsp_tools/commands/xmlupload/stash/__init__.py +0 -0
- dsp_tools/commands/xmlupload/stash/analyse_circular_reference_graph.py +236 -0
- dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
- dsp_tools/commands/xmlupload/stash/graph_models.py +87 -0
- dsp_tools/commands/xmlupload/stash/stash_circular_references.py +68 -0
- dsp_tools/commands/xmlupload/stash/stash_models.py +109 -0
- dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +106 -0
- dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +196 -0
- dsp_tools/commands/xmlupload/upload_config.py +76 -0
- dsp_tools/commands/xmlupload/write_diagnostic_info.py +27 -0
- dsp_tools/commands/xmlupload/xmlupload.py +516 -0
- dsp_tools/config/__init__.py +0 -0
- dsp_tools/config/logger_config.py +69 -0
- dsp_tools/config/warnings_config.py +32 -0
- dsp_tools/error/__init__.py +0 -0
- dsp_tools/error/custom_warnings.py +39 -0
- dsp_tools/error/exceptions.py +204 -0
- dsp_tools/error/problems.py +10 -0
- dsp_tools/error/xmllib_errors.py +20 -0
- dsp_tools/error/xmllib_warnings.py +54 -0
- dsp_tools/error/xmllib_warnings_util.py +159 -0
- dsp_tools/error/xsd_validation_error_msg.py +19 -0
- dsp_tools/legacy_models/__init__.py +0 -0
- dsp_tools/legacy_models/datetimestamp.py +81 -0
- dsp_tools/legacy_models/langstring.py +253 -0
- dsp_tools/legacy_models/projectContext.py +49 -0
- dsp_tools/py.typed +0 -0
- dsp_tools/resources/schema/data.xsd +648 -0
- dsp_tools/resources/schema/lists-only.json +72 -0
- dsp_tools/resources/schema/project.json +1258 -0
- dsp_tools/resources/schema/properties-only.json +874 -0
- dsp_tools/resources/schema/resources-only.json +140 -0
- dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
- dsp_tools/resources/start-stack/docker-compose.override.yml +11 -0
- dsp_tools/resources/start-stack/docker-compose.yml +88 -0
- dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
- dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
- dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
- dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
- dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
- dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
- dsp_tools/utils/__init__.py +0 -0
- dsp_tools/utils/ansi_colors.py +32 -0
- dsp_tools/utils/data_formats/__init__.py +0 -0
- dsp_tools/utils/data_formats/date_util.py +166 -0
- dsp_tools/utils/data_formats/iri_util.py +30 -0
- dsp_tools/utils/data_formats/shared.py +81 -0
- dsp_tools/utils/data_formats/uri_util.py +76 -0
- dsp_tools/utils/fuseki_bloating.py +63 -0
- dsp_tools/utils/json_parsing.py +22 -0
- dsp_tools/utils/rdf_constants.py +42 -0
- dsp_tools/utils/rdflib_utils.py +10 -0
- dsp_tools/utils/replace_id_with_iri.py +66 -0
- dsp_tools/utils/request_utils.py +238 -0
- dsp_tools/utils/xml_parsing/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
- dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
- dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
- dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
- dsp_tools/xmllib/CLAUDE.md +302 -0
- dsp_tools/xmllib/__init__.py +49 -0
- dsp_tools/xmllib/general_functions.py +877 -0
- dsp_tools/xmllib/internal/__init__.py +0 -0
- dsp_tools/xmllib/internal/checkers.py +162 -0
- dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
- dsp_tools/xmllib/internal/constants.py +46 -0
- dsp_tools/xmllib/internal/input_converters.py +155 -0
- dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
- dsp_tools/xmllib/internal/serialise_resource.py +177 -0
- dsp_tools/xmllib/internal/serialise_values.py +152 -0
- dsp_tools/xmllib/internal/type_aliases.py +11 -0
- dsp_tools/xmllib/models/__init__.py +0 -0
- dsp_tools/xmllib/models/config_options.py +28 -0
- dsp_tools/xmllib/models/date_formats.py +48 -0
- dsp_tools/xmllib/models/dsp_base_resources.py +1542 -0
- dsp_tools/xmllib/models/internal/__init__.py +0 -0
- dsp_tools/xmllib/models/internal/file_values.py +172 -0
- dsp_tools/xmllib/models/internal/geometry.py +162 -0
- dsp_tools/xmllib/models/internal/migration_metadata.py +55 -0
- dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
- dsp_tools/xmllib/models/internal/values.py +342 -0
- dsp_tools/xmllib/models/licenses/__init__.py +0 -0
- dsp_tools/xmllib/models/licenses/other.py +59 -0
- dsp_tools/xmllib/models/licenses/recommended.py +107 -0
- dsp_tools/xmllib/models/permissions.py +41 -0
- dsp_tools/xmllib/models/res.py +1782 -0
- dsp_tools/xmllib/models/root.py +348 -0
- dsp_tools/xmllib/value_checkers.py +434 -0
- dsp_tools/xmllib/value_converters.py +777 -0
- dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
- dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
- dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
- dsp_tools-18.3.0.post13.dist-info/entry_points.txt +3 -0
- dsp_tools-0.9.13.dist-info/LICENSE +0 -674
- dsp_tools-0.9.13.dist-info/METADATA +0 -144
- dsp_tools-0.9.13.dist-info/RECORD +0 -71
- dsp_tools-0.9.13.dist-info/WHEEL +0 -5
- dsp_tools-0.9.13.dist-info/entry_points.txt +0 -3
- dsp_tools-0.9.13.dist-info/top_level.txt +0 -1
- dsplib/models/connection.py +0 -272
- dsplib/models/group.py +0 -296
- dsplib/models/helpers.py +0 -505
- dsplib/models/langstring.py +0 -277
- dsplib/models/listnode.py +0 -578
- dsplib/models/model.py +0 -20
- dsplib/models/ontology.py +0 -448
- dsplib/models/permission.py +0 -112
- dsplib/models/project.py +0 -547
- dsplib/models/propertyclass.py +0 -505
- dsplib/models/resource.py +0 -366
- dsplib/models/resourceclass.py +0 -810
- dsplib/models/sipi.py +0 -30
- dsplib/models/user.py +0 -731
- dsplib/models/value.py +0 -1000
- dsplib/utils/knora-data-schema.xsd +0 -454
- dsplib/utils/knora-schema-lists.json +0 -83
- dsplib/utils/knora-schema.json +0 -434
- dsplib/utils/onto_commons.py +0 -24
- dsplib/utils/onto_create_lists.py +0 -73
- dsplib/utils/onto_create_ontology.py +0 -442
- dsplib/utils/onto_get.py +0 -58
- dsplib/utils/onto_validate.py +0 -33
- dsplib/utils/xml_upload.py +0 -539
- dsplib/widgets/doublepassword.py +0 -80
- knora/MLS-import-libraries.py +0 -84
- knora/dsp_tools.py +0 -96
- knora/dsplib/models/connection.py +0 -272
- knora/dsplib/models/group.py +0 -296
- knora/dsplib/models/helpers.py +0 -506
- knora/dsplib/models/langstring.py +0 -277
- knora/dsplib/models/listnode.py +0 -578
- knora/dsplib/models/model.py +0 -20
- knora/dsplib/models/ontology.py +0 -448
- knora/dsplib/models/permission.py +0 -112
- knora/dsplib/models/project.py +0 -583
- knora/dsplib/models/propertyclass.py +0 -505
- knora/dsplib/models/resource.py +0 -416
- knora/dsplib/models/resourceclass.py +0 -811
- knora/dsplib/models/sipi.py +0 -35
- knora/dsplib/models/user.py +0 -731
- knora/dsplib/models/value.py +0 -1000
- knora/dsplib/utils/knora-data-schema.xsd +0 -464
- knora/dsplib/utils/knora-schema-lists.json +0 -83
- knora/dsplib/utils/knora-schema.json +0 -444
- knora/dsplib/utils/onto_commons.py +0 -24
- knora/dsplib/utils/onto_create_lists.py +0 -73
- knora/dsplib/utils/onto_create_ontology.py +0 -451
- knora/dsplib/utils/onto_get.py +0 -58
- knora/dsplib/utils/onto_validate.py +0 -33
- knora/dsplib/utils/xml_upload.py +0 -540
- knora/dsplib/widgets/doublepassword.py +0 -80
- knora/knora.py +0 -2108
- knora/test.py +0 -99
- knora/testit.py +0 -76
- knora/xml2knora.py +0 -633
- {dsplib → dsp_tools/cli}/__init__.py +0 -0
- {dsplib/models → dsp_tools/clients}/__init__.py +0 -0
- {dsplib/utils → dsp_tools/commands}/__init__.py +0 -0
- {dsplib/widgets → dsp_tools/commands/create}/__init__.py +0 -0
- {knora → dsp_tools/commands/create/create_on_server}/__init__.py +0 -0
- {knora/dsplib → dsp_tools/commands/create/models}/__init__.py +0 -0
- {knora/dsplib/models → dsp_tools/commands/create/parsing}/__init__.py +0 -0
- {knora/dsplib/utils → dsp_tools/commands/create/serialisation}/__init__.py +0 -0
- {knora/dsplib/widgets → dsp_tools/commands/excel2json}/__init__.py +0 -0
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import cast
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from loguru import logger
|
|
7
|
+
|
|
8
|
+
from dsp_tools.cli.args import ServerCredentials
|
|
9
|
+
from dsp_tools.cli.args import ValidateDataConfig
|
|
10
|
+
from dsp_tools.cli.args import ValidationSeverity
|
|
11
|
+
from dsp_tools.clients.authentication_client import AuthenticationClient
|
|
12
|
+
from dsp_tools.clients.authentication_client_live import AuthenticationClientLive
|
|
13
|
+
from dsp_tools.clients.metadata_client import ExistingResourcesRetrieved
|
|
14
|
+
from dsp_tools.commands.validate_data.models.input_problems import OntologyValidationProblem
|
|
15
|
+
from dsp_tools.commands.validate_data.models.input_problems import SortedProblems
|
|
16
|
+
from dsp_tools.commands.validate_data.models.input_problems import UnknownClassesInData
|
|
17
|
+
from dsp_tools.commands.validate_data.models.input_problems import ValidateDataResult
|
|
18
|
+
from dsp_tools.commands.validate_data.models.validation import RDFGraphs
|
|
19
|
+
from dsp_tools.commands.validate_data.models.validation import ValidationReportGraphs
|
|
20
|
+
from dsp_tools.commands.validate_data.prepare_data.prepare_data import get_info_and_parsed_resources_from_file
|
|
21
|
+
from dsp_tools.commands.validate_data.prepare_data.prepare_data import prepare_data_for_validation_from_parsed_resource
|
|
22
|
+
from dsp_tools.commands.validate_data.process_validation_report.get_user_validation_message import get_user_message
|
|
23
|
+
from dsp_tools.commands.validate_data.process_validation_report.get_user_validation_message import sort_user_problems
|
|
24
|
+
from dsp_tools.commands.validate_data.process_validation_report.query_validation_result import reformat_validation_graph
|
|
25
|
+
from dsp_tools.commands.validate_data.shacl_cli_validator import ShaclCliValidator
|
|
26
|
+
from dsp_tools.commands.validate_data.validation.check_duplicate_files import check_for_duplicate_files
|
|
27
|
+
from dsp_tools.commands.validate_data.validation.check_for_unknown_classes import check_for_unknown_resource_classes
|
|
28
|
+
from dsp_tools.commands.validate_data.validation.check_for_unknown_classes import get_msg_str_unknown_classes_in_data
|
|
29
|
+
from dsp_tools.commands.validate_data.validation.get_validation_report import get_validation_report
|
|
30
|
+
from dsp_tools.commands.validate_data.validation.validate_ontology import get_msg_str_ontology_validation_violation
|
|
31
|
+
from dsp_tools.commands.validate_data.validation.validate_ontology import validate_ontology
|
|
32
|
+
from dsp_tools.error.exceptions import BaseError
|
|
33
|
+
from dsp_tools.utils.ansi_colors import BACKGROUND_BOLD_CYAN
|
|
34
|
+
from dsp_tools.utils.ansi_colors import BACKGROUND_BOLD_GREEN
|
|
35
|
+
from dsp_tools.utils.ansi_colors import BACKGROUND_BOLD_RED
|
|
36
|
+
from dsp_tools.utils.ansi_colors import BACKGROUND_BOLD_YELLOW
|
|
37
|
+
from dsp_tools.utils.ansi_colors import BOLD_CYAN
|
|
38
|
+
from dsp_tools.utils.ansi_colors import BOLD_RED
|
|
39
|
+
from dsp_tools.utils.ansi_colors import BOLD_YELLOW
|
|
40
|
+
from dsp_tools.utils.ansi_colors import RESET_TO_DEFAULT
|
|
41
|
+
from dsp_tools.utils.data_formats.uri_util import is_prod_like_server
|
|
42
|
+
from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedResource
|
|
43
|
+
|
|
44
|
+
VALIDATION_ERRORS_FOUND_MSG = BACKGROUND_BOLD_RED + "\n Validation errors found! " + RESET_TO_DEFAULT
|
|
45
|
+
NO_VALIDATION_ERRORS_FOUND_MSG = BACKGROUND_BOLD_GREEN + "\n No validation errors found! " + RESET_TO_DEFAULT
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def validate_data(
    filepath: Path,
    creds: ServerCredentials,
    ignore_duplicate_files_warning: bool,
    save_graphs: bool,
    skip_ontology_validation: bool,
    id2iri_file: str | None,
    do_not_request_resource_metadata_from_db: bool,
) -> bool:
    """
    Validate an XML data file against the ontologies of its project on the server.

    The file is parsed first; the parsed resources are then handed over to
    `validate_parsed_resources`, which performs the validation and reports the outcome.

    Args:
        filepath: path to the xml data file
        creds: server credentials for authentication
        ignore_duplicate_files_warning: ignore the shape that checks for duplicate files
        save_graphs: if set, the graphs are saved at the location chosen by `_get_graph_save_dir`
        skip_ontology_validation: skip the ontology validation
        id2iri_file: to replace internal IDs of an XML file by IRIs provided in this mapping file
        do_not_request_resource_metadata_from_db: true if no metadata for existing resources should be requested

    Returns:
        True if no errors that impede an xmlupload were found.
        Warnings and user info do not impede an xmlupload.
    """
    # The save location is only resolved (which also creates the folder) when the user asked for it.
    graph_save_dir = _get_graph_save_dir(filepath) if save_graphs else None
    on_prod_like_server = is_prod_like_server(creds.server)

    config = ValidateDataConfig(
        xml_file=filepath,
        save_graph_dir=graph_save_dir,
        severity=ValidationSeverity.INFO,
        ignore_duplicate_files_warning=ignore_duplicate_files_warning,
        is_on_prod_server=on_prod_like_server,
        skip_ontology_validation=skip_ontology_validation,
        do_not_request_resource_metadata_from_db=do_not_request_resource_metadata_from_db,
    )
    auth = AuthenticationClientLive(
        server=creds.server,
        email=creds.user,
        password=creds.password,
    )

    file_info = get_info_and_parsed_resources_from_file(
        file=filepath,
        api_url=auth.server,
        id2iri_file=id2iri_file,
    )
    parsed_resources, shortcode, authorship_lookup, permission_ids = file_info

    return validate_parsed_resources(
        parsed_resources=parsed_resources,
        authorship_lookup=authorship_lookup,
        permission_ids=permission_ids,
        shortcode=shortcode,
        config=config,
        auth=auth,
    )
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def validate_parsed_resources(
|
|
104
|
+
parsed_resources: list[ParsedResource],
|
|
105
|
+
authorship_lookup: dict[str, list[str]],
|
|
106
|
+
permission_ids: list[str],
|
|
107
|
+
shortcode: str,
|
|
108
|
+
config: ValidateDataConfig,
|
|
109
|
+
auth: AuthenticationClient,
|
|
110
|
+
) -> bool:
|
|
111
|
+
msg = "Starting SHACL schema validation."
|
|
112
|
+
print(msg)
|
|
113
|
+
logger.debug(msg)
|
|
114
|
+
rdf_graphs, used_iris, existing_resources_retrieved = prepare_data_for_validation_from_parsed_resource(
|
|
115
|
+
parsed_resources=parsed_resources,
|
|
116
|
+
authorship_lookup=authorship_lookup,
|
|
117
|
+
permission_ids=permission_ids,
|
|
118
|
+
auth=auth,
|
|
119
|
+
shortcode=shortcode,
|
|
120
|
+
do_not_request_resource_metadata_from_db=config.do_not_request_resource_metadata_from_db,
|
|
121
|
+
)
|
|
122
|
+
validation_result = _validate_data(
|
|
123
|
+
rdf_graphs, used_iris, parsed_resources, config, shortcode, existing_resources_retrieved
|
|
124
|
+
)
|
|
125
|
+
if validation_result.no_problems:
|
|
126
|
+
logger.debug("No validation errors found.")
|
|
127
|
+
print(NO_VALIDATION_ERRORS_FOUND_MSG)
|
|
128
|
+
return True
|
|
129
|
+
if isinstance(validation_result.problems, UnknownClassesInData):
|
|
130
|
+
msg = get_msg_str_unknown_classes_in_data(validation_result.problems)
|
|
131
|
+
logger.error(msg)
|
|
132
|
+
print(VALIDATION_ERRORS_FOUND_MSG)
|
|
133
|
+
print(msg + "\n")
|
|
134
|
+
# if unknown classes are found, we cannot validate all the data in the file
|
|
135
|
+
return False
|
|
136
|
+
if isinstance(validation_result.problems, OntologyValidationProblem):
|
|
137
|
+
msg = get_msg_str_ontology_validation_violation(validation_result.problems)
|
|
138
|
+
logger.error(msg)
|
|
139
|
+
print(VALIDATION_ERRORS_FOUND_MSG)
|
|
140
|
+
print(msg + "\n")
|
|
141
|
+
# if the ontology itself has errors, we will not validate the data
|
|
142
|
+
return False
|
|
143
|
+
if isinstance(validation_result.problems, SortedProblems):
|
|
144
|
+
_print_shacl_validation_violation_message(validation_result.problems, validation_result.report_graphs, config)
|
|
145
|
+
return _get_validation_status(validation_result.problems, config.is_on_prod_server)
|
|
146
|
+
else:
|
|
147
|
+
raise BaseError(f"Unknown validate data problems: {validation_result.problems!s}")
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _validate_data(
    graphs: RDFGraphs,
    used_iris: set[str],
    parsed_resources: list[ParsedResource],
    config: ValidateDataConfig,
    shortcode: str,
    existing_resources_retrieved: ExistingResourcesRetrieved,
) -> ValidateDataResult:
    """
    Run the individual validation steps and bundle their outcome.

    The steps run in this order and the first one that finds problems ends the run:
    unknown resource classes, ontology validation (unless skipped), data validation.
    The duplicate file check (unless ignored) runs before the data validation report is requested.

    Args:
        graphs: RDF graphs used for the validation
        used_iris: IRIs used in the data, checked against the known resource classes
        parsed_resources: resources parsed from the data file, used for the duplicate file check
        config: configuration of this validation run
        shortcode: shortcode of the project, forwarded to the sorting of the problems
        existing_resources_retrieved: forwarded to the sorting of the problems

    Returns:
        The validation result containing the problems found, if any,
        and the validation report when the data validation itself was carried out and found something to report.
    """
    logger.debug(f"Validate-data called with the following config: {vars(config)}")

    # Check if unknown classes are used
    unknown_classes = check_for_unknown_resource_classes(graphs, used_iris)
    if unknown_classes:
        return ValidateDataResult(False, unknown_classes, None)

    shacl_validator = ShaclCliValidator()

    # Validation of the ontology
    if not config.skip_ontology_validation:
        if onto_problems := validate_ontology(graphs.ontos, shacl_validator, config):
            return ValidateDataResult(False, onto_problems, None)

    # Validation of the data
    duplicate_file_warnings = (
        None if config.ignore_duplicate_files_warning else check_for_duplicate_files(parsed_resources)
    )
    report = get_validation_report(graphs, shacl_validator, config.save_graph_dir)

    if not report.conforms:
        reformatted = reformat_validation_graph(report)
        all_sorted = sort_user_problems(reformatted, duplicate_file_warnings, shortcode, existing_resources_retrieved)
        return ValidateDataResult(False, all_sorted, report)

    if not duplicate_file_warnings:
        return ValidateDataResult(True, None, None)

    # The data conforms, only the duplicate file warnings remain to be reported.
    warnings_only = SortedProblems(
        unique_violations=[],
        user_warnings=duplicate_file_warnings.problems,
        user_info=[],
        unexpected_shacl_validation_components=[],
    )
    return ValidateDataResult(False, warnings_only, report)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _get_graph_save_dir(filepath: Path) -> Path:
    """
    Determine the path prefix under which the graphs of a validation run are saved.

    A folder named "graphs" is created next to the data file if it does not exist yet,
    and the chosen location is printed to the console.

    Args:
        filepath: path to the xml data file

    Returns:
        The "graphs" folder joined with the stem of the data file.
    """
    graphs_folder = Path(filepath.parent, "graphs")
    graphs_folder.mkdir(exist_ok=True)
    save_file_template = graphs_folder / filepath.stem
    print(BOLD_CYAN + f"\n   Saving graphs to {save_file_template}   " + RESET_TO_DEFAULT)
    return save_file_template
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _get_validation_status(all_problems: SortedProblems, is_on_prod: bool) -> bool:
    """
    Decide whether the problems found allow the validation to count as passed.

    Args:
        all_problems: the sorted problems found during the validation
        is_on_prod: whether the validation targets a production-like server

    Returns:
        False if there are violations or unexpected SHACL validation components.
        False if there are user warnings and `is_on_prod` is true.
        True otherwise.
    """
    has_violations = bool(all_problems.unique_violations) or bool(
        all_problems.unexpected_shacl_validation_components
    )
    if has_violations:
        return False
    # Warnings only count as a failure on a production-like server.
    return not (is_on_prod and all_problems.user_warnings)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _print_shacl_validation_violation_message(
    sorted_problems: SortedProblems, report: ValidationReportGraphs | None, config: ValidateDataConfig
) -> None:
    """
    Print the validation findings to the console and write them to the log.

    Violations are always reported. Warnings are reported when the configured severity value is 2 or lower,
    infos only when it is 1. If a message carries a data frame, the data frame is saved as CSV next to the
    XML file and the message body is replaced by a note pointing to that file.
    For unexpected violations, the user is either referred to the saved graphs (if a graph save directory
    is configured) or the graphs of the report are saved next to the XML file.

    Args:
        sorted_problems: the sorted validation problems
        report: the validation report graphs, used when unexpected violations must be saved
        config: validation configuration (severity, XML file path, graph save directory)
    """
    messages = get_user_message(sorted_problems, config.severity)
    if messages.violations:
        print(VALIDATION_ERRORS_FOUND_MSG)
        print(BOLD_RED, messages.violations.message_header, RESET_TO_DEFAULT)
        v_body = messages.violations.message_body
        if messages.violations.message_df is not None:
            v_body = _save_message_df_get_message_body(messages.violations.message_df, "error", config.xml_file)
        print(v_body)
        # Extra positional arguments of a log call are treated as format values, not as further text.
        # The header has no placeholders, so the body must be part of the message itself to be logged.
        logger.error(f"{messages.violations.message_header}\n{v_body}")
    else:
        logger.debug("No validation errors found.")
        print(NO_VALIDATION_ERRORS_FOUND_MSG)
    if messages.warnings and config.severity.value <= 2:
        print(BACKGROUND_BOLD_YELLOW + "\n Warning! " + RESET_TO_DEFAULT)
        print(BOLD_YELLOW, messages.warnings.message_header, RESET_TO_DEFAULT)
        w_body = messages.warnings.message_body
        if messages.warnings.message_df is not None:
            w_body = _save_message_df_get_message_body(messages.warnings.message_df, "warning", config.xml_file)
        print(w_body)
        logger.warning(f"{messages.warnings.message_header}\n{w_body}")
    if messages.infos and config.severity.value == 1:
        print(BACKGROUND_BOLD_CYAN + "\n Potential Problems Found " + RESET_TO_DEFAULT)
        print(BOLD_CYAN, messages.infos.message_header, RESET_TO_DEFAULT)
        i_body = messages.infos.message_body
        if messages.infos.message_df is not None:
            i_body = _save_message_df_get_message_body(messages.infos.message_df, "info", config.xml_file)
        print(i_body)
        logger.info(f"{messages.infos.message_header}\n{i_body}")
    if messages.unexpected_violations:
        logger.error(
            f"{messages.unexpected_violations.message_header}\n{messages.unexpected_violations.message_body}"
        )
        print(
            BACKGROUND_BOLD_RED,
            "\n Unknown violations found! ",
            RESET_TO_DEFAULT,
        )
        if config.save_graph_dir:
            print(
                BOLD_RED,
                messages.unexpected_violations.message_header,
                "Consult the saved graphs for details.",
                RESET_TO_DEFAULT,
            )
            print(messages.unexpected_violations.message_body)
        else:
            # NOTE(review): the cast assumes a report is always present when unexpected violations exist - confirm
            report_graph = cast(ValidationReportGraphs, report)
            _save_unexpected_results_and_inform_user(report_graph, config.xml_file)
    print("\n")
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def _save_message_df_get_message_body(df: pd.DataFrame, severity: str, file_path: Path) -> str:
    """
    Save the problems as CSV next to the given file and return a message pointing to the CSV.

    Args:
        df: data frame with the problems
        severity: label that becomes part of the CSV file name
        file_path: file whose parent directory and stem determine the CSV location

    Returns:
        Message telling the user where the information was saved
    """
    csv_name = f"{file_path.stem}_validation_{severity}.csv"
    out_path = file_path.parent / csv_name
    df.to_csv(out_path, index=False)
    return f"Due to the large number of violations the information was saved at '{out_path}'"
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _save_unexpected_results_and_inform_user(report: ValidationReportGraphs, filepath: Path) -> None:
    """
    Save the validation, SHACL and data graphs next to the given file and ask the user to contact the developers.

    All three files share a timestamp prefix so that they can be identified as belonging together.

    Args:
        report: report whose validation, SHACL and data graphs are serialised
        filepath: file whose parent directory receives the saved graphs
    """
    # str(datetime.now()) contains spaces and colons; colons are not allowed in Windows file names
    timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
    out_dir = filepath.parent
    report.validation_graph.serialize(out_dir / f"{timestamp}_validation_result.ttl")
    report.shacl_graph.serialize(out_dir / f"{timestamp}_shacl.ttl")
    report.data_graph.serialize(out_dir / f"{timestamp}_data.ttl")
    msg = (
        f"\nPlease contact the development team with the files starting with the timestamp '{timestamp}' "
        f"in the directory '{out_dir}'."
    )
    print(BOLD_RED + msg + RESET_TO_DEFAULT)
|
|
File without changes
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
|
|
3
|
+
from loguru import logger
|
|
4
|
+
|
|
5
|
+
from dsp_tools.commands.validate_data.models.input_problems import DuplicateFileWarning
|
|
6
|
+
from dsp_tools.commands.validate_data.models.input_problems import InputProblem
|
|
7
|
+
from dsp_tools.commands.validate_data.models.input_problems import ProblemType
|
|
8
|
+
from dsp_tools.commands.validate_data.models.input_problems import Severity
|
|
9
|
+
from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedResource
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def check_for_duplicate_files(parsed_resources: list[ParsedResource]) -> DuplicateFileWarning | None:
    """
    Look for file values that are referenced by more than one resource.

    Too many duplicate filepaths in the data may cause the SHACL validator to crash:
    a file referenced n times produces n * (n-1) validation errors.
    This programmatic pre-validation therefore prevents crashes during later validation steps.

    Args:
        parsed_resources: Resources to check

    Returns:
        A warning containing one problem per duplicated file value, or None if there are no duplicates
    """
    duplicates = _get_filepaths_with_more_than_one_usage(parsed_resources)
    if duplicates:
        return DuplicateFileWarning(_create_input_problems(duplicates))
    return None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _get_filepaths_with_more_than_one_usage(parsed_resources: list[ParsedResource]) -> dict[str, int]:
    """
    Count how often each file value is used and keep those used more than once.

    Resources without a file value, or with an empty file value, are ignored.

    Args:
        parsed_resources: Resources to check

    Returns:
        Mapping from file value to its usage count, restricted to counts greater than 1
    """
    usage: dict[str, int] = {}
    for res in parsed_resources:
        file_value = res.file_value
        if not file_value or not file_value.value:
            continue
        usage[file_value.value] = usage.get(file_value.value, 0) + 1
    return {path: times for path, times in usage.items() if times > 1}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _create_input_problems(duplicates: dict[str, int]) -> list[InputProblem]:
    """
    Turn the duplicated file values into warnings and log each of them.

    Args:
        duplicates: mapping from file value to the number of times it is used

    Returns:
        One problem of type FILE_DUPLICATE with severity WARNING per entry
    """
    problems: list[InputProblem] = []
    for file_value, times_used in duplicates.items():
        usage_msg = f"value used {times_used} times"
        logger.warning(f"File '{file_value}' {usage_msg}")
        problem = InputProblem(
            problem_type=ProblemType.FILE_DUPLICATE,
            res_id=None,
            res_type=None,
            prop_name="bitstream / iiif-uri",
            severity=Severity.WARNING,
            message=usage_msg,
            input_value=file_value,
        )
        problems.append(problem)
    return problems
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from rdflib import Literal
|
|
2
|
+
from rdflib import URIRef
|
|
3
|
+
|
|
4
|
+
from dsp_tools.commands.validate_data.models.input_problems import UnknownClassesInData
|
|
5
|
+
from dsp_tools.commands.validate_data.models.validation import RDFGraphs
|
|
6
|
+
from dsp_tools.commands.validate_data.utils import reformat_onto_iri
|
|
7
|
+
from dsp_tools.utils.rdf_constants import KNORA_API_PREFIX
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def check_for_unknown_resource_classes(
    rdf_graphs: RDFGraphs, used_resource_iris: set[str]
) -> UnknownClassesInData | None:
    """
    Find classes that the data references but that are not among the classes found in the ontologies.

    Args:
        rdf_graphs: Data graphs
        used_resource_iris: resource IRIs in use

    Returns:
        The unknown and the defined classes (both reformatted) if at least one class is unknown, else None
    """
    known_iris = _get_all_onto_classes(rdf_graphs)
    unknown_iris = used_resource_iris - known_iris
    if not unknown_iris:
        return None
    return UnknownClassesInData(
        unknown_classes={reformat_onto_iri(iri) for iri in unknown_iris},
        defined_classes={reformat_onto_iri(iri) for iri in known_iris},
    )
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _get_all_onto_classes(rdf_graphs: RDFGraphs) -> set[str]:
    """
    Collect the IRIs of all classes that are resource classes and can be instantiated.

    The project ontologies and the knora-api ontology are searched together. A class qualifies if it is
    flagged with both `isResourceClass` and `canBeInstantiated` set to true.

    Args:
        rdf_graphs: graphs containing the ontologies

    Returns:
        The qualifying class IRIs as strings
    """
    combined = rdf_graphs.ontos + rdf_graphs.knora_api
    flagged_as_resource = set(combined.subjects(URIRef(KNORA_API_PREFIX + "isResourceClass"), Literal(True)))
    instantiable = set(combined.subjects(URIRef(KNORA_API_PREFIX + "canBeInstantiated"), Literal(True)))
    return {str(class_iri) for class_iri in instantiable & flagged_as_resource}
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def get_msg_str_unknown_classes_in_data(unknown: UnknownClassesInData) -> str:
    """
    Build the user message for data that uses unknown resource classes.

    If the message about unknown ontologies is non-empty, it is returned instead;
    otherwise the unknown and the existing classes are listed in sorted order.

    Args:
        unknown: the unknown and the defined classes

    Returns:
        The message for the user
    """
    onto_msg = _get_unknown_ontos_msg(unknown)
    if onto_msg:
        return onto_msg
    unknown_str = ", ".join(sorted(unknown.unknown_classes))
    known_str = ", ".join(sorted(unknown.defined_classes))
    lines = [
        "Your data uses resource classes that do not exist in the ontologies in the database.",
        f"The following classes that are used in the data are unknown: {unknown_str}",
        f"The following classes exist in the uploaded ontologies: {known_str}",
    ]
    return "\n".join(lines)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _get_unknown_ontos_msg(unknown: UnknownClassesInData) -> str | None:
    """
    Build a message if the data uses ontology prefixes that none of the defined classes use.

    The prefix of a class is the part before the first colon; classes without a colon
    or with an empty prefix are ignored. The ontologies are listed in sorted order,
    so that the message is identical between runs.

    Args:
        unknown: the unknown and the defined classes

    Returns:
        The message for the user, or None if all prefixes used in the data are known
    """

    def split_prefix(relative_iri: str) -> str | None:
        if ":" not in relative_iri:
            return None
        return relative_iri.split(":")[0]

    used_ontos = {prefix for x in unknown.unknown_classes if (prefix := split_prefix(x))}
    existing_ontos = {prefix for x in unknown.defined_classes if (prefix := split_prefix(x))}
    if unknown_found := used_ontos - existing_ontos:
        # sets have no stable iteration order, therefore sort before joining
        return (
            f"Your data uses ontologies that don't exist in the database.\n"
            f"The following ontologies that are used in the data are unknown: {', '.join(sorted(unknown_found))}\n"
            f"The following ontologies are uploaded: {', '.join(sorted(existing_ontos))}"
        )
    return None
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from loguru import logger
|
|
4
|
+
from rdflib import Graph
|
|
5
|
+
|
|
6
|
+
from dsp_tools.commands.validate_data.constants import CARDINALITY_DATA_TTL
|
|
7
|
+
from dsp_tools.commands.validate_data.constants import CARDINALITY_REPORT_TTL
|
|
8
|
+
from dsp_tools.commands.validate_data.constants import CARDINALITY_SHACL_TTL
|
|
9
|
+
from dsp_tools.commands.validate_data.constants import CONTENT_DATA_TTL
|
|
10
|
+
from dsp_tools.commands.validate_data.constants import CONTENT_REPORT_TTL
|
|
11
|
+
from dsp_tools.commands.validate_data.constants import CONTENT_SHACL_TTL
|
|
12
|
+
from dsp_tools.commands.validate_data.models.validation import RDFGraphs
|
|
13
|
+
from dsp_tools.commands.validate_data.models.validation import ValidationFilePaths
|
|
14
|
+
from dsp_tools.commands.validate_data.models.validation import ValidationReportGraphs
|
|
15
|
+
from dsp_tools.commands.validate_data.shacl_cli_validator import ShaclCliValidator
|
|
16
|
+
from dsp_tools.commands.validate_data.utils import clean_up_temp_directory
|
|
17
|
+
from dsp_tools.commands.validate_data.utils import get_temp_directory
|
|
18
|
+
from dsp_tools.error.exceptions import ShaclValidationError
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_validation_report(
    rdf_graphs: RDFGraphs, shacl_validator: ShaclCliValidator, graph_save_dir: Path | None = None
) -> ValidationReportGraphs:
    """
    Run the SHACL validation of the data in a temporary directory and return the report.

    Args:
        rdf_graphs: the graphs to validate
        shacl_validator: SHACL CLI validator
        graph_save_dir: passed on to the clean-up of the temporary directory if the validation succeeds

    Returns:
        The validation report

    Raises:
        ShaclValidationError: if anything fails during the validation; in that case the clean-up
            receives a "validation-graphs" directory next to the temporary directory instead
    """
    tmp_dir = get_temp_directory()
    tmp_path = Path(tmp_dir.name)
    save_target = graph_save_dir
    try:
        return _call_shacl_cli(rdf_graphs, shacl_validator, tmp_path)
    except Exception as err:  # noqa: BLE001
        logger.exception(err)
        save_target = tmp_path.parent / "validation-graphs"
        raise ShaclValidationError(
            "An error occurred during the data validation. "
            "Please contact the dsp-tools development team (at support@dasch.swiss) "
            f"with your log files and the files in the directory: {save_target}"
        ) from None
    finally:
        # runs on success and on failure, with whichever target was chosen above
        clean_up_temp_directory(tmp_dir, save_target)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _call_shacl_cli(
    rdf_graphs: RDFGraphs, shacl_validator: ShaclCliValidator, tmp_path: Path
) -> ValidationReportGraphs:
    """
    Write the graphs to disk, run the cardinality and the content validation, and merge the results.

    Args:
        rdf_graphs: the graphs to validate
        shacl_validator: SHACL CLI validator
        tmp_path: directory in which the turtle files are written and validated

    Returns:
        Report that conforms only if both validations conform; its validation graph contains
        the results of every validation that did not conform
    """
    _create_and_write_graphs(rdf_graphs, tmp_path)
    merged_results = Graph()
    all_conform = True
    # first the cardinality validation, then the content validation
    validation_runs = (
        (CARDINALITY_DATA_TTL, CARDINALITY_SHACL_TTL, CARDINALITY_REPORT_TTL),
        (CONTENT_DATA_TTL, CONTENT_SHACL_TTL, CONTENT_REPORT_TTL),
    )
    for data_file, shacl_file, report_file in validation_runs:
        file_paths = ValidationFilePaths(
            directory=tmp_path,
            data_file=data_file,
            shacl_file=shacl_file,
            report_file=report_file,
        )
        outcome = shacl_validator.validate(file_paths)
        if outcome.conforms:
            continue
        merged_results += outcome.validation_graph
        all_conform = False
    return ValidationReportGraphs(
        conforms=all_conform,
        validation_graph=merged_results,
        shacl_graph=rdf_graphs.cardinality_shapes + rdf_graphs.content_shapes,
        onto_graph=rdf_graphs.ontos + rdf_graphs.knora_api,
        data_graph=rdf_graphs.data,
    )
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _create_and_write_graphs(rdf_graphs: RDFGraphs, tmp_path: Path) -> None:
    """
    Serialise the graphs as turtle and write the data and SHACL files for both validations.

    The ontologies and the knora-api ontology are appended to both SHACL files and to the content data file;
    the content data file additionally contains the graph of the resources in the database.

    Args:
        rdf_graphs: the graphs to serialise
        tmp_path: directory in which the turtle files are written
    """
    logger.debug("Serialise RDF graphs into turtle strings")
    data_str = rdf_graphs.data.serialize(format="ttl")
    ontos_str = rdf_graphs.ontos.serialize(format="ttl")
    card_shape_str = rdf_graphs.cardinality_shapes.serialize(format="ttl")
    content_shape_str = rdf_graphs.content_shapes.serialize(format="ttl")
    knora_api_str = rdf_graphs.knora_api.serialize(format="ttl")
    res_in_db_str = rdf_graphs.resources_in_db_graph.serialize(format="ttl")
    turtle_paths_and_graphs = [
        (tmp_path / CARDINALITY_DATA_TTL, data_str),
        (tmp_path / CARDINALITY_SHACL_TTL, card_shape_str + ontos_str + knora_api_str),
        (tmp_path / CONTENT_DATA_TTL, data_str + ontos_str + knora_api_str + res_in_db_str),
        (tmp_path / CONTENT_SHACL_TTL, content_shape_str + ontos_str + knora_api_str),
    ]
    for f_path, content in turtle_paths_and_graphs:
        # Turtle documents are UTF-8; without an explicit encoding the platform default is used,
        # which can fail or corrupt non-ASCII characters (e.g. cp1252 on Windows)
        with open(f_path, "w", encoding="utf-8") as writer:
            writer.write(content)
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import shutil
|
|
2
|
+
from importlib.resources import as_file
|
|
3
|
+
from importlib.resources import files
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from loguru import logger
|
|
7
|
+
from rdflib import RDF
|
|
8
|
+
from rdflib import SH
|
|
9
|
+
from rdflib import Graph
|
|
10
|
+
|
|
11
|
+
from dsp_tools.cli.args import ValidateDataConfig
|
|
12
|
+
from dsp_tools.commands.validate_data.constants import ONTOLOGIES_DATA_TTL
|
|
13
|
+
from dsp_tools.commands.validate_data.constants import ONTOLOGIES_REPORT_TTL
|
|
14
|
+
from dsp_tools.commands.validate_data.constants import ONTOLOGIES_SHACL_TTL
|
|
15
|
+
from dsp_tools.commands.validate_data.models.input_problems import OntologyResourceProblem
|
|
16
|
+
from dsp_tools.commands.validate_data.models.input_problems import OntologyValidationProblem
|
|
17
|
+
from dsp_tools.commands.validate_data.models.validation import ValidationFilePaths
|
|
18
|
+
from dsp_tools.commands.validate_data.shacl_cli_validator import ShaclCliValidator
|
|
19
|
+
from dsp_tools.commands.validate_data.utils import clean_up_temp_directory
|
|
20
|
+
from dsp_tools.commands.validate_data.utils import get_temp_directory
|
|
21
|
+
from dsp_tools.commands.validate_data.utils import reformat_onto_iri
|
|
22
|
+
from dsp_tools.error.exceptions import ShaclValidationError
|
|
23
|
+
from dsp_tools.utils.rdf_constants import SubjectObjectTypeAlias
|
|
24
|
+
|
|
25
|
+
# Separator placed before each entry when problems are listed one per line in a user message
LIST_SEPARATOR = "\n - "
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def validate_ontology(
    onto_graph: Graph, shacl_validator: ShaclCliValidator, config: ValidateDataConfig
) -> OntologyValidationProblem | None:
    """
    Validate the ontology before the data is validated.

    The API accepts erroneous cardinalities in the ontology.
    Validating the ontology beforehand makes it possible to distinguish a mistake in the data
    from an erroneous ontology, so the data validation does not have to take a faulty ontology into account.

    Args:
        onto_graph: the graph of the project ontologies
        shacl_validator: SHACL CLI validator
        config: The configuration where to save the information to

    Returns:
        A validation report if errors were found

    Raises:
        ShaclValidationError: if anything fails during the validation
    """
    tmp_dir = get_temp_directory()
    tmp_path = Path(tmp_dir.name)
    save_target = config.save_graph_dir
    try:
        return _get_ontology_validation_result(onto_graph, shacl_validator, tmp_path)
    except Exception as err:  # noqa: BLE001
        logger.exception(err)
        save_target = tmp_path.parent / "validation-graphs"
        raise ShaclValidationError(
            "An error occurred during the ontology validation. "
            "Please contact the dsp-tools development team (at support@dasch.swiss) "
            f"with your log files and the files in the directory: {save_target}"
        ) from None
    finally:
        # runs on success and on failure, with whichever target was chosen above
        clean_up_temp_directory(tmp_dir, save_target)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _get_ontology_validation_result(
    onto_graph: Graph, shacl_validator: ShaclCliValidator, tmp_path: Path
) -> OntologyValidationProblem | None:
    """
    Validate the ontology graph against the SHACL shapes shipped with the package.

    The packaged shapes file and the serialised ontology are placed in the given directory before validating.

    Args:
        onto_graph: the graph of the project ontologies
        shacl_validator: SHACL CLI validator
        tmp_path: directory in which the files are written and validated

    Returns:
        The reformatted problems if the ontology does not conform, else None
    """
    packaged_shapes = files("dsp_tools").joinpath("resources/validate_data/validate-ontology.ttl")
    with as_file(packaged_shapes) as shapes_source:
        shutil.copy(Path(shapes_source), tmp_path / ONTOLOGIES_SHACL_TTL)
    onto_graph.serialize(tmp_path / ONTOLOGIES_DATA_TTL)
    outcome = shacl_validator.validate(
        ValidationFilePaths(
            directory=tmp_path,
            data_file=ONTOLOGIES_DATA_TTL,
            shacl_file=ONTOLOGIES_SHACL_TTL,
            report_file=ONTOLOGIES_REPORT_TTL,
        )
    )
    if not outcome.conforms:
        return OntologyValidationProblem(_reformat_ontology_validation_result(outcome.validation_graph))
    return None
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _reformat_ontology_validation_result(validation_result: Graph) -> list[OntologyResourceProblem]:
    """
    Convert every SHACL validation result in the graph into a problem.

    Args:
        validation_result: the validation graph

    Returns:
        One problem per node of type sh:ValidationResult
    """
    return [
        _get_one_problem(validation_result, result_node)
        for result_node in validation_result.subjects(RDF.type, SH.ValidationResult)
    ]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _get_one_problem(val_g: Graph, result_bn: SubjectObjectTypeAlias) -> OntologyResourceProblem:
    """
    Extract the focus node and the message of one validation result.

    The message is collapsed into a single line: it is split at line breaks,
    every part is stripped, and the parts are joined with spaces.

    Args:
        val_g: the validation graph
        result_bn: the node of the validation result

    Returns:
        Problem with the reformatted focus node IRI and the single-line message
    """
    focus_node = next(val_g.objects(result_bn, SH.focusNode))
    focus_node_str = reformat_onto_iri(focus_node)
    raw_message = str(next(val_g.objects(result_bn, SH.resultMessage)))
    single_line = " ".join(part.strip() for part in raw_message.split("\n"))
    return OntologyResourceProblem(focus_node_str, single_line)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def get_msg_str_ontology_validation_violation(onto_violations: OntologyValidationProblem) -> str:
    """
    Build the user message for an ontology that did not pass the validation.

    Args:
        onto_violations: the problems found in the ontology

    Returns:
        An explanation followed by one entry per problem, sorted by the IRI of the resource class
    """
    ordered = sorted(onto_violations.problems, key=lambda prob: prob.res_iri)
    entries = [f"Resource Class: {prob.res_iri} | Problem: {prob.msg}" for prob in ordered]
    explanation = (
        "The ontology structure contains errors that prevent the validation of the data.\n"
        "Please correct the following errors and re-upload the corrected ontology.\n"
        "Once those two steps are done, the command `validate-data` will find any problems in the data.\n"
    )
    # the separator also precedes the first entry
    return explanation + LIST_SEPARATOR + LIST_SEPARATOR.join(entries)
|