dsp-tools 0.9.13__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsp_tools/__init__.py +5 -0
- dsp_tools/cli/args.py +47 -0
- dsp_tools/cli/call_action.py +85 -0
- dsp_tools/cli/call_action_files_only.py +101 -0
- dsp_tools/cli/call_action_with_network.py +207 -0
- dsp_tools/cli/create_parsers.py +479 -0
- dsp_tools/cli/entry_point.py +322 -0
- dsp_tools/cli/utils.py +87 -0
- dsp_tools/clients/CLAUDE.md +420 -0
- dsp_tools/clients/authentication_client.py +14 -0
- dsp_tools/clients/authentication_client_live.py +66 -0
- dsp_tools/clients/connection.py +35 -0
- dsp_tools/clients/connection_live.py +233 -0
- dsp_tools/clients/fuseki_metrics.py +60 -0
- dsp_tools/clients/group_user_clients.py +35 -0
- dsp_tools/clients/group_user_clients_live.py +181 -0
- dsp_tools/clients/legal_info_client.py +23 -0
- dsp_tools/clients/legal_info_client_live.py +132 -0
- dsp_tools/clients/list_client.py +49 -0
- dsp_tools/clients/list_client_live.py +166 -0
- dsp_tools/clients/metadata_client.py +24 -0
- dsp_tools/clients/metadata_client_live.py +47 -0
- dsp_tools/clients/ontology_clients.py +49 -0
- dsp_tools/clients/ontology_create_client_live.py +166 -0
- dsp_tools/clients/ontology_get_client_live.py +80 -0
- dsp_tools/clients/permissions_client.py +68 -0
- dsp_tools/clients/project_client.py +16 -0
- dsp_tools/clients/project_client_live.py +66 -0
- dsp_tools/commands/create/communicate_problems.py +24 -0
- dsp_tools/commands/create/create.py +134 -0
- dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
- dsp_tools/commands/create/create_on_server/classes.py +99 -0
- dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
- dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
- dsp_tools/commands/create/create_on_server/group_users.py +165 -0
- dsp_tools/commands/create/create_on_server/lists.py +163 -0
- dsp_tools/commands/create/create_on_server/mappers.py +12 -0
- dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
- dsp_tools/commands/create/create_on_server/ontology.py +52 -0
- dsp_tools/commands/create/create_on_server/project.py +68 -0
- dsp_tools/commands/create/create_on_server/properties.py +119 -0
- dsp_tools/commands/create/exceptions.py +29 -0
- dsp_tools/commands/create/lists_only.py +66 -0
- dsp_tools/commands/create/models/create_problems.py +87 -0
- dsp_tools/commands/create/models/parsed_ontology.py +88 -0
- dsp_tools/commands/create/models/parsed_project.py +81 -0
- dsp_tools/commands/create/models/rdf_ontology.py +12 -0
- dsp_tools/commands/create/models/server_project_info.py +100 -0
- dsp_tools/commands/create/parsing/parse_lists.py +45 -0
- dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
- dsp_tools/commands/create/parsing/parse_project.py +149 -0
- dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
- dsp_tools/commands/create/project_validate.py +595 -0
- dsp_tools/commands/create/serialisation/ontology.py +119 -0
- dsp_tools/commands/create/serialisation/project.py +44 -0
- dsp_tools/commands/excel2json/CLAUDE.md +101 -0
- dsp_tools/commands/excel2json/json_header.py +321 -0
- dsp_tools/commands/excel2json/lists/__init__.py +0 -0
- dsp_tools/commands/excel2json/lists/compliance_checks.py +292 -0
- dsp_tools/commands/excel2json/lists/make_lists.py +247 -0
- dsp_tools/commands/excel2json/lists/models/__init__.py +0 -0
- dsp_tools/commands/excel2json/lists/models/deserialise.py +30 -0
- dsp_tools/commands/excel2json/lists/models/input_error.py +216 -0
- dsp_tools/commands/excel2json/lists/models/serialise.py +57 -0
- dsp_tools/commands/excel2json/lists/utils.py +81 -0
- dsp_tools/commands/excel2json/models/__init__.py +0 -0
- dsp_tools/commands/excel2json/models/input_error.py +416 -0
- dsp_tools/commands/excel2json/models/json_header.py +175 -0
- dsp_tools/commands/excel2json/models/list_node_name.py +16 -0
- dsp_tools/commands/excel2json/models/ontology.py +76 -0
- dsp_tools/commands/excel2json/old_lists.py +328 -0
- dsp_tools/commands/excel2json/project.py +280 -0
- dsp_tools/commands/excel2json/properties.py +370 -0
- dsp_tools/commands/excel2json/resources.py +336 -0
- dsp_tools/commands/excel2json/utils.py +352 -0
- dsp_tools/commands/excel2xml/__init__.py +7 -0
- dsp_tools/commands/excel2xml/excel2xml_cli.py +523 -0
- dsp_tools/commands/excel2xml/excel2xml_lib.py +1953 -0
- dsp_tools/commands/excel2xml/propertyelement.py +47 -0
- dsp_tools/commands/get/__init__.py +0 -0
- dsp_tools/commands/get/get.py +166 -0
- dsp_tools/commands/get/get_permissions.py +257 -0
- dsp_tools/commands/get/get_permissions_legacy.py +89 -0
- dsp_tools/commands/get/legacy_models/__init__.py +0 -0
- dsp_tools/commands/get/legacy_models/context.py +318 -0
- dsp_tools/commands/get/legacy_models/group.py +241 -0
- dsp_tools/commands/get/legacy_models/helpers.py +47 -0
- dsp_tools/commands/get/legacy_models/listnode.py +390 -0
- dsp_tools/commands/get/legacy_models/model.py +12 -0
- dsp_tools/commands/get/legacy_models/ontology.py +324 -0
- dsp_tools/commands/get/legacy_models/project.py +366 -0
- dsp_tools/commands/get/legacy_models/propertyclass.py +417 -0
- dsp_tools/commands/get/legacy_models/resourceclass.py +676 -0
- dsp_tools/commands/get/legacy_models/user.py +438 -0
- dsp_tools/commands/get/models/__init__.py +0 -0
- dsp_tools/commands/get/models/permissions_models.py +10 -0
- dsp_tools/commands/id2iri.py +258 -0
- dsp_tools/commands/ingest_xmlupload/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +178 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +69 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +166 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +121 -0
- dsp_tools/commands/ingest_xmlupload/ingest_files/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +64 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +20 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +57 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +66 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +67 -0
- dsp_tools/commands/resume_xmlupload/__init__.py +0 -0
- dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +96 -0
- dsp_tools/commands/start_stack.py +428 -0
- dsp_tools/commands/update_legal/CLAUDE.md +344 -0
- dsp_tools/commands/update_legal/__init__.py +0 -0
- dsp_tools/commands/update_legal/core.py +182 -0
- dsp_tools/commands/update_legal/csv_operations.py +135 -0
- dsp_tools/commands/update_legal/models.py +87 -0
- dsp_tools/commands/update_legal/xml_operations.py +247 -0
- dsp_tools/commands/validate_data/CLAUDE.md +159 -0
- dsp_tools/commands/validate_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/constants.py +59 -0
- dsp_tools/commands/validate_data/mappers.py +143 -0
- dsp_tools/commands/validate_data/models/__init__.py +0 -0
- dsp_tools/commands/validate_data/models/api_responses.py +45 -0
- dsp_tools/commands/validate_data/models/input_problems.py +119 -0
- dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
- dsp_tools/commands/validate_data/models/validation.py +106 -0
- dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
- dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
- dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
- dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
- dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
- dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
- dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
- dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
- dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
- dsp_tools/commands/validate_data/sparql/cardinality_shacl.py +209 -0
- dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
- dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
- dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
- dsp_tools/commands/validate_data/utils.py +59 -0
- dsp_tools/commands/validate_data/validate_data.py +283 -0
- dsp_tools/commands/validate_data/validation/__init__.py +0 -0
- dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
- dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
- dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
- dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
- dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
- dsp_tools/commands/xmlupload/__init__.py +0 -0
- dsp_tools/commands/xmlupload/iri_resolver.py +21 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
- dsp_tools/commands/xmlupload/models/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
- dsp_tools/commands/xmlupload/models/formatted_text_value.py +10 -0
- dsp_tools/commands/xmlupload/models/ingest.py +143 -0
- dsp_tools/commands/xmlupload/models/input_problems.py +58 -0
- dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
- dsp_tools/commands/xmlupload/models/permission.py +45 -0
- dsp_tools/commands/xmlupload/models/permissions_parsed.py +93 -0
- dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
- dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
- dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
- dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
- dsp_tools/commands/xmlupload/models/upload_clients.py +14 -0
- dsp_tools/commands/xmlupload/models/upload_state.py +20 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/ark2iri.py +55 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/iiif_uri_validator.py +50 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/list_client.py +120 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
- dsp_tools/commands/xmlupload/resource_create_client.py +25 -0
- dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
- dsp_tools/commands/xmlupload/stash/__init__.py +0 -0
- dsp_tools/commands/xmlupload/stash/analyse_circular_reference_graph.py +236 -0
- dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
- dsp_tools/commands/xmlupload/stash/graph_models.py +87 -0
- dsp_tools/commands/xmlupload/stash/stash_circular_references.py +68 -0
- dsp_tools/commands/xmlupload/stash/stash_models.py +109 -0
- dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +106 -0
- dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +196 -0
- dsp_tools/commands/xmlupload/upload_config.py +76 -0
- dsp_tools/commands/xmlupload/write_diagnostic_info.py +27 -0
- dsp_tools/commands/xmlupload/xmlupload.py +516 -0
- dsp_tools/config/__init__.py +0 -0
- dsp_tools/config/logger_config.py +69 -0
- dsp_tools/config/warnings_config.py +32 -0
- dsp_tools/error/__init__.py +0 -0
- dsp_tools/error/custom_warnings.py +39 -0
- dsp_tools/error/exceptions.py +204 -0
- dsp_tools/error/problems.py +10 -0
- dsp_tools/error/xmllib_errors.py +20 -0
- dsp_tools/error/xmllib_warnings.py +54 -0
- dsp_tools/error/xmllib_warnings_util.py +159 -0
- dsp_tools/error/xsd_validation_error_msg.py +19 -0
- dsp_tools/legacy_models/__init__.py +0 -0
- dsp_tools/legacy_models/datetimestamp.py +81 -0
- dsp_tools/legacy_models/langstring.py +253 -0
- dsp_tools/legacy_models/projectContext.py +49 -0
- dsp_tools/py.typed +0 -0
- dsp_tools/resources/schema/data.xsd +648 -0
- dsp_tools/resources/schema/lists-only.json +72 -0
- dsp_tools/resources/schema/project.json +1258 -0
- dsp_tools/resources/schema/properties-only.json +874 -0
- dsp_tools/resources/schema/resources-only.json +140 -0
- dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
- dsp_tools/resources/start-stack/docker-compose.override.yml +11 -0
- dsp_tools/resources/start-stack/docker-compose.yml +88 -0
- dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
- dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
- dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
- dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
- dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
- dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
- dsp_tools/utils/__init__.py +0 -0
- dsp_tools/utils/ansi_colors.py +32 -0
- dsp_tools/utils/data_formats/__init__.py +0 -0
- dsp_tools/utils/data_formats/date_util.py +166 -0
- dsp_tools/utils/data_formats/iri_util.py +30 -0
- dsp_tools/utils/data_formats/shared.py +81 -0
- dsp_tools/utils/data_formats/uri_util.py +76 -0
- dsp_tools/utils/fuseki_bloating.py +63 -0
- dsp_tools/utils/json_parsing.py +22 -0
- dsp_tools/utils/rdf_constants.py +42 -0
- dsp_tools/utils/rdflib_utils.py +10 -0
- dsp_tools/utils/replace_id_with_iri.py +66 -0
- dsp_tools/utils/request_utils.py +238 -0
- dsp_tools/utils/xml_parsing/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
- dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
- dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
- dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
- dsp_tools/xmllib/CLAUDE.md +302 -0
- dsp_tools/xmllib/__init__.py +49 -0
- dsp_tools/xmllib/general_functions.py +877 -0
- dsp_tools/xmllib/internal/__init__.py +0 -0
- dsp_tools/xmllib/internal/checkers.py +162 -0
- dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
- dsp_tools/xmllib/internal/constants.py +46 -0
- dsp_tools/xmllib/internal/input_converters.py +155 -0
- dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
- dsp_tools/xmllib/internal/serialise_resource.py +177 -0
- dsp_tools/xmllib/internal/serialise_values.py +152 -0
- dsp_tools/xmllib/internal/type_aliases.py +11 -0
- dsp_tools/xmllib/models/__init__.py +0 -0
- dsp_tools/xmllib/models/config_options.py +28 -0
- dsp_tools/xmllib/models/date_formats.py +48 -0
- dsp_tools/xmllib/models/dsp_base_resources.py +1542 -0
- dsp_tools/xmllib/models/internal/__init__.py +0 -0
- dsp_tools/xmllib/models/internal/file_values.py +172 -0
- dsp_tools/xmllib/models/internal/geometry.py +162 -0
- dsp_tools/xmllib/models/internal/migration_metadata.py +55 -0
- dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
- dsp_tools/xmllib/models/internal/values.py +342 -0
- dsp_tools/xmllib/models/licenses/__init__.py +0 -0
- dsp_tools/xmllib/models/licenses/other.py +59 -0
- dsp_tools/xmllib/models/licenses/recommended.py +107 -0
- dsp_tools/xmllib/models/permissions.py +41 -0
- dsp_tools/xmllib/models/res.py +1782 -0
- dsp_tools/xmllib/models/root.py +348 -0
- dsp_tools/xmllib/value_checkers.py +434 -0
- dsp_tools/xmllib/value_converters.py +777 -0
- dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
- dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
- dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
- dsp_tools-18.3.0.post13.dist-info/entry_points.txt +3 -0
- dsp_tools-0.9.13.dist-info/LICENSE +0 -674
- dsp_tools-0.9.13.dist-info/METADATA +0 -144
- dsp_tools-0.9.13.dist-info/RECORD +0 -71
- dsp_tools-0.9.13.dist-info/WHEEL +0 -5
- dsp_tools-0.9.13.dist-info/entry_points.txt +0 -3
- dsp_tools-0.9.13.dist-info/top_level.txt +0 -1
- dsplib/models/connection.py +0 -272
- dsplib/models/group.py +0 -296
- dsplib/models/helpers.py +0 -505
- dsplib/models/langstring.py +0 -277
- dsplib/models/listnode.py +0 -578
- dsplib/models/model.py +0 -20
- dsplib/models/ontology.py +0 -448
- dsplib/models/permission.py +0 -112
- dsplib/models/project.py +0 -547
- dsplib/models/propertyclass.py +0 -505
- dsplib/models/resource.py +0 -366
- dsplib/models/resourceclass.py +0 -810
- dsplib/models/sipi.py +0 -30
- dsplib/models/user.py +0 -731
- dsplib/models/value.py +0 -1000
- dsplib/utils/knora-data-schema.xsd +0 -454
- dsplib/utils/knora-schema-lists.json +0 -83
- dsplib/utils/knora-schema.json +0 -434
- dsplib/utils/onto_commons.py +0 -24
- dsplib/utils/onto_create_lists.py +0 -73
- dsplib/utils/onto_create_ontology.py +0 -442
- dsplib/utils/onto_get.py +0 -58
- dsplib/utils/onto_validate.py +0 -33
- dsplib/utils/xml_upload.py +0 -539
- dsplib/widgets/doublepassword.py +0 -80
- knora/MLS-import-libraries.py +0 -84
- knora/dsp_tools.py +0 -96
- knora/dsplib/models/connection.py +0 -272
- knora/dsplib/models/group.py +0 -296
- knora/dsplib/models/helpers.py +0 -506
- knora/dsplib/models/langstring.py +0 -277
- knora/dsplib/models/listnode.py +0 -578
- knora/dsplib/models/model.py +0 -20
- knora/dsplib/models/ontology.py +0 -448
- knora/dsplib/models/permission.py +0 -112
- knora/dsplib/models/project.py +0 -583
- knora/dsplib/models/propertyclass.py +0 -505
- knora/dsplib/models/resource.py +0 -416
- knora/dsplib/models/resourceclass.py +0 -811
- knora/dsplib/models/sipi.py +0 -35
- knora/dsplib/models/user.py +0 -731
- knora/dsplib/models/value.py +0 -1000
- knora/dsplib/utils/knora-data-schema.xsd +0 -464
- knora/dsplib/utils/knora-schema-lists.json +0 -83
- knora/dsplib/utils/knora-schema.json +0 -444
- knora/dsplib/utils/onto_commons.py +0 -24
- knora/dsplib/utils/onto_create_lists.py +0 -73
- knora/dsplib/utils/onto_create_ontology.py +0 -451
- knora/dsplib/utils/onto_get.py +0 -58
- knora/dsplib/utils/onto_validate.py +0 -33
- knora/dsplib/utils/xml_upload.py +0 -540
- knora/dsplib/widgets/doublepassword.py +0 -80
- knora/knora.py +0 -2108
- knora/test.py +0 -99
- knora/testit.py +0 -76
- knora/xml2knora.py +0 -633
- {dsplib → dsp_tools/cli}/__init__.py +0 -0
- {dsplib/models → dsp_tools/clients}/__init__.py +0 -0
- {dsplib/utils → dsp_tools/commands}/__init__.py +0 -0
- {dsplib/widgets → dsp_tools/commands/create}/__init__.py +0 -0
- {knora → dsp_tools/commands/create/create_on_server}/__init__.py +0 -0
- {knora/dsplib → dsp_tools/commands/create/models}/__init__.py +0 -0
- {knora/dsplib/models → dsp_tools/commands/create/parsing}/__init__.py +0 -0
- {knora/dsplib/utils → dsp_tools/commands/create/serialisation}/__init__.py +0 -0
- {knora/dsplib/widgets → dsp_tools/commands/excel2json}/__init__.py +0 -0
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
from dsp_tools.cli.args import ValidationSeverity
|
|
6
|
+
from dsp_tools.clients.metadata_client import ExistingResourcesRetrieved
|
|
7
|
+
from dsp_tools.commands.validate_data.models.input_problems import AllProblems
|
|
8
|
+
from dsp_tools.commands.validate_data.models.input_problems import DuplicateFileWarning
|
|
9
|
+
from dsp_tools.commands.validate_data.models.input_problems import InputProblem
|
|
10
|
+
from dsp_tools.commands.validate_data.models.input_problems import MessageComponents
|
|
11
|
+
from dsp_tools.commands.validate_data.models.input_problems import ProblemType
|
|
12
|
+
from dsp_tools.commands.validate_data.models.input_problems import Severity
|
|
13
|
+
from dsp_tools.commands.validate_data.models.input_problems import SortedProblems
|
|
14
|
+
from dsp_tools.commands.validate_data.models.input_problems import UserPrintMessages
|
|
15
|
+
|
|
16
|
+
# Prefix placed in front of every entry when several messages are rendered as a bullet list.
LIST_SEPARATOR = "\n - "
# Divider placed between the message blocks of the individual resources.
GRAND_SEPARATOR = "\n\n----------------------------\n"


# Problem types singled out by name; the code consulting this set is not part of this section
# NOTE(review): presumably used when the detail string of a single problem is assembled -- confirm
PROBLEM_TYPES_IGNORE_STR_ENUM_INFO = {
    ProblemType.GENERIC,
    ProblemType.FILE_VALUE_MISSING,
    ProblemType.FILE_DUPLICATE,
}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def sort_user_problems(
    all_problems: AllProblems,
    duplicate_file_warnings: DuplicateFileWarning | None,
    shortcode: str,
    existing_resources_retrieved: ExistingResourcesRetrieved,
) -> SortedProblems:
    """
    Triage, de-duplicate and sort the validation problems according to their severity.

    Args:
        all_problems: the problems found during the validation and the unexpected results, if any
        duplicate_file_warnings: problems about duplicate files, they are appended to the warnings
        shortcode: shortcode of the project, used to recognise absolute IRIs that belong to the project itself
        existing_resources_retrieved: passed on to the triage of links that point to inexistent resources

    Returns:
        The problems separated into violations, warnings and info,
        plus the unique names of the unexpected validation components in alphabetical order
    """
    iris_triaged, links_level_info = _separate_resource_links_to_iris_of_own_project(
        all_problems.problems, shortcode, existing_resources_retrieved
    )
    filtered_problems = _filter_out_duplicate_problems(iris_triaged)
    violations, warnings, info = _separate_according_to_severity(filtered_problems)
    if duplicate_file_warnings:
        warnings.extend(duplicate_file_warnings.problems)
    info.extend(links_level_info)
    # The iteration order of a set of strings changes between interpreter runs (hash randomisation),
    # sorting makes the message that is later shown to the user reproducible.
    unique_unexpected = sorted({x.component_type for x in all_problems.unexpected_results or []})
    return SortedProblems(
        unique_violations=violations,
        user_warnings=warnings,
        user_info=info,
        unexpected_shacl_validation_components=unique_unexpected,
    )
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _separate_according_to_severity(
    problems: list[InputProblem],
) -> tuple[list[InputProblem], list[InputProblem], list[InputProblem]]:
    """
    Split the problems into three lists, one per severity level.

    Args:
        problems: problems to separate

    Returns:
        Violations, warnings and info (in this order), each in the order of the input.
        Problems with any other severity are not returned.
    """
    violations: list[InputProblem] = []
    warnings: list[InputProblem] = []
    info: list[InputProblem] = []
    for prblm in problems:
        # independent checks, every severity level is compared on its own
        if prblm.severity == Severity.VIOLATION:
            violations.append(prblm)
        if prblm.severity == Severity.WARNING:
            warnings.append(prblm)
        if prblm.severity == Severity.INFO:
            info.append(prblm)
    return violations, warnings, info
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _separate_resource_links_to_iris_of_own_project(
    problems: list[InputProblem], shortcode: str, existing_resources_retrieved: ExistingResourcesRetrieved
) -> tuple[list[InputProblem], list[InputProblem]]:
    """
    Triage the problems about links to inexistent resources and separate those that are only an info.

    Args:
        problems: all problems
        shortcode: shortcode of the project
        existing_resources_retrieved: passed on to the triage of the individual link problem

    Returns:
        First the problems that are kept as they are or remain violations after the triage,
        second the link problems that were downgraded to info level
    """
    kept_problems = []
    info_level_links = []
    for candidate in problems:
        # everything that is not about an inexistent link target passes through untouched
        if candidate.problem_type != ProblemType.INEXISTENT_LINKED_RESOURCE:
            kept_problems.append(candidate)
            continue
        remains_violation, triaged = _determined_link_value_message_and_level(
            candidate, shortcode, existing_resources_retrieved
        )
        destination = kept_problems if remains_violation else info_level_links
        destination.append(triaged)
    return kept_problems, info_level_links
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _determined_link_value_message_and_level(
    problem: InputProblem, shortcode: str, existing_resources_retrieved: ExistingResourcesRetrieved
) -> tuple[bool, InputProblem]:
    """
    Decide if a link to an inexistent resource is a violation and adjust its type and message.
    The problem is modified in place.

    Args:
        problem: problem about a link target that was not found
        shortcode: shortcode of the project
        existing_resources_retrieved: if it equals `ExistingResourcesRetrieved.TRUE`,
            an IRI of the own project that was not found is treated as a violation

    Returns:
        A flag that is True if the problem is a violation and False if it is only an info,
        together with the problem
    """
    iri_prefix = "http://rdfh.ch/"
    link_target = problem.input_value

    # No value, or not an absolute resource IRI:
    # it must be an internal ID that does not exist in the XML, the problem stays as it is
    if not link_target or not link_target.startswith(iri_prefix):
        return True, problem

    # An IRI that does not contain the shortcode of the project
    if not link_target.startswith(f"{iri_prefix}{shortcode}/"):
        problem.message = (
            "You used an absolute IRI to reference an existing resource of another project in the DB. "
            "Cross-Project resource links are not permitted."
        )
        problem.problem_type = ProblemType.LINK_TARGET_OF_ANOTHER_PROJECT
        return True, problem

    # From here on the IRI matches those of the project itself
    if existing_resources_retrieved == ExistingResourcesRetrieved.TRUE:
        # if the metadata was successfully retrieved, then the IRI is wrong
        problem.problem_type = ProblemType.LINK_TARGET_NOT_FOUND_IN_DB
        problem.message = (
            "You used an absolute IRI to reference an existing resource in the DB. "
            "We could not find a reference to this resource in the database."
        )
        return True, problem

    # the metadata could not be retrieved, the existence cannot be verified, so it is only an info
    problem.problem_type = ProblemType.LINK_TARGET_IS_IRI_OF_PROJECT
    problem.message = (
        "You used an absolute IRI to reference an existing resource in the DB. "
        "If this resource does not exist or is not of the correct type, an xmlupload will fail."
    )
    return False, problem
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _filter_out_duplicate_problems(problems: list[InputProblem]) -> list[InputProblem]:
    """
    Remove problems that would communicate the same issue to the user twice.
    The duplicates are searched for per resource, problems without a resource ID are kept as they are.

    Args:
        problems: problems that may contain duplicates

    Returns:
        The problems without a resource ID, followed by the filtered problems of each resource
    """
    by_resource, deduplicated = _group_problems_by_resource(problems)
    for resource_problems in by_resource.values():
        deduplicated += _filter_out_duplicate_wrong_file_type_problems(
            _filter_out_duplicate_text_value_problem(resource_problems)
        )
    return deduplicated
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _filter_out_duplicate_text_value_problem(problems: list[InputProblem]) -> list[InputProblem]:
    """
    Drop the generic "requires a TextValue" problem of a property,
    if a more precise problem about the kind of TextValue exists for the same property.

    Args:
        problems: problems of one resource

    Returns:
        All problems that are not value type mismatches, followed by the type mismatches grouped by property
    """
    generic_expected = "This property requires a TextValue"
    precise_expected = ("TextValue without formatting", "TextValue with formatting")

    result = []
    mismatches_by_property = defaultdict(list)
    for prblm in problems:
        if prblm.problem_type == ProblemType.VALUE_TYPE_MISMATCH:
            mismatches_by_property[prblm.prop_name].append(prblm)
        else:
            result.append(prblm)

    for property_problems in mismatches_by_property.values():
        expected_msgs = [x.expected for x in property_problems]
        # A duplicate is only possible if the generic message is present (only the case for TextValue)
        has_generic = generic_expected in expected_msgs
        # ... and only if one of the more precise messages exists as well
        has_precise = any(precise in expected_msgs for precise in precise_expected)
        if has_generic and has_precise:
            # the first generic message is removed, the specific one is communicated to the user
            del property_problems[expected_msgs.index(generic_expected)]
        result.extend(property_problems)

    return result
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _filter_out_duplicate_wrong_file_type_problems(problems: list[InputProblem]) -> list[InputProblem]:
    """
    Merge the two problems that are caused by one file of the wrong type into a single problem.

    If a class is, for example, an AudioRepresentation, but a jpg file is used,
    the created value is of type StillImageFileValue.
    This results in a min cardinality violation (the audio file is missing)
    and in a closed constraint violation (it is not permissible to add an image).
    Only one message should be given to the user.

    Args:
        problems: problems of one resource

    Returns:
        The input list itself if there is nothing to merge,
        otherwise a new list without the two problems and with the merged problem at its end
    """
    idx_missing = None
    idx_prohibited = None
    for position, prblm in enumerate(problems):
        # only the first occurrence of each type is of interest
        if idx_missing is None and prblm.problem_type == ProblemType.FILE_VALUE_MISSING:
            idx_missing = position
        if idx_prohibited is None and prblm.problem_type == ProblemType.FILE_VALUE_PROHIBITED:
            idx_prohibited = position

    if idx_missing is None or idx_prohibited is None:
        return problems

    # The message of the missing file problem is better,
    # but only the closed constraint violation contains the actual input value, so it is copied over.
    merged = problems[idx_missing]
    merged.input_value = problems[idx_prohibited].input_value
    remaining = [prblm for i, prblm in enumerate(problems) if i != idx_missing and i != idx_prohibited]
    remaining.append(merged)
    return remaining
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _group_problems_by_resource(
    problems: list[InputProblem],
) -> tuple[dict[str, list[InputProblem]], list[InputProblem]]:
    """
    Group the problems by the ID of the resource they belong to.

    Args:
        problems: problems to group

    Returns:
        A dictionary with the resource ID as key and its problems as value,
        and a list with the problems that do not have a resource ID
    """
    by_res_id = defaultdict(list)
    no_res_id = []
    for prblm in problems:
        bucket = by_res_id[prblm.res_id] if prblm.res_id else no_res_id
        bucket.append(prblm)
    return by_res_id, no_res_id
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def get_user_message(sorted_problems: SortedProblems, severity: ValidationSeverity) -> UserPrintMessages:
    """
    Creates the components of the messages that are communicated to the user.

    For each severity level that has problems, a header is created.
    If there are too many problems to print, the details are provided as a data frame,
    otherwise as a printable message body.

    Args:
        sorted_problems: validation problems
        severity: Severity level of validation information

    Returns:
        Problem message
    """
    too_many_to_print = _are_there_too_many_to_print(sorted_problems, severity)

    def assemble(problems: list[InputProblem], header: str) -> MessageComponents | None:
        # no problems of this level: no message at all
        if not problems:
            return None
        if too_many_to_print:
            return MessageComponents(header, None, _get_message_df(problems))
        return MessageComponents(header, _get_problem_print_message(problems), None)

    violation_message = assemble(
        sorted_problems.unique_violations,
        (
            f"During the validation of the data {len(sorted_problems.unique_violations)} errors were found. "
            f"Until they are resolved an xmlupload is not possible."
        ),
    )
    warning_message = assemble(
        sorted_problems.user_warnings,
        (
            f"During the validation of the data {len(sorted_problems.user_warnings)} "
            f"problems were found. Warnings are allowed on test servers. "
            f"Please note that an xmlupload on a prod sever will fail."
        ),
    )
    info_message = assemble(
        sorted_problems.user_info,
        (
            f"During the validation of the data {len(sorted_problems.user_info)} "
            f"potential problems were found. They will not impede an xmlupload."
        ),
    )

    unexpected_violations = None
    unexpected_components = sorted_problems.unexpected_shacl_validation_components
    if unexpected_components:
        # the leading separator turns the first entry into a bullet point as well
        unexpected_violations = MessageComponents(
            "The following unknown violation types were found!",
            LIST_SEPARATOR + LIST_SEPARATOR.join(unexpected_components),
            None,
        )
    return UserPrintMessages(violation_message, warning_message, info_message, unexpected_violations)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _are_there_too_many_to_print(sorted_problems: SortedProblems, severity: ValidationSeverity) -> bool:
    """
    Decide whether the problems counted for the given severity exceed the print limit.

    Violations are always counted. Warnings are added when the severity value is at most
    the value of `ValidationSeverity.WARNING`, and infos are added when the severity value
    equals the value of `ValidationSeverity.INFO`.

    Args:
        sorted_problems: problems separated into violations, warnings and infos
        severity: severity level that selects which categories are counted

    Returns:
        True if more than 60 problems were counted
    """
    level = severity.value
    counted_groups = [sorted_problems.unique_violations]
    if level <= ValidationSeverity.WARNING.value:
        counted_groups.append(sorted_problems.user_warnings)
    if level == ValidationSeverity.INFO.value:
        counted_groups.append(sorted_problems.user_info)
    total = sum(len(group) for group in counted_groups)
    return total > 60
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _get_problem_print_message(problems: list[InputProblem]) -> str:
    """
    Build the printable text for a list of problems, one section per resource.

    The section for problems without a resource ID (if any) comes first, followed by
    one section per resource, ordered by the string form of the resource ID.

    Args:
        problems: problems to render

    Returns:
        All sections joined with GRAND_SEPARATOR
    """
    by_resource, no_resource = _group_problems_by_resource(problems)
    sections = []
    if no_resource:
        sections.append(_get_message_for_one_resource(no_resource))

    def _res_id_as_str(group: list[InputProblem]) -> str:
        # every problem of a group belongs to the same resource, so the first one is representative
        return str(group[0].res_id)

    ordered_groups = list(by_resource.values())
    ordered_groups.sort(key=_res_id_as_str)
    for group in ordered_groups:
        sections.append(_get_message_for_one_resource(group))
    return GRAND_SEPARATOR.join(sections)
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def _get_message_for_one_resource(problems: list[InputProblem]) -> str:
    """
    Render the problems of a single resource as a header line followed by the property messages.

    The header is taken from the first problem. If that problem has no resource ID,
    the header is empty and the result starts with a line break.

    Args:
        problems: non-empty list of problems (the first element is accessed unconditionally)

    Returns:
        Header and property messages separated by a line break
    """
    first = problems[0]
    header = f"Resource ID: {first.res_id} | Resource Type: {first.res_type}" if first.res_id else ""
    body = _get_message_with_properties(problems)
    return f"{header}\n{body}"
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _get_message_with_properties(problems: list[InputProblem]) -> str:
|
|
267
|
+
messages = defaultdict(list)
|
|
268
|
+
for prob in problems:
|
|
269
|
+
messages[prob.prop_name].append(_get_message_detail_str(prob))
|
|
270
|
+
|
|
271
|
+
def format_msg(propname: str, msg: list[str]) -> str:
|
|
272
|
+
return f"{propname}{LIST_SEPARATOR}{LIST_SEPARATOR.join(msg)}"
|
|
273
|
+
|
|
274
|
+
return "\n".join([format_msg(k, v) for k, v in messages.items()])
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def _get_message_detail_str(problem: InputProblem) -> str:
    """
    Assemble the one-line detail text for a single problem.

    The parts appear in a fixed order: message, problem type, user input,
    actual input type, expectation. A part is left out if its source value is empty;
    the problem type is left out if it is listed in PROBLEM_TYPES_IGNORE_STR_ENUM_INFO.

    Args:
        problem: problem to describe

    Returns:
        The available parts joined with " | "
    """
    kind = problem.problem_type
    # None marks a part that must not show up in the output
    candidates = (
        problem.message or None,
        None if kind in PROBLEM_TYPES_IGNORE_STR_ENUM_INFO else str(kind),
        f"Your input: '{_shorten_input(problem.input_value, kind)}'" if problem.input_value else None,
        f"Actual input type: '{problem.input_type}'" if problem.input_type else None,
        f"Expected{_get_expected_prefix(kind)}: {problem.expected}" if problem.expected else None,
    )
    return " | ".join(part for part in candidates if part is not None)
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def _get_expected_prefix(problem_type: ProblemType) -> str | None:
    """
    Look up the label that specifies what kind of expectation is reported for a problem type.

    The labels start with a space so that they can be appended directly after the word "Expected".

    Args:
        problem_type: type of the problem

    Returns:
        The label for the problem type, or an empty string if the type has no dedicated label
    """
    if problem_type == ProblemType.VALUE_TYPE_MISMATCH:
        return " Value Type"
    if problem_type == ProblemType.INPUT_REGEX:
        return " Input Format"
    if problem_type == ProblemType.LINK_TARGET_TYPE_MISMATCH:
        return " Resource Type"
    return ""
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def _get_message_df(problems: list[InputProblem]) -> pd.DataFrame:
|
|
305
|
+
problem_dicts = [_get_message_dict(x) for x in problems]
|
|
306
|
+
df = pd.DataFrame.from_records(problem_dicts)
|
|
307
|
+
sort_by = [x for x in ["Resource Type", "Resource ID", "Property"] if x in df.columns]
|
|
308
|
+
df = df.sort_values(by=sort_by)
|
|
309
|
+
return df
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def _get_message_dict(problem: InputProblem) -> dict[str, str]:
    """
    Turn one problem into a row for the tabular output, leaving out empty fields.

    Key order: resource ID, resource type, property, user input, input type,
    followed by the entries of `_get_expected_message_dict` and finally the problem type,
    unless that type is listed in PROBLEM_TYPES_IGNORE_STR_ENUM_INFO.

    Args:
        problem: problem to convert

    Returns:
        Mapping from column name to cell content
    """
    labelled_values = (
        ("Resource ID", problem.res_id),
        ("Resource Type", problem.res_type),
        ("Property", problem.prop_name),
        ("Your Input", _shorten_input(problem.input_value, problem.problem_type)),
        ("Input Type", problem.input_type),
    )
    row = {label: value for label, value in labelled_values if value}
    row |= _get_expected_message_dict(problem)
    if problem.problem_type not in PROBLEM_TYPES_IGNORE_STR_ENUM_INFO:
        row["Problem"] = str(problem.problem_type)
    return row
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def _get_expected_message_dict(problem: InputProblem) -> dict[str, str]:
|
|
329
|
+
out_dict = {}
|
|
330
|
+
if problem.expected:
|
|
331
|
+
msg_str = problem.expected
|
|
332
|
+
if prefix := _get_expected_prefix(problem.problem_type):
|
|
333
|
+
msg_str = f"{prefix.strip()}: {msg_str}"
|
|
334
|
+
out_dict["Expected"] = msg_str
|
|
335
|
+
if problem.message:
|
|
336
|
+
if problem.expected:
|
|
337
|
+
out_dict["Message"] = problem.message
|
|
338
|
+
else:
|
|
339
|
+
out_dict["Expected"] = problem.message
|
|
340
|
+
return out_dict
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _shorten_input(user_input: str | None, problem_type: ProblemType) -> str | None:
    """
    Truncate long user input for display, except where the full value must be shown.

    For the file and link related problem types listed below, the input is returned as it is.
    Otherwise, empty input becomes None, input starting with one of the "http://rdfh.ch/" prefixes
    and input of at most 50 characters is returned in full,
    and anything longer is cut after 50 characters and marked with "[...]".

    Args:
        user_input: value entered by the user, may be None
        problem_type: type of the problem the input belongs to

    Returns:
        The input, possibly shortened, or None
    """
    never_shortened = (
        ProblemType.FILE_DUPLICATE,
        ProblemType.FILE_VALUE_MISSING,
        ProblemType.FILE_VALUE_PROHIBITED,
        ProblemType.LINK_TARGET_TYPE_MISMATCH,
        ProblemType.INEXISTENT_LINKED_RESOURCE,
    )
    if problem_type in never_shortened:
        return user_input
    if not user_input:
        return None
    full_length_prefixes = ("http://rdfh.ch/", " / http://rdfh.ch/lists/")
    if user_input.startswith(full_length_prefixes) or len(user_input) <= 50:
        return user_input
    head = user_input[:50]
    return f"{head}[...]"
|