dsp-tools 9.1.0.post11__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsp_tools/__init__.py +4 -0
- dsp_tools/cli/args.py +36 -0
- dsp_tools/cli/call_action.py +51 -231
- dsp_tools/cli/call_action_files_only.py +101 -0
- dsp_tools/cli/call_action_with_network.py +207 -0
- dsp_tools/cli/create_parsers.py +156 -58
- dsp_tools/cli/entry_point.py +56 -26
- dsp_tools/cli/utils.py +87 -0
- dsp_tools/clients/CLAUDE.md +420 -0
- dsp_tools/clients/authentication_client.py +14 -0
- dsp_tools/clients/authentication_client_live.py +66 -0
- dsp_tools/{utils → clients}/connection.py +2 -18
- dsp_tools/clients/connection_live.py +233 -0
- dsp_tools/clients/fuseki_metrics.py +60 -0
- dsp_tools/clients/group_user_clients.py +35 -0
- dsp_tools/clients/group_user_clients_live.py +181 -0
- dsp_tools/clients/legal_info_client.py +23 -0
- dsp_tools/clients/legal_info_client_live.py +132 -0
- dsp_tools/clients/list_client.py +49 -0
- dsp_tools/clients/list_client_live.py +166 -0
- dsp_tools/clients/metadata_client.py +24 -0
- dsp_tools/clients/metadata_client_live.py +47 -0
- dsp_tools/clients/ontology_clients.py +49 -0
- dsp_tools/clients/ontology_create_client_live.py +166 -0
- dsp_tools/clients/ontology_get_client_live.py +80 -0
- dsp_tools/clients/permissions_client.py +68 -0
- dsp_tools/clients/project_client.py +16 -0
- dsp_tools/clients/project_client_live.py +66 -0
- dsp_tools/commands/create/communicate_problems.py +24 -0
- dsp_tools/commands/create/create.py +134 -0
- dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
- dsp_tools/commands/create/create_on_server/classes.py +99 -0
- dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
- dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
- dsp_tools/commands/create/create_on_server/group_users.py +165 -0
- dsp_tools/commands/create/create_on_server/lists.py +163 -0
- dsp_tools/commands/create/create_on_server/mappers.py +12 -0
- dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
- dsp_tools/commands/create/create_on_server/ontology.py +52 -0
- dsp_tools/commands/create/create_on_server/project.py +68 -0
- dsp_tools/commands/create/create_on_server/properties.py +119 -0
- dsp_tools/commands/create/exceptions.py +29 -0
- dsp_tools/commands/create/lists_only.py +66 -0
- dsp_tools/commands/create/models/create_problems.py +87 -0
- dsp_tools/commands/create/models/parsed_ontology.py +88 -0
- dsp_tools/commands/create/models/parsed_project.py +81 -0
- dsp_tools/commands/create/models/rdf_ontology.py +12 -0
- dsp_tools/commands/create/models/server_project_info.py +100 -0
- dsp_tools/commands/create/parsing/parse_lists.py +45 -0
- dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
- dsp_tools/commands/create/parsing/parse_project.py +149 -0
- dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
- dsp_tools/commands/create/project_validate.py +595 -0
- dsp_tools/commands/create/serialisation/ontology.py +119 -0
- dsp_tools/commands/create/serialisation/project.py +44 -0
- dsp_tools/commands/excel2json/CLAUDE.md +101 -0
- dsp_tools/commands/excel2json/json_header.py +57 -23
- dsp_tools/commands/excel2json/{new_lists → lists}/compliance_checks.py +26 -26
- dsp_tools/commands/excel2json/{new_lists/make_new_lists.py → lists/make_lists.py} +19 -18
- dsp_tools/commands/excel2json/{new_lists → lists}/models/input_error.py +1 -12
- dsp_tools/commands/excel2json/{new_lists → lists}/models/serialise.py +9 -5
- dsp_tools/commands/excel2json/{new_lists → lists}/utils.py +4 -4
- dsp_tools/commands/excel2json/models/input_error.py +31 -11
- dsp_tools/commands/excel2json/models/json_header.py +53 -15
- dsp_tools/commands/excel2json/models/ontology.py +4 -3
- dsp_tools/commands/excel2json/{lists.py → old_lists.py} +26 -112
- dsp_tools/commands/excel2json/project.py +78 -34
- dsp_tools/commands/excel2json/properties.py +57 -36
- dsp_tools/commands/excel2json/resources.py +32 -12
- dsp_tools/commands/excel2json/utils.py +20 -1
- dsp_tools/commands/excel2xml/__init__.py +2 -2
- dsp_tools/commands/excel2xml/excel2xml_cli.py +7 -15
- dsp_tools/commands/excel2xml/excel2xml_lib.py +138 -493
- dsp_tools/commands/excel2xml/propertyelement.py +5 -5
- dsp_tools/commands/{project → get}/get.py +29 -13
- dsp_tools/commands/get/get_permissions.py +257 -0
- dsp_tools/commands/get/get_permissions_legacy.py +89 -0
- dsp_tools/commands/{project/models → get/legacy_models}/context.py +6 -6
- dsp_tools/commands/{project/models → get/legacy_models}/group.py +5 -10
- dsp_tools/commands/{project/models → get/legacy_models}/listnode.py +5 -35
- dsp_tools/commands/{project/models → get/legacy_models}/model.py +1 -1
- dsp_tools/commands/{project/models → get/legacy_models}/ontology.py +9 -14
- dsp_tools/commands/{project/models → get/legacy_models}/project.py +13 -6
- dsp_tools/commands/{project/models → get/legacy_models}/propertyclass.py +9 -16
- dsp_tools/commands/{project/models → get/legacy_models}/resourceclass.py +8 -46
- dsp_tools/commands/{project/models → get/legacy_models}/user.py +19 -60
- dsp_tools/commands/get/models/permissions_models.py +10 -0
- dsp_tools/commands/id2iri.py +20 -10
- dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +81 -56
- dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +4 -10
- dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +97 -37
- dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +2 -2
- dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +9 -10
- dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +3 -3
- dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +2 -10
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +12 -2
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +8 -9
- dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +18 -18
- dsp_tools/commands/start_stack.py +126 -77
- dsp_tools/commands/update_legal/CLAUDE.md +344 -0
- dsp_tools/commands/update_legal/__init__.py +0 -0
- dsp_tools/commands/update_legal/core.py +182 -0
- dsp_tools/commands/update_legal/csv_operations.py +135 -0
- dsp_tools/commands/update_legal/models.py +87 -0
- dsp_tools/commands/update_legal/xml_operations.py +247 -0
- dsp_tools/commands/validate_data/CLAUDE.md +159 -0
- dsp_tools/commands/validate_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/constants.py +59 -0
- dsp_tools/commands/validate_data/mappers.py +143 -0
- dsp_tools/commands/validate_data/models/__init__.py +0 -0
- dsp_tools/commands/validate_data/models/api_responses.py +45 -0
- dsp_tools/commands/validate_data/models/input_problems.py +119 -0
- dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
- dsp_tools/commands/validate_data/models/validation.py +106 -0
- dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
- dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
- dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
- dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
- dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
- dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
- dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
- dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
- dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
- dsp_tools/commands/{xml_validate/sparql/resource_shacl.py → validate_data/sparql/cardinality_shacl.py} +45 -47
- dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
- dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
- dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
- dsp_tools/commands/validate_data/utils.py +59 -0
- dsp_tools/commands/validate_data/validate_data.py +283 -0
- dsp_tools/commands/validate_data/validation/__init__.py +0 -0
- dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
- dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
- dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
- dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
- dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
- dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
- dsp_tools/commands/xmlupload/models/formatted_text_value.py +0 -25
- dsp_tools/commands/xmlupload/models/ingest.py +56 -70
- dsp_tools/commands/xmlupload/models/input_problems.py +6 -14
- dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
- dsp_tools/commands/xmlupload/models/permission.py +0 -39
- dsp_tools/commands/xmlupload/models/{deserialise/xmlpermission.py → permissions_parsed.py} +2 -2
- dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
- dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
- dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
- dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
- dsp_tools/commands/xmlupload/models/upload_clients.py +3 -3
- dsp_tools/commands/xmlupload/models/upload_state.py +2 -4
- dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
- dsp_tools/commands/xmlupload/{ark2iri.py → prepare_xml_input/ark2iri.py} +1 -1
- dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
- dsp_tools/commands/xmlupload/{iiif_uri_validator.py → prepare_xml_input/iiif_uri_validator.py} +2 -14
- dsp_tools/commands/xmlupload/{list_client.py → prepare_xml_input/list_client.py} +15 -10
- dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
- dsp_tools/commands/xmlupload/resource_create_client.py +7 -468
- dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
- dsp_tools/commands/xmlupload/stash/{construct_and_analyze_graph.py → analyse_circular_reference_graph.py} +64 -157
- dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
- dsp_tools/commands/xmlupload/stash/graph_models.py +13 -8
- dsp_tools/commands/xmlupload/stash/stash_circular_references.py +48 -115
- dsp_tools/commands/xmlupload/stash/stash_models.py +4 -9
- dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +34 -40
- dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +98 -108
- dsp_tools/commands/xmlupload/upload_config.py +8 -0
- dsp_tools/commands/xmlupload/write_diagnostic_info.py +14 -9
- dsp_tools/commands/xmlupload/xmlupload.py +214 -192
- dsp_tools/config/__init__.py +0 -0
- dsp_tools/config/logger_config.py +69 -0
- dsp_tools/{utils → config}/warnings_config.py +4 -1
- dsp_tools/error/__init__.py +0 -0
- dsp_tools/error/custom_warnings.py +39 -0
- dsp_tools/error/exceptions.py +204 -0
- dsp_tools/error/problems.py +10 -0
- dsp_tools/error/xmllib_errors.py +20 -0
- dsp_tools/error/xmllib_warnings.py +54 -0
- dsp_tools/error/xmllib_warnings_util.py +159 -0
- dsp_tools/error/xsd_validation_error_msg.py +19 -0
- dsp_tools/legacy_models/__init__.py +0 -0
- dsp_tools/{models → legacy_models}/datetimestamp.py +7 -7
- dsp_tools/{models → legacy_models}/langstring.py +1 -1
- dsp_tools/{models → legacy_models}/projectContext.py +4 -4
- dsp_tools/resources/schema/data.xsd +108 -83
- dsp_tools/resources/schema/lists-only.json +4 -23
- dsp_tools/resources/schema/project.json +80 -35
- dsp_tools/resources/schema/properties-only.json +1 -4
- dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
- dsp_tools/resources/start-stack/docker-compose.yml +34 -30
- dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
- dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
- dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
- dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
- dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
- dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
- dsp_tools/utils/ansi_colors.py +32 -0
- dsp_tools/utils/data_formats/__init__.py +0 -0
- dsp_tools/utils/{date_util.py → data_formats/date_util.py} +13 -1
- dsp_tools/utils/data_formats/iri_util.py +30 -0
- dsp_tools/utils/{shared.py → data_formats/shared.py} +1 -35
- dsp_tools/utils/{uri_util.py → data_formats/uri_util.py} +12 -2
- dsp_tools/utils/fuseki_bloating.py +63 -0
- dsp_tools/utils/json_parsing.py +22 -0
- dsp_tools/utils/rdf_constants.py +42 -0
- dsp_tools/utils/rdflib_utils.py +10 -0
- dsp_tools/utils/replace_id_with_iri.py +66 -0
- dsp_tools/utils/request_utils.py +238 -0
- dsp_tools/utils/xml_parsing/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
- dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
- dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
- dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
- dsp_tools/xmllib/CLAUDE.md +302 -0
- dsp_tools/xmllib/__init__.py +49 -0
- dsp_tools/xmllib/general_functions.py +877 -0
- dsp_tools/xmllib/internal/__init__.py +0 -0
- dsp_tools/xmllib/internal/checkers.py +162 -0
- dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
- dsp_tools/xmllib/internal/constants.py +46 -0
- dsp_tools/xmllib/internal/input_converters.py +155 -0
- dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
- dsp_tools/xmllib/internal/serialise_resource.py +177 -0
- dsp_tools/xmllib/internal/serialise_values.py +152 -0
- dsp_tools/xmllib/internal/type_aliases.py +11 -0
- dsp_tools/xmllib/models/config_options.py +28 -0
- dsp_tools/xmllib/models/date_formats.py +48 -0
- dsp_tools/xmllib/models/dsp_base_resources.py +1380 -400
- dsp_tools/xmllib/models/internal/__init__.py +0 -0
- dsp_tools/xmllib/models/internal/file_values.py +172 -0
- dsp_tools/xmllib/models/internal/geometry.py +162 -0
- dsp_tools/xmllib/models/{migration_metadata.py → internal/migration_metadata.py} +14 -10
- dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
- dsp_tools/xmllib/models/internal/values.py +342 -0
- dsp_tools/xmllib/models/licenses/__init__.py +0 -0
- dsp_tools/xmllib/models/licenses/other.py +59 -0
- dsp_tools/xmllib/models/licenses/recommended.py +107 -0
- dsp_tools/xmllib/models/permissions.py +41 -0
- dsp_tools/xmllib/models/res.py +1782 -0
- dsp_tools/xmllib/models/root.py +313 -26
- dsp_tools/xmllib/value_checkers.py +310 -47
- dsp_tools/xmllib/value_converters.py +765 -8
- dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
- dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
- dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
- {dsp_tools-9.1.0.post11.dist-info → dsp_tools-18.3.0.post13.dist-info}/entry_points.txt +1 -0
- dsp_tools/commands/project/create/project_create.py +0 -1107
- dsp_tools/commands/project/create/project_create_lists.py +0 -204
- dsp_tools/commands/project/create/project_validate.py +0 -453
- dsp_tools/commands/project/models/project_definition.py +0 -12
- dsp_tools/commands/rosetta.py +0 -124
- dsp_tools/commands/template.py +0 -30
- dsp_tools/commands/xml_validate/api_connection.py +0 -122
- dsp_tools/commands/xml_validate/deserialise_input.py +0 -135
- dsp_tools/commands/xml_validate/make_data_rdf.py +0 -193
- dsp_tools/commands/xml_validate/models/data_deserialised.py +0 -108
- dsp_tools/commands/xml_validate/models/data_rdf.py +0 -214
- dsp_tools/commands/xml_validate/models/input_problems.py +0 -191
- dsp_tools/commands/xml_validate/models/validation.py +0 -29
- dsp_tools/commands/xml_validate/reformat_validaton_result.py +0 -89
- dsp_tools/commands/xml_validate/sparql/construct_shapes.py +0 -16
- dsp_tools/commands/xml_validate/xml_validate.py +0 -151
- dsp_tools/commands/xmlupload/check_consistency_with_ontology.py +0 -253
- dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py +0 -236
- dsp_tools/commands/xmlupload/models/deserialise/xmlresource.py +0 -171
- dsp_tools/commands/xmlupload/models/namespace_context.py +0 -39
- dsp_tools/commands/xmlupload/models/ontology_lookup_models.py +0 -161
- dsp_tools/commands/xmlupload/models/ontology_problem_models.py +0 -178
- dsp_tools/commands/xmlupload/models/serialise/jsonld_serialiser.py +0 -40
- dsp_tools/commands/xmlupload/models/serialise/serialise_value.py +0 -51
- dsp_tools/commands/xmlupload/ontology_client.py +0 -92
- dsp_tools/commands/xmlupload/project_client.py +0 -91
- dsp_tools/commands/xmlupload/read_validate_xml_file.py +0 -99
- dsp_tools/models/custom_warnings.py +0 -31
- dsp_tools/models/exceptions.py +0 -90
- dsp_tools/resources/0100-template-repo/template.json +0 -45
- dsp_tools/resources/0100-template-repo/template.xml +0 -27
- dsp_tools/resources/start-stack/docker-compose-validation.yml +0 -5
- dsp_tools/resources/start-stack/start-stack-config.yml +0 -4
- dsp_tools/resources/xml_validate/api-shapes.ttl +0 -411
- dsp_tools/resources/xml_validate/replace_namespace.xslt +0 -61
- dsp_tools/utils/connection_live.py +0 -383
- dsp_tools/utils/iri_util.py +0 -14
- dsp_tools/utils/logger_config.py +0 -41
- dsp_tools/utils/set_encoder.py +0 -20
- dsp_tools/utils/xml_utils.py +0 -145
- dsp_tools/utils/xml_validation.py +0 -197
- dsp_tools/utils/xml_validation_models.py +0 -68
- dsp_tools/xmllib/models/file_values.py +0 -78
- dsp_tools/xmllib/models/resource.py +0 -415
- dsp_tools/xmllib/models/values.py +0 -428
- dsp_tools-9.1.0.post11.dist-info/METADATA +0 -130
- dsp_tools-9.1.0.post11.dist-info/RECORD +0 -167
- dsp_tools-9.1.0.post11.dist-info/WHEEL +0 -4
- dsp_tools-9.1.0.post11.dist-info/licenses/LICENSE +0 -674
- /dsp_tools/{commands/excel2json/new_lists → clients}/__init__.py +0 -0
- /dsp_tools/commands/{excel2json/new_lists/models → create}/__init__.py +0 -0
- /dsp_tools/commands/{project → create/create_on_server}/__init__.py +0 -0
- /dsp_tools/commands/{project/create → create/models}/__init__.py +0 -0
- /dsp_tools/commands/{project/models → create/parsing}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate → create/serialisation}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate/models → excel2json/lists}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate/sparql → excel2json/lists/models}/__init__.py +0 -0
- /dsp_tools/commands/excel2json/{new_lists → lists}/models/deserialise.py +0 -0
- /dsp_tools/commands/{xmlupload/models/deserialise → get}/__init__.py +0 -0
- /dsp_tools/commands/{xmlupload/models/serialise → get/legacy_models}/__init__.py +0 -0
- /dsp_tools/commands/{project/models → get/legacy_models}/helpers.py +0 -0
- /dsp_tools/{models → commands/get/models}/__init__.py +0 -0
|
File without changes
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import regex
|
|
7
|
+
|
|
8
|
+
from dsp_tools.error.xmllib_warnings import MessageInfo
|
|
9
|
+
from dsp_tools.error.xmllib_warnings_util import emit_xmllib_input_info
|
|
10
|
+
from dsp_tools.error.xmllib_warnings_util import emit_xmllib_input_warning
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def is_nonempty_value_internal(value: Any) -> bool:
    """
    Internal copy of value_checkers.is_nonempty_value(), kept here to avoid circular imports.

    A value counts as non-empty if it is not None-like
    and its string representation contains at least one of the following characters:

    - ``\\p{S}`` = symbols and special characters
    - ``\\p{P}`` = punctuation
    - ``\\w`` = all Unicode letters, numbers, and _

    Tuples, lists and sets are checked element by element, dictionaries key by key and value by value.
    Every element must be non-empty; a collection without any elements therefore yields True.

    Args:
        value: value of any type

    Returns:
        True if the value is not None-like and contains at least one of the above-mentioned characters
    """
    if isinstance(value, dict):
        # keys and values are treated alike, visited in insertion order
        checked = [is_nonempty_value_internal(item) for pair in value.items() for item in pair]
        return all(checked)
    if isinstance(value, (tuple, list, set)):
        checked = [is_nonempty_value_internal(element) for element in value]
        return all(checked)
    if pd.isna(value):
        return False
    return bool(regex.search(r"[\p{S}\p{P}\w]", str(value), flags=regex.UNICODE))
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def is_date_internal(value: Any) -> bool:
    """
    Check whether the string representation of a value is a valid dsp-date.

    The accepted shape is an optional calendar (GREGORIAN, JULIAN, ISLAMIC),
    an optional era (CE, BCE, BC, AD), a mandatory date, an optional era after that date
    and an optional second date, all separated by colons.
    A date consists of 1-4 digits, optionally followed by up to two ``-`` separated groups of 1-2 digits.

    Args:
        value: value to check

    Returns:
        True if it conforms
    """
    single_date = r"\d{1,4}(?:-\d{1,2}){0,2}"
    calendar = r"(?:(GREGORIAN|JULIAN|ISLAMIC):)?"
    first_era = r"(?:(CE|BCE|BC|AD):)?"
    second_era = r"(?::(CE|BCE|BC|AD))?"
    pattern = rf"^{calendar}{first_era}({single_date}){second_era}(:{single_date})?$"
    match = regex.search(pattern, str(value))
    if not match:
        return False
    # capture groups: 1 = calendar, 2 = first era, 3 = first date, 4 = second era, 5 = second date
    found_calendar, found_first_era, _, found_second_era, _ = match.groups()
    # eras are not supported yet for the islamic calendar
    return not (found_calendar == "ISLAMIC" and (found_first_era or found_second_era))
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def check_and_warn_potentially_empty_string(
    *, value: Any, res_id: str | None, expected: str, prop_name: str | None = None, field: str | None = None
) -> None:
    """
    Warn about inputs that are empty, or that look like the result of str() casting an empty value.

    When a user casts an input with str() before handing it to the xmllib, `None`-like values
    turn into ordinary strings and are no longer recognised as potentially empty.
    Use this check when such an input cannot be detected through any other check.
    A color or date value, for example, has its own regex, so such a value cannot slip through there.

    Args:
        value: user input
        res_id: Resource ID
        expected: the type of value that is expected
        prop_name: property name if used to check a property
        field: if used to check a non-property field, for example a comment on a value

    Warnings:
        XmllibInputWarning: if it is an empty value or a string only with whitespaces
        XmllibInputInfo: if it is a string containing a string value
            that may be the result of str() casting an empty value
    """
    if is_nonempty_value_internal(value):
        # not empty, but it may still be a stringified empty value such as "None"
        check_and_warn_if_a_string_contains_a_potentially_empty_value(
            value=value, res_id=res_id, prop_name=prop_name, field=field
        )
        return
    emit_xmllib_input_warning(
        MessageInfo(
            message=f"Your input '{value}' is empty. Please enter a valid {expected}.",
            resource_id=res_id,
            prop_name=prop_name,
            field=field,
        )
    )
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def check_and_warn_if_a_string_contains_a_potentially_empty_value(
|
|
110
|
+
*, value: Any, res_id: str | None, prop_name: str | None = None, field: str | None = None
|
|
111
|
+
) -> None:
|
|
112
|
+
"""
|
|
113
|
+
If a user str() casts an input before using it in the xmllib we may get `None` values that are not recognised
|
|
114
|
+
as potentially empty.
|
|
115
|
+
This is to be used if the potentially erroneous input is not detectable through any other check.
|
|
116
|
+
|
|
117
|
+
Args:
|
|
118
|
+
value: user input
|
|
119
|
+
res_id: Resource ID
|
|
120
|
+
prop_name: property name if used to check a property
|
|
121
|
+
field: if used to check a non-property field, for example a comment on a value
|
|
122
|
+
|
|
123
|
+
Warnings:
|
|
124
|
+
XmllibInputInfo: if it is a string containing a string value
|
|
125
|
+
that may be the result of str() casting an empty value
|
|
126
|
+
"""
|
|
127
|
+
empty_val_string = r"^(<NA>|nan|None)$"
|
|
128
|
+
if bool(regex.search(empty_val_string, str(value))):
|
|
129
|
+
type_lookup = {"<NA>": "pd.NA", "nan": "np.nan", "None": "None"}
|
|
130
|
+
msg = (
|
|
131
|
+
f"Your input '{value}' is a string but may be the result of `str({type_lookup[str(value)]})`. "
|
|
132
|
+
f"Please verify that the input is as expected."
|
|
133
|
+
)
|
|
134
|
+
msg_info = MessageInfo(
|
|
135
|
+
message=msg,
|
|
136
|
+
resource_id=res_id,
|
|
137
|
+
prop_name=prop_name,
|
|
138
|
+
field=field,
|
|
139
|
+
)
|
|
140
|
+
emit_xmllib_input_info(msg_info)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def check_and_inform_about_angular_brackets(value: Any, res_id: str | None, prop_name: str | None = None) -> None:
    """
    Emit an info message if the string representation of a value contains angular brackets.

    The brackets must enclose either only letters, slashes and double quotes,
    or a text that neither starts nor ends with a whitespace or a digit.

    Args:
        value: String value
        res_id: resource id
        prop_name: property name
    """
    bracket_pattern = r'<([a-zA-Z/"]+|[^\s0-9].*[^\s0-9])>'
    if regex.search(bracket_pattern, str(value)) is None:
        return
    message = (
        f"Your input '{value}' contains angular brackets. "
        f"Since this is a simpletext, please note that these will not be recognised as formatting "
        f"in the text field, but will be displayed as-is."
    )
    emit_xmllib_input_info(MessageInfo(message=message, resource_id=res_id, prop_name=prop_name))
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from lxml import etree
|
|
4
|
+
|
|
5
|
+
from dsp_tools.error.xmllib_warnings import MessageInfo
|
|
6
|
+
from dsp_tools.xmllib.general_functions import escape_reserved_xml_characters
|
|
7
|
+
from dsp_tools.xmllib.internal.input_converters import numeric_entities
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def parse_richtext_as_xml(
    input_str: str, resource_id: str | None = None, prop_name: str | None = None
) -> etree._Element | MessageInfo:
    """
    Try to parse a richtext string as XML.

    The input is first passed through escape_reserved_xml_characters() and numeric_entities()
    and then wrapped in an ``<ignore-this>`` root element, so that it can be parsed as a single document.

    Args:
        input_str: Richtext string
        resource_id: ID of the resource for improved error message
        prop_name: Property name for improved error message

    Returns:
        The parsed XML (with the ``<ignore-this>`` element as root),
        or a MessageInfo describing the syntax error if the string could not be parsed.
    """
    prepared = numeric_entities(escape_reserved_xml_characters(input_str))
    pseudo_xml = f"<ignore-this>{prepared}</ignore-this>"
    try:
        parsed = etree.fromstring(pseudo_xml)
    except etree.XMLSyntaxError as err:
        msg_lines = [
            "The entered richtext value could not be converted to a valid XML.",
            f"Original error message: {err.msg}",
            # the wrapped text is included because the parser's positions refer to it, not to the raw input
            f"Potential line/column numbers are relative to this text: {pseudo_xml}",
        ]
        return MessageInfo(resource_id=resource_id, prop_name=prop_name, message="\n".join(msg_lines))
    return parsed
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# The accepted XML tags are defined at https://docs.dasch.swiss/latest/DSP-API/03-endpoints/api-v2/text/standard-standoff/
# Tags that appear identically in the regex list and in the plain tag list below.
_COMMON_BASE = [
    "p",
    "em",
    "strong",
    "u",
    "sub",
    "sup",
    "strike",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "ol",
    "ul",
    "li",
    "tbody",
    "table",
    "tr",
    "td",
    "br",
    "hr",
    "pre",
    "cite",
    "blockquote",
    "code",
]
# Regex fragments for the tag names; tags that carry attributes get an optional attribute part.
KNOWN_XML_TAG_REGEXES = [
    *_COMMON_BASE,
    "a( [^>]+?)?",  # <a> has attributes
    "footnote( [^>]+?)?",  # the footnote text is in the attributes
]
# Plain tag names, without any attribute pattern.
KNOWN_XML_TAGS = [
    *_COMMON_BASE,
    "a",
    "footnote",
]
XML_NAMESPACE_MAP = {None: "https://dasch.swiss/schema", "xsi": "http://www.w3.org/2001/XMLSchema-instance"}
DASCH_SCHEMA = "{https://dasch.swiss/schema}"

# These named entities are defined by the XML specification and are always expanded during parsing,
# regardless of parser options.
# Numeric character references (e.g., `&#34;` or `&#x22;`) are also always resolved
# NOTE(review): the entity strings below were restored from an entity-decoded rendering
# (which showed the bare characters & < > " '); confirm the exact spelling against the released file.
PREDEFINED_XML_ENTITIES = ["&amp;", "&lt;", "&gt;", "&quot;", "&apos;"]
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from html.entities import html5
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import regex
|
|
7
|
+
|
|
8
|
+
from dsp_tools.error.xmllib_warnings import MessageInfo
|
|
9
|
+
from dsp_tools.error.xmllib_warnings_util import emit_xmllib_input_info
|
|
10
|
+
from dsp_tools.error.xmllib_warnings_util import emit_xmllib_input_type_mismatch_warning
|
|
11
|
+
from dsp_tools.error.xmllib_warnings_util import raise_xmllib_input_error
|
|
12
|
+
from dsp_tools.xmllib.internal.checkers import check_and_warn_if_a_string_contains_a_potentially_empty_value
|
|
13
|
+
from dsp_tools.xmllib.internal.checkers import is_nonempty_value_internal
|
|
14
|
+
from dsp_tools.xmllib.internal.constants import PREDEFINED_XML_ENTITIES
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def check_and_get_corrected_comment(comment: Any, res_id: str | None, prop_name: str | None) -> str | None:
    """
    Normalise the comment on a value to a string or None.

    The input of comments may also be pd.NA or such. In our models we only want a string or None.
    A non-empty comment is additionally checked for strings that may stem from str() casting an empty value.

    Args:
        comment: comment as entered by the user
        res_id: resource ID, used in the user message
        prop_name: property name, used in the user message

    Returns:
        The comment as string, or None if the comment is empty
    """
    if not is_nonempty_value_internal(comment):
        return None
    check_and_warn_if_a_string_contains_a_potentially_empty_value(
        value=comment, res_id=res_id, prop_name=prop_name, field="comment on value"
    )
    return str(comment)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def check_and_fix_is_non_empty_string(
    value: Any,
    res_id: str | None = None,
    prop_name: str | None = None,
    value_field: str | None = None,
    expected: str = "non empty string",
) -> str:
    """
    Convert an input into a string and tell the user if it is empty or looks empty.

    An empty input (eg. pd.NA, None, etc.) triggers a type mismatch warning and results in an empty string.
    A non-empty input is converted with str(), after checking whether it may be the result
    of str() casting an empty value.

    Args:
        value: input to check
        res_id: resource ID
        prop_name: property name
        value_field: field if it is not a property
        expected: expected type

    Returns:
        The value as string, if it is empty an empty string.
    """
    if not is_nonempty_value_internal(value):
        emit_xmllib_input_type_mismatch_warning(
            expected_type=expected, value=value, res_id=res_id, prop_name=prop_name, value_field=value_field
        )
        return ""
    check_and_warn_if_a_string_contains_a_potentially_empty_value(
        value=value, res_id=res_id, prop_name=prop_name, field=value_field
    )
    return str(value)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def check_and_fix_collection_input(value: Any, prop_name: str, res_id: str) -> list[Any]:
    """
    Turn the input into a list, so that the internal typing is consistent regardless of what the user provides.

    Sets, lists and tuples are converted to a list (an empty one triggers an info message for the user).
    Any other input, except for a dictionary, is wrapped into a one-element list.

    Args:
        value: Input value
        prop_name: Property name
        res_id: Resource ID

    Returns:
        The input as a list

    Raises:
        XmllibInputError: if the input is a dictionary
    """
    if isinstance(value, (set, list, tuple)):
        if len(value) == 0:
            empty_info = MessageInfo(
                message="The input is empty. Please note that no values will be added to the resource.",
                resource_id=res_id,
                prop_name=prop_name,
            )
            emit_xmllib_input_info(empty_info)
        return list(value)
    elif isinstance(value, dict):
        dict_info = MessageInfo(
            message="The input is a dictionary. Only collections (list, set, tuple) are permissible.",
            resource_id=res_id,
            prop_name=prop_name,
        )
        raise_xmllib_input_error(dict_info)
    else:
        # everything else (including strings) is treated as one single value
        return [value]
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def unescape_reserved_xml_chars(richtext: str) -> str:
    """
    This function unescapes characters that are reserved in an XML,
    i.e. it turns `&lt;`, `&gt;` and `&amp;` back into `<`, `>` and `&`.

    Args:
        richtext: Text to be unescaped (a falsy input such as None is treated as an empty string)

    Returns:
        Unescaped string
    """
    unescaped = richtext or ""
    # "&amp;" must come last: otherwise "&amp;lt;" would first become "&lt;" and then wrongly "<".
    for entity, char in (("&lt;", "<"), ("&gt;", ">"), ("&amp;", "&")):
        unescaped = unescaped.replace(entity, char)
    return unescaped
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def numeric_entities(text: str) -> str:
    """
    Make a string suitable to be written to an XML file,
    by replacing all named HTML entities by their decimal numeric counterparts,
    except the ones that are predefined in the XML specification.
    Numeric HTML entities remain untouched.
    Unescaped special characters remain untouched.
    Sequences that look like a named entity but are not known to HTML5 remain untouched.

    Args:
        text: original text

    Returns:
        text with all named entities replaced

    Examples:
        ```python
        result = xmllib.numeric_entities('&nbsp; &quot;')
        # result == '&#160; &quot;'
        ```
    """

    def _to_numeric(found: Any) -> str:
        entity = found.group()
        if entity in PREDEFINED_XML_ENTITIES:
            return entity
        # html5 is keyed by the entity name without the leading "&", e.g. "nbsp;"
        chars = html5.get(entity[1:])
        if chars is None:
            # unknown name: there is no code point to convert to, so the text is left as it is
            return entity
        # some named entities stand for more than one code point, hence one numeric entity per character
        return "".join(f"&#{ord(char)};" for char in chars)

    # "#" is not part of the character class, therefore numeric entities are never matched
    return regex.sub(r"&[0-9A-Za-z]+;", _to_numeric, text)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from lxml import etree
|
|
2
|
+
|
|
3
|
+
from dsp_tools.error.xmllib_errors import XmllibInternalError
|
|
4
|
+
from dsp_tools.xmllib.internal.constants import DASCH_SCHEMA
|
|
5
|
+
from dsp_tools.xmllib.internal.constants import XML_NAMESPACE_MAP
|
|
6
|
+
from dsp_tools.xmllib.models.internal.file_values import AbstractFileValue
|
|
7
|
+
from dsp_tools.xmllib.models.internal.file_values import FileValue
|
|
8
|
+
from dsp_tools.xmllib.models.internal.file_values import IIIFUri
|
|
9
|
+
from dsp_tools.xmllib.models.internal.file_values import Metadata
|
|
10
|
+
from dsp_tools.xmllib.models.permissions import Permissions
|
|
11
|
+
from dsp_tools.xmllib.value_checkers import is_nonempty_value
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def serialise_file_value(file_value: AbstractFileValue, authorship_id: str | None) -> etree._Element:
|
|
15
|
+
"""
|
|
16
|
+
Serialise the file value of a resource.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
file_value: File Value
|
|
20
|
+
authorship_id: ID referencing the authorship
|
|
21
|
+
|
|
22
|
+
Returns:
|
|
23
|
+
File Value as etree
|
|
24
|
+
|
|
25
|
+
Raises:
|
|
26
|
+
XmllibInternalError: in case of an unknown class
|
|
27
|
+
"""
|
|
28
|
+
if isinstance(file_value, FileValue):
|
|
29
|
+
return _serialise_file_value(file_value, authorship_id, "bitstream")
|
|
30
|
+
elif isinstance(file_value, IIIFUri):
|
|
31
|
+
return _serialise_file_value(file_value, authorship_id, "iiif-uri")
|
|
32
|
+
else:
|
|
33
|
+
raise XmllibInternalError(
|
|
34
|
+
f"file_value must be either a FileValue or a IIIFUri, but you provided {file_value.__class__.__name__}"
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _serialise_file_value(value: AbstractFileValue, authorship_id: str | None, tag_name: str) -> etree._Element:
    """
    Create the element `tag_name` for a file value.

    The metadata (and the comment, if it is non-empty) become attributes, the value becomes the text.
    """
    attributes = _serialise_metadata(value.metadata, authorship_id)
    if is_nonempty_value(value.comment):
        attributes["comment"] = str(value.comment)
    element = etree.Element(f"{DASCH_SCHEMA}{tag_name}", attrib=attributes, nsmap=XML_NAMESPACE_MAP)
    element.text = str(value.value)
    return element
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _serialise_metadata(metadata: Metadata, authorship_id: str | None) -> dict[str, str]:
    """
    Collect the XML attributes for the metadata of a file value.

    Falsy entries (license, copyright holder, authorship ID) are left out,
    and so are permissions that equal Permissions.PROJECT_SPECIFIC_PERMISSIONS.
    """
    attributes = {}
    if metadata.license:
        attributes["license"] = str(metadata.license)
    if metadata.copyright_holder:
        attributes["copyright-holder"] = metadata.copyright_holder
    if authorship_id:
        attributes["authorship-id"] = authorship_id
    if metadata.permissions != Permissions.PROJECT_SPECIFIC_PERMISSIONS:
        attributes["permissions"] = metadata.permissions.value
    return attributes
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from dotenv import find_dotenv
|
|
6
|
+
from dotenv import load_dotenv
|
|
7
|
+
from lxml import etree
|
|
8
|
+
|
|
9
|
+
from dsp_tools.error.xmllib_warnings import MessageInfo
|
|
10
|
+
from dsp_tools.error.xmllib_warnings_util import emit_xmllib_input_warning
|
|
11
|
+
from dsp_tools.error.xmllib_warnings_util import raise_xmllib_input_error
|
|
12
|
+
from dsp_tools.xmllib.internal.constants import DASCH_SCHEMA
|
|
13
|
+
from dsp_tools.xmllib.internal.constants import XML_NAMESPACE_MAP
|
|
14
|
+
from dsp_tools.xmllib.internal.serialise_file_value import serialise_file_value
|
|
15
|
+
from dsp_tools.xmllib.internal.serialise_values import serialise_values
|
|
16
|
+
from dsp_tools.xmllib.internal.type_aliases import AnyResource
|
|
17
|
+
from dsp_tools.xmllib.models.dsp_base_resources import AudioSegmentResource
|
|
18
|
+
from dsp_tools.xmllib.models.dsp_base_resources import LinkResource
|
|
19
|
+
from dsp_tools.xmllib.models.dsp_base_resources import RegionResource
|
|
20
|
+
from dsp_tools.xmllib.models.dsp_base_resources import VideoSegmentResource
|
|
21
|
+
from dsp_tools.xmllib.models.internal.file_values import AuthorshipLookup
|
|
22
|
+
from dsp_tools.xmllib.models.internal.values import ColorValue
|
|
23
|
+
from dsp_tools.xmllib.models.internal.values import LinkValue
|
|
24
|
+
from dsp_tools.xmllib.models.internal.values import Richtext
|
|
25
|
+
from dsp_tools.xmllib.models.internal.values import Value
|
|
26
|
+
from dsp_tools.xmllib.models.permissions import Permissions
|
|
27
|
+
from dsp_tools.xmllib.models.res import Resource
|
|
28
|
+
|
|
29
|
+
# Runs at import time: loads the .env file located by find_dotenv(usecwd=True), if any, into the environment.
# serialise_resources() reads the variable XMLLIB_SORT_RESOURCES through os.getenv.
# NOTE(review): usecwd=True presumably starts the search in the current working directory - confirm in dotenv docs.
load_dotenv(dotenv_path=find_dotenv(usecwd=True))
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def serialise_resources(resources: list[AnyResource], authorship_lookup: AuthorshipLookup) -> list[etree._Element]:
    """
    Serialise all the resources.

    If the environment variable XMLLIB_SORT_RESOURCES is set to "true" (case-insensitive),
    the resources are sorted by their ID before they are serialised.

    Args:
        resources: list of resources
        authorship_lookup: lookup to map the authors to the corresponding IDs

    Returns:
        serialised resources
    """
    # an unset variable yields None, which str() turns into "None" and therefore never equals "true"
    sort_requested = str(os.getenv("XMLLIB_SORT_RESOURCES")).lower() == "true"
    to_serialise = sorted(resources, key=lambda res: res.res_id) if sort_requested else resources
    return [_serialise_one_resource(res, authorship_lookup) for res in to_serialise]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _serialise_one_resource(res: AnyResource, authorship_lookup: AuthorshipLookup) -> etree._Element:
|
|
50
|
+
match res:
|
|
51
|
+
case Resource():
|
|
52
|
+
return _serialise_generic_resource(res, authorship_lookup)
|
|
53
|
+
case RegionResource():
|
|
54
|
+
return _serialise_region(res)
|
|
55
|
+
case LinkResource():
|
|
56
|
+
return _serialise_link(res)
|
|
57
|
+
case AudioSegmentResource():
|
|
58
|
+
return _serialise_segment(res, "audio-segment")
|
|
59
|
+
case VideoSegmentResource():
|
|
60
|
+
return _serialise_segment(res, "video-segment")
|
|
61
|
+
case _:
|
|
62
|
+
raise_xmllib_input_error(
|
|
63
|
+
MessageInfo(
|
|
64
|
+
f"An unknown resource was added to the root. "
|
|
65
|
+
f"Only Resource, RegionResource, LinkResource, VideoSegmentResource, AudioSegmentResource "
|
|
66
|
+
f"are permitted."
|
|
67
|
+
f"The input type is {res.__class__.__name__}"
|
|
68
|
+
)
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _serialise_generic_resource(res: Resource, authorship_lookup: AuthorshipLookup) -> etree._Element:
    """
    Serialise a generic resource: the `resource` element with its `restype`,
    followed by the file value (if there is one) and then all the values.
    """
    element = _make_generic_resource_element(res, "resource")
    element.attrib["restype"] = res.restype
    if res.file_value:
        # the file value only references its authorship through the ID from the lookup
        authorship_id = authorship_lookup.get_id(res.file_value.metadata.authorship)
        file_element = serialise_file_value(res.file_value, authorship_id)
        element.append(file_element)
    value_elements = serialise_values(res.values)
    element.extend(value_elements)
    return element
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _serialise_region(res: RegionResource) -> etree._Element:
    """Serialise a region resource: the `region` element, its geometry elements and then its values."""
    region = _make_generic_resource_element(res, "region")
    geometry_elements = _serialise_geometry_shape(res)
    region.extend(geometry_elements)
    value_elements = serialise_values(res.values)
    region.extend(value_elements)
    return region
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _serialise_geometry_shape(res: RegionResource) -> list[etree._Element]:
    """
    Serialise the geometry of a region and its colour.

    Without a geometry a warning is emitted and an empty list is returned.
    Otherwise the list holds the `geometry-prop` element followed by the serialised `hasColor` value.
    """
    if not res.geometry:
        emit_xmllib_input_warning(
            MessageInfo(
                "The region resource does not have a geometry. Please note that an xmlupload will fail.", res.res_id
            )
        )
        return []

    geometry_prop = etree.Element(f"{DASCH_SCHEMA}geometry-prop", name="hasGeometry", nsmap=XML_NAMESPACE_MAP)

    # the geometry carries the permissions of the region, unless they are the project specific ones
    geometry_attribs = {}
    if res.permissions != Permissions.PROJECT_SPECIFIC_PERMISSIONS:
        geometry_attribs = {"permissions": res.permissions.value}
    geometry = etree.Element(f"{DASCH_SCHEMA}geometry", nsmap=XML_NAMESPACE_MAP, attrib=geometry_attribs)
    geometry.text = res.geometry.to_json_string()
    geometry_prop.append(geometry)

    color = ColorValue(value=res.geometry.color, prop_name="hasColor", permissions=res.permissions)
    return [geometry_prop, *serialise_values([color])]
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _serialise_link(res: LinkResource) -> etree._Element:
    """
    Serialise a link resource: the `link` element followed by its values.

    A warning is emitted if the values contain no Richtext or no LinkValue.
    The resource is serialised in any case.
    """
    requirements = (
        (Richtext, "at least one comment"),
        (LinkValue, "at least two links"),
    )
    problems = [
        description
        for value_type, description in requirements
        if not any(isinstance(val, value_type) for val in res.values)
    ]
    if problems:
        msg = f"A link object requires {' and '.join(problems)}. Please note that an xmlupload will fail."
        emit_xmllib_input_warning(MessageInfo(msg, res.res_id))
    link = _make_generic_resource_element(res, "link")
    link.extend(serialise_values(res.values))
    return link
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _serialise_segment(res: AudioSegmentResource | VideoSegmentResource, segment_type: str) -> etree._Element:
    """Serialise an audio or video segment: the element named `segment_type` filled with the segment children."""
    segment_element = _make_generic_resource_element(res, segment_type)
    children = _serialise_segment_children(res)
    segment_element.extend(children)
    return segment_element
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _make_generic_resource_element(res: AnyResource, res_type: str) -> etree._Element:
    """
    Create the element `res_type` with the attributes `label` and `id` of the resource.

    The `permissions` attribute is only added if they differ from Permissions.PROJECT_SPECIFIC_PERMISSIONS.
    """
    attributes = {"label": res.label, "id": res.res_id}
    if res.permissions != Permissions.PROJECT_SPECIFIC_PERMISSIONS:
        attributes = {**attributes, "permissions": res.permissions.value}
    tag = f"{DASCH_SCHEMA}{res_type}"
    return etree.Element(tag, attrib=attributes, nsmap=XML_NAMESPACE_MAP)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _serialise_segment_children(segment: AudioSegmentResource | VideoSegmentResource) -> list[etree._Element]:
    """
    Serialise the children of a segment in a fixed order:
    isSegmentOf, hasSegmentBounds, hasTitle, hasComment, hasDescription, hasKeyword, relatesTo.
    """
    # The segment elements need to be in a specific order in order to pass the XSD validation
    # Therefore, the values are picked property by property
    children = _serialise_according_to_prop_name(segment.values, "isSegmentOf")

    bounds = segment.segment_bounds
    bounds_attributes = {"segment_start": str(bounds.segment_start), "segment_end": str(bounds.segment_end)}
    if segment.permissions != Permissions.PROJECT_SPECIFIC_PERMISSIONS:
        bounds_attributes["permissions"] = segment.permissions.value
    bounds_element = etree.Element(
        f"{DASCH_SCHEMA}hasSegmentBounds",
        attrib=bounds_attributes,
        nsmap=XML_NAMESPACE_MAP,
    )
    children.append(bounds_element)

    for prop_name in ("hasTitle", "hasComment", "hasDescription", "hasKeyword", "relatesTo"):
        children.extend(_serialise_according_to_prop_name(segment.values, prop_name))
    return children
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _serialise_according_to_prop_name(values: list[Value], prop_name: str) -> list[etree._Element]:
|
|
165
|
+
to_serialise = (x for x in values if x.prop_name == prop_name)
|
|
166
|
+
return [_make_element_with_text(x) for x in to_serialise]
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _make_element_with_text(value: Value) -> etree._Element:
    """
    Create an element that is named after the property of the value and has the value as text.

    The permissions (unless they are the project specific ones) and the comment (unless it is None)
    are added as attributes.
    """
    attributes = {}
    if value.permissions != Permissions.PROJECT_SPECIFIC_PERMISSIONS:
        attributes["permissions"] = value.permissions.value
    if value.comment is not None:
        attributes["comment"] = str(value.comment)
    element = etree.Element(f"{DASCH_SCHEMA}{value.prop_name}", nsmap=XML_NAMESPACE_MAP, attrib=attributes)
    element.text = value.value
    return element
|