dsp-tools 9.1.0.post11__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsp_tools/__init__.py +4 -0
- dsp_tools/cli/args.py +36 -0
- dsp_tools/cli/call_action.py +51 -231
- dsp_tools/cli/call_action_files_only.py +101 -0
- dsp_tools/cli/call_action_with_network.py +207 -0
- dsp_tools/cli/create_parsers.py +156 -58
- dsp_tools/cli/entry_point.py +56 -26
- dsp_tools/cli/utils.py +87 -0
- dsp_tools/clients/CLAUDE.md +420 -0
- dsp_tools/clients/authentication_client.py +14 -0
- dsp_tools/clients/authentication_client_live.py +66 -0
- dsp_tools/{utils → clients}/connection.py +2 -18
- dsp_tools/clients/connection_live.py +233 -0
- dsp_tools/clients/fuseki_metrics.py +60 -0
- dsp_tools/clients/group_user_clients.py +35 -0
- dsp_tools/clients/group_user_clients_live.py +181 -0
- dsp_tools/clients/legal_info_client.py +23 -0
- dsp_tools/clients/legal_info_client_live.py +132 -0
- dsp_tools/clients/list_client.py +49 -0
- dsp_tools/clients/list_client_live.py +166 -0
- dsp_tools/clients/metadata_client.py +24 -0
- dsp_tools/clients/metadata_client_live.py +47 -0
- dsp_tools/clients/ontology_clients.py +49 -0
- dsp_tools/clients/ontology_create_client_live.py +166 -0
- dsp_tools/clients/ontology_get_client_live.py +80 -0
- dsp_tools/clients/permissions_client.py +68 -0
- dsp_tools/clients/project_client.py +16 -0
- dsp_tools/clients/project_client_live.py +66 -0
- dsp_tools/commands/create/communicate_problems.py +24 -0
- dsp_tools/commands/create/create.py +134 -0
- dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
- dsp_tools/commands/create/create_on_server/classes.py +99 -0
- dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
- dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
- dsp_tools/commands/create/create_on_server/group_users.py +165 -0
- dsp_tools/commands/create/create_on_server/lists.py +163 -0
- dsp_tools/commands/create/create_on_server/mappers.py +12 -0
- dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
- dsp_tools/commands/create/create_on_server/ontology.py +52 -0
- dsp_tools/commands/create/create_on_server/project.py +68 -0
- dsp_tools/commands/create/create_on_server/properties.py +119 -0
- dsp_tools/commands/create/exceptions.py +29 -0
- dsp_tools/commands/create/lists_only.py +66 -0
- dsp_tools/commands/create/models/create_problems.py +87 -0
- dsp_tools/commands/create/models/parsed_ontology.py +88 -0
- dsp_tools/commands/create/models/parsed_project.py +81 -0
- dsp_tools/commands/create/models/rdf_ontology.py +12 -0
- dsp_tools/commands/create/models/server_project_info.py +100 -0
- dsp_tools/commands/create/parsing/parse_lists.py +45 -0
- dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
- dsp_tools/commands/create/parsing/parse_project.py +149 -0
- dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
- dsp_tools/commands/create/project_validate.py +595 -0
- dsp_tools/commands/create/serialisation/ontology.py +119 -0
- dsp_tools/commands/create/serialisation/project.py +44 -0
- dsp_tools/commands/excel2json/CLAUDE.md +101 -0
- dsp_tools/commands/excel2json/json_header.py +57 -23
- dsp_tools/commands/excel2json/{new_lists → lists}/compliance_checks.py +26 -26
- dsp_tools/commands/excel2json/{new_lists/make_new_lists.py → lists/make_lists.py} +19 -18
- dsp_tools/commands/excel2json/{new_lists → lists}/models/input_error.py +1 -12
- dsp_tools/commands/excel2json/{new_lists → lists}/models/serialise.py +9 -5
- dsp_tools/commands/excel2json/{new_lists → lists}/utils.py +4 -4
- dsp_tools/commands/excel2json/models/input_error.py +31 -11
- dsp_tools/commands/excel2json/models/json_header.py +53 -15
- dsp_tools/commands/excel2json/models/ontology.py +4 -3
- dsp_tools/commands/excel2json/{lists.py → old_lists.py} +26 -112
- dsp_tools/commands/excel2json/project.py +78 -34
- dsp_tools/commands/excel2json/properties.py +57 -36
- dsp_tools/commands/excel2json/resources.py +32 -12
- dsp_tools/commands/excel2json/utils.py +20 -1
- dsp_tools/commands/excel2xml/__init__.py +2 -2
- dsp_tools/commands/excel2xml/excel2xml_cli.py +7 -15
- dsp_tools/commands/excel2xml/excel2xml_lib.py +138 -493
- dsp_tools/commands/excel2xml/propertyelement.py +5 -5
- dsp_tools/commands/{project → get}/get.py +29 -13
- dsp_tools/commands/get/get_permissions.py +257 -0
- dsp_tools/commands/get/get_permissions_legacy.py +89 -0
- dsp_tools/commands/{project/models → get/legacy_models}/context.py +6 -6
- dsp_tools/commands/{project/models → get/legacy_models}/group.py +5 -10
- dsp_tools/commands/{project/models → get/legacy_models}/listnode.py +5 -35
- dsp_tools/commands/{project/models → get/legacy_models}/model.py +1 -1
- dsp_tools/commands/{project/models → get/legacy_models}/ontology.py +9 -14
- dsp_tools/commands/{project/models → get/legacy_models}/project.py +13 -6
- dsp_tools/commands/{project/models → get/legacy_models}/propertyclass.py +9 -16
- dsp_tools/commands/{project/models → get/legacy_models}/resourceclass.py +8 -46
- dsp_tools/commands/{project/models → get/legacy_models}/user.py +19 -60
- dsp_tools/commands/get/models/permissions_models.py +10 -0
- dsp_tools/commands/id2iri.py +20 -10
- dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +81 -56
- dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +4 -10
- dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +97 -37
- dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +2 -2
- dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +9 -10
- dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +3 -3
- dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +2 -10
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +12 -2
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +8 -9
- dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +18 -18
- dsp_tools/commands/start_stack.py +126 -77
- dsp_tools/commands/update_legal/CLAUDE.md +344 -0
- dsp_tools/commands/update_legal/__init__.py +0 -0
- dsp_tools/commands/update_legal/core.py +182 -0
- dsp_tools/commands/update_legal/csv_operations.py +135 -0
- dsp_tools/commands/update_legal/models.py +87 -0
- dsp_tools/commands/update_legal/xml_operations.py +247 -0
- dsp_tools/commands/validate_data/CLAUDE.md +159 -0
- dsp_tools/commands/validate_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/constants.py +59 -0
- dsp_tools/commands/validate_data/mappers.py +143 -0
- dsp_tools/commands/validate_data/models/__init__.py +0 -0
- dsp_tools/commands/validate_data/models/api_responses.py +45 -0
- dsp_tools/commands/validate_data/models/input_problems.py +119 -0
- dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
- dsp_tools/commands/validate_data/models/validation.py +106 -0
- dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
- dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
- dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
- dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
- dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
- dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
- dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
- dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
- dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
- dsp_tools/commands/{xml_validate/sparql/resource_shacl.py → validate_data/sparql/cardinality_shacl.py} +45 -47
- dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
- dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
- dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
- dsp_tools/commands/validate_data/utils.py +59 -0
- dsp_tools/commands/validate_data/validate_data.py +283 -0
- dsp_tools/commands/validate_data/validation/__init__.py +0 -0
- dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
- dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
- dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
- dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
- dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
- dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
- dsp_tools/commands/xmlupload/models/formatted_text_value.py +0 -25
- dsp_tools/commands/xmlupload/models/ingest.py +56 -70
- dsp_tools/commands/xmlupload/models/input_problems.py +6 -14
- dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
- dsp_tools/commands/xmlupload/models/permission.py +0 -39
- dsp_tools/commands/xmlupload/models/{deserialise/xmlpermission.py → permissions_parsed.py} +2 -2
- dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
- dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
- dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
- dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
- dsp_tools/commands/xmlupload/models/upload_clients.py +3 -3
- dsp_tools/commands/xmlupload/models/upload_state.py +2 -4
- dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
- dsp_tools/commands/xmlupload/{ark2iri.py → prepare_xml_input/ark2iri.py} +1 -1
- dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
- dsp_tools/commands/xmlupload/{iiif_uri_validator.py → prepare_xml_input/iiif_uri_validator.py} +2 -14
- dsp_tools/commands/xmlupload/{list_client.py → prepare_xml_input/list_client.py} +15 -10
- dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
- dsp_tools/commands/xmlupload/resource_create_client.py +7 -468
- dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
- dsp_tools/commands/xmlupload/stash/{construct_and_analyze_graph.py → analyse_circular_reference_graph.py} +64 -157
- dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
- dsp_tools/commands/xmlupload/stash/graph_models.py +13 -8
- dsp_tools/commands/xmlupload/stash/stash_circular_references.py +48 -115
- dsp_tools/commands/xmlupload/stash/stash_models.py +4 -9
- dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +34 -40
- dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +98 -108
- dsp_tools/commands/xmlupload/upload_config.py +8 -0
- dsp_tools/commands/xmlupload/write_diagnostic_info.py +14 -9
- dsp_tools/commands/xmlupload/xmlupload.py +214 -192
- dsp_tools/config/__init__.py +0 -0
- dsp_tools/config/logger_config.py +69 -0
- dsp_tools/{utils → config}/warnings_config.py +4 -1
- dsp_tools/error/__init__.py +0 -0
- dsp_tools/error/custom_warnings.py +39 -0
- dsp_tools/error/exceptions.py +204 -0
- dsp_tools/error/problems.py +10 -0
- dsp_tools/error/xmllib_errors.py +20 -0
- dsp_tools/error/xmllib_warnings.py +54 -0
- dsp_tools/error/xmllib_warnings_util.py +159 -0
- dsp_tools/error/xsd_validation_error_msg.py +19 -0
- dsp_tools/legacy_models/__init__.py +0 -0
- dsp_tools/{models → legacy_models}/datetimestamp.py +7 -7
- dsp_tools/{models → legacy_models}/langstring.py +1 -1
- dsp_tools/{models → legacy_models}/projectContext.py +4 -4
- dsp_tools/resources/schema/data.xsd +108 -83
- dsp_tools/resources/schema/lists-only.json +4 -23
- dsp_tools/resources/schema/project.json +80 -35
- dsp_tools/resources/schema/properties-only.json +1 -4
- dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
- dsp_tools/resources/start-stack/docker-compose.yml +34 -30
- dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
- dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
- dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
- dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
- dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
- dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
- dsp_tools/utils/ansi_colors.py +32 -0
- dsp_tools/utils/data_formats/__init__.py +0 -0
- dsp_tools/utils/{date_util.py → data_formats/date_util.py} +13 -1
- dsp_tools/utils/data_formats/iri_util.py +30 -0
- dsp_tools/utils/{shared.py → data_formats/shared.py} +1 -35
- dsp_tools/utils/{uri_util.py → data_formats/uri_util.py} +12 -2
- dsp_tools/utils/fuseki_bloating.py +63 -0
- dsp_tools/utils/json_parsing.py +22 -0
- dsp_tools/utils/rdf_constants.py +42 -0
- dsp_tools/utils/rdflib_utils.py +10 -0
- dsp_tools/utils/replace_id_with_iri.py +66 -0
- dsp_tools/utils/request_utils.py +238 -0
- dsp_tools/utils/xml_parsing/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
- dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
- dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
- dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
- dsp_tools/xmllib/CLAUDE.md +302 -0
- dsp_tools/xmllib/__init__.py +49 -0
- dsp_tools/xmllib/general_functions.py +877 -0
- dsp_tools/xmllib/internal/__init__.py +0 -0
- dsp_tools/xmllib/internal/checkers.py +162 -0
- dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
- dsp_tools/xmllib/internal/constants.py +46 -0
- dsp_tools/xmllib/internal/input_converters.py +155 -0
- dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
- dsp_tools/xmllib/internal/serialise_resource.py +177 -0
- dsp_tools/xmllib/internal/serialise_values.py +152 -0
- dsp_tools/xmllib/internal/type_aliases.py +11 -0
- dsp_tools/xmllib/models/config_options.py +28 -0
- dsp_tools/xmllib/models/date_formats.py +48 -0
- dsp_tools/xmllib/models/dsp_base_resources.py +1380 -400
- dsp_tools/xmllib/models/internal/__init__.py +0 -0
- dsp_tools/xmllib/models/internal/file_values.py +172 -0
- dsp_tools/xmllib/models/internal/geometry.py +162 -0
- dsp_tools/xmllib/models/{migration_metadata.py → internal/migration_metadata.py} +14 -10
- dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
- dsp_tools/xmllib/models/internal/values.py +342 -0
- dsp_tools/xmllib/models/licenses/__init__.py +0 -0
- dsp_tools/xmllib/models/licenses/other.py +59 -0
- dsp_tools/xmllib/models/licenses/recommended.py +107 -0
- dsp_tools/xmllib/models/permissions.py +41 -0
- dsp_tools/xmllib/models/res.py +1782 -0
- dsp_tools/xmllib/models/root.py +313 -26
- dsp_tools/xmllib/value_checkers.py +310 -47
- dsp_tools/xmllib/value_converters.py +765 -8
- dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
- dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
- dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
- {dsp_tools-9.1.0.post11.dist-info → dsp_tools-18.3.0.post13.dist-info}/entry_points.txt +1 -0
- dsp_tools/commands/project/create/project_create.py +0 -1107
- dsp_tools/commands/project/create/project_create_lists.py +0 -204
- dsp_tools/commands/project/create/project_validate.py +0 -453
- dsp_tools/commands/project/models/project_definition.py +0 -12
- dsp_tools/commands/rosetta.py +0 -124
- dsp_tools/commands/template.py +0 -30
- dsp_tools/commands/xml_validate/api_connection.py +0 -122
- dsp_tools/commands/xml_validate/deserialise_input.py +0 -135
- dsp_tools/commands/xml_validate/make_data_rdf.py +0 -193
- dsp_tools/commands/xml_validate/models/data_deserialised.py +0 -108
- dsp_tools/commands/xml_validate/models/data_rdf.py +0 -214
- dsp_tools/commands/xml_validate/models/input_problems.py +0 -191
- dsp_tools/commands/xml_validate/models/validation.py +0 -29
- dsp_tools/commands/xml_validate/reformat_validaton_result.py +0 -89
- dsp_tools/commands/xml_validate/sparql/construct_shapes.py +0 -16
- dsp_tools/commands/xml_validate/xml_validate.py +0 -151
- dsp_tools/commands/xmlupload/check_consistency_with_ontology.py +0 -253
- dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py +0 -236
- dsp_tools/commands/xmlupload/models/deserialise/xmlresource.py +0 -171
- dsp_tools/commands/xmlupload/models/namespace_context.py +0 -39
- dsp_tools/commands/xmlupload/models/ontology_lookup_models.py +0 -161
- dsp_tools/commands/xmlupload/models/ontology_problem_models.py +0 -178
- dsp_tools/commands/xmlupload/models/serialise/jsonld_serialiser.py +0 -40
- dsp_tools/commands/xmlupload/models/serialise/serialise_value.py +0 -51
- dsp_tools/commands/xmlupload/ontology_client.py +0 -92
- dsp_tools/commands/xmlupload/project_client.py +0 -91
- dsp_tools/commands/xmlupload/read_validate_xml_file.py +0 -99
- dsp_tools/models/custom_warnings.py +0 -31
- dsp_tools/models/exceptions.py +0 -90
- dsp_tools/resources/0100-template-repo/template.json +0 -45
- dsp_tools/resources/0100-template-repo/template.xml +0 -27
- dsp_tools/resources/start-stack/docker-compose-validation.yml +0 -5
- dsp_tools/resources/start-stack/start-stack-config.yml +0 -4
- dsp_tools/resources/xml_validate/api-shapes.ttl +0 -411
- dsp_tools/resources/xml_validate/replace_namespace.xslt +0 -61
- dsp_tools/utils/connection_live.py +0 -383
- dsp_tools/utils/iri_util.py +0 -14
- dsp_tools/utils/logger_config.py +0 -41
- dsp_tools/utils/set_encoder.py +0 -20
- dsp_tools/utils/xml_utils.py +0 -145
- dsp_tools/utils/xml_validation.py +0 -197
- dsp_tools/utils/xml_validation_models.py +0 -68
- dsp_tools/xmllib/models/file_values.py +0 -78
- dsp_tools/xmllib/models/resource.py +0 -415
- dsp_tools/xmllib/models/values.py +0 -428
- dsp_tools-9.1.0.post11.dist-info/METADATA +0 -130
- dsp_tools-9.1.0.post11.dist-info/RECORD +0 -167
- dsp_tools-9.1.0.post11.dist-info/WHEEL +0 -4
- dsp_tools-9.1.0.post11.dist-info/licenses/LICENSE +0 -674
- /dsp_tools/{commands/excel2json/new_lists → clients}/__init__.py +0 -0
- /dsp_tools/commands/{excel2json/new_lists/models → create}/__init__.py +0 -0
- /dsp_tools/commands/{project → create/create_on_server}/__init__.py +0 -0
- /dsp_tools/commands/{project/create → create/models}/__init__.py +0 -0
- /dsp_tools/commands/{project/models → create/parsing}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate → create/serialisation}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate/models → excel2json/lists}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate/sparql → excel2json/lists/models}/__init__.py +0 -0
- /dsp_tools/commands/excel2json/{new_lists → lists}/models/deserialise.py +0 -0
- /dsp_tools/commands/{xmlupload/models/deserialise → get}/__init__.py +0 -0
- /dsp_tools/commands/{xmlupload/models/serialise → get/legacy_models}/__init__.py +0 -0
- /dsp_tools/commands/{project/models → get/legacy_models}/helpers.py +0 -0
- /dsp_tools/{models → commands/get/models}/__init__.py +0 -0
|
@@ -0,0 +1,877 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import uuid
|
|
5
|
+
from collections.abc import Iterable
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import regex
|
|
11
|
+
from lxml import etree
|
|
12
|
+
|
|
13
|
+
from dsp_tools.error.xmllib_warnings import MessageInfo
|
|
14
|
+
from dsp_tools.error.xmllib_warnings_util import emit_xmllib_input_warning
|
|
15
|
+
from dsp_tools.error.xmllib_warnings_util import raise_xmllib_input_error
|
|
16
|
+
from dsp_tools.xmllib.internal.checkers import is_nonempty_value_internal
|
|
17
|
+
from dsp_tools.xmllib.internal.constants import KNOWN_XML_TAG_REGEXES
|
|
18
|
+
from dsp_tools.xmllib.internal.input_converters import unescape_reserved_xml_chars
|
|
19
|
+
from dsp_tools.xmllib.models.config_options import NewlineReplacement
|
|
20
|
+
from dsp_tools.xmllib.models.licenses.other import LicenseOther
|
|
21
|
+
from dsp_tools.xmllib.models.licenses.recommended import License
|
|
22
|
+
from dsp_tools.xmllib.models.licenses.recommended import LicenseRecommended
|
|
23
|
+
from dsp_tools.xmllib.value_converters import replace_newlines_with_tags
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def create_footnote_string(
    footnote_text: str, newline_replacement_option: NewlineReplacement = NewlineReplacement.LINEBREAK
) -> str:
    """
    Builds a footnote from the given text and returns it serialised as a string.
    Use this function if the footnote should be embedded into a larger string.
    It delegates to `create_footnote_element()` and serialises the resulting element.
    Currently, the newline replacement options are restricted to `LINEBREAK` and `NONE`.
    The reserved characters `<`, `>` and `&` will be escaped temporarily,
    but they will be correctly displayed in DSP-APP.

    Attention:
        - The text in the footnote may be richtext, i.e. contain XML tags.
        - Not all tags supported in ordinary richtext are currently implemented.
        - The allowed tags are:
            - `<br>` (break line)
            - `<strong>` (bold)
            - `<em>` (italic)
            - `<u>` (underline)
            - `<strike>` (strike through)
            - `<a href="URI">` (link to a URI)
            - `<a class="salsah-link" href="Knora IRI">` (link to a resource)

    Args:
        footnote_text: Text for the footnote
        newline_replacement_option: options to replace newlines

    Raises:
        XmllibInputError: If the text is empty, or if a newline replacement which is not implemented is entered

    Returns:
        The footnote as a string

    Examples:
        ```python
        result = xmllib.create_footnote_string("Text")
        # result == '<footnote content="Text"/>'
        ```

        ```python
        result = xmllib.create_footnote_string("Text\\nSecond Line")
        # result == '<footnote content="Text&lt;br/&gt;Second Line"/>'
        ```

        ```python
        result = xmllib.create_footnote_string("Already escaped &lt;&gt;")
        # already escaped characters will not be escaped again
        # result == '<footnote content="Already escaped &lt;&gt;"/>'
        ```
    """
    footnote = create_footnote_element(footnote_text, newline_replacement_option)
    serialised: str = etree.tostring(footnote, encoding="unicode")
    return serialised
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def create_footnote_element(
    footnote_text: str, newline_replacement_option: NewlineReplacement = NewlineReplacement.LINEBREAK
) -> etree._Element:
    """
    Builds a `<footnote>` element whose `content` attribute carries the given text.
    Use this function if you are working with `lxml` directly.
    Currently, the newline replacement options are restricted to `LINEBREAK` and `NONE`.

    Attention:
        - The text in the footnote may be richtext, i.e. contain XML tags.
        - Not all tags supported in ordinary richtext are currently implemented.
        - The allowed tags are:
            - `<br>` (break line)
            - `<strong>` (bold)
            - `<em>` (italic)
            - `<u>` (underline)
            - `<strike>` (strike through)
            - `<a href="URI">` (link to a URI)
            - `<a class="salsah-link" href="Knora IRI">` (link to a resource)

    Args:
        footnote_text: Text for the footnote
        newline_replacement_option: options to replace newlines

    Raises:
        XmllibInputError: If the text is empty, or if a newline replacement which is not implemented is entered

    Returns:
        The footnote as an `etree` element (not as a string; see `create_footnote_string()` for that)
    """
    supported_replacements = {NewlineReplacement.LINEBREAK, NewlineReplacement.NONE}
    if newline_replacement_option not in supported_replacements:
        raise_xmllib_input_error(
            MessageInfo("Currently the only supported newline replacement is linebreak (<br/>) or None.")
        )
    if not is_nonempty_value_internal(footnote_text):
        raise_xmllib_input_error(MessageInfo("The input value is empty."))
    # Newlines are converted first; afterwards already-escaped characters are unescaped,
    # so that the XML serialiser does not escape them a second time.
    with_replaced_newlines = replace_newlines_with_tags(str(footnote_text), newline_replacement_option)
    attributes = {"content": unescape_reserved_xml_chars(with_replaced_newlines)}
    return etree.Element("footnote", attrib=attributes)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def create_standoff_link_to_resource(resource_id: str, displayed_text: str) -> str:
    """
    Builds a standoff link (an `<a>` tag with the class `salsah-link`) that points to a resource.
    The resource ID is wrapped into the placeholder `IRI:<resource_id>:IRI` inside the `href` attribute.

    Args:
        resource_id: ID of the resource that is linked
        displayed_text: text to display for the embedded link

    Returns:
        A standoff link in string form.

    Raises:
        XmllibInputError: if the resource ID or the displayed text are empty

    Examples:
        ```python
        result = xmllib.create_standoff_link_to_resource("resource_id", "Text")
        # result == '<a class="salsah-link" href="IRI:resource_id:IRI">Text</a>'
        ```
    """
    # Both inputs are checked unconditionally before deciding whether to raise.
    id_is_filled = is_nonempty_value_internal(resource_id)
    text_is_filled = is_nonempty_value_internal(displayed_text)
    if not (id_is_filled and text_is_filled):
        raise_xmllib_input_error(
            MessageInfo(
                "The entered resource ID and displayed text may not be empty. "
                f"Your input: resource_id '{resource_id}' / displayed_text '{displayed_text}'"
            )
        )
    link = etree.Element("a", attrib={"class": "salsah-link", "href": f"IRI:{resource_id}:IRI"})
    link.text = displayed_text
    return etree.tostring(link, encoding="unicode")
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def create_standoff_link_to_uri(uri: str, displayed_text: str) -> str:
    """
    Builds a standoff link (an `<a>` tag) that points to a URI.

    Args:
        uri: the target URI that should be linked to
        displayed_text: text to display for the embedded link

    Returns:
        A standoff link in string form.

    Raises:
        XmllibInputError: if the URI or the displayed text are empty

    Examples:
        ```python
        result = xmllib.create_standoff_link_to_uri("https://www.dasch.swiss/", "This is DaSCH")
        # result == '<a href="https://www.dasch.swiss/">This is DaSCH</a>'
        ```
    """
    # Both inputs are checked unconditionally before deciding whether to raise.
    uri_is_filled = is_nonempty_value_internal(uri)
    text_is_filled = is_nonempty_value_internal(displayed_text)
    if not (uri_is_filled and text_is_filled):
        raise_xmllib_input_error(
            MessageInfo(
                "The entered URI and displayed text may not be empty. "
                f"Your input: uri '{uri}' / displayed_text '{displayed_text}'"
            )
        )
    link = etree.Element("a", attrib={"href": uri})
    link.text = displayed_text
    return etree.tostring(link, encoding="unicode")
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _get_label_to_node_one_list(
    list_section: list[dict[str, Any]], list_name: str, language_of_label: str
) -> dict[str, str]:
    """
    Builds the label-to-node-name lookup for a single list.

    Every label is registered twice: once as it is, and once stripped and lower-cased.
    Entries whose name equals `list_name` are left out of the lookup.

    Args:
        list_section: the list definitions; only those whose "name" equals `list_name` are used
        list_name: name of the list for which the lookup is built
        language_of_label: language passed on to `_name_label_mapper_iterator`

    Returns:
        Lookup from label (original and normalised form) to node name
    """
    # Normally this contains exactly one item: the JSON object of the requested list.
    matching_lists = [entry for entry in list_section if entry["name"] == list_name]
    label_to_node: dict[str, str] = {}
    for label, node_name in _name_label_mapper_iterator(matching_lists, language_of_label):
        if node_name == list_name:
            continue
        label_to_node[label] = node_name
        label_to_node[label.strip().lower()] = node_name
    return label_to_node
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _get_label_to_node_all_lists(
    list_section: list[dict[str, Any]], language_of_label: str
) -> dict[str, dict[str, str]]:
    """
    Builds the label-to-node-name lookup for every list of the list section.

    Args:
        list_section: the list definitions, each of which has a "name"
        language_of_label: language of the labels that are used as lookup keys

    Returns:
        Mapping from list name to the lookup produced by `_get_label_to_node_one_list`
    """
    return {
        one_list["name"]: _get_label_to_node_one_list(list_section, one_list["name"], language_of_label)
        for one_list in list_section
    }
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _get_property_to_list_name_mapping(ontologies: list[dict[str, Any]], default_ontology: str) -> dict[str, str]:
|
|
207
|
+
prop_lookup = {}
|
|
208
|
+
for onto in ontologies:
|
|
209
|
+
prefix = onto["name"]
|
|
210
|
+
property_section = onto["properties"]
|
|
211
|
+
for prop in property_section:
|
|
212
|
+
if prop["gui_element"] == "List":
|
|
213
|
+
prefixed_prop = f"{prefix}:{prop['name']}"
|
|
214
|
+
prop_lookup[prefixed_prop] = prop["gui_attributes"]["hlist"]
|
|
215
|
+
default_props = {
|
|
216
|
+
k.replace(default_ontology, "", 1): v for k, v in prop_lookup.items() if k.startswith(f"{default_ontology}:")
|
|
217
|
+
}
|
|
218
|
+
prop_lookup = prop_lookup | default_props
|
|
219
|
+
return prop_lookup
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
@dataclass
|
|
223
|
+
class ListLookup:
|
|
224
|
+
_lookup: dict[str, dict[str, str]]
|
|
225
|
+
_prop_to_list_name: dict[str, str]
|
|
226
|
+
_label_language: str
|
|
227
|
+
|
|
228
|
+
@staticmethod
def create_new(project_json_path: str | Path, language_of_label: str, default_ontology: str) -> ListLookup:
    """
    Reads a JSON project file and builds a `ListLookup` from it.
    The lookup goes from list labels in the specified language to list node names,
    and covers all lists of the project.json.

    Args:
        project_json_path: path to a JSON project file (a.k.a. ontology)
        language_of_label: label language used for the list
        default_ontology: ontology prefix which is defined as default in the XML file

    Returns:
        `ListLookup` for a project

    Examples:
        ```python
        list_lookup = xmllib.ListLookup.create_new(
            project_json_path="project.json",
            language_of_label="en",
            default_ontology="default-onto",
        )
        ```
    """
    with open(project_json_path, encoding="utf-8") as json_handle:
        project_section = json.load(json_handle)["project"]
    return ListLookup(
        _lookup=_get_label_to_node_all_lists(project_section["lists"], language_of_label),
        _prop_to_list_name=_get_property_to_list_name_mapping(project_section["ontologies"], default_ontology),
        _label_language=language_of_label,
    )
|
|
260
|
+
|
|
261
|
+
def get_node_via_list_name(self, list_name: str, node_label: str) -> str:
|
|
262
|
+
"""
|
|
263
|
+
Returns the list node name based on a label.
|
|
264
|
+
The language of the label was specified when creating the `ListLookup`.
|
|
265
|
+
|
|
266
|
+
Args:
|
|
267
|
+
list_name: name of the list
|
|
268
|
+
node_label: label of the node
|
|
269
|
+
|
|
270
|
+
Returns:
|
|
271
|
+
node name
|
|
272
|
+
|
|
273
|
+
Examples:
|
|
274
|
+
```python
|
|
275
|
+
node_name = list_lookup.get_node_via_list_name(
|
|
276
|
+
list_name="list1",
|
|
277
|
+
node_label="Label 1" # or: "label 1" (capitalisation is not relevant)
|
|
278
|
+
)
|
|
279
|
+
# node_name == "node1"
|
|
280
|
+
```
|
|
281
|
+
"""
|
|
282
|
+
if not (list_lookup := self._lookup.get(list_name)):
|
|
283
|
+
emit_xmllib_input_warning(
|
|
284
|
+
MessageInfo(f"The entered list name '{list_name}' was not found. An empty string is returned.")
|
|
285
|
+
)
|
|
286
|
+
return ""
|
|
287
|
+
if not (found_node := list_lookup.get(node_label)):
|
|
288
|
+
emit_xmllib_input_warning(
|
|
289
|
+
MessageInfo(
|
|
290
|
+
f"'{node_label}' was not recognised as label of the list '{list_name}'. "
|
|
291
|
+
f"This ListLookup is configured for '{self._label_language}' labels. An empty string is returned."
|
|
292
|
+
)
|
|
293
|
+
)
|
|
294
|
+
return ""
|
|
295
|
+
return found_node
|
|
296
|
+
|
|
297
|
+
def get_list_name_and_node_via_property(self, prop_name: str, node_label: str) -> tuple[str, str]:
|
|
298
|
+
"""
|
|
299
|
+
Returns the list name and the node name based on a property that is used with the list and the label of a node.
|
|
300
|
+
The language of the label was specified when creating the `ListLookup`.
|
|
301
|
+
The list name needs to be referenced in the XML file.
|
|
302
|
+
|
|
303
|
+
Args:
|
|
304
|
+
prop_name: name of the list
|
|
305
|
+
node_label: label of the node
|
|
306
|
+
|
|
307
|
+
Returns:
|
|
308
|
+
list name and node name
|
|
309
|
+
|
|
310
|
+
Examples:
|
|
311
|
+
```python
|
|
312
|
+
list_name, node_name = list_lookup.get_list_name_and_node_via_property(
|
|
313
|
+
prop_name=":hasList", # or: "default-onto:hasList"
|
|
314
|
+
node_label="label 1"
|
|
315
|
+
)
|
|
316
|
+
# list_name == "list1"
|
|
317
|
+
# node_name == "node1"
|
|
318
|
+
```
|
|
319
|
+
"""
|
|
320
|
+
if not (list_name := self.get_list_name_via_property(prop_name)):
|
|
321
|
+
return "", ""
|
|
322
|
+
return list_name, self.get_node_via_list_name(list_name, node_label)
|
|
323
|
+
|
|
324
|
+
def get_list_name_via_property(self, prop_name: str) -> str:
|
|
325
|
+
"""
|
|
326
|
+
Returns the list name as specified in the ontology for a property.
|
|
327
|
+
The list name needs to be referenced in the XML file.
|
|
328
|
+
|
|
329
|
+
Args:
|
|
330
|
+
prop_name: name of the property
|
|
331
|
+
|
|
332
|
+
Returns:
|
|
333
|
+
Name of the list
|
|
334
|
+
|
|
335
|
+
Examples:
|
|
336
|
+
```python
|
|
337
|
+
list_name = list_lookup.get_list_name_via_property(
|
|
338
|
+
prop_name=":hasList", # or: "default-onto:hasList"
|
|
339
|
+
)
|
|
340
|
+
# list_name == "list1"
|
|
341
|
+
```
|
|
342
|
+
"""
|
|
343
|
+
if not (list_name := self._prop_to_list_name.get(prop_name)):
|
|
344
|
+
emit_xmllib_input_warning(
|
|
345
|
+
MessageInfo(f"The entered property '{prop_name}' was not found. An empty string is returned.")
|
|
346
|
+
)
|
|
347
|
+
return ""
|
|
348
|
+
return list_name
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def get_list_nodes_from_string_via_list_name(
    string_with_list_labels: str, label_separator: str, list_name: str, list_lookup: ListLookup
) -> list[str]:
    """
    Split a string into list labels and resolve each label to its node name.

    Args:
        string_with_list_labels: the string containing list labels
        label_separator: separator in the string that contains the labels
        list_name: name of the list
        list_lookup: `ListLookup` of the project

    Returns:
        A list of node names. If the string is empty, it returns an empty list.

    Examples:
        ```python
        string_with_list_labels = "Label 1; Label 2"
        nodes = xmllib.get_list_nodes_from_string_via_list_name(
            string_with_list_labels=string_with_list_labels,
            label_separator=";",
            list_name="list1",
            list_lookup=list_lookup,
        )
        # nodes == ["node1", "node2"]
        ```

        ```python
        string_with_list_labels = ""
        nodes = xmllib.get_list_nodes_from_string_via_list_name(
            string_with_list_labels=string_with_list_labels,
            label_separator=";",
            list_name="list1",
            list_lookup=list_lookup,
        )
        # nodes == []
        ```

        ```python
        string_with_list_labels = pd.NA
        nodes = xmllib.get_list_nodes_from_string_via_list_name(
            string_with_list_labels=string_with_list_labels,
            label_separator=";",
            list_name="list1",
            list_lookup=list_lookup,
        )
        # nodes == []
        ```
    """
    if is_nonempty_value_internal(string_with_list_labels):
        split_labels = create_list_from_input(string_with_list_labels, label_separator)
        # one lookup per label; unknown labels yield "" (the lookup emits the warning)
        return [list_lookup.get_node_via_list_name(list_name, single_label) for single_label in split_labels]
    return []
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def get_list_nodes_from_string_via_property(
    string_with_list_labels: str, label_separator: str, property_name: str, list_lookup: ListLookup
) -> tuple[str, list[str]]:
    """
    Split a string into list labels and resolve them via the property that uses the list.
    Returns the list name to be referenced in the XML file together with the node names.
    If the string is empty, it returns an empty string and an empty list.

    Args:
        string_with_list_labels: the string containing the labels
        label_separator: separator in the string that contains the labels
        property_name: name of the property
        list_lookup: `ListLookup` of the project

    Returns:
        The name of the list and a list of node names.

    Examples:
        ```python
        string_with_list_labels = "Label 1; Label 2"
        list_name, nodes = xmllib.get_list_nodes_from_string_via_property(
            string_with_list_labels=string_with_list_labels,
            label_separator=";",
            property_name=":hasList",
            list_lookup=list_lookup,
        )
        # list_name == "list1"
        # nodes == ["node1", "node2"]
        ```

        ```python
        string_with_list_labels = ""
        list_name, nodes = xmllib.get_list_nodes_from_string_via_property(
            string_with_list_labels=string_with_list_labels,
            label_separator=";",
            property_name=":hasList",
            list_lookup=list_lookup,
        )
        # list_name == ""
        # nodes == []
        ```

        ```python
        string_with_list_labels = pd.NA
        list_name, nodes = xmllib.get_list_nodes_from_string_via_property(
            string_with_list_labels=string_with_list_labels,
            label_separator=";",
            property_name=":hasList",
            list_lookup=list_lookup,
        )
        # list_name == ""
        # nodes == []
        ```
    """
    if not is_nonempty_value_internal(string_with_list_labels):
        return "", []
    split_labels = create_list_from_input(string_with_list_labels, label_separator)
    # every label is resolved through the property, giving (list name, node name) pairs
    resolved_pairs = [
        list_lookup.get_list_name_and_node_via_property(property_name, single_label)
        for single_label in split_labels
    ]
    if not resolved_pairs:
        return "", []
    # the list name reported is the one obtained for the last label
    return resolved_pairs[-1][0], [node_name for _, node_name in resolved_pairs]
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
def _name_label_mapper_iterator(
    json_subset: list[dict[str, Any]],
    language_of_label: str,
) -> Iterable[tuple[str, str]]:
    """
    Walk recursively through the list nodes of a JSON project and yield (label, name) pairs.
    The pairs of the sub-nodes of an entry are yielded before the pair of the entry itself.
    An entry without a label in the requested language yields nothing, but a warning is emitted.

    Args:
        json_subset: list of DSP lists (a DSP list being a dictionary with the keys "name", "labels" and "nodes")
        language_of_label: which language of the label to choose

    Yields:
        (label, name) pairs
    """
    for entry in json_subset:
        if "nodes" in entry:
            # descend first: "nodes" holds the children of this list / list node
            yield from _name_label_mapper_iterator(entry["nodes"], language_of_label)
        if "name" not in entry:
            continue
        label_in_language = entry["labels"].get(language_of_label)
        if label_in_language:
            yield label_in_language, entry["name"]
            continue
        emit_xmllib_input_warning(
            MessageInfo(
                f"The language of the labels is '{language_of_label}', "
                f"the list node with the name '{entry['name']}' does not have a label in this language."
            )
        )
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
def escape_reserved_xml_characters(text: str) -> str:
    """
    From richtext strings (encoding="xml"), escape the reserved characters `<`, `>` and `&`,
    but only if they are not part of a standard standoff tag or escape sequence.

    [See the documentation for the standard standoff tags allowed by DSP-API,
    which will not be escaped.](https://docs.dasch.swiss/latest/DSP-API/03-endpoints/api-v2/text/standard-standoff/)

    Args:
        text: the richtext string to be escaped

    Returns:
        The escaped richtext string

    Examples:
        ```python
        result = xmllib.escape_reserved_xml_characters("Text <unknownTag>")
        # result == "Text &lt;unknownTag&gt;"
        ```

        ```python
        result = xmllib.escape_reserved_xml_characters("Text <br/> text after")
        # result == "Text <br/> text after"
        ```
    """
    allowed_tags_regex = "|".join(KNOWN_XML_TAG_REGEXES)
    # a "<" is illegal unless it opens (or closes) one of the known tags
    lookahead = rf"(?!/?({allowed_tags_regex})/?>)"
    illegal_lt = rf"<{lookahead}"
    # a ">" is illegal unless it terminates one of the known tags
    lookbehind = rf"(?<!</?({allowed_tags_regex})/?)"
    illegal_gt = rf"{lookbehind}>"
    # an "&" is illegal unless it starts an escape sequence such as "&amp;" or "&#123;"
    illegal_amp = r"&(?![#a-zA-Z0-9]+;)"
    # The replacements must be the XML entities, not the raw characters:
    # substituting "<" by "<" (etc.) would leave the text unchanged.
    # "&" is handled last; the entities inserted before are not touched, because they are valid escape sequences.
    text = regex.sub(illegal_lt, "&lt;", text)
    text = regex.sub(illegal_gt, "&gt;", text)
    text = regex.sub(illegal_amp, "&amp;", text)
    return text
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
def make_xsd_compatible_id(input_value: str | float | int) -> str:
    """
    Turn the input into a string that can be used as xsd:ID.
    An xsd:ID must start with a letter or underscore, and may not contain all types of special characters.
    A leading `_` is prepended if necessary, and illegal characters are replaced with `_`.

    The string must contain at least one Unicode letter (matching the regex ``\\p{L}``),
    `_`, `!`, `?`, or number, but must not be `None`, `<NA>`, `N/A`, or `-`.

    Args:
        input_value: input value

    Raises:
        XmllibInputError: if the input cannot be transformed to an xsd:ID

    Returns:
        An xsd ID compatible string based on the input value

    Examples:
        ```python
        result = xmllib.make_xsd_compatible_id("0_Universität_Basel")
        # result == "_0_Universit_t_Basel"
        ```
    """
    if not is_nonempty_value_internal(input_value):
        raise_xmllib_input_error(MessageInfo(f"The input '{input_value}' cannot be transformed to an xsd:ID"))
    as_text = str(input_value)
    # step 1: insert an underscore at the very start, unless the first character is an ASCII letter or underscore
    with_legal_start = regex.sub(r"^(?=[^A-Za-z_])", "_", as_text)
    # step 2: everything except ASCII word characters, hyphen and dot becomes an underscore
    return regex.sub(r"[^\w_\-.]", "_", with_legal_start, flags=regex.ASCII)
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
def make_xsd_compatible_id_with_uuid(input_value: str | float | int) -> str:
    """
    Turn the input into a string that can be used as xsd:ID, and append a UUID to it.
    An xsd:ID must start with a letter or underscore, and may not contain all types of special characters.
    A leading `_` is prepended if necessary, and illegal characters are replaced with `_`.
    The UUID will be different each time the function is called.

    The string must contain at least one Unicode letter (matching the regex ``\\p{L}``),
    `_`, `!`, `?`, or number, but must not be `None`, `<NA>`, `N/A`, or `-`.

    Args:
        input_value: input value

    Raises:
        XmllibInputError: if the input cannot be transformed to an xsd:ID

    Returns:
        an xsd ID based on the input value, with a UUID attached.

    Examples:
        ```python
        result = xmllib.make_xsd_compatible_id_with_uuid("Universität_Basel")
        # result == "Universit_t_Basel_88f5cd0b-f333-4174-9030-65900b17773d"
        ```
    """
    # the sanitised ID comes first, then a fresh random UUID, joined by an underscore
    return f"{make_xsd_compatible_id(input_value)}_{uuid.uuid4()}"
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
def create_list_from_string(string: str, separator: str) -> list[str]:  # noqa:ARG001
    """
    Attention:
        This function is deprecated, use the new function called 'create_list_from_input' instead.

    The arguments are ignored; calling this function only reports the deprecation as an input error.
    """
    deprecation_info = MessageInfo(
        "The function 'create_list_from_string' is deprecated. "
        "Use the new function called 'create_list_from_input' instead."
    )
    raise_xmllib_input_error(deprecation_info)
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
def create_list_from_input(input_value: Any, separator: str) -> list[str]:
    """
    Turn the input value into a list of strings.
    A string is split at the separator, the parts are stripped of surrounding whitespace, and empty parts are dropped.
    Any other non-empty value is converted to a string and returned as single-item list.
    If the input is empty it returns an empty list.

    Args:
        input_value: input value to check and convert
        separator: The character that separates the different values in the string.
            For example, a comma or newline.

    Returns:
        The list that results from splitting the input string.

    Examples:
        ```python
        result = xmllib.create_list_from_input(" one, two, three", ",")
        # result == ["one", "two", "three"]
        ```

        ```python
        result = xmllib.create_list_from_input(1, "-")
        # result == ["1"]
        ```

        ```python
        result = xmllib.create_list_from_input(" \\n    ", "\\n")
        # result == []
        ```

        ```python
        result = xmllib.create_list_from_input(None, ",")
        # result == []
        ```
    """
    if not is_nonempty_value_internal(input_value):
        return []
    if not isinstance(input_value, str):
        # non-string values are not split
        return [str(input_value)]
    stripped_parts = (part.strip() for part in input_value.split(separator))
    return [part for part in stripped_parts if part]
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
def create_non_empty_list_from_string(
    string: str, separator: str, resource_id: str | None = None, prop_name: str | None = None
) -> list[str]:
    """
    Split a string at the separator into a list.
    Leading and trailing whitespaces are removed from the list items.

    An `XmllibInputError` is raised if the resulting list is empty.

    Args:
        string: input string
        separator: The character that separates the different values in the string.
            For example, a comma or newline.
        resource_id: If the ID of the resource is provided, a better error message can be composed
        prop_name: If the name of the property is provided, a better error message can be composed

    Returns:
        The list that results from splitting the input string.

    Raises:
        XmllibInputError: If the resulting list is empty.

    Examples:
        ```python
        result = xmllib.create_non_empty_list_from_string("One\\nTwo   ", "\\n")
        # result == ["One", "Two"]
        ```

        ```python
        result = xmllib.create_non_empty_list_from_string("   \\n/    ", "/")
        # raises XmllibInputError
        ```
    """
    items = create_list_from_input(string, separator)
    if not items:
        raise_xmllib_input_error(
            MessageInfo(
                message="The input for this function must result in a non-empty list. Your input results in an empty list.",
                resource_id=resource_id,
                prop_name=prop_name,
            )
        )
    return items
|
|
703
|
+
|
|
704
|
+
|
|
705
|
+
def clean_whitespaces_from_string(string: str) -> str:
    """
    Collapse every run of whitespaces (space, `\\n`, `\\t`, etc.) into a single space,
    and remove leading and trailing whitespaces.

    If the resulting string is empty, a warning will be printed.

    Args:
        string: input string

    Returns:
        The cleaned string.

    Examples:
        ```python
        result = xmllib.clean_whitespaces_from_string("\\t Text\\nafter newline")
        # result == "Text after newline"
        ```

        ```python
        result = xmllib.clean_whitespaces_from_string("      \\n\\t ")
        # result == ""
        # warns that the string is now empty
        ```
    """
    collapsed = regex.sub(r"\s+", " ", string)
    cleaned = collapsed.strip()
    if not cleaned:
        emit_xmllib_input_warning(
            MessageInfo(
                "The entered string is empty after all redundant whitespaces were removed. An empty string is returned."
            )
        )
    return cleaned
|
|
737
|
+
|
|
738
|
+
|
|
739
|
+
def find_license_in_string(string: str) -> License | None:
    """
    Search a string for a license, and return the license if one is recognised.
    Returns None if no license was found.
    The case (upper case/lower case) is ignored.

    Look out: Your string should contain no more than 1 license.
    If it contains more, there is no guarantee which one will be returned.

    See [recommended licenses](https://docs.dasch.swiss/latest/DSP-TOOLS/xmllib-docs/licenses/recommended/)
    for details.

    Args:
        string: string to check

    Returns:
        `License` object or `None`

    Examples:
        ```python
        result = xmllib.find_license_in_string("CC BY")
        # result == LicenseRecommended.CC.BY
        ```

        ```python
        result = xmllib.find_license_in_string("Creative Commons Developing Nations 2.0 Generic Deed")
        # result == None
        ```

    Currently supported license formats:
        - "AI" -> LicenseRecommended.DSP.AI_GENERATED
        - "KI" -> LicenseRecommended.DSP.AI_GENERATED
        - "IA" -> LicenseRecommended.DSP.AI_GENERATED
        - "public domain" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
        - "gemeinfrei" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
        - "frei von Urheberrechten" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
        - "urheberrechtsbefreit" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
        - "libre de droits" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
        - "domaine public" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
        - "unknown" -> LicenseRecommended.DSP.UNKNOWN
        - "unbekannt" -> LicenseRecommended.DSP.UNKNOWN
        - "inconnu" -> LicenseRecommended.DSP.UNKNOWN
        - "CC BY" -> LicenseRecommended.CC.BY
        - "Creative Commons BY 4.0" -> LicenseRecommended.CC.BY
        - "CC 0 1.0" -> LicenseOther.Public.CC_0_1_0
        - "CC PDM 1.0" -> LicenseOther.Public.CC_PDM_1_0
        - "BORIS Standard License" -> LicenseOther.Various.BORIS_STANDARD
        - "LICENCE OUVERTE 2.0" -> LicenseOther.Various.FRANCE_OUVERTE
    """
    # strings that already contain a license IRI are resolved first
    already_parsed = _get_already_parsed_license(string)
    if already_parsed:
        return already_parsed

    sep = r"[-_\p{Zs}]+"  # Zs = unicode category for space separator characters
    ignore_case = regex.IGNORECASE

    # CC 0 and CC PDM must be tested before the generic Creative Commons pattern,
    # because the generic pattern would match their "CC" prefix as well.
    if regex.search(rf"\b(Creative{sep}Commons|CC){sep}0({sep}1\.0)?\b", string, flags=ignore_case):
        return LicenseOther.Public.CC_0_1_0
    if regex.search(rf"\b(Creative{sep}Commons|CC){sep}PDM({sep}1\.0)?\b", string, flags=ignore_case):
        return LicenseOther.Public.CC_PDM_1_0

    creative_commons = regex.search(
        rf"\b(CC|Creative{sep}Commons)({sep}(BY|NC|ND|SA))*({sep}[\d\.]+)?\b", string, flags=ignore_case
    )
    if creative_commons:
        # the search ends here, even if the matched text is not a supported CC license (result None)
        return _find_cc_license(creative_commons.group(0))

    # remaining licenses, tested in this order; the first matching pattern wins
    keyword_licenses: tuple[tuple[str, License], ...] = (
        (r"\b(AI|IA|KI)\b", LicenseRecommended.DSP.AI_GENERATED),
        (
            rf"\b(public{sep}domain|gemeinfrei|frei{sep}von{sep}Urheberrechten|urheberrechtsbefreit|"
            rf"libre{sep}de{sep}droits|domaine{sep}public)\b",
            LicenseRecommended.DSP.PUBLIC_DOMAIN,
        ),
        (r"\b(unknown|unbekannt|inconnu)\b", LicenseRecommended.DSP.UNKNOWN),
        (
            rf"\b(BORIS|Bern{sep}Open{sep}Repository{sep}and{sep}Information{sep}System){sep}Standard{sep}License\b",
            LicenseOther.Various.BORIS_STANDARD,
        ),
        (rf"\b(France{sep})?Licence{sep}ouverte({sep}2\.0)?\b", LicenseOther.Various.FRANCE_OUVERTE),
    )
    for pattern, found_license in keyword_licenses:
        if regex.search(pattern, string, flags=ignore_case):
            return found_license
    return None
|
|
832
|
+
|
|
833
|
+
|
|
834
|
+
def _find_cc_license(string: str) -> License | None:
    """
    Map a Creative Commons license text (e.g. "CC BY-NC") to the matching recommended license.
    The case is ignored.

    Args:
        string: text that was matched as Creative Commons license

    Returns:
        The license, or None if the text lacks "by", repeats one of the components "by" / "nc" / "nd" / "sa",
        or combines "nd" with "sa".
    """
    lowered = string.lower()
    if "by" not in lowered:
        return None
    if any(lowered.count(component) > 1 for component in ("by", "nd", "sa", "nc")):
        return None
    has_nc = "nc" in lowered
    has_nd = "nd" in lowered
    has_sa = "sa" in lowered
    if has_nd and has_sa:
        # no license is returned for the combination of ND and SA
        return None
    # keys: (has_nc, has_nd, has_sa)
    license_by_components = {
        (False, False, False): LicenseRecommended.CC.BY,
        (False, True, False): LicenseRecommended.CC.BY_ND,
        (False, False, True): LicenseRecommended.CC.BY_SA,
        (True, False, False): LicenseRecommended.CC.BY_NC,
        (True, True, False): LicenseRecommended.CC.BY_NC_ND,
        (True, False, True): LicenseRecommended.CC.BY_NC_SA,
    }
    return license_by_components.get((has_nc, has_nd, has_sa))
|
|
856
|
+
|
|
857
|
+
|
|
858
|
+
def _get_already_parsed_license(string: str) -> License | None:
    """
    Check if the string contains one of the known license IRIs, and return the corresponding license.

    Args:
        string: string to check

    Returns:
        The license of the first IRI pattern (in the order of the dictionary below) that is found in the string,
        or None if the string contains none of them.
    """
    # The keys are regexes: every dot of an IRI must be escaped,
    # otherwise it would match any character instead of a literal dot.
    already_parsed_dict: dict[str, License] = {
        r"http://rdfh\.ch/licenses/cc-by-4\.0": LicenseRecommended.CC.BY,
        r"http://rdfh\.ch/licenses/cc-by-sa-4\.0": LicenseRecommended.CC.BY_SA,
        r"http://rdfh\.ch/licenses/cc-by-nc-4\.0": LicenseRecommended.CC.BY_NC,
        r"http://rdfh\.ch/licenses/cc-by-nc-sa-4\.0": LicenseRecommended.CC.BY_NC_SA,
        r"http://rdfh\.ch/licenses/cc-by-nd-4\.0": LicenseRecommended.CC.BY_ND,
        r"http://rdfh\.ch/licenses/cc-by-nc-nd-4\.0": LicenseRecommended.CC.BY_NC_ND,
        r"http://rdfh\.ch/licenses/ai-generated": LicenseRecommended.DSP.AI_GENERATED,
        r"http://rdfh\.ch/licenses/unknown": LicenseRecommended.DSP.UNKNOWN,
        r"http://rdfh\.ch/licenses/public-domain": LicenseRecommended.DSP.PUBLIC_DOMAIN,
        r"http://rdfh\.ch/licenses/cc-0-1\.0": LicenseOther.Public.CC_0_1_0,
        r"http://rdfh\.ch/licenses/cc-pdm-1\.0": LicenseOther.Public.CC_PDM_1_0,
        r"http://rdfh\.ch/licenses/boris": LicenseOther.Various.BORIS_STANDARD,
        r"http://rdfh\.ch/licenses/open-licence-2\.0": LicenseOther.Various.FRANCE_OUVERTE,
    }
    for rgx, lic in already_parsed_dict.items():
        if regex.search(rgx, string):
            return lic
    return None
|