dsp-tools 9.1.0.post11__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsp_tools/__init__.py +4 -0
- dsp_tools/cli/args.py +36 -0
- dsp_tools/cli/call_action.py +51 -231
- dsp_tools/cli/call_action_files_only.py +101 -0
- dsp_tools/cli/call_action_with_network.py +207 -0
- dsp_tools/cli/create_parsers.py +156 -58
- dsp_tools/cli/entry_point.py +56 -26
- dsp_tools/cli/utils.py +87 -0
- dsp_tools/clients/CLAUDE.md +420 -0
- dsp_tools/clients/authentication_client.py +14 -0
- dsp_tools/clients/authentication_client_live.py +66 -0
- dsp_tools/{utils → clients}/connection.py +2 -18
- dsp_tools/clients/connection_live.py +233 -0
- dsp_tools/clients/fuseki_metrics.py +60 -0
- dsp_tools/clients/group_user_clients.py +35 -0
- dsp_tools/clients/group_user_clients_live.py +181 -0
- dsp_tools/clients/legal_info_client.py +23 -0
- dsp_tools/clients/legal_info_client_live.py +132 -0
- dsp_tools/clients/list_client.py +49 -0
- dsp_tools/clients/list_client_live.py +166 -0
- dsp_tools/clients/metadata_client.py +24 -0
- dsp_tools/clients/metadata_client_live.py +47 -0
- dsp_tools/clients/ontology_clients.py +49 -0
- dsp_tools/clients/ontology_create_client_live.py +166 -0
- dsp_tools/clients/ontology_get_client_live.py +80 -0
- dsp_tools/clients/permissions_client.py +68 -0
- dsp_tools/clients/project_client.py +16 -0
- dsp_tools/clients/project_client_live.py +66 -0
- dsp_tools/commands/create/communicate_problems.py +24 -0
- dsp_tools/commands/create/create.py +134 -0
- dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
- dsp_tools/commands/create/create_on_server/classes.py +99 -0
- dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
- dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
- dsp_tools/commands/create/create_on_server/group_users.py +165 -0
- dsp_tools/commands/create/create_on_server/lists.py +163 -0
- dsp_tools/commands/create/create_on_server/mappers.py +12 -0
- dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
- dsp_tools/commands/create/create_on_server/ontology.py +52 -0
- dsp_tools/commands/create/create_on_server/project.py +68 -0
- dsp_tools/commands/create/create_on_server/properties.py +119 -0
- dsp_tools/commands/create/exceptions.py +29 -0
- dsp_tools/commands/create/lists_only.py +66 -0
- dsp_tools/commands/create/models/create_problems.py +87 -0
- dsp_tools/commands/create/models/parsed_ontology.py +88 -0
- dsp_tools/commands/create/models/parsed_project.py +81 -0
- dsp_tools/commands/create/models/rdf_ontology.py +12 -0
- dsp_tools/commands/create/models/server_project_info.py +100 -0
- dsp_tools/commands/create/parsing/parse_lists.py +45 -0
- dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
- dsp_tools/commands/create/parsing/parse_project.py +149 -0
- dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
- dsp_tools/commands/create/project_validate.py +595 -0
- dsp_tools/commands/create/serialisation/ontology.py +119 -0
- dsp_tools/commands/create/serialisation/project.py +44 -0
- dsp_tools/commands/excel2json/CLAUDE.md +101 -0
- dsp_tools/commands/excel2json/json_header.py +57 -23
- dsp_tools/commands/excel2json/{new_lists → lists}/compliance_checks.py +26 -26
- dsp_tools/commands/excel2json/{new_lists/make_new_lists.py → lists/make_lists.py} +19 -18
- dsp_tools/commands/excel2json/{new_lists → lists}/models/input_error.py +1 -12
- dsp_tools/commands/excel2json/{new_lists → lists}/models/serialise.py +9 -5
- dsp_tools/commands/excel2json/{new_lists → lists}/utils.py +4 -4
- dsp_tools/commands/excel2json/models/input_error.py +31 -11
- dsp_tools/commands/excel2json/models/json_header.py +53 -15
- dsp_tools/commands/excel2json/models/ontology.py +4 -3
- dsp_tools/commands/excel2json/{lists.py → old_lists.py} +26 -112
- dsp_tools/commands/excel2json/project.py +78 -34
- dsp_tools/commands/excel2json/properties.py +57 -36
- dsp_tools/commands/excel2json/resources.py +32 -12
- dsp_tools/commands/excel2json/utils.py +20 -1
- dsp_tools/commands/excel2xml/__init__.py +2 -2
- dsp_tools/commands/excel2xml/excel2xml_cli.py +7 -15
- dsp_tools/commands/excel2xml/excel2xml_lib.py +138 -493
- dsp_tools/commands/excel2xml/propertyelement.py +5 -5
- dsp_tools/commands/{project → get}/get.py +29 -13
- dsp_tools/commands/get/get_permissions.py +257 -0
- dsp_tools/commands/get/get_permissions_legacy.py +89 -0
- dsp_tools/commands/{project/models → get/legacy_models}/context.py +6 -6
- dsp_tools/commands/{project/models → get/legacy_models}/group.py +5 -10
- dsp_tools/commands/{project/models → get/legacy_models}/listnode.py +5 -35
- dsp_tools/commands/{project/models → get/legacy_models}/model.py +1 -1
- dsp_tools/commands/{project/models → get/legacy_models}/ontology.py +9 -14
- dsp_tools/commands/{project/models → get/legacy_models}/project.py +13 -6
- dsp_tools/commands/{project/models → get/legacy_models}/propertyclass.py +9 -16
- dsp_tools/commands/{project/models → get/legacy_models}/resourceclass.py +8 -46
- dsp_tools/commands/{project/models → get/legacy_models}/user.py +19 -60
- dsp_tools/commands/get/models/permissions_models.py +10 -0
- dsp_tools/commands/id2iri.py +20 -10
- dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +81 -56
- dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +4 -10
- dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +97 -37
- dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +2 -2
- dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +9 -10
- dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +3 -3
- dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +2 -10
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +12 -2
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +8 -9
- dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +18 -18
- dsp_tools/commands/start_stack.py +126 -77
- dsp_tools/commands/update_legal/CLAUDE.md +344 -0
- dsp_tools/commands/update_legal/__init__.py +0 -0
- dsp_tools/commands/update_legal/core.py +182 -0
- dsp_tools/commands/update_legal/csv_operations.py +135 -0
- dsp_tools/commands/update_legal/models.py +87 -0
- dsp_tools/commands/update_legal/xml_operations.py +247 -0
- dsp_tools/commands/validate_data/CLAUDE.md +159 -0
- dsp_tools/commands/validate_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/constants.py +59 -0
- dsp_tools/commands/validate_data/mappers.py +143 -0
- dsp_tools/commands/validate_data/models/__init__.py +0 -0
- dsp_tools/commands/validate_data/models/api_responses.py +45 -0
- dsp_tools/commands/validate_data/models/input_problems.py +119 -0
- dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
- dsp_tools/commands/validate_data/models/validation.py +106 -0
- dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
- dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
- dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
- dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
- dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
- dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
- dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
- dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
- dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
- dsp_tools/commands/{xml_validate/sparql/resource_shacl.py → validate_data/sparql/cardinality_shacl.py} +45 -47
- dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
- dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
- dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
- dsp_tools/commands/validate_data/utils.py +59 -0
- dsp_tools/commands/validate_data/validate_data.py +283 -0
- dsp_tools/commands/validate_data/validation/__init__.py +0 -0
- dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
- dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
- dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
- dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
- dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
- dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
- dsp_tools/commands/xmlupload/models/formatted_text_value.py +0 -25
- dsp_tools/commands/xmlupload/models/ingest.py +56 -70
- dsp_tools/commands/xmlupload/models/input_problems.py +6 -14
- dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
- dsp_tools/commands/xmlupload/models/permission.py +0 -39
- dsp_tools/commands/xmlupload/models/{deserialise/xmlpermission.py → permissions_parsed.py} +2 -2
- dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
- dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
- dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
- dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
- dsp_tools/commands/xmlupload/models/upload_clients.py +3 -3
- dsp_tools/commands/xmlupload/models/upload_state.py +2 -4
- dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
- dsp_tools/commands/xmlupload/{ark2iri.py → prepare_xml_input/ark2iri.py} +1 -1
- dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
- dsp_tools/commands/xmlupload/{iiif_uri_validator.py → prepare_xml_input/iiif_uri_validator.py} +2 -14
- dsp_tools/commands/xmlupload/{list_client.py → prepare_xml_input/list_client.py} +15 -10
- dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
- dsp_tools/commands/xmlupload/resource_create_client.py +7 -468
- dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
- dsp_tools/commands/xmlupload/stash/{construct_and_analyze_graph.py → analyse_circular_reference_graph.py} +64 -157
- dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
- dsp_tools/commands/xmlupload/stash/graph_models.py +13 -8
- dsp_tools/commands/xmlupload/stash/stash_circular_references.py +48 -115
- dsp_tools/commands/xmlupload/stash/stash_models.py +4 -9
- dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +34 -40
- dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +98 -108
- dsp_tools/commands/xmlupload/upload_config.py +8 -0
- dsp_tools/commands/xmlupload/write_diagnostic_info.py +14 -9
- dsp_tools/commands/xmlupload/xmlupload.py +214 -192
- dsp_tools/config/__init__.py +0 -0
- dsp_tools/config/logger_config.py +69 -0
- dsp_tools/{utils → config}/warnings_config.py +4 -1
- dsp_tools/error/__init__.py +0 -0
- dsp_tools/error/custom_warnings.py +39 -0
- dsp_tools/error/exceptions.py +204 -0
- dsp_tools/error/problems.py +10 -0
- dsp_tools/error/xmllib_errors.py +20 -0
- dsp_tools/error/xmllib_warnings.py +54 -0
- dsp_tools/error/xmllib_warnings_util.py +159 -0
- dsp_tools/error/xsd_validation_error_msg.py +19 -0
- dsp_tools/legacy_models/__init__.py +0 -0
- dsp_tools/{models → legacy_models}/datetimestamp.py +7 -7
- dsp_tools/{models → legacy_models}/langstring.py +1 -1
- dsp_tools/{models → legacy_models}/projectContext.py +4 -4
- dsp_tools/resources/schema/data.xsd +108 -83
- dsp_tools/resources/schema/lists-only.json +4 -23
- dsp_tools/resources/schema/project.json +80 -35
- dsp_tools/resources/schema/properties-only.json +1 -4
- dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
- dsp_tools/resources/start-stack/docker-compose.yml +34 -30
- dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
- dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
- dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
- dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
- dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
- dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
- dsp_tools/utils/ansi_colors.py +32 -0
- dsp_tools/utils/data_formats/__init__.py +0 -0
- dsp_tools/utils/{date_util.py → data_formats/date_util.py} +13 -1
- dsp_tools/utils/data_formats/iri_util.py +30 -0
- dsp_tools/utils/{shared.py → data_formats/shared.py} +1 -35
- dsp_tools/utils/{uri_util.py → data_formats/uri_util.py} +12 -2
- dsp_tools/utils/fuseki_bloating.py +63 -0
- dsp_tools/utils/json_parsing.py +22 -0
- dsp_tools/utils/rdf_constants.py +42 -0
- dsp_tools/utils/rdflib_utils.py +10 -0
- dsp_tools/utils/replace_id_with_iri.py +66 -0
- dsp_tools/utils/request_utils.py +238 -0
- dsp_tools/utils/xml_parsing/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
- dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
- dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
- dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
- dsp_tools/xmllib/CLAUDE.md +302 -0
- dsp_tools/xmllib/__init__.py +49 -0
- dsp_tools/xmllib/general_functions.py +877 -0
- dsp_tools/xmllib/internal/__init__.py +0 -0
- dsp_tools/xmllib/internal/checkers.py +162 -0
- dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
- dsp_tools/xmllib/internal/constants.py +46 -0
- dsp_tools/xmllib/internal/input_converters.py +155 -0
- dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
- dsp_tools/xmllib/internal/serialise_resource.py +177 -0
- dsp_tools/xmllib/internal/serialise_values.py +152 -0
- dsp_tools/xmllib/internal/type_aliases.py +11 -0
- dsp_tools/xmllib/models/config_options.py +28 -0
- dsp_tools/xmllib/models/date_formats.py +48 -0
- dsp_tools/xmllib/models/dsp_base_resources.py +1380 -400
- dsp_tools/xmllib/models/internal/__init__.py +0 -0
- dsp_tools/xmllib/models/internal/file_values.py +172 -0
- dsp_tools/xmllib/models/internal/geometry.py +162 -0
- dsp_tools/xmllib/models/{migration_metadata.py → internal/migration_metadata.py} +14 -10
- dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
- dsp_tools/xmllib/models/internal/values.py +342 -0
- dsp_tools/xmllib/models/licenses/__init__.py +0 -0
- dsp_tools/xmllib/models/licenses/other.py +59 -0
- dsp_tools/xmllib/models/licenses/recommended.py +107 -0
- dsp_tools/xmllib/models/permissions.py +41 -0
- dsp_tools/xmllib/models/res.py +1782 -0
- dsp_tools/xmllib/models/root.py +313 -26
- dsp_tools/xmllib/value_checkers.py +310 -47
- dsp_tools/xmllib/value_converters.py +765 -8
- dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
- dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
- dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
- {dsp_tools-9.1.0.post11.dist-info → dsp_tools-18.3.0.post13.dist-info}/entry_points.txt +1 -0
- dsp_tools/commands/project/create/project_create.py +0 -1107
- dsp_tools/commands/project/create/project_create_lists.py +0 -204
- dsp_tools/commands/project/create/project_validate.py +0 -453
- dsp_tools/commands/project/models/project_definition.py +0 -12
- dsp_tools/commands/rosetta.py +0 -124
- dsp_tools/commands/template.py +0 -30
- dsp_tools/commands/xml_validate/api_connection.py +0 -122
- dsp_tools/commands/xml_validate/deserialise_input.py +0 -135
- dsp_tools/commands/xml_validate/make_data_rdf.py +0 -193
- dsp_tools/commands/xml_validate/models/data_deserialised.py +0 -108
- dsp_tools/commands/xml_validate/models/data_rdf.py +0 -214
- dsp_tools/commands/xml_validate/models/input_problems.py +0 -191
- dsp_tools/commands/xml_validate/models/validation.py +0 -29
- dsp_tools/commands/xml_validate/reformat_validaton_result.py +0 -89
- dsp_tools/commands/xml_validate/sparql/construct_shapes.py +0 -16
- dsp_tools/commands/xml_validate/xml_validate.py +0 -151
- dsp_tools/commands/xmlupload/check_consistency_with_ontology.py +0 -253
- dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py +0 -236
- dsp_tools/commands/xmlupload/models/deserialise/xmlresource.py +0 -171
- dsp_tools/commands/xmlupload/models/namespace_context.py +0 -39
- dsp_tools/commands/xmlupload/models/ontology_lookup_models.py +0 -161
- dsp_tools/commands/xmlupload/models/ontology_problem_models.py +0 -178
- dsp_tools/commands/xmlupload/models/serialise/jsonld_serialiser.py +0 -40
- dsp_tools/commands/xmlupload/models/serialise/serialise_value.py +0 -51
- dsp_tools/commands/xmlupload/ontology_client.py +0 -92
- dsp_tools/commands/xmlupload/project_client.py +0 -91
- dsp_tools/commands/xmlupload/read_validate_xml_file.py +0 -99
- dsp_tools/models/custom_warnings.py +0 -31
- dsp_tools/models/exceptions.py +0 -90
- dsp_tools/resources/0100-template-repo/template.json +0 -45
- dsp_tools/resources/0100-template-repo/template.xml +0 -27
- dsp_tools/resources/start-stack/docker-compose-validation.yml +0 -5
- dsp_tools/resources/start-stack/start-stack-config.yml +0 -4
- dsp_tools/resources/xml_validate/api-shapes.ttl +0 -411
- dsp_tools/resources/xml_validate/replace_namespace.xslt +0 -61
- dsp_tools/utils/connection_live.py +0 -383
- dsp_tools/utils/iri_util.py +0 -14
- dsp_tools/utils/logger_config.py +0 -41
- dsp_tools/utils/set_encoder.py +0 -20
- dsp_tools/utils/xml_utils.py +0 -145
- dsp_tools/utils/xml_validation.py +0 -197
- dsp_tools/utils/xml_validation_models.py +0 -68
- dsp_tools/xmllib/models/file_values.py +0 -78
- dsp_tools/xmllib/models/resource.py +0 -415
- dsp_tools/xmllib/models/values.py +0 -428
- dsp_tools-9.1.0.post11.dist-info/METADATA +0 -130
- dsp_tools-9.1.0.post11.dist-info/RECORD +0 -167
- dsp_tools-9.1.0.post11.dist-info/WHEEL +0 -4
- dsp_tools-9.1.0.post11.dist-info/licenses/LICENSE +0 -674
- /dsp_tools/{commands/excel2json/new_lists → clients}/__init__.py +0 -0
- /dsp_tools/commands/{excel2json/new_lists/models → create}/__init__.py +0 -0
- /dsp_tools/commands/{project → create/create_on_server}/__init__.py +0 -0
- /dsp_tools/commands/{project/create → create/models}/__init__.py +0 -0
- /dsp_tools/commands/{project/models → create/parsing}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate → create/serialisation}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate/models → excel2json/lists}/__init__.py +0 -0
- /dsp_tools/commands/{xml_validate/sparql → excel2json/lists/models}/__init__.py +0 -0
- /dsp_tools/commands/excel2json/{new_lists → lists}/models/deserialise.py +0 -0
- /dsp_tools/commands/{xmlupload/models/deserialise → get}/__init__.py +0 -0
- /dsp_tools/commands/{xmlupload/models/serialise → get/legacy_models}/__init__.py +0 -0
- /dsp_tools/commands/{project/models → get/legacy_models}/helpers.py +0 -0
- /dsp_tools/{models → commands/get/models}/__init__.py +0 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import regex
|
|
2
|
+
|
|
3
|
+
from dsp_tools.commands.xmlupload.iri_resolver import IriResolver
|
|
4
|
+
from dsp_tools.error.exceptions import Id2IriReplacementError
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def prepare_richtext_string_for_upload(richtext_str: str, iri_resolver: IriResolver) -> str:
|
|
8
|
+
richtext_str, ids_not_found = replace_ids_if_found(richtext_str, iri_resolver)
|
|
9
|
+
if ids_not_found:
|
|
10
|
+
raise Id2IriReplacementError(
|
|
11
|
+
f"Some internal IDs of the following richtext could not be resolved to an IRI: {richtext_str}"
|
|
12
|
+
)
|
|
13
|
+
return _richtext_as_xml(richtext_str)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def replace_ids_if_found(richtext_str: str, iri_resolver: IriResolver) -> tuple[str, set[str]]:
|
|
17
|
+
ids_used = find_internal_ids(richtext_str)
|
|
18
|
+
not_found = set()
|
|
19
|
+
if ids_used:
|
|
20
|
+
for id_ in ids_used:
|
|
21
|
+
if iri_found := iri_resolver.get(id_):
|
|
22
|
+
richtext_str = _replace_one_id(richtext_str, id_, iri_found)
|
|
23
|
+
else:
|
|
24
|
+
not_found.add(id_)
|
|
25
|
+
return richtext_str, not_found
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _replace_one_id(txt: str, id_: str, iri: str) -> str:
|
|
29
|
+
return txt.replace(f'href="IRI:{id_}:IRI"', f'href="{iri}"')
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def find_internal_ids(txt: str) -> set[str]:
|
|
33
|
+
return set(regex.findall(pattern='href="IRI:(.*?):IRI"', string=txt))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _richtext_as_xml(richtext_str: str) -> str:
|
|
37
|
+
return f'<?xml version="1.0" encoding="UTF-8"?>\n<text>{richtext_str}</text>'
|
|
@@ -1,124 +1,34 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from typing import Any
|
|
4
|
-
from typing import cast
|
|
5
4
|
|
|
6
|
-
import regex
|
|
7
5
|
import rustworkx as rx
|
|
8
|
-
from lxml import etree
|
|
9
6
|
|
|
10
7
|
from dsp_tools.commands.xmlupload.stash.graph_models import Cost
|
|
11
8
|
from dsp_tools.commands.xmlupload.stash.graph_models import Edge
|
|
12
|
-
from dsp_tools.commands.xmlupload.stash.graph_models import
|
|
13
|
-
from dsp_tools.commands.xmlupload.stash.graph_models import
|
|
14
|
-
from dsp_tools.
|
|
9
|
+
from dsp_tools.commands.xmlupload.stash.graph_models import InfoForGraph
|
|
10
|
+
from dsp_tools.commands.xmlupload.stash.graph_models import LinkValueLink
|
|
11
|
+
from dsp_tools.commands.xmlupload.stash.graph_models import StandOffLink
|
|
15
12
|
|
|
16
13
|
|
|
17
|
-
def
|
|
18
|
-
root: etree._Element,
|
|
19
|
-
) -> tuple[list[ResptrLink], list[XMLLink], list[str]]:
|
|
14
|
+
def generate_upload_order(info_for_graph: InfoForGraph) -> tuple[dict[str, list[str]], list[str]]:
|
|
20
15
|
"""
|
|
21
|
-
|
|
22
|
-
and add a reference UUID to each XML element that contains a link (<resptr> or <text>).
|
|
23
|
-
With this UUID, the link objects can be identified in the XML data file.
|
|
16
|
+
Generates the upload order from the Info for the graph
|
|
24
17
|
|
|
25
18
|
Args:
|
|
26
|
-
|
|
19
|
+
info_for_graph: Info for the graph
|
|
27
20
|
|
|
28
21
|
Returns:
|
|
29
|
-
-
|
|
30
|
-
-
|
|
31
|
-
- A list with all resource IDs used in the XML file.
|
|
22
|
+
- A dictionary which maps the resources that have stashes to the UUIDs of the stashed links.
|
|
23
|
+
- A list of resource IDs which gives the order in which the resources should be uploaded to DSP-API.
|
|
32
24
|
"""
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
xml_links.extend(xml)
|
|
41
|
-
return resptr_links, xml_links, all_resource_ids
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def _create_info_from_xml_for_graph_from_one_resource(
|
|
45
|
-
resource: etree._Element,
|
|
46
|
-
) -> tuple[list[ResptrLink], list[XMLLink]]:
|
|
47
|
-
resptr_links: list[ResptrLink] = []
|
|
48
|
-
xml_links: list[XMLLink] = []
|
|
49
|
-
for prop in resource.getchildren():
|
|
50
|
-
match prop.tag:
|
|
51
|
-
case "resptr-prop":
|
|
52
|
-
resptr_links.extend(_create_resptr_link_objects(resource.attrib["id"], prop))
|
|
53
|
-
case "text-prop":
|
|
54
|
-
xml_links.extend(_create_text_link_objects(resource.attrib["id"], prop))
|
|
55
|
-
case "hasComment" | "hasDescription":
|
|
56
|
-
if xml_link := _create_text_link_object_from_special_tags(resource.attrib["id"], prop):
|
|
57
|
-
xml_links.append(xml_link)
|
|
58
|
-
case "isSegmentOf" | "relatesTo":
|
|
59
|
-
if segment_link := _create_segmentOf_link_objects(resource.attrib["id"], prop):
|
|
60
|
-
resptr_links.append(segment_link)
|
|
61
|
-
return resptr_links, xml_links
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
def _create_segmentOf_link_objects(subject_id: str, resptr: etree._Element) -> ResptrLink | None:
|
|
65
|
-
resptr.text = cast(str, resptr.text)
|
|
66
|
-
if is_resource_iri(resptr.text):
|
|
67
|
-
return None
|
|
68
|
-
link_object = ResptrLink(subject_id, resptr.text)
|
|
69
|
-
# this UUID is so that the links that were stashed can be identified in the XML data file
|
|
70
|
-
resptr.attrib["linkUUID"] = link_object.link_uuid
|
|
71
|
-
return link_object
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
def _create_resptr_link_objects(subject_id: str, resptr_prop: etree._Element) -> list[ResptrLink]:
|
|
75
|
-
resptr_links = []
|
|
76
|
-
for resptr in resptr_prop.getchildren():
|
|
77
|
-
resptr.text = cast(str, resptr.text)
|
|
78
|
-
if not is_resource_iri(resptr.text):
|
|
79
|
-
link_object = ResptrLink(subject_id, resptr.text)
|
|
80
|
-
# this UUID is so that the links that were stashed can be identified in the XML data file
|
|
81
|
-
resptr.attrib["linkUUID"] = link_object.link_uuid
|
|
82
|
-
resptr_links.append(link_object)
|
|
83
|
-
return resptr_links
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
def _create_text_link_objects(subject_id: str, text_prop: etree._Element) -> list[XMLLink]:
|
|
87
|
-
# if the same ID is in several separate <text> values of one <text-prop>, they are considered separate links
|
|
88
|
-
xml_props = []
|
|
89
|
-
for text in text_prop.getchildren():
|
|
90
|
-
if links := _extract_ids_from_one_text_value(text):
|
|
91
|
-
xml_link = XMLLink(subject_id, links)
|
|
92
|
-
xml_props.append(xml_link)
|
|
93
|
-
# this UUID is so that the links that were stashed can be identified in the XML data file
|
|
94
|
-
text.attrib["linkUUID"] = xml_link.link_uuid
|
|
95
|
-
return xml_props
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
def _create_text_link_object_from_special_tags(subject_id: str, special_tag: etree._Element) -> XMLLink | None:
|
|
99
|
-
# This is for <hasDescription> and <hasComment> properties of <video-segment>s or <audio-segment>s
|
|
100
|
-
if not (links := _extract_ids_from_one_text_value(special_tag)):
|
|
101
|
-
return None
|
|
102
|
-
xml_link = XMLLink(subject_id, links)
|
|
103
|
-
# this UUID is so that the links that were stashed can be identified in the XML data file
|
|
104
|
-
special_tag.attrib["linkUUID"] = xml_link.link_uuid
|
|
105
|
-
return xml_link
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
def _extract_ids_from_one_text_value(text: etree._Element) -> set[str]:
|
|
109
|
-
# the same id in one <text> only means one link to the resource
|
|
110
|
-
all_links = set()
|
|
111
|
-
for ele in text.iterdescendants():
|
|
112
|
-
if href := ele.attrib.get("href"):
|
|
113
|
-
if internal_id := regex.search(r"IRI:(.*):IRI", href):
|
|
114
|
-
all_links.add(internal_id.group(1))
|
|
115
|
-
return all_links
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
def make_graph(
|
|
119
|
-
resptr_links: list[ResptrLink],
|
|
120
|
-
xml_links: list[XMLLink],
|
|
121
|
-
all_resource_ids: list[str],
|
|
25
|
+
graph, node_to_id, edges = _make_graph(info_for_graph)
|
|
26
|
+
stash_lookup, upload_order, _ = _generate_upload_order_from_graph(graph, node_to_id, edges)
|
|
27
|
+
return stash_lookup, upload_order
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _make_graph(
|
|
31
|
+
info_for_graph: InfoForGraph,
|
|
122
32
|
) -> tuple[rx.PyDiGraph[Any, Any], dict[int, str], list[Edge]]:
|
|
123
33
|
"""
|
|
124
34
|
This function takes information about the resources of an XML file and links between them.
|
|
@@ -126,10 +36,7 @@ def make_graph(
|
|
|
126
36
|
Resources are represented as nodes and links as edges.
|
|
127
37
|
|
|
128
38
|
Args:
|
|
129
|
-
|
|
130
|
-
xml_links: objects representing one or more links from a single text value of a single starting resource
|
|
131
|
-
to a set of target resources
|
|
132
|
-
all_resource_ids: IDs of all resources in the graph
|
|
39
|
+
info_for_graph: Information required to construct the graph
|
|
133
40
|
|
|
134
41
|
Returns:
|
|
135
42
|
- The rustworkx graph.
|
|
@@ -137,17 +44,57 @@ def make_graph(
|
|
|
137
44
|
- A list with all the edges in the graph.
|
|
138
45
|
"""
|
|
139
46
|
graph: rx.PyDiGraph[Any, Any] = rx.PyDiGraph()
|
|
140
|
-
nodes = [(id_, None, None) for id_ in all_resource_ids]
|
|
47
|
+
nodes = [(id_, None, None) for id_ in info_for_graph.all_resource_ids]
|
|
141
48
|
node_indices = list(graph.add_nodes_from(nodes))
|
|
142
|
-
id_to_node = dict(zip(all_resource_ids, node_indices))
|
|
143
|
-
node_to_id = dict(zip(node_indices, all_resource_ids))
|
|
144
|
-
edges = [Edge(id_to_node[x.source_id], id_to_node[x.target_id], x) for x in
|
|
145
|
-
for xml in
|
|
49
|
+
id_to_node = dict(zip(info_for_graph.all_resource_ids, node_indices))
|
|
50
|
+
node_to_id = dict(zip(node_indices, info_for_graph.all_resource_ids))
|
|
51
|
+
edges = [Edge(id_to_node[x.source_id], id_to_node[x.target_id], x) for x in info_for_graph.link_values]
|
|
52
|
+
for xml in info_for_graph.standoff_links:
|
|
146
53
|
edges.extend([Edge(id_to_node[xml.source_id], id_to_node[x], xml) for x in xml.target_ids])
|
|
147
54
|
graph.add_edges_from([e.as_tuple() for e in edges])
|
|
148
55
|
return graph, node_to_id, edges
|
|
149
56
|
|
|
150
57
|
|
|
58
|
+
def _generate_upload_order_from_graph(
|
|
59
|
+
graph: rx.PyDiGraph[Any, Any],
|
|
60
|
+
node_to_id: dict[int, str],
|
|
61
|
+
edges: list[Edge],
|
|
62
|
+
) -> tuple[dict[str, list[str]], list[str], int]:
|
|
63
|
+
"""
|
|
64
|
+
Generate the order in which the resources should be uploaded to the DSP-API based on the dependencies.
|
|
65
|
+
|
|
66
|
+
Args:
|
|
67
|
+
graph: graph
|
|
68
|
+
node_to_id: mapping between indices of the graph nodes and original resource IDs from the XML file
|
|
69
|
+
edges: edges in the graph (contains info about source node, target node, and link info)
|
|
70
|
+
|
|
71
|
+
Returns:
|
|
72
|
+
- A dictionary which maps the resources that have stashes to the UUIDs of the stashed links.
|
|
73
|
+
- A list of resource IDs which gives the order in which the resources should be uploaded to DSP-API.
|
|
74
|
+
- The number of links in the stash.
|
|
75
|
+
"""
|
|
76
|
+
upload_order: list[str] = []
|
|
77
|
+
stash_lookup: dict[str, list[str]] = {}
|
|
78
|
+
node_indices = set(node_to_id.keys())
|
|
79
|
+
leaf_nodes, remaining_node_indices = _remove_leaf_nodes(graph, node_to_id, node_indices)
|
|
80
|
+
upload_order.extend(leaf_nodes)
|
|
81
|
+
stash_counter = 0
|
|
82
|
+
while remaining_node_indices:
|
|
83
|
+
cycle = list(rx.digraph_find_cycle(graph))
|
|
84
|
+
links_to_remove = _find_cheapest_outgoing_links(graph, cycle, edges)
|
|
85
|
+
stash_counter += len(links_to_remove)
|
|
86
|
+
_remove_edges_to_stash(
|
|
87
|
+
graph=graph,
|
|
88
|
+
edges_to_remove=links_to_remove,
|
|
89
|
+
all_edges=edges,
|
|
90
|
+
remaining_nodes=remaining_node_indices,
|
|
91
|
+
)
|
|
92
|
+
stash_lookup = _add_stash_to_lookup_dict(stash_lookup, [x.link_object for x in links_to_remove])
|
|
93
|
+
leaf_nodes, remaining_node_indices = _remove_leaf_nodes(graph, node_to_id, remaining_node_indices)
|
|
94
|
+
upload_order.extend(leaf_nodes)
|
|
95
|
+
return stash_lookup, upload_order, stash_counter
|
|
96
|
+
|
|
97
|
+
|
|
151
98
|
def _remove_leaf_nodes(
|
|
152
99
|
graph: rx.PyDiGraph[Any, Any],
|
|
153
100
|
node_to_id: dict[int, str],
|
|
@@ -227,7 +174,7 @@ def _remove_edges_to_stash(
|
|
|
227
174
|
phantom_edges_to_remove = []
|
|
228
175
|
source, target = edges_to_remove[0].source, edges_to_remove[0].target
|
|
229
176
|
for link_to_stash in [x.link_object for x in edges_to_remove]:
|
|
230
|
-
if isinstance(link_to_stash,
|
|
177
|
+
if isinstance(link_to_stash, StandOffLink):
|
|
231
178
|
phantom_edges_to_remove.extend(
|
|
232
179
|
_find_phantom_xml_edges(source, target, all_edges, link_to_stash, remaining_nodes)
|
|
233
180
|
)
|
|
@@ -240,7 +187,7 @@ def _find_phantom_xml_edges(
|
|
|
240
187
|
source_node_index: int,
|
|
241
188
|
target_node_index: int,
|
|
242
189
|
all_edges: list[Edge],
|
|
243
|
-
xml_link_to_stash:
|
|
190
|
+
xml_link_to_stash: StandOffLink,
|
|
244
191
|
remaining_nodes: set[int],
|
|
245
192
|
) -> list[tuple[int, int]]:
|
|
246
193
|
"""
|
|
@@ -277,7 +224,7 @@ def _find_phantom_xml_edges(
|
|
|
277
224
|
|
|
278
225
|
def _add_stash_to_lookup_dict(
|
|
279
226
|
stash_dict: dict[str, list[str]],
|
|
280
|
-
links_to_stash: list[
|
|
227
|
+
links_to_stash: list[StandOffLink | LinkValueLink],
|
|
281
228
|
) -> dict[str, list[str]]:
|
|
282
229
|
stash_list = [stash_link.link_uuid for stash_link in links_to_stash]
|
|
283
230
|
# all stashed links have the same subject id, so we can just take the first one
|
|
@@ -287,43 +234,3 @@ def _add_stash_to_lookup_dict(
|
|
|
287
234
|
else:
|
|
288
235
|
stash_dict[subj_id] = stash_list
|
|
289
236
|
return stash_dict
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
def generate_upload_order(
    graph: rx.PyDiGraph[Any, Any],
    node_to_id: dict[int, str],
    edges: list[Edge],
) -> tuple[dict[str, list[str]], list[str], int]:
    """
    Generate the order in which the resources should be uploaded to the DSP-API based on the dependencies.

    The function alternates between two steps until no node indices remain:
    collecting the nodes returned by `_remove_leaf_nodes` into the upload order,
    and breaking one cycle by stashing the links chosen by `_find_cheapest_outgoing_links`.

    Args:
        graph: directed graph of the resources; it is passed to the helpers that remove nodes and edges
        node_to_id: mapping between indices of the graph nodes and original resource IDs from the XML file
        edges: edges in the graph (contains info about source node, target node, and link info)

    Returns:
        - A dictionary which maps the resources that have stashes to the UUIDs of the stashed links.
        - A list of resource IDs which gives the order in which the resources should be uploaded to DSP-API.
        - The number of links in the stash.
    """
    upload_order: list[str] = []
    stash_lookup: dict[str, list[str]] = {}
    # start with every node index known to the mapping
    node_indices = set(node_to_id.keys())
    # initial pass before any link has been stashed
    # (presumably this strips the nodes without unresolved dependencies; verify against _remove_leaf_nodes)
    leaf_nodes, remaining_node_indices = _remove_leaf_nodes(graph, node_to_id, node_indices)
    upload_order.extend(leaf_nodes)
    stash_counter = 0
    while remaining_node_indices:
        # NOTE(review): assumes that leftover nodes always imply at least one cycle in the graph - confirm
        cycle = list(rx.digraph_find_cycle(graph))
        links_to_remove = _find_cheapest_outgoing_links(graph, cycle, edges)
        stash_counter += len(links_to_remove)
        _remove_edges_to_stash(
            graph=graph,
            edges_to_remove=links_to_remove,
            all_edges=edges,
            remaining_nodes=remaining_node_indices,
        )
        # remember the UUIDs of the link objects that were just taken out of the graph
        stash_lookup = _add_stash_to_lookup_dict(stash_lookup, [x.link_object for x in links_to_remove])
        # with the edges removed, run the leaf removal again on the nodes that are still left
        leaf_nodes, remaining_node_indices = _remove_leaf_nodes(graph, node_to_id, remaining_node_indices)
        upload_order.extend(leaf_nodes)
    return stash_lookup, upload_order, stash_counter
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from dsp_tools.commands.xmlupload.models.processed.res import ProcessedResource
|
|
2
|
+
from dsp_tools.commands.xmlupload.models.processed.values import ProcessedLink
|
|
3
|
+
from dsp_tools.commands.xmlupload.models.processed.values import ProcessedRichtext
|
|
4
|
+
from dsp_tools.commands.xmlupload.stash.graph_models import InfoForGraph
|
|
5
|
+
from dsp_tools.commands.xmlupload.stash.graph_models import LinkValueLink
|
|
6
|
+
from dsp_tools.commands.xmlupload.stash.graph_models import StandOffLink
|
|
7
|
+
from dsp_tools.utils.data_formats.iri_util import is_resource_iri
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def create_info_for_graph_from_processed_resources(resources: list[ProcessedResource]) -> InfoForGraph:
    """
    Gather the data needed to build the graph used to analyse circular references.

    Args:
        resources: the processed resources to inspect

    Returns:
        An InfoForGraph with the ID of every resource, plus all link values and
        stand-off links that `_process_one_resource` reported for them.
    """
    resource_ids = []
    direct_links = []
    text_links = []
    for resource in resources:
        found_links, found_stand_off = _process_one_resource(resource)
        direct_links += found_links
        text_links += found_stand_off
        resource_ids.append(resource.res_id)
    return InfoForGraph(all_resource_ids=resource_ids, link_values=direct_links, standoff_links=text_links)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _process_one_resource(resource: ProcessedResource) -> tuple[list[LinkValueLink], list[StandOffLink]]:
    """
    Collect the links of one resource whose targets are not resource IRIs.

    Args:
        resource: the resource whose values are scanned

    Returns:
        - One LinkValueLink per ProcessedLink value whose target is not a resource IRI.
        - One StandOffLink per ProcessedRichtext value that references at least one target
          that is not a resource IRI.
    """
    direct_links = []
    text_links = []
    for value in resource.values:
        if isinstance(value, ProcessedLink):
            # targets recognised by is_resource_iri are left out of the graph
            if not is_resource_iri(value.value):
                link = LinkValueLink(
                    source_id=resource.res_id,
                    target_id=value.value,
                    link_uuid=value.value_uuid,
                )
                direct_links.append(link)
        elif isinstance(value, ProcessedRichtext) and value.resource_references:
            non_iri_targets = {ref for ref in value.resource_references if not is_resource_iri(ref)}
            if non_iri_targets:
                text_link = StandOffLink(
                    source_id=resource.res_id,
                    target_ids=non_iri_targets,
                    link_uuid=value.value_uuid,
                )
                text_links.append(text_link)
    return direct_links, text_links
|
|
@@ -1,12 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import uuid
|
|
4
3
|
from dataclasses import dataclass
|
|
5
|
-
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class InfoForGraph:
    """Bundles the resource IDs and the two kinds of links from which the graph is built."""

    # IDs of all the resources
    all_resource_ids: list[str]
    # direct links, each from one starting resource to one target resource
    link_values: list[LinkValueLink]
    # links that originate from a text value and may point to several target resources
    standoff_links: list[StandOffLink]
|
|
6
11
|
|
|
7
12
|
|
|
8
13
|
@dataclass(frozen=True)
|
|
9
|
-
class
|
|
14
|
+
class LinkValueLink:
|
|
10
15
|
"""
|
|
11
16
|
This class represents a direct link (resptr) between a starting resource and a target resource.
|
|
12
17
|
|
|
@@ -18,7 +23,7 @@ class ResptrLink:
|
|
|
18
23
|
|
|
19
24
|
source_id: str
|
|
20
25
|
target_id: str
|
|
21
|
-
link_uuid: str
|
|
26
|
+
link_uuid: str
|
|
22
27
|
|
|
23
28
|
@property
|
|
24
29
|
def cost_links(self) -> float:
|
|
@@ -27,7 +32,7 @@ class ResptrLink:
|
|
|
27
32
|
|
|
28
33
|
|
|
29
34
|
@dataclass(frozen=True)
|
|
30
|
-
class
|
|
35
|
+
class StandOffLink:
|
|
31
36
|
"""
|
|
32
37
|
This class represents one or more links from a single starting resource to a set of target resources,
|
|
33
38
|
where all target resources are linked to from a single text value of the starting resource.
|
|
@@ -40,7 +45,7 @@ class XMLLink:
|
|
|
40
45
|
|
|
41
46
|
source_id: str
|
|
42
47
|
target_ids: set[str]
|
|
43
|
-
link_uuid: str
|
|
48
|
+
link_uuid: str
|
|
44
49
|
|
|
45
50
|
@property
|
|
46
51
|
def cost_links(self) -> float:
|
|
@@ -61,9 +66,9 @@ class Edge:
|
|
|
61
66
|
|
|
62
67
|
source: int
|
|
63
68
|
target: int
|
|
64
|
-
link_object:
|
|
69
|
+
link_object: LinkValueLink | StandOffLink
|
|
65
70
|
|
|
66
|
-
def as_tuple(self) -> tuple[int, int,
|
|
71
|
+
def as_tuple(self) -> tuple[int, int, LinkValueLink | StandOffLink]:
|
|
67
72
|
"""Returns a representation of this edge as a tuple of the source index, target index and link object"""
|
|
68
73
|
return self.source, self.target, self.link_object
|
|
69
74
|
|
|
@@ -1,17 +1,9 @@
|
|
|
1
|
-
from
|
|
1
|
+
from copy import deepcopy
|
|
2
2
|
|
|
3
|
-
from typing import cast
|
|
4
|
-
from uuid import uuid4
|
|
5
|
-
|
|
6
|
-
from lxml import etree
|
|
7
|
-
|
|
8
|
-
from dsp_tools.commands.xmlupload.models.deserialise.deserialise_value import XMLProperty
|
|
9
|
-
from dsp_tools.commands.xmlupload.models.deserialise.xmlresource import XMLResource
|
|
10
3
|
from dsp_tools.commands.xmlupload.models.formatted_text_value import FormattedTextValue
|
|
11
|
-
from dsp_tools.commands.xmlupload.models.
|
|
12
|
-
from dsp_tools.commands.xmlupload.
|
|
13
|
-
from dsp_tools.commands.xmlupload.
|
|
14
|
-
from dsp_tools.commands.xmlupload.stash.construct_and_analyze_graph import make_graph
|
|
4
|
+
from dsp_tools.commands.xmlupload.models.processed.res import ProcessedResource
|
|
5
|
+
from dsp_tools.commands.xmlupload.models.processed.values import ProcessedLink
|
|
6
|
+
from dsp_tools.commands.xmlupload.models.processed.values import ProcessedRichtext
|
|
15
7
|
from dsp_tools.commands.xmlupload.stash.stash_models import LinkValueStash
|
|
16
8
|
from dsp_tools.commands.xmlupload.stash.stash_models import LinkValueStashItem
|
|
17
9
|
from dsp_tools.commands.xmlupload.stash.stash_models import StandoffStash
|
|
@@ -19,117 +11,58 @@ from dsp_tools.commands.xmlupload.stash.stash_models import StandoffStashItem
|
|
|
19
11
|
from dsp_tools.commands.xmlupload.stash.stash_models import Stash
|
|
20
12
|
|
|
21
13
|
|
|
22
|
-
def
|
|
23
|
-
|
|
24
|
-
restype: str,
|
|
25
|
-
link_prop: XMLProperty,
|
|
26
|
-
stash_lookup: dict[str, list[str]],
|
|
27
|
-
) -> list[StandoffStashItem]:
|
|
28
|
-
stashed_items = []
|
|
29
|
-
for value in link_prop.values:
|
|
30
|
-
if value.link_uuid not in stash_lookup[res_id]:
|
|
31
|
-
continue
|
|
32
|
-
# value.value is a KnoraStandoffXml text with problematic links.
|
|
33
|
-
# stash it, then replace the problematic text with a UUID
|
|
34
|
-
standoff_xml = cast(FormattedTextValue, value.value)
|
|
35
|
-
uuid = str(uuid4())
|
|
36
|
-
standoff_stash_item = StandoffStashItem(
|
|
37
|
-
res_id=res_id,
|
|
38
|
-
res_type=restype,
|
|
39
|
-
uuid=uuid,
|
|
40
|
-
prop_name=link_prop.name,
|
|
41
|
-
value=standoff_xml,
|
|
42
|
-
)
|
|
43
|
-
value.value = FormattedTextValue(uuid)
|
|
44
|
-
stashed_items.append(standoff_stash_item)
|
|
45
|
-
return stashed_items
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def _stash_resptr(
    res_id: str,
    restype: str,
    link_prop: XMLProperty,
    stash_lookup: dict[str, list[str]],
    permission_lookup: dict[str, Permissions],
) -> list[LinkValueStashItem]:
    """
    Move the link values of one property that are listed in the stash lookup into stash items.

    The stashed values are removed from `link_prop.values` in place.

    Args:
        res_id: ID of the resource that owns the property
        restype: type of that resource
        link_prop: the link property whose values are checked
        stash_lookup: maps resource IDs to the UUIDs of the links that must be stashed
        permission_lookup: maps the permission key of a value to the permissions object

    Returns:
        The stash items created for the removed values
    """
    collected: list[LinkValueStashItem] = []
    # iterate over a copy, because matching values are removed from the original list
    for candidate in link_prop.values.copy():
        if candidate.link_uuid in stash_lookup[res_id]:
            if candidate.permissions:
                resolved_permission = str(permission_lookup[candidate.permissions])
            else:
                resolved_permission = None
            # candidate.value is the ID of the target resource: keep it in the stash, then drop the value
            item = LinkValueStashItem(
                res_id=res_id,
                res_type=restype,
                prop_name=link_prop.name,
                target_id=str(candidate.value),
                permission=resolved_permission,
            )
            link_prop.values.remove(candidate)
            collected.append(item)
    return collected
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
def stash_circular_references(
|
|
74
|
-
resources: list[XMLResource],
|
|
75
|
-
stash_lookup: dict[str, list[str]],
|
|
76
|
-
permission_lookup: dict[str, Permissions],
|
|
77
|
-
) -> Stash | None:
|
|
78
|
-
"""
|
|
79
|
-
Stashes problematic resource-references from a list of resources.
|
|
80
|
-
The resources are modified in-place.
|
|
81
|
-
|
|
82
|
-
Args:
|
|
83
|
-
resources: all resources of the XML file
|
|
84
|
-
stash_lookup: A dictionary which maps the resources that have stashes to the UUIDs of the stashed links
|
|
85
|
-
permission_lookup: A dictionary which maps the permissions of the stashed links to their string representation
|
|
86
|
-
|
|
87
|
-
Returns:
|
|
88
|
-
stash: an object that contains the stashed references
|
|
89
|
-
|
|
90
|
-
Raises:
|
|
91
|
-
ValueError: If a link property of one of the resources is not "text" or "resptr"
|
|
92
|
-
"""
|
|
93
|
-
stashed_standoff_values: list[StandoffStashItem] = []
|
|
14
|
+
def stash_circular_references(resources: list[ProcessedResource], stash_lookup: dict[str, list[str]]) -> Stash | None:
    """
    Stash the values that would create circular references and remove them from the resources.

    Args:
        resources: the resources to process; those listed in the lookup are handed to `_process_one_resource`
        stash_lookup: maps the IDs of resources that have stashes to the UUIDs of the values to stash

    Returns:
        None if the lookup is empty, otherwise the result of `Stash.make` for the collected items
    """
    if not stash_lookup:
        return None

    link_items: list[LinkValueStashItem] = []
    standoff_items: list[StandoffStashItem] = []
    for resource in resources:
        if resource.res_id in stash_lookup:
            new_links, new_standoff = _process_one_resource(resource, stash_lookup)
            link_items += new_links
            standoff_items += new_standoff

    return Stash.make(
        standoff_stash=StandoffStash.make(standoff_items),
        link_value_stash=LinkValueStash.make(link_items),
    )
|
|
117
32
|
|
|
118
|
-
def identify_circular_references(root: etree._Element) -> tuple[dict[str, list[str]], list[str]]:
|
|
119
|
-
"""
|
|
120
|
-
Identifies problematic resource-references inside an XML tree.
|
|
121
|
-
A reference is problematic if it creates a circle (circular references).
|
|
122
|
-
The XML tree is modified in-place:
|
|
123
|
-
A reference UUID is added to each XML element that contains a link (`<resptr>` or `<text>`).
|
|
124
33
|
|
|
125
|
-
|
|
126
|
-
|
|
34
|
+
def _process_one_resource(
    resource: ProcessedResource,
    stash_lookup: dict[str, list[str]],
) -> tuple[list[LinkValueStashItem], list[StandoffStashItem]]:
    """
    Stash the link and richtext values of one resource whose UUIDs are listed in the lookup.

    Stashed link values are removed from `resource.values`;
    stashed richtext values stay in place and are rewritten by `_stash_standoff`.

    Args:
        resource: the resource to process; it must have an entry in `stash_lookup`
        stash_lookup: maps resource IDs to the UUIDs of the values to stash

    Returns:
        The stash items for the link values and for the richtext values
    """
    link_items: list[LinkValueStashItem] = []
    standoff_items: list[StandoffStashItem] = []

    # iterate over a copy, because link values are removed from the original list
    for value in resource.values.copy():
        if isinstance(value, ProcessedLink):
            if value.value_uuid in stash_lookup[resource.res_id]:
                link_items.append(LinkValueStashItem(resource.res_id, resource.type_iri, value))
                resource.values.remove(value)
        elif isinstance(value, ProcessedRichtext):
            if value.value_uuid in stash_lookup[resource.res_id]:
                # the text contains problematic links:
                # it is stashed and its content is replaced with a UUID
                standoff_items.append(_stash_standoff(value, resource.res_id, resource.type_iri))

    return link_items, standoff_items
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _stash_standoff(value: ProcessedRichtext, res_id: str, res_type: str) -> StandoffStashItem:
    """
    Create the stash item for one richtext value and replace the value's content with its UUID.

    Args:
        value: the richtext value to stash; its `value` attribute is overwritten in place
        res_id: ID of the resource that owns the value
        res_type: type of that resource

    Returns:
        A stash item that holds a deep copy of the value as it was before the replacement
    """
    # take the snapshot first, the next statement overwrites the content
    untouched_copy = deepcopy(value)
    value.value = FormattedTextValue(value.value_uuid)
    # The permissions do not need to be added to the StandoffStashItem:
    # when an update request carries no new permissions,
    # the permissions of the previous value are taken.
    return StandoffStashItem(res_id=res_id, res_type=res_type, value=untouched_copy)
|
|
@@ -3,18 +3,15 @@ from __future__ import annotations
|
|
|
3
3
|
from dataclasses import dataclass
|
|
4
4
|
from itertools import groupby
|
|
5
5
|
|
|
6
|
-
from dsp_tools.commands.xmlupload.models.
|
|
6
|
+
from dsp_tools.commands.xmlupload.models.processed.values import ProcessedLink
|
|
7
|
+
from dsp_tools.commands.xmlupload.models.processed.values import ProcessedRichtext
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
@dataclass(frozen=True)
|
|
10
11
|
class StandoffStashItem:
|
|
11
|
-
"""Holds information about a single stashed XML text value."""
|
|
12
|
-
|
|
13
12
|
res_id: str
|
|
14
13
|
res_type: str
|
|
15
|
-
|
|
16
|
-
prop_name: str
|
|
17
|
-
value: FormattedTextValue
|
|
14
|
+
value: ProcessedRichtext
|
|
18
15
|
|
|
19
16
|
|
|
20
17
|
@dataclass(frozen=True)
|
|
@@ -51,9 +48,7 @@ class LinkValueStashItem:
|
|
|
51
48
|
|
|
52
49
|
res_id: str
|
|
53
50
|
res_type: str
|
|
54
|
-
|
|
55
|
-
target_id: str
|
|
56
|
-
permission: str | None = None
|
|
51
|
+
value: ProcessedLink
|
|
57
52
|
|
|
58
53
|
|
|
59
54
|
@dataclass(frozen=True)
|