dsp-tools 0.9.13__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsp_tools/__init__.py +5 -0
- dsp_tools/cli/args.py +47 -0
- dsp_tools/cli/call_action.py +85 -0
- dsp_tools/cli/call_action_files_only.py +101 -0
- dsp_tools/cli/call_action_with_network.py +207 -0
- dsp_tools/cli/create_parsers.py +479 -0
- dsp_tools/cli/entry_point.py +322 -0
- dsp_tools/cli/utils.py +87 -0
- dsp_tools/clients/CLAUDE.md +420 -0
- dsp_tools/clients/authentication_client.py +14 -0
- dsp_tools/clients/authentication_client_live.py +66 -0
- dsp_tools/clients/connection.py +35 -0
- dsp_tools/clients/connection_live.py +233 -0
- dsp_tools/clients/fuseki_metrics.py +60 -0
- dsp_tools/clients/group_user_clients.py +35 -0
- dsp_tools/clients/group_user_clients_live.py +181 -0
- dsp_tools/clients/legal_info_client.py +23 -0
- dsp_tools/clients/legal_info_client_live.py +132 -0
- dsp_tools/clients/list_client.py +49 -0
- dsp_tools/clients/list_client_live.py +166 -0
- dsp_tools/clients/metadata_client.py +24 -0
- dsp_tools/clients/metadata_client_live.py +47 -0
- dsp_tools/clients/ontology_clients.py +49 -0
- dsp_tools/clients/ontology_create_client_live.py +166 -0
- dsp_tools/clients/ontology_get_client_live.py +80 -0
- dsp_tools/clients/permissions_client.py +68 -0
- dsp_tools/clients/project_client.py +16 -0
- dsp_tools/clients/project_client_live.py +66 -0
- dsp_tools/commands/create/communicate_problems.py +24 -0
- dsp_tools/commands/create/create.py +134 -0
- dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
- dsp_tools/commands/create/create_on_server/classes.py +99 -0
- dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
- dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
- dsp_tools/commands/create/create_on_server/group_users.py +165 -0
- dsp_tools/commands/create/create_on_server/lists.py +163 -0
- dsp_tools/commands/create/create_on_server/mappers.py +12 -0
- dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
- dsp_tools/commands/create/create_on_server/ontology.py +52 -0
- dsp_tools/commands/create/create_on_server/project.py +68 -0
- dsp_tools/commands/create/create_on_server/properties.py +119 -0
- dsp_tools/commands/create/exceptions.py +29 -0
- dsp_tools/commands/create/lists_only.py +66 -0
- dsp_tools/commands/create/models/create_problems.py +87 -0
- dsp_tools/commands/create/models/parsed_ontology.py +88 -0
- dsp_tools/commands/create/models/parsed_project.py +81 -0
- dsp_tools/commands/create/models/rdf_ontology.py +12 -0
- dsp_tools/commands/create/models/server_project_info.py +100 -0
- dsp_tools/commands/create/parsing/parse_lists.py +45 -0
- dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
- dsp_tools/commands/create/parsing/parse_project.py +149 -0
- dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
- dsp_tools/commands/create/project_validate.py +595 -0
- dsp_tools/commands/create/serialisation/ontology.py +119 -0
- dsp_tools/commands/create/serialisation/project.py +44 -0
- dsp_tools/commands/excel2json/CLAUDE.md +101 -0
- dsp_tools/commands/excel2json/json_header.py +321 -0
- dsp_tools/commands/excel2json/lists/__init__.py +0 -0
- dsp_tools/commands/excel2json/lists/compliance_checks.py +292 -0
- dsp_tools/commands/excel2json/lists/make_lists.py +247 -0
- dsp_tools/commands/excel2json/lists/models/__init__.py +0 -0
- dsp_tools/commands/excel2json/lists/models/deserialise.py +30 -0
- dsp_tools/commands/excel2json/lists/models/input_error.py +216 -0
- dsp_tools/commands/excel2json/lists/models/serialise.py +57 -0
- dsp_tools/commands/excel2json/lists/utils.py +81 -0
- dsp_tools/commands/excel2json/models/__init__.py +0 -0
- dsp_tools/commands/excel2json/models/input_error.py +416 -0
- dsp_tools/commands/excel2json/models/json_header.py +175 -0
- dsp_tools/commands/excel2json/models/list_node_name.py +16 -0
- dsp_tools/commands/excel2json/models/ontology.py +76 -0
- dsp_tools/commands/excel2json/old_lists.py +328 -0
- dsp_tools/commands/excel2json/project.py +280 -0
- dsp_tools/commands/excel2json/properties.py +370 -0
- dsp_tools/commands/excel2json/resources.py +336 -0
- dsp_tools/commands/excel2json/utils.py +352 -0
- dsp_tools/commands/excel2xml/__init__.py +7 -0
- dsp_tools/commands/excel2xml/excel2xml_cli.py +523 -0
- dsp_tools/commands/excel2xml/excel2xml_lib.py +1953 -0
- dsp_tools/commands/excel2xml/propertyelement.py +47 -0
- dsp_tools/commands/get/__init__.py +0 -0
- dsp_tools/commands/get/get.py +166 -0
- dsp_tools/commands/get/get_permissions.py +257 -0
- dsp_tools/commands/get/get_permissions_legacy.py +89 -0
- dsp_tools/commands/get/legacy_models/__init__.py +0 -0
- dsp_tools/commands/get/legacy_models/context.py +318 -0
- dsp_tools/commands/get/legacy_models/group.py +241 -0
- dsp_tools/commands/get/legacy_models/helpers.py +47 -0
- dsp_tools/commands/get/legacy_models/listnode.py +390 -0
- dsp_tools/commands/get/legacy_models/model.py +12 -0
- dsp_tools/commands/get/legacy_models/ontology.py +324 -0
- dsp_tools/commands/get/legacy_models/project.py +366 -0
- dsp_tools/commands/get/legacy_models/propertyclass.py +417 -0
- dsp_tools/commands/get/legacy_models/resourceclass.py +676 -0
- dsp_tools/commands/get/legacy_models/user.py +438 -0
- dsp_tools/commands/get/models/__init__.py +0 -0
- dsp_tools/commands/get/models/permissions_models.py +10 -0
- dsp_tools/commands/id2iri.py +258 -0
- dsp_tools/commands/ingest_xmlupload/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +178 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +69 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +166 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +121 -0
- dsp_tools/commands/ingest_xmlupload/ingest_files/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +64 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +20 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +57 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +66 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +67 -0
- dsp_tools/commands/resume_xmlupload/__init__.py +0 -0
- dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +96 -0
- dsp_tools/commands/start_stack.py +428 -0
- dsp_tools/commands/update_legal/CLAUDE.md +344 -0
- dsp_tools/commands/update_legal/__init__.py +0 -0
- dsp_tools/commands/update_legal/core.py +182 -0
- dsp_tools/commands/update_legal/csv_operations.py +135 -0
- dsp_tools/commands/update_legal/models.py +87 -0
- dsp_tools/commands/update_legal/xml_operations.py +247 -0
- dsp_tools/commands/validate_data/CLAUDE.md +159 -0
- dsp_tools/commands/validate_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/constants.py +59 -0
- dsp_tools/commands/validate_data/mappers.py +143 -0
- dsp_tools/commands/validate_data/models/__init__.py +0 -0
- dsp_tools/commands/validate_data/models/api_responses.py +45 -0
- dsp_tools/commands/validate_data/models/input_problems.py +119 -0
- dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
- dsp_tools/commands/validate_data/models/validation.py +106 -0
- dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
- dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
- dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
- dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
- dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
- dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
- dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
- dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
- dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
- dsp_tools/commands/validate_data/sparql/cardinality_shacl.py +209 -0
- dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
- dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
- dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
- dsp_tools/commands/validate_data/utils.py +59 -0
- dsp_tools/commands/validate_data/validate_data.py +283 -0
- dsp_tools/commands/validate_data/validation/__init__.py +0 -0
- dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
- dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
- dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
- dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
- dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
- dsp_tools/commands/xmlupload/__init__.py +0 -0
- dsp_tools/commands/xmlupload/iri_resolver.py +21 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
- dsp_tools/commands/xmlupload/models/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
- dsp_tools/commands/xmlupload/models/formatted_text_value.py +10 -0
- dsp_tools/commands/xmlupload/models/ingest.py +143 -0
- dsp_tools/commands/xmlupload/models/input_problems.py +58 -0
- dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
- dsp_tools/commands/xmlupload/models/permission.py +45 -0
- dsp_tools/commands/xmlupload/models/permissions_parsed.py +93 -0
- dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
- dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
- dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
- dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
- dsp_tools/commands/xmlupload/models/upload_clients.py +14 -0
- dsp_tools/commands/xmlupload/models/upload_state.py +20 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/ark2iri.py +55 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/iiif_uri_validator.py +50 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/list_client.py +120 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
- dsp_tools/commands/xmlupload/resource_create_client.py +25 -0
- dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
- dsp_tools/commands/xmlupload/stash/__init__.py +0 -0
- dsp_tools/commands/xmlupload/stash/analyse_circular_reference_graph.py +236 -0
- dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
- dsp_tools/commands/xmlupload/stash/graph_models.py +87 -0
- dsp_tools/commands/xmlupload/stash/stash_circular_references.py +68 -0
- dsp_tools/commands/xmlupload/stash/stash_models.py +109 -0
- dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +106 -0
- dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +196 -0
- dsp_tools/commands/xmlupload/upload_config.py +76 -0
- dsp_tools/commands/xmlupload/write_diagnostic_info.py +27 -0
- dsp_tools/commands/xmlupload/xmlupload.py +516 -0
- dsp_tools/config/__init__.py +0 -0
- dsp_tools/config/logger_config.py +69 -0
- dsp_tools/config/warnings_config.py +32 -0
- dsp_tools/error/__init__.py +0 -0
- dsp_tools/error/custom_warnings.py +39 -0
- dsp_tools/error/exceptions.py +204 -0
- dsp_tools/error/problems.py +10 -0
- dsp_tools/error/xmllib_errors.py +20 -0
- dsp_tools/error/xmllib_warnings.py +54 -0
- dsp_tools/error/xmllib_warnings_util.py +159 -0
- dsp_tools/error/xsd_validation_error_msg.py +19 -0
- dsp_tools/legacy_models/__init__.py +0 -0
- dsp_tools/legacy_models/datetimestamp.py +81 -0
- dsp_tools/legacy_models/langstring.py +253 -0
- dsp_tools/legacy_models/projectContext.py +49 -0
- dsp_tools/py.typed +0 -0
- dsp_tools/resources/schema/data.xsd +648 -0
- dsp_tools/resources/schema/lists-only.json +72 -0
- dsp_tools/resources/schema/project.json +1258 -0
- dsp_tools/resources/schema/properties-only.json +874 -0
- dsp_tools/resources/schema/resources-only.json +140 -0
- dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
- dsp_tools/resources/start-stack/docker-compose.override.yml +11 -0
- dsp_tools/resources/start-stack/docker-compose.yml +88 -0
- dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
- dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
- dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
- dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
- dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
- dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
- dsp_tools/utils/__init__.py +0 -0
- dsp_tools/utils/ansi_colors.py +32 -0
- dsp_tools/utils/data_formats/__init__.py +0 -0
- dsp_tools/utils/data_formats/date_util.py +166 -0
- dsp_tools/utils/data_formats/iri_util.py +30 -0
- dsp_tools/utils/data_formats/shared.py +81 -0
- dsp_tools/utils/data_formats/uri_util.py +76 -0
- dsp_tools/utils/fuseki_bloating.py +63 -0
- dsp_tools/utils/json_parsing.py +22 -0
- dsp_tools/utils/rdf_constants.py +42 -0
- dsp_tools/utils/rdflib_utils.py +10 -0
- dsp_tools/utils/replace_id_with_iri.py +66 -0
- dsp_tools/utils/request_utils.py +238 -0
- dsp_tools/utils/xml_parsing/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
- dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
- dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
- dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
- dsp_tools/xmllib/CLAUDE.md +302 -0
- dsp_tools/xmllib/__init__.py +49 -0
- dsp_tools/xmllib/general_functions.py +877 -0
- dsp_tools/xmllib/internal/__init__.py +0 -0
- dsp_tools/xmllib/internal/checkers.py +162 -0
- dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
- dsp_tools/xmllib/internal/constants.py +46 -0
- dsp_tools/xmllib/internal/input_converters.py +155 -0
- dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
- dsp_tools/xmllib/internal/serialise_resource.py +177 -0
- dsp_tools/xmllib/internal/serialise_values.py +152 -0
- dsp_tools/xmllib/internal/type_aliases.py +11 -0
- dsp_tools/xmllib/models/__init__.py +0 -0
- dsp_tools/xmllib/models/config_options.py +28 -0
- dsp_tools/xmllib/models/date_formats.py +48 -0
- dsp_tools/xmllib/models/dsp_base_resources.py +1542 -0
- dsp_tools/xmllib/models/internal/__init__.py +0 -0
- dsp_tools/xmllib/models/internal/file_values.py +172 -0
- dsp_tools/xmllib/models/internal/geometry.py +162 -0
- dsp_tools/xmllib/models/internal/migration_metadata.py +55 -0
- dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
- dsp_tools/xmllib/models/internal/values.py +342 -0
- dsp_tools/xmllib/models/licenses/__init__.py +0 -0
- dsp_tools/xmllib/models/licenses/other.py +59 -0
- dsp_tools/xmllib/models/licenses/recommended.py +107 -0
- dsp_tools/xmllib/models/permissions.py +41 -0
- dsp_tools/xmllib/models/res.py +1782 -0
- dsp_tools/xmllib/models/root.py +348 -0
- dsp_tools/xmllib/value_checkers.py +434 -0
- dsp_tools/xmllib/value_converters.py +777 -0
- dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
- dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
- dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
- dsp_tools-18.3.0.post13.dist-info/entry_points.txt +3 -0
- dsp_tools-0.9.13.dist-info/LICENSE +0 -674
- dsp_tools-0.9.13.dist-info/METADATA +0 -144
- dsp_tools-0.9.13.dist-info/RECORD +0 -71
- dsp_tools-0.9.13.dist-info/WHEEL +0 -5
- dsp_tools-0.9.13.dist-info/entry_points.txt +0 -3
- dsp_tools-0.9.13.dist-info/top_level.txt +0 -1
- dsplib/models/connection.py +0 -272
- dsplib/models/group.py +0 -296
- dsplib/models/helpers.py +0 -505
- dsplib/models/langstring.py +0 -277
- dsplib/models/listnode.py +0 -578
- dsplib/models/model.py +0 -20
- dsplib/models/ontology.py +0 -448
- dsplib/models/permission.py +0 -112
- dsplib/models/project.py +0 -547
- dsplib/models/propertyclass.py +0 -505
- dsplib/models/resource.py +0 -366
- dsplib/models/resourceclass.py +0 -810
- dsplib/models/sipi.py +0 -30
- dsplib/models/user.py +0 -731
- dsplib/models/value.py +0 -1000
- dsplib/utils/knora-data-schema.xsd +0 -454
- dsplib/utils/knora-schema-lists.json +0 -83
- dsplib/utils/knora-schema.json +0 -434
- dsplib/utils/onto_commons.py +0 -24
- dsplib/utils/onto_create_lists.py +0 -73
- dsplib/utils/onto_create_ontology.py +0 -442
- dsplib/utils/onto_get.py +0 -58
- dsplib/utils/onto_validate.py +0 -33
- dsplib/utils/xml_upload.py +0 -539
- dsplib/widgets/doublepassword.py +0 -80
- knora/MLS-import-libraries.py +0 -84
- knora/dsp_tools.py +0 -96
- knora/dsplib/models/connection.py +0 -272
- knora/dsplib/models/group.py +0 -296
- knora/dsplib/models/helpers.py +0 -506
- knora/dsplib/models/langstring.py +0 -277
- knora/dsplib/models/listnode.py +0 -578
- knora/dsplib/models/model.py +0 -20
- knora/dsplib/models/ontology.py +0 -448
- knora/dsplib/models/permission.py +0 -112
- knora/dsplib/models/project.py +0 -583
- knora/dsplib/models/propertyclass.py +0 -505
- knora/dsplib/models/resource.py +0 -416
- knora/dsplib/models/resourceclass.py +0 -811
- knora/dsplib/models/sipi.py +0 -35
- knora/dsplib/models/user.py +0 -731
- knora/dsplib/models/value.py +0 -1000
- knora/dsplib/utils/knora-data-schema.xsd +0 -464
- knora/dsplib/utils/knora-schema-lists.json +0 -83
- knora/dsplib/utils/knora-schema.json +0 -444
- knora/dsplib/utils/onto_commons.py +0 -24
- knora/dsplib/utils/onto_create_lists.py +0 -73
- knora/dsplib/utils/onto_create_ontology.py +0 -451
- knora/dsplib/utils/onto_get.py +0 -58
- knora/dsplib/utils/onto_validate.py +0 -33
- knora/dsplib/utils/xml_upload.py +0 -540
- knora/dsplib/widgets/doublepassword.py +0 -80
- knora/knora.py +0 -2108
- knora/test.py +0 -99
- knora/testit.py +0 -76
- knora/xml2knora.py +0 -633
- {dsplib → dsp_tools/cli}/__init__.py +0 -0
- {dsplib/models → dsp_tools/clients}/__init__.py +0 -0
- {dsplib/utils → dsp_tools/commands}/__init__.py +0 -0
- {dsplib/widgets → dsp_tools/commands/create}/__init__.py +0 -0
- {knora → dsp_tools/commands/create/create_on_server}/__init__.py +0 -0
- {knora/dsplib → dsp_tools/commands/create/models}/__init__.py +0 -0
- {knora/dsplib/models → dsp_tools/commands/create/parsing}/__init__.py +0 -0
- {knora/dsplib/utils → dsp_tools/commands/create/serialisation}/__init__.py +0 -0
- {knora/dsplib/widgets → dsp_tools/commands/excel2json}/__init__.py +0 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from loguru import logger
|
|
4
|
+
from lxml import etree
|
|
5
|
+
|
|
6
|
+
from dsp_tools.clients.connection import Connection
|
|
7
|
+
from dsp_tools.commands.xmlupload.models.lookup_models import XmlReferenceLookups
|
|
8
|
+
from dsp_tools.commands.xmlupload.models.processed.res import ProcessedResource
|
|
9
|
+
from dsp_tools.commands.xmlupload.models.upload_clients import UploadClients
|
|
10
|
+
from dsp_tools.commands.xmlupload.stash.analyse_circular_reference_graph import generate_upload_order
|
|
11
|
+
from dsp_tools.commands.xmlupload.stash.create_info_for_graph import create_info_for_graph_from_processed_resources
|
|
12
|
+
from dsp_tools.commands.xmlupload.stash.stash_circular_references import stash_circular_references
|
|
13
|
+
from dsp_tools.commands.xmlupload.stash.stash_models import Stash
|
|
14
|
+
from dsp_tools.error.exceptions import BaseError
|
|
15
|
+
from dsp_tools.error.exceptions import InputError
|
|
16
|
+
from dsp_tools.legacy_models.projectContext import ProjectContext
|
|
17
|
+
from dsp_tools.utils.xml_parsing.get_lookups import get_authorship_lookup
|
|
18
|
+
from dsp_tools.utils.xml_parsing.get_lookups import get_permissions_lookup
|
|
19
|
+
from dsp_tools.utils.xml_parsing.get_parsed_resources import get_parsed_resources
|
|
20
|
+
from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedResource
|
|
21
|
+
|
|
22
|
+
# Separator consisting of a newline followed by "- ".
# NOTE(review): presumably used to render bullet-style lists in messages; it is not referenced
# in the visible part of this module, so confirm against its importers.
LIST_SEPARATOR = "\n- "
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_parsed_resources_and_mappers(
    root: etree._Element, clients: UploadClients
) -> tuple[list[ParsedResource], XmlReferenceLookups]:
    """
    Parse the resources found under the XML root and build the lookups that belong to them.

    Args:
        root: root element of the XML document
        clients: bundle of clients; the legal info client provides the server
            and the list client is used while building the lookups

    Returns:
        The parsed resources and the XML reference lookups
    """
    logger.debug("Get ParsedResource and XML-Lookups from root")
    print("Parsing XML file for upload.")
    server = clients.legal_info_client.server
    resources = get_parsed_resources(root, server)
    lookups = _get_xml_reference_lookups(root=root, clients=clients)
    return resources, lookups
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _get_xml_reference_lookups(root: etree._Element, clients: UploadClients) -> XmlReferenceLookups:
    """
    Assemble the permissions, authorship and list node lookups for one XML document.

    Args:
        root: root element of the XML document; its "shortcode" attribute identifies the project
        clients: bundle of clients; only the list client (and its connection) is used here

    Returns:
        The lookups bundled in an XmlReferenceLookups object
    """
    list_client = clients.list_client
    project_context = _get_project_context_from_server(
        connection=list_client.con,
        shortcode=root.attrib["shortcode"],
    )
    permissions = get_permissions_lookup(root, project_context)
    authorships = get_authorship_lookup(root)
    listnodes = list_client.get_list_node_id_to_iri_lookup()
    return XmlReferenceLookups(permissions=permissions, listnodes=listnodes, authorships=authorships)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _get_project_context_from_server(connection: Connection, shortcode: str) -> ProjectContext:
    """
    Create the ProjectContext for the given shortcode.

    Args:
        connection: connection handed to the ProjectContext
        shortcode: shortcode of the project

    Raises:
        InputError: if creating the ProjectContext raises a BaseError.
            The original error is logged and deliberately not chained.

    Returns:
        The project context
    """
    try:
        return ProjectContext(con=connection, shortcode=shortcode)
    except BaseError:
        # logger.exception records the traceback, so the cause can be dropped from the re-raise
        logger.exception("Unable to retrieve project context from DSP server")
        raise InputError("Unable to retrieve project context from DSP server") from None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def get_stash_and_upload_order(
    resources: list[ProcessedResource],
) -> tuple[list[ProcessedResource], Stash | None]:
    """
    Bring the resources into the generated upload order and stash their circular references.

    Args:
        resources: processed resources, in any order

    Returns:
        The resources sorted according to the generated upload order,
        and the result of stashing the circular references
    """
    logger.debug("Get stash and upload order")
    graph_info = create_info_for_graph_from_processed_resources(resources)
    stash_lookup, upload_order = generate_upload_order(graph_info)
    # map every resource ID to its resource, so that the ID-based upload order can be applied
    resource_by_id = {resource.res_id: resource for resource in resources}
    ordered_resources = [resource_by_id[resource_id] for resource_id in upload_order]
    return ordered_resources, stash_circular_references(ordered_resources, stash_lookup)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import warnings
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from loguru import logger
|
|
7
|
+
from lxml import etree
|
|
8
|
+
from tqdm import tqdm
|
|
9
|
+
|
|
10
|
+
from dsp_tools.commands.xmlupload.models.input_problems import AllIIIFUriProblems
|
|
11
|
+
from dsp_tools.commands.xmlupload.prepare_xml_input.iiif_uri_validator import IIIFUriValidator
|
|
12
|
+
from dsp_tools.error.custom_warnings import DspToolsUserWarning
|
|
13
|
+
from dsp_tools.error.exceptions import InputError
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def validate_iiif_uris(root: etree._Element) -> None:
    """
    Validate the text of all "iiif-uri" elements and warn about the problems found.

    If there are more than 1001 URIs, a warning is emitted and nothing is validated.

    Args:
        root: root element of the XML document
    """
    uris = []
    for node in root.iter(tag="iiif-uri"):
        text = node.text
        if text:
            uris.append(text.strip())

    uri_count = len(uris)
    if uri_count > 1001:
        skip_warning = DspToolsUserWarning(
            f"Your data contains {uri_count} IIIF-URIs. "
            "Each validation makes a server call. "
            "Due to the large number, the validation of the IIIF-URIs has to be skipped."
        )
        warnings.warn(skip_warning)
        return

    progress_bar = tqdm(uris, desc="Checking IIIF-URIs", dynamic_ncols=True)
    validator = IIIFUriValidator()
    # validate_one_uri returns something truthy only if there is a problem to report
    problems = [problem for uri in progress_bar if (problem := validator.validate_one_uri(uri))]
    if not problems:
        return
    msg = AllIIIFUriProblems(problems).get_msg()
    warnings.warn(DspToolsUserWarning(msg))
    logger.warning(msg)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def check_if_bitstreams_exist(root: etree._Element, imgdir: str) -> None:
    """
    Make sure that all bitstreams referenced in the XML file exist in the imgdir.

    Only the first bitstream element with text is checked per resource.

    Args:
        root: parsed XML file
        imgdir: folder where the bitstreams are stored

    Raises:
        InputError: if a bitstream does not exist in the imgdir,
            or if a resource has a bitstream element but none of them contains a file path
    """
    logger.debug("Checking if filepaths exist.")
    multimedia_resources = [x for x in root if any(y.tag == "bitstream" for y in x.iter())]
    progress_bar = tqdm(multimedia_resources, desc="Checking multimedia filepaths", dynamic_ncols=True)
    img_dir = Path(imgdir)
    for res in progress_bar:
        # A resource is selected as soon as it has a bitstream element, even an empty one.
        # The default of None prevents a bare StopIteration from escaping in that case.
        pth = next((Path(x.text.strip()) for x in res.iter() if x.tag == "bitstream" and x.text), None)
        if pth is None:
            raise InputError(
                f"Resource '{res.attrib['label']}' contains a bitstream element without a file path."
            )
        if not (img_dir / pth).is_file():
            raise InputError(
                f"Bitstream '{pth!s}' of resource '{res.attrib['label']}' does not exist in the imgdir '{imgdir}'."
            )
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from json.decoder import JSONDecodeError
|
|
7
|
+
from typing import Union
|
|
8
|
+
from typing import assert_never
|
|
9
|
+
from typing import cast
|
|
10
|
+
|
|
11
|
+
from dsp_tools.commands.xmlupload.models.formatted_text_value import FormattedTextValue
|
|
12
|
+
from dsp_tools.commands.xmlupload.models.processed.values import IntervalFloats
|
|
13
|
+
from dsp_tools.commands.xmlupload.models.processed.values import ProcessedValue
|
|
14
|
+
from dsp_tools.commands.xmlupload.models.processed.values import ProcessedValueTypes
|
|
15
|
+
from dsp_tools.error.exceptions import XmlInputConversionError
|
|
16
|
+
from dsp_tools.utils.data_formats.date_util import Date
|
|
17
|
+
from dsp_tools.utils.data_formats.date_util import parse_date_string
|
|
18
|
+
|
|
19
|
+
# Union of the raw input shapes accepted by the transformer functions in this module:
# a plain string, a FormattedTextValue, a 2-tuple of optional strings, or None.
type InputTypes = Union[str, FormattedTextValue, tuple[str | None, str | None] | None]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class TypeTransformerMapper:
    """Pairs a processed value class with the function that transforms the raw input for it."""

    # class of the processed value
    val_type: type[ProcessedValue]
    # function that turns the raw input into the content of the processed value
    val_transformer: Callable[[InputTypes], ProcessedValueTypes]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def assert_is_string(value: InputTypes) -> str:
|
|
29
|
+
"""Assert a value is a string."""
|
|
30
|
+
match value:
|
|
31
|
+
case str() as s:
|
|
32
|
+
return s
|
|
33
|
+
case FormattedTextValue() as xml:
|
|
34
|
+
raise XmlInputConversionError(f"Expected string value, but got XML value: {xml.xmlstr}")
|
|
35
|
+
case tuple():
|
|
36
|
+
raise XmlInputConversionError(f"Expected string value, but got tuple value: {value}")
|
|
37
|
+
case None:
|
|
38
|
+
raise XmlInputConversionError("Expected string value, but got None")
|
|
39
|
+
case _:
|
|
40
|
+
assert_never(value)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def assert_is_tuple(value: InputTypes) -> tuple[str, str]:
|
|
44
|
+
"""Assert a value is a tuple."""
|
|
45
|
+
match value:
|
|
46
|
+
case tuple() as t:
|
|
47
|
+
if len(t) == 2 and isinstance(t[0], str) and isinstance(t[1], str):
|
|
48
|
+
return cast(tuple[str, str], t)
|
|
49
|
+
raise XmlInputConversionError(f"Expected tuple with two elements but got {value}")
|
|
50
|
+
case FormattedTextValue() as xml:
|
|
51
|
+
raise XmlInputConversionError(f"Expected tuple value, but got XML value: {xml.xmlstr}")
|
|
52
|
+
case str():
|
|
53
|
+
raise XmlInputConversionError(f"Expected tuple value, but got string value: {value}")
|
|
54
|
+
case None:
|
|
55
|
+
raise XmlInputConversionError("Expected tuple value, but got None")
|
|
56
|
+
case _:
|
|
57
|
+
assert_never(value)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def transform_boolean(value: InputTypes) -> bool:
    """
    Transform the value into a boolean.

    Accepted as True: "True", "true", "1", 1, True.
    Accepted as False: "False", "false", "0", 0, False.

    Raises:
        XmlInputConversionError: if the value is none of the accepted spellings
    """
    # tuples instead of sets, so that an unhashable value ends up in the error below
    # True and False need no entry of their own, because they compare equal to 1 and 0
    if value in ("True", "true", "1", 1):
        return True
    if value in ("False", "false", "0", 0):
        return False
    raise XmlInputConversionError(f"Could not parse boolean value: {value}")
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def transform_date(input_value: InputTypes) -> Date:
    """
    Parse a date string into a Date object.

    Raises:
        XmlInputConversionError: if the input is not a string
    """
    return parse_date_string(assert_is_string(input_value))
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def transform_decimal(value: InputTypes) -> float:
    """
    Convert a string into a float.

    Raises:
        XmlInputConversionError: if the input is not a string
        ValueError: if the string cannot be converted by float()
    """
    return float(assert_is_string(value))
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def transform_integer(value: InputTypes) -> int:
    """
    Convert a string into an integer.

    Raises:
        XmlInputConversionError: if the input is not a string
        ValueError: if the string cannot be converted by int()
    """
    return int(assert_is_string(value))
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def transform_interval(input_value: InputTypes) -> IntervalFloats:
    """
    Convert a tuple of two strings into an IntervalFloats object.

    Raises:
        XmlInputConversionError: if the input is not a tuple of two strings,
            or if creating the interval raises a ValueError
    """
    bounds = assert_is_tuple(input_value)
    first, second = bounds
    try:
        return IntervalFloats(float(first), float(second))
    except ValueError:
        raise XmlInputConversionError(f"Could not parse interval: {bounds}") from None
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def transform_geometry(value: InputTypes) -> str:
    """
    Parse a JSON string and serialise it again with json.dumps.

    Raises:
        XmlInputConversionError: if the input is not a string or not valid JSON
    """
    raw_json = assert_is_string(value)
    try:
        parsed = json.loads(raw_json)
    except JSONDecodeError:
        raise XmlInputConversionError(f"Could not parse json value: {value}") from None
    return json.dumps(parsed)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def transform_simpletext(value: InputTypes) -> str:
    """
    Return the input string, rejecting an empty one.

    Raises:
        XmlInputConversionError: if the input is not a string or is the empty string
    """
    text = assert_is_string(value)
    if not text:
        raise XmlInputConversionError("After removing redundant whitespaces and newlines the input string is empty.")
    return text
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def transform_richtext(value: InputTypes) -> FormattedTextValue:
    """
    Wrap the input string in a FormattedTextValue, rejecting an empty one.

    Raises:
        XmlInputConversionError: if the input is not a string or is the empty string
    """
    text = assert_is_string(value)
    if not text:
        raise XmlInputConversionError("After removing redundant whitespaces and newlines the input string is empty.")
    return FormattedTextValue(text)
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import cast
|
|
3
|
+
|
|
4
|
+
from rdflib import Graph
|
|
5
|
+
|
|
6
|
+
from dsp_tools.clients.connection import Connection
|
|
7
|
+
from dsp_tools.commands.xmlupload.make_rdf_graph.jsonld_utils import serialise_jsonld_for_resource
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True)
class ResourceCreateClient:
    """Client that creates resources on a DSP server."""

    # connection used to send the request
    con: Connection

    def create_resource(
        self,
        graph: Graph,
        resource_has_bitstream: bool,
    ) -> str:
        """
        Create a resource on the DSP server.

        Args:
            graph: graph of the resource, serialised as JSON-LD before sending
            resource_has_bitstream: if True, the header "X-Asset-Ingested: true" is sent along

        Returns:
            The "@id" of the response, i.e. the IRI of the resource
        """
        payload = serialise_jsonld_for_resource(graph)
        headers = None
        if resource_has_bitstream:
            headers = {"X-Asset-Ingested": "true"}
        response = self.con.post(route="/v2/resources", data=payload, headers=headers)
        return cast(str, response["@id"])
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import regex
|
|
2
|
+
|
|
3
|
+
from dsp_tools.commands.xmlupload.iri_resolver import IriResolver
|
|
4
|
+
from dsp_tools.error.exceptions import Id2IriReplacementError
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def prepare_richtext_string_for_upload(richtext_str: str, iri_resolver: IriResolver) -> str:
    """
    Replace the internal IDs of a richtext with IRIs and wrap the result for the upload.

    Args:
        richtext_str: richtext that may reference resources through `href="IRI:<id>:IRI"`
        iri_resolver: lookup that is used to resolve the internal IDs to IRIs

    Returns:
        The richtext with replaced IDs, wrapped by `_richtext_as_xml`

    Raises:
        Id2IriReplacementError: if at least one internal ID could not be resolved
    """
    resolved_text, unresolved_ids = replace_ids_if_found(richtext_str, iri_resolver)
    if not unresolved_ids:
        return _richtext_as_xml(resolved_text)
    # the message shows the text after the replacement of those IDs that could be resolved
    raise Id2IriReplacementError(
        f"Some internal IDs of the following richtext could not be resolved to an IRI: {resolved_text}"
    )
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def replace_ids_if_found(richtext_str: str, iri_resolver: IriResolver) -> tuple[str, set[str]]:
    """
    Replace all internal IDs of a richtext for which the resolver returns an IRI.

    Args:
        richtext_str: richtext that may reference resources through `href="IRI:<id>:IRI"`
        iri_resolver: lookup that is queried with `get()` for every internal ID

    Returns:
        - The richtext in which the resolvable IDs are replaced.
        - The set of internal IDs for which the resolver returned nothing.
    """
    text = richtext_str
    unresolved: set[str] = set()
    for internal_id in find_internal_ids(richtext_str):
        iri = iri_resolver.get(internal_id)
        if not iri:
            unresolved.add(internal_id)
            continue
        text = _replace_one_id(text, internal_id, iri)
    return text, unresolved
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _replace_one_id(txt: str, id_: str, iri: str) -> str:
    """
    Replace every occurrence of `href="IRI:<id_>:IRI"` in the text with `href="<iri>"`.

    Args:
        txt: text in which the replacement takes place
        id_: internal ID to look for
        iri: IRI that is inserted instead

    Returns:
        The text with the replacements applied
    """
    placeholder = f'href="IRI:{id_}:IRI"'
    replacement = f'href="{iri}"'
    return txt.replace(placeholder, replacement)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def find_internal_ids(txt: str) -> set[str]:
    """
    Collect the internal IDs that are referenced in the text in the form `href="IRI:<id>:IRI"`.

    Args:
        txt: text to search

    Returns:
        The distinct IDs found between `IRI:` and `:IRI`
    """
    id_pattern = 'href="IRI:(.*?):IRI"'
    matches = regex.findall(pattern=id_pattern, string=txt)
    return set(matches)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _richtext_as_xml(richtext_str: str) -> str:
    """
    Prefix the richtext with an XML declaration and enclose it in a `<text>` element.

    Args:
        richtext_str: content that is placed inside the `<text>` element, without any escaping

    Returns:
        The XML declaration, a newline, and the `<text>` element
    """
    xml_declaration = '<?xml version="1.0" encoding="UTF-8"?>\n'
    return xml_declaration + f"<text>{richtext_str}</text>"
|
|
File without changes
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import rustworkx as rx
|
|
6
|
+
|
|
7
|
+
from dsp_tools.commands.xmlupload.stash.graph_models import Cost
|
|
8
|
+
from dsp_tools.commands.xmlupload.stash.graph_models import Edge
|
|
9
|
+
from dsp_tools.commands.xmlupload.stash.graph_models import InfoForGraph
|
|
10
|
+
from dsp_tools.commands.xmlupload.stash.graph_models import LinkValueLink
|
|
11
|
+
from dsp_tools.commands.xmlupload.stash.graph_models import StandOffLink
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def generate_upload_order(info_for_graph: InfoForGraph) -> tuple[dict[str, list[str]], list[str]]:
    """
    Build the graph of resources and links, and derive the upload order from it.

    Args:
        info_for_graph: Info for the graph

    Returns:
        - A dictionary which maps the resources that have stashes to the UUIDs of the stashed links.
        - A list of resource IDs which gives the order in which the resources should be uploaded to DSP-API.
    """
    rx_graph, index_to_id, all_edges = _make_graph(info_for_graph)
    # the third element (number of stashed links) is not part of the result of this function
    stashes, order, _ = _generate_upload_order_from_graph(graph=rx_graph, node_to_id=index_to_id, edges=all_edges)
    return stashes, order
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _make_graph(
    info_for_graph: InfoForGraph,
) -> tuple[rx.PyDiGraph[Any, Any], dict[int, str], list[Edge]]:
    """
    Construct a rustworkx directed graph from the resources of an XML file and the links between them.
    Every resource becomes a node, every link becomes an edge.
    A stand-off link contributes one edge per target resource.

    Args:
        info_for_graph: Information required to construct the graph

    Returns:
        - The rustworkx graph.
        - A dictionary that maps the rustworkx index number of the nodes to the original resource ID from the XML file.
        - A list with all the edges in the graph.
    """
    resource_ids = info_for_graph.all_resource_ids
    graph: rx.PyDiGraph[Any, Any] = rx.PyDiGraph()
    # the payload of each node is the tuple (resource ID, None, None)
    node_indices = list(graph.add_nodes_from([(res_id, None, None) for res_id in resource_ids]))
    id_to_node = dict(zip(resource_ids, node_indices))
    node_to_id = dict(zip(node_indices, resource_ids))

    edges: list[Edge] = []
    for link in info_for_graph.link_values:
        edges.append(Edge(id_to_node[link.source_id], id_to_node[link.target_id], link))
    for standoff in info_for_graph.standoff_links:
        for target_id in standoff.target_ids:
            edges.append(Edge(id_to_node[standoff.source_id], id_to_node[target_id], standoff))

    graph.add_edges_from([edge.as_tuple() for edge in edges])
    return graph, node_to_id, edges
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _generate_upload_order_from_graph(
    graph: rx.PyDiGraph[Any, Any],
    node_to_id: dict[int, str],
    edges: list[Edge],
) -> tuple[dict[str, list[str]], list[str], int]:
    """
    Determine the order in which the resources should be uploaded to the DSP-API based on the dependencies.

    First, the nodes without outgoing links are taken out of the graph.
    As long as nodes remain, a cycle is looked up, the cheapest links of that cycle are stashed
    (i.e. removed from the graph), and the nodes that have no outgoing links any more are taken out.

    Args:
        graph: graph, which is emptied in the process
        node_to_id: mapping between indices of the graph nodes and original resource IDs from the XML file
        edges: edges in the graph (contains info about source node, target node, and link info)

    Returns:
        - A dictionary which maps the resources that have stashes to the UUIDs of the stashed links.
        - A list of resource IDs which gives the order in which the resources should be uploaded to DSP-API.
        - The number of links in the stash.
    """
    stash_lookup: dict[str, list[str]] = {}
    stash_counter = 0
    upload_order: list[str] = []

    ready, remaining = _remove_leaf_nodes(graph, node_to_id, set(node_to_id.keys()))
    upload_order.extend(ready)
    while remaining:
        cycle = list(rx.digraph_find_cycle(graph))
        stashed_edges = _find_cheapest_outgoing_links(graph, cycle, edges)
        stash_counter += len(stashed_edges)
        _remove_edges_to_stash(
            graph=graph,
            edges_to_remove=stashed_edges,
            all_edges=edges,
            remaining_nodes=remaining,
        )
        stash_lookup = _add_stash_to_lookup_dict(stash_lookup, [edge.link_object for edge in stashed_edges])
        ready, remaining = _remove_leaf_nodes(graph, node_to_id, remaining)
        upload_order.extend(ready)
    return stash_lookup, upload_order, stash_counter
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _remove_leaf_nodes(
    graph: rx.PyDiGraph[Any, Any],
    node_to_id: dict[int, str],
    node_indices: set[int],
) -> tuple[list[str], set[int]]:
    """
    Repeatedly remove the leaf nodes from the graph until no leaf node is left.
    A leaf node is a node without outgoing links, which means that it has no dependencies and is ok to upload.
    Removing leaf nodes can turn other nodes into leaf nodes, hence the repetition.

    Args:
        graph: graph, from which the leaf nodes are removed
        node_to_id: mapping of the rustworkx index number of the nodes to the original resource ID from the XML file
        node_indices: node indices that are in the graph (this set is not modified)

    Returns:
        - A list with the IDs of the removed leaf nodes.
        - A set with the indices of the nodes that remain in the graph.
    """
    removed_ids: list[str] = []
    remaining = set(node_indices)
    while True:
        leaves = [idx for idx in remaining if graph.out_degree(idx) == 0]
        if not leaves:
            break
        for leaf in leaves:
            removed_ids.append(node_to_id[leaf])
        graph.remove_nodes_from(leaves)
        remaining = remaining - set(leaves)
    return removed_ids, remaining
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _find_cheapest_outgoing_links(
    graph: rx.PyDiGraph[Any, Any],
    cycle: list[tuple[int, int]],
    edges: list[Edge],
) -> list[Edge]:
    """
    Select the links that should be stashed in order to break the cycle.
    For the source node of every edge in the cycle, the summed cost of its outgoing links
    is divided by the number of its incoming links. The edge of the cycle with the lowest ratio is chosen;
    if several edges share the lowest ratio, the one that comes first in the cycle is taken.

    Args:
        graph: graph
        cycle: the list with (source, target) for each edge in the cycle
        edges: edges in the graph (contains info about source node, target node, and link info)

    Returns:
        The edges (i.e. links) that should be stashed (containing all the edges connecting the two nodes)
    """
    costs: list[Cost] = []
    for source, target in cycle:
        incoming_count = len(graph.in_edges(source))
        # the third element of an out-edge is the link object, which knows its own cost
        outgoing_cost = sum(out_edge[2].cost_links for out_edge in graph.out_edges(source))
        costs.append(Cost(source, target, outgoing_cost / incoming_count))
    # the sort is stable, so ties keep the order of the cycle
    costs.sort(key=lambda cost: cost.node_value)
    cheapest = costs[0]
    return [edge for edge in edges if edge.source == cheapest.source and edge.target == cheapest.target]
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _remove_edges_to_stash(
    graph: rx.PyDiGraph[Any, Any],
    edges_to_remove: list[Edge],
    all_edges: list[Edge],
    remaining_nodes: set[int],
) -> None:
    """
    Remove edges from the graph in order to break a cycle.
    If one of the edges stands for a stand-off link, the other edges of the same text value
    (see `_find_phantom_xml_edges`) are removed as well.

    Args:
        graph: graph, which is modified in place
        edges_to_remove: edges that should be removed; source and target are taken from the first one
        all_edges: all edges in the original graph
        remaining_nodes: indices of the nodes in the graph
    """
    source, target = edges_to_remove[0].source, edges_to_remove[0].target
    # if only one (source, target) is removed, it removes only one edge, not all
    # therefore we need as many entries in the list as there are edges between the source and the target
    pairs_to_remove = [(edge.source, edge.target) for edge in edges_to_remove]
    standoff_links = [edge.link_object for edge in edges_to_remove if isinstance(edge.link_object, StandOffLink)]
    for standoff in standoff_links:
        pairs_to_remove.extend(_find_phantom_xml_edges(source, target, all_edges, standoff, remaining_nodes))
    graph.remove_edges_from(pairs_to_remove)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _find_phantom_xml_edges(
    source_node_index: int,
    target_node_index: int,
    all_edges: list[Edge],
    xml_link_to_stash: StandOffLink,
    remaining_nodes: set[int],
) -> list[tuple[int, int]]:
    """
    A text value may contain links to several resources, each of which is an edge in the graph.
    If the text value is stashed because of one of these links, all of its links are stashed in the real data.
    So, the other ("phantom") edges of that text value must be removed from the graph, too.
    This function identifies them.

    Args:
        source_node_index: rustworkx index of source node
        target_node_index: rustworkx index of target node
        all_edges: all edges in the original graph
        xml_link_to_stash: XML link that will be stashed
        remaining_nodes: indices of all nodes in the graph

    Returns:
        edges (rustworkx indices of nodes) that represent the links in the original XML text,
        without the edge that leads to the target node itself
    """
    phantom_edges: list[tuple[int, int]] = []
    for edge in all_edges:
        same_text_value = edge.source == source_node_index and edge.link_object == xml_link_to_stash
        other_target = edge.target != target_node_index
        # the target could have been removed because it was a leaf node, so we must check if it is still there
        target_in_graph = edge.target in remaining_nodes
        if same_text_value and other_target and target_in_graph:
            phantom_edges.append((edge.source, edge.target))
    return phantom_edges
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def _add_stash_to_lookup_dict(
    stash_dict: dict[str, list[str]],
    links_to_stash: list[StandOffLink | LinkValueLink],
) -> dict[str, list[str]]:
    """
    Register the UUIDs of the links to stash under the ID of the resource they originate from.

    Args:
        stash_dict: lookup from resource ID to the UUIDs of its stashed links, which is modified in place
        links_to_stash: links to register; must not be empty

    Returns:
        The same dictionary that was passed in, with the UUIDs added
    """
    # all stashed links have the same subject id, so we can just take the first one
    subject_id = links_to_stash[0].source_id
    uuids = [link.link_uuid for link in links_to_stash]
    stash_dict.setdefault(subject_id, []).extend(uuids)
    return stash_dict
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from dsp_tools.commands.xmlupload.models.processed.res import ProcessedResource
|
|
2
|
+
from dsp_tools.commands.xmlupload.models.processed.values import ProcessedLink
|
|
3
|
+
from dsp_tools.commands.xmlupload.models.processed.values import ProcessedRichtext
|
|
4
|
+
from dsp_tools.commands.xmlupload.stash.graph_models import InfoForGraph
|
|
5
|
+
from dsp_tools.commands.xmlupload.stash.graph_models import LinkValueLink
|
|
6
|
+
from dsp_tools.commands.xmlupload.stash.graph_models import StandOffLink
|
|
7
|
+
from dsp_tools.utils.data_formats.iri_util import is_resource_iri
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def create_info_for_graph_from_processed_resources(resources: list[ProcessedResource]) -> InfoForGraph:
    """
    Extracts information to create the graph to analyse the circular references.

    Args:
        resources: the resources, whose IDs and links are collected in the given order

    Returns:
        The IDs of all resources, their direct links, and their stand-off links
    """
    info = InfoForGraph(all_resource_ids=[], link_values=[], standoff_links=[])
    for resource in resources:
        direct_links, standoff_links = _process_one_resource(resource)
        info.link_values.extend(direct_links)
        info.standoff_links.extend(standoff_links)
        info.all_resource_ids.append(resource.res_id)
    return info
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _process_one_resource(resource: ProcessedResource) -> tuple[list[LinkValueLink], list[StandOffLink]]:
    """
    Collect the links of one resource that point to other resources by their internal ID.
    Targets for which `is_resource_iri` is true are left out.

    Args:
        resource: resource whose values are inspected

    Returns:
        - The direct links, one per link value.
        - The stand-off links, one per richtext value that references at least one internal ID.
    """
    direct_links = []
    standoff_links = []
    for value in resource.values:
        if isinstance(value, ProcessedLink):
            if not is_resource_iri(value.value):
                direct_links.append(
                    LinkValueLink(
                        source_id=resource.res_id,
                        target_id=value.value,
                        link_uuid=value.value_uuid,
                    )
                )
        elif isinstance(value, ProcessedRichtext) and value.resource_references:
            internal_ids = {ref for ref in value.resource_references if not is_resource_iri(ref)}
            if internal_ids:
                standoff_links.append(
                    StandOffLink(
                        source_id=resource.res_id,
                        target_ids=internal_ids,
                        link_uuid=value.value_uuid,
                    )
                )
    return direct_links, standoff_links
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class InfoForGraph:
    """
    This class bundles the information from which the graph of resources and links is constructed.

    Attributes:
        all_resource_ids: IDs of all the resources, each of which becomes a node in the graph
        link_values: direct links between resources
        standoff_links: links that originate from text values
    """

    all_resource_ids: list[str]
    link_values: list[LinkValueLink]
    standoff_links: list[StandOffLink]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class LinkValueLink:
    """
    This class represents a direct link (resptr) between a starting resource and a target resource.

    Attributes:
        source_id: ID of the resource from which the link originates
        target_id: ID of the resource where the link points to
        link_uuid: identifier of this link
    """

    source_id: str
    target_id: str
    link_uuid: str

    @property
    def cost_links(self) -> float:
        """The cost of this outgoing link, which is always 1"""
        return 1
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass(frozen=True)
class StandOffLink:
    """
    This class represents the links that a single text value of a starting resource contains.
    One text value can link to several target resources, hence the targets are a set.

    Attributes:
        source_id: ID of the resource from which the link(s) originate
        target_ids: IDs of the resources that are referenced in the text value
        link_uuid: identifier of this link
    """

    source_id: str
    target_ids: set[str]
    link_uuid: str

    @property
    def cost_links(self) -> float:
        """The cost of this outgoing link (1 / number of links in the XML text)"""
        number_of_targets = len(self.target_ids)
        return 1 / number_of_targets
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass(frozen=True)
class Edge:
    """
    This class represents an edge in the rustworkx graph.
    A stand-off link with several targets is represented by several edges that share the same link object.

    Attributes:
        source: rustworkx index of the resource from which the link originates
        target: rustworkx index of the resource where the link points to
        link_object: the link that connects the source with the target
    """

    source: int
    target: int
    link_object: LinkValueLink | StandOffLink

    def as_tuple(self) -> tuple[int, int, LinkValueLink | StandOffLink]:
        """Returns a representation of this edge as a tuple of the source index, target index and link object"""
        return self.source, self.target, self.link_object
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@dataclass(frozen=True)
class Cost:
    """
    This class holds the cost-gain-ratio of stashing one edge of a cycle.

    Attributes:
        source: rustworkx index of the resource from which the link originates
        target: rustworkx index of the resource where the link points to
        node_value: cost-gain-ratio if this link is stashed
    """

    source: int
    target: int
    node_value: float
|