dsp-tools 0.9.13__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsp_tools/__init__.py +5 -0
- dsp_tools/cli/args.py +47 -0
- dsp_tools/cli/call_action.py +85 -0
- dsp_tools/cli/call_action_files_only.py +101 -0
- dsp_tools/cli/call_action_with_network.py +207 -0
- dsp_tools/cli/create_parsers.py +479 -0
- dsp_tools/cli/entry_point.py +322 -0
- dsp_tools/cli/utils.py +87 -0
- dsp_tools/clients/CLAUDE.md +420 -0
- dsp_tools/clients/authentication_client.py +14 -0
- dsp_tools/clients/authentication_client_live.py +66 -0
- dsp_tools/clients/connection.py +35 -0
- dsp_tools/clients/connection_live.py +233 -0
- dsp_tools/clients/fuseki_metrics.py +60 -0
- dsp_tools/clients/group_user_clients.py +35 -0
- dsp_tools/clients/group_user_clients_live.py +181 -0
- dsp_tools/clients/legal_info_client.py +23 -0
- dsp_tools/clients/legal_info_client_live.py +132 -0
- dsp_tools/clients/list_client.py +49 -0
- dsp_tools/clients/list_client_live.py +166 -0
- dsp_tools/clients/metadata_client.py +24 -0
- dsp_tools/clients/metadata_client_live.py +47 -0
- dsp_tools/clients/ontology_clients.py +49 -0
- dsp_tools/clients/ontology_create_client_live.py +166 -0
- dsp_tools/clients/ontology_get_client_live.py +80 -0
- dsp_tools/clients/permissions_client.py +68 -0
- dsp_tools/clients/project_client.py +16 -0
- dsp_tools/clients/project_client_live.py +66 -0
- dsp_tools/commands/create/communicate_problems.py +24 -0
- dsp_tools/commands/create/create.py +134 -0
- dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
- dsp_tools/commands/create/create_on_server/classes.py +99 -0
- dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
- dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
- dsp_tools/commands/create/create_on_server/group_users.py +165 -0
- dsp_tools/commands/create/create_on_server/lists.py +163 -0
- dsp_tools/commands/create/create_on_server/mappers.py +12 -0
- dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
- dsp_tools/commands/create/create_on_server/ontology.py +52 -0
- dsp_tools/commands/create/create_on_server/project.py +68 -0
- dsp_tools/commands/create/create_on_server/properties.py +119 -0
- dsp_tools/commands/create/exceptions.py +29 -0
- dsp_tools/commands/create/lists_only.py +66 -0
- dsp_tools/commands/create/models/create_problems.py +87 -0
- dsp_tools/commands/create/models/parsed_ontology.py +88 -0
- dsp_tools/commands/create/models/parsed_project.py +81 -0
- dsp_tools/commands/create/models/rdf_ontology.py +12 -0
- dsp_tools/commands/create/models/server_project_info.py +100 -0
- dsp_tools/commands/create/parsing/parse_lists.py +45 -0
- dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
- dsp_tools/commands/create/parsing/parse_project.py +149 -0
- dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
- dsp_tools/commands/create/project_validate.py +595 -0
- dsp_tools/commands/create/serialisation/ontology.py +119 -0
- dsp_tools/commands/create/serialisation/project.py +44 -0
- dsp_tools/commands/excel2json/CLAUDE.md +101 -0
- dsp_tools/commands/excel2json/json_header.py +321 -0
- dsp_tools/commands/excel2json/lists/__init__.py +0 -0
- dsp_tools/commands/excel2json/lists/compliance_checks.py +292 -0
- dsp_tools/commands/excel2json/lists/make_lists.py +247 -0
- dsp_tools/commands/excel2json/lists/models/__init__.py +0 -0
- dsp_tools/commands/excel2json/lists/models/deserialise.py +30 -0
- dsp_tools/commands/excel2json/lists/models/input_error.py +216 -0
- dsp_tools/commands/excel2json/lists/models/serialise.py +57 -0
- dsp_tools/commands/excel2json/lists/utils.py +81 -0
- dsp_tools/commands/excel2json/models/__init__.py +0 -0
- dsp_tools/commands/excel2json/models/input_error.py +416 -0
- dsp_tools/commands/excel2json/models/json_header.py +175 -0
- dsp_tools/commands/excel2json/models/list_node_name.py +16 -0
- dsp_tools/commands/excel2json/models/ontology.py +76 -0
- dsp_tools/commands/excel2json/old_lists.py +328 -0
- dsp_tools/commands/excel2json/project.py +280 -0
- dsp_tools/commands/excel2json/properties.py +370 -0
- dsp_tools/commands/excel2json/resources.py +336 -0
- dsp_tools/commands/excel2json/utils.py +352 -0
- dsp_tools/commands/excel2xml/__init__.py +7 -0
- dsp_tools/commands/excel2xml/excel2xml_cli.py +523 -0
- dsp_tools/commands/excel2xml/excel2xml_lib.py +1953 -0
- dsp_tools/commands/excel2xml/propertyelement.py +47 -0
- dsp_tools/commands/get/__init__.py +0 -0
- dsp_tools/commands/get/get.py +166 -0
- dsp_tools/commands/get/get_permissions.py +257 -0
- dsp_tools/commands/get/get_permissions_legacy.py +89 -0
- dsp_tools/commands/get/legacy_models/__init__.py +0 -0
- dsp_tools/commands/get/legacy_models/context.py +318 -0
- dsp_tools/commands/get/legacy_models/group.py +241 -0
- dsp_tools/commands/get/legacy_models/helpers.py +47 -0
- dsp_tools/commands/get/legacy_models/listnode.py +390 -0
- dsp_tools/commands/get/legacy_models/model.py +12 -0
- dsp_tools/commands/get/legacy_models/ontology.py +324 -0
- dsp_tools/commands/get/legacy_models/project.py +366 -0
- dsp_tools/commands/get/legacy_models/propertyclass.py +417 -0
- dsp_tools/commands/get/legacy_models/resourceclass.py +676 -0
- dsp_tools/commands/get/legacy_models/user.py +438 -0
- dsp_tools/commands/get/models/__init__.py +0 -0
- dsp_tools/commands/get/models/permissions_models.py +10 -0
- dsp_tools/commands/id2iri.py +258 -0
- dsp_tools/commands/ingest_xmlupload/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +178 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +69 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +166 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +121 -0
- dsp_tools/commands/ingest_xmlupload/ingest_files/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +64 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +20 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +57 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +66 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +67 -0
- dsp_tools/commands/resume_xmlupload/__init__.py +0 -0
- dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +96 -0
- dsp_tools/commands/start_stack.py +428 -0
- dsp_tools/commands/update_legal/CLAUDE.md +344 -0
- dsp_tools/commands/update_legal/__init__.py +0 -0
- dsp_tools/commands/update_legal/core.py +182 -0
- dsp_tools/commands/update_legal/csv_operations.py +135 -0
- dsp_tools/commands/update_legal/models.py +87 -0
- dsp_tools/commands/update_legal/xml_operations.py +247 -0
- dsp_tools/commands/validate_data/CLAUDE.md +159 -0
- dsp_tools/commands/validate_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/constants.py +59 -0
- dsp_tools/commands/validate_data/mappers.py +143 -0
- dsp_tools/commands/validate_data/models/__init__.py +0 -0
- dsp_tools/commands/validate_data/models/api_responses.py +45 -0
- dsp_tools/commands/validate_data/models/input_problems.py +119 -0
- dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
- dsp_tools/commands/validate_data/models/validation.py +106 -0
- dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
- dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
- dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
- dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
- dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
- dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
- dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
- dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
- dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
- dsp_tools/commands/validate_data/sparql/cardinality_shacl.py +209 -0
- dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
- dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
- dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
- dsp_tools/commands/validate_data/utils.py +59 -0
- dsp_tools/commands/validate_data/validate_data.py +283 -0
- dsp_tools/commands/validate_data/validation/__init__.py +0 -0
- dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
- dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
- dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
- dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
- dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
- dsp_tools/commands/xmlupload/__init__.py +0 -0
- dsp_tools/commands/xmlupload/iri_resolver.py +21 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
- dsp_tools/commands/xmlupload/models/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
- dsp_tools/commands/xmlupload/models/formatted_text_value.py +10 -0
- dsp_tools/commands/xmlupload/models/ingest.py +143 -0
- dsp_tools/commands/xmlupload/models/input_problems.py +58 -0
- dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
- dsp_tools/commands/xmlupload/models/permission.py +45 -0
- dsp_tools/commands/xmlupload/models/permissions_parsed.py +93 -0
- dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
- dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
- dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
- dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
- dsp_tools/commands/xmlupload/models/upload_clients.py +14 -0
- dsp_tools/commands/xmlupload/models/upload_state.py +20 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/ark2iri.py +55 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/iiif_uri_validator.py +50 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/list_client.py +120 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
- dsp_tools/commands/xmlupload/resource_create_client.py +25 -0
- dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
- dsp_tools/commands/xmlupload/stash/__init__.py +0 -0
- dsp_tools/commands/xmlupload/stash/analyse_circular_reference_graph.py +236 -0
- dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
- dsp_tools/commands/xmlupload/stash/graph_models.py +87 -0
- dsp_tools/commands/xmlupload/stash/stash_circular_references.py +68 -0
- dsp_tools/commands/xmlupload/stash/stash_models.py +109 -0
- dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +106 -0
- dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +196 -0
- dsp_tools/commands/xmlupload/upload_config.py +76 -0
- dsp_tools/commands/xmlupload/write_diagnostic_info.py +27 -0
- dsp_tools/commands/xmlupload/xmlupload.py +516 -0
- dsp_tools/config/__init__.py +0 -0
- dsp_tools/config/logger_config.py +69 -0
- dsp_tools/config/warnings_config.py +32 -0
- dsp_tools/error/__init__.py +0 -0
- dsp_tools/error/custom_warnings.py +39 -0
- dsp_tools/error/exceptions.py +204 -0
- dsp_tools/error/problems.py +10 -0
- dsp_tools/error/xmllib_errors.py +20 -0
- dsp_tools/error/xmllib_warnings.py +54 -0
- dsp_tools/error/xmllib_warnings_util.py +159 -0
- dsp_tools/error/xsd_validation_error_msg.py +19 -0
- dsp_tools/legacy_models/__init__.py +0 -0
- dsp_tools/legacy_models/datetimestamp.py +81 -0
- dsp_tools/legacy_models/langstring.py +253 -0
- dsp_tools/legacy_models/projectContext.py +49 -0
- dsp_tools/py.typed +0 -0
- dsp_tools/resources/schema/data.xsd +648 -0
- dsp_tools/resources/schema/lists-only.json +72 -0
- dsp_tools/resources/schema/project.json +1258 -0
- dsp_tools/resources/schema/properties-only.json +874 -0
- dsp_tools/resources/schema/resources-only.json +140 -0
- dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
- dsp_tools/resources/start-stack/docker-compose.override.yml +11 -0
- dsp_tools/resources/start-stack/docker-compose.yml +88 -0
- dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
- dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
- dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
- dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
- dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
- dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
- dsp_tools/utils/__init__.py +0 -0
- dsp_tools/utils/ansi_colors.py +32 -0
- dsp_tools/utils/data_formats/__init__.py +0 -0
- dsp_tools/utils/data_formats/date_util.py +166 -0
- dsp_tools/utils/data_formats/iri_util.py +30 -0
- dsp_tools/utils/data_formats/shared.py +81 -0
- dsp_tools/utils/data_formats/uri_util.py +76 -0
- dsp_tools/utils/fuseki_bloating.py +63 -0
- dsp_tools/utils/json_parsing.py +22 -0
- dsp_tools/utils/rdf_constants.py +42 -0
- dsp_tools/utils/rdflib_utils.py +10 -0
- dsp_tools/utils/replace_id_with_iri.py +66 -0
- dsp_tools/utils/request_utils.py +238 -0
- dsp_tools/utils/xml_parsing/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
- dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
- dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
- dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
- dsp_tools/xmllib/CLAUDE.md +302 -0
- dsp_tools/xmllib/__init__.py +49 -0
- dsp_tools/xmllib/general_functions.py +877 -0
- dsp_tools/xmllib/internal/__init__.py +0 -0
- dsp_tools/xmllib/internal/checkers.py +162 -0
- dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
- dsp_tools/xmllib/internal/constants.py +46 -0
- dsp_tools/xmllib/internal/input_converters.py +155 -0
- dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
- dsp_tools/xmllib/internal/serialise_resource.py +177 -0
- dsp_tools/xmllib/internal/serialise_values.py +152 -0
- dsp_tools/xmllib/internal/type_aliases.py +11 -0
- dsp_tools/xmllib/models/__init__.py +0 -0
- dsp_tools/xmllib/models/config_options.py +28 -0
- dsp_tools/xmllib/models/date_formats.py +48 -0
- dsp_tools/xmllib/models/dsp_base_resources.py +1542 -0
- dsp_tools/xmllib/models/internal/__init__.py +0 -0
- dsp_tools/xmllib/models/internal/file_values.py +172 -0
- dsp_tools/xmllib/models/internal/geometry.py +162 -0
- dsp_tools/xmllib/models/internal/migration_metadata.py +55 -0
- dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
- dsp_tools/xmllib/models/internal/values.py +342 -0
- dsp_tools/xmllib/models/licenses/__init__.py +0 -0
- dsp_tools/xmllib/models/licenses/other.py +59 -0
- dsp_tools/xmllib/models/licenses/recommended.py +107 -0
- dsp_tools/xmllib/models/permissions.py +41 -0
- dsp_tools/xmllib/models/res.py +1782 -0
- dsp_tools/xmllib/models/root.py +348 -0
- dsp_tools/xmllib/value_checkers.py +434 -0
- dsp_tools/xmllib/value_converters.py +777 -0
- dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
- dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
- dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
- dsp_tools-18.3.0.post13.dist-info/entry_points.txt +3 -0
- dsp_tools-0.9.13.dist-info/LICENSE +0 -674
- dsp_tools-0.9.13.dist-info/METADATA +0 -144
- dsp_tools-0.9.13.dist-info/RECORD +0 -71
- dsp_tools-0.9.13.dist-info/WHEEL +0 -5
- dsp_tools-0.9.13.dist-info/entry_points.txt +0 -3
- dsp_tools-0.9.13.dist-info/top_level.txt +0 -1
- dsplib/models/connection.py +0 -272
- dsplib/models/group.py +0 -296
- dsplib/models/helpers.py +0 -505
- dsplib/models/langstring.py +0 -277
- dsplib/models/listnode.py +0 -578
- dsplib/models/model.py +0 -20
- dsplib/models/ontology.py +0 -448
- dsplib/models/permission.py +0 -112
- dsplib/models/project.py +0 -547
- dsplib/models/propertyclass.py +0 -505
- dsplib/models/resource.py +0 -366
- dsplib/models/resourceclass.py +0 -810
- dsplib/models/sipi.py +0 -30
- dsplib/models/user.py +0 -731
- dsplib/models/value.py +0 -1000
- dsplib/utils/knora-data-schema.xsd +0 -454
- dsplib/utils/knora-schema-lists.json +0 -83
- dsplib/utils/knora-schema.json +0 -434
- dsplib/utils/onto_commons.py +0 -24
- dsplib/utils/onto_create_lists.py +0 -73
- dsplib/utils/onto_create_ontology.py +0 -442
- dsplib/utils/onto_get.py +0 -58
- dsplib/utils/onto_validate.py +0 -33
- dsplib/utils/xml_upload.py +0 -539
- dsplib/widgets/doublepassword.py +0 -80
- knora/MLS-import-libraries.py +0 -84
- knora/dsp_tools.py +0 -96
- knora/dsplib/models/connection.py +0 -272
- knora/dsplib/models/group.py +0 -296
- knora/dsplib/models/helpers.py +0 -506
- knora/dsplib/models/langstring.py +0 -277
- knora/dsplib/models/listnode.py +0 -578
- knora/dsplib/models/model.py +0 -20
- knora/dsplib/models/ontology.py +0 -448
- knora/dsplib/models/permission.py +0 -112
- knora/dsplib/models/project.py +0 -583
- knora/dsplib/models/propertyclass.py +0 -505
- knora/dsplib/models/resource.py +0 -416
- knora/dsplib/models/resourceclass.py +0 -811
- knora/dsplib/models/sipi.py +0 -35
- knora/dsplib/models/user.py +0 -731
- knora/dsplib/models/value.py +0 -1000
- knora/dsplib/utils/knora-data-schema.xsd +0 -464
- knora/dsplib/utils/knora-schema-lists.json +0 -83
- knora/dsplib/utils/knora-schema.json +0 -444
- knora/dsplib/utils/onto_commons.py +0 -24
- knora/dsplib/utils/onto_create_lists.py +0 -73
- knora/dsplib/utils/onto_create_ontology.py +0 -451
- knora/dsplib/utils/onto_get.py +0 -58
- knora/dsplib/utils/onto_validate.py +0 -33
- knora/dsplib/utils/xml_upload.py +0 -540
- knora/dsplib/widgets/doublepassword.py +0 -80
- knora/knora.py +0 -2108
- knora/test.py +0 -99
- knora/testit.py +0 -76
- knora/xml2knora.py +0 -633
- {dsplib → dsp_tools/cli}/__init__.py +0 -0
- {dsplib/models → dsp_tools/clients}/__init__.py +0 -0
- {dsplib/utils → dsp_tools/commands}/__init__.py +0 -0
- {dsplib/widgets → dsp_tools/commands/create}/__init__.py +0 -0
- {knora → dsp_tools/commands/create/create_on_server}/__init__.py +0 -0
- {knora/dsplib → dsp_tools/commands/create/models}/__init__.py +0 -0
- {knora/dsplib/models → dsp_tools/commands/create/parsing}/__init__.py +0 -0
- {knora/dsplib/utils → dsp_tools/commands/create/serialisation}/__init__.py +0 -0
- {knora/dsplib/widgets → dsp_tools/commands/excel2json}/__init__.py +0 -0
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
# explicitly re-export imported stuff, so that API users can import it from this module
|
|
2
|
+
# (see https://mypy.readthedocs.io/en/stable/command_line.html#cmdoption-mypy-no-implicit-reexport)
|
|
3
|
+
|
|
4
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import *
|
|
5
|
+
from dsp_tools.commands.excel2xml.propertyelement import PropertyElement as PropertyElement
|
|
6
|
+
from dsp_tools.utils.data_formats.shared import check_notna as check_notna
|
|
7
|
+
from dsp_tools.utils.data_formats.shared import simplify_name as simplify_name
|
|
@@ -0,0 +1,523 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import warnings
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
from typing import Optional
|
|
8
|
+
from typing import Union
|
|
9
|
+
|
|
10
|
+
import pandas as pd
|
|
11
|
+
import regex
|
|
12
|
+
from lxml import etree
|
|
13
|
+
|
|
14
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import append_permissions
|
|
15
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import make_bitstream_prop
|
|
16
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import make_boolean_prop
|
|
17
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import make_color_prop
|
|
18
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import make_date_prop
|
|
19
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import make_decimal_prop
|
|
20
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import make_geometry_prop
|
|
21
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import make_geoname_prop
|
|
22
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import make_integer_prop
|
|
23
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import make_link
|
|
24
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import make_list_prop
|
|
25
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import make_region
|
|
26
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import make_resource
|
|
27
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import make_resptr_prop
|
|
28
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import make_root
|
|
29
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import make_text_prop
|
|
30
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import make_uri_prop
|
|
31
|
+
from dsp_tools.commands.excel2xml.excel2xml_lib import write_xml
|
|
32
|
+
from dsp_tools.commands.excel2xml.propertyelement import PropertyElement
|
|
33
|
+
from dsp_tools.error.exceptions import BaseError
|
|
34
|
+
from dsp_tools.utils.data_formats.shared import check_notna
|
|
35
|
+
|
|
36
|
+
# ruff: noqa: E501 (line-too-long)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _read_cli_input_file(datafile: str) -> pd.DataFrame:
    """
    Parse the input file from the CLI (in either CSV or Excel format).

    The file type is derived from the file extension.
    The extension is compared case-insensitively,
    so that files like "DATA.CSV" or "data.XLSX" are accepted, too.
    All cells are read as strings.

    Args:
        datafile: path to the input file

    Raises:
        BaseError: if the input file is neither .csv, .xls nor .xlsx

    Returns:
        a pandas DataFrame with the input data
    """
    # only used to inspect the extension: the file itself is opened with the original path
    lowercased_path = datafile.lower()

    if lowercased_path.endswith(".csv"):
        return pd.read_csv(
            filepath_or_buffer=datafile,
            encoding="utf_8_sig",  # utf_8_sig is the default encoding of Excel
            dtype="str",
            sep=None,
            engine="python",  # let the "python" engine detect the separator
        )

    if lowercased_path.endswith((".xls", ".xlsx")):
        return pd.read_excel(io=datafile, dtype="str")

    raise BaseError(f"Cannot open file '{datafile}': Invalid extension. Allowed extensions: 'csv', 'xls', 'xlsx'")
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _validate_and_prepare_cli_input_file(dataframe: pd.DataFrame) -> pd.DataFrame:
    """
    Validate the raw input data and clean it up for further processing.

    The following steps are carried out:
     - check that all required columns exist
     - turn NA-like cells into NA
     - drop columns that are entirely empty, so that the max_num_of_props can be calculated without errors
     - drop rows that are entirely empty, to prevent them from being processed and raising an error

    Args:
        dataframe: pandas dataframe with the input data

    Raises:
        BaseError: if one of the required columns is missing

    Returns:
        the prepared pandas DataFrame
    """
    required_columns = ["id", "label", "restype", "permissions", "prop name", "prop type", "1_value"]
    absent_columns = [col for col in required_columns if col not in dataframe]
    if absent_columns:
        raise BaseError(
            f"Some columns in your input file are missing. The following columns are required: {required_columns}"
        )

    def _na_if_no_content(cell: Any) -> Any:
        # a cell counts as filled if it contains at least one letter, digit, or one of _ ! ? -
        if pd.notna(cell) and regex.search(r"[\p{L}\d_!?\-]", str(cell), flags=regex.U):
            return cell
        return pd.NA

    cleaned = dataframe.map(_na_if_no_content)

    # first get rid of the empty columns, then of the empty rows
    without_empty_columns = cleaned.dropna(axis="columns", how="all")
    return without_empty_columns.dropna(axis="index", how="all")
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _convert_rows_to_xml(
    dataframe: pd.DataFrame,
    max_num_of_props: int,
) -> list[etree._Element]:
    """
    Walk through the rows of the CSV/Excel input file and build the XML resources.

    A row with a filled "id" cell opens a new resource.
    A row with a filled "prop name" cell is a property that is attached to the resource opened most recently.

    Args:
        dataframe: pandas dataframe with the input data
        max_num_of_props: highest number of properties that a resource in this file has

    Raises:
        BaseError: if one of the rows is neither a resource-row nor a property-row,
            or if the file starts with a property-row

    Returns:
        a list of XML resources (with their respective properties)
    """
    finished_resources: list[etree._Element] = []
    current_resource: Optional[etree._Element] = None  # nothing has been opened yet

    for idx, row in dataframe.iterrows():
        # offset of 2 between the dataframe index and the row number reported to the user
        row_number = int(str(idx)) + 2
        has_id = check_notna(row["id"])
        has_prop_name = check_notna(row["prop name"])

        # a row must be either a resource-row or a property-row, but not both (and not none of them)
        if has_id == has_prop_name:
            raise BaseError(
                f"Exactly 1 of the 2 columns 'id' and 'prop name' must be filled. "
                f"Excel row {row_number} has too many/too less entries:\n"
                f"id: '{row['id']}'\n"
                f"prop name: '{row['prop name']}'"
            )

        if has_id:
            # resource-row: the resource that was open until now is complete, so store it
            # (in the very first iteration, there is no open resource yet)
            if current_resource is not None:
                finished_resources.append(current_resource)
            current_resource = _convert_resource_row_to_xml(row_number=row_number, row=row)
            continue

        # property-row: it needs an open resource to which it can be attached
        if current_resource is None:
            raise BaseError(
                "The first row of your Excel/CSV is invalid. "
                "The first row must define a resource, not a property."
            )
        current_resource.append(
            _convert_property_row_to_xml(
                row_number=row_number,
                row=row,
                max_num_of_props=max_num_of_props,
                resource_id=current_resource.attrib["id"],
            )
        )

    # the resource that was open when the loop ended has not been stored yet
    if current_resource is not None:
        finished_resources.append(current_resource)

    return finished_resources
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _append_bitstream_to_resource(
    resource: etree._Element,
    row: pd.Series[Any],
    row_number: int,
) -> etree._Element:
    """
    Create a bitstream-prop element, and append it to the resource.
    If the file permissions are missing, try to deduce them from the resource permissions.

    Args:
        resource: the resource element to which the bitstream-prop element should be appended
        row: the row of the CSV/Excel file from where all information comes from
        row_number: row number of the CSV/Excel sheet

    Warning:
        if the file permissions are missing and cannot be deduced from the resource permissions

    Returns:
        the resource element with the appended bitstream-prop element
    """
    file_permissions = row.get("file permissions")
    if not check_notna(file_permissions):
        resource_permissions = row.get("permissions")
        # Don't rely on the truthiness of the cell: an empty cell may be pd.NA
        # (which raises a TypeError in a boolean context) or a float NaN (which is truthy).
        # check_notna() is the same test that is applied to the other cells of a row.
        if check_notna(resource_permissions):
            file_permissions = resource_permissions
        else:
            file_permissions = ""
            warnings.warn(
                f"Missing file permissions for file "
                f"'{row['file']}' (Resource ID '{row['id']}', Excel row {row_number}). "
                f"An attempt to deduce them from the resource permissions failed."
            )
    resource.append(
        make_bitstream_prop(
            path=str(row["file"]),
            permissions=str(file_permissions),
            calling_resource=row["id"],
        )
    )
    return resource
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _convert_resource_row_to_xml(
    row_number: int,
    row: pd.Series[Any],
) -> etree._Element:
    """
    Build an XML resource element out of a resource-row.
    The mandatory cells (label, restype, permissions) are checked first.
    Afterwards, the element is created by the function that fits the restype (Resource, LinkObj, Region).
    If the row has a file, a bitstream-prop is appended to the element.

    Args:
        row_number: row number of the CSV/Excel sheet
        row: the pandas series representing the current row

    Warning:
        if a mandatory cell is missing

    Returns:
        the resource element created from the row
    """
    resource_id = row["id"]

    # mandatory cell: label
    resource_label = row.get("label")
    if pd.isna([resource_label]):
        resource_label = ""
        warnings.warn(f"Missing label for resource '{resource_id}' (Excel row {row_number})")
    elif not check_notna(resource_label):
        warnings.warn(
            f"The label of resource '{resource_id}' looks suspicious: '{resource_label}' (Excel row {row_number})"
        )

    # mandatory cell: restype
    resource_restype = row.get("restype")
    if not check_notna(resource_restype):
        resource_restype = ""
        warnings.warn(f"Missing restype for resource '{resource_id}' (Excel row {row_number})")

    # mandatory cell: permissions
    resource_permissions = row.get("permissions")
    if not check_notna(resource_permissions):
        resource_permissions = ""
        warnings.warn(f"Missing permissions for resource '{resource_id}' (Excel row {row_number})")

    # collect the keyword arguments for the function that creates the element
    kwargs_resource = {"label": resource_label, "permissions": resource_permissions, "id": resource_id}
    has_ark = check_notna(row.get("ark"))
    has_iri = check_notna(row.get("iri"))
    if has_ark:
        kwargs_resource["ark"] = row["ark"]
    if has_iri:
        kwargs_resource["iri"] = row["iri"]
    if has_ark and has_iri:
        warnings.warn(
            f"Both ARK and IRI were provided for resource '{resource_label}' ({resource_id}). "
            "The ARK will override the IRI."
        )
    if check_notna(row.get("created")):
        kwargs_resource["creation_date"] = row["created"]

    # pick the function that fits the restype: only ordinary resources receive the restype as argument
    if resource_restype == "Region":
        create_element = make_region
    elif resource_restype == "LinkObj":
        create_element = make_link
    else:
        create_element = make_resource
        kwargs_resource["restype"] = resource_restype

    # silence all warnings during creation, to avoid duplicates: most problems were already checked above
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        resource = create_element(**kwargs_resource)

    # when adding the file, suppress only the warnings about not existing files
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message=".*path doesn't point to a file.*")
        if check_notna(row.get("file")):
            resource = _append_bitstream_to_resource(
                resource=resource,
                row=row,
                row_number=row_number,
            )

    return resource
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def _get_prop_function(
    row: pd.Series[Any],
    resource_id: str,
) -> Callable[..., etree._Element]:
    """
    Look up the function that builds the property described by this row.

    The content of the cell "prop type" is used as key into a fixed table of make_*_prop functions.

    Args:
        row: row of the CSV/Excel sheet that defines the property
        resource_id: ID of the resource that owns the property (only used in the error message)

    Raises:
        BaseError: if the cell "prop type" doesn't contain one of the supported property types

    Returns:
        the make_*_prop function registered for the row's property type
    """
    factories: dict[str, Callable[..., etree._Element]] = {
        "bitstream": make_bitstream_prop,
        "boolean-prop": make_boolean_prop,
        "color-prop": make_color_prop,
        "date-prop": make_date_prop,
        "decimal-prop": make_decimal_prop,
        "geometry-prop": make_geometry_prop,
        "geoname-prop": make_geoname_prop,
        "integer-prop": make_integer_prop,
        "list-prop": make_list_prop,
        "resptr-prop": make_resptr_prop,
        "text-prop": make_text_prop,
        "uri-prop": make_uri_prop,
    }
    # a missing cell yields None, which is not a key of the table and therefore ends in the error below
    requested_type = row.get("prop type")
    if requested_type in factories:
        return factories[requested_type]
    raise BaseError(f"Invalid prop type for property {row.get('prop name')} in resource {resource_id}")
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def _convert_row_to_property_elements(
    row: pd.Series[Any],
    max_num_of_props: int,
    row_number: int,
    resource_id: str,
) -> list[PropertyElement]:
    """
    Turn one property row into a list of PropertyElement objects.

    A property consists of several elements.
    Element number i occupies the columns {i_value, i_encoding, i_permissions, i_comment} of the Excel sheet.
    Depending on the property type, some of these cells are empty.
    Elements without a value are skipped.

    Args:
        row: the pandas series representing the current row
        max_num_of_props: highest number of properties that a resource in this file has
        row_number: row number of the CSV/Excel sheet (only used in warnings)
        resource_id: id of the resource to which this property belongs (only used in warnings)

    Warning:
        - if an element has no value, but one of its other cells is filled
        - if an element has a value, but no permissions
        - if the row doesn't contain any value at all
        - if the row is a boolean-prop and contains more than one value

    Returns:
        list of PropertyElement objects, one per element that has a value
    """
    collected: list[PropertyElement] = []

    for idx in range(1, max_num_of_props + 1):
        cell_value = row[f"{idx}_value"]

        if pd.isna(cell_value):
            # no value: there is nothing to create, but orphaned companion cells deserve a warning
            orphaned_headers = [
                f"'{header}'"
                for header in (f"{idx}_encoding", f"{idx}_permissions", f"{idx}_comment")
                if check_notna(row.get(header))
            ]
            if orphaned_headers:
                orphaned_headers_str = ", ".join(orphaned_headers)
                warnings.warn(
                    f"Error in resource '{resource_id}': Excel row {row_number} has an entry "
                    f"in column(s) {orphaned_headers_str}, but not in '{idx}_value'. "
                    r"Please note that cell contents that don't meet the requirements of the regex [\p{L}\d_!?\-] "
                    "are considered inexistent."
                )
            continue

        # the permissions are always handed over (stringified), even if the cell is empty
        permissions_cell = row.get(f"{idx}_permissions")
        element_kwargs = {"value": cell_value, "permissions": str(permissions_cell)}
        if not check_notna(permissions_cell):
            warnings.warn(
                f"Resource '{resource_id}': "
                f"Missing permissions in column '{idx}_permissions' of property '{row['prop name']}'"
            )

        # comment and encoding are only handed over if their cell is filled
        for optional_field in ("comment", "encoding"):
            optional_cell = row.get(f"{idx}_{optional_field}")
            if check_notna(optional_cell):
                element_kwargs[optional_field] = str(optional_cell)

        collected.append(PropertyElement(**element_kwargs))

    # sanity checks on the whole row
    if len(collected) == 0:
        warnings.warn(
            f"At least one value per property is required, "
            f"but resource '{resource_id}', property '{row['prop name']}' (Excel row {row_number}) doesn't contain any values."
        )
    if len(collected) > 1 and row.get("prop type") == "boolean-prop":
        warnings.warn(
            f"A <boolean-prop> can only have a single value, "
            f"but resource '{resource_id}', property '{row['prop name']}' (Excel row {row_number}) contains more than one value."
        )

    return collected
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def _convert_property_row_to_xml(
    row_number: int,
    row: pd.Series[Any],
    max_num_of_props: int,
    resource_id: str,
) -> etree._Element:
    """
    Build the XML element for one property-row of the CSV/Excel sheet.

    This is done in three steps:
    pick the make_*_prop function that matches the property type,
    collect the PropertyElement objects of the row,
    and hand both over to _create_property().

    Args:
        row_number: row number of the CSV/Excel sheet
        row: the pandas series representing the current row
        max_num_of_props: highest number of properties that a resource in this file has
        resource_id: id of the resource to which the property belongs

    Raises:
        BaseError: if the property type of the row is invalid

    Returns:
        the XML element that _create_property() produces for this row
    """
    # step 1: the property type decides which function builds the element (raises on an invalid type)
    prop_factory = _get_prop_function(row=row, resource_id=resource_id)

    # step 2: gather the values of this row, together with their permissions/comments/encodings
    elements_of_row = _convert_row_to_property_elements(
        row=row,
        max_num_of_props=max_num_of_props,
        row_number=row_number,
        resource_id=resource_id,
    )

    # step 3: build the property
    return _create_property(
        make_prop_function=prop_factory,
        row=row,
        property_elements=elements_of_row,
        resource_id=resource_id,
    )
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def _create_property(
    make_prop_function: Callable[..., etree._Element],
    row: pd.Series[Any],
    property_elements: list[PropertyElement],
    resource_id: str,
) -> etree._Element:
    """
    Create a property by calling the appropriate function with the property elements.

    The function is called with the keyword arguments "name", "calling_resource" and "value",
    plus "list_name" if the cell "prop list" of the row is filled.
    A boolean-prop receives a single PropertyElement as value, all other property types receive the whole list.

    Args:
        make_prop_function: the function to create the property
        row: the pandas series representing the current row of the Excel/CSV
        property_elements: the list of PropertyElement objects
        resource_id: id of the resource to which this property belongs

    Raises:
        BaseError: if the row is a boolean-prop, but there is no property element to take the value from

    Returns:
        the element returned by make_prop_function
    """
    value: Union[PropertyElement, list[PropertyElement]] = property_elements
    if row.get("prop type") == "boolean-prop":
        # An empty list is possible here, because a row without values only triggers a warning upstream.
        # Fail with a meaningful error instead of an IndexError.
        if not property_elements:
            raise BaseError(
                f"A <boolean-prop> needs exactly one value, "
                f"but resource '{resource_id}', property '{row['prop name']}' doesn't contain any values."
            )
        value = property_elements[0]

    kwargs_propfunc: dict[str, Union[str, PropertyElement, list[PropertyElement]]] = {
        "name": row["prop name"],
        "calling_resource": resource_id,
        "value": value,
    }

    # the list name is only handed over if the cell "prop list" is filled
    if check_notna(row.get("prop list")):
        kwargs_propfunc["list_name"] = str(row["prop list"])

    return make_prop_function(**kwargs_propfunc)
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def excel2xml(
    datafile: str,
    shortcode: str,
    default_ontology: str,
) -> tuple[bool, list[warnings.WarningMessage]]:
    """
    Entry point of the command line tool. Not intended to be used in a Python script.

    Reads a tabular data source in CSV/XLS(X) format that is formatted according to the specifications,
    and transforms it into a DSP-conforming XML file
    that can be uploaded to a DSP server with the xmlupload command.
    The output file is saved in the current working directory,
    with the name [default_ontology]-data.xml.

    Please note that this method doesn't do any data cleaning or data transformation tasks.
    The input and the output of this method are semantically exactly equivalent.

    All warnings that are emitted during the conversion are recorded and printed.
    The XML file is written even if there were warnings.

    Args:
        datafile: path to the data file (CSV or XLS(X))
        shortcode: shortcode of the project that this data belongs to
        default_ontology: name of the ontology that this data belongs to

    Raises:
        BaseError: if something went wrong

    Returns:
        True, empty list if everything went well,
        False and a list with caught warnings if there were any problems
    """
    dataframe = _validate_and_prepare_cli_input_file(_read_cli_input_file(datafile))

    # the title of the last column has the format "i_comment", so its prefix is the number of property elements
    title_of_last_column = str(list(dataframe)[-1])
    max_num_of_props = int(title_of_last_column.split("_")[0])
    output_file = Path(f"{default_ontology}-data.xml")

    root = append_permissions(make_root(shortcode=shortcode, default_ontology=default_ontology))

    # record the warnings instead of showing them, so that they can be returned to the caller
    with warnings.catch_warnings(record=True) as recorded_warnings:
        root.extend(_convert_rows_to_xml(dataframe=dataframe, max_num_of_props=max_num_of_props))
        write_xml(root, output_file)
        for recorded in recorded_warnings:
            print(f"WARNING: {recorded.message}")

    if not recorded_warnings:
        print(f"XML file successfully written to '{output_file}'")
        return True, recorded_warnings

    # ANSI escape codes to print the final message in red
    red = "\033[31m"
    end = "\033[0m"
    print(f"{red}Some problems occurred. The XML file was written to '{output_file}', but it might be corrupt{end}")
    return False, recorded_warnings
|