dsp-tools 0.9.13__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dsp_tools/__init__.py +5 -0
- dsp_tools/cli/args.py +47 -0
- dsp_tools/cli/call_action.py +85 -0
- dsp_tools/cli/call_action_files_only.py +101 -0
- dsp_tools/cli/call_action_with_network.py +207 -0
- dsp_tools/cli/create_parsers.py +479 -0
- dsp_tools/cli/entry_point.py +322 -0
- dsp_tools/cli/utils.py +87 -0
- dsp_tools/clients/CLAUDE.md +420 -0
- dsp_tools/clients/authentication_client.py +14 -0
- dsp_tools/clients/authentication_client_live.py +66 -0
- dsp_tools/clients/connection.py +35 -0
- dsp_tools/clients/connection_live.py +233 -0
- dsp_tools/clients/fuseki_metrics.py +60 -0
- dsp_tools/clients/group_user_clients.py +35 -0
- dsp_tools/clients/group_user_clients_live.py +181 -0
- dsp_tools/clients/legal_info_client.py +23 -0
- dsp_tools/clients/legal_info_client_live.py +132 -0
- dsp_tools/clients/list_client.py +49 -0
- dsp_tools/clients/list_client_live.py +166 -0
- dsp_tools/clients/metadata_client.py +24 -0
- dsp_tools/clients/metadata_client_live.py +47 -0
- dsp_tools/clients/ontology_clients.py +49 -0
- dsp_tools/clients/ontology_create_client_live.py +166 -0
- dsp_tools/clients/ontology_get_client_live.py +80 -0
- dsp_tools/clients/permissions_client.py +68 -0
- dsp_tools/clients/project_client.py +16 -0
- dsp_tools/clients/project_client_live.py +66 -0
- dsp_tools/commands/create/communicate_problems.py +24 -0
- dsp_tools/commands/create/create.py +134 -0
- dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
- dsp_tools/commands/create/create_on_server/classes.py +99 -0
- dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
- dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
- dsp_tools/commands/create/create_on_server/group_users.py +165 -0
- dsp_tools/commands/create/create_on_server/lists.py +163 -0
- dsp_tools/commands/create/create_on_server/mappers.py +12 -0
- dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
- dsp_tools/commands/create/create_on_server/ontology.py +52 -0
- dsp_tools/commands/create/create_on_server/project.py +68 -0
- dsp_tools/commands/create/create_on_server/properties.py +119 -0
- dsp_tools/commands/create/exceptions.py +29 -0
- dsp_tools/commands/create/lists_only.py +66 -0
- dsp_tools/commands/create/models/create_problems.py +87 -0
- dsp_tools/commands/create/models/parsed_ontology.py +88 -0
- dsp_tools/commands/create/models/parsed_project.py +81 -0
- dsp_tools/commands/create/models/rdf_ontology.py +12 -0
- dsp_tools/commands/create/models/server_project_info.py +100 -0
- dsp_tools/commands/create/parsing/parse_lists.py +45 -0
- dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
- dsp_tools/commands/create/parsing/parse_project.py +149 -0
- dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
- dsp_tools/commands/create/project_validate.py +595 -0
- dsp_tools/commands/create/serialisation/ontology.py +119 -0
- dsp_tools/commands/create/serialisation/project.py +44 -0
- dsp_tools/commands/excel2json/CLAUDE.md +101 -0
- dsp_tools/commands/excel2json/json_header.py +321 -0
- dsp_tools/commands/excel2json/lists/__init__.py +0 -0
- dsp_tools/commands/excel2json/lists/compliance_checks.py +292 -0
- dsp_tools/commands/excel2json/lists/make_lists.py +247 -0
- dsp_tools/commands/excel2json/lists/models/__init__.py +0 -0
- dsp_tools/commands/excel2json/lists/models/deserialise.py +30 -0
- dsp_tools/commands/excel2json/lists/models/input_error.py +216 -0
- dsp_tools/commands/excel2json/lists/models/serialise.py +57 -0
- dsp_tools/commands/excel2json/lists/utils.py +81 -0
- dsp_tools/commands/excel2json/models/__init__.py +0 -0
- dsp_tools/commands/excel2json/models/input_error.py +416 -0
- dsp_tools/commands/excel2json/models/json_header.py +175 -0
- dsp_tools/commands/excel2json/models/list_node_name.py +16 -0
- dsp_tools/commands/excel2json/models/ontology.py +76 -0
- dsp_tools/commands/excel2json/old_lists.py +328 -0
- dsp_tools/commands/excel2json/project.py +280 -0
- dsp_tools/commands/excel2json/properties.py +370 -0
- dsp_tools/commands/excel2json/resources.py +336 -0
- dsp_tools/commands/excel2json/utils.py +352 -0
- dsp_tools/commands/excel2xml/__init__.py +7 -0
- dsp_tools/commands/excel2xml/excel2xml_cli.py +523 -0
- dsp_tools/commands/excel2xml/excel2xml_lib.py +1953 -0
- dsp_tools/commands/excel2xml/propertyelement.py +47 -0
- dsp_tools/commands/get/__init__.py +0 -0
- dsp_tools/commands/get/get.py +166 -0
- dsp_tools/commands/get/get_permissions.py +257 -0
- dsp_tools/commands/get/get_permissions_legacy.py +89 -0
- dsp_tools/commands/get/legacy_models/__init__.py +0 -0
- dsp_tools/commands/get/legacy_models/context.py +318 -0
- dsp_tools/commands/get/legacy_models/group.py +241 -0
- dsp_tools/commands/get/legacy_models/helpers.py +47 -0
- dsp_tools/commands/get/legacy_models/listnode.py +390 -0
- dsp_tools/commands/get/legacy_models/model.py +12 -0
- dsp_tools/commands/get/legacy_models/ontology.py +324 -0
- dsp_tools/commands/get/legacy_models/project.py +366 -0
- dsp_tools/commands/get/legacy_models/propertyclass.py +417 -0
- dsp_tools/commands/get/legacy_models/resourceclass.py +676 -0
- dsp_tools/commands/get/legacy_models/user.py +438 -0
- dsp_tools/commands/get/models/__init__.py +0 -0
- dsp_tools/commands/get/models/permissions_models.py +10 -0
- dsp_tools/commands/id2iri.py +258 -0
- dsp_tools/commands/ingest_xmlupload/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +178 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +69 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +166 -0
- dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +121 -0
- dsp_tools/commands/ingest_xmlupload/ingest_files/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +64 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/__init__.py +0 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +20 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +57 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +66 -0
- dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +67 -0
- dsp_tools/commands/resume_xmlupload/__init__.py +0 -0
- dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +96 -0
- dsp_tools/commands/start_stack.py +428 -0
- dsp_tools/commands/update_legal/CLAUDE.md +344 -0
- dsp_tools/commands/update_legal/__init__.py +0 -0
- dsp_tools/commands/update_legal/core.py +182 -0
- dsp_tools/commands/update_legal/csv_operations.py +135 -0
- dsp_tools/commands/update_legal/models.py +87 -0
- dsp_tools/commands/update_legal/xml_operations.py +247 -0
- dsp_tools/commands/validate_data/CLAUDE.md +159 -0
- dsp_tools/commands/validate_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/constants.py +59 -0
- dsp_tools/commands/validate_data/mappers.py +143 -0
- dsp_tools/commands/validate_data/models/__init__.py +0 -0
- dsp_tools/commands/validate_data/models/api_responses.py +45 -0
- dsp_tools/commands/validate_data/models/input_problems.py +119 -0
- dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
- dsp_tools/commands/validate_data/models/validation.py +106 -0
- dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
- dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
- dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
- dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
- dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
- dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
- dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
- dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
- dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
- dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
- dsp_tools/commands/validate_data/sparql/cardinality_shacl.py +209 -0
- dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
- dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
- dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
- dsp_tools/commands/validate_data/utils.py +59 -0
- dsp_tools/commands/validate_data/validate_data.py +283 -0
- dsp_tools/commands/validate_data/validation/__init__.py +0 -0
- dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
- dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
- dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
- dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
- dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
- dsp_tools/commands/xmlupload/__init__.py +0 -0
- dsp_tools/commands/xmlupload/iri_resolver.py +21 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
- dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
- dsp_tools/commands/xmlupload/models/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
- dsp_tools/commands/xmlupload/models/formatted_text_value.py +10 -0
- dsp_tools/commands/xmlupload/models/ingest.py +143 -0
- dsp_tools/commands/xmlupload/models/input_problems.py +58 -0
- dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
- dsp_tools/commands/xmlupload/models/permission.py +45 -0
- dsp_tools/commands/xmlupload/models/permissions_parsed.py +93 -0
- dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
- dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
- dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
- dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
- dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
- dsp_tools/commands/xmlupload/models/upload_clients.py +14 -0
- dsp_tools/commands/xmlupload/models/upload_state.py +20 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/ark2iri.py +55 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/iiif_uri_validator.py +50 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/list_client.py +120 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
- dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
- dsp_tools/commands/xmlupload/resource_create_client.py +25 -0
- dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
- dsp_tools/commands/xmlupload/stash/__init__.py +0 -0
- dsp_tools/commands/xmlupload/stash/analyse_circular_reference_graph.py +236 -0
- dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
- dsp_tools/commands/xmlupload/stash/graph_models.py +87 -0
- dsp_tools/commands/xmlupload/stash/stash_circular_references.py +68 -0
- dsp_tools/commands/xmlupload/stash/stash_models.py +109 -0
- dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +106 -0
- dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +196 -0
- dsp_tools/commands/xmlupload/upload_config.py +76 -0
- dsp_tools/commands/xmlupload/write_diagnostic_info.py +27 -0
- dsp_tools/commands/xmlupload/xmlupload.py +516 -0
- dsp_tools/config/__init__.py +0 -0
- dsp_tools/config/logger_config.py +69 -0
- dsp_tools/config/warnings_config.py +32 -0
- dsp_tools/error/__init__.py +0 -0
- dsp_tools/error/custom_warnings.py +39 -0
- dsp_tools/error/exceptions.py +204 -0
- dsp_tools/error/problems.py +10 -0
- dsp_tools/error/xmllib_errors.py +20 -0
- dsp_tools/error/xmllib_warnings.py +54 -0
- dsp_tools/error/xmllib_warnings_util.py +159 -0
- dsp_tools/error/xsd_validation_error_msg.py +19 -0
- dsp_tools/legacy_models/__init__.py +0 -0
- dsp_tools/legacy_models/datetimestamp.py +81 -0
- dsp_tools/legacy_models/langstring.py +253 -0
- dsp_tools/legacy_models/projectContext.py +49 -0
- dsp_tools/py.typed +0 -0
- dsp_tools/resources/schema/data.xsd +648 -0
- dsp_tools/resources/schema/lists-only.json +72 -0
- dsp_tools/resources/schema/project.json +1258 -0
- dsp_tools/resources/schema/properties-only.json +874 -0
- dsp_tools/resources/schema/resources-only.json +140 -0
- dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
- dsp_tools/resources/start-stack/docker-compose.override.yml +11 -0
- dsp_tools/resources/start-stack/docker-compose.yml +88 -0
- dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
- dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
- dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
- dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
- dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
- dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
- dsp_tools/utils/__init__.py +0 -0
- dsp_tools/utils/ansi_colors.py +32 -0
- dsp_tools/utils/data_formats/__init__.py +0 -0
- dsp_tools/utils/data_formats/date_util.py +166 -0
- dsp_tools/utils/data_formats/iri_util.py +30 -0
- dsp_tools/utils/data_formats/shared.py +81 -0
- dsp_tools/utils/data_formats/uri_util.py +76 -0
- dsp_tools/utils/fuseki_bloating.py +63 -0
- dsp_tools/utils/json_parsing.py +22 -0
- dsp_tools/utils/rdf_constants.py +42 -0
- dsp_tools/utils/rdflib_utils.py +10 -0
- dsp_tools/utils/replace_id_with_iri.py +66 -0
- dsp_tools/utils/request_utils.py +238 -0
- dsp_tools/utils/xml_parsing/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
- dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
- dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
- dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
- dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
- dsp_tools/xmllib/CLAUDE.md +302 -0
- dsp_tools/xmllib/__init__.py +49 -0
- dsp_tools/xmllib/general_functions.py +877 -0
- dsp_tools/xmllib/internal/__init__.py +0 -0
- dsp_tools/xmllib/internal/checkers.py +162 -0
- dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
- dsp_tools/xmllib/internal/constants.py +46 -0
- dsp_tools/xmllib/internal/input_converters.py +155 -0
- dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
- dsp_tools/xmllib/internal/serialise_resource.py +177 -0
- dsp_tools/xmllib/internal/serialise_values.py +152 -0
- dsp_tools/xmllib/internal/type_aliases.py +11 -0
- dsp_tools/xmllib/models/__init__.py +0 -0
- dsp_tools/xmllib/models/config_options.py +28 -0
- dsp_tools/xmllib/models/date_formats.py +48 -0
- dsp_tools/xmllib/models/dsp_base_resources.py +1542 -0
- dsp_tools/xmllib/models/internal/__init__.py +0 -0
- dsp_tools/xmllib/models/internal/file_values.py +172 -0
- dsp_tools/xmllib/models/internal/geometry.py +162 -0
- dsp_tools/xmllib/models/internal/migration_metadata.py +55 -0
- dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
- dsp_tools/xmllib/models/internal/values.py +342 -0
- dsp_tools/xmllib/models/licenses/__init__.py +0 -0
- dsp_tools/xmllib/models/licenses/other.py +59 -0
- dsp_tools/xmllib/models/licenses/recommended.py +107 -0
- dsp_tools/xmllib/models/permissions.py +41 -0
- dsp_tools/xmllib/models/res.py +1782 -0
- dsp_tools/xmllib/models/root.py +348 -0
- dsp_tools/xmllib/value_checkers.py +434 -0
- dsp_tools/xmllib/value_converters.py +777 -0
- dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
- dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
- dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
- dsp_tools-18.3.0.post13.dist-info/entry_points.txt +3 -0
- dsp_tools-0.9.13.dist-info/LICENSE +0 -674
- dsp_tools-0.9.13.dist-info/METADATA +0 -144
- dsp_tools-0.9.13.dist-info/RECORD +0 -71
- dsp_tools-0.9.13.dist-info/WHEEL +0 -5
- dsp_tools-0.9.13.dist-info/entry_points.txt +0 -3
- dsp_tools-0.9.13.dist-info/top_level.txt +0 -1
- dsplib/models/connection.py +0 -272
- dsplib/models/group.py +0 -296
- dsplib/models/helpers.py +0 -505
- dsplib/models/langstring.py +0 -277
- dsplib/models/listnode.py +0 -578
- dsplib/models/model.py +0 -20
- dsplib/models/ontology.py +0 -448
- dsplib/models/permission.py +0 -112
- dsplib/models/project.py +0 -547
- dsplib/models/propertyclass.py +0 -505
- dsplib/models/resource.py +0 -366
- dsplib/models/resourceclass.py +0 -810
- dsplib/models/sipi.py +0 -30
- dsplib/models/user.py +0 -731
- dsplib/models/value.py +0 -1000
- dsplib/utils/knora-data-schema.xsd +0 -454
- dsplib/utils/knora-schema-lists.json +0 -83
- dsplib/utils/knora-schema.json +0 -434
- dsplib/utils/onto_commons.py +0 -24
- dsplib/utils/onto_create_lists.py +0 -73
- dsplib/utils/onto_create_ontology.py +0 -442
- dsplib/utils/onto_get.py +0 -58
- dsplib/utils/onto_validate.py +0 -33
- dsplib/utils/xml_upload.py +0 -539
- dsplib/widgets/doublepassword.py +0 -80
- knora/MLS-import-libraries.py +0 -84
- knora/dsp_tools.py +0 -96
- knora/dsplib/models/connection.py +0 -272
- knora/dsplib/models/group.py +0 -296
- knora/dsplib/models/helpers.py +0 -506
- knora/dsplib/models/langstring.py +0 -277
- knora/dsplib/models/listnode.py +0 -578
- knora/dsplib/models/model.py +0 -20
- knora/dsplib/models/ontology.py +0 -448
- knora/dsplib/models/permission.py +0 -112
- knora/dsplib/models/project.py +0 -583
- knora/dsplib/models/propertyclass.py +0 -505
- knora/dsplib/models/resource.py +0 -416
- knora/dsplib/models/resourceclass.py +0 -811
- knora/dsplib/models/sipi.py +0 -35
- knora/dsplib/models/user.py +0 -731
- knora/dsplib/models/value.py +0 -1000
- knora/dsplib/utils/knora-data-schema.xsd +0 -464
- knora/dsplib/utils/knora-schema-lists.json +0 -83
- knora/dsplib/utils/knora-schema.json +0 -444
- knora/dsplib/utils/onto_commons.py +0 -24
- knora/dsplib/utils/onto_create_lists.py +0 -73
- knora/dsplib/utils/onto_create_ontology.py +0 -451
- knora/dsplib/utils/onto_get.py +0 -58
- knora/dsplib/utils/onto_validate.py +0 -33
- knora/dsplib/utils/xml_upload.py +0 -540
- knora/dsplib/widgets/doublepassword.py +0 -80
- knora/knora.py +0 -2108
- knora/test.py +0 -99
- knora/testit.py +0 -76
- knora/xml2knora.py +0 -633
- {dsplib → dsp_tools/cli}/__init__.py +0 -0
- {dsplib/models → dsp_tools/clients}/__init__.py +0 -0
- {dsplib/utils → dsp_tools/commands}/__init__.py +0 -0
- {dsplib/widgets → dsp_tools/commands/create}/__init__.py +0 -0
- {knora → dsp_tools/commands/create/create_on_server}/__init__.py +0 -0
- {knora/dsplib → dsp_tools/commands/create/models}/__init__.py +0 -0
- {knora/dsplib/models → dsp_tools/commands/create/parsing}/__init__.py +0 -0
- {knora/dsplib/utils → dsp_tools/commands/create/serialisation}/__init__.py +0 -0
- {knora/dsplib/widgets → dsp_tools/commands/excel2json}/__init__.py +0 -0
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
"""This module handles all the operations which are used for the creation of JSON lists from Excel files."""
|
|
2
|
+
|
|
3
|
+
import importlib.resources
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
from typing import Optional
|
|
8
|
+
from typing import cast
|
|
9
|
+
|
|
10
|
+
import jsonschema
|
|
11
|
+
import regex
|
|
12
|
+
from openpyxl import load_workbook
|
|
13
|
+
from openpyxl.cell import Cell
|
|
14
|
+
from openpyxl.worksheet.worksheet import Worksheet
|
|
15
|
+
|
|
16
|
+
from dsp_tools.commands.excel2json.models.input_error import MoreThanOneSheetProblem
|
|
17
|
+
from dsp_tools.commands.excel2json.models.list_node_name import ListNodeNames
|
|
18
|
+
from dsp_tools.error.exceptions import InputError
|
|
19
|
+
from dsp_tools.utils.data_formats.shared import simplify_name
|
|
20
|
+
from dsp_tools.utils.json_parsing import parse_json_file
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def old_excel2lists(
    excelfolder: str,
    path_to_output_file: Optional[str] = None,
    verbose: bool = False,
) -> tuple[list[dict[str, Any]], bool]:
    """
    Converts lists described in Excel files into a "lists" section that can be inserted into a JSON project file.

    Args:
        excelfolder: path to the folder containing the Excel file(s)
        path_to_output_file: if provided, the output is written into this JSON file
        verbose: verbose switch

    Raises:
        InputError: if something went wrong
        BaseError: if something went wrong

    Returns:
        a tuple consisting of the "lists" section as Python list, and the success status (True if everything went well)
    """
    excel_file_paths = _extract_excel_file_paths(excelfolder)
    if verbose:
        print("The following Excel files will be processed:")
        # BUG FIX: the f-string previously contained no placeholder, so every file was announced with the
        # same literal text and the loop variable was unused; interpolate the actual file path.
        print(*(f" - {filename}" for filename in excel_file_paths), sep="\n")

    # build the "lists" section from the Excel files, then check it against the JSON schema
    finished_lists = _make_json_lists_from_excel(excel_file_paths, verbose=verbose)
    validate_lists_section_with_schema(lists_section=finished_lists)

    if path_to_output_file:
        with open(path_to_output_file, "w", encoding="utf-8") as fp:
            json.dump(finished_lists, fp, indent=4, ensure_ascii=False)
            print(f"lists section was created successfully and written to file '{path_to_output_file}'")

    return finished_lists, True
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _get_values_from_excel(
    excelfiles: dict[str, Worksheet],
    base_file: dict[str, Worksheet],
    parentnode: dict[str, Any],
    row: int,
    col: int,
    preval: list[str],
    list_node_names: ListNodeNames,
    verbose: bool = False,
) -> tuple[int, dict[str, Any]]:
    """
    This function calls itself recursively to go through the Excel files. It extracts the cell values and composes
    the JSON list.

    Args:
        excelfiles: List of Excel files with the values in different languages
        base_file: File name of the base file
        parentnode: Name(s) of the parent node(s) of the current node
        row: The index of the current row of the Excel sheet
        col: The index of the current column of the Excel sheet
        preval: List of previous values, needed to check the consistency of the list hierarchy
        list_node_names: object containing the information which node names were used by previous lists
        verbose: verbose switch

    Raises:
        InputError: if one of the Excel files contains invalid data

    Returns:
        int: Row index for the next loop (current row index minus 1)
        dict: The JSON list up to the current recursion. At the last recursion, this is the final JSON list.
    """
    nodes: list[dict[str, Any]] = []
    currentnode: dict[str, Any] = {}
    # the single entry of base_file is the reference sheet that drives the traversal
    base_file_ws: Worksheet = next(iter(base_file.values()))
    cell: Cell = cast(Cell, base_file_ws.cell(column=col, row=row))

    # sanity check on every language sheet: row 1 must contain exactly one value, in A1
    for excelfile in excelfiles.values():
        if any((not excelfile["A1"].value, excelfile["B1"].value)):
            raise InputError(
                f"ERROR: Inconsistency in Excel list: The first row must consist of exactly one value, in cell A1. "
                f"All other cells of row 1 must be empty.\nInstead, found the following:\n"
                f" - Cell A1: '{excelfile['A1'].value}'\n"
                f" - Cell B1: '{excelfile['B1'].value}'"
            )

    if col > 1:
        # append the cell value of the parent node (which is one value to the left of the current cell) to the list of
        # previous values
        # NOTE: preval is mutated in place and shared across recursion levels; the matching pop() below undoes this
        preval.append(str(base_file_ws.cell(column=col - 1, row=row).value).strip())

    # iterate downwards as long as the current cell contains at least one letter (any Unicode script)
    while cell.value and regex.search(r"\p{L}", str(cell.value), flags=regex.UNICODE):
        _check_if_predecessors_in_row_are_consistent_with_preval(base_file_ws, preval, row)

        # loop through the row until the last (furthest right) value is found
        next_value = base_file_ws.cell(column=col + 1, row=row).value
        if next_value and regex.search(r"\p{L}", str(next_value), flags=regex.UNICODE):
            # there is a value further right: the current cell is an inner node; recurse into its children.
            # The recursion returns the row where the children end, so the loop resumes below them.
            row, _ = _get_values_from_excel(
                excelfiles=excelfiles,
                base_file=base_file,
                parentnode=currentnode,
                col=col + 1,
                row=row,
                preval=preval,
                list_node_names=list_node_names,
                verbose=verbose,
            )

        # if value was last in row (no further values to the right), it's a node, continue here
        else:
            currentnode = _make_new_node(cell, col, excelfiles, preval, row, list_node_names, verbose)
            nodes.append(currentnode)

        # go one row down and repeat loop if there is a value
        row += 1
        cell = cast(Cell, base_file_ws.cell(column=col, row=row))

    if col > 1:
        # undo the preval.append() made on entry, restoring the caller's hierarchy context
        preval.pop()

    # add the new nodes to the parentnode
    parentnode["nodes"] = nodes

    # return row - 1 so the caller's own row += 1 lands on the first row after this subtree
    return row - 1, parentnode
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _check_if_predecessors_in_row_are_consistent_with_preval(
|
|
145
|
+
base_file_ws: Worksheet, preval: list[str], row: int
|
|
146
|
+
) -> None:
|
|
147
|
+
for idx, val in enumerate(preval[:-1]):
|
|
148
|
+
if val != str(base_file_ws.cell(column=idx + 1, row=row).value).strip():
|
|
149
|
+
raise InputError(
|
|
150
|
+
"ERROR: Inconsistency in Excel list: "
|
|
151
|
+
f"{val} not equal to {str(base_file_ws.cell(column=idx + 1, row=row).value).strip()}"
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _make_new_node(
    cell: Cell,
    col: int,
    excelfiles: dict[str, Worksheet],
    preval: list[str],
    row: int,
    list_node_names: ListNodeNames,
    verbose: bool = False,
) -> dict[str, Any]:
    """Create one JSON list node from the cell at (row, col), with labels from all language sheets.

    The node name is a simplified form of the cell content; names that were already used get a
    numeric suffix (e.g. "node-name-2") so that every node name is unique.

    Raises:
        InputError: if the node is a duplicate, or if a language sheet lacks a value for this cell
    """
    _check_if_duplicate_nodes_exist(cell, list_node_names, preval)
    # derive the node name from a simplified version of the cell content
    node_name = simplify_name(str(cell.value).strip())
    list_node_names.previous_node_names.append(node_name)
    occurrence = list_node_names.previous_node_names.count(node_name)
    # disambiguate repeated names by appending the occurrence count
    if occurrence > 1:
        node_name = f"{node_name}-{occurrence}"

    labels = _get_all_languages_of_node(excelfiles, col, row)

    # assemble the node from the extracted values
    new_node = {"name": node_name, "labels": labels}
    if verbose:
        print(f"Added list node: {str(cell.value).strip()} ({node_name})")
    return new_node
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _check_if_duplicate_nodes_exist(cell: Cell, list_node_names: ListNodeNames, preval: list[str]) -> None:
|
|
184
|
+
new_check_list = preval.copy()
|
|
185
|
+
new_check_list.append(str(cell.value).strip())
|
|
186
|
+
list_node_names.lists_with_previous_cell_values.append(new_check_list)
|
|
187
|
+
if any(
|
|
188
|
+
list_node_names.lists_with_previous_cell_values.count(x) > 1
|
|
189
|
+
for x in list_node_names.lists_with_previous_cell_values
|
|
190
|
+
):
|
|
191
|
+
raise InputError(
|
|
192
|
+
f"ERROR: There is at least one duplicate node in the list. "
|
|
193
|
+
f"Found duplicate in column {cell.column}, row {cell.row}:\n'{str(cell.value).strip()}'"
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _get_all_languages_of_node(excelfiles: dict[str, Worksheet], col: int, row: int) -> dict[str, str]:
|
|
198
|
+
labels_dict: dict[str, str] = {}
|
|
199
|
+
for other_lang, ws_other_lang in excelfiles.items():
|
|
200
|
+
cell_value = ws_other_lang.cell(column=col, row=row).value
|
|
201
|
+
if not (isinstance(cell_value, str) and len(cell_value) > 0):
|
|
202
|
+
raise InputError(
|
|
203
|
+
"ERROR: Malformed Excel file: The Excel file with the language code "
|
|
204
|
+
f"'{other_lang}' should have a value in row {row}, column {col}"
|
|
205
|
+
)
|
|
206
|
+
else:
|
|
207
|
+
labels_dict[other_lang] = cell_value.strip()
|
|
208
|
+
return labels_dict
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _make_json_lists_from_excel(
    excel_file_paths: list[Path],
    verbose: bool = False,
) -> list[dict[str, Any]]:
    """
    Reads Excel files and transforms them into a list of dictionaries that can be used as "lists" array of a JSON
    project file.

    Args:
        excel_file_paths: Excel files to be processed
        verbose: verbose switch

    Raises:
        InputError: if one of the Excel files contains invalid data

    Returns:
        The finished "lists" section
    """
    # one worksheet per language, keyed by language code (the file stem, e.g. "en")
    worksheets_by_lang: dict[str, Worksheet] = {p.stem: _read_and_check_workbook(p) for p in excel_file_paths}

    # prefer English as the reference sheet; otherwise fall back to an arbitrary (first) language
    reference_lang = "en" if "en" in worksheets_by_lang else next(iter(worksheets_by_lang))
    reference_file = {reference_lang: worksheets_by_lang[reference_lang]}

    node_names = ListNodeNames()

    # construct the entire "lists" section as children of a fictive dummy parent node,
    # starting the traversal at cell A1 (row 1, column 1)
    _, dummy_parent = _get_values_from_excel(
        excelfiles=worksheets_by_lang,
        base_file=reference_file,
        parentnode={},
        row=1,
        col=1,
        preval=[],
        list_node_names=node_names,
        verbose=verbose,
    )

    # the children of the dummy parent are the actual lists; each root node additionally needs a
    # mandatory "comments" section, for which a copy of "labels" is reused
    return [
        {
            "name": root_node["name"],
            "labels": root_node["labels"],
            "comments": root_node["labels"],
            "nodes": root_node["nodes"],
        }
        for root_node in dummy_parent["nodes"]
    ]
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _read_and_check_workbook(excelpath: Path) -> Worksheet:
    """Open the workbook at excelpath and return its worksheet, ensuring there is exactly one.

    Raises:
        InputError: if the workbook does not contain exactly one worksheet
    """
    sheets = cast(list[Worksheet], load_workbook(excelpath, read_only=True).worksheets)
    if len(sheets) == 1:
        return sheets[0]
    problem = MoreThanOneSheetProblem(excelpath.name, [sheet.title for sheet in sheets])
    raise InputError(problem.execute_error_protocol())
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def validate_lists_section_from_project(project_file: Path) -> bool:
    """Validate the "lists" section of a JSON project file against the schema.

    Args:
        project_file: path to the JSON project file

    Raises:
        InputError: if the file has no "lists" section, or if the section fails validation

    Returns:
        True if the "lists" section passed validation
    """
    project = parse_json_file(project_file)
    if not (lists_section := project["project"].get("lists")):
        raise InputError(
            f"Cannot validate 'lists' section of {project_file}, because there is no 'lists' section in this file."
        )
    return validate_lists_section_with_schema(lists_section)
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def validate_lists_section_with_schema(lists_section: list[dict[str, Any]]) -> bool:
    """Validate a "lists" section against the JSON schema shipped with dsp-tools.

    Args:
        lists_section: the "lists" section as parsed from JSON

    Raises:
        InputError: if the section does not conform to the schema

    Returns:
        True if validation succeeded
    """
    schema_resource = importlib.resources.files("dsp_tools").joinpath("resources/schema/lists-only.json")
    lists_schema = json.loads(schema_resource.read_text(encoding="utf-8"))
    try:
        jsonschema.validate(instance={"lists": lists_section}, schema=lists_schema)
    except jsonschema.ValidationError as err:
        raise InputError(
            f"'lists' section did not pass validation. The error message is: {err.message}\n"
            f"The error occurred at {err.json_path}"
        ) from None
    return True
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def _extract_excel_file_paths(excelfolder: str) -> list[Path]:
|
|
305
|
+
"""
|
|
306
|
+
This method extracts the names of the Excel files that are in the folder, and asserts that they are named according
|
|
307
|
+
to the requirements.
|
|
308
|
+
|
|
309
|
+
Args:
|
|
310
|
+
excelfolder: path to the folder containing the Excel file(s)
|
|
311
|
+
|
|
312
|
+
Raises:
|
|
313
|
+
InputError: if excelfolder is not a directory, or if one of the files in it has an invalid name
|
|
314
|
+
|
|
315
|
+
Returns:
|
|
316
|
+
list of the Excel file paths to process
|
|
317
|
+
"""
|
|
318
|
+
if not Path(excelfolder).is_dir():
|
|
319
|
+
raise InputError(f"ERROR: {excelfolder} is not a directory.")
|
|
320
|
+
|
|
321
|
+
supported_files = ["en.xlsx", "de.xlsx", "fr.xlsx", "it.xlsx", "rm.xlsx"]
|
|
322
|
+
excel_file_paths = [x for x in Path(excelfolder).glob("*.xlsx") if x.is_file() and not x.name.startswith("~$")]
|
|
323
|
+
|
|
324
|
+
for filepath in excel_file_paths:
|
|
325
|
+
if filepath.name not in supported_files:
|
|
326
|
+
raise InputError(f"Invalid file name '{filepath}'. Expected format: 'languagecode.xlsx'")
|
|
327
|
+
|
|
328
|
+
return excel_file_paths
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import regex
|
|
8
|
+
|
|
9
|
+
from dsp_tools.commands.excel2json.json_header import get_json_header
|
|
10
|
+
from dsp_tools.commands.excel2json.lists.make_lists import excel2lists
|
|
11
|
+
from dsp_tools.commands.excel2json.models.json_header import PermissionsOverrulesPrefixed
|
|
12
|
+
from dsp_tools.commands.excel2json.old_lists import old_excel2lists
|
|
13
|
+
from dsp_tools.commands.excel2json.properties import excel2properties
|
|
14
|
+
from dsp_tools.commands.excel2json.resources import excel2resources
|
|
15
|
+
from dsp_tools.error.exceptions import BaseError
|
|
16
|
+
from dsp_tools.error.exceptions import InputError
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def old_excel2json(
    data_model_files: str,
    path_to_output_file: str,
) -> bool:
    """Convert a folder of Excel files into a JSON data model file (old list format).

    The folder must be laid out as follows:

    ::

        data_model_files
        ├── json_header.xlsx (optional)
        ├── lists
        │   ├── de.xlsx
        │   └── en.xlsx
        └── onto_name (onto_label)
            ├── properties.xlsx
            └── resources.xlsx

    The file names must match exactly. The "lists" folder is optional, because a DSP project
    may have no lists. Only XLSX files are allowed.

    Args:
        data_model_files: path to the folder (called "data_model_files" in the example)
        path_to_output_file: path to the file where the output JSON file will be saved

    Raises:
        InputError: if something went wrong
        BaseError: if something went wrong

    Returns:
        True if everything went well
    """
    listfolder, onto_folders = _old_validate_folder_structure_and_get_filenames(data_model_files)
    overall_success, project = _old_create_project_json(data_model_files, listfolder, onto_folders)

    serialized = json.dumps(project, indent=4, ensure_ascii=False)
    with open(path_to_output_file, "w", encoding="utf-8") as outfile:
        outfile.write(serialized)

    print(f"JSON project file successfully saved at {path_to_output_file}")
    return overall_success
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _old_validate_folder_structure_and_get_filenames(data_model_files: str) -> tuple[list[Path], list[Path]]:
    """Check the folder layout expected by old_excel2json and print the files that will be processed.

    Args:
        data_model_files: path to the project folder

    Raises:
        InputError: if the path is not a directory, or if it contains unexpected subfolders

    Returns:
        the "lists" folder (wrapped in a list, possibly empty) and the ontology folders
    """
    root = Path(data_model_files)
    if not root.is_dir():
        raise InputError(f"ERROR: {data_model_files} is not a directory.")
    sub_folders = [child for child in root.glob("*") if _non_hidden(child) and child.is_dir()]
    onto_folders, onto_files = _get_and_validate_onto_folder(root, sub_folders)
    listfolder, list_files = _old_get_and_validate_list_folder(data_model_files, sub_folders)
    files_to_process = [*onto_files, *list_files]
    # every non-hidden subfolder must be accounted for, either as "lists" or as an ontology folder
    if len(sub_folders) != len(onto_folders) + len(listfolder):
        raise InputError(
            f"The only allowed subfolders in '{data_model_files}' are 'lists' "
            "and folders that match the pattern 'onto_name (onto_label)'"
        )
    json_header = root / Path("json_header.xlsx")
    if json_header.exists():
        files_to_process.append(str(json_header))
    print("The following files will be processed:")
    print(*(f" - {file}" for file in files_to_process), sep="\n")
    return listfolder, onto_folders
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _old_get_and_validate_list_folder(data_model_files: str, folder: list[Path]) -> tuple[list[Path], list[str]]:
    """Find the optional "lists" subfolder and validate the names of the files it contains.

    Args:
        data_model_files: path to the project folder (used for error/report messages)
        folder: the non-hidden direct subfolders of the project folder

    Raises:
        InputError: if the "lists" folder contains a file that is not one of the supported language files

    Returns:
        the "lists" folder wrapped in a list (empty if absent), and the list of files to process
    """
    processed_files: list[str] = []
    listfolder = [x for x in folder if x.is_dir() and x.name == "lists"]
    if listfolder:
        listfolder_contents = [x for x in Path(listfolder[0]).glob("*") if _non_hidden(x)]
        # anchored fullmatch with an escaped dot: the previous unanchored search for
        # r"(de|en|fr|it|rm).xlsx" also accepted names like "unde.xlsx" or "deXxlsx"
        # that merely contain a look-alike substring
        if not all(regex.fullmatch(r"(de|en|fr|it|rm)\.xlsx", file.name) for file in listfolder_contents):
            raise InputError(
                f"The only files allowed in '{data_model_files}/lists' are en.xlsx, de.xlsx, fr.xlsx, it.xlsx, rm.xlsx"
            )
        processed_files = [f"{data_model_files}/lists/{file.name}" for file in listfolder_contents]
    return listfolder, processed_files
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def excel2json(
    data_model_files: str,
    path_to_output_file: str,
) -> bool:
    """Convert a folder of Excel files into a JSON data model file.

    The folder must be laid out as follows:

    ::

        data_model_files
        ├── json_header.xlsx (optional)
        ├── lists
        │   ├── list.xlsx
        │   └── list_1.xlsx
        └── onto_name (onto_label)
            ├── properties.xlsx
            └── resources.xlsx

    The file names must match exactly. The "lists" folder is optional, because a DSP project
    may have no lists. Only XLSX files are allowed.

    Args:
        data_model_files: path to the folder (called "data_model_files" in the example)
        path_to_output_file: path to the file where the output JSON file will be saved

    Raises:
        InputError: if something went wrong

    Returns:
        True if everything went well
    """
    listfolder, onto_folders = _validate_folder_structure_and_get_filenames(Path(data_model_files))
    overall_success, project = _create_project_json(data_model_files, onto_folders, listfolder)

    with open(path_to_output_file, "w", encoding="utf-8") as outfile:
        json.dump(project, outfile, indent=4, ensure_ascii=False)

    print(f"JSON project file successfully saved at {path_to_output_file}")
    return overall_success
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _validate_folder_structure_and_get_filenames(data_model_files: Path) -> tuple[Path | None, list[Path]]:
    """Check the project folder layout and print the files that will be processed.

    Args:
        data_model_files: path to the project folder

    Raises:
        InputError: if the path is not a directory, or if the ontology folders are invalid

    Returns:
        the "lists" folder (None if absent) and the ontology folders
    """
    if not data_model_files.is_dir():
        raise InputError(f"ERROR: {data_model_files} is not a directory.")
    visible_children = [child for child in data_model_files.glob("*") if _non_hidden(child)]
    onto_folders, processed_onto = _get_and_validate_onto_folder(data_model_files, visible_children)
    listfolder, processed_lists = _get_and_validate_list_folder(data_model_files)
    processed_files = [*processed_onto, *processed_lists]
    print("The following files will be processed:")
    print(*(f" - {file}" for file in processed_files), sep="\n")
    return listfolder, onto_folders
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _get_and_validate_onto_folder(data_model_files: Path, folder: list[Path]) -> tuple[list[Path], list[str]]:
    """Find the ontology folders ("onto_name (onto_label)") and validate their contents.

    Args:
        data_model_files: path to the project folder (used for error/report messages)
        folder: the non-hidden direct children of the project folder

    Raises:
        InputError: if there is no ontology folder, or if an ontology folder does not contain
            exactly "properties.xlsx" and "resources.xlsx"

    Returns:
        the ontology folders and the list of files to process
    """
    onto_folders = [x for x in folder if x.is_dir() and regex.search(r"([\w.-]+) \(([\w.\- ]+)\)", x.name)]
    if not onto_folders:
        raise InputError(
            f"'{data_model_files}' must contain at least one subfolder named after the pattern 'onto_name (onto_label)'"
        )
    processed_files: list[str] = []
    for onto_folder in onto_folders:
        contents = sorted(entry.name for entry in onto_folder.glob("*") if _non_hidden(entry))
        if contents != ["properties.xlsx", "resources.xlsx"]:
            raise InputError(
                f"ERROR: '{data_model_files}/{onto_folder.name}' must contain one file 'properties.xlsx' "
                "and one file 'resources.xlsx', but nothing else."
            )
        processed_files.extend(f"{data_model_files}/{onto_folder.name}/{file}" for file in contents)
    return onto_folders, processed_files
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _get_and_validate_list_folder(
    data_model_files: Path,
) -> tuple[Path | None, list[str]]:
    """Return the optional "lists" folder and the sorted list files it contains.

    Args:
        data_model_files: path to the project folder

    Returns:
        the "lists" folder (None if absent) and the sorted paths of the list files
    """
    list_dir = data_model_files / "lists"
    if not list_dir.is_dir():
        return None, []
    found = sorted(str(file) for file in list_dir.glob("*list*.xlsx") if _non_hidden(file))
    return list_dir, found
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _non_hidden(path: Path) -> bool:
|
|
185
|
+
return not regex.search(r"^(\.|~\$).+", path.name)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _old_create_project_json(
    data_model_files: str, listfolder: list[Path], onto_folders: list[Path]
) -> tuple[bool, dict[str, Any]]:
    """Assemble the full JSON project from the lists (old Excel format) and ontology folders.

    Args:
        data_model_files: path to the project folder
        listfolder: the "lists" folder wrapped in a list (empty if absent)
        onto_folders: the ontology folders

    Returns:
        overall success flag and the assembled project dict
    """
    if listfolder:
        lists, lists_ok = old_excel2lists(excelfolder=f"{data_model_files}/lists")
    else:
        lists, lists_ok = None, True
    ontologies, default_permissions_overrule, ontos_ok = _get_ontologies(data_model_files, onto_folders)
    overall_success = lists_ok and ontos_ok
    project = get_json_header(Path(data_model_files) / "json_header.xlsx").to_dict()
    if default_permissions_overrule.non_empty():
        project["project"]["default_permissions_overrule"] = default_permissions_overrule.serialize()
    if lists:
        project["project"]["lists"] = lists
    project["project"]["ontologies"] = ontologies
    project["project"] = _sort_project_dict(project["project"])
    return overall_success, project
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _create_project_json(
    data_model_files: str, onto_folders: list[Path], list_folder: Path | None
) -> tuple[bool, dict[str, Any]]:
    """Assemble the full JSON project from the lists and ontology folders.

    Args:
        data_model_files: path to the project folder
        onto_folders: the ontology folders
        list_folder: the "lists" folder, or None if absent

    Returns:
        overall success flag and the assembled project dict
    """
    lists = None
    lists_ok = True
    if list_folder:
        lists, lists_ok = excel2lists(list_folder)
    ontologies, default_permissions_overrule, ontos_ok = _get_ontologies(data_model_files, onto_folders)
    overall_success = lists_ok and ontos_ok
    project = get_json_header(Path(data_model_files) / "json_header.xlsx").to_dict()
    if default_permissions_overrule.non_empty():
        project["project"]["default_permissions_overrule"] = default_permissions_overrule.serialize()
    if lists:
        project["project"]["lists"] = lists
    project["project"]["ontologies"] = ontologies
    project["project"] = _sort_project_dict(project["project"])
    return overall_success, project
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _sort_project_dict(unsorted_project_dict: dict[str, Any]) -> dict[str, Any]:
|
|
231
|
+
# order how the keys should appear in the JSON file
|
|
232
|
+
ordered_keys = [
|
|
233
|
+
"shortcode",
|
|
234
|
+
"shortname",
|
|
235
|
+
"longname",
|
|
236
|
+
"descriptions",
|
|
237
|
+
"keywords",
|
|
238
|
+
"enabled_licenses",
|
|
239
|
+
"default_permissions",
|
|
240
|
+
"default_permissions_overrule",
|
|
241
|
+
"users",
|
|
242
|
+
"groups",
|
|
243
|
+
"lists",
|
|
244
|
+
"ontologies",
|
|
245
|
+
]
|
|
246
|
+
# filter out unused keys, to prevent a KeyError
|
|
247
|
+
ordered_keys = [key for key in ordered_keys if key in unsorted_project_dict]
|
|
248
|
+
# important - if in the future, more keys are added, they must be added to the list above
|
|
249
|
+
if any(forgotten_keys := [key for key in unsorted_project_dict if key not in ordered_keys]):
|
|
250
|
+
raise BaseError(
|
|
251
|
+
"The list of keys is outdated. During sorting, the following keys would be discarded: "
|
|
252
|
+
+ ", ".join(forgotten_keys)
|
|
253
|
+
)
|
|
254
|
+
# do the actual sorting
|
|
255
|
+
return {key: unsorted_project_dict[key] for key in ordered_keys}
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _get_ontologies(
    data_model_files: str, onto_folders: list[Path]
) -> tuple[list[dict[str, Any]], PermissionsOverrulesPrefixed, bool]:
    """Convert every ontology folder ("properties.xlsx" + "resources.xlsx") into its JSON form.

    Args:
        data_model_files: path to the project folder
        onto_folders: the ontology folders, named after the pattern "onto_name (onto_label)"

    Returns:
        the ontologies as JSON-ready dicts, the collected permission overrules, and a success flag
    """
    success = True
    ontologies: list[dict[str, Any]] = []
    permissions_overrules = PermissionsOverrulesPrefixed(private=[], limited_view=[])
    for onto_folder in onto_folders:
        # the folder name was validated against this pattern earlier, so the search cannot fail
        name, label = regex.search(r"([\w.-]+) \(([\w.\- ]+)\)", onto_folder.name).groups()  # type: ignore[union-attr]
        resources, r_overrules, resources_ok = excel2resources(f"{data_model_files}/{onto_folder.name}/resources.xlsx")
        properties, p_overrules, properties_ok = excel2properties(f"{data_model_files}/{onto_folder.name}/properties.xlsx")
        success = success and resources_ok and properties_ok
        ontologies.append(
            {
                "name": name,
                "label": label,
                "properties": properties,
                "resources": resources,
            }
        )
        permissions_overrules.add_overrules(r_overrules, name)
        permissions_overrules.add_overrules(p_overrules, name)
    return ontologies, permissions_overrules, success
|