dsp-tools 0.9.13__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (348) hide show
  1. dsp_tools/__init__.py +5 -0
  2. dsp_tools/cli/args.py +47 -0
  3. dsp_tools/cli/call_action.py +85 -0
  4. dsp_tools/cli/call_action_files_only.py +101 -0
  5. dsp_tools/cli/call_action_with_network.py +207 -0
  6. dsp_tools/cli/create_parsers.py +479 -0
  7. dsp_tools/cli/entry_point.py +322 -0
  8. dsp_tools/cli/utils.py +87 -0
  9. dsp_tools/clients/CLAUDE.md +420 -0
  10. dsp_tools/clients/authentication_client.py +14 -0
  11. dsp_tools/clients/authentication_client_live.py +66 -0
  12. dsp_tools/clients/connection.py +35 -0
  13. dsp_tools/clients/connection_live.py +233 -0
  14. dsp_tools/clients/fuseki_metrics.py +60 -0
  15. dsp_tools/clients/group_user_clients.py +35 -0
  16. dsp_tools/clients/group_user_clients_live.py +181 -0
  17. dsp_tools/clients/legal_info_client.py +23 -0
  18. dsp_tools/clients/legal_info_client_live.py +132 -0
  19. dsp_tools/clients/list_client.py +49 -0
  20. dsp_tools/clients/list_client_live.py +166 -0
  21. dsp_tools/clients/metadata_client.py +24 -0
  22. dsp_tools/clients/metadata_client_live.py +47 -0
  23. dsp_tools/clients/ontology_clients.py +49 -0
  24. dsp_tools/clients/ontology_create_client_live.py +166 -0
  25. dsp_tools/clients/ontology_get_client_live.py +80 -0
  26. dsp_tools/clients/permissions_client.py +68 -0
  27. dsp_tools/clients/project_client.py +16 -0
  28. dsp_tools/clients/project_client_live.py +66 -0
  29. dsp_tools/commands/create/communicate_problems.py +24 -0
  30. dsp_tools/commands/create/create.py +134 -0
  31. dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
  32. dsp_tools/commands/create/create_on_server/classes.py +99 -0
  33. dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
  34. dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
  35. dsp_tools/commands/create/create_on_server/group_users.py +165 -0
  36. dsp_tools/commands/create/create_on_server/lists.py +163 -0
  37. dsp_tools/commands/create/create_on_server/mappers.py +12 -0
  38. dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
  39. dsp_tools/commands/create/create_on_server/ontology.py +52 -0
  40. dsp_tools/commands/create/create_on_server/project.py +68 -0
  41. dsp_tools/commands/create/create_on_server/properties.py +119 -0
  42. dsp_tools/commands/create/exceptions.py +29 -0
  43. dsp_tools/commands/create/lists_only.py +66 -0
  44. dsp_tools/commands/create/models/create_problems.py +87 -0
  45. dsp_tools/commands/create/models/parsed_ontology.py +88 -0
  46. dsp_tools/commands/create/models/parsed_project.py +81 -0
  47. dsp_tools/commands/create/models/rdf_ontology.py +12 -0
  48. dsp_tools/commands/create/models/server_project_info.py +100 -0
  49. dsp_tools/commands/create/parsing/parse_lists.py +45 -0
  50. dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
  51. dsp_tools/commands/create/parsing/parse_project.py +149 -0
  52. dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
  53. dsp_tools/commands/create/project_validate.py +595 -0
  54. dsp_tools/commands/create/serialisation/ontology.py +119 -0
  55. dsp_tools/commands/create/serialisation/project.py +44 -0
  56. dsp_tools/commands/excel2json/CLAUDE.md +101 -0
  57. dsp_tools/commands/excel2json/json_header.py +321 -0
  58. dsp_tools/commands/excel2json/lists/__init__.py +0 -0
  59. dsp_tools/commands/excel2json/lists/compliance_checks.py +292 -0
  60. dsp_tools/commands/excel2json/lists/make_lists.py +247 -0
  61. dsp_tools/commands/excel2json/lists/models/__init__.py +0 -0
  62. dsp_tools/commands/excel2json/lists/models/deserialise.py +30 -0
  63. dsp_tools/commands/excel2json/lists/models/input_error.py +216 -0
  64. dsp_tools/commands/excel2json/lists/models/serialise.py +57 -0
  65. dsp_tools/commands/excel2json/lists/utils.py +81 -0
  66. dsp_tools/commands/excel2json/models/__init__.py +0 -0
  67. dsp_tools/commands/excel2json/models/input_error.py +416 -0
  68. dsp_tools/commands/excel2json/models/json_header.py +175 -0
  69. dsp_tools/commands/excel2json/models/list_node_name.py +16 -0
  70. dsp_tools/commands/excel2json/models/ontology.py +76 -0
  71. dsp_tools/commands/excel2json/old_lists.py +328 -0
  72. dsp_tools/commands/excel2json/project.py +280 -0
  73. dsp_tools/commands/excel2json/properties.py +370 -0
  74. dsp_tools/commands/excel2json/resources.py +336 -0
  75. dsp_tools/commands/excel2json/utils.py +352 -0
  76. dsp_tools/commands/excel2xml/__init__.py +7 -0
  77. dsp_tools/commands/excel2xml/excel2xml_cli.py +523 -0
  78. dsp_tools/commands/excel2xml/excel2xml_lib.py +1953 -0
  79. dsp_tools/commands/excel2xml/propertyelement.py +47 -0
  80. dsp_tools/commands/get/__init__.py +0 -0
  81. dsp_tools/commands/get/get.py +166 -0
  82. dsp_tools/commands/get/get_permissions.py +257 -0
  83. dsp_tools/commands/get/get_permissions_legacy.py +89 -0
  84. dsp_tools/commands/get/legacy_models/__init__.py +0 -0
  85. dsp_tools/commands/get/legacy_models/context.py +318 -0
  86. dsp_tools/commands/get/legacy_models/group.py +241 -0
  87. dsp_tools/commands/get/legacy_models/helpers.py +47 -0
  88. dsp_tools/commands/get/legacy_models/listnode.py +390 -0
  89. dsp_tools/commands/get/legacy_models/model.py +12 -0
  90. dsp_tools/commands/get/legacy_models/ontology.py +324 -0
  91. dsp_tools/commands/get/legacy_models/project.py +366 -0
  92. dsp_tools/commands/get/legacy_models/propertyclass.py +417 -0
  93. dsp_tools/commands/get/legacy_models/resourceclass.py +676 -0
  94. dsp_tools/commands/get/legacy_models/user.py +438 -0
  95. dsp_tools/commands/get/models/__init__.py +0 -0
  96. dsp_tools/commands/get/models/permissions_models.py +10 -0
  97. dsp_tools/commands/id2iri.py +258 -0
  98. dsp_tools/commands/ingest_xmlupload/__init__.py +0 -0
  99. dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +178 -0
  100. dsp_tools/commands/ingest_xmlupload/create_resources/__init__.py +0 -0
  101. dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +69 -0
  102. dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +166 -0
  103. dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +121 -0
  104. dsp_tools/commands/ingest_xmlupload/ingest_files/__init__.py +0 -0
  105. dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +64 -0
  106. dsp_tools/commands/ingest_xmlupload/upload_files/__init__.py +0 -0
  107. dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +20 -0
  108. dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +57 -0
  109. dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +66 -0
  110. dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +67 -0
  111. dsp_tools/commands/resume_xmlupload/__init__.py +0 -0
  112. dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +96 -0
  113. dsp_tools/commands/start_stack.py +428 -0
  114. dsp_tools/commands/update_legal/CLAUDE.md +344 -0
  115. dsp_tools/commands/update_legal/__init__.py +0 -0
  116. dsp_tools/commands/update_legal/core.py +182 -0
  117. dsp_tools/commands/update_legal/csv_operations.py +135 -0
  118. dsp_tools/commands/update_legal/models.py +87 -0
  119. dsp_tools/commands/update_legal/xml_operations.py +247 -0
  120. dsp_tools/commands/validate_data/CLAUDE.md +159 -0
  121. dsp_tools/commands/validate_data/__init__.py +0 -0
  122. dsp_tools/commands/validate_data/constants.py +59 -0
  123. dsp_tools/commands/validate_data/mappers.py +143 -0
  124. dsp_tools/commands/validate_data/models/__init__.py +0 -0
  125. dsp_tools/commands/validate_data/models/api_responses.py +45 -0
  126. dsp_tools/commands/validate_data/models/input_problems.py +119 -0
  127. dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
  128. dsp_tools/commands/validate_data/models/validation.py +106 -0
  129. dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
  130. dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
  131. dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
  132. dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
  133. dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
  134. dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
  135. dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
  136. dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
  137. dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
  138. dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
  139. dsp_tools/commands/validate_data/sparql/cardinality_shacl.py +209 -0
  140. dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
  141. dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
  142. dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
  143. dsp_tools/commands/validate_data/utils.py +59 -0
  144. dsp_tools/commands/validate_data/validate_data.py +283 -0
  145. dsp_tools/commands/validate_data/validation/__init__.py +0 -0
  146. dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
  147. dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
  148. dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
  149. dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
  150. dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
  151. dsp_tools/commands/xmlupload/__init__.py +0 -0
  152. dsp_tools/commands/xmlupload/iri_resolver.py +21 -0
  153. dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
  154. dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
  155. dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
  156. dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
  157. dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
  158. dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
  159. dsp_tools/commands/xmlupload/models/__init__.py +0 -0
  160. dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
  161. dsp_tools/commands/xmlupload/models/formatted_text_value.py +10 -0
  162. dsp_tools/commands/xmlupload/models/ingest.py +143 -0
  163. dsp_tools/commands/xmlupload/models/input_problems.py +58 -0
  164. dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
  165. dsp_tools/commands/xmlupload/models/permission.py +45 -0
  166. dsp_tools/commands/xmlupload/models/permissions_parsed.py +93 -0
  167. dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
  168. dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
  169. dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
  170. dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
  171. dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
  172. dsp_tools/commands/xmlupload/models/upload_clients.py +14 -0
  173. dsp_tools/commands/xmlupload/models/upload_state.py +20 -0
  174. dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
  175. dsp_tools/commands/xmlupload/prepare_xml_input/ark2iri.py +55 -0
  176. dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
  177. dsp_tools/commands/xmlupload/prepare_xml_input/iiif_uri_validator.py +50 -0
  178. dsp_tools/commands/xmlupload/prepare_xml_input/list_client.py +120 -0
  179. dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
  180. dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
  181. dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
  182. dsp_tools/commands/xmlupload/resource_create_client.py +25 -0
  183. dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
  184. dsp_tools/commands/xmlupload/stash/__init__.py +0 -0
  185. dsp_tools/commands/xmlupload/stash/analyse_circular_reference_graph.py +236 -0
  186. dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
  187. dsp_tools/commands/xmlupload/stash/graph_models.py +87 -0
  188. dsp_tools/commands/xmlupload/stash/stash_circular_references.py +68 -0
  189. dsp_tools/commands/xmlupload/stash/stash_models.py +109 -0
  190. dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +106 -0
  191. dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +196 -0
  192. dsp_tools/commands/xmlupload/upload_config.py +76 -0
  193. dsp_tools/commands/xmlupload/write_diagnostic_info.py +27 -0
  194. dsp_tools/commands/xmlupload/xmlupload.py +516 -0
  195. dsp_tools/config/__init__.py +0 -0
  196. dsp_tools/config/logger_config.py +69 -0
  197. dsp_tools/config/warnings_config.py +32 -0
  198. dsp_tools/error/__init__.py +0 -0
  199. dsp_tools/error/custom_warnings.py +39 -0
  200. dsp_tools/error/exceptions.py +204 -0
  201. dsp_tools/error/problems.py +10 -0
  202. dsp_tools/error/xmllib_errors.py +20 -0
  203. dsp_tools/error/xmllib_warnings.py +54 -0
  204. dsp_tools/error/xmllib_warnings_util.py +159 -0
  205. dsp_tools/error/xsd_validation_error_msg.py +19 -0
  206. dsp_tools/legacy_models/__init__.py +0 -0
  207. dsp_tools/legacy_models/datetimestamp.py +81 -0
  208. dsp_tools/legacy_models/langstring.py +253 -0
  209. dsp_tools/legacy_models/projectContext.py +49 -0
  210. dsp_tools/py.typed +0 -0
  211. dsp_tools/resources/schema/data.xsd +648 -0
  212. dsp_tools/resources/schema/lists-only.json +72 -0
  213. dsp_tools/resources/schema/project.json +1258 -0
  214. dsp_tools/resources/schema/properties-only.json +874 -0
  215. dsp_tools/resources/schema/resources-only.json +140 -0
  216. dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
  217. dsp_tools/resources/start-stack/docker-compose.override.yml +11 -0
  218. dsp_tools/resources/start-stack/docker-compose.yml +88 -0
  219. dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
  220. dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
  221. dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
  222. dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
  223. dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
  224. dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
  225. dsp_tools/utils/__init__.py +0 -0
  226. dsp_tools/utils/ansi_colors.py +32 -0
  227. dsp_tools/utils/data_formats/__init__.py +0 -0
  228. dsp_tools/utils/data_formats/date_util.py +166 -0
  229. dsp_tools/utils/data_formats/iri_util.py +30 -0
  230. dsp_tools/utils/data_formats/shared.py +81 -0
  231. dsp_tools/utils/data_formats/uri_util.py +76 -0
  232. dsp_tools/utils/fuseki_bloating.py +63 -0
  233. dsp_tools/utils/json_parsing.py +22 -0
  234. dsp_tools/utils/rdf_constants.py +42 -0
  235. dsp_tools/utils/rdflib_utils.py +10 -0
  236. dsp_tools/utils/replace_id_with_iri.py +66 -0
  237. dsp_tools/utils/request_utils.py +238 -0
  238. dsp_tools/utils/xml_parsing/__init__.py +0 -0
  239. dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
  240. dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
  241. dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
  242. dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
  243. dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
  244. dsp_tools/xmllib/CLAUDE.md +302 -0
  245. dsp_tools/xmllib/__init__.py +49 -0
  246. dsp_tools/xmllib/general_functions.py +877 -0
  247. dsp_tools/xmllib/internal/__init__.py +0 -0
  248. dsp_tools/xmllib/internal/checkers.py +162 -0
  249. dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
  250. dsp_tools/xmllib/internal/constants.py +46 -0
  251. dsp_tools/xmllib/internal/input_converters.py +155 -0
  252. dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
  253. dsp_tools/xmllib/internal/serialise_resource.py +177 -0
  254. dsp_tools/xmllib/internal/serialise_values.py +152 -0
  255. dsp_tools/xmllib/internal/type_aliases.py +11 -0
  256. dsp_tools/xmllib/models/__init__.py +0 -0
  257. dsp_tools/xmllib/models/config_options.py +28 -0
  258. dsp_tools/xmllib/models/date_formats.py +48 -0
  259. dsp_tools/xmllib/models/dsp_base_resources.py +1542 -0
  260. dsp_tools/xmllib/models/internal/__init__.py +0 -0
  261. dsp_tools/xmllib/models/internal/file_values.py +172 -0
  262. dsp_tools/xmllib/models/internal/geometry.py +162 -0
  263. dsp_tools/xmllib/models/internal/migration_metadata.py +55 -0
  264. dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
  265. dsp_tools/xmllib/models/internal/values.py +342 -0
  266. dsp_tools/xmllib/models/licenses/__init__.py +0 -0
  267. dsp_tools/xmllib/models/licenses/other.py +59 -0
  268. dsp_tools/xmllib/models/licenses/recommended.py +107 -0
  269. dsp_tools/xmllib/models/permissions.py +41 -0
  270. dsp_tools/xmllib/models/res.py +1782 -0
  271. dsp_tools/xmllib/models/root.py +348 -0
  272. dsp_tools/xmllib/value_checkers.py +434 -0
  273. dsp_tools/xmllib/value_converters.py +777 -0
  274. dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
  275. dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
  276. dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
  277. dsp_tools-18.3.0.post13.dist-info/entry_points.txt +3 -0
  278. dsp_tools-0.9.13.dist-info/LICENSE +0 -674
  279. dsp_tools-0.9.13.dist-info/METADATA +0 -144
  280. dsp_tools-0.9.13.dist-info/RECORD +0 -71
  281. dsp_tools-0.9.13.dist-info/WHEEL +0 -5
  282. dsp_tools-0.9.13.dist-info/entry_points.txt +0 -3
  283. dsp_tools-0.9.13.dist-info/top_level.txt +0 -1
  284. dsplib/models/connection.py +0 -272
  285. dsplib/models/group.py +0 -296
  286. dsplib/models/helpers.py +0 -505
  287. dsplib/models/langstring.py +0 -277
  288. dsplib/models/listnode.py +0 -578
  289. dsplib/models/model.py +0 -20
  290. dsplib/models/ontology.py +0 -448
  291. dsplib/models/permission.py +0 -112
  292. dsplib/models/project.py +0 -547
  293. dsplib/models/propertyclass.py +0 -505
  294. dsplib/models/resource.py +0 -366
  295. dsplib/models/resourceclass.py +0 -810
  296. dsplib/models/sipi.py +0 -30
  297. dsplib/models/user.py +0 -731
  298. dsplib/models/value.py +0 -1000
  299. dsplib/utils/knora-data-schema.xsd +0 -454
  300. dsplib/utils/knora-schema-lists.json +0 -83
  301. dsplib/utils/knora-schema.json +0 -434
  302. dsplib/utils/onto_commons.py +0 -24
  303. dsplib/utils/onto_create_lists.py +0 -73
  304. dsplib/utils/onto_create_ontology.py +0 -442
  305. dsplib/utils/onto_get.py +0 -58
  306. dsplib/utils/onto_validate.py +0 -33
  307. dsplib/utils/xml_upload.py +0 -539
  308. dsplib/widgets/doublepassword.py +0 -80
  309. knora/MLS-import-libraries.py +0 -84
  310. knora/dsp_tools.py +0 -96
  311. knora/dsplib/models/connection.py +0 -272
  312. knora/dsplib/models/group.py +0 -296
  313. knora/dsplib/models/helpers.py +0 -506
  314. knora/dsplib/models/langstring.py +0 -277
  315. knora/dsplib/models/listnode.py +0 -578
  316. knora/dsplib/models/model.py +0 -20
  317. knora/dsplib/models/ontology.py +0 -448
  318. knora/dsplib/models/permission.py +0 -112
  319. knora/dsplib/models/project.py +0 -583
  320. knora/dsplib/models/propertyclass.py +0 -505
  321. knora/dsplib/models/resource.py +0 -416
  322. knora/dsplib/models/resourceclass.py +0 -811
  323. knora/dsplib/models/sipi.py +0 -35
  324. knora/dsplib/models/user.py +0 -731
  325. knora/dsplib/models/value.py +0 -1000
  326. knora/dsplib/utils/knora-data-schema.xsd +0 -464
  327. knora/dsplib/utils/knora-schema-lists.json +0 -83
  328. knora/dsplib/utils/knora-schema.json +0 -444
  329. knora/dsplib/utils/onto_commons.py +0 -24
  330. knora/dsplib/utils/onto_create_lists.py +0 -73
  331. knora/dsplib/utils/onto_create_ontology.py +0 -451
  332. knora/dsplib/utils/onto_get.py +0 -58
  333. knora/dsplib/utils/onto_validate.py +0 -33
  334. knora/dsplib/utils/xml_upload.py +0 -540
  335. knora/dsplib/widgets/doublepassword.py +0 -80
  336. knora/knora.py +0 -2108
  337. knora/test.py +0 -99
  338. knora/testit.py +0 -76
  339. knora/xml2knora.py +0 -633
  340. {dsplib → dsp_tools/cli}/__init__.py +0 -0
  341. {dsplib/models → dsp_tools/clients}/__init__.py +0 -0
  342. {dsplib/utils → dsp_tools/commands}/__init__.py +0 -0
  343. {dsplib/widgets → dsp_tools/commands/create}/__init__.py +0 -0
  344. {knora → dsp_tools/commands/create/create_on_server}/__init__.py +0 -0
  345. {knora/dsplib → dsp_tools/commands/create/models}/__init__.py +0 -0
  346. {knora/dsplib/models → dsp_tools/commands/create/parsing}/__init__.py +0 -0
  347. {knora/dsplib/utils → dsp_tools/commands/create/serialisation}/__init__.py +0 -0
  348. {knora/dsplib/widgets → dsp_tools/commands/excel2json}/__init__.py +0 -0
@@ -0,0 +1,292 @@
1
+ from __future__ import annotations
2
+
3
+ import warnings
4
+ from collections import defaultdict
5
+ from typing import Any
6
+ from typing import cast
7
+
8
+ import pandas as pd
9
+ import regex
10
+ from loguru import logger
11
+
12
+ from dsp_tools.commands.excel2json.lists.models.deserialise import Columns
13
+ from dsp_tools.commands.excel2json.lists.models.deserialise import ExcelSheet
14
+ from dsp_tools.commands.excel2json.lists.models.input_error import CollectedSheetProblems
15
+ from dsp_tools.commands.excel2json.lists.models.input_error import DuplicateIDProblem
16
+ from dsp_tools.commands.excel2json.lists.models.input_error import DuplicatesCustomIDInProblem
17
+ from dsp_tools.commands.excel2json.lists.models.input_error import DuplicatesInSheetProblem
18
+ from dsp_tools.commands.excel2json.lists.models.input_error import DuplicatesListNameProblem
19
+ from dsp_tools.commands.excel2json.lists.models.input_error import ListCreationProblem
20
+ from dsp_tools.commands.excel2json.lists.models.input_error import ListInformation
21
+ from dsp_tools.commands.excel2json.lists.models.input_error import ListSheetComplianceProblem
22
+ from dsp_tools.commands.excel2json.lists.models.input_error import ListSheetContentProblem
23
+ from dsp_tools.commands.excel2json.lists.models.input_error import MinimumRowsProblem
24
+ from dsp_tools.commands.excel2json.lists.models.input_error import MissingExpectedColumn
25
+ from dsp_tools.commands.excel2json.lists.models.input_error import MissingNodeColumn
26
+ from dsp_tools.commands.excel2json.lists.models.input_error import MissingNodeTranslationProblem
27
+ from dsp_tools.commands.excel2json.lists.models.input_error import MissingTranslationsSheetProblem
28
+ from dsp_tools.commands.excel2json.lists.models.input_error import MultipleListPerSheetProblem
29
+ from dsp_tools.commands.excel2json.lists.models.input_error import NodesPerRowProblem
30
+ from dsp_tools.commands.excel2json.lists.models.input_error import SheetProblem
31
+ from dsp_tools.commands.excel2json.lists.utils import get_columns_of_preferred_lang
32
+ from dsp_tools.commands.excel2json.lists.utils import get_hierarchy_nums
33
+ from dsp_tools.commands.excel2json.lists.utils import get_lang_string_from_column_name
34
+ from dsp_tools.commands.excel2json.models.input_error import PositionInExcel
35
+ from dsp_tools.error.custom_warnings import DspToolsUserWarning
36
+ from dsp_tools.error.exceptions import InputError
37
+ from dsp_tools.error.problems import Problem
38
+
39
+
40
def make_all_excel_compliance_checks(sheet_list: list[ExcelSheet]) -> None:
    """Run all format-compliance checks on the parsed Excel sheets.

    The checks are order-dependent: later checks rely on invariants that the
    earlier ones establish, so the sequence below must not be reordered.
    """
    ordered_checks = (
        _check_duplicates_all_excels,
        _make_shape_compliance_all_excels,
        _check_for_missing_translations_all_excels,
        _check_for_unique_list_names,
        _check_for_erroneous_entries_all_excels,
    )
    for check in ordered_checks:
        check(sheet_list)
49
+
50
+
51
def _check_duplicates_all_excels(sheet_list: list[ExcelSheet]) -> None:
    """
    Check if the excel files contain duplicates with regard to the node names,
    and if the custom IDs are unique across all excel files.
    A duplicate in the node names is defined as several rows with the same entries in the columns with the node names.

    Args:
        sheet_list: class instances representing an excel file with sheets

    Raises:
        InputError: If any complete duplicates are found in the excel files.
    """
    collected: list[Problem] = []
    node_problems: list[SheetProblem] = []
    for sheet in sheet_list:
        if (node_problem := _check_for_duplicate_nodes_one_df(sheet)) is not None:
            node_problems.append(node_problem)
    if node_problems:
        collected.append(CollectedSheetProblems(node_problems))
    custom_id_problem = _check_for_duplicate_custom_id_all_excels(sheet_list)
    if custom_id_problem:
        collected.append(custom_id_problem)
    if not collected:
        return
    msg = ListCreationProblem(collected).execute_error_protocol()
    logger.error(msg)
    raise InputError(msg)
75
+
76
+
77
+ def _check_for_unique_list_names(sheet_list: list[ExcelSheet]) -> None:
78
+ """This functon checks that one sheet only has one list and that all lists have unique names."""
79
+ list_names: list[ListInformation] = []
80
+ all_problems: list[Problem] = []
81
+ sheet_problems: list[SheetProblem] = []
82
+ for sheet in sheet_list:
83
+ unique_list_names = list(sheet.df[f"{sheet.col_info.preferred_lang}_list"].unique())
84
+ if len(unique_list_names) != 1:
85
+ sheet_problems.append(MultipleListPerSheetProblem(sheet.excel_name, sheet.sheet_name, unique_list_names))
86
+ list_names.extend([ListInformation(sheet.excel_name, sheet.sheet_name, name) for name in unique_list_names])
87
+ if sheet_problems:
88
+ all_problems.append(CollectedSheetProblems(sheet_problems))
89
+ list_info_dict = defaultdict(list)
90
+ for item in list_names:
91
+ list_info_dict[item.list_name].append(item)
92
+ duplicate_list_names = []
93
+ for info in list_info_dict.values():
94
+ if len(info) > 1:
95
+ duplicate_list_names.extend(info)
96
+ if duplicate_list_names:
97
+ all_problems.append(DuplicatesListNameProblem(duplicate_list_names))
98
+ if all_problems:
99
+ msg = ListCreationProblem(all_problems).execute_error_protocol()
100
+ logger.error(msg)
101
+ raise InputError(msg)
102
+
103
+
104
def _check_for_duplicate_nodes_one_df(sheet: ExcelSheet) -> DuplicatesInSheetProblem | None:
    """Return a problem if several rows share identical values in all node-name columns, else None."""
    name_col_pattern = r"^(en|de|fr|it|rm)_(\d+|list)$"
    name_cols = [col for col in sheet.df.columns if regex.search(name_col_pattern, col)]
    duplicated_mask = sheet.df.duplicated(name_cols, keep=False)
    if not duplicated_mask.any():
        return None
    duplicated_rows = duplicated_mask.index[duplicated_mask].tolist()
    return DuplicatesInSheetProblem(sheet.excel_name, sheet.sheet_name, duplicated_rows)
112
+
113
+
114
+ def _check_for_duplicate_custom_id_all_excels(sheet_list: list[ExcelSheet]) -> DuplicatesCustomIDInProblem | None:
115
+ id_list = []
116
+ for sheet in sheet_list:
117
+ for i, row in sheet.df.iterrows():
118
+ if not pd.isna(row["id (optional)"]):
119
+ id_list.append(
120
+ {
121
+ "filename": sheet.excel_name,
122
+ "sheet_name": sheet.sheet_name,
123
+ "id": row["id (optional)"],
124
+ "row_number": int(str(i)) + 2,
125
+ }
126
+ )
127
+ id_df = pd.DataFrame.from_records(id_list)
128
+ if (duplicate_ids := id_df.duplicated("id", keep=False)).any():
129
+ problems: dict[str, DuplicateIDProblem] = defaultdict(lambda: DuplicateIDProblem())
130
+ for i, row in id_df[duplicate_ids].iterrows():
131
+ problems[row["id"]].custom_id = row["id"]
132
+ problems[row["id"]].excel_locations.append(
133
+ PositionInExcel(sheet=row["sheet_name"], excel_filename=row["filename"], row=row["row_number"])
134
+ )
135
+ final_problems = list(problems.values())
136
+ return DuplicatesCustomIDInProblem(final_problems)
137
+ return None
138
+
139
+
140
+ def _make_shape_compliance_all_excels(sheet_list: list[ExcelSheet]) -> None:
141
+ """Check if the excel files are compliant with the expected format."""
142
+ problems: list[SheetProblem] = [
143
+ p for sheet in sheet_list if (p := _make_shape_compliance_one_sheet(sheet)) is not None
144
+ ]
145
+ if problems:
146
+ msg = ListCreationProblem([CollectedSheetProblems(problems)]).execute_error_protocol()
147
+ logger.error(msg)
148
+ raise InputError(msg)
149
+
150
+
151
def _make_shape_compliance_one_sheet(sheet: ExcelSheet) -> ListSheetComplianceProblem | None:
    """Collect the shape problems of one sheet: too few rows, no node columns, or missing translation columns."""
    found: list[Problem] = []
    if len(sheet.df) < 2:
        found.append(MinimumRowsProblem())
    if not sheet.col_info.node_cols:
        found.append(MissingNodeColumn())
    missing_translation_cols = _check_if_all_translations_in_all_column_levels_present_one_sheet(sheet.df.columns)
    if missing_translation_cols:
        found.append(missing_translation_cols)
    # Unusual columns only trigger a warning, never a hard failure.
    _check_warn_unusual_columns_one_sheet(sheet.df.columns)
    if not found:
        return None
    return ListSheetComplianceProblem(sheet.excel_name, sheet.sheet_name, found)
163
+
164
+
165
def _check_warn_unusual_columns_one_sheet(cols: pd.Index[str]) -> None:
    """Emit a warning (no failure) for columns that do not follow the expected naming scheme."""
    expected_pattern = r"^(en|de|fr|it|rm)_(\d+|list|comments)|(id \(optional\))$"
    unexpected = [col for col in cols if not regex.search(expected_pattern, col)]
    if unexpected:
        warnings.warn(
            DspToolsUserWarning(
                f"The following columns do not conform to the expected format "
                f"and will not be included in the output: {', '.join(unexpected)}"
            )
        )
173
+
174
+
175
def _check_if_all_translations_in_all_column_levels_present_one_sheet(
    cols: pd.Index[str],
) -> MissingExpectedColumn | None:
    """Every hierarchy level (e.g. 1, 2, 3) and the list column must exist in every language found on the sheet."""
    found_languages = set()
    for col in cols:
        if (lang := get_lang_string_from_column_name(col)) is not None:
            found_languages.add(lang)
    levels = [str(num) for num in get_hierarchy_nums(cols)] + ["list"]
    required_cols = {f"{lang}_{level}" for lang in found_languages for level in levels}
    missing = required_cols - set(cols)
    if missing:
        return MissingExpectedColumn(missing)
    return None
192
+
193
+
194
+ def _check_for_missing_translations_all_excels(sheet_list: list[ExcelSheet]) -> None:
195
+ problems: list[SheetProblem] = [
196
+ p for sheet in sheet_list if (p := _check_for_missing_translations_one_sheet(sheet)) is not None
197
+ ]
198
+ if problems:
199
+ msg = ListCreationProblem([CollectedSheetProblems(problems)]).execute_error_protocol()
200
+ logger.error(msg)
201
+ raise InputError(msg)
202
+
203
+
204
def _check_for_missing_translations_one_sheet(sheet: ExcelSheet) -> MissingTranslationsSheetProblem | None:
    """Check every row of one sheet for incomplete translations; return a sheet-level problem or None."""
    row_problems = [
        problem
        for i, row in sheet.df.iterrows()
        if (problem := _check_missing_translations_one_row(int(str(i)), row, sheet.col_info))
    ]
    if row_problems:
        return MissingTranslationsSheetProblem(sheet.excel_name, sheet.sheet_name, row_problems)
    return None
212
+
213
+
214
def _check_missing_translations_one_row(
    row_index: int, row: pd.Series[Any], columns: Columns
) -> MissingNodeTranslationProblem | None:
    """Check one row for column groups where only some of the languages are filled in."""
    # Each node level, the list columns, and (if present) the comment columns form one group.
    column_groups = [node_group.columns for node_group in columns.node_cols]
    column_groups.append(columns.list_cols)
    if columns.comment_cols:
        column_groups.append(columns.comment_cols)
    empty_cols: list[str] = []
    for group in column_groups:
        empty_cols.extend(_check_for_missing_translations_one_column_group(row, group))
    if not empty_cols:
        return None
    return MissingNodeTranslationProblem(empty_columns=empty_cols, index_num=row_index)
226
+
227
+
228
+ def _check_for_missing_translations_one_column_group(row: pd.Series[Any], columns: list[str]) -> list[str]:
229
+ missing = row[columns].isna()
230
+ if missing.any() and not missing.all():
231
+ return [str(index) for index, is_missing in missing.items() if is_missing]
232
+ return []
233
+
234
+
235
+ def _check_for_erroneous_entries_all_excels(sheet_list: list[ExcelSheet]) -> None:
236
+ problems: list[SheetProblem] = [
237
+ p for sheet in sheet_list if (p := _check_for_erroneous_entries_one_list(sheet)) is not None
238
+ ]
239
+ if problems:
240
+ msg = ListCreationProblem([CollectedSheetProblems(problems)]).execute_error_protocol()
241
+ logger.error(msg)
242
+ raise InputError(msg)
243
+
244
+
245
def _check_for_erroneous_entries_one_list(sheet: ExcelSheet) -> ListSheetContentProblem | None:
    """Check the node hierarchy of one sheet, based on the columns of the preferred language."""
    lang = sheet.col_info.preferred_lang
    # The list column must come first, followed by the hierarchy levels in ascending order.
    relevant_cols = [f"{lang}_list", *sorted(get_columns_of_preferred_lang(sheet.df.columns, lang, r"\d+"))]
    node_problems = _check_for_erroneous_node_info_one_df(sheet.df, relevant_cols)
    if not node_problems:
        return None
    return ListSheetContentProblem(sheet.excel_name, sheet.sheet_name, cast(list[Problem], node_problems))
254
+
255
+
256
def _check_for_erroneous_node_info_one_df(df: pd.DataFrame, columns: list[str]) -> list[NodesPerRowProblem]:
    """Check every hierarchy level of the dataframe for malformed node rows.

    Args:
        df: dataframe with the node columns of one sheet
        columns: list column followed by the node columns, sorted by hierarchy level

    Returns:
        all problems found across all levels
    """
    problems: list[NodesPerRowProblem] = []
    # The original `enumerate(columns)` bound an unused column name; only the index is needed.
    for focus_col_index in range(len(columns)):
        problems.extend(_check_for_erroneous_entries_one_column_level(df, columns, focus_col_index))
    return problems
261
+
262
+
263
def _check_for_erroneous_entries_one_column_level(
    df: pd.DataFrame, columns: list[str], focus_col_index: int
) -> list[NodesPerRowProblem]:
    """Check one column level (the hierarchical level of the nodes, e.g. "en_1") for malformed rows.

    Args:
        df: dataframe with the node columns of one sheet
        columns: list column followed by the node columns, sorted by hierarchy level
        focus_col_index: index (into `columns`) of the level to check

    Returns:
        all problems found on this level
    """
    # Group by the focus column together with all of its ancestor columns:
    # otherwise identical values in the focus column that belong to different
    # parents would be lumped into the same group.
    grouped = df.groupby(columns[: focus_col_index + 1])
    # Loop-invariant: hoisted out of the loop (was recomputed per group).
    remaining_to_check_columns = columns[focus_col_index:]
    problems: list[NodesPerRowProblem] = []
    for _, group in grouped:  # the group key is not needed
        problems.extend(_check_for_erroneous_entries_one_grouped_df(group, remaining_to_check_columns))
    return problems
275
+
276
+
277
+ def _check_for_erroneous_entries_one_grouped_df(
278
+ group: pd.DataFrame, target_cols: list[str]
279
+ ) -> list[NodesPerRowProblem]:
280
+ problems: list[NodesPerRowProblem] = []
281
+ first_col = min(group.index)
282
+ # The first row is the current parent node. The remaining columns in that row must be empty.
283
+ if not group.loc[first_col, target_cols[1:]].isna().all():
284
+ problems.append(NodesPerRowProblem(target_cols[1:], int(first_col), should_be_empty=True))
285
+ if not len(target_cols) > 1:
286
+ return problems
287
+ # The second column of the remaining rows must not be empty, as these are the child nodes of the first row.
288
+ remaining_rows_of_next_column = group.loc[group.index[1:], target_cols[1]]
289
+ if (missing := remaining_rows_of_next_column.isna()).any():
290
+ for i, row in group[1:][missing].iterrows():
291
+ problems.append(NodesPerRowProblem([target_cols[1]], int(str(i)), should_be_empty=False))
292
+ return problems
@@ -0,0 +1,247 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from collections import Counter
5
+ from pathlib import Path
6
+ from typing import Any
7
+ from typing import Optional
8
+
9
+ import pandas as pd
10
+ import regex
11
+
12
+ from dsp_tools.commands.excel2json.lists.compliance_checks import make_all_excel_compliance_checks
13
+ from dsp_tools.commands.excel2json.lists.models.deserialise import Columns
14
+ from dsp_tools.commands.excel2json.lists.models.deserialise import ExcelSheet
15
+ from dsp_tools.commands.excel2json.lists.models.serialise import ListNode
16
+ from dsp_tools.commands.excel2json.lists.models.serialise import ListRoot
17
+ from dsp_tools.commands.excel2json.lists.utils import get_column_info
18
+ from dsp_tools.commands.excel2json.lists.utils import get_columns_of_preferred_lang
19
+ from dsp_tools.commands.excel2json.lists.utils import get_lang_string_from_column_name
20
+ from dsp_tools.commands.excel2json.old_lists import validate_lists_section_with_schema
21
+ from dsp_tools.commands.excel2json.utils import add_optional_columns
22
+ from dsp_tools.commands.excel2json.utils import read_and_clean_all_sheets
23
+
24
+
25
def excel2lists(
    excelfolder: str | Path,
    path_to_output_file: Optional[Path] = None,
) -> tuple[list[dict[str, Any]], bool]:
    """
    Convert lists described in Excel files into a "lists" section for a JSON project file.

    The result is validated against the "lists" JSON schema. If path_to_output_file is
    given, the section is additionally written to that file.

    Args:
        excelfolder: path to the folder containing the Excel file(s)
        path_to_output_file: path to the file where the output JSON file will be saved

    Raises:
        InputError: if there is a problem with the input data

    Returns:
        a tuple consisting of the "lists" section as Python list, and the success status (True if everything went well)
    """
    parsed_files = _parse_files(excelfolder)
    sheets = _prepare_sheets(parsed_files)
    finished_lists = _make_serialised_lists(sheets)
    validate_lists_section_with_schema(lists_section=finished_lists)

    if path_to_output_file:
        with open(path_to_output_file, "w", encoding="utf-8") as outfile:
            json.dump(finished_lists, outfile, indent=4, ensure_ascii=False)
        print(f"lists section was created successfully and written to file '{path_to_output_file}'")

    return finished_lists, True
55
+
56
+
57
def _parse_files(excelfolder: Path | str) -> dict[str, dict[str, pd.DataFrame]]:
    """Read every non-hidden Excel file whose name contains "list".

    Returns a mapping of file path -> (mapping of sheet name -> cleaned dataframe).
    """
    # NOTE: Path.glob(case_sensitive=...) requires Python 3.12+.
    file_names = [file for file in Path(excelfolder).glob("*list*.xlsx", case_sensitive=False) if _non_hidden(file)]
    # Dict comprehension instead of the previous build-by-assignment loop.
    return {str(file): read_and_clean_all_sheets(file) for file in file_names}
63
+
64
+
65
def _prepare_sheets(df_dict: dict[str, dict[str, pd.DataFrame]]) -> list[ExcelSheet]:
    """Wrap every sheet of every file, run the compliance checks, then construct the node IDs."""
    all_sheets: list[ExcelSheet] = [
        _prepare_one_sheet(df, file, sheet_name)
        for file, sheets in df_dict.items()
        for sheet_name, df in sheets.items()
    ]
    make_all_excel_compliance_checks(all_sheets)
    return _construct_ids(all_sheets)
71
+
72
+
73
def _prepare_one_sheet(df: pd.DataFrame, filename: str, sheet_name: str) -> ExcelSheet:
    """Bundle one worksheet with its column metadata, making sure the optional ID column exists."""
    col_info = get_column_info(df.columns)
    df_with_id_col = add_optional_columns(df, {"id (optional)"})
    return ExcelSheet(excel_name=filename, sheet_name=sheet_name, col_info=col_info, df=df_with_id_col)
77
+
78
+
79
+ def _non_hidden(path: Path) -> bool:
80
+ return not regex.search(r"^(\.|~\$).+", path.name)
81
+
82
+
83
def _construct_ids(sheet_list: list[ExcelSheet]) -> list[ExcelSheet]:
    """Give every node a unique ID, then link each node to the ID of its parent."""
    sheets_with_ids = [
        ExcelSheet(
            excel_name=sheet.excel_name,
            sheet_name=sheet.sheet_name,
            col_info=sheet.col_info,
            df=_complete_id_one_df(sheet.df, sheet.col_info.preferred_lang),
        )
        for sheet in sheet_list
    ]
    deduplicated = _resolve_duplicate_ids_all_excels(sheets_with_ids)
    return _fill_parent_id_col_all_excels(deduplicated)
92
+
93
+
94
+ def _fill_parent_id_col_all_excels(sheet_list: list[ExcelSheet]) -> list[ExcelSheet]:
95
+ all_sheets = []
96
+ for sheet in sheet_list:
97
+ df = _fill_parent_id_col_one_df(sheet.df, sheet.col_info.preferred_lang)
98
+ all_sheets.append(
99
+ ExcelSheet(excel_name=sheet.excel_name, sheet_name=sheet.sheet_name, col_info=sheet.col_info, df=df)
100
+ )
101
+ return all_sheets
102
+
103
+
104
def _fill_parent_id_col_one_df(df: pd.DataFrame, preferred_language: str) -> pd.DataFrame:
    """Create an extra column with the ID of the parent node.

    Every row starts out as a direct child of the list itself (row 0); rows that belong
    to a deeper group are then re-assigned to the first row of that group, their parent.
    """
    df["parent_id"] = df.at[0, "id"]
    columns = get_columns_of_preferred_lang(df.columns, preferred_language, r"\d+")
    for level in range(1, len(columns) + 1):
        # Group on the column together with all its ancestor columns, so identical
        # labels in different branches do not end up in the same group.
        for _, group in df.groupby(columns[:level]):
            if len(group.index) > 1:
                # The first row of the group already carries the correct ID;
                # all other rows of the group are its children.
                child_rows = group.index[1:]
                df.loc[child_rows, "parent_id"] = group.at[group.index[0], "id"]
    return df
117
+
118
+
119
+ def _resolve_duplicate_ids_all_excels(sheet_list: list[ExcelSheet]) -> list[ExcelSheet]:
120
+ ids = []
121
+ for sheet in sheet_list:
122
+ ids.extend(sheet.df["id"].tolist())
123
+ counter = Counter(ids)
124
+ if duplicate_ids := [item for item, count in counter.items() if count > 1]:
125
+ return _remove_duplicate_ids_in_all_excels(duplicate_ids, sheet_list)
126
+ return sheet_list
127
+
128
+
129
def _remove_duplicate_ids_in_all_excels(duplicate_ids: list[str], sheet_list: list[ExcelSheet]) -> list[ExcelSheet]:
    """Rewrite clashing auto-generated IDs across all sheets.

    User-supplied IDs (the "id (optional)" column) are never changed; only rows whose
    ID was generated automatically get a new, ancestor-qualified ID.
    """
    all_sheets = []
    for sheet in sheet_list:
        df = sheet.df
        for i, row in df.iterrows():
            # Only auto-generated IDs may be rewritten; a filled "id (optional)" is custom.
            if row["id"] in duplicate_ids and pd.isna(row["id (optional)"]):
                df.loc[i, "id"] = _construct_non_duplicate_id_string(row, sheet.col_info.preferred_lang)  # type: ignore[index]
        all_sheets.append(
            ExcelSheet(excel_name=sheet.excel_name, sheet_name=sheet.sheet_name, col_info=sheet.col_info, df=df)
        )
    # Bug fix: previously returned the input `sheet_list`, leaving the freshly built
    # `all_sheets` as dead code (it only worked because the dataframes mutate in place).
    # Returning the rebuilt sheets matches _fill_parent_id_col_all_excels.
    return all_sheets
140
+
141
+
142
def _complete_id_one_df(df: pd.DataFrame, preferred_language: str) -> pd.DataFrame:
    """Fill the 'id' column: a custom ID wins, otherwise the auto-generated one is used."""
    df = _create_auto_id_one_df(df, preferred_language)
    # Custom IDs take precedence; auto_id only fills the gaps.
    df["id"] = df["id (optional)"].fillna(df["auto_id"])
    return _resolve_duplicate_ids_keep_custom_change_auto_id_one_df(df, preferred_language)
147
+
148
+
149
+ def _resolve_duplicate_ids_keep_custom_change_auto_id_one_df(df: pd.DataFrame, preferred_language: str) -> pd.DataFrame:
150
+ """If there are duplicates in the id column, the auto_id is changed, the custom ID remains the same."""
151
+ if (duplicate_filter := df["id"].duplicated(keep=False)).any():
152
+ for i in duplicate_filter.index[duplicate_filter]:
153
+ if pd.isna(df.loc[i, "id (optional)"]):
154
+ df.loc[i, "id"] = _construct_non_duplicate_id_string(df.loc[i], preferred_language)
155
+ return df
156
+
157
+
158
def _create_auto_id_one_df(df: pd.DataFrame, preferred_language: str) -> pd.DataFrame:
    """For every node without manual ID, take the label of the preferred language as ID.

    The label is taken from the deepest (highest-numbered) level column that is filled
    for that row. Row 0 represents the list itself and gets the list label.
    """
    df["auto_id"] = pd.NA
    if not df["id (optional)"].isna().any():
        return df
    if pd.isna(df.at[0, "id (optional)"]):
        df.loc[0, "auto_id"] = df.at[0, f"{preferred_language}_list"]
    # Sort deepest level first. Bug fix: the previous plain string sort ordered
    # "en_9" before "en_10" lexicographically, picking the wrong level for lists
    # with ten or more levels — sort numerically by the level number instead.
    column_names = sorted(
        get_columns_of_preferred_lang(df.columns, preferred_language, r"\d+"),
        key=lambda col: int(col.rsplit("_", 1)[1]),
        reverse=True,
    )
    for i, row in df.iterrows():
        if pd.isna(row["id (optional)"]):
            for col in column_names:
                if pd.notna(row[col]):
                    df.loc[i, "auto_id"] = row[col]  # type: ignore[index]
                    break
    return _resolve_duplicate_ids_for_auto_id_one_df(df, preferred_language)
174
+
175
+
176
+ def _resolve_duplicate_ids_for_auto_id_one_df(df: pd.DataFrame, preferred_language: str) -> pd.DataFrame:
177
+ """In case the auto_id is not unique; both auto_ids get a new ID by joining the node names of all the ancestors."""
178
+ if (duplicate_filter := df["auto_id"].dropna().duplicated(keep=False)).any():
179
+ for i in duplicate_filter.index[duplicate_filter]:
180
+ df.loc[i, "auto_id"] = _construct_non_duplicate_id_string(df.loc[i], preferred_language)
181
+ return df
182
+
183
+
184
def _construct_non_duplicate_id_string(row: pd.Series[Any], preferred_language: str) -> str:
    """In case the node name is not unique; an ID is created by joining the node names of all the ancestors."""
    # The list column comes first, followed by the level columns of the preferred language.
    column_names = [
        f"{preferred_language}_list",
        *get_columns_of_preferred_lang(row.index, preferred_language, r"\d+"),
    ]
    ancestor_labels = [row[col] for col in column_names if pd.notna(row[col])]
    return ":".join(ancestor_labels)
190
+
191
+
192
+ def _make_serialised_lists(sheet_list: list[ExcelSheet]) -> list[dict[str, Any]]:
193
+ all_lists: list[ListRoot] = []
194
+ for sheet in sheet_list:
195
+ all_lists.append(_make_one_list(sheet))
196
+ all_lists = sorted(all_lists, key=lambda x: x.id_)
197
+ return [list_.to_dict() for list_ in all_lists]
198
+
199
+
200
def _make_one_list(sheet: ExcelSheet) -> ListRoot:
    """Build one ListRoot from a sheet: row 0 describes the list itself, the other rows its nodes."""
    list_id = str(sheet.df.at[0, "id"])
    node_dict = _make_list_nodes_from_df(sheet.df, sheet.col_info)
    nodes_for_root = _add_nodes_to_parent(node_dict, list_id) if node_dict else []
    root_row = sheet.df.iloc[0]
    return ListRoot(
        id_=list_id,
        labels=_get_lang_dict(root_row, sheet.col_info.list_cols),
        nodes=nodes_for_root,
        comments=_get_lang_dict(root_row, sheet.col_info.comment_cols),
    )
209
+
210
+
211
+ def _add_nodes_to_parent(node_dict: dict[str, ListNode], list_id: str) -> list[ListNode]:
212
+ root_list = []
213
+ for _, node in node_dict.items():
214
+ if node.parent_id == list_id:
215
+ root_list.append(node)
216
+ else:
217
+ node_dict[node.parent_id].sub_nodes.append(node)
218
+ return root_list
219
+
220
+
221
+ def _make_list_nodes_from_df(df: pd.DataFrame, col_info: Columns) -> dict[str, ListNode]:
222
+ node_dict = {}
223
+ for i, row in df[1:].iterrows():
224
+ node = _make_one_node(row, col_info)
225
+ node_dict[node.id_] = node
226
+ return node_dict
227
+
228
+
229
def _make_one_node(row: pd.Series[Any], col_info: Columns) -> ListNode:
    """Create one ListNode from a row; its labels come from the first node-column group that has values."""
    labels: dict[str, str] = {}
    for col_group in col_info.node_cols:
        # Take the first level (node_cols are pre-sorted by Columns) with any label.
        labels = _get_lang_dict(row, col_group.columns)
        if labels:
            break
    return ListNode(
        id_=str(row["id"]),
        labels=labels,
        comments=_get_lang_dict(row, col_info.comment_cols),
        parent_id=str(row["parent_id"]),
        sub_nodes=[],
    )
242
+
243
+
244
def _get_lang_dict(row: pd.Series[Any], columns: list[str]) -> dict[str, str]:
    """Map language code -> cell value for the given columns, skipping empty cells."""
    lang_dict: dict[str, str] = {}
    for col in columns:
        if pd.isna(row[col]):
            continue
        # Columns whose name yields no language code are skipped.
        if lang := get_lang_string_from_column_name(col):
            lang_dict[lang] = row[col]
    return lang_dict
File without changes
@@ -0,0 +1,30 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import pandas as pd
6
+
7
+
8
@dataclass
class ExcelSheet:
    """One worksheet of an Excel file, together with its origin and column metadata."""

    # name/path of the Excel file the sheet comes from
    excel_name: str
    # name of the worksheet within that file
    sheet_name: str
    # metadata about the sheet's columns
    col_info: Columns
    # the sheet's content
    df: pd.DataFrame
14
+
15
+
16
@dataclass
class Columns:
    """Column metadata of one list sheet."""

    # language whose columns serve as reference, e.g. "en"
    preferred_lang: str
    # columns describing the list itself, e.g. "en_list"
    list_cols: list[str]
    # columns holding comments
    comment_cols: list[str]
    # one entry per hierarchy level of the node columns (e.g. "en_1")
    node_cols: list[ColumnNodes]

    def __post_init__(self) -> None:
        # sort the levels deepest-first, so consumers can pick the deepest filled level
        self.node_cols = sorted(self.node_cols, key=lambda x: x.level_num, reverse=True)
25
+
26
+
27
@dataclass
class ColumnNodes:
    """All columns belonging to one hierarchy level of a list."""

    # hierarchical depth of the level, e.g. 1 for columns like "en_1"
    level_num: int
    # the column names of this level
    columns: list[str]