dsp-tools 0.9.13__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (348) hide show
  1. dsp_tools/__init__.py +5 -0
  2. dsp_tools/cli/args.py +47 -0
  3. dsp_tools/cli/call_action.py +85 -0
  4. dsp_tools/cli/call_action_files_only.py +101 -0
  5. dsp_tools/cli/call_action_with_network.py +207 -0
  6. dsp_tools/cli/create_parsers.py +479 -0
  7. dsp_tools/cli/entry_point.py +322 -0
  8. dsp_tools/cli/utils.py +87 -0
  9. dsp_tools/clients/CLAUDE.md +420 -0
  10. dsp_tools/clients/authentication_client.py +14 -0
  11. dsp_tools/clients/authentication_client_live.py +66 -0
  12. dsp_tools/clients/connection.py +35 -0
  13. dsp_tools/clients/connection_live.py +233 -0
  14. dsp_tools/clients/fuseki_metrics.py +60 -0
  15. dsp_tools/clients/group_user_clients.py +35 -0
  16. dsp_tools/clients/group_user_clients_live.py +181 -0
  17. dsp_tools/clients/legal_info_client.py +23 -0
  18. dsp_tools/clients/legal_info_client_live.py +132 -0
  19. dsp_tools/clients/list_client.py +49 -0
  20. dsp_tools/clients/list_client_live.py +166 -0
  21. dsp_tools/clients/metadata_client.py +24 -0
  22. dsp_tools/clients/metadata_client_live.py +47 -0
  23. dsp_tools/clients/ontology_clients.py +49 -0
  24. dsp_tools/clients/ontology_create_client_live.py +166 -0
  25. dsp_tools/clients/ontology_get_client_live.py +80 -0
  26. dsp_tools/clients/permissions_client.py +68 -0
  27. dsp_tools/clients/project_client.py +16 -0
  28. dsp_tools/clients/project_client_live.py +66 -0
  29. dsp_tools/commands/create/communicate_problems.py +24 -0
  30. dsp_tools/commands/create/create.py +134 -0
  31. dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
  32. dsp_tools/commands/create/create_on_server/classes.py +99 -0
  33. dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
  34. dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
  35. dsp_tools/commands/create/create_on_server/group_users.py +165 -0
  36. dsp_tools/commands/create/create_on_server/lists.py +163 -0
  37. dsp_tools/commands/create/create_on_server/mappers.py +12 -0
  38. dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
  39. dsp_tools/commands/create/create_on_server/ontology.py +52 -0
  40. dsp_tools/commands/create/create_on_server/project.py +68 -0
  41. dsp_tools/commands/create/create_on_server/properties.py +119 -0
  42. dsp_tools/commands/create/exceptions.py +29 -0
  43. dsp_tools/commands/create/lists_only.py +66 -0
  44. dsp_tools/commands/create/models/create_problems.py +87 -0
  45. dsp_tools/commands/create/models/parsed_ontology.py +88 -0
  46. dsp_tools/commands/create/models/parsed_project.py +81 -0
  47. dsp_tools/commands/create/models/rdf_ontology.py +12 -0
  48. dsp_tools/commands/create/models/server_project_info.py +100 -0
  49. dsp_tools/commands/create/parsing/parse_lists.py +45 -0
  50. dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
  51. dsp_tools/commands/create/parsing/parse_project.py +149 -0
  52. dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
  53. dsp_tools/commands/create/project_validate.py +595 -0
  54. dsp_tools/commands/create/serialisation/ontology.py +119 -0
  55. dsp_tools/commands/create/serialisation/project.py +44 -0
  56. dsp_tools/commands/excel2json/CLAUDE.md +101 -0
  57. dsp_tools/commands/excel2json/json_header.py +321 -0
  58. dsp_tools/commands/excel2json/lists/__init__.py +0 -0
  59. dsp_tools/commands/excel2json/lists/compliance_checks.py +292 -0
  60. dsp_tools/commands/excel2json/lists/make_lists.py +247 -0
  61. dsp_tools/commands/excel2json/lists/models/__init__.py +0 -0
  62. dsp_tools/commands/excel2json/lists/models/deserialise.py +30 -0
  63. dsp_tools/commands/excel2json/lists/models/input_error.py +216 -0
  64. dsp_tools/commands/excel2json/lists/models/serialise.py +57 -0
  65. dsp_tools/commands/excel2json/lists/utils.py +81 -0
  66. dsp_tools/commands/excel2json/models/__init__.py +0 -0
  67. dsp_tools/commands/excel2json/models/input_error.py +416 -0
  68. dsp_tools/commands/excel2json/models/json_header.py +175 -0
  69. dsp_tools/commands/excel2json/models/list_node_name.py +16 -0
  70. dsp_tools/commands/excel2json/models/ontology.py +76 -0
  71. dsp_tools/commands/excel2json/old_lists.py +328 -0
  72. dsp_tools/commands/excel2json/project.py +280 -0
  73. dsp_tools/commands/excel2json/properties.py +370 -0
  74. dsp_tools/commands/excel2json/resources.py +336 -0
  75. dsp_tools/commands/excel2json/utils.py +352 -0
  76. dsp_tools/commands/excel2xml/__init__.py +7 -0
  77. dsp_tools/commands/excel2xml/excel2xml_cli.py +523 -0
  78. dsp_tools/commands/excel2xml/excel2xml_lib.py +1953 -0
  79. dsp_tools/commands/excel2xml/propertyelement.py +47 -0
  80. dsp_tools/commands/get/__init__.py +0 -0
  81. dsp_tools/commands/get/get.py +166 -0
  82. dsp_tools/commands/get/get_permissions.py +257 -0
  83. dsp_tools/commands/get/get_permissions_legacy.py +89 -0
  84. dsp_tools/commands/get/legacy_models/__init__.py +0 -0
  85. dsp_tools/commands/get/legacy_models/context.py +318 -0
  86. dsp_tools/commands/get/legacy_models/group.py +241 -0
  87. dsp_tools/commands/get/legacy_models/helpers.py +47 -0
  88. dsp_tools/commands/get/legacy_models/listnode.py +390 -0
  89. dsp_tools/commands/get/legacy_models/model.py +12 -0
  90. dsp_tools/commands/get/legacy_models/ontology.py +324 -0
  91. dsp_tools/commands/get/legacy_models/project.py +366 -0
  92. dsp_tools/commands/get/legacy_models/propertyclass.py +417 -0
  93. dsp_tools/commands/get/legacy_models/resourceclass.py +676 -0
  94. dsp_tools/commands/get/legacy_models/user.py +438 -0
  95. dsp_tools/commands/get/models/__init__.py +0 -0
  96. dsp_tools/commands/get/models/permissions_models.py +10 -0
  97. dsp_tools/commands/id2iri.py +258 -0
  98. dsp_tools/commands/ingest_xmlupload/__init__.py +0 -0
  99. dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +178 -0
  100. dsp_tools/commands/ingest_xmlupload/create_resources/__init__.py +0 -0
  101. dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +69 -0
  102. dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +166 -0
  103. dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +121 -0
  104. dsp_tools/commands/ingest_xmlupload/ingest_files/__init__.py +0 -0
  105. dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +64 -0
  106. dsp_tools/commands/ingest_xmlupload/upload_files/__init__.py +0 -0
  107. dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +20 -0
  108. dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +57 -0
  109. dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +66 -0
  110. dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +67 -0
  111. dsp_tools/commands/resume_xmlupload/__init__.py +0 -0
  112. dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +96 -0
  113. dsp_tools/commands/start_stack.py +428 -0
  114. dsp_tools/commands/update_legal/CLAUDE.md +344 -0
  115. dsp_tools/commands/update_legal/__init__.py +0 -0
  116. dsp_tools/commands/update_legal/core.py +182 -0
  117. dsp_tools/commands/update_legal/csv_operations.py +135 -0
  118. dsp_tools/commands/update_legal/models.py +87 -0
  119. dsp_tools/commands/update_legal/xml_operations.py +247 -0
  120. dsp_tools/commands/validate_data/CLAUDE.md +159 -0
  121. dsp_tools/commands/validate_data/__init__.py +0 -0
  122. dsp_tools/commands/validate_data/constants.py +59 -0
  123. dsp_tools/commands/validate_data/mappers.py +143 -0
  124. dsp_tools/commands/validate_data/models/__init__.py +0 -0
  125. dsp_tools/commands/validate_data/models/api_responses.py +45 -0
  126. dsp_tools/commands/validate_data/models/input_problems.py +119 -0
  127. dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
  128. dsp_tools/commands/validate_data/models/validation.py +106 -0
  129. dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
  130. dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
  131. dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
  132. dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
  133. dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
  134. dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
  135. dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
  136. dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
  137. dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
  138. dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
  139. dsp_tools/commands/validate_data/sparql/cardinality_shacl.py +209 -0
  140. dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
  141. dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
  142. dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
  143. dsp_tools/commands/validate_data/utils.py +59 -0
  144. dsp_tools/commands/validate_data/validate_data.py +283 -0
  145. dsp_tools/commands/validate_data/validation/__init__.py +0 -0
  146. dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
  147. dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
  148. dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
  149. dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
  150. dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
  151. dsp_tools/commands/xmlupload/__init__.py +0 -0
  152. dsp_tools/commands/xmlupload/iri_resolver.py +21 -0
  153. dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
  154. dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
  155. dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
  156. dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
  157. dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
  158. dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
  159. dsp_tools/commands/xmlupload/models/__init__.py +0 -0
  160. dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
  161. dsp_tools/commands/xmlupload/models/formatted_text_value.py +10 -0
  162. dsp_tools/commands/xmlupload/models/ingest.py +143 -0
  163. dsp_tools/commands/xmlupload/models/input_problems.py +58 -0
  164. dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
  165. dsp_tools/commands/xmlupload/models/permission.py +45 -0
  166. dsp_tools/commands/xmlupload/models/permissions_parsed.py +93 -0
  167. dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
  168. dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
  169. dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
  170. dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
  171. dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
  172. dsp_tools/commands/xmlupload/models/upload_clients.py +14 -0
  173. dsp_tools/commands/xmlupload/models/upload_state.py +20 -0
  174. dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
  175. dsp_tools/commands/xmlupload/prepare_xml_input/ark2iri.py +55 -0
  176. dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
  177. dsp_tools/commands/xmlupload/prepare_xml_input/iiif_uri_validator.py +50 -0
  178. dsp_tools/commands/xmlupload/prepare_xml_input/list_client.py +120 -0
  179. dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
  180. dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
  181. dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
  182. dsp_tools/commands/xmlupload/resource_create_client.py +25 -0
  183. dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
  184. dsp_tools/commands/xmlupload/stash/__init__.py +0 -0
  185. dsp_tools/commands/xmlupload/stash/analyse_circular_reference_graph.py +236 -0
  186. dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
  187. dsp_tools/commands/xmlupload/stash/graph_models.py +87 -0
  188. dsp_tools/commands/xmlupload/stash/stash_circular_references.py +68 -0
  189. dsp_tools/commands/xmlupload/stash/stash_models.py +109 -0
  190. dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +106 -0
  191. dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +196 -0
  192. dsp_tools/commands/xmlupload/upload_config.py +76 -0
  193. dsp_tools/commands/xmlupload/write_diagnostic_info.py +27 -0
  194. dsp_tools/commands/xmlupload/xmlupload.py +516 -0
  195. dsp_tools/config/__init__.py +0 -0
  196. dsp_tools/config/logger_config.py +69 -0
  197. dsp_tools/config/warnings_config.py +32 -0
  198. dsp_tools/error/__init__.py +0 -0
  199. dsp_tools/error/custom_warnings.py +39 -0
  200. dsp_tools/error/exceptions.py +204 -0
  201. dsp_tools/error/problems.py +10 -0
  202. dsp_tools/error/xmllib_errors.py +20 -0
  203. dsp_tools/error/xmllib_warnings.py +54 -0
  204. dsp_tools/error/xmllib_warnings_util.py +159 -0
  205. dsp_tools/error/xsd_validation_error_msg.py +19 -0
  206. dsp_tools/legacy_models/__init__.py +0 -0
  207. dsp_tools/legacy_models/datetimestamp.py +81 -0
  208. dsp_tools/legacy_models/langstring.py +253 -0
  209. dsp_tools/legacy_models/projectContext.py +49 -0
  210. dsp_tools/py.typed +0 -0
  211. dsp_tools/resources/schema/data.xsd +648 -0
  212. dsp_tools/resources/schema/lists-only.json +72 -0
  213. dsp_tools/resources/schema/project.json +1258 -0
  214. dsp_tools/resources/schema/properties-only.json +874 -0
  215. dsp_tools/resources/schema/resources-only.json +140 -0
  216. dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
  217. dsp_tools/resources/start-stack/docker-compose.override.yml +11 -0
  218. dsp_tools/resources/start-stack/docker-compose.yml +88 -0
  219. dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
  220. dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
  221. dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
  222. dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
  223. dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
  224. dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
  225. dsp_tools/utils/__init__.py +0 -0
  226. dsp_tools/utils/ansi_colors.py +32 -0
  227. dsp_tools/utils/data_formats/__init__.py +0 -0
  228. dsp_tools/utils/data_formats/date_util.py +166 -0
  229. dsp_tools/utils/data_formats/iri_util.py +30 -0
  230. dsp_tools/utils/data_formats/shared.py +81 -0
  231. dsp_tools/utils/data_formats/uri_util.py +76 -0
  232. dsp_tools/utils/fuseki_bloating.py +63 -0
  233. dsp_tools/utils/json_parsing.py +22 -0
  234. dsp_tools/utils/rdf_constants.py +42 -0
  235. dsp_tools/utils/rdflib_utils.py +10 -0
  236. dsp_tools/utils/replace_id_with_iri.py +66 -0
  237. dsp_tools/utils/request_utils.py +238 -0
  238. dsp_tools/utils/xml_parsing/__init__.py +0 -0
  239. dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
  240. dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
  241. dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
  242. dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
  243. dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
  244. dsp_tools/xmllib/CLAUDE.md +302 -0
  245. dsp_tools/xmllib/__init__.py +49 -0
  246. dsp_tools/xmllib/general_functions.py +877 -0
  247. dsp_tools/xmllib/internal/__init__.py +0 -0
  248. dsp_tools/xmllib/internal/checkers.py +162 -0
  249. dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
  250. dsp_tools/xmllib/internal/constants.py +46 -0
  251. dsp_tools/xmllib/internal/input_converters.py +155 -0
  252. dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
  253. dsp_tools/xmllib/internal/serialise_resource.py +177 -0
  254. dsp_tools/xmllib/internal/serialise_values.py +152 -0
  255. dsp_tools/xmllib/internal/type_aliases.py +11 -0
  256. dsp_tools/xmllib/models/__init__.py +0 -0
  257. dsp_tools/xmllib/models/config_options.py +28 -0
  258. dsp_tools/xmllib/models/date_formats.py +48 -0
  259. dsp_tools/xmllib/models/dsp_base_resources.py +1542 -0
  260. dsp_tools/xmllib/models/internal/__init__.py +0 -0
  261. dsp_tools/xmllib/models/internal/file_values.py +172 -0
  262. dsp_tools/xmllib/models/internal/geometry.py +162 -0
  263. dsp_tools/xmllib/models/internal/migration_metadata.py +55 -0
  264. dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
  265. dsp_tools/xmllib/models/internal/values.py +342 -0
  266. dsp_tools/xmllib/models/licenses/__init__.py +0 -0
  267. dsp_tools/xmllib/models/licenses/other.py +59 -0
  268. dsp_tools/xmllib/models/licenses/recommended.py +107 -0
  269. dsp_tools/xmllib/models/permissions.py +41 -0
  270. dsp_tools/xmllib/models/res.py +1782 -0
  271. dsp_tools/xmllib/models/root.py +348 -0
  272. dsp_tools/xmllib/value_checkers.py +434 -0
  273. dsp_tools/xmllib/value_converters.py +777 -0
  274. dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
  275. dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
  276. dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
  277. dsp_tools-18.3.0.post13.dist-info/entry_points.txt +3 -0
  278. dsp_tools-0.9.13.dist-info/LICENSE +0 -674
  279. dsp_tools-0.9.13.dist-info/METADATA +0 -144
  280. dsp_tools-0.9.13.dist-info/RECORD +0 -71
  281. dsp_tools-0.9.13.dist-info/WHEEL +0 -5
  282. dsp_tools-0.9.13.dist-info/entry_points.txt +0 -3
  283. dsp_tools-0.9.13.dist-info/top_level.txt +0 -1
  284. dsplib/models/connection.py +0 -272
  285. dsplib/models/group.py +0 -296
  286. dsplib/models/helpers.py +0 -505
  287. dsplib/models/langstring.py +0 -277
  288. dsplib/models/listnode.py +0 -578
  289. dsplib/models/model.py +0 -20
  290. dsplib/models/ontology.py +0 -448
  291. dsplib/models/permission.py +0 -112
  292. dsplib/models/project.py +0 -547
  293. dsplib/models/propertyclass.py +0 -505
  294. dsplib/models/resource.py +0 -366
  295. dsplib/models/resourceclass.py +0 -810
  296. dsplib/models/sipi.py +0 -30
  297. dsplib/models/user.py +0 -731
  298. dsplib/models/value.py +0 -1000
  299. dsplib/utils/knora-data-schema.xsd +0 -454
  300. dsplib/utils/knora-schema-lists.json +0 -83
  301. dsplib/utils/knora-schema.json +0 -434
  302. dsplib/utils/onto_commons.py +0 -24
  303. dsplib/utils/onto_create_lists.py +0 -73
  304. dsplib/utils/onto_create_ontology.py +0 -442
  305. dsplib/utils/onto_get.py +0 -58
  306. dsplib/utils/onto_validate.py +0 -33
  307. dsplib/utils/xml_upload.py +0 -539
  308. dsplib/widgets/doublepassword.py +0 -80
  309. knora/MLS-import-libraries.py +0 -84
  310. knora/dsp_tools.py +0 -96
  311. knora/dsplib/models/connection.py +0 -272
  312. knora/dsplib/models/group.py +0 -296
  313. knora/dsplib/models/helpers.py +0 -506
  314. knora/dsplib/models/langstring.py +0 -277
  315. knora/dsplib/models/listnode.py +0 -578
  316. knora/dsplib/models/model.py +0 -20
  317. knora/dsplib/models/ontology.py +0 -448
  318. knora/dsplib/models/permission.py +0 -112
  319. knora/dsplib/models/project.py +0 -583
  320. knora/dsplib/models/propertyclass.py +0 -505
  321. knora/dsplib/models/resource.py +0 -416
  322. knora/dsplib/models/resourceclass.py +0 -811
  323. knora/dsplib/models/sipi.py +0 -35
  324. knora/dsplib/models/user.py +0 -731
  325. knora/dsplib/models/value.py +0 -1000
  326. knora/dsplib/utils/knora-data-schema.xsd +0 -464
  327. knora/dsplib/utils/knora-schema-lists.json +0 -83
  328. knora/dsplib/utils/knora-schema.json +0 -444
  329. knora/dsplib/utils/onto_commons.py +0 -24
  330. knora/dsplib/utils/onto_create_lists.py +0 -73
  331. knora/dsplib/utils/onto_create_ontology.py +0 -451
  332. knora/dsplib/utils/onto_get.py +0 -58
  333. knora/dsplib/utils/onto_validate.py +0 -33
  334. knora/dsplib/utils/xml_upload.py +0 -540
  335. knora/dsplib/widgets/doublepassword.py +0 -80
  336. knora/knora.py +0 -2108
  337. knora/test.py +0 -99
  338. knora/testit.py +0 -76
  339. knora/xml2knora.py +0 -633
  340. {dsplib → dsp_tools/cli}/__init__.py +0 -0
  341. {dsplib/models → dsp_tools/clients}/__init__.py +0 -0
  342. {dsplib/utils → dsp_tools/commands}/__init__.py +0 -0
  343. {dsplib/widgets → dsp_tools/commands/create}/__init__.py +0 -0
  344. {knora → dsp_tools/commands/create/create_on_server}/__init__.py +0 -0
  345. {knora/dsplib → dsp_tools/commands/create/models}/__init__.py +0 -0
  346. {knora/dsplib/models → dsp_tools/commands/create/parsing}/__init__.py +0 -0
  347. {knora/dsplib/utils → dsp_tools/commands/create/serialisation}/__init__.py +0 -0
  348. {knora/dsplib/widgets → dsp_tools/commands/excel2json}/__init__.py +0 -0
@@ -0,0 +1,336 @@
1
+ from __future__ import annotations
2
+
3
+ import importlib.resources
4
+ import json
5
+ import warnings
6
+ from copy import deepcopy
7
+ from typing import Any
8
+ from typing import Optional
9
+
10
+ import jsonpath_ng.ext
11
+ import jsonschema
12
+ import pandas as pd
13
+ import regex
14
+
15
+ from dsp_tools.commands.excel2json.models.input_error import ExcelFileProblem
16
+ from dsp_tools.commands.excel2json.models.input_error import ExcelSheetProblem
17
+ from dsp_tools.commands.excel2json.models.input_error import JsonValidationResourceProblem
18
+ from dsp_tools.commands.excel2json.models.input_error import MandatorySheetsMissingProblem
19
+ from dsp_tools.commands.excel2json.models.input_error import MissingValuesProblem
20
+ from dsp_tools.commands.excel2json.models.input_error import PositionInExcel
21
+ from dsp_tools.commands.excel2json.models.input_error import ResourceSheetNotListedProblem
22
+ from dsp_tools.commands.excel2json.models.json_header import PermissionsOverrulesUnprefixed
23
+ from dsp_tools.commands.excel2json.models.ontology import OntoResource
24
+ from dsp_tools.commands.excel2json.models.ontology import ResourceCardinality
25
+ from dsp_tools.commands.excel2json.utils import add_optional_columns
26
+ from dsp_tools.commands.excel2json.utils import check_column_for_duplicate
27
+ from dsp_tools.commands.excel2json.utils import check_contains_required_columns
28
+ from dsp_tools.commands.excel2json.utils import check_permissions
29
+ from dsp_tools.commands.excel2json.utils import find_missing_required_values
30
+ from dsp_tools.commands.excel2json.utils import get_comments
31
+ from dsp_tools.commands.excel2json.utils import get_labels
32
+ from dsp_tools.commands.excel2json.utils import read_and_clean_all_sheets
33
+ from dsp_tools.error.exceptions import InputError
34
+ from dsp_tools.error.problems import Problem
35
+
36
+ languages = ["en", "de", "fr", "it", "rm"]
37
+
38
+
39
def excel2resources(
    excelfile: str,
    path_to_output_file: Optional[str] = None,
) -> tuple[list[dict[str, Any]], PermissionsOverrulesUnprefixed, bool]:
    """
    Convert resources described in an Excel file into a "resources" section
    that can be inserted into a JSON project file.

    Args:
        excelfile: path to the Excel file containing the resources
        path_to_output_file: if provided, the output is written into this JSON file
            (otherwise, it's only returned as return value)

    Raises:
        InputError: if something went wrong

    Returns:
        - the "resources" section as Python list,
        - the unprefixed "default_permissions_overrule",
        - the success status (True if everything went well)
    """
    sheets = read_and_clean_all_sheets(excelfile)

    problems = _validate_excel_file(sheets)
    if problems:
        raise InputError(problems.execute_error_protocol())

    classes_df, class_sheets = _prepare_classes_df(sheets)

    # one resource per row of the "classes" sheet
    parsed = []
    for _, class_row in classes_df.iterrows():
        parsed.append(_row2resource(class_row, class_sheets.get(class_row["name"])))
    serialised = [resource.serialise() for resource in parsed]
    overrule = _extract_default_permissions_overrule(classes_df)

    # fail early if the generated section violates the JSON schema
    _validate_resources(resources_list=serialised)

    if path_to_output_file:
        with open(file=path_to_output_file, mode="w", encoding="utf-8") as file:
            json.dump(serialised, file, indent=4, ensure_ascii=False)
        print(f"resources section was created successfully and written to file '{path_to_output_file}'")

    return serialised, overrule, True
82
+
83
+
84
def _validate_excel_file(all_dfs: dict[str, pd.DataFrame]) -> ExcelFileProblem | None:
    """Check the whole workbook; return a file-level problem, or None if everything is fine."""
    sheets = deepcopy(all_dfs)  # the "classes" sheet is popped below; don't touch the caller's dict
    by_lower_name = {name.lower(): name for name in sheets}
    classes_key = by_lower_name.get("classes")
    if classes_key is None:
        missing = MandatorySheetsMissingProblem(["classes"], list(by_lower_name.values()))
        return ExcelFileProblem("resources.xlsx", [missing])
    classes_df = sheets.pop(classes_key)
    found: list[Problem] = []
    if classes_problem := _validate_classes_excel_sheet(classes_df, set(sheets)):
        found.append(classes_problem)
    found.extend(_validate_individual_class_sheets(sheets))
    if perm_problem := check_permissions(df=classes_df, allowed_vals=["private", "limited_view"]):
        found.append(perm_problem)
    return ExcelFileProblem("resources.xlsx", found) if found else None
102
+
103
+
104
def _validate_classes_excel_sheet(classes_df: pd.DataFrame, sheet_names: set[str]) -> ExcelSheetProblem | None:
    """Validate the "classes" sheet; return a sheet-level problem, or None if it is fine."""
    uses_old_lang_columns = any(classes_df.get(lang) is not None for lang in languages)
    if uses_old_lang_columns:
        warnings.warn(
            f"The file 'resources.xlsx' uses {languages} as column titles, which is deprecated. "
            f"Please use {[f'label_{lang}' for lang in languages]}"
        )
    required_cols = ["name", "super"]
    if missing_cols := check_contains_required_columns(classes_df, set(required_cols)):
        # Without these columns, the checks below would raise KeyErrors, so stop here.
        return ExcelSheetProblem("classes", [missing_cols])
    collected: list[Problem] = []
    declared_names = set(classes_df["name"].tolist())
    # every per-class sheet must be announced in the "classes" sheet
    unlisted = sheet_names - declared_names
    if unlisted:
        collected.append(ResourceSheetNotListedProblem(unlisted))
    collected.extend(find_missing_required_values(classes_df, required_cols) or [])
    if duplicates := check_column_for_duplicate(classes_df, "name"):
        collected.append(duplicates)
    return ExcelSheetProblem("classes", collected) if collected else None
125
+
126
+
127
def _validate_individual_class_sheets(class_df_dict: dict[str, pd.DataFrame]) -> list[Problem]:
    """Validate each per-class sheet; return all problems found (possibly empty)."""
    required_cols = ["property", "cardinality"]
    column_problems: list[Problem] = []
    for sheet, df in class_df_dict.items():
        if missing := check_contains_required_columns(df, set(required_cols)):
            column_problems.append(ExcelSheetProblem(sheet, [missing]))
    if column_problems:
        # With columns missing, the value checks below would raise KeyErrors.
        return column_problems
    positions: list[PositionInExcel] = []
    for sheet, df in class_df_dict.items():
        positions.extend(find_missing_required_values(df, required_cols, sheet) or [])
    return [MissingValuesProblem(positions)] if positions else []
143
+
144
+
145
def _prepare_classes_df(resource_dfs: dict[str, pd.DataFrame]) -> tuple[pd.DataFrame, dict[str, pd.DataFrame]]:
    """Split off the "classes" sheet (case-insensitive lookup) and add the optional columns."""
    canonical_names = {name.lower(): name for name in resource_dfs}
    classes_df = resource_dfs.pop(canonical_names["classes"])
    optional_cols = {
        f"{prefix}_{lang}" for prefix in ("label", "comment") for lang in ("en", "de", "fr", "it", "rm")
    } | {"default_permissions_overrule"}
    classes_df = add_optional_columns(classes_df, optional_cols)
    class_sheets = {name: add_optional_columns(df, {"gui_order"}) for name, df in resource_dfs.items()}
    return classes_df, class_sheets
166
+
167
+
168
def _row2resource(
    class_info_row: pd.Series[Any],
    class_df_with_cardinalities: pd.DataFrame | None,
) -> OntoResource:
    """
    Build one resource from its row in the "classes" sheet plus its own detail sheet.

    Args:
        class_info_row: row from the "classes" DataFrame
        class_df_with_cardinalities: Excel sheet of the individual class

    Raises:
        InputError: if the row or the details sheet contains invalid data

    Returns:
        the parsed resource
    """
    resource_name = class_info_row["name"]
    return OntoResource(
        name=resource_name,
        super=[sup.strip() for sup in class_info_row["super"].split(",")],
        labels=get_labels(class_info_row),
        comments=get_comments(class_info_row),
        cardinalities=_make_cardinality_section(resource_name, class_df_with_cardinalities),
    )
194
+
195
+
196
def _make_cardinality_section(
    class_name: str, class_df_with_cardinalities: pd.DataFrame | None
) -> list[ResourceCardinality] | None:
    """Build the cardinalities of one class; None when there is no (or an empty) sheet for it."""
    if class_df_with_cardinalities is None or len(class_df_with_cardinalities) == 0:
        return None
    return _create_all_cardinalities(class_name, class_df_with_cardinalities)
204
+
205
+
206
def _create_all_cardinalities(class_name: str, class_df_with_cardinalities: pd.DataFrame) -> list[ResourceCardinality]:
    """Normalise the gui_order column, then turn every row of the sheet into a cardinality."""
    normalised_df = _check_complete_gui_order(class_name, class_df_with_cardinalities)
    return [_make_one_cardinality(row) for _, row in normalised_df.iterrows()]
210
+
211
+
212
def _make_one_cardinality(detail_row: pd.Series[str | int]) -> ResourceCardinality:
    """Build one cardinality entry; prefix ':' onto properties of the project's own ontology."""
    raw_prop = str(detail_row["property"])
    # Unprefixed properties belong to the project ontology, except these two knora-base ones.
    needs_prefix = ":" not in raw_prop and raw_prop not in ("seqnum", "isPartOf")
    prop = f":{raw_prop}" if needs_prefix else raw_prop
    return ResourceCardinality(prop, str(detail_row["cardinality"]).lower(), int(detail_row["gui_order"]))
217
+
218
+
219
def _check_complete_gui_order(class_name: str, class_df_with_cardinalities: pd.DataFrame) -> pd.DataFrame:
    """
    Ensure that every row of a class sheet carries an integer gui_order.

    If the column is missing or completely empty, it is filled silently with 1..n.
    If it is partially empty or contains values that cannot be converted to int,
    it is overwritten with 1..n and a warning is emitted.
    """
    problem_detail = ""
    gui_order_col = class_df_with_cardinalities.get("gui_order")
    convertible = False
    if gui_order_col is None:
        pass  # column absent: fill automatically, no warning
    elif gui_order_col.isna().all():
        pass  # column entirely empty: fill automatically, no warning
    elif gui_order_col.isna().any():
        problem_detail = "some rows in the column 'gui_order' are empty."
    else:
        convertible = True

    if convertible:
        try:
            # go via float so that Excel-style "2.0" values are accepted as well
            class_df_with_cardinalities["gui_order"] = [int(float(x)) for x in class_df_with_cardinalities["gui_order"]]
            return class_df_with_cardinalities
        except ValueError:
            problem_detail = (
                "some rows in the column 'gui_order' contain invalid characters "
                "that could not be converted to an integer."
            )

    # fall back to the order in which the properties appear in the sheet
    class_df_with_cardinalities["gui_order"] = list(range(1, len(class_df_with_cardinalities) + 1))

    if problem_detail:
        warnings.warn(
            f"In the sheet '{class_name}' of the file 'resources.xlsx', "
            f"{problem_detail}\n"
            f"Values have been filled in automatically, "
            f"so that the gui-order reflects the order of the properties in the file."
        )
    return class_df_with_cardinalities
252
+
253
+
254
def _validate_resources(resources_list: list[dict[str, Any]]) -> None:
    """
    Check the "resources" section of a JSON project file against the JSON schema
    (which also enforces that the resource names are unique).

    Args:
        resources_list: the "resources" section of a JSON project as a list of dicts

    Raises:
        InputError: if the validation fails
    """
    schema_path = importlib.resources.files("dsp_tools").joinpath("resources/schema/resources-only.json")
    with schema_path.open(encoding="utf-8") as schema_file:
        resources_schema = json.load(schema_file)
    try:
        jsonschema.validate(instance=resources_list, schema=resources_schema)
    except jsonschema.ValidationError as err:
        problem = _find_validation_problem(
            validation_error=err,
            resources_list=resources_list,
        )
        msg = "\nThe Excel file 'resources.xlsx' did not pass validation." + problem.execute_error_protocol()
        raise InputError(msg) from None
280
+
281
+
282
def _find_validation_problem(
    validation_error: jsonschema.ValidationError, resources_list: list[dict[str, Any]]
) -> JsonValidationResourceProblem:
    """
    Map a jsonschema validation error back to a position in the original Excel file.

    Args:
        validation_error: the error raised while validating the "resources" section
        resources_list: the serialised "resources" section that was validated

    Returns:
        A problem object pointing at the affected sheet/row/column if the error
        path could be resolved, otherwise one carrying only the raw message and path.
    """
    # The JSON path starts with "$[<index>]" when the error concerns one specific resource.
    if json_path_to_resource := regex.search(r"^\$\[(\d+)\]", validation_error.json_path):
        # fmt: off
        wrong_res_name = (
            jsonpath_ng.ext.parse(json_path_to_resource.group(0))
            .find(resources_list)[0]
            .value["name"]
        )
        # fmt: on
        if affected_field := regex.search(
            r"name|labels|comments|super|cardinalities\[(\d+)\]", validation_error.json_path
        ):
            affected_value = affected_field.group(0)
            problematic_resource, excel_sheet, excel_row, excel_column = "", None, None, None

            if affected_value in ["name", "labels", "comments", "super"]:
                # These fields live in the "classes" sheet, one row per resource.
                excel_sheet = "classes"
                problematic_resource = wrong_res_name
                # +2: Excel rows are 1-based and row 1 holds the column headers
                excel_row = int(json_path_to_resource.group(1)) + 2
                excel_column = affected_value

            elif "cardinalities" in affected_value:
                # Cardinalities live in the sheet named after the resource, one row per property.
                excel_row = int(affected_field.group(1)) + 2
                excel_sheet = wrong_res_name

                if validation_error.json_path.endswith("cardinality"):
                    excel_column = "Cardinality"

                elif validation_error.json_path.endswith("propname"):
                    excel_column = "Property"

            return JsonValidationResourceProblem(
                problematic_resource=problematic_resource,
                excel_position=PositionInExcel(sheet=excel_sheet, column=excel_column, row=excel_row),
                original_msg=validation_error.message,
            )
    # Fallback: the error could not be tied to a specific Excel position.
    return JsonValidationResourceProblem(
        original_msg=validation_error.message,
        message_path=validation_error.json_path,
    )
324
+
325
+
326
def _extract_default_permissions_overrule(classes_df: pd.DataFrame) -> PermissionsOverrulesUnprefixed:
    """Collect the class names that carry a 'private' or 'limited_view' permissions overrule."""
    overrules = PermissionsOverrulesUnprefixed(private=[], limited_view=[])
    for _, class_row in classes_df.iterrows():
        raw_value = class_row.get("default_permissions_overrule")
        if pd.isna(raw_value):
            continue  # no overrule for this class
        normalised = raw_value.strip().lower()
        if normalised == "private":
            overrules.private.append(class_row["name"])
        elif normalised == "limited_view":
            overrules.limited_view.append(class_row["name"])
    return overrules
@@ -0,0 +1,352 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import Any
5
+ from unittest import mock
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ import regex
10
+
11
+ from dsp_tools.commands.excel2json.models.input_error import DuplicateSheetProblem
12
+ from dsp_tools.commands.excel2json.models.input_error import DuplicatesInColumnProblem
13
+ from dsp_tools.commands.excel2json.models.input_error import ExcelFileProblem
14
+ from dsp_tools.commands.excel2json.models.input_error import InvalidPermissionsOverrule
15
+ from dsp_tools.commands.excel2json.models.input_error import InvalidPermissionsOverruleProblem
16
+ from dsp_tools.commands.excel2json.models.input_error import InvalidSheetNameProblem
17
+ from dsp_tools.commands.excel2json.models.input_error import PositionInExcel
18
+ from dsp_tools.commands.excel2json.models.input_error import RequiredColumnMissingProblem
19
+ from dsp_tools.commands.excel2json.models.ontology import LanguageDict
20
+ from dsp_tools.error.exceptions import InputError
21
+ from dsp_tools.error.exceptions import UserFilepathNotFoundError
22
+
23
+ languages = ["en", "de", "fr", "it", "rm"]
24
+
25
+
26
def read_and_clean_all_sheets(excelfile: str | Path) -> dict[str, pd.DataFrame]:
    """
    This function reads an Excel file with all its sheets.
    If there is a ValueError, it patches the openpyxl part that causes the error
    and opens it with that patch.
    It cleans the dataframes and then returns them in the form {sheet_name: dataframe}.

    Args:
        excelfile: path to the Excel file

    Returns:
        All sheets of the excel file, in the form of a dictionary {sheet_name: dataframe}

    Raises:
        UserFilepathNotFoundError: If the file does not exist
        InputError: If the sheets are not correctly named
    """
    if not Path(excelfile).exists():
        raise UserFilepathNotFoundError(excelfile)
    try:
        df_dict = pd.read_excel(excelfile, sheet_name=None)
    except ValueError:
        # Pandas relies on openpyxl to parse XLSX files.
        # A strange behavior of openpyxl prevents pandas from opening files with some formatting properties
        # (unclear which formatting properties exactly).
        # Apparently, the excel2json test files have one of the unsupported formatting properties.
        # Credits: https://stackoverflow.com/a/70537454/14414188
        with mock.patch("openpyxl.styles.fonts.Font.family.max", new=100):
            df_dict = pd.read_excel(excelfile, sheet_name=None)
    _find_duplicate_col_names(str(excelfile), list(df_dict))
    try:
        # Bugfix: the previous code called name.strip(""), which removes nothing at all
        # (an empty "chars" argument means "strip characters from the empty set").
        # strip() with no argument removes the intended leading/trailing whitespace.
        return {name.strip(): clean_data_frame(df) for name, df in df_dict.items()}
    except AttributeError:
        # A sheet name that is not a string (no .strip attribute) signals an invalid workbook.
        msg = InvalidSheetNameProblem(str(excelfile), list(df_dict.keys())).execute_error_protocol()
        raise InputError(msg) from None
60
+
61
+
62
def _find_duplicate_col_names(excelfile: str, col_names: list[str]) -> None:
    """Raise an InputError if two sheet names collide after case- and whitespace-normalisation."""
    normalised = [str(name).lower().strip() for name in col_names]
    # Count occurrences of each normalised name; more than one occurrence means a collision.
    occurrence_count: dict[str, int] = {}
    for name in normalised:
        occurrence_count[name] = occurrence_count.get(name, 0) + 1
    duplicates = [name for name, count in occurrence_count.items() if count > 1]
    if duplicates:
        msg = ExcelFileProblem(str(excelfile), [DuplicateSheetProblem(duplicates)]).execute_error_protocol()
        raise InputError(msg) from None
68
+
69
+
70
def clean_data_frame(df: pd.DataFrame) -> pd.DataFrame:
    """
    This function takes a pd.DataFrame and removes:
    - Leading and trailing spaces from the column names
    - Leading and trailing spaces from each cell and any characters in the cells that are not part of any known
      language, for example, linebreaks and replaces it with a pd.NA.
    - Removes all rows that are empty in all columns

    Args:
        df: The pd.DataFrame that is to be cleaned

    Returns:
        pd.DataFrame which has the above-mentioned removed
    """
    # Normalise the column names: lower-case, without surrounding whitespace.
    df = df.rename(columns=lambda col: col.strip().lower())

    def _clean_cell(cell):
        # Keep the stripped string only if the cell is non-empty and contains at least
        # one word character or letter of any script; otherwise blank it out with pd.NA.
        if pd.notna(cell) and regex.search(r"[\w\p{L}]", str(cell), flags=regex.U):
            return str(cell).strip()
        return pd.NA

    df = df.map(_clean_cell)
    # Discard rows where every single cell ended up empty.
    return df.dropna(axis=0, how="all")
94
+
95
+
96
def check_contains_required_columns(
    df: pd.DataFrame, required_columns: set[str]
) -> None | RequiredColumnMissingProblem:
    """
    This function checks if all the columns from the set are in the pd.DataFrame.
    Additional columns to the ones in the set are allowed.

    Args:
        df: pd.DataFrame that is checked
        required_columns: set of column names

    Returns:
        An object if there is a problem else None.
    """
    missing = required_columns - set(df.columns)
    if not missing:
        return None
    return RequiredColumnMissingProblem(columns=list(missing))
114
+
115
+
116
def check_column_for_duplicate(df: pd.DataFrame, to_check_column: str) -> None | DuplicatesInColumnProblem:
    """
    This function checks if a specified column contains duplicate values.
    Empty cells (pd.NA) also count as duplicates.

    Args:
        df: pd.DataFrame that is checked for duplicates
        to_check_column: Name of the column that must not contain duplicates

    Returns:
        If there are problems it returns an object that stores the relevant user information.
    """
    # True for every occurrence after the first one of a value.
    duplicated_mask = df[to_check_column].duplicated()
    if not duplicated_mask.any():
        return None
    return DuplicatesInColumnProblem(
        column=to_check_column,
        duplicate_values=df[to_check_column][duplicated_mask].tolist(),
    )
137
+
138
+
139
def find_missing_required_values(
    df: pd.DataFrame, required_values_columns: list[str], sheetname: str | None = None
) -> list[PositionInExcel] | None:
    """
    If there are empty cells in the specified columns,
    it determines the column names and Excel row numbers of all missing values
    and returns them as a list of Excel positions.

    Args:
        df: pd.DataFrame that is checked
        required_values_columns: a list of column names that may not contain empty cells
        sheetname: optional name of the Excel sheet

    Returns:
        Locations of missing values
        None if all are filled
    """
    missing_values = check_required_values(df, required_values_columns)
    if not missing_values:
        return None
    positions: list[PositionInExcel] = []
    for column_name, row_numbers in get_wrong_row_numbers(missing_values).items():
        positions.extend(PositionInExcel(sheet=sheetname, column=column_name, row=num) for num in row_numbers)
    return positions
163
+
164
+
165
def check_required_values(df: pd.DataFrame, required_values_columns: list[str]) -> dict[str, pd.Series[bool]]:
    """
    If there are any empty cells in a column, the column name and a boolean pd.Series
    (True where the cell is empty) are added to the result dictionary.
    Columns without empty cells are not included.
    If no column has any empty cells, an empty dictionary is returned.

    Args:
        df: pd.DataFrame that is checked
        required_values_columns: a list of column names that may not contain empty cells

    Returns:
        a dictionary with the column names as key and pd.Series as values if there are any empty cells
    """
    problems: dict[str, pd.Series[bool]] = {}
    for column_name in required_values_columns:
        # Evaluate the emptiness mask once per column and keep it only if it flags anything.
        empty_mask = df[column_name].isna()
        if empty_mask.any():
            problems[column_name] = empty_mask
    return problems
182
+
183
+
184
def _turn_bool_array_into_index_numbers(series: pd.Series[bool], true_remains: bool = True) -> list[int]:
    """
    This function takes a pd.Series containing boolean values.
    By default, this method extracts the index numbers of the True values.
    If the index numbers of the False values are required, the parameter "true_remains" should be turned to False.

    Args:
        series: pd.Series, which only contains True and False values
        true_remains: True if the index numbers of True are required, likewise with False

    Returns:
        A list of index numbers
    """
    # Invert when the indices of the False entries are wanted instead.
    mask = series if true_remains else ~series
    return [index for index, flag in mask.items() if flag]
201
+
202
+
203
def get_wrong_row_numbers(
    wrong_row_dict: dict[str, pd.Series[bool]], true_remains: bool = True
) -> dict[str, list[int]]:
    """
    From the boolean pd.Series the index numbers of the True values are extracted.
    The resulting list is the new value of the dictionary.
    To each index number 2 is added, so that it corresponds to the Excel row number.

    Args:
        wrong_row_dict: The dictionary which contains column names and a boolean pd.Series
        true_remains: If True then the index of True is taken, if False then the index of False values is taken

    Returns:
        Dictionary with the column name as key and the row number as a list.
    """
    # +1 for the header row, +1 because Excel rows are 1-based while the df index is 0-based.
    excel_offset = 2
    return {
        column: [index + excel_offset for index in _turn_bool_array_into_index_numbers(series=series, true_remains=true_remains)]
        for column, series in wrong_row_dict.items()
    }
222
+
223
+
224
def get_labels(df_row: pd.Series[Any]) -> LanguageDict:
    """
    This function takes a pd.Series which has "label_[language tag]" in the index.
    If the value of the index is not pd.NA, the language tag and the value are added to a dictionary.
    If it is empty, it is omitted from the dictionary.
    If no "label_[language tag]" column is filled, the bare language-tag columns
    (e.g. "en" instead of "label_en") are used as a fallback.

    Args:
        df_row: a pd.Series (usually a row of a pd.DataFrame) from which the content of the columns containing the
            label is extracted

    Returns:
        A dictionary with the language tag and the content of the cell
    """
    labels = {}
    for lang in languages:
        value = df_row[f"label_{lang}"]
        if not pd.isna(value):
            labels[lang] = value
    if not labels:
        # Fallback: some sheets use the plain language tag as column name.
        for lang in languages:
            value = df_row[lang]
            if not pd.isna(value):
                labels[lang] = value
    return LanguageDict(labels)
241
+
242
+
243
def get_comments(df_row: pd.Series[Any]) -> LanguageDict | None:
    """
    This function takes a pd.Series which has "comment_[language tag]" in the index.
    If the value of the index is not pd.NA, the language tag and the value are added to a dictionary.
    If it is empty, it is omitted from the dictionary.

    Args:
        df_row: a pd.Series (usually a row of a pd.DataFrame) from which the content of the columns containing the
            comment is extracted

    Returns:
        A dictionary with the language tag and the content of the cell, or None if no comment is filled in
    """
    comments = {}
    for lang in languages:
        value = df_row[f"comment_{lang}"]
        if not pd.isna(value):
            comments[lang] = value
    if comments:
        return LanguageDict(comments)
    return None
258
+
259
+
260
def find_one_full_cell_in_cols(df: pd.DataFrame, required_columns: list[str]) -> pd.Series[bool] | None:
    """
    This function takes a pd.DataFrame and a list of column names where at least one cell must have a value per row.
    A pd.Series with boolean values is returned, True if any rows do not have a value in at least one column

    Args:
        df: The pd.DataFrame which should be checked
        required_columns: A list of column names where at least one cell per row must have a value

    Returns:
        None if there is no problem or a pd.Series if there is a problem in a row
    """
    # A row is problematic only if EVERY required column is empty in that row,
    # hence the logical AND over the per-column emptiness masks.
    all_empty = np.logical_and.reduce(tuple(df[col].isnull() for col in required_columns))
    if not any(all_empty):
        return None
    return pd.Series(all_empty)
279
+
280
+
281
def col_must_or_not_empty_based_on_other_col(
    df: pd.DataFrame,
    substring_list: list[str],
    substring_colname: str,
    check_empty_colname: str,
    must_have_value: bool,
) -> pd.Series[bool] | None:
    """
    It is presumed that the column "substring_colname" has no empty cells.
    Based on the string content of the individual rows, which is specified in the "substring_list",
    the cell in the column "check_empty_colname" is checked whether it is empty or not.
    If the parameter "must_have_value" is True, then the cell in the "check_empty_colname" column must not be empty.
    If the parameter is set to False, then it must be empty.

    Args:
        df: The pd.DataFrame which is checked
        substring_list: A list of possible information that could be in the column "substring_colname"
        substring_colname: The name of the column that may contain any of the sub-strings
        check_empty_colname: The name of the column which is checked if it is empty or not
        must_have_value: True if the "check_empty_colname" should have a value or the reverse.

    Returns:
        None if all rows are correctly filled or empty.
        A series which contains True values for the rows, where it does
        not comply with the specifications.
    """
    emptiness_violation = df[check_empty_colname].isna()
    if not must_have_value:
        # When the cell must be empty, a filled cell is the violation, so invert the mask.
        emptiness_violation = ~emptiness_violation
    # Rows whose trigger column matches one of the given sub-strings are relevant;
    # all others pass regardless of the checked column.
    # NOTE(review): the sub-strings are joined into a regex alternation without escaping —
    # assumes they contain no regex metacharacters; confirm with the callers.
    trigger_pattern = "|".join(substring_list)
    row_is_relevant = df[substring_colname].str.contains(trigger_pattern, na=False, regex=True)
    # A row is a problem only if it is relevant AND violates the emptiness rule.
    violations = np.logical_and(emptiness_violation, row_is_relevant)
    if any(violations):
        return pd.Series(violations)
    return None
319
+
320
+
321
def add_optional_columns(df: pd.DataFrame, optional_col_set: set[str]) -> pd.DataFrame:
    """
    Adds columns to a df if they are not already present.
    The content of the columns is empty.

    Args:
        df: Dataframe
        optional_col_set: set of columns that may be added

    Returns:
        Dataframe with additional columns if they were not present
    """
    missing_cols = optional_col_set - set(df.columns)
    if missing_cols:
        # Append the missing columns as an empty frame aligned on the same index.
        empty_block = pd.DataFrame(columns=list(missing_cols), index=df.index)
        df = pd.concat(objs=[df, empty_block], axis=1)
    return df
339
+
340
+
341
def check_permissions(df: pd.DataFrame, allowed_vals: list[str]) -> None | InvalidPermissionsOverruleProblem:
    """
    Validate the "default_permissions_overrule" column of a dataframe.

    Rows with an empty overrule cell are accepted. Filled cells are compared
    case-insensitively (and whitespace-insensitively) against the allowed values.

    Args:
        df: dataframe with "name" and (optionally) "default_permissions_overrule" columns
        allowed_vals: permitted overrule values (expected in lower case)

    Returns:
        A problem object listing all invalid rows, or None if everything is valid
    """
    problems: list[InvalidPermissionsOverrule] = []
    for _, row in df.iterrows():
        actual_val = row.get("default_permissions_overrule")
        if pd.isna(actual_val):
            continue
        if actual_val.strip().lower() in allowed_vals:
            continue
        problems.append(
            InvalidPermissionsOverrule(entity_name=row["name"], actual_val=actual_val, allowed_vals=allowed_vals)
        )
    if problems:
        return InvalidPermissionsOverruleProblem(problems)
    return None