dsp-tools 0.9.13__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (348) hide show
  1. dsp_tools/__init__.py +5 -0
  2. dsp_tools/cli/args.py +47 -0
  3. dsp_tools/cli/call_action.py +85 -0
  4. dsp_tools/cli/call_action_files_only.py +101 -0
  5. dsp_tools/cli/call_action_with_network.py +207 -0
  6. dsp_tools/cli/create_parsers.py +479 -0
  7. dsp_tools/cli/entry_point.py +322 -0
  8. dsp_tools/cli/utils.py +87 -0
  9. dsp_tools/clients/CLAUDE.md +420 -0
  10. dsp_tools/clients/authentication_client.py +14 -0
  11. dsp_tools/clients/authentication_client_live.py +66 -0
  12. dsp_tools/clients/connection.py +35 -0
  13. dsp_tools/clients/connection_live.py +233 -0
  14. dsp_tools/clients/fuseki_metrics.py +60 -0
  15. dsp_tools/clients/group_user_clients.py +35 -0
  16. dsp_tools/clients/group_user_clients_live.py +181 -0
  17. dsp_tools/clients/legal_info_client.py +23 -0
  18. dsp_tools/clients/legal_info_client_live.py +132 -0
  19. dsp_tools/clients/list_client.py +49 -0
  20. dsp_tools/clients/list_client_live.py +166 -0
  21. dsp_tools/clients/metadata_client.py +24 -0
  22. dsp_tools/clients/metadata_client_live.py +47 -0
  23. dsp_tools/clients/ontology_clients.py +49 -0
  24. dsp_tools/clients/ontology_create_client_live.py +166 -0
  25. dsp_tools/clients/ontology_get_client_live.py +80 -0
  26. dsp_tools/clients/permissions_client.py +68 -0
  27. dsp_tools/clients/project_client.py +16 -0
  28. dsp_tools/clients/project_client_live.py +66 -0
  29. dsp_tools/commands/create/communicate_problems.py +24 -0
  30. dsp_tools/commands/create/create.py +134 -0
  31. dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
  32. dsp_tools/commands/create/create_on_server/classes.py +99 -0
  33. dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
  34. dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
  35. dsp_tools/commands/create/create_on_server/group_users.py +165 -0
  36. dsp_tools/commands/create/create_on_server/lists.py +163 -0
  37. dsp_tools/commands/create/create_on_server/mappers.py +12 -0
  38. dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
  39. dsp_tools/commands/create/create_on_server/ontology.py +52 -0
  40. dsp_tools/commands/create/create_on_server/project.py +68 -0
  41. dsp_tools/commands/create/create_on_server/properties.py +119 -0
  42. dsp_tools/commands/create/exceptions.py +29 -0
  43. dsp_tools/commands/create/lists_only.py +66 -0
  44. dsp_tools/commands/create/models/create_problems.py +87 -0
  45. dsp_tools/commands/create/models/parsed_ontology.py +88 -0
  46. dsp_tools/commands/create/models/parsed_project.py +81 -0
  47. dsp_tools/commands/create/models/rdf_ontology.py +12 -0
  48. dsp_tools/commands/create/models/server_project_info.py +100 -0
  49. dsp_tools/commands/create/parsing/parse_lists.py +45 -0
  50. dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
  51. dsp_tools/commands/create/parsing/parse_project.py +149 -0
  52. dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
  53. dsp_tools/commands/create/project_validate.py +595 -0
  54. dsp_tools/commands/create/serialisation/ontology.py +119 -0
  55. dsp_tools/commands/create/serialisation/project.py +44 -0
  56. dsp_tools/commands/excel2json/CLAUDE.md +101 -0
  57. dsp_tools/commands/excel2json/json_header.py +321 -0
  58. dsp_tools/commands/excel2json/lists/__init__.py +0 -0
  59. dsp_tools/commands/excel2json/lists/compliance_checks.py +292 -0
  60. dsp_tools/commands/excel2json/lists/make_lists.py +247 -0
  61. dsp_tools/commands/excel2json/lists/models/__init__.py +0 -0
  62. dsp_tools/commands/excel2json/lists/models/deserialise.py +30 -0
  63. dsp_tools/commands/excel2json/lists/models/input_error.py +216 -0
  64. dsp_tools/commands/excel2json/lists/models/serialise.py +57 -0
  65. dsp_tools/commands/excel2json/lists/utils.py +81 -0
  66. dsp_tools/commands/excel2json/models/__init__.py +0 -0
  67. dsp_tools/commands/excel2json/models/input_error.py +416 -0
  68. dsp_tools/commands/excel2json/models/json_header.py +175 -0
  69. dsp_tools/commands/excel2json/models/list_node_name.py +16 -0
  70. dsp_tools/commands/excel2json/models/ontology.py +76 -0
  71. dsp_tools/commands/excel2json/old_lists.py +328 -0
  72. dsp_tools/commands/excel2json/project.py +280 -0
  73. dsp_tools/commands/excel2json/properties.py +370 -0
  74. dsp_tools/commands/excel2json/resources.py +336 -0
  75. dsp_tools/commands/excel2json/utils.py +352 -0
  76. dsp_tools/commands/excel2xml/__init__.py +7 -0
  77. dsp_tools/commands/excel2xml/excel2xml_cli.py +523 -0
  78. dsp_tools/commands/excel2xml/excel2xml_lib.py +1953 -0
  79. dsp_tools/commands/excel2xml/propertyelement.py +47 -0
  80. dsp_tools/commands/get/__init__.py +0 -0
  81. dsp_tools/commands/get/get.py +166 -0
  82. dsp_tools/commands/get/get_permissions.py +257 -0
  83. dsp_tools/commands/get/get_permissions_legacy.py +89 -0
  84. dsp_tools/commands/get/legacy_models/__init__.py +0 -0
  85. dsp_tools/commands/get/legacy_models/context.py +318 -0
  86. dsp_tools/commands/get/legacy_models/group.py +241 -0
  87. dsp_tools/commands/get/legacy_models/helpers.py +47 -0
  88. dsp_tools/commands/get/legacy_models/listnode.py +390 -0
  89. dsp_tools/commands/get/legacy_models/model.py +12 -0
  90. dsp_tools/commands/get/legacy_models/ontology.py +324 -0
  91. dsp_tools/commands/get/legacy_models/project.py +366 -0
  92. dsp_tools/commands/get/legacy_models/propertyclass.py +417 -0
  93. dsp_tools/commands/get/legacy_models/resourceclass.py +676 -0
  94. dsp_tools/commands/get/legacy_models/user.py +438 -0
  95. dsp_tools/commands/get/models/__init__.py +0 -0
  96. dsp_tools/commands/get/models/permissions_models.py +10 -0
  97. dsp_tools/commands/id2iri.py +258 -0
  98. dsp_tools/commands/ingest_xmlupload/__init__.py +0 -0
  99. dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +178 -0
  100. dsp_tools/commands/ingest_xmlupload/create_resources/__init__.py +0 -0
  101. dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +69 -0
  102. dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +166 -0
  103. dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +121 -0
  104. dsp_tools/commands/ingest_xmlupload/ingest_files/__init__.py +0 -0
  105. dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +64 -0
  106. dsp_tools/commands/ingest_xmlupload/upload_files/__init__.py +0 -0
  107. dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +20 -0
  108. dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +57 -0
  109. dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +66 -0
  110. dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +67 -0
  111. dsp_tools/commands/resume_xmlupload/__init__.py +0 -0
  112. dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +96 -0
  113. dsp_tools/commands/start_stack.py +428 -0
  114. dsp_tools/commands/update_legal/CLAUDE.md +344 -0
  115. dsp_tools/commands/update_legal/__init__.py +0 -0
  116. dsp_tools/commands/update_legal/core.py +182 -0
  117. dsp_tools/commands/update_legal/csv_operations.py +135 -0
  118. dsp_tools/commands/update_legal/models.py +87 -0
  119. dsp_tools/commands/update_legal/xml_operations.py +247 -0
  120. dsp_tools/commands/validate_data/CLAUDE.md +159 -0
  121. dsp_tools/commands/validate_data/__init__.py +0 -0
  122. dsp_tools/commands/validate_data/constants.py +59 -0
  123. dsp_tools/commands/validate_data/mappers.py +143 -0
  124. dsp_tools/commands/validate_data/models/__init__.py +0 -0
  125. dsp_tools/commands/validate_data/models/api_responses.py +45 -0
  126. dsp_tools/commands/validate_data/models/input_problems.py +119 -0
  127. dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
  128. dsp_tools/commands/validate_data/models/validation.py +106 -0
  129. dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
  130. dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
  131. dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
  132. dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
  133. dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
  134. dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
  135. dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
  136. dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
  137. dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
  138. dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
  139. dsp_tools/commands/validate_data/sparql/cardinality_shacl.py +209 -0
  140. dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
  141. dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
  142. dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
  143. dsp_tools/commands/validate_data/utils.py +59 -0
  144. dsp_tools/commands/validate_data/validate_data.py +283 -0
  145. dsp_tools/commands/validate_data/validation/__init__.py +0 -0
  146. dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
  147. dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
  148. dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
  149. dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
  150. dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
  151. dsp_tools/commands/xmlupload/__init__.py +0 -0
  152. dsp_tools/commands/xmlupload/iri_resolver.py +21 -0
  153. dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
  154. dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
  155. dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
  156. dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
  157. dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
  158. dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
  159. dsp_tools/commands/xmlupload/models/__init__.py +0 -0
  160. dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
  161. dsp_tools/commands/xmlupload/models/formatted_text_value.py +10 -0
  162. dsp_tools/commands/xmlupload/models/ingest.py +143 -0
  163. dsp_tools/commands/xmlupload/models/input_problems.py +58 -0
  164. dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
  165. dsp_tools/commands/xmlupload/models/permission.py +45 -0
  166. dsp_tools/commands/xmlupload/models/permissions_parsed.py +93 -0
  167. dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
  168. dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
  169. dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
  170. dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
  171. dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
  172. dsp_tools/commands/xmlupload/models/upload_clients.py +14 -0
  173. dsp_tools/commands/xmlupload/models/upload_state.py +20 -0
  174. dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
  175. dsp_tools/commands/xmlupload/prepare_xml_input/ark2iri.py +55 -0
  176. dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
  177. dsp_tools/commands/xmlupload/prepare_xml_input/iiif_uri_validator.py +50 -0
  178. dsp_tools/commands/xmlupload/prepare_xml_input/list_client.py +120 -0
  179. dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
  180. dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
  181. dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
  182. dsp_tools/commands/xmlupload/resource_create_client.py +25 -0
  183. dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
  184. dsp_tools/commands/xmlupload/stash/__init__.py +0 -0
  185. dsp_tools/commands/xmlupload/stash/analyse_circular_reference_graph.py +236 -0
  186. dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
  187. dsp_tools/commands/xmlupload/stash/graph_models.py +87 -0
  188. dsp_tools/commands/xmlupload/stash/stash_circular_references.py +68 -0
  189. dsp_tools/commands/xmlupload/stash/stash_models.py +109 -0
  190. dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +106 -0
  191. dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +196 -0
  192. dsp_tools/commands/xmlupload/upload_config.py +76 -0
  193. dsp_tools/commands/xmlupload/write_diagnostic_info.py +27 -0
  194. dsp_tools/commands/xmlupload/xmlupload.py +516 -0
  195. dsp_tools/config/__init__.py +0 -0
  196. dsp_tools/config/logger_config.py +69 -0
  197. dsp_tools/config/warnings_config.py +32 -0
  198. dsp_tools/error/__init__.py +0 -0
  199. dsp_tools/error/custom_warnings.py +39 -0
  200. dsp_tools/error/exceptions.py +204 -0
  201. dsp_tools/error/problems.py +10 -0
  202. dsp_tools/error/xmllib_errors.py +20 -0
  203. dsp_tools/error/xmllib_warnings.py +54 -0
  204. dsp_tools/error/xmllib_warnings_util.py +159 -0
  205. dsp_tools/error/xsd_validation_error_msg.py +19 -0
  206. dsp_tools/legacy_models/__init__.py +0 -0
  207. dsp_tools/legacy_models/datetimestamp.py +81 -0
  208. dsp_tools/legacy_models/langstring.py +253 -0
  209. dsp_tools/legacy_models/projectContext.py +49 -0
  210. dsp_tools/py.typed +0 -0
  211. dsp_tools/resources/schema/data.xsd +648 -0
  212. dsp_tools/resources/schema/lists-only.json +72 -0
  213. dsp_tools/resources/schema/project.json +1258 -0
  214. dsp_tools/resources/schema/properties-only.json +874 -0
  215. dsp_tools/resources/schema/resources-only.json +140 -0
  216. dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
  217. dsp_tools/resources/start-stack/docker-compose.override.yml +11 -0
  218. dsp_tools/resources/start-stack/docker-compose.yml +88 -0
  219. dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
  220. dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
  221. dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
  222. dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
  223. dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
  224. dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
  225. dsp_tools/utils/__init__.py +0 -0
  226. dsp_tools/utils/ansi_colors.py +32 -0
  227. dsp_tools/utils/data_formats/__init__.py +0 -0
  228. dsp_tools/utils/data_formats/date_util.py +166 -0
  229. dsp_tools/utils/data_formats/iri_util.py +30 -0
  230. dsp_tools/utils/data_formats/shared.py +81 -0
  231. dsp_tools/utils/data_formats/uri_util.py +76 -0
  232. dsp_tools/utils/fuseki_bloating.py +63 -0
  233. dsp_tools/utils/json_parsing.py +22 -0
  234. dsp_tools/utils/rdf_constants.py +42 -0
  235. dsp_tools/utils/rdflib_utils.py +10 -0
  236. dsp_tools/utils/replace_id_with_iri.py +66 -0
  237. dsp_tools/utils/request_utils.py +238 -0
  238. dsp_tools/utils/xml_parsing/__init__.py +0 -0
  239. dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
  240. dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
  241. dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
  242. dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
  243. dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
  244. dsp_tools/xmllib/CLAUDE.md +302 -0
  245. dsp_tools/xmllib/__init__.py +49 -0
  246. dsp_tools/xmllib/general_functions.py +877 -0
  247. dsp_tools/xmllib/internal/__init__.py +0 -0
  248. dsp_tools/xmllib/internal/checkers.py +162 -0
  249. dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
  250. dsp_tools/xmllib/internal/constants.py +46 -0
  251. dsp_tools/xmllib/internal/input_converters.py +155 -0
  252. dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
  253. dsp_tools/xmllib/internal/serialise_resource.py +177 -0
  254. dsp_tools/xmllib/internal/serialise_values.py +152 -0
  255. dsp_tools/xmllib/internal/type_aliases.py +11 -0
  256. dsp_tools/xmllib/models/__init__.py +0 -0
  257. dsp_tools/xmllib/models/config_options.py +28 -0
  258. dsp_tools/xmllib/models/date_formats.py +48 -0
  259. dsp_tools/xmllib/models/dsp_base_resources.py +1542 -0
  260. dsp_tools/xmllib/models/internal/__init__.py +0 -0
  261. dsp_tools/xmllib/models/internal/file_values.py +172 -0
  262. dsp_tools/xmllib/models/internal/geometry.py +162 -0
  263. dsp_tools/xmllib/models/internal/migration_metadata.py +55 -0
  264. dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
  265. dsp_tools/xmllib/models/internal/values.py +342 -0
  266. dsp_tools/xmllib/models/licenses/__init__.py +0 -0
  267. dsp_tools/xmllib/models/licenses/other.py +59 -0
  268. dsp_tools/xmllib/models/licenses/recommended.py +107 -0
  269. dsp_tools/xmllib/models/permissions.py +41 -0
  270. dsp_tools/xmllib/models/res.py +1782 -0
  271. dsp_tools/xmllib/models/root.py +348 -0
  272. dsp_tools/xmllib/value_checkers.py +434 -0
  273. dsp_tools/xmllib/value_converters.py +777 -0
  274. dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
  275. dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
  276. dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
  277. dsp_tools-18.3.0.post13.dist-info/entry_points.txt +3 -0
  278. dsp_tools-0.9.13.dist-info/LICENSE +0 -674
  279. dsp_tools-0.9.13.dist-info/METADATA +0 -144
  280. dsp_tools-0.9.13.dist-info/RECORD +0 -71
  281. dsp_tools-0.9.13.dist-info/WHEEL +0 -5
  282. dsp_tools-0.9.13.dist-info/entry_points.txt +0 -3
  283. dsp_tools-0.9.13.dist-info/top_level.txt +0 -1
  284. dsplib/models/connection.py +0 -272
  285. dsplib/models/group.py +0 -296
  286. dsplib/models/helpers.py +0 -505
  287. dsplib/models/langstring.py +0 -277
  288. dsplib/models/listnode.py +0 -578
  289. dsplib/models/model.py +0 -20
  290. dsplib/models/ontology.py +0 -448
  291. dsplib/models/permission.py +0 -112
  292. dsplib/models/project.py +0 -547
  293. dsplib/models/propertyclass.py +0 -505
  294. dsplib/models/resource.py +0 -366
  295. dsplib/models/resourceclass.py +0 -810
  296. dsplib/models/sipi.py +0 -30
  297. dsplib/models/user.py +0 -731
  298. dsplib/models/value.py +0 -1000
  299. dsplib/utils/knora-data-schema.xsd +0 -454
  300. dsplib/utils/knora-schema-lists.json +0 -83
  301. dsplib/utils/knora-schema.json +0 -434
  302. dsplib/utils/onto_commons.py +0 -24
  303. dsplib/utils/onto_create_lists.py +0 -73
  304. dsplib/utils/onto_create_ontology.py +0 -442
  305. dsplib/utils/onto_get.py +0 -58
  306. dsplib/utils/onto_validate.py +0 -33
  307. dsplib/utils/xml_upload.py +0 -539
  308. dsplib/widgets/doublepassword.py +0 -80
  309. knora/MLS-import-libraries.py +0 -84
  310. knora/dsp_tools.py +0 -96
  311. knora/dsplib/models/connection.py +0 -272
  312. knora/dsplib/models/group.py +0 -296
  313. knora/dsplib/models/helpers.py +0 -506
  314. knora/dsplib/models/langstring.py +0 -277
  315. knora/dsplib/models/listnode.py +0 -578
  316. knora/dsplib/models/model.py +0 -20
  317. knora/dsplib/models/ontology.py +0 -448
  318. knora/dsplib/models/permission.py +0 -112
  319. knora/dsplib/models/project.py +0 -583
  320. knora/dsplib/models/propertyclass.py +0 -505
  321. knora/dsplib/models/resource.py +0 -416
  322. knora/dsplib/models/resourceclass.py +0 -811
  323. knora/dsplib/models/sipi.py +0 -35
  324. knora/dsplib/models/user.py +0 -731
  325. knora/dsplib/models/value.py +0 -1000
  326. knora/dsplib/utils/knora-data-schema.xsd +0 -464
  327. knora/dsplib/utils/knora-schema-lists.json +0 -83
  328. knora/dsplib/utils/knora-schema.json +0 -444
  329. knora/dsplib/utils/onto_commons.py +0 -24
  330. knora/dsplib/utils/onto_create_lists.py +0 -73
  331. knora/dsplib/utils/onto_create_ontology.py +0 -451
  332. knora/dsplib/utils/onto_get.py +0 -58
  333. knora/dsplib/utils/onto_validate.py +0 -33
  334. knora/dsplib/utils/xml_upload.py +0 -540
  335. knora/dsplib/widgets/doublepassword.py +0 -80
  336. knora/knora.py +0 -2108
  337. knora/test.py +0 -99
  338. knora/testit.py +0 -76
  339. knora/xml2knora.py +0 -633
  340. {dsplib → dsp_tools/cli}/__init__.py +0 -0
  341. {dsplib/models → dsp_tools/clients}/__init__.py +0 -0
  342. {dsplib/utils → dsp_tools/commands}/__init__.py +0 -0
  343. {dsplib/widgets → dsp_tools/commands/create}/__init__.py +0 -0
  344. {knora → dsp_tools/commands/create/create_on_server}/__init__.py +0 -0
  345. {knora/dsplib → dsp_tools/commands/create/models}/__init__.py +0 -0
  346. {knora/dsplib/models → dsp_tools/commands/create/parsing}/__init__.py +0 -0
  347. {knora/dsplib/utils → dsp_tools/commands/create/serialisation}/__init__.py +0 -0
  348. {knora/dsplib/widgets → dsp_tools/commands/excel2json}/__init__.py +0 -0
@@ -0,0 +1,66 @@
1
+ from copy import deepcopy
2
+ from pathlib import Path
3
+
4
+ from dsp_tools.commands.xmlupload.iri_resolver import IriResolver
5
+ from dsp_tools.commands.xmlupload.richtext_id2iri import replace_ids_if_found
6
+ from dsp_tools.error.exceptions import DuplicateIdsInXmlAndId2IriMapping
7
+ from dsp_tools.utils.json_parsing import parse_json_file
8
+ from dsp_tools.utils.xml_parsing.models.parsed_resource import KnoraValueType
9
+ from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedResource
10
+ from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedValue
11
+
12
+
13
def use_id2iri_mapping_to_replace_ids(resources: list[ParsedResource], id2iri_file: Path) -> list[ParsedResource]:
    """
    Read an id2iri mapping from a JSON file and apply it to the given resources.

    Args:
        resources: parsed resources whose values may reference IDs contained in the mapping
        id2iri_file: path of the JSON file that is parsed into the mapping

    Returns:
        whatever `_replace_all_ids_with_iris()` returns for these resources
    """
    resolver = IriResolver(parse_json_file(id2iri_file))
    return _replace_all_ids_with_iris(resources, resolver)
17
+
18
+
19
def _replace_all_ids_with_iris(resources: list[ParsedResource], iri_lookup: IriResolver) -> list[ParsedResource]:
    """
    Check that no resource ID is reused, then process every resource with the lookup.

    Args:
        resources: parsed resources to process
        iri_lookup: resolver whose `lookup` keys are the IDs of the mapping

    Returns:
        one processed resource per input resource, in the same order
    """
    ids_in_xml = {res.res_id for res in resources}
    ids_in_mapping = set(iri_lookup.lookup.keys())
    # must run before any resource is touched, so that nothing is modified if the check fails
    _raise_error_if_resource_ids_are_reused(ids_in_xml, ids_in_mapping)
    processed = []
    for res in resources:
        processed.append(_process_one_resource(res, iri_lookup))
    return processed
23
+
24
+
25
+ def _raise_error_if_resource_ids_are_reused(
26
+ resources_ids_from_xml: set[str], resource_ids_from_mapping: set[str]
27
+ ) -> None:
28
+ overlap = resources_ids_from_xml.intersection(resource_ids_from_mapping)
29
+ if overlap:
30
+ msg = (
31
+ "You provided a id2iri mapping file to replace IDs referenced in links and "
32
+ "richtext with IRIs of previously uploaded resources. "
33
+ "This means that the new XML file must not contain IDs that were previously used.\n"
34
+ f"The following ID(s) are both in the id2iri mapping and the new data: {', '.join(overlap)}"
35
+ )
36
+ raise DuplicateIdsInXmlAndId2IriMapping(msg)
37
+
38
+
39
+ def _process_one_resource(res: ParsedResource, iri_lookup: IriResolver) -> ParsedResource:
40
+ new_vals = []
41
+ previous_vals = deepcopy(res.values)
42
+ for v in previous_vals:
43
+ if v.value_type == KnoraValueType.LINK_VALUE:
44
+ new_vals.append(_process_link_value(v, iri_lookup))
45
+ elif v.value_type == KnoraValueType.RICHTEXT_VALUE:
46
+ new_vals.append(_process_richtext_value(v, iri_lookup))
47
+ else:
48
+ new_vals.append(v)
49
+ res.values = new_vals
50
+ return res
51
+
52
+
53
+ def _process_link_value(val: ParsedValue, iri_lookup: IriResolver) -> ParsedValue:
54
+ # later code deals with cases when it is not a string, we can safely ignore this here
55
+ if isinstance(val.value, str):
56
+ if found := iri_lookup.get(val.value):
57
+ val.value = found
58
+ return val
59
+
60
+
61
+ def _process_richtext_value(val: ParsedValue, iri_lookup: IriResolver) -> ParsedValue:
62
+ # later code deals with cases when it is not a string, we can safely ignore this here
63
+ if isinstance(val.value, str):
64
+ replaced, _ = replace_ids_if_found(val.value, iri_lookup)
65
+ val.value = replaced
66
+ return val
@@ -0,0 +1,238 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import time
6
+ import warnings
7
+ from dataclasses import dataclass
8
+ from dataclasses import field
9
+ from datetime import datetime
10
+ from http import HTTPStatus
11
+ from typing import Any
12
+ from typing import Literal
13
+ from typing import Never
14
+ from typing import Union
15
+
16
+ from loguru import logger
17
+ from requests import JSONDecodeError
18
+ from requests import ReadTimeout
19
+ from requests import RequestException
20
+ from requests import Response
21
+
22
+ from dsp_tools.commands.get.legacy_models.context import Context
23
+ from dsp_tools.commands.get.legacy_models.helpers import OntoIri
24
+ from dsp_tools.config.logger_config import LOGGER_SAVEPATH
25
+ from dsp_tools.error.custom_warnings import DspToolsUnexpectedStatusCodeWarning
26
+ from dsp_tools.error.exceptions import DspToolsRequestException
27
+ from dsp_tools.error.exceptions import PermanentTimeOutError
28
+
29
+
30
@dataclass
class ResponseCodeAndText:
    """Pairs the status code of a response with its text."""

    # numeric status code
    status_code: int
    # text belonging to the response
    text: str
34
+
35
+
36
@dataclass
class PostFiles:
    """One or more files to be uploaded in a POST request."""

    files: list[PostFile]

    def to_dict(self) -> dict[str, tuple[str, Any, str] | tuple[str, Any]]:
        """Map the name of every file to the tuple returned by its `to_tuple()` method."""
        name_to_tuple = {}
        for post_file in self.files:
            # a file name that occurs more than once is overwritten by the later entry
            name_to_tuple[post_file.file_name] = post_file.to_tuple()
        return name_to_tuple
44
+
45
+
46
@dataclass
class PostFile:
    """A single file to be uploaded, with an optional content type."""

    file_name: str
    fileobj: Any
    content_type: str | None = None

    def to_tuple(self) -> tuple[str, Any, str] | tuple[str, Any]:
        """Return (file_name, fileobj), extended by the content type if one is set."""
        if not self.content_type:
            return self.file_name, self.fileobj
        return self.file_name, self.fileobj, self.content_type
56
+
57
+
58
class SetEncoder(json.JSONEncoder):
    """JSON encoder that can additionally serialize sets, Context objects and OntoIri objects."""

    def default(self, o: Union[set[Any], Context, OntoIri]) -> Any:
        """
        Return a JSON-serializable replacement for o.

        A set becomes a list, a Context is converted with its `toJsonObj()` method,
        and an OntoIri becomes a dict with the keys "iri" and "hashtag".
        Everything else is delegated to the default implementation of the base class.
        """
        if isinstance(o, set):
            return list(o)
        if isinstance(o, Context):
            return o.toJsonObj()
        if isinstance(o, OntoIri):
            return {"iri": o.iri, "hashtag": o.hashtag}
        return super().default(o)
70
+
71
+
72
@dataclass
class RequestParameters:
    """Parameters of a single request, including the serialized form of its payload."""

    method: Literal["POST", "GET", "PUT", "DELETE"]
    url: str
    timeout: int
    data: dict[str, Any] | None = None
    # not settable through the constructor, it is derived from `data` in __post_init__
    data_serialized: bytes | None = field(init=False, default=None)
    headers: dict[str, str] | None = None
    files: PostFiles | None = None

    def __post_init__(self) -> None:
        self.data_serialized = self._serialize_payload(self.data)

    def _serialize_payload(self, payload: dict[str, Any] | None) -> bytes | None:
        """Return the payload as UTF-8 encoded JSON, or None if the payload is empty or None."""
        if not payload:
            return None
        # If data is not encoded as bytes, issues can occur with non-ASCII characters,
        # where the content-length of the request will turn out to be different from the actual length.
        as_text = json.dumps(payload, cls=SetEncoder, ensure_ascii=False)
        return as_text.encode("utf-8")

    def as_kwargs(self) -> dict[str, Any]:
        """Return the parameters as a dict of keyword arguments; "files" is only included if files are set."""
        kwargs: dict[str, Any] = dict(
            method=self.method,
            url=self.url,
            timeout=self.timeout,
            data=self.data_serialized,
            headers=self.headers,
        )
        if self.files:
            kwargs["files"] = self.files.to_dict()
        return kwargs
101
+
102
+
103
def log_request(params: RequestParameters, extra_headers: dict[str, Any] | None = None) -> None:
    """
    Write the request to the log at debug level.

    Headers are passed through `sanitize_headers()`, a "password" entry in the data is replaced by "***",
    and of the files only the names are logged.

    Args:
        params: the request that is logged
        extra_headers: additional headers to log; headers of `params` take precedence if keys collide
    """
    headers_to_log: dict[str, Any] = extra_headers if extra_headers else {}
    if params.headers:
        # the union creates a new dict, so the dict passed in by the caller is not modified
        headers_to_log = headers_to_log | params.headers
    dumpobj: dict[str, Any] = {
        "method": params.method,
        "url": params.url,
        "timeout": params.timeout,
        "headers": sanitize_headers(headers_to_log),
    }
    if params.data:
        # mask on a (shallow) copy, so that the data of the request itself keeps the password
        data_to_log = params.data.copy()
        if "password" in data_to_log:
            data_to_log["password"] = "***"
        dumpobj["data"] = data_to_log
    if params.files:
        dumpobj["files"] = [post_file.file_name for post_file in params.files.files]
    logger.debug(f"REQUEST: {json.dumps(dumpobj, cls=SetEncoder)}")
124
+
125
+
126
def log_response(response: Response, include_response_content: bool = True) -> None:
    """
    Write status code, sanitized headers and content of a response to the log at debug level.

    Args:
        response: the response that is logged
        include_response_content: if False, a placeholder text is logged instead of the content
    """
    headers_to_log: dict[str, str] | str = ""
    if response.headers:
        headers_to_log = sanitize_headers(dict(response.headers))
    dumpobj: dict[str, Any] = {
        "status_code": response.status_code,
        "headers": headers_to_log,
    }
    if not include_response_content:
        dumpobj["content"] = "too big to be logged"
    else:
        try:
            dumpobj["content"] = response.json()
        except JSONDecodeError:
            # the body is not JSON, fall back to the raw text
            dumpobj["content"] = response.text
    logger.debug(f"RESPONSE: {json.dumps(dumpobj)}")
140
+
141
+
142
def sanitize_headers(headers: dict[str, str | bytes]) -> dict[str, str]:
    """
    Remove sensitive information from request headers.

    The value of an "Authorization" header with a Bearer token is replaced by "Bearer ***",
    and the value of a "Set-Cookie" header is replaced by "***".
    Header names are compared case-insensitively, so that e.g. "authorization" or "set-cookie"
    are masked, too.

    Args:
        headers: header names mapped to their values; byte values are decoded as UTF-8

    Returns:
        a new dict with the same keys, where all values are strings and sensitive values are masked
    """

    def _mask(key: str, value: str | bytes) -> str:
        if isinstance(value, bytes):
            value = value.decode("utf-8")
        # header field names are case-insensitive, a plain dict however preserves the received casing
        normalized_key = key.lower()
        # the authentication scheme is case-insensitive as well
        if normalized_key == "authorization" and value[:7].lower() == "bearer ":
            return "Bearer ***"
        if normalized_key == "set-cookie":
            return "***"
        return value

    return {k: _mask(k, v) for k, v in headers.items()}
155
+
156
+
157
def log_request_failure_and_sleep(reason: str, retry_counter: int, exc_info: bool) -> None:
    """
    Print and log why a request failed, then sleep before the next attempt.

    The sleeping time doubles with every retry and is capped at 300 seconds:

    ============= ================ =============================
    retry_counter seconds to sleep cumulative waiting time (min)
    ============= ================ =============================
    0             1                0
    1             2                0
    2             4                0
    3             8                0
    4             16               0
    5             32               1
    6             64               2
    7             128              4
    8             256              9
    9             300              14
    10            300              19
    11            300              24
    12            300              29
    15            300              44
    18            300              59
    24            300              89
    30            300              119
    ============= ================ =============================

    Args:
        reason: description of the failure, used as prefix of the message
        retry_counter: number of the current retry, determines the sleeping time
        exc_info: if True, the message is logged with `logger.exception()`, otherwise with `logger.error()`
    """
    sleep_time = min(2**retry_counter, 300)
    msg = f"{reason}: Try reconnecting to DSP server, next attempt in {sleep_time} seconds..."
    print(f"{datetime.now()}: {msg}")
    log_entry = f"{msg} ({retry_counter=:})"
    write_to_log = logger.exception if exc_info else logger.error
    write_to_log(log_entry)
    time.sleep(sleep_time)
191
+
192
+
193
def log_and_raise_timeouts(error: TimeoutError | ReadTimeout) -> Never:
    """
    Print and log a timeout error raised by a request, then raise our own PermanentTimeOutError.

    Args:
        error: the timeout error; only the name of its class ends up in the message

    Raises:
        PermanentTimeOutError: always, without the original error as context
    """
    error_name = error.__class__.__name__
    msg = f"A '{error_name}' occurred during the connection to the DSP server."
    timestamp = datetime.now()
    print(f"{timestamp}: {msg}")
    logger.error(msg)
    raise PermanentTimeOutError(msg) from None
199
+
200
+
201
def should_retry(response: Response) -> bool:
    """
    Decide whether it is sensible to retry a request.

    A retry is sensible if the status code is in the 500 range
    or if the response text contains "try again later" (case-insensitive).
    If the environment variable DSP_TOOLS_TESTING is "true", the answer is always False.
    """
    is_5xx = 500 <= response.status_code < 600
    asks_for_retry = "try again later" in response.text.lower()
    # set in .github/workflows/tests-on-push.yml
    if os.getenv("DSP_TOOLS_TESTING") == "true":
        return False
    return asks_for_retry or is_5xx
207
+
208
+
209
def log_and_raise_request_exception(error: RequestException) -> Never:
    """
    Log an exception that occurred during an API call and raise a DspToolsRequestException.

    Args:
        error: the original exception; its class name and, if available, the method and URL
            of its request are included in the message

    Raises:
        DspToolsRequestException: always, without the original exception as context
    """
    parts = [
        "During an API call the following exception occurred. ",
        f"Please contact support@dasch.swiss with the log file at {LOGGER_SAVEPATH} ",
        "if you required help resolving the issue.\n",
        f"Original exception name: {error.__class__.__name__}\n",
    ]
    if error.request:
        parts.append(f"Original request: {error.request.method} {error.request.url}")
    msg = "".join(parts)
    logger.exception(msg)
    raise DspToolsRequestException(msg) from None
220
+
221
+
222
def log_and_warn_unexpected_non_ok_response(status_code: int, response_text: str) -> None:
    """
    Log a warning about an unexpected API response and emit a DspToolsUnexpectedStatusCodeWarning.

    Args:
        status_code: status code of the response
        response_text: text of the response, of which at most the first 200 characters are reported
    """
    # slicing is a no-op for texts that are not longer than the limit
    shortened_text = response_text[:200]
    msg = "".join(
        [
            "We got an unexpected API response during the following request. ",
            "Please contact the dsp-tools development team (at support@dasch.swiss) with your log file ",
            "so that we can handle this more gracefully in the future.\n",
            f"Response status code: {status_code}\n",
            f"Original Message: {shortened_text}",
        ]
    )
    logger.warning(msg)
    warnings.warn(DspToolsUnexpectedStatusCodeWarning(msg))
233
+
234
+
235
def is_server_error(response: ResponseCodeAndText) -> bool:
    """Return True if the status code lies between 500 and 511 (both inclusive)."""
    lowest = HTTPStatus.INTERNAL_SERVER_ERROR
    highest = HTTPStatus.NETWORK_AUTHENTICATION_REQUIRED
    return lowest <= response.status_code <= highest
File without changes
@@ -0,0 +1,32 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import cast
4
+
5
+ import regex
6
+ from lxml import etree
7
+
8
+ from dsp_tools.commands.xmlupload.models.permission import Permissions
9
+ from dsp_tools.commands.xmlupload.models.permissions_parsed import XmlPermission
10
+ from dsp_tools.legacy_models.projectContext import ProjectContext
11
+
12
+
13
def get_authorship_lookup(root: etree._Element) -> dict[str, list[str]]:
    """
    Collect the authors of every <authorship> element found in the tree.

    Args:
        root: root of the parsed XML tree

    Returns:
        a mapping from the `id` attribute of each <authorship> element
        to the whitespace-normalised texts of its child elements
    """

    def _normalise(author_ele: etree._Element) -> str:
        # The xsd file ensures that the body of the element contains valid non-whitespace characters
        raw_text = cast(str, author_ele.text)
        return regex.sub(r" +", " ", raw_text).strip()

    lookup: dict[str, list[str]] = {}
    for authorship in root.iter(tag="authorship"):
        authors = list(map(_normalise, authorship.iterchildren()))
        lookup[authorship.attrib["id"]] = authors
    return lookup
25
+
26
+
27
def get_permissions_lookup(root: etree._Element, proj_context: ProjectContext) -> dict[str, Permissions]:
    """
    Build a lookup of all <permissions> elements found in the tree.

    Args:
        root: root of the parsed XML tree
        proj_context: project context handed to every XmlPermission

    Returns:
        a mapping from permission ID to the permission instance created from the matching element
    """
    # First parse every element, then derive the instances,
    # so that a repeated permission ID yields only one instance (the last one wins).
    xml_permissions = [XmlPermission(ele, proj_context) for ele in list(root.iter(tag="permissions"))]
    by_id = {xml_perm.permission_id: xml_perm for xml_perm in xml_permissions}
    return {perm_id: xml_perm.get_permission_instance() for perm_id, xml_perm in by_id.items()}
@@ -0,0 +1,325 @@
1
+ from pathlib import Path
2
+
3
+ import regex
4
+ from lxml import etree
5
+
6
+ from dsp_tools.commands.validate_data.mappers import XML_TAG_TO_VALUE_TYPE_MAPPER
7
+ from dsp_tools.error.exceptions import InputError
8
+ from dsp_tools.utils.data_formats.iri_util import convert_api_url_for_correct_iri_namespace_construction
9
+ from dsp_tools.utils.rdf_constants import KNORA_API_PREFIX
10
+ from dsp_tools.utils.xml_parsing.models.parsed_resource import KnoraValueType
11
+ from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedFileValue
12
+ from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedFileValueMetadata
13
+ from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedMigrationMetadata
14
+ from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedResource
15
+ from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedValue
16
+
17
+
18
def get_parsed_resources(root: etree._Element, api_url: str) -> list[ParsedResource]:
    """
    Parse all resources of an XML tree.

    The result lists the <resource> elements first, followed by <region>, <link>,
    <video-segment> and <audio-segment> elements, each group in document order.

    Args:
        root: root of the parsed XML tree
        api_url: URL of the API, used to construct the absolute IRIs

    Returns:
        the parsed resources
    """
    namespace_url = convert_api_url_for_correct_iri_namespace_construction(api_url)
    iri_lookup = _create_from_local_name_to_absolute_iri_lookup(root, namespace_url)

    parsed: list[ParsedResource] = [
        _parse_one_resource(ele, iri_lookup[ele.attrib["restype"]], iri_lookup)
        for ele in root.iterdescendants(tag="resource")
    ]
    # tags whose resource class is fixed by knora-api
    for tag, knora_class in (("region", "Region"), ("link", "LinkObj")):
        res_type = f"{KNORA_API_PREFIX}{knora_class}"
        parsed.extend(_parse_one_resource(ele, res_type, iri_lookup) for ele in root.iterdescendants(tag=tag))
    for tag, segment_type in (("video-segment", "Video"), ("audio-segment", "Audio")):
        parsed.extend(_parse_segment(ele, segment_type) for ele in root.iterdescendants(tag=tag))
    return parsed
36
+
37
+
38
def _create_from_local_name_to_absolute_iri_lookup(root: etree._Element, api_url: str) -> dict[str, str]:
    """
    Map every local name used in the XML to its absolute IRI.

    The local names are the `restype` attributes of all <resource> elements
    and the `name` attributes of all elements of the tree that have one.

    Args:
        root: root of the parsed XML tree; must carry the attributes `shortcode` and `default-ontology`
        api_url: URL of the API, used to construct the ontology namespaces

    Returns:
        a mapping from local name to absolute IRI
    """
    shortcode = root.attrib["shortcode"]
    default_ontology = root.attrib["default-ontology"]
    class_names = {res.attrib["restype"] for res in root.iterdescendants(tag="resource")}
    named_elements = {ele.attrib["name"] for ele in root.iter() if "name" in ele.attrib}
    return {
        local_name: _get_one_absolute_iri(local_name, shortcode, default_ontology, api_url)
        for local_name in class_names | named_elements
    }
46
+
47
+
48
+ def _get_one_absolute_iri(local_name: str, shortcode: str, default_ontology: str, api_url: str) -> str:
49
+ split_name = local_name.split(":")
50
+ if len(split_name) == 1:
51
+ return f"{KNORA_API_PREFIX}{local_name}"
52
+ if len(split_name) == 2:
53
+ if split_name[0] == "":
54
+ return f"{_construct_namespace(api_url, shortcode, default_ontology)}{split_name[1]}"
55
+ if split_name[0] == "knora-api":
56
+ return f"{KNORA_API_PREFIX}{split_name[1]}"
57
+ return f"{_construct_namespace(api_url, shortcode, split_name[0])}{split_name[1]}"
58
+ raise InputError(
59
+ f"It is not permissible to have a colon in a property or resource class name. "
60
+ f"Please correct the following: {local_name}"
61
+ )
62
+
63
+
64
def _construct_namespace(api_url: str, shortcode: str, onto_name: str) -> str:
    """
    Assemble the namespace of a project ontology.

    Args:
        api_url: URL of the API, without trailing slash
        shortcode: shortcode of the project
        onto_name: name of the ontology

    Returns:
        the namespace in the form "<api_url>/ontology/<shortcode>/<onto_name>/v2#"
    """
    return "{}/ontology/{}/{}/v2#".format(api_url, shortcode, onto_name)
66
+
67
+
68
def _parse_segment(segment: etree._Element, segment_type: str) -> ParsedResource:
    """
    Parse one <video-segment> or <audio-segment> element.

    Args:
        segment: the segment element; must carry the attributes `id` and `label`
        segment_type: "Video" or "Audio", used to build the knora-api class name

    Returns:
        the parsed resource, which never has a file value
    """
    segment_values = _parse_segment_values(segment, segment_type)
    metadata = _parse_migration_metadata(segment)
    attributes = segment.attrib
    return ParsedResource(
        res_id=attributes["id"],
        res_type=f"{KNORA_API_PREFIX}{segment_type}Segment",
        label=attributes["label"],
        permissions_id=attributes.get("permissions"),
        values=segment_values,
        file_value=None,
        migration_metadata=metadata,
    )
80
+
81
+
82
def _parse_segment_values(segment: etree._Element, segment_type: str) -> list[ParsedValue]:
    """
    Parse the children of a segment element into values.

    Args:
        segment: the segment element
        segment_type: "Video" or "Audio", used to build the name of the `isSegmentOf` property

    Returns:
        one parsed value per child element, in document order
    """
    parsed: list[ParsedValue] = []
    for child in segment.iterchildren():
        tag = child.tag
        # by default, the property is the knora-api property named like the tag
        prop_iri = f"{KNORA_API_PREFIX}{tag!s}"
        content: str | tuple[str, str] | None
        if tag == "isSegmentOf":
            kind = KnoraValueType.LINK_VALUE
            prop_iri = f"{KNORA_API_PREFIX}is{segment_type}SegmentOf"
            content = child.text.strip() if child.text else None
        elif tag == "hasSegmentBounds":
            kind = KnoraValueType.INTERVAL_VALUE
            content = (child.attrib["segment_start"], child.attrib["segment_end"])
        elif tag in ("hasDescription", "hasComment"):
            kind = KnoraValueType.RICHTEXT_VALUE
            content = _get_richtext_as_string(child)
        elif tag == "relatesTo":
            kind = KnoraValueType.LINK_VALUE
            content = child.text.strip() if child.text else None
        else:
            kind = KnoraValueType.SIMPLETEXT_VALUE
            content = _get_simpletext_as_string(child)
        parsed_value = ParsedValue(
            prop_name=prop_iri,
            value=content,
            value_type=kind,
            permissions_id=child.attrib.get("permissions"),
            comment=child.attrib.get("comment"),
        )
        parsed.append(parsed_value)
    return parsed
114
+
115
+
116
def _parse_one_resource(resource: etree._Element, res_type: str, iri_lookup: dict[str, str]) -> ParsedResource:
    """
    Parse one <resource>, <region> or <link> element.

    Args:
        resource: the element; must carry the attributes `id` and `label`
        res_type: absolute IRI of the resource class
        iri_lookup: mapping from local names to absolute IRIs

    Returns:
        the parsed resource
    """
    parsed_values, asset = _parse_values(resource, iri_lookup)
    metadata = _parse_migration_metadata(resource)
    attributes = resource.attrib
    return ParsedResource(
        res_id=attributes["id"],
        res_type=res_type,
        label=attributes["label"],
        permissions_id=attributes.get("permissions"),
        values=parsed_values,
        file_value=asset,
        migration_metadata=metadata,
    )
128
+
129
+
130
def _parse_migration_metadata(resource: etree._Element) -> ParsedMigrationMetadata | None:
    """
    Read the migration metadata (`iri`, `ark`, `creation_date`) from the attributes of a resource.

    Args:
        resource: the resource element

    Returns:
        the migration metadata, or None if none of the three attributes has a non-empty value
    """
    iri = resource.attrib.get("iri")
    ark = resource.attrib.get("ark")
    creation_date = resource.attrib.get("creation_date")
    if not (iri or ark or creation_date):
        return None
    return ParsedMigrationMetadata(iri=iri, ark=ark, creation_date=creation_date)
139
+
140
+
141
def _parse_values(
    resource: etree._Element, iri_lookup: dict[str, str]
) -> tuple[list[ParsedValue], ParsedFileValue | None]:
    """
    Split the children of a resource into property values and the file value.

    Args:
        resource: the resource element
        iri_lookup: mapping from local names to absolute IRIs

    Returns:
        the parsed values and, if a <bitstream> or <iiif-uri> child exists, the parsed file value
        (if there are several such children, the last one wins)
    """
    parsed_values = []
    file_value = None
    for child in resource.iterchildren():
        if child.tag == "bitstream":
            file_value = _parse_file_values(child)
        elif child.tag == "iiif-uri":
            file_value = _parse_iiif_uri(child)
        else:
            parsed_values.extend(_parse_one_value(child, iri_lookup))
    return parsed_values, file_value
155
+
156
+
157
def _parse_one_value(values: etree._Element, iri_lookup: dict[str, str]) -> list[ParsedValue]:
    """
    Parse one property element, dispatching on its tag.

    Args:
        values: the property element; its `name` attribute must be a key of the lookup
        iri_lookup: mapping from local names to absolute IRIs

    Returns:
        the parsed values contained in the property element
    """
    prop_iri = iri_lookup[values.attrib["name"]]
    if values.tag == "list-prop":
        return _parse_list_value(values, prop_iri)
    if values.tag == "text-prop":
        return _parse_text_value(values, prop_iri)
    return _parse_generic_values(values, prop_iri)
166
+
167
+
168
def _parse_generic_values(values: etree._Element, prop_name: str) -> list[ParsedValue]:
    """
    Parse the children of a property element whose value type follows directly from its tag.

    Args:
        values: the property element; its tag must be a key of XML_TAG_TO_VALUE_TYPE_MAPPER
        prop_name: absolute IRI of the property

    Returns:
        one parsed value per child, with the stripped text as value (None if the child has no text)
    """
    kind = XML_TAG_TO_VALUE_TYPE_MAPPER[str(values.tag)]
    return [
        ParsedValue(
            prop_name=prop_name,
            value=child.text.strip() if child.text else None,
            value_type=kind,
            permissions_id=child.attrib.get("permissions"),
            comment=child.attrib.get("comment"),
        )
        for child in values
    ]
182
+
183
+
184
def _parse_list_value(values: etree._Element, prop_name: str) -> list[ParsedValue]:
    """
    Parse the children of a <list-prop> element.

    Args:
        values: the property element; must carry the attribute `list`
        prop_name: absolute IRI of the property

    Returns:
        one parsed value per child, whose value is the tuple (list name, node name);
        the node name is None if the child has no text
    """
    list_name = values.attrib["list"]
    return [
        ParsedValue(
            prop_name=prop_name,
            value=(list_name, child.text.strip() if child.text else None),
            value_type=KnoraValueType.LIST_VALUE,
            permissions_id=child.attrib.get("permissions"),
            comment=child.attrib.get("comment"),
        )
        for child in values
    ]
199
+
200
+
201
def _parse_text_value(values: etree._Element, prop_name: str) -> list[ParsedValue]:
    """
    Parse the children of a <text-prop> element.

    Args:
        values: the property element; each child must carry the attribute `encoding`
        prop_name: absolute IRI of the property

    Returns:
        one parsed value per child: a richtext value if the encoding is "xml", a simpletext value otherwise
    """
    parsed: list[ParsedValue] = []
    for text_ele in values:
        if text_ele.attrib["encoding"] == "xml":
            kind, content = KnoraValueType.RICHTEXT_VALUE, _get_richtext_as_string(text_ele)
        else:
            kind, content = KnoraValueType.SIMPLETEXT_VALUE, _get_simpletext_as_string(text_ele)
        parsed_value = ParsedValue(
            prop_name=prop_name,
            value=content,
            value_type=kind,
            permissions_id=text_ele.attrib.get("permissions"),
            comment=text_ele.attrib.get("comment"),
        )
        parsed.append(parsed_value)
    return parsed
220
+
221
+
222
def _get_richtext_as_string(value: etree._Element) -> str | None:
    """
    Return the inner XML of an element as a cleaned-up string.

    Args:
        value: the element that holds the xml-encoded text

    Returns:
        None if the element has neither text nor children,
        otherwise the serialised content of the element without its own start and end tag.
        If only whitespaces were entered, the result is an empty string,
        so that the user message can be precise.
    """
    if not value.text and len(value) == 0:
        return None
    # Serialise without the tail: by default, the text that follows the end tag would be appended,
    # and then the end tag could not be removed by the `$`-anchored pattern below.
    xmlstr = etree.tostring(value, encoding="unicode", method="xml", with_tail=False).strip()
    # escape the tag, so that it is matched literally and not interpreted as a regex
    tag = regex.escape(str(value.tag))
    xmlstr = regex.sub(f"^<{tag}.*?>", "", xmlstr, count=1)
    xmlstr = regex.sub(f"</{tag}>$", "", xmlstr)
    return _cleanup_formatted_text(xmlstr.strip())
232
+
233
+
234
def _cleanup_formatted_text(xmlstr_orig: str) -> str:
    """
    Purge an xml-encoded text value of characters that only stem from the formatting of the XML file.

    The following steps are applied, in this order:
        - (multiple) line breaks are replaced by a space
        - multiple spaces or tabstops are replaced by a single space (except within `<code>` or `<pre>` tags)
        - a space directly after a `<br>`/`<br/>` tag is removed (except within `<code>` tags)
        - leading and trailing whitespace is removed

    Args:
        xmlstr_orig: content of the tag from the XML file, in serialized form

    Returns:
        purged string, suitable to be sent to DSP-API
    """
    substitutions = (
        # (multiple) line breaks -> space
        ("\n+", " "),
        # multiple spaces or tabstops -> single space (except within <code> or <pre> tags):
        # the regex selects all spaces/tabstops not followed by </xyz> without <xyz in between.
        # credits: https://stackoverflow.com/a/46937770/14414188
        ("( {2,}|\t+)(?!(.(?!<(code|pre)))*</(code|pre)>)", " "),
        # space after a <br/> tag -> nothing (except within <code> tags)
        ("((?<=<br/?>) )(?!(.(?!<code))*</code>)", ""),
    )
    cleaned = xmlstr_orig
    for pattern, replacement in substitutions:
        cleaned = regex.sub(pattern, replacement, cleaned)
    return cleaned.strip()
259
+
260
+
261
def _get_simpletext_as_string(value: etree._Element) -> str | None:
    """
    Return the content of a simpletext element as a cleaned-up string.

    If the element has child elements, they are kept in serialised form (tags preserved).

    Args:
        value: the element that holds the text

    Returns:
        None if the element has neither text nor children.
        Otherwise the content, where multiple spaces or tabstops are replaced by a single space,
        and every line as well as the entire string is stripped.
        If only whitespaces were entered, the result is an empty string,
        so that the user message can be precise.
    """
    if len(value) == 0:
        if not value.text:
            return None
        found = value.text
    else:
        # Extract the inner XML content, preserving tags.
        # Only the direct children are serialised: each serialisation already contains
        # the whole subtree of the child (and its tail), so iterating over all descendants
        # would emit nested elements a second time.
        found = "".join(etree.tostring(child, encoding="unicode") for child in value.iterchildren())
        if value.text:
            found = value.text + found
    # replace multiple spaces or tabstops by a single space
    str_val = regex.sub(r" {2,}|\t+", " ", found)
    # remove leading and trailing spaces (of every line, but also of the entire string)
    str_val = "\n".join(line.strip() for line in str_val.split("\n"))
    return str_val.strip()
278
+
279
+
280
def _parse_iiif_uri(iiif_uri: etree._Element) -> ParsedFileValue:
    """
    Parse an <iiif-uri> element.

    Args:
        iiif_uri: the element

    Returns:
        the parsed file value of type STILL_IMAGE_IIIF, whose value is the stripped text (None if there is no text)
    """
    uri = None
    if iiif_uri.text:
        uri = iiif_uri.text.strip()
    return ParsedFileValue(
        value=uri,
        value_type=KnoraValueType.STILL_IMAGE_IIIF,
        metadata=_parse_file_metadata(iiif_uri),
    )
286
+
287
+
288
def _parse_file_values(file_value: etree._Element) -> ParsedFileValue:
    """
    Parse a <bitstream> element.

    Args:
        file_value: the element

    Returns:
        the parsed file value, whose value is the stripped text (None if there is no text)
        and whose type is derived from the file extension
    """
    file_name = None
    if file_value.text:
        file_name = file_value.text.strip()
    return ParsedFileValue(
        value=file_name,
        value_type=_get_file_value_type(file_name),
        metadata=_parse_file_metadata(file_value),
    )
295
+
296
+
297
def _parse_file_metadata(file_value: etree._Element) -> ParsedFileValueMetadata:
    """
    Read the metadata of a file value from the attributes of its element.

    Args:
        file_value: the <bitstream> or <iiif-uri> element

    Returns:
        the metadata; every attribute that is absent results in None
    """
    attributes = file_value.attrib
    return ParsedFileValueMetadata(
        license_iri=attributes.get("license"),
        copyright_holder=attributes.get("copyright-holder"),
        authorship_id=attributes.get("authorship-id"),
        permissions_id=attributes.get("permissions"),
    )
304
+
305
+
306
def _get_file_value_type(file_name: str | None) -> KnoraValueType | None:
    """
    Derive the type of a file value from the extension of the file name.

    Args:
        file_name: name or path of the file

    Returns:
        the value type that belongs to the (case-insensitive) extension,
        or None if there is no file name or if the extension is unknown
    """
    if not file_name:
        return None
    types_by_extensions = (
        (("zip", "tar", "gz", "z", "tgz", "gzip", "7z"), KnoraValueType.ARCHIVE_FILE),
        (("mp3", "wav"), KnoraValueType.AUDIO_FILE),
        (("pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", "epub"), KnoraValueType.DOCUMENT_FILE),
        (("mp4",), KnoraValueType.MOVING_IMAGE_FILE),
        # jpx is the extension of the files returned by dsp-ingest
        (("jpg", "jpeg", "jp2", "png", "tif", "tiff", "jpx"), KnoraValueType.STILL_IMAGE_FILE),
        (
            ("odd", "rng", "txt", "xml", "htm", "html", "xsd", "xsl", "csv", "json"),
            KnoraValueType.TEXT_FILE,
        ),
    )
    # the suffix starts with a dot, which is not part of the extension
    extension = Path(file_name).suffix[1:].lower()
    for extensions, value_type in types_by_extensions:
        if extension in extensions:
            return value_type
    return None
File without changes