dsp-tools 9.1.0.post11__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316) hide show
  1. dsp_tools/__init__.py +4 -0
  2. dsp_tools/cli/args.py +36 -0
  3. dsp_tools/cli/call_action.py +51 -231
  4. dsp_tools/cli/call_action_files_only.py +101 -0
  5. dsp_tools/cli/call_action_with_network.py +207 -0
  6. dsp_tools/cli/create_parsers.py +156 -58
  7. dsp_tools/cli/entry_point.py +56 -26
  8. dsp_tools/cli/utils.py +87 -0
  9. dsp_tools/clients/CLAUDE.md +420 -0
  10. dsp_tools/clients/authentication_client.py +14 -0
  11. dsp_tools/clients/authentication_client_live.py +66 -0
  12. dsp_tools/{utils → clients}/connection.py +2 -18
  13. dsp_tools/clients/connection_live.py +233 -0
  14. dsp_tools/clients/fuseki_metrics.py +60 -0
  15. dsp_tools/clients/group_user_clients.py +35 -0
  16. dsp_tools/clients/group_user_clients_live.py +181 -0
  17. dsp_tools/clients/legal_info_client.py +23 -0
  18. dsp_tools/clients/legal_info_client_live.py +132 -0
  19. dsp_tools/clients/list_client.py +49 -0
  20. dsp_tools/clients/list_client_live.py +166 -0
  21. dsp_tools/clients/metadata_client.py +24 -0
  22. dsp_tools/clients/metadata_client_live.py +47 -0
  23. dsp_tools/clients/ontology_clients.py +49 -0
  24. dsp_tools/clients/ontology_create_client_live.py +166 -0
  25. dsp_tools/clients/ontology_get_client_live.py +80 -0
  26. dsp_tools/clients/permissions_client.py +68 -0
  27. dsp_tools/clients/project_client.py +16 -0
  28. dsp_tools/clients/project_client_live.py +66 -0
  29. dsp_tools/commands/create/communicate_problems.py +24 -0
  30. dsp_tools/commands/create/create.py +134 -0
  31. dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
  32. dsp_tools/commands/create/create_on_server/classes.py +99 -0
  33. dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
  34. dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
  35. dsp_tools/commands/create/create_on_server/group_users.py +165 -0
  36. dsp_tools/commands/create/create_on_server/lists.py +163 -0
  37. dsp_tools/commands/create/create_on_server/mappers.py +12 -0
  38. dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
  39. dsp_tools/commands/create/create_on_server/ontology.py +52 -0
  40. dsp_tools/commands/create/create_on_server/project.py +68 -0
  41. dsp_tools/commands/create/create_on_server/properties.py +119 -0
  42. dsp_tools/commands/create/exceptions.py +29 -0
  43. dsp_tools/commands/create/lists_only.py +66 -0
  44. dsp_tools/commands/create/models/create_problems.py +87 -0
  45. dsp_tools/commands/create/models/parsed_ontology.py +88 -0
  46. dsp_tools/commands/create/models/parsed_project.py +81 -0
  47. dsp_tools/commands/create/models/rdf_ontology.py +12 -0
  48. dsp_tools/commands/create/models/server_project_info.py +100 -0
  49. dsp_tools/commands/create/parsing/parse_lists.py +45 -0
  50. dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
  51. dsp_tools/commands/create/parsing/parse_project.py +149 -0
  52. dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
  53. dsp_tools/commands/create/project_validate.py +595 -0
  54. dsp_tools/commands/create/serialisation/ontology.py +119 -0
  55. dsp_tools/commands/create/serialisation/project.py +44 -0
  56. dsp_tools/commands/excel2json/CLAUDE.md +101 -0
  57. dsp_tools/commands/excel2json/json_header.py +57 -23
  58. dsp_tools/commands/excel2json/{new_lists → lists}/compliance_checks.py +26 -26
  59. dsp_tools/commands/excel2json/{new_lists/make_new_lists.py → lists/make_lists.py} +19 -18
  60. dsp_tools/commands/excel2json/{new_lists → lists}/models/input_error.py +1 -12
  61. dsp_tools/commands/excel2json/{new_lists → lists}/models/serialise.py +9 -5
  62. dsp_tools/commands/excel2json/{new_lists → lists}/utils.py +4 -4
  63. dsp_tools/commands/excel2json/models/input_error.py +31 -11
  64. dsp_tools/commands/excel2json/models/json_header.py +53 -15
  65. dsp_tools/commands/excel2json/models/ontology.py +4 -3
  66. dsp_tools/commands/excel2json/{lists.py → old_lists.py} +26 -112
  67. dsp_tools/commands/excel2json/project.py +78 -34
  68. dsp_tools/commands/excel2json/properties.py +57 -36
  69. dsp_tools/commands/excel2json/resources.py +32 -12
  70. dsp_tools/commands/excel2json/utils.py +20 -1
  71. dsp_tools/commands/excel2xml/__init__.py +2 -2
  72. dsp_tools/commands/excel2xml/excel2xml_cli.py +7 -15
  73. dsp_tools/commands/excel2xml/excel2xml_lib.py +138 -493
  74. dsp_tools/commands/excel2xml/propertyelement.py +5 -5
  75. dsp_tools/commands/{project → get}/get.py +29 -13
  76. dsp_tools/commands/get/get_permissions.py +257 -0
  77. dsp_tools/commands/get/get_permissions_legacy.py +89 -0
  78. dsp_tools/commands/{project/models → get/legacy_models}/context.py +6 -6
  79. dsp_tools/commands/{project/models → get/legacy_models}/group.py +5 -10
  80. dsp_tools/commands/{project/models → get/legacy_models}/listnode.py +5 -35
  81. dsp_tools/commands/{project/models → get/legacy_models}/model.py +1 -1
  82. dsp_tools/commands/{project/models → get/legacy_models}/ontology.py +9 -14
  83. dsp_tools/commands/{project/models → get/legacy_models}/project.py +13 -6
  84. dsp_tools/commands/{project/models → get/legacy_models}/propertyclass.py +9 -16
  85. dsp_tools/commands/{project/models → get/legacy_models}/resourceclass.py +8 -46
  86. dsp_tools/commands/{project/models → get/legacy_models}/user.py +19 -60
  87. dsp_tools/commands/get/models/permissions_models.py +10 -0
  88. dsp_tools/commands/id2iri.py +20 -10
  89. dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +81 -56
  90. dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +4 -10
  91. dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +97 -37
  92. dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +2 -2
  93. dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +9 -10
  94. dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +3 -3
  95. dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +2 -10
  96. dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +12 -2
  97. dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +8 -9
  98. dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +18 -18
  99. dsp_tools/commands/start_stack.py +126 -77
  100. dsp_tools/commands/update_legal/CLAUDE.md +344 -0
  101. dsp_tools/commands/update_legal/__init__.py +0 -0
  102. dsp_tools/commands/update_legal/core.py +182 -0
  103. dsp_tools/commands/update_legal/csv_operations.py +135 -0
  104. dsp_tools/commands/update_legal/models.py +87 -0
  105. dsp_tools/commands/update_legal/xml_operations.py +247 -0
  106. dsp_tools/commands/validate_data/CLAUDE.md +159 -0
  107. dsp_tools/commands/validate_data/__init__.py +0 -0
  108. dsp_tools/commands/validate_data/constants.py +59 -0
  109. dsp_tools/commands/validate_data/mappers.py +143 -0
  110. dsp_tools/commands/validate_data/models/__init__.py +0 -0
  111. dsp_tools/commands/validate_data/models/api_responses.py +45 -0
  112. dsp_tools/commands/validate_data/models/input_problems.py +119 -0
  113. dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
  114. dsp_tools/commands/validate_data/models/validation.py +106 -0
  115. dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
  116. dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
  117. dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
  118. dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
  119. dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
  120. dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
  121. dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
  122. dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
  123. dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
  124. dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
  125. dsp_tools/commands/{xml_validate/sparql/resource_shacl.py → validate_data/sparql/cardinality_shacl.py} +45 -47
  126. dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
  127. dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
  128. dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
  129. dsp_tools/commands/validate_data/utils.py +59 -0
  130. dsp_tools/commands/validate_data/validate_data.py +283 -0
  131. dsp_tools/commands/validate_data/validation/__init__.py +0 -0
  132. dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
  133. dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
  134. dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
  135. dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
  136. dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
  137. dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
  138. dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
  139. dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
  140. dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
  141. dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
  142. dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
  143. dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
  144. dsp_tools/commands/xmlupload/models/formatted_text_value.py +0 -25
  145. dsp_tools/commands/xmlupload/models/ingest.py +56 -70
  146. dsp_tools/commands/xmlupload/models/input_problems.py +6 -14
  147. dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
  148. dsp_tools/commands/xmlupload/models/permission.py +0 -39
  149. dsp_tools/commands/xmlupload/models/{deserialise/xmlpermission.py → permissions_parsed.py} +2 -2
  150. dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
  151. dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
  152. dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
  153. dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
  154. dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
  155. dsp_tools/commands/xmlupload/models/upload_clients.py +3 -3
  156. dsp_tools/commands/xmlupload/models/upload_state.py +2 -4
  157. dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
  158. dsp_tools/commands/xmlupload/{ark2iri.py → prepare_xml_input/ark2iri.py} +1 -1
  159. dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
  160. dsp_tools/commands/xmlupload/{iiif_uri_validator.py → prepare_xml_input/iiif_uri_validator.py} +2 -14
  161. dsp_tools/commands/xmlupload/{list_client.py → prepare_xml_input/list_client.py} +15 -10
  162. dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
  163. dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
  164. dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
  165. dsp_tools/commands/xmlupload/resource_create_client.py +7 -468
  166. dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
  167. dsp_tools/commands/xmlupload/stash/{construct_and_analyze_graph.py → analyse_circular_reference_graph.py} +64 -157
  168. dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
  169. dsp_tools/commands/xmlupload/stash/graph_models.py +13 -8
  170. dsp_tools/commands/xmlupload/stash/stash_circular_references.py +48 -115
  171. dsp_tools/commands/xmlupload/stash/stash_models.py +4 -9
  172. dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +34 -40
  173. dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +98 -108
  174. dsp_tools/commands/xmlupload/upload_config.py +8 -0
  175. dsp_tools/commands/xmlupload/write_diagnostic_info.py +14 -9
  176. dsp_tools/commands/xmlupload/xmlupload.py +214 -192
  177. dsp_tools/config/__init__.py +0 -0
  178. dsp_tools/config/logger_config.py +69 -0
  179. dsp_tools/{utils → config}/warnings_config.py +4 -1
  180. dsp_tools/error/__init__.py +0 -0
  181. dsp_tools/error/custom_warnings.py +39 -0
  182. dsp_tools/error/exceptions.py +204 -0
  183. dsp_tools/error/problems.py +10 -0
  184. dsp_tools/error/xmllib_errors.py +20 -0
  185. dsp_tools/error/xmllib_warnings.py +54 -0
  186. dsp_tools/error/xmllib_warnings_util.py +159 -0
  187. dsp_tools/error/xsd_validation_error_msg.py +19 -0
  188. dsp_tools/legacy_models/__init__.py +0 -0
  189. dsp_tools/{models → legacy_models}/datetimestamp.py +7 -7
  190. dsp_tools/{models → legacy_models}/langstring.py +1 -1
  191. dsp_tools/{models → legacy_models}/projectContext.py +4 -4
  192. dsp_tools/resources/schema/data.xsd +108 -83
  193. dsp_tools/resources/schema/lists-only.json +4 -23
  194. dsp_tools/resources/schema/project.json +80 -35
  195. dsp_tools/resources/schema/properties-only.json +1 -4
  196. dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
  197. dsp_tools/resources/start-stack/docker-compose.yml +34 -30
  198. dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
  199. dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
  200. dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
  201. dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
  202. dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
  203. dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
  204. dsp_tools/utils/ansi_colors.py +32 -0
  205. dsp_tools/utils/data_formats/__init__.py +0 -0
  206. dsp_tools/utils/{date_util.py → data_formats/date_util.py} +13 -1
  207. dsp_tools/utils/data_formats/iri_util.py +30 -0
  208. dsp_tools/utils/{shared.py → data_formats/shared.py} +1 -35
  209. dsp_tools/utils/{uri_util.py → data_formats/uri_util.py} +12 -2
  210. dsp_tools/utils/fuseki_bloating.py +63 -0
  211. dsp_tools/utils/json_parsing.py +22 -0
  212. dsp_tools/utils/rdf_constants.py +42 -0
  213. dsp_tools/utils/rdflib_utils.py +10 -0
  214. dsp_tools/utils/replace_id_with_iri.py +66 -0
  215. dsp_tools/utils/request_utils.py +238 -0
  216. dsp_tools/utils/xml_parsing/__init__.py +0 -0
  217. dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
  218. dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
  219. dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
  220. dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
  221. dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
  222. dsp_tools/xmllib/CLAUDE.md +302 -0
  223. dsp_tools/xmllib/__init__.py +49 -0
  224. dsp_tools/xmllib/general_functions.py +877 -0
  225. dsp_tools/xmllib/internal/__init__.py +0 -0
  226. dsp_tools/xmllib/internal/checkers.py +162 -0
  227. dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
  228. dsp_tools/xmllib/internal/constants.py +46 -0
  229. dsp_tools/xmllib/internal/input_converters.py +155 -0
  230. dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
  231. dsp_tools/xmllib/internal/serialise_resource.py +177 -0
  232. dsp_tools/xmllib/internal/serialise_values.py +152 -0
  233. dsp_tools/xmllib/internal/type_aliases.py +11 -0
  234. dsp_tools/xmllib/models/config_options.py +28 -0
  235. dsp_tools/xmllib/models/date_formats.py +48 -0
  236. dsp_tools/xmllib/models/dsp_base_resources.py +1380 -400
  237. dsp_tools/xmllib/models/internal/__init__.py +0 -0
  238. dsp_tools/xmllib/models/internal/file_values.py +172 -0
  239. dsp_tools/xmllib/models/internal/geometry.py +162 -0
  240. dsp_tools/xmllib/models/{migration_metadata.py → internal/migration_metadata.py} +14 -10
  241. dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
  242. dsp_tools/xmllib/models/internal/values.py +342 -0
  243. dsp_tools/xmllib/models/licenses/__init__.py +0 -0
  244. dsp_tools/xmllib/models/licenses/other.py +59 -0
  245. dsp_tools/xmllib/models/licenses/recommended.py +107 -0
  246. dsp_tools/xmllib/models/permissions.py +41 -0
  247. dsp_tools/xmllib/models/res.py +1782 -0
  248. dsp_tools/xmllib/models/root.py +313 -26
  249. dsp_tools/xmllib/value_checkers.py +310 -47
  250. dsp_tools/xmllib/value_converters.py +765 -8
  251. dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
  252. dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
  253. dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
  254. {dsp_tools-9.1.0.post11.dist-info → dsp_tools-18.3.0.post13.dist-info}/entry_points.txt +1 -0
  255. dsp_tools/commands/project/create/project_create.py +0 -1107
  256. dsp_tools/commands/project/create/project_create_lists.py +0 -204
  257. dsp_tools/commands/project/create/project_validate.py +0 -453
  258. dsp_tools/commands/project/models/project_definition.py +0 -12
  259. dsp_tools/commands/rosetta.py +0 -124
  260. dsp_tools/commands/template.py +0 -30
  261. dsp_tools/commands/xml_validate/api_connection.py +0 -122
  262. dsp_tools/commands/xml_validate/deserialise_input.py +0 -135
  263. dsp_tools/commands/xml_validate/make_data_rdf.py +0 -193
  264. dsp_tools/commands/xml_validate/models/data_deserialised.py +0 -108
  265. dsp_tools/commands/xml_validate/models/data_rdf.py +0 -214
  266. dsp_tools/commands/xml_validate/models/input_problems.py +0 -191
  267. dsp_tools/commands/xml_validate/models/validation.py +0 -29
  268. dsp_tools/commands/xml_validate/reformat_validaton_result.py +0 -89
  269. dsp_tools/commands/xml_validate/sparql/construct_shapes.py +0 -16
  270. dsp_tools/commands/xml_validate/xml_validate.py +0 -151
  271. dsp_tools/commands/xmlupload/check_consistency_with_ontology.py +0 -253
  272. dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py +0 -236
  273. dsp_tools/commands/xmlupload/models/deserialise/xmlresource.py +0 -171
  274. dsp_tools/commands/xmlupload/models/namespace_context.py +0 -39
  275. dsp_tools/commands/xmlupload/models/ontology_lookup_models.py +0 -161
  276. dsp_tools/commands/xmlupload/models/ontology_problem_models.py +0 -178
  277. dsp_tools/commands/xmlupload/models/serialise/jsonld_serialiser.py +0 -40
  278. dsp_tools/commands/xmlupload/models/serialise/serialise_value.py +0 -51
  279. dsp_tools/commands/xmlupload/ontology_client.py +0 -92
  280. dsp_tools/commands/xmlupload/project_client.py +0 -91
  281. dsp_tools/commands/xmlupload/read_validate_xml_file.py +0 -99
  282. dsp_tools/models/custom_warnings.py +0 -31
  283. dsp_tools/models/exceptions.py +0 -90
  284. dsp_tools/resources/0100-template-repo/template.json +0 -45
  285. dsp_tools/resources/0100-template-repo/template.xml +0 -27
  286. dsp_tools/resources/start-stack/docker-compose-validation.yml +0 -5
  287. dsp_tools/resources/start-stack/start-stack-config.yml +0 -4
  288. dsp_tools/resources/xml_validate/api-shapes.ttl +0 -411
  289. dsp_tools/resources/xml_validate/replace_namespace.xslt +0 -61
  290. dsp_tools/utils/connection_live.py +0 -383
  291. dsp_tools/utils/iri_util.py +0 -14
  292. dsp_tools/utils/logger_config.py +0 -41
  293. dsp_tools/utils/set_encoder.py +0 -20
  294. dsp_tools/utils/xml_utils.py +0 -145
  295. dsp_tools/utils/xml_validation.py +0 -197
  296. dsp_tools/utils/xml_validation_models.py +0 -68
  297. dsp_tools/xmllib/models/file_values.py +0 -78
  298. dsp_tools/xmllib/models/resource.py +0 -415
  299. dsp_tools/xmllib/models/values.py +0 -428
  300. dsp_tools-9.1.0.post11.dist-info/METADATA +0 -130
  301. dsp_tools-9.1.0.post11.dist-info/RECORD +0 -167
  302. dsp_tools-9.1.0.post11.dist-info/WHEEL +0 -4
  303. dsp_tools-9.1.0.post11.dist-info/licenses/LICENSE +0 -674
  304. /dsp_tools/{commands/excel2json/new_lists → clients}/__init__.py +0 -0
  305. /dsp_tools/commands/{excel2json/new_lists/models → create}/__init__.py +0 -0
  306. /dsp_tools/commands/{project → create/create_on_server}/__init__.py +0 -0
  307. /dsp_tools/commands/{project/create → create/models}/__init__.py +0 -0
  308. /dsp_tools/commands/{project/models → create/parsing}/__init__.py +0 -0
  309. /dsp_tools/commands/{xml_validate → create/serialisation}/__init__.py +0 -0
  310. /dsp_tools/commands/{xml_validate/models → excel2json/lists}/__init__.py +0 -0
  311. /dsp_tools/commands/{xml_validate/sparql → excel2json/lists/models}/__init__.py +0 -0
  312. /dsp_tools/commands/excel2json/{new_lists → lists}/models/deserialise.py +0 -0
  313. /dsp_tools/commands/{xmlupload/models/deserialise → get}/__init__.py +0 -0
  314. /dsp_tools/commands/{xmlupload/models/serialise → get/legacy_models}/__init__.py +0 -0
  315. /dsp_tools/commands/{project/models → get/legacy_models}/helpers.py +0 -0
  316. /dsp_tools/{models → commands/get/models}/__init__.py +0 -0
@@ -0,0 +1,238 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import time
6
+ import warnings
7
+ from dataclasses import dataclass
8
+ from dataclasses import field
9
+ from datetime import datetime
10
+ from http import HTTPStatus
11
+ from typing import Any
12
+ from typing import Literal
13
+ from typing import Never
14
+ from typing import Union
15
+
16
+ from loguru import logger
17
+ from requests import JSONDecodeError
18
+ from requests import ReadTimeout
19
+ from requests import RequestException
20
+ from requests import Response
21
+
22
+ from dsp_tools.commands.get.legacy_models.context import Context
23
+ from dsp_tools.commands.get.legacy_models.helpers import OntoIri
24
+ from dsp_tools.config.logger_config import LOGGER_SAVEPATH
25
+ from dsp_tools.error.custom_warnings import DspToolsUnexpectedStatusCodeWarning
26
+ from dsp_tools.error.exceptions import DspToolsRequestException
27
+ from dsp_tools.error.exceptions import PermanentTimeOutError
28
+
29
+
30
@dataclass
class ResponseCodeAndText:
    """Pair of an HTTP status code and the accompanying response text."""

    # numeric status code of the response
    status_code: int
    # textual body of the response
    text: str
34
+
35
+
36
@dataclass
class PostFiles:
    """One or more files to be uploaded in a POST request."""

    files: list[PostFile]

    def to_dict(self) -> dict[str, tuple[str, Any, str] | tuple[str, Any]]:
        """Return a dict that maps each file's name to the tuple produced by its `to_tuple()`."""
        files_by_name: dict[str, tuple[str, Any, str] | tuple[str, Any]] = {}
        for post_file in self.files:
            name = post_file.file_name
            # a later file with the same name replaces an earlier one
            files_by_name[name] = post_file.to_tuple()
        return files_by_name
44
+
45
+
46
@dataclass
class PostFile:
    """A single file for a POST request: its name, its file object, and an optional content type."""

    file_name: str
    fileobj: Any
    content_type: str | None = None

    def to_tuple(self) -> tuple[str, Any, str] | tuple[str, Any]:
        """Return (file_name, fileobj), with the content type appended as third element if one is set."""
        name_and_obj = (self.file_name, self.fileobj)
        # a missing or empty content type is left out of the tuple entirely
        if not self.content_type:
            return name_and_obj
        return (*name_and_obj, self.content_type)
56
+
57
+
58
class SetEncoder(json.JSONEncoder):
    """JSON encoder that can additionally serialize sets, Context objects, and OntoIri objects."""

    def default(self, o: Union[set[Any], Context, OntoIri]) -> Any:
        """Return a serializable stand-in for o, deferring to the base encoder for any other type."""
        if isinstance(o, set):
            # JSON has no set type, so a set is emitted as a list
            return list(o)
        if isinstance(o, Context):
            return o.toJsonObj()
        if isinstance(o, OntoIri):
            return {"iri": o.iri, "hashtag": o.hashtag}
        # the base implementation raises TypeError for unsupported types
        return super().default(o)
70
+
71
+
72
@dataclass
class RequestParameters:
    """Parameters of one HTTP request; the payload is additionally kept as UTF-8 encoded JSON bytes."""

    method: Literal["POST", "GET", "PUT", "DELETE"]
    url: str
    timeout: int
    data: dict[str, Any] | None = None
    # derived from `data` in __post_init__, hence excluded from the generated __init__
    data_serialized: bytes | None = field(init=False, default=None)
    headers: dict[str, str] | None = None
    files: PostFiles | None = None

    def __post_init__(self) -> None:
        """Serialize the payload once, right after construction."""
        self.data_serialized = self._serialize_payload(self.data)

    def _serialize_payload(self, payload: dict[str, Any] | None) -> bytes | None:
        """Return the payload as UTF-8 encoded JSON bytes, or None if the payload is None or empty."""
        if not payload:
            return None
        # If data is not encoded as bytes, issues can occur with non-ASCII characters,
        # where the content-length of the request will turn out to be different from the actual length.
        as_json = json.dumps(payload, cls=SetEncoder, ensure_ascii=False)
        return as_json.encode("utf-8")

    def as_kwargs(self) -> dict[str, Any]:
        """
        Return the request parameters as a dict of keyword arguments.

        The "data" entry holds the serialized bytes, not the original dict.
        The "files" entry is only present if files were provided.
        NOTE(review): presumably meant for `requests.request(**kwargs)` - confirm against the callers.
        """
        kwargs: dict[str, Any] = {
            "method": self.method,
            "url": self.url,
            "timeout": self.timeout,
            "data": self.data_serialized,
            "headers": self.headers,
        }
        if not self.files:
            return kwargs
        kwargs["files"] = self.files.to_dict()
        return kwargs
101
+
102
+
103
def log_request(params: RequestParameters, extra_headers: dict[str, Any] | None = None) -> None:
    """
    Write a debug log entry that describes the request.

    Logged are the method, URL, timeout, the sanitized headers,
    the payload (with a top-level "password" entry masked), and the names of the files.

    Args:
        params: parameters of the request
        extra_headers: additional headers to log; on duplicate keys, the headers of `params` win
    """
    merged_headers = extra_headers if extra_headers else {}
    if params.headers:
        # the union creates a new dict, so neither input dict is mutated
        merged_headers = merged_headers | params.headers
    to_log = {
        "method": params.method,
        "url": params.url,
        "timeout": params.timeout,
        "headers": sanitize_headers(merged_headers),
    }
    if params.data:
        # mask on a copy, so that the real payload keeps the password
        payload = params.data.copy()
        if "password" in payload:
            payload["password"] = "***"
        to_log["data"] = payload
    if params.files:
        to_log["files"] = [post_file.file_name for post_file in params.files.files]
    serialized = json.dumps(to_log, cls=SetEncoder)
    logger.debug(f"REQUEST: {serialized}")
124
+
125
+
126
def log_response(response: Response, include_response_content: bool = True) -> None:
    """
    Write a debug log entry that describes the response.

    Args:
        response: the response to log
        include_response_content: if False, a placeholder is logged instead of the response content
    """
    sanitized = sanitize_headers(dict(response.headers)) if response.headers else ""
    to_log: dict[str, Any] = {"status_code": response.status_code, "headers": sanitized}
    if not include_response_content:
        to_log["content"] = "too big to be logged"
    else:
        try:
            to_log["content"] = response.json()
        except JSONDecodeError:
            # the body is not JSON, so it is logged as plain text
            to_log["content"] = response.text
    serialized = json.dumps(to_log)
    logger.debug(f"RESPONSE: {serialized}")
140
+
141
+
142
def sanitize_headers(headers: dict[str, str | bytes]) -> dict[str, str]:
    """
    Remove sensitive information from HTTP headers, so that they can be logged.

    Header names are matched case-insensitively, because HTTP field names are case-insensitive:
    a server or proxy may send "set-cookie" instead of "Set-Cookie".
    The original spelling of the header names is kept in the result.

    Args:
        headers: header names mapped to their values; bytes values are decoded as UTF-8

    Returns:
        A new dict with the same keys, where Bearer tokens in "Authorization" headers
        are replaced by "Bearer ***" and the values of "Set-Cookie" headers by "***".
        All other values are returned unchanged (as str).
    """
    bearer_prefix = "bearer "

    def _mask(key: str, value: str | bytes) -> str:
        if isinstance(value, bytes):
            value = value.decode("utf-8")
        normalized_key = key.lower()
        # the authentication scheme is case-insensitive, too
        if normalized_key == "authorization" and value[: len(bearer_prefix)].lower() == bearer_prefix:
            return "Bearer ***"
        if normalized_key == "set-cookie":
            return "***"
        return value

    return {k: _mask(k, v) for k, v in headers.items()}
155
+
156
+
157
def log_request_failure_and_sleep(reason: str, retry_counter: int, exc_info: bool) -> None:
    """
    Print and log why a request failed, then sleep before the next attempt.

    The sleep time doubles with every retry, and is capped at 300 seconds:

    ============= ================ =============================
    retry_counter seconds to sleep cumulative waiting time (min)
    ============= ================ =============================
    0             1                0
    1             2                0
    2             4                0
    3             8                0
    4             16               0
    5             32               1
    6             64               2
    7             128              4
    8             256              9
    9             300              14
    10            300              19
    11            300              24
    12            300              29
    15            300              44
    18            300              59
    24            300              89
    30            300              119
    ============= ================ =============================

    Args:
        reason: description of the failure, used as prefix of the message
        retry_counter: number of the current retry; determines the sleep time
        exc_info: if True, the message is logged with `logger.exception()`, otherwise with `logger.error()`
    """
    sleep_time = 2**retry_counter
    if sleep_time > 300:
        sleep_time = 300
    msg = f"{reason}: Try reconnecting to DSP server, next attempt in {sleep_time} seconds..."
    print(f"{datetime.now()}: {msg}")
    log_msg = f"{msg} ({retry_counter=:})"
    if not exc_info:
        logger.error(log_msg)
    else:
        logger.exception(log_msg)
    time.sleep(sleep_time)
191
+
192
+
193
def log_and_raise_timeouts(error: TimeoutError | ReadTimeout) -> Never:
    """
    Print and log a timeout error raised by a request, then raise a PermanentTimeOutError.

    Args:
        error: the timeout error; only its class name goes into the message

    Raises:
        PermanentTimeOutError: always, with the original exception context suppressed
    """
    error_name = type(error).__name__
    msg = f"A '{error_name}' occurred during the connection to the DSP server."
    timestamp = datetime.now()
    print(f"{timestamp}: {msg}")
    logger.error(msg)
    raise PermanentTimeOutError(msg) from None
199
+
200
+
201
def should_retry(response: Response) -> bool:
    """
    Decide whether it makes sense to send the request again.

    A retry is sensible if the status code is in the 500 range,
    or if the response text contains "try again later" (case-insensitive).
    If the environment variable DSP_TOOLS_TESTING is "true", the answer is always False.
    """
    # set in .github/workflows/tests-on-push.yml
    if os.getenv("DSP_TOOLS_TESTING") == "true":
        return False
    if 500 <= response.status_code < 600:
        return True
    return "try again later" in response.text.lower()
207
+
208
+
209
def log_and_raise_request_exception(error: RequestException) -> Never:
    """
    Log an exception that occurred during an API call, then raise a DspToolsRequestException.

    The message contains the path of the log file, the class name of the original exception,
    and (if the exception carries a request) the method and URL of that request.

    Args:
        error: the exception raised during the API call

    Raises:
        DspToolsRequestException: always, with the original exception context suppressed
    """
    msg = (
        "During an API call the following exception occurred. "
        f"Please contact support@dasch.swiss with the log file at {LOGGER_SAVEPATH} "
        "if you require help resolving the issue.\n"
        f"Original exception name: {error.__class__.__name__}\n"
    )
    # not every exception carries the request that caused it
    if error.request:
        msg += f"Original request: {error.request.method} {error.request.url}"
    logger.exception(msg)
    raise DspToolsRequestException(msg) from None
220
+
221
+
222
def log_and_warn_unexpected_non_ok_response(status_code: int, response_text: str) -> None:
    """
    Log an unexpected API response, and emit a DspToolsUnexpectedStatusCodeWarning.

    Args:
        status_code: status code of the response
        response_text: text of the response; at most its first 200 characters are included in the message
    """
    # slicing leaves shorter texts untouched, so no length check is needed
    shortened_text = response_text[:200]
    intro = (
        "We got an unexpected API response during the following request. "
        "Please contact the dsp-tools development team (at support@dasch.swiss) with your log file "
        "so that we can handle this more gracefully in the future.\n"
    )
    details = f"Response status code: {status_code}\nOriginal Message: {shortened_text}"
    msg = intro + details
    logger.warning(msg)
    warnings.warn(DspToolsUnexpectedStatusCodeWarning(msg))
233
+
234
+
235
def is_server_error(response: ResponseCodeAndText) -> bool:
    """Tell whether the status code lies between INTERNAL_SERVER_ERROR and NETWORK_AUTHENTICATION_REQUIRED."""
    lowest = HTTPStatus.INTERNAL_SERVER_ERROR
    highest = HTTPStatus.NETWORK_AUTHENTICATION_REQUIRED
    # both bounds are inclusive
    return lowest <= response.status_code <= highest
File without changes
@@ -0,0 +1,32 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import cast
4
+
5
+ import regex
6
+ from lxml import etree
7
+
8
+ from dsp_tools.commands.xmlupload.models.permission import Permissions
9
+ from dsp_tools.commands.xmlupload.models.permissions_parsed import XmlPermission
10
+ from dsp_tools.legacy_models.projectContext import ProjectContext
11
+
12
+
13
def get_authorship_lookup(root: etree._Element) -> dict[str, list[str]]:
    """
    Collect the authors of every <authorship> element found under the root.

    Args:
        root: root of the XML tree

    Returns:
        The "id" attribute of each <authorship> element, mapped to the cleaned texts of its children
    """

    def get_one_author(ele: etree._Element) -> str:
        # The xsd file ensures that the body of the element contains valid non-whitespace characters
        raw_text = cast(str, ele.text)
        # collapse runs of spaces into a single one, then trim both ends
        return regex.sub(r" +", " ", raw_text).strip()

    return {
        auth.attrib["id"]: [get_one_author(child) for child in auth.iterchildren()]
        for auth in root.iter(tag="authorship")
    }
25
+
26
+
27
def get_permissions_lookup(root: etree._Element, proj_context: ProjectContext) -> dict[str, Permissions]:
    """
    Build a lookup from permission ID to permission instance, out of the <permissions> elements under the root.

    Args:
        root: root of the XML tree
        proj_context: passed on to every XmlPermission that is created

    Returns:
        The ID of each parsed permission, mapped to the result of its `get_permission_instance()`
    """
    parsed = [XmlPermission(ele, proj_context) for ele in root.iter(tag="permissions")]
    parsed_by_id: dict[str, XmlPermission] = {}
    for xml_permission in parsed:
        # if an ID occurs more than once, the last element with that ID wins
        parsed_by_id[xml_permission.permission_id] = xml_permission
    return {perm_id: xml_permission.get_permission_instance() for perm_id, xml_permission in parsed_by_id.items()}
@@ -0,0 +1,325 @@
1
+ from pathlib import Path
2
+
3
+ import regex
4
+ from lxml import etree
5
+
6
+ from dsp_tools.commands.validate_data.mappers import XML_TAG_TO_VALUE_TYPE_MAPPER
7
+ from dsp_tools.error.exceptions import InputError
8
+ from dsp_tools.utils.data_formats.iri_util import convert_api_url_for_correct_iri_namespace_construction
9
+ from dsp_tools.utils.rdf_constants import KNORA_API_PREFIX
10
+ from dsp_tools.utils.xml_parsing.models.parsed_resource import KnoraValueType
11
+ from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedFileValue
12
+ from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedFileValueMetadata
13
+ from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedMigrationMetadata
14
+ from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedResource
15
+ from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedValue
16
+
17
+
18
def get_parsed_resources(root: etree._Element, api_url: str) -> list[ParsedResource]:
    """
    Parse all resources, regions, links and video/audio segments found below the root element.

    Args:
        root: root element of the XML document
        api_url: API URL used to construct the absolute IRIs of project ontologies

    Returns:
        Parsed resources, grouped by kind in this order:
        resources, regions, links, video segments, audio segments
    """
    normalised_url = convert_api_url_for_correct_iri_namespace_construction(api_url)
    iri_lookup = _create_from_local_name_to_absolute_iri_lookup(root, normalised_url)

    parsed: list[ParsedResource] = [
        _parse_one_resource(ele, iri_lookup[ele.attrib["restype"]], iri_lookup)
        for ele in root.iterdescendants(tag="resource")
    ]
    # regions and links have a fixed knora-api class instead of a "restype" attribute
    for tag, knora_class in (("region", "Region"), ("link", "LinkObj")):
        res_type = f"{KNORA_API_PREFIX}{knora_class}"
        parsed.extend(_parse_one_resource(ele, res_type, iri_lookup) for ele in root.iterdescendants(tag=tag))
    for tag, segment_type in (("video-segment", "Video"), ("audio-segment", "Audio")):
        parsed.extend(_parse_segment(ele, segment_type) for ele in root.iterdescendants(tag=tag))
    return parsed
36
+
37
+
38
def _create_from_local_name_to_absolute_iri_lookup(root: etree._Element, api_url: str) -> dict[str, str]:
    """
    Map every local class or property name used in the document to its absolute IRI.

    Args:
        root: root element; its "shortcode" and "default-ontology" attributes are required
        api_url: API URL used as the base of project ontology namespaces

    Returns:
        Mapping from the "restype" of every resource and the "name" attribute of any element
        (the root included) to the corresponding absolute IRI
    """
    project_shortcode = root.attrib["shortcode"]
    default_onto = root.attrib["default-ontology"]
    names = {res.attrib["restype"] for res in root.iterdescendants(tag="resource")}
    names |= {ele.attrib["name"] for ele in root.iter() if "name" in ele.attrib}
    lookup = {}
    for local_name in names:
        lookup[local_name] = _get_one_absolute_iri(local_name, project_shortcode, default_onto, api_url)
    return lookup
46
+
47
+
48
+ def _get_one_absolute_iri(local_name: str, shortcode: str, default_ontology: str, api_url: str) -> str:
49
+ split_name = local_name.split(":")
50
+ if len(split_name) == 1:
51
+ return f"{KNORA_API_PREFIX}{local_name}"
52
+ if len(split_name) == 2:
53
+ if split_name[0] == "":
54
+ return f"{_construct_namespace(api_url, shortcode, default_ontology)}{split_name[1]}"
55
+ if split_name[0] == "knora-api":
56
+ return f"{KNORA_API_PREFIX}{split_name[1]}"
57
+ return f"{_construct_namespace(api_url, shortcode, split_name[0])}{split_name[1]}"
58
+ raise InputError(
59
+ f"It is not permissible to have a colon in a property or resource class name. "
60
+ f"Please correct the following: {local_name}"
61
+ )
62
+
63
+
64
def _construct_namespace(api_url: str, shortcode: str, onto_name: str) -> str:
    """
    Assemble the namespace of a project ontology.

    Args:
        api_url: API URL, used as is (no slash is added or removed)
        shortcode: shortcode of the project
        onto_name: name of the ontology

    Returns:
        Namespace in the form `<api_url>/ontology/<shortcode>/<onto_name>/v2#`
    """
    ontology_path = f"ontology/{shortcode}/{onto_name}/v2#"
    return f"{api_url}/{ontology_path}"
66
+
67
+
68
def _parse_segment(segment: etree._Element, segment_type: str) -> ParsedResource:
    """
    Parse one video or audio segment element.

    Args:
        segment: the segment element; its "id" and "label" attributes are required
        segment_type: prefix of the knora-api segment class, called with "Video" or "Audio" in this module

    Returns:
        Parsed resource of class `<segment_type>Segment` without a file value
    """
    parsed_values = _parse_segment_values(segment, segment_type)
    migration = _parse_migration_metadata(segment)
    attributes = segment.attrib
    segment_class = f"{KNORA_API_PREFIX}{segment_type}Segment"
    return ParsedResource(
        res_id=attributes["id"],
        res_type=segment_class,
        label=attributes["label"],
        permissions_id=attributes.get("permissions"),
        values=parsed_values,
        file_value=None,
        migration_metadata=migration,
    )
80
+
81
+
82
def _parse_segment_values(segment: etree._Element, segment_type: str) -> list[ParsedValue]:
    """
    Parse the child elements of a video or audio segment into values.

    Args:
        segment: the segment element
        segment_type: inserted into the property name of `isSegmentOf` (`is<segment_type>SegmentOf`)

    Returns:
        One parsed value per child element, in document order
    """
    parsed: list[ParsedValue] = []
    content: str | tuple[str, str] | None
    for child in segment.iterchildren():
        tag = child.tag
        # by default the property is the knora-api property named like the tag
        prop_iri = f"{KNORA_API_PREFIX}{tag!s}"
        if tag in ("isSegmentOf", "relatesTo"):
            knora_type = KnoraValueType.LINK_VALUE
            content = child.text.strip() if child.text else None
            if tag == "isSegmentOf":
                prop_iri = f"{KNORA_API_PREFIX}is{segment_type}SegmentOf"
        elif tag == "hasSegmentBounds":
            knora_type = KnoraValueType.INTERVAL_VALUE
            content = (child.attrib["segment_start"], child.attrib["segment_end"])
        elif tag in ("hasDescription", "hasComment"):
            knora_type = KnoraValueType.RICHTEXT_VALUE
            content = _get_richtext_as_string(child)
        else:
            # every other child is treated as simple text
            knora_type = KnoraValueType.SIMPLETEXT_VALUE
            content = _get_simpletext_as_string(child)
        parsed.append(
            ParsedValue(
                prop_name=prop_iri,
                value=content,
                value_type=knora_type,
                permissions_id=child.attrib.get("permissions"),
                comment=child.attrib.get("comment"),
            )
        )
    return parsed
114
+
115
+
116
def _parse_one_resource(resource: etree._Element, res_type: str, iri_lookup: dict[str, str]) -> ParsedResource:
    """
    Parse one resource-like element (resource, region or link).

    Args:
        resource: the element; its "id" and "label" attributes are required
        res_type: absolute IRI of the resource class
        iri_lookup: mapping from local property names to absolute IRIs

    Returns:
        Parsed resource with its values, optional file value and optional migration metadata
    """
    parsed_values, asset = _parse_values(resource, iri_lookup)
    migration = _parse_migration_metadata(resource)
    attributes = resource.attrib
    return ParsedResource(
        res_id=attributes["id"],
        res_type=res_type,
        label=attributes["label"],
        permissions_id=attributes.get("permissions"),
        values=parsed_values,
        file_value=asset,
        migration_metadata=migration,
    )
128
+
129
+
130
def _parse_migration_metadata(resource: etree._Element) -> ParsedMigrationMetadata | None:
    """
    Read the migration attributes "iri", "ark" and "creation_date" of an element.

    Args:
        resource: element whose attributes are inspected

    Returns:
        The metadata if at least one of the three attributes has a non-empty value, else None
    """
    attributes = resource.attrib
    iri, ark, creation_date = (attributes.get(key) for key in ("iri", "ark", "creation_date"))
    if not (iri or ark or creation_date):
        return None
    return ParsedMigrationMetadata(iri=iri, ark=ark, creation_date=creation_date)
139
+
140
+
141
def _parse_values(
    resource: etree._Element, iri_lookup: dict[str, str]
) -> tuple[list[ParsedValue], ParsedFileValue | None]:
    """
    Split the children of a resource into property values and the file value.

    Args:
        resource: the resource element
        iri_lookup: mapping from local property names to absolute IRIs

    Returns:
        The property values in document order, and the file value parsed from the last
        <bitstream> or <iiif-uri> child (None if there is no such child)
    """
    property_values = []
    file_value = None
    for child in resource.iterchildren():
        tag = child.tag
        if tag == "bitstream":
            file_value = _parse_file_values(child)
        elif tag == "iiif-uri":
            file_value = _parse_iiif_uri(child)
        else:
            property_values.extend(_parse_one_value(child, iri_lookup))
    return property_values, file_value
155
+
156
+
157
def _parse_one_value(values: etree._Element, iri_lookup: dict[str, str]) -> list[ParsedValue]:
    """
    Parse all values of one property element.

    Args:
        values: the property element; its "name" attribute must be a key of `iri_lookup`
        iri_lookup: mapping from local property names to absolute IRIs

    Returns:
        One parsed value per child of the property element
    """
    prop_iri = iri_lookup[values.attrib["name"]]
    tag = values.tag
    # lists and texts need special treatment, all other properties share one parser
    if tag == "list-prop":
        return _parse_list_value(values, prop_iri)
    if tag == "text-prop":
        return _parse_text_value(values, prop_iri)
    return _parse_generic_values(values, prop_iri)
166
+
167
+
168
def _parse_generic_values(values: etree._Element, prop_name: str) -> list[ParsedValue]:
    """
    Parse the values of a property whose content is plain element text.

    Args:
        values: the property element; its tag must be a key of `XML_TAG_TO_VALUE_TYPE_MAPPER`
        prop_name: absolute IRI of the property

    Returns:
        One parsed value per child; the value is the stripped text, or None if the child has no text
    """
    knora_type = XML_TAG_TO_VALUE_TYPE_MAPPER[str(values.tag)]
    return [
        ParsedValue(
            prop_name=prop_name,
            value=child.text.strip() if child.text else None,
            value_type=knora_type,
            permissions_id=child.attrib.get("permissions"),
            comment=child.attrib.get("comment"),
        )
        for child in values
    ]
182
+
183
+
184
def _parse_list_value(values: etree._Element, prop_name: str) -> list[ParsedValue]:
    """
    Parse the values of a list property.

    Args:
        values: the property element; its "list" attribute is required
        prop_name: absolute IRI of the property

    Returns:
        One parsed value per child; the value is the tuple (list name, node name),
        where the node name is the stripped text of the child or None if it has no text
    """
    list_name = values.attrib["list"]
    return [
        ParsedValue(
            prop_name=prop_name,
            value=(list_name, child.text.strip() if child.text else None),
            value_type=KnoraValueType.LIST_VALUE,
            permissions_id=child.attrib.get("permissions"),
            comment=child.attrib.get("comment"),
        )
        for child in values
    ]
199
+
200
+
201
def _parse_text_value(values: etree._Element, prop_name: str) -> list[ParsedValue]:
    """
    Parse the values of a text property.

    Args:
        values: the property element; every child must have an "encoding" attribute
        prop_name: absolute IRI of the property

    Returns:
        One parsed value per child: richtext if the encoding is "xml", simple text otherwise
    """
    parsed = []
    for text_ele in values:
        if text_ele.attrib["encoding"] != "xml":
            knora_type = KnoraValueType.SIMPLETEXT_VALUE
            content = _get_simpletext_as_string(text_ele)
        else:
            knora_type = KnoraValueType.RICHTEXT_VALUE
            content = _get_richtext_as_string(text_ele)
        parsed.append(
            ParsedValue(
                prop_name=prop_name,
                value=content,
                value_type=knora_type,
                permissions_id=text_ele.attrib.get("permissions"),
                comment=text_ele.attrib.get("comment"),
            )
        )
    return parsed
220
+
221
+
222
def _get_richtext_as_string(value: etree._Element) -> str | None:
    """
    Return the inner XML of an element as a cleaned string.

    Args:
        value: element containing the formatted text

    Returns:
        None if the element has neither text nor children, otherwise the cleaned inner XML.
        An element that contains only whitespace yields an empty string,
        so that the user message can be precise.
    """
    if len(value) == 0 and not value.text:
        return None
    tag = f"{value.tag!s}"
    serialised = etree.tostring(value, encoding="unicode", method="xml").strip()
    # cut away the opening tag (with its attributes) and the closing tag of the element itself
    without_opening_tag = regex.sub(f"^<{tag}.*?>", "", serialised, count=1)
    inner_xml = regex.sub(f"</{tag}>$", "", without_opening_tag)
    return _cleanup_formatted_text(inner_xml.strip())
232
+
233
+
234
def _cleanup_formatted_text(xmlstr_orig: str) -> str:
    """
    Normalise the whitespace of a xml-encoded text value from the XML file.
    This function:
        - replaces (multiple) line breaks by a space
        - replaces multiple spaces or tabstops by a single space (except within `<code>` or `<pre>` tags)
        - removes the space that directly follows a `<br>`/`<br/>` tag (except within `<code>` tags)
        - removes leading and trailing whitespace

    Args:
        xmlstr_orig: content of the tag from the XML file, in serialized form

    Returns:
        purged string, suitable to be sent to DSP-API
    """
    single_line = regex.sub("\n+", " ", xmlstr_orig)
    # the regex selects all spaces/tabstops not followed by </xyz> without <xyz in between.
    # credits: https://stackoverflow.com/a/46937770/14414188
    collapsed = regex.sub("( {2,}|\t+)(?!(.(?!<(code|pre)))*</(code|pre)>)", " ", single_line)
    without_space_after_br = regex.sub("((?<=<br/?>) )(?!(.(?!<code))*</code>)", "", collapsed)
    return without_space_after_br.strip()
259
+
260
+
261
def _get_simpletext_as_string(value: etree._Element) -> str | None:
    """
    Return the content of a simple text element as a whitespace-normalised string.

    If the element contains child elements, they are kept in serialized form,
    so that the content can be reported back to the user as it was entered.

    Args:
        value: element containing the text

    Returns:
        None if the element has neither text nor children, otherwise the cleaned content.
        An element that contains only whitespace yields an empty string,
        so that the user message can be precise.
    """
    if len(value) == 0:
        if not (found := value.text):
            return None
    else:
        # Extract the inner XML content, preserving tags.
        # tostring() serializes the entire subtree of a child plus its tail text,
        # so only the direct children may be iterated: walking all descendants
        # would emit nested elements a second time.
        found = "".join(etree.tostring(child, encoding="unicode") for child in value.iterchildren())
        if value.text:
            found = value.text + found
    # replace multiple spaces or tabstops by a single space
    str_val = regex.sub(r" {2,}|\t+", " ", found)
    # remove leading and trailing spaces (of every line, but also of the entire string)
    str_val = "\n".join([s.strip() for s in str_val.split("\n")])
    return str_val.strip()
278
+
279
+
280
def _parse_iiif_uri(iiif_uri: etree._Element) -> ParsedFileValue:
    """
    Parse an <iiif-uri> element.

    Args:
        iiif_uri: the element containing the URI

    Returns:
        File value of type `STILL_IMAGE_IIIF`; its value is the stripped text, or None if there is no text
    """
    raw_uri = iiif_uri.text
    uri = raw_uri.strip() if raw_uri else None
    return ParsedFileValue(
        value=uri,
        value_type=KnoraValueType.STILL_IMAGE_IIIF,
        metadata=_parse_file_metadata(iiif_uri),
    )
286
+
287
+
288
def _parse_file_values(file_value: etree._Element) -> ParsedFileValue:
    """
    Parse a <bitstream> element.

    Args:
        file_value: the element containing the file name

    Returns:
        File value whose value is the stripped text (None if there is no text)
        and whose type is derived from the file extension
    """
    raw_name = file_value.text
    file_name = raw_name.strip() if raw_name else None
    file_type = _get_file_value_type(file_name)
    return ParsedFileValue(value=file_name, value_type=file_type, metadata=_parse_file_metadata(file_value))
295
+
296
+
297
def _parse_file_metadata(file_value: etree._Element) -> ParsedFileValueMetadata:
    """
    Read the legal and permission attributes of a file element.

    Args:
        file_value: element with the optional attributes
            "license", "copyright-holder", "authorship-id" and "permissions"

    Returns:
        Metadata in which every absent attribute is None
    """
    attributes = file_value.attrib
    return ParsedFileValueMetadata(
        license_iri=attributes.get("license"),
        copyright_holder=attributes.get("copyright-holder"),
        authorship_id=attributes.get("authorship-id"),
        permissions_id=attributes.get("permissions"),
    )
304
+
305
+
306
def _get_file_value_type(file_name: str | None) -> KnoraValueType | None:  # noqa:PLR0911 (Too many return statements)
    """
    Derive the file value type from the extension of a file name.

    Args:
        file_name: name or path of the file

    Returns:
        The file value type matching the lower-cased extension,
        or None if the name is empty/None or the extension is not known
    """
    if not file_name:
        return None
    extension = Path(file_name).suffix[1:].lower()
    if extension in ("zip", "tar", "gz", "z", "tgz", "gzip", "7z"):
        return KnoraValueType.ARCHIVE_FILE
    if extension in ("mp3", "wav"):
        return KnoraValueType.AUDIO_FILE
    if extension in ("pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", "epub"):
        return KnoraValueType.DOCUMENT_FILE
    if extension == "mp4":
        return KnoraValueType.MOVING_IMAGE_FILE
    # jpx is the extension of the files returned by dsp-ingest
    if extension in ("jpg", "jpeg", "jp2", "png", "tif", "tiff", "jpx"):
        return KnoraValueType.STILL_IMAGE_FILE
    if extension in ("odd", "rng", "txt", "xml", "htm", "html", "xsd", "xsl", "csv", "json"):
        return KnoraValueType.TEXT_FILE
    return None
File without changes
@@ -0,0 +1,76 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from enum import Enum
5
+ from enum import auto
6
+
7
+
8
@dataclass
class ParsedResource:
    """Data of one resource (or region, link, segment) as extracted from the XML file."""

    # content of the "id" attribute of the element
    res_id: str
    # absolute IRI of the resource class
    res_type: str
    # content of the "label" attribute of the element
    label: str
    # content of the "permissions" attribute, None if the attribute is absent
    permissions_id: str | None
    # values of the properties of the resource
    values: list[ParsedValue]
    # file or IIIF-URI attached to the resource, None if there is none
    file_value: ParsedFileValue | None
    # only set if the element carries migration attributes (iri, ark, creation_date)
    migration_metadata: ParsedMigrationMetadata | None
17
+
18
+
19
@dataclass
class ParsedMigrationMetadata:
    """Optional migration attributes of a resource; each field is None if its attribute is absent."""

    # content of the "iri" attribute
    iri: str | None
    # content of the "ark" attribute
    ark: str | None
    # content of the "creation_date" attribute, kept as the raw string
    creation_date: str | None
24
+
25
+
26
@dataclass
class ParsedValue:
    """Data of one value of a property as extracted from the XML file."""

    # absolute IRI of the property
    prop_name: str
    # a string for most value types, a tuple for value types that consist of two parts
    # (e.g. list name and node name of a list value); None if the XML contained no content
    value: str | tuple[str | None, str | None] | None
    value_type: KnoraValueType
    # content of the "permissions" attribute, None if the attribute is absent
    permissions_id: str | None
    # content of the "comment" attribute, None if the attribute is absent
    comment: str | None
33
+
34
+
35
@dataclass
class ParsedFileValue:
    """Data of a file or IIIF-URI attached to a resource."""

    # file name or URI, None if the XML element had no text
    value: str | None
    # None if the type could not be determined (e.g. unknown file extension)
    value_type: KnoraValueType | None
    # legal information and permissions of the file
    metadata: ParsedFileValueMetadata
40
+
41
+
42
@dataclass
class ParsedFileValueMetadata:
    """Attributes of a file element; each field is None if its attribute is absent."""

    # content of the "license" attribute
    license_iri: str | None
    # content of the "copyright-holder" attribute
    copyright_holder: str | None
    # content of the "authorship-id" attribute
    authorship_id: str | None
    # content of the "permissions" attribute
    permissions_id: str | None
48
+
49
+
50
class KnoraValueType(Enum):
    """
    Maps to a knora value type, for example: BOOLEAN_VALUE -> knora-api:BooleanValue
    """

    # value types of properties
    # NOTE: the members get their values from auto(), so the numeric values depend on the order of declaration
    BOOLEAN_VALUE = auto()
    COLOR_VALUE = auto()
    DATE_VALUE = auto()
    DECIMAL_VALUE = auto()
    GEONAME_VALUE = auto()
    GEOM_VALUE = auto()
    INT_VALUE = auto()
    INTERVAL_VALUE = auto()
    LINK_VALUE = auto()
    LIST_VALUE = auto()
    SIMPLETEXT_VALUE = auto()
    RICHTEXT_VALUE = auto()
    TIME_VALUE = auto()
    URI_VALUE = auto()

    # value types of files and IIIF-URIs
    ARCHIVE_FILE = auto()
    AUDIO_FILE = auto()
    DOCUMENT_FILE = auto()
    MOVING_IMAGE_FILE = auto()
    STILL_IMAGE_FILE = auto()
    STILL_IMAGE_IIIF = auto()
    TEXT_FILE = auto()