dsp-tools 9.1.0.post11__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316) hide show
  1. dsp_tools/__init__.py +4 -0
  2. dsp_tools/cli/args.py +36 -0
  3. dsp_tools/cli/call_action.py +51 -231
  4. dsp_tools/cli/call_action_files_only.py +101 -0
  5. dsp_tools/cli/call_action_with_network.py +207 -0
  6. dsp_tools/cli/create_parsers.py +156 -58
  7. dsp_tools/cli/entry_point.py +56 -26
  8. dsp_tools/cli/utils.py +87 -0
  9. dsp_tools/clients/CLAUDE.md +420 -0
  10. dsp_tools/clients/authentication_client.py +14 -0
  11. dsp_tools/clients/authentication_client_live.py +66 -0
  12. dsp_tools/{utils → clients}/connection.py +2 -18
  13. dsp_tools/clients/connection_live.py +233 -0
  14. dsp_tools/clients/fuseki_metrics.py +60 -0
  15. dsp_tools/clients/group_user_clients.py +35 -0
  16. dsp_tools/clients/group_user_clients_live.py +181 -0
  17. dsp_tools/clients/legal_info_client.py +23 -0
  18. dsp_tools/clients/legal_info_client_live.py +132 -0
  19. dsp_tools/clients/list_client.py +49 -0
  20. dsp_tools/clients/list_client_live.py +166 -0
  21. dsp_tools/clients/metadata_client.py +24 -0
  22. dsp_tools/clients/metadata_client_live.py +47 -0
  23. dsp_tools/clients/ontology_clients.py +49 -0
  24. dsp_tools/clients/ontology_create_client_live.py +166 -0
  25. dsp_tools/clients/ontology_get_client_live.py +80 -0
  26. dsp_tools/clients/permissions_client.py +68 -0
  27. dsp_tools/clients/project_client.py +16 -0
  28. dsp_tools/clients/project_client_live.py +66 -0
  29. dsp_tools/commands/create/communicate_problems.py +24 -0
  30. dsp_tools/commands/create/create.py +134 -0
  31. dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
  32. dsp_tools/commands/create/create_on_server/classes.py +99 -0
  33. dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
  34. dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
  35. dsp_tools/commands/create/create_on_server/group_users.py +165 -0
  36. dsp_tools/commands/create/create_on_server/lists.py +163 -0
  37. dsp_tools/commands/create/create_on_server/mappers.py +12 -0
  38. dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
  39. dsp_tools/commands/create/create_on_server/ontology.py +52 -0
  40. dsp_tools/commands/create/create_on_server/project.py +68 -0
  41. dsp_tools/commands/create/create_on_server/properties.py +119 -0
  42. dsp_tools/commands/create/exceptions.py +29 -0
  43. dsp_tools/commands/create/lists_only.py +66 -0
  44. dsp_tools/commands/create/models/create_problems.py +87 -0
  45. dsp_tools/commands/create/models/parsed_ontology.py +88 -0
  46. dsp_tools/commands/create/models/parsed_project.py +81 -0
  47. dsp_tools/commands/create/models/rdf_ontology.py +12 -0
  48. dsp_tools/commands/create/models/server_project_info.py +100 -0
  49. dsp_tools/commands/create/parsing/parse_lists.py +45 -0
  50. dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
  51. dsp_tools/commands/create/parsing/parse_project.py +149 -0
  52. dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
  53. dsp_tools/commands/create/project_validate.py +595 -0
  54. dsp_tools/commands/create/serialisation/ontology.py +119 -0
  55. dsp_tools/commands/create/serialisation/project.py +44 -0
  56. dsp_tools/commands/excel2json/CLAUDE.md +101 -0
  57. dsp_tools/commands/excel2json/json_header.py +57 -23
  58. dsp_tools/commands/excel2json/{new_lists → lists}/compliance_checks.py +26 -26
  59. dsp_tools/commands/excel2json/{new_lists/make_new_lists.py → lists/make_lists.py} +19 -18
  60. dsp_tools/commands/excel2json/{new_lists → lists}/models/input_error.py +1 -12
  61. dsp_tools/commands/excel2json/{new_lists → lists}/models/serialise.py +9 -5
  62. dsp_tools/commands/excel2json/{new_lists → lists}/utils.py +4 -4
  63. dsp_tools/commands/excel2json/models/input_error.py +31 -11
  64. dsp_tools/commands/excel2json/models/json_header.py +53 -15
  65. dsp_tools/commands/excel2json/models/ontology.py +4 -3
  66. dsp_tools/commands/excel2json/{lists.py → old_lists.py} +26 -112
  67. dsp_tools/commands/excel2json/project.py +78 -34
  68. dsp_tools/commands/excel2json/properties.py +57 -36
  69. dsp_tools/commands/excel2json/resources.py +32 -12
  70. dsp_tools/commands/excel2json/utils.py +20 -1
  71. dsp_tools/commands/excel2xml/__init__.py +2 -2
  72. dsp_tools/commands/excel2xml/excel2xml_cli.py +7 -15
  73. dsp_tools/commands/excel2xml/excel2xml_lib.py +138 -493
  74. dsp_tools/commands/excel2xml/propertyelement.py +5 -5
  75. dsp_tools/commands/{project → get}/get.py +29 -13
  76. dsp_tools/commands/get/get_permissions.py +257 -0
  77. dsp_tools/commands/get/get_permissions_legacy.py +89 -0
  78. dsp_tools/commands/{project/models → get/legacy_models}/context.py +6 -6
  79. dsp_tools/commands/{project/models → get/legacy_models}/group.py +5 -10
  80. dsp_tools/commands/{project/models → get/legacy_models}/listnode.py +5 -35
  81. dsp_tools/commands/{project/models → get/legacy_models}/model.py +1 -1
  82. dsp_tools/commands/{project/models → get/legacy_models}/ontology.py +9 -14
  83. dsp_tools/commands/{project/models → get/legacy_models}/project.py +13 -6
  84. dsp_tools/commands/{project/models → get/legacy_models}/propertyclass.py +9 -16
  85. dsp_tools/commands/{project/models → get/legacy_models}/resourceclass.py +8 -46
  86. dsp_tools/commands/{project/models → get/legacy_models}/user.py +19 -60
  87. dsp_tools/commands/get/models/permissions_models.py +10 -0
  88. dsp_tools/commands/id2iri.py +20 -10
  89. dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +81 -56
  90. dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +4 -10
  91. dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +97 -37
  92. dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +2 -2
  93. dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +9 -10
  94. dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +3 -3
  95. dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +2 -10
  96. dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +12 -2
  97. dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +8 -9
  98. dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +18 -18
  99. dsp_tools/commands/start_stack.py +126 -77
  100. dsp_tools/commands/update_legal/CLAUDE.md +344 -0
  101. dsp_tools/commands/update_legal/__init__.py +0 -0
  102. dsp_tools/commands/update_legal/core.py +182 -0
  103. dsp_tools/commands/update_legal/csv_operations.py +135 -0
  104. dsp_tools/commands/update_legal/models.py +87 -0
  105. dsp_tools/commands/update_legal/xml_operations.py +247 -0
  106. dsp_tools/commands/validate_data/CLAUDE.md +159 -0
  107. dsp_tools/commands/validate_data/__init__.py +0 -0
  108. dsp_tools/commands/validate_data/constants.py +59 -0
  109. dsp_tools/commands/validate_data/mappers.py +143 -0
  110. dsp_tools/commands/validate_data/models/__init__.py +0 -0
  111. dsp_tools/commands/validate_data/models/api_responses.py +45 -0
  112. dsp_tools/commands/validate_data/models/input_problems.py +119 -0
  113. dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
  114. dsp_tools/commands/validate_data/models/validation.py +106 -0
  115. dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
  116. dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
  117. dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
  118. dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
  119. dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
  120. dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
  121. dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
  122. dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
  123. dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
  124. dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
  125. dsp_tools/commands/{xml_validate/sparql/resource_shacl.py → validate_data/sparql/cardinality_shacl.py} +45 -47
  126. dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
  127. dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
  128. dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
  129. dsp_tools/commands/validate_data/utils.py +59 -0
  130. dsp_tools/commands/validate_data/validate_data.py +283 -0
  131. dsp_tools/commands/validate_data/validation/__init__.py +0 -0
  132. dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
  133. dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
  134. dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
  135. dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
  136. dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
  137. dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
  138. dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
  139. dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
  140. dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
  141. dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
  142. dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
  143. dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
  144. dsp_tools/commands/xmlupload/models/formatted_text_value.py +0 -25
  145. dsp_tools/commands/xmlupload/models/ingest.py +56 -70
  146. dsp_tools/commands/xmlupload/models/input_problems.py +6 -14
  147. dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
  148. dsp_tools/commands/xmlupload/models/permission.py +0 -39
  149. dsp_tools/commands/xmlupload/models/{deserialise/xmlpermission.py → permissions_parsed.py} +2 -2
  150. dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
  151. dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
  152. dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
  153. dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
  154. dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
  155. dsp_tools/commands/xmlupload/models/upload_clients.py +3 -3
  156. dsp_tools/commands/xmlupload/models/upload_state.py +2 -4
  157. dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
  158. dsp_tools/commands/xmlupload/{ark2iri.py → prepare_xml_input/ark2iri.py} +1 -1
  159. dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
  160. dsp_tools/commands/xmlupload/{iiif_uri_validator.py → prepare_xml_input/iiif_uri_validator.py} +2 -14
  161. dsp_tools/commands/xmlupload/{list_client.py → prepare_xml_input/list_client.py} +15 -10
  162. dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
  163. dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
  164. dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
  165. dsp_tools/commands/xmlupload/resource_create_client.py +7 -468
  166. dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
  167. dsp_tools/commands/xmlupload/stash/{construct_and_analyze_graph.py → analyse_circular_reference_graph.py} +64 -157
  168. dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
  169. dsp_tools/commands/xmlupload/stash/graph_models.py +13 -8
  170. dsp_tools/commands/xmlupload/stash/stash_circular_references.py +48 -115
  171. dsp_tools/commands/xmlupload/stash/stash_models.py +4 -9
  172. dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +34 -40
  173. dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +98 -108
  174. dsp_tools/commands/xmlupload/upload_config.py +8 -0
  175. dsp_tools/commands/xmlupload/write_diagnostic_info.py +14 -9
  176. dsp_tools/commands/xmlupload/xmlupload.py +214 -192
  177. dsp_tools/config/__init__.py +0 -0
  178. dsp_tools/config/logger_config.py +69 -0
  179. dsp_tools/{utils → config}/warnings_config.py +4 -1
  180. dsp_tools/error/__init__.py +0 -0
  181. dsp_tools/error/custom_warnings.py +39 -0
  182. dsp_tools/error/exceptions.py +204 -0
  183. dsp_tools/error/problems.py +10 -0
  184. dsp_tools/error/xmllib_errors.py +20 -0
  185. dsp_tools/error/xmllib_warnings.py +54 -0
  186. dsp_tools/error/xmllib_warnings_util.py +159 -0
  187. dsp_tools/error/xsd_validation_error_msg.py +19 -0
  188. dsp_tools/legacy_models/__init__.py +0 -0
  189. dsp_tools/{models → legacy_models}/datetimestamp.py +7 -7
  190. dsp_tools/{models → legacy_models}/langstring.py +1 -1
  191. dsp_tools/{models → legacy_models}/projectContext.py +4 -4
  192. dsp_tools/resources/schema/data.xsd +108 -83
  193. dsp_tools/resources/schema/lists-only.json +4 -23
  194. dsp_tools/resources/schema/project.json +80 -35
  195. dsp_tools/resources/schema/properties-only.json +1 -4
  196. dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
  197. dsp_tools/resources/start-stack/docker-compose.yml +34 -30
  198. dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
  199. dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
  200. dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
  201. dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
  202. dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
  203. dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
  204. dsp_tools/utils/ansi_colors.py +32 -0
  205. dsp_tools/utils/data_formats/__init__.py +0 -0
  206. dsp_tools/utils/{date_util.py → data_formats/date_util.py} +13 -1
  207. dsp_tools/utils/data_formats/iri_util.py +30 -0
  208. dsp_tools/utils/{shared.py → data_formats/shared.py} +1 -35
  209. dsp_tools/utils/{uri_util.py → data_formats/uri_util.py} +12 -2
  210. dsp_tools/utils/fuseki_bloating.py +63 -0
  211. dsp_tools/utils/json_parsing.py +22 -0
  212. dsp_tools/utils/rdf_constants.py +42 -0
  213. dsp_tools/utils/rdflib_utils.py +10 -0
  214. dsp_tools/utils/replace_id_with_iri.py +66 -0
  215. dsp_tools/utils/request_utils.py +238 -0
  216. dsp_tools/utils/xml_parsing/__init__.py +0 -0
  217. dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
  218. dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
  219. dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
  220. dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
  221. dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
  222. dsp_tools/xmllib/CLAUDE.md +302 -0
  223. dsp_tools/xmllib/__init__.py +49 -0
  224. dsp_tools/xmllib/general_functions.py +877 -0
  225. dsp_tools/xmllib/internal/__init__.py +0 -0
  226. dsp_tools/xmllib/internal/checkers.py +162 -0
  227. dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
  228. dsp_tools/xmllib/internal/constants.py +46 -0
  229. dsp_tools/xmllib/internal/input_converters.py +155 -0
  230. dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
  231. dsp_tools/xmllib/internal/serialise_resource.py +177 -0
  232. dsp_tools/xmllib/internal/serialise_values.py +152 -0
  233. dsp_tools/xmllib/internal/type_aliases.py +11 -0
  234. dsp_tools/xmllib/models/config_options.py +28 -0
  235. dsp_tools/xmllib/models/date_formats.py +48 -0
  236. dsp_tools/xmllib/models/dsp_base_resources.py +1380 -400
  237. dsp_tools/xmllib/models/internal/__init__.py +0 -0
  238. dsp_tools/xmllib/models/internal/file_values.py +172 -0
  239. dsp_tools/xmllib/models/internal/geometry.py +162 -0
  240. dsp_tools/xmllib/models/{migration_metadata.py → internal/migration_metadata.py} +14 -10
  241. dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
  242. dsp_tools/xmllib/models/internal/values.py +342 -0
  243. dsp_tools/xmllib/models/licenses/__init__.py +0 -0
  244. dsp_tools/xmllib/models/licenses/other.py +59 -0
  245. dsp_tools/xmllib/models/licenses/recommended.py +107 -0
  246. dsp_tools/xmllib/models/permissions.py +41 -0
  247. dsp_tools/xmllib/models/res.py +1782 -0
  248. dsp_tools/xmllib/models/root.py +313 -26
  249. dsp_tools/xmllib/value_checkers.py +310 -47
  250. dsp_tools/xmllib/value_converters.py +765 -8
  251. dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
  252. dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
  253. dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
  254. {dsp_tools-9.1.0.post11.dist-info → dsp_tools-18.3.0.post13.dist-info}/entry_points.txt +1 -0
  255. dsp_tools/commands/project/create/project_create.py +0 -1107
  256. dsp_tools/commands/project/create/project_create_lists.py +0 -204
  257. dsp_tools/commands/project/create/project_validate.py +0 -453
  258. dsp_tools/commands/project/models/project_definition.py +0 -12
  259. dsp_tools/commands/rosetta.py +0 -124
  260. dsp_tools/commands/template.py +0 -30
  261. dsp_tools/commands/xml_validate/api_connection.py +0 -122
  262. dsp_tools/commands/xml_validate/deserialise_input.py +0 -135
  263. dsp_tools/commands/xml_validate/make_data_rdf.py +0 -193
  264. dsp_tools/commands/xml_validate/models/data_deserialised.py +0 -108
  265. dsp_tools/commands/xml_validate/models/data_rdf.py +0 -214
  266. dsp_tools/commands/xml_validate/models/input_problems.py +0 -191
  267. dsp_tools/commands/xml_validate/models/validation.py +0 -29
  268. dsp_tools/commands/xml_validate/reformat_validaton_result.py +0 -89
  269. dsp_tools/commands/xml_validate/sparql/construct_shapes.py +0 -16
  270. dsp_tools/commands/xml_validate/xml_validate.py +0 -151
  271. dsp_tools/commands/xmlupload/check_consistency_with_ontology.py +0 -253
  272. dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py +0 -236
  273. dsp_tools/commands/xmlupload/models/deserialise/xmlresource.py +0 -171
  274. dsp_tools/commands/xmlupload/models/namespace_context.py +0 -39
  275. dsp_tools/commands/xmlupload/models/ontology_lookup_models.py +0 -161
  276. dsp_tools/commands/xmlupload/models/ontology_problem_models.py +0 -178
  277. dsp_tools/commands/xmlupload/models/serialise/jsonld_serialiser.py +0 -40
  278. dsp_tools/commands/xmlupload/models/serialise/serialise_value.py +0 -51
  279. dsp_tools/commands/xmlupload/ontology_client.py +0 -92
  280. dsp_tools/commands/xmlupload/project_client.py +0 -91
  281. dsp_tools/commands/xmlupload/read_validate_xml_file.py +0 -99
  282. dsp_tools/models/custom_warnings.py +0 -31
  283. dsp_tools/models/exceptions.py +0 -90
  284. dsp_tools/resources/0100-template-repo/template.json +0 -45
  285. dsp_tools/resources/0100-template-repo/template.xml +0 -27
  286. dsp_tools/resources/start-stack/docker-compose-validation.yml +0 -5
  287. dsp_tools/resources/start-stack/start-stack-config.yml +0 -4
  288. dsp_tools/resources/xml_validate/api-shapes.ttl +0 -411
  289. dsp_tools/resources/xml_validate/replace_namespace.xslt +0 -61
  290. dsp_tools/utils/connection_live.py +0 -383
  291. dsp_tools/utils/iri_util.py +0 -14
  292. dsp_tools/utils/logger_config.py +0 -41
  293. dsp_tools/utils/set_encoder.py +0 -20
  294. dsp_tools/utils/xml_utils.py +0 -145
  295. dsp_tools/utils/xml_validation.py +0 -197
  296. dsp_tools/utils/xml_validation_models.py +0 -68
  297. dsp_tools/xmllib/models/file_values.py +0 -78
  298. dsp_tools/xmllib/models/resource.py +0 -415
  299. dsp_tools/xmllib/models/values.py +0 -428
  300. dsp_tools-9.1.0.post11.dist-info/METADATA +0 -130
  301. dsp_tools-9.1.0.post11.dist-info/RECORD +0 -167
  302. dsp_tools-9.1.0.post11.dist-info/WHEEL +0 -4
  303. dsp_tools-9.1.0.post11.dist-info/licenses/LICENSE +0 -674
  304. /dsp_tools/{commands/excel2json/new_lists → clients}/__init__.py +0 -0
  305. /dsp_tools/commands/{excel2json/new_lists/models → create}/__init__.py +0 -0
  306. /dsp_tools/commands/{project → create/create_on_server}/__init__.py +0 -0
  307. /dsp_tools/commands/{project/create → create/models}/__init__.py +0 -0
  308. /dsp_tools/commands/{project/models → create/parsing}/__init__.py +0 -0
  309. /dsp_tools/commands/{xml_validate → create/serialisation}/__init__.py +0 -0
  310. /dsp_tools/commands/{xml_validate/models → excel2json/lists}/__init__.py +0 -0
  311. /dsp_tools/commands/{xml_validate/sparql → excel2json/lists/models}/__init__.py +0 -0
  312. /dsp_tools/commands/excel2json/{new_lists → lists}/models/deserialise.py +0 -0
  313. /dsp_tools/commands/{xmlupload/models/deserialise → get}/__init__.py +0 -0
  314. /dsp_tools/commands/{xmlupload/models/serialise → get/legacy_models}/__init__.py +0 -0
  315. /dsp_tools/commands/{project/models → get/legacy_models}/helpers.py +0 -0
  316. /dsp_tools/{models → commands/get/models}/__init__.py +0 -0
@@ -1,151 +0,0 @@
1
- import warnings
2
- from copy import deepcopy
3
- from pathlib import Path
4
-
5
- from lxml import etree
6
- from rdflib import SH
7
- from rdflib import Graph
8
- from termcolor import cprint
9
-
10
- from dsp_tools.commands.xml_validate.api_connection import OntologyConnection
11
- from dsp_tools.commands.xml_validate.api_connection import ShaclValidator
12
- from dsp_tools.commands.xml_validate.deserialise_input import deserialise_xml
13
- from dsp_tools.commands.xml_validate.make_data_rdf import make_data_rdf
14
- from dsp_tools.commands.xml_validate.models.data_deserialised import ProjectDeserialised
15
- from dsp_tools.commands.xml_validate.models.data_rdf import DataRDF
16
- from dsp_tools.commands.xml_validate.models.validation import ValidationReport
17
- from dsp_tools.commands.xml_validate.reformat_validaton_result import reformat_validation_graph
18
- from dsp_tools.commands.xml_validate.sparql.construct_shapes import construct_shapes_graph
19
- from dsp_tools.models.custom_warnings import DspToolsUserWarning
20
- from dsp_tools.utils.xml_utils import parse_xml_file
21
- from dsp_tools.utils.xml_utils import remove_comments_from_element_tree
22
- from dsp_tools.utils.xml_utils import transform_into_localnames
23
- from dsp_tools.utils.xml_validation import validate_xml
24
-
25
- LIST_SEPARATOR = "\n - "
26
-
27
-
28
- def xml_validate(filepath: Path, api_url: str, dev_route: bool, save_graphs: bool) -> bool: # noqa: ARG001 (unused argument)
29
- """
30
- Takes a file and project information and validates it against the ontologies on the server.
31
-
32
- Args:
33
- filepath: path to the xml data file
34
- api_url: url of the api host
35
- dev_route: if this flag is set features that are still in development will be used
36
- save_graphs: if this flag is set, all the graphs will be saved in a folder
37
-
38
- Returns:
39
- true unless it crashed
40
- """
41
- _inform_about_experimental_feature()
42
- data_rdf, shortcode = _get_data_info_from_file(filepath, api_url)
43
- onto_con = OntologyConnection(api_url, shortcode)
44
- ontologies, shapes = _get_shacl(onto_con)
45
- data_graph = data_rdf.make_graph()
46
- generic_filepath = Path()
47
- if save_graphs:
48
- generic_filepath = _save_graphs(filepath, ontologies, shapes, data_graph)
49
- # data_graph += ontologies
50
- val = ShaclValidator(api_url)
51
- report = _validate(val, shapes, data_graph)
52
- if save_graphs:
53
- report.validation_graph.serialize(f"{generic_filepath}_VALIDATION_REPORT.ttl")
54
- if report.conforms:
55
- cprint("\n Validation passed! ", color="green", attrs=["bold", "reverse"])
56
- else:
57
- reformatted = reformat_validation_graph(report.validation_graph, data_graph)
58
- problem_msg = reformatted.get_msg(filepath)
59
- cprint("\n Validation errors found! ", color="light_red", attrs=["bold", "reverse"])
60
- print(problem_msg)
61
- if reformatted.unexpected_results:
62
- reformatted.unexpected_results.save_inform_user(
63
- results_graph=report.validation_graph,
64
- shacl=report.shacl_graph,
65
- data=data_graph,
66
- )
67
- return True
68
-
69
-
70
- def _inform_about_experimental_feature() -> None:
71
- what_is_validated = [
72
- "This is an experimental feature, it will change and be extended continuously. "
73
- "The following information of your data is being validated:",
74
- "Cardinalities",
75
- ]
76
- warnings.warn(DspToolsUserWarning(LIST_SEPARATOR.join(what_is_validated)))
77
-
78
-
79
- def _save_graphs(filepath: Path, onto: Graph, shacl: Graph, data: Graph) -> Path:
80
- parent_directory = filepath.parent
81
- new_directory = parent_directory / "graphs"
82
- new_directory.mkdir(exist_ok=True)
83
- cprint(f"\n Saving graphs to {new_directory} ", color="light_blue", attrs=["bold", "reverse"])
84
- generic_filepath = new_directory / filepath.stem
85
- onto.serialize(f"{generic_filepath}_ONTO.ttl")
86
- shacl.serialize(f"{generic_filepath}_SHACL.ttl")
87
- data.serialize(f"{generic_filepath}_DATA.ttl")
88
- onto_data = onto + data
89
- onto_data.serialize(f"{generic_filepath}_ONTO_DATA.ttl")
90
- return generic_filepath
91
-
92
-
93
- def _validate(validator: ShaclValidator, shapes_graph: Graph, data_graph: Graph) -> ValidationReport:
94
- shape_str = shapes_graph.serialize(format="ttl")
95
- data_str = data_graph.serialize(format="ttl")
96
- results = validator.validate(data_str, shape_str)
97
- conforms = bool(next(results.objects(None, SH.conforms)))
98
- return ValidationReport(
99
- conforms=conforms,
100
- validation_graph=results,
101
- shacl_graph=shapes_graph,
102
- data_graph=data_graph,
103
- )
104
-
105
-
106
- def _get_shacl(onto_con: OntologyConnection) -> tuple[Graph, Graph]:
107
- ontologies = _get_project_ontos(onto_con)
108
- knora_ttl = onto_con.get_knora_api()
109
- kag = Graph()
110
- kag.parse(data=knora_ttl, format="ttl")
111
- onto_for_construction = deepcopy(ontologies) + kag
112
- shapes = construct_shapes_graph(onto_for_construction)
113
- shapes += ontologies
114
- return ontologies, shapes
115
-
116
-
117
- def _get_project_ontos(onto_con: OntologyConnection) -> Graph:
118
- all_ontos = onto_con.get_ontologies()
119
- onto_g = Graph()
120
- for onto in all_ontos:
121
- og = Graph()
122
- og.parse(data=onto, format="ttl")
123
- onto_g += og
124
- return onto_g
125
-
126
-
127
- def _get_data_info_from_file(file: Path, api_url: str) -> tuple[DataRDF, str]:
128
- cleaned_root = _parse_and_clean_file(file, api_url)
129
- deserialised: ProjectDeserialised = deserialise_xml(cleaned_root)
130
- rdf_data: DataRDF = make_data_rdf(deserialised.data)
131
- return rdf_data, deserialised.info.shortcode
132
-
133
-
134
- def _parse_and_clean_file(file: Path, api_url: str) -> etree._Element:
135
- root = parse_xml_file(file)
136
- root = remove_comments_from_element_tree(root)
137
- validate_xml(root)
138
- root = transform_into_localnames(root)
139
- return _replace_namespaces(root, api_url)
140
-
141
-
142
- def _replace_namespaces(root: etree._Element, api_url: str) -> etree._Element:
143
- with open("src/dsp_tools/resources/xml_validate/replace_namespace.xslt", "rb") as xslt_file:
144
- xslt_data = xslt_file.read()
145
- shortcode = root.attrib["shortcode"]
146
- default_ontology = root.attrib["default-ontology"]
147
- namespace = f"{api_url}/ontology/{shortcode}/{default_ontology}/v2#"
148
- xslt_root = etree.XML(xslt_data)
149
- transform = etree.XSLT(xslt_root)
150
- replacement_value = etree.XSLT.strparam(namespace)
151
- return transform(root, replacementValue=replacement_value).getroot()
@@ -1,253 +0,0 @@
1
- from collections import defaultdict
2
- from datetime import datetime
3
- from pathlib import Path
4
- from typing import cast
5
-
6
- import regex
7
- from lxml import etree
8
- from regex import Pattern
9
-
10
- from dsp_tools.commands.xmlupload.models.ontology_lookup_models import AllowedEncodings
11
- from dsp_tools.commands.xmlupload.models.ontology_lookup_models import ProjectOntosInformation
12
- from dsp_tools.commands.xmlupload.models.ontology_lookup_models import PropertyTextValueTypes
13
- from dsp_tools.commands.xmlupload.models.ontology_lookup_models import TextValueData
14
- from dsp_tools.commands.xmlupload.models.ontology_lookup_models import get_text_value_types_of_properties_from_onto
15
- from dsp_tools.commands.xmlupload.models.ontology_lookup_models import make_project_onto_information
16
- from dsp_tools.commands.xmlupload.models.ontology_problem_models import InvalidOntologyElementsInData
17
- from dsp_tools.commands.xmlupload.models.ontology_problem_models import InvalidTextValueEncodings
18
- from dsp_tools.commands.xmlupload.ontology_client import OntologyClient
19
- from dsp_tools.models.exceptions import InputError
20
-
21
# Name that consists of a leading colon followed by word characters, e.g. ":hasName".
# Such names are resolved with the prefix of the default ontology.
defaultOntologyColon: Pattern[str] = regex.compile(r"^:\w+$")
# Name that consists of word characters only, i.e. without prefix and colon.
# Such names are looked up under the prefix "knora-api".
knoraUndeclared: Pattern[str] = regex.compile(r"^\w+$")
# Name with an explicit prefix (word characters or hyphens) before the colon, e.g. "rosetta:hasName".
genericPrefixedOntology: Pattern[str] = regex.compile(r"^[\w\-]+:\w+$")
# Tags of child elements of a resource that are skipped when the property names are collected.
KNORA_BASE_PROPERTIES = {
    "bitstream",
    "iiif-uri",
    "isSegmentOf",
    "hasSegmentBounds",
    "hasTitle",
    "hasComment",
    "hasDescription",
    "hasKeyword",
    "relatesTo",
}
35
-
36
-
37
def do_xml_consistency_check_with_ontology(onto_client: OntologyClient, root: etree._Element) -> None:
    """
    Check the data in the XML against the ontologies that are retrieved through the OntologyClient.

    Two checks are carried out:
    - Are all the classes and properties used in the data defined in an ontology?
    - Are the encodings of the text values consistent with the specification in the ontology?

    Args:
        onto_client: client for the ontology retrieval
        root: root of the XML

    Raises:
        InputError: if at least one of the two checks reports a problem
    """
    cls_prop_lookup, text_value_encoding_lookup = _get_onto_lookups(onto_client)
    classes_in_data, properties_in_data = _get_all_classes_and_properties_from_data(root)
    messages = [
        _check_all_classes_and_properties_in_onto(classes_in_data, properties_in_data, cls_prop_lookup),
        _check_correctness_all_text_value_encodings(root, text_value_encoding_lookup),
    ]
    if problem_str := "".join(messages):
        raise InputError(problem_str)
58
-
59
-
60
def _get_onto_lookups(onto_client: OntologyClient) -> tuple[ProjectOntosInformation, PropertyTextValueTypes]:
    """
    Retrieve the ontologies through the client and create the two lookups.

    Returns:
        A tuple consisting of the class/property lookup and the lookup for the text value types.
    """
    project_ontos = onto_client.get_all_project_ontologies_from_server()
    # The text value types are extracted while only the project ontologies are in the dictionary,
    # the knora-api ontology is added afterwards.
    text_value_types = get_text_value_types_of_properties_from_onto(project_ontos, onto_client.default_ontology)
    project_ontos["knora-api"] = onto_client.get_knora_api_ontology_from_server()
    onto_information = make_project_onto_information(onto_client.default_ontology, project_ontos)
    return onto_information, text_value_types
65
-
66
-
67
def _check_all_classes_and_properties_in_onto(
    classes_in_data: dict[str, list[str]],
    properties_in_data: dict[str, list[str]],
    onto_check_info: ProjectOntosInformation,
) -> str:
    """
    Check if all the classes and properties used in the data can be found in the ontologies.

    If the problem protocol returns a data frame, it is saved as CSV file in the current working directory,
    and the location of the file is appended to the message.

    Args:
        classes_in_data: class names with the IDs of the resources that use them
        properties_in_data: property names with the IDs of the resources that use them
        onto_check_info: lookup for the ontologies

    Returns:
        A string communicating the problems, if there are none the string is empty.
    """
    invalid_classes = _find_all_class_types_in_onto(classes_in_data, onto_check_info)
    invalid_properties = _find_all_properties_in_onto(properties_in_data, onto_check_info)
    if not (invalid_classes or invalid_properties):
        return ""
    msg, df = InvalidOntologyElementsInData(
        classes=invalid_classes,
        properties=invalid_properties,
        ontos_on_server=list(onto_check_info.onto_lookup.keys()),
    ).execute_problem_protocol()
    if df is None:
        return msg
    csv_file = f"XML_syntax_errors_{datetime.now().strftime('%Y-%m-%d_%H%M%S')}.csv"
    df.to_csv(path_or_buf=Path(Path.cwd(), csv_file), index=False)
    file_notice = (
        "\n\n---------------------------------------\n\n"
        f"\nAll the problems are listed in the file: '{Path.cwd()}/{csv_file}'\n"
    )
    return msg + file_notice
88
-
89
-
90
def _get_all_classes_and_properties_from_data(
    root: etree._Element,
) -> tuple[dict[str, list[str]], dict[str, list[str]]]:
    """
    Collect the classes and the properties that are used by the `resource` children of the root.

    Returns:
        A tuple of two dictionaries, the first for the classes, the second for the properties.
        Each maps a name to the IDs of the resources in which it occurs.
    """
    ids_per_class = _get_all_class_types_and_ids_from_data(root)
    ids_per_property: defaultdict[str, list[str]] = defaultdict(list)
    for res in root.iterchildren(tag="resource"):
        ids_per_property = _get_all_property_names_and_resource_ids_one_resource(res, ids_per_property)
    return ids_per_class, ids_per_property
98
-
99
-
100
- def _get_all_class_types_and_ids_from_data(root: etree._Element) -> dict[str, list[str]]:
101
- cls_dict: dict[str, list[str]] = {}
102
- for resource in root.iterchildren(tag="resource"):
103
- restype = resource.attrib["restype"]
104
- if restype in cls_dict:
105
- cls_dict[restype].append(resource.attrib["id"])
106
- else:
107
- cls_dict[restype] = [resource.attrib["id"]]
108
- return cls_dict
109
-
110
-
111
def _get_all_property_names_and_resource_ids_one_resource(
    resource: etree._Element, prop_dict: defaultdict[str, list[str]]
) -> defaultdict[str, list[str]]:
    """
    Add the ID of the resource to the entry of every property that the resource uses.

    Children whose tag is listed in KNORA_BASE_PROPERTIES are skipped.
    The dictionary that was passed in is modified and returned.
    """
    for child in resource.iterchildren():
        if child.tag not in KNORA_BASE_PROPERTIES:
            prop_dict[child.attrib["name"]].append(resource.attrib["id"])
    return prop_dict
120
-
121
-
122
def _find_all_class_types_in_onto(
    classes: dict[str, list[str]], onto_check_info: ProjectOntosInformation
) -> list[tuple[str, list[str], str]]:
    """Return a tuple (class name, resource IDs, problem message) for every class for which a problem was found."""
    return [
        (cls_type, ids, problem)
        for cls_type, ids in classes.items()
        if (problem := _find_one_class_type_in_onto(cls_type, onto_check_info))
    ]
130
-
131
-
132
def _find_one_class_type_in_onto(cls_type: str, onto_check_info: ProjectOntosInformation) -> str | None:
    """Return a message that describes what is wrong with the class name, or None if the class was found."""
    prefix, cls_ = _get_separate_prefix_and_iri_from_onto_prop_or_cls(cls_type, onto_check_info.default_ontology_prefix)
    if not prefix:
        return "Class name does not follow a known ontology pattern"
    onto = onto_check_info.onto_lookup.get(prefix)
    if not onto:
        return "Unknown ontology prefix"
    if cls_ in onto.classes:
        return None
    return "Invalid Class Type"
140
-
141
-
142
def _find_all_properties_in_onto(
    properties: dict[str, list[str]], onto_check_info: ProjectOntosInformation
) -> list[tuple[str, list[str], str]]:
    """Return a tuple (property name, resource IDs, problem message) for every property with a problem."""
    return [
        (prop_name, ids, problem)
        for prop_name, ids in properties.items()
        if (problem := _find_one_property_in_onto(prop_name, onto_check_info))
    ]
150
-
151
-
152
def _find_one_property_in_onto(prop_name: str, onto_check_info: ProjectOntosInformation) -> str | None:
    """Return a message that describes what is wrong with the property name, or None if the property was found."""
    prefix, prop = _get_separate_prefix_and_iri_from_onto_prop_or_cls(
        prop_name, onto_check_info.default_ontology_prefix
    )
    if not prefix:
        return "Property name does not follow a known ontology pattern"
    onto = onto_check_info.onto_lookup.get(prefix)
    if not onto:
        return "Unknown ontology prefix"
    if prop in onto.properties:
        return None
    return "Invalid Property"
162
-
163
-
164
def _get_separate_prefix_and_iri_from_onto_prop_or_cls(
    prop_or_cls: str, default_ontology_prefix: str
) -> tuple[str, ...] | tuple[None, None]:
    """
    Split the name of a property or class into the ontology prefix and the name itself.

    Args:
        prop_or_cls: name as it is used in the data
        default_ontology_prefix: prefix that is used for names starting with a colon

    Returns:
        The prefix and the name, or `(None, None)` if the name does not match any of the known patterns.
    """
    # ":name" -> the default ontology
    if defaultOntologyColon.match(prop_or_cls):
        return default_ontology_prefix, prop_or_cls.lstrip(":")
    # "name" -> knora-api
    if knoraUndeclared.match(prop_or_cls):
        return "knora-api", prop_or_cls
    # "prefix:name" -> the prefix as it is written
    if genericPrefixedOntology.match(prop_or_cls):
        return tuple(prop_or_cls.split(":"))
    return None, None
175
-
176
-
177
def _check_correctness_all_text_value_encodings(root: etree._Element, text_prop_look_up: PropertyTextValueTypes) -> str:
    """
    Check that the encoding of every `<text-prop>` in the data
    corresponds to the text type that the ontology specifies for the property.

    The accepted encodings are `xml` (for properties with formatted text)
    and `utf8` (for properties with unformatted text).

    If, for example, the ontology specifies that `:hasSimpleText` is without mark-up, this is correct:
    ```
    <text-prop name=":hasSimpleText">
        <text encoding="utf8">This is a simple text without mark-up</text>
    </text-prop>
    ```

    And this is wrong:
    ```
    <text-prop name=":hasSimpleText">
        <text encoding="xml">This is a simple text without mark-up</text>
    </text-prop>
    ```

    If the problem protocol returns a data frame, it is saved as CSV file,
    and the location of the file is appended to the message.

    Args:
        root: root of the data xml document
        text_prop_look_up: a lookup containing the property names and their specified types

    Returns:
        A string communicating the problem, if there are none the string is empty.
    """
    invalid_text_values = [
        text_value
        for text_value in _get_all_ids_and_props_and_encodings_from_root(root)
        if not _check_correctness_of_one_prop(text_value, text_prop_look_up)
    ]
    if not invalid_text_values:
        return ""
    msg, df = InvalidTextValueEncodings(invalid_text_values).execute_problem_protocol()
    if df is None:
        return msg
    csv_file = Path(f"text_value_encoding_errors_{datetime.now().strftime('%Y-%m-%d_%H%M%S')}.csv")
    df.to_csv(path_or_buf=csv_file, index=False)
    file_notice = (
        "\n\n---------------------------------------\n\n"
        f"All the problems are listed in the file: '{csv_file.absolute()}'"
    )
    return msg + file_notice
222
-
223
-
224
def _get_all_ids_and_props_and_encodings_from_root(root: etree._Element) -> list[TextValueData]:
    """Collect the text value information of all the `resource` children of the root in one flat list."""
    return [
        text_value
        for res in root.iterchildren(tag="resource")
        for text_value in _get_id_and_props_and_encodings_from_one_resource(res)
    ]
229
-
230
-
231
def _get_id_and_props_and_encodings_from_one_resource(resource: etree._Element) -> list[TextValueData]:
    """Create one TextValueData for every `text-prop` child of the resource."""
    res_id = resource.attrib["id"]
    res_type = resource.attrib["restype"]
    text_values = []
    for text_prop in resource.iterchildren(tag="text-prop"):
        text_values.append(_get_prop_and_encoding_from_one_property(res_id, res_type, text_prop))
    return text_values
238
-
239
-
240
def _get_prop_and_encoding_from_one_property(res_id: str, res_type: str, prop: etree._Element) -> TextValueData:
    """
    Combine the resource information with the name of the property and its encoding.

    The encoding is read from the first child of the property only.
    """
    name = prop.attrib["name"]
    first_value = prop[0]
    return TextValueData(res_id, res_type, name, cast(AllowedEncodings, first_value.attrib["encoding"]))
244
-
245
-
246
- def _check_correctness_of_one_prop(text_val: TextValueData, text_prop_look_up: PropertyTextValueTypes) -> bool:
247
- match text_val.encoding:
248
- case "xml":
249
- return text_val.property_name in text_prop_look_up.formatted_text_props
250
- case "utf8":
251
- return text_val.property_name in text_prop_look_up.unformatted_text_props
252
- case _:
253
- return False
@@ -1,236 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from dataclasses import dataclass
4
- from dataclasses import field
5
- from typing import Optional
6
- from typing import Union
7
- from typing import cast
8
-
9
- import regex
10
- from lxml import etree
11
-
12
- from dsp_tools.commands.xmlupload.models.formatted_text_value import FormattedTextValue
13
- from dsp_tools.models.exceptions import XmlUploadError
14
-
15
-
16
@dataclass(frozen=True)
class XMLProperty:
    """
    Represents a property of a resource in the XML used for data import.

    Attributes:
        name: The name of the property
        valtype: The type of the property
        values: The list of values of the property
    """

    name: str
    valtype: str
    values: list[XMLValue]

    @staticmethod
    def from_node(node: etree._Element, valtype: str, default_ontology: str) -> XMLProperty:
        """
        The factory for the DSP property

        Args:
            node: the property node, p.ex. `<decimal-prop></decimal-prop>`
            valtype: the type of value given by the name of the property node, p.ex. decimal in `<decimal-prop>`
            default_ontology: the name of the ontology

        Raises:
            XmlUploadError: If a `*-prop` node contains a child whose tag is not the expected value type

        Returns:
            The DSP property
        """
        name = XMLProperty._get_name(node, default_ontology)
        if node.tag.endswith("-prop"):
            values = XMLProperty._get_values_from_normal_props(node, valtype)
        else:
            # tags that do not end with "-prop" carry their single value themselves
            values = [XMLProperty._get_value_from_knora_base_prop(node)]
        return XMLProperty(name, valtype, values)

    @staticmethod
    def _get_name(node: etree._Element, default_ontology: str) -> str:
        """Return the property name in the format `namespace:propertyname`, p.ex. `rosetta:hasName`."""
        orig = node.attrib.get("name")
        if not orig:  # tags like <isSegmentOf> don't have a name attribute
            return f"knora-api:{node.tag}"
        elif ":" not in orig:
            return f"knora-api:{orig}"
        elif orig.startswith(":"):
            # replace an empty namespace with the default ontology name
            return f"{default_ontology}{orig}"
        else:
            return orig

    @staticmethod
    def _get_values_from_normal_props(node: etree._Element, valtype: str) -> list[XMLValue]:
        """
        Parse the subnodes of the property node, which contain the actual values of the property.

        Raises:
            XmlUploadError: If the tag of a subnode differs from the expected value type
        """
        listname = node.attrib.get("list")  # save the list name if given (only for lists)
        values: list[XMLValue] = []
        for subnode in node:
            if subnode.tag == valtype:  # the subnode must correspond to the expected value type
                values.append(XMLValue.from_node(subnode, valtype, listname))
            else:
                raise XmlUploadError(f"ERROR Unexpected tag: '{subnode.tag}'. Property may contain only value tags!")
        return values

    @staticmethod
    def _get_value_from_knora_base_prop(node: etree._Element) -> XMLValue:
        """Create the value of a node that is not a `*-prop` node, depending on the end of its tag."""
        resrefs: set[str] = set()
        if node.tag.endswith("hasSegmentBounds"):
            # Single quotes inside the f-string: reusing the enclosing quote character
            # is a syntax error in Python versions before 3.12.
            value: str | FormattedTextValue = f"{node.attrib['segment_start']}:{node.attrib['segment_end']}"
        elif node.tag.endswith(("hasDescription", "hasComment")):
            value = _extract_formatted_text_from_node(node)
            resrefs = value.find_internal_ids()
        else:
            str_orig = "".join(node.itertext())
            value = _cleanup_unformatted_text(str_orig)
        comment = node.attrib.get("comment")
        permissions = node.attrib.get("permissions")
        link_uuid = node.attrib.get("linkUUID")
        return XMLValue(value=value, resrefs=resrefs, comment=comment, permissions=permissions, link_uuid=link_uuid)
95
-
96
-
97
@dataclass
class XMLValue:
    """
    Represents a value of a resource property in the XML used for data import

    Attributes:
        value: the content of the value, either a string or a formatted text
        resrefs: the internal IDs found in a formatted text, empty for all other values
        comment: content of the `comment` attribute, if there is one
        permissions: content of the `permissions` attribute, if there is one
        link_uuid: content of the `linkUUID` attribute, if there is one
    """

    value: Union[str, FormattedTextValue]
    resrefs: set[str] = field(default_factory=set)
    comment: Optional[str] = None
    permissions: Optional[str] = None
    link_uuid: Optional[str] = None

    @staticmethod
    def from_node(
        node: etree._Element,
        val_type: str,
        listname: Optional[str] = None,
    ) -> XMLValue:
        """
        Factory method to create an XMLValue from an XML node

        Args:
            node: the value node
            val_type: the type of the value, it decides how the content of the node is read
            listname: the name of the list, it is put in front of the content if the type is `list`

        Returns:
            The value
        """
        comment = node.get("comment")
        permissions = node.get("permissions")
        internal_ids = set()
        # the encoding is only relevant for text values
        text_encoding = node.get("encoding") if val_type == "text" else None
        content: Union[str, FormattedTextValue]
        if text_encoding == "xml":
            content = _extract_formatted_text_from_node(node)
            internal_ids = content.find_internal_ids()
        elif text_encoding == "utf8":
            content = _cleanup_unformatted_text("".join(node.itertext()))
        elif val_type == "list":
            content = f"{cast(str, listname)}:" + "".join(node.itertext())
        else:
            content = "".join(node.itertext())
        return XMLValue(
            value=content,
            resrefs=internal_ids,
            comment=comment,
            permissions=permissions,
            link_uuid=node.attrib.get("linkUUID"),  # not all richtexts have a link, so this attribute is optional
        )
131
-
132
-
133
def _extract_formatted_text_from_node(node: etree._Element) -> FormattedTextValue:
    """
    Serialise the node and turn the result into a FormattedTextValue.

    Before the clean-up, the regex removes every opening tag that starts with the tag of the node
    and every closing tag of the node.
    """
    serialised = etree.tostring(node, encoding="unicode", method="xml")
    without_node_tags = regex.sub(f"<{node.tag}.*?>|</{node.tag}>", "", serialised)
    return FormattedTextValue(_cleanup_formatted_text(without_node_tags))
138
-
139
-
140
def _cleanup_formatted_text(xmlstr_orig: str) -> str:
    """
    Remove the non-text characters of a xml-encoded text value from the XML file.

    The following steps are applied, in this order:
        - (multiple) line breaks are replaced by a space
        - multiple spaces or tabstops are replaced by a single space (except within `<code>` or `<pre>` tags)
        - a space after a `<br/>` tag is removed (except within `<code>` tags)
        - leading and trailing whitespace is removed

    Args:
        xmlstr_orig: content of the tag from the XML file, in serialized form

    Returns:
        purged string, suitable to be sent to DSP-API
    """
    without_linebreaks = regex.sub("\n+", " ", xmlstr_orig)

    # The regex selects all spaces/tabstops not followed by </xyz> without <xyz in between.
    # credits: https://stackoverflow.com/a/46937770/14414188
    single_spaced = regex.sub("( {2,}|\t+)(?!(.(?!<(code|pre)))*</(code|pre)>)", " ", without_linebreaks)

    # same technique as above, here for the space that directly follows a <br/> or <br>
    without_space_after_br = regex.sub("((?<=<br/?>) )(?!(.(?!<code))*</code>)", "", single_spaced)

    return without_space_after_br.strip()
169
-
170
-
171
- def _cleanup_unformatted_text(string_orig: str) -> str:
172
- """
173
- In a utf8-encoded text value from the XML file,
174
- there may be non-text characters that must be removed.
175
- This function:
176
- - removes the `<text>` tags
177
- - replaces multiple spaces or tabstops by a single space
178
-
179
- Args:
180
- string_orig: original string from the XML file
181
-
182
- Returns:
183
- purged string, suitable to be sent to DSP-API
184
- """
185
- # remove the <text> tags
186
- string = regex.sub("<text.*?>", "", string_orig)
187
- string = regex.sub("</text>", "", string)
188
-
189
- # replace multiple spaces or tabstops by a single space
190
- string = regex.sub(r" {2,}|\t+", " ", string)
191
-
192
- # remove leading and trailing spaces (of every line, but also of the entire string)
193
- string = "\n".join([s.strip() for s in string.split("\n")])
194
- return string.strip()
195
-
196
-
197
@dataclass(frozen=True)
class XMLBitstream:
    """
    A bitstream object (file) that belongs to a resource in the XML used for data import

    Attributes:
        value: The file path of the bitstream object
        permissions: Reference to the set of permissions for the bitstream object
    """

    value: str
    permissions: Optional[str] = None

    @staticmethod
    def from_node(node: etree._Element) -> XMLBitstream:
        """
        Factory that parses a bitstream node from the XML DOM

        Raises:
            XmlUploadError: If the node has no text
        """
        file_path = node.text
        if file_path:
            return XMLBitstream(file_path, node.get("permissions"))
        raise XmlUploadError("Empty bitstream tag")
216
-
217
-
218
@dataclass(frozen=True)
class IIIFUriInfo:
    """
    A IIIF URI that belongs to a resource in the XML used for data import

    Attributes:
        value: The IIIF URI of the object
        permissions: Reference to the set of permissions for the IIIF URI
    """

    value: str
    permissions: str | None = None

    @staticmethod
    def from_node(node: etree._Element) -> IIIFUriInfo:
        """
        Factory that parses an IIIF URI node from the XML DOM

        Raises:
            XmlUploadError: If the node has no text
        """
        uri = node.text
        if uri:
            return IIIFUriInfo(uri, node.get("permissions"))
        raise XmlUploadError("Empty IIIF URI tag")