dsp-tools 9.1.0.post11__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316) hide show
  1. dsp_tools/__init__.py +4 -0
  2. dsp_tools/cli/args.py +36 -0
  3. dsp_tools/cli/call_action.py +51 -231
  4. dsp_tools/cli/call_action_files_only.py +101 -0
  5. dsp_tools/cli/call_action_with_network.py +207 -0
  6. dsp_tools/cli/create_parsers.py +156 -58
  7. dsp_tools/cli/entry_point.py +56 -26
  8. dsp_tools/cli/utils.py +87 -0
  9. dsp_tools/clients/CLAUDE.md +420 -0
  10. dsp_tools/clients/authentication_client.py +14 -0
  11. dsp_tools/clients/authentication_client_live.py +66 -0
  12. dsp_tools/{utils → clients}/connection.py +2 -18
  13. dsp_tools/clients/connection_live.py +233 -0
  14. dsp_tools/clients/fuseki_metrics.py +60 -0
  15. dsp_tools/clients/group_user_clients.py +35 -0
  16. dsp_tools/clients/group_user_clients_live.py +181 -0
  17. dsp_tools/clients/legal_info_client.py +23 -0
  18. dsp_tools/clients/legal_info_client_live.py +132 -0
  19. dsp_tools/clients/list_client.py +49 -0
  20. dsp_tools/clients/list_client_live.py +166 -0
  21. dsp_tools/clients/metadata_client.py +24 -0
  22. dsp_tools/clients/metadata_client_live.py +47 -0
  23. dsp_tools/clients/ontology_clients.py +49 -0
  24. dsp_tools/clients/ontology_create_client_live.py +166 -0
  25. dsp_tools/clients/ontology_get_client_live.py +80 -0
  26. dsp_tools/clients/permissions_client.py +68 -0
  27. dsp_tools/clients/project_client.py +16 -0
  28. dsp_tools/clients/project_client_live.py +66 -0
  29. dsp_tools/commands/create/communicate_problems.py +24 -0
  30. dsp_tools/commands/create/create.py +134 -0
  31. dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
  32. dsp_tools/commands/create/create_on_server/classes.py +99 -0
  33. dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
  34. dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
  35. dsp_tools/commands/create/create_on_server/group_users.py +165 -0
  36. dsp_tools/commands/create/create_on_server/lists.py +163 -0
  37. dsp_tools/commands/create/create_on_server/mappers.py +12 -0
  38. dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
  39. dsp_tools/commands/create/create_on_server/ontology.py +52 -0
  40. dsp_tools/commands/create/create_on_server/project.py +68 -0
  41. dsp_tools/commands/create/create_on_server/properties.py +119 -0
  42. dsp_tools/commands/create/exceptions.py +29 -0
  43. dsp_tools/commands/create/lists_only.py +66 -0
  44. dsp_tools/commands/create/models/create_problems.py +87 -0
  45. dsp_tools/commands/create/models/parsed_ontology.py +88 -0
  46. dsp_tools/commands/create/models/parsed_project.py +81 -0
  47. dsp_tools/commands/create/models/rdf_ontology.py +12 -0
  48. dsp_tools/commands/create/models/server_project_info.py +100 -0
  49. dsp_tools/commands/create/parsing/parse_lists.py +45 -0
  50. dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
  51. dsp_tools/commands/create/parsing/parse_project.py +149 -0
  52. dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
  53. dsp_tools/commands/create/project_validate.py +595 -0
  54. dsp_tools/commands/create/serialisation/ontology.py +119 -0
  55. dsp_tools/commands/create/serialisation/project.py +44 -0
  56. dsp_tools/commands/excel2json/CLAUDE.md +101 -0
  57. dsp_tools/commands/excel2json/json_header.py +57 -23
  58. dsp_tools/commands/excel2json/{new_lists → lists}/compliance_checks.py +26 -26
  59. dsp_tools/commands/excel2json/{new_lists/make_new_lists.py → lists/make_lists.py} +19 -18
  60. dsp_tools/commands/excel2json/{new_lists → lists}/models/input_error.py +1 -12
  61. dsp_tools/commands/excel2json/{new_lists → lists}/models/serialise.py +9 -5
  62. dsp_tools/commands/excel2json/{new_lists → lists}/utils.py +4 -4
  63. dsp_tools/commands/excel2json/models/input_error.py +31 -11
  64. dsp_tools/commands/excel2json/models/json_header.py +53 -15
  65. dsp_tools/commands/excel2json/models/ontology.py +4 -3
  66. dsp_tools/commands/excel2json/{lists.py → old_lists.py} +26 -112
  67. dsp_tools/commands/excel2json/project.py +78 -34
  68. dsp_tools/commands/excel2json/properties.py +57 -36
  69. dsp_tools/commands/excel2json/resources.py +32 -12
  70. dsp_tools/commands/excel2json/utils.py +20 -1
  71. dsp_tools/commands/excel2xml/__init__.py +2 -2
  72. dsp_tools/commands/excel2xml/excel2xml_cli.py +7 -15
  73. dsp_tools/commands/excel2xml/excel2xml_lib.py +138 -493
  74. dsp_tools/commands/excel2xml/propertyelement.py +5 -5
  75. dsp_tools/commands/{project → get}/get.py +29 -13
  76. dsp_tools/commands/get/get_permissions.py +257 -0
  77. dsp_tools/commands/get/get_permissions_legacy.py +89 -0
  78. dsp_tools/commands/{project/models → get/legacy_models}/context.py +6 -6
  79. dsp_tools/commands/{project/models → get/legacy_models}/group.py +5 -10
  80. dsp_tools/commands/{project/models → get/legacy_models}/listnode.py +5 -35
  81. dsp_tools/commands/{project/models → get/legacy_models}/model.py +1 -1
  82. dsp_tools/commands/{project/models → get/legacy_models}/ontology.py +9 -14
  83. dsp_tools/commands/{project/models → get/legacy_models}/project.py +13 -6
  84. dsp_tools/commands/{project/models → get/legacy_models}/propertyclass.py +9 -16
  85. dsp_tools/commands/{project/models → get/legacy_models}/resourceclass.py +8 -46
  86. dsp_tools/commands/{project/models → get/legacy_models}/user.py +19 -60
  87. dsp_tools/commands/get/models/permissions_models.py +10 -0
  88. dsp_tools/commands/id2iri.py +20 -10
  89. dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +81 -56
  90. dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +4 -10
  91. dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +97 -37
  92. dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +2 -2
  93. dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +9 -10
  94. dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +3 -3
  95. dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +2 -10
  96. dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +12 -2
  97. dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +8 -9
  98. dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +18 -18
  99. dsp_tools/commands/start_stack.py +126 -77
  100. dsp_tools/commands/update_legal/CLAUDE.md +344 -0
  101. dsp_tools/commands/update_legal/__init__.py +0 -0
  102. dsp_tools/commands/update_legal/core.py +182 -0
  103. dsp_tools/commands/update_legal/csv_operations.py +135 -0
  104. dsp_tools/commands/update_legal/models.py +87 -0
  105. dsp_tools/commands/update_legal/xml_operations.py +247 -0
  106. dsp_tools/commands/validate_data/CLAUDE.md +159 -0
  107. dsp_tools/commands/validate_data/__init__.py +0 -0
  108. dsp_tools/commands/validate_data/constants.py +59 -0
  109. dsp_tools/commands/validate_data/mappers.py +143 -0
  110. dsp_tools/commands/validate_data/models/__init__.py +0 -0
  111. dsp_tools/commands/validate_data/models/api_responses.py +45 -0
  112. dsp_tools/commands/validate_data/models/input_problems.py +119 -0
  113. dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
  114. dsp_tools/commands/validate_data/models/validation.py +106 -0
  115. dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
  116. dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
  117. dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
  118. dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
  119. dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
  120. dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
  121. dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
  122. dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
  123. dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
  124. dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
  125. dsp_tools/commands/{xml_validate/sparql/resource_shacl.py → validate_data/sparql/cardinality_shacl.py} +45 -47
  126. dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
  127. dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
  128. dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
  129. dsp_tools/commands/validate_data/utils.py +59 -0
  130. dsp_tools/commands/validate_data/validate_data.py +283 -0
  131. dsp_tools/commands/validate_data/validation/__init__.py +0 -0
  132. dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
  133. dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
  134. dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
  135. dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
  136. dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
  137. dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
  138. dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
  139. dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
  140. dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
  141. dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
  142. dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
  143. dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
  144. dsp_tools/commands/xmlupload/models/formatted_text_value.py +0 -25
  145. dsp_tools/commands/xmlupload/models/ingest.py +56 -70
  146. dsp_tools/commands/xmlupload/models/input_problems.py +6 -14
  147. dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
  148. dsp_tools/commands/xmlupload/models/permission.py +0 -39
  149. dsp_tools/commands/xmlupload/models/{deserialise/xmlpermission.py → permissions_parsed.py} +2 -2
  150. dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
  151. dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
  152. dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
  153. dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
  154. dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
  155. dsp_tools/commands/xmlupload/models/upload_clients.py +3 -3
  156. dsp_tools/commands/xmlupload/models/upload_state.py +2 -4
  157. dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
  158. dsp_tools/commands/xmlupload/{ark2iri.py → prepare_xml_input/ark2iri.py} +1 -1
  159. dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
  160. dsp_tools/commands/xmlupload/{iiif_uri_validator.py → prepare_xml_input/iiif_uri_validator.py} +2 -14
  161. dsp_tools/commands/xmlupload/{list_client.py → prepare_xml_input/list_client.py} +15 -10
  162. dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
  163. dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
  164. dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
  165. dsp_tools/commands/xmlupload/resource_create_client.py +7 -468
  166. dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
  167. dsp_tools/commands/xmlupload/stash/{construct_and_analyze_graph.py → analyse_circular_reference_graph.py} +64 -157
  168. dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
  169. dsp_tools/commands/xmlupload/stash/graph_models.py +13 -8
  170. dsp_tools/commands/xmlupload/stash/stash_circular_references.py +48 -115
  171. dsp_tools/commands/xmlupload/stash/stash_models.py +4 -9
  172. dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +34 -40
  173. dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +98 -108
  174. dsp_tools/commands/xmlupload/upload_config.py +8 -0
  175. dsp_tools/commands/xmlupload/write_diagnostic_info.py +14 -9
  176. dsp_tools/commands/xmlupload/xmlupload.py +214 -192
  177. dsp_tools/config/__init__.py +0 -0
  178. dsp_tools/config/logger_config.py +69 -0
  179. dsp_tools/{utils → config}/warnings_config.py +4 -1
  180. dsp_tools/error/__init__.py +0 -0
  181. dsp_tools/error/custom_warnings.py +39 -0
  182. dsp_tools/error/exceptions.py +204 -0
  183. dsp_tools/error/problems.py +10 -0
  184. dsp_tools/error/xmllib_errors.py +20 -0
  185. dsp_tools/error/xmllib_warnings.py +54 -0
  186. dsp_tools/error/xmllib_warnings_util.py +159 -0
  187. dsp_tools/error/xsd_validation_error_msg.py +19 -0
  188. dsp_tools/legacy_models/__init__.py +0 -0
  189. dsp_tools/{models → legacy_models}/datetimestamp.py +7 -7
  190. dsp_tools/{models → legacy_models}/langstring.py +1 -1
  191. dsp_tools/{models → legacy_models}/projectContext.py +4 -4
  192. dsp_tools/resources/schema/data.xsd +108 -83
  193. dsp_tools/resources/schema/lists-only.json +4 -23
  194. dsp_tools/resources/schema/project.json +80 -35
  195. dsp_tools/resources/schema/properties-only.json +1 -4
  196. dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
  197. dsp_tools/resources/start-stack/docker-compose.yml +34 -30
  198. dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
  199. dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
  200. dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
  201. dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
  202. dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
  203. dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
  204. dsp_tools/utils/ansi_colors.py +32 -0
  205. dsp_tools/utils/data_formats/__init__.py +0 -0
  206. dsp_tools/utils/{date_util.py → data_formats/date_util.py} +13 -1
  207. dsp_tools/utils/data_formats/iri_util.py +30 -0
  208. dsp_tools/utils/{shared.py → data_formats/shared.py} +1 -35
  209. dsp_tools/utils/{uri_util.py → data_formats/uri_util.py} +12 -2
  210. dsp_tools/utils/fuseki_bloating.py +63 -0
  211. dsp_tools/utils/json_parsing.py +22 -0
  212. dsp_tools/utils/rdf_constants.py +42 -0
  213. dsp_tools/utils/rdflib_utils.py +10 -0
  214. dsp_tools/utils/replace_id_with_iri.py +66 -0
  215. dsp_tools/utils/request_utils.py +238 -0
  216. dsp_tools/utils/xml_parsing/__init__.py +0 -0
  217. dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
  218. dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
  219. dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
  220. dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
  221. dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
  222. dsp_tools/xmllib/CLAUDE.md +302 -0
  223. dsp_tools/xmllib/__init__.py +49 -0
  224. dsp_tools/xmllib/general_functions.py +877 -0
  225. dsp_tools/xmllib/internal/__init__.py +0 -0
  226. dsp_tools/xmllib/internal/checkers.py +162 -0
  227. dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
  228. dsp_tools/xmllib/internal/constants.py +46 -0
  229. dsp_tools/xmllib/internal/input_converters.py +155 -0
  230. dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
  231. dsp_tools/xmllib/internal/serialise_resource.py +177 -0
  232. dsp_tools/xmllib/internal/serialise_values.py +152 -0
  233. dsp_tools/xmllib/internal/type_aliases.py +11 -0
  234. dsp_tools/xmllib/models/config_options.py +28 -0
  235. dsp_tools/xmllib/models/date_formats.py +48 -0
  236. dsp_tools/xmllib/models/dsp_base_resources.py +1380 -400
  237. dsp_tools/xmllib/models/internal/__init__.py +0 -0
  238. dsp_tools/xmllib/models/internal/file_values.py +172 -0
  239. dsp_tools/xmllib/models/internal/geometry.py +162 -0
  240. dsp_tools/xmllib/models/{migration_metadata.py → internal/migration_metadata.py} +14 -10
  241. dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
  242. dsp_tools/xmllib/models/internal/values.py +342 -0
  243. dsp_tools/xmllib/models/licenses/__init__.py +0 -0
  244. dsp_tools/xmllib/models/licenses/other.py +59 -0
  245. dsp_tools/xmllib/models/licenses/recommended.py +107 -0
  246. dsp_tools/xmllib/models/permissions.py +41 -0
  247. dsp_tools/xmllib/models/res.py +1782 -0
  248. dsp_tools/xmllib/models/root.py +313 -26
  249. dsp_tools/xmllib/value_checkers.py +310 -47
  250. dsp_tools/xmllib/value_converters.py +765 -8
  251. dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
  252. dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
  253. dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
  254. {dsp_tools-9.1.0.post11.dist-info → dsp_tools-18.3.0.post13.dist-info}/entry_points.txt +1 -0
  255. dsp_tools/commands/project/create/project_create.py +0 -1107
  256. dsp_tools/commands/project/create/project_create_lists.py +0 -204
  257. dsp_tools/commands/project/create/project_validate.py +0 -453
  258. dsp_tools/commands/project/models/project_definition.py +0 -12
  259. dsp_tools/commands/rosetta.py +0 -124
  260. dsp_tools/commands/template.py +0 -30
  261. dsp_tools/commands/xml_validate/api_connection.py +0 -122
  262. dsp_tools/commands/xml_validate/deserialise_input.py +0 -135
  263. dsp_tools/commands/xml_validate/make_data_rdf.py +0 -193
  264. dsp_tools/commands/xml_validate/models/data_deserialised.py +0 -108
  265. dsp_tools/commands/xml_validate/models/data_rdf.py +0 -214
  266. dsp_tools/commands/xml_validate/models/input_problems.py +0 -191
  267. dsp_tools/commands/xml_validate/models/validation.py +0 -29
  268. dsp_tools/commands/xml_validate/reformat_validaton_result.py +0 -89
  269. dsp_tools/commands/xml_validate/sparql/construct_shapes.py +0 -16
  270. dsp_tools/commands/xml_validate/xml_validate.py +0 -151
  271. dsp_tools/commands/xmlupload/check_consistency_with_ontology.py +0 -253
  272. dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py +0 -236
  273. dsp_tools/commands/xmlupload/models/deserialise/xmlresource.py +0 -171
  274. dsp_tools/commands/xmlupload/models/namespace_context.py +0 -39
  275. dsp_tools/commands/xmlupload/models/ontology_lookup_models.py +0 -161
  276. dsp_tools/commands/xmlupload/models/ontology_problem_models.py +0 -178
  277. dsp_tools/commands/xmlupload/models/serialise/jsonld_serialiser.py +0 -40
  278. dsp_tools/commands/xmlupload/models/serialise/serialise_value.py +0 -51
  279. dsp_tools/commands/xmlupload/ontology_client.py +0 -92
  280. dsp_tools/commands/xmlupload/project_client.py +0 -91
  281. dsp_tools/commands/xmlupload/read_validate_xml_file.py +0 -99
  282. dsp_tools/models/custom_warnings.py +0 -31
  283. dsp_tools/models/exceptions.py +0 -90
  284. dsp_tools/resources/0100-template-repo/template.json +0 -45
  285. dsp_tools/resources/0100-template-repo/template.xml +0 -27
  286. dsp_tools/resources/start-stack/docker-compose-validation.yml +0 -5
  287. dsp_tools/resources/start-stack/start-stack-config.yml +0 -4
  288. dsp_tools/resources/xml_validate/api-shapes.ttl +0 -411
  289. dsp_tools/resources/xml_validate/replace_namespace.xslt +0 -61
  290. dsp_tools/utils/connection_live.py +0 -383
  291. dsp_tools/utils/iri_util.py +0 -14
  292. dsp_tools/utils/logger_config.py +0 -41
  293. dsp_tools/utils/set_encoder.py +0 -20
  294. dsp_tools/utils/xml_utils.py +0 -145
  295. dsp_tools/utils/xml_validation.py +0 -197
  296. dsp_tools/utils/xml_validation_models.py +0 -68
  297. dsp_tools/xmllib/models/file_values.py +0 -78
  298. dsp_tools/xmllib/models/resource.py +0 -415
  299. dsp_tools/xmllib/models/values.py +0 -428
  300. dsp_tools-9.1.0.post11.dist-info/METADATA +0 -130
  301. dsp_tools-9.1.0.post11.dist-info/RECORD +0 -167
  302. dsp_tools-9.1.0.post11.dist-info/WHEEL +0 -4
  303. dsp_tools-9.1.0.post11.dist-info/licenses/LICENSE +0 -674
  304. /dsp_tools/{commands/excel2json/new_lists → clients}/__init__.py +0 -0
  305. /dsp_tools/commands/{excel2json/new_lists/models → create}/__init__.py +0 -0
  306. /dsp_tools/commands/{project → create/create_on_server}/__init__.py +0 -0
  307. /dsp_tools/commands/{project/create → create/models}/__init__.py +0 -0
  308. /dsp_tools/commands/{project/models → create/parsing}/__init__.py +0 -0
  309. /dsp_tools/commands/{xml_validate → create/serialisation}/__init__.py +0 -0
  310. /dsp_tools/commands/{xml_validate/models → excel2json/lists}/__init__.py +0 -0
  311. /dsp_tools/commands/{xml_validate/sparql → excel2json/lists/models}/__init__.py +0 -0
  312. /dsp_tools/commands/excel2json/{new_lists → lists}/models/deserialise.py +0 -0
  313. /dsp_tools/commands/{xmlupload/models/deserialise → get}/__init__.py +0 -0
  314. /dsp_tools/commands/{xmlupload/models/serialise → get/legacy_models}/__init__.py +0 -0
  315. /dsp_tools/commands/{project/models → get/legacy_models}/helpers.py +0 -0
  316. /dsp_tools/{models → commands/get/models}/__init__.py +0 -0
@@ -0,0 +1,283 @@
1
+ from datetime import datetime
2
+ from pathlib import Path
3
+ from typing import cast
4
+
5
+ import pandas as pd
6
+ from loguru import logger
7
+
8
+ from dsp_tools.cli.args import ServerCredentials
9
+ from dsp_tools.cli.args import ValidateDataConfig
10
+ from dsp_tools.cli.args import ValidationSeverity
11
+ from dsp_tools.clients.authentication_client import AuthenticationClient
12
+ from dsp_tools.clients.authentication_client_live import AuthenticationClientLive
13
+ from dsp_tools.clients.metadata_client import ExistingResourcesRetrieved
14
+ from dsp_tools.commands.validate_data.models.input_problems import OntologyValidationProblem
15
+ from dsp_tools.commands.validate_data.models.input_problems import SortedProblems
16
+ from dsp_tools.commands.validate_data.models.input_problems import UnknownClassesInData
17
+ from dsp_tools.commands.validate_data.models.input_problems import ValidateDataResult
18
+ from dsp_tools.commands.validate_data.models.validation import RDFGraphs
19
+ from dsp_tools.commands.validate_data.models.validation import ValidationReportGraphs
20
+ from dsp_tools.commands.validate_data.prepare_data.prepare_data import get_info_and_parsed_resources_from_file
21
+ from dsp_tools.commands.validate_data.prepare_data.prepare_data import prepare_data_for_validation_from_parsed_resource
22
+ from dsp_tools.commands.validate_data.process_validation_report.get_user_validation_message import get_user_message
23
+ from dsp_tools.commands.validate_data.process_validation_report.get_user_validation_message import sort_user_problems
24
+ from dsp_tools.commands.validate_data.process_validation_report.query_validation_result import reformat_validation_graph
25
+ from dsp_tools.commands.validate_data.shacl_cli_validator import ShaclCliValidator
26
+ from dsp_tools.commands.validate_data.validation.check_duplicate_files import check_for_duplicate_files
27
+ from dsp_tools.commands.validate_data.validation.check_for_unknown_classes import check_for_unknown_resource_classes
28
+ from dsp_tools.commands.validate_data.validation.check_for_unknown_classes import get_msg_str_unknown_classes_in_data
29
+ from dsp_tools.commands.validate_data.validation.get_validation_report import get_validation_report
30
+ from dsp_tools.commands.validate_data.validation.validate_ontology import get_msg_str_ontology_validation_violation
31
+ from dsp_tools.commands.validate_data.validation.validate_ontology import validate_ontology
32
+ from dsp_tools.error.exceptions import BaseError
33
+ from dsp_tools.utils.ansi_colors import BACKGROUND_BOLD_CYAN
34
+ from dsp_tools.utils.ansi_colors import BACKGROUND_BOLD_GREEN
35
+ from dsp_tools.utils.ansi_colors import BACKGROUND_BOLD_RED
36
+ from dsp_tools.utils.ansi_colors import BACKGROUND_BOLD_YELLOW
37
+ from dsp_tools.utils.ansi_colors import BOLD_CYAN
38
+ from dsp_tools.utils.ansi_colors import BOLD_RED
39
+ from dsp_tools.utils.ansi_colors import BOLD_YELLOW
40
+ from dsp_tools.utils.ansi_colors import RESET_TO_DEFAULT
41
+ from dsp_tools.utils.data_formats.uri_util import is_prod_like_server
42
+ from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedResource
43
+
44
# Console banner printed whenever validation errors are reported to the user.
# The text is wrapped between an ANSI style constant and the reset sequence.
VALIDATION_ERRORS_FOUND_MSG = (
    BACKGROUND_BOLD_RED
    + "\n Validation errors found! "
    + RESET_TO_DEFAULT
)

# Console banner printed whenever the validation did not find any errors.
NO_VALIDATION_ERRORS_FOUND_MSG = (
    BACKGROUND_BOLD_GREEN
    + "\n No validation errors found! "
    + RESET_TO_DEFAULT
)
46
+
47
+
48
def validate_data(
    filepath: Path,
    creds: ServerCredentials,
    ignore_duplicate_files_warning: bool,
    save_graphs: bool,
    skip_ontology_validation: bool,
    id2iri_file: str | None,
    do_not_request_resource_metadata_from_db: bool,
) -> bool:
    """
    Validate an XML data file against the ontologies of its project on the server.

    The function builds the validation configuration and an authentication client,
    parses the file, and then delegates to `validate_parsed_resources()`.

    Args:
        filepath: path to the xml data file
        creds: server credentials for authentication
        ignore_duplicate_files_warning: if set, the check for duplicate files is not carried out
        save_graphs: if set, all the graphs are saved in a "graphs" folder next to the xml file
        skip_ontology_validation: if set, the ontology itself is not validated
        id2iri_file: mapping file that is used to replace internal IDs of the XML file by IRIs
        do_not_request_resource_metadata_from_db: true if no metadata for existing resources should be requested

    Returns:
        True if no errors that impede an xmlupload were found.
        Warnings and user info do not impede an xmlupload.
    """
    # The save location is only resolved (and its directory created) if the user asked for the graphs.
    graph_save_dir = _get_graph_save_dir(filepath) if save_graphs else None
    on_prod_like_server = is_prod_like_server(creds.server)
    config = ValidateDataConfig(
        xml_file=filepath,
        save_graph_dir=graph_save_dir,
        severity=ValidationSeverity.INFO,
        ignore_duplicate_files_warning=ignore_duplicate_files_warning,
        is_on_prod_server=on_prod_like_server,
        skip_ontology_validation=skip_ontology_validation,
        do_not_request_resource_metadata_from_db=do_not_request_resource_metadata_from_db,
    )
    auth = AuthenticationClientLive(server=creds.server, email=creds.user, password=creds.password)

    file_content = get_info_and_parsed_resources_from_file(
        file=filepath,
        api_url=auth.server,
        id2iri_file=id2iri_file,
    )
    parsed_resources, shortcode, authorship_lookup, permission_ids = file_content
    return validate_parsed_resources(
        parsed_resources=parsed_resources,
        authorship_lookup=authorship_lookup,
        permission_ids=permission_ids,
        shortcode=shortcode,
        config=config,
        auth=auth,
    )
101
+
102
+
103
def validate_parsed_resources(
    parsed_resources: list[ParsedResource],
    authorship_lookup: dict[str, list[str]],
    permission_ids: list[str],
    shortcode: str,
    config: ValidateDataConfig,
    auth: AuthenticationClient,
) -> bool:
    """
    Validate already parsed resources and communicate the result to the user.

    The outcome is printed to the console and written to the log.

    Args:
        parsed_resources: resources that should be validated
        authorship_lookup: passed on unchanged to the data preparation step
        permission_ids: passed on unchanged to the data preparation step
        shortcode: shortcode of the project
        config: configuration of the validation
        auth: authentication client, passed on to the data preparation step

    Returns:
        True if no problems were found, or if the problems found do not block an upload
        (as decided by `_get_validation_status()`). False otherwise.

    Raises:
        BaseError: if the validation returned a kind of problem that is not handled here
    """
    start_msg = "Starting SHACL schema validation."
    print(start_msg)
    logger.debug(start_msg)

    rdf_graphs, used_iris, existing_resources_retrieved = prepare_data_for_validation_from_parsed_resource(
        parsed_resources=parsed_resources,
        authorship_lookup=authorship_lookup,
        permission_ids=permission_ids,
        auth=auth,
        shortcode=shortcode,
        do_not_request_resource_metadata_from_db=config.do_not_request_resource_metadata_from_db,
    )
    validation_result = _validate_data(
        rdf_graphs, used_iris, parsed_resources, config, shortcode, existing_resources_retrieved
    )

    if validation_result.no_problems:
        logger.debug("No validation errors found.")
        print(NO_VALIDATION_ERRORS_FOUND_MSG)
        return True

    problems = validation_result.problems

    # If unknown classes are found, we cannot validate all the data in the file.
    if isinstance(problems, UnknownClassesInData):
        unknown_classes_msg = get_msg_str_unknown_classes_in_data(problems)
        logger.error(unknown_classes_msg)
        print(VALIDATION_ERRORS_FOUND_MSG)
        print(unknown_classes_msg + "\n")
        return False

    # If the ontology itself has errors, we will not validate the data.
    if isinstance(problems, OntologyValidationProblem):
        ontology_msg = get_msg_str_ontology_validation_violation(problems)
        logger.error(ontology_msg)
        print(VALIDATION_ERRORS_FOUND_MSG)
        print(ontology_msg + "\n")
        return False

    if not isinstance(problems, SortedProblems):
        raise BaseError(f"Unknown validate data problems: {problems!s}")

    _print_shacl_validation_violation_message(problems, validation_result.report_graphs, config)
    return _get_validation_status(problems, config.is_on_prod_server)
148
+
149
+
150
def _validate_data(
    graphs: RDFGraphs,
    used_iris: set[str],
    parsed_resources: list[ParsedResource],
    config: ValidateDataConfig,
    shortcode: str,
    existing_resources_retrieved: ExistingResourcesRetrieved,
) -> ValidateDataResult:
    """
    Run the individual validation steps in order and stop at the first step that blocks the following ones.

    1. Check if the data uses unknown resource classes.
    2. Validate the ontology (unless this is skipped through the config).
    3. Check for duplicate files (unless this is ignored through the config) and validate the data.

    Args:
        graphs: RDF graphs that are validated
        used_iris: IRIs that are checked against the known resource classes
        parsed_resources: resources that are checked for duplicate files
        config: configuration of the validation
        shortcode: shortcode of the project, passed on when sorting the problems
        existing_resources_retrieved: passed on when sorting the problems

    Returns:
        The validation result with the problems found, if any.
    """
    logger.debug(f"Validate-data called with the following config: {vars(config)}")

    # Step 1: unknown classes
    unknown_classes = check_for_unknown_resource_classes(graphs, used_iris)
    if unknown_classes:
        return ValidateDataResult(False, unknown_classes, None)

    shacl_validator = ShaclCliValidator()

    # Step 2: the ontology
    if not config.skip_ontology_validation:
        ontology_problems = validate_ontology(graphs.ontos, shacl_validator, config)
        if ontology_problems:
            return ValidateDataResult(False, ontology_problems, None)

    # Step 3: the data
    duplicate_file_warnings = (
        None if config.ignore_duplicate_files_warning else check_for_duplicate_files(parsed_resources)
    )
    report = get_validation_report(graphs, shacl_validator, config.save_graph_dir)

    if not report.conforms:
        reformatted = reformat_validation_graph(report)
        sorted_problems = sort_user_problems(
            reformatted, duplicate_file_warnings, shortcode, existing_resources_retrieved
        )
        return ValidateDataResult(False, sorted_problems, report)

    if duplicate_file_warnings:
        # The data conforms, the duplicate files are the only thing to communicate to the user.
        warnings_only = SortedProblems(
            unique_violations=[],
            user_warnings=duplicate_file_warnings.problems,
            user_info=[],
            unexpected_shacl_validation_components=[],
        )
        return ValidateDataResult(False, warnings_only, report)

    return ValidateDataResult(True, None, None)
187
+
188
+
189
def _get_graph_save_dir(filepath: Path) -> Path:
    """
    Create the folder "graphs" next to the input file (if it does not exist yet) and inform the user about it.

    Args:
        filepath: path to the input file

    Returns:
        Path inside the "graphs" folder that ends with the stem of the input file.
    """
    graphs_dir = filepath.parent / "graphs"
    graphs_dir.mkdir(exist_ok=True)
    file_prefix = graphs_dir / filepath.stem
    info_msg = BOLD_CYAN + f"\n Saving graphs to {file_prefix} " + RESET_TO_DEFAULT
    print(info_msg)
    return file_prefix
196
+
197
+
198
def _get_validation_status(all_problems: SortedProblems, is_on_prod: bool) -> bool:
    """
    Decide if the validation counts as passed.

    Args:
        all_problems: problems found during the validation, sorted by their kind
        is_on_prod: whether the stricter rule for warnings applies

    Returns:
        False if there are violations or unexpected SHACL validation components.
        False if `is_on_prod` is set and there are user warnings.
        True otherwise, user info alone never leads to a failure.
    """
    has_violations = bool(all_problems.unique_violations) or bool(
        all_problems.unexpected_shacl_validation_components
    )
    if has_violations:
        return False
    # Warnings only fail the validation if the stricter rule is requested by the caller.
    warnings_are_blocking = is_on_prod and bool(all_problems.user_warnings)
    return not warnings_are_blocking
210
+
211
+
212
def _print_shacl_validation_violation_message(
    sorted_problems: SortedProblems, report: ValidationReportGraphs | None, config: ValidateDataConfig
) -> None:
    """
    Print the validation problems to the console, grouped by their kind, and write them to the log.

    If a message comes with a data frame, the data frame is saved as CSV next to the xml file
    and the printed / logged body only contains the hint where the file was saved.

    Args:
        sorted_problems: problems that should be communicated to the user
        report: validation report graphs, they are saved if unexpected violations were found
            and the graphs were not already saved through the config
        config: configuration of the validation
    """
    messages = get_user_message(sorted_problems, config.severity)

    # NOTE: loguru treats additional positional arguments as arguments for `str.format()`.
    # The header and the body are therefore passed through explicit "{}" placeholders,
    # otherwise the body would not end up in the log.
    if messages.violations:
        print(VALIDATION_ERRORS_FOUND_MSG)
        print(BOLD_RED, messages.violations.message_header, RESET_TO_DEFAULT)
        v_body = messages.violations.message_body
        if messages.violations.message_df is not None:
            v_body = _save_message_df_get_message_body(messages.violations.message_df, "error", config.xml_file)
        print(v_body)
        logger.error("{}\n{}", messages.violations.message_header, v_body)
    else:
        logger.debug("No validation errors found.")
        print(NO_VALIDATION_ERRORS_FOUND_MSG)

    # NOTE(review): presumably the severity values are ordered INFO=1 < WARNING=2 < ERROR,
    # confirm against the definition of ValidationSeverity.
    if messages.warnings and config.severity.value <= 2:
        print(BACKGROUND_BOLD_YELLOW + "\n Warning! " + RESET_TO_DEFAULT)
        print(BOLD_YELLOW, messages.warnings.message_header, RESET_TO_DEFAULT)
        w_body = messages.warnings.message_body
        if messages.warnings.message_df is not None:
            w_body = _save_message_df_get_message_body(messages.warnings.message_df, "warning", config.xml_file)
        print(w_body)
        logger.warning("{}\n{}", messages.warnings.message_header, w_body)

    if messages.infos and config.severity.value == 1:
        print(BACKGROUND_BOLD_CYAN + "\n Potential Problems Found " + RESET_TO_DEFAULT)
        print(BOLD_CYAN, messages.infos.message_header, RESET_TO_DEFAULT)
        i_body = messages.infos.message_body
        if messages.infos.message_df is not None:
            i_body = _save_message_df_get_message_body(messages.infos.message_df, "info", config.xml_file)
        print(i_body)
        logger.info("{}\n{}", messages.infos.message_header, i_body)

    if messages.unexpected_violations:
        logger.error(
            "{}\n{}",
            messages.unexpected_violations.message_header,
            messages.unexpected_violations.message_body,
        )
        print(
            BACKGROUND_BOLD_RED,
            "\n Unknown violations found! ",
            RESET_TO_DEFAULT,
        )
        if config.save_graph_dir:
            # The graphs were already saved, so the user only needs to be pointed to them.
            print(
                BOLD_RED,
                messages.unexpected_violations.message_header,
                "Consult the saved graphs for details.",
                RESET_TO_DEFAULT,
            )
            print(messages.unexpected_violations.message_body)
        else:
            # The cast only informs the type checker, it does not guard against `report` being None.
            report_graph = cast(ValidationReportGraphs, report)
            _save_unexpected_results_and_inform_user(report_graph, config.xml_file)
    print("\n")
262
+
263
+
264
def _save_message_df_get_message_body(df: pd.DataFrame, severity: str, file_path: Path) -> str:
    """
    Save the data frame as CSV next to the input file and return a message that tells the user where it is.

    Args:
        df: data frame that is written to the CSV file (without its index)
        severity: becomes part of the file name: "<stem of the input file>_validation_<severity>.csv"
        file_path: path to the input file, the CSV is written into its parent directory

    Returns:
        Message with the location of the CSV file
    """
    csv_name = f"{file_path.stem}_validation_{severity}.csv"
    out_path = file_path.parent / csv_name
    df.to_csv(out_path, index=False)
    return f"Due to the large number of violations the information was saved at '{out_path}'"
269
+
270
+
271
def _save_unexpected_results_and_inform_user(report: ValidationReportGraphs, filepath: Path) -> None:
    """
    Save the graphs of the validation report next to the input file and ask the user to contact the developers.

    Three files are written into the parent directory of `filepath`, all of them start with the same timestamp:
    "<timestamp>_validation_result.ttl", "<timestamp>_shacl.ttl" and "<timestamp>_data.ttl".

    Args:
        report: validation report with the validation graph, the SHACL graph and the data graph
        filepath: path to the input file
    """
    # `str(datetime.now())` contains spaces and colons. Colons are not allowed in file names on Windows,
    # therefore the timestamp is formatted explicitly with characters that are safe on all platforms.
    timestamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")
    out_dir = filepath.parent
    report.validation_graph.serialize(out_dir / f"{timestamp}_validation_result.ttl")
    report.shacl_graph.serialize(out_dir / f"{timestamp}_shacl.ttl")
    report.data_graph.serialize(out_dir / f"{timestamp}_data.ttl")
    msg = (
        f"\nPlease contact the development team with the files starting with the timestamp '{timestamp}' "
        f"in the directory '{out_dir}'."
    )
    print(BOLD_RED + msg + RESET_TO_DEFAULT)
@@ -0,0 +1,55 @@
1
+ from collections import defaultdict
2
+
3
+ from loguru import logger
4
+
5
+ from dsp_tools.commands.validate_data.models.input_problems import DuplicateFileWarning
6
+ from dsp_tools.commands.validate_data.models.input_problems import InputProblem
7
+ from dsp_tools.commands.validate_data.models.input_problems import ProblemType
8
+ from dsp_tools.commands.validate_data.models.input_problems import Severity
9
+ from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedResource
10
+
11
+
12
def check_for_duplicate_files(parsed_resources: list[ParsedResource]) -> DuplicateFileWarning | None:
    """
    Programmatic pre-validation that looks for filepaths referenced by more than one resource.

    A file that is referenced n times produces n * (n-1) validation errors,
    and too many of them may cause the SHACL validator to crash.
    Finding the duplicates beforehand prevents crashes during the later validation steps.

    Args:
        parsed_resources: Resources to check

    Returns:
        A warning containing one problem per duplicated filepath, or None if no filepath is used more than once
    """
    if duplicates := _get_filepaths_with_more_than_one_usage(parsed_resources):
        return DuplicateFileWarning(_create_input_problems(duplicates))
    return None
29
+
30
+
31
def _get_filepaths_with_more_than_one_usage(parsed_resources: list[ParsedResource]) -> dict[str, int]:
    """
    Count how often each file value occurs and keep only those that occur more than once.

    Resources without a file value, or with an empty file value, are ignored.

    Args:
        parsed_resources: Resources to check

    Returns:
        Mapping from file value to its number of usages, restricted to values used at least twice
    """
    usages: dict[str, int] = defaultdict(int)
    for resource in parsed_resources:
        file_value = resource.file_value
        if not file_value or not file_value.value:
            continue
        usages[file_value.value] += 1
    return {path: number for path, number in usages.items() if number >= 2}
37
+
38
+
39
def _create_input_problems(duplicates: dict[str, int]) -> list[InputProblem]:
    """
    Turn every duplicated file value into a warning-level input problem and log it.

    Args:
        duplicates: mapping from file value to the number of times it is used

    Returns:
        One problem per entry, in the iteration order of the mapping
    """
    problems = []
    for file_value, number_of_usages in duplicates.items():
        usage_msg = f"value used {number_of_usages} times"
        logger.warning(f"File '{file_value}' {usage_msg}")
        # The duplicate concerns several resources at once, therefore no single resource ID or type is given.
        problem = InputProblem(
            problem_type=ProblemType.FILE_DUPLICATE,
            res_id=None,
            res_type=None,
            prop_name="bitstream / iiif-uri",
            severity=Severity.WARNING,
            message=usage_msg,
            input_value=file_value,
        )
        problems.append(problem)
    return problems
@@ -0,0 +1,67 @@
1
+ from rdflib import Literal
2
+ from rdflib import URIRef
3
+
4
+ from dsp_tools.commands.validate_data.models.input_problems import UnknownClassesInData
5
+ from dsp_tools.commands.validate_data.models.validation import RDFGraphs
6
+ from dsp_tools.commands.validate_data.utils import reformat_onto_iri
7
+ from dsp_tools.utils.rdf_constants import KNORA_API_PREFIX
8
+
9
+
10
def check_for_unknown_resource_classes(
    rdf_graphs: RDFGraphs, used_resource_iris: set[str]
) -> UnknownClassesInData | None:
    """
    Compare the classes used in the data with the resource classes found in the ontology graphs.

    Args:
        rdf_graphs: Data graphs
        used_resource_iris: resource IRIs in use

    Returns:
        The unknown classes together with the defined classes (both reformatted), or None if all are known
    """
    known_iris = _get_all_onto_classes(rdf_graphs)
    unknown_iris = used_resource_iris.difference(known_iris)
    if not unknown_iris:
        return None
    return UnknownClassesInData(
        unknown_classes={reformat_onto_iri(iri) for iri in unknown_iris},
        defined_classes={reformat_onto_iri(iri) for iri in known_iris},
    )
29
+
30
+
31
def _get_all_onto_classes(rdf_graphs: RDFGraphs) -> set[str]:
    """
    Collect the IRIs of all classes that are marked both as resource class and as instantiable.

    The project ontologies and the knora-api graph are searched together.

    Args:
        rdf_graphs: graphs containing the ontologies

    Returns:
        IRIs (as strings) of subjects that have `isResourceClass` and `canBeInstantiated` set to true
    """
    combined = rdf_graphs.ontos + rdf_graphs.knora_api
    true_literal = Literal(True)
    marked_as_resource = set(combined.subjects(URIRef(KNORA_API_PREFIX + "isResourceClass"), true_literal))
    instantiable = set(combined.subjects(URIRef(KNORA_API_PREFIX + "canBeInstantiated"), true_literal))
    return {str(cls_iri) for cls_iri in instantiable & marked_as_resource}
39
+
40
+
41
def get_msg_str_unknown_classes_in_data(unknown: UnknownClassesInData) -> str:
    """
    Create the user message for classes that are used in the data but are not defined.

    If `_get_unknown_ontos_msg` produces a message about unknown ontologies, that message is returned instead,
    otherwise the unknown and the defined classes are listed alphabetically.

    Args:
        unknown: unknown and defined classes

    Returns:
        Message for the user
    """
    onto_msg = _get_unknown_ontos_msg(unknown)
    if onto_msg:
        return onto_msg
    unknown_str = ", ".join(sorted(unknown.unknown_classes))
    known_str = ", ".join(sorted(unknown.defined_classes))
    msg_lines = [
        "Your data uses resource classes that do not exist in the ontologies in the database.",
        f"The following classes that are used in the data are unknown: {unknown_str}",
        f"The following classes exist in the uploaded ontologies: {known_str}",
    ]
    return "\n".join(msg_lines)
51
+
52
+
53
def _get_unknown_ontos_msg(unknown: UnknownClassesInData) -> str | None:
    """
    Create a user message if the data uses ontology prefixes that none of the defined classes has.

    The prefix is the part of a class name before the first colon, names without a colon are ignored.
    The ontologies are listed alphabetically, so that the message is identical between runs.

    Args:
        unknown: unknown and defined classes

    Returns:
        Message for the user, or None if all the prefixes of the unknown classes also occur in the defined classes
    """

    def split_prefix(relative_iri: str) -> str | None:
        prefix, separator, _ = relative_iri.partition(":")
        return prefix if separator else None

    used_ontos = {prefix for x in unknown.unknown_classes if (prefix := split_prefix(x))}
    existing_ontos = {prefix for x in unknown.defined_classes if (prefix := split_prefix(x))}
    if unknown_found := used_ontos - existing_ontos:
        # Sets have no stable iteration order, sorting keeps the output deterministic.
        return (
            "Your data uses ontologies that don't exist in the database.\n"
            f"The following ontologies that are used in the data are unknown: {', '.join(sorted(unknown_found))}\n"
            f"The following ontologies are uploaded: {', '.join(sorted(existing_ontos))}"
        )
    return None
@@ -0,0 +1,94 @@
1
+ from pathlib import Path
2
+
3
+ from loguru import logger
4
+ from rdflib import Graph
5
+
6
+ from dsp_tools.commands.validate_data.constants import CARDINALITY_DATA_TTL
7
+ from dsp_tools.commands.validate_data.constants import CARDINALITY_REPORT_TTL
8
+ from dsp_tools.commands.validate_data.constants import CARDINALITY_SHACL_TTL
9
+ from dsp_tools.commands.validate_data.constants import CONTENT_DATA_TTL
10
+ from dsp_tools.commands.validate_data.constants import CONTENT_REPORT_TTL
11
+ from dsp_tools.commands.validate_data.constants import CONTENT_SHACL_TTL
12
+ from dsp_tools.commands.validate_data.models.validation import RDFGraphs
13
+ from dsp_tools.commands.validate_data.models.validation import ValidationFilePaths
14
+ from dsp_tools.commands.validate_data.models.validation import ValidationReportGraphs
15
+ from dsp_tools.commands.validate_data.shacl_cli_validator import ShaclCliValidator
16
+ from dsp_tools.commands.validate_data.utils import clean_up_temp_directory
17
+ from dsp_tools.commands.validate_data.utils import get_temp_directory
18
+ from dsp_tools.error.exceptions import ShaclValidationError
19
+
20
+
21
def get_validation_report(
    rdf_graphs: RDFGraphs, shacl_validator: ShaclCliValidator, graph_save_dir: Path | None = None
) -> ValidationReportGraphs:
    """
    Run the SHACL validation in a temporary directory and return the resulting graphs.

    Whether the validation succeeds or not, the temporary directory is passed to `clean_up_temp_directory`
    together with the directory where the graphs should be saved.
    If the validation fails, that directory is set to `validation-graphs` next to the temporary directory.

    Args:
        rdf_graphs: graphs to validate
        shacl_validator: SHACL CLI validator
        graph_save_dir: directory handed to the clean-up if the validation does not raise

    Returns:
        Graphs of the validation run

    Raises:
        ShaclValidationError: if any exception occurs during the validation, the original exception is logged
    """
    temp_dir = get_temp_directory()
    work_dir = Path(temp_dir.name)
    save_dir = graph_save_dir
    try:
        return _call_shacl_cli(rdf_graphs, shacl_validator, work_dir)
    except Exception as err:  # noqa: BLE001
        logger.exception(err)
        # Reassigned before raising, so that the clean-up in the `finally` clause receives this directory.
        save_dir = work_dir.parent / "validation-graphs"
        raise ShaclValidationError(
            "An error occurred during the data validation. "
            "Please contact the dsp-tools development team (at support@dasch.swiss) "
            f"with your log files and the files in the directory: {save_dir}"
        ) from None
    finally:
        clean_up_temp_directory(temp_dir, save_dir)
41
+
42
+
43
def _call_shacl_cli(
    rdf_graphs: RDFGraphs, shacl_validator: ShaclCliValidator, tmp_path: Path
) -> ValidationReportGraphs:
    """
    Write the graphs into the directory and validate first the cardinalities and then the content.

    The validation graphs of the runs that do not conform are merged into one result graph.

    Args:
        rdf_graphs: graphs to validate
        shacl_validator: SHACL CLI validator
        tmp_path: directory in which the turtle files are written and validated

    Returns:
        The merged validation results together with the SHACL, ontology and data graphs
    """
    _create_and_write_graphs(rdf_graphs, tmp_path)
    merged_results = Graph()
    all_conform = True
    validation_runs = [
        (CARDINALITY_DATA_TTL, CARDINALITY_SHACL_TTL, CARDINALITY_REPORT_TTL),
        (CONTENT_DATA_TTL, CONTENT_SHACL_TTL, CONTENT_REPORT_TTL),
    ]
    for data_file, shacl_file, report_file in validation_runs:
        file_paths = ValidationFilePaths(
            directory=tmp_path,
            data_file=data_file,
            shacl_file=shacl_file,
            report_file=report_file,
        )
        outcome = shacl_validator.validate(file_paths)
        if outcome.conforms:
            continue
        merged_results += outcome.validation_graph
        all_conform = False
    return ValidationReportGraphs(
        conforms=all_conform,
        validation_graph=merged_results,
        shacl_graph=rdf_graphs.cardinality_shapes + rdf_graphs.content_shapes,
        onto_graph=rdf_graphs.ontos + rdf_graphs.knora_api,
        data_graph=rdf_graphs.data,
    )
76
+
77
+
78
def _create_and_write_graphs(rdf_graphs: RDFGraphs, tmp_path: Path) -> None:
    """
    Serialise the graphs as turtle and write the data and SHACL files for both validation runs.

    Every graph is serialised once, the files are concatenations of the resulting strings.
    The files are written as UTF-8 regardless of the platform default encoding,
    so that non-ASCII characters in the data are written correctly on every system.

    Args:
        rdf_graphs: graphs to serialise
        tmp_path: directory in which the files are created
    """
    logger.debug("Serialise RDF graphs into turtle strings")
    data_str = rdf_graphs.data.serialize(format="ttl")
    ontos_str = rdf_graphs.ontos.serialize(format="ttl")
    card_shape_str = rdf_graphs.cardinality_shapes.serialize(format="ttl")
    content_shape_str = rdf_graphs.content_shapes.serialize(format="ttl")
    knora_api_str = rdf_graphs.knora_api.serialize(format="ttl")
    res_in_db_str = rdf_graphs.resources_in_db_graph.serialize(format="ttl")
    turtle_paths_and_graphs = [
        (tmp_path / CARDINALITY_DATA_TTL, data_str),
        (tmp_path / CARDINALITY_SHACL_TTL, card_shape_str + ontos_str + knora_api_str),
        (tmp_path / CONTENT_DATA_TTL, data_str + ontos_str + knora_api_str + res_in_db_str),
        (tmp_path / CONTENT_SHACL_TTL, content_shape_str + ontos_str + knora_api_str),
    ]
    for f_path, content in turtle_paths_and_graphs:
        # Without an explicit encoding, `open` falls back to the locale encoding, which is not UTF-8 everywhere.
        with open(f_path, "w", encoding="utf-8") as writer:
            writer.write(content)
@@ -0,0 +1,107 @@
1
+ import shutil
2
+ from importlib.resources import as_file
3
+ from importlib.resources import files
4
+ from pathlib import Path
5
+
6
+ from loguru import logger
7
+ from rdflib import RDF
8
+ from rdflib import SH
9
+ from rdflib import Graph
10
+
11
+ from dsp_tools.cli.args import ValidateDataConfig
12
+ from dsp_tools.commands.validate_data.constants import ONTOLOGIES_DATA_TTL
13
+ from dsp_tools.commands.validate_data.constants import ONTOLOGIES_REPORT_TTL
14
+ from dsp_tools.commands.validate_data.constants import ONTOLOGIES_SHACL_TTL
15
+ from dsp_tools.commands.validate_data.models.input_problems import OntologyResourceProblem
16
+ from dsp_tools.commands.validate_data.models.input_problems import OntologyValidationProblem
17
+ from dsp_tools.commands.validate_data.models.validation import ValidationFilePaths
18
+ from dsp_tools.commands.validate_data.shacl_cli_validator import ShaclCliValidator
19
+ from dsp_tools.commands.validate_data.utils import clean_up_temp_directory
20
+ from dsp_tools.commands.validate_data.utils import get_temp_directory
21
+ from dsp_tools.commands.validate_data.utils import reformat_onto_iri
22
+ from dsp_tools.error.exceptions import ShaclValidationError
23
+ from dsp_tools.utils.rdf_constants import SubjectObjectTypeAlias
24
+
25
+ LIST_SEPARATOR = "\n - "
26
+
27
+
28
def validate_ontology(
    onto_graph: Graph, shacl_validator: ShaclCliValidator, config: ValidateDataConfig
) -> OntologyValidationProblem | None:
    """
    Validate the ontology before the data is validated.

    The API accepts erroneous cardinalities in the ontology.
    Validating the ontology beforehand makes it possible to tell a mistake in the data
    apart from a mistake in the ontology,
    so that an erroneous ontology does not have to be taken into account during the data validation.

    Args:
        onto_graph: the graph of the project ontologies
        shacl_validator: SHACL CLI validator
        config: The configuration where to save the information to

    Returns:
        A validation report if errors were found

    Raises:
        ShaclValidationError: if any exception occurs during the validation, the original exception is logged
    """
    temp_dir = get_temp_directory()
    work_dir = Path(temp_dir.name)
    graph_dir = config.save_graph_dir
    try:
        return _get_ontology_validation_result(onto_graph, shacl_validator, work_dir)
    except Exception as err:  # noqa: BLE001
        logger.exception(err)
        # Reassigned before raising, so that the clean-up in the `finally` clause receives this directory.
        graph_dir = work_dir.parent / "validation-graphs"
        raise ShaclValidationError(
            "An error occurred during the ontology validation. "
            "Please contact the dsp-tools development team (at support@dasch.swiss) "
            f"with your log files and the files in the directory: {graph_dir}"
        ) from None
    finally:
        clean_up_temp_directory(temp_dir, graph_dir)
61
+
62
+
63
def _get_ontology_validation_result(
    onto_graph: Graph, shacl_validator: ShaclCliValidator, tmp_path: Path
) -> OntologyValidationProblem | None:
    """
    Copy the packaged ontology shapes and the serialised ontology into the directory and validate them.

    Args:
        onto_graph: the graph of the project ontologies
        shacl_validator: SHACL CLI validator
        tmp_path: directory in which the files are created and validated

    Returns:
        The reformatted problems if the ontology does not conform, otherwise None
    """
    shacl_resource = files("dsp_tools").joinpath("resources/validate_data/validate-ontology.ttl")
    # The path provided by `as_file` is only used inside the context manager, hence the copy happens within it.
    with as_file(shacl_resource) as shacl_source:
        shutil.copy(Path(shacl_source), tmp_path / ONTOLOGIES_SHACL_TTL)
    onto_graph.serialize(tmp_path / ONTOLOGIES_DATA_TTL)
    file_paths = ValidationFilePaths(
        directory=tmp_path,
        data_file=ONTOLOGIES_DATA_TTL,
        shacl_file=ONTOLOGIES_SHACL_TTL,
        report_file=ONTOLOGIES_REPORT_TTL,
    )
    outcome = shacl_validator.validate(file_paths)
    if not outcome.conforms:
        problems = _reformat_ontology_validation_result(outcome.validation_graph)
        return OntologyValidationProblem(problems)
    return None
80
+
81
+
82
def _reformat_ontology_validation_result(validation_result: Graph) -> list[OntologyResourceProblem]:
    """
    Extract one problem for every node of type `sh:ValidationResult` in the graph.

    Args:
        validation_result: validation report graph

    Returns:
        The problems, in the order in which the graph yields the result nodes
    """
    problems = []
    for result_node in validation_result.subjects(RDF.type, SH.ValidationResult):
        problems.append(_get_one_problem(validation_result, result_node))
    return problems
85
+
86
+
87
def _get_one_problem(val_g: Graph, result_bn: SubjectObjectTypeAlias) -> OntologyResourceProblem:
    """
    Read the focus node and the message of one validation result.

    The message is put on a single line: it is split at line breaks,
    the parts are stripped of surrounding whitespace and joined with a space.

    Args:
        val_g: validation report graph
        result_bn: node of the validation result

    Returns:
        The reformatted IRI of the focus node and the single-line message
    """
    focus_node = next(val_g.objects(result_bn, SH.focusNode))
    focus_iri = reformat_onto_iri(focus_node)
    raw_message = str(next(val_g.objects(result_bn, SH.resultMessage)))
    single_line = " ".join(part.strip() for part in raw_message.split("\n"))
    return OntologyResourceProblem(focus_iri, single_line)
93
+
94
+
95
def get_msg_str_ontology_validation_violation(onto_violations: OntologyValidationProblem) -> str:
    """
    Create the user message that lists all the problems found in the ontology.

    Args:
        onto_violations: problems found during the ontology validation

    Returns:
        Explanatory text followed by one list entry per problem, sorted by the IRI of the resource class
    """
    ordered = sorted(onto_violations.problems, key=lambda problem: problem.res_iri)
    problem_lines = [f"Resource Class: {problem.res_iri} | Problem: {problem.msg}" for problem in ordered]
    explanation = (
        "The ontology structure contains errors that prevent the validation of the data.\n"
        "Please correct the following errors and re-upload the corrected ontology.\n"
        "Once those two steps are done, the command `validate-data` will find any problems in the data.\n"
    )
    # The separator also precedes the first entry, so that every problem is rendered as a list item.
    return explanation + LIST_SEPARATOR + LIST_SEPARATOR.join(problem_lines)