dsp-tools 9.1.0.post11__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316) hide show
  1. dsp_tools/__init__.py +4 -0
  2. dsp_tools/cli/args.py +36 -0
  3. dsp_tools/cli/call_action.py +51 -231
  4. dsp_tools/cli/call_action_files_only.py +101 -0
  5. dsp_tools/cli/call_action_with_network.py +207 -0
  6. dsp_tools/cli/create_parsers.py +156 -58
  7. dsp_tools/cli/entry_point.py +56 -26
  8. dsp_tools/cli/utils.py +87 -0
  9. dsp_tools/clients/CLAUDE.md +420 -0
  10. dsp_tools/clients/authentication_client.py +14 -0
  11. dsp_tools/clients/authentication_client_live.py +66 -0
  12. dsp_tools/{utils → clients}/connection.py +2 -18
  13. dsp_tools/clients/connection_live.py +233 -0
  14. dsp_tools/clients/fuseki_metrics.py +60 -0
  15. dsp_tools/clients/group_user_clients.py +35 -0
  16. dsp_tools/clients/group_user_clients_live.py +181 -0
  17. dsp_tools/clients/legal_info_client.py +23 -0
  18. dsp_tools/clients/legal_info_client_live.py +132 -0
  19. dsp_tools/clients/list_client.py +49 -0
  20. dsp_tools/clients/list_client_live.py +166 -0
  21. dsp_tools/clients/metadata_client.py +24 -0
  22. dsp_tools/clients/metadata_client_live.py +47 -0
  23. dsp_tools/clients/ontology_clients.py +49 -0
  24. dsp_tools/clients/ontology_create_client_live.py +166 -0
  25. dsp_tools/clients/ontology_get_client_live.py +80 -0
  26. dsp_tools/clients/permissions_client.py +68 -0
  27. dsp_tools/clients/project_client.py +16 -0
  28. dsp_tools/clients/project_client_live.py +66 -0
  29. dsp_tools/commands/create/communicate_problems.py +24 -0
  30. dsp_tools/commands/create/create.py +134 -0
  31. dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
  32. dsp_tools/commands/create/create_on_server/classes.py +99 -0
  33. dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
  34. dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
  35. dsp_tools/commands/create/create_on_server/group_users.py +165 -0
  36. dsp_tools/commands/create/create_on_server/lists.py +163 -0
  37. dsp_tools/commands/create/create_on_server/mappers.py +12 -0
  38. dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
  39. dsp_tools/commands/create/create_on_server/ontology.py +52 -0
  40. dsp_tools/commands/create/create_on_server/project.py +68 -0
  41. dsp_tools/commands/create/create_on_server/properties.py +119 -0
  42. dsp_tools/commands/create/exceptions.py +29 -0
  43. dsp_tools/commands/create/lists_only.py +66 -0
  44. dsp_tools/commands/create/models/create_problems.py +87 -0
  45. dsp_tools/commands/create/models/parsed_ontology.py +88 -0
  46. dsp_tools/commands/create/models/parsed_project.py +81 -0
  47. dsp_tools/commands/create/models/rdf_ontology.py +12 -0
  48. dsp_tools/commands/create/models/server_project_info.py +100 -0
  49. dsp_tools/commands/create/parsing/parse_lists.py +45 -0
  50. dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
  51. dsp_tools/commands/create/parsing/parse_project.py +149 -0
  52. dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
  53. dsp_tools/commands/create/project_validate.py +595 -0
  54. dsp_tools/commands/create/serialisation/ontology.py +119 -0
  55. dsp_tools/commands/create/serialisation/project.py +44 -0
  56. dsp_tools/commands/excel2json/CLAUDE.md +101 -0
  57. dsp_tools/commands/excel2json/json_header.py +57 -23
  58. dsp_tools/commands/excel2json/{new_lists → lists}/compliance_checks.py +26 -26
  59. dsp_tools/commands/excel2json/{new_lists/make_new_lists.py → lists/make_lists.py} +19 -18
  60. dsp_tools/commands/excel2json/{new_lists → lists}/models/input_error.py +1 -12
  61. dsp_tools/commands/excel2json/{new_lists → lists}/models/serialise.py +9 -5
  62. dsp_tools/commands/excel2json/{new_lists → lists}/utils.py +4 -4
  63. dsp_tools/commands/excel2json/models/input_error.py +31 -11
  64. dsp_tools/commands/excel2json/models/json_header.py +53 -15
  65. dsp_tools/commands/excel2json/models/ontology.py +4 -3
  66. dsp_tools/commands/excel2json/{lists.py → old_lists.py} +26 -112
  67. dsp_tools/commands/excel2json/project.py +78 -34
  68. dsp_tools/commands/excel2json/properties.py +57 -36
  69. dsp_tools/commands/excel2json/resources.py +32 -12
  70. dsp_tools/commands/excel2json/utils.py +20 -1
  71. dsp_tools/commands/excel2xml/__init__.py +2 -2
  72. dsp_tools/commands/excel2xml/excel2xml_cli.py +7 -15
  73. dsp_tools/commands/excel2xml/excel2xml_lib.py +138 -493
  74. dsp_tools/commands/excel2xml/propertyelement.py +5 -5
  75. dsp_tools/commands/{project → get}/get.py +29 -13
  76. dsp_tools/commands/get/get_permissions.py +257 -0
  77. dsp_tools/commands/get/get_permissions_legacy.py +89 -0
  78. dsp_tools/commands/{project/models → get/legacy_models}/context.py +6 -6
  79. dsp_tools/commands/{project/models → get/legacy_models}/group.py +5 -10
  80. dsp_tools/commands/{project/models → get/legacy_models}/listnode.py +5 -35
  81. dsp_tools/commands/{project/models → get/legacy_models}/model.py +1 -1
  82. dsp_tools/commands/{project/models → get/legacy_models}/ontology.py +9 -14
  83. dsp_tools/commands/{project/models → get/legacy_models}/project.py +13 -6
  84. dsp_tools/commands/{project/models → get/legacy_models}/propertyclass.py +9 -16
  85. dsp_tools/commands/{project/models → get/legacy_models}/resourceclass.py +8 -46
  86. dsp_tools/commands/{project/models → get/legacy_models}/user.py +19 -60
  87. dsp_tools/commands/get/models/permissions_models.py +10 -0
  88. dsp_tools/commands/id2iri.py +20 -10
  89. dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +81 -56
  90. dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +4 -10
  91. dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +97 -37
  92. dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +2 -2
  93. dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +9 -10
  94. dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +3 -3
  95. dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +2 -10
  96. dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +12 -2
  97. dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +8 -9
  98. dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +18 -18
  99. dsp_tools/commands/start_stack.py +126 -77
  100. dsp_tools/commands/update_legal/CLAUDE.md +344 -0
  101. dsp_tools/commands/update_legal/__init__.py +0 -0
  102. dsp_tools/commands/update_legal/core.py +182 -0
  103. dsp_tools/commands/update_legal/csv_operations.py +135 -0
  104. dsp_tools/commands/update_legal/models.py +87 -0
  105. dsp_tools/commands/update_legal/xml_operations.py +247 -0
  106. dsp_tools/commands/validate_data/CLAUDE.md +159 -0
  107. dsp_tools/commands/validate_data/__init__.py +0 -0
  108. dsp_tools/commands/validate_data/constants.py +59 -0
  109. dsp_tools/commands/validate_data/mappers.py +143 -0
  110. dsp_tools/commands/validate_data/models/__init__.py +0 -0
  111. dsp_tools/commands/validate_data/models/api_responses.py +45 -0
  112. dsp_tools/commands/validate_data/models/input_problems.py +119 -0
  113. dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
  114. dsp_tools/commands/validate_data/models/validation.py +106 -0
  115. dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
  116. dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
  117. dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
  118. dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
  119. dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
  120. dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
  121. dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
  122. dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
  123. dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
  124. dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
  125. dsp_tools/commands/{xml_validate/sparql/resource_shacl.py → validate_data/sparql/cardinality_shacl.py} +45 -47
  126. dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
  127. dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
  128. dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
  129. dsp_tools/commands/validate_data/utils.py +59 -0
  130. dsp_tools/commands/validate_data/validate_data.py +283 -0
  131. dsp_tools/commands/validate_data/validation/__init__.py +0 -0
  132. dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
  133. dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
  134. dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
  135. dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
  136. dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
  137. dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
  138. dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
  139. dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
  140. dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
  141. dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
  142. dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
  143. dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
  144. dsp_tools/commands/xmlupload/models/formatted_text_value.py +0 -25
  145. dsp_tools/commands/xmlupload/models/ingest.py +56 -70
  146. dsp_tools/commands/xmlupload/models/input_problems.py +6 -14
  147. dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
  148. dsp_tools/commands/xmlupload/models/permission.py +0 -39
  149. dsp_tools/commands/xmlupload/models/{deserialise/xmlpermission.py → permissions_parsed.py} +2 -2
  150. dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
  151. dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
  152. dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
  153. dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
  154. dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
  155. dsp_tools/commands/xmlupload/models/upload_clients.py +3 -3
  156. dsp_tools/commands/xmlupload/models/upload_state.py +2 -4
  157. dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
  158. dsp_tools/commands/xmlupload/{ark2iri.py → prepare_xml_input/ark2iri.py} +1 -1
  159. dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
  160. dsp_tools/commands/xmlupload/{iiif_uri_validator.py → prepare_xml_input/iiif_uri_validator.py} +2 -14
  161. dsp_tools/commands/xmlupload/{list_client.py → prepare_xml_input/list_client.py} +15 -10
  162. dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
  163. dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
  164. dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
  165. dsp_tools/commands/xmlupload/resource_create_client.py +7 -468
  166. dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
  167. dsp_tools/commands/xmlupload/stash/{construct_and_analyze_graph.py → analyse_circular_reference_graph.py} +64 -157
  168. dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
  169. dsp_tools/commands/xmlupload/stash/graph_models.py +13 -8
  170. dsp_tools/commands/xmlupload/stash/stash_circular_references.py +48 -115
  171. dsp_tools/commands/xmlupload/stash/stash_models.py +4 -9
  172. dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +34 -40
  173. dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +98 -108
  174. dsp_tools/commands/xmlupload/upload_config.py +8 -0
  175. dsp_tools/commands/xmlupload/write_diagnostic_info.py +14 -9
  176. dsp_tools/commands/xmlupload/xmlupload.py +214 -192
  177. dsp_tools/config/__init__.py +0 -0
  178. dsp_tools/config/logger_config.py +69 -0
  179. dsp_tools/{utils → config}/warnings_config.py +4 -1
  180. dsp_tools/error/__init__.py +0 -0
  181. dsp_tools/error/custom_warnings.py +39 -0
  182. dsp_tools/error/exceptions.py +204 -0
  183. dsp_tools/error/problems.py +10 -0
  184. dsp_tools/error/xmllib_errors.py +20 -0
  185. dsp_tools/error/xmllib_warnings.py +54 -0
  186. dsp_tools/error/xmllib_warnings_util.py +159 -0
  187. dsp_tools/error/xsd_validation_error_msg.py +19 -0
  188. dsp_tools/legacy_models/__init__.py +0 -0
  189. dsp_tools/{models → legacy_models}/datetimestamp.py +7 -7
  190. dsp_tools/{models → legacy_models}/langstring.py +1 -1
  191. dsp_tools/{models → legacy_models}/projectContext.py +4 -4
  192. dsp_tools/resources/schema/data.xsd +108 -83
  193. dsp_tools/resources/schema/lists-only.json +4 -23
  194. dsp_tools/resources/schema/project.json +80 -35
  195. dsp_tools/resources/schema/properties-only.json +1 -4
  196. dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
  197. dsp_tools/resources/start-stack/docker-compose.yml +34 -30
  198. dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
  199. dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
  200. dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
  201. dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
  202. dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
  203. dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
  204. dsp_tools/utils/ansi_colors.py +32 -0
  205. dsp_tools/utils/data_formats/__init__.py +0 -0
  206. dsp_tools/utils/{date_util.py → data_formats/date_util.py} +13 -1
  207. dsp_tools/utils/data_formats/iri_util.py +30 -0
  208. dsp_tools/utils/{shared.py → data_formats/shared.py} +1 -35
  209. dsp_tools/utils/{uri_util.py → data_formats/uri_util.py} +12 -2
  210. dsp_tools/utils/fuseki_bloating.py +63 -0
  211. dsp_tools/utils/json_parsing.py +22 -0
  212. dsp_tools/utils/rdf_constants.py +42 -0
  213. dsp_tools/utils/rdflib_utils.py +10 -0
  214. dsp_tools/utils/replace_id_with_iri.py +66 -0
  215. dsp_tools/utils/request_utils.py +238 -0
  216. dsp_tools/utils/xml_parsing/__init__.py +0 -0
  217. dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
  218. dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
  219. dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
  220. dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
  221. dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
  222. dsp_tools/xmllib/CLAUDE.md +302 -0
  223. dsp_tools/xmllib/__init__.py +49 -0
  224. dsp_tools/xmllib/general_functions.py +877 -0
  225. dsp_tools/xmllib/internal/__init__.py +0 -0
  226. dsp_tools/xmllib/internal/checkers.py +162 -0
  227. dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
  228. dsp_tools/xmllib/internal/constants.py +46 -0
  229. dsp_tools/xmllib/internal/input_converters.py +155 -0
  230. dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
  231. dsp_tools/xmllib/internal/serialise_resource.py +177 -0
  232. dsp_tools/xmllib/internal/serialise_values.py +152 -0
  233. dsp_tools/xmllib/internal/type_aliases.py +11 -0
  234. dsp_tools/xmllib/models/config_options.py +28 -0
  235. dsp_tools/xmllib/models/date_formats.py +48 -0
  236. dsp_tools/xmllib/models/dsp_base_resources.py +1380 -400
  237. dsp_tools/xmllib/models/internal/__init__.py +0 -0
  238. dsp_tools/xmllib/models/internal/file_values.py +172 -0
  239. dsp_tools/xmllib/models/internal/geometry.py +162 -0
  240. dsp_tools/xmllib/models/{migration_metadata.py → internal/migration_metadata.py} +14 -10
  241. dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
  242. dsp_tools/xmllib/models/internal/values.py +342 -0
  243. dsp_tools/xmllib/models/licenses/__init__.py +0 -0
  244. dsp_tools/xmllib/models/licenses/other.py +59 -0
  245. dsp_tools/xmllib/models/licenses/recommended.py +107 -0
  246. dsp_tools/xmllib/models/permissions.py +41 -0
  247. dsp_tools/xmllib/models/res.py +1782 -0
  248. dsp_tools/xmllib/models/root.py +313 -26
  249. dsp_tools/xmllib/value_checkers.py +310 -47
  250. dsp_tools/xmllib/value_converters.py +765 -8
  251. dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
  252. dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
  253. dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
  254. {dsp_tools-9.1.0.post11.dist-info → dsp_tools-18.3.0.post13.dist-info}/entry_points.txt +1 -0
  255. dsp_tools/commands/project/create/project_create.py +0 -1107
  256. dsp_tools/commands/project/create/project_create_lists.py +0 -204
  257. dsp_tools/commands/project/create/project_validate.py +0 -453
  258. dsp_tools/commands/project/models/project_definition.py +0 -12
  259. dsp_tools/commands/rosetta.py +0 -124
  260. dsp_tools/commands/template.py +0 -30
  261. dsp_tools/commands/xml_validate/api_connection.py +0 -122
  262. dsp_tools/commands/xml_validate/deserialise_input.py +0 -135
  263. dsp_tools/commands/xml_validate/make_data_rdf.py +0 -193
  264. dsp_tools/commands/xml_validate/models/data_deserialised.py +0 -108
  265. dsp_tools/commands/xml_validate/models/data_rdf.py +0 -214
  266. dsp_tools/commands/xml_validate/models/input_problems.py +0 -191
  267. dsp_tools/commands/xml_validate/models/validation.py +0 -29
  268. dsp_tools/commands/xml_validate/reformat_validaton_result.py +0 -89
  269. dsp_tools/commands/xml_validate/sparql/construct_shapes.py +0 -16
  270. dsp_tools/commands/xml_validate/xml_validate.py +0 -151
  271. dsp_tools/commands/xmlupload/check_consistency_with_ontology.py +0 -253
  272. dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py +0 -236
  273. dsp_tools/commands/xmlupload/models/deserialise/xmlresource.py +0 -171
  274. dsp_tools/commands/xmlupload/models/namespace_context.py +0 -39
  275. dsp_tools/commands/xmlupload/models/ontology_lookup_models.py +0 -161
  276. dsp_tools/commands/xmlupload/models/ontology_problem_models.py +0 -178
  277. dsp_tools/commands/xmlupload/models/serialise/jsonld_serialiser.py +0 -40
  278. dsp_tools/commands/xmlupload/models/serialise/serialise_value.py +0 -51
  279. dsp_tools/commands/xmlupload/ontology_client.py +0 -92
  280. dsp_tools/commands/xmlupload/project_client.py +0 -91
  281. dsp_tools/commands/xmlupload/read_validate_xml_file.py +0 -99
  282. dsp_tools/models/custom_warnings.py +0 -31
  283. dsp_tools/models/exceptions.py +0 -90
  284. dsp_tools/resources/0100-template-repo/template.json +0 -45
  285. dsp_tools/resources/0100-template-repo/template.xml +0 -27
  286. dsp_tools/resources/start-stack/docker-compose-validation.yml +0 -5
  287. dsp_tools/resources/start-stack/start-stack-config.yml +0 -4
  288. dsp_tools/resources/xml_validate/api-shapes.ttl +0 -411
  289. dsp_tools/resources/xml_validate/replace_namespace.xslt +0 -61
  290. dsp_tools/utils/connection_live.py +0 -383
  291. dsp_tools/utils/iri_util.py +0 -14
  292. dsp_tools/utils/logger_config.py +0 -41
  293. dsp_tools/utils/set_encoder.py +0 -20
  294. dsp_tools/utils/xml_utils.py +0 -145
  295. dsp_tools/utils/xml_validation.py +0 -197
  296. dsp_tools/utils/xml_validation_models.py +0 -68
  297. dsp_tools/xmllib/models/file_values.py +0 -78
  298. dsp_tools/xmllib/models/resource.py +0 -415
  299. dsp_tools/xmllib/models/values.py +0 -428
  300. dsp_tools-9.1.0.post11.dist-info/METADATA +0 -130
  301. dsp_tools-9.1.0.post11.dist-info/RECORD +0 -167
  302. dsp_tools-9.1.0.post11.dist-info/WHEEL +0 -4
  303. dsp_tools-9.1.0.post11.dist-info/licenses/LICENSE +0 -674
  304. /dsp_tools/{commands/excel2json/new_lists → clients}/__init__.py +0 -0
  305. /dsp_tools/commands/{excel2json/new_lists/models → create}/__init__.py +0 -0
  306. /dsp_tools/commands/{project → create/create_on_server}/__init__.py +0 -0
  307. /dsp_tools/commands/{project/create → create/models}/__init__.py +0 -0
  308. /dsp_tools/commands/{project/models → create/parsing}/__init__.py +0 -0
  309. /dsp_tools/commands/{xml_validate → create/serialisation}/__init__.py +0 -0
  310. /dsp_tools/commands/{xml_validate/models → excel2json/lists}/__init__.py +0 -0
  311. /dsp_tools/commands/{xml_validate/sparql → excel2json/lists/models}/__init__.py +0 -0
  312. /dsp_tools/commands/excel2json/{new_lists → lists}/models/deserialise.py +0 -0
  313. /dsp_tools/commands/{xmlupload/models/deserialise → get}/__init__.py +0 -0
  314. /dsp_tools/commands/{xmlupload/models/serialise → get/legacy_models}/__init__.py +0 -0
  315. /dsp_tools/commands/{project/models → get/legacy_models}/helpers.py +0 -0
  316. /dsp_tools/{models → commands/get/models}/__init__.py +0 -0
@@ -0,0 +1,358 @@
1
+ from collections import defaultdict
2
+
3
+ import pandas as pd
4
+
5
+ from dsp_tools.cli.args import ValidationSeverity
6
+ from dsp_tools.clients.metadata_client import ExistingResourcesRetrieved
7
+ from dsp_tools.commands.validate_data.models.input_problems import AllProblems
8
+ from dsp_tools.commands.validate_data.models.input_problems import DuplicateFileWarning
9
+ from dsp_tools.commands.validate_data.models.input_problems import InputProblem
10
+ from dsp_tools.commands.validate_data.models.input_problems import MessageComponents
11
+ from dsp_tools.commands.validate_data.models.input_problems import ProblemType
12
+ from dsp_tools.commands.validate_data.models.input_problems import Severity
13
+ from dsp_tools.commands.validate_data.models.input_problems import SortedProblems
14
+ from dsp_tools.commands.validate_data.models.input_problems import UserPrintMessages
15
+
16
# Prefix put in front of, and between, the entries of a bulleted list in the user message.
LIST_SEPARATOR = "\n - "
# Divider placed between the message sections of the individual resources.
GRAND_SEPARATOR = "\n\n----------------------------\n"


# NOTE(review): the consumer of this set is not visible in this section of the file;
# presumably these problem types are exempt from a string/enum specific piece of information -- confirm.
PROBLEM_TYPES_IGNORE_STR_ENUM_INFO = {
    ProblemType.GENERIC,
    ProblemType.FILE_VALUE_MISSING,
    ProblemType.FILE_DUPLICATE,
}
21
+
22
+
23
def sort_user_problems(
    all_problems: AllProblems,
    duplicate_file_warnings: DuplicateFileWarning | None,
    shortcode: str,
    existing_resources_retrieved: ExistingResourcesRetrieved,
) -> SortedProblems:
    """
    Triage, de-duplicate and sort the validation problems according to their severity.

    Args:
        all_problems: problems and unexpected results of the validation
        duplicate_file_warnings: warnings about duplicate files, if there are any
        shortcode: shortcode of the project, used to recognise resource IRIs of the project itself
        existing_resources_retrieved: passed on to the triage of links that target absolute IRIs

    Returns:
        Violations, warnings and info, plus the de-duplicated component types of the unexpected results
    """
    iris_removed, links_level_info = _separate_resource_links_to_iris_of_own_project(
        all_problems.problems, shortcode, existing_resources_retrieved
    )
    filtered_problems = _filter_out_duplicate_problems(iris_removed)
    violations, warnings, info = _separate_according_to_severity(filtered_problems)
    if duplicate_file_warnings:
        warnings.extend(duplicate_file_warnings.problems)
    info.extend(links_level_info)
    # A set has no stable iteration order for strings (hash randomisation),
    # sorting keeps the message shown to the user reproducible between runs.
    unique_unexpected = sorted({x.component_type for x in all_problems.unexpected_results or []})
    return SortedProblems(
        unique_violations=violations,
        user_warnings=warnings,
        user_info=info,
        unexpected_shacl_validation_components=unique_unexpected,
    )
44
+
45
+
46
def _separate_according_to_severity(
    problems: list[InputProblem],
) -> tuple[list[InputProblem], list[InputProblem], list[InputProblem]]:
    """
    Split the problems into three lists, one per severity level.

    Args:
        problems: problems to split

    Returns:
        The violations, the warnings and the info, each in their input order.
        A problem whose severity matches none of the three levels is not returned.
    """
    as_violation: list[InputProblem] = []
    as_warning: list[InputProblem] = []
    as_info: list[InputProblem] = []
    for entry in problems:
        if entry.severity == Severity.VIOLATION:
            as_violation.append(entry)
        if entry.severity == Severity.WARNING:
            as_warning.append(entry)
        if entry.severity == Severity.INFO:
            as_info.append(entry)
    return as_violation, as_warning, as_info
53
+
54
+
55
def _separate_resource_links_to_iris_of_own_project(
    problems: list[InputProblem], shortcode: str, existing_resources_retrieved: ExistingResourcesRetrieved
) -> tuple[list[InputProblem], list[InputProblem]]:
    """
    Pull those inexistent-link problems that are not violations out of the list of problems.

    Args:
        problems: all the problems
        shortcode: shortcode of the project
        existing_resources_retrieved: passed on to the triage of the link problems

    Returns:
        First the problems that stay in the regular flow (including link problems triaged as violations),
        second the link problems that were triaged as not being a violation.
    """
    regular_flow = []
    not_a_violation = []
    for current in problems:
        if current.problem_type != ProblemType.INEXISTENT_LINKED_RESOURCE:
            regular_flow.append(current)
            continue
        is_violation, triaged = _determined_link_value_message_and_level(
            current, shortcode, existing_resources_retrieved
        )
        destination = regular_flow if is_violation else not_a_violation
        destination.append(triaged)
    return regular_flow, not_a_violation
72
+
73
+
74
def _determined_link_value_message_and_level(
    problem: InputProblem, shortcode: str, existing_resources_retrieved: ExistingResourcesRetrieved
) -> tuple[bool, InputProblem]:
    """
    Decide if a link to an inexistent resource is a violation and adapt the problem accordingly.
    The problem is modified in place and returned.

    Args:
        problem: problem about a link target that was not found
        shortcode: shortcode of the project
        existing_resources_retrieved: if it equals `ExistingResourcesRetrieved.TRUE`,
            an unknown IRI of the own project is treated as a violation

    Returns:
        A flag that is True if the problem is a violation, and the (possibly adapted) problem
    """
    any_resource_prefix = "http://rdfh.ch/"
    own_project_prefix = f"{any_resource_prefix}{shortcode}/"
    link_target = problem.input_value

    # No value, or not an IRI: it must be an internal ID that does not exist in the XML
    if not link_target or not link_target.startswith(any_resource_prefix):
        return True, problem

    # An IRI that does not contain the shortcode of the project
    if not link_target.startswith(own_project_prefix):
        problem.message = (
            "You used an absolute IRI to reference an existing resource of another project in the DB. "
            "Cross-Project resource links are not permitted."
        )
        problem.problem_type = ProblemType.LINK_TARGET_OF_ANOTHER_PROJECT
        return True, problem

    # From here on, the IRI matches those of the project itself
    if existing_resources_retrieved == ExistingResourcesRetrieved.TRUE:
        # The metadata was successfully retrieved, so the IRI is wrong
        problem.problem_type = ProblemType.LINK_TARGET_NOT_FOUND_IN_DB
        problem.message = (
            "You used an absolute IRI to reference an existing resource in the DB. "
            "We could not find a reference to this resource in the database."
        )
        return True, problem

    # Without the metadata the existence cannot be verified, so this is only an info
    problem.problem_type = ProblemType.LINK_TARGET_IS_IRI_OF_PROJECT
    problem.message = (
        "You used an absolute IRI to reference an existing resource in the DB. "
        "If this resource does not exist or is not of the correct type, an xmlupload will fail."
    )
    return False, problem
109
+
110
+
111
def _filter_out_duplicate_problems(problems: list[InputProblem]) -> list[InputProblem]:
    """
    Remove problems that report the same issue twice, resource by resource.

    Args:
        problems: problems that may contain duplicates

    Returns:
        The problems without a resource ID (unchanged) followed by the de-duplicated problems of each resource
    """
    per_resource, result = _group_problems_by_resource(problems)
    for resource_problems in per_resource.values():
        without_text_duplicates = _filter_out_duplicate_text_value_problem(resource_problems)
        without_file_duplicates = _filter_out_duplicate_wrong_file_type_problems(without_text_duplicates)
        result.extend(without_file_duplicates)
    return result
119
+
120
+
121
def _filter_out_duplicate_text_value_problem(problems: list[InputProblem]) -> list[InputProblem]:
    """
    Drop the generic TextValue type message of a property if a more precise one exists for it.

    Args:
        problems: problems of one resource

    Returns:
        All problems that are not a value type mismatch, followed by the type mismatches grouped by property
    """
    generic_expectation = "This property requires a TextValue"
    precise_expectations = ("TextValue without formatting", "TextValue with formatting")

    kept = []
    mismatches_per_property = defaultdict(list)
    for entry in problems:
        if entry.problem_type == ProblemType.VALUE_TYPE_MISMATCH:
            mismatches_per_property[entry.prop_name].append(entry)
        else:
            kept.append(entry)

    for property_mismatches in mismatches_per_property.values():
        expectations = [x.expected for x in property_mismatches]
        # A duplicate is only possible for a TextValue, and only if a more precise message is present
        has_generic = generic_expectation in expectations
        has_precise = any(precise in expectations for precise in precise_expectations)
        if has_generic and has_precise:
            # The first generic message is removed, the specific one stays
            del property_mismatches[expectations.index(generic_expectation)]
        kept.extend(property_mismatches)

    return kept
146
+
147
+
148
def _filter_out_duplicate_wrong_file_type_problems(problems: list[InputProblem]) -> list[InputProblem]:
    """
    Merge the two problems that are reported if a file of the wrong type is used into one.

    If a class is, for example, an AudioRepresentation but a jpg file is used,
    the created value is of type StillImageFileValue.
    This results in a min cardinality violation (the audio file is missing)
    and a closed constraint violation (an image is not permissible).
    Only one message should reach the user.

    Args:
        problems: problems of one resource

    Returns:
        The input list itself if the two problems are not both present,
        otherwise a new list with the remaining problems followed by the merged problem
    """

    def first_position_of(wanted: ProblemType) -> int | None:
        for position, candidate in enumerate(problems):
            if candidate.problem_type == wanted:
                return position
        return None

    missing_at = first_position_of(ProblemType.FILE_VALUE_MISSING)
    prohibited_at = first_position_of(ProblemType.FILE_VALUE_PROHIBITED)
    if missing_at is None or prohibited_at is None:
        return problems

    # The message of the missing-file problem is better, but only the closed constraint
    # violation knows the actual input value, so the value is copied over.
    merged = problems[missing_at]
    merged.input_value = problems[prohibited_at].input_value

    remaining = [p for position, p in enumerate(problems) if position != missing_at and position != prohibited_at]
    remaining.append(merged)
    return remaining
166
+
167
+
168
def _group_problems_by_resource(
    problems: list[InputProblem],
) -> tuple[dict[str, list[InputProblem]], list[InputProblem]]:
    """
    Group the problems by the ID of the resource they belong to.

    Args:
        problems: problems to group

    Returns:
        A mapping from resource ID to its problems, and the problems that have no (or an empty) resource ID
    """
    per_resource = defaultdict(list)
    unassigned = []
    for entry in problems:
        if entry.res_id:
            per_resource[entry.res_id].append(entry)
        else:
            unassigned.append(entry)
    return per_resource, unassigned
179
+
180
+
181
def get_user_message(sorted_problems: SortedProblems, severity: ValidationSeverity) -> UserPrintMessages:
    """
    Creates the string to communicate the user message.

    If there are too many problems to print, the message of each level carries a data frame instead of a body.

    Args:
        sorted_problems: validation problems
        severity: Severity level of validation information

    Returns:
        Problem message
    """
    violation_message, warning_message, info_message, unexpected_violations = None, None, None, None
    too_many_to_print = _are_there_too_many_to_print(sorted_problems, severity)
    if sorted_problems.unique_violations:
        violation_header = (
            f"During the validation of the data {len(sorted_problems.unique_violations)} errors were found. "
            f"Until they are resolved an xmlupload is not possible."
        )
        violation_message = _make_message_components(
            violation_header, sorted_problems.unique_violations, too_many_to_print
        )
    if sorted_problems.user_warnings:
        warning_header = (
            f"During the validation of the data {len(sorted_problems.user_warnings)} "
            f"problems were found. Warnings are allowed on test servers. "
            f"Please note that an xmlupload on a prod server will fail."
        )
        warning_message = _make_message_components(warning_header, sorted_problems.user_warnings, too_many_to_print)
    if sorted_problems.user_info:
        info_header = (
            f"During the validation of the data {len(sorted_problems.user_info)} "
            f"potential problems were found. They will not impede an xmlupload."
        )
        info_message = _make_message_components(info_header, sorted_problems.user_info, too_many_to_print)
    if sorted_problems.unexpected_shacl_validation_components:
        unexpected_header = "The following unknown violation types were found!"
        # The leading separator turns the first entry into a list item as well
        unexpected_body = LIST_SEPARATOR + LIST_SEPARATOR.join(sorted_problems.unexpected_shacl_validation_components)
        unexpected_violations = MessageComponents(unexpected_header, unexpected_body, None)
    return UserPrintMessages(violation_message, warning_message, info_message, unexpected_violations)


def _make_message_components(
    header: str, problems: list[InputProblem], too_many_to_print: bool
) -> MessageComponents:
    """Combine the header with either the printable body or, if there are too many problems, the data frame."""
    if too_many_to_print:
        return MessageComponents(header, None, _get_message_df(problems))
    return MessageComponents(header, _get_problem_print_message(problems), None)
236
+
237
+
238
def _are_there_too_many_to_print(sorted_problems: SortedProblems, severity: ValidationSeverity) -> bool:
    """
    Check whether more than 60 problems would have to be reported.

    Violations are always counted. Warnings are added if the severity value is at most
    the value of ValidationSeverity.WARNING, and info messages are added only if the
    severity value equals the value of ValidationSeverity.INFO.

    Args:
        sorted_problems: validation problems
        severity: Severity level of validation information

    Returns:
        True if the counted problems exceed 60
    """
    level = severity.value
    counts = [len(sorted_problems.unique_violations)]
    if level <= ValidationSeverity.WARNING.value:
        counts.append(len(sorted_problems.user_warnings))
    if level == ValidationSeverity.INFO.value:
        counts.append(len(sorted_problems.user_info))
    return sum(counts) > 60
245
+
246
+
247
def _get_problem_print_message(problems: list[InputProblem]) -> str:
    """
    Build one message string out of all the problems.

    The problems are split by `_group_problems_by_resource`. The block for the problems
    without a resource ID (if any) comes first, followed by one block per group,
    ordered by the string form of the resource ID of each group's first problem.

    Args:
        problems: problems to communicate

    Returns:
        All blocks joined with GRAND_SEPARATOR
    """
    by_resource, no_resource_id = _group_problems_by_resource(problems)
    blocks = []
    if no_resource_id:
        blocks.append(_get_message_for_one_resource(no_resource_id))
    ordered_groups = sorted(by_resource.values(), key=lambda group: str(group[0].res_id))
    blocks.extend(_get_message_for_one_resource(group) for group in ordered_groups)
    return GRAND_SEPARATOR.join(blocks)
255
+
256
+
257
def _get_message_for_one_resource(problems: list[InputProblem]) -> str:
    """
    Build the message block for one list of problems.

    The header line is taken from the first problem in the list. If that problem has no
    resource ID, the header line is empty, so the result starts with a line break.

    Args:
        problems: non-empty list of problems (the first element is accessed directly)

    Returns:
        Header line, a line break, and the messages grouped by property
    """
    first = problems[0]
    header = f"Resource ID: {first.res_id} | Resource Type: {first.res_type}" if first.res_id else ""
    details = _get_message_with_properties(problems)
    return f"{header}\n{details}"
264
+
265
+
266
def _get_message_with_properties(problems: list[InputProblem]) -> str:
    """
    Group the detail messages of the problems by property name and render one line per property.

    Each line consists of the property name followed by its detail messages,
    every one of them introduced by LIST_SEPARATOR. Properties keep the order
    in which they first occur in the input.

    Args:
        problems: problems to render

    Returns:
        The property lines joined with line breaks
    """
    details_per_property: dict = {}
    for problem in problems:
        details_per_property.setdefault(problem.prop_name, []).append(_get_message_detail_str(problem))
    # The f-string is deliberate: it also renders property names that are not strings.
    return "\n".join(
        f"{prop_name}{LIST_SEPARATOR}{LIST_SEPARATOR.join(details)}"
        for prop_name, details in details_per_property.items()
    )
275
+
276
+
277
def _get_message_detail_str(problem: InputProblem) -> str:
    """
    Render the details of a single problem as one line.

    The parts appear in a fixed order (message, problem type, input, input type, expectation),
    and a part is left out if the corresponding information is missing. The problem type is
    left out if it is listed in PROBLEM_TYPES_IGNORE_STR_ENUM_INFO.

    Args:
        problem: problem to render

    Returns:
        The available parts joined with " | "
    """
    # Each entry is either the rendered part or None; the conditional expressions
    # keep the rendering lazy, so helpers are only called when the part is needed.
    optional_parts = (
        problem.message if problem.message else None,
        str(problem.problem_type) if problem.problem_type not in PROBLEM_TYPES_IGNORE_STR_ENUM_INFO else None,
        f"Your input: '{_shorten_input(problem.input_value, problem.problem_type)}'" if problem.input_value else None,
        f"Actual input type: '{problem.input_type}'" if problem.input_type else None,
        f"Expected{_get_expected_prefix(problem.problem_type)}: {problem.expected}" if problem.expected else None,
    )
    return " | ".join(part for part in optional_parts if part is not None)
290
+
291
+
292
def _get_expected_prefix(problem_type: ProblemType) -> str | None:
    """
    Look up the qualifier that follows the word "Expected" for a problem type.

    Args:
        problem_type: type of the problem

    Returns:
        The qualifier with a leading space for the three problem types that have one,
        otherwise an empty string
    """
    prefix_lookup = {
        ProblemType.VALUE_TYPE_MISMATCH: " Value Type",
        ProblemType.INPUT_REGEX: " Input Format",
        ProblemType.LINK_TARGET_TYPE_MISMATCH: " Resource Type",
    }
    return prefix_lookup.get(problem_type, "")
302
+
303
+
304
def _get_message_df(problems: list[InputProblem]) -> pd.DataFrame:
    """
    Turn the problems into a data frame with one row per problem.

    The rows are sorted by "Resource Type", "Resource ID" and "Property" (in this priority),
    using only those of the three columns that are present in the data frame.

    Args:
        problems: problems to tabulate

    Returns:
        Sorted data frame
    """
    records = list(map(_get_message_dict, problems))
    frame = pd.DataFrame.from_records(records)
    sort_columns = [col for col in ("Resource Type", "Resource ID", "Property") if col in frame.columns]
    return frame.sort_values(by=sort_columns)
310
+
311
+
312
def _get_message_dict(problem: InputProblem) -> dict[str, str]:
    """
    Convert one problem into a dictionary of column name to cell content.

    Empty values are left out. The entries produced by `_get_expected_message_dict` are added
    afterwards, and the "Problem" entry is added last unless the problem type is listed in
    PROBLEM_TYPES_IGNORE_STR_ENUM_INFO.

    Args:
        problem: problem to convert

    Returns:
        Dictionary that contains only the non-empty entries
    """
    candidates = (
        ("Resource ID", problem.res_id),
        ("Resource Type", problem.res_type),
        ("Property", problem.prop_name),
        ("Your Input", _shorten_input(problem.input_value, problem.problem_type)),
        ("Input Type", problem.input_type),
    )
    row = {column: content for column, content in candidates if content}
    row.update(_get_expected_message_dict(problem))
    if problem.problem_type not in PROBLEM_TYPES_IGNORE_STR_ENUM_INFO:
        row["Problem"] = str(problem.problem_type)
    return row
326
+
327
+
328
def _get_expected_message_dict(problem: InputProblem) -> dict[str, str]:
    """
    Build the "Expected" and "Message" entries for one problem.

    If an expectation exists, it is stored under "Expected" (preceded by the stripped prefix
    of the problem type, if there is one) and a message, if present, goes to "Message".
    If there is only a message, the message itself is stored under "Expected".

    Args:
        problem: problem to convert

    Returns:
        Dictionary with zero, one or both of the keys "Expected" and "Message"
    """
    expected, message = problem.expected, problem.message
    entries = {}
    if expected:
        prefix = _get_expected_prefix(problem.problem_type)
        entries["Expected"] = f"{prefix.strip()}: {expected}" if prefix else expected
    if message:
        # Without an expectation, the message takes over the "Expected" slot.
        entries["Message" if expected else "Expected"] = message
    return entries
341
+
342
+
343
def _shorten_input(user_input: str | None, problem_type: ProblemType) -> str | None:
    """
    Truncate long user input to its first 50 characters followed by "[...]".

    The input is returned untouched for the file- and link-related problem types listed below,
    if it starts with one of the two `http://rdfh.ch/` prefixes, or if it has at most 50 characters.

    Args:
        user_input: input of the user, may be missing
        problem_type: type of the problem the input belongs to

    Returns:
        The original or shortened input; None if the input is empty
        (except for the listed problem types, where it is passed through as is)
    """
    passed_through_types = (
        ProblemType.FILE_DUPLICATE,
        ProblemType.FILE_VALUE_MISSING,
        ProblemType.FILE_VALUE_PROHIBITED,
        ProblemType.LINK_TARGET_TYPE_MISMATCH,
        ProblemType.INEXISTENT_LINKED_RESOURCE,
    )
    if problem_type in passed_through_types:
        return user_input
    if not user_input:
        return None
    has_kept_prefix = user_input.startswith(("http://rdfh.ch/", " / http://rdfh.ch/lists/"))
    if has_kept_prefix or len(user_input) <= 50:
        return user_input
    return f"{user_input[:50]}[...]"