dsp-tools 9.1.0.post11__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316) hide show
  1. dsp_tools/__init__.py +4 -0
  2. dsp_tools/cli/args.py +36 -0
  3. dsp_tools/cli/call_action.py +51 -231
  4. dsp_tools/cli/call_action_files_only.py +101 -0
  5. dsp_tools/cli/call_action_with_network.py +207 -0
  6. dsp_tools/cli/create_parsers.py +156 -58
  7. dsp_tools/cli/entry_point.py +56 -26
  8. dsp_tools/cli/utils.py +87 -0
  9. dsp_tools/clients/CLAUDE.md +420 -0
  10. dsp_tools/clients/authentication_client.py +14 -0
  11. dsp_tools/clients/authentication_client_live.py +66 -0
  12. dsp_tools/{utils → clients}/connection.py +2 -18
  13. dsp_tools/clients/connection_live.py +233 -0
  14. dsp_tools/clients/fuseki_metrics.py +60 -0
  15. dsp_tools/clients/group_user_clients.py +35 -0
  16. dsp_tools/clients/group_user_clients_live.py +181 -0
  17. dsp_tools/clients/legal_info_client.py +23 -0
  18. dsp_tools/clients/legal_info_client_live.py +132 -0
  19. dsp_tools/clients/list_client.py +49 -0
  20. dsp_tools/clients/list_client_live.py +166 -0
  21. dsp_tools/clients/metadata_client.py +24 -0
  22. dsp_tools/clients/metadata_client_live.py +47 -0
  23. dsp_tools/clients/ontology_clients.py +49 -0
  24. dsp_tools/clients/ontology_create_client_live.py +166 -0
  25. dsp_tools/clients/ontology_get_client_live.py +80 -0
  26. dsp_tools/clients/permissions_client.py +68 -0
  27. dsp_tools/clients/project_client.py +16 -0
  28. dsp_tools/clients/project_client_live.py +66 -0
  29. dsp_tools/commands/create/communicate_problems.py +24 -0
  30. dsp_tools/commands/create/create.py +134 -0
  31. dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
  32. dsp_tools/commands/create/create_on_server/classes.py +99 -0
  33. dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
  34. dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
  35. dsp_tools/commands/create/create_on_server/group_users.py +165 -0
  36. dsp_tools/commands/create/create_on_server/lists.py +163 -0
  37. dsp_tools/commands/create/create_on_server/mappers.py +12 -0
  38. dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
  39. dsp_tools/commands/create/create_on_server/ontology.py +52 -0
  40. dsp_tools/commands/create/create_on_server/project.py +68 -0
  41. dsp_tools/commands/create/create_on_server/properties.py +119 -0
  42. dsp_tools/commands/create/exceptions.py +29 -0
  43. dsp_tools/commands/create/lists_only.py +66 -0
  44. dsp_tools/commands/create/models/create_problems.py +87 -0
  45. dsp_tools/commands/create/models/parsed_ontology.py +88 -0
  46. dsp_tools/commands/create/models/parsed_project.py +81 -0
  47. dsp_tools/commands/create/models/rdf_ontology.py +12 -0
  48. dsp_tools/commands/create/models/server_project_info.py +100 -0
  49. dsp_tools/commands/create/parsing/parse_lists.py +45 -0
  50. dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
  51. dsp_tools/commands/create/parsing/parse_project.py +149 -0
  52. dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
  53. dsp_tools/commands/create/project_validate.py +595 -0
  54. dsp_tools/commands/create/serialisation/ontology.py +119 -0
  55. dsp_tools/commands/create/serialisation/project.py +44 -0
  56. dsp_tools/commands/excel2json/CLAUDE.md +101 -0
  57. dsp_tools/commands/excel2json/json_header.py +57 -23
  58. dsp_tools/commands/excel2json/{new_lists → lists}/compliance_checks.py +26 -26
  59. dsp_tools/commands/excel2json/{new_lists/make_new_lists.py → lists/make_lists.py} +19 -18
  60. dsp_tools/commands/excel2json/{new_lists → lists}/models/input_error.py +1 -12
  61. dsp_tools/commands/excel2json/{new_lists → lists}/models/serialise.py +9 -5
  62. dsp_tools/commands/excel2json/{new_lists → lists}/utils.py +4 -4
  63. dsp_tools/commands/excel2json/models/input_error.py +31 -11
  64. dsp_tools/commands/excel2json/models/json_header.py +53 -15
  65. dsp_tools/commands/excel2json/models/ontology.py +4 -3
  66. dsp_tools/commands/excel2json/{lists.py → old_lists.py} +26 -112
  67. dsp_tools/commands/excel2json/project.py +78 -34
  68. dsp_tools/commands/excel2json/properties.py +57 -36
  69. dsp_tools/commands/excel2json/resources.py +32 -12
  70. dsp_tools/commands/excel2json/utils.py +20 -1
  71. dsp_tools/commands/excel2xml/__init__.py +2 -2
  72. dsp_tools/commands/excel2xml/excel2xml_cli.py +7 -15
  73. dsp_tools/commands/excel2xml/excel2xml_lib.py +138 -493
  74. dsp_tools/commands/excel2xml/propertyelement.py +5 -5
  75. dsp_tools/commands/{project → get}/get.py +29 -13
  76. dsp_tools/commands/get/get_permissions.py +257 -0
  77. dsp_tools/commands/get/get_permissions_legacy.py +89 -0
  78. dsp_tools/commands/{project/models → get/legacy_models}/context.py +6 -6
  79. dsp_tools/commands/{project/models → get/legacy_models}/group.py +5 -10
  80. dsp_tools/commands/{project/models → get/legacy_models}/listnode.py +5 -35
  81. dsp_tools/commands/{project/models → get/legacy_models}/model.py +1 -1
  82. dsp_tools/commands/{project/models → get/legacy_models}/ontology.py +9 -14
  83. dsp_tools/commands/{project/models → get/legacy_models}/project.py +13 -6
  84. dsp_tools/commands/{project/models → get/legacy_models}/propertyclass.py +9 -16
  85. dsp_tools/commands/{project/models → get/legacy_models}/resourceclass.py +8 -46
  86. dsp_tools/commands/{project/models → get/legacy_models}/user.py +19 -60
  87. dsp_tools/commands/get/models/permissions_models.py +10 -0
  88. dsp_tools/commands/id2iri.py +20 -10
  89. dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +81 -56
  90. dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +4 -10
  91. dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +97 -37
  92. dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +2 -2
  93. dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +9 -10
  94. dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +3 -3
  95. dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +2 -10
  96. dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +12 -2
  97. dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +8 -9
  98. dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +18 -18
  99. dsp_tools/commands/start_stack.py +126 -77
  100. dsp_tools/commands/update_legal/CLAUDE.md +344 -0
  101. dsp_tools/commands/update_legal/__init__.py +0 -0
  102. dsp_tools/commands/update_legal/core.py +182 -0
  103. dsp_tools/commands/update_legal/csv_operations.py +135 -0
  104. dsp_tools/commands/update_legal/models.py +87 -0
  105. dsp_tools/commands/update_legal/xml_operations.py +247 -0
  106. dsp_tools/commands/validate_data/CLAUDE.md +159 -0
  107. dsp_tools/commands/validate_data/__init__.py +0 -0
  108. dsp_tools/commands/validate_data/constants.py +59 -0
  109. dsp_tools/commands/validate_data/mappers.py +143 -0
  110. dsp_tools/commands/validate_data/models/__init__.py +0 -0
  111. dsp_tools/commands/validate_data/models/api_responses.py +45 -0
  112. dsp_tools/commands/validate_data/models/input_problems.py +119 -0
  113. dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
  114. dsp_tools/commands/validate_data/models/validation.py +106 -0
  115. dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
  116. dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
  117. dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
  118. dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
  119. dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
  120. dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
  121. dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
  122. dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
  123. dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
  124. dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
  125. dsp_tools/commands/{xml_validate/sparql/resource_shacl.py → validate_data/sparql/cardinality_shacl.py} +45 -47
  126. dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
  127. dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
  128. dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
  129. dsp_tools/commands/validate_data/utils.py +59 -0
  130. dsp_tools/commands/validate_data/validate_data.py +283 -0
  131. dsp_tools/commands/validate_data/validation/__init__.py +0 -0
  132. dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
  133. dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
  134. dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
  135. dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
  136. dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
  137. dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
  138. dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
  139. dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
  140. dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
  141. dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
  142. dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
  143. dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
  144. dsp_tools/commands/xmlupload/models/formatted_text_value.py +0 -25
  145. dsp_tools/commands/xmlupload/models/ingest.py +56 -70
  146. dsp_tools/commands/xmlupload/models/input_problems.py +6 -14
  147. dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
  148. dsp_tools/commands/xmlupload/models/permission.py +0 -39
  149. dsp_tools/commands/xmlupload/models/{deserialise/xmlpermission.py → permissions_parsed.py} +2 -2
  150. dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
  151. dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
  152. dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
  153. dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
  154. dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
  155. dsp_tools/commands/xmlupload/models/upload_clients.py +3 -3
  156. dsp_tools/commands/xmlupload/models/upload_state.py +2 -4
  157. dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
  158. dsp_tools/commands/xmlupload/{ark2iri.py → prepare_xml_input/ark2iri.py} +1 -1
  159. dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
  160. dsp_tools/commands/xmlupload/{iiif_uri_validator.py → prepare_xml_input/iiif_uri_validator.py} +2 -14
  161. dsp_tools/commands/xmlupload/{list_client.py → prepare_xml_input/list_client.py} +15 -10
  162. dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
  163. dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
  164. dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
  165. dsp_tools/commands/xmlupload/resource_create_client.py +7 -468
  166. dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
  167. dsp_tools/commands/xmlupload/stash/{construct_and_analyze_graph.py → analyse_circular_reference_graph.py} +64 -157
  168. dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
  169. dsp_tools/commands/xmlupload/stash/graph_models.py +13 -8
  170. dsp_tools/commands/xmlupload/stash/stash_circular_references.py +48 -115
  171. dsp_tools/commands/xmlupload/stash/stash_models.py +4 -9
  172. dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +34 -40
  173. dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +98 -108
  174. dsp_tools/commands/xmlupload/upload_config.py +8 -0
  175. dsp_tools/commands/xmlupload/write_diagnostic_info.py +14 -9
  176. dsp_tools/commands/xmlupload/xmlupload.py +214 -192
  177. dsp_tools/config/__init__.py +0 -0
  178. dsp_tools/config/logger_config.py +69 -0
  179. dsp_tools/{utils → config}/warnings_config.py +4 -1
  180. dsp_tools/error/__init__.py +0 -0
  181. dsp_tools/error/custom_warnings.py +39 -0
  182. dsp_tools/error/exceptions.py +204 -0
  183. dsp_tools/error/problems.py +10 -0
  184. dsp_tools/error/xmllib_errors.py +20 -0
  185. dsp_tools/error/xmllib_warnings.py +54 -0
  186. dsp_tools/error/xmllib_warnings_util.py +159 -0
  187. dsp_tools/error/xsd_validation_error_msg.py +19 -0
  188. dsp_tools/legacy_models/__init__.py +0 -0
  189. dsp_tools/{models → legacy_models}/datetimestamp.py +7 -7
  190. dsp_tools/{models → legacy_models}/langstring.py +1 -1
  191. dsp_tools/{models → legacy_models}/projectContext.py +4 -4
  192. dsp_tools/resources/schema/data.xsd +108 -83
  193. dsp_tools/resources/schema/lists-only.json +4 -23
  194. dsp_tools/resources/schema/project.json +80 -35
  195. dsp_tools/resources/schema/properties-only.json +1 -4
  196. dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
  197. dsp_tools/resources/start-stack/docker-compose.yml +34 -30
  198. dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
  199. dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
  200. dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
  201. dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
  202. dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
  203. dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
  204. dsp_tools/utils/ansi_colors.py +32 -0
  205. dsp_tools/utils/data_formats/__init__.py +0 -0
  206. dsp_tools/utils/{date_util.py → data_formats/date_util.py} +13 -1
  207. dsp_tools/utils/data_formats/iri_util.py +30 -0
  208. dsp_tools/utils/{shared.py → data_formats/shared.py} +1 -35
  209. dsp_tools/utils/{uri_util.py → data_formats/uri_util.py} +12 -2
  210. dsp_tools/utils/fuseki_bloating.py +63 -0
  211. dsp_tools/utils/json_parsing.py +22 -0
  212. dsp_tools/utils/rdf_constants.py +42 -0
  213. dsp_tools/utils/rdflib_utils.py +10 -0
  214. dsp_tools/utils/replace_id_with_iri.py +66 -0
  215. dsp_tools/utils/request_utils.py +238 -0
  216. dsp_tools/utils/xml_parsing/__init__.py +0 -0
  217. dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
  218. dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
  219. dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
  220. dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
  221. dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
  222. dsp_tools/xmllib/CLAUDE.md +302 -0
  223. dsp_tools/xmllib/__init__.py +49 -0
  224. dsp_tools/xmllib/general_functions.py +877 -0
  225. dsp_tools/xmllib/internal/__init__.py +0 -0
  226. dsp_tools/xmllib/internal/checkers.py +162 -0
  227. dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
  228. dsp_tools/xmllib/internal/constants.py +46 -0
  229. dsp_tools/xmllib/internal/input_converters.py +155 -0
  230. dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
  231. dsp_tools/xmllib/internal/serialise_resource.py +177 -0
  232. dsp_tools/xmllib/internal/serialise_values.py +152 -0
  233. dsp_tools/xmllib/internal/type_aliases.py +11 -0
  234. dsp_tools/xmllib/models/config_options.py +28 -0
  235. dsp_tools/xmllib/models/date_formats.py +48 -0
  236. dsp_tools/xmllib/models/dsp_base_resources.py +1380 -400
  237. dsp_tools/xmllib/models/internal/__init__.py +0 -0
  238. dsp_tools/xmllib/models/internal/file_values.py +172 -0
  239. dsp_tools/xmllib/models/internal/geometry.py +162 -0
  240. dsp_tools/xmllib/models/{migration_metadata.py → internal/migration_metadata.py} +14 -10
  241. dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
  242. dsp_tools/xmllib/models/internal/values.py +342 -0
  243. dsp_tools/xmllib/models/licenses/__init__.py +0 -0
  244. dsp_tools/xmllib/models/licenses/other.py +59 -0
  245. dsp_tools/xmllib/models/licenses/recommended.py +107 -0
  246. dsp_tools/xmllib/models/permissions.py +41 -0
  247. dsp_tools/xmllib/models/res.py +1782 -0
  248. dsp_tools/xmllib/models/root.py +313 -26
  249. dsp_tools/xmllib/value_checkers.py +310 -47
  250. dsp_tools/xmllib/value_converters.py +765 -8
  251. dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
  252. dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
  253. dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
  254. {dsp_tools-9.1.0.post11.dist-info → dsp_tools-18.3.0.post13.dist-info}/entry_points.txt +1 -0
  255. dsp_tools/commands/project/create/project_create.py +0 -1107
  256. dsp_tools/commands/project/create/project_create_lists.py +0 -204
  257. dsp_tools/commands/project/create/project_validate.py +0 -453
  258. dsp_tools/commands/project/models/project_definition.py +0 -12
  259. dsp_tools/commands/rosetta.py +0 -124
  260. dsp_tools/commands/template.py +0 -30
  261. dsp_tools/commands/xml_validate/api_connection.py +0 -122
  262. dsp_tools/commands/xml_validate/deserialise_input.py +0 -135
  263. dsp_tools/commands/xml_validate/make_data_rdf.py +0 -193
  264. dsp_tools/commands/xml_validate/models/data_deserialised.py +0 -108
  265. dsp_tools/commands/xml_validate/models/data_rdf.py +0 -214
  266. dsp_tools/commands/xml_validate/models/input_problems.py +0 -191
  267. dsp_tools/commands/xml_validate/models/validation.py +0 -29
  268. dsp_tools/commands/xml_validate/reformat_validaton_result.py +0 -89
  269. dsp_tools/commands/xml_validate/sparql/construct_shapes.py +0 -16
  270. dsp_tools/commands/xml_validate/xml_validate.py +0 -151
  271. dsp_tools/commands/xmlupload/check_consistency_with_ontology.py +0 -253
  272. dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py +0 -236
  273. dsp_tools/commands/xmlupload/models/deserialise/xmlresource.py +0 -171
  274. dsp_tools/commands/xmlupload/models/namespace_context.py +0 -39
  275. dsp_tools/commands/xmlupload/models/ontology_lookup_models.py +0 -161
  276. dsp_tools/commands/xmlupload/models/ontology_problem_models.py +0 -178
  277. dsp_tools/commands/xmlupload/models/serialise/jsonld_serialiser.py +0 -40
  278. dsp_tools/commands/xmlupload/models/serialise/serialise_value.py +0 -51
  279. dsp_tools/commands/xmlupload/ontology_client.py +0 -92
  280. dsp_tools/commands/xmlupload/project_client.py +0 -91
  281. dsp_tools/commands/xmlupload/read_validate_xml_file.py +0 -99
  282. dsp_tools/models/custom_warnings.py +0 -31
  283. dsp_tools/models/exceptions.py +0 -90
  284. dsp_tools/resources/0100-template-repo/template.json +0 -45
  285. dsp_tools/resources/0100-template-repo/template.xml +0 -27
  286. dsp_tools/resources/start-stack/docker-compose-validation.yml +0 -5
  287. dsp_tools/resources/start-stack/start-stack-config.yml +0 -4
  288. dsp_tools/resources/xml_validate/api-shapes.ttl +0 -411
  289. dsp_tools/resources/xml_validate/replace_namespace.xslt +0 -61
  290. dsp_tools/utils/connection_live.py +0 -383
  291. dsp_tools/utils/iri_util.py +0 -14
  292. dsp_tools/utils/logger_config.py +0 -41
  293. dsp_tools/utils/set_encoder.py +0 -20
  294. dsp_tools/utils/xml_utils.py +0 -145
  295. dsp_tools/utils/xml_validation.py +0 -197
  296. dsp_tools/utils/xml_validation_models.py +0 -68
  297. dsp_tools/xmllib/models/file_values.py +0 -78
  298. dsp_tools/xmllib/models/resource.py +0 -415
  299. dsp_tools/xmllib/models/values.py +0 -428
  300. dsp_tools-9.1.0.post11.dist-info/METADATA +0 -130
  301. dsp_tools-9.1.0.post11.dist-info/RECORD +0 -167
  302. dsp_tools-9.1.0.post11.dist-info/WHEEL +0 -4
  303. dsp_tools-9.1.0.post11.dist-info/licenses/LICENSE +0 -674
  304. /dsp_tools/{commands/excel2json/new_lists → clients}/__init__.py +0 -0
  305. /dsp_tools/commands/{excel2json/new_lists/models → create}/__init__.py +0 -0
  306. /dsp_tools/commands/{project → create/create_on_server}/__init__.py +0 -0
  307. /dsp_tools/commands/{project/create → create/models}/__init__.py +0 -0
  308. /dsp_tools/commands/{project/models → create/parsing}/__init__.py +0 -0
  309. /dsp_tools/commands/{xml_validate → create/serialisation}/__init__.py +0 -0
  310. /dsp_tools/commands/{xml_validate/models → excel2json/lists}/__init__.py +0 -0
  311. /dsp_tools/commands/{xml_validate/sparql → excel2json/lists/models}/__init__.py +0 -0
  312. /dsp_tools/commands/excel2json/{new_lists → lists}/models/deserialise.py +0 -0
  313. /dsp_tools/commands/{xmlupload/models/deserialise → get}/__init__.py +0 -0
  314. /dsp_tools/commands/{xmlupload/models/serialise → get/legacy_models}/__init__.py +0 -0
  315. /dsp_tools/commands/{project/models → get/legacy_models}/helpers.py +0 -0
  316. /dsp_tools/{models → commands/get/models}/__init__.py +0 -0
@@ -1,9 +1,11 @@
1
1
  import urllib.parse
2
+ from collections.abc import Iterator
2
3
  from dataclasses import dataclass
3
4
  from dataclasses import field
5
+ from http import HTTPStatus
4
6
  from pathlib import Path
5
- from typing import Iterator
6
7
 
8
+ import regex
7
9
  from loguru import logger
8
10
  from requests import JSONDecodeError
9
11
  from requests import RequestException
@@ -11,18 +13,14 @@ from requests import Session
11
13
  from requests.adapters import HTTPAdapter
12
14
  from requests.adapters import Retry
13
15
 
16
+ from dsp_tools.clients.authentication_client import AuthenticationClient
14
17
  from dsp_tools.commands.ingest_xmlupload.upload_files.upload_failures import UploadFailure
15
- from dsp_tools.models.exceptions import BadCredentialsError
16
- from dsp_tools.models.exceptions import UserError
17
- from dsp_tools.utils.logger_config import LOGGER_SAVEPATH
18
-
19
- STATUS_OK = 200
20
- STATUS_UNAUTHORIZED = 401
21
- STATUS_FORBIDDEN = 403
22
- STATUS_NOT_FOUND = 404
23
- STATUS_CONFLICT = 409
24
- STATUS_INTERNAL_SERVER_ERROR = 500
25
- STATUS_SERVER_UNAVAILABLE = 503
18
+ from dsp_tools.config.logger_config import LOGGER_SAVEPATH
19
+ from dsp_tools.error.exceptions import BadCredentialsError
20
+ from dsp_tools.error.exceptions import InputError
21
+ from dsp_tools.utils.request_utils import RequestParameters
22
+ from dsp_tools.utils.request_utils import log_request
23
+ from dsp_tools.utils.request_utils import log_response
26
24
 
27
25
 
28
26
  @dataclass
@@ -30,7 +28,7 @@ class BulkIngestClient:
30
28
  """Client to upload multiple files to the ingest server and monitor the ingest process."""
31
29
 
32
30
  dsp_ingest_url: str
33
- token: str
31
+ authentication_client: AuthenticationClient
34
32
  shortcode: str
35
33
  imgdir: Path = field(default=Path.cwd())
36
34
  session: Session = field(init=False)
@@ -45,68 +43,93 @@ class BulkIngestClient:
45
43
  connect=retries,
46
44
  backoff_factor=0.3,
47
45
  allowed_methods=None, # means all methods
48
- status_forcelist=[STATUS_INTERNAL_SERVER_ERROR, STATUS_SERVER_UNAVAILABLE],
46
+ status_forcelist=[HTTPStatus.INTERNAL_SERVER_ERROR, HTTPStatus.SERVICE_UNAVAILABLE],
49
47
  )
50
48
  adapter = HTTPAdapter(max_retries=retry)
51
49
  self.session.mount("http://", adapter)
52
50
  self.session.mount("https://", adapter)
53
- self.session.headers["Authorization"] = f"Bearer {self.token}"
54
51
 
55
52
  def upload_file(
56
53
  self,
57
54
  filepath: Path,
58
55
  ) -> UploadFailure | None:
59
- """Uploads a file to the ingest server."""
60
- url = f"{self.dsp_ingest_url}/projects/{self.shortcode}/bulk-ingest/ingest/{urllib.parse.quote(str(filepath))}"
61
- headers = {"Content-Type": "application/octet-stream"}
62
- timeout = 600
56
+ """
57
+ Uploads a file to the ingest server.
58
+ The load balancer on DSP servers currently has a timeout of 10m,
59
+ so we need to use a slightly shorter timeout of 9m.
60
+ See https://linear.app/dasch/issue/INFRA-847/increase-traefik-readtimeout
61
+ # noqa: DAR101
62
+ # noqa: DAR201
63
+ """
64
+ logger.debug(f"Uploading file '{filepath}'")
65
+ timeout = 9 * 60
66
+ url = self._build_url_for_bulk_ingest_ingest_route(filepath)
67
+ headers = {
68
+ "Content-Type": "application/octet-stream",
69
+ "Authorization": f"Bearer {self.authentication_client.get_token()}",
70
+ }
63
71
  err_msg = f"Failed to upload '{filepath}' to '{url}'."
72
+ params = RequestParameters("POST", url, timeout, headers=headers)
73
+ log_request(params)
64
74
  try:
65
75
  with open(self.imgdir / filepath, "rb") as binary_io:
66
- content = binary_io.read()
67
- except OSError as e:
68
- logger.error(err_msg)
69
- return UploadFailure(filepath, f"File could not be opened/read: {e.strerror}")
70
- try:
71
- logger.debug(f"REQUEST: POST to {url}, timeout: {timeout}, headers: {headers}")
72
- res = self.session.post(
73
- url=url,
74
- headers=headers,
75
- data=content,
76
- timeout=timeout,
77
- )
78
- logger.debug(f"RESPONSE: {res.status_code}")
76
+ res = self.session.post(
77
+ url=params.url,
78
+ headers=params.headers,
79
+ data=binary_io, # https://requests.readthedocs.io/en/latest/user/advanced/#streaming-uploads
80
+ timeout=params.timeout,
81
+ )
82
+ log_response(res)
79
83
  except RequestException as e:
80
- logger.error(err_msg)
84
+ logger.exception(err_msg)
81
85
  return UploadFailure(filepath, f"Exception of requests library: {e}")
82
- if res.status_code != STATUS_OK:
86
+ except OSError as e:
87
+ err_msg = f"Cannot bulk-ingest {filepath}, because the file could not be opened/read: {e.strerror}"
83
88
  logger.error(err_msg)
84
- reason = f"Response {res.status_code}: {res.text}" if res.text else f"Response {res.status_code}"
85
- return UploadFailure(filepath, reason)
86
-
89
+ return UploadFailure(filepath, err_msg)
90
+ if res.status_code != HTTPStatus.OK:
91
+ logger.error(f"{err_msg}: Response {res.status_code}: {res.text}")
92
+ return UploadFailure(filepath, res.reason, res.status_code, res.text)
87
93
  return None
88
94
 
95
+ def _build_url_for_bulk_ingest_ingest_route(self, filepath: Path) -> str:
96
+ """
97
+ Remove the leading slash of absolute filepaths,
98
+ because the `/project/<shortcode>/bulk-ingest/ingest` route only accepts relative paths.
99
+ The leading slash has to be added again in the "ingest-xmlupload" step, when applying the ingest ID.
100
+
101
+ Args:
102
+ filepath: filepath
103
+
104
+ Returns:
105
+ url
106
+ """
107
+ quoted = regex.sub(r"^%2F", "", urllib.parse.quote(str(filepath), safe=""))
108
+ return f"{self.dsp_ingest_url}/projects/{self.shortcode}/bulk-ingest/ingest/{quoted}"
109
+
89
110
  def trigger_ingest_process(self) -> None:
90
111
  """Start the ingest process on the server."""
91
112
  url = f"{self.dsp_ingest_url}/projects/{self.shortcode}/bulk-ingest"
92
113
  timeout = 5
93
- logger.debug(f"REQUEST: POST to {url}, timeout: {timeout}")
94
- res = self.session.post(url, timeout=timeout)
95
- logger.debug(f"RESPONSE: {res.status_code}: {res.text}")
96
- if res.status_code in [STATUS_UNAUTHORIZED, STATUS_FORBIDDEN]:
97
- raise BadCredentialsError("Unauthorized to start the ingest process. Please check your credentials.")
98
- if res.status_code == STATUS_NOT_FOUND:
99
- raise UserError(
114
+ headers = {"Authorization": f"Bearer {self.authentication_client.get_token()}"}
115
+ params = RequestParameters("POST", url, timeout, headers=headers)
116
+ log_request(params)
117
+ res = self.session.post(params.url, timeout=params.timeout, headers=params.headers)
118
+ log_response(res)
119
+ if res.status_code == HTTPStatus.FORBIDDEN:
120
+ raise BadCredentialsError("Only ProjectAdmins or SystemAdmins can start the ingest process.")
121
+ if res.status_code == HTTPStatus.NOT_FOUND:
122
+ raise InputError(
100
123
  f"No assets have been uploaded for project {self.shortcode}. "
101
124
  "Before using the 'ingest-files' command, you must upload some files with the 'upload-files' command."
102
125
  )
103
- if res.status_code == STATUS_CONFLICT:
126
+ if res.status_code == HTTPStatus.CONFLICT:
104
127
  msg = f"Ingest process on the server {self.dsp_ingest_url} is already running. Wait until it completes..."
105
128
  print(msg)
106
129
  logger.info(msg)
107
130
  return
108
- if res.status_code in [STATUS_INTERNAL_SERVER_ERROR, STATUS_SERVER_UNAVAILABLE]:
109
- raise UserError("Server is unavailable. Please try again later.")
131
+ if res.status_code in [HTTPStatus.INTERNAL_SERVER_ERROR, HTTPStatus.SERVICE_UNAVAILABLE]:
132
+ raise InputError("Server is unavailable. Please try again later.")
110
133
 
111
134
  try:
112
135
  returned_shortcode = res.json().get("id")
@@ -114,7 +137,7 @@ class BulkIngestClient:
114
137
  except JSONDecodeError:
115
138
  failed = True
116
139
  if failed:
117
- raise UserError("Failed to trigger the ingest process. Please check the server logs, or try again later.")
140
+ raise InputError("Failed to trigger the ingest process. Please check the server logs, or try again later.")
118
141
  print(f"Kicked off the ingest process on the server {self.dsp_ingest_url}. Wait until it completes...")
119
142
  logger.info(f"Kicked off the ingest process on the server {self.dsp_ingest_url}. Wait until it completes...")
120
143
 
@@ -128,26 +151,28 @@ class BulkIngestClient:
128
151
  The mapping CSV if the ingest process has completed.
129
152
 
130
153
  Raises:
131
- UserError: if there are too many server errors in a row.
154
+ InputError: if there are too many server errors in a row.
132
155
  """
133
156
  url = f"{self.dsp_ingest_url}/projects/{self.shortcode}/bulk-ingest/mapping.csv"
134
157
  timeout = 5
135
158
  while True:
136
- logger.debug(f"REQUEST: GET to {url}, timeout: {timeout}")
137
- res = self.session.get(url, timeout=timeout)
138
- logger.debug(f"RESPONSE: {res.status_code}")
139
- if res.status_code == STATUS_CONFLICT:
159
+ headers = {"Authorization": f"Bearer {self.authentication_client.get_token()}"}
160
+ params = RequestParameters("GET", url, timeout, headers=headers)
161
+ log_request(params)
162
+ res = self.session.get(params.url, timeout=params.timeout, headers=params.headers)
163
+ log_response(res)
164
+ if res.status_code == HTTPStatus.CONFLICT:
140
165
  self.retrieval_failures = 0
141
166
  logger.info("Ingest process is still running. Wait until it completes...")
142
167
  yield True
143
- elif res.status_code != STATUS_OK or not res.text.startswith("original,derivative"):
168
+ elif res.status_code != HTTPStatus.OK or not res.text.startswith("original,derivative"):
144
169
  self.retrieval_failures += 1
145
170
  if self.retrieval_failures > 15:
146
- raise UserError(f"There were too many server errors. Please check the logs at {LOGGER_SAVEPATH}.")
171
+ raise InputError(f"There were too many server errors. Please check the logs at {LOGGER_SAVEPATH}.")
147
172
  msg = "While retrieving the mapping CSV, the server responded with an unexpected status code/content."
148
173
  logger.error(msg)
149
174
  yield False
150
175
  else:
151
176
  logger.info("Ingest process completed.")
152
177
  break
153
- yield res.text
178
+ yield res.content.decode("utf-8")
@@ -9,7 +9,7 @@ from loguru import logger
9
9
  from lxml import etree
10
10
 
11
11
  from dsp_tools.commands.ingest_xmlupload.create_resources.user_information import IngestInformation
12
- from dsp_tools.models.exceptions import InputError
12
+ from dsp_tools.error.exceptions import InputError
13
13
 
14
14
 
15
15
  def get_mapping_dict_from_file(shortcode: str) -> dict[str, str]:
@@ -56,15 +56,9 @@ def replace_filepath_with_internal_filename(
56
56
  for elem in new_tree.iter():
57
57
  if not etree.QName(elem).localname.endswith("bitstream") or not elem.text:
58
58
  continue
59
- img_path_str = elem.text
60
- if img_path_str not in orig_path_2_asset_id:
61
- img_path = Path(elem.text)
62
- img_path_str = str(img_path.relative_to(Path.cwd()) if img_path.is_absolute() else img_path)
63
- if img_path_str not in orig_path_2_asset_id:
64
- img_path_str = str(img_path.with_suffix(img_path.suffix.lower()))
65
- if img_path_str not in orig_path_2_asset_id:
66
- img_path_str = str(img_path.with_suffix(img_path.suffix.upper()))
67
-
59
+ img_path_str = elem.text.strip()
60
+ if img_path_str not in orig_path_2_asset_id and img_path_str.startswith("/"):
61
+ img_path_str = img_path_str[1:]
68
62
  if img_path_str in orig_path_2_asset_id:
69
63
  elem.text = orig_path_2_asset_id[img_path_str]
70
64
  used_media_file_paths.append(img_path_str)
@@ -6,27 +6,44 @@ from loguru import logger
6
6
  from lxml import etree
7
7
 
8
8
  from dsp_tools.cli.args import ServerCredentials
9
+ from dsp_tools.cli.args import ValidateDataConfig
10
+ from dsp_tools.cli.args import ValidationSeverity
11
+ from dsp_tools.clients.authentication_client import AuthenticationClient
12
+ from dsp_tools.clients.authentication_client_live import AuthenticationClientLive
13
+ from dsp_tools.clients.connection import Connection
14
+ from dsp_tools.clients.connection_live import ConnectionLive
15
+ from dsp_tools.clients.legal_info_client_live import LegalInfoClientLive
16
+ from dsp_tools.clients.project_client_live import ProjectClientLive
9
17
  from dsp_tools.commands.ingest_xmlupload.create_resources.apply_ingest_id import get_mapping_dict_from_file
10
18
  from dsp_tools.commands.ingest_xmlupload.create_resources.apply_ingest_id import replace_filepath_with_internal_filename
11
- from dsp_tools.commands.xmlupload.list_client import ListClientLive
19
+ from dsp_tools.commands.validate_data.validate_data import validate_parsed_resources
12
20
  from dsp_tools.commands.xmlupload.models.ingest import BulkIngestedAssetClient
13
21
  from dsp_tools.commands.xmlupload.models.upload_clients import UploadClients
14
22
  from dsp_tools.commands.xmlupload.models.upload_state import UploadState
15
- from dsp_tools.commands.xmlupload.ontology_client import OntologyClientLive
16
- from dsp_tools.commands.xmlupload.project_client import ProjectClientLive
17
- from dsp_tools.commands.xmlupload.read_validate_xml_file import validate_and_parse
23
+ from dsp_tools.commands.xmlupload.prepare_xml_input.get_processed_resources import get_processed_resources
24
+ from dsp_tools.commands.xmlupload.prepare_xml_input.list_client import ListClientLive
25
+ from dsp_tools.commands.xmlupload.prepare_xml_input.prepare_xml_input import get_parsed_resources_and_mappers
26
+ from dsp_tools.commands.xmlupload.prepare_xml_input.prepare_xml_input import get_stash_and_upload_order
27
+ from dsp_tools.commands.xmlupload.prepare_xml_input.read_validate_xml_file import validate_iiif_uris
18
28
  from dsp_tools.commands.xmlupload.upload_config import UploadConfig
29
+ from dsp_tools.commands.xmlupload.xmlupload import enable_unknown_license_if_any_are_missing
19
30
  from dsp_tools.commands.xmlupload.xmlupload import execute_upload
20
- from dsp_tools.commands.xmlupload.xmlupload import prepare_upload
21
- from dsp_tools.models.exceptions import InputError
22
- from dsp_tools.utils.connection import Connection
23
- from dsp_tools.utils.connection_live import ConnectionLive
31
+ from dsp_tools.error.exceptions import InputError
32
+ from dsp_tools.utils.ansi_colors import BOLD_RED
33
+ from dsp_tools.utils.ansi_colors import RESET_TO_DEFAULT
34
+ from dsp_tools.utils.data_formats.uri_util import is_prod_like_server
35
+ from dsp_tools.utils.replace_id_with_iri import use_id2iri_mapping_to_replace_ids
36
+ from dsp_tools.utils.xml_parsing.parse_clean_validate_xml import parse_and_clean_xml_file
24
37
 
25
38
 
26
39
  def ingest_xmlupload(
27
40
  xml_file: Path,
28
41
  creds: ServerCredentials,
29
42
  interrupt_after: int | None = None,
43
+ skip_validation: bool = False,
44
+ skip_ontology_validation: bool = False,
45
+ id2iri_file: str | None = None,
46
+ do_not_request_resource_metadata_from_db: bool = False,
30
47
  ) -> bool:
31
48
  """
32
49
  This function reads an XML file
@@ -40,6 +57,11 @@ def ingest_xmlupload(
40
57
  xml_file: path to XML file containing the resources
41
58
  creds: credentials to access the DSP server
42
59
  interrupt_after: if set, the upload will be interrupted after this number of resources
60
+ skip_validation: skip the SHACL validation
61
+ skip_ontology_validation: skip the ontology validation
62
+ id2iri_file: to replace internal IDs of an XML file by IRIs provided in this mapping file
63
+ do_not_request_resource_metadata_from_db: if true do not request metadata information from the api
64
+ for existing resources
43
65
 
44
66
  Returns:
45
67
  True if all resources could be uploaded without errors; False if one of the resources could not be
@@ -48,11 +70,12 @@ def ingest_xmlupload(
48
70
  Raises:
49
71
  InputError: if any media was not uploaded or uploaded media was not referenced.
50
72
  """
51
- default_ontology, root, shortcode = _parse_xml_and_replace_filepaths(xml_file)
52
-
53
- con = ConnectionLive(creds.server)
54
- con.login(creds.user, creds.password)
73
+ root = parse_and_clean_xml_file(xml_file)
74
+ shortcode = root.attrib["shortcode"]
75
+ root = _replace_filepaths_with_internal_filename_from_ingest(root, shortcode)
55
76
 
77
+ auth = AuthenticationClientLive(server=creds.server, email=creds.user, password=creds.password)
78
+ con = ConnectionLive(creds.server, auth)
56
79
  config = UploadConfig(
57
80
  media_previously_uploaded=True,
58
81
  interrupt_after=interrupt_after,
@@ -60,34 +83,70 @@ def ingest_xmlupload(
60
83
  server=creds.server,
61
84
  shortcode=shortcode,
62
85
  )
86
+ clients = _get_live_clients(con, config, auth)
87
+
88
+ parsed_resources, lookups = get_parsed_resources_and_mappers(root, clients)
89
+ if id2iri_file:
90
+ parsed_resources = use_id2iri_mapping_to_replace_ids(parsed_resources, Path(id2iri_file))
91
+
92
+ validation_should_be_skipped = skip_validation
93
+ is_on_prod_like_server = is_prod_like_server(creds.server)
94
+ if is_on_prod_like_server and config.skip_validation:
95
+ msg = (
96
+ "You set the flag '--skip-validation' to circumvent the SHACL schema validation. "
97
+ "This means that the upload may fail due to undetected errors. "
98
+ "Do you wish to skip the validation (yes/no)? "
99
+ )
100
+ resp = ""
101
+ while resp not in ["yes", "no"]:
102
+ resp = input(BOLD_RED + msg + RESET_TO_DEFAULT)
103
+ if str(resp) == "no":
104
+ validation_should_be_skipped = False
105
+ if not validation_should_be_skipped:
106
+ v_severity = config.validation_severity
107
+ if is_on_prod_like_server:
108
+ v_severity = ValidationSeverity.INFO
109
+ validation_passed = validate_parsed_resources(
110
+ parsed_resources=parsed_resources,
111
+ authorship_lookup=lookups.authorships,
112
+ permission_ids=list(lookups.permissions.keys()),
113
+ shortcode=shortcode,
114
+ config=ValidateDataConfig(
115
+ xml_file,
116
+ save_graph_dir=None,
117
+ severity=v_severity,
118
+ ignore_duplicate_files_warning=True,
119
+ is_on_prod_server=is_on_prod_like_server,
120
+ skip_ontology_validation=skip_ontology_validation,
121
+ do_not_request_resource_metadata_from_db=do_not_request_resource_metadata_from_db,
122
+ ),
123
+ auth=auth,
124
+ )
125
+ if not validation_passed:
126
+ return False
127
+ else:
128
+ logger.debug("SHACL validation was skipped.")
63
129
 
64
- ontology_client = OntologyClientLive(con=con, shortcode=shortcode, default_ontology=default_ontology)
65
- resources, permissions_lookup, stash = prepare_upload(root, ontology_client)
66
-
67
- clients = _get_live_clients(con, config)
68
- state = UploadState(resources, stash, config, permissions_lookup)
69
-
70
- return execute_upload(clients, state)
130
+ if not config.skip_iiif_validation:
131
+ validate_iiif_uris(root)
71
132
 
133
+ if not is_on_prod_like_server:
134
+ enable_unknown_license_if_any_are_missing(clients.legal_info_client, parsed_resources)
72
135
 
73
- def _parse_xml_and_replace_filepaths(xml_file: Path) -> tuple[str, etree._Element, str]:
74
- """
75
- Validate and parse an upload XML file.
76
- The bulk-ingest must already have taken place, and the mapping CSV must be in the CWD.
136
+ processed_resources = get_processed_resources(parsed_resources, lookups, is_on_prod_like_server)
77
137
 
78
- Args:
79
- xml_file: file that will be parsed
138
+ sorted_resources, stash = get_stash_and_upload_order(processed_resources)
80
139
 
81
- Returns:
82
- The ontology name, the parsed XML file and the shortcode of the project
140
+ state = UploadState(
141
+ pending_resources=sorted_resources,
142
+ pending_stash=stash,
143
+ config=config,
144
+ )
83
145
 
84
- Raises:
85
- InputError: if replacing file paths with internal asset IDs failed
86
- """
87
- root, shortcode, default_ontology = validate_and_parse(xml_file)
146
+ return execute_upload(clients, state)
88
147
 
89
- logger.info(f"Validated and parsed the XML. {shortcode=:} and {default_ontology=:}")
90
148
 
149
+ def _replace_filepaths_with_internal_filename_from_ingest(root: etree._Element, shortcode: str) -> etree._Element:
91
150
  orig_path_2_asset_id = get_mapping_dict_from_file(shortcode)
92
151
  root, ingest_info = replace_filepath_with_internal_filename(root, orig_path_2_asset_id)
93
152
  if ok := ingest_info.ok_msg():
@@ -96,11 +155,12 @@ def _parse_xml_and_replace_filepaths(xml_file: Path) -> tuple[str, etree._Elemen
96
155
  else:
97
156
  err_msg = ingest_info.execute_error_protocol()
98
157
  raise InputError(err_msg)
99
- return default_ontology, root, shortcode
158
+ return root
100
159
 
101
160
 
102
- def _get_live_clients(con: Connection, config: UploadConfig) -> UploadClients:
161
+ def _get_live_clients(con: Connection, config: UploadConfig, auth: AuthenticationClient) -> UploadClients:
103
162
  ingest_client = BulkIngestedAssetClient()
104
- project_client = ProjectClientLive(con, config.shortcode)
105
- list_client = ListClientLive(con, project_client.get_project_iri())
106
- return UploadClients(ingest_client, project_client, list_client)
163
+ project_client = ProjectClientLive(auth.server, auth)
164
+ list_client = ListClientLive(con, project_client.get_project_iri(config.shortcode))
165
+ legal_info_client = LegalInfoClientLive(config.server, config.shortcode, auth)
166
+ return UploadClients(ingest_client, list_client, legal_info_client)
@@ -72,7 +72,7 @@ class IngestInformation:
72
72
  "The data XML file contains references to multimedia files "
73
73
  "which were not previously uploaded through dsp-ingest:\n"
74
74
  f" The file with the resource IDs and problematic filenames was saved at "
75
- f"'{Path(self.csv_directory_path/self.mediafiles_no_id_csv)}'."
75
+ f"'{Path(self.csv_directory_path / self.mediafiles_no_id_csv)}'."
76
76
  )
77
77
  return None
78
78
 
@@ -87,7 +87,7 @@ class IngestInformation:
87
87
  "The data XML file does not reference all the multimedia files which were previously "
88
88
  "uploaded through dsp-ingest.\n"
89
89
  f" The file with the unused filenames was saved at "
90
- f"'{Path(self.csv_directory_path/self.unused_mediafiles_csv)}'."
90
+ f"'{Path(self.csv_directory_path / self.unused_mediafiles_csv)}'."
91
91
  )
92
92
  return None
93
93
 
@@ -1,4 +1,3 @@
1
- from datetime import datetime
2
1
  from pathlib import Path
3
2
  from time import sleep
4
3
  from typing import cast
@@ -7,9 +6,8 @@ from loguru import logger
7
6
  from tqdm import tqdm
8
7
 
9
8
  from dsp_tools.cli.args import ServerCredentials
9
+ from dsp_tools.clients.authentication_client_live import AuthenticationClientLive
10
10
  from dsp_tools.commands.ingest_xmlupload.bulk_ingest_client import BulkIngestClient
11
- from dsp_tools.utils.connection import Connection
12
- from dsp_tools.utils.connection_live import ConnectionLive
13
11
 
14
12
 
15
13
  def ingest_files(creds: ServerCredentials, shortcode: str) -> bool:
@@ -24,9 +22,8 @@ def ingest_files(creds: ServerCredentials, shortcode: str) -> bool:
24
22
  Returns:
25
23
  success status
26
24
  """
27
- con: Connection = ConnectionLive(creds.server)
28
- con.login(creds.user, creds.password)
29
- bulk_ingest_client = BulkIngestClient(creds.dsp_ingest_url, con.get_token(), shortcode)
25
+ auth = AuthenticationClientLive(creds.server, creds.user, creds.password)
26
+ bulk_ingest_client = BulkIngestClient(creds.dsp_ingest_url, auth, shortcode)
30
27
  bulk_ingest_client.trigger_ingest_process()
31
28
  sleep(5)
32
29
  mapping = _retrieve_mapping(bulk_ingest_client)
@@ -35,7 +32,7 @@ def ingest_files(creds: ServerCredentials, shortcode: str) -> bool:
35
32
 
36
33
 
37
34
  def _retrieve_mapping(bulk_ingest_client: BulkIngestClient) -> str:
38
- sleeping_time = 10
35
+ sleeping_time = 60
39
36
  desc = f"Wait until mapping CSV is ready. Ask server every {sleeping_time} seconds "
40
37
  progress_bar = tqdm(
41
38
  bulk_ingest_client.retrieve_mapping_generator(), desc=desc, bar_format="{desc}{elapsed}", dynamic_ncols=True
@@ -58,8 +55,10 @@ def _retrieve_mapping(bulk_ingest_client: BulkIngestClient) -> str:
58
55
  def _save_mapping(mapping: str, shortcode: str) -> None:
59
56
  filepath = Path(f"mapping-{shortcode}.csv")
60
57
  if filepath.exists():
61
- timestamp = datetime.now().strftime("%Y-%m-%d_%H.%M.%S")
62
- filepath = filepath.with_name(f"{filepath.stem}-{timestamp}.csv")
63
- filepath.write_text(mapping)
58
+ i = 1
59
+ while (new_name_for_existing := Path(f"mapping-{shortcode}-{i}.csv")).exists():
60
+ i += 1
61
+ filepath.rename(new_name_for_existing)
62
+ filepath.write_text(mapping, encoding="utf-8")
64
63
  print(f"Saved mapping CSV to '{filepath}'")
65
64
  logger.info(f"Saved mapping CSV to '{filepath}'")
@@ -1,13 +1,13 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from collections.abc import Iterable
3
4
  from pathlib import Path
4
- from typing import Iterable
5
5
 
6
6
  from dsp_tools.commands.ingest_xmlupload.upload_files.input_error import FileProblems
7
7
 
8
8
  SUPPORTED_EXTENSIONS = (
9
- "zip,tar,gz,z,tgz,gzip,7z,mp3,wav,pdf,doc,docx,xls,xlsx,ppt,pptx,"
10
- "mp4,jpg,jpeg,jp2,png,tif,tiff,odd,rng,txt,xml,xsd,xsl,csv"
9
+ "zip,tar,gz,z,tgz,gzip,7z,mp3,wav,pdf,doc,docx,xls,xlsx,ppt,pptx,epub,"
10
+ "mp4,jpg,jpeg,jp2,json,png,tif,tiff,odd,rng,txt,xml,htm,html,xsd,xsl,csv"
11
11
  ).split(",")
12
12
 
13
13
 
@@ -1,22 +1,14 @@
1
1
  from dataclasses import dataclass
2
2
  from pathlib import Path
3
- from typing import Protocol
4
3
 
5
4
  import pandas as pd
6
5
 
6
+ from dsp_tools.error.problems import Problem
7
+
7
8
  separator = "\n\n"
8
9
  list_separator = "\n - "
9
10
 
10
11
 
11
- class Problem(Protocol):
12
- """Information about input errors."""
13
-
14
- def execute_error_protocol(self) -> str:
15
- """
16
- This function initiates all the steps for successful problem communication with the user.
17
- """
18
-
19
-
20
12
  @dataclass(frozen=True)
21
13
  class FileProblems(Problem):
22
14
  """Handle the error communication to the user in case that some files don't exist or are unsupported."""
@@ -5,7 +5,8 @@ import pandas as pd
5
5
  import regex
6
6
 
7
7
  separator = "\n\n"
8
- list_separator = "\n - "
8
+ list_separator = "\n- "
9
+ list_separator_indented = "\n - "
9
10
 
10
11
 
11
12
  @dataclass(frozen=True)
@@ -14,6 +15,8 @@ class UploadFailure:
14
15
 
15
16
  filepath: Path
16
17
  reason: str
18
+ status_code: int | None = None
19
+ response_text: str | None = None
17
20
 
18
21
 
19
22
  @dataclass(frozen=True)
@@ -44,13 +47,20 @@ class UploadFailures:
44
47
  msg += f"The full list of failed files has been saved to '{output_file}'."
45
48
  else:
46
49
  msg += f"Failed to upload the following {len(self.failures)} files:"
47
- msg += list_separator + list_separator.join([f"{x.filepath}: {x.reason}" for x in self.failures])
50
+ for f in self.failures:
51
+ msg += list_separator + f"{f.filepath}: {f.reason}"
52
+ if f.status_code:
53
+ msg += list_separator_indented + f"Status code: {f.status_code}"
54
+ if f.response_text:
55
+ msg += list_separator_indented + f"Response text: {f.response_text}"
48
56
  return msg
49
57
 
50
58
  def _save_to_csv(self, output_file: Path) -> None:
51
59
  data = {
52
60
  "Filepath": [failure.filepath for failure in self.failures],
53
61
  "Reason": [failure.reason for failure in self.failures],
62
+ "Status code": [failure.status_code for failure in self.failures],
63
+ "Response text": [failure.response_text for failure in self.failures],
54
64
  }
55
65
  df = pd.DataFrame(data)
56
66
  df.to_csv(output_file, index=False)
@@ -5,14 +5,13 @@ from lxml import etree
5
5
  from tqdm import tqdm
6
6
 
7
7
  from dsp_tools.cli.args import ServerCredentials
8
+ from dsp_tools.clients.authentication_client_live import AuthenticationClientLive
8
9
  from dsp_tools.commands.ingest_xmlupload.bulk_ingest_client import BulkIngestClient
9
10
  from dsp_tools.commands.ingest_xmlupload.upload_files.filechecker import check_files
10
11
  from dsp_tools.commands.ingest_xmlupload.upload_files.upload_failures import UploadFailure
11
12
  from dsp_tools.commands.ingest_xmlupload.upload_files.upload_failures import UploadFailures
12
- from dsp_tools.commands.xmlupload.read_validate_xml_file import validate_and_parse
13
- from dsp_tools.models.exceptions import InputError
14
- from dsp_tools.utils.connection import Connection
15
- from dsp_tools.utils.connection_live import ConnectionLive
13
+ from dsp_tools.error.exceptions import InputError
14
+ from dsp_tools.utils.xml_parsing.parse_clean_validate_xml import parse_and_clean_xml_file
16
15
 
17
16
 
18
17
  def upload_files(
@@ -32,14 +31,14 @@ def upload_files(
32
31
  Returns:
33
32
  success status
34
33
  """
35
- root, shortcode, _ = validate_and_parse(xml_file)
34
+ root = parse_and_clean_xml_file(xml_file)
35
+ shortcode = root.attrib["shortcode"]
36
36
  paths = _get_validated_paths(root)
37
37
  print(f"Found {len(paths)} files to upload onto server {creds.dsp_ingest_url}.")
38
38
  logger.info(f"Found {len(paths)} files to upload onto server {creds.dsp_ingest_url}.")
39
39
 
40
- con: Connection = ConnectionLive(creds.server)
41
- con.login(creds.user, creds.password)
42
- ingest_client = BulkIngestClient(creds.dsp_ingest_url, con.get_token(), shortcode, imgdir)
40
+ auth = AuthenticationClientLive(creds.server, creds.user, creds.password)
41
+ ingest_client = BulkIngestClient(creds.dsp_ingest_url, auth, shortcode, imgdir)
43
42
 
44
43
  failures: list[UploadFailure] = []
45
44
  progress_bar = tqdm(paths, desc="Uploading files", unit="file(s)", dynamic_ncols=True)
@@ -61,7 +60,7 @@ def upload_files(
61
60
 
62
61
 
63
62
  def _get_validated_paths(root: etree._Element) -> set[Path]:
64
- paths = {Path(x.text) for x in root.xpath("//bitstream")}
63
+ paths = {Path(x.text.strip()) for x in root.xpath("//bitstream")}
65
64
  if problems := check_files(paths):
66
65
  msg = problems.execute_error_protocol()
67
66
  raise InputError(msg)