dsp-tools 0.9.13__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (348)
  1. dsp_tools/__init__.py +5 -0
  2. dsp_tools/cli/args.py +47 -0
  3. dsp_tools/cli/call_action.py +85 -0
  4. dsp_tools/cli/call_action_files_only.py +101 -0
  5. dsp_tools/cli/call_action_with_network.py +207 -0
  6. dsp_tools/cli/create_parsers.py +479 -0
  7. dsp_tools/cli/entry_point.py +322 -0
  8. dsp_tools/cli/utils.py +87 -0
  9. dsp_tools/clients/CLAUDE.md +420 -0
  10. dsp_tools/clients/authentication_client.py +14 -0
  11. dsp_tools/clients/authentication_client_live.py +66 -0
  12. dsp_tools/clients/connection.py +35 -0
  13. dsp_tools/clients/connection_live.py +233 -0
  14. dsp_tools/clients/fuseki_metrics.py +60 -0
  15. dsp_tools/clients/group_user_clients.py +35 -0
  16. dsp_tools/clients/group_user_clients_live.py +181 -0
  17. dsp_tools/clients/legal_info_client.py +23 -0
  18. dsp_tools/clients/legal_info_client_live.py +132 -0
  19. dsp_tools/clients/list_client.py +49 -0
  20. dsp_tools/clients/list_client_live.py +166 -0
  21. dsp_tools/clients/metadata_client.py +24 -0
  22. dsp_tools/clients/metadata_client_live.py +47 -0
  23. dsp_tools/clients/ontology_clients.py +49 -0
  24. dsp_tools/clients/ontology_create_client_live.py +166 -0
  25. dsp_tools/clients/ontology_get_client_live.py +80 -0
  26. dsp_tools/clients/permissions_client.py +68 -0
  27. dsp_tools/clients/project_client.py +16 -0
  28. dsp_tools/clients/project_client_live.py +66 -0
  29. dsp_tools/commands/create/communicate_problems.py +24 -0
  30. dsp_tools/commands/create/create.py +134 -0
  31. dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
  32. dsp_tools/commands/create/create_on_server/classes.py +99 -0
  33. dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
  34. dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
  35. dsp_tools/commands/create/create_on_server/group_users.py +165 -0
  36. dsp_tools/commands/create/create_on_server/lists.py +163 -0
  37. dsp_tools/commands/create/create_on_server/mappers.py +12 -0
  38. dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
  39. dsp_tools/commands/create/create_on_server/ontology.py +52 -0
  40. dsp_tools/commands/create/create_on_server/project.py +68 -0
  41. dsp_tools/commands/create/create_on_server/properties.py +119 -0
  42. dsp_tools/commands/create/exceptions.py +29 -0
  43. dsp_tools/commands/create/lists_only.py +66 -0
  44. dsp_tools/commands/create/models/create_problems.py +87 -0
  45. dsp_tools/commands/create/models/parsed_ontology.py +88 -0
  46. dsp_tools/commands/create/models/parsed_project.py +81 -0
  47. dsp_tools/commands/create/models/rdf_ontology.py +12 -0
  48. dsp_tools/commands/create/models/server_project_info.py +100 -0
  49. dsp_tools/commands/create/parsing/parse_lists.py +45 -0
  50. dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
  51. dsp_tools/commands/create/parsing/parse_project.py +149 -0
  52. dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
  53. dsp_tools/commands/create/project_validate.py +595 -0
  54. dsp_tools/commands/create/serialisation/ontology.py +119 -0
  55. dsp_tools/commands/create/serialisation/project.py +44 -0
  56. dsp_tools/commands/excel2json/CLAUDE.md +101 -0
  57. dsp_tools/commands/excel2json/json_header.py +321 -0
  58. dsp_tools/commands/excel2json/lists/__init__.py +0 -0
  59. dsp_tools/commands/excel2json/lists/compliance_checks.py +292 -0
  60. dsp_tools/commands/excel2json/lists/make_lists.py +247 -0
  61. dsp_tools/commands/excel2json/lists/models/__init__.py +0 -0
  62. dsp_tools/commands/excel2json/lists/models/deserialise.py +30 -0
  63. dsp_tools/commands/excel2json/lists/models/input_error.py +216 -0
  64. dsp_tools/commands/excel2json/lists/models/serialise.py +57 -0
  65. dsp_tools/commands/excel2json/lists/utils.py +81 -0
  66. dsp_tools/commands/excel2json/models/__init__.py +0 -0
  67. dsp_tools/commands/excel2json/models/input_error.py +416 -0
  68. dsp_tools/commands/excel2json/models/json_header.py +175 -0
  69. dsp_tools/commands/excel2json/models/list_node_name.py +16 -0
  70. dsp_tools/commands/excel2json/models/ontology.py +76 -0
  71. dsp_tools/commands/excel2json/old_lists.py +328 -0
  72. dsp_tools/commands/excel2json/project.py +280 -0
  73. dsp_tools/commands/excel2json/properties.py +370 -0
  74. dsp_tools/commands/excel2json/resources.py +336 -0
  75. dsp_tools/commands/excel2json/utils.py +352 -0
  76. dsp_tools/commands/excel2xml/__init__.py +7 -0
  77. dsp_tools/commands/excel2xml/excel2xml_cli.py +523 -0
  78. dsp_tools/commands/excel2xml/excel2xml_lib.py +1953 -0
  79. dsp_tools/commands/excel2xml/propertyelement.py +47 -0
  80. dsp_tools/commands/get/__init__.py +0 -0
  81. dsp_tools/commands/get/get.py +166 -0
  82. dsp_tools/commands/get/get_permissions.py +257 -0
  83. dsp_tools/commands/get/get_permissions_legacy.py +89 -0
  84. dsp_tools/commands/get/legacy_models/__init__.py +0 -0
  85. dsp_tools/commands/get/legacy_models/context.py +318 -0
  86. dsp_tools/commands/get/legacy_models/group.py +241 -0
  87. dsp_tools/commands/get/legacy_models/helpers.py +47 -0
  88. dsp_tools/commands/get/legacy_models/listnode.py +390 -0
  89. dsp_tools/commands/get/legacy_models/model.py +12 -0
  90. dsp_tools/commands/get/legacy_models/ontology.py +324 -0
  91. dsp_tools/commands/get/legacy_models/project.py +366 -0
  92. dsp_tools/commands/get/legacy_models/propertyclass.py +417 -0
  93. dsp_tools/commands/get/legacy_models/resourceclass.py +676 -0
  94. dsp_tools/commands/get/legacy_models/user.py +438 -0
  95. dsp_tools/commands/get/models/__init__.py +0 -0
  96. dsp_tools/commands/get/models/permissions_models.py +10 -0
  97. dsp_tools/commands/id2iri.py +258 -0
  98. dsp_tools/commands/ingest_xmlupload/__init__.py +0 -0
  99. dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +178 -0
  100. dsp_tools/commands/ingest_xmlupload/create_resources/__init__.py +0 -0
  101. dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +69 -0
  102. dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +166 -0
  103. dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +121 -0
  104. dsp_tools/commands/ingest_xmlupload/ingest_files/__init__.py +0 -0
  105. dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +64 -0
  106. dsp_tools/commands/ingest_xmlupload/upload_files/__init__.py +0 -0
  107. dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +20 -0
  108. dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +57 -0
  109. dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +66 -0
  110. dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +67 -0
  111. dsp_tools/commands/resume_xmlupload/__init__.py +0 -0
  112. dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +96 -0
  113. dsp_tools/commands/start_stack.py +428 -0
  114. dsp_tools/commands/update_legal/CLAUDE.md +344 -0
  115. dsp_tools/commands/update_legal/__init__.py +0 -0
  116. dsp_tools/commands/update_legal/core.py +182 -0
  117. dsp_tools/commands/update_legal/csv_operations.py +135 -0
  118. dsp_tools/commands/update_legal/models.py +87 -0
  119. dsp_tools/commands/update_legal/xml_operations.py +247 -0
  120. dsp_tools/commands/validate_data/CLAUDE.md +159 -0
  121. dsp_tools/commands/validate_data/__init__.py +0 -0
  122. dsp_tools/commands/validate_data/constants.py +59 -0
  123. dsp_tools/commands/validate_data/mappers.py +143 -0
  124. dsp_tools/commands/validate_data/models/__init__.py +0 -0
  125. dsp_tools/commands/validate_data/models/api_responses.py +45 -0
  126. dsp_tools/commands/validate_data/models/input_problems.py +119 -0
  127. dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
  128. dsp_tools/commands/validate_data/models/validation.py +106 -0
  129. dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
  130. dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
  131. dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
  132. dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
  133. dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
  134. dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
  135. dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
  136. dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
  137. dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
  138. dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
  139. dsp_tools/commands/validate_data/sparql/cardinality_shacl.py +209 -0
  140. dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
  141. dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
  142. dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
  143. dsp_tools/commands/validate_data/utils.py +59 -0
  144. dsp_tools/commands/validate_data/validate_data.py +283 -0
  145. dsp_tools/commands/validate_data/validation/__init__.py +0 -0
  146. dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
  147. dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
  148. dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
  149. dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
  150. dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
  151. dsp_tools/commands/xmlupload/__init__.py +0 -0
  152. dsp_tools/commands/xmlupload/iri_resolver.py +21 -0
  153. dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
  154. dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
  155. dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
  156. dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
  157. dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
  158. dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
  159. dsp_tools/commands/xmlupload/models/__init__.py +0 -0
  160. dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
  161. dsp_tools/commands/xmlupload/models/formatted_text_value.py +10 -0
  162. dsp_tools/commands/xmlupload/models/ingest.py +143 -0
  163. dsp_tools/commands/xmlupload/models/input_problems.py +58 -0
  164. dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
  165. dsp_tools/commands/xmlupload/models/permission.py +45 -0
  166. dsp_tools/commands/xmlupload/models/permissions_parsed.py +93 -0
  167. dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
  168. dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
  169. dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
  170. dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
  171. dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
  172. dsp_tools/commands/xmlupload/models/upload_clients.py +14 -0
  173. dsp_tools/commands/xmlupload/models/upload_state.py +20 -0
  174. dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
  175. dsp_tools/commands/xmlupload/prepare_xml_input/ark2iri.py +55 -0
  176. dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
  177. dsp_tools/commands/xmlupload/prepare_xml_input/iiif_uri_validator.py +50 -0
  178. dsp_tools/commands/xmlupload/prepare_xml_input/list_client.py +120 -0
  179. dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
  180. dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
  181. dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
  182. dsp_tools/commands/xmlupload/resource_create_client.py +25 -0
  183. dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
  184. dsp_tools/commands/xmlupload/stash/__init__.py +0 -0
  185. dsp_tools/commands/xmlupload/stash/analyse_circular_reference_graph.py +236 -0
  186. dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
  187. dsp_tools/commands/xmlupload/stash/graph_models.py +87 -0
  188. dsp_tools/commands/xmlupload/stash/stash_circular_references.py +68 -0
  189. dsp_tools/commands/xmlupload/stash/stash_models.py +109 -0
  190. dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +106 -0
  191. dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +196 -0
  192. dsp_tools/commands/xmlupload/upload_config.py +76 -0
  193. dsp_tools/commands/xmlupload/write_diagnostic_info.py +27 -0
  194. dsp_tools/commands/xmlupload/xmlupload.py +516 -0
  195. dsp_tools/config/__init__.py +0 -0
  196. dsp_tools/config/logger_config.py +69 -0
  197. dsp_tools/config/warnings_config.py +32 -0
  198. dsp_tools/error/__init__.py +0 -0
  199. dsp_tools/error/custom_warnings.py +39 -0
  200. dsp_tools/error/exceptions.py +204 -0
  201. dsp_tools/error/problems.py +10 -0
  202. dsp_tools/error/xmllib_errors.py +20 -0
  203. dsp_tools/error/xmllib_warnings.py +54 -0
  204. dsp_tools/error/xmllib_warnings_util.py +159 -0
  205. dsp_tools/error/xsd_validation_error_msg.py +19 -0
  206. dsp_tools/legacy_models/__init__.py +0 -0
  207. dsp_tools/legacy_models/datetimestamp.py +81 -0
  208. dsp_tools/legacy_models/langstring.py +253 -0
  209. dsp_tools/legacy_models/projectContext.py +49 -0
  210. dsp_tools/py.typed +0 -0
  211. dsp_tools/resources/schema/data.xsd +648 -0
  212. dsp_tools/resources/schema/lists-only.json +72 -0
  213. dsp_tools/resources/schema/project.json +1258 -0
  214. dsp_tools/resources/schema/properties-only.json +874 -0
  215. dsp_tools/resources/schema/resources-only.json +140 -0
  216. dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
  217. dsp_tools/resources/start-stack/docker-compose.override.yml +11 -0
  218. dsp_tools/resources/start-stack/docker-compose.yml +88 -0
  219. dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
  220. dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
  221. dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
  222. dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
  223. dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
  224. dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
  225. dsp_tools/utils/__init__.py +0 -0
  226. dsp_tools/utils/ansi_colors.py +32 -0
  227. dsp_tools/utils/data_formats/__init__.py +0 -0
  228. dsp_tools/utils/data_formats/date_util.py +166 -0
  229. dsp_tools/utils/data_formats/iri_util.py +30 -0
  230. dsp_tools/utils/data_formats/shared.py +81 -0
  231. dsp_tools/utils/data_formats/uri_util.py +76 -0
  232. dsp_tools/utils/fuseki_bloating.py +63 -0
  233. dsp_tools/utils/json_parsing.py +22 -0
  234. dsp_tools/utils/rdf_constants.py +42 -0
  235. dsp_tools/utils/rdflib_utils.py +10 -0
  236. dsp_tools/utils/replace_id_with_iri.py +66 -0
  237. dsp_tools/utils/request_utils.py +238 -0
  238. dsp_tools/utils/xml_parsing/__init__.py +0 -0
  239. dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
  240. dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
  241. dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
  242. dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
  243. dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
  244. dsp_tools/xmllib/CLAUDE.md +302 -0
  245. dsp_tools/xmllib/__init__.py +49 -0
  246. dsp_tools/xmllib/general_functions.py +877 -0
  247. dsp_tools/xmllib/internal/__init__.py +0 -0
  248. dsp_tools/xmllib/internal/checkers.py +162 -0
  249. dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
  250. dsp_tools/xmllib/internal/constants.py +46 -0
  251. dsp_tools/xmllib/internal/input_converters.py +155 -0
  252. dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
  253. dsp_tools/xmllib/internal/serialise_resource.py +177 -0
  254. dsp_tools/xmllib/internal/serialise_values.py +152 -0
  255. dsp_tools/xmllib/internal/type_aliases.py +11 -0
  256. dsp_tools/xmllib/models/__init__.py +0 -0
  257. dsp_tools/xmllib/models/config_options.py +28 -0
  258. dsp_tools/xmllib/models/date_formats.py +48 -0
  259. dsp_tools/xmllib/models/dsp_base_resources.py +1542 -0
  260. dsp_tools/xmllib/models/internal/__init__.py +0 -0
  261. dsp_tools/xmllib/models/internal/file_values.py +172 -0
  262. dsp_tools/xmllib/models/internal/geometry.py +162 -0
  263. dsp_tools/xmllib/models/internal/migration_metadata.py +55 -0
  264. dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
  265. dsp_tools/xmllib/models/internal/values.py +342 -0
  266. dsp_tools/xmllib/models/licenses/__init__.py +0 -0
  267. dsp_tools/xmllib/models/licenses/other.py +59 -0
  268. dsp_tools/xmllib/models/licenses/recommended.py +107 -0
  269. dsp_tools/xmllib/models/permissions.py +41 -0
  270. dsp_tools/xmllib/models/res.py +1782 -0
  271. dsp_tools/xmllib/models/root.py +348 -0
  272. dsp_tools/xmllib/value_checkers.py +434 -0
  273. dsp_tools/xmllib/value_converters.py +777 -0
  274. dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
  275. dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
  276. dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
  277. dsp_tools-18.3.0.post13.dist-info/entry_points.txt +3 -0
  278. dsp_tools-0.9.13.dist-info/LICENSE +0 -674
  279. dsp_tools-0.9.13.dist-info/METADATA +0 -144
  280. dsp_tools-0.9.13.dist-info/RECORD +0 -71
  281. dsp_tools-0.9.13.dist-info/WHEEL +0 -5
  282. dsp_tools-0.9.13.dist-info/entry_points.txt +0 -3
  283. dsp_tools-0.9.13.dist-info/top_level.txt +0 -1
  284. dsplib/models/connection.py +0 -272
  285. dsplib/models/group.py +0 -296
  286. dsplib/models/helpers.py +0 -505
  287. dsplib/models/langstring.py +0 -277
  288. dsplib/models/listnode.py +0 -578
  289. dsplib/models/model.py +0 -20
  290. dsplib/models/ontology.py +0 -448
  291. dsplib/models/permission.py +0 -112
  292. dsplib/models/project.py +0 -547
  293. dsplib/models/propertyclass.py +0 -505
  294. dsplib/models/resource.py +0 -366
  295. dsplib/models/resourceclass.py +0 -810
  296. dsplib/models/sipi.py +0 -30
  297. dsplib/models/user.py +0 -731
  298. dsplib/models/value.py +0 -1000
  299. dsplib/utils/knora-data-schema.xsd +0 -454
  300. dsplib/utils/knora-schema-lists.json +0 -83
  301. dsplib/utils/knora-schema.json +0 -434
  302. dsplib/utils/onto_commons.py +0 -24
  303. dsplib/utils/onto_create_lists.py +0 -73
  304. dsplib/utils/onto_create_ontology.py +0 -442
  305. dsplib/utils/onto_get.py +0 -58
  306. dsplib/utils/onto_validate.py +0 -33
  307. dsplib/utils/xml_upload.py +0 -539
  308. dsplib/widgets/doublepassword.py +0 -80
  309. knora/MLS-import-libraries.py +0 -84
  310. knora/dsp_tools.py +0 -96
  311. knora/dsplib/models/connection.py +0 -272
  312. knora/dsplib/models/group.py +0 -296
  313. knora/dsplib/models/helpers.py +0 -506
  314. knora/dsplib/models/langstring.py +0 -277
  315. knora/dsplib/models/listnode.py +0 -578
  316. knora/dsplib/models/model.py +0 -20
  317. knora/dsplib/models/ontology.py +0 -448
  318. knora/dsplib/models/permission.py +0 -112
  319. knora/dsplib/models/project.py +0 -583
  320. knora/dsplib/models/propertyclass.py +0 -505
  321. knora/dsplib/models/resource.py +0 -416
  322. knora/dsplib/models/resourceclass.py +0 -811
  323. knora/dsplib/models/sipi.py +0 -35
  324. knora/dsplib/models/user.py +0 -731
  325. knora/dsplib/models/value.py +0 -1000
  326. knora/dsplib/utils/knora-data-schema.xsd +0 -464
  327. knora/dsplib/utils/knora-schema-lists.json +0 -83
  328. knora/dsplib/utils/knora-schema.json +0 -444
  329. knora/dsplib/utils/onto_commons.py +0 -24
  330. knora/dsplib/utils/onto_create_lists.py +0 -73
  331. knora/dsplib/utils/onto_create_ontology.py +0 -451
  332. knora/dsplib/utils/onto_get.py +0 -58
  333. knora/dsplib/utils/onto_validate.py +0 -33
  334. knora/dsplib/utils/xml_upload.py +0 -540
  335. knora/dsplib/widgets/doublepassword.py +0 -80
  336. knora/knora.py +0 -2108
  337. knora/test.py +0 -99
  338. knora/testit.py +0 -76
  339. knora/xml2knora.py +0 -633
  340. {dsplib → dsp_tools/cli}/__init__.py +0 -0
  341. {dsplib/models → dsp_tools/clients}/__init__.py +0 -0
  342. {dsplib/utils → dsp_tools/commands}/__init__.py +0 -0
  343. {dsplib/widgets → dsp_tools/commands/create}/__init__.py +0 -0
  344. {knora → dsp_tools/commands/create/create_on_server}/__init__.py +0 -0
  345. {knora/dsplib → dsp_tools/commands/create/models}/__init__.py +0 -0
  346. {knora/dsplib/models → dsp_tools/commands/create/parsing}/__init__.py +0 -0
  347. {knora/dsplib/utils → dsp_tools/commands/create/serialisation}/__init__.py +0 -0
  348. {knora/dsplib/widgets → dsp_tools/commands/excel2json}/__init__.py +0 -0
@@ -0,0 +1,258 @@
1
+ import copy
2
+ import json
3
+ import warnings
4
+ from collections.abc import Mapping
5
+ from datetime import datetime
6
+ from pathlib import Path
7
+
8
+ import regex
9
+ from loguru import logger
10
+ from lxml import etree
11
+
12
+ from dsp_tools.error.custom_warnings import DspToolsUserWarning
13
+ from dsp_tools.error.exceptions import InputError
14
+ from dsp_tools.utils.xml_parsing.parse_clean_validate_xml import parse_and_clean_xml_file
15
+
16
+
17
def _check_input_parameters(
    xml_file: str,
    json_file: str,
) -> tuple[Path, Path]:
    """
    Convert both input parameters to Path objects
    and make sure that each of them points to an existing file.

    Args:
        xml_file: the XML file with the data to be replaced
        json_file: the JSON file with the mapping (dict) of internal IDs to IRIs

    Raises:
        InputError: if one of the files could not be found

    Returns:
        the path of the XML file and the path of the JSON file, in this order
    """
    checked_paths: list[Path] = []
    # The XML file is checked first, so a missing XML file is reported before a missing JSON file.
    for raw_path in (xml_file, json_file):
        candidate = Path(raw_path)
        if not candidate.is_file():
            not_found_msg = f"File {raw_path} could not be found."
            logger.error(not_found_msg)
            raise InputError(not_found_msg)
        checked_paths.append(candidate)

    xml_path, json_path = checked_paths
    return xml_path, json_path
46
+
47
+
48
def _parse_json_file(json_file: Path) -> dict[str, str]:
    """
    Load the content of a JSON file (read as UTF-8).

    Args:
        json_file: path to JSON file

    Returns:
        the deserialised content of the JSON file
    """
    raw_text = json_file.read_text(encoding="utf-8")
    # No validation happens here: the content is returned as deserialised, only annotated as dict[str, str].
    id_to_iri: dict[str, str] = json.loads(raw_text)
    return id_to_iri
61
+
62
+
63
def _replace_resptrs(
    tree: etree._Element,
    mapping: Mapping[str, str],
    used_mapping_entries: set[str],
) -> tuple[etree._Element, set[str]]:
    """
    Swap the internal IDs found in `<resptr>`, `<isSegmentOf>` and `<relatesTo>` elements for their IRIs.
    The input tree is left untouched, the replacement happens on a deep copy.

    Args:
        tree: parsed XML file
        mapping: mapping of internal IDs to IRIs
        used_mapping_entries: IDs of the mapping that have already been used;
            this set is extended in place with the IDs replaced here

    Returns:
        a tuple of the modified copy of the XML tree, and the set of the IDs that have been replaced
    """
    tree_copy = copy.deepcopy(tree)

    resource_paths = [f"/knora/{tag}/resptr-prop/resptr" for tag in ["resource", "link", "region"]]
    segment_paths = [
        f"/knora/{media}-segment/{link_tag}"
        for link_tag in ["isSegmentOf", "relatesTo"]
        for media in ["video", "audio"]
    ]
    candidates = tree_copy.xpath("|".join(resource_paths + segment_paths))

    replaced_count = 0
    for elem in candidates:
        internal_id = elem.text
        iri = mapping.get(internal_id)
        if not iri:
            # ID is not in the mapping (or maps to an empty value): keep the original text
            continue
        elem.text = iri
        replaced_count += 1
        used_mapping_entries.add(internal_id)

    summary = f"Replaced {replaced_count}/{len(candidates)} resptr links in the XML file"
    logger.info(summary)
    print(summary)

    return tree_copy, used_mapping_entries
97
+
98
+
99
def _replace_salsah_links(
    tree: etree._Element,
    mapping: Mapping[str, str],
    used_mapping_entries: set[str],
) -> tuple[etree._Element, set[str]]:
    """
    Swap the internal IDs in the `href` of salsah-links (`<a class="salsah-link">`) for their IRIs.
    Links are searched inside `<text>`, `<hasComment>` and `<hasDescription>` elements.
    The input tree is left untouched, the replacement happens on a deep copy.

    Args:
        tree: parsed XML file
        mapping: mapping of internal IDs to IRIs
        used_mapping_entries: IDs of the mapping that have already been used;
            this set is extended in place with the IDs replaced here

    Returns:
        a tuple of the modified copy of the XML tree, and the set of the IDs that have been replaced
    """
    tree_copy = copy.deepcopy(tree)

    text_paths = [f"/knora/{tag}/text-prop/text//a" for tag in ["resource", "link", "region"]]
    segment_paths = [
        f"/knora/{media}-segment/{text_tag}//a"
        for text_tag in ["hasComment", "hasDescription"]
        for media in ["video", "audio"]
    ]
    anchors = tree_copy.xpath("|".join(text_paths + segment_paths))
    # Only anchors marked as salsah-link are considered, other hyperlinks are left alone
    salsah_anchors = [anchor for anchor in anchors if anchor.attrib.get("class") == "salsah-link"]

    replaced_count = 0
    for anchor in salsah_anchors:
        href = anchor.attrib.get("href", "")
        # Strip the "IRI:" / ":IRI" markers from the href to obtain the bare ID
        internal_id = regex.sub("IRI:|:IRI", "", href)
        iri = mapping.get(internal_id)
        if not iri:
            continue
        anchor.attrib["href"] = iri
        replaced_count += 1
        used_mapping_entries.add(internal_id)

    summary = f"Replaced {replaced_count}/{len(salsah_anchors)} salsah-links in the XML file"
    logger.info(summary)
    print(summary)

    return tree_copy, used_mapping_entries
133
+
134
+
135
def _replace_ids_by_iris(
    tree: etree._Element,
    mapping: Mapping[str, str],
) -> etree._Element:
    """
    Replace internal IDs by IRIs, first in the resptr links, then in the salsah-links.
    An internal ID that is not part of the mapping stays as it is.

    Args:
        tree: parsed XML file
        mapping: mapping of internal IDs to IRIs

    Returns:
        a modified copy of the XML tree
    """
    used_ids: set[str] = set()

    # Both steps share one signature; each receives the tree produced by the previous step.
    for replace_step in (_replace_resptrs, _replace_salsah_links):
        tree, used_ids = replace_step(
            tree=tree,
            mapping=mapping,
            used_mapping_entries=used_ids,
        )

    usage_msg = f"Used {len(used_ids)}/{len(mapping)} entries from the mapping file"
    logger.info(usage_msg)
    print(usage_msg)

    return tree
169
+
170
+
171
def _remove_resources_if_id_in_mapping(
    tree: etree._Element,
    mapping: Mapping[str, str],
) -> etree._Element:
    """
    Drop every resource whose `id` attribute is a key of the mapping.
    The input tree is left untouched, the removal happens on a deep copy.
    A warning with the number of removed resources is always emitted, even if nothing was removed.

    Args:
        tree: parsed XML file
        mapping: mapping of internal IDs to IRIs

    Returns:
        a modified copy of the XML tree
    """
    tree_copy = copy.deepcopy(tree)

    resource_tags = ["resource", "link", "region", "video-segment", "audio-segment"]
    all_resources = tree_copy.xpath("|".join(f"/knora/{tag}" for tag in resource_tags))

    mapped_resources = [res for res in all_resources if res.attrib.get("id") in mapping]
    for res in mapped_resources:
        res.getparent().remove(res)

    msg = (
        f"Removed {len(mapped_resources)}/{len(all_resources)} resources from the XML file, "
        "because their ID was in the mapping"
    )
    logger.warning(msg)
    warnings.warn(DspToolsUserWarning(msg))

    return tree_copy
201
+
202
+
203
def _write_output_file(
    orig_xml_file: Path,
    tree: etree._Element,
) -> None:
    """
    Serialise the modified XML tree to a new file named "[original stem]_replaced_[timestamp].xml".
    The file name is a relative path without any directory part.

    Args:
        orig_xml_file: XML file that was provided as input
        tree: modified XML tree with replaced IDs
    """
    timestamp = f"{datetime.now():%Y%m%d-%H%M%S}"
    out_file = f"{orig_xml_file.stem}_replaced_{timestamp}.xml"

    etree.ElementTree(tree).write(out_file, pretty_print=True, xml_declaration=True, encoding="utf-8")

    done_msg = f"XML with replaced IDs was written to file {out_file}."
    logger.info(done_msg)
    print(done_msg)
220
+
221
+
222
def id2iri(
    xml_file: str,
    json_file: str,
    remove_resource_if_id_in_mapping: bool = False,
) -> bool:
    """
    Take an XML file and a JSON mapping file,
    and replace the internal IDs of the XML file
    (`<resptr>` tags and salsah-links inside `<text>` tags)
    by the IRIs found in the mapping.
    An internal ID that is not part of the mapping stays as it is.
    The result is written to a new XML file named "[original name]_replaced_[timestamp].xml".

    Args:
        xml_file: the XML file with the data to be replaced
        json_file: the JSON file with the mapping (dict) of internal IDs to IRIs
        remove_resource_if_id_in_mapping: if True, remove all resources from the XML file if their ID is in the mapping

    Raises:
        InputError: if one of the two input files is not a valid file

    Returns:
        success status
    """
    xml_path, json_path = _check_input_parameters(xml_file=xml_file, json_file=json_file)

    # The mapping is loaded before the XML is parsed
    id_to_iri = _parse_json_file(json_path)
    root = parse_and_clean_xml_file(xml_path)

    root = _replace_ids_by_iris(tree=root, mapping=id_to_iri)
    if remove_resource_if_id_in_mapping:
        # Removal runs after the replacement, on the tree that already contains the IRIs
        root = _remove_resources_if_id_in_mapping(tree=root, mapping=id_to_iri)

    _write_output_file(orig_xml_file=xml_path, tree=root)
    return True
File without changes
@@ -0,0 +1,178 @@
1
+ import urllib.parse
2
+ from collections.abc import Iterator
3
+ from dataclasses import dataclass
4
+ from dataclasses import field
5
+ from http import HTTPStatus
6
+ from pathlib import Path
7
+
8
+ import regex
9
+ from loguru import logger
10
+ from requests import JSONDecodeError
11
+ from requests import RequestException
12
+ from requests import Session
13
+ from requests.adapters import HTTPAdapter
14
+ from requests.adapters import Retry
15
+
16
+ from dsp_tools.clients.authentication_client import AuthenticationClient
17
+ from dsp_tools.commands.ingest_xmlupload.upload_files.upload_failures import UploadFailure
18
+ from dsp_tools.config.logger_config import LOGGER_SAVEPATH
19
+ from dsp_tools.error.exceptions import BadCredentialsError
20
+ from dsp_tools.error.exceptions import InputError
21
+ from dsp_tools.utils.request_utils import RequestParameters
22
+ from dsp_tools.utils.request_utils import log_request
23
+ from dsp_tools.utils.request_utils import log_response
24
+
25
+
26
@dataclass
class BulkIngestClient:
    """Client to upload multiple files to the ingest server and monitor the ingest process."""

    dsp_ingest_url: str
    authentication_client: AuthenticationClient
    shortcode: str
    # Resolved when the instance is created (not once at import time),
    # so a change of the working directory before instantiation is taken into account.
    imgdir: Path = field(default_factory=Path.cwd)
    session: Session = field(init=False)
    # Not annotated, hence a plain class attribute and not a dataclass field.
    # The first assignment through `self` creates a per-instance counter.
    retrieval_failures = 0

    def __post_init__(self) -> None:
        """Create the session and mount a retrying adapter for both the http and the https scheme."""
        retries = 6
        self.session = Session()
        retry = Retry(
            total=retries,
            read=retries,
            connect=retries,
            backoff_factor=0.3,
            allowed_methods=None,  # means all methods
            status_forcelist=[HTTPStatus.INTERNAL_SERVER_ERROR, HTTPStatus.SERVICE_UNAVAILABLE],
        )
        adapter = HTTPAdapter(max_retries=retry)
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

    def upload_file(
        self,
        filepath: Path,
    ) -> UploadFailure | None:
        """
        Uploads a file to the ingest server.
        The load balancer on DSP servers currently has a timeout of 10m,
        so we need to use a slightly shorter timeout of 9m.
        See https://linear.app/dasch/issue/INFRA-847/increase-traefik-readtimeout

        Args:
            filepath: path of the file; it is joined to `imgdir` for reading and encoded into the request URL

        Returns:
            None if the server answered with status 200, otherwise an UploadFailure describing what went wrong
        """
        logger.debug(f"Uploading file '{filepath}'")
        timeout = 9 * 60
        url = self._build_url_for_bulk_ingest_ingest_route(filepath)
        headers = {
            "Content-Type": "application/octet-stream",
            "Authorization": f"Bearer {self.authentication_client.get_token()}",
        }
        err_msg = f"Failed to upload '{filepath}' to '{url}'."
        params = RequestParameters("POST", url, timeout, headers=headers)
        log_request(params)
        try:
            with open(self.imgdir / filepath, "rb") as binary_io:
                res = self.session.post(
                    url=params.url,
                    headers=params.headers,
                    data=binary_io,  # https://requests.readthedocs.io/en/latest/user/advanced/#streaming-uploads
                    timeout=params.timeout,
                )
                log_response(res)
        except RequestException as e:
            logger.exception(err_msg)
            return UploadFailure(filepath, f"Exception of requests library: {e}")
        except OSError as e:
            err_msg = f"Cannot bulk-ingest {filepath}, because the file could not be opened/read: {e.strerror}"
            logger.error(err_msg)
            return UploadFailure(filepath, err_msg)
        if res.status_code != HTTPStatus.OK:
            logger.error(f"{err_msg}: Response {res.status_code}: {res.text}")
            return UploadFailure(filepath, res.reason, res.status_code, res.text)
        return None

    def _build_url_for_bulk_ingest_ingest_route(self, filepath: Path) -> str:
        """
        Remove the leading slash of absolute filepaths,
        because the `/project/<shortcode>/bulk-ingest/ingest` route only accepts relative paths.
        The leading slash has to be added again in the "ingest-xmlupload" step, when applying the ingest ID.

        Args:
            filepath: filepath

        Returns:
            url
        """
        # With safe="", slashes are percent-encoded too, so a leading "/" shows up as "%2F".
        quoted = regex.sub(r"^%2F", "", urllib.parse.quote(str(filepath), safe=""))
        return f"{self.dsp_ingest_url}/projects/{self.shortcode}/bulk-ingest/ingest/{quoted}"

    def trigger_ingest_process(self) -> None:
        """
        Start the ingest process on the server.

        If the server answers with status 409 (ingest already running),
        the user is informed and the method returns without raising.

        Raises:
            BadCredentialsError: if the server answers with status 403
            InputError: if the server answers with status 404, 500 or 503,
                or if the response body is not a JSON object whose "id" equals the shortcode
        """
        url = f"{self.dsp_ingest_url}/projects/{self.shortcode}/bulk-ingest"
        timeout = 5
        headers = {"Authorization": f"Bearer {self.authentication_client.get_token()}"}
        params = RequestParameters("POST", url, timeout, headers=headers)
        log_request(params)
        res = self.session.post(params.url, timeout=params.timeout, headers=params.headers)
        log_response(res)
        if res.status_code == HTTPStatus.FORBIDDEN:
            raise BadCredentialsError("Only ProjectAdmins or SystemAdmins can start the ingest process.")
        if res.status_code == HTTPStatus.NOT_FOUND:
            raise InputError(
                f"No assets have been uploaded for project {self.shortcode}. "
                "Before using the 'ingest-files' command, you must upload some files with the 'upload-files' command."
            )
        if res.status_code == HTTPStatus.CONFLICT:
            msg = f"Ingest process on the server {self.dsp_ingest_url} is already running. Wait until it completes..."
            print(msg)
            logger.info(msg)
            return
        if res.status_code in [HTTPStatus.INTERNAL_SERVER_ERROR, HTTPStatus.SERVICE_UNAVAILABLE]:
            raise InputError("Server is unavailable. Please try again later.")

        try:
            body = res.json()
        except JSONDecodeError:
            body = None
        # A body that is valid JSON but not an object (e.g. a list) has no "id" to compare.
        # It is handled like undecodable JSON instead of failing with an AttributeError.
        returned_shortcode = body.get("id") if isinstance(body, dict) else None
        if returned_shortcode != self.shortcode:
            raise InputError("Failed to trigger the ingest process. Please check the server logs, or try again later.")
        print(f"Kicked off the ingest process on the server {self.dsp_ingest_url}. Wait until it completes...")
        logger.info(f"Kicked off the ingest process on the server {self.dsp_ingest_url}. Wait until it completes...")

    def retrieve_mapping_generator(self) -> Iterator[str | bool]:
        """
        Try to retrieve the mapping CSV from the server.

        Yields:
            True if the ingest process is still running.
            False if there is a server error.
            The mapping CSV if the ingest process has completed.

        Raises:
            InputError: if there are too many server errors in a row.
        """
        url = f"{self.dsp_ingest_url}/projects/{self.shortcode}/bulk-ingest/mapping.csv"
        timeout = 5
        while True:
            # The token is requested anew for every attempt.
            headers = {"Authorization": f"Bearer {self.authentication_client.get_token()}"}
            params = RequestParameters("GET", url, timeout, headers=headers)
            log_request(params)
            res = self.session.get(params.url, timeout=params.timeout, headers=params.headers)
            log_response(res)
            if res.status_code == HTTPStatus.CONFLICT:
                # "Still running" is not an error, so the consecutive-failure counter starts over.
                self.retrieval_failures = 0
                logger.info("Ingest process is still running. Wait until it completes...")
                yield True
            elif res.status_code != HTTPStatus.OK or not res.text.startswith("original,derivative"):
                self.retrieval_failures += 1
                if self.retrieval_failures > 15:
                    raise InputError(f"There were too many server errors. Please check the logs at {LOGGER_SAVEPATH}.")
                msg = "While retrieving the mapping CSV, the server responded with an unexpected status code/content."
                logger.error(msg)
                yield False
            else:
                logger.info("Ingest process completed.")
                break
        yield res.content.decode("utf-8")
@@ -0,0 +1,69 @@
1
+ from __future__ import annotations
2
+
3
+ from copy import deepcopy
4
+ from pathlib import Path
5
+ from typing import cast
6
+
7
+ import pandas as pd
8
+ from loguru import logger
9
+ from lxml import etree
10
+
11
+ from dsp_tools.commands.ingest_xmlupload.create_resources.user_information import IngestInformation
12
+ from dsp_tools.error.exceptions import InputError
13
+
14
+
15
def get_mapping_dict_from_file(shortcode: str) -> dict[str, str]:
    """
    Return the information needed to replace the original filepaths with the identifiers from dsp-ingest.

    The mapping is read from the file `mapping-<shortcode>.csv`, looked up relative to the current working directory.
    Its columns "original" and "derivative" provide the keys and the values of the returned dictionary.

    Args:
        shortcode: Shortcode of the project

    Returns:
        dictionary with original: identifier from dsp-ingest

    Raises:
        InputError: if no file was found
    """
    filepath = Path(f"mapping-{shortcode}.csv")
    if not filepath.is_file():
        raise InputError(f"No mapping CSV file was found at {filepath}.")
    # Read every cell as text: with pandas' type inference, a purely numeric path or ID
    # would be turned into an int/float and would no longer match the strings of the XML file.
    df = pd.read_csv(filepath, dtype=str)
    msg = f"The file '{filepath}' is used to map the internal original filepaths to the internal image IDs."
    print(msg)
    logger.info(msg)
    return dict(zip(df["original"].tolist(), df["derivative"].tolist()))
36
+
37
+
38
def replace_filepath_with_internal_filename(
    xml_tree: etree._Element,
    orig_path_2_asset_id: dict[str, str],
) -> tuple[etree._Element, IngestInformation]:
    """
    Replace the original filepaths in the `<bitstream>` tags by the id filenames of the uploaded files.

    The input tree is not modified; all replacements happen on a deep copy.

    Args:
        xml_tree: The parsed original XML tree
        orig_path_2_asset_id: Mapping from original filenames to asset IDs from the mapping.csv

    Returns:
        A copy of the XML tree, with the replaced filepaths.
        Information about mapping entries that were never referenced,
        and about referenced files for which no ID was found (as tuples of parent "id" attribute and filepath).
    """
    no_id_found = []
    # A set keeps the "was this path used?" check below constant-time per mapping entry.
    used_media_file_paths: set[str] = set()
    new_tree = deepcopy(xml_tree)
    for elem in new_tree.iter():
        if not etree.QName(elem).localname.endswith("bitstream") or not elem.text:
            continue
        img_path_str = elem.text.strip()
        # An absolute path may be listed in the mapping without its leading slash, so retry without it.
        if img_path_str not in orig_path_2_asset_id and img_path_str.startswith("/"):
            img_path_str = img_path_str[1:]
        if img_path_str in orig_path_2_asset_id:
            elem.text = orig_path_2_asset_id[img_path_str]
            used_media_file_paths.add(img_path_str)
        else:
            no_id_found.append((cast("etree._Element", elem.getparent()).attrib["id"], str(elem.text)))

    # Iterating the mapping keeps the unused paths in the order of the mapping.
    unused_media_paths = [x for x in orig_path_2_asset_id if x not in used_media_file_paths]
    return new_tree, IngestInformation(unused_mediafiles=unused_media_paths, mediafiles_no_id=no_id_found)
@@ -0,0 +1,166 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from loguru import logger
6
+ from lxml import etree
7
+
8
+ from dsp_tools.cli.args import ServerCredentials
9
+ from dsp_tools.cli.args import ValidateDataConfig
10
+ from dsp_tools.cli.args import ValidationSeverity
11
+ from dsp_tools.clients.authentication_client import AuthenticationClient
12
+ from dsp_tools.clients.authentication_client_live import AuthenticationClientLive
13
+ from dsp_tools.clients.connection import Connection
14
+ from dsp_tools.clients.connection_live import ConnectionLive
15
+ from dsp_tools.clients.legal_info_client_live import LegalInfoClientLive
16
+ from dsp_tools.clients.project_client_live import ProjectClientLive
17
+ from dsp_tools.commands.ingest_xmlupload.create_resources.apply_ingest_id import get_mapping_dict_from_file
18
+ from dsp_tools.commands.ingest_xmlupload.create_resources.apply_ingest_id import replace_filepath_with_internal_filename
19
+ from dsp_tools.commands.validate_data.validate_data import validate_parsed_resources
20
+ from dsp_tools.commands.xmlupload.models.ingest import BulkIngestedAssetClient
21
+ from dsp_tools.commands.xmlupload.models.upload_clients import UploadClients
22
+ from dsp_tools.commands.xmlupload.models.upload_state import UploadState
23
+ from dsp_tools.commands.xmlupload.prepare_xml_input.get_processed_resources import get_processed_resources
24
+ from dsp_tools.commands.xmlupload.prepare_xml_input.list_client import ListClientLive
25
+ from dsp_tools.commands.xmlupload.prepare_xml_input.prepare_xml_input import get_parsed_resources_and_mappers
26
+ from dsp_tools.commands.xmlupload.prepare_xml_input.prepare_xml_input import get_stash_and_upload_order
27
+ from dsp_tools.commands.xmlupload.prepare_xml_input.read_validate_xml_file import validate_iiif_uris
28
+ from dsp_tools.commands.xmlupload.upload_config import UploadConfig
29
+ from dsp_tools.commands.xmlupload.xmlupload import enable_unknown_license_if_any_are_missing
30
+ from dsp_tools.commands.xmlupload.xmlupload import execute_upload
31
+ from dsp_tools.error.exceptions import InputError
32
+ from dsp_tools.utils.ansi_colors import BOLD_RED
33
+ from dsp_tools.utils.ansi_colors import RESET_TO_DEFAULT
34
+ from dsp_tools.utils.data_formats.uri_util import is_prod_like_server
35
+ from dsp_tools.utils.replace_id_with_iri import use_id2iri_mapping_to_replace_ids
36
+ from dsp_tools.utils.xml_parsing.parse_clean_validate_xml import parse_and_clean_xml_file
37
+
38
+
39
def ingest_xmlupload(
    xml_file: Path,
    creds: ServerCredentials,
    interrupt_after: int | None = None,
    skip_validation: bool = False,
    skip_ontology_validation: bool = False,
    id2iri_file: str | None = None,
    do_not_request_resource_metadata_from_db: bool = False,
) -> bool:
    """
    This function reads an XML file
    and imports the data described in it onto the DSP server,
    using the ingest XML upload method.
    Before using this function,
    the multimedia files must be ingested on the DSP server.
    A mapping file with the internal IDs of the multimedia files must also be provided.

    Args:
        xml_file: path to XML file containing the resources
        creds: credentials to access the DSP server
        interrupt_after: if set, the upload will be interrupted after this number of resources
        skip_validation: skip the SHACL validation
        skip_ontology_validation: skip the ontology validation
        id2iri_file: to replace internal IDs of an XML file by IRIs provided in this mapping file
        do_not_request_resource_metadata_from_db: if true do not request metadata information from the api
            for existing resources

    Returns:
        True if all resources could be uploaded without errors; False if one of the resources could not be
        uploaded because there is an error in it, or if the validation did not pass

    Raises:
        InputError: if any media was not uploaded or uploaded media was not referenced.
    """
    root = parse_and_clean_xml_file(xml_file)
    shortcode = root.attrib["shortcode"]
    root = _replace_filepaths_with_internal_filename_from_ingest(root, shortcode)

    auth = AuthenticationClientLive(server=creds.server, email=creds.user, password=creds.password)
    con = ConnectionLive(creds.server, auth)
    config = UploadConfig(
        media_previously_uploaded=True,
        interrupt_after=interrupt_after,
    ).with_server_info(
        server=creds.server,
        shortcode=shortcode,
    )
    clients = _get_live_clients(con, config, auth)

    parsed_resources, lookups = get_parsed_resources_and_mappers(root, clients)
    if id2iri_file:
        parsed_resources = use_id2iri_mapping_to_replace_ids(parsed_resources, Path(id2iri_file))

    validation_should_be_skipped = skip_validation
    is_on_prod_like_server = is_prod_like_server(creds.server)
    # Decide on the caller's flag: `config` is built above without `skip_validation`,
    # so `config.skip_validation` cannot tell whether '--skip-validation' was set.
    if is_on_prod_like_server and validation_should_be_skipped:
        msg = (
            "You set the flag '--skip-validation' to circumvent the SHACL schema validation. "
            "This means that the upload may fail due to undetected errors. "
            "Do you wish to skip the validation (yes/no)? "
        )
        resp = ""
        while resp not in ["yes", "no"]:
            resp = input(BOLD_RED + msg + RESET_TO_DEFAULT)
        if resp == "no":
            validation_should_be_skipped = False
    if not validation_should_be_skipped:
        v_severity = config.validation_severity
        if is_on_prod_like_server:
            # On prod-like servers, the configured severity is overridden with INFO.
            v_severity = ValidationSeverity.INFO
        validation_passed = validate_parsed_resources(
            parsed_resources=parsed_resources,
            authorship_lookup=lookups.authorships,
            permission_ids=list(lookups.permissions.keys()),
            shortcode=shortcode,
            config=ValidateDataConfig(
                xml_file,
                save_graph_dir=None,
                severity=v_severity,
                ignore_duplicate_files_warning=True,
                is_on_prod_server=is_on_prod_like_server,
                skip_ontology_validation=skip_ontology_validation,
                do_not_request_resource_metadata_from_db=do_not_request_resource_metadata_from_db,
            ),
            auth=auth,
        )
        if not validation_passed:
            return False
    else:
        logger.debug("SHACL validation was skipped.")

    if not config.skip_iiif_validation:
        validate_iiif_uris(root)

    if not is_on_prod_like_server:
        enable_unknown_license_if_any_are_missing(clients.legal_info_client, parsed_resources)

    processed_resources = get_processed_resources(parsed_resources, lookups, is_on_prod_like_server)

    sorted_resources, stash = get_stash_and_upload_order(processed_resources)

    state = UploadState(
        pending_resources=sorted_resources,
        pending_stash=stash,
        config=config,
    )

    return execute_upload(clients, state)
147
+
148
+
149
def _replace_filepaths_with_internal_filename_from_ingest(root: etree._Element, shortcode: str) -> etree._Element:
    """
    Load the mapping CSV of the project and apply it to the bitstream filepaths of the XML tree.

    Args:
        root: parsed XML tree
        shortcode: shortcode of the project, used to locate the mapping file

    Returns:
        the tree returned by the replacement step

    Raises:
        InputError: if the ingest information yields no success message;
            the error text comes from its error protocol
    """
    mapping = get_mapping_dict_from_file(shortcode)
    updated_root, ingest_info = replace_filepath_with_internal_filename(root, mapping)
    success_msg = ingest_info.ok_msg()
    if not success_msg:
        raise InputError(ingest_info.execute_error_protocol())
    print(success_msg)
    logger.info(success_msg)
    return updated_root
159
+
160
+
161
def _get_live_clients(con: Connection, config: UploadConfig, auth: AuthenticationClient) -> UploadClients:
    """
    Assemble the clients needed for the upload.

    Args:
        con: connection handed to the list client
        config: upload configuration providing the server and the shortcode
        auth: authentication client, also used to look up the project IRI

    Returns:
        the asset client, the list client and the legal info client, bundled as UploadClients
    """
    asset_client = BulkIngestedAssetClient()
    # The list client is bound to the IRI of the project, which is looked up via the shortcode.
    project_iri = ProjectClientLive(auth.server, auth).get_project_iri(config.shortcode)
    return UploadClients(
        asset_client,
        ListClientLive(con, project_iri),
        LegalInfoClientLive(config.server, config.shortcode, auth),
    )