dsp-tools 9.1.0.post11__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316) hide show
  1. dsp_tools/__init__.py +4 -0
  2. dsp_tools/cli/args.py +36 -0
  3. dsp_tools/cli/call_action.py +51 -231
  4. dsp_tools/cli/call_action_files_only.py +101 -0
  5. dsp_tools/cli/call_action_with_network.py +207 -0
  6. dsp_tools/cli/create_parsers.py +156 -58
  7. dsp_tools/cli/entry_point.py +56 -26
  8. dsp_tools/cli/utils.py +87 -0
  9. dsp_tools/clients/CLAUDE.md +420 -0
  10. dsp_tools/clients/authentication_client.py +14 -0
  11. dsp_tools/clients/authentication_client_live.py +66 -0
  12. dsp_tools/{utils → clients}/connection.py +2 -18
  13. dsp_tools/clients/connection_live.py +233 -0
  14. dsp_tools/clients/fuseki_metrics.py +60 -0
  15. dsp_tools/clients/group_user_clients.py +35 -0
  16. dsp_tools/clients/group_user_clients_live.py +181 -0
  17. dsp_tools/clients/legal_info_client.py +23 -0
  18. dsp_tools/clients/legal_info_client_live.py +132 -0
  19. dsp_tools/clients/list_client.py +49 -0
  20. dsp_tools/clients/list_client_live.py +166 -0
  21. dsp_tools/clients/metadata_client.py +24 -0
  22. dsp_tools/clients/metadata_client_live.py +47 -0
  23. dsp_tools/clients/ontology_clients.py +49 -0
  24. dsp_tools/clients/ontology_create_client_live.py +166 -0
  25. dsp_tools/clients/ontology_get_client_live.py +80 -0
  26. dsp_tools/clients/permissions_client.py +68 -0
  27. dsp_tools/clients/project_client.py +16 -0
  28. dsp_tools/clients/project_client_live.py +66 -0
  29. dsp_tools/commands/create/communicate_problems.py +24 -0
  30. dsp_tools/commands/create/create.py +134 -0
  31. dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
  32. dsp_tools/commands/create/create_on_server/classes.py +99 -0
  33. dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
  34. dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
  35. dsp_tools/commands/create/create_on_server/group_users.py +165 -0
  36. dsp_tools/commands/create/create_on_server/lists.py +163 -0
  37. dsp_tools/commands/create/create_on_server/mappers.py +12 -0
  38. dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
  39. dsp_tools/commands/create/create_on_server/ontology.py +52 -0
  40. dsp_tools/commands/create/create_on_server/project.py +68 -0
  41. dsp_tools/commands/create/create_on_server/properties.py +119 -0
  42. dsp_tools/commands/create/exceptions.py +29 -0
  43. dsp_tools/commands/create/lists_only.py +66 -0
  44. dsp_tools/commands/create/models/create_problems.py +87 -0
  45. dsp_tools/commands/create/models/parsed_ontology.py +88 -0
  46. dsp_tools/commands/create/models/parsed_project.py +81 -0
  47. dsp_tools/commands/create/models/rdf_ontology.py +12 -0
  48. dsp_tools/commands/create/models/server_project_info.py +100 -0
  49. dsp_tools/commands/create/parsing/parse_lists.py +45 -0
  50. dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
  51. dsp_tools/commands/create/parsing/parse_project.py +149 -0
  52. dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
  53. dsp_tools/commands/create/project_validate.py +595 -0
  54. dsp_tools/commands/create/serialisation/ontology.py +119 -0
  55. dsp_tools/commands/create/serialisation/project.py +44 -0
  56. dsp_tools/commands/excel2json/CLAUDE.md +101 -0
  57. dsp_tools/commands/excel2json/json_header.py +57 -23
  58. dsp_tools/commands/excel2json/{new_lists → lists}/compliance_checks.py +26 -26
  59. dsp_tools/commands/excel2json/{new_lists/make_new_lists.py → lists/make_lists.py} +19 -18
  60. dsp_tools/commands/excel2json/{new_lists → lists}/models/input_error.py +1 -12
  61. dsp_tools/commands/excel2json/{new_lists → lists}/models/serialise.py +9 -5
  62. dsp_tools/commands/excel2json/{new_lists → lists}/utils.py +4 -4
  63. dsp_tools/commands/excel2json/models/input_error.py +31 -11
  64. dsp_tools/commands/excel2json/models/json_header.py +53 -15
  65. dsp_tools/commands/excel2json/models/ontology.py +4 -3
  66. dsp_tools/commands/excel2json/{lists.py → old_lists.py} +26 -112
  67. dsp_tools/commands/excel2json/project.py +78 -34
  68. dsp_tools/commands/excel2json/properties.py +57 -36
  69. dsp_tools/commands/excel2json/resources.py +32 -12
  70. dsp_tools/commands/excel2json/utils.py +20 -1
  71. dsp_tools/commands/excel2xml/__init__.py +2 -2
  72. dsp_tools/commands/excel2xml/excel2xml_cli.py +7 -15
  73. dsp_tools/commands/excel2xml/excel2xml_lib.py +138 -493
  74. dsp_tools/commands/excel2xml/propertyelement.py +5 -5
  75. dsp_tools/commands/{project → get}/get.py +29 -13
  76. dsp_tools/commands/get/get_permissions.py +257 -0
  77. dsp_tools/commands/get/get_permissions_legacy.py +89 -0
  78. dsp_tools/commands/{project/models → get/legacy_models}/context.py +6 -6
  79. dsp_tools/commands/{project/models → get/legacy_models}/group.py +5 -10
  80. dsp_tools/commands/{project/models → get/legacy_models}/listnode.py +5 -35
  81. dsp_tools/commands/{project/models → get/legacy_models}/model.py +1 -1
  82. dsp_tools/commands/{project/models → get/legacy_models}/ontology.py +9 -14
  83. dsp_tools/commands/{project/models → get/legacy_models}/project.py +13 -6
  84. dsp_tools/commands/{project/models → get/legacy_models}/propertyclass.py +9 -16
  85. dsp_tools/commands/{project/models → get/legacy_models}/resourceclass.py +8 -46
  86. dsp_tools/commands/{project/models → get/legacy_models}/user.py +19 -60
  87. dsp_tools/commands/get/models/permissions_models.py +10 -0
  88. dsp_tools/commands/id2iri.py +20 -10
  89. dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +81 -56
  90. dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +4 -10
  91. dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +97 -37
  92. dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +2 -2
  93. dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +9 -10
  94. dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +3 -3
  95. dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +2 -10
  96. dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +12 -2
  97. dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +8 -9
  98. dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +18 -18
  99. dsp_tools/commands/start_stack.py +126 -77
  100. dsp_tools/commands/update_legal/CLAUDE.md +344 -0
  101. dsp_tools/commands/update_legal/__init__.py +0 -0
  102. dsp_tools/commands/update_legal/core.py +182 -0
  103. dsp_tools/commands/update_legal/csv_operations.py +135 -0
  104. dsp_tools/commands/update_legal/models.py +87 -0
  105. dsp_tools/commands/update_legal/xml_operations.py +247 -0
  106. dsp_tools/commands/validate_data/CLAUDE.md +159 -0
  107. dsp_tools/commands/validate_data/__init__.py +0 -0
  108. dsp_tools/commands/validate_data/constants.py +59 -0
  109. dsp_tools/commands/validate_data/mappers.py +143 -0
  110. dsp_tools/commands/validate_data/models/__init__.py +0 -0
  111. dsp_tools/commands/validate_data/models/api_responses.py +45 -0
  112. dsp_tools/commands/validate_data/models/input_problems.py +119 -0
  113. dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
  114. dsp_tools/commands/validate_data/models/validation.py +106 -0
  115. dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
  116. dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
  117. dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
  118. dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
  119. dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
  120. dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
  121. dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
  122. dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
  123. dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
  124. dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
  125. dsp_tools/commands/{xml_validate/sparql/resource_shacl.py → validate_data/sparql/cardinality_shacl.py} +45 -47
  126. dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
  127. dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
  128. dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
  129. dsp_tools/commands/validate_data/utils.py +59 -0
  130. dsp_tools/commands/validate_data/validate_data.py +283 -0
  131. dsp_tools/commands/validate_data/validation/__init__.py +0 -0
  132. dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
  133. dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
  134. dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
  135. dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
  136. dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
  137. dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
  138. dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
  139. dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
  140. dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
  141. dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
  142. dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
  143. dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
  144. dsp_tools/commands/xmlupload/models/formatted_text_value.py +0 -25
  145. dsp_tools/commands/xmlupload/models/ingest.py +56 -70
  146. dsp_tools/commands/xmlupload/models/input_problems.py +6 -14
  147. dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
  148. dsp_tools/commands/xmlupload/models/permission.py +0 -39
  149. dsp_tools/commands/xmlupload/models/{deserialise/xmlpermission.py → permissions_parsed.py} +2 -2
  150. dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
  151. dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
  152. dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
  153. dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
  154. dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
  155. dsp_tools/commands/xmlupload/models/upload_clients.py +3 -3
  156. dsp_tools/commands/xmlupload/models/upload_state.py +2 -4
  157. dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
  158. dsp_tools/commands/xmlupload/{ark2iri.py → prepare_xml_input/ark2iri.py} +1 -1
  159. dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
  160. dsp_tools/commands/xmlupload/{iiif_uri_validator.py → prepare_xml_input/iiif_uri_validator.py} +2 -14
  161. dsp_tools/commands/xmlupload/{list_client.py → prepare_xml_input/list_client.py} +15 -10
  162. dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
  163. dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
  164. dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
  165. dsp_tools/commands/xmlupload/resource_create_client.py +7 -468
  166. dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
  167. dsp_tools/commands/xmlupload/stash/{construct_and_analyze_graph.py → analyse_circular_reference_graph.py} +64 -157
  168. dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
  169. dsp_tools/commands/xmlupload/stash/graph_models.py +13 -8
  170. dsp_tools/commands/xmlupload/stash/stash_circular_references.py +48 -115
  171. dsp_tools/commands/xmlupload/stash/stash_models.py +4 -9
  172. dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +34 -40
  173. dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +98 -108
  174. dsp_tools/commands/xmlupload/upload_config.py +8 -0
  175. dsp_tools/commands/xmlupload/write_diagnostic_info.py +14 -9
  176. dsp_tools/commands/xmlupload/xmlupload.py +214 -192
  177. dsp_tools/config/__init__.py +0 -0
  178. dsp_tools/config/logger_config.py +69 -0
  179. dsp_tools/{utils → config}/warnings_config.py +4 -1
  180. dsp_tools/error/__init__.py +0 -0
  181. dsp_tools/error/custom_warnings.py +39 -0
  182. dsp_tools/error/exceptions.py +204 -0
  183. dsp_tools/error/problems.py +10 -0
  184. dsp_tools/error/xmllib_errors.py +20 -0
  185. dsp_tools/error/xmllib_warnings.py +54 -0
  186. dsp_tools/error/xmllib_warnings_util.py +159 -0
  187. dsp_tools/error/xsd_validation_error_msg.py +19 -0
  188. dsp_tools/legacy_models/__init__.py +0 -0
  189. dsp_tools/{models → legacy_models}/datetimestamp.py +7 -7
  190. dsp_tools/{models → legacy_models}/langstring.py +1 -1
  191. dsp_tools/{models → legacy_models}/projectContext.py +4 -4
  192. dsp_tools/resources/schema/data.xsd +108 -83
  193. dsp_tools/resources/schema/lists-only.json +4 -23
  194. dsp_tools/resources/schema/project.json +80 -35
  195. dsp_tools/resources/schema/properties-only.json +1 -4
  196. dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
  197. dsp_tools/resources/start-stack/docker-compose.yml +34 -30
  198. dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
  199. dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
  200. dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
  201. dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
  202. dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
  203. dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
  204. dsp_tools/utils/ansi_colors.py +32 -0
  205. dsp_tools/utils/data_formats/__init__.py +0 -0
  206. dsp_tools/utils/{date_util.py → data_formats/date_util.py} +13 -1
  207. dsp_tools/utils/data_formats/iri_util.py +30 -0
  208. dsp_tools/utils/{shared.py → data_formats/shared.py} +1 -35
  209. dsp_tools/utils/{uri_util.py → data_formats/uri_util.py} +12 -2
  210. dsp_tools/utils/fuseki_bloating.py +63 -0
  211. dsp_tools/utils/json_parsing.py +22 -0
  212. dsp_tools/utils/rdf_constants.py +42 -0
  213. dsp_tools/utils/rdflib_utils.py +10 -0
  214. dsp_tools/utils/replace_id_with_iri.py +66 -0
  215. dsp_tools/utils/request_utils.py +238 -0
  216. dsp_tools/utils/xml_parsing/__init__.py +0 -0
  217. dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
  218. dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
  219. dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
  220. dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
  221. dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
  222. dsp_tools/xmllib/CLAUDE.md +302 -0
  223. dsp_tools/xmllib/__init__.py +49 -0
  224. dsp_tools/xmllib/general_functions.py +877 -0
  225. dsp_tools/xmllib/internal/__init__.py +0 -0
  226. dsp_tools/xmllib/internal/checkers.py +162 -0
  227. dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
  228. dsp_tools/xmllib/internal/constants.py +46 -0
  229. dsp_tools/xmllib/internal/input_converters.py +155 -0
  230. dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
  231. dsp_tools/xmllib/internal/serialise_resource.py +177 -0
  232. dsp_tools/xmllib/internal/serialise_values.py +152 -0
  233. dsp_tools/xmllib/internal/type_aliases.py +11 -0
  234. dsp_tools/xmllib/models/config_options.py +28 -0
  235. dsp_tools/xmllib/models/date_formats.py +48 -0
  236. dsp_tools/xmllib/models/dsp_base_resources.py +1380 -400
  237. dsp_tools/xmllib/models/internal/__init__.py +0 -0
  238. dsp_tools/xmllib/models/internal/file_values.py +172 -0
  239. dsp_tools/xmllib/models/internal/geometry.py +162 -0
  240. dsp_tools/xmllib/models/{migration_metadata.py → internal/migration_metadata.py} +14 -10
  241. dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
  242. dsp_tools/xmllib/models/internal/values.py +342 -0
  243. dsp_tools/xmllib/models/licenses/__init__.py +0 -0
  244. dsp_tools/xmllib/models/licenses/other.py +59 -0
  245. dsp_tools/xmllib/models/licenses/recommended.py +107 -0
  246. dsp_tools/xmllib/models/permissions.py +41 -0
  247. dsp_tools/xmllib/models/res.py +1782 -0
  248. dsp_tools/xmllib/models/root.py +313 -26
  249. dsp_tools/xmllib/value_checkers.py +310 -47
  250. dsp_tools/xmllib/value_converters.py +765 -8
  251. dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
  252. dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
  253. dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
  254. {dsp_tools-9.1.0.post11.dist-info → dsp_tools-18.3.0.post13.dist-info}/entry_points.txt +1 -0
  255. dsp_tools/commands/project/create/project_create.py +0 -1107
  256. dsp_tools/commands/project/create/project_create_lists.py +0 -204
  257. dsp_tools/commands/project/create/project_validate.py +0 -453
  258. dsp_tools/commands/project/models/project_definition.py +0 -12
  259. dsp_tools/commands/rosetta.py +0 -124
  260. dsp_tools/commands/template.py +0 -30
  261. dsp_tools/commands/xml_validate/api_connection.py +0 -122
  262. dsp_tools/commands/xml_validate/deserialise_input.py +0 -135
  263. dsp_tools/commands/xml_validate/make_data_rdf.py +0 -193
  264. dsp_tools/commands/xml_validate/models/data_deserialised.py +0 -108
  265. dsp_tools/commands/xml_validate/models/data_rdf.py +0 -214
  266. dsp_tools/commands/xml_validate/models/input_problems.py +0 -191
  267. dsp_tools/commands/xml_validate/models/validation.py +0 -29
  268. dsp_tools/commands/xml_validate/reformat_validaton_result.py +0 -89
  269. dsp_tools/commands/xml_validate/sparql/construct_shapes.py +0 -16
  270. dsp_tools/commands/xml_validate/xml_validate.py +0 -151
  271. dsp_tools/commands/xmlupload/check_consistency_with_ontology.py +0 -253
  272. dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py +0 -236
  273. dsp_tools/commands/xmlupload/models/deserialise/xmlresource.py +0 -171
  274. dsp_tools/commands/xmlupload/models/namespace_context.py +0 -39
  275. dsp_tools/commands/xmlupload/models/ontology_lookup_models.py +0 -161
  276. dsp_tools/commands/xmlupload/models/ontology_problem_models.py +0 -178
  277. dsp_tools/commands/xmlupload/models/serialise/jsonld_serialiser.py +0 -40
  278. dsp_tools/commands/xmlupload/models/serialise/serialise_value.py +0 -51
  279. dsp_tools/commands/xmlupload/ontology_client.py +0 -92
  280. dsp_tools/commands/xmlupload/project_client.py +0 -91
  281. dsp_tools/commands/xmlupload/read_validate_xml_file.py +0 -99
  282. dsp_tools/models/custom_warnings.py +0 -31
  283. dsp_tools/models/exceptions.py +0 -90
  284. dsp_tools/resources/0100-template-repo/template.json +0 -45
  285. dsp_tools/resources/0100-template-repo/template.xml +0 -27
  286. dsp_tools/resources/start-stack/docker-compose-validation.yml +0 -5
  287. dsp_tools/resources/start-stack/start-stack-config.yml +0 -4
  288. dsp_tools/resources/xml_validate/api-shapes.ttl +0 -411
  289. dsp_tools/resources/xml_validate/replace_namespace.xslt +0 -61
  290. dsp_tools/utils/connection_live.py +0 -383
  291. dsp_tools/utils/iri_util.py +0 -14
  292. dsp_tools/utils/logger_config.py +0 -41
  293. dsp_tools/utils/set_encoder.py +0 -20
  294. dsp_tools/utils/xml_utils.py +0 -145
  295. dsp_tools/utils/xml_validation.py +0 -197
  296. dsp_tools/utils/xml_validation_models.py +0 -68
  297. dsp_tools/xmllib/models/file_values.py +0 -78
  298. dsp_tools/xmllib/models/resource.py +0 -415
  299. dsp_tools/xmllib/models/values.py +0 -428
  300. dsp_tools-9.1.0.post11.dist-info/METADATA +0 -130
  301. dsp_tools-9.1.0.post11.dist-info/RECORD +0 -167
  302. dsp_tools-9.1.0.post11.dist-info/WHEEL +0 -4
  303. dsp_tools-9.1.0.post11.dist-info/licenses/LICENSE +0 -674
  304. /dsp_tools/{commands/excel2json/new_lists → clients}/__init__.py +0 -0
  305. /dsp_tools/commands/{excel2json/new_lists/models → create}/__init__.py +0 -0
  306. /dsp_tools/commands/{project → create/create_on_server}/__init__.py +0 -0
  307. /dsp_tools/commands/{project/create → create/models}/__init__.py +0 -0
  308. /dsp_tools/commands/{project/models → create/parsing}/__init__.py +0 -0
  309. /dsp_tools/commands/{xml_validate → create/serialisation}/__init__.py +0 -0
  310. /dsp_tools/commands/{xml_validate/models → excel2json/lists}/__init__.py +0 -0
  311. /dsp_tools/commands/{xml_validate/sparql → excel2json/lists/models}/__init__.py +0 -0
  312. /dsp_tools/commands/excel2json/{new_lists → lists}/models/deserialise.py +0 -0
  313. /dsp_tools/commands/{xmlupload/models/deserialise → get}/__init__.py +0 -0
  314. /dsp_tools/commands/{xmlupload/models/serialise → get/legacy_models}/__init__.py +0 -0
  315. /dsp_tools/commands/{project/models → get/legacy_models}/helpers.py +0 -0
  316. /dsp_tools/{models → commands/get/models}/__init__.py +0 -0
@@ -0,0 +1,37 @@
1
+ import regex
2
+
3
+ from dsp_tools.commands.xmlupload.iri_resolver import IriResolver
4
+ from dsp_tools.error.exceptions import Id2IriReplacementError
5
+
6
+
7
def prepare_richtext_string_for_upload(richtext_str: str, iri_resolver: IriResolver) -> str:
    """
    Replace the internal IDs of a richtext with IRIs and wrap the result for the upload.

    Args:
        richtext_str: richtext that may contain links of the form href="IRI:<id>:IRI"
        iri_resolver: lookup from internal IDs to IRIs

    Returns:
        The richtext with all internal IDs replaced, wrapped by `_richtext_as_xml`.

    Raises:
        Id2IriReplacementError: if at least one internal ID could not be resolved
    """
    richtext_str, unresolved_ids = replace_ids_if_found(richtext_str, iri_resolver)
    if not unresolved_ids:
        return _richtext_as_xml(richtext_str)
    # The message carries the partially replaced text, so the unresolved links are still visible in it.
    raise Id2IriReplacementError(
        f"Some internal IDs of the following richtext could not be resolved to an IRI: {richtext_str}"
    )
14
+
15
+
16
def replace_ids_if_found(richtext_str: str, iri_resolver: IriResolver) -> tuple[str, set[str]]:
    """
    Replace every internal ID in the richtext for which the resolver returns an IRI.

    Args:
        richtext_str: richtext that may contain links of the form href="IRI:<id>:IRI"
        iri_resolver: lookup from internal IDs to IRIs (queried through its `get` method)

    Returns:
        - The richtext in which all resolvable IDs are replaced by their IRI.
        - The set of internal IDs for which the resolver returned nothing (or a falsy value).
    """
    unresolved: set[str] = set()
    for internal_id in find_internal_ids(richtext_str):
        resolved_iri = iri_resolver.get(internal_id)
        if not resolved_iri:
            unresolved.add(internal_id)
            continue
        richtext_str = _replace_one_id(richtext_str, internal_id, resolved_iri)
    return richtext_str, unresolved
26
+
27
+
28
+ def _replace_one_id(txt: str, id_: str, iri: str) -> str:
29
+ return txt.replace(f'href="IRI:{id_}:IRI"', f'href="{iri}"')
30
+
31
+
32
def find_internal_ids(txt: str) -> set[str]:
    """Collect the distinct IDs that appear in the text as href="IRI:<id>:IRI"."""
    id_matches = regex.finditer(pattern='href="IRI:(.*?):IRI"', string=txt)
    return {found.group(1) for found in id_matches}
34
+
35
+
36
+ def _richtext_as_xml(richtext_str: str) -> str:
37
+ return f'<?xml version="1.0" encoding="UTF-8"?>\n<text>{richtext_str}</text>'
@@ -1,124 +1,34 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from typing import Any
4
- from typing import cast
5
4
 
6
- import regex
7
5
  import rustworkx as rx
8
- from lxml import etree
9
6
 
10
7
  from dsp_tools.commands.xmlupload.stash.graph_models import Cost
11
8
  from dsp_tools.commands.xmlupload.stash.graph_models import Edge
12
- from dsp_tools.commands.xmlupload.stash.graph_models import ResptrLink
13
- from dsp_tools.commands.xmlupload.stash.graph_models import XMLLink
14
- from dsp_tools.utils.iri_util import is_resource_iri
9
+ from dsp_tools.commands.xmlupload.stash.graph_models import InfoForGraph
10
+ from dsp_tools.commands.xmlupload.stash.graph_models import LinkValueLink
11
+ from dsp_tools.commands.xmlupload.stash.graph_models import StandOffLink
15
12
 
16
13
 
17
- def create_info_from_xml_for_graph(
18
- root: etree._Element,
19
- ) -> tuple[list[ResptrLink], list[XMLLink], list[str]]:
14
+ def generate_upload_order(info_for_graph: InfoForGraph) -> tuple[dict[str, list[str]], list[str]]:
20
15
  """
21
- Create link objects (ResptrLink/XMLLink) from the XML file,
22
- and add a reference UUID to each XML element that contains a link (<resptr> or <text>).
23
- With this UUID, the link objects can be identified in the XML data file.
16
+ Generates the upload order from the Info for the graph
24
17
 
25
18
  Args:
26
- root: root of the parsed XML file
19
+ info_for_graph: Info for the graph
27
20
 
28
21
  Returns:
29
- - All resptr links contained in the XML file, represented as ResptrLink objects.
30
- - All XML links contained in the XML file, represented as XMLLink objects.
31
- - A list with all resource IDs used in the XML file.
22
+ - A dictionary which maps the resources that have stashes to the UUIDs of the stashed links.
23
+ - A list of resource IDs which gives the order in which the resources should be uploaded to DSP-API.
32
24
  """
33
- resptr_links = []
34
- xml_links = []
35
- all_resource_ids = []
36
- for resource in root.iter(tag="resource"):
37
- resptr, xml = _create_info_from_xml_for_graph_from_one_resource(resource)
38
- all_resource_ids.append(resource.attrib["id"])
39
- resptr_links.extend(resptr)
40
- xml_links.extend(xml)
41
- return resptr_links, xml_links, all_resource_ids
42
-
43
-
44
- def _create_info_from_xml_for_graph_from_one_resource(
45
- resource: etree._Element,
46
- ) -> tuple[list[ResptrLink], list[XMLLink]]:
47
- resptr_links: list[ResptrLink] = []
48
- xml_links: list[XMLLink] = []
49
- for prop in resource.getchildren():
50
- match prop.tag:
51
- case "resptr-prop":
52
- resptr_links.extend(_create_resptr_link_objects(resource.attrib["id"], prop))
53
- case "text-prop":
54
- xml_links.extend(_create_text_link_objects(resource.attrib["id"], prop))
55
- case "hasComment" | "hasDescription":
56
- if xml_link := _create_text_link_object_from_special_tags(resource.attrib["id"], prop):
57
- xml_links.append(xml_link)
58
- case "isSegmentOf" | "relatesTo":
59
- if segment_link := _create_segmentOf_link_objects(resource.attrib["id"], prop):
60
- resptr_links.append(segment_link)
61
- return resptr_links, xml_links
62
-
63
-
64
- def _create_segmentOf_link_objects(subject_id: str, resptr: etree._Element) -> ResptrLink | None:
65
- resptr.text = cast(str, resptr.text)
66
- if is_resource_iri(resptr.text):
67
- return None
68
- link_object = ResptrLink(subject_id, resptr.text)
69
- # this UUID is so that the links that were stashed can be identified in the XML data file
70
- resptr.attrib["linkUUID"] = link_object.link_uuid
71
- return link_object
72
-
73
-
74
- def _create_resptr_link_objects(subject_id: str, resptr_prop: etree._Element) -> list[ResptrLink]:
75
- resptr_links = []
76
- for resptr in resptr_prop.getchildren():
77
- resptr.text = cast(str, resptr.text)
78
- if not is_resource_iri(resptr.text):
79
- link_object = ResptrLink(subject_id, resptr.text)
80
- # this UUID is so that the links that were stashed can be identified in the XML data file
81
- resptr.attrib["linkUUID"] = link_object.link_uuid
82
- resptr_links.append(link_object)
83
- return resptr_links
84
-
85
-
86
- def _create_text_link_objects(subject_id: str, text_prop: etree._Element) -> list[XMLLink]:
87
- # if the same ID is in several separate <text> values of one <text-prop>, they are considered separate links
88
- xml_props = []
89
- for text in text_prop.getchildren():
90
- if links := _extract_ids_from_one_text_value(text):
91
- xml_link = XMLLink(subject_id, links)
92
- xml_props.append(xml_link)
93
- # this UUID is so that the links that were stashed can be identified in the XML data file
94
- text.attrib["linkUUID"] = xml_link.link_uuid
95
- return xml_props
96
-
97
-
98
- def _create_text_link_object_from_special_tags(subject_id: str, special_tag: etree._Element) -> XMLLink | None:
99
- # This is for <hasDescription> and <hasComment> properties of <video-segment>s or <audio-segment>s
100
- if not (links := _extract_ids_from_one_text_value(special_tag)):
101
- return None
102
- xml_link = XMLLink(subject_id, links)
103
- # this UUID is so that the links that were stashed can be identified in the XML data file
104
- special_tag.attrib["linkUUID"] = xml_link.link_uuid
105
- return xml_link
106
-
107
-
108
- def _extract_ids_from_one_text_value(text: etree._Element) -> set[str]:
109
- # the same id in one <text> only means one link to the resource
110
- all_links = set()
111
- for ele in text.iterdescendants():
112
- if href := ele.attrib.get("href"):
113
- if internal_id := regex.search(r"IRI:(.*):IRI", href):
114
- all_links.add(internal_id.group(1))
115
- return all_links
116
-
117
-
118
- def make_graph(
119
- resptr_links: list[ResptrLink],
120
- xml_links: list[XMLLink],
121
- all_resource_ids: list[str],
25
+ graph, node_to_id, edges = _make_graph(info_for_graph)
26
+ stash_lookup, upload_order, _ = _generate_upload_order_from_graph(graph, node_to_id, edges)
27
+ return stash_lookup, upload_order
28
+
29
+
30
+ def _make_graph(
31
+ info_for_graph: InfoForGraph,
122
32
  ) -> tuple[rx.PyDiGraph[Any, Any], dict[int, str], list[Edge]]:
123
33
  """
124
34
  This function takes information about the resources of an XML file and links between them.
@@ -126,10 +36,7 @@ def make_graph(
126
36
  Resources are represented as nodes and links as edges.
127
37
 
128
38
  Args:
129
- resptr_links: objects representing a direct link between a starting resource and a target resource
130
- xml_links: objects representing one or more links from a single text value of a single starting resource
131
- to a set of target resources
132
- all_resource_ids: IDs of all resources in the graph
39
+ info_for_graph: Information required to construct the graph
133
40
 
134
41
  Returns:
135
42
  - The rustworkx graph.
@@ -137,17 +44,57 @@ def make_graph(
137
44
  - A list with all the edges in the graph.
138
45
  """
139
46
  graph: rx.PyDiGraph[Any, Any] = rx.PyDiGraph()
140
- nodes = [(id_, None, None) for id_ in all_resource_ids]
47
+ nodes = [(id_, None, None) for id_ in info_for_graph.all_resource_ids]
141
48
  node_indices = list(graph.add_nodes_from(nodes))
142
- id_to_node = dict(zip(all_resource_ids, node_indices))
143
- node_to_id = dict(zip(node_indices, all_resource_ids))
144
- edges = [Edge(id_to_node[x.source_id], id_to_node[x.target_id], x) for x in resptr_links]
145
- for xml in xml_links:
49
+ id_to_node = dict(zip(info_for_graph.all_resource_ids, node_indices))
50
+ node_to_id = dict(zip(node_indices, info_for_graph.all_resource_ids))
51
+ edges = [Edge(id_to_node[x.source_id], id_to_node[x.target_id], x) for x in info_for_graph.link_values]
52
+ for xml in info_for_graph.standoff_links:
146
53
  edges.extend([Edge(id_to_node[xml.source_id], id_to_node[x], xml) for x in xml.target_ids])
147
54
  graph.add_edges_from([e.as_tuple() for e in edges])
148
55
  return graph, node_to_id, edges
149
56
 
150
57
 
58
def _generate_upload_order_from_graph(
    graph: rx.PyDiGraph[Any, Any],
    node_to_id: dict[int, str],
    edges: list[Edge],
) -> tuple[dict[str, list[str]], list[str], int]:
    """
    Derive the upload order of the resources from the dependency graph.

    As long as nodes remain after the leaf nodes were taken out, a cycle is searched in the graph,
    the links selected by `_find_cheapest_outgoing_links` are removed from the graph and recorded as stashed,
    and the leaf nodes are taken out again.

    Args:
        graph: graph
        node_to_id: mapping between indices of the graph nodes and original resource IDs from the XML file
        edges: edges in the graph (contains info about source node, target node, and link info)

    Returns:
        - A dictionary which maps the resources that have stashes to the UUIDs of the stashed links.
        - A list of resource IDs which gives the order in which the resources should be uploaded to DSP-API.
        - The number of links in the stash.
    """
    stash_by_resource: dict[str, list[str]] = {}
    ordered_ids: list[str] = []
    stashed_total = 0

    uploadable, pending_nodes = _remove_leaf_nodes(graph, node_to_id, set(node_to_id))
    ordered_ids += uploadable

    while pending_nodes:
        found_cycle = list(rx.digraph_find_cycle(graph))
        cheapest_links = _find_cheapest_outgoing_links(graph, found_cycle, edges)
        stashed_total += len(cheapest_links)
        _remove_edges_to_stash(
            graph=graph,
            edges_to_remove=cheapest_links,
            all_edges=edges,
            remaining_nodes=pending_nodes,
        )
        stash_by_resource = _add_stash_to_lookup_dict(
            stash_by_resource,
            [edge.link_object for edge in cheapest_links],
        )
        # After the edges are removed, the leaf nodes are collected again and appended to the order.
        uploadable, pending_nodes = _remove_leaf_nodes(graph, node_to_id, pending_nodes)
        ordered_ids += uploadable

    return stash_by_resource, ordered_ids, stashed_total
96
+
97
+
151
98
  def _remove_leaf_nodes(
152
99
  graph: rx.PyDiGraph[Any, Any],
153
100
  node_to_id: dict[int, str],
@@ -227,7 +174,7 @@ def _remove_edges_to_stash(
227
174
  phantom_edges_to_remove = []
228
175
  source, target = edges_to_remove[0].source, edges_to_remove[0].target
229
176
  for link_to_stash in [x.link_object for x in edges_to_remove]:
230
- if isinstance(link_to_stash, XMLLink):
177
+ if isinstance(link_to_stash, StandOffLink):
231
178
  phantom_edges_to_remove.extend(
232
179
  _find_phantom_xml_edges(source, target, all_edges, link_to_stash, remaining_nodes)
233
180
  )
@@ -240,7 +187,7 @@ def _find_phantom_xml_edges(
240
187
  source_node_index: int,
241
188
  target_node_index: int,
242
189
  all_edges: list[Edge],
243
- xml_link_to_stash: XMLLink,
190
+ xml_link_to_stash: StandOffLink,
244
191
  remaining_nodes: set[int],
245
192
  ) -> list[tuple[int, int]]:
246
193
  """
@@ -277,7 +224,7 @@ def _find_phantom_xml_edges(
277
224
 
278
225
  def _add_stash_to_lookup_dict(
279
226
  stash_dict: dict[str, list[str]],
280
- links_to_stash: list[XMLLink | ResptrLink],
227
+ links_to_stash: list[StandOffLink | LinkValueLink],
281
228
  ) -> dict[str, list[str]]:
282
229
  stash_list = [stash_link.link_uuid for stash_link in links_to_stash]
283
230
  # all stashed links have the same subject id, so we can just take the first one
@@ -287,43 +234,3 @@ def _add_stash_to_lookup_dict(
287
234
  else:
288
235
  stash_dict[subj_id] = stash_list
289
236
  return stash_dict
290
-
291
-
292
- def generate_upload_order(
293
- graph: rx.PyDiGraph[Any, Any],
294
- node_to_id: dict[int, str],
295
- edges: list[Edge],
296
- ) -> tuple[dict[str, list[str]], list[str], int]:
297
- """
298
- Generate the order in which the resources should be uploaded to the DSP-API based on the dependencies.
299
-
300
- Args:
301
- graph: graph
302
- node_to_id: mapping between indices of the graph nodes and original resource IDs from the XML file
303
- edges: edges in the graph (contains info about source node, target node, and link info)
304
-
305
- Returns:
306
- - A dictionary which maps the resources that have stashes to the UUIDs of the stashed links.
307
- - A list of resource IDs which gives the order in which the resources should be uploaded to DSP-API.
308
- - The number of links in the stash.
309
- """
310
- upload_order: list[str] = []
311
- stash_lookup: dict[str, list[str]] = {}
312
- node_indices = set(node_to_id.keys())
313
- leaf_nodes, remaining_node_indices = _remove_leaf_nodes(graph, node_to_id, node_indices)
314
- upload_order.extend(leaf_nodes)
315
- stash_counter = 0
316
- while remaining_node_indices:
317
- cycle = list(rx.digraph_find_cycle(graph))
318
- links_to_remove = _find_cheapest_outgoing_links(graph, cycle, edges)
319
- stash_counter += len(links_to_remove)
320
- _remove_edges_to_stash(
321
- graph=graph,
322
- edges_to_remove=links_to_remove,
323
- all_edges=edges,
324
- remaining_nodes=remaining_node_indices,
325
- )
326
- stash_lookup = _add_stash_to_lookup_dict(stash_lookup, [x.link_object for x in links_to_remove])
327
- leaf_nodes, remaining_node_indices = _remove_leaf_nodes(graph, node_to_id, remaining_node_indices)
328
- upload_order.extend(leaf_nodes)
329
- return stash_lookup, upload_order, stash_counter
@@ -0,0 +1,53 @@
1
+ from dsp_tools.commands.xmlupload.models.processed.res import ProcessedResource
2
+ from dsp_tools.commands.xmlupload.models.processed.values import ProcessedLink
3
+ from dsp_tools.commands.xmlupload.models.processed.values import ProcessedRichtext
4
+ from dsp_tools.commands.xmlupload.stash.graph_models import InfoForGraph
5
+ from dsp_tools.commands.xmlupload.stash.graph_models import LinkValueLink
6
+ from dsp_tools.commands.xmlupload.stash.graph_models import StandOffLink
7
+ from dsp_tools.utils.data_formats.iri_util import is_resource_iri
8
+
9
+
10
def create_info_for_graph_from_processed_resources(resources: list[ProcessedResource]) -> InfoForGraph:
    """
    Gather what is needed to build the graph used for analysing circular references.

    Args:
        resources: the processed resources to inspect

    Returns:
        The IDs of all resources, together with the link values and stand-off links found in their values.
    """
    collected_ids: list[str] = []
    collected_link_values: list[LinkValueLink] = []
    collected_stand_off: list[StandOffLink] = []
    for resource in resources:
        res_link_values, res_stand_off = _process_one_resource(resource)
        collected_link_values += res_link_values
        collected_stand_off += res_stand_off
        collected_ids.append(resource.res_id)
    return InfoForGraph(
        all_resource_ids=collected_ids,
        link_values=collected_link_values,
        standoff_links=collected_stand_off,
    )
25
+
26
+
27
def _process_one_resource(resource: ProcessedResource) -> tuple[list[LinkValueLink], list[StandOffLink]]:
    """
    Extract the links of a single resource that are relevant for the graph.

    Targets for which `is_resource_iri()` is true are left out.
    A richtext value only yields a stand-off link if at least one of its references is kept.

    Args:
        resource: the resource whose values are inspected

    Returns:
        The link values and the stand-off links of this resource.
    """
    found_link_values: list[LinkValueLink] = []
    found_stand_off: list[StandOffLink] = []
    for value in resource.values:
        if isinstance(value, ProcessedLink):
            if not is_resource_iri(value.value):
                found_link_values.append(
                    LinkValueLink(
                        source_id=resource.res_id,
                        target_id=value.value,
                        link_uuid=value.value_uuid,
                    )
                )
        elif isinstance(value, ProcessedRichtext) and value.resource_references:
            # one text value may reference several resources, hence a set of targets
            non_iri_targets = {ref for ref in value.resource_references if not is_resource_iri(ref)}
            if non_iri_targets:
                found_stand_off.append(
                    StandOffLink(
                        source_id=resource.res_id,
                        target_ids=non_iri_targets,
                        link_uuid=value.value_uuid,
                    )
                )
    return found_link_values, found_stand_off
@@ -1,12 +1,17 @@
1
1
  from __future__ import annotations
2
2
 
3
- import uuid
4
3
  from dataclasses import dataclass
5
- from dataclasses import field
4
+
5
+
6
@dataclass
class InfoForGraph:
    """Bundles the resource IDs and the two kinds of links from which the graph is constructed."""

    # IDs of all the resources
    all_resource_ids: list[str]
    # links of type LinkValueLink (one source, one target)
    link_values: list[LinkValueLink]
    # links of type StandOffLink (one source, a set of targets)
    standoff_links: list[StandOffLink]
6
11
 
7
12
 
8
13
  @dataclass(frozen=True)
9
- class ResptrLink:
14
+ class LinkValueLink:
10
15
  """
11
16
  This class represents a direct link (resptr) between a starting resource and a target resource.
12
17
 
@@ -18,7 +23,7 @@ class ResptrLink:
18
23
 
19
24
  source_id: str
20
25
  target_id: str
21
- link_uuid: str = field(default_factory=lambda: str(uuid.uuid4()))
26
+ link_uuid: str
22
27
 
23
28
  @property
24
29
  def cost_links(self) -> float:
@@ -27,7 +32,7 @@ class ResptrLink:
27
32
 
28
33
 
29
34
  @dataclass(frozen=True)
30
- class XMLLink:
35
+ class StandOffLink:
31
36
  """
32
37
  This class represents one or more links from a single starting resource to a set of target resources,
33
38
  where all target resources are linked to from a single text value of the starting resource.
@@ -40,7 +45,7 @@ class XMLLink:
40
45
 
41
46
  source_id: str
42
47
  target_ids: set[str]
43
- link_uuid: str = field(default_factory=lambda: str(uuid.uuid4()))
48
+ link_uuid: str
44
49
 
45
50
  @property
46
51
  def cost_links(self) -> float:
@@ -61,9 +66,9 @@ class Edge:
61
66
 
62
67
  source: int
63
68
  target: int
64
- link_object: ResptrLink | XMLLink
69
+ link_object: LinkValueLink | StandOffLink
65
70
 
66
- def as_tuple(self) -> tuple[int, int, ResptrLink | XMLLink]:
71
+ def as_tuple(self) -> tuple[int, int, LinkValueLink | StandOffLink]:
67
72
  """Returns a representation of this edge as a tuple of the source index, target index and link object"""
68
73
  return self.source, self.target, self.link_object
69
74
 
@@ -1,17 +1,9 @@
1
- from __future__ import annotations
1
+ from copy import deepcopy
2
2
 
3
- from typing import cast
4
- from uuid import uuid4
5
-
6
- from lxml import etree
7
-
8
- from dsp_tools.commands.xmlupload.models.deserialise.deserialise_value import XMLProperty
9
- from dsp_tools.commands.xmlupload.models.deserialise.xmlresource import XMLResource
10
3
  from dsp_tools.commands.xmlupload.models.formatted_text_value import FormattedTextValue
11
- from dsp_tools.commands.xmlupload.models.permission import Permissions
12
- from dsp_tools.commands.xmlupload.stash.construct_and_analyze_graph import create_info_from_xml_for_graph
13
- from dsp_tools.commands.xmlupload.stash.construct_and_analyze_graph import generate_upload_order
14
- from dsp_tools.commands.xmlupload.stash.construct_and_analyze_graph import make_graph
4
+ from dsp_tools.commands.xmlupload.models.processed.res import ProcessedResource
5
+ from dsp_tools.commands.xmlupload.models.processed.values import ProcessedLink
6
+ from dsp_tools.commands.xmlupload.models.processed.values import ProcessedRichtext
15
7
  from dsp_tools.commands.xmlupload.stash.stash_models import LinkValueStash
16
8
  from dsp_tools.commands.xmlupload.stash.stash_models import LinkValueStashItem
17
9
  from dsp_tools.commands.xmlupload.stash.stash_models import StandoffStash
@@ -19,117 +11,58 @@ from dsp_tools.commands.xmlupload.stash.stash_models import StandoffStashItem
19
11
  from dsp_tools.commands.xmlupload.stash.stash_models import Stash
20
12
 
21
13
 
22
- def _stash_standoff(
23
- res_id: str,
24
- restype: str,
25
- link_prop: XMLProperty,
26
- stash_lookup: dict[str, list[str]],
27
- ) -> list[StandoffStashItem]:
28
- stashed_items = []
29
- for value in link_prop.values:
30
- if value.link_uuid not in stash_lookup[res_id]:
31
- continue
32
- # value.value is a KnoraStandoffXml text with problematic links.
33
- # stash it, then replace the problematic text with a UUID
34
- standoff_xml = cast(FormattedTextValue, value.value)
35
- uuid = str(uuid4())
36
- standoff_stash_item = StandoffStashItem(
37
- res_id=res_id,
38
- res_type=restype,
39
- uuid=uuid,
40
- prop_name=link_prop.name,
41
- value=standoff_xml,
42
- )
43
- value.value = FormattedTextValue(uuid)
44
- stashed_items.append(standoff_stash_item)
45
- return stashed_items
46
-
47
-
48
- def _stash_resptr(
49
- res_id: str,
50
- restype: str,
51
- link_prop: XMLProperty,
52
- stash_lookup: dict[str, list[str]],
53
- permission_lookup: dict[str, Permissions],
54
- ) -> list[LinkValueStashItem]:
55
- stashed_items = []
56
- for value in link_prop.values.copy():
57
- if value.link_uuid not in stash_lookup[res_id]:
58
- continue
59
- permission = str(permission_lookup[value.permissions]) if value.permissions else None
60
- # value.value is the ID of the target resource. stash it, then delete it
61
- link_stash_item = LinkValueStashItem(
62
- res_id=res_id,
63
- res_type=restype,
64
- prop_name=link_prop.name,
65
- target_id=str(value.value),
66
- permission=permission,
67
- )
68
- link_prop.values.remove(value)
69
- stashed_items.append(link_stash_item)
70
- return stashed_items
71
-
72
-
73
- def stash_circular_references(
74
- resources: list[XMLResource],
75
- stash_lookup: dict[str, list[str]],
76
- permission_lookup: dict[str, Permissions],
77
- ) -> Stash | None:
78
- """
79
- Stashes problematic resource-references from a list of resources.
80
- The resources are modified in-place.
81
-
82
- Args:
83
- resources: all resources of the XML file
84
- stash_lookup: A dictionary which maps the resources that have stashes to the UUIDs of the stashed links
85
- permission_lookup: A dictionary which maps the permissions of the stashed links to their string representation
86
-
87
- Returns:
88
- stash: an object that contains the stashed references
89
-
90
- Raises:
91
- ValueError: If a link property of one of the resources is not "text" or "resptr"
92
- """
93
- stashed_standoff_values: list[StandoffStashItem] = []
14
def stash_circular_references(resources: list[ProcessedResource], stash_lookup: dict[str, list[str]]) -> Stash | None:
    """
    Stash the values that would create circular references and remove them from the Resources.

    Args:
        resources: the resources to inspect; resources listed in the stash lookup are modified in-place
        stash_lookup: maps the IDs of the resources that have stashes to the UUIDs of the values to stash

    Returns:
        None if the stash lookup is empty, otherwise whatever `Stash.make()` builds from the stashed values.
    """
    if not stash_lookup:
        return None

    collected_links: list[LinkValueStashItem] = []
    collected_standoff: list[StandoffStashItem] = []
    for resource in resources:
        if resource.res_id in stash_lookup:
            res_links, res_standoff = _process_one_resource(resource, stash_lookup)
            collected_links.extend(res_links)
            collected_standoff.extend(res_standoff)

    return Stash.make(
        standoff_stash=StandoffStash.make(collected_standoff),
        link_value_stash=LinkValueStash.make(collected_links),
    )
117
32
 
118
- def identify_circular_references(root: etree._Element) -> tuple[dict[str, list[str]], list[str]]:
119
- """
120
- Identifies problematic resource-references inside an XML tree.
121
- A reference is problematic if it creates a circle (circular references).
122
- The XML tree is modified in-place:
123
- A reference UUID is added to each XML element that contains a link (`<resptr>` or `<text>`).
124
33
 
125
- Args:
126
- root: the root element of the parsed XML document
34
def _process_one_resource(
    resource: ProcessedResource,
    stash_lookup: dict[str, list[str]],
) -> tuple[list[LinkValueStashItem], list[StandoffStashItem]]:
    """
    Stash the link and richtext values of one resource whose UUID is listed in the stash lookup.

    Stashed link values are removed from the resource.
    Stashed richtext values stay on the resource, but their content is replaced (see `_stash_standoff()`).

    Args:
        resource: the resource to process, it is modified in-place
        stash_lookup: maps resource IDs to the UUIDs of the values to stash

    Returns:
        The stashed link values and the stashed stand-off values of this resource.
    """
    link_items: list[LinkValueStashItem] = []
    standoff_items: list[StandoffStashItem] = []

    # Iterate over a copy, because link values are removed from the original list inside the loop.
    for value in resource.values.copy():
        if not isinstance(value, (ProcessedLink, ProcessedRichtext)):
            continue
        if value.value_uuid not in stash_lookup[resource.res_id]:
            continue
        if isinstance(value, ProcessedLink):
            link_items.append(LinkValueStashItem(resource.res_id, resource.type_iri, value))
            resource.values.remove(value)
        else:
            # value.value is a KnoraStandoffXml text with problematic links.
            # stash it, then replace the problematic text with a UUID
            standoff_items.append(_stash_standoff(value, resource.res_id, resource.type_iri))

    return link_items, standoff_items
55
+
56
+
57
def _stash_standoff(value: ProcessedRichtext, res_id: str, res_type: str) -> StandoffStashItem:
    """
    Create the stash item for a richtext value and put a placeholder into the value itself.

    Args:
        value: the richtext value to stash; its content is replaced in-place with its own UUID
        res_id: ID of the resource the value belongs to
        res_type: type of the resource the value belongs to

    Returns:
        A stash item that holds a deep copy of the value as it was before the replacement.
    """
    # It is not necessary to add the permissions to the StandoffStashItem.
    # Because when no new permissions are given during an update request,
    # the permissions of the previous value are taken.
    stash_item = StandoffStashItem(
        res_id=res_id,
        res_type=res_type,
        value=deepcopy(value),
    )
    # The copy above must be taken before the content is overwritten with the UUID.
    value.value = FormattedTextValue(value.value_uuid)
    return stash_item
@@ -3,18 +3,15 @@ from __future__ import annotations
3
3
  from dataclasses import dataclass
4
4
  from itertools import groupby
5
5
 
6
- from dsp_tools.commands.xmlupload.models.formatted_text_value import FormattedTextValue
6
+ from dsp_tools.commands.xmlupload.models.processed.values import ProcessedLink
7
+ from dsp_tools.commands.xmlupload.models.processed.values import ProcessedRichtext
7
8
 
8
9
 
9
10
@dataclass(frozen=True)
class StandoffStashItem:
    """Holds one stashed richtext value together with the ID and type of the resource it belongs to."""

    # ID of the resource the value belongs to
    res_id: str
    # type of the resource the value belongs to
    res_type: str
    # the stashed richtext value
    value: ProcessedRichtext
18
15
 
19
16
 
20
17
  @dataclass(frozen=True)
@@ -51,9 +48,7 @@ class LinkValueStashItem:
51
48
 
52
49
  res_id: str
53
50
  res_type: str
54
- prop_name: str
55
- target_id: str
56
- permission: str | None = None
51
+ value: ProcessedLink
57
52
 
58
53
 
59
54
  @dataclass(frozen=True)