dsp-tools 0.9.13__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (348) hide show
  1. dsp_tools/__init__.py +5 -0
  2. dsp_tools/cli/args.py +47 -0
  3. dsp_tools/cli/call_action.py +85 -0
  4. dsp_tools/cli/call_action_files_only.py +101 -0
  5. dsp_tools/cli/call_action_with_network.py +207 -0
  6. dsp_tools/cli/create_parsers.py +479 -0
  7. dsp_tools/cli/entry_point.py +322 -0
  8. dsp_tools/cli/utils.py +87 -0
  9. dsp_tools/clients/CLAUDE.md +420 -0
  10. dsp_tools/clients/authentication_client.py +14 -0
  11. dsp_tools/clients/authentication_client_live.py +66 -0
  12. dsp_tools/clients/connection.py +35 -0
  13. dsp_tools/clients/connection_live.py +233 -0
  14. dsp_tools/clients/fuseki_metrics.py +60 -0
  15. dsp_tools/clients/group_user_clients.py +35 -0
  16. dsp_tools/clients/group_user_clients_live.py +181 -0
  17. dsp_tools/clients/legal_info_client.py +23 -0
  18. dsp_tools/clients/legal_info_client_live.py +132 -0
  19. dsp_tools/clients/list_client.py +49 -0
  20. dsp_tools/clients/list_client_live.py +166 -0
  21. dsp_tools/clients/metadata_client.py +24 -0
  22. dsp_tools/clients/metadata_client_live.py +47 -0
  23. dsp_tools/clients/ontology_clients.py +49 -0
  24. dsp_tools/clients/ontology_create_client_live.py +166 -0
  25. dsp_tools/clients/ontology_get_client_live.py +80 -0
  26. dsp_tools/clients/permissions_client.py +68 -0
  27. dsp_tools/clients/project_client.py +16 -0
  28. dsp_tools/clients/project_client_live.py +66 -0
  29. dsp_tools/commands/create/communicate_problems.py +24 -0
  30. dsp_tools/commands/create/create.py +134 -0
  31. dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
  32. dsp_tools/commands/create/create_on_server/classes.py +99 -0
  33. dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
  34. dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
  35. dsp_tools/commands/create/create_on_server/group_users.py +165 -0
  36. dsp_tools/commands/create/create_on_server/lists.py +163 -0
  37. dsp_tools/commands/create/create_on_server/mappers.py +12 -0
  38. dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
  39. dsp_tools/commands/create/create_on_server/ontology.py +52 -0
  40. dsp_tools/commands/create/create_on_server/project.py +68 -0
  41. dsp_tools/commands/create/create_on_server/properties.py +119 -0
  42. dsp_tools/commands/create/exceptions.py +29 -0
  43. dsp_tools/commands/create/lists_only.py +66 -0
  44. dsp_tools/commands/create/models/create_problems.py +87 -0
  45. dsp_tools/commands/create/models/parsed_ontology.py +88 -0
  46. dsp_tools/commands/create/models/parsed_project.py +81 -0
  47. dsp_tools/commands/create/models/rdf_ontology.py +12 -0
  48. dsp_tools/commands/create/models/server_project_info.py +100 -0
  49. dsp_tools/commands/create/parsing/parse_lists.py +45 -0
  50. dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
  51. dsp_tools/commands/create/parsing/parse_project.py +149 -0
  52. dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
  53. dsp_tools/commands/create/project_validate.py +595 -0
  54. dsp_tools/commands/create/serialisation/ontology.py +119 -0
  55. dsp_tools/commands/create/serialisation/project.py +44 -0
  56. dsp_tools/commands/excel2json/CLAUDE.md +101 -0
  57. dsp_tools/commands/excel2json/json_header.py +321 -0
  58. dsp_tools/commands/excel2json/lists/__init__.py +0 -0
  59. dsp_tools/commands/excel2json/lists/compliance_checks.py +292 -0
  60. dsp_tools/commands/excel2json/lists/make_lists.py +247 -0
  61. dsp_tools/commands/excel2json/lists/models/__init__.py +0 -0
  62. dsp_tools/commands/excel2json/lists/models/deserialise.py +30 -0
  63. dsp_tools/commands/excel2json/lists/models/input_error.py +216 -0
  64. dsp_tools/commands/excel2json/lists/models/serialise.py +57 -0
  65. dsp_tools/commands/excel2json/lists/utils.py +81 -0
  66. dsp_tools/commands/excel2json/models/__init__.py +0 -0
  67. dsp_tools/commands/excel2json/models/input_error.py +416 -0
  68. dsp_tools/commands/excel2json/models/json_header.py +175 -0
  69. dsp_tools/commands/excel2json/models/list_node_name.py +16 -0
  70. dsp_tools/commands/excel2json/models/ontology.py +76 -0
  71. dsp_tools/commands/excel2json/old_lists.py +328 -0
  72. dsp_tools/commands/excel2json/project.py +280 -0
  73. dsp_tools/commands/excel2json/properties.py +370 -0
  74. dsp_tools/commands/excel2json/resources.py +336 -0
  75. dsp_tools/commands/excel2json/utils.py +352 -0
  76. dsp_tools/commands/excel2xml/__init__.py +7 -0
  77. dsp_tools/commands/excel2xml/excel2xml_cli.py +523 -0
  78. dsp_tools/commands/excel2xml/excel2xml_lib.py +1953 -0
  79. dsp_tools/commands/excel2xml/propertyelement.py +47 -0
  80. dsp_tools/commands/get/__init__.py +0 -0
  81. dsp_tools/commands/get/get.py +166 -0
  82. dsp_tools/commands/get/get_permissions.py +257 -0
  83. dsp_tools/commands/get/get_permissions_legacy.py +89 -0
  84. dsp_tools/commands/get/legacy_models/__init__.py +0 -0
  85. dsp_tools/commands/get/legacy_models/context.py +318 -0
  86. dsp_tools/commands/get/legacy_models/group.py +241 -0
  87. dsp_tools/commands/get/legacy_models/helpers.py +47 -0
  88. dsp_tools/commands/get/legacy_models/listnode.py +390 -0
  89. dsp_tools/commands/get/legacy_models/model.py +12 -0
  90. dsp_tools/commands/get/legacy_models/ontology.py +324 -0
  91. dsp_tools/commands/get/legacy_models/project.py +366 -0
  92. dsp_tools/commands/get/legacy_models/propertyclass.py +417 -0
  93. dsp_tools/commands/get/legacy_models/resourceclass.py +676 -0
  94. dsp_tools/commands/get/legacy_models/user.py +438 -0
  95. dsp_tools/commands/get/models/__init__.py +0 -0
  96. dsp_tools/commands/get/models/permissions_models.py +10 -0
  97. dsp_tools/commands/id2iri.py +258 -0
  98. dsp_tools/commands/ingest_xmlupload/__init__.py +0 -0
  99. dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +178 -0
  100. dsp_tools/commands/ingest_xmlupload/create_resources/__init__.py +0 -0
  101. dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +69 -0
  102. dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +166 -0
  103. dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +121 -0
  104. dsp_tools/commands/ingest_xmlupload/ingest_files/__init__.py +0 -0
  105. dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +64 -0
  106. dsp_tools/commands/ingest_xmlupload/upload_files/__init__.py +0 -0
  107. dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +20 -0
  108. dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +57 -0
  109. dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +66 -0
  110. dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +67 -0
  111. dsp_tools/commands/resume_xmlupload/__init__.py +0 -0
  112. dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +96 -0
  113. dsp_tools/commands/start_stack.py +428 -0
  114. dsp_tools/commands/update_legal/CLAUDE.md +344 -0
  115. dsp_tools/commands/update_legal/__init__.py +0 -0
  116. dsp_tools/commands/update_legal/core.py +182 -0
  117. dsp_tools/commands/update_legal/csv_operations.py +135 -0
  118. dsp_tools/commands/update_legal/models.py +87 -0
  119. dsp_tools/commands/update_legal/xml_operations.py +247 -0
  120. dsp_tools/commands/validate_data/CLAUDE.md +159 -0
  121. dsp_tools/commands/validate_data/__init__.py +0 -0
  122. dsp_tools/commands/validate_data/constants.py +59 -0
  123. dsp_tools/commands/validate_data/mappers.py +143 -0
  124. dsp_tools/commands/validate_data/models/__init__.py +0 -0
  125. dsp_tools/commands/validate_data/models/api_responses.py +45 -0
  126. dsp_tools/commands/validate_data/models/input_problems.py +119 -0
  127. dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
  128. dsp_tools/commands/validate_data/models/validation.py +106 -0
  129. dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
  130. dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
  131. dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
  132. dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
  133. dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
  134. dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
  135. dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
  136. dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
  137. dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
  138. dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
  139. dsp_tools/commands/validate_data/sparql/cardinality_shacl.py +209 -0
  140. dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
  141. dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
  142. dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
  143. dsp_tools/commands/validate_data/utils.py +59 -0
  144. dsp_tools/commands/validate_data/validate_data.py +283 -0
  145. dsp_tools/commands/validate_data/validation/__init__.py +0 -0
  146. dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
  147. dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
  148. dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
  149. dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
  150. dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
  151. dsp_tools/commands/xmlupload/__init__.py +0 -0
  152. dsp_tools/commands/xmlupload/iri_resolver.py +21 -0
  153. dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
  154. dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
  155. dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
  156. dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
  157. dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
  158. dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
  159. dsp_tools/commands/xmlupload/models/__init__.py +0 -0
  160. dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
  161. dsp_tools/commands/xmlupload/models/formatted_text_value.py +10 -0
  162. dsp_tools/commands/xmlupload/models/ingest.py +143 -0
  163. dsp_tools/commands/xmlupload/models/input_problems.py +58 -0
  164. dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
  165. dsp_tools/commands/xmlupload/models/permission.py +45 -0
  166. dsp_tools/commands/xmlupload/models/permissions_parsed.py +93 -0
  167. dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
  168. dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
  169. dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
  170. dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
  171. dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
  172. dsp_tools/commands/xmlupload/models/upload_clients.py +14 -0
  173. dsp_tools/commands/xmlupload/models/upload_state.py +20 -0
  174. dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
  175. dsp_tools/commands/xmlupload/prepare_xml_input/ark2iri.py +55 -0
  176. dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
  177. dsp_tools/commands/xmlupload/prepare_xml_input/iiif_uri_validator.py +50 -0
  178. dsp_tools/commands/xmlupload/prepare_xml_input/list_client.py +120 -0
  179. dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
  180. dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
  181. dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
  182. dsp_tools/commands/xmlupload/resource_create_client.py +25 -0
  183. dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
  184. dsp_tools/commands/xmlupload/stash/__init__.py +0 -0
  185. dsp_tools/commands/xmlupload/stash/analyse_circular_reference_graph.py +236 -0
  186. dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
  187. dsp_tools/commands/xmlupload/stash/graph_models.py +87 -0
  188. dsp_tools/commands/xmlupload/stash/stash_circular_references.py +68 -0
  189. dsp_tools/commands/xmlupload/stash/stash_models.py +109 -0
  190. dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +106 -0
  191. dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +196 -0
  192. dsp_tools/commands/xmlupload/upload_config.py +76 -0
  193. dsp_tools/commands/xmlupload/write_diagnostic_info.py +27 -0
  194. dsp_tools/commands/xmlupload/xmlupload.py +516 -0
  195. dsp_tools/config/__init__.py +0 -0
  196. dsp_tools/config/logger_config.py +69 -0
  197. dsp_tools/config/warnings_config.py +32 -0
  198. dsp_tools/error/__init__.py +0 -0
  199. dsp_tools/error/custom_warnings.py +39 -0
  200. dsp_tools/error/exceptions.py +204 -0
  201. dsp_tools/error/problems.py +10 -0
  202. dsp_tools/error/xmllib_errors.py +20 -0
  203. dsp_tools/error/xmllib_warnings.py +54 -0
  204. dsp_tools/error/xmllib_warnings_util.py +159 -0
  205. dsp_tools/error/xsd_validation_error_msg.py +19 -0
  206. dsp_tools/legacy_models/__init__.py +0 -0
  207. dsp_tools/legacy_models/datetimestamp.py +81 -0
  208. dsp_tools/legacy_models/langstring.py +253 -0
  209. dsp_tools/legacy_models/projectContext.py +49 -0
  210. dsp_tools/py.typed +0 -0
  211. dsp_tools/resources/schema/data.xsd +648 -0
  212. dsp_tools/resources/schema/lists-only.json +72 -0
  213. dsp_tools/resources/schema/project.json +1258 -0
  214. dsp_tools/resources/schema/properties-only.json +874 -0
  215. dsp_tools/resources/schema/resources-only.json +140 -0
  216. dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
  217. dsp_tools/resources/start-stack/docker-compose.override.yml +11 -0
  218. dsp_tools/resources/start-stack/docker-compose.yml +88 -0
  219. dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
  220. dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
  221. dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
  222. dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
  223. dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
  224. dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
  225. dsp_tools/utils/__init__.py +0 -0
  226. dsp_tools/utils/ansi_colors.py +32 -0
  227. dsp_tools/utils/data_formats/__init__.py +0 -0
  228. dsp_tools/utils/data_formats/date_util.py +166 -0
  229. dsp_tools/utils/data_formats/iri_util.py +30 -0
  230. dsp_tools/utils/data_formats/shared.py +81 -0
  231. dsp_tools/utils/data_formats/uri_util.py +76 -0
  232. dsp_tools/utils/fuseki_bloating.py +63 -0
  233. dsp_tools/utils/json_parsing.py +22 -0
  234. dsp_tools/utils/rdf_constants.py +42 -0
  235. dsp_tools/utils/rdflib_utils.py +10 -0
  236. dsp_tools/utils/replace_id_with_iri.py +66 -0
  237. dsp_tools/utils/request_utils.py +238 -0
  238. dsp_tools/utils/xml_parsing/__init__.py +0 -0
  239. dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
  240. dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
  241. dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
  242. dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
  243. dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
  244. dsp_tools/xmllib/CLAUDE.md +302 -0
  245. dsp_tools/xmllib/__init__.py +49 -0
  246. dsp_tools/xmllib/general_functions.py +877 -0
  247. dsp_tools/xmllib/internal/__init__.py +0 -0
  248. dsp_tools/xmllib/internal/checkers.py +162 -0
  249. dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
  250. dsp_tools/xmllib/internal/constants.py +46 -0
  251. dsp_tools/xmllib/internal/input_converters.py +155 -0
  252. dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
  253. dsp_tools/xmllib/internal/serialise_resource.py +177 -0
  254. dsp_tools/xmllib/internal/serialise_values.py +152 -0
  255. dsp_tools/xmllib/internal/type_aliases.py +11 -0
  256. dsp_tools/xmllib/models/__init__.py +0 -0
  257. dsp_tools/xmllib/models/config_options.py +28 -0
  258. dsp_tools/xmllib/models/date_formats.py +48 -0
  259. dsp_tools/xmllib/models/dsp_base_resources.py +1542 -0
  260. dsp_tools/xmllib/models/internal/__init__.py +0 -0
  261. dsp_tools/xmllib/models/internal/file_values.py +172 -0
  262. dsp_tools/xmllib/models/internal/geometry.py +162 -0
  263. dsp_tools/xmllib/models/internal/migration_metadata.py +55 -0
  264. dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
  265. dsp_tools/xmllib/models/internal/values.py +342 -0
  266. dsp_tools/xmllib/models/licenses/__init__.py +0 -0
  267. dsp_tools/xmllib/models/licenses/other.py +59 -0
  268. dsp_tools/xmllib/models/licenses/recommended.py +107 -0
  269. dsp_tools/xmllib/models/permissions.py +41 -0
  270. dsp_tools/xmllib/models/res.py +1782 -0
  271. dsp_tools/xmllib/models/root.py +348 -0
  272. dsp_tools/xmllib/value_checkers.py +434 -0
  273. dsp_tools/xmllib/value_converters.py +777 -0
  274. dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
  275. dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
  276. dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
  277. dsp_tools-18.3.0.post13.dist-info/entry_points.txt +3 -0
  278. dsp_tools-0.9.13.dist-info/LICENSE +0 -674
  279. dsp_tools-0.9.13.dist-info/METADATA +0 -144
  280. dsp_tools-0.9.13.dist-info/RECORD +0 -71
  281. dsp_tools-0.9.13.dist-info/WHEEL +0 -5
  282. dsp_tools-0.9.13.dist-info/entry_points.txt +0 -3
  283. dsp_tools-0.9.13.dist-info/top_level.txt +0 -1
  284. dsplib/models/connection.py +0 -272
  285. dsplib/models/group.py +0 -296
  286. dsplib/models/helpers.py +0 -505
  287. dsplib/models/langstring.py +0 -277
  288. dsplib/models/listnode.py +0 -578
  289. dsplib/models/model.py +0 -20
  290. dsplib/models/ontology.py +0 -448
  291. dsplib/models/permission.py +0 -112
  292. dsplib/models/project.py +0 -547
  293. dsplib/models/propertyclass.py +0 -505
  294. dsplib/models/resource.py +0 -366
  295. dsplib/models/resourceclass.py +0 -810
  296. dsplib/models/sipi.py +0 -30
  297. dsplib/models/user.py +0 -731
  298. dsplib/models/value.py +0 -1000
  299. dsplib/utils/knora-data-schema.xsd +0 -454
  300. dsplib/utils/knora-schema-lists.json +0 -83
  301. dsplib/utils/knora-schema.json +0 -434
  302. dsplib/utils/onto_commons.py +0 -24
  303. dsplib/utils/onto_create_lists.py +0 -73
  304. dsplib/utils/onto_create_ontology.py +0 -442
  305. dsplib/utils/onto_get.py +0 -58
  306. dsplib/utils/onto_validate.py +0 -33
  307. dsplib/utils/xml_upload.py +0 -539
  308. dsplib/widgets/doublepassword.py +0 -80
  309. knora/MLS-import-libraries.py +0 -84
  310. knora/dsp_tools.py +0 -96
  311. knora/dsplib/models/connection.py +0 -272
  312. knora/dsplib/models/group.py +0 -296
  313. knora/dsplib/models/helpers.py +0 -506
  314. knora/dsplib/models/langstring.py +0 -277
  315. knora/dsplib/models/listnode.py +0 -578
  316. knora/dsplib/models/model.py +0 -20
  317. knora/dsplib/models/ontology.py +0 -448
  318. knora/dsplib/models/permission.py +0 -112
  319. knora/dsplib/models/project.py +0 -583
  320. knora/dsplib/models/propertyclass.py +0 -505
  321. knora/dsplib/models/resource.py +0 -416
  322. knora/dsplib/models/resourceclass.py +0 -811
  323. knora/dsplib/models/sipi.py +0 -35
  324. knora/dsplib/models/user.py +0 -731
  325. knora/dsplib/models/value.py +0 -1000
  326. knora/dsplib/utils/knora-data-schema.xsd +0 -464
  327. knora/dsplib/utils/knora-schema-lists.json +0 -83
  328. knora/dsplib/utils/knora-schema.json +0 -444
  329. knora/dsplib/utils/onto_commons.py +0 -24
  330. knora/dsplib/utils/onto_create_lists.py +0 -73
  331. knora/dsplib/utils/onto_create_ontology.py +0 -451
  332. knora/dsplib/utils/onto_get.py +0 -58
  333. knora/dsplib/utils/onto_validate.py +0 -33
  334. knora/dsplib/utils/xml_upload.py +0 -540
  335. knora/dsplib/widgets/doublepassword.py +0 -80
  336. knora/knora.py +0 -2108
  337. knora/test.py +0 -99
  338. knora/testit.py +0 -76
  339. knora/xml2knora.py +0 -633
  340. {dsplib → dsp_tools/cli}/__init__.py +0 -0
  341. {dsplib/models → dsp_tools/clients}/__init__.py +0 -0
  342. {dsplib/utils → dsp_tools/commands}/__init__.py +0 -0
  343. {dsplib/widgets → dsp_tools/commands/create}/__init__.py +0 -0
  344. {knora → dsp_tools/commands/create/create_on_server}/__init__.py +0 -0
  345. {knora/dsplib → dsp_tools/commands/create/models}/__init__.py +0 -0
  346. {knora/dsplib/models → dsp_tools/commands/create/parsing}/__init__.py +0 -0
  347. {knora/dsplib/utils → dsp_tools/commands/create/serialisation}/__init__.py +0 -0
  348. {knora/dsplib/widgets → dsp_tools/commands/excel2json}/__init__.py +0 -0
@@ -0,0 +1,67 @@
1
+ from __future__ import annotations
2
+
3
+ from loguru import logger
4
+ from lxml import etree
5
+
6
+ from dsp_tools.clients.connection import Connection
7
+ from dsp_tools.commands.xmlupload.models.lookup_models import XmlReferenceLookups
8
+ from dsp_tools.commands.xmlupload.models.processed.res import ProcessedResource
9
+ from dsp_tools.commands.xmlupload.models.upload_clients import UploadClients
10
+ from dsp_tools.commands.xmlupload.stash.analyse_circular_reference_graph import generate_upload_order
11
+ from dsp_tools.commands.xmlupload.stash.create_info_for_graph import create_info_for_graph_from_processed_resources
12
+ from dsp_tools.commands.xmlupload.stash.stash_circular_references import stash_circular_references
13
+ from dsp_tools.commands.xmlupload.stash.stash_models import Stash
14
+ from dsp_tools.error.exceptions import BaseError
15
+ from dsp_tools.error.exceptions import InputError
16
+ from dsp_tools.legacy_models.projectContext import ProjectContext
17
+ from dsp_tools.utils.xml_parsing.get_lookups import get_authorship_lookup
18
+ from dsp_tools.utils.xml_parsing.get_lookups import get_permissions_lookup
19
+ from dsp_tools.utils.xml_parsing.get_parsed_resources import get_parsed_resources
20
+ from dsp_tools.utils.xml_parsing.models.parsed_resource import ParsedResource
21
+
22
+ LIST_SEPARATOR = "\n- "
23
+
24
+
25
def get_parsed_resources_and_mappers(
    root: etree._Element, clients: UploadClients
) -> tuple[list[ParsedResource], XmlReferenceLookups]:
    """
    Parse the resources of the XML root and build the lookups for XML references.

    Args:
        root: root element of the XML file
        clients: upload clients; the `server` attribute of the legal info client
            is forwarded to the resource parser

    Returns:
        The parsed resources and the XML reference lookups
    """
    logger.debug("Get ParsedResource and XML-Lookups from root")
    print("Parsing XML file for upload.")
    server = clients.legal_info_client.server
    # The resources are parsed before the lookups are requested.
    resources = get_parsed_resources(root, server)
    lookups = _get_xml_reference_lookups(root=root, clients=clients)
    return resources, lookups
33
+
34
+
35
def _get_xml_reference_lookups(root: etree._Element, clients: UploadClients) -> XmlReferenceLookups:
    """
    Collect the permission, authorship and list node lookups for an XML file.

    Args:
        root: root element of the XML file; its "shortcode" attribute is used to get the project context
        clients: upload clients; the list client supplies the connection and the list node lookup

    Returns:
        The lookups bundled in one object
    """
    context = _get_project_context_from_server(
        connection=clients.list_client.con,
        shortcode=root.attrib["shortcode"],
    )
    permissions = get_permissions_lookup(root, context)
    authorships = get_authorship_lookup(root)
    listnodes = clients.list_client.get_list_node_id_to_iri_lookup()
    return XmlReferenceLookups(permissions=permissions, listnodes=listnodes, authorships=authorships)
47
+
48
+
49
def _get_project_context_from_server(connection: Connection, shortcode: str) -> ProjectContext:
    """
    Create the project context for the given shortcode.

    Args:
        connection: connection passed on to the project context
        shortcode: shortcode of the project

    Raises:
        InputError: if creating the project context raises a BaseError

    Returns:
        The project context
    """
    try:
        return ProjectContext(con=connection, shortcode=shortcode)
    except BaseError:
        msg = "Unable to retrieve project context from DSP server"
        # The traceback goes to the log; the user only sees the short message.
        logger.exception(msg)
        raise InputError(msg) from None
56
+
57
+
58
def get_stash_and_upload_order(
    resources: list[ProcessedResource],
) -> tuple[list[ProcessedResource], Stash | None]:
    """
    Sort the resources according to the generated upload order and create the stash.

    Args:
        resources: processed resources

    Returns:
        The resources in upload order and the stash (which may be None)
    """
    logger.debug("Get stash and upload order")
    graph_info = create_info_for_graph_from_processed_resources(resources)
    stash_lookup, upload_order = generate_upload_order(graph_info)
    resource_by_id = {}
    for resource in resources:
        resource_by_id[resource.res_id] = resource
    ordered = [resource_by_id[res_id] for res_id in upload_order]
    return ordered, stash_circular_references(ordered, stash_lookup)
@@ -0,0 +1,58 @@
1
+ from __future__ import annotations
2
+
3
+ import warnings
4
+ from pathlib import Path
5
+
6
+ from loguru import logger
7
+ from lxml import etree
8
+ from tqdm import tqdm
9
+
10
+ from dsp_tools.commands.xmlupload.models.input_problems import AllIIIFUriProblems
11
+ from dsp_tools.commands.xmlupload.prepare_xml_input.iiif_uri_validator import IIIFUriValidator
12
+ from dsp_tools.error.custom_warnings import DspToolsUserWarning
13
+ from dsp_tools.error.exceptions import InputError
14
+
15
+
16
def validate_iiif_uris(root: etree._Element) -> None:
    """
    Validate the IIIF-URIs of the XML file and warn about the problematic ones.

    If there are more than 1001 URIs, a warning is emitted and the validation is skipped.

    Args:
        root: parsed XML file
    """
    uris = []
    for node in root.iter(tag="iiif-uri"):
        # Elements without text are ignored.
        if text := node.text:
            uris.append(text.strip())
    num = len(uris)
    if num > 1001:
        skip_msg = (
            f"Your data contains {num} IIIF-URIs. "
            f"Each validation makes a server call. "
            f"Due to the large number, the validation of the IIIF-URIs has to be skipped."
        )
        warnings.warn(DspToolsUserWarning(skip_msg))
        return
    progress_bar = tqdm(uris, desc="Checking IIIF-URIs", dynamic_ncols=True)
    validator = IIIFUriValidator()
    problems = [result for uri in progress_bar if (result := validator.validate_one_uri(uri))]
    if not problems:
        return
    msg = AllIIIFUriProblems(problems).get_msg()
    warnings.warn(DspToolsUserWarning(msg))
    logger.warning(msg)
37
+
38
+
39
def check_if_bitstreams_exist(root: etree._Element, imgdir: str) -> None:
    """
    Make sure that all bitstreams referenced in the XML file exist in the imgdir.

    Args:
        root: parsed XML file
        imgdir: folder where the bitstreams are stored

    Raises:
        InputError: if a bitstream does not exist in the imgdir,
            or if a resource contains a bitstream element without a file path
    """
    logger.debug("Checking if filepaths exist.")
    multimedia_resources = [x for x in root if any(y.tag == "bitstream" for y in x.iter())]
    progress_bar = tqdm(multimedia_resources, desc="Checking multimedia filepaths", dynamic_ncols=True)
    img_root = Path(imgdir)
    for res in progress_bar:
        # Default of None: a bitstream element without text must not leak a bare StopIteration.
        pth = next((Path(x.text.strip()) for x in res.iter() if x.tag == "bitstream" and x.text), None)
        if pth is None:
            raise InputError(f"Resource '{res.attrib['label']}' contains a bitstream without a file path.")
        if not (img_root / pth).is_file():
            raise InputError(
                f"Bitstream '{pth!s}' of resource '{res.attrib['label']}' does not exist in the imgdir '{imgdir}'."
            )
@@ -0,0 +1,118 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from collections.abc import Callable
5
+ from dataclasses import dataclass
6
+ from json.decoder import JSONDecodeError
7
+ from typing import Union
8
+ from typing import assert_never
9
+ from typing import cast
10
+
11
+ from dsp_tools.commands.xmlupload.models.formatted_text_value import FormattedTextValue
12
+ from dsp_tools.commands.xmlupload.models.processed.values import IntervalFloats
13
+ from dsp_tools.commands.xmlupload.models.processed.values import ProcessedValue
14
+ from dsp_tools.commands.xmlupload.models.processed.values import ProcessedValueTypes
15
+ from dsp_tools.error.exceptions import XmlInputConversionError
16
+ from dsp_tools.utils.data_formats.date_util import Date
17
+ from dsp_tools.utils.data_formats.date_util import parse_date_string
18
+
19
+ type InputTypes = Union[str, FormattedTextValue, tuple[str | None, str | None] | None]
20
+
21
+
22
@dataclass
class TypeTransformerMapper:
    """Pairs a processed value class with the function that transforms the raw input for it."""

    # class of the processed value
    val_type: type[ProcessedValue]
    # callable that turns the raw input into the content of the processed value
    val_transformer: Callable[[InputTypes], ProcessedValueTypes]
26
+
27
+
28
+ def assert_is_string(value: InputTypes) -> str:
29
+ """Assert a value is a string."""
30
+ match value:
31
+ case str() as s:
32
+ return s
33
+ case FormattedTextValue() as xml:
34
+ raise XmlInputConversionError(f"Expected string value, but got XML value: {xml.xmlstr}")
35
+ case tuple():
36
+ raise XmlInputConversionError(f"Expected string value, but got tuple value: {value}")
37
+ case None:
38
+ raise XmlInputConversionError("Expected string value, but got None")
39
+ case _:
40
+ assert_never(value)
41
+
42
+
43
+ def assert_is_tuple(value: InputTypes) -> tuple[str, str]:
44
+ """Assert a value is a tuple."""
45
+ match value:
46
+ case tuple() as t:
47
+ if len(t) == 2 and isinstance(t[0], str) and isinstance(t[1], str):
48
+ return cast(tuple[str, str], t)
49
+ raise XmlInputConversionError(f"Expected tuple with two elements but got {value}")
50
+ case FormattedTextValue() as xml:
51
+ raise XmlInputConversionError(f"Expected tuple value, but got XML value: {xml.xmlstr}")
52
+ case str():
53
+ raise XmlInputConversionError(f"Expected tuple value, but got string value: {value}")
54
+ case None:
55
+ raise XmlInputConversionError("Expected tuple value, but got None")
56
+ case _:
57
+ assert_never(value)
58
+
59
+
60
def transform_boolean(value: InputTypes) -> bool:
    """
    Transform the value into a boolean.

    Args:
        value: input value

    Raises:
        XmlInputConversionError: if the value is none of the accepted spellings

    Returns:
        True for "True", "true", "1", 1 and True; False for "False", "false", "0", 0 and False
    """
    if value in ("True", "true", "1", 1, True):
        return True
    if value in ("False", "false", "0", 0, False):
        return False
    raise XmlInputConversionError(f"Could not parse boolean value: {value}")
69
+
70
+
71
def transform_date(input_value: InputTypes) -> Date:
    """Check that the input is a string and parse it into a date object."""
    return parse_date_string(assert_is_string(input_value))
75
+
76
+
77
def transform_decimal(value: InputTypes) -> float:
    """Check that the input is a string and convert it with `float()`."""
    # NOTE(review): a ValueError from float() is not wrapped here, unlike in transform_interval.
    return float(assert_is_string(value))
81
+
82
+
83
def transform_integer(value: InputTypes) -> int:
    """Check that the input is a string and convert it with `int()`."""
    # NOTE(review): a ValueError from int() is not wrapped here, unlike in transform_interval.
    return int(assert_is_string(value))
87
+
88
+
89
def transform_interval(input_value: InputTypes) -> IntervalFloats:
    """
    Transform a tuple of two strings into an interval object.

    Args:
        input_value: input value, expected to be a tuple of two strings

    Raises:
        XmlInputConversionError: if the input is not such a tuple, or if a ValueError occurs during conversion

    Returns:
        The interval with both elements converted to floats
    """
    val = assert_is_tuple(input_value)
    start, end = val
    try:
        return IntervalFloats(float(start), float(end))
    except ValueError:
        raise XmlInputConversionError(f"Could not parse interval: {val}") from None
96
+
97
+
98
def transform_geometry(value: InputTypes) -> str:
    """
    Check that the input is a JSON string and return it re-serialised.

    Args:
        value: input value, expected to be a JSON string

    Raises:
        XmlInputConversionError: if the input is not a string or cannot be decoded as JSON

    Returns:
        The JSON string as produced by `json.dumps`
    """
    raw = assert_is_string(value)
    try:
        decoded = json.loads(raw)
    except JSONDecodeError:
        raise XmlInputConversionError(f"Could not parse json value: {value}") from None
    return json.dumps(decoded)
105
+
106
+
107
def transform_simpletext(value: InputTypes) -> str:
    """
    Check that the input is a non-empty string and return it.

    Raises:
        XmlInputConversionError: if the input is not a string or is an empty string
    """
    text = assert_is_string(value)
    if not text:
        raise XmlInputConversionError("After removing redundant whitespaces and newlines the input string is empty.")
    return text
112
+
113
+
114
def transform_richtext(value: InputTypes) -> FormattedTextValue:
    """
    Check that the input is a non-empty string and wrap it in a FormattedTextValue.

    Raises:
        XmlInputConversionError: if the input is not a string or is an empty string
    """
    text = assert_is_string(value)
    if not text:
        raise XmlInputConversionError("After removing redundant whitespaces and newlines the input string is empty.")
    return FormattedTextValue(text)
@@ -0,0 +1,25 @@
1
+ from dataclasses import dataclass
2
+ from typing import cast
3
+
4
+ from rdflib import Graph
5
+
6
+ from dsp_tools.clients.connection import Connection
7
+ from dsp_tools.commands.xmlupload.make_rdf_graph.jsonld_utils import serialise_jsonld_for_resource
8
+
9
+
10
@dataclass(frozen=True)
class ResourceCreateClient:
    """Client that creates resources on a DSP server."""

    # connection used to send the request
    con: Connection

    def create_resource(
        self,
        graph: Graph,
        resource_has_bitstream: bool,
    ) -> str:
        """
        Create a resource on the DSP server and return its IRI.

        Args:
            graph: graph of the resource, serialised as JSON-LD for the request
            resource_has_bitstream: if True, the header "X-Asset-Ingested" is sent with the request

        Returns:
            The value of "@id" in the response
        """
        payload = serialise_jsonld_for_resource(graph)
        if resource_has_bitstream:
            headers = {"X-Asset-Ingested": "true"}
        else:
            headers = None
        response = self.con.post(route="/v2/resources", data=payload, headers=headers)
        return cast(str, response["@id"])
@@ -0,0 +1,37 @@
1
+ import regex
2
+
3
+ from dsp_tools.commands.xmlupload.iri_resolver import IriResolver
4
+ from dsp_tools.error.exceptions import Id2IriReplacementError
5
+
6
+
7
def prepare_richtext_string_for_upload(richtext_str: str, iri_resolver: IriResolver) -> str:
    """
    Replace the internal IDs of a richtext with IRIs and wrap the result as XML document.

    Args:
        richtext_str: richtext that may contain `href="IRI:<id>:IRI"` references
        iri_resolver: lookup from internal ID to IRI

    Returns:
        The richtext with resolved references, wrapped by `_richtext_as_xml`

    Raises:
        Id2IriReplacementError: if at least one internal ID could not be resolved
    """
    resolved_text, unresolved_ids = replace_ids_if_found(richtext_str, iri_resolver)
    if not unresolved_ids:
        return _richtext_as_xml(resolved_text)
    # the message shows the text after the partial replacement
    raise Id2IriReplacementError(
        f"Some internal IDs of the following richtext could not be resolved to an IRI: {resolved_text}"
    )
14
+
15
+
16
def replace_ids_if_found(richtext_str: str, iri_resolver: IriResolver) -> tuple[str, set[str]]:
    """
    Replace every internal ID in the richtext for which the resolver knows an IRI.

    Args:
        richtext_str: richtext that may contain `href="IRI:<id>:IRI"` references
        iri_resolver: lookup from internal ID to IRI

    Returns:
        - The richtext in which all resolvable references are replaced.
        - The set of internal IDs for which the resolver returned nothing.
    """
    unresolved: set[str] = set()
    # the IDs are collected once from the original text, before anything is replaced
    for internal_id in find_internal_ids(richtext_str):
        iri = iri_resolver.get(internal_id)
        if iri:
            richtext_str = _replace_one_id(richtext_str, internal_id, iri)
        else:
            unresolved.add(internal_id)
    return richtext_str, unresolved
26
+
27
+
28
def _replace_one_id(txt: str, id_: str, iri: str) -> str:
    """
    Replace all occurrences of `href="IRI:<id_>:IRI"` in the text with `href="<iri>"`.

    Args:
        txt: text in which the replacement is done
        id_: internal ID that is looked for
        iri: IRI that is inserted instead

    Returns:
        The text after the replacement
    """
    placeholder = f'href="IRI:{id_}:IRI"'
    resolved = f'href="{iri}"'
    return txt.replace(placeholder, resolved)
30
+
31
+
32
def find_internal_ids(txt: str) -> set[str]:
    """
    Collect the internal IDs that are referenced in the text in the form `href="IRI:<id>:IRI"`.

    Args:
        txt: text to search

    Returns:
        The set of all IDs found (empty if there are none)
    """
    captured_ids = regex.findall('href="IRI:(.*?):IRI"', txt)
    return set(captured_ids)
34
+
35
+
36
def _richtext_as_xml(richtext_str: str) -> str:
    """
    Wrap the richtext in a `<text>` element, preceded by an XML declaration on its own line.

    Args:
        richtext_str: content of the `<text>` element

    Returns:
        The XML declaration, a newline, and the wrapped richtext
    """
    xml_declaration = '<?xml version="1.0" encoding="UTF-8"?>'
    text_element = f"<text>{richtext_str}</text>"
    return "\n".join((xml_declaration, text_element))
File without changes
@@ -0,0 +1,236 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ import rustworkx as rx
6
+
7
+ from dsp_tools.commands.xmlupload.stash.graph_models import Cost
8
+ from dsp_tools.commands.xmlupload.stash.graph_models import Edge
9
+ from dsp_tools.commands.xmlupload.stash.graph_models import InfoForGraph
10
+ from dsp_tools.commands.xmlupload.stash.graph_models import LinkValueLink
11
+ from dsp_tools.commands.xmlupload.stash.graph_models import StandOffLink
12
+
13
+
14
def generate_upload_order(info_for_graph: InfoForGraph) -> tuple[dict[str, list[str]], list[str]]:
    """
    Build the graph from the given information and derive the upload order from it.

    Args:
        info_for_graph: resource IDs and links from which the graph is built

    Returns:
        - A dictionary which maps the resources that have stashes to the UUIDs of the stashed links.
        - A list of resource IDs which gives the order in which the resources should be uploaded to DSP-API.
    """
    graph, node_to_id, edges = _make_graph(info_for_graph)
    # the third element (number of stashed links) is not needed by the callers of this function
    stash_lookup, upload_order, _stash_size = _generate_upload_order_from_graph(graph, node_to_id, edges)
    return stash_lookup, upload_order
28
+
29
+
30
def _make_graph(
    info_for_graph: InfoForGraph,
) -> tuple[rx.PyDiGraph[Any, Any], dict[int, str], list[Edge]]:
    """
    Construct a rustworkx directed graph in which resources are nodes and links are edges.

    A direct link contributes one edge.
    A stand-off link contributes one edge per target ID, all of them carrying the same link object.

    Args:
        info_for_graph: resource IDs, direct links and stand-off links

    Returns:
        - The rustworkx graph.
        - A dictionary that maps the rustworkx index of each node to the original resource ID.
        - A list with all the edges that were added to the graph.
    """
    resource_ids = info_for_graph.all_resource_ids
    graph: rx.PyDiGraph[Any, Any] = rx.PyDiGraph()
    # the payload of each node is the tuple (resource ID, None, None)
    rx_indices = list(graph.add_nodes_from([(res_id, None, None) for res_id in resource_ids]))
    id_to_node = {res_id: idx for res_id, idx in zip(resource_ids, rx_indices)}
    node_to_id = {idx: res_id for res_id, idx in zip(resource_ids, rx_indices)}

    edges: list[Edge] = []
    for link in info_for_graph.link_values:
        edges.append(Edge(id_to_node[link.source_id], id_to_node[link.target_id], link))
    for standoff in info_for_graph.standoff_links:
        for target_id in standoff.target_ids:
            edges.append(Edge(id_to_node[standoff.source_id], id_to_node[target_id], standoff))

    graph.add_edges_from([edge.as_tuple() for edge in edges])
    return graph, node_to_id, edges
56
+
57
+
58
def _generate_upload_order_from_graph(
    graph: rx.PyDiGraph[Any, Any],
    node_to_id: dict[int, str],
    edges: list[Edge],
) -> tuple[dict[str, list[str]], list[str], int]:
    """
    Derive the upload order by alternately removing leaf nodes and breaking cycles.

    As long as nodes remain after the leaf nodes were removed, a cycle is looked up,
    the cheapest links of that cycle are stashed (i.e. removed from the graph),
    and the leaf nodes that emerged from that are removed again.
    The graph is modified in place.

    Args:
        graph: graph
        node_to_id: mapping between indices of the graph nodes and original resource IDs from the XML file
        edges: edges in the graph (contains info about source node, target node, and link info)

    Returns:
        - A dictionary which maps the resources that have stashes to the UUIDs of the stashed links.
        - A list of resource IDs which gives the order in which the resources should be uploaded to DSP-API.
        - The number of edges that were selected for stashing, summed up over all iterations.
    """
    stash_lookup: dict[str, list[str]] = {}
    stashed_link_count = 0
    uploadable, unresolved = _remove_leaf_nodes(graph, node_to_id, set(node_to_id))
    upload_order: list[str] = list(uploadable)
    while unresolved:
        cycle_edges = list(rx.digraph_find_cycle(graph))
        to_stash = _find_cheapest_outgoing_links(graph, cycle_edges, edges)
        stashed_link_count += len(to_stash)
        _remove_edges_to_stash(
            graph=graph,
            edges_to_remove=to_stash,
            all_edges=edges,
            remaining_nodes=unresolved,
        )
        stash_lookup = _add_stash_to_lookup_dict(stash_lookup, [edge.link_object for edge in to_stash])
        # breaking the cycle may have produced new leaf nodes
        uploadable, unresolved = _remove_leaf_nodes(graph, node_to_id, unresolved)
        upload_order.extend(uploadable)
    return stash_lookup, upload_order, stashed_link_count
96
+
97
+
98
def _remove_leaf_nodes(
    graph: rx.PyDiGraph[Any, Any],
    node_to_id: dict[int, str],
    node_indices: set[int],
) -> tuple[list[str], set[int]]:
    """
    Repeatedly remove all nodes without outgoing edges from the graph, until no such node is left.

    A node without outgoing edges has no dependencies and is therefore ok to upload.
    Removing it may turn other nodes into leaf nodes, hence the repetition.
    The graph is modified in place, the set that was passed in is not.

    Args:
        graph: graph
        node_to_id: mapping of the rustworkx index number of the nodes to the original resource ID from the XML file
        node_indices: node indices that are in the graph

    Returns:
        - A list with the IDs of the removed leaf nodes, in the order of their removal.
        - A set with the indices of the nodes that remain in the graph.
    """
    uploadable_ids: list[str] = []
    still_in_graph = set(node_indices)
    while True:
        current_leaves = [idx for idx in still_in_graph if graph.out_degree(idx) == 0]
        if not current_leaves:
            break
        uploadable_ids += [node_to_id[idx] for idx in current_leaves]
        graph.remove_nodes_from(current_leaves)
        still_in_graph = still_in_graph.difference(current_leaves)
    return uploadable_ids, still_in_graph
124
+
125
+
126
def _find_cheapest_outgoing_links(
    graph: rx.PyDiGraph[Any, Any],
    cycle: list[tuple[int, int]],
    edges: list[Edge],
) -> list[Edge]:
    """
    Determine which links of a cycle should be stashed in order to break the cycle.

    For the source node of each edge in the cycle, a cost-gain-ratio is calculated:
    the summed up `cost_links` of its outgoing edges, divided by the number of its incoming edges.
    The edge of the cycle with the lowest ratio is chosen (the first one, if several have the same ratio).

    Args:
        graph: graph
        cycle: the list with (source, target) for each edge in the cycle
        edges: edges in the graph (contains info about source node, target node, and link info)

    Returns:
        All the edges that connect the source and the target of the chosen cycle edge
    """
    ranked: list[Cost] = []
    for src_idx, tgt_idx in cycle:
        gain = len(graph.in_edges(src_idx))
        # each outgoing edge is a tuple (source index, target index, link object)
        cost = sum(link.cost_links for _, _, link in graph.out_edges(src_idx))
        ranked.append(Cost(src_idx, tgt_idx, cost / gain))
    # the sort is stable, so among equal ratios the order of the cycle decides
    ranked.sort(key=lambda candidate: candidate.node_value)
    best = ranked[0]
    return [edge for edge in edges if (edge.source, edge.target) == (best.source, best.target)]
153
+
154
+
155
def _remove_edges_to_stash(
    graph: rx.PyDiGraph[Any, Any],
    edges_to_remove: list[Edge],
    all_edges: list[Edge],
    remaining_nodes: set[int],
) -> None:
    """
    Remove the given edges from the graph, in order to break a cycle.

    If one of the edges stems from a stand-off link,
    the other edges that stem from the same stand-off link ("phantom" edges) are removed as well.
    The graph is modified in place.

    Args:
        graph: graph
        edges_to_remove: edges that should be removed (must not be empty)
        all_edges: all edges in the original graph
        remaining_nodes: indices of the nodes in the graph
    """
    # one (source, target) pair removes only one edge between the two nodes, not all of them,
    # so the list needs one entry per edge
    pairs_to_remove = [(edge.source, edge.target) for edge in edges_to_remove]

    first_edge = edges_to_remove[0]
    source, target = first_edge.source, first_edge.target
    phantom_pairs: list[tuple[int, int]] = []
    for edge in edges_to_remove:
        link = edge.link_object
        if not isinstance(link, StandOffLink):
            continue
        phantom_pairs += _find_phantom_xml_edges(source, target, all_edges, link, remaining_nodes)

    graph.remove_edges_from(pairs_to_remove + phantom_pairs)
184
+
185
+
186
def _find_phantom_xml_edges(
    source_node_index: int,
    target_node_index: int,
    all_edges: list[Edge],
    xml_link_to_stash: StandOffLink,
    remaining_nodes: set[int],
) -> list[tuple[int, int]]:
    """
    Identify the "phantom" edges that must be removed together with a stashed XML link.

    A text value may contain links to several resources.
    If the text value is stashed, all of its links are stashed in the real data,
    so the edges to the other targets of that text value must disappear from the graph, too.

    Args:
        source_node_index: rustworkx index of source node
        target_node_index: rustworkx index of target node
        all_edges: all edges in the original graph
        xml_link_to_stash: XML link that will be stashed
        remaining_nodes: indices of all nodes in the graph

    Returns:
        (source, target) index pairs of the edges that start at the source node, carry the same link object,
        point to another target than the given one, and whose target is still among the remaining nodes
    """
    return [
        (edge.source, edge.target)
        for edge in all_edges
        if edge.source == source_node_index
        and edge.target != target_node_index
        and edge.link_object == xml_link_to_stash
        # the target could have been removed because it was a leaf node, so we must check if it is still there
        and edge.target in remaining_nodes
    ]
223
+
224
+
225
def _add_stash_to_lookup_dict(
    stash_dict: dict[str, list[str]],
    links_to_stash: list[StandOffLink | LinkValueLink],
) -> dict[str, list[str]]:
    """
    Register the UUIDs of the stashed links under the ID of the resource they originate from.

    The dictionary is modified in place and returned.

    Args:
        stash_dict: lookup from resource ID to the UUIDs of its stashed links
        links_to_stash: links that are stashed (must not be empty)

    Returns:
        The dictionary that was passed in, extended by the new UUIDs
    """
    # all stashed links have the same subject id, so we can just take the first one
    subject_id = links_to_stash[0].source_id
    new_uuids = [link.link_uuid for link in links_to_stash]
    stash_dict.setdefault(subject_id, []).extend(new_uuids)
    return stash_dict
@@ -0,0 +1,53 @@
1
+ from dsp_tools.commands.xmlupload.models.processed.res import ProcessedResource
2
+ from dsp_tools.commands.xmlupload.models.processed.values import ProcessedLink
3
+ from dsp_tools.commands.xmlupload.models.processed.values import ProcessedRichtext
4
+ from dsp_tools.commands.xmlupload.stash.graph_models import InfoForGraph
5
+ from dsp_tools.commands.xmlupload.stash.graph_models import LinkValueLink
6
+ from dsp_tools.commands.xmlupload.stash.graph_models import StandOffLink
7
+ from dsp_tools.utils.data_formats.iri_util import is_resource_iri
8
+
9
+
10
def create_info_for_graph_from_processed_resources(resources: list[ProcessedResource]) -> InfoForGraph:
    """
    Collect the information that is needed to build the graph for the analysis of circular references.

    Args:
        resources: the processed resources

    Returns:
        The IDs of all resources (in input order), and the direct links and stand-off links of all resources
    """
    info = InfoForGraph(all_resource_ids=[], link_values=[], standoff_links=[])
    for resource in resources:
        direct_links, text_links = _process_one_resource(resource)
        info.link_values.extend(direct_links)
        info.standoff_links.extend(text_links)
        info.all_resource_ids.append(resource.res_id)
    return info
25
+
26
+
27
def _process_one_resource(resource: ProcessedResource) -> tuple[list[LinkValueLink], list[StandOffLink]]:
    """
    Extract the links of one resource that are relevant for the graph.

    Link targets for which `is_resource_iri` returns true are ignored.
    A richtext value yields a stand-off link only if at least one of its references is left after that filter.

    Args:
        resource: the processed resource

    Returns:
        - The direct links of the resource.
        - The stand-off links of the resource (one per richtext value).
    """
    direct_links: list[LinkValueLink] = []
    text_links: list[StandOffLink] = []
    for value in resource.values:
        if isinstance(value, ProcessedLink):
            if not is_resource_iri(value.value):
                direct_links.append(
                    LinkValueLink(
                        source_id=resource.res_id,
                        target_id=value.value,
                        link_uuid=value.value_uuid,
                    )
                )
            continue
        if not isinstance(value, ProcessedRichtext):
            continue
        if not value.resource_references:
            continue
        internal_ids = {ref for ref in value.resource_references if not is_resource_iri(ref)}
        if internal_ids:
            text_links.append(
                StandOffLink(
                    source_id=resource.res_id,
                    target_ids=internal_ids,
                    link_uuid=value.value_uuid,
                )
            )
    return direct_links, text_links
@@ -0,0 +1,87 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+
6
@dataclass
class InfoForGraph:
    """
    This class bundles the information from which the graph of resources and links is built.

    Attributes:
        all_resource_ids: IDs of all resources (they become the nodes)
        link_values: direct links between resources
        standoff_links: links that originate from text values
    """

    all_resource_ids: list[str]
    link_values: list[LinkValueLink]
    standoff_links: list[StandOffLink]
11
+
12
+
13
@dataclass(frozen=True)
class LinkValueLink:
    """
    A direct link (resptr) that leads from one resource to exactly one other resource.

    Attributes:
        source_id: ID of the resource from which the link originates
        target_id: ID of the resource where the link points to
        link_uuid: identifier of this link
    """

    source_id: str
    target_id: str
    link_uuid: str

    @property
    def cost_links(self) -> float:
        """The cost of this outgoing link, which is always 1"""
        return 1
32
+
33
+
34
@dataclass(frozen=True)
class StandOffLink:
    """
    The link(s) that lead from one text value of a resource to the resources referenced in that text.
    All targets belong to the same text value, and therefore share one identifier.

    Attributes:
        source_id: ID of the resource from which the link(s) originate
        target_ids: IDs of the resources that are referenced in the text value
        link_uuid: identifier of this link
    """

    source_id: str
    target_ids: set[str]
    link_uuid: str

    @property
    def cost_links(self) -> float:
        """The cost of one outgoing link: 1 divided by the number of targets in the text value"""
        number_of_targets = len(self.target_ids)
        return 1 / number_of_targets
54
+
55
+
56
@dataclass(frozen=True)
class Edge:
    """
    One edge of the rustworkx graph, together with the link it stands for.

    Attributes:
        source: rustworkx index of the resource from which the link originates
        target: rustworkx index of the resource where the link points to
        link_object: the link that connects the source with the target
    """

    source: int
    target: int
    link_object: LinkValueLink | StandOffLink

    def as_tuple(self) -> tuple[int, int, LinkValueLink | StandOffLink]:
        """Returns this edge as the tuple (source index, target index, link object)"""
        return (self.source, self.target, self.link_object)
74
+
75
+
76
@dataclass(frozen=True)
class Cost:
    """
    The cost-gain-ratio of stashing one link.

    Attributes:
        source: rustworkx index of the resource from which the link originates
        target: rustworkx index of the resource where the link points to
        node_value: cost-gain-ratio if this link is stashed
    """

    source: int
    target: int
    node_value: float