dsp-tools 9.1.0.post11__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316) hide show
  1. dsp_tools/__init__.py +4 -0
  2. dsp_tools/cli/args.py +36 -0
  3. dsp_tools/cli/call_action.py +51 -231
  4. dsp_tools/cli/call_action_files_only.py +101 -0
  5. dsp_tools/cli/call_action_with_network.py +207 -0
  6. dsp_tools/cli/create_parsers.py +156 -58
  7. dsp_tools/cli/entry_point.py +56 -26
  8. dsp_tools/cli/utils.py +87 -0
  9. dsp_tools/clients/CLAUDE.md +420 -0
  10. dsp_tools/clients/authentication_client.py +14 -0
  11. dsp_tools/clients/authentication_client_live.py +66 -0
  12. dsp_tools/{utils → clients}/connection.py +2 -18
  13. dsp_tools/clients/connection_live.py +233 -0
  14. dsp_tools/clients/fuseki_metrics.py +60 -0
  15. dsp_tools/clients/group_user_clients.py +35 -0
  16. dsp_tools/clients/group_user_clients_live.py +181 -0
  17. dsp_tools/clients/legal_info_client.py +23 -0
  18. dsp_tools/clients/legal_info_client_live.py +132 -0
  19. dsp_tools/clients/list_client.py +49 -0
  20. dsp_tools/clients/list_client_live.py +166 -0
  21. dsp_tools/clients/metadata_client.py +24 -0
  22. dsp_tools/clients/metadata_client_live.py +47 -0
  23. dsp_tools/clients/ontology_clients.py +49 -0
  24. dsp_tools/clients/ontology_create_client_live.py +166 -0
  25. dsp_tools/clients/ontology_get_client_live.py +80 -0
  26. dsp_tools/clients/permissions_client.py +68 -0
  27. dsp_tools/clients/project_client.py +16 -0
  28. dsp_tools/clients/project_client_live.py +66 -0
  29. dsp_tools/commands/create/communicate_problems.py +24 -0
  30. dsp_tools/commands/create/create.py +134 -0
  31. dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
  32. dsp_tools/commands/create/create_on_server/classes.py +99 -0
  33. dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
  34. dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
  35. dsp_tools/commands/create/create_on_server/group_users.py +165 -0
  36. dsp_tools/commands/create/create_on_server/lists.py +163 -0
  37. dsp_tools/commands/create/create_on_server/mappers.py +12 -0
  38. dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
  39. dsp_tools/commands/create/create_on_server/ontology.py +52 -0
  40. dsp_tools/commands/create/create_on_server/project.py +68 -0
  41. dsp_tools/commands/create/create_on_server/properties.py +119 -0
  42. dsp_tools/commands/create/exceptions.py +29 -0
  43. dsp_tools/commands/create/lists_only.py +66 -0
  44. dsp_tools/commands/create/models/create_problems.py +87 -0
  45. dsp_tools/commands/create/models/parsed_ontology.py +88 -0
  46. dsp_tools/commands/create/models/parsed_project.py +81 -0
  47. dsp_tools/commands/create/models/rdf_ontology.py +12 -0
  48. dsp_tools/commands/create/models/server_project_info.py +100 -0
  49. dsp_tools/commands/create/parsing/parse_lists.py +45 -0
  50. dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
  51. dsp_tools/commands/create/parsing/parse_project.py +149 -0
  52. dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
  53. dsp_tools/commands/create/project_validate.py +595 -0
  54. dsp_tools/commands/create/serialisation/ontology.py +119 -0
  55. dsp_tools/commands/create/serialisation/project.py +44 -0
  56. dsp_tools/commands/excel2json/CLAUDE.md +101 -0
  57. dsp_tools/commands/excel2json/json_header.py +57 -23
  58. dsp_tools/commands/excel2json/{new_lists → lists}/compliance_checks.py +26 -26
  59. dsp_tools/commands/excel2json/{new_lists/make_new_lists.py → lists/make_lists.py} +19 -18
  60. dsp_tools/commands/excel2json/{new_lists → lists}/models/input_error.py +1 -12
  61. dsp_tools/commands/excel2json/{new_lists → lists}/models/serialise.py +9 -5
  62. dsp_tools/commands/excel2json/{new_lists → lists}/utils.py +4 -4
  63. dsp_tools/commands/excel2json/models/input_error.py +31 -11
  64. dsp_tools/commands/excel2json/models/json_header.py +53 -15
  65. dsp_tools/commands/excel2json/models/ontology.py +4 -3
  66. dsp_tools/commands/excel2json/{lists.py → old_lists.py} +26 -112
  67. dsp_tools/commands/excel2json/project.py +78 -34
  68. dsp_tools/commands/excel2json/properties.py +57 -36
  69. dsp_tools/commands/excel2json/resources.py +32 -12
  70. dsp_tools/commands/excel2json/utils.py +20 -1
  71. dsp_tools/commands/excel2xml/__init__.py +2 -2
  72. dsp_tools/commands/excel2xml/excel2xml_cli.py +7 -15
  73. dsp_tools/commands/excel2xml/excel2xml_lib.py +138 -493
  74. dsp_tools/commands/excel2xml/propertyelement.py +5 -5
  75. dsp_tools/commands/{project → get}/get.py +29 -13
  76. dsp_tools/commands/get/get_permissions.py +257 -0
  77. dsp_tools/commands/get/get_permissions_legacy.py +89 -0
  78. dsp_tools/commands/{project/models → get/legacy_models}/context.py +6 -6
  79. dsp_tools/commands/{project/models → get/legacy_models}/group.py +5 -10
  80. dsp_tools/commands/{project/models → get/legacy_models}/listnode.py +5 -35
  81. dsp_tools/commands/{project/models → get/legacy_models}/model.py +1 -1
  82. dsp_tools/commands/{project/models → get/legacy_models}/ontology.py +9 -14
  83. dsp_tools/commands/{project/models → get/legacy_models}/project.py +13 -6
  84. dsp_tools/commands/{project/models → get/legacy_models}/propertyclass.py +9 -16
  85. dsp_tools/commands/{project/models → get/legacy_models}/resourceclass.py +8 -46
  86. dsp_tools/commands/{project/models → get/legacy_models}/user.py +19 -60
  87. dsp_tools/commands/get/models/permissions_models.py +10 -0
  88. dsp_tools/commands/id2iri.py +20 -10
  89. dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +81 -56
  90. dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +4 -10
  91. dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +97 -37
  92. dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +2 -2
  93. dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +9 -10
  94. dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +3 -3
  95. dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +2 -10
  96. dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +12 -2
  97. dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +8 -9
  98. dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +18 -18
  99. dsp_tools/commands/start_stack.py +126 -77
  100. dsp_tools/commands/update_legal/CLAUDE.md +344 -0
  101. dsp_tools/commands/update_legal/__init__.py +0 -0
  102. dsp_tools/commands/update_legal/core.py +182 -0
  103. dsp_tools/commands/update_legal/csv_operations.py +135 -0
  104. dsp_tools/commands/update_legal/models.py +87 -0
  105. dsp_tools/commands/update_legal/xml_operations.py +247 -0
  106. dsp_tools/commands/validate_data/CLAUDE.md +159 -0
  107. dsp_tools/commands/validate_data/__init__.py +0 -0
  108. dsp_tools/commands/validate_data/constants.py +59 -0
  109. dsp_tools/commands/validate_data/mappers.py +143 -0
  110. dsp_tools/commands/validate_data/models/__init__.py +0 -0
  111. dsp_tools/commands/validate_data/models/api_responses.py +45 -0
  112. dsp_tools/commands/validate_data/models/input_problems.py +119 -0
  113. dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
  114. dsp_tools/commands/validate_data/models/validation.py +106 -0
  115. dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
  116. dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
  117. dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
  118. dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
  119. dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
  120. dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
  121. dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
  122. dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
  123. dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
  124. dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
  125. dsp_tools/commands/{xml_validate/sparql/resource_shacl.py → validate_data/sparql/cardinality_shacl.py} +45 -47
  126. dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
  127. dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
  128. dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
  129. dsp_tools/commands/validate_data/utils.py +59 -0
  130. dsp_tools/commands/validate_data/validate_data.py +283 -0
  131. dsp_tools/commands/validate_data/validation/__init__.py +0 -0
  132. dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
  133. dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
  134. dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
  135. dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
  136. dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
  137. dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
  138. dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
  139. dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
  140. dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
  141. dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
  142. dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
  143. dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
  144. dsp_tools/commands/xmlupload/models/formatted_text_value.py +0 -25
  145. dsp_tools/commands/xmlupload/models/ingest.py +56 -70
  146. dsp_tools/commands/xmlupload/models/input_problems.py +6 -14
  147. dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
  148. dsp_tools/commands/xmlupload/models/permission.py +0 -39
  149. dsp_tools/commands/xmlupload/models/{deserialise/xmlpermission.py → permissions_parsed.py} +2 -2
  150. dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
  151. dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
  152. dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
  153. dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
  154. dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
  155. dsp_tools/commands/xmlupload/models/upload_clients.py +3 -3
  156. dsp_tools/commands/xmlupload/models/upload_state.py +2 -4
  157. dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
  158. dsp_tools/commands/xmlupload/{ark2iri.py → prepare_xml_input/ark2iri.py} +1 -1
  159. dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
  160. dsp_tools/commands/xmlupload/{iiif_uri_validator.py → prepare_xml_input/iiif_uri_validator.py} +2 -14
  161. dsp_tools/commands/xmlupload/{list_client.py → prepare_xml_input/list_client.py} +15 -10
  162. dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
  163. dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
  164. dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
  165. dsp_tools/commands/xmlupload/resource_create_client.py +7 -468
  166. dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
  167. dsp_tools/commands/xmlupload/stash/{construct_and_analyze_graph.py → analyse_circular_reference_graph.py} +64 -157
  168. dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
  169. dsp_tools/commands/xmlupload/stash/graph_models.py +13 -8
  170. dsp_tools/commands/xmlupload/stash/stash_circular_references.py +48 -115
  171. dsp_tools/commands/xmlupload/stash/stash_models.py +4 -9
  172. dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +34 -40
  173. dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +98 -108
  174. dsp_tools/commands/xmlupload/upload_config.py +8 -0
  175. dsp_tools/commands/xmlupload/write_diagnostic_info.py +14 -9
  176. dsp_tools/commands/xmlupload/xmlupload.py +214 -192
  177. dsp_tools/config/__init__.py +0 -0
  178. dsp_tools/config/logger_config.py +69 -0
  179. dsp_tools/{utils → config}/warnings_config.py +4 -1
  180. dsp_tools/error/__init__.py +0 -0
  181. dsp_tools/error/custom_warnings.py +39 -0
  182. dsp_tools/error/exceptions.py +204 -0
  183. dsp_tools/error/problems.py +10 -0
  184. dsp_tools/error/xmllib_errors.py +20 -0
  185. dsp_tools/error/xmllib_warnings.py +54 -0
  186. dsp_tools/error/xmllib_warnings_util.py +159 -0
  187. dsp_tools/error/xsd_validation_error_msg.py +19 -0
  188. dsp_tools/legacy_models/__init__.py +0 -0
  189. dsp_tools/{models → legacy_models}/datetimestamp.py +7 -7
  190. dsp_tools/{models → legacy_models}/langstring.py +1 -1
  191. dsp_tools/{models → legacy_models}/projectContext.py +4 -4
  192. dsp_tools/resources/schema/data.xsd +108 -83
  193. dsp_tools/resources/schema/lists-only.json +4 -23
  194. dsp_tools/resources/schema/project.json +80 -35
  195. dsp_tools/resources/schema/properties-only.json +1 -4
  196. dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
  197. dsp_tools/resources/start-stack/docker-compose.yml +34 -30
  198. dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
  199. dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
  200. dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
  201. dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
  202. dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
  203. dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
  204. dsp_tools/utils/ansi_colors.py +32 -0
  205. dsp_tools/utils/data_formats/__init__.py +0 -0
  206. dsp_tools/utils/{date_util.py → data_formats/date_util.py} +13 -1
  207. dsp_tools/utils/data_formats/iri_util.py +30 -0
  208. dsp_tools/utils/{shared.py → data_formats/shared.py} +1 -35
  209. dsp_tools/utils/{uri_util.py → data_formats/uri_util.py} +12 -2
  210. dsp_tools/utils/fuseki_bloating.py +63 -0
  211. dsp_tools/utils/json_parsing.py +22 -0
  212. dsp_tools/utils/rdf_constants.py +42 -0
  213. dsp_tools/utils/rdflib_utils.py +10 -0
  214. dsp_tools/utils/replace_id_with_iri.py +66 -0
  215. dsp_tools/utils/request_utils.py +238 -0
  216. dsp_tools/utils/xml_parsing/__init__.py +0 -0
  217. dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
  218. dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
  219. dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
  220. dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
  221. dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
  222. dsp_tools/xmllib/CLAUDE.md +302 -0
  223. dsp_tools/xmllib/__init__.py +49 -0
  224. dsp_tools/xmllib/general_functions.py +877 -0
  225. dsp_tools/xmllib/internal/__init__.py +0 -0
  226. dsp_tools/xmllib/internal/checkers.py +162 -0
  227. dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
  228. dsp_tools/xmllib/internal/constants.py +46 -0
  229. dsp_tools/xmllib/internal/input_converters.py +155 -0
  230. dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
  231. dsp_tools/xmllib/internal/serialise_resource.py +177 -0
  232. dsp_tools/xmllib/internal/serialise_values.py +152 -0
  233. dsp_tools/xmllib/internal/type_aliases.py +11 -0
  234. dsp_tools/xmllib/models/config_options.py +28 -0
  235. dsp_tools/xmllib/models/date_formats.py +48 -0
  236. dsp_tools/xmllib/models/dsp_base_resources.py +1380 -400
  237. dsp_tools/xmllib/models/internal/__init__.py +0 -0
  238. dsp_tools/xmllib/models/internal/file_values.py +172 -0
  239. dsp_tools/xmllib/models/internal/geometry.py +162 -0
  240. dsp_tools/xmllib/models/{migration_metadata.py → internal/migration_metadata.py} +14 -10
  241. dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
  242. dsp_tools/xmllib/models/internal/values.py +342 -0
  243. dsp_tools/xmllib/models/licenses/__init__.py +0 -0
  244. dsp_tools/xmllib/models/licenses/other.py +59 -0
  245. dsp_tools/xmllib/models/licenses/recommended.py +107 -0
  246. dsp_tools/xmllib/models/permissions.py +41 -0
  247. dsp_tools/xmllib/models/res.py +1782 -0
  248. dsp_tools/xmllib/models/root.py +313 -26
  249. dsp_tools/xmllib/value_checkers.py +310 -47
  250. dsp_tools/xmllib/value_converters.py +765 -8
  251. dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
  252. dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
  253. dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
  254. {dsp_tools-9.1.0.post11.dist-info → dsp_tools-18.3.0.post13.dist-info}/entry_points.txt +1 -0
  255. dsp_tools/commands/project/create/project_create.py +0 -1107
  256. dsp_tools/commands/project/create/project_create_lists.py +0 -204
  257. dsp_tools/commands/project/create/project_validate.py +0 -453
  258. dsp_tools/commands/project/models/project_definition.py +0 -12
  259. dsp_tools/commands/rosetta.py +0 -124
  260. dsp_tools/commands/template.py +0 -30
  261. dsp_tools/commands/xml_validate/api_connection.py +0 -122
  262. dsp_tools/commands/xml_validate/deserialise_input.py +0 -135
  263. dsp_tools/commands/xml_validate/make_data_rdf.py +0 -193
  264. dsp_tools/commands/xml_validate/models/data_deserialised.py +0 -108
  265. dsp_tools/commands/xml_validate/models/data_rdf.py +0 -214
  266. dsp_tools/commands/xml_validate/models/input_problems.py +0 -191
  267. dsp_tools/commands/xml_validate/models/validation.py +0 -29
  268. dsp_tools/commands/xml_validate/reformat_validaton_result.py +0 -89
  269. dsp_tools/commands/xml_validate/sparql/construct_shapes.py +0 -16
  270. dsp_tools/commands/xml_validate/xml_validate.py +0 -151
  271. dsp_tools/commands/xmlupload/check_consistency_with_ontology.py +0 -253
  272. dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py +0 -236
  273. dsp_tools/commands/xmlupload/models/deserialise/xmlresource.py +0 -171
  274. dsp_tools/commands/xmlupload/models/namespace_context.py +0 -39
  275. dsp_tools/commands/xmlupload/models/ontology_lookup_models.py +0 -161
  276. dsp_tools/commands/xmlupload/models/ontology_problem_models.py +0 -178
  277. dsp_tools/commands/xmlupload/models/serialise/jsonld_serialiser.py +0 -40
  278. dsp_tools/commands/xmlupload/models/serialise/serialise_value.py +0 -51
  279. dsp_tools/commands/xmlupload/ontology_client.py +0 -92
  280. dsp_tools/commands/xmlupload/project_client.py +0 -91
  281. dsp_tools/commands/xmlupload/read_validate_xml_file.py +0 -99
  282. dsp_tools/models/custom_warnings.py +0 -31
  283. dsp_tools/models/exceptions.py +0 -90
  284. dsp_tools/resources/0100-template-repo/template.json +0 -45
  285. dsp_tools/resources/0100-template-repo/template.xml +0 -27
  286. dsp_tools/resources/start-stack/docker-compose-validation.yml +0 -5
  287. dsp_tools/resources/start-stack/start-stack-config.yml +0 -4
  288. dsp_tools/resources/xml_validate/api-shapes.ttl +0 -411
  289. dsp_tools/resources/xml_validate/replace_namespace.xslt +0 -61
  290. dsp_tools/utils/connection_live.py +0 -383
  291. dsp_tools/utils/iri_util.py +0 -14
  292. dsp_tools/utils/logger_config.py +0 -41
  293. dsp_tools/utils/set_encoder.py +0 -20
  294. dsp_tools/utils/xml_utils.py +0 -145
  295. dsp_tools/utils/xml_validation.py +0 -197
  296. dsp_tools/utils/xml_validation_models.py +0 -68
  297. dsp_tools/xmllib/models/file_values.py +0 -78
  298. dsp_tools/xmllib/models/resource.py +0 -415
  299. dsp_tools/xmllib/models/values.py +0 -428
  300. dsp_tools-9.1.0.post11.dist-info/METADATA +0 -130
  301. dsp_tools-9.1.0.post11.dist-info/RECORD +0 -167
  302. dsp_tools-9.1.0.post11.dist-info/WHEEL +0 -4
  303. dsp_tools-9.1.0.post11.dist-info/licenses/LICENSE +0 -674
  304. /dsp_tools/{commands/excel2json/new_lists → clients}/__init__.py +0 -0
  305. /dsp_tools/commands/{excel2json/new_lists/models → create}/__init__.py +0 -0
  306. /dsp_tools/commands/{project → create/create_on_server}/__init__.py +0 -0
  307. /dsp_tools/commands/{project/create → create/models}/__init__.py +0 -0
  308. /dsp_tools/commands/{project/models → create/parsing}/__init__.py +0 -0
  309. /dsp_tools/commands/{xml_validate → create/serialisation}/__init__.py +0 -0
  310. /dsp_tools/commands/{xml_validate/models → excel2json/lists}/__init__.py +0 -0
  311. /dsp_tools/commands/{xml_validate/sparql → excel2json/lists/models}/__init__.py +0 -0
  312. /dsp_tools/commands/excel2json/{new_lists → lists}/models/deserialise.py +0 -0
  313. /dsp_tools/commands/{xmlupload/models/deserialise → get}/__init__.py +0 -0
  314. /dsp_tools/commands/{xmlupload/models/serialise → get/legacy_models}/__init__.py +0 -0
  315. /dsp_tools/commands/{project/models → get/legacy_models}/helpers.py +0 -0
  316. /dsp_tools/{models → commands/get/models}/__init__.py +0 -0
@@ -0,0 +1,877 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import uuid
5
+ from collections.abc import Iterable
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import regex
11
+ from lxml import etree
12
+
13
+ from dsp_tools.error.xmllib_warnings import MessageInfo
14
+ from dsp_tools.error.xmllib_warnings_util import emit_xmllib_input_warning
15
+ from dsp_tools.error.xmllib_warnings_util import raise_xmllib_input_error
16
+ from dsp_tools.xmllib.internal.checkers import is_nonempty_value_internal
17
+ from dsp_tools.xmllib.internal.constants import KNOWN_XML_TAG_REGEXES
18
+ from dsp_tools.xmllib.internal.input_converters import unescape_reserved_xml_chars
19
+ from dsp_tools.xmllib.models.config_options import NewlineReplacement
20
+ from dsp_tools.xmllib.models.licenses.other import LicenseOther
21
+ from dsp_tools.xmllib.models.licenses.recommended import License
22
+ from dsp_tools.xmllib.models.licenses.recommended import LicenseRecommended
23
+ from dsp_tools.xmllib.value_converters import replace_newlines_with_tags
24
+
25
+
26
def create_footnote_string(
    footnote_text: str, newline_replacement_option: NewlineReplacement = NewlineReplacement.LINEBREAK
) -> str:
    """
    Builds a footnote from the given text and returns it serialised as a string.
    Use this if you want to embed the footnote into a richtext string.
    The footnote is built by `create_footnote_element()` and then serialised with `lxml`.
    Currently, the newline replacement options are restricted to `LINEBREAK` and `NONE`.
    The reserved characters `<`, `>` and `&` will be escaped temporarily,
    but they will be correctly displayed in DSP-APP.

    Attention:
        - The text in the footnote may be richtext, i.e. contain XML tags.
        - Not all tags supported in ordinary richtext are currently implemented.
        - The allowed tags are:
            - `<br>` (break line)
            - `<strong>` (bold)
            - `<em>` (italic)
            - `<u>` (underline)
            - `<strike>` (strike through)
            - `<a href="URI">` (link to a URI)
            - `<a class="salsah-link" href="Knora IRI">` (link to a resource)

    Args:
        footnote_text: Text for the footnote
        newline_replacement_option: options to replace newlines

    Raises:
        XmllibInputError: If the text is empty, or if a newline replacement which is not implemented is entered

    Returns:
        The serialised `<footnote>` element as a string

    Examples:
        ```python
        result = xmllib.create_footnote_string("Text")
        # result == '<footnote content="Text"/>'
        ```

        ```python
        result = xmllib.create_footnote_string("Text\\nSecond Line")
        # result == '<footnote content="Text&lt;br/&gt;Second Line"/>'
        ```

        ```python
        result = xmllib.create_footnote_string("Already escaped &lt;&gt;")
        # already escaped characters will not be escaped again
        # result == '<footnote content="Already escaped &lt;&gt;"/>'
        ```
    """
    footnote = create_footnote_element(footnote_text, newline_replacement_option)
    # encoding="unicode" makes lxml return a str instead of bytes
    serialised: str = etree.tostring(footnote, encoding="unicode")
    return serialised
77
+
78
+
79
def create_footnote_element(
    footnote_text: str, newline_replacement_option: NewlineReplacement = NewlineReplacement.LINEBREAK
) -> etree._Element:
    """
    Builds a `<footnote>` element whose `content` attribute holds the footnote text.
    Use this if you are working with `lxml` directly.
    Currently, the newline replacement options are restricted to `LINEBREAK` and `NONE`.

    Attention:
        - The text in the footnote may be richtext, i.e. contain XML tags.
        - Not all tags supported in ordinary richtext are currently implemented.
        - The allowed tags are:
            - `<br>` (break line)
            - `<strong>` (bold)
            - `<em>` (italic)
            - `<u>` (underline)
            - `<strike>` (strike through)
            - `<a href="URI">` (link to a URI)
            - `<a class="salsah-link" href="Knora IRI">` (link to a resource)

    Args:
        footnote_text: Text for the footnote
        newline_replacement_option: options to replace newlines

    Raises:
        XmllibInputError: If the text is empty, or if a newline replacement which is not implemented is entered

    Returns:
        The footnote as an `etree.Element`
    """
    supported_options = {NewlineReplacement.LINEBREAK, NewlineReplacement.NONE}
    if newline_replacement_option not in supported_options:
        unsupported_msg = MessageInfo("Currently the only supported newline replacement is linebreak (<br/>) or None.")
        raise_xmllib_input_error(unsupported_msg)
    if not is_nonempty_value_internal(footnote_text):
        raise_xmllib_input_error(MessageInfo("The input value is empty."))
    text_with_tags = replace_newlines_with_tags(str(footnote_text), newline_replacement_option)
    # Unescape first, so that lxml escapes the reserved characters exactly once when serialising the attribute.
    attribute_content = unescape_reserved_xml_chars(text_with_tags)
    return etree.Element("footnote", attrib={"content": attribute_content})
118
+
119
+
120
def create_standoff_link_to_resource(resource_id: str, displayed_text: str) -> str:
    """
    Builds an `<a class="salsah-link">` tag that links to another resource, and returns it as a string.
    The resource ID is wrapped as `IRI:<resource_id>:IRI` in the `href` attribute.

    Args:
        resource_id: ID of the resource that is linked
        displayed_text: text to display for the embedded link

    Returns:
        A standoff link in string form.

    Raises:
        XmllibInputError: if the resource ID or the displayed text are empty

    Examples:
        ```python
        result = xmllib.create_standoff_link_to_resource("resource_id", "Text")
        # result == '<a class="salsah-link" href="IRI:resource_id:IRI">Text</a>'
        ```
    """
    id_is_filled = is_nonempty_value_internal(resource_id)
    text_is_filled = is_nonempty_value_internal(displayed_text)
    if not (id_is_filled and text_is_filled):
        problem = (
            f"The entered resource ID and displayed text may not be empty. "
            f"Your input: resource_id '{resource_id}' / displayed_text '{displayed_text}'"
        )
        raise_xmllib_input_error(MessageInfo(problem))
    link = etree.Element("a", attrib={"class": "salsah-link", "href": f"IRI:{resource_id}:IRI"})
    link.text = displayed_text
    serialised: str = etree.tostring(link, encoding="unicode")
    return serialised
150
+
151
+
152
def create_standoff_link_to_uri(uri: str, displayed_text: str) -> str:
    """
    Builds an `<a href="...">` tag that links to a URI, and returns it as a string.

    Args:
        uri: the target URI that should be linked to
        displayed_text: text to display for the embedded link

    Returns:
        A standoff link in string form.

    Raises:
        XmllibInputError: if the URI or the displayed text are empty

    Examples:
        ```python
        result = xmllib.create_standoff_link_to_uri("https://www.dasch.swiss/", "This is DaSCH")
        # result == '<a href="https://www.dasch.swiss/">This is DaSCH</a>'
        ```
    """
    uri_is_filled = is_nonempty_value_internal(uri)
    text_is_filled = is_nonempty_value_internal(displayed_text)
    if not (uri_is_filled and text_is_filled):
        problem = (
            f"The entered URI and displayed text may not be empty. "
            f"Your input: uri '{uri}' / displayed_text '{displayed_text}'"
        )
        raise_xmllib_input_error(MessageInfo(problem))
    link = etree.Element("a", attrib={"href": uri})
    link.text = displayed_text
    serialised: str = etree.tostring(link, encoding="unicode")
    return serialised
182
+
183
+
184
def _get_label_to_node_one_list(
    list_section: list[dict[str, Any]], list_name: str, language_of_label: str
) -> dict[str, str]:
    """
    Builds the label-to-node-name lookup for a single list.

    Every label is registered twice: once verbatim, and once stripped and lower-cased.
    The entry whose name equals the list name itself is left out.

    Args:
        list_section: the "lists" section of a JSON project file
        list_name: name of the list for which the lookup is built
        language_of_label: language of the labels that serve as keys

    Returns:
        Mapping from label (verbatim and normalised) to node name
    """
    # Only the list(s) carrying the requested name are handed to the iterator.
    matching_lists = [li for li in list_section if li["name"] == list_name]
    label_to_node: dict[str, str] = {}
    for label, node_name in _name_label_mapper_iterator(matching_lists, language_of_label):
        if node_name == list_name:
            continue
        label_to_node[label] = node_name
        label_to_node[label.strip().lower()] = node_name
    return label_to_node
195
+
196
+
197
def _get_label_to_node_all_lists(
    list_section: list[dict[str, Any]], language_of_label: str
) -> dict[str, dict[str, str]]:
    """
    Builds the label-to-node-name lookup for every list of the project.

    Args:
        list_section: the "lists" section of a JSON project file
        language_of_label: language of the labels that serve as keys

    Returns:
        Mapping from list name to the lookup produced by `_get_label_to_node_one_list()`
    """
    return {
        one_list["name"]: _get_label_to_node_one_list(list_section, one_list["name"], language_of_label)
        for one_list in list_section
    }
204
+
205
+
206
+ def _get_property_to_list_name_mapping(ontologies: list[dict[str, Any]], default_ontology: str) -> dict[str, str]:
207
+ prop_lookup = {}
208
+ for onto in ontologies:
209
+ prefix = onto["name"]
210
+ property_section = onto["properties"]
211
+ for prop in property_section:
212
+ if prop["gui_element"] == "List":
213
+ prefixed_prop = f"{prefix}:{prop['name']}"
214
+ prop_lookup[prefixed_prop] = prop["gui_attributes"]["hlist"]
215
+ default_props = {
216
+ k.replace(default_ontology, "", 1): v for k, v in prop_lookup.items() if k.startswith(f"{default_ontology}:")
217
+ }
218
+ prop_lookup = prop_lookup | default_props
219
+ return prop_lookup
220
+
221
+
222
+ @dataclass
223
+ class ListLookup:
224
+ _lookup: dict[str, dict[str, str]]
225
+ _prop_to_list_name: dict[str, str]
226
+ _label_language: str
227
+
228
@staticmethod
def create_new(project_json_path: str | Path, language_of_label: str, default_ontology: str) -> ListLookup:
    """
    Reads a JSON project file and builds a `ListLookup` from it.
    The lookup translates list labels in the specified language into list node names,
    and covers all lists in the project.json.

    Args:
        project_json_path: path to a JSON project file (a.k.a. ontology)
        language_of_label: label language used for the list
        default_ontology: ontology prefix which is defined as default in the XML file

    Returns:
        `ListLookup` for a project

    Examples:
        ```python
        list_lookup = xmllib.ListLookup.create_new(
            project_json_path="project.json",
            language_of_label="en",
            default_ontology="default-onto",
        )
        ```
    """
    with open(project_json_path, encoding="utf-8") as project_file:
        project_definition = json.load(project_file)
    labels_per_list = _get_label_to_node_all_lists(project_definition["project"]["lists"], language_of_label)
    lists_per_property = _get_property_to_list_name_mapping(
        project_definition["project"]["ontologies"], default_ontology
    )
    return ListLookup(
        _lookup=labels_per_list,
        _prop_to_list_name=lists_per_property,
        _label_language=language_of_label,
    )
260
+
261
def get_node_via_list_name(self, list_name: str, node_label: str) -> str:
    """
    Returns the list node name based on a label.
    The language of the label was specified when creating the `ListLookup`.
    The label is first looked up exactly as entered.
    If that fails, it is looked up again with surrounding whitespace removed and in lower case,
    so that capitalisation and surrounding whitespace are not relevant.

    If the list or the label is not found, a warning is emitted and an empty string is returned.

    Args:
        list_name: name of the list
        node_label: label of the node

    Returns:
        node name, or an empty string if the list or the label was not found

    Examples:
        ```python
        node_name = list_lookup.get_node_via_list_name(
            list_name="list1",
            node_label="Label 1"  # or: "label 1" (capitalisation is not relevant)
        )
        # node_name == "node1"
        ```
    """
    if not (list_lookup := self._lookup.get(list_name)):
        emit_xmllib_input_warning(
            MessageInfo(f"The entered list name '{list_name}' was not found. An empty string is returned.")
        )
        return ""
    # The exact match comes first, so that labels which were found before still resolve identically.
    # The fallback uses the same normalisation (strip + lower) with which the lookup keys are registered.
    found_node = list_lookup.get(node_label) or list_lookup.get(node_label.strip().lower())
    if not found_node:
        emit_xmllib_input_warning(
            MessageInfo(
                f"'{node_label}' was not recognised as label of the list '{list_name}'. "
                f"This ListLookup is configured for '{self._label_language}' labels. An empty string is returned."
            )
        )
        return ""
    return found_node
296
+
297
def get_list_name_and_node_via_property(self, prop_name: str, node_label: str) -> tuple[str, str]:
    """
    Resolve a property and a node label to the list name and the node name.
    The label must be in the language that was specified when the `ListLookup` was created.
    The list name needs to be referenced in the XML file.

    If the property is unknown, two empty strings are returned.
    If only the label is unknown, the list name and an empty string are returned.

    Args:
        prop_name: name of the property that is used with the list
        node_label: label of the node

    Returns:
        list name and node name

    Examples:
        ```python
        list_name, node_name = list_lookup.get_list_name_and_node_via_property(
            prop_name=":hasList",  # or: "default-onto:hasList"
            node_label="label 1"
        )
        # list_name == "list1"
        # node_name == "node1"
        ```
    """
    resolved_list = self.get_list_name_via_property(prop_name)
    if resolved_list:
        return resolved_list, self.get_node_via_list_name(resolved_list, node_label)
    return "", ""
323
+
324
def get_list_name_via_property(self, prop_name: str) -> str:
    """
    Look up the name of the list that a property is used with, as specified in the ontology.
    The list name needs to be referenced in the XML file.

    If the property is unknown, a warning is emitted and an empty string is returned.

    Args:
        prop_name: name of the property

    Returns:
        Name of the list, or an empty string if the property was not found

    Examples:
        ```python
        list_name = list_lookup.get_list_name_via_property(
            prop_name=":hasList",  # or: "default-onto:hasList"
        )
        # list_name == "list1"
        ```
    """
    resolved_list = self._prop_to_list_name.get(prop_name)
    if resolved_list:
        return resolved_list
    emit_xmllib_input_warning(
        MessageInfo(f"The entered property '{prop_name}' was not found. An empty string is returned.")
    )
    return ""
349
+
350
+
351
def get_list_nodes_from_string_via_list_name(
    string_with_list_labels: str, label_separator: str, list_name: str, list_lookup: ListLookup
) -> list[str]:
    """
    Split a string into list labels and resolve each label to its node name.

    Args:
        string_with_list_labels: the string containing list labels
        label_separator: separator in the string that contains the labels
        list_name: name of the list
        list_lookup: `ListLookup` of the project

    Returns:
        A list of node names, one per label. If the input is empty, it returns an empty list.

    Examples:
        ```python
        nodes = xmllib.get_list_nodes_from_string_via_list_name(
            string_with_list_labels="Label 1; Label 2",
            label_separator=";",
            list_name="list1",
            list_lookup=list_lookup,
        )
        # nodes == ["node1", "node2"]
        ```

        ```python
        # empty inputs, e.g. "" or pd.NA
        nodes = xmllib.get_list_nodes_from_string_via_list_name(
            string_with_list_labels="",
            label_separator=";",
            list_name="list1",
            list_lookup=list_lookup,
        )
        # nodes == []
        ```
    """
    if not is_nonempty_value_internal(string_with_list_labels):
        return []
    return [
        list_lookup.get_node_via_list_name(list_name, single_label)
        for single_label in create_list_from_input(string_with_list_labels, label_separator)
    ]
405
+
406
+
407
def get_list_nodes_from_string_via_property(
    string_with_list_labels: str, label_separator: str, property_name: str, list_lookup: ListLookup
) -> tuple[str, list[str]]:
    """
    Split a string into list labels and resolve them via the property they are used with.
    Returns the list name to be referenced in the XML file, together with the node names.

    If the input is empty, it returns an empty string and an empty list.

    Args:
        string_with_list_labels: the string containing the labels
        label_separator: separator in the string that contains the labels
        property_name: name of the property
        list_lookup: `ListLookup` of the project

    Returns:
        The name of the list and a list of node names.

    Examples:
        ```python
        list_name, nodes = xmllib.get_list_nodes_from_string_via_property(
            string_with_list_labels="Label 1; Label 2",
            label_separator=";",
            property_name=":hasList",
            list_lookup=list_lookup,
        )
        # list_name == "list1"
        # nodes == ["node1", "node2"]
        ```

        ```python
        # empty inputs, e.g. "" or pd.NA
        list_name, nodes = xmllib.get_list_nodes_from_string_via_property(
            string_with_list_labels="",
            label_separator=";",
            property_name=":hasList",
            list_lookup=list_lookup,
        )
        # list_name == ""
        # nodes == []
        ```
    """
    if not is_nonempty_value_internal(string_with_list_labels):
        return "", []
    resolved_pairs = [
        list_lookup.get_list_name_and_node_via_property(property_name, single_label)
        for single_label in create_list_from_input(string_with_list_labels, label_separator)
    ]
    if not resolved_pairs:
        return "", []
    # The list name of the last resolved label is the one that is reported.
    reported_list_name = resolved_pairs[-1][0]
    node_names = [node_name for _, node_name in resolved_pairs]
    return reported_list_name, node_names
471
+
472
+
473
+ def _name_label_mapper_iterator(
474
+ json_subset: list[dict[str, Any]],
475
+ language_of_label: str,
476
+ ) -> Iterable[tuple[str, str]]:
477
+ """
478
+ Go through list nodes of a JSON project and yield (label, name) pairs.
479
+
480
+ Args:
481
+ json_subset: list of DSP lists (a DSP list being a dictionary with the keys "name", "labels" and "nodes")
482
+ language_of_label: which language of the label to choose
483
+
484
+ Yields:
485
+ (label, name) pairs
486
+ """
487
+ for node in json_subset:
488
+ # node is the json object containing the entire json-list
489
+ if "nodes" in node:
490
+ # "nodes" is the json sub-object containing the entries of the json-list
491
+ yield from _name_label_mapper_iterator(node["nodes"], language_of_label)
492
+ # each yielded value is a (label, name) pair of a single list entry
493
+ if "name" in node:
494
+ # the actual values of the name and the label
495
+ if found := node["labels"].get(language_of_label):
496
+ yield found, node["name"]
497
+ else:
498
+ msg = (
499
+ f"The language of the labels is '{language_of_label}', "
500
+ f"the list node with the name '{node['name']}' does not have a label in this language."
501
+ )
502
+ emit_xmllib_input_warning(MessageInfo(msg))
503
+
504
+
505
def escape_reserved_xml_characters(text: str) -> str:
    """
    In richtext strings (encoding="xml"), escape the reserved characters `<`, `>` and `&`,
    unless they belong to a standard standoff tag or to an escape sequence.

    [See the documentation for the standard standoff tags allowed by DSP-API,
    which will not be escaped.](https://docs.dasch.swiss/latest/DSP-API/03-endpoints/api-v2/text/standard-standoff/)

    Args:
        text: the richtext string to be escaped

    Returns:
        The escaped richtext string

    Examples:
        ```python
        result = xmllib.escape_reserved_xml_characters("Text <unknownTag>")
        # result == "Text &lt;unknownTag&gt;"
        ```

        ```python
        result = xmllib.escape_reserved_xml_characters("Text <br/> text after")
        # result == "Text <br/> text after"
        ```
    """
    known_tags = "|".join(KNOWN_XML_TAG_REGEXES)
    # Applied in this order: "<" first, then ">", and "&" last,
    # so that the "&lt;" / "&gt;" inserted by the first two steps are seen as escape sequences by the third.
    substitutions = (
        (rf"<(?!/?({known_tags})/?>)", "&lt;"),  # "<" that does not open a known tag
        (rf"(?<!</?({known_tags})/?)>", "&gt;"),  # ">" that does not close a known tag
        (r"&(?![#a-zA-Z0-9]+;)", "&amp;"),  # "&" that does not start an escape sequence
    )
    for pattern, replacement in substitutions:
        text = regex.sub(pattern, replacement, text)
    return text
540
+
541
+
542
def make_xsd_compatible_id(input_value: str | float | int) -> str:
    """
    Turn the input into a string that is usable as xsd:ID.
    An xsd:ID may not contain all types of special characters,
    and it must start with a letter or underscore.
    Illegal characters are replaced with `_`, and a leading `_` is prepended if necessary.

    The string must contain at least one Unicode letter (matching the regex ``\\p{L}``),
    `_`, `!`, `?`, or number, but must not be `None`, `<NA>`, `N/A`, or `-`.

    Args:
        input_value: input value

    Raises:
        XmllibInputError: if the input cannot be transformed to an xsd:ID

    Returns:
        An xsd ID compatible string based on the input value

    Examples:
        ```python
        result = xmllib.make_xsd_compatible_id("0_Universität_Basel")
        # result == "_0_Universit_t_Basel"
        ```
    """
    if not is_nonempty_value_internal(input_value):
        raise_xmllib_input_error(MessageInfo(f"The input '{input_value}' cannot be transformed to an xsd:ID"))
    as_text = str(input_value)
    # step 1: prepend an underscore if the first character is neither an ASCII letter nor an underscore
    with_valid_start = regex.sub(r"^(?=[^A-Za-z_])", "_", as_text)
    # step 2: everything except ASCII word characters, hyphens and dots becomes an underscore
    return regex.sub(r"[^\w_\-.]", "_", with_valid_start, flags=regex.ASCII)
573
+
574
+
575
def make_xsd_compatible_id_with_uuid(input_value: str | float | int) -> str:
    """
    Turn the input into a string that is usable as xsd:ID, and append a UUID to it.
    An xsd:ID may not contain all types of special characters,
    and it must start with a letter or underscore.
    Illegal characters are replaced with `_`, and a leading `_` is prepended if necessary.
    The UUID will be different each time the function is called.

    The string must contain at least one Unicode letter (matching the regex ``\\p{L}``),
    `_`, `!`, `?`, or number, but must not be `None`, `<NA>`, `N/A`, or `-`.

    Args:
        input_value: input value

    Raises:
        XmllibInputError: if the input cannot be transformed to an xsd:ID

    Returns:
        an xsd ID based on the input value, with a UUID attached.

    Examples:
        ```python
        result = xmllib.make_xsd_compatible_id_with_uuid("Universität_Basel")
        # result == "Universit_t_Basel_88f5cd0b-f333-4174-9030-65900b17773d"
        ```
    """
    # The ID is computed first, so that invalid input raises before a UUID is generated.
    return f"{make_xsd_compatible_id(input_value)}_{uuid.uuid4()}"
605
+
606
+
607
def create_list_from_string(string: str, separator: str) -> list[str]:  # noqa:ARG001
    """
    Attention:
        This function is deprecated, use the new function called 'create_list_from_input' instead.

    Calling it does not produce a list any more, it only reports the deprecation as an input error.
    """
    deprecation_notice = MessageInfo(
        "The function 'create_list_from_string' is deprecated. "
        "Use the new function called 'create_list_from_input' instead."
    )
    raise_xmllib_input_error(deprecation_notice)
618
+
619
+
620
def create_list_from_input(input_value: Any, separator: str) -> list[str]:
    """
    Turn the input value into a list of strings, by splitting it at the provided separator.
    The list items are stripped of leading and trailing whitespaces, and empty items are dropped.
    A non-empty input that is not a string is converted to a string and returned as single list item.
    If the input is empty it returns an empty list.

    Args:
        input_value: input value to check and convert
        separator: The character that separates the different values in the string.
            For example, a comma or newline.

    Returns:
        The list that results from splitting the input string.

    Examples:
        ```python
        result = xmllib.create_list_from_input(" one, two, three", ",")
        # result == ["one", "two", "three"]
        ```

        ```python
        result = xmllib.create_list_from_input(1, "-")
        # result == ["1"]
        ```

        ```python
        result = xmllib.create_list_from_input(" \\n    ", "\\n")
        # result == []
        ```

        ```python
        result = xmllib.create_list_from_input(None, ",")
        # result == []
        ```
    """
    if not is_nonempty_value_internal(input_value):
        return []
    if not isinstance(input_value, str):
        return [str(input_value)]
    stripped_items = (raw_item.strip() for raw_item in input_value.split(separator))
    return [item for item in stripped_items if item]
659
+
660
+
661
def create_non_empty_list_from_string(
    string: str, separator: str, resource_id: str | None = None, prop_name: str | None = None
) -> list[str]:
    """
    Split a string at the separator and return the resulting list.
    Trailing and leading whitespaces are removed from the list items.

    If the resulting list is empty it will raise an `XmllibInputError`.

    Args:
        string: input string
        separator: The character that separates the different values in the string.
            For example, a comma or newline.
        resource_id: If the ID of the resource is provided, a better error message can be composed
        prop_name: If the name of the property is provided, a better error message can be composed

    Returns:
        The list that results from splitting the input string.

    Raises:
        XmllibInputError: If the resulting list is empty.

    Examples:
        ```python
        result = xmllib.create_non_empty_list_from_string("One\\nTwo   ", "\\n")
        # result == ["One", "Two"]
        ```

        ```python
        result = xmllib.create_non_empty_list_from_string("   \\n/    ", "/")
        # raises XmllibInputError
        ```
    """
    items = create_list_from_input(string, separator)
    if not items:
        raise_xmllib_input_error(
            MessageInfo(
                message=(
                    "The input for this function must result in a non-empty list. Your input results in an empty list."
                ),
                resource_id=resource_id,
                prop_name=prop_name,
            )
        )
    return items
703
+
704
+
705
def clean_whitespaces_from_string(string: str) -> str:
    """
    Collapse every run of whitespaces (space, `\\n`, `\\t`, etc.) into a single space,
    and remove leading and trailing whitespaces.

    If the resulting string is empty, a warning will be printed.

    Args:
        string: input string

    Returns:
        The cleaned string.

    Examples:
        ```python
        result = xmllib.clean_whitespaces_from_string("\\t Text\\nafter newline")
        # result == "Text after newline"
        ```

        ```python
        result = xmllib.clean_whitespaces_from_string("      \\n\\t ")
        # result == ""
        # warns that the string is now empty
        ```
    """
    collapsed = regex.sub(r"\s+", " ", string)
    cleaned = collapsed.strip()
    if not cleaned:
        emit_xmllib_input_warning(
            MessageInfo(
                "The entered string is empty after all redundant whitespaces were removed. An empty string is returned."
            )
        )
    return cleaned
737
+
738
+
739
def find_license_in_string(string: str) -> License | None:  # noqa: PLR0911 (too many return statements)
    """
    Search a string for a license, and return the license if one is found.
    Returns None if no license was found.
    The case (upper case/lower case) is ignored.

    Look out: Your string should contain no more than 1 license.
    If it contains more, there is no guarantee which one will be returned.

    See [recommended licenses](https://docs.dasch.swiss/latest/DSP-TOOLS/xmllib-docs/licenses/recommended/)
    for details.

    Args:
        string: string to check

    Returns:
        `License` object or `None`

    Examples:
        ```python
        result = xmllib.find_license_in_string("CC BY")
        # result == LicenseRecommended.CC.BY
        ```

        ```python
        result = xmllib.find_license_in_string("Creative Commons Developing Nations 2.0 Generic Deed")
        # result == None
        ```

    Currently supported license formats:
        - "AI" -> LicenseRecommended.DSP.AI_GENERATED
        - "KI" -> LicenseRecommended.DSP.AI_GENERATED
        - "IA" -> LicenseRecommended.DSP.AI_GENERATED
        - "public domain" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
        - "gemeinfrei" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
        - "frei von Urheberrechten" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
        - "urheberrechtsbefreit" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
        - "libre de droits" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
        - "domaine public" -> LicenseRecommended.DSP.PUBLIC_DOMAIN
        - "unknown" -> LicenseRecommended.DSP.UNKNOWN
        - "unbekannt" -> LicenseRecommended.DSP.UNKNOWN
        - "inconnu" -> LicenseRecommended.DSP.UNKNOWN
        - "CC BY" -> LicenseRecommended.CC.BY
        - "Creative Commons BY 4.0" -> LicenseRecommended.CC.BY
        - "CC 0 1.0" -> LicenseOther.Public.CC_0_1_0
        - "CC PDM 1.0" -> LicenseOther.Public.CC_PDM_1_0
        - "BORIS Standard License" -> LicenseOther.Various.BORIS_STANDARD
        - "LICENCE OUVERTE 2.0" -> LicenseOther.Various.FRANCE_OUVERTE
    """

    def _occurs(pattern: str) -> bool:
        # case-insensitive search of the pattern in the input string
        return regex.search(pattern, string, flags=regex.IGNORECASE) is not None

    # license IRIs that are already in their final form take precedence over free-text detection
    already_parsed = _get_already_parsed_license(string)
    if already_parsed:
        return already_parsed

    sep = r"[-_\p{Zs}]+"  # Zs = unicode category for space separator characters

    # CC 0 and CC PDM must be checked before the generic Creative Commons pattern,
    # because the generic pattern also matches a bare "CC" / "Creative Commons"
    if _occurs(rf"\b(Creative{sep}Commons|CC){sep}0({sep}1\.0)?\b"):
        return LicenseOther.Public.CC_0_1_0
    if _occurs(rf"\b(Creative{sep}Commons|CC){sep}PDM({sep}1\.0)?\b"):
        return LicenseOther.Public.CC_PDM_1_0

    # generic Creative Commons: the matched text decides which CC license it is (or None).
    # No further patterns are tried after a match.
    cc_match = regex.search(
        rf"\b(CC|Creative{sep}Commons)({sep}(BY|NC|ND|SA))*({sep}[\d\.]+)?\b", string, flags=regex.IGNORECASE
    )
    if cc_match:
        return _find_cc_license(cc_match.group(0))

    if _occurs(r"\b(AI|IA|KI)\b"):
        return LicenseRecommended.DSP.AI_GENERATED

    rgx_public_domain = (
        rf"\b(public{sep}domain|gemeinfrei|frei{sep}von{sep}Urheberrechten|urheberrechtsbefreit|"
        rf"libre{sep}de{sep}droits|domaine{sep}public)\b"
    )
    if _occurs(rgx_public_domain):
        return LicenseRecommended.DSP.PUBLIC_DOMAIN

    if _occurs(r"\b(unknown|unbekannt|inconnu)\b"):
        return LicenseRecommended.DSP.UNKNOWN

    if _occurs(
        rf"\b(BORIS|Bern{sep}Open{sep}Repository{sep}and{sep}Information{sep}System){sep}Standard{sep}License\b"
    ):
        return LicenseOther.Various.BORIS_STANDARD

    if _occurs(rf"\b(France{sep})?Licence{sep}ouverte({sep}2\.0)?\b"):
        return LicenseOther.Various.FRANCE_OUVERTE

    return None
832
+
833
+
834
+ def _find_cc_license(string: str) -> License | None: # noqa: PLR0911 (too many return statements)
835
+ string = string.lower()
836
+ if "by" not in string:
837
+ return None
838
+ if any((string.count("by") > 1, string.count("nd") > 1, string.count("sa") > 1, string.count("nc") > 1)):
839
+ return None
840
+ has_nc = "nc" in string
841
+ has_nd = "nd" in string
842
+ has_sa = "sa" in string
843
+ if not any((has_nc, has_nd, has_sa)):
844
+ return LicenseRecommended.CC.BY
845
+ if not has_nc and has_nd and not has_sa:
846
+ return LicenseRecommended.CC.BY_ND
847
+ if not has_nc and not has_nd and has_sa:
848
+ return LicenseRecommended.CC.BY_SA
849
+ if has_nc and not has_nd and not has_sa:
850
+ return LicenseRecommended.CC.BY_NC
851
+ if has_nc and has_nd and not has_sa:
852
+ return LicenseRecommended.CC.BY_NC_ND
853
+ if has_nc and not has_nd and has_sa:
854
+ return LicenseRecommended.CC.BY_NC_SA
855
+ return None
856
+
857
+
858
def _get_already_parsed_license(string: str) -> License | None:
    """
    Check if the string contains one of the known license IRIs, and return the corresponding license.

    The IRIs are matched literally, as substrings of the input.
    Previously they were regex patterns, three of which had unescaped dots
    ("cc-0-1.0", "cc-pdm-1.0", "open-licence-2.0"), so a malformed IRI such as
    "http://rdfh.ch/licenses/cc-0-1x0" was accepted. Literal matching rules this out for all entries.

    Args:
        string: string to check

    Returns:
        The license of the first IRI (in the order defined below) that is contained in the string,
        or `None` if the string contains none of them.
    """
    already_parsed_dict: dict[str, License] = {
        "http://rdfh.ch/licenses/cc-by-4.0": LicenseRecommended.CC.BY,
        "http://rdfh.ch/licenses/cc-by-sa-4.0": LicenseRecommended.CC.BY_SA,
        "http://rdfh.ch/licenses/cc-by-nc-4.0": LicenseRecommended.CC.BY_NC,
        "http://rdfh.ch/licenses/cc-by-nc-sa-4.0": LicenseRecommended.CC.BY_NC_SA,
        "http://rdfh.ch/licenses/cc-by-nd-4.0": LicenseRecommended.CC.BY_ND,
        "http://rdfh.ch/licenses/cc-by-nc-nd-4.0": LicenseRecommended.CC.BY_NC_ND,
        "http://rdfh.ch/licenses/ai-generated": LicenseRecommended.DSP.AI_GENERATED,
        "http://rdfh.ch/licenses/unknown": LicenseRecommended.DSP.UNKNOWN,
        "http://rdfh.ch/licenses/public-domain": LicenseRecommended.DSP.PUBLIC_DOMAIN,
        "http://rdfh.ch/licenses/cc-0-1.0": LicenseOther.Public.CC_0_1_0,
        "http://rdfh.ch/licenses/cc-pdm-1.0": LicenseOther.Public.CC_PDM_1_0,
        "http://rdfh.ch/licenses/boris": LicenseOther.Various.BORIS_STANDARD,
        "http://rdfh.ch/licenses/open-licence-2.0": LicenseOther.Various.FRANCE_OUVERTE,
    }
    for iri, lic in already_parsed_dict.items():
        # case-sensitive, like the regex search it replaces
        if iri in string:
            return lic
    return None