dsp-tools 0.9.13__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (348):
  1. dsp_tools/__init__.py +5 -0
  2. dsp_tools/cli/args.py +47 -0
  3. dsp_tools/cli/call_action.py +85 -0
  4. dsp_tools/cli/call_action_files_only.py +101 -0
  5. dsp_tools/cli/call_action_with_network.py +207 -0
  6. dsp_tools/cli/create_parsers.py +479 -0
  7. dsp_tools/cli/entry_point.py +322 -0
  8. dsp_tools/cli/utils.py +87 -0
  9. dsp_tools/clients/CLAUDE.md +420 -0
  10. dsp_tools/clients/authentication_client.py +14 -0
  11. dsp_tools/clients/authentication_client_live.py +66 -0
  12. dsp_tools/clients/connection.py +35 -0
  13. dsp_tools/clients/connection_live.py +233 -0
  14. dsp_tools/clients/fuseki_metrics.py +60 -0
  15. dsp_tools/clients/group_user_clients.py +35 -0
  16. dsp_tools/clients/group_user_clients_live.py +181 -0
  17. dsp_tools/clients/legal_info_client.py +23 -0
  18. dsp_tools/clients/legal_info_client_live.py +132 -0
  19. dsp_tools/clients/list_client.py +49 -0
  20. dsp_tools/clients/list_client_live.py +166 -0
  21. dsp_tools/clients/metadata_client.py +24 -0
  22. dsp_tools/clients/metadata_client_live.py +47 -0
  23. dsp_tools/clients/ontology_clients.py +49 -0
  24. dsp_tools/clients/ontology_create_client_live.py +166 -0
  25. dsp_tools/clients/ontology_get_client_live.py +80 -0
  26. dsp_tools/clients/permissions_client.py +68 -0
  27. dsp_tools/clients/project_client.py +16 -0
  28. dsp_tools/clients/project_client_live.py +66 -0
  29. dsp_tools/commands/create/communicate_problems.py +24 -0
  30. dsp_tools/commands/create/create.py +134 -0
  31. dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
  32. dsp_tools/commands/create/create_on_server/classes.py +99 -0
  33. dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
  34. dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
  35. dsp_tools/commands/create/create_on_server/group_users.py +165 -0
  36. dsp_tools/commands/create/create_on_server/lists.py +163 -0
  37. dsp_tools/commands/create/create_on_server/mappers.py +12 -0
  38. dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
  39. dsp_tools/commands/create/create_on_server/ontology.py +52 -0
  40. dsp_tools/commands/create/create_on_server/project.py +68 -0
  41. dsp_tools/commands/create/create_on_server/properties.py +119 -0
  42. dsp_tools/commands/create/exceptions.py +29 -0
  43. dsp_tools/commands/create/lists_only.py +66 -0
  44. dsp_tools/commands/create/models/create_problems.py +87 -0
  45. dsp_tools/commands/create/models/parsed_ontology.py +88 -0
  46. dsp_tools/commands/create/models/parsed_project.py +81 -0
  47. dsp_tools/commands/create/models/rdf_ontology.py +12 -0
  48. dsp_tools/commands/create/models/server_project_info.py +100 -0
  49. dsp_tools/commands/create/parsing/parse_lists.py +45 -0
  50. dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
  51. dsp_tools/commands/create/parsing/parse_project.py +149 -0
  52. dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
  53. dsp_tools/commands/create/project_validate.py +595 -0
  54. dsp_tools/commands/create/serialisation/ontology.py +119 -0
  55. dsp_tools/commands/create/serialisation/project.py +44 -0
  56. dsp_tools/commands/excel2json/CLAUDE.md +101 -0
  57. dsp_tools/commands/excel2json/json_header.py +321 -0
  58. dsp_tools/commands/excel2json/lists/__init__.py +0 -0
  59. dsp_tools/commands/excel2json/lists/compliance_checks.py +292 -0
  60. dsp_tools/commands/excel2json/lists/make_lists.py +247 -0
  61. dsp_tools/commands/excel2json/lists/models/__init__.py +0 -0
  62. dsp_tools/commands/excel2json/lists/models/deserialise.py +30 -0
  63. dsp_tools/commands/excel2json/lists/models/input_error.py +216 -0
  64. dsp_tools/commands/excel2json/lists/models/serialise.py +57 -0
  65. dsp_tools/commands/excel2json/lists/utils.py +81 -0
  66. dsp_tools/commands/excel2json/models/__init__.py +0 -0
  67. dsp_tools/commands/excel2json/models/input_error.py +416 -0
  68. dsp_tools/commands/excel2json/models/json_header.py +175 -0
  69. dsp_tools/commands/excel2json/models/list_node_name.py +16 -0
  70. dsp_tools/commands/excel2json/models/ontology.py +76 -0
  71. dsp_tools/commands/excel2json/old_lists.py +328 -0
  72. dsp_tools/commands/excel2json/project.py +280 -0
  73. dsp_tools/commands/excel2json/properties.py +370 -0
  74. dsp_tools/commands/excel2json/resources.py +336 -0
  75. dsp_tools/commands/excel2json/utils.py +352 -0
  76. dsp_tools/commands/excel2xml/__init__.py +7 -0
  77. dsp_tools/commands/excel2xml/excel2xml_cli.py +523 -0
  78. dsp_tools/commands/excel2xml/excel2xml_lib.py +1953 -0
  79. dsp_tools/commands/excel2xml/propertyelement.py +47 -0
  80. dsp_tools/commands/get/__init__.py +0 -0
  81. dsp_tools/commands/get/get.py +166 -0
  82. dsp_tools/commands/get/get_permissions.py +257 -0
  83. dsp_tools/commands/get/get_permissions_legacy.py +89 -0
  84. dsp_tools/commands/get/legacy_models/__init__.py +0 -0
  85. dsp_tools/commands/get/legacy_models/context.py +318 -0
  86. dsp_tools/commands/get/legacy_models/group.py +241 -0
  87. dsp_tools/commands/get/legacy_models/helpers.py +47 -0
  88. dsp_tools/commands/get/legacy_models/listnode.py +390 -0
  89. dsp_tools/commands/get/legacy_models/model.py +12 -0
  90. dsp_tools/commands/get/legacy_models/ontology.py +324 -0
  91. dsp_tools/commands/get/legacy_models/project.py +366 -0
  92. dsp_tools/commands/get/legacy_models/propertyclass.py +417 -0
  93. dsp_tools/commands/get/legacy_models/resourceclass.py +676 -0
  94. dsp_tools/commands/get/legacy_models/user.py +438 -0
  95. dsp_tools/commands/get/models/__init__.py +0 -0
  96. dsp_tools/commands/get/models/permissions_models.py +10 -0
  97. dsp_tools/commands/id2iri.py +258 -0
  98. dsp_tools/commands/ingest_xmlupload/__init__.py +0 -0
  99. dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +178 -0
  100. dsp_tools/commands/ingest_xmlupload/create_resources/__init__.py +0 -0
  101. dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +69 -0
  102. dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +166 -0
  103. dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +121 -0
  104. dsp_tools/commands/ingest_xmlupload/ingest_files/__init__.py +0 -0
  105. dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +64 -0
  106. dsp_tools/commands/ingest_xmlupload/upload_files/__init__.py +0 -0
  107. dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +20 -0
  108. dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +57 -0
  109. dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +66 -0
  110. dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +67 -0
  111. dsp_tools/commands/resume_xmlupload/__init__.py +0 -0
  112. dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +96 -0
  113. dsp_tools/commands/start_stack.py +428 -0
  114. dsp_tools/commands/update_legal/CLAUDE.md +344 -0
  115. dsp_tools/commands/update_legal/__init__.py +0 -0
  116. dsp_tools/commands/update_legal/core.py +182 -0
  117. dsp_tools/commands/update_legal/csv_operations.py +135 -0
  118. dsp_tools/commands/update_legal/models.py +87 -0
  119. dsp_tools/commands/update_legal/xml_operations.py +247 -0
  120. dsp_tools/commands/validate_data/CLAUDE.md +159 -0
  121. dsp_tools/commands/validate_data/__init__.py +0 -0
  122. dsp_tools/commands/validate_data/constants.py +59 -0
  123. dsp_tools/commands/validate_data/mappers.py +143 -0
  124. dsp_tools/commands/validate_data/models/__init__.py +0 -0
  125. dsp_tools/commands/validate_data/models/api_responses.py +45 -0
  126. dsp_tools/commands/validate_data/models/input_problems.py +119 -0
  127. dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
  128. dsp_tools/commands/validate_data/models/validation.py +106 -0
  129. dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
  130. dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
  131. dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
  132. dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
  133. dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
  134. dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
  135. dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
  136. dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
  137. dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
  138. dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
  139. dsp_tools/commands/validate_data/sparql/cardinality_shacl.py +209 -0
  140. dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
  141. dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
  142. dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
  143. dsp_tools/commands/validate_data/utils.py +59 -0
  144. dsp_tools/commands/validate_data/validate_data.py +283 -0
  145. dsp_tools/commands/validate_data/validation/__init__.py +0 -0
  146. dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
  147. dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
  148. dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
  149. dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
  150. dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
  151. dsp_tools/commands/xmlupload/__init__.py +0 -0
  152. dsp_tools/commands/xmlupload/iri_resolver.py +21 -0
  153. dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
  154. dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
  155. dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
  156. dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
  157. dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
  158. dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
  159. dsp_tools/commands/xmlupload/models/__init__.py +0 -0
  160. dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
  161. dsp_tools/commands/xmlupload/models/formatted_text_value.py +10 -0
  162. dsp_tools/commands/xmlupload/models/ingest.py +143 -0
  163. dsp_tools/commands/xmlupload/models/input_problems.py +58 -0
  164. dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
  165. dsp_tools/commands/xmlupload/models/permission.py +45 -0
  166. dsp_tools/commands/xmlupload/models/permissions_parsed.py +93 -0
  167. dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
  168. dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
  169. dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
  170. dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
  171. dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
  172. dsp_tools/commands/xmlupload/models/upload_clients.py +14 -0
  173. dsp_tools/commands/xmlupload/models/upload_state.py +20 -0
  174. dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
  175. dsp_tools/commands/xmlupload/prepare_xml_input/ark2iri.py +55 -0
  176. dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
  177. dsp_tools/commands/xmlupload/prepare_xml_input/iiif_uri_validator.py +50 -0
  178. dsp_tools/commands/xmlupload/prepare_xml_input/list_client.py +120 -0
  179. dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
  180. dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
  181. dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
  182. dsp_tools/commands/xmlupload/resource_create_client.py +25 -0
  183. dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
  184. dsp_tools/commands/xmlupload/stash/__init__.py +0 -0
  185. dsp_tools/commands/xmlupload/stash/analyse_circular_reference_graph.py +236 -0
  186. dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
  187. dsp_tools/commands/xmlupload/stash/graph_models.py +87 -0
  188. dsp_tools/commands/xmlupload/stash/stash_circular_references.py +68 -0
  189. dsp_tools/commands/xmlupload/stash/stash_models.py +109 -0
  190. dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +106 -0
  191. dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +196 -0
  192. dsp_tools/commands/xmlupload/upload_config.py +76 -0
  193. dsp_tools/commands/xmlupload/write_diagnostic_info.py +27 -0
  194. dsp_tools/commands/xmlupload/xmlupload.py +516 -0
  195. dsp_tools/config/__init__.py +0 -0
  196. dsp_tools/config/logger_config.py +69 -0
  197. dsp_tools/config/warnings_config.py +32 -0
  198. dsp_tools/error/__init__.py +0 -0
  199. dsp_tools/error/custom_warnings.py +39 -0
  200. dsp_tools/error/exceptions.py +204 -0
  201. dsp_tools/error/problems.py +10 -0
  202. dsp_tools/error/xmllib_errors.py +20 -0
  203. dsp_tools/error/xmllib_warnings.py +54 -0
  204. dsp_tools/error/xmllib_warnings_util.py +159 -0
  205. dsp_tools/error/xsd_validation_error_msg.py +19 -0
  206. dsp_tools/legacy_models/__init__.py +0 -0
  207. dsp_tools/legacy_models/datetimestamp.py +81 -0
  208. dsp_tools/legacy_models/langstring.py +253 -0
  209. dsp_tools/legacy_models/projectContext.py +49 -0
  210. dsp_tools/py.typed +0 -0
  211. dsp_tools/resources/schema/data.xsd +648 -0
  212. dsp_tools/resources/schema/lists-only.json +72 -0
  213. dsp_tools/resources/schema/project.json +1258 -0
  214. dsp_tools/resources/schema/properties-only.json +874 -0
  215. dsp_tools/resources/schema/resources-only.json +140 -0
  216. dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
  217. dsp_tools/resources/start-stack/docker-compose.override.yml +11 -0
  218. dsp_tools/resources/start-stack/docker-compose.yml +88 -0
  219. dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
  220. dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
  221. dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
  222. dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
  223. dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
  224. dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
  225. dsp_tools/utils/__init__.py +0 -0
  226. dsp_tools/utils/ansi_colors.py +32 -0
  227. dsp_tools/utils/data_formats/__init__.py +0 -0
  228. dsp_tools/utils/data_formats/date_util.py +166 -0
  229. dsp_tools/utils/data_formats/iri_util.py +30 -0
  230. dsp_tools/utils/data_formats/shared.py +81 -0
  231. dsp_tools/utils/data_formats/uri_util.py +76 -0
  232. dsp_tools/utils/fuseki_bloating.py +63 -0
  233. dsp_tools/utils/json_parsing.py +22 -0
  234. dsp_tools/utils/rdf_constants.py +42 -0
  235. dsp_tools/utils/rdflib_utils.py +10 -0
  236. dsp_tools/utils/replace_id_with_iri.py +66 -0
  237. dsp_tools/utils/request_utils.py +238 -0
  238. dsp_tools/utils/xml_parsing/__init__.py +0 -0
  239. dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
  240. dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
  241. dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
  242. dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
  243. dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
  244. dsp_tools/xmllib/CLAUDE.md +302 -0
  245. dsp_tools/xmllib/__init__.py +49 -0
  246. dsp_tools/xmllib/general_functions.py +877 -0
  247. dsp_tools/xmllib/internal/__init__.py +0 -0
  248. dsp_tools/xmllib/internal/checkers.py +162 -0
  249. dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
  250. dsp_tools/xmllib/internal/constants.py +46 -0
  251. dsp_tools/xmllib/internal/input_converters.py +155 -0
  252. dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
  253. dsp_tools/xmllib/internal/serialise_resource.py +177 -0
  254. dsp_tools/xmllib/internal/serialise_values.py +152 -0
  255. dsp_tools/xmllib/internal/type_aliases.py +11 -0
  256. dsp_tools/xmllib/models/__init__.py +0 -0
  257. dsp_tools/xmllib/models/config_options.py +28 -0
  258. dsp_tools/xmllib/models/date_formats.py +48 -0
  259. dsp_tools/xmllib/models/dsp_base_resources.py +1542 -0
  260. dsp_tools/xmllib/models/internal/__init__.py +0 -0
  261. dsp_tools/xmllib/models/internal/file_values.py +172 -0
  262. dsp_tools/xmllib/models/internal/geometry.py +162 -0
  263. dsp_tools/xmllib/models/internal/migration_metadata.py +55 -0
  264. dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
  265. dsp_tools/xmllib/models/internal/values.py +342 -0
  266. dsp_tools/xmllib/models/licenses/__init__.py +0 -0
  267. dsp_tools/xmllib/models/licenses/other.py +59 -0
  268. dsp_tools/xmllib/models/licenses/recommended.py +107 -0
  269. dsp_tools/xmllib/models/permissions.py +41 -0
  270. dsp_tools/xmllib/models/res.py +1782 -0
  271. dsp_tools/xmllib/models/root.py +348 -0
  272. dsp_tools/xmllib/value_checkers.py +434 -0
  273. dsp_tools/xmllib/value_converters.py +777 -0
  274. dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
  275. dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
  276. dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
  277. dsp_tools-18.3.0.post13.dist-info/entry_points.txt +3 -0
  278. dsp_tools-0.9.13.dist-info/LICENSE +0 -674
  279. dsp_tools-0.9.13.dist-info/METADATA +0 -144
  280. dsp_tools-0.9.13.dist-info/RECORD +0 -71
  281. dsp_tools-0.9.13.dist-info/WHEEL +0 -5
  282. dsp_tools-0.9.13.dist-info/entry_points.txt +0 -3
  283. dsp_tools-0.9.13.dist-info/top_level.txt +0 -1
  284. dsplib/models/connection.py +0 -272
  285. dsplib/models/group.py +0 -296
  286. dsplib/models/helpers.py +0 -505
  287. dsplib/models/langstring.py +0 -277
  288. dsplib/models/listnode.py +0 -578
  289. dsplib/models/model.py +0 -20
  290. dsplib/models/ontology.py +0 -448
  291. dsplib/models/permission.py +0 -112
  292. dsplib/models/project.py +0 -547
  293. dsplib/models/propertyclass.py +0 -505
  294. dsplib/models/resource.py +0 -366
  295. dsplib/models/resourceclass.py +0 -810
  296. dsplib/models/sipi.py +0 -30
  297. dsplib/models/user.py +0 -731
  298. dsplib/models/value.py +0 -1000
  299. dsplib/utils/knora-data-schema.xsd +0 -454
  300. dsplib/utils/knora-schema-lists.json +0 -83
  301. dsplib/utils/knora-schema.json +0 -434
  302. dsplib/utils/onto_commons.py +0 -24
  303. dsplib/utils/onto_create_lists.py +0 -73
  304. dsplib/utils/onto_create_ontology.py +0 -442
  305. dsplib/utils/onto_get.py +0 -58
  306. dsplib/utils/onto_validate.py +0 -33
  307. dsplib/utils/xml_upload.py +0 -539
  308. dsplib/widgets/doublepassword.py +0 -80
  309. knora/MLS-import-libraries.py +0 -84
  310. knora/dsp_tools.py +0 -96
  311. knora/dsplib/models/connection.py +0 -272
  312. knora/dsplib/models/group.py +0 -296
  313. knora/dsplib/models/helpers.py +0 -506
  314. knora/dsplib/models/langstring.py +0 -277
  315. knora/dsplib/models/listnode.py +0 -578
  316. knora/dsplib/models/model.py +0 -20
  317. knora/dsplib/models/ontology.py +0 -448
  318. knora/dsplib/models/permission.py +0 -112
  319. knora/dsplib/models/project.py +0 -583
  320. knora/dsplib/models/propertyclass.py +0 -505
  321. knora/dsplib/models/resource.py +0 -416
  322. knora/dsplib/models/resourceclass.py +0 -811
  323. knora/dsplib/models/sipi.py +0 -35
  324. knora/dsplib/models/user.py +0 -731
  325. knora/dsplib/models/value.py +0 -1000
  326. knora/dsplib/utils/knora-data-schema.xsd +0 -464
  327. knora/dsplib/utils/knora-schema-lists.json +0 -83
  328. knora/dsplib/utils/knora-schema.json +0 -444
  329. knora/dsplib/utils/onto_commons.py +0 -24
  330. knora/dsplib/utils/onto_create_lists.py +0 -73
  331. knora/dsplib/utils/onto_create_ontology.py +0 -451
  332. knora/dsplib/utils/onto_get.py +0 -58
  333. knora/dsplib/utils/onto_validate.py +0 -33
  334. knora/dsplib/utils/xml_upload.py +0 -540
  335. knora/dsplib/widgets/doublepassword.py +0 -80
  336. knora/knora.py +0 -2108
  337. knora/test.py +0 -99
  338. knora/testit.py +0 -76
  339. knora/xml2knora.py +0 -633
  340. {dsplib → dsp_tools/cli}/__init__.py +0 -0
  341. {dsplib/models → dsp_tools/clients}/__init__.py +0 -0
  342. {dsplib/utils → dsp_tools/commands}/__init__.py +0 -0
  343. {dsplib/widgets → dsp_tools/commands/create}/__init__.py +0 -0
  344. {knora → dsp_tools/commands/create/create_on_server}/__init__.py +0 -0
  345. {knora/dsplib → dsp_tools/commands/create/models}/__init__.py +0 -0
  346. {knora/dsplib/models → dsp_tools/commands/create/parsing}/__init__.py +0 -0
  347. {knora/dsplib/utils → dsp_tools/commands/create/serialisation}/__init__.py +0 -0
  348. {knora/dsplib/widgets → dsp_tools/commands/excel2json}/__init__.py +0 -0
@@ -0,0 +1,1953 @@
1
+ import copy
2
+ import dataclasses
3
+ import difflib
4
+ import json
5
+ import os
6
+ import warnings
7
+ from collections.abc import Iterable
8
+ from pathlib import Path
9
+ from typing import Any
10
+ from typing import Optional
11
+ from typing import Union
12
+
13
+ import regex
14
+ from lxml import etree
15
+ from lxml.builder import E
16
+
17
+ from dsp_tools.commands.excel2xml.propertyelement import PropertyElement
18
+ from dsp_tools.error.custom_warnings import DspToolsUserWarning
19
+ from dsp_tools.error.exceptions import BaseError
20
+ from dsp_tools.legacy_models.datetimestamp import DateTimeStamp
21
+ from dsp_tools.utils.data_formats.date_util import is_full_date
22
+ from dsp_tools.utils.data_formats.shared import check_notna
23
+ from dsp_tools.utils.data_formats.shared import simplify_name
24
+ from dsp_tools.utils.data_formats.uri_util import is_iiif_uri
25
+ from dsp_tools.utils.data_formats.uri_util import is_uri
26
+ from dsp_tools.utils.xml_parsing.parse_clean_validate_xml import parse_and_validate_xml_file
27
+ from dsp_tools.xmllib import find_dates_in_string as find_dates_in_string # noqa: PLC0414 (explicit re-export)
28
+ from dsp_tools.xmllib.general_functions import make_xsd_compatible_id
29
+ from dsp_tools.xmllib.general_functions import make_xsd_compatible_id_with_uuid
30
+ from dsp_tools.xmllib.internal.input_converters import numeric_entities
31
+
32
+ # ruff: noqa: E501, UP031 (line-too-long, use f-string over percent formatting)
33
+
34
# Aliases that expose the xmllib ID helpers under additional names in this module.
make_xsd_id_compatible = make_xsd_compatible_id_with_uuid
make_xsd_id_compatible_without_uuid = make_xsd_compatible_id

# Namespace map passed as "nsmap" to every element created in this module.
# The key None denotes the default (prefix-less) namespace.
xml_namespace_map = {
    None: "https://dasch.swiss/schema",
    "xsi": "http://www.w3.org/2001/XMLSchema-instance",
}
38
+
39
+
40
def prepare_value(
    value: Union[PropertyElement, str, int, float, bool, Iterable[Union[PropertyElement, str, int, float, bool]]],
) -> list[PropertyElement]:
    """
    Normalise the "value" argument of a make_*_prop() method into a list of PropertyElements.
    The argument is accepted exactly as the make_*_prop() method received it.

    Args:
        value: a single value, or an iterable of values; each may or may not already be a PropertyElement

    Returns:
        a list of PropertyElements, one per input value
    """
    # A string is iterable, but it represents one single value, not a collection of characters.
    is_single_value = isinstance(value, str) or not isinstance(value, Iterable)
    candidates = [value] if is_single_value else value

    # Wrap everything that is not yet a PropertyElement; pass existing PropertyElements through unchanged.
    elements: list[PropertyElement] = []
    for candidate in candidates:
        if isinstance(candidate, PropertyElement):
            elements.append(candidate)
        else:
            elements.append(PropertyElement(candidate))
    return elements
59
+
60
+
61
def make_root(
    shortcode: str,
    default_ontology: str,
) -> etree._Element:
    """
    Create the root element `<knora>`, the starting point of every XML document.

    Args:
        shortcode: The shortcode of this project as defined in the JSON project file
        default_ontology: one of the ontologies of the JSON project file

    Returns:
        The root element `<knora>`.

    Examples:
        >>> root = excel2xml.make_root(shortcode=shortcode, default_ontology=default_ontology)
        >>> root = excel2xml.append_permissions(root)

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#the-root-element-knora
    """
    root_tag = "{%s}knora" % xml_namespace_map[None]

    # The xsi:schemaLocation attribute pairs the namespace with the URL of the XSD schema file.
    schema_url = "https://raw.githubusercontent.com/dasch-swiss/dsp-tools/main/src/dsp_tools/resources/schema/data.xsd"
    schema_location_key = str(etree.QName("http://www.w3.org/2001/XMLSchema-instance", "schemaLocation"))
    root_attributes = {
        schema_location_key: f"https://dasch.swiss/schema {schema_url}",
        "shortcode": shortcode,
        "default-ontology": default_ontology,
    }

    return etree.Element(root_tag, attrib=root_attributes, nsmap=xml_namespace_map)
93
+
94
+
95
def append_permissions(root_element: etree._Element) -> etree._Element:
    """
    Append the standard permission definitions "public", "limited_view", and "private" to a root element.
    These definitions are a good starting point, but they can be adapted,
    and other permissions can be defined instead of these.

    Args:
        root_element: The XML root element `<knora>` created by make_root()

    Returns:
        The root element with the permission definition blocks appended

    Examples:
        >>> root = excel2xml.make_root(shortcode=shortcode, default_ontology=default_ontology)
        >>> root = excel2xml.append_permissions(root)

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#describing-permissions-with-permissions-elements
    """
    # Each entry: (id of the <permissions> block, ((rights, group), ...) of its <allow> children),
    # listed in the order in which they are appended.
    standard_definitions = (
        (
            "public",
            (("V", "UnknownUser"), ("V", "KnownUser"), ("D", "ProjectMember"), ("CR", "ProjectAdmin")),
        ),
        (
            "limited_view",
            (("RV", "UnknownUser"), ("RV", "KnownUser"), ("D", "ProjectMember"), ("CR", "ProjectAdmin")),
        ),
        (
            "private",
            (("D", "ProjectMember"), ("CR", "ProjectAdmin")),
        ),
    )

    # lxml.builder.E is an element factory: E.tag(...) is equivalent to E("tag", ...) and results in <tag>
    for permission_id, grants in standard_definitions:
        permission_block = E.permissions(id=permission_id)
        for rights, group in grants:
            permission_block.append(E.allow(rights, group=group))
        root_element.append(permission_block)

    return root_element
139
+
140
+
141
def make_resource(
    label: str,
    restype: str,
    id: str,
    permissions: str = "public",
    ark: Optional[str] = None,
    iri: Optional[str] = None,
    creation_date: Optional[str] = None,
) -> etree._Element:
    """
    Creates an empty resource element, with the attributes as specified by the arguments.

    Args:
        label: value of the "label" attribute of the `<resource>` element
        restype: value of the "restype" attribute
        id: value of the "id" attribute
        permissions: value of the "permissions" attribute (defaults to "public")
        ark: value of the "ark" attribute; the attribute is omitted if this is empty or None
        iri: value of the "iri" attribute; the attribute is omitted if this is empty or None
        creation_date: value of the "creation_date" attribute; the attribute is omitted if this is empty or None.
            Otherwise, it must be accepted by DateTimeStamp.

    Returns:
        The resource element, without any children, but with the attributes
        ``<resource label=label restype=restype id=id permissions=permissions ark=ark iri=iri></resource>``

    Warns:
        - if the label or the id is rejected by check_notna()
        - if both an ARK and an IRI are provided

    Raises:
        BaseError: if the creation date is invalid

    Examples:
        >>> resource = excel2xml.make_resource(...)
        >>> resource.append(excel2xml.make_text_prop(...))
        >>> root.append(resource)

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#describing-resources-with-the-resource-element
    """
    # Suspicious labels/ids are only reported, not rejected: the element is created regardless.
    if not check_notna(label):
        msg = f"Your resource's label looks suspicious (resource with id '{id}' and label '{label}')"
        warnings.warn(DspToolsUserWarning(msg))
    if not check_notna(id):
        msg = f"Your resource's id looks suspicious (resource with id '{id}' and label '{label}')"
        warnings.warn(DspToolsUserWarning(msg))

    kwargs = {"label": label, "restype": restype, "id": id, "permissions": permissions, "nsmap": xml_namespace_map}

    # Optional attributes are only written if they carry a value.
    if ark:
        kwargs["ark"] = ark
    if iri:
        kwargs["iri"] = iri
    if ark and iri:
        msg = f"Both ARK and IRI were provided for resource '{label}' ({id}). The ARK will override the IRI."
        warnings.warn(DspToolsUserWarning(msg))

    if creation_date:
        # Constructing a DateTimeStamp serves as validation; the object itself is not needed.
        try:
            DateTimeStamp(creation_date)
        except BaseError:
            # "from None" hides the internal validation error in favour of the more helpful message below.
            raise BaseError(
                f"The resource '{label}' (ID: {id}) has an invalid creation date '{creation_date}'. "
                f"Did you perhaps forget the timezone?"
            ) from None
        kwargs["creation_date"] = creation_date

    return etree.Element("{%s}resource" % xml_namespace_map[None], **kwargs)  # type: ignore[arg-type]
196
+
197
+
198
def make_bitstream_prop(
    path: Union[str, os.PathLike[Any]],
    permissions: str = "public",
    check: bool = False,
    calling_resource: str = "",
) -> etree._Element:
    """
    Build a `<bitstream>` element whose text is the given file path.

    Args:
        path: path to a valid file that will be uploaded
        permissions: permissions string
        check: if True, issue a warning if the path doesn't point to an existing file
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        if the path doesn't point to an existing file (only if check=True)

    Returns:
        an etree._Element that can be appended to the parent resource with resource.append(make_*_prop(...))

    Examples:
        >>> resource = excel2xml.make_resource(...)
        >>> resource.append(excel2xml.make_bitstream_prop("data/images/tree.jpg"))
        >>> root.append(resource)

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#bitstream
    """
    # The file system is only consulted if the caller explicitly asked for the check.
    if check:
        if not Path(path).is_file():
            msg = (
                f"Failed validation in bitstream tag of resource '{calling_resource}': "
                f"The following path doesn't point to a file: {path}"
            )
            warnings.warn(DspToolsUserWarning(msg))

    bitstream_tag = "{%s}bitstream" % xml_namespace_map[None]
    bitstream = etree.Element(bitstream_tag, permissions=permissions, nsmap=xml_namespace_map)
    bitstream.text = str(path)
    return bitstream
240
+
241
+
242
def make_iiif_uri_prop(
    iiif_uri: str,
    permissions: str = "public",
    calling_resource: str = "",
) -> etree._Element:
    """
    Build an `<iiif-uri>` element whose text is the given IIIF URI.

    Args:
        iiif_uri: URI to a IIIF image
        permissions: permissions string
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        If the iiif_uri doesn't conform to the IIIF URI specifications

    Returns:
        an etree._Element that can be appended to the parent resource with resource.append(make_*_prop(...))

    Examples:
        >>> resource = excel2xml.make_resource(...)
        >>> resource.append(excel2xml.make_iiif_uri_prop("https://example.org/image-service/abcd1234/full/max/0/default.jpg"))
        >>> root.append(resource)

    """
    # A non-conforming URI is only reported; the element is created regardless.
    uri_is_valid = is_iiif_uri(iiif_uri)
    if not uri_is_valid:
        msg = (
            f"Failed validation in iiif-uri tag of resource '{calling_resource}': "
            f"The URI: '{iiif_uri}' does not conform to the specifications."
        )
        warnings.warn(DspToolsUserWarning(msg))

    iiif_tag = "{%s}iiif-uri" % xml_namespace_map[None]
    iiif_element = etree.Element(iiif_tag, permissions=permissions, nsmap=xml_namespace_map)
    iiif_element.text = iiif_uri
    return iiif_element
281
+
282
+
283
def _format_bool(
    unformatted: Union[bool, str, int, float],
    name: str,
    calling_resource: str,
) -> str:
    """
    Normalize a boolean-like value to one of the strings "true" or "false".

    Strings are compared case-insensitively (they are lower-cased first).

    Args:
        unformatted: boolean-like value
        name: property name, for better error messages
        calling_resource: resource name, for better error messages

    Raises:
        BaseError: if the input cannot be transformed into "true"/"false"

    Returns:
        "true" if the input is in (True, "true", "1", 1, 1.0, "yes");
        "false" if the input is in (False, "false", "0", 0, 0.0, "no")
    """
    false_spellings = (False, "false", "0", 0, 0.0, "no")
    true_spellings = (True, "true", "1", 1, 1.0, "yes")

    candidate = unformatted.lower() if isinstance(unformatted, str) else unformatted
    if candidate in false_spellings:
        return "false"
    if candidate in true_spellings:
        return "true"
    raise BaseError(
        f"Failed validation in resource '{calling_resource}', property '{name}': "
        f"'{candidate}' is not a valid boolean."
    )
313
+
314
+
315
def make_boolean_prop(
    name: str,
    value: Union[PropertyElement, str, int, bool],
    calling_resource: str = "",
) -> etree._Element:
    """
    Build a `<boolean-prop>` holding exactly one `<boolean>` child.

    The value may be given directly or wrapped in a PropertyElement. It is normalized by _format_bool(),
    so these spellings are understood (strings are case-insensitive):
     - true: (True, "true", "True", "1", 1, "yes", "Yes")
     - false: (False, "false", "False", "0", 0, "no", "No")

    Unless provided as PropertyElement, the permissions of the value default to "public".

    Args:
        name: the name of this property as defined in the onto
        value: a boolean value as str/bool/int, or as str/bool/int inside a PropertyElement
        calling_resource: the name of the parent resource (for better error messages)

    Raises:
        BaseError: if the value is not a valid boolean

    Returns:
        an etree._Element that can be appended to the parent resource with resource.append(make_*_prop(...))

    Examples:
        >>> excel2xml.make_boolean_prop(":testproperty", "no")
                <boolean-prop name=":testproperty">
                    <boolean permissions="public">false</boolean>
                </boolean-prop>
        >>> excel2xml.make_boolean_prop(":testproperty", excel2xml.PropertyElement("1", permissions="private", comment="example"))
                <boolean-prop name=":testproperty">
                    <boolean permissions="private" comment="example">true</boolean>
                </boolean-prop>

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#boolean-prop
    """
    # normalize the input into a PropertyElement carrying "true"/"false"
    if isinstance(value, PropertyElement):
        normalized = _format_bool(value.value, name, calling_resource)
        bool_element = dataclasses.replace(value, value=normalized)
    elif isinstance(value, (str, bool, int)):
        bool_element = PropertyElement(_format_bool(value, name, calling_resource))
    else:
        raise BaseError(
            f"Failed validation in resource '{calling_resource}', property '{name}': '{value}' is not a valid boolean."
        )

    default_ns = xml_namespace_map[None]
    prop_ = etree.Element(f"{{{default_ns}}}boolean-prop", name=name, nsmap=xml_namespace_map)

    # the comment attribute is only written if a usable comment is present
    attrs = {"permissions": bool_element.permissions}
    if bool_element.comment and check_notna(bool_element.comment):
        attrs["comment"] = bool_element.comment
    child = etree.Element(
        f"{{{default_ns}}}boolean",
        **attrs,  # type: ignore[arg-type]
        nsmap=xml_namespace_map,
    )
    child.text = str(bool_element.value)
    prop_.append(child)
    return prop_
380
+
381
+
382
def make_color_prop(
    name: str,
    value: Union[PropertyElement, str, Iterable[Union[PropertyElement, str]]],
    calling_resource: str = "",
) -> etree._Element:
    """
    Build a `<color-prop>` with one `<color>` child per given color.

    Colors can be passed as plain strings or as PropertyElements wrapping a string.
    If provided as string, the permissions default to "public".
    Surrounding whitespace is stripped before validation and before writing the value.

    Args:
        name: the name of this property as defined in the onto
        value: one or more DSP color(s), as string/PropertyElement, or as iterable of strings/PropertyElements
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        DspToolsUserWarning: if a value is not a "#" followed by six hex digits (the value is written nevertheless)

    Returns:
        an etree._Element that can be appended to the parent resource with resource.append(make_*_prop(...))

    Examples:
        >>> excel2xml.make_color_prop(":testproperty", "#00ff66")
                <color-prop name=":testproperty">
                    <color permissions="public">#00ff66</color>
                </color-prop>
        >>> excel2xml.make_color_prop(":testproperty", excel2xml.PropertyElement("#00ff66", permissions="private", comment="example"))
                <color-prop name=":testproperty">
                    <color permissions="private" comment="example">#00ff66</color>
                </color-prop>
        >>> excel2xml.make_color_prop(":testproperty", ["#00ff66", "#000000"])
                <color-prop name=":testproperty">
                    <color permissions="public">#00ff66</color>
                    <color permissions="public">#000000</color>
                </color-prop>

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#color-prop
    """
    colors = prepare_value(value)

    # first pass: warn about every value that is not a hex color
    for color in colors:
        is_hex_color = regex.search(r"^#[0-9a-f]{6}$", str(color.value).strip(), flags=regex.IGNORECASE)
        if not is_hex_color:
            warnings.warn(
                DspToolsUserWarning(
                    f"Failed validation in resource '{calling_resource}', property '{name}': "
                    f"'{color.value}' is not a valid color."
                )
            )

    # second pass: build the XML, including values that triggered a warning
    default_ns = xml_namespace_map[None]
    prop_ = etree.Element(f"{{{default_ns}}}color-prop", name=name, nsmap=xml_namespace_map)
    for color in colors:
        attrs = {"permissions": color.permissions}
        if color.comment and check_notna(color.comment):
            attrs["comment"] = color.comment
        child = etree.Element(
            f"{{{default_ns}}}color",
            **attrs,  # type: ignore[arg-type]
            nsmap=xml_namespace_map,
        )
        child.text = str(color.value).strip()
        prop_.append(child)
    return prop_
451
+
452
+
453
def make_date_prop(
    name: str,
    value: Union[PropertyElement, str, Iterable[Union[PropertyElement, str]]],
    calling_resource: str = "",
) -> etree._Element:
    """
    Build a `<date-prop>` with one `<date>` child per given date or date range.

    Dates can be passed as plain strings or as PropertyElements wrapping a string.
    If provided as string, the permissions default to "public".
    Surrounding whitespace is stripped before validation and before writing the value.

    Args:
        name: the name of this property as defined in the onto
        value: one or more DSP dates, as string/PropertyElement, or as iterable of strings/PropertyElements
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        DspToolsUserWarning: if is_full_date() rejects a value (the value is written nevertheless)

    Returns:
        an etree._Element that can be appended to the parent resource with resource.append(make_*_prop(...))

    Examples:
        >>> excel2xml.make_date_prop(":testproperty", "GREGORIAN:CE:2014-01-31")
                <date-prop name=":testproperty">
                    <date permissions="public">GREGORIAN:CE:2014-01-31</date>
                </date-prop>
        >>> excel2xml.make_date_prop(":testproperty", excel2xml.PropertyElement("GREGORIAN:CE:2014-01-31", permissions="private", comment="example"))
                <date-prop name=":testproperty">
                    <date permissions="private" comment="example">GREGORIAN:CE:2014-01-31</date>
                </date-prop>
        >>> excel2xml.make_date_prop(":testproperty", ["GREGORIAN:CE:1930-09-02:CE:1930-09-03", "GREGORIAN:CE:1930-09-02:CE:1930-09-03"])
                <date-prop name=":testproperty">
                    <date permissions="public">GREGORIAN:CE:1930-09-02:CE:1930-09-03</date>
                    <date permissions="public">GREGORIAN:CE:1930-09-02:CE:1930-09-03</date>
                </date-prop>

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#date-prop
    """
    dates = prepare_value(value)

    # first pass: warn about every value that is not recognized as a DSP date
    for date in dates:
        if is_full_date(str(date.value).strip()):
            continue
        warnings.warn(
            DspToolsUserWarning(
                f"Failed validation in resource '{calling_resource}', property '{name}': "
                f"'{date.value}' is not a valid DSP date."
            )
        )

    # second pass: build the XML, including values that triggered a warning
    default_ns = xml_namespace_map[None]
    prop_ = etree.Element(f"{{{default_ns}}}date-prop", name=name, nsmap=xml_namespace_map)
    for date in dates:
        attrs = {"permissions": date.permissions}
        if date.comment and check_notna(date.comment):
            attrs["comment"] = date.comment
        child = etree.Element(
            f"{{{default_ns}}}date",
            **attrs,  # type: ignore[arg-type]
            nsmap=xml_namespace_map,
        )
        child.text = str(date.value).strip()
        prop_.append(child)
    return prop_
528
+
529
+
530
def make_decimal_prop(
    name: str,
    value: Union[PropertyElement, str, Iterable[Union[PropertyElement, str]]],
    calling_resource: str = "",
) -> etree._Element:
    """
    Build a `<decimal-prop>` with one `<decimal>` child per given decimal number.

    Numbers can be passed as string, float, or as PropertyElement with a string/float inside.
    If provided as string/float, the permissions default to "public".

    Args:
        name: the name of this property as defined in the onto
        value: one or more decimal numbers, as string/float/PropertyElement, or as iterable of strings/PropertyElements
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        DspToolsUserWarning: if float() raises a ValueError for a value (the value is written nevertheless)

    Returns:
        an etree._Element that can be appended to the parent resource with resource.append(make_*_prop(...))

    Examples:
        >>> excel2xml.make_decimal_prop(":testproperty", "3.14159")
                <decimal-prop name=":testproperty">
                    <decimal permissions="public">3.14159</decimal>
                </decimal-prop>
        >>> excel2xml.make_decimal_prop(":testproperty", excel2xml.PropertyElement("3.14159", permissions="private", comment="example"))
                <decimal-prop name=":testproperty">
                    <decimal permissions="private" comment="example">3.14159</decimal>
                </decimal-prop>
        >>> excel2xml.make_decimal_prop(":testproperty", ["3.14159", "2.718"])
                <decimal-prop name=":testproperty">
                    <decimal permissions="public">3.14159</decimal>
                    <decimal permissions="public">2.718</decimal>
                </decimal-prop>

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#decimal-prop
    """
    decimals = prepare_value(value)

    # first pass: a value counts as valid if float() can convert it
    for decimal in decimals:
        try:
            float(decimal.value)
        except ValueError:
            warnings.warn(
                DspToolsUserWarning(
                    f"Failed validation in resource '{calling_resource}', property '{name}': "
                    f"'{decimal.value}' is not a valid decimal number."
                )
            )

    # second pass: build the XML, including values that triggered a warning
    default_ns = xml_namespace_map[None]
    prop_ = etree.Element(f"{{{default_ns}}}decimal-prop", name=name, nsmap=xml_namespace_map)
    for decimal in decimals:
        attrs = {"permissions": decimal.permissions}
        if decimal.comment and check_notna(decimal.comment):
            attrs["comment"] = decimal.comment
        child = etree.Element(
            f"{{{default_ns}}}decimal",
            **attrs,  # type: ignore[arg-type]
            nsmap=xml_namespace_map,
        )
        child.text = str(decimal.value)
        prop_.append(child)
    return prop_
602
+
603
+
604
def make_geometry_prop(
    name: str,
    value: Union[PropertyElement, str, Iterable[Union[PropertyElement, str]]],
    calling_resource: str = "",
) -> etree._Element:
    """
    Make a `<geometry-prop>` from one or more areas of an image. The area(s) can be provided as JSON-string or as
    PropertyElement with the JSON-string inside. If provided as string, the permissions default to "public".

    Args:
        name: the name of this property as defined in the onto
        value: one or more JSON geometry objects, as string/PropertyElement, or as iterable of strings/PropertyElements
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        DspToolsUserWarning: if a value is not parseable JSON, lacks the keys "type"/"points",
            has a "type" other than rectangle/circle/polygon, or has "points" that is not a list.
            The value is written to the XML nevertheless.

    Returns:
        an etree._Element that can be appended to the parent resource with resource.append(make_*_prop(...))

    Examples:
        >>> excel2xml.make_geometry_prop(":testproperty", json_string)
                <geometry-prop name=":testproperty">
                    <geometry permissions="public">{JSON}</geometry>
                </geometry-prop>
        >>> excel2xml.make_geometry_prop(":testproperty", excel2xml.PropertyElement(json_string, permissions="private", comment="example"))
                <geometry-prop name=":testproperty">
                    <geometry permissions="private" comment="example">{JSON}</geometry>
                </geometry-prop>
        >>> excel2xml.make_geometry_prop(":testproperty", [json_string1, json_string2])
                <geometry-prop name=":testproperty">
                    <geometry permissions="public">{JSON}</geometry>
                    <geometry permissions="public">{JSON}</geometry>
                </geometry-prop>

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#geometry-prop
    """

    # check the input: prepare a list with valid values
    values = prepare_value(value)

    # check value type
    for val in values:
        try:
            value_as_dict = json.loads(str(val.value))
            if value_as_dict["type"] not in ["rectangle", "circle", "polygon"]:
                msg = (
                    f"Failed validation in resource '{calling_resource}', property '{name}': "
                    "The 'type' of the JSON geometry object must be 'rectangle', 'circle', or 'polygon'."
                )
                warnings.warn(DspToolsUserWarning(msg))
            if not isinstance(value_as_dict["points"], list):
                # fixed: the message used to read "The 'points'of ..." (missing space)
                msg = (
                    f"Failed validation in resource '{calling_resource}', property '{name}': "
                    "The 'points' of the JSON geometry object must be a list of points."
                )
                warnings.warn(DspToolsUserWarning(msg))
        except (json.JSONDecodeError, TypeError, IndexError, KeyError, AssertionError):
            # unparseable JSON, a non-object JSON value, or a missing "type"/"points" key ends up here
            msg = (
                f"Failed validation in resource '{calling_resource}', property '{name}': "
                f"'{val.value}' is not a valid JSON geometry object."
            )
            warnings.warn(DspToolsUserWarning(msg))

    # make xml structure of the values (values that triggered a warning are written, too)
    prop_ = etree.Element(
        "{%s}geometry-prop" % xml_namespace_map[None],
        name=name,
        nsmap=xml_namespace_map,
    )
    for val in values:
        kwargs = {"permissions": val.permissions}
        if val.comment and check_notna(val.comment):
            kwargs["comment"] = val.comment
        value_ = etree.Element(
            "{%s}geometry" % xml_namespace_map[None],
            **kwargs,  # type: ignore[arg-type]
            nsmap=xml_namespace_map,
        )
        value_.text = str(val.value)
        prop_.append(value_)
    return prop_
686
+
687
+
688
def make_geoname_prop(
    name: str,
    value: Union[PropertyElement, str, int, Iterable[Union[PropertyElement, str, int]]],
    calling_resource: str = "",
) -> etree._Element:
    """
    Build a `<geoname-prop>` with one `<geoname>` child per given geonames.org ID.

    IDs can be passed as string, integer, or as PropertyElement with a string/integer inside.
    If provided as string/integer, the permissions default to "public".

    Args:
        name: the name of this property as defined in the onto
        value: one or more geonames.org IDs, as str/int/PropertyElement, or as iterable of str/int/PropertyElement
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        DspToolsUserWarning: if a value does not consist of digits only (the value is written nevertheless)

    Returns:
        an etree._Element that can be appended to the parent resource with resource.append(make_*_prop(...))

    Examples:
        >>> excel2xml.make_geoname_prop(":testproperty", "2761369")
                <geoname-prop name=":testproperty">
                    <geoname permissions="public">2761369</geoname>
                </geoname-prop>
        >>> excel2xml.make_geoname_prop(":testproperty", excel2xml.PropertyElement("2761369", permissions="private", comment="example"))
                <geoname-prop name=":testproperty">
                    <geoname permissions="private" comment="example">2761369</geoname>
                </geoname-prop>
        >>> excel2xml.make_geoname_prop(":testproperty", ["2761369", "1010101"])
                <geoname-prop name=":testproperty">
                    <geoname permissions="public">2761369</geoname>
                    <geoname permissions="public">1010101</geoname>
                </geoname-prop>

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#geoname-prop
    """
    geonames = prepare_value(value)

    # first pass: warn about every value that is not purely numeric
    for geoname in geonames:
        looks_like_id = regex.search(r"^[0-9]+$", str(geoname.value))
        if not looks_like_id:
            warnings.warn(
                DspToolsUserWarning(
                    f"Failed validation in resource '{calling_resource}', property '{name}': "
                    f"'{geoname.value}' is not a geonames.org identifier."
                )
            )

    # second pass: build the XML, including values that triggered a warning
    default_ns = xml_namespace_map[None]
    prop_ = etree.Element(f"{{{default_ns}}}geoname-prop", name=name, nsmap=xml_namespace_map)
    for geoname in geonames:
        attrs = {"permissions": geoname.permissions}
        if geoname.comment and check_notna(geoname.comment):
            attrs["comment"] = geoname.comment
        child = etree.Element(
            f"{{{default_ns}}}geoname",
            **attrs,  # type: ignore[arg-type]
            nsmap=xml_namespace_map,
        )
        child.text = str(geoname.value)
        prop_.append(child)
    return prop_
758
+
759
+
760
def make_integer_prop(
    name: str,
    value: Union[PropertyElement, str, int, Iterable[Union[PropertyElement, str, int]]],
    calling_resource: str = "",
) -> etree._Element:
    """
    Build an `<integer-prop>` with one `<integer>` child per given integer.

    Integers can be passed as string, integer, or as PropertyElement with a string/integer inside.
    If provided as string/integer, the permissions default to "public".

    Args:
        name: the name of this property as defined in the onto
        value: one or more integers, as string/int/PropertyElement, or as iterable of strings/ints/PropertyElements
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        DspToolsUserWarning: if int() raises a ValueError for a value (the value is written nevertheless)

    Returns:
        an etree._Element that can be appended to the parent resource with resource.append(make_*_prop(...))

    Examples:
        >>> excel2xml.make_integer_prop(":testproperty", "2761369")
                <integer-prop name=":testproperty">
                    <integer permissions="public">2761369</integer>
                </integer-prop>
        >>> excel2xml.make_integer_prop(":testproperty", excel2xml.PropertyElement("2761369", permissions="private", comment="example"))
                <integer-prop name=":testproperty">
                    <integer permissions="private" comment="example">2761369</integer>
                </integer-prop>
        >>> excel2xml.make_integer_prop(":testproperty", ["2761369", "1010101"])
                <integer-prop name=":testproperty">
                    <integer permissions="public">2761369</integer>
                    <integer permissions="public">1010101</integer>
                </integer-prop>

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#integer-prop
    """
    integers = prepare_value(value)

    # first pass: a value counts as valid if int() can convert it
    for integer in integers:
        try:
            int(integer.value)
        except ValueError:
            warnings.warn(
                DspToolsUserWarning(
                    f"Failed validation in resource '{calling_resource}', property '{name}': "
                    f"'{integer.value}' is not a valid integer."
                )
            )

    # second pass: build the XML, including values that triggered a warning
    default_ns = xml_namespace_map[None]
    prop_ = etree.Element(f"{{{default_ns}}}integer-prop", name=name, nsmap=xml_namespace_map)
    for integer in integers:
        attrs = {"permissions": integer.permissions}
        if integer.comment and check_notna(integer.comment):
            attrs["comment"] = integer.comment
        child = etree.Element(
            f"{{{default_ns}}}integer",
            **attrs,  # type: ignore[arg-type]
            nsmap=xml_namespace_map,
        )
        child.text = str(integer.value)
        prop_.append(child)
    return prop_
832
+
833
+
834
def make_list_prop(
    list_name: str,
    name: str,
    value: Union[PropertyElement, str, Iterable[Union[PropertyElement, str]]],
    calling_resource: str = "",
) -> etree._Element:
    """
    Build a `<list-prop>` with one `<list>` child per given list node.

    Node names can be passed as plain strings or as PropertyElements wrapping a string.
    If provided as string, the permissions default to "public".

    Args:
        list_name: the name of the list as defined in the onto
        name: the name of this property as defined in the onto
        value: one or more node names, as string/PropertyElement, or as iterable of strings/PropertyElements
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        DspToolsUserWarning: if a node name is not a string or fails check_notna() (the value is written nevertheless)

    Returns:
        an etree._Element that can be appended to the parent resource with resource.append(make_*_prop(...))

    Examples:
        >>> excel2xml.make_list_prop("mylist", ":testproperty", "first_node")
                <list-prop list="mylist" name=":testproperty">
                    <list permissions="public">first_node</list>
                </list-prop>
        >>> excel2xml.make_list_prop("mylist", ":testproperty", excel2xml.PropertyElement("first_node", permissions="private", comment="example"))
                <list-prop list="mylist" name=":testproperty">
                    <list permissions="private" comment="example">first_node</list>
                </list-prop>
        >>> excel2xml.make_list_prop("mylist", ":testproperty", ["first_node", "second_node"])
                <list-prop list="mylist" name=":testproperty">
                    <list permissions="public">first_node</list>
                    <list permissions="public">second_node</list>
                </list-prop>

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#list-prop
    """
    nodes = prepare_value(value)

    # first pass: warn about every node name that is not a usable string
    for node in nodes:
        if isinstance(node.value, str) and check_notna(node.value):
            continue
        warnings.warn(
            DspToolsUserWarning(
                f"Failed validation in resource '{calling_resource}', property '{name}': "
                f"'{node.value}' is not a valid name of a list node."
            )
        )

    # second pass: build the XML, including values that triggered a warning
    default_ns = xml_namespace_map[None]
    prop_ = etree.Element(
        f"{{{default_ns}}}list-prop",
        list=list_name,
        name=name,
        nsmap=xml_namespace_map,
    )
    for node in nodes:
        attrs = {"permissions": node.permissions}
        if node.comment and check_notna(node.comment):
            attrs["comment"] = node.comment
        child = etree.Element(
            f"{{{default_ns}}}list",
            **attrs,  # type: ignore[arg-type]
            nsmap=xml_namespace_map,
        )
        child.text = str(node.value)
        prop_.append(child)
    return prop_
906
+
907
+
908
def make_resptr_prop(
    name: str,
    value: Union[PropertyElement, str, Iterable[Union[PropertyElement, str]]],
    calling_resource: str = "",
) -> etree._Element:
    """
    Build a `<resptr-prop>` with one `<resptr>` child per given target resource ID.

    IDs can be passed as plain strings or as PropertyElements wrapping a string.
    If provided as string, the permissions default to "public".

    Args:
        name: the name of this property as defined in the onto
        value: one or more resource identifiers, as string/PropertyElement, or as iterable of strings/PropertyElements
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        DspToolsUserWarning: if a target ID is not a string or fails check_notna() (the value is written nevertheless)

    Returns:
        an etree._Element that can be appended to the parent resource with resource.append(make_*_prop(...))

    Examples:
        >>> excel2xml.make_resptr_prop(":testproperty", "resource_1")
                <resptr-prop name=":testproperty">
                    <resptr permissions="public">resource_1</resptr>
                </resptr-prop>
        >>> excel2xml.make_resptr_prop(":testproperty", excel2xml.PropertyElement("resource_1", permissions="private", comment="example"))
                <resptr-prop name=":testproperty">
                    <resptr permissions="private" comment="example">resource_1</resptr>
                </resptr-prop>
        >>> excel2xml.make_resptr_prop(":testproperty", ["resource_1", "resource_2"])
                <resptr-prop name=":testproperty">
                    <resptr permissions="public">resource_1</resptr>
                    <resptr permissions="public">resource_2</resptr>
                </resptr-prop>

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#resptr-prop
    """
    targets = prepare_value(value)

    # first pass: warn about every target ID that is not a usable string
    for target in targets:
        if isinstance(target.value, str) and check_notna(target.value):
            continue
        warnings.warn(
            DspToolsUserWarning(
                f"Validation Error in resource '{calling_resource}', property '{name}': "
                f"The following doesn't seem to be a valid ID of a target resource: '{target.value}'"
            )
        )

    # second pass: build the XML, including values that triggered a warning
    default_ns = xml_namespace_map[None]
    prop_ = etree.Element(f"{{{default_ns}}}resptr-prop", name=name, nsmap=xml_namespace_map)
    for target in targets:
        attrs = {"permissions": target.permissions}
        if target.comment and check_notna(target.comment):
            attrs["comment"] = target.comment
        child = etree.Element(
            f"{{{default_ns}}}resptr",
            **attrs,  # type: ignore[arg-type]
            nsmap=xml_namespace_map,
        )
        child.text = str(target.value)
        prop_.append(child)
    return prop_
977
+
978
+
979
def make_text_prop(
    name: str,
    value: Union[PropertyElement, str, Iterable[Union[PropertyElement, str]]],
    calling_resource: str = "",
) -> etree._Element:
    """
    Make a `<text-prop>` from one or more strings. The string(s) can be provided as string or as PropertyElement with a
    string inside. If provided as string, the encoding defaults to utf8, and the permissions to "public".

    Values with encoding "utf8" are written verbatim as element text.
    Values with any other encoding are treated as richtext and parsed into child elements.

    Args:
        name: the name of this property as defined in the onto
        value: one or more strings, as string/PropertyElement, or as iterable of strings/PropertyElements
        calling_resource: the name of the parent resource (for better error messages)

    Raises:
        BaseError: if the XML tags in a richtext property (encoding=xml) are not well-formed

    Warns:
        DspToolsUserWarning: if one of the values doesn't look like a reasonable string
            (e.g. `<NA>` is a valid string, but probably not intended)

    Returns:
        an etree._Element that can be appended to the parent resource with resource.append(make_*_prop(...))

    Examples:
        >>> excel2xml.make_text_prop(":testproperty", "first text")
                <text-prop name=":testproperty">
                    <text encoding="utf8" permissions="public">first text</text>
                </text-prop>
        >>> excel2xml.make_text_prop(":testproperty", excel2xml.PropertyElement("first text", permissions="private", encoding="xml"))
                <text-prop name=":testproperty">
                    <text encoding="xml" permissions="private">first text</text>
                </text-prop>
        >>> excel2xml.make_text_prop(":testproperty", ["first text", "second text"])
                <text-prop name=":testproperty">
                    <text encoding="utf8" permissions="public">first text</text>
                    <text encoding="utf8" permissions="public">second text</text>
                </text-prop>

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#text-prop
    """

    # check the input: prepare a list with valid values
    values = prepare_value(value)

    # check value type
    for val in values:
        if not isinstance(val.value, str) or not check_notna(val.value):
            msg = (
                f"Failed validation in resource '{calling_resource}', property '{name}': "
                f"'{val.value}' is probably not a usable string."
            )
            warnings.warn(DspToolsUserWarning(msg))

    # make xml structure of the values (values that triggered a warning are written, too)
    prop_ = etree.Element(
        "{%s}text-prop" % xml_namespace_map[None],
        name=name,
        nsmap=xml_namespace_map,
    )
    for val in values:
        kwargs = {"permissions": val.permissions}
        if check_notna(val.comment):
            kwargs["comment"] = val.comment
        kwargs["encoding"] = val.encoding if check_notna(val.encoding) else "utf8"
        value_ = etree.Element(
            "{%s}text" % xml_namespace_map[None],
            **kwargs,  # type: ignore[arg-type]
            nsmap=xml_namespace_map,
        )
        if kwargs["encoding"] == "utf8":
            # write the text into the tag, without validation
            value_.text = str(val.value)
        else:
            try:
                value_ = _add_richtext_to_etree_element(str(val.value), value_)
            except BaseError as err:
                if calling_resource:
                    # Start on a new line: the richtext error message ends with the offending text,
                    # so appending directly would glue the context onto that text.
                    err.message += f"\nThe error occurred in resource {calling_resource}, property {name}"
                raise err from None
        prop_.append(value_)

    return prop_
1060
+
1061
+
1062
def _add_richtext_to_etree_element(richtext: str, element: etree._Element) -> etree._Element:
    """
    Return a deep copy of `element` that contains `richtext`, parsed into text and child elements.

    The richtext is first passed through _escape_reserved_chars() and numeric_entities(),
    then wrapped in a throw-away root tag so that it can be parsed as one XML document.
    The element passed in is not modified.

    Args:
        richtext: the richtext string that may contain XML tags
        element: the element whose copy should receive the parsed content

    Raises:
        BaseError: if the wrapped richtext cannot be parsed as XML

    Returns:
        the copy of `element`, with the parsed richtext as its content
    """
    result = copy.deepcopy(element)
    prepared = numeric_entities(_escape_reserved_chars(richtext))
    pseudo_xml = f"<ignore-this>{prepared}</ignore-this>"
    try:
        wrapper = etree.fromstring(pseudo_xml)
    except etree.XMLSyntaxError as err:
        raise BaseError(
            "The XML tags contained in a richtext property (encoding=xml) must be well-formed. "
            "The special characters <, > and & are only allowed to construct a tag. "
            f"\nOriginal error message: {err.msg}"
            f"\nEventual line/column numbers are relative to this text: {pseudo_xml}"
        ) from None
    # text before the first child tag
    result.text = wrapper.text
    # iterate over a snapshot, because appending moves each child out of the wrapper
    for child in list(wrapper):
        result.append(child)
    return result
1080
+
1081
+
1082
def _escape_reserved_chars(text: str) -> str:
    """
    Escape the reserved characters <, > and & in a richtext string (encoding="xml"),
    unless they belong to one of the allowed standoff tags or to an escape sequence.
    The standard standoff tags allowed by DSP-API are documented here:
    https://docs.dasch.swiss/2023.12.01/DSP-API/03-endpoints/api-v2/text/standard-standoff/

    Args:
        text: the richtext string to be escaped

    Returns:
        the escaped richtext string
    """
    allowed_tags = (
        "a( [^>]+)?",  # <a> is the only tag that can have attributes
        "p",
        "em",
        "strong",
        "u",
        "sub",
        "sup",
        "strike",
        "h1",
        "ol",
        "ul",
        "li",
        "tbody",
        "table",
        "tr",
        "td",
        "br",
        "hr",
        "pre",
        "cite",
        "blockquote",
        "code",
    )
    tag_alternatives = "|".join(allowed_tags)
    # the order matters: ampersands are handled last, so that the freshly inserted &lt; / &gt; stay intact
    substitutions = (
        (rf"<(?!/?({tag_alternatives})/?>)", "&lt;"),  # "<" that doesn't open an allowed tag
        (rf"(?<!</?({tag_alternatives})/?)>", "&gt;"),  # ">" that doesn't close an allowed tag
        (r"&(?![#a-zA-Z0-9]+;)", "&amp;"),  # "&" that doesn't start an escape sequence
    )
    for pattern, replacement in substitutions:
        text = regex.sub(pattern, replacement, text)
    return text
1129
+
1130
+
1131
def make_time_prop(
    name: str,
    value: Union[PropertyElement, str, Iterable[Union[PropertyElement, str]]],
    calling_resource: str = "",
) -> etree._Element:
    """
    Make a `<time-prop>` from one or more datetime values of the form "2009-10-10T12:00:00-05:00". The time(s) can be
    provided as string or as PropertyElement with a string inside. If provided as string, the permissions default to
    "public".

    Args:
        name: the name of this property as defined in the onto
        value: one or more DSP times, as string/PropertyElement, or as iterable of strings/PropertyElements
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        If one of the values is not a valid DSP time string.
        Invalid values are still written into the returned element.

    Returns:
        an etree._Element that can be appended to the parent resource with resource.append(make_*_prop(...))

    Examples:
        >>> excel2xml.make_time_prop(":testproperty", "2009-10-10T12:00:00-05:00")
                <time-prop name=":testproperty">
                    <time permissions="public">
                        2009-10-10T12:00:00-05:00
                    </time>
                </time-prop>
        >>> excel2xml.make_time_prop(":testproperty", excel2xml.PropertyElement("2009-10-10T12:00:00-05:00", permissions="private", comment="example"))
                <time-prop name=":testproperty">
                    <time permissions="private" comment="example">
                        2009-10-10T12:00:00-05:00
                    </time>
                </time-prop>
        >>> excel2xml.make_time_prop(":testproperty", ["2009-10-10T12:00:00-05:00", "1901-01-01T01:00:00-00:00"])
                <time-prop name=":testproperty">
                    <time permissions="public">
                        2009-10-10T12:00:00-05:00
                    </time>
                    <time permissions="public">
                        1901-01-01T01:00:00-00:00
                    </time>
                </time-prop>

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#time-prop
    """

    # check the input: prepare a list with valid values
    values = prepare_value(value)

    # check value type
    # The separator of the fractional seconds must be a literal dot ("\."):
    # an unescaped "." would accept any character at that position.
    validation_regex = r"^\d{4}-[0-1]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d(\.\d{1,12})?(Z|[+-][0-1]\d:[0-5]\d)$"
    for val in values:
        if not regex.search(validation_regex, str(val.value)):
            msg = (
                f"Failed validation in resource '{calling_resource}', property '{name}': "
                f"'{val.value}' is not a valid DSP time."
            )
            warnings.warn(DspToolsUserWarning(msg))

    # make xml structure of the values (validation only warns, it does not filter)
    prop_ = etree.Element(
        "{%s}time-prop" % xml_namespace_map[None],
        name=name,
        nsmap=xml_namespace_map,
    )
    for val in values:
        kwargs = {"permissions": val.permissions}
        if val.comment and check_notna(val.comment):
            kwargs["comment"] = val.comment
        value_ = etree.Element(
            "{%s}time" % xml_namespace_map[None],
            **kwargs,  # type: ignore[arg-type]
            nsmap=xml_namespace_map,
        )
        value_.text = str(val.value)
        prop_.append(value_)

    return prop_
1210
+
1211
+
1212
def make_uri_prop(
    name: str,
    value: Union[PropertyElement, str, Iterable[Union[PropertyElement, str]]],
    calling_resource: str = "",
) -> etree._Element:
    """
    Make a `<uri-prop>` from one or more URIs. Each URI can be given as a plain string
    or as a PropertyElement that wraps a string. If provided as string, the permissions default to "public".

    Args:
        name: the name of this property as defined in the onto
        value: one or more URIs, as string/PropertyElement, or as iterable of strings/PropertyElements
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        If one of the values is not a valid URI

    Returns:
        an etree._Element that can be appended to the parent resource with resource.append(make_*_prop(...))

    Examples:
        >>> excel2xml.make_uri_prop(":testproperty", "www.test.com")
                <uri-prop name=":testproperty">
                    <uri permissions="public">www.test.com</uri>
                </uri-prop>
        >>> excel2xml.make_uri_prop(":testproperty", excel2xml.PropertyElement("www.test.com", permissions="private", comment="example"))
                <uri-prop name=":testproperty">
                    <uri permissions="private" comment="example">www.test.com</uri>
                </uri-prop>
        >>> excel2xml.make_uri_prop(":testproperty", ["www.1.com", "www.2.com"])
                <uri-prop name=":testproperty">
                    <uri permissions="public">www.1.com</uri>
                    <uri permissions="public">www.2.com</uri>
                </uri-prop>

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#uri-prop
    """
    uri_values = prepare_value(value)

    # first pass: warn about every value that is_uri() rejects (the value is still written below)
    for candidate in uri_values:
        if is_uri(str(candidate.value)):
            continue
        msg = (
            f"Failed validation in resource '{calling_resource}', property '{name}': "
            f"'{candidate.value}' is not a valid URI."
        )
        warnings.warn(DspToolsUserWarning(msg))

    # second pass: build the <uri-prop> with one <uri> child per value
    default_ns = xml_namespace_map[None]
    uri_prop = etree.Element(f"{{{default_ns}}}uri-prop", name=name, nsmap=xml_namespace_map)
    for candidate in uri_values:
        attribs = {"permissions": candidate.permissions}
        if candidate.comment and check_notna(candidate.comment):
            attribs["comment"] = candidate.comment
        uri_elem = etree.Element(
            f"{{{default_ns}}}uri",
            attrib=attribs,  # type: ignore[arg-type]
            nsmap=xml_namespace_map,
        )
        uri_elem.text = str(candidate.value)
        uri_prop.append(uri_elem)

    return uri_prop
1281
+
1282
+
1283
def make_region(  # noqa: D417 (undocumented-param)
    label: str,
    id: str,
    permissions: str = "public",
    ark: Optional[str] = None,
    iri: Optional[str] = None,
    creation_date: Optional[str] = None,
) -> etree._Element:
    """
    Build a `<region>` element without children, carrying the given arguments as attributes.

    Args:
        The arguments correspond 1:1 to the attributes of the `<region>` element.

    Raises:
        Warning: if both an ARK and an IRI are provided
        BaseError: if the creation date is invalid

    Returns:
        The region element, without any children, but with the attributes
        `<region label=label id=id permissions=permissions ark=ark iri=iri></region>`

    Examples:
        >>> region = excel2xml.make_region("label", "id")
        >>> region.append(excel2xml.make_text_prop("hasComment", "This is a comment"))
        >>> region.append(excel2xml.make_color_prop("hasColor", "#5d1f1e"))
        >>> region.append(excel2xml.make_resptr_prop("isRegionOf", "image_0"))
        >>> region.append(excel2xml.make_geometry_prop("hasGeometry", "{...}"))
        >>> root.append(region)

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#region
    """
    attributes = {"label": label, "id": id, "permissions": permissions}
    # optional identifiers are only written if they are non-empty
    attributes.update({key: val for key, val in (("ark", ark), ("iri", iri)) if val})
    if ark and iri:
        msg = f"Both ARK and IRI were provided for resource '{label}' ({id}). The ARK will override the IRI."
        warnings.warn(DspToolsUserWarning(msg))

    if creation_date:
        # DateTimeStamp() is only called for validation; its result is not needed
        try:
            DateTimeStamp(creation_date)
        except BaseError:
            raise BaseError(
                f"The region '{label}' (ID: {id}) has an invalid creation date '{creation_date}'. "
                f"Did you perhaps forget the timezone?"
            ) from None
        attributes["creation_date"] = creation_date

    region_tag = "{%s}region" % xml_namespace_map[None]
    return etree.Element(region_tag, attrib=attributes, nsmap=xml_namespace_map)
1338
+
1339
+
1340
def make_link(  # noqa: D417 (undocumented-param)
    label: str,
    id: str,
    permissions: str = "public",
    ark: Optional[str] = None,
    iri: Optional[str] = None,
    creation_date: Optional[str] = None,
) -> etree._Element:
    """
    Build a `<link>` element without children, carrying the given arguments as attributes.

    Args:
        The arguments correspond 1:1 to the attributes of the `<link>` element.

    Raises:
        Warning: if both an ARK and an IRI are provided
        BaseError: if the creation date is invalid

    Returns:
        The link element, without any children, but with the attributes
        `<link label=label id=id permissions=permissions ark=ark iri=iri></link>`

    Examples:
        >>> link = excel2xml.make_link("label", "id")
        >>> link.append(excel2xml.make_text_prop("hasComment", "This is a comment"))
        >>> link.append(excel2xml.make_resptr_prop("hasLinkTo", ["resource_0", "resource_1"]))
        >>> root.append(link)

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#link
    """
    attributes = {"label": label, "id": id, "permissions": permissions}
    # optional identifiers are only written if they are non-empty
    attributes.update({key: val for key, val in (("ark", ark), ("iri", iri)) if val})
    if ark and iri:
        msg = f"Both ARK and IRI were provided for resource '{label}' ({id}). The ARK will override the IRI."
        warnings.warn(DspToolsUserWarning(msg))

    if creation_date:
        # DateTimeStamp() is only called for validation; its result is not needed
        try:
            DateTimeStamp(creation_date)
        except BaseError:
            raise BaseError(
                f"The link '{label}' (ID: {id}) has an invalid creation date '{creation_date}'. "
                f"Did you perhaps forget the timezone?"
            ) from None
        attributes["creation_date"] = creation_date

    link_tag = "{%s}link" % xml_namespace_map[None]
    return etree.Element(link_tag, attrib=attributes, nsmap=xml_namespace_map)
1393
+
1394
+
1395
def make_audio_segment(  # noqa: D417 (undocumented-param)
    label: str,
    id: str,
    permissions: str = "public",
) -> etree._Element:
    """
    Build an `<audio-segment>` element without children, carrying the given arguments as attributes.

    Args:
        The arguments correspond 1:1 to the attributes of the `<audio-segment>` element.

    Returns:
        The audio-segment element, without any children, but with the attributes
        `<audio-segment label=label id=id permissions=permissions></audio-segment>`

    Examples:
        >>> audio_segment = excel2xml.make_audio_segment("label", "id")
        >>> audio_segment.append(excel2xml.make_isSegmentOf_prop("audio_resource_id"))
        >>> audio_segment.append(excel2xml.make_hasSegmentBounds_prop(segment_start=60, segment_end=120))
        >>> root.append(audio_segment)

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#video-segment-audio-segment
    """
    segment_tag = "{%s}audio-segment" % xml_namespace_map[None]
    attributes = {"label": label, "id": id, "permissions": permissions}
    return etree.Element(segment_tag, attrib=attributes, nsmap=xml_namespace_map)
1425
+
1426
+
1427
def make_video_segment(  # noqa: D417 (undocumented-param)
    label: str,
    id: str,
    permissions: str = "public",
) -> etree._Element:
    """
    Build a `<video-segment>` element without children, carrying the given arguments as attributes.

    Args:
        The arguments correspond 1:1 to the attributes of the `<video-segment>` element.

    Returns:
        The video-segment element, without any children, but with the attributes
        `<video-segment label=label id=id permissions=permissions></video-segment>`

    Examples:
        >>> video_segment = excel2xml.make_video_segment("label", "id")
        >>> video_segment.append(excel2xml.make_isSegmentOf_prop("video_resource_id"))
        >>> video_segment.append(excel2xml.make_hasSegmentBounds_prop(segment_start=60, segment_end=120))
        >>> root.append(video_segment)

    See https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#video-segment-audio-segment
    """
    segment_tag = "{%s}video-segment" % xml_namespace_map[None]
    attributes = {"label": label, "id": id, "permissions": permissions}
    return etree.Element(segment_tag, attrib=attributes, nsmap=xml_namespace_map)
1457
+
1458
+
1459
def make_isSegmentOf_prop(
    target_id: str, permissions: str = "public", comment: str | None = None, calling_resource: str = ""
) -> etree._Element:
    """
    Make a `<isSegmentOf>` property for a `<video-segment>` or `<audio-segment>`.

    Args:
        target_id: ID of target video/audio resource
        permissions: defaults to "public".
        comment: optional comment for this property. Defaults to None.
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        - If the target_id is not a valid string

    Examples:
        >>> video_segment = excel2xml.make_video_segment("label", "id")
        >>> video_segment.append(excel2xml.make_isSegmentOf_prop("video_resource_id"))
        >>> video_segment.append(excel2xml.make_hasSegmentBounds_prop(segment_start=60, segment_end=120))
        >>> root.append(video_segment)

    Returns:
        an etree._Element that can be appended to an audio/video segment with `segment.append(make_isSegmentOf_prop(...))`
    """
    id_is_usable = isinstance(target_id, str) and check_notna(target_id)
    if not id_is_usable:
        msg = (
            f"Validation Error in resource '{calling_resource}', property 'isSegmentOf': "
            f"The following doesn't seem to be a valid ID of a target resource: '{target_id}'"
        )
        warnings.warn(DspToolsUserWarning(msg))

    attributes = {"permissions": permissions}
    if comment:
        attributes["comment"] = comment
    segment_of = etree.Element("{%s}isSegmentOf" % xml_namespace_map[None], attrib=attributes)
    segment_of.text = target_id
    return segment_of
1494
+
1495
+
1496
def make_relatesTo_prop(
    target_id: str, permissions: str = "public", comment: str | None = None, calling_resource: str = ""
) -> etree._Element:
    """
    Make a `<relatesTo>` property for a `<video-segment>` or `<audio-segment>`.

    Args:
        target_id: ID of the related resource
        permissions: defaults to "public".
        comment: optional comment for this property. Defaults to None.
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        - If the target_id is not a valid string

    Examples:
        >>> video_segment = excel2xml.make_video_segment("label", "id")
        >>> video_segment.append(excel2xml.make_relatesTo_prop("resource_id"))
        >>> video_segment.append(excel2xml.make_relatesTo_prop("other_resource_id"))
        >>> # add other properties
        >>> root.append(video_segment)

    Returns:
        an etree._Element that can be appended to an audio/video segment with `segment.append(make_relatesTo_prop(...))`
    """
    id_is_usable = isinstance(target_id, str) and check_notna(target_id)
    if not id_is_usable:
        msg = (
            f"Validation Error in resource '{calling_resource}', property 'relatesTo': "
            f"The following doesn't seem to be a valid ID of a target resource: '{target_id}'"
        )
        warnings.warn(DspToolsUserWarning(msg))

    attributes = {"permissions": permissions}
    if comment:
        attributes["comment"] = comment
    relates_to = etree.Element("{%s}relatesTo" % xml_namespace_map[None], attrib=attributes)
    relates_to.text = target_id
    return relates_to
1532
+
1533
+
1534
def make_hasSegmentBounds_prop(
    segment_start: int | float,
    segment_end: int | float,
    permissions: str = "public",
    comment: str | None = None,
    calling_resource: str = "",
) -> etree._Element:
    """
    Make a `<hasSegmentBounds>` property for a `<video-segment>` or `<audio-segment>`.

    If one of the bounds is not an int/float, both bounds are converted with float().
    If that conversion fails, a warning is emitted and the bounds are written as they were provided.

    Args:
        segment_start: start, in seconds, counted from the beginning of the audio/video
        segment_end: end, in seconds, counted from the beginning of the audio/video
        permissions: Defaults to "public".
        comment: Optional comment for this property. Defaults to None.
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        - If start or end cannot be interpreted as integers or floats
        - If start is bigger than end

    Examples:
        >>> video_segment = excel2xml.make_video_segment("label", "id")
        >>> video_segment.append(excel2xml.make_isSegmentOf_prop("video_resource_id"))
        >>> video_segment.append(excel2xml.make_hasSegmentBounds_prop(segment_start=60, segment_end=120))
        >>> root.append(video_segment)

    Returns:
        an etree._Element that can be appended to an audio/video segment with `segment.append(make_hasSegmentBounds_prop(...))`
    """
    if not isinstance(segment_start, int | float) or not isinstance(segment_end, int | float):
        try:
            # convert both before assigning, so that a failure leaves the user's input untouched for the message
            converted_start, converted_end = float(segment_start), float(segment_end)
        except (TypeError, ValueError):
            # float() raises ValueError for unparsable strings, but TypeError for None, lists, etc.
            msg = (
                f"Validation Error in resource '{calling_resource}', property 'hasSegmentBounds': "
                f"The start and the end of an audio/video segment must be integers or floats, "
                f"but you provided: {segment_start=} and {segment_end=}"
            )
            warnings.warn(DspToolsUserWarning(msg))
        else:
            segment_start, segment_end = converted_start, converted_end
    # the order can only be checked if both bounds are numeric (i.e. the conversion above did not fail)
    if isinstance(segment_start, int | float) and isinstance(segment_end, int | float) and segment_start > segment_end:
        msg = (
            f"Validation Error in resource '{calling_resource}', property 'hasSegmentBounds': "
            f"The start of an audio/video segment must be less than the end, "
            f"but you provided: {segment_start=} and {segment_end=}"
        )
        warnings.warn(DspToolsUserWarning(msg))
    prop = etree.Element(
        "{%s}hasSegmentBounds" % xml_namespace_map[None],
        segment_start=str(segment_start),
        segment_end=str(segment_end),
        permissions=permissions,
    )
    if comment:
        prop.set("comment", comment)
    return prop
1591
+
1592
+
1593
def make_hasTitle_prop(
    title: str, permissions: str = "public", comment: str | None = None, calling_resource: str = ""
) -> etree._Element:
    """
    Make a `<hasTitle>` property for a `<video-segment>` or `<audio-segment>`.

    Args:
        title: the title of the segment
        permissions: defaults to "public".
        comment: optional comment for this property. Defaults to None.
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        - If the title is not a valid string

    Examples:
        >>> video_segment = excel2xml.make_video_segment("label", "id")
        >>> video_segment.append(excel2xml.make_hasTitle_prop("title of my segment"))
        >>> # add other properties
        >>> root.append(video_segment)

    Returns:
        an etree._Element that can be appended to an audio/video resource with `segment.append(make_hasTitle_prop(...))`
    """
    title_is_usable = isinstance(title, str) and check_notna(title)
    if not title_is_usable:
        msg = (
            f"Validation Error in resource '{calling_resource}', property 'hasTitle': "
            f"The following doesn't seem to be a valid string: '{title}'"
        )
        warnings.warn(DspToolsUserWarning(msg))

    attributes = {"permissions": permissions}
    if comment:
        attributes["comment"] = comment
    title_elem = etree.Element("{%s}hasTitle" % xml_namespace_map[None], attrib=attributes)
    title_elem.text = title
    return title_elem
1628
+
1629
+
1630
def make_hasKeyword_prop(
    keyword: str, permissions: str = "public", comment: str | None = None, calling_resource: str = ""
) -> etree._Element:
    """
    Make a `<hasKeyword>` property for a `<video-segment>` or `<audio-segment>`.

    Args:
        keyword: a keyword of the segment
        permissions: defaults to "public".
        comment: optional comment for this property. Defaults to None.
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        - If the keyword is not a valid string

    Examples:
        >>> video_segment = excel2xml.make_video_segment("label", "id")
        >>> video_segment.append(excel2xml.make_hasKeyword_prop("keyword of my segment"))
        >>> video_segment.append(excel2xml.make_hasKeyword_prop("another keyword"))
        >>> # add other properties
        >>> root.append(video_segment)

    Returns:
        an etree._Element that can be appended to an audio/video resource with `segment.append(make_hasKeyword_prop(...))`
    """
    keyword_is_usable = isinstance(keyword, str) and check_notna(keyword)
    if not keyword_is_usable:
        msg = (
            f"Validation Error in resource '{calling_resource}', property 'hasKeyword': "
            f"The following doesn't seem to be a valid string: '{keyword}'"
        )
        warnings.warn(DspToolsUserWarning(msg))

    attributes = {"permissions": permissions}
    if comment:
        attributes["comment"] = comment
    keyword_elem = etree.Element("{%s}hasKeyword" % xml_namespace_map[None], attrib=attributes)
    keyword_elem.text = keyword
    return keyword_elem
1666
+
1667
+
1668
def make_hasComment_prop(
    comment_text: str, permissions: str = "public", comment: str | None = None, calling_resource: str = ""
) -> etree._Element:
    """
    Make a `<hasComment>` property for a `<video-segment>` or `<audio-segment>`.

    Args:
        comment_text: a text with some background info about the segment. Can be formatted with tags.
        permissions: defaults to "public".
        comment: optional comment for this property. Defaults to None.
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        - If the comment text is not a valid string

    Examples:
        >>> video_segment = excel2xml.make_video_segment("label", "id")
        >>> video_segment.append(excel2xml.make_hasComment_prop("comment of my segment"))
        >>> video_segment.append(excel2xml.make_hasComment_prop("another comment"))
        >>> # add other properties
        >>> root.append(video_segment)

    Returns:
        an etree._Element that can be appended to an audio/video resource with `segment.append(make_hasComment_prop(...))`
    """
    text_is_usable = isinstance(comment_text, str) and check_notna(comment_text)
    if not text_is_usable:
        msg = (
            f"Validation Error in resource '{calling_resource}', property 'hasComment': "
            f"The following doesn't seem to be a valid string: '{comment_text}'"
        )
        warnings.warn(DspToolsUserWarning(msg))

    attributes = {"permissions": permissions}
    if comment:
        attributes["comment"] = comment
    empty_elem = etree.Element("{%s}hasComment" % xml_namespace_map[None], attrib=attributes)
    # the richtext helper returns a filled copy of the empty element
    return _add_richtext_to_etree_element(comment_text, empty_elem)
1704
+
1705
+
1706
def make_hasDescription_prop(
    description: str, permissions: str = "public", comment: str | None = None, calling_resource: str = ""
) -> etree._Element:
    """
    Make a `<hasDescription>` property for a `<video-segment>` or `<audio-segment>`.

    Args:
        description: a text with some background info about the segment. Can be formatted with tags.
        permissions: defaults to "public".
        comment: optional comment for this property. Defaults to None.
        calling_resource: the name of the parent resource (for better error messages)

    Warns:
        - If the description is not a valid string

    Examples:
        >>> video_segment = excel2xml.make_video_segment("label", "id")
        >>> video_segment.append(excel2xml.make_hasDescription_prop("a description of my segment"))
        >>> video_segment.append(excel2xml.make_hasDescription_prop("another description"))
        >>> # add other properties
        >>> root.append(video_segment)

    Returns:
        an etree._Element that can be appended to an audio/video resource with `segment.append(make_hasDescription_prop(...))`
    """
    text_is_usable = isinstance(description, str) and check_notna(description)
    if not text_is_usable:
        msg = (
            f"Validation Error in resource '{calling_resource}', property 'hasDescription': "
            f"The following doesn't seem to be a valid string: '{description}'"
        )
        warnings.warn(DspToolsUserWarning(msg))

    attributes = {"permissions": permissions}
    if comment:
        attributes["comment"] = comment
    empty_elem = etree.Element("{%s}hasDescription" % xml_namespace_map[None], attrib=attributes)
    # the richtext helper returns a filled copy of the empty element
    return _add_richtext_to_etree_element(description, empty_elem)
1742
+
1743
+
1744
def create_json_excel_list_mapping(
    path_to_json: str,
    list_name: str,
    excel_values: Iterable[str],
    sep: str = '+"*ç%&/()=',
    corrections: Optional[dict[str, str]] = None,
) -> dict[str, str]:
    """
    Match the list values found in an Excel column to the node names of a list in a JSON project file.

    Data sources often contain list values that aren't identical to the name of the node in the list of the JSON
    project file (colloquially: ontology). A mapping is then necessary to create a correct XML for the
    `dsp-tools xmlupload`. This function matches every Excel value to the most similar node name
    (string similarity, after simplifying the Excel value with simplify_name()).

    Alternatively, consider using the function create_json_list_mapping(), which also builds a dictionary,
    but from the names and labels in the JSON list, which is less error-prone than this function's approach. However,
    this function has the advantage that it even works when your data source doesn't use the list labels correctly.

    Args:
        path_to_json: path to the JSON project file
        list_name: name of the list in the JSON project file (can also be a nested list)
        excel_values: the Excel column (e.g. as list) with the list values in it; non-string cells are ignored
        sep: separator string, if the cells in the Excel contain more than one list entry
        corrections: dict with wrong entries, each pointing to its correct counterpart

    Raises:
        Warning: if there is an Excel value that couldn't be matched
        Exception: if the path doesn't point to a JSON project file

    Returns:
        dict of the form ``{excel_value: list_node_name}``.
        Every excel_value is stripped, and also present in a lowercase form.

    Examples:
        >>> json_list_nodes = [
                {
                    "name": "giraffe",
                    "labels": {"en": "giraffe"}
                },
                {
                    "name": "antelope",
                    "labels": {"en": "antelope"}
                }
            ]
        >>> excel_row_1 = ["Giraffeeh ", " Antiloupe", "Girraffe , Antiloupe "]  # cells separated with sep=","
        >>> json_excel_list_mapping = {
                "Giraffeeh": "giraffe",
                "giraffeeh": "giraffe",
                "Girraffe": "giraffe",
                "girraffe": "giraffe",
                "Antiloupe": "antelope",
                "antiloupe": "antelope"
            }
    """
    # a None default stands for "no corrections"
    corrections = corrections or {}

    # split multi-entry cells and drop entries that are empty after stripping
    cell_entries = [
        stripped
        for cell in excel_values
        if isinstance(cell, str)
        for piece in cell.split(sep)
        if (stripped := piece.strip())
    ]

    # collect the node names of the requested list (nested nodes included)
    with open(path_to_json, encoding="utf-8") as json_handle:
        project_definition = json.load(json_handle)
    list_nodes = []
    for dsp_list in project_definition["project"]["lists"]:
        if dsp_list["name"] == list_name:
            list_nodes = dsp_list["nodes"]
    node_names = set(_nested_dict_values_iterator(list_nodes))

    # map every entry to its most similar node name
    mapping = {}
    for entry in cell_entries:
        # simplifying the entry removes illegal characters, which increases the match probability
        simplified = simplify_name(corrections.get(entry, entry))
        closest = difflib.get_close_matches(
            word=simplified,
            possibilities=node_names,
            n=1,
            cutoff=0.6,
        )
        if not closest:
            msg = (
                f"Did not find a close match to the excel list entry '{entry}' "
                f"among the values in the JSON project list '{list_name}'"
            )
            warnings.warn(DspToolsUserWarning(msg))
            continue
        mapping[entry] = closest[0]
        mapping[entry.lower()] = closest[0]

    return mapping
1837
+
1838
+
1839
def _nested_dict_values_iterator(dicts: list[dict[str, Any]]) -> Iterable[str]:
    """
    Yield the "name" of every dictionary in a (possibly nested) list of dictionaries.

    The names of the sub-dictionaries found under the key "nodes" are yielded
    before the name of the dictionary that contains them.

    Args:
        dicts: list of nested dictionaries

    Yields:
        the "name" values of the nested dictionaries
    """
    # Credits: https://thispointer.com/python-iterate-loop-over-all-nested-dictionary-values/
    for entry in dicts:
        children = entry["nodes"] if "nodes" in entry else []
        yield from _nested_dict_values_iterator(children)
        if "name" in entry:
            yield entry["name"]
1855
+
1856
+
1857
def create_json_list_mapping(
    path_to_json: str,
    list_name: str,
    language_label: str,
) -> dict[str, str]:
    """
    Build a dictionary that maps the labels of a JSON project list to the names of its nodes.

    Data sources often contain list values named after the "label" of the JSON project list node, instead of the
    "name" which is needed for the `dsp-tools xmlupload`. The returned dictionary translates such labels to names.
    Every label is contained twice: unchanged, and in a stripped lowercase form.

    Alternatively, consider using the method create_json_excel_list_mapping(), which also creates a dictionary, but maps
    values from your data source to list node names from the JSON project file, based on similarity.

    Args:
        path_to_json: path to a JSON project file (a.k.a. ontology)
        list_name: name of a list in the JSON project (works also for nested lists)
        language_label: which language of the label to choose

    Returns:
        a dictionary of the form {label: name}
    """
    with open(path_to_json, encoding="utf-8") as json_handle:
        project_definition = json.load(json_handle)

    # keep only the list object(s) with the requested name
    selected_lists = []
    for dsp_list in project_definition["project"]["lists"]:
        if dsp_list["name"] == list_name:
            selected_lists.append(dsp_list)

    label_to_name = {}
    for label, name in _name_label_mapper_iterator(selected_lists, language_label):
        # skip every pair whose name equals the list name (i.e. the list object itself)
        if name == list_name:
            continue
        label_to_name.update({label: name, label.strip().lower(): name})

    return label_to_name
1890
+
1891
+
1892
def _name_label_mapper_iterator(
    json_subset: list[dict[str, Any]],
    language_label: str,
) -> Iterable[tuple[str, str]]:
    """
    Walk through the nodes of a JSON project list and yield a (label, name) pair for each of them.

    The pairs of the sub-nodes (key "nodes") are yielded before the pair of the node that contains them.

    Args:
        json_subset: list of DSP lists (a DSP list being a dictionary with the keys "name", "labels" and "nodes")
        language_label: which language of the label to choose

    Yields:
        (label, name) pairs
    """
    for list_node in json_subset:
        # descend first, so that the entries of a (sub-)list come before the (sub-)list itself
        sub_nodes = list_node["nodes"] if "nodes" in list_node else []
        yield from _name_label_mapper_iterator(sub_nodes, language_label)
        if "name" in list_node:
            label = list_node["labels"][language_label]
            yield (label, list_node["name"])
1915
+
1916
+
1917
def write_xml(
    root: etree._Element,
    filepath: str | Path,
) -> None:
    """
    Serialize the finished XML, save it to a file, and validate the saved file.

    A deprecation warning for the excel2xml lib is emitted on every call.

    Args:
        root: etree Element with the entire XML document
        filepath: where to save the file

    Warning:
        if the XML is not valid according to the schema
    """
    deprecation_notice = (
        "The excel2xml lib is deprecated in favor of the xmllib. It will be removed in a future release.\n"
        "See the xmllib docs: https://docs.dasch.swiss/latest/DSP-TOOLS/xmllib-docs/xmlroot/"
    )
    warnings.warn(DspToolsUserWarning(deprecation_notice))

    # NOTE(review): the indent string is reproduced as visible in the reviewed listing - confirm the intended width
    etree.indent(root, space=" ")
    serialized = etree.tostring(
        root,
        encoding="unicode",
        pretty_print=True,
        doctype='<?xml version="1.0" encoding="UTF-8"?>',
    )
    with open(filepath, "w", encoding="utf-8") as xml_file:
        xml_file.write(serialized)

    # the file is validated after it was written, so a validation failure doesn't prevent saving
    try:
        parse_and_validate_xml_file(input_file=filepath)
    except BaseError as err:
        msg = (
            f"The XML file was successfully saved to {filepath}, "
            f"but the following Schema validation error(s) occurred: {err.message}"
        )
        warnings.warn(DspToolsUserWarning(msg))
    else:
        print(f"The XML file was successfully saved to {filepath}")