dsp-tools 9.1.0.post11__py3-none-any.whl → 18.3.0.post13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316)
  1. dsp_tools/__init__.py +4 -0
  2. dsp_tools/cli/args.py +36 -0
  3. dsp_tools/cli/call_action.py +51 -231
  4. dsp_tools/cli/call_action_files_only.py +101 -0
  5. dsp_tools/cli/call_action_with_network.py +207 -0
  6. dsp_tools/cli/create_parsers.py +156 -58
  7. dsp_tools/cli/entry_point.py +56 -26
  8. dsp_tools/cli/utils.py +87 -0
  9. dsp_tools/clients/CLAUDE.md +420 -0
  10. dsp_tools/clients/authentication_client.py +14 -0
  11. dsp_tools/clients/authentication_client_live.py +66 -0
  12. dsp_tools/{utils → clients}/connection.py +2 -18
  13. dsp_tools/clients/connection_live.py +233 -0
  14. dsp_tools/clients/fuseki_metrics.py +60 -0
  15. dsp_tools/clients/group_user_clients.py +35 -0
  16. dsp_tools/clients/group_user_clients_live.py +181 -0
  17. dsp_tools/clients/legal_info_client.py +23 -0
  18. dsp_tools/clients/legal_info_client_live.py +132 -0
  19. dsp_tools/clients/list_client.py +49 -0
  20. dsp_tools/clients/list_client_live.py +166 -0
  21. dsp_tools/clients/metadata_client.py +24 -0
  22. dsp_tools/clients/metadata_client_live.py +47 -0
  23. dsp_tools/clients/ontology_clients.py +49 -0
  24. dsp_tools/clients/ontology_create_client_live.py +166 -0
  25. dsp_tools/clients/ontology_get_client_live.py +80 -0
  26. dsp_tools/clients/permissions_client.py +68 -0
  27. dsp_tools/clients/project_client.py +16 -0
  28. dsp_tools/clients/project_client_live.py +66 -0
  29. dsp_tools/commands/create/communicate_problems.py +24 -0
  30. dsp_tools/commands/create/create.py +134 -0
  31. dsp_tools/commands/create/create_on_server/cardinalities.py +111 -0
  32. dsp_tools/commands/create/create_on_server/classes.py +99 -0
  33. dsp_tools/commands/create/create_on_server/complete_ontologies.py +116 -0
  34. dsp_tools/commands/create/create_on_server/default_permissions.py +134 -0
  35. dsp_tools/commands/create/create_on_server/group_users.py +165 -0
  36. dsp_tools/commands/create/create_on_server/lists.py +163 -0
  37. dsp_tools/commands/create/create_on_server/mappers.py +12 -0
  38. dsp_tools/commands/create/create_on_server/onto_utils.py +74 -0
  39. dsp_tools/commands/create/create_on_server/ontology.py +52 -0
  40. dsp_tools/commands/create/create_on_server/project.py +68 -0
  41. dsp_tools/commands/create/create_on_server/properties.py +119 -0
  42. dsp_tools/commands/create/exceptions.py +29 -0
  43. dsp_tools/commands/create/lists_only.py +66 -0
  44. dsp_tools/commands/create/models/create_problems.py +87 -0
  45. dsp_tools/commands/create/models/parsed_ontology.py +88 -0
  46. dsp_tools/commands/create/models/parsed_project.py +81 -0
  47. dsp_tools/commands/create/models/rdf_ontology.py +12 -0
  48. dsp_tools/commands/create/models/server_project_info.py +100 -0
  49. dsp_tools/commands/create/parsing/parse_lists.py +45 -0
  50. dsp_tools/commands/create/parsing/parse_ontology.py +243 -0
  51. dsp_tools/commands/create/parsing/parse_project.py +149 -0
  52. dsp_tools/commands/create/parsing/parsing_utils.py +40 -0
  53. dsp_tools/commands/create/project_validate.py +595 -0
  54. dsp_tools/commands/create/serialisation/ontology.py +119 -0
  55. dsp_tools/commands/create/serialisation/project.py +44 -0
  56. dsp_tools/commands/excel2json/CLAUDE.md +101 -0
  57. dsp_tools/commands/excel2json/json_header.py +57 -23
  58. dsp_tools/commands/excel2json/{new_lists → lists}/compliance_checks.py +26 -26
  59. dsp_tools/commands/excel2json/{new_lists/make_new_lists.py → lists/make_lists.py} +19 -18
  60. dsp_tools/commands/excel2json/{new_lists → lists}/models/input_error.py +1 -12
  61. dsp_tools/commands/excel2json/{new_lists → lists}/models/serialise.py +9 -5
  62. dsp_tools/commands/excel2json/{new_lists → lists}/utils.py +4 -4
  63. dsp_tools/commands/excel2json/models/input_error.py +31 -11
  64. dsp_tools/commands/excel2json/models/json_header.py +53 -15
  65. dsp_tools/commands/excel2json/models/ontology.py +4 -3
  66. dsp_tools/commands/excel2json/{lists.py → old_lists.py} +26 -112
  67. dsp_tools/commands/excel2json/project.py +78 -34
  68. dsp_tools/commands/excel2json/properties.py +57 -36
  69. dsp_tools/commands/excel2json/resources.py +32 -12
  70. dsp_tools/commands/excel2json/utils.py +20 -1
  71. dsp_tools/commands/excel2xml/__init__.py +2 -2
  72. dsp_tools/commands/excel2xml/excel2xml_cli.py +7 -15
  73. dsp_tools/commands/excel2xml/excel2xml_lib.py +138 -493
  74. dsp_tools/commands/excel2xml/propertyelement.py +5 -5
  75. dsp_tools/commands/{project → get}/get.py +29 -13
  76. dsp_tools/commands/get/get_permissions.py +257 -0
  77. dsp_tools/commands/get/get_permissions_legacy.py +89 -0
  78. dsp_tools/commands/{project/models → get/legacy_models}/context.py +6 -6
  79. dsp_tools/commands/{project/models → get/legacy_models}/group.py +5 -10
  80. dsp_tools/commands/{project/models → get/legacy_models}/listnode.py +5 -35
  81. dsp_tools/commands/{project/models → get/legacy_models}/model.py +1 -1
  82. dsp_tools/commands/{project/models → get/legacy_models}/ontology.py +9 -14
  83. dsp_tools/commands/{project/models → get/legacy_models}/project.py +13 -6
  84. dsp_tools/commands/{project/models → get/legacy_models}/propertyclass.py +9 -16
  85. dsp_tools/commands/{project/models → get/legacy_models}/resourceclass.py +8 -46
  86. dsp_tools/commands/{project/models → get/legacy_models}/user.py +19 -60
  87. dsp_tools/commands/get/models/permissions_models.py +10 -0
  88. dsp_tools/commands/id2iri.py +20 -10
  89. dsp_tools/commands/ingest_xmlupload/bulk_ingest_client.py +81 -56
  90. dsp_tools/commands/ingest_xmlupload/create_resources/apply_ingest_id.py +4 -10
  91. dsp_tools/commands/ingest_xmlupload/create_resources/upload_xml.py +97 -37
  92. dsp_tools/commands/ingest_xmlupload/create_resources/user_information.py +2 -2
  93. dsp_tools/commands/ingest_xmlupload/ingest_files/ingest_files.py +9 -10
  94. dsp_tools/commands/ingest_xmlupload/upload_files/filechecker.py +3 -3
  95. dsp_tools/commands/ingest_xmlupload/upload_files/input_error.py +2 -10
  96. dsp_tools/commands/ingest_xmlupload/upload_files/upload_failures.py +12 -2
  97. dsp_tools/commands/ingest_xmlupload/upload_files/upload_files.py +8 -9
  98. dsp_tools/commands/resume_xmlupload/resume_xmlupload.py +18 -18
  99. dsp_tools/commands/start_stack.py +126 -77
  100. dsp_tools/commands/update_legal/CLAUDE.md +344 -0
  101. dsp_tools/commands/update_legal/__init__.py +0 -0
  102. dsp_tools/commands/update_legal/core.py +182 -0
  103. dsp_tools/commands/update_legal/csv_operations.py +135 -0
  104. dsp_tools/commands/update_legal/models.py +87 -0
  105. dsp_tools/commands/update_legal/xml_operations.py +247 -0
  106. dsp_tools/commands/validate_data/CLAUDE.md +159 -0
  107. dsp_tools/commands/validate_data/__init__.py +0 -0
  108. dsp_tools/commands/validate_data/constants.py +59 -0
  109. dsp_tools/commands/validate_data/mappers.py +143 -0
  110. dsp_tools/commands/validate_data/models/__init__.py +0 -0
  111. dsp_tools/commands/validate_data/models/api_responses.py +45 -0
  112. dsp_tools/commands/validate_data/models/input_problems.py +119 -0
  113. dsp_tools/commands/validate_data/models/rdf_like_data.py +117 -0
  114. dsp_tools/commands/validate_data/models/validation.py +106 -0
  115. dsp_tools/commands/validate_data/prepare_data/__init__.py +0 -0
  116. dsp_tools/commands/validate_data/prepare_data/get_rdf_like_data.py +296 -0
  117. dsp_tools/commands/validate_data/prepare_data/make_data_graph.py +91 -0
  118. dsp_tools/commands/validate_data/prepare_data/prepare_data.py +184 -0
  119. dsp_tools/commands/validate_data/process_validation_report/__init__.py +0 -0
  120. dsp_tools/commands/validate_data/process_validation_report/get_user_validation_message.py +358 -0
  121. dsp_tools/commands/validate_data/process_validation_report/query_validation_result.py +507 -0
  122. dsp_tools/commands/validate_data/process_validation_report/reformat_validation_results.py +150 -0
  123. dsp_tools/commands/validate_data/shacl_cli_validator.py +70 -0
  124. dsp_tools/commands/validate_data/sparql/__init__.py +0 -0
  125. dsp_tools/commands/{xml_validate/sparql/resource_shacl.py → validate_data/sparql/cardinality_shacl.py} +45 -47
  126. dsp_tools/commands/validate_data/sparql/construct_shacl.py +92 -0
  127. dsp_tools/commands/validate_data/sparql/legal_info_shacl.py +36 -0
  128. dsp_tools/commands/validate_data/sparql/value_shacl.py +357 -0
  129. dsp_tools/commands/validate_data/utils.py +59 -0
  130. dsp_tools/commands/validate_data/validate_data.py +283 -0
  131. dsp_tools/commands/validate_data/validation/__init__.py +0 -0
  132. dsp_tools/commands/validate_data/validation/check_duplicate_files.py +55 -0
  133. dsp_tools/commands/validate_data/validation/check_for_unknown_classes.py +67 -0
  134. dsp_tools/commands/validate_data/validation/get_validation_report.py +94 -0
  135. dsp_tools/commands/validate_data/validation/validate_ontology.py +107 -0
  136. dsp_tools/commands/xmlupload/CLAUDE.md +292 -0
  137. dsp_tools/commands/xmlupload/make_rdf_graph/__init__.py +0 -0
  138. dsp_tools/commands/xmlupload/make_rdf_graph/constants.py +63 -0
  139. dsp_tools/commands/xmlupload/make_rdf_graph/jsonld_utils.py +44 -0
  140. dsp_tools/commands/xmlupload/make_rdf_graph/make_file_value.py +77 -0
  141. dsp_tools/commands/xmlupload/make_rdf_graph/make_resource_and_values.py +114 -0
  142. dsp_tools/commands/xmlupload/make_rdf_graph/make_values.py +262 -0
  143. dsp_tools/commands/xmlupload/models/bitstream_info.py +18 -0
  144. dsp_tools/commands/xmlupload/models/formatted_text_value.py +0 -25
  145. dsp_tools/commands/xmlupload/models/ingest.py +56 -70
  146. dsp_tools/commands/xmlupload/models/input_problems.py +6 -14
  147. dsp_tools/commands/xmlupload/models/lookup_models.py +21 -0
  148. dsp_tools/commands/xmlupload/models/permission.py +0 -39
  149. dsp_tools/commands/xmlupload/models/{deserialise/xmlpermission.py → permissions_parsed.py} +2 -2
  150. dsp_tools/commands/xmlupload/models/processed/__init__.py +0 -0
  151. dsp_tools/commands/xmlupload/models/processed/file_values.py +29 -0
  152. dsp_tools/commands/xmlupload/models/processed/res.py +27 -0
  153. dsp_tools/commands/xmlupload/models/processed/values.py +101 -0
  154. dsp_tools/commands/xmlupload/models/rdf_models.py +26 -0
  155. dsp_tools/commands/xmlupload/models/upload_clients.py +3 -3
  156. dsp_tools/commands/xmlupload/models/upload_state.py +2 -4
  157. dsp_tools/commands/xmlupload/prepare_xml_input/__init__.py +0 -0
  158. dsp_tools/commands/xmlupload/{ark2iri.py → prepare_xml_input/ark2iri.py} +1 -1
  159. dsp_tools/commands/xmlupload/prepare_xml_input/get_processed_resources.py +252 -0
  160. dsp_tools/commands/xmlupload/{iiif_uri_validator.py → prepare_xml_input/iiif_uri_validator.py} +2 -14
  161. dsp_tools/commands/xmlupload/{list_client.py → prepare_xml_input/list_client.py} +15 -10
  162. dsp_tools/commands/xmlupload/prepare_xml_input/prepare_xml_input.py +67 -0
  163. dsp_tools/commands/xmlupload/prepare_xml_input/read_validate_xml_file.py +58 -0
  164. dsp_tools/commands/xmlupload/prepare_xml_input/transform_input_values.py +118 -0
  165. dsp_tools/commands/xmlupload/resource_create_client.py +7 -468
  166. dsp_tools/commands/xmlupload/richtext_id2iri.py +37 -0
  167. dsp_tools/commands/xmlupload/stash/{construct_and_analyze_graph.py → analyse_circular_reference_graph.py} +64 -157
  168. dsp_tools/commands/xmlupload/stash/create_info_for_graph.py +53 -0
  169. dsp_tools/commands/xmlupload/stash/graph_models.py +13 -8
  170. dsp_tools/commands/xmlupload/stash/stash_circular_references.py +48 -115
  171. dsp_tools/commands/xmlupload/stash/stash_models.py +4 -9
  172. dsp_tools/commands/xmlupload/stash/upload_stashed_resptr_props.py +34 -40
  173. dsp_tools/commands/xmlupload/stash/upload_stashed_xml_texts.py +98 -108
  174. dsp_tools/commands/xmlupload/upload_config.py +8 -0
  175. dsp_tools/commands/xmlupload/write_diagnostic_info.py +14 -9
  176. dsp_tools/commands/xmlupload/xmlupload.py +214 -192
  177. dsp_tools/config/__init__.py +0 -0
  178. dsp_tools/config/logger_config.py +69 -0
  179. dsp_tools/{utils → config}/warnings_config.py +4 -1
  180. dsp_tools/error/__init__.py +0 -0
  181. dsp_tools/error/custom_warnings.py +39 -0
  182. dsp_tools/error/exceptions.py +204 -0
  183. dsp_tools/error/problems.py +10 -0
  184. dsp_tools/error/xmllib_errors.py +20 -0
  185. dsp_tools/error/xmllib_warnings.py +54 -0
  186. dsp_tools/error/xmllib_warnings_util.py +159 -0
  187. dsp_tools/error/xsd_validation_error_msg.py +19 -0
  188. dsp_tools/legacy_models/__init__.py +0 -0
  189. dsp_tools/{models → legacy_models}/datetimestamp.py +7 -7
  190. dsp_tools/{models → legacy_models}/langstring.py +1 -1
  191. dsp_tools/{models → legacy_models}/projectContext.py +4 -4
  192. dsp_tools/resources/schema/data.xsd +108 -83
  193. dsp_tools/resources/schema/lists-only.json +4 -23
  194. dsp_tools/resources/schema/project.json +80 -35
  195. dsp_tools/resources/schema/properties-only.json +1 -4
  196. dsp_tools/resources/start-stack/docker-compose.override-host.j2 +11 -0
  197. dsp_tools/resources/start-stack/docker-compose.yml +34 -30
  198. dsp_tools/resources/start-stack/dsp-app-config.json +45 -0
  199. dsp_tools/resources/start-stack/dsp-app-config.override-host.j2 +26 -0
  200. dsp_tools/resources/validate_data/api-shapes-resource-cardinalities.ttl +191 -0
  201. dsp_tools/resources/validate_data/api-shapes.ttl +804 -0
  202. dsp_tools/resources/validate_data/shacl-cli-image.yml +4 -0
  203. dsp_tools/resources/validate_data/validate-ontology.ttl +99 -0
  204. dsp_tools/utils/ansi_colors.py +32 -0
  205. dsp_tools/utils/data_formats/__init__.py +0 -0
  206. dsp_tools/utils/{date_util.py → data_formats/date_util.py} +13 -1
  207. dsp_tools/utils/data_formats/iri_util.py +30 -0
  208. dsp_tools/utils/{shared.py → data_formats/shared.py} +1 -35
  209. dsp_tools/utils/{uri_util.py → data_formats/uri_util.py} +12 -2
  210. dsp_tools/utils/fuseki_bloating.py +63 -0
  211. dsp_tools/utils/json_parsing.py +22 -0
  212. dsp_tools/utils/rdf_constants.py +42 -0
  213. dsp_tools/utils/rdflib_utils.py +10 -0
  214. dsp_tools/utils/replace_id_with_iri.py +66 -0
  215. dsp_tools/utils/request_utils.py +238 -0
  216. dsp_tools/utils/xml_parsing/__init__.py +0 -0
  217. dsp_tools/utils/xml_parsing/get_lookups.py +32 -0
  218. dsp_tools/utils/xml_parsing/get_parsed_resources.py +325 -0
  219. dsp_tools/utils/xml_parsing/models/__init__.py +0 -0
  220. dsp_tools/utils/xml_parsing/models/parsed_resource.py +76 -0
  221. dsp_tools/utils/xml_parsing/parse_clean_validate_xml.py +137 -0
  222. dsp_tools/xmllib/CLAUDE.md +302 -0
  223. dsp_tools/xmllib/__init__.py +49 -0
  224. dsp_tools/xmllib/general_functions.py +877 -0
  225. dsp_tools/xmllib/internal/__init__.py +0 -0
  226. dsp_tools/xmllib/internal/checkers.py +162 -0
  227. dsp_tools/xmllib/internal/circumvent_circular_imports.py +36 -0
  228. dsp_tools/xmllib/internal/constants.py +46 -0
  229. dsp_tools/xmllib/internal/input_converters.py +155 -0
  230. dsp_tools/xmllib/internal/serialise_file_value.py +57 -0
  231. dsp_tools/xmllib/internal/serialise_resource.py +177 -0
  232. dsp_tools/xmllib/internal/serialise_values.py +152 -0
  233. dsp_tools/xmllib/internal/type_aliases.py +11 -0
  234. dsp_tools/xmllib/models/config_options.py +28 -0
  235. dsp_tools/xmllib/models/date_formats.py +48 -0
  236. dsp_tools/xmllib/models/dsp_base_resources.py +1380 -400
  237. dsp_tools/xmllib/models/internal/__init__.py +0 -0
  238. dsp_tools/xmllib/models/internal/file_values.py +172 -0
  239. dsp_tools/xmllib/models/internal/geometry.py +162 -0
  240. dsp_tools/xmllib/models/{migration_metadata.py → internal/migration_metadata.py} +14 -10
  241. dsp_tools/xmllib/models/internal/serialise_permissions.py +66 -0
  242. dsp_tools/xmllib/models/internal/values.py +342 -0
  243. dsp_tools/xmllib/models/licenses/__init__.py +0 -0
  244. dsp_tools/xmllib/models/licenses/other.py +59 -0
  245. dsp_tools/xmllib/models/licenses/recommended.py +107 -0
  246. dsp_tools/xmllib/models/permissions.py +41 -0
  247. dsp_tools/xmllib/models/res.py +1782 -0
  248. dsp_tools/xmllib/models/root.py +313 -26
  249. dsp_tools/xmllib/value_checkers.py +310 -47
  250. dsp_tools/xmllib/value_converters.py +765 -8
  251. dsp_tools-18.3.0.post13.dist-info/METADATA +90 -0
  252. dsp_tools-18.3.0.post13.dist-info/RECORD +286 -0
  253. dsp_tools-18.3.0.post13.dist-info/WHEEL +4 -0
  254. {dsp_tools-9.1.0.post11.dist-info → dsp_tools-18.3.0.post13.dist-info}/entry_points.txt +1 -0
  255. dsp_tools/commands/project/create/project_create.py +0 -1107
  256. dsp_tools/commands/project/create/project_create_lists.py +0 -204
  257. dsp_tools/commands/project/create/project_validate.py +0 -453
  258. dsp_tools/commands/project/models/project_definition.py +0 -12
  259. dsp_tools/commands/rosetta.py +0 -124
  260. dsp_tools/commands/template.py +0 -30
  261. dsp_tools/commands/xml_validate/api_connection.py +0 -122
  262. dsp_tools/commands/xml_validate/deserialise_input.py +0 -135
  263. dsp_tools/commands/xml_validate/make_data_rdf.py +0 -193
  264. dsp_tools/commands/xml_validate/models/data_deserialised.py +0 -108
  265. dsp_tools/commands/xml_validate/models/data_rdf.py +0 -214
  266. dsp_tools/commands/xml_validate/models/input_problems.py +0 -191
  267. dsp_tools/commands/xml_validate/models/validation.py +0 -29
  268. dsp_tools/commands/xml_validate/reformat_validaton_result.py +0 -89
  269. dsp_tools/commands/xml_validate/sparql/construct_shapes.py +0 -16
  270. dsp_tools/commands/xml_validate/xml_validate.py +0 -151
  271. dsp_tools/commands/xmlupload/check_consistency_with_ontology.py +0 -253
  272. dsp_tools/commands/xmlupload/models/deserialise/deserialise_value.py +0 -236
  273. dsp_tools/commands/xmlupload/models/deserialise/xmlresource.py +0 -171
  274. dsp_tools/commands/xmlupload/models/namespace_context.py +0 -39
  275. dsp_tools/commands/xmlupload/models/ontology_lookup_models.py +0 -161
  276. dsp_tools/commands/xmlupload/models/ontology_problem_models.py +0 -178
  277. dsp_tools/commands/xmlupload/models/serialise/jsonld_serialiser.py +0 -40
  278. dsp_tools/commands/xmlupload/models/serialise/serialise_value.py +0 -51
  279. dsp_tools/commands/xmlupload/ontology_client.py +0 -92
  280. dsp_tools/commands/xmlupload/project_client.py +0 -91
  281. dsp_tools/commands/xmlupload/read_validate_xml_file.py +0 -99
  282. dsp_tools/models/custom_warnings.py +0 -31
  283. dsp_tools/models/exceptions.py +0 -90
  284. dsp_tools/resources/0100-template-repo/template.json +0 -45
  285. dsp_tools/resources/0100-template-repo/template.xml +0 -27
  286. dsp_tools/resources/start-stack/docker-compose-validation.yml +0 -5
  287. dsp_tools/resources/start-stack/start-stack-config.yml +0 -4
  288. dsp_tools/resources/xml_validate/api-shapes.ttl +0 -411
  289. dsp_tools/resources/xml_validate/replace_namespace.xslt +0 -61
  290. dsp_tools/utils/connection_live.py +0 -383
  291. dsp_tools/utils/iri_util.py +0 -14
  292. dsp_tools/utils/logger_config.py +0 -41
  293. dsp_tools/utils/set_encoder.py +0 -20
  294. dsp_tools/utils/xml_utils.py +0 -145
  295. dsp_tools/utils/xml_validation.py +0 -197
  296. dsp_tools/utils/xml_validation_models.py +0 -68
  297. dsp_tools/xmllib/models/file_values.py +0 -78
  298. dsp_tools/xmllib/models/resource.py +0 -415
  299. dsp_tools/xmllib/models/values.py +0 -428
  300. dsp_tools-9.1.0.post11.dist-info/METADATA +0 -130
  301. dsp_tools-9.1.0.post11.dist-info/RECORD +0 -167
  302. dsp_tools-9.1.0.post11.dist-info/WHEEL +0 -4
  303. dsp_tools-9.1.0.post11.dist-info/licenses/LICENSE +0 -674
  304. /dsp_tools/{commands/excel2json/new_lists → clients}/__init__.py +0 -0
  305. /dsp_tools/commands/{excel2json/new_lists/models → create}/__init__.py +0 -0
  306. /dsp_tools/commands/{project → create/create_on_server}/__init__.py +0 -0
  307. /dsp_tools/commands/{project/create → create/models}/__init__.py +0 -0
  308. /dsp_tools/commands/{project/models → create/parsing}/__init__.py +0 -0
  309. /dsp_tools/commands/{xml_validate → create/serialisation}/__init__.py +0 -0
  310. /dsp_tools/commands/{xml_validate/models → excel2json/lists}/__init__.py +0 -0
  311. /dsp_tools/commands/{xml_validate/sparql → excel2json/lists/models}/__init__.py +0 -0
  312. /dsp_tools/commands/excel2json/{new_lists → lists}/models/deserialise.py +0 -0
  313. /dsp_tools/commands/{xmlupload/models/deserialise → get}/__init__.py +0 -0
  314. /dsp_tools/commands/{xmlupload/models/serialise → get/legacy_models}/__init__.py +0 -0
  315. /dsp_tools/commands/{project/models → get/legacy_models}/helpers.py +0 -0
  316. /dsp_tools/{models → commands/get/models}/__init__.py +0 -0
dsp_tools/xmllib/value_converters.py
@@ -1,20 +1,777 @@
1
+ from __future__ import annotations
2
+
3
+ import datetime
1
4
  from typing import Any
2
5
 
6
+ import regex
7
+ from regex import Match
8
+
9
+ from dsp_tools.error.xmllib_warnings import MessageInfo
10
+ from dsp_tools.error.xmllib_warnings_util import emit_xmllib_input_warning
11
+ from dsp_tools.error.xmllib_warnings_util import raise_xmllib_input_error
12
+ from dsp_tools.xmllib.internal.checkers import is_date_internal
13
+ from dsp_tools.xmllib.internal.checkers import is_nonempty_value_internal
14
+ from dsp_tools.xmllib.models.config_options import NewlineReplacement
15
+ from dsp_tools.xmllib.models.date_formats import Calendar
16
+ from dsp_tools.xmllib.models.date_formats import DateFormat
17
+ from dsp_tools.xmllib.models.date_formats import Era
18
+
3
19
 
4
- def convert_to_bool_string(value: Any) -> str:
20
+ def convert_to_bool_string(value: Any) -> bool:
5
21
  """
6
22
  Turns a value into a bool string, suitable for an XML.
23
+ It is case-insensitive, meaning that the words can also be capitalised.
24
+
25
+ Accepted values:
26
+ - `false`, `0`, `0.0`, `no`, `non`, `nein` -> `False`
27
+ - `true`, `1`, `1.0`, `yes`, `oui`, `ja`, `sì` -> `True`
7
28
 
8
29
  Args:
9
30
  value: value to transform
10
31
 
11
32
  Returns:
12
- 'true' or 'false' if it is a known value,
13
- else it returns the original value as a string.
33
+ `True` or `False` if it is an accepted value.
34
+
35
+ Raises:
36
+ XmllibInputError: If the value is not convertable to a boolean
37
+
38
+ Examples:
39
+ ```python
40
+ result = xmllib.convert_to_bool_string(1)
41
+ # result == True
42
+ ```
43
+
44
+ ```python
45
+ result = xmllib.convert_to_bool_string("nein")
46
+ # result == False
47
+ ```
48
+
49
+ ```python
50
+ result = xmllib.convert_to_bool_string(None)
51
+ # raises XmllibInputError
52
+ ```
14
53
  """
15
54
  str_val = str(value).lower().strip()
16
- if str_val in ("false", "0", "0.0", "no"):
17
- return "false"
18
- elif str_val in ("true", "1", "1.0", "yes"):
19
- return "true"
20
- return str(value)
55
+ if str_val in ("false", "0", "0.0", "no", "non", "nein"):
56
+ return False
57
+ elif str_val in ("true", "1", "1.0", "yes", "oui", "ja", "sì"):
58
+ return True
59
+ raise_xmllib_input_error(MessageInfo(f"The entered value '{value}' cannot be converted to a bool."))
60
+
61
+
62
+ def replace_newlines_with_tags(text: str, converter_option: NewlineReplacement) -> str:
63
+ """
64
+ Converts the newlines in a string to XML tags.
65
+
66
+ Args:
67
+ text: string to convert
68
+ converter_option: specifies what tag to use instead of the newline
69
+
70
+ Returns:
71
+ String with replaced values
72
+
73
+ Raises:
74
+ XmllibInputError: If an invalid conversion option is given
75
+
76
+ Examples:
77
+ ```python
78
+ result = xmllib.replace_newlines_with_tags(
79
+ "Start\\nEnd", xmllib.NewlineReplacement.NONE
80
+ )
81
+ # result == "Start\\nEnd"
82
+ ```
83
+
84
+ ```python
85
+ result = xmllib.replace_newlines_with_tags(
86
+ "Start\\nEnd", xmllib.NewlineReplacement.LINEBREAK
87
+ )
88
+ # result == "Start<br/>End"
89
+ ```
90
+
91
+ ```python
92
+ result = xmllib.replace_newlines_with_tags(
93
+ "Start\\n\\nEnd", xmllib.NewlineReplacement.PARAGRAPH
94
+ )
95
+ # result == "<p>Start</p><p>End</p>"
96
+ ```
97
+ """
98
+ match converter_option:
99
+ case NewlineReplacement.NONE:
100
+ return text
101
+ case NewlineReplacement.LINEBREAK:
102
+ return replace_newlines_with_br_tags(text)
103
+ case NewlineReplacement.PARAGRAPH:
104
+ return replace_newlines_with_paragraph_tags(text)
105
+
106
+
107
+ def replace_newlines_with_paragraph_tags(text: str) -> str:
108
+ """
109
+ Replace `Start\\nEnd` with `<p>Start</p><p>End</p>`
110
+
111
+ Args:
112
+ text: string to be formatted
113
+
114
+ Returns:
115
+ Formatted string with paragraph tags
116
+
117
+ Examples:
118
+ ```python
119
+ result = xmllib.replace_newlines_with_paragraph_tags("Start\\nEnd")
120
+ # result == "<p>Start</p><p>End</p>"
121
+ ```
122
+
123
+ ```python
124
+ # multiple consecutive newlines will be treated as one newline
125
+
126
+ result = xmllib.replace_newlines_with_paragraph_tags("Start\\n\\nEnd")
127
+ # result == "<p>Start</p><p>End</p>"
128
+ ```
129
+ """
130
+ splt = [x for x in text.split("\n") if x != ""]
131
+ formatted = [f"<p>{x}</p>" for x in splt]
132
+ return "".join(formatted)
133
+
134
+
135
+ def replace_newlines_with_br_tags(text: str) -> str:
136
+ """
137
+ Replaces `\\n` with `<br/>`
138
+
139
+ Args:
140
+ text: string to be formatted
141
+
142
+ Returns:
143
+ Formatted string with break-line tags
144
+
145
+ Examples:
146
+ ```python
147
+ result = xmllib.replace_newlines_with_br_tags("Start\\nEnd")
148
+ # result == "Start<br/>End"
149
+ ```
150
+
151
+ ```python
152
+ # multiple consecutive newlines will be converted into multiple break-lines
153
+
154
+ result = xmllib.replace_newlines_with_br_tags("Start\\n\\nEnd")
155
+ # result == "Start<br/><br/>End"
156
+ ```
157
+ """
158
+ return text.replace("\n", "<br/>")
159
+
160
+
161
+ def reformat_date(
162
+ date: str | int,
163
+ date_precision_separator: str | None,
164
+ date_range_separator: str | None,
165
+ date_format: DateFormat,
166
+ calendar: Calendar = Calendar.GREGORIAN,
167
+ era: Era | None = Era.CE,
168
+ resource_id: str | None = None,
169
+ ) -> str:
170
+ """
171
+ Reformats a date string into the DSP format.
172
+
173
+ - If the input cannot be reformatted according to the configuration, or if the result
174
+ is not a valid DSP date, a warning is emitted and the original input is returned.
175
+ - If the input is empty, a warning is emitted and an empty string is returned.
176
+ - If the input is already a correctly formatted DSP-date, the original input is returned.
177
+
178
+ Args:
179
+ date: date string to be reformatted
180
+ date_precision_separator: the separation between the day, month and year
181
+ date_range_separator: the separation between two dates
182
+ date_format: the format of the date, see [`DateFormat` for options](https://docs.dasch.swiss/latest/DSP-TOOLS/xmllib-docs/date_formats/#xmllib.models.date_formats.DateFormat)
183
+ calendar: the calendar of the date, see [`Calendar` for options](https://docs.dasch.swiss/latest/DSP-TOOLS/xmllib-docs/date_formats/#xmllib.models.date_formats.Calendar)
184
+ era: the era of the date, see [`Era` for options](https://docs.dasch.swiss/latest/DSP-TOOLS/xmllib-docs/date_formats/#xmllib.models.date_formats.Era)
185
+ resource_id: the ID of the associated resource, this is to improve the error message
186
+
187
+ Returns:
188
+ A reformatted date or the original input if the reformatted result is not a valid DSP date
189
+
190
+ Examples:
191
+ ```python
192
+ # default configuration, starting with the day
193
+ result = xmllib.reformat_date(
194
+ date="1.11.2000",
195
+ date_precision_separator=".",
196
+ date_range_separator=None,
197
+ date_format=xmllib.DateFormat.DD_MM_YYYY
198
+ )
199
+ # result == "GREGORIAN:CE:2000-11-1:CE:2000-11-1"
200
+ ```
201
+
202
+ ```python
203
+ # default configuration, but starting with the year
204
+ result = xmllib.reformat_date(
205
+ date="2000.11.1",
206
+ date_precision_separator=".",
207
+ date_range_separator=None,
208
+ date_format=xmllib.DateFormat.YYYY_MM_DD,
209
+ )
210
+ # result == "GREGORIAN:CE:2000-11-1:CE:2000-11-1"
211
+ ```
212
+
213
+ ```python
214
+ # with a date range
215
+ result = xmllib.reformat_date(
216
+ date="1.11.2000-2001",
217
+ date_precision_separator=".",
218
+ date_range_separator="-",
219
+ date_format=xmllib.DateFormat.DD_MM_YYYY,
220
+ )
221
+ # result == "GREGORIAN:CE:2000-11-1:CE:2001"
222
+ ```
223
+
224
+ ```python
225
+ # islamic calendar, where eras are not allowed
226
+ result = xmllib.reformat_date(
227
+ date="1.11.2000",
228
+ date_precision_separator=".",
229
+ date_range_separator=None,
230
+ date_format=xmllib.DateFormat.DD_MM_YYYY,
231
+ calendar=xmllib.Calendar.ISLAMIC,
232
+ era=None
233
+ )
234
+ # result == "ISLAMIC:2000-11-1:2000-11-1"
235
+ ```
236
+
237
+ ```python
238
+ # with a different era
239
+ result = xmllib.reformat_date(
240
+ date="1.11.2000",
241
+ date_precision_separator=".",
242
+ date_range_separator="-",
243
+ date_format=xmllib.DateFormat.DD_MM_YYYY,
244
+ era=xmllib.Era.AD
245
+ )
246
+ # result == "GREGORIAN:AD:2000-11-1:AD:2000-11-1"
247
+ ```
248
+
249
+ ```python
250
+ # reformatted date, no precision in the date string is required
251
+ result = xmllib.reformat_date(
252
+ date="2000",
253
+ date_precision_separator=".",
254
+ date_range_separator="-",
255
+ date_format=xmllib.DateFormat.DD_MM_YYYY,
256
+ )
257
+ # result == "GREGORIAN:CE:2000:CE:2000"
258
+ ```
259
+
260
+ ```python
261
+ # already correctly formatted date
262
+ result = xmllib.reformat_date(
263
+ date="GREGORIAN:CE:2000:CE:2000",
264
+ date_precision_separator=".",
265
+ date_range_separator="-",
266
+ date_format=xmllib.DateFormat.DD_MM_YYYY,
267
+ )
268
+ # result == "GREGORIAN:CE:2000:CE:2000"
269
+ ```
270
+
271
+ ```python
272
+ # invalid input: a warning is emitted and the original input is returned
273
+ result = xmllib.reformat_date(
274
+ date="not-a-date",
275
+ date_precision_separator=".",
276
+ date_range_separator="-",
277
+ date_format=xmllib.DateFormat.DD_MM_YYYY,
278
+ )
279
+ # WARNING is emitted
280
+ # result == "not-a-date"
281
+ ```
282
+ """
283
+ if not is_nonempty_value_internal(date):
284
+ msg_info = MessageInfo(
285
+ "The date to be reformatted is empty. An empty string is returned.", resource_id=resource_id
286
+ )
287
+ emit_xmllib_input_warning(msg_info)
288
+ return ""
289
+ date = str(date).strip()
290
+ invalid_date_info = MessageInfo(
291
+ f"The provided date '{date}' does not conform to the expected format, the original value is returned.",
292
+ resource_id=resource_id,
293
+ )
294
+ # Here we want to check if the input is already a reformatted date. In that case, we would expect a calendar.
295
+ # The function that checks if an input is a valid date does not require a calendar,
296
+ # so unformatted input for example, '2000' may be accepted as a valid date.
297
+ if regex.search(r"(GREGORIAN|JULIAN|ISLAMIC)", date):
298
+ if is_date_internal(date):
299
+ return date
300
+ else:
301
+ emit_xmllib_input_warning(invalid_date_info)
302
+ return date
303
+ if date_precision_separator and date_range_separator:
304
+ if date_precision_separator == date_range_separator:
305
+ msg_info = MessageInfo(
306
+ f"The precision separator and range separator provided are identical: '{date_precision_separator}'. "
307
+ f"This is not allowed.",
308
+ resource_id=resource_id,
309
+ )
310
+ raise_xmllib_input_error(msg_info)
311
+ if date_range_separator is not None:
312
+ date_split = [found for x in date.split(date_range_separator) if (found := x.strip())]
313
+ else:
314
+ date_split = [date.strip()]
315
+ all_dates = [_reformat_single_date(x, date_precision_separator, date_format, resource_id) for x in date_split]
316
+ if era:
317
+ all_dates = [f"{era.value}:{x}" for x in all_dates]
318
+ if len(all_dates) == 1:
319
+ all_dates.append(all_dates[0])
320
+ reformatted_str = ":".join(all_dates)
321
+ if calendar:
322
+ reformatted_str = f"{calendar.value}:{reformatted_str}"
323
+ if is_date_internal(reformatted_str):
324
+ return reformatted_str
325
+ emit_xmllib_input_warning(invalid_date_info)
326
+ return date
327
+
328
+
329
def _reformat_single_date(  # noqa: PLR0911 Too many return statements
    single_date: str, date_precision_separator: str | None, date_format: DateFormat, resource_id: str | None
) -> str:
    """
    Bring the components of one date (not a range) into a hyphen-separated, year-first order.

    Args:
        single_date: the date as provided by the caller
        date_precision_separator: separator between the date components; if None, the input is returned unchanged
        date_format: order of the components in the input
        resource_id: ID of the resource, forwarded to the emitted messages

    Returns:
        The reordered date. If the format is MM_DD_YYYY and the input has more than 3 or no components,
        a warning is emitted and the original input is returned.
    """
    if date_precision_separator is None:
        return single_date
    # empty fragments (e.g. from a trailing separator) are dropped
    stripped_parts = (part.strip() for part in single_date.split(date_precision_separator))
    components = [part for part in stripped_parts if part]
    if date_format == DateFormat.YYYY_MM_DD:
        return "-".join(components)
    if date_format == DateFormat.DD_MM_YYYY:
        return "-".join(components[::-1])
    if date_format == DateFormat.MM_DD_YYYY:
        number_of_components = len(components)
        if number_of_components == 3:
            month, day, year = components
            return f"{year}-{month}-{day}"
        if number_of_components == 2:
            month, year = components
            return f"{year}-{month}"
        if number_of_components == 1:
            return components[0]
        warning_info = MessageInfo(
            f"The provided input of a single date '{single_date}' could not be reformatted correctly.",
            resource_id=resource_id,
        )
        emit_xmllib_input_warning(warning_info)
        return single_date
    # none of the known formats matched
    error_info = MessageInfo(
        f"The provided date format '{date_format}' to reformat the date is invalid.",
        resource_id=resource_id,
    )
    raise_xmllib_input_error(error_info)
359
+
360
+
361
def find_dates_in_string(string: str) -> set[str]:
    """
    Search a string for date values (single dates or date ranges),
    and return all dates that were found as a set of DSP-formatted strings.
    If no date was found, an empty set is returned.
    [See XML documentation for details](https://docs.dasch.swiss/latest/DSP-TOOLS/file-formats/xml-data-file/#date).

    Notes:
        - If no era or calendar is given, dates are interpreted in the Common Era and the Gregorian calendar.
        - Standalone numbers from 000-2999, in 3/4-digit form, are interpreted as years CE.
        - If a number (with any number of digits) is followed by CE, C.E., AD, A.D., it is interpreted as years CE.
        - If a number (with any number of digits) is followed by BCE, BC, B.C., B.C.E., av. J.-C.,
          it is interpreted as years BCE.
        - Dates written with slashes are always interpreted in a European manner: 5/11/2021 is the 5th of November.
        - In the European notation, 2-digit years are expanded to 4 digits, with the current year as watershed:
            - 30.4.24 -> 30.04.2024
            - 30.4.50 -> 30.04.1950

    Currently supported date formats:
        - 0476-09-04 -> GREGORIAN:CE:0476-09-04:CE:0476-09-04
        - 0476_09_04 -> GREGORIAN:CE:0476-09-04:CE:0476-09-04
        - 30.4.2021 -> GREGORIAN:CE:2021-04-30:CE:2021-04-30
        - 30.4.21 -> GREGORIAN:CE:2021-04-30:CE:2021-04-30
        - 5/11/2021 -> GREGORIAN:CE:2021-11-05:CE:2021-11-05
        - Jan 26, 1993 -> GREGORIAN:CE:1993-01-26:CE:1993-01-26
        - 26 Jan 1993 -> GREGORIAN:CE:1993-01-26:CE:1993-01-26
        - 26 January 1993 -> GREGORIAN:CE:1993-01-26:CE:1993-01-26
        - 26. Jan. 1993 -> GREGORIAN:CE:1993-01-26:CE:1993-01-26
        - 26. Januar 1993 -> GREGORIAN:CE:1993-01-26:CE:1993-01-26
        - 28.2.-1.12.1515 -> GREGORIAN:CE:1515-02-28:CE:1515-12-01
        - 25.-26.2.0800 -> GREGORIAN:CE:0800-02-25:CE:0800-02-26
        - 1.9.2022-3.1.2024 -> GREGORIAN:CE:2022-09-01:CE:2024-01-03
        - 1848 -> GREGORIAN:CE:1848:CE:1848
        - 1849/1850 -> GREGORIAN:CE:1849:CE:1850
        - 1849/50 -> GREGORIAN:CE:1849:CE:1850
        - 1845-50 -> GREGORIAN:CE:1845:CE:1850
        - 840-50 -> GREGORIAN:CE:840:CE:850
        - 840-1 -> GREGORIAN:CE:840:CE:841
        - 9 BC / 9 B.C. / 9 B.C.E. / 9 BCE -> GREGORIAN:BC:9:BC:9
        - 20 BCE - 50 CE -> GREGORIAN:BC:20:CE:50
        - 1000-900 av. J.-C. -> GREGORIAN:BC:1000:BC:900
        - 45 av. J.-C. -> GREGORIAN:BC:45:BC:45

    Args:
        string: string to check

    Returns:
        (possibly empty) set of DSP-formatted date strings

    Examples:
        ```python
        result = xmllib.find_dates_in_string("1849/1850")
        # result == {"GREGORIAN:CE:1849:CE:1850"}
        ```

        ```python
        result = xmllib.find_dates_in_string("not a valid date")
        # result == set()
        ```

        ```python
        result = xmllib.find_dates_in_string("first date: 2024. Second: 2025.")
        # result == {"GREGORIAN:CE:2024:CE:2024", "GREGORIAN:CE:2025:CE:2025"}
        ```
    """
    # guard against calls on an empty or N/A cell: only non-empty values are searched
    return _find_dates_in_string(string) if is_nonempty_value_internal(string) else set()
431
+
432
+
433
+ _months_dict = {
434
+ "January": 1,
435
+ "Januar": 1,
436
+ "Jan": 1,
437
+ "February": 2,
438
+ "Februar": 2,
439
+ "Feb": 2,
440
+ "March": 3,
441
+ "März": 3,
442
+ "Mar": 3,
443
+ "April": 4,
444
+ "Apr": 4,
445
+ "May": 5,
446
+ "Mai": 5,
447
+ "June": 6,
448
+ "Juni": 6,
449
+ "Jun": 6,
450
+ "July": 7,
451
+ "Juli": 7,
452
+ "Jul": 7,
453
+ "August": 8,
454
+ "Aug": 8,
455
+ "September": 9,
456
+ "Sept": 9,
457
+ "October": 10,
458
+ "Oktober": 10,
459
+ "Oct": 10,
460
+ "Okt": 10,
461
+ "November": 11,
462
+ "Nov": 11,
463
+ "December": 12,
464
+ "Dezember": 12,
465
+ "Dec": 12,
466
+ "Dez": 12,
467
+ }
468
+ all_months = "|".join(_months_dict)
469
+
470
+
471
def _find_dates_in_string(string: str) -> set[str]:
    """
    Search the string for dates in all supported notations and return them as DSP-formatted strings.

    The notations are tried one after the other, from the most specific to the most general one.
    Whenever a pattern matches, the matched parts are cut out of the string,
    so that the subsequent (more general) patterns cannot match them a second time.

    Args:
        string: the text to search

    Returns:
        set of the DSP-formatted dates that were found (empty if there are none)
    """
    # a year with 3 or 4 digits; if it has 4 digits, the first one must be 0, 1 or 2
    year_regex = r"([0-2]?[0-9][0-9][0-9])"
    # a year with 2 to 4 digits
    year_regex_2_or_4_digits = r"((?:[0-2]?[0-9])?[0-9][0-9])"
    month_regex = r"([0-1]?[0-9])"
    day_regex = r"([0-3]?[0-9])"
    sep_regex = r"[\./]"
    # a date must not be directly preceded or followed by an ASCII letter or digit
    lookbehind = r"(?<![0-9A-Za-z])"
    lookahead = r"(?![0-9A-Za-z])"
    range_operator_regex = r" ?- ?"

    remaining_string = string
    # the converters return None for invalid matches; these entries are filtered out at the end
    results: set[str | None] = set()

    # dates that are already in the DSP format are taken over as they are
    remaining_string = _extract_already_parsed_date(remaining_string, results)

    # years with an explicit English era (e.g. BC, CE, AD)
    remaining_string = _find_english_BC_or_CE_dates(
        string=remaining_string,
        lookbehind=lookbehind,
        lookahead=lookahead,
        range_operator_regex=range_operator_regex,
        results=results,
    )

    # years with the French era "av. J.-C."
    remaining_string = _find_french_bc_dates(
        string=remaining_string,
        lookbehind=lookbehind,
        lookahead=lookahead,
        range_operator_regex=range_operator_regex,
        results=results,
    )

    # template: 2021-01-01 | 2015_01_02
    iso_dates_regex = rf"{lookbehind}{year_regex}[_-]([0-1][0-9])[_-]([0-3][0-9]){lookahead}"
    if iso_dates := list(regex.finditer(iso_dates_regex, remaining_string)):
        results.update(_from_iso_date(x) for x in iso_dates)
        remaining_string = _remove_used_spans(remaining_string, [x.span() for x in iso_dates])

    # template: 6.-8.3.1948 | 6/2/1947 - 24.03.1948
    # month and year of the start date are optional
    eur_date_range_regex = (
        rf"{lookbehind}"
        rf"{day_regex}{sep_regex}(?:{month_regex}{sep_regex}{year_regex_2_or_4_digits}?)?{range_operator_regex}"
        rf"{day_regex}{sep_regex}{month_regex}{sep_regex}{year_regex_2_or_4_digits}"
        rf"{lookahead}"
    )
    if eur_date_ranges := list(regex.finditer(eur_date_range_regex, remaining_string)):
        results.update(_from_eur_date_range(x) for x in eur_date_ranges)
        remaining_string = _remove_used_spans(remaining_string, [x.span() for x in eur_date_ranges])

    # template: 1.4.2021 | 5/11/2021
    eur_date_regex = rf"{lookbehind}{day_regex}{sep_regex}{month_regex}{sep_regex}{year_regex_2_or_4_digits}{lookahead}"
    if eur_dates := list(regex.finditer(eur_date_regex, remaining_string)):
        results.update(_from_eur_date(x) for x in eur_dates)
        remaining_string = _remove_used_spans(remaining_string, [x.span() for x in eur_dates])

    # template: March 9, 1908 | March5,1908 | May 11, 1906
    monthname_date_regex = rf"{lookbehind}({all_months}) ?{day_regex}, ?{year_regex}{lookahead}"
    if monthname_dates := list(regex.finditer(monthname_date_regex, remaining_string)):
        results.update(_from_monthname_date(x) for x in monthname_dates)
        remaining_string = _remove_used_spans(remaining_string, [x.span() for x in monthname_dates])

    # template: 9 March 1908
    monthname_after_day_regex = rf"{lookbehind}{day_regex} ?({all_months}) ?{year_regex}{lookahead}"
    if monthname_after_days := list(regex.finditer(monthname_after_day_regex, remaining_string)):
        results.update(_from_monthname_after_day(x) for x in monthname_after_days)
        remaining_string = _remove_used_spans(remaining_string, [x.span() for x in monthname_after_days])

    # template: 26. Januar 1993 | 26. Jan. 1993 | 26. Jan 1993
    german_monthname_date_regex = rf"{lookbehind}{day_regex}\.? ?({all_months})\.? ?{year_regex}{lookahead}"
    if german_monthname_dates := list(regex.finditer(german_monthname_date_regex, remaining_string)):
        results.update(_from_german_monthname_date(x) for x in german_monthname_dates)
        remaining_string = _remove_used_spans(remaining_string, [x.span() for x in german_monthname_dates])

    # template: 1849/50 | 1849-50 | 1849/1850
    if year_ranges := list(regex.finditer(lookbehind + year_regex + r"[/-](\d{1,4})" + lookahead, remaining_string)):
        results.update(_from_year_range(x) for x in year_ranges)
        remaining_string = _remove_used_spans(remaining_string, [x.span() for x in year_ranges])

    # template: 1907
    # int() removes leading zeros of the year
    if year_onlies := list(regex.finditer(rf"{lookbehind}{year_regex}{lookahead}", remaining_string)):
        results.update(f"GREGORIAN:CE:{int(x.group(0))}:CE:{int(x.group(0))}" for x in year_onlies)
        remaining_string = _remove_used_spans(remaining_string, [x.span() for x in year_onlies])

    # drop the None entries of matches that could not be converted into a valid date
    return {x for x in results if x}
554
+
555
+
556
+ def _remove_used_spans(string: str, spans: list[tuple[int, int]]) -> str:
557
+ """Once a regex has matched parts of the original string, remove these parts, so that they're not matched again."""
558
+ for start, end in reversed(spans):
559
+ string = string[:start] + string[end:]
560
+ return string
561
+
562
+
563
def _find_english_BC_or_CE_dates(
    string: str,
    lookbehind: str,
    lookahead: str,
    range_operator_regex: str,
    results: set[str | None],
) -> str:
    """
    Find years that carry an English era (BC, BCE, B.C., B.C.E., CE, AD, C.E., A.D.), alone or as range.

    Ranges are searched first, then single BC years, then single CE years.
    After each step, the matched parts are cut out of the string.

    Args:
        string: the text to search
        lookbehind: regex that must hold in front of a date
        lookahead: regex that must hold after a date
        range_operator_regex: regex of the separator between start and end of a range
        results: set to which the found DSP-formatted dates are added (modified in place)

    Returns:
        the input string without the parts that were matched
    """
    eraless_date_regex = r"(\d+)"
    bc_era_regex = r"(?:BC|BCE|B\.C\.|B\.C\.E\.)"
    ce_era_regex = r"(?:CE|AD|C\.E\.|A\.D\.)"
    bc_date_regex = rf"(?:{eraless_date_regex} ?{bc_era_regex})"
    ce_date_regex = rf"(?:{eraless_date_regex} ?{ce_era_regex})"
    bc_or_ce_date_regex = rf"(?:{bc_date_regex}|{ce_date_regex})"
    # the era of the start year is optional, the era of the end year is mandatory
    range_regex = (
        rf"{lookbehind}(?:{bc_or_ce_date_regex}|{eraless_date_regex})"
        rf"{range_operator_regex}"
        rf"{bc_or_ce_date_regex}{lookahead}"
    )

    unconsumed = string
    found_dates: set[str | None] = set()

    range_matches = list(regex.finditer(range_regex, unconsumed))
    if range_matches:
        for range_match in range_matches:
            converted_range = _from_english_BC_or_CE_range(
                string=range_match.group(0),
                range_operator_regex=range_operator_regex,
                bc_era_regex=bc_era_regex,
                ce_era_regex=ce_era_regex,
                eraless_date_regex=eraless_date_regex,
            )
            found_dates.add(converted_range)
        unconsumed = _remove_used_spans(unconsumed, [range_match.span() for range_match in range_matches])

    # single years: first the ones with a BC era, then the ones with a CE era
    for era, single_date_regex in (("BC", bc_date_regex), ("CE", ce_date_regex)):
        single_matches = list(regex.finditer(rf"{lookbehind}{single_date_regex}{lookahead}", unconsumed))
        if not single_matches:
            continue
        for single_match in single_matches:
            year = single_match.group(1)
            found_dates.add(f"GREGORIAN:{era}:{year}:{era}:{year}")
        unconsumed = _remove_used_spans(unconsumed, [single_match.span() for single_match in single_matches])

    # ranges that could not be converted are None and must not end up in the results
    results.update(found for found in found_dates if found)
    return unconsumed
608
+
609
+
610
+ def _from_english_BC_or_CE_range(
611
+ string: str, range_operator_regex: str, bc_era_regex: str, ce_era_regex: str, eraless_date_regex: str
612
+ ) -> str | None:
613
+ split_result = regex.split(range_operator_regex, string)
614
+ if len(split_result) != 2:
615
+ return None
616
+ start_raw, end_raw = split_result
617
+ if regex.search(bc_era_regex, end_raw):
618
+ end_era = "BC"
619
+ elif regex.search(ce_era_regex, end_raw):
620
+ end_era = "CE"
621
+ else:
622
+ return None
623
+
624
+ if regex.search(bc_era_regex, start_raw):
625
+ start_era = "BC"
626
+ elif regex.search(ce_era_regex, start_raw):
627
+ start_era = "CE"
628
+ else:
629
+ start_era = end_era
630
+
631
+ if not (start_year_match := regex.search(eraless_date_regex, start_raw)):
632
+ return None
633
+ if not (end_year_match := regex.search(eraless_date_regex, end_raw)):
634
+ return None
635
+
636
+ return f"GREGORIAN:{start_era}:{start_year_match.group(0)}:{end_era}:{end_year_match.group(0)}"
637
+
638
+
639
def _find_french_bc_dates(
    string: str,
    lookbehind: str,
    lookahead: str,
    range_operator_regex: str,
    results: set[str | None],
) -> str:
    """
    Find years (alone or as range) that are followed by the French era "av. J.-C.".

    Ranges are searched first, then single years.
    A range whose second year is greater than its first year is not accepted as range and stays in the string.

    Args:
        string: the text to search
        lookbehind: regex that must hold in front of a date
        lookahead: regex that must hold after a date
        range_operator_regex: regex of the separator between start and end of a range
        results: set to which the found DSP-formatted dates are added (modified in place)

    Returns:
        the input string without the parts that were accepted as dates
    """
    french_bc_regex = r"av(?:\. |\.| )J\.?-?C\.?"
    year_regex = r"\d{1,5}"
    found_dates: set[str | None] = set()

    year_range_regex = rf"{lookbehind}({year_regex}){range_operator_regex}({year_regex}) {french_bc_regex}{lookahead}"
    accepted_range_spans = []
    for range_match in regex.finditer(year_range_regex, string):
        first_year = int(range_match.group(1))
        second_year = int(range_match.group(2))
        # BC years count downwards, so the second year must not be greater than the first one
        if second_year <= first_year:
            found_dates.add(f"GREGORIAN:BC:{first_year}:BC:{second_year}")
            accepted_range_spans.append(range_match.span())
    without_ranges = _remove_used_spans(string, accepted_range_spans)

    single_year_regex = rf"{lookbehind}({year_regex}) {french_bc_regex}{lookahead}"
    single_year_spans = []
    for single_match in regex.finditer(single_year_regex, without_ranges):
        single_year = int(single_match.group(1))
        found_dates.add(f"GREGORIAN:BC:{single_year}:BC:{single_year}")
        single_year_spans.append(single_match.span())
    without_dates = _remove_used_spans(without_ranges, single_year_spans)

    results.update(found for found in found_dates if found)
    return without_dates
668
+
669
+
670
+ def _from_iso_date(iso_date: Match[str]) -> str | None:
671
+ year = int(iso_date.group(1))
672
+ month = int(iso_date.group(2))
673
+ day = int(iso_date.group(3))
674
+ try:
675
+ date = datetime.date(year, month, day)
676
+ return f"GREGORIAN:CE:{date.isoformat()}:CE:{date.isoformat()}"
677
+ except ValueError:
678
+ return None
679
+
680
+
681
+ def _expand_2_digit_year(year: int) -> int:
682
+ current_year = datetime.date.today().year - 2000
683
+ if year <= current_year:
684
+ return year + 2000
685
+ elif year <= 99:
686
+ return year + 1900
687
+ else:
688
+ return year
689
+
690
+
691
+ def _from_eur_date_range(eur_date_range: Match[str]) -> str | None:
692
+ startday = int(eur_date_range.group(1))
693
+ startmonth = int(eur_date_range.group(2)) if eur_date_range.group(2) else int(eur_date_range.group(5))
694
+ startyear = int(eur_date_range.group(3)) if eur_date_range.group(3) else int(eur_date_range.group(6))
695
+ startyear = _expand_2_digit_year(startyear)
696
+ endday = int(eur_date_range.group(4))
697
+ endmonth = int(eur_date_range.group(5))
698
+ endyear = int(eur_date_range.group(6))
699
+ endyear = _expand_2_digit_year(endyear)
700
+ try:
701
+ startdate = datetime.date(startyear, startmonth, startday)
702
+ enddate = datetime.date(endyear, endmonth, endday)
703
+ except ValueError:
704
+ return None
705
+ if enddate < startdate:
706
+ return None
707
+ return f"GREGORIAN:CE:{startdate.isoformat()}:CE:{enddate.isoformat()}"
708
+
709
+
710
+ def _from_eur_date(eur_date: Match[str]) -> str | None:
711
+ startday = int(eur_date.group(1))
712
+ startmonth = int(eur_date.group(2))
713
+ startyear = int(eur_date.group(3))
714
+ startyear = _expand_2_digit_year(startyear)
715
+ try:
716
+ date = datetime.date(startyear, startmonth, startday)
717
+ return f"GREGORIAN:CE:{date.isoformat()}:CE:{date.isoformat()}"
718
+ except ValueError:
719
+ return None
720
+
721
+
722
def _from_monthname_date(monthname_date: Match[str]) -> str | None:
    """
    Convert a match whose groups 1, 2 and 3 are month name, day and year (e.g. 'March 9, 1908')
    into a DSP date string. Returns None if the components don't form a valid calendar date.
    """
    month_name, day_raw, year_raw = monthname_date.group(1, 2, 3)
    day = int(day_raw)
    month = _months_dict[month_name]
    year = int(year_raw)
    try:
        iso_string = datetime.date(year, month, day).isoformat()
    except ValueError:
        return None
    return f"GREGORIAN:CE:{iso_string}:CE:{iso_string}"
731
+
732
+
733
def _from_monthname_after_day(monthname_after_day: Match[str]) -> str | None:
    """
    Convert a match whose groups 1, 2 and 3 are day, month name and year (e.g. '9 March 1908')
    into a DSP date string. Returns None if the components don't form a valid calendar date.
    """
    day_raw, month_name, year_raw = monthname_after_day.group(1, 2, 3)
    day = int(day_raw)
    month = _months_dict[month_name]
    year = int(year_raw)
    try:
        iso_string = datetime.date(year, month, day).isoformat()
    except ValueError:
        return None
    return f"GREGORIAN:CE:{iso_string}:CE:{iso_string}"
742
+
743
+
744
def _from_german_monthname_date(german_monthname_date: Match[str]) -> str | None:
    """
    Convert a match whose groups 1, 2 and 3 are day, month name and year (e.g. '26. Januar 1993')
    into a DSP date string. Returns None if the components don't form a valid calendar date.
    """
    day_raw, month_name, year_raw = german_monthname_date.group(1, 2, 3)
    day = int(day_raw)
    month = _months_dict[month_name]
    year = int(year_raw)
    try:
        iso_string = datetime.date(year, month, day).isoformat()
    except ValueError:
        return None
    return f"GREGORIAN:CE:{iso_string}:CE:{iso_string}"
753
+
754
+
755
+ def _from_year_range(year_range: Match[str]) -> str | None:
756
+ startyear = int(year_range.group(1))
757
+ endyear = int(year_range.group(2))
758
+ if endyear // 10 == 0:
759
+ # endyear is only 1-digit: add the first 2-3 digits of startyear
760
+ endyear = startyear // 10 * 10 + endyear
761
+ elif endyear // 100 == 0:
762
+ # endyear is only 2-digit: add the first 1-2 digits of startyear
763
+ endyear = startyear // 100 * 100 + endyear
764
+ if endyear <= startyear:
765
+ return None
766
+ return f"GREGORIAN:CE:{startyear}:CE:{endyear}"
767
+
768
+
769
def _extract_already_parsed_date(string: str, results: set[str | None]) -> str:
    """
    Find dates that are already DSP-formatted (calendar, era and year(s)), and add them unchanged to the results.

    Args:
        string: the text to search
        results: set to which the found dates are added (modified in place)

    Returns:
        the input string without the parts that were matched
    """
    rgx_year = r"\d+(-\d{2}(-\d{2})?)?"
    era_with_colon = r"(CE:|BC:)"
    # the era of the first date is mandatory, the era of the second date is optional
    rgx = rf"(GREGORIAN|JULIAN|ISLAMIC):{era_with_colon}{rgx_year}:{era_with_colon}?{rgx_year}"
    parsed_dates = list(regex.finditer(rgx, string))
    if not parsed_dates:
        return string
    results.update(parsed_date.group(0) for parsed_date in parsed_dates)
    return _remove_used_spans(string, [parsed_date.span() for parsed_date in parsed_dates])