invenio-vocabularies 1.2.0__py2.py3-none-any.whl → 6.3.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of invenio-vocabularies might be problematic. Click here for more details.

Files changed (239)
  1. invenio_vocabularies/__init__.py +2 -2
  2. invenio_vocabularies/administration/__init__.py +10 -0
  3. invenio_vocabularies/administration/views/__init__.py +10 -0
  4. invenio_vocabularies/administration/views/vocabularies.py +45 -0
  5. invenio_vocabularies/alembic/4a9a4fd235f8_create_vocabulary_schemes.py +4 -4
  6. invenio_vocabularies/alembic/4f365fced43f_create_vocabularies_tables.py +2 -2
  7. invenio_vocabularies/alembic/55a700f897b6_add_names_and_afiliations_pid_column.py +96 -0
  8. invenio_vocabularies/alembic/676dd587542d_create_funders_vocabulary_table.py +1 -1
  9. invenio_vocabularies/alembic/e1146238edd3_create_awards_table.py +1 -1
  10. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/.eslintrc.yml +11 -0
  11. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/.prettierrc +1 -0
  12. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/index.js +7 -0
  13. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/package.json +25 -0
  14. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/AwardResults.js +95 -0
  15. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/CustomAwardForm.js +139 -0
  16. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FunderDropdown.js +87 -0
  17. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.js +223 -0
  18. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.test.js +1 -0
  19. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingFieldItem.js +152 -0
  20. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js +270 -0
  21. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/NoAwardResults.js +37 -0
  22. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/index.js +8 -0
  23. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/index.js +7 -0
  24. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/index.js +7 -0
  25. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/index.js +7 -0
  26. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/i18next-scanner.config.js +63 -0
  27. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/i18next.js +36 -0
  28. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/index.js +1 -0
  29. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/package.json +53 -0
  30. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/scripts/compileCatalog.js +39 -0
  31. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/scripts/initCatalog.js +19 -0
  32. invenio_vocabularies/cli.py +31 -44
  33. invenio_vocabularies/config.py +74 -7
  34. invenio_vocabularies/contrib/affiliations/affiliations.py +22 -6
  35. invenio_vocabularies/contrib/affiliations/api.py +1 -2
  36. invenio_vocabularies/contrib/affiliations/config.py +10 -5
  37. invenio_vocabularies/contrib/affiliations/datastreams.py +186 -0
  38. invenio_vocabularies/contrib/affiliations/facets.py +36 -0
  39. invenio_vocabularies/contrib/affiliations/jsonschemas/affiliations/affiliation-v1.0.0.json +38 -7
  40. invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v1.0.0.json +22 -1
  41. invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v2.0.0.json +171 -0
  42. invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v1.0.0.json +22 -1
  43. invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v2.0.0.json +171 -0
  44. invenio_vocabularies/contrib/affiliations/mappings/v7/affiliations/affiliation-v1.0.0.json +22 -1
  45. invenio_vocabularies/contrib/affiliations/schema.py +23 -5
  46. invenio_vocabularies/contrib/affiliations/services.py +1 -2
  47. invenio_vocabularies/contrib/awards/awards.py +18 -6
  48. invenio_vocabularies/contrib/awards/config.py +1 -3
  49. invenio_vocabularies/contrib/awards/datastreams.py +246 -3
  50. invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json +41 -0
  51. invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json +53 -1
  52. invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json +53 -1
  53. invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json +53 -1
  54. invenio_vocabularies/contrib/awards/schema.py +27 -35
  55. invenio_vocabularies/contrib/awards/serializer.py +9 -1
  56. invenio_vocabularies/contrib/awards/services.py +1 -2
  57. invenio_vocabularies/contrib/common/__init__.py +9 -0
  58. invenio_vocabularies/contrib/common/openaire/__init__.py +9 -0
  59. invenio_vocabularies/contrib/common/openaire/datastreams.py +84 -0
  60. invenio_vocabularies/contrib/common/ror/__init__.py +9 -0
  61. invenio_vocabularies/contrib/common/ror/datastreams.py +220 -0
  62. invenio_vocabularies/contrib/funders/config.py +12 -5
  63. invenio_vocabularies/contrib/funders/datastreams.py +40 -62
  64. invenio_vocabularies/contrib/funders/facets.py +13 -5
  65. invenio_vocabularies/contrib/funders/funders.py +4 -2
  66. invenio_vocabularies/contrib/funders/jsonschemas/funders/funder-v1.0.0.json +36 -1
  67. invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v1.0.0.json +22 -1
  68. invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v2.0.0.json +156 -0
  69. invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v1.0.0.json +22 -1
  70. invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v2.0.0.json +156 -0
  71. invenio_vocabularies/contrib/funders/mappings/v7/funders/funder-v1.0.0.json +22 -1
  72. invenio_vocabularies/contrib/funders/schema.py +8 -0
  73. invenio_vocabularies/contrib/funders/serializer.py +2 -1
  74. invenio_vocabularies/contrib/names/config.py +5 -3
  75. invenio_vocabularies/contrib/names/datastreams.py +177 -38
  76. invenio_vocabularies/contrib/names/jsonschemas/names/name-v1.0.0.json +2 -6
  77. invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v1.0.0.json +3 -0
  78. invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v2.0.0.json +150 -0
  79. invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v1.0.0.json +3 -0
  80. invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v2.0.0.json +150 -0
  81. invenio_vocabularies/contrib/names/mappings/v7/names/name-v1.0.0.json +3 -0
  82. invenio_vocabularies/contrib/names/names.py +29 -13
  83. invenio_vocabularies/contrib/names/permissions.py +20 -0
  84. invenio_vocabularies/contrib/names/s3client.py +44 -0
  85. invenio_vocabularies/contrib/names/schema.py +31 -4
  86. invenio_vocabularies/contrib/subjects/config.py +9 -3
  87. invenio_vocabularies/contrib/subjects/datastreams.py +61 -0
  88. invenio_vocabularies/contrib/subjects/euroscivoc/__init__.py +9 -0
  89. invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py +171 -0
  90. invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json +31 -0
  91. invenio_vocabularies/contrib/subjects/mappings/os-v1/subjects/subject-v1.0.0.json +35 -0
  92. invenio_vocabularies/contrib/subjects/mappings/os-v2/subjects/subject-v1.0.0.json +35 -0
  93. invenio_vocabularies/contrib/subjects/mappings/v7/subjects/subject-v1.0.0.json +35 -0
  94. invenio_vocabularies/contrib/subjects/mesh/__init__.py +9 -0
  95. invenio_vocabularies/contrib/subjects/mesh/datastreams.py +43 -0
  96. invenio_vocabularies/contrib/subjects/schema.py +47 -5
  97. invenio_vocabularies/contrib/subjects/subjects.py +10 -0
  98. invenio_vocabularies/datastreams/datastreams.py +61 -13
  99. invenio_vocabularies/datastreams/factories.py +1 -2
  100. invenio_vocabularies/datastreams/readers.py +138 -29
  101. invenio_vocabularies/datastreams/tasks.py +37 -0
  102. invenio_vocabularies/datastreams/transformers.py +17 -27
  103. invenio_vocabularies/datastreams/writers.py +116 -14
  104. invenio_vocabularies/datastreams/xml.py +34 -0
  105. invenio_vocabularies/ext.py +59 -5
  106. invenio_vocabularies/factories.py +137 -0
  107. invenio_vocabularies/jobs.py +133 -0
  108. invenio_vocabularies/proxies.py +2 -2
  109. invenio_vocabularies/records/jsonschemas/vocabularies/definitions-v1.0.0.json +7 -0
  110. invenio_vocabularies/records/jsonschemas/vocabularies/vocabulary-v1.0.0.json +1 -4
  111. invenio_vocabularies/records/mappings/os-v1/vocabularies/vocabulary-v1.0.0.json +3 -3
  112. invenio_vocabularies/records/mappings/os-v2/vocabularies/vocabulary-v1.0.0.json +3 -3
  113. invenio_vocabularies/records/mappings/v7/vocabularies/vocabulary-v1.0.0.json +3 -3
  114. invenio_vocabularies/records/models.py +8 -10
  115. invenio_vocabularies/records/pidprovider.py +1 -2
  116. invenio_vocabularies/records/systemfields/relations.py +2 -2
  117. invenio_vocabularies/resources/__init__.py +9 -1
  118. invenio_vocabularies/resources/config.py +105 -0
  119. invenio_vocabularies/resources/resource.py +31 -41
  120. invenio_vocabularies/resources/schema.py +2 -1
  121. invenio_vocabularies/services/__init__.py +5 -2
  122. invenio_vocabularies/services/config.py +179 -0
  123. invenio_vocabularies/services/custom_fields/__init__.py +6 -2
  124. invenio_vocabularies/services/custom_fields/subject.py +82 -0
  125. invenio_vocabularies/services/custom_fields/vocabulary.py +19 -9
  126. invenio_vocabularies/services/facets.py +67 -37
  127. invenio_vocabularies/services/permissions.py +3 -1
  128. invenio_vocabularies/services/results.py +110 -0
  129. invenio_vocabularies/services/schema.py +39 -2
  130. invenio_vocabularies/services/service.py +46 -94
  131. invenio_vocabularies/services/tasks.py +1 -1
  132. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/subjects.html +23 -0
  133. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabularies-list.html +12 -0
  134. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabulary-details.html +71 -0
  135. invenio_vocabularies/translations/af/LC_MESSAGES/messages.mo +0 -0
  136. invenio_vocabularies/translations/af/LC_MESSAGES/messages.po +1 -1
  137. invenio_vocabularies/translations/ar/LC_MESSAGES/messages.mo +0 -0
  138. invenio_vocabularies/translations/ar/LC_MESSAGES/messages.po +9 -8
  139. invenio_vocabularies/translations/bg/LC_MESSAGES/messages.mo +0 -0
  140. invenio_vocabularies/translations/bg/LC_MESSAGES/messages.po +1 -1
  141. invenio_vocabularies/translations/ca/LC_MESSAGES/messages.mo +0 -0
  142. invenio_vocabularies/translations/ca/LC_MESSAGES/messages.po +1 -1
  143. invenio_vocabularies/translations/cs/LC_MESSAGES/messages.mo +0 -0
  144. invenio_vocabularies/translations/cs/LC_MESSAGES/messages.po +1 -1
  145. invenio_vocabularies/translations/da/LC_MESSAGES/messages.mo +0 -0
  146. invenio_vocabularies/translations/da/LC_MESSAGES/messages.po +1 -1
  147. invenio_vocabularies/translations/de/LC_MESSAGES/messages.mo +0 -0
  148. invenio_vocabularies/translations/de/LC_MESSAGES/messages.po +1 -1
  149. invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.mo +0 -0
  150. invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.po +139 -0
  151. invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.mo +0 -0
  152. invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.po +139 -0
  153. invenio_vocabularies/translations/el/LC_MESSAGES/messages.mo +0 -0
  154. invenio_vocabularies/translations/el/LC_MESSAGES/messages.po +1 -1
  155. invenio_vocabularies/translations/en/LC_MESSAGES/messages.mo +0 -0
  156. invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.mo +0 -0
  157. invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.po +139 -0
  158. invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.mo +0 -0
  159. invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.po +139 -0
  160. invenio_vocabularies/translations/es/LC_MESSAGES/messages.mo +0 -0
  161. invenio_vocabularies/translations/es/LC_MESSAGES/messages.po +1 -1
  162. invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.mo +0 -0
  163. invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.po +139 -0
  164. invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.mo +0 -0
  165. invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.po +139 -0
  166. invenio_vocabularies/translations/et/LC_MESSAGES/messages.mo +0 -0
  167. invenio_vocabularies/translations/et/LC_MESSAGES/messages.po +1 -1
  168. invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.mo +0 -0
  169. invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.po +1 -1
  170. invenio_vocabularies/translations/fa/LC_MESSAGES/messages.mo +0 -0
  171. invenio_vocabularies/translations/fa/LC_MESSAGES/messages.po +1 -1
  172. invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.mo +0 -0
  173. invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.po +139 -0
  174. invenio_vocabularies/translations/fr/LC_MESSAGES/messages.mo +0 -0
  175. invenio_vocabularies/translations/fr/LC_MESSAGES/messages.po +1 -1
  176. invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.mo +0 -0
  177. invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.po +139 -0
  178. invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.mo +0 -0
  179. invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.po +139 -0
  180. invenio_vocabularies/translations/gl/LC_MESSAGES/messages.mo +0 -0
  181. invenio_vocabularies/translations/gl/LC_MESSAGES/messages.po +1 -1
  182. invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.mo +0 -0
  183. invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.po +139 -0
  184. invenio_vocabularies/translations/hr/LC_MESSAGES/messages.mo +0 -0
  185. invenio_vocabularies/translations/hr/LC_MESSAGES/messages.po +1 -1
  186. invenio_vocabularies/translations/hu/LC_MESSAGES/messages.mo +0 -0
  187. invenio_vocabularies/translations/hu/LC_MESSAGES/messages.po +4 -4
  188. invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.mo +0 -0
  189. invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.po +139 -0
  190. invenio_vocabularies/translations/it/LC_MESSAGES/messages.mo +0 -0
  191. invenio_vocabularies/translations/it/LC_MESSAGES/messages.po +4 -3
  192. invenio_vocabularies/translations/ja/LC_MESSAGES/messages.mo +0 -0
  193. invenio_vocabularies/translations/ja/LC_MESSAGES/messages.po +1 -1
  194. invenio_vocabularies/translations/ka/LC_MESSAGES/messages.mo +0 -0
  195. invenio_vocabularies/translations/ka/LC_MESSAGES/messages.po +1 -1
  196. invenio_vocabularies/translations/lt/LC_MESSAGES/messages.mo +0 -0
  197. invenio_vocabularies/translations/lt/LC_MESSAGES/messages.po +1 -1
  198. invenio_vocabularies/translations/messages.pot +95 -48
  199. invenio_vocabularies/translations/ne/LC_MESSAGES/messages.mo +0 -0
  200. invenio_vocabularies/translations/ne/LC_MESSAGES/messages.po +139 -0
  201. invenio_vocabularies/translations/no/LC_MESSAGES/messages.mo +0 -0
  202. invenio_vocabularies/translations/no/LC_MESSAGES/messages.po +1 -1
  203. invenio_vocabularies/translations/pl/LC_MESSAGES/messages.mo +0 -0
  204. invenio_vocabularies/translations/pl/LC_MESSAGES/messages.po +1 -1
  205. invenio_vocabularies/translations/pt/LC_MESSAGES/messages.mo +0 -0
  206. invenio_vocabularies/translations/pt/LC_MESSAGES/messages.po +1 -1
  207. invenio_vocabularies/translations/ro/LC_MESSAGES/messages.mo +0 -0
  208. invenio_vocabularies/translations/ro/LC_MESSAGES/messages.po +1 -1
  209. invenio_vocabularies/translations/ru/LC_MESSAGES/messages.mo +0 -0
  210. invenio_vocabularies/translations/ru/LC_MESSAGES/messages.po +1 -1
  211. invenio_vocabularies/translations/rw/LC_MESSAGES/messages.mo +0 -0
  212. invenio_vocabularies/translations/rw/LC_MESSAGES/messages.po +1 -1
  213. invenio_vocabularies/translations/sk/LC_MESSAGES/messages.mo +0 -0
  214. invenio_vocabularies/translations/sk/LC_MESSAGES/messages.po +1 -1
  215. invenio_vocabularies/translations/sv/LC_MESSAGES/messages.mo +0 -0
  216. invenio_vocabularies/translations/sv/LC_MESSAGES/messages.po +4 -3
  217. invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.mo +0 -0
  218. invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.po +139 -0
  219. invenio_vocabularies/translations/tr/LC_MESSAGES/messages.mo +0 -0
  220. invenio_vocabularies/translations/tr/LC_MESSAGES/messages.po +1 -1
  221. invenio_vocabularies/translations/uk/LC_MESSAGES/messages.mo +0 -0
  222. invenio_vocabularies/translations/uk/LC_MESSAGES/messages.po +17 -13
  223. invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.mo +0 -0
  224. invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.po +139 -0
  225. invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.mo +0 -0
  226. invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.po +1 -1
  227. invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.mo +0 -0
  228. invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.po +1 -1
  229. invenio_vocabularies/views.py +12 -26
  230. invenio_vocabularies/webpack.py +51 -0
  231. invenio_vocabularies-6.3.1.dist-info/METADATA +346 -0
  232. invenio_vocabularies-6.3.1.dist-info/RECORD +306 -0
  233. {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/WHEEL +1 -1
  234. {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/entry_points.txt +20 -0
  235. invenio_vocabularies-1.2.0.dist-info/METADATA +0 -133
  236. invenio_vocabularies-1.2.0.dist-info/RECORD +0 -220
  237. {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/AUTHORS.rst +0 -0
  238. {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/LICENSE +0 -0
  239. {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,43 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2022-2024 CERN.
4
+ # Copyright (C) 2024 California Institute of Technology.
5
+ #
6
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
7
+ # modify it under the terms of the MIT License; see LICENSE file for more
8
+ # details.
9
+
10
+ """MeSH subjects datastreams, transformers, writers and readers."""
11
+
12
+ from invenio_vocabularies.datastreams.transformers import (
13
+ BaseTransformer,
14
+ TransformerError,
15
+ )
16
+
17
+
18
+ class MeshSubjectsTransformer(BaseTransformer):
19
+ """MeSH subjects Transformer."""
20
+
21
+ def apply(self, stream_entry, *args, **kwargs):
22
+ """Apply transformation on steam entry."""
23
+ entry_data = stream_entry.entry
24
+
25
+ # ID in MeSH data is the URL, ex. https://id.nlm.nih.gov/mesh/D000001
26
+ # We just want to use the ID prefixed by "mesh:""
27
+ try:
28
+ mesh_id = entry_data["id"].split("/")[-1]
29
+ except Exception:
30
+ raise TransformerError("Not a valid MeSH ID.")
31
+
32
+ entry_data["id"] = "mesh:" + mesh_id
33
+ return stream_entry
34
+
35
+
36
+ VOCABULARIES_DATASTREAM_READERS = {}
37
+ """MeSH datastream readers."""
38
+
39
+ VOCABULARIES_DATASTREAM_WRITERS = {}
40
+ """MeSH subject datastream writers."""
41
+
42
+ VOCABULARIES_DATASTREAM_TRANSFORMERS = {"mesh-subjects": MeshSubjectsTransformer}
43
+ """MeSH subjects datastream transformers."""
@@ -1,7 +1,8 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  # Copyright (C) 2021 Northwestern University.
4
- # Copyright (C) 2021-2022 CERN.
4
+ # Copyright (C) 2021-2024 CERN.
5
+ # Copyright (C) 2024 University of Münster.
5
6
  #
6
7
  # Invenio-Vocabularies is free software; you can redistribute it and/or
7
8
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -9,10 +10,19 @@
9
10
 
10
11
  """Subjects schema."""
11
12
 
12
- from invenio_i18n import lazy_gettext as _
13
- from marshmallow_utils.fields import SanitizedUnicode
13
+ from functools import partial
14
14
 
15
- from ...services.schema import BaseVocabularySchema, ContribVocabularyRelationSchema
15
+ from invenio_i18n import get_locale
16
+ from marshmallow import Schema, fields, pre_load
17
+ from marshmallow_utils.fields import IdentifierSet, SanitizedUnicode
18
+ from marshmallow_utils.schemas import IdentifierSchema
19
+
20
+ from ...services.schema import (
21
+ BaseVocabularySchema,
22
+ ContribVocabularyRelationSchema,
23
+ i18n_strings,
24
+ )
25
+ from .config import subject_schemes
16
26
 
17
27
 
18
28
  class SubjectSchema(BaseVocabularySchema):
@@ -24,6 +34,26 @@ class SubjectSchema(BaseVocabularySchema):
24
34
  id = SanitizedUnicode(required=True)
25
35
  scheme = SanitizedUnicode(required=True)
26
36
  subject = SanitizedUnicode(required=True)
37
+ title = i18n_strings
38
+ props = fields.Dict(keys=SanitizedUnicode(), values=SanitizedUnicode())
39
+ identifiers = IdentifierSet(
40
+ fields.Nested(
41
+ partial(
42
+ IdentifierSchema,
43
+ allowed_schemes=subject_schemes,
44
+ identifier_required=False,
45
+ )
46
+ )
47
+ )
48
+ synonyms = fields.List(SanitizedUnicode())
49
+
50
+ @pre_load
51
+ def add_subject_from_title(self, data, **kwargs):
52
+ """Add subject from title if not present."""
53
+ locale = get_locale().language
54
+ if "subject" not in data:
55
+ data["subject"] = data["title"].get(locale) or data["title"].values()[0]
56
+ return data
27
57
 
28
58
 
29
59
  class SubjectRelationSchema(ContribVocabularyRelationSchema):
@@ -32,4 +62,16 @@ class SubjectRelationSchema(ContribVocabularyRelationSchema):
32
62
  ftf_name = "subject"
33
63
  parent_field_name = "subjects"
34
64
  subject = SanitizedUnicode()
35
- scheme = SanitizedUnicode()
65
+ scheme = SanitizedUnicode(dump_only=True)
66
+ title = fields.Dict(dump_only=True)
67
+ props = fields.Dict(dump_only=True)
68
+ identifiers = IdentifierSet(
69
+ fields.Nested(
70
+ partial(
71
+ IdentifierSchema,
72
+ allowed_schemes=subject_schemes,
73
+ identifier_required=False,
74
+ )
75
+ )
76
+ )
77
+ synonyms = fields.List(SanitizedUnicode(), dump_only=True)
@@ -9,9 +9,11 @@
9
9
 
10
10
  """Vocabulary subjects."""
11
11
 
12
+ from flask_resources import JSONSerializer, ResponseHandler
12
13
  from invenio_records.dumpers import SearchDumper
13
14
  from invenio_records.dumpers.indexedat import IndexedAtDumperExt
14
15
  from invenio_records_resources.factories.factory import RecordTypeFactory
16
+ from invenio_records_resources.resources.records.headers import etag_headers
15
17
 
16
18
  from ...records.pidprovider import PIDProviderFactory
17
19
  from ...records.systemfields import BaseVocabularyPIDFieldContext
@@ -42,4 +44,12 @@ record_type = RecordTypeFactory(
42
44
  permission_policy_cls=PermissionPolicy,
43
45
  # Resource layer
44
46
  endpoint_route="/subjects",
47
+ resource_cls_attrs={
48
+ "response_handlers": {
49
+ "application/json": ResponseHandler(JSONSerializer(), headers=etag_headers),
50
+ "application/vnd.inveniordm.v1+json": ResponseHandler(
51
+ JSONSerializer(), headers=etag_headers
52
+ ),
53
+ }
54
+ },
45
55
  )
@@ -1,6 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2021-2022 CERN.
3
+ # Copyright (C) 2021-2024 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -8,17 +8,41 @@
8
8
 
9
9
  """Base data stream."""
10
10
 
11
+ from flask import current_app
12
+
11
13
  from .errors import ReaderError, TransformerError, WriterError
12
14
 
13
15
 
14
16
  class StreamEntry:
15
17
  """Object to encapsulate streams processing."""
16
18
 
17
- def __init__(self, entry, errors=None):
18
- """Constructor."""
19
+ def __init__(self, entry, record=None, errors=None, op_type=None, exc=None):
20
+ """Constructor for the StreamEntry class.
21
+
22
+ :param entry (object): The entry object, usually a record dict.
23
+ :param record (object): The record object, usually a record class.
24
+ :param errors (list, optional): List of errors. Defaults to None.
25
+ :param op_type (str, optional): The operation type. Defaults to None.
26
+ :param exc (str, optional): The raised unhandled exception. Defaults to None.
27
+ """
19
28
  self.entry = entry
29
+ self.record = record
20
30
  self.filtered = False
21
31
  self.errors = errors or []
32
+ self.op_type = op_type
33
+ self.exc = exc
34
+
35
+ def log_errors(self, logger=None):
36
+ """Log the errors using the provided logger or the default logger.
37
+
38
+ :param logger (logging.Logger, optional): Logger instance to use. Defaults to None.
39
+ """
40
+ if logger is None:
41
+ logger = current_app.logger
42
+ for error in self.errors:
43
+ logger.error(f"Error in entry {self.entry}: {error}")
44
+ if self.exc:
45
+ logger.error(f"Exception in entry {self.entry}: {self.exc}")
22
46
 
23
47
 
24
48
  class DataStream:
@@ -39,15 +63,10 @@ class DataStream:
39
63
  """Checks if an stream_entry should be filtered out (skipped)."""
40
64
  return False
41
65
 
42
- def process(self, *args, **kwargs):
43
- """Iterates over the entries.
44
-
45
- Uses the reader to get the raw entries and transforms them.
46
- It will iterate over the `StreamEntry` objects returned by
47
- the reader, apply the transformations and yield the result of
48
- writing it.
49
- """
50
- for stream_entry in self.read():
66
+ def process_batch(self, batch, write_many=False):
67
+ """Process a batch of entries."""
68
+ transformed_entries = []
69
+ for stream_entry in batch:
51
70
  if stream_entry.errors:
52
71
  yield stream_entry # reading errors
53
72
  else:
@@ -58,7 +77,31 @@ class DataStream:
58
77
  transformed_entry.filtered = True
59
78
  yield transformed_entry
60
79
  else:
61
- yield self.write(transformed_entry)
80
+ transformed_entries.append(transformed_entry)
81
+ if transformed_entries:
82
+ if write_many:
83
+ yield from self.batch_write(transformed_entries)
84
+ else:
85
+ yield from (self.write(entry) for entry in transformed_entries)
86
+
87
+ def process(self, batch_size=100, write_many=False, *args, **kwargs):
88
+ """Iterates over the entries.
89
+
90
+ Uses the reader to get the raw entries and transforms them.
91
+ It will iterate over the `StreamEntry` objects returned by
92
+ the reader, apply the transformations and yield the result of
93
+ writing it.
94
+ """
95
+ batch = []
96
+ for stream_entry in self.read():
97
+ batch.append(stream_entry)
98
+ if len(batch) >= batch_size:
99
+ yield from self.process_batch(batch, write_many=write_many)
100
+ batch = []
101
+
102
+ # Process any remaining entries in the last batch
103
+ if batch:
104
+ yield from self.process_batch(batch, write_many=write_many)
62
105
 
63
106
  def read(self):
64
107
  """Recursively read the entries."""
@@ -107,6 +150,11 @@ class DataStream:
107
150
 
108
151
  return stream_entry
109
152
 
153
+ def batch_write(self, stream_entries, *args, **kwargs):
154
+ """Apply the transformations to an stream_entry. Errors are handler in the service layer."""
155
+ for writer in self._writers:
156
+ yield from writer.write_many(stream_entries)
157
+
110
158
  def total(self, *args, **kwargs):
111
159
  """The total of entries obtained from the origin."""
112
160
  raise NotImplementedError()
@@ -1,6 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2021-2022 CERN.
3
+ # Copyright (C) 2021-2024 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -36,7 +36,6 @@ class Factory:
36
36
  try:
37
37
  type_ = config["type"]
38
38
  args = config.get("args", {})
39
-
40
39
  return cls.options()[type_](**args)
41
40
  except KeyError:
42
41
  raise FactoryError(name=cls.FACTORY_NAME, key=type_)
@@ -1,6 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2021-2022 CERN.
3
+ # Copyright (C) 2021-2024 CERN.
4
+ # Copyright (C) 2024 University of Münster.
4
5
  #
5
6
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
7
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -15,14 +16,21 @@ import re
15
16
  import tarfile
16
17
  import zipfile
17
18
  from abc import ABC, abstractmethod
18
- from collections import defaultdict
19
19
  from json.decoder import JSONDecodeError
20
20
 
21
21
  import requests
22
22
  import yaml
23
+ from lxml import etree
24
+ from lxml.html import fromstring
23
25
  from lxml.html import parse as html_parse
24
26
 
25
27
  from .errors import ReaderError
28
+ from .xml import etree_to_dict
29
+
30
+ try:
31
+ import oaipmh_scythe
32
+ except ImportError:
33
+ oaipmh_scythe = None
26
34
 
27
35
 
28
36
  class BaseReader(ABC):
@@ -79,7 +87,12 @@ class TarReader(BaseReader):
79
87
  def read(self, item=None, *args, **kwargs):
80
88
  """Opens a tar archive or uses the given file pointer."""
81
89
  if item:
82
- yield from self._iter(fp=item, *args, **kwargs)
90
+ if isinstance(item, tarfile.TarFile):
91
+ yield from self._iter(fp=item, *args, **kwargs)
92
+ else:
93
+ # If the item is not already a TarFile (e.g. if it is a BytesIO), try to create a TarFile from the item.
94
+ with tarfile.open(mode=self._mode, fileobj=item) as archive:
95
+ yield from self._iter(fp=archive, *args, **kwargs)
83
96
  else:
84
97
  with tarfile.open(self._origin, self._mode) as archive:
85
98
  yield from self._iter(fp=archive, *args, **kwargs)
@@ -135,7 +148,12 @@ class ZipReader(BaseReader):
135
148
  """Opens a Zip archive or uses the given file pointer."""
136
149
  # https://docs.python.org/3/library/zipfile.html
137
150
  if item:
138
- yield from self._iter(fp=item, *args, **kwargs)
151
+ if isinstance(item, zipfile.ZipFile):
152
+ yield from self._iter(fp=item, *args, **kwargs)
153
+ else:
154
+ # If the item is not already a ZipFile (e.g. if it is a BytesIO), try to create a ZipFile from the item.
155
+ with zipfile.ZipFile(item, **self._options) as archive:
156
+ yield from self._iter(fp=archive, *args, **kwargs)
139
157
  else:
140
158
  with zipfile.ZipFile(self._origin, **self._options) as archive:
141
159
  yield from self._iter(fp=archive, *args, **kwargs)
@@ -206,34 +224,125 @@ class CSVReader(BaseReader):
206
224
  class XMLReader(BaseReader):
207
225
  """XML reader."""
208
226
 
209
- @classmethod
210
- def _etree_to_dict(cls, tree):
211
- d = {tree.tag: {} if tree.attrib else None}
212
- children = list(tree)
213
- if children:
214
- dd = defaultdict(list)
215
- for dc in map(cls._etree_to_dict, children):
216
- for k, v in dc.items():
217
- dd[k].append(v)
218
- d = {tree.tag: {k: v[0] if len(v) == 1 else v for k, v in dd.items()}}
219
- if tree.attrib:
220
- d[tree.tag].update(("@" + k, v) for k, v in tree.attrib.items())
221
- if tree.text:
222
- text = tree.text.strip()
223
- if children or tree.attrib:
224
- if text:
225
- d[tree.tag]["#text"] = text
226
- else:
227
- d[tree.tag] = text
228
- return d
227
+ def __init__(self, root_element=None, *args, **kwargs):
228
+ """Constructor."""
229
+ self.root_element = root_element
230
+ super().__init__(*args, **kwargs)
229
231
 
230
232
  def _iter(self, fp, *args, **kwargs):
231
233
  """Read and parse an XML file to dict."""
232
234
  # NOTE: We parse HTML, to skip XML validation and strip XML namespaces
233
- xml_tree = html_parse(fp).getroot()
234
- record = self._etree_to_dict(xml_tree)["html"]["body"].get("record")
235
-
236
- if not record:
237
- raise ReaderError(f"Record not found in XML entry.")
235
+ record = None
236
+ try:
237
+ xml_tree = fromstring(fp)
238
+ xml_dict = etree_to_dict(xml_tree)
239
+ except Exception as e:
240
+ xml_tree = html_parse(fp).getroot()
241
+ xml_dict = etree_to_dict(xml_tree)["html"]["body"]
242
+
243
+ if self.root_element:
244
+ record = xml_dict.get(self.root_element)
245
+ if not record:
246
+ raise ReaderError(
247
+ f"Root element '{self.root_element}' not found in XML entry."
248
+ )
249
+ else:
250
+ record = xml_dict
238
251
 
239
252
  yield record
253
+
254
+
255
class OAIPMHReader(BaseReader):
    """Reader that harvests records from an OAI-PMH endpoint.

    Relies on the optional ``oaipmh_scythe`` package.
    """

    def __init__(
        self,
        *args,
        base_url=None,
        metadata_prefix=None,
        set=None,
        from_date=None,
        until_date=None,
        verb=None,
        **kwargs,
    ):
        """Constructor.

        :param base_url: URL handed to ``oaipmh_scythe.Scythe``.
        :param metadata_prefix: metadata prefix sent with every request;
            ``"oai_dc"`` is used when ``None``.
        :param set: forwarded to the client as ``set_``.
        :param from_date: forwarded to the client as ``from_``.
        :param until_date: forwarded to the client as ``until``.
        :param verb: ``"ListRecords"`` (used when ``None``) harvests with a
            single ``list_records`` call; any other value lists identifiers
            and fetches each record with ``get_record``.
        """
        self._base_url = base_url
        # ``x if not None else y`` always evaluates to ``x`` because
        # ``not None`` is constantly True; test the argument itself so the
        # documented default is really applied.
        self._metadata_prefix = (
            metadata_prefix if metadata_prefix is not None else "oai_dc"
        )
        # ``set`` shadows the builtin, but it is part of the keyword interface.
        self._set = set
        self._until = until_date
        self._from = from_date
        self._verb = verb if verb is not None else "ListRecords"
        super().__init__(*args, **kwargs)

    def _iter(self, scythe, *args, **kwargs):
        """Read and parse an OAIPMH stream to dict.

        :param scythe: client exposing ``class_mapping``, ``list_records``,
            ``list_identifiers`` and ``get_record``.
        :raises ReaderError: when the request matches no records.
        """

        class OAIRecord(oaipmh_scythe.models.Record):
            """An XML unpacking implementation for more complicated formats."""

            def get_metadata(self):
                """Extract and return the record's metadata as a dictionary."""
                return xml_to_dict(
                    self.xml.find(f".//{self._oai_namespace}metadata").getchildren()[0],
                )

        # Selection criteria shared by both harvesting strategies.
        selection = {
            "from_": self._from,
            "until": self._until,
            "metadata_prefix": self._metadata_prefix,
            "set_": self._set,
            "ignore_deleted": True,
        }

        try:
            if self._verb == "ListRecords":
                scythe.class_mapping["ListRecords"] = OAIRecord
                for record in scythe.list_records(**selection):
                    yield {"record": record}
            else:
                scythe.class_mapping["GetRecord"] = OAIRecord
                for header in scythe.list_identifiers(**selection):
                    record = scythe.get_record(
                        identifier=header.identifier,
                        metadata_prefix=self._metadata_prefix,
                    )
                    yield {"record": record}
        except oaipmh_scythe.NoRecordsMatch as err:
            # Keep the original exception as the cause for easier debugging.
            raise ReaderError("No records found in OAI-PMH request.") from err

    def read(self, item=None, *args, **kwargs):
        """Harvest from ``base_url``; chaining after another reader is unsupported.

        :raises NotImplementedError: if ``item`` is given.
        :raises ImportError: if the optional ``oaipmh_scythe`` package is
            not installed.
        """
        if item:
            raise NotImplementedError(
                "OAIPMHReader does not support being chained after another reader"
            )
        if oaipmh_scythe is None:
            # The module-level import is optional; fail with a clear message
            # instead of an AttributeError on ``None``.
            raise ImportError(
                "The 'oaipmh_scythe' package is required to use OAIPMHReader."
            )
        with oaipmh_scythe.Scythe(self._base_url) as scythe:
            # Pass the client positionally: a keyword followed by ``*args``
            # would collide as soon as any positional argument is forwarded.
            yield from self._iter(scythe, *args, **kwargs)
332
+
333
+
334
def xml_to_dict(tree: etree._Element):
    """Wrap the serialized form of an XML element in a dictionary.

    Args:
        tree: The element to serialize with ``etree.tostring``.

    Returns:
        A dictionary with the single key "record" holding the value
        returned by ``etree.tostring(tree)``.
    """
    return {"record": etree.tostring(tree)}
@@ -0,0 +1,37 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2022-2024 CERN.
4
+ #
5
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
6
+ # modify it under the terms of the MIT License; see LICENSE file for more
7
+ # details.
8
+
9
+ """Data Streams Celery tasks."""
10
+
11
+ from celery import shared_task
12
+
13
+ from ..datastreams import StreamEntry
14
+ from ..datastreams.factories import WriterFactory
15
+
16
+
17
@shared_task(ignore_result=True)
def write_entry(writer_config, entry):
    """Write an entry.

    A writer is created from the configuration on every call and the entry
    is wrapped in a ``StreamEntry`` before being written.

    :param writer_config: writer configuration as accepted by the WriterFactory.
    :param entry: dictionary, StreamEntry is not serializable.
    """
    WriterFactory.create(config=writer_config).write(StreamEntry(entry))
26
+
27
+
28
@shared_task(ignore_result=True)
def write_many_entry(writer_config, entries):
    """Write many entries.

    A writer is created from the configuration on every call and each entry
    is wrapped in a ``StreamEntry`` before the batch is written.

    :param writer_config: writer configuration as accepted by the WriterFactory.
    :param entries: list of dictionaries, StreamEntry is not serializable.
    """
    writer = WriterFactory.create(config=writer_config)
    writer.write_many(list(map(StreamEntry, entries)))
@@ -1,6 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2021-2022 CERN.
3
+ # Copyright (C) 2021-2024 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -9,11 +9,11 @@
9
9
  """Transformers module."""
10
10
 
11
11
  from abc import ABC, abstractmethod
12
- from collections import defaultdict
13
12
 
14
13
  from lxml import etree
15
14
 
16
15
  from .errors import TransformerError
16
+ from .xml import etree_to_dict
17
17
 
18
18
 
19
19
  class BaseTransformer(ABC):
@@ -32,42 +32,32 @@ class BaseTransformer(ABC):
32
32
  class XMLTransformer(BaseTransformer):
33
33
  """XML transformer."""
34
34
 
35
+ def __init__(self, root_element=None, *args, **kwargs):
36
+ """Initializes the transformer."""
37
+ self.root_element = root_element
38
+ super().__init__(*args, **kwargs)
39
+
35
40
  @classmethod
36
41
  def _xml_to_etree(cls, xml):
37
42
  """Converts XML to a lxml etree."""
38
43
  return etree.HTML(xml)
39
44
 
40
- @classmethod
41
- def _etree_to_dict(cls, tree):
42
- d = {tree.tag: {} if tree.attrib else None}
43
- children = list(tree)
44
- if children:
45
- dd = defaultdict(list)
46
- for dc in map(cls._etree_to_dict, children):
47
- for k, v in dc.items():
48
- dd[k].append(v)
49
- d = {tree.tag: {k: v[0] if len(v) == 1 else v for k, v in dd.items()}}
50
- if tree.attrib:
51
- d[tree.tag].update(("@" + k, v) for k, v in tree.attrib.items())
52
- if tree.text:
53
- text = tree.text.strip()
54
- if children or tree.attrib:
55
- if text:
56
- d[tree.tag]["#text"] = text
57
- else:
58
- d[tree.tag] = text
59
- return d
60
-
61
45
  def apply(self, stream_entry, **kwargs):
62
46
  """Applies the transformation to the stream entry.
63
47
 
64
48
  Requires the root element to be named "record".
65
49
  """
66
50
  xml_tree = self._xml_to_etree(stream_entry.entry)
67
- record = self._etree_to_dict(xml_tree)["html"]["body"].get("record")
68
-
69
- if not record:
70
- raise TransformerError(f"Record not found in XML entry.")
51
+ xml_dict = etree_to_dict(xml_tree)["html"]["body"]
52
+
53
+ if self.root_element:
54
+ record = xml_dict.get(self.root_element)
55
+ if not record:
56
+ raise TransformerError(
57
+ f"Root element '{self.root_element}' not found in XML entry."
58
+ )
59
+ else:
60
+ record = xml_dict
71
61
 
72
62
  stream_entry.entry = record
73
63
  return stream_entry