invenio-vocabularies 1.2.0__py2.py3-none-any.whl → 6.3.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of invenio-vocabularies might be problematic. Click here for more details.

Files changed (239) hide show
  1. invenio_vocabularies/__init__.py +2 -2
  2. invenio_vocabularies/administration/__init__.py +10 -0
  3. invenio_vocabularies/administration/views/__init__.py +10 -0
  4. invenio_vocabularies/administration/views/vocabularies.py +45 -0
  5. invenio_vocabularies/alembic/4a9a4fd235f8_create_vocabulary_schemes.py +4 -4
  6. invenio_vocabularies/alembic/4f365fced43f_create_vocabularies_tables.py +2 -2
  7. invenio_vocabularies/alembic/55a700f897b6_add_names_and_afiliations_pid_column.py +96 -0
  8. invenio_vocabularies/alembic/676dd587542d_create_funders_vocabulary_table.py +1 -1
  9. invenio_vocabularies/alembic/e1146238edd3_create_awards_table.py +1 -1
  10. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/.eslintrc.yml +11 -0
  11. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/.prettierrc +1 -0
  12. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/index.js +7 -0
  13. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/package.json +25 -0
  14. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/AwardResults.js +95 -0
  15. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/CustomAwardForm.js +139 -0
  16. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FunderDropdown.js +87 -0
  17. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.js +223 -0
  18. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.test.js +1 -0
  19. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingFieldItem.js +152 -0
  20. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js +270 -0
  21. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/NoAwardResults.js +37 -0
  22. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/index.js +8 -0
  23. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/index.js +7 -0
  24. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/index.js +7 -0
  25. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/index.js +7 -0
  26. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/i18next-scanner.config.js +63 -0
  27. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/i18next.js +36 -0
  28. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/index.js +1 -0
  29. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/package.json +53 -0
  30. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/scripts/compileCatalog.js +39 -0
  31. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/scripts/initCatalog.js +19 -0
  32. invenio_vocabularies/cli.py +31 -44
  33. invenio_vocabularies/config.py +74 -7
  34. invenio_vocabularies/contrib/affiliations/affiliations.py +22 -6
  35. invenio_vocabularies/contrib/affiliations/api.py +1 -2
  36. invenio_vocabularies/contrib/affiliations/config.py +10 -5
  37. invenio_vocabularies/contrib/affiliations/datastreams.py +186 -0
  38. invenio_vocabularies/contrib/affiliations/facets.py +36 -0
  39. invenio_vocabularies/contrib/affiliations/jsonschemas/affiliations/affiliation-v1.0.0.json +38 -7
  40. invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v1.0.0.json +22 -1
  41. invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v2.0.0.json +171 -0
  42. invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v1.0.0.json +22 -1
  43. invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v2.0.0.json +171 -0
  44. invenio_vocabularies/contrib/affiliations/mappings/v7/affiliations/affiliation-v1.0.0.json +22 -1
  45. invenio_vocabularies/contrib/affiliations/schema.py +23 -5
  46. invenio_vocabularies/contrib/affiliations/services.py +1 -2
  47. invenio_vocabularies/contrib/awards/awards.py +18 -6
  48. invenio_vocabularies/contrib/awards/config.py +1 -3
  49. invenio_vocabularies/contrib/awards/datastreams.py +246 -3
  50. invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json +41 -0
  51. invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json +53 -1
  52. invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json +53 -1
  53. invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json +53 -1
  54. invenio_vocabularies/contrib/awards/schema.py +27 -35
  55. invenio_vocabularies/contrib/awards/serializer.py +9 -1
  56. invenio_vocabularies/contrib/awards/services.py +1 -2
  57. invenio_vocabularies/contrib/common/__init__.py +9 -0
  58. invenio_vocabularies/contrib/common/openaire/__init__.py +9 -0
  59. invenio_vocabularies/contrib/common/openaire/datastreams.py +84 -0
  60. invenio_vocabularies/contrib/common/ror/__init__.py +9 -0
  61. invenio_vocabularies/contrib/common/ror/datastreams.py +220 -0
  62. invenio_vocabularies/contrib/funders/config.py +12 -5
  63. invenio_vocabularies/contrib/funders/datastreams.py +40 -62
  64. invenio_vocabularies/contrib/funders/facets.py +13 -5
  65. invenio_vocabularies/contrib/funders/funders.py +4 -2
  66. invenio_vocabularies/contrib/funders/jsonschemas/funders/funder-v1.0.0.json +36 -1
  67. invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v1.0.0.json +22 -1
  68. invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v2.0.0.json +156 -0
  69. invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v1.0.0.json +22 -1
  70. invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v2.0.0.json +156 -0
  71. invenio_vocabularies/contrib/funders/mappings/v7/funders/funder-v1.0.0.json +22 -1
  72. invenio_vocabularies/contrib/funders/schema.py +8 -0
  73. invenio_vocabularies/contrib/funders/serializer.py +2 -1
  74. invenio_vocabularies/contrib/names/config.py +5 -3
  75. invenio_vocabularies/contrib/names/datastreams.py +177 -38
  76. invenio_vocabularies/contrib/names/jsonschemas/names/name-v1.0.0.json +2 -6
  77. invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v1.0.0.json +3 -0
  78. invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v2.0.0.json +150 -0
  79. invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v1.0.0.json +3 -0
  80. invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v2.0.0.json +150 -0
  81. invenio_vocabularies/contrib/names/mappings/v7/names/name-v1.0.0.json +3 -0
  82. invenio_vocabularies/contrib/names/names.py +29 -13
  83. invenio_vocabularies/contrib/names/permissions.py +20 -0
  84. invenio_vocabularies/contrib/names/s3client.py +44 -0
  85. invenio_vocabularies/contrib/names/schema.py +31 -4
  86. invenio_vocabularies/contrib/subjects/config.py +9 -3
  87. invenio_vocabularies/contrib/subjects/datastreams.py +61 -0
  88. invenio_vocabularies/contrib/subjects/euroscivoc/__init__.py +9 -0
  89. invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py +171 -0
  90. invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json +31 -0
  91. invenio_vocabularies/contrib/subjects/mappings/os-v1/subjects/subject-v1.0.0.json +35 -0
  92. invenio_vocabularies/contrib/subjects/mappings/os-v2/subjects/subject-v1.0.0.json +35 -0
  93. invenio_vocabularies/contrib/subjects/mappings/v7/subjects/subject-v1.0.0.json +35 -0
  94. invenio_vocabularies/contrib/subjects/mesh/__init__.py +9 -0
  95. invenio_vocabularies/contrib/subjects/mesh/datastreams.py +43 -0
  96. invenio_vocabularies/contrib/subjects/schema.py +47 -5
  97. invenio_vocabularies/contrib/subjects/subjects.py +10 -0
  98. invenio_vocabularies/datastreams/datastreams.py +61 -13
  99. invenio_vocabularies/datastreams/factories.py +1 -2
  100. invenio_vocabularies/datastreams/readers.py +138 -29
  101. invenio_vocabularies/datastreams/tasks.py +37 -0
  102. invenio_vocabularies/datastreams/transformers.py +17 -27
  103. invenio_vocabularies/datastreams/writers.py +116 -14
  104. invenio_vocabularies/datastreams/xml.py +34 -0
  105. invenio_vocabularies/ext.py +59 -5
  106. invenio_vocabularies/factories.py +137 -0
  107. invenio_vocabularies/jobs.py +133 -0
  108. invenio_vocabularies/proxies.py +2 -2
  109. invenio_vocabularies/records/jsonschemas/vocabularies/definitions-v1.0.0.json +7 -0
  110. invenio_vocabularies/records/jsonschemas/vocabularies/vocabulary-v1.0.0.json +1 -4
  111. invenio_vocabularies/records/mappings/os-v1/vocabularies/vocabulary-v1.0.0.json +3 -3
  112. invenio_vocabularies/records/mappings/os-v2/vocabularies/vocabulary-v1.0.0.json +3 -3
  113. invenio_vocabularies/records/mappings/v7/vocabularies/vocabulary-v1.0.0.json +3 -3
  114. invenio_vocabularies/records/models.py +8 -10
  115. invenio_vocabularies/records/pidprovider.py +1 -2
  116. invenio_vocabularies/records/systemfields/relations.py +2 -2
  117. invenio_vocabularies/resources/__init__.py +9 -1
  118. invenio_vocabularies/resources/config.py +105 -0
  119. invenio_vocabularies/resources/resource.py +31 -41
  120. invenio_vocabularies/resources/schema.py +2 -1
  121. invenio_vocabularies/services/__init__.py +5 -2
  122. invenio_vocabularies/services/config.py +179 -0
  123. invenio_vocabularies/services/custom_fields/__init__.py +6 -2
  124. invenio_vocabularies/services/custom_fields/subject.py +82 -0
  125. invenio_vocabularies/services/custom_fields/vocabulary.py +19 -9
  126. invenio_vocabularies/services/facets.py +67 -37
  127. invenio_vocabularies/services/permissions.py +3 -1
  128. invenio_vocabularies/services/results.py +110 -0
  129. invenio_vocabularies/services/schema.py +39 -2
  130. invenio_vocabularies/services/service.py +46 -94
  131. invenio_vocabularies/services/tasks.py +1 -1
  132. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/subjects.html +23 -0
  133. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabularies-list.html +12 -0
  134. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabulary-details.html +71 -0
  135. invenio_vocabularies/translations/af/LC_MESSAGES/messages.mo +0 -0
  136. invenio_vocabularies/translations/af/LC_MESSAGES/messages.po +1 -1
  137. invenio_vocabularies/translations/ar/LC_MESSAGES/messages.mo +0 -0
  138. invenio_vocabularies/translations/ar/LC_MESSAGES/messages.po +9 -8
  139. invenio_vocabularies/translations/bg/LC_MESSAGES/messages.mo +0 -0
  140. invenio_vocabularies/translations/bg/LC_MESSAGES/messages.po +1 -1
  141. invenio_vocabularies/translations/ca/LC_MESSAGES/messages.mo +0 -0
  142. invenio_vocabularies/translations/ca/LC_MESSAGES/messages.po +1 -1
  143. invenio_vocabularies/translations/cs/LC_MESSAGES/messages.mo +0 -0
  144. invenio_vocabularies/translations/cs/LC_MESSAGES/messages.po +1 -1
  145. invenio_vocabularies/translations/da/LC_MESSAGES/messages.mo +0 -0
  146. invenio_vocabularies/translations/da/LC_MESSAGES/messages.po +1 -1
  147. invenio_vocabularies/translations/de/LC_MESSAGES/messages.mo +0 -0
  148. invenio_vocabularies/translations/de/LC_MESSAGES/messages.po +1 -1
  149. invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.mo +0 -0
  150. invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.po +139 -0
  151. invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.mo +0 -0
  152. invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.po +139 -0
  153. invenio_vocabularies/translations/el/LC_MESSAGES/messages.mo +0 -0
  154. invenio_vocabularies/translations/el/LC_MESSAGES/messages.po +1 -1
  155. invenio_vocabularies/translations/en/LC_MESSAGES/messages.mo +0 -0
  156. invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.mo +0 -0
  157. invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.po +139 -0
  158. invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.mo +0 -0
  159. invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.po +139 -0
  160. invenio_vocabularies/translations/es/LC_MESSAGES/messages.mo +0 -0
  161. invenio_vocabularies/translations/es/LC_MESSAGES/messages.po +1 -1
  162. invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.mo +0 -0
  163. invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.po +139 -0
  164. invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.mo +0 -0
  165. invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.po +139 -0
  166. invenio_vocabularies/translations/et/LC_MESSAGES/messages.mo +0 -0
  167. invenio_vocabularies/translations/et/LC_MESSAGES/messages.po +1 -1
  168. invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.mo +0 -0
  169. invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.po +1 -1
  170. invenio_vocabularies/translations/fa/LC_MESSAGES/messages.mo +0 -0
  171. invenio_vocabularies/translations/fa/LC_MESSAGES/messages.po +1 -1
  172. invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.mo +0 -0
  173. invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.po +139 -0
  174. invenio_vocabularies/translations/fr/LC_MESSAGES/messages.mo +0 -0
  175. invenio_vocabularies/translations/fr/LC_MESSAGES/messages.po +1 -1
  176. invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.mo +0 -0
  177. invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.po +139 -0
  178. invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.mo +0 -0
  179. invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.po +139 -0
  180. invenio_vocabularies/translations/gl/LC_MESSAGES/messages.mo +0 -0
  181. invenio_vocabularies/translations/gl/LC_MESSAGES/messages.po +1 -1
  182. invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.mo +0 -0
  183. invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.po +139 -0
  184. invenio_vocabularies/translations/hr/LC_MESSAGES/messages.mo +0 -0
  185. invenio_vocabularies/translations/hr/LC_MESSAGES/messages.po +1 -1
  186. invenio_vocabularies/translations/hu/LC_MESSAGES/messages.mo +0 -0
  187. invenio_vocabularies/translations/hu/LC_MESSAGES/messages.po +4 -4
  188. invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.mo +0 -0
  189. invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.po +139 -0
  190. invenio_vocabularies/translations/it/LC_MESSAGES/messages.mo +0 -0
  191. invenio_vocabularies/translations/it/LC_MESSAGES/messages.po +4 -3
  192. invenio_vocabularies/translations/ja/LC_MESSAGES/messages.mo +0 -0
  193. invenio_vocabularies/translations/ja/LC_MESSAGES/messages.po +1 -1
  194. invenio_vocabularies/translations/ka/LC_MESSAGES/messages.mo +0 -0
  195. invenio_vocabularies/translations/ka/LC_MESSAGES/messages.po +1 -1
  196. invenio_vocabularies/translations/lt/LC_MESSAGES/messages.mo +0 -0
  197. invenio_vocabularies/translations/lt/LC_MESSAGES/messages.po +1 -1
  198. invenio_vocabularies/translations/messages.pot +95 -48
  199. invenio_vocabularies/translations/ne/LC_MESSAGES/messages.mo +0 -0
  200. invenio_vocabularies/translations/ne/LC_MESSAGES/messages.po +139 -0
  201. invenio_vocabularies/translations/no/LC_MESSAGES/messages.mo +0 -0
  202. invenio_vocabularies/translations/no/LC_MESSAGES/messages.po +1 -1
  203. invenio_vocabularies/translations/pl/LC_MESSAGES/messages.mo +0 -0
  204. invenio_vocabularies/translations/pl/LC_MESSAGES/messages.po +1 -1
  205. invenio_vocabularies/translations/pt/LC_MESSAGES/messages.mo +0 -0
  206. invenio_vocabularies/translations/pt/LC_MESSAGES/messages.po +1 -1
  207. invenio_vocabularies/translations/ro/LC_MESSAGES/messages.mo +0 -0
  208. invenio_vocabularies/translations/ro/LC_MESSAGES/messages.po +1 -1
  209. invenio_vocabularies/translations/ru/LC_MESSAGES/messages.mo +0 -0
  210. invenio_vocabularies/translations/ru/LC_MESSAGES/messages.po +1 -1
  211. invenio_vocabularies/translations/rw/LC_MESSAGES/messages.mo +0 -0
  212. invenio_vocabularies/translations/rw/LC_MESSAGES/messages.po +1 -1
  213. invenio_vocabularies/translations/sk/LC_MESSAGES/messages.mo +0 -0
  214. invenio_vocabularies/translations/sk/LC_MESSAGES/messages.po +1 -1
  215. invenio_vocabularies/translations/sv/LC_MESSAGES/messages.mo +0 -0
  216. invenio_vocabularies/translations/sv/LC_MESSAGES/messages.po +4 -3
  217. invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.mo +0 -0
  218. invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.po +139 -0
  219. invenio_vocabularies/translations/tr/LC_MESSAGES/messages.mo +0 -0
  220. invenio_vocabularies/translations/tr/LC_MESSAGES/messages.po +1 -1
  221. invenio_vocabularies/translations/uk/LC_MESSAGES/messages.mo +0 -0
  222. invenio_vocabularies/translations/uk/LC_MESSAGES/messages.po +17 -13
  223. invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.mo +0 -0
  224. invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.po +139 -0
  225. invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.mo +0 -0
  226. invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.po +1 -1
  227. invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.mo +0 -0
  228. invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.po +1 -1
  229. invenio_vocabularies/views.py +12 -26
  230. invenio_vocabularies/webpack.py +51 -0
  231. invenio_vocabularies-6.3.1.dist-info/METADATA +346 -0
  232. invenio_vocabularies-6.3.1.dist-info/RECORD +306 -0
  233. {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/WHEEL +1 -1
  234. {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/entry_points.txt +20 -0
  235. invenio_vocabularies-1.2.0.dist-info/METADATA +0 -133
  236. invenio_vocabularies-1.2.0.dist-info/RECORD +0 -220
  237. {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/AUTHORS.rst +0 -0
  238. {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/LICENSE +0 -0
  239. {invenio_vocabularies-1.2.0.dist-info → invenio_vocabularies-6.3.1.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  # Copyright (C) 2021-2022 CERN.
4
+ # Copyright (C) 2024 California Institute of Technology.
4
5
  #
5
6
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
7
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -43,6 +44,8 @@ class FunderSchema(BaseVocabularySchema):
43
44
  required=True, validate=validate.Length(min=1, error=_("Name cannot be blank."))
44
45
  )
45
46
  country = SanitizedUnicode()
47
+ country_name = SanitizedUnicode()
48
+ location_name = SanitizedUnicode()
46
49
  identifiers = IdentifierSet(
47
50
  fields.Nested(
48
51
  partial(
@@ -57,6 +60,11 @@ class FunderSchema(BaseVocabularySchema):
57
60
  validate=validate.Length(min=1, error=_("PID cannot be blank."))
58
61
  )
59
62
 
63
+ acronym = SanitizedUnicode()
64
+ aliases = fields.List(SanitizedUnicode())
65
+ status = SanitizedUnicode()
66
+ types = fields.List(SanitizedUnicode())
67
+
60
68
  @validates_schema
61
69
  def validate_id(self, data, **kwargs):
62
70
  """Validates ID."""
@@ -1,6 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2022 CERN.
3
+ # Copyright (C) 2022-2024 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -29,4 +29,5 @@ class FunderL10NItemSchema(Schema):
29
29
  props = fields.Dict(dump_only=True)
30
30
  name = fields.String(dump_only=True)
31
31
  country = fields.String(dump_only=True)
32
+ country_name = fields.String(dump_only=True)
32
33
  identifiers = fields.List(fields.Nested(IdentifierSchema), dump_only=True)
@@ -28,12 +28,14 @@ class NamesSearchOptions(SearchOptions):
28
28
 
29
29
  suggest_parser_cls = SuggestQueryParser.factory(
30
30
  fields=[
31
- "name^100",
32
- "family_name^100",
33
31
  "given_name^100",
32
+ "name^70",
33
+ "family_name^50",
34
34
  "identifiers.identifier^20",
35
- "affiliations.name^10",
35
+ "affiliations.name^20",
36
36
  ],
37
+ type="most_fields", # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types
38
+ fuzziness="AUTO",
37
39
  )
38
40
 
39
41
  sort_default = "bestmatch"
@@ -1,6 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2021-2022 CERN.
3
+ # Copyright (C) 2021-2024 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -8,18 +8,123 @@
8
8
 
9
9
  """Names datastreams, transformers, writers and readers."""
10
10
 
11
+ import csv
12
+ import io
13
+ import tarfile
14
+ from concurrent.futures import ThreadPoolExecutor, as_completed
15
+ from datetime import timedelta
16
+
17
+ import arrow
18
+ import regex as re
19
+ from flask import current_app
11
20
  from invenio_access.permissions import system_identity
12
- from invenio_pidstore.errors import PIDDoesNotExistError
13
21
  from invenio_records.dictutils import dict_lookup
14
- from marshmallow import ValidationError
15
22
 
16
- from ...datastreams import StreamEntry
17
- from ...datastreams.errors import TransformerError, WriterError
18
- from ...datastreams.readers import SimpleHTTPReader
23
+ from invenio_vocabularies.contrib.names.s3client import S3OrcidClient
24
+
25
+ from ...datastreams.errors import TransformerError
26
+ from ...datastreams.readers import BaseReader, SimpleHTTPReader
19
27
  from ...datastreams.transformers import BaseTransformer
20
28
  from ...datastreams.writers import ServiceWriter
21
29
 
22
30
 
31
+ class OrcidDataSyncReader(BaseReader):
32
+ """ORCiD Data Sync Reader."""
33
+
34
+ def __init__(self, origin=None, mode="r", since=None, *args, **kwargs):
35
+ """Constructor.
36
+
37
+ :param origin: Data source (e.g. filepath).
38
+ Can be none in case of piped readers.
39
+ """
40
+ super().__init__(origin=origin, mode=mode, *args, **kwargs)
41
+ self.s3_client = S3OrcidClient()
42
+ self.since = since
43
+
44
+ def _fetch_orcid_data(self, orcid_to_sync, bucket):
45
+ """Fetches a single ORCiD record from S3."""
46
+ # The ORCiD file key is located in a folder whose name corresponds to the last three digits of the ORCiD
47
+ suffix = orcid_to_sync[-3:]
48
+ key = f"{suffix}/{orcid_to_sync}.xml"
49
+ try:
50
+ return self.s3_client.read_file(f"s3://{bucket}/{key}")
51
+ except Exception as e:
52
+ # TODO: log
53
+ return None
54
+
55
+ def _process_lambda_file(self, fileobj):
56
+ """Process the ORCiD lambda file and yield the ORCiDs to sync.
57
+
58
+ The decoded fileobj looks like the following:
59
+ orcid, path, date_created, last_modified
60
+ 0000-0001-5109-3700, http://orcid.org/0000-0001-5109-3700, 2014-08-02 15:00:00.000,2021-08-02 15:00:00.000
61
+
62
+ Yield ORCiDs to sync until the last sync date is reached.
63
+ """
64
+ date_format = "YYYY-MM-DD HH:mm:ss.SSSSSS"
65
+ date_format_no_millis = "YYYY-MM-DD HH:mm:ss"
66
+ time_shift = current_app.config["VOCABULARIES_ORCID_SYNC_SINCE"]
67
+ if self.since:
68
+ time_shift = self.since
69
+ last_sync = arrow.now() - timedelta(**time_shift)
70
+
71
+ file_content = fileobj.read().decode("utf-8")
72
+
73
+ csv_reader = csv.DictReader(file_content.splitlines())
74
+
75
+ for row in csv_reader: # Skip the header line
76
+ orcid = row["orcid"]
77
+
78
+ # Lambda file is ordered by last modified date
79
+ last_modified_str = row["last_modified"]
80
+ try:
81
+ last_modified_date = arrow.get(last_modified_str, date_format)
82
+ except arrow.parser.ParserError:
83
+ last_modified_date = arrow.get(last_modified_str, date_format_no_millis)
84
+
85
+ if last_modified_date < last_sync:
86
+ break
87
+ yield orcid
88
+
89
+ def _iter(self, orcids):
90
+ """Iterates over the ORCiD records yielding each one."""
91
+ with ThreadPoolExecutor(
92
+ max_workers=current_app.config["VOCABULARIES_ORCID_SYNC_MAX_WORKERS"]
93
+ ) as executor:
94
+ futures = [
95
+ executor.submit(
96
+ self._fetch_orcid_data,
97
+ orcid,
98
+ current_app.config["VOCABULARIES_ORCID_SUMMARIES_BUCKET"],
99
+ )
100
+ for orcid in orcids
101
+ ]
102
+ for future in as_completed(futures):
103
+ result = future.result()
104
+ if result is not None:
105
+ yield result
106
+
107
+ def read(self, item=None, *args, **kwargs):
108
+ """Streams the ORCiD lambda file, processes it to get the ORCiDs to sync and yields their data."""
109
+ # Read the file from S3
110
+ tar_content = self.s3_client.read_file(
111
+ "s3://orcid-lambda-file/last_modified.csv.tar"
112
+ )
113
+
114
+ orcids_to_sync = []
115
+ # Opens tar file and process it
116
+ with tarfile.open(fileobj=io.BytesIO(tar_content)) as tar:
117
+ # Iterate over each member (file or directory) in the tar file
118
+ for member in tar.getmembers():
119
+ # Extract the file
120
+ extracted_file = tar.extractfile(member)
121
+ if extracted_file:
122
+ # Process the file and get the ORCiDs to sync
123
+ orcids_to_sync.extend(self._process_lambda_file(extracted_file))
124
+
125
+ yield from self._iter(orcids_to_sync)
126
+
127
+
23
128
  class OrcidHTTPReader(SimpleHTTPReader):
24
129
  """ORCiD HTTP Reader."""
25
130
 
@@ -33,20 +138,43 @@ class OrcidHTTPReader(SimpleHTTPReader):
33
138
  super().__init__(origin, *args, **kwargs)
34
139
 
35
140
 
141
+ DEFAULT_NAMES_EXCLUDE_REGEX = r"[\p{P}\p{S}\p{Nd}\p{No}\p{Emoji}--,.()\-']"
142
+ """Regex to filter out names with punctuation, symbols, decimal numbers and emojis (the characters , . ( ) - ' are still allowed)."""
143
+
144
+
36
145
  class OrcidTransformer(BaseTransformer):
37
146
  """Transforms an ORCiD record into a names record."""
38
147
 
148
+ def __init__(
149
+ self, *args, names_exclude_regex=DEFAULT_NAMES_EXCLUDE_REGEX, **kwargs
150
+ ) -> None:
151
+ """Constructor."""
152
+ self._names_exclude_regex = names_exclude_regex
153
+ super().__init__()
154
+
155
+ def _is_valid_name(self, name):
156
+ """Check whether the name passes the regex."""
157
+ if not self._names_exclude_regex:
158
+ return True
159
+ return not bool(re.search(self._names_exclude_regex, name, re.UNICODE | re.V1))
160
+
39
161
  def apply(self, stream_entry, **kwargs):
40
162
  """Applies the transformation to the stream entry."""
41
163
  record = stream_entry.entry
42
164
  person = record["person"]
43
- orcid_id = record["orcid-identifier"]["uri"]
165
+ orcid_id = record["orcid-identifier"]["path"]
44
166
 
45
167
  name = person.get("name")
46
168
  if name is None:
47
169
  raise TransformerError(f"Name not found in ORCiD entry.")
170
+ if name.get("family-name") is None:
171
+ raise TransformerError(f"Family name not found in ORCiD entry.")
172
+
173
+ if not self._is_valid_name(name["given-names"] + name["family-name"]):
174
+ raise TransformerError(f"Invalid characters in name.")
48
175
 
49
176
  entry = {
177
+ "id": orcid_id,
50
178
  "given_name": name.get("given-names"),
51
179
  "family_name": name.get("family-name"),
52
180
  "identifiers": [{"scheme": "orcid", "identifier": orcid_id}],
@@ -79,45 +207,19 @@ class OrcidTransformer(BaseTransformer):
79
207
  class NamesServiceWriter(ServiceWriter):
80
208
  """Names service writer."""
81
209
 
82
- def __init__(self, *args, scheme_id="orcid", **kwargs):
210
+ def __init__(self, *args, **kwargs):
83
211
  """Constructor."""
84
212
  service_or_name = kwargs.pop("service_or_name", "names")
85
213
  super().__init__(service_or_name=service_or_name, *args, **kwargs)
86
- self._scheme_id = scheme_id
87
214
 
88
215
  def _entry_id(self, entry):
89
216
  """Get the id from an entry."""
90
- for identifier in entry.get("identifiers"):
91
- if identifier.get("scheme") == self._scheme_id:
92
- return identifier["identifier"]
93
-
94
- def _resolve(self, id_):
95
- """Resolve an entry given an id."""
96
- return self._service.resolve(self._identity, id_=id_, id_type=self._scheme_id)
97
-
98
- def write(self, stream_entry, *args, **kwargs):
99
- """Writes the input entry using a given service."""
100
- entry = stream_entry.entry
101
- try:
102
- vocab_id = self._entry_id(entry)
103
- # it is resolved before creation to avoid duplicates since
104
- # the pid is recidv2 not e.g. the orcid
105
- current = self._resolve(vocab_id)
106
- if not self._update:
107
- raise WriterError([f"Vocabulary entry already exists: {entry}"])
108
- updated = dict(current.to_dict(), **entry)
109
- return StreamEntry(
110
- self._service.update(self._identity, current.id, updated)
111
- )
112
- except PIDDoesNotExistError:
113
- return StreamEntry(self._service.create(self._identity, entry))
114
-
115
- except ValidationError as err:
116
- raise WriterError([{"ValidationError": err.messages}])
217
+ return entry["id"]
117
218
 
118
219
 
119
220
  VOCABULARIES_DATASTREAM_READERS = {
120
221
  "orcid-http": OrcidHTTPReader,
222
+ "orcid-data-sync": OrcidDataSyncReader,
121
223
  }
122
224
 
123
225
 
@@ -138,10 +240,15 @@ DATASTREAM_CONFIG = {
138
240
  {
139
241
  "type": "tar",
140
242
  "args": {
141
- "regex": ".xml$",
243
+ "regex": "\\.xml$",
244
+ },
245
+ },
246
+ {
247
+ "type": "xml",
248
+ "args": {
249
+ "root_element": "record",
142
250
  },
143
251
  },
144
- {"type": "xml"},
145
252
  ],
146
253
  "transformers": [{"type": "orcid"}],
147
254
  "writers": [
@@ -157,3 +264,35 @@ DATASTREAM_CONFIG = {
157
264
 
158
265
  An origin is required for the reader.
159
266
  """
267
+
268
+ # TODO: Used on the jobs and should be set as a "PRESET" (naming to be defined)
269
+ ORCID_PRESET_DATASTREAM_CONFIG = {
270
+ "readers": [
271
+ {
272
+ "type": "orcid-data-sync",
273
+ },
274
+ {
275
+ "type": "xml",
276
+ "args": {
277
+ "root_element": "record",
278
+ },
279
+ },
280
+ ],
281
+ "transformers": [{"type": "orcid"}],
282
+ "writers": [
283
+ {
284
+ "type": "async",
285
+ "args": {
286
+ "writer": {
287
+ "type": "names-service",
288
+ }
289
+ },
290
+ }
291
+ ],
292
+ "batch_size": 1000,
293
+ "write_many": True,
294
+ }
295
+ """ORCiD Data Stream configuration.
296
+
297
+ No origin is configured for the reader: "orcid-data-sync" fetches its input from S3.
298
+ """
@@ -8,12 +8,8 @@
8
8
  "$schema": {
9
9
  "$ref": "local://definitions-v1.0.0.json#/$schema"
10
10
  },
11
- "id": {
12
- "description": "URI or classification code as identifier - globally unique among all names schemes.",
13
- "$ref": "local://definitions-v1.0.0.json#/identifier"
14
- },
15
- "pid": {
16
- "$ref": "local://definitions-v1.0.0.json#/internal-pid"
11
+ "tags": {
12
+ "$ref": "local://vocabularies/definitions-v1.0.0.json#/tags"
17
13
  },
18
14
  "scheme": {
19
15
  "description": "Identifier of the name scheme.",
@@ -24,6 +24,9 @@
24
24
  "id": {
25
25
  "type": "keyword"
26
26
  },
27
+ "tags": {
28
+ "type": "keyword"
29
+ },
27
30
  "name_sort": {
28
31
  "type": "keyword"
29
32
  },
@@ -0,0 +1,150 @@
1
+ {
2
+ "settings": {
3
+ "analysis": {
4
+ "char_filter": {
5
+ "strip_special_chars": {
6
+ "type": "pattern_replace",
7
+ "pattern": "[\\p{Punct}\\p{S}]",
8
+ "replacement": ""
9
+ }
10
+ },
11
+ "analyzer": {
12
+ "accent_edge_analyzer": {
13
+ "tokenizer": "standard",
14
+ "type": "custom",
15
+ "char_filter": ["strip_special_chars"],
16
+ "filter": [
17
+ "lowercasepreserveoriginal",
18
+ "asciifoldingpreserveoriginal",
19
+ "edgegrams"
20
+ ]
21
+ },
22
+ "accent_analyzer": {
23
+ "tokenizer": "standard",
24
+ "type": "custom",
25
+ "char_filter": ["strip_special_chars"],
26
+ "filter": [
27
+ "lowercasepreserveoriginal",
28
+ "asciifoldingpreserveoriginal"
29
+ ]
30
+ }
31
+ },
32
+ "normalizer": {
33
+ "accent_normalizer": {
34
+ "type": "custom",
35
+ "char_filter": ["strip_special_chars"],
36
+ "filter": [
37
+ "lowercase",
38
+ "asciifolding"
39
+ ]
40
+ }
41
+ },
42
+ "filter": {
43
+ "lowercasepreserveoriginal": {
44
+ "type": "lowercase",
45
+ "preserve_original": true
46
+ },
47
+ "asciifoldingpreserveoriginal": {
48
+ "type": "asciifolding",
49
+ "preserve_original": true
50
+ },
51
+ "edgegrams": {
52
+ "type": "edge_ngram",
53
+ "min_gram": 2,
54
+ "max_gram": 20
55
+ }
56
+ }
57
+ }
58
+ },
59
+ "mappings": {
60
+ "dynamic": "strict",
61
+ "properties": {
62
+ "$schema": {
63
+ "type": "keyword",
64
+ "index": "false"
65
+ },
66
+ "created": {
67
+ "type": "date"
68
+ },
69
+ "updated": {
70
+ "type": "date"
71
+ },
72
+ "indexed_at": {
73
+ "type": "date"
74
+ },
75
+ "uuid": {
76
+ "type": "keyword"
77
+ },
78
+ "version_id": {
79
+ "type": "integer"
80
+ },
81
+ "id": {
82
+ "type": "keyword"
83
+ },
84
+ "tags": {
85
+ "type": "keyword"
86
+ },
87
+ "name_sort": {
88
+ "type": "keyword"
89
+ },
90
+ "name": {
91
+ "type": "text",
92
+ "analyzer": "accent_edge_analyzer",
93
+ "search_analyzer": "accent_analyzer",
94
+ "copy_to": "name_sort"
95
+ },
96
+ "given_name": {
97
+ "type": "text",
98
+ "analyzer": "accent_edge_analyzer",
99
+ "search_analyzer": "accent_analyzer"
100
+ },
101
+ "family_name": {
102
+ "type": "text"
103
+ },
104
+ "identifiers": {
105
+ "properties": {
106
+ "identifier": {
107
+ "type": "keyword",
108
+ "normalizer": "accent_normalizer"
109
+ },
110
+ "scheme": {
111
+ "type": "keyword"
112
+ }
113
+ }
114
+ },
115
+ "affiliations": {
116
+ "type": "object",
117
+ "properties": {
118
+ "@v": {
119
+ "type": "keyword"
120
+ },
121
+ "id": {
122
+ "type": "keyword"
123
+ },
124
+ "name": {
125
+ "type": "text",
126
+ "analyzer": "accent_edge_analyzer",
127
+ "search_analyzer": "accent_analyzer"
128
+ }
129
+ }
130
+ },
131
+ "pid": {
132
+ "type": "object",
133
+ "properties": {
134
+ "pk": {
135
+ "type": "integer"
136
+ },
137
+ "pid_type": {
138
+ "type": "keyword"
139
+ },
140
+ "obj_type": {
141
+ "type": "keyword"
142
+ },
143
+ "status": {
144
+ "type": "keyword"
145
+ }
146
+ }
147
+ }
148
+ }
149
+ }
150
+ }
@@ -24,6 +24,9 @@
24
24
  "id": {
25
25
  "type": "keyword"
26
26
  },
27
+ "tags": {
28
+ "type": "keyword"
29
+ },
27
30
  "name_sort": {
28
31
  "type": "keyword"
29
32
  },
@@ -0,0 +1,150 @@
1
+ {
2
+ "settings": {
3
+ "analysis": {
4
+ "char_filter": {
5
+ "strip_special_chars": {
6
+ "type": "pattern_replace",
7
+ "pattern": "[\\p{Punct}\\p{S}]",
8
+ "replacement": ""
9
+ }
10
+ },
11
+ "analyzer": {
12
+ "accent_edge_analyzer": {
13
+ "tokenizer": "standard",
14
+ "type": "custom",
15
+ "char_filter": ["strip_special_chars"],
16
+ "filter": [
17
+ "lowercasepreserveoriginal",
18
+ "asciifoldingpreserveoriginal",
19
+ "edgegrams"
20
+ ]
21
+ },
22
+ "accent_analyzer": {
23
+ "tokenizer": "standard",
24
+ "type": "custom",
25
+ "char_filter": ["strip_special_chars"],
26
+ "filter": [
27
+ "lowercasepreserveoriginal",
28
+ "asciifoldingpreserveoriginal"
29
+ ]
30
+ }
31
+ },
32
+ "normalizer": {
33
+ "accent_normalizer": {
34
+ "type": "custom",
35
+ "char_filter": ["strip_special_chars"],
36
+ "filter": [
37
+ "lowercase",
38
+ "asciifolding"
39
+ ]
40
+ }
41
+ },
42
+ "filter": {
43
+ "lowercasepreserveoriginal": {
44
+ "type": "lowercase",
45
+ "preserve_original": true
46
+ },
47
+ "asciifoldingpreserveoriginal": {
48
+ "type": "asciifolding",
49
+ "preserve_original": true
50
+ },
51
+ "edgegrams": {
52
+ "type": "edge_ngram",
53
+ "min_gram": 2,
54
+ "max_gram": 20
55
+ }
56
+ }
57
+ }
58
+ },
59
+ "mappings": {
60
+ "dynamic": "strict",
61
+ "properties": {
62
+ "$schema": {
63
+ "type": "keyword",
64
+ "index": "false"
65
+ },
66
+ "created": {
67
+ "type": "date"
68
+ },
69
+ "updated": {
70
+ "type": "date"
71
+ },
72
+ "indexed_at": {
73
+ "type": "date"
74
+ },
75
+ "uuid": {
76
+ "type": "keyword"
77
+ },
78
+ "version_id": {
79
+ "type": "integer"
80
+ },
81
+ "id": {
82
+ "type": "keyword"
83
+ },
84
+ "tags": {
85
+ "type": "keyword"
86
+ },
87
+ "name_sort": {
88
+ "type": "keyword"
89
+ },
90
+ "name": {
91
+ "type": "text",
92
+ "analyzer": "accent_edge_analyzer",
93
+ "search_analyzer": "accent_analyzer",
94
+ "copy_to": "name_sort"
95
+ },
96
+ "given_name": {
97
+ "type": "text",
98
+ "analyzer": "accent_edge_analyzer",
99
+ "search_analyzer": "accent_analyzer"
100
+ },
101
+ "family_name": {
102
+ "type": "text"
103
+ },
104
+ "identifiers": {
105
+ "properties": {
106
+ "identifier": {
107
+ "type": "keyword",
108
+ "normalizer": "accent_normalizer"
109
+ },
110
+ "scheme": {
111
+ "type": "keyword"
112
+ }
113
+ }
114
+ },
115
+ "affiliations": {
116
+ "type": "object",
117
+ "properties": {
118
+ "@v": {
119
+ "type": "keyword"
120
+ },
121
+ "id": {
122
+ "type": "keyword"
123
+ },
124
+ "name": {
125
+ "type": "text",
126
+ "analyzer": "accent_edge_analyzer",
127
+ "search_analyzer": "accent_analyzer"
128
+ }
129
+ }
130
+ },
131
+ "pid": {
132
+ "type": "object",
133
+ "properties": {
134
+ "pk": {
135
+ "type": "integer"
136
+ },
137
+ "pid_type": {
138
+ "type": "keyword"
139
+ },
140
+ "obj_type": {
141
+ "type": "keyword"
142
+ },
143
+ "status": {
144
+ "type": "keyword"
145
+ }
146
+ }
147
+ }
148
+ }
149
+ }
150
+ }
@@ -24,6 +24,9 @@
24
24
  "id": {
25
25
  "type": "keyword"
26
26
  },
27
+ "tags": {
28
+ "type": "keyword"
29
+ },
27
30
  "name_sort": {
28
31
  "type": "keyword"
29
32
  },