invenio-vocabularies 2.3.1__py2.py3-none-any.whl → 6.3.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of invenio-vocabularies might be problematic. Click here for more details.

Files changed (165)
  1. invenio_vocabularies/__init__.py +2 -2
  2. invenio_vocabularies/administration/__init__.py +10 -0
  3. invenio_vocabularies/administration/views/__init__.py +10 -0
  4. invenio_vocabularies/administration/views/vocabularies.py +45 -0
  5. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/package.json +1 -7
  6. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/CustomAwardForm.js +80 -64
  7. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.js +49 -41
  8. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js +5 -7
  9. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/NoAwardResults.js +3 -3
  10. invenio_vocabularies/cli.py +31 -44
  11. invenio_vocabularies/config.py +68 -4
  12. invenio_vocabularies/contrib/affiliations/affiliations.py +11 -0
  13. invenio_vocabularies/contrib/affiliations/api.py +1 -2
  14. invenio_vocabularies/contrib/affiliations/config.py +13 -2
  15. invenio_vocabularies/contrib/affiliations/datastreams.py +186 -0
  16. invenio_vocabularies/contrib/affiliations/jsonschemas/affiliations/affiliation-v1.0.0.json +38 -1
  17. invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v1.0.0.json +22 -1
  18. invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v2.0.0.json +171 -0
  19. invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v1.0.0.json +22 -1
  20. invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v2.0.0.json +171 -0
  21. invenio_vocabularies/contrib/affiliations/mappings/v7/affiliations/affiliation-v1.0.0.json +22 -1
  22. invenio_vocabularies/contrib/affiliations/schema.py +17 -3
  23. invenio_vocabularies/contrib/affiliations/services.py +1 -2
  24. invenio_vocabularies/contrib/awards/awards.py +17 -5
  25. invenio_vocabularies/contrib/awards/datastreams.py +241 -7
  26. invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json +38 -0
  27. invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json +51 -2
  28. invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json +51 -2
  29. invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json +51 -2
  30. invenio_vocabularies/contrib/awards/schema.py +16 -1
  31. invenio_vocabularies/contrib/awards/serializer.py +8 -1
  32. invenio_vocabularies/contrib/awards/services.py +1 -2
  33. invenio_vocabularies/contrib/common/__init__.py +9 -0
  34. invenio_vocabularies/contrib/common/openaire/__init__.py +9 -0
  35. invenio_vocabularies/contrib/common/openaire/datastreams.py +84 -0
  36. invenio_vocabularies/contrib/common/ror/__init__.py +9 -0
  37. invenio_vocabularies/contrib/common/ror/datastreams.py +220 -0
  38. invenio_vocabularies/contrib/funders/config.py +11 -2
  39. invenio_vocabularies/contrib/funders/datastreams.py +40 -62
  40. invenio_vocabularies/contrib/funders/funders.py +3 -1
  41. invenio_vocabularies/contrib/funders/jsonschemas/funders/funder-v1.0.0.json +36 -1
  42. invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v1.0.0.json +22 -1
  43. invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v2.0.0.json +156 -0
  44. invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v1.0.0.json +22 -1
  45. invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v2.0.0.json +156 -0
  46. invenio_vocabularies/contrib/funders/mappings/v7/funders/funder-v1.0.0.json +22 -1
  47. invenio_vocabularies/contrib/funders/schema.py +8 -0
  48. invenio_vocabularies/contrib/funders/serializer.py +2 -1
  49. invenio_vocabularies/contrib/names/config.py +5 -3
  50. invenio_vocabularies/contrib/names/datastreams.py +172 -4
  51. invenio_vocabularies/contrib/names/jsonschemas/names/name-v1.0.0.json +3 -0
  52. invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v1.0.0.json +3 -0
  53. invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v2.0.0.json +150 -0
  54. invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v1.0.0.json +3 -0
  55. invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v2.0.0.json +150 -0
  56. invenio_vocabularies/contrib/names/mappings/v7/names/name-v1.0.0.json +3 -0
  57. invenio_vocabularies/contrib/names/names.py +15 -3
  58. invenio_vocabularies/contrib/names/permissions.py +20 -0
  59. invenio_vocabularies/contrib/names/s3client.py +44 -0
  60. invenio_vocabularies/contrib/names/schema.py +14 -0
  61. invenio_vocabularies/contrib/subjects/config.py +9 -3
  62. invenio_vocabularies/contrib/subjects/datastreams.py +61 -0
  63. invenio_vocabularies/contrib/subjects/euroscivoc/__init__.py +9 -0
  64. invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py +171 -0
  65. invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json +31 -0
  66. invenio_vocabularies/contrib/subjects/mappings/os-v1/subjects/subject-v1.0.0.json +35 -0
  67. invenio_vocabularies/contrib/subjects/mappings/os-v2/subjects/subject-v1.0.0.json +35 -0
  68. invenio_vocabularies/contrib/subjects/mappings/v7/subjects/subject-v1.0.0.json +35 -0
  69. invenio_vocabularies/contrib/subjects/mesh/__init__.py +9 -0
  70. invenio_vocabularies/contrib/subjects/mesh/datastreams.py +43 -0
  71. invenio_vocabularies/contrib/subjects/schema.py +47 -5
  72. invenio_vocabularies/contrib/subjects/subjects.py +10 -0
  73. invenio_vocabularies/datastreams/datastreams.py +61 -13
  74. invenio_vocabularies/datastreams/factories.py +1 -2
  75. invenio_vocabularies/datastreams/readers.py +138 -29
  76. invenio_vocabularies/datastreams/tasks.py +37 -0
  77. invenio_vocabularies/datastreams/transformers.py +17 -27
  78. invenio_vocabularies/datastreams/writers.py +116 -14
  79. invenio_vocabularies/datastreams/xml.py +34 -0
  80. invenio_vocabularies/ext.py +59 -5
  81. invenio_vocabularies/factories.py +137 -0
  82. invenio_vocabularies/jobs.py +133 -0
  83. invenio_vocabularies/proxies.py +2 -2
  84. invenio_vocabularies/records/jsonschemas/vocabularies/definitions-v1.0.0.json +7 -0
  85. invenio_vocabularies/records/jsonschemas/vocabularies/vocabulary-v1.0.0.json +1 -4
  86. invenio_vocabularies/records/mappings/os-v1/vocabularies/vocabulary-v1.0.0.json +3 -3
  87. invenio_vocabularies/records/mappings/os-v2/vocabularies/vocabulary-v1.0.0.json +3 -3
  88. invenio_vocabularies/records/mappings/v7/vocabularies/vocabulary-v1.0.0.json +3 -3
  89. invenio_vocabularies/records/models.py +2 -4
  90. invenio_vocabularies/records/pidprovider.py +1 -2
  91. invenio_vocabularies/records/systemfields/relations.py +2 -2
  92. invenio_vocabularies/resources/__init__.py +9 -1
  93. invenio_vocabularies/resources/config.py +105 -0
  94. invenio_vocabularies/resources/resource.py +31 -41
  95. invenio_vocabularies/resources/schema.py +2 -1
  96. invenio_vocabularies/services/__init__.py +5 -2
  97. invenio_vocabularies/services/config.py +179 -0
  98. invenio_vocabularies/services/custom_fields/__init__.py +6 -2
  99. invenio_vocabularies/services/custom_fields/subject.py +82 -0
  100. invenio_vocabularies/services/custom_fields/vocabulary.py +5 -3
  101. invenio_vocabularies/services/permissions.py +3 -1
  102. invenio_vocabularies/services/results.py +110 -0
  103. invenio_vocabularies/services/schema.py +11 -2
  104. invenio_vocabularies/services/service.py +46 -94
  105. invenio_vocabularies/services/tasks.py +1 -1
  106. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/subjects.html +23 -0
  107. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabularies-list.html +12 -0
  108. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabulary-details.html +71 -0
  109. invenio_vocabularies/translations/af/LC_MESSAGES/messages.mo +0 -0
  110. invenio_vocabularies/translations/ar/LC_MESSAGES/messages.mo +0 -0
  111. invenio_vocabularies/translations/bg/LC_MESSAGES/messages.mo +0 -0
  112. invenio_vocabularies/translations/ca/LC_MESSAGES/messages.mo +0 -0
  113. invenio_vocabularies/translations/cs/LC_MESSAGES/messages.mo +0 -0
  114. invenio_vocabularies/translations/da/LC_MESSAGES/messages.mo +0 -0
  115. invenio_vocabularies/translations/de/LC_MESSAGES/messages.mo +0 -0
  116. invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.mo +0 -0
  117. invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.mo +0 -0
  118. invenio_vocabularies/translations/el/LC_MESSAGES/messages.mo +0 -0
  119. invenio_vocabularies/translations/en/LC_MESSAGES/messages.mo +0 -0
  120. invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.mo +0 -0
  121. invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.mo +0 -0
  122. invenio_vocabularies/translations/es/LC_MESSAGES/messages.mo +0 -0
  123. invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.mo +0 -0
  124. invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.mo +0 -0
  125. invenio_vocabularies/translations/et/LC_MESSAGES/messages.mo +0 -0
  126. invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.mo +0 -0
  127. invenio_vocabularies/translations/fa/LC_MESSAGES/messages.mo +0 -0
  128. invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.mo +0 -0
  129. invenio_vocabularies/translations/fr/LC_MESSAGES/messages.mo +0 -0
  130. invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.mo +0 -0
  131. invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.mo +0 -0
  132. invenio_vocabularies/translations/gl/LC_MESSAGES/messages.mo +0 -0
  133. invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.mo +0 -0
  134. invenio_vocabularies/translations/hr/LC_MESSAGES/messages.mo +0 -0
  135. invenio_vocabularies/translations/hu/LC_MESSAGES/messages.mo +0 -0
  136. invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.mo +0 -0
  137. invenio_vocabularies/translations/it/LC_MESSAGES/messages.mo +0 -0
  138. invenio_vocabularies/translations/ja/LC_MESSAGES/messages.mo +0 -0
  139. invenio_vocabularies/translations/ka/LC_MESSAGES/messages.mo +0 -0
  140. invenio_vocabularies/translations/lt/LC_MESSAGES/messages.mo +0 -0
  141. invenio_vocabularies/translations/messages.pot +95 -48
  142. invenio_vocabularies/translations/ne/LC_MESSAGES/messages.mo +0 -0
  143. invenio_vocabularies/translations/no/LC_MESSAGES/messages.mo +0 -0
  144. invenio_vocabularies/translations/pl/LC_MESSAGES/messages.mo +0 -0
  145. invenio_vocabularies/translations/pt/LC_MESSAGES/messages.mo +0 -0
  146. invenio_vocabularies/translations/ro/LC_MESSAGES/messages.mo +0 -0
  147. invenio_vocabularies/translations/ru/LC_MESSAGES/messages.mo +0 -0
  148. invenio_vocabularies/translations/rw/LC_MESSAGES/messages.mo +0 -0
  149. invenio_vocabularies/translations/sk/LC_MESSAGES/messages.mo +0 -0
  150. invenio_vocabularies/translations/sv/LC_MESSAGES/messages.mo +0 -0
  151. invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.mo +0 -0
  152. invenio_vocabularies/translations/tr/LC_MESSAGES/messages.mo +0 -0
  153. invenio_vocabularies/translations/uk/LC_MESSAGES/messages.mo +0 -0
  154. invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.mo +0 -0
  155. invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.mo +0 -0
  156. invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.mo +0 -0
  157. invenio_vocabularies/views.py +12 -26
  158. invenio_vocabularies/webpack.py +3 -3
  159. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/METADATA +150 -6
  160. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/RECORD +165 -132
  161. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/WHEEL +1 -1
  162. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/entry_points.txt +17 -0
  163. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/AUTHORS.rst +0 -0
  164. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/LICENSE +0 -0
  165. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,8 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2020 CERN.
3
+ # Copyright (C) 2020-2024 CERN.
4
4
  # Copyright (C) 2021 Northwestern University.
5
+ # Copyright (C) 2024 University of Münster.
5
6
  #
6
7
  # Invenio-Vocabularies is free software; you can redistribute it and/or
7
8
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -17,15 +18,16 @@ from .datastreams.readers import (
17
18
  GzipReader,
18
19
  JsonLinesReader,
19
20
  JsonReader,
21
+ OAIPMHReader,
20
22
  TarReader,
21
23
  XMLReader,
22
24
  YamlReader,
23
25
  ZipReader,
24
26
  )
25
27
  from .datastreams.transformers import XMLTransformer
26
- from .datastreams.writers import ServiceWriter, YamlWriter
27
- from .resources.resource import VocabulariesResourceConfig
28
- from .services.service import VocabulariesServiceConfig
28
+ from .datastreams.writers import AsyncWriter, ServiceWriter, YamlWriter
29
+ from .resources import VocabulariesResourceConfig
30
+ from .services.config import VocabulariesServiceConfig
29
31
 
30
32
  VOCABULARIES_RESOURCE_CONFIG = VocabulariesResourceConfig
31
33
  """Configure the resource."""
@@ -41,8 +43,17 @@ VOCABULARIES_IDENTIFIER_SCHEMES = {
41
43
  }
42
44
  """Generic identifier schemes, usable by other vocabularies."""
43
45
 
46
+
def is_pic(val):
    """Test if argument is a Participant Identification Code (PIC).

    A value is accepted only when it is exactly nine characters long and
    every character is an ASCII decimal digit (``0``-``9``).

    :param val: Candidate identifier; expected to be a string.
    :returns: ``True`` if ``val`` has the shape of a PIC, ``False`` otherwise.
    """
    if len(val) != 9:
        return False
    # ``isdigit`` on its own also returns True for non-ASCII digit characters
    # (e.g. superscripts or Arabic-Indic digits), so require ASCII as well.
    return val.isascii() and val.isdigit()
52
+
53
+
44
54
  VOCABULARIES_AFFILIATION_SCHEMES = {
45
55
  **VOCABULARIES_IDENTIFIER_SCHEMES,
56
+ "pic": {"label": _("PIC"), "validator": is_pic},
46
57
  }
47
58
  """Affiliations allowed identifier schemes."""
48
59
 
@@ -75,6 +86,7 @@ VOCABULARIES_AWARDS_OPENAIRE_FUNDERS = {
75
86
  "fwf_________": "013tf3c58",
76
87
  "inca________": "03m8vkq32",
77
88
  "irb_hr______": "03n51vw80",
89
+ "lcs_________": "02ar66p97",
78
90
  "mestd_______": "01znas443",
79
91
  "nhmrc_______": "011kf5r70",
80
92
  "nih_________": "01cwqze88",
@@ -86,6 +98,7 @@ VOCABULARIES_AWARDS_OPENAIRE_FUNDERS = {
86
98
  "snsf________": "00yjd3n13",
87
99
  "sshrc_______": "006cvnv84",
88
100
  "tubitakf____": "04w9kkr77",
101
+ "twcf________": "00x0z1472",
89
102
  "ukri________": "001aqnf71",
90
103
  "wt__________": "029chgv08",
91
104
  }
@@ -101,6 +114,21 @@ VOCABULARIES_NAMES_SCHEMES = {
101
114
  }
102
115
  """Names allowed identifier schemes."""
103
116
 
117
+ VOCABULARIES_SUBJECTS_SCHEMES = {
118
+ "gnd": {"label": _("GND"), "validator": idutils.is_gnd, "datacite": "GND"},
119
+ "url": {"label": _("URL"), "validator": idutils.is_url},
120
+ }
121
+ """Subjects allowed identifier schemes."""
122
+
123
+ VOCABULARIES_CUSTOM_VOCABULARY_TYPES = [
124
+ "names",
125
+ "affiliations",
126
+ "awards",
127
+ "funders",
128
+ "subjects",
129
+ ]
130
+ """List of custom vocabulary types."""
131
+
104
132
  VOCABULARIES_DATASTREAM_READERS = {
105
133
  "csv": CSVReader,
106
134
  "json": JsonReader,
@@ -110,6 +138,7 @@ VOCABULARIES_DATASTREAM_READERS = {
110
138
  "yaml": YamlReader,
111
139
  "zip": ZipReader,
112
140
  "xml": XMLReader,
141
+ "oai-pmh": OAIPMHReader,
113
142
  }
114
143
  """Data Streams readers."""
115
144
 
@@ -121,5 +150,40 @@ VOCABULARIES_DATASTREAM_TRANSFORMERS = {
121
150
  VOCABULARIES_DATASTREAM_WRITERS = {
122
151
  "service": ServiceWriter,
123
152
  "yaml": YamlWriter,
153
+ "async": AsyncWriter,
124
154
  }
125
155
  """Data Streams writers."""
156
+
157
+ VOCABULARIES_TYPES_SORT_OPTIONS = {
158
+ "name": dict(
159
+ title=_("Name"),
160
+ fields=["id"],
161
+ ),
162
+ "count": dict(
163
+ title=_("Number of entries"),
164
+ fields=["count"],
165
+ ),
166
+ }
167
+ """Definitions of available Vocabulary types sort options. """
168
+
169
+ VOCABULARIES_TYPES_SEARCH = {
170
+ "facets": [],
171
+ "sort": ["name", "count"],
172
+ }
173
+ """Vocabulary type search configuration."""
174
+
175
+ SUBJECTS_EUROSCIVOC_FILE_URL = "https://op.europa.eu/o/opportal-service/euvoc-download-handler?cellarURI=http%3A%2F%2Fpublications.europa.eu%2Fresource%2Fdistribution%2Feuroscivoc%2F20231115-0%2Frdf%2Fskos_ap_eu%2FEuroSciVoc-skos-ap-eu.rdf&fileName=EuroSciVoc-skos-ap-eu.rdf"
176
+ """Subject EuroSciVoc file download link."""
177
+
178
+ VOCABULARIES_ORCID_ACCESS_KEY = "TODO"
179
+ """ORCID access key to access the s3 bucket."""
180
+ VOCABULARIES_ORCID_SECRET_KEY = "TODO"
181
+ """ORCID secret key to access the s3 bucket."""
182
+ VOCABULARIES_ORCID_SUMMARIES_BUCKET = "v3.0-summaries"
183
+ """ORCID summaries bucket name."""
184
+ VOCABULARIES_ORCID_SYNC_MAX_WORKERS = 32
185
+ """ORCID max number of simultaneous workers/connections."""
186
+ VOCABULARIES_ORCID_SYNC_SINCE = {
187
+ "days": 1,
188
+ }
189
+ """ORCID time shift to sync. Parameters accepted are the ones passed to 'datetime.timedelta'."""
@@ -8,11 +8,13 @@
8
8
 
9
9
  """Vocabulary affiliations."""
10
10
 
11
+ from flask_resources import JSONSerializer, ResponseHandler
11
12
  from invenio_db import db
12
13
  from invenio_records.dumpers import SearchDumper
13
14
  from invenio_records.dumpers.indexedat import IndexedAtDumperExt
14
15
  from invenio_records_resources.factories.factory import RecordTypeFactory
15
16
  from invenio_records_resources.records.systemfields import ModelPIDField
17
+ from invenio_records_resources.resources.records.headers import etag_headers
16
18
 
17
19
  from ...services.permissions import PermissionPolicy
18
20
  from .config import AffiliationsSearchOptions, service_components
@@ -32,6 +34,7 @@ record_type = RecordTypeFactory(
32
34
  },
33
35
  schema_version="1.0.0",
34
36
  schema_path="local://affiliations/affiliation-v1.0.0.json",
37
+ index_name="affiliations-affiliation-v2.0.0",
35
38
  record_dumper=SearchDumper(
36
39
  model_fields={"pid": ("id", str)},
37
40
  extensions=[
@@ -46,4 +49,12 @@ record_type = RecordTypeFactory(
46
49
  permission_policy_cls=PermissionPolicy,
47
50
  # Resource layer
48
51
  endpoint_route="/affiliations",
52
+ resource_cls_attrs={
53
+ "response_handlers": {
54
+ "application/json": ResponseHandler(JSONSerializer(), headers=etag_headers),
55
+ "application/vnd.inveniordm.v1+json": ResponseHandler(
56
+ JSONSerializer(), headers=etag_headers
57
+ ),
58
+ }
59
+ },
49
60
  )
@@ -1,6 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2021 CERN.
3
+ # Copyright (C) 2021-2024 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -8,7 +8,6 @@
8
8
 
9
9
  """Vocabulary affiliations."""
10
10
 
11
-
12
11
  from .affiliations import record_type
13
12
 
14
13
  Affiliation = record_type.record_cls
@@ -1,6 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2021 CERN.
3
+ # Copyright (C) 2021-2024 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -9,6 +9,7 @@
9
9
  """Vocabulary affiliations configuration."""
10
10
 
11
11
  from flask import current_app
12
+ from invenio_i18n import get_locale
12
13
  from invenio_i18n import lazy_gettext as _
13
14
  from invenio_records_resources.services import SearchOptions
14
15
  from invenio_records_resources.services.records.components import DataComponent
@@ -20,13 +21,23 @@ from ...services.components import PIDComponent
20
21
  affiliation_schemes = LocalProxy(
21
22
  lambda: current_app.config["VOCABULARIES_AFFILIATION_SCHEMES"]
22
23
  )
24
+ localized_title = LocalProxy(lambda: f"title.{get_locale()}^20")
23
25
 
24
26
 
25
27
  class AffiliationsSearchOptions(SearchOptions):
26
28
  """Search options."""
27
29
 
28
30
  suggest_parser_cls = SuggestQueryParser.factory(
29
- fields=["name^100", "acronym^20", "id^20"],
31
+ fields=[
32
+ "name^100",
33
+ "acronym.keyword^100",
34
+ "acronym^40",
35
+ localized_title,
36
+ "id^20",
37
+ "aliases^20",
38
+ ],
39
+ type="most_fields", # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types
40
+ fuzziness="AUTO", # https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness
30
41
  )
31
42
 
32
43
  sort_default = "bestmatch"
@@ -0,0 +1,186 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2022-2024 CERN.
4
+ # Copyright (C) 2024 California Institute of Technology.
5
+ #
6
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
7
+ # modify it under the terms of the MIT License; see LICENSE file for more
8
+ # details.
9
+
10
+ """Affiliations datastreams, transformers, writers and readers."""
11
+
12
+ from flask import current_app
13
+
14
+ from ...datastreams.errors import TransformerError, WriterError
15
+ from ...datastreams.transformers import BaseTransformer
16
+ from ...datastreams.writers import ServiceWriter
17
+ from ..common.ror.datastreams import RORTransformer
18
+
19
+
class AffiliationsServiceWriter(ServiceWriter):
    """Service writer that targets the affiliations service by default."""

    def __init__(self, *args, **kwargs):
        """Initialise the writer.

        ``service_or_name`` falls back to ``"affiliations"`` when the caller
        does not supply it; all other arguments are forwarded unchanged to
        the parent constructor.
        """
        kwargs.setdefault("service_or_name", "affiliations")
        super().__init__(*args, **kwargs)

    def _entry_id(self, entry):
        """Return the value stored under the entry's ``"id"`` key."""
        return entry["id"]
31
+
32
+
class AffiliationsRORTransformer(RORTransformer):
    """ROR transformer preconfigured with the affiliation identifier schemes."""

    def __init__(
        self, *args, vocab_schemes=None, funder_fundref_doi_prefix=None, **kwargs
    ):
        """Initialise the transformer.

        :param vocab_schemes: Identifier schemes handed to the parent class.
            When ``None``, the ``VOCABULARIES_AFFILIATION_SCHEMES`` value of
            the current application's configuration is used instead.
        :param funder_fundref_doi_prefix: Forwarded to the parent constructor
            as given.
        """
        schemes = vocab_schemes
        if schemes is None:
            # Resolved lazily at construction time, so an application context
            # is needed only when no explicit schemes are passed.
            schemes = current_app.config.get("VOCABULARIES_AFFILIATION_SCHEMES")

        parent_kwargs = dict(
            kwargs,
            vocab_schemes=schemes,
            funder_fundref_doi_prefix=funder_fundref_doi_prefix,
        )
        super().__init__(*args, **parent_kwargs)
48
+
49
+
class OpenAIREOrganizationTransformer(BaseTransformer):
    """OpenAIRE Organization Transformer.

    Reduces an OpenAIRE organization record to the affiliation id (taken from
    its ROR pid) and, when present, its PIC identifier.
    """

    def apply(self, stream_entry, **kwargs):
        """Applies the transformation to the stream entry.

        :param stream_entry: Stream entry whose ``entry`` attribute holds the
            OpenAIRE organization record (a mapping with ``id`` and ``pid``).
        :returns: The same stream entry, with ``entry`` replaced by a dict
            holding ``"id"`` (the ROR value without the ``https://ror.org/``
            prefix) and, if a PIC pid exists, a one-item ``"identifiers"``
            list.
        :raises TransformerError: If the record has no ``id``, the ``id`` is
            not an OpenOrgs id, the record has no ``pid`` list, or none of
            the pids is a ROR identifier.
        """
        record = stream_entry.entry

        if "id" not in record:
            raise TransformerError([f"No id for: {record}"])

        if not record["id"].startswith("openorgs____::"):
            raise TransformerError([f"Not valid OpenAIRE OpenOrgs id for: {record}"])

        if "pid" not in record:
            raise TransformerError([f"No alternative identifiers for: {record}"])

        organization = {}

        for pid in record["pid"]:
            scheme = pid["scheme"]
            if scheme == "ROR":
                organization["id"] = pid["value"].removeprefix("https://ror.org/")
            elif scheme == "PIC":
                # If several PIC pids are present, the last one wins.
                organization["identifiers"] = [
                    {
                        "scheme": "pic",
                        "identifier": pid["value"],
                    }
                ]

        # The affiliation writers in this module look the entry up through
        # entry["id"]; without a ROR pid there is nothing to match against,
        # so report it as a transformation error instead of emitting an
        # entry that fails later with a KeyError.
        if "id" not in organization:
            raise TransformerError([f"No ROR identifier for: {record}"])

        stream_entry.entry = organization
        return stream_entry
81
+
82
+
class OpenAIREAffiliationsServiceWriter(ServiceWriter):
    """OpenAIRE Affiliations service writer.

    Targets the affiliations service by default and is configured to update
    existing entries only.
    """

    def __init__(self, *args, **kwargs):
        """Constructor.

        ``service_or_name`` defaults to ``"affiliations"``. ``insert`` and
        ``update`` are fixed by this class and must not be passed by the
        caller.
        """
        service_or_name = kwargs.pop("service_or_name", "affiliations")
        # Here we only update and we do not insert, since OpenAIRE data is
        # used to augment existing affiliations (with PIC identifiers) and is
        # not used to create new affiliations.
        super().__init__(
            *args,
            service_or_name=service_or_name,
            insert=False,
            update=True,
            **kwargs,
        )

    def _entry_id(self, entry):
        """Get the id from an entry."""
        return entry["id"]

    def write(self, stream_entry, *args, **kwargs):
        """Writes the input entry using a given service.

        Delegates unchanged to the parent implementation.
        """
        return super().write(stream_entry, *args, **kwargs)

    def write_many(self, stream_entries, *args, **kwargs):
        """Writes the input entries using a given service.

        Delegates unchanged to the parent implementation.
        """
        return super().write_many(stream_entries, *args, **kwargs)
108
+
109
+
110
+ VOCABULARIES_DATASTREAM_READERS = {}
111
+ """Affiliations datastream readers."""
112
+
113
+ VOCABULARIES_DATASTREAM_WRITERS = {
114
+ "affiliations-service": AffiliationsServiceWriter,
115
+ "openaire-affiliations-service": OpenAIREAffiliationsServiceWriter,
116
+ }
117
+ """Affiliations datastream writers."""
118
+
119
+ VOCABULARIES_DATASTREAM_TRANSFORMERS = {
120
+ "ror-affiliations": AffiliationsRORTransformer,
121
+ "openaire-organization": OpenAIREOrganizationTransformer,
122
+ }
123
+ """Affiliations datastream transformers."""
124
+
125
+
126
+ DATASTREAM_CONFIG = {
127
+ "readers": [
128
+ {
129
+ "type": "zip",
130
+ "args": {
131
+ "regex": "_schema_v2\\.json$",
132
+ },
133
+ },
134
+ {"type": "json"},
135
+ ],
136
+ "transformers": [
137
+ {
138
+ "type": "ror-affiliations",
139
+ },
140
+ ],
141
+ "writers": [
142
+ {
143
+ "type": "async",
144
+ "args": {
145
+ "writer": {
146
+ "type": "affiliations-service",
147
+ }
148
+ },
149
+ }
150
+ ],
151
+ }
152
+ """Data Stream configuration.
153
+
154
+ An origin is required for the reader.
155
+ """
156
+
157
+ DATASTREAM_CONFIG_OPENAIRE = {
158
+ "readers": [
159
+ {"type": "openaire-http", "args": {"tar_href": "/organization.tar"}},
160
+ {
161
+ "type": "tar",
162
+ "args": {
163
+ "regex": "\\.json.gz$",
164
+ "mode": "r",
165
+ },
166
+ },
167
+ {"type": "gzip"},
168
+ {"type": "jsonl"},
169
+ ],
170
+ "transformers": [
171
+ {
172
+ "type": "openaire-organization",
173
+ },
174
+ ],
175
+ "writers": [
176
+ {
177
+ "type": "async",
178
+ "args": {
179
+ "writer": {
180
+ "type": "openaire-affiliations-service",
181
+ }
182
+ },
183
+ }
184
+ ],
185
+ }
186
+ """Alternative Data Stream configuration for OpenAIRE Affiliations."""
@@ -7,13 +7,50 @@
7
7
  "$schema": {
8
8
  "$ref": "local://definitions-v1.0.0.json#/$schema"
9
9
  },
10
+ "tags": {
11
+ "$ref": "local://vocabularies/definitions-v1.0.0.json#/tags"
12
+ },
13
+ "country": {
14
+ "type": "string",
15
+ "description": "Represents an affiliation's country as a country code."
16
+ },
17
+ "country_name": {
18
+ "type": "string",
19
+ "description": "Represents an affiliation's country as a full name."
20
+ },
21
+ "location_name": {
22
+ "type": "string",
23
+ "description": "Represents an affiliation's location name (usually a city)."
24
+ },
10
25
  "acronym": {
11
26
  "type": "string"
12
27
  },
13
28
  "identifiers": {
14
29
  "description": "Alternate identifiers for the record.",
15
30
  "type": "array",
16
- "items": {"$ref": "local://definitions-v1.0.0.json#/identifiers_with_scheme"},
31
+ "items": {
32
+ "$ref": "local://definitions-v1.0.0.json#/identifiers_with_scheme"
33
+ },
34
+ "uniqueItems": true
35
+ },
36
+ "status": {
37
+ "type": "string",
38
+ "description": "Status of the affiliation organization."
39
+ },
40
+ "aliases": {
41
+ "description": "Alternate names for the affiliation.",
42
+ "type": "array",
43
+ "items": {
44
+ "type": "string"
45
+ },
46
+ "uniqueItems": true
47
+ },
48
+ "types": {
49
+ "description": "Types of affiliation organization.",
50
+ "type": "array",
51
+ "items": {
52
+ "type": "string"
53
+ },
17
54
  "uniqueItems": true
18
55
  },
19
56
  "name": {
@@ -84,7 +84,28 @@
84
84
  },
85
85
  "title": {
86
86
  "type": "object",
87
- "dynamic": true
87
+ "dynamic": "true"
88
+ },
89
+ "tags": {
90
+ "type": "keyword"
91
+ },
92
+ "country": {
93
+ "type": "text"
94
+ },
95
+ "country_name": {
96
+ "type": "text"
97
+ },
98
+ "location_name": {
99
+ "type": "text"
100
+ },
101
+ "status": {
102
+ "type": "keyword"
103
+ },
104
+ "aliases": {
105
+ "type": "text"
106
+ },
107
+ "types": {
108
+ "type": "keyword"
88
109
  }
89
110
  }
90
111
  }
@@ -0,0 +1,171 @@
1
+ {
2
+ "settings": {
3
+ "analysis": {
4
+ "char_filter": {
5
+ "strip_special_chars": {
6
+ "type": "pattern_replace",
7
+ "pattern": "[\\p{Punct}\\p{S}]",
8
+ "replacement": ""
9
+ }
10
+ },
11
+ "analyzer": {
12
+ "accent_edge_analyzer": {
13
+ "tokenizer": "standard",
14
+ "type": "custom",
15
+ "char_filter": ["strip_special_chars"],
16
+ "filter": [
17
+ "lowercasepreserveoriginal",
18
+ "asciifoldingpreserveoriginal",
19
+ "edgegrams"
20
+ ]
21
+ },
22
+ "accent_analyzer": {
23
+ "tokenizer": "standard",
24
+ "type": "custom",
25
+ "char_filter": ["strip_special_chars"],
26
+ "filter": [
27
+ "lowercasepreserveoriginal",
28
+ "asciifoldingpreserveoriginal"
29
+ ]
30
+ }
31
+ },
32
+ "normalizer": {
33
+ "accent_normalizer": {
34
+ "type": "custom",
35
+ "char_filter": ["strip_special_chars"],
36
+ "filter": [
37
+ "lowercase",
38
+ "asciifolding"
39
+ ]
40
+ }
41
+ },
42
+ "filter": {
43
+ "lowercasepreserveoriginal": {
44
+ "type": "lowercase",
45
+ "preserve_original": true
46
+ },
47
+ "asciifoldingpreserveoriginal": {
48
+ "type": "asciifolding",
49
+ "preserve_original": true
50
+ },
51
+ "edgegrams": {
52
+ "type": "edge_ngram",
53
+ "min_gram": 2,
54
+ "max_gram": 20
55
+ }
56
+ }
57
+ }
58
+ },
59
+ "mappings": {
60
+ "dynamic_templates": [
61
+ {
62
+ "i18n_title": {
63
+ "path_match": "title.*",
64
+ "match_mapping_type": "string",
65
+ "mapping": {
66
+ "type": "text",
67
+ "analyzer": "accent_edge_analyzer",
68
+ "search_analyzer": "accent_analyzer"
69
+ }
70
+ }
71
+ }
72
+ ],
73
+ "dynamic": "strict",
74
+ "properties": {
75
+ "$schema": {
76
+ "type": "keyword",
77
+ "index": "false"
78
+ },
79
+ "created": {
80
+ "type": "date"
81
+ },
82
+ "updated": {
83
+ "type": "date"
84
+ },
85
+ "indexed_at": {
86
+ "type": "date"
87
+ },
88
+ "uuid": {
89
+ "type": "keyword"
90
+ },
91
+ "version_id": {
92
+ "type": "integer"
93
+ },
94
+ "id": {
95
+ "type": "keyword"
96
+ },
97
+ "name_sort": {
98
+ "type": "keyword"
99
+ },
100
+ "name": {
101
+ "type": "text",
102
+ "analyzer": "accent_edge_analyzer",
103
+ "search_analyzer": "accent_analyzer",
104
+ "copy_to": "name_sort"
105
+ },
106
+ "acronym": {
107
+ "type": "text",
108
+ "analyzer": "accent_edge_analyzer",
109
+ "search_analyzer": "accent_analyzer",
110
+ "fields": {
111
+ "keyword": {
112
+ "type": "keyword",
113
+ "normalizer": "accent_normalizer"
114
+ }
115
+ }
116
+ },
117
+ "identifiers": {
118
+ "properties": {
119
+ "identifier": {
120
+ "type": "keyword"
121
+ },
122
+ "scheme": {
123
+ "type": "keyword"
124
+ }
125
+ }
126
+ },
127
+ "pid": {
128
+ "type": "object",
129
+ "properties": {
130
+ "pk": {
131
+ "type": "integer"
132
+ },
133
+ "pid_type": {
134
+ "type": "keyword"
135
+ },
136
+ "obj_type": {
137
+ "type": "keyword"
138
+ },
139
+ "status": {
140
+ "type": "keyword"
141
+ }
142
+ }
143
+ },
144
+ "title": {
145
+ "type": "object",
146
+ "dynamic": "true"
147
+ },
148
+ "tags": {
149
+ "type": "keyword"
150
+ },
151
+ "country": {
152
+ "type": "text"
153
+ },
154
+ "country_name": {
155
+ "type": "text"
156
+ },
157
+ "location_name": {
158
+ "type": "text"
159
+ },
160
+ "status": {
161
+ "type": "keyword"
162
+ },
163
+ "aliases": {
164
+ "type": "text"
165
+ },
166
+ "types": {
167
+ "type": "keyword"
168
+ }
169
+ }
170
+ }
171
+ }
@@ -84,7 +84,28 @@
84
84
  },
85
85
  "title": {
86
86
  "type": "object",
87
- "dynamic": true
87
+ "dynamic": "true"
88
+ },
89
+ "tags": {
90
+ "type": "keyword"
91
+ },
92
+ "country": {
93
+ "type": "text"
94
+ },
95
+ "country_name": {
96
+ "type": "text"
97
+ },
98
+ "location_name": {
99
+ "type": "text"
100
+ },
101
+ "status": {
102
+ "type": "keyword"
103
+ },
104
+ "aliases": {
105
+ "type": "text"
106
+ },
107
+ "types": {
108
+ "type": "keyword"
88
109
  }
89
110
  }
90
111
  }