invenio-vocabularies 2.3.1__py2.py3-none-any.whl → 6.3.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of invenio-vocabularies might be problematic. Click here for more details.

Files changed (165) hide show
  1. invenio_vocabularies/__init__.py +2 -2
  2. invenio_vocabularies/administration/__init__.py +10 -0
  3. invenio_vocabularies/administration/views/__init__.py +10 -0
  4. invenio_vocabularies/administration/views/vocabularies.py +45 -0
  5. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/package.json +1 -7
  6. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/CustomAwardForm.js +80 -64
  7. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.js +49 -41
  8. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js +5 -7
  9. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/NoAwardResults.js +3 -3
  10. invenio_vocabularies/cli.py +31 -44
  11. invenio_vocabularies/config.py +68 -4
  12. invenio_vocabularies/contrib/affiliations/affiliations.py +11 -0
  13. invenio_vocabularies/contrib/affiliations/api.py +1 -2
  14. invenio_vocabularies/contrib/affiliations/config.py +13 -2
  15. invenio_vocabularies/contrib/affiliations/datastreams.py +186 -0
  16. invenio_vocabularies/contrib/affiliations/jsonschemas/affiliations/affiliation-v1.0.0.json +38 -1
  17. invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v1.0.0.json +22 -1
  18. invenio_vocabularies/contrib/affiliations/mappings/os-v1/affiliations/affiliation-v2.0.0.json +171 -0
  19. invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v1.0.0.json +22 -1
  20. invenio_vocabularies/contrib/affiliations/mappings/os-v2/affiliations/affiliation-v2.0.0.json +171 -0
  21. invenio_vocabularies/contrib/affiliations/mappings/v7/affiliations/affiliation-v1.0.0.json +22 -1
  22. invenio_vocabularies/contrib/affiliations/schema.py +17 -3
  23. invenio_vocabularies/contrib/affiliations/services.py +1 -2
  24. invenio_vocabularies/contrib/awards/awards.py +17 -5
  25. invenio_vocabularies/contrib/awards/datastreams.py +241 -7
  26. invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json +38 -0
  27. invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json +51 -2
  28. invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json +51 -2
  29. invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json +51 -2
  30. invenio_vocabularies/contrib/awards/schema.py +16 -1
  31. invenio_vocabularies/contrib/awards/serializer.py +8 -1
  32. invenio_vocabularies/contrib/awards/services.py +1 -2
  33. invenio_vocabularies/contrib/common/__init__.py +9 -0
  34. invenio_vocabularies/contrib/common/openaire/__init__.py +9 -0
  35. invenio_vocabularies/contrib/common/openaire/datastreams.py +84 -0
  36. invenio_vocabularies/contrib/common/ror/__init__.py +9 -0
  37. invenio_vocabularies/contrib/common/ror/datastreams.py +220 -0
  38. invenio_vocabularies/contrib/funders/config.py +11 -2
  39. invenio_vocabularies/contrib/funders/datastreams.py +40 -62
  40. invenio_vocabularies/contrib/funders/funders.py +3 -1
  41. invenio_vocabularies/contrib/funders/jsonschemas/funders/funder-v1.0.0.json +36 -1
  42. invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v1.0.0.json +22 -1
  43. invenio_vocabularies/contrib/funders/mappings/os-v1/funders/funder-v2.0.0.json +156 -0
  44. invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v1.0.0.json +22 -1
  45. invenio_vocabularies/contrib/funders/mappings/os-v2/funders/funder-v2.0.0.json +156 -0
  46. invenio_vocabularies/contrib/funders/mappings/v7/funders/funder-v1.0.0.json +22 -1
  47. invenio_vocabularies/contrib/funders/schema.py +8 -0
  48. invenio_vocabularies/contrib/funders/serializer.py +2 -1
  49. invenio_vocabularies/contrib/names/config.py +5 -3
  50. invenio_vocabularies/contrib/names/datastreams.py +172 -4
  51. invenio_vocabularies/contrib/names/jsonschemas/names/name-v1.0.0.json +3 -0
  52. invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v1.0.0.json +3 -0
  53. invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v2.0.0.json +150 -0
  54. invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v1.0.0.json +3 -0
  55. invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v2.0.0.json +150 -0
  56. invenio_vocabularies/contrib/names/mappings/v7/names/name-v1.0.0.json +3 -0
  57. invenio_vocabularies/contrib/names/names.py +15 -3
  58. invenio_vocabularies/contrib/names/permissions.py +20 -0
  59. invenio_vocabularies/contrib/names/s3client.py +44 -0
  60. invenio_vocabularies/contrib/names/schema.py +14 -0
  61. invenio_vocabularies/contrib/subjects/config.py +9 -3
  62. invenio_vocabularies/contrib/subjects/datastreams.py +61 -0
  63. invenio_vocabularies/contrib/subjects/euroscivoc/__init__.py +9 -0
  64. invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py +171 -0
  65. invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json +31 -0
  66. invenio_vocabularies/contrib/subjects/mappings/os-v1/subjects/subject-v1.0.0.json +35 -0
  67. invenio_vocabularies/contrib/subjects/mappings/os-v2/subjects/subject-v1.0.0.json +35 -0
  68. invenio_vocabularies/contrib/subjects/mappings/v7/subjects/subject-v1.0.0.json +35 -0
  69. invenio_vocabularies/contrib/subjects/mesh/__init__.py +9 -0
  70. invenio_vocabularies/contrib/subjects/mesh/datastreams.py +43 -0
  71. invenio_vocabularies/contrib/subjects/schema.py +47 -5
  72. invenio_vocabularies/contrib/subjects/subjects.py +10 -0
  73. invenio_vocabularies/datastreams/datastreams.py +61 -13
  74. invenio_vocabularies/datastreams/factories.py +1 -2
  75. invenio_vocabularies/datastreams/readers.py +138 -29
  76. invenio_vocabularies/datastreams/tasks.py +37 -0
  77. invenio_vocabularies/datastreams/transformers.py +17 -27
  78. invenio_vocabularies/datastreams/writers.py +116 -14
  79. invenio_vocabularies/datastreams/xml.py +34 -0
  80. invenio_vocabularies/ext.py +59 -5
  81. invenio_vocabularies/factories.py +137 -0
  82. invenio_vocabularies/jobs.py +133 -0
  83. invenio_vocabularies/proxies.py +2 -2
  84. invenio_vocabularies/records/jsonschemas/vocabularies/definitions-v1.0.0.json +7 -0
  85. invenio_vocabularies/records/jsonschemas/vocabularies/vocabulary-v1.0.0.json +1 -4
  86. invenio_vocabularies/records/mappings/os-v1/vocabularies/vocabulary-v1.0.0.json +3 -3
  87. invenio_vocabularies/records/mappings/os-v2/vocabularies/vocabulary-v1.0.0.json +3 -3
  88. invenio_vocabularies/records/mappings/v7/vocabularies/vocabulary-v1.0.0.json +3 -3
  89. invenio_vocabularies/records/models.py +2 -4
  90. invenio_vocabularies/records/pidprovider.py +1 -2
  91. invenio_vocabularies/records/systemfields/relations.py +2 -2
  92. invenio_vocabularies/resources/__init__.py +9 -1
  93. invenio_vocabularies/resources/config.py +105 -0
  94. invenio_vocabularies/resources/resource.py +31 -41
  95. invenio_vocabularies/resources/schema.py +2 -1
  96. invenio_vocabularies/services/__init__.py +5 -2
  97. invenio_vocabularies/services/config.py +179 -0
  98. invenio_vocabularies/services/custom_fields/__init__.py +6 -2
  99. invenio_vocabularies/services/custom_fields/subject.py +82 -0
  100. invenio_vocabularies/services/custom_fields/vocabulary.py +5 -3
  101. invenio_vocabularies/services/permissions.py +3 -1
  102. invenio_vocabularies/services/results.py +110 -0
  103. invenio_vocabularies/services/schema.py +11 -2
  104. invenio_vocabularies/services/service.py +46 -94
  105. invenio_vocabularies/services/tasks.py +1 -1
  106. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/subjects.html +23 -0
  107. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabularies-list.html +12 -0
  108. invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/vocabulary-details.html +71 -0
  109. invenio_vocabularies/translations/af/LC_MESSAGES/messages.mo +0 -0
  110. invenio_vocabularies/translations/ar/LC_MESSAGES/messages.mo +0 -0
  111. invenio_vocabularies/translations/bg/LC_MESSAGES/messages.mo +0 -0
  112. invenio_vocabularies/translations/ca/LC_MESSAGES/messages.mo +0 -0
  113. invenio_vocabularies/translations/cs/LC_MESSAGES/messages.mo +0 -0
  114. invenio_vocabularies/translations/da/LC_MESSAGES/messages.mo +0 -0
  115. invenio_vocabularies/translations/de/LC_MESSAGES/messages.mo +0 -0
  116. invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.mo +0 -0
  117. invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.mo +0 -0
  118. invenio_vocabularies/translations/el/LC_MESSAGES/messages.mo +0 -0
  119. invenio_vocabularies/translations/en/LC_MESSAGES/messages.mo +0 -0
  120. invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.mo +0 -0
  121. invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.mo +0 -0
  122. invenio_vocabularies/translations/es/LC_MESSAGES/messages.mo +0 -0
  123. invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.mo +0 -0
  124. invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.mo +0 -0
  125. invenio_vocabularies/translations/et/LC_MESSAGES/messages.mo +0 -0
  126. invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.mo +0 -0
  127. invenio_vocabularies/translations/fa/LC_MESSAGES/messages.mo +0 -0
  128. invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.mo +0 -0
  129. invenio_vocabularies/translations/fr/LC_MESSAGES/messages.mo +0 -0
  130. invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.mo +0 -0
  131. invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.mo +0 -0
  132. invenio_vocabularies/translations/gl/LC_MESSAGES/messages.mo +0 -0
  133. invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.mo +0 -0
  134. invenio_vocabularies/translations/hr/LC_MESSAGES/messages.mo +0 -0
  135. invenio_vocabularies/translations/hu/LC_MESSAGES/messages.mo +0 -0
  136. invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.mo +0 -0
  137. invenio_vocabularies/translations/it/LC_MESSAGES/messages.mo +0 -0
  138. invenio_vocabularies/translations/ja/LC_MESSAGES/messages.mo +0 -0
  139. invenio_vocabularies/translations/ka/LC_MESSAGES/messages.mo +0 -0
  140. invenio_vocabularies/translations/lt/LC_MESSAGES/messages.mo +0 -0
  141. invenio_vocabularies/translations/messages.pot +95 -48
  142. invenio_vocabularies/translations/ne/LC_MESSAGES/messages.mo +0 -0
  143. invenio_vocabularies/translations/no/LC_MESSAGES/messages.mo +0 -0
  144. invenio_vocabularies/translations/pl/LC_MESSAGES/messages.mo +0 -0
  145. invenio_vocabularies/translations/pt/LC_MESSAGES/messages.mo +0 -0
  146. invenio_vocabularies/translations/ro/LC_MESSAGES/messages.mo +0 -0
  147. invenio_vocabularies/translations/ru/LC_MESSAGES/messages.mo +0 -0
  148. invenio_vocabularies/translations/rw/LC_MESSAGES/messages.mo +0 -0
  149. invenio_vocabularies/translations/sk/LC_MESSAGES/messages.mo +0 -0
  150. invenio_vocabularies/translations/sv/LC_MESSAGES/messages.mo +0 -0
  151. invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.mo +0 -0
  152. invenio_vocabularies/translations/tr/LC_MESSAGES/messages.mo +0 -0
  153. invenio_vocabularies/translations/uk/LC_MESSAGES/messages.mo +0 -0
  154. invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.mo +0 -0
  155. invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.mo +0 -0
  156. invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.mo +0 -0
  157. invenio_vocabularies/views.py +12 -26
  158. invenio_vocabularies/webpack.py +3 -3
  159. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/METADATA +150 -6
  160. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/RECORD +165 -132
  161. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/WHEEL +1 -1
  162. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/entry_points.txt +17 -0
  163. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/AUTHORS.rst +0 -0
  164. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/LICENSE +0 -0
  165. {invenio_vocabularies-2.3.1.dist-info → invenio_vocabularies-6.3.1.dist-info}/top_level.txt +0 -0
@@ -28,12 +28,14 @@ class NamesSearchOptions(SearchOptions):
28
28
 
29
29
  suggest_parser_cls = SuggestQueryParser.factory(
30
30
  fields=[
31
- "name^100",
32
- "family_name^100",
33
31
  "given_name^100",
32
+ "name^70",
33
+ "family_name^50",
34
34
  "identifiers.identifier^20",
35
- "affiliations.name^10",
35
+ "affiliations.name^20",
36
36
  ],
37
+ type="most_fields", # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types
38
+ fuzziness="AUTO",
37
39
  )
38
40
 
39
41
  sort_default = "bestmatch"
@@ -1,6 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2021-2022 CERN.
3
+ # Copyright (C) 2021-2024 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -8,15 +8,123 @@
8
8
 
9
9
  """Names datastreams, transformers, writers and readers."""
10
10
 
11
+ import csv
12
+ import io
13
+ import tarfile
14
+ from concurrent.futures import ThreadPoolExecutor, as_completed
15
+ from datetime import timedelta
16
+
17
+ import arrow
18
+ import regex as re
19
+ from flask import current_app
11
20
  from invenio_access.permissions import system_identity
12
21
  from invenio_records.dictutils import dict_lookup
13
22
 
23
+ from invenio_vocabularies.contrib.names.s3client import S3OrcidClient
24
+
14
25
  from ...datastreams.errors import TransformerError
15
- from ...datastreams.readers import SimpleHTTPReader
26
+ from ...datastreams.readers import BaseReader, SimpleHTTPReader
16
27
  from ...datastreams.transformers import BaseTransformer
17
28
  from ...datastreams.writers import ServiceWriter
18
29
 
19
30
 
31
class OrcidDataSyncReader(BaseReader):
    """ORCiD Data Sync Reader.

    Downloads the ORCiD "lambda file" (a tar archive holding a CSV of ORCiD
    iDs with their last modification date) from S3, selects the iDs modified
    inside the sync window and yields the content fetched for each of them.
    """

    def __init__(self, origin=None, mode="r", since=None, *args, **kwargs):
        """Constructor.

        :param origin: Data source (e.g. filepath).
            Can be none in case of piped readers.
        :param mode: Mode forwarded to the base reader.
        :param since: Optional mapping of ``datetime.timedelta`` keyword
            arguments. When truthy it replaces the
            ``VOCABULARIES_ORCID_SYNC_SINCE`` configuration value.
        """
        super().__init__(origin=origin, mode=mode, *args, **kwargs)
        self.s3_client = S3OrcidClient()
        self.since = since

    def _fetch_orcid_data(self, orcid_to_sync, bucket):
        """Fetch a single ORCiD record from S3.

        :param orcid_to_sync: ORCiD iD of the record to fetch.
        :param bucket: Name of the S3 bucket holding the records.
        :returns: Whatever the S3 client returns for the file, or ``None``
            when the fetch failed.
        """
        # Local import: keeps the block self-contained without touching the
        # file-level import section.
        import logging

        # The ORCiD file key is located in a folder whose name corresponds to
        # the last three digits of the ORCiD
        suffix = orcid_to_sync[-3:]
        key = f"{suffix}/{orcid_to_sync}.xml"
        try:
            return self.s3_client.read_file(f"s3://{bucket}/{key}")
        except Exception:
            # Best effort: one unreadable record must not abort the whole
            # sync. This method is submitted to a thread pool (see ``_iter``),
            # where a Flask application context may not be available, hence
            # the standard library logger instead of ``current_app.logger``.
            logging.getLogger(__name__).warning(
                "Failed to fetch ORCiD record %s from bucket %s.",
                orcid_to_sync,
                bucket,
                exc_info=True,
            )
            return None

    def _process_lambda_file(self, fileobj):
        """Process the ORCiD lambda file and yield the ORCiDs to sync.

        The decoded fileobj looks like the following:
        orcid, path, date_created, last_modified
        0000-0001-5109-3700, http://orcid.org/0000-0001-5109-3700, 2014-08-02 15:00:00.000,2021-08-02 15:00:00.000

        Yield ORCiDs to sync until the last sync date is reached.

        :param fileobj: Binary file-like object with the UTF-8 encoded CSV.
        """
        date_format = "YYYY-MM-DD HH:mm:ss.SSSSSS"
        date_format_no_millis = "YYYY-MM-DD HH:mm:ss"
        time_shift = current_app.config["VOCABULARIES_ORCID_SYNC_SINCE"]
        if self.since:
            time_shift = self.since
        last_sync = arrow.now() - timedelta(**time_shift)

        file_content = fileobj.read().decode("utf-8")

        # DictReader consumes the first line as the header, so every row
        # below is a data row keyed by column name.
        csv_reader = csv.DictReader(file_content.splitlines())

        for row in csv_reader:
            orcid = row["orcid"]

            # Lambda file is ordered by last modified date
            last_modified_str = row["last_modified"]
            try:
                last_modified_date = arrow.get(last_modified_str, date_format)
            except arrow.parser.ParserError:
                # Fall back to timestamps without a fractional-second part
                last_modified_date = arrow.get(last_modified_str, date_format_no_millis)

            if last_modified_date < last_sync:
                # Given the ordering, no later row can be inside the window
                break
            yield orcid

    def _iter(self, orcids):
        """Fetch the given ORCiD records concurrently, yielding each result.

        Results are yielded in completion order; failed fetches (``None``)
        are dropped.
        """
        # Resolve the configuration once, in the calling thread
        bucket = current_app.config["VOCABULARIES_ORCID_SUMMARIES_BUCKET"]
        with ThreadPoolExecutor(
            max_workers=current_app.config["VOCABULARIES_ORCID_SYNC_MAX_WORKERS"]
        ) as executor:
            futures = [
                executor.submit(self._fetch_orcid_data, orcid, bucket)
                for orcid in orcids
            ]
            for future in as_completed(futures):
                result = future.result()
                if result is not None:
                    yield result

    def read(self, item=None, *args, **kwargs):
        """Read the ORCiD lambda file, collect the ORCiDs to sync and yield their data."""
        # Read the file from S3
        tar_content = self.s3_client.read_file(
            "s3://orcid-lambda-file/last_modified.csv.tar"
        )

        orcids_to_sync = []
        # Open the tar file and process it
        with tarfile.open(fileobj=io.BytesIO(tar_content)) as tar:
            # Iterate over each member (file or directory) in the tar file
            for member in tar.getmembers():
                # extractfile() returns None for members that are not files
                extracted_file = tar.extractfile(member)
                if extracted_file:
                    # Process the file and get the ORCiDs to sync
                    orcids_to_sync.extend(self._process_lambda_file(extracted_file))

        yield from self._iter(orcids_to_sync)
126
+
127
+
20
128
  class OrcidHTTPReader(SimpleHTTPReader):
21
129
  """ORCiD HTTP Reader."""
22
130
 
@@ -30,9 +138,26 @@ class OrcidHTTPReader(SimpleHTTPReader):
30
138
  super().__init__(origin, *args, **kwargs)
31
139
 
32
140
 
141
# Character class searched for in names by ``OrcidTransformer._is_valid_name``,
# which compiles it with the ``regex`` module's ``V1`` flag.
# NOTE(review): under V1, ``--`` inside a class is set difference, so this
# should read as "punctuation, symbols, digits and emoji, except , . ( ) - '"
# — confirm against the ``regex`` documentation.
DEFAULT_NAMES_EXCLUDE_REGEX = r"[\p{P}\p{S}\p{Nd}\p{No}\p{Emoji}--,.()\-']"
"""Regex to filter out names with punctuations, symbols, decimal numbers and emojis."""
143
+
144
+
33
145
  class OrcidTransformer(BaseTransformer):
34
146
  """Transforms an ORCiD record into a names record."""
35
147
 
148
def __init__(
    self, *args, names_exclude_regex=DEFAULT_NAMES_EXCLUDE_REGEX, **kwargs
) -> None:
    """Constructor.

    :param names_exclude_regex: Pattern used by ``_is_valid_name``; a name
        containing a match is rejected. A falsy value disables the check.

    Positional and any other keyword arguments are accepted but are not
    forwarded to the base class initializer.
    """
    self._names_exclude_regex = names_exclude_regex
    super().__init__()
154
+
155
def _is_valid_name(self, name):
    """Return ``True`` when ``name`` contains no match for the exclusion regex.

    Every name is accepted when no exclusion regex is configured.
    """
    pattern = self._names_exclude_regex
    if pattern:
        found = re.search(pattern, name, re.UNICODE | re.V1)
        return found is None
    return True
160
+
36
161
  def apply(self, stream_entry, **kwargs):
37
162
  """Applies the transformation to the stream entry."""
38
163
  record = stream_entry.entry
@@ -42,6 +167,11 @@ class OrcidTransformer(BaseTransformer):
42
167
  name = person.get("name")
43
168
  if name is None:
44
169
  raise TransformerError(f"Name not found in ORCiD entry.")
170
+ if name.get("family-name") is None:
171
+ raise TransformerError(f"Family name not found in ORCiD entry.")
172
+
173
+ if not self._is_valid_name(name["given-names"] + name["family-name"]):
174
+ raise TransformerError(f"Invalid characters in name.")
45
175
 
46
176
  entry = {
47
177
  "id": orcid_id,
@@ -89,6 +219,7 @@ class NamesServiceWriter(ServiceWriter):
89
219
 
90
220
  VOCABULARIES_DATASTREAM_READERS = {
91
221
  "orcid-http": OrcidHTTPReader,
222
+ "orcid-data-sync": OrcidDataSyncReader,
92
223
  }
93
224
 
94
225
 
@@ -109,10 +240,15 @@ DATASTREAM_CONFIG = {
109
240
  {
110
241
  "type": "tar",
111
242
  "args": {
112
- "regex": ".xml$",
243
+ "regex": "\\.xml$",
244
+ },
245
+ },
246
+ {
247
+ "type": "xml",
248
+ "args": {
249
+ "root_element": "record",
113
250
  },
114
251
  },
115
- {"type": "xml"},
116
252
  ],
117
253
  "transformers": [{"type": "orcid"}],
118
254
  "writers": [
@@ -128,3 +264,35 @@ DATASTREAM_CONFIG = {
128
264
 
129
265
  An origin is required for the reader.
130
266
  """
267
+
268
# TODO: Used on the jobs and should be set as a "PRESET" (naming to be defined)
ORCID_PRESET_DATASTREAM_CONFIG = {
    "readers": [
        {
            "type": "orcid-data-sync",
        },
        {
            "type": "xml",
            "args": {
                "root_element": "record",
            },
        },
    ],
    "transformers": [{"type": "orcid"}],
    "writers": [
        {
            "type": "async",
            "args": {
                "writer": {
                    "type": "names-service",
                }
            },
        }
    ],
    "batch_size": 1000,
    "write_many": True,
}
"""ORCiD data-sync preset configuration.

Unlike ``DATASTREAM_CONFIG``, no origin is configured here: the
``orcid-data-sync`` reader is declared without arguments.
"""
@@ -8,6 +8,9 @@
8
8
  "$schema": {
9
9
  "$ref": "local://definitions-v1.0.0.json#/$schema"
10
10
  },
11
+ "tags": {
12
+ "$ref": "local://vocabularies/definitions-v1.0.0.json#/tags"
13
+ },
11
14
  "scheme": {
12
15
  "description": "Identifier of the name scheme.",
13
16
  "$ref": "local://definitions-v1.0.0.json#/identifier"
@@ -24,6 +24,9 @@
24
24
  "id": {
25
25
  "type": "keyword"
26
26
  },
27
+ "tags": {
28
+ "type": "keyword"
29
+ },
27
30
  "name_sort": {
28
31
  "type": "keyword"
29
32
  },
@@ -0,0 +1,150 @@
1
+ {
2
+ "settings": {
3
+ "analysis": {
4
+ "char_filter": {
5
+ "strip_special_chars": {
6
+ "type": "pattern_replace",
7
+ "pattern": "[\\p{Punct}\\p{S}]",
8
+ "replacement": ""
9
+ }
10
+ },
11
+ "analyzer": {
12
+ "accent_edge_analyzer": {
13
+ "tokenizer": "standard",
14
+ "type": "custom",
15
+ "char_filter": ["strip_special_chars"],
16
+ "filter": [
17
+ "lowercasepreserveoriginal",
18
+ "asciifoldingpreserveoriginal",
19
+ "edgegrams"
20
+ ]
21
+ },
22
+ "accent_analyzer": {
23
+ "tokenizer": "standard",
24
+ "type": "custom",
25
+ "char_filter": ["strip_special_chars"],
26
+ "filter": [
27
+ "lowercasepreserveoriginal",
28
+ "asciifoldingpreserveoriginal"
29
+ ]
30
+ }
31
+ },
32
+ "normalizer": {
33
+ "accent_normalizer": {
34
+ "type": "custom",
35
+ "char_filter": ["strip_special_chars"],
36
+ "filter": [
37
+ "lowercase",
38
+ "asciifolding"
39
+ ]
40
+ }
41
+ },
42
+ "filter": {
43
+ "lowercasepreserveoriginal": {
44
+ "type": "lowercase",
45
+ "preserve_original": true
46
+ },
47
+ "asciifoldingpreserveoriginal": {
48
+ "type": "asciifolding",
49
+ "preserve_original": true
50
+ },
51
+ "edgegrams": {
52
+ "type": "edge_ngram",
53
+ "min_gram": 2,
54
+ "max_gram": 20
55
+ }
56
+ }
57
+ }
58
+ },
59
+ "mappings": {
60
+ "dynamic": "strict",
61
+ "properties": {
62
+ "$schema": {
63
+ "type": "keyword",
64
+ "index": "false"
65
+ },
66
+ "created": {
67
+ "type": "date"
68
+ },
69
+ "updated": {
70
+ "type": "date"
71
+ },
72
+ "indexed_at": {
73
+ "type": "date"
74
+ },
75
+ "uuid": {
76
+ "type": "keyword"
77
+ },
78
+ "version_id": {
79
+ "type": "integer"
80
+ },
81
+ "id": {
82
+ "type": "keyword"
83
+ },
84
+ "tags": {
85
+ "type": "keyword"
86
+ },
87
+ "name_sort": {
88
+ "type": "keyword"
89
+ },
90
+ "name": {
91
+ "type": "text",
92
+ "analyzer": "accent_edge_analyzer",
93
+ "search_analyzer": "accent_analyzer",
94
+ "copy_to": "name_sort"
95
+ },
96
+ "given_name": {
97
+ "type": "text",
98
+ "analyzer": "accent_edge_analyzer",
99
+ "search_analyzer": "accent_analyzer"
100
+ },
101
+ "family_name": {
102
+ "type": "text"
103
+ },
104
+ "identifiers": {
105
+ "properties": {
106
+ "identifier": {
107
+ "type": "keyword",
108
+ "normalizer": "accent_normalizer"
109
+ },
110
+ "scheme": {
111
+ "type": "keyword"
112
+ }
113
+ }
114
+ },
115
+ "affiliations": {
116
+ "type": "object",
117
+ "properties": {
118
+ "@v": {
119
+ "type": "keyword"
120
+ },
121
+ "id": {
122
+ "type": "keyword"
123
+ },
124
+ "name": {
125
+ "type": "text",
126
+ "analyzer": "accent_edge_analyzer",
127
+ "search_analyzer": "accent_analyzer"
128
+ }
129
+ }
130
+ },
131
+ "pid": {
132
+ "type": "object",
133
+ "properties": {
134
+ "pk": {
135
+ "type": "integer"
136
+ },
137
+ "pid_type": {
138
+ "type": "keyword"
139
+ },
140
+ "obj_type": {
141
+ "type": "keyword"
142
+ },
143
+ "status": {
144
+ "type": "keyword"
145
+ }
146
+ }
147
+ }
148
+ }
149
+ }
150
+ }
@@ -24,6 +24,9 @@
24
24
  "id": {
25
25
  "type": "keyword"
26
26
  },
27
+ "tags": {
28
+ "type": "keyword"
29
+ },
27
30
  "name_sort": {
28
31
  "type": "keyword"
29
32
  },
@@ -0,0 +1,150 @@
1
+ {
2
+ "settings": {
3
+ "analysis": {
4
+ "char_filter": {
5
+ "strip_special_chars": {
6
+ "type": "pattern_replace",
7
+ "pattern": "[\\p{Punct}\\p{S}]",
8
+ "replacement": ""
9
+ }
10
+ },
11
+ "analyzer": {
12
+ "accent_edge_analyzer": {
13
+ "tokenizer": "standard",
14
+ "type": "custom",
15
+ "char_filter": ["strip_special_chars"],
16
+ "filter": [
17
+ "lowercasepreserveoriginal",
18
+ "asciifoldingpreserveoriginal",
19
+ "edgegrams"
20
+ ]
21
+ },
22
+ "accent_analyzer": {
23
+ "tokenizer": "standard",
24
+ "type": "custom",
25
+ "char_filter": ["strip_special_chars"],
26
+ "filter": [
27
+ "lowercasepreserveoriginal",
28
+ "asciifoldingpreserveoriginal"
29
+ ]
30
+ }
31
+ },
32
+ "normalizer": {
33
+ "accent_normalizer": {
34
+ "type": "custom",
35
+ "char_filter": ["strip_special_chars"],
36
+ "filter": [
37
+ "lowercase",
38
+ "asciifolding"
39
+ ]
40
+ }
41
+ },
42
+ "filter": {
43
+ "lowercasepreserveoriginal": {
44
+ "type": "lowercase",
45
+ "preserve_original": true
46
+ },
47
+ "asciifoldingpreserveoriginal": {
48
+ "type": "asciifolding",
49
+ "preserve_original": true
50
+ },
51
+ "edgegrams": {
52
+ "type": "edge_ngram",
53
+ "min_gram": 2,
54
+ "max_gram": 20
55
+ }
56
+ }
57
+ }
58
+ },
59
+ "mappings": {
60
+ "dynamic": "strict",
61
+ "properties": {
62
+ "$schema": {
63
+ "type": "keyword",
64
+ "index": "false"
65
+ },
66
+ "created": {
67
+ "type": "date"
68
+ },
69
+ "updated": {
70
+ "type": "date"
71
+ },
72
+ "indexed_at": {
73
+ "type": "date"
74
+ },
75
+ "uuid": {
76
+ "type": "keyword"
77
+ },
78
+ "version_id": {
79
+ "type": "integer"
80
+ },
81
+ "id": {
82
+ "type": "keyword"
83
+ },
84
+ "tags": {
85
+ "type": "keyword"
86
+ },
87
+ "name_sort": {
88
+ "type": "keyword"
89
+ },
90
+ "name": {
91
+ "type": "text",
92
+ "analyzer": "accent_edge_analyzer",
93
+ "search_analyzer": "accent_analyzer",
94
+ "copy_to": "name_sort"
95
+ },
96
+ "given_name": {
97
+ "type": "text",
98
+ "analyzer": "accent_edge_analyzer",
99
+ "search_analyzer": "accent_analyzer"
100
+ },
101
+ "family_name": {
102
+ "type": "text"
103
+ },
104
+ "identifiers": {
105
+ "properties": {
106
+ "identifier": {
107
+ "type": "keyword",
108
+ "normalizer": "accent_normalizer"
109
+ },
110
+ "scheme": {
111
+ "type": "keyword"
112
+ }
113
+ }
114
+ },
115
+ "affiliations": {
116
+ "type": "object",
117
+ "properties": {
118
+ "@v": {
119
+ "type": "keyword"
120
+ },
121
+ "id": {
122
+ "type": "keyword"
123
+ },
124
+ "name": {
125
+ "type": "text",
126
+ "analyzer": "accent_edge_analyzer",
127
+ "search_analyzer": "accent_analyzer"
128
+ }
129
+ }
130
+ },
131
+ "pid": {
132
+ "type": "object",
133
+ "properties": {
134
+ "pk": {
135
+ "type": "integer"
136
+ },
137
+ "pid_type": {
138
+ "type": "keyword"
139
+ },
140
+ "obj_type": {
141
+ "type": "keyword"
142
+ },
143
+ "status": {
144
+ "type": "keyword"
145
+ }
146
+ }
147
+ }
148
+ }
149
+ }
150
+ }
@@ -24,6 +24,9 @@
24
24
  "id": {
25
25
  "type": "keyword"
26
26
  },
27
+ "tags": {
28
+ "type": "keyword"
29
+ },
27
30
  "name_sort": {
28
31
  "type": "keyword"
29
32
  },
@@ -1,6 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2021 CERN.
3
+ # Copyright (C) 2021-2024 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -8,6 +8,7 @@
8
8
 
9
9
  """Vocabulary names."""
10
10
 
11
+ from flask_resources import JSONSerializer, ResponseHandler
11
12
  from invenio_db import db
12
13
  from invenio_records.dumpers import SearchDumper
13
14
  from invenio_records.dumpers.indexedat import IndexedAtDumperExt
@@ -18,8 +19,10 @@ from invenio_records_resources.records.systemfields import (
18
19
  ModelPIDField,
19
20
  PIDListRelation,
20
21
  )
22
+ from invenio_records_resources.resources.records.headers import etag_headers
23
+
24
+ from invenio_vocabularies.contrib.names.permissions import NamesPermissionPolicy
21
25
 
22
- from ...services.permissions import PermissionPolicy
23
26
  from ..affiliations.api import Affiliation
24
27
  from .config import NamesSearchOptions, service_components
25
28
  from .schema import NameSchema
@@ -47,6 +50,7 @@ record_type = RecordTypeFactory(
47
50
  },
48
51
  schema_version="1.0.0",
49
52
  schema_path="local://names/name-v1.0.0.json",
53
+ index_name="names-name-v2.0.0",
50
54
  record_relations=name_relations,
51
55
  record_dumper=SearchDumper(
52
56
  model_fields={"pid": ("id", str)},
@@ -60,7 +64,15 @@ record_type = RecordTypeFactory(
60
64
  service_schema=NameSchema,
61
65
  search_options=NamesSearchOptions,
62
66
  service_components=service_components,
63
- permission_policy_cls=PermissionPolicy,
67
+ permission_policy_cls=NamesPermissionPolicy,
64
68
  # Resource layer
65
69
  endpoint_route="/names",
70
+ resource_cls_attrs={
71
+ "response_handlers": {
72
+ "application/json": ResponseHandler(JSONSerializer(), headers=etag_headers),
73
+ "application/vnd.inveniordm.v1+json": ResponseHandler(
74
+ JSONSerializer(), headers=etag_headers
75
+ ),
76
+ }
77
+ },
66
78
  )
@@ -0,0 +1,20 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2020-2024 CERN.
4
+ #
5
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
6
+ # modify it under the terms of the MIT License; see LICENSE file for more
7
+ # details.
8
+
9
+ """Vocabulary permissions."""
10
+
11
+ from invenio_records_permissions.generators import AuthenticatedUser, SystemProcess
12
+
13
+ from ...services.permissions import PermissionPolicy
14
+
15
+
16
class NamesPermissionPolicy(PermissionPolicy):
    """Permission policy for the names vocabulary.

    Sets the ``can_search`` and ``can_read`` rules; every other rule is
    inherited from the base ``PermissionPolicy``.
    """

    # NOTE(review): presumably limits searching to system processes and
    # logged-in users — confirm against the generators' semantics.
    can_search = [SystemProcess(), AuthenticatedUser()]
    # Same generators as ``can_search``, declared as a separate list.
    can_read = [SystemProcess(), AuthenticatedUser()]