invenio-vocabularies 6.6.0__py2.py3-none-any.whl → 6.8.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of invenio-vocabularies might be problematic. Click here for more details.
- invenio_vocabularies/__init__.py +1 -1
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js +3 -27
- invenio_vocabularies/cli.py +2 -0
- invenio_vocabularies/config.py +43 -1
- invenio_vocabularies/contrib/affiliations/config.py +21 -10
- invenio_vocabularies/contrib/affiliations/datastreams.py +103 -1
- invenio_vocabularies/contrib/awards/datastreams.py +7 -0
- invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json +9 -0
- invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json +22 -1
- invenio_vocabularies/contrib/awards/schema.py +9 -3
- invenio_vocabularies/contrib/funders/config.py +19 -12
- invenio_vocabularies/contrib/names/config.py +13 -10
- invenio_vocabularies/contrib/names/datastreams.py +182 -57
- invenio_vocabularies/contrib/names/mappings/os-v1/names/name-v2.0.0.json +11 -0
- invenio_vocabularies/contrib/names/mappings/os-v2/names/name-v2.0.0.json +11 -0
- invenio_vocabularies/contrib/names/names.py +1 -1
- invenio_vocabularies/contrib/names/schema.py +10 -2
- invenio_vocabularies/contrib/subjects/bodc/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/bodc/datastreams.py +111 -0
- invenio_vocabularies/contrib/subjects/config.py +19 -5
- invenio_vocabularies/contrib/subjects/datastreams.py +4 -2
- invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py +56 -126
- invenio_vocabularies/contrib/subjects/gemet/__init__.py +9 -0
- invenio_vocabularies/contrib/subjects/gemet/datastreams.py +140 -0
- invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json +13 -2
- invenio_vocabularies/contrib/subjects/schema.py +18 -3
- invenio_vocabularies/datastreams/datastreams.py +18 -7
- invenio_vocabularies/datastreams/factories.py +3 -1
- invenio_vocabularies/datastreams/readers.py +99 -9
- invenio_vocabularies/datastreams/transformers.py +67 -0
- invenio_vocabularies/datastreams/writers.py +6 -2
- invenio_vocabularies/factories.py +56 -0
- invenio_vocabularies/fixtures.py +2 -0
- invenio_vocabularies/records/jsonschemas/vocabularies/definitions-v1.0.0.json +9 -0
- invenio_vocabularies/services/config.py +1 -7
- invenio_vocabularies/services/querystr.py +5 -0
- invenio_vocabularies/services/tasks.py +2 -0
- {invenio_vocabularies-6.6.0.dist-info → invenio_vocabularies-6.8.0.dist-info}/METADATA +28 -2
- {invenio_vocabularies-6.6.0.dist-info → invenio_vocabularies-6.8.0.dist-info}/RECORD +46 -42
- {invenio_vocabularies-6.6.0.dist-info → invenio_vocabularies-6.8.0.dist-info}/AUTHORS.rst +0 -0
- {invenio_vocabularies-6.6.0.dist-info → invenio_vocabularies-6.8.0.dist-info}/LICENSE +0 -0
- {invenio_vocabularies-6.6.0.dist-info → invenio_vocabularies-6.8.0.dist-info}/WHEEL +0 -0
- {invenio_vocabularies-6.6.0.dist-info → invenio_vocabularies-6.8.0.dist-info}/entry_points.txt +0 -0
- {invenio_vocabularies-6.6.0.dist-info → invenio_vocabularies-6.8.0.dist-info}/top_level.txt +0 -0
invenio_vocabularies/__init__.py
CHANGED
|
@@ -52,37 +52,13 @@ const CustomFundingSchema = Yup.object().shape({
|
|
|
52
52
|
id: Yup.string().required(i18next.t("Funder is required.")),
|
|
53
53
|
}),
|
|
54
54
|
award: Yup.object().shape({
|
|
55
|
-
title: Yup.string()
|
|
56
|
-
|
|
57
|
-
message: i18next.t("Title must be set alongside number."),
|
|
58
|
-
test: function testTitle(value) {
|
|
59
|
-
const { number } = this.parent;
|
|
60
|
-
|
|
61
|
-
if (number && !value) {
|
|
62
|
-
return false;
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
return true;
|
|
66
|
-
},
|
|
67
|
-
}),
|
|
68
|
-
number: Yup.string().test({
|
|
69
|
-
name: "testNumber",
|
|
70
|
-
message: i18next.t("Number must be set alongside title."),
|
|
71
|
-
test: function testNumber(value) {
|
|
72
|
-
const { title } = this.parent;
|
|
73
|
-
|
|
74
|
-
if (title && !value) {
|
|
75
|
-
return false;
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
return true;
|
|
79
|
-
},
|
|
80
|
-
}),
|
|
55
|
+
title: Yup.string(),
|
|
56
|
+
number: Yup.string(),
|
|
81
57
|
url: Yup.string()
|
|
82
58
|
.url(i18next.t("URL must be valid."))
|
|
83
59
|
.test({
|
|
84
60
|
name: "validateUrlDependencies",
|
|
85
|
-
message: i18next.t("URL must be set alongside title
|
|
61
|
+
message: i18next.t("URL must be set alongside title or number."),
|
|
86
62
|
test: function testUrl(value) {
|
|
87
63
|
const { title, number } = this.parent;
|
|
88
64
|
|
invenio_vocabularies/cli.py
CHANGED
|
@@ -29,6 +29,8 @@ def _process_vocab(config, num_samples=None):
|
|
|
29
29
|
readers_config=config["readers"],
|
|
30
30
|
transformers_config=config.get("transformers"),
|
|
31
31
|
writers_config=config["writers"],
|
|
32
|
+
batch_size=config.get("batch_size", 1000),
|
|
33
|
+
write_many=config.get("write_many", False),
|
|
32
34
|
)
|
|
33
35
|
|
|
34
36
|
success, errored, filtered = 0, 0, 0
|
invenio_vocabularies/config.py
CHANGED
|
@@ -10,6 +10,8 @@
|
|
|
10
10
|
|
|
11
11
|
"""Vocabularies configuration."""
|
|
12
12
|
|
|
13
|
+
import re
|
|
14
|
+
|
|
13
15
|
import idutils
|
|
14
16
|
from invenio_i18n import lazy_gettext as _
|
|
15
17
|
|
|
@@ -19,6 +21,9 @@ from .datastreams.readers import (
|
|
|
19
21
|
JsonLinesReader,
|
|
20
22
|
JsonReader,
|
|
21
23
|
OAIPMHReader,
|
|
24
|
+
RDFReader,
|
|
25
|
+
SimpleHTTPReader,
|
|
26
|
+
SPARQLReader,
|
|
22
27
|
TarReader,
|
|
23
28
|
XMLReader,
|
|
24
29
|
YamlReader,
|
|
@@ -43,6 +48,8 @@ VOCABULARIES_IDENTIFIER_SCHEMES = {
|
|
|
43
48
|
}
|
|
44
49
|
""""Generic identifier schemes, usable by other vocabularies."""
|
|
45
50
|
|
|
51
|
+
edmo_regexp = re.compile(r"^https://edmo\.seadatanet\.org/report/\d+$")
|
|
52
|
+
|
|
46
53
|
|
|
47
54
|
def is_pic(val):
|
|
48
55
|
"""Test if argument is a Participant Identification Code (PIC)."""
|
|
@@ -51,9 +58,15 @@ def is_pic(val):
|
|
|
51
58
|
return val.isdigit()
|
|
52
59
|
|
|
53
60
|
|
|
61
|
+
def is_edmo(val):
    """Test if argument is a European Directory of Marine Organisations (EDMO) identifier.

    :param val: Candidate identifier string.
    :returns: ``True`` when ``val`` is exactly an EDMO report URL as described
        by ``edmo_regexp``, ``False`` otherwise.
    """
    # ``fullmatch`` rejects a trailing newline that ``match`` combined with ``$``
    # would let through; ``is not None`` yields a plain bool rather than a
    # ``re.Match`` object.
    return edmo_regexp.fullmatch(val) is not None
|
|
64
|
+
|
|
65
|
+
|
|
54
66
|
VOCABULARIES_AFFILIATION_SCHEMES = {
|
|
55
67
|
**VOCABULARIES_IDENTIFIER_SCHEMES,
|
|
56
68
|
"pic": {"label": _("PIC"), "validator": is_pic},
|
|
69
|
+
"edmo": {"label": _("EDMO"), "validator": is_edmo},
|
|
57
70
|
}
|
|
58
71
|
"""Affiliations allowed identifier schemes."""
|
|
59
72
|
|
|
@@ -135,6 +148,9 @@ VOCABULARIES_DATASTREAM_READERS = {
|
|
|
135
148
|
"jsonl": JsonLinesReader,
|
|
136
149
|
"gzip": GzipReader,
|
|
137
150
|
"tar": TarReader,
|
|
151
|
+
"http": SimpleHTTPReader,
|
|
152
|
+
"rdf": RDFReader,
|
|
153
|
+
"sparql": SPARQLReader,
|
|
138
154
|
"yaml": YamlReader,
|
|
139
155
|
"zip": ZipReader,
|
|
140
156
|
"xml": XMLReader,
|
|
@@ -172,9 +188,22 @@ VOCABULARIES_TYPES_SEARCH = {
|
|
|
172
188
|
}
|
|
173
189
|
"""Vocabulary type search configuration."""
|
|
174
190
|
|
|
175
|
-
|
|
191
|
+
VOCABULARIES_SUBJECTS_EUROSCIVOC_FILE_URL = "https://publications.europa.eu/resource/distribution/euroscivoc/rdf/skos_ap_eu/EuroSciVoc-skos-ap-eu.rdf"
|
|
176
192
|
"""Subject EuroSciVoc file download link."""
|
|
177
193
|
|
|
194
|
+
VOCABULARIES_SUBJECTS_GEMET_FILE_URL = (
|
|
195
|
+
"https://www.eionet.europa.eu/gemet/latest/gemet.rdf.gz"
|
|
196
|
+
)
|
|
197
|
+
"""Subject GEMET file download link."""
|
|
198
|
+
|
|
199
|
+
VOCABULARIES_SUBJECTS_BODC_PUV_FILE_URL = "http://vocab.nerc.ac.uk/collection/P01/current/?_profile=nvs&_mediatype=application/rdf+xml"
|
|
200
|
+
"""Subject BODC-PUV file download link."""
|
|
201
|
+
|
|
202
|
+
VOCABULARIES_AFFILIATIONS_EDMO_COUNTRY_MAPPING = {
|
|
203
|
+
"Cape Verde": "Cabo Verde",
|
|
204
|
+
}
|
|
205
|
+
"""Affiliations EDMO Country name remapping dictionary."""
|
|
206
|
+
|
|
178
207
|
VOCABULARIES_ORCID_ACCESS_KEY = "TODO"
|
|
179
208
|
"""ORCID access key to access the s3 bucket."""
|
|
180
209
|
VOCABULARIES_ORCID_SECRET_KEY = "TODO"
|
|
@@ -187,3 +216,16 @@ VOCABULARIES_ORCID_SYNC_SINCE = {
|
|
|
187
216
|
"days": 1,
|
|
188
217
|
}
|
|
189
218
|
"""ORCID time shift to sync. Parameters accepted are the ones passed to 'datetime.timedelta'."""
|
|
219
|
+
|
|
220
|
+
VOCABULARIES_ORCID_ORG_IDS_MAPPING_PATH = None
|
|
221
|
+
"""Path to the CSV file for mapping ORCiD organization IDs to affiliation IDs.
|
|
222
|
+
|
|
223
|
+
The path can be specified as either an absolute path or a relative path within the
|
|
224
|
+
Flask app instance folder (i.e. ``current_app.instance_path``).
|
|
225
|
+
|
|
226
|
+
The CSV file should have the following columns:
|
|
227
|
+
|
|
228
|
+
- `org_scheme`: The scheme of the ORCiD organization ID.
|
|
229
|
+
- `org_id`: The ORCiD organization ID.
|
|
230
|
+
- `aff_id`: The affiliation ID to map to.
|
|
231
|
+
"""
|
|
@@ -13,7 +13,9 @@ from invenio_i18n import get_locale
|
|
|
13
13
|
from invenio_i18n import lazy_gettext as _
|
|
14
14
|
from invenio_records_resources.services import SearchOptions
|
|
15
15
|
from invenio_records_resources.services.records.components import DataComponent
|
|
16
|
-
from invenio_records_resources.services.records.
|
|
16
|
+
from invenio_records_resources.services.records.queryparser import (
|
|
17
|
+
CompositeSuggestQueryParser,
|
|
18
|
+
)
|
|
17
19
|
from werkzeug.local import LocalProxy
|
|
18
20
|
|
|
19
21
|
from ...services.components import PIDComponent
|
|
@@ -21,23 +23,32 @@ from ...services.components import PIDComponent
|
|
|
21
23
|
affiliation_schemes = LocalProxy(
|
|
22
24
|
lambda: current_app.config["VOCABULARIES_AFFILIATION_SCHEMES"]
|
|
23
25
|
)
|
|
24
|
-
|
|
26
|
+
affiliation_edmo_country_mappings = LocalProxy(
|
|
27
|
+
lambda: current_app.config["VOCABULARIES_AFFILIATIONS_EDMO_COUNTRY_MAPPING"]
|
|
28
|
+
)
|
|
29
|
+
localized_title = LocalProxy(lambda: f"title.{get_locale()}^2")
|
|
25
30
|
|
|
26
31
|
|
|
27
32
|
class AffiliationsSearchOptions(SearchOptions):
|
|
28
33
|
"""Search options."""
|
|
29
34
|
|
|
30
|
-
suggest_parser_cls =
|
|
35
|
+
suggest_parser_cls = CompositeSuggestQueryParser.factory(
|
|
31
36
|
fields=[
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
"acronym^
|
|
37
|
+
# We boost the acronym fields, since they're smaller words and are more
|
|
38
|
+
# likely to be used in a query.
|
|
39
|
+
"acronym.keyword^50",
|
|
40
|
+
"acronym^10",
|
|
41
|
+
"name^10",
|
|
42
|
+
# Aliases can sometimes be shorter, so we boost them a bit.
|
|
43
|
+
"aliases^5",
|
|
35
44
|
localized_title,
|
|
36
|
-
"id^
|
|
37
|
-
|
|
45
|
+
"id^2",
|
|
46
|
+
# Allow searching identifiers directly (e.g. ROR)
|
|
47
|
+
"identifiers.identifier",
|
|
48
|
+
"country",
|
|
49
|
+
"country_name",
|
|
50
|
+
"types",
|
|
38
51
|
],
|
|
39
|
-
type="most_fields", # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types
|
|
40
|
-
fuzziness="AUTO", # https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness
|
|
41
52
|
)
|
|
42
53
|
|
|
43
54
|
sort_default = "bestmatch"
|
|
@@ -11,12 +11,14 @@
|
|
|
11
11
|
|
|
12
12
|
from copy import deepcopy
|
|
13
13
|
|
|
14
|
+
import pycountry
|
|
14
15
|
from flask import current_app
|
|
15
16
|
|
|
16
17
|
from ...datastreams import StreamEntry
|
|
17
|
-
from ...datastreams.errors import TransformerError
|
|
18
|
+
from ...datastreams.errors import TransformerError
|
|
18
19
|
from ...datastreams.transformers import BaseTransformer
|
|
19
20
|
from ...datastreams.writers import ServiceWriter
|
|
21
|
+
from ..affiliations.config import affiliation_edmo_country_mappings
|
|
20
22
|
from ..common.ror.datastreams import RORTransformer
|
|
21
23
|
|
|
22
24
|
|
|
@@ -119,6 +121,66 @@ class OpenAIREAffiliationsServiceWriter(ServiceWriter):
|
|
|
119
121
|
return StreamEntry(self._service.update(self._identity, vocab_id, updated))
|
|
120
122
|
|
|
121
123
|
|
|
124
|
+
class EDMOOrganizationTransformer(BaseTransformer):
    """Transformer class to convert EDMO RDF data to a dictionary format."""

    def apply(self, stream_entry, **kwargs):
        """Applies the transformation to the stream entry.

        The incoming entry is read as a mapping of binding name to a dict
        holding the binding under ``"value"``. ``org`` and ``name`` are
        required; ``altName``, ``countryName`` and ``locality`` are optional.

        :param stream_entry: Stream entry whose ``entry`` is replaced in place
            by the affiliation dictionary.
        :returns: The same ``stream_entry`` with the transformed ``entry``.
        :raises TransformerError: If a country name is present but cannot be
            resolved to a country by ``pycountry``.
        """
        record = stream_entry.entry
        edmo_uri = record["org"]["value"]
        # The last path segment of the EDMO URI is used as the local identifier.
        id_ = "edmo:" + edmo_uri.split("/")[-1]
        name = record["name"]["value"]
        alt_name = record.get("altName", {}).get("value")
        country_name = record.get("countryName", {}).get("value")
        locality = record.get("locality", {}).get("value")
        # NOTE: the ``deprecated`` binding is intentionally not read. It is
        # never used in the output, and indexing it unconditionally raised
        # ``KeyError`` for entries that do not carry it.

        # Fix organizations with the old country name "Cape Verde".
        # "Cabo Verde" is the new official name also used by ROR, e.g. https://api.ror.org/v2/organizations/001fphc23
        if country_name in affiliation_edmo_country_mappings:
            country_name = affiliation_edmo_country_mappings[country_name]

        # Logic to convert a country name to a 2 letters country code.
        country = None
        if country_name:
            country_dict = pycountry.countries.get(name=country_name)
            if not country_dict:
                raise TransformerError([f"No alpha_2 country found for: {record}"])
            country = country_dict.alpha_2

        # Mandatory fields
        organization = {
            "id": id_,
            "identifiers": [
                {
                    "scheme": "edmo",
                    "identifier": edmo_uri,
                }
            ],
            "name": name,
            "title": {
                "en": name,
            },
        }

        # Optional fields: only emitted when a non-empty value is available.
        if alt_name:
            organization["acronym"] = alt_name

        if country_name:
            organization["country_name"] = country_name

        if country:
            organization["country"] = country

        if locality:
            organization["location_name"] = locality

        stream_entry.entry = organization
        return stream_entry
|
|
182
|
+
|
|
183
|
+
|
|
122
184
|
VOCABULARIES_DATASTREAM_READERS = {}
|
|
123
185
|
"""Affiliations datastream readers."""
|
|
124
186
|
|
|
@@ -131,6 +193,7 @@ VOCABULARIES_DATASTREAM_WRITERS = {
|
|
|
131
193
|
VOCABULARIES_DATASTREAM_TRANSFORMERS = {
|
|
132
194
|
"ror-affiliations": AffiliationsRORTransformer,
|
|
133
195
|
"openaire-organization": OpenAIREOrganizationTransformer,
|
|
196
|
+
"edmo-organization": EDMOOrganizationTransformer,
|
|
134
197
|
}
|
|
135
198
|
"""Affiliations datastream transformers."""
|
|
136
199
|
|
|
@@ -196,3 +259,42 @@ DATASTREAM_CONFIG_OPENAIRE = {
|
|
|
196
259
|
],
|
|
197
260
|
}
|
|
198
261
|
"""Alternative Data Stream configuration for OpenAIRE Affiliations."""
|
|
262
|
+
|
|
263
|
+
DATASTREAM_CONFIG_EDMO = {
|
|
264
|
+
"readers": [
|
|
265
|
+
{
|
|
266
|
+
"type": "sparql",
|
|
267
|
+
"args": {
|
|
268
|
+
"origin": "https://edmo.seadatanet.org/sparql/sparql",
|
|
269
|
+
"query": """
|
|
270
|
+
SELECT ?org ?name ?altName ?countryName ?locality ?deprecated
|
|
271
|
+
WHERE {
|
|
272
|
+
?org a <http://www.w3.org/ns/org#Organization> .
|
|
273
|
+
?org <http://www.w3.org/ns/org#name> ?name .
|
|
274
|
+
OPTIONAL { ?org <http://www.w3.org/2004/02/skos/core#altName> ?altName } .
|
|
275
|
+
OPTIONAL { ?org <http://www.w3.org/2006/vcard/ns#country-name> ?countryName } .
|
|
276
|
+
OPTIONAL { ?org <http://www.w3.org/2006/vcard/ns#locality> ?locality } .
|
|
277
|
+
OPTIONAL { ?org <http://www.w3.org/2002/07/owl#deprecated> ?deprecated } .
|
|
278
|
+
FILTER (!?deprecated)
|
|
279
|
+
}
|
|
280
|
+
""",
|
|
281
|
+
},
|
|
282
|
+
}
|
|
283
|
+
],
|
|
284
|
+
"transformers": [
|
|
285
|
+
{
|
|
286
|
+
"type": "edmo-organization",
|
|
287
|
+
},
|
|
288
|
+
],
|
|
289
|
+
"writers": [
|
|
290
|
+
{
|
|
291
|
+
"type": "async",
|
|
292
|
+
"args": {
|
|
293
|
+
"writer": {
|
|
294
|
+
"type": "affiliations-service",
|
|
295
|
+
}
|
|
296
|
+
},
|
|
297
|
+
},
|
|
298
|
+
],
|
|
299
|
+
}
|
|
300
|
+
"""Alternative Data Stream configuration for EDMO Affiliations."""
|
|
@@ -117,6 +117,13 @@ class OpenAIREProjectTransformer(BaseTransformer):
|
|
|
117
117
|
if acronym:
|
|
118
118
|
award["acronym"] = acronym
|
|
119
119
|
|
|
120
|
+
if "startDate" in record:
|
|
121
|
+
award["start_date"] = record["startDate"]
|
|
122
|
+
if "endDate" in record:
|
|
123
|
+
award["end_date"] = record["endDate"]
|
|
124
|
+
if "summary" in record:
|
|
125
|
+
award["description"] = {"en": record["summary"]}
|
|
126
|
+
|
|
120
127
|
stream_entry.entry = award
|
|
121
128
|
return stream_entry
|
|
122
129
|
|
|
@@ -43,6 +43,15 @@
|
|
|
43
43
|
"program": {
|
|
44
44
|
"type": "string"
|
|
45
45
|
},
|
|
46
|
+
"start_date": {
|
|
47
|
+
"type": "string"
|
|
48
|
+
},
|
|
49
|
+
"end_date": {
|
|
50
|
+
"type": "string"
|
|
51
|
+
},
|
|
52
|
+
"description": {
|
|
53
|
+
"$ref": "local://vocabularies/definitions-v1.0.0.json#/description"
|
|
54
|
+
},
|
|
46
55
|
"subjects": {
|
|
47
56
|
"description": "Award's subjects.",
|
|
48
57
|
"type": "array",
|
|
@@ -9,6 +9,15 @@
|
|
|
9
9
|
"type": "search_as_you_type"
|
|
10
10
|
}
|
|
11
11
|
}
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"i18n_description": {
|
|
15
|
+
"path_match": "description.*",
|
|
16
|
+
"match_mapping_type": "string",
|
|
17
|
+
"mapping": {
|
|
18
|
+
"type": "text"
|
|
19
|
+
}
|
|
20
|
+
}
|
|
12
21
|
}
|
|
13
22
|
],
|
|
14
23
|
"dynamic": "strict",
|
|
@@ -58,9 +67,21 @@
|
|
|
58
67
|
"acronym": {
|
|
59
68
|
"type": "keyword",
|
|
60
69
|
"fields": {
|
|
61
|
-
"text": {
|
|
70
|
+
"text": {
|
|
71
|
+
"type": "text"
|
|
72
|
+
}
|
|
62
73
|
}
|
|
63
74
|
},
|
|
75
|
+
"start_date": {
|
|
76
|
+
"type": "date"
|
|
77
|
+
},
|
|
78
|
+
"end_date": {
|
|
79
|
+
"type": "date"
|
|
80
|
+
},
|
|
81
|
+
"description": {
|
|
82
|
+
"type": "object",
|
|
83
|
+
"dynamic": "true"
|
|
84
|
+
},
|
|
64
85
|
"program": {
|
|
65
86
|
"type": "keyword"
|
|
66
87
|
},
|
|
@@ -9,6 +9,15 @@
|
|
|
9
9
|
"type": "search_as_you_type"
|
|
10
10
|
}
|
|
11
11
|
}
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"i18n_description": {
|
|
15
|
+
"path_match": "description.*",
|
|
16
|
+
"match_mapping_type": "string",
|
|
17
|
+
"mapping": {
|
|
18
|
+
"type": "text"
|
|
19
|
+
}
|
|
20
|
+
}
|
|
12
21
|
}
|
|
13
22
|
],
|
|
14
23
|
"dynamic": "strict",
|
|
@@ -58,9 +67,21 @@
|
|
|
58
67
|
"acronym": {
|
|
59
68
|
"type": "keyword",
|
|
60
69
|
"fields": {
|
|
61
|
-
"text": {
|
|
70
|
+
"text": {
|
|
71
|
+
"type": "text"
|
|
72
|
+
}
|
|
62
73
|
}
|
|
63
74
|
},
|
|
75
|
+
"start_date": {
|
|
76
|
+
"type": "date"
|
|
77
|
+
},
|
|
78
|
+
"end_date": {
|
|
79
|
+
"type": "date"
|
|
80
|
+
},
|
|
81
|
+
"description": {
|
|
82
|
+
"type": "object",
|
|
83
|
+
"dynamic": "true"
|
|
84
|
+
},
|
|
64
85
|
"program": {
|
|
65
86
|
"type": "keyword"
|
|
66
87
|
},
|
|
@@ -9,6 +9,15 @@
|
|
|
9
9
|
"type": "search_as_you_type"
|
|
10
10
|
}
|
|
11
11
|
}
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"i18n_description": {
|
|
15
|
+
"path_match": "description.*",
|
|
16
|
+
"match_mapping_type": "string",
|
|
17
|
+
"mapping": {
|
|
18
|
+
"type": "text"
|
|
19
|
+
}
|
|
20
|
+
}
|
|
12
21
|
}
|
|
13
22
|
],
|
|
14
23
|
"dynamic": "strict",
|
|
@@ -58,9 +67,21 @@
|
|
|
58
67
|
"acronym": {
|
|
59
68
|
"type": "keyword",
|
|
60
69
|
"fields": {
|
|
61
|
-
"text": {
|
|
70
|
+
"text": {
|
|
71
|
+
"type": "text"
|
|
72
|
+
}
|
|
62
73
|
}
|
|
63
74
|
},
|
|
75
|
+
"start_date": {
|
|
76
|
+
"type": "date"
|
|
77
|
+
},
|
|
78
|
+
"end_date": {
|
|
79
|
+
"type": "date"
|
|
80
|
+
},
|
|
81
|
+
"description": {
|
|
82
|
+
"type": "object",
|
|
83
|
+
"dynamic": "true"
|
|
84
|
+
},
|
|
64
85
|
"program": {
|
|
65
86
|
"type": "keyword"
|
|
66
87
|
},
|
|
@@ -12,7 +12,7 @@ from functools import partial
|
|
|
12
12
|
|
|
13
13
|
from invenio_i18n import lazy_gettext as _
|
|
14
14
|
from marshmallow import Schema, ValidationError, fields, validate, validates_schema
|
|
15
|
-
from marshmallow_utils.fields import IdentifierSet, SanitizedUnicode
|
|
15
|
+
from marshmallow_utils.fields import IdentifierSet, ISODateString, SanitizedUnicode
|
|
16
16
|
from marshmallow_utils.schemas import IdentifierSchema
|
|
17
17
|
|
|
18
18
|
from ...services.schema import (
|
|
@@ -61,6 +61,10 @@ class AwardSchema(BaseVocabularySchema, ModePIDFieldVocabularyMixin):
|
|
|
61
61
|
|
|
62
62
|
organizations = fields.List(fields.Nested(AwardOrganizationRelationSchema))
|
|
63
63
|
|
|
64
|
+
start_date = ISODateString()
|
|
65
|
+
|
|
66
|
+
end_date = ISODateString()
|
|
67
|
+
|
|
64
68
|
id = SanitizedUnicode(
|
|
65
69
|
validate=validate.Length(min=1, error=_("PID cannot be blank."))
|
|
66
70
|
)
|
|
@@ -90,9 +94,11 @@ class AwardRelationSchema(Schema):
|
|
|
90
94
|
id_ = data.get("id")
|
|
91
95
|
number = data.get("number")
|
|
92
96
|
title = data.get("title")
|
|
93
|
-
|
|
97
|
+
|
|
98
|
+
if not id_ and not (number or title):
|
|
94
99
|
raise ValidationError(
|
|
95
|
-
_("An existing id or number
|
|
100
|
+
_("An existing id or either number or title must be present."),
|
|
101
|
+
"award",
|
|
96
102
|
)
|
|
97
103
|
|
|
98
104
|
|
|
@@ -13,7 +13,9 @@ from invenio_i18n import get_locale
|
|
|
13
13
|
from invenio_i18n import lazy_gettext as _
|
|
14
14
|
from invenio_records_resources.services import SearchOptions
|
|
15
15
|
from invenio_records_resources.services.records.components import DataComponent
|
|
16
|
-
from invenio_records_resources.services.records.
|
|
16
|
+
from invenio_records_resources.services.records.queryparser import (
|
|
17
|
+
CompositeSuggestQueryParser,
|
|
18
|
+
)
|
|
17
19
|
from werkzeug.local import LocalProxy
|
|
18
20
|
|
|
19
21
|
from ...services.components import ModelPIDComponent
|
|
@@ -23,24 +25,29 @@ funder_schemes = LocalProxy(lambda: current_app.config["VOCABULARIES_FUNDER_SCHE
|
|
|
23
25
|
funder_fundref_doi_prefix = LocalProxy(
|
|
24
26
|
lambda: current_app.config["VOCABULARIES_FUNDER_DOI_PREFIX"]
|
|
25
27
|
)
|
|
26
|
-
localized_title = LocalProxy(lambda: f"title.{get_locale()}^
|
|
28
|
+
localized_title = LocalProxy(lambda: f"title.{get_locale()}^2")
|
|
27
29
|
|
|
28
30
|
|
|
29
31
|
class FundersSearchOptions(SearchOptions):
|
|
30
32
|
"""Search options."""
|
|
31
33
|
|
|
32
|
-
suggest_parser_cls =
|
|
34
|
+
suggest_parser_cls = CompositeSuggestQueryParser.factory(
|
|
33
35
|
fields=[
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
"acronym^
|
|
36
|
+
# We boost the acronym fields, since they're smaller words and are more
|
|
37
|
+
# likely to be used in a query.
|
|
38
|
+
"acronym.keyword^50",
|
|
39
|
+
"acronym^10",
|
|
40
|
+
"name^10",
|
|
41
|
+
# Aliases can sometimes be shorter, so we boost them a bit.
|
|
42
|
+
"aliases^5",
|
|
37
43
|
localized_title,
|
|
38
|
-
"id^
|
|
39
|
-
|
|
40
|
-
"identifiers.identifier
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
+
"id^2",
|
|
45
|
+
# Allow searching identifiers directly (e.g. ROR)
|
|
46
|
+
"identifiers.identifier",
|
|
47
|
+
"country",
|
|
48
|
+
"country_name",
|
|
49
|
+
"types",
|
|
50
|
+
]
|
|
44
51
|
)
|
|
45
52
|
|
|
46
53
|
sort_default = "bestmatch"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2021 CERN.
|
|
3
|
+
# Copyright (C) 2021-2024 CERN.
|
|
4
4
|
#
|
|
5
5
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
6
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -15,7 +15,9 @@ from invenio_records_resources.services.records.components import (
|
|
|
15
15
|
DataComponent,
|
|
16
16
|
RelationsComponent,
|
|
17
17
|
)
|
|
18
|
-
from invenio_records_resources.services.records.
|
|
18
|
+
from invenio_records_resources.services.records.queryparser import (
|
|
19
|
+
CompositeSuggestQueryParser,
|
|
20
|
+
)
|
|
19
21
|
from werkzeug.local import LocalProxy
|
|
20
22
|
|
|
21
23
|
from ...services.components import PIDComponent
|
|
@@ -26,16 +28,17 @@ names_schemes = LocalProxy(lambda: current_app.config["VOCABULARIES_NAMES_SCHEME
|
|
|
26
28
|
class NamesSearchOptions(SearchOptions):
|
|
27
29
|
"""Search options."""
|
|
28
30
|
|
|
29
|
-
suggest_parser_cls =
|
|
31
|
+
suggest_parser_cls = CompositeSuggestQueryParser.factory(
|
|
30
32
|
fields=[
|
|
31
|
-
"
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
"
|
|
35
|
-
"affiliations.
|
|
33
|
+
"name^5",
|
|
34
|
+
# We boost the affiliation acronym fields, since they're short and more
|
|
35
|
+
# likely to be used in a query.
|
|
36
|
+
"affiliations.acronym.keyword^3",
|
|
37
|
+
"affiliations.acronym",
|
|
38
|
+
"affiliations.name",
|
|
39
|
+
# Allow searching identifiers directly (e.g. ORCID)
|
|
40
|
+
"identifiers.identifier",
|
|
36
41
|
],
|
|
37
|
-
type="most_fields", # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types
|
|
38
|
-
fuzziness="AUTO",
|
|
39
42
|
)
|
|
40
43
|
|
|
41
44
|
sort_default = "bestmatch"
|