invenio-vocabularies 7.3.0-py2.py3-none-any.whl → 7.5.0-py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invenio_vocabularies/__init__.py +1 -1
- invenio_vocabularies/administration/views/vocabularies.py +7 -9
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/CustomAwardForm.js +1 -1
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FunderDropdown.js +1 -1
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.js +1 -1
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingFieldItem.js +1 -1
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js +1 -1
- invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/NoAwardResults.js +1 -1
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/i18next-scanner.config.js +1 -1
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/i18next.js +1 -1
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ar/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/bg/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ca/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/cs/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/da/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/de/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/el/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/en/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/es/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/et/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/fa/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/fr/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/hr/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/hu/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/it/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ja/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ka/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/lt/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/no/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/pl/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/pt/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ro/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ru/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/sk/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/sv/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/tr/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/uk/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/zh_CN/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/zh_TW/translations.json +28 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/package-lock.json +1992 -0
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/package.json +0 -4
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/scripts/compileCatalog.js +1 -1
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/scripts/initCatalog.js +1 -1
- invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/translations.pot +88 -0
- invenio_vocabularies/config.py +14 -14
- invenio_vocabularies/contrib/common/ror/datastreams.py +7 -1
- invenio_vocabularies/contrib/names/datastreams.py +38 -9
- invenio_vocabularies/contrib/names/s3client.py +9 -3
- invenio_vocabularies/datastreams/datastreams.py +14 -0
- invenio_vocabularies/datastreams/readers.py +12 -2
- invenio_vocabularies/datastreams/writers.py +6 -0
- invenio_vocabularies/jobs.py +11 -11
- invenio_vocabularies/records/models.py +4 -1
- invenio_vocabularies/services/custom_fields/subject.py +4 -4
- invenio_vocabularies/services/tasks.py +7 -1
- {invenio_vocabularies-7.3.0.dist-info → invenio_vocabularies-7.5.0.dist-info}/METADATA +21 -6
- {invenio_vocabularies-7.3.0.dist-info → invenio_vocabularies-7.5.0.dist-info}/RECORD +62 -67
- {invenio_vocabularies-7.3.0.dist-info → invenio_vocabularies-7.5.0.dist-info}/WHEEL +1 -1
- invenio_vocabularies/translations/af/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/af/LC_MESSAGES/messages.po +0 -139
- invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.po +0 -139
- invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.po +0 -139
- invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.po +0 -139
- invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.po +0 -139
- invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.po +0 -139
- invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.po +0 -139
- invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.po +0 -139
- invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.po +0 -139
- invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.po +0 -139
- invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.po +0 -139
- invenio_vocabularies/translations/gl/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/gl/LC_MESSAGES/messages.po +0 -139
- invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.po +0 -139
- invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.po +0 -139
- invenio_vocabularies/translations/ne/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/ne/LC_MESSAGES/messages.po +0 -139
- invenio_vocabularies/translations/rw/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/rw/LC_MESSAGES/messages.po +0 -139
- invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.po +0 -139
- invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.mo +0 -0
- invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.po +0 -139
- {invenio_vocabularies-7.3.0.dist-info → invenio_vocabularies-7.5.0.dist-info}/entry_points.txt +0 -0
- {invenio_vocabularies-7.3.0.dist-info → invenio_vocabularies-7.5.0.dist-info/licenses}/AUTHORS.rst +0 -0
- {invenio_vocabularies-7.3.0.dist-info → invenio_vocabularies-7.5.0.dist-info/licenses}/LICENSE +0 -0
- {invenio_vocabularies-7.3.0.dist-info → invenio_vocabularies-7.5.0.dist-info}/top_level.txt +0 -0
invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/package.json CHANGED

@@ -2,7 +2,6 @@
   "name": "invenio-vocabularies-ui",
   "config": {
     "languages": [
-      "af",
       "ar",
       "bg",
       "ca",
@@ -13,10 +12,8 @@
       "en",
       "es",
       "et",
-      "et_EE",
       "fa",
       "fr",
-      "gl",
       "hr",
       "hu",
       "it",
@@ -28,7 +25,6 @@
       "pt",
       "ro",
       "ru",
-      "rw",
       "sk",
       "sv",
       "tr",
invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/translations.pot ADDED

@@ -0,0 +1,88 @@
+msgid ""
+msgstr ""
+"Project-Id-Version: i18next-conv\n"
+"mime-version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=(n != 1)\n"
+"POT-Creation-Date: 2025-05-13T09:10:50.615Z\n"
+"PO-Revision-Date: 2025-05-13T09:10:50.616Z\n"
+"Language: en\n"
+
+msgid "Search for a funder by name"
+msgstr "Search for a funder by name"
+
+msgid "Funder"
+msgstr "Funder"
+
+msgid "Search for funder..."
+msgstr "Search for funder..."
+
+msgid "Additional information"
+msgstr "Additional information"
+
+msgid "optional"
+msgstr "optional"
+
+msgid "Number"
+msgstr "Number"
+
+msgid "Award/Grant number"
+msgstr "Award/Grant number"
+
+msgid "Title"
+msgstr "Title"
+
+msgid "Award/Grant Title"
+msgstr "Award/Grant Title"
+
+msgid "URL"
+msgstr "URL"
+
+msgid "Award/Grant URL"
+msgstr "Award/Grant URL"
+
+msgid "Add"
+msgstr "Add"
+
+msgid "Add custom"
+msgstr "Add custom"
+
+msgid "Edit"
+msgstr "Edit"
+
+msgid "Remove"
+msgstr "Remove"
+
+msgid "Open external link"
+msgstr "Open external link"
+
+msgid "Funder is required."
+msgstr "Funder is required."
+
+msgid "URL must be valid."
+msgstr "URL must be valid."
+
+msgid "URL must be set alongside title or number."
+msgstr "URL must be set alongside title or number."
+
+msgid "Add standard award/grant"
+msgstr "Add standard award/grant"
+
+msgid "Add custom funding"
+msgstr "Add custom funding"
+
+msgid "Search for awards/grants"
+msgstr "Search for awards/grants"
+
+msgid "Cancel"
+msgstr "Cancel"
+
+msgid "Change"
+msgstr "Change"
+
+msgid "Did not find your award/grant? "
+msgstr "Did not find your award/grant? "
+
+msgid "Add a custom award/grant."
+msgstr "Add a custom award/grant."
invenio_vocabularies/config.py CHANGED

@@ -12,7 +12,7 @@

 import re

-import
+from idutils import is_doi, is_gnd, is_isni, is_orcid, is_ror, is_url
 from invenio_i18n import lazy_gettext as _

 from .datastreams.readers import (
@@ -42,9 +42,9 @@ VOCABULARIES_SERVICE_CONFIG = VocabulariesServiceConfig

 VOCABULARIES_IDENTIFIER_SCHEMES = {
     "grid": {"label": _("GRID"), "validator": lambda x: True},
-    "gnd": {"label": _("GND"), "validator":
-    "isni": {"label": _("ISNI"), "validator":
-    "ror": {"label": _("ROR"), "validator":
+    "gnd": {"label": _("GND"), "validator": is_gnd},
+    "isni": {"label": _("ISNI"), "validator": is_isni},
+    "ror": {"label": _("ROR"), "validator": is_ror},
 }
 """"Generic identifier schemes, usable by other vocabularies."""

@@ -72,7 +72,7 @@ VOCABULARIES_AFFILIATION_SCHEMES = {

 VOCABULARIES_FUNDER_SCHEMES = {
     **VOCABULARIES_IDENTIFIER_SCHEMES,
-    "doi": {"label": _("DOI"), "validator":
+    "doi": {"label": _("DOI"), "validator": is_doi},
 }
 """Funders allowed identifier schemes."""

@@ -80,8 +80,8 @@ VOCABULARIES_FUNDER_DOI_PREFIX = "10.13039"
 """DOI prefix for the identifier formed with the FundRef id."""

 VOCABULARIES_AWARD_SCHEMES = {
-    "url": {"label": _("URL"), "validator":
-    "doi": {"label": _("DOI"), "validator":
+    "url": {"label": _("URL"), "validator": is_url},
+    "doi": {"label": _("DOI"), "validator": is_doi},
 }
 """Awards allowed identifier schemes."""

@@ -121,15 +121,15 @@ VOCABULARIES_AWARDS_EC_ROR_ID = "00k4n6c32"
 """ROR ID for EC funder."""

 VOCABULARIES_NAMES_SCHEMES = {
-    "orcid": {"label": _("ORCID"), "validator":
-    "isni": {"label": _("ISNI"), "validator":
-    "gnd": {"label": _("GND"), "validator":
+    "orcid": {"label": _("ORCID"), "validator": is_orcid, "datacite": "ORCID"},
+    "isni": {"label": _("ISNI"), "validator": is_isni, "datacite": "ISNI"},
+    "gnd": {"label": _("GND"), "validator": is_gnd, "datacite": "GND"},
 }
 """Names allowed identifier schemes."""

 VOCABULARIES_SUBJECTS_SCHEMES = {
-    "gnd": {"label": _("GND"), "validator":
-    "url": {"label": _("URL"), "validator":
+    "gnd": {"label": _("GND"), "validator": is_gnd, "datacite": "GND"},
+    "url": {"label": _("URL"), "validator": is_url},
 }
 """Subjects allowed identifier schemes."""

@@ -204,9 +204,9 @@ VOCABULARIES_AFFILIATIONS_EDMO_COUNTRY_MAPPING = {
 }
 """Affiliations EDMO Country name remapping dictionary."""

-VOCABULARIES_ORCID_ACCESS_KEY = "
+VOCABULARIES_ORCID_ACCESS_KEY = "CHANGEME"
 """ORCID access key to access the s3 bucket."""
-VOCABULARIES_ORCID_SECRET_KEY = "
+VOCABULARIES_ORCID_SECRET_KEY = "CHANGEME"
 """ORCID secret key to access the s3 bucket."""
 VOCABULARIES_ORCID_SUMMARIES_BUCKET = "v3.0-summaries"
 """ORCID summaries bucket name."""
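Note on the identifier-scheme change above: the validators are now the idutils functions imported at the top of config.py. A minimal sketch (not invenio code) of how such a validator table is consumed; the SCHEMES dict and validate() helper are illustrative, while is_gnd, is_isni and is_ror are real idutils functions:

from idutils import is_gnd, is_isni, is_ror

# Illustrative scheme table mirroring VOCABULARIES_IDENTIFIER_SCHEMES above
# (plain labels here, no lazy translation).
SCHEMES = {
    "gnd": {"label": "GND", "validator": is_gnd},
    "isni": {"label": "ISNI", "validator": is_isni},
    "ror": {"label": "ROR", "validator": is_ror},
}

def validate(scheme, value):
    """Return True if value is well-formed for the given scheme."""
    # idutils validators return a truthy value (e.g. a regex match) on success.
    return bool(SCHEMES[scheme]["validator"](value))

print(validate("ror", "03yrm5c26"))     # expected True: well-formed ROR ID
print(validate("isni", "not-an-isni"))  # expected False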
invenio_vocabularies/contrib/common/ror/datastreams.py CHANGED

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2024 CERN.
+# Copyright (C) 2024-2025 CERN.
 # Copyright (C) 2024 California Institute of Technology.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
@@ -13,6 +13,7 @@ import io

 import arrow
 import requests
+from flask import current_app
 from idutils import normalize_ror

 from invenio_vocabularies.datastreams.errors import ReaderError, TransformerError
@@ -93,6 +94,9 @@ class RORHTTPReader(BaseReader):
         if self._since:
             last_dump_date = self._get_last_dump_date(linksets)
             if last_dump_date < arrow.get(self._since):
+                current_app.logger.info(
+                    f"Skipping ROR data dump (last dump: {last_dump_date}, since: {self._since})"
+                )
                 return

         for linkset in linksets:
@@ -104,6 +108,8 @@ class RORHTTPReader(BaseReader):
         if len(zip_files) > 1:
             raise ReaderError(f"Expected 1 ZIP item but got {len(zip_files)}")

+        current_app.logger.info(f"Reading ROR data dump (URL: {file_url})")
+
         # Download the ZIP file and fully load the response bytes content in memory.
         # The bytes content are then wrapped by a BytesIO to be
         # file-like object (as required by `zipfile.ZipFile`).
invenio_vocabularies/contrib/names/datastreams.py CHANGED

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2021-
+# Copyright (C) 2021-2025 CERN.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
 # modify it under the terms of the MIT License; see LICENSE file for more
@@ -12,6 +12,7 @@ import csv
 import io
 import tarfile
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from contextvars import copy_context
 from datetime import timedelta
 from itertools import islice
 from pathlib import Path
@@ -43,17 +44,18 @@ class OrcidDataSyncReader(BaseReader):
         self.s3_client = S3OrcidClient()
         self.since = since

-    def _fetch_orcid_data(self, orcid_to_sync, bucket):
+    def _fetch_orcid_data(self, app, orcid_to_sync, bucket):
         """Fetches a single ORCiD record from S3."""
         # The ORCiD file key is located in a folder which name corresponds to the last three digits of the ORCiD
         suffix = orcid_to_sync[-3:]
         key = f"{suffix}/{orcid_to_sync}.xml"
+        app.logger.debug(f"Fetching ORCiD record: {key} from bucket: {bucket}")
         try:
             # Potential improvement: use the a XML jax parser to avoid loading the whole file in memory
             # and choose the sections we need to read (probably the summary)
             return self.s3_client.read_file(f"s3://{bucket}/{key}")
         except Exception:
-
+            app.logger.exception(f"Failed to fetch ORCiD record: {key}")

     def _process_lambda_file(self, fileobj):
         """Process the ORCiD lambda file and returns a list of ORCiDs to sync.
@@ -87,7 +89,11 @@ class OrcidDataSyncReader(BaseReader):
             )

             if last_modified_date < last_sync:
+                current_app.logger.debug(
+                    f"Skipping ORCiD {orcid} (last modified: {last_modified_date})"
+                )
                 break
+            current_app.logger.debug(f"Yielding ORCiD {orcid} for sync.")
             yield orcid
         finally:
             fileobj.close()
@@ -97,10 +103,15 @@ class OrcidDataSyncReader(BaseReader):
         with ThreadPoolExecutor(
             max_workers=current_app.config["VOCABULARIES_ORCID_SYNC_MAX_WORKERS"]
         ) as executor:
+            app = current_app._get_current_object()
             # futures is a dictionary where the key is the ORCID value and the item is the Future object
+            # Flask does not propagate app/request context to new threads, so `copy_context().run`
+            # ensures the current instantianted contextvars (such as job_context) is preserved in each thread.
             futures = {
                 orcid: executor.submit(
+                    copy_context().run,  # Required to pass the context to the thread
                     self._fetch_orcid_data,
+                    app,  # Pass the Flask app to the thread
                     orcid,
                     current_app.config["VOCABULARIES_ORCID_SUMMARIES_BUCKET"],
                 )
@@ -111,7 +122,14 @@ class OrcidDataSyncReader(BaseReader):
             try:
                 result = futures[orcid].result()
                 if result:
+                    current_app.logger.debug(
+                        f"Successfully fetched ORCiD record: {orcid}"
+                    )
                     yield result
+            except Exception:
+                current_app.logger.exception(
+                    f"Error processing ORCiD record: {orcid}"
+                )
             finally:
                 # Explicitly release memory, as we don't need the future anymore.
                 # This is mostly required because as long as we keep a reference to the future
@@ -125,7 +143,7 @@ class OrcidDataSyncReader(BaseReader):
         tar_content = self.s3_client.read_file(
             "s3://orcid-lambda-file/last_modified.csv.tar"
         )
-
+        current_app.logger.info("Fetching ORCiD lambda file")
         # Opens tar file and process it
         with tarfile.open(fileobj=io.BytesIO(tar_content)) as tar:
             # Iterate over each member (file or directory) in the tar file
@@ -133,7 +151,7 @@ class OrcidDataSyncReader(BaseReader):
                 # Extract the file
                 extracted_file = tar.extractfile(member)
                 if extracted_file:
-                    current_app.logger.info(f"
+                    current_app.logger.info(f"Processing lambda file: {member.name}")
                     # Process the file and get the ORCiDs to sync
                     orcids_to_sync = set(self._process_lambda_file(extracted_file))

@@ -150,6 +168,7 @@ class OrcidDataSyncReader(BaseReader):
         """Yield successive chunks of a given size."""
         it = iter(iterable)
         while chunk := list(islice(it, batch_size)):
+            current_app.logger.debug(f"Processing batch of size {len(chunk)}.")
             yield chunk


@@ -239,18 +258,25 @@ class OrcidTransformer(BaseTransformer):

     def apply(self, stream_entry, **kwargs):
         """Applies the transformation to the stream entry."""
+        current_app.logger.debug("Applying transformation to stream entry.")
         record = stream_entry.entry
         person = record["person"]
         orcid_id = record["orcid-identifier"]["path"]

         name = person.get("name")
         if name is None:
-            raise TransformerError(
+            raise TransformerError(
+                f"Name not found in ORCiD entry for ORCiD ID: {orcid_id}."
+            )
         if name.get("family-name") is None:
-            raise TransformerError(
+            raise TransformerError(
+                f"Family name not found in ORCiD entry for ORCiD ID: {orcid_id}."
+            )

         if not self._is_valid_name(name["given-names"] + name["family-name"]):
-            raise TransformerError(
+            raise TransformerError(
+                f"Invalid characters in name for ORCiD ID: {orcid_id}."
+            )

         entry = {
             "id": orcid_id,
@@ -261,6 +287,7 @@ class OrcidTransformer(BaseTransformer):
         }

         stream_entry.entry = entry
+        current_app.logger.debug(f"Transformed entry: {entry}")
         return stream_entry

     def _is_valid_name(self, name):
@@ -271,6 +298,7 @@ class OrcidTransformer(BaseTransformer):

     def _extract_affiliations(self, record):
         """Extract affiliations from the ORCiD record."""
+        current_app.logger.debug("Extracting affiliations from ORCiD record.")
         result = []
         try:
             employments = (
@@ -312,7 +340,7 @@ class OrcidTransformer(BaseTransformer):

                 result.append(aff)
         except Exception:
-
+            current_app.logger.error("Error extracting affiliations.")
         return result

     def _extract_affiliation_id(self, org):
@@ -410,6 +438,7 @@ ORCID_PRESET_DATASTREAM_CONFIG = {
     "args": {
         "writer": {
             "type": "names-service",
+            "args": {"update": True},
         }
     },
 }
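The _fetch_orcid_data changes above pass the concrete Flask app into each worker and submit the callable through copy_context().run, since neither the Flask app context nor contextvars values set in the submitting thread are visible inside ThreadPoolExecutor workers by default. A self-contained sketch of that pattern (job_id is a hypothetical variable, standing in for something like the job context mentioned in the diff comment):

from concurrent.futures import ThreadPoolExecutor
from contextvars import ContextVar, copy_context

job_id = ContextVar("job_id")  # hypothetical stand-in for a job context

def fetch(item):
    # job_id.get() only succeeds here because copy_context().run carried
    # the submitting thread's context into this worker thread.
    return f"{job_id.get()}:{item}"

job_id.set("orcid-sync-42")
with ThreadPoolExecutor(max_workers=2) as executor:
    futures = [executor.submit(copy_context().run, fetch, i) for i in range(3)]
    print([f.result() for f in futures])
# -> ['orcid-sync-42:0', 'orcid-sync-42:1', 'orcid-sync-42:2']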
invenio_vocabularies/contrib/names/s3client.py CHANGED

@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 #
 # This file is part of Invenio.
-# Copyright (C) 2024 CERN.
+# Copyright (C) 2024-2025 CERN.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
 # modify it under the terms of the MIT License; see LICENSE file for more
@@ -38,7 +38,13 @@ class S3OrcidClient(S3Client):

     def __init__(self):
         """Constructor."""
+        access_key = current_app.config["VOCABULARIES_ORCID_ACCESS_KEY"]
+        secret_key = current_app.config["VOCABULARIES_ORCID_SECRET_KEY"]
+        if access_key == "CHANGEME" or secret_key == "CHANGEME":
+            raise Exception(
+                "VOCABULARIES_ORCID_ACCESS_KEY and VOCABULARIES_ORCID_SECRET_KEY are not configured."
+            )
         super().__init__(
-            access_key=
-            secret_key=
+            access_key=access_key,
+            secret_key=secret_key,
         )
invenio_vocabularies/datastreams/datastreams.py CHANGED

@@ -72,13 +72,18 @@

     def filter(self, stream_entry, *args, **kwargs):
         """Checks if an stream_entry should be filtered out (skipped)."""
+        current_app.logger.debug(f"Filtering entry: {stream_entry.entry}")
         return False

     def process_batch(self, batch):
         """Process a batch of entries."""
+        current_app.logger.info(f"Processing batch of size: {len(batch)}")
         transformed_entries = []
         for stream_entry in batch:
             if stream_entry.errors:
+                current_app.logger.warning(
+                    f"Skipping entry with errors: {stream_entry.errors}"
+                )
                 yield stream_entry  # reading errors
             else:
                 transformed_entry = self.transform(stream_entry)
@@ -103,19 +108,23 @@
         the reader, apply the transformations and yield the result of
         writing it.
         """
+        current_app.logger.info("Starting data stream processing")
         batch = []
         for stream_entry in self.read():
             batch.append(stream_entry)
             if len(batch) >= self.batch_size:
+                current_app.logger.debug(f"Processing batch of size: {len(batch)}")
                 yield from self.process_batch(batch)
                 batch = []

         # Process any remaining entries in the last batch
         if batch:
+            current_app.logger.debug(f"Processing final batch of size: {len(batch)}")
             yield from self.process_batch(batch)

     def read(self):
         """Recursively read the entries."""
+        current_app.logger.debug("Reading entries from readers")

         def pipe_gen(gen_funcs, piped_item=None):
             _gen_funcs = list(gen_funcs)  # copy to avoid modifying ref list
@@ -130,6 +139,7 @@
                 else:
                     yield StreamEntry(item)
             except ReaderError as err:
+                current_app.logger.error(f"Reader error: {str(err)}")
                 yield StreamEntry(
                     entry=item,
                     errors=[f"{current_gen_func.__qualname__}: {str(err)}"],
@@ -140,6 +150,7 @@

     def transform(self, stream_entry, *args, **kwargs):
         """Apply the transformations to an stream_entry."""
+        current_app.logger.debug(f"Transforming entry: {stream_entry.entry}")
         for transformer in self._transformers:
             try:
                 stream_entry = transformer.apply(stream_entry)
@@ -153,16 +164,19 @@

     def write(self, stream_entry, *args, **kwargs):
         """Apply the transformations to an stream_entry."""
+        current_app.logger.debug(f"Writing entry: {stream_entry.entry}")
         for writer in self._writers:
             try:
                 writer.write(stream_entry)
             except WriterError as err:
+                current_app.logger.error(f"Writer error: {str(err)}")
                 stream_entry.errors.append(f"{writer.__class__.__name__}: {str(err)}")

         return stream_entry

     def batch_write(self, stream_entries, *args, **kwargs):
         """Apply the transformations to an stream_entry. Errors are handler in the service layer."""
+        current_app.logger.debug(f"Batch writing entries: {len(stream_entries)}")
         for writer in self._writers:
             yield from writer.write_many(stream_entries)

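For orientation on where the new log lines sit: DataStream.process follows a standard accumulate-and-flush batching shape, filling a list up to batch_size, flushing it through process_batch, and flushing the final partial batch at the end. A standalone sketch of that flow (names are illustrative, not the actual DataStream API):

def process(entries, batch_size=3):
    batch = []
    for entry in entries:
        batch.append(entry)
        if len(batch) >= batch_size:
            yield from process_batch(batch)
            batch = []
    if batch:  # flush the final, possibly partial batch
        yield from process_batch(batch)

def process_batch(batch):
    # Stand-in for transform + write; tags each entry with its batch's size.
    for entry in batch:
        yield (entry, len(batch))

print(list(process(range(7))))
# -> [(0, 3), (1, 3), (2, 3), (3, 3), (4, 3), (5, 3), (6, 1)]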
invenio_vocabularies/datastreams/readers.py CHANGED

@@ -407,15 +407,18 @@
 class SPARQLReader(BaseReader):
     """Generic reader class to fetch and process RDF data from a SPARQL endpoint."""

-    def __init__(self, origin, query, mode="r", *args, **kwargs):
+    def __init__(self, origin, query, mode="r", client_params=None, *args, **kwargs):
         """Initialize the reader with the data source.

         :param origin: The SPARQL endpoint from which to fetch the RDF data.
         :param query: The SPARQL query to execute.
         :param mode: Mode of operation (default is 'r' for reading).
+        :param client_params: Additional client parameters to pass to the SPARQL client.
         """
         self._origin = origin
         self._query = query
+        self._client_params = client_params or {}
+
         super().__init__(origin=origin, mode=mode, *args, **kwargs)

     def _iter(self, fp, *args, **kwargs):
@@ -430,7 +433,14 @@
                 "SPARQLReader does not support being chained after another reader"
             )

-
+        # Avoid overwriting SPARQLWrapper's default value for the user agent string
+        if self._client_params.get("user_agent"):
+            sparql_client = sparql.SPARQLWrapper(
+                self._origin, agent=self._client_params.get("user_agent")
+            )
+        else:
+            sparql_client = sparql.SPARQLWrapper(self._origin)
+
         sparql_client.setQuery(self._query)
         sparql_client.setReturnFormat(sparql.JSON)

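The two-branch construction above exists so that SPARQLWrapper's own default User-Agent survives when no override is configured (some endpoints throttle or reject generic agents). A sketch using the real SPARQLWrapper API; the endpoint and agent string are example values, and the final lines perform an actual network request:

from SPARQLWrapper import JSON, SPARQLWrapper

endpoint = "https://query.wikidata.org/sparql"            # example endpoint
client_params = {"user_agent": "my-vocab-harvester/1.0"}  # example override

if client_params.get("user_agent"):
    client = SPARQLWrapper(endpoint, agent=client_params["user_agent"])
else:
    client = SPARQLWrapper(endpoint)  # keeps the library's default agent

client.setQuery("SELECT ?s WHERE { ?s ?p ?o } LIMIT 1")
client.setReturnFormat(JSON)
results = client.query().convert()  # network call; returns parsed JSON
print(results["results"]["bindings"])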
invenio_vocabularies/datastreams/writers.py CHANGED

@@ -87,17 +87,21 @@ class ServiceWriter(BaseWriter):

     def _do_update(self, entry):
         vocab_id = self._entry_id(entry)
+        current_app.logger.debug(f"Resolving entry with ID: {vocab_id}")
         current = self._resolve(vocab_id)
         updated = dict(current.to_dict(), **entry)
+        current_app.logger.debug(f"Updating entry with ID: {vocab_id}")
         return StreamEntry(self._service.update(self._identity, vocab_id, updated))

     def write(self, stream_entry, *args, **kwargs):
         """Writes the input entry using a given service."""
         entry = stream_entry.entry
+        current_app.logger.debug(f"Writing entry: {entry}")

         try:
             if self._insert:
                 try:
+                    current_app.logger.debug("Inserting entry.")
                     return StreamEntry(self._service.create(self._identity, entry))
                 except PIDAlreadyExists:
                     if not self._update:
@@ -105,6 +109,7 @@
                     return self._do_update(entry)
             elif self._update:
                 try:
+                    current_app.logger.debug("Attempting to update entry.")
                     return self._do_update(entry)
                 except (NoResultFound, PIDDoesNotExistError):
                     raise WriterError([f"Vocabulary entry does not exist: {entry}"])
@@ -139,6 +144,7 @@
             processed_stream_entry.log_errors()
             stream_entries_processed.append(processed_stream_entry)

+        current_app.logger.debug(f"Finished writing {len(stream_entries)} entries")
         return stream_entries_processed

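The debug lines added to ServiceWriter.write trace its insert-then-update flow: attempt a create, and on a duplicate-identifier error fall back to updating the existing record. A minimal sketch of that upsert logic; the in-memory store, AlreadyExists and the helpers are stand-ins for the Invenio service layer and PIDAlreadyExists:

class AlreadyExists(Exception):  # stand-in for PIDAlreadyExists
    pass

store = {}

def create(entry):
    if entry["id"] in store:
        raise AlreadyExists(entry["id"])
    store[entry["id"]] = dict(entry)
    return store[entry["id"]]

def update(entry):
    # Mirrors _do_update: merge the new entry over the current record.
    store[entry["id"]] = {**store[entry["id"]], **entry}
    return store[entry["id"]]

def write(entry, insert=True, allow_update=True):
    if insert:
        try:
            return create(entry)
        except AlreadyExists:
            if not allow_update:
                raise
            return update(entry)
    elif allow_update:
        return update(entry)

write({"id": "cern", "name": "CERN"})
print(write({"id": "cern", "country": "CH"}))  # create fails, falls back to update
# -> {'id': 'cern', 'name': 'CERN', 'country': 'CH'}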
invenio_vocabularies/jobs.py CHANGED

@@ -10,7 +10,7 @@

 import datetime

-from invenio_i18n import
+from invenio_i18n import lazy_gettext as _
 from invenio_jobs.jobs import JobType

 from invenio_vocabularies.services.tasks import process_datastream
@@ -27,8 +27,8 @@ class ProcessDataStreamJob(JobType):
 class ProcessRORAffiliationsJob(ProcessDataStreamJob):
     """Process ROR affiliations datastream registered task."""

-    description = "Process ROR affiliations"
-    title = "Load ROR affiliations"
+    description = _("Process ROR affiliations")
+    title = _("Load ROR affiliations")
     id = "process_ror_affiliations"

     @classmethod
@@ -65,8 +65,8 @@ class ProcessRORAffiliationsJob(ProcessDataStreamJob):
 class ProcessRORFundersJob(ProcessDataStreamJob):
     """Process ROR funders datastream registered task."""

-    description = "Process ROR funders"
-    title = "Load ROR funders"
+    description = _("Process ROR funders")
+    title = _("Load ROR funders")
     id = "process_ror_funders"

     @classmethod
@@ -103,8 +103,8 @@ class ProcessRORFundersJob(ProcessDataStreamJob):
 class ImportAwardsOpenAIREJob(ProcessDataStreamJob):
     """Import awards from OpenAIRE registered task."""

-    description = "Import awards from OpenAIRE"
-    title = "Import Awards OpenAIRE"
+    description = _("Import awards from OpenAIRE")
+    title = _("Import Awards OpenAIRE")
     id = "import_awards_openaire"

     @classmethod
@@ -138,8 +138,8 @@ class ImportAwardsOpenAIREJob(ProcessDataStreamJob):
 class UpdateAwardsCordisJob(ProcessDataStreamJob):
     """Update awards from CORDIS registered task."""

-    description = "Update awards from CORDIS"
-    title = "Update Awards CORDIS"
+    description = _("Update awards from CORDIS")
+    title = _("Update Awards CORDIS")
     id = "update_awards_cordis"

     @classmethod
@@ -166,8 +166,8 @@ class UpdateAwardsCordisJob(ProcessDataStreamJob):
 class ImportORCIDJob(ProcessDataStreamJob):
     """Import ORCID data registered task."""

-    description = "Import ORCID data"
-    title = "Import ORCID data"
+    description = _("Import ORCID data")
+    title = _("Import ORCID data")
     id = "import_orcid"

     @classmethod
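Wrapping description and title in _() matters because they are class attributes, evaluated once at import time, before any request locale exists; lazy_gettext returns a lazy string that defers the catalog lookup until the value is rendered. A sketch of the pattern (assumes invenio-i18n is installed; the class is illustrative):

from invenio_i18n import lazy_gettext as _

class ExampleJob:
    # A plain gettext call here would be resolved at import time, with no
    # locale selected. The lazy string is translated only when rendered,
    # e.g. in the jobs admin UI.
    description = _("Import ORCID data")
    title = _("Import ORCID data")

# str(ExampleJob.title) triggers the lookup with the locale active at that moment.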
invenio_vocabularies/records/models.py CHANGED

@@ -9,6 +9,7 @@
 """Vocabulary models."""

 from invenio_db import db
+from invenio_i18n import gettext as _
 from invenio_records.models import RecordMetadataBase


@@ -79,7 +80,9 @@ class VocabularyScheme(db.Model):
         """Create a new vocabulary subtype."""
         banned = [",", ":"]
         for b in banned:
-            assert b not in data["id"],
+            assert b not in data["id"], _(
+                "No '%(banned_char)s' allowed in VocabularyScheme.id", banned_char=b
+            )

         with db.session.begin_nested():
             obj = cls(**data)