invenio-vocabularies 7.3.0__py2.py3-none-any.whl → 7.5.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of invenio-vocabularies might be problematic. Click here for more details.

Files changed (98)
  1. invenio_vocabularies/__init__.py +1 -1
  2. invenio_vocabularies/administration/views/vocabularies.py +7 -9
  3. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/CustomAwardForm.js +1 -1
  4. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FunderDropdown.js +1 -1
  5. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingField.js +1 -1
  6. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingFieldItem.js +1 -1
  7. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/FundingModal.js +1 -1
  8. invenio_vocabularies/assets/semantic-ui/js/invenio_vocabularies/src/contrib/forms/Funding/NoAwardResults.js +1 -1
  9. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/i18next-scanner.config.js +1 -1
  10. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/i18next.js +1 -1
  11. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ar/translations.json +28 -0
  12. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/bg/translations.json +28 -0
  13. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ca/translations.json +28 -0
  14. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/cs/translations.json +28 -0
  15. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/da/translations.json +28 -0
  16. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/de/translations.json +28 -0
  17. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/el/translations.json +28 -0
  18. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/en/translations.json +28 -0
  19. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/es/translations.json +28 -0
  20. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/et/translations.json +28 -0
  21. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/fa/translations.json +28 -0
  22. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/fr/translations.json +28 -0
  23. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/hr/translations.json +28 -0
  24. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/hu/translations.json +28 -0
  25. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/it/translations.json +28 -0
  26. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ja/translations.json +28 -0
  27. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ka/translations.json +28 -0
  28. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/lt/translations.json +28 -0
  29. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/no/translations.json +28 -0
  30. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/pl/translations.json +28 -0
  31. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/pt/translations.json +28 -0
  32. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ro/translations.json +28 -0
  33. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/ru/translations.json +28 -0
  34. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/sk/translations.json +28 -0
  35. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/sv/translations.json +28 -0
  36. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/tr/translations.json +28 -0
  37. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/uk/translations.json +28 -0
  38. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/zh_CN/translations.json +28 -0
  39. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/messages/zh_TW/translations.json +28 -0
  40. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/package-lock.json +1992 -0
  41. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/package.json +0 -4
  42. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/scripts/compileCatalog.js +1 -1
  43. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/scripts/initCatalog.js +1 -1
  44. invenio_vocabularies/assets/semantic-ui/translations/invenio_vocabularies/translations.pot +88 -0
  45. invenio_vocabularies/config.py +14 -14
  46. invenio_vocabularies/contrib/common/ror/datastreams.py +7 -1
  47. invenio_vocabularies/contrib/names/datastreams.py +38 -9
  48. invenio_vocabularies/contrib/names/s3client.py +9 -3
  49. invenio_vocabularies/datastreams/datastreams.py +14 -0
  50. invenio_vocabularies/datastreams/readers.py +12 -2
  51. invenio_vocabularies/datastreams/writers.py +6 -0
  52. invenio_vocabularies/jobs.py +11 -11
  53. invenio_vocabularies/records/models.py +4 -1
  54. invenio_vocabularies/services/custom_fields/subject.py +4 -4
  55. invenio_vocabularies/services/tasks.py +7 -1
  56. {invenio_vocabularies-7.3.0.dist-info → invenio_vocabularies-7.5.0.dist-info}/METADATA +21 -6
  57. {invenio_vocabularies-7.3.0.dist-info → invenio_vocabularies-7.5.0.dist-info}/RECORD +62 -67
  58. {invenio_vocabularies-7.3.0.dist-info → invenio_vocabularies-7.5.0.dist-info}/WHEEL +1 -1
  59. invenio_vocabularies/translations/af/LC_MESSAGES/messages.mo +0 -0
  60. invenio_vocabularies/translations/af/LC_MESSAGES/messages.po +0 -139
  61. invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.mo +0 -0
  62. invenio_vocabularies/translations/de_AT/LC_MESSAGES/messages.po +0 -139
  63. invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.mo +0 -0
  64. invenio_vocabularies/translations/de_DE/LC_MESSAGES/messages.po +0 -139
  65. invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.mo +0 -0
  66. invenio_vocabularies/translations/en_AT/LC_MESSAGES/messages.po +0 -139
  67. invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.mo +0 -0
  68. invenio_vocabularies/translations/en_HU/LC_MESSAGES/messages.po +0 -139
  69. invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.mo +0 -0
  70. invenio_vocabularies/translations/es_CU/LC_MESSAGES/messages.po +0 -139
  71. invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.mo +0 -0
  72. invenio_vocabularies/translations/es_MX/LC_MESSAGES/messages.po +0 -139
  73. invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.mo +0 -0
  74. invenio_vocabularies/translations/et_EE/LC_MESSAGES/messages.po +0 -139
  75. invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.mo +0 -0
  76. invenio_vocabularies/translations/fa_IR/LC_MESSAGES/messages.po +0 -139
  77. invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.mo +0 -0
  78. invenio_vocabularies/translations/fr_CI/LC_MESSAGES/messages.po +0 -139
  79. invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.mo +0 -0
  80. invenio_vocabularies/translations/fr_FR/LC_MESSAGES/messages.po +0 -139
  81. invenio_vocabularies/translations/gl/LC_MESSAGES/messages.mo +0 -0
  82. invenio_vocabularies/translations/gl/LC_MESSAGES/messages.po +0 -139
  83. invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.mo +0 -0
  84. invenio_vocabularies/translations/hi_IN/LC_MESSAGES/messages.po +0 -139
  85. invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.mo +0 -0
  86. invenio_vocabularies/translations/hu_HU/LC_MESSAGES/messages.po +0 -139
  87. invenio_vocabularies/translations/ne/LC_MESSAGES/messages.mo +0 -0
  88. invenio_vocabularies/translations/ne/LC_MESSAGES/messages.po +0 -139
  89. invenio_vocabularies/translations/rw/LC_MESSAGES/messages.mo +0 -0
  90. invenio_vocabularies/translations/rw/LC_MESSAGES/messages.po +0 -139
  91. invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.mo +0 -0
  92. invenio_vocabularies/translations/sv_SE/LC_MESSAGES/messages.po +0 -139
  93. invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.mo +0 -0
  94. invenio_vocabularies/translations/uk_UA/LC_MESSAGES/messages.po +0 -139
  95. {invenio_vocabularies-7.3.0.dist-info → invenio_vocabularies-7.5.0.dist-info}/entry_points.txt +0 -0
  96. {invenio_vocabularies-7.3.0.dist-info → invenio_vocabularies-7.5.0.dist-info/licenses}/AUTHORS.rst +0 -0
  97. {invenio_vocabularies-7.3.0.dist-info → invenio_vocabularies-7.5.0.dist-info/licenses}/LICENSE +0 -0
  98. {invenio_vocabularies-7.3.0.dist-info → invenio_vocabularies-7.5.0.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,6 @@
2
2
  "name": "invenio-vocabularies-ui",
3
3
  "config": {
4
4
  "languages": [
5
- "af",
6
5
  "ar",
7
6
  "bg",
8
7
  "ca",
@@ -13,10 +12,8 @@
13
12
  "en",
14
13
  "es",
15
14
  "et",
16
- "et_EE",
17
15
  "fa",
18
16
  "fr",
19
- "gl",
20
17
  "hr",
21
18
  "hu",
22
19
  "it",
@@ -28,7 +25,6 @@
28
25
  "pt",
29
26
  "ro",
30
27
  "ru",
31
- "rw",
32
28
  "sk",
33
29
  "sv",
34
30
  "tr",
@@ -1,4 +1,4 @@
1
- // This file is part of React-Invenio-Deposit
1
+ // This file is part of Invenio-Vocabularies
2
2
  //
3
3
  // Invenio-administration is free software; you can redistribute it and/or modify it
4
4
  // under the terms of the MIT License; see LICENSE file for more details.
@@ -1,4 +1,4 @@
1
- // This file is part of React-Invenio-Deposit
1
+ // This file is part of Invenio-Vocabularies
2
2
  //
3
3
  // Invenio-administration is free software; you can redistribute it and/or modify it
4
4
  // under the terms of the MIT License; see LICENSE file for more details.
@@ -0,0 +1,88 @@
1
+ msgid ""
2
+ msgstr ""
3
+ "Project-Id-Version: i18next-conv\n"
4
+ "mime-version: 1.0\n"
5
+ "Content-Type: text/plain; charset=utf-8\n"
6
+ "Content-Transfer-Encoding: 8bit\n"
7
+ "Plural-Forms: nplurals=2; plural=(n != 1)\n"
8
+ "POT-Creation-Date: 2025-05-13T09:10:50.615Z\n"
9
+ "PO-Revision-Date: 2025-05-13T09:10:50.616Z\n"
10
+ "Language: en\n"
11
+
12
+ msgid "Search for a funder by name"
13
+ msgstr "Search for a funder by name"
14
+
15
+ msgid "Funder"
16
+ msgstr "Funder"
17
+
18
+ msgid "Search for funder..."
19
+ msgstr "Search for funder..."
20
+
21
+ msgid "Additional information"
22
+ msgstr "Additional information"
23
+
24
+ msgid "optional"
25
+ msgstr "optional"
26
+
27
+ msgid "Number"
28
+ msgstr "Number"
29
+
30
+ msgid "Award/Grant number"
31
+ msgstr "Award/Grant number"
32
+
33
+ msgid "Title"
34
+ msgstr "Title"
35
+
36
+ msgid "Award/Grant Title"
37
+ msgstr "Award/Grant Title"
38
+
39
+ msgid "URL"
40
+ msgstr "URL"
41
+
42
+ msgid "Award/Grant URL"
43
+ msgstr "Award/Grant URL"
44
+
45
+ msgid "Add"
46
+ msgstr "Add"
47
+
48
+ msgid "Add custom"
49
+ msgstr "Add custom"
50
+
51
+ msgid "Edit"
52
+ msgstr "Edit"
53
+
54
+ msgid "Remove"
55
+ msgstr "Remove"
56
+
57
+ msgid "Open external link"
58
+ msgstr "Open external link"
59
+
60
+ msgid "Funder is required."
61
+ msgstr "Funder is required."
62
+
63
+ msgid "URL must be valid."
64
+ msgstr "URL must be valid."
65
+
66
+ msgid "URL must be set alongside title or number."
67
+ msgstr "URL must be set alongside title or number."
68
+
69
+ msgid "Add standard award/grant"
70
+ msgstr "Add standard award/grant"
71
+
72
+ msgid "Add custom funding"
73
+ msgstr "Add custom funding"
74
+
75
+ msgid "Search for awards/grants"
76
+ msgstr "Search for awards/grants"
77
+
78
+ msgid "Cancel"
79
+ msgstr "Cancel"
80
+
81
+ msgid "Change"
82
+ msgstr "Change"
83
+
84
+ msgid "Did not find your award/grant? "
85
+ msgstr "Did not find your award/grant? "
86
+
87
+ msgid "Add a custom award/grant."
88
+ msgstr "Add a custom award/grant."
@@ -12,7 +12,7 @@
12
12
 
13
13
  import re
14
14
 
15
- import idutils
15
+ from idutils import is_doi, is_gnd, is_isni, is_orcid, is_ror, is_url
16
16
  from invenio_i18n import lazy_gettext as _
17
17
 
18
18
  from .datastreams.readers import (
@@ -42,9 +42,9 @@ VOCABULARIES_SERVICE_CONFIG = VocabulariesServiceConfig
42
42
 
43
43
  VOCABULARIES_IDENTIFIER_SCHEMES = {
44
44
  "grid": {"label": _("GRID"), "validator": lambda x: True},
45
- "gnd": {"label": _("GND"), "validator": idutils.is_gnd},
46
- "isni": {"label": _("ISNI"), "validator": idutils.is_isni},
47
- "ror": {"label": _("ROR"), "validator": idutils.is_ror},
45
+ "gnd": {"label": _("GND"), "validator": is_gnd},
46
+ "isni": {"label": _("ISNI"), "validator": is_isni},
47
+ "ror": {"label": _("ROR"), "validator": is_ror},
48
48
  }
49
49
  """"Generic identifier schemes, usable by other vocabularies."""
50
50
 
@@ -72,7 +72,7 @@ VOCABULARIES_AFFILIATION_SCHEMES = {
72
72
 
73
73
  VOCABULARIES_FUNDER_SCHEMES = {
74
74
  **VOCABULARIES_IDENTIFIER_SCHEMES,
75
- "doi": {"label": _("DOI"), "validator": idutils.is_doi},
75
+ "doi": {"label": _("DOI"), "validator": is_doi},
76
76
  }
77
77
  """Funders allowed identifier schemes."""
78
78
 
@@ -80,8 +80,8 @@ VOCABULARIES_FUNDER_DOI_PREFIX = "10.13039"
80
80
  """DOI prefix for the identifier formed with the FundRef id."""
81
81
 
82
82
  VOCABULARIES_AWARD_SCHEMES = {
83
- "url": {"label": _("URL"), "validator": idutils.is_url},
84
- "doi": {"label": _("DOI"), "validator": idutils.is_doi},
83
+ "url": {"label": _("URL"), "validator": is_url},
84
+ "doi": {"label": _("DOI"), "validator": is_doi},
85
85
  }
86
86
  """Awards allowed identifier schemes."""
87
87
 
@@ -121,15 +121,15 @@ VOCABULARIES_AWARDS_EC_ROR_ID = "00k4n6c32"
121
121
  """ROR ID for EC funder."""
122
122
 
123
123
  VOCABULARIES_NAMES_SCHEMES = {
124
- "orcid": {"label": _("ORCID"), "validator": idutils.is_orcid, "datacite": "ORCID"},
125
- "isni": {"label": _("ISNI"), "validator": idutils.is_isni, "datacite": "ISNI"},
126
- "gnd": {"label": _("GND"), "validator": idutils.is_gnd, "datacite": "GND"},
124
+ "orcid": {"label": _("ORCID"), "validator": is_orcid, "datacite": "ORCID"},
125
+ "isni": {"label": _("ISNI"), "validator": is_isni, "datacite": "ISNI"},
126
+ "gnd": {"label": _("GND"), "validator": is_gnd, "datacite": "GND"},
127
127
  }
128
128
  """Names allowed identifier schemes."""
129
129
 
130
130
  VOCABULARIES_SUBJECTS_SCHEMES = {
131
- "gnd": {"label": _("GND"), "validator": idutils.is_gnd, "datacite": "GND"},
132
- "url": {"label": _("URL"), "validator": idutils.is_url},
131
+ "gnd": {"label": _("GND"), "validator": is_gnd, "datacite": "GND"},
132
+ "url": {"label": _("URL"), "validator": is_url},
133
133
  }
134
134
  """Subjects allowed identifier schemes."""
135
135
 
@@ -204,9 +204,9 @@ VOCABULARIES_AFFILIATIONS_EDMO_COUNTRY_MAPPING = {
204
204
  }
205
205
  """Affiliations EDMO Country name remapping dictionary."""
206
206
 
207
- VOCABULARIES_ORCID_ACCESS_KEY = "TODO"
207
+ VOCABULARIES_ORCID_ACCESS_KEY = "CHANGEME"
208
208
  """ORCID access key to access the s3 bucket."""
209
- VOCABULARIES_ORCID_SECRET_KEY = "TODO"
209
+ VOCABULARIES_ORCID_SECRET_KEY = "CHANGEME"
210
210
  """ORCID secret key to access the s3 bucket."""
211
211
  VOCABULARIES_ORCID_SUMMARIES_BUCKET = "v3.0-summaries"
212
212
  """ORCID summaries bucket name."""
@@ -1,6 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2024 CERN.
3
+ # Copyright (C) 2024-2025 CERN.
4
4
  # Copyright (C) 2024 California Institute of Technology.
5
5
  #
6
6
  # Invenio-Vocabularies is free software; you can redistribute it and/or
@@ -13,6 +13,7 @@ import io
13
13
 
14
14
  import arrow
15
15
  import requests
16
+ from flask import current_app
16
17
  from idutils import normalize_ror
17
18
 
18
19
  from invenio_vocabularies.datastreams.errors import ReaderError, TransformerError
@@ -93,6 +94,9 @@ class RORHTTPReader(BaseReader):
93
94
  if self._since:
94
95
  last_dump_date = self._get_last_dump_date(linksets)
95
96
  if last_dump_date < arrow.get(self._since):
97
+ current_app.logger.info(
98
+ f"Skipping ROR data dump (last dump: {last_dump_date}, since: {self._since})"
99
+ )
96
100
  return
97
101
 
98
102
  for linkset in linksets:
@@ -104,6 +108,8 @@ class RORHTTPReader(BaseReader):
104
108
  if len(zip_files) > 1:
105
109
  raise ReaderError(f"Expected 1 ZIP item but got {len(zip_files)}")
106
110
 
111
+ current_app.logger.info(f"Reading ROR data dump (URL: {file_url})")
112
+
107
113
  # Download the ZIP file and fully load the response bytes content in memory.
108
114
  # The bytes content are then wrapped by a BytesIO to be
109
115
  # file-like object (as required by `zipfile.ZipFile`).
@@ -1,6 +1,6 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
- # Copyright (C) 2021-2024 CERN.
3
+ # Copyright (C) 2021-2025 CERN.
4
4
  #
5
5
  # Invenio-Vocabularies is free software; you can redistribute it and/or
6
6
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -12,6 +12,7 @@ import csv
12
12
  import io
13
13
  import tarfile
14
14
  from concurrent.futures import ThreadPoolExecutor, as_completed
15
+ from contextvars import copy_context
15
16
  from datetime import timedelta
16
17
  from itertools import islice
17
18
  from pathlib import Path
@@ -43,17 +44,18 @@ class OrcidDataSyncReader(BaseReader):
43
44
  self.s3_client = S3OrcidClient()
44
45
  self.since = since
45
46
 
46
- def _fetch_orcid_data(self, orcid_to_sync, bucket):
47
+ def _fetch_orcid_data(self, app, orcid_to_sync, bucket):
47
48
  """Fetches a single ORCiD record from S3."""
48
49
  # The ORCiD file key is located in a folder which name corresponds to the last three digits of the ORCiD
49
50
  suffix = orcid_to_sync[-3:]
50
51
  key = f"{suffix}/{orcid_to_sync}.xml"
52
+ app.logger.debug(f"Fetching ORCiD record: {key} from bucket: {bucket}")
51
53
  try:
52
54
+ # Potential improvement: use a SAX (streaming) XML parser to avoid loading the whole file in memory
53
55
  # and choose the sections we need to read (probably the summary)
54
56
  return self.s3_client.read_file(f"s3://{bucket}/{key}")
55
57
  except Exception:
56
- current_app.logger.exception("Failed to fetch ORCiD record.")
58
+ app.logger.exception(f"Failed to fetch ORCiD record: {key}")
57
59
 
58
60
  def _process_lambda_file(self, fileobj):
59
61
  """Process the ORCiD lambda file and returns a list of ORCiDs to sync.
@@ -87,7 +89,11 @@ class OrcidDataSyncReader(BaseReader):
87
89
  )
88
90
 
89
91
  if last_modified_date < last_sync:
92
+ current_app.logger.debug(
93
+ f"Skipping ORCiD {orcid} (last modified: {last_modified_date})"
94
+ )
90
95
  break
96
+ current_app.logger.debug(f"Yielding ORCiD {orcid} for sync.")
91
97
  yield orcid
92
98
  finally:
93
99
  fileobj.close()
@@ -97,10 +103,15 @@ class OrcidDataSyncReader(BaseReader):
97
103
  with ThreadPoolExecutor(
98
104
  max_workers=current_app.config["VOCABULARIES_ORCID_SYNC_MAX_WORKERS"]
99
105
  ) as executor:
106
+ app = current_app._get_current_object()
100
107
  # futures is a dictionary where the key is the ORCID value and the item is the Future object
108
+ # Flask does not propagate app/request context to new threads, so `copy_context().run`
109
+ # ensures the currently instantiated contextvars (such as job_context) are preserved in each thread.
101
110
  futures = {
102
111
  orcid: executor.submit(
112
+ copy_context().run, # Required to pass the context to the thread
103
113
  self._fetch_orcid_data,
114
+ app, # Pass the Flask app to the thread
104
115
  orcid,
105
116
  current_app.config["VOCABULARIES_ORCID_SUMMARIES_BUCKET"],
106
117
  )
@@ -111,7 +122,14 @@ class OrcidDataSyncReader(BaseReader):
111
122
  try:
112
123
  result = futures[orcid].result()
113
124
  if result:
125
+ current_app.logger.debug(
126
+ f"Successfully fetched ORCiD record: {orcid}"
127
+ )
114
128
  yield result
129
+ except Exception:
130
+ current_app.logger.exception(
131
+ f"Error processing ORCiD record: {orcid}"
132
+ )
115
133
  finally:
116
134
  # Explicitly release memory, as we don't need the future anymore.
117
135
  # This is mostly required because as long as we keep a reference to the future
@@ -125,7 +143,7 @@ class OrcidDataSyncReader(BaseReader):
125
143
  tar_content = self.s3_client.read_file(
126
144
  "s3://orcid-lambda-file/last_modified.csv.tar"
127
145
  )
128
-
146
+ current_app.logger.info("Fetching ORCiD lambda file")
129
147
  # Opens tar file and process it
130
148
  with tarfile.open(fileobj=io.BytesIO(tar_content)) as tar:
131
149
  # Iterate over each member (file or directory) in the tar file
@@ -133,7 +151,7 @@ class OrcidDataSyncReader(BaseReader):
133
151
  # Extract the file
134
152
  extracted_file = tar.extractfile(member)
135
153
  if extracted_file:
136
- current_app.logger.info(f"[ORCID Reader] Processing lambda file...")
154
+ current_app.logger.info(f"Processing lambda file: {member.name}")
137
155
  # Process the file and get the ORCiDs to sync
138
156
  orcids_to_sync = set(self._process_lambda_file(extracted_file))
139
157
 
@@ -150,6 +168,7 @@ class OrcidDataSyncReader(BaseReader):
150
168
  """Yield successive chunks of a given size."""
151
169
  it = iter(iterable)
152
170
  while chunk := list(islice(it, batch_size)):
171
+ current_app.logger.debug(f"Processing batch of size {len(chunk)}.")
153
172
  yield chunk
154
173
 
155
174
 
@@ -239,18 +258,25 @@ class OrcidTransformer(BaseTransformer):
239
258
 
240
259
  def apply(self, stream_entry, **kwargs):
241
260
  """Applies the transformation to the stream entry."""
261
+ current_app.logger.debug("Applying transformation to stream entry.")
242
262
  record = stream_entry.entry
243
263
  person = record["person"]
244
264
  orcid_id = record["orcid-identifier"]["path"]
245
265
 
246
266
  name = person.get("name")
247
267
  if name is None:
248
- raise TransformerError("Name not found in ORCiD entry.")
268
+ raise TransformerError(
269
+ f"Name not found in ORCiD entry for ORCiD ID: {orcid_id}."
270
+ )
249
271
  if name.get("family-name") is None:
250
- raise TransformerError("Family name not found in ORCiD entry.")
272
+ raise TransformerError(
273
+ f"Family name not found in ORCiD entry for ORCiD ID: {orcid_id}."
274
+ )
251
275
 
252
276
  if not self._is_valid_name(name["given-names"] + name["family-name"]):
253
- raise TransformerError("Invalid characters in name.")
277
+ raise TransformerError(
278
+ f"Invalid characters in name for ORCiD ID: {orcid_id}."
279
+ )
254
280
 
255
281
  entry = {
256
282
  "id": orcid_id,
@@ -261,6 +287,7 @@ class OrcidTransformer(BaseTransformer):
261
287
  }
262
288
 
263
289
  stream_entry.entry = entry
290
+ current_app.logger.debug(f"Transformed entry: {entry}")
264
291
  return stream_entry
265
292
 
266
293
  def _is_valid_name(self, name):
@@ -271,6 +298,7 @@ class OrcidTransformer(BaseTransformer):
271
298
 
272
299
  def _extract_affiliations(self, record):
273
300
  """Extract affiliations from the ORCiD record."""
301
+ current_app.logger.debug("Extracting affiliations from ORCiD record.")
274
302
  result = []
275
303
  try:
276
304
  employments = (
@@ -312,7 +340,7 @@ class OrcidTransformer(BaseTransformer):
312
340
 
313
341
  result.append(aff)
314
342
  except Exception:
315
- pass
343
+ current_app.logger.error("Error extracting affiliations.")
316
344
  return result
317
345
 
318
346
  def _extract_affiliation_id(self, org):
@@ -410,6 +438,7 @@ ORCID_PRESET_DATASTREAM_CONFIG = {
410
438
  "args": {
411
439
  "writer": {
412
440
  "type": "names-service",
441
+ "args": {"update": True},
413
442
  }
414
443
  },
415
444
  }
@@ -1,7 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  #
3
3
  # This file is part of Invenio.
4
- # Copyright (C) 2024 CERN.
4
+ # Copyright (C) 2024-2025 CERN.
5
5
  #
6
6
  # Invenio-Vocabularies is free software; you can redistribute it and/or
7
7
  # modify it under the terms of the MIT License; see LICENSE file for more
@@ -38,7 +38,13 @@ class S3OrcidClient(S3Client):
38
38
 
39
39
  def __init__(self):
40
40
  """Constructor."""
41
+ access_key = current_app.config["VOCABULARIES_ORCID_ACCESS_KEY"]
42
+ secret_key = current_app.config["VOCABULARIES_ORCID_SECRET_KEY"]
43
+ if access_key == "CHANGEME" or secret_key == "CHANGEME":
44
+ raise Exception(
45
+ "VOCABULARIES_ORCID_ACCESS_KEY and VOCABULARIES_ORCID_SECRET_KEY are not configured."
46
+ )
41
47
  super().__init__(
42
- access_key=current_app.config["VOCABULARIES_ORCID_ACCESS_KEY"],
43
- secret_key=current_app.config["VOCABULARIES_ORCID_SECRET_KEY"],
48
+ access_key=access_key,
49
+ secret_key=secret_key,
44
50
  )
@@ -72,13 +72,18 @@ class DataStream:
72
72
 
73
73
  def filter(self, stream_entry, *args, **kwargs):
74
74
  """Checks if an stream_entry should be filtered out (skipped)."""
75
+ current_app.logger.debug(f"Filtering entry: {stream_entry.entry}")
75
76
  return False
76
77
 
77
78
  def process_batch(self, batch):
78
79
  """Process a batch of entries."""
80
+ current_app.logger.info(f"Processing batch of size: {len(batch)}")
79
81
  transformed_entries = []
80
82
  for stream_entry in batch:
81
83
  if stream_entry.errors:
84
+ current_app.logger.warning(
85
+ f"Skipping entry with errors: {stream_entry.errors}"
86
+ )
82
87
  yield stream_entry # reading errors
83
88
  else:
84
89
  transformed_entry = self.transform(stream_entry)
@@ -103,19 +108,23 @@ class DataStream:
103
108
  the reader, apply the transformations and yield the result of
104
109
  writing it.
105
110
  """
111
+ current_app.logger.info("Starting data stream processing")
106
112
  batch = []
107
113
  for stream_entry in self.read():
108
114
  batch.append(stream_entry)
109
115
  if len(batch) >= self.batch_size:
116
+ current_app.logger.debug(f"Processing batch of size: {len(batch)}")
110
117
  yield from self.process_batch(batch)
111
118
  batch = []
112
119
 
113
120
  # Process any remaining entries in the last batch
114
121
  if batch:
122
+ current_app.logger.debug(f"Processing final batch of size: {len(batch)}")
115
123
  yield from self.process_batch(batch)
116
124
 
117
125
  def read(self):
118
126
  """Recursively read the entries."""
127
+ current_app.logger.debug("Reading entries from readers")
119
128
 
120
129
  def pipe_gen(gen_funcs, piped_item=None):
121
130
  _gen_funcs = list(gen_funcs) # copy to avoid modifying ref list
@@ -130,6 +139,7 @@ class DataStream:
130
139
  else:
131
140
  yield StreamEntry(item)
132
141
  except ReaderError as err:
142
+ current_app.logger.error(f"Reader error: {str(err)}")
133
143
  yield StreamEntry(
134
144
  entry=item,
135
145
  errors=[f"{current_gen_func.__qualname__}: {str(err)}"],
@@ -140,6 +150,7 @@ class DataStream:
140
150
 
141
151
  def transform(self, stream_entry, *args, **kwargs):
142
152
  """Apply the transformations to an stream_entry."""
153
+ current_app.logger.debug(f"Transforming entry: {stream_entry.entry}")
143
154
  for transformer in self._transformers:
144
155
  try:
145
156
  stream_entry = transformer.apply(stream_entry)
@@ -153,16 +164,19 @@ class DataStream:
153
164
 
154
165
  def write(self, stream_entry, *args, **kwargs):
155
166
  """Apply the transformations to an stream_entry."""
167
+ current_app.logger.debug(f"Writing entry: {stream_entry.entry}")
156
168
  for writer in self._writers:
157
169
  try:
158
170
  writer.write(stream_entry)
159
171
  except WriterError as err:
172
+ current_app.logger.error(f"Writer error: {str(err)}")
160
173
  stream_entry.errors.append(f"{writer.__class__.__name__}: {str(err)}")
161
174
 
162
175
  return stream_entry
163
176
 
164
177
  def batch_write(self, stream_entries, *args, **kwargs):
165
178
  """Apply the transformations to an stream_entry. Errors are handler in the service layer."""
179
+ current_app.logger.debug(f"Batch writing entries: {len(stream_entries)}")
166
180
  for writer in self._writers:
167
181
  yield from writer.write_many(stream_entries)
168
182
 
@@ -407,15 +407,18 @@ class RDFReader(BaseReader):
407
407
  class SPARQLReader(BaseReader):
408
408
  """Generic reader class to fetch and process RDF data from a SPARQL endpoint."""
409
409
 
410
- def __init__(self, origin, query, mode="r", *args, **kwargs):
410
+ def __init__(self, origin, query, mode="r", client_params=None, *args, **kwargs):
411
411
  """Initialize the reader with the data source.
412
412
 
413
413
  :param origin: The SPARQL endpoint from which to fetch the RDF data.
414
414
  :param query: The SPARQL query to execute.
415
415
  :param mode: Mode of operation (default is 'r' for reading).
416
+ :param client_params: Additional client parameters to pass to the SPARQL client.
416
417
  """
417
418
  self._origin = origin
418
419
  self._query = query
420
+ self._client_params = client_params or {}
421
+
419
422
  super().__init__(origin=origin, mode=mode, *args, **kwargs)
420
423
 
421
424
  def _iter(self, fp, *args, **kwargs):
@@ -430,7 +433,14 @@ class SPARQLReader(BaseReader):
430
433
  "SPARQLReader does not support being chained after another reader"
431
434
  )
432
435
 
433
- sparql_client = sparql.SPARQLWrapper(self._origin)
436
+ # Avoid overwriting SPARQLWrapper's default value for the user agent string
437
+ if self._client_params.get("user_agent"):
438
+ sparql_client = sparql.SPARQLWrapper(
439
+ self._origin, agent=self._client_params.get("user_agent")
440
+ )
441
+ else:
442
+ sparql_client = sparql.SPARQLWrapper(self._origin)
443
+
434
444
  sparql_client.setQuery(self._query)
435
445
  sparql_client.setReturnFormat(sparql.JSON)
436
446
 
@@ -87,17 +87,21 @@ class ServiceWriter(BaseWriter):
87
87
 
88
88
  def _do_update(self, entry):
89
89
  vocab_id = self._entry_id(entry)
90
+ current_app.logger.debug(f"Resolving entry with ID: {vocab_id}")
90
91
  current = self._resolve(vocab_id)
91
92
  updated = dict(current.to_dict(), **entry)
93
+ current_app.logger.debug(f"Updating entry with ID: {vocab_id}")
92
94
  return StreamEntry(self._service.update(self._identity, vocab_id, updated))
93
95
 
94
96
  def write(self, stream_entry, *args, **kwargs):
95
97
  """Writes the input entry using a given service."""
96
98
  entry = stream_entry.entry
99
+ current_app.logger.debug(f"Writing entry: {entry}")
97
100
 
98
101
  try:
99
102
  if self._insert:
100
103
  try:
104
+ current_app.logger.debug("Inserting entry.")
101
105
  return StreamEntry(self._service.create(self._identity, entry))
102
106
  except PIDAlreadyExists:
103
107
  if not self._update:
@@ -105,6 +109,7 @@ class ServiceWriter(BaseWriter):
105
109
  return self._do_update(entry)
106
110
  elif self._update:
107
111
  try:
112
+ current_app.logger.debug("Attempting to update entry.")
108
113
  return self._do_update(entry)
109
114
  except (NoResultFound, PIDDoesNotExistError):
110
115
  raise WriterError([f"Vocabulary entry does not exist: {entry}"])
@@ -139,6 +144,7 @@ class ServiceWriter(BaseWriter):
139
144
  processed_stream_entry.log_errors()
140
145
  stream_entries_processed.append(processed_stream_entry)
141
146
 
147
+ current_app.logger.debug(f"Finished writing {len(stream_entries)} entries")
142
148
  return stream_entries_processed
143
149
 
144
150
 
@@ -10,7 +10,7 @@
10
10
 
11
11
  import datetime
12
12
 
13
- from invenio_i18n import gettext as _
13
+ from invenio_i18n import lazy_gettext as _
14
14
  from invenio_jobs.jobs import JobType
15
15
 
16
16
  from invenio_vocabularies.services.tasks import process_datastream
@@ -27,8 +27,8 @@ class ProcessDataStreamJob(JobType):
27
27
  class ProcessRORAffiliationsJob(ProcessDataStreamJob):
28
28
  """Process ROR affiliations datastream registered task."""
29
29
 
30
- description = "Process ROR affiliations"
31
- title = "Load ROR affiliations"
30
+ description = _("Process ROR affiliations")
31
+ title = _("Load ROR affiliations")
32
32
  id = "process_ror_affiliations"
33
33
 
34
34
  @classmethod
@@ -65,8 +65,8 @@ class ProcessRORAffiliationsJob(ProcessDataStreamJob):
65
65
  class ProcessRORFundersJob(ProcessDataStreamJob):
66
66
  """Process ROR funders datastream registered task."""
67
67
 
68
- description = "Process ROR funders"
69
- title = "Load ROR funders"
68
+ description = _("Process ROR funders")
69
+ title = _("Load ROR funders")
70
70
  id = "process_ror_funders"
71
71
 
72
72
  @classmethod
@@ -103,8 +103,8 @@ class ProcessRORFundersJob(ProcessDataStreamJob):
103
103
  class ImportAwardsOpenAIREJob(ProcessDataStreamJob):
104
104
  """Import awards from OpenAIRE registered task."""
105
105
 
106
- description = "Import awards from OpenAIRE"
107
- title = "Import Awards OpenAIRE"
106
+ description = _("Import awards from OpenAIRE")
107
+ title = _("Import Awards OpenAIRE")
108
108
  id = "import_awards_openaire"
109
109
 
110
110
  @classmethod
@@ -138,8 +138,8 @@ class ImportAwardsOpenAIREJob(ProcessDataStreamJob):
138
138
  class UpdateAwardsCordisJob(ProcessDataStreamJob):
139
139
  """Update awards from CORDIS registered task."""
140
140
 
141
- description = "Update awards from CORDIS"
142
- title = "Update Awards CORDIS"
141
+ description = _("Update awards from CORDIS")
142
+ title = _("Update Awards CORDIS")
143
143
  id = "update_awards_cordis"
144
144
 
145
145
  @classmethod
@@ -166,8 +166,8 @@ class UpdateAwardsCordisJob(ProcessDataStreamJob):
166
166
  class ImportORCIDJob(ProcessDataStreamJob):
167
167
  """Import ORCID data registered task."""
168
168
 
169
- description = "Import ORCID data"
170
- title = "Import ORCID data"
169
+ description = _("Import ORCID data")
170
+ title = _("Import ORCID data")
171
171
  id = "import_orcid"
172
172
 
173
173
  @classmethod
@@ -9,6 +9,7 @@
9
9
  """Vocabulary models."""
10
10
 
11
11
  from invenio_db import db
12
+ from invenio_i18n import gettext as _
12
13
  from invenio_records.models import RecordMetadataBase
13
14
 
14
15
 
@@ -79,7 +80,9 @@ class VocabularyScheme(db.Model):
79
80
  """Create a new vocabulary subtype."""
80
81
  banned = [",", ":"]
81
82
  for b in banned:
82
- assert b not in data["id"], f"No '{b}' allowed in VocabularyScheme.id"
83
+ assert b not in data["id"], _(
84
+ "No '%(banned_char)s' allowed in VocabularyScheme.id", banned_char=b
85
+ )
83
86
 
84
87
  with db.session.begin_nested():
85
88
  obj = cls(**data)