invenio-vocabularies 6.7.0__py2.py3-none-any.whl → 6.9.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of invenio-vocabularies might be problematic. Click here for more details.

Files changed (26) hide show
  1. invenio_vocabularies/__init__.py +1 -1
  2. invenio_vocabularies/cli.py +2 -0
  3. invenio_vocabularies/config.py +16 -0
  4. invenio_vocabularies/contrib/affiliations/schema.py +10 -0
  5. invenio_vocabularies/contrib/names/datastreams.py +182 -57
  6. invenio_vocabularies/contrib/subjects/bodc/__init__.py +9 -0
  7. invenio_vocabularies/contrib/subjects/bodc/datastreams.py +111 -0
  8. invenio_vocabularies/contrib/subjects/config.py +9 -4
  9. invenio_vocabularies/contrib/subjects/datastreams.py +2 -4
  10. invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py +33 -11
  11. invenio_vocabularies/contrib/subjects/gemet/datastreams.py +36 -5
  12. invenio_vocabularies/datastreams/datastreams.py +18 -7
  13. invenio_vocabularies/datastreams/factories.py +3 -1
  14. invenio_vocabularies/datastreams/transformers.py +12 -0
  15. invenio_vocabularies/datastreams/writers.py +6 -2
  16. invenio_vocabularies/factories.py +41 -0
  17. invenio_vocabularies/fixtures.py +2 -0
  18. invenio_vocabularies/services/querystr.py +5 -0
  19. invenio_vocabularies/services/tasks.py +2 -0
  20. {invenio_vocabularies-6.7.0.dist-info → invenio_vocabularies-6.9.0.dist-info}/METADATA +12 -1
  21. {invenio_vocabularies-6.7.0.dist-info → invenio_vocabularies-6.9.0.dist-info}/RECORD +26 -24
  22. {invenio_vocabularies-6.7.0.dist-info → invenio_vocabularies-6.9.0.dist-info}/AUTHORS.rst +0 -0
  23. {invenio_vocabularies-6.7.0.dist-info → invenio_vocabularies-6.9.0.dist-info}/LICENSE +0 -0
  24. {invenio_vocabularies-6.7.0.dist-info → invenio_vocabularies-6.9.0.dist-info}/WHEEL +0 -0
  25. {invenio_vocabularies-6.7.0.dist-info → invenio_vocabularies-6.9.0.dist-info}/entry_points.txt +0 -0
  26. {invenio_vocabularies-6.7.0.dist-info → invenio_vocabularies-6.9.0.dist-info}/top_level.txt +0 -0
@@ -10,6 +10,6 @@
10
10
 
11
11
  from .ext import InvenioVocabularies
12
12
 
13
- __version__ = "6.7.0"
13
+ __version__ = "6.9.0"
14
14
 
15
15
  __all__ = ("__version__", "InvenioVocabularies")
@@ -29,6 +29,8 @@ def _process_vocab(config, num_samples=None):
29
29
  readers_config=config["readers"],
30
30
  transformers_config=config.get("transformers"),
31
31
  writers_config=config["writers"],
32
+ batch_size=config.get("batch_size", 1000),
33
+ write_many=config.get("write_many", False),
32
34
  )
33
35
 
34
36
  success, errored, filtered = 0, 0, 0
@@ -196,6 +196,9 @@ VOCABULARIES_SUBJECTS_GEMET_FILE_URL = (
196
196
  )
197
197
  """Subject GEMET file download link."""
198
198
 
199
+ VOCABULARIES_SUBJECTS_BODC_PUV_FILE_URL = "http://vocab.nerc.ac.uk/collection/P01/current/?_profile=nvs&_mediatype=application/rdf+xml"
200
+ """Subject BODC-PUV file download link."""
201
+
199
202
  VOCABULARIES_AFFILIATIONS_EDMO_COUNTRY_MAPPING = {
200
203
  "Cape Verde": "Cabo Verde",
201
204
  }
@@ -213,3 +216,16 @@ VOCABULARIES_ORCID_SYNC_SINCE = {
213
216
  "days": 1,
214
217
  }
215
218
  """ORCID time shift to sync. Parameters accepted are the ones passed to 'datetime.timedelta'."""
219
+
220
+ VOCABULARIES_ORCID_ORG_IDS_MAPPING_PATH = None
221
+ """Path to the CSV file for mapping ORCiD organization IDs to affiliation IDs.
222
+
223
+ The path can be specified as either an absolute path or a relative path within the
224
+ Flask app instance folder (i.e. ``current_app.instance_path``).
225
+
226
+ The CSV file should have the following columns:
227
+
228
+ - `org_scheme`: The scheme of the ORCiD organization ID (e.g. ROR, GRID).
229
+ - `org_id`: The ORCiD organization ID.
230
+ - `aff_id`: The affiliation ID to map to.
231
+ """
@@ -59,3 +59,13 @@ class AffiliationRelationSchema(ContribVocabularyRelationSchema):
59
59
  ftf_name = "name"
60
60
  parent_field_name = "affiliations"
61
61
  name = SanitizedUnicode()
62
+ identifiers = IdentifierSet(
63
+ fields.Nested(
64
+ partial(
65
+ IdentifierSchema,
66
+ allowed_schemes=affiliation_schemes,
67
+ identifier_required=False,
68
+ )
69
+ ),
70
+ dump_only=True,
71
+ )
@@ -13,12 +13,14 @@ import io
13
13
  import tarfile
14
14
  from concurrent.futures import ThreadPoolExecutor, as_completed
15
15
  from datetime import timedelta
16
+ from itertools import islice
17
+ from pathlib import Path
16
18
 
17
19
  import arrow
18
20
  import regex as re
19
21
  from flask import current_app
20
22
  from invenio_access.permissions import system_identity
21
- from invenio_records.dictutils import dict_lookup
23
+ from werkzeug.utils import cached_property
22
24
 
23
25
  from invenio_vocabularies.contrib.names.s3client import S3OrcidClient
24
26
 
@@ -47,10 +49,11 @@ class OrcidDataSyncReader(BaseReader):
47
49
  suffix = orcid_to_sync[-3:]
48
50
  key = f"{suffix}/{orcid_to_sync}.xml"
49
51
  try:
52
+ # Potential improvement: use a SAX XML parser to avoid loading the whole file in memory
53
+ # and choose the sections we need to read (probably the summary)
50
54
  return self.s3_client.read_file(f"s3://{bucket}/{key}")
51
- except Exception as e:
52
- # TODO: log
53
- return None
55
+ except Exception:
56
+ current_app.logger.exception("Failed to fetch ORCiD record.")
54
57
 
55
58
  def _process_lambda_file(self, fileobj):
56
59
  """Process the ORCiD lambda file and returns a list of ORCiDs to sync.
@@ -67,42 +70,54 @@ class OrcidDataSyncReader(BaseReader):
67
70
  if self.since:
68
71
  time_shift = self.since
69
72
  last_sync = arrow.now() - timedelta(**time_shift)
70
-
71
- file_content = fileobj.read().decode("utf-8")
72
-
73
- csv_reader = csv.DictReader(file_content.splitlines())
74
-
75
- for row in csv_reader: # Skip the header line
76
- orcid = row["orcid"]
77
-
78
- # Lambda file is ordered by last modified date
79
- last_modified_str = row["last_modified"]
80
- try:
81
- last_modified_date = arrow.get(last_modified_str, date_format)
82
- except arrow.parser.ParserError:
83
- last_modified_date = arrow.get(last_modified_str, date_format_no_millis)
84
-
85
- if last_modified_date < last_sync:
86
- break
87
- yield orcid
73
+ try:
74
+ content = io.TextIOWrapper(fileobj, encoding="utf-8")
75
+ csv_reader = csv.DictReader(content)
76
+
77
+ for row in csv_reader:  # DictReader consumes the header row automatically
78
+ orcid = row["orcid"]
79
+
80
+ # Lambda file is ordered by last modified date
81
+ last_modified_str = row["last_modified"]
82
+ try:
83
+ last_modified_date = arrow.get(last_modified_str, date_format)
84
+ except arrow.parser.ParserError:
85
+ last_modified_date = arrow.get(
86
+ last_modified_str, date_format_no_millis
87
+ )
88
+
89
+ if last_modified_date < last_sync:
90
+ break
91
+ yield orcid
92
+ finally:
93
+ fileobj.close()
88
94
 
89
95
  def _iter(self, orcids):
90
96
  """Iterates over the ORCiD records yielding each one."""
91
97
  with ThreadPoolExecutor(
92
98
  max_workers=current_app.config["VOCABULARIES_ORCID_SYNC_MAX_WORKERS"]
93
99
  ) as executor:
94
- futures = [
95
- executor.submit(
100
+ # futures is a dictionary where the key is the ORCID value and the item is the Future object
101
+ futures = {
102
+ orcid: executor.submit(
96
103
  self._fetch_orcid_data,
97
104
  orcid,
98
105
  current_app.config["VOCABULARIES_ORCID_SUMMARIES_BUCKET"],
99
106
  )
100
107
  for orcid in orcids
101
- ]
102
- for future in as_completed(futures):
103
- result = future.result()
104
- if result is not None:
105
- yield result
108
+ }
109
+
110
+ for orcid in list(futures.keys()):
111
+ try:
112
+ result = futures[orcid].result()
113
+ if result:
114
+ yield result
115
+ finally:
116
+ # Explicitly release memory, as we don't need the future anymore.
117
+ # This is mostly required because as long as we keep a reference to the future
118
+ # (in the above futures dict), the garbage collector won't collect it
119
+ # and it will keep the memory allocated.
120
+ del futures[orcid]
106
121
 
107
122
  def read(self, item=None, *args, **kwargs):
108
123
  """Streams the ORCiD lambda file, processes it to get the ORCiDs to sync and yields their data."""
@@ -111,7 +126,6 @@ class OrcidDataSyncReader(BaseReader):
111
126
  "s3://orcid-lambda-file/last_modified.csv.tar"
112
127
  )
113
128
 
114
- orcids_to_sync = []
115
129
  # Opens tar file and process it
116
130
  with tarfile.open(fileobj=io.BytesIO(tar_content)) as tar:
117
131
  # Iterate over each member (file or directory) in the tar file
@@ -119,10 +133,24 @@ class OrcidDataSyncReader(BaseReader):
119
133
  # Extract the file
120
134
  extracted_file = tar.extractfile(member)
121
135
  if extracted_file:
136
+ current_app.logger.info(f"[ORCID Reader] Processing lambda file...")
122
137
  # Process the file and get the ORCiDs to sync
123
- orcids_to_sync.extend(self._process_lambda_file(extracted_file))
138
+ orcids_to_sync = set(self._process_lambda_file(extracted_file))
139
+
140
+ # Close the file explicitly after processing
141
+ extracted_file.close()
142
+
143
+ # Process ORCIDs in smaller batches
144
+ for orcid_batch in self._chunked_iter(
145
+ orcids_to_sync, batch_size=100
146
+ ):
147
+ yield from self._iter(orcid_batch)
124
148
 
125
- yield from self._iter(orcids_to_sync)
149
+ def _chunked_iter(self, iterable, batch_size):
150
+ """Yield successive chunks of a given size."""
151
+ it = iter(iterable)
152
+ while chunk := list(islice(it, batch_size)):
153
+ yield chunk
126
154
 
127
155
 
128
156
  class OrcidHTTPReader(SimpleHTTPReader):
@@ -139,24 +167,75 @@ class OrcidHTTPReader(SimpleHTTPReader):
139
167
 
140
168
 
141
169
  DEFAULT_NAMES_EXCLUDE_REGEX = r"[\p{P}\p{S}\p{Nd}\p{No}\p{Emoji}--,.()\-']"
142
- """Regex to filter out names with punctuations, symbols, decimal numbers and emojis."""
170
+ """Regex to filter out names with punctuation, symbols, numbers and emojis."""
171
+
172
+
173
+ class OrcidOrgToAffiliationMapper:
174
+ """Default ORCiD Org ID to affiliation ID mapper."""
175
+
176
+ def __init__(self, org_ids_mapping=None, org_ids_mapping_file=None):
177
+ """Constructor."""
178
+ self._org_ids_mapping = org_ids_mapping
179
+ self._org_ids_mapping_file = org_ids_mapping_file
180
+
181
+ @cached_property
182
+ def org_ids_mapping(self):
183
+ """Mapping of ORCiD org IDs to affiliation IDs."""
184
+ org_ids_mapping_file = self._org_ids_mapping_file or current_app.config.get(
185
+ "VOCABULARIES_ORCID_ORG_IDS_MAPPING_PATH"
186
+ )
187
+ if org_ids_mapping_file:
188
+ org_ids_mapping_file = Path(org_ids_mapping_file)
189
+ # If the path is relative, prepend the instance path
190
+ if not org_ids_mapping_file.is_absolute():
191
+ org_ids_mapping_file = (
192
+ Path(current_app.instance_path) / org_ids_mapping_file
193
+ )
194
+ with open(org_ids_mapping_file) as fin:
195
+ result = {}
196
+ reader = csv.reader(fin)
197
+
198
+ # Check if the first row is a header
199
+ org_scheme, org_id, aff_id = next(reader)
200
+ if org_scheme.lower() != "org_scheme":
201
+ result[(org_scheme, org_id)] = aff_id
202
+
203
+ for org_scheme, org_id, aff_id in reader:
204
+ result[(org_scheme, org_id)] = aff_id
205
+
206
+ return result
207
+
208
+ return self._org_ids_mapping or {}
209
+
210
+ def __call__(self, org_scheme, org_id):
211
+ """Map an ORCiD org ID to an affiliation ID."""
212
+ # By default we know that ROR IDs are linkable
213
+ if org_scheme == "ROR":
214
+ return org_id.split("/")[-1]
215
+ # Otherwise see if we have a mapping from other schemes to an affiliation ID
216
+ return self.org_ids_mapping.get((org_scheme, org_id))
143
217
 
144
218
 
145
219
  class OrcidTransformer(BaseTransformer):
146
220
  """Transforms an ORCiD record into a names record."""
147
221
 
148
222
  def __init__(
149
- self, *args, names_exclude_regex=DEFAULT_NAMES_EXCLUDE_REGEX, **kwargs
223
+ self,
224
+ *args,
225
+ names_exclude_regex=DEFAULT_NAMES_EXCLUDE_REGEX,
226
+ org_id_to_affiliation_id_func=None,
227
+ **kwargs,
150
228
  ) -> None:
151
229
  """Constructor."""
152
230
  self._names_exclude_regex = names_exclude_regex
231
+ self._org_id_to_affiliation_id_func = (
232
+ org_id_to_affiliation_id_func or OrcidOrgToAffiliationMapper()
233
+ )
153
234
  super().__init__()
154
235
 
155
- def _is_valid_name(self, name):
156
- """Check whether the name passes the regex."""
157
- if not self._names_exclude_regex:
158
- return True
159
- return not bool(re.search(self._names_exclude_regex, name, re.UNICODE | re.V1))
236
+ def org_id_to_affiliation_id(self, org_scheme, org_id):
237
+ """Convert an ORCiD org ID to a linkable affiliation ID."""
238
+ return self._org_id_to_affiliation_id_func(org_scheme, org_id)
160
239
 
161
240
  def apply(self, stream_entry, **kwargs):
162
241
  """Applies the transformation to the stream entry."""
@@ -166,42 +245,88 @@ class OrcidTransformer(BaseTransformer):
166
245
 
167
246
  name = person.get("name")
168
247
  if name is None:
169
- raise TransformerError(f"Name not found in ORCiD entry.")
248
+ raise TransformerError("Name not found in ORCiD entry.")
170
249
  if name.get("family-name") is None:
171
- raise TransformerError(f"Family name not found in ORCiD entry.")
250
+ raise TransformerError("Family name not found in ORCiD entry.")
172
251
 
173
252
  if not self._is_valid_name(name["given-names"] + name["family-name"]):
174
- raise TransformerError(f"Invalid characters in name.")
253
+ raise TransformerError("Invalid characters in name.")
175
254
 
176
255
  entry = {
177
256
  "id": orcid_id,
178
257
  "given_name": name.get("given-names"),
179
258
  "family_name": name.get("family-name"),
180
259
  "identifiers": [{"scheme": "orcid", "identifier": orcid_id}],
181
- "affiliations": [],
260
+ "affiliations": self._extract_affiliations(record),
182
261
  }
183
262
 
263
+ stream_entry.entry = entry
264
+ return stream_entry
265
+
266
+ def _is_valid_name(self, name):
267
+ """Check whether the name passes the regex."""
268
+ if not self._names_exclude_regex:
269
+ return True
270
+ return not bool(re.search(self._names_exclude_regex, name, re.UNICODE | re.V1))
271
+
272
+ def _extract_affiliations(self, record):
273
+ """Extract affiliations from the ORCiD record."""
274
+ result = []
184
275
  try:
185
- employments = dict_lookup(
186
- record, "activities-summary.employments.affiliation-group"
276
+ employments = (
277
+ record.get("activities-summary", {})
278
+ .get("employments", {})
279
+ .get("affiliation-group", [])
187
280
  )
281
+
282
+ # If there is a single value, the XML-to-dict conversion doesn't wrap it in a list
188
283
  if isinstance(employments, dict):
189
284
  employments = [employments]
190
- history = set()
285
+
286
+ # Remove the "employment-summary" nesting
287
+ employments = [
288
+ employment.get("employment-summary", {}) for employment in employments
289
+ ]
290
+
191
291
  for employment in employments:
192
- terminated = employment["employment-summary"].get("end-date")
193
- affiliation = dict_lookup(
194
- employment,
195
- "employment-summary.organization.name",
196
- )
197
- if affiliation not in history and not terminated:
198
- history.add(affiliation)
199
- entry["affiliations"].append({"name": affiliation})
292
+ terminated = employment.get("end-date")
293
+ if terminated:
294
+ continue
295
+
296
+ org = employment["organization"]
297
+ aff_id = self._extract_affiliation_id(org)
298
+
299
+ # Skip adding if the ID already exists in result
300
+ if aff_id and any(aff.get("id") == aff_id for aff in result):
301
+ continue
302
+
303
+ # Skip adding if the name exists in result with no ID
304
+ if any(
305
+ aff.get("name") == org["name"] and "id" not in aff for aff in result
306
+ ):
307
+ continue
308
+
309
+ aff = {"name": org["name"]}
310
+ if aff_id:
311
+ aff["id"] = aff_id
312
+
313
+ result.append(aff)
200
314
  except Exception:
201
315
  pass
202
-
203
- stream_entry.entry = entry
204
- return stream_entry
316
+ return result
317
+
318
+ def _extract_affiliation_id(self, org):
319
+ """Extract the affiliation ID from an ORCiD organization."""
320
+ dis_org = org.get("disambiguated-organization")
321
+ if not dis_org:
322
+ return
323
+
324
+ aff_id = None
325
+ org_id = dis_org.get("disambiguated-organization-identifier")
326
+ org_scheme = dis_org.get("disambiguation-source")
327
+ if org_id and org_scheme:
328
+ aff_id = self.org_id_to_affiliation_id(org_scheme, org_id)
329
+ return aff_id
205
330
 
206
331
 
207
332
  class NamesServiceWriter(ServiceWriter):
@@ -0,0 +1,9 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2024 CERN.
4
+ #
5
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
6
+ # modify it under the terms of the MIT License; see LICENSE file for more
7
+ # details.
8
+
9
+ """BODC Subjects module."""
@@ -0,0 +1,111 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2024 CERN.
4
+ #
5
+ # Invenio-Vocabularies is free software; you can redistribute it and/or
6
+ # modify it under the terms of the MIT License; see LICENSE file for more
7
+ # details.
8
+
9
+ """BODC subjects datastreams, readers, transformers, and writers."""
10
+
11
+ from invenio_vocabularies.datastreams.errors import TransformerError
12
+ from invenio_vocabularies.datastreams.readers import RDFReader
13
+ from invenio_vocabularies.datastreams.transformers import RDFTransformer
14
+
15
+ from ..config import bodc_puv_file_url
16
+
17
+ # Available with the "rdf" extra
18
+ try:
19
+ import rdflib
20
+ except ImportError:
21
+ rdflib = None
22
+
23
+
24
+ class BODCPUVSubjectsTransformer(RDFTransformer):
25
+ """
26
+ Transformer class to convert BODC-PUV RDF data to a dictionary format.
27
+
28
+ Input:
29
+ - Relevant fields:
30
+ - `skos:notation`: Primary identifier for the concept.
31
+ - `skos:prefLabel`: Preferred labels with language codes.
32
+ - `skos:altLabel`: Alternative labels (optional).
33
+ - `skos:definition`: Definitions of the concept.
34
+ - `owl:deprecated`: Boolean flag indicating if the concept is deprecated.
35
+
36
+ Output:
37
+ - A dictionary with the following structure:
38
+ {
39
+ "id": "SDN:P01::SAGEMSFM", # BODC-specific parameter ID (skos:notation).
40
+ "scheme": "BODC-PUV", # The scheme name indicating this is a BODC Parameter Usage Vocabulary concept.
41
+ "subject": "AMSSedAge", # The alternative label (skos:altLabel), if available, or None.
42
+ "title": {
43
+ "en": "14C age of Foraminiferida" # English preferred label (skos:prefLabel).
44
+ },
45
+ "props": {
46
+ "definitions": "Accelerated mass spectrometry on picked tests", # Definition of subject (skos:definition).
47
+ },
48
+ "identifiers": [
49
+ {
50
+ "scheme": "url", # Type of identifier (URL).
51
+ "identifier": "http://vocab.nerc.ac.uk/collection/P01/current/SAGEMSFM" # URI of the concept.
52
+ }
53
+ ]
54
+ }
55
+ """
56
+
57
+ def _get_subject_data(self, rdf_graph, subject):
58
+ """Fetch all triples for a subject and organize them into a dictionary."""
59
+ data = {}
60
+ for predicate, obj in rdf_graph.predicate_objects(subject=subject):
61
+ predicate_name = str(predicate)
62
+ if predicate_name not in data:
63
+ data[predicate_name] = []
64
+ data[predicate_name].append(obj)
65
+ return data
66
+
67
+ def _transform_entry(self, subject, rdf_graph):
68
+ """Transform an entry to the required dictionary format."""
69
+ labels = self._get_labels(subject, rdf_graph)
70
+ subject_data = self._get_subject_data(rdf_graph, subject)
71
+ deprecated = subject_data.get(str(rdflib.namespace.OWL.deprecated), [False])
72
+ if deprecated and str(deprecated[0]).lower() == "true":
73
+ return None # Skip deprecated subjects
74
+
75
+ notation = subject_data.get(str(self.skos_core.notation), [])
76
+ if notation:
77
+ id = str(notation[0])
78
+ else:
79
+ raise TransformerError(f"No id found for: {subject}")
80
+
81
+ alt_labels = [obj for obj in subject_data.get(str(self.skos_core.altLabel), [])]
82
+ subject_text = str(alt_labels[0]) if alt_labels else ""
83
+ definition = str(subject_data.get(str(self.skos_core.definition), [None])[0])
84
+
85
+ return {
86
+ "id": id,
87
+ "scheme": "BODC-PUV",
88
+ "subject": subject_text,
89
+ "title": labels,
90
+ "props": {"definition": definition} if definition else {},
91
+ "identifiers": self._get_identifiers(subject),
92
+ }
93
+
94
+
95
+ # Configuration for datastream
96
+
97
+ VOCABULARIES_DATASTREAM_TRANSFORMERS = {"bodc-transformer": BODCPUVSubjectsTransformer}
98
+
99
+ DATASTREAM_CONFIG = {
100
+ "readers": [
101
+ {
102
+ "type": "http",
103
+ "args": {
104
+ "origin": bodc_puv_file_url,
105
+ },
106
+ },
107
+ {"type": "rdf"},
108
+ ],
109
+ "transformers": [{"type": "bodc-transformer"}],
110
+ "writers": [{"args": {"writer": {"type": "subjects-service"}}, "type": "async"}],
111
+ }
@@ -15,10 +15,12 @@ from invenio_i18n import get_locale
15
15
  from invenio_i18n import lazy_gettext as _
16
16
  from invenio_records_resources.services import SearchOptions
17
17
  from invenio_records_resources.services.records.components import DataComponent
18
+ from invenio_records_resources.services.records.queryparser import (
19
+ CompositeSuggestQueryParser,
20
+ )
18
21
  from werkzeug.local import LocalProxy
19
22
 
20
23
  from ...services.components import PIDComponent
21
- from ...services.querystr import FilteredSuggestQueryParser
22
24
 
23
25
  subject_schemes = LocalProxy(
24
26
  lambda: current_app.config["VOCABULARIES_SUBJECTS_SCHEMES"]
@@ -34,13 +36,16 @@ euroscivoc_file_url = LocalProxy(
34
36
  lambda: current_app.config["VOCABULARIES_SUBJECTS_EUROSCIVOC_FILE_URL"]
35
37
  )
36
38
 
39
+ bodc_puv_file_url = LocalProxy(
40
+ lambda: current_app.config["VOCABULARIES_SUBJECTS_BODC_PUV_FILE_URL"]
41
+ )
42
+
37
43
 
38
44
  class SubjectsSearchOptions(SearchOptions):
39
45
  """Search options."""
40
46
 
41
- suggest_parser_cls = FilteredSuggestQueryParser.factory(
42
- filter_field="scheme",
43
- fields=[ # suggest fields
47
+ suggest_parser_cls = CompositeSuggestQueryParser.factory(
48
+ fields=[
44
49
  "subject^100",
45
50
  localized_title,
46
51
  "synonyms^20",
@@ -12,6 +12,7 @@ from invenio_access.permissions import system_identity
12
12
  from invenio_i18n import lazy_gettext as _
13
13
 
14
14
  from ...datastreams.writers import ServiceWriter
15
+ from .bodc import datastreams as bodc_datastreams
15
16
  from .euroscivoc import datastreams as euroscivoc_datastreams
16
17
  from .gemet import datastreams as gemet_datastreams
17
18
  from .mesh import datastreams as mesh_datastreams
@@ -32,8 +33,6 @@ class SubjectsServiceWriter(ServiceWriter):
32
33
 
33
34
  VOCABULARIES_DATASTREAM_READERS = {
34
35
  **mesh_datastreams.VOCABULARIES_DATASTREAM_READERS,
35
- **euroscivoc_datastreams.VOCABULARIES_DATASTREAM_READERS,
36
- **gemet_datastreams.VOCABULARIES_DATASTREAM_READERS,
37
36
  }
38
37
  """Subjects Data Streams readers."""
39
38
 
@@ -41,14 +40,13 @@ VOCABULARIES_DATASTREAM_TRANSFORMERS = {
41
40
  **mesh_datastreams.VOCABULARIES_DATASTREAM_TRANSFORMERS,
42
41
  **euroscivoc_datastreams.VOCABULARIES_DATASTREAM_TRANSFORMERS,
43
42
  **gemet_datastreams.VOCABULARIES_DATASTREAM_TRANSFORMERS,
43
+ **bodc_datastreams.VOCABULARIES_DATASTREAM_TRANSFORMERS,
44
44
  }
45
45
  """Subjects Data Streams transformers."""
46
46
 
47
47
  VOCABULARIES_DATASTREAM_WRITERS = {
48
48
  "subjects-service": SubjectsServiceWriter,
49
49
  **mesh_datastreams.VOCABULARIES_DATASTREAM_WRITERS,
50
- **euroscivoc_datastreams.VOCABULARIES_DATASTREAM_WRITERS,
51
- **gemet_datastreams.VOCABULARIES_DATASTREAM_WRITERS,
52
50
  }
53
51
  """Subjects Data Streams writers."""
54
52
 
@@ -14,7 +14,36 @@ from ..config import euroscivoc_file_url
14
14
 
15
15
 
16
16
  class EuroSciVocSubjectsTransformer(RDFTransformer):
17
- """Transformer class to convert EuroSciVoc RDF data to a dictionary format."""
17
+ """
18
+ Transformer class to convert EuroSciVoc RDF data to a dictionary format.
19
+
20
+ Input:
21
+ - Relevant fields:
22
+ - `skos:notation`: Primary identifier for the concept.
23
+ - `skos:prefLabel`: Preferred labels with language codes.
24
+ - `skos:altLabel`: Alternative labels.
25
+ - `skos:broader`: Broader concepts that this concept belongs to.
26
+
27
+ Output:
28
+ {
29
+ "id": "euroscivoc:1717", # EuroSciVoc-specific concept ID (skos:notation).
30
+ "scheme": "EuroSciVoc", # The scheme name indicating this is a EuroSciVoc concept.
31
+ "subject": "Satellite radio", # The primary subject label (first preferred label in English, skos:prefLabel).
32
+ "title": {
33
+ "it": "Radio satellitare", # Italian preferred label (skos:prefLabel).
34
+ "en": "Satellite radio", # English preferred label (skos:prefLabel).
35
+ },
36
+ "props": {
37
+ "parents": "euroscivoc:1225", # The broader concept (skos:broader), identified by its EuroSciVoc Concept ID.
38
+ },
39
+ "identifiers": [
40
+ {
41
+ "scheme": "url", # Type of identifier (URL).
42
+ "identifier": "http://data.europa.eu/8mn/euroscivoc/87ff3577-527a-4a40-9c76-2f9d3075e2ba", # URI of the concept (rdf:about).
43
+ }
44
+ ],
45
+ }
46
+ """
18
47
 
19
48
  def _get_notation(self, subject, rdf_graph):
20
49
  """Extract the numeric notation for a subject."""
@@ -38,7 +67,6 @@ class EuroSciVocSubjectsTransformer(RDFTransformer):
38
67
  for n in reversed(self._find_parents(subject, rdf_graph))
39
68
  if n
40
69
  )
41
- identifiers = [{"scheme": "url", "identifier": str(subject)}]
42
70
 
43
71
  return {
44
72
  "id": id,
@@ -46,13 +74,11 @@ class EuroSciVocSubjectsTransformer(RDFTransformer):
46
74
  "subject": labels.get("en", "").capitalize(),
47
75
  "title": labels,
48
76
  "props": {"parents": parents} if parents else {},
49
- "identifiers": identifiers,
77
+ "identifiers": self._get_identifiers(subject),
50
78
  }
51
79
 
52
80
 
53
- # Configuration for datastream transformers, and writers
54
- VOCABULARIES_DATASTREAM_READERS = {}
55
- VOCABULARIES_DATASTREAM_WRITERS = {}
81
+ # Configuration for datastream
56
82
 
57
83
  VOCABULARIES_DATASTREAM_TRANSFORMERS = {
58
84
  "euroscivoc-transformer": EuroSciVocSubjectsTransformer
@@ -71,9 +97,5 @@ DATASTREAM_CONFIG = {
71
97
  },
72
98
  ],
73
99
  "transformers": [{"type": "euroscivoc-transformer"}],
74
- "writers": [
75
- {
76
- "type": "subjects-service",
77
- }
78
- ],
100
+ "writers": [{"args": {"writer": {"type": "subjects-service"}}, "type": "async"}],
79
101
  }
@@ -20,7 +20,40 @@ except ImportError:
20
20
 
21
21
 
22
22
  class GEMETSubjectsTransformer(RDFTransformer):
23
- """Transformer class to convert GEMET RDF data to a dictionary format."""
23
+ """
24
+ Transformer class to convert GEMET RDF data to a dictionary format.
25
+
26
+ Input:
27
+ - Relevant fields:
28
+ - `skos:prefLabel`: Preferred labels with language codes.
29
+ - `skos:broader`: References to broader concepts (parent concepts).
30
+ - `skos:memberOf`: References to groups or themes the concept belongs to.
31
+
32
+ Output:
33
+ - A dictionary with the following structure:
34
+ {
35
+ "id": "gemet:concept/10008", # GEMET-specific concept ID (skos:Concept).
36
+ "scheme": "GEMET", # The scheme name indicating this is a GEMET concept.
37
+ "subject": "Consumer product", # The subject label (first preferred label in English, skos:prefLabel).
38
+ "title": {
39
+ "en": "Consumer product", # English label for the concept (skos:prefLabel).
40
+ "ar": "منتج استهلاكي" # Arabic label for the concept (skos:prefLabel).
41
+ },
42
+ "props": {
43
+ "parents": "gemet:concept/6660", # The parent concept (skos:broader), identified by its GEMET Concept ID.
44
+ "groups": ["http://www.eionet.europa.eu/gemet/group/10112"], # Group the concept belongs to (skos:memberOf)(skos:prefLabel).
45
+ "themes": [
46
+ "http://www.eionet.europa.eu/gemet/theme/27", # Theme the concept belongs to (skos:memberOf)(rdfs:label).
47
+ ]
48
+ },
49
+ "identifiers": [
50
+ {
51
+ "scheme": "url", # Type of identifier (URL).
52
+ "identifier": "http://www.eionet.europa.eu/gemet/concept/10008" # URI of the concept (rdf:about).
53
+ }
54
+ ]
55
+ }
56
+ """
24
57
 
25
58
  def _get_parent_notation(self, broader, rdf_graph):
26
59
  """Extract parent notation from GEMET URI."""
@@ -83,13 +116,11 @@ class GEMETSubjectsTransformer(RDFTransformer):
83
116
  "subject": labels.get("en", "").capitalize(),
84
117
  "title": labels,
85
118
  "props": props,
86
- "identifiers": identifiers,
119
+ "identifiers": self._get_identifiers(subject),
87
120
  }
88
121
 
89
122
 
90
- # Configuration for datastream transformers, and writers
91
- VOCABULARIES_DATASTREAM_READERS = {}
92
- VOCABULARIES_DATASTREAM_WRITERS = {}
123
+ # Configuration for datastream
93
124
 
94
125
  VOCABULARIES_DATASTREAM_TRANSFORMERS = {"gemet-transformer": GEMETSubjectsTransformer}
95
126
 
@@ -48,7 +48,16 @@ class StreamEntry:
48
48
  class DataStream:
49
49
  """Data stream."""
50
50
 
51
- def __init__(self, readers, writers, transformers=None, *args, **kwargs):
51
+ def __init__(
52
+ self,
53
+ readers,
54
+ writers,
55
+ transformers=None,
56
+ batch_size=100,
57
+ write_many=False,
58
+ *args,
59
+ **kwargs,
60
+ ):
52
61
  """Constructor.
53
62
 
54
63
  :param readers: an ordered list of readers.
@@ -58,12 +67,14 @@ class DataStream:
58
67
  self._readers = readers
59
68
  self._transformers = transformers
60
69
  self._writers = writers
70
+ self.batch_size = batch_size
71
+ self.write_many = write_many
61
72
 
62
73
  def filter(self, stream_entry, *args, **kwargs):
63
74
  """Checks if an stream_entry should be filtered out (skipped)."""
64
75
  return False
65
76
 
66
- def process_batch(self, batch, write_many=False):
77
+ def process_batch(self, batch):
67
78
  """Process a batch of entries."""
68
79
  transformed_entries = []
69
80
  for stream_entry in batch:
@@ -79,12 +90,12 @@ class DataStream:
79
90
  else:
80
91
  transformed_entries.append(transformed_entry)
81
92
  if transformed_entries:
82
- if write_many:
93
+ if self.write_many:
83
94
  yield from self.batch_write(transformed_entries)
84
95
  else:
85
96
  yield from (self.write(entry) for entry in transformed_entries)
86
97
 
87
- def process(self, batch_size=100, write_many=False, *args, **kwargs):
98
+ def process(self, *args, **kwargs):
88
99
  """Iterates over the entries.
89
100
 
90
101
  Uses the reader to get the raw entries and transforms them.
@@ -95,13 +106,13 @@ class DataStream:
95
106
  batch = []
96
107
  for stream_entry in self.read():
97
108
  batch.append(stream_entry)
98
- if len(batch) >= batch_size:
99
- yield from self.process_batch(batch, write_many=write_many)
109
+ if len(batch) >= self.batch_size:
110
+ yield from self.process_batch(batch)
100
111
  batch = []
101
112
 
102
113
  # Process any remaining entries in the last batch
103
114
  if batch:
104
- yield from self.process_batch(batch, write_many=write_many)
115
+ yield from self.process_batch(batch)
105
116
 
106
117
  def read(self):
107
118
  """Recursively read the entries."""
@@ -81,4 +81,6 @@ class DataStreamFactory:
81
81
  for t_conf in transformers_config:
82
82
  transformers.append(TransformerFactory.create(t_conf))
83
83
 
84
- return DataStream(readers=readers, writers=writers, transformers=transformers)
84
+ return DataStream(
85
+ readers=readers, writers=writers, transformers=transformers, **kwargs
86
+ )
@@ -9,6 +9,7 @@
9
9
  """Transformers module."""
10
10
 
11
11
  from abc import ABC, abstractmethod
12
+ from urllib.parse import urlparse
12
13
 
13
14
  from lxml import etree
14
15
 
@@ -76,6 +77,17 @@ class RDFTransformer(BaseTransformer):
76
77
  """Get the SKOS core namespace."""
77
78
  return rdflib.Namespace("http://www.w3.org/2004/02/skos/core#")
78
79
 
80
+ def _validate_subject_url(self, subject):
81
+ """Check if the subject is a valid URL."""
82
+ parsed = urlparse(str(subject))
83
+ return bool(parsed.netloc and parsed.scheme)
84
+
85
+ def _get_identifiers(self, subject):
86
+ """Generate identifiers field for a valid subject URL."""
87
+ if self._validate_subject_url(subject):
88
+ return [{"scheme": "url", "identifier": str(subject)}]
89
+ return []
90
+
79
91
  def _get_labels(self, subject, rdf_graph):
80
92
  """Extract labels (prefLabel or altLabel) for a subject."""
81
93
  labels = {
@@ -12,6 +12,7 @@ from abc import ABC, abstractmethod
12
12
  from pathlib import Path
13
13
 
14
14
  import yaml
15
+ from flask import current_app
15
16
  from invenio_access.permissions import system_identity
16
17
  from invenio_pidstore.errors import PIDAlreadyExists, PIDDoesNotExistError
17
18
  from invenio_records.systemfields.relations.errors import InvalidRelationValue
@@ -120,11 +121,14 @@ class ServiceWriter(BaseWriter):
120
121
 
121
122
  def write_many(self, stream_entries, *args, **kwargs):
122
123
  """Writes the input entries using a given service."""
124
+ current_app.logger.info(f"Writing {len(stream_entries)} entries")
123
125
  entries = [entry.entry for entry in stream_entries]
124
126
  entries_with_id = [(self._entry_id(entry), entry) for entry in entries]
125
- results = self._service.create_or_update_many(self._identity, entries_with_id)
127
+ result_list = self._service.create_or_update_many(
128
+ self._identity, entries_with_id
129
+ )
126
130
  stream_entries_processed = []
127
- for entry, result in zip(entries, results):
131
+ for entry, result in zip(entries, result_list.results):
128
132
  processed_stream_entry = StreamEntry(
129
133
  entry=entry,
130
134
  record=result.record,
@@ -28,7 +28,12 @@ from .contrib.awards.datastreams import (
28
28
  )
29
29
  from .contrib.funders.datastreams import DATASTREAM_CONFIG as funders_ds_config
30
30
  from .contrib.names.datastreams import DATASTREAM_CONFIG as names_ds_config
31
+ from .contrib.subjects.bodc.datastreams import DATASTREAM_CONFIG as bodc_ds_config
31
32
  from .contrib.subjects.datastreams import DATASTREAM_CONFIG as subjects_ds_config
33
+ from .contrib.subjects.euroscivoc.datastreams import (
34
+ DATASTREAM_CONFIG as euroscivoc_ds_config,
35
+ )
36
+ from .contrib.subjects.gemet.datastreams import DATASTREAM_CONFIG as gemet_ds_config
32
37
 
33
38
 
34
39
  class VocabularyConfig:
@@ -137,6 +142,39 @@ class AffiliationsEDMOVocabularyConfig(VocabularyConfig):
137
142
  raise NotImplementedError("Service not implemented for EDMO Affiliations")
138
143
 
139
144
 
145
+ class SubjectsEuroSciVocVocabularyConfig(VocabularyConfig):
146
+ """EuroSciVoc Subjects Vocabulary Config."""
147
+
148
+ config = euroscivoc_ds_config
149
+ vocabulary_name = "subjects:euroscivoc"
150
+
151
+ def get_service(self):
152
+ """Get the service for the vocabulary."""
153
+ raise NotImplementedError("Service not implemented for EuroSciVoc Subjects")
154
+
155
+
156
+ class SubjectsGEMETVocabularyConfig(VocabularyConfig):
157
+ """GEMET Subjects Vocabulary Config."""
158
+
159
+ config = gemet_ds_config
160
+ vocabulary_name = "subjects:gemet"
161
+
162
+ def get_service(self):
163
+ """Get the service for the vocabulary."""
164
+ raise NotImplementedError("Service not implemented for GEMET Subjects")
165
+
166
+
167
+ class SubjectsBODCVocabularyConfig(VocabularyConfig):
168
+ """BODC Subjects Vocabulary Config."""
169
+
170
+ config = bodc_ds_config
171
+ vocabulary_name = "subjects:bodc-puv"
172
+
173
+ def get_service(self):
174
+ """Get the service for the vocabulary."""
175
+ raise NotImplementedError("Service not implemented for BODC Subjects")
176
+
177
+
140
178
  def get_vocabulary_config(vocabulary):
141
179
  """Factory function to get the appropriate Vocabulary Config."""
142
180
  vocab_config = {
@@ -148,5 +186,8 @@ def get_vocabulary_config(vocabulary):
148
186
  "affiliations:openaire": AffiliationsOpenAIREVocabularyConfig,
149
187
  "affiliations:edmo": AffiliationsEDMOVocabularyConfig,
150
188
  "subjects": SubjectsVocabularyConfig,
189
+ "subjects:gemet": SubjectsGEMETVocabularyConfig,
190
+ "subjects:bodc": SubjectsBODCVocabularyConfig,
191
+ "subjects:euroscivoc": SubjectsEuroSciVocVocabularyConfig,
151
192
  }
152
193
  return vocab_config.get(vocabulary, VocabularyConfig)()
@@ -28,6 +28,8 @@ class VocabularyFixture:
28
28
  readers_config=config["readers"],
29
29
  transformers_config=config.get("transformers"),
30
30
  writers_config=config["writers"],
31
+ batch_size=config.get("batch_size", 1000),
32
+ write_many=config.get("write_many", False),
31
33
  )
32
34
 
33
35
  errors = []
@@ -8,6 +8,7 @@
8
8
 
9
9
  """Querystring parsing."""
10
10
 
11
+ import warnings
11
12
  from functools import partial
12
13
 
13
14
  from invenio_records_resources.services.records.params import SuggestQueryParser
@@ -20,6 +21,10 @@ class FilteredSuggestQueryParser(SuggestQueryParser):
20
21
  @classmethod
21
22
  def factory(cls, filter_field=None, **extra_params):
22
23
  """Create a prepared instance of the query parser."""
24
+ warnings.warn(
25
+ "FilteredSuggestQueryParser is deprecated, use SuggestQueryParser or CompositeSuggestQueryParser instead",
26
+ DeprecationWarning,
27
+ )
23
28
  return partial(cls, filter_field=filter_field, extra_params=extra_params)
24
29
 
25
30
  def __init__(self, identity=None, filter_field=None, extra_params=None):
@@ -20,6 +20,8 @@ def process_datastream(config):
20
20
  readers_config=config["readers"],
21
21
  transformers_config=config.get("transformers"),
22
22
  writers_config=config["writers"],
23
+ batch_size=config.get("batch_size", 1000),
24
+ write_many=config.get("write_many", False),
23
25
  )
24
26
 
25
27
  for result in ds.process():
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: invenio-vocabularies
3
- Version: 6.7.0
3
+ Version: 6.9.0
4
4
  Summary: Invenio module for managing vocabularies.
5
5
  Home-page: https://github.com/inveniosoftware/invenio-vocabularies
6
6
  Author: CERN
@@ -88,6 +88,17 @@ https://invenio-vocabularies.readthedocs.io/
88
88
  Changes
89
89
  =======
90
90
 
91
+ Version v6.9.0 (released 2024-12-09)
92
+
93
+ - schema: added identifiers in affiliations relation
94
+
95
+ Version v6.8.0 (released 2024-12-09)
96
+
97
+ - names: extract affiliation identifiers from employments
98
+ - names: optimize memory usage on ORCID sync
99
+ - subjects: improve search with CompositeSuggestQueryParser
100
+ - subjects: added datastream for bodc
101
+
91
102
  Version v6.7.0 (released 2024-11-27)
92
103
 
93
104
  - contrib: improve search accuracy for names, funders, affiliations
@@ -1,9 +1,9 @@
1
- invenio_vocabularies/__init__.py,sha256=IvdaI7jMnH_uHjMrYwtACXOE2Nw-H-a8HQbkLpurCl0,377
2
- invenio_vocabularies/cli.py,sha256=S3lBsLxsSYa83sCDaGZr5mP7TwPvmmwxzzbB13h8VBI,5856
3
- invenio_vocabularies/config.py,sha256=NcfDDZ0s1y2V78DcN69itua9wWt3_1-y87wonBl0vts,6340
1
+ invenio_vocabularies/__init__.py,sha256=sQ6j-Dnfro84dUjb1SBr8X7MucAMXh1112UnZK_-tZE,377
2
+ invenio_vocabularies/cli.py,sha256=CpXTTIn2GTpUqNfLEMlRAp3JWst8ZjHVxoGYdhuuv_4,5959
3
+ invenio_vocabularies/config.py,sha256=h9Iied753mmZwZZHe5COMqUYvV-zSQtx763EIkUVb1Q,6973
4
4
  invenio_vocabularies/ext.py,sha256=GujJ4UARd4Fxf4z7zznRk9JAgHamZuYCOdrKU5czg00,5987
5
- invenio_vocabularies/factories.py,sha256=7VHpoF3jQch5O7efytRwFPojz_MpeiXeu-cfw7ajHs4,4949
6
- invenio_vocabularies/fixtures.py,sha256=nNWwH04HFASjfj1oy5kMdcQGKmVjzUuA5wSw-ER1QAg,1585
5
+ invenio_vocabularies/factories.py,sha256=lRHPGPos6GdXf0yAhB1d7iMjVfpOeFDZRL9PPZvuWlY,6408
6
+ invenio_vocabularies/fixtures.py,sha256=iEPkWf_ZjdP2D9r2sLdIlPoR8Rq2m5cnoFwywUGHneg,1696
7
7
  invenio_vocabularies/jobs.py,sha256=0aTukWooBPCvEgvnjJcQAZuMeS2H_m-RGULNIfJ5Gmc,6800
8
8
  invenio_vocabularies/proxies.py,sha256=k7cTUgWfnCoYIuNqAj_VFi1zBN33KNNclRSVnBkObEM,711
9
9
  invenio_vocabularies/views.py,sha256=PNJ5nvc3O7ASwNe56xmqy5YaU9n3UYF3W2JwvtE_kYs,1561
@@ -51,7 +51,7 @@ invenio_vocabularies/contrib/affiliations/datastreams.py,sha256=sMvkt9XOBTV7Q0we
51
51
  invenio_vocabularies/contrib/affiliations/facets.py,sha256=w316MGvtdyTpRCPOpCEmMxxLraRkbFFb1VvLkFlEc9o,1229
52
52
  invenio_vocabularies/contrib/affiliations/models.py,sha256=JUcj-1ydc2Cw2Rsc24JwXE3TFBJ_6fivhUYhGq4rT8A,329
53
53
  invenio_vocabularies/contrib/affiliations/resources.py,sha256=DBEbRxQmp-o-PeZlgFG588Q4sGcruuwIL8L9O-SzCes,435
54
- invenio_vocabularies/contrib/affiliations/schema.py,sha256=O4s6aHcO1w4_aAfGuYLx_eLS6nctd6ktyIuHB6dMKqw,1842
54
+ invenio_vocabularies/contrib/affiliations/schema.py,sha256=geORDYdBIWnv81Txl07qdHhB3U_fo9ObVp7UrSlCLRI,2104
55
55
  invenio_vocabularies/contrib/affiliations/services.py,sha256=KJbv46c2LuQOW3xz7KVLtfZjWR8vhMRPHninlUEhrss,395
56
56
  invenio_vocabularies/contrib/affiliations/jsonschemas/__init__.py,sha256=ILyZ5kejTr0p50macMBPALQCTJSe4KEE3_cgf2p3zV4,252
57
57
  invenio_vocabularies/contrib/affiliations/jsonschemas/affiliations/affiliation-v1.0.0.json,sha256=be-glRNIBtIO87Tcyw8d68OdG4J8-ojjiCj8UJBnckg,1649
@@ -113,7 +113,7 @@ invenio_vocabularies/contrib/funders/mappings/v7/funders/funder-v1.0.0.json,sha2
113
113
  invenio_vocabularies/contrib/names/__init__.py,sha256=DBfsM7JMETZGaV5QmXEwE7zhCaAXvc2SZN6uXnW_V-c,451
114
114
  invenio_vocabularies/contrib/names/api.py,sha256=sEPn_jFX3gyoxgbdEUSIvOoPCUI8pocI6qCZO6mzCgQ,300
115
115
  invenio_vocabularies/contrib/names/config.py,sha256=9sb5novWuQYXg_5Egexn52mjgGd1D_D9UKyQ1fmIuh4,1977
116
- invenio_vocabularies/contrib/names/datastreams.py,sha256=EKatHb0gkvcC9LCYBLIcN5pAgklBY4G43lZR_XE52wY,9505
116
+ invenio_vocabularies/contrib/names/datastreams.py,sha256=mmhtdrda6b4c83dRjxVF5JTqtkt92GSEMHTU6TzQtHw,14570
117
117
  invenio_vocabularies/contrib/names/models.py,sha256=SYdtDDG-y5Wq_d06YhiVO5n8gfxPW_mx-tECsIcv5H8,308
118
118
  invenio_vocabularies/contrib/names/names.py,sha256=_kBJBcPuANgUHlZ8RoVkpfJwzR5qaOQCBIyZusjKoCE,2509
119
119
  invenio_vocabularies/contrib/names/permissions.py,sha256=5xrpYsA3oQUJ5lJpF7wjRAFiW-pM6_yP1k9zllbRwnQ,844
@@ -134,18 +134,20 @@ invenio_vocabularies/contrib/names/mappings/v7/__init__.py,sha256=qLGB8C0kPI3xub
134
134
  invenio_vocabularies/contrib/names/mappings/v7/names/name-v1.0.0.json,sha256=5Ybcq3fUMYx3u1MNKmHh-CWBtATS9MYpdEcwAM8EQ80,1943
135
135
  invenio_vocabularies/contrib/subjects/__init__.py,sha256=GtXZKA6VWG1oA1fUX2Wh92nd-1i7RnnQF6RprGhxkD4,591
136
136
  invenio_vocabularies/contrib/subjects/api.py,sha256=QH8mxoLsa8qjJT1i1Tj6rRnpbH23plo2IMOJ56rnvbU,347
137
- invenio_vocabularies/contrib/subjects/config.py,sha256=VsS1fAlHOceLu0E_ciAE4XrrqpI7cfl-OPScqVPimR8,2064
138
- invenio_vocabularies/contrib/subjects/datastreams.py,sha256=V-g4d-gh9MUhAVG-3lrbd-IO6TNWlsGZedCivT_6aFg,2015
137
+ invenio_vocabularies/contrib/subjects/config.py,sha256=6svsCjiptqWB5x3NlG6wDH_dehdQYRTPKDCkNc9MtNA,2169
138
+ invenio_vocabularies/contrib/subjects/datastreams.py,sha256=YRdUP0saks5LGVuFjDhhyJdsiYzkysLTBja32I4x9eU,1888
139
139
  invenio_vocabularies/contrib/subjects/facets.py,sha256=qQ7_rppFBzsmrlZu4-MvOIdUcjeOmDA9gOHAcs0lWwI,695
140
140
  invenio_vocabularies/contrib/subjects/models.py,sha256=8XgbVRxDDvhWPjMWsoCriNlOKdmV_113a14yLRtlvM4,363
141
141
  invenio_vocabularies/contrib/subjects/resources.py,sha256=0KRfUMizwgIziZybk4HnIjiSsXbrCv_XmguNPwnxoo8,506
142
142
  invenio_vocabularies/contrib/subjects/schema.py,sha256=VOW8a9Ob5M-mKrict2bApdFyTpHBwCTJZSxrm93Puv0,3516
143
143
  invenio_vocabularies/contrib/subjects/services.py,sha256=s1U6HMmpjuz7rrgR0DtT9C28TC6sZEeDTsa4Jh1TXQk,864
144
144
  invenio_vocabularies/contrib/subjects/subjects.py,sha256=NwZycExLyV8l7ikGStH4GOecVuDSxFT70KoNv6qC78I,1877
145
+ invenio_vocabularies/contrib/subjects/bodc/__init__.py,sha256=RlJVmWpbRgDcpx61ITjco3IqHkwZwIypeo2Dt2_AWRc,241
146
+ invenio_vocabularies/contrib/subjects/bodc/datastreams.py,sha256=RgFJTrr-eMyKrS2MuGK4QHhOkPseMwpmsKKpEqE_tgs,4220
145
147
  invenio_vocabularies/contrib/subjects/euroscivoc/__init__.py,sha256=e5L9E4l5JHqVzijAX8tn2DIa2n01vJ5wOAZdN62RnIo,247
146
- invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py,sha256=eyEHbH-cTbtf3kKHv4ehMUMngy48H0CIwsjU-qQKK6I,2367
148
+ invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py,sha256=Vs4mpIn321KZ94lzTxpYnQTATle1QdKg0yegmDMptw4,3565
147
149
  invenio_vocabularies/contrib/subjects/gemet/__init__.py,sha256=OlRWH2gumZZ1Djc_N3ZGPHyt2wOcIwlDDYO6uOfaZfI,242
148
- invenio_vocabularies/contrib/subjects/gemet/datastreams.py,sha256=YaCt1698-jQRn4-jj-qfcluLCR9nSHAID9bbHWVOyUk,3566
150
+ invenio_vocabularies/contrib/subjects/gemet/datastreams.py,sha256=OZaKnT6cw3cjNtB_TxEBtVwWWf1Wrm-x9h71YXMAmBk,5203
149
151
  invenio_vocabularies/contrib/subjects/jsonschemas/__init__.py,sha256=WowVUST1JoEDS3-xeHhCJvIgC9nzMkFs8XRks9zgzaM,292
150
152
  invenio_vocabularies/contrib/subjects/jsonschemas/subjects/subject-v1.0.0.json,sha256=O1IsPWrVeuEiMBKtADcRByFNmd1soABgODOnauEJBoI,1868
151
153
  invenio_vocabularies/contrib/subjects/mappings/__init__.py,sha256=Qk-yj1ENsTmijO8ImWuDYGzXi6QQ2VjP4DbjrpRfDk8,243
@@ -158,13 +160,13 @@ invenio_vocabularies/contrib/subjects/mappings/v7/subjects/subject-v1.0.0.json,s
158
160
  invenio_vocabularies/contrib/subjects/mesh/__init__.py,sha256=P44hmgVNNTN5O_EmWgaeYJ91yqkGNoeKYo0wfif_wE4,241
159
161
  invenio_vocabularies/contrib/subjects/mesh/datastreams.py,sha256=6W6bgQ7P_31kf3enkAqCBTFBqgrQ2BlV625vn0N9ibQ,1544
160
162
  invenio_vocabularies/datastreams/__init__.py,sha256=VPefh6k4Q3eYxKIW8I5zXUGucntp7VHxaOR5Vhgkfmg,412
161
- invenio_vocabularies/datastreams/datastreams.py,sha256=SpI6ivmf2LIDS2JSkxoM2v5kRmrPoRDtAG5fuzZO4oQ,6078
163
+ invenio_vocabularies/datastreams/datastreams.py,sha256=mAi_xUDmDWpc3NyhU1TMOhqVRbwYu_meJ9UY6-wgBKQ,6169
162
164
  invenio_vocabularies/datastreams/errors.py,sha256=IDUZ3gNtYGrhcOgApHCms1gNNJTyJzoMPmG5JtIeYNU,678
163
- invenio_vocabularies/datastreams/factories.py,sha256=H8a2gAy7KNImtdCdtqpVKC5gIvE3ON6U1Wn1_zaMlQ4,2181
165
+ invenio_vocabularies/datastreams/factories.py,sha256=kuuN4Zt7Xw58rwf0M03djqcdZOZRWgJdLK16-HmID24,2213
164
166
  invenio_vocabularies/datastreams/readers.py,sha256=DUuV-D2PLio3nVR0J-2knASq8rB-H14QBr3DoRL6UgA,14352
165
167
  invenio_vocabularies/datastreams/tasks.py,sha256=0fuH_PRt9Ncv6WHM4pkYmfheRVGDKkERZiMPvgV4bZU,1129
166
- invenio_vocabularies/datastreams/transformers.py,sha256=czWKO-h9CB1QoO-mL3fhQv_YHjX-bkAVyhgph-GQFa8,3667
167
- invenio_vocabularies/datastreams/writers.py,sha256=FMTQdGavRgq6Qk21UcifYeDVH3jUA3rmwdyICr1ywxU,6719
168
+ invenio_vocabularies/datastreams/transformers.py,sha256=PJFbmRSj3dpJ95NzONAIns5ksztshd99JOp_FLQAlJM,4133
169
+ invenio_vocabularies/datastreams/writers.py,sha256=VIXx9klJaCEdscaKqi2zO959cc157YUGjVYdeTfhTTI,6861
168
170
  invenio_vocabularies/datastreams/xml.py,sha256=HFa-lfxj7kFrr2IjeN1jxSLDfcvpBwO9nZLZF2-BryE,997
169
171
  invenio_vocabularies/records/__init__.py,sha256=Uj7O6fYdAtLOkLXUGSAYPADBB7aqP4yVs9b6OAjA158,243
170
172
  invenio_vocabularies/records/api.py,sha256=Lynt6Sz4BVN1orh0zgJ5ljhnUobEtcq8c22PmSeUo2U,1494
@@ -194,11 +196,11 @@ invenio_vocabularies/services/config.py,sha256=A9_r2vErcfo3Xt6fC4YVobHXdd64_YyI7
194
196
  invenio_vocabularies/services/facets.py,sha256=qvdHoGSJJr90dZHSVe0-hlO1r0LtTnFVSjrt9PNuNAg,3872
195
197
  invenio_vocabularies/services/generators.py,sha256=jcXwb9Hiyek4o-cQ1G2osVgbTBKDbd-5siJMBOWE018,1116
196
198
  invenio_vocabularies/services/permissions.py,sha256=83rNOwCuggdJji3VtWTQgytTrhfiWqASCpvI75DxEus,960
197
- invenio_vocabularies/services/querystr.py,sha256=X3JHVF9B0O0iLWrnW3ok_bf_8jA-Cs_oAcYYkGOm3Uw,1829
199
+ invenio_vocabularies/services/querystr.py,sha256=OrNUR_QAcQ_T-EiL3H1Jvzz9gK2ZB5FicsG0fOipSro,2029
198
200
  invenio_vocabularies/services/results.py,sha256=6LZIpzWSbt9wpRNWgjA1uIM4RFooOYTkHcp5-PnIJdU,3767
199
201
  invenio_vocabularies/services/schema.py,sha256=mwIBFylpQlWw1M6h_axc-z4Yd7X3Z1S0PxJOlZGpfrQ,4634
200
202
  invenio_vocabularies/services/service.py,sha256=9QQDsG1WShCpBVFze-Dnq-iC2BwNX_0-qzfzrpImJo8,6469
201
- invenio_vocabularies/services/tasks.py,sha256=AH0XifkOypsEdh8LyjmlHnPLQK5qqUJC8cNVWGkbqks,788
203
+ invenio_vocabularies/services/tasks.py,sha256=xKEymph1M-wFjPLeCGkqvnuYdPMMHgxhCCdC0j44Pi4,891
202
204
  invenio_vocabularies/services/custom_fields/__init__.py,sha256=QgvSsn-S1xLzbZ57pjjGTt5oI3HqzXHVjwGTtuPgzN8,421
203
205
  invenio_vocabularies/services/custom_fields/subject.py,sha256=ZM-ZkaxoouF9lL62smOtLxsjQQZwiQs0jG3qGruP6nY,2231
204
206
  invenio_vocabularies/services/custom_fields/vocabulary.py,sha256=oQwI8Aoi2Nr9k3eWKnde5H7RXc7qdlATSeI6coy8UR0,3020
@@ -300,10 +302,10 @@ invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.mo,sha256=g1I5aNO8r
300
302
  invenio_vocabularies/translations/zh_CN/LC_MESSAGES/messages.po,sha256=vg8qC8ofpAdJ3mQz7mWM1ylKDpiNWXFs7rlMdSPkgKk,4629
301
303
  invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.mo,sha256=cqSm8NtMAwrP9O6qbmtkDtRT1e9D93qpsJN5X9_PPVw,600
302
304
  invenio_vocabularies/translations/zh_TW/LC_MESSAGES/messages.po,sha256=9ACePz_EpB-LfcIJajZ2kp8Q04tcdrQLOtug162ZUss,4115
303
- invenio_vocabularies-6.7.0.dist-info/AUTHORS.rst,sha256=8d0p_WWE1r9DavvzMDi2D4YIGBHiMYcN3LYxqQOj8sY,291
304
- invenio_vocabularies-6.7.0.dist-info/LICENSE,sha256=UvI8pR8jGWqe0sTkb_hRG6eIrozzWwWzyCGEpuXX4KE,1062
305
- invenio_vocabularies-6.7.0.dist-info/METADATA,sha256=Eb8yxpxyr_aqn1dkluiEffaS4t5uFS8LCcErmHmo6D0,11971
306
- invenio_vocabularies-6.7.0.dist-info/WHEEL,sha256=-G_t0oGuE7UD0DrSpVZnq1hHMBV9DD2XkS5v7XpmTnk,110
307
- invenio_vocabularies-6.7.0.dist-info/entry_points.txt,sha256=ud9nfdMlhO_mu3okwmy5vQD48r3-rCU_pSR-lUtLeYE,3180
308
- invenio_vocabularies-6.7.0.dist-info/top_level.txt,sha256=x1gRNbaODF_bCD0SBLM3nVOFPGi06cmGX5X94WKrFKk,21
309
- invenio_vocabularies-6.7.0.dist-info/RECORD,,
305
+ invenio_vocabularies-6.9.0.dist-info/AUTHORS.rst,sha256=8d0p_WWE1r9DavvzMDi2D4YIGBHiMYcN3LYxqQOj8sY,291
306
+ invenio_vocabularies-6.9.0.dist-info/LICENSE,sha256=UvI8pR8jGWqe0sTkb_hRG6eIrozzWwWzyCGEpuXX4KE,1062
307
+ invenio_vocabularies-6.9.0.dist-info/METADATA,sha256=DDZji_utemzfuJq2CxCfHfyIwQApzq4hDckuaKpIsRo,12303
308
+ invenio_vocabularies-6.9.0.dist-info/WHEEL,sha256=-G_t0oGuE7UD0DrSpVZnq1hHMBV9DD2XkS5v7XpmTnk,110
309
+ invenio_vocabularies-6.9.0.dist-info/entry_points.txt,sha256=ud9nfdMlhO_mu3okwmy5vQD48r3-rCU_pSR-lUtLeYE,3180
310
+ invenio_vocabularies-6.9.0.dist-info/top_level.txt,sha256=x1gRNbaODF_bCD0SBLM3nVOFPGi06cmGX5X94WKrFKk,21
311
+ invenio_vocabularies-6.9.0.dist-info/RECORD,,