invenio-vocabularies 4.1.1__py2.py3-none-any.whl → 4.3.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of invenio-vocabularies might be problematic. Click here for more details.
- invenio_vocabularies/__init__.py +1 -1
- invenio_vocabularies/administration/views/vocabularies.py +1 -0
- invenio_vocabularies/cli.py +17 -6
- invenio_vocabularies/config.py +15 -1
- invenio_vocabularies/contrib/affiliations/api.py +1 -2
- invenio_vocabularies/contrib/affiliations/datastreams.py +33 -8
- invenio_vocabularies/contrib/affiliations/services.py +1 -2
- invenio_vocabularies/contrib/awards/awards.py +2 -1
- invenio_vocabularies/contrib/awards/datastreams.py +1 -0
- invenio_vocabularies/contrib/awards/services.py +1 -2
- invenio_vocabularies/contrib/common/ror/datastreams.py +39 -5
- invenio_vocabularies/contrib/funders/datastreams.py +38 -11
- invenio_vocabularies/contrib/funders/funders.py +2 -1
- invenio_vocabularies/contrib/names/datastreams.py +160 -2
- invenio_vocabularies/contrib/names/s3client.py +44 -0
- invenio_vocabularies/datastreams/datastreams.py +61 -13
- invenio_vocabularies/datastreams/readers.py +40 -15
- invenio_vocabularies/datastreams/tasks.py +37 -0
- invenio_vocabularies/datastreams/writers.py +70 -0
- invenio_vocabularies/factories.py +1 -0
- invenio_vocabularies/records/models.py +2 -4
- invenio_vocabularies/records/pidprovider.py +1 -2
- invenio_vocabularies/resources/__init__.py +1 -0
- invenio_vocabularies/resources/schema.py +2 -1
- invenio_vocabularies/services/custom_fields/subject.py +3 -2
- invenio_vocabularies/services/custom_fields/vocabulary.py +1 -1
- invenio_vocabularies/services/tasks.py +0 -30
- invenio_vocabularies/templates/semantic-ui/invenio_vocabularies/subjects.html +1 -1
- {invenio_vocabularies-4.1.1.dist-info → invenio_vocabularies-4.3.0.dist-info}/METADATA +18 -1
- {invenio_vocabularies-4.1.1.dist-info → invenio_vocabularies-4.3.0.dist-info}/RECORD +35 -33
- {invenio_vocabularies-4.1.1.dist-info → invenio_vocabularies-4.3.0.dist-info}/AUTHORS.rst +0 -0
- {invenio_vocabularies-4.1.1.dist-info → invenio_vocabularies-4.3.0.dist-info}/LICENSE +0 -0
- {invenio_vocabularies-4.1.1.dist-info → invenio_vocabularies-4.3.0.dist-info}/WHEEL +0 -0
- {invenio_vocabularies-4.1.1.dist-info → invenio_vocabularies-4.3.0.dist-info}/entry_points.txt +0 -0
- {invenio_vocabularies-4.1.1.dist-info → invenio_vocabularies-4.3.0.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2021-
|
|
3
|
+
# Copyright (C) 2021-2024 CERN.
|
|
4
4
|
#
|
|
5
5
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
6
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -8,17 +8,41 @@
|
|
|
8
8
|
|
|
9
9
|
"""Base data stream."""
|
|
10
10
|
|
|
11
|
+
from flask import current_app
|
|
12
|
+
|
|
11
13
|
from .errors import ReaderError, TransformerError, WriterError
|
|
12
14
|
|
|
13
15
|
|
|
14
16
|
class StreamEntry:
|
|
15
17
|
"""Object to encapsulate streams processing."""
|
|
16
18
|
|
|
17
|
-
def __init__(self, entry, errors=None):
|
|
18
|
-
"""Constructor.
|
|
19
|
+
def __init__(self, entry, record=None, errors=None, op_type=None, exc=None):
|
|
20
|
+
"""Constructor for the StreamEntry class.
|
|
21
|
+
|
|
22
|
+
:param entry (object): The entry object, usually a record dict.
|
|
23
|
+
:param record (object): The record object, usually a record class.
|
|
24
|
+
:param errors (list, optional): List of errors. Defaults to None.
|
|
25
|
+
:param op_type (str, optional): The operation type. Defaults to None.
|
|
26
|
+
:param exc (str, optional): The raised unhandled exception. Defaults to None.
|
|
27
|
+
"""
|
|
19
28
|
self.entry = entry
|
|
29
|
+
self.record = record
|
|
20
30
|
self.filtered = False
|
|
21
31
|
self.errors = errors or []
|
|
32
|
+
self.op_type = op_type
|
|
33
|
+
self.exc = exc
|
|
34
|
+
|
|
35
|
+
def log_errors(self, logger=None):
|
|
36
|
+
"""Log the errors using the provided logger or the default logger.
|
|
37
|
+
|
|
38
|
+
:param logger (logging.Logger, optional): Logger instance to use. Defaults to None.
|
|
39
|
+
"""
|
|
40
|
+
if logger is None:
|
|
41
|
+
logger = current_app.logger
|
|
42
|
+
for error in self.errors:
|
|
43
|
+
logger.error(f"Error in entry {self.entry}: {error}")
|
|
44
|
+
if self.exc:
|
|
45
|
+
logger.error(f"Exception in entry {self.entry}: {self.exc}")
|
|
22
46
|
|
|
23
47
|
|
|
24
48
|
class DataStream:
|
|
@@ -39,15 +63,10 @@ class DataStream:
|
|
|
39
63
|
"""Checks if an stream_entry should be filtered out (skipped)."""
|
|
40
64
|
return False
|
|
41
65
|
|
|
42
|
-
def
|
|
43
|
-
"""
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
It will iterate over the `StreamEntry` objects returned by
|
|
47
|
-
the reader, apply the transformations and yield the result of
|
|
48
|
-
writing it.
|
|
49
|
-
"""
|
|
50
|
-
for stream_entry in self.read():
|
|
66
|
+
def process_batch(self, batch, write_many=False):
|
|
67
|
+
"""Process a batch of entries."""
|
|
68
|
+
transformed_entries = []
|
|
69
|
+
for stream_entry in batch:
|
|
51
70
|
if stream_entry.errors:
|
|
52
71
|
yield stream_entry # reading errors
|
|
53
72
|
else:
|
|
@@ -58,7 +77,31 @@ class DataStream:
|
|
|
58
77
|
transformed_entry.filtered = True
|
|
59
78
|
yield transformed_entry
|
|
60
79
|
else:
|
|
61
|
-
|
|
80
|
+
transformed_entries.append(transformed_entry)
|
|
81
|
+
if transformed_entries:
|
|
82
|
+
if write_many:
|
|
83
|
+
yield from self.batch_write(transformed_entries)
|
|
84
|
+
else:
|
|
85
|
+
yield from (self.write(entry) for entry in transformed_entries)
|
|
86
|
+
|
|
87
|
+
def process(self, batch_size=100, write_many=False, *args, **kwargs):
|
|
88
|
+
"""Iterates over the entries.
|
|
89
|
+
|
|
90
|
+
Uses the reader to get the raw entries and transforms them.
|
|
91
|
+
It will iterate over the `StreamEntry` objects returned by
|
|
92
|
+
the reader, apply the transformations and yield the result of
|
|
93
|
+
writing it.
|
|
94
|
+
"""
|
|
95
|
+
batch = []
|
|
96
|
+
for stream_entry in self.read():
|
|
97
|
+
batch.append(stream_entry)
|
|
98
|
+
if len(batch) >= batch_size:
|
|
99
|
+
yield from self.process_batch(batch, write_many=write_many)
|
|
100
|
+
batch = []
|
|
101
|
+
|
|
102
|
+
# Process any remaining entries in the last batch
|
|
103
|
+
if batch:
|
|
104
|
+
yield from self.process_batch(batch, write_many=write_many)
|
|
62
105
|
|
|
63
106
|
def read(self):
|
|
64
107
|
"""Recursively read the entries."""
|
|
@@ -107,6 +150,11 @@ class DataStream:
|
|
|
107
150
|
|
|
108
151
|
return stream_entry
|
|
109
152
|
|
|
153
|
+
def batch_write(self, stream_entries, *args, **kwargs):
|
|
154
|
+
"""Apply the transformations to an stream_entry. Errors are handler in the service layer."""
|
|
155
|
+
for writer in self._writers:
|
|
156
|
+
yield from writer.write_many(stream_entries)
|
|
157
|
+
|
|
110
158
|
def total(self, *args, **kwargs):
|
|
111
159
|
"""The total of entries obtained from the origin."""
|
|
112
160
|
raise NotImplementedError()
|
|
@@ -21,6 +21,7 @@ from json.decoder import JSONDecodeError
|
|
|
21
21
|
import requests
|
|
22
22
|
import yaml
|
|
23
23
|
from lxml import etree
|
|
24
|
+
from lxml.html import fromstring
|
|
24
25
|
from lxml.html import parse as html_parse
|
|
25
26
|
|
|
26
27
|
from .errors import ReaderError
|
|
@@ -226,8 +227,13 @@ class XMLReader(BaseReader):
|
|
|
226
227
|
def _iter(self, fp, *args, **kwargs):
|
|
227
228
|
"""Read and parse an XML file to dict."""
|
|
228
229
|
# NOTE: We parse HTML, to skip XML validation and strip XML namespaces
|
|
229
|
-
|
|
230
|
-
|
|
230
|
+
record = None
|
|
231
|
+
try:
|
|
232
|
+
xml_tree = fromstring(fp)
|
|
233
|
+
record = etree_to_dict(xml_tree).get("record")
|
|
234
|
+
except Exception as e:
|
|
235
|
+
xml_tree = html_parse(fp).getroot()
|
|
236
|
+
record = etree_to_dict(xml_tree)["html"]["body"].get("record")
|
|
231
237
|
|
|
232
238
|
if not record:
|
|
233
239
|
raise ReaderError(f"Record not found in XML entry.")
|
|
@@ -270,19 +276,38 @@ class OAIPMHReader(BaseReader):
|
|
|
270
276
|
self.xml.find(f".//{self._oai_namespace}metadata").getchildren()[0],
|
|
271
277
|
)
|
|
272
278
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
279
|
+
if self._verb == "ListRecords":
|
|
280
|
+
scythe.class_mapping["ListRecords"] = OAIRecord
|
|
281
|
+
try:
|
|
282
|
+
records = scythe.list_records(
|
|
283
|
+
from_=self._from,
|
|
284
|
+
until=self._until,
|
|
285
|
+
metadata_prefix=self._metadata_prefix,
|
|
286
|
+
set_=self._set,
|
|
287
|
+
ignore_deleted=True,
|
|
288
|
+
)
|
|
289
|
+
for record in records:
|
|
290
|
+
yield {"record": record}
|
|
291
|
+
except oaipmh_scythe.NoRecordsMatch:
|
|
292
|
+
raise ReaderError("No records found in OAI-PMH request.")
|
|
293
|
+
else:
|
|
294
|
+
scythe.class_mapping["GetRecord"] = OAIRecord
|
|
295
|
+
try:
|
|
296
|
+
headers = scythe.list_identifiers(
|
|
297
|
+
from_=self._from,
|
|
298
|
+
until=self._until,
|
|
299
|
+
metadata_prefix=self._metadata_prefix,
|
|
300
|
+
set_=self._set,
|
|
301
|
+
ignore_deleted=True,
|
|
302
|
+
)
|
|
303
|
+
for header in headers:
|
|
304
|
+
record = scythe.get_record(
|
|
305
|
+
identifier=header.identifier,
|
|
306
|
+
metadata_prefix=self._metadata_prefix,
|
|
307
|
+
)
|
|
308
|
+
yield {"record": record}
|
|
309
|
+
except oaipmh_scythe.NoRecordsMatch:
|
|
310
|
+
raise ReaderError("No records found in OAI-PMH request.")
|
|
286
311
|
|
|
287
312
|
def read(self, item=None, *args, **kwargs):
|
|
288
313
|
"""Reads from item or opens the file descriptor from origin."""
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
#
|
|
3
|
+
# Copyright (C) 2022-2024 CERN.
|
|
4
|
+
#
|
|
5
|
+
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
|
+
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
7
|
+
# details.
|
|
8
|
+
|
|
9
|
+
"""Data Streams Celery tasks."""
|
|
10
|
+
|
|
11
|
+
from celery import shared_task
|
|
12
|
+
|
|
13
|
+
from ..datastreams import StreamEntry
|
|
14
|
+
from ..datastreams.factories import WriterFactory
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@shared_task(ignore_result=True)
|
|
18
|
+
def write_entry(writer_config, entry):
|
|
19
|
+
"""Write an entry.
|
|
20
|
+
|
|
21
|
+
:param writer_config: writer configuration as accepted by the WriterFactory.
|
|
22
|
+
:param entry: dictionary, StreamEntry is not serializable.
|
|
23
|
+
"""
|
|
24
|
+
writer = WriterFactory.create(config=writer_config)
|
|
25
|
+
writer.write(StreamEntry(entry))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@shared_task(ignore_result=True)
|
|
29
|
+
def write_many_entry(writer_config, entries):
|
|
30
|
+
"""Write many entries.
|
|
31
|
+
|
|
32
|
+
:param writer_config: writer configuration as accepted by the WriterFactory.
|
|
33
|
+
:param entries: list of dictionaries, StreamEntry is not serializable.
|
|
34
|
+
"""
|
|
35
|
+
writer = WriterFactory.create(config=writer_config)
|
|
36
|
+
stream_entries = [StreamEntry(entry) for entry in entries]
|
|
37
|
+
writer.write_many(stream_entries)
|
|
@@ -20,11 +20,17 @@ from marshmallow import ValidationError
|
|
|
20
20
|
|
|
21
21
|
from .datastreams import StreamEntry
|
|
22
22
|
from .errors import WriterError
|
|
23
|
+
from .tasks import write_entry, write_many_entry
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
class BaseWriter(ABC):
|
|
26
27
|
"""Base writer."""
|
|
27
28
|
|
|
29
|
+
def __init__(self, *args, **kwargs):
|
|
30
|
+
"""Base initialization logic."""
|
|
31
|
+
# Add any base initialization here if needed
|
|
32
|
+
pass
|
|
33
|
+
|
|
28
34
|
@abstractmethod
|
|
29
35
|
def write(self, stream_entry, *args, **kwargs):
|
|
30
36
|
"""Writes the input stream entry to the target output.
|
|
@@ -35,6 +41,16 @@ class BaseWriter(ABC):
|
|
|
35
41
|
"""
|
|
36
42
|
pass
|
|
37
43
|
|
|
44
|
+
@abstractmethod
|
|
45
|
+
def write_many(self, stream_entries, *args, **kwargs):
|
|
46
|
+
"""Writes the input streams entry to the target output.
|
|
47
|
+
|
|
48
|
+
:returns: A List of StreamEntry. The result of writing the entry.
|
|
49
|
+
Raises WriterError in case of errors.
|
|
50
|
+
|
|
51
|
+
"""
|
|
52
|
+
pass
|
|
53
|
+
|
|
38
54
|
|
|
39
55
|
class ServiceWriter(BaseWriter):
|
|
40
56
|
"""Writes the entries to an RDM instance using a Service object."""
|
|
@@ -85,6 +101,25 @@ class ServiceWriter(BaseWriter):
|
|
|
85
101
|
# TODO: Check if we can get the error message easier
|
|
86
102
|
raise WriterError([{"InvalidRelationValue": err.args[0]}])
|
|
87
103
|
|
|
104
|
+
def write_many(self, stream_entries, *args, **kwargs):
|
|
105
|
+
"""Writes the input entries using a given service."""
|
|
106
|
+
entries = [entry.entry for entry in stream_entries]
|
|
107
|
+
entries_with_id = [(self._entry_id(entry), entry) for entry in entries]
|
|
108
|
+
results = self._service.create_or_update_many(self._identity, entries_with_id)
|
|
109
|
+
stream_entries_processed = []
|
|
110
|
+
for entry, result in zip(entries, results):
|
|
111
|
+
processed_stream_entry = StreamEntry(
|
|
112
|
+
entry=entry,
|
|
113
|
+
record=result.record,
|
|
114
|
+
errors=result.errors,
|
|
115
|
+
op_type=result.op_type,
|
|
116
|
+
exc=result.exc,
|
|
117
|
+
)
|
|
118
|
+
processed_stream_entry.log_errors()
|
|
119
|
+
stream_entries_processed.append(processed_stream_entry)
|
|
120
|
+
|
|
121
|
+
return stream_entries_processed
|
|
122
|
+
|
|
88
123
|
|
|
89
124
|
class YamlWriter(BaseWriter):
|
|
90
125
|
"""Writes the entries to a YAML file."""
|
|
@@ -106,3 +141,38 @@ class YamlWriter(BaseWriter):
|
|
|
106
141
|
yaml.safe_dump([stream_entry.entry], file, allow_unicode=True)
|
|
107
142
|
|
|
108
143
|
return stream_entry
|
|
144
|
+
|
|
145
|
+
def write_many(self, stream_entries, *args, **kwargs):
|
|
146
|
+
"""Writes the yaml input entries."""
|
|
147
|
+
with open(self._filepath, "a") as file:
|
|
148
|
+
yaml.safe_dump(
|
|
149
|
+
[stream_entry.entry for stream_entry in stream_entries],
|
|
150
|
+
file,
|
|
151
|
+
allow_unicode=True,
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class AsyncWriter(BaseWriter):
|
|
156
|
+
"""Writes the entries asynchronously (celery task)."""
|
|
157
|
+
|
|
158
|
+
def __init__(self, writer, *args, **kwargs):
|
|
159
|
+
"""Constructor.
|
|
160
|
+
|
|
161
|
+
:param writer: writer to use.
|
|
162
|
+
"""
|
|
163
|
+
super().__init__(*args, **kwargs)
|
|
164
|
+
self._writer = writer
|
|
165
|
+
|
|
166
|
+
def write(self, stream_entry, *args, **kwargs):
|
|
167
|
+
"""Launches a celery task to write an entry."""
|
|
168
|
+
write_entry.delay(self._writer, stream_entry.entry)
|
|
169
|
+
|
|
170
|
+
return stream_entry
|
|
171
|
+
|
|
172
|
+
def write_many(self, stream_entries, *args, **kwargs):
|
|
173
|
+
"""Launches a celery task to write an entry."""
|
|
174
|
+
write_many_entry.delay(
|
|
175
|
+
self._writer, [stream_entry.entry for stream_entry in stream_entries]
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
return stream_entries
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2020-
|
|
3
|
+
# Copyright (C) 2020-2024 CERN.
|
|
4
4
|
#
|
|
5
5
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
6
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -79,9 +79,7 @@ class VocabularyScheme(db.Model):
|
|
|
79
79
|
"""Create a new vocabulary subtype."""
|
|
80
80
|
banned = [",", ":"]
|
|
81
81
|
for b in banned:
|
|
82
|
-
assert
|
|
83
|
-
b not in data["id"]
|
|
84
|
-
), f"No '{b}' allowed in VocabularyScheme.id" # noqa
|
|
82
|
+
assert b not in data["id"], f"No '{b}' allowed in VocabularyScheme.id"
|
|
85
83
|
|
|
86
84
|
with db.session.begin_nested():
|
|
87
85
|
obj = cls(**data)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2021 CERN.
|
|
3
|
+
# Copyright (C) 2021-2024 CERN.
|
|
4
4
|
#
|
|
5
5
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
6
6
|
# modify it under the terms of the MIT License; see LICENSE file for more
|
|
@@ -8,7 +8,6 @@
|
|
|
8
8
|
|
|
9
9
|
"""Persistent identifier provider for vocabularies."""
|
|
10
10
|
|
|
11
|
-
|
|
12
11
|
from invenio_pidstore.models import PIDStatus
|
|
13
12
|
from invenio_pidstore.providers.base import BaseProvider
|
|
14
13
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2020-
|
|
3
|
+
# Copyright (C) 2020-2024 CERN.
|
|
4
4
|
# Copyright (C) 2021 Northwestern University.
|
|
5
5
|
#
|
|
6
6
|
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
# details.
|
|
9
9
|
|
|
10
10
|
"""Vocabulary resource schema."""
|
|
11
|
+
|
|
11
12
|
from marshmallow import Schema, fields
|
|
12
13
|
|
|
13
14
|
from invenio_vocabularies.resources.serializer import L10NString
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
#
|
|
3
|
-
# Copyright (C) 2024
|
|
3
|
+
# Copyright (C) 2024 CERN.
|
|
4
4
|
#
|
|
5
5
|
# Invenio-RDM-Records is free software; you can redistribute it and/or modify
|
|
6
6
|
# it under the terms of the MIT License; see LICENSE file for more details.
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
"""Custom fields."""
|
|
10
|
+
|
|
10
11
|
from invenio_i18n import lazy_gettext as _
|
|
11
12
|
|
|
12
13
|
from ...contrib.subjects.api import Subject
|
|
@@ -25,7 +26,7 @@ class SubjectCF(VocabularyCF):
|
|
|
25
26
|
vocabulary_id="subjects",
|
|
26
27
|
schema=SubjectRelationSchema,
|
|
27
28
|
ui_schema=SubjectRelationSchema,
|
|
28
|
-
**kwargs
|
|
29
|
+
**kwargs,
|
|
29
30
|
)
|
|
30
31
|
self.pid_field = Subject.pid
|
|
31
32
|
|
|
@@ -11,7 +11,6 @@ from celery import shared_task
|
|
|
11
11
|
from flask import current_app
|
|
12
12
|
|
|
13
13
|
from ..datastreams.factories import DataStreamFactory
|
|
14
|
-
from ..factories import get_vocabulary_config
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
@shared_task(ignore_result=True)
|
|
@@ -27,32 +26,3 @@ def process_datastream(config):
|
|
|
27
26
|
if result.errors:
|
|
28
27
|
for err in result.errors:
|
|
29
28
|
current_app.logger.error(err)
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
@shared_task()
|
|
33
|
-
def import_funders():
|
|
34
|
-
"""Import the funders vocabulary.
|
|
35
|
-
|
|
36
|
-
Only new records are imported.
|
|
37
|
-
Existing records are not updated.
|
|
38
|
-
"""
|
|
39
|
-
vc = get_vocabulary_config("funders")
|
|
40
|
-
config = vc.get_config()
|
|
41
|
-
|
|
42
|
-
# When importing funders via a Celery task, make sure that we are automatically downloading the ROR file,
|
|
43
|
-
# instead of relying on a local file on the file system.
|
|
44
|
-
if config["readers"][0]["type"] == "ror-http":
|
|
45
|
-
readers_config_with_ror_http = config["readers"]
|
|
46
|
-
else:
|
|
47
|
-
readers_config_with_ror_http = [{"type": "ror-http"}] + config["readers"]
|
|
48
|
-
|
|
49
|
-
ds = DataStreamFactory.create(
|
|
50
|
-
readers_config=readers_config_with_ror_http,
|
|
51
|
-
transformers_config=config.get("transformers"),
|
|
52
|
-
writers_config=config["writers"],
|
|
53
|
-
)
|
|
54
|
-
|
|
55
|
-
for result in ds.process():
|
|
56
|
-
if result.errors:
|
|
57
|
-
for err in result.errors:
|
|
58
|
-
current_app.logger.exception(err)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: invenio-vocabularies
|
|
3
|
-
Version: 4.1.1
|
|
3
|
+
Version: 4.3.0
|
|
4
4
|
Summary: "Invenio module for managing vocabularies."
|
|
5
5
|
Home-page: https://github.com/inveniosoftware/invenio-vocabularies
|
|
6
6
|
Author: CERN
|
|
@@ -15,6 +15,7 @@ Requires-Dist: invenio-records-resources <7.0.0,>=6.0.0
|
|
|
15
15
|
Requires-Dist: invenio-administration <3.0.0,>=2.0.0
|
|
16
16
|
Requires-Dist: lxml >=4.5.0
|
|
17
17
|
Requires-Dist: PyYAML >=5.4.1
|
|
18
|
+
Requires-Dist: regex >=2024.7.24
|
|
18
19
|
Provides-Extra: elasticsearch7
|
|
19
20
|
Requires-Dist: invenio-search[elasticsearch7] <3.0.0,>=2.1.0 ; extra == 'elasticsearch7'
|
|
20
21
|
Provides-Extra: mysql
|
|
@@ -25,6 +26,8 @@ Requires-Dist: invenio-search[opensearch1] <3.0.0,>=2.1.0 ; extra == 'opensearch
|
|
|
25
26
|
Provides-Extra: opensearch2
|
|
26
27
|
Requires-Dist: invenio-search[opensearch2] <3.0.0,>=2.1.0 ; extra == 'opensearch2'
|
|
27
28
|
Provides-Extra: postgresql
|
|
29
|
+
Provides-Extra: s3fs
|
|
30
|
+
Requires-Dist: s3fs >=2024.6.1 ; extra == 's3fs'
|
|
28
31
|
Provides-Extra: sqlite
|
|
29
32
|
Provides-Extra: tests
|
|
30
33
|
Requires-Dist: pytest-black-ng >=0.4.0 ; extra == 'tests'
|
|
@@ -78,6 +81,20 @@ https://invenio-vocabularies.readthedocs.io/
|
|
|
78
81
|
Changes
|
|
79
82
|
=======
|
|
80
83
|
|
|
84
|
+
Version v4.3.0 (released 2024-08-05)
|
|
85
|
+
|
|
86
|
+
- names: make names_exclude_regex configurable
|
|
87
|
+
- names: validate entry full names
|
|
88
|
+
- names: add orcid public data sync
|
|
89
|
+
|
|
90
|
+
Version v4.2.0 (released 2024-07-24)
|
|
91
|
+
|
|
92
|
+
- ror: check last update; use ld+json for metadata (#367)
|
|
93
|
+
- tasks: remove import funders task
|
|
94
|
+
- funders: add and export custom transformer
|
|
95
|
+
- affiliations: add and export custom transformer
|
|
96
|
+
- datastreams: implement asynchronous writer
|
|
97
|
+
|
|
81
98
|
Version v4.1.1 (released 2024-07-15)
|
|
82
99
|
|
|
83
100
|
- installation: use invenio-oaipmh-scythe from PyPI
|