oarepo-runtime 1.10.3__py3-none-any.whl → 2.0.0.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oarepo_runtime/__init__.py +24 -0
- oarepo_runtime/api.py +210 -0
- oarepo_runtime/cli/__init__.py +10 -21
- oarepo_runtime/cli/search.py +34 -0
- oarepo_runtime/config.py +98 -13
- oarepo_runtime/ext.py +64 -82
- oarepo_runtime/proxies.py +21 -5
- oarepo_runtime/records/__init__.py +11 -50
- oarepo_runtime/records/drafts.py +24 -18
- oarepo_runtime/records/mapping.py +84 -0
- oarepo_runtime/records/pid_providers.py +43 -7
- oarepo_runtime/records/systemfields/__init__.py +15 -33
- oarepo_runtime/records/systemfields/mapping.py +41 -24
- oarepo_runtime/records/systemfields/publication_status.py +61 -0
- oarepo_runtime/services/__init__.py +12 -0
- oarepo_runtime/services/config/__init__.py +15 -21
- oarepo_runtime/services/config/link_conditions.py +69 -75
- oarepo_runtime/services/config/permissions.py +62 -0
- oarepo_runtime/services/facets/__init__.py +12 -33
- oarepo_runtime/services/facets/params.py +45 -110
- oarepo_runtime/services/records/__init__.py +14 -1
- oarepo_runtime/services/records/links.py +21 -11
- oarepo_runtime/services/records/mapping.py +42 -0
- oarepo_runtime/services/results.py +98 -109
- oarepo_runtime/services/schema/__init__.py +12 -44
- oarepo_runtime/services/schema/i18n.py +47 -22
- oarepo_runtime/services/schema/i18n_ui.py +61 -24
- {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev4.dist-info}/METADATA +10 -21
- oarepo_runtime-2.0.0.dev4.dist-info/RECORD +32 -0
- {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev4.dist-info}/WHEEL +1 -2
- oarepo_runtime-2.0.0.dev4.dist-info/entry_points.txt +5 -0
- oarepo_runtime/cli/assets.py +0 -145
- oarepo_runtime/cli/base.py +0 -25
- oarepo_runtime/cli/cf.py +0 -15
- oarepo_runtime/cli/check.py +0 -167
- oarepo_runtime/cli/configuration.py +0 -51
- oarepo_runtime/cli/fixtures.py +0 -167
- oarepo_runtime/cli/index.py +0 -272
- oarepo_runtime/cli/permissions/__init__.py +0 -6
- oarepo_runtime/cli/permissions/base.py +0 -26
- oarepo_runtime/cli/permissions/evaluate.py +0 -63
- oarepo_runtime/cli/permissions/list.py +0 -239
- oarepo_runtime/cli/permissions/search.py +0 -121
- oarepo_runtime/cli/validate.py +0 -150
- oarepo_runtime/datastreams/__init__.py +0 -38
- oarepo_runtime/datastreams/asynchronous.py +0 -247
- oarepo_runtime/datastreams/catalogue.py +0 -150
- oarepo_runtime/datastreams/datastreams.py +0 -152
- oarepo_runtime/datastreams/errors.py +0 -54
- oarepo_runtime/datastreams/ext.py +0 -41
- oarepo_runtime/datastreams/fixtures.py +0 -265
- oarepo_runtime/datastreams/json.py +0 -4
- oarepo_runtime/datastreams/readers/__init__.py +0 -39
- oarepo_runtime/datastreams/readers/attachments.py +0 -51
- oarepo_runtime/datastreams/readers/excel.py +0 -123
- oarepo_runtime/datastreams/readers/json.py +0 -27
- oarepo_runtime/datastreams/readers/service.py +0 -54
- oarepo_runtime/datastreams/readers/yaml.py +0 -14
- oarepo_runtime/datastreams/semi_asynchronous.py +0 -91
- oarepo_runtime/datastreams/synchronous.py +0 -70
- oarepo_runtime/datastreams/transformers.py +0 -18
- oarepo_runtime/datastreams/types.py +0 -323
- oarepo_runtime/datastreams/utils.py +0 -131
- oarepo_runtime/datastreams/writers/__init__.py +0 -21
- oarepo_runtime/datastreams/writers/attachments_file.py +0 -92
- oarepo_runtime/datastreams/writers/attachments_service.py +0 -118
- oarepo_runtime/datastreams/writers/publish.py +0 -70
- oarepo_runtime/datastreams/writers/service.py +0 -175
- oarepo_runtime/datastreams/writers/utils.py +0 -30
- oarepo_runtime/datastreams/writers/validation_errors.py +0 -20
- oarepo_runtime/datastreams/writers/yaml.py +0 -56
- oarepo_runtime/ext_config.py +0 -67
- oarepo_runtime/i18n/__init__.py +0 -3
- oarepo_runtime/info/__init__.py +0 -0
- oarepo_runtime/info/check.py +0 -95
- oarepo_runtime/info/permissions/__init__.py +0 -0
- oarepo_runtime/info/permissions/debug.py +0 -191
- oarepo_runtime/info/views.py +0 -586
- oarepo_runtime/profile.py +0 -60
- oarepo_runtime/records/dumpers/__init__.py +0 -8
- oarepo_runtime/records/dumpers/edtf_interval.py +0 -38
- oarepo_runtime/records/dumpers/multilingual_dumper.py +0 -34
- oarepo_runtime/records/entity_resolvers/__init__.py +0 -13
- oarepo_runtime/records/entity_resolvers/proxies.py +0 -57
- oarepo_runtime/records/mappings/__init__.py +0 -0
- oarepo_runtime/records/mappings/rdm_parent_mapping.json +0 -483
- oarepo_runtime/records/owners/__init__.py +0 -3
- oarepo_runtime/records/owners/registry.py +0 -22
- oarepo_runtime/records/relations/__init__.py +0 -22
- oarepo_runtime/records/relations/base.py +0 -296
- oarepo_runtime/records/relations/internal.py +0 -46
- oarepo_runtime/records/relations/lookup.py +0 -28
- oarepo_runtime/records/relations/pid_relation.py +0 -102
- oarepo_runtime/records/systemfields/featured_file.py +0 -45
- oarepo_runtime/records/systemfields/has_draftcheck.py +0 -47
- oarepo_runtime/records/systemfields/icu.py +0 -371
- oarepo_runtime/records/systemfields/owner.py +0 -115
- oarepo_runtime/records/systemfields/record_status.py +0 -35
- oarepo_runtime/records/systemfields/selectors.py +0 -98
- oarepo_runtime/records/systemfields/synthetic.py +0 -130
- oarepo_runtime/resources/__init__.py +0 -4
- oarepo_runtime/resources/config.py +0 -12
- oarepo_runtime/resources/file_resource.py +0 -15
- oarepo_runtime/resources/json_serializer.py +0 -27
- oarepo_runtime/resources/localized_ui_json_serializer.py +0 -54
- oarepo_runtime/resources/resource.py +0 -53
- oarepo_runtime/resources/responses.py +0 -20
- oarepo_runtime/services/components.py +0 -429
- oarepo_runtime/services/config/draft_link.py +0 -23
- oarepo_runtime/services/config/permissions_presets.py +0 -174
- oarepo_runtime/services/config/service.py +0 -117
- oarepo_runtime/services/custom_fields/__init__.py +0 -80
- oarepo_runtime/services/custom_fields/mappings.py +0 -188
- oarepo_runtime/services/entity/__init__.py +0 -0
- oarepo_runtime/services/entity/config.py +0 -14
- oarepo_runtime/services/entity/schema.py +0 -9
- oarepo_runtime/services/entity/service.py +0 -48
- oarepo_runtime/services/expansions/__init__.py +0 -0
- oarepo_runtime/services/expansions/expandable_fields.py +0 -21
- oarepo_runtime/services/expansions/service.py +0 -4
- oarepo_runtime/services/facets/base.py +0 -12
- oarepo_runtime/services/facets/date.py +0 -72
- oarepo_runtime/services/facets/enum.py +0 -11
- oarepo_runtime/services/facets/facet_groups_names.py +0 -17
- oarepo_runtime/services/facets/max_facet.py +0 -13
- oarepo_runtime/services/facets/multilingual_facet.py +0 -33
- oarepo_runtime/services/facets/nested_facet.py +0 -32
- oarepo_runtime/services/facets/year_histogram.py +0 -200
- oarepo_runtime/services/files/__init__.py +0 -8
- oarepo_runtime/services/files/components.py +0 -62
- oarepo_runtime/services/files/service.py +0 -16
- oarepo_runtime/services/generators.py +0 -10
- oarepo_runtime/services/permissions/__init__.py +0 -3
- oarepo_runtime/services/permissions/generators.py +0 -103
- oarepo_runtime/services/relations/__init__.py +0 -0
- oarepo_runtime/services/relations/components.py +0 -15
- oarepo_runtime/services/relations/errors.py +0 -18
- oarepo_runtime/services/relations/mapping.py +0 -38
- oarepo_runtime/services/schema/cf.py +0 -13
- oarepo_runtime/services/schema/i18n_validation.py +0 -7
- oarepo_runtime/services/schema/marshmallow.py +0 -44
- oarepo_runtime/services/schema/marshmallow_to_json_schema.py +0 -72
- oarepo_runtime/services/schema/oneofschema.py +0 -192
- oarepo_runtime/services/schema/polymorphic.py +0 -21
- oarepo_runtime/services/schema/rdm.py +0 -146
- oarepo_runtime/services/schema/rdm_ui.py +0 -156
- oarepo_runtime/services/schema/ui.py +0 -251
- oarepo_runtime/services/schema/validation.py +0 -70
- oarepo_runtime/services/search.py +0 -282
- oarepo_runtime/services/service.py +0 -61
- oarepo_runtime/tasks.py +0 -6
- oarepo_runtime/translations/cs/LC_MESSAGES/messages.mo +0 -0
- oarepo_runtime/translations/cs/LC_MESSAGES/messages.po +0 -95
- oarepo_runtime/translations/default_translations.py +0 -6
- oarepo_runtime/translations/en/LC_MESSAGES/messages.mo +0 -0
- oarepo_runtime/translations/en/LC_MESSAGES/messages.po +0 -97
- oarepo_runtime/translations/messages.pot +0 -100
- oarepo_runtime/uow.py +0 -146
- oarepo_runtime/utils/__init__.py +0 -0
- oarepo_runtime/utils/functools.py +0 -37
- oarepo_runtime/utils/identity_utils.py +0 -35
- oarepo_runtime/utils/index.py +0 -11
- oarepo_runtime/utils/path.py +0 -97
- oarepo_runtime-1.10.3.dist-info/RECORD +0 -163
- oarepo_runtime-1.10.3.dist-info/entry_points.txt +0 -16
- oarepo_runtime-1.10.3.dist-info/top_level.txt +0 -2
- tests/marshmallow_to_json/__init__.py +0 -0
- tests/marshmallow_to_json/test_datacite_ui_schema.py +0 -1410
- tests/marshmallow_to_json/test_simple_schema.py +0 -52
- tests/pkg_data/__init__.py +0 -0
- {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev4.dist-info}/licenses/LICENSE +0 -0
@@ -1,123 +0,0 @@
|
|
1
|
-
import re
|
2
|
-
from traceback import format_exc
|
3
|
-
from typing import Iterator
|
4
|
-
|
5
|
-
import openpyxl
|
6
|
-
|
7
|
-
from ..errors import ReaderError
|
8
|
-
from . import BaseReader, StreamEntry
|
9
|
-
from .attachments import AttachmentsReaderMixin
|
10
|
-
|
11
|
-
|
12
|
-
class ExcelReader(AttachmentsReaderMixin, BaseReader):
    """Reader that converts rows of the active sheet of an Excel workbook to entries.

    The sheet is read as blocks of non-empty rows separated by empty rows:
    leading empty rows are skipped, the first block is the "header" block and
    all following non-empty rows are the data. If there is no second block,
    the first block is treated as the data.
    """

    def iter_entries(self) -> Iterator[StreamEntry]:
        """Yield a ``StreamEntry`` for each converted data row of the active sheet.

        :raises ReaderError: with code ``EXCEL_DECODE_ERROR`` when the workbook
            cannot be loaded; the original exception is chained as the cause.
        """
        with self._open("rb") as f:
            try:
                wb_obj = openpyxl.load_workbook(f)
                sheet_obj = wb_obj.active
            except Exception as err:
                raise ReaderError(
                    f"Cannot decode excel file {self._data_file.name}: {str(err)}",
                    code="EXCEL_DECODE_ERROR",
                    detail={
                        "message": str(err),
                        "exception": type(err).__name__,
                        "stack": format_exc(limit=10),
                    },
                ) from err

            # only the data part is emitted, the header block is not used here
            _header, data = self.get_excel_data(sheet_obj)
            for row in data:
                yield StreamEntry(row)

    def get_excel_data(self, sheet_obj):
        """Split the sheet into a header block and a data block.

        :param sheet_obj: worksheet providing ``iter_rows()``.
        :returns: a tuple ``(header, data)`` of two lists produced by
            :meth:`to_dict`. When the sheet contains just one block of rows,
            ``header`` is an empty list and that block is returned as ``data``.
            An empty sheet gives ``([], [])``.
        """
        header = []
        data = []
        it = sheet_obj.iter_rows()

        try:
            # skip leading empty rows
            row = next_row(it)
            while empty(row):
                row = next_row(it)
            # first block of non-empty rows
            while not empty(row):
                header.append(row)
                row = next_row(it)
            # separator rows between the two blocks
            while empty(row):
                row = next_row(it)
            # everything else that is not empty is data
            while True:
                if not empty(row):
                    data.append(row)
                row = next_row(it)
        except StopIteration:
            # running out of rows is the normal way of leaving the loops above
            pass
        if not data:
            return [], self.to_dict(header)
        else:
            return self.to_dict(header), self.to_dict(data)

    def to_dict(self, dta):
        """Convert a block of rows to a list of nested ``RowDict`` items.

        The first row of the block holds the column names. Each name is split
        on ``_`` and ``.`` into a path; numeric path segments address lists,
        other segments address dictionaries. Cell values are converted to
        stripped strings and empty values are left out.

        :param dta: list of rows (lists of cell values); the first row is the
            row with column names.
        :returns: one ``RowDict`` for each row after the first one; an empty
            list when ``dta`` is empty.
        """
        if not dta:
            # an empty sheet has no row with column names
            return []

        def is_array(val):
            try:
                int(val)
                return True
            except (TypeError, ValueError):
                return False

        def set_single(container, key, val):
            if isinstance(container, list):
                # lists are created only for numeric path segments;
                # pad with None so that the index exists
                index = int(key)
                while index >= len(container):
                    container.append(None)
                container[index] = val
            else:
                # in a dictionary even a numeric-looking column name
                # (for example "2020") is an ordinary key
                container[key] = val

        def iterset(k, v, container):
            while True:
                current_key = k[0]
                next_key = k[1] if len(k) > 1 else None
                if not next_key:
                    set_single(container, current_key, v)
                    return
                if isinstance(container, list):
                    # NOTE(review): current_key is not used as an index here,
                    # a new item is appended on every call. Two columns with
                    # the same index (a_0_x, a_0_y) therefore end up in two
                    # separate list items - confirm that this is intended.
                    container.append({} if not is_array(next_key) else [])
                    container = container[-1]
                else:
                    container = container.setdefault(
                        current_key, {} if not is_array(next_key) else []
                    )
                k = k[1:]

        def to_dict_item(header, item):
            ret = RowDict()
            for k, v in zip(header, item):
                if not k:
                    # column without a name
                    continue
                v = v if v is not None else ""
                v = str(v).strip()
                if v:
                    iterset(k, v, ret)
            return ret

        keys = [re.split("[_.]", x) if x else None for x in dta[0]]
        return [to_dict_item(keys, d) for d in dta[1:]]
|
107
|
-
|
108
|
-
|
109
|
-
def next_row(it):
    """Take the next row from the row iterator and return its cell values.

    ``StopIteration`` from the exhausted iterator is propagated to the caller.
    """
    cells = next(it)
    return [cell.value for cell in cells]
|
111
|
-
|
112
|
-
|
113
|
-
def empty(r):
    """Return True if the row contains no value.

    A cell counts as blank only when it is ``None`` or an empty string.
    Numeric zero and ``False`` are values, so a row holding only zeros is
    not treated as an empty (separator) row.

    :param r: iterable of cell values.
    """
    return all(val is None or val == "" for val in r)
|
118
|
-
|
119
|
-
|
120
|
-
class RowDict(dict):
    """Dictionary holding one converted row.

    Besides the dictionary content every instance has a ``_header``
    attribute, initially an empty dictionary.
    """

    def __init__(self, *args, **kwargs):
        """Create the dictionary from the given arguments and add ``_header``."""
        super().__init__(*args, **kwargs)
        self._header = dict()
|
@@ -1,27 +0,0 @@
|
|
1
|
-
import json
|
2
|
-
from typing import Iterator
|
3
|
-
|
4
|
-
from . import BaseReader, StreamEntry
|
5
|
-
from .attachments import AttachmentsReaderMixin
|
6
|
-
|
7
|
-
|
8
|
-
class JSONReader(AttachmentsReaderMixin, BaseReader):
    """JSON data iterator that loads records from a file containing a JSON array."""

    def iter_entries(self) -> Iterator[StreamEntry]:
        """Iterate over records.

        The whole file is parsed at once and every item of the top-level
        array is yielded as a ``StreamEntry``.

        :raises ValueError: if the top-level JSON value is not an array.
        """
        with self._open() as fp:
            data = json.load(fp)
            # explicit check, an assert would be removed with "python -O"
            if not isinstance(data, list):
                raise ValueError(
                    "JSON data must be an array of records, "
                    f"got {type(data).__name__}"
                )
            for d in data:
                yield StreamEntry(d)
|
18
|
-
|
19
|
-
|
20
|
-
class JSONLinesReader(BaseReader):
    """JSON Lines data iterator that loads records from JSON Lines files."""

    # NOTE(review): the other file readers implement iter_entries(), this one
    # overrides __iter__ directly - confirm against BaseReader.

    def __iter__(self) -> Iterator[StreamEntry]:
        """Iterate over records, one JSON document per non-blank line."""
        with self._open() as fp:
            for line in fp:
                # blank lines (for example at the end of the file) are not
                # JSON documents and json.loads would fail on them
                if not line.strip():
                    continue
                yield StreamEntry(json.loads(line))
|
@@ -1,54 +0,0 @@
|
|
1
|
-
from base64 import b64encode
|
2
|
-
from typing import List
|
3
|
-
|
4
|
-
from invenio_access.permissions import system_identity
|
5
|
-
from invenio_records_resources.proxies import current_service_registry
|
6
|
-
|
7
|
-
from ..types import StreamEntryFile
|
8
|
-
from ..utils import get_file_service_for_record_class
|
9
|
-
from . import BaseReader, StreamEntry
|
10
|
-
|
11
|
-
|
12
|
-
class ServiceReader(BaseReader):
    """Reads the entries from a repository instance using a Service object."""

    def __init__(self, *, service=None, identity=None, load_files=False, **kwargs):
        """Constructor.

        :param service: a service instance or a key of the
                        service registry.
        :param identity: access identity, ``system_identity`` is used
                         if it is not given.
        :param load_files: if True and the service config has ``record_cls``,
                           the file service for that record class is looked up
                           and files are attached to the entries.
        :param kwargs: passed to the ``BaseReader`` constructor.
        """
        super().__init__(**kwargs)

        if isinstance(service, str):
            service = current_service_registry.get(service)

        self._service = service
        self._identity = identity or system_identity
        self._file_service = None
        self._record_cls = getattr(self._service.config, "record_cls", None)

        if self._record_cls and load_files:
            # try to get file service
            self._file_service = get_file_service_for_record_class(self._record_cls)

    def __iter__(self):
        """Yield a ``StreamEntry`` for each record returned by ``service.scan``.

        When a file service is available, the content of every file of the
        record is read into memory and attached to the entry as a
        ``StreamEntryFile``.
        """
        for entry in self._service.scan(self._identity):
            files: List[StreamEntryFile] = []
            if self._file_service:
                for f in self._file_service.list_files(
                    self._identity, entry["id"]
                ).entries:
                    file_item = self._file_service.get_file_content(
                        self._identity, entry["id"], f["key"]
                    )
                    with file_item.open_stream("rb") as ff:
                        base64_content = b64encode(ff.read()).decode("ascii")
                        # the url is "data:" followed directly by the base64 text
                        files.append(
                            StreamEntryFile(
                                metadata=f, content_url=f"data:{base64_content}"
                            )
                        )

            yield StreamEntry(entry, files=files)
|
@@ -1,14 +0,0 @@
|
|
1
|
-
import yaml
|
2
|
-
|
3
|
-
from . import BaseReader, StreamEntry
|
4
|
-
from .attachments import AttachmentsReaderMixin
|
5
|
-
|
6
|
-
|
7
|
-
class YamlReader(AttachmentsReaderMixin, BaseReader):
    """YAML data iterator, every document of the YAML file is one record."""

    def iter_entries(self):
        """Yield a ``StreamEntry`` for each document loaded with ``yaml.safe_load_all``."""
        with self._open() as fp:
            yield from map(StreamEntry, yaml.safe_load_all(fp))
|
@@ -1,91 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# This package was taken from Invenio vocabularies and modified to be more universal
|
3
|
-
#
|
4
|
-
import logging
|
5
|
-
from typing import List
|
6
|
-
|
7
|
-
from celery import shared_task
|
8
|
-
from celery.canvas import Signature as CelerySignature
|
9
|
-
|
10
|
-
from .asynchronous import (
|
11
|
-
AsynchronousDataStream,
|
12
|
-
AsynchronousDataStreamChain,
|
13
|
-
deserialize_identity,
|
14
|
-
serialize_identity,
|
15
|
-
)
|
16
|
-
from .datastreams import DataStreamChain, Signature
|
17
|
-
from .transformers import BaseTransformer
|
18
|
-
from .types import JSONObject, StreamBatch, StreamEntryError
|
19
|
-
|
20
|
-
log = logging.getLogger("datastreams")
|
21
|
-
|
22
|
-
|
23
|
-
class SemiAsynchronousDataStreamChain(AsynchronousDataStreamChain):
    """Chain whose transformers and writers are all run by a single task."""

    def _prepare_chain(self, callback: CelerySignature):
        """Create the signature of ``run_semi_asynchronous_datastream_processor``.

        The identity, transformers and writers are passed in their JSON form.
        """
        identity_json = serialize_identity(self._identity)
        transformer_specs = [transformer.json for transformer in self._transformers]
        writer_specs = [writer.json for writer in self._writers]
        return run_semi_asynchronous_datastream_processor.s(
            transformers=transformer_specs,
            writers=writer_specs,
            identity=identity_json,
            callback=callback,
        )
|
32
|
-
|
33
|
-
|
34
|
-
class SemiAsynchronousDataStream(AsynchronousDataStream):
    """Data stream that builds a ``SemiAsynchronousDataStreamChain``."""

    def build_chain(self, identity) -> DataStreamChain:
        """Return the chain for this stream's transformers and writers."""
        chain_options = {
            "transformers": self._transformers,
            "writers": self._writers,
            "on_background": self._on_background,
            "identity": identity,
        }
        return SemiAsynchronousDataStreamChain(**chain_options)
|
44
|
-
|
45
|
-
|
46
|
-
@shared_task
def run_semi_asynchronous_datastream_processor(
    batch: JSONObject,
    *,
    transformers: List[JSONObject],
    writers: List[JSONObject],
    identity: JSONObject,
    callback: CelerySignature,
):
    """Run datastream processor.

    All transformers and then all writers are applied to the batch inside
    this single task. The callback is invoked via ``callback.apply`` with
    ``batch_started`` at the beginning, ``<kind>_error`` for each failed
    processor and ``batch_finished`` at the end.

    :param batch: JSON form of the batch.
    :param transformers: JSON signatures of the transformers, may be empty or None.
    :param writers: JSON signatures of the writers, may be empty or None.
    :param identity: serialized identity passed to the processors.
    :param callback: signature of the callback.
    :returns: always None.
    """

    callback.apply(kwargs={"callback": "batch_started", "batch": batch})

    batch = StreamBatch.from_json(batch)
    identity = deserialize_identity(identity)

    for signature in (transformers or []) + (writers or []):
        signature = Signature.from_json(signature)
        try:
            processor = signature.resolve(identity=identity)
            if isinstance(processor, BaseTransformer):
                batch = processor.apply(batch) or batch
            else:
                batch = processor.write(batch) or batch
        except Exception as ex:
            # a failed processor does not stop the chain, the error is
            # recorded on the batch and the next processor is run
            if log.isEnabledFor(logging.ERROR):
                # repr() of the batch is computed only if it will be logged;
                # log.exception adds the traceback of the current exception
                log.exception(
                    "Unexpected error in %s: %s",
                    repr(signature),
                    repr(batch),
                )
            err = StreamEntryError.from_exception(ex)
            batch.errors.append(err)
            callback.apply(
                kwargs={
                    "batch": batch.json,
                    "identity": serialize_identity(identity),
                    "callback": f"{signature.kind.value}_error",
                    "exception": err.json,
                },
            )

    callback.apply(kwargs={"callback": "batch_finished", "batch": batch.json})

    return None  # do not return anything to avoid redis pollution
|
@@ -1,70 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# This package was taken from Invenio vocabularies and modified to be more universal
|
3
|
-
#
|
4
|
-
import logging
|
5
|
-
from typing import List
|
6
|
-
|
7
|
-
from ..proxies import current_datastreams
|
8
|
-
from .datastreams import AbstractDataStream, DataStreamChain
|
9
|
-
from .transformers import BaseTransformer
|
10
|
-
from .types import DataStreamCallback, StreamEntryError
|
11
|
-
from .writers import BaseWriter
|
12
|
-
|
13
|
-
log = logging.getLogger("datastreams")
|
14
|
-
|
15
|
-
|
16
|
-
class SynchronousDataStreamChain(DataStreamChain):
    """Chain that applies the transformers and writers directly, one after another."""

    def __init__(self, transformers: List[BaseTransformer], writers: List[BaseWriter]):
        """Store the already created transformer and writer instances."""
        self._transformers = transformers
        self._writers = writers

    def process(self, batch, callback: DataStreamCallback):
        """Pass the batch through all transformers and then all writers.

        An exception in a transformer or writer does not stop the processing:
        it is logged, appended to ``batch.errors``, reported to the callback
        and the next processor is run.

        :param batch: the batch to process.
        :param callback: receives ``batch_started``, ``transformer_error``,
            ``writer_error`` and ``batch_finished`` calls.
        """
        callback.batch_started(batch)
        for transformer in self._transformers:
            try:
                # a transformer returning None keeps the current batch
                batch = transformer.apply(batch) or batch
            except Exception as ex:
                if log.isEnabledFor(logging.ERROR):
                    # repr() of the batch is computed only if it will be
                    # logged; log.exception adds the traceback
                    log.exception(
                        "Unexpected error in transformer: %s: %s",
                        repr(transformer),
                        repr(batch),
                    )
                batch.errors.append(StreamEntryError.from_exception(ex))
                callback.transformer_error(batch, transformer, exception=ex)

        for writer in self._writers:
            try:
                batch = writer.write(batch) or batch
            except Exception as ex:
                if log.isEnabledFor(logging.ERROR):
                    log.exception(
                        "Unexpected error in writer: %s: %s", repr(writer), repr(batch)
                    )
                batch.errors.append(StreamEntryError.from_exception(ex))
                callback.writer_error(batch, writer, exception=ex)
        callback.batch_finished(batch)

    def finish(self, callback: DataStreamCallback):
        """Call ``finish()`` on every writer.

        A failing writer is logged and reported to the callback with
        ``batch=None``; the remaining writers are still finished.
        """
        for writer in self._writers:
            try:
                writer.finish()
            except Exception as e:
                if log.isEnabledFor(logging.ERROR):
                    log.exception("Unexpected error in writer: %s", repr(writer))
                callback.writer_error(batch=None, writer=writer, exception=e)
|
55
|
-
|
56
|
-
|
57
|
-
class SynchronousDataStream(AbstractDataStream):
    """Data stream that builds a ``SynchronousDataStreamChain``."""

    def build_chain(self, identity) -> DataStreamChain:
        """Create transformer and writer instances and put them into a chain.

        The instances are obtained from ``current_datastreams`` with the
        given identity.
        """
        resolved_transformers = [
            current_datastreams.get_transformer(signature, identity=identity)
            for signature in self._transformers
        ]
        resolved_writers = [
            current_datastreams.get_writer(signature, identity=identity)
            for signature in self._writers
        ]
        return SynchronousDataStreamChain(
            transformers=resolved_transformers,
            writers=resolved_writers,
        )
|
@@ -1,18 +0,0 @@
|
|
1
|
-
from abc import ABC, abstractmethod
|
2
|
-
from typing import Union
|
3
|
-
|
4
|
-
from oarepo_runtime.datastreams.types import StreamBatch
|
5
|
-
|
6
|
-
|
7
|
-
class BaseTransformer(ABC):
    """Base transformer."""

    def __init__(self, **kwargs) -> None:
        """Accept any keyword arguments; the base class does not use them."""

    @abstractmethod
    def apply(self, batch: StreamBatch, *args, **kwargs) -> Union[StreamBatch, None]:
        """Applies the transformation to the batch.

        :param batch: the batch to transform.
        :returns: A StreamBatch. The transformed batch, or None.
        Raises TransformerError in case of errors.
        """
|