oarepo-runtime 1.10.3__py3-none-any.whl → 2.0.0.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oarepo_runtime/__init__.py +24 -0
- oarepo_runtime/api.py +111 -0
- oarepo_runtime/cli/__init__.py +10 -21
- oarepo_runtime/cli/search.py +34 -0
- oarepo_runtime/config.py +86 -13
- oarepo_runtime/ext.py +64 -82
- oarepo_runtime/proxies.py +21 -5
- oarepo_runtime/records/__init__.py +11 -50
- oarepo_runtime/records/drafts.py +24 -18
- oarepo_runtime/records/mapping.py +84 -0
- oarepo_runtime/records/pid_providers.py +43 -7
- oarepo_runtime/records/systemfields/__init__.py +15 -33
- oarepo_runtime/records/systemfields/mapping.py +41 -24
- oarepo_runtime/records/systemfields/publication_status.py +59 -0
- oarepo_runtime/services/__init__.py +12 -0
- oarepo_runtime/services/config/__init__.py +15 -21
- oarepo_runtime/services/config/link_conditions.py +69 -75
- oarepo_runtime/services/config/permissions.py +62 -0
- oarepo_runtime/services/records/__init__.py +14 -1
- oarepo_runtime/services/records/links.py +21 -11
- oarepo_runtime/services/records/mapping.py +42 -0
- oarepo_runtime/services/results.py +98 -109
- oarepo_runtime/services/schema/__init__.py +12 -44
- oarepo_runtime/services/schema/i18n.py +47 -22
- oarepo_runtime/services/schema/i18n_ui.py +61 -24
- {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev3.dist-info}/METADATA +9 -21
- oarepo_runtime-2.0.0.dev3.dist-info/RECORD +30 -0
- {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev3.dist-info}/WHEEL +1 -2
- oarepo_runtime-2.0.0.dev3.dist-info/entry_points.txt +5 -0
- oarepo_runtime/cli/assets.py +0 -145
- oarepo_runtime/cli/base.py +0 -25
- oarepo_runtime/cli/cf.py +0 -15
- oarepo_runtime/cli/check.py +0 -167
- oarepo_runtime/cli/configuration.py +0 -51
- oarepo_runtime/cli/fixtures.py +0 -167
- oarepo_runtime/cli/index.py +0 -272
- oarepo_runtime/cli/permissions/__init__.py +0 -6
- oarepo_runtime/cli/permissions/base.py +0 -26
- oarepo_runtime/cli/permissions/evaluate.py +0 -63
- oarepo_runtime/cli/permissions/list.py +0 -239
- oarepo_runtime/cli/permissions/search.py +0 -121
- oarepo_runtime/cli/validate.py +0 -150
- oarepo_runtime/datastreams/__init__.py +0 -38
- oarepo_runtime/datastreams/asynchronous.py +0 -247
- oarepo_runtime/datastreams/catalogue.py +0 -150
- oarepo_runtime/datastreams/datastreams.py +0 -152
- oarepo_runtime/datastreams/errors.py +0 -54
- oarepo_runtime/datastreams/ext.py +0 -41
- oarepo_runtime/datastreams/fixtures.py +0 -265
- oarepo_runtime/datastreams/json.py +0 -4
- oarepo_runtime/datastreams/readers/__init__.py +0 -39
- oarepo_runtime/datastreams/readers/attachments.py +0 -51
- oarepo_runtime/datastreams/readers/excel.py +0 -123
- oarepo_runtime/datastreams/readers/json.py +0 -27
- oarepo_runtime/datastreams/readers/service.py +0 -54
- oarepo_runtime/datastreams/readers/yaml.py +0 -14
- oarepo_runtime/datastreams/semi_asynchronous.py +0 -91
- oarepo_runtime/datastreams/synchronous.py +0 -70
- oarepo_runtime/datastreams/transformers.py +0 -18
- oarepo_runtime/datastreams/types.py +0 -323
- oarepo_runtime/datastreams/utils.py +0 -131
- oarepo_runtime/datastreams/writers/__init__.py +0 -21
- oarepo_runtime/datastreams/writers/attachments_file.py +0 -92
- oarepo_runtime/datastreams/writers/attachments_service.py +0 -118
- oarepo_runtime/datastreams/writers/publish.py +0 -70
- oarepo_runtime/datastreams/writers/service.py +0 -175
- oarepo_runtime/datastreams/writers/utils.py +0 -30
- oarepo_runtime/datastreams/writers/validation_errors.py +0 -20
- oarepo_runtime/datastreams/writers/yaml.py +0 -56
- oarepo_runtime/ext_config.py +0 -67
- oarepo_runtime/i18n/__init__.py +0 -3
- oarepo_runtime/info/__init__.py +0 -0
- oarepo_runtime/info/check.py +0 -95
- oarepo_runtime/info/permissions/__init__.py +0 -0
- oarepo_runtime/info/permissions/debug.py +0 -191
- oarepo_runtime/info/views.py +0 -586
- oarepo_runtime/profile.py +0 -60
- oarepo_runtime/records/dumpers/__init__.py +0 -8
- oarepo_runtime/records/dumpers/edtf_interval.py +0 -38
- oarepo_runtime/records/dumpers/multilingual_dumper.py +0 -34
- oarepo_runtime/records/entity_resolvers/__init__.py +0 -13
- oarepo_runtime/records/entity_resolvers/proxies.py +0 -57
- oarepo_runtime/records/mappings/__init__.py +0 -0
- oarepo_runtime/records/mappings/rdm_parent_mapping.json +0 -483
- oarepo_runtime/records/owners/__init__.py +0 -3
- oarepo_runtime/records/owners/registry.py +0 -22
- oarepo_runtime/records/relations/__init__.py +0 -22
- oarepo_runtime/records/relations/base.py +0 -296
- oarepo_runtime/records/relations/internal.py +0 -46
- oarepo_runtime/records/relations/lookup.py +0 -28
- oarepo_runtime/records/relations/pid_relation.py +0 -102
- oarepo_runtime/records/systemfields/featured_file.py +0 -45
- oarepo_runtime/records/systemfields/has_draftcheck.py +0 -47
- oarepo_runtime/records/systemfields/icu.py +0 -371
- oarepo_runtime/records/systemfields/owner.py +0 -115
- oarepo_runtime/records/systemfields/record_status.py +0 -35
- oarepo_runtime/records/systemfields/selectors.py +0 -98
- oarepo_runtime/records/systemfields/synthetic.py +0 -130
- oarepo_runtime/resources/__init__.py +0 -4
- oarepo_runtime/resources/config.py +0 -12
- oarepo_runtime/resources/file_resource.py +0 -15
- oarepo_runtime/resources/json_serializer.py +0 -27
- oarepo_runtime/resources/localized_ui_json_serializer.py +0 -54
- oarepo_runtime/resources/resource.py +0 -53
- oarepo_runtime/resources/responses.py +0 -20
- oarepo_runtime/services/components.py +0 -429
- oarepo_runtime/services/config/draft_link.py +0 -23
- oarepo_runtime/services/config/permissions_presets.py +0 -174
- oarepo_runtime/services/config/service.py +0 -117
- oarepo_runtime/services/custom_fields/__init__.py +0 -80
- oarepo_runtime/services/custom_fields/mappings.py +0 -188
- oarepo_runtime/services/entity/__init__.py +0 -0
- oarepo_runtime/services/entity/config.py +0 -14
- oarepo_runtime/services/entity/schema.py +0 -9
- oarepo_runtime/services/entity/service.py +0 -48
- oarepo_runtime/services/expansions/__init__.py +0 -0
- oarepo_runtime/services/expansions/expandable_fields.py +0 -21
- oarepo_runtime/services/expansions/service.py +0 -4
- oarepo_runtime/services/facets/__init__.py +0 -33
- oarepo_runtime/services/facets/base.py +0 -12
- oarepo_runtime/services/facets/date.py +0 -72
- oarepo_runtime/services/facets/enum.py +0 -11
- oarepo_runtime/services/facets/facet_groups_names.py +0 -17
- oarepo_runtime/services/facets/max_facet.py +0 -13
- oarepo_runtime/services/facets/multilingual_facet.py +0 -33
- oarepo_runtime/services/facets/nested_facet.py +0 -32
- oarepo_runtime/services/facets/params.py +0 -192
- oarepo_runtime/services/facets/year_histogram.py +0 -200
- oarepo_runtime/services/files/__init__.py +0 -8
- oarepo_runtime/services/files/components.py +0 -62
- oarepo_runtime/services/files/service.py +0 -16
- oarepo_runtime/services/generators.py +0 -10
- oarepo_runtime/services/permissions/__init__.py +0 -3
- oarepo_runtime/services/permissions/generators.py +0 -103
- oarepo_runtime/services/relations/__init__.py +0 -0
- oarepo_runtime/services/relations/components.py +0 -15
- oarepo_runtime/services/relations/errors.py +0 -18
- oarepo_runtime/services/relations/mapping.py +0 -38
- oarepo_runtime/services/schema/cf.py +0 -13
- oarepo_runtime/services/schema/i18n_validation.py +0 -7
- oarepo_runtime/services/schema/marshmallow.py +0 -44
- oarepo_runtime/services/schema/marshmallow_to_json_schema.py +0 -72
- oarepo_runtime/services/schema/oneofschema.py +0 -192
- oarepo_runtime/services/schema/polymorphic.py +0 -21
- oarepo_runtime/services/schema/rdm.py +0 -146
- oarepo_runtime/services/schema/rdm_ui.py +0 -156
- oarepo_runtime/services/schema/ui.py +0 -251
- oarepo_runtime/services/schema/validation.py +0 -70
- oarepo_runtime/services/search.py +0 -282
- oarepo_runtime/services/service.py +0 -61
- oarepo_runtime/tasks.py +0 -6
- oarepo_runtime/translations/cs/LC_MESSAGES/messages.mo +0 -0
- oarepo_runtime/translations/cs/LC_MESSAGES/messages.po +0 -95
- oarepo_runtime/translations/default_translations.py +0 -6
- oarepo_runtime/translations/en/LC_MESSAGES/messages.mo +0 -0
- oarepo_runtime/translations/en/LC_MESSAGES/messages.po +0 -97
- oarepo_runtime/translations/messages.pot +0 -100
- oarepo_runtime/uow.py +0 -146
- oarepo_runtime/utils/__init__.py +0 -0
- oarepo_runtime/utils/functools.py +0 -37
- oarepo_runtime/utils/identity_utils.py +0 -35
- oarepo_runtime/utils/index.py +0 -11
- oarepo_runtime/utils/path.py +0 -97
- oarepo_runtime-1.10.3.dist-info/RECORD +0 -163
- oarepo_runtime-1.10.3.dist-info/entry_points.txt +0 -16
- oarepo_runtime-1.10.3.dist-info/top_level.txt +0 -2
- tests/marshmallow_to_json/__init__.py +0 -0
- tests/marshmallow_to_json/test_datacite_ui_schema.py +0 -1410
- tests/marshmallow_to_json/test_simple_schema.py +0 -52
- tests/pkg_data/__init__.py +0 -0
- {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev3.dist-info}/licenses/LICENSE +0 -0
@@ -1,247 +0,0 @@
|
|
1
|
-
import logging
|
2
|
-
from typing import Any, Dict, List, Union
|
3
|
-
|
4
|
-
import celery
|
5
|
-
from celery.canvas import Signature as CelerySignature
|
6
|
-
from celery.canvas import chain
|
7
|
-
from celery.result import allow_join_result
|
8
|
-
from flask_principal import (
|
9
|
-
ActionNeed,
|
10
|
-
Identity,
|
11
|
-
ItemNeed,
|
12
|
-
Need,
|
13
|
-
RoleNeed,
|
14
|
-
TypeNeed,
|
15
|
-
UserNeed,
|
16
|
-
)
|
17
|
-
|
18
|
-
from oarepo_runtime.datastreams.datastreams import (
|
19
|
-
AbstractDataStream,
|
20
|
-
DataStreamChain,
|
21
|
-
Signature,
|
22
|
-
)
|
23
|
-
|
24
|
-
from .datastreams import DataStreamCallback, StreamBatch
|
25
|
-
from .json import JSONObject
|
26
|
-
from .types import StreamEntryError
|
27
|
-
from .writers import BaseWriter
|
28
|
-
|
29
|
-
timing = logging.getLogger("oai.harvester.timing")
|
30
|
-
log = logging.getLogger("datastreams")
|
31
|
-
|
32
|
-
|
33
|
-
class AsynchronousDataStream(AbstractDataStream):
    """Data stream whose batches are processed by a Celery task chain.

    The chain itself is built by :meth:`build_chain` as an
    :class:`AsynchronousDataStreamChain`.
    """

    def __init__(
        self,
        *,
        readers: List[Union[Signature, Any]],
        writers: List[Union[Signature, Any]],
        transformers: List[Union[Signature, Any]] = None,
        callback: Union[DataStreamCallback, Any],
        batch_size=100,
        on_background=True,
        reader_callback=None,
    ):
        """Store the stream definition.

        :param readers: ordered list of reader signatures (or instances).
        :param writers: ordered list of writer signatures (or instances).
        :param transformers: optional ordered list of transformer signatures.
        :param callback: object notified about progress; ``_reader_error``
            calls its ``apply(kwargs=...)`` method.
        :param batch_size: forwarded to the base class.
        :param on_background: forwarded to the chain, which uses it to pick
            ``apply_async`` (true) or ``apply`` (false).
        :param reader_callback: forwarded to the base class.
        """
        base_arguments = {
            "readers": readers,
            "writers": writers,
            "transformers": transformers,
            "callback": callback,
            "batch_size": batch_size,
            "reader_callback": reader_callback,
        }
        super().__init__(**base_arguments)
        self._on_background = on_background

    def build_chain(self, identity) -> DataStreamChain:
        """Return the chain that will process batches on behalf of ``identity``."""
        return AsynchronousDataStreamChain(
            identity=identity,
            transformers=self._transformers,
            writers=self._writers,
            on_background=self._on_background,
        )

    def _reader_error(self, reader, exception):
        """Report a reader failure through the callback's ``apply`` method."""
        serialized_error = StreamEntryError.from_exception(exception).json
        self._callback.apply(
            kwargs={"callback": "reader_error", "exception": serialized_error}
        )
|
70
|
-
|
71
|
-
|
72
|
-
class AsynchronousDataStreamChain(DataStreamChain):
    """Chain that turns transformers and writers into a Celery ``chain``."""

    def __init__(
        self,
        identity: Identity,
        transformers: List[Signature],
        writers: List[Signature],
        on_background=True,
    ):
        """Remember the processors, the identity and the execution mode."""
        self._identity = identity
        self._transformers = transformers
        self._writers = writers
        self._on_background = on_background

    def process(self, batch: StreamBatch, callback: CelerySignature):
        """Build the task chain for ``callback`` and run it on ``batch.json``."""
        task_chain = self._prepare_chain(callback)
        self._call(task_chain, batch=batch.json)

    def _prepare_chain(self, callback: CelerySignature):
        """Assemble the Celery chain for a single batch.

        The chain is: a ``batch_started`` notification, one
        ``run_datastream_processor`` step per transformer and then per writer,
        and a ``batch_finished`` notification. ``datastreams_error_callback``
        is linked as the chain's error handler.
        """
        steps = [
            datastreams_call_callback.signature(
                (), kwargs={"callback": callback, "callback_name": "batch_started"}
            )
        ]
        identity_json = serialize_identity(self._identity)

        # Transformers run first, writers afterwards; both use the same task.
        processors = [*(self._transformers or []), *self._writers]
        steps.extend(
            run_datastream_processor.signature(
                kwargs={
                    "processor": processor.json,
                    "identity": identity_json,
                    "callback": callback,
                }
            )
            for processor in processors
        )

        steps.append(
            datastreams_call_callback.signature(
                (),
                kwargs={
                    "callback": callback,
                    "callback_name": "batch_finished",
                    "identity": identity_json,
                },
            )
        )

        chain_sig = chain(*steps)
        chain_sig.link_error(
            datastreams_error_callback.signature(
                (),
                kwargs={
                    "callback": callback,
                    "callback_name": "error",
                    "identity": identity_json,
                },
            )
        )
        return chain_sig

    def _call(self, sig, **kwargs):
        """Invoke ``sig`` with ``kwargs`` via ``apply_async`` or ``apply``."""
        runner = sig.apply_async if self._on_background else sig.apply
        runner([], kwargs)

    def finish(self, callback: Signature):
        """Do nothing.

        Dumpers that need a finish step (such as file dumpers) are not
        supported in async mode.
        """
|
152
|
-
|
153
|
-
|
154
|
-
@celery.shared_task
def run_datastream_processor(batch: Dict, *, processor: JSONObject, identity, callback):
    """Run one transformer or writer over a serialized batch.

    :param batch: JSON form of a ``StreamBatch``.
    :param processor: JSON form of the processor's ``Signature``.
    :param identity: identity as produced by ``serialize_identity`` (or None).
    :param callback: Celery signature, or its dict form, that is applied with
        ``callback="<kind>_error"`` when the processor raises.
    :return: JSON form of the (possibly replaced) batch, so that the next
        task in the chain receives it.
    """
    identity = deserialize_identity(identity)
    processor_signature = Signature.from_json(processor)
    deserialized_batch: StreamBatch = StreamBatch.from_json(batch)

    processor = processor_signature.resolve(identity=identity)
    try:
        if isinstance(processor, BaseWriter):
            outcome = processor.write(deserialized_batch)
        else:
            outcome = processor.apply(deserialized_batch)
        # Processors may return a new batch or modify the given one in place.
        deserialized_batch = outcome or deserialized_batch

    except Exception as ex:
        log.exception("Error processing batch inside %s", processor_signature)

        err = StreamEntryError.from_exception(ex)
        deserialized_batch.errors.append(err)
        # Wrap the callback the same way the sibling tasks do; presumably it
        # arrives as a plain dict once it has been through message
        # serialization, and a dict has no ``apply`` method.
        CelerySignature(callback).apply(
            (),
            {
                "batch": deserialized_batch.json,
                # deserialize_identity() may have returned None above.
                "identity": (
                    serialize_identity(identity) if identity is not None else None
                ),
                "callback": f"{processor_signature.kind.value}_error",
                "exception": err.json,
            },
        )
    return deserialized_batch.json
|
186
|
-
|
187
|
-
|
188
|
-
@celery.shared_task
def datastreams_call_callback(
    batch: Dict, *, identity=None, callback, callback_name, **kwargs
):
    """Apply ``callback`` for the event ``callback_name`` and pass ``batch`` on.

    The callback is applied with ``batch``, ``identity``,
    ``callback=callback_name`` and any extra keyword arguments. The batch is
    returned unchanged.
    """
    notification = CelerySignature(callback)
    payload = dict(batch=batch, identity=identity, callback=callback_name, **kwargs)
    notification.apply(kwargs=payload)
    return batch
|
197
|
-
|
198
|
-
|
199
|
-
@celery.shared_task
def datastreams_error_callback(
    parent_task_id, *, identity=None, callback, callback_name, **kwargs
):
    """Report the outcome of the task ``parent_task_id`` through ``callback``.

    The task's result is fetched without re-raising (``propagate=False``).
    The callback is then applied with an empty batch, the task's ``result``
    and ``traceback``, and any extra keyword arguments.
    """
    from celery import current_app

    with allow_join_result():
        failed_task = current_app.AsyncResult(parent_task_id)
        failed_task.get(propagate=False)

    # dict(...) is used on purpose: it rejects duplicate keys from **kwargs.
    payload = dict(
        batch={},
        identity=identity,
        callback=callback_name,
        result=failed_task.result,
        traceback=failed_task.traceback,
        **kwargs,
    )
    CelerySignature(callback).apply(kwargs=payload)
|
220
|
-
|
221
|
-
|
222
|
-
def serialize_identity(identity):
    """Convert an identity into a JSON-serializable dictionary.

    :param identity: object with ``id``, ``auth_type`` and ``provides``
        attributes, where every item of ``provides`` has ``_asdict()``;
        ``None`` is accepted as well.
    :return: ``None`` for ``None`` (mirroring ``deserialize_identity``),
        otherwise a dict with the keys ``id``, ``auth_type`` and ``provides``.
    """
    if identity is None:
        return None
    return {
        "id": identity.id,
        "auth_type": identity.auth_type,
        "provides": [
            # The class name is stored so the need can be rebuilt later.
            {"type": type(need).__name__, "params": need._asdict()}
            for need in identity.provides
        ],
    }
|
230
|
-
|
231
|
-
|
232
|
-
def deserialize_identity(identity_dict):
    """Rebuild an ``Identity`` from the output of ``serialize_identity``.

    :param identity_dict: dict with the keys ``id``, ``auth_type`` and
        ``provides``, or ``None``.
    :return: ``None`` for ``None``, otherwise an ``Identity`` whose
        ``provides`` set holds the reconstructed needs.
    :raises KeyError: if a stored need type is not one of the known classes.
    """
    if identity_dict is None:
        return None

    need_classes = {
        "Need": Need,
        "UserNeed": UserNeed,
        "RoleNeed": RoleNeed,
        "TypeNeed": TypeNeed,
        "ActionNeed": ActionNeed,
        "ItemNeed": ItemNeed,
    }
    identity = Identity(id=identity_dict["id"], auth_type=identity_dict["auth_type"])
    for stored_need in identity_dict["provides"]:
        need_factory = need_classes[stored_need["type"]]
        identity.provides.add(need_factory(**stored_need["params"]))
    return identity
|
@@ -1,150 +0,0 @@
|
|
1
|
-
import dataclasses
|
2
|
-
from pathlib import Path
|
3
|
-
from typing import Iterator, List
|
4
|
-
|
5
|
-
import yaml
|
6
|
-
from flask import current_app
|
7
|
-
|
8
|
-
from oarepo_runtime.datastreams.datastreams import Signature, SignatureKind
|
9
|
-
|
10
|
-
from .errors import DataStreamCatalogueError
|
11
|
-
|
12
|
-
|
13
|
-
@dataclasses.dataclass
class CatalogueDataStream:
    """One named stream of a catalogue, split into its processor signatures."""

    # Key of the stream inside the catalogue.
    stream_name: str
    # Signatures of the stream's readers.
    readers: List[Signature]
    # Signatures of the stream's writers.
    writers: List[Signature]
    # Signatures of the stream's transformers.
    transformers: List[Signature]
|
19
|
-
|
20
|
-
|
21
|
-
class DataStreamCatalogue:
    """Catalogue of data streams loaded from a YAML file or given directly."""

    def __init__(self, catalogue, content=None) -> None:
        """
        Catalogue of data streams. The catalogue contains a dict of:
        stream_name: stream_definition, where stream definition is an array of:

        - reader: reader_class
          <rest of parameters go to reader constructor>
        - transformer: transformer_class
          <rest of parameters go to transformer constructor>
        - writer: writer_class
          <rest of parameters go to writer constructor>

        If reader class is not passed and _source_ is, then the reader class will be taken from the
        DATASTREAMS_READERS_BY_EXTENSION config variable - map from file extension to reader class.

        If 'service' is passed, service writer will be used with this service

        Transformer class must always be passed.

        :param catalogue: path of the catalogue file; its parent directory is
            passed to every signature as ``base_path``.
        :param content: already parsed catalogue; when truthy the file is not
            read.
        """
        self._catalogue_path = Path(catalogue)
        if content:
            self._catalogue = content
        else:
            # Decode explicitly as UTF-8 rather than with the locale default.
            with open(catalogue, encoding="utf-8") as f:
                self._catalogue = yaml.safe_load(f)

    @property
    def path(self):
        """Path of the catalogue file."""
        return self._catalogue_path

    @property
    def directory(self):
        """Directory that contains the catalogue file."""
        return self._catalogue_path.parent

    def get_datastreams(self) -> Iterator[CatalogueDataStream]:
        """Yield the parsed definition of every stream in the catalogue."""
        for stream_name in self._catalogue:
            yield self.get_datastream(stream_name)

    def __iter__(self):
        """Iterate over the stream names."""
        return iter(self._catalogue)

    def get_datastream(
        self,
        stream_name,
    ) -> CatalogueDataStream:
        """Parse the stream ``stream_name`` into reader/transformer/writer lists.

        :raises DataStreamCatalogueError: when an entry can not be classified
            or its reader can not be determined; ``entry`` and ``stream_name``
            are set on the raised error.
        """
        stream_definition = self._catalogue[stream_name]
        base_path = str(self.directory)
        readers = []
        transformers = []
        writers = []
        for entry in stream_definition:
            entry = {**entry}
            try:
                # Explicit kinds win over the "source"/"service" shortcuts.
                if "reader" in entry:
                    readers.append(get_signature("reader", entry, base_path=base_path))
                elif "transformer" in entry:
                    transformers.append(
                        get_signature("transformer", entry, base_path=base_path)
                    )
                elif "writer" in entry:
                    writers.append(get_signature("writer", entry, base_path=base_path))
                elif "source" in entry:
                    readers.append(self.get_reader(entry))
                elif "service" in entry:
                    writers.append(self.get_service_writer(entry))
                else:
                    raise DataStreamCatalogueError(
                        "Can not decide what this record is - reader, transformer or service?"
                    )
            except DataStreamCatalogueError as e:
                # Attach the location of the problem before passing it on.
                e.entry = entry
                e.stream_name = stream_name
                raise
        return CatalogueDataStream(
            stream_name=stream_name,
            readers=readers,
            transformers=transformers,
            writers=writers,
        )

    def get_reader(self, entry):
        """Return a reader signature, inferring the reader from ``source``.

        When the entry has no ``reader``, the extension of ``source`` is
        looked up in the ``DATASTREAMS_READERS_BY_EXTENSION`` config and then
        in ``DEFAULT_DATASTREAMS_READERS_BY_EXTENSION``.

        :raises DataStreamCatalogueError: when the entry has neither
            ``reader`` nor ``source``, or no reader is configured for the
            extension.
        """
        entry = {**entry}
        if not entry.get("reader"):
            if "source" not in entry:
                # Previously this surfaced as an UnboundLocalError raised from
                # inside the KeyError handler below.
                raise DataStreamCatalogueError(
                    "Can not determine the reader - entry has neither 'reader' nor 'source'"
                )
            source = Path(entry["source"])
            ext = source.suffix[1:]
            config = current_app.config
            try:
                reader_class = (
                    config["DATASTREAMS_READERS_BY_EXTENSION"].get(ext)
                    or config["DEFAULT_DATASTREAMS_READERS_BY_EXTENSION"][ext]
                )
            except KeyError as e:
                raise DataStreamCatalogueError(
                    f"Do not have loader for file {source} - extension {ext} not defined in DATASTREAMS_READERS_BY_EXTENSION config"
                ) from e
            entry["reader"] = reader_class
        return get_signature(
            "reader",
            entry,
            base_path=str(self.directory),
        )

    def get_service_writer(self, entry):
        """Return a ``service`` writer signature built from the whole entry."""
        return Signature(
            SignatureKind("writer"),
            "service",
            kwargs={**entry, "base_path": str(self.directory)},
        )
|
146
|
-
|
147
|
-
|
148
|
-
def get_signature(kind, entry, **kwargs):
    """Build a ``Signature`` of the given kind from a catalogue entry.

    The value stored under the key ``kind`` becomes the signature name; the
    remaining entry items, overridden by ``kwargs``, become its kwargs.
    """
    parameters = {**entry, **kwargs}
    signature_kind = SignatureKind(kind)
    processor_name = parameters.pop(kind)
    return Signature(kind=signature_kind, name=processor_name, kwargs=parameters)
|
@@ -1,152 +0,0 @@
|
|
1
|
-
import abc
|
2
|
-
import copy
|
3
|
-
import dataclasses
|
4
|
-
from enum import Enum
|
5
|
-
from typing import Any, Callable, Iterator, List, Union
|
6
|
-
|
7
|
-
from invenio_access.permissions import system_identity
|
8
|
-
|
9
|
-
from oarepo_runtime.datastreams.types import (
|
10
|
-
DataStreamCallback,
|
11
|
-
StreamBatch,
|
12
|
-
StreamEntry,
|
13
|
-
)
|
14
|
-
from oarepo_runtime.proxies import current_datastreams
|
15
|
-
|
16
|
-
from .json import JSONObject
|
17
|
-
|
18
|
-
|
19
|
-
class DataStreamChain(abc.ABC):
    """Interface of the object that processes the batches of a data stream."""

    @abc.abstractmethod
    def process(self, batch: StreamBatch, callback: Union[DataStreamCallback, Any]):
        """Process one batch, reporting through ``callback``."""

    @abc.abstractmethod
    def finish(self, callback: Union[DataStreamCallback, Any]):
        """Finish the processing, reporting through ``callback``."""
|
27
|
-
|
28
|
-
try:
    from enum import StrEnum
except ImportError:

    class StrEnum(str, Enum):
        """Fallback for interpreters whose ``enum`` module has no ``StrEnum``."""

        def __str__(self):
            # Match the standard StrEnum: str(member) is the member's value,
            # not "ClassName.MEMBER".
            return str.__str__(self)


class SignatureKind(StrEnum):
    """Kind of a data stream processor described by a signature."""

    READER = "reader"
    TRANSFORMER = "transformer"
    WRITER = "writer"
|
42
|
-
|
43
|
-
|
44
|
-
@dataclasses.dataclass
class Signature:
    """Serializable description of a reader, transformer or writer."""

    # Which kind of processor this is.
    kind: SignatureKind
    # Name under which the processor class is looked up.
    name: str
    # Keyword arguments for the processor.
    kwargs: JSONObject

    @property
    def json(self):
        """Dictionary form of the signature (``kind`` is stored as its value)."""
        return dict(kind=self.kind.value, name=self.name, kwargs=self.kwargs)

    @classmethod
    def from_json(cls, json):
        """Create a signature from the dictionary produced by :attr:`json`."""
        kind = SignatureKind(json["kind"])
        name = json["name"]
        kwargs = json["kwargs"]
        return cls(kind=kind, name=name, kwargs=kwargs)

    def resolve(self, *, identity, **kwargs):
        """Return the transformer or writer instance this signature describes.

        :raises ValueError: for any kind other than transformer and writer.
        """
        if self.kind == SignatureKind.TRANSFORMER:
            factory = current_datastreams.get_transformer
        elif self.kind == SignatureKind.WRITER:
            factory = current_datastreams.get_writer
        else:
            raise ValueError(f"Unknown signature kind: {self.kind}")
        return factory(self, **kwargs, identity=identity)
|
71
|
-
|
72
|
-
|
73
|
-
class AbstractDataStream(abc.ABC):
    """Base data stream: reads entries, groups them into batches and passes
    every batch to the chain created by :meth:`build_chain`.
    """

    def __init__(
        self,
        *,
        readers: List[Union[Signature, Any]],
        writers: List[Union[Signature, Any]],
        transformers: List[Union[Signature, Any]] = None,
        callback: Union[DataStreamCallback, Signature],
        batch_size=1,
        identity=system_identity,
        reader_callback: Callable[[StreamBatch], None] = None,
    ):
        """Constructor.

        :param readers: an ordered list of readers (whatever a reader is).
        :param writers: an ordered list of writers (whatever a writer is).
        :param transformers: an ordered list of transformers to apply (whatever a transformer is).
        :param callback: passed to the chain together with every batch.
        :param batch_size: number of entries after which a batch is emitted.
        :param identity: identity handed to the readers.
        :param reader_callback: called with every batch before it is yielded.
        """
        self._readers: List[Signature] = list(readers)
        self._transformers: List[Signature] = list(transformers or [])
        self._writers: List[Signature] = list(writers)
        self._callback = callback
        self._batch_size = batch_size
        self._identity = identity
        self._reader_callback = reader_callback

    def _read_entries(self) -> Iterator[StreamEntry]:
        """Yield the entries of all readers, numbering them from 1.

        The numbering continues across readers. An exception raised while
        iterating a reader is reported through :meth:`_reader_error` and the
        next reader is tried.
        """
        last_seq = 0
        for reader_signature in self._readers:
            reader = current_datastreams.get_reader(
                reader_signature, identity=self._identity
            )
            try:
                for last_seq, entry in enumerate(reader, start=last_seq + 1):
                    entry.seq = last_seq
                    yield entry
            except Exception as ex:
                self._reader_error(reader, exception=ex)

    def _read_batches(self, context) -> Iterator[StreamBatch]:
        """Group the entries into batches and yield them.

        A final batch flagged ``last=True`` is always yielded, even when it
        holds no entries. Every batch receives its own deep copy of
        ``context``.
        """
        emitted = 0

        def emit(entries, last):
            # Number the batch, notify the reader callback and hand it back.
            nonlocal emitted
            emitted += 1
            batch = StreamBatch(
                entries=entries,
                seq=emitted,
                context=copy.deepcopy(context),
                last=last,
            )
            if self._reader_callback:
                self._reader_callback(batch)
            return batch

        pending = []
        for entry in self._read_entries():
            if len(pending) == self._batch_size:
                yield emit(pending, last=False)
                pending = []
            pending.append(entry)
        yield emit(pending, last=True)

    def process(self, context=None, identity=system_identity):
        """Read all batches and pass each of them to the chain."""
        context = context or {}
        chain = self.build_chain(identity)
        for batch in self._read_batches(context):
            chain.process(batch, self._callback)

    @abc.abstractmethod
    def build_chain(self, identity) -> DataStreamChain:
        """Create the chain that processes the batches for ``identity``."""

    def _reader_error(self, reader, exception):
        """Forward a reader failure to the callback's ``reader_error``."""
        self._callback.reader_error(reader, exception=exception)
|
@@ -1,54 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
#
|
3
|
-
# Copyright (C) 2021 CERN.
|
4
|
-
#
|
5
|
-
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
6
|
-
# modify it under the terms of the MIT License; see LICENSE file for more
|
7
|
-
# details.
|
8
|
-
|
9
|
-
"""Datastream errors."""
|
10
|
-
from typing import Union
|
11
|
-
|
12
|
-
from .json import JSONObject
|
13
|
-
|
14
|
-
|
15
|
-
class DataStreamError(Exception):
    """Base error of the data stream processors."""

    def __init__(
        self,
        message,
        code=None,
        location=None,
        detail: Union[JSONObject, None] = None,
    ):
        """
        @param message: a string message (overview)
        @param code: a machine processable code
        @param location: location inside the json, where the error was detected. Using dot notation,
                         arrays are indexed from 0, for example: `metadata.titles.0.language`
        @param detail: a json-serializable object (dictionary) with details
        """
        super().__init__(message)
        # Only a dictionary (or nothing) is accepted as the detail.
        assert detail is None or isinstance(detail, dict)
        self.message = message
        self.code = code
        self.location = location
        self.detail = detail
|
36
|
-
|
37
|
-
|
38
|
-
class ReaderError(DataStreamError):
    """Reader exception."""
|
40
|
-
|
41
|
-
|
42
|
-
class TransformerError(DataStreamError):
    """Exception raised when applying a transformer."""
|
44
|
-
|
45
|
-
|
46
|
-
class WriterError(DataStreamError):
    """Writer exception."""
|
48
|
-
|
49
|
-
|
50
|
-
class DataStreamCatalogueError(Exception):
    """Error in a data stream catalogue.

    ``entry`` and ``stream_name`` say where the problem is; both default to
    ``None`` and may be assigned after the error has been created.
    """

    def __init__(self, message, entry=None, stream_name=None) -> None:
        """Store the message together with the optional location."""
        super().__init__(message)
        self.stream_name = stream_name
        self.entry = entry
|
@@ -1,41 +0,0 @@
|
|
1
|
-
import functools
|
2
|
-
|
3
|
-
from invenio_base.utils import obj_or_import_string
|
4
|
-
|
5
|
-
from oarepo_runtime.datastreams.datastreams import Signature
|
6
|
-
|
7
|
-
|
8
|
-
class OARepoDataStreamsExt:
    """Creates readers, writers and transformers from their signatures.

    The processor classes are taken from the application config keys
    ``DATASTREAMS_READERS``, ``DATASTREAMS_WRITERS`` and
    ``DATASTREAMS_TRANSFORMERS``.
    """

    def __init__(self, app):
        """Remember the application whose config holds the class maps."""
        self.app = app
        # Resolved class maps, keyed by config name. Kept on the instance so
        # that the cache is released together with the extension object.
        self._config_classes = {}

    def get_reader(self, reader, identity, **kwargs):
        """Return a reader instance for ``reader`` (signature or instance)."""
        return self._get_instance("DATASTREAMS_READERS", identity, kwargs, reader)

    def get_writer(self, writer, identity, **kwargs):
        """Return a writer instance for ``writer`` (signature or instance)."""
        return self._get_instance("DATASTREAMS_WRITERS", identity, kwargs, writer)

    def get_transformer(self, transformer, identity, **kwargs):
        """Return a transformer instance for ``transformer``."""
        return self._get_instance(
            "DATASTREAMS_TRANSFORMERS", identity, kwargs, transformer
        )

    def _get_instance(self, config_name, identity, kwargs, inst):
        """Instantiate the class named by a signature.

        Anything that is not a ``Signature`` is returned unchanged. The
        constructor receives the signature's kwargs overridden by ``kwargs``;
        ``identity`` is added only when neither of them supplies it.

        :raises KeyError: when the signature name is not in the config map.
        """
        if not isinstance(inst, Signature):
            return inst

        config_classes = self._get_classes_from_config(config_name)
        if inst.name not in config_classes:
            raise KeyError(f"'{inst.name}' not found in config {config_name}")
        processor_class = config_classes[inst.name]
        all_kwargs = {**(inst.kwargs or {}), **kwargs}
        if "identity" not in all_kwargs:
            all_kwargs["identity"] = identity
        return processor_class(**all_kwargs)

    def _get_classes_from_config(self, config_name):
        """Return the config map ``config_name`` with its values resolved
        through ``obj_or_import_string``; the result is cached per instance.
        """
        if config_name not in self._config_classes:
            self._config_classes[config_name] = {
                class_key: obj_or_import_string(class_name)
                for class_key, class_name in self.app.config[config_name].items()
            }
        return self._config_classes[config_name]
|