oarepo-runtime 1.10.3__py3-none-any.whl → 2.0.0.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. oarepo_runtime/__init__.py +24 -0
  2. oarepo_runtime/api.py +111 -0
  3. oarepo_runtime/cli/__init__.py +10 -21
  4. oarepo_runtime/cli/search.py +34 -0
  5. oarepo_runtime/config.py +86 -13
  6. oarepo_runtime/ext.py +64 -82
  7. oarepo_runtime/proxies.py +21 -5
  8. oarepo_runtime/records/__init__.py +11 -50
  9. oarepo_runtime/records/drafts.py +24 -18
  10. oarepo_runtime/records/mapping.py +84 -0
  11. oarepo_runtime/records/pid_providers.py +43 -7
  12. oarepo_runtime/records/systemfields/__init__.py +15 -33
  13. oarepo_runtime/records/systemfields/mapping.py +41 -24
  14. oarepo_runtime/records/systemfields/publication_status.py +59 -0
  15. oarepo_runtime/services/__init__.py +12 -0
  16. oarepo_runtime/services/config/__init__.py +15 -21
  17. oarepo_runtime/services/config/link_conditions.py +69 -75
  18. oarepo_runtime/services/config/permissions.py +62 -0
  19. oarepo_runtime/services/records/__init__.py +14 -1
  20. oarepo_runtime/services/records/links.py +21 -11
  21. oarepo_runtime/services/records/mapping.py +42 -0
  22. oarepo_runtime/services/results.py +98 -109
  23. oarepo_runtime/services/schema/__init__.py +12 -44
  24. oarepo_runtime/services/schema/i18n.py +47 -22
  25. oarepo_runtime/services/schema/i18n_ui.py +61 -24
  26. {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev3.dist-info}/METADATA +9 -21
  27. oarepo_runtime-2.0.0.dev3.dist-info/RECORD +30 -0
  28. {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev3.dist-info}/WHEEL +1 -2
  29. oarepo_runtime-2.0.0.dev3.dist-info/entry_points.txt +5 -0
  30. oarepo_runtime/cli/assets.py +0 -145
  31. oarepo_runtime/cli/base.py +0 -25
  32. oarepo_runtime/cli/cf.py +0 -15
  33. oarepo_runtime/cli/check.py +0 -167
  34. oarepo_runtime/cli/configuration.py +0 -51
  35. oarepo_runtime/cli/fixtures.py +0 -167
  36. oarepo_runtime/cli/index.py +0 -272
  37. oarepo_runtime/cli/permissions/__init__.py +0 -6
  38. oarepo_runtime/cli/permissions/base.py +0 -26
  39. oarepo_runtime/cli/permissions/evaluate.py +0 -63
  40. oarepo_runtime/cli/permissions/list.py +0 -239
  41. oarepo_runtime/cli/permissions/search.py +0 -121
  42. oarepo_runtime/cli/validate.py +0 -150
  43. oarepo_runtime/datastreams/__init__.py +0 -38
  44. oarepo_runtime/datastreams/asynchronous.py +0 -247
  45. oarepo_runtime/datastreams/catalogue.py +0 -150
  46. oarepo_runtime/datastreams/datastreams.py +0 -152
  47. oarepo_runtime/datastreams/errors.py +0 -54
  48. oarepo_runtime/datastreams/ext.py +0 -41
  49. oarepo_runtime/datastreams/fixtures.py +0 -265
  50. oarepo_runtime/datastreams/json.py +0 -4
  51. oarepo_runtime/datastreams/readers/__init__.py +0 -39
  52. oarepo_runtime/datastreams/readers/attachments.py +0 -51
  53. oarepo_runtime/datastreams/readers/excel.py +0 -123
  54. oarepo_runtime/datastreams/readers/json.py +0 -27
  55. oarepo_runtime/datastreams/readers/service.py +0 -54
  56. oarepo_runtime/datastreams/readers/yaml.py +0 -14
  57. oarepo_runtime/datastreams/semi_asynchronous.py +0 -91
  58. oarepo_runtime/datastreams/synchronous.py +0 -70
  59. oarepo_runtime/datastreams/transformers.py +0 -18
  60. oarepo_runtime/datastreams/types.py +0 -323
  61. oarepo_runtime/datastreams/utils.py +0 -131
  62. oarepo_runtime/datastreams/writers/__init__.py +0 -21
  63. oarepo_runtime/datastreams/writers/attachments_file.py +0 -92
  64. oarepo_runtime/datastreams/writers/attachments_service.py +0 -118
  65. oarepo_runtime/datastreams/writers/publish.py +0 -70
  66. oarepo_runtime/datastreams/writers/service.py +0 -175
  67. oarepo_runtime/datastreams/writers/utils.py +0 -30
  68. oarepo_runtime/datastreams/writers/validation_errors.py +0 -20
  69. oarepo_runtime/datastreams/writers/yaml.py +0 -56
  70. oarepo_runtime/ext_config.py +0 -67
  71. oarepo_runtime/i18n/__init__.py +0 -3
  72. oarepo_runtime/info/__init__.py +0 -0
  73. oarepo_runtime/info/check.py +0 -95
  74. oarepo_runtime/info/permissions/__init__.py +0 -0
  75. oarepo_runtime/info/permissions/debug.py +0 -191
  76. oarepo_runtime/info/views.py +0 -586
  77. oarepo_runtime/profile.py +0 -60
  78. oarepo_runtime/records/dumpers/__init__.py +0 -8
  79. oarepo_runtime/records/dumpers/edtf_interval.py +0 -38
  80. oarepo_runtime/records/dumpers/multilingual_dumper.py +0 -34
  81. oarepo_runtime/records/entity_resolvers/__init__.py +0 -13
  82. oarepo_runtime/records/entity_resolvers/proxies.py +0 -57
  83. oarepo_runtime/records/mappings/__init__.py +0 -0
  84. oarepo_runtime/records/mappings/rdm_parent_mapping.json +0 -483
  85. oarepo_runtime/records/owners/__init__.py +0 -3
  86. oarepo_runtime/records/owners/registry.py +0 -22
  87. oarepo_runtime/records/relations/__init__.py +0 -22
  88. oarepo_runtime/records/relations/base.py +0 -296
  89. oarepo_runtime/records/relations/internal.py +0 -46
  90. oarepo_runtime/records/relations/lookup.py +0 -28
  91. oarepo_runtime/records/relations/pid_relation.py +0 -102
  92. oarepo_runtime/records/systemfields/featured_file.py +0 -45
  93. oarepo_runtime/records/systemfields/has_draftcheck.py +0 -47
  94. oarepo_runtime/records/systemfields/icu.py +0 -371
  95. oarepo_runtime/records/systemfields/owner.py +0 -115
  96. oarepo_runtime/records/systemfields/record_status.py +0 -35
  97. oarepo_runtime/records/systemfields/selectors.py +0 -98
  98. oarepo_runtime/records/systemfields/synthetic.py +0 -130
  99. oarepo_runtime/resources/__init__.py +0 -4
  100. oarepo_runtime/resources/config.py +0 -12
  101. oarepo_runtime/resources/file_resource.py +0 -15
  102. oarepo_runtime/resources/json_serializer.py +0 -27
  103. oarepo_runtime/resources/localized_ui_json_serializer.py +0 -54
  104. oarepo_runtime/resources/resource.py +0 -53
  105. oarepo_runtime/resources/responses.py +0 -20
  106. oarepo_runtime/services/components.py +0 -429
  107. oarepo_runtime/services/config/draft_link.py +0 -23
  108. oarepo_runtime/services/config/permissions_presets.py +0 -174
  109. oarepo_runtime/services/config/service.py +0 -117
  110. oarepo_runtime/services/custom_fields/__init__.py +0 -80
  111. oarepo_runtime/services/custom_fields/mappings.py +0 -188
  112. oarepo_runtime/services/entity/__init__.py +0 -0
  113. oarepo_runtime/services/entity/config.py +0 -14
  114. oarepo_runtime/services/entity/schema.py +0 -9
  115. oarepo_runtime/services/entity/service.py +0 -48
  116. oarepo_runtime/services/expansions/__init__.py +0 -0
  117. oarepo_runtime/services/expansions/expandable_fields.py +0 -21
  118. oarepo_runtime/services/expansions/service.py +0 -4
  119. oarepo_runtime/services/facets/__init__.py +0 -33
  120. oarepo_runtime/services/facets/base.py +0 -12
  121. oarepo_runtime/services/facets/date.py +0 -72
  122. oarepo_runtime/services/facets/enum.py +0 -11
  123. oarepo_runtime/services/facets/facet_groups_names.py +0 -17
  124. oarepo_runtime/services/facets/max_facet.py +0 -13
  125. oarepo_runtime/services/facets/multilingual_facet.py +0 -33
  126. oarepo_runtime/services/facets/nested_facet.py +0 -32
  127. oarepo_runtime/services/facets/params.py +0 -192
  128. oarepo_runtime/services/facets/year_histogram.py +0 -200
  129. oarepo_runtime/services/files/__init__.py +0 -8
  130. oarepo_runtime/services/files/components.py +0 -62
  131. oarepo_runtime/services/files/service.py +0 -16
  132. oarepo_runtime/services/generators.py +0 -10
  133. oarepo_runtime/services/permissions/__init__.py +0 -3
  134. oarepo_runtime/services/permissions/generators.py +0 -103
  135. oarepo_runtime/services/relations/__init__.py +0 -0
  136. oarepo_runtime/services/relations/components.py +0 -15
  137. oarepo_runtime/services/relations/errors.py +0 -18
  138. oarepo_runtime/services/relations/mapping.py +0 -38
  139. oarepo_runtime/services/schema/cf.py +0 -13
  140. oarepo_runtime/services/schema/i18n_validation.py +0 -7
  141. oarepo_runtime/services/schema/marshmallow.py +0 -44
  142. oarepo_runtime/services/schema/marshmallow_to_json_schema.py +0 -72
  143. oarepo_runtime/services/schema/oneofschema.py +0 -192
  144. oarepo_runtime/services/schema/polymorphic.py +0 -21
  145. oarepo_runtime/services/schema/rdm.py +0 -146
  146. oarepo_runtime/services/schema/rdm_ui.py +0 -156
  147. oarepo_runtime/services/schema/ui.py +0 -251
  148. oarepo_runtime/services/schema/validation.py +0 -70
  149. oarepo_runtime/services/search.py +0 -282
  150. oarepo_runtime/services/service.py +0 -61
  151. oarepo_runtime/tasks.py +0 -6
  152. oarepo_runtime/translations/cs/LC_MESSAGES/messages.mo +0 -0
  153. oarepo_runtime/translations/cs/LC_MESSAGES/messages.po +0 -95
  154. oarepo_runtime/translations/default_translations.py +0 -6
  155. oarepo_runtime/translations/en/LC_MESSAGES/messages.mo +0 -0
  156. oarepo_runtime/translations/en/LC_MESSAGES/messages.po +0 -97
  157. oarepo_runtime/translations/messages.pot +0 -100
  158. oarepo_runtime/uow.py +0 -146
  159. oarepo_runtime/utils/__init__.py +0 -0
  160. oarepo_runtime/utils/functools.py +0 -37
  161. oarepo_runtime/utils/identity_utils.py +0 -35
  162. oarepo_runtime/utils/index.py +0 -11
  163. oarepo_runtime/utils/path.py +0 -97
  164. oarepo_runtime-1.10.3.dist-info/RECORD +0 -163
  165. oarepo_runtime-1.10.3.dist-info/entry_points.txt +0 -16
  166. oarepo_runtime-1.10.3.dist-info/top_level.txt +0 -2
  167. tests/marshmallow_to_json/__init__.py +0 -0
  168. tests/marshmallow_to_json/test_datacite_ui_schema.py +0 -1410
  169. tests/marshmallow_to_json/test_simple_schema.py +0 -52
  170. tests/pkg_data/__init__.py +0 -0
  171. {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev3.dist-info}/licenses/LICENSE +0 -0
@@ -1,247 +0,0 @@
1
- import logging
2
- from typing import Any, Dict, List, Union
3
-
4
- import celery
5
- from celery.canvas import Signature as CelerySignature
6
- from celery.canvas import chain
7
- from celery.result import allow_join_result
8
- from flask_principal import (
9
- ActionNeed,
10
- Identity,
11
- ItemNeed,
12
- Need,
13
- RoleNeed,
14
- TypeNeed,
15
- UserNeed,
16
- )
17
-
18
- from oarepo_runtime.datastreams.datastreams import (
19
- AbstractDataStream,
20
- DataStreamChain,
21
- Signature,
22
- )
23
-
24
- from .datastreams import DataStreamCallback, StreamBatch
25
- from .json import JSONObject
26
- from .types import StreamEntryError
27
- from .writers import BaseWriter
28
-
29
- timing = logging.getLogger("oai.harvester.timing")
30
- log = logging.getLogger("datastreams")
31
-
32
-
33
class AsynchronousDataStream(AbstractDataStream):
    """Data stream whose batches are processed by an ``AsynchronousDataStreamChain``.

    The constructor accepts the same readers/writers/transformers/callback
    arguments as the base class and additionally remembers ``on_background``,
    which is handed to the chain created in ``build_chain``.
    """

    def __init__(
        self,
        *,
        readers: List[Union[Signature, Any]],
        writers: List[Union[Signature, Any]],
        transformers: List[Union[Signature, Any]] = None,
        callback: Union[DataStreamCallback, Any],
        batch_size=100,
        on_background=True,
        reader_callback=None,
    ):
        """Store the stream definition and the background-execution flag."""
        base_arguments = {
            "readers": readers,
            "writers": writers,
            "transformers": transformers,
            "callback": callback,
            "batch_size": batch_size,
            "reader_callback": reader_callback,
        }
        super().__init__(**base_arguments)
        self._on_background = on_background

    def build_chain(self, identity) -> DataStreamChain:
        """Create the processing chain for the configured transformers and writers."""
        return AsynchronousDataStreamChain(
            identity=identity,
            transformers=self._transformers,
            writers=self._writers,
            on_background=self._on_background,
        )

    def _reader_error(self, reader, exception):
        """Report a reader failure by applying the callback with a serialized error."""
        serialized_error = StreamEntryError.from_exception(exception).json
        callback_kwargs = {
            "callback": "reader_error",
            "exception": serialized_error,
        }
        self._callback.apply(kwargs=callback_kwargs)
70
-
71
-
72
class AsynchronousDataStreamChain(DataStreamChain):
    """Chain that runs transformers and writers as a sequence of celery signatures."""

    def __init__(
        self,
        identity: Identity,
        transformers: List[Signature],
        writers: List[Signature],
        on_background=True,
    ):
        """Remember the processors, the identity and whether to run on background."""
        self._identity = identity
        self._transformers = transformers
        self._writers = writers
        self._on_background = on_background

    def process(self, batch: StreamBatch, callback: CelerySignature):
        """Build the task chain for ``callback`` and invoke it with the batch JSON."""
        prepared = self._prepare_chain(callback)
        self._call(prepared, batch=batch.json)

    def _prepare_chain(self, callback: CelerySignature):
        """Assemble the chain: batch_started, processors, batch_finished, error link."""
        serialized_identity = serialize_identity(self._identity)

        steps = [
            datastreams_call_callback.signature(
                (), kwargs={"callback": callback, "callback_name": "batch_started"}
            )
        ]

        # Transformers (if any) run first, writers afterwards; both are
        # scheduled through the same processor task.
        processors = [*(self._transformers or []), *self._writers]
        steps.extend(
            run_datastream_processor.signature(
                kwargs={
                    "processor": processor.json,
                    "identity": serialized_identity,
                    "callback": callback,
                }
            )
            for processor in processors
        )

        steps.append(
            datastreams_call_callback.signature(
                (),
                kwargs={
                    "callback": callback,
                    "callback_name": "batch_finished",
                    "identity": serialized_identity,
                },
            )
        )

        chain_sig = chain(*steps)
        error_handler = datastreams_error_callback.signature(
            (),
            kwargs={
                "callback": callback,
                "callback_name": "error",
                "identity": serialized_identity,
            },
        )
        chain_sig.link_error(error_handler)
        return chain_sig

    def _call(self, sig, **kwargs):
        """Invoke ``sig`` via ``apply_async`` when on background, else via ``apply``."""
        runner = sig.apply_async if self._on_background else sig.apply
        runner([], kwargs)

    def finish(self, callback: Signature):
        """Do nothing.

        Dumpers needing finish (such as file dumpers) are not supported in async.
        """
152
-
153
-
154
@celery.shared_task
def run_datastream_processor(batch: Dict, *, processor: JSONObject, identity, callback):
    """Run one transformer or writer over a serialized batch.

    :param batch: JSON form of a ``StreamBatch``.
    :param processor: JSON form of the processor ``Signature``.
    :param identity: serialized identity (or ``None``), see ``deserialize_identity``.
    :param callback: callback signature; it is wrapped in ``CelerySignature``
        before use, the same way the sibling callback tasks do, so that a
        signature that arrives as a plain dict can still be applied.
    :return: JSON form of the (possibly replaced) batch.

    An exception raised by the processor is logged, appended to the batch
    errors and reported through the callback as ``<kind>_error``; the batch is
    still returned.
    """
    identity = deserialize_identity(identity)
    processor_signature = Signature.from_json(processor)
    deserialized_batch: StreamBatch = StreamBatch.from_json(batch)

    processor = processor_signature.resolve(identity=identity)
    try:
        if isinstance(processor, BaseWriter):
            deserialized_batch = (
                processor.write(deserialized_batch) or deserialized_batch
            )
        else:
            deserialized_batch = (
                processor.apply(deserialized_batch) or deserialized_batch
            )

    except Exception as ex:
        log.exception("Error processing batch inside %s", processor_signature)

        err = StreamEntryError.from_exception(ex)
        deserialized_batch.errors.append(err)
        # deserialize_identity() returns None for a missing identity; do not
        # try to serialize it again in that case.
        reported_identity = (
            serialize_identity(identity) if identity is not None else None
        )
        CelerySignature(callback).apply(
            (),
            {
                "batch": deserialized_batch.json,
                "identity": reported_identity,
                "callback": f"{processor_signature.kind.value}_error",
                "exception": err.json,
            },
        )
    return deserialized_batch.json
186
-
187
-
188
@celery.shared_task
def datastreams_call_callback(
    batch: Dict, *, identity=None, callback, callback_name, **kwargs
):
    """Apply ``callback`` with the batch and the callback name, then pass the batch on.

    The callback receives ``batch``, ``identity``, ``callback`` (set to
    ``callback_name``) and any extra keyword arguments.
    """
    signature = CelerySignature(callback)
    callback_kwargs = dict(
        batch=batch, identity=identity, callback=callback_name, **kwargs
    )
    signature.apply(kwargs=callback_kwargs)
    return batch
197
-
198
-
199
@celery.shared_task
def datastreams_error_callback(
    parent_task_id, *, identity=None, callback, callback_name, **kwargs
):
    """Report the outcome of a failed parent task through ``callback``.

    The parent result is fetched without propagating its exception, and its
    ``result`` and ``traceback`` are passed to the callback together with an
    empty batch.
    """
    with allow_join_result():
        from celery import current_app

        parent_result = current_app.AsyncResult(parent_task_id)
        parent_result.get(propagate=False)

        callback_kwargs = dict(
            batch={},
            identity=identity,
            callback=callback_name,
            result=parent_result.result,
            traceback=parent_result.traceback,
            **kwargs,
        )
        CelerySignature(callback).apply(kwargs=callback_kwargs)
220
-
221
-
222
def serialize_identity(identity):
    """Convert an identity into a JSON-friendly dict.

    :param identity: object exposing ``id``, ``auth_type`` and ``provides``
        (an iterable of namedtuple-like needs), or ``None``.
    :return: ``None`` for ``None`` (mirroring ``deserialize_identity``),
        otherwise a dict with ``id``, ``auth_type`` and ``provides``; each need
        is stored as its class name plus the fields from ``_asdict()``.
    """
    if identity is None:
        return None
    return {
        "id": identity.id,
        "auth_type": identity.auth_type,
        "provides": [
            {"type": type(need).__name__, "params": need._asdict()}
            for need in identity.provides
        ],
    }
230
-
231
-
232
def deserialize_identity(identity_dict):
    """Rebuild an ``Identity`` from the dict produced by ``serialize_identity``.

    Returns ``None`` when ``identity_dict`` is ``None``. A need whose ``type``
    is not one of the known need classes raises ``KeyError``.
    """
    if identity_dict is None:
        return None

    need_classes = {
        "Need": Need,
        "UserNeed": UserNeed,
        "RoleNeed": RoleNeed,
        "TypeNeed": TypeNeed,
        "ActionNeed": ActionNeed,
        "ItemNeed": ItemNeed,
    }

    identity = Identity(id=identity_dict["id"], auth_type=identity_dict["auth_type"])
    for serialized_need in identity_dict["provides"]:
        need_class = need_classes[serialized_need["type"]]
        identity.provides.add(need_class(**serialized_need["params"]))
    return identity
@@ -1,150 +0,0 @@
1
- import dataclasses
2
- from pathlib import Path
3
- from typing import Iterator, List
4
-
5
- import yaml
6
- from flask import current_app
7
-
8
- from oarepo_runtime.datastreams.datastreams import Signature, SignatureKind
9
-
10
- from .errors import DataStreamCatalogueError
11
-
12
-
13
- @dataclasses.dataclass
14
- class CatalogueDataStream:
15
- stream_name: str
16
- readers: List[Signature]
17
- writers: List[Signature]
18
- transformers: List[Signature]
19
-
20
-
21
- class DataStreamCatalogue:
22
- def __init__(self, catalogue, content=None) -> None:
23
- """
24
- Catalogue of data streams. The catalogue contains a dict of:
25
- stream_name: stream_definition, where stream definition is an array of:
26
-
27
- - reader: reader_class
28
- <rest of parameters go to reader constructor>
29
- - transformer: transformer_class
30
- <rest of parameters go to transformer constructor>
31
- - writer: writer_class
32
- <rest of parameters go to writer constructor>
33
-
34
- If reader class is not passed and _source_ is, then the reader class will be taken from the
35
- DATASTREAMS_READERS_BY_EXTENSION config variable - map from file extension to reader class.
36
-
37
- If 'service' is passed, service writer will be used with this service
38
-
39
- Transformer class must always be passed.
40
- """
41
- self._catalogue_path = Path(catalogue)
42
- if content:
43
- self._catalogue = content
44
- else:
45
- with open(catalogue) as f:
46
- self._catalogue = yaml.safe_load(f)
47
-
48
- @property
49
- def path(self):
50
- return self._catalogue_path
51
-
52
- @property
53
- def directory(self):
54
- return self._catalogue_path.parent
55
-
56
- def get_datastreams(self) -> Iterator[CatalogueDataStream]:
57
- for stream_name in self._catalogue:
58
- yield self.get_datastream(stream_name)
59
-
60
- def __iter__(self):
61
- return iter(self._catalogue)
62
-
63
- def get_datastream(
64
- self,
65
- stream_name,
66
- ) -> CatalogueDataStream:
67
- stream_definition = self._catalogue[stream_name]
68
- readers = []
69
- transformers = []
70
- writers = []
71
- for entry in stream_definition:
72
- entry = {**entry}
73
- try:
74
- if "reader" in entry:
75
- readers.append(
76
- get_signature(
77
- "reader",
78
- entry,
79
- base_path=str(self.directory),
80
- )
81
- )
82
- elif "transformer" in entry:
83
- transformers.append(
84
- get_signature(
85
- "transformer",
86
- entry,
87
- base_path=str(self.directory),
88
- )
89
- )
90
- elif "writer" in entry:
91
- writers.append(
92
- get_signature(
93
- "writer",
94
- entry,
95
- base_path=str(self.directory),
96
- )
97
- )
98
- elif "source" in entry:
99
- readers.append(self.get_reader(entry))
100
- elif "service" in entry:
101
- writers.append(self.get_service_writer(entry))
102
- else:
103
- raise DataStreamCatalogueError(
104
- "Can not decide what this record is - reader, transformer or service?"
105
- )
106
- except DataStreamCatalogueError as e:
107
- e.entry = entry
108
- e.stream_name = stream_name
109
- raise e
110
- return CatalogueDataStream(
111
- stream_name=stream_name,
112
- readers=readers,
113
- transformers=transformers,
114
- writers=writers,
115
- )
116
-
117
- def get_reader(self, entry):
118
- entry = {**entry}
119
- if not entry.get("reader"):
120
- try:
121
- source = Path(entry["source"])
122
- ext = source.suffix[1:]
123
- reader_class = (
124
- current_app.config["DATASTREAMS_READERS_BY_EXTENSION"].get(ext)
125
- or current_app.config["DEFAULT_DATASTREAMS_READERS_BY_EXTENSION"][
126
- ext
127
- ]
128
- )
129
- entry["reader"] = reader_class
130
- except KeyError:
131
- raise DataStreamCatalogueError(
132
- f"Do not have loader for file {source} - extension {ext} not defined in DATASTREAMS_READERS_BY_EXTENSION config"
133
- )
134
- return get_signature(
135
- "reader",
136
- entry,
137
- base_path=str(self.directory),
138
- )
139
-
140
- def get_service_writer(self, entry):
141
- return Signature(
142
- SignatureKind("writer"),
143
- "service",
144
- kwargs={**entry, "base_path": str(self.directory)},
145
- )
146
-
147
-
148
def get_signature(kind, entry, **kwargs):
    """Build a ``Signature`` of ``kind`` from a catalogue entry.

    The value stored under the ``kind`` key becomes the signature name; the
    remaining entry items, overridden by ``kwargs``, become its kwargs.
    """
    signature_kind = SignatureKind(kind)
    parameters = {**entry, **kwargs}
    name = parameters.pop(kind)
    return Signature(kind=signature_kind, name=name, kwargs=parameters)
@@ -1,152 +0,0 @@
1
- import abc
2
- import copy
3
- import dataclasses
4
- from enum import Enum
5
- from typing import Any, Callable, Iterator, List, Union
6
-
7
- from invenio_access.permissions import system_identity
8
-
9
- from oarepo_runtime.datastreams.types import (
10
- DataStreamCallback,
11
- StreamBatch,
12
- StreamEntry,
13
- )
14
- from oarepo_runtime.proxies import current_datastreams
15
-
16
- from .json import JSONObject
17
-
18
-
19
class DataStreamChain(abc.ABC):
    """Interface of the object that processes batches produced by a data stream."""

    @abc.abstractmethod
    def process(self, batch: StreamBatch, callback: Union[DataStreamCallback, Any]):
        """Process a single batch, reporting through ``callback``."""

    @abc.abstractmethod
    def finish(self, callback: Union[DataStreamCallback, Any]):
        """Finish the processing, reporting through ``callback``."""
27
-
28
# Use StrEnum where the interpreter provides it, otherwise a str-mixin Enum.
try:
    from enum import StrEnum
except ImportError:
    _signature_kind_bases = (str, Enum)
else:
    _signature_kind_bases = (StrEnum,)


class SignatureKind(*_signature_kind_bases):
    """Kind of a datastream processor signature."""

    READER = "reader"
    TRANSFORMER = "transformer"
    WRITER = "writer"
42
-
43
-
44
@dataclasses.dataclass
class Signature:
    """Serializable description of a reader, transformer or writer."""

    # which family of processors the name belongs to
    kind: SignatureKind
    # key under which the processor class is registered
    name: str
    # keyword arguments for the processor constructor
    kwargs: JSONObject

    @property
    def json(self):
        """Return the signature as a plain dict with ``kind``, ``name`` and ``kwargs``."""
        return {"kind": self.kind.value, "name": self.name, "kwargs": self.kwargs}

    @classmethod
    def from_json(cls, json):
        """Create a signature from the dict produced by ``json``."""
        return cls(
            kind=SignatureKind(json["kind"]),
            name=json["name"],
            kwargs=json["kwargs"],
        )

    def resolve(self, *, identity, **kwargs):
        """Instantiate the processor described by this signature.

        Readers, transformers and writers are resolved through the matching
        ``current_datastreams.get_*`` call; extra ``kwargs`` are forwarded.

        :raises ValueError: when ``kind`` is none of the known kinds.
        """
        if self.kind == SignatureKind.READER:
            # READER is a valid kind; resolve it like the other two instead of
            # reporting it as unknown.
            return current_datastreams.get_reader(self, **kwargs, identity=identity)
        elif self.kind == SignatureKind.TRANSFORMER:
            return current_datastreams.get_transformer(
                self, **kwargs, identity=identity
            )
        elif self.kind == SignatureKind.WRITER:
            return current_datastreams.get_writer(self, **kwargs, identity=identity)
        else:
            raise ValueError(f"Unknown signature kind: {self.kind}")
71
-
72
-
73
class AbstractDataStream(abc.ABC):
    """Base class that reads entries, groups them into batches and feeds a chain."""

    def __init__(
        self,
        *,
        readers: List[Union[Signature, Any]],
        writers: List[Union[Signature, Any]],
        transformers: List[Union[Signature, Any]] = None,
        callback: Union[DataStreamCallback, Signature],
        batch_size=1,
        identity=system_identity,
        reader_callback: Callable[[StreamBatch], None] = None,
    ):
        """Constructor.

        :param readers: an ordered list of readers (whatever a reader is).
        :param writers: an ordered list of writers (whatever a writer is).
        :param transformers: an ordered list of transformers to apply (whatever a transformer is).
        :param callback: object the chain and the reader error handler report to.
        :param batch_size: number of entries collected before a batch is emitted.
        :param identity: identity passed to the readers when they are created.
        :param reader_callback: optional callable invoked with every batch before it is yielded.
        """
        self._readers: List[Signature] = list(readers)
        self._transformers: List[Signature] = list(transformers or [])
        self._writers: List[Signature] = list(writers)
        self._callback = callback
        self._batch_size = batch_size
        self._identity = identity
        self._reader_callback = reader_callback

    def _read_entries(self) -> Iterator[StreamEntry]:
        """Yield entries of all readers, numbering them continuously from 1."""
        entries_seen = 0
        for reader_signature in self._readers:
            reader = current_datastreams.get_reader(
                reader_signature, identity=self._identity
            )
            try:
                for entry in reader:
                    entries_seen += 1
                    entry.seq = entries_seen
                    yield entry
            except Exception as ex:
                # A failing reader is reported and the next reader is tried.
                self._reader_error(reader, exception=ex)

    def _read_batches(self, context) -> Iterator[StreamBatch]:
        """Group entries into batches; the final batch is flagged ``last``.

        Each batch gets its own deep copy of ``context``. The final batch is
        always emitted, even when no entries are pending.
        """
        pending = []
        batches_made = 0

        def seal(last):
            # Turn the pending entries into a batch, start a new pending list
            # and notify the reader callback.
            nonlocal pending, batches_made
            batches_made += 1
            sealed = StreamBatch(
                entries=pending,
                seq=batches_made,
                context=copy.deepcopy(context),
                last=last,
            )
            pending = []
            if self._reader_callback:
                self._reader_callback(sealed)
            return sealed

        for entry in self._read_entries():
            if len(pending) == self._batch_size:
                yield seal(False)
            pending.append(entry)
        yield seal(True)

    def process(self, context=None, identity=system_identity):
        """Build the chain for ``identity`` and push every batch through it."""
        batch_context = context or {}
        chain = self.build_chain(identity)
        for batch in self._read_batches(batch_context):
            chain.process(batch, self._callback)

    @abc.abstractmethod
    def build_chain(self, identity) -> DataStreamChain:
        """Return the chain that will process the batches."""

    def _reader_error(self, reader, exception):
        """Forward a reader failure to the callback."""
        self._callback.reader_error(reader, exception=exception)
@@ -1,54 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- #
3
- # Copyright (C) 2021 CERN.
4
- #
5
- # Invenio-Vocabularies is free software; you can redistribute it and/or
6
- # modify it under the terms of the MIT License; see LICENSE file for more
7
- # details.
8
-
9
- """Datastream errors."""
10
- from typing import Union
11
-
12
- from .json import JSONObject
13
-
14
-
15
class DataStreamError(Exception):
    """Base datastream error carrying a message, code, location and detail."""

    def __init__(
        self,
        message,
        code=None,
        location=None,
        detail: Union[JSONObject, None] = None,
    ):
        """
        @param message: a string message (overview)
        @param code: a machine processable code
        @param location: location inside the json, where the error was detected. Using dot notation,
                         arrays are indexed from 0, for example: `metadata.titles.0.language`
        @param detail: a json-serializable object (dictionary) with details
        """
        super().__init__(message)
        # detail, when given, has to be a dictionary
        assert detail is None or isinstance(detail, dict)
        self.message = message
        self.code = code
        self.location = location
        self.detail = detail
36
-
37
-
38
class ReaderError(DataStreamError):
    """Reader exception."""
40
-
41
-
42
class TransformerError(DataStreamError):
    """Exception raised when applying a transformer."""
44
-
45
-
46
class WriterError(DataStreamError):
    """Writer exception."""
48
-
49
-
50
class DataStreamCatalogueError(Exception):
    """Error in a catalogue, optionally carrying the entry and stream name."""

    def __init__(self, message, entry=None, stream_name=None) -> None:
        """Store the message on the exception and keep the entry and stream name."""
        super().__init__(message)
        self.stream_name = stream_name
        self.entry = entry
@@ -1,41 +0,0 @@
1
- import functools
2
-
3
- from invenio_base.utils import obj_or_import_string
4
-
5
- from oarepo_runtime.datastreams.datastreams import Signature
6
-
7
-
8
class OARepoDataStreamsExt:
    """Factory of readers, writers and transformers configured on the app."""

    def __init__(self, app):
        """Remember the application whose config holds the processor classes."""
        self.app = app
        # config name -> {class key: class}; kept on the instance so that the
        # cache does not hold the extension (and its app) alive from a
        # class-level lru_cache
        self._config_classes = {}

    def get_reader(self, reader, identity, **kwargs):
        """Return a reader from DATASTREAMS_READERS (or ``reader`` itself)."""
        return self._get_instance("DATASTREAMS_READERS", identity, kwargs, reader)

    def get_writer(self, writer, identity, **kwargs):
        """Return a writer from DATASTREAMS_WRITERS (or ``writer`` itself)."""
        return self._get_instance("DATASTREAMS_WRITERS", identity, kwargs, writer)

    def get_transformer(self, transformer, identity, **kwargs):
        """Return a transformer from DATASTREAMS_TRANSFORMERS (or ``transformer`` itself)."""
        return self._get_instance(
            "DATASTREAMS_TRANSFORMERS", identity, kwargs, transformer
        )

    def _get_instance(self, config_name, identity, kwargs, inst):
        """Instantiate the class named by a ``Signature``.

        Anything that is not a ``Signature`` is returned unchanged. Signature
        kwargs are overridden by ``kwargs``; ``identity`` is added only when
        neither of them provides it.

        :raises KeyError: when the signature name is not in the config.
        """
        if not isinstance(inst, Signature):
            return inst

        config_classes = self._get_classes_from_config(config_name)
        if inst.name not in config_classes:
            raise KeyError(f"'{inst.name}' not found in config {config_name}")
        instance_class = config_classes[inst.name]
        all_kwargs = {**(inst.kwargs or {}), **kwargs}
        all_kwargs.setdefault("identity", identity)
        return instance_class(**all_kwargs)

    def _get_classes_from_config(self, config_name):
        """Return the imported classes of ``config_name``, cached per instance."""
        # setdefault keeps this working for instances created without __init__
        cache = self.__dict__.setdefault("_config_classes", {})
        if config_name not in cache:
            cache[config_name] = {
                class_key: obj_or_import_string(class_name)
                for class_key, class_name in self.app.config[config_name].items()
            }
        return cache[config_name]