oarepo-runtime 1.10.3__py3-none-any.whl → 2.0.0.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oarepo_runtime/__init__.py +24 -0
- oarepo_runtime/api.py +111 -0
- oarepo_runtime/cli/__init__.py +10 -21
- oarepo_runtime/cli/search.py +34 -0
- oarepo_runtime/config.py +86 -13
- oarepo_runtime/ext.py +64 -82
- oarepo_runtime/proxies.py +21 -5
- oarepo_runtime/records/__init__.py +11 -50
- oarepo_runtime/records/drafts.py +24 -18
- oarepo_runtime/records/mapping.py +84 -0
- oarepo_runtime/records/pid_providers.py +43 -7
- oarepo_runtime/records/systemfields/__init__.py +15 -33
- oarepo_runtime/records/systemfields/mapping.py +41 -24
- oarepo_runtime/records/systemfields/publication_status.py +59 -0
- oarepo_runtime/services/__init__.py +12 -0
- oarepo_runtime/services/config/__init__.py +15 -21
- oarepo_runtime/services/config/link_conditions.py +69 -75
- oarepo_runtime/services/config/permissions.py +62 -0
- oarepo_runtime/services/records/__init__.py +14 -1
- oarepo_runtime/services/records/links.py +21 -11
- oarepo_runtime/services/records/mapping.py +42 -0
- oarepo_runtime/services/results.py +98 -109
- oarepo_runtime/services/schema/__init__.py +12 -44
- oarepo_runtime/services/schema/i18n.py +47 -22
- oarepo_runtime/services/schema/i18n_ui.py +61 -24
- {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev3.dist-info}/METADATA +9 -21
- oarepo_runtime-2.0.0.dev3.dist-info/RECORD +30 -0
- {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev3.dist-info}/WHEEL +1 -2
- oarepo_runtime-2.0.0.dev3.dist-info/entry_points.txt +5 -0
- oarepo_runtime/cli/assets.py +0 -145
- oarepo_runtime/cli/base.py +0 -25
- oarepo_runtime/cli/cf.py +0 -15
- oarepo_runtime/cli/check.py +0 -167
- oarepo_runtime/cli/configuration.py +0 -51
- oarepo_runtime/cli/fixtures.py +0 -167
- oarepo_runtime/cli/index.py +0 -272
- oarepo_runtime/cli/permissions/__init__.py +0 -6
- oarepo_runtime/cli/permissions/base.py +0 -26
- oarepo_runtime/cli/permissions/evaluate.py +0 -63
- oarepo_runtime/cli/permissions/list.py +0 -239
- oarepo_runtime/cli/permissions/search.py +0 -121
- oarepo_runtime/cli/validate.py +0 -150
- oarepo_runtime/datastreams/__init__.py +0 -38
- oarepo_runtime/datastreams/asynchronous.py +0 -247
- oarepo_runtime/datastreams/catalogue.py +0 -150
- oarepo_runtime/datastreams/datastreams.py +0 -152
- oarepo_runtime/datastreams/errors.py +0 -54
- oarepo_runtime/datastreams/ext.py +0 -41
- oarepo_runtime/datastreams/fixtures.py +0 -265
- oarepo_runtime/datastreams/json.py +0 -4
- oarepo_runtime/datastreams/readers/__init__.py +0 -39
- oarepo_runtime/datastreams/readers/attachments.py +0 -51
- oarepo_runtime/datastreams/readers/excel.py +0 -123
- oarepo_runtime/datastreams/readers/json.py +0 -27
- oarepo_runtime/datastreams/readers/service.py +0 -54
- oarepo_runtime/datastreams/readers/yaml.py +0 -14
- oarepo_runtime/datastreams/semi_asynchronous.py +0 -91
- oarepo_runtime/datastreams/synchronous.py +0 -70
- oarepo_runtime/datastreams/transformers.py +0 -18
- oarepo_runtime/datastreams/types.py +0 -323
- oarepo_runtime/datastreams/utils.py +0 -131
- oarepo_runtime/datastreams/writers/__init__.py +0 -21
- oarepo_runtime/datastreams/writers/attachments_file.py +0 -92
- oarepo_runtime/datastreams/writers/attachments_service.py +0 -118
- oarepo_runtime/datastreams/writers/publish.py +0 -70
- oarepo_runtime/datastreams/writers/service.py +0 -175
- oarepo_runtime/datastreams/writers/utils.py +0 -30
- oarepo_runtime/datastreams/writers/validation_errors.py +0 -20
- oarepo_runtime/datastreams/writers/yaml.py +0 -56
- oarepo_runtime/ext_config.py +0 -67
- oarepo_runtime/i18n/__init__.py +0 -3
- oarepo_runtime/info/__init__.py +0 -0
- oarepo_runtime/info/check.py +0 -95
- oarepo_runtime/info/permissions/__init__.py +0 -0
- oarepo_runtime/info/permissions/debug.py +0 -191
- oarepo_runtime/info/views.py +0 -586
- oarepo_runtime/profile.py +0 -60
- oarepo_runtime/records/dumpers/__init__.py +0 -8
- oarepo_runtime/records/dumpers/edtf_interval.py +0 -38
- oarepo_runtime/records/dumpers/multilingual_dumper.py +0 -34
- oarepo_runtime/records/entity_resolvers/__init__.py +0 -13
- oarepo_runtime/records/entity_resolvers/proxies.py +0 -57
- oarepo_runtime/records/mappings/__init__.py +0 -0
- oarepo_runtime/records/mappings/rdm_parent_mapping.json +0 -483
- oarepo_runtime/records/owners/__init__.py +0 -3
- oarepo_runtime/records/owners/registry.py +0 -22
- oarepo_runtime/records/relations/__init__.py +0 -22
- oarepo_runtime/records/relations/base.py +0 -296
- oarepo_runtime/records/relations/internal.py +0 -46
- oarepo_runtime/records/relations/lookup.py +0 -28
- oarepo_runtime/records/relations/pid_relation.py +0 -102
- oarepo_runtime/records/systemfields/featured_file.py +0 -45
- oarepo_runtime/records/systemfields/has_draftcheck.py +0 -47
- oarepo_runtime/records/systemfields/icu.py +0 -371
- oarepo_runtime/records/systemfields/owner.py +0 -115
- oarepo_runtime/records/systemfields/record_status.py +0 -35
- oarepo_runtime/records/systemfields/selectors.py +0 -98
- oarepo_runtime/records/systemfields/synthetic.py +0 -130
- oarepo_runtime/resources/__init__.py +0 -4
- oarepo_runtime/resources/config.py +0 -12
- oarepo_runtime/resources/file_resource.py +0 -15
- oarepo_runtime/resources/json_serializer.py +0 -27
- oarepo_runtime/resources/localized_ui_json_serializer.py +0 -54
- oarepo_runtime/resources/resource.py +0 -53
- oarepo_runtime/resources/responses.py +0 -20
- oarepo_runtime/services/components.py +0 -429
- oarepo_runtime/services/config/draft_link.py +0 -23
- oarepo_runtime/services/config/permissions_presets.py +0 -174
- oarepo_runtime/services/config/service.py +0 -117
- oarepo_runtime/services/custom_fields/__init__.py +0 -80
- oarepo_runtime/services/custom_fields/mappings.py +0 -188
- oarepo_runtime/services/entity/__init__.py +0 -0
- oarepo_runtime/services/entity/config.py +0 -14
- oarepo_runtime/services/entity/schema.py +0 -9
- oarepo_runtime/services/entity/service.py +0 -48
- oarepo_runtime/services/expansions/__init__.py +0 -0
- oarepo_runtime/services/expansions/expandable_fields.py +0 -21
- oarepo_runtime/services/expansions/service.py +0 -4
- oarepo_runtime/services/facets/__init__.py +0 -33
- oarepo_runtime/services/facets/base.py +0 -12
- oarepo_runtime/services/facets/date.py +0 -72
- oarepo_runtime/services/facets/enum.py +0 -11
- oarepo_runtime/services/facets/facet_groups_names.py +0 -17
- oarepo_runtime/services/facets/max_facet.py +0 -13
- oarepo_runtime/services/facets/multilingual_facet.py +0 -33
- oarepo_runtime/services/facets/nested_facet.py +0 -32
- oarepo_runtime/services/facets/params.py +0 -192
- oarepo_runtime/services/facets/year_histogram.py +0 -200
- oarepo_runtime/services/files/__init__.py +0 -8
- oarepo_runtime/services/files/components.py +0 -62
- oarepo_runtime/services/files/service.py +0 -16
- oarepo_runtime/services/generators.py +0 -10
- oarepo_runtime/services/permissions/__init__.py +0 -3
- oarepo_runtime/services/permissions/generators.py +0 -103
- oarepo_runtime/services/relations/__init__.py +0 -0
- oarepo_runtime/services/relations/components.py +0 -15
- oarepo_runtime/services/relations/errors.py +0 -18
- oarepo_runtime/services/relations/mapping.py +0 -38
- oarepo_runtime/services/schema/cf.py +0 -13
- oarepo_runtime/services/schema/i18n_validation.py +0 -7
- oarepo_runtime/services/schema/marshmallow.py +0 -44
- oarepo_runtime/services/schema/marshmallow_to_json_schema.py +0 -72
- oarepo_runtime/services/schema/oneofschema.py +0 -192
- oarepo_runtime/services/schema/polymorphic.py +0 -21
- oarepo_runtime/services/schema/rdm.py +0 -146
- oarepo_runtime/services/schema/rdm_ui.py +0 -156
- oarepo_runtime/services/schema/ui.py +0 -251
- oarepo_runtime/services/schema/validation.py +0 -70
- oarepo_runtime/services/search.py +0 -282
- oarepo_runtime/services/service.py +0 -61
- oarepo_runtime/tasks.py +0 -6
- oarepo_runtime/translations/cs/LC_MESSAGES/messages.mo +0 -0
- oarepo_runtime/translations/cs/LC_MESSAGES/messages.po +0 -95
- oarepo_runtime/translations/default_translations.py +0 -6
- oarepo_runtime/translations/en/LC_MESSAGES/messages.mo +0 -0
- oarepo_runtime/translations/en/LC_MESSAGES/messages.po +0 -97
- oarepo_runtime/translations/messages.pot +0 -100
- oarepo_runtime/uow.py +0 -146
- oarepo_runtime/utils/__init__.py +0 -0
- oarepo_runtime/utils/functools.py +0 -37
- oarepo_runtime/utils/identity_utils.py +0 -35
- oarepo_runtime/utils/index.py +0 -11
- oarepo_runtime/utils/path.py +0 -97
- oarepo_runtime-1.10.3.dist-info/RECORD +0 -163
- oarepo_runtime-1.10.3.dist-info/entry_points.txt +0 -16
- oarepo_runtime-1.10.3.dist-info/top_level.txt +0 -2
- tests/marshmallow_to_json/__init__.py +0 -0
- tests/marshmallow_to_json/test_datacite_ui_schema.py +0 -1410
- tests/marshmallow_to_json/test_simple_schema.py +0 -52
- tests/pkg_data/__init__.py +0 -0
- {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev3.dist-info}/licenses/LICENSE +0 -0
@@ -1,70 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# This package was taken from Invenio vocabularies and modified to be more universal
|
3
|
-
#
|
4
|
-
import logging
|
5
|
-
from typing import List
|
6
|
-
|
7
|
-
from ..proxies import current_datastreams
|
8
|
-
from .datastreams import AbstractDataStream, DataStreamChain
|
9
|
-
from .transformers import BaseTransformer
|
10
|
-
from .types import DataStreamCallback, StreamEntryError
|
11
|
-
from .writers import BaseWriter
|
12
|
-
|
13
|
-
log = logging.getLogger("datastreams")
|
14
|
-
|
15
|
-
|
16
|
-
class SynchronousDataStreamChain(DataStreamChain):
    """Chain that runs every transformer and then every writer in the calling thread.

    A failure in one step does not stop the chain: the exception is converted
    to a ``StreamEntryError``, appended to ``batch.errors``, reported to the
    callback, and processing continues with the next step.
    """

    def __init__(self, transformers: List[BaseTransformer], writers: List[BaseWriter]):
        """Store the already-instantiated transformers and writers.

        :param transformers: transformers applied to each batch, in order.
        :param writers: writers applied to each batch after the transformers.
        """
        self._transformers = transformers
        self._writers = writers

    def process(self, batch, callback: DataStreamCallback):
        """Run ``batch`` through all transformers and writers.

        A step may return a replacement batch; a falsy return value keeps the
        batch that was passed in.

        :param batch: the batch to process.
        :param callback: notified when the batch starts and finishes and on
            every transformer/writer error.
        """
        callback.batch_started(batch)
        for transformer in self._transformers:
            try:
                batch = transformer.apply(batch) or batch
            except Exception as ex:
                # repr() of a whole batch can be expensive, so build the
                # message only when ERROR records are actually emitted.
                # (getEffectiveLevel() returns the numeric level and is truthy
                # for every configured level, so it cannot serve as a guard.)
                if log.isEnabledFor(logging.ERROR):
                    log.error(
                        "Unexpected error in transformer: %s: %s",
                        repr(transformer),
                        repr(batch),
                    )
                batch.errors.append(StreamEntryError.from_exception(ex))
                callback.transformer_error(batch, transformer, exception=ex)

        for writer in self._writers:
            try:
                batch = writer.write(batch) or batch
            except Exception as ex:
                if log.isEnabledFor(logging.ERROR):
                    log.error(
                        "Unexpected error in writer: %s: %s", repr(writer), repr(batch)
                    )
                batch.errors.append(StreamEntryError.from_exception(ex))
                callback.writer_error(batch, writer, exception=ex)
        callback.batch_finished(batch)

    def finish(self, callback: DataStreamCallback):
        """Call ``finish()`` on every writer, reporting failures to ``callback``.

        An exception raised by one writer does not prevent the remaining
        writers from being finished.
        """
        for writer in self._writers:
            try:
                writer.finish()
            except Exception as e:
                log.error("Unexpected error in writer: %s", repr(writer))
                # There is no batch in flight at this point.
                callback.writer_error(batch=None, writer=writer, exception=e)
|
55
|
-
|
56
|
-
|
57
|
-
class SynchronousDataStream(AbstractDataStream):
    """Data stream whose chain is a ``SynchronousDataStreamChain``."""

    def build_chain(self, identity) -> DataStreamChain:
        """Resolve the configured transformers and writers and wrap them in a chain.

        :param identity: passed through to ``current_datastreams`` when each
            transformer and writer is looked up.
        :returns: a ``SynchronousDataStreamChain`` over the resolved objects.
        """
        resolved_transformers = [
            current_datastreams.get_transformer(spec, identity=identity)
            for spec in self._transformers
        ]
        resolved_writers = [
            current_datastreams.get_writer(spec, identity=identity)
            for spec in self._writers
        ]
        return SynchronousDataStreamChain(
            transformers=resolved_transformers,
            writers=resolved_writers,
        )
|
@@ -1,18 +0,0 @@
|
|
1
|
-
from abc import ABC, abstractmethod
|
2
|
-
from typing import Union
|
3
|
-
|
4
|
-
from oarepo_runtime.datastreams.types import StreamBatch
|
5
|
-
|
6
|
-
|
7
|
-
class BaseTransformer(ABC):
    """Abstract base class for datastream transformers."""

    def __init__(self, **kwargs) -> None:
        """Accept arbitrary keyword arguments; the base class ignores them."""

    @abstractmethod
    def apply(self, batch: StreamBatch, *args, **kwargs) -> Union[StreamBatch, None]:
        """Apply the transformation to a batch.

        :param batch: the batch to transform.
        :returns: the transformed ``StreamBatch``, or ``None``.
        Raises TransformerError in case of errors.
        """
|
@@ -1,323 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# This package was taken from Invenio vocabularies and modified to be more universal
|
3
|
-
#
|
4
|
-
import dataclasses
|
5
|
-
import json
|
6
|
-
import logging
|
7
|
-
import textwrap
|
8
|
-
import traceback
|
9
|
-
from typing import Any, Dict, List, Optional, Union
|
10
|
-
|
11
|
-
from .errors import DataStreamError
|
12
|
-
from .json import JSONObject
|
13
|
-
|
14
|
-
log = logging.getLogger("datastreams")
|
15
|
-
|
16
|
-
|
17
|
-
@dataclasses.dataclass
class StreamEntryError:
    """A serializable description of an error raised while processing a stream."""

    code: str
    message: str
    location: Optional[str] = None
    info: Union[JSONObject, None] = None

    @classmethod
    def from_exception(
        cls, exc: Exception, limit=30, message=None, location=None, info=None, code=None
    ):
        """Build an error record from an exception.

        A ``DataStreamError`` already carries code, message, location and
        detail, so those are copied verbatim and the remaining arguments are
        ignored. For any other exception the record is assembled from the
        arguments and the exception itself.

        :param exc: the exception to describe.
        :param limit: maximum number of stack frames included in ``info["stack"]``.
        :param message: overrides the message derived from ``exc``.
        :param location: stored as the ``location`` of the error.
        :param info: extra entries merged into (and overriding) the generated info.
        :param code: overrides the code derived from ``exc``.
        """
        if isinstance(exc, DataStreamError):
            return cls(
                code=exc.code,
                message=exc.message,
                location=exc.location,
                info=exc.detail,
            )

        # Format the traceback of *exc itself*. traceback.format_exc() would
        # format whatever exception is currently being handled, which is the
        # wrong one (or "NoneType: None") when this method is called outside
        # of the matching ``except`` block. The three-positional-argument form
        # of format_exception is accepted by both Python 3.9 and 3.10+.
        stack = "".join(
            traceback.format_exception(type(exc), exc, exc.__traceback__, limit=limit)
        )
        if message:
            formatted_exception = message
        elif hasattr(exc, "format_exception"):
            formatted_exception = exc.format_exception()
        else:
            formatted_exception = str(exc)

        return cls(
            code=code or getattr(exc, "type", type(exc).__name__),
            message=formatted_exception,
            location=location,
            info={
                "message": str(exc),
                "exception": type(exc).__name__,
                "stack": stack,
                **(info or {}),
            },
        )

    @property
    def json(self) -> JSONObject:
        """Return a dict containing only the fields that have a truthy value."""
        ret = {}
        if self.code:
            ret["code"] = self.code
        if self.message:
            ret["message"] = self.message
        if self.location:
            ret["location"] = self.location
        if self.info:
            ret["info"] = self.info
        return ret

    @classmethod
    def from_json(cls, js: JSONObject):
        """Inverse of :attr:`json`; returns ``None`` when ``js`` is ``None``."""
        if js is None:
            return None
        return cls(
            code=js.get("code"),
            message=js.get("message"),
            location=js.get("location"),
            info=js.get("info"),
        )

    def __str__(self):
        formatted_info = textwrap.indent(
            json.dumps(self.info or {}, ensure_ascii=False, indent=4), prefix=" "
        )
        return f"{self.code}:{self.location if self.location else ''} {self.message}\n{formatted_info}"

    def __repr__(self):
        # Same human-readable form as str(); replaces the dataclass repr.
        return str(self)
|
89
|
-
|
90
|
-
|
91
|
-
@dataclasses.dataclass
class StreamEntryFile:
    """A file attached to a stream entry: its metadata plus where to get the bytes."""

    metadata: JSONObject
    # data url with the content of the file or any other resolvable url
    content_url: str

    @property
    def json(self) -> JSONObject:
        """Return the file as a plain dict with ``metadata`` and ``content_url``."""
        serialized = {}
        serialized["metadata"] = self.metadata
        serialized["content_url"] = self.content_url
        return serialized

    @classmethod
    def from_json(cls, js: JSONObject):
        """Build an instance from a dict; both keys are required."""
        metadata = js["metadata"]
        content_url = js["content_url"]
        return cls(metadata=metadata, content_url=content_url)
|
110
|
-
|
111
|
-
|
112
|
-
@dataclasses.dataclass
class StreamEntry:
    """One item travelling through a data stream, with its processing state."""

    entry: JSONObject
    files: List[StreamEntryFile] = dataclasses.field(default_factory=list)
    seq: int = 0
    id: Optional[str] = None
    filtered: bool = False
    deleted: bool = False
    errors: List[StreamEntryError] = dataclasses.field(default_factory=list)
    context: JSONObject = dataclasses.field(default_factory=dict)

    @property
    def ok(self):
        """True when the entry was neither filtered out nor has any errors."""
        return not (self.filtered or self.errors)

    @property
    def json(self) -> JSONObject:
        """Return the entry as a plain dict; errors and files are serialized too."""
        serialized_errors = [error.json for error in self.errors]
        serialized_files = [attachment.json for attachment in self.files]
        return {
            "id": self.id,
            "entry": self.entry,
            "filtered": self.filtered,
            "deleted": self.deleted,
            "errors": serialized_errors,
            "context": self.context,
            "seq": self.seq,
            "files": serialized_files,
        }

    @classmethod
    def from_json(cls, js):
        """Inverse of :attr:`json`; every key produced by ``json`` is required."""
        parsed_errors = [StreamEntryError.from_json(item) for item in js["errors"]]
        parsed_files = [StreamEntryFile.from_json(item) for item in js["files"]]
        return cls(
            id=js["id"],
            entry=js["entry"],
            filtered=js["filtered"],
            deleted=js["deleted"],
            errors=parsed_errors,
            context=js["context"],
            seq=js["seq"],
            files=parsed_files,
        )

    def __str__(self):
        def dump(value):
            # Pretty-printed JSON, indented under its heading.
            return textwrap.indent(
                json.dumps(value, ensure_ascii=False, indent=4), " "
            )

        lines = [
            f"Entry #{self.seq}: id {self.id or 'not yet set'}, filtered: {self.filtered}, deleted: {self.deleted}",
            "Content:",
            dump(self.entry),
            "Context:",
            dump(self.context),
        ]
        if self.errors:
            lines.append("Errors:")
            lines.extend(textwrap.indent(str(error), " ") for error in self.errors)
        return "\n".join(lines)
|
172
|
-
|
173
|
-
|
174
|
-
@dataclasses.dataclass
class StreamBatch:
    """A group of stream entries processed together, plus batch-level state.

    When the batch itself has errors, every entry is reported as failed and
    the ok/skipped/deleted views are empty.
    """

    entries: List[StreamEntry]
    context: Dict[str, Any] = dataclasses.field(default_factory=dict)
    last: bool = False
    seq: int = 0
    errors: List[StreamEntryError] = dataclasses.field(default_factory=list)

    @property
    def ok_entries(self):
        """Entries whose ``ok`` flag is set; empty if the batch has errors."""
        if self.errors:
            return []
        return [x for x in self.entries if x.ok]

    @property
    def failed_entries(self):
        """Entries with errors; all entries if the batch itself has errors."""
        if self.errors:
            return self.entries
        return [x for x in self.entries if x.errors]

    @property
    def skipped_entries(self):
        """Entries marked as filtered; empty if the batch has errors."""
        if self.errors:
            return []
        return [x for x in self.entries if x.filtered]

    @property
    def deleted_entries(self):
        """Entries marked as deleted; empty if the batch has errors."""
        if self.errors:
            return []
        return [x for x in self.entries if x.deleted]

    @property
    def json(self):
        """Return the batch as a plain dict; entries and errors are serialized too."""
        return {
            "entries": [x.json for x in self.entries],
            "context": self.context,
            "last": self.last,
            "seq": self.seq,
            "errors": [x.json for x in self.errors],
        }

    @classmethod
    def from_json(cls, js):
        """Inverse of :attr:`json`; returns ``None`` when ``js`` is ``None``.

        A failure while parsing the entries is logged together with the
        offending payload and then re-raised.
        """
        if js is None:
            return None
        try:
            # Parse once and reuse the result instead of parsing the entries a
            # second time after a throw-away validation pass.
            entries = [StreamEntry.from_json(x) for x in js["entries"]]
        except Exception:
            log.exception("Exception parsing %s", js)
            raise
        return cls(
            entries=entries,
            context=js["context"],
            last=js["last"],
            seq=js["seq"],
            errors=[StreamEntryError.from_json(x) for x in js["errors"]],
        )
|
232
|
-
|
233
|
-
|
234
|
-
class DataStreamCallback:
    """Receives data stream progress and error events and writes them to the log."""

    def __init__(self, log_error_entry=False):
        """
        :param log_error_entry: when true, each failed entry of a finished
            batch is logged at error level.
        """
        self.log_error_entry = log_error_entry

    def batch_started(self, batch):
        """Log the start of a batch (and its content at debug level)."""
        log.info("Batch started: %s", batch.seq)
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Content: %s", batch)

    def batch_finished(self, batch: StreamBatch):
        """Log the end of a batch, its batch-level errors and optionally failed entries."""
        log.info("Batch finished: %s", batch.seq)
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Content: %s", batch)

        for batch_error in batch.errors:
            log.error("Failed batch: %s: %s", batch_error, batch.seq)

        if not self.log_error_entry:
            return
        for failed in (item for item in batch.entries if item.errors):
            log.error("Failed entry: %s in batch %s", failed, batch.seq)

    def reader_error(self, reader, exception):
        """Log an error raised by a reader."""
        log.error("Reader error: %s: %s", reader, exception)

    def transformer_error(self, batch, transformer, exception):
        """Log an error raised by a transformer; ``batch`` is not logged."""
        log.error("Transformer error: %s: %s", transformer, exception)

    def writer_error(self, batch, writer, exception):
        """Log an error raised by a writer; ``batch`` is not logged."""
        log.error("Writer error: %s: %s", writer, exception)
|
262
|
-
|
263
|
-
|
264
|
-
class StatsKeepingDataStreamCallback(DataStreamCallback):
    """Callback that, besides logging, counts batches, entries and errors."""

    def __init__(self, log_error_entry=False):
        super().__init__(log_error_entry=log_error_entry)

        # Batch counters.
        self.started_batches_count = 0
        self.finished_batches_count = 0
        # Per-entry counters, updated when a batch finishes.
        self.ok_entries_count = 0
        self.filtered_entries_count = 0
        self.deleted_entries_count = 0
        self.failed_entries_count = 0
        # Error-event counters.
        self.reader_errors_count = 0
        self.transformer_errors_count = 0
        self.writer_errors_count = 0

    def batch_started(self, batch):
        super().batch_started(batch)
        self.started_batches_count += 1

    def batch_finished(self, batch: StreamBatch):
        super().batch_finished(batch)
        self.finished_batches_count += 1
        # The categories are not exclusive: one entry may be counted in several.
        for item in batch.entries:
            self.ok_entries_count += 1 if item.ok else 0
            self.filtered_entries_count += 1 if item.filtered else 0
            self.deleted_entries_count += 1 if item.deleted else 0
            self.failed_entries_count += 1 if item.errors else 0

    def reader_error(self, reader, exception):
        super().reader_error(reader, exception)
        self.reader_errors_count += 1

    def transformer_error(self, batch, transformer, exception):
        super().transformer_error(batch, transformer, exception)
        self.transformer_errors_count += 1

    def writer_error(self, batch, writer, exception):
        super().writer_error(batch, writer, exception)
        self.writer_errors_count += 1

    def stats(self):
        """Return a one-line summary; counters that are zero are left out."""
        labelled_counts = (
            ("ok", self.ok_entries_count),
            ("deleted", self.deleted_entries_count),
            ("filtered", self.filtered_entries_count),
            ("failed", self.failed_entries_count),
            ("reader errors", self.reader_errors_count),
            ("transformer errors", self.transformer_errors_count),
            ("writer errors", self.writer_errors_count),
        )
        parts = [f"{self.finished_batches_count} batches finished"]
        parts.extend(f"{label}: {count}" for label, count in labelled_counts if count)
        return ", ".join(parts)
|
@@ -1,131 +0,0 @@
|
|
1
|
-
from base64 import b64decode
|
2
|
-
|
3
|
-
import requests
|
4
|
-
from deprecated import deprecated
|
5
|
-
from flask import current_app
|
6
|
-
from invenio_drafts_resources.services import RecordService as DraftRecordService
|
7
|
-
from invenio_records_resources.proxies import current_service_registry
|
8
|
-
from invenio_records_resources.services import FileService, RecordService
|
9
|
-
from invenio_records_resources.services.records.results import RecordItem
|
10
|
-
from requests import PreparedRequest, Response
|
11
|
-
from requests.adapters import BaseAdapter
|
12
|
-
|
13
|
-
|
14
|
-
def get_record_service_for_record(record):
    """Return the record service responsible for ``record``.

    Uses the ``OAREPO_PRIMARY_RECORD_SERVICE`` configuration mapping when it
    is present and falls back to the deprecated registry scan otherwise.
    Returns ``None`` for a falsy ``record``.
    """
    if not record:
        return None
    if "OAREPO_PRIMARY_RECORD_SERVICE" not in current_app.config:
        return get_record_service_for_record_deprecated(record)
    return get_record_service_for_record_class(type(record))
|
21
|
-
|
22
|
-
|
23
|
-
def get_record_service_for_record_class(record_cls):
    """Look up the service registered for ``record_cls``.

    The service id is read from ``OAREPO_PRIMARY_RECORD_SERVICE`` (keyed by
    record class) and resolved through the service registry.
    """
    primary_services = current_app.config["OAREPO_PRIMARY_RECORD_SERVICE"]
    return current_service_registry.get(primary_services[record_cls])
|
26
|
-
|
27
|
-
|
28
|
-
@deprecated(
    version="1.5.43", reason="Please recompile model to remove this deprecation warning"
)
def get_record_service_for_record_deprecated(record):
    """Find the record service for ``record`` by scanning the service registry.

    A draft record is matched against ``draft_cls`` of draft-enabled services,
    any other record against ``record_cls`` of plain record services. File
    services are ignored. Returns ``None`` when nothing matches.
    """
    wants_draft_service = bool(getattr(record, "is_draft", False))
    class_attribute = "draft_cls" if wants_draft_service else "record_cls"

    for candidate in current_service_registry._services.values():
        if not isinstance(candidate, RecordService) or isinstance(
            candidate, FileService
        ):
            continue
        if isinstance(candidate, DraftRecordService) != wants_draft_service:
            continue
        if getattr(candidate, class_attribute, None) == type(record):
            return candidate
    return None
|
50
|
-
|
51
|
-
def get_file_service_for_record_class(record_class):
    """Return the first registered file service whose ``record_cls`` is ``record_class``.

    Returns ``None`` for a falsy ``record_class`` or when no service matches.
    """
    if not record_class:
        return None

    for candidate in current_service_registry._services.values():
        if isinstance(candidate, FileService) and candidate.record_cls == record_class:
            return candidate
    return None
|
61
|
-
|
62
|
-
def get_file_service_for_file_record_class(file_record_class):
    """Return the file service for the record class that ``file_record_class`` points to."""
    return get_file_service_for_record_class(file_record_class.record_cls)
|
65
|
-
|
66
|
-
def get_file_service_for_record_service(
    record_service, check_draft_files=True, record=None
):
    """Return the file service that belongs to ``record_service``.

    Preference order: the service's ``draft_files`` (only when draft files
    should be checked), then its ``files``, then a registry lookup by the
    ``record_cls`` of the service configuration.

    :param record_service: the record service to inspect.
    :param check_draft_files: whether ``draft_files`` may be returned.
    :param record: optional record or ``RecordItem``; a record whose
        ``is_draft`` is ``False`` (or missing) disables the draft-files check.
    """
    if isinstance(record, RecordItem):
        record = record._record

    record_is_not_draft = getattr(record, "is_draft", False) is False
    if record and record_is_not_draft:
        check_draft_files = False

    if check_draft_files:
        draft_files = getattr(record_service, "draft_files", None)
        if isinstance(draft_files, FileService):
            return draft_files

    published_files = getattr(record_service, "files", None)
    if isinstance(published_files, FileService):
        return published_files

    fallback_record_class = getattr(record_service.config, "record_cls", None)
    return get_file_service_for_record_class(fallback_record_class)
|
86
|
-
|
87
|
-
|
88
|
-
def get_record_service_for_file_service(file_service, record=None):
    """Find the record service whose record class matches ``file_service.record_cls``.

    When ``record`` is a draft, draft-enabled services are compared by their
    ``draft_cls``; otherwise plain record services are compared by ``record_cls``.

    :raises KeyError: when no registered service matches.
    """
    expect_draft_service = bool(record and getattr(record, "is_draft", False))
    class_attribute = "draft_cls" if expect_draft_service else "record_cls"

    for candidate in current_service_registry._services.values():
        if not isinstance(candidate, RecordService):
            continue
        if isinstance(candidate, DraftRecordService) != expect_draft_service:
            continue
        if getattr(candidate, class_attribute, None) == file_service.record_cls:
            return candidate

    raise KeyError(
        f"Could not get service for file service {file_service}, draft {expect_draft_service}"
    )
|
108
|
-
|
109
|
-
|
110
|
-
class DataAdapter(BaseAdapter):
    """Transport adapter that answers a request from the base64 payload in its URL."""

    def send(
        self,
        request: PreparedRequest,
        stream=False,
        timeout=None,
        verify=True,
        cert=None,
        proxies=None,
    ):
        """Return a 200 response whose content is the decoded URL payload.

        Every ``data:`` occurrence is removed from the URL and the remainder is
        base64-decoded. The transport-related arguments are accepted but not used.
        """
        encoded_payload = request.url.replace("data:", "")
        response = Response()
        response._content = b64decode(encoded_payload)
        response.status_code = 200
        return response

    def close(self):
        """Nothing to release."""
|
128
|
-
|
129
|
-
|
130
|
-
# Shared session for fetching attachment content; URLs starting with "data:"
# are answered by DataAdapter instead of going over the network.
attachments_requests = requests.Session()
attachments_requests.mount(prefix="data:", adapter=DataAdapter())
|
@@ -1,21 +0,0 @@
|
|
1
|
-
from abc import ABC, abstractmethod
|
2
|
-
from typing import Union
|
3
|
-
|
4
|
-
from oarepo_runtime.datastreams.types import StreamBatch
|
5
|
-
|
6
|
-
|
7
|
-
class BaseWriter(ABC):
    """Abstract base class for datastream writers."""

    def __init__(self, **kwargs) -> None:
        """Accept arbitrary keyword arguments so subclasses can extend the signature."""

    @abstractmethod
    def write(self, batch: StreamBatch) -> Union[StreamBatch, None]:
        """Write the batch to the target output.

        :param batch: the batch to write.
        :returns: a ``StreamBatch`` or ``None``.
        Raises WriterException in case of errors.
        """

    def finish(self):
        """Hook called when writing ends; the base implementation does nothing."""
|
@@ -1,92 +0,0 @@
|
|
1
|
-
import os
|
2
|
-
from pathlib import Path
|
3
|
-
|
4
|
-
import yaml
|
5
|
-
|
6
|
-
from oarepo_runtime.datastreams import StreamBatch, StreamEntry
|
7
|
-
|
8
|
-
from ..utils import attachments_requests
|
9
|
-
from . import BaseWriter
|
10
|
-
|
11
|
-
|
12
|
-
class AttachmentsFileWriter(BaseWriter):
    """
    Writes the files of each entry and their metadata into subdirectories.

    For an entry with sequence number ``seq`` the files are stored in
    ``<target>/<format_serial(seq)>/data/<key>`` and the metadata of all files
    in ``<target>/<format_serial(seq)>/data/metadata.yaml``.

    If one of the file keys is itself "metadata.yaml", the metadata file name
    is prefixed with "meta_" (repeatedly, if needed) until it is unique.
    """

    def __init__(self, *, target, base_path=None, **kwargs):
        """Constructor.

        :param target: output directory; taken relative to ``base_path`` if given.
        :param base_path: optional directory that ``target`` is joined to.
        """
        super().__init__(**kwargs)
        # Kept for compatibility; not read anywhere in this class.
        self._grouping = 3
        self._min_padding = 3
        if base_path:
            self._dir = Path(base_path).joinpath(target)
        else:
            self._dir = Path(target)

    def write(self, batch: StreamBatch, *args, **kwargs):
        """Write the files of every ok entry in the batch that has files.

        Each file of an entry carries a ``metadata`` dict and a ``content_url``;
        the metadata must contain the file ``key``, for example::

            {'updated': '...', 'mimetype': 'image/png', 'storage_class': 'L',
             'file_id': '', 'links': {...}, 'size': 27, 'status': 'completed',
             'version_id': '...', 'bucket_id': '...', 'metadata': None,
             'key': 'test.png', 'checksum': 'md5:...', 'created': '...'}
        """
        for entry in batch.entries:
            if entry.ok and entry.files:
                self.write_entry(entry)

    def write_entry(self, entry: StreamEntry):
        """Store all files of ``entry`` and a YAML file with their metadata.

        :returns: the entry that was passed in.
        :raises FileExistsError: if the entry's data directory already exists.
        :raises ValueError: if a file key is not a plain file name.
        """
        dirname = self._dir.joinpath(format_serial(entry.seq)) / "data"
        # exist_ok=False: refuse to silently mix files of two different runs.
        dirname.mkdir(parents=True, exist_ok=False)
        file_keys = []
        files_metadata = []
        for fn in entry.files:
            md = {**fn.metadata}
            key = md["key"]
            # The key comes from the stream data; never let it point outside
            # of the entry directory ("../x", absolute paths, nested paths).
            if Path(key).name != key:
                raise ValueError(f"File key {key!r} is not a plain file name")

            response = attachments_requests.get(fn.content_url)
            # Do not store an HTTP error page as if it were the file content.
            response.raise_for_status()
            content = response.content

            # cleanup of storage-specific technical metadata
            md.pop("storage_class", None)
            md.pop("file_id", None)
            md.pop("links", None)
            md.pop("status", None)
            md.pop("version_id", None)
            md.pop("bucket_id", None)

            file_keys.append(key)
            files_metadata.append(md)
            (dirname / key).write_bytes(content)

        # Avoid overwriting a data file that happens to share the metadata name.
        metadata_key = "metadata.yaml"
        while metadata_key in file_keys:
            metadata_key = "meta_" + metadata_key
        with open(dirname / metadata_key, "w", encoding="utf-8") as f:
            yaml.safe_dump_all(files_metadata, f)
        return entry

    def finish(self):
        """Finalizes writing"""
|
77
|
-
|
78
|
-
|
79
|
-
def format_serial(serial_no, grouping=3, min_padding=3):
    """Format a serial number as a nested relative path of fixed-width groups.

    The number is zero-padded to at least ``min_padding`` characters, rounded
    up to a multiple of ``grouping``, and then split into ``grouping``-sized
    chunks joined with ``os.sep``. With the defaults, ``1`` becomes ``"001"``
    and ``1234`` becomes ``"001/234"`` (using the platform separator).

    :param serial_no: the serial number; converted with ``str()``.
    :param grouping: number of characters per path component.
    :param min_padding: minimal width before rounding up to the group size.
    :raises ValueError: if ``grouping`` is smaller than 1.
    """
    if grouping < 1:
        raise ValueError("grouping must be a positive integer")
    digits = str(serial_no)
    width = max(min_padding, len(digits))
    # Round up to the next multiple of ``grouping``.
    width += -width % grouping
    padded_serial = digits.zfill(width)
    return os.sep.join(
        padded_serial[start : start + grouping]
        for start in range(0, len(padded_serial), grouping)
    )
|