oarepo-runtime 1.10.3__py3-none-any.whl → 2.0.0.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oarepo_runtime/__init__.py +24 -0
- oarepo_runtime/api.py +210 -0
- oarepo_runtime/cli/__init__.py +10 -21
- oarepo_runtime/cli/search.py +34 -0
- oarepo_runtime/config.py +98 -13
- oarepo_runtime/ext.py +64 -82
- oarepo_runtime/proxies.py +21 -5
- oarepo_runtime/records/__init__.py +11 -50
- oarepo_runtime/records/drafts.py +24 -18
- oarepo_runtime/records/mapping.py +84 -0
- oarepo_runtime/records/pid_providers.py +43 -7
- oarepo_runtime/records/systemfields/__init__.py +15 -33
- oarepo_runtime/records/systemfields/mapping.py +41 -24
- oarepo_runtime/records/systemfields/publication_status.py +61 -0
- oarepo_runtime/services/__init__.py +12 -0
- oarepo_runtime/services/config/__init__.py +15 -21
- oarepo_runtime/services/config/link_conditions.py +69 -75
- oarepo_runtime/services/config/permissions.py +62 -0
- oarepo_runtime/services/facets/__init__.py +12 -33
- oarepo_runtime/services/facets/params.py +45 -110
- oarepo_runtime/services/records/__init__.py +14 -1
- oarepo_runtime/services/records/links.py +21 -11
- oarepo_runtime/services/records/mapping.py +42 -0
- oarepo_runtime/services/results.py +98 -109
- oarepo_runtime/services/schema/__init__.py +12 -44
- oarepo_runtime/services/schema/i18n.py +47 -22
- oarepo_runtime/services/schema/i18n_ui.py +61 -24
- {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev4.dist-info}/METADATA +10 -21
- oarepo_runtime-2.0.0.dev4.dist-info/RECORD +32 -0
- {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev4.dist-info}/WHEEL +1 -2
- oarepo_runtime-2.0.0.dev4.dist-info/entry_points.txt +5 -0
- oarepo_runtime/cli/assets.py +0 -145
- oarepo_runtime/cli/base.py +0 -25
- oarepo_runtime/cli/cf.py +0 -15
- oarepo_runtime/cli/check.py +0 -167
- oarepo_runtime/cli/configuration.py +0 -51
- oarepo_runtime/cli/fixtures.py +0 -167
- oarepo_runtime/cli/index.py +0 -272
- oarepo_runtime/cli/permissions/__init__.py +0 -6
- oarepo_runtime/cli/permissions/base.py +0 -26
- oarepo_runtime/cli/permissions/evaluate.py +0 -63
- oarepo_runtime/cli/permissions/list.py +0 -239
- oarepo_runtime/cli/permissions/search.py +0 -121
- oarepo_runtime/cli/validate.py +0 -150
- oarepo_runtime/datastreams/__init__.py +0 -38
- oarepo_runtime/datastreams/asynchronous.py +0 -247
- oarepo_runtime/datastreams/catalogue.py +0 -150
- oarepo_runtime/datastreams/datastreams.py +0 -152
- oarepo_runtime/datastreams/errors.py +0 -54
- oarepo_runtime/datastreams/ext.py +0 -41
- oarepo_runtime/datastreams/fixtures.py +0 -265
- oarepo_runtime/datastreams/json.py +0 -4
- oarepo_runtime/datastreams/readers/__init__.py +0 -39
- oarepo_runtime/datastreams/readers/attachments.py +0 -51
- oarepo_runtime/datastreams/readers/excel.py +0 -123
- oarepo_runtime/datastreams/readers/json.py +0 -27
- oarepo_runtime/datastreams/readers/service.py +0 -54
- oarepo_runtime/datastreams/readers/yaml.py +0 -14
- oarepo_runtime/datastreams/semi_asynchronous.py +0 -91
- oarepo_runtime/datastreams/synchronous.py +0 -70
- oarepo_runtime/datastreams/transformers.py +0 -18
- oarepo_runtime/datastreams/types.py +0 -323
- oarepo_runtime/datastreams/utils.py +0 -131
- oarepo_runtime/datastreams/writers/__init__.py +0 -21
- oarepo_runtime/datastreams/writers/attachments_file.py +0 -92
- oarepo_runtime/datastreams/writers/attachments_service.py +0 -118
- oarepo_runtime/datastreams/writers/publish.py +0 -70
- oarepo_runtime/datastreams/writers/service.py +0 -175
- oarepo_runtime/datastreams/writers/utils.py +0 -30
- oarepo_runtime/datastreams/writers/validation_errors.py +0 -20
- oarepo_runtime/datastreams/writers/yaml.py +0 -56
- oarepo_runtime/ext_config.py +0 -67
- oarepo_runtime/i18n/__init__.py +0 -3
- oarepo_runtime/info/__init__.py +0 -0
- oarepo_runtime/info/check.py +0 -95
- oarepo_runtime/info/permissions/__init__.py +0 -0
- oarepo_runtime/info/permissions/debug.py +0 -191
- oarepo_runtime/info/views.py +0 -586
- oarepo_runtime/profile.py +0 -60
- oarepo_runtime/records/dumpers/__init__.py +0 -8
- oarepo_runtime/records/dumpers/edtf_interval.py +0 -38
- oarepo_runtime/records/dumpers/multilingual_dumper.py +0 -34
- oarepo_runtime/records/entity_resolvers/__init__.py +0 -13
- oarepo_runtime/records/entity_resolvers/proxies.py +0 -57
- oarepo_runtime/records/mappings/__init__.py +0 -0
- oarepo_runtime/records/mappings/rdm_parent_mapping.json +0 -483
- oarepo_runtime/records/owners/__init__.py +0 -3
- oarepo_runtime/records/owners/registry.py +0 -22
- oarepo_runtime/records/relations/__init__.py +0 -22
- oarepo_runtime/records/relations/base.py +0 -296
- oarepo_runtime/records/relations/internal.py +0 -46
- oarepo_runtime/records/relations/lookup.py +0 -28
- oarepo_runtime/records/relations/pid_relation.py +0 -102
- oarepo_runtime/records/systemfields/featured_file.py +0 -45
- oarepo_runtime/records/systemfields/has_draftcheck.py +0 -47
- oarepo_runtime/records/systemfields/icu.py +0 -371
- oarepo_runtime/records/systemfields/owner.py +0 -115
- oarepo_runtime/records/systemfields/record_status.py +0 -35
- oarepo_runtime/records/systemfields/selectors.py +0 -98
- oarepo_runtime/records/systemfields/synthetic.py +0 -130
- oarepo_runtime/resources/__init__.py +0 -4
- oarepo_runtime/resources/config.py +0 -12
- oarepo_runtime/resources/file_resource.py +0 -15
- oarepo_runtime/resources/json_serializer.py +0 -27
- oarepo_runtime/resources/localized_ui_json_serializer.py +0 -54
- oarepo_runtime/resources/resource.py +0 -53
- oarepo_runtime/resources/responses.py +0 -20
- oarepo_runtime/services/components.py +0 -429
- oarepo_runtime/services/config/draft_link.py +0 -23
- oarepo_runtime/services/config/permissions_presets.py +0 -174
- oarepo_runtime/services/config/service.py +0 -117
- oarepo_runtime/services/custom_fields/__init__.py +0 -80
- oarepo_runtime/services/custom_fields/mappings.py +0 -188
- oarepo_runtime/services/entity/__init__.py +0 -0
- oarepo_runtime/services/entity/config.py +0 -14
- oarepo_runtime/services/entity/schema.py +0 -9
- oarepo_runtime/services/entity/service.py +0 -48
- oarepo_runtime/services/expansions/__init__.py +0 -0
- oarepo_runtime/services/expansions/expandable_fields.py +0 -21
- oarepo_runtime/services/expansions/service.py +0 -4
- oarepo_runtime/services/facets/base.py +0 -12
- oarepo_runtime/services/facets/date.py +0 -72
- oarepo_runtime/services/facets/enum.py +0 -11
- oarepo_runtime/services/facets/facet_groups_names.py +0 -17
- oarepo_runtime/services/facets/max_facet.py +0 -13
- oarepo_runtime/services/facets/multilingual_facet.py +0 -33
- oarepo_runtime/services/facets/nested_facet.py +0 -32
- oarepo_runtime/services/facets/year_histogram.py +0 -200
- oarepo_runtime/services/files/__init__.py +0 -8
- oarepo_runtime/services/files/components.py +0 -62
- oarepo_runtime/services/files/service.py +0 -16
- oarepo_runtime/services/generators.py +0 -10
- oarepo_runtime/services/permissions/__init__.py +0 -3
- oarepo_runtime/services/permissions/generators.py +0 -103
- oarepo_runtime/services/relations/__init__.py +0 -0
- oarepo_runtime/services/relations/components.py +0 -15
- oarepo_runtime/services/relations/errors.py +0 -18
- oarepo_runtime/services/relations/mapping.py +0 -38
- oarepo_runtime/services/schema/cf.py +0 -13
- oarepo_runtime/services/schema/i18n_validation.py +0 -7
- oarepo_runtime/services/schema/marshmallow.py +0 -44
- oarepo_runtime/services/schema/marshmallow_to_json_schema.py +0 -72
- oarepo_runtime/services/schema/oneofschema.py +0 -192
- oarepo_runtime/services/schema/polymorphic.py +0 -21
- oarepo_runtime/services/schema/rdm.py +0 -146
- oarepo_runtime/services/schema/rdm_ui.py +0 -156
- oarepo_runtime/services/schema/ui.py +0 -251
- oarepo_runtime/services/schema/validation.py +0 -70
- oarepo_runtime/services/search.py +0 -282
- oarepo_runtime/services/service.py +0 -61
- oarepo_runtime/tasks.py +0 -6
- oarepo_runtime/translations/cs/LC_MESSAGES/messages.mo +0 -0
- oarepo_runtime/translations/cs/LC_MESSAGES/messages.po +0 -95
- oarepo_runtime/translations/default_translations.py +0 -6
- oarepo_runtime/translations/en/LC_MESSAGES/messages.mo +0 -0
- oarepo_runtime/translations/en/LC_MESSAGES/messages.po +0 -97
- oarepo_runtime/translations/messages.pot +0 -100
- oarepo_runtime/uow.py +0 -146
- oarepo_runtime/utils/__init__.py +0 -0
- oarepo_runtime/utils/functools.py +0 -37
- oarepo_runtime/utils/identity_utils.py +0 -35
- oarepo_runtime/utils/index.py +0 -11
- oarepo_runtime/utils/path.py +0 -97
- oarepo_runtime-1.10.3.dist-info/RECORD +0 -163
- oarepo_runtime-1.10.3.dist-info/entry_points.txt +0 -16
- oarepo_runtime-1.10.3.dist-info/top_level.txt +0 -2
- tests/marshmallow_to_json/__init__.py +0 -0
- tests/marshmallow_to_json/test_datacite_ui_schema.py +0 -1410
- tests/marshmallow_to_json/test_simple_schema.py +0 -52
- tests/pkg_data/__init__.py +0 -0
- {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev4.dist-info}/licenses/LICENSE +0 -0
@@ -1,323 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# This package was taken from Invenio vocabularies and modified to be more universal
|
3
|
-
#
|
4
|
-
import dataclasses
|
5
|
-
import json
|
6
|
-
import logging
|
7
|
-
import textwrap
|
8
|
-
import traceback
|
9
|
-
from typing import Any, Dict, List, Optional, Union
|
10
|
-
|
11
|
-
from .errors import DataStreamError
|
12
|
-
from .json import JSONObject
|
13
|
-
|
14
|
-
log = logging.getLogger("datastreams")
|
15
|
-
|
16
|
-
|
17
|
-
@dataclasses.dataclass
class StreamEntryError:
    """Serializable description of one error raised while processing a stream.

    Instances can be built from an exception (:meth:`from_exception`) or from
    plain JSON (:meth:`from_json`) and converted back to JSON via :attr:`json`.
    """

    # short identifier of the error; for generic exceptions the exception's
    # ``type`` attribute or its class name
    code: str
    # human readable message
    message: str
    # optional location string, taken from the exception or from the caller
    location: Optional[str] = None
    # optional JSON-serializable details (for generic exceptions: message,
    # exception class name and formatted stack)
    info: Union[JSONObject, None] = None

    @classmethod
    def from_exception(
        cls, exc: Exception, limit=30, message=None, location=None, info=None, code=None
    ):
        """Create an error from an exception.

        A ``DataStreamError`` already carries code, message, location and
        detail, so these are copied verbatim and all the other arguments are
        ignored. For any other exception the error is assembled from the
        arguments and from the exception itself.

        :param exc: the exception to convert.
        :param limit: maximum number of traceback entries stored in ``info["stack"]``.
        :param message: if truthy, used as the error message; otherwise
            ``exc.format_exception()`` is used when the exception provides it
            and ``str(exc)`` when it does not.
        :param location: stored as the error location.
        :param info: extra items merged into (and overriding) the generated info.
        :param code: if truthy, used as the error code; otherwise
            ``exc.type`` or the exception class name.
        :returns: a new instance of this class.
        """
        if isinstance(exc, DataStreamError):
            return cls(
                code=exc.code,
                message=exc.message,
                location=exc.location,
                info=exc.detail,
            )

        # Format the traceback of ``exc`` itself. ``traceback.format_exc()``
        # would format whatever exception is being handled right now, which is
        # a different one (or none at all) when this method is not called
        # directly from the matching ``except`` block. The three positional
        # argument form of ``format_exception`` is accepted by python 3.9 as
        # well as by python 3.10+.
        stack = "".join(
            traceback.format_exception(type(exc), exc, exc.__traceback__, limit=limit)
        )
        if message:
            formatted_exception = message
        elif hasattr(exc, "format_exception"):
            formatted_exception = exc.format_exception()
        else:
            formatted_exception = str(exc)

        return cls(
            code=code or getattr(exc, "type", type(exc).__name__),
            message=formatted_exception,
            location=location,
            info={
                "message": str(exc),
                "exception": type(exc).__name__,
                "stack": stack,
                # caller supplied items win over the generated ones
                **(info or {}),
            },
        )

    @property
    def json(self) -> JSONObject:
        """Return the error as a dictionary; falsy attributes are left out."""
        ret = {}
        if self.code:
            ret["code"] = self.code
        if self.message:
            ret["message"] = self.message
        if self.location:
            ret["location"] = self.location
        if self.info:
            ret["info"] = self.info
        return ret

    @classmethod
    def from_json(cls, js: JSONObject):
        """Inverse of :attr:`json`.

        :param js: dictionary with optional ``code``, ``message``, ``location``
            and ``info`` keys, or ``None``.
        :returns: a new instance, or ``None`` when ``js`` is ``None``.
        """
        if js is None:
            return None
        return cls(
            code=js.get("code"),
            message=js.get("message"),
            location=js.get("location"),
            info=js.get("info"),
        )

    def __str__(self):
        """Return ``code:location message`` followed by the indented info as JSON."""
        formatted_info = textwrap.indent(
            json.dumps(self.info or {}, ensure_ascii=False, indent=4), prefix=" "
        )
        return f"{self.code}:{self.location if self.location else ''} {self.message}\n{formatted_info}"

    def __repr__(self):
        """Same as :meth:`__str__`."""
        return str(self)
|
89
|
-
|
90
|
-
|
91
|
-
@dataclasses.dataclass
class StreamEntryFile:
    """A file attached to a stream entry: its metadata and the URL of its content."""

    metadata: JSONObject
    content_url: str
    "data url with the content of the file or any other resolvable url"

    @property
    def json(self) -> JSONObject:
        """Return the file as a dictionary with ``metadata`` and ``content_url``."""
        return dict(metadata=self.metadata, content_url=self.content_url)

    @classmethod
    def from_json(cls, js: JSONObject):
        """Build the file from a dictionary; both keys are required."""
        file_metadata = js["metadata"]
        file_url = js["content_url"]
        return cls(metadata=file_metadata, content_url=file_url)
|
110
|
-
|
111
|
-
|
112
|
-
@dataclasses.dataclass
class StreamEntry:
    """One item travelling through a data stream, with its processing state."""

    entry: JSONObject
    files: List[StreamEntryFile] = dataclasses.field(default_factory=list)
    seq: int = 0
    id: Optional[str] = None
    filtered: bool = False
    deleted: bool = False
    errors: List[StreamEntryError] = dataclasses.field(default_factory=list)
    context: JSONObject = dataclasses.field(default_factory=dict)

    @property
    def ok(self):
        """True when the entry is neither filtered out nor has any error."""
        return not (self.filtered or self.errors)

    @property
    def json(self) -> JSONObject:
        """Return the entry, including its errors and files, as a dictionary."""
        return dict(
            id=self.id,
            entry=self.entry,
            filtered=self.filtered,
            deleted=self.deleted,
            errors=[error.json for error in self.errors],
            context=self.context,
            seq=self.seq,
            files=[attached.json for attached in self.files],
        )

    @classmethod
    def from_json(cls, js):
        """Inverse of :attr:`json`; every key produced by it is required."""
        return cls(
            id=js["id"],
            entry=js["entry"],
            filtered=js["filtered"],
            deleted=js["deleted"],
            errors=list(map(StreamEntryError.from_json, js["errors"])),
            context=js["context"],
            seq=js["seq"],
            files=list(map(StreamEntryFile.from_json, js["files"])),
        )

    def __str__(self):
        """Return a multi-line, human readable dump of the entry."""

        def pretty(value):
            # indented JSON rendering shared by the content and context parts
            return textwrap.indent(
                json.dumps(value, ensure_ascii=False, indent=4), " "
            )

        lines = [
            f"Entry #{self.seq}: id {self.id or 'not yet set'}, filtered: {self.filtered}, deleted: {self.deleted}",
            "Content:",
            pretty(self.entry),
            "Context:",
            pretty(self.context),
        ]
        if self.errors:
            lines.append("Errors:")
            lines.extend(textwrap.indent(str(error), " ") for error in self.errors)
        return "\n".join(lines)
|
172
|
-
|
173
|
-
|
174
|
-
@dataclasses.dataclass
class StreamBatch:
    """A group of stream entries that is processed together.

    Errors stored in :attr:`errors` belong to the batch as a whole; when there
    are any, all the entries of the batch are reported as failed.
    """

    entries: List[StreamEntry]
    context: Dict[str, Any] = dataclasses.field(default_factory=dict)
    last: bool = False
    seq: int = 0
    errors: List[StreamEntryError] = dataclasses.field(default_factory=list)

    @property
    def ok_entries(self):
        """Entries whose ``ok`` is true; empty when the batch itself has errors."""
        if self.errors:
            return []
        return [x for x in self.entries if x.ok]

    @property
    def failed_entries(self):
        """Entries with errors; all the entries when the batch itself has errors."""
        if self.errors:
            return self.entries
        return [x for x in self.entries if x.errors]

    @property
    def skipped_entries(self):
        """Filtered entries; empty when the batch itself has errors."""
        if self.errors:
            return []
        return [x for x in self.entries if x.filtered]

    @property
    def deleted_entries(self):
        """Deleted entries; empty when the batch itself has errors."""
        if self.errors:
            return []
        return [x for x in self.entries if x.deleted]

    @property
    def json(self):
        """Return the batch, including its entries and errors, as a dictionary."""
        return {
            "entries": [x.json for x in self.entries],
            "context": self.context,
            "last": self.last,
            "seq": self.seq,
            "errors": [x.json for x in self.errors],
        }

    @classmethod
    def from_json(cls, js):
        """Inverse of :attr:`json`.

        :param js: dictionary as produced by :attr:`json`, or ``None``.
        :returns: a new batch, or ``None`` when ``js`` is ``None``.
        :raises Exception: whatever parsing of an entry raises; the offending
            input is logged before the exception is re-raised.
        """
        if js is None:
            return None
        try:
            # parse the entries only once and reuse the result below
            entries = [StreamEntry.from_json(x) for x in js["entries"]]
        except Exception:
            log.exception("Exception parsing %s", js)
            raise
        return cls(
            entries=entries,
            context=js["context"],
            last=js["last"],
            seq=js["seq"],
            errors=[StreamEntryError.from_json(x) for x in js["errors"]],
        )
|
232
|
-
|
233
|
-
|
234
|
-
class DataStreamCallback:
    """Callback that reports data stream events to the module logger."""

    def __init__(self, log_error_entry=False):
        """:param log_error_entry: if true, entries with errors are logged as well."""
        self.log_error_entry = log_error_entry

    def _log_batch_event(self, message, batch):
        # sequence number on INFO, the whole batch only when DEBUG is enabled
        log.info(message, batch.seq)
        if log.isEnabledFor(logging.DEBUG):
            log.debug("Content: %s", batch)

    def batch_started(self, batch):
        """Log that processing of the batch has started."""
        self._log_batch_event("Batch started: %s", batch)

    def batch_finished(self, batch: StreamBatch):
        """Log that the batch has finished, together with its errors."""
        self._log_batch_event("Batch finished: %s", batch)
        for batch_error in batch.errors:
            log.error("Failed batch: %s: %s", batch_error, batch.seq)
        if not self.log_error_entry:
            return
        for item in batch.entries:
            if not item.errors:
                continue
            log.error("Failed entry: %s in batch %s", item, batch.seq)

    def reader_error(self, reader, exception):
        """Log an error reported for a reader."""
        log.error("Reader error: %s: %s", reader, exception)

    def transformer_error(self, batch, transformer, exception):
        """Log an error reported for a transformer."""
        log.error("Transformer error: %s: %s", transformer, exception)

    def writer_error(self, batch, writer, exception):
        """Log an error reported for a writer."""
        log.error("Writer error: %s: %s", writer, exception)
|
262
|
-
|
263
|
-
|
264
|
-
class StatsKeepingDataStreamCallback(DataStreamCallback):
    """Logging callback that also counts batches, entries and errors."""

    def __init__(self, log_error_entry=False):
        """Start with all counters at zero."""
        super().__init__(log_error_entry=log_error_entry)

        self.started_batches_count = 0
        self.finished_batches_count = 0
        self.ok_entries_count = 0
        self.filtered_entries_count = 0
        self.deleted_entries_count = 0
        self.failed_entries_count = 0
        self.reader_errors_count = 0
        self.transformer_errors_count = 0
        self.writer_errors_count = 0

    def batch_started(self, batch):
        """Log the event and count the started batch."""
        super().batch_started(batch)
        self.started_batches_count += 1

    def batch_finished(self, batch: StreamBatch):
        """Log the event, count the finished batch and classify its entries."""
        super().batch_finished(batch)
        self.finished_batches_count += 1

        # an entry may fall into several categories (e.g. deleted and ok)
        ok = filtered = deleted = failed = 0
        for item in batch.entries:
            if item.ok:
                ok += 1
            if item.filtered:
                filtered += 1
            if item.deleted:
                deleted += 1
            if item.errors:
                failed += 1
        self.ok_entries_count += ok
        self.filtered_entries_count += filtered
        self.deleted_entries_count += deleted
        self.failed_entries_count += failed

    def reader_error(self, reader, exception):
        """Log the error and count it."""
        super().reader_error(reader, exception)
        self.reader_errors_count += 1

    def transformer_error(self, batch, transformer, exception):
        """Log the error and count it."""
        super().transformer_error(batch, transformer, exception)
        self.transformer_errors_count += 1

    def writer_error(self, batch, writer, exception):
        """Log the error and count it."""
        super().writer_error(batch, writer, exception)
        self.writer_errors_count += 1

    def stats(self):
        """Return a one-line summary; counters that are zero are left out."""
        labelled_counts = (
            ("ok", self.ok_entries_count),
            ("deleted", self.deleted_entries_count),
            ("filtered", self.filtered_entries_count),
            ("failed", self.failed_entries_count),
            ("reader errors", self.reader_errors_count),
            ("transformer errors", self.transformer_errors_count),
            ("writer errors", self.writer_errors_count),
        )
        parts = [f"{self.finished_batches_count} batches finished"]
        parts.extend(f"{label}: {count}" for label, count in labelled_counts if count)
        return ", ".join(parts)
|
@@ -1,131 +0,0 @@
|
|
1
|
-
from base64 import b64decode
|
2
|
-
|
3
|
-
import requests
|
4
|
-
from deprecated import deprecated
|
5
|
-
from flask import current_app
|
6
|
-
from invenio_drafts_resources.services import RecordService as DraftRecordService
|
7
|
-
from invenio_records_resources.proxies import current_service_registry
|
8
|
-
from invenio_records_resources.services import FileService, RecordService
|
9
|
-
from invenio_records_resources.services.records.results import RecordItem
|
10
|
-
from requests import PreparedRequest, Response
|
11
|
-
from requests.adapters import BaseAdapter
|
12
|
-
|
13
|
-
|
14
|
-
def get_record_service_for_record(record):
    """Return the record service responsible for ``record``.

    A falsy ``record`` gives ``None``. When the application config contains
    ``OAREPO_PRIMARY_RECORD_SERVICE`` the service is looked up by the record's
    class, otherwise the deprecated registry scan is used.
    """
    if not record:
        return None
    if "OAREPO_PRIMARY_RECORD_SERVICE" not in current_app.config:
        return get_record_service_for_record_deprecated(record)
    return get_record_service_for_record_class(type(record))
|
21
|
-
|
22
|
-
|
23
|
-
def get_record_service_for_record_class(record_cls):
    """Return the service registered as primary for ``record_cls``.

    The service id is read from the ``OAREPO_PRIMARY_RECORD_SERVICE`` mapping
    of the application config and resolved in the service registry.
    """
    primary_services = current_app.config["OAREPO_PRIMARY_RECORD_SERVICE"]
    return current_service_registry.get(primary_services[record_cls])
|
26
|
-
|
27
|
-
|
28
|
-
@deprecated(
    version="1.5.43", reason="Please recompile model to remove this deprecation warning"
)
def get_record_service_for_record_deprecated(record):
    """Find the record service for ``record`` by scanning the service registry.

    Only record services that are not file services are considered. A draft
    record is matched against ``draft_cls`` of draft-enabled services, any
    other record against ``record_cls`` of services without drafts. Returns
    ``None`` when nothing matches.
    """
    wants_draft = bool(getattr(record, "is_draft", False))
    cls_attribute = "draft_cls" if wants_draft else "record_cls"

    for candidate in current_service_registry._services.values():
        if not isinstance(candidate, RecordService) or isinstance(
            candidate, FileService
        ):
            continue
        if isinstance(candidate, DraftRecordService) != wants_draft:
            continue
        if getattr(candidate, cls_attribute, None) == type(record):
            return candidate
    return None
|
50
|
-
|
51
|
-
def get_file_service_for_record_class(record_class):
    """Return the first registered file service whose ``record_cls`` is ``record_class``.

    Returns ``None`` for a falsy ``record_class`` or when no file service matches.
    """
    if not record_class:
        return None

    registered = current_service_registry._services.values()
    return next(
        (
            candidate
            for candidate in registered
            if isinstance(candidate, FileService)
            and candidate.record_cls == record_class
        ),
        None,
    )
|
61
|
-
|
62
|
-
def get_file_service_for_file_record_class(file_record_class):
    """Return the file service for the record class that owns ``file_record_class``."""
    return get_file_service_for_record_class(file_record_class.record_cls)
|
65
|
-
|
66
|
-
def get_file_service_for_record_service(
    record_service, check_draft_files=True, record=None
):
    """Return the file service that belongs to ``record_service``.

    ``record_service.draft_files`` is preferred when ``check_draft_files`` is
    true and ``record`` is not a record whose ``is_draft`` is ``False``; then
    ``record_service.files`` is tried. If neither attribute is a file service,
    the registry is searched by the ``record_cls`` of the service config.
    """
    if isinstance(record, RecordItem):
        # unwrap the service result to get the underlying record
        record = record._record

    not_a_draft = bool(record) and getattr(record, "is_draft", False) is False
    attribute_names = ["files"]
    if check_draft_files and not not_a_draft:
        attribute_names.insert(0, "draft_files")

    for attribute_name in attribute_names:
        if not hasattr(record_service, attribute_name):
            continue
        candidate = getattr(record_service, attribute_name)
        if isinstance(candidate, FileService):
            return candidate

    fallback_record_cls = getattr(record_service.config, "record_cls", None)
    return get_file_service_for_record_class(fallback_record_cls)
|
86
|
-
|
87
|
-
|
88
|
-
def get_record_service_for_file_service(file_service, record=None):
    """Find the record service whose record class is handled by ``file_service``.

    For a draft ``record`` draft-enabled services are matched on ``draft_cls``,
    otherwise services without drafts are matched on ``record_cls``.

    :raises KeyError: when no registered record service matches.
    """
    expect_draft_service = bool(record and getattr(record, "is_draft", False))
    cls_attribute = "draft_cls" if expect_draft_service else "record_cls"

    for candidate in current_service_registry._services.values():
        if not isinstance(candidate, RecordService):
            continue
        if isinstance(candidate, DraftRecordService) != expect_draft_service:
            continue
        if getattr(candidate, cls_attribute, None) == file_service.record_cls:
            return candidate

    raise KeyError(
        f"Could not get service for file service {file_service}, draft {expect_draft_service}"
    )
|
108
|
-
|
109
|
-
|
110
|
-
class DataAdapter(BaseAdapter):
    """Transport adapter answering ``data:`` urls from the url itself."""

    def send(
        self,
        request: PreparedRequest,
        stream=False,
        timeout=None,
        verify=True,
        cert=None,
        proxies=None,
    ):
        """Return a 200 response whose content is the base64-decoded url.

        Everything left after removing ``data:`` from the url is decoded; the
        remaining arguments are accepted but not used.
        """
        encoded_payload = request.url.replace("data:", "")
        response = Response()
        response.status_code = 200
        response._content = b64decode(encoded_payload)
        return response

    def close(self):
        """Nothing to clean up."""
        return None
|
128
|
-
|
129
|
-
|
130
|
-
attachments_requests = requests.Session()
|
131
|
-
attachments_requests.mount("data:", DataAdapter())
|
@@ -1,21 +0,0 @@
|
|
1
|
-
from abc import ABC, abstractmethod
|
2
|
-
from typing import Union
|
3
|
-
|
4
|
-
from oarepo_runtime.datastreams.types import StreamBatch
|
5
|
-
|
6
|
-
|
7
|
-
class BaseWriter(ABC):
    """Abstract base class of the writers."""

    def __init__(self, **kwargs) -> None:
        """Accept arbitrary keyword arguments so that extensions can add their own."""

    @abstractmethod
    def write(self, batch: StreamBatch) -> Union[StreamBatch, None]:
        """Write the entries of the batch to the target output.

        :param batch: the batch to be written.
        :returns: a batch or nothing, as declared by the return annotation.

        Raises WriterException in case of errors.
        """

    def finish(self):
        """Hook for finalizing the output; does nothing by default."""
        return None
|
@@ -1,92 +0,0 @@
|
|
1
|
-
import os
|
2
|
-
from pathlib import Path
|
3
|
-
|
4
|
-
import yaml
|
5
|
-
|
6
|
-
from oarepo_runtime.datastreams import StreamBatch, StreamEntry
|
7
|
-
|
8
|
-
from ..utils import attachments_requests
|
9
|
-
from . import BaseWriter
|
10
|
-
|
11
|
-
|
12
|
-
class AttachmentsFileWriter(BaseWriter):
    """
    Writes the files of stream entries and their metadata into subdirectories.

    For each entry that is ok and has files, the directory
    <target>/<serial>/data is created, where <serial> is the sequence number
    of the entry formatted by ``format_serial``. The content of each file is
    stored there under the file's key and the metadata of all the files are
    dumped, as a multi-document YAML stream, to "metadata.yaml".

    If one of the files uses the key "metadata.yaml" itself, "meta_" is
    prepended to the name of the metadata file until it does not clash.
    """

    def __init__(self, *, target, base_path=None, **kwargs):
        """Constructor.

        :param target: output directory; taken relative to ``base_path`` if that is given.
        :param base_path: optional directory that ``target`` is joined to.
        """
        super().__init__(**kwargs)
        # NOTE(review): these two are not read anywhere in this class -
        # format_serial() uses its own values. Kept in case subclasses or
        # external code rely on the attributes.
        self._grouping = 3
        self._min_padding = 3
        if base_path:
            self._dir = Path(base_path).joinpath(target)
        else:
            self._dir = Path(target)

    def write(self, batch: StreamBatch, *args, **kwargs):
        """Writes the files of all ok entries of the batch that have any files.

        Extra positional and keyword arguments are accepted and ignored.
        Nothing is returned.
        """
        # a file of an entry looks like:
        #   metadata: {'updated': '...', 'mimetype': 'image/png', 'storage_class': 'L',
        #              'file_id': '', 'links': {...}, 'size': 27, 'status': 'completed',
        #              'version_id': '...', 'bucket_id': '...', 'metadata': None,
        #              'key': 'test.png', 'checksum': 'md5:...', 'created': '...'}
        #   content_url: url that the content is downloaded from
        for entry in batch.entries:
            if entry.ok and entry.files:
                self.write_entry(entry)

    def write_entry(self, entry: StreamEntry):
        """Writes the files of a single entry and their metadata.

        :param entry: the entry whose files are written.
        :returns: the same entry.
        :raises FileExistsError: if the directory of the entry already exists.
        :raises requests.HTTPError: if downloading the content of a file fails.
        """
        dirname = self._dir.joinpath(format_serial(entry.seq)) / "data"
        # an existing directory means the serial number was already written
        dirname.mkdir(parents=True, exist_ok=False)
        file_keys = []
        files_metadata = []
        for fn in entry.files:
            md = {**fn.metadata}
            response = attachments_requests.get(fn.content_url)
            # do not store the body of an error response as the file content
            response.raise_for_status()
            # cleanup of the repository-internal technical metadata
            for technical_key in (
                "storage_class",
                "file_id",
                "links",
                "status",
                "version_id",
                "bucket_id",
            ):
                md.pop(technical_key, None)
            key = md["key"]
            file_keys.append(key)
            files_metadata.append(md)
            # NOTE(review): the key is used verbatim as the file name; keys
            # containing path separators are not handled here - confirm that
            # they can not appear.
            (dirname / key).write_bytes(response.content)
        metadata_key = "metadata.yaml"
        while metadata_key in file_keys:
            metadata_key = "meta_" + metadata_key
        with open(dirname / metadata_key, "w", encoding="utf-8") as f:
            yaml.safe_dump_all(files_metadata, f)
        return entry

    def finish(self):
        """Finalizes writing"""
|
77
|
-
|
78
|
-
|
79
|
-
def format_serial(serial_no, grouping=3, min_padding=3):
    """Format a serial number as a relative path of fixed-size digit groups.

    The number is converted to a string, left-padded with zeros to at least
    ``min_padding`` characters and further to a multiple of ``grouping``, and
    then split to groups of ``grouping`` characters joined by ``os.sep``.
    With the defaults, 1 gives "001" and 1234 gives "001<sep>234".

    :param serial_no: the serial number (anything accepted by ``str``).
    :param grouping: number of characters in each path component.
    :param min_padding: minimal number of characters before grouping.
    :returns: the formatted path as a string.
    :raises ValueError: if ``grouping`` is not positive.
    """
    if grouping < 1:
        raise ValueError("grouping must be a positive integer")
    serial_no = str(serial_no)
    formatted_length = max(min_padding, len(serial_no))
    # round up to the nearest multiple of the group size
    formatted_length += -formatted_length % grouping
    padded_serial = serial_no.zfill(formatted_length)
    return os.sep.join(
        padded_serial[i : i + grouping]
        for i in range(0, len(padded_serial), grouping)
    )
|
@@ -1,118 +0,0 @@
|
|
1
|
-
from io import BytesIO
|
2
|
-
|
3
|
-
from invenio_access.permissions import system_identity
|
4
|
-
from invenio_records_resources.proxies import current_service_registry
|
5
|
-
from invenio_records_resources.services.uow import UnitOfWork
|
6
|
-
|
7
|
-
from ...uow import BulkUnitOfWork
|
8
|
-
from ...utils.identity_utils import get_user_and_identity
|
9
|
-
from ..types import StreamBatch, StreamEntry
|
10
|
-
from ..utils import attachments_requests, get_file_service_for_record_service
|
11
|
-
from . import BaseWriter
|
12
|
-
from .utils import record_invenio_exceptions
|
13
|
-
|
14
|
-
|
15
|
-
class AttachmentsServiceWriter(BaseWriter):
    """Writes the files of stream entries to a repository through a file service."""

    def __init__(
        self,
        *,
        service,
        identity=None,
        update=False,
        **kwargs,
    ):
        """Constructor.

        :param service: a record service instance or a key of the
                        service registry.
        :param identity: access identity. A string is looked up as a user
                         e-mail and an integer as a user id; a falsy value
                         means the system identity.
        :param update: if True, files that already exist are deleted and
                       written again; if False they are left untouched.
        """
        super().__init__(**kwargs)

        if isinstance(service, str):
            service = current_service_registry.get(service)

        if isinstance(identity, str):
            _, identity = get_user_and_identity(email=identity)
        elif isinstance(identity, int):
            _, identity = get_user_and_identity(user_id=identity)
        self._identity = identity or system_identity
        self._update = update

        self._record_cls = getattr(service.config, "record_cls", None)
        self._file_service = get_file_service_for_record_service(service)

    def _get_stream_entry_id(self, entry: StreamEntry):
        """Returns the id under which the files of the entry are stored."""
        return entry.id

    def write(self, batch: StreamBatch):
        """Writes the files of the entries of the batch using the file service.

        Entries that are not ok, are deleted or do not have files enabled are
        skipped. All the writes share one unit of work that is committed at
        the end.

        :returns: the batch that was passed in.
        """

        with BulkUnitOfWork() as uow:
            for entry in batch.entries:
                # NOTE(review): an entry without the "files" section raises
                # KeyError here, outside of record_invenio_exceptions -
                # confirm that the section is always present.
                if not entry.ok or entry.deleted or not entry.entry["files"]["enabled"]:
                    continue
                with record_invenio_exceptions(entry):
                    self._write_attachments(entry, uow)

            uow.commit()

        return batch

    def _write_attachments(self, stream_entry: StreamEntry, uow: UnitOfWork):
        """Synchronizes the stored files of one entry with ``stream_entry.files``.

        Files from the stream are created (existing ones are skipped, or
        replaced if ``update`` was requested). Stored files whose key is not
        present in the stream entry are deleted.

        :raises requests.HTTPError: if downloading the content of a file fails.
        """
        service_kwargs = {}
        if uow:
            service_kwargs["uow"] = uow
        entry_id = self._get_stream_entry_id(stream_entry)

        existing_files = self._file_service.list_files(self._identity, entry_id)
        existing_files = {f["key"]: f for f in existing_files.entries}

        for f in stream_entry.files:
            key = f.metadata["key"]
            already_stored = key in existing_files
            if already_stored and not self._update:
                continue

            # Download first and fail on an error response, so that neither an
            # error page is stored as the content nor the stored file is
            # deleted before its replacement is known to be available.
            response = attachments_requests.get(f.content_url)
            response.raise_for_status()

            if already_stored:
                # TODO: compare if the file should be deleted and re-created
                # if so, delete the file and create again
                self._file_service.delete_file(
                    self._identity, entry_id, key, **service_kwargs
                )

            self._file_service.init_files(
                self._identity,
                entry_id,
                [{"key": key}],
                **service_kwargs,
            )
            metadata = f.metadata.get("metadata", {})
            if metadata:
                self._file_service.update_file_metadata(
                    self._identity,
                    entry_id,
                    file_key=key,
                    data=metadata,
                    **service_kwargs,
                )
            self._file_service.set_file_content(
                self._identity,
                entry_id,
                key,
                BytesIO(response.content),
                **service_kwargs,
            )
            self._file_service.commit_file(
                self._identity, entry_id, key, **service_kwargs
            )
        new_files_keys = {f.metadata["key"] for f in stream_entry.files}

        # remove the stored files that are no longer part of the entry
        for existing_file_key in existing_files:
            if existing_file_key not in new_files_keys:
                self._file_service.delete_file(
                    self._identity, entry_id, existing_file_key, **service_kwargs
                )
|