oarepo-runtime 1.10.3__py3-none-any.whl → 2.0.0.dev3__py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
Files changed (171)
  1. oarepo_runtime/__init__.py +24 -0
  2. oarepo_runtime/api.py +111 -0
  3. oarepo_runtime/cli/__init__.py +10 -21
  4. oarepo_runtime/cli/search.py +34 -0
  5. oarepo_runtime/config.py +86 -13
  6. oarepo_runtime/ext.py +64 -82
  7. oarepo_runtime/proxies.py +21 -5
  8. oarepo_runtime/records/__init__.py +11 -50
  9. oarepo_runtime/records/drafts.py +24 -18
  10. oarepo_runtime/records/mapping.py +84 -0
  11. oarepo_runtime/records/pid_providers.py +43 -7
  12. oarepo_runtime/records/systemfields/__init__.py +15 -33
  13. oarepo_runtime/records/systemfields/mapping.py +41 -24
  14. oarepo_runtime/records/systemfields/publication_status.py +59 -0
  15. oarepo_runtime/services/__init__.py +12 -0
  16. oarepo_runtime/services/config/__init__.py +15 -21
  17. oarepo_runtime/services/config/link_conditions.py +69 -75
  18. oarepo_runtime/services/config/permissions.py +62 -0
  19. oarepo_runtime/services/records/__init__.py +14 -1
  20. oarepo_runtime/services/records/links.py +21 -11
  21. oarepo_runtime/services/records/mapping.py +42 -0
  22. oarepo_runtime/services/results.py +98 -109
  23. oarepo_runtime/services/schema/__init__.py +12 -44
  24. oarepo_runtime/services/schema/i18n.py +47 -22
  25. oarepo_runtime/services/schema/i18n_ui.py +61 -24
  26. {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev3.dist-info}/METADATA +9 -21
  27. oarepo_runtime-2.0.0.dev3.dist-info/RECORD +30 -0
  28. {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev3.dist-info}/WHEEL +1 -2
  29. oarepo_runtime-2.0.0.dev3.dist-info/entry_points.txt +5 -0
  30. oarepo_runtime/cli/assets.py +0 -145
  31. oarepo_runtime/cli/base.py +0 -25
  32. oarepo_runtime/cli/cf.py +0 -15
  33. oarepo_runtime/cli/check.py +0 -167
  34. oarepo_runtime/cli/configuration.py +0 -51
  35. oarepo_runtime/cli/fixtures.py +0 -167
  36. oarepo_runtime/cli/index.py +0 -272
  37. oarepo_runtime/cli/permissions/__init__.py +0 -6
  38. oarepo_runtime/cli/permissions/base.py +0 -26
  39. oarepo_runtime/cli/permissions/evaluate.py +0 -63
  40. oarepo_runtime/cli/permissions/list.py +0 -239
  41. oarepo_runtime/cli/permissions/search.py +0 -121
  42. oarepo_runtime/cli/validate.py +0 -150
  43. oarepo_runtime/datastreams/__init__.py +0 -38
  44. oarepo_runtime/datastreams/asynchronous.py +0 -247
  45. oarepo_runtime/datastreams/catalogue.py +0 -150
  46. oarepo_runtime/datastreams/datastreams.py +0 -152
  47. oarepo_runtime/datastreams/errors.py +0 -54
  48. oarepo_runtime/datastreams/ext.py +0 -41
  49. oarepo_runtime/datastreams/fixtures.py +0 -265
  50. oarepo_runtime/datastreams/json.py +0 -4
  51. oarepo_runtime/datastreams/readers/__init__.py +0 -39
  52. oarepo_runtime/datastreams/readers/attachments.py +0 -51
  53. oarepo_runtime/datastreams/readers/excel.py +0 -123
  54. oarepo_runtime/datastreams/readers/json.py +0 -27
  55. oarepo_runtime/datastreams/readers/service.py +0 -54
  56. oarepo_runtime/datastreams/readers/yaml.py +0 -14
  57. oarepo_runtime/datastreams/semi_asynchronous.py +0 -91
  58. oarepo_runtime/datastreams/synchronous.py +0 -70
  59. oarepo_runtime/datastreams/transformers.py +0 -18
  60. oarepo_runtime/datastreams/types.py +0 -323
  61. oarepo_runtime/datastreams/utils.py +0 -131
  62. oarepo_runtime/datastreams/writers/__init__.py +0 -21
  63. oarepo_runtime/datastreams/writers/attachments_file.py +0 -92
  64. oarepo_runtime/datastreams/writers/attachments_service.py +0 -118
  65. oarepo_runtime/datastreams/writers/publish.py +0 -70
  66. oarepo_runtime/datastreams/writers/service.py +0 -175
  67. oarepo_runtime/datastreams/writers/utils.py +0 -30
  68. oarepo_runtime/datastreams/writers/validation_errors.py +0 -20
  69. oarepo_runtime/datastreams/writers/yaml.py +0 -56
  70. oarepo_runtime/ext_config.py +0 -67
  71. oarepo_runtime/i18n/__init__.py +0 -3
  72. oarepo_runtime/info/__init__.py +0 -0
  73. oarepo_runtime/info/check.py +0 -95
  74. oarepo_runtime/info/permissions/__init__.py +0 -0
  75. oarepo_runtime/info/permissions/debug.py +0 -191
  76. oarepo_runtime/info/views.py +0 -586
  77. oarepo_runtime/profile.py +0 -60
  78. oarepo_runtime/records/dumpers/__init__.py +0 -8
  79. oarepo_runtime/records/dumpers/edtf_interval.py +0 -38
  80. oarepo_runtime/records/dumpers/multilingual_dumper.py +0 -34
  81. oarepo_runtime/records/entity_resolvers/__init__.py +0 -13
  82. oarepo_runtime/records/entity_resolvers/proxies.py +0 -57
  83. oarepo_runtime/records/mappings/__init__.py +0 -0
  84. oarepo_runtime/records/mappings/rdm_parent_mapping.json +0 -483
  85. oarepo_runtime/records/owners/__init__.py +0 -3
  86. oarepo_runtime/records/owners/registry.py +0 -22
  87. oarepo_runtime/records/relations/__init__.py +0 -22
  88. oarepo_runtime/records/relations/base.py +0 -296
  89. oarepo_runtime/records/relations/internal.py +0 -46
  90. oarepo_runtime/records/relations/lookup.py +0 -28
  91. oarepo_runtime/records/relations/pid_relation.py +0 -102
  92. oarepo_runtime/records/systemfields/featured_file.py +0 -45
  93. oarepo_runtime/records/systemfields/has_draftcheck.py +0 -47
  94. oarepo_runtime/records/systemfields/icu.py +0 -371
  95. oarepo_runtime/records/systemfields/owner.py +0 -115
  96. oarepo_runtime/records/systemfields/record_status.py +0 -35
  97. oarepo_runtime/records/systemfields/selectors.py +0 -98
  98. oarepo_runtime/records/systemfields/synthetic.py +0 -130
  99. oarepo_runtime/resources/__init__.py +0 -4
  100. oarepo_runtime/resources/config.py +0 -12
  101. oarepo_runtime/resources/file_resource.py +0 -15
  102. oarepo_runtime/resources/json_serializer.py +0 -27
  103. oarepo_runtime/resources/localized_ui_json_serializer.py +0 -54
  104. oarepo_runtime/resources/resource.py +0 -53
  105. oarepo_runtime/resources/responses.py +0 -20
  106. oarepo_runtime/services/components.py +0 -429
  107. oarepo_runtime/services/config/draft_link.py +0 -23
  108. oarepo_runtime/services/config/permissions_presets.py +0 -174
  109. oarepo_runtime/services/config/service.py +0 -117
  110. oarepo_runtime/services/custom_fields/__init__.py +0 -80
  111. oarepo_runtime/services/custom_fields/mappings.py +0 -188
  112. oarepo_runtime/services/entity/__init__.py +0 -0
  113. oarepo_runtime/services/entity/config.py +0 -14
  114. oarepo_runtime/services/entity/schema.py +0 -9
  115. oarepo_runtime/services/entity/service.py +0 -48
  116. oarepo_runtime/services/expansions/__init__.py +0 -0
  117. oarepo_runtime/services/expansions/expandable_fields.py +0 -21
  118. oarepo_runtime/services/expansions/service.py +0 -4
  119. oarepo_runtime/services/facets/__init__.py +0 -33
  120. oarepo_runtime/services/facets/base.py +0 -12
  121. oarepo_runtime/services/facets/date.py +0 -72
  122. oarepo_runtime/services/facets/enum.py +0 -11
  123. oarepo_runtime/services/facets/facet_groups_names.py +0 -17
  124. oarepo_runtime/services/facets/max_facet.py +0 -13
  125. oarepo_runtime/services/facets/multilingual_facet.py +0 -33
  126. oarepo_runtime/services/facets/nested_facet.py +0 -32
  127. oarepo_runtime/services/facets/params.py +0 -192
  128. oarepo_runtime/services/facets/year_histogram.py +0 -200
  129. oarepo_runtime/services/files/__init__.py +0 -8
  130. oarepo_runtime/services/files/components.py +0 -62
  131. oarepo_runtime/services/files/service.py +0 -16
  132. oarepo_runtime/services/generators.py +0 -10
  133. oarepo_runtime/services/permissions/__init__.py +0 -3
  134. oarepo_runtime/services/permissions/generators.py +0 -103
  135. oarepo_runtime/services/relations/__init__.py +0 -0
  136. oarepo_runtime/services/relations/components.py +0 -15
  137. oarepo_runtime/services/relations/errors.py +0 -18
  138. oarepo_runtime/services/relations/mapping.py +0 -38
  139. oarepo_runtime/services/schema/cf.py +0 -13
  140. oarepo_runtime/services/schema/i18n_validation.py +0 -7
  141. oarepo_runtime/services/schema/marshmallow.py +0 -44
  142. oarepo_runtime/services/schema/marshmallow_to_json_schema.py +0 -72
  143. oarepo_runtime/services/schema/oneofschema.py +0 -192
  144. oarepo_runtime/services/schema/polymorphic.py +0 -21
  145. oarepo_runtime/services/schema/rdm.py +0 -146
  146. oarepo_runtime/services/schema/rdm_ui.py +0 -156
  147. oarepo_runtime/services/schema/ui.py +0 -251
  148. oarepo_runtime/services/schema/validation.py +0 -70
  149. oarepo_runtime/services/search.py +0 -282
  150. oarepo_runtime/services/service.py +0 -61
  151. oarepo_runtime/tasks.py +0 -6
  152. oarepo_runtime/translations/cs/LC_MESSAGES/messages.mo +0 -0
  153. oarepo_runtime/translations/cs/LC_MESSAGES/messages.po +0 -95
  154. oarepo_runtime/translations/default_translations.py +0 -6
  155. oarepo_runtime/translations/en/LC_MESSAGES/messages.mo +0 -0
  156. oarepo_runtime/translations/en/LC_MESSAGES/messages.po +0 -97
  157. oarepo_runtime/translations/messages.pot +0 -100
  158. oarepo_runtime/uow.py +0 -146
  159. oarepo_runtime/utils/__init__.py +0 -0
  160. oarepo_runtime/utils/functools.py +0 -37
  161. oarepo_runtime/utils/identity_utils.py +0 -35
  162. oarepo_runtime/utils/index.py +0 -11
  163. oarepo_runtime/utils/path.py +0 -97
  164. oarepo_runtime-1.10.3.dist-info/RECORD +0 -163
  165. oarepo_runtime-1.10.3.dist-info/entry_points.txt +0 -16
  166. oarepo_runtime-1.10.3.dist-info/top_level.txt +0 -2
  167. tests/marshmallow_to_json/__init__.py +0 -0
  168. tests/marshmallow_to_json/test_datacite_ui_schema.py +0 -1410
  169. tests/marshmallow_to_json/test_simple_schema.py +0 -52
  170. tests/pkg_data/__init__.py +0 -0
  171. {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev3.dist-info}/licenses/LICENSE +0 -0
oarepo_runtime/datastreams/synchronous.py (deleted)
@@ -1,70 +0,0 @@
- #
- # This package was taken from Invenio vocabularies and modified to be more universal
- #
- import logging
- from typing import List
-
- from ..proxies import current_datastreams
- from .datastreams import AbstractDataStream, DataStreamChain
- from .transformers import BaseTransformer
- from .types import DataStreamCallback, StreamEntryError
- from .writers import BaseWriter
-
- log = logging.getLogger("datastreams")
-
-
- class SynchronousDataStreamChain(DataStreamChain):
-     def __init__(self, transformers: List[BaseTransformer], writers: List[BaseWriter]):
-         self._transformers = transformers
-         self._writers = writers
-
-     def process(self, batch, callback: DataStreamCallback):
-         callback.batch_started(batch)
-         for transformer in self._transformers:
-             try:
-                 batch = transformer.apply(batch) or batch
-             except Exception as ex:
-                 if log.getEffectiveLevel():
-                     log.error(
-                         "Unexpected error in transformer: %s: %s",
-                         repr(transformer),
-                         repr(batch),
-                     )
-                 batch.errors.append(StreamEntryError.from_exception(ex))
-                 callback.transformer_error(batch, transformer, exception=ex)
-
-         for writer in self._writers:
-             try:
-                 batch = writer.write(batch) or batch
-             except Exception as ex:
-                 if log.getEffectiveLevel():
-                     log.error(
-                         "Unexpected error in writer: %s: %s", repr(writer), repr(batch)
-                     )
-                 batch.errors.append(StreamEntryError.from_exception(ex))
-                 callback.writer_error(batch, writer, exception=ex)
-         callback.batch_finished(batch)
-
-     def finish(self, callback: DataStreamCallback):
-         for writer in self._writers:
-             try:
-                 writer.finish()
-             except Exception as e:
-                 log.error("Unexpected error in writer: %s", repr(writer))
-                 callback.writer_error(batch=None, writer=writer, exception=e)
-
-
- class SynchronousDataStream(AbstractDataStream):
-     """Data stream."""
-
-     def build_chain(self, identity) -> DataStreamChain:
-         return SynchronousDataStreamChain(
-             transformers=[
-                 current_datastreams.get_transformer(tr, identity=identity)
-                 for tr in self._transformers
-             ],
-             writers=[
-                 current_datastreams.get_writer(wr, identity=identity)
-                 for wr in self._writers
-             ],
-         )
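Note: the hunk above removes the synchronous datastream runner. Below is a minimal sketch of how it was driven in 1.10.x (assuming that version is still installed); the UppercaseTitle transformer and CollectingWriter are illustrative only, not classes shipped by the package.

    from oarepo_runtime.datastreams.synchronous import SynchronousDataStreamChain
    from oarepo_runtime.datastreams.transformers import BaseTransformer
    from oarepo_runtime.datastreams.types import DataStreamCallback, StreamBatch, StreamEntry
    from oarepo_runtime.datastreams.writers import BaseWriter

    class UppercaseTitle(BaseTransformer):
        def apply(self, batch, *args, **kwargs):
            # transform entries in place; returning the batch keeps the chain going
            for entry in batch.entries:
                entry.entry["title"] = entry.entry["title"].upper()
            return batch

    class CollectingWriter(BaseWriter):
        def __init__(self, **kwargs):
            super().__init__(**kwargs)
            self.written = []

        def write(self, batch):
            # only entries that are not filtered and have no errors are in ok_entries
            self.written.extend(e.entry for e in batch.ok_entries)
            return batch

    writer = CollectingWriter()
    chain = SynchronousDataStreamChain(transformers=[UppercaseTitle()], writers=[writer])
    callback = DataStreamCallback()
    chain.process(StreamBatch(entries=[StreamEntry(entry={"title": "hello"})], last=True), callback)
    chain.finish(callback)
    print(writer.written)  # [{'title': 'HELLO'}]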
oarepo_runtime/datastreams/transformers.py (deleted)
@@ -1,18 +0,0 @@
- from abc import ABC, abstractmethod
- from typing import Union
-
- from oarepo_runtime.datastreams.types import StreamBatch
-
-
- class BaseTransformer(ABC):
-     """Base transformer."""
-
-     def __init__(self, **kwargs) -> None:
-         pass
-
-     @abstractmethod
-     def apply(self, batch: StreamBatch, *args, **kwargs) -> Union[StreamBatch, None]:
-         """Applies the transformation to the entry.
-         :returns: A StreamEntry. The transformed entry.
-         Raises TransformerError in case of errors.
-         """
oarepo_runtime/datastreams/types.py (deleted)
@@ -1,323 +0,0 @@
- #
- # This package was taken from Invenio vocabularies and modified to be more universal
- #
- import dataclasses
- import json
- import logging
- import textwrap
- import traceback
- from typing import Any, Dict, List, Optional, Union
-
- from .errors import DataStreamError
- from .json import JSONObject
-
- log = logging.getLogger("datastreams")
-
-
- @dataclasses.dataclass
- class StreamEntryError:
-     code: str
-     message: str
-     location: Optional[str] = None
-     info: Union[JSONObject, None] = None
-
-     @classmethod
-     def from_exception(
-         cls, exc: Exception, limit=30, message=None, location=None, info=None, code=None
-     ):
-         if isinstance(exc, DataStreamError):
-             return cls(
-                 code=exc.code,
-                 message=exc.message,
-                 location=exc.location,
-                 info=exc.detail,
-             )
-
-         # can not use format_exception here as the signature is different for python 3.9 and python 3.10
-         stack = traceback.format_exc(limit=limit)
-         if message:
-             formatted_exception = message
-         elif hasattr(exc, "format_exception"):
-             formatted_exception = exc.format_exception()
-         else:
-             formatted_exception = str(exc)
-
-         return cls(
-             code=code or getattr(exc, "type", type(exc).__name__),
-             message=formatted_exception,
-             location=location,
-             info={
-                 "message": str(exc),
-                 "exception": type(exc).__name__,
-                 "stack": stack,
-                 **(info or {}),
-             },
-         )
-
-     @property
-     def json(self) -> JSONObject:
-         ret = {}
-         if self.code:
-             ret["code"] = self.code
-         if self.message:
-             ret["message"] = self.message
-         if self.location:
-             ret["location"] = self.location
-         if self.info:
-             ret["info"] = self.info
-         return ret
-
-     @classmethod
-     def from_json(cls, js: JSONObject):
-         if js is None:
-             return None
-         return cls(
-             code=js.get("code"),
-             message=js.get("message"),
-             location=js.get("location"),
-             info=js.get("info"),
-         )
-
-     def __str__(self):
-         formatted_info = textwrap.indent(
-             json.dumps(self.info or {}, ensure_ascii=False, indent=4), prefix=" "
-         )
-         return f"{self.code}:{self.location if self.location else ''} {self.message}\n{formatted_info}"
-
-     def __repr__(self):
-         return str(self)
-
-
- @dataclasses.dataclass
- class StreamEntryFile:
-     metadata: JSONObject
-     content_url: str
-     "data url with the content of the file or any other resolvable url"
-
-     @property
-     def json(self) -> JSONObject:
-         return {
-             "metadata": self.metadata,
-             "content_url": self.content_url,
-         }
-
-     @classmethod
-     def from_json(cls, js: JSONObject):
-         return cls(
-             metadata=js["metadata"],
-             content_url=js["content_url"],
-         )
-
-
- @dataclasses.dataclass
- class StreamEntry:
-     """Object to encapsulate streams processing."""
-
-     entry: JSONObject
-     files: List[StreamEntryFile] = dataclasses.field(default_factory=list)
-     seq: int = 0
-     id: Optional[str] = None
-     filtered: bool = False
-     deleted: bool = False
-     errors: List[StreamEntryError] = dataclasses.field(default_factory=list)
-     context: JSONObject = dataclasses.field(default_factory=dict)
-
-     @property
-     def ok(self):
-         return not self.filtered and not self.errors
-
-     @property
-     def json(self) -> JSONObject:
-         return {
-             "id": self.id,
-             "entry": self.entry,
-             "filtered": self.filtered,
-             "deleted": self.deleted,
-             "errors": [x.json for x in self.errors],
-             "context": self.context,
-             "seq": self.seq,
-             "files": [x.json for x in self.files],
-         }
-
-     @classmethod
-     def from_json(cls, js):
-         return cls(
-             id=js["id"],
-             entry=js["entry"],
-             filtered=js["filtered"],
-             deleted=js["deleted"],
-             errors=[StreamEntryError.from_json(x) for x in js["errors"]],
-             context=js["context"],
-             seq=js["seq"],
-             files=[StreamEntryFile.from_json(x) for x in js["files"]],
-         )
-
-     def __str__(self):
-         ret = [
-             f"Entry #{self.seq}: id {self.id or 'not yet set'}, filtered: {self.filtered}, deleted: {self.deleted}",
-             "Content:",
-             textwrap.indent(
-                 json.dumps(self.entry, ensure_ascii=False, indent=4), " "
-             ),
-             "Context:",
-             textwrap.indent(
-                 json.dumps(self.context, ensure_ascii=False, indent=4), " "
-             ),
-         ]
-         if self.errors:
-             ret.append("Errors:")
-             for error in self.errors:
-                 ret.append(textwrap.indent(str(error), " "))
-         return "\n".join(ret)
-
-
- @dataclasses.dataclass
- class StreamBatch:
-     entries: List[StreamEntry]
-     context: Dict[str, Any] = dataclasses.field(default_factory=dict)
-     last: bool = False
-     seq: int = 0
-     errors: List[StreamEntryError] = dataclasses.field(default_factory=list)
-
-     @property
-     def ok_entries(self):
-         if self.errors:
-             return []
-         return [x for x in self.entries if x.ok]
-
-     @property
-     def failed_entries(self):
-         if self.errors:
-             return self.entries
-         return [x for x in self.entries if x.errors]
-
-     @property
-     def skipped_entries(self):
-         if self.errors:
-             return []
-         return [x for x in self.entries if x.filtered]
-
-     @property
-     def deleted_entries(self):
-         if self.errors:
-             return []
-         return [x for x in self.entries if x.deleted]
-
-     @property
-     def json(self):
-         return {
-             "entries": [x.json for x in self.entries],
-             "context": self.context,
-             "last": self.last,
-             "seq": self.seq,
-             "errors": [x.json for x in self.errors],
-         }
-
-     @classmethod
-     def from_json(cls, js):
-         if js is None:
-             return None
-         try:
-             [StreamEntry.from_json(x) for x in js["entries"]]
-         except:
-             log.exception("Exception parsing %s", js)
-             raise
-         return cls(
-             entries=[StreamEntry.from_json(x) for x in js["entries"]],
-             context=js["context"],
-             last=js["last"],
-             seq=js["seq"],
-             errors=[StreamEntryError.from_json(x) for x in js["errors"]],
-         )
-
-
- class DataStreamCallback:
-     def __init__(self, log_error_entry=False):
-         self.log_error_entry = log_error_entry
-
-     def batch_started(self, batch):
-         log.info("Batch started: %s", batch.seq)
-         if log.isEnabledFor(logging.DEBUG):
-             log.debug("Content: %s", batch)
-
-     def batch_finished(self, batch: StreamBatch):
-         log.info("Batch finished: %s", batch.seq)
-         if log.isEnabledFor(logging.DEBUG):
-             log.debug("Content: %s", batch)
-         for err in batch.errors:
-             log.error("Failed batch: %s: %s", err, batch.seq)
-         if self.log_error_entry:
-             for entry in batch.entries:
-                 if entry.errors:
-                     log.error("Failed entry: %s in batch %s", entry, batch.seq)
-
-     def reader_error(self, reader, exception):
-         log.error("Reader error: %s: %s", reader, exception)
-
-     def transformer_error(self, batch, transformer, exception):
-         log.error("Transformer error: %s: %s", transformer, exception)
-
-     def writer_error(self, batch, writer, exception):
-         log.error("Writer error: %s: %s", writer, exception)
-
-
- class StatsKeepingDataStreamCallback(DataStreamCallback):
-     def __init__(self, log_error_entry=False):
-         super().__init__(log_error_entry=log_error_entry)
-
-         self.started_batches_count = 0
-         self.finished_batches_count = 0
-         self.ok_entries_count = 0
-         self.filtered_entries_count = 0
-         self.deleted_entries_count = 0
-         self.failed_entries_count = 0
-         self.reader_errors_count = 0
-         self.transformer_errors_count = 0
-         self.writer_errors_count = 0
-
-     def batch_started(self, batch):
-         super().batch_started(batch)
-         self.started_batches_count += 1
-
-     def batch_finished(self, batch: StreamBatch):
-         super().batch_finished(batch)
-         self.finished_batches_count += 1
-         for entry in batch.entries:
-             if entry.ok:
-                 self.ok_entries_count += 1
-             if entry.filtered:
-                 self.filtered_entries_count += 1
-             if entry.deleted:
-                 self.deleted_entries_count += 1
-             if entry.errors:
-                 self.failed_entries_count += 1
-
-     def reader_error(self, reader, exception):
-         super().reader_error(reader, exception)
-         self.reader_errors_count += 1
-
-     def transformer_error(self, batch, transformer, exception):
-         super().transformer_error(batch, transformer, exception)
-         self.transformer_errors_count += 1
-
-     def writer_error(self, batch, writer, exception):
-         super().writer_error(batch, writer, exception)
-         self.writer_errors_count += 1
-
-     def stats(self):
-         ret = [f"{self.finished_batches_count} batches finished"]
-         if self.ok_entries_count:
-             ret.append(f"ok: {self.ok_entries_count}")
-         if self.deleted_entries_count:
-             ret.append(f"deleted: {self.deleted_entries_count}")
-         if self.filtered_entries_count:
-             ret.append(f"filtered: {self.filtered_entries_count}")
-         if self.failed_entries_count:
-             ret.append(f"failed: {self.failed_entries_count}")
-         if self.reader_errors_count:
-             ret.append(f"reader errors: {self.reader_errors_count}")
-         if self.transformer_errors_count:
-             ret.append(f"transformer errors: {self.transformer_errors_count}")
-         if self.writer_errors_count:
-             ret.append(f"writer errors: {self.writer_errors_count}")
-         return ", ".join(ret)
oarepo_runtime/datastreams/utils.py (deleted)
@@ -1,131 +0,0 @@
- from base64 import b64decode
-
- import requests
- from deprecated import deprecated
- from flask import current_app
- from invenio_drafts_resources.services import RecordService as DraftRecordService
- from invenio_records_resources.proxies import current_service_registry
- from invenio_records_resources.services import FileService, RecordService
- from invenio_records_resources.services.records.results import RecordItem
- from requests import PreparedRequest, Response
- from requests.adapters import BaseAdapter
-
-
- def get_record_service_for_record(record):
-     if not record:
-         return None
-     if "OAREPO_PRIMARY_RECORD_SERVICE" in current_app.config:
-         return get_record_service_for_record_class(type(record))
-     else:
-         return get_record_service_for_record_deprecated(record)
-
-
- def get_record_service_for_record_class(record_cls):
-     service_id = current_app.config["OAREPO_PRIMARY_RECORD_SERVICE"][record_cls]
-     return current_service_registry.get(service_id)
-
-
- @deprecated(
-     version="1.5.43", reason="Please recompile model to remove this deprecation warning"
- )
- def get_record_service_for_record_deprecated(record):
-     if getattr(record, "is_draft", False):
-         record_name = "draft_cls"
-         expect_draft_service = True
-     else:
-         record_name = "record_cls"
-         expect_draft_service = False
-
-     for svc in current_service_registry._services.values():
-         if not isinstance(svc, RecordService):
-             continue
-         if isinstance(svc, FileService):
-             continue
-         is_draft_service = isinstance(svc, DraftRecordService)
-         if is_draft_service != expect_draft_service:
-             continue
-         service_record = getattr(svc, record_name, None)
-         if service_record == type(record):
-             return svc
-
- def get_file_service_for_record_class(record_class):
-     if not record_class:
-         return None
-
-     for svc in current_service_registry._services.values():
-         if not isinstance(svc, FileService):
-             continue
-         if svc.record_cls != record_class:
-             continue
-         return svc
-
- def get_file_service_for_file_record_class(file_record_class):
-     record_class = file_record_class.record_cls
-     return get_file_service_for_record_class(record_class)
-
- def get_file_service_for_record_service(
-     record_service, check_draft_files=True, record=None
- ):
-     if isinstance(record, RecordItem):
-         record = record._record
-     if record and getattr(record, "is_draft", False) is False:
-         check_draft_files = False
-     if (
-         check_draft_files
-         and hasattr(record_service, "draft_files")
-         and isinstance(record_service.draft_files, FileService)
-     ):
-         return record_service.draft_files
-     if hasattr(record_service, "files") and isinstance(
-         record_service.files, FileService
-     ):
-         return record_service.files
-     return get_file_service_for_record_class(
-         getattr(record_service.config, "record_cls", None)
-     )
-
-
- def get_record_service_for_file_service(file_service, record=None):
-     if record and getattr(record, "is_draft", False):
-         record_name = "draft_cls"
-         expect_draft_service = True
-     else:
-         record_name = "record_cls"
-         expect_draft_service = False
-     for svc in current_service_registry._services.values():
-         if not isinstance(svc, RecordService):
-             continue
-         is_draft_service = isinstance(svc, DraftRecordService)
-         if is_draft_service != expect_draft_service:
-             continue
-         service_record = getattr(svc, record_name, None)
-         if service_record == file_service.record_cls:
-             return svc
-
-     raise KeyError(
-         f"Could not get service for file service {file_service}, draft {expect_draft_service}"
-     )
-
-
- class DataAdapter(BaseAdapter):
-     def send(
-         self,
-         request: PreparedRequest,
-         stream=False,
-         timeout=None,
-         verify=True,
-         cert=None,
-         proxies=None,
-     ):
-         data = request.url.replace("data:", "")
-         resp = Response()
-         resp.status_code = 200
-         resp._content = b64decode(data)
-         return resp
-
-     def close(self):
-         pass
-
-
- attachments_requests = requests.Session()
- attachments_requests.mount("data:", DataAdapter())
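Note: besides the service-lookup helpers, this removed module set up attachments_requests, a requests session whose DataAdapter answers "data:" URLs by base64-decoding whatever follows the prefix; stream entry files carried their content as such data URLs. A quick sketch of the behaviour (assuming oarepo-runtime 1.10.x is installed):

    from base64 import b64encode

    from oarepo_runtime.datastreams.utils import attachments_requests

    payload = b64encode(b"test file content: test.png").decode("ascii")
    resp = attachments_requests.get("data:" + payload)
    assert resp.status_code == 200
    assert resp.content == b"test file content: test.png"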
oarepo_runtime/datastreams/writers/__init__.py (deleted)
@@ -1,21 +0,0 @@
- from abc import ABC, abstractmethod
- from typing import Union
-
- from oarepo_runtime.datastreams.types import StreamBatch
-
-
- class BaseWriter(ABC):
-     """Base writer."""
-
-     def __init__(self, **kwargs) -> None:
-         """kwargs for extensions"""
-
-     @abstractmethod
-     def write(self, batch: StreamBatch) -> Union[StreamBatch, None]:
-         """Writes the input entry to the target output.
-         :returns: nothing
-         Raises WriterException in case of errors.
-         """
-
-     def finish(self):
-         pass
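Note: an illustrative writer against the removed BaseWriter contract above (JsonLinesWriter is not a writer shipped by the package). The synchronous chain treats a None return from write() as "keep the incoming batch", so a writer only needs to return a batch when it replaces it.

    import json

    from oarepo_runtime.datastreams.types import StreamBatch
    from oarepo_runtime.datastreams.writers import BaseWriter

    class JsonLinesWriter(BaseWriter):
        def __init__(self, *, path, **kwargs):
            super().__init__(**kwargs)
            self._file = open(path, "w")

        def write(self, batch: StreamBatch):
            for entry in batch.ok_entries:
                self._file.write(json.dumps(entry.entry) + "\n")
            return None  # chain falls back to the original batch

        def finish(self):
            # called from SynchronousDataStreamChain.finish()
            self._file.close()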
oarepo_runtime/datastreams/writers/attachments_file.py (deleted)
@@ -1,92 +0,0 @@
- import os
- from pathlib import Path
-
- import yaml
-
- from oarepo_runtime.datastreams import StreamBatch, StreamEntry
-
- from ..utils import attachments_requests
- from . import BaseWriter
-
-
- class AttachmentsFileWriter(BaseWriter):
-     """
-     Writes the files and its metadata into subdirectories.
-
-     The path will be files/<record-id>/<file-id>/metadata.yaml for technical metadata
-     and files/<record-id>/<file-id>/<key> for the data.
-
-     If the data key is "metadata.yaml", then "metadata" will be placed to "metametadata.yaml"
-     """
-
-     def __init__(self, *, target, base_path=None, **kwargs):
-         """Constructor.
-         :param file_or_path: path of the output file.
-         """
-         super().__init__(**kwargs)
-         self._grouping = 3
-         self._min_padding = 3
-         if base_path:
-             self._dir = Path(base_path).joinpath(target)
-         else:
-             self._dir = Path(target)
-
-     def write(self, batch: StreamBatch, *args, **kwargs):
-         """Writes the input stream entry using a given service."""
-         """
-         context looks like: {
-             'files': [
-                 {'metadata': {'updated': '...', 'mimetype': 'image/png', 'storage_class': 'L', 'file_id': '',
-                     'links': {...}, 'size': 27, 'status': 'completed', 'version_id': '...',
-                     'bucket_id': '...', 'metadata': None, 'key': 'test.png',
-                     'checksum': 'md5:...', 'created': '...'},
-                 'content': b'test file content: test.png'}]}
-         """
-         for entry in batch.entries:
-             if entry.ok and entry.files:
-                 self.write_entry(entry)
-
-     def write_entry(self, entry: StreamEntry):
-         dirname = self._dir.joinpath(format_serial(entry.seq)) / "data"
-         dirname.mkdir(parents=True, exist_ok=False)
-         file_keys = []
-         files_metadata = []
-         for fn_idx, fn in enumerate(entry.files):
-             md = {**fn.metadata}
-             content = attachments_requests.get(fn.content_url).content
-             # cleanup
-             md.pop("storage_class", None)
-             md.pop("file_id", None)
-             md.pop("links", None)
-             md.pop("status", None)
-             md.pop("version_id", None)
-             md.pop("bucket_id", None)
-             key = md["key"]
-             file_keys.append(key)
-             files_metadata.append(md)
-             (dirname / key).write_bytes(content)
-         metadata_key = "metadata.yaml"
-         while metadata_key in file_keys:
-             metadata_key = "meta_" + metadata_key
-         with open(dirname / metadata_key, "w") as f:
-             yaml.safe_dump_all(files_metadata, f)
-         return entry
-
-     def finish(self):
-         """Finalizes writing"""
-
-
- def format_serial(serial_no):
-     grouping = 3
-     min_padding = 3
-     serial_no = str(serial_no)
-     formatted_length = max(min_padding, len(serial_no))
-     while formatted_length % grouping:
-         formatted_length += 1
-     padded_serial = serial_no.zfill(formatted_length)
-     return os.sep.join(
-         [
-             padded_serial[i : i + grouping]
-             for i in range(0, len(padded_serial), grouping)
-         ]
-     )
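Note: worked examples for format_serial above (separator shown for POSIX, where os.sep is "/"): the serial number is zero-padded up to a multiple of three digits and split into three-digit path segments, which keeps a large dump from putting every entry into a single directory. Assumes oarepo-runtime 1.10.x is installed.

    from oarepo_runtime.datastreams.writers.attachments_file import format_serial

    print(format_serial(7))       # 007
    print(format_serial(1234))    # 001/234
    print(format_serial(123456))  # 123/456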