oarepo-runtime 1.10.3__py3-none-any.whl → 2.0.0.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oarepo_runtime/__init__.py +24 -0
- oarepo_runtime/api.py +210 -0
- oarepo_runtime/cli/__init__.py +10 -21
- oarepo_runtime/cli/search.py +34 -0
- oarepo_runtime/config.py +98 -13
- oarepo_runtime/ext.py +64 -82
- oarepo_runtime/proxies.py +21 -5
- oarepo_runtime/records/__init__.py +11 -50
- oarepo_runtime/records/drafts.py +24 -18
- oarepo_runtime/records/mapping.py +84 -0
- oarepo_runtime/records/pid_providers.py +43 -7
- oarepo_runtime/records/systemfields/__init__.py +15 -33
- oarepo_runtime/records/systemfields/mapping.py +41 -24
- oarepo_runtime/records/systemfields/publication_status.py +61 -0
- oarepo_runtime/services/__init__.py +12 -0
- oarepo_runtime/services/config/__init__.py +15 -21
- oarepo_runtime/services/config/link_conditions.py +69 -75
- oarepo_runtime/services/config/permissions.py +62 -0
- oarepo_runtime/services/facets/__init__.py +12 -33
- oarepo_runtime/services/facets/params.py +45 -110
- oarepo_runtime/services/records/__init__.py +14 -1
- oarepo_runtime/services/records/links.py +21 -11
- oarepo_runtime/services/records/mapping.py +42 -0
- oarepo_runtime/services/results.py +98 -109
- oarepo_runtime/services/schema/__init__.py +12 -44
- oarepo_runtime/services/schema/i18n.py +47 -22
- oarepo_runtime/services/schema/i18n_ui.py +61 -24
- {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev4.dist-info}/METADATA +10 -21
- oarepo_runtime-2.0.0.dev4.dist-info/RECORD +32 -0
- {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev4.dist-info}/WHEEL +1 -2
- oarepo_runtime-2.0.0.dev4.dist-info/entry_points.txt +5 -0
- oarepo_runtime/cli/assets.py +0 -145
- oarepo_runtime/cli/base.py +0 -25
- oarepo_runtime/cli/cf.py +0 -15
- oarepo_runtime/cli/check.py +0 -167
- oarepo_runtime/cli/configuration.py +0 -51
- oarepo_runtime/cli/fixtures.py +0 -167
- oarepo_runtime/cli/index.py +0 -272
- oarepo_runtime/cli/permissions/__init__.py +0 -6
- oarepo_runtime/cli/permissions/base.py +0 -26
- oarepo_runtime/cli/permissions/evaluate.py +0 -63
- oarepo_runtime/cli/permissions/list.py +0 -239
- oarepo_runtime/cli/permissions/search.py +0 -121
- oarepo_runtime/cli/validate.py +0 -150
- oarepo_runtime/datastreams/__init__.py +0 -38
- oarepo_runtime/datastreams/asynchronous.py +0 -247
- oarepo_runtime/datastreams/catalogue.py +0 -150
- oarepo_runtime/datastreams/datastreams.py +0 -152
- oarepo_runtime/datastreams/errors.py +0 -54
- oarepo_runtime/datastreams/ext.py +0 -41
- oarepo_runtime/datastreams/fixtures.py +0 -265
- oarepo_runtime/datastreams/json.py +0 -4
- oarepo_runtime/datastreams/readers/__init__.py +0 -39
- oarepo_runtime/datastreams/readers/attachments.py +0 -51
- oarepo_runtime/datastreams/readers/excel.py +0 -123
- oarepo_runtime/datastreams/readers/json.py +0 -27
- oarepo_runtime/datastreams/readers/service.py +0 -54
- oarepo_runtime/datastreams/readers/yaml.py +0 -14
- oarepo_runtime/datastreams/semi_asynchronous.py +0 -91
- oarepo_runtime/datastreams/synchronous.py +0 -70
- oarepo_runtime/datastreams/transformers.py +0 -18
- oarepo_runtime/datastreams/types.py +0 -323
- oarepo_runtime/datastreams/utils.py +0 -131
- oarepo_runtime/datastreams/writers/__init__.py +0 -21
- oarepo_runtime/datastreams/writers/attachments_file.py +0 -92
- oarepo_runtime/datastreams/writers/attachments_service.py +0 -118
- oarepo_runtime/datastreams/writers/publish.py +0 -70
- oarepo_runtime/datastreams/writers/service.py +0 -175
- oarepo_runtime/datastreams/writers/utils.py +0 -30
- oarepo_runtime/datastreams/writers/validation_errors.py +0 -20
- oarepo_runtime/datastreams/writers/yaml.py +0 -56
- oarepo_runtime/ext_config.py +0 -67
- oarepo_runtime/i18n/__init__.py +0 -3
- oarepo_runtime/info/__init__.py +0 -0
- oarepo_runtime/info/check.py +0 -95
- oarepo_runtime/info/permissions/__init__.py +0 -0
- oarepo_runtime/info/permissions/debug.py +0 -191
- oarepo_runtime/info/views.py +0 -586
- oarepo_runtime/profile.py +0 -60
- oarepo_runtime/records/dumpers/__init__.py +0 -8
- oarepo_runtime/records/dumpers/edtf_interval.py +0 -38
- oarepo_runtime/records/dumpers/multilingual_dumper.py +0 -34
- oarepo_runtime/records/entity_resolvers/__init__.py +0 -13
- oarepo_runtime/records/entity_resolvers/proxies.py +0 -57
- oarepo_runtime/records/mappings/__init__.py +0 -0
- oarepo_runtime/records/mappings/rdm_parent_mapping.json +0 -483
- oarepo_runtime/records/owners/__init__.py +0 -3
- oarepo_runtime/records/owners/registry.py +0 -22
- oarepo_runtime/records/relations/__init__.py +0 -22
- oarepo_runtime/records/relations/base.py +0 -296
- oarepo_runtime/records/relations/internal.py +0 -46
- oarepo_runtime/records/relations/lookup.py +0 -28
- oarepo_runtime/records/relations/pid_relation.py +0 -102
- oarepo_runtime/records/systemfields/featured_file.py +0 -45
- oarepo_runtime/records/systemfields/has_draftcheck.py +0 -47
- oarepo_runtime/records/systemfields/icu.py +0 -371
- oarepo_runtime/records/systemfields/owner.py +0 -115
- oarepo_runtime/records/systemfields/record_status.py +0 -35
- oarepo_runtime/records/systemfields/selectors.py +0 -98
- oarepo_runtime/records/systemfields/synthetic.py +0 -130
- oarepo_runtime/resources/__init__.py +0 -4
- oarepo_runtime/resources/config.py +0 -12
- oarepo_runtime/resources/file_resource.py +0 -15
- oarepo_runtime/resources/json_serializer.py +0 -27
- oarepo_runtime/resources/localized_ui_json_serializer.py +0 -54
- oarepo_runtime/resources/resource.py +0 -53
- oarepo_runtime/resources/responses.py +0 -20
- oarepo_runtime/services/components.py +0 -429
- oarepo_runtime/services/config/draft_link.py +0 -23
- oarepo_runtime/services/config/permissions_presets.py +0 -174
- oarepo_runtime/services/config/service.py +0 -117
- oarepo_runtime/services/custom_fields/__init__.py +0 -80
- oarepo_runtime/services/custom_fields/mappings.py +0 -188
- oarepo_runtime/services/entity/__init__.py +0 -0
- oarepo_runtime/services/entity/config.py +0 -14
- oarepo_runtime/services/entity/schema.py +0 -9
- oarepo_runtime/services/entity/service.py +0 -48
- oarepo_runtime/services/expansions/__init__.py +0 -0
- oarepo_runtime/services/expansions/expandable_fields.py +0 -21
- oarepo_runtime/services/expansions/service.py +0 -4
- oarepo_runtime/services/facets/base.py +0 -12
- oarepo_runtime/services/facets/date.py +0 -72
- oarepo_runtime/services/facets/enum.py +0 -11
- oarepo_runtime/services/facets/facet_groups_names.py +0 -17
- oarepo_runtime/services/facets/max_facet.py +0 -13
- oarepo_runtime/services/facets/multilingual_facet.py +0 -33
- oarepo_runtime/services/facets/nested_facet.py +0 -32
- oarepo_runtime/services/facets/year_histogram.py +0 -200
- oarepo_runtime/services/files/__init__.py +0 -8
- oarepo_runtime/services/files/components.py +0 -62
- oarepo_runtime/services/files/service.py +0 -16
- oarepo_runtime/services/generators.py +0 -10
- oarepo_runtime/services/permissions/__init__.py +0 -3
- oarepo_runtime/services/permissions/generators.py +0 -103
- oarepo_runtime/services/relations/__init__.py +0 -0
- oarepo_runtime/services/relations/components.py +0 -15
- oarepo_runtime/services/relations/errors.py +0 -18
- oarepo_runtime/services/relations/mapping.py +0 -38
- oarepo_runtime/services/schema/cf.py +0 -13
- oarepo_runtime/services/schema/i18n_validation.py +0 -7
- oarepo_runtime/services/schema/marshmallow.py +0 -44
- oarepo_runtime/services/schema/marshmallow_to_json_schema.py +0 -72
- oarepo_runtime/services/schema/oneofschema.py +0 -192
- oarepo_runtime/services/schema/polymorphic.py +0 -21
- oarepo_runtime/services/schema/rdm.py +0 -146
- oarepo_runtime/services/schema/rdm_ui.py +0 -156
- oarepo_runtime/services/schema/ui.py +0 -251
- oarepo_runtime/services/schema/validation.py +0 -70
- oarepo_runtime/services/search.py +0 -282
- oarepo_runtime/services/service.py +0 -61
- oarepo_runtime/tasks.py +0 -6
- oarepo_runtime/translations/cs/LC_MESSAGES/messages.mo +0 -0
- oarepo_runtime/translations/cs/LC_MESSAGES/messages.po +0 -95
- oarepo_runtime/translations/default_translations.py +0 -6
- oarepo_runtime/translations/en/LC_MESSAGES/messages.mo +0 -0
- oarepo_runtime/translations/en/LC_MESSAGES/messages.po +0 -97
- oarepo_runtime/translations/messages.pot +0 -100
- oarepo_runtime/uow.py +0 -146
- oarepo_runtime/utils/__init__.py +0 -0
- oarepo_runtime/utils/functools.py +0 -37
- oarepo_runtime/utils/identity_utils.py +0 -35
- oarepo_runtime/utils/index.py +0 -11
- oarepo_runtime/utils/path.py +0 -97
- oarepo_runtime-1.10.3.dist-info/RECORD +0 -163
- oarepo_runtime-1.10.3.dist-info/entry_points.txt +0 -16
- oarepo_runtime-1.10.3.dist-info/top_level.txt +0 -2
- tests/marshmallow_to_json/__init__.py +0 -0
- tests/marshmallow_to_json/test_datacite_ui_schema.py +0 -1410
- tests/marshmallow_to_json/test_simple_schema.py +0 -52
- tests/pkg_data/__init__.py +0 -0
- {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev4.dist-info}/licenses/LICENSE +0 -0
@@ -1,152 +0,0 @@
|
|
1
|
-
import abc
|
2
|
-
import copy
|
3
|
-
import dataclasses
|
4
|
-
from enum import Enum
|
5
|
-
from typing import Any, Callable, Iterator, List, Union
|
6
|
-
|
7
|
-
from invenio_access.permissions import system_identity
|
8
|
-
|
9
|
-
from oarepo_runtime.datastreams.types import (
|
10
|
-
DataStreamCallback,
|
11
|
-
StreamBatch,
|
12
|
-
StreamEntry,
|
13
|
-
)
|
14
|
-
from oarepo_runtime.proxies import current_datastreams
|
15
|
-
|
16
|
-
from .json import JSONObject
|
17
|
-
|
18
|
-
|
19
|
-
class DataStreamChain(abc.ABC):
    """Abstract processing chain that a datastream pushes batches through.

    Concrete implementations (the synchronous/asynchronous datastreams in this
    package, presumably — confirm) decide how transformer/writer steps are
    executed for each batch.
    """

    @abc.abstractmethod
    def process(self, batch: StreamBatch, callback: Union[DataStreamCallback, Any]):
        """Process a single batch of entries, reporting progress/errors to *callback*."""
        pass

    @abc.abstractmethod
    def finish(self, callback: Union[DataStreamCallback, Any]):
        """Flush any pending work after the last batch has been processed."""
        pass
|
27
|
-
|
28
|
-
try:
    # Python 3.11+ provides StrEnum natively.
    from enum import StrEnum
except ImportError:
    StrEnum = None

if StrEnum is not None:

    class SignatureKind(StrEnum):
        """Kind of datastream component a Signature describes."""

        READER = "reader"
        TRANSFORMER = "transformer"
        WRITER = "writer"

else:
    # Older Pythons: a str/Enum mixin behaves the same for our purposes
    # (members compare equal to their string values).

    class SignatureKind(str, Enum):
        """Kind of datastream component a Signature describes."""

        READER = "reader"
        TRANSFORMER = "transformer"
        WRITER = "writer"
|
42
|
-
|
43
|
-
|
44
|
-
@dataclasses.dataclass
class Signature:
    """Serializable description of a datastream component.

    A signature names a reader/transformer/writer registered in the app
    configuration together with the keyword arguments it should be
    instantiated with, and round-trips through plain JSON.
    """

    kind: SignatureKind
    name: str
    kwargs: JSONObject

    @property
    def json(self):
        """Return a JSON-serializable dict representation of this signature."""
        return {
            "kind": self.kind.value,
            "name": self.name,
            "kwargs": self.kwargs,
        }

    @classmethod
    def from_json(cls, json):
        """Re-create a Signature from the dict produced by the ``json`` property."""
        kind = SignatureKind(json["kind"])
        return cls(kind=kind, name=json["name"], kwargs=json["kwargs"])

    def resolve(self, *, identity, **kwargs):
        """Instantiate the component described by this signature.

        Only transformers and writers can be resolved here; readers are
        obtained directly through the datastreams extension.
        """
        if self.kind == SignatureKind.WRITER:
            return current_datastreams.get_writer(self, **kwargs, identity=identity)
        if self.kind == SignatureKind.TRANSFORMER:
            return current_datastreams.get_transformer(
                self, **kwargs, identity=identity
            )
        raise ValueError(f"Unknown signature kind: {self.kind}")
|
71
|
-
|
72
|
-
|
73
|
-
class AbstractDataStream(abc.ABC):
    """Base class for datastreams.

    Reads entries from the configured readers, groups them into batches and
    pushes each batch through the processing chain built by ``build_chain``.
    """

    def __init__(
        self,
        *,
        readers: List[Union[Signature, Any]],
        writers: List[Union[Signature, Any]],
        transformers: List[Union[Signature, Any]] = None,
        callback: Union[DataStreamCallback, Signature],
        batch_size=1,
        identity=system_identity,
        reader_callback: Callable[[StreamBatch], None] = None,
    ):
        """Constructor.
        :param readers: an ordered list of readers (whatever a reader is).
        :param writers: an ordered list of writers (whatever a writer is).
        :param transformers: an ordered list of transformers to apply (whatever a transformer is).
        :param callback: progress/error callback (or a Signature resolving to one).
        :param batch_size: number of entries collected into one StreamBatch.
        :param identity: identity used when instantiating readers.
        :param reader_callback: optional hook invoked with every batch as it is read.
        """
        # shallow copies so later mutation of the caller's lists has no effect
        self._readers: List[Signature] = [*readers]
        self._transformers: List[Signature] = [*(transformers or [])]
        self._writers: List[Signature] = [*writers]
        self._callback = callback
        self._batch_size = batch_size
        self._identity = identity
        self._reader_callback = reader_callback

    def _read_entries(self) -> Iterator[StreamEntry]:
        """Yield entries from all readers in order, numbering them with a
        single 1-based ``seq`` counter shared across readers."""
        seq = 0
        for reader_signature in self._readers:
            reader = current_datastreams.get_reader(
                reader_signature, identity=self._identity
            )
            try:
                for entry in reader:
                    seq += 1
                    entry.seq = seq
                    yield entry
            except Exception as ex:
                # a failing reader aborts only its own iteration; the
                # remaining readers are still processed
                self._reader_error(reader, exception=ex)

    def _read_batches(self, context) -> Iterator[StreamBatch]:
        """Group entries into StreamBatch objects of ``self._batch_size``.

        Always yields at least one batch: the final one carries ``last=True``
        and may be empty. Each batch gets a deep copy of *context* so batches
        cannot observe each other's context mutations.
        """
        batch_entries = []
        batch_number = 0

        def batch_maker(last=False):
            # wrap the collected entries into a batch and reset the buffer
            nonlocal batch_number, batch_entries
            batch_number += 1
            ret = StreamBatch(
                entries=batch_entries,
                seq=batch_number,
                context=copy.deepcopy(context),
                last=last,
            )
            batch_entries = []
            return ret

        for entry in self._read_entries():
            if len(batch_entries) == self._batch_size:
                batch = batch_maker()
                if self._reader_callback:
                    self._reader_callback(batch)
                yield batch
                # NOTE(review): redundant — batch_maker() already reset the buffer
                batch_entries = []
            batch_entries.append(entry)
        batch = batch_maker(last=True)
        if self._reader_callback:
            self._reader_callback(batch)
        yield batch

    def process(self, context=None, identity=system_identity):
        """Read all batches and run them through the chain built by ``build_chain``.

        NOTE(review): ``chain.finish`` is never invoked here — presumably
        concrete chains react to ``batch.last`` instead; confirm.
        """
        context = context or {}
        chain = self.build_chain(identity)
        for batch in self._read_batches(context):
            chain.process(batch, self._callback)

    @abc.abstractmethod
    def build_chain(self, identity) -> DataStreamChain:
        """Create the processing chain (transformers + writers) for this stream."""
        pass

    def _reader_error(self, reader, exception):
        """Forward a reader failure to the configured callback."""
        self._callback.reader_error(reader, exception=exception)
|
@@ -1,54 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
#
|
3
|
-
# Copyright (C) 2021 CERN.
|
4
|
-
#
|
5
|
-
# Invenio-Vocabularies is free software; you can redistribute it and/or
|
6
|
-
# modify it under the terms of the MIT License; see LICENSE file for more
|
7
|
-
# details.
|
8
|
-
|
9
|
-
"""Datastream errors."""
|
10
|
-
from typing import Union
|
11
|
-
|
12
|
-
from .json import JSONObject
|
13
|
-
|
14
|
-
|
15
|
-
class DataStreamError(Exception):
    """Base class for errors raised while running a datastream."""

    def __init__(
        self,
        message,
        code=None,
        location=None,
        detail: Union[JSONObject, None] = None,
    ):
        """
        @param message: a human readable overview of the error
        @param code: a machine processable error code
        @param location: dot-notation path inside the json where the error was
                         detected; arrays are indexed from 0, for example
                         `metadata.titles.0.language`
        @param detail: a json-serializable dictionary with error details
        """
        super().__init__(message)
        assert detail is None or isinstance(detail, dict)
        self.message = message
        self.code = code
        self.location = location
        self.detail = detail
|
36
|
-
|
37
|
-
|
38
|
-
class ReaderError(DataStreamError):
    """Reader application exception."""
    # BUG FIX: docstring previously said "Transformer application exception"
    # (copy-paste from TransformerError).


class TransformerError(DataStreamError):
    """Transformer application exception."""


class WriterError(DataStreamError):
    """Writer application exception."""
    # BUG FIX: docstring previously said "Transformer application exception"
    # (copy-paste from TransformerError).
|
48
|
-
|
49
|
-
|
50
|
-
class DataStreamCatalogueError(Exception):
    """Error raised while reading or processing a datastream catalogue."""

    def __init__(self, message, entry=None, stream_name=None) -> None:
        """Keep the failing catalogue *entry* and *stream_name* with the message."""
        super().__init__(message)
        self.stream_name = stream_name
        self.entry = entry
|
@@ -1,41 +0,0 @@
|
|
1
|
-
import functools
|
2
|
-
|
3
|
-
from invenio_base.utils import obj_or_import_string
|
4
|
-
|
5
|
-
from oarepo_runtime.datastreams.datastreams import Signature
|
6
|
-
|
7
|
-
|
8
|
-
class OARepoDataStreamsExt:
    """Extension resolving datastream readers/writers/transformers from app config."""

    def __init__(self, app):
        self.app = app
        # Per-instance cache of resolved config classes, keyed by config name.
        # BUG FIX: this used to be functools.lru_cache(maxsize=5) on the
        # method, which keys on `self` and keeps the extension (and its app)
        # alive in a process-global cache (ruff B019), with the LRU limit
        # shared across all instances.
        self._config_classes_cache = {}

    def get_reader(self, reader, identity, **kwargs):
        """Instantiate (or pass through) a reader from DATASTREAMS_READERS config."""
        return self._get_instance("DATASTREAMS_READERS", identity, kwargs, reader)

    def get_writer(self, writer, identity, **kwargs):
        """Instantiate (or pass through) a writer from DATASTREAMS_WRITERS config."""
        return self._get_instance("DATASTREAMS_WRITERS", identity, kwargs, writer)

    def get_transformer(self, transformer, identity, **kwargs):
        """Instantiate (or pass through) a transformer from DATASTREAMS_TRANSFORMERS config."""
        return self._get_instance(
            "DATASTREAMS_TRANSFORMERS", identity, kwargs, transformer
        )

    def _get_instance(self, config_name, identity, kwargs, inst):
        """Resolve *inst* into a component instance.

        A Signature is looked up in the app config *config_name* and
        instantiated with its own kwargs merged with *kwargs*; anything else
        is assumed to already be an instance and is returned unchanged.

        :raises KeyError: when the signature's name is not registered in config.
        """
        if not isinstance(inst, Signature):
            return inst
        config_classes = self._get_classes_from_config(config_name)
        if inst.name not in config_classes:
            raise KeyError(f"'{inst.name}' not found in config {config_name}")
        component_class = config_classes[inst.name]
        # explicit call kwargs win over the signature's stored kwargs
        all_kwargs = {**(inst.kwargs or {}), **kwargs}
        # only inject the default identity when the caller did not provide one
        all_kwargs.setdefault("identity", identity)
        return component_class(**all_kwargs)

    def _get_classes_from_config(self, config_name):
        """Import and cache the classes registered under *config_name* in app config."""
        try:
            return self._config_classes_cache[config_name]
        except KeyError:
            classes = {
                class_key: obj_or_import_string(class_name)
                for class_key, class_name in self.app.config[config_name].items()
            }
            self._config_classes_cache[config_name] = classes
            return classes
|
@@ -1,265 +0,0 @@
|
|
1
|
-
import logging
|
2
|
-
import re
|
3
|
-
from pathlib import Path
|
4
|
-
|
5
|
-
import pkg_resources
|
6
|
-
import yaml
|
7
|
-
from celery import shared_task
|
8
|
-
from flask import current_app
|
9
|
-
from invenio_access.permissions import system_identity
|
10
|
-
from invenio_records_resources.proxies import current_service_registry
|
11
|
-
|
12
|
-
from oarepo_runtime.datastreams import (
|
13
|
-
DataStreamCatalogue,
|
14
|
-
StreamBatch,
|
15
|
-
SynchronousDataStream,
|
16
|
-
)
|
17
|
-
from oarepo_runtime.datastreams.types import StatsKeepingDataStreamCallback
|
18
|
-
|
19
|
-
# module-level logger shared by the fixture loading/dumping helpers below
log = logging.getLogger("fixtures")
|
20
|
-
|
21
|
-
|
22
|
-
class FixturesCallback(StatsKeepingDataStreamCallback):
    """Stats-keeping datastream callback extended with per-fixture hooks."""

    def fixture_started(self, fixture_name):
        """Hook invoked before a fixture stream starts loading; no-op by default."""

    def fixture_finished(self, fixture_name):
        """Hook invoked after a fixture stream finished loading; no-op by default."""
|
28
|
-
|
29
|
-
|
30
|
-
def load_fixtures(
    fixture_dir_or_catalogue=None,
    include=None,
    exclude=None,
    system_fixtures=True,
    callback: FixturesCallback = None,
    batch_size=100,
    datastreams_impl=SynchronousDataStream,
    identity=system_identity,
):
    """
    Loads fixtures. If fixture dir is set, fixtures are loaded from that directory first.
    The directory must contain a catalogue.yaml file containing datastreams to load the
    fixtures. The format of the catalogue is described in the 'catalogue.py' file.

    Then fixture loading continues with fixtures defined in `oarepo.fixtures` entrypoint.
    The entry points are sorted and those with the greatest `name` are processed first -
    so the recommendation is to call the entry points 0000-something, where 0000 is a 4-digit
    number. oarepo entry points always have this number set to 1000.

    If a datastream is loaded from one fixture, it will not be loaded again from another fixture.
    If you want to override the default fixtures, just register your own with a key bigger than 1000.
    """
    # include/exclude are regex strings matched against stream names
    include = [re.compile(x) for x in (include or [])]
    exclude = [re.compile(x) for x in (exclude or [])]
    # names of streams already loaded; shared across catalogues for dedup
    fixtures = set()

    if fixture_dir_or_catalogue:
        # accept either the directory containing catalogue.yaml or the
        # catalogue file itself
        if Path(fixture_dir_or_catalogue).is_dir():
            fixture_catalogue = Path(fixture_dir_or_catalogue) / "catalogue.yaml"
        else:
            fixture_catalogue = Path(fixture_dir_or_catalogue)

        catalogue = DataStreamCatalogue(fixture_catalogue)
        _load_fixtures_from_catalogue(
            catalogue,
            fixtures,
            include,
            exclude,
            callback,
            batch_size=batch_size,
            datastreams_impl=datastreams_impl,
            identity=identity,
        )

    if system_fixtures:

        def get_priority(name):
            # negated so entry points with larger numeric prefixes sort first
            match = re.match(r"(\d+)-", name)
            if match:
                return -int(match.group(1))
            return 0

        # NOTE(review): pkg_resources is deprecated; importlib.metadata entry
        # points would be the modern replacement — needs a coordinated change.
        entry_points = list(
            (get_priority(r.name), r.name, r)
            for r in pkg_resources.iter_entry_points("oarepo.fixtures")
        )
        # sort by (priority, name); the entry point object itself is excluded
        # from the sort key
        entry_points.sort(key=lambda x: x[:2])
        for r in entry_points:
            pkg = r[2].load()
            # the entry point may resolve to a module file or a package dir;
            # catalogue.yaml is expected next to it
            pkg_fixture_dir = Path(pkg.__file__)
            if pkg_fixture_dir.is_file():
                pkg_fixture_dir = pkg_fixture_dir.parent
            catalogue = DataStreamCatalogue(pkg_fixture_dir / "catalogue.yaml")
            _load_fixtures_from_catalogue(
                catalogue,
                fixtures,
                include,
                exclude,
                callback,
                batch_size=batch_size,
                datastreams_impl=datastreams_impl,
                identity=identity,
            )
|
104
|
-
|
105
|
-
|
106
|
-
def _load_fixtures_from_catalogue(
    catalogue,
    fixtures,
    include,
    exclude,
    callback,
    batch_size,
    datastreams_impl,
    identity=system_identity,
):
    """Run every not-yet-loaded datastream of *catalogue* that passes the filters.

    Stream names already present in *fixtures* are skipped and names of newly
    processed streams are added to it, so later catalogues never load the same
    stream twice.
    """
    for catalogue_datastream in catalogue.get_datastreams():
        stream_name = catalogue_datastream.stream_name

        # skip streams already loaded from a higher-priority catalogue
        if stream_name in fixtures:
            continue
        # apply the include/exclude regex filters
        if include and not any(pattern.match(stream_name) for pattern in include):
            continue
        if any(pattern.match(stream_name) for pattern in exclude):
            continue

        fixtures.add(stream_name)

        if hasattr(callback, "fixture_started"):
            callback.fixture_started(stream_name)
        datastream = datastreams_impl(
            readers=catalogue_datastream.readers,
            writers=catalogue_datastream.writers,
            transformers=catalogue_datastream.transformers,
            callback=callback,
            batch_size=batch_size,
        )
        datastream.process(identity=identity)
        if hasattr(callback, "fixture_finished"):
            callback.fixture_finished(stream_name)
|
140
|
-
|
141
|
-
|
142
|
-
def dump_fixtures(
    fixture_dir,
    include=None,
    exclude=None,
    use_files=False,
    callback: FixturesCallback = None,
    datastream_impl=SynchronousDataStream,
    batch_size=1,
):
    """Dump records of all scannable registered services into *fixture_dir*.

    For every service that supports ``scan``, the configured config generator
    yields (fixture_name, read_config, write_config) triples; the write config
    is executed to dump the data and the read configs are collected into a
    ``catalogue.yaml`` so the dump can later be re-loaded via load_fixtures.

    :param include: regex strings; when given, only matching fixture names are dumped.
    :param exclude: regex strings; defaults to the DATASTREAMS_EXCLUDES config.
    :param use_files: also dump attached files.
    """
    include = [re.compile(x) for x in (include or [])]
    # exclusion falls back to the app-level configuration
    exclude = [
        re.compile(x)
        for x in (exclude or current_app.config.get("DATASTREAMS_EXCLUDES", []))
    ]
    fixture_dir = Path(fixture_dir)
    if not fixture_dir.exists():
        fixture_dir.mkdir(parents=True)
    catalogue_path = fixture_dir / "catalogue.yaml"
    # accumulates the *read* configs of all dumped fixtures
    catalogue_data = {}

    # NOTE(review): iterates the registry's private ``_services`` mapping —
    # there seems to be no public way to enumerate registered service ids.
    for service_id in current_service_registry._services:
        # service-specific generator wins over the global default
        config_generator = (
            current_app.config.get(f"DATASTREAMS_CONFIG_GENERATOR_{service_id.upper()}")
            or current_app.config["DATASTREAMS_CONFIG_GENERATOR"]
        )
        service = current_service_registry.get(service_id)
        # only services that can be scanned (iterated) can be dumped
        if not hasattr(service, "scan"):
            continue
        for fixture_name, fixture_read_config, fixture_write_config in config_generator(
            service_id, use_files=use_files
        ):
            if include and not any(x.match(fixture_name) for x in include):
                continue
            if any(x.match(fixture_name) for x in exclude):
                continue

            catalogue_data[fixture_name] = fixture_read_config

            # transient catalogue holding just this fixture's write config
            catalogue = DataStreamCatalogue(
                catalogue_path, {fixture_name: fixture_write_config}
            )

            for stream_name in catalogue:
                catalogue_datastream = catalogue.get_datastream(stream_name)
                if hasattr(callback, "fixture_started"):
                    callback.fixture_started(stream_name)
                datastream = datastream_impl(
                    readers=catalogue_datastream.readers,
                    writers=catalogue_datastream.writers,
                    transformers=catalogue_datastream.transformers,
                    callback=callback,
                    batch_size=batch_size,
                )
                datastream.process()
                if hasattr(callback, "fixture_finished"):
                    callback.fixture_finished(stream_name)

    # finally persist the collected read configs as the new catalogue
    with open(catalogue_path, "w") as f:
        yaml.dump(catalogue_data, f)
|
201
|
-
|
202
|
-
|
203
|
-
def default_config_generator(service_id, use_files=False):
    """Yield the default (name, load-config, dump-config) triple for *service_id*.

    The load configuration reads ``<service_id>.yaml`` and writes the records
    (and their attachments) through the service; the dump configuration scans
    the service and writes records back to ``<service_id>.yaml``, plus a
    ``files`` directory when *use_files* is set.
    """
    dump_writers = [{"writer": "yaml", "target": f"{service_id}.yaml"}]
    if use_files:
        dump_writers.append({"writer": "attachments_file", "target": "files"})

    load_config = [
        # load
        {"writer": "service", "service": service_id},
        {"writer": "attachments_service", "service": service_id},
        {"source": f"{service_id}.yaml"},
    ]
    dump_config = [
        # dump
        {"reader": "service", "service": service_id, "load_files": use_files},
        *dump_writers,
    ]
    yield service_id, load_config, dump_config
|
222
|
-
|
223
|
-
|
224
|
-
@shared_task
def fixtures_asynchronous_callback(*args, callback, **kwargs):
    """Celery task logging the progress/errors of asynchronous fixture loading.

    :param callback: progress payload; when it contains the key ``"error"``
        the invocation is logged as an error.
    :param kwargs: may contain ``"batch"`` — a serialized StreamBatch whose
        batch- and entry-level errors are logged.
    """
    try:
        if "batch" in kwargs:
            batch = StreamBatch.from_json(kwargs["batch"])
            log.info(
                "Fixtures progress: %s in batch.seq=%s, batch.last=%s",
                callback,
                batch.seq,
                batch.last,
            )
        else:
            batch = None
            log.info("Fixtures progress: %s", callback)

        if "error" in callback:
            log.error(
                "Error in loading fixtures: %s\n%s\n%s",
                callback,
                "\n".join(args),
                "\n".join(f"{kwarg}: {value}" for kwarg, value in kwargs.items()),
            )

        if batch:
            if batch.errors:
                log.error(
                    "Batch errors: batch %s:\n%s",
                    batch.seq,
                    "\n".join(str(x) for x in batch.errors),
                )

            for entry in batch.entries:
                if entry.errors:
                    # BUG FIX: the format string had three %s placeholders for
                    # four arguments, so the joined entry errors were never
                    # rendered and logging reported a formatting failure
                    # instead of the actual errors.
                    log.error(
                        "Errors in entry %s of batch %s:\npayload %s\n%s",
                        entry.seq,
                        batch.seq,
                        entry.entry,
                        "\n".join(str(x) for x in entry.errors),
                    )
    except Exception:
        # last-resort guard: a failure while logging must not kill the task
        print(f"Error in fixtures callback: {callback=}, {args=}, {kwargs=}")
|
@@ -1,39 +0,0 @@
|
|
1
|
-
import contextlib
|
2
|
-
from abc import ABC, abstractmethod
|
3
|
-
from pathlib import Path
|
4
|
-
from typing import Iterator, Union
|
5
|
-
|
6
|
-
from ..types import StreamEntry
|
7
|
-
|
8
|
-
|
9
|
-
class BaseReader(ABC):
    """Base reader."""

    base_path: Union[Path, None]

    def __init__(self, *, source=None, base_path=None, **kwargs):
        """Constructor.

        :param source: data source (e.g. filepath, stream, ...)
        :param base_path: directory against which a relative *source* path is resolved
        """
        # A stream-like source (anything with .read), a missing source, or a
        # missing base_path is kept as-is; otherwise the source is anchored
        # at base_path.
        source_is_stream = hasattr(source, "read")
        if source and not source_is_stream and base_path:
            self.source = Path(base_path).joinpath(source)
        else:
            self.source = source

        if base_path:
            self.base_path = Path(base_path)
        elif isinstance(source, (str, Path)):
            self.base_path = Path(source).parent
        else:
            self.base_path = None

    @abstractmethod
    def __iter__(self) -> Iterator[StreamEntry]:
        """Yields data objects."""

    @contextlib.contextmanager
    def _open(self, mode="r"):
        """Yield a readable file object for the source, opening it if needed."""
        if hasattr(self.source, "read"):
            yield self.source
        else:
            with open(self.source, mode) as opened:
                yield opened
|
@@ -1,51 +0,0 @@
|
|
1
|
-
from base64 import b64encode
|
2
|
-
from pathlib import Path
|
3
|
-
|
4
|
-
import yaml
|
5
|
-
|
6
|
-
from oarepo_runtime.datastreams import BaseReader, StreamEntry
|
7
|
-
from oarepo_runtime.datastreams.types import StreamEntryFile
|
8
|
-
from oarepo_runtime.datastreams.writers.attachments_file import format_serial
|
9
|
-
|
10
|
-
|
11
|
-
class AttachmentsReaderMixin(BaseReader):
    """Reader mixin that attaches on-disk file payloads to yielded stream entries.

    Files are looked up under ``<base_path>/files/<serial>/data`` where
    ``<serial>`` is the 1-based index of the entry formatted by
    ``format_serial``.
    """

    def __init__(self, *, source=None, base_path=None, **kwargs):
        super().__init__(source=source, base_path=base_path, **kwargs)
        # attachments are only resolved when a 'files' dir exists under base_path
        self.has_files = self.base_path and (self.base_path / "files").is_dir()

    def __iter__(self):
        """Iterate over records, appending StreamEntryFile objects for any
        on-disk attachments found for each entry."""
        se: StreamEntry
        for idx, se in enumerate(self.iter_entries()):
            if self.has_files:
                file_path = (
                    self.base_path.joinpath("files", format_serial(idx + 1)) / "data"
                )
                if file_path.exists():
                    file_metadata = self.load_file_metadata(file_path)
                    for md in file_metadata:
                        # NOTE(review): "data:" + base64 is not a standard data
                        # URI (no mediatype/";base64," marker) — presumably the
                        # consumer strips the "data:" prefix; confirm.
                        se.files.append(
                            StreamEntryFile(
                                metadata=md,
                                content_url="data:"
                                + b64encode(
                                    (file_path / md["key"]).read_bytes()
                                ).decode("ascii"),
                            )
                        )
            yield se

    def load_file_metadata(self, file_path: Path):
        """Return the list of YAML documents from the most ``meta_``-prefixed
        metadata file present in *file_path*.

        Raises FileNotFoundError when no metadata file exists at all.
        """
        md = "metadata.yaml"
        while True:
            tested_md = "meta_" + md
            # meta_[A]metadata.yaml does not exist, so [A]metadata.yaml is the metadata file,
            # where A is (meta_)*
            if not (file_path / tested_md).exists():
                with open(file_path / md) as f:
                    return list(yaml.safe_load_all(f))
            md = tested_md

    def iter_entries(self):
        "Return an iterator of entries"
        # base implementation yields nothing; concrete readers override this
        return []
|