oarepo-runtime 1.10.3__py3-none-any.whl → 2.0.0.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oarepo_runtime/__init__.py +24 -0
- oarepo_runtime/api.py +210 -0
- oarepo_runtime/cli/__init__.py +10 -21
- oarepo_runtime/cli/search.py +34 -0
- oarepo_runtime/config.py +98 -13
- oarepo_runtime/ext.py +64 -82
- oarepo_runtime/proxies.py +21 -5
- oarepo_runtime/records/__init__.py +11 -50
- oarepo_runtime/records/drafts.py +24 -18
- oarepo_runtime/records/mapping.py +84 -0
- oarepo_runtime/records/pid_providers.py +43 -7
- oarepo_runtime/records/systemfields/__init__.py +15 -33
- oarepo_runtime/records/systemfields/mapping.py +41 -24
- oarepo_runtime/records/systemfields/publication_status.py +61 -0
- oarepo_runtime/services/__init__.py +12 -0
- oarepo_runtime/services/config/__init__.py +15 -21
- oarepo_runtime/services/config/link_conditions.py +69 -75
- oarepo_runtime/services/config/permissions.py +62 -0
- oarepo_runtime/services/facets/__init__.py +12 -33
- oarepo_runtime/services/facets/params.py +45 -110
- oarepo_runtime/services/records/__init__.py +14 -1
- oarepo_runtime/services/records/links.py +21 -11
- oarepo_runtime/services/records/mapping.py +42 -0
- oarepo_runtime/services/results.py +98 -109
- oarepo_runtime/services/schema/__init__.py +12 -44
- oarepo_runtime/services/schema/i18n.py +47 -22
- oarepo_runtime/services/schema/i18n_ui.py +61 -24
- {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev4.dist-info}/METADATA +10 -21
- oarepo_runtime-2.0.0.dev4.dist-info/RECORD +32 -0
- {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev4.dist-info}/WHEEL +1 -2
- oarepo_runtime-2.0.0.dev4.dist-info/entry_points.txt +5 -0
- oarepo_runtime/cli/assets.py +0 -145
- oarepo_runtime/cli/base.py +0 -25
- oarepo_runtime/cli/cf.py +0 -15
- oarepo_runtime/cli/check.py +0 -167
- oarepo_runtime/cli/configuration.py +0 -51
- oarepo_runtime/cli/fixtures.py +0 -167
- oarepo_runtime/cli/index.py +0 -272
- oarepo_runtime/cli/permissions/__init__.py +0 -6
- oarepo_runtime/cli/permissions/base.py +0 -26
- oarepo_runtime/cli/permissions/evaluate.py +0 -63
- oarepo_runtime/cli/permissions/list.py +0 -239
- oarepo_runtime/cli/permissions/search.py +0 -121
- oarepo_runtime/cli/validate.py +0 -150
- oarepo_runtime/datastreams/__init__.py +0 -38
- oarepo_runtime/datastreams/asynchronous.py +0 -247
- oarepo_runtime/datastreams/catalogue.py +0 -150
- oarepo_runtime/datastreams/datastreams.py +0 -152
- oarepo_runtime/datastreams/errors.py +0 -54
- oarepo_runtime/datastreams/ext.py +0 -41
- oarepo_runtime/datastreams/fixtures.py +0 -265
- oarepo_runtime/datastreams/json.py +0 -4
- oarepo_runtime/datastreams/readers/__init__.py +0 -39
- oarepo_runtime/datastreams/readers/attachments.py +0 -51
- oarepo_runtime/datastreams/readers/excel.py +0 -123
- oarepo_runtime/datastreams/readers/json.py +0 -27
- oarepo_runtime/datastreams/readers/service.py +0 -54
- oarepo_runtime/datastreams/readers/yaml.py +0 -14
- oarepo_runtime/datastreams/semi_asynchronous.py +0 -91
- oarepo_runtime/datastreams/synchronous.py +0 -70
- oarepo_runtime/datastreams/transformers.py +0 -18
- oarepo_runtime/datastreams/types.py +0 -323
- oarepo_runtime/datastreams/utils.py +0 -131
- oarepo_runtime/datastreams/writers/__init__.py +0 -21
- oarepo_runtime/datastreams/writers/attachments_file.py +0 -92
- oarepo_runtime/datastreams/writers/attachments_service.py +0 -118
- oarepo_runtime/datastreams/writers/publish.py +0 -70
- oarepo_runtime/datastreams/writers/service.py +0 -175
- oarepo_runtime/datastreams/writers/utils.py +0 -30
- oarepo_runtime/datastreams/writers/validation_errors.py +0 -20
- oarepo_runtime/datastreams/writers/yaml.py +0 -56
- oarepo_runtime/ext_config.py +0 -67
- oarepo_runtime/i18n/__init__.py +0 -3
- oarepo_runtime/info/__init__.py +0 -0
- oarepo_runtime/info/check.py +0 -95
- oarepo_runtime/info/permissions/__init__.py +0 -0
- oarepo_runtime/info/permissions/debug.py +0 -191
- oarepo_runtime/info/views.py +0 -586
- oarepo_runtime/profile.py +0 -60
- oarepo_runtime/records/dumpers/__init__.py +0 -8
- oarepo_runtime/records/dumpers/edtf_interval.py +0 -38
- oarepo_runtime/records/dumpers/multilingual_dumper.py +0 -34
- oarepo_runtime/records/entity_resolvers/__init__.py +0 -13
- oarepo_runtime/records/entity_resolvers/proxies.py +0 -57
- oarepo_runtime/records/mappings/__init__.py +0 -0
- oarepo_runtime/records/mappings/rdm_parent_mapping.json +0 -483
- oarepo_runtime/records/owners/__init__.py +0 -3
- oarepo_runtime/records/owners/registry.py +0 -22
- oarepo_runtime/records/relations/__init__.py +0 -22
- oarepo_runtime/records/relations/base.py +0 -296
- oarepo_runtime/records/relations/internal.py +0 -46
- oarepo_runtime/records/relations/lookup.py +0 -28
- oarepo_runtime/records/relations/pid_relation.py +0 -102
- oarepo_runtime/records/systemfields/featured_file.py +0 -45
- oarepo_runtime/records/systemfields/has_draftcheck.py +0 -47
- oarepo_runtime/records/systemfields/icu.py +0 -371
- oarepo_runtime/records/systemfields/owner.py +0 -115
- oarepo_runtime/records/systemfields/record_status.py +0 -35
- oarepo_runtime/records/systemfields/selectors.py +0 -98
- oarepo_runtime/records/systemfields/synthetic.py +0 -130
- oarepo_runtime/resources/__init__.py +0 -4
- oarepo_runtime/resources/config.py +0 -12
- oarepo_runtime/resources/file_resource.py +0 -15
- oarepo_runtime/resources/json_serializer.py +0 -27
- oarepo_runtime/resources/localized_ui_json_serializer.py +0 -54
- oarepo_runtime/resources/resource.py +0 -53
- oarepo_runtime/resources/responses.py +0 -20
- oarepo_runtime/services/components.py +0 -429
- oarepo_runtime/services/config/draft_link.py +0 -23
- oarepo_runtime/services/config/permissions_presets.py +0 -174
- oarepo_runtime/services/config/service.py +0 -117
- oarepo_runtime/services/custom_fields/__init__.py +0 -80
- oarepo_runtime/services/custom_fields/mappings.py +0 -188
- oarepo_runtime/services/entity/__init__.py +0 -0
- oarepo_runtime/services/entity/config.py +0 -14
- oarepo_runtime/services/entity/schema.py +0 -9
- oarepo_runtime/services/entity/service.py +0 -48
- oarepo_runtime/services/expansions/__init__.py +0 -0
- oarepo_runtime/services/expansions/expandable_fields.py +0 -21
- oarepo_runtime/services/expansions/service.py +0 -4
- oarepo_runtime/services/facets/base.py +0 -12
- oarepo_runtime/services/facets/date.py +0 -72
- oarepo_runtime/services/facets/enum.py +0 -11
- oarepo_runtime/services/facets/facet_groups_names.py +0 -17
- oarepo_runtime/services/facets/max_facet.py +0 -13
- oarepo_runtime/services/facets/multilingual_facet.py +0 -33
- oarepo_runtime/services/facets/nested_facet.py +0 -32
- oarepo_runtime/services/facets/year_histogram.py +0 -200
- oarepo_runtime/services/files/__init__.py +0 -8
- oarepo_runtime/services/files/components.py +0 -62
- oarepo_runtime/services/files/service.py +0 -16
- oarepo_runtime/services/generators.py +0 -10
- oarepo_runtime/services/permissions/__init__.py +0 -3
- oarepo_runtime/services/permissions/generators.py +0 -103
- oarepo_runtime/services/relations/__init__.py +0 -0
- oarepo_runtime/services/relations/components.py +0 -15
- oarepo_runtime/services/relations/errors.py +0 -18
- oarepo_runtime/services/relations/mapping.py +0 -38
- oarepo_runtime/services/schema/cf.py +0 -13
- oarepo_runtime/services/schema/i18n_validation.py +0 -7
- oarepo_runtime/services/schema/marshmallow.py +0 -44
- oarepo_runtime/services/schema/marshmallow_to_json_schema.py +0 -72
- oarepo_runtime/services/schema/oneofschema.py +0 -192
- oarepo_runtime/services/schema/polymorphic.py +0 -21
- oarepo_runtime/services/schema/rdm.py +0 -146
- oarepo_runtime/services/schema/rdm_ui.py +0 -156
- oarepo_runtime/services/schema/ui.py +0 -251
- oarepo_runtime/services/schema/validation.py +0 -70
- oarepo_runtime/services/search.py +0 -282
- oarepo_runtime/services/service.py +0 -61
- oarepo_runtime/tasks.py +0 -6
- oarepo_runtime/translations/cs/LC_MESSAGES/messages.mo +0 -0
- oarepo_runtime/translations/cs/LC_MESSAGES/messages.po +0 -95
- oarepo_runtime/translations/default_translations.py +0 -6
- oarepo_runtime/translations/en/LC_MESSAGES/messages.mo +0 -0
- oarepo_runtime/translations/en/LC_MESSAGES/messages.po +0 -97
- oarepo_runtime/translations/messages.pot +0 -100
- oarepo_runtime/uow.py +0 -146
- oarepo_runtime/utils/__init__.py +0 -0
- oarepo_runtime/utils/functools.py +0 -37
- oarepo_runtime/utils/identity_utils.py +0 -35
- oarepo_runtime/utils/index.py +0 -11
- oarepo_runtime/utils/path.py +0 -97
- oarepo_runtime-1.10.3.dist-info/RECORD +0 -163
- oarepo_runtime-1.10.3.dist-info/entry_points.txt +0 -16
- oarepo_runtime-1.10.3.dist-info/top_level.txt +0 -2
- tests/marshmallow_to_json/__init__.py +0 -0
- tests/marshmallow_to_json/test_datacite_ui_schema.py +0 -1410
- tests/marshmallow_to_json/test_simple_schema.py +0 -52
- tests/pkg_data/__init__.py +0 -0
- {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev4.dist-info}/licenses/LICENSE +0 -0
@@ -1,121 +0,0 @@
|
|
1
|
-
import json
|
2
|
-
import sys
|
3
|
-
|
4
|
-
import click
|
5
|
-
import yaml
|
6
|
-
from invenio_records_resources.proxies import current_service_registry
|
7
|
-
|
8
|
-
from oarepo_runtime.info.permissions.debug import add_debugging, merge_communities
|
9
|
-
|
10
|
-
from .base import get_user_and_identity, permissions
|
11
|
-
|
12
|
-
|
13
|
-
@permissions.command(name="search")
@click.argument("user_id_or_email")
@click.argument("service_name")
@click.option("--explain/--no-explain", default=False)
@click.option("--user/--published", "user_call", default=False)
@click.option("--full-query/--query-filters", default=False)
@click.option("--merge-communities", "do_merge_communities", is_flag=True)
@click.option("--json/--yaml", "as_json", default=False)
def search_permissions(
    user_id_or_email,
    service_name,
    explain,
    user_call,
    full_query,
    do_merge_communities,
    as_json,
):
    """Get search parameters for a given service."""
    # NOTE: the docstring above is used by click as the command help text,
    # so the detailed documentation lives in comments instead.
    #
    # Two modes:
    #   --full-query     runs the service search with execution patched out
    #                    and dumps the "query" part of the built search.
    #   --query-filters  instantiates the permission policy and dumps each
    #                    of its query filters.
    # --user selects the draft search / "read_draft" action, --published the
    # plain search / "read_deleted" action.
    try:
        service = current_service_registry.get(service_name)
    except KeyError:
        raise click.UsageError(
            f"Service {service_name} not found in {current_service_registry._services.keys()}"
        ) from None
    _user, identity = get_user_and_identity(user_id_or_email)

    permission_policy = service.config.permission_policy_cls

    add_debugging(print_search=explain, print_needs=False, print_excludes=False)

    if full_query:
        previous_search = service._search

        class NoExecute:
            """Stand-in whose ``execute()`` hands back the wrapped query."""

            def __init__(self, query):
                self.query = query

            def execute(self):
                return self.query

        def _patched_search(*args, **kwargs):
            # Build the search as usual, but wrap it so nothing is executed.
            ret = previous_search(*args, **kwargs)
            return NoExecute(ret)

        def _patched_result_list(self, identity, results, params, **kwargs):
            # Return the raw "results" (here: the un-executed search object).
            return results

        # The patches are applied to this service instance and not reverted.
        service._search = _patched_search
        service.result_list = _patched_result_list

        if user_call:
            ret = service.search_drafts(identity)
        else:
            ret = service.search(identity)
        ret = ret.to_dict()
        if do_merge_communities:
            ret = merge_communities(ret)
        ret = {
            "query": ret["query"],
        }
        dump_dict(ret, as_json)
    else:
        over = {}
        if explain:
            over["debug_identity"] = identity
            print("## Explaining search:")

        if user_call:
            p = permission_policy("read_draft", identity=identity, **over)
        else:
            p = permission_policy("read_deleted", identity=identity, **over)
        query_filters = p.query_filters

        print()
        print("## Query filters:")
        for qf in query_filters:
            dict_qf = qf.to_dict()
            # NOTE(review): merging here is keyed on --explain, while the
            # --merge-communities flag is not consulted in this branch;
            # confirm that this is intended.
            if explain:
                dict_qf = merge_communities(dict_qf)
            # dump_dict already honours --json/--yaml; the extra raw JSON
            # print that used to follow duplicated every filter in the output.
            dump_dict(dict_qf, as_json)
|
95
|
-
|
96
|
-
|
97
|
-
def merge_name(d):
    """Fold ``_name`` entries of nested dicts into their parent keys.

    Lists are processed element by element and dicts value by value. When a
    processed value is a dict containing ``"_name"``, that entry is removed
    and the part of the name before the first ``"@"`` (stripped of
    surrounding whitespace) is appended to the parent key as ``key[name]``.
    Any other value is returned unchanged. Dicts and lists in the result are
    new objects.
    """
    if isinstance(d, dict):
        merged = {}
        for key, value in d.items():
            value = merge_name(value)
            if isinstance(value, dict) and "_name" in value:
                label = value.pop("_name").split("@")[0].strip()
                key = f"{key}[{label}]"
            merged[key] = value
        return merged
    if isinstance(d, list):
        return [merge_name(item) for item in d]
    return d
|
111
|
-
|
112
|
-
|
113
|
-
def dump_dict(d, as_json=False):
    """Print ``d`` to standard output as JSON or YAML.

    With ``as_json`` set, the data is printed as JSON indented by two
    spaces. Otherwise it is round-tripped through JSON, passed through
    ``merge_name`` and written to ``sys.stdout`` as block-style YAML.
    """
    if not as_json:
        plain = json.loads(json.dumps(d))
        yaml.safe_dump(merge_name(plain), sys.stdout, default_flow_style=False)
        return
    print(json.dumps(d, indent=2))
|
oarepo_runtime/cli/validate.py
DELETED
@@ -1,150 +0,0 @@
|
|
1
|
-
import sys
|
2
|
-
import traceback
|
3
|
-
|
4
|
-
import click
|
5
|
-
import yaml
|
6
|
-
from flask.cli import with_appcontext
|
7
|
-
from invenio_db import db
|
8
|
-
from invenio_records import Record
|
9
|
-
from invenio_records_resources.proxies import current_service_registry
|
10
|
-
from tqdm import tqdm
|
11
|
-
|
12
|
-
from .base import oarepo
|
13
|
-
|
14
|
-
try:
|
15
|
-
import json5 as json
|
16
|
-
except ImportError:
|
17
|
-
import json
|
18
|
-
|
19
|
-
from io import StringIO
|
20
|
-
|
21
|
-
|
22
|
-
class CheckOk(Exception):
    """Signal that a check completed without error.

    ``validate`` raises it at the end of its nested database transaction
    block and catches it right after to report success.
    """
|
24
|
-
|
25
|
-
|
26
|
-
def dump_data(d):
    """Return ``d`` serialized as a YAML string, keeping unicode characters."""
    # Without a stream argument safe_dump returns the document as a string.
    return yaml.safe_dump(d, allow_unicode=True)
|
30
|
-
|
31
|
-
|
32
|
-
@oarepo.command(
    help="Validate a record. Takes one or two parameters - service name as "
    "the first one, file name or stdin with record data as the second"
)
@click.argument("service-name")
@click.argument("record-file", required=False)
@click.option("--community", help="Community name")
@click.option("--verbose/--no-verbose", is_flag=True)
@click.option("--with-stacktrace", is_flag=True)
@click.option(
    "--fail-on-error",
    is_flag=True,
    help="Fail on the first error (for multiple records)",
)
@with_appcontext
def validate(
    service_name, record_file, community, verbose, with_stacktrace, fail_on_error
):
    """Validate record data against a service schema and pre-commit hooks.

    The input (a file, or stdin when no file is given) is parsed as JSON when
    it starts with ``{`` and as a stream of YAML documents otherwise. Each
    record is loaded through the service's marshmallow schema and then has
    the ``pre_commit`` hook of every record extension run inside a nested
    database transaction that is always left via ``CheckOk``.

    Exits with status 1 when the service is unknown or any record failed
    (immediately on the first failure with ``--fail-on-error``), and with
    status 0 otherwise.
    """
    try:
        service = current_service_registry.get(service_name)
    except KeyError:
        click.secho(f"Service {service_name} not found. Existing services:")
        for existing in sorted(current_service_registry._services):
            click.secho(f" - {existing}")
        sys.exit(1)

    config = service.config
    schema = config.schema

    if not record_file:
        file_content = sys.stdin.read().strip()
    else:
        # Explicit encoding so the result does not depend on the locale.
        with open(record_file, encoding="utf-8") as f:
            file_content = f.read()

    # Ignore leading whitespace only for format detection; the content itself
    # is left untouched so YAML indentation is not disturbed.
    if file_content.lstrip().startswith("{"):
        data = json.loads(file_content)
    else:
        data = list(yaml.safe_load_all(StringIO(file_content)))

    if not isinstance(data, list):
        data = [data]

    errors_count = 0
    for idx, d in enumerate(tqdm(data)):
        if community:
            d.setdefault("parent", {}).setdefault("communities", {})[
                "default"
            ] = community
        try:
            loaded = schema().load(d)
        except Exception as e:
            click.secho(
                f"Marshmallow validation of record idx {idx + 1} failed",
                fg="red",
            )
            click.secho(dump_data(d))
            click.secho(e)
            if with_stacktrace:
                traceback.print_exc()
            if fail_on_error:
                sys.exit(1)
            errors_count += 1
            continue

        if verbose:
            click.secho(
                f"Marshmallow validation of record idx {idx+1} has been successful",
                fg="green",
            )

        if hasattr(config, "draft_cls"):
            record_cls = config.draft_cls
        else:
            record_cls = config.record_cls

        # Run pre create extensions to check vocabularies
        try:
            with db.session.begin_nested():
                # NOTE(review): the model receives ``data`` (the whole list of
                # records), not the current record; confirm this is intended.
                rec: Record = record_cls(
                    loaded, model=record_cls.model_cls(id=None, data=data)
                )
                if record_cls.parent_record_cls:
                    parent = record_cls.parent_record_cls(loaded["parent"])
                    rec.parent = parent

                for e in rec._extensions:
                    e.pre_commit(rec)
                # Leave the nested transaction through an exception.
                raise CheckOk()
        except CheckOk:
            if verbose:
                click.secho(
                    f"Pre-commit hook of record idx {idx+1} has been successful",
                    fg="green",
                )
        except Exception as e:
            click.secho(
                f"Pre-commit validation of record idx {idx + 1} failed",
                fg="red",
            )
            click.secho(dump_data(d))
            click.secho(e)
            if with_stacktrace:
                traceback.print_exc()
            if fail_on_error:
                sys.exit(1)
            errors_count += 1
            continue

        if verbose:
            yaml.safe_dump(loaded, sys.stdout, allow_unicode=True)

    if errors_count:
        click.secho(f"Validation finished with {errors_count} errors", fg="red")
        sys.exit(1)
    else:
        click.secho("Validation finished successfully", fg="green")
        sys.exit(0)
|
@@ -1,38 +0,0 @@
|
|
1
|
-
from .asynchronous import AsynchronousDataStream
|
2
|
-
from .catalogue import DataStreamCatalogue
|
3
|
-
from .datastreams import AbstractDataStream
|
4
|
-
from .errors import (
|
5
|
-
DataStreamCatalogueError,
|
6
|
-
DataStreamError,
|
7
|
-
ReaderError,
|
8
|
-
TransformerError,
|
9
|
-
WriterError,
|
10
|
-
)
|
11
|
-
from .json import JSON, JSONObject
|
12
|
-
from .readers import BaseReader
|
13
|
-
from .semi_asynchronous import SemiAsynchronousDataStream
|
14
|
-
from .synchronous import SynchronousDataStream
|
15
|
-
from .transformers import BaseTransformer
|
16
|
-
from .types import DataStreamCallback, StreamBatch, StreamEntry
|
17
|
-
from .writers import BaseWriter
|
18
|
-
|
19
|
-
__all__ = [
|
20
|
-
"JSONObject",
|
21
|
-
"JSON",
|
22
|
-
"StreamEntry",
|
23
|
-
"DataStreamCatalogue",
|
24
|
-
"BaseReader",
|
25
|
-
"BaseWriter",
|
26
|
-
"BaseTransformer",
|
27
|
-
"DataStreamCatalogueError",
|
28
|
-
"ReaderError",
|
29
|
-
"WriterError",
|
30
|
-
"TransformerError",
|
31
|
-
"StreamBatch",
|
32
|
-
"DataStreamError",
|
33
|
-
"DataStreamCallback",
|
34
|
-
"SynchronousDataStream",
|
35
|
-
"AbstractDataStream",
|
36
|
-
"AsynchronousDataStream",
|
37
|
-
"SemiAsynchronousDataStream",
|
38
|
-
]
|
@@ -1,247 +0,0 @@
|
|
1
|
-
import logging
|
2
|
-
from typing import Any, Dict, List, Union
|
3
|
-
|
4
|
-
import celery
|
5
|
-
from celery.canvas import Signature as CelerySignature
|
6
|
-
from celery.canvas import chain
|
7
|
-
from celery.result import allow_join_result
|
8
|
-
from flask_principal import (
|
9
|
-
ActionNeed,
|
10
|
-
Identity,
|
11
|
-
ItemNeed,
|
12
|
-
Need,
|
13
|
-
RoleNeed,
|
14
|
-
TypeNeed,
|
15
|
-
UserNeed,
|
16
|
-
)
|
17
|
-
|
18
|
-
from oarepo_runtime.datastreams.datastreams import (
|
19
|
-
AbstractDataStream,
|
20
|
-
DataStreamChain,
|
21
|
-
Signature,
|
22
|
-
)
|
23
|
-
|
24
|
-
from .datastreams import DataStreamCallback, StreamBatch
|
25
|
-
from .json import JSONObject
|
26
|
-
from .types import StreamEntryError
|
27
|
-
from .writers import BaseWriter
|
28
|
-
|
29
|
-
timing = logging.getLogger("oai.harvester.timing")
|
30
|
-
log = logging.getLogger("datastreams")
|
31
|
-
|
32
|
-
|
33
|
-
class AsynchronousDataStream(AbstractDataStream):
    """Data stream whose batches are handled by ``AsynchronousDataStreamChain``."""

    def __init__(
        self,
        *,
        readers: List[Union[Signature, Any]],
        writers: List[Union[Signature, Any]],
        transformers: List[Union[Signature, Any]] = None,
        callback: Union[DataStreamCallback, Any],
        batch_size=100,
        on_background=True,
        reader_callback=None,
    ):
        """Pass the stream setup to the base class and keep ``on_background``.

        ``on_background`` is stored and later handed to the chain created by
        ``build_chain``; all other arguments go to the parent constructor.
        """
        base_options = {
            "readers": readers,
            "writers": writers,
            "transformers": transformers,
            "callback": callback,
            "batch_size": batch_size,
            "reader_callback": reader_callback,
        }
        super().__init__(**base_options)
        self._on_background = on_background

    def build_chain(self, identity) -> DataStreamChain:
        """Create the processing chain for ``identity``."""
        return AsynchronousDataStreamChain(
            identity=identity,
            transformers=self._transformers,
            writers=self._writers,
            on_background=self._on_background,
        )

    def _reader_error(self, reader, exception):
        """Report a reader failure by applying the stream callback."""
        error_json = StreamEntryError.from_exception(exception).json
        self._callback.apply(
            kwargs={"callback": "reader_error", "exception": error_json}
        )
|
70
|
-
|
71
|
-
|
72
|
-
class AsynchronousDataStreamChain(DataStreamChain):
    """Chain that turns transformers and writers into a celery task chain."""

    def __init__(
        self,
        identity: Identity,
        transformers: List[Signature],
        writers: List[Signature],
        on_background=True,
    ):
        """Remember the identity, the processors and the execution mode."""
        self._identity = identity
        self._transformers = transformers
        self._writers = writers
        self._on_background = on_background

    def process(self, batch: StreamBatch, callback: CelerySignature):
        """Build the task chain for ``callback`` and run it on ``batch``."""
        prepared = self._prepare_chain(callback)
        self._call(prepared, batch=batch.json)

    def _prepare_chain(self, callback: CelerySignature):
        """Assemble the celery chain for one batch.

        The chain is: a ``batch_started`` callback, one processor task per
        transformer, one per writer, and a ``batch_finished`` callback. An
        ``error`` callback is linked as the chain's error handler.
        """
        serialized_identity = serialize_identity(self._identity)

        def processor_step(signature):
            # One chain link that runs a single transformer or writer.
            return run_datastream_processor.signature(
                kwargs={
                    "processor": signature.json,
                    "identity": serialized_identity,
                    "callback": callback,
                }
            )

        steps = [
            datastreams_call_callback.signature(
                (), kwargs={"callback": callback, "callback_name": "batch_started"}
            )
        ]
        # Transformers are optional; writers are always iterated.
        steps.extend(processor_step(item) for item in (self._transformers or ()))
        steps.extend(processor_step(item) for item in self._writers)
        steps.append(
            datastreams_call_callback.signature(
                (),
                kwargs={
                    "callback": callback,
                    "callback_name": "batch_finished",
                    "identity": serialized_identity,
                },
            )
        )

        chain_sig = chain(*steps)
        on_error = datastreams_error_callback.signature(
            (),
            kwargs={
                "callback": callback,
                "callback_name": "error",
                "identity": serialized_identity,
            },
        )
        chain_sig.link_error(on_error)
        return chain_sig

    def _call(self, sig, **kwargs):
        """Run ``sig`` via ``apply_async`` when on background, else ``apply``."""
        dispatch = sig.apply_async if self._on_background else sig.apply
        dispatch([], kwargs)

    def finish(self, callback: Signature):
        """Do nothing: processors that need a finish step are not supported in async."""
|
152
|
-
|
153
|
-
|
154
|
-
@celery.shared_task
def run_datastream_processor(batch: Dict, *, processor: JSONObject, identity, callback):
    """Run one transformer or writer on a serialized batch.

    The identity, processor signature and batch are rebuilt from their JSON
    forms. Writers are invoked through ``write`` and everything else through
    ``apply``; when the processor returns a falsy value, the input batch is
    kept. On any exception the error is logged, appended to the batch errors
    and reported through ``callback`` as ``<kind>_error``.

    Returns the JSON form of the resulting batch.
    """
    identity = deserialize_identity(identity)
    processor_signature = Signature.from_json(processor)
    deserialized_batch: StreamBatch = StreamBatch.from_json(batch)

    processor = processor_signature.resolve(identity=identity)
    try:
        if isinstance(processor, BaseWriter):
            deserialized_batch = (
                processor.write(deserialized_batch) or deserialized_batch
            )
        else:
            deserialized_batch = (
                processor.apply(deserialized_batch) or deserialized_batch
            )

    except Exception as ex:
        log.exception("Error processing batch inside %s", processor_signature)

        err = StreamEntryError.from_exception(ex)
        deserialized_batch.errors.append(err)
        # The callback may arrive as a plain dict after task serialization;
        # rebuild the signature the same way the sibling callback tasks do.
        CelerySignature(callback).apply(
            (),
            {
                "batch": deserialized_batch.json,
                "identity": serialize_identity(identity),
                "callback": f"{processor_signature.kind.value}_error",
                "exception": err.json,
            },
        )
    return deserialized_batch.json
|
186
|
-
|
187
|
-
|
188
|
-
@celery.shared_task
def datastreams_call_callback(
    batch: Dict, *, identity=None, callback, callback_name, **kwargs
):
    """Apply ``callback`` with the batch and pass the batch through.

    The callback receives ``batch``, ``identity``, the callback name under
    the ``callback`` key, and any extra keyword arguments. The unchanged
    batch is returned.
    """
    payload = {
        "batch": batch,
        "identity": identity,
        "callback": callback_name,
        **kwargs,
    }
    CelerySignature(callback).apply(kwargs=payload)
    return batch
|
197
|
-
|
198
|
-
|
199
|
-
@celery.shared_task
def datastreams_error_callback(
    parent_task_id, *, identity=None, callback, callback_name, **kwargs
):
    """Report a failed parent task through ``callback``.

    The parent task result is fetched without re-raising its exception, then
    the callback is applied with an empty batch, the identity, the callback
    name, the parent's result and traceback, and any extra keyword arguments.
    """
    with allow_join_result():
        from celery import current_app

        parent_result = current_app.AsyncResult(parent_task_id)
        parent_result.get(propagate=False)

        payload = {
            "batch": {},
            "identity": identity,
            "callback": callback_name,
            "result": parent_result.result,
            "traceback": parent_result.traceback,
            **kwargs,
        }
        CelerySignature(callback).apply(kwargs=payload)
|
220
|
-
|
221
|
-
|
222
|
-
def serialize_identity(identity):
    """Convert an identity into a plain dict.

    The result holds the identity's ``id`` and ``auth_type`` and a
    ``provides`` list with one ``{"type", "params"}`` item per provided need,
    where ``type`` is the need's class name and ``params`` its ``_asdict()``.
    """
    provides = []
    for need in identity.provides:
        provides.append({"type": type(need).__name__, "params": need._asdict()})
    return {
        "id": identity.id,
        "auth_type": identity.auth_type,
        "provides": provides,
    }
|
230
|
-
|
231
|
-
|
232
|
-
def deserialize_identity(identity_dict):
    """Rebuild an ``Identity`` from the dict made by ``serialize_identity``.

    Returns ``None`` when given ``None``. Each item of ``provides`` is turned
    back into the need class named by its ``type`` (a ``KeyError`` is raised
    for a name outside the table below) and added to the identity.
    """
    if identity_dict is None:
        return None

    need_classes = {
        "Need": Need,
        "UserNeed": UserNeed,
        "RoleNeed": RoleNeed,
        "TypeNeed": TypeNeed,
        "ActionNeed": ActionNeed,
        "ItemNeed": ItemNeed,
    }
    restored = Identity(
        id=identity_dict["id"], auth_type=identity_dict["auth_type"]
    )
    for provide in identity_dict["provides"]:
        need_cls = need_classes[provide["type"]]
        restored.provides.add(need_cls(**provide["params"]))
    return restored
|
@@ -1,150 +0,0 @@
|
|
1
|
-
import dataclasses
|
2
|
-
from pathlib import Path
|
3
|
-
from typing import Iterator, List
|
4
|
-
|
5
|
-
import yaml
|
6
|
-
from flask import current_app
|
7
|
-
|
8
|
-
from oarepo_runtime.datastreams.datastreams import Signature, SignatureKind
|
9
|
-
|
10
|
-
from .errors import DataStreamCatalogueError
|
11
|
-
|
12
|
-
|
13
|
-
@dataclasses.dataclass
class CatalogueDataStream:
    """One named stream of a catalogue, split into its processor signatures."""

    # Key of the stream inside the catalogue.
    stream_name: str
    # Signatures collected from the stream definition, grouped by kind.
    readers: List[Signature]
    writers: List[Signature]
    transformers: List[Signature]
|
19
|
-
|
20
|
-
|
21
|
-
class DataStreamCatalogue:
    """Catalogue of data streams loaded from a YAML file or given content."""

    def __init__(self, catalogue, content=None) -> None:
        """
        Catalogue of data streams. The catalogue contains a dict of:
        stream_name: stream_definition, where stream definition is an array of:

        - reader: reader_class
          <rest of parameters go to reader constructor>
        - transformer: transformer_class
          <rest of parameters go to transformer constructor>
        - writer: writer_class
          <rest of parameters go to writer constructor>

        If reader class is not passed and _source_ is, then the reader class will be taken from the
        DATASTREAMS_READERS_BY_EXTENSION config variable - map from file extension to reader class.

        If 'service' is passed, service writer will be used with this service

        Transformer class must always be passed.

        :param catalogue: path of the catalogue file; also used to resolve
            the catalogue directory.
        :param content: already parsed catalogue; when truthy the file is
            not read.
        """
        self._catalogue_path = Path(catalogue)
        if content:
            self._catalogue = content
        else:
            # Explicit encoding so loading does not depend on the locale.
            with open(catalogue, encoding="utf-8") as f:
                self._catalogue = yaml.safe_load(f)

    @property
    def path(self):
        """Path of the catalogue file."""
        return self._catalogue_path

    @property
    def directory(self):
        """Directory containing the catalogue file."""
        return self._catalogue_path.parent

    def get_datastreams(self) -> Iterator[CatalogueDataStream]:
        """Yield the resolved definition of every stream in the catalogue."""
        for stream_name in self._catalogue:
            yield self.get_datastream(stream_name)

    def __iter__(self):
        """Iterate over the stream names."""
        return iter(self._catalogue)

    def get_datastream(
        self,
        stream_name,
    ) -> CatalogueDataStream:
        """Resolve one stream definition into reader/transformer/writer signatures.

        :raises DataStreamCatalogueError: when an entry cannot be classified
            or resolved; the error is annotated with ``entry`` and
            ``stream_name`` before being re-raised.
        """
        stream_definition = self._catalogue[stream_name]
        base_path = str(self.directory)
        readers = []
        transformers = []
        writers = []
        for entry in stream_definition:
            # Work on a copy so the catalogue content is not modified.
            entry = {**entry}
            try:
                if "reader" in entry:
                    readers.append(
                        get_signature("reader", entry, base_path=base_path)
                    )
                elif "transformer" in entry:
                    transformers.append(
                        get_signature("transformer", entry, base_path=base_path)
                    )
                elif "writer" in entry:
                    writers.append(
                        get_signature("writer", entry, base_path=base_path)
                    )
                elif "source" in entry:
                    readers.append(self.get_reader(entry))
                elif "service" in entry:
                    writers.append(self.get_service_writer(entry))
                else:
                    raise DataStreamCatalogueError(
                        "Can not decide what this record is - reader, transformer or service?"
                    )
            except DataStreamCatalogueError as e:
                e.entry = entry
                e.stream_name = stream_name
                # Bare raise keeps the original traceback.
                raise
        return CatalogueDataStream(
            stream_name=stream_name,
            readers=readers,
            transformers=transformers,
            writers=writers,
        )

    def get_reader(self, entry):
        """Return the reader signature for ``entry``.

        When the entry names no reader, the reader class is looked up by the
        extension of ``entry["source"]`` in the
        ``DATASTREAMS_READERS_BY_EXTENSION`` config, falling back to
        ``DEFAULT_DATASTREAMS_READERS_BY_EXTENSION``.

        :raises DataStreamCatalogueError: when no reader is configured for
            the extension.
        :raises KeyError: when the entry has neither a reader nor a source.
        """
        entry = {**entry}
        if not entry.get("reader"):
            # Resolved outside the try block so the error message below can
            # never refer to unbound names.
            source = Path(entry["source"])
            ext = source.suffix[1:]
            try:
                reader_class = (
                    current_app.config["DATASTREAMS_READERS_BY_EXTENSION"].get(ext)
                    or current_app.config["DEFAULT_DATASTREAMS_READERS_BY_EXTENSION"][
                        ext
                    ]
                )
            except KeyError as err:
                raise DataStreamCatalogueError(
                    f"Do not have loader for file {source} - extension {ext} not defined in DATASTREAMS_READERS_BY_EXTENSION config"
                ) from err
            entry["reader"] = reader_class
        return get_signature(
            "reader",
            entry,
            base_path=str(self.directory),
        )

    def get_service_writer(self, entry):
        """Return a ``service`` writer signature carrying the entry as kwargs."""
        return Signature(
            SignatureKind("writer"),
            "service",
            kwargs={**entry, "base_path": str(self.directory)},
        )
|
146
|
-
|
147
|
-
|
148
|
-
def get_signature(kind, entry, **kwargs):
    """Build a ``Signature`` of the given kind from a catalogue entry.

    The entry is merged with ``kwargs`` (keyword arguments win); the value
    stored under the ``kind`` key becomes the signature name and everything
    else becomes the signature kwargs. ``entry`` itself is not modified.
    """
    params = {**entry, **kwargs}
    signature_kind = SignatureKind(kind)
    name = params.pop(kind)
    return Signature(kind=signature_kind, name=name, kwargs=params)
|