oarepo-runtime 1.10.3__py3-none-any.whl → 2.0.0.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171):
  1. oarepo_runtime/__init__.py +24 -0
  2. oarepo_runtime/api.py +210 -0
  3. oarepo_runtime/cli/__init__.py +10 -21
  4. oarepo_runtime/cli/search.py +34 -0
  5. oarepo_runtime/config.py +98 -13
  6. oarepo_runtime/ext.py +64 -82
  7. oarepo_runtime/proxies.py +21 -5
  8. oarepo_runtime/records/__init__.py +11 -50
  9. oarepo_runtime/records/drafts.py +24 -18
  10. oarepo_runtime/records/mapping.py +84 -0
  11. oarepo_runtime/records/pid_providers.py +43 -7
  12. oarepo_runtime/records/systemfields/__init__.py +15 -33
  13. oarepo_runtime/records/systemfields/mapping.py +41 -24
  14. oarepo_runtime/records/systemfields/publication_status.py +61 -0
  15. oarepo_runtime/services/__init__.py +12 -0
  16. oarepo_runtime/services/config/__init__.py +15 -21
  17. oarepo_runtime/services/config/link_conditions.py +69 -75
  18. oarepo_runtime/services/config/permissions.py +62 -0
  19. oarepo_runtime/services/facets/__init__.py +12 -33
  20. oarepo_runtime/services/facets/params.py +45 -110
  21. oarepo_runtime/services/records/__init__.py +14 -1
  22. oarepo_runtime/services/records/links.py +21 -11
  23. oarepo_runtime/services/records/mapping.py +42 -0
  24. oarepo_runtime/services/results.py +98 -109
  25. oarepo_runtime/services/schema/__init__.py +12 -44
  26. oarepo_runtime/services/schema/i18n.py +47 -22
  27. oarepo_runtime/services/schema/i18n_ui.py +61 -24
  28. {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev4.dist-info}/METADATA +10 -21
  29. oarepo_runtime-2.0.0.dev4.dist-info/RECORD +32 -0
  30. {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev4.dist-info}/WHEEL +1 -2
  31. oarepo_runtime-2.0.0.dev4.dist-info/entry_points.txt +5 -0
  32. oarepo_runtime/cli/assets.py +0 -145
  33. oarepo_runtime/cli/base.py +0 -25
  34. oarepo_runtime/cli/cf.py +0 -15
  35. oarepo_runtime/cli/check.py +0 -167
  36. oarepo_runtime/cli/configuration.py +0 -51
  37. oarepo_runtime/cli/fixtures.py +0 -167
  38. oarepo_runtime/cli/index.py +0 -272
  39. oarepo_runtime/cli/permissions/__init__.py +0 -6
  40. oarepo_runtime/cli/permissions/base.py +0 -26
  41. oarepo_runtime/cli/permissions/evaluate.py +0 -63
  42. oarepo_runtime/cli/permissions/list.py +0 -239
  43. oarepo_runtime/cli/permissions/search.py +0 -121
  44. oarepo_runtime/cli/validate.py +0 -150
  45. oarepo_runtime/datastreams/__init__.py +0 -38
  46. oarepo_runtime/datastreams/asynchronous.py +0 -247
  47. oarepo_runtime/datastreams/catalogue.py +0 -150
  48. oarepo_runtime/datastreams/datastreams.py +0 -152
  49. oarepo_runtime/datastreams/errors.py +0 -54
  50. oarepo_runtime/datastreams/ext.py +0 -41
  51. oarepo_runtime/datastreams/fixtures.py +0 -265
  52. oarepo_runtime/datastreams/json.py +0 -4
  53. oarepo_runtime/datastreams/readers/__init__.py +0 -39
  54. oarepo_runtime/datastreams/readers/attachments.py +0 -51
  55. oarepo_runtime/datastreams/readers/excel.py +0 -123
  56. oarepo_runtime/datastreams/readers/json.py +0 -27
  57. oarepo_runtime/datastreams/readers/service.py +0 -54
  58. oarepo_runtime/datastreams/readers/yaml.py +0 -14
  59. oarepo_runtime/datastreams/semi_asynchronous.py +0 -91
  60. oarepo_runtime/datastreams/synchronous.py +0 -70
  61. oarepo_runtime/datastreams/transformers.py +0 -18
  62. oarepo_runtime/datastreams/types.py +0 -323
  63. oarepo_runtime/datastreams/utils.py +0 -131
  64. oarepo_runtime/datastreams/writers/__init__.py +0 -21
  65. oarepo_runtime/datastreams/writers/attachments_file.py +0 -92
  66. oarepo_runtime/datastreams/writers/attachments_service.py +0 -118
  67. oarepo_runtime/datastreams/writers/publish.py +0 -70
  68. oarepo_runtime/datastreams/writers/service.py +0 -175
  69. oarepo_runtime/datastreams/writers/utils.py +0 -30
  70. oarepo_runtime/datastreams/writers/validation_errors.py +0 -20
  71. oarepo_runtime/datastreams/writers/yaml.py +0 -56
  72. oarepo_runtime/ext_config.py +0 -67
  73. oarepo_runtime/i18n/__init__.py +0 -3
  74. oarepo_runtime/info/__init__.py +0 -0
  75. oarepo_runtime/info/check.py +0 -95
  76. oarepo_runtime/info/permissions/__init__.py +0 -0
  77. oarepo_runtime/info/permissions/debug.py +0 -191
  78. oarepo_runtime/info/views.py +0 -586
  79. oarepo_runtime/profile.py +0 -60
  80. oarepo_runtime/records/dumpers/__init__.py +0 -8
  81. oarepo_runtime/records/dumpers/edtf_interval.py +0 -38
  82. oarepo_runtime/records/dumpers/multilingual_dumper.py +0 -34
  83. oarepo_runtime/records/entity_resolvers/__init__.py +0 -13
  84. oarepo_runtime/records/entity_resolvers/proxies.py +0 -57
  85. oarepo_runtime/records/mappings/__init__.py +0 -0
  86. oarepo_runtime/records/mappings/rdm_parent_mapping.json +0 -483
  87. oarepo_runtime/records/owners/__init__.py +0 -3
  88. oarepo_runtime/records/owners/registry.py +0 -22
  89. oarepo_runtime/records/relations/__init__.py +0 -22
  90. oarepo_runtime/records/relations/base.py +0 -296
  91. oarepo_runtime/records/relations/internal.py +0 -46
  92. oarepo_runtime/records/relations/lookup.py +0 -28
  93. oarepo_runtime/records/relations/pid_relation.py +0 -102
  94. oarepo_runtime/records/systemfields/featured_file.py +0 -45
  95. oarepo_runtime/records/systemfields/has_draftcheck.py +0 -47
  96. oarepo_runtime/records/systemfields/icu.py +0 -371
  97. oarepo_runtime/records/systemfields/owner.py +0 -115
  98. oarepo_runtime/records/systemfields/record_status.py +0 -35
  99. oarepo_runtime/records/systemfields/selectors.py +0 -98
  100. oarepo_runtime/records/systemfields/synthetic.py +0 -130
  101. oarepo_runtime/resources/__init__.py +0 -4
  102. oarepo_runtime/resources/config.py +0 -12
  103. oarepo_runtime/resources/file_resource.py +0 -15
  104. oarepo_runtime/resources/json_serializer.py +0 -27
  105. oarepo_runtime/resources/localized_ui_json_serializer.py +0 -54
  106. oarepo_runtime/resources/resource.py +0 -53
  107. oarepo_runtime/resources/responses.py +0 -20
  108. oarepo_runtime/services/components.py +0 -429
  109. oarepo_runtime/services/config/draft_link.py +0 -23
  110. oarepo_runtime/services/config/permissions_presets.py +0 -174
  111. oarepo_runtime/services/config/service.py +0 -117
  112. oarepo_runtime/services/custom_fields/__init__.py +0 -80
  113. oarepo_runtime/services/custom_fields/mappings.py +0 -188
  114. oarepo_runtime/services/entity/__init__.py +0 -0
  115. oarepo_runtime/services/entity/config.py +0 -14
  116. oarepo_runtime/services/entity/schema.py +0 -9
  117. oarepo_runtime/services/entity/service.py +0 -48
  118. oarepo_runtime/services/expansions/__init__.py +0 -0
  119. oarepo_runtime/services/expansions/expandable_fields.py +0 -21
  120. oarepo_runtime/services/expansions/service.py +0 -4
  121. oarepo_runtime/services/facets/base.py +0 -12
  122. oarepo_runtime/services/facets/date.py +0 -72
  123. oarepo_runtime/services/facets/enum.py +0 -11
  124. oarepo_runtime/services/facets/facet_groups_names.py +0 -17
  125. oarepo_runtime/services/facets/max_facet.py +0 -13
  126. oarepo_runtime/services/facets/multilingual_facet.py +0 -33
  127. oarepo_runtime/services/facets/nested_facet.py +0 -32
  128. oarepo_runtime/services/facets/year_histogram.py +0 -200
  129. oarepo_runtime/services/files/__init__.py +0 -8
  130. oarepo_runtime/services/files/components.py +0 -62
  131. oarepo_runtime/services/files/service.py +0 -16
  132. oarepo_runtime/services/generators.py +0 -10
  133. oarepo_runtime/services/permissions/__init__.py +0 -3
  134. oarepo_runtime/services/permissions/generators.py +0 -103
  135. oarepo_runtime/services/relations/__init__.py +0 -0
  136. oarepo_runtime/services/relations/components.py +0 -15
  137. oarepo_runtime/services/relations/errors.py +0 -18
  138. oarepo_runtime/services/relations/mapping.py +0 -38
  139. oarepo_runtime/services/schema/cf.py +0 -13
  140. oarepo_runtime/services/schema/i18n_validation.py +0 -7
  141. oarepo_runtime/services/schema/marshmallow.py +0 -44
  142. oarepo_runtime/services/schema/marshmallow_to_json_schema.py +0 -72
  143. oarepo_runtime/services/schema/oneofschema.py +0 -192
  144. oarepo_runtime/services/schema/polymorphic.py +0 -21
  145. oarepo_runtime/services/schema/rdm.py +0 -146
  146. oarepo_runtime/services/schema/rdm_ui.py +0 -156
  147. oarepo_runtime/services/schema/ui.py +0 -251
  148. oarepo_runtime/services/schema/validation.py +0 -70
  149. oarepo_runtime/services/search.py +0 -282
  150. oarepo_runtime/services/service.py +0 -61
  151. oarepo_runtime/tasks.py +0 -6
  152. oarepo_runtime/translations/cs/LC_MESSAGES/messages.mo +0 -0
  153. oarepo_runtime/translations/cs/LC_MESSAGES/messages.po +0 -95
  154. oarepo_runtime/translations/default_translations.py +0 -6
  155. oarepo_runtime/translations/en/LC_MESSAGES/messages.mo +0 -0
  156. oarepo_runtime/translations/en/LC_MESSAGES/messages.po +0 -97
  157. oarepo_runtime/translations/messages.pot +0 -100
  158. oarepo_runtime/uow.py +0 -146
  159. oarepo_runtime/utils/__init__.py +0 -0
  160. oarepo_runtime/utils/functools.py +0 -37
  161. oarepo_runtime/utils/identity_utils.py +0 -35
  162. oarepo_runtime/utils/index.py +0 -11
  163. oarepo_runtime/utils/path.py +0 -97
  164. oarepo_runtime-1.10.3.dist-info/RECORD +0 -163
  165. oarepo_runtime-1.10.3.dist-info/entry_points.txt +0 -16
  166. oarepo_runtime-1.10.3.dist-info/top_level.txt +0 -2
  167. tests/marshmallow_to_json/__init__.py +0 -0
  168. tests/marshmallow_to_json/test_datacite_ui_schema.py +0 -1410
  169. tests/marshmallow_to_json/test_simple_schema.py +0 -52
  170. tests/pkg_data/__init__.py +0 -0
  171. {oarepo_runtime-1.10.3.dist-info → oarepo_runtime-2.0.0.dev4.dist-info}/licenses/LICENSE +0 -0
@@ -1,121 +0,0 @@
1
- import json
2
- import sys
3
-
4
- import click
5
- import yaml
6
- from invenio_records_resources.proxies import current_service_registry
7
-
8
- from oarepo_runtime.info.permissions.debug import add_debugging, merge_communities
9
-
10
- from .base import get_user_and_identity, permissions
11
-
12
-
13
@permissions.command(name="search")
@click.argument("user_id_or_email")
@click.argument("service_name")
@click.option("--explain/--no-explain", default=False)
@click.option("--user/--published", "user_call", default=False)
@click.option("--full-query/--query-filters", default=False)
@click.option("--merge-communities", "do_merge_communities", is_flag=True)
@click.option("--json/--yaml", "as_json", default=False)
def search_permissions(
    user_id_or_email,
    service_name,
    explain,
    user_call,
    full_query,
    do_merge_communities,
    as_json,
):
    """Get search parameters for a given service."""
    # Two modes:
    #   --full-query     run the service search with execution stubbed out and
    #                    dump the "query" part of the search that was built;
    #   --query-filters  (default) instantiate the permission policy and dump
    #                    each of its query filters.
    try:
        service = current_service_registry.get(service_name)
    except KeyError:
        raise click.UsageError(
            f"Service {service_name} not found in {current_service_registry._services.keys()}"
        )
    _user, identity = get_user_and_identity(user_id_or_email)

    permission_policy = service.config.permission_policy_cls

    add_debugging(print_search=explain, print_needs=False, print_excludes=False)

    if not full_query:
        policy_kwargs = {}
        if explain:
            policy_kwargs["debug_identity"] = identity
            print("## Explaining search:")

        action = "read_draft" if user_call else "read_deleted"
        policy = permission_policy(action, identity=identity, **policy_kwargs)
        query_filters = policy.query_filters

        print()
        print("## Query filters:")
        for query_filter in query_filters:
            filter_dict = query_filter.to_dict()
            if explain:
                filter_dict = merge_communities(filter_dict)
            dump_dict(filter_dict, as_json)
            # NOTE(review): the filter is printed a second time as JSON
            # regardless of --json/--yaml; the nesting of this statement was
            # reconstructed (assumed to be inside the loop) - confirm.
            print(json.dumps(filter_dict, indent=2))
        return

    original_search = service._search

    class NoExecute:
        """Stand-in whose ``execute()`` hands back the wrapped search object."""

        def __init__(self, query):
            self.query = query

        def execute(self):
            return self.query

    def _patched_search(*args, **kwargs):
        # Build the search exactly as the service would, but defer execution.
        return NoExecute(original_search(*args, **kwargs))

    def _patched_result_list(self, identity, results, params, **kwargs):
        # Skip result wrapping so the caller receives what execute() returned.
        return results

    service._search = _patched_search
    service.result_list = _patched_result_list

    run_search = service.search_drafts if user_call else service.search
    search_dict = run_search(identity).to_dict()
    if do_merge_communities:
        search_dict = merge_communities(search_dict)
    dump_dict({"query": search_dict["query"]}, as_json)
95
-
96
-
97
def merge_name(d):
    """Fold ``_name`` markers of nested dicts into the key that holds them.

    Lists are processed element by element and dicts value by value. When a
    (processed) dict value contains ``"_name"``, that item is removed and the
    part of the name before the first ``@``, stripped of surrounding
    whitespace, is appended to the parent key as ``key[name]``. Any other
    value is returned as is. New containers are built; the input is not
    modified.
    """
    if isinstance(d, list):
        return [merge_name(item) for item in d]
    if not isinstance(d, dict):
        return d

    merged = {}
    for key, value in d.items():
        value = merge_name(value)
        if isinstance(value, dict) and "_name" in value:
            label = value.pop("_name").split("@")[0].strip()
            key = f"{key}[{label}]"
        merged[key] = value
    return merged
111
-
112
-
113
def dump_dict(d, as_json=False):
    """Write ``d`` to standard output as JSON or as YAML.

    With ``as_json`` the value is printed as JSON indented by two spaces.
    Otherwise it is round-tripped through JSON, passed through
    ``merge_name`` and dumped as block-style YAML to ``sys.stdout``.
    """
    if not as_json:
        plain = json.loads(json.dumps(d))
        yaml.safe_dump(merge_name(plain), sys.stdout, default_flow_style=False)
        return
    print(json.dumps(d, indent=2))
@@ -1,150 +0,0 @@
1
- import sys
2
- import traceback
3
-
4
- import click
5
- import yaml
6
- from flask.cli import with_appcontext
7
- from invenio_db import db
8
- from invenio_records import Record
9
- from invenio_records_resources.proxies import current_service_registry
10
- from tqdm import tqdm
11
-
12
- from .base import oarepo
13
-
14
- try:
15
- import json5 as json
16
- except ImportError:
17
- import json
18
-
19
- from io import StringIO
20
-
21
-
22
class CheckOk(Exception):
    """Raised on purpose to leave a nested DB transaction after a successful check."""
24
-
25
-
26
def dump_data(d):
    """Return ``d`` serialized as YAML text, keeping non-ASCII characters as is."""
    # Without a stream argument PyYAML returns the produced document.
    return yaml.safe_dump(d, allow_unicode=True)
30
-
31
-
32
def _report_record_failure(message, record_data, error, with_stacktrace):
    """Print a red failure banner, the offending record as YAML and the error."""
    click.secho(message, fg="red")
    click.secho(dump_data(record_data))
    click.secho(error)
    if with_stacktrace:
        # Relies on being called while the exception is still being handled.
        traceback.print_exc()


@oarepo.command(
    help="Validate a record. Takes one or two parameters - service name as "
    "the first one, file name or stdin with record data as the second"
)
@click.argument("service-name")
@click.argument("record-file", required=False)
@click.option("--community", help="Community name")
@click.option("--verbose/--no-verbose", is_flag=True)
@click.option("--with-stacktrace", is_flag=True)
@click.option(
    "--fail-on-error",
    is_flag=True,
    help="Fail on the first error (for multiple records)",
)
@with_appcontext
def validate(
    service_name, record_file, community, verbose, with_stacktrace, fail_on_error
):
    """Validate records against a service's schema and record extensions.

    The input (``record_file`` or stdin) is parsed as JSON when it starts
    with ``{`` and as a stream of YAML documents otherwise. Each record is
    loaded through the service schema and then the ``pre_commit`` hook of
    every record extension is run inside a nested DB transaction which is
    always abandoned by raising ``CheckOk``.

    Exits with status 1 when the service is unknown, when any record fails
    (immediately if ``fail_on_error`` is set) and with status 0 otherwise.
    """
    try:
        service = current_service_registry.get(service_name)
    except KeyError:
        click.secho(f"Service {service_name} not found. Existing services:")
        for existing in sorted(current_service_registry._services):
            click.secho(f"  - {existing}")
        sys.exit(1)

    config = service.config
    schema = config.schema

    if not record_file:
        file_content = sys.stdin.read().strip()
    else:
        with open(record_file) as f:
            file_content = f.read()

    # Only stdin input is stripped above, so ignore leading whitespace here;
    # otherwise a JSON file starting with a blank line is sent to the YAML
    # parser.
    if file_content.lstrip().startswith("{"):
        data = json.loads(file_content)
    else:
        data = list(yaml.safe_load_all(StringIO(file_content)))

    if not isinstance(data, list):
        data = [data]

    errors_count = 0
    for idx, d in enumerate(tqdm(data)):
        if community:
            d.setdefault("parent", {}).setdefault("communities", {})[
                "default"
            ] = community

        try:
            loaded = schema().load(d)
        except Exception as e:
            _report_record_failure(
                f"Marshmallow validation of record idx {idx + 1} failed",
                d,
                e,
                with_stacktrace,
            )
            if fail_on_error:
                sys.exit(1)
            errors_count += 1
            continue

        if verbose:
            click.secho(
                f"Marshmallow validation of record idx {idx+1} has been successful",
                fg="green",
            )

        if hasattr(config, "draft_cls"):
            record_cls = config.draft_cls
        else:
            record_cls = config.record_cls

        # Run pre create extensions to check vocabularies
        try:
            with db.session.begin_nested():
                # The model gets this record's loaded data. The original
                # passed ``data`` - the list of *all* input records.
                rec: Record = record_cls(
                    loaded, model=record_cls.model_cls(id=None, data=loaded)
                )
                if record_cls.parent_record_cls:
                    rec.parent = record_cls.parent_record_cls(loaded["parent"])

                for extension in rec._extensions:
                    extension.pre_commit(rec)
                # Leave the nested transaction through an exception so that
                # nothing created during the check is kept.
                raise CheckOk()
        except CheckOk:
            if verbose:
                click.secho(
                    f"Pre-commit hook of record idx {idx+1} has been successful",
                    fg="green",
                )
        except Exception as e:
            _report_record_failure(
                f"Pre-commit validation of record idx {idx + 1} failed",
                d,
                e,
                with_stacktrace,
            )
            if fail_on_error:
                sys.exit(1)
            errors_count += 1
            continue

        if verbose:
            yaml.safe_dump(loaded, sys.stdout, allow_unicode=True)

    if errors_count:
        click.secho(f"Validation finished with {errors_count} errors", fg="red")
        sys.exit(1)
    else:
        click.secho("Validation finished successfully", fg="green")
        sys.exit(0)
@@ -1,38 +0,0 @@
1
- from .asynchronous import AsynchronousDataStream
2
- from .catalogue import DataStreamCatalogue
3
- from .datastreams import AbstractDataStream
4
- from .errors import (
5
- DataStreamCatalogueError,
6
- DataStreamError,
7
- ReaderError,
8
- TransformerError,
9
- WriterError,
10
- )
11
- from .json import JSON, JSONObject
12
- from .readers import BaseReader
13
- from .semi_asynchronous import SemiAsynchronousDataStream
14
- from .synchronous import SynchronousDataStream
15
- from .transformers import BaseTransformer
16
- from .types import DataStreamCallback, StreamBatch, StreamEntry
17
- from .writers import BaseWriter
18
-
19
# Names re-exported by the package, grouped by purpose.
__all__ = [
    # JSON typing helpers
    "JSON",
    "JSONObject",
    # stream data types
    "StreamEntry",
    "StreamBatch",
    "DataStreamCallback",
    # catalogue
    "DataStreamCatalogue",
    # processor base classes
    "BaseReader",
    "BaseTransformer",
    "BaseWriter",
    # data stream implementations
    "AbstractDataStream",
    "SynchronousDataStream",
    "AsynchronousDataStream",
    "SemiAsynchronousDataStream",
    # errors
    "DataStreamError",
    "DataStreamCatalogueError",
    "ReaderError",
    "TransformerError",
    "WriterError",
]
@@ -1,247 +0,0 @@
1
- import logging
2
- from typing import Any, Dict, List, Union
3
-
4
- import celery
5
- from celery.canvas import Signature as CelerySignature
6
- from celery.canvas import chain
7
- from celery.result import allow_join_result
8
- from flask_principal import (
9
- ActionNeed,
10
- Identity,
11
- ItemNeed,
12
- Need,
13
- RoleNeed,
14
- TypeNeed,
15
- UserNeed,
16
- )
17
-
18
- from oarepo_runtime.datastreams.datastreams import (
19
- AbstractDataStream,
20
- DataStreamChain,
21
- Signature,
22
- )
23
-
24
- from .datastreams import DataStreamCallback, StreamBatch
25
- from .json import JSONObject
26
- from .types import StreamEntryError
27
- from .writers import BaseWriter
28
-
29
- timing = logging.getLogger("oai.harvester.timing")
30
- log = logging.getLogger("datastreams")
31
-
32
-
33
class AsynchronousDataStream(AbstractDataStream):
    """Data stream whose batches are processed by a chain of celery tasks."""

    def __init__(
        self,
        *,
        readers: List[Union[Signature, Any]],
        writers: List[Union[Signature, Any]],
        transformers: List[Union[Signature, Any]] = None,
        callback: Union[DataStreamCallback, Any],
        batch_size=100,
        on_background=True,
        reader_callback=None,
    ):
        """Pass the stream definition to the base class and store the run mode.

        :param on_background: forwarded to the chain built by ``build_chain``;
            there it selects ``apply_async`` (true) or ``apply`` (false).
        """
        base_arguments = {
            "readers": readers,
            "writers": writers,
            "transformers": transformers,
            "callback": callback,
            "batch_size": batch_size,
            "reader_callback": reader_callback,
        }
        super().__init__(**base_arguments)
        self._on_background = on_background

    def build_chain(self, identity) -> DataStreamChain:
        """Create the celery-backed chain for the given identity."""
        return AsynchronousDataStreamChain(
            identity=identity,
            transformers=self._transformers,
            writers=self._writers,
            on_background=self._on_background,
        )

    def _reader_error(self, reader, exception):
        """Report a reader failure through the stream callback."""
        serialized_error = StreamEntryError.from_exception(exception).json
        self._callback.apply(
            kwargs={"callback": "reader_error", "exception": serialized_error}
        )
70
-
71
-
72
class AsynchronousDataStreamChain(DataStreamChain):
    """Runs the transformers and writers of a stream as a celery chain."""

    def __init__(
        self,
        identity: Identity,
        transformers: List[Signature],
        writers: List[Signature],
        on_background=True,
    ):
        self._identity = identity
        self._transformers = transformers
        self._writers = writers
        self._on_background = on_background

    def process(self, batch: StreamBatch, callback: CelerySignature):
        """Build the task chain and start it with the serialized batch."""
        self._call(self._prepare_chain(callback), batch=batch.json)

    def _prepare_chain(self, callback: CelerySignature):
        """Assemble the chain.

        Order of steps: ``batch_started`` callback, one processor task per
        transformer, one per writer, ``batch_finished`` callback. An ``error``
        callback task is linked as the error handler of the chain.
        """
        steps = [
            datastreams_call_callback.signature(
                (), kwargs={"callback": callback, "callback_name": "batch_started"}
            )
        ]
        identity_payload = serialize_identity(self._identity)

        def processor_step(processor):
            # Transformers and writers are scheduled through the same task.
            return run_datastream_processor.signature(
                kwargs={
                    "processor": processor.json,
                    "identity": identity_payload,
                    "callback": callback,
                }
            )

        steps.extend(
            processor_step(transformer) for transformer in self._transformers or ()
        )
        steps.extend(processor_step(writer) for writer in self._writers)

        steps.append(
            datastreams_call_callback.signature(
                (),
                kwargs={
                    "callback": callback,
                    "callback_name": "batch_finished",
                    "identity": identity_payload,
                },
            )
        )

        pipeline = chain(*steps)
        pipeline.link_error(
            datastreams_error_callback.signature(
                (),
                kwargs={
                    "callback": callback,
                    "callback_name": "error",
                    "identity": identity_payload,
                },
            )
        )
        return pipeline

    def _call(self, sig, **kwargs):
        """Invoke ``sig`` with ``kwargs`` via ``apply_async`` or ``apply``."""
        runner = sig.apply_async if self._on_background else sig.apply
        runner([], kwargs)

    def finish(self, callback: Signature):
        """Do nothing.

        Dumpers that need a finishing step (such as file dumpers) are not
        supported in the asynchronous mode.
        """
152
-
153
-
154
@celery.shared_task
def run_datastream_processor(batch: Dict, *, processor: JSONObject, identity, callback):
    """Apply one transformer or writer to a serialized batch.

    :param batch: JSON form of a ``StreamBatch``.
    :param processor: JSON form of the processor ``Signature``.
    :param identity: identity as produced by ``serialize_identity`` (or None).
    :param callback: celery signature, possibly in its plain-dict form,
        invoked when the processor raises.
    :return: JSON form of the batch; the processor's result when it returns
        one, the input batch (with the error appended on failure) otherwise.
    """
    # Keep the wire form around: it is passed unchanged to the error callback,
    # so there is no need to serialize again (which fails for a None identity).
    serialized_identity = identity
    resolved_identity = deserialize_identity(identity)
    processor_signature = Signature.from_json(processor)
    deserialized_batch: StreamBatch = StreamBatch.from_json(batch)

    processor_instance = processor_signature.resolve(identity=resolved_identity)
    try:
        if isinstance(processor_instance, BaseWriter):
            deserialized_batch = (
                processor_instance.write(deserialized_batch) or deserialized_batch
            )
        else:
            deserialized_batch = (
                processor_instance.apply(deserialized_batch) or deserialized_batch
            )
    except Exception as ex:
        log.exception("Error processing batch inside %s", processor_signature)

        err = StreamEntryError.from_exception(ex)
        deserialized_batch.errors.append(err)
        # Wrap the callback the same way the sibling callback tasks do, so a
        # plain-dict signature is accepted here as well.
        CelerySignature(callback).apply(
            (),
            {
                "batch": deserialized_batch.json,
                "identity": serialized_identity,
                "callback": f"{processor_signature.kind.value}_error",
                "exception": err.json,
            },
        )
    return deserialized_batch.json
186
-
187
-
188
@celery.shared_task
def datastreams_call_callback(
    batch: Dict, *, identity=None, callback, callback_name, **kwargs
):
    """Invoke the stream callback for ``callback_name`` and pass the batch on.

    The batch is returned unchanged so that the next task of the chain
    receives it.
    """
    payload = {
        "batch": batch,
        "identity": identity,
        "callback": callback_name,
        **kwargs,
    }
    CelerySignature(callback).apply(kwargs=payload)
    return batch
197
-
198
-
199
@celery.shared_task
def datastreams_error_callback(
    parent_task_id, *, identity=None, callback, callback_name, **kwargs
):
    """Report the failure of task ``parent_task_id`` through the stream callback.

    The callback receives an empty batch together with the failed task's
    result and traceback.
    """
    with allow_join_result():
        from celery import current_app

        failed = current_app.AsyncResult(parent_task_id)
        # propagate=False: fetch the outcome without re-raising the error.
        failed.get(propagate=False)

    payload = {
        "batch": {},
        "identity": identity,
        "callback": callback_name,
        "result": failed.result,
        "traceback": failed.traceback,
        **kwargs,
    }
    CelerySignature(callback).apply(kwargs=payload)
220
-
221
-
222
def serialize_identity(identity):
    """Convert an identity to a JSON-friendly dict.

    Each provided need is stored as its type name plus its ``_asdict()``
    fields. ``None`` is returned for a ``None`` identity, mirroring
    ``deserialize_identity`` which maps ``None`` back to ``None``.
    """
    if identity is None:
        return None
    return {
        "id": identity.id,
        "auth_type": identity.auth_type,
        "provides": [
            {"type": type(need).__name__, "params": need._asdict()}
            for need in identity.provides
        ],
    }
230
-
231
-
232
def deserialize_identity(identity_dict):
    """Rebuild an ``Identity`` from the dict made by ``serialize_identity``.

    Returns ``None`` when given ``None``. A need whose type name is not in
    the lookup table raises ``KeyError``.
    """
    if identity_dict is None:
        return None

    restored = Identity(
        id=identity_dict["id"], auth_type=identity_dict["auth_type"]
    )
    need_factories = {
        "Need": Need,
        "UserNeed": UserNeed,
        "RoleNeed": RoleNeed,
        "TypeNeed": TypeNeed,
        "ActionNeed": ActionNeed,
        "ItemNeed": ItemNeed,
    }
    for serialized_need in identity_dict["provides"]:
        factory = need_factories[serialized_need["type"]]
        restored.provides.add(factory(**serialized_need["params"]))
    return restored
@@ -1,150 +0,0 @@
1
- import dataclasses
2
- from pathlib import Path
3
- from typing import Iterator, List
4
-
5
- import yaml
6
- from flask import current_app
7
-
8
- from oarepo_runtime.datastreams.datastreams import Signature, SignatureKind
9
-
10
- from .errors import DataStreamCatalogueError
11
-
12
-
13
@dataclasses.dataclass
class CatalogueDataStream:
    """Signatures of one named stream of the catalogue, grouped by kind."""

    # key of the stream inside the catalogue
    stream_name: str
    # signatures of the stream's readers
    readers: List[Signature]
    # signatures of the stream's writers
    writers: List[Signature]
    # signatures of the stream's transformers
    transformers: List[Signature]
19
-
20
-
21
class DataStreamCatalogue:
    """Catalogue mapping stream names to reader/transformer/writer signatures."""

    def __init__(self, catalogue, content=None) -> None:
        """
        Catalogue of data streams. The catalogue contains a dict of:
        stream_name: stream_definition, where stream definition is an array of:

        - reader: reader_class
          <rest of parameters go to reader constructor>
        - transformer: transformer_class
          <rest of parameters go to transformer constructor>
        - writer: writer_class
          <rest of parameters go to writer constructor>

        If reader class is not passed and _source_ is, then the reader class will be taken from the
        DATASTREAMS_READERS_BY_EXTENSION config variable - map from file extension to reader class.

        If 'service' is passed, service writer will be used with this service

        Transformer class must always be passed.

        :param catalogue: path of the catalogue file; also used as the base
            path handed to the created signatures.
        :param content: already parsed catalogue; when it is empty or missing
            the file at ``catalogue`` is loaded as YAML instead.
        """
        self._catalogue_path = Path(catalogue)
        if content:
            self._catalogue = content
        else:
            # Read as UTF-8 explicitly instead of relying on the locale.
            with open(catalogue, encoding="utf-8") as f:
                self._catalogue = yaml.safe_load(f)

    @property
    def path(self):
        """Path of the catalogue file."""
        return self._catalogue_path

    @property
    def directory(self):
        """Directory containing the catalogue file."""
        return self._catalogue_path.parent

    def get_datastreams(self) -> "Iterator[CatalogueDataStream]":
        """Yield the parsed definition of every stream in the catalogue."""
        for stream_name in self._catalogue:
            yield self.get_datastream(stream_name)

    def __iter__(self):
        """Iterate over the stream names."""
        return iter(self._catalogue)

    def get_datastream(
        self,
        stream_name,
    ) -> "CatalogueDataStream":
        """Turn the definition of ``stream_name`` into signatures.

        :raises DataStreamCatalogueError: when an entry can not be classified
            or its reader can not be determined; the error carries the
            offending ``entry`` and the ``stream_name``.
        """
        stream_definition = self._catalogue[stream_name]
        # Insertion order is the precedence in which an entry is classified.
        collected = {"reader": [], "transformer": [], "writer": []}
        base_path = str(self.directory)
        for entry in stream_definition:
            entry = {**entry}
            try:
                explicit_kind = next(
                    (kind for kind in collected if kind in entry), None
                )
                if explicit_kind is not None:
                    collected[explicit_kind].append(
                        get_signature(explicit_kind, entry, base_path=base_path)
                    )
                elif "source" in entry:
                    collected["reader"].append(self.get_reader(entry))
                elif "service" in entry:
                    collected["writer"].append(self.get_service_writer(entry))
                else:
                    raise DataStreamCatalogueError(
                        "Can not decide what this record is - reader, transformer or service?"
                    )
            except DataStreamCatalogueError as e:
                e.entry = entry
                e.stream_name = stream_name
                raise
        return CatalogueDataStream(
            stream_name=stream_name,
            readers=collected["reader"],
            transformers=collected["transformer"],
            writers=collected["writer"],
        )

    def get_reader(self, entry):
        """Return a reader signature, choosing the class by file extension.

        When the entry has no (truthy) ``reader``, the extension of its
        ``source`` is looked up in ``DATASTREAMS_READERS_BY_EXTENSION`` and
        then in ``DEFAULT_DATASTREAMS_READERS_BY_EXTENSION``.

        :raises DataStreamCatalogueError: when neither ``reader`` nor
            ``source`` is present, or no reader is configured for the
            extension.
        """
        entry = {**entry}
        if not entry.get("reader"):
            if "source" not in entry:
                # Previously this surfaced as UnboundLocalError from the
                # error-message formatting below.
                raise DataStreamCatalogueError(
                    "Can not determine the reader - neither 'reader' nor 'source' is defined"
                )
            source = Path(entry["source"])
            ext = source.suffix[1:]
            config = current_app.config
            try:
                reader_class = (
                    config["DATASTREAMS_READERS_BY_EXTENSION"].get(ext)
                    or config["DEFAULT_DATASTREAMS_READERS_BY_EXTENSION"][ext]
                )
            except KeyError as e:
                raise DataStreamCatalogueError(
                    f"Do not have loader for file {source} - extension {ext} not defined in DATASTREAMS_READERS_BY_EXTENSION config"
                ) from e
            entry["reader"] = reader_class
        return get_signature(
            "reader",
            entry,
            base_path=str(self.directory),
        )

    def get_service_writer(self, entry):
        """Return a signature of the ``service`` writer built from ``entry``."""
        return Signature(
            SignatureKind("writer"),
            "service",
            kwargs={**entry, "base_path": str(self.directory)},
        )
146
-
147
-
148
def get_signature(kind, entry, **kwargs):
    """Build a ``Signature`` of the given kind from a catalogue entry.

    The value stored under ``kind`` becomes the signature name; the remaining
    entry items, overridden by ``kwargs``, become its keyword arguments. The
    passed ``entry`` is not modified.
    """
    signature_kind = SignatureKind(kind)
    params = dict(entry)
    params.update(kwargs)
    name = params.pop(kind)
    return Signature(kind=signature_kind, name=name, kwargs=params)