esd-services-api-client 2.1.2__py3-none-any.whl → 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- esd_services_api_client/_version.py +1 -1
- esd_services_api_client/nexus/README.md +62 -37
- esd_services_api_client/nexus/abstractions/algrorithm_cache.py +100 -0
- esd_services_api_client/nexus/abstractions/input_object.py +63 -0
- esd_services_api_client/nexus/abstractions/nexus_object.py +11 -9
- esd_services_api_client/nexus/algorithms/_baseline_algorithm.py +6 -4
- esd_services_api_client/nexus/algorithms/_remote_algorithm.py +118 -0
- esd_services_api_client/nexus/algorithms/forked_algorithm.py +124 -0
- esd_services_api_client/nexus/algorithms/minimalistic.py +5 -1
- esd_services_api_client/nexus/algorithms/recursive.py +5 -1
- esd_services_api_client/nexus/core/app_core.py +5 -0
- esd_services_api_client/nexus/core/app_dependencies.py +20 -1
- esd_services_api_client/nexus/exceptions/cache_errors.py +49 -0
- esd_services_api_client/nexus/exceptions/startup_error.py +15 -0
- esd_services_api_client/nexus/input/__init__.py +0 -1
- esd_services_api_client/nexus/input/input_processor.py +11 -58
- esd_services_api_client/nexus/input/input_reader.py +9 -5
- {esd_services_api_client-2.1.2.dist-info → esd_services_api_client-2.2.0.dist-info}/METADATA +1 -1
- {esd_services_api_client-2.1.2.dist-info → esd_services_api_client-2.2.0.dist-info}/RECORD +21 -17
- esd_services_api_client/nexus/input/_functions.py +0 -89
- {esd_services_api_client-2.1.2.dist-info → esd_services_api_client-2.2.0.dist-info}/LICENSE +0 -0
- {esd_services_api_client-2.1.2.dist-info → esd_services_api_client-2.2.0.dist-info}/WHEEL +0 -0
@@ -1 +1 @@
|
|
1
|
-
__version__ = '2.
|
1
|
+
__version__ = '2.2.0'
|
@@ -28,7 +28,9 @@ from adapta.storage.query_enabled_store import QueryEnabledStore
|
|
28
28
|
from dataclasses_json import DataClassJsonMixin
|
29
29
|
from injector import inject
|
30
30
|
|
31
|
+
from esd_services_api_client.nexus.abstractions.algrorithm_cache import InputCache
|
31
32
|
from esd_services_api_client.nexus.abstractions.logger_factory import LoggerFactory
|
33
|
+
from esd_services_api_client.nexus.abstractions.nexus_object import AlgorithmResult
|
32
34
|
from esd_services_api_client.nexus.abstractions.socket_provider import (
|
33
35
|
ExternalSocketProvider,
|
34
36
|
)
|
@@ -36,7 +38,6 @@ from esd_services_api_client.nexus.configurations.algorithm_configuration import
|
|
36
38
|
NexusConfiguration,
|
37
39
|
)
|
38
40
|
from esd_services_api_client.nexus.core.app_core import Nexus
|
39
|
-
from esd_services_api_client.nexus.abstractions.nexus_object import AlgorithmResult
|
40
41
|
from esd_services_api_client.nexus.algorithms import MinimalisticAlgorithm
|
41
42
|
from esd_services_api_client.nexus.input import InputReader, InputProcessor
|
42
43
|
|
@@ -127,12 +128,6 @@ class MockRequestHandler(BaseHTTPRequestHandler):
|
|
127
128
|
|
128
129
|
|
129
130
|
class XReader(InputReader[MyAlgorithmPayload, pandas.DataFrame]):
|
130
|
-
async def _context_open(self):
|
131
|
-
pass
|
132
|
-
|
133
|
-
async def _context_close(self):
|
134
|
-
pass
|
135
|
-
|
136
131
|
@inject
|
137
132
|
def __init__(
|
138
133
|
self,
|
@@ -141,7 +136,8 @@ class XReader(InputReader[MyAlgorithmPayload, pandas.DataFrame]):
|
|
141
136
|
logger_factory: LoggerFactory,
|
142
137
|
payload: MyAlgorithmPayload,
|
143
138
|
socket_provider: ExternalSocketProvider,
|
144
|
-
*readers: "InputReader"
|
139
|
+
*readers: "InputReader",
|
140
|
+
cache: InputCache
|
145
141
|
):
|
146
142
|
super().__init__(
|
147
143
|
socket=socket_provider.socket("x"),
|
@@ -149,10 +145,11 @@ class XReader(InputReader[MyAlgorithmPayload, pandas.DataFrame]):
|
|
149
145
|
metrics_provider=metrics_provider,
|
150
146
|
logger_factory=logger_factory,
|
151
147
|
payload=payload,
|
148
|
+
cache=cache,
|
152
149
|
*readers
|
153
150
|
)
|
154
151
|
|
155
|
-
async def _read_input(self) -> pandas.DataFrame:
|
152
|
+
async def _read_input(self, **_) -> pandas.DataFrame:
|
156
153
|
self._logger.info(
|
157
154
|
"Payload: {payload}; Socket path: {socket_path}",
|
158
155
|
payload=self._payload.to_json(),
|
@@ -162,12 +159,6 @@ class XReader(InputReader[MyAlgorithmPayload, pandas.DataFrame]):
|
|
162
159
|
|
163
160
|
|
164
161
|
class YReader(InputReader[MyAlgorithmPayload2, pandas.DataFrame]):
|
165
|
-
async def _context_open(self):
|
166
|
-
pass
|
167
|
-
|
168
|
-
async def _context_close(self):
|
169
|
-
pass
|
170
|
-
|
171
162
|
@inject
|
172
163
|
def __init__(
|
173
164
|
self,
|
@@ -176,7 +167,8 @@ class YReader(InputReader[MyAlgorithmPayload2, pandas.DataFrame]):
|
|
176
167
|
logger_factory: LoggerFactory,
|
177
168
|
payload: MyAlgorithmPayload2,
|
178
169
|
socket_provider: ExternalSocketProvider,
|
179
|
-
*readers: "InputReader"
|
170
|
+
*readers: "InputReader",
|
171
|
+
cache: InputCache
|
180
172
|
):
|
181
173
|
super().__init__(
|
182
174
|
socket=socket_provider.socket("y"),
|
@@ -184,10 +176,11 @@ class YReader(InputReader[MyAlgorithmPayload2, pandas.DataFrame]):
|
|
184
176
|
metrics_provider=metrics_provider,
|
185
177
|
logger_factory=logger_factory,
|
186
178
|
payload=payload,
|
179
|
+
cache=cache,
|
187
180
|
*readers
|
188
181
|
)
|
189
182
|
|
190
|
-
async def _read_input(self) -> pandas.DataFrame:
|
183
|
+
async def _read_input(self, **_) -> pandas.DataFrame:
|
191
184
|
self._logger.info(
|
192
185
|
"Payload: {payload}; Socket path: {socket_path}",
|
193
186
|
payload=self._payload.to_json(),
|
@@ -196,39 +189,59 @@ class YReader(InputReader[MyAlgorithmPayload2, pandas.DataFrame]):
|
|
196
189
|
return pandas.DataFrame([{"a": 10, "b": 12}, {"a": 11, "b": 13}])
|
197
190
|
|
198
191
|
|
199
|
-
class
|
200
|
-
|
201
|
-
|
192
|
+
class XProcessor(InputProcessor[MyAlgorithmPayload, pandas.DataFrame]):
|
193
|
+
@inject
|
194
|
+
def __init__(
|
195
|
+
self,
|
196
|
+
x: XReader,
|
197
|
+
metrics_provider: MetricsProvider,
|
198
|
+
logger_factory: LoggerFactory,
|
199
|
+
my_conf: MyAlgorithmConfiguration,
|
200
|
+
cache: InputCache,
|
201
|
+
):
|
202
|
+
super().__init__(
|
203
|
+
x,
|
204
|
+
metrics_provider=metrics_provider,
|
205
|
+
logger_factory=logger_factory,
|
206
|
+
payload=None,
|
207
|
+
cache=cache,
|
208
|
+
)
|
209
|
+
|
210
|
+
self.conf = my_conf
|
211
|
+
|
212
|
+
async def _process_input(
|
213
|
+
self, x: pandas.DataFrame, **_
|
214
|
+
) -> pandas.DataFrame:
|
215
|
+
self._logger.info("Config: {config}", config=self.conf.to_json())
|
216
|
+
return x.assign(c=[-1, 1])
|
202
217
|
|
203
|
-
async def _context_close(self):
|
204
|
-
pass
|
205
218
|
|
219
|
+
class YProcessor(InputProcessor[MyAlgorithmPayload, pandas.DataFrame]):
|
206
220
|
@inject
|
207
221
|
def __init__(
|
208
222
|
self,
|
209
|
-
x: XReader,
|
210
223
|
y: YReader,
|
211
224
|
metrics_provider: MetricsProvider,
|
212
225
|
logger_factory: LoggerFactory,
|
213
226
|
my_conf: MyAlgorithmConfiguration,
|
227
|
+
cache: InputCache,
|
214
228
|
):
|
215
229
|
super().__init__(
|
216
|
-
x,
|
217
230
|
y,
|
218
231
|
metrics_provider=metrics_provider,
|
219
232
|
logger_factory=logger_factory,
|
220
233
|
payload=None,
|
234
|
+
cache=cache,
|
221
235
|
)
|
222
236
|
|
223
237
|
self.conf = my_conf
|
224
238
|
|
225
|
-
async def
|
239
|
+
async def _process_input(
|
240
|
+
self, y: pandas.DataFrame, **_
|
241
|
+
) -> pandas.DataFrame:
|
226
242
|
self._logger.info("Config: {config}", config=self.conf.to_json())
|
227
|
-
|
228
|
-
|
229
|
-
"x_ready": inputs["x"].assign(c=[-1, 1]),
|
230
|
-
"y_ready": inputs["y"].assign(c=[-1, 1]),
|
231
|
-
}
|
243
|
+
return y.assign(c=[-1, 1])
|
244
|
+
|
232
245
|
|
233
246
|
@dataclass
|
234
247
|
class MyResult(AlgorithmResult):
|
@@ -240,8 +253,8 @@ class MyResult(AlgorithmResult):
|
|
240
253
|
|
241
254
|
def to_kwargs(self) -> dict[str, Any]:
|
242
255
|
pass
|
243
|
-
|
244
|
-
|
256
|
+
|
257
|
+
|
245
258
|
class MyAlgorithm(MinimalisticAlgorithm[MyAlgorithmPayload]):
|
246
259
|
async def _context_open(self):
|
247
260
|
pass
|
@@ -250,11 +263,22 @@ class MyAlgorithm(MinimalisticAlgorithm[MyAlgorithmPayload]):
|
|
250
263
|
pass
|
251
264
|
|
252
265
|
@inject
|
253
|
-
def __init__(
|
254
|
-
|
266
|
+
def __init__(
|
267
|
+
self,
|
268
|
+
metrics_provider: MetricsProvider,
|
269
|
+
logger_factory: LoggerFactory,
|
270
|
+
x_processor: XProcessor,
|
271
|
+
y_processor: YProcessor,
|
272
|
+
cache: InputCache,
|
273
|
+
):
|
274
|
+
super().__init__(
|
275
|
+
metrics_provider, logger_factory, x_processor, y_processor, cache=cache
|
276
|
+
)
|
255
277
|
|
256
|
-
async def _run(
|
257
|
-
|
278
|
+
async def _run(
|
279
|
+
self, x: pandas.DataFrame, y: pandas.DataFrame, **kwargs
|
280
|
+
) -> MyResult:
|
281
|
+
return MyResult(x, y)
|
258
282
|
|
259
283
|
|
260
284
|
async def main():
|
@@ -270,7 +294,8 @@ async def main():
|
|
270
294
|
await Nexus.create()
|
271
295
|
.add_reader(XReader)
|
272
296
|
.add_reader(YReader)
|
273
|
-
.use_processor(
|
297
|
+
.use_processor(XProcessor)
|
298
|
+
.use_processor(YProcessor)
|
274
299
|
.use_algorithm(MyAlgorithm)
|
275
300
|
.inject_configuration(MyAlgorithmConfiguration)
|
276
301
|
.inject_payload(MyAlgorithmPayload, MyAlgorithmPayload2)
|
@@ -0,0 +1,100 @@
|
|
1
|
+
"""
|
2
|
+
Simple in-memory cache for readers and processors
|
3
|
+
"""
|
4
|
+
|
5
|
+
# Copyright (c) 2023-2024. ECCO Sneaks & Data
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
8
|
+
# you may not use this file except in compliance with the License.
|
9
|
+
# You may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
16
|
+
# See the License for the specific language governing permissions and
|
17
|
+
# limitations under the License.
|
18
|
+
#
|
19
|
+
|
20
|
+
import asyncio
|
21
|
+
from typing import final, Type
|
22
|
+
|
23
|
+
import azure.core.exceptions
|
24
|
+
import deltalake
|
25
|
+
|
26
|
+
from esd_services_api_client.nexus.abstractions.input_object import InputObject
|
27
|
+
from esd_services_api_client.nexus.abstractions.nexus_object import TResult, TPayload
|
28
|
+
from esd_services_api_client.nexus.exceptions.cache_errors import (
|
29
|
+
FatalCachingError,
|
30
|
+
TransientCachingError,
|
31
|
+
)
|
32
|
+
|
33
|
+
|
34
|
+
@final
|
35
|
+
class InputCache:
|
36
|
+
"""
|
37
|
+
In-memory cache for Nexus input readers/processors
|
38
|
+
"""
|
39
|
+
|
40
|
+
def __init__(self):
|
41
|
+
self._cache: dict[str, TResult] = {}
|
42
|
+
|
43
|
+
def _resolve_exc_type(
|
44
|
+
self, ex: BaseException
|
45
|
+
) -> Type[FatalCachingError] | Type[TransientCachingError]:
|
46
|
+
"""
|
47
|
+
Resolve base exception into a specific Nexus exception.
|
48
|
+
"""
|
49
|
+
match type(ex):
|
50
|
+
case azure.core.exceptions.HttpResponseError, deltalake.PyDeltaTableError:
|
51
|
+
return TransientCachingError
|
52
|
+
case azure.core.exceptions.AzureError, azure.core.exceptions.ClientAuthenticationError:
|
53
|
+
return FatalCachingError
|
54
|
+
case _:
|
55
|
+
return FatalCachingError
|
56
|
+
|
57
|
+
async def resolve(
|
58
|
+
self,
|
59
|
+
*readers_or_processors: InputObject[TPayload, TResult],
|
60
|
+
**kwargs,
|
61
|
+
) -> dict[str, TResult]:
|
62
|
+
"""
|
63
|
+
Concurrently resolve results for all given readers or processors by invoking their `process` method; objects whose cache key is already present in the cache are not re-run.
|
64
|
+
"""
|
65
|
+
|
66
|
+
def get_result(alias: str, completed_task: asyncio.Task) -> TResult:
|
67
|
+
object_exc = completed_task.exception()
|
68
|
+
if object_exc:
|
69
|
+
raise self._resolve_exc_type(object_exc)(alias) from object_exc
|
70
|
+
|
71
|
+
return completed_task.result()
|
72
|
+
|
73
|
+
async def _execute(nexus_input: InputObject) -> TResult:
|
74
|
+
async with nexus_input as instance:
|
75
|
+
result = await nexus_input.process(**kwargs)
|
76
|
+
|
77
|
+
self._cache[instance.cache_key()] = result
|
78
|
+
|
79
|
+
return result
|
80
|
+
|
81
|
+
cached = {
|
82
|
+
reader_or_processor.__class__.alias(): reader_or_processor.data
|
83
|
+
for reader_or_processor in readers_or_processors
|
84
|
+
if reader_or_processor.cache_key() in self._cache
|
85
|
+
}
|
86
|
+
if len(cached) == len(readers_or_processors):
|
87
|
+
return cached
|
88
|
+
|
89
|
+
read_tasks: dict[str, asyncio.Task] = {
|
90
|
+
reader.__class__.alias(): asyncio.create_task(_execute(reader))
|
91
|
+
for reader in readers_or_processors
|
92
|
+
if reader.cache_key() not in self._cache
|
93
|
+
}
|
94
|
+
|
95
|
+
if len(read_tasks) > 0:
|
96
|
+
await asyncio.wait(fs=read_tasks.values())
|
97
|
+
|
98
|
+
return {
|
99
|
+
alias: get_result(alias, task) for alias, task in read_tasks.items()
|
100
|
+
} | cached
|
@@ -0,0 +1,63 @@
|
|
1
|
+
"""
|
2
|
+
Base class for input reading/processing.
|
3
|
+
"""
|
4
|
+
|
5
|
+
# Copyright (c) 2023-2024. ECCO Sneaks & Data
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
8
|
+
# you may not use this file except in compliance with the License.
|
9
|
+
# You may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
16
|
+
# See the License for the specific language governing permissions and
|
17
|
+
# limitations under the License.
|
18
|
+
#
|
19
|
+
|
20
|
+
import base64
|
21
|
+
import os
|
22
|
+
from abc import ABC, abstractmethod
|
23
|
+
|
24
|
+
from esd_services_api_client.nexus.abstractions.nexus_object import (
|
25
|
+
TPayload,
|
26
|
+
TResult,
|
27
|
+
NexusObject,
|
28
|
+
)
|
29
|
+
|
30
|
+
|
31
|
+
class InputObject(NexusObject[TPayload, TResult], ABC):
|
32
|
+
"""
|
33
|
+
Base class for input processing and reader objects.
|
34
|
+
"""
|
35
|
+
|
36
|
+
async def _context_open(self):
|
37
|
+
"""
|
38
|
+
Optional actions to perform on context activation.
|
39
|
+
"""
|
40
|
+
|
41
|
+
async def _context_close(self):
|
42
|
+
"""
|
43
|
+
Optional actions to perform on context closure.
|
44
|
+
"""
|
45
|
+
|
46
|
+
def cache_key(self) -> str:
|
47
|
+
"""
|
48
|
+
Unique identifier for this Nexus object; can be used for in-memory or external caching.
|
49
|
+
"""
|
50
|
+
return f"{base64.b64encode(hex(id(self)).encode('utf-8')).decode('utf-8')}_{os.getpid()}_{self.__class__.__name__}"
|
51
|
+
|
52
|
+
@property
|
53
|
+
def data(self) -> TResult | None:
|
54
|
+
"""
|
55
|
+
Data bound to this object.
|
56
|
+
"""
|
57
|
+
return None
|
58
|
+
|
59
|
+
@abstractmethod
|
60
|
+
async def process(self, **kwargs) -> TResult:
|
61
|
+
"""
|
62
|
+
Executes input processing logic (read or transform)
|
63
|
+
"""
|
@@ -1,8 +1,6 @@
|
|
1
1
|
"""
|
2
2
|
Base classes for all objects used by Nexus.
|
3
3
|
"""
|
4
|
-
import re
|
5
|
-
|
6
4
|
# Copyright (c) 2023-2024. ECCO Sneaks & Data
|
7
5
|
#
|
8
6
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
@@ -20,11 +18,13 @@ import re
|
|
20
18
|
|
21
19
|
|
22
20
|
from abc import ABC, abstractmethod
|
21
|
+
import re
|
23
22
|
from typing import Generic, TypeVar, Union, Any
|
24
23
|
|
25
24
|
import pandas
|
26
25
|
import polars
|
27
26
|
from adapta.metrics import MetricsProvider
|
27
|
+
from dataclasses_json.stringcase import snakecase
|
28
28
|
|
29
29
|
from esd_services_api_client.nexus.abstractions.logger_factory import LoggerFactory
|
30
30
|
|
@@ -92,11 +92,13 @@ class NexusObject(Generic[TPayload, TResult], ABC):
|
|
92
92
|
"""
|
93
93
|
Alias to identify this reader's output
|
94
94
|
"""
|
95
|
-
return
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
95
|
+
return snakecase(
|
96
|
+
re.sub(
|
97
|
+
r"(?<!^)(?=[A-Z])",
|
98
|
+
"_",
|
99
|
+
cls.__name__.lower()
|
100
|
+
.replace("reader", "")
|
101
|
+
.replace("processor", "")
|
102
|
+
.replace("algorithm", ""),
|
103
|
+
)
|
102
104
|
)
|
@@ -19,11 +19,12 @@
|
|
19
19
|
|
20
20
|
|
21
21
|
from abc import abstractmethod
|
22
|
-
from functools import
|
22
|
+
from functools import partial
|
23
23
|
|
24
24
|
from adapta.metrics import MetricsProvider
|
25
25
|
from adapta.utils.decorators import run_time_metrics_async
|
26
26
|
|
27
|
+
from esd_services_api_client.nexus.abstractions.algrorithm_cache import InputCache
|
27
28
|
from esd_services_api_client.nexus.abstractions.nexus_object import (
|
28
29
|
NexusObject,
|
29
30
|
TPayload,
|
@@ -32,7 +33,6 @@ from esd_services_api_client.nexus.abstractions.nexus_object import (
|
|
32
33
|
from esd_services_api_client.nexus.abstractions.logger_factory import LoggerFactory
|
33
34
|
from esd_services_api_client.nexus.input.input_processor import (
|
34
35
|
InputProcessor,
|
35
|
-
resolve_processors,
|
36
36
|
)
|
37
37
|
|
38
38
|
|
@@ -46,9 +46,11 @@ class BaselineAlgorithm(NexusObject[TPayload, AlgorithmResult]):
|
|
46
46
|
metrics_provider: MetricsProvider,
|
47
47
|
logger_factory: LoggerFactory,
|
48
48
|
*input_processors: InputProcessor,
|
49
|
+
cache: InputCache,
|
49
50
|
):
|
50
51
|
super().__init__(metrics_provider, logger_factory)
|
51
52
|
self._input_processors = input_processors
|
53
|
+
self._cache = cache
|
52
54
|
|
53
55
|
@abstractmethod
|
54
56
|
async def _run(self, **kwargs) -> AlgorithmResult:
|
@@ -75,11 +77,11 @@ class BaselineAlgorithm(NexusObject[TPayload, AlgorithmResult]):
|
|
75
77
|
async def _measured_run(**run_args):
|
76
78
|
return await self._run(**run_args)
|
77
79
|
|
78
|
-
results = await
|
80
|
+
results = await self._cache.resolve(*self._input_processors, **kwargs)
|
79
81
|
|
80
82
|
return await partial(
|
81
83
|
_measured_run,
|
82
|
-
**
|
84
|
+
**results,
|
83
85
|
metric_tags=self._metric_tags,
|
84
86
|
metrics_provider=self._metrics_provider,
|
85
87
|
logger=self._logger,
|
@@ -0,0 +1,118 @@
|
|
1
|
+
"""
|
2
|
+
Remotely executed algorithm
|
3
|
+
"""
|
4
|
+
|
5
|
+
# Copyright (c) 2023-2024. ECCO Sneaks & Data
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
8
|
+
# you may not use this file except in compliance with the License.
|
9
|
+
# You may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
16
|
+
# See the License for the specific language governing permissions and
|
17
|
+
# limitations under the License.
|
18
|
+
#
|
19
|
+
|
20
|
+
|
21
|
+
from abc import abstractmethod
|
22
|
+
from functools import partial
|
23
|
+
|
24
|
+
from adapta.metrics import MetricsProvider
|
25
|
+
from adapta.utils.decorators import run_time_metrics_async
|
26
|
+
|
27
|
+
from esd_services_api_client.crystal import CrystalConnector, AlgorithmConfiguration
|
28
|
+
from esd_services_api_client.nexus.abstractions.algrorithm_cache import InputCache
|
29
|
+
from esd_services_api_client.nexus.abstractions.nexus_object import (
|
30
|
+
NexusObject,
|
31
|
+
TPayload,
|
32
|
+
AlgorithmResult,
|
33
|
+
)
|
34
|
+
from esd_services_api_client.nexus.abstractions.logger_factory import LoggerFactory
|
35
|
+
from esd_services_api_client.nexus.input.input_processor import (
|
36
|
+
InputProcessor,
|
37
|
+
)
|
38
|
+
from esd_services_api_client.nexus.input.payload_reader import AlgorithmPayload
|
39
|
+
|
40
|
+
|
41
|
+
class RemoteAlgorithm(NexusObject[TPayload, AlgorithmResult]):
|
42
|
+
"""
|
43
|
+
Base class for algorithms whose run is submitted for remote execution through a `CrystalConnector`.
|
44
|
+
"""
|
45
|
+
|
46
|
+
def __init__(
|
47
|
+
self,
|
48
|
+
metrics_provider: MetricsProvider,
|
49
|
+
logger_factory: LoggerFactory,
|
50
|
+
remote_client: CrystalConnector,
|
51
|
+
remote_name: str,
|
52
|
+
remote_config: AlgorithmConfiguration,
|
53
|
+
*input_processors: InputProcessor,
|
54
|
+
cache: InputCache,
|
55
|
+
):
|
56
|
+
super().__init__(metrics_provider, logger_factory)
|
57
|
+
self._input_processors = input_processors
|
58
|
+
self._remote_client = remote_client
|
59
|
+
self._remote_name = remote_name
|
60
|
+
self._remote_config = remote_config
|
61
|
+
self._cache = cache
|
62
|
+
|
63
|
+
@abstractmethod
|
64
|
+
def _generate_tag(self) -> str:
|
65
|
+
"""
|
66
|
+
Generates a submission tag.
|
67
|
+
"""
|
68
|
+
|
69
|
+
@abstractmethod
|
70
|
+
def _transform_submission_result(
|
71
|
+
self, request_id: str, tag: str
|
72
|
+
) -> AlgorithmResult:
|
73
|
+
"""
|
74
|
+
Called after submitting a remote run. Use this to enrich your output with remote run id and tag.
|
75
|
+
"""
|
76
|
+
|
77
|
+
@abstractmethod
|
78
|
+
async def _run(self, **kwargs) -> AlgorithmPayload:
|
79
|
+
"""
|
80
|
+
Core logic for this algorithm. Implementing this method is mandatory.
|
81
|
+
"""
|
82
|
+
|
83
|
+
@property
|
84
|
+
def _metric_tags(self) -> dict[str, str]:
|
85
|
+
return {"algorithm": self.__class__.alias()}
|
86
|
+
|
87
|
+
async def run(self, **kwargs) -> AlgorithmResult:
|
88
|
+
"""
|
89
|
+
Coroutine that executes the algorithm logic.
|
90
|
+
"""
|
91
|
+
|
92
|
+
@run_time_metrics_async(
|
93
|
+
metric_name="algorthm_run",
|
94
|
+
on_finish_message_template="Launched a new remote {algorithm} in {elapsed:.2f}s seconds",
|
95
|
+
template_args={
|
96
|
+
"algorithm": self.__class__.alias().upper(),
|
97
|
+
},
|
98
|
+
)
|
99
|
+
async def _measured_run(**run_args) -> AlgorithmResult:
|
100
|
+
payload = await self._run(**run_args)
|
101
|
+
tag = self._generate_tag()
|
102
|
+
request_id = self._remote_client.create_run(
|
103
|
+
algorithm=self._remote_name,
|
104
|
+
payload=payload.to_dict(),
|
105
|
+
custom_config=self._remote_config,
|
106
|
+
tag=tag,
|
107
|
+
)
|
108
|
+
return self._transform_submission_result(request_id, tag)
|
109
|
+
|
110
|
+
results = await self._cache.resolve(*self._input_processors, **kwargs)
|
111
|
+
|
112
|
+
return await partial(
|
113
|
+
_measured_run,
|
114
|
+
**results,
|
115
|
+
metric_tags=self._metric_tags,
|
116
|
+
metrics_provider=self._metrics_provider,
|
117
|
+
logger=self._logger,
|
118
|
+
)()
|
@@ -0,0 +1,124 @@
|
|
1
|
+
"""
|
2
|
+
Forked algorithm: a main run followed by scheduling of one or more remote forks
|
3
|
+
"""
|
4
|
+
|
5
|
+
# Copyright (c) 2023-2024. ECCO Sneaks & Data
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
8
|
+
# you may not use this file except in compliance with the License.
|
9
|
+
# You may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
16
|
+
# See the License for the specific language governing permissions and
|
17
|
+
# limitations under the License.
|
18
|
+
#
|
19
|
+
|
20
|
+
import asyncio
|
21
|
+
from abc import abstractmethod
|
22
|
+
from functools import partial
|
23
|
+
|
24
|
+
from adapta.metrics import MetricsProvider
|
25
|
+
from adapta.utils.decorators import run_time_metrics_async
|
26
|
+
|
27
|
+
from esd_services_api_client.nexus.abstractions.algrorithm_cache import InputCache
|
28
|
+
from esd_services_api_client.nexus.abstractions.nexus_object import (
|
29
|
+
NexusObject,
|
30
|
+
TPayload,
|
31
|
+
AlgorithmResult,
|
32
|
+
)
|
33
|
+
from esd_services_api_client.nexus.abstractions.logger_factory import LoggerFactory
|
34
|
+
from esd_services_api_client.nexus.algorithms._remote_algorithm import RemoteAlgorithm
|
35
|
+
from esd_services_api_client.nexus.input.input_processor import (
|
36
|
+
InputProcessor,
|
37
|
+
)
|
38
|
+
|
39
|
+
|
40
|
+
class ForkedAlgorithm(NexusObject[TPayload, AlgorithmResult]):
|
41
|
+
"""
|
42
|
+
Forked algorithm is an algorithm that returns a result (main scenario run) and then fires off one or more forked runs
|
43
|
+
with different configurations as specified in fork class implementation.
|
44
|
+
|
45
|
+
Forked algorithm only awaits scheduling of forked runs, but never their results.
|
46
|
+
|
47
|
+
Q: How do I spawn a ForkedAlgorithm run as a remote algorithm w/o ending in an infinite loop?
|
48
|
+
A: Provide class names for forks from your algorithm configuration and construct forks with locate(fork_class)(**kwargs) calls.
|
49
|
+
|
50
|
+
Q: Can I build execution trees with this?
|
51
|
+
A: Yes, they will look like this (F(N) - Forked with N forks):
|
52
|
+
|
53
|
+
graph TB
|
54
|
+
F3["F(3)"] --> F2["F(2)"]
|
55
|
+
F3 --> F0["F(0)"]
|
56
|
+
F3 --> F1["F(1)"]
|
57
|
+
F2 --> F1_1["F(1)"]
|
58
|
+
F2 --> F0_1["F(0)"]
|
59
|
+
F1 --> F0_2["F(0)"]
|
60
|
+
F1_1 --> F0_3["F(0)"]
|
61
|
+
"""
|
62
|
+
|
63
|
+
def __init__(
|
64
|
+
self,
|
65
|
+
metrics_provider: MetricsProvider,
|
66
|
+
logger_factory: LoggerFactory,
|
67
|
+
forks: list[RemoteAlgorithm],
|
68
|
+
*input_processors: InputProcessor,
|
69
|
+
cache: InputCache,
|
70
|
+
):
|
71
|
+
super().__init__(metrics_provider, logger_factory)
|
72
|
+
self._input_processors = input_processors
|
73
|
+
self._forks = forks
|
74
|
+
self._cache = cache
|
75
|
+
|
76
|
+
@abstractmethod
|
77
|
+
async def _run(self, **kwargs) -> AlgorithmResult:
|
78
|
+
"""
|
79
|
+
Core logic for this algorithm. Implementing this method is mandatory.
|
80
|
+
"""
|
81
|
+
|
82
|
+
@property
|
83
|
+
def _metric_tags(self) -> dict[str, str]:
|
84
|
+
return {"algorithm": self.__class__.alias()}
|
85
|
+
|
86
|
+
async def run(self, **kwargs) -> AlgorithmResult:
|
87
|
+
"""
|
88
|
+
Coroutine that executes the algorithm logic.
|
89
|
+
"""
|
90
|
+
|
91
|
+
@run_time_metrics_async(
|
92
|
+
metric_name="algorthm_run",
|
93
|
+
on_finish_message_template="Finished running algorithm {algorithm} in {elapsed:.2f}s seconds",
|
94
|
+
template_args={
|
95
|
+
"algorithm": self.__class__.alias().upper(),
|
96
|
+
},
|
97
|
+
)
|
98
|
+
async def _measured_run(**run_args) -> AlgorithmResult:
|
99
|
+
return await self._run(**run_args)
|
100
|
+
|
101
|
+
if len(self._forks) > 0:
|
102
|
+
self._logger.info(
|
103
|
+
"This algorithm has forks attached: {forks}. They will be executed after the main run",
|
104
|
+
forks=",".join([fork.alias() for fork in self._forks]),
|
105
|
+
)
|
106
|
+
else:
|
107
|
+
self._logger.info(
|
108
|
+
"This algorithm supports forks but none were injected. Proceeding with a main run only"
|
109
|
+
)
|
110
|
+
|
111
|
+
results = await self._cache.resolve(*self._input_processors, **kwargs)
|
112
|
+
|
113
|
+
run_result = await partial(
|
114
|
+
_measured_run,
|
115
|
+
**results,
|
116
|
+
metric_tags=self._metric_tags,
|
117
|
+
metrics_provider=self._metrics_provider,
|
118
|
+
logger=self._logger,
|
119
|
+
)()
|
120
|
+
|
121
|
+
# now await callback scheduling
|
122
|
+
await asyncio.wait([fork.run(**kwargs) for fork in self._forks])
|
123
|
+
|
124
|
+
return run_result
|
@@ -22,6 +22,7 @@ from abc import ABC
|
|
22
22
|
from adapta.metrics import MetricsProvider
|
23
23
|
from injector import inject
|
24
24
|
|
25
|
+
from esd_services_api_client.nexus.abstractions.algrorithm_cache import InputCache
|
25
26
|
from esd_services_api_client.nexus.abstractions.logger_factory import LoggerFactory
|
26
27
|
from esd_services_api_client.nexus.abstractions.nexus_object import TPayload
|
27
28
|
from esd_services_api_client.nexus.algorithms._baseline_algorithm import (
|
@@ -41,5 +42,8 @@ class MinimalisticAlgorithm(BaselineAlgorithm[TPayload], ABC):
|
|
41
42
|
metrics_provider: MetricsProvider,
|
42
43
|
logger_factory: LoggerFactory,
|
43
44
|
*input_processors: InputProcessor,
|
45
|
+
cache: InputCache,
|
44
46
|
):
|
45
|
-
super().__init__(
|
47
|
+
super().__init__(
|
48
|
+
metrics_provider, logger_factory, *input_processors, cache=cache
|
49
|
+
)
|
@@ -23,6 +23,7 @@ from abc import abstractmethod
|
|
23
23
|
from adapta.metrics import MetricsProvider
|
24
24
|
from injector import inject
|
25
25
|
|
26
|
+
from esd_services_api_client.nexus.abstractions.algrorithm_cache import InputCache
|
26
27
|
from esd_services_api_client.nexus.abstractions.logger_factory import LoggerFactory
|
27
28
|
from esd_services_api_client.nexus.abstractions.nexus_object import (
|
28
29
|
TPayload,
|
@@ -45,8 +46,11 @@ class RecursiveAlgorithm(BaselineAlgorithm[TPayload]):
|
|
45
46
|
metrics_provider: MetricsProvider,
|
46
47
|
logger_factory: LoggerFactory,
|
47
48
|
*input_processors: InputProcessor,
|
49
|
+
cache: InputCache,
|
48
50
|
):
|
49
|
-
super().__init__(
|
51
|
+
super().__init__(
|
52
|
+
metrics_provider, logger_factory, *input_processors, cache=cache
|
53
|
+
)
|
50
54
|
|
51
55
|
@abstractmethod
|
52
56
|
async def _is_finished(self, **kwargs) -> bool:
|
@@ -31,6 +31,7 @@ import azure.core.exceptions
|
|
31
31
|
from adapta.process_communication import DataSocket
|
32
32
|
from adapta.storage.blob.base import StorageClient
|
33
33
|
from adapta.storage.models.format import DataFrameJsonSerializationFormat
|
34
|
+
from adapta.storage.query_enabled_store import QueryEnabledStore
|
34
35
|
from injector import Injector
|
35
36
|
|
36
37
|
import esd_services_api_client.nexus.exceptions
|
@@ -265,6 +266,10 @@ class Nexus:
|
|
265
266
|
if len(on_complete_tasks) > 0:
|
266
267
|
await asyncio.wait(on_complete_tasks)
|
267
268
|
|
269
|
+
# dispose of QES instance gracefully as it might hold open connections
|
270
|
+
qes = self._injector.get(QueryEnabledStore)
|
271
|
+
qes.close()
|
272
|
+
|
268
273
|
@classmethod
|
269
274
|
def create(cls) -> "Nexus":
|
270
275
|
"""
|
@@ -28,6 +28,7 @@ from adapta.storage.query_enabled_store import QueryEnabledStore
|
|
28
28
|
from injector import Module, singleton, provider
|
29
29
|
|
30
30
|
from esd_services_api_client.crystal import CrystalConnector
|
31
|
+
from esd_services_api_client.nexus.abstractions.algrorithm_cache import InputCache
|
31
32
|
from esd_services_api_client.nexus.abstractions.logger_factory import LoggerFactory
|
32
33
|
from esd_services_api_client.nexus.abstractions.socket_provider import (
|
33
34
|
ExternalSocketProvider,
|
@@ -111,7 +112,9 @@ class QueryEnabledStoreModule(Module):
|
|
111
112
|
"""
|
112
113
|
DI factory method.
|
113
114
|
"""
|
114
|
-
return QueryEnabledStore.from_string(
|
115
|
+
return QueryEnabledStore.from_string(
|
116
|
+
os.getenv("NEXUS__QES_CONNECTION_STRING"), lazy_init=False
|
117
|
+
)
|
115
118
|
|
116
119
|
|
117
120
|
@final
|
@@ -163,6 +166,21 @@ class ExternalSocketsModule(Module):
|
|
163
166
|
)
|
164
167
|
|
165
168
|
|
169
|
+
@final
|
170
|
+
class CacheModule(Module):
|
171
|
+
"""
|
172
|
+
Input cache module.
|
173
|
+
"""
|
174
|
+
|
175
|
+
@singleton
|
176
|
+
@provider
|
177
|
+
def provide(self) -> InputCache:
|
178
|
+
"""
|
179
|
+
Dependency provider.
|
180
|
+
"""
|
181
|
+
return InputCache()
|
182
|
+
|
183
|
+
|
166
184
|
@final
|
167
185
|
class ServiceConfigurator:
|
168
186
|
"""
|
@@ -176,6 +194,7 @@ class ServiceConfigurator:
|
|
176
194
|
QueryEnabledStoreModule(),
|
177
195
|
StorageClientModule(),
|
178
196
|
ExternalSocketsModule(),
|
197
|
+
CacheModule(),
|
179
198
|
]
|
180
199
|
|
181
200
|
@property
|
@@ -0,0 +1,49 @@
|
|
1
|
+
"""
|
2
|
+
Cache module exceptions.
|
3
|
+
"""
|
4
|
+
|
5
|
+
# Copyright (c) 2023-2024. ECCO Sneaks & Data
|
6
|
+
#
|
7
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
8
|
+
# you may not use this file except in compliance with the License.
|
9
|
+
# You may obtain a copy of the License at
|
10
|
+
#
|
11
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
#
|
13
|
+
# Unless required by applicable law or agreed to in writing, software
|
14
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
16
|
+
# See the License for the specific language governing permissions and
|
17
|
+
# limitations under the License.
|
18
|
+
#
|
19
|
+
|
20
|
+
from esd_services_api_client.nexus.exceptions._nexus_error import (
|
21
|
+
FatalNexusError,
|
22
|
+
TransientNexusError,
|
23
|
+
)
|
24
|
+
|
25
|
+
|
26
|
+
class FatalCachingError(FatalNexusError):
|
27
|
+
"""
|
28
|
+
Cache-level exception that shuts down the Nexus.
|
29
|
+
"""
|
30
|
+
|
31
|
+
def __init__(self, failed_object: str):
|
32
|
+
super().__init__()
|
33
|
+
self._failed_object = failed_object
|
34
|
+
|
35
|
+
def __str__(self) -> str:
|
36
|
+
return f"Nexus object with alias '{self._failed_object}' failed the caching operation that cannot be retried. Review traceback for more information"
|
37
|
+
|
38
|
+
|
39
|
+
class TransientCachingError(TransientNexusError):
|
40
|
+
"""
|
41
|
+
Cache-level exception that will initiate a retry with the Nexus re-activation.
|
42
|
+
"""
|
43
|
+
|
44
|
+
def __init__(self, failed_object):
|
45
|
+
super().__init__()
|
46
|
+
self._failed_object = failed_object
|
47
|
+
|
48
|
+
def __str__(self) -> str:
|
49
|
+
return f"Nexus object with alias '{self._failed_object}' failed the caching operation and it will be retried. Review traceback for more information"
|
@@ -1,6 +1,7 @@
|
|
1
1
|
"""
|
2
2
|
App startup exceptions.
|
3
3
|
"""
|
4
|
+
from typing import Type
|
4
5
|
|
5
6
|
# Copyright (c) 2023-2024. ECCO Sneaks & Data
|
6
7
|
#
|
@@ -46,3 +47,17 @@ class FatalStartupConfigurationError(FatalNexusError):
|
|
46
47
|
|
47
48
|
def __str__(self) -> str:
|
48
49
|
return f"Algorithm initialization failed due to a missing configuration entry: {self._missing_entry}."
|
50
|
+
|
51
|
+
|
52
|
+
class FatalAlgorithmConfigurationError(FatalNexusError):
|
53
|
+
"""
|
54
|
+
Service configuration error that shuts down the Nexus.
|
55
|
+
"""
|
56
|
+
|
57
|
+
def __init__(self, message: str, algorithm_class: Type):
|
58
|
+
super().__init__()
|
59
|
+
self._message = message
|
60
|
+
self._type_name = str(algorithm_class)
|
61
|
+
|
62
|
+
def __str__(self) -> str:
|
63
|
+
return f"Algorithm {self._type_name} misconfigured: {self._message}."
|
@@ -1,7 +1,6 @@
|
|
1
1
|
"""
|
2
2
|
Input processing.
|
3
3
|
"""
|
4
|
-
import asyncio
|
5
4
|
|
6
5
|
# Copyright (c) 2023-2024. ECCO Sneaks & Data
|
7
6
|
#
|
@@ -25,22 +24,17 @@ from typing import Optional
|
|
25
24
|
from adapta.metrics import MetricsProvider
|
26
25
|
from adapta.utils.decorators import run_time_metrics_async
|
27
26
|
|
27
|
+
from esd_services_api_client.nexus.abstractions.algrorithm_cache import InputCache
|
28
|
+
from esd_services_api_client.nexus.abstractions.input_object import InputObject
|
28
29
|
from esd_services_api_client.nexus.abstractions.nexus_object import (
|
29
|
-
NexusObject,
|
30
30
|
TPayload,
|
31
31
|
TResult,
|
32
32
|
)
|
33
33
|
from esd_services_api_client.nexus.abstractions.logger_factory import LoggerFactory
|
34
|
-
from esd_services_api_client.nexus.input._functions import (
|
35
|
-
resolve_readers,
|
36
|
-
resolve_reader_exc_type,
|
37
|
-
)
|
38
34
|
from esd_services_api_client.nexus.input.input_reader import InputReader
|
39
35
|
|
40
|
-
_processor_cache = {}
|
41
|
-
|
42
36
|
|
43
|
-
class InputProcessor(
|
37
|
+
class InputProcessor(InputObject[TPayload, TResult]):
|
44
38
|
"""
|
45
39
|
Base class for raw data processing into algorithm input.
|
46
40
|
"""
|
@@ -51,24 +45,23 @@ class InputProcessor(NexusObject[TPayload, TResult]):
|
|
51
45
|
payload: TPayload,
|
52
46
|
metrics_provider: MetricsProvider,
|
53
47
|
logger_factory: LoggerFactory,
|
48
|
+
cache: InputCache
|
54
49
|
):
|
55
50
|
super().__init__(metrics_provider, logger_factory)
|
56
51
|
self._readers = readers
|
57
52
|
self._payload = payload
|
58
53
|
self._result: Optional[TResult] = None
|
59
|
-
|
60
|
-
async def _read_input(self) -> dict[str, TResult]:
|
61
|
-
return await resolve_readers(*self._readers)
|
54
|
+
self._cache = cache
|
62
55
|
|
63
56
|
@property
|
64
|
-
def
|
57
|
+
def data(self) -> Optional[TResult]:
|
65
58
|
"""
|
66
59
|
Data returned by this processor
|
67
60
|
"""
|
68
61
|
return self._result
|
69
62
|
|
70
63
|
@abstractmethod
|
71
|
-
async def _process_input(self, **kwargs) ->
|
64
|
+
async def _process_input(self, **kwargs) -> TResult:
|
72
65
|
"""
|
73
66
|
Input processing logic. Implement this method to prepare data for your algorithm code.
|
74
67
|
"""
|
@@ -77,7 +70,7 @@ class InputProcessor(NexusObject[TPayload, TResult]):
|
|
77
70
|
def _metric_tags(self) -> dict[str, str]:
|
78
71
|
return {"processor": self.__class__.alias()}
|
79
72
|
|
80
|
-
async def
|
73
|
+
async def process(self, **kwargs) -> TResult:
|
81
74
|
"""
|
82
75
|
Input processing coroutine. Do not override this method.
|
83
76
|
"""
|
@@ -89,8 +82,9 @@ class InputProcessor(NexusObject[TPayload, TResult]):
|
|
89
82
|
"processor": self.__class__.alias().upper(),
|
90
83
|
},
|
91
84
|
)
|
92
|
-
async def _process(**_) ->
|
93
|
-
|
85
|
+
async def _process(**_) -> TResult:
|
86
|
+
readers = await self._cache.resolve(*self._readers)
|
87
|
+
return await self._process_input(**(kwargs | readers))
|
94
88
|
|
95
89
|
if self._result is None:
|
96
90
|
self._result = await partial(
|
@@ -101,44 +95,3 @@ class InputProcessor(NexusObject[TPayload, TResult]):
|
|
101
95
|
)()
|
102
96
|
|
103
97
|
return self._result
|
104
|
-
|
105
|
-
|
106
|
-
async def resolve_processors(
|
107
|
-
*processors: InputProcessor[TPayload, TResult], **kwargs
|
108
|
-
) -> dict[str, dict[str, TResult]]:
|
109
|
-
"""
|
110
|
-
Concurrently resolve `result` property of all processors by invoking their `process_input` method.
|
111
|
-
"""
|
112
|
-
|
113
|
-
def get_result(alias: str, completed_task: asyncio.Task) -> dict[str, TResult]:
|
114
|
-
reader_exc = completed_task.exception()
|
115
|
-
if reader_exc:
|
116
|
-
raise resolve_reader_exc_type(reader_exc)(alias, reader_exc) from reader_exc
|
117
|
-
|
118
|
-
return completed_task.result()
|
119
|
-
|
120
|
-
async def _process(input_processor: InputProcessor):
|
121
|
-
async with input_processor as instance:
|
122
|
-
result = await instance.process_input(**kwargs)
|
123
|
-
_processor_cache[input_processor.__class__.alias()] = result
|
124
|
-
return result
|
125
|
-
|
126
|
-
cached = {
|
127
|
-
processor.__class__.alias(): processor.result
|
128
|
-
for processor in processors
|
129
|
-
if processor.__class__.alias() in _processor_cache
|
130
|
-
}
|
131
|
-
if len(cached) == len(processors):
|
132
|
-
return cached
|
133
|
-
|
134
|
-
process_tasks: dict[str, asyncio.Task] = {
|
135
|
-
processor.__class__.alias(): asyncio.create_task(_process(processor))
|
136
|
-
for processor in processors
|
137
|
-
if processor.__class__.alias() not in _processor_cache
|
138
|
-
}
|
139
|
-
if len(process_tasks) > 0:
|
140
|
-
await asyncio.wait(fs=process_tasks.values())
|
141
|
-
|
142
|
-
return {
|
143
|
-
alias: get_result(alias, task) for alias, task in process_tasks.items()
|
144
|
-
} | cached
|
@@ -26,15 +26,16 @@ from adapta.process_communication import DataSocket
|
|
26
26
|
from adapta.storage.query_enabled_store import QueryEnabledStore
|
27
27
|
from adapta.utils.decorators import run_time_metrics_async
|
28
28
|
|
29
|
+
from esd_services_api_client.nexus.abstractions.algrorithm_cache import InputCache
|
30
|
+
from esd_services_api_client.nexus.abstractions.input_object import InputObject
|
29
31
|
from esd_services_api_client.nexus.abstractions.nexus_object import (
|
30
|
-
NexusObject,
|
31
32
|
TPayload,
|
32
33
|
TResult,
|
33
34
|
)
|
34
35
|
from esd_services_api_client.nexus.abstractions.logger_factory import LoggerFactory
|
35
36
|
|
36
37
|
|
37
|
-
class InputReader(
|
38
|
+
class InputReader(InputObject[TPayload, TResult]):
|
38
39
|
"""
|
39
40
|
Base class for a raw data reader.
|
40
41
|
"""
|
@@ -47,6 +48,7 @@ class InputReader(NexusObject[TPayload, TResult]):
|
|
47
48
|
payload: TPayload,
|
48
49
|
*readers: "InputReader",
|
49
50
|
socket: Optional[DataSocket] = None,
|
51
|
+
cache: InputCache
|
50
52
|
):
|
51
53
|
super().__init__(metrics_provider, logger_factory)
|
52
54
|
self.socket = socket
|
@@ -54,6 +56,7 @@ class InputReader(NexusObject[TPayload, TResult]):
|
|
54
56
|
self._data: Optional[TResult] = None
|
55
57
|
self._readers = readers
|
56
58
|
self._payload = payload
|
59
|
+
self._cache = cache
|
57
60
|
|
58
61
|
@property
|
59
62
|
def data(self) -> Optional[TResult]:
|
@@ -63,7 +66,7 @@ class InputReader(NexusObject[TPayload, TResult]):
|
|
63
66
|
return self._data
|
64
67
|
|
65
68
|
@abstractmethod
|
66
|
-
async def _read_input(self) -> TResult:
|
69
|
+
async def _read_input(self, **kwargs) -> TResult:
|
67
70
|
"""
|
68
71
|
Actual data reader logic. Implementing this method is mandatory for the reader to work
|
69
72
|
"""
|
@@ -72,7 +75,7 @@ class InputReader(NexusObject[TPayload, TResult]):
|
|
72
75
|
def _metric_tags(self) -> dict[str, str]:
|
73
76
|
return {"entity": self.__class__.alias()}
|
74
77
|
|
75
|
-
async def
|
78
|
+
async def process(self, **_) -> TResult:
|
76
79
|
"""
|
77
80
|
Coroutine that reads the data from external store and converts it to a dataframe, or generates data locally. Do not override this method.
|
78
81
|
"""
|
@@ -88,7 +91,8 @@ class InputReader(NexusObject[TPayload, TResult]):
|
|
88
91
|
| ({"data_path": self.socket.data_path} if self.socket else {}),
|
89
92
|
)
|
90
93
|
async def _read(**_) -> TResult:
|
91
|
-
|
94
|
+
readers = await self._cache.resolve(*self._readers)
|
95
|
+
return await self._read_input(**readers)
|
92
96
|
|
93
97
|
if self._data is None:
|
94
98
|
self._data = await partial(
|
@@ -1,5 +1,5 @@
|
|
1
1
|
esd_services_api_client/__init__.py,sha256=L-cEW1mVbnTJLCLG5V6Ucw7zBgx1zf0t1bYcQC1heyw,603
|
2
|
-
esd_services_api_client/_version.py,sha256=
|
2
|
+
esd_services_api_client/_version.py,sha256=Vyf6P6UCZKFeQtRzYujPmFfdlqSfnc01VEMWE3O0ZrA,22
|
3
3
|
esd_services_api_client/beast/__init__.py,sha256=zNhXcHSP5w4P9quM1XP4oXVJEccvC_VScG41TZ0GzZ8,723
|
4
4
|
esd_services_api_client/beast/v3/__init__.py,sha256=FtumtInoDyCCRE424Llqv8QZLRuwXzj-smyfu1od1nc,754
|
5
5
|
esd_services_api_client/beast/v3/_connector.py,sha256=WNmCiTXFRb3q56mrr7ZbqBHWDUxbfyWhiWlBFLUIOnc,11478
|
@@ -15,32 +15,36 @@ esd_services_api_client/crystal/__init__.py,sha256=oeyJjdQ9EpTnIq6XnjPq5v0DWPdHq
|
|
15
15
|
esd_services_api_client/crystal/_api_versions.py,sha256=GHbmV_5lP9fP72TZE0j_ZeQSeJjMRcRaBRxNJbz-MWQ,837
|
16
16
|
esd_services_api_client/crystal/_connector.py,sha256=wT8SahCkkRWPoHcUSLz0I-sjeK_9OSYtt07zJKie0CU,12875
|
17
17
|
esd_services_api_client/crystal/_models.py,sha256=OCaidMqipl-TA8VLs7v12BppyBR3QpRA5jU2KjZxU_Q,4030
|
18
|
-
esd_services_api_client/nexus/README.md,sha256=
|
18
|
+
esd_services_api_client/nexus/README.md,sha256=QQgvkhRwZtktxlzTmHkbp8KNnUvDvFGEVqvSM1QgpU8,9393
|
19
19
|
esd_services_api_client/nexus/__init__.py,sha256=sOgKKq3_LZGbLmQMtMS7lDw2hv027qownTmNIRV0BB8,627
|
20
20
|
esd_services_api_client/nexus/abstractions/__init__.py,sha256=sOgKKq3_LZGbLmQMtMS7lDw2hv027qownTmNIRV0BB8,627
|
21
|
+
esd_services_api_client/nexus/abstractions/algrorithm_cache.py,sha256=3Umb9bKsl8Yo5a3FMrdO_7JTk2mrYJf9MLR-_C0yzFo,3338
|
22
|
+
esd_services_api_client/nexus/abstractions/input_object.py,sha256=RUKnhekuZwd_RVvnLGAxHa4wYDFJf6wEwWQI9f-o0lM,1761
|
21
23
|
esd_services_api_client/nexus/abstractions/logger_factory.py,sha256=9biONvCqNrP__yrmeRkoDL05TMA5v-LyrcKwgiKG59U,2019
|
22
|
-
esd_services_api_client/nexus/abstractions/nexus_object.py,sha256=
|
24
|
+
esd_services_api_client/nexus/abstractions/nexus_object.py,sha256=P5lQ5jhIk4nTLESseBy-G5HPILpBd75PWykD12jn6eQ,2938
|
23
25
|
esd_services_api_client/nexus/abstractions/socket_provider.py,sha256=Rwa_aPErI4Es5AdyCd3EoGze7mg2D70u8kuc2UGEBaI,1729
|
24
26
|
esd_services_api_client/nexus/algorithms/__init__.py,sha256=yMvLFSqg5eUKOXI0zMFX69Ni0ibKQHOqAnrZsxQqhOo,903
|
25
|
-
esd_services_api_client/nexus/algorithms/_baseline_algorithm.py,sha256=
|
27
|
+
esd_services_api_client/nexus/algorithms/_baseline_algorithm.py,sha256=hGj_qNNNtz8DRhbyUDOG7ouuv8HmqAZsM-ccJKFXTH4,2739
|
28
|
+
esd_services_api_client/nexus/algorithms/_remote_algorithm.py,sha256=nQDQ2si-_-B2QdtBC8IwSM8YyNwfIhrCMto6g87BcnQ,3900
|
26
29
|
esd_services_api_client/nexus/algorithms/distributed.py,sha256=vkKSCsd480RKwrtu3uZ2iU1bh593fkgBcOBrcb9cLjA,1702
|
27
|
-
esd_services_api_client/nexus/algorithms/
|
28
|
-
esd_services_api_client/nexus/algorithms/
|
30
|
+
esd_services_api_client/nexus/algorithms/forked_algorithm.py,sha256=Y1BFCbEMmLFmQlvq0Ot_8RAlvSbFqvZFWa_RLIYvb2Y,4310
|
31
|
+
esd_services_api_client/nexus/algorithms/minimalistic.py,sha256=te8h2SQiAB8xn9OsGciZl51b_oROOodgHIX6408Lz2s,1607
|
32
|
+
esd_services_api_client/nexus/algorithms/recursive.py,sha256=uaCCl4q-st_KqbcmkdOJedJ0nAjbJvn6jdZEdW0_0ss,2007
|
29
33
|
esd_services_api_client/nexus/configurations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
30
34
|
esd_services_api_client/nexus/configurations/algorithm_configuration.py,sha256=eE7diX2PATCGkmqhvFOcZwXrr6vns4fqnJGmgNvhhZM,1091
|
31
35
|
esd_services_api_client/nexus/core/__init__.py,sha256=sOgKKq3_LZGbLmQMtMS7lDw2hv027qownTmNIRV0BB8,627
|
32
|
-
esd_services_api_client/nexus/core/app_core.py,sha256=
|
33
|
-
esd_services_api_client/nexus/core/app_dependencies.py,sha256=
|
36
|
+
esd_services_api_client/nexus/core/app_core.py,sha256=gs1oIwc9KEog46vOZp6g_JVzTa8LLfcCpuauTLGhYVM,9654
|
37
|
+
esd_services_api_client/nexus/core/app_dependencies.py,sha256=BVihH0gqfAGY851hzv4GLajC8eJx3zn8uYnOzW6-2_8,6521
|
34
38
|
esd_services_api_client/nexus/exceptions/__init__.py,sha256=feN33VdqB5-2bD9aJesJl_OlsKrNNo3hZCnQgKuaU9k,696
|
35
39
|
esd_services_api_client/nexus/exceptions/_nexus_error.py,sha256=QvtY38mNoIA6t26dUN6UIsaPfljhtVNsbQVS7ksMb-Q,895
|
40
|
+
esd_services_api_client/nexus/exceptions/cache_errors.py,sha256=IO_rBQKXfIRHHXQuC8kAHejgZZw9yvSJk5BPYBnDYbc,1622
|
36
41
|
esd_services_api_client/nexus/exceptions/input_reader_error.py,sha256=Chy8XW6Ien4-bkZZ1CmP8CWU49mi2hobS6L_R59ONs8,1765
|
37
|
-
esd_services_api_client/nexus/exceptions/startup_error.py,sha256=
|
38
|
-
esd_services_api_client/nexus/input/__init__.py,sha256=
|
39
|
-
esd_services_api_client/nexus/input/
|
40
|
-
esd_services_api_client/nexus/input/
|
41
|
-
esd_services_api_client/nexus/input/input_reader.py,sha256=MtZNdxjWLjoSqlSlwa6f7BIqsu-_GoQav5tCz2g6WrA,3214
|
42
|
+
esd_services_api_client/nexus/exceptions/startup_error.py,sha256=4Hughi57Ndi_a8YPkjPSxXWWsAZUA57Rwd-rPqwD8B8,1991
|
43
|
+
esd_services_api_client/nexus/input/__init__.py,sha256=ODYhZ791tPC4-eVxSRRlh8FLDDICU7nByLH7c4TD4Xc,758
|
44
|
+
esd_services_api_client/nexus/input/input_processor.py,sha256=vqzeQrtRFqBKTPSEiWX_JZJTF9itMwwvWjPnJVLrSwQ,3132
|
45
|
+
esd_services_api_client/nexus/input/input_reader.py,sha256=aXNMGxrdUX5RDYR666GSGkcZqYMFYoZ8zGVDuUFFFZQ,3505
|
42
46
|
esd_services_api_client/nexus/input/payload_reader.py,sha256=Kq0xN1Shyqv71v6YkcrqVTDbmsEjZc8ithsXYpyu87M,2516
|
43
|
-
esd_services_api_client-2.
|
44
|
-
esd_services_api_client-2.
|
45
|
-
esd_services_api_client-2.
|
46
|
-
esd_services_api_client-2.
|
47
|
+
esd_services_api_client-2.2.0.dist-info/LICENSE,sha256=0gS6zXsPp8qZhzi1xaGCIYPzb_0e8on7HCeFJe8fOpw,10693
|
48
|
+
esd_services_api_client-2.2.0.dist-info/METADATA,sha256=-FllzJEL9xB4GA_iM0zRfW3tmwuaAGbL_YhqQ7WfPys,1292
|
49
|
+
esd_services_api_client-2.2.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
50
|
+
esd_services_api_client-2.2.0.dist-info/RECORD,,
|
@@ -1,89 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Utility functions to handle input processing.
|
3
|
-
"""
|
4
|
-
|
5
|
-
# Copyright (c) 2023-2024. ECCO Sneaks & Data
|
6
|
-
#
|
7
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
8
|
-
# you may not use this file except in compliance with the License.
|
9
|
-
# You may obtain a copy of the License at
|
10
|
-
#
|
11
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
12
|
-
#
|
13
|
-
# Unless required by applicable law or agreed to in writing, software
|
14
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
15
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
16
|
-
# See the License for the specific language governing permissions and
|
17
|
-
# limitations under the License.
|
18
|
-
#
|
19
|
-
|
20
|
-
import asyncio
|
21
|
-
from typing import Union, Type
|
22
|
-
import azure.core.exceptions
|
23
|
-
import deltalake
|
24
|
-
|
25
|
-
from esd_services_api_client.nexus.abstractions.nexus_object import TResult, TPayload
|
26
|
-
from esd_services_api_client.nexus.exceptions.input_reader_error import (
|
27
|
-
FatalInputReaderError,
|
28
|
-
TransientInputReaderError,
|
29
|
-
)
|
30
|
-
from esd_services_api_client.nexus.input.input_reader import InputReader
|
31
|
-
|
32
|
-
|
33
|
-
_reader_cache = {}
|
34
|
-
|
35
|
-
|
36
|
-
def resolve_reader_exc_type(
|
37
|
-
ex: BaseException,
|
38
|
-
) -> Union[Type[FatalInputReaderError], Type[TransientInputReaderError]]:
|
39
|
-
"""
|
40
|
-
Resolve base exception into a specific Nexus exception.
|
41
|
-
"""
|
42
|
-
match type(ex):
|
43
|
-
case azure.core.exceptions.HttpResponseError, deltalake.PyDeltaTableError:
|
44
|
-
return TransientInputReaderError
|
45
|
-
case azure.core.exceptions.AzureError, azure.core.exceptions.ClientAuthenticationError:
|
46
|
-
return FatalInputReaderError
|
47
|
-
case _:
|
48
|
-
return FatalInputReaderError
|
49
|
-
|
50
|
-
|
51
|
-
async def resolve_readers(
|
52
|
-
*readers: InputReader[TPayload, TResult]
|
53
|
-
) -> dict[str, TResult]:
|
54
|
-
"""
|
55
|
-
Concurrently resolve `data` property of all readers by invoking their `read` method.
|
56
|
-
"""
|
57
|
-
|
58
|
-
def get_result(alias: str, completed_task: asyncio.Task) -> TResult:
|
59
|
-
reader_exc = completed_task.exception()
|
60
|
-
if reader_exc:
|
61
|
-
raise resolve_reader_exc_type(reader_exc)(alias, reader_exc) from reader_exc
|
62
|
-
|
63
|
-
return completed_task.result()
|
64
|
-
|
65
|
-
async def _read(input_reader: InputReader):
|
66
|
-
async with input_reader as instance:
|
67
|
-
result = await instance.read()
|
68
|
-
_reader_cache[input_reader.__class__.alias()] = result
|
69
|
-
return result
|
70
|
-
|
71
|
-
cached = {
|
72
|
-
reader.__class__.alias(): reader.data
|
73
|
-
for reader in readers
|
74
|
-
if reader.__class__.alias() in _reader_cache
|
75
|
-
}
|
76
|
-
if len(cached) == len(readers):
|
77
|
-
return cached
|
78
|
-
|
79
|
-
read_tasks: dict[str, asyncio.Task] = {
|
80
|
-
reader.__class__.alias(): asyncio.create_task(_read(reader))
|
81
|
-
for reader in readers
|
82
|
-
if reader.__class__.alias() not in _reader_cache
|
83
|
-
}
|
84
|
-
if len(read_tasks) > 0:
|
85
|
-
await asyncio.wait(fs=read_tasks.values())
|
86
|
-
|
87
|
-
return {
|
88
|
-
alias: get_result(alias, task) for alias, task in read_tasks.items()
|
89
|
-
} | cached
|
File without changes
|
File without changes
|