dara-core 1.19.1__py3-none-any.whl → 1.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dara/core/__init__.py +1 -0
- dara/core/auth/basic.py +13 -7
- dara/core/auth/definitions.py +2 -2
- dara/core/auth/utils.py +1 -1
- dara/core/base_definitions.py +7 -42
- dara/core/data_utils.py +16 -17
- dara/core/definitions.py +8 -8
- dara/core/interactivity/__init__.py +4 -0
- dara/core/interactivity/actions.py +20 -22
- dara/core/interactivity/any_data_variable.py +7 -135
- dara/core/interactivity/any_variable.py +1 -1
- dara/core/interactivity/client_variable.py +71 -0
- dara/core/interactivity/data_variable.py +8 -266
- dara/core/interactivity/derived_data_variable.py +6 -290
- dara/core/interactivity/derived_variable.py +335 -201
- dara/core/interactivity/filtering.py +29 -2
- dara/core/interactivity/loop_variable.py +2 -2
- dara/core/interactivity/non_data_variable.py +5 -68
- dara/core/interactivity/plain_variable.py +87 -14
- dara/core/interactivity/server_variable.py +325 -0
- dara/core/interactivity/state_variable.py +2 -2
- dara/core/interactivity/switch_variable.py +15 -15
- dara/core/interactivity/tabular_variable.py +94 -0
- dara/core/interactivity/url_variable.py +10 -90
- dara/core/internal/cache_store/cache_store.py +5 -20
- dara/core/internal/dependency_resolution.py +27 -69
- dara/core/internal/devtools.py +10 -3
- dara/core/internal/execute_action.py +9 -3
- dara/core/internal/multi_resource_lock.py +70 -0
- dara/core/internal/normalization.py +0 -5
- dara/core/internal/pandas_utils.py +105 -3
- dara/core/internal/pool/definitions.py +1 -1
- dara/core/internal/pool/task_pool.py +1 -1
- dara/core/internal/registries.py +3 -2
- dara/core/internal/registry.py +1 -1
- dara/core/internal/registry_lookup.py +5 -3
- dara/core/internal/routing.py +52 -121
- dara/core/internal/store.py +2 -29
- dara/core/internal/tasks.py +372 -182
- dara/core/internal/utils.py +25 -3
- dara/core/internal/websocket.py +1 -1
- dara/core/js_tooling/js_utils.py +2 -0
- dara/core/logging.py +10 -6
- dara/core/persistence.py +26 -4
- dara/core/umd/dara.core.umd.js +751 -1386
- dara/core/visual/dynamic_component.py +10 -13
- {dara_core-1.19.1.dist-info → dara_core-1.20.0.dist-info}/METADATA +10 -10
- {dara_core-1.19.1.dist-info → dara_core-1.20.0.dist-info}/RECORD +51 -47
- {dara_core-1.19.1.dist-info → dara_core-1.20.0.dist-info}/LICENSE +0 -0
- {dara_core-1.19.1.dist-info → dara_core-1.20.0.dist-info}/WHEEL +0 -0
- {dara_core-1.19.1.dist-info → dara_core-1.20.0.dist-info}/entry_points.txt +0 -0
|
@@ -27,12 +27,16 @@ from typing import (
|
|
|
27
27
|
Generic,
|
|
28
28
|
List,
|
|
29
29
|
Optional,
|
|
30
|
+
Protocol,
|
|
31
|
+
Tuple,
|
|
30
32
|
TypeVar,
|
|
31
33
|
Union,
|
|
32
34
|
cast,
|
|
33
35
|
)
|
|
34
36
|
|
|
37
|
+
import anyio
|
|
35
38
|
from cachetools import LRUCache
|
|
39
|
+
from pandas import DataFrame
|
|
36
40
|
from pydantic import (
|
|
37
41
|
ConfigDict,
|
|
38
42
|
Field,
|
|
@@ -41,7 +45,7 @@ from pydantic import (
|
|
|
41
45
|
field_validator,
|
|
42
46
|
model_serializer,
|
|
43
47
|
)
|
|
44
|
-
from typing_extensions import TypedDict
|
|
48
|
+
from typing_extensions import TypedDict, runtime_checkable
|
|
45
49
|
|
|
46
50
|
from dara.core.base_definitions import (
|
|
47
51
|
BaseCachePolicy,
|
|
@@ -49,14 +53,17 @@ from dara.core.base_definitions import (
|
|
|
49
53
|
Cache,
|
|
50
54
|
CacheArgType,
|
|
51
55
|
CachedRegistryEntry,
|
|
56
|
+
NonTabularDataError,
|
|
52
57
|
PendingTask,
|
|
53
|
-
PendingValue,
|
|
54
58
|
)
|
|
55
59
|
from dara.core.interactivity.actions import TriggerVariable, assert_no_context
|
|
56
60
|
from dara.core.interactivity.any_variable import AnyVariable
|
|
57
|
-
from dara.core.interactivity.
|
|
61
|
+
from dara.core.interactivity.client_variable import ClientVariable
|
|
62
|
+
from dara.core.interactivity.filtering import FilterQuery, Pagination, apply_filters
|
|
58
63
|
from dara.core.internal.cache_store import CacheStore
|
|
59
64
|
from dara.core.internal.encoder_registry import deserialize
|
|
65
|
+
from dara.core.internal.multi_resource_lock import MultiResourceLock
|
|
66
|
+
from dara.core.internal.pandas_utils import DataResponse, append_index, build_data_response
|
|
60
67
|
from dara.core.internal.tasks import MetaTask, Task, TaskManager
|
|
61
68
|
from dara.core.internal.utils import get_cache_scope, run_user_handler
|
|
62
69
|
from dara.core.logging import dev_logger, eng_logger
|
|
@@ -64,6 +71,10 @@ from dara.core.metrics import RUNTIME_METRICS_TRACKER
|
|
|
64
71
|
|
|
65
72
|
VariableType = TypeVar('VariableType')
|
|
66
73
|
|
|
74
|
+
# Static lock for all DV computations, keyed by cache_key
|
|
75
|
+
# Explicitly not re-entrant, this prevents variable loops
|
|
76
|
+
DV_LOCK = MultiResourceLock()
|
|
77
|
+
|
|
67
78
|
# Global set to track force keys that have been encountered
|
|
68
79
|
# LRU with 2048 entries should be sufficient to not drop in-progress force keys
|
|
69
80
|
# but also not have to worry about memory leaks
|
|
@@ -80,7 +91,24 @@ class DerivedVariableResult(TypedDict):
|
|
|
80
91
|
value: Union[Any, BaseTask]
|
|
81
92
|
|
|
82
93
|
|
|
83
|
-
|
|
94
|
+
@runtime_checkable
|
|
95
|
+
class FilterResolver(Protocol):
|
|
96
|
+
async def __call__(
|
|
97
|
+
self, data: Any, filters: Optional[FilterQuery] = None, pagination: Optional[Pagination] = None
|
|
98
|
+
) -> Tuple[DataFrame, int]: ...
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
async def default_filter_resolver(
|
|
102
|
+
data: Any, filters: Optional[FilterQuery] = None, pagination: Optional[Pagination] = None
|
|
103
|
+
) -> Tuple[DataFrame, int]:
|
|
104
|
+
if not isinstance(data, DataFrame):
|
|
105
|
+
raise NonTabularDataError(
|
|
106
|
+
f'Default filter resolver expects a DataFrame to be returned from the DerivedVariable function, got {type(data)}'
|
|
107
|
+
)
|
|
108
|
+
return apply_filters(data, filters, pagination)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class DerivedVariable(ClientVariable, Generic[VariableType]):
|
|
84
112
|
"""
|
|
85
113
|
A DerivedVariable allows a value to be derived (via a function) from the current value of a set of other
|
|
86
114
|
variables with a python function. This is one of two primary ways that python logic can be embedded into the
|
|
@@ -89,6 +117,61 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
|
|
|
89
117
|
DerivedVariables can be chained together to form complex data flows whilst keeping everything organized and
|
|
90
118
|
structured in an easy to follow way. DerivedVariable results are cached automatically and will only be
|
|
91
119
|
recalculated when necessary.
|
|
120
|
+
|
|
121
|
+
As a special case, DerivedVariables can be used for tabular data and retrieving its slice as a DataFrame. This functionality
|
|
122
|
+
is utilized by e.g. the built-in Table component. By default, when passing a DerivedVariable to a Table component, Dara
|
|
123
|
+
expects the resolver function to return a DataFrame or None. This behaviour can be customized by providing a custom `filter_resolver`.
|
|
124
|
+
This function will be invoked with the result of the main DerivedVariable function, as well as filters and pagination. It can be used
|
|
125
|
+
to e.g. retrieve a slice of data from an API endpoint or a database instead of retrieving the entire dataset and filtering it in-memory.
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
from typing import Optional
|
|
129
|
+
import httpx
|
|
130
|
+
import pandas as pd
|
|
131
|
+
from dara.core import DerivedVariable, Variable
|
|
132
|
+
from dara.core.interactivity.filtering import FilterQuery, Pagination
|
|
133
|
+
|
|
134
|
+
# Custom filter resolver for API-based filtering
|
|
135
|
+
async def api_filter_resolver(data, filters: Optional[FilterQuery] = None, pagination: Optional[Pagination] = None):
|
|
136
|
+
async with httpx.AsyncClient() as client:
|
|
137
|
+
# in this case data is a string url
|
|
138
|
+
response = await client.get(data, params={
|
|
139
|
+
# translates filters/pagination to API-specific query params
|
|
140
|
+
'filters': filters.dict() if filters else {},
|
|
141
|
+
'offset': pagination.offset if pagination else 0,
|
|
142
|
+
'limit': pagination.limit if pagination else 50
|
|
143
|
+
})
|
|
144
|
+
data = response.json()
|
|
145
|
+
# conform to the filter resolver API, return a tuple of (DataFrame, total_count)
|
|
146
|
+
return pd.DataFrame(data['results']), data['total_count']
|
|
147
|
+
|
|
148
|
+
# DerivedVariable with custom filtering
|
|
149
|
+
user_params = Variable({'dataset': 'experiments'})
|
|
150
|
+
derived_data = DerivedVariable(
|
|
151
|
+
lambda params: f"https://api.example.com/data/{params['dataset']}",
|
|
152
|
+
variables=[user_params],
|
|
153
|
+
filter_resolver=api_filter_resolver
|
|
154
|
+
)
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
:param func: the function to derive a new value from the input variables.
|
|
158
|
+
:param variables: a set of input variables that will be passed to the deriving function
|
|
159
|
+
:param cache: whether to cache the result, defaults to global caching. Other options are to cache per user
|
|
160
|
+
session, per user or to not cache at all
|
|
161
|
+
:param run_as_task: whether to run the calculation in a separate process, recommended for any CPU intensive
|
|
162
|
+
tasks, defaults to False
|
|
163
|
+
:param polling_interval: an optional polling interval for the DerivedVariable. Setting this will cause the
|
|
164
|
+
component to poll the backend and refresh itself every n seconds.
|
|
165
|
+
:param filter_resolver: an optional function to resolve the filter query for the derived variable. This can be
|
|
166
|
+
used to customize the way tabular data is resolved. This is invoked with the result of the main DerivedVariable function,
|
|
167
|
+
as well as filters and pagination. The function should return a DataFrame and total count.
|
|
168
|
+
:param deps: an optional array of variables, specifying which dependant variables changing should trigger a
|
|
169
|
+
recalculation of the derived variable
|
|
170
|
+
- `deps = None` - `func` is run every time (default behaviour),
|
|
171
|
+
- `deps = []` - `func` is run once on initial startup,
|
|
172
|
+
- `deps = [var1, var2]` - `func` is run whenever one of these vars changes
|
|
173
|
+
- `deps = [var1.get('nested_property')]` - `func` is run only when the nested property changes, other changes to the variable are ignored
|
|
174
|
+
:param uid: the unique identifier for this variable; if not provided a random one is generated
|
|
92
175
|
"""
|
|
93
176
|
|
|
94
177
|
cache: Optional[BaseCachePolicy]
|
|
@@ -97,11 +180,11 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
|
|
|
97
180
|
deps: Optional[List[AnyVariable]] = Field(validate_default=True)
|
|
98
181
|
nested: List[str] = Field(default_factory=list)
|
|
99
182
|
uid: str
|
|
100
|
-
model_config = ConfigDict(extra='forbid', use_enum_values=True)
|
|
183
|
+
model_config = ConfigDict(extra='forbid', use_enum_values=True, arbitrary_types_allowed=True)
|
|
101
184
|
|
|
102
185
|
def __init__(
|
|
103
186
|
self,
|
|
104
|
-
func: Callable[..., VariableType]
|
|
187
|
+
func: Union[Callable[..., VariableType], Callable[..., Awaitable[VariableType]]],
|
|
105
188
|
variables: List[AnyVariable],
|
|
106
189
|
cache: Optional[CacheArgType] = Cache.Type.GLOBAL,
|
|
107
190
|
run_as_task: bool = False,
|
|
@@ -109,33 +192,9 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
|
|
|
109
192
|
deps: Optional[List[AnyVariable]] = None,
|
|
110
193
|
uid: Optional[str] = None,
|
|
111
194
|
nested: Optional[List[str]] = None,
|
|
112
|
-
|
|
195
|
+
filter_resolver: Optional[FilterResolver] = None,
|
|
196
|
+
**kwargs,
|
|
113
197
|
):
|
|
114
|
-
"""
|
|
115
|
-
A DerivedVariable allows a value to be derived (via a function) from the current value of a set of other
|
|
116
|
-
variables with a python function. This is one of two primary ways that python logic can be embedded into the
|
|
117
|
-
application (the other being the @py_component decorator).
|
|
118
|
-
|
|
119
|
-
DerivedVariables can be chained together to form complex data flows whilst keeping everything organized and
|
|
120
|
-
structured in an easy to follow way. DerivedVariable results are cached automatically and will only be
|
|
121
|
-
recalculated when necessary.
|
|
122
|
-
|
|
123
|
-
:param func: the function to derive a new value from the input variables.
|
|
124
|
-
:param variables: a set of input variables that will be passed to the deriving function
|
|
125
|
-
:param cache: whether to cache the result, defaults to global caching. Other options are to cache per user
|
|
126
|
-
session, per user or to not cache at all
|
|
127
|
-
:param run_as_task: whether to run the calculation in a separate process, recommended for any CPU intensive
|
|
128
|
-
tasks, defaults to False
|
|
129
|
-
:param polling_interval: an optional polling interval for the DerivedVariable. Setting this will cause the
|
|
130
|
-
component to poll the backend and refresh itself every n seconds.
|
|
131
|
-
:param deps: an optional array of variables, specifying which dependant variables changing should trigger a
|
|
132
|
-
recalculation of the derived variable
|
|
133
|
-
- `deps = None` - `func` is ran everytime (default behaviour),
|
|
134
|
-
- `deps = []` - `func` is ran once on initial startup,
|
|
135
|
-
- `deps = [var1, var2]` - `func` is ran whenever one of these vars changes
|
|
136
|
-
- `deps = [var1.get('nested_property')]` - `func` is ran only when the nested property changes, other changes to the variable are ignored
|
|
137
|
-
:param uid: the unique identifier for this variable; if not provided a random one is generated
|
|
138
|
-
"""
|
|
139
198
|
if nested is None:
|
|
140
199
|
nested = []
|
|
141
200
|
|
|
@@ -176,6 +235,7 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
|
|
|
176
235
|
polling_interval=polling_interval,
|
|
177
236
|
deps=deps,
|
|
178
237
|
nested=nested,
|
|
238
|
+
**kwargs,
|
|
179
239
|
)
|
|
180
240
|
|
|
181
241
|
# Import the registry of variables and register the function at import
|
|
@@ -194,12 +254,14 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
|
|
|
194
254
|
DerivedVariableRegistryEntry(
|
|
195
255
|
cache=cache,
|
|
196
256
|
func=func,
|
|
257
|
+
filter_resolver=filter_resolver,
|
|
197
258
|
polling_interval=polling_interval,
|
|
198
259
|
run_as_task=run_as_task,
|
|
199
260
|
uid=str(self.uid),
|
|
200
261
|
variables=variables,
|
|
201
262
|
deps=deps_indexes,
|
|
202
|
-
get_value=
|
|
263
|
+
get_value=DerivedVariable.get_value,
|
|
264
|
+
get_tabular_data=DerivedVariable.get_tabular_data,
|
|
203
265
|
),
|
|
204
266
|
)
|
|
205
267
|
|
|
@@ -351,6 +413,7 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
|
|
|
351
413
|
task_mgr: TaskManager,
|
|
352
414
|
args: List[Any],
|
|
353
415
|
force_key: Optional[str] = None,
|
|
416
|
+
_pin_result: bool = False,
|
|
354
417
|
) -> DerivedVariableResult:
|
|
355
418
|
"""
|
|
356
419
|
Get the value of this DerivedVariable. This method will check the main app store for an appropriate response
|
|
@@ -362,10 +425,18 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
|
|
|
362
425
|
:param task_mgr: task manager instance
|
|
363
426
|
:param args: the arguments to call the underlying function with
|
|
364
427
|
:param force_key: unique key for forced execution, if provided forces cache bypass
|
|
428
|
+
:param _pin_result: whether to pin the result in the store, used internally by derived data variables
|
|
365
429
|
"""
|
|
430
|
+
# dynamic import due to circular import
|
|
431
|
+
from dara.core.internal.dependency_resolution import (
|
|
432
|
+
is_forced,
|
|
433
|
+
resolve_dependency,
|
|
434
|
+
)
|
|
435
|
+
|
|
366
436
|
assert var_entry.func is not None, 'DerivedVariable function is not defined'
|
|
367
437
|
|
|
368
|
-
|
|
438
|
+
# Shortened UID used for logging
|
|
439
|
+
_uid_short = f'{var_entry.uid[:3]}..{var_entry.uid[-3:]}'
|
|
369
440
|
|
|
370
441
|
if var_entry.run_as_task:
|
|
371
442
|
from dara.core.internal.registries import utils_registry
|
|
@@ -375,212 +446,272 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
|
|
|
375
446
|
'Task module is not configured. Set config.task_module path to a tasks.py module to run a derived variable as task.'
|
|
376
447
|
)
|
|
377
448
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
_uid_short = f'{var_entry.uid[:3]}..{var_entry.uid[-3:]}'
|
|
381
|
-
|
|
382
|
-
# Extract and process nested derived variables
|
|
383
|
-
values = []
|
|
449
|
+
# Compute cache key first, before any other work
|
|
450
|
+
cache_key = DerivedVariable._get_cache_key(*args, uid=var_entry.uid, deps=var_entry.deps)
|
|
384
451
|
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
eng_logger.info(
|
|
389
|
-
f'Derived Variable {_uid_short} get_value',
|
|
390
|
-
{'uid': var_entry.uid, 'args': args},
|
|
391
|
-
)
|
|
452
|
+
# Lock on this specific cache key for the entire computation
|
|
453
|
+
async with DV_LOCK.acquire(cache_key):
|
|
454
|
+
histogram = RUNTIME_METRICS_TRACKER.get_dv_histogram(var_entry.uid)
|
|
392
455
|
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
values
|
|
456
|
+
with histogram.time():
|
|
457
|
+
# Extract and process nested derived variables
|
|
458
|
+
values: List[Any] = [None] * len(args)
|
|
396
459
|
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
)
|
|
402
|
-
|
|
403
|
-
# Loop over the passed arguments and if the expected type is a BaseModel and arg is a dict then convert the dict
|
|
404
|
-
# to an instance of the BaseModel class.
|
|
405
|
-
parsed_args = DerivedVariable._restore_pydantic_models(var_entry.func, *values)
|
|
406
|
-
|
|
407
|
-
dev_logger.debug(
|
|
408
|
-
f'DerivedVariable {_uid_short}',
|
|
409
|
-
'executing',
|
|
410
|
-
{'args': parsed_args, 'uid': var_entry.uid},
|
|
411
|
-
)
|
|
460
|
+
eng_logger.info(
|
|
461
|
+
f'Derived Variable {_uid_short} get_value',
|
|
462
|
+
{'uid': var_entry.uid, 'args': args},
|
|
463
|
+
)
|
|
412
464
|
|
|
413
|
-
|
|
414
|
-
|
|
465
|
+
# Whether one of the (grand?)children have been forced - if so, the parent should skip the cache as well
|
|
466
|
+
has_forced_child = False
|
|
415
467
|
|
|
416
|
-
|
|
417
|
-
|
|
468
|
+
async def _resolve_arg(val: Any, index: int):
|
|
469
|
+
nonlocal has_forced_child
|
|
418
470
|
|
|
419
|
-
|
|
471
|
+
if is_forced(val):
|
|
472
|
+
has_forced_child = True
|
|
473
|
+
var_value = await resolve_dependency(val, store, task_mgr)
|
|
474
|
+
values[index] = var_value
|
|
420
475
|
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
if force_key in _force_keys_seen:
|
|
425
|
-
# This force key has been seen before, don't force again
|
|
426
|
-
effective_force = False
|
|
427
|
-
eng_logger.debug(
|
|
428
|
-
f'DerivedVariable {_uid_short} force key already seen, using cached value',
|
|
429
|
-
extra={'uid': var_entry.uid, 'force_key': force_key},
|
|
430
|
-
)
|
|
431
|
-
else:
|
|
432
|
-
# First time seeing this force key, add it to the set
|
|
433
|
-
_force_keys_seen[force_key] = True
|
|
434
|
-
eng_logger.debug(
|
|
435
|
-
f'DerivedVariable {_uid_short} new force key, will force recalculation',
|
|
436
|
-
extra={'uid': var_entry.uid, 'force_key': force_key},
|
|
437
|
-
)
|
|
476
|
+
async with anyio.create_task_group() as tg:
|
|
477
|
+
for idx, val in enumerate(args):
|
|
478
|
+
tg.start_soon(_resolve_arg, val, idx)
|
|
438
479
|
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
# cache_type = CacheType.SESSION
|
|
445
|
-
|
|
446
|
-
eng_logger.debug(
|
|
447
|
-
f'DerivedVariable {_uid_short}',
|
|
448
|
-
f'using cache: {cache_type}',
|
|
449
|
-
{'uid': var_entry.uid},
|
|
450
|
-
)
|
|
480
|
+
eng_logger.debug(
|
|
481
|
+
f'DerivedVariable {_uid_short}',
|
|
482
|
+
'resolved arguments',
|
|
483
|
+
{'values': values, 'uid': var_entry.uid},
|
|
484
|
+
)
|
|
451
485
|
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
value = VALUE_MISSING
|
|
486
|
+
# Loop over the passed arguments and if the expected type is a BaseModel and arg is a dict then convert the dict
|
|
487
|
+
# to an instance of the BaseModel class.
|
|
488
|
+
parsed_args = DerivedVariable._restore_pydantic_models(var_entry.func, *values)
|
|
456
489
|
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
or effective_force
|
|
462
|
-
)
|
|
463
|
-
if not ignore_cache:
|
|
464
|
-
try:
|
|
465
|
-
value = await store.get(var_entry, key=cache_key, raise_for_missing=True)
|
|
466
|
-
eng_logger.debug(
|
|
467
|
-
f'DerivedVariable {_uid_short}',
|
|
468
|
-
'retrieved value from cache',
|
|
469
|
-
{'uid': var_entry.uid, 'cached_value': value},
|
|
470
|
-
)
|
|
471
|
-
except KeyError:
|
|
472
|
-
eng_logger.debug(
|
|
473
|
-
f'DerivedVariable {_uid_short}',
|
|
474
|
-
'no value found in cache',
|
|
475
|
-
{'uid': var_entry.uid},
|
|
476
|
-
)
|
|
477
|
-
# key error means no entry found;
|
|
478
|
-
# this lets us distinguish from a None value stored and not found
|
|
479
|
-
|
|
480
|
-
# If it's a PendingTask then return that task so it can be awaited later by a MetaTask
|
|
481
|
-
if isinstance(value, PendingTask):
|
|
482
|
-
eng_logger.info(
|
|
483
|
-
f'DerivedVariable {_uid_short} waiting for pending task',
|
|
484
|
-
{'uid': var_entry.uid, 'pending_task': value.task_id},
|
|
490
|
+
dev_logger.debug(
|
|
491
|
+
f'DerivedVariable {_uid_short}',
|
|
492
|
+
'executing',
|
|
493
|
+
{'args': parsed_args, 'uid': var_entry.uid},
|
|
485
494
|
)
|
|
486
|
-
value.add_subscriber()
|
|
487
|
-
return {'cache_key': cache_key, 'value': value}
|
|
488
495
|
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
496
|
+
# Check if there are any Tasks to be run in the args
|
|
497
|
+
has_tasks = any(isinstance(arg, BaseTask) for arg in parsed_args)
|
|
498
|
+
|
|
499
|
+
await DerivedVariable.add_latest_value(store, var_entry, cache_key)
|
|
500
|
+
|
|
501
|
+
cache_type = var_entry.cache
|
|
502
|
+
|
|
503
|
+
# Handle force key tracking to prevent double execution
|
|
504
|
+
effective_force = force_key is not None
|
|
505
|
+
if force_key is not None:
|
|
506
|
+
if force_key in _force_keys_seen:
|
|
507
|
+
# This force key has been seen before, don't force again
|
|
508
|
+
effective_force = False
|
|
509
|
+
eng_logger.debug(
|
|
510
|
+
f'DerivedVariable {_uid_short} force key already seen, using cached value',
|
|
511
|
+
extra={'uid': var_entry.uid, 'force_key': force_key},
|
|
512
|
+
)
|
|
513
|
+
else:
|
|
514
|
+
# First time seeing this force key, add it to the set
|
|
515
|
+
_force_keys_seen[force_key] = True
|
|
516
|
+
eng_logger.debug(
|
|
517
|
+
f'DerivedVariable {_uid_short} new force key, will force recalculation',
|
|
518
|
+
extra={'uid': var_entry.uid, 'force_key': force_key},
|
|
519
|
+
)
|
|
499
520
|
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
{'uid': var_entry.uid, 'cached_value': value},
|
|
521
|
+
eng_logger.debug(
|
|
522
|
+
f'DerivedVariable {_uid_short}',
|
|
523
|
+
f'using cache: {cache_type}',
|
|
524
|
+
{'uid': var_entry.uid},
|
|
505
525
|
)
|
|
506
|
-
return {'cache_key': cache_key, 'value': value}
|
|
507
526
|
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
527
|
+
# Start with a sentinel value to indicate that the value is missing
|
|
528
|
+
# from cache, this lets us distinguish between a cache miss and a
|
|
529
|
+
# value that is None
|
|
530
|
+
value = VALUE_MISSING
|
|
531
|
+
|
|
532
|
+
ignore_cache = (
|
|
533
|
+
var_entry.cache is None
|
|
534
|
+
or var_entry.polling_interval
|
|
535
|
+
or DerivedVariable.check_polling(var_entry.variables)
|
|
536
|
+
or effective_force
|
|
537
|
+
or has_forced_child
|
|
538
|
+
)
|
|
539
|
+
if not ignore_cache:
|
|
540
|
+
try:
|
|
541
|
+
value = await store.get(var_entry, key=cache_key, raise_for_missing=True)
|
|
542
|
+
eng_logger.debug(
|
|
543
|
+
f'DerivedVariable {_uid_short}',
|
|
544
|
+
'retrieved value from cache',
|
|
545
|
+
{'uid': var_entry.uid, 'cached_value': value},
|
|
546
|
+
)
|
|
547
|
+
except KeyError:
|
|
548
|
+
eng_logger.debug(
|
|
549
|
+
f'DerivedVariable {_uid_short}',
|
|
550
|
+
'no value found in cache',
|
|
551
|
+
{'uid': var_entry.uid},
|
|
552
|
+
)
|
|
553
|
+
# key error means no entry found;
|
|
554
|
+
# this lets us distinguish from a None value stored and not found
|
|
555
|
+
|
|
556
|
+
# If it's a PendingTask then return that task so it can be awaited later by a MetaTask
|
|
557
|
+
if isinstance(value, PendingTask):
|
|
558
|
+
eng_logger.info(
|
|
559
|
+
f'DerivedVariable {_uid_short} waiting for pending task',
|
|
560
|
+
{'uid': var_entry.uid, 'pending_task': value.task_id},
|
|
561
|
+
)
|
|
562
|
+
return {'cache_key': cache_key, 'value': value}
|
|
511
563
|
|
|
512
|
-
|
|
513
|
-
|
|
564
|
+
# We retrieved an actual value from the cache, return it
|
|
565
|
+
if not ignore_cache and value is not VALUE_MISSING:
|
|
566
|
+
eng_logger.info(
|
|
567
|
+
f'DerivedVariable {_uid_short} returning cached value directly',
|
|
568
|
+
{'uid': var_entry.uid, 'cached_value': value},
|
|
569
|
+
)
|
|
570
|
+
return {'cache_key': cache_key, 'value': value}
|
|
571
|
+
|
|
572
|
+
# Setup pending task if it needs it and then return the task
|
|
573
|
+
if var_entry.run_as_task or has_tasks:
|
|
574
|
+
var_uid = var_entry.uid or str(uuid.uuid4())
|
|
575
|
+
|
|
576
|
+
if has_tasks:
|
|
577
|
+
task_id = f'{var_uid}_MetaTask_{str(uuid.uuid4())}'
|
|
578
|
+
|
|
579
|
+
extra_notify_channels = [
|
|
580
|
+
channel
|
|
581
|
+
for arg in parsed_args
|
|
582
|
+
if isinstance(arg, BaseTask)
|
|
583
|
+
for channel in arg.notify_channels
|
|
584
|
+
]
|
|
585
|
+
eng_logger.debug(
|
|
586
|
+
f'DerivedVariable {_uid_short}',
|
|
587
|
+
'running has tasks',
|
|
588
|
+
{'uid': var_entry.uid, 'task_id': task_id},
|
|
589
|
+
)
|
|
590
|
+
meta_task = MetaTask(
|
|
591
|
+
var_entry.func,
|
|
592
|
+
parsed_args,
|
|
593
|
+
notify_channels=list(set(extra_notify_channels)),
|
|
594
|
+
process_as_task=var_entry.run_as_task,
|
|
595
|
+
cache_key=cache_key,
|
|
596
|
+
task_id=task_id,
|
|
597
|
+
reg_entry=var_entry, # task results are set as the DV result
|
|
598
|
+
)
|
|
599
|
+
|
|
600
|
+
# Immediately store the pending task in the store
|
|
601
|
+
pending_task = task_mgr.register_task(meta_task)
|
|
602
|
+
await store.set(var_entry, key=cache_key, value=pending_task, pin=_pin_result)
|
|
603
|
+
|
|
604
|
+
return {'cache_key': cache_key, 'value': meta_task}
|
|
605
|
+
|
|
606
|
+
task_id = f'{var_uid}_Task_{str(uuid.uuid4())}'
|
|
514
607
|
|
|
515
|
-
extra_notify_channels = [
|
|
516
|
-
channel for arg in parsed_args if isinstance(arg, BaseTask) for channel in arg.notify_channels
|
|
517
|
-
]
|
|
518
608
|
eng_logger.debug(
|
|
519
609
|
f'DerivedVariable {_uid_short}',
|
|
520
|
-
'running
|
|
610
|
+
'running as a task',
|
|
521
611
|
{'uid': var_entry.uid, 'task_id': task_id},
|
|
522
612
|
)
|
|
523
|
-
|
|
613
|
+
task = Task(
|
|
524
614
|
var_entry.func,
|
|
525
615
|
parsed_args,
|
|
526
|
-
notify_channels=list(set(extra_notify_channels)),
|
|
527
|
-
process_as_task=var_entry.run_as_task,
|
|
528
616
|
cache_key=cache_key,
|
|
529
617
|
task_id=task_id,
|
|
530
618
|
reg_entry=var_entry, # task results are set as the DV result
|
|
531
619
|
)
|
|
532
620
|
|
|
533
|
-
|
|
621
|
+
# Immediately store the pending task in the store
|
|
622
|
+
pending_task = task_mgr.register_task(task)
|
|
623
|
+
await store.set(var_entry, key=cache_key, value=pending_task, pin=_pin_result)
|
|
534
624
|
|
|
535
|
-
|
|
625
|
+
return {'cache_key': cache_key, 'value': task}
|
|
536
626
|
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
627
|
+
try:
|
|
628
|
+
result = await run_user_handler(var_entry.func, args=parsed_args)
|
|
629
|
+
except Exception:
|
|
630
|
+
# Delete the store value so subsequent requests recalculate instead
|
|
631
|
+
if var_entry.cache is not None:
|
|
632
|
+
await store.delete(var_entry, key=cache_key)
|
|
633
|
+
raise
|
|
634
|
+
|
|
635
|
+
# If a task is returned then ensure we register it
|
|
636
|
+
if isinstance(result, BaseTask):
|
|
637
|
+
eng_logger.info(
|
|
638
|
+
f'DerivedVariable {_uid_short} returning task as a result',
|
|
639
|
+
{'uid': var_entry.uid, 'task_id': result.task_id},
|
|
640
|
+
)
|
|
641
|
+
# Make sure cache settings are set on the task
|
|
642
|
+
result.cache_key = cache_key
|
|
643
|
+
result.reg_entry = var_entry
|
|
550
644
|
|
|
551
|
-
|
|
552
|
-
if var_entry.cache is not None:
|
|
553
|
-
await store.set_pending(var_entry, key=cache_key)
|
|
645
|
+
task_mgr.register_task(result)
|
|
554
646
|
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
# Set the store value to None before raising, so subsequent requests don't hang on a PendingValue
|
|
647
|
+
return {'cache_key': cache_key, 'value': result}
|
|
648
|
+
|
|
649
|
+
# only set the value if cache is not None, otherwise subsequent requests calculate the value again
|
|
559
650
|
if var_entry.cache is not None:
|
|
560
|
-
await store.set(var_entry, key=cache_key, value=
|
|
561
|
-
raise
|
|
651
|
+
await store.set(var_entry, key=cache_key, value=result, pin=_pin_result)
|
|
562
652
|
|
|
563
|
-
# If a task is returned then update pending value to pending task and return it
|
|
564
|
-
if isinstance(result, BaseTask):
|
|
565
653
|
eng_logger.info(
|
|
566
|
-
f'DerivedVariable {_uid_short} returning
|
|
567
|
-
{'uid': var_entry.uid, '
|
|
654
|
+
f'DerivedVariable {_uid_short} returning result',
|
|
655
|
+
{'uid': var_entry.uid, 'result': result},
|
|
568
656
|
)
|
|
569
|
-
# Make sure cache settings are set on the task
|
|
570
|
-
result.cache_key = cache_key
|
|
571
|
-
result.reg_entry = var_entry
|
|
572
|
-
|
|
573
657
|
return {'cache_key': cache_key, 'value': result}
|
|
574
658
|
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
659
|
+
@classmethod
|
|
660
|
+
async def _filter_data(
|
|
661
|
+
cls,
|
|
662
|
+
data: Union[DataFrame, Any, None],
|
|
663
|
+
filter_resolver: FilterResolver,
|
|
664
|
+
filters: Optional[FilterQuery] = None,
|
|
665
|
+
pagination: Optional[Pagination] = None,
|
|
666
|
+
) -> DataResponse:
|
|
667
|
+
if data is None:
|
|
668
|
+
return DataResponse(data=None, count=0, schema=None)
|
|
669
|
+
|
|
670
|
+
# silently add the index column for DataFrame values
|
|
671
|
+
# User resolver could technically not be returning a DataFrame
|
|
672
|
+
if isinstance(data, DataFrame):
|
|
673
|
+
data = append_index(data)
|
|
674
|
+
|
|
675
|
+
# Filtering part
|
|
676
|
+
data, count = await filter_resolver(data, filters, pagination)
|
|
677
|
+
return build_data_response(data, count)
|
|
578
678
|
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
679
|
+
@classmethod
|
|
680
|
+
async def get_tabular_data(
|
|
681
|
+
cls,
|
|
682
|
+
var_entry: DerivedVariableRegistryEntry,
|
|
683
|
+
store: CacheStore,
|
|
684
|
+
task_mgr: TaskManager,
|
|
685
|
+
args: List[Any],
|
|
686
|
+
force_key: Optional[str] = None,
|
|
687
|
+
pagination: Optional[Pagination] = None,
|
|
688
|
+
filters: Optional[FilterQuery] = None,
|
|
689
|
+
) -> Union[MetaTask, DataResponse]:
|
|
690
|
+
"""
|
|
691
|
+
Get filtered tabular data from the underlying derived variable.
|
|
692
|
+
|
|
693
|
+
Resolves the DerivedVariable and runs filtering on the result,
|
|
694
|
+
either using a custom filter_resolver or the default logic.
|
|
695
|
+
"""
|
|
696
|
+
filter_resolver = var_entry.filter_resolver or default_filter_resolver
|
|
697
|
+
result = await cls.get_value(var_entry, store, task_mgr, args, force_key)
|
|
698
|
+
|
|
699
|
+
if isinstance(result['value'], BaseTask):
|
|
700
|
+
task_id = f'{var_entry.uid}_Filter_MetaTask_{str(uuid.uuid4())}'
|
|
701
|
+
task = MetaTask(
|
|
702
|
+
cls._filter_data,
|
|
703
|
+
task_id=task_id,
|
|
704
|
+
kwargs={
|
|
705
|
+
'data': result['value'],
|
|
706
|
+
'filters': filters,
|
|
707
|
+
'pagination': pagination,
|
|
708
|
+
'filter_resolver': filter_resolver,
|
|
709
|
+
},
|
|
582
710
|
)
|
|
583
|
-
|
|
711
|
+
task_mgr.register_task(task)
|
|
712
|
+
return task
|
|
713
|
+
|
|
714
|
+
return await cls._filter_data(result['value'], filter_resolver, filters, pagination)
|
|
584
715
|
|
|
585
716
|
@classmethod
|
|
586
717
|
def check_polling(cls, variables: List[AnyVariable]):
|
|
@@ -604,12 +735,15 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
|
|
|
604
735
|
class DerivedVariableRegistryEntry(CachedRegistryEntry):
|
|
605
736
|
deps: Optional[List[int]]
|
|
606
737
|
func: Optional[Callable[..., Any]]
|
|
738
|
+
filter_resolver: Optional[FilterResolver]
|
|
607
739
|
run_as_task: bool
|
|
608
740
|
variables: List[AnyVariable]
|
|
609
741
|
polling_interval: Optional[int]
|
|
610
742
|
get_value: Callable[..., Awaitable[Any]]
|
|
611
743
|
"""Handler to get the value of the derived variable. Defaults to DerivedVariable.get_value, should match the signature"""
|
|
612
|
-
|
|
744
|
+
get_tabular_data: Callable[..., Awaitable[Union[DataResponse, MetaTask]]]
|
|
745
|
+
"""Handler to get the tabular data of the derived variable. Defaults to DerivedVariable.get_tabular_data, should match the signature"""
|
|
746
|
+
model_config = ConfigDict(extra='forbid', arbitrary_types_allowed=True)
|
|
613
747
|
|
|
614
748
|
|
|
615
749
|
class LatestValueRegistryEntry(CachedRegistryEntry):
|