dara-core 1.19.1__py3-none-any.whl → 1.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. dara/core/__init__.py +1 -0
  2. dara/core/auth/basic.py +13 -7
  3. dara/core/auth/definitions.py +2 -2
  4. dara/core/auth/utils.py +1 -1
  5. dara/core/base_definitions.py +7 -42
  6. dara/core/data_utils.py +16 -17
  7. dara/core/definitions.py +8 -8
  8. dara/core/interactivity/__init__.py +4 -0
  9. dara/core/interactivity/actions.py +20 -22
  10. dara/core/interactivity/any_data_variable.py +7 -135
  11. dara/core/interactivity/any_variable.py +1 -1
  12. dara/core/interactivity/client_variable.py +71 -0
  13. dara/core/interactivity/data_variable.py +8 -266
  14. dara/core/interactivity/derived_data_variable.py +6 -290
  15. dara/core/interactivity/derived_variable.py +335 -201
  16. dara/core/interactivity/filtering.py +29 -2
  17. dara/core/interactivity/loop_variable.py +2 -2
  18. dara/core/interactivity/non_data_variable.py +5 -68
  19. dara/core/interactivity/plain_variable.py +87 -14
  20. dara/core/interactivity/server_variable.py +325 -0
  21. dara/core/interactivity/state_variable.py +2 -2
  22. dara/core/interactivity/switch_variable.py +15 -15
  23. dara/core/interactivity/tabular_variable.py +94 -0
  24. dara/core/interactivity/url_variable.py +10 -90
  25. dara/core/internal/cache_store/cache_store.py +5 -20
  26. dara/core/internal/dependency_resolution.py +27 -69
  27. dara/core/internal/devtools.py +10 -3
  28. dara/core/internal/execute_action.py +9 -3
  29. dara/core/internal/multi_resource_lock.py +70 -0
  30. dara/core/internal/normalization.py +0 -5
  31. dara/core/internal/pandas_utils.py +105 -3
  32. dara/core/internal/pool/definitions.py +1 -1
  33. dara/core/internal/pool/task_pool.py +1 -1
  34. dara/core/internal/registries.py +3 -2
  35. dara/core/internal/registry.py +1 -1
  36. dara/core/internal/registry_lookup.py +5 -3
  37. dara/core/internal/routing.py +52 -121
  38. dara/core/internal/store.py +2 -29
  39. dara/core/internal/tasks.py +372 -182
  40. dara/core/internal/utils.py +25 -3
  41. dara/core/internal/websocket.py +1 -1
  42. dara/core/js_tooling/js_utils.py +2 -0
  43. dara/core/logging.py +10 -6
  44. dara/core/persistence.py +26 -4
  45. dara/core/umd/dara.core.umd.js +751 -1386
  46. dara/core/visual/dynamic_component.py +10 -13
  47. {dara_core-1.19.1.dist-info → dara_core-1.20.0.dist-info}/METADATA +10 -10
  48. {dara_core-1.19.1.dist-info → dara_core-1.20.0.dist-info}/RECORD +51 -47
  49. {dara_core-1.19.1.dist-info → dara_core-1.20.0.dist-info}/LICENSE +0 -0
  50. {dara_core-1.19.1.dist-info → dara_core-1.20.0.dist-info}/WHEEL +0 -0
  51. {dara_core-1.19.1.dist-info → dara_core-1.20.0.dist-info}/entry_points.txt +0 -0
@@ -27,12 +27,16 @@ from typing import (
27
27
  Generic,
28
28
  List,
29
29
  Optional,
30
+ Protocol,
31
+ Tuple,
30
32
  TypeVar,
31
33
  Union,
32
34
  cast,
33
35
  )
34
36
 
37
+ import anyio
35
38
  from cachetools import LRUCache
39
+ from pandas import DataFrame
36
40
  from pydantic import (
37
41
  ConfigDict,
38
42
  Field,
@@ -41,7 +45,7 @@ from pydantic import (
41
45
  field_validator,
42
46
  model_serializer,
43
47
  )
44
- from typing_extensions import TypedDict
48
+ from typing_extensions import TypedDict, runtime_checkable
45
49
 
46
50
  from dara.core.base_definitions import (
47
51
  BaseCachePolicy,
@@ -49,14 +53,17 @@ from dara.core.base_definitions import (
49
53
  Cache,
50
54
  CacheArgType,
51
55
  CachedRegistryEntry,
56
+ NonTabularDataError,
52
57
  PendingTask,
53
- PendingValue,
54
58
  )
55
59
  from dara.core.interactivity.actions import TriggerVariable, assert_no_context
56
60
  from dara.core.interactivity.any_variable import AnyVariable
57
- from dara.core.interactivity.non_data_variable import NonDataVariable
61
+ from dara.core.interactivity.client_variable import ClientVariable
62
+ from dara.core.interactivity.filtering import FilterQuery, Pagination, apply_filters
58
63
  from dara.core.internal.cache_store import CacheStore
59
64
  from dara.core.internal.encoder_registry import deserialize
65
+ from dara.core.internal.multi_resource_lock import MultiResourceLock
66
+ from dara.core.internal.pandas_utils import DataResponse, append_index, build_data_response
60
67
  from dara.core.internal.tasks import MetaTask, Task, TaskManager
61
68
  from dara.core.internal.utils import get_cache_scope, run_user_handler
62
69
  from dara.core.logging import dev_logger, eng_logger
@@ -64,6 +71,10 @@ from dara.core.metrics import RUNTIME_METRICS_TRACKER
64
71
 
65
72
  VariableType = TypeVar('VariableType')
66
73
 
74
+ # Static lock for all DV computations, keyed by cache_key
75
+ # Explicitly not re-entrant, this prevents variable loops
76
+ DV_LOCK = MultiResourceLock()
77
+
67
78
  # Global set to track force keys that have been encountered
68
79
  # LRU with 2048 entries should be sufficient to not drop in-progress force keys
69
80
  # but also not have to worry about memory leaks
@@ -80,7 +91,24 @@ class DerivedVariableResult(TypedDict):
80
91
  value: Union[Any, BaseTask]
81
92
 
82
93
 
83
- class DerivedVariable(NonDataVariable, Generic[VariableType]):
94
+ @runtime_checkable
95
+ class FilterResolver(Protocol):
96
+ async def __call__(
97
+ self, data: Any, filters: Optional[FilterQuery] = None, pagination: Optional[Pagination] = None
98
+ ) -> Tuple[DataFrame, int]: ...
99
+
100
+
101
+ async def default_filter_resolver(
102
+ data: Any, filters: Optional[FilterQuery] = None, pagination: Optional[Pagination] = None
103
+ ) -> Tuple[DataFrame, int]:
104
+ if not isinstance(data, DataFrame):
105
+ raise NonTabularDataError(
106
+ f'Default filter resolver expects a DataFrame to be returned from the DerivedVariable function, got {type(data)}'
107
+ )
108
+ return apply_filters(data, filters, pagination)
109
+
110
+
111
+ class DerivedVariable(ClientVariable, Generic[VariableType]):
84
112
  """
85
113
  A DerivedVariable allows a value to be derived (via a function) from the current value of a set of other
86
114
  variables with a python function. This is one of two primary ways that python logic can be embedded into the
@@ -89,6 +117,61 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
89
117
  DerivedVariables can be chained together to form complex data flows whilst keeping everything organized and
90
118
  structured in an easy to follow way. DerivedVariable results are cached automatically and will only be
91
119
  recalculated when necessary.
120
+
121
+ As a special case, DerivedVariables can be used for tabular data and retrieving its slice as a DataFrame. This functionality
122
+ is utilized by e.g. the built-in Table component. By default, when passing a DerivedVariable to a Table component, Dara
123
+ expects the resolver function to return a DataFrame or None. This behaviour can be customized by providing a custom `filter_resolver`.
124
+ This function will be invoked with the result of the main DerivedVariable function, as well as filters and pagination. It can be used
125
+ to e.g. retrieve a slice of data from an API endpoint or a database instead of retrieving the entire dataset and filtering it in-memory.
126
+
127
+ ```python
128
+ from typing import Optional
129
+ import httpx
130
+ import pandas as pd
131
+ from dara.core import DerivedVariable, Variable
132
+ from dara.core.interactivity.filtering import FilterQuery, Pagination
133
+
134
+ # Custom filter resolver for API-based filtering
135
+ async def api_filter_resolver(data, filters: Optional[FilterQuery] = None, pagination: Optional[Pagination] = None):
136
+ async with httpx.AsyncClient() as client:
137
+ # in this case data is a string url
138
+ response = await client.get(data, params={
139
+ # translates filters/pagination to API-specific query params
140
+ 'filters': filters.dict() if filters else {},
141
+ 'offset': pagination.offset if pagination else 0,
142
+ 'limit': pagination.limit if pagination else 50
143
+ })
144
+ data = response.json()
145
+ # conform to the filter resolver API, return a tuple of (DataFrame, total_count)
146
+ return pd.DataFrame(data['results']), data['total_count']
147
+
148
+ # DerivedVariable with custom filtering
149
+ user_params = Variable({'dataset': 'experiments'})
150
+ derived_data = DerivedVariable(
151
+ lambda params: f"https://api.example.com/data/{params['dataset']}",
152
+ variables=[user_params],
153
+ filter_resolver=api_filter_resolver
154
+ )
155
+ ```
156
+
157
+ :param func: the function to derive a new value from the input variables.
158
+ :param variables: a set of input variables that will be passed to the deriving function
159
+ :param cache: whether to cache the result, defaults to global caching. Other options are to cache per user
160
+ session, per user or to not cache at all
161
+ :param run_as_task: whether to run the calculation in a separate process, recommended for any CPU intensive
162
+ tasks, defaults to False
163
+ :param polling_interval: an optional polling interval for the DerivedVariable. Setting this will cause the
164
+ component to poll the backend and refresh itself every n seconds.
165
+ :param filter_resolver: an optional function to resolve the filter query for the derived variable. This can be
166
+ used to customize the way tabular data is resolved. This is invoked with the result of the main DerivedVariable function,
167
+ as well as filters and pagination. The function should return a DataFrame and total count.
168
+ :param deps: an optional array of variables, specifying which dependant variables changing should trigger a
169
+ recalculation of the derived variable
170
+ - `deps = None` - `func` is ran everytime (default behaviour),
171
+ - `deps = []` - `func` is ran once on initial startup,
172
+ - `deps = [var1, var2]` - `func` is ran whenever one of these vars changes
173
+ - `deps = [var1.get('nested_property')]` - `func` is ran only when the nested property changes, other changes to the variable are ignored
174
+ :param uid: the unique identifier for this variable; if not provided a random one is generated
92
175
  """
93
176
 
94
177
  cache: Optional[BaseCachePolicy]
@@ -97,11 +180,11 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
97
180
  deps: Optional[List[AnyVariable]] = Field(validate_default=True)
98
181
  nested: List[str] = Field(default_factory=list)
99
182
  uid: str
100
- model_config = ConfigDict(extra='forbid', use_enum_values=True)
183
+ model_config = ConfigDict(extra='forbid', use_enum_values=True, arbitrary_types_allowed=True)
101
184
 
102
185
  def __init__(
103
186
  self,
104
- func: Callable[..., VariableType] | Callable[..., Awaitable[VariableType]],
187
+ func: Union[Callable[..., VariableType], Callable[..., Awaitable[VariableType]]],
105
188
  variables: List[AnyVariable],
106
189
  cache: Optional[CacheArgType] = Cache.Type.GLOBAL,
107
190
  run_as_task: bool = False,
@@ -109,33 +192,9 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
109
192
  deps: Optional[List[AnyVariable]] = None,
110
193
  uid: Optional[str] = None,
111
194
  nested: Optional[List[str]] = None,
112
- _get_value: Optional[Callable[..., Awaitable[Any]]] = None,
195
+ filter_resolver: Optional[FilterResolver] = None,
196
+ **kwargs,
113
197
  ):
114
- """
115
- A DerivedVariable allows a value to be derived (via a function) from the current value of a set of other
116
- variables with a python function. This is one of two primary ways that python logic can be embedded into the
117
- application (the other being the @py_component decorator).
118
-
119
- DerivedVariables can be chained together to form complex data flows whilst keeping everything organized and
120
- structured in an easy to follow way. DerivedVariable results are cached automatically and will only be
121
- recalculated when necessary.
122
-
123
- :param func: the function to derive a new value from the input variables.
124
- :param variables: a set of input variables that will be passed to the deriving function
125
- :param cache: whether to cache the result, defaults to global caching. Other options are to cache per user
126
- session, per user or to not cache at all
127
- :param run_as_task: whether to run the calculation in a separate process, recommended for any CPU intensive
128
- tasks, defaults to False
129
- :param polling_interval: an optional polling interval for the DerivedVariable. Setting this will cause the
130
- component to poll the backend and refresh itself every n seconds.
131
- :param deps: an optional array of variables, specifying which dependant variables changing should trigger a
132
- recalculation of the derived variable
133
- - `deps = None` - `func` is ran everytime (default behaviour),
134
- - `deps = []` - `func` is ran once on initial startup,
135
- - `deps = [var1, var2]` - `func` is ran whenever one of these vars changes
136
- - `deps = [var1.get('nested_property')]` - `func` is ran only when the nested property changes, other changes to the variable are ignored
137
- :param uid: the unique identifier for this variable; if not provided a random one is generated
138
- """
139
198
  if nested is None:
140
199
  nested = []
141
200
 
@@ -176,6 +235,7 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
176
235
  polling_interval=polling_interval,
177
236
  deps=deps,
178
237
  nested=nested,
238
+ **kwargs,
179
239
  )
180
240
 
181
241
  # Import the registry of variables and register the function at import
@@ -194,12 +254,14 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
194
254
  DerivedVariableRegistryEntry(
195
255
  cache=cache,
196
256
  func=func,
257
+ filter_resolver=filter_resolver,
197
258
  polling_interval=polling_interval,
198
259
  run_as_task=run_as_task,
199
260
  uid=str(self.uid),
200
261
  variables=variables,
201
262
  deps=deps_indexes,
202
- get_value=_get_value or DerivedVariable.get_value,
263
+ get_value=DerivedVariable.get_value,
264
+ get_tabular_data=DerivedVariable.get_tabular_data,
203
265
  ),
204
266
  )
205
267
 
@@ -351,6 +413,7 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
351
413
  task_mgr: TaskManager,
352
414
  args: List[Any],
353
415
  force_key: Optional[str] = None,
416
+ _pin_result: bool = False,
354
417
  ) -> DerivedVariableResult:
355
418
  """
356
419
  Get the value of this DerivedVariable. This method will check the main app store for an appropriate response
@@ -362,10 +425,18 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
362
425
  :param task_mgr: task manager instance
363
426
  :param args: the arguments to call the underlying function with
364
427
  :param force_key: unique key for forced execution, if provided forces cache bypass
428
+ :param _pin_result: whether to pin the result in the store, used internally by derived data variables
365
429
  """
430
+ # dynamic import due to circular import
431
+ from dara.core.internal.dependency_resolution import (
432
+ is_forced,
433
+ resolve_dependency,
434
+ )
435
+
366
436
  assert var_entry.func is not None, 'DerivedVariable function is not defined'
367
437
 
368
- histogram = RUNTIME_METRICS_TRACKER.get_dv_histogram(var_entry.uid)
438
+ # Shortened UID used for logging
439
+ _uid_short = f'{var_entry.uid[:3]}..{var_entry.uid[-3:]}'
369
440
 
370
441
  if var_entry.run_as_task:
371
442
  from dara.core.internal.registries import utils_registry
@@ -375,212 +446,272 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
375
446
  'Task module is not configured. Set config.task_module path to a tasks.py module to run a derived variable as task.'
376
447
  )
377
448
 
378
- with histogram.time():
379
- # Shortened UID used for logging
380
- _uid_short = f'{var_entry.uid[:3]}..{var_entry.uid[-3:]}'
381
-
382
- # Extract and process nested derived variables
383
- values = []
449
+ # Compute cache key first, before any other work
450
+ cache_key = DerivedVariable._get_cache_key(*args, uid=var_entry.uid, deps=var_entry.deps)
384
451
 
385
- # dynamic import due to circular import
386
- from dara.core.internal.dependency_resolution import resolve_dependency
387
-
388
- eng_logger.info(
389
- f'Derived Variable {_uid_short} get_value',
390
- {'uid': var_entry.uid, 'args': args},
391
- )
452
+ # Lock on this specific cache key for the entire computation
453
+ async with DV_LOCK.acquire(cache_key):
454
+ histogram = RUNTIME_METRICS_TRACKER.get_dv_histogram(var_entry.uid)
392
455
 
393
- for val in args:
394
- var_value = await resolve_dependency(val, store, task_mgr)
395
- values.append(var_value)
456
+ with histogram.time():
457
+ # Extract and process nested derived variables
458
+ values: List[Any] = [None] * len(args)
396
459
 
397
- eng_logger.debug(
398
- f'DerivedVariable {_uid_short}',
399
- 'resolved arguments',
400
- {'values': values, 'uid': var_entry.uid},
401
- )
402
-
403
- # Loop over the passed arguments and if the expected type is a BaseModel and arg is a dict then convert the dict
404
- # to an instance of the BaseModel class.
405
- parsed_args = DerivedVariable._restore_pydantic_models(var_entry.func, *values)
406
-
407
- dev_logger.debug(
408
- f'DerivedVariable {_uid_short}',
409
- 'executing',
410
- {'args': parsed_args, 'uid': var_entry.uid},
411
- )
460
+ eng_logger.info(
461
+ f'Derived Variable {_uid_short} get_value',
462
+ {'uid': var_entry.uid, 'args': args},
463
+ )
412
464
 
413
- # Check if there are any Tasks to be run in the args
414
- has_tasks = any(isinstance(arg, BaseTask) for arg in parsed_args)
465
+ # Whether one of the (grand?)children has been forced - if so, the parent should skip the cache as well
466
+ has_forced_child = False
415
467
 
416
- cache_key = DerivedVariable._get_cache_key(*args, uid=var_entry.uid, deps=var_entry.deps)
417
- await DerivedVariable.add_latest_value(store, var_entry, cache_key)
468
+ async def _resolve_arg(val: Any, index: int):
469
+ nonlocal has_forced_child
418
470
 
419
- cache_type = var_entry.cache
471
+ if is_forced(val):
472
+ has_forced_child = True
473
+ var_value = await resolve_dependency(val, store, task_mgr)
474
+ values[index] = var_value
420
475
 
421
- # Handle force key tracking to prevent double execution
422
- effective_force = force_key is not None
423
- if force_key is not None:
424
- if force_key in _force_keys_seen:
425
- # This force key has been seen before, don't force again
426
- effective_force = False
427
- eng_logger.debug(
428
- f'DerivedVariable {_uid_short} force key already seen, using cached value',
429
- extra={'uid': var_entry.uid, 'force_key': force_key},
430
- )
431
- else:
432
- # First time seeing this force key, add it to the set
433
- _force_keys_seen[force_key] = True
434
- eng_logger.debug(
435
- f'DerivedVariable {_uid_short} new force key, will force recalculation',
436
- extra={'uid': var_entry.uid, 'force_key': force_key},
437
- )
476
+ async with anyio.create_task_group() as tg:
477
+ for idx, val in enumerate(args):
478
+ tg.start_soon(_resolve_arg, val, idx)
438
479
 
439
- # If deps is not None, force session use
440
- # Note: this is temporarily commented out as no tests were broken by removing it;
441
- # once we find what scenario this fixes, we should add a test to cover that scenario and move this snippet
442
- # to constructors of DerivedVariable and DerivedDataVariable
443
- # if cache_type == CacheType.GLOBAL and (var_entry.deps is not None and len(var_entry.deps) > 0):
444
- # cache_type = CacheType.SESSION
445
-
446
- eng_logger.debug(
447
- f'DerivedVariable {_uid_short}',
448
- f'using cache: {cache_type}',
449
- {'uid': var_entry.uid},
450
- )
480
+ eng_logger.debug(
481
+ f'DerivedVariable {_uid_short}',
482
+ 'resolved arguments',
483
+ {'values': values, 'uid': var_entry.uid},
484
+ )
451
485
 
452
- # Start with a sentinel value to indicate that the value is missing
453
- # from cache, this lets us distinguish between a cache miss and a
454
- # value that is None
455
- value = VALUE_MISSING
486
+ # Loop over the passed arguments and if the expected type is a BaseModel and arg is a dict then convert the dict
487
+ # to an instance of the BaseModel class.
488
+ parsed_args = DerivedVariable._restore_pydantic_models(var_entry.func, *values)
456
489
 
457
- ignore_cache = (
458
- var_entry.cache is None
459
- or var_entry.polling_interval
460
- or DerivedVariable.check_polling(var_entry.variables)
461
- or effective_force
462
- )
463
- if not ignore_cache:
464
- try:
465
- value = await store.get(var_entry, key=cache_key, raise_for_missing=True)
466
- eng_logger.debug(
467
- f'DerivedVariable {_uid_short}',
468
- 'retrieved value from cache',
469
- {'uid': var_entry.uid, 'cached_value': value},
470
- )
471
- except KeyError:
472
- eng_logger.debug(
473
- f'DerivedVariable {_uid_short}',
474
- 'no value found in cache',
475
- {'uid': var_entry.uid},
476
- )
477
- # key error means no entry found;
478
- # this lets us distinguish from a None value stored and not found
479
-
480
- # If it's a PendingTask then return that task so it can be awaited later by a MetaTask
481
- if isinstance(value, PendingTask):
482
- eng_logger.info(
483
- f'DerivedVariable {_uid_short} waiting for pending task',
484
- {'uid': var_entry.uid, 'pending_task': value.task_id},
490
+ dev_logger.debug(
491
+ f'DerivedVariable {_uid_short}',
492
+ 'executing',
493
+ {'args': parsed_args, 'uid': var_entry.uid},
485
494
  )
486
- value.add_subscriber()
487
- return {'cache_key': cache_key, 'value': value}
488
495
 
489
- # If it's a PendingValue then wait for the value and return it
490
- if isinstance(value, PendingValue):
491
- eng_logger.info(
492
- f'DerivedVariable {_uid_short} waiting for pending value',
493
- {'uid': var_entry.uid, 'pending_value': value},
494
- )
495
- return {
496
- 'cache_key': cache_key,
497
- 'value': await store.get_or_wait(var_entry, key=cache_key),
498
- }
496
+ # Check if there are any Tasks to be run in the args
497
+ has_tasks = any(isinstance(arg, BaseTask) for arg in parsed_args)
498
+
499
+ await DerivedVariable.add_latest_value(store, var_entry, cache_key)
500
+
501
+ cache_type = var_entry.cache
502
+
503
+ # Handle force key tracking to prevent double execution
504
+ effective_force = force_key is not None
505
+ if force_key is not None:
506
+ if force_key in _force_keys_seen:
507
+ # This force key has been seen before, don't force again
508
+ effective_force = False
509
+ eng_logger.debug(
510
+ f'DerivedVariable {_uid_short} force key already seen, using cached value',
511
+ extra={'uid': var_entry.uid, 'force_key': force_key},
512
+ )
513
+ else:
514
+ # First time seeing this force key, add it to the set
515
+ _force_keys_seen[force_key] = True
516
+ eng_logger.debug(
517
+ f'DerivedVariable {_uid_short} new force key, will force recalculation',
518
+ extra={'uid': var_entry.uid, 'force_key': force_key},
519
+ )
499
520
 
500
- # We retrieved an actual value from the cache, return it
501
- if not ignore_cache and value is not VALUE_MISSING:
502
- eng_logger.info(
503
- f'DerivedVariable {_uid_short} returning cached value directly',
504
- {'uid': var_entry.uid, 'cached_value': value},
521
+ eng_logger.debug(
522
+ f'DerivedVariable {_uid_short}',
523
+ f'using cache: {cache_type}',
524
+ {'uid': var_entry.uid},
505
525
  )
506
- return {'cache_key': cache_key, 'value': value}
507
526
 
508
- # Setup pending task if it needs it and then return the task
509
- if var_entry.run_as_task or has_tasks:
510
- var_uid = var_entry.uid or str(uuid.uuid4())
527
+ # Start with a sentinel value to indicate that the value is missing
528
+ # from cache, this lets us distinguish between a cache miss and a
529
+ # value that is None
530
+ value = VALUE_MISSING
531
+
532
+ ignore_cache = (
533
+ var_entry.cache is None
534
+ or var_entry.polling_interval
535
+ or DerivedVariable.check_polling(var_entry.variables)
536
+ or effective_force
537
+ or has_forced_child
538
+ )
539
+ if not ignore_cache:
540
+ try:
541
+ value = await store.get(var_entry, key=cache_key, raise_for_missing=True)
542
+ eng_logger.debug(
543
+ f'DerivedVariable {_uid_short}',
544
+ 'retrieved value from cache',
545
+ {'uid': var_entry.uid, 'cached_value': value},
546
+ )
547
+ except KeyError:
548
+ eng_logger.debug(
549
+ f'DerivedVariable {_uid_short}',
550
+ 'no value found in cache',
551
+ {'uid': var_entry.uid},
552
+ )
553
+ # key error means no entry found;
554
+ # this lets us distinguish from a None value stored and not found
555
+
556
+ # If it's a PendingTask then return that task so it can be awaited later by a MetaTask
557
+ if isinstance(value, PendingTask):
558
+ eng_logger.info(
559
+ f'DerivedVariable {_uid_short} waiting for pending task',
560
+ {'uid': var_entry.uid, 'pending_task': value.task_id},
561
+ )
562
+ return {'cache_key': cache_key, 'value': value}
511
563
 
512
- if has_tasks:
513
- task_id = f'{var_uid}_MetaTask_{str(uuid.uuid4())}'
564
+ # We retrieved an actual value from the cache, return it
565
+ if not ignore_cache and value is not VALUE_MISSING:
566
+ eng_logger.info(
567
+ f'DerivedVariable {_uid_short} returning cached value directly',
568
+ {'uid': var_entry.uid, 'cached_value': value},
569
+ )
570
+ return {'cache_key': cache_key, 'value': value}
571
+
572
+ # Setup pending task if it needs it and then return the task
573
+ if var_entry.run_as_task or has_tasks:
574
+ var_uid = var_entry.uid or str(uuid.uuid4())
575
+
576
+ if has_tasks:
577
+ task_id = f'{var_uid}_MetaTask_{str(uuid.uuid4())}'
578
+
579
+ extra_notify_channels = [
580
+ channel
581
+ for arg in parsed_args
582
+ if isinstance(arg, BaseTask)
583
+ for channel in arg.notify_channels
584
+ ]
585
+ eng_logger.debug(
586
+ f'DerivedVariable {_uid_short}',
587
+ 'running has tasks',
588
+ {'uid': var_entry.uid, 'task_id': task_id},
589
+ )
590
+ meta_task = MetaTask(
591
+ var_entry.func,
592
+ parsed_args,
593
+ notify_channels=list(set(extra_notify_channels)),
594
+ process_as_task=var_entry.run_as_task,
595
+ cache_key=cache_key,
596
+ task_id=task_id,
597
+ reg_entry=var_entry, # task results are set as the DV result
598
+ )
599
+
600
+ # Immediately store the pending task in the store
601
+ pending_task = task_mgr.register_task(meta_task)
602
+ await store.set(var_entry, key=cache_key, value=pending_task, pin=_pin_result)
603
+
604
+ return {'cache_key': cache_key, 'value': meta_task}
605
+
606
+ task_id = f'{var_uid}_Task_{str(uuid.uuid4())}'
514
607
 
515
- extra_notify_channels = [
516
- channel for arg in parsed_args if isinstance(arg, BaseTask) for channel in arg.notify_channels
517
- ]
518
608
  eng_logger.debug(
519
609
  f'DerivedVariable {_uid_short}',
520
- 'running has tasks',
610
+ 'running as a task',
521
611
  {'uid': var_entry.uid, 'task_id': task_id},
522
612
  )
523
- meta_task = MetaTask(
613
+ task = Task(
524
614
  var_entry.func,
525
615
  parsed_args,
526
- notify_channels=list(set(extra_notify_channels)),
527
- process_as_task=var_entry.run_as_task,
528
616
  cache_key=cache_key,
529
617
  task_id=task_id,
530
618
  reg_entry=var_entry, # task results are set as the DV result
531
619
  )
532
620
 
533
- return {'cache_key': cache_key, 'value': meta_task}
621
+ # Immediately store the pending task in the store
622
+ pending_task = task_mgr.register_task(task)
623
+ await store.set(var_entry, key=cache_key, value=pending_task, pin=_pin_result)
534
624
 
535
- task_id = f'{var_uid}_Task_{str(uuid.uuid4())}'
625
+ return {'cache_key': cache_key, 'value': task}
536
626
 
537
- eng_logger.debug(
538
- f'DerivedVariable {_uid_short}',
539
- 'running as a task',
540
- {'uid': var_entry.uid, 'task_id': task_id},
541
- )
542
- task = Task(
543
- var_entry.func,
544
- parsed_args,
545
- cache_key=cache_key,
546
- task_id=task_id,
547
- reg_entry=var_entry, # task results are set as the DV result
548
- )
549
- return {'cache_key': cache_key, 'value': task}
627
+ try:
628
+ result = await run_user_handler(var_entry.func, args=parsed_args)
629
+ except Exception:
630
+ # Delete the store value so subsequent requests recalculate instead
631
+ if var_entry.cache is not None:
632
+ await store.delete(var_entry, key=cache_key)
633
+ raise
634
+
635
+ # If a task is returned then ensure we register it
636
+ if isinstance(result, BaseTask):
637
+ eng_logger.info(
638
+ f'DerivedVariable {_uid_short} returning task as a result',
639
+ {'uid': var_entry.uid, 'task_id': result.task_id},
640
+ )
641
+ # Make sure cache settings are set on the task
642
+ result.cache_key = cache_key
643
+ result.reg_entry = var_entry
550
644
 
551
- # only set pending value if cache is not None, otherwise subsequent requests calculate the value again
552
- if var_entry.cache is not None:
553
- await store.set_pending(var_entry, key=cache_key)
645
+ task_mgr.register_task(result)
554
646
 
555
- try:
556
- result = await run_user_handler(var_entry.func, args=parsed_args)
557
- except Exception as e:
558
- # Set the store value to None before raising, so subsequent requests don't hang on a PendingValue
647
+ return {'cache_key': cache_key, 'value': result}
648
+
649
+ # only set the value if cache is not None, otherwise subsequent requests calculate the value again
559
650
  if var_entry.cache is not None:
560
- await store.set(var_entry, key=cache_key, value=None, error=e)
561
- raise
651
+ await store.set(var_entry, key=cache_key, value=result, pin=_pin_result)
562
652
 
563
- # If a task is returned then update pending value to pending task and return it
564
- if isinstance(result, BaseTask):
565
653
  eng_logger.info(
566
- f'DerivedVariable {_uid_short} returning task as a result',
567
- {'uid': var_entry.uid, 'task_id': result.task_id},
654
+ f'DerivedVariable {_uid_short} returning result',
655
+ {'uid': var_entry.uid, 'result': result},
568
656
  )
569
- # Make sure cache settings are set on the task
570
- result.cache_key = cache_key
571
- result.reg_entry = var_entry
572
-
573
657
  return {'cache_key': cache_key, 'value': result}
574
658
 
575
- # only set the value if cache is not None, otherwise subsequent requests calculate the value again
576
- if var_entry.cache is not None:
577
- await store.set(var_entry, key=cache_key, value=result)
659
+ @classmethod
660
+ async def _filter_data(
661
+ cls,
662
+ data: Union[DataFrame, Any, None],
663
+ filter_resolver: FilterResolver,
664
+ filters: Optional[FilterQuery] = None,
665
+ pagination: Optional[Pagination] = None,
666
+ ) -> DataResponse:
667
+ if data is None:
668
+ return DataResponse(data=None, count=0, schema=None)
669
+
670
+ # silently add the index column for DataFrame values
671
+ # User resolver could technically not be returning a DataFrame
672
+ if isinstance(data, DataFrame):
673
+ data = append_index(data)
674
+
675
+ # Filtering part
676
+ data, count = await filter_resolver(data, filters, pagination)
677
+ return build_data_response(data, count)
578
678
 
579
- eng_logger.info(
580
- f'DerivedVariable {_uid_short} returning result',
581
- {'uid': var_entry.uid, 'result': result},
679
+ @classmethod
680
+ async def get_tabular_data(
681
+ cls,
682
+ var_entry: DerivedVariableRegistryEntry,
683
+ store: CacheStore,
684
+ task_mgr: TaskManager,
685
+ args: List[Any],
686
+ force_key: Optional[str] = None,
687
+ pagination: Optional[Pagination] = None,
688
+ filters: Optional[FilterQuery] = None,
689
+ ) -> Union[MetaTask, DataResponse]:
690
+ """
691
+ Get filtered tabular data from the underlying derived variable.
692
+
693
+ Resolves the DerivedVariable and runs filtering on the result,
694
+ either using a custom filter_resolver or the default logic.
695
+ """
696
+ filter_resolver = var_entry.filter_resolver or default_filter_resolver
697
+ result = await cls.get_value(var_entry, store, task_mgr, args, force_key)
698
+
699
+ if isinstance(result['value'], BaseTask):
700
+ task_id = f'{var_entry.uid}_Filter_MetaTask_{str(uuid.uuid4())}'
701
+ task = MetaTask(
702
+ cls._filter_data,
703
+ task_id=task_id,
704
+ kwargs={
705
+ 'data': result['value'],
706
+ 'filters': filters,
707
+ 'pagination': pagination,
708
+ 'filter_resolver': filter_resolver,
709
+ },
582
710
  )
583
- return {'cache_key': cache_key, 'value': result}
711
+ task_mgr.register_task(task)
712
+ return task
713
+
714
+ return await cls._filter_data(result['value'], filter_resolver, filters, pagination)
584
715
 
585
716
  @classmethod
586
717
  def check_polling(cls, variables: List[AnyVariable]):
@@ -604,12 +735,15 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
604
735
  class DerivedVariableRegistryEntry(CachedRegistryEntry):
605
736
  deps: Optional[List[int]]
606
737
  func: Optional[Callable[..., Any]]
738
+ filter_resolver: Optional[FilterResolver]
607
739
  run_as_task: bool
608
740
  variables: List[AnyVariable]
609
741
  polling_interval: Optional[int]
610
742
  get_value: Callable[..., Awaitable[Any]]
611
743
  """Handler to get the value of the derived variable. Defaults to DerivedVariable.get_value, should match the signature"""
612
- model_config = ConfigDict(extra='forbid')
744
+ get_tabular_data: Callable[..., Awaitable[Union[DataResponse, MetaTask]]]
745
+ """Handler to get the tabular data of the derived variable. Defaults to DerivedVariable.get_tabular_data, should match the signature"""
746
+ model_config = ConfigDict(extra='forbid', arbitrary_types_allowed=True)
613
747
 
614
748
 
615
749
  class LatestValueRegistryEntry(CachedRegistryEntry):