dara-core 1.19.0__py3-none-any.whl → 1.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. dara/core/__init__.py +1 -0
  2. dara/core/auth/basic.py +13 -7
  3. dara/core/auth/definitions.py +2 -2
  4. dara/core/auth/utils.py +1 -1
  5. dara/core/base_definitions.py +7 -42
  6. dara/core/data_utils.py +16 -17
  7. dara/core/definitions.py +8 -8
  8. dara/core/interactivity/__init__.py +6 -0
  9. dara/core/interactivity/actions.py +26 -22
  10. dara/core/interactivity/any_data_variable.py +7 -135
  11. dara/core/interactivity/any_variable.py +1 -1
  12. dara/core/interactivity/client_variable.py +71 -0
  13. dara/core/interactivity/data_variable.py +8 -266
  14. dara/core/interactivity/derived_data_variable.py +6 -290
  15. dara/core/interactivity/derived_variable.py +381 -201
  16. dara/core/interactivity/filtering.py +29 -2
  17. dara/core/interactivity/loop_variable.py +2 -2
  18. dara/core/interactivity/non_data_variable.py +5 -68
  19. dara/core/interactivity/plain_variable.py +87 -14
  20. dara/core/interactivity/server_variable.py +325 -0
  21. dara/core/interactivity/state_variable.py +69 -0
  22. dara/core/interactivity/switch_variable.py +15 -15
  23. dara/core/interactivity/tabular_variable.py +94 -0
  24. dara/core/interactivity/url_variable.py +10 -90
  25. dara/core/internal/cache_store/cache_store.py +5 -20
  26. dara/core/internal/dependency_resolution.py +27 -69
  27. dara/core/internal/devtools.py +10 -3
  28. dara/core/internal/execute_action.py +9 -3
  29. dara/core/internal/multi_resource_lock.py +70 -0
  30. dara/core/internal/normalization.py +0 -5
  31. dara/core/internal/pandas_utils.py +105 -3
  32. dara/core/internal/pool/definitions.py +1 -1
  33. dara/core/internal/pool/task_pool.py +9 -6
  34. dara/core/internal/pool/utils.py +19 -14
  35. dara/core/internal/registries.py +3 -2
  36. dara/core/internal/registry.py +1 -1
  37. dara/core/internal/registry_lookup.py +5 -3
  38. dara/core/internal/routing.py +52 -121
  39. dara/core/internal/store.py +2 -29
  40. dara/core/internal/tasks.py +372 -182
  41. dara/core/internal/utils.py +25 -3
  42. dara/core/internal/websocket.py +1 -1
  43. dara/core/js_tooling/js_utils.py +2 -0
  44. dara/core/logging.py +10 -6
  45. dara/core/persistence.py +26 -4
  46. dara/core/umd/dara.core.umd.js +1091 -1469
  47. dara/core/visual/dynamic_component.py +17 -13
  48. {dara_core-1.19.0.dist-info → dara_core-1.20.0.dist-info}/METADATA +11 -11
  49. {dara_core-1.19.0.dist-info → dara_core-1.20.0.dist-info}/RECORD +52 -47
  50. {dara_core-1.19.0.dist-info → dara_core-1.20.0.dist-info}/LICENSE +0 -0
  51. {dara_core-1.19.0.dist-info → dara_core-1.20.0.dist-info}/WHEEL +0 -0
  52. {dara_core-1.19.0.dist-info → dara_core-1.20.0.dist-info}/entry_points.txt +0 -0
@@ -27,12 +27,16 @@ from typing import (
27
27
  Generic,
28
28
  List,
29
29
  Optional,
30
+ Protocol,
31
+ Tuple,
30
32
  TypeVar,
31
33
  Union,
32
34
  cast,
33
35
  )
34
36
 
37
+ import anyio
35
38
  from cachetools import LRUCache
39
+ from pandas import DataFrame
36
40
  from pydantic import (
37
41
  ConfigDict,
38
42
  Field,
@@ -41,7 +45,7 @@ from pydantic import (
41
45
  field_validator,
42
46
  model_serializer,
43
47
  )
44
- from typing_extensions import TypedDict
48
+ from typing_extensions import TypedDict, runtime_checkable
45
49
 
46
50
  from dara.core.base_definitions import (
47
51
  BaseCachePolicy,
@@ -49,14 +53,17 @@ from dara.core.base_definitions import (
49
53
  Cache,
50
54
  CacheArgType,
51
55
  CachedRegistryEntry,
56
+ NonTabularDataError,
52
57
  PendingTask,
53
- PendingValue,
54
58
  )
55
59
  from dara.core.interactivity.actions import TriggerVariable, assert_no_context
56
60
  from dara.core.interactivity.any_variable import AnyVariable
57
- from dara.core.interactivity.non_data_variable import NonDataVariable
61
+ from dara.core.interactivity.client_variable import ClientVariable
62
+ from dara.core.interactivity.filtering import FilterQuery, Pagination, apply_filters
58
63
  from dara.core.internal.cache_store import CacheStore
59
64
  from dara.core.internal.encoder_registry import deserialize
65
+ from dara.core.internal.multi_resource_lock import MultiResourceLock
66
+ from dara.core.internal.pandas_utils import DataResponse, append_index, build_data_response
60
67
  from dara.core.internal.tasks import MetaTask, Task, TaskManager
61
68
  from dara.core.internal.utils import get_cache_scope, run_user_handler
62
69
  from dara.core.logging import dev_logger, eng_logger
@@ -64,6 +71,10 @@ from dara.core.metrics import RUNTIME_METRICS_TRACKER
64
71
 
65
72
  VariableType = TypeVar('VariableType')
66
73
 
74
+ # Static lock for all DV computations, keyed by cache_key
75
+ # Explicitly not re-entrant, this prevents variable loops
76
+ DV_LOCK = MultiResourceLock()
77
+
67
78
  # Global set to track force keys that have been encountered
68
79
  # LRU with 2048 entries should be sufficient to not drop in-progress force keys
69
80
  # but also not have to worry about memory leaks
@@ -80,7 +91,24 @@ class DerivedVariableResult(TypedDict):
80
91
  value: Union[Any, BaseTask]
81
92
 
82
93
 
83
- class DerivedVariable(NonDataVariable, Generic[VariableType]):
94
+ @runtime_checkable
95
+ class FilterResolver(Protocol):
96
+ async def __call__(
97
+ self, data: Any, filters: Optional[FilterQuery] = None, pagination: Optional[Pagination] = None
98
+ ) -> Tuple[DataFrame, int]: ...
99
+
100
+
101
+ async def default_filter_resolver(
102
+ data: Any, filters: Optional[FilterQuery] = None, pagination: Optional[Pagination] = None
103
+ ) -> Tuple[DataFrame, int]:
104
+ if not isinstance(data, DataFrame):
105
+ raise NonTabularDataError(
106
+ f'Default filter resolver expects a DataFrame to be returned from the DerivedVariable function, got {type(data)}'
107
+ )
108
+ return apply_filters(data, filters, pagination)
109
+
110
+
111
+ class DerivedVariable(ClientVariable, Generic[VariableType]):
84
112
  """
85
113
  A DerivedVariable allows a value to be derived (via a function) from the current value of a set of other
86
114
  variables with a python function. This is one of two primary ways that python logic can be embedded into the
@@ -89,6 +117,61 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
89
117
  DerivedVariables can be chained together to form complex data flows whilst keeping everything organized and
90
118
  structured in an easy to follow way. DerivedVariable results are cached automatically and will only be
91
119
  recalculated when necessary.
120
+
121
+ As a special case, DerivedVariables can be used for tabular data and retrieving its slice as a DataFrame. This functionality
122
+ is utilized by e.g. the built-in Table component. By default, when passing a DerivedVariable to a Table component, Dara
123
+ expects the resolver function to return a DataFrame or None. This behaviour can be customized by providing a custom `filter_resolver`.
124
+ This function will be invoked with the result of the main DerivedVariable function, as well as filters and pagination. It can be used
125
+ to e.g. retrieve a slice of data from an API endpoint or a database instead of retrieving the entire dataset and filtering it in-memory.
126
+
127
+ ```python
128
+ from typing import Optional
129
+ import httpx
130
+ import pandas as pd
131
+ from dara.core import DerivedVariable, Variable
132
+ from dara.core.interactivity.filtering import FilterQuery, Pagination
133
+
134
+ # Custom filter resolver for API-based filtering
135
+ async def api_filter_resolver(data, filters: Optional[FilterQuery] = None, pagination: Optional[Pagination] = None):
136
+ async with httpx.AsyncClient() as client:
137
+ # in this case data is a string url
138
+ response = await client.get(data, params={
139
+ # translates filters/pagination to API-specific query params
140
+ 'filters': filters.dict() if filters else {},
141
+ 'offset': pagination.offset if pagination else 0,
142
+ 'limit': pagination.limit if pagination else 50
143
+ })
144
+ data = response.json()
145
+ # conform to the filter resolver API, return a tuple of (DataFrame, total_count)
146
+ return pd.DataFrame(data['results']), data['total_count']
147
+
148
+ # DerivedVariable with custom filtering
149
+ user_params = Variable({'dataset': 'experiments'})
150
+ derived_data = DerivedVariable(
151
+ lambda params: f"https://api.example.com/data/{params['dataset']}",
152
+ variables=[user_params],
153
+ filter_resolver=api_filter_resolver
154
+ )
155
+ ```
156
+
157
+ :param func: the function to derive a new value from the input variables.
158
+ :param variables: a set of input variables that will be passed to the deriving function
159
+ :param cache: whether to cache the result, defaults to global caching. Other options are to cache per user
160
+ session, per user or to not cache at all
161
+ :param run_as_task: whether to run the calculation in a separate process, recommended for any CPU intensive
162
+ tasks, defaults to False
163
+ :param polling_interval: an optional polling interval for the DerivedVariable. Setting this will cause the
164
+ component to poll the backend and refresh itself every n seconds.
165
+ :param filter_resolver: an optional function to resolve the filter query for the derived variable. This can be
166
+ used to customize the way tabular data is resolved. This is invoked with the result of the main DerivedVariable function,
167
+ as well as filters and pagination. The function should return a DataFrame and total count.
168
+ :param deps: an optional array of variables, specifying which dependant variables changing should trigger a
169
+ recalculation of the derived variable
170
+ - `deps = None` - `func` is run every time (default behaviour),
171
+ - `deps = []` - `func` is run once on initial startup,
172
+ - `deps = [var1, var2]` - `func` is run whenever one of these vars changes
173
+ - `deps = [var1.get('nested_property')]` - `func` is run only when the nested property changes, other changes to the variable are ignored
174
+ :param uid: the unique identifier for this variable; if not provided a random one is generated
92
175
  """
93
176
 
94
177
  cache: Optional[BaseCachePolicy]
@@ -97,11 +180,11 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
97
180
  deps: Optional[List[AnyVariable]] = Field(validate_default=True)
98
181
  nested: List[str] = Field(default_factory=list)
99
182
  uid: str
100
- model_config = ConfigDict(extra='forbid', use_enum_values=True)
183
+ model_config = ConfigDict(extra='forbid', use_enum_values=True, arbitrary_types_allowed=True)
101
184
 
102
185
  def __init__(
103
186
  self,
104
- func: Callable[..., VariableType] | Callable[..., Awaitable[VariableType]],
187
+ func: Union[Callable[..., VariableType], Callable[..., Awaitable[VariableType]]],
105
188
  variables: List[AnyVariable],
106
189
  cache: Optional[CacheArgType] = Cache.Type.GLOBAL,
107
190
  run_as_task: bool = False,
@@ -109,36 +192,25 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
109
192
  deps: Optional[List[AnyVariable]] = None,
110
193
  uid: Optional[str] = None,
111
194
  nested: Optional[List[str]] = None,
112
- _get_value: Optional[Callable[..., Awaitable[Any]]] = None,
195
+ filter_resolver: Optional[FilterResolver] = None,
196
+ **kwargs,
113
197
  ):
114
- """
115
- A DerivedVariable allows a value to be derived (via a function) from the current value of a set of other
116
- variables with a python function. This is one of two primary ways that python logic can be embedded into the
117
- application (the other being the @py_component decorator).
118
-
119
- DerivedVariables can be chained together to form complex data flows whilst keeping everything organized and
120
- structured in an easy to follow way. DerivedVariable results are cached automatically and will only be
121
- recalculated when necessary.
122
-
123
- :param func: the function to derive a new value from the input variables.
124
- :param variables: a set of input variables that will be passed to the deriving function
125
- :param cache: whether to cache the result, defaults to global caching. Other options are to cache per user
126
- session, per user or to not cache at all
127
- :param run_as_task: whether to run the calculation in a separate process, recommended for any CPU intensive
128
- tasks, defaults to False
129
- :param polling_interval: an optional polling interval for the DerivedVariable. Setting this will cause the
130
- component to poll the backend and refresh itself every n seconds.
131
- :param deps: an optional array of variables, specifying which dependant variables changing should trigger a
132
- recalculation of the derived variable
133
- - `deps = None` - `func` is ran everytime (default behaviour),
134
- - `deps = []` - `func` is ran once on initial startup,
135
- - `deps = [var1, var2]` - `func` is ran whenever one of these vars changes
136
- - `deps = [var1.get('nested_property')]` - `func` is ran only when the nested property changes, other changes to the variable are ignored
137
- :param uid: the unique identifier for this variable; if not provided a random one is generated
138
- """
139
198
  if nested is None:
140
199
  nested = []
141
200
 
201
+ # Validate that StateVariables are not used as inputs
202
+ from dara.core.interactivity.state_variable import StateVariable
203
+
204
+ for var in variables:
205
+ if isinstance(var, StateVariable):
206
+ raise ValueError(
207
+ 'StateVariable cannot be used as input to DerivedVariable. '
208
+ 'StateVariables are internal variables for tracking DerivedVariable states '
209
+ 'and using them as inputs would create complex dependencies that are '
210
+ 'difficult to debug. Consider using the parent DerivedVariable directly instead,'
211
+ ' or use the StateVariable with an If component or SwitchVariable.'
212
+ )
213
+
142
214
  if cache is not None:
143
215
  cache = Cache.Policy.from_arg(cache)
144
216
 
@@ -163,6 +235,7 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
163
235
  polling_interval=polling_interval,
164
236
  deps=deps,
165
237
  nested=nested,
238
+ **kwargs,
166
239
  )
167
240
 
168
241
  # Import the registry of variables and register the function at import
@@ -181,12 +254,14 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
181
254
  DerivedVariableRegistryEntry(
182
255
  cache=cache,
183
256
  func=func,
257
+ filter_resolver=filter_resolver,
184
258
  polling_interval=polling_interval,
185
259
  run_as_task=run_as_task,
186
260
  uid=str(self.uid),
187
261
  variables=variables,
188
262
  deps=deps_indexes,
189
- get_value=_get_value or DerivedVariable.get_value,
263
+ get_value=DerivedVariable.get_value,
264
+ get_tabular_data=DerivedVariable.get_tabular_data,
190
265
  ),
191
266
  )
192
267
 
@@ -214,6 +289,39 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
214
289
  assert_no_context('ctx.trigger')
215
290
  return TriggerVariable(variable=self, force=force)
216
291
 
292
+ @property
293
+ def is_loading(self):
294
+ """
295
+ Get a StateVariable that tracks the loading state of this DerivedVariable.
296
+
297
+ :return: StateVariable that is True when this DerivedVariable is loading, False otherwise
298
+ """
299
+ from dara.core.interactivity.state_variable import StateVariable
300
+
301
+ return StateVariable(parent_variable=self, property_name='loading')
302
+
303
+ @property
304
+ def has_error(self):
305
+ """
306
+ Get a StateVariable that tracks the error state of this DerivedVariable.
307
+
308
+ :return: StateVariable that is True when this DerivedVariable has an error, False otherwise
309
+ """
310
+ from dara.core.interactivity.state_variable import StateVariable
311
+
312
+ return StateVariable(parent_variable=self, property_name='error')
313
+
314
+ @property
315
+ def has_value(self):
316
+ """
317
+ Get a StateVariable that tracks whether this DerivedVariable has a resolved value.
318
+
319
+ :return: StateVariable that is True when this DerivedVariable has a value, False otherwise
320
+ """
321
+ from dara.core.interactivity.state_variable import StateVariable
322
+
323
+ return StateVariable(parent_variable=self, property_name='hasValue')
324
+
217
325
  @staticmethod
218
326
  def _get_cache_key(*args, uid: str, deps: Optional[List[int]] = None):
219
327
  """
@@ -305,6 +413,7 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
305
413
  task_mgr: TaskManager,
306
414
  args: List[Any],
307
415
  force_key: Optional[str] = None,
416
+ _pin_result: bool = False,
308
417
  ) -> DerivedVariableResult:
309
418
  """
310
419
  Get the value of this DerivedVariable. This method will check the main app store for an appropriate response
@@ -316,10 +425,18 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
316
425
  :param task_mgr: task manager instance
317
426
  :param args: the arguments to call the underlying function with
318
427
  :param force_key: unique key for forced execution, if provided forces cache bypass
428
+ :param _pin_result: whether to pin the result in the store, used internally by derived data variables
319
429
  """
430
+ # dynamic import due to circular import
431
+ from dara.core.internal.dependency_resolution import (
432
+ is_forced,
433
+ resolve_dependency,
434
+ )
435
+
320
436
  assert var_entry.func is not None, 'DerivedVariable function is not defined'
321
437
 
322
- histogram = RUNTIME_METRICS_TRACKER.get_dv_histogram(var_entry.uid)
438
+ # Shortened UID used for logging
439
+ _uid_short = f'{var_entry.uid[:3]}..{var_entry.uid[-3:]}'
323
440
 
324
441
  if var_entry.run_as_task:
325
442
  from dara.core.internal.registries import utils_registry
@@ -329,212 +446,272 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
329
446
  'Task module is not configured. Set config.task_module path to a tasks.py module to run a derived variable as task.'
330
447
  )
331
448
 
332
- with histogram.time():
333
- # Shortened UID used for logging
334
- _uid_short = f'{var_entry.uid[:3]}..{var_entry.uid[-3:]}'
335
-
336
- # Extract and process nested derived variables
337
- values = []
338
-
339
- # dynamic import due to circular import
340
- from dara.core.internal.dependency_resolution import resolve_dependency
341
-
342
- eng_logger.info(
343
- f'Derived Variable {_uid_short} get_value',
344
- {'uid': var_entry.uid, 'args': args},
345
- )
346
-
347
- for val in args:
348
- var_value = await resolve_dependency(val, store, task_mgr)
349
- values.append(var_value)
449
+ # Compute cache key first, before any other work
450
+ cache_key = DerivedVariable._get_cache_key(*args, uid=var_entry.uid, deps=var_entry.deps)
350
451
 
351
- eng_logger.debug(
352
- f'DerivedVariable {_uid_short}',
353
- 'resolved arguments',
354
- {'values': values, 'uid': var_entry.uid},
355
- )
452
+ # Lock on this specific cache key for the entire computation
453
+ async with DV_LOCK.acquire(cache_key):
454
+ histogram = RUNTIME_METRICS_TRACKER.get_dv_histogram(var_entry.uid)
356
455
 
357
- # Loop over the passed arguments and if the expected type is a BaseModel and arg is a dict then convert the dict
358
- # to an instance of the BaseModel class.
359
- parsed_args = DerivedVariable._restore_pydantic_models(var_entry.func, *values)
456
+ with histogram.time():
457
+ # Extract and process nested derived variables
458
+ values: List[Any] = [None] * len(args)
360
459
 
361
- dev_logger.debug(
362
- f'DerivedVariable {_uid_short}',
363
- 'executing',
364
- {'args': parsed_args, 'uid': var_entry.uid},
365
- )
460
+ eng_logger.info(
461
+ f'Derived Variable {_uid_short} get_value',
462
+ {'uid': var_entry.uid, 'args': args},
463
+ )
366
464
 
367
- # Check if there are any Tasks to be run in the args
368
- has_tasks = any(isinstance(arg, BaseTask) for arg in parsed_args)
465
+ # Whether any of the (grand)children has been forced - if so, the parent should skip the cache as well
466
+ has_forced_child = False
369
467
 
370
- cache_key = DerivedVariable._get_cache_key(*args, uid=var_entry.uid, deps=var_entry.deps)
371
- await DerivedVariable.add_latest_value(store, var_entry, cache_key)
468
+ async def _resolve_arg(val: Any, index: int):
469
+ nonlocal has_forced_child
372
470
 
373
- cache_type = var_entry.cache
471
+ if is_forced(val):
472
+ has_forced_child = True
473
+ var_value = await resolve_dependency(val, store, task_mgr)
474
+ values[index] = var_value
374
475
 
375
- # Handle force key tracking to prevent double execution
376
- effective_force = force_key is not None
377
- if force_key is not None:
378
- if force_key in _force_keys_seen:
379
- # This force key has been seen before, don't force again
380
- effective_force = False
381
- eng_logger.debug(
382
- f'DerivedVariable {_uid_short} force key already seen, using cached value',
383
- extra={'uid': var_entry.uid, 'force_key': force_key},
384
- )
385
- else:
386
- # First time seeing this force key, add it to the set
387
- _force_keys_seen[force_key] = True
388
- eng_logger.debug(
389
- f'DerivedVariable {_uid_short} new force key, will force recalculation',
390
- extra={'uid': var_entry.uid, 'force_key': force_key},
391
- )
476
+ async with anyio.create_task_group() as tg:
477
+ for idx, val in enumerate(args):
478
+ tg.start_soon(_resolve_arg, val, idx)
392
479
 
393
- # If deps is not None, force session use
394
- # Note: this is temporarily commented out as no tests were broken by removing it;
395
- # once we find what scenario this fixes, we should add a test to cover that scenario and move this snippet
396
- # to constructors of DerivedVariable and DerivedDataVariable
397
- # if cache_type == CacheType.GLOBAL and (var_entry.deps is not None and len(var_entry.deps) > 0):
398
- # cache_type = CacheType.SESSION
399
-
400
- eng_logger.debug(
401
- f'DerivedVariable {_uid_short}',
402
- f'using cache: {cache_type}',
403
- {'uid': var_entry.uid},
404
- )
405
-
406
- # Start with a sentinel value to indicate that the value is missing
407
- # from cache, this lets us distinguish between a cache miss and a
408
- # value that is None
409
- value = VALUE_MISSING
480
+ eng_logger.debug(
481
+ f'DerivedVariable {_uid_short}',
482
+ 'resolved arguments',
483
+ {'values': values, 'uid': var_entry.uid},
484
+ )
410
485
 
411
- ignore_cache = (
412
- var_entry.cache is None
413
- or var_entry.polling_interval
414
- or DerivedVariable.check_polling(var_entry.variables)
415
- or effective_force
416
- )
417
- if not ignore_cache:
418
- try:
419
- value = await store.get(var_entry, key=cache_key, raise_for_missing=True)
420
- eng_logger.debug(
421
- f'DerivedVariable {_uid_short}',
422
- 'retrieved value from cache',
423
- {'uid': var_entry.uid, 'cached_value': value},
424
- )
425
- except KeyError:
426
- eng_logger.debug(
427
- f'DerivedVariable {_uid_short}',
428
- 'no value found in cache',
429
- {'uid': var_entry.uid},
430
- )
431
- # key error means no entry found;
432
- # this lets us distinguish from a None value stored and not found
486
+ # Loop over the passed arguments and if the expected type is a BaseModel and arg is a dict then convert the dict
487
+ # to an instance of the BaseModel class.
488
+ parsed_args = DerivedVariable._restore_pydantic_models(var_entry.func, *values)
433
489
 
434
- # If it's a PendingTask then return that task so it can be awaited later by a MetaTask
435
- if isinstance(value, PendingTask):
436
- eng_logger.info(
437
- f'DerivedVariable {_uid_short} waiting for pending task',
438
- {'uid': var_entry.uid, 'pending_task': value.task_id},
490
+ dev_logger.debug(
491
+ f'DerivedVariable {_uid_short}',
492
+ 'executing',
493
+ {'args': parsed_args, 'uid': var_entry.uid},
439
494
  )
440
- value.add_subscriber()
441
- return {'cache_key': cache_key, 'value': value}
442
495
 
443
- # If it's a PendingValue then wait for the value and return it
444
- if isinstance(value, PendingValue):
445
- eng_logger.info(
446
- f'DerivedVariable {_uid_short} waiting for pending value',
447
- {'uid': var_entry.uid, 'pending_value': value},
448
- )
449
- return {
450
- 'cache_key': cache_key,
451
- 'value': await store.get_or_wait(var_entry, key=cache_key),
452
- }
496
+ # Check if there are any Tasks to be run in the args
497
+ has_tasks = any(isinstance(arg, BaseTask) for arg in parsed_args)
498
+
499
+ await DerivedVariable.add_latest_value(store, var_entry, cache_key)
500
+
501
+ cache_type = var_entry.cache
502
+
503
+ # Handle force key tracking to prevent double execution
504
+ effective_force = force_key is not None
505
+ if force_key is not None:
506
+ if force_key in _force_keys_seen:
507
+ # This force key has been seen before, don't force again
508
+ effective_force = False
509
+ eng_logger.debug(
510
+ f'DerivedVariable {_uid_short} force key already seen, using cached value',
511
+ extra={'uid': var_entry.uid, 'force_key': force_key},
512
+ )
513
+ else:
514
+ # First time seeing this force key, add it to the set
515
+ _force_keys_seen[force_key] = True
516
+ eng_logger.debug(
517
+ f'DerivedVariable {_uid_short} new force key, will force recalculation',
518
+ extra={'uid': var_entry.uid, 'force_key': force_key},
519
+ )
453
520
 
454
- # We retrieved an actual value from the cache, return it
455
- if not ignore_cache and value is not VALUE_MISSING:
456
- eng_logger.info(
457
- f'DerivedVariable {_uid_short} returning cached value directly',
458
- {'uid': var_entry.uid, 'cached_value': value},
521
+ eng_logger.debug(
522
+ f'DerivedVariable {_uid_short}',
523
+ f'using cache: {cache_type}',
524
+ {'uid': var_entry.uid},
459
525
  )
460
- return {'cache_key': cache_key, 'value': value}
461
526
 
462
- # Setup pending task if it needs it and then return the task
463
- if var_entry.run_as_task or has_tasks:
464
- var_uid = var_entry.uid or str(uuid.uuid4())
527
+ # Start with a sentinel value to indicate that the value is missing
528
+ # from cache, this lets us distinguish between a cache miss and a
529
+ # value that is None
530
+ value = VALUE_MISSING
531
+
532
+ ignore_cache = (
533
+ var_entry.cache is None
534
+ or var_entry.polling_interval
535
+ or DerivedVariable.check_polling(var_entry.variables)
536
+ or effective_force
537
+ or has_forced_child
538
+ )
539
+ if not ignore_cache:
540
+ try:
541
+ value = await store.get(var_entry, key=cache_key, raise_for_missing=True)
542
+ eng_logger.debug(
543
+ f'DerivedVariable {_uid_short}',
544
+ 'retrieved value from cache',
545
+ {'uid': var_entry.uid, 'cached_value': value},
546
+ )
547
+ except KeyError:
548
+ eng_logger.debug(
549
+ f'DerivedVariable {_uid_short}',
550
+ 'no value found in cache',
551
+ {'uid': var_entry.uid},
552
+ )
553
+ # key error means no entry found;
554
+ # this lets us distinguish from a None value stored and not found
555
+
556
+ # If it's a PendingTask then return that task so it can be awaited later by a MetaTask
557
+ if isinstance(value, PendingTask):
558
+ eng_logger.info(
559
+ f'DerivedVariable {_uid_short} waiting for pending task',
560
+ {'uid': var_entry.uid, 'pending_task': value.task_id},
561
+ )
562
+ return {'cache_key': cache_key, 'value': value}
465
563
 
466
- if has_tasks:
467
- task_id = f'{var_uid}_MetaTask_{str(uuid.uuid4())}'
564
+ # We retrieved an actual value from the cache, return it
565
+ if not ignore_cache and value is not VALUE_MISSING:
566
+ eng_logger.info(
567
+ f'DerivedVariable {_uid_short} returning cached value directly',
568
+ {'uid': var_entry.uid, 'cached_value': value},
569
+ )
570
+ return {'cache_key': cache_key, 'value': value}
571
+
572
+ # Setup pending task if it needs it and then return the task
573
+ if var_entry.run_as_task or has_tasks:
574
+ var_uid = var_entry.uid or str(uuid.uuid4())
575
+
576
+ if has_tasks:
577
+ task_id = f'{var_uid}_MetaTask_{str(uuid.uuid4())}'
578
+
579
+ extra_notify_channels = [
580
+ channel
581
+ for arg in parsed_args
582
+ if isinstance(arg, BaseTask)
583
+ for channel in arg.notify_channels
584
+ ]
585
+ eng_logger.debug(
586
+ f'DerivedVariable {_uid_short}',
587
+ 'running has tasks',
588
+ {'uid': var_entry.uid, 'task_id': task_id},
589
+ )
590
+ meta_task = MetaTask(
591
+ var_entry.func,
592
+ parsed_args,
593
+ notify_channels=list(set(extra_notify_channels)),
594
+ process_as_task=var_entry.run_as_task,
595
+ cache_key=cache_key,
596
+ task_id=task_id,
597
+ reg_entry=var_entry, # task results are set as the DV result
598
+ )
599
+
600
+ # Immediately store the pending task in the store
601
+ pending_task = task_mgr.register_task(meta_task)
602
+ await store.set(var_entry, key=cache_key, value=pending_task, pin=_pin_result)
603
+
604
+ return {'cache_key': cache_key, 'value': meta_task}
605
+
606
+ task_id = f'{var_uid}_Task_{str(uuid.uuid4())}'
468
607
 
469
- extra_notify_channels = [
470
- channel for arg in parsed_args if isinstance(arg, BaseTask) for channel in arg.notify_channels
471
- ]
472
608
  eng_logger.debug(
473
609
  f'DerivedVariable {_uid_short}',
474
- 'running has tasks',
610
+ 'running as a task',
475
611
  {'uid': var_entry.uid, 'task_id': task_id},
476
612
  )
477
- meta_task = MetaTask(
613
+ task = Task(
478
614
  var_entry.func,
479
615
  parsed_args,
480
- notify_channels=list(set(extra_notify_channels)),
481
- process_as_task=var_entry.run_as_task,
482
616
  cache_key=cache_key,
483
617
  task_id=task_id,
484
618
  reg_entry=var_entry, # task results are set as the DV result
485
619
  )
486
620
 
487
- return {'cache_key': cache_key, 'value': meta_task}
621
+ # Immediately store the pending task in the store
622
+ pending_task = task_mgr.register_task(task)
623
+ await store.set(var_entry, key=cache_key, value=pending_task, pin=_pin_result)
488
624
 
489
- task_id = f'{var_uid}_Task_{str(uuid.uuid4())}'
625
+ return {'cache_key': cache_key, 'value': task}
490
626
 
491
- eng_logger.debug(
492
- f'DerivedVariable {_uid_short}',
493
- 'running as a task',
494
- {'uid': var_entry.uid, 'task_id': task_id},
495
- )
496
- task = Task(
497
- var_entry.func,
498
- parsed_args,
499
- cache_key=cache_key,
500
- task_id=task_id,
501
- reg_entry=var_entry, # task results are set as the DV result
502
- )
503
- return {'cache_key': cache_key, 'value': task}
627
+ try:
628
+ result = await run_user_handler(var_entry.func, args=parsed_args)
629
+ except Exception:
630
+ # Delete the store value so subsequent requests recalculate instead
631
+ if var_entry.cache is not None:
632
+ await store.delete(var_entry, key=cache_key)
633
+ raise
634
+
635
+ # If a task is returned then ensure we register it
636
+ if isinstance(result, BaseTask):
637
+ eng_logger.info(
638
+ f'DerivedVariable {_uid_short} returning task as a result',
639
+ {'uid': var_entry.uid, 'task_id': result.task_id},
640
+ )
641
+ # Make sure cache settings are set on the task
642
+ result.cache_key = cache_key
643
+ result.reg_entry = var_entry
504
644
 
505
- # only set pending value if cache is not None, otherwise subsequent requests calculate the value again
506
- if var_entry.cache is not None:
507
- await store.set_pending(var_entry, key=cache_key)
645
+ task_mgr.register_task(result)
508
646
 
509
- try:
510
- result = await run_user_handler(var_entry.func, args=parsed_args)
511
- except Exception as e:
512
- # Set the store value to None before raising, so subsequent requests don't hang on a PendingValue
647
+ return {'cache_key': cache_key, 'value': result}
648
+
649
+ # only set the value if cache is not None, otherwise subsequent requests calculate the value again
513
650
  if var_entry.cache is not None:
514
- await store.set(var_entry, key=cache_key, value=None, error=e)
515
- raise
651
+ await store.set(var_entry, key=cache_key, value=result, pin=_pin_result)
516
652
 
517
- # If a task is returned then update pending value to pending task and return it
518
- if isinstance(result, BaseTask):
519
653
  eng_logger.info(
520
- f'DerivedVariable {_uid_short} returning task as a result',
521
- {'uid': var_entry.uid, 'task_id': result.task_id},
654
+ f'DerivedVariable {_uid_short} returning result',
655
+ {'uid': var_entry.uid, 'result': result},
522
656
  )
523
- # Make sure cache settings are set on the task
524
- result.cache_key = cache_key
525
- result.reg_entry = var_entry
526
-
527
657
  return {'cache_key': cache_key, 'value': result}
528
658
 
529
- # only set the value if cache is not None, otherwise subsequent requests calculate the value again
530
- if var_entry.cache is not None:
531
- await store.set(var_entry, key=cache_key, value=result)
659
+ @classmethod
660
+ async def _filter_data(
661
+ cls,
662
+ data: Union[DataFrame, Any, None],
663
+ filter_resolver: FilterResolver,
664
+ filters: Optional[FilterQuery] = None,
665
+ pagination: Optional[Pagination] = None,
666
+ ) -> DataResponse:
667
+ if data is None:
668
+ return DataResponse(data=None, count=0, schema=None)
669
+
670
+ # silently add the index column for DataFrame values
671
+ # User resolver could technically not be returning a DataFrame
672
+ if isinstance(data, DataFrame):
673
+ data = append_index(data)
674
+
675
+ # Filtering part
676
+ data, count = await filter_resolver(data, filters, pagination)
677
+ return build_data_response(data, count)
678
+
679
+ @classmethod
680
+ async def get_tabular_data(
681
+ cls,
682
+ var_entry: DerivedVariableRegistryEntry,
683
+ store: CacheStore,
684
+ task_mgr: TaskManager,
685
+ args: List[Any],
686
+ force_key: Optional[str] = None,
687
+ pagination: Optional[Pagination] = None,
688
+ filters: Optional[FilterQuery] = None,
689
+ ) -> Union[MetaTask, DataResponse]:
690
+ """
691
+ Get filtered tabular data from the underlying derived variable.
532
692
 
533
- eng_logger.info(
534
- f'DerivedVariable {_uid_short} returning result',
535
- {'uid': var_entry.uid, 'result': result},
693
+ Resolves the DerivedVariable and runs filtering on the result,
694
+ either using a custom filter_resolver or the default logic.
695
+ """
696
+ filter_resolver = var_entry.filter_resolver or default_filter_resolver
697
+ result = await cls.get_value(var_entry, store, task_mgr, args, force_key)
698
+
699
+ if isinstance(result['value'], BaseTask):
700
+ task_id = f'{var_entry.uid}_Filter_MetaTask_{str(uuid.uuid4())}'
701
+ task = MetaTask(
702
+ cls._filter_data,
703
+ task_id=task_id,
704
+ kwargs={
705
+ 'data': result['value'],
706
+ 'filters': filters,
707
+ 'pagination': pagination,
708
+ 'filter_resolver': filter_resolver,
709
+ },
536
710
  )
537
- return {'cache_key': cache_key, 'value': result}
711
+ task_mgr.register_task(task)
712
+ return task
713
+
714
+ return await cls._filter_data(result['value'], filter_resolver, filters, pagination)
538
715
 
539
716
  @classmethod
540
717
  def check_polling(cls, variables: List[AnyVariable]):
@@ -558,12 +735,15 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
558
735
  class DerivedVariableRegistryEntry(CachedRegistryEntry):
559
736
  deps: Optional[List[int]]
560
737
  func: Optional[Callable[..., Any]]
738
+ filter_resolver: Optional[FilterResolver]
561
739
  run_as_task: bool
562
740
  variables: List[AnyVariable]
563
741
  polling_interval: Optional[int]
564
742
  get_value: Callable[..., Awaitable[Any]]
565
743
  """Handler to get the value of the derived variable. Defaults to DerivedVariable.get_value, should match the signature"""
566
- model_config = ConfigDict(extra='forbid')
744
+ get_tabular_data: Callable[..., Awaitable[Union[DataResponse, MetaTask]]]
745
+ """Handler to get the tabular data of the derived variable. Defaults to DerivedVariable.get_tabular_data, should match the signature"""
746
+ model_config = ConfigDict(extra='forbid', arbitrary_types_allowed=True)
567
747
 
568
748
 
569
749
  class LatestValueRegistryEntry(CachedRegistryEntry):