dara-core 1.20.1a1__py3-none-any.whl → 1.20.1a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. dara/core/__init__.py +3 -0
  2. dara/core/actions.py +1 -2
  3. dara/core/auth/basic.py +22 -16
  4. dara/core/auth/definitions.py +2 -2
  5. dara/core/auth/routes.py +5 -5
  6. dara/core/auth/utils.py +5 -5
  7. dara/core/base_definitions.py +22 -64
  8. dara/core/cli.py +8 -7
  9. dara/core/configuration.py +5 -2
  10. dara/core/css.py +1 -2
  11. dara/core/data_utils.py +18 -19
  12. dara/core/defaults.py +6 -7
  13. dara/core/definitions.py +50 -19
  14. dara/core/http.py +7 -3
  15. dara/core/interactivity/__init__.py +6 -0
  16. dara/core/interactivity/actions.py +52 -50
  17. dara/core/interactivity/any_data_variable.py +7 -134
  18. dara/core/interactivity/any_variable.py +5 -8
  19. dara/core/interactivity/client_variable.py +71 -0
  20. dara/core/interactivity/data_variable.py +8 -266
  21. dara/core/interactivity/derived_data_variable.py +7 -290
  22. dara/core/interactivity/derived_variable.py +416 -176
  23. dara/core/interactivity/filtering.py +46 -27
  24. dara/core/interactivity/loop_variable.py +2 -2
  25. dara/core/interactivity/non_data_variable.py +5 -68
  26. dara/core/interactivity/plain_variable.py +89 -15
  27. dara/core/interactivity/server_variable.py +325 -0
  28. dara/core/interactivity/state_variable.py +69 -0
  29. dara/core/interactivity/switch_variable.py +19 -19
  30. dara/core/interactivity/tabular_variable.py +94 -0
  31. dara/core/interactivity/url_variable.py +10 -90
  32. dara/core/internal/cache_store/base_impl.py +2 -1
  33. dara/core/internal/cache_store/cache_store.py +22 -25
  34. dara/core/internal/cache_store/keep_all.py +4 -1
  35. dara/core/internal/cache_store/lru.py +5 -1
  36. dara/core/internal/cache_store/ttl.py +4 -1
  37. dara/core/internal/cgroup.py +1 -1
  38. dara/core/internal/dependency_resolution.py +60 -66
  39. dara/core/internal/devtools.py +12 -5
  40. dara/core/internal/download.py +13 -4
  41. dara/core/internal/encoder_registry.py +7 -7
  42. dara/core/internal/execute_action.py +13 -13
  43. dara/core/internal/hashing.py +1 -3
  44. dara/core/internal/import_discovery.py +3 -4
  45. dara/core/internal/multi_resource_lock.py +70 -0
  46. dara/core/internal/normalization.py +9 -18
  47. dara/core/internal/pandas_utils.py +107 -5
  48. dara/core/internal/pool/definitions.py +1 -1
  49. dara/core/internal/pool/task_pool.py +25 -16
  50. dara/core/internal/pool/utils.py +21 -18
  51. dara/core/internal/pool/worker.py +3 -2
  52. dara/core/internal/port_utils.py +1 -1
  53. dara/core/internal/registries.py +12 -6
  54. dara/core/internal/registry.py +4 -2
  55. dara/core/internal/registry_lookup.py +11 -5
  56. dara/core/internal/routing.py +109 -145
  57. dara/core/internal/scheduler.py +13 -8
  58. dara/core/internal/settings.py +2 -2
  59. dara/core/internal/store.py +2 -29
  60. dara/core/internal/tasks.py +379 -195
  61. dara/core/internal/utils.py +36 -13
  62. dara/core/internal/websocket.py +21 -20
  63. dara/core/js_tooling/js_utils.py +28 -26
  64. dara/core/js_tooling/templates/vite.config.template.ts +12 -3
  65. dara/core/logging.py +13 -12
  66. dara/core/main.py +14 -11
  67. dara/core/metrics/cache.py +1 -1
  68. dara/core/metrics/utils.py +3 -3
  69. dara/core/persistence.py +27 -5
  70. dara/core/umd/dara.core.umd.js +68291 -64718
  71. dara/core/visual/components/__init__.py +2 -2
  72. dara/core/visual/components/fallback.py +30 -4
  73. dara/core/visual/components/for_cmp.py +4 -1
  74. dara/core/visual/css/__init__.py +30 -31
  75. dara/core/visual/dynamic_component.py +31 -28
  76. dara/core/visual/progress_updater.py +4 -3
  77. {dara_core-1.20.1a1.dist-info → dara_core-1.20.1a3.dist-info}/METADATA +12 -11
  78. dara_core-1.20.1a3.dist-info/RECORD +119 -0
  79. dara_core-1.20.1a1.dist-info/RECORD +0 -114
  80. {dara_core-1.20.1a1.dist-info → dara_core-1.20.1a3.dist-info}/LICENSE +0 -0
  81. {dara_core-1.20.1a1.dist-info → dara_core-1.20.1a3.dist-info}/WHEEL +0 -0
  82. {dara_core-1.20.1a1.dist-info → dara_core-1.20.1a3.dist-info}/entry_points.txt +0 -0
@@ -19,19 +19,24 @@ from __future__ import annotations
19
19
 
20
20
  import json
21
21
  import uuid
22
+ from collections.abc import Awaitable
22
23
  from inspect import Parameter, signature
23
24
  from typing import (
24
25
  Any,
25
- Awaitable,
26
26
  Callable,
27
27
  Generic,
28
28
  List,
29
29
  Optional,
30
+ Protocol,
31
+ Tuple,
30
32
  TypeVar,
31
33
  Union,
32
34
  cast,
33
35
  )
34
36
 
37
+ import anyio
38
+ from cachetools import LRUCache
39
+ from pandas import DataFrame
35
40
  from pydantic import (
36
41
  ConfigDict,
37
42
  Field,
@@ -40,7 +45,7 @@ from pydantic import (
40
45
  field_validator,
41
46
  model_serializer,
42
47
  )
43
- from typing_extensions import TypedDict
48
+ from typing_extensions import TypedDict, runtime_checkable
44
49
 
45
50
  from dara.core.base_definitions import (
46
51
  BaseCachePolicy,
@@ -48,14 +53,17 @@ from dara.core.base_definitions import (
48
53
  Cache,
49
54
  CacheArgType,
50
55
  CachedRegistryEntry,
56
+ NonTabularDataError,
51
57
  PendingTask,
52
- PendingValue,
53
58
  )
54
59
  from dara.core.interactivity.actions import TriggerVariable, assert_no_context
55
60
  from dara.core.interactivity.any_variable import AnyVariable
56
- from dara.core.interactivity.non_data_variable import NonDataVariable
61
+ from dara.core.interactivity.client_variable import ClientVariable
62
+ from dara.core.interactivity.filtering import FilterQuery, Pagination, apply_filters
57
63
  from dara.core.internal.cache_store import CacheStore
58
64
  from dara.core.internal.encoder_registry import deserialize
65
+ from dara.core.internal.multi_resource_lock import MultiResourceLock
66
+ from dara.core.internal.pandas_utils import DataResponse, append_index, build_data_response
59
67
  from dara.core.internal.tasks import MetaTask, Task, TaskManager
60
68
  from dara.core.internal.utils import get_cache_scope, run_user_handler
61
69
  from dara.core.logging import dev_logger, eng_logger
@@ -63,13 +71,44 @@ from dara.core.metrics import RUNTIME_METRICS_TRACKER
63
71
 
64
72
  VariableType = TypeVar('VariableType')
65
73
 
74
+ # Static lock for all DV computations, keyed by cache_key
75
+ # Explicitly not re-entrant, this prevents variable loops
76
+ DV_LOCK = MultiResourceLock()
77
+
78
+ # Global set to track force keys that have been encountered
79
+ # LRU with 2048 entries should be sufficient to not drop in-progress force keys
80
+ # but also not have to worry about memory leaks
81
+ _force_keys_seen: LRUCache[str, bool] = LRUCache(maxsize=2048)
82
+
83
+ VALUE_MISSING = object()
84
+ """
85
+ Sentinel value to indicate that a value is missing from the cache
86
+ """
87
+
66
88
 
67
89
  class DerivedVariableResult(TypedDict):
68
90
  cache_key: str
69
91
  value: Union[Any, BaseTask]
70
92
 
71
93
 
72
- class DerivedVariable(NonDataVariable, Generic[VariableType]):
94
+ @runtime_checkable
95
+ class FilterResolver(Protocol):
96
+ async def __call__(
97
+ self, data: Any, filters: Optional[FilterQuery] = None, pagination: Optional[Pagination] = None
98
+ ) -> Tuple[DataFrame, int]: ...
99
+
100
+
101
+ async def default_filter_resolver(
102
+ data: Any, filters: Optional[FilterQuery] = None, pagination: Optional[Pagination] = None
103
+ ) -> Tuple[DataFrame, int]:
104
+ if not isinstance(data, DataFrame):
105
+ raise NonTabularDataError(
106
+ f'Default filter resolver expects a DataFrame to be returned from the DerivedVariable function, got {type(data)}'
107
+ )
108
+ return apply_filters(data, filters, pagination)
109
+
110
+
111
+ class DerivedVariable(ClientVariable, Generic[VariableType]):
73
112
  """
74
113
  A DerivedVariable allows a value to be derived (via a function) from the current value of a set of other
75
114
  variables with a python function. This is one of two primary ways that python logic can be embedded into the
@@ -78,6 +117,61 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
78
117
  DerivedVariables can be chained together to form complex data flows whilst keeping everything organized and
79
118
  structured in an easy to follow way. DerivedVariable results are cached automatically and will only be
80
119
  recalculated when necessary.
120
+
121
+ As a special case, DerivedVariables can be used for tabular data and retrieving its slice as a DataFrame. This functionality
122
+ is utilized by e.g. the built-in Table component. By default, when passing a DerivedVariable to a Table component, Dara
123
+ expects the resolver function to return a DataFrame or None. This behaviour can be customized by providing a custom `filter_resolver`.
124
+ This function will be invoked with the result of the main DerivedVariable function, as well as filters and pagination. It can be used
125
+ to e.g. retrieve a slice of data from an API endpoint or a database instead of retrieving the entire dataset and filtering it in-memory.
126
+
127
+ ```python
128
+ from typing import Optional
129
+ import httpx
130
+ import pandas as pd
131
+ from dara.core import DerivedVariable, Variable
132
+ from dara.core.interactivity.filtering import FilterQuery, Pagination
133
+
134
+ # Custom filter resolver for API-based filtering
135
+ async def api_filter_resolver(data, filters: Optional[FilterQuery] = None, pagination: Optional[Pagination] = None):
136
+ async with httpx.AsyncClient() as client:
137
+ # in this case data is a string url
138
+ response = await client.get(data, params={
139
+ # translates filters/pagination to API-specific query params
140
+ 'filters': filters.dict() if filters else {},
141
+ 'offset': pagination.offset if pagination else 0,
142
+ 'limit': pagination.limit if pagination else 50
143
+ })
144
+ data = response.json()
145
+ # conform to the filter resolver API, return a tuple of (DataFrame, total_count)
146
+ return pd.DataFrame(data['results']), data['total_count']
147
+
148
+ # DerivedVariable with custom filtering
149
+ user_params = Variable({'dataset': 'experiments'})
150
+ derived_data = DerivedVariable(
151
+ lambda params: f"https://api.example.com/data/{params['dataset']}",
152
+ variables=[user_params],
153
+ filter_resolver=api_filter_resolver
154
+ )
155
+ ```
156
+
157
+ :param func: the function to derive a new value from the input variables.
158
+ :param variables: a set of input variables that will be passed to the deriving function
159
+ :param cache: whether to cache the result, defaults to global caching. Other options are to cache per user
160
+ session, per user or to not cache at all
161
+ :param run_as_task: whether to run the calculation in a separate process, recommended for any CPU intensive
162
+ tasks, defaults to False
163
+ :param polling_interval: an optional polling interval for the DerivedVariable. Setting this will cause the
164
+ component to poll the backend and refresh itself every n seconds.
165
+ :param filter_resolver: an optional function to resolve the filter query for the derived variable. This can be
166
+ used to customize the way tabular data is resolved. This is invoked with the result of the main DerivedVariable function,
167
+ as well as filters and pagination. The function should return a DataFrame and total count.
168
+ :param deps: an optional array of variables, specifying which dependant variables changing should trigger a
169
+ recalculation of the derived variable
170
+ - `deps = None` - `func` is run every time (default behaviour),
171
+ - `deps = []` - `func` is run once on initial startup,
172
+ - `deps = [var1, var2]` - `func` is run whenever one of these vars changes
173
+ - `deps = [var1.get('nested_property')]` - `func` is run only when the nested property changes, other changes to the variable are ignored
174
+ :param uid: the unique identifier for this variable; if not provided a random one is generated
81
175
  """
82
176
 
83
177
  cache: Optional[BaseCachePolicy]
@@ -86,11 +180,11 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
86
180
  deps: Optional[List[AnyVariable]] = Field(validate_default=True)
87
181
  nested: List[str] = Field(default_factory=list)
88
182
  uid: str
89
- model_config = ConfigDict(extra='forbid', use_enum_values=True)
183
+ model_config = ConfigDict(extra='forbid', use_enum_values=True, arbitrary_types_allowed=True)
90
184
 
91
185
  def __init__(
92
186
  self,
93
- func: Callable[..., VariableType] | Callable[..., Awaitable[VariableType]],
187
+ func: Union[Callable[..., VariableType], Callable[..., Awaitable[VariableType]]],
94
188
  variables: List[AnyVariable],
95
189
  cache: Optional[CacheArgType] = Cache.Type.GLOBAL,
96
190
  run_as_task: bool = False,
@@ -98,36 +192,25 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
98
192
  deps: Optional[List[AnyVariable]] = None,
99
193
  uid: Optional[str] = None,
100
194
  nested: Optional[List[str]] = None,
101
- _get_value: Optional[Callable[..., Awaitable[Any]]] = None,
195
+ filter_resolver: Optional[FilterResolver] = None,
196
+ **kwargs,
102
197
  ):
103
- """
104
- A DerivedVariable allows a value to be derived (via a function) from the current value of a set of other
105
- variables with a python function. This is one of two primary ways that python logic can be embedded into the
106
- application (the other being the @py_component decorator).
107
-
108
- DerivedVariables can be chained together to form complex data flows whilst keeping everything organized and
109
- structured in an easy to follow way. DerivedVariable results are cached automatically and will only be
110
- recalculated when necessary.
111
-
112
- :param func: the function to derive a new value from the input variables.
113
- :param variables: a set of input variables that will be passed to the deriving function
114
- :param cache: whether to cache the result, defaults to global caching. Other options are to cache per user
115
- session, per user or to not cache at all
116
- :param run_as_task: whether to run the calculation in a separate process, recommended for any CPU intensive
117
- tasks, defaults to False
118
- :param polling_interval: an optional polling interval for the DerivedVariable. Setting this will cause the
119
- component to poll the backend and refresh itself every n seconds.
120
- :param deps: an optional array of variables, specifying which dependant variables changing should trigger a
121
- recalculation of the derived variable
122
- - `deps = None` - `func` is ran everytime (default behaviour),
123
- - `deps = []` - `func` is ran once on initial startup,
124
- - `deps = [var1, var2]` - `func` is ran whenever one of these vars changes
125
- - `deps = [var1.get('nested_property')]` - `func` is ran only when the nested property changes, other changes to the variable are ignored
126
- :param uid: the unique identifier for this variable; if not provided a random one is generated
127
- """
128
198
  if nested is None:
129
199
  nested = []
130
200
 
201
+ # Validate that StateVariables are not used as inputs
202
+ from dara.core.interactivity.state_variable import StateVariable
203
+
204
+ for var in variables:
205
+ if isinstance(var, StateVariable):
206
+ raise ValueError(
207
+ 'StateVariable cannot be used as input to DerivedVariable. '
208
+ 'StateVariables are internal variables for tracking DerivedVariable states '
209
+ 'and using them as inputs would create complex dependencies that are '
210
+ 'difficult to debug. Consider using the parent DerivedVariable directly instead,'
211
+ ' or use the StateVariable with an If component or SwitchVariable.'
212
+ )
213
+
131
214
  if cache is not None:
132
215
  cache = Cache.Policy.from_arg(cache)
133
216
 
@@ -146,7 +229,13 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
146
229
  raise RuntimeError('run_as_task is not supported within a Jupyter environment')
147
230
 
148
231
  super().__init__(
149
- cache=cache, uid=uid, variables=variables, polling_interval=polling_interval, deps=deps, nested=nested
232
+ cache=cache,
233
+ uid=uid,
234
+ variables=variables,
235
+ polling_interval=polling_interval,
236
+ deps=deps,
237
+ nested=nested,
238
+ **kwargs,
150
239
  )
151
240
 
152
241
  # Import the registry of variables and register the function at import
@@ -165,12 +254,14 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
165
254
  DerivedVariableRegistryEntry(
166
255
  cache=cache,
167
256
  func=func,
257
+ filter_resolver=filter_resolver,
168
258
  polling_interval=polling_interval,
169
259
  run_as_task=run_as_task,
170
260
  uid=str(self.uid),
171
261
  variables=variables,
172
262
  deps=deps_indexes,
173
- get_value=_get_value or DerivedVariable.get_value,
263
+ get_value=DerivedVariable.get_value,
264
+ get_tabular_data=DerivedVariable.get_tabular_data,
174
265
  ),
175
266
  )
176
267
 
@@ -198,6 +289,39 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
198
289
  assert_no_context('ctx.trigger')
199
290
  return TriggerVariable(variable=self, force=force)
200
291
 
292
+ @property
293
+ def is_loading(self):
294
+ """
295
+ Get a StateVariable that tracks the loading state of this DerivedVariable.
296
+
297
+ :return: StateVariable that is True when this DerivedVariable is loading, False otherwise
298
+ """
299
+ from dara.core.interactivity.state_variable import StateVariable
300
+
301
+ return StateVariable(parent_variable=self, property_name='loading')
302
+
303
+ @property
304
+ def has_error(self):
305
+ """
306
+ Get a StateVariable that tracks the error state of this DerivedVariable.
307
+
308
+ :return: StateVariable that is True when this DerivedVariable has an error, False otherwise
309
+ """
310
+ from dara.core.interactivity.state_variable import StateVariable
311
+
312
+ return StateVariable(parent_variable=self, property_name='error')
313
+
314
+ @property
315
+ def has_value(self):
316
+ """
317
+ Get a StateVariable that tracks whether this DerivedVariable has a resolved value.
318
+
319
+ :return: StateVariable that is True when this DerivedVariable has a value, False otherwise
320
+ """
321
+ from dara.core.interactivity.state_variable import StateVariable
322
+
323
+ return StateVariable(parent_variable=self, property_name='hasValue')
324
+
201
325
  @staticmethod
202
326
  def _get_cache_key(*args, uid: str, deps: Optional[List[int]] = None):
203
327
  """
@@ -208,15 +332,17 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
208
332
  :param uid: uid of a DerivedVariable
209
333
  :param deps: list of indexes of dependencies
210
334
  """
335
+ from dara.core.internal.dependency_resolution import clean_force_key
336
+
211
337
  key = f'{uid}'
212
338
 
213
339
  filtered_args = [arg for idx, arg in enumerate(args) if idx in deps] if deps is not None else args
214
340
 
215
- for arg in filtered_args:
216
- if isinstance(arg, dict):
217
- key = f'{key}:{json.dumps(arg, sort_keys=True, default=str)}'
218
- else:
219
- key = f'{key}:{arg}'
341
+ for raw_arg in filtered_args:
342
+ # remove force keys from the arg to not cause extra cache misses
343
+ arg = clean_force_key(raw_arg)
344
+
345
+ key = f'{key}:{json.dumps(arg, sort_keys=True, default=str)}' if isinstance(arg, dict) else f'{key}:{arg}'
220
346
  return key
221
347
 
222
348
  @staticmethod
@@ -269,7 +395,8 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
269
395
  if not latest_value_registry.has(var_entry.uid):
270
396
  # Keep latest entry per scope (user,session); if cache_type is None, use GLOBAL
271
397
  reg_entry = LatestValueRegistryEntry(
272
- uid=var_entry.uid, cache=Cache.Policy.MostRecent(cache_type=cache_type or Cache.Type.GLOBAL)
398
+ uid=var_entry.uid,
399
+ cache=Cache.Policy.MostRecent(cache_type=cache_type or Cache.Type.GLOBAL),
273
400
  )
274
401
  latest_value_registry.register(var_entry.uid, reg_entry)
275
402
  else:
@@ -285,7 +412,8 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
285
412
  store: CacheStore,
286
413
  task_mgr: TaskManager,
287
414
  args: List[Any],
288
- force: bool = False,
415
+ force_key: Optional[str] = None,
416
+ _pin_result: bool = False,
289
417
  ) -> DerivedVariableResult:
290
418
  """
291
419
  Get the value of this DerivedVariable. This method will check the main app store for an appropriate response
@@ -296,11 +424,19 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
296
424
  :param store: the store instance to check for cached values
297
425
  :param task_mgr: task manager instance
298
426
  :param args: the arguments to call the underlying function with
299
- :param force: whether to ignore cache
427
+ :param force_key: unique key for forced execution, if provided forces cache bypass
428
+ :param _pin_result: whether to pin the result in the store, used internally by derived data variables
300
429
  """
430
+ # dynamic import due to circular import
431
+ from dara.core.internal.dependency_resolution import (
432
+ is_forced,
433
+ resolve_dependency,
434
+ )
435
+
301
436
  assert var_entry.func is not None, 'DerivedVariable function is not defined'
302
437
 
303
- histogram = RUNTIME_METRICS_TRACKER.get_dv_histogram(var_entry.uid)
438
+ # Shortened UID used for logging
439
+ _uid_short = f'{var_entry.uid[:3]}..{var_entry.uid[-3:]}'
304
440
 
305
441
  if var_entry.run_as_task:
306
442
  from dara.core.internal.registries import utils_registry
@@ -310,175 +446,272 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
310
446
  'Task module is not configured. Set config.task_module path to a tasks.py module to run a derived variable as task.'
311
447
  )
312
448
 
313
- with histogram.time():
314
- # Shortened UID used for logging
315
- _uid_short = f'{var_entry.uid[:3]}..{var_entry.uid[-3:]}'
316
-
317
- # Extract and process nested derived variables
318
- values = []
449
+ # Compute cache key first, before any other work
450
+ cache_key = DerivedVariable._get_cache_key(*args, uid=var_entry.uid, deps=var_entry.deps)
319
451
 
320
- # dynamic import due to circular import
321
- from dara.core.internal.dependency_resolution import (
322
- is_resolved_derived_variable,
323
- resolve_dependency,
324
- )
325
-
326
- eng_logger.info(f'Derived Variable {_uid_short} get_value', {'uid': var_entry.uid, 'args': args})
327
-
328
- for val in args:
329
- # Don't force nested DVs
330
- if is_resolved_derived_variable(val):
331
- val['force'] = False
452
+ # Lock on this specific cache key for the entire computation
453
+ async with DV_LOCK.acquire(cache_key):
454
+ histogram = RUNTIME_METRICS_TRACKER.get_dv_histogram(var_entry.uid)
332
455
 
333
- var_value = await resolve_dependency(val, store, task_mgr)
334
- values.append(var_value)
335
-
336
- eng_logger.debug(
337
- f'DerivedVariable {_uid_short}', 'resolved arguments', {'values': values, 'uid': var_entry.uid}
338
- )
456
+ with histogram.time():
457
+ # Extract and process nested derived variables
458
+ values: List[Any] = [None] * len(args)
339
459
 
340
- # Loop over the passed arguments and if the expected type is a BaseModel and arg is a dict then convert the dict
341
- # to an instance of the BaseModel class.
342
- parsed_args = DerivedVariable._restore_pydantic_models(var_entry.func, *values)
343
-
344
- dev_logger.debug(f'DerivedVariable {_uid_short}', 'executing', {'args': parsed_args, 'uid': var_entry.uid})
345
-
346
- # Check if there are any Tasks to be run in the args
347
- has_tasks = any(isinstance(arg, BaseTask) for arg in parsed_args)
460
+ eng_logger.info(
461
+ f'Derived Variable {_uid_short} get_value',
462
+ {'uid': var_entry.uid, 'args': args},
463
+ )
348
464
 
349
- cache_key = DerivedVariable._get_cache_key(*args, uid=var_entry.uid, deps=var_entry.deps)
350
- await DerivedVariable.add_latest_value(store, var_entry, cache_key)
465
+ # Whether one of the (grand?)children has been forced - if so, the parent should skip the cache as well
466
+ has_forced_child = False
351
467
 
352
- cache_type = var_entry.cache
468
+ async def _resolve_arg(val: Any, index: int):
469
+ nonlocal has_forced_child
353
470
 
354
- # If deps is not None, force session use
355
- # Note: this is temporarily commented out as no tests were broken by removing it;
356
- # once we find what scenario this fixes, we should add a test to cover that scenario and move this snippet
357
- # to constructors of DerivedVariable and DerivedDataVariable
358
- # if cache_type == CacheType.GLOBAL and (var_entry.deps is not None and len(var_entry.deps) > 0):
359
- # cache_type = CacheType.SESSION
471
+ if is_forced(val):
472
+ has_forced_child = True
473
+ var_value = await resolve_dependency(val, store, task_mgr)
474
+ values[index] = var_value
360
475
 
361
- eng_logger.debug(
362
- f'DerivedVariable {_uid_short}',
363
- f'using cache: {cache_type}',
364
- {'uid': var_entry.uid},
365
- )
476
+ async with anyio.create_task_group() as tg:
477
+ for idx, val in enumerate(args):
478
+ tg.start_soon(_resolve_arg, val, idx)
366
479
 
367
- ignore_cache = (
368
- var_entry.cache is None
369
- or var_entry.polling_interval
370
- or DerivedVariable.check_polling(var_entry.variables)
371
- )
372
- value = await store.get(var_entry, key=cache_key) if not ignore_cache else None
480
+ eng_logger.debug(
481
+ f'DerivedVariable {_uid_short}',
482
+ 'resolved arguments',
483
+ {'values': values, 'uid': var_entry.uid},
484
+ )
373
485
 
374
- eng_logger.debug(
375
- f'DerivedVariable {_uid_short}',
376
- 'retrieved value from cache',
377
- {'uid': var_entry.uid, 'cached_value': value},
378
- )
486
+ # Loop over the passed arguments and if the expected type is a BaseModel and arg is a dict then convert the dict
487
+ # to an instance of the BaseModel class.
488
+ parsed_args = DerivedVariable._restore_pydantic_models(var_entry.func, *values)
379
489
 
380
- # If it's a PendingTask then return that task so it can be awaited later by a MetaTask
381
- if isinstance(value, PendingTask):
382
- eng_logger.info(
383
- f'DerivedVariable {_uid_short} waiting for pending task',
384
- {'uid': var_entry.uid, 'pending_task': value.task_id},
490
+ dev_logger.debug(
491
+ f'DerivedVariable {_uid_short}',
492
+ 'executing',
493
+ {'args': parsed_args, 'uid': var_entry.uid},
385
494
  )
386
- value.add_subscriber()
387
- return {'cache_key': cache_key, 'value': value}
388
495
 
389
- # If it's a PendingValue then wait for the value and return it
390
- if isinstance(value, PendingValue):
391
- eng_logger.info(
392
- f'DerivedVariable {_uid_short} waiting for pending value',
393
- {'uid': var_entry.uid, 'pending_value': value},
394
- )
395
- return {'cache_key': cache_key, 'value': await store.get_or_wait(var_entry, key=cache_key)}
496
+ # Check if there are any Tasks to be run in the args
497
+ has_tasks = any(isinstance(arg, BaseTask) for arg in parsed_args)
498
+
499
+ await DerivedVariable.add_latest_value(store, var_entry, cache_key)
500
+
501
+ cache_type = var_entry.cache
502
+
503
+ # Handle force key tracking to prevent double execution
504
+ effective_force = force_key is not None
505
+ if force_key is not None:
506
+ if force_key in _force_keys_seen:
507
+ # This force key has been seen before, don't force again
508
+ effective_force = False
509
+ eng_logger.debug(
510
+ f'DerivedVariable {_uid_short} force key already seen, using cached value',
511
+ extra={'uid': var_entry.uid, 'force_key': force_key},
512
+ )
513
+ else:
514
+ # First time seeing this force key, add it to the set
515
+ _force_keys_seen[force_key] = True
516
+ eng_logger.debug(
517
+ f'DerivedVariable {_uid_short} new force key, will force recalculation',
518
+ extra={'uid': var_entry.uid, 'force_key': force_key},
519
+ )
396
520
 
397
- # If there is a value that is not pending then we have the result so return it
398
- # If force is True, don't return even if value is found and recalculate
399
- if not force and value is not None:
400
- eng_logger.info(
401
- f'DerivedVariable {_uid_short} returning cached value directly',
402
- {'uid': var_entry.uid, 'cached_value': value},
521
+ eng_logger.debug(
522
+ f'DerivedVariable {_uid_short}',
523
+ f'using cache: {cache_type}',
524
+ {'uid': var_entry.uid},
403
525
  )
404
- return {'cache_key': cache_key, 'value': value}
405
526
 
406
- # Setup pending task if it needs it and then return the task
407
- if var_entry.run_as_task or has_tasks:
408
- var_uid = var_entry.uid or str(uuid.uuid4())
527
+ # Start with a sentinel value to indicate that the value is missing
528
+ # from cache, this lets us distinguish between a cache miss and a
529
+ # value that is None
530
+ value = VALUE_MISSING
531
+
532
+ ignore_cache = (
533
+ var_entry.cache is None
534
+ or var_entry.polling_interval
535
+ or DerivedVariable.check_polling(var_entry.variables)
536
+ or effective_force
537
+ or has_forced_child
538
+ )
539
+ if not ignore_cache:
540
+ try:
541
+ value = await store.get(var_entry, key=cache_key, raise_for_missing=True)
542
+ eng_logger.debug(
543
+ f'DerivedVariable {_uid_short}',
544
+ 'retrieved value from cache',
545
+ {'uid': var_entry.uid, 'cached_value': value},
546
+ )
547
+ except KeyError:
548
+ eng_logger.debug(
549
+ f'DerivedVariable {_uid_short}',
550
+ 'no value found in cache',
551
+ {'uid': var_entry.uid},
552
+ )
553
+ # key error means no entry found;
554
+ # this lets us distinguish from a None value stored and not found
555
+
556
+ # If it's a PendingTask then return that task so it can be awaited later by a MetaTask
557
+ if isinstance(value, PendingTask):
558
+ eng_logger.info(
559
+ f'DerivedVariable {_uid_short} waiting for pending task',
560
+ {'uid': var_entry.uid, 'pending_task': value.task_id},
561
+ )
562
+ return {'cache_key': cache_key, 'value': value}
409
563
 
410
- if has_tasks:
411
- task_id = f'{var_uid}_MetaTask_{str(uuid.uuid4())}'
564
+ # We retrieved an actual value from the cache, return it
565
+ if not ignore_cache and value is not VALUE_MISSING:
566
+ eng_logger.info(
567
+ f'DerivedVariable {_uid_short} returning cached value directly',
568
+ {'uid': var_entry.uid, 'cached_value': value},
569
+ )
570
+ return {'cache_key': cache_key, 'value': value}
571
+
572
+ # Setup pending task if it needs it and then return the task
573
+ if var_entry.run_as_task or has_tasks:
574
+ var_uid = var_entry.uid or str(uuid.uuid4())
575
+
576
+ if has_tasks:
577
+ task_id = f'{var_uid}_MetaTask_{str(uuid.uuid4())}'
578
+
579
+ extra_notify_channels = [
580
+ channel
581
+ for arg in parsed_args
582
+ if isinstance(arg, BaseTask)
583
+ for channel in arg.notify_channels
584
+ ]
585
+ eng_logger.debug(
586
+ f'DerivedVariable {_uid_short}',
587
+ 'running has tasks',
588
+ {'uid': var_entry.uid, 'task_id': task_id},
589
+ )
590
+ meta_task = MetaTask(
591
+ var_entry.func,
592
+ parsed_args,
593
+ notify_channels=list(set(extra_notify_channels)),
594
+ process_as_task=var_entry.run_as_task,
595
+ cache_key=cache_key,
596
+ task_id=task_id,
597
+ reg_entry=var_entry, # task results are set as the DV result
598
+ )
599
+
600
+ # Immediately store the pending task in the store
601
+ pending_task = task_mgr.register_task(meta_task)
602
+ await store.set(var_entry, key=cache_key, value=pending_task, pin=_pin_result)
603
+
604
+ return {'cache_key': cache_key, 'value': meta_task}
605
+
606
+ task_id = f'{var_uid}_Task_{str(uuid.uuid4())}'
412
607
 
413
- extra_notify_channels = [
414
- channel for arg in parsed_args if isinstance(arg, BaseTask) for channel in arg.notify_channels
415
- ]
416
608
  eng_logger.debug(
417
609
  f'DerivedVariable {_uid_short}',
418
- 'running has tasks',
610
+ 'running as a task',
419
611
  {'uid': var_entry.uid, 'task_id': task_id},
420
612
  )
421
- meta_task = MetaTask(
613
+ task = Task(
422
614
  var_entry.func,
423
615
  parsed_args,
424
- notify_channels=list(set(extra_notify_channels)),
425
- process_as_task=var_entry.run_as_task,
426
616
  cache_key=cache_key,
427
617
  task_id=task_id,
428
618
  reg_entry=var_entry, # task results are set as the DV result
429
619
  )
430
620
 
431
- return {'cache_key': cache_key, 'value': meta_task}
432
-
433
- task_id = f'{var_uid}_Task_{str(uuid.uuid4())}'
621
+ # Immediately store the pending task in the store
622
+ pending_task = task_mgr.register_task(task)
623
+ await store.set(var_entry, key=cache_key, value=pending_task, pin=_pin_result)
624
+
625
+ return {'cache_key': cache_key, 'value': task}
626
+
627
+ try:
628
+ result = await run_user_handler(var_entry.func, args=parsed_args)
629
+ except Exception:
630
+ # Delete the store value so subsequent requests recalculate instead
631
+ if var_entry.cache is not None:
632
+ await store.delete(var_entry, key=cache_key)
633
+ raise
634
+
635
+ # If a task is returned then ensure we register it
636
+ if isinstance(result, BaseTask):
637
+ eng_logger.info(
638
+ f'DerivedVariable {_uid_short} returning task as a result',
639
+ {'uid': var_entry.uid, 'task_id': result.task_id},
640
+ )
641
+ # Make sure cache settings are set on the task
642
+ result.cache_key = cache_key
643
+ result.reg_entry = var_entry
434
644
 
435
- eng_logger.debug(
436
- f'DerivedVariable {_uid_short}',
437
- 'running as a task',
438
- {'uid': var_entry.uid, 'task_id': task_id},
439
- )
440
- task = Task(
441
- var_entry.func,
442
- parsed_args,
443
- cache_key=cache_key,
444
- task_id=task_id,
445
- reg_entry=var_entry, # task results are set as the DV result
446
- )
447
- return {'cache_key': cache_key, 'value': task}
645
+ task_mgr.register_task(result)
448
646
 
449
- # only set pending value if cache is not None, otherwise subsequent requests calculate the value again
450
- if var_entry.cache is not None:
451
- await store.set_pending(var_entry, key=cache_key)
647
+ return {'cache_key': cache_key, 'value': result}
452
648
 
453
- try:
454
- result = await run_user_handler(var_entry.func, args=parsed_args)
455
- except Exception as e:
456
- # Set the store value to None before raising, so subsequent requests don't hang on a PendingValue
649
+ # only set the value if cache is not None, otherwise subsequent requests calculate the value again
457
650
  if var_entry.cache is not None:
458
- await store.set(var_entry, key=cache_key, value=None, error=e)
459
- raise
651
+ await store.set(var_entry, key=cache_key, value=result, pin=_pin_result)
460
652
 
461
- # If a task is returned then update pending value to pending task and return it
462
- if isinstance(result, BaseTask):
463
653
  eng_logger.info(
464
- f'DerivedVariable {_uid_short} returning task as a result',
465
- {'uid': var_entry.uid, 'task_id': result.task_id},
654
+ f'DerivedVariable {_uid_short} returning result',
655
+ {'uid': var_entry.uid, 'result': result},
466
656
  )
467
- # Make sure cache settings are set on the task
468
- result.cache_key = cache_key
469
- result.reg_entry = var_entry
470
-
471
657
  return {'cache_key': cache_key, 'value': result}
472
658
 
473
- # only set the value if cache is not None, otherwise subsequent requests calculate the value again
474
- if var_entry.cache is not None:
475
- await store.set(var_entry, key=cache_key, value=result)
659
+ @classmethod
660
+ async def _filter_data(
661
+ cls,
662
+ data: Union[DataFrame, Any, None],
663
+ filter_resolver: FilterResolver,
664
+ filters: Optional[FilterQuery] = None,
665
+ pagination: Optional[Pagination] = None,
666
+ ) -> DataResponse:
667
+ if data is None:
668
+ return DataResponse(data=None, count=0, schema=None)
669
+
670
+ # silently add the index column for DataFrame values
671
+ # User resolver could technically not be returning a DataFrame
672
+ if isinstance(data, DataFrame):
673
+ data = append_index(data)
674
+
675
+ # Filtering part
676
+ data, count = await filter_resolver(data, filters, pagination)
677
+ return build_data_response(data, count)
678
+
679
+ @classmethod
680
+ async def get_tabular_data(
681
+ cls,
682
+ var_entry: DerivedVariableRegistryEntry,
683
+ store: CacheStore,
684
+ task_mgr: TaskManager,
685
+ args: List[Any],
686
+ force_key: Optional[str] = None,
687
+ pagination: Optional[Pagination] = None,
688
+ filters: Optional[FilterQuery] = None,
689
+ ) -> Union[MetaTask, DataResponse]:
690
+ """
691
+ Get filtered tabular data from the underlying derived variable.
476
692
 
477
- eng_logger.info(
478
- f'DerivedVariable {_uid_short} returning result',
479
- {'uid': var_entry.uid, 'result': result},
693
+ Resolves the DerivedVariable and runs filtering on the result,
694
+ either using a custom filter_resolver or the default logic.
695
+ """
696
+ filter_resolver = var_entry.filter_resolver or default_filter_resolver
697
+ result = await cls.get_value(var_entry, store, task_mgr, args, force_key)
698
+
699
+ if isinstance(result['value'], BaseTask):
700
+ task_id = f'{var_entry.uid}_Filter_MetaTask_{str(uuid.uuid4())}'
701
+ task = MetaTask(
702
+ cls._filter_data,
703
+ task_id=task_id,
704
+ kwargs={
705
+ 'data': result['value'],
706
+ 'filters': filters,
707
+ 'pagination': pagination,
708
+ 'filter_resolver': filter_resolver,
709
+ },
480
710
  )
481
- return {'cache_key': cache_key, 'value': result}
711
+ task_mgr.register_task(task)
712
+ return task
713
+
714
+ return await cls._filter_data(result['value'], filter_resolver, filters, pagination)
482
715
 
483
716
  @classmethod
484
717
  def check_polling(cls, variables: List[AnyVariable]):
@@ -492,18 +725,25 @@ class DerivedVariable(NonDataVariable, Generic[VariableType]):
492
725
  @model_serializer(mode='wrap')
493
726
  def ser_model(self, nxt: SerializerFunctionWrapHandler) -> dict:
494
727
  parent_dict = nxt(self)
495
- return {**parent_dict, '__typename': 'DerivedVariable', 'uid': str(parent_dict['uid'])}
728
+ return {
729
+ **parent_dict,
730
+ '__typename': 'DerivedVariable',
731
+ 'uid': str(parent_dict['uid']),
732
+ }
496
733
 
497
734
 
498
735
  class DerivedVariableRegistryEntry(CachedRegistryEntry):
499
736
  deps: Optional[List[int]]
500
737
  func: Optional[Callable[..., Any]]
738
+ filter_resolver: Optional[FilterResolver]
501
739
  run_as_task: bool
502
740
  variables: List[AnyVariable]
503
741
  polling_interval: Optional[int]
504
742
  get_value: Callable[..., Awaitable[Any]]
505
743
  """Handler to get the value of the derived variable. Defaults to DerivedVariable.get_value, should match the signature"""
506
- model_config = ConfigDict(extra='forbid')
744
+ get_tabular_data: Callable[..., Awaitable[Union[DataResponse, MetaTask]]]
745
+ """Handler to get the tabular data of the derived variable. Defaults to DerivedVariable.get_tabular_data, should match the signature"""
746
+ model_config = ConfigDict(extra='forbid', arbitrary_types_allowed=True)
507
747
 
508
748
 
509
749
  class LatestValueRegistryEntry(CachedRegistryEntry):