kumoai 2.12.0.dev202510231830__cp311-cp311-win_amd64.whl → 2.14.0.dev202512311733__cp311-cp311-win_amd64.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- kumoai/__init__.py +41 -35
- kumoai/_version.py +1 -1
- kumoai/client/client.py +15 -13
- kumoai/client/endpoints.py +1 -0
- kumoai/client/jobs.py +24 -0
- kumoai/client/pquery.py +6 -2
- kumoai/client/rfm.py +35 -7
- kumoai/connector/utils.py +23 -2
- kumoai/experimental/rfm/__init__.py +191 -48
- kumoai/experimental/rfm/authenticate.py +3 -4
- kumoai/experimental/rfm/backend/__init__.py +0 -0
- kumoai/experimental/rfm/backend/local/__init__.py +42 -0
- kumoai/experimental/rfm/{local_graph_store.py → backend/local/graph_store.py} +65 -127
- kumoai/experimental/rfm/backend/local/sampler.py +312 -0
- kumoai/experimental/rfm/backend/local/table.py +113 -0
- kumoai/experimental/rfm/backend/snow/__init__.py +37 -0
- kumoai/experimental/rfm/backend/snow/sampler.py +297 -0
- kumoai/experimental/rfm/backend/snow/table.py +242 -0
- kumoai/experimental/rfm/backend/sqlite/__init__.py +32 -0
- kumoai/experimental/rfm/backend/sqlite/sampler.py +398 -0
- kumoai/experimental/rfm/backend/sqlite/table.py +184 -0
- kumoai/experimental/rfm/base/__init__.py +30 -0
- kumoai/experimental/rfm/base/column.py +152 -0
- kumoai/experimental/rfm/base/expression.py +44 -0
- kumoai/experimental/rfm/base/sampler.py +761 -0
- kumoai/experimental/rfm/base/source.py +19 -0
- kumoai/experimental/rfm/base/sql_sampler.py +143 -0
- kumoai/experimental/rfm/base/table.py +735 -0
- kumoai/experimental/rfm/graph.py +1237 -0
- kumoai/experimental/rfm/infer/__init__.py +8 -0
- kumoai/experimental/rfm/infer/dtype.py +82 -0
- kumoai/experimental/rfm/infer/multicategorical.py +1 -1
- kumoai/experimental/rfm/infer/pkey.py +128 -0
- kumoai/experimental/rfm/infer/stype.py +35 -0
- kumoai/experimental/rfm/infer/time_col.py +61 -0
- kumoai/experimental/rfm/pquery/__init__.py +0 -4
- kumoai/experimental/rfm/pquery/executor.py +27 -27
- kumoai/experimental/rfm/pquery/pandas_executor.py +64 -40
- kumoai/experimental/rfm/relbench.py +76 -0
- kumoai/experimental/rfm/rfm.py +386 -276
- kumoai/experimental/rfm/sagemaker.py +138 -0
- kumoai/kumolib.cp311-win_amd64.pyd +0 -0
- kumoai/pquery/predictive_query.py +10 -6
- kumoai/spcs.py +1 -3
- kumoai/testing/decorators.py +1 -1
- kumoai/testing/snow.py +50 -0
- kumoai/trainer/distilled_trainer.py +175 -0
- kumoai/trainer/trainer.py +9 -10
- kumoai/utils/__init__.py +3 -2
- kumoai/utils/display.py +51 -0
- kumoai/utils/progress_logger.py +188 -16
- kumoai/utils/sql.py +3 -0
- {kumoai-2.12.0.dev202510231830.dist-info → kumoai-2.14.0.dev202512311733.dist-info}/METADATA +13 -2
- {kumoai-2.12.0.dev202510231830.dist-info → kumoai-2.14.0.dev202512311733.dist-info}/RECORD +57 -36
- kumoai/experimental/rfm/local_graph.py +0 -810
- kumoai/experimental/rfm/local_graph_sampler.py +0 -184
- kumoai/experimental/rfm/local_pquery_driver.py +0 -494
- kumoai/experimental/rfm/local_table.py +0 -545
- kumoai/experimental/rfm/pquery/backend.py +0 -136
- kumoai/experimental/rfm/pquery/pandas_backend.py +0 -478
- kumoai/experimental/rfm/utils.py +0 -344
- {kumoai-2.12.0.dev202510231830.dist-info → kumoai-2.14.0.dev202512311733.dist-info}/WHEEL +0 -0
- {kumoai-2.12.0.dev202510231830.dist-info → kumoai-2.14.0.dev202512311733.dist-info}/licenses/LICENSE +0 -0
- {kumoai-2.12.0.dev202510231830.dist-info → kumoai-2.14.0.dev202512311733.dist-info}/top_level.txt +0 -0
@@ -1,478 +0,0 @@
-from typing import Dict, List, Optional, Tuple, Union
-
-import numpy as np
-import pandas as pd
-from kumoapi.rfm import PQueryDefinition
-from kumoapi.rfm.pquery import (
-    Aggregation,
-    AggregationType,
-    BoolOp,
-    Column,
-    Condition,
-    Filter,
-    Float,
-    FloatList,
-    Int,
-    IntList,
-    LogicalOperation,
-    MemberOp,
-    RelOp,
-    Str,
-    StrList,
-)
-
-from kumoai.experimental.rfm.pquery import PQueryBackend
-
-
-class PQueryPandasBackend(PQueryBackend[pd.DataFrame, pd.Series, np.ndarray]):
-    def eval_aggregation_type(
-        self,
-        op: AggregationType,
-        feat: Optional[pd.Series],
-        batch: np.ndarray,
-        batch_size: int,
-        filter_na: bool = True,
-    ) -> Tuple[pd.Series, np.ndarray]:
-
-        if op != AggregationType.COUNT:
-            assert feat is not None
-
-        if feat is not None:
-            mask = feat.notna()
-            feat, batch = feat[mask], batch[mask]
-
-        if op == AggregationType.LIST_DISTINCT:
-            df = pd.DataFrame(dict(feat=feat, batch=batch))
-            df = df.drop_duplicates()
-            out = df.groupby('batch')['feat'].agg(list)
-
-        else:
-            df = pd.DataFrame(dict(feat=feat, batch=batch))
-            if op == AggregationType.AVG:
-                agg = 'mean'
-            elif op == AggregationType.COUNT:
-                agg = 'size'
-            else:
-                agg = op.lower()
-            out = df.groupby('batch')['feat'].agg(agg)
-
-            if not pd.api.types.is_datetime64_any_dtype(out):
-                out = out.astype('float32')
-
-        out.name = None
-        out.index.name = None
-
-        if op in {AggregationType.SUM, AggregationType.COUNT}:
-            out = out.reindex(range(batch_size), fill_value=0)
-            mask = np.ones(batch_size, dtype=bool)
-            return out, mask
-
-        mask = np.zeros(batch_size, dtype=bool)
-        mask[batch] = True
-
-        if filter_na:
-            return out.reset_index(drop=True), mask
-
-        out = out.reindex(range(batch_size), fill_value=pd.NA)
-
-        return out, mask
-
-    def eval_rel_op(
-        self,
-        left: pd.Series,
-        op: RelOp,
-        right: Union[Int, Float, Str, None],
-    ) -> pd.Series:
-
-        if right is None:
-            if op == RelOp.EQ:
-                return left.isna()
-            assert op == RelOp.NEQ
-            return left.notna()
-
-        value = pd.Series([right.value], dtype=left.dtype).iloc[0]
-
-        if op == RelOp.EQ:
-            return (left == value).fillna(False).astype(bool)
-        if op == RelOp.NEQ:
-            out = (left != value).fillna(False).astype(bool)
-            out[left.isna()] = False  # N/A != right should always be `False`.
-            return out
-        if op == RelOp.LEQ:
-            return (left <= value).fillna(False).astype(bool)
-        if op == RelOp.GEQ:
-            return (left >= value).fillna(False).astype(bool)
-        if op == RelOp.LT:
-            return (left < value).fillna(False).astype(bool)
-        if op == RelOp.GT:
-            return (left > value).fillna(False).astype(bool)
-
-        raise NotImplementedError(f"Operator '{op}' not implemented")
-
-    def eval_member_op(
-        self,
-        left: pd.Series,
-        op: MemberOp,
-        right: Union[IntList, FloatList, StrList],
-    ) -> pd.Series:
-
-        if op == MemberOp.IN:
-            ser = pd.Series(right.value, dtype=left.dtype)
-            return left.isin(ser).astype(bool)
-
-        raise NotImplementedError(f"Operator '{op}' not implemented")
-
-    def eval_bool_op(
-        self,
-        left: pd.Series,
-        op: BoolOp,
-        right: Optional[pd.Series],
-    ) -> pd.Series:
-
-        # TODO Implement Kleene-Priest three-value logic.
-        if op == BoolOp.AND:
-            assert right is not None
-            return left & right
-        if op == BoolOp.OR:
-            assert right is not None
-            return left | right
-        if op == BoolOp.NOT:
-            return ~left
-
-        raise NotImplementedError(f"Operator '{op}' not implemented")
-
-    def eval_column(
-        self,
-        column: Column,
-        feat_dict: Dict[str, pd.DataFrame],
-        filter_na: bool = True,
-    ) -> Tuple[pd.Series, np.ndarray]:
-
-        out = feat_dict[column.table_name][column.column_name]
-        out = out.reset_index(drop=True)
-
-        if pd.api.types.is_float_dtype(out):
-            out = out.astype('float32')
-
-        out.name = None
-        out.index.name = None
-
-        mask = out.notna().to_numpy()
-
-        if not filter_na:
-            return out, mask
-
-        out = out[mask].reset_index(drop=True)
-
-        # Cast to primitive dtype:
-        if pd.api.types.is_integer_dtype(out):
-            out = out.astype('int64')
-        elif pd.api.types.is_bool_dtype(out):
-            out = out.astype('bool')
-
-        return out, mask
-
-    def eval_aggregation(
-        self,
-        aggr: Aggregation,
-        feat_dict: Dict[str, pd.DataFrame],
-        time_dict: Dict[str, pd.Series],
-        batch_dict: Dict[str, np.ndarray],
-        anchor_time: pd.Series,
-        filter_na: bool = True,
-        num_forecasts: int = 1,
-    ) -> Tuple[pd.Series, np.ndarray]:
-
-        target_table = aggr.column.table_name
-        target_batch = batch_dict[target_table]
-        target_time = time_dict[target_table]
-
-        outs: List[pd.Series] = []
-        masks: List[np.ndarray] = []
-        for _ in range(num_forecasts):
-            anchor_target_time = anchor_time[target_batch]
-            anchor_target_time = anchor_target_time.reset_index(drop=True)
-
-            target_mask = target_time <= anchor_target_time + aggr.end_offset
-
-            if aggr.start is not None:
-                start_offset = aggr.start * aggr.time_unit.to_offset()
-                target_mask &= target_time > anchor_target_time + start_offset
-            else:
-                assert num_forecasts == 1
-
-            if aggr.filter is not None:
-                target_mask &= self.eval_filter(
-                    filter=aggr.filter,
-                    feat_dict=feat_dict,
-                    time_dict=time_dict,
-                    batch_dict=batch_dict,
-                    anchor_time=anchor_time,
-                )
-
-            if (aggr.type == AggregationType.COUNT
-                    and aggr.column.column_name == '*'):
-                target_feat = None
-            else:
-                target_feat, _ = self.eval_column(
-                    aggr.column,
-                    feat_dict,
-                    filter_na=False,
-                )
-                target_feat = target_feat[target_mask]
-
-            out, mask = self.eval_aggregation_type(
-                aggr.type,
-                feat=target_feat,
-                batch=target_batch[target_mask],
-                batch_size=len(anchor_time),
-                filter_na=False if num_forecasts > 1 else filter_na,
-            )
-            outs.append(out)
-            masks.append(mask)
-
-            if num_forecasts > 1:
-                anchor_time = anchor_time + aggr.end_offset
-
-        if len(outs) == 1:
-            assert len(masks) == 1
-            return outs[0], masks[0]
-
-        out = pd.Series([list(ser) for ser in zip(*outs)])
-        mask = np.stack(masks, axis=-1).any(axis=-1)  # type: ignore
-
-        if filter_na:
-            out = out[mask].reset_index(drop=True)
-
-        return out, mask
-
-    def eval_condition(
-        self,
-        condition: Condition,
-        feat_dict: Dict[str, pd.DataFrame],
-        time_dict: Dict[str, pd.Series],
-        batch_dict: Dict[str, np.ndarray],
-        anchor_time: pd.Series,
-        filter_na: bool = True,
-        num_forecasts: int = 1,
-    ) -> Tuple[pd.Series, np.ndarray]:
-
-        if num_forecasts > 1:
-            raise NotImplementedError("Forecasting not yet implemented for "
-                                      "non-regression tasks")
-
-        if isinstance(condition.left, Column):
-            left, mask = self.eval_column(
-                column=condition.left,
-                feat_dict=feat_dict,
-                filter_na=filter_na if condition.right is not None else False,
-            )
-        else:
-            assert isinstance(condition.left, Aggregation)
-            left, mask = self.eval_aggregation(
-                aggr=condition.left,
-                feat_dict=feat_dict,
-                time_dict=time_dict,
-                batch_dict=batch_dict,
-                anchor_time=anchor_time,
-                filter_na=filter_na if condition.right is not None else False,
-            )
-
-        if filter_na and condition.right is None:
-            mask = np.ones(len(left), dtype=bool)
-
-        if isinstance(condition.op, RelOp):
-            out = self.eval_rel_op(
-                left=left,
-                op=condition.op,
-                right=condition.right,
-            )
-        else:
-            assert isinstance(condition.op, MemberOp)
-            out = self.eval_member_op(
-                left=left,
-                op=condition.op,
-                right=condition.right,
-            )
-
-        return out, mask
-
-    def eval_logical_operation(
-        self,
-        logical_operation: LogicalOperation,
-        feat_dict: Dict[str, pd.DataFrame],
-        time_dict: Dict[str, pd.Series],
-        batch_dict: Dict[str, np.ndarray],
-        anchor_time: pd.Series,
-        filter_na: bool = True,
-        num_forecasts: int = 1,
-    ) -> Tuple[pd.Series, np.ndarray]:
-
-        if num_forecasts > 1:
-            raise NotImplementedError("Forecasting not yet implemented for "
-                                      "non-regression tasks")
-
-        if isinstance(logical_operation.left, Condition):
-            left, mask = self.eval_condition(
-                condition=logical_operation.left,
-                feat_dict=feat_dict,
-                time_dict=time_dict,
-                batch_dict=batch_dict,
-                anchor_time=anchor_time,
-                filter_na=False,
-            )
-        else:
-            assert isinstance(logical_operation.left, LogicalOperation)
-            left, mask = self.eval_logical_operation(
-                logical_operation=logical_operation.left,
-                feat_dict=feat_dict,
-                time_dict=time_dict,
-                batch_dict=batch_dict,
-                anchor_time=anchor_time,
-                filter_na=False,
-            )
-
-        right = right_mask = None
-        if isinstance(logical_operation.right, Condition):
-            right, right_mask = self.eval_condition(
-                condition=logical_operation.right,
-                feat_dict=feat_dict,
-                time_dict=time_dict,
-                batch_dict=batch_dict,
-                anchor_time=anchor_time,
-                filter_na=False,
-            )
-        elif isinstance(logical_operation.right, LogicalOperation):
-            right, right_mask = self.eval_logical_operation(
-                logical_operation=logical_operation.right,
-                feat_dict=feat_dict,
-                time_dict=time_dict,
-                batch_dict=batch_dict,
-                anchor_time=anchor_time,
-                filter_na=False,
-            )
-
-        out = self.eval_bool_op(left, logical_operation.op, right)
-
-        if right_mask is not None:
-            mask &= right_mask
-
-        if filter_na:
-            out = out[mask].reset_index(drop=True)
-
-        return out, mask
-
-    def eval_filter(
-        self,
-        filter: Filter,
-        feat_dict: Dict[str, pd.DataFrame],
-        time_dict: Dict[str, pd.Series],
-        batch_dict: Dict[str, np.ndarray],
-        anchor_time: pd.Series,
-    ) -> np.ndarray:
-        if isinstance(filter.condition, Condition):
-            return self.eval_condition(
-                condition=filter.condition,
-                feat_dict=feat_dict,
-                time_dict=time_dict,
-                batch_dict=batch_dict,
-                anchor_time=anchor_time,
-                filter_na=False,
-            )[0].to_numpy()
-        else:
-            assert isinstance(filter.condition, LogicalOperation)
-            return self.eval_logical_operation(
-                logical_operation=filter.condition,
-                feat_dict=feat_dict,
-                time_dict=time_dict,
-                batch_dict=batch_dict,
-                anchor_time=anchor_time,
-                filter_na=False,
-            )[0].to_numpy()
-
-    def eval_pquery(
-        self,
-        query: PQueryDefinition,
-        feat_dict: Dict[str, pd.DataFrame],
-        time_dict: Dict[str, pd.Series],
-        batch_dict: Dict[str, np.ndarray],
-        anchor_time: pd.Series,
-        num_forecasts: int = 1,
-    ) -> Tuple[pd.Series, np.ndarray]:
-
-        mask = np.ones(len(anchor_time), dtype=bool)
-
-        if query.entity.filter is not None:
-            mask &= self.eval_filter(
-                filter=query.entity.filter,
-                feat_dict=feat_dict,
-                time_dict=time_dict,
-                batch_dict=batch_dict,
-                anchor_time=anchor_time,
-            )
-
-        if getattr(query, 'assuming', None) is not None:
-            if isinstance(query.assuming, Condition):
-                mask &= self.eval_condition(
-                    condition=query.assuming,
-                    feat_dict=feat_dict,
-                    time_dict=time_dict,
-                    batch_dict=batch_dict,
-                    anchor_time=anchor_time,
-                    filter_na=False,
-                )[0].to_numpy()
-            else:
-                assert isinstance(query.assuming, LogicalOperation)
-                mask &= self.eval_logical_operation(
-                    logical_operation=query.assuming,
-                    feat_dict=feat_dict,
-                    time_dict=time_dict,
-                    batch_dict=batch_dict,
-                    anchor_time=anchor_time,
-                    filter_na=False,
-                )[0].to_numpy()
-
-        if isinstance(query.target, Column):
-            out, _mask = self.eval_column(
-                column=query.target,
-                feat_dict=feat_dict,
-                filter_na=True,
-            )
-        elif isinstance(query.target, Aggregation):
-            out, _mask = self.eval_aggregation(
-                aggr=query.target,
-                feat_dict=feat_dict,
-                time_dict=time_dict,
-                batch_dict=batch_dict,
-                anchor_time=anchor_time,
-                filter_na=True,
-                num_forecasts=num_forecasts,
-            )
-        elif isinstance(query.target, Condition):
-            out, _mask = self.eval_condition(
-                condition=query.target,
-                feat_dict=feat_dict,
-                time_dict=time_dict,
-                batch_dict=batch_dict,
-                anchor_time=anchor_time,
-                filter_na=True,
-                num_forecasts=num_forecasts,
-            )
-        else:
-            assert isinstance(query.target, LogicalOperation)
-            out, _mask = self.eval_logical_operation(
-                logical_operation=query.target,
-                feat_dict=feat_dict,
-                time_dict=time_dict,
-                batch_dict=batch_dict,
-                anchor_time=anchor_time,
-                filter_na=True,
-                num_forecasts=num_forecasts,
-            )
-
-        out = out[mask[_mask]]
-        mask &= _mask
-
-        out = out.reset_index(drop=True)
-
-        return out, mask
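
Note on the removed NA-comparison semantics: the deleted `eval_rel_op` forces any comparison against a missing value to `False` (for both `==` and `!=`). The short pandas sketch below reproduces that behavior in isolation; it is illustrative only, uses no kumoai API, and the series and value names are made up.

import pandas as pd

# Illustrative data; NaN stands in for a missing feature value.
left = pd.Series([1.0, 2.0, None, 4.0], dtype='float32')
value = 2.0

eq = (left == value).fillna(False).astype(bool)   # NaN == value -> False
neq = (left != value).fillna(False).astype(bool)
neq[left.isna()] = False                          # NaN != value -> False as well

print(eq.tolist())   # [False, True, False, False]
print(neq.tolist())  # [True, False, False, True]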