kumoai 2.11.0.dev202510181831__cp311-cp311-macosx_11_0_arm64.whl → 2.12.0.dev202511111731__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  kumoai/_logging.py,sha256=U2_5ROdyk92P4xO4H2WJV8EC7dr6YxmmnM-b7QX9M7I,886
2
2
  kumoai/mixin.py,sha256=MP413xzuCqWhxAPUHmloLA3j4ZyF1tEtfi516b_hOXQ,812
3
- kumoai/_version.py,sha256=HKmzLWQzbgO1FpYD6EzQK0ge2WmooFnp40R0dVjHuwA,39
4
- kumoai/__init__.py,sha256=x3DjDsWBgWSNwo7mDwb3XAoRm2NuSO09yvhQTL9tBT8,10673
3
+ kumoai/_version.py,sha256=EmBJ4U0JvENPiq7lq8M80mpSdMDFEwNkBsjWDdzaLT4,39
4
+ kumoai/__init__.py,sha256=LU1zmKYc0KV5hy2VGKUuXgSvbJwj2rSRQ_R_bpHyl1o,10708
5
5
  kumoai/formatting.py,sha256=jA_rLDCGKZI8WWCha-vtuLenVKTZvli99Tqpurz1H84,953
6
6
  kumoai/futures.py,sha256=oJFIfdCM_3nWIqQteBKYMY4fPhoYlYWE_JA2o6tx-ng,3737
7
7
  kumoai/kumolib.cpython-311-darwin.so,sha256=AmB_Fysmud1y7Gm5CuBQ5lWDuSzpxVDV_iTA2cjH1s8,232544
@@ -13,17 +13,15 @@ kumoai/_singleton.py,sha256=UTwrbDkoZSGB8ZelorvprPDDv9uZkUi1q_SrmsyngpQ,836
13
13
  kumoai/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  kumoai/experimental/rfm/local_graph_sampler.py,sha256=o60_sdMa_fr60DrdmCIaE6lKQAD2msp1t-GGubFNt-o,6738
15
15
  kumoai/experimental/rfm/local_graph.py,sha256=2iJDlsGVzqCe1bD_puXWlhwGkn7YnQyJ4p4C-fwCZNE,30076
16
- kumoai/experimental/rfm/local_pquery_driver.py,sha256=xqAQ9fJfkqM1axknFpg0NLQbIYmExh-s7vGdUyDEkwA,18600
17
- kumoai/experimental/rfm/__init__.py,sha256=F1aUOCLDN2yrIRDAiOlogDfXKUkUQgp8Mt0pVX9rLX8,1641
16
+ kumoai/experimental/rfm/local_pquery_driver.py,sha256=aO7Jfwx9gxGKYvpqxZx1LLWdI1MhuZQOPtAITxoOQO0,26162
17
+ kumoai/experimental/rfm/__init__.py,sha256=ornmi2x947jkQLptMn7ZLvTf2Sw-RMcVW73AnjVsWAo,1709
18
18
  kumoai/experimental/rfm/utils.py,sha256=3IiBvT_aLBkkcJh3H11_50yt_XlEzHR0cm9Kprrtl8k,11123
19
19
  kumoai/experimental/rfm/local_table.py,sha256=r8xZ33Mjs6JD8ud6h23tZ99Dag2DvZ4h6tWjmGrKQg4,19605
20
- kumoai/experimental/rfm/rfm.py,sha256=K5c8pmWe3lCO1J4EMZBZPgJQPcdsvBAyvmvrLsa1tVU,45519
20
+ kumoai/experimental/rfm/rfm.py,sha256=V2NxxhrYi_MqLi_xcZsOYsdciT7V44iS5Fc9Ewq9eiM,48101
21
21
  kumoai/experimental/rfm/local_graph_store.py,sha256=8BqonuaMftAAsjgZpB369i5AeNd1PkisMbbEqc0cKBo,13847
22
22
  kumoai/experimental/rfm/authenticate.py,sha256=FiuHMvP7V3zBZUlHMDMbNLhc-UgDZgz4hjVSTuQ7DRw,18888
23
- kumoai/experimental/rfm/pquery/backend.py,sha256=6wtB0yFpxQUraBSA2TbKMVSIMD0dcLwYV5P4SQx2g_k,3287
24
- kumoai/experimental/rfm/pquery/__init__.py,sha256=9uLXixjp78y0IzO2F__lFqKNm37OGhN3iDh56akWLNU,283
25
- kumoai/experimental/rfm/pquery/pandas_backend.py,sha256=pgHCErSo6U-KJMhgIYijYt96uubtFB2WtsrTdLU7NYc,15396
26
- kumoai/experimental/rfm/pquery/pandas_executor.py,sha256=BgF3saosisgLHx1RyLj-HSEbMp4xLatNuARdKWwiiLY,17326
23
+ kumoai/experimental/rfm/pquery/__init__.py,sha256=X0O3EIq5SMfBEE-ii5Cq6iDhR3s3XMXB52Cx5htoePw,152
24
+ kumoai/experimental/rfm/pquery/pandas_executor.py,sha256=kiBJq7uVGbasG7TiqsubEl6ey3UYzZiM4bwxILqp_54,18487
27
25
  kumoai/experimental/rfm/pquery/executor.py,sha256=f7-pJhL0BgFU9E4o4gQpQyArOvyrZtwxFmks34-QOAE,2741
28
26
  kumoai/experimental/rfm/infer/multicategorical.py,sha256=0-cLpDnGryhr76QhZNO-klKokJ6MUSfxXcGdQ61oykY,1102
29
27
  kumoai/experimental/rfm/infer/categorical.py,sha256=VwNaKwKbRYkTxEJ1R6gziffC8dGsEThcDEfbi-KqW5c,853
@@ -40,7 +38,7 @@ kumoai/artifact_export/job.py,sha256=GEisSwvcjK_35RgOfsLXGgxMTXIWm765B_BW_Kgs-V0
40
38
  kumoai/artifact_export/__init__.py,sha256=BsfDrc3mCHpO9-BqvqKm8qrXDIwfdaoH5UIoG4eQkc4,238
41
39
  kumoai/utils/datasets.py,sha256=ptKIUoBONVD55pTVNdRCkQT3NWdN_r9UAUu4xewPa3U,2928
42
40
  kumoai/utils/__init__.py,sha256=wGDC_31XJ-7ipm6eawjLAJaP4EfmtNOH8BHzaetQ9Ko,268
43
- kumoai/utils/progress_logger.py,sha256=jHAS_iDD008VSa_P_XzJsRS6TVIXviK017KE5ict-4M,4875
41
+ kumoai/utils/progress_logger.py,sha256=pngEGzMHkiOUKOa6fbzxCEc2xlA4SJKV4TDTVVoqObM,5062
44
42
  kumoai/utils/forecasting.py,sha256=-nDS6ucKNfQhTQOfebjefj0wwWH3-KYNslIomxwwMBM,7415
45
43
  kumoai/codegen/generate.py,sha256=SvfWWa71xSAOjH9645yQvgoEM-o4BYjupM_EpUxqB_E,7331
46
44
  kumoai/codegen/naming.py,sha256=_XVQGxHfuub4bhvyuBKjltD5Lm_oPpibvP_LZteCGk0,3021
@@ -84,17 +82,17 @@ kumoai/client/jobs.py,sha256=iu_Wrta6BQMlV6ZtzSnmhjwNPKDMQDXOsqVVIyWodqw,17074
84
82
  kumoai/client/utils.py,sha256=lz1NubwMDHCwzQRowRXm7mjAoYRd5UjRQIwXdtWAl90,3849
85
83
  kumoai/client/connector.py,sha256=x3i2aBTJTEMZvYRcWkY-UfWVOANZjqAso4GBbcshFjw,3920
86
84
  kumoai/client/table.py,sha256=cQG-RPm-e91idEgse1IPJDvBmzddIDGDkuyrR1rq4wU,3235
87
- kumoai/client/rfm.py,sha256=15Wt_45mf7WJyCKylxF6_biHis9R_qmplPk9cwR9JeU,2918
88
- kumoai/client/endpoints.py,sha256=0VPeWgy2AEA1BD4zFB6DQaP4N2Ln2lPEnBIs_9fM1y4,5315
85
+ kumoai/client/rfm.py,sha256=NxKk8mH2A-B58rSXhDWaph4KeiSyJYDq-RO-vAHh7es,3726
86
+ kumoai/client/endpoints.py,sha256=iF2ZD25AJCIVbmBJ8tTZ8y1Ch0m6nTp18ydN7h4WiTk,5382
89
87
  kumoai/trainer/config.py,sha256=-2RfK10AsVVThSyfWtlyfH4Fc4EwTdu0V3yrDRtIOjk,98
90
88
  kumoai/trainer/util.py,sha256=bDPGkMF9KOy4HgtA-OwhXP17z9cbrfMnZGtyGuUq_Eo,4062
91
89
  kumoai/trainer/job.py,sha256=Wk69nzFhbvuA3nEvtCstI04z5CxkgvQ6tHnGchE0Lkg,44938
92
90
  kumoai/trainer/baseline_trainer.py,sha256=LlfViNOmswNv4c6zJJLsyv0pC2mM2WKMGYx06ogtEVc,4024
93
91
  kumoai/trainer/__init__.py,sha256=zUdFl-f-sBWmm2x8R-rdVzPBeU2FaMzUY5mkcgoTa1k,939
94
92
  kumoai/trainer/online_serving.py,sha256=9cddb5paeZaCgbUeceQdAOxysCtV5XP-KcsgFz_XR5w,9566
95
- kumoai/trainer/trainer.py,sha256=nPeZMMp17TtRFd4lKbF-TlMPnhYR4_VyPDPI0T9W9PU,20094
96
- kumoai-2.11.0.dev202510181831.dist-info/RECORD,,
97
- kumoai-2.11.0.dev202510181831.dist-info/WHEEL,sha256=sunMa2yiYbrNLGeMVDqEA0ayyJbHlex7SCn1TZrEq60,136
98
- kumoai-2.11.0.dev202510181831.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
99
- kumoai-2.11.0.dev202510181831.dist-info/METADATA,sha256=gSsdQnJBns8JH7zVpN55mX2iGPxpecpLkWCnyenAXC8,2052
100
- kumoai-2.11.0.dev202510181831.dist-info/licenses/LICENSE,sha256=TbWlyqRmhq9PEzCaTI0H0nWLQCCOywQM8wYH8MbjfLo,1102
93
+ kumoai/trainer/trainer.py,sha256=hBXO7gwpo3t59zKFTeIkK65B8QRmWCwO33sbDuEAPlY,20133
94
+ kumoai-2.12.0.dev202511111731.dist-info/RECORD,,
95
+ kumoai-2.12.0.dev202511111731.dist-info/WHEEL,sha256=sunMa2yiYbrNLGeMVDqEA0ayyJbHlex7SCn1TZrEq60,136
96
+ kumoai-2.12.0.dev202511111731.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
97
+ kumoai-2.12.0.dev202511111731.dist-info/METADATA,sha256=sNoIEIZxJx58O-0mQyfBmpsnrkAzg3ZVQhucsvlDX64,2052
98
+ kumoai-2.12.0.dev202511111731.dist-info/licenses/LICENSE,sha256=TbWlyqRmhq9PEzCaTI0H0nWLQCCOywQM8wYH8MbjfLo,1102
@@ -1,136 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from typing import Dict, Generic, Optional, Tuple, TypeVar, Union
3
-
4
- from kumoapi.rfm import PQueryDefinition
5
- from kumoapi.rfm.pquery import (
6
- Aggregation,
7
- AggregationType,
8
- BoolOp,
9
- Column,
10
- Condition,
11
- Filter,
12
- Float,
13
- FloatList,
14
- Int,
15
- IntList,
16
- LogicalOperation,
17
- MemberOp,
18
- RelOp,
19
- Str,
20
- StrList,
21
- )
22
-
23
- TableData = TypeVar('TableData')
24
- ColumnData = TypeVar('ColumnData')
25
- IndexData = TypeVar('IndexData')
26
-
27
-
28
- class PQueryBackend(Generic[TableData, ColumnData, IndexData], ABC):
29
- @abstractmethod
30
- def eval_aggregation_type(
31
- self,
32
- op: AggregationType,
33
- feat: Optional[ColumnData],
34
- batch: IndexData,
35
- batch_size: int,
36
- filter_na: bool = True,
37
- ) -> Tuple[ColumnData, IndexData]:
38
- pass
39
-
40
- @abstractmethod
41
- def eval_rel_op(
42
- self,
43
- left: ColumnData,
44
- op: RelOp,
45
- right: Union[Int, Float, Str, None],
46
- ) -> ColumnData:
47
- pass
48
-
49
- @abstractmethod
50
- def eval_member_op(
51
- self,
52
- left: ColumnData,
53
- op: MemberOp,
54
- right: Union[IntList, FloatList, StrList],
55
- ) -> ColumnData:
56
- pass
57
-
58
- @abstractmethod
59
- def eval_bool_op(
60
- self,
61
- left: ColumnData,
62
- op: BoolOp,
63
- right: Optional[ColumnData],
64
- ) -> ColumnData:
65
- pass
66
-
67
- @abstractmethod
68
- def eval_column(
69
- self,
70
- column: Column,
71
- feat_dict: Dict[str, TableData],
72
- filter_na: bool = True,
73
- ) -> Tuple[ColumnData, IndexData]:
74
- pass
75
-
76
- @abstractmethod
77
- def eval_aggregation(
78
- self,
79
- aggr: Aggregation,
80
- feat_dict: Dict[str, TableData],
81
- time_dict: Dict[str, ColumnData],
82
- batch_dict: Dict[str, IndexData],
83
- anchor_time: ColumnData,
84
- filter_na: bool = True,
85
- num_forecasts: int = 1,
86
- ) -> Tuple[ColumnData, IndexData]:
87
- pass
88
-
89
- @abstractmethod
90
- def eval_condition(
91
- self,
92
- condition: Condition,
93
- feat_dict: Dict[str, TableData],
94
- time_dict: Dict[str, ColumnData],
95
- batch_dict: Dict[str, IndexData],
96
- anchor_time: ColumnData,
97
- filter_na: bool = True,
98
- num_forecasts: int = 1,
99
- ) -> Tuple[ColumnData, IndexData]:
100
- pass
101
-
102
- @abstractmethod
103
- def eval_logical_operation(
104
- self,
105
- logical_operation: LogicalOperation,
106
- feat_dict: Dict[str, TableData],
107
- time_dict: Dict[str, ColumnData],
108
- batch_dict: Dict[str, IndexData],
109
- anchor_time: ColumnData,
110
- filter_na: bool = True,
111
- num_forecasts: int = 1,
112
- ) -> Tuple[ColumnData, IndexData]:
113
- pass
114
-
115
- @abstractmethod
116
- def eval_filter(
117
- self,
118
- filter: Filter,
119
- feat_dict: Dict[str, TableData],
120
- time_dict: Dict[str, ColumnData],
121
- batch_dict: Dict[str, IndexData],
122
- anchor_time: ColumnData,
123
- ) -> IndexData:
124
- pass
125
-
126
- @abstractmethod
127
- def eval_pquery(
128
- self,
129
- query: PQueryDefinition,
130
- feat_dict: Dict[str, TableData],
131
- time_dict: Dict[str, ColumnData],
132
- batch_dict: Dict[str, IndexData],
133
- anchor_time: ColumnData,
134
- num_forecasts: int = 1,
135
- ) -> Tuple[ColumnData, IndexData]:
136
- pass
@@ -1,478 +0,0 @@
1
- from typing import Dict, List, Optional, Tuple, Union
2
-
3
- import numpy as np
4
- import pandas as pd
5
- from kumoapi.rfm import PQueryDefinition
6
- from kumoapi.rfm.pquery import (
7
- Aggregation,
8
- AggregationType,
9
- BoolOp,
10
- Column,
11
- Condition,
12
- Filter,
13
- Float,
14
- FloatList,
15
- Int,
16
- IntList,
17
- LogicalOperation,
18
- MemberOp,
19
- RelOp,
20
- Str,
21
- StrList,
22
- )
23
-
24
- from kumoai.experimental.rfm.pquery import PQueryBackend
25
-
26
-
27
- class PQueryPandasBackend(PQueryBackend[pd.DataFrame, pd.Series, np.ndarray]):
28
- def eval_aggregation_type(
29
- self,
30
- op: AggregationType,
31
- feat: Optional[pd.Series],
32
- batch: np.ndarray,
33
- batch_size: int,
34
- filter_na: bool = True,
35
- ) -> Tuple[pd.Series, np.ndarray]:
36
-
37
- if op != AggregationType.COUNT:
38
- assert feat is not None
39
-
40
- if feat is not None:
41
- mask = feat.notna()
42
- feat, batch = feat[mask], batch[mask]
43
-
44
- if op == AggregationType.LIST_DISTINCT:
45
- df = pd.DataFrame(dict(feat=feat, batch=batch))
46
- df = df.drop_duplicates()
47
- out = df.groupby('batch')['feat'].agg(list)
48
-
49
- else:
50
- df = pd.DataFrame(dict(feat=feat, batch=batch))
51
- if op == AggregationType.AVG:
52
- agg = 'mean'
53
- elif op == AggregationType.COUNT:
54
- agg = 'size'
55
- else:
56
- agg = op.lower()
57
- out = df.groupby('batch')['feat'].agg(agg)
58
-
59
- if not pd.api.types.is_datetime64_any_dtype(out):
60
- out = out.astype('float32')
61
-
62
- out.name = None
63
- out.index.name = None
64
-
65
- if op in {AggregationType.SUM, AggregationType.COUNT}:
66
- out = out.reindex(range(batch_size), fill_value=0)
67
- mask = np.ones(batch_size, dtype=bool)
68
- return out, mask
69
-
70
- mask = np.zeros(batch_size, dtype=bool)
71
- mask[batch] = True
72
-
73
- if filter_na:
74
- return out.reset_index(drop=True), mask
75
-
76
- out = out.reindex(range(batch_size), fill_value=pd.NA)
77
-
78
- return out, mask
79
-
80
- def eval_rel_op(
81
- self,
82
- left: pd.Series,
83
- op: RelOp,
84
- right: Union[Int, Float, Str, None],
85
- ) -> pd.Series:
86
-
87
- if right is None:
88
- if op == RelOp.EQ:
89
- return left.isna()
90
- assert op == RelOp.NEQ
91
- return left.notna()
92
-
93
- value = pd.Series([right.value], dtype=left.dtype).iloc[0]
94
-
95
- if op == RelOp.EQ:
96
- return (left == value).fillna(False).astype(bool)
97
- if op == RelOp.NEQ:
98
- out = (left != value).fillna(False).astype(bool)
99
- out[left.isna()] = False # N/A != right should always be `False`.
100
- return out
101
- if op == RelOp.LEQ:
102
- return (left <= value).fillna(False).astype(bool)
103
- if op == RelOp.GEQ:
104
- return (left >= value).fillna(False).astype(bool)
105
- if op == RelOp.LT:
106
- return (left < value).fillna(False).astype(bool)
107
- if op == RelOp.GT:
108
- return (left > value).fillna(False).astype(bool)
109
-
110
- raise NotImplementedError(f"Operator '{op}' not implemented")
111
-
112
- def eval_member_op(
113
- self,
114
- left: pd.Series,
115
- op: MemberOp,
116
- right: Union[IntList, FloatList, StrList],
117
- ) -> pd.Series:
118
-
119
- if op == MemberOp.IN:
120
- ser = pd.Series(right.value, dtype=left.dtype)
121
- return left.isin(ser).astype(bool)
122
-
123
- raise NotImplementedError(f"Operator '{op}' not implemented")
124
-
125
- def eval_bool_op(
126
- self,
127
- left: pd.Series,
128
- op: BoolOp,
129
- right: Optional[pd.Series],
130
- ) -> pd.Series:
131
-
132
- # TODO Implement Kleene-Priest three-value logic.
133
- if op == BoolOp.AND:
134
- assert right is not None
135
- return left & right
136
- if op == BoolOp.OR:
137
- assert right is not None
138
- return left | right
139
- if op == BoolOp.NOT:
140
- return ~left
141
-
142
- raise NotImplementedError(f"Operator '{op}' not implemented")
143
-
144
- def eval_column(
145
- self,
146
- column: Column,
147
- feat_dict: Dict[str, pd.DataFrame],
148
- filter_na: bool = True,
149
- ) -> Tuple[pd.Series, np.ndarray]:
150
-
151
- out = feat_dict[column.table_name][column.column_name]
152
- out = out.reset_index(drop=True)
153
-
154
- if pd.api.types.is_float_dtype(out):
155
- out = out.astype('float32')
156
-
157
- out.name = None
158
- out.index.name = None
159
-
160
- mask = out.notna().to_numpy()
161
-
162
- if not filter_na:
163
- return out, mask
164
-
165
- out = out[mask].reset_index(drop=True)
166
-
167
- # Cast to primitive dtype:
168
- if pd.api.types.is_integer_dtype(out):
169
- out = out.astype('int64')
170
- elif pd.api.types.is_bool_dtype(out):
171
- out = out.astype('bool')
172
-
173
- return out, mask
174
-
175
- def eval_aggregation(
176
- self,
177
- aggr: Aggregation,
178
- feat_dict: Dict[str, pd.DataFrame],
179
- time_dict: Dict[str, pd.Series],
180
- batch_dict: Dict[str, np.ndarray],
181
- anchor_time: pd.Series,
182
- filter_na: bool = True,
183
- num_forecasts: int = 1,
184
- ) -> Tuple[pd.Series, np.ndarray]:
185
-
186
- target_table = aggr.column.table_name
187
- target_batch = batch_dict[target_table]
188
- target_time = time_dict[target_table]
189
-
190
- outs: List[pd.Series] = []
191
- masks: List[np.ndarray] = []
192
- for _ in range(num_forecasts):
193
- anchor_target_time = anchor_time[target_batch]
194
- anchor_target_time = anchor_target_time.reset_index(drop=True)
195
-
196
- target_mask = target_time <= anchor_target_time + aggr.end_offset
197
-
198
- if aggr.start is not None:
199
- start_offset = aggr.start * aggr.time_unit.to_offset()
200
- target_mask &= target_time > anchor_target_time + start_offset
201
- else:
202
- assert num_forecasts == 1
203
-
204
- if aggr.filter is not None:
205
- target_mask &= self.eval_filter(
206
- filter=aggr.filter,
207
- feat_dict=feat_dict,
208
- time_dict=time_dict,
209
- batch_dict=batch_dict,
210
- anchor_time=anchor_time,
211
- )
212
-
213
- if (aggr.type == AggregationType.COUNT
214
- and aggr.column.column_name == '*'):
215
- target_feat = None
216
- else:
217
- target_feat, _ = self.eval_column(
218
- aggr.column,
219
- feat_dict,
220
- filter_na=False,
221
- )
222
- target_feat = target_feat[target_mask]
223
-
224
- out, mask = self.eval_aggregation_type(
225
- aggr.type,
226
- feat=target_feat,
227
- batch=target_batch[target_mask],
228
- batch_size=len(anchor_time),
229
- filter_na=False if num_forecasts > 1 else filter_na,
230
- )
231
- outs.append(out)
232
- masks.append(mask)
233
-
234
- if num_forecasts > 1:
235
- anchor_time = anchor_time + aggr.end_offset
236
-
237
- if len(outs) == 1:
238
- assert len(masks) == 1
239
- return outs[0], masks[0]
240
-
241
- out = pd.Series([list(ser) for ser in zip(*outs)])
242
- mask = np.stack(masks, axis=-1).any(axis=-1) # type: ignore
243
-
244
- if filter_na:
245
- out = out[mask].reset_index(drop=True)
246
-
247
- return out, mask
248
-
249
- def eval_condition(
250
- self,
251
- condition: Condition,
252
- feat_dict: Dict[str, pd.DataFrame],
253
- time_dict: Dict[str, pd.Series],
254
- batch_dict: Dict[str, np.ndarray],
255
- anchor_time: pd.Series,
256
- filter_na: bool = True,
257
- num_forecasts: int = 1,
258
- ) -> Tuple[pd.Series, np.ndarray]:
259
-
260
- if num_forecasts > 1:
261
- raise NotImplementedError("Forecasting not yet implemented for "
262
- "non-regression tasks")
263
-
264
- if isinstance(condition.left, Column):
265
- left, mask = self.eval_column(
266
- column=condition.left,
267
- feat_dict=feat_dict,
268
- filter_na=filter_na if condition.right is not None else False,
269
- )
270
- else:
271
- assert isinstance(condition.left, Aggregation)
272
- left, mask = self.eval_aggregation(
273
- aggr=condition.left,
274
- feat_dict=feat_dict,
275
- time_dict=time_dict,
276
- batch_dict=batch_dict,
277
- anchor_time=anchor_time,
278
- filter_na=filter_na if condition.right is not None else False,
279
- )
280
-
281
- if filter_na and condition.right is None:
282
- mask = np.ones(len(left), dtype=bool)
283
-
284
- if isinstance(condition.op, RelOp):
285
- out = self.eval_rel_op(
286
- left=left,
287
- op=condition.op,
288
- right=condition.right,
289
- )
290
- else:
291
- assert isinstance(condition.op, MemberOp)
292
- out = self.eval_member_op(
293
- left=left,
294
- op=condition.op,
295
- right=condition.right,
296
- )
297
-
298
- return out, mask
299
-
300
- def eval_logical_operation(
301
- self,
302
- logical_operation: LogicalOperation,
303
- feat_dict: Dict[str, pd.DataFrame],
304
- time_dict: Dict[str, pd.Series],
305
- batch_dict: Dict[str, np.ndarray],
306
- anchor_time: pd.Series,
307
- filter_na: bool = True,
308
- num_forecasts: int = 1,
309
- ) -> Tuple[pd.Series, np.ndarray]:
310
-
311
- if num_forecasts > 1:
312
- raise NotImplementedError("Forecasting not yet implemented for "
313
- "non-regression tasks")
314
-
315
- if isinstance(logical_operation.left, Condition):
316
- left, mask = self.eval_condition(
317
- condition=logical_operation.left,
318
- feat_dict=feat_dict,
319
- time_dict=time_dict,
320
- batch_dict=batch_dict,
321
- anchor_time=anchor_time,
322
- filter_na=False,
323
- )
324
- else:
325
- assert isinstance(logical_operation.left, LogicalOperation)
326
- left, mask = self.eval_logical_operation(
327
- logical_operation=logical_operation.left,
328
- feat_dict=feat_dict,
329
- time_dict=time_dict,
330
- batch_dict=batch_dict,
331
- anchor_time=anchor_time,
332
- filter_na=False,
333
- )
334
-
335
- right = right_mask = None
336
- if isinstance(logical_operation.right, Condition):
337
- right, right_mask = self.eval_condition(
338
- condition=logical_operation.right,
339
- feat_dict=feat_dict,
340
- time_dict=time_dict,
341
- batch_dict=batch_dict,
342
- anchor_time=anchor_time,
343
- filter_na=False,
344
- )
345
- elif isinstance(logical_operation.right, LogicalOperation):
346
- right, right_mask = self.eval_logical_operation(
347
- logical_operation=logical_operation.right,
348
- feat_dict=feat_dict,
349
- time_dict=time_dict,
350
- batch_dict=batch_dict,
351
- anchor_time=anchor_time,
352
- filter_na=False,
353
- )
354
-
355
- out = self.eval_bool_op(left, logical_operation.op, right)
356
-
357
- if right_mask is not None:
358
- mask &= right_mask
359
-
360
- if filter_na:
361
- out = out[mask].reset_index(drop=True)
362
-
363
- return out, mask
364
-
365
- def eval_filter(
366
- self,
367
- filter: Filter,
368
- feat_dict: Dict[str, pd.DataFrame],
369
- time_dict: Dict[str, pd.Series],
370
- batch_dict: Dict[str, np.ndarray],
371
- anchor_time: pd.Series,
372
- ) -> np.ndarray:
373
- if isinstance(filter.condition, Condition):
374
- return self.eval_condition(
375
- condition=filter.condition,
376
- feat_dict=feat_dict,
377
- time_dict=time_dict,
378
- batch_dict=batch_dict,
379
- anchor_time=anchor_time,
380
- filter_na=False,
381
- )[0].to_numpy()
382
- else:
383
- assert isinstance(filter.condition, LogicalOperation)
384
- return self.eval_logical_operation(
385
- logical_operation=filter.condition,
386
- feat_dict=feat_dict,
387
- time_dict=time_dict,
388
- batch_dict=batch_dict,
389
- anchor_time=anchor_time,
390
- filter_na=False,
391
- )[0].to_numpy()
392
-
393
- def eval_pquery(
394
- self,
395
- query: PQueryDefinition,
396
- feat_dict: Dict[str, pd.DataFrame],
397
- time_dict: Dict[str, pd.Series],
398
- batch_dict: Dict[str, np.ndarray],
399
- anchor_time: pd.Series,
400
- num_forecasts: int = 1,
401
- ) -> Tuple[pd.Series, np.ndarray]:
402
-
403
- mask = np.ones(len(anchor_time), dtype=bool)
404
-
405
- if query.entity.filter is not None:
406
- mask &= self.eval_filter(
407
- filter=query.entity.filter,
408
- feat_dict=feat_dict,
409
- time_dict=time_dict,
410
- batch_dict=batch_dict,
411
- anchor_time=anchor_time,
412
- )
413
-
414
- if getattr(query, 'assuming', None) is not None:
415
- if isinstance(query.assuming, Condition):
416
- mask &= self.eval_condition(
417
- condition=query.assuming,
418
- feat_dict=feat_dict,
419
- time_dict=time_dict,
420
- batch_dict=batch_dict,
421
- anchor_time=anchor_time,
422
- filter_na=False,
423
- )[0].to_numpy()
424
- else:
425
- assert isinstance(query.assuming, LogicalOperation)
426
- mask &= self.eval_logical_operation(
427
- logical_operation=query.assuming,
428
- feat_dict=feat_dict,
429
- time_dict=time_dict,
430
- batch_dict=batch_dict,
431
- anchor_time=anchor_time,
432
- filter_na=False,
433
- )[0].to_numpy()
434
-
435
- if isinstance(query.target, Column):
436
- out, _mask = self.eval_column(
437
- column=query.target,
438
- feat_dict=feat_dict,
439
- filter_na=True,
440
- )
441
- elif isinstance(query.target, Aggregation):
442
- out, _mask = self.eval_aggregation(
443
- aggr=query.target,
444
- feat_dict=feat_dict,
445
- time_dict=time_dict,
446
- batch_dict=batch_dict,
447
- anchor_time=anchor_time,
448
- filter_na=True,
449
- num_forecasts=num_forecasts,
450
- )
451
- elif isinstance(query.target, Condition):
452
- out, _mask = self.eval_condition(
453
- condition=query.target,
454
- feat_dict=feat_dict,
455
- time_dict=time_dict,
456
- batch_dict=batch_dict,
457
- anchor_time=anchor_time,
458
- filter_na=True,
459
- num_forecasts=num_forecasts,
460
- )
461
- else:
462
- assert isinstance(query.target, LogicalOperation)
463
- out, _mask = self.eval_logical_operation(
464
- logical_operation=query.target,
465
- feat_dict=feat_dict,
466
- time_dict=time_dict,
467
- batch_dict=batch_dict,
468
- anchor_time=anchor_time,
469
- filter_na=True,
470
- num_forecasts=num_forecasts,
471
- )
472
-
473
- out = out[mask[_mask]]
474
- mask &= _mask
475
-
476
- out = out.reset_index(drop=True)
477
-
478
- return out, mask