kumoai 2.11.0.dev202510181831__cp311-cp311-macosx_11_0_arm64.whl → 2.12.0.dev202511111731__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kumoai/__init__.py +4 -2
- kumoai/_version.py +1 -1
- kumoai/client/endpoints.py +1 -0
- kumoai/client/rfm.py +35 -7
- kumoai/experimental/rfm/__init__.py +3 -1
- kumoai/experimental/rfm/local_pquery_driver.py +221 -26
- kumoai/experimental/rfm/pquery/__init__.py +0 -4
- kumoai/experimental/rfm/pquery/pandas_executor.py +34 -8
- kumoai/experimental/rfm/rfm.py +139 -71
- kumoai/trainer/trainer.py +9 -10
- kumoai/utils/progress_logger.py +10 -4
- {kumoai-2.11.0.dev202510181831.dist-info → kumoai-2.12.0.dev202511111731.dist-info}/METADATA +2 -2
- {kumoai-2.11.0.dev202510181831.dist-info → kumoai-2.12.0.dev202511111731.dist-info}/RECORD +16 -18
- kumoai/experimental/rfm/pquery/backend.py +0 -136
- kumoai/experimental/rfm/pquery/pandas_backend.py +0 -478
- {kumoai-2.11.0.dev202510181831.dist-info → kumoai-2.12.0.dev202511111731.dist-info}/WHEEL +0 -0
- {kumoai-2.11.0.dev202510181831.dist-info → kumoai-2.12.0.dev202511111731.dist-info}/licenses/LICENSE +0 -0
- {kumoai-2.11.0.dev202510181831.dist-info → kumoai-2.12.0.dev202511111731.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
kumoai/_logging.py,sha256=U2_5ROdyk92P4xO4H2WJV8EC7dr6YxmmnM-b7QX9M7I,886
|
|
2
2
|
kumoai/mixin.py,sha256=MP413xzuCqWhxAPUHmloLA3j4ZyF1tEtfi516b_hOXQ,812
|
|
3
|
-
kumoai/_version.py,sha256=
|
|
4
|
-
kumoai/__init__.py,sha256=
|
|
3
|
+
kumoai/_version.py,sha256=EmBJ4U0JvENPiq7lq8M80mpSdMDFEwNkBsjWDdzaLT4,39
|
|
4
|
+
kumoai/__init__.py,sha256=LU1zmKYc0KV5hy2VGKUuXgSvbJwj2rSRQ_R_bpHyl1o,10708
|
|
5
5
|
kumoai/formatting.py,sha256=jA_rLDCGKZI8WWCha-vtuLenVKTZvli99Tqpurz1H84,953
|
|
6
6
|
kumoai/futures.py,sha256=oJFIfdCM_3nWIqQteBKYMY4fPhoYlYWE_JA2o6tx-ng,3737
|
|
7
7
|
kumoai/kumolib.cpython-311-darwin.so,sha256=AmB_Fysmud1y7Gm5CuBQ5lWDuSzpxVDV_iTA2cjH1s8,232544
|
|
@@ -13,17 +13,15 @@ kumoai/_singleton.py,sha256=UTwrbDkoZSGB8ZelorvprPDDv9uZkUi1q_SrmsyngpQ,836
|
|
|
13
13
|
kumoai/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
14
|
kumoai/experimental/rfm/local_graph_sampler.py,sha256=o60_sdMa_fr60DrdmCIaE6lKQAD2msp1t-GGubFNt-o,6738
|
|
15
15
|
kumoai/experimental/rfm/local_graph.py,sha256=2iJDlsGVzqCe1bD_puXWlhwGkn7YnQyJ4p4C-fwCZNE,30076
|
|
16
|
-
kumoai/experimental/rfm/local_pquery_driver.py,sha256=
|
|
17
|
-
kumoai/experimental/rfm/__init__.py,sha256=
|
|
16
|
+
kumoai/experimental/rfm/local_pquery_driver.py,sha256=aO7Jfwx9gxGKYvpqxZx1LLWdI1MhuZQOPtAITxoOQO0,26162
|
|
17
|
+
kumoai/experimental/rfm/__init__.py,sha256=ornmi2x947jkQLptMn7ZLvTf2Sw-RMcVW73AnjVsWAo,1709
|
|
18
18
|
kumoai/experimental/rfm/utils.py,sha256=3IiBvT_aLBkkcJh3H11_50yt_XlEzHR0cm9Kprrtl8k,11123
|
|
19
19
|
kumoai/experimental/rfm/local_table.py,sha256=r8xZ33Mjs6JD8ud6h23tZ99Dag2DvZ4h6tWjmGrKQg4,19605
|
|
20
|
-
kumoai/experimental/rfm/rfm.py,sha256=
|
|
20
|
+
kumoai/experimental/rfm/rfm.py,sha256=V2NxxhrYi_MqLi_xcZsOYsdciT7V44iS5Fc9Ewq9eiM,48101
|
|
21
21
|
kumoai/experimental/rfm/local_graph_store.py,sha256=8BqonuaMftAAsjgZpB369i5AeNd1PkisMbbEqc0cKBo,13847
|
|
22
22
|
kumoai/experimental/rfm/authenticate.py,sha256=FiuHMvP7V3zBZUlHMDMbNLhc-UgDZgz4hjVSTuQ7DRw,18888
|
|
23
|
-
kumoai/experimental/rfm/pquery/
|
|
24
|
-
kumoai/experimental/rfm/pquery/
|
|
25
|
-
kumoai/experimental/rfm/pquery/pandas_backend.py,sha256=pgHCErSo6U-KJMhgIYijYt96uubtFB2WtsrTdLU7NYc,15396
|
|
26
|
-
kumoai/experimental/rfm/pquery/pandas_executor.py,sha256=BgF3saosisgLHx1RyLj-HSEbMp4xLatNuARdKWwiiLY,17326
|
|
23
|
+
kumoai/experimental/rfm/pquery/__init__.py,sha256=X0O3EIq5SMfBEE-ii5Cq6iDhR3s3XMXB52Cx5htoePw,152
|
|
24
|
+
kumoai/experimental/rfm/pquery/pandas_executor.py,sha256=kiBJq7uVGbasG7TiqsubEl6ey3UYzZiM4bwxILqp_54,18487
|
|
27
25
|
kumoai/experimental/rfm/pquery/executor.py,sha256=f7-pJhL0BgFU9E4o4gQpQyArOvyrZtwxFmks34-QOAE,2741
|
|
28
26
|
kumoai/experimental/rfm/infer/multicategorical.py,sha256=0-cLpDnGryhr76QhZNO-klKokJ6MUSfxXcGdQ61oykY,1102
|
|
29
27
|
kumoai/experimental/rfm/infer/categorical.py,sha256=VwNaKwKbRYkTxEJ1R6gziffC8dGsEThcDEfbi-KqW5c,853
|
|
@@ -40,7 +38,7 @@ kumoai/artifact_export/job.py,sha256=GEisSwvcjK_35RgOfsLXGgxMTXIWm765B_BW_Kgs-V0
|
|
|
40
38
|
kumoai/artifact_export/__init__.py,sha256=BsfDrc3mCHpO9-BqvqKm8qrXDIwfdaoH5UIoG4eQkc4,238
|
|
41
39
|
kumoai/utils/datasets.py,sha256=ptKIUoBONVD55pTVNdRCkQT3NWdN_r9UAUu4xewPa3U,2928
|
|
42
40
|
kumoai/utils/__init__.py,sha256=wGDC_31XJ-7ipm6eawjLAJaP4EfmtNOH8BHzaetQ9Ko,268
|
|
43
|
-
kumoai/utils/progress_logger.py,sha256=
|
|
41
|
+
kumoai/utils/progress_logger.py,sha256=pngEGzMHkiOUKOa6fbzxCEc2xlA4SJKV4TDTVVoqObM,5062
|
|
44
42
|
kumoai/utils/forecasting.py,sha256=-nDS6ucKNfQhTQOfebjefj0wwWH3-KYNslIomxwwMBM,7415
|
|
45
43
|
kumoai/codegen/generate.py,sha256=SvfWWa71xSAOjH9645yQvgoEM-o4BYjupM_EpUxqB_E,7331
|
|
46
44
|
kumoai/codegen/naming.py,sha256=_XVQGxHfuub4bhvyuBKjltD5Lm_oPpibvP_LZteCGk0,3021
|
|
@@ -84,17 +82,17 @@ kumoai/client/jobs.py,sha256=iu_Wrta6BQMlV6ZtzSnmhjwNPKDMQDXOsqVVIyWodqw,17074
|
|
|
84
82
|
kumoai/client/utils.py,sha256=lz1NubwMDHCwzQRowRXm7mjAoYRd5UjRQIwXdtWAl90,3849
|
|
85
83
|
kumoai/client/connector.py,sha256=x3i2aBTJTEMZvYRcWkY-UfWVOANZjqAso4GBbcshFjw,3920
|
|
86
84
|
kumoai/client/table.py,sha256=cQG-RPm-e91idEgse1IPJDvBmzddIDGDkuyrR1rq4wU,3235
|
|
87
|
-
kumoai/client/rfm.py,sha256=
|
|
88
|
-
kumoai/client/endpoints.py,sha256=
|
|
85
|
+
kumoai/client/rfm.py,sha256=NxKk8mH2A-B58rSXhDWaph4KeiSyJYDq-RO-vAHh7es,3726
|
|
86
|
+
kumoai/client/endpoints.py,sha256=iF2ZD25AJCIVbmBJ8tTZ8y1Ch0m6nTp18ydN7h4WiTk,5382
|
|
89
87
|
kumoai/trainer/config.py,sha256=-2RfK10AsVVThSyfWtlyfH4Fc4EwTdu0V3yrDRtIOjk,98
|
|
90
88
|
kumoai/trainer/util.py,sha256=bDPGkMF9KOy4HgtA-OwhXP17z9cbrfMnZGtyGuUq_Eo,4062
|
|
91
89
|
kumoai/trainer/job.py,sha256=Wk69nzFhbvuA3nEvtCstI04z5CxkgvQ6tHnGchE0Lkg,44938
|
|
92
90
|
kumoai/trainer/baseline_trainer.py,sha256=LlfViNOmswNv4c6zJJLsyv0pC2mM2WKMGYx06ogtEVc,4024
|
|
93
91
|
kumoai/trainer/__init__.py,sha256=zUdFl-f-sBWmm2x8R-rdVzPBeU2FaMzUY5mkcgoTa1k,939
|
|
94
92
|
kumoai/trainer/online_serving.py,sha256=9cddb5paeZaCgbUeceQdAOxysCtV5XP-KcsgFz_XR5w,9566
|
|
95
|
-
kumoai/trainer/trainer.py,sha256=
|
|
96
|
-
kumoai-2.
|
|
97
|
-
kumoai-2.
|
|
98
|
-
kumoai-2.
|
|
99
|
-
kumoai-2.
|
|
100
|
-
kumoai-2.
|
|
93
|
+
kumoai/trainer/trainer.py,sha256=hBXO7gwpo3t59zKFTeIkK65B8QRmWCwO33sbDuEAPlY,20133
|
|
94
|
+
kumoai-2.12.0.dev202511111731.dist-info/RECORD,,
|
|
95
|
+
kumoai-2.12.0.dev202511111731.dist-info/WHEEL,sha256=sunMa2yiYbrNLGeMVDqEA0ayyJbHlex7SCn1TZrEq60,136
|
|
96
|
+
kumoai-2.12.0.dev202511111731.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
|
|
97
|
+
kumoai-2.12.0.dev202511111731.dist-info/METADATA,sha256=sNoIEIZxJx58O-0mQyfBmpsnrkAzg3ZVQhucsvlDX64,2052
|
|
98
|
+
kumoai-2.12.0.dev202511111731.dist-info/licenses/LICENSE,sha256=TbWlyqRmhq9PEzCaTI0H0nWLQCCOywQM8wYH8MbjfLo,1102
|
|
@@ -1,136 +0,0 @@
|
|
|
1
|
-
from abc import ABC, abstractmethod
|
|
2
|
-
from typing import Dict, Generic, Optional, Tuple, TypeVar, Union
|
|
3
|
-
|
|
4
|
-
from kumoapi.rfm import PQueryDefinition
|
|
5
|
-
from kumoapi.rfm.pquery import (
|
|
6
|
-
Aggregation,
|
|
7
|
-
AggregationType,
|
|
8
|
-
BoolOp,
|
|
9
|
-
Column,
|
|
10
|
-
Condition,
|
|
11
|
-
Filter,
|
|
12
|
-
Float,
|
|
13
|
-
FloatList,
|
|
14
|
-
Int,
|
|
15
|
-
IntList,
|
|
16
|
-
LogicalOperation,
|
|
17
|
-
MemberOp,
|
|
18
|
-
RelOp,
|
|
19
|
-
Str,
|
|
20
|
-
StrList,
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
TableData = TypeVar('TableData')
|
|
24
|
-
ColumnData = TypeVar('ColumnData')
|
|
25
|
-
IndexData = TypeVar('IndexData')
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class PQueryBackend(Generic[TableData, ColumnData, IndexData], ABC):
|
|
29
|
-
@abstractmethod
|
|
30
|
-
def eval_aggregation_type(
|
|
31
|
-
self,
|
|
32
|
-
op: AggregationType,
|
|
33
|
-
feat: Optional[ColumnData],
|
|
34
|
-
batch: IndexData,
|
|
35
|
-
batch_size: int,
|
|
36
|
-
filter_na: bool = True,
|
|
37
|
-
) -> Tuple[ColumnData, IndexData]:
|
|
38
|
-
pass
|
|
39
|
-
|
|
40
|
-
@abstractmethod
|
|
41
|
-
def eval_rel_op(
|
|
42
|
-
self,
|
|
43
|
-
left: ColumnData,
|
|
44
|
-
op: RelOp,
|
|
45
|
-
right: Union[Int, Float, Str, None],
|
|
46
|
-
) -> ColumnData:
|
|
47
|
-
pass
|
|
48
|
-
|
|
49
|
-
@abstractmethod
|
|
50
|
-
def eval_member_op(
|
|
51
|
-
self,
|
|
52
|
-
left: ColumnData,
|
|
53
|
-
op: MemberOp,
|
|
54
|
-
right: Union[IntList, FloatList, StrList],
|
|
55
|
-
) -> ColumnData:
|
|
56
|
-
pass
|
|
57
|
-
|
|
58
|
-
@abstractmethod
|
|
59
|
-
def eval_bool_op(
|
|
60
|
-
self,
|
|
61
|
-
left: ColumnData,
|
|
62
|
-
op: BoolOp,
|
|
63
|
-
right: Optional[ColumnData],
|
|
64
|
-
) -> ColumnData:
|
|
65
|
-
pass
|
|
66
|
-
|
|
67
|
-
@abstractmethod
|
|
68
|
-
def eval_column(
|
|
69
|
-
self,
|
|
70
|
-
column: Column,
|
|
71
|
-
feat_dict: Dict[str, TableData],
|
|
72
|
-
filter_na: bool = True,
|
|
73
|
-
) -> Tuple[ColumnData, IndexData]:
|
|
74
|
-
pass
|
|
75
|
-
|
|
76
|
-
@abstractmethod
|
|
77
|
-
def eval_aggregation(
|
|
78
|
-
self,
|
|
79
|
-
aggr: Aggregation,
|
|
80
|
-
feat_dict: Dict[str, TableData],
|
|
81
|
-
time_dict: Dict[str, ColumnData],
|
|
82
|
-
batch_dict: Dict[str, IndexData],
|
|
83
|
-
anchor_time: ColumnData,
|
|
84
|
-
filter_na: bool = True,
|
|
85
|
-
num_forecasts: int = 1,
|
|
86
|
-
) -> Tuple[ColumnData, IndexData]:
|
|
87
|
-
pass
|
|
88
|
-
|
|
89
|
-
@abstractmethod
|
|
90
|
-
def eval_condition(
|
|
91
|
-
self,
|
|
92
|
-
condition: Condition,
|
|
93
|
-
feat_dict: Dict[str, TableData],
|
|
94
|
-
time_dict: Dict[str, ColumnData],
|
|
95
|
-
batch_dict: Dict[str, IndexData],
|
|
96
|
-
anchor_time: ColumnData,
|
|
97
|
-
filter_na: bool = True,
|
|
98
|
-
num_forecasts: int = 1,
|
|
99
|
-
) -> Tuple[ColumnData, IndexData]:
|
|
100
|
-
pass
|
|
101
|
-
|
|
102
|
-
@abstractmethod
|
|
103
|
-
def eval_logical_operation(
|
|
104
|
-
self,
|
|
105
|
-
logical_operation: LogicalOperation,
|
|
106
|
-
feat_dict: Dict[str, TableData],
|
|
107
|
-
time_dict: Dict[str, ColumnData],
|
|
108
|
-
batch_dict: Dict[str, IndexData],
|
|
109
|
-
anchor_time: ColumnData,
|
|
110
|
-
filter_na: bool = True,
|
|
111
|
-
num_forecasts: int = 1,
|
|
112
|
-
) -> Tuple[ColumnData, IndexData]:
|
|
113
|
-
pass
|
|
114
|
-
|
|
115
|
-
@abstractmethod
|
|
116
|
-
def eval_filter(
|
|
117
|
-
self,
|
|
118
|
-
filter: Filter,
|
|
119
|
-
feat_dict: Dict[str, TableData],
|
|
120
|
-
time_dict: Dict[str, ColumnData],
|
|
121
|
-
batch_dict: Dict[str, IndexData],
|
|
122
|
-
anchor_time: ColumnData,
|
|
123
|
-
) -> IndexData:
|
|
124
|
-
pass
|
|
125
|
-
|
|
126
|
-
@abstractmethod
|
|
127
|
-
def eval_pquery(
|
|
128
|
-
self,
|
|
129
|
-
query: PQueryDefinition,
|
|
130
|
-
feat_dict: Dict[str, TableData],
|
|
131
|
-
time_dict: Dict[str, ColumnData],
|
|
132
|
-
batch_dict: Dict[str, IndexData],
|
|
133
|
-
anchor_time: ColumnData,
|
|
134
|
-
num_forecasts: int = 1,
|
|
135
|
-
) -> Tuple[ColumnData, IndexData]:
|
|
136
|
-
pass
|
|
@@ -1,478 +0,0 @@
|
|
|
1
|
-
from typing import Dict, List, Optional, Tuple, Union
|
|
2
|
-
|
|
3
|
-
import numpy as np
|
|
4
|
-
import pandas as pd
|
|
5
|
-
from kumoapi.rfm import PQueryDefinition
|
|
6
|
-
from kumoapi.rfm.pquery import (
|
|
7
|
-
Aggregation,
|
|
8
|
-
AggregationType,
|
|
9
|
-
BoolOp,
|
|
10
|
-
Column,
|
|
11
|
-
Condition,
|
|
12
|
-
Filter,
|
|
13
|
-
Float,
|
|
14
|
-
FloatList,
|
|
15
|
-
Int,
|
|
16
|
-
IntList,
|
|
17
|
-
LogicalOperation,
|
|
18
|
-
MemberOp,
|
|
19
|
-
RelOp,
|
|
20
|
-
Str,
|
|
21
|
-
StrList,
|
|
22
|
-
)
|
|
23
|
-
|
|
24
|
-
from kumoai.experimental.rfm.pquery import PQueryBackend
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class PQueryPandasBackend(PQueryBackend[pd.DataFrame, pd.Series, np.ndarray]):
|
|
28
|
-
def eval_aggregation_type(
|
|
29
|
-
self,
|
|
30
|
-
op: AggregationType,
|
|
31
|
-
feat: Optional[pd.Series],
|
|
32
|
-
batch: np.ndarray,
|
|
33
|
-
batch_size: int,
|
|
34
|
-
filter_na: bool = True,
|
|
35
|
-
) -> Tuple[pd.Series, np.ndarray]:
|
|
36
|
-
|
|
37
|
-
if op != AggregationType.COUNT:
|
|
38
|
-
assert feat is not None
|
|
39
|
-
|
|
40
|
-
if feat is not None:
|
|
41
|
-
mask = feat.notna()
|
|
42
|
-
feat, batch = feat[mask], batch[mask]
|
|
43
|
-
|
|
44
|
-
if op == AggregationType.LIST_DISTINCT:
|
|
45
|
-
df = pd.DataFrame(dict(feat=feat, batch=batch))
|
|
46
|
-
df = df.drop_duplicates()
|
|
47
|
-
out = df.groupby('batch')['feat'].agg(list)
|
|
48
|
-
|
|
49
|
-
else:
|
|
50
|
-
df = pd.DataFrame(dict(feat=feat, batch=batch))
|
|
51
|
-
if op == AggregationType.AVG:
|
|
52
|
-
agg = 'mean'
|
|
53
|
-
elif op == AggregationType.COUNT:
|
|
54
|
-
agg = 'size'
|
|
55
|
-
else:
|
|
56
|
-
agg = op.lower()
|
|
57
|
-
out = df.groupby('batch')['feat'].agg(agg)
|
|
58
|
-
|
|
59
|
-
if not pd.api.types.is_datetime64_any_dtype(out):
|
|
60
|
-
out = out.astype('float32')
|
|
61
|
-
|
|
62
|
-
out.name = None
|
|
63
|
-
out.index.name = None
|
|
64
|
-
|
|
65
|
-
if op in {AggregationType.SUM, AggregationType.COUNT}:
|
|
66
|
-
out = out.reindex(range(batch_size), fill_value=0)
|
|
67
|
-
mask = np.ones(batch_size, dtype=bool)
|
|
68
|
-
return out, mask
|
|
69
|
-
|
|
70
|
-
mask = np.zeros(batch_size, dtype=bool)
|
|
71
|
-
mask[batch] = True
|
|
72
|
-
|
|
73
|
-
if filter_na:
|
|
74
|
-
return out.reset_index(drop=True), mask
|
|
75
|
-
|
|
76
|
-
out = out.reindex(range(batch_size), fill_value=pd.NA)
|
|
77
|
-
|
|
78
|
-
return out, mask
|
|
79
|
-
|
|
80
|
-
def eval_rel_op(
|
|
81
|
-
self,
|
|
82
|
-
left: pd.Series,
|
|
83
|
-
op: RelOp,
|
|
84
|
-
right: Union[Int, Float, Str, None],
|
|
85
|
-
) -> pd.Series:
|
|
86
|
-
|
|
87
|
-
if right is None:
|
|
88
|
-
if op == RelOp.EQ:
|
|
89
|
-
return left.isna()
|
|
90
|
-
assert op == RelOp.NEQ
|
|
91
|
-
return left.notna()
|
|
92
|
-
|
|
93
|
-
value = pd.Series([right.value], dtype=left.dtype).iloc[0]
|
|
94
|
-
|
|
95
|
-
if op == RelOp.EQ:
|
|
96
|
-
return (left == value).fillna(False).astype(bool)
|
|
97
|
-
if op == RelOp.NEQ:
|
|
98
|
-
out = (left != value).fillna(False).astype(bool)
|
|
99
|
-
out[left.isna()] = False # N/A != right should always be `False`.
|
|
100
|
-
return out
|
|
101
|
-
if op == RelOp.LEQ:
|
|
102
|
-
return (left <= value).fillna(False).astype(bool)
|
|
103
|
-
if op == RelOp.GEQ:
|
|
104
|
-
return (left >= value).fillna(False).astype(bool)
|
|
105
|
-
if op == RelOp.LT:
|
|
106
|
-
return (left < value).fillna(False).astype(bool)
|
|
107
|
-
if op == RelOp.GT:
|
|
108
|
-
return (left > value).fillna(False).astype(bool)
|
|
109
|
-
|
|
110
|
-
raise NotImplementedError(f"Operator '{op}' not implemented")
|
|
111
|
-
|
|
112
|
-
def eval_member_op(
|
|
113
|
-
self,
|
|
114
|
-
left: pd.Series,
|
|
115
|
-
op: MemberOp,
|
|
116
|
-
right: Union[IntList, FloatList, StrList],
|
|
117
|
-
) -> pd.Series:
|
|
118
|
-
|
|
119
|
-
if op == MemberOp.IN:
|
|
120
|
-
ser = pd.Series(right.value, dtype=left.dtype)
|
|
121
|
-
return left.isin(ser).astype(bool)
|
|
122
|
-
|
|
123
|
-
raise NotImplementedError(f"Operator '{op}' not implemented")
|
|
124
|
-
|
|
125
|
-
def eval_bool_op(
|
|
126
|
-
self,
|
|
127
|
-
left: pd.Series,
|
|
128
|
-
op: BoolOp,
|
|
129
|
-
right: Optional[pd.Series],
|
|
130
|
-
) -> pd.Series:
|
|
131
|
-
|
|
132
|
-
# TODO Implement Kleene-Priest three-value logic.
|
|
133
|
-
if op == BoolOp.AND:
|
|
134
|
-
assert right is not None
|
|
135
|
-
return left & right
|
|
136
|
-
if op == BoolOp.OR:
|
|
137
|
-
assert right is not None
|
|
138
|
-
return left | right
|
|
139
|
-
if op == BoolOp.NOT:
|
|
140
|
-
return ~left
|
|
141
|
-
|
|
142
|
-
raise NotImplementedError(f"Operator '{op}' not implemented")
|
|
143
|
-
|
|
144
|
-
def eval_column(
|
|
145
|
-
self,
|
|
146
|
-
column: Column,
|
|
147
|
-
feat_dict: Dict[str, pd.DataFrame],
|
|
148
|
-
filter_na: bool = True,
|
|
149
|
-
) -> Tuple[pd.Series, np.ndarray]:
|
|
150
|
-
|
|
151
|
-
out = feat_dict[column.table_name][column.column_name]
|
|
152
|
-
out = out.reset_index(drop=True)
|
|
153
|
-
|
|
154
|
-
if pd.api.types.is_float_dtype(out):
|
|
155
|
-
out = out.astype('float32')
|
|
156
|
-
|
|
157
|
-
out.name = None
|
|
158
|
-
out.index.name = None
|
|
159
|
-
|
|
160
|
-
mask = out.notna().to_numpy()
|
|
161
|
-
|
|
162
|
-
if not filter_na:
|
|
163
|
-
return out, mask
|
|
164
|
-
|
|
165
|
-
out = out[mask].reset_index(drop=True)
|
|
166
|
-
|
|
167
|
-
# Cast to primitive dtype:
|
|
168
|
-
if pd.api.types.is_integer_dtype(out):
|
|
169
|
-
out = out.astype('int64')
|
|
170
|
-
elif pd.api.types.is_bool_dtype(out):
|
|
171
|
-
out = out.astype('bool')
|
|
172
|
-
|
|
173
|
-
return out, mask
|
|
174
|
-
|
|
175
|
-
def eval_aggregation(
|
|
176
|
-
self,
|
|
177
|
-
aggr: Aggregation,
|
|
178
|
-
feat_dict: Dict[str, pd.DataFrame],
|
|
179
|
-
time_dict: Dict[str, pd.Series],
|
|
180
|
-
batch_dict: Dict[str, np.ndarray],
|
|
181
|
-
anchor_time: pd.Series,
|
|
182
|
-
filter_na: bool = True,
|
|
183
|
-
num_forecasts: int = 1,
|
|
184
|
-
) -> Tuple[pd.Series, np.ndarray]:
|
|
185
|
-
|
|
186
|
-
target_table = aggr.column.table_name
|
|
187
|
-
target_batch = batch_dict[target_table]
|
|
188
|
-
target_time = time_dict[target_table]
|
|
189
|
-
|
|
190
|
-
outs: List[pd.Series] = []
|
|
191
|
-
masks: List[np.ndarray] = []
|
|
192
|
-
for _ in range(num_forecasts):
|
|
193
|
-
anchor_target_time = anchor_time[target_batch]
|
|
194
|
-
anchor_target_time = anchor_target_time.reset_index(drop=True)
|
|
195
|
-
|
|
196
|
-
target_mask = target_time <= anchor_target_time + aggr.end_offset
|
|
197
|
-
|
|
198
|
-
if aggr.start is not None:
|
|
199
|
-
start_offset = aggr.start * aggr.time_unit.to_offset()
|
|
200
|
-
target_mask &= target_time > anchor_target_time + start_offset
|
|
201
|
-
else:
|
|
202
|
-
assert num_forecasts == 1
|
|
203
|
-
|
|
204
|
-
if aggr.filter is not None:
|
|
205
|
-
target_mask &= self.eval_filter(
|
|
206
|
-
filter=aggr.filter,
|
|
207
|
-
feat_dict=feat_dict,
|
|
208
|
-
time_dict=time_dict,
|
|
209
|
-
batch_dict=batch_dict,
|
|
210
|
-
anchor_time=anchor_time,
|
|
211
|
-
)
|
|
212
|
-
|
|
213
|
-
if (aggr.type == AggregationType.COUNT
|
|
214
|
-
and aggr.column.column_name == '*'):
|
|
215
|
-
target_feat = None
|
|
216
|
-
else:
|
|
217
|
-
target_feat, _ = self.eval_column(
|
|
218
|
-
aggr.column,
|
|
219
|
-
feat_dict,
|
|
220
|
-
filter_na=False,
|
|
221
|
-
)
|
|
222
|
-
target_feat = target_feat[target_mask]
|
|
223
|
-
|
|
224
|
-
out, mask = self.eval_aggregation_type(
|
|
225
|
-
aggr.type,
|
|
226
|
-
feat=target_feat,
|
|
227
|
-
batch=target_batch[target_mask],
|
|
228
|
-
batch_size=len(anchor_time),
|
|
229
|
-
filter_na=False if num_forecasts > 1 else filter_na,
|
|
230
|
-
)
|
|
231
|
-
outs.append(out)
|
|
232
|
-
masks.append(mask)
|
|
233
|
-
|
|
234
|
-
if num_forecasts > 1:
|
|
235
|
-
anchor_time = anchor_time + aggr.end_offset
|
|
236
|
-
|
|
237
|
-
if len(outs) == 1:
|
|
238
|
-
assert len(masks) == 1
|
|
239
|
-
return outs[0], masks[0]
|
|
240
|
-
|
|
241
|
-
out = pd.Series([list(ser) for ser in zip(*outs)])
|
|
242
|
-
mask = np.stack(masks, axis=-1).any(axis=-1) # type: ignore
|
|
243
|
-
|
|
244
|
-
if filter_na:
|
|
245
|
-
out = out[mask].reset_index(drop=True)
|
|
246
|
-
|
|
247
|
-
return out, mask
|
|
248
|
-
|
|
249
|
-
def eval_condition(
|
|
250
|
-
self,
|
|
251
|
-
condition: Condition,
|
|
252
|
-
feat_dict: Dict[str, pd.DataFrame],
|
|
253
|
-
time_dict: Dict[str, pd.Series],
|
|
254
|
-
batch_dict: Dict[str, np.ndarray],
|
|
255
|
-
anchor_time: pd.Series,
|
|
256
|
-
filter_na: bool = True,
|
|
257
|
-
num_forecasts: int = 1,
|
|
258
|
-
) -> Tuple[pd.Series, np.ndarray]:
|
|
259
|
-
|
|
260
|
-
if num_forecasts > 1:
|
|
261
|
-
raise NotImplementedError("Forecasting not yet implemented for "
|
|
262
|
-
"non-regression tasks")
|
|
263
|
-
|
|
264
|
-
if isinstance(condition.left, Column):
|
|
265
|
-
left, mask = self.eval_column(
|
|
266
|
-
column=condition.left,
|
|
267
|
-
feat_dict=feat_dict,
|
|
268
|
-
filter_na=filter_na if condition.right is not None else False,
|
|
269
|
-
)
|
|
270
|
-
else:
|
|
271
|
-
assert isinstance(condition.left, Aggregation)
|
|
272
|
-
left, mask = self.eval_aggregation(
|
|
273
|
-
aggr=condition.left,
|
|
274
|
-
feat_dict=feat_dict,
|
|
275
|
-
time_dict=time_dict,
|
|
276
|
-
batch_dict=batch_dict,
|
|
277
|
-
anchor_time=anchor_time,
|
|
278
|
-
filter_na=filter_na if condition.right is not None else False,
|
|
279
|
-
)
|
|
280
|
-
|
|
281
|
-
if filter_na and condition.right is None:
|
|
282
|
-
mask = np.ones(len(left), dtype=bool)
|
|
283
|
-
|
|
284
|
-
if isinstance(condition.op, RelOp):
|
|
285
|
-
out = self.eval_rel_op(
|
|
286
|
-
left=left,
|
|
287
|
-
op=condition.op,
|
|
288
|
-
right=condition.right,
|
|
289
|
-
)
|
|
290
|
-
else:
|
|
291
|
-
assert isinstance(condition.op, MemberOp)
|
|
292
|
-
out = self.eval_member_op(
|
|
293
|
-
left=left,
|
|
294
|
-
op=condition.op,
|
|
295
|
-
right=condition.right,
|
|
296
|
-
)
|
|
297
|
-
|
|
298
|
-
return out, mask
|
|
299
|
-
|
|
300
|
-
def eval_logical_operation(
|
|
301
|
-
self,
|
|
302
|
-
logical_operation: LogicalOperation,
|
|
303
|
-
feat_dict: Dict[str, pd.DataFrame],
|
|
304
|
-
time_dict: Dict[str, pd.Series],
|
|
305
|
-
batch_dict: Dict[str, np.ndarray],
|
|
306
|
-
anchor_time: pd.Series,
|
|
307
|
-
filter_na: bool = True,
|
|
308
|
-
num_forecasts: int = 1,
|
|
309
|
-
) -> Tuple[pd.Series, np.ndarray]:
|
|
310
|
-
|
|
311
|
-
if num_forecasts > 1:
|
|
312
|
-
raise NotImplementedError("Forecasting not yet implemented for "
|
|
313
|
-
"non-regression tasks")
|
|
314
|
-
|
|
315
|
-
if isinstance(logical_operation.left, Condition):
|
|
316
|
-
left, mask = self.eval_condition(
|
|
317
|
-
condition=logical_operation.left,
|
|
318
|
-
feat_dict=feat_dict,
|
|
319
|
-
time_dict=time_dict,
|
|
320
|
-
batch_dict=batch_dict,
|
|
321
|
-
anchor_time=anchor_time,
|
|
322
|
-
filter_na=False,
|
|
323
|
-
)
|
|
324
|
-
else:
|
|
325
|
-
assert isinstance(logical_operation.left, LogicalOperation)
|
|
326
|
-
left, mask = self.eval_logical_operation(
|
|
327
|
-
logical_operation=logical_operation.left,
|
|
328
|
-
feat_dict=feat_dict,
|
|
329
|
-
time_dict=time_dict,
|
|
330
|
-
batch_dict=batch_dict,
|
|
331
|
-
anchor_time=anchor_time,
|
|
332
|
-
filter_na=False,
|
|
333
|
-
)
|
|
334
|
-
|
|
335
|
-
right = right_mask = None
|
|
336
|
-
if isinstance(logical_operation.right, Condition):
|
|
337
|
-
right, right_mask = self.eval_condition(
|
|
338
|
-
condition=logical_operation.right,
|
|
339
|
-
feat_dict=feat_dict,
|
|
340
|
-
time_dict=time_dict,
|
|
341
|
-
batch_dict=batch_dict,
|
|
342
|
-
anchor_time=anchor_time,
|
|
343
|
-
filter_na=False,
|
|
344
|
-
)
|
|
345
|
-
elif isinstance(logical_operation.right, LogicalOperation):
|
|
346
|
-
right, right_mask = self.eval_logical_operation(
|
|
347
|
-
logical_operation=logical_operation.right,
|
|
348
|
-
feat_dict=feat_dict,
|
|
349
|
-
time_dict=time_dict,
|
|
350
|
-
batch_dict=batch_dict,
|
|
351
|
-
anchor_time=anchor_time,
|
|
352
|
-
filter_na=False,
|
|
353
|
-
)
|
|
354
|
-
|
|
355
|
-
out = self.eval_bool_op(left, logical_operation.op, right)
|
|
356
|
-
|
|
357
|
-
if right_mask is not None:
|
|
358
|
-
mask &= right_mask
|
|
359
|
-
|
|
360
|
-
if filter_na:
|
|
361
|
-
out = out[mask].reset_index(drop=True)
|
|
362
|
-
|
|
363
|
-
return out, mask
|
|
364
|
-
|
|
365
|
-
def eval_filter(
|
|
366
|
-
self,
|
|
367
|
-
filter: Filter,
|
|
368
|
-
feat_dict: Dict[str, pd.DataFrame],
|
|
369
|
-
time_dict: Dict[str, pd.Series],
|
|
370
|
-
batch_dict: Dict[str, np.ndarray],
|
|
371
|
-
anchor_time: pd.Series,
|
|
372
|
-
) -> np.ndarray:
|
|
373
|
-
if isinstance(filter.condition, Condition):
|
|
374
|
-
return self.eval_condition(
|
|
375
|
-
condition=filter.condition,
|
|
376
|
-
feat_dict=feat_dict,
|
|
377
|
-
time_dict=time_dict,
|
|
378
|
-
batch_dict=batch_dict,
|
|
379
|
-
anchor_time=anchor_time,
|
|
380
|
-
filter_na=False,
|
|
381
|
-
)[0].to_numpy()
|
|
382
|
-
else:
|
|
383
|
-
assert isinstance(filter.condition, LogicalOperation)
|
|
384
|
-
return self.eval_logical_operation(
|
|
385
|
-
logical_operation=filter.condition,
|
|
386
|
-
feat_dict=feat_dict,
|
|
387
|
-
time_dict=time_dict,
|
|
388
|
-
batch_dict=batch_dict,
|
|
389
|
-
anchor_time=anchor_time,
|
|
390
|
-
filter_na=False,
|
|
391
|
-
)[0].to_numpy()
|
|
392
|
-
|
|
393
|
-
def eval_pquery(
|
|
394
|
-
self,
|
|
395
|
-
query: PQueryDefinition,
|
|
396
|
-
feat_dict: Dict[str, pd.DataFrame],
|
|
397
|
-
time_dict: Dict[str, pd.Series],
|
|
398
|
-
batch_dict: Dict[str, np.ndarray],
|
|
399
|
-
anchor_time: pd.Series,
|
|
400
|
-
num_forecasts: int = 1,
|
|
401
|
-
) -> Tuple[pd.Series, np.ndarray]:
|
|
402
|
-
|
|
403
|
-
mask = np.ones(len(anchor_time), dtype=bool)
|
|
404
|
-
|
|
405
|
-
if query.entity.filter is not None:
|
|
406
|
-
mask &= self.eval_filter(
|
|
407
|
-
filter=query.entity.filter,
|
|
408
|
-
feat_dict=feat_dict,
|
|
409
|
-
time_dict=time_dict,
|
|
410
|
-
batch_dict=batch_dict,
|
|
411
|
-
anchor_time=anchor_time,
|
|
412
|
-
)
|
|
413
|
-
|
|
414
|
-
if getattr(query, 'assuming', None) is not None:
|
|
415
|
-
if isinstance(query.assuming, Condition):
|
|
416
|
-
mask &= self.eval_condition(
|
|
417
|
-
condition=query.assuming,
|
|
418
|
-
feat_dict=feat_dict,
|
|
419
|
-
time_dict=time_dict,
|
|
420
|
-
batch_dict=batch_dict,
|
|
421
|
-
anchor_time=anchor_time,
|
|
422
|
-
filter_na=False,
|
|
423
|
-
)[0].to_numpy()
|
|
424
|
-
else:
|
|
425
|
-
assert isinstance(query.assuming, LogicalOperation)
|
|
426
|
-
mask &= self.eval_logical_operation(
|
|
427
|
-
logical_operation=query.assuming,
|
|
428
|
-
feat_dict=feat_dict,
|
|
429
|
-
time_dict=time_dict,
|
|
430
|
-
batch_dict=batch_dict,
|
|
431
|
-
anchor_time=anchor_time,
|
|
432
|
-
filter_na=False,
|
|
433
|
-
)[0].to_numpy()
|
|
434
|
-
|
|
435
|
-
if isinstance(query.target, Column):
|
|
436
|
-
out, _mask = self.eval_column(
|
|
437
|
-
column=query.target,
|
|
438
|
-
feat_dict=feat_dict,
|
|
439
|
-
filter_na=True,
|
|
440
|
-
)
|
|
441
|
-
elif isinstance(query.target, Aggregation):
|
|
442
|
-
out, _mask = self.eval_aggregation(
|
|
443
|
-
aggr=query.target,
|
|
444
|
-
feat_dict=feat_dict,
|
|
445
|
-
time_dict=time_dict,
|
|
446
|
-
batch_dict=batch_dict,
|
|
447
|
-
anchor_time=anchor_time,
|
|
448
|
-
filter_na=True,
|
|
449
|
-
num_forecasts=num_forecasts,
|
|
450
|
-
)
|
|
451
|
-
elif isinstance(query.target, Condition):
|
|
452
|
-
out, _mask = self.eval_condition(
|
|
453
|
-
condition=query.target,
|
|
454
|
-
feat_dict=feat_dict,
|
|
455
|
-
time_dict=time_dict,
|
|
456
|
-
batch_dict=batch_dict,
|
|
457
|
-
anchor_time=anchor_time,
|
|
458
|
-
filter_na=True,
|
|
459
|
-
num_forecasts=num_forecasts,
|
|
460
|
-
)
|
|
461
|
-
else:
|
|
462
|
-
assert isinstance(query.target, LogicalOperation)
|
|
463
|
-
out, _mask = self.eval_logical_operation(
|
|
464
|
-
logical_operation=query.target,
|
|
465
|
-
feat_dict=feat_dict,
|
|
466
|
-
time_dict=time_dict,
|
|
467
|
-
batch_dict=batch_dict,
|
|
468
|
-
anchor_time=anchor_time,
|
|
469
|
-
filter_na=True,
|
|
470
|
-
num_forecasts=num_forecasts,
|
|
471
|
-
)
|
|
472
|
-
|
|
473
|
-
out = out[mask[_mask]]
|
|
474
|
-
mask &= _mask
|
|
475
|
-
|
|
476
|
-
out = out.reset_index(drop=True)
|
|
477
|
-
|
|
478
|
-
return out, mask
|
|
File without changes
|
{kumoai-2.11.0.dev202510181831.dist-info → kumoai-2.12.0.dev202511111731.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{kumoai-2.11.0.dev202510181831.dist-info → kumoai-2.12.0.dev202511111731.dist-info}/top_level.txt
RENAMED
|
File without changes
|