kumoai-2.10.0.dev202509231831-cp313-cp313-macosx_11_0_arm64.whl → kumoai-2.14.0.dev202512161731-cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of kumoai might be problematic.
- kumoai/__init__.py +22 -11
- kumoai/_version.py +1 -1
- kumoai/client/client.py +17 -16
- kumoai/client/endpoints.py +1 -0
- kumoai/client/pquery.py +6 -2
- kumoai/client/rfm.py +37 -8
- kumoai/connector/utils.py +23 -2
- kumoai/experimental/rfm/__init__.py +164 -46
- kumoai/experimental/rfm/backend/__init__.py +0 -0
- kumoai/experimental/rfm/backend/local/__init__.py +42 -0
- kumoai/experimental/rfm/{local_graph_store.py → backend/local/graph_store.py} +49 -86
- kumoai/experimental/rfm/backend/local/sampler.py +315 -0
- kumoai/experimental/rfm/backend/local/table.py +119 -0
- kumoai/experimental/rfm/backend/snow/__init__.py +37 -0
- kumoai/experimental/rfm/backend/snow/sampler.py +274 -0
- kumoai/experimental/rfm/backend/snow/table.py +135 -0
- kumoai/experimental/rfm/backend/sqlite/__init__.py +32 -0
- kumoai/experimental/rfm/backend/sqlite/sampler.py +353 -0
- kumoai/experimental/rfm/backend/sqlite/table.py +126 -0
- kumoai/experimental/rfm/base/__init__.py +25 -0
- kumoai/experimental/rfm/base/column.py +66 -0
- kumoai/experimental/rfm/base/sampler.py +773 -0
- kumoai/experimental/rfm/base/source.py +19 -0
- kumoai/experimental/rfm/base/sql_sampler.py +60 -0
- kumoai/experimental/rfm/{local_table.py → base/table.py} +245 -156
- kumoai/experimental/rfm/{local_graph.py → graph.py} +425 -137
- kumoai/experimental/rfm/infer/__init__.py +6 -0
- kumoai/experimental/rfm/infer/dtype.py +79 -0
- kumoai/experimental/rfm/infer/pkey.py +126 -0
- kumoai/experimental/rfm/infer/time_col.py +62 -0
- kumoai/experimental/rfm/infer/timestamp.py +7 -4
- kumoai/experimental/rfm/pquery/__init__.py +4 -4
- kumoai/experimental/rfm/pquery/{backend.py → executor.py} +24 -58
- kumoai/experimental/rfm/pquery/{pandas_backend.py → pandas_executor.py} +278 -224
- kumoai/experimental/rfm/rfm.py +669 -246
- kumoai/experimental/rfm/sagemaker.py +138 -0
- kumoai/jobs.py +1 -0
- kumoai/pquery/predictive_query.py +10 -6
- kumoai/spcs.py +1 -3
- kumoai/testing/decorators.py +1 -1
- kumoai/testing/snow.py +50 -0
- kumoai/trainer/trainer.py +12 -10
- kumoai/utils/__init__.py +3 -2
- kumoai/utils/progress_logger.py +239 -4
- kumoai/utils/sql.py +3 -0
- {kumoai-2.10.0.dev202509231831.dist-info → kumoai-2.14.0.dev202512161731.dist-info}/METADATA +15 -5
- {kumoai-2.10.0.dev202509231831.dist-info → kumoai-2.14.0.dev202512161731.dist-info}/RECORD +50 -32
- kumoai/experimental/rfm/local_graph_sampler.py +0 -176
- kumoai/experimental/rfm/local_pquery_driver.py +0 -404
- kumoai/experimental/rfm/utils.py +0 -344
- {kumoai-2.10.0.dev202509231831.dist-info → kumoai-2.14.0.dev202512161731.dist-info}/WHEEL +0 -0
- {kumoai-2.10.0.dev202509231831.dist-info → kumoai-2.14.0.dev202512161731.dist-info}/licenses/LICENSE +0 -0
- {kumoai-2.10.0.dev202509231831.dist-info → kumoai-2.14.0.dev202512161731.dist-info}/top_level.txt +0 -0
kumoai/experimental/rfm/infer/__init__.py
@@ -1,9 +1,15 @@
+from .dtype import infer_dtype
+from .pkey import infer_primary_key
+from .time_col import infer_time_column
 from .id import contains_id
 from .timestamp import contains_timestamp
 from .categorical import contains_categorical
 from .multicategorical import contains_multicategorical
 
 __all__ = [
+    'infer_dtype',
+    'infer_primary_key',
+    'infer_time_column',
     'contains_id',
     'contains_timestamp',
     'contains_categorical',
kumoai/experimental/rfm/infer/dtype.py
@@ -0,0 +1,79 @@
+from typing import Dict
+
+import numpy as np
+import pandas as pd
+import pyarrow as pa
+from kumoapi.typing import Dtype
+
+PANDAS_TO_DTYPE: Dict[str, Dtype] = {
+    'bool': Dtype.bool,
+    'boolean': Dtype.bool,
+    'int8': Dtype.int,
+    'int16': Dtype.int,
+    'int32': Dtype.int,
+    'int64': Dtype.int,
+    'float16': Dtype.float,
+    'float32': Dtype.float,
+    'float64': Dtype.float,
+    'object': Dtype.string,
+    'string': Dtype.string,
+    'string[python]': Dtype.string,
+    'string[pyarrow]': Dtype.string,
+    'binary': Dtype.binary,
+}
+
+
+def infer_dtype(ser: pd.Series) -> Dtype:
+    """Extracts the :class:`Dtype` from a :class:`pandas.Series`.
+
+    Args:
+        ser: A :class:`pandas.Series` to analyze.
+
+    Returns:
+        The data type.
+    """
+    if pd.api.types.is_datetime64_any_dtype(ser.dtype):
+        return Dtype.date
+    if pd.api.types.is_timedelta64_dtype(ser.dtype):
+        return Dtype.timedelta
+    if isinstance(ser.dtype, pd.CategoricalDtype):
+        return Dtype.string
+
+    if (pd.api.types.is_object_dtype(ser.dtype)
+            and not isinstance(ser.dtype, pd.ArrowDtype)):
+        index = ser.iloc[:1000].first_valid_index()
+        if index is not None and pd.api.types.is_list_like(ser[index]):
+            pos = ser.index.get_loc(index)
+            assert isinstance(pos, int)
+            ser = ser.iloc[pos:pos + 1000].dropna()
+            arr = pa.array(ser.tolist())
+            ser = pd.Series(arr, dtype=pd.ArrowDtype(arr.type))
+
+    if isinstance(ser.dtype, pd.ArrowDtype):
+        if pa.types.is_list(ser.dtype.pyarrow_dtype):
+            elem_dtype = ser.dtype.pyarrow_dtype.value_type
+            if pa.types.is_integer(elem_dtype):
+                return Dtype.intlist
+            if pa.types.is_floating(elem_dtype):
+                return Dtype.floatlist
+            if pa.types.is_decimal(elem_dtype):
+                return Dtype.floatlist
+            if pa.types.is_string(elem_dtype):
+                return Dtype.stringlist
+            if pa.types.is_null(elem_dtype):
+                return Dtype.floatlist
+
+    if isinstance(ser.dtype, np.dtype):
+        dtype_str = str(ser.dtype).lower()
+    elif isinstance(ser.dtype, pd.api.extensions.ExtensionDtype):
+        dtype_str = ser.dtype.name.lower()
+        dtype_str = dtype_str.split('[')[0]  # Remove backend metadata
+    elif isinstance(ser.dtype, pa.DataType):
+        dtype_str = str(ser.dtype).lower()
+    else:
+        dtype_str = 'object'
+
+    if dtype_str not in PANDAS_TO_DTYPE:
+        raise ValueError(f"Unsupported data type '{ser.dtype}'")
+
+    return PANDAS_TO_DTYPE[dtype_str]
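To make the mapping concrete, here is a small usage sketch, assuming `kumoai` and its `kumoapi` dependency are installed; the import path follows the `infer/__init__.py` hunk above:

import pandas as pd

from kumoai.experimental.rfm.infer import infer_dtype

# Numeric and string columns resolve through the PANDAS_TO_DTYPE table:
print(infer_dtype(pd.Series([1, 2, 3])))    # Dtype.int
print(infer_dtype(pd.Series([1.0, None])))  # Dtype.float
print(infer_dtype(pd.Series(['a', 'b'])))   # Dtype.string

# Datetime-typed columns short-circuit to Dtype.date:
print(infer_dtype(pd.Series(pd.to_datetime(['2024-01-01']))))  # Dtype.date

# Object columns holding lists are re-typed via pyarrow and map to a list dtype:
print(infer_dtype(pd.Series([[1, 2], [3]])))  # Dtype.intlist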
kumoai/experimental/rfm/infer/pkey.py
@@ -0,0 +1,126 @@
+import re
+import warnings
+from typing import Optional
+
+import pandas as pd
+
+
+def infer_primary_key(
+    table_name: str,
+    df: pd.DataFrame,
+    candidates: list[str],
+) -> Optional[str]:
+    r"""Auto-detect potential primary key column.
+
+    Args:
+        table_name: The table name.
+        df: The pandas DataFrame to analyze.
+        candidates: A list of potential candidates.
+
+    Returns:
+        The name of the detected primary key, or ``None`` if not found.
+    """
+    # A list of (potentially modified) table names that are eligible to match
+    # with a primary key, i.e.:
+    # - UserInfo -> User
+    # - snakecase <-> camelcase
+    # - camelcase <-> snakecase
+    # - plural <-> singular (users -> user, eligibilities -> eligibility)
+    # - verb -> noun (qualifying -> qualify)
+    _table_names = {table_name}
+    if table_name.lower().endswith('_info'):
+        _table_names.add(table_name[:-5])
+    elif table_name.lower().endswith('info'):
+        _table_names.add(table_name[:-4])
+
+    table_names = set()
+    for _table_name in _table_names:
+        table_names.add(_table_name.lower())
+        snakecase = re.sub(r'(.)([A-Z][a-z]+)', r'\1_\2', _table_name)
+        snakecase = re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', snakecase)
+        table_names.add(snakecase.lower())
+        camelcase = _table_name.replace('_', '')
+        table_names.add(camelcase.lower())
+        if _table_name.lower().endswith('s'):
+            table_names.add(_table_name.lower()[:-1])
+            table_names.add(snakecase.lower()[:-1])
+            table_names.add(camelcase.lower()[:-1])
+        else:
+            table_names.add(_table_name.lower() + 's')
+            table_names.add(snakecase.lower() + 's')
+            table_names.add(camelcase.lower() + 's')
+        if _table_name.lower().endswith('ies'):
+            table_names.add(_table_name.lower()[:-3] + 'y')
+            table_names.add(snakecase.lower()[:-3] + 'y')
+            table_names.add(camelcase.lower()[:-3] + 'y')
+        elif _table_name.lower().endswith('y'):
+            table_names.add(_table_name.lower()[:-1] + 'ies')
+            table_names.add(snakecase.lower()[:-1] + 'ies')
+            table_names.add(camelcase.lower()[:-1] + 'ies')
+        if _table_name.lower().endswith('ing'):
+            table_names.add(_table_name.lower()[:-3])
+            table_names.add(snakecase.lower()[:-3])
+            table_names.add(camelcase.lower()[:-3])
+
+    scores: list[tuple[str, float]] = []
+    for col_name in candidates:
+        col_name_lower = col_name.lower()
+
+        score = 0.0
+
+        if col_name_lower == 'id':
+            score += 4
+
+        for table_name_lower in table_names:
+
+            if col_name_lower == table_name_lower:
+                score += 4  # USER -> USER
+                break
+
+            for suffix in ['id', 'hash', 'key', 'code', 'uuid']:
+                if not col_name_lower.endswith(suffix):
+                    continue
+
+                if col_name_lower == f'{table_name_lower}_{suffix}':
+                    score += 5  # USER -> USER_ID
+                    break
+
+                if col_name_lower == f'{table_name_lower}{suffix}':
+                    score += 5  # User -> UserId
+                    break
+
+                if col_name_lower.endswith(f'{table_name_lower}_{suffix}'):
+                    score += 2
+
+                if col_name_lower.endswith(f'{table_name_lower}{suffix}'):
+                    score += 2
+
+        # `rel-bench` hard-coding :(
+        if table_name == 'studies' and col_name == 'nct_id':
+            score += 1
+
+        ser = df[col_name].iloc[:1_000_000]
+        score += 3 * (ser.nunique() / len(ser))
+
+        scores.append((col_name, score))
+
+    scores = [x for x in scores if x[-1] >= 4]
+    scores.sort(key=lambda x: x[-1], reverse=True)
+
+    if len(scores) == 0:
+        return None
+
+    if len(scores) == 1:
+        return scores[0][0]
+
+    # In case of multiple candidates, only return one if its score is unique:
+    if scores[0][1] != scores[1][1]:
+        return scores[0][0]
+
+    max_score = max(score for _, score in scores)
+    candidates = [col_name for col_name, score in scores if score == max_score]
+    warnings.warn(f"Found multiple potential primary keys in table "
+                  f"'{table_name}': {candidates}. Please specify the primary "
+                  f"key for this table manually.")
+
+    return None
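As a usage sketch (the table and column names here are illustrative, not from the package): the scorer rewards `id`-style suffixes that embed a singular or plural form of the table name, plus near-unique values:

import pandas as pd

from kumoai.experimental.rfm.infer import infer_primary_key

df = pd.DataFrame({
    'user_id': [1, 2, 3],  # Unique, and matches the singularized table name.
    'age': [31, 27, 44],
})

# 'users' also matches its singular form 'user', so 'user_id' scores
# 5 (suffix match) + 3 (fully unique) = 8, well above the cutoff of 4:
print(infer_primary_key('users', df, ['user_id', 'age']))  # 'user_id'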
kumoai/experimental/rfm/infer/time_col.py
@@ -0,0 +1,62 @@
+import re
+import warnings
+from typing import Optional
+
+import pandas as pd
+
+
+def infer_time_column(
+    df: pd.DataFrame,
+    candidates: list[str],
+) -> Optional[str]:
+    r"""Auto-detect potential time column.
+
+    Args:
+        df: The pandas DataFrame to analyze.
+        candidates: A list of potential candidates.
+
+    Returns:
+        The name of the detected time column, or ``None`` if not found.
+    """
+    candidates = [  # Exclude all candidates with `*last*` in column names:
+        col_name for col_name in candidates
+        if not re.search(r'(^|_)last(_|$)', col_name, re.IGNORECASE)
+    ]
+
+    if len(candidates) == 0:
+        return None
+
+    if len(candidates) == 1:
+        return candidates[0]
+
+    # If there exists a dedicated `create*` column, use it as time column:
+    create_candidates = [
+        candidate for candidate in candidates
+        if candidate.lower().startswith('create')
+    ]
+    if len(create_candidates) == 1:
+        return create_candidates[0]
+    if len(create_candidates) > 1:
+        candidates = create_candidates
+
+    # Find the most optimal time column. Usually, it is the one pointing to
+    # the oldest timestamps:
+    with warnings.catch_warnings():
+        warnings.filterwarnings('ignore', message='Could not infer format')
+        min_timestamp_dict = {
+            key: pd.to_datetime(df[key].iloc[:10_000], errors='coerce')
+            for key in candidates
+        }
+    min_timestamp_dict = {
+        key: value.min().tz_localize(None)
+        for key, value in min_timestamp_dict.items()
+    }
+    min_timestamp_dict = {
+        key: value
+        for key, value in min_timestamp_dict.items() if not pd.isna(value)
+    }
+
+    if len(min_timestamp_dict) == 0:
+        return None
+
+    return min(min_timestamp_dict, key=min_timestamp_dict.get)  # type: ignore
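A matching sketch for the time-column heuristic: `*last*` columns are dropped first, and a lone `create*` column wins before any timestamp comparison is needed:

import pandas as pd

from kumoai.experimental.rfm.infer import infer_time_column

df = pd.DataFrame({
    'created_at': pd.to_datetime(['2020-01-01', '2020-06-01']),
    'updated_at': pd.to_datetime(['2021-01-01', '2021-06-01']),
    'last_login': pd.to_datetime(['2022-01-01', '2022-06-01']),
})

# 'last_login' is excluded by the `*last*` filter, and 'created_at' is the
# single remaining 'create*' candidate:
print(infer_time_column(df, list(df.columns)))  # 'created_at'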
kumoai/experimental/rfm/infer/timestamp.py
@@ -2,6 +2,7 @@ import re
 import warnings
 
 import pandas as pd
+from dateutil.parser import UnknownTimezoneWarning
 from kumoapi.typing import Dtype, Stype
 
 
@@ -20,9 +21,7 @@ def contains_timestamp(ser: pd.Series, column_name: str, dtype: Dtype) -> bool:
         column_name,
         re.IGNORECASE,
     )
-
-    if match is not None:
-        return True
+    score = 0.3 if match is not None else 0.0
 
     ser = ser.iloc[:100]
     ser = ser.dropna()
@@ -34,5 +33,9 @@ def contains_timestamp(ser: pd.Series, column_name: str, dtype: Dtype) -> bool:
     ser = ser.astype(str)  # Avoid parsing numbers as unix timestamps.
 
     with warnings.catch_warnings():
+        warnings.simplefilter('ignore', UnknownTimezoneWarning)
         warnings.filterwarnings('ignore', message='Could not infer format')
-
+        mask = pd.to_datetime(ser, errors='coerce').notna()
+    score += int(mask.sum()) / len(mask)
+
+    return score >= 1.0
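The net effect: a name match no longer decides the outcome on its own. It contributes 0.3 to a score, the fraction of parseable sampled values contributes the rest, and the column qualifies only when the total reaches 1.0. A standalone sketch of that logic (this mirrors, but is not, the library function):

import pandas as pd

def looks_like_timestamp(ser: pd.Series, name_matches: bool) -> bool:
    # Hypothetical helper replicating the new scoring in contains_timestamp.
    score = 0.3 if name_matches else 0.0
    ser = ser.iloc[:100].dropna().astype(str)
    mask = pd.to_datetime(ser, errors='coerce').notna()
    score += int(mask.sum()) / len(mask)
    return score >= 1.0

ser = pd.Series(['2024-01-01', '2024-02-01', '2024-03-01', 'n/a'])
print(looks_like_timestamp(ser, name_matches=True))   # 0.3 + 0.75 >= 1.0 -> True
print(looks_like_timestamp(ser, name_matches=False))  # 0.75 < 1.0 -> False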
kumoai/experimental/rfm/pquery/__init__.py
@@ -1,7 +1,7 @@
-from .backend import PQueryBackend
-from .pandas_backend import PQueryPandasBackend
+from .executor import PQueryExecutor
+from .pandas_executor import PQueryPandasExecutor
 
 __all__ = [
-    'PQueryBackend',
-    'PQueryPandasBackend',
+    'PQueryExecutor',
+    'PQueryPandasExecutor',
 ]
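Downstream code that imported the old names needs to switch to the new ones:

# New import path after the backend -> executor rename:
from kumoai.experimental.rfm.pquery import PQueryExecutor, PQueryPandasExecutor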
kumoai/experimental/rfm/pquery/{backend.py → executor.py}
@@ -1,23 +1,14 @@
 from abc import ABC, abstractmethod
-from typing import Dict, Generic, Optional, Tuple, TypeVar, Union
+from typing import Dict, Generic, Tuple, TypeVar
 
-from kumoapi.
-from kumoapi.
+from kumoapi.pquery import ValidatedPredictiveQuery
+from kumoapi.pquery.AST import (
     Aggregation,
-    AggregationType,
-    BoolOp,
     Column,
     Condition,
     Filter,
-    Float,
-    FloatList,
-    Int,
-    IntList,
+    Join,
     LogicalOperation,
-    MemberOp,
-    RelOp,
-    Str,
-    StrList,
 )
 
 TableData = TypeVar('TableData')
@@ -25,58 +16,33 @@ ColumnData = TypeVar('ColumnData')
 IndexData = TypeVar('IndexData')
 
 
-class PQueryBackend(Generic[TableData, ColumnData, IndexData], ABC):
+class PQueryExecutor(Generic[TableData, ColumnData, IndexData], ABC):
     @abstractmethod
-    def
+    def execute_column(
         self,
-
-
-        batch: IndexData,
-        batch_size: int,
+        column: Column,
+        feat_dict: Dict[str, TableData],
         filter_na: bool = True,
     ) -> Tuple[ColumnData, IndexData]:
         pass
 
     @abstractmethod
-    def
-        self,
-        left: ColumnData,
-        op: RelOp,
-        right: Union[Int, Float, Str, None],
-    ) -> ColumnData:
-        pass
-
-    @abstractmethod
-    def eval_member_op(
-        self,
-        left: ColumnData,
-        op: MemberOp,
-        right: Union[IntList, FloatList, StrList],
-    ) -> ColumnData:
-        pass
-
-    @abstractmethod
-    def eval_bool_op(
-        self,
-        left: ColumnData,
-        op: BoolOp,
-        right: Optional[ColumnData],
-    ) -> ColumnData:
-        pass
-
-    @abstractmethod
-    def eval_column(
+    def execute_aggregation(
         self,
-
+        aggr: Aggregation,
         feat_dict: Dict[str, TableData],
+        time_dict: Dict[str, ColumnData],
+        batch_dict: Dict[str, IndexData],
+        anchor_time: ColumnData,
         filter_na: bool = True,
+        num_forecasts: int = 1,
     ) -> Tuple[ColumnData, IndexData]:
         pass
 
     @abstractmethod
-    def
+    def execute_condition(
         self,
-
+        condition: Condition,
         feat_dict: Dict[str, TableData],
         time_dict: Dict[str, ColumnData],
         batch_dict: Dict[str, IndexData],
@@ -87,9 +53,9 @@ class PQueryBackend(Generic[TableData, ColumnData, IndexData], ABC):
         pass
 
     @abstractmethod
-    def
+    def execute_logical_operation(
         self,
-
+        logical_operation: LogicalOperation,
         feat_dict: Dict[str, TableData],
         time_dict: Dict[str, ColumnData],
         batch_dict: Dict[str, IndexData],
@@ -100,9 +66,9 @@ class PQueryBackend(Generic[TableData, ColumnData, IndexData], ABC):
         pass
 
     @abstractmethod
-    def
+    def execute_join(
        self,
-
+        join: Join,
         feat_dict: Dict[str, TableData],
         time_dict: Dict[str, ColumnData],
         batch_dict: Dict[str, IndexData],
@@ -113,20 +79,20 @@ class PQueryBackend(Generic[TableData, ColumnData, IndexData], ABC):
         pass
 
     @abstractmethod
-    def
+    def execute_filter(
         self,
         filter: Filter,
         feat_dict: Dict[str, TableData],
         time_dict: Dict[str, ColumnData],
         batch_dict: Dict[str, IndexData],
         anchor_time: ColumnData,
-    ) -> IndexData:
+    ) -> Tuple[ColumnData, IndexData]:
         pass
 
     @abstractmethod
-    def
+    def execute(
         self,
-        query:
+        query: ValidatedPredictiveQuery,
         feat_dict: Dict[str, TableData],
         time_dict: Dict[str, ColumnData],
         batch_dict: Dict[str, IndexData],