kumoai 2.13.0.dev202512081731__cp313-cp313-macosx_11_0_arm64.whl → 2.14.0.dev202512151351__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kumoai/_version.py +1 -1
- kumoai/client/pquery.py +6 -2
- kumoai/experimental/rfm/backend/local/graph_store.py +19 -62
- kumoai/experimental/rfm/backend/local/sampler.py +213 -14
- kumoai/experimental/rfm/backend/local/table.py +12 -2
- kumoai/experimental/rfm/backend/snow/__init__.py +2 -0
- kumoai/experimental/rfm/backend/snow/sampler.py +264 -0
- kumoai/experimental/rfm/backend/snow/table.py +35 -17
- kumoai/experimental/rfm/backend/sqlite/__init__.py +2 -0
- kumoai/experimental/rfm/backend/sqlite/sampler.py +354 -0
- kumoai/experimental/rfm/backend/sqlite/table.py +36 -11
- kumoai/experimental/rfm/base/__init__.py +17 -6
- kumoai/experimental/rfm/base/sampler.py +438 -38
- kumoai/experimental/rfm/base/source.py +1 -0
- kumoai/experimental/rfm/base/sql_sampler.py +56 -0
- kumoai/experimental/rfm/base/table.py +12 -1
- kumoai/experimental/rfm/graph.py +26 -9
- kumoai/experimental/rfm/pquery/pandas_executor.py +1 -1
- kumoai/experimental/rfm/rfm.py +214 -151
- kumoai/pquery/predictive_query.py +10 -6
- kumoai/testing/snow.py +50 -0
- kumoai/utils/__init__.py +2 -0
- kumoai/utils/sql.py +3 -0
- {kumoai-2.13.0.dev202512081731.dist-info → kumoai-2.14.0.dev202512151351.dist-info}/METADATA +2 -2
- {kumoai-2.13.0.dev202512081731.dist-info → kumoai-2.14.0.dev202512151351.dist-info}/RECORD +28 -25
- kumoai/experimental/rfm/local_graph_sampler.py +0 -223
- kumoai/experimental/rfm/local_pquery_driver.py +0 -689
- {kumoai-2.13.0.dev202512081731.dist-info → kumoai-2.14.0.dev202512151351.dist-info}/WHEEL +0 -0
- {kumoai-2.13.0.dev202512081731.dist-info → kumoai-2.14.0.dev202512151351.dist-info}/licenses/LICENSE +0 -0
- {kumoai-2.13.0.dev202512081731.dist-info → kumoai-2.14.0.dev202512151351.dist-info}/top_level.txt +0 -0
kumoai/_version.py
CHANGED
@@ -1 +1 @@
-__version__ = '2.13.0.dev202512081731'
+__version__ = '2.14.0.dev202512151351'
kumoai/client/pquery.py
CHANGED
@@ -176,8 +176,12 @@ def filter_model_plan(
         # Undefined
         pass
 
-
-
+        # Forward compatibility - Remove any newly introduced arguments not
+        # returned yet by the backend:
+        value = getattr(section, field.name)
+        if value != MissingType.VALUE:
+            new_opt_fields.append((field.name, _type, default))
+            new_opts.append(value)
 
     Section = dataclass(
         config=dict(validate_assignment=True),
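The added block keeps an option only when the backend actually populated it, using a sentinel to tell "not returned" apart from a real value. A minimal sketch of this forward-compatibility pattern, with a stand-in MissingType enum and a hypothetical Section dataclass (the surrounding loop over fields(...) is assumed from the fragments above):

    from dataclasses import dataclass, fields
    from enum import Enum
    from typing import Any

    class MissingType(Enum):
        VALUE = 'missing'  # Sentinel: field was not returned by the backend.

    @dataclass
    class Section:
        lr: float = 0.01
        new_arg: Any = MissingType.VALUE  # Introduced by a newer client.

    def keep_known_options(section: Section) -> dict[str, Any]:
        # Drop any option the backend did not populate, so newly introduced
        # client-side arguments are not forwarded to an older backend.
        return {
            f.name: getattr(section, f.name)
            for f in fields(section)
            if getattr(section, f.name) != MissingType.VALUE
        }

    assert keep_known_options(Section()) == {'lr': 0.01}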
kumoai/experimental/rfm/backend/local/graph_store.py
CHANGED
@@ -34,26 +34,21 @@ class LocalGraphStore:
 
         with verbose as logger:
             self.df_dict, self.mask_dict = self.sanitize(graph)
-            self.stype_dict = self.get_stype_dict(graph)
             logger.log("Sanitized input data")
 
-            self.pkey_name_dict, self.pkey_map_dict = self.get_pkey_data(graph)
+            self.pkey_map_dict = self.get_pkey_map_dict(graph)
             num_pkeys = sum(t.has_primary_key() for t in graph.tables.values())
             if num_pkeys > 1:
                 logger.log(f"Collected primary keys from {num_pkeys} tables")
             else:
                 logger.log(f"Collected primary key from {num_pkeys} table")
 
-            (
-                self.time_column_dict,
-                self.end_time_column_dict,
-                self.time_dict,
-                self.min_time,
-                self.max_time,
-            ) = self.get_time_data(graph)
-            if self.max_time != pd.Timestamp.min:
+            self.time_dict, self.min_max_time_dict = self.get_time_data(graph)
+            if len(self.min_max_time_dict) > 0:
+                min_time = min(t for t, _ in self.min_max_time_dict.values())
+                max_time = max(t for _, t in self.min_max_time_dict.values())
                 logger.log(f"Identified temporal graph from "
-                           f"{self.min_time.date()} to {self.max_time.date()}")
+                           f"{min_time.date()} to {max_time.date()}")
             else:
                 logger.log("Identified static graph without timestamps")
 
@@ -63,14 +58,6 @@ class LocalGraphStore:
             logger.log(f"Created graph with {num_nodes:,} nodes and "
                        f"{num_edges:,} edges")
 
-    @property
-    def node_types(self) -> List[str]:
-        return list(self.df_dict.keys())
-
-    @property
-    def edge_types(self) -> List[Tuple[str, str, str]]:
-        return list(self.row_dict.keys())
-
     def get_node_id(self, table_name: str, pkey: pd.Series) -> np.ndarray:
         r"""Returns the node ID given primary keys.
 
@@ -154,34 +141,16 @@ class LocalGraphStore:
 
         return df_dict, mask_dict
 
-    def get_stype_dict(self, graph: 'Graph') -> Dict[str, Dict[str, Stype]]:
-        stype_dict: Dict[str, Dict[str, Stype]] = {}
-        foreign_keys = {(edge.src_table, edge.fkey) for edge in graph.edges}
-        for table in graph.tables.values():
-            stype_dict[table.name] = {}
-            for column in table.columns:
-                if column == table.primary_key:
-                    continue
-                if (table.name, column.name) in foreign_keys:
-                    continue
-                stype_dict[table.name][column.name] = column.stype
-        return stype_dict
-
-    def get_pkey_data(
+    def get_pkey_map_dict(
         self,
         graph: 'Graph',
-    ) -> Tuple[
-        Dict[str, str],
-        Dict[str, pd.DataFrame],
-    ]:
-        pkey_name_dict: Dict[str, str] = {}
+    ) -> Dict[str, pd.DataFrame]:
         pkey_map_dict: Dict[str, pd.DataFrame] = {}
 
         for table in graph.tables.values():
             if table._primary_key is None:
                 continue
 
-            pkey_name_dict[table.name] = table._primary_key
             pkey = self.df_dict[table.name][table._primary_key]
             pkey_map = pd.DataFrame(
                 dict(arange=range(len(pkey))),
@@ -203,27 +172,18 @@ class LocalGraphStore:
 
             pkey_map_dict[table.name] = pkey_map
 
-        return pkey_name_dict, pkey_map_dict
+        return pkey_map_dict
 
     def get_time_data(
         self,
         graph: 'Graph',
     ) -> Tuple[
-        Dict[str, str],
-        Dict[str, str],
         Dict[str, np.ndarray],
-        pd.Timestamp,
-        pd.Timestamp,
+        Dict[str, Tuple[pd.Timestamp, pd.Timestamp]],
     ]:
-        time_column_dict: Dict[str, str] = {}
-        end_time_column_dict: Dict[str, str] = {}
         time_dict: Dict[str, np.ndarray] = {}
-        min_time = pd.Timestamp.max
-        max_time = pd.Timestamp.min
+        min_max_time_dict: Dict[str, tuple[pd.Timestamp, pd.Timestamp]] = {}
         for table in graph.tables.values():
-            if table._end_time_column is not None:
-                end_time_column_dict[table.name] = table._end_time_column
-
             if table._time_column is None:
                 continue
 
@@ -231,21 +191,18 @@ class LocalGraphStore:
             if time.dtype != 'datetime64[ns]':
                 time = time.astype('datetime64[ns]')
             time_dict[table.name] = time.astype(int).to_numpy() // 1000**3
-            time_column_dict[table.name] = table._time_column
 
             if table.name in self.mask_dict.keys():
                 time = time[self.mask_dict[table.name]]
             if len(time) > 0:
-                min_time = min(time.min(), min_time)
-                max_time = max(time.max(), max_time)
-
-        return (
-            time_column_dict,
-            end_time_column_dict,
-            time_dict,
-            min_time,
-            max_time,
-        )
+                min_max_time_dict[table.name] = (time.min(), time.max())
+            else:
+                min_max_time_dict[table.name] = (
+                    pd.Timestamp.max,
+                    pd.Timestamp.min,
+                )
+
+        return time_dict, min_max_time_dict
 
     def get_csc(
         self,
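get_time_data now returns per-table (min, max) timestamp ranges instead of global min_time/max_time attributes; tables whose rows are all masked out get the inverted sentinel (pd.Timestamp.max, pd.Timestamp.min). A hypothetical illustration of how the constructor derives the global range (table names are made up):

    import pandas as pd

    # Hypothetical per-table output of LocalGraphStore.get_time_data():
    min_max_time_dict = {
        'users': (pd.Timestamp('2020-01-01'), pd.Timestamp('2021-06-30')),
        'orders': (pd.Timestamp('2020-03-15'), pd.Timestamp('2021-12-01')),
        'masked': (pd.Timestamp.max, pd.Timestamp.min),  # no visible rows
    }

    # Mirrors the constructor logic above; the inverted sentinel never wins,
    # since Timestamp.max loses every min() and Timestamp.min every max().
    min_time = min(t for t, _ in min_max_time_dict.values())
    max_time = max(t for _, t in min_max_time_dict.values())
    print(f"temporal graph from {min_time.date()} to {max_time.date()}")
    # -> temporal graph from 2020-01-01 to 2021-12-01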
kumoai/experimental/rfm/backend/local/sampler.py
CHANGED
@@ -1,10 +1,12 @@
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Literal
 
 import numpy as np
 import pandas as pd
+from kumoapi.pquery import ValidatedPredictiveQuery
 
 from kumoai.experimental.rfm.backend.local import LocalGraphStore
-from kumoai.experimental.rfm.base import Sampler
+from kumoai.experimental.rfm.base import Sampler, SamplerOutput
+from kumoai.experimental.rfm.pquery import PQueryPandasExecutor
 from kumoai.utils import ProgressLogger
 
 if TYPE_CHECKING:
@@ -17,7 +19,7 @@ class LocalSampler(Sampler):
         graph: 'Graph',
         verbose: bool | ProgressLogger = True,
     ) -> None:
-        super().__init__(graph=graph)
+        super().__init__(graph=graph, verbose=verbose)
 
         import kumoai.kumolib as kumolib
 
@@ -36,19 +38,32 @@ class LocalSampler(Sampler):
             self._graph_store.time_dict,
         )
 
-    def
+    def _get_min_max_time_dict(
+        self,
+        table_names: list[str],
+    ) -> dict[str, tuple[pd.Timestamp, pd.Timestamp]]:
+        return {
+            key: value
+            for key, value in self._graph_store.min_max_time_dict.items()
+            if key in table_names
+        }
+
+    def _sample_subgraph(
         self,
         entity_table_name: str,
         entity_pkey: pd.Series,
-        anchor_time: pd.Series,
+        anchor_time: pd.Series | Literal['entity'],
         columns_dict: dict[str, set[str]],
         num_neighbors: list[int],
-    ) ->
+    ) -> SamplerOutput:
 
-
-
-
-
+        index = self._graph_store.get_node_id(entity_table_name, entity_pkey)
+
+        if isinstance(anchor_time, pd.Series):
+            time = anchor_time.astype(int).to_numpy() // 1000**3  # to seconds
+        else:
+            assert anchor_time == 'entity'
+            time = self._graph_store.time_dict[entity_table_name][index]
 
         (
             row_dict,
@@ -58,11 +73,14 @@ class LocalSampler(Sampler):
             num_sampled_nodes_dict,
             num_sampled_edges_dict,
         ) = self._graph_sampler.sample(
-
+            {
+                '__'.join(edge_type): num_neighbors
+                for edge_type in self.edge_types
+            },
             {},
             entity_table_name,
-
-
+            index,
+            time,
         )
 
         df_dict: dict[str, pd.DataFrame] = {}
@@ -105,7 +123,8 @@ class LocalSampler(Sampler):
             for edge_type in self.edge_types
         }
 
-        return
+        return SamplerOutput(
+            anchor_time=time * 1000**3,  # to nanoseconds
             df_dict=df_dict,
             inverse_dict=inverse_dict,
             batch_dict=batch_dict,
@@ -114,3 +133,183 @@ class LocalSampler(Sampler):
             col_dict=col_dict,
             num_sampled_edges_dict=num_sampled_edges_dict,
         )
+
+    def _sample_entity_table(
+        self,
+        table_name: str,
+        columns: set[str],
+        num_rows: int,
+        random_seed: int | None = None,
+    ) -> pd.DataFrame:
+        pkey_map = self._graph_store.pkey_map_dict[table_name]
+        if len(pkey_map) > num_rows:
+            pkey_map = pkey_map.sample(
+                n=num_rows,
+                random_state=random_seed,
+                ignore_index=True,
+            )
+        df = self._graph_store.df_dict[table_name]
+        df = df.iloc[pkey_map['arange']][list(columns)]
+        return df
+
+    def _sample_target(
+        self,
+        query: ValidatedPredictiveQuery,
+        entity_df: pd.DataFrame,
+        train_index: np.ndarray,
+        train_time: pd.Series,
+        num_train_examples: int,
+        test_index: np.ndarray,
+        test_time: pd.Series,
+        num_test_examples: int,
+        columns_dict: dict[str, set[str]],
+        time_offset_dict: dict[
+            tuple[str, str, str],
+            tuple[pd.DateOffset | None, pd.DateOffset],
+        ],
+    ) -> tuple[pd.Series, np.ndarray, pd.Series, np.ndarray]:
+
+        train_y, train_mask = self._sample_target_set(
+            query=query,
+            pkey=entity_df[self.primary_key_dict[query.entity_table]],
+            index=train_index,
+            anchor_time=train_time,
+            num_examples=num_train_examples,
+            columns_dict=columns_dict,
+            time_offset_dict=time_offset_dict,
+        )
+
+        test_y, test_mask = self._sample_target_set(
+            query=query,
+            pkey=entity_df[self.primary_key_dict[query.entity_table]],
+            index=test_index,
+            anchor_time=test_time,
+            num_examples=num_test_examples,
+            columns_dict=columns_dict,
+            time_offset_dict=time_offset_dict,
+        )
+
+        return train_y, train_mask, test_y, test_mask
+
+    # Helper Methods ##########################################################
+
+    def _sample_target_set(
+        self,
+        query: ValidatedPredictiveQuery,
+        pkey: pd.Series,
+        index: np.ndarray,
+        anchor_time: pd.Series,
+        num_examples: int,
+        columns_dict: dict[str, set[str]],
+        time_offset_dict: dict[
+            tuple[str, str, str],
+            tuple[pd.DateOffset | None, pd.DateOffset],
+        ],
+        batch_size: int = 10_000,
+    ) -> tuple[pd.Series, np.ndarray]:
+
+        num_hops = 1 if len(time_offset_dict) > 0 else 0
+        num_neighbors_dict: dict[str, list[int]] = {}
+        unix_time_offset_dict: dict[str, list[list[int | None]]] = {}
+        for edge_type, (start, end) in time_offset_dict.items():
+            unix_time_offset_dict['__'.join(edge_type)] = [[
+                date_offset_to_seconds(start) if start is not None else None,
+                date_offset_to_seconds(end),
+            ]]
+        for edge_type in set(self.edge_types) - set(time_offset_dict.keys()):
+            num_neighbors_dict['__'.join(edge_type)] = [0] * num_hops
+
+        if anchor_time.dtype != 'datetime64[ns]':
+            anchor_time = anchor_time.astype('datetime64')
+
+        count = 0
+        ys: list[pd.Series] = []
+        mask = np.full(len(index), False, dtype=bool)
+        for start in range(0, len(index), batch_size):
+            subset = pkey.iloc[index[start:start + batch_size]]
+            time = anchor_time.iloc[start:start + batch_size]
+
+            _, _, node_dict, batch_dict, _, _ = self._graph_sampler.sample(
+                num_neighbors_dict,
+                unix_time_offset_dict,
+                query.entity_table,
+                self._graph_store.get_node_id(query.entity_table, subset),
+                time.astype(int).to_numpy() // 1000**3,  # to seconds
+            )
+
+            feat_dict: dict[str, pd.DataFrame] = {}
+            time_dict: dict[str, pd.Series] = {}
+            for table_name, columns in columns_dict.items():
+                df = self._graph_store.df_dict[table_name]
+                df = df.iloc[node_dict[table_name]].reset_index(drop=True)
+                df = df[list(columns)]
+                feat_dict[table_name] = df
+
+                time_column = self.time_column_dict.get(table_name)
+                if time_column in columns:
+                    time_dict[table_name] = df[time_column]
+
+            y, _mask = PQueryPandasExecutor().execute(
+                query=query,
+                feat_dict=feat_dict,
+                time_dict=time_dict,
+                batch_dict=batch_dict,
+                anchor_time=time,
+                num_forecasts=query.num_forecasts,
+            )
+            ys.append(y)
+            mask[start:start + batch_size] = _mask
+
+            count += len(y)
+            if count >= num_examples:
+                break
+
+        if len(ys) == 0:
+            y = pd.Series([], dtype=float)
+        elif len(ys) == 1:
+            y = ys[0]
+        else:
+            y = pd.concat(ys, axis=0, ignore_index=True)
+
+        return y, mask
+
+
+# Helper Functions ############################################################
+
+
+def date_offset_to_seconds(offset: pd.DateOffset) -> int:
+    r"""Convert a :class:`pandas.DateOffset` into a number of seconds.
+
+    .. note::
+        We are conservative and take months and years as their maximum value.
+        Additional values are then dropped in label computation where we know
+        the actual dates.
+    """
+    MAX_DAYS_IN_MONTH = 31
+    MAX_DAYS_IN_YEAR = 366
+
+    SECONDS_IN_MINUTE = 60
+    SECONDS_IN_HOUR = 60 * SECONDS_IN_MINUTE
+    SECONDS_IN_DAY = 24 * SECONDS_IN_HOUR
+
+    total_sec = 0
+    multiplier = getattr(offset, 'n', 1)  # The multiplier (if present).
+
+    for attr, value in offset.__dict__.items():
+        if value is None or value == 0:
+            continue
+        scaled_value = value * multiplier
+        if attr == 'years':
+            total_sec += scaled_value * MAX_DAYS_IN_YEAR * SECONDS_IN_DAY
+        elif attr == 'months':
+            total_sec += scaled_value * MAX_DAYS_IN_MONTH * SECONDS_IN_DAY
+        elif attr == 'days':
+            total_sec += scaled_value * SECONDS_IN_DAY
+        elif attr == 'hours':
+            total_sec += scaled_value * SECONDS_IN_HOUR
+        elif attr == 'minutes':
+            total_sec += scaled_value * SECONDS_IN_MINUTE
+        elif attr == 'seconds':
+            total_sec += scaled_value
+
+    return total_sec
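date_offset_to_seconds deliberately over-approximates variable-length calendar units (months as 31 days, years as 366 days) so that offset windows never under-count; exact dates are reconciled later during label computation. A small usage sketch, assuming the helper above and that pandas stores the keyword fields (months, days, ...) on the offset instance as the loop expects:

    import pandas as pd

    # 1 month -> 31 * 86,400 = 2,678,400 seconds (conservative upper bound).
    assert date_offset_to_seconds(pd.DateOffset(months=1)) == 2_678_400

    # 2 days + 3 hours -> 2 * 86,400 + 3 * 3,600 = 183,600 seconds.
    assert date_offset_to_seconds(pd.DateOffset(days=2, hours=3)) == 183_600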
kumoai/experimental/rfm/backend/local/table.py
CHANGED
@@ -1,9 +1,14 @@
 import warnings
-from typing import List, Optional
+from typing import List, Optional, cast
 
 import pandas as pd
 
-from kumoai.experimental.rfm.base import
+from kumoai.experimental.rfm.base import (
+    DataBackend,
+    SourceColumn,
+    SourceForeignKey,
+    Table,
+)
 from kumoai.experimental.rfm.infer import infer_dtype
 
 
@@ -76,6 +81,10 @@ class LocalTable(Table):
             end_time_column=end_time_column,
         )
 
+    @property
+    def backend(self) -> DataBackend:
+        return cast(DataBackend, DataBackend.LOCAL)
+
     def _get_source_columns(self) -> List[SourceColumn]:
         source_columns: List[SourceColumn] = []
         for column in self._data.columns:
@@ -94,6 +103,7 @@ class LocalTable(Table):
                 dtype=dtype,
                 is_primary_key=False,
                 is_unique_key=False,
+                is_nullable=True,
             )
             source_columns.append(source_column)
 
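LocalTable now advertises its DataBackend alongside the new snow and sqlite backends added in this release (see the file list above). A rough sketch of the abstract-property pattern this implies for the base Table class; everything except the DataBackend, Table, and backend names is an assumption:

    from abc import ABC, abstractmethod
    from enum import Enum

    class DataBackend(Enum):  # Stand-in for the kumoai.experimental.rfm.base enum.
        LOCAL = 'local'
        SQLITE = 'sqlite'
        SNOWFLAKE = 'snowflake'

    class Table(ABC):
        @property
        @abstractmethod
        def backend(self) -> DataBackend:
            ...

    class LocalTable(Table):
        @property
        def backend(self) -> DataBackend:
            # The real diff wraps this in cast(DataBackend, ...), presumably
            # to satisfy the type checker; a plain return suffices here.
            return DataBackend.LOCAL

    assert LocalTable().backend is DataBackend.LOCAL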