kumoai 2.13.0.dev202512091732__cp311-cp311-macosx_11_0_arm64.whl → 2.14.0.dev202512191731__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kumoai/_version.py +1 -1
- kumoai/client/pquery.py +6 -2
- kumoai/experimental/rfm/__init__.py +33 -8
- kumoai/experimental/rfm/authenticate.py +3 -4
- kumoai/experimental/rfm/backend/local/graph_store.py +40 -83
- kumoai/experimental/rfm/backend/local/sampler.py +128 -55
- kumoai/experimental/rfm/backend/local/table.py +21 -16
- kumoai/experimental/rfm/backend/snow/__init__.py +2 -0
- kumoai/experimental/rfm/backend/snow/sampler.py +252 -0
- kumoai/experimental/rfm/backend/snow/table.py +101 -49
- kumoai/experimental/rfm/backend/sqlite/__init__.py +4 -2
- kumoai/experimental/rfm/backend/sqlite/sampler.py +349 -0
- kumoai/experimental/rfm/backend/sqlite/table.py +84 -31
- kumoai/experimental/rfm/base/__init__.py +24 -5
- kumoai/experimental/rfm/base/column.py +14 -12
- kumoai/experimental/rfm/base/column_expression.py +50 -0
- kumoai/experimental/rfm/base/sampler.py +429 -30
- kumoai/experimental/rfm/base/source.py +1 -0
- kumoai/experimental/rfm/base/sql_sampler.py +84 -0
- kumoai/experimental/rfm/base/sql_table.py +229 -0
- kumoai/experimental/rfm/base/table.py +165 -135
- kumoai/experimental/rfm/graph.py +266 -102
- kumoai/experimental/rfm/infer/__init__.py +6 -4
- kumoai/experimental/rfm/infer/dtype.py +3 -3
- kumoai/experimental/rfm/infer/pkey.py +4 -2
- kumoai/experimental/rfm/infer/stype.py +35 -0
- kumoai/experimental/rfm/infer/time_col.py +1 -2
- kumoai/experimental/rfm/pquery/executor.py +27 -27
- kumoai/experimental/rfm/pquery/pandas_executor.py +29 -31
- kumoai/experimental/rfm/rfm.py +299 -230
- kumoai/experimental/rfm/sagemaker.py +4 -4
- kumoai/pquery/predictive_query.py +10 -6
- kumoai/testing/snow.py +50 -0
- kumoai/utils/__init__.py +3 -2
- kumoai/utils/progress_logger.py +178 -12
- kumoai/utils/sql.py +3 -0
- {kumoai-2.13.0.dev202512091732.dist-info → kumoai-2.14.0.dev202512191731.dist-info}/METADATA +3 -2
- {kumoai-2.13.0.dev202512091732.dist-info → kumoai-2.14.0.dev202512191731.dist-info}/RECORD +41 -35
- kumoai/experimental/rfm/local_graph_sampler.py +0 -223
- kumoai/experimental/rfm/local_pquery_driver.py +0 -689
- {kumoai-2.13.0.dev202512091732.dist-info → kumoai-2.14.0.dev202512191731.dist-info}/WHEEL +0 -0
- {kumoai-2.13.0.dev202512091732.dist-info → kumoai-2.14.0.dev202512191731.dist-info}/licenses/LICENSE +0 -0
- {kumoai-2.13.0.dev202512091732.dist-info → kumoai-2.14.0.dev202512191731.dist-info}/top_level.txt +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
-
from collections import
|
|
2
|
+
from collections.abc import Sequence
|
|
3
3
|
from functools import cached_property
|
|
4
|
-
from typing import Dict, List, Optional, Sequence, Set
|
|
5
4
|
|
|
6
5
|
import pandas as pd
|
|
6
|
+
from kumoapi.model_plan import MissingType
|
|
7
7
|
from kumoapi.source_table import UnavailableSourceTable
|
|
8
8
|
from kumoapi.table import Column as ColumnDefinition
|
|
9
9
|
from kumoapi.table import TableDefinition
|
|
@@ -11,13 +11,10 @@ from kumoapi.typing import Stype
|
|
|
11
11
|
from typing_extensions import Self
|
|
12
12
|
|
|
13
13
|
from kumoai import in_notebook, in_snowflake_notebook
|
|
14
|
-
from kumoai.experimental.rfm.base import Column,
|
|
14
|
+
from kumoai.experimental.rfm.base import Column, DataBackend, SourceColumn
|
|
15
15
|
from kumoai.experimental.rfm.infer import (
|
|
16
|
-
contains_categorical,
|
|
17
|
-
contains_id,
|
|
18
|
-
contains_multicategorical,
|
|
19
|
-
contains_timestamp,
|
|
20
16
|
infer_primary_key,
|
|
17
|
+
infer_stype,
|
|
21
18
|
infer_time_column,
|
|
22
19
|
)
|
|
23
20
|
|
|
@@ -38,44 +35,32 @@ class Table(ABC):
|
|
|
38
35
|
def __init__(
|
|
39
36
|
self,
|
|
40
37
|
name: str,
|
|
41
|
-
columns:
|
|
42
|
-
primary_key:
|
|
43
|
-
time_column:
|
|
44
|
-
end_time_column:
|
|
38
|
+
columns: Sequence[str] | None = None,
|
|
39
|
+
primary_key: MissingType | str | None = MissingType.VALUE,
|
|
40
|
+
time_column: str | None = None,
|
|
41
|
+
end_time_column: str | None = None,
|
|
45
42
|
) -> None:
|
|
46
43
|
|
|
47
44
|
self._name = name
|
|
48
|
-
self._primary_key:
|
|
49
|
-
self._time_column:
|
|
50
|
-
self._end_time_column:
|
|
45
|
+
self._primary_key: str | None = None
|
|
46
|
+
self._time_column: str | None = None
|
|
47
|
+
self._end_time_column: str | None = None
|
|
51
48
|
|
|
52
|
-
if
|
|
53
|
-
|
|
54
|
-
f"a supported data type")
|
|
49
|
+
if columns is None:
|
|
50
|
+
columns = list(self._source_column_dict.keys())
|
|
55
51
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
if column.is_primary_key
|
|
59
|
-
]
|
|
60
|
-
if len(primary_keys) == 1: # NOTE No composite keys yet.
|
|
61
|
-
if primary_key is not None and primary_key != primary_keys[0]:
|
|
62
|
-
raise ValueError(f"Found duplicate primary key "
|
|
63
|
-
f"definition '{primary_key}' and "
|
|
64
|
-
f"'{primary_keys[0]}' in table '{name}'")
|
|
65
|
-
primary_key = primary_keys[0]
|
|
66
|
-
|
|
67
|
-
unique_keys = [
|
|
68
|
-
column.name for column in self._source_column_dict.values()
|
|
69
|
-
if column.is_unique_key
|
|
70
|
-
]
|
|
71
|
-
if primary_key is None and len(unique_keys) == 1:
|
|
72
|
-
primary_key = unique_keys[0]
|
|
73
|
-
|
|
74
|
-
self._columns: Dict[str, Column] = {}
|
|
75
|
-
for column_name in columns or list(self._source_column_dict.keys()):
|
|
52
|
+
self._columns: dict[str, Column] = {}
|
|
53
|
+
for column_name in columns:
|
|
76
54
|
self.add_column(column_name)
|
|
77
55
|
|
|
78
|
-
if primary_key
|
|
56
|
+
if isinstance(primary_key, MissingType):
|
|
57
|
+
# Inference from source column metadata:
|
|
58
|
+
if '_source_column_dict' in self.__dict__:
|
|
59
|
+
primary_key = self._source_primary_key
|
|
60
|
+
if (primary_key is not None and primary_key in self
|
|
61
|
+
and self[primary_key].is_physical):
|
|
62
|
+
self.primary_key = primary_key
|
|
63
|
+
elif primary_key is not None:
|
|
79
64
|
if primary_key not in self:
|
|
80
65
|
self.add_column(primary_key)
|
|
81
66
|
self.primary_key = primary_key
|
|
@@ -95,7 +80,7 @@ class Table(ABC):
|
|
|
95
80
|
r"""The name of this table."""
|
|
96
81
|
return self._name
|
|
97
82
|
|
|
98
|
-
#
|
|
83
|
+
# Column ##################################################################
|
|
99
84
|
|
|
100
85
|
def has_column(self, name: str) -> bool:
|
|
101
86
|
r"""Returns ``True`` if this table holds a column with name ``name``;
|
|
@@ -117,7 +102,7 @@ class Table(ABC):
|
|
|
117
102
|
return self._columns[name]
|
|
118
103
|
|
|
119
104
|
@property
|
|
120
|
-
def columns(self) ->
|
|
105
|
+
def columns(self) -> list[Column]:
|
|
121
106
|
r"""Returns a list of :class:`Column` objects that represent the
|
|
122
107
|
columns in this table.
|
|
123
108
|
"""
|
|
@@ -140,36 +125,22 @@ class Table(ABC):
|
|
|
140
125
|
raise KeyError(f"Column '{name}' does not exist in the underlying "
|
|
141
126
|
f"source table")
|
|
142
127
|
|
|
143
|
-
|
|
144
|
-
dtype = self._source_column_dict[name].dtype
|
|
145
|
-
except Exception as e:
|
|
146
|
-
raise RuntimeError(f"Could not obtain data type for column "
|
|
147
|
-
f"'{name}' in table '{self.name}'. Change "
|
|
148
|
-
f"the data type of the column in the source "
|
|
149
|
-
f"table or remove it from the table.") from e
|
|
128
|
+
dtype = self._source_column_dict[name].dtype
|
|
150
129
|
|
|
130
|
+
ser = self._source_sample_df[name]
|
|
151
131
|
try:
|
|
152
|
-
|
|
153
|
-
if contains_id(ser, name, dtype):
|
|
154
|
-
stype = Stype.ID
|
|
155
|
-
elif contains_timestamp(ser, name, dtype):
|
|
156
|
-
stype = Stype.timestamp
|
|
157
|
-
elif contains_multicategorical(ser, name, dtype):
|
|
158
|
-
stype = Stype.multicategorical
|
|
159
|
-
elif contains_categorical(ser, name, dtype):
|
|
160
|
-
stype = Stype.categorical
|
|
161
|
-
else:
|
|
162
|
-
stype = dtype.default_stype
|
|
132
|
+
stype = infer_stype(ser, name, dtype)
|
|
163
133
|
except Exception as e:
|
|
164
134
|
raise RuntimeError(f"Could not obtain semantic type for column "
|
|
165
|
-
f"'{name}'
|
|
166
|
-
f"the data type of the
|
|
167
|
-
f"table or remove it from
|
|
135
|
+
f"'{name}' with data type '{dtype}' in table "
|
|
136
|
+
f"'{self.name}'. Change the data type of the "
|
|
137
|
+
f"column in the source table or remove it from "
|
|
138
|
+
f"this table.") from e
|
|
168
139
|
|
|
169
140
|
self._columns[name] = Column(
|
|
170
141
|
name=name,
|
|
171
|
-
dtype=dtype,
|
|
172
142
|
stype=stype,
|
|
143
|
+
dtype=dtype,
|
|
173
144
|
)
|
|
174
145
|
|
|
175
146
|
return self._columns[name]
|
|
@@ -205,7 +176,7 @@ class Table(ABC):
|
|
|
205
176
|
return self._primary_key is not None
|
|
206
177
|
|
|
207
178
|
@property
|
|
208
|
-
def primary_key(self) ->
|
|
179
|
+
def primary_key(self) -> Column | None:
|
|
209
180
|
r"""The primary key column of this table.
|
|
210
181
|
|
|
211
182
|
The getter returns the primary key column of this table, or ``None`` if
|
|
@@ -220,7 +191,7 @@ class Table(ABC):
|
|
|
220
191
|
return self[self._primary_key]
|
|
221
192
|
|
|
222
193
|
@primary_key.setter
|
|
223
|
-
def primary_key(self, name:
|
|
194
|
+
def primary_key(self, name: str | None) -> None:
|
|
224
195
|
if name is not None and name == self._time_column:
|
|
225
196
|
raise ValueError(f"Cannot specify column '{name}' as a primary "
|
|
226
197
|
f"key since it is already defined to be a time "
|
|
@@ -250,7 +221,7 @@ class Table(ABC):
|
|
|
250
221
|
return self._time_column is not None
|
|
251
222
|
|
|
252
223
|
@property
|
|
253
|
-
def time_column(self) ->
|
|
224
|
+
def time_column(self) -> Column | None:
|
|
254
225
|
r"""The time column of this table.
|
|
255
226
|
|
|
256
227
|
The getter returns the time column of this table, or ``None`` if no
|
|
@@ -265,7 +236,7 @@ class Table(ABC):
|
|
|
265
236
|
return self[self._time_column]
|
|
266
237
|
|
|
267
238
|
@time_column.setter
|
|
268
|
-
def time_column(self, name:
|
|
239
|
+
def time_column(self, name: str | None) -> None:
|
|
269
240
|
if name is not None and name == self._primary_key:
|
|
270
241
|
raise ValueError(f"Cannot specify column '{name}' as a time "
|
|
271
242
|
f"column since it is already defined to be a "
|
|
@@ -295,7 +266,7 @@ class Table(ABC):
|
|
|
295
266
|
return self._end_time_column is not None
|
|
296
267
|
|
|
297
268
|
@property
|
|
298
|
-
def end_time_column(self) ->
|
|
269
|
+
def end_time_column(self) -> Column | None:
|
|
299
270
|
r"""The end time column of this table.
|
|
300
271
|
|
|
301
272
|
The getter returns the end time column of this table, or ``None`` if no
|
|
@@ -311,7 +282,7 @@ class Table(ABC):
|
|
|
311
282
|
return self[self._end_time_column]
|
|
312
283
|
|
|
313
284
|
@end_time_column.setter
|
|
314
|
-
def end_time_column(self, name:
|
|
285
|
+
def end_time_column(self, name: str | None) -> None:
|
|
315
286
|
if name is not None and name == self._primary_key:
|
|
316
287
|
raise ValueError(f"Cannot specify column '{name}' as an end time "
|
|
317
288
|
f"column since it is already defined to be a "
|
|
@@ -405,8 +376,91 @@ class Table(ABC):
|
|
|
405
376
|
print(f"🏷️ Metadata of Table '{self.name}'{num_rows_repr}")
|
|
406
377
|
print(self.metadata.to_string(index=False))
|
|
407
378
|
|
|
379
|
+
def infer_primary_key(self, verbose: bool = True) -> Self:
|
|
380
|
+
r"""Infers the primary key in this table.
|
|
381
|
+
|
|
382
|
+
Args:
|
|
383
|
+
verbose: Whether to print verbose output.
|
|
384
|
+
"""
|
|
385
|
+
if self.has_primary_key():
|
|
386
|
+
return self
|
|
387
|
+
|
|
388
|
+
def _set_primary_key(primary_key: str) -> None:
|
|
389
|
+
self.primary_key = primary_key
|
|
390
|
+
if verbose:
|
|
391
|
+
print(f"Detected primary key '{primary_key}' in table "
|
|
392
|
+
f"'{self.name}'")
|
|
393
|
+
|
|
394
|
+
# Inference from source column metadata:
|
|
395
|
+
if '_source_column_dict' in self.__dict__:
|
|
396
|
+
primary_key = self._source_primary_key
|
|
397
|
+
if (primary_key is not None and primary_key in self
|
|
398
|
+
and self[primary_key].is_physical):
|
|
399
|
+
_set_primary_key(primary_key)
|
|
400
|
+
return self
|
|
401
|
+
|
|
402
|
+
unique_keys = [
|
|
403
|
+
column.name for column in self._source_column_dict.values()
|
|
404
|
+
if column.is_unique_key
|
|
405
|
+
]
|
|
406
|
+
if (len(unique_keys) == 1 # NOTE No composite keys yet.
|
|
407
|
+
and unique_keys[0] in self
|
|
408
|
+
and self[unique_keys[0]].is_physical):
|
|
409
|
+
_set_primary_key(unique_keys[0])
|
|
410
|
+
return self
|
|
411
|
+
|
|
412
|
+
# Heuristic-based inference:
|
|
413
|
+
candidates = [
|
|
414
|
+
column.name for column in self.columns if column.stype == Stype.ID
|
|
415
|
+
]
|
|
416
|
+
if len(candidates) == 0:
|
|
417
|
+
for column in self.columns:
|
|
418
|
+
if self.name.lower() == column.name.lower():
|
|
419
|
+
candidates.append(column.name)
|
|
420
|
+
elif (self.name.lower().endswith('s')
|
|
421
|
+
and self.name.lower()[:-1] == column.name.lower()):
|
|
422
|
+
candidates.append(column.name)
|
|
423
|
+
|
|
424
|
+
if primary_key := infer_primary_key(
|
|
425
|
+
table_name=self.name,
|
|
426
|
+
df=self._sample_current_df(columns=candidates),
|
|
427
|
+
candidates=candidates,
|
|
428
|
+
):
|
|
429
|
+
_set_primary_key(primary_key)
|
|
430
|
+
return self
|
|
431
|
+
|
|
432
|
+
return self
|
|
433
|
+
|
|
434
|
+
def infer_time_column(self, verbose: bool = True) -> Self:
|
|
435
|
+
r"""Infers the time column in this table.
|
|
436
|
+
|
|
437
|
+
Args:
|
|
438
|
+
verbose: Whether to print verbose output.
|
|
439
|
+
"""
|
|
440
|
+
if self.has_time_column():
|
|
441
|
+
return self
|
|
442
|
+
|
|
443
|
+
# Heuristic-based inference:
|
|
444
|
+
candidates = [
|
|
445
|
+
column.name for column in self.columns
|
|
446
|
+
if column.stype == Stype.timestamp
|
|
447
|
+
and column.name != self._end_time_column
|
|
448
|
+
]
|
|
449
|
+
|
|
450
|
+
if time_column := infer_time_column(
|
|
451
|
+
df=self._sample_current_df(columns=candidates),
|
|
452
|
+
candidates=candidates,
|
|
453
|
+
):
|
|
454
|
+
self.time_column = time_column
|
|
455
|
+
|
|
456
|
+
if verbose:
|
|
457
|
+
print(f"Detected time column '{time_column}' in table "
|
|
458
|
+
f"'{self.name}'")
|
|
459
|
+
|
|
460
|
+
return self
|
|
461
|
+
|
|
408
462
|
def infer_metadata(self, verbose: bool = True) -> Self:
|
|
409
|
-
r"""Infers metadata, *i.e.*, primary keys and time columns, in
|
|
463
|
+
r"""Infers metadata, *i.e.*, primary keys and time columns, in this
|
|
410
464
|
table.
|
|
411
465
|
|
|
412
466
|
Args:
|
|
@@ -414,45 +468,15 @@ class Table(ABC):
|
|
|
414
468
|
"""
|
|
415
469
|
logs = []
|
|
416
470
|
|
|
417
|
-
# Try to detect primary key if not set:
|
|
418
471
|
if not self.has_primary_key():
|
|
472
|
+
self.infer_primary_key(verbose=False)
|
|
473
|
+
if self.has_primary_key():
|
|
474
|
+
logs.append(f"primary key '{self._primary_key}'")
|
|
419
475
|
|
|
420
|
-
def is_candidate(column: Column) -> bool:
|
|
421
|
-
if column.stype == Stype.ID:
|
|
422
|
-
return True
|
|
423
|
-
if all(column.stype != Stype.ID for column in self.columns):
|
|
424
|
-
if self.name == column.name:
|
|
425
|
-
return True
|
|
426
|
-
if (self.name.endswith('s')
|
|
427
|
-
and self.name[:-1] == column.name):
|
|
428
|
-
return True
|
|
429
|
-
return False
|
|
430
|
-
|
|
431
|
-
candidates = [
|
|
432
|
-
column.name for column in self.columns if is_candidate(column)
|
|
433
|
-
]
|
|
434
|
-
|
|
435
|
-
if primary_key := infer_primary_key(
|
|
436
|
-
table_name=self.name,
|
|
437
|
-
df=self._sample_df,
|
|
438
|
-
candidates=candidates,
|
|
439
|
-
):
|
|
440
|
-
self.primary_key = primary_key
|
|
441
|
-
logs.append(f"primary key '{primary_key}'")
|
|
442
|
-
|
|
443
|
-
# Try to detect time column if not set:
|
|
444
476
|
if not self.has_time_column():
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
and column.name != self._end_time_column
|
|
449
|
-
]
|
|
450
|
-
if time_column := infer_time_column(
|
|
451
|
-
df=self._sample_df,
|
|
452
|
-
candidates=candidates,
|
|
453
|
-
):
|
|
454
|
-
self.time_column = time_column
|
|
455
|
-
logs.append(f"time column '{time_column}'")
|
|
477
|
+
self.infer_time_column(verbose=False)
|
|
478
|
+
if self.has_time_column():
|
|
479
|
+
logs.append(f"time column '{self._time_column}'")
|
|
456
480
|
|
|
457
481
|
if verbose and len(logs) > 0:
|
|
458
482
|
print(f"Detected {' and '.join(logs)} in table '{self.name}'")
|
|
@@ -473,6 +497,36 @@ class Table(ABC):
|
|
|
473
497
|
end_time_col=self._end_time_column,
|
|
474
498
|
)
|
|
475
499
|
|
|
500
|
+
@cached_property
|
|
501
|
+
def _source_column_dict(self) -> dict[str, SourceColumn]:
|
|
502
|
+
source_columns = self._get_source_columns()
|
|
503
|
+
if len(source_columns) == 0:
|
|
504
|
+
raise ValueError(f"Table '{self.name}' does not hold any column "
|
|
505
|
+
f"with a supported data type")
|
|
506
|
+
return {column.name: column for column in source_columns}
|
|
507
|
+
|
|
508
|
+
@cached_property
|
|
509
|
+
def _source_sample_df(self) -> pd.DataFrame:
|
|
510
|
+
return self._get_source_sample_df()
|
|
511
|
+
|
|
512
|
+
@property
|
|
513
|
+
def _source_primary_key(self) -> str | None:
|
|
514
|
+
primary_keys = [
|
|
515
|
+
column.name for column in self._source_column_dict.values()
|
|
516
|
+
if column.is_primary_key
|
|
517
|
+
]
|
|
518
|
+
if len(primary_keys) == 1: # NOTE No composite keys yet.
|
|
519
|
+
return primary_keys[0]
|
|
520
|
+
|
|
521
|
+
return None
|
|
522
|
+
|
|
523
|
+
@cached_property
|
|
524
|
+
def _num_rows(self) -> int | None:
|
|
525
|
+
return self._get_num_rows()
|
|
526
|
+
|
|
527
|
+
def _sample_current_df(self, columns: Sequence[str]) -> pd.DataFrame:
|
|
528
|
+
return self._source_sample_df[columns]
|
|
529
|
+
|
|
476
530
|
# Python builtins #########################################################
|
|
477
531
|
|
|
478
532
|
def __hash__(self) -> int:
|
|
@@ -503,43 +557,19 @@ class Table(ABC):
|
|
|
503
557
|
|
|
504
558
|
# Abstract Methods ########################################################
|
|
505
559
|
|
|
506
|
-
@
|
|
507
|
-
def _source_column_dict(self) -> Dict[str, SourceColumn]:
|
|
508
|
-
return {col.name: col for col in self._get_source_columns()}
|
|
509
|
-
|
|
560
|
+
@property
|
|
510
561
|
@abstractmethod
|
|
511
|
-
def
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
@cached_property
|
|
515
|
-
def _source_foreign_key_dict(self) -> Dict[str, SourceForeignKey]:
|
|
516
|
-
fkeys = self._get_source_foreign_keys()
|
|
517
|
-
# NOTE Drop all keys that link to different primary keys in the same
|
|
518
|
-
# table since we don't support composite keys yet:
|
|
519
|
-
table_pkeys: Dict[str, Set[str]] = defaultdict(set)
|
|
520
|
-
for fkey in fkeys:
|
|
521
|
-
table_pkeys[fkey.dst_table].add(fkey.primary_key)
|
|
522
|
-
return {
|
|
523
|
-
fkey.name: fkey
|
|
524
|
-
for fkey in fkeys if len(table_pkeys[fkey.dst_table]) == 1
|
|
525
|
-
}
|
|
562
|
+
def backend(self) -> DataBackend:
|
|
563
|
+
r"""The data backend of this table."""
|
|
526
564
|
|
|
527
565
|
@abstractmethod
|
|
528
|
-
def
|
|
566
|
+
def _get_source_columns(self) -> list[SourceColumn]:
|
|
529
567
|
pass
|
|
530
568
|
|
|
531
|
-
@cached_property
|
|
532
|
-
def _sample_df(self) -> pd.DataFrame:
|
|
533
|
-
return self._get_sample_df()
|
|
534
|
-
|
|
535
569
|
@abstractmethod
|
|
536
|
-
def
|
|
570
|
+
def _get_source_sample_df(self) -> pd.DataFrame:
|
|
537
571
|
pass
|
|
538
572
|
|
|
539
|
-
@cached_property
|
|
540
|
-
def _num_rows(self) -> Optional[int]:
|
|
541
|
-
return self._get_num_rows()
|
|
542
|
-
|
|
543
573
|
@abstractmethod
|
|
544
|
-
def _get_num_rows(self) ->
|
|
574
|
+
def _get_num_rows(self) -> int | None:
|
|
545
575
|
pass
|