kumoai 2.14.0.dev202512151351__cp313-cp313-macosx_11_0_arm64.whl → 2.14.0.dev202512211732__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kumoai/_version.py +1 -1
- kumoai/experimental/rfm/__init__.py +33 -8
- kumoai/experimental/rfm/authenticate.py +3 -4
- kumoai/experimental/rfm/backend/local/graph_store.py +25 -25
- kumoai/experimental/rfm/backend/local/table.py +16 -21
- kumoai/experimental/rfm/backend/snow/sampler.py +22 -34
- kumoai/experimental/rfm/backend/snow/table.py +67 -33
- kumoai/experimental/rfm/backend/sqlite/__init__.py +2 -2
- kumoai/experimental/rfm/backend/sqlite/sampler.py +21 -26
- kumoai/experimental/rfm/backend/sqlite/table.py +54 -26
- kumoai/experimental/rfm/base/__init__.py +8 -0
- kumoai/experimental/rfm/base/column.py +14 -12
- kumoai/experimental/rfm/base/column_expression.py +50 -0
- kumoai/experimental/rfm/base/sql_sampler.py +31 -3
- kumoai/experimental/rfm/base/sql_table.py +229 -0
- kumoai/experimental/rfm/base/table.py +162 -143
- kumoai/experimental/rfm/graph.py +242 -95
- kumoai/experimental/rfm/infer/__init__.py +6 -4
- kumoai/experimental/rfm/infer/dtype.py +3 -3
- kumoai/experimental/rfm/infer/pkey.py +4 -2
- kumoai/experimental/rfm/infer/stype.py +35 -0
- kumoai/experimental/rfm/infer/time_col.py +1 -2
- kumoai/experimental/rfm/pquery/executor.py +27 -27
- kumoai/experimental/rfm/pquery/pandas_executor.py +29 -31
- kumoai/experimental/rfm/rfm.py +86 -80
- kumoai/experimental/rfm/sagemaker.py +4 -4
- kumoai/utils/__init__.py +1 -2
- kumoai/utils/progress_logger.py +178 -12
- {kumoai-2.14.0.dev202512151351.dist-info → kumoai-2.14.0.dev202512211732.dist-info}/METADATA +2 -1
- {kumoai-2.14.0.dev202512151351.dist-info → kumoai-2.14.0.dev202512211732.dist-info}/RECORD +33 -30
- {kumoai-2.14.0.dev202512151351.dist-info → kumoai-2.14.0.dev202512211732.dist-info}/WHEEL +0 -0
- {kumoai-2.14.0.dev202512151351.dist-info → kumoai-2.14.0.dev202512211732.dist-info}/licenses/LICENSE +0 -0
- {kumoai-2.14.0.dev202512151351.dist-info → kumoai-2.14.0.dev202512211732.dist-info}/top_level.txt +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
-
from collections import defaultdict
|
|
2
|
+
from collections.abc import Sequence
|
|
3
3
|
from functools import cached_property
|
|
4
|
-
from typing import Dict, List, Optional, Sequence, Set
|
|
5
4
|
|
|
6
5
|
import pandas as pd
|
|
6
|
+
from kumoapi.model_plan import MissingType
|
|
7
7
|
from kumoapi.source_table import UnavailableSourceTable
|
|
8
8
|
from kumoapi.table import Column as ColumnDefinition
|
|
9
9
|
from kumoapi.table import TableDefinition
|
|
@@ -11,18 +11,10 @@ from kumoapi.typing import Stype
|
|
|
11
11
|
from typing_extensions import Self
|
|
12
12
|
|
|
13
13
|
from kumoai import in_notebook, in_snowflake_notebook
|
|
14
|
-
from kumoai.experimental.rfm.base import (
|
|
15
|
-
Column,
|
|
16
|
-
DataBackend,
|
|
17
|
-
SourceColumn,
|
|
18
|
-
SourceForeignKey,
|
|
19
|
-
)
|
|
14
|
+
from kumoai.experimental.rfm.base import Column, DataBackend, SourceColumn
|
|
20
15
|
from kumoai.experimental.rfm.infer import (
|
|
21
|
-
contains_categorical,
|
|
22
|
-
contains_id,
|
|
23
|
-
contains_multicategorical,
|
|
24
|
-
contains_timestamp,
|
|
25
16
|
infer_primary_key,
|
|
17
|
+
infer_stype,
|
|
26
18
|
infer_time_column,
|
|
27
19
|
)
|
|
28
20
|
|
|
@@ -43,44 +35,32 @@ class Table(ABC):
|
|
|
43
35
|
def __init__(
|
|
44
36
|
self,
|
|
45
37
|
name: str,
|
|
46
|
-
columns: Optional[Sequence[str]] = None,
|
|
47
|
-
primary_key: Optional[str] = None,
|
|
48
|
-
time_column: Optional[str] = None,
|
|
49
|
-
end_time_column: Optional[str] = None,
|
|
38
|
+
columns: Sequence[str] | None = None,
|
|
39
|
+
primary_key: MissingType | str | None = MissingType.VALUE,
|
|
40
|
+
time_column: str | None = None,
|
|
41
|
+
end_time_column: str | None = None,
|
|
50
42
|
) -> None:
|
|
51
43
|
|
|
52
44
|
self._name = name
|
|
53
|
-
self._primary_key: Optional[str] = None
|
|
54
|
-
self._time_column: Optional[str] = None
|
|
55
|
-
self._end_time_column: Optional[str] = None
|
|
56
|
-
|
|
57
|
-
if len(self._source_column_dict) == 0:
|
|
58
|
-
raise ValueError(f"Table '{name}' does not hold any column with "
|
|
59
|
-
f"a supported data type")
|
|
45
|
+
self._primary_key: str | None = None
|
|
46
|
+
self._time_column: str | None = None
|
|
47
|
+
self._end_time_column: str | None = None
|
|
60
48
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
if column.is_primary_key
|
|
64
|
-
]
|
|
65
|
-
if len(primary_keys) == 1: # NOTE No composite keys yet.
|
|
66
|
-
if primary_key is not None and primary_key != primary_keys[0]:
|
|
67
|
-
raise ValueError(f"Found duplicate primary key "
|
|
68
|
-
f"definition '{primary_key}' and "
|
|
69
|
-
f"'{primary_keys[0]}' in table '{name}'")
|
|
70
|
-
primary_key = primary_keys[0]
|
|
71
|
-
|
|
72
|
-
unique_keys = [
|
|
73
|
-
column.name for column in self._source_column_dict.values()
|
|
74
|
-
if column.is_unique_key
|
|
75
|
-
]
|
|
76
|
-
if primary_key is None and len(unique_keys) == 1:
|
|
77
|
-
primary_key = unique_keys[0]
|
|
49
|
+
if columns is None:
|
|
50
|
+
columns = list(self._source_column_dict.keys())
|
|
78
51
|
|
|
79
|
-
self._columns: Dict[str, Column] = {}
|
|
80
|
-
for column_name in columns
|
|
52
|
+
self._columns: dict[str, Column] = {}
|
|
53
|
+
for column_name in columns:
|
|
81
54
|
self.add_column(column_name)
|
|
82
55
|
|
|
83
|
-
if primary_key
|
|
56
|
+
if isinstance(primary_key, MissingType):
|
|
57
|
+
# Inference from source column metadata:
|
|
58
|
+
if '_source_column_dict' in self.__dict__:
|
|
59
|
+
primary_key = self._source_primary_key
|
|
60
|
+
if (primary_key is not None and primary_key in self
|
|
61
|
+
and self[primary_key].is_physical):
|
|
62
|
+
self.primary_key = primary_key
|
|
63
|
+
elif primary_key is not None:
|
|
84
64
|
if primary_key not in self:
|
|
85
65
|
self.add_column(primary_key)
|
|
86
66
|
self.primary_key = primary_key
|
|
@@ -100,7 +80,7 @@ class Table(ABC):
|
|
|
100
80
|
r"""The name of this table."""
|
|
101
81
|
return self._name
|
|
102
82
|
|
|
103
|
-
#
|
|
83
|
+
# Column ##################################################################
|
|
104
84
|
|
|
105
85
|
def has_column(self, name: str) -> bool:
|
|
106
86
|
r"""Returns ``True`` if this table holds a column with name ``name``;
|
|
@@ -122,7 +102,7 @@ class Table(ABC):
|
|
|
122
102
|
return self._columns[name]
|
|
123
103
|
|
|
124
104
|
@property
|
|
125
|
-
def columns(self) -> List[Column]:
|
|
105
|
+
def columns(self) -> list[Column]:
|
|
126
106
|
r"""Returns a list of :class:`Column` objects that represent the
|
|
127
107
|
columns in this table.
|
|
128
108
|
"""
|
|
@@ -145,36 +125,22 @@ class Table(ABC):
|
|
|
145
125
|
raise KeyError(f"Column '{name}' does not exist in the underlying "
|
|
146
126
|
f"source table")
|
|
147
127
|
|
|
148
|
-
|
|
149
|
-
dtype = self._source_column_dict[name].dtype
|
|
150
|
-
except Exception as e:
|
|
151
|
-
raise RuntimeError(f"Could not obtain data type for column "
|
|
152
|
-
f"'{name}' in table '{self.name}'. Change "
|
|
153
|
-
f"the data type of the column in the source "
|
|
154
|
-
f"table or remove it from the table.") from e
|
|
128
|
+
dtype = self._source_column_dict[name].dtype
|
|
155
129
|
|
|
130
|
+
ser = self._source_sample_df[name]
|
|
156
131
|
try:
|
|
157
|
-
|
|
158
|
-
if contains_id(ser, name, dtype):
|
|
159
|
-
stype = Stype.ID
|
|
160
|
-
elif contains_timestamp(ser, name, dtype):
|
|
161
|
-
stype = Stype.timestamp
|
|
162
|
-
elif contains_multicategorical(ser, name, dtype):
|
|
163
|
-
stype = Stype.multicategorical
|
|
164
|
-
elif contains_categorical(ser, name, dtype):
|
|
165
|
-
stype = Stype.categorical
|
|
166
|
-
else:
|
|
167
|
-
stype = dtype.default_stype
|
|
132
|
+
stype = infer_stype(ser, name, dtype)
|
|
168
133
|
except Exception as e:
|
|
169
134
|
raise RuntimeError(f"Could not obtain semantic type for column "
|
|
170
|
-
f"'{name}'
|
|
171
|
-
f"the data type of the
|
|
172
|
-
f"table or remove it from
|
|
135
|
+
f"'{name}' with data type '{dtype}' in table "
|
|
136
|
+
f"'{self.name}'. Change the data type of the "
|
|
137
|
+
f"column in the source table or remove it from "
|
|
138
|
+
f"this table.") from e
|
|
173
139
|
|
|
174
140
|
self._columns[name] = Column(
|
|
175
141
|
name=name,
|
|
176
|
-
dtype=dtype,
|
|
177
142
|
stype=stype,
|
|
143
|
+
dtype=dtype,
|
|
178
144
|
)
|
|
179
145
|
|
|
180
146
|
return self._columns[name]
|
|
@@ -210,7 +176,7 @@ class Table(ABC):
|
|
|
210
176
|
return self._primary_key is not None
|
|
211
177
|
|
|
212
178
|
@property
|
|
213
|
-
def primary_key(self) -> Optional[Column]:
|
|
179
|
+
def primary_key(self) -> Column | None:
|
|
214
180
|
r"""The primary key column of this table.
|
|
215
181
|
|
|
216
182
|
The getter returns the primary key column of this table, or ``None`` if
|
|
@@ -225,7 +191,7 @@ class Table(ABC):
|
|
|
225
191
|
return self[self._primary_key]
|
|
226
192
|
|
|
227
193
|
@primary_key.setter
|
|
228
|
-
def primary_key(self, name: Optional[str]) -> None:
|
|
194
|
+
def primary_key(self, name: str | None) -> None:
|
|
229
195
|
if name is not None and name == self._time_column:
|
|
230
196
|
raise ValueError(f"Cannot specify column '{name}' as a primary "
|
|
231
197
|
f"key since it is already defined to be a time "
|
|
@@ -255,7 +221,7 @@ class Table(ABC):
|
|
|
255
221
|
return self._time_column is not None
|
|
256
222
|
|
|
257
223
|
@property
|
|
258
|
-
def time_column(self) -> Optional[Column]:
|
|
224
|
+
def time_column(self) -> Column | None:
|
|
259
225
|
r"""The time column of this table.
|
|
260
226
|
|
|
261
227
|
The getter returns the time column of this table, or ``None`` if no
|
|
@@ -270,7 +236,7 @@ class Table(ABC):
|
|
|
270
236
|
return self[self._time_column]
|
|
271
237
|
|
|
272
238
|
@time_column.setter
|
|
273
|
-
def time_column(self, name: Optional[str]) -> None:
|
|
239
|
+
def time_column(self, name: str | None) -> None:
|
|
274
240
|
if name is not None and name == self._primary_key:
|
|
275
241
|
raise ValueError(f"Cannot specify column '{name}' as a time "
|
|
276
242
|
f"column since it is already defined to be a "
|
|
@@ -300,7 +266,7 @@ class Table(ABC):
|
|
|
300
266
|
return self._end_time_column is not None
|
|
301
267
|
|
|
302
268
|
@property
|
|
303
|
-
def end_time_column(self) -> Optional[Column]:
|
|
269
|
+
def end_time_column(self) -> Column | None:
|
|
304
270
|
r"""The end time column of this table.
|
|
305
271
|
|
|
306
272
|
The getter returns the end time column of this table, or ``None`` if no
|
|
@@ -316,7 +282,7 @@ class Table(ABC):
|
|
|
316
282
|
return self[self._end_time_column]
|
|
317
283
|
|
|
318
284
|
@end_time_column.setter
|
|
319
|
-
def end_time_column(self, name: Optional[str]) -> None:
|
|
285
|
+
def end_time_column(self, name: str | None) -> None:
|
|
320
286
|
if name is not None and name == self._primary_key:
|
|
321
287
|
raise ValueError(f"Cannot specify column '{name}' as an end time "
|
|
322
288
|
f"column since it is already defined to be a "
|
|
@@ -410,8 +376,91 @@ class Table(ABC):
|
|
|
410
376
|
print(f"🏷️ Metadata of Table '{self.name}'{num_rows_repr}")
|
|
411
377
|
print(self.metadata.to_string(index=False))
|
|
412
378
|
|
|
379
|
+
def infer_primary_key(self, verbose: bool = True) -> Self:
|
|
380
|
+
r"""Infers the primary key in this table.
|
|
381
|
+
|
|
382
|
+
Args:
|
|
383
|
+
verbose: Whether to print verbose output.
|
|
384
|
+
"""
|
|
385
|
+
if self.has_primary_key():
|
|
386
|
+
return self
|
|
387
|
+
|
|
388
|
+
def _set_primary_key(primary_key: str) -> None:
|
|
389
|
+
self.primary_key = primary_key
|
|
390
|
+
if verbose:
|
|
391
|
+
print(f"Detected primary key '{primary_key}' in table "
|
|
392
|
+
f"'{self.name}'")
|
|
393
|
+
|
|
394
|
+
# Inference from source column metadata:
|
|
395
|
+
if '_source_column_dict' in self.__dict__:
|
|
396
|
+
primary_key = self._source_primary_key
|
|
397
|
+
if (primary_key is not None and primary_key in self
|
|
398
|
+
and self[primary_key].is_physical):
|
|
399
|
+
_set_primary_key(primary_key)
|
|
400
|
+
return self
|
|
401
|
+
|
|
402
|
+
unique_keys = [
|
|
403
|
+
column.name for column in self._source_column_dict.values()
|
|
404
|
+
if column.is_unique_key
|
|
405
|
+
]
|
|
406
|
+
if (len(unique_keys) == 1 # NOTE No composite keys yet.
|
|
407
|
+
and unique_keys[0] in self
|
|
408
|
+
and self[unique_keys[0]].is_physical):
|
|
409
|
+
_set_primary_key(unique_keys[0])
|
|
410
|
+
return self
|
|
411
|
+
|
|
412
|
+
# Heuristic-based inference:
|
|
413
|
+
candidates = [
|
|
414
|
+
column.name for column in self.columns if column.stype == Stype.ID
|
|
415
|
+
]
|
|
416
|
+
if len(candidates) == 0:
|
|
417
|
+
for column in self.columns:
|
|
418
|
+
if self.name.lower() == column.name.lower():
|
|
419
|
+
candidates.append(column.name)
|
|
420
|
+
elif (self.name.lower().endswith('s')
|
|
421
|
+
and self.name.lower()[:-1] == column.name.lower()):
|
|
422
|
+
candidates.append(column.name)
|
|
423
|
+
|
|
424
|
+
if primary_key := infer_primary_key(
|
|
425
|
+
table_name=self.name,
|
|
426
|
+
df=self._sample_current_df(columns=candidates),
|
|
427
|
+
candidates=candidates,
|
|
428
|
+
):
|
|
429
|
+
_set_primary_key(primary_key)
|
|
430
|
+
return self
|
|
431
|
+
|
|
432
|
+
return self
|
|
433
|
+
|
|
434
|
+
def infer_time_column(self, verbose: bool = True) -> Self:
|
|
435
|
+
r"""Infers the time column in this table.
|
|
436
|
+
|
|
437
|
+
Args:
|
|
438
|
+
verbose: Whether to print verbose output.
|
|
439
|
+
"""
|
|
440
|
+
if self.has_time_column():
|
|
441
|
+
return self
|
|
442
|
+
|
|
443
|
+
# Heuristic-based inference:
|
|
444
|
+
candidates = [
|
|
445
|
+
column.name for column in self.columns
|
|
446
|
+
if column.stype == Stype.timestamp
|
|
447
|
+
and column.name != self._end_time_column
|
|
448
|
+
]
|
|
449
|
+
|
|
450
|
+
if time_column := infer_time_column(
|
|
451
|
+
df=self._sample_current_df(columns=candidates),
|
|
452
|
+
candidates=candidates,
|
|
453
|
+
):
|
|
454
|
+
self.time_column = time_column
|
|
455
|
+
|
|
456
|
+
if verbose:
|
|
457
|
+
print(f"Detected time column '{time_column}' in table "
|
|
458
|
+
f"'{self.name}'")
|
|
459
|
+
|
|
460
|
+
return self
|
|
461
|
+
|
|
413
462
|
def infer_metadata(self, verbose: bool = True) -> Self:
|
|
414
|
-
r"""Infers metadata, *i.e.*, primary keys and time columns, in
|
|
463
|
+
r"""Infers metadata, *i.e.*, primary keys and time columns, in this
|
|
415
464
|
table.
|
|
416
465
|
|
|
417
466
|
Args:
|
|
@@ -419,45 +468,15 @@ class Table(ABC):
|
|
|
419
468
|
"""
|
|
420
469
|
logs = []
|
|
421
470
|
|
|
422
|
-
# Try to detect primary key if not set:
|
|
423
471
|
if not self.has_primary_key():
|
|
472
|
+
self.infer_primary_key(verbose=False)
|
|
473
|
+
if self.has_primary_key():
|
|
474
|
+
logs.append(f"primary key '{self._primary_key}'")
|
|
424
475
|
|
|
425
|
-
def is_candidate(column: Column) -> bool:
|
|
426
|
-
if column.stype == Stype.ID:
|
|
427
|
-
return True
|
|
428
|
-
if all(column.stype != Stype.ID for column in self.columns):
|
|
429
|
-
if self.name == column.name:
|
|
430
|
-
return True
|
|
431
|
-
if (self.name.endswith('s')
|
|
432
|
-
and self.name[:-1] == column.name):
|
|
433
|
-
return True
|
|
434
|
-
return False
|
|
435
|
-
|
|
436
|
-
candidates = [
|
|
437
|
-
column.name for column in self.columns if is_candidate(column)
|
|
438
|
-
]
|
|
439
|
-
|
|
440
|
-
if primary_key := infer_primary_key(
|
|
441
|
-
table_name=self.name,
|
|
442
|
-
df=self._sample_df,
|
|
443
|
-
candidates=candidates,
|
|
444
|
-
):
|
|
445
|
-
self.primary_key = primary_key
|
|
446
|
-
logs.append(f"primary key '{primary_key}'")
|
|
447
|
-
|
|
448
|
-
# Try to detect time column if not set:
|
|
449
476
|
if not self.has_time_column():
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
and column.name != self._end_time_column
|
|
454
|
-
]
|
|
455
|
-
if time_column := infer_time_column(
|
|
456
|
-
df=self._sample_df,
|
|
457
|
-
candidates=candidates,
|
|
458
|
-
):
|
|
459
|
-
self.time_column = time_column
|
|
460
|
-
logs.append(f"time column '{time_column}'")
|
|
477
|
+
self.infer_time_column(verbose=False)
|
|
478
|
+
if self.has_time_column():
|
|
479
|
+
logs.append(f"time column '{self._time_column}'")
|
|
461
480
|
|
|
462
481
|
if verbose and len(logs) > 0:
|
|
463
482
|
print(f"Detected {' and '.join(logs)} in table '{self.name}'")
|
|
@@ -478,6 +497,36 @@ class Table(ABC):
|
|
|
478
497
|
end_time_col=self._end_time_column,
|
|
479
498
|
)
|
|
480
499
|
|
|
500
|
+
@cached_property
|
|
501
|
+
def _source_column_dict(self) -> dict[str, SourceColumn]:
|
|
502
|
+
source_columns = self._get_source_columns()
|
|
503
|
+
if len(source_columns) == 0:
|
|
504
|
+
raise ValueError(f"Table '{self.name}' does not hold any column "
|
|
505
|
+
f"with a supported data type")
|
|
506
|
+
return {column.name: column for column in source_columns}
|
|
507
|
+
|
|
508
|
+
@cached_property
|
|
509
|
+
def _source_sample_df(self) -> pd.DataFrame:
|
|
510
|
+
return self._get_source_sample_df()
|
|
511
|
+
|
|
512
|
+
@property
|
|
513
|
+
def _source_primary_key(self) -> str | None:
|
|
514
|
+
primary_keys = [
|
|
515
|
+
column.name for column in self._source_column_dict.values()
|
|
516
|
+
if column.is_primary_key
|
|
517
|
+
]
|
|
518
|
+
if len(primary_keys) == 1: # NOTE No composite keys yet.
|
|
519
|
+
return primary_keys[0]
|
|
520
|
+
|
|
521
|
+
return None
|
|
522
|
+
|
|
523
|
+
@cached_property
|
|
524
|
+
def _num_rows(self) -> int | None:
|
|
525
|
+
return self._get_num_rows()
|
|
526
|
+
|
|
527
|
+
def _sample_current_df(self, columns: Sequence[str]) -> pd.DataFrame:
|
|
528
|
+
return self._source_sample_df[columns]
|
|
529
|
+
|
|
481
530
|
# Python builtins #########################################################
|
|
482
531
|
|
|
483
532
|
def __hash__(self) -> int:
|
|
@@ -512,45 +561,15 @@ class Table(ABC):
|
|
|
512
561
|
@abstractmethod
|
|
513
562
|
def backend(self) -> DataBackend:
|
|
514
563
|
r"""The data backend of this table."""
|
|
515
|
-
pass
|
|
516
|
-
|
|
517
|
-
@cached_property
|
|
518
|
-
def _source_column_dict(self) -> Dict[str, SourceColumn]:
|
|
519
|
-
return {col.name: col for col in self._get_source_columns()}
|
|
520
564
|
|
|
521
565
|
@abstractmethod
|
|
522
|
-
def _get_source_columns(self) -> List[SourceColumn]:
|
|
566
|
+
def _get_source_columns(self) -> list[SourceColumn]:
|
|
523
567
|
pass
|
|
524
568
|
|
|
525
|
-
@cached_property
|
|
526
|
-
def _source_foreign_key_dict(self) -> Dict[str, SourceForeignKey]:
|
|
527
|
-
fkeys = self._get_source_foreign_keys()
|
|
528
|
-
# NOTE Drop all keys that link to different primary keys in the same
|
|
529
|
-
# table since we don't support composite keys yet:
|
|
530
|
-
table_pkeys: Dict[str, Set[str]] = defaultdict(set)
|
|
531
|
-
for fkey in fkeys:
|
|
532
|
-
table_pkeys[fkey.dst_table].add(fkey.primary_key)
|
|
533
|
-
return {
|
|
534
|
-
fkey.name: fkey
|
|
535
|
-
for fkey in fkeys if len(table_pkeys[fkey.dst_table]) == 1
|
|
536
|
-
}
|
|
537
|
-
|
|
538
569
|
@abstractmethod
|
|
539
|
-
def _get_source_foreign_keys(self) -> List[SourceForeignKey]:
|
|
570
|
+
def _get_source_sample_df(self) -> pd.DataFrame:
|
|
540
571
|
pass
|
|
541
572
|
|
|
542
|
-
@cached_property
|
|
543
|
-
def _sample_df(self) -> pd.DataFrame:
|
|
544
|
-
return self._get_sample_df()
|
|
545
|
-
|
|
546
|
-
@abstractmethod
|
|
547
|
-
def _get_sample_df(self) -> pd.DataFrame:
|
|
548
|
-
pass
|
|
549
|
-
|
|
550
|
-
@cached_property
|
|
551
|
-
def _num_rows(self) -> Optional[int]:
|
|
552
|
-
return self._get_num_rows()
|
|
553
|
-
|
|
554
573
|
@abstractmethod
|
|
555
|
-
def _get_num_rows(self) -> Optional[int]:
|
|
574
|
+
def _get_num_rows(self) -> int | None:
|
|
556
575
|
pass
|