kumoai 2.13.0.dev202512011731__cp312-cp312-macosx_11_0_arm64.whl → 2.13.0.dev202512031731__cp312-cp312-macosx_11_0_arm64.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these package versions as they appear in their respective public registries.
kumoai/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = '2.13.0.dev202512011731'
1
+ __version__ = '2.13.0.dev202512031731'
@@ -1,11 +1,9 @@
1
1
  from typing import List, Optional
2
2
 
3
3
  import pandas as pd
4
- from kumoapi.typing import Dtype, Stype
5
- from typing_extensions import Self
6
4
 
7
- from kumoai.experimental.rfm import utils
8
- from kumoai.experimental.rfm.base import Column, Table
5
+ from kumoai.experimental.rfm.base import SourceColumn, SourceForeignKey, Table
6
+ from kumoai.experimental.rfm.infer import infer_dtype
9
7
 
10
8
 
11
9
  class LocalTable(Table):
@@ -59,7 +57,7 @@ class LocalTable(Table):
59
57
  ) -> None:
60
58
 
61
59
  if df.empty:
62
- raise ValueError("Data frame must have at least one row")
60
+ raise ValueError("Data frame is empty")
63
61
  if isinstance(df.columns, pd.MultiIndex):
64
62
  raise ValueError("Data frame must not have a multi-index")
65
63
  if not df.columns.is_unique:
@@ -77,75 +75,21 @@ class LocalTable(Table):
77
75
  end_time_column=end_time_column,
78
76
  )
79
77
 
80
- def infer_metadata(self, verbose: bool = True) -> Self:
81
- r"""Infers metadata, *i.e.*, primary keys and time columns, in the
82
- table.
83
-
84
- Args:
85
- verbose: Whether to print verbose output.
86
- """
87
- logs = []
88
-
89
- # Try to detect primary key if not set:
90
- if not self.has_primary_key():
91
-
92
- def is_candidate(column: Column) -> bool:
93
- if column.stype == Stype.ID:
94
- return True
95
- if all(column.stype != Stype.ID for column in self.columns):
96
- if self.name == column.name:
97
- return True
98
- if (self.name.endswith('s')
99
- and self.name[:-1] == column.name):
100
- return True
101
- return False
102
-
103
- candidates = [
104
- column.name for column in self.columns if is_candidate(column)
105
- ]
106
-
107
- if primary_key := utils.detect_primary_key(
108
- table_name=self.name,
109
- df=self._data,
110
- candidates=candidates,
111
- ):
112
- self.primary_key = primary_key
113
- logs.append(f"primary key '{primary_key}'")
114
-
115
- # Try to detect time column if not set:
116
- if not self.has_time_column():
117
- candidates = [
118
- column.name for column in self.columns
119
- if column.stype == Stype.timestamp
120
- and column.name != self._end_time_column
121
- ]
122
- if time_column := utils.detect_time_column(self._data, candidates):
123
- self.time_column = time_column
124
- logs.append(f"time column '{time_column}'")
125
-
126
- if verbose and len(logs) > 0:
127
- print(f"Detected {' and '.join(logs)} in table '{self.name}'")
128
-
129
- return self
130
-
131
- def _has_source_column(self, name: str) -> bool:
132
- return name in self._data.columns
133
-
134
- def _get_source_dtype(self, name: str) -> Dtype:
135
- return utils.to_dtype(self._data[name])
136
-
137
- def _get_source_stype(self, name: str, dtype: Dtype) -> Stype:
138
- return utils.infer_stype(self._data[name], name, dtype)
139
-
140
- def _infer_primary_key(self, candidates: List[str]) -> Optional[str]:
141
- return utils.detect_primary_key(
142
- table_name=self.name,
143
- df=self._data,
144
- candidates=candidates,
145
- )
78
+ def _get_source_columns(self) -> List[SourceColumn]:
79
+ return [
80
+ SourceColumn(
81
+ name=column,
82
+ dtype=infer_dtype(self._data[column]),
83
+ is_primary_key=False,
84
+ is_unique_key=False,
85
+ ) for column in self._data.columns
86
+ ]
87
+
88
+ def _get_source_foreign_keys(self) -> List[SourceForeignKey]:
89
+ return []
146
90
 
147
- def _infer_time_column(self, candidates: List[str]) -> Optional[str]:
148
- return utils.detect_time_column(df=self._data, candidates=candidates)
91
+ def _get_sample_df(self) -> pd.DataFrame:
92
+ return self._data
149
93
 
150
- def _num_rows(self) -> Optional[int]:
94
+ def _get_num_rows(self) -> Optional[int]:
151
95
  return len(self._data)
@@ -0,0 +1,35 @@
1
+ from typing import Any, TypeAlias
2
+
3
+ try:
4
+ import snowflake.connector
5
+ except ImportError:
6
+ raise ImportError("No module named 'snowflake'. Please install Kumo SDK "
7
+ "with the 'snowflake' extension via "
8
+ "`pip install kumoai[snowflake]`.")
9
+
10
+ Connection: TypeAlias = snowflake.connector.SnowflakeConnection
11
+
12
+
13
+ def connect(**kwargs: Any) -> Connection:
14
+ r"""Opens a connection to a :class:`snowflake` database.
15
+
16
+ If available, will return a connection to the active session.
17
+
18
+ kwargs: Connection arguments, following the :class:`snowflake` protocol.
19
+ """
20
+ try:
21
+ from snowflake.snowpark.context import get_active_session
22
+ return get_active_session().connection
23
+ except Exception:
24
+ pass
25
+
26
+ return snowflake.connector.connect(**kwargs)
27
+
28
+
29
+ from .table import SnowTable # noqa: E402
30
+
31
+ __all__ = [
32
+ 'connect',
33
+ 'Connection',
34
+ 'SnowTable',
35
+ ]
@@ -0,0 +1,95 @@
1
+ import re
2
+ from typing import List, Optional, Sequence
3
+
4
+ import pandas as pd
5
+ from kumoapi.typing import Dtype
6
+
7
+ from kumoai.experimental.rfm.backend.sqlite import Connection
8
+ from kumoai.experimental.rfm.base import SourceColumn, SourceForeignKey, Table
9
+
10
+
11
+ class SnowTable(Table):
12
+ r"""A table backed by a :class:`snowflake` database.
13
+
14
+ Args:
15
+ connection: The connection to a :class:`snowflake` database.
16
+ name: The name of this table.
17
+ columns: The selected columns of this table.
18
+ primary_key: The name of the primary key of this table, if it exists.
19
+ time_column: The name of the time column of this table, if it exists.
20
+ end_time_column: The name of the end time column of this table, if it
21
+ exists.
22
+ """
23
+ def __init__(
24
+ self,
25
+ connection: Connection,
26
+ name: str,
27
+ columns: Optional[Sequence[str]] = None,
28
+ primary_key: Optional[str] = None,
29
+ time_column: Optional[str] = None,
30
+ end_time_column: Optional[str] = None,
31
+ ) -> None:
32
+
33
+ self._connection = connection
34
+
35
+ super().__init__(
36
+ name=name,
37
+ columns=columns,
38
+ primary_key=primary_key,
39
+ time_column=time_column,
40
+ end_time_column=end_time_column,
41
+ )
42
+
43
+ def _get_source_columns(self) -> List[SourceColumn]:
44
+ source_columns: List[SourceColumn] = []
45
+ with self._connection.cursor() as cursor:
46
+ try:
47
+ cursor.execute(f"DESCRIBE TABLE {self.name}")
48
+ except Exception as e:
49
+ raise ValueError(f"Table '{self.name}' does not exist") from e
50
+
51
+ for row in cursor.fetchall():
52
+ column, type, _, _, _, is_pkey, is_unique = row[:7]
53
+
54
+ type = type.strip().upper()
55
+ if type.startswith('NUMBER'):
56
+ dtype = Dtype.int
57
+ elif type.startswith('VARCHAR'):
58
+ dtype = Dtype.string
59
+ elif type == 'FLOAT':
60
+ dtype = Dtype.float
61
+ elif type == 'BOOLEAN':
62
+ dtype = Dtype.bool
63
+ elif re.search('DATE|TIMESTAMP', type):
64
+ dtype = Dtype.date
65
+ else:
66
+ continue
67
+
68
+ source_column = SourceColumn(
69
+ name=column,
70
+ dtype=dtype,
71
+ is_primary_key=is_pkey.strip().upper() == 'Y',
72
+ is_unique_key=is_unique.strip().upper() == 'Y',
73
+ )
74
+ source_columns.append(source_column)
75
+
76
+ return source_columns
77
+
78
+ def _get_source_foreign_keys(self) -> List[SourceForeignKey]:
79
+ source_fkeys: List[SourceForeignKey] = []
80
+ with self._connection.cursor() as cursor:
81
+ cursor.execute(f"SHOW IMPORTED KEYS IN TABLE {self.name}")
82
+ for row in cursor.fetchall():
83
+ _, _, _, dst_table, pkey, _, _, _, fkey = row[:9]
84
+ source_fkeys.append(SourceForeignKey(fkey, dst_table, pkey))
85
+ return source_fkeys
86
+
87
+ def _get_sample_df(self) -> pd.DataFrame:
88
+ with self._connection.cursor() as cursor:
89
+ columns = ', '.join(self._source_column_dict.keys())
90
+ cursor.execute(f"SELECT {columns} FROM {self.name} LIMIT 1000")
91
+ table = cursor.fetch_arrow_all()
92
+ return table.to_pandas()
93
+
94
+ def _get_num_rows(self) -> Optional[int]:
95
+ return None
@@ -12,12 +12,19 @@ Connection: TypeAlias = adbc.AdbcSqliteConnection
12
12
 
13
13
 
14
14
  def connect(uri: Union[str, Path, None] = None, **kwargs: Any) -> Connection:
15
+ r"""Opens a connection to a :class:`sqlite` database.
16
+
17
+ uri: The path to the database file to be opened.
18
+ kwargs: Additional connection arguments, following the
19
+ :class:`adbc_driver_sqlite` protocol.
20
+ """
15
21
  return adbc.connect(uri, **kwargs)
16
22
 
17
23
 
18
24
  from .table import SQLiteTable # noqa: E402
19
25
 
20
26
  __all__ = [
27
+ 'connect',
21
28
  'Connection',
22
29
  'SQLiteTable',
23
30
  ]
@@ -1,13 +1,12 @@
1
1
  import re
2
- from typing import Dict, List, Optional, Sequence
2
+ from typing import List, Optional, Sequence
3
3
 
4
- import pyarrow as pa
5
- from kumoapi.typing import Dtype, Stype
6
- from typing_extensions import Self
4
+ import pandas as pd
5
+ from kumoapi.typing import Dtype
7
6
 
8
- from kumoai.experimental.rfm import utils
9
7
  from kumoai.experimental.rfm.backend.sqlite import Connection
10
- from kumoai.experimental.rfm.base import Table
8
+ from kumoai.experimental.rfm.base import SourceColumn, SourceForeignKey, Table
9
+ from kumoai.experimental.rfm.infer import infer_dtype
11
10
 
12
11
 
13
12
  class SQLiteTable(Table):
@@ -33,85 +32,63 @@ class SQLiteTable(Table):
33
32
  ) -> None:
34
33
 
35
34
  self._connection = connection
36
- self._dtype_dict: Dict[str, Dtype] = {}
37
-
38
- with connection.cursor() as cursor:
39
- cursor.execute(f"PRAGMA table_info({name})")
40
- for _, column, dtype, _, _, is_pkey in cursor.fetchall():
41
- if bool(is_pkey):
42
- if primary_key is not None and primary_key != column:
43
- raise ValueError(f"Found duplicate primary key "
44
- f"definition '{primary_key}' and "
45
- f"'{column}' in table '{name}'")
46
- primary_key = column
47
-
48
- # Determine colun affinity:
49
- dtype = dtype.strip().upper()
50
- if re.search('INT', dtype):
51
- self._dtype_dict[column] = Dtype.int
52
- elif re.search('TEXT|CHAR|CLOB', dtype):
53
- self._dtype_dict[column] = Dtype.string
54
- elif re.search('REAL|FLOA|DOUB', dtype):
55
- self._dtype_dict[column] = Dtype.float
56
- else: # NUMERIC affinity.
57
- self._dtype_dict[column] = Dtype.unsupported
58
-
59
- if len(self._dtype_dict) > 0:
60
- column_names = ', '.join(self._dtype_dict.keys())
61
- cursor.execute(f"SELECT {column_names} FROM {name} "
62
- f"ORDER BY rowid LIMIT 1000")
63
- self._sample = cursor.fetch_arrow_table()
64
-
65
- for column_name in list(self._dtype_dict.keys()):
66
- if self._dtype_dict[column_name] == Dtype.unsupported:
67
- dtype = self._sample[column_name].type
68
- if pa.types.is_integer(dtype):
69
- self._dtype_dict[column_name] = Dtype.int
70
- elif pa.types.is_floating(dtype):
71
- self._dtype_dict[column_name] = Dtype.float
72
- elif pa.types.is_decimal(dtype):
73
- self._dtype_dict[column_name] = Dtype.float
74
- elif pa.types.is_string(dtype):
75
- self._dtype_dict[column_name] = Dtype.string
76
- else:
77
- del self._dtype_dict[column_name]
78
-
79
- if len(self._dtype_dict) == 0:
80
- raise RuntimeError(f"Table '{name}' does not exist or does not "
81
- f"hold any column with a supported data type")
82
35
 
83
36
  super().__init__(
84
37
  name=name,
85
- columns=columns or list(self._dtype_dict.keys()),
38
+ columns=columns,
86
39
  primary_key=primary_key,
87
40
  time_column=time_column,
88
41
  end_time_column=end_time_column,
89
42
  )
90
43
 
91
- def infer_metadata(self, verbose: bool = True) -> Self:
92
- r"""Infers metadata, *i.e.*, primary keys and time columns, in the
93
- table.
94
-
95
- Args:
96
- verbose: Whether to print verbose output.
97
- """
98
- return self
99
-
100
- def _has_source_column(self, name: str) -> bool:
101
- return name in self._dtype_dict
102
-
103
- def _get_source_dtype(self, name: str) -> Dtype:
104
- return self._dtype_dict[name]
105
-
106
- def _get_source_stype(self, name: str, dtype: Dtype) -> Stype:
107
- ser = self._sample[name].to_pandas()
108
- return utils.infer_stype(ser, name, dtype)
109
-
110
- def _infer_primary_key(self, candidates: List[str]) -> Optional[str]:
111
- return None # TODO
112
-
113
- def _infer_time_column(self, candidates: List[str]) -> Optional[str]:
114
- return None # TODO
115
-
116
- def _num_rows(self) -> Optional[int]:
44
+ def _get_source_columns(self) -> List[SourceColumn]:
45
+ source_columns: List[SourceColumn] = []
46
+ with self._connection.cursor() as cursor:
47
+ cursor.execute(f"PRAGMA table_info({self.name})")
48
+ rows = cursor.fetchall()
49
+
50
+ if len(rows) == 0:
51
+ raise ValueError(f"Table '{self.name}' does not exist")
52
+
53
+ for _, column, type, _, _, is_pkey in rows:
54
+ # Determine column affinity:
55
+ type = type.strip().upper()
56
+ if re.search('INT', type):
57
+ dtype = Dtype.int
58
+ elif re.search('TEXT|CHAR|CLOB', type):
59
+ dtype = Dtype.string
60
+ elif re.search('REAL|FLOA|DOUB', type):
61
+ dtype = Dtype.float
62
+ else: # NUMERIC affinity.
63
+ try:
64
+ dtype = infer_dtype(self._sample_df[column])
65
+ except Exception as e:
66
+ raise e
67
+
68
+ source_column = SourceColumn(
69
+ name=column,
70
+ dtype=dtype,
71
+ is_primary_key=bool(is_pkey),
72
+ is_unique_key=False,
73
+ )
74
+ source_columns.append(source_column)
75
+
76
+ return source_columns
77
+
78
+ def _get_source_foreign_keys(self) -> List[SourceForeignKey]:
79
+ source_fkeys: List[SourceForeignKey] = []
80
+ with self._connection.cursor() as cursor:
81
+ cursor.execute(f"PRAGMA foreign_key_list({self.name})")
82
+ for _, _, dst_table, fkey, pkey, _, _, _ in cursor.fetchall():
83
+ source_fkeys.append(SourceForeignKey(fkey, dst_table, pkey))
84
+ return source_fkeys
85
+
86
+ def _get_sample_df(self) -> pd.DataFrame:
87
+ with self._connection.cursor() as cursor:
88
+ cursor.execute(f"SELECT * FROM {self.name} "
89
+ f"ORDER BY rowid LIMIT 1000")
90
+ table = cursor.fetch_arrow_table()
91
+ return table.to_pandas()
92
+
93
+ def _get_num_rows(self) -> Optional[int]:
117
94
  return None
@@ -1,7 +1,10 @@
1
+ from .source import SourceColumn, SourceForeignKey
1
2
  from .column import Column
2
3
  from .table import Table
3
4
 
4
5
  __all__ = [
6
+ 'SourceColumn',
7
+ 'SourceForeignKey',
5
8
  'Column',
6
9
  'Table',
7
10
  ]
@@ -0,0 +1,18 @@
1
+ from dataclasses import dataclass
2
+
3
+ from kumoapi.typing import Dtype
4
+
5
+
6
+ @dataclass
7
+ class SourceColumn:
8
+ name: str
9
+ dtype: Dtype
10
+ is_primary_key: bool
11
+ is_unique_key: bool
12
+
13
+
14
+ @dataclass
15
+ class SourceForeignKey:
16
+ name: str
17
+ dst_table: str
18
+ primary_key: str
@@ -1,15 +1,25 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import Dict, List, Optional, Sequence
2
+ from collections import defaultdict
3
+ from functools import cached_property
4
+ from typing import Dict, List, Optional, Sequence, Set
3
5
 
4
6
  import pandas as pd
5
7
  from kumoapi.source_table import UnavailableSourceTable
6
8
  from kumoapi.table import Column as ColumnDefinition
7
9
  from kumoapi.table import TableDefinition
8
- from kumoapi.typing import Dtype, Stype
10
+ from kumoapi.typing import Stype
9
11
  from typing_extensions import Self
10
12
 
11
13
  from kumoai import in_notebook
12
- from kumoai.experimental.rfm.base import Column
14
+ from kumoai.experimental.rfm.base import Column, SourceColumn, SourceForeignKey
15
+ from kumoai.experimental.rfm.infer import (
16
+ contains_categorical,
17
+ contains_id,
18
+ contains_multicategorical,
19
+ contains_timestamp,
20
+ infer_primary_key,
21
+ infer_time_column,
22
+ )
13
23
 
14
24
 
15
25
  class Table(ABC):
@@ -39,8 +49,30 @@ class Table(ABC):
39
49
  self._time_column: Optional[str] = None
40
50
  self._end_time_column: Optional[str] = None
41
51
 
52
+ if len(self._source_column_dict) == 0:
53
+ raise ValueError(f"Table '{name}' does not hold any column with "
54
+ f"a supported data type")
55
+
56
+ primary_keys = [
57
+ column.name for column in self._source_column_dict.values()
58
+ if column.is_primary_key
59
+ ]
60
+ if len(primary_keys) == 1: # NOTE No composite keys yet.
61
+ if primary_key is not None and primary_key != primary_keys[0]:
62
+ raise ValueError(f"Found duplicate primary key "
63
+ f"definition '{primary_key}' and "
64
+ f"'{primary_keys[0]}' in table '{name}'")
65
+ primary_key = primary_keys[0]
66
+
67
+ unique_keys = [
68
+ column.name for column in self._source_column_dict.values()
69
+ if column.is_unique_key
70
+ ]
71
+ if primary_key is None and len(unique_keys) == 1:
72
+ primary_key = unique_keys[0]
73
+
42
74
  self._columns: Dict[str, Column] = {}
43
- for column_name in columns or []:
75
+ for column_name in columns or list(self._source_column_dict.keys()):
44
76
  self.add_column(column_name)
45
77
 
46
78
  if primary_key is not None:
@@ -104,12 +136,12 @@ class Table(ABC):
104
136
  raise KeyError(f"Column '{name}' already exists in table "
105
137
  f"'{self.name}'")
106
138
 
107
- if not self._has_source_column(name):
139
+ if name not in self._source_column_dict:
108
140
  raise KeyError(f"Column '{name}' does not exist in the underlying "
109
141
  f"source table")
110
142
 
111
143
  try:
112
- dtype = self._get_source_dtype(name)
144
+ dtype = self._source_column_dict[name].dtype
113
145
  except Exception as e:
114
146
  raise RuntimeError(f"Could not obtain data type for column "
115
147
  f"'{name}' in table '{self.name}'. Change "
@@ -117,7 +149,17 @@ class Table(ABC):
117
149
  f"table or remove it from the table.") from e
118
150
 
119
151
  try:
120
- stype = self._get_source_stype(name, dtype)
152
+ ser = self._sample_df[name]
153
+ if contains_id(ser, name, dtype):
154
+ stype = Stype.ID
155
+ elif contains_timestamp(ser, name, dtype):
156
+ stype = Stype.timestamp
157
+ elif contains_multicategorical(ser, name, dtype):
158
+ stype = Stype.multicategorical
159
+ elif contains_categorical(ser, name, dtype):
160
+ stype = Stype.categorical
161
+ else:
162
+ stype = dtype.default_stype
121
163
  except Exception as e:
122
164
  raise RuntimeError(f"Could not obtain semantic type for column "
123
165
  f"'{name}' in table '{self.name}'. Change "
@@ -338,8 +380,9 @@ class Table(ABC):
338
380
 
339
381
  def print_metadata(self) -> None:
340
382
  r"""Prints the :meth:`~metadata` of this table."""
341
- num_rows = self._num_rows()
342
- num_rows_repr = ' ({num_rows:,} rows)' if num_rows is not None else ''
383
+ num_rows_repr = ''
384
+ if self._num_rows is not None:
385
+ num_rows_repr = ' ({self._num_rows:,} rows)'
343
386
 
344
387
  if in_notebook():
345
388
  from IPython.display import Markdown, display
@@ -384,7 +427,11 @@ class Table(ABC):
384
427
  column.name for column in self.columns if is_candidate(column)
385
428
  ]
386
429
 
387
- if primary_key := self._infer_primary_key(candidates):
430
+ if primary_key := infer_primary_key(
431
+ table_name=self.name,
432
+ df=self._sample_df,
433
+ candidates=candidates,
434
+ ):
388
435
  self.primary_key = primary_key
389
436
  logs.append(f"primary key '{primary_key}'")
390
437
 
@@ -395,7 +442,10 @@ class Table(ABC):
395
442
  if column.stype == Stype.timestamp
396
443
  and column.name != self._end_time_column
397
444
  ]
398
- if time_column := self._infer_time_column(candidates):
445
+ if time_column := infer_time_column(
446
+ df=self._sample_df,
447
+ candidates=candidates,
448
+ ):
399
449
  self.time_column = time_column
400
450
  logs.append(f"time column '{time_column}'")
401
451
 
@@ -448,26 +498,43 @@ class Table(ABC):
448
498
 
449
499
  # Abstract method #########################################################
450
500
 
451
- @abstractmethod
452
- def _has_source_column(self, name: str) -> bool:
453
- pass
501
+ @cached_property
502
+ def _source_column_dict(self) -> Dict[str, SourceColumn]:
503
+ return {col.name: col for col in self._get_source_columns()}
454
504
 
455
505
  @abstractmethod
456
- def _get_source_dtype(self, name: str) -> Dtype:
506
+ def _get_source_columns(self) -> List[SourceColumn]:
457
507
  pass
458
508
 
459
- @abstractmethod
460
- def _get_source_stype(self, name: str, dtype: Dtype) -> Stype:
461
- pass
509
+ @cached_property
510
+ def _source_foreign_key_dict(self) -> Dict[str, SourceForeignKey]:
511
+ fkeys = self._get_source_foreign_keys()
512
+ # NOTE Drop all keys that link to different primary keys in the same
513
+ # table since we don't support composite keys yet:
514
+ table_pkeys: Dict[str, Set[str]] = defaultdict(set)
515
+ for fkey in fkeys:
516
+ table_pkeys[fkey.dst_table].add(fkey.primary_key)
517
+ return {
518
+ fkey.name: fkey
519
+ for fkey in fkeys if len(table_pkeys[fkey.dst_table]) == 1
520
+ }
462
521
 
463
522
  @abstractmethod
464
- def _infer_primary_key(self, candidates: List[str]) -> Optional[str]:
523
+ def _get_source_foreign_keys(self) -> List[SourceForeignKey]:
465
524
  pass
466
525
 
526
+ @cached_property
527
+ def _sample_df(self) -> pd.DataFrame:
528
+ return self._get_sample_df()
529
+
467
530
  @abstractmethod
468
- def _infer_time_column(self, candidates: List[str]) -> Optional[str]:
531
+ def _get_sample_df(self) -> pd.DataFrame:
469
532
  pass
470
533
 
471
- @abstractmethod
534
+ @cached_property
472
535
  def _num_rows(self) -> Optional[int]:
536
+ return self._get_num_rows()
537
+
538
+ @abstractmethod
539
+ def _get_num_rows(self) -> Optional[int]:
473
540
  pass