kumoai 2.13.0.dev202511181731__cp311-cp311-macosx_11_0_arm64.whl → 2.14.0.dev202512191731__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. kumoai/__init__.py +12 -0
  2. kumoai/_version.py +1 -1
  3. kumoai/client/pquery.py +6 -2
  4. kumoai/connector/utils.py +23 -2
  5. kumoai/experimental/rfm/__init__.py +52 -52
  6. kumoai/experimental/rfm/authenticate.py +3 -4
  7. kumoai/experimental/rfm/backend/__init__.py +0 -0
  8. kumoai/experimental/rfm/backend/local/__init__.py +42 -0
  9. kumoai/experimental/rfm/{local_graph_store.py → backend/local/graph_store.py} +57 -110
  10. kumoai/experimental/rfm/backend/local/sampler.py +315 -0
  11. kumoai/experimental/rfm/backend/local/table.py +114 -0
  12. kumoai/experimental/rfm/backend/snow/__init__.py +37 -0
  13. kumoai/experimental/rfm/backend/snow/sampler.py +252 -0
  14. kumoai/experimental/rfm/backend/snow/table.py +169 -0
  15. kumoai/experimental/rfm/backend/sqlite/__init__.py +32 -0
  16. kumoai/experimental/rfm/backend/sqlite/sampler.py +349 -0
  17. kumoai/experimental/rfm/backend/sqlite/table.py +154 -0
  18. kumoai/experimental/rfm/base/__init__.py +33 -0
  19. kumoai/experimental/rfm/base/column.py +68 -0
  20. kumoai/experimental/rfm/base/column_expression.py +50 -0
  21. kumoai/experimental/rfm/base/sampler.py +773 -0
  22. kumoai/experimental/rfm/base/source.py +19 -0
  23. kumoai/experimental/rfm/base/sql_sampler.py +84 -0
  24. kumoai/experimental/rfm/base/sql_table.py +229 -0
  25. kumoai/experimental/rfm/{local_table.py → base/table.py} +219 -189
  26. kumoai/experimental/rfm/{local_graph.py → graph.py} +510 -91
  27. kumoai/experimental/rfm/infer/__init__.py +8 -0
  28. kumoai/experimental/rfm/infer/dtype.py +79 -0
  29. kumoai/experimental/rfm/infer/pkey.py +128 -0
  30. kumoai/experimental/rfm/infer/stype.py +35 -0
  31. kumoai/experimental/rfm/infer/time_col.py +61 -0
  32. kumoai/experimental/rfm/pquery/executor.py +27 -27
  33. kumoai/experimental/rfm/pquery/pandas_executor.py +30 -32
  34. kumoai/experimental/rfm/rfm.py +313 -246
  35. kumoai/experimental/rfm/sagemaker.py +15 -7
  36. kumoai/pquery/predictive_query.py +10 -6
  37. kumoai/testing/decorators.py +1 -1
  38. kumoai/testing/snow.py +50 -0
  39. kumoai/utils/__init__.py +3 -2
  40. kumoai/utils/progress_logger.py +178 -12
  41. kumoai/utils/sql.py +3 -0
  42. {kumoai-2.13.0.dev202511181731.dist-info → kumoai-2.14.0.dev202512191731.dist-info}/METADATA +10 -8
  43. {kumoai-2.13.0.dev202511181731.dist-info → kumoai-2.14.0.dev202512191731.dist-info}/RECORD +46 -26
  44. kumoai/experimental/rfm/local_graph_sampler.py +0 -184
  45. kumoai/experimental/rfm/local_pquery_driver.py +0 -689
  46. kumoai/experimental/rfm/utils.py +0 -344
  47. {kumoai-2.13.0.dev202511181731.dist-info → kumoai-2.14.0.dev202512191731.dist-info}/WHEEL +0 -0
  48. {kumoai-2.13.0.dev202511181731.dist-info → kumoai-2.14.0.dev202512191731.dist-info}/licenses/LICENSE +0 -0
  49. {kumoai-2.13.0.dev202511181731.dist-info → kumoai-2.14.0.dev202512191731.dist-info}/top_level.txt +0 -0
kumoai/experimental/rfm/{local_table.py → base/table.py}
@@ -1,115 +1,32 @@
-from dataclasses import dataclass
-from typing import Any, Dict, List, Optional
+from abc import ABC, abstractmethod
+from collections.abc import Sequence
+from functools import cached_property
 
 import pandas as pd
+from kumoapi.model_plan import MissingType
 from kumoapi.source_table import UnavailableSourceTable
 from kumoapi.table import Column as ColumnDefinition
 from kumoapi.table import TableDefinition
-from kumoapi.typing import Dtype, Stype
+from kumoapi.typing import Stype
 from typing_extensions import Self
 
-from kumoai import in_notebook
-from kumoai.experimental.rfm import utils
+from kumoai import in_notebook, in_snowflake_notebook
+from kumoai.experimental.rfm.base import Column, DataBackend, SourceColumn
+from kumoai.experimental.rfm.infer import (
+    infer_primary_key,
+    infer_stype,
+    infer_time_column,
+)
 
 
-@dataclass(init=False, repr=False, eq=False)
-class Column:
-    stype: Stype
-
-    def __init__(
-        self,
-        name: str,
-        dtype: Dtype,
-        stype: Stype,
-        is_primary_key: bool = False,
-        is_time_column: bool = False,
-        is_end_time_column: bool = False,
-    ) -> None:
-        self._name = name
-        self._dtype = Dtype(dtype)
-        self._is_primary_key = is_primary_key
-        self._is_time_column = is_time_column
-        self._is_end_time_column = is_end_time_column
-        self.stype = Stype(stype)
-
-    @property
-    def name(self) -> str:
-        return self._name
-
-    @property
-    def dtype(self) -> Dtype:
-        return self._dtype
-
-    def __setattr__(self, key: str, val: Any) -> None:
-        if key == 'stype':
-            if isinstance(val, str):
-                val = Stype(val)
-            assert isinstance(val, Stype)
-            if not val.supports_dtype(self.dtype):
-                raise ValueError(f"Column '{self.name}' received an "
-                                 f"incompatible semantic type (got "
-                                 f"dtype='{self.dtype}' and stype='{val}')")
-            if self._is_primary_key and val != Stype.ID:
-                raise ValueError(f"Primary key '{self.name}' must have 'ID' "
-                                 f"semantic type (got '{val}')")
-            if self._is_time_column and val != Stype.timestamp:
-                raise ValueError(f"Time column '{self.name}' must have "
-                                 f"'timestamp' semantic type (got '{val}')")
-            if self._is_end_time_column and val != Stype.timestamp:
-                raise ValueError(f"End time column '{self.name}' must have "
-                                 f"'timestamp' semantic type (got '{val}')")
-
-        super().__setattr__(key, val)
-
-    def __hash__(self) -> int:
-        return hash((self.name, self.stype, self.dtype))
-
-    def __eq__(self, other: Any) -> bool:
-        if not isinstance(other, Column):
-            return False
-        return hash(self) == hash(other)
-
-    def __repr__(self) -> str:
-        return (f'{self.__class__.__name__}(name={self.name}, '
-                f'stype={self.stype}, dtype={self.dtype})')
-
-
-class LocalTable:
-    r"""A table backed by a :class:`pandas.DataFrame`.
-
-    A :class:`LocalTable` fully specifies the relevant metadata, *i.e.*
-    selected columns, column semantic types, primary keys and time columns.
-    :class:`LocalTable` is used to create a :class:`LocalGraph`.
-
-    .. code-block:: python
-
-        import pandas as pd
-        import kumoai.experimental.rfm as rfm
-
-        # Load data from a CSV file:
-        df = pd.read_csv("data.csv")
-
-        # Create a table from a `pandas.DataFrame` and infer its metadata ...
-        table = rfm.LocalTable(df, name="my_table").infer_metadata()
-
-        # ... or create a table explicitly:
-        table = rfm.LocalTable(
-            df=df,
-            name="my_table",
-            primary_key="id",
-            time_column="time",
-            end_time_column=None,
-        )
-
-        # Verify metadata:
-        table.print_metadata()
-
-        # Change the semantic type of a column:
-        table[column].stype = "text"
+class Table(ABC):
+    r"""A :class:`Table` fully specifies the relevant metadata of a single
+    table, *i.e.* its selected columns, data types, semantic types, primary
+    keys and time columns.
 
     Args:
-        df: The data frame to create the table from.
-        name: The name of the table.
+        name: The name of this table.
+        columns: The selected columns of this table.
         primary_key: The name of the primary key of this table, if it exists.
         time_column: The name of the time column of this table, if it exists.
         end_time_column: The name of the end time column of this table, if it
@@ -117,49 +34,53 @@ class LocalTable:
     """
     def __init__(
         self,
-        df: pd.DataFrame,
         name: str,
-        primary_key: Optional[str] = None,
-        time_column: Optional[str] = None,
-        end_time_column: Optional[str] = None,
+        columns: Sequence[str] | None = None,
+        primary_key: MissingType | str | None = MissingType.VALUE,
+        time_column: str | None = None,
+        end_time_column: str | None = None,
     ) -> None:
 
-        if df.empty:
-            raise ValueError("Data frame must have at least one row")
-        if isinstance(df.columns, pd.MultiIndex):
-            raise ValueError("Data frame must not have a multi-index")
-        if not df.columns.is_unique:
-            raise ValueError("Data frame must have unique column names")
-        if any(col == '' for col in df.columns):
-            raise ValueError("Data frame must have non-empty column names")
-
-        df = df.copy(deep=False)
-
-        self._data = df
         self._name = name
-        self._primary_key: Optional[str] = None
-        self._time_column: Optional[str] = None
-        self._end_time_column: Optional[str] = None
+        self._primary_key: str | None = None
+        self._time_column: str | None = None
+        self._end_time_column: str | None = None
+
+        if columns is None:
+            columns = list(self._source_column_dict.keys())
 
-        self._columns: Dict[str, Column] = {}
-        for column_name in df.columns:
+        self._columns: dict[str, Column] = {}
+        for column_name in columns:
             self.add_column(column_name)
 
-        if primary_key is not None:
+        if isinstance(primary_key, MissingType):
+            # Inference from source column metadata:
+            if '_source_column_dict' in self.__dict__:
+                primary_key = self._source_primary_key
+                if (primary_key is not None and primary_key in self
+                        and self[primary_key].is_physical):
+                    self.primary_key = primary_key
+        elif primary_key is not None:
+            if primary_key not in self:
+                self.add_column(primary_key)
             self.primary_key = primary_key
 
         if time_column is not None:
+            if time_column not in self:
+                self.add_column(time_column)
             self.time_column = time_column
 
         if end_time_column is not None:
+            if end_time_column not in self:
+                self.add_column(end_time_column)
            self.end_time_column = end_time_column
 
     @property
     def name(self) -> str:
-        r"""The name of the table."""
+        r"""The name of this table."""
        return self._name
 
-    # Data column #############################################################
+    # Column ##################################################################
 
     def has_column(self, name: str) -> bool:
         r"""Returns ``True`` if this table holds a column with name ``name``;
@@ -181,7 +102,7 @@ class LocalTable:
         return self._columns[name]
 
     @property
-    def columns(self) -> List[Column]:
+    def columns(self) -> list[Column]:
         r"""Returns a list of :class:`Column` objects that represent the
         columns in this table.
         """
@@ -200,29 +121,26 @@ class LocalTable:
             raise KeyError(f"Column '{name}' already exists in table "
                            f"'{self.name}'")
 
-        if name not in self._data.columns:
-            raise KeyError(f"Column '{name}' does not exist in the underyling "
-                           f"data frame")
+        if name not in self._source_column_dict:
+            raise KeyError(f"Column '{name}' does not exist in the underlying "
+                           f"source table")
 
+        dtype = self._source_column_dict[name].dtype
+
+        ser = self._source_sample_df[name]
         try:
-            dtype = utils.to_dtype(self._data[name])
-        except Exception as e:
-            raise RuntimeError(f"Data type inference for column '{name}' in "
-                               f"table '{self.name}' failed. Consider "
-                               f"changing the data type of the column or "
-                               f"removing it from the table.") from e
-        try:
-            stype = utils.infer_stype(self._data[name], name, dtype)
+            stype = infer_stype(ser, name, dtype)
         except Exception as e:
-            raise RuntimeError(f"Semantic type inference for column '{name}' "
-                               f"in table '{self.name}' failed. Consider "
-                               f"changing the data type of the column or "
-                               f"removing it from the table.") from e
+            raise RuntimeError(f"Could not obtain semantic type for column "
+                               f"'{name}' with data type '{dtype}' in table "
+                               f"'{self.name}'. Change the data type of the "
+                               f"column in the source table or remove it from "
+                               f"this table.") from e
 
         self._columns[name] = Column(
             name=name,
-            dtype=dtype,
             stype=stype,
+            dtype=dtype,
         )
 
         return self._columns[name]
@@ -258,7 +176,7 @@ class LocalTable:
         return self._primary_key is not None
 
     @property
-    def primary_key(self) -> Optional[Column]:
+    def primary_key(self) -> Column | None:
         r"""The primary key column of this table.
 
         The getter returns the primary key column of this table, or ``None`` if
@@ -273,7 +191,7 @@ class LocalTable:
         return self[self._primary_key]
 
     @primary_key.setter
-    def primary_key(self, name: Optional[str]) -> None:
+    def primary_key(self, name: str | None) -> None:
         if name is not None and name == self._time_column:
             raise ValueError(f"Cannot specify column '{name}' as a primary "
                              f"key since it is already defined to be a time "
@@ -303,7 +221,7 @@ class LocalTable:
         return self._time_column is not None
 
     @property
-    def time_column(self) -> Optional[Column]:
+    def time_column(self) -> Column | None:
         r"""The time column of this table.
 
         The getter returns the time column of this table, or ``None`` if no
@@ -318,7 +236,7 @@ class LocalTable:
         return self[self._time_column]
 
     @time_column.setter
-    def time_column(self, name: Optional[str]) -> None:
+    def time_column(self, name: str | None) -> None:
         if name is not None and name == self._primary_key:
             raise ValueError(f"Cannot specify column '{name}' as a time "
                              f"column since it is already defined to be a "
@@ -348,7 +266,7 @@ class LocalTable:
         return self._end_time_column is not None
 
     @property
-    def end_time_column(self) -> Optional[Column]:
+    def end_time_column(self) -> Column | None:
         r"""The end time column of this table.
 
         The getter returns the end time column of this table, or ``None`` if no
@@ -364,7 +282,7 @@ class LocalTable:
         return self[self._end_time_column]
 
     @end_time_column.setter
-    def end_time_column(self, name: Optional[str]) -> None:
+    def end_time_column(self, name: str | None) -> None:
         if name is not None and name == self._primary_key:
             raise ValueError(f"Cannot specify column '{name}' as an end time "
                              f"column since it is already defined to be a "
@@ -432,12 +350,20 @@ class LocalTable:
         })
 
     def print_metadata(self) -> None:
-        r"""Prints the :meth:`~LocalTable.metadata` of the table."""
-        if in_notebook():
+        r"""Prints the :meth:`~metadata` of this table."""
+        num_rows_repr = ''
+        if self._num_rows is not None:
+            num_rows_repr = ' ({self._num_rows:,} rows)'
+
+        if in_snowflake_notebook():
+            import streamlit as st
+            md_repr = f"### 🏷️ Metadata of Table `{self.name}`{num_rows_repr}"
+            st.markdown(md_repr)
+            st.dataframe(self.metadata, hide_index=True)
+        elif in_notebook():
             from IPython.display import Markdown, display
-            display(
-                Markdown(f"### 🏷️ Metadata of Table `{self.name}` "
-                         f"({len(self._data):,} rows)"))
+            md_repr = f"### 🏷️ Metadata of Table `{self.name}`{num_rows_repr}"
+            display(Markdown(md_repr))
             df = self.metadata
             try:
                 if hasattr(df.style, 'hide'):
@@ -447,12 +373,94 @@ class LocalTable:
             except ImportError:
                 print(df.to_string(index=False))  # missing jinja2
         else:
-            print(f"🏷️ Metadata of Table '{self.name}' "
-                  f"({len(self._data):,} rows):")
+            print(f"🏷️ Metadata of Table '{self.name}'{num_rows_repr}")
             print(self.metadata.to_string(index=False))
 
+    def infer_primary_key(self, verbose: bool = True) -> Self:
+        r"""Infers the primary key in this table.
+
+        Args:
+            verbose: Whether to print verbose output.
+        """
+        if self.has_primary_key():
+            return self
+
+        def _set_primary_key(primary_key: str) -> None:
+            self.primary_key = primary_key
+            if verbose:
+                print(f"Detected primary key '{primary_key}' in table "
+                      f"'{self.name}'")
+
+        # Inference from source column metadata:
+        if '_source_column_dict' in self.__dict__:
+            primary_key = self._source_primary_key
+            if (primary_key is not None and primary_key in self
+                    and self[primary_key].is_physical):
+                _set_primary_key(primary_key)
+                return self
+
+            unique_keys = [
+                column.name for column in self._source_column_dict.values()
+                if column.is_unique_key
+            ]
+            if (len(unique_keys) == 1  # NOTE No composite keys yet.
+                    and unique_keys[0] in self
+                    and self[unique_keys[0]].is_physical):
+                _set_primary_key(unique_keys[0])
+                return self
+
+        # Heuristic-based inference:
+        candidates = [
+            column.name for column in self.columns if column.stype == Stype.ID
+        ]
+        if len(candidates) == 0:
+            for column in self.columns:
+                if self.name.lower() == column.name.lower():
+                    candidates.append(column.name)
+                elif (self.name.lower().endswith('s')
+                      and self.name.lower()[:-1] == column.name.lower()):
+                    candidates.append(column.name)
+
+        if primary_key := infer_primary_key(
+                table_name=self.name,
+                df=self._sample_current_df(columns=candidates),
+                candidates=candidates,
+        ):
+            _set_primary_key(primary_key)
+            return self
+
+        return self
+
+    def infer_time_column(self, verbose: bool = True) -> Self:
+        r"""Infers the time column in this table.
+
+        Args:
+            verbose: Whether to print verbose output.
+        """
+        if self.has_time_column():
+            return self
+
+        # Heuristic-based inference:
+        candidates = [
+            column.name for column in self.columns
+            if column.stype == Stype.timestamp
+            and column.name != self._end_time_column
+        ]
+
+        if time_column := infer_time_column(
+                df=self._sample_current_df(columns=candidates),
+                candidates=candidates,
+        ):
+            self.time_column = time_column
+
+            if verbose:
+                print(f"Detected time column '{time_column}' in table "
+                      f"'{self.name}'")
+
+        return self
+
     def infer_metadata(self, verbose: bool = True) -> Self:
-        r"""Infers metadata, *i.e.*, primary keys and time columns, in the
+        r"""Infers metadata, *i.e.*, primary keys and time columns, in this
         table.
 
         Args:
@@ -460,42 +468,15 @@ class LocalTable:
         """
         logs = []
 
-        # Try to detect primary key if not set:
         if not self.has_primary_key():
+            self.infer_primary_key(verbose=False)
+            if self.has_primary_key():
+                logs.append(f"primary key '{self._primary_key}'")
 
-            def is_candidate(column: Column) -> bool:
-                if column.stype == Stype.ID:
-                    return True
-                if all(column.stype != Stype.ID for column in self.columns):
-                    if self.name == column.name:
-                        return True
-                    if (self.name.endswith('s')
-                            and self.name[:-1] == column.name):
-                        return True
-                return False
-
-            candidates = [
-                column.name for column in self.columns if is_candidate(column)
-            ]
-
-            if primary_key := utils.detect_primary_key(
-                    table_name=self.name,
-                    df=self._data,
-                    candidates=candidates,
-            ):
-                self.primary_key = primary_key
-                logs.append(f"primary key '{primary_key}'")
-
-        # Try to detect time column if not set:
         if not self.has_time_column():
-            candidates = [
-                column.name for column in self.columns
-                if column.stype == Stype.timestamp
-                and column.name != self._end_time_column
-            ]
-            if time_column := utils.detect_time_column(self._data, candidates):
-                self.time_column = time_column
-                logs.append(f"time column '{time_column}'")
+            self.infer_time_column(verbose=False)
+            if self.has_time_column():
+                logs.append(f"time column '{self._time_column}'")
 
         if verbose and len(logs) > 0:
             print(f"Detected {' and '.join(logs)} in table '{self.name}'")
@@ -516,6 +497,36 @@ class LocalTable:
             end_time_col=self._end_time_column,
         )
 
+    @cached_property
+    def _source_column_dict(self) -> dict[str, SourceColumn]:
+        source_columns = self._get_source_columns()
+        if len(source_columns) == 0:
+            raise ValueError(f"Table '{self.name}' does not hold any column "
+                             f"with a supported data type")
+        return {column.name: column for column in source_columns}
+
+    @cached_property
+    def _source_sample_df(self) -> pd.DataFrame:
+        return self._get_source_sample_df()
+
+    @property
+    def _source_primary_key(self) -> str | None:
+        primary_keys = [
+            column.name for column in self._source_column_dict.values()
+            if column.is_primary_key
+        ]
+        if len(primary_keys) == 1:  # NOTE No composite keys yet.
+            return primary_keys[0]
+
+        return None
+
+    @cached_property
+    def _num_rows(self) -> int | None:
+        return self._get_num_rows()
+
+    def _sample_current_df(self, columns: Sequence[str]) -> pd.DataFrame:
+        return self._source_sample_df[columns]
+
     # Python builtins #########################################################
 
     def __hash__(self) -> int:
@@ -543,3 +554,22 @@ class LocalTable:
                 f' time_column={self._time_column},\n'
                 f' end_time_column={self._end_time_column},\n'
                 f')')
+
+    # Abstract Methods ########################################################
+
+    @property
+    @abstractmethod
+    def backend(self) -> DataBackend:
+        r"""The data backend of this table."""
+
+    @abstractmethod
+    def _get_source_columns(self) -> list[SourceColumn]:
+        pass
+
+    @abstractmethod
+    def _get_source_sample_df(self) -> pd.DataFrame:
+        pass
+
+    @abstractmethod
+    def _get_num_rows(self) -> int | None:
+        pass
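
The new base class wires its three abstract hooks (`_get_source_columns`, `_get_source_sample_df`, `_get_num_rows`) into `cached_property` attributes, so the backend tables listed above (local, snow, sqlite) only need to implement cheap metadata and sampling accessors while column, primary-key, and time-column inference stays in the shared `Table` class. A minimal standalone sketch of that pattern, using hypothetical class names rather than the actual kumoai backend implementations (which are not shown in this diff):

    from abc import ABC, abstractmethod
    from functools import cached_property

    import pandas as pd


    class BaseTable(ABC):
        @cached_property
        def _source_sample_df(self) -> pd.DataFrame:
            # Computed once per instance, mirroring Table._source_sample_df.
            return self._get_source_sample_df()

        @cached_property
        def _num_rows(self) -> int | None:
            return self._get_num_rows()

        @abstractmethod
        def _get_source_sample_df(self) -> pd.DataFrame:
            ...

        @abstractmethod
        def _get_num_rows(self) -> int | None:
            ...


    class DataFrameTable(BaseTable):
        """Hypothetical pandas-backed subclass; only the hooks are defined."""
        def __init__(self, df: pd.DataFrame) -> None:
            self._df = df

        def _get_source_sample_df(self) -> pd.DataFrame:
            return self._df.head(1_000)  # small sample for inference

        def _get_num_rows(self) -> int | None:
            return len(self._df)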