kumoai 2.14.0.dev202512181731__cp312-cp312-macosx_11_0_arm64.whl → 2.14.0.dev202601041732__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. kumoai/__init__.py +23 -26
  2. kumoai/_version.py +1 -1
  3. kumoai/client/client.py +6 -0
  4. kumoai/client/jobs.py +24 -0
  5. kumoai/connector/utils.py +21 -7
  6. kumoai/experimental/rfm/__init__.py +24 -22
  7. kumoai/experimental/rfm/backend/local/graph_store.py +12 -21
  8. kumoai/experimental/rfm/backend/local/sampler.py +0 -3
  9. kumoai/experimental/rfm/backend/local/table.py +25 -24
  10. kumoai/experimental/rfm/backend/snow/sampler.py +106 -61
  11. kumoai/experimental/rfm/backend/snow/table.py +146 -51
  12. kumoai/experimental/rfm/backend/sqlite/sampler.py +127 -78
  13. kumoai/experimental/rfm/backend/sqlite/table.py +94 -47
  14. kumoai/experimental/rfm/base/__init__.py +6 -7
  15. kumoai/experimental/rfm/base/column.py +97 -5
  16. kumoai/experimental/rfm/base/expression.py +44 -0
  17. kumoai/experimental/rfm/base/sampler.py +5 -17
  18. kumoai/experimental/rfm/base/source.py +1 -1
  19. kumoai/experimental/rfm/base/sql_sampler.py +68 -9
  20. kumoai/experimental/rfm/base/table.py +291 -126
  21. kumoai/experimental/rfm/graph.py +139 -86
  22. kumoai/experimental/rfm/infer/__init__.py +6 -4
  23. kumoai/experimental/rfm/infer/dtype.py +6 -1
  24. kumoai/experimental/rfm/infer/multicategorical.py +1 -1
  25. kumoai/experimental/rfm/infer/stype.py +35 -0
  26. kumoai/experimental/rfm/relbench.py +76 -0
  27. kumoai/experimental/rfm/rfm.py +30 -42
  28. kumoai/experimental/rfm/task_table.py +247 -0
  29. kumoai/trainer/distilled_trainer.py +175 -0
  30. kumoai/utils/display.py +51 -0
  31. {kumoai-2.14.0.dev202512181731.dist-info → kumoai-2.14.0.dev202601041732.dist-info}/METADATA +1 -1
  32. {kumoai-2.14.0.dev202512181731.dist-info → kumoai-2.14.0.dev202601041732.dist-info}/RECORD +35 -31
  33. kumoai/experimental/rfm/base/column_expression.py +0 -16
  34. kumoai/experimental/rfm/base/sql_table.py +0 -113
  35. {kumoai-2.14.0.dev202512181731.dist-info → kumoai-2.14.0.dev202601041732.dist-info}/WHEEL +0 -0
  36. {kumoai-2.14.0.dev202512181731.dist-info → kumoai-2.14.0.dev202601041732.dist-info}/licenses/LICENSE +0 -0
  37. {kumoai-2.14.0.dev202512181731.dist-info → kumoai-2.14.0.dev202601041732.dist-info}/top_level.txt +0 -0
@@ -1,25 +1,32 @@
+import warnings
 from abc import ABC, abstractmethod
 from collections.abc import Sequence
 from functools import cached_property
 
+import numpy as np
 import pandas as pd
 from kumoapi.model_plan import MissingType
 from kumoapi.source_table import UnavailableSourceTable
 from kumoapi.table import Column as ColumnDefinition
 from kumoapi.table import TableDefinition
-from kumoapi.typing import Stype
+from kumoapi.typing import Dtype, Stype
 from typing_extensions import Self
 
-from kumoai import in_notebook, in_snowflake_notebook
-from kumoai.experimental.rfm.base import Column, DataBackend, SourceColumn
+from kumoai.experimental.rfm.base import (
+    Column,
+    ColumnSpec,
+    ColumnSpecType,
+    DataBackend,
+    SourceColumn,
+    SourceForeignKey,
+)
 from kumoai.experimental.rfm.infer import (
-    contains_categorical,
-    contains_id,
-    contains_multicategorical,
-    contains_timestamp,
+    infer_dtype,
     infer_primary_key,
+    infer_stype,
     infer_time_column,
 )
+from kumoai.utils import display, quote_ident
 
 
 class Table(ABC):
@@ -29,41 +36,48 @@ class Table(ABC):
 
     Args:
         name: The name of this table.
+        source_name: The source name of this table. If set to ``None``,
+            ``name`` is being used.
        columns: The selected columns of this table.
         primary_key: The name of the primary key of this table, if it exists.
         time_column: The name of the time column of this table, if it exists.
         end_time_column: The name of the end time column of this table, if it
             exists.
     """
+    _NUM_SAMPLE_ROWS = 1_000
+
     def __init__(
         self,
         name: str,
-        columns: Sequence[str] | None = None,
+        source_name: str | None = None,
+        columns: Sequence[ColumnSpecType] | None = None,
         primary_key: MissingType | str | None = MissingType.VALUE,
         time_column: str | None = None,
         end_time_column: str | None = None,
     ) -> None:
 
         self._name = name
+        self._source_name = source_name or name
+        self._column_dict: dict[str, Column] = {}
         self._primary_key: str | None = None
         self._time_column: str | None = None
         self._end_time_column: str | None = None
+        self._expr_sample_df = pd.DataFrame(index=range(self._NUM_SAMPLE_ROWS))
 
         if columns is None:
             columns = list(self._source_column_dict.keys())
 
-        if len(self._source_column_dict) == 0:
-            raise ValueError(f"Table '{name}' does not hold any column with "
-                             f"a supported data type")
+        self.add_columns(columns)
 
         if isinstance(primary_key, MissingType):
-            primary_key = self._source_primary_key
-
-        self._columns: dict[str, Column] = {}
-        for column_name in columns:
-            self.add_column(column_name)
-
-        if primary_key is not None:
+            # Infer primary key from source metadata, but only set it in case
+            # it is already part of the column set (don't magically add it):
+            if any(column.is_source for column in self.columns):
+                primary_key = self._source_primary_key
+                if (primary_key is not None and primary_key in self
+                        and self[primary_key].is_source):
+                    self.primary_key = primary_key
+        elif primary_key is not None:
             if primary_key not in self:
                 self.add_column(primary_key)
             self.primary_key = primary_key
@@ -83,13 +97,22 @@ class Table(ABC):
         r"""The name of this table."""
         return self._name
 
+    @property
+    def source_name(self) -> str:
+        r"""The source name of this table."""
+        return self._source_name
+
+    @property
+    def _quoted_source_name(self) -> str:
+        return quote_ident(self._source_name)
+
     # Column ##################################################################
 
     def has_column(self, name: str) -> bool:
         r"""Returns ``True`` if this table holds a column with name ``name``;
         ``False`` otherwise.
         """
-        return name in self._columns
+        return name in self._column_dict
 
     def column(self, name: str) -> Column:
         r"""Returns the data column named with name ``name`` in this table.
@@ -102,59 +125,113 @@ class Table(ABC):
         """
         if not self.has_column(name):
             raise KeyError(f"Column '{name}' not found in table '{self.name}'")
-        return self._columns[name]
+        return self._column_dict[name]
 
     @property
     def columns(self) -> list[Column]:
         r"""Returns a list of :class:`Column` objects that represent the
         columns in this table.
         """
-        return list(self._columns.values())
+        return list(self._column_dict.values())
 
-    def add_column(self, name: str) -> Column:
-        r"""Adds a column to this table.
+    def add_columns(self, columns: Sequence[ColumnSpecType]) -> None:
+        r"""Adds a set of columns to this table.
 
         Args:
-            name: The name of the column.
+            columns: The columns to add.
 
         Raises:
-            KeyError: If ``name`` is already present in this table.
+            KeyError: If any of the column names already exist in this table.
         """
-        if name in self:
-            raise KeyError(f"Column '{name}' already exists in table "
-                           f"'{self.name}'")
-
-        if name not in self._source_column_dict:
-            raise KeyError(f"Column '{name}' does not exist in the underlying "
-                           f"source table")
-
-        dtype = self._source_column_dict[name].dtype
-
-        try:
-            ser = self._sample_df[name]
-            if contains_id(ser, name, dtype):
-                stype = Stype.ID
-            elif contains_timestamp(ser, name, dtype):
-                stype = Stype.timestamp
-            elif contains_multicategorical(ser, name, dtype):
-                stype = Stype.multicategorical
-            elif contains_categorical(ser, name, dtype):
-                stype = Stype.categorical
-            else:
-                stype = dtype.default_stype
-        except Exception as e:
-            raise RuntimeError(f"Could not obtain semantic type for column "
-                               f"'{name}' in table '{self.name}'. Change "
-                               f"the data type of the column in the source "
-                               f"table or remove it from the table.") from e
-
-        self._columns[name] = Column(
-            name=name,
-            stype=stype,
-            dtype=dtype,
-        )
+        if len(columns) == 0:
+            return
+
+        column_specs = [ColumnSpec.coerce(column) for column in columns]
+
+        # Obtain a batch-wise sample for all column expressions:
+        expr_specs = [spec for spec in column_specs if not spec.is_source]
+        if len(expr_specs) > 0:
+            dfs = [
+                self._expr_sample_df,
+                self._get_expr_sample_df(expr_specs).reset_index(drop=True),
+            ]
+            size = min(map(len, dfs))
+            df = pd.concat([dfs[0].iloc[:size], dfs[1].iloc[:size]], axis=1)
+            df = df.loc[:, ~df.columns.duplicated(keep='last')]
+            self._expr_sample_df = df
+
+        for column_spec in column_specs:
+            if column_spec.name in self:
+                raise KeyError(f"Column '{column_spec.name}' already exists "
+                               f"in table '{self.name}'")
+
+            dtype = column_spec.dtype
+            stype = column_spec.stype
+
+            if column_spec.is_source:
+                if column_spec.name not in self._source_column_dict:
+                    raise ValueError(
+                        f"Column '{column_spec.name}' does not exist in the "
+                        f"underlying source table")
+
+                if dtype is None:
+                    dtype = self._source_column_dict[column_spec.name].dtype
+
+                if dtype == Dtype.unsupported:
+                    raise ValueError(
+                        f"Encountered unsupported data type for column "
+                        f"'{column_spec.name}' in table '{self.name}'. Please "
+                        f"either change the column's data type or remove the "
+                        f"column from this table.")
+
+            if dtype is None:
+                if column_spec.is_source:
+                    ser = self._source_sample_df[column_spec.name]
+                else:
+                    ser = self._expr_sample_df[column_spec.name]
+                try:
+                    dtype = infer_dtype(ser)
+                except Exception as e:
+                    raise RuntimeError(
+                        f"Encountered unsupported data type '{ser.dtype}' for "
+                        f"column '{column_spec.name}' in table '{self.name}'. "
+                        f"Please either manually override the columns's data "
+                        f"type or remove the column from this table.") from e
+
+            if stype is None:
+                if column_spec.is_source:
+                    ser = self._source_sample_df[column_spec.name]
+                else:
+                    ser = self._expr_sample_df[column_spec.name]
+                try:
+                    stype = infer_stype(ser, column_spec.name, dtype)
+                except Exception as e:
+                    raise RuntimeError(
+                        f"Could not determine semantic type for column "
+                        f"'{column_spec.name}' with data type '{dtype}' in "
+                        f"table '{self.name}'. Please either change the "
+                        f"column's data type or remove the column from this "
+                        f"table.") from e
+
+            self._column_dict[column_spec.name] = Column(
+                name=column_spec.name,
+                expr=column_spec.expr,
+                dtype=dtype,
+                stype=stype,
+            )
+
+    def add_column(self, column: ColumnSpecType) -> Column:
+        r"""Adds a column to this table.
+
+        Args:
+            column: The column to add.
 
-        return self._columns[name]
+        Raises:
+            KeyError: If the column name already exists in this table.
+        """
+        column_spec = ColumnSpec.coerce(column)
+        self.add_columns([column_spec])
+        return self[column_spec.name]
 
     def remove_column(self, name: str) -> Self:
         r"""Removes a column from this table.
@@ -174,7 +251,7 @@ class Table(ABC):
             self.time_column = None
         if self._end_time_column == name:
             self.end_time_column = None
-        del self._columns[name]
+        del self._column_dict[name]
 
         return self
 
@@ -194,8 +271,8 @@ class Table(ABC):
         no such primary key is present.
 
         The setter sets a column as a primary key on this table, and raises a
-        :class:`ValueError` if the primary key has a non-ID semantic type or
-        if the column name does not match a column in the data frame.
+        :class:`ValueError` if the primary key has a non-ID compatible data
+        type or if the column name does not match a column in the data frame.
         """
         if self._primary_key is None:
             return None
@@ -239,8 +316,9 @@ class Table(ABC):
         such time column is present.
 
         The setter sets a column as a time column on this table, and raises a
-        :class:`ValueError` if the time column has a non-timestamp semantic
-        type or if the column name does not match a column in the data frame.
+        :class:`ValueError` if the time column has a non-timestamp compatible
+        data type or if the column name does not match a column in the data
+        frame.
         """
         if self._time_column is None:
             return None
@@ -285,8 +363,8 @@ class Table(ABC):
 
         The setter sets a column as an end time column on this table, and
         raises a :class:`ValueError` if the end time column has a non-timestamp
-        semantic type or if the column name does not match a column in the data
-        frame.
+        compatible data type or if the column name does not match a column in
+        the data frame.
         """
         if self._end_time_column is None:
             return None
@@ -362,30 +440,12 @@ class Table(ABC):
 
     def print_metadata(self) -> None:
         r"""Prints the :meth:`~metadata` of this table."""
-        num_rows_repr = ''
-        if self._num_rows is not None:
-            num_rows_repr = ' ({self._num_rows:,} rows)'
-
-        if in_snowflake_notebook():
-            import streamlit as st
-            md_repr = f"### 🏷️ Metadata of Table `{self.name}`{num_rows_repr}"
-            st.markdown(md_repr)
-            st.dataframe(self.metadata, hide_index=True)
-        elif in_notebook():
-            from IPython.display import Markdown, display
-            md_repr = f"### 🏷️ Metadata of Table `{self.name}`{num_rows_repr}"
-            display(Markdown(md_repr))
-            df = self.metadata
-            try:
-                if hasattr(df.style, 'hide'):
-                    display(df.style.hide(axis='index'))  # pandas=2
-                else:
-                    display(df.style.hide_index())  # pandas<1.3
-            except ImportError:
-                print(df.to_string(index=False))  # missing jinja2
-        else:
-            print(f"🏷️ Metadata of Table '{self.name}'{num_rows_repr}")
-            print(self.metadata.to_string(index=False))
+        msg = f"🏷️ Metadata of Table `{self.name}`"
+        if num := self._num_rows:
+            msg += " (1 row)" if num == 1 else f" ({num:,} rows)"
+
+        display.title(msg)
+        display.dataframe(self.metadata)
 
     def infer_primary_key(self, verbose: bool = True) -> Self:
         r"""Infers the primary key in this table.
@@ -399,21 +459,28 @@ class Table(ABC):
         def _set_primary_key(primary_key: str) -> None:
             self.primary_key = primary_key
             if verbose:
-                print(f"Detected primary key '{primary_key}' in table "
-                      f"'{self.name}'")
-
-        if primary_key := self._source_primary_key:
-            _set_primary_key(primary_key)
-            return self
-
-        unique_keys = [
-            column.name for column in self._source_column_dict.values()
-            if column.is_unique_key
-        ]
-        if len(unique_keys) == 1:  # NOTE No composite keys yet.
-            _set_primary_key(unique_keys[0])
-            return self
+                display.message(f"Inferred primary key `{primary_key}` for "
+                                f"table `{self.name}`")
 
+        # Inference from source column metadata:
+        if any(column.is_source for column in self.columns):
+            primary_key = self._source_primary_key
+            if (primary_key is not None and primary_key in self
+                    and self[primary_key].is_source):
+                _set_primary_key(primary_key)
+                return self
+
+            unique_keys = [
+                column.name for column in self._source_column_dict.values()
+                if column.is_unique_key
+            ]
+            if (len(unique_keys) == 1  # NOTE No composite keys yet.
+                    and unique_keys[0] in self
+                    and self[unique_keys[0]].is_source):
+                _set_primary_key(unique_keys[0])
+                return self
+
+        # Heuristic-based inference:
         candidates = [
             column.name for column in self.columns if column.stype == Stype.ID
         ]
@@ -427,7 +494,7 @@ class Table(ABC):
 
         if primary_key := infer_primary_key(
                 table_name=self.name,
-                df=self._sample_df,
+                df=self._get_sample_df(),
                 candidates=candidates,
         ):
             _set_primary_key(primary_key)
@@ -444,6 +511,7 @@ class Table(ABC):
         if self.has_time_column():
            return self
 
+        # Heuristic-based inference:
         candidates = [
             column.name for column in self.columns
             if column.stype == Stype.timestamp
@@ -451,14 +519,14 @@ class Table(ABC):
         ]
 
         if time_column := infer_time_column(
-                df=self._sample_df,
+                df=self._get_sample_df(),
                 candidates=candidates,
         ):
             self.time_column = time_column
 
             if verbose:
-                print(f"Detected time column '{time_column}' in table "
-                      f"'{self.name}'")
+                display.message(f"Inferred time column `{time_column}` for "
+                                f"table `{self.name}`")
 
         return self
 
@@ -474,15 +542,16 @@ class Table(ABC):
         if not self.has_primary_key():
             self.infer_primary_key(verbose=False)
             if self.has_primary_key():
-                logs.append(f"primary key '{self._primary_key}'")
+                logs.append(f"primary key `{self._primary_key}`")
 
         if not self.has_time_column():
             self.infer_time_column(verbose=False)
             if self.has_time_column():
-                logs.append(f"time column '{self._time_column}'")
+                logs.append(f"time column `{self._time_column}`")
 
         if verbose and len(logs) > 0:
-            print(f"Detected {' and '.join(logs)} in table '{self.name}'")
+            display.message(f"Inferred {' and '.join(logs)} for table "
+                            f"`{self.name}`")
 
         return self
 
@@ -500,16 +569,113 @@ class Table(ABC):
             end_time_col=self._end_time_column,
         )
 
-    @property
+    @cached_property
+    def _source_column_dict(self) -> dict[str, SourceColumn]:
+        source_columns = self._get_source_columns()
+        if len(source_columns) == 0:
+            raise ValueError(f"Table '{self.name}' has no columns")
+        return {column.name: column for column in source_columns}
+
+    @cached_property
     def _source_primary_key(self) -> str | None:
         primary_keys = [
             column.name for column in self._source_column_dict.values()
             if column.is_primary_key
         ]
-        if len(primary_keys) == 1:  # NOTE No composite keys yet.
-            return primary_keys[0]
+        # NOTE No composite keys yet.
+        return primary_keys[0] if len(primary_keys) == 1 else None
 
-        return None
+    @cached_property
+    def _source_foreign_key_dict(self) -> dict[str, SourceForeignKey]:
+        return {key.name: key for key in self._get_source_foreign_keys()}
+
+    @cached_property
+    def _source_sample_df(self) -> pd.DataFrame:
+        return self._get_source_sample_df().reset_index(drop=True)
+
+    @cached_property
+    def _num_rows(self) -> int | None:
+        return self._get_num_rows()
+
+    def _get_sample_df(self) -> pd.DataFrame:
+        dfs: list[pd.DataFrame] = []
+        if any(column.is_source for column in self.columns):
+            dfs.append(self._source_sample_df)
+        if any(not column.is_source for column in self.columns):
+            dfs.append(self._expr_sample_df)
+
+        if len(dfs) == 0:
+            return pd.DataFrame(index=range(1000))
+        if len(dfs) == 1:
+            return dfs[0]
+
+        size = min(map(len, dfs))
+        df = pd.concat([dfs[0].iloc[:size], dfs[1].iloc[:size]], axis=1)
+        df = df.loc[:, ~df.columns.duplicated(keep='last')]
+        return df
+
+    @staticmethod
+    def _sanitize(
+        df: pd.DataFrame,
+        dtype_dict: dict[str, Dtype | None] | None = None,
+        stype_dict: dict[str, Stype | None] | None = None,
+    ) -> pd.DataFrame:
+        r"""Sanitzes a :class:`pandas.DataFrame` in-place such that its data
+        types match table data and semantic type specification.
+        """
+        def _to_datetime(ser: pd.Series) -> pd.Series:
+            if not pd.api.types.is_datetime64_any_dtype(ser):
+                with warnings.catch_warnings():
+                    warnings.filterwarnings(
+                        'ignore',
+                        message='Could not infer format',
+                    )
+                    ser = pd.to_datetime(ser, errors='coerce')
+            if isinstance(ser.dtype, pd.DatetimeTZDtype):
+                ser = ser.dt.tz_localize(None)
+            if ser.dtype != 'datetime64[ns]':
+                ser = ser.astype('datetime64[ns]')
+            return ser
+
+        def _to_list(ser: pd.Series, dtype: Dtype | None) -> pd.Series:
+            if (pd.api.types.is_string_dtype(ser)
+                    and dtype in {Dtype.intlist, Dtype.floatlist}):
+                try:
+                    ser = ser.map(lambda row: np.fromstring(
+                        row.strip('[]'),
+                        sep=',',
+                        dtype=int if dtype == Dtype.intlist else np.float32,
+                    ) if row is not None else None)
+                except Exception:
+                    pass
+
+            if pd.api.types.is_string_dtype(ser):
+                try:
+                    import orjson as json
+                except ImportError:
+                    import json
+                try:
+                    ser = ser.map(lambda row: json.loads(row)
+                                  if row is not None else None)
+                except Exception:
+                    pass
+
+            return ser
+
+        for column_name in df.columns:
+            dtype = (dtype_dict or {}).get(column_name)
+            stype = (stype_dict or {}).get(column_name)
+
+            if dtype == Dtype.time:
+                df[column_name] = _to_datetime(df[column_name])
+            elif stype == Stype.timestamp:
+                df[column_name] = _to_datetime(df[column_name])
+            elif dtype is not None and dtype.is_list():
+                df[column_name] = _to_list(df[column_name], dtype)
+            elif stype == Stype.sequence:
+                df[column_name] = _to_list(df[column_name], Dtype.floatlist)
+
+        return df
 
     # Python builtins #########################################################
 
@@ -546,25 +712,24 @@ class Table(ABC):
     def backend(self) -> DataBackend:
         r"""The data backend of this table."""
 
-    @cached_property
-    def _source_column_dict(self) -> dict[str, SourceColumn]:
-        return {col.name: col for col in self._get_source_columns()}
-
     @abstractmethod
     def _get_source_columns(self) -> list[SourceColumn]:
         pass
 
-    @cached_property
-    def _sample_df(self) -> pd.DataFrame:
-        return self._get_sample_df()
+    @abstractmethod
+    def _get_source_foreign_keys(self) -> list[SourceForeignKey]:
+        pass
 
     @abstractmethod
-    def _get_sample_df(self) -> pd.DataFrame:
+    def _get_source_sample_df(self) -> pd.DataFrame:
         pass
 
-    @cached_property
-    def _num_rows(self) -> int | None:
-        return self._get_num_rows()
+    @abstractmethod
+    def _get_expr_sample_df(
+        self,
+        columns: Sequence[ColumnSpec],
+    ) -> pd.DataFrame:
+        pass
 
     @abstractmethod
     def _get_num_rows(self) -> int | None:
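The `base/table.py` changes above replace the name-only `add_column(name)` flow with a spec-based column API: `add_column`/`add_columns` now accept bare source-column names or `ColumnSpec` objects (with optional expression, dtype, and stype overrides), and primary-key/time-column inference reports through `kumoai.utils.display` instead of `print`. A minimal usage sketch follows; the `LocalTable` constructor arguments and the exact `ColumnSpec` signature are assumptions inferred from this diff, not confirmed API:

import pandas as pd
from kumoapi.typing import Stype

import kumoai.experimental.rfm as rfm
from kumoai.experimental.rfm.base import ColumnSpec

df = pd.DataFrame({
    "user_id": [1, 2, 3],
    "age": [25, 31, 47],
    "signup_time": pd.to_datetime(["2024-01-02", "2024-02-03", "2024-03-04"]),
})

# Assumption: `LocalTable` still wraps a pandas DataFrame and forwards the
# base `Table` arguments shown in this diff (name, columns, ...).
table = rfm.LocalTable(df, name="users", columns=["user_id"])

# Bare strings refer to source columns; dtype and semantic type are inferred
# from a sample via `infer_dtype` / `infer_stype`:
table.add_column("age")

# A `ColumnSpec` can override the inferred semantic type (constructor
# arguments are inferred from the fields used in `add_columns`):
table.add_columns([ColumnSpec(name="signup_time", stype=Stype.timestamp)])

# Inference helpers now report via `kumoai.utils.display`:
table.infer_primary_key()
table.infer_time_column()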