kumoai 2.13.0.dev202512011731__cp312-cp312-macosx_11_0_arm64.whl → 2.14.0.dev202512181731__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. kumoai/__init__.py +12 -0
  2. kumoai/_version.py +1 -1
  3. kumoai/client/pquery.py +6 -2
  4. kumoai/experimental/rfm/__init__.py +33 -8
  5. kumoai/experimental/rfm/authenticate.py +3 -4
  6. kumoai/experimental/rfm/backend/local/__init__.py +4 -0
  7. kumoai/experimental/rfm/{local_graph_store.py → backend/local/graph_store.py} +53 -107
  8. kumoai/experimental/rfm/backend/local/sampler.py +315 -0
  9. kumoai/experimental/rfm/backend/local/table.py +41 -80
  10. kumoai/experimental/rfm/backend/snow/__init__.py +37 -0
  11. kumoai/experimental/rfm/backend/snow/sampler.py +252 -0
  12. kumoai/experimental/rfm/backend/snow/table.py +147 -0
  13. kumoai/experimental/rfm/backend/sqlite/__init__.py +11 -2
  14. kumoai/experimental/rfm/backend/sqlite/sampler.py +349 -0
  15. kumoai/experimental/rfm/backend/sqlite/table.py +108 -88
  16. kumoai/experimental/rfm/base/__init__.py +26 -2
  17. kumoai/experimental/rfm/base/column.py +6 -12
  18. kumoai/experimental/rfm/base/column_expression.py +16 -0
  19. kumoai/experimental/rfm/base/sampler.py +773 -0
  20. kumoai/experimental/rfm/base/source.py +19 -0
  21. kumoai/experimental/rfm/base/sql_sampler.py +84 -0
  22. kumoai/experimental/rfm/base/sql_table.py +113 -0
  23. kumoai/experimental/rfm/base/table.py +174 -76
  24. kumoai/experimental/rfm/graph.py +444 -84
  25. kumoai/experimental/rfm/infer/__init__.py +6 -0
  26. kumoai/experimental/rfm/infer/dtype.py +77 -0
  27. kumoai/experimental/rfm/infer/pkey.py +128 -0
  28. kumoai/experimental/rfm/infer/time_col.py +61 -0
  29. kumoai/experimental/rfm/pquery/executor.py +27 -27
  30. kumoai/experimental/rfm/pquery/pandas_executor.py +30 -32
  31. kumoai/experimental/rfm/rfm.py +299 -240
  32. kumoai/experimental/rfm/sagemaker.py +4 -4
  33. kumoai/pquery/predictive_query.py +10 -6
  34. kumoai/testing/snow.py +50 -0
  35. kumoai/utils/__init__.py +3 -2
  36. kumoai/utils/progress_logger.py +178 -12
  37. kumoai/utils/sql.py +3 -0
  38. {kumoai-2.13.0.dev202512011731.dist-info → kumoai-2.14.0.dev202512181731.dist-info}/METADATA +6 -2
  39. {kumoai-2.13.0.dev202512011731.dist-info → kumoai-2.14.0.dev202512181731.dist-info}/RECORD +42 -30
  40. kumoai/experimental/rfm/local_graph_sampler.py +0 -182
  41. kumoai/experimental/rfm/local_pquery_driver.py +0 -689
  42. kumoai/experimental/rfm/utils.py +0 -344
  43. {kumoai-2.13.0.dev202512011731.dist-info → kumoai-2.14.0.dev202512181731.dist-info}/WHEEL +0 -0
  44. {kumoai-2.13.0.dev202512011731.dist-info → kumoai-2.14.0.dev202512181731.dist-info}/licenses/LICENSE +0 -0
  45. {kumoai-2.13.0.dev202512011731.dist-info → kumoai-2.14.0.dev202512181731.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,19 @@
1
+ from dataclasses import dataclass
2
+
3
+ from kumoapi.typing import Dtype
4
+
5
+
6
@dataclass
class SourceColumn:
    r"""Metadata record describing one column of a source table."""
    # Name of the column.
    name: str
    # Data type of the column.
    dtype: Dtype
    # Flag marking the column as a primary key of its source table.
    is_primary_key: bool
    # Flag marking the column as a unique key of its source table.
    is_unique_key: bool
    # Flag marking the column as nullable (presumably: it may hold missing
    # values -- confirm against the backend that fills this in).
    is_nullable: bool
13
+
14
+
15
@dataclass
class SourceForeignKey:
    r"""Metadata record describing one foreign key of a source table."""
    # Name of the foreign key (presumably the referencing column -- confirm
    # against the backend that fills this in).
    name: str
    # Name of the table the foreign key points to.
    dst_table: str
    # Name of the key in ``dst_table`` that the foreign key references.
    primary_key: str
@@ -0,0 +1,84 @@
1
+ from abc import abstractmethod
2
+ from typing import TYPE_CHECKING, Literal
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+
7
+ from kumoai.experimental.rfm.base import Sampler, SamplerOutput, SQLTable
8
+ from kumoai.utils import ProgressLogger
9
+
10
+ if TYPE_CHECKING:
11
+ from kumoai.experimental.rfm import Graph
12
+
13
+
14
class SQLSampler(Sampler):
    r"""A :class:`Sampler` base class for graphs whose tables are all
    :class:`SQLTable` instances.

    Concrete subclasses need to implement :meth:`_by_pkey`.

    Args:
        graph: The graph to sample from. Every table in ``graph.tables`` is
            asserted to be a :class:`SQLTable`.
        verbose: Passed through unchanged to the base :class:`Sampler`.
    """
    def __init__(
        self,
        graph: 'Graph',
        verbose: bool | ProgressLogger = True,
    ) -> None:
        super().__init__(graph=graph, verbose=verbose)

        self._fqn_dict: dict[str, str] = {}
        for sql_table in graph.tables.values():
            assert isinstance(sql_table, SQLTable)
            # The attribute is overwritten on every iteration, so the
            # connection of the last table is the one that is kept
            # (presumably all tables share the same connection):
            self._connection = sql_table._connection
            self._fqn_dict[sql_table.name] = sql_table.fqn

    @property
    def fqn_dict(self) -> dict[str, str]:
        r"""A mapping from each table name in the graph to the
        fully-qualified quoted name of its source table.
        """
        return self._fqn_dict

    def _sample_subgraph(
        self,
        entity_table_name: str,
        entity_pkey: pd.Series,
        anchor_time: pd.Series | Literal['entity'],
        columns_dict: dict[str, set[str]],
        num_neighbors: list[int],
    ) -> SamplerOutput:
        r"""Fetches the requested entity rows and wraps them into a
        :class:`SamplerOutput`.

        Only the entity table is populated in the output; all edge-related
        dictionaries are left empty and ``num_neighbors`` is not consulted.

        Args:
            entity_table_name: The name of the entity table.
            entity_pkey: The primary keys of the entities to fetch.
            anchor_time: The anchor times as a :class:`pandas.Series`. For
                any other value (*i.e.* ``'entity'``), the anchor times are
                read from the time column of the entity table.
            columns_dict: The columns to fetch for each table name.
            num_neighbors: Unused by this implementation.

        Raises:
            KeyError: If the number of fetched rows differs from the number
                of requested primary keys.
        """
        num_entities = len(entity_pkey)

        df, batch = self._by_pkey(
            table_name=entity_table_name,
            pkey=entity_pkey,
            columns=columns_dict[entity_table_name],
        )

        if len(batch) != num_entities:
            # Positions in `entity_pkey` that `batch` never points to:
            missing = np.ones(num_entities, dtype=bool)
            missing[batch] = False
            missing_pkeys = entity_pkey.iloc[missing].tolist()
            raise KeyError(f"The primary keys "
                           f"{missing_pkeys} do not exist "
                           f"in the '{entity_table_name}' table")

        # Bring the fetched rows into ascending `batch` order:
        order = batch.argsort()
        batch = batch[order]
        df = df.iloc[order].reset_index(drop=True)

        if isinstance(anchor_time, pd.Series):
            times = anchor_time
        else:
            times = df[self.time_column_dict[entity_table_name]]

        return SamplerOutput(
            anchor_time=times.astype(int).to_numpy(),
            df_dict={entity_table_name: df},
            inverse_dict={},
            batch_dict={entity_table_name: batch},
            num_sampled_nodes_dict={entity_table_name: [len(batch)]},
            row_dict={},
            col_dict={},
            num_sampled_edges_dict={},
        )

    # Abstract Methods ########################################################

    @abstractmethod
    def _by_pkey(
        self,
        table_name: str,
        pkey: pd.Series,
        columns: set[str],
    ) -> tuple[pd.DataFrame, np.ndarray]:
        r"""Fetches the rows of ``table_name`` that match ``pkey``.

        Returns a data frame together with an array that
        :meth:`_sample_subgraph` uses as positional indices into ``pkey``
        (presumably one entry per returned row -- confirm in the concrete
        backends).
        """
        pass
@@ -0,0 +1,113 @@
1
+ from abc import abstractmethod
2
+ from collections import defaultdict
3
+ from collections.abc import Sequence
4
+ from functools import cached_property
5
+ from typing import Any
6
+
7
+ from kumoapi.model_plan import MissingType
8
+
9
+ from kumoai.experimental.rfm.base import (
10
+ ColumnExpressionType,
11
+ SourceForeignKey,
12
+ Table,
13
+ )
14
+ from kumoai.utils import quote_ident
15
+
16
+
17
class SQLTable(Table):
    r"""A :class:`Table` whose data lives in a SQL database.

    Args:
        name: The logical name of this table.
        source_name: The physical name of this table in the database. Falls
            back to ``name`` if not given.
        columns: The physical columns to select from this table. If set to
            ``None``, all source columns are added.
        column_expressions: The logical columns of this table.
        primary_key: The name of the primary key of this table, if it exists.
            If left unspecified, the primary key reported by the source is
            used.
        time_column: The name of the time column of this table, if it exists.
        end_time_column: The name of the end time column of this table, if it
            exists.
    """
    def __init__(
        self,
        name: str,
        source_name: str | None = None,
        columns: Sequence[str] | None = None,
        column_expressions: Sequence[ColumnExpressionType] | None = None,
        primary_key: MissingType | str | None = MissingType.VALUE,
        time_column: str | None = None,
        end_time_column: str | None = None,
    ) -> None:

        # Declared only; not assigned anywhere in this class (presumably set
        # by the concrete backend table).
        self._connection: Any
        self._source_name = source_name or name

        # Start from an empty table; columns and special roles are added
        # step-by-step below:
        super().__init__(
            name=name,
            columns=[],
            primary_key=None,
            time_column=None,
            end_time_column=None,
        )

        if isinstance(primary_key, MissingType):
            primary_key = self._source_primary_key

        # Column expressions are registered before any physical column:
        self._add_column_expressions(column_expressions or [])

        if columns is not None:
            for column_name in columns:
                self.add_column(column_name)
        else:
            for column_name in self._source_column_dict:
                # Skip names that are already part of this table:
                if column_name not in self:
                    self.add_column(column_name)

        # Assign the special roles in a fixed order, adding the referenced
        # column first in case it has not been selected yet:
        for role, column_name in (
            ('primary_key', primary_key),
            ('time_column', time_column),
            ('end_time_column', end_time_column),
        ):
            if column_name is None:
                continue
            if column_name not in self:
                self.add_column(column_name)
            setattr(self, role, column_name)

    @property
    def fqn(self) -> str:
        r"""The fully-qualified quoted source table name."""
        return quote_ident(self._source_name)

    # Column ##################################################################

    def _add_column_expressions(
        self,
        columns: Sequence[ColumnExpressionType],
    ) -> None:
        r"""Hook for registering column expressions; a no-op in this class."""
        pass

    # Abstract Methods ########################################################

    @cached_property
    def _source_foreign_key_dict(self) -> dict[str, SourceForeignKey]:
        r"""The foreign keys reported by the source, indexed by name.

        Foreign keys are dropped if their destination table is referenced via
        more than one distinct key, since composite keys are not supported
        yet.
        """
        foreign_keys = self._get_source_foreign_keys()

        # Collect the distinct keys referenced in each destination table:
        referenced_keys: dict[str, set[str]] = defaultdict(set)
        for foreign_key in foreign_keys:
            referenced_keys[foreign_key.dst_table].add(
                foreign_key.primary_key)

        out: dict[str, SourceForeignKey] = {}
        for foreign_key in foreign_keys:
            if len(referenced_keys[foreign_key.dst_table]) == 1:
                out[foreign_key.name] = foreign_key
        return out

    @abstractmethod
    def _get_source_foreign_keys(self) -> list[SourceForeignKey]:
        r"""Returns the foreign keys of the underlying source table."""
        pass
@@ -1,15 +1,25 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import Dict, List, Optional, Sequence
2
+ from collections.abc import Sequence
3
+ from functools import cached_property
3
4
 
4
5
  import pandas as pd
6
+ from kumoapi.model_plan import MissingType
5
7
  from kumoapi.source_table import UnavailableSourceTable
6
8
  from kumoapi.table import Column as ColumnDefinition
7
9
  from kumoapi.table import TableDefinition
8
- from kumoapi.typing import Dtype, Stype
10
+ from kumoapi.typing import Stype
9
11
  from typing_extensions import Self
10
12
 
11
- from kumoai import in_notebook
12
- from kumoai.experimental.rfm.base import Column
13
+ from kumoai import in_notebook, in_snowflake_notebook
14
+ from kumoai.experimental.rfm.base import Column, DataBackend, SourceColumn
15
+ from kumoai.experimental.rfm.infer import (
16
+ contains_categorical,
17
+ contains_id,
18
+ contains_multicategorical,
19
+ contains_timestamp,
20
+ infer_primary_key,
21
+ infer_time_column,
22
+ )
13
23
 
14
24
 
15
25
  class Table(ABC):
@@ -28,19 +38,29 @@ class Table(ABC):
28
38
  def __init__(
29
39
  self,
30
40
  name: str,
31
- columns: Optional[Sequence[str]] = None,
32
- primary_key: Optional[str] = None,
33
- time_column: Optional[str] = None,
34
- end_time_column: Optional[str] = None,
41
+ columns: Sequence[str] | None = None,
42
+ primary_key: MissingType | str | None = MissingType.VALUE,
43
+ time_column: str | None = None,
44
+ end_time_column: str | None = None,
35
45
  ) -> None:
36
46
 
37
47
  self._name = name
38
- self._primary_key: Optional[str] = None
39
- self._time_column: Optional[str] = None
40
- self._end_time_column: Optional[str] = None
48
+ self._primary_key: str | None = None
49
+ self._time_column: str | None = None
50
+ self._end_time_column: str | None = None
41
51
 
42
- self._columns: Dict[str, Column] = {}
43
- for column_name in columns or []:
52
+ if columns is None:
53
+ columns = list(self._source_column_dict.keys())
54
+
55
+ if len(self._source_column_dict) == 0:
56
+ raise ValueError(f"Table '{name}' does not hold any column with "
57
+ f"a supported data type")
58
+
59
+ if isinstance(primary_key, MissingType):
60
+ primary_key = self._source_primary_key
61
+
62
+ self._columns: dict[str, Column] = {}
63
+ for column_name in columns:
44
64
  self.add_column(column_name)
45
65
 
46
66
  if primary_key is not None:
@@ -63,7 +83,7 @@ class Table(ABC):
63
83
  r"""The name of this table."""
64
84
  return self._name
65
85
 
66
- # Data column #############################################################
86
+ # Column ##################################################################
67
87
 
68
88
  def has_column(self, name: str) -> bool:
69
89
  r"""Returns ``True`` if this table holds a column with name ``name``;
@@ -85,7 +105,7 @@ class Table(ABC):
85
105
  return self._columns[name]
86
106
 
87
107
  @property
88
- def columns(self) -> List[Column]:
108
+ def columns(self) -> list[Column]:
89
109
  r"""Returns a list of :class:`Column` objects that represent the
90
110
  columns in this table.
91
111
  """
@@ -104,20 +124,24 @@ class Table(ABC):
104
124
  raise KeyError(f"Column '{name}' already exists in table "
105
125
  f"'{self.name}'")
106
126
 
107
- if not self._has_source_column(name):
127
+ if name not in self._source_column_dict:
108
128
  raise KeyError(f"Column '{name}' does not exist in the underlying "
109
129
  f"source table")
110
130
 
111
- try:
112
- dtype = self._get_source_dtype(name)
113
- except Exception as e:
114
- raise RuntimeError(f"Could not obtain data type for column "
115
- f"'{name}' in table '{self.name}'. Change "
116
- f"the data type of the column in the source "
117
- f"table or remove it from the table.") from e
131
+ dtype = self._source_column_dict[name].dtype
118
132
 
119
133
  try:
120
- stype = self._get_source_stype(name, dtype)
134
+ ser = self._sample_df[name]
135
+ if contains_id(ser, name, dtype):
136
+ stype = Stype.ID
137
+ elif contains_timestamp(ser, name, dtype):
138
+ stype = Stype.timestamp
139
+ elif contains_multicategorical(ser, name, dtype):
140
+ stype = Stype.multicategorical
141
+ elif contains_categorical(ser, name, dtype):
142
+ stype = Stype.categorical
143
+ else:
144
+ stype = dtype.default_stype
121
145
  except Exception as e:
122
146
  raise RuntimeError(f"Could not obtain semantic type for column "
123
147
  f"'{name}' in table '{self.name}'. Change "
@@ -126,8 +150,8 @@ class Table(ABC):
126
150
 
127
151
  self._columns[name] = Column(
128
152
  name=name,
129
- dtype=dtype,
130
153
  stype=stype,
154
+ dtype=dtype,
131
155
  )
132
156
 
133
157
  return self._columns[name]
@@ -163,7 +187,7 @@ class Table(ABC):
163
187
  return self._primary_key is not None
164
188
 
165
189
  @property
166
- def primary_key(self) -> Optional[Column]:
190
+ def primary_key(self) -> Column | None:
167
191
  r"""The primary key column of this table.
168
192
 
169
193
  The getter returns the primary key column of this table, or ``None`` if
@@ -178,7 +202,7 @@ class Table(ABC):
178
202
  return self[self._primary_key]
179
203
 
180
204
  @primary_key.setter
181
- def primary_key(self, name: Optional[str]) -> None:
205
+ def primary_key(self, name: str | None) -> None:
182
206
  if name is not None and name == self._time_column:
183
207
  raise ValueError(f"Cannot specify column '{name}' as a primary "
184
208
  f"key since it is already defined to be a time "
@@ -208,7 +232,7 @@ class Table(ABC):
208
232
  return self._time_column is not None
209
233
 
210
234
  @property
211
- def time_column(self) -> Optional[Column]:
235
+ def time_column(self) -> Column | None:
212
236
  r"""The time column of this table.
213
237
 
214
238
  The getter returns the time column of this table, or ``None`` if no
@@ -223,7 +247,7 @@ class Table(ABC):
223
247
  return self[self._time_column]
224
248
 
225
249
  @time_column.setter
226
- def time_column(self, name: Optional[str]) -> None:
250
+ def time_column(self, name: str | None) -> None:
227
251
  if name is not None and name == self._primary_key:
228
252
  raise ValueError(f"Cannot specify column '{name}' as a time "
229
253
  f"column since it is already defined to be a "
@@ -253,7 +277,7 @@ class Table(ABC):
253
277
  return self._end_time_column is not None
254
278
 
255
279
  @property
256
- def end_time_column(self) -> Optional[Column]:
280
+ def end_time_column(self) -> Column | None:
257
281
  r"""The end time column of this table.
258
282
 
259
283
  The getter returns the end time column of this table, or ``None`` if no
@@ -269,7 +293,7 @@ class Table(ABC):
269
293
  return self[self._end_time_column]
270
294
 
271
295
  @end_time_column.setter
272
- def end_time_column(self, name: Optional[str]) -> None:
296
+ def end_time_column(self, name: str | None) -> None:
273
297
  if name is not None and name == self._primary_key:
274
298
  raise ValueError(f"Cannot specify column '{name}' as an end time "
275
299
  f"column since it is already defined to be a "
@@ -338,10 +362,16 @@ class Table(ABC):
338
362
 
339
363
  def print_metadata(self) -> None:
340
364
  r"""Prints the :meth:`~metadata` of this table."""
341
- num_rows = self._num_rows()
342
- num_rows_repr = ' ({num_rows:,} rows)' if num_rows is not None else ''
365
+ num_rows_repr = ''
366
+ if self._num_rows is not None:
367
+ num_rows_repr = ' ({self._num_rows:,} rows)'
343
368
 
344
- if in_notebook():
369
+ if in_snowflake_notebook():
370
+ import streamlit as st
371
+ md_repr = f"### 🏷️ Metadata of Table `{self.name}`{num_rows_repr}"
372
+ st.markdown(md_repr)
373
+ st.dataframe(self.metadata, hide_index=True)
374
+ elif in_notebook():
345
375
  from IPython.display import Markdown, display
346
376
  md_repr = f"### 🏷️ Metadata of Table `{self.name}`{num_rows_repr}"
347
377
  display(Markdown(md_repr))
@@ -357,8 +387,83 @@ class Table(ABC):
357
387
  print(f"🏷️ Metadata of Table '{self.name}'{num_rows_repr}")
358
388
  print(self.metadata.to_string(index=False))
359
389
 
390
def infer_primary_key(self, verbose: bool = True) -> Self:
    r"""Infers the primary key in this table.

    Does nothing if a primary key is already set. Otherwise, the following
    sources are tried in order, and the first hit is assigned:

    1. the primary key reported by the source table,
    2. the unique key of the source table, if there is exactly one,
    3. the result of the module-level ``infer_primary_key`` routine run on
       the sample data frame.

    Args:
        verbose: Whether to print verbose output.
    """
    if self.has_primary_key():
        return self

    def _assign(column_name: str) -> Self:
        self.primary_key = column_name
        if verbose:
            print(f"Detected primary key '{column_name}' in table "
                  f"'{self.name}'")
        return self

    source_primary_key = self._source_primary_key
    if source_primary_key:
        return _assign(source_primary_key)

    unique_keys = [
        source_column.name
        for source_column in self._source_column_dict.values()
        if source_column.is_unique_key
    ]
    if len(unique_keys) == 1:  # NOTE No composite keys yet.
        return _assign(unique_keys[0])

    candidates = [
        column.name for column in self.columns if column.stype == Stype.ID
    ]
    if not candidates:
        # Without any ID column, fall back to columns named like the table,
        # case-insensitively and optionally without a trailing "s":
        table_name = self.name.lower()
        is_plural = table_name.endswith('s')
        for column in self.columns:
            column_name = column.name.lower()
            if (column_name == table_name
                    or (is_plural and column_name == table_name[:-1])):
                candidates.append(column.name)

    inferred = infer_primary_key(
        table_name=self.name,
        df=self._sample_df,
        candidates=candidates,
    )
    if inferred:
        return _assign(inferred)

    return self
437
+
438
def infer_time_column(self, verbose: bool = True) -> Self:
    r"""Infers the time column in this table.

    Does nothing if a time column is already set. Otherwise, all timestamp
    columns except the end time column are handed to the module-level
    ``infer_time_column`` routine together with the sample data frame, and
    its result, if any, is assigned as the time column.

    Args:
        verbose: Whether to print verbose output.
    """
    if self.has_time_column():
        return self

    candidates = []
    for column in self.columns:
        if column.stype != Stype.timestamp:
            continue
        if column.name == self._end_time_column:
            continue
        candidates.append(column.name)

    detected = infer_time_column(
        df=self._sample_df,
        candidates=candidates,
    )
    if not detected:
        return self

    self.time_column = detected

    if verbose:
        print(f"Detected time column '{detected}' in table "
              f"'{self.name}'")

    return self
464
+
360
465
  def infer_metadata(self, verbose: bool = True) -> Self:
361
- r"""Infers metadata, *i.e.*, primary keys and time columns, in the
466
+ r"""Infers metadata, *i.e.*, primary keys and time columns, in this
362
467
  table.
363
468
 
364
469
  Args:
@@ -366,38 +471,15 @@ class Table(ABC):
366
471
  """
367
472
  logs = []
368
473
 
369
- # Try to detect primary key if not set:
370
474
  if not self.has_primary_key():
475
+ self.infer_primary_key(verbose=False)
476
+ if self.has_primary_key():
477
+ logs.append(f"primary key '{self._primary_key}'")
371
478
 
372
- def is_candidate(column: Column) -> bool:
373
- if column.stype == Stype.ID:
374
- return True
375
- if all(column.stype != Stype.ID for column in self.columns):
376
- if self.name == column.name:
377
- return True
378
- if (self.name.endswith('s')
379
- and self.name[:-1] == column.name):
380
- return True
381
- return False
382
-
383
- candidates = [
384
- column.name for column in self.columns if is_candidate(column)
385
- ]
386
-
387
- if primary_key := self._infer_primary_key(candidates):
388
- self.primary_key = primary_key
389
- logs.append(f"primary key '{primary_key}'")
390
-
391
- # Try to detect time column if not set:
392
479
  if not self.has_time_column():
393
- candidates = [
394
- column.name for column in self.columns
395
- if column.stype == Stype.timestamp
396
- and column.name != self._end_time_column
397
- ]
398
- if time_column := self._infer_time_column(candidates):
399
- self.time_column = time_column
400
- logs.append(f"time column '{time_column}'")
480
+ self.infer_time_column(verbose=False)
481
+ if self.has_time_column():
482
+ logs.append(f"time column '{self._time_column}'")
401
483
 
402
484
  if verbose and len(logs) > 0:
403
485
  print(f"Detected {' and '.join(logs)} in table '{self.name}'")
@@ -418,6 +500,17 @@ class Table(ABC):
418
500
  end_time_col=self._end_time_column,
419
501
  )
420
502
 
503
@property
def _source_primary_key(self) -> str | None:
    r"""The name of the primary key column as reported by the source.

    Returns ``None`` unless exactly one source column is flagged as a
    primary key, since composite keys are not supported yet.
    """
    flagged = [
        source_column.name
        for source_column in self._source_column_dict.values()
        if source_column.is_primary_key
    ]
    # NOTE No composite keys yet.
    return flagged[0] if len(flagged) == 1 else None
513
+
421
514
  # Python builtins #########################################################
422
515
 
423
516
  def __hash__(self) -> int:
@@ -446,28 +539,33 @@ class Table(ABC):
446
539
  f' end_time_column={self._end_time_column},\n'
447
540
  f')')
448
541
 
449
- # Abstract method #########################################################
542
+ # Abstract Methods ########################################################
450
543
 
544
+ @property
451
545
  @abstractmethod
452
- def _has_source_column(self, name: str) -> bool:
453
- pass
546
+ def backend(self) -> DataBackend:
547
+ r"""The data backend of this table."""
454
548
 
455
- @abstractmethod
456
- def _get_source_dtype(self, name: str) -> Dtype:
457
- pass
549
+ @cached_property
550
+ def _source_column_dict(self) -> dict[str, SourceColumn]:
551
+ return {col.name: col for col in self._get_source_columns()}
458
552
 
459
553
  @abstractmethod
460
- def _get_source_stype(self, name: str, dtype: Dtype) -> Stype:
554
+ def _get_source_columns(self) -> list[SourceColumn]:
461
555
  pass
462
556
 
463
- @abstractmethod
464
- def _infer_primary_key(self, candidates: List[str]) -> Optional[str]:
465
- pass
557
+ @cached_property
558
+ def _sample_df(self) -> pd.DataFrame:
559
+ return self._get_sample_df()
466
560
 
467
561
  @abstractmethod
468
- def _infer_time_column(self, candidates: List[str]) -> Optional[str]:
562
+ def _get_sample_df(self) -> pd.DataFrame:
469
563
  pass
470
564
 
565
+ @cached_property
566
+ def _num_rows(self) -> int | None:
567
+ return self._get_num_rows()
568
+
471
569
  @abstractmethod
472
- def _num_rows(self) -> Optional[int]:
570
+ def _get_num_rows(self) -> int | None:
473
571
  pass