kumoai 2.13.0.dev202512031731__cp312-cp312-macosx_11_0_arm64.whl → 2.14.0.dev202512181731__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. kumoai/__init__.py +12 -0
  2. kumoai/_version.py +1 -1
  3. kumoai/client/pquery.py +6 -2
  4. kumoai/experimental/rfm/__init__.py +33 -8
  5. kumoai/experimental/rfm/authenticate.py +3 -4
  6. kumoai/experimental/rfm/backend/local/__init__.py +4 -0
  7. kumoai/experimental/rfm/{local_graph_store.py → backend/local/graph_store.py} +52 -91
  8. kumoai/experimental/rfm/backend/local/sampler.py +315 -0
  9. kumoai/experimental/rfm/backend/local/table.py +31 -14
  10. kumoai/experimental/rfm/backend/snow/__init__.py +2 -0
  11. kumoai/experimental/rfm/backend/snow/sampler.py +252 -0
  12. kumoai/experimental/rfm/backend/snow/table.py +75 -23
  13. kumoai/experimental/rfm/backend/sqlite/__init__.py +4 -2
  14. kumoai/experimental/rfm/backend/sqlite/sampler.py +349 -0
  15. kumoai/experimental/rfm/backend/sqlite/table.py +71 -28
  16. kumoai/experimental/rfm/base/__init__.py +24 -3
  17. kumoai/experimental/rfm/base/column.py +6 -12
  18. kumoai/experimental/rfm/base/column_expression.py +16 -0
  19. kumoai/experimental/rfm/base/sampler.py +773 -0
  20. kumoai/experimental/rfm/base/source.py +1 -0
  21. kumoai/experimental/rfm/base/sql_sampler.py +84 -0
  22. kumoai/experimental/rfm/base/sql_table.py +113 -0
  23. kumoai/experimental/rfm/base/table.py +136 -105
  24. kumoai/experimental/rfm/graph.py +296 -89
  25. kumoai/experimental/rfm/infer/dtype.py +46 -59
  26. kumoai/experimental/rfm/infer/pkey.py +4 -2
  27. kumoai/experimental/rfm/infer/time_col.py +1 -2
  28. kumoai/experimental/rfm/pquery/executor.py +27 -27
  29. kumoai/experimental/rfm/pquery/pandas_executor.py +30 -32
  30. kumoai/experimental/rfm/rfm.py +299 -230
  31. kumoai/experimental/rfm/sagemaker.py +4 -4
  32. kumoai/pquery/predictive_query.py +10 -6
  33. kumoai/testing/snow.py +50 -0
  34. kumoai/utils/__init__.py +3 -2
  35. kumoai/utils/progress_logger.py +178 -12
  36. kumoai/utils/sql.py +3 -0
  37. {kumoai-2.13.0.dev202512031731.dist-info → kumoai-2.14.0.dev202512181731.dist-info}/METADATA +4 -2
  38. {kumoai-2.13.0.dev202512031731.dist-info → kumoai-2.14.0.dev202512181731.dist-info}/RECORD +41 -34
  39. kumoai/experimental/rfm/local_graph_sampler.py +0 -223
  40. kumoai/experimental/rfm/local_pquery_driver.py +0 -689
  41. {kumoai-2.13.0.dev202512031731.dist-info → kumoai-2.14.0.dev202512181731.dist-info}/WHEEL +0 -0
  42. {kumoai-2.13.0.dev202512031731.dist-info → kumoai-2.14.0.dev202512181731.dist-info}/licenses/LICENSE +0 -0
  43. {kumoai-2.13.0.dev202512031731.dist-info → kumoai-2.14.0.dev202512181731.dist-info}/top_level.txt +0 -0
@@ -9,6 +9,7 @@ class SourceColumn:
9
9
  dtype: Dtype
10
10
  is_primary_key: bool
11
11
  is_unique_key: bool
12
+ is_nullable: bool
12
13
 
13
14
 
14
15
  @dataclass
@@ -0,0 +1,84 @@
1
+ from abc import abstractmethod
2
+ from typing import TYPE_CHECKING, Literal
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+
7
+ from kumoai.experimental.rfm.base import Sampler, SamplerOutput, SQLTable
8
+ from kumoai.utils import ProgressLogger
9
+
10
+ if TYPE_CHECKING:
11
+ from kumoai.experimental.rfm import Graph
12
+
13
+
14
class SQLSampler(Sampler):
    r"""A :class:`Sampler` for graphs whose tables are all
    :class:`SQLTable` instances.

    Args:
        graph: The graph to sample from. Every table in ``graph.tables`` is
            asserted to be a :class:`SQLTable`.
        verbose: Forwarded unchanged to the parent :class:`Sampler`.
    """
    def __init__(
        self,
        graph: 'Graph',
        verbose: bool | ProgressLogger = True,
    ) -> None:
        super().__init__(graph=graph, verbose=verbose)

        # Logical table name -> fully-qualified quoted source name:
        self._fqn_dict: dict[str, str] = {}
        for sql_table in graph.tables.values():
            assert isinstance(sql_table, SQLTable)
            # NOTE The connection is re-assigned per table, so the one of the
            # table visited last is kept:
            self._connection = sql_table._connection
            self._fqn_dict[sql_table.name] = sql_table.fqn

    @property
    def fqn_dict(self) -> dict[str, str]:
        r"""A mapping from each table name in the graph to its
        fully-qualified quoted source name.
        """
        return self._fqn_dict

    def _sample_subgraph(
        self,
        entity_table_name: str,
        entity_pkey: pd.Series,
        anchor_time: pd.Series | Literal['entity'],
        columns_dict: dict[str, set[str]],
        num_neighbors: list[int],
    ) -> SamplerOutput:
        r"""Fetches the entity rows by primary key and wraps them into a
        :class:`SamplerOutput`.

        The output only holds the entity table; all edge-related fields are
        left empty and ``num_neighbors`` is not read by this implementation.

        Args:
            entity_table_name: The name of the entity table.
            entity_pkey: The primary keys to look up.
            anchor_time: The anchor times, or any non-:class:`pandas.Series`
                value (*e.g.*, ``'entity'``) to read them from the time
                column of the entity table.
            columns_dict: The columns to fetch per table name.
            num_neighbors: Unused here.

        Raises:
            KeyError: If the number of returned rows differs from the number
                of requested primary keys.
        """
        entity_df, found = self._by_pkey(
            table_name=entity_table_name,
            pkey=entity_pkey,
            columns=columns_dict[entity_table_name],
        )

        if len(found) != len(entity_pkey):
            # Flag every requested position that was not returned:
            missing = np.ones(len(entity_pkey), dtype=bool)
            missing[found] = False
            raise KeyError(f"The primary keys "
                           f"{entity_pkey.iloc[missing].tolist()} do not "
                           f"exist "
                           f"in the '{entity_table_name}' table")

        # Bring the rows into ascending order of `found`:
        order = found.argsort()
        found = found[order]
        entity_df = entity_df.iloc[order].reset_index(drop=True)

        if isinstance(anchor_time, pd.Series):
            anchor_ser = anchor_time
        else:
            anchor_ser = entity_df[self.time_column_dict[entity_table_name]]

        return SamplerOutput(
            anchor_time=anchor_ser.astype(int).to_numpy(),
            df_dict={entity_table_name: entity_df},
            inverse_dict={},
            batch_dict={entity_table_name: found},
            num_sampled_nodes_dict={entity_table_name: [len(found)]},
            row_dict={},
            col_dict={},
            num_sampled_edges_dict={},
        )

    # Abstract Methods ########################################################

    @abstractmethod
    def _by_pkey(
        self,
        table_name: str,
        pkey: pd.Series,
        columns: set[str],
    ) -> tuple[pd.DataFrame, np.ndarray]:
        r"""Looks up rows of ``table_name`` by primary key.

        Judging from its use in :meth:`_sample_subgraph`, the second return
        value holds, per returned row, the position of its key within
        ``pkey`` -- NOTE(review): confirm against the backend
        implementations.
        """
@@ -0,0 +1,113 @@
1
+ from abc import abstractmethod
2
+ from collections import defaultdict
3
+ from collections.abc import Sequence
4
+ from functools import cached_property
5
+ from typing import Any
6
+
7
+ from kumoapi.model_plan import MissingType
8
+
9
+ from kumoai.experimental.rfm.base import (
10
+ ColumnExpressionType,
11
+ SourceForeignKey,
12
+ Table,
13
+ )
14
+ from kumoai.utils import quote_ident
15
+
16
+
17
class SQLTable(Table):
    r"""A :class:`SQLTable` specifies a :class:`Table` backed by a SQL
    database.

    Args:
        name: The logical name of this table.
        source_name: The physical name of this table in the database. If set
            to ``None``, ``name`` is used.
        columns: The selected physical columns of this table.
        column_expressions: The logical columns of this table.
        primary_key: The name of the primary key of this table, if it exists.
        time_column: The name of the time column of this table, if it exists.
        end_time_column: The name of the end time column of this table, if it
            exists.
    """
    def __init__(
        self,
        name: str,
        source_name: str | None = None,
        columns: Sequence[str] | None = None,
        column_expressions: Sequence[ColumnExpressionType] | None = None,
        primary_key: MissingType | str | None = MissingType.VALUE,
        time_column: str | None = None,
        end_time_column: str | None = None,
    ) -> None:

        # Declared only; this initializer does not assign the connection:
        self._connection: Any
        self._source_name = source_name or name

        # Start from an empty table; columns and special columns are
        # registered below:
        super().__init__(
            name=name,
            columns=[],
            primary_key=None,
            time_column=None,
            end_time_column=None,
        )

        def _ensure_column(column_name: str) -> None:
            # Register the column unless the table already holds it.
            if column_name not in self:
                self.add_column(column_name)

        # A missing primary key falls back to the one found in the source:
        if isinstance(primary_key, MissingType):
            primary_key = self._source_primary_key

        # Add column expressions with highest priority:
        self._add_column_expressions(column_expressions or [])

        if columns is not None:
            for column_name in columns:
                self.add_column(column_name)
        else:
            # Take every source column that is not already present:
            for column_name in self._source_column_dict:
                _ensure_column(column_name)

        if primary_key is not None:
            _ensure_column(primary_key)
            self.primary_key = primary_key

        if time_column is not None:
            _ensure_column(time_column)
            self.time_column = time_column

        if end_time_column is not None:
            _ensure_column(end_time_column)
            self.end_time_column = end_time_column

    @property
    def fqn(self) -> str:
        r"""The fully-qualified quoted source table name."""
        return quote_ident(self._source_name)

    # Column ##################################################################

    def _add_column_expressions(
        self,
        columns: Sequence[ColumnExpressionType],
    ) -> None:
        r"""Hook for registering column expressions; a no-op in this
        class.
        """

    # Abstract Methods ########################################################

    @cached_property
    def _source_foreign_key_dict(self) -> dict[str, SourceForeignKey]:
        r"""The source foreign keys of this table, indexed by name."""
        fkeys = self._get_source_foreign_keys()

        # Collect the distinct primary keys referenced per destination table:
        pkeys_per_table: dict[str, set[str]] = defaultdict(set)
        for fkey in fkeys:
            pkeys_per_table[fkey.dst_table].add(fkey.primary_key)

        # NOTE Drop all keys that link to multiple keys in the same table
        # since we don't support composite keys yet:
        fkey_dict: dict[str, SourceForeignKey] = {}
        for fkey in fkeys:
            if len(pkeys_per_table[fkey.dst_table]) == 1:
                fkey_dict[fkey.name] = fkey
        return fkey_dict

    @abstractmethod
    def _get_source_foreign_keys(self) -> list[SourceForeignKey]:
        r"""Returns the foreign keys declared on the source table."""
@@ -1,17 +1,17 @@
1
1
  from abc import ABC, abstractmethod
2
- from collections import defaultdict
2
+ from collections.abc import Sequence
3
3
  from functools import cached_property
4
- from typing import Dict, List, Optional, Sequence, Set
5
4
 
6
5
  import pandas as pd
6
+ from kumoapi.model_plan import MissingType
7
7
  from kumoapi.source_table import UnavailableSourceTable
8
8
  from kumoapi.table import Column as ColumnDefinition
9
9
  from kumoapi.table import TableDefinition
10
10
  from kumoapi.typing import Stype
11
11
  from typing_extensions import Self
12
12
 
13
- from kumoai import in_notebook
14
- from kumoai.experimental.rfm.base import Column, SourceColumn, SourceForeignKey
13
+ from kumoai import in_notebook, in_snowflake_notebook
14
+ from kumoai.experimental.rfm.base import Column, DataBackend, SourceColumn
15
15
  from kumoai.experimental.rfm.infer import (
16
16
  contains_categorical,
17
17
  contains_id,
@@ -38,41 +38,29 @@ class Table(ABC):
38
38
  def __init__(
39
39
  self,
40
40
  name: str,
41
- columns: Optional[Sequence[str]] = None,
42
- primary_key: Optional[str] = None,
43
- time_column: Optional[str] = None,
44
- end_time_column: Optional[str] = None,
41
+ columns: Sequence[str] | None = None,
42
+ primary_key: MissingType | str | None = MissingType.VALUE,
43
+ time_column: str | None = None,
44
+ end_time_column: str | None = None,
45
45
  ) -> None:
46
46
 
47
47
  self._name = name
48
- self._primary_key: Optional[str] = None
49
- self._time_column: Optional[str] = None
50
- self._end_time_column: Optional[str] = None
48
+ self._primary_key: str | None = None
49
+ self._time_column: str | None = None
50
+ self._end_time_column: str | None = None
51
+
52
+ if columns is None:
53
+ columns = list(self._source_column_dict.keys())
51
54
 
52
55
  if len(self._source_column_dict) == 0:
53
56
  raise ValueError(f"Table '{name}' does not hold any column with "
54
57
  f"a supported data type")
55
58
 
56
- primary_keys = [
57
- column.name for column in self._source_column_dict.values()
58
- if column.is_primary_key
59
- ]
60
- if len(primary_keys) == 1: # NOTE No composite keys yet.
61
- if primary_key is not None and primary_key != primary_keys[0]:
62
- raise ValueError(f"Found duplicate primary key "
63
- f"definition '{primary_key}' and "
64
- f"'{primary_keys[0]}' in table '{name}'")
65
- primary_key = primary_keys[0]
66
-
67
- unique_keys = [
68
- column.name for column in self._source_column_dict.values()
69
- if column.is_unique_key
70
- ]
71
- if primary_key is None and len(unique_keys) == 1:
72
- primary_key = unique_keys[0]
59
+ if isinstance(primary_key, MissingType):
60
+ primary_key = self._source_primary_key
73
61
 
74
- self._columns: Dict[str, Column] = {}
75
- for column_name in columns or list(self._source_column_dict.keys()):
62
+ self._columns: dict[str, Column] = {}
63
+ for column_name in columns:
76
64
  self.add_column(column_name)
77
65
 
78
66
  if primary_key is not None:
@@ -95,7 +83,7 @@ class Table(ABC):
95
83
  r"""The name of this table."""
96
84
  return self._name
97
85
 
98
- # Data column #############################################################
86
+ # Column ##################################################################
99
87
 
100
88
  def has_column(self, name: str) -> bool:
101
89
  r"""Returns ``True`` if this table holds a column with name ``name``;
@@ -117,7 +105,7 @@ class Table(ABC):
117
105
  return self._columns[name]
118
106
 
119
107
  @property
120
- def columns(self) -> List[Column]:
108
+ def columns(self) -> list[Column]:
121
109
  r"""Returns a list of :class:`Column` objects that represent the
122
110
  columns in this table.
123
111
  """
@@ -140,13 +128,7 @@ class Table(ABC):
140
128
  raise KeyError(f"Column '{name}' does not exist in the underlying "
141
129
  f"source table")
142
130
 
143
- try:
144
- dtype = self._source_column_dict[name].dtype
145
- except Exception as e:
146
- raise RuntimeError(f"Could not obtain data type for column "
147
- f"'{name}' in table '{self.name}'. Change "
148
- f"the data type of the column in the source "
149
- f"table or remove it from the table.") from e
131
+ dtype = self._source_column_dict[name].dtype
150
132
 
151
133
  try:
152
134
  ser = self._sample_df[name]
@@ -168,8 +150,8 @@ class Table(ABC):
168
150
 
169
151
  self._columns[name] = Column(
170
152
  name=name,
171
- dtype=dtype,
172
153
  stype=stype,
154
+ dtype=dtype,
173
155
  )
174
156
 
175
157
  return self._columns[name]
@@ -205,7 +187,7 @@ class Table(ABC):
205
187
  return self._primary_key is not None
206
188
 
207
189
  @property
208
- def primary_key(self) -> Optional[Column]:
190
+ def primary_key(self) -> Column | None:
209
191
  r"""The primary key column of this table.
210
192
 
211
193
  The getter returns the primary key column of this table, or ``None`` if
@@ -220,7 +202,7 @@ class Table(ABC):
220
202
  return self[self._primary_key]
221
203
 
222
204
  @primary_key.setter
223
- def primary_key(self, name: Optional[str]) -> None:
205
+ def primary_key(self, name: str | None) -> None:
224
206
  if name is not None and name == self._time_column:
225
207
  raise ValueError(f"Cannot specify column '{name}' as a primary "
226
208
  f"key since it is already defined to be a time "
@@ -250,7 +232,7 @@ class Table(ABC):
250
232
  return self._time_column is not None
251
233
 
252
234
  @property
253
- def time_column(self) -> Optional[Column]:
235
+ def time_column(self) -> Column | None:
254
236
  r"""The time column of this table.
255
237
 
256
238
  The getter returns the time column of this table, or ``None`` if no
@@ -265,7 +247,7 @@ class Table(ABC):
265
247
  return self[self._time_column]
266
248
 
267
249
  @time_column.setter
268
- def time_column(self, name: Optional[str]) -> None:
250
+ def time_column(self, name: str | None) -> None:
269
251
  if name is not None and name == self._primary_key:
270
252
  raise ValueError(f"Cannot specify column '{name}' as a time "
271
253
  f"column since it is already defined to be a "
@@ -295,7 +277,7 @@ class Table(ABC):
295
277
  return self._end_time_column is not None
296
278
 
297
279
  @property
298
- def end_time_column(self) -> Optional[Column]:
280
+ def end_time_column(self) -> Column | None:
299
281
  r"""The end time column of this table.
300
282
 
301
283
  The getter returns the end time column of this table, or ``None`` if no
@@ -311,7 +293,7 @@ class Table(ABC):
311
293
  return self[self._end_time_column]
312
294
 
313
295
  @end_time_column.setter
314
- def end_time_column(self, name: Optional[str]) -> None:
296
+ def end_time_column(self, name: str | None) -> None:
315
297
  if name is not None and name == self._primary_key:
316
298
  raise ValueError(f"Cannot specify column '{name}' as an end time "
317
299
  f"column since it is already defined to be a "
@@ -384,7 +366,12 @@ class Table(ABC):
384
366
  if self._num_rows is not None:
385
367
  num_rows_repr = ' ({self._num_rows:,} rows)'
386
368
 
387
- if in_notebook():
369
+ if in_snowflake_notebook():
370
+ import streamlit as st
371
+ md_repr = f"### 🏷️ Metadata of Table `{self.name}`{num_rows_repr}"
372
+ st.markdown(md_repr)
373
+ st.dataframe(self.metadata, hide_index=True)
374
+ elif in_notebook():
388
375
  from IPython.display import Markdown, display
389
376
  md_repr = f"### 🏷️ Metadata of Table `{self.name}`{num_rows_repr}"
390
377
  display(Markdown(md_repr))
@@ -400,8 +387,83 @@ class Table(ABC):
400
387
  print(f"🏷️ Metadata of Table '{self.name}'{num_rows_repr}")
401
388
  print(self.metadata.to_string(index=False))
402
389
 
390
def infer_primary_key(self, verbose: bool = True) -> Self:
    r"""Infers the primary key in this table.

    Does nothing if a primary key is already set. Otherwise tries, in
    order: the single primary key of the source table, the single unique
    key of the source table, and finally a heuristic over candidate
    columns.

    Args:
        verbose: Whether to print verbose output.
    """
    if self.has_primary_key():
        return self

    def _assign(column_name: str) -> None:
        self.primary_key = column_name
        if verbose:
            print(f"Detected primary key '{column_name}' in table "
                  f"'{self.name}'")

    source_pkey = self._source_primary_key
    if source_pkey:
        _assign(source_pkey)
        return self

    unique_keys = []
    for source_column in self._source_column_dict.values():
        if source_column.is_unique_key:
            unique_keys.append(source_column.name)
    if len(unique_keys) == 1:  # NOTE No composite keys yet.
        _assign(unique_keys[0])
        return self

    candidates = [col.name for col in self.columns if col.stype == Stype.ID]
    if len(candidates) == 0:
        # Without ID columns, fall back to columns named like the table
        # (case-insensitive), optionally without its trailing 's':
        table_name = self.name.lower()
        accepted = {table_name}
        if table_name.endswith('s'):
            accepted.add(table_name[:-1])
        candidates = [
            col.name for col in self.columns if col.name.lower() in accepted
        ]

    inferred = infer_primary_key(
        table_name=self.name,
        df=self._sample_df,
        candidates=candidates,
    )
    if inferred:
        _assign(inferred)

    return self
437
+
438
def infer_time_column(self, verbose: bool = True) -> Self:
    r"""Infers the time column in this table.

    Does nothing if a time column is already set. Candidates are all
    timestamp columns other than the end time column.

    Args:
        verbose: Whether to print verbose output.
    """
    if self.has_time_column():
        return self

    candidates = []
    for col in self.columns:
        if col.stype != Stype.timestamp:
            continue
        if col.name == self._end_time_column:
            continue
        candidates.append(col.name)

    detected = infer_time_column(
        df=self._sample_df,
        candidates=candidates,
    )
    if not detected:
        return self

    self.time_column = detected
    if verbose:
        print(f"Detected time column '{detected}' in table "
              f"'{self.name}'")

    return self
464
+
403
465
  def infer_metadata(self, verbose: bool = True) -> Self:
404
- r"""Infers metadata, *i.e.*, primary keys and time columns, in the
466
+ r"""Infers metadata, *i.e.*, primary keys and time columns, in this
405
467
  table.
406
468
 
407
469
  Args:
@@ -409,45 +471,15 @@ class Table(ABC):
409
471
  """
410
472
  logs = []
411
473
 
412
- # Try to detect primary key if not set:
413
474
  if not self.has_primary_key():
475
+ self.infer_primary_key(verbose=False)
476
+ if self.has_primary_key():
477
+ logs.append(f"primary key '{self._primary_key}'")
414
478
 
415
- def is_candidate(column: Column) -> bool:
416
- if column.stype == Stype.ID:
417
- return True
418
- if all(column.stype != Stype.ID for column in self.columns):
419
- if self.name == column.name:
420
- return True
421
- if (self.name.endswith('s')
422
- and self.name[:-1] == column.name):
423
- return True
424
- return False
425
-
426
- candidates = [
427
- column.name for column in self.columns if is_candidate(column)
428
- ]
429
-
430
- if primary_key := infer_primary_key(
431
- table_name=self.name,
432
- df=self._sample_df,
433
- candidates=candidates,
434
- ):
435
- self.primary_key = primary_key
436
- logs.append(f"primary key '{primary_key}'")
437
-
438
- # Try to detect time column if not set:
439
479
  if not self.has_time_column():
440
- candidates = [
441
- column.name for column in self.columns
442
- if column.stype == Stype.timestamp
443
- and column.name != self._end_time_column
444
- ]
445
- if time_column := infer_time_column(
446
- df=self._sample_df,
447
- candidates=candidates,
448
- ):
449
- self.time_column = time_column
450
- logs.append(f"time column '{time_column}'")
480
+ self.infer_time_column(verbose=False)
481
+ if self.has_time_column():
482
+ logs.append(f"time column '{self._time_column}'")
451
483
 
452
484
  if verbose and len(logs) > 0:
453
485
  print(f"Detected {' and '.join(logs)} in table '{self.name}'")
@@ -468,6 +500,17 @@ class Table(ABC):
468
500
  end_time_col=self._end_time_column,
469
501
  )
470
502
 
503
@property
def _source_primary_key(self) -> str | None:
    r"""The name of the primary key column of the source table, or ``None``
    if the source flags no column or more than one column as primary key.
    """
    flagged = []
    for source_column in self._source_column_dict.values():
        if source_column.is_primary_key:
            flagged.append(source_column.name)

    # NOTE No composite keys yet.
    return flagged[0] if len(flagged) == 1 else None
513
+
471
514
  # Python builtins #########################################################
472
515
 
473
516
  def __hash__(self) -> int:
@@ -496,31 +539,19 @@ class Table(ABC):
496
539
  f' end_time_column={self._end_time_column},\n'
497
540
  f')')
498
541
 
499
- # Abstract method #########################################################
500
-
501
- @cached_property
502
- def _source_column_dict(self) -> Dict[str, SourceColumn]:
503
- return {col.name: col for col in self._get_source_columns()}
542
+ # Abstract Methods ########################################################
504
543
 
544
+ @property
505
545
  @abstractmethod
506
- def _get_source_columns(self) -> List[SourceColumn]:
507
- pass
546
+ def backend(self) -> DataBackend:
547
+ r"""The data backend of this table."""
508
548
 
509
549
  @cached_property
510
- def _source_foreign_key_dict(self) -> Dict[str, SourceForeignKey]:
511
- fkeys = self._get_source_foreign_keys()
512
- # NOTE Drop all keys that link to different primary keys in the same
513
- # table since we don't support composite keys yet:
514
- table_pkeys: Dict[str, Set[str]] = defaultdict(set)
515
- for fkey in fkeys:
516
- table_pkeys[fkey.dst_table].add(fkey.primary_key)
517
- return {
518
- fkey.name: fkey
519
- for fkey in fkeys if len(table_pkeys[fkey.dst_table]) == 1
520
- }
550
+ def _source_column_dict(self) -> dict[str, SourceColumn]:
551
+ return {col.name: col for col in self._get_source_columns()}
521
552
 
522
553
  @abstractmethod
523
- def _get_source_foreign_keys(self) -> List[SourceForeignKey]:
554
+ def _get_source_columns(self) -> list[SourceColumn]:
524
555
  pass
525
556
 
526
557
  @cached_property
@@ -532,9 +563,9 @@ class Table(ABC):
532
563
  pass
533
564
 
534
565
  @cached_property
535
- def _num_rows(self) -> Optional[int]:
566
+ def _num_rows(self) -> int | None:
536
567
  return self._get_num_rows()
537
568
 
538
569
  @abstractmethod
539
- def _get_num_rows(self) -> Optional[int]:
570
+ def _get_num_rows(self) -> int | None:
540
571
  pass