kumoai 2.13.0.dev202512040649__cp313-cp313-win_amd64.whl → 2.14.0.dev202512211732__cp313-cp313-win_amd64.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- kumoai/__init__.py +12 -0
- kumoai/_version.py +1 -1
- kumoai/client/pquery.py +6 -2
- kumoai/experimental/rfm/__init__.py +33 -8
- kumoai/experimental/rfm/authenticate.py +3 -4
- kumoai/experimental/rfm/backend/local/__init__.py +4 -0
- kumoai/experimental/rfm/{local_graph_store.py → backend/local/graph_store.py} +52 -91
- kumoai/experimental/rfm/backend/local/sampler.py +315 -0
- kumoai/experimental/rfm/backend/local/table.py +21 -16
- kumoai/experimental/rfm/backend/snow/__init__.py +2 -0
- kumoai/experimental/rfm/backend/snow/sampler.py +252 -0
- kumoai/experimental/rfm/backend/snow/table.py +102 -48
- kumoai/experimental/rfm/backend/sqlite/__init__.py +4 -2
- kumoai/experimental/rfm/backend/sqlite/sampler.py +349 -0
- kumoai/experimental/rfm/backend/sqlite/table.py +84 -31
- kumoai/experimental/rfm/base/__init__.py +26 -3
- kumoai/experimental/rfm/base/column.py +14 -12
- kumoai/experimental/rfm/base/column_expression.py +50 -0
- kumoai/experimental/rfm/base/sampler.py +773 -0
- kumoai/experimental/rfm/base/source.py +1 -0
- kumoai/experimental/rfm/base/sql_sampler.py +84 -0
- kumoai/experimental/rfm/base/sql_table.py +229 -0
- kumoai/experimental/rfm/base/table.py +173 -138
- kumoai/experimental/rfm/graph.py +302 -108
- kumoai/experimental/rfm/infer/__init__.py +6 -4
- kumoai/experimental/rfm/infer/dtype.py +3 -3
- kumoai/experimental/rfm/infer/pkey.py +4 -2
- kumoai/experimental/rfm/infer/stype.py +35 -0
- kumoai/experimental/rfm/infer/time_col.py +1 -2
- kumoai/experimental/rfm/pquery/executor.py +27 -27
- kumoai/experimental/rfm/pquery/pandas_executor.py +30 -32
- kumoai/experimental/rfm/rfm.py +299 -230
- kumoai/experimental/rfm/sagemaker.py +4 -4
- kumoai/kumolib.cp313-win_amd64.pyd +0 -0
- kumoai/pquery/predictive_query.py +10 -6
- kumoai/testing/snow.py +50 -0
- kumoai/utils/__init__.py +3 -2
- kumoai/utils/progress_logger.py +178 -12
- kumoai/utils/sql.py +3 -0
- {kumoai-2.13.0.dev202512040649.dist-info → kumoai-2.14.0.dev202512211732.dist-info}/METADATA +3 -2
- {kumoai-2.13.0.dev202512040649.dist-info → kumoai-2.14.0.dev202512211732.dist-info}/RECORD +44 -36
- kumoai/experimental/rfm/local_graph_sampler.py +0 -223
- kumoai/experimental/rfm/local_pquery_driver.py +0 -689
- {kumoai-2.13.0.dev202512040649.dist-info → kumoai-2.14.0.dev202512211732.dist-info}/WHEEL +0 -0
- {kumoai-2.13.0.dev202512040649.dist-info → kumoai-2.14.0.dev202512211732.dist-info}/licenses/LICENSE +0 -0
- {kumoai-2.13.0.dev202512040649.dist-info → kumoai-2.14.0.dev202512211732.dist-info}/top_level.txt +0 -0
```diff
@@ -1,23 +1,20 @@
 from abc import ABC, abstractmethod
-from collections import
+from collections.abc import Sequence
 from functools import cached_property
-from typing import Dict, List, Optional, Sequence, Set
 
 import pandas as pd
+from kumoapi.model_plan import MissingType
 from kumoapi.source_table import UnavailableSourceTable
 from kumoapi.table import Column as ColumnDefinition
 from kumoapi.table import TableDefinition
 from kumoapi.typing import Stype
 from typing_extensions import Self
 
-from kumoai import in_notebook
-from kumoai.experimental.rfm.base import Column,
+from kumoai import in_notebook, in_snowflake_notebook
+from kumoai.experimental.rfm.base import Column, DataBackend, SourceColumn
 from kumoai.experimental.rfm.infer import (
-    contains_categorical,
-    contains_id,
-    contains_multicategorical,
-    contains_timestamp,
     infer_primary_key,
+    infer_stype,
     infer_time_column,
 )
 
@@ -38,44 +35,32 @@ class Table(ABC):
     def __init__(
         self,
         name: str,
-        columns:
-        primary_key:
-        time_column:
-        end_time_column:
+        columns: Sequence[str] | None = None,
+        primary_key: MissingType | str | None = MissingType.VALUE,
+        time_column: str | None = None,
+        end_time_column: str | None = None,
     ) -> None:
 
         self._name = name
-        self._primary_key:
-        self._time_column:
-        self._end_time_column:
+        self._primary_key: str | None = None
+        self._time_column: str | None = None
+        self._end_time_column: str | None = None
 
-        if
-
-                             f"a supported data type")
+        if columns is None:
+            columns = list(self._source_column_dict.keys())
 
-
-
-            if column.is_primary_key
-        ]
-        if len(primary_keys) == 1:  # NOTE No composite keys yet.
-            if primary_key is not None and primary_key != primary_keys[0]:
-                raise ValueError(f"Found duplicate primary key "
-                                 f"definition '{primary_key}' and "
-                                 f"'{primary_keys[0]}' in table '{name}'")
-            primary_key = primary_keys[0]
-
-        unique_keys = [
-            column.name for column in self._source_column_dict.values()
-            if column.is_unique_key
-        ]
-        if primary_key is None and len(unique_keys) == 1:
-            primary_key = unique_keys[0]
-
-        self._columns: Dict[str, Column] = {}
-        for column_name in columns or list(self._source_column_dict.keys()):
+        self._columns: dict[str, Column] = {}
+        for column_name in columns:
            self.add_column(column_name)
 
-        if primary_key
+        if isinstance(primary_key, MissingType):
+            # Inference from source column metadata:
+            if '_source_column_dict' in self.__dict__:
+                primary_key = self._source_primary_key
+                if (primary_key is not None and primary_key in self
+                        and self[primary_key].is_physical):
+                    self.primary_key = primary_key
+        elif primary_key is not None:
             if primary_key not in self:
                 self.add_column(primary_key)
             self.primary_key = primary_key
```
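The constructor change above (these hunks appear to come from `kumoai/experimental/rfm/base/table.py`, the `base/table.py` entry in the file list) swaps the old optional default for `primary_key` for a `MissingType.VALUE` sentinel, so the class can tell "the caller passed nothing, try to infer a key" apart from "the caller explicitly passed `None`, this table has no key". Below is a minimal standalone sketch of that sentinel pattern; `Missing` and `resolve_primary_key` are illustrative stand-ins, not kumoapi's actual `MissingType` or kumoai's API.

```python
import enum


class Missing(enum.Enum):
    VALUE = enum.auto()  # sentinel: "argument was not passed at all"


def resolve_primary_key(primary_key: Missing | str | None = Missing.VALUE) -> str:
    # Mirrors the three branches of Table.__init__ above:
    if isinstance(primary_key, Missing):   # not passed -> attempt inference
        return "infer from source metadata"
    if primary_key is not None:            # explicit column name
        return f"use explicit key '{primary_key}'"
    return "table has no primary key"      # explicit None


print(resolve_primary_key())            # infer from source metadata
print(resolve_primary_key("user_id"))   # use explicit key 'user_id'
print(resolve_primary_key(None))        # table has no primary key
```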
```diff
@@ -95,7 +80,7 @@ class Table(ABC):
         r"""The name of this table."""
         return self._name
 
-    #
+    # Column ##################################################################
 
     def has_column(self, name: str) -> bool:
         r"""Returns ``True`` if this table holds a column with name ``name``;
@@ -117,7 +102,7 @@ class Table(ABC):
         return self._columns[name]
 
     @property
-    def columns(self) ->
+    def columns(self) -> list[Column]:
         r"""Returns a list of :class:`Column` objects that represent the
         columns in this table.
         """
@@ -140,36 +125,22 @@ class Table(ABC):
             raise KeyError(f"Column '{name}' does not exist in the underlying "
                            f"source table")
 
-
-            dtype = self._source_column_dict[name].dtype
-        except Exception as e:
-            raise RuntimeError(f"Could not obtain data type for column "
-                               f"'{name}' in table '{self.name}'. Change "
-                               f"the data type of the column in the source "
-                               f"table or remove it from the table.") from e
+        dtype = self._source_column_dict[name].dtype
 
+        ser = self._source_sample_df[name]
         try:
-
-            if contains_id(ser, name, dtype):
-                stype = Stype.ID
-            elif contains_timestamp(ser, name, dtype):
-                stype = Stype.timestamp
-            elif contains_multicategorical(ser, name, dtype):
-                stype = Stype.multicategorical
-            elif contains_categorical(ser, name, dtype):
-                stype = Stype.categorical
-            else:
-                stype = dtype.default_stype
+            stype = infer_stype(ser, name, dtype)
         except Exception as e:
             raise RuntimeError(f"Could not obtain semantic type for column "
-                               f"'{name}'
-                               f"the data type of the
-                               f"table or remove it from
+                               f"'{name}' with data type '{dtype}' in table "
+                               f"'{self.name}'. Change the data type of the "
+                               f"column in the source table or remove it from "
+                               f"this table.") from e
 
         self._columns[name] = Column(
             name=name,
-            dtype=dtype,
             stype=stype,
+            dtype=dtype,
         )
 
         return self._columns[name]
```
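In `add_column()`, the inline chain of `contains_id` / `contains_timestamp` / `contains_multicategorical` / `contains_categorical` checks is folded into a single `infer_stype(ser, name, dtype)` call, and the file list above adds `kumoai/experimental/rfm/infer/stype.py` to host it. The diff does not show that helper's implementation; the sketch below only mirrors the precedence of the removed chain, with simplified pandas heuristics standing in for Kumo's predicates.

```python
import pandas as pd


def looks_like_id(ser: pd.Series, name: str) -> bool:
    # Very rough stand-in for contains_id():
    return name.lower().endswith('id') and ser.is_unique


def looks_like_timestamp(ser: pd.Series) -> bool:
    # Rough stand-in for contains_timestamp():
    return pd.api.types.is_datetime64_any_dtype(ser)


def looks_like_categorical(ser: pd.Series) -> bool:
    # Rough stand-in for contains_categorical():
    return ser.nunique(dropna=True) <= max(20, int(0.05 * len(ser)))


def infer_stype_sketch(ser: pd.Series, name: str) -> str:
    # Same precedence as the removed if/elif chain:
    # ID > timestamp > categorical > dtype default.
    if looks_like_id(ser, name):
        return 'ID'
    if looks_like_timestamp(ser):
        return 'timestamp'
    if looks_like_categorical(ser):
        return 'categorical'
    return 'numerical'  # akin to falling back to dtype.default_stype


df = pd.DataFrame({
    'user_id': [1, 2, 3],
    'signup': pd.to_datetime(['2024-01-01', '2024-01-02', '2024-01-03']),
})
print(infer_stype_sketch(df['user_id'], 'user_id'))  # ID
print(infer_stype_sketch(df['signup'], 'signup'))    # timestamp
```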
```diff
@@ -205,7 +176,7 @@ class Table(ABC):
         return self._primary_key is not None
 
     @property
-    def primary_key(self) ->
+    def primary_key(self) -> Column | None:
         r"""The primary key column of this table.
 
         The getter returns the primary key column of this table, or ``None`` if
@@ -220,7 +191,7 @@ class Table(ABC):
         return self[self._primary_key]
 
     @primary_key.setter
-    def primary_key(self, name:
+    def primary_key(self, name: str | None) -> None:
         if name is not None and name == self._time_column:
             raise ValueError(f"Cannot specify column '{name}' as a primary "
                              f"key since it is already defined to be a time "
@@ -250,7 +221,7 @@ class Table(ABC):
         return self._time_column is not None
 
     @property
-    def time_column(self) ->
+    def time_column(self) -> Column | None:
         r"""The time column of this table.
 
         The getter returns the time column of this table, or ``None`` if no
@@ -265,7 +236,7 @@ class Table(ABC):
         return self[self._time_column]
 
     @time_column.setter
-    def time_column(self, name:
+    def time_column(self, name: str | None) -> None:
         if name is not None and name == self._primary_key:
             raise ValueError(f"Cannot specify column '{name}' as a time "
                              f"column since it is already defined to be a "
@@ -295,7 +266,7 @@ class Table(ABC):
         return self._end_time_column is not None
 
     @property
-    def end_time_column(self) ->
+    def end_time_column(self) -> Column | None:
         r"""The end time column of this table.
 
         The getter returns the end time column of this table, or ``None`` if no
@@ -311,7 +282,7 @@ class Table(ABC):
         return self[self._end_time_column]
 
     @end_time_column.setter
-    def end_time_column(self, name:
+    def end_time_column(self, name: str | None) -> None:
         if name is not None and name == self._primary_key:
             raise ValueError(f"Cannot specify column '{name}' as an end time "
                              f"column since it is already defined to be a "
@@ -384,7 +355,12 @@ class Table(ABC):
         if self._num_rows is not None:
             num_rows_repr = ' ({self._num_rows:,} rows)'
 
-        if
+        if in_snowflake_notebook():
+            import streamlit as st
+            md_repr = f"### 🏷️ Metadata of Table `{self.name}`{num_rows_repr}"
+            st.markdown(md_repr)
+            st.dataframe(self.metadata, hide_index=True)
+        elif in_notebook():
             from IPython.display import Markdown, display
             md_repr = f"### 🏷️ Metadata of Table `{self.name}`{num_rows_repr}"
             display(Markdown(md_repr))
```
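The metadata-display hunk above adds a Snowflake-notebook branch that renders the metadata frame through Streamlit before falling back to IPython display or plain `print`. Below is a self-contained sketch of that dispatch; the environment probes are illustrative stand-ins (kumoai ships its own `in_snowflake_notebook()` / `in_notebook()` helpers, which are not reproduced here), and the Streamlit/IPython calls only do useful work inside the respective environments.

```python
import importlib.util

import pandas as pd


def in_snowflake_notebook() -> bool:
    # Illustrative probe only; kumoai's real helper detects the environment.
    return (importlib.util.find_spec('streamlit') is not None
            and importlib.util.find_spec('snowflake') is not None)


def in_notebook() -> bool:
    # Illustrative probe only.
    return importlib.util.find_spec('IPython') is not None


def show_metadata(name: str, metadata: pd.DataFrame) -> None:
    if in_snowflake_notebook():
        import streamlit as st
        st.markdown(f"### Metadata of Table `{name}`")
        st.dataframe(metadata, hide_index=True)
    elif in_notebook():
        from IPython.display import Markdown, display
        display(Markdown(f"### Metadata of Table `{name}`"))
        display(metadata)
    else:
        print(f"Metadata of Table '{name}'")
        print(metadata.to_string(index=False))


show_metadata('users', pd.DataFrame({'name': ['user_id'], 'stype': ['ID']}))
```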
```diff
@@ -400,8 +376,91 @@ class Table(ABC):
             print(f"🏷️ Metadata of Table '{self.name}'{num_rows_repr}")
             print(self.metadata.to_string(index=False))
 
+    def infer_primary_key(self, verbose: bool = True) -> Self:
+        r"""Infers the primary key in this table.
+
+        Args:
+            verbose: Whether to print verbose output.
+        """
+        if self.has_primary_key():
+            return self
+
+        def _set_primary_key(primary_key: str) -> None:
+            self.primary_key = primary_key
+            if verbose:
+                print(f"Detected primary key '{primary_key}' in table "
+                      f"'{self.name}'")
+
+        # Inference from source column metadata:
+        if '_source_column_dict' in self.__dict__:
+            primary_key = self._source_primary_key
+            if (primary_key is not None and primary_key in self
+                    and self[primary_key].is_physical):
+                _set_primary_key(primary_key)
+                return self
+
+            unique_keys = [
+                column.name for column in self._source_column_dict.values()
+                if column.is_unique_key
+            ]
+            if (len(unique_keys) == 1  # NOTE No composite keys yet.
+                    and unique_keys[0] in self
+                    and self[unique_keys[0]].is_physical):
+                _set_primary_key(unique_keys[0])
+                return self
+
+        # Heuristic-based inference:
+        candidates = [
+            column.name for column in self.columns if column.stype == Stype.ID
+        ]
+        if len(candidates) == 0:
+            for column in self.columns:
+                if self.name.lower() == column.name.lower():
+                    candidates.append(column.name)
+                elif (self.name.lower().endswith('s')
+                      and self.name.lower()[:-1] == column.name.lower()):
+                    candidates.append(column.name)
+
+        if primary_key := infer_primary_key(
+                table_name=self.name,
+                df=self._sample_current_df(columns=candidates),
+                candidates=candidates,
+        ):
+            _set_primary_key(primary_key)
+            return self
+
+        return self
+
+    def infer_time_column(self, verbose: bool = True) -> Self:
+        r"""Infers the time column in this table.
+
+        Args:
+            verbose: Whether to print verbose output.
+        """
+        if self.has_time_column():
+            return self
+
+        # Heuristic-based inference:
+        candidates = [
+            column.name for column in self.columns
+            if column.stype == Stype.timestamp
+            and column.name != self._end_time_column
+        ]
+
+        if time_column := infer_time_column(
+                df=self._sample_current_df(columns=candidates),
+                candidates=candidates,
+        ):
+            self.time_column = time_column
+
+            if verbose:
+                print(f"Detected time column '{time_column}' in table "
+                      f"'{self.name}'")
+
+        return self
+
     def infer_metadata(self, verbose: bool = True) -> Self:
-        r"""Infers metadata, *i.e.*, primary keys and time columns, in
+        r"""Infers metadata, *i.e.*, primary keys and time columns, in this
         table.
 
         Args:
```
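The new `infer_primary_key()` above first trusts source-level metadata (a declared primary key, or a single unique key) and only then falls back to a heuristic candidate list: ID-typed columns, or, failing that, columns whose name matches the table name (optionally with a trailing `s` stripped) before ranking them with the `infer_primary_key` helper from `kumoai/experimental/rfm/infer`. The standalone sketch below reproduces only the candidate-selection step; `Col` is a simplified stand-in for kumoai's `Column`, and the final ranking is not reproduced.

```python
from dataclasses import dataclass


@dataclass
class Col:
    name: str
    stype: str  # 'ID', 'timestamp', 'categorical', ...


def primary_key_candidates(table_name: str, columns: list[Col]) -> list[str]:
    # Prefer columns already typed as IDs:
    candidates = [c.name for c in columns if c.stype == 'ID']
    if not candidates:
        # Otherwise, fall back to name matching against the table name:
        for c in columns:
            if table_name.lower() == c.name.lower():
                candidates.append(c.name)
            elif (table_name.lower().endswith('s')
                  and table_name.lower()[:-1] == c.name.lower()):
                candidates.append(c.name)
    return candidates


cols = [Col('customer', 'categorical'), Col('amount', 'numerical')]
print(primary_key_candidates('customers', cols))  # ['customer']
```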
```diff
@@ -409,45 +468,15 @@ class Table(ABC):
         """
         logs = []
 
-        # Try to detect primary key if not set:
         if not self.has_primary_key():
+            self.infer_primary_key(verbose=False)
+            if self.has_primary_key():
+                logs.append(f"primary key '{self._primary_key}'")
 
-            def is_candidate(column: Column) -> bool:
-                if column.stype == Stype.ID:
-                    return True
-                if all(column.stype != Stype.ID for column in self.columns):
-                    if self.name == column.name:
-                        return True
-                    if (self.name.endswith('s')
-                            and self.name[:-1] == column.name):
-                        return True
-                return False
-
-            candidates = [
-                column.name for column in self.columns if is_candidate(column)
-            ]
-
-            if primary_key := infer_primary_key(
-                    table_name=self.name,
-                    df=self._sample_df,
-                    candidates=candidates,
-            ):
-                self.primary_key = primary_key
-                logs.append(f"primary key '{primary_key}'")
-
-        # Try to detect time column if not set:
         if not self.has_time_column():
-
-
-
-                and column.name != self._end_time_column
-            ]
-            if time_column := infer_time_column(
-                    df=self._sample_df,
-                    candidates=candidates,
-            ):
-                self.time_column = time_column
-                logs.append(f"time column '{time_column}'")
+            self.infer_time_column(verbose=False)
+            if self.has_time_column():
+                logs.append(f"time column '{self._time_column}'")
 
         if verbose and len(logs) > 0:
             print(f"Detected {' and '.join(logs)} in table '{self.name}'")
@@ -468,6 +497,36 @@ class Table(ABC):
             end_time_col=self._end_time_column,
         )
 
+    @cached_property
+    def _source_column_dict(self) -> dict[str, SourceColumn]:
+        source_columns = self._get_source_columns()
+        if len(source_columns) == 0:
+            raise ValueError(f"Table '{self.name}' does not hold any column "
+                             f"with a supported data type")
+        return {column.name: column for column in source_columns}
+
+    @cached_property
+    def _source_sample_df(self) -> pd.DataFrame:
+        return self._get_source_sample_df()
+
+    @property
+    def _source_primary_key(self) -> str | None:
+        primary_keys = [
+            column.name for column in self._source_column_dict.values()
+            if column.is_primary_key
+        ]
+        if len(primary_keys) == 1:  # NOTE No composite keys yet.
+            return primary_keys[0]
+
+        return None
+
+    @cached_property
+    def _num_rows(self) -> int | None:
+        return self._get_num_rows()
+
+    def _sample_current_df(self, columns: Sequence[str]) -> pd.DataFrame:
+        return self._source_sample_df[columns]
+
     # Python builtins #########################################################
 
     def __hash__(self) -> int:
@@ -496,45 +555,21 @@ class Table(ABC):
             f' end_time_column={self._end_time_column},\n'
             f')')
 
-    # Abstract
-
-    @cached_property
-    def _source_column_dict(self) -> Dict[str, SourceColumn]:
-        return {col.name: col for col in self._get_source_columns()}
+    # Abstract Methods ########################################################
 
+    @property
     @abstractmethod
-    def
-
-
-    @cached_property
-    def _source_foreign_key_dict(self) -> Dict[str, SourceForeignKey]:
-        fkeys = self._get_source_foreign_keys()
-        # NOTE Drop all keys that link to different primary keys in the same
-        # table since we don't support composite keys yet:
-        table_pkeys: Dict[str, Set[str]] = defaultdict(set)
-        for fkey in fkeys:
-            table_pkeys[fkey.dst_table].add(fkey.primary_key)
-        return {
-            fkey.name: fkey
-            for fkey in fkeys if len(table_pkeys[fkey.dst_table]) == 1
-        }
+    def backend(self) -> DataBackend:
+        r"""The data backend of this table."""
 
     @abstractmethod
-    def
+    def _get_source_columns(self) -> list[SourceColumn]:
         pass
 
-    @cached_property
-    def _sample_df(self) -> pd.DataFrame:
-        return self._get_sample_df()
-
     @abstractmethod
-    def
+    def _get_source_sample_df(self) -> pd.DataFrame:
         pass
 
-    @cached_property
-    def _num_rows(self) -> Optional[int]:
-        return self._get_num_rows()
-
     @abstractmethod
-    def _get_num_rows(self) ->
+    def _get_num_rows(self) -> int | None:
         pass
```
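The abstract surface that each backend (local, SQLite, Snowflake) now implements is small: a `backend` property plus `_get_source_columns()`, `_get_source_sample_df()`, and `_get_num_rows()`, with the base class caching their results via `cached_property`. Below is a simplified, self-contained stand-in for that template-method split; class and method names are illustrative, not kumoai's public API.

```python
from abc import ABC, abstractmethod
from functools import cached_property

import pandas as pd


class BaseTable(ABC):
    @cached_property
    def _source_sample_df(self) -> pd.DataFrame:
        return self._get_source_sample_df()  # fetched once, then cached

    @cached_property
    def _num_rows(self) -> int | None:
        return self._get_num_rows()

    @abstractmethod
    def _get_source_sample_df(self) -> pd.DataFrame: ...

    @abstractmethod
    def _get_num_rows(self) -> int | None: ...


class PandasTable(BaseTable):
    """Toy in-memory backend: samples and counts rows from a DataFrame."""
    def __init__(self, df: pd.DataFrame) -> None:
        self._df = df

    def _get_source_sample_df(self) -> pd.DataFrame:
        return self._df.head(1_000)

    def _get_num_rows(self) -> int | None:
        return len(self._df)


table = PandasTable(pd.DataFrame({'user_id': [1, 2, 3]}))
print(table._num_rows)  # 3
```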