kumoai 2.8.0.dev202508221830__cp312-cp312-win_amd64.whl → 2.13.0.dev202512041141__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kumoai might be problematic. Click here for more details.

Files changed (52) hide show
  1. kumoai/__init__.py +22 -11
  2. kumoai/_version.py +1 -1
  3. kumoai/client/client.py +17 -16
  4. kumoai/client/endpoints.py +1 -0
  5. kumoai/client/rfm.py +37 -8
  6. kumoai/connector/file_upload_connector.py +94 -85
  7. kumoai/connector/utils.py +1399 -210
  8. kumoai/experimental/rfm/__init__.py +164 -46
  9. kumoai/experimental/rfm/authenticate.py +8 -5
  10. kumoai/experimental/rfm/backend/__init__.py +0 -0
  11. kumoai/experimental/rfm/backend/local/__init__.py +38 -0
  12. kumoai/experimental/rfm/backend/local/table.py +109 -0
  13. kumoai/experimental/rfm/backend/snow/__init__.py +35 -0
  14. kumoai/experimental/rfm/backend/snow/table.py +117 -0
  15. kumoai/experimental/rfm/backend/sqlite/__init__.py +30 -0
  16. kumoai/experimental/rfm/backend/sqlite/table.py +101 -0
  17. kumoai/experimental/rfm/base/__init__.py +10 -0
  18. kumoai/experimental/rfm/base/column.py +66 -0
  19. kumoai/experimental/rfm/base/source.py +18 -0
  20. kumoai/experimental/rfm/base/table.py +545 -0
  21. kumoai/experimental/rfm/{local_graph.py → graph.py} +413 -144
  22. kumoai/experimental/rfm/infer/__init__.py +6 -0
  23. kumoai/experimental/rfm/infer/dtype.py +79 -0
  24. kumoai/experimental/rfm/infer/pkey.py +126 -0
  25. kumoai/experimental/rfm/infer/time_col.py +62 -0
  26. kumoai/experimental/rfm/infer/timestamp.py +7 -4
  27. kumoai/experimental/rfm/local_graph_sampler.py +58 -11
  28. kumoai/experimental/rfm/local_graph_store.py +45 -37
  29. kumoai/experimental/rfm/local_pquery_driver.py +342 -46
  30. kumoai/experimental/rfm/pquery/__init__.py +4 -4
  31. kumoai/experimental/rfm/pquery/{backend.py → executor.py} +28 -58
  32. kumoai/experimental/rfm/pquery/pandas_executor.py +532 -0
  33. kumoai/experimental/rfm/rfm.py +559 -148
  34. kumoai/experimental/rfm/sagemaker.py +138 -0
  35. kumoai/jobs.py +27 -1
  36. kumoai/kumolib.cp312-win_amd64.pyd +0 -0
  37. kumoai/pquery/prediction_table.py +5 -3
  38. kumoai/pquery/training_table.py +5 -3
  39. kumoai/spcs.py +1 -3
  40. kumoai/testing/decorators.py +1 -1
  41. kumoai/trainer/job.py +9 -30
  42. kumoai/trainer/trainer.py +19 -10
  43. kumoai/utils/__init__.py +2 -1
  44. kumoai/utils/progress_logger.py +96 -16
  45. {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/METADATA +14 -5
  46. {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/RECORD +49 -36
  47. kumoai/experimental/rfm/local_table.py +0 -448
  48. kumoai/experimental/rfm/pquery/pandas_backend.py +0 -437
  49. kumoai/experimental/rfm/utils.py +0 -347
  50. {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/WHEEL +0 -0
  51. {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/licenses/LICENSE +0 -0
  52. {kumoai-2.8.0.dev202508221830.dist-info → kumoai-2.13.0.dev202512041141.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,10 @@
1
+ from .source import SourceColumn, SourceForeignKey
2
+ from .column import Column
3
+ from .table import Table
4
+
5
+ __all__ = [
6
+ 'SourceColumn',
7
+ 'SourceForeignKey',
8
+ 'Column',
9
+ 'Table',
10
+ ]
@@ -0,0 +1,66 @@
1
+ from dataclasses import dataclass
2
from typing import Any, Tuple
3
+
4
+ from kumoapi.typing import Dtype, Stype
5
+
6
+
7
@dataclass(init=False, repr=False, eq=False)
class Column:
    r"""A single column of a table, described by its name, data type and
    semantic type.

    The name and data type are exposed read-only via properties. The semantic
    type (``stype``) can be re-assigned at any time; every assignment is
    validated in :meth:`__setattr__` against the data type and against the
    role flags (primary key, time column, end time column) of this column.

    Args:
        name: The name of the column.
        dtype: The data type of the column.
        stype: The semantic type of the column.
        is_primary_key: Whether this column acts as a primary key.
        is_time_column: Whether this column acts as a time column.
        is_end_time_column: Whether this column acts as an end time column.

    Raises:
        ValueError: If ``stype`` is incompatible with ``dtype`` or with one of
            the role flags.
    """
    stype: Stype

    def __init__(
        self,
        name: str,
        dtype: Dtype,
        stype: Stype,
        is_primary_key: bool = False,
        is_time_column: bool = False,
        is_end_time_column: bool = False,
    ) -> None:
        # NOTE The semantic type needs to be assigned last since its
        # validation in `__setattr__` reads all of the attributes below:
        self._name = name
        self._dtype = Dtype(dtype)
        self._is_primary_key = is_primary_key
        self._is_time_column = is_time_column
        self._is_end_time_column = is_end_time_column
        self.stype = Stype(stype)

    @property
    def name(self) -> str:
        r"""The name of this column."""
        return self._name

    @property
    def dtype(self) -> Dtype:
        r"""The data type of this column."""
        return self._dtype

    def __setattr__(self, key: str, val: Any) -> None:
        r"""Sets an attribute, validating assignments to ``stype``.

        Strings assigned to ``stype`` are converted to :class:`Stype` first.
        All other attributes are stored without any checks.

        Raises:
            ValueError: If the semantic type does not support the data type
                of this column, or if it conflicts with the column being a
                primary key (requires ``ID``) or a (end) time column
                (requires ``timestamp``).
        """
        if key == 'stype':
            if isinstance(val, str):
                val = Stype(val)
            assert isinstance(val, Stype)
            if not val.supports_dtype(self.dtype):
                raise ValueError(f"Column '{self.name}' received an "
                                 f"incompatible semantic type (got "
                                 f"dtype='{self.dtype}' and stype='{val}')")
            if self._is_primary_key and val != Stype.ID:
                raise ValueError(f"Primary key '{self.name}' must have 'ID' "
                                 f"semantic type (got '{val}')")
            if self._is_time_column and val != Stype.timestamp:
                raise ValueError(f"Time column '{self.name}' must have "
                                 f"'timestamp' semantic type (got '{val}')")
            if self._is_end_time_column and val != Stype.timestamp:
                raise ValueError(f"End time column '{self.name}' must have "
                                 f"'timestamp' semantic type (got '{val}')")

        super().__setattr__(key, val)

    def _identity(self) -> Tuple[str, Stype, Dtype]:
        # The fields that define hashing and equality of a column.
        return (self.name, self.stype, self.dtype)

    def __hash__(self) -> int:
        return hash(self._identity())

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, Column):
            return False
        # NOTE Compare the underlying fields rather than their hashes: two
        # different columns may share the same hash value.
        return self._identity() == other._identity()

    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}(name={self.name}, '
                f'stype={self.stype}, dtype={self.dtype})')
@@ -0,0 +1,18 @@
1
+ from dataclasses import dataclass
2
+
3
+ from kumoapi.typing import Dtype
4
+
5
+
6
@dataclass
class SourceColumn:
    r"""Describes a column as reported by the underlying source table.

    Args:
        name: The name of the column.
        dtype: The data type of the column.
        is_primary_key: Whether the source marks this column as a primary
            key.
        is_unique_key: Whether the source marks this column as a unique key.
    """
    # Identification of the column:
    name: str
    dtype: Dtype
    # Key constraints reported by the source:
    is_primary_key: bool
    is_unique_key: bool
12
+
13
+
14
@dataclass
class SourceForeignKey:
    r"""Describes a foreign key as reported by the underlying source table.

    Args:
        name: The name of the foreign key.
        dst_table: The name of the table the foreign key points to.
        primary_key: The name of the primary key that is referenced in
            ``dst_table``.
    """
    # The referencing side:
    name: str
    # The referenced side:
    dst_table: str
    primary_key: str
@@ -0,0 +1,545 @@
1
+ from abc import ABC, abstractmethod
2
+ from collections import defaultdict
3
+ from functools import cached_property
4
+ from typing import Dict, List, Optional, Sequence, Set
5
+
6
+ import pandas as pd
7
+ from kumoapi.source_table import UnavailableSourceTable
8
+ from kumoapi.table import Column as ColumnDefinition
9
+ from kumoapi.table import TableDefinition
10
+ from kumoapi.typing import Stype
11
+ from typing_extensions import Self
12
+
13
+ from kumoai import in_notebook, in_snowflake_notebook
14
+ from kumoai.experimental.rfm.base import Column, SourceColumn, SourceForeignKey
15
+ from kumoai.experimental.rfm.infer import (
16
+ contains_categorical,
17
+ contains_id,
18
+ contains_multicategorical,
19
+ contains_timestamp,
20
+ infer_primary_key,
21
+ infer_time_column,
22
+ )
23
+
24
+
25
class Table(ABC):
    r"""A :class:`Table` fully specifies the relevant metadata of a single
    table, *i.e.* its selected columns, data types, semantic types, primary
    keys and time columns.

    Sub-classes provide access to the underlying data source by implementing
    :meth:`_get_source_columns`, :meth:`_get_source_foreign_keys`,
    :meth:`_get_sample_df` and :meth:`_get_num_rows`. Their results are cached
    per instance on first access.

    Args:
        name: The name of this table.
        columns: The selected columns of this table. If ``None`` or empty,
            all columns of the source table are selected.
        primary_key: The name of the primary key of this table, if it exists.
            If not given, it is taken from the source table in case the
            source reports exactly one primary key column, or otherwise
            exactly one unique key column.
        time_column: The name of the time column of this table, if it exists.
        end_time_column: The name of the end time column of this table, if it
            exists.

    Raises:
        ValueError: If the source table does not report any column, or if
            ``primary_key`` conflicts with the single primary key reported by
            the source table.
    """
    def __init__(
        self,
        name: str,
        columns: Optional[Sequence[str]] = None,
        primary_key: Optional[str] = None,
        time_column: Optional[str] = None,
        end_time_column: Optional[str] = None,
    ) -> None:

        self._name = name
        self._primary_key: Optional[str] = None
        self._time_column: Optional[str] = None
        self._end_time_column: Optional[str] = None

        source_columns = self._source_column_dict
        if not source_columns:
            raise ValueError(f"Table '{name}' does not hold any column with "
                             f"a supported data type")

        primary_keys = [
            column.name for column in source_columns.values()
            if column.is_primary_key
        ]
        if len(primary_keys) == 1:  # NOTE No composite keys yet.
            if primary_key is not None and primary_key != primary_keys[0]:
                raise ValueError(f"Found duplicate primary key "
                                 f"definition '{primary_key}' and "
                                 f"'{primary_keys[0]}' in table '{name}'")
            primary_key = primary_keys[0]

        # Fall back to a unique key only if it is unambiguous:
        unique_keys = [
            column.name for column in source_columns.values()
            if column.is_unique_key
        ]
        if primary_key is None and len(unique_keys) == 1:
            primary_key = unique_keys[0]

        self._columns: Dict[str, Column] = {}
        for column_name in columns or list(source_columns):
            self.add_column(column_name)

        # Special columns are added on demand in case they have not been
        # selected explicitly via `columns`:
        if primary_key is not None:
            if primary_key not in self:
                self.add_column(primary_key)
            self.primary_key = primary_key

        if time_column is not None:
            if time_column not in self:
                self.add_column(time_column)
            self.time_column = time_column

        if end_time_column is not None:
            if end_time_column not in self:
                self.add_column(end_time_column)
            self.end_time_column = end_time_column

    @property
    def name(self) -> str:
        r"""The name of this table."""
        return self._name

    # Data column #############################################################

    def has_column(self, name: str) -> bool:
        r"""Returns ``True`` if this table holds a column with name ``name``;
        ``False`` otherwise.
        """
        return name in self._columns

    def column(self, name: str) -> Column:
        r"""Returns the data column named with name ``name`` in this table.

        Args:
            name: The name of the column.

        Raises:
            KeyError: If ``name`` is not present in this table.
        """
        if not self.has_column(name):
            raise KeyError(f"Column '{name}' not found in table '{self.name}'")
        return self._columns[name]

    @property
    def columns(self) -> List[Column]:
        r"""Returns a list of :class:`Column` objects that represent the
        columns in this table.
        """
        return list(self._columns.values())

    def add_column(self, name: str) -> Column:
        r"""Adds a column to this table.

        The data type is taken from the source table, and the semantic type
        is inferred from the sample data of this table.

        Args:
            name: The name of the column.

        Raises:
            KeyError: If ``name`` is already present in this table, or if it
                does not exist in the underlying source table.
            RuntimeError: If the data type or the semantic type of the column
                could not be obtained.
        """
        if name in self:
            raise KeyError(f"Column '{name}' already exists in table "
                           f"'{self.name}'")

        if name not in self._source_column_dict:
            raise KeyError(f"Column '{name}' does not exist in the underlying "
                           f"source table")

        try:
            dtype = self._source_column_dict[name].dtype
        except Exception as e:
            raise RuntimeError(f"Could not obtain data type for column "
                               f"'{name}' in table '{self.name}'. Change "
                               f"the data type of the column in the source "
                               f"table or remove it from the table.") from e

        try:
            ser = self._sample_df[name]
            # NOTE The order of checks matters: the first match wins.
            if contains_id(ser, name, dtype):
                stype = Stype.ID
            elif contains_timestamp(ser, name, dtype):
                stype = Stype.timestamp
            elif contains_multicategorical(ser, name, dtype):
                stype = Stype.multicategorical
            elif contains_categorical(ser, name, dtype):
                stype = Stype.categorical
            else:
                stype = dtype.default_stype
        except Exception as e:
            raise RuntimeError(f"Could not obtain semantic type for column "
                               f"'{name}' in table '{self.name}'. Change "
                               f"the data type of the column in the source "
                               f"table or remove it from the table.") from e

        self._columns[name] = Column(
            name=name,
            dtype=dtype,
            stype=stype,
        )

        return self._columns[name]

    def remove_column(self, name: str) -> Self:
        r"""Removes a column from this table.

        If the column acts as primary key, time column or end time column,
        the respective role is unset as well.

        Args:
            name: The name of the column.

        Raises:
            KeyError: If ``name`` is not present in this table.
        """
        if name not in self:
            raise KeyError(f"Column '{name}' not found in table '{self.name}'")

        if self._primary_key == name:
            self.primary_key = None
        if self._time_column == name:
            self.time_column = None
        if self._end_time_column == name:
            self.end_time_column = None
        del self._columns[name]

        return self

    # Primary key #############################################################

    def has_primary_key(self) -> bool:
        r"""Returns ``True`` if this table has a primary key; ``False``
        otherwise.
        """
        return self._primary_key is not None

    @property
    def primary_key(self) -> Optional[Column]:
        r"""The primary key column of this table.

        The getter returns the primary key column of this table, or ``None`` if
        no such primary key is present.

        The setter sets a column as a primary key on this table (or unsets
        the primary key if ``None`` is given). It raises a
        :class:`ValueError` if the column is already used as a (end) time
        column or cannot take the ``ID`` semantic type, and a
        :class:`KeyError` if the column name does not match a column in this
        table. On failure, the previous primary key is left untouched.
        """
        if self._primary_key is None:
            return None
        return self[self._primary_key]

    @primary_key.setter
    def primary_key(self, name: Optional[str]) -> None:
        if name is not None and name == self._time_column:
            raise ValueError(f"Cannot specify column '{name}' as a primary "
                             f"key since it is already defined to be a time "
                             f"column")
        if name is not None and name == self._end_time_column:
            raise ValueError(f"Cannot specify column '{name}' as a primary "
                             f"key since it is already defined to be an end "
                             f"time column")

        if name is None:
            if (previous := self.primary_key) is not None:
                previous._is_primary_key = False
            self._primary_key = None
            return

        # NOTE Look up and validate the new column *before* releasing the
        # previous one, such that a failure does not leave the table in an
        # inconsistent state:
        column = self[name]
        column.stype = Stype.ID

        if (previous := self.primary_key) is not None:
            previous._is_primary_key = False
        column._is_primary_key = True
        self._primary_key = name

    # Time column #############################################################

    def has_time_column(self) -> bool:
        r"""Returns ``True`` if this table has a time column; ``False``
        otherwise.
        """
        return self._time_column is not None

    @property
    def time_column(self) -> Optional[Column]:
        r"""The time column of this table.

        The getter returns the time column of this table, or ``None`` if no
        such time column is present.

        The setter sets a column as a time column on this table (or unsets
        the time column if ``None`` is given). It raises a
        :class:`ValueError` if the column is already used as primary key or
        end time column or cannot take the ``timestamp`` semantic type, and a
        :class:`KeyError` if the column name does not match a column in this
        table. On failure, the previous time column is left untouched.
        """
        if self._time_column is None:
            return None
        return self[self._time_column]

    @time_column.setter
    def time_column(self, name: Optional[str]) -> None:
        if name is not None and name == self._primary_key:
            raise ValueError(f"Cannot specify column '{name}' as a time "
                             f"column since it is already defined to be a "
                             f"primary key")
        if name is not None and name == self._end_time_column:
            raise ValueError(f"Cannot specify column '{name}' as a time "
                             f"column since it is already defined to be an "
                             f"end time column")

        if name is None:
            if (previous := self.time_column) is not None:
                previous._is_time_column = False
            self._time_column = None
            return

        # NOTE Validate the new column before releasing the previous one:
        column = self[name]
        column.stype = Stype.timestamp

        if (previous := self.time_column) is not None:
            previous._is_time_column = False
        column._is_time_column = True
        self._time_column = name

    # End Time column #########################################################

    def has_end_time_column(self) -> bool:
        r"""Returns ``True`` if this table has an end time column; ``False``
        otherwise.
        """
        return self._end_time_column is not None

    @property
    def end_time_column(self) -> Optional[Column]:
        r"""The end time column of this table.

        The getter returns the end time column of this table, or ``None`` if no
        such end time column is present.

        The setter sets a column as an end time column on this table (or
        unsets the end time column if ``None`` is given). It raises a
        :class:`ValueError` if the column is already used as primary key or
        time column or cannot take the ``timestamp`` semantic type, and a
        :class:`KeyError` if the column name does not match a column in this
        table. On failure, the previous end time column is left untouched.
        """
        if self._end_time_column is None:
            return None
        return self[self._end_time_column]

    @end_time_column.setter
    def end_time_column(self, name: Optional[str]) -> None:
        if name is not None and name == self._primary_key:
            raise ValueError(f"Cannot specify column '{name}' as an end time "
                             f"column since it is already defined to be a "
                             f"primary key")
        if name is not None and name == self._time_column:
            raise ValueError(f"Cannot specify column '{name}' as an end time "
                             f"column since it is already defined to be a "
                             f"time column")

        if name is None:
            if (previous := self.end_time_column) is not None:
                previous._is_end_time_column = False
            self._end_time_column = None
            return

        # NOTE Validate the new column before releasing the previous one:
        column = self[name]
        column.stype = Stype.timestamp

        if (previous := self.end_time_column) is not None:
            previous._is_end_time_column = False
        column._is_end_time_column = True
        self._end_time_column = name

    # Metadata ################################################################

    @property
    def metadata(self) -> pd.DataFrame:
        r"""Returns a :class:`pandas.DataFrame` object containing metadata
        information about the columns in this table.

        The returned dataframe has columns ``name``, ``dtype``, ``stype``,
        ``is_primary_key``, ``is_time_column`` and ``is_end_time_column``,
        which provide an aggregate view of the properties of the columns of
        this table.

        Example:
            >>> # doctest: +SKIP
            >>> import kumoai.experimental.rfm as rfm
            >>> table = rfm.LocalTable(df=..., name=...).infer_metadata()
            >>> table.metadata
                     name    dtype stype  is_primary_key  is_time_column  is_end_time_column
            0  CustomerID  float64    ID            True           False               False
        """  # noqa: E501
        cols = self.columns

        return pd.DataFrame({
            'name':
            pd.Series(dtype=str, data=[c.name for c in cols]),
            'dtype':
            pd.Series(dtype=str, data=[c.dtype for c in cols]),
            'stype':
            pd.Series(dtype=str, data=[c.stype for c in cols]),
            'is_primary_key':
            pd.Series(
                dtype=bool,
                data=[self._primary_key == c.name for c in cols],
            ),
            'is_time_column':
            pd.Series(
                dtype=bool,
                data=[self._time_column == c.name for c in cols],
            ),
            'is_end_time_column':
            pd.Series(
                dtype=bool,
                data=[self._end_time_column == c.name for c in cols],
            ),
        })

    def print_metadata(self) -> None:
        r"""Prints the :meth:`~metadata` of this table.

        The output is rendered via ``streamlit`` inside Snowflake notebooks,
        via ``IPython`` inside other notebooks, and as plain text otherwise.
        The number of rows is included in the title if it is known.
        """
        num_rows_repr = ''
        if self._num_rows is not None:
            num_rows_repr = f' ({self._num_rows:,} rows)'

        if in_snowflake_notebook():
            import streamlit as st
            md_repr = f"### 🏷️ Metadata of Table `{self.name}`{num_rows_repr}"
            st.markdown(md_repr)
            st.dataframe(self.metadata, hide_index=True)
        elif in_notebook():
            from IPython.display import Markdown, display
            md_repr = f"### 🏷️ Metadata of Table `{self.name}`{num_rows_repr}"
            display(Markdown(md_repr))
            df = self.metadata
            try:
                if hasattr(df.style, 'hide'):
                    display(df.style.hide(axis='index'))  # pandas=2
                else:
                    display(df.style.hide_index())  # pandas<1.3
            except ImportError:
                print(df.to_string(index=False))  # missing jinja2
        else:
            print(f"🏷️ Metadata of Table '{self.name}'{num_rows_repr}")
            print(self.metadata.to_string(index=False))

    def infer_metadata(self, verbose: bool = True) -> Self:
        r"""Infers metadata, *i.e.*, primary keys and time columns, in the
        table.

        Roles that are already set are left untouched.

        Args:
            verbose: Whether to print verbose output.
        """
        logs = []

        # Try to detect primary key if not set:
        if not self.has_primary_key():
            # Computed once up-front rather than once per column:
            has_id_column = any(col.stype == Stype.ID for col in self.columns)

            def is_candidate(column: Column) -> bool:
                if column.stype == Stype.ID:
                    return True
                if not has_id_column:
                    # Without any ID column, fall back to columns that are
                    # named after the table (singular or plural):
                    if self.name == column.name:
                        return True
                    if (self.name.endswith('s')
                            and self.name[:-1] == column.name):
                        return True
                return False

            candidates = [
                column.name for column in self.columns if is_candidate(column)
            ]

            if primary_key := infer_primary_key(
                    table_name=self.name,
                    df=self._sample_df,
                    candidates=candidates,
            ):
                self.primary_key = primary_key
                logs.append(f"primary key '{primary_key}'")

        # Try to detect time column if not set:
        if not self.has_time_column():
            candidates = [
                column.name for column in self.columns
                if column.stype == Stype.timestamp
                and column.name != self._end_time_column
            ]
            if time_column := infer_time_column(
                    df=self._sample_df,
                    candidates=candidates,
            ):
                self.time_column = time_column
                logs.append(f"time column '{time_column}'")

        if verbose and len(logs) > 0:
            print(f"Detected {' and '.join(logs)} in table '{self.name}'")

        return self

    # Helpers #################################################################

    def _to_api_table_definition(self) -> TableDefinition:
        # Converts this table into its API representation.
        return TableDefinition(
            cols=[
                ColumnDefinition(col.name, col.stype, col.dtype)
                for col in self.columns
            ],
            source_table=UnavailableSourceTable(table=self.name),
            pkey=self._primary_key,
            time_col=self._time_column,
            end_time_col=self._end_time_column,
        )

    # Python builtins #########################################################

    def __hash__(self) -> int:
        special_columns = [
            self.primary_key,
            self.time_column,
            self.end_time_column,
        ]
        return hash(tuple(self.columns + special_columns))

    def __contains__(self, name: str) -> bool:
        return self.has_column(name)

    def __getitem__(self, name: str) -> Column:
        return self.column(name)

    def __delitem__(self, name: str) -> None:
        self.remove_column(name)

    def __repr__(self) -> str:
        return (f'{self.__class__.__name__}(\n'
                f'  name={self.name},\n'
                f'  num_columns={len(self.columns)},\n'
                f'  primary_key={self._primary_key},\n'
                f'  time_column={self._time_column},\n'
                f'  end_time_column={self._end_time_column},\n'
                f')')

    # Abstract method #########################################################

    @cached_property
    def _source_column_dict(self) -> Dict[str, SourceColumn]:
        # Maps column names to the columns reported by the source table.
        return {col.name: col for col in self._get_source_columns()}

    @abstractmethod
    def _get_source_columns(self) -> List[SourceColumn]:
        r"""Returns the columns of the underlying source table."""

    @cached_property
    def _source_foreign_key_dict(self) -> Dict[str, SourceForeignKey]:
        fkeys = self._get_source_foreign_keys()
        # NOTE Drop all keys that link to different primary keys in the same
        # table since we don't support composite keys yet:
        table_pkeys: Dict[str, Set[str]] = defaultdict(set)
        for fkey in fkeys:
            table_pkeys[fkey.dst_table].add(fkey.primary_key)
        return {
            fkey.name: fkey
            for fkey in fkeys if len(table_pkeys[fkey.dst_table]) == 1
        }

    @abstractmethod
    def _get_source_foreign_keys(self) -> List[SourceForeignKey]:
        r"""Returns the foreign keys of the underlying source table."""

    @cached_property
    def _sample_df(self) -> pd.DataFrame:
        return self._get_sample_df()

    @abstractmethod
    def _get_sample_df(self) -> pd.DataFrame:
        r"""Returns the data frame used to infer semantic types, primary
        keys and time columns.
        """

    @cached_property
    def _num_rows(self) -> Optional[int]:
        return self._get_num_rows()

    @abstractmethod
    def _get_num_rows(self) -> Optional[int]:
        r"""Returns the number of rows of this table, or ``None`` if it is
        not known.
        """