kumoai 2.14.0.dev202601011731__cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kumoai might be problematic; see the linked advisory for more details.
- kumoai/__init__.py +300 -0
- kumoai/_logging.py +29 -0
- kumoai/_singleton.py +25 -0
- kumoai/_version.py +1 -0
- kumoai/artifact_export/__init__.py +9 -0
- kumoai/artifact_export/config.py +209 -0
- kumoai/artifact_export/job.py +108 -0
- kumoai/client/__init__.py +5 -0
- kumoai/client/client.py +223 -0
- kumoai/client/connector.py +110 -0
- kumoai/client/endpoints.py +150 -0
- kumoai/client/graph.py +120 -0
- kumoai/client/jobs.py +471 -0
- kumoai/client/online.py +78 -0
- kumoai/client/pquery.py +207 -0
- kumoai/client/rfm.py +112 -0
- kumoai/client/source_table.py +53 -0
- kumoai/client/table.py +101 -0
- kumoai/client/utils.py +130 -0
- kumoai/codegen/__init__.py +19 -0
- kumoai/codegen/cli.py +100 -0
- kumoai/codegen/context.py +16 -0
- kumoai/codegen/edits.py +473 -0
- kumoai/codegen/exceptions.py +10 -0
- kumoai/codegen/generate.py +222 -0
- kumoai/codegen/handlers/__init__.py +4 -0
- kumoai/codegen/handlers/connector.py +118 -0
- kumoai/codegen/handlers/graph.py +71 -0
- kumoai/codegen/handlers/pquery.py +62 -0
- kumoai/codegen/handlers/table.py +109 -0
- kumoai/codegen/handlers/utils.py +42 -0
- kumoai/codegen/identity.py +114 -0
- kumoai/codegen/loader.py +93 -0
- kumoai/codegen/naming.py +94 -0
- kumoai/codegen/registry.py +121 -0
- kumoai/connector/__init__.py +31 -0
- kumoai/connector/base.py +153 -0
- kumoai/connector/bigquery_connector.py +200 -0
- kumoai/connector/databricks_connector.py +213 -0
- kumoai/connector/file_upload_connector.py +189 -0
- kumoai/connector/glue_connector.py +150 -0
- kumoai/connector/s3_connector.py +278 -0
- kumoai/connector/snowflake_connector.py +252 -0
- kumoai/connector/source_table.py +471 -0
- kumoai/connector/utils.py +1796 -0
- kumoai/databricks.py +14 -0
- kumoai/encoder/__init__.py +4 -0
- kumoai/exceptions.py +26 -0
- kumoai/experimental/__init__.py +0 -0
- kumoai/experimental/rfm/__init__.py +210 -0
- kumoai/experimental/rfm/authenticate.py +432 -0
- kumoai/experimental/rfm/backend/__init__.py +0 -0
- kumoai/experimental/rfm/backend/local/__init__.py +42 -0
- kumoai/experimental/rfm/backend/local/graph_store.py +297 -0
- kumoai/experimental/rfm/backend/local/sampler.py +312 -0
- kumoai/experimental/rfm/backend/local/table.py +113 -0
- kumoai/experimental/rfm/backend/snow/__init__.py +37 -0
- kumoai/experimental/rfm/backend/snow/sampler.py +297 -0
- kumoai/experimental/rfm/backend/snow/table.py +242 -0
- kumoai/experimental/rfm/backend/sqlite/__init__.py +32 -0
- kumoai/experimental/rfm/backend/sqlite/sampler.py +398 -0
- kumoai/experimental/rfm/backend/sqlite/table.py +184 -0
- kumoai/experimental/rfm/base/__init__.py +30 -0
- kumoai/experimental/rfm/base/column.py +152 -0
- kumoai/experimental/rfm/base/expression.py +44 -0
- kumoai/experimental/rfm/base/sampler.py +761 -0
- kumoai/experimental/rfm/base/source.py +19 -0
- kumoai/experimental/rfm/base/sql_sampler.py +143 -0
- kumoai/experimental/rfm/base/table.py +736 -0
- kumoai/experimental/rfm/graph.py +1237 -0
- kumoai/experimental/rfm/infer/__init__.py +19 -0
- kumoai/experimental/rfm/infer/categorical.py +40 -0
- kumoai/experimental/rfm/infer/dtype.py +82 -0
- kumoai/experimental/rfm/infer/id.py +46 -0
- kumoai/experimental/rfm/infer/multicategorical.py +48 -0
- kumoai/experimental/rfm/infer/pkey.py +128 -0
- kumoai/experimental/rfm/infer/stype.py +35 -0
- kumoai/experimental/rfm/infer/time_col.py +61 -0
- kumoai/experimental/rfm/infer/timestamp.py +41 -0
- kumoai/experimental/rfm/pquery/__init__.py +7 -0
- kumoai/experimental/rfm/pquery/executor.py +102 -0
- kumoai/experimental/rfm/pquery/pandas_executor.py +530 -0
- kumoai/experimental/rfm/relbench.py +76 -0
- kumoai/experimental/rfm/rfm.py +1184 -0
- kumoai/experimental/rfm/sagemaker.py +138 -0
- kumoai/experimental/rfm/task_table.py +231 -0
- kumoai/formatting.py +30 -0
- kumoai/futures.py +99 -0
- kumoai/graph/__init__.py +12 -0
- kumoai/graph/column.py +106 -0
- kumoai/graph/graph.py +948 -0
- kumoai/graph/table.py +838 -0
- kumoai/jobs.py +80 -0
- kumoai/kumolib.cpython-310-x86_64-linux-gnu.so +0 -0
- kumoai/mixin.py +28 -0
- kumoai/pquery/__init__.py +25 -0
- kumoai/pquery/prediction_table.py +287 -0
- kumoai/pquery/predictive_query.py +641 -0
- kumoai/pquery/training_table.py +424 -0
- kumoai/spcs.py +121 -0
- kumoai/testing/__init__.py +8 -0
- kumoai/testing/decorators.py +57 -0
- kumoai/testing/snow.py +50 -0
- kumoai/trainer/__init__.py +42 -0
- kumoai/trainer/baseline_trainer.py +93 -0
- kumoai/trainer/config.py +2 -0
- kumoai/trainer/distilled_trainer.py +175 -0
- kumoai/trainer/job.py +1192 -0
- kumoai/trainer/online_serving.py +258 -0
- kumoai/trainer/trainer.py +475 -0
- kumoai/trainer/util.py +103 -0
- kumoai/utils/__init__.py +11 -0
- kumoai/utils/datasets.py +83 -0
- kumoai/utils/display.py +51 -0
- kumoai/utils/forecasting.py +209 -0
- kumoai/utils/progress_logger.py +343 -0
- kumoai/utils/sql.py +3 -0
- kumoai-2.14.0.dev202601011731.dist-info/METADATA +71 -0
- kumoai-2.14.0.dev202601011731.dist-info/RECORD +122 -0
- kumoai-2.14.0.dev202601011731.dist-info/WHEEL +6 -0
- kumoai-2.14.0.dev202601011731.dist-info/licenses/LICENSE +9 -0
- kumoai-2.14.0.dev202601011731.dist-info/top_level.txt +1 -0
kumoai/graph/table.py
ADDED
|
@@ -0,0 +1,838 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import logging
|
|
3
|
+
import time
|
|
4
|
+
from typing import Any, Dict, List, Optional, Union
|
|
5
|
+
|
|
6
|
+
import kumoapi.data_snapshot as snapshot_api
|
|
7
|
+
import kumoapi.table as api
|
|
8
|
+
import pandas as pd
|
|
9
|
+
from kumoapi.common import JobStatus
|
|
10
|
+
from kumoapi.data_snapshot import TableSnapshotID
|
|
11
|
+
from kumoapi.typing import Stype
|
|
12
|
+
from typing_extensions import Self
|
|
13
|
+
|
|
14
|
+
from kumoai import global_state
|
|
15
|
+
from kumoai.client.table import TableID
|
|
16
|
+
from kumoai.connector import SourceColumn, SourceTable
|
|
17
|
+
from kumoai.graph.column import Column
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
_DEFAULT_INTERVAL_S = 20
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Table:
|
|
25
|
+
r"""A Table represents metadata information for a table in a Kumo
|
|
26
|
+
:class:`~kumoai.graph.Graph`.
|
|
27
|
+
|
|
28
|
+
Whereas a :class:`~kumoai.connector.SourceTable` is simply a reference to a
|
|
29
|
+
table behind a backing :class:`~kumoai.connector.Connector`, a table fully
|
|
30
|
+
specifies the relevant metadata (including selected source columns, column
|
|
31
|
+
data type and semantic type, and relational constraint information)
|
|
32
|
+
necessary to train a :class:`~kumoai.pquery.PredictiveQuery` on graph of
|
|
33
|
+
tables. A table can either be constructed explicitly, or with the
|
|
34
|
+
convenience method :meth:`~kumoai.graph.Table.from_source_table`.
|
|
35
|
+
|
|
36
|
+
.. code-block:: python
|
|
37
|
+
|
|
38
|
+
import kumoai
|
|
39
|
+
|
|
40
|
+
# Define connector to source data:
|
|
41
|
+
connector = kumoai.S3Connector('s3://...')
|
|
42
|
+
|
|
43
|
+
# Create table using `from_source_table`:
|
|
44
|
+
customer = kumoai.Table.from_source_table(
|
|
45
|
+
source_table=connector['customer'],
|
|
46
|
+
primary_key='CustomerID',
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
# Create a table by constructing it directly:
|
|
50
|
+
customer = kumoai.Table(
|
|
51
|
+
source_table=connector['customer'],
|
|
52
|
+
columns=[kumoai.Column(name='CustomerID', dtype='string', stype='ID')],
|
|
53
|
+
primary_key='CustomerID',
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
# Infer any missing metadata in the table, from source table
|
|
57
|
+
# properties:
|
|
58
|
+
print("Current metadata: ", customer.metadata)
|
|
59
|
+
customer.infer_metadata()
|
|
60
|
+
|
|
61
|
+
# Validate the table configuration, for use in Kumo downstream models:
|
|
62
|
+
customer.validate(verbose=True)
|
|
63
|
+
|
|
64
|
+
# Fetch statistics from a snapshot of this table (this method will
|
|
65
|
+
# take a table snapshot, and as a result may have high latency):
|
|
66
|
+
customer.get_stats(wait_for="minimal")
|
|
67
|
+
|
|
68
|
+
Args:
|
|
69
|
+
source_table: The source table this Kumo table is created from.
|
|
70
|
+
columns: The selected columns of the source table that are part of this
|
|
71
|
+
Kumo table. Note that each column must specify its data type and
|
|
72
|
+
semantic type; see the :class:`~kumoai.graph.Column` documentation
|
|
73
|
+
for more information. If `None` all columns from the
|
|
74
|
+
source table are included by default.
|
|
75
|
+
primary_key: The primary key of the table, if present. The primary key
|
|
76
|
+
must exist in the :obj:`columns` argument.
|
|
77
|
+
time_column: The time column of the table, if present. The time column
|
|
78
|
+
must exist in the :obj:`columns` argument.
|
|
79
|
+
end_time_column: The end time column of the table, if present. The end
|
|
80
|
+
time column must exist in the :obj:`columns` argument.
|
|
81
|
+
""" # noqa: E501
|
|
82
|
+
|
|
83
|
+
    def __init__(
        self,
        source_table: SourceTable,
        columns: Optional[List[Union[SourceColumn, Column]]] = None,
        primary_key: Optional[str] = None,
        time_column: Optional[str] = None,
        end_time_column: Optional[str] = None,
    ) -> None:
        # Reference to the source (raw) table:
        self.source_table = source_table
        self.source_name = source_table.name

        # Columns. Note that there is no distinction between columns treated as
        # features and those treated as constraints at this stage. The
        # treatment of columns as "feature" or "schema-only" columns will be
        # decided at the model plan stage (e.g. by encoding as `Null()`):
        self._columns: Dict[str, Column] = {}

        # Basic schema. This information is defined at the table level; each
        # attribute stores the *name* of the corresponding column (the column
        # object itself lives in `self._columns`):
        self._primary_key: Optional[str] = None
        self._time_column: Optional[str] = None
        self._end_time_column: Optional[str] = None

        # Update values. If no columns are given, default to selecting every
        # column of the source table:
        if columns is None:
            columns = list(source_table.column_dict.values())
        for col in (columns or []):
            # Source columns carry name/stype/dtype; lift them into full
            # `Column` objects before registration:
            if isinstance(col, SourceColumn):
                col = Column(name=col.name, stype=col.stype, dtype=col.dtype)
            self.add_column(Column._cast(col))
        # Assigning through the property setters below also validates the
        # semantic types and registers the columns if not already present:
        self.primary_key = Column._cast(primary_key)
        self.time_column = Column._cast(time_column)
        self.end_time_column = Column._cast(end_time_column)

        # Cached from backend. Note there is no such thing as a table resource
        # as tables are only persisted in the context of a graph. However,
        # table snapshot resources exist, as tables can be ingested and have
        # data fetched:
        self._table_snapshot_id: Optional[TableSnapshotID] = None
|
|
122
|
+
|
|
123
|
+
@staticmethod
|
|
124
|
+
def from_source_table(
|
|
125
|
+
source_table: SourceTable,
|
|
126
|
+
column_names: Optional[List[str]] = None,
|
|
127
|
+
primary_key: Optional[str] = None,
|
|
128
|
+
time_column: Optional[str] = None,
|
|
129
|
+
end_time_column: Optional[str] = None,
|
|
130
|
+
) -> 'Table':
|
|
131
|
+
r"""Creates a Kumo Table from a source table. If no column names are
|
|
132
|
+
specified, all source columns are included by default.
|
|
133
|
+
|
|
134
|
+
Args:
|
|
135
|
+
source_table: The :class:`~kumoai.connector.SourceTable` object
|
|
136
|
+
that this table is constructed on.
|
|
137
|
+
column_names: A list of columns to include from the source table;
|
|
138
|
+
if not specified, all columns are included by default.
|
|
139
|
+
primary_key: The name of the primary key of this table, if it
|
|
140
|
+
exists.
|
|
141
|
+
time_column: The name of the time column of this table, if it
|
|
142
|
+
exists.
|
|
143
|
+
end_time_column: The name of the end time column of this table, if
|
|
144
|
+
it exists.
|
|
145
|
+
"""
|
|
146
|
+
cols = [
|
|
147
|
+
Column(name, col.stype, col.dtype)
|
|
148
|
+
for name, col in source_table.column_dict.items()
|
|
149
|
+
if (name in column_names if column_names is not None else True)
|
|
150
|
+
]
|
|
151
|
+
out = Table(source_table, cols)
|
|
152
|
+
out.primary_key = Column._cast(primary_key)
|
|
153
|
+
out.time_column = Column._cast(time_column)
|
|
154
|
+
out.end_time_column = Column._cast(end_time_column)
|
|
155
|
+
return out
|
|
156
|
+
|
|
157
|
+
    def print_definition(self) -> None:
        r"""Prints the full definition for this table; this definition can be
        copied-and-pasted verbatim to re-create this table.
        """
        # Render each constraint column as a quoted name, or the literal
        # string "None" when the constraint is unset:
        pkey_name = (f"\"{self.primary_key.name}\""
                     if self.primary_key is not None else "None")
        t_name = (f"\"{self.time_column.name}\""
                  if self.time_column is not None else "None")
        et_name = (f"\"{self.end_time_column.name}\""
                   if self.end_time_column is not None else "None")
        # One indented `repr` line per column, joined for the `columns=[...]`
        # section of the printed constructor call:
        col_dict = "\n".join([f'    {c},' for c in self.columns])
        # E.g. `S3Connector(...)["my_table"]` — mirrors indexing a connector:
        source_repr = f"{self.source_table.connector}[\"{self.source_name}\"]"
        print(f'{self.__class__.__name__}(\n'
              f'    source_table={source_repr},\n'
              f'    primary_key={pkey_name},\n'
              f'    time_column={t_name},\n'
              f'    end_time_column={et_name},\n'
              f'    columns=[\n{col_dict}\n'
              f'    ],\n'
              f')')
|
|
177
|
+
|
|
178
|
+
# Data column #############################################################
|
|
179
|
+
|
|
180
|
+
def has_column(self, name: str) -> bool:
|
|
181
|
+
r"""Returns True if this table has column with name :obj:`name`; False
|
|
182
|
+
otherwise.
|
|
183
|
+
"""
|
|
184
|
+
return name in self._columns
|
|
185
|
+
|
|
186
|
+
def column(self, name: str) -> Column:
|
|
187
|
+
r"""Returns the data column named with name :obj:`name` in this table,
|
|
188
|
+
or raises a :obj:`KeyError` if no such column is present.
|
|
189
|
+
|
|
190
|
+
Raises:
|
|
191
|
+
:class:`KeyError`
|
|
192
|
+
if :obj:`name` is not present in this table.
|
|
193
|
+
"""
|
|
194
|
+
if not self.has_column(name):
|
|
195
|
+
raise KeyError(
|
|
196
|
+
f"Column '{name}' not found in table '{self.source_name}'")
|
|
197
|
+
return self._columns[name]
|
|
198
|
+
|
|
199
|
+
@property
|
|
200
|
+
def columns(self) -> List[Column]:
|
|
201
|
+
r"""Returns a list of :class:`~kumoai.Column` objects that represent
|
|
202
|
+
the columns in this table.
|
|
203
|
+
"""
|
|
204
|
+
return list(self._columns.values())
|
|
205
|
+
|
|
206
|
+
def add_column(self, *args: Any, **kwargs: Any) -> None:
|
|
207
|
+
r"""Adds a :obj:`~kumoai.graph.Column` to this table. A column can
|
|
208
|
+
either be added by directly specifying its configuration in this call,
|
|
209
|
+
or by creating a Column object and passing it as an argument.
|
|
210
|
+
|
|
211
|
+
Example:
|
|
212
|
+
>>> import kumoai
|
|
213
|
+
>>> table = kumoai.Table(source_table=...) # doctest: +SKIP
|
|
214
|
+
>>> table.add_column(name='col1', dtype='string') # doctest: +SKIP
|
|
215
|
+
>>> table.add_column(kumoai.Column('col2', 'int')) # doctest: +SKIP
|
|
216
|
+
|
|
217
|
+
.. # noqa: E501
|
|
218
|
+
"""
|
|
219
|
+
col = Column._cast(*args, **kwargs)
|
|
220
|
+
if col is None:
|
|
221
|
+
raise ValueError("Cannot add a 'None' column to a table.")
|
|
222
|
+
if self.has_column(col.name):
|
|
223
|
+
self._columns[col.name].update(col)
|
|
224
|
+
else:
|
|
225
|
+
self._columns[col.name] = col
|
|
226
|
+
|
|
227
|
+
def remove_column(self, name: str) -> Self:
|
|
228
|
+
r"""Removes a :obj:`~kumoai.graph.Column` from this table.
|
|
229
|
+
|
|
230
|
+
Raises:
|
|
231
|
+
:class:`KeyError`
|
|
232
|
+
if :obj:`name` is not present in this table.
|
|
233
|
+
"""
|
|
234
|
+
if not self.has_column(name):
|
|
235
|
+
raise KeyError(
|
|
236
|
+
f"Column '{name}' not found in table '{self.source_name}'")
|
|
237
|
+
|
|
238
|
+
if self.has_primary_key() and self._primary_key == name:
|
|
239
|
+
self.primary_key = None
|
|
240
|
+
if self.has_time_column() and self._time_column == name:
|
|
241
|
+
self.time_column = None
|
|
242
|
+
if self.has_end_time_column() and self._end_time_column == name:
|
|
243
|
+
self.end_time_column = None
|
|
244
|
+
del self._columns[name]
|
|
245
|
+
return self
|
|
246
|
+
|
|
247
|
+
# Primary key #############################################################
|
|
248
|
+
|
|
249
|
+
def has_primary_key(self) -> bool:
|
|
250
|
+
r"""Returns :obj:`True` if this table has a primary key; :obj:`False`
|
|
251
|
+
otherwise.
|
|
252
|
+
"""
|
|
253
|
+
return self._primary_key is not None
|
|
254
|
+
|
|
255
|
+
@property
|
|
256
|
+
def primary_key(self) -> Optional[Column]:
|
|
257
|
+
r"""The primary key column of this table.
|
|
258
|
+
|
|
259
|
+
The getter returns the primary key column of this table, or None if no
|
|
260
|
+
such primary key is present.
|
|
261
|
+
|
|
262
|
+
The setter sets a column as a primary key on this table, and raises a
|
|
263
|
+
:class:`ValueError` if the primary key has a non-ID semantic type.
|
|
264
|
+
"""
|
|
265
|
+
if not self.has_primary_key():
|
|
266
|
+
return None
|
|
267
|
+
assert self._primary_key is not None
|
|
268
|
+
return self._columns[self._primary_key]
|
|
269
|
+
|
|
270
|
+
@primary_key.setter
|
|
271
|
+
def primary_key(self, *args: Any, **kwargs: Any) -> Self:
|
|
272
|
+
col = Column._cast(*args, **kwargs)
|
|
273
|
+
if col is None:
|
|
274
|
+
self._primary_key = None
|
|
275
|
+
return self
|
|
276
|
+
|
|
277
|
+
if col.stype is not None and col.stype != Stype.ID:
|
|
278
|
+
raise ValueError(
|
|
279
|
+
f"The semantic type of a primary key must be 'ID' (got "
|
|
280
|
+
f"{col.stype}).")
|
|
281
|
+
|
|
282
|
+
col.stype = Stype.ID
|
|
283
|
+
self.add_column(col)
|
|
284
|
+
self._primary_key = col.name
|
|
285
|
+
return self
|
|
286
|
+
|
|
287
|
+
# Time column #############################################################
|
|
288
|
+
|
|
289
|
+
def has_time_column(self) -> bool:
|
|
290
|
+
r"""Returns :obj:`True` if this table has a time column; :obj:`False`
|
|
291
|
+
otherwise.
|
|
292
|
+
"""
|
|
293
|
+
return self._time_column is not None
|
|
294
|
+
|
|
295
|
+
@property
|
|
296
|
+
def time_column(self) -> Optional[Column]:
|
|
297
|
+
r"""The time column of this table.
|
|
298
|
+
|
|
299
|
+
The getter returns the time column of this table, or :obj:`None` if no
|
|
300
|
+
such time column is present.
|
|
301
|
+
|
|
302
|
+
The setter sets a column as a time column on this table, and raises a
|
|
303
|
+
:class:`ValueError` if the time column is the same as the end time
|
|
304
|
+
column, or has a non-timestamp semantic type.
|
|
305
|
+
"""
|
|
306
|
+
if not self.has_time_column():
|
|
307
|
+
return None
|
|
308
|
+
assert self._time_column is not None
|
|
309
|
+
return self._columns[self._time_column]
|
|
310
|
+
|
|
311
|
+
@time_column.setter
|
|
312
|
+
def time_column(self, *args: Any, **kwargs: Any) -> Self:
|
|
313
|
+
col = Column._cast(*args, **kwargs)
|
|
314
|
+
if col is None:
|
|
315
|
+
self._time_column = None
|
|
316
|
+
return self
|
|
317
|
+
|
|
318
|
+
if self.has_end_time_column() and self._end_time_column == col.name:
|
|
319
|
+
raise ValueError(f"Cannot set the time column ('{col.name}') "
|
|
320
|
+
f"to be the same as the end time column "
|
|
321
|
+
f"('{self._end_time_column}')")
|
|
322
|
+
|
|
323
|
+
if col.stype is not None and col.stype != Stype.timestamp:
|
|
324
|
+
raise ValueError(
|
|
325
|
+
f"The semantic type of a time column must be 'timestamp' (got "
|
|
326
|
+
f"{col.stype}).")
|
|
327
|
+
|
|
328
|
+
col.stype = Stype.timestamp
|
|
329
|
+
self.add_column(col)
|
|
330
|
+
self._time_column = col.name
|
|
331
|
+
return self
|
|
332
|
+
|
|
333
|
+
# End time column #########################################################
|
|
334
|
+
|
|
335
|
+
def has_end_time_column(self) -> bool:
|
|
336
|
+
r"""Returns :obj:`True` if this table has an end time column;
|
|
337
|
+
:obj:`False` otherwise.
|
|
338
|
+
"""
|
|
339
|
+
return self._end_time_column is not None
|
|
340
|
+
|
|
341
|
+
@property
|
|
342
|
+
def end_time_column(self) -> Optional[Column]:
|
|
343
|
+
r"""The end time column of this table.
|
|
344
|
+
|
|
345
|
+
The getter returns the end time column of this table, or :obj:`None` if
|
|
346
|
+
no such column is present.
|
|
347
|
+
|
|
348
|
+
The setter sets a column as a time column on this table, and raises a
|
|
349
|
+
:class:`ValueError` if the time column is the same as the end time
|
|
350
|
+
column, or has a non-timestamp semantic type.
|
|
351
|
+
"""
|
|
352
|
+
if not self.has_end_time_column():
|
|
353
|
+
return None
|
|
354
|
+
assert self._end_time_column is not None
|
|
355
|
+
return self._columns[self._end_time_column]
|
|
356
|
+
|
|
357
|
+
@end_time_column.setter
|
|
358
|
+
def end_time_column(self, *args: Any, **kwargs: Any) -> Self:
|
|
359
|
+
col = Column._cast(*args, **kwargs)
|
|
360
|
+
if col is None:
|
|
361
|
+
self._end_time_column = None
|
|
362
|
+
return self
|
|
363
|
+
|
|
364
|
+
if self.has_time_column() and self._time_column == col.name:
|
|
365
|
+
raise ValueError(f"Cannot set the end time column ('{col.name}') "
|
|
366
|
+
f"to be the same as the time column "
|
|
367
|
+
f"('{self._time_column}')")
|
|
368
|
+
|
|
369
|
+
if col.stype is not None and col.stype != Stype.timestamp:
|
|
370
|
+
raise ValueError(
|
|
371
|
+
f"The semantic type of an end time column must be 'timestamp' "
|
|
372
|
+
f"(got {col.stype}).")
|
|
373
|
+
|
|
374
|
+
col.stype = Stype.timestamp
|
|
375
|
+
self.add_column(col)
|
|
376
|
+
self._end_time_column = col.name
|
|
377
|
+
return self
|
|
378
|
+
|
|
379
|
+
# Metadata ################################################################
|
|
380
|
+
|
|
381
|
+
@property
|
|
382
|
+
def metadata(self) -> pd.DataFrame:
|
|
383
|
+
r"""Returns a :class:`~pandas.DataFrame` object containing Kumo metadata
|
|
384
|
+
information about the columns in this table.
|
|
385
|
+
|
|
386
|
+
The returned dataframe has columns ``name``, ``dtype``, ``stype``,
|
|
387
|
+
``is_primary_key``, ``is_time_column``, and ``is_end_time_column``,
|
|
388
|
+
which provide an aggregate view of the properties of the columns of
|
|
389
|
+
this table.
|
|
390
|
+
|
|
391
|
+
Example:
|
|
392
|
+
>>> import kumoai
|
|
393
|
+
>>> table = kumoai.Table(source_table=...) # doctest: +SKIP
|
|
394
|
+
>>> table.add_column(name='CustomerID', dtype='float64', stype='ID') # doctest: +SKIP
|
|
395
|
+
>>> table.metadata # doctest: +SKIP
|
|
396
|
+
name dtype stype is_time_column is_end_time_column
|
|
397
|
+
0 CustomerID float64 ID False False
|
|
398
|
+
""" # noqa: E501
|
|
399
|
+
items = self._columns.items()
|
|
400
|
+
col_names: List[str] = [i[0] for i in items]
|
|
401
|
+
cols: List[Column] = [i[1] for i in items]
|
|
402
|
+
|
|
403
|
+
return pd.DataFrame({
|
|
404
|
+
'name':
|
|
405
|
+
pd.Series(dtype=str, data=col_names),
|
|
406
|
+
'dtype':
|
|
407
|
+
pd.Series(
|
|
408
|
+
dtype=str, data=[
|
|
409
|
+
c.dtype.value if c.dtype is not None else None
|
|
410
|
+
for c in cols
|
|
411
|
+
]),
|
|
412
|
+
'stype':
|
|
413
|
+
pd.Series(
|
|
414
|
+
dtype=str, data=[
|
|
415
|
+
c.stype.value if c.stype is not None else None
|
|
416
|
+
for c in cols
|
|
417
|
+
]),
|
|
418
|
+
'is_primary_key':
|
|
419
|
+
pd.Series(dtype=bool, data=[self.primary_key == c for c in cols]),
|
|
420
|
+
'is_time_column':
|
|
421
|
+
pd.Series(dtype=bool, data=[self.time_column == c for c in cols]),
|
|
422
|
+
'is_end_time_column':
|
|
423
|
+
pd.Series(dtype=bool,
|
|
424
|
+
data=[self.end_time_column == c for c in cols]),
|
|
425
|
+
})
|
|
426
|
+
|
|
427
|
+
    def infer_metadata(self, inplace: bool = True) -> Self:
        r"""Infers all metadata for this table's specified columns, including
        the column data types, semantic types, timestamp formats, primary keys,
        and time/end-time columns.

        Args:
            inplace: Whether the method should modify the table columns in
                place or return a new :class:`~kumoai.graph.Table` object.

        .. note::
            This method in-place modifies the Table object if `inplace = True`,
            and returns a copy if ``inplace = False``.
        """
        # Build one request entry per column; any user-provided timestamp
        # format is forwarded so the backend can respect it:
        col_requests: List[api.ColumnMetadataRequest] = []
        for col in self.columns:
            col_requests.append(
                # stype and dtype are None to support inferral:
                api.ColumnMetadataRequest(
                    name=col.name,
                    stype=None,
                    dtype=None,
                    timestamp_format=col.timestamp_format,
                ))

        # Existing primary key / time column assignments are sent as hints:
        pk_name: Optional[str] = None
        if self.has_primary_key():
            pk_name = self.primary_key.name  # type: ignore

        tc_name: Optional[str] = None
        if self.has_time_column():
            tc_name = self.time_column.name  # type: ignore

        request = api.TableMetadataRequest(
            cols=col_requests,
            source_table=self.source_table._to_api_source_table(),
            pkey=pk_name,
            time_col=tc_name,
        )

        # Remote call: the Kumo backend infers metadata from the source data.
        response = global_state.client.table_api.infer_metadata(request)
        inferred_cols: Dict[str, api.Column] = {
            col.name: col
            for col in response.cols
        }

        # Handle inplace: work either on `self` or on a deep copy.
        out = self
        if not inplace:
            out = copy.deepcopy(self)

        # TODO(manan): respect user overrides
        # TODO(manan): what happens when the ts format is set based on an
        # override?
        for col in out.columns:
            inferred_col = inferred_cols[col.name]

            # Inferred dtype/stype always replace the current values; the
            # timestamp format only fills in when not already set locally:
            col.dtype = inferred_col.dtype
            col.stype = inferred_col.stype
            col.timestamp_format = (col.timestamp_format
                                    or inferred_col.timestamp_format)

        # TODO(manan): support end-time column
        # Only adopt inferred constraints when none were set by the user:
        if not out.has_primary_key() and response.pkey is not None:
            out.primary_key = response.pkey
        if not out.has_time_column() and response.time_col is not None:
            out.time_column = response.time_col

        # Override for Kumo backend, always: constraint columns must carry
        # their canonical semantic types regardless of what was inferred.
        if out.has_primary_key():
            out.primary_key.stype = Stype.ID  # type: ignore

        if out.has_time_column():
            out.time_column.stype = Stype.timestamp  # type: ignore

        if out.has_end_time_column():
            out.end_time_column.stype = Stype.timestamp  # type: ignore

        return out
|
|
505
|
+
|
|
506
|
+
def _validate_definition(self) -> None:
|
|
507
|
+
for col in self.columns:
|
|
508
|
+
if col.dtype is None or col.stype is None:
|
|
509
|
+
raise ValueError(
|
|
510
|
+
f"Column {col.name} is not fully specified. Please "
|
|
511
|
+
f"specify this column's data type and semantic type "
|
|
512
|
+
f"before proceeding. {col.name} currently has a "
|
|
513
|
+
f"data type of {col.dtype} and semantic type of "
|
|
514
|
+
f"{col.stype}.")
|
|
515
|
+
|
|
516
|
+
    def validate(self, verbose: bool = True) -> Self:
        r"""Validates a Table to ensure that all relevant metadata is specified
        for a table to be used in a downstream :class:`~kumoai.graph.Graph` and
        :class:`~kumoai.pquery.PredictiveQuery`.

        Concretely, validation ensures that all columns have valid
        data and semantic types, with respect to the table's source data.
        For example, if a text column is assigned a ``dtype`` of ``"int"``,
        this method will raise an exception detailing the mismatch. Similarly,
        if a column cannot be cast from its source data type to the specified
        data type (*e.g* ``"int"`` to ``"binary"``), this method will raise an
        exception.

        .. warning::
            Data type validation is performed on a sample of table data. A
            valid response may not indicate your entire data source is
            configured correctly.

        Args:
            verbose: Whether to log non-error output of this validation.

        Example:
            >>> import kumoai
            >>> table = kumoai.Table(...)  # doctest: +SKIP
            >>> table.validate()  # doctest: +SKIP

        Raises:
            ValueError:
                if validation fails.
        """
        # Fast local check first (all dtypes/stypes present):
        self._validate_definition()

        # Actual heavy lifting: server-side validation against sampled data.
        resp = global_state.client.table_api.validate_table(
            api.TableValidationRequest(self._to_api_table_definition()))
        if not resp.ok:
            raise ValueError(resp.error_message())
        if verbose:
            # Non-fatal findings are surfaced as warnings:
            if resp.empty():
                logger.info("Table %s is configured correctly.",
                            self.source_name)
            else:
                logger.warning(resp.message())
        return self
|
|
560
|
+
|
|
561
|
+
# Snapshot ################################################################
|
|
562
|
+
|
|
563
|
+
@property
|
|
564
|
+
def snapshot_id(self) -> Optional[snapshot_api.TableSnapshotID]:
|
|
565
|
+
r"""Returns the snapshot ID of this table's snapshot, if a snapshot
|
|
566
|
+
has been taken. Returns `None` otherwise.
|
|
567
|
+
|
|
568
|
+
.. warning::
|
|
569
|
+
This property currently only returns a snapshot ID if a snapshot
|
|
570
|
+
has been taken *in this session.*
|
|
571
|
+
"""
|
|
572
|
+
return self._table_snapshot_id
|
|
573
|
+
|
|
574
|
+
    def snapshot(
        self,
        *,
        force_refresh: bool = False,
        non_blocking: bool = False,
    ) -> snapshot_api.TableSnapshotID:
        r"""Takes a *snapshot* of this table's underlying data, and returns a
        unique identifier for this snapshot.

        The *snapshot* functionality allows one to freeze a table in time, so
        that underlying data changes do not require Kumo to re-process the
        data. This allows for fast iterative machine learning model
        development, on a consistent set of input data.

        .. warning::
            Please note that snapshots are intended to freeze tables in
            time, and not to allow for "time-traveling" to an earlier version
            of data with a prior snapshot. In particular, this means that a
            table can only have one version of a snapshot, which represents
            the latest snapshot taken for that table.

        .. note::
            If you are using Kumo as a Snowpark Container Services native
            application, please note that *snapshot* is a no-op for all
            non-view tables.

        Args:
            force_refresh: Indicates whether a snapshot should be taken, if one
                already exists in Kumo. If :obj:`False`, a previously existing
                snapshot may be re-used. If :obj:`True`, a new snapshot is
                always taken.
            non_blocking: Whether this operation should return immediately
                after creating the snapshot, or await completion of the
                snapshot. If :obj:`True`, the snapshot will proceed in the
                background, and will be used for any downstream job.

        Raises:
            RuntimeError: If the (blocking) snapshot ingest stage terminates
                with a ``FAILED`` status.
        """
        # Re-use the session-cached snapshot unless the caller forces a
        # refresh or no snapshot exists yet:
        if self._table_snapshot_id is None or force_refresh:
            self._table_snapshot_id = (
                global_state.client.table_api.create_snapshot(
                    table_definition=self._to_api_table_definition(),
                    refresh_source=True,
                ))

        # Only the INGEST stage is awaited here; statistics stages are
        # handled separately (see :meth:`get_stats`):
        stage = snapshot_api.TableSnapshotStage.INGEST
        resource: snapshot_api.TableSnapshotResource = (
            global_state.client.table_api.get_snapshot(
                snapshot_id=self._table_snapshot_id))

        if not non_blocking:
            # Poll the ingest stage until it reaches a terminal status:
            status = resource.stages[stage].status
            while not status.is_terminal:
                # TODO(manan, siyang): fix start and end time
                resource = (global_state.client.table_api.get_snapshot(
                    snapshot_id=self._table_snapshot_id))
                logger.info(
                    "Awaiting snapshot completion: current status is %s ",
                    status)
                time.sleep(_DEFAULT_INTERVAL_S)
                status = resource.stages[stage].status

            # Terminal: surface any warnings, and raise on failure.
            state = resource.stages[stage]
            status = state.status
            # NOTE warnings are numbered from 0, matching enumerate():
            warnings = "\n".join([
                f"{i}. {message}"
                for i, message in enumerate(state.warnings)
            ])
            error = state.error
            if status == JobStatus.FAILED:
                raise RuntimeError(
                    f"Table snapshot with identifier "
                    f"{self._table_snapshot_id} failed, with error "
                    f"{error} and warnings {warnings}")
            if len(state.warnings) > 0:
                logger.warning(
                    "Table snapshot completed with the following "
                    "warnings: %s", warnings)

        # <prefix>@<data_version>
        assert self._table_snapshot_id is not None
        return self._table_snapshot_id
|
|
654
|
+
|
|
655
|
+
def get_stats(
|
|
656
|
+
self,
|
|
657
|
+
wait_for: Optional[str] = None,
|
|
658
|
+
) -> Dict[str, Dict[str, Any]]:
|
|
659
|
+
r"""Returns all currently computed statistics on the latest snapshot of
|
|
660
|
+
this table. If a snapshot on this table has not been taken, this method
|
|
661
|
+
will take a snapshot.
|
|
662
|
+
|
|
663
|
+
.. note::
|
|
664
|
+
Table statstics are computed in multiple stages after ingestion is
|
|
665
|
+
complete. These stages are called *minimal* and *full*; minimal
|
|
666
|
+
statistics are always computed before full statistics.
|
|
667
|
+
|
|
668
|
+
Args:
|
|
669
|
+
wait_for: Whether this operation should block on the existence of
|
|
670
|
+
statistics availability. This argument can take one of three
|
|
671
|
+
values: :obj:`None`, which indicates that the method should
|
|
672
|
+
return immediately with whatever statistics are present,
|
|
673
|
+
:obj:`"minimal"`, which indicates that the method should return
|
|
674
|
+
the when the minimum, maximum, and fraction of NA values
|
|
675
|
+
statistics are present, or :obj:`"full"`, which indicates that
|
|
676
|
+
the method should return when all computed statistics are
|
|
677
|
+
present.
|
|
678
|
+
"""
|
|
679
|
+
assert wait_for is None or wait_for in {"minimal", "full"}
|
|
680
|
+
|
|
681
|
+
# Attempt to snapshot, use cached snapshot if possible:
|
|
682
|
+
if not self._table_snapshot_id:
|
|
683
|
+
self.snapshot(force_refresh=False, non_blocking=False)
|
|
684
|
+
assert self._table_snapshot_id is not None
|
|
685
|
+
|
|
686
|
+
# Fetch resource:
|
|
687
|
+
resource: snapshot_api.TableSnapshotResource = (
|
|
688
|
+
global_state.client.table_api.get_snapshot(
|
|
689
|
+
snapshot_id=self._table_snapshot_id))
|
|
690
|
+
|
|
691
|
+
# Wait for a stage, if we need to:
|
|
692
|
+
if wait_for:
|
|
693
|
+
if wait_for == "minimal":
|
|
694
|
+
stage = snapshot_api.TableSnapshotStage.MIN_COL_STATS
|
|
695
|
+
else:
|
|
696
|
+
stage = snapshot_api.TableSnapshotStage.FULL_COL_STATS
|
|
697
|
+
|
|
698
|
+
status = resource.stages[stage].status
|
|
699
|
+
while not status.is_terminal:
|
|
700
|
+
resource = (global_state.client.table_api.get_snapshot(
|
|
701
|
+
snapshot_id=self._table_snapshot_id))
|
|
702
|
+
logger.info(
|
|
703
|
+
"Awaiting %s column statistics: current status is %s ",
|
|
704
|
+
wait_for, status)
|
|
705
|
+
time.sleep(_DEFAULT_INTERVAL_S)
|
|
706
|
+
status = resource.stages[stage].status
|
|
707
|
+
|
|
708
|
+
# Write out statistics:
|
|
709
|
+
out = {}
|
|
710
|
+
col_stats = resource.column_stats
|
|
711
|
+
for stat in (col_stats or []):
|
|
712
|
+
out[stat.column_name] = stat.stats
|
|
713
|
+
return out
|
|
714
|
+
|
|
715
|
+
# Persistence #############################################################
|
|
716
|
+
|
|
717
|
+
def _to_api_table_definition(self) -> api.TableDefinition:
|
|
718
|
+
# TODO(manan): type narrowing?
|
|
719
|
+
pk_name: Optional[str] = None
|
|
720
|
+
if self.has_primary_key():
|
|
721
|
+
pk_name = self.primary_key.name # type: ignore
|
|
722
|
+
|
|
723
|
+
tc_name: Optional[str] = None
|
|
724
|
+
if self.has_time_column():
|
|
725
|
+
tc_name = self.time_column.name # type: ignore
|
|
726
|
+
|
|
727
|
+
etc_name: Optional[str] = None
|
|
728
|
+
if self.has_end_time_column():
|
|
729
|
+
etc_name = self.end_time_column.name # type: ignore
|
|
730
|
+
|
|
731
|
+
return api.TableDefinition(
|
|
732
|
+
cols=[
|
|
733
|
+
api.Column(col.name, col.stype, col.dtype,
|
|
734
|
+
col.timestamp_format) for col in self.columns
|
|
735
|
+
],
|
|
736
|
+
source_table=self.source_table._to_api_source_table(),
|
|
737
|
+
pkey=pk_name,
|
|
738
|
+
time_col=tc_name,
|
|
739
|
+
end_time_col=etc_name,
|
|
740
|
+
)
|
|
741
|
+
|
|
742
|
+
@staticmethod
|
|
743
|
+
def _from_api_table_definition(
|
|
744
|
+
table_definition: api.TableDefinition) -> 'Table':
|
|
745
|
+
return Table(
|
|
746
|
+
source_table=SourceTable._from_api_table_definition(
|
|
747
|
+
table_definition),
|
|
748
|
+
columns=[
|
|
749
|
+
Column(col.name, col.stype, col.dtype, col.timestamp_format)
|
|
750
|
+
for col in table_definition.cols
|
|
751
|
+
],
|
|
752
|
+
primary_key=table_definition.pkey,
|
|
753
|
+
time_column=table_definition.time_col,
|
|
754
|
+
end_time_column=table_definition.end_time_col,
|
|
755
|
+
)
|
|
756
|
+
|
|
757
|
+
def save(self, name: Optional[str] = None) -> Union[TableID, str]:
|
|
758
|
+
r"""Associates this table with a unique name, that can later be
|
|
759
|
+
used to fetch the table either in the Kumo UI or in the Kumo SDK
|
|
760
|
+
with method :meth:`~kumoai.Table.load`.
|
|
761
|
+
|
|
762
|
+
Args:
|
|
763
|
+
name: The name to associate with this table definition. If the
|
|
764
|
+
name is already associated with another table, that table will
|
|
765
|
+
be overridden.
|
|
766
|
+
|
|
767
|
+
Example:
|
|
768
|
+
>>> import kumoai
|
|
769
|
+
>>> table = kumoai.Table(...) # doctest: +SKIP
|
|
770
|
+
>>> unique_id = table.save() # doctest: +SKIP
|
|
771
|
+
>>> loaded = kumoai.Table.load(unique_id) # doctest: +SKIP
|
|
772
|
+
>>> name = table.save("name") # doctest: +SKIP
|
|
773
|
+
>>> loaded = kumoai.Table.load("name") # doctest: +SKIP
|
|
774
|
+
"""
|
|
775
|
+
self.validate(verbose=False)
|
|
776
|
+
template_resource = (global_state.client.table_api.get_table_if_exists(
|
|
777
|
+
table_id_or_name=name)) if name else None
|
|
778
|
+
|
|
779
|
+
if template_resource is not None:
|
|
780
|
+
config = self._from_api_table_definition(template_resource.table)
|
|
781
|
+
logger.warning(
|
|
782
|
+
("Table template %s already exists, with configuration %s. "
|
|
783
|
+
"This template will be overridden with configuration %s."),
|
|
784
|
+
name, str(config), str(self))
|
|
785
|
+
|
|
786
|
+
# TODO(manan): fix
|
|
787
|
+
_id = global_state.client.table_api.create_table(
|
|
788
|
+
table_def=self._to_api_table_definition(),
|
|
789
|
+
name_alias=name,
|
|
790
|
+
force_rename=True if name else False,
|
|
791
|
+
)
|
|
792
|
+
return f"table-{_id.split('-', maxsplit=1)[1]}"
|
|
793
|
+
|
|
794
|
+
@classmethod
|
|
795
|
+
def load(cls, table_id_or_template: str) -> 'Table':
|
|
796
|
+
r"""Loads a table from either a table ID or a named template. Returns a
|
|
797
|
+
:class:`Table` object that contains the loaded table along with its
|
|
798
|
+
columns, etc.
|
|
799
|
+
"""
|
|
800
|
+
api = global_state.client.table_api
|
|
801
|
+
res = api.get_table_if_exists(table_id_or_template)
|
|
802
|
+
if not res:
|
|
803
|
+
raise ValueError(f"Table {table_id_or_template} was not found.")
|
|
804
|
+
out = cls._from_api_table_definition(res.table)
|
|
805
|
+
return out
|
|
806
|
+
|
|
807
|
+
# Class properties ########################################################
|
|
808
|
+
|
|
809
|
+
def __hash__(self) -> int:
|
|
810
|
+
return hash(
|
|
811
|
+
tuple(self.columns +
|
|
812
|
+
[self.primary_key, self.time_column, self.end_time_column]))
|
|
813
|
+
|
|
814
|
+
    def __contains__(self, name: str) -> bool:
        r"""Returns whether a column named ``name`` exists in this table
        (delegates to :meth:`has_column`)."""
        return self.has_column(name)
|
|
816
|
+
|
|
817
|
+
    def __getitem__(self, name: str) -> Column:
        r"""Returns the column named ``name`` (delegates to
        :meth:`column`)."""
        return self.column(name)
|
|
819
|
+
|
|
820
|
+
    def __delitem__(self, name: str) -> None:
        r"""Removes the column named ``name`` from this table (delegates to
        :meth:`remove_column`)."""
        self.remove_column(name)
|
|
822
|
+
|
|
823
|
+
def __repr__(self) -> str:
|
|
824
|
+
col_names = str(list(self._columns.keys())).replace("'", "")
|
|
825
|
+
pkey_name = (self.primary_key.name
|
|
826
|
+
if self.primary_key is not None else "None")
|
|
827
|
+
t_name = (self.time_column.name
|
|
828
|
+
if self.time_column is not None else "None")
|
|
829
|
+
et_name = (self.end_time_column.name
|
|
830
|
+
if self.end_time_column is not None else "None")
|
|
831
|
+
return (f'{self.__class__.__name__}(\n'
|
|
832
|
+
f' source_name={self.source_name},\n'
|
|
833
|
+
f' data_source={self.source_table.connector.name},\n'
|
|
834
|
+
f' columns={col_names},\n'
|
|
835
|
+
f' primary_key={pkey_name},\n'
|
|
836
|
+
f' time_column={t_name},\n'
|
|
837
|
+
f' end_time_column={et_name},\n'
|
|
838
|
+
f')')
|