PyPI - kumoai - Versions diffs - 2.14.0.dev202512151351__cp313-cp313-macosx_11_0_arm64.whl → 2.14.0.dev202512211732__cp313-cp313-macosx_11_0_arm64.whl - Mend

kumoai 2.14.0.dev202512151351__cp313-cp313-macosx_11_0_arm64.whl → 2.14.0.dev202512211732__cp313-cp313-macosx_11_0_arm64.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (33)

kumoai/_version.py +1 -1
kumoai/experimental/rfm/__init__.py +33 -8
kumoai/experimental/rfm/authenticate.py +3 -4
kumoai/experimental/rfm/backend/local/graph_store.py +25 -25
kumoai/experimental/rfm/backend/local/table.py +16 -21
kumoai/experimental/rfm/backend/snow/sampler.py +22 -34
kumoai/experimental/rfm/backend/snow/table.py +67 -33
kumoai/experimental/rfm/backend/sqlite/__init__.py +2 -2
kumoai/experimental/rfm/backend/sqlite/sampler.py +21 -26
kumoai/experimental/rfm/backend/sqlite/table.py +54 -26
kumoai/experimental/rfm/base/__init__.py +8 -0
kumoai/experimental/rfm/base/column.py +14 -12
kumoai/experimental/rfm/base/column_expression.py +50 -0
kumoai/experimental/rfm/base/sql_sampler.py +31 -3
kumoai/experimental/rfm/base/sql_table.py +229 -0
kumoai/experimental/rfm/base/table.py +162 -143
kumoai/experimental/rfm/graph.py +242 -95
kumoai/experimental/rfm/infer/__init__.py +6 -4
kumoai/experimental/rfm/infer/dtype.py +3 -3
kumoai/experimental/rfm/infer/pkey.py +4 -2
kumoai/experimental/rfm/infer/stype.py +35 -0
kumoai/experimental/rfm/infer/time_col.py +1 -2
kumoai/experimental/rfm/pquery/executor.py +27 -27
kumoai/experimental/rfm/pquery/pandas_executor.py +29 -31
kumoai/experimental/rfm/rfm.py +86 -80
kumoai/experimental/rfm/sagemaker.py +4 -4
kumoai/utils/__init__.py +1 -2
kumoai/utils/progress_logger.py +178 -12
{kumoai-2.14.0.dev202512151351.dist-info → kumoai-2.14.0.dev202512211732.dist-info}/METADATA +2 -1
{kumoai-2.14.0.dev202512151351.dist-info → kumoai-2.14.0.dev202512211732.dist-info}/RECORD +33 -30
{kumoai-2.14.0.dev202512151351.dist-info → kumoai-2.14.0.dev202512211732.dist-info}/WHEEL +0 -0
{kumoai-2.14.0.dev202512151351.dist-info → kumoai-2.14.0.dev202512211732.dist-info}/licenses/LICENSE +0 -0
{kumoai-2.14.0.dev202512151351.dist-info → kumoai-2.14.0.dev202512211732.dist-info}/top_level.txt +0 -0

kumoai/experimental/rfm/backend/sqlite/sampler.py CHANGED Viewed

@@ -8,10 +8,9 @@ import pyarrow as pa
 from kumoapi.pquery import ValidatedPredictiveQuery
 from kumoapi.typing import Stype
-from kumoai.experimental.rfm.backend.sqlite import SQLiteTable
 from kumoai.experimental.rfm.base import SQLSampler
 from kumoai.experimental.rfm.pquery import PQueryPandasExecutor
-from kumoai.utils import InteractiveProgressLogger, ProgressLogger, quote_ident
+from kumoai.utils import ProgressLogger, quote_ident
 if TYPE_CHECKING:
     from kumoai.experimental.rfm import Graph
@@ -26,10 +25,6 @@ class SQLiteSampler(SQLSampler):
     ) -> None:
         super().__init__(graph=graph, verbose=verbose)
-        for table in graph.tables.values():
-            assert isinstance(table, SQLiteTable)
-            self._connection = table._connection
         if optimize:
             with self._connection.cursor() as cursor:
                 cursor.execute("PRAGMA temp_store = MEMORY")
@@ -54,7 +49,7 @@ class SQLiteSampler(SQLSampler):
         with self._connection.cursor() as cursor:
             for table_name in list(index_dict.keys()):
                 indices = index_dict[table_name]
-                sql = f"PRAGMA index_list({quote_ident(table_name)})"
+                sql = f"PRAGMA index_list({self.fqn_dict[table_name]})"
                 cursor.execute(sql)
                 for _, index_name, *_ in cursor.fetchall():
                     sql = f"PRAGMA index_info({quote_ident(index_name)})"
@@ -72,22 +67,22 @@ class SQLiteSampler(SQLSampler):
         if optimize and len(index_dict) > 0:
             if not isinstance(verbose, ProgressLogger):
-                verbose = InteractiveProgressLogger(
-                    "Optimizing SQLite database",
+                verbose = ProgressLogger.default(
+                    msg="Optimizing SQLite database",
                     verbose=verbose,
                 )
-            with verbose as logger:
-                with self._connection.cursor() as cursor:
-                    for table_name, indices in index_dict.items():
-                        for index in indices:
-                            name = f"kumo_index_{table_name}_{'_'.join(index)}"
-                            columns = ', '.join(quote_ident(v) for v in index)
-                            columns += ' DESC' if len(index) > 1 else ''
-                            sql = (f"CREATE INDEX IF NOT EXISTS {name}\n"
-                                   f"ON {quote_ident(table_name)}({columns})")
-                            cursor.execute(sql)
-                    self._connection.commit()
+            with verbose as logger, self._connection.cursor() as cursor:
+                for table_name, indices in index_dict.items():
+                    for index in indices:
+                        name = f"kumo_index_{table_name}_{'_'.join(index)}"
+                        name = quote_ident(name)
+                        columns = ', '.join(quote_ident(v) for v in index)
+                        columns += ' DESC' if len(index) > 1 else ''
+                        sql = (f"CREATE INDEX IF NOT EXISTS {name}\n"
+                               f"ON {self.fqn_dict[table_name]}({columns})")
+                        cursor.execute(sql)
+                self._connection.commit()
                 logger.log(f"Created {index_repr} in {table_repr}")
         elif len(index_dict) > 0:
@@ -108,7 +103,7 @@ class SQLiteSampler(SQLSampler):
                       f"  ? as table_name,\n"
                       f"  MIN({quote_ident(time_column)}) as min_date,\n"
                       f"  MAX({quote_ident(time_column)}) as max_date\n"
-                      f"FROM {quote_ident(table_name)}")
+                      f"FROM {self.fqn_dict[table_name]}")
             selects.append(select)
         sql = "\nUNION ALL\n".join(selects)
@@ -142,7 +137,7 @@ class SQLiteSampler(SQLSampler):
         # TODO Make this query more efficient - it does full table scan.
         sql = (f"SELECT {', '.join(quote_ident(col) for col in columns)}\n"
-               f"FROM {quote_ident(table_name)}")
+               f"FROM {self.fqn_dict[table_name]}")
         if len(filters) > 0:
             sql += f"\nWHERE{' AND'.join(filters)}"
         sql += f"\nORDER BY RANDOM() LIMIT {num_rows}"
@@ -207,15 +202,15 @@ class SQLiteSampler(SQLSampler):
             sql = (f"SELECT tmp.rowid - 1 as __batch__, "
                    f"{', '.join('ent.' + quote_ident(c) for c in columns)}\n"
                    f"FROM {quote_ident(tmp_name)} tmp\n"
-                   f"JOIN {quote_ident(table_name)} ent\n"
+                   f"JOIN {self.fqn_dict[table_name]} ent\n"
                    f"  ON ent.{quote_ident(pkey_name)} = tmp.id")
         else:
             sql = (f"SELECT tmp.rowid - 1 as __batch__, "
                    f"{', '.join('ent.' + quote_ident(c) for c in columns)}\n"
                    f"FROM {quote_ident(tmp_name)} tmp\n"
-                   f"JOIN {quote_ident(table_name)} ent\n"
+                   f"JOIN {self.fqn_dict[table_name]} ent\n"
                    f"  ON ent.rowid = (\n"
-                   f"    SELECT rowid FROM {quote_ident(table_name)}\n"
+                   f"    SELECT rowid FROM {self.fqn_dict[table_name]}\n"
                    f"    WHERE {quote_ident(pkey_name)} == tmp.id\n"
                    f"    LIMIT 1\n"
                    f")")
@@ -258,7 +253,7 @@ class SQLiteSampler(SQLSampler):
         sql = (f"SELECT tmp.rowid - 1 as __batch__, "
                f"{', '.join('fact.' + quote_ident(col) for col in columns)}\n"
                f"FROM {quote_ident(tmp_name)} tmp\n"
-               f"JOIN {quote_ident(table_name)} fact\n"
+               f"JOIN {self.fqn_dict[table_name]} fact\n"
                f"  ON fact.{quote_ident(fkey)} = tmp.id\n"
                f" AND fact.{quote_ident(time_column)} <= tmp.end")
         if min_offset is not None:

kumoai/experimental/rfm/backend/sqlite/table.py CHANGED Viewed

@@ -1,28 +1,35 @@
 import re
 import warnings
-from typing import List, Optional, Sequence, cast
+from collections.abc import Sequence
+from typing import cast
 import pandas as pd
+from kumoapi.model_plan import MissingType
 from kumoapi.typing import Dtype
 from kumoai.experimental.rfm.backend.sqlite import Connection
 from kumoai.experimental.rfm.base import (
+    ColumnExpressionSpec,
+    ColumnExpressionType,
     DataBackend,
     SourceColumn,
     SourceForeignKey,
-    Table,
+    SQLTable,
 )
 from kumoai.experimental.rfm.infer import infer_dtype
 from kumoai.utils import quote_ident
-class SQLiteTable(Table):
+class SQLiteTable(SQLTable):
     r"""A table backed by a :class:`sqlite` database.
     Args:
         connection: The connection to a :class:`sqlite` database.
-        name: The name of this table.
-        columns: The selected columns of this table.
+        name: The logical name of this table.
+        source_name: The physical name of this table in the database. If set to
+            ``None``, ``name`` is being used.
+        columns: The selected physical columns of this table.
+        column_expressions: The logical columns of this table.
         primary_key: The name of the primary key of this table, if it exists.
         time_column: The name of the time column of this table, if it exists.
         end_time_column: The name of the end time column of this table, if it
@@ -32,17 +39,21 @@ class SQLiteTable(Table):
         self,
         connection: Connection,
         name: str,
-        columns: Optional[Sequence[str]] = None,
-        primary_key: Optional[str] = None,
-        time_column: Optional[str] = None,
-        end_time_column: Optional[str] = None,
+        source_name: str | None = None,
+        columns: Sequence[str] | None = None,
+        column_expressions: Sequence[ColumnExpressionType] | None = None,
+        primary_key: MissingType | str | None = MissingType.VALUE,
+        time_column: str | None = None,
+        end_time_column: str | None = None,
     ) -> None:
         self._connection = connection
         super().__init__(
             name=name,
+            source_name=source_name,
             columns=columns,
+            column_expressions=column_expressions,
             primary_key=primary_key,
             time_column=time_column,
             end_time_column=end_time_column,
@@ -52,18 +63,19 @@ class SQLiteTable(Table):
     def backend(self) -> DataBackend:
         return cast(DataBackend, DataBackend.SQLITE)
-    def _get_source_columns(self) -> List[SourceColumn]:
-        source_columns: List[SourceColumn] = []
+    def _get_source_columns(self) -> list[SourceColumn]:
+        source_columns: list[SourceColumn] = []
         with self._connection.cursor() as cursor:
-            sql = f"PRAGMA table_info({quote_ident(self.name)})"
+            sql = f"PRAGMA table_info({self.fqn})"
             cursor.execute(sql)
             columns = cursor.fetchall()
             if len(columns) == 0:
-                raise ValueError(f"Table '{self.name}' does not exist")
+                raise ValueError(f"Table '{self._source_name}' does not exist "
+                                 f"in the SQLite database")
             unique_keys: set[str] = set()
-            sql = f"PRAGMA index_list({quote_ident(self.name)})"
+            sql = f"PRAGMA index_list({self.fqn})"
             cursor.execute(sql)
             for _, index_name, is_unique, *_ in cursor.fetchall():
                 if bool(is_unique):
@@ -83,15 +95,17 @@ class SQLiteTable(Table):
                 elif re.search('REAL|FLOA|DOUB', type):
                     dtype = Dtype.float
                 else:  # NUMERIC affinity.
-                    ser = self._sample_df[column]
+                    ser = self._source_sample_df[column]
                     try:
                         dtype = infer_dtype(ser)
                     except Exception:
-                        warnings.warn(
-                            f"Data type inference for column '{column}' in "
-                            f"table '{self.name}' failed. Consider changing "
-                            f"the data type of the column to use it within "
-                            f"this table.")
+                        warnings.warn(f"Encountered unsupported data type "
+                                      f"'{ser.dtype}' with source data type "
+                                      f"'{type}' for column '{column}' in "
+                                      f"table '{self.name}'. If possible, "
+                                      f"change the data type of the column in "
+                                      f"your SQLite database to use it within "
+                                      f"this table.")
                         continue
                 source_column = SourceColumn(
@@ -105,22 +119,36 @@ class SQLiteTable(Table):
         return source_columns
-    def _get_source_foreign_keys(self) -> List[SourceForeignKey]:
-        source_fkeys: List[SourceForeignKey] = []
+    def _get_source_foreign_keys(self) -> list[SourceForeignKey]:
+        source_fkeys: list[SourceForeignKey] = []
         with self._connection.cursor() as cursor:
-            sql = f"PRAGMA foreign_key_list({quote_ident(self.name)})"
+            sql = f"PRAGMA foreign_key_list({self.fqn})"
             cursor.execute(sql)
             for _, _, dst_table, fkey, pkey, *_ in cursor.fetchall():
                 source_fkeys.append(SourceForeignKey(fkey, dst_table, pkey))
         return source_fkeys
-    def _get_sample_df(self) -> pd.DataFrame:
+    def _get_source_sample_df(self) -> pd.DataFrame:
         with self._connection.cursor() as cursor:
-            sql = (f"SELECT * FROM {quote_ident(self.name)} "
+            sql = (f"SELECT * FROM {self.fqn} "
                    f"ORDER BY rowid LIMIT 1000")
             cursor.execute(sql)
             table = cursor.fetch_arrow_table()
             return table.to_pandas(types_mapper=pd.ArrowDtype)
-    def _get_num_rows(self) -> Optional[int]:
+    def _get_num_rows(self) -> int | None:
         return None
+    def _get_expression_sample_df(
+        self,
+        specs: Sequence[ColumnExpressionSpec],
+    ) -> pd.DataFrame:
+        with self._connection.cursor() as cursor:
+            columns = [
+                f"{spec.expr} AS {quote_ident(spec.name)}" for spec in specs
+            ]
+            sql = (f"SELECT {', '.join(columns)} FROM {self.fqn} "
+                   f"ORDER BY rowid LIMIT 1000")
+            cursor.execute(sql)
+            table = cursor.fetch_arrow_table()
+            return table.to_pandas(types_mapper=pd.ArrowDtype)

kumoai/experimental/rfm/base/__init__.py CHANGED Viewed

@@ -9,7 +9,11 @@ class DataBackend(StrEnum):
 from .source import SourceColumn, SourceForeignKey  # noqa: E402
 from .column import Column  # noqa: E402
+from .column_expression import ColumnExpressionSpec  # noqa: E402
+from .column_expression import ColumnExpressionType  # noqa: E402
+from .column_expression import ColumnExpression  # noqa: E402
 from .table import Table  # noqa: E402
+from .sql_table import SQLTable  # noqa: E402
 from .sampler import SamplerOutput, Sampler  # noqa: E402
 from .sql_sampler import SQLSampler  # noqa: E402
@@ -18,7 +22,11 @@ __all__ = [
     'SourceColumn',
     'SourceForeignKey',
     'Column',
+    'ColumnExpressionSpec',
+    'ColumnExpressionType',
+    'ColumnExpression',
     'Table',
+    'SQLTable',
     'SamplerOutput',
     'Sampler',
     'SQLSampler',

kumoai/experimental/rfm/base/column.py CHANGED Viewed

@@ -8,20 +8,14 @@ from kumoapi.typing import Dtype, Stype
 class Column:
     stype: Stype
-    def __init__(
-        self,
-        name: str,
-        dtype: Dtype,
-        stype: Stype,
-        is_primary_key: bool = False,
-        is_time_column: bool = False,
-        is_end_time_column: bool = False,
-    ) -> None:
+    def __init__(self, name: str, stype: Stype, dtype: Dtype) -> None:
         self._name = name
         self._dtype = Dtype(dtype)
-        self._is_primary_key = is_primary_key
-        self._is_time_column = is_time_column
-        self._is_end_time_column = is_end_time_column
+        self._is_primary_key = False
+        self._is_time_column = False
+        self._is_end_time_column = False
         self.stype = Stype(stype)
     @property
@@ -32,6 +26,14 @@ class Column:
     def dtype(self) -> Dtype:
         return self._dtype
+    @property
+    def is_physical(self) -> bool:
+        return True
+    @property
+    def is_logical(self) -> bool:
+        return not self.is_physical
     def __setattr__(self, key: str, val: Any) -> None:
         if key == 'stype':
             if isinstance(val, str):

kumoai/experimental/rfm/base/column_expression.py ADDED Viewed

@@ -0,0 +1,50 @@
+from dataclasses import dataclass
+from typing import Any, TypeAlias
+from kumoapi.typing import Dtype, Stype
+from kumoai.experimental.rfm.base import Column
+from kumoai.mixin import CastMixin
+@dataclass(frozen=True)
+class ColumnExpressionSpec(CastMixin):
+    name: str
+    expr: str
+    dtype: Dtype | None = None
+ColumnExpressionType: TypeAlias = ColumnExpressionSpec | dict[str, Any]
+@dataclass(init=False, repr=False, eq=False)
+class ColumnExpression(Column):
+    def __init__(
+        self,
+        name: str,
+        expr: str,
+        stype: Stype,
+        dtype: Dtype,
+    ) -> None:
+        super().__init__(name=name, stype=stype, dtype=dtype)
+        self._expr = expr
+    @property
+    def expr(self) -> str:
+        return self._expr
+    @property
+    def is_physical(self) -> bool:
+        return False
+    def __hash__(self) -> int:
+        return hash((self.name, self.expr, self.stype, self.dtype))
+    def __eq__(self, other: Any) -> bool:
+        if not isinstance(other, ColumnExpression):
+            return False
+        return hash(self) == hash(other)
+    def __repr__(self) -> str:
+        return (f'{self.__class__.__name__}(name={self.name}, '
+                f'expr={self.expr}, stype={self.stype}, dtype={self.dtype})')

kumoai/experimental/rfm/base/sql_sampler.py CHANGED Viewed

@@ -1,13 +1,37 @@
 from abc import abstractmethod
-from typing import Literal
+from typing import TYPE_CHECKING, Literal
 import numpy as np
 import pandas as pd
-from kumoai.experimental.rfm.base import Sampler, SamplerOutput
+from kumoai.experimental.rfm.base import Sampler, SamplerOutput, SQLTable
+from kumoai.utils import ProgressLogger
+if TYPE_CHECKING:
+    from kumoai.experimental.rfm import Graph
 class SQLSampler(Sampler):
+    def __init__(
+        self,
+        graph: 'Graph',
+        verbose: bool | ProgressLogger = True,
+    ) -> None:
+        super().__init__(graph=graph, verbose=verbose)
+        self._fqn_dict: dict[str, str] = {}
+        for table in graph.tables.values():
+            assert isinstance(table, SQLTable)
+            self._connection = table._connection
+            self._fqn_dict[table.name] = table.fqn
+    @property
+    def fqn_dict(self) -> dict[str, str]:
+        r"""The fully-qualified quoted source name for all table names in the
+        graph.
+        """
+        return self._fqn_dict
     def _sample_subgraph(
         self,
         entity_table_name: str,
@@ -23,7 +47,11 @@ class SQLSampler(Sampler):
             columns=columns_dict[entity_table_name],
         )
         if len(batch) != len(entity_pkey):
-            raise KeyError("Invalid primary keys")  # TODO
+            mask = np.ones(len(entity_pkey), dtype=bool)
+            mask[batch] = False
+            raise KeyError(f"The primary keys "
+                           f"{entity_pkey.iloc[mask].tolist()} do not exist "
+                           f"in the '{entity_table_name}' table")
         perm = batch.argsort()
         batch = batch[perm]

kumoai/experimental/rfm/base/sql_table.py ADDED Viewed

@@ -0,0 +1,229 @@
+import warnings
+from abc import abstractmethod
+from collections import defaultdict
+from collections.abc import Sequence
+from functools import cached_property
+from typing import Any
+import pandas as pd
+from kumoapi.model_plan import MissingType
+from kumoai.experimental.rfm.base import (
+    ColumnExpression,
+    ColumnExpressionSpec,
+    ColumnExpressionType,
+    SourceForeignKey,
+    Table,
+)
+from kumoai.experimental.rfm.infer import infer_dtype, infer_stype
+from kumoai.utils import quote_ident
+class SQLTable(Table):
+    r"""A :class:`SQLTable` specifies a :class:`Table` backed by a SQL
+    database.
+    Args:
+        name: The logical name of this table.
+        source_name: The physical name of this table in the database. If set to
+            ``None``, ``name`` is being used.
+        columns: The selected physical columns of this table.
+        column_expressions: The logical columns of this table.
+        primary_key: The name of the primary key of this table, if it exists.
+        time_column: The name of the time column of this table, if it exists.
+        end_time_column: The name of the end time column of this table, if it
+            exists.
+    """
+    def __init__(
+        self,
+        name: str,
+        source_name: str | None = None,
+        columns: Sequence[str] | None = None,
+        column_expressions: Sequence[ColumnExpressionType] | None = None,
+        primary_key: MissingType | str | None = MissingType.VALUE,
+        time_column: str | None = None,
+        end_time_column: str | None = None,
+    ) -> None:
+        self._connection: Any
+        self._source_name = source_name or name
+        self._expression_sample_df = pd.DataFrame()
+        super().__init__(
+            name=name,
+            columns=[],
+            primary_key=None,
+            time_column=None,
+            end_time_column=None,
+        )
+        # Add column expressions with highest priority:
+        self.add_column_expressions(column_expressions or [])
+        if columns is None:
+            for column_name in self._source_column_dict.keys():
+                if column_name not in self:
+                    self.add_column(column_name)
+        else:
+            for column_name in columns:
+                self.add_column(column_name)
+        if isinstance(primary_key, MissingType):
+            # Inference from source column metadata:
+            if '_source_column_dict' in self.__dict__:
+                primary_key = self._source_primary_key
+                if (primary_key is not None and primary_key in self
+                        and self[primary_key].is_physical):
+                    self.primary_key = primary_key
+        elif primary_key is not None:
+            if primary_key not in self:
+                self.add_column(primary_key)
+            self.primary_key = primary_key
+        if time_column is not None:
+            if time_column not in self:
+                self.add_column(time_column)
+            self.time_column = time_column
+        if end_time_column is not None:
+            if end_time_column not in self:
+                self.add_column(end_time_column)
+            self.end_time_column = end_time_column
+    @property
+    def fqn(self) -> str:
+        r"""The fully-qualified quoted source table name."""
+        return quote_ident(self._source_name)
+    @cached_property
+    def _source_foreign_key_dict(self) -> dict[str, SourceForeignKey]:
+        fkeys = self._get_source_foreign_keys()
+        # NOTE Drop all keys that link to multiple keys in the same table since
+        # we don't support composite keys yet:
+        table_pkeys: dict[str, set[str]] = defaultdict(set)
+        for fkey in fkeys:
+            table_pkeys[fkey.dst_table].add(fkey.primary_key)
+        return {
+            fkey.name: fkey
+            for fkey in fkeys if len(table_pkeys[fkey.dst_table]) == 1
+        }
+    def _sample_current_df(self, columns: Sequence[str]) -> pd.DataFrame:
+        expr_columns: list[str] = []
+        source_columns: list[str] = []
+        for column_name in columns:
+            column = self[column_name]
+            if isinstance(column, ColumnExpression):
+                expr_columns.append(column_name)
+            else:
+                source_columns.append(column_name)
+        dfs: list[pd.DataFrame] = []
+        if len(expr_columns) > 0:
+            dfs.append(self._expression_sample_df[expr_columns])
+        if len(source_columns) > 0:
+            dfs.append(self._source_sample_df[source_columns])
+        if len(dfs) == 0:
+            return pd.DataFrame(index=range(1000))
+        if len(dfs) == 1:
+            return dfs[0]
+        return pd.concat(dfs, axis=1, ignore_index=True)
+    # Column ##################################################################
+    def add_column_expressions(
+        self,
+        columns: Sequence[ColumnExpressionType],
+    ) -> None:
+        r"""Adds a set of column expressions to this table.
+        Args:
+            columns: The set of column expressions.
+        Raises:
+            KeyError: If a column with the same name already exists in the
+                table.
+        """
+        if len(columns) == 0:
+            return
+        column_expression_specs = [
+            spec for column in columns
+            if (spec := ColumnExpressionSpec._cast(column))
+        ]
+        df = self._get_expression_sample_df(column_expression_specs)
+        for spec in column_expression_specs:
+            if spec.name in self:
+                raise KeyError(f"Column '{spec.name}' already exists in table "
+                               f"'{self.name}'")
+            dtype = spec.dtype
+            if dtype is None:
+                ser = df[spec.name]
+                try:
+                    dtype = infer_dtype(ser)
+                except Exception:
+                    warnings.warn(f"Encountered unsupported data type "
+                                  f"'{ser.dtype}' for column expression "
+                                  f"'{spec.name}' in table '{self.name}'."
+                                  f"Please manually specify the data type for "
+                                  f"this column expression to use it within "
+                                  f"this table, or remove it to suppress "
+                                  f"this warning.")
+                    continue
+            ser = df[spec.name]
+            try:
+                stype = infer_stype(ser, spec.name, dtype)
+            except Exception as e:
+                raise RuntimeError(f"Could not obtain semantic type for "
+                                   f"column expression '{spec.name}' with "
+                                   f"data type '{dtype}' in table "
+                                   f"'{self.name}'. Change the data type of "
+                                   f"the column expression or remove it from "
+                                   f"this table.") from e
+            self._columns[spec.name] = ColumnExpression(
+                name=spec.name,
+                expr=spec.expr,
+                stype=stype,
+                dtype=dtype,
+            )
+            with warnings.catch_warnings():
+                warnings.simplefilter('ignore', pd.errors.PerformanceWarning)
+                self._expression_sample_df[spec.name] = ser
+    def add_column_expression(
+        self,
+        column: ColumnExpressionType,
+    ) -> ColumnExpression:
+        r"""Adds a column expression to this table.
+        Args:
+            column: The column expression.
+        Raises:
+            KeyError: If a column with the same name already exists in the
+                table.
+        """
+        spec = ColumnExpressionSpec._cast(column)
+        assert spec is not None
+        self.add_column_expressions([spec])
+        column_expression = self.column(spec.name)
+        assert isinstance(column_expression, ColumnExpression)
+        return column_expression
+    # Abstract Methods ########################################################
+    @abstractmethod
+    def _get_source_foreign_keys(self) -> list[SourceForeignKey]:
+        pass
+    @abstractmethod
+    def _get_expression_sample_df(
+        self,
+        specs: Sequence[ColumnExpressionSpec],
+    ) -> pd.DataFrame:
+        pass