kumoai 2.15.0.dev202601121731__cp313-cp313-macosx_11_0_arm64.whl → 2.15.0.dev202601181732__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kumoai/__init__.py +15 -4
- kumoai/_version.py +1 -1
- kumoai/experimental/rfm/backend/snow/sampler.py +53 -17
- kumoai/experimental/rfm/backend/snow/table.py +16 -13
- kumoai/experimental/rfm/backend/sqlite/sampler.py +4 -2
- kumoai/experimental/rfm/base/utils.py +16 -7
- kumoai/experimental/rfm/graph.py +19 -7
- kumoai/experimental/rfm/infer/dtype.py +3 -1
- kumoai/experimental/rfm/rfm.py +2 -2
- kumoai/utils/display.py +13 -5
- kumoai/utils/progress_logger.py +205 -129
- kumoai/utils/sql.py +2 -2
- {kumoai-2.15.0.dev202601121731.dist-info → kumoai-2.15.0.dev202601181732.dist-info}/METADATA +1 -1
- {kumoai-2.15.0.dev202601121731.dist-info → kumoai-2.15.0.dev202601181732.dist-info}/RECORD +17 -17
- {kumoai-2.15.0.dev202601121731.dist-info → kumoai-2.15.0.dev202601181732.dist-info}/WHEEL +0 -0
- {kumoai-2.15.0.dev202601121731.dist-info → kumoai-2.15.0.dev202601181732.dist-info}/licenses/LICENSE +0 -0
- {kumoai-2.15.0.dev202601121731.dist-info → kumoai-2.15.0.dev202601181732.dist-info}/top_level.txt +0 -0
kumoai/__init__.py
CHANGED

@@ -277,7 +277,7 @@ __all__ = [
 ]


-def in_snowflake_notebook() -> bool:
+def in_streamlit_notebook() -> bool:
     try:
         from snowflake.snowpark.context import get_active_session
         import streamlit  # noqa: F401
@@ -287,9 +287,7 @@ def in_snowflake_notebook() -> bool:
         return False


-def in_notebook() -> bool:
-    if in_snowflake_notebook():
-        return True
+def in_jupyter_notebook() -> bool:
     try:
         from IPython import get_ipython
         shell = get_ipython()
@@ -298,3 +296,16 @@ def in_notebook() -> bool:
         return shell.__class__.__name__ == 'ZMQInteractiveShell'
     except Exception:
         return False
+
+
+def in_vnext_notebook() -> bool:
+    try:
+        from snowflake.snowpark.context import get_active_session
+        get_active_session()
+        return in_jupyter_notebook()
+    except Exception:
+        return False
+
+
+def in_notebook() -> bool:
+    return in_streamlit_notebook() or in_jupyter_notebook()
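
The helpers above split environment detection into independent checks that the rest of the package composes: `in_notebook()` is now simply "Streamlit or Jupyter", while `in_vnext_notebook()` additionally requires an active Snowpark session. A minimal sketch of how downstream code branches on them, mirroring what `kumoai/utils/display.py` does later in this diff (the `render` helper itself is illustrative, not part of the package):

from kumoai import in_jupyter_notebook, in_notebook, in_streamlit_notebook


def render(msg: str) -> None:
    # Illustrative dispatch only: pick an output channel based on the
    # detected environment, falling back to plain stdout in a terminal.
    if in_streamlit_notebook():
        import streamlit as st
        st.markdown(msg)
    elif in_jupyter_notebook():
        from IPython.display import Markdown, display
        display(Markdown(msg))
    else:
        assert not in_notebook()  # Holds by definition of `in_notebook()`.
        print(msg)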

kumoai/_version.py
CHANGED

@@ -1 +1 @@
-__version__ = '2.15.0.dev202601121731'
+__version__ = '2.15.0.dev202601181732'

kumoai/experimental/rfm/backend/snow/sampler.py
CHANGED

@@ -1,7 +1,8 @@
 import json
+import math
 from collections.abc import Iterator
 from contextlib import contextmanager
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, cast

 import numpy as np
 import pandas as pd
@@ -11,7 +12,7 @@ from kumoapi.pquery import ValidatedPredictiveQuery
 from kumoai.experimental.rfm.backend.snow import Connection, SnowTable
 from kumoai.experimental.rfm.base import SQLSampler, Table
 from kumoai.experimental.rfm.pquery import PQueryPandasExecutor
-from kumoai.utils import ProgressLogger
+from kumoai.utils import ProgressLogger, quote_ident

 if TYPE_CHECKING:
     from kumoai.experimental.rfm import Graph
@@ -37,6 +38,15 @@ class SnowSampler(SQLSampler):
         assert isinstance(table, SnowTable)
         self._connection = table._connection

+        self._num_rows_dict: dict[str, int] = {
+            table.name: cast(int, table._num_rows)
+            for table in graph.tables.values()
+        }
+
+    @property
+    def num_rows_dict(self) -> dict[str, int]:
+        return self._num_rows_dict
+
     def _get_min_max_time_dict(
         self,
         table_names: list[str],
@@ -45,8 +55,9 @@ class SnowSampler(SQLSampler):
         for table_name in table_names:
            column = self.time_column_dict[table_name]
            column_ref = self.table_column_ref_dict[table_name][column]
+           ident = quote_ident(table_name, char="'")
            select = (f"SELECT\n"
-                     f"
+                     f" {ident} as table_name,\n"
                      f" MIN({column_ref}) as min_date,\n"
                      f" MAX({column_ref}) as max_date\n"
                      f"FROM {self.source_name_dict[table_name]}")
@@ -54,14 +65,13 @@ class SnowSampler(SQLSampler):
        sql = "\nUNION ALL\n".join(selects)

        out_dict: dict[str, tuple[pd.Timestamp, pd.Timestamp]] = {}
-       with
-           cursor.execute(sql
-       )
+       with self._connection.cursor() as cursor:
+           cursor.execute(sql)
+           for table_name, _min, _max in cursor.fetchall():
+               out_dict[table_name] = (
+                   pd.Timestamp.max if _min is None else pd.Timestamp(_min),
+                   pd.Timestamp.min if _max is None else pd.Timestamp(_max),
+               )

        return out_dict

@@ -239,9 +249,30 @@ class SnowSampler(SQLSampler):
    ) -> tuple[pd.DataFrame, np.ndarray]:
        time_column = self.time_column_dict.get(table_name)

+       end_time: pd.Series | None = None
+       start_time: pd.Series | None = None
        if time_column is not None and anchor_time is not None:
+           # In order to avoid a full table scan, we limit foreign key
+           # sampling to a certain time range, approximated by the number of
+           # rows, timestamp ranges and `num_neighbors` value.
+           # Downstream, this helps Snowflake to apply partition pruning:
+           dst_table_name = [
+               dst_table
+               for key, dst_table in self.foreign_key_dict[table_name]
+               if key == foreign_key
+           ][0]
+           num_facts = self.num_rows_dict[table_name]
+           num_entities = self.num_rows_dict[dst_table_name]
+           min_time = self.get_min_time([table_name])
+           max_time = self.get_max_time([table_name])
+           freq = num_facts / num_entities
+           freq = freq / max((max_time - min_time).total_seconds(), 1)
+           offset = pd.Timedelta(seconds=math.ceil(5 * num_neighbors / freq))
+
+           end_time = anchor_time.dt.strftime("%Y-%m-%d %H:%M:%S")
+           start_time = anchor_time - offset
+           start_time = start_time.dt.strftime("%Y-%m-%d %H:%M:%S")
+           payload = json.dumps(list(zip(index, end_time, start_time)))
        else:
            payload = json.dumps(list(zip(index)))

@@ -260,9 +291,10 @@ class SnowSampler(SQLSampler):
            sql += " f.value[0]::FLOAT as __KUMO_ID__"
        else:
            sql += " f.value[0]::VARCHAR as __KUMO_ID__"
-       if
+       if end_time is not None and start_time is not None:
            sql += (",\n"
-                   " f.value[1]::TIMESTAMP_NTZ as
+                   " f.value[1]::TIMESTAMP_NTZ as __KUMO_END_TIME__,\n"
+                   " f.value[2]::TIMESTAMP_NTZ as __KUMO_START_TIME__")
        sql += (f"\n"
                f" FROM TABLE(FLATTEN(INPUT => PARSE_JSON(?))) f\n"
                f")\n"
@@ -272,9 +304,13 @@ class SnowSampler(SQLSampler):
                f"FROM TMP\n"
                f"JOIN {self.source_name_dict[table_name]}\n"
                f" ON {key_ref} = TMP.__KUMO_ID__\n")
-       if
+       if end_time is not None and start_time is not None:
+           assert time_column is not None
            time_ref = self.table_column_ref_dict[table_name][time_column]
-           sql += f" AND {time_ref} <= TMP.
+           sql += (f" AND {time_ref} <= TMP.__KUMO_END_TIME__\n"
+                   f" AND {time_ref} > TMP.__KUMO_START_TIME__\n"
+                   f"WHERE {time_ref} <= '{end_time.max()}'\n"
+                   f" AND {time_ref} > '{start_time.min()}'\n")
        sql += ("QUALIFY ROW_NUMBER() OVER (\n"
                " PARTITION BY TMP.__KUMO_BATCH__\n")
        if time_column is not None:
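
The pruning window added in the foreign-key sampling path can be read as: estimate the average number of facts per entity per second, then size the window so that roughly `5 * num_neighbors` facts are expected to fall inside it. A worked example of the same arithmetic outside the class, with entirely hypothetical table sizes:

import math

import pandas as pd

# Hypothetical sizes: 100M fact rows, 100K entities, two years of history.
num_facts = 100_000_000
num_entities = 100_000
num_neighbors = 32
min_time = pd.Timestamp('2024-01-01')
max_time = pd.Timestamp('2026-01-01')

# Facts per entity per second, as in the `freq` computation above:
freq = num_facts / num_entities
freq = freq / max((max_time - min_time).total_seconds(), 1)

# Window sized so that ~5 * num_neighbors facts per entity are expected:
offset = pd.Timedelta(seconds=math.ceil(5 * num_neighbors / freq))
print(offset)  # Roughly 117 days for these numbers.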

kumoai/experimental/rfm/backend/snow/table.py
CHANGED

@@ -76,21 +76,13 @@ class SnowTable(Table):

     @property
     def source_name(self) -> str:
-        names
-        names.append(self._database)
-        if self._schema is not None:
-            names.append(self._schema)
-        return '.'.join(names + [self._source_name])
+        names = [self._database, self._schema, self._source_name]
+        return '.'.join(names)

     @property
     def _quoted_source_name(self) -> str:
-        names
-        names.append(quote_ident(self._database))
-        if self._schema is not None:
-            names.append(quote_ident(self._schema))
-        return '.'.join(names + [quote_ident(self._source_name)])
+        names = [self._database, self._schema, self._source_name]
+        return '.'.join([quote_ident(name) for name in names])

     @property
     def backend(self) -> DataBackend:
@@ -159,7 +151,18 @@ class SnowTable(Table):
         )

     def _get_num_rows(self) -> int | None:
+        with self._connection.cursor() as cursor:
+            quoted_source_name = quote_ident(self._source_name, char="'")
+            sql = (f"SHOW TABLES LIKE {quoted_source_name} "
+                   f"IN SCHEMA {quote_ident(self._database)}."
+                   f"{quote_ident(self._schema)}")
+            cursor.execute(sql)
+            num_rows = cursor.fetchone()[7]
+
+        if num_rows == 0:
+            raise RuntimeError("Table '{self.source_name}' is empty")
+
+        return num_rows

     def _get_expr_sample_df(
         self,

kumoai/experimental/rfm/backend/sqlite/sampler.py
CHANGED

@@ -121,8 +121,9 @@ class SQLiteSampler(SQLSampler):
        for table_name in table_names:
            column = self.time_column_dict[table_name]
            column_ref = self.table_column_ref_dict[table_name][column]
+           ident = quote_ident(table_name, char="'")
            select = (f"SELECT\n"
-                     f"
+                     f" {ident} as table_name,\n"
                      f" MIN({column_ref}) as min_date,\n"
                      f" MAX({column_ref}) as max_date\n"
                      f"FROM {self.source_name_dict[table_name]}")
@@ -131,12 +132,13 @@ class SQLiteSampler(SQLSampler):

        out_dict: dict[str, tuple[pd.Timestamp, pd.Timestamp]] = {}
        with self._connection.cursor() as cursor:
-           cursor.execute(sql
+           cursor.execute(sql)
            for table_name, _min, _max in cursor.fetchall():
                out_dict[table_name] = (
                    pd.Timestamp.max if _min is None else pd.Timestamp(_min),
                    pd.Timestamp.min if _max is None else pd.Timestamp(_max),
                )
+
        return out_dict

    def _sample_entity_table(

kumoai/experimental/rfm/base/utils.py
CHANGED

@@ -4,11 +4,22 @@ import pandas as pd
 import pyarrow as pa


+def is_datetime(ser: pd.Series) -> bool:
+    r"""Check whether a :class:`pandas.Series` holds datetime values."""
+    if isinstance(ser.dtype, pd.ArrowDtype):
+        dtype = ser.dtype.pyarrow_dtype
+        return (pa.types.is_timestamp(dtype) or pa.types.is_date(dtype)
+                or pa.types.is_time(dtype))
+
+    return pd.api.types.is_datetime64_any_dtype(ser)
+
+
 def to_datetime(ser: pd.Series) -> pd.Series:
-    """Converts a :class:`
-    if (
+    """Converts a :class:`pandas.Series` to ``datetime64[ns]`` format."""
+    if isinstance(ser.dtype, pd.ArrowDtype):
+        ser = pd.Series(ser.to_numpy(), index=ser.index, name=ser.name)
+
+    if not pd.api.types.is_datetime64_any_dtype(ser):
         with warnings.catch_warnings():
             warnings.filterwarnings(
                 'ignore',
@@ -16,9 +27,7 @@ def to_datetime(ser: pd.Series) -> pd.Series:
             )
             ser = pd.to_datetime(ser, errors='coerce')

-    if
-        or (isinstance(ser.dtype, pd.ArrowDtype)
-            and ser.dtype.pyarrow_dtype.tz is not None)):
+    if isinstance(ser.dtype, pd.DatetimeTZDtype):
         ser = ser.dt.tz_localize(None)

     if ser.dtype != 'datetime64[ns]':
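
The new `is_datetime` helper and the Arrow branch in `to_datetime` mainly matter for Arrow-backed columns. A small sketch of the intended behavior, assuming pandas 2.x with pyarrow installed (the sample data is made up):

import pandas as pd
import pyarrow as pa

from kumoai.experimental.rfm.base.utils import is_datetime, to_datetime

# An Arrow-backed timestamp column is recognized without any conversion:
ser = pd.Series(pd.to_datetime(['2026-01-12', '2026-01-18']))
ser = ser.astype(pd.ArrowDtype(pa.timestamp('us')))
assert is_datetime(ser)

# Per its docstring, `to_datetime` normalizes it to `datetime64[ns]`:
out = to_datetime(ser)
assert out.dtype == 'datetime64[ns]'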

kumoai/experimental/rfm/graph.py
CHANGED

@@ -17,8 +17,9 @@ from kumoapi.table import TableDefinition
 from kumoapi.typing import Stype
 from typing_extensions import Self

-from kumoai import
+from kumoai import in_jupyter_notebook, in_streamlit_notebook
 from kumoai.experimental.rfm.base import ColumnSpec, DataBackend, Table
+from kumoai.experimental.rfm.infer import infer_time_column
 from kumoai.graph import Edge
 from kumoai.mixin import CastMixin
 from kumoai.utils import display
@@ -415,8 +416,9 @@ class Graph:
         assert isinstance(connection, Connection)

         with connection.cursor() as cursor:
+            sql = (f"SELECT SYSTEM$READ_YAML_FROM_SEMANTIC_VIEW("
+                   f"'{semantic_view_name}')")
+            cursor.execute(sql)
             cfg = yaml.safe_load(cursor.fetchone()[0])

         graph = cls(tables=[])
@@ -492,7 +494,17 @@ class Graph:
             )

             # TODO Add a way to register time columns without heuristic usage.
+            time_candidates = [
+                column_cfg['name']
+                for column_cfg in table_cfg.get('time_dimensions', [])
+                if table.has_column(column_cfg['name'])
+                and table[column_cfg['name']].stype == Stype.timestamp
+            ]
+            if time_column := infer_time_column(
+                    df=table._get_sample_df(),
+                    candidates=time_candidates,
+            ):
+                table.time_column = time_column

             graph.add_table(table)

@@ -1071,7 +1083,7 @@ class Graph:
             raise ImportError("The 'graphviz' package is required for "
                               "visualization") from e

-        if not
+        if not in_streamlit_notebook() and not has_graphviz_executables():
             raise RuntimeError("Could not visualize graph as 'graphviz' "
                                "executables are not installed. These "
                                "dependencies are required in addition to the "
@@ -1161,10 +1173,10 @@ class Graph:
             graph.render(path, cleanup=True)
         elif isinstance(path, io.BytesIO):
             path.write(graph.pipe())
-        elif
+        elif in_streamlit_notebook():
             import streamlit as st
             st.graphviz_chart(graph)
-        elif
+        elif in_jupyter_notebook():
             from IPython.display import display
             display(graph)
         else:

kumoai/experimental/rfm/infer/dtype.py
CHANGED

@@ -3,6 +3,8 @@ import pandas as pd
 import pyarrow as pa
 from kumoapi.typing import Dtype

+from kumoai.experimental.rfm.base.utils import is_datetime
+
 PANDAS_TO_DTYPE: dict[str, Dtype] = {
     'bool': Dtype.bool,
     'boolean': Dtype.bool,
@@ -34,7 +36,7 @@ def infer_dtype(ser: pd.Series) -> Dtype:
     Returns:
         The data type.
     """
-    if
+    if is_datetime(ser):
         return Dtype.date
     if pd.api.types.is_timedelta64_dtype(ser.dtype):
         return Dtype.timedelta

kumoai/experimental/rfm/rfm.py
CHANGED

@@ -610,7 +610,7 @@ class KumoRFM:

         if start == 0 and task.num_prediction_examples > batch_size:
             num = math.ceil(task.num_prediction_examples / batch_size)
-            verbose.init_progress(
+            verbose.init_progress(msg='Predicting', total=num)

         for attempt in range(self._num_retries + 1):
             try:
@@ -643,7 +643,7 @@ class KumoRFM:
                 df['ANCHOR_TIMESTAMP'] = pd.to_datetime(
                     ser, errors='coerce', unit=unit)

-            predictions.append(df)
+            predictions.append(df.reset_index(drop=True))

             if task.num_prediction_examples > batch_size:
                 verbose.step()

kumoai/utils/display.py
CHANGED

@@ -6,14 +6,19 @@ from rich.console import Console
 from rich.table import Table
 from rich.text import Text

-from kumoai import
+from kumoai import (
+    in_jupyter_notebook,
+    in_notebook,
+    in_streamlit_notebook,
+    in_vnext_notebook,
+)


 def message(msg: str) -> None:
-    if
+    if in_streamlit_notebook():
         import streamlit as st
         st.markdown(msg)
-    elif
+    elif in_jupyter_notebook():
         from IPython.display import Markdown, display
         display(Markdown(msg))
     else:
@@ -54,10 +59,13 @@ def unordered_list(items: Sequence[str]) -> None:


 def dataframe(df: pd.DataFrame) -> None:
-    if
+    if in_streamlit_notebook():
         import streamlit as st
         st.dataframe(df, hide_index=True)
-    elif
+    elif in_vnext_notebook():
+        from IPython.display import display
+        display(df.reset_index(drop=True))
+    elif in_jupyter_notebook():
         from IPython.display import display
         try:
             if hasattr(df.style, 'hide'):

kumoai/utils/progress_logger.py
CHANGED

@@ -1,6 +1,7 @@
 import re
 import sys
 import time
+from abc import ABC, abstractmethod
 from typing import Any

 from rich.console import Console, ConsoleOptions, RenderResult
@@ -20,52 +21,179 @@ from rich.text import Text
 from typing_extensions import Self


-class ProgressLogger:
+class ProgressLogger(ABC):
+    r"""An abstract base class for logging progress updates."""
     def __init__(self, msg: str, verbose: bool = True) -> None:
-        self.msg = msg
-        self.verbose = verbose
-        self.depth = 0
+        self.msg: str = msg
+        self.verbose: bool = verbose

         self.logs: list[str] = []

         self.start_time: float | None = None
         self.end_time: float | None = None

+        # Handle nested loggers gracefully:
+        self._depth: int = 0
+
+        # Internal progress bar cache:
+        self._progress_bar_msg: str | None = None
+        self._total: int = 0
+        self._current: int = 0
+
+    def __repr__(self) -> str:
+        return f'{self.__class__.__name__}()'
+
     @classmethod
     def default(cls, msg: str, verbose: bool = True) -> 'ProgressLogger':
+        r"""The default progress logger for the current environment."""
+        from kumoai import in_streamlit_notebook, in_vnext_notebook

-        if
+        if in_streamlit_notebook():
             return StreamlitProgressLogger(msg, verbose)
+        if in_vnext_notebook():
+            return PlainProgressLogger(msg, verbose)
         return RichProgressLogger(msg, verbose)

     @property
     def duration(self) -> float:
+        r"""The current/final duration."""
         assert self.start_time is not None
         if self.end_time is not None:
             return self.end_time - self.start_time
         return time.perf_counter() - self.start_time

+    def __enter__(self) -> Self:
+        from kumoai import in_notebook
+
+        self._depth += 1
+        if self._depth == 1:
+            self.start_time = time.perf_counter()
+        if self._depth == 1 and not in_notebook():  # Show progress bar in TUI.
+            sys.stdout.write("\x1b]9;4;3\x07")
+            sys.stdout.flush()
+        if self._depth == 1 and self.verbose:
+            self.on_enter()
+        return self
+
+    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        from kumoai import in_notebook
+
+        self._depth -= 1
+        if self._depth == 0:
+            self.end_time = time.perf_counter()
+        if self._depth == 0 and self.verbose:
+            self.on_exit(error=exc_val is not None)
+        if self._depth == 0 and not in_notebook():  # Stop progress bar in TUI.
+            sys.stdout.write("\x1b]9;4;0\x07")
+            sys.stdout.flush()
+
     def log(self, msg: str) -> None:
+        r"""Logs a new message."""
         self.logs.append(msg)
+        if self.verbose:
+            self.on_log(msg)

-    def init_progress(self,
+    def init_progress(self, msg: str, total: int) -> None:
+        r"""Initializes a progress bar."""
+        if self._progress_bar_msg is not None:
+            raise RuntimeError("Current progress not yet finished")
+        self._progress_bar_msg = msg
+        self._current = 0
+        self._total = total
+        if self.verbose:
+            self.on_init_progress(msg, total)

     def step(self) -> None:
+        r"""Increments an active progress bar."""
+        assert self._progress_bar_msg is not None
+        self._current += 1
+        if self.verbose:
+            self.on_step(self._progress_bar_msg, self._current, self._total)
+        if self._current >= self._total:
+            self._progress_bar_msg = None
+            self._current = self._total = 0
+
+    @abstractmethod
+    def on_enter(self) -> None:
         pass

-        return self
+    @abstractmethod
+    def on_exit(self, error: bool) -> None:
+        pass

+    @abstractmethod
+    def on_log(self, msg: str) -> None:
+        pass

+    @abstractmethod
+    def on_init_progress(self, msg: str, total: int) -> None:
+        pass
+
+    @abstractmethod
+    def on_step(self, msg: str, current: int, total: int) -> None:
+        pass
+
+
+class PlainProgressLogger(ProgressLogger):
+    RESET: str = '\x1b[0m'
+    BOLD: str = '\x1b[1m'
+    DIM: str = '\x1b[2m'
+    RED: str = '\x1b[31m'
+    GREEN: str = '\x1b[32m'
+    CYAN: str = '\x1b[36m'
+
+    def on_enter(self) -> None:
+        from kumoai import in_vnext_notebook
+
+        msg = self.msg.replace('[bold]', self.BOLD)
+        msg = msg.replace('[/bold]', self.RESET + self.CYAN)
+        msg = self.CYAN + msg + self.RESET
+        print(msg, end='\n' if in_vnext_notebook() else '', flush=True)
+
+    def on_exit(self, error: bool) -> None:
+        from kumoai import in_vnext_notebook
+
+        if error:
+            msg = f"❌ {self.RED}({self.duration:.2f}s){self.RESET}"
+        else:
+            msg = f"✅ {self.GREEN}({self.duration:.2f}s){self.RESET}"
+
+        if in_vnext_notebook():
+            print(f"{self.DIM}↳{self.RESET} {msg}", flush=True)
+        else:
+            print(f" {msg}", flush=True)
+
+    def on_log(self, msg: str) -> None:
+        from kumoai import in_vnext_notebook
+
+        msg = f"{self.DIM}↳ {msg}{self.RESET}"
+
+        if in_vnext_notebook():
+            print(msg, flush=True)
+        else:
+            print(f"\n{msg}", end='', flush=True)
+
+    def on_init_progress(self, msg: str, total: int) -> None:
+        from kumoai import in_vnext_notebook
+
+        msg = f"{self.DIM}↳ {msg}{self.RESET}"
+
+        if in_vnext_notebook():
+            print(msg, flush=True)
+        else:
+            print(f"\n{msg} {self.DIM}[{self.RESET}", end='', flush=True)
+
+    def on_step(self, msg: str, current: int, total: int) -> None:
+        from kumoai import in_vnext_notebook
+
+        if in_vnext_notebook():
+            return
+
+        msg = f"{self.DIM}#{self.RESET}"
+        if current == total:
+            msg += f"{self.DIM}]{self.RESET}"
+
+        print(msg, end='', flush=True)


 class ColoredMofNCompleteColumn(MofNCompleteColumn):
@@ -103,71 +231,51 @@ class RichProgressLogger(ProgressLogger):
         self._live: Live | None = None
         self._exception: bool = False

-    def
-            self.
-            TextColumn('•', style='dim'),
-            ColoredTimeRemainingColumn(style='dim'),
-        )
-        self._task = self._progress.add_task("Progress", total=total)
-
-    def step(self) -> None:
-        if self.verbose:
-            assert self._progress is not None
-            assert self._task is not None
-            self._progress.update(self._task, advance=1)  # type: ignore
-
-    def __enter__(self) -> Self:
-        from kumoai import in_notebook
-
-        super().__enter__()
-
-        if self.depth > 1:
-            return self
-
-            sys.stdout.flush()
-
-        if self.
-            self.
-                self,
-                refresh_per_second=self.refresh_per_second,
-                vertical_overflow='visible',
-            )
-            self._live.start()
-
-        if self._live is not None:
-            self._live.update(self, refresh=True)
-            self._live.stop()
-            self._live = None
-
-        if not in_notebook():
-            sys.stdout.write("\x1b]9;4;0\x07")
-            sys.stdout.flush()
+    def on_enter(self) -> None:
+        self._live = Live(
+            self,
+            refresh_per_second=self.refresh_per_second,
+            vertical_overflow='visible',
+        )
+        self._live.start()
+
+    def on_exit(self, error: bool) -> None:
+        self._exception = error
+
+        if self._progress is not None:
+            self._progress.stop()
+
+        if self._live is not None:
+            self._live.update(self, refresh=True)
+            self._live.stop()
+
+        self._progress = None
+        self._task = None
+        self._live = None
+
+    def on_log(self, msg: str) -> None:
+        pass
+
+    def on_init_progress(self, msg: str, total: int) -> None:
+        self._progress = Progress(
+            TextColumn(f' ↳ {msg}', style='dim'),
+            BarColumn(bar_width=None),
+            ColoredMofNCompleteColumn(style='dim'),
+            TextColumn('•', style='dim'),
+            ColoredTimeRemainingColumn(style='dim'),
+        )
+        self._task = self._progress.add_task("Progress", total=total)
+
+    def on_step(self, msg: str, current: int, total: int) -> None:
+        assert self._progress is not None
+        assert self._task is not None
+        self._progress.update(self._task, advance=1)  # type: ignore

-        if
+        if current == total:
            self._progress.stop()
            self._progress = None
            self._task = None

     def __rich_console__(
         self,
         console: Console,
@@ -198,7 +306,7 @@ class RichProgressLogger(ProgressLogger):

         yield table

-        if self.
+        if self._progress is not None:
             yield self._progress.get_renderable()


@@ -211,82 +319,50 @@ class StreamlitProgressLogger(ProgressLogger):
         super().__init__(msg=msg, verbose=verbose)

         self._status: Any = None
-        self._total = 0
-        self._current = 0
-        self._description: str = ''
         self._progress: Any = None

+    @staticmethod
+    def _sanitize_text(msg: str) -> str:
+        return re.sub(r'\[/?bold\]', '**', msg)

+    def on_enter(self) -> None:
         import streamlit as st

-        if self.depth > 1:
-            return self
-
         # Adjust layout for prettier output:
         st.markdown(STREAMLIT_CSS, unsafe_allow_html=True)

-        self.
+        self._status = st.status(
+            f':blue[{self._sanitize_text(self.msg)}]',
+            expanded=True,
+        )
+
+    def on_exit(self, error: bool) -> None:
+        if self._status is not None:
+            label = f'{self._sanitize_text(self.msg)} ({self.duration:.2f}s)'
+            self._status.update(
+                label=f':red[{label}]' if error else f':green[{label}]',
+                state='error' if error else 'complete',
                 expanded=True,
             )

+    def on_log(self, msg: str) -> None:
+        if self._status is not None:
+            self._status.write(msg)

-    def
-        if self.verbose and self._status is not None:
-            self._status.write(self._sanitize_text(msg))
-
-    def init_progress(self, total: int, description: str) -> None:
-        if self.verbose and self._status is not None:
-            self._total = total
-            self._current = 0
-            self._description = self._sanitize_text(description)
-            percent = min(self._current / self._total, 1.0)
+    def on_init_progress(self, msg: str, total: int) -> None:
+        if self._status is not None:
             self._progress = self._status.progress(
-                value=
-                text=f'{
+                value=0.0,
+                text=f'{msg} [{0}/{total}]',
             )

-    def
-        self.
-
-        if self.verbose and self._progress is not None:
-            percent = min(self._current / self._total, 1.0)
+    def on_step(self, msg: str, current: int, total: int) -> None:
+        if self._progress is not None:
             self._progress.progress(
-                value=
-                text=f'{
+                value=min(current / total, 1.0),
+                text=f'{msg} [{current}/{total}]',
             )

-    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
-        super().__exit__(exc_type, exc_val, exc_tb)
-
-        if not self.verbose or self._status is None or self.depth > 1:
-            return
-
-        label = f'{self._sanitize_text(self.msg)} ({self.duration:.2f}s)'
-
-        if exc_type is not None:
-            self._status.update(
-                label=f':red[{label}]',
-                state='error',
-                expanded=True,
-            )
-        else:
-            self._status.update(
-                label=f':green[{label}]',
-                state='complete',
-                expanded=True,
-            )
-
-    @staticmethod
-    def _sanitize_text(msg: str) -> str:
-        return re.sub(r'\[/?bold\]', '**', msg)
-

 STREAMLIT_CSS = """
 <style>
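
After this refactor, the base class owns nesting, timing, and progress-bar bookkeeping, and each concrete logger only implements the `on_*` hooks. A minimal illustration of the resulting call pattern (the batch loop and counts are made up; the 'Predicting' message matches the call added in `kumoai/experimental/rfm/rfm.py` above):

from kumoai.utils import ProgressLogger

# `ProgressLogger.default()` picks a Streamlit, plain, or rich logger based
# on the detected environment (see `kumoai/__init__.py` above).
with ProgressLogger.default('[bold]Predicting[/bold]') as logger:
    logger.log('Prepared 3 batches')
    logger.init_progress(msg='Predicting', total=3)
    for _ in range(3):
        ...  # Hypothetical per-batch work.
        logger.step()  # Clears the internal bar once `total` is reached.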

kumoai/utils/sql.py
CHANGED

@@ -1,3 +1,3 @@
-def quote_ident(
+def quote_ident(ident: str, char: str = '"') -> str:
     r"""Quotes a SQL identifier."""
-    return
+    return char + ident.replace(char, char + char) + char
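
`quote_ident` doubles any embedded quote character before wrapping the identifier, which is what the Snowflake and SQLite samplers above rely on when they embed table names as single-quoted string literals. A few illustrative checks:

from kumoai.utils import quote_ident

assert quote_ident('users') == '"users"'
assert quote_ident('weird"name') == '"weird""name"'

# The samplers pass char="'" to build SQL string literals instead:
assert quote_ident("o'brien", char="'") == "'o''brien'"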

{kumoai-2.15.0.dev202601121731.dist-info → kumoai-2.15.0.dev202601181732.dist-info}/RECORD
RENAMED

@@ -1,8 +1,8 @@
 kumoai/kumolib.cpython-313-darwin.so,sha256=waBv-DiZ3WcasxiCQ-OM9EbSTgTtCfBTZIibXAK-JiQ,232816
 kumoai/_logging.py,sha256=U2_5ROdyk92P4xO4H2WJV8EC7dr6YxmmnM-b7QX9M7I,886
 kumoai/mixin.py,sha256=MP413xzuCqWhxAPUHmloLA3j4ZyF1tEtfi516b_hOXQ,812
-kumoai/_version.py,sha256=
-kumoai/__init__.py,sha256=
+kumoai/_version.py,sha256=2ksd2GuX-AZRJGtcuDxCIRV0etIpcKZFRxJlf6Of638,39
+kumoai/__init__.py,sha256=n2Mi2n5S_WKpxpCInQKfGEmsIWVwrX86nGnYn5HwtIE,11171
 kumoai/formatting.py,sha256=jA_rLDCGKZI8WWCha-vtuLenVKTZvli99Tqpurz1H84,953
 kumoai/futures.py,sha256=oJFIfdCM_3nWIqQteBKYMY4fPhoYlYWE_JA2o6tx-ng,3737
 kumoai/jobs.py,sha256=NrdLEFNo7oeCYSy-kj2nAvCFrz9BZ_xrhkqHFHk5ksY,2496
@@ -12,23 +12,23 @@ kumoai/spcs.py,sha256=N31d7rLa-bgYh8e2J4YzX1ScxGLqiVXrqJnCl1y4Mts,4139
 kumoai/_singleton.py,sha256=UTwrbDkoZSGB8ZelorvprPDDv9uZkUi1q_SrmsyngpQ,836
 kumoai/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 kumoai/experimental/rfm/relbench.py,sha256=cVsxxV3TIL3PLEoYb-8tAVW3GSef6NQAd3rxdHJL63I,2276
-kumoai/experimental/rfm/graph.py,sha256=
+kumoai/experimental/rfm/graph.py,sha256=4Jo17oYSoZouzvNQT2-Ai9GOX-bIdefPcxj_gcoM3dI,46873
 kumoai/experimental/rfm/__init__.py,sha256=bW2XyYtkbdiu_iICYFF2Fu1Fx5fyGbqne6m_6c1P-fY,7016
 kumoai/experimental/rfm/sagemaker.py,sha256=6fyXO1Jd_scq-DH7kcv6JcV8QPyTbh4ceqwQDPADlZ0,4963
-kumoai/experimental/rfm/rfm.py,sha256=
+kumoai/experimental/rfm/rfm.py,sha256=XsxwiDIvlZ_js7rvvffrOiXFsLX15-C7N0T9M-aptCw,60017
 kumoai/experimental/rfm/authenticate.py,sha256=G2RkRWznMVQUzvhvbKhn0bMCY7VmoNYxluz3THRqSdE,18851
 kumoai/experimental/rfm/task_table.py,sha256=n_gZNQlCqHOiAkbeaa18nnQ-amt1oWKA9riO2rkrZuw,9847
 kumoai/experimental/rfm/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 kumoai/experimental/rfm/backend/sqlite/__init__.py,sha256=jl-DBbhsqQ-dUXyWhyQTM1AU2qNAtXCmi1mokdhtBTg,902
 kumoai/experimental/rfm/backend/sqlite/table.py,sha256=WqYtd_rwlawItRMXZUfv14qdyU6huQmODuFjDo483dI,6683
-kumoai/experimental/rfm/backend/sqlite/sampler.py,sha256=
+kumoai/experimental/rfm/backend/sqlite/sampler.py,sha256=I-zaSMd5XLg0qaJOoCR8arFBauUfhW_ZMl7gI97ress,18699
 kumoai/experimental/rfm/backend/local/__init__.py,sha256=2s9sSA-E-8pfkkzCH4XPuaSxSznEURMfMgwEIfYYPsg,1014
 kumoai/experimental/rfm/backend/local/table.py,sha256=GKeYGcu52ztCU8EBMqp5UVj85E145Ug41xiCPiTCXq4,3489
 kumoai/experimental/rfm/backend/local/graph_store.py,sha256=RHhkI13KpdPxqb4vXkwEwuFiX5DkrEsfZsOLywNnrvU,11294
 kumoai/experimental/rfm/backend/local/sampler.py,sha256=UKxTjsYs00sYuV_LAlDuZOvQq0BZzPCzZK1Fki2Fd70,10726
 kumoai/experimental/rfm/backend/snow/__init__.py,sha256=BYfsiuJ4Ee30GjG9EuUtitMHXnRfvVKi85zNlIwldV4,993
-kumoai/experimental/rfm/backend/snow/table.py,sha256=
-kumoai/experimental/rfm/backend/snow/sampler.py,sha256=
+kumoai/experimental/rfm/backend/snow/table.py,sha256=1RXpPiTxawTTOFprXvu7jDLG0ZGio_vE9lSfB6wqbWM,9078
+kumoai/experimental/rfm/backend/snow/sampler.py,sha256=tDOEiPTFFG6pWDcuuTvaOBAsMJLsxu4PzqryIgH1Kb4,16322
 kumoai/experimental/rfm/pquery/__init__.py,sha256=X0O3EIq5SMfBEE-ii5Cq6iDhR3s3XMXB52Cx5htoePw,152
 kumoai/experimental/rfm/pquery/pandas_executor.py,sha256=MwSvFRwLq-z19LEdF0G0AT7Gj9tCqu-XLEA7mNbqXwc,18454
 kumoai/experimental/rfm/pquery/executor.py,sha256=gs5AVNaA50ci8zXOBD3qt5szdTReSwTs4BGuEyx4BEE,2728
@@ -37,14 +37,14 @@ kumoai/experimental/rfm/infer/categorical.py,sha256=VwNaKwKbRYkTxEJ1R6gziffC8dGs
 kumoai/experimental/rfm/infer/time_col.py,sha256=iw_aUcHD2bHr7uRa3E7uDC30kU37aLIRTVAFdQEpt68,1818
 kumoai/experimental/rfm/infer/pkey.py,sha256=IaJI5GHK8ds_a3AOr3YYVgUlSmYYEgr4Nu92s2RyBV4,4412
 kumoai/experimental/rfm/infer/id.py,sha256=ZIO0DWIoiEoS_8MVc5lkqBfkTWWQ0yGCgjkwLdaYa_Q,908
-kumoai/experimental/rfm/infer/dtype.py,sha256=
+kumoai/experimental/rfm/infer/dtype.py,sha256=fbRRyyKSzO4riqX3RlhvBK7DhnjhwTgZVUjQ9inVPYI,2811
 kumoai/experimental/rfm/infer/__init__.py,sha256=8GDxQKd0pxZULdk7mpwl3CsOpL4v2HPuPEsbi2t_vzc,519
 kumoai/experimental/rfm/infer/timestamp.py,sha256=vM9--7eStzaGG13Y-oLYlpNJyhL6f9dp17HDXwtl_DM,1094
 kumoai/experimental/rfm/infer/stype.py,sha256=fu4zsOB-C7jNeMnq6dsK4bOZSewe7PtZe_AkohSRLoM,894
 kumoai/experimental/rfm/base/sql_sampler.py,sha256=_go8TnH7AHki-0gg_pB7xd228VYhogQh10OkxT7PEnI,15682
 kumoai/experimental/rfm/base/mapper.py,sha256=WbWXSF8Vkdeud7UeQ2JgSX7z4d27b_b6o7nR4zET1aw,2420
 kumoai/experimental/rfm/base/__init__.py,sha256=rjmMux5lG8srw1bjQGcFQFv6zET9e5riP81nPkw28Jg,724
-kumoai/experimental/rfm/base/utils.py,sha256=
+kumoai/experimental/rfm/base/utils.py,sha256=Easg1bvjPLR8oZIoxIQCtCyl92pp2dUskdnSv1eayxQ,1133
 kumoai/experimental/rfm/base/table.py,sha256=eJuOUM64VWDkHaslNgeR5A_FZjlPF_4czC8OfFGR62E,26015
 kumoai/experimental/rfm/base/sampler.py,sha256=2G6VmgAGV1mSQWHK4wUgf5Ngr8nnH8Hg6_D3sPZZx1A,31951
 kumoai/experimental/rfm/base/expression.py,sha256=Y7NtLTnKlx6euG_N3fLTcrFKheB6P5KS_jhCfoXV9DE,1252
@@ -60,9 +60,9 @@ kumoai/artifact_export/job.py,sha256=GEisSwvcjK_35RgOfsLXGgxMTXIWm765B_BW_Kgs-V0
 kumoai/artifact_export/__init__.py,sha256=BsfDrc3mCHpO9-BqvqKm8qrXDIwfdaoH5UIoG4eQkc4,238
 kumoai/utils/datasets.py,sha256=ptKIUoBONVD55pTVNdRCkQT3NWdN_r9UAUu4xewPa3U,2928
 kumoai/utils/__init__.py,sha256=6S-UtwjeLpnCYRCCIEWhkitPYGaqOGXC1ChE13DzXiU,256
-kumoai/utils/display.py,sha256=
-kumoai/utils/progress_logger.py,sha256=
-kumoai/utils/sql.py,sha256=
+kumoai/utils/display.py,sha256=QmgeQQT7SzoC1CK2A0ftWbfkEuVN4KQfrKoPCrCDaGc,2626
+kumoai/utils/progress_logger.py,sha256=1PtXxfMteg2nyQAfTGx6qnljiZMZvhwDTndQ9_4_nCE,12161
+kumoai/utils/sql.py,sha256=CNKa-M56QiWoCSe9WLuumahsu3_ugQGr2YoTbveFHq0,147
 kumoai/utils/forecasting.py,sha256=-nDS6ucKNfQhTQOfebjefj0wwWH3-KYNslIomxwwMBM,7415
 kumoai/codegen/generate.py,sha256=SvfWWa71xSAOjH9645yQvgoEM-o4BYjupM_EpUxqB_E,7331
 kumoai/codegen/naming.py,sha256=_XVQGxHfuub4bhvyuBKjltD5Lm_oPpibvP_LZteCGk0,3021
@@ -117,8 +117,8 @@ kumoai/trainer/__init__.py,sha256=zUdFl-f-sBWmm2x8R-rdVzPBeU2FaMzUY5mkcgoTa1k,93
 kumoai/trainer/online_serving.py,sha256=9cddb5paeZaCgbUeceQdAOxysCtV5XP-KcsgFz_XR5w,9566
 kumoai/trainer/distilled_trainer.py,sha256=2pPs5clakNxkLfaak7uqPJOrpTWe1RVVM7ztDSqQZvU,6484
 kumoai/trainer/trainer.py,sha256=hBXO7gwpo3t59zKFTeIkK65B8QRmWCwO33sbDuEAPlY,20133
-kumoai-2.15.0.
-kumoai-2.15.0.
-kumoai-2.15.0.
-kumoai-2.15.0.
-kumoai-2.15.0.
+kumoai-2.15.0.dev202601181732.dist-info/RECORD,,
+kumoai-2.15.0.dev202601181732.dist-info/WHEEL,sha256=oqGJCpG61FZJmvyZ3C_0aCv-2mdfcY9e3fXvyUNmWfM,136
+kumoai-2.15.0.dev202601181732.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
+kumoai-2.15.0.dev202601181732.dist-info/METADATA,sha256=ETr8-9Zfq1pM_Smk8KOWZZm14cRdtNl9vcd_hqSQBKg,2564
+kumoai-2.15.0.dev202601181732.dist-info/licenses/LICENSE,sha256=TbWlyqRmhq9PEzCaTI0H0nWLQCCOywQM8wYH8MbjfLo,1102

{kumoai-2.15.0.dev202601121731.dist-info → kumoai-2.15.0.dev202601181732.dist-info}/WHEEL
RENAMED
File without changes

{kumoai-2.15.0.dev202601121731.dist-info → kumoai-2.15.0.dev202601181732.dist-info}/licenses/LICENSE
RENAMED
File without changes

{kumoai-2.15.0.dev202601121731.dist-info → kumoai-2.15.0.dev202601181732.dist-info}/top_level.txt
RENAMED
File without changes