PyPI - kumoai - Versions diffs - 2.14.0.dev202601051732__cp311-cp311-macosx_11_0_arm64.whl → 2.15.0.dev202601141731__cp311-cp311-macosx_11_0_arm64.whl - Mend

kumoai 2.14.0.dev202601051732__cp311-cp311-macosx_11_0_arm64.whl → 2.15.0.dev202601141731__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

kumoai/_version.py +1 -1
kumoai/client/jobs.py +2 -0
kumoai/experimental/rfm/backend/snow/sampler.py +138 -28
kumoai/experimental/rfm/backend/snow/table.py +16 -13
kumoai/experimental/rfm/backend/sqlite/sampler.py +73 -15
kumoai/experimental/rfm/base/mapper.py +69 -0
kumoai/experimental/rfm/base/sampler.py +23 -1
kumoai/experimental/rfm/base/sql_sampler.py +252 -11
kumoai/experimental/rfm/base/table.py +15 -29
kumoai/experimental/rfm/base/utils.py +36 -0
kumoai/experimental/rfm/graph.py +9 -9
kumoai/experimental/rfm/infer/dtype.py +3 -1
kumoai/experimental/rfm/infer/time_col.py +4 -2
kumoai/experimental/rfm/rfm.py +195 -114
kumoai/experimental/rfm/task_table.py +2 -0
kumoai/pquery/training_table.py +16 -2
kumoai/testing/snow.py +3 -3
kumoai/utils/display.py +44 -8
kumoai/utils/progress_logger.py +2 -1
kumoai/utils/sql.py +2 -2
{kumoai-2.14.0.dev202601051732.dist-info → kumoai-2.15.0.dev202601141731.dist-info}/METADATA +2 -2
{kumoai-2.14.0.dev202601051732.dist-info → kumoai-2.15.0.dev202601141731.dist-info}/RECORD +25 -23
{kumoai-2.14.0.dev202601051732.dist-info → kumoai-2.15.0.dev202601141731.dist-info}/WHEEL +0 -0
{kumoai-2.14.0.dev202601051732.dist-info → kumoai-2.15.0.dev202601141731.dist-info}/licenses/LICENSE +0 -0
{kumoai-2.14.0.dev202601051732.dist-info → kumoai-2.15.0.dev202601141731.dist-info}/top_level.txt +0 -0

kumoai/experimental/rfm/rfm.py CHANGED Viewed

@@ -8,7 +8,6 @@ from contextlib import contextmanager
 from dataclasses import dataclass, replace
 from typing import Any, Literal, overload
-import numpy as np
 import pandas as pd
 from kumoapi.model_plan import RunMode
 from kumoapi.pquery import QueryType, ValidatedPredictiveQuery
@@ -28,7 +27,10 @@ from kumoapi.rfm import (
 )
 from kumoapi.task import TaskType
 from kumoapi.typing import AggregationType, Stype
+from rich.console import Console
+from rich.markdown import Markdown
+from kumoai import in_notebook
 from kumoai.client.rfm import RFMAPI
 from kumoai.exceptions import HTTPException
 from kumoai.experimental.rfm import Graph, TaskTable
@@ -106,10 +108,20 @@ class Explanation:
     def __repr__(self) -> str:
         return str((self.prediction, self.summary))
+    def __str__(self) -> str:
+        console = Console(soft_wrap=True)
+        with console.capture() as cap:
+            console.print(display.to_rich_table(self.prediction))
+            console.print(Markdown(self.summary))
+        return cap.get()[:-1]
     def print(self) -> None:
         r"""Prints the explanation."""
-        display.dataframe(self.prediction)
-        display.message(self.summary)
+        if in_notebook():
+            display.dataframe(self.prediction)
+            display.message(self.summary)
+        else:
+            print(self)
     def _ipython_display_(self) -> None:
         self.print()
@@ -180,7 +192,7 @@ class KumoRFM:
         self._client: RFMAPI | None = None
         self._batch_size: int | Literal['max'] | None = None
-        self.num_retries: int = 0
+        self._num_retries: int = 0
     @property
     def _api_client(self) -> RFMAPI:
@@ -194,6 +206,30 @@ class KumoRFM:
     def __repr__(self) -> str:
         return f'{self.__class__.__name__}()'
+    @contextmanager
+    def retry(
+        self,
+        num_retries: int = 1,
+    ) -> Generator[None, None, None]:
+        """Context manager to retry failed queries due to unexpected server
+        issues.
+        .. code-block:: python
+            with model.retry(num_retries=1):
+                df = model.predict(query, indices=...)
+        Args:
+            num_retries: The maximum number of retries.
+        """
+        if num_retries < 0:
+            raise ValueError(f"'num_retries' must be greater than or equal to "
+                             f"zero (got {num_retries})")
+        self._num_retries = num_retries
+        yield
+        self._num_retries = 0
     @contextmanager
     def batch_mode(
         self,
@@ -217,15 +253,10 @@ class KumoRFM:
             raise ValueError(f"'batch_size' must be greater than zero "
                              f"(got {batch_size})")
-        if num_retries < 0:
-            raise ValueError(f"'num_retries' must be greater than or equal to "
-                             f"zero (got {num_retries})")
         self._batch_size = batch_size
-        self.num_retries = num_retries
-        yield
+        with self.retry(self._num_retries or num_retries):
+            yield
         self._batch_size = None
-        self.num_retries = 0
     @overload
     def predict(
@@ -265,6 +296,25 @@ class KumoRFM:
     ) -> Explanation:
         pass
+    @overload
+    def predict(
+        self,
+        query: str,
+        indices: list[str] | list[float] | list[int] | None = None,
+        *,
+        explain: bool | ExplainConfig | dict[str, Any] = False,
+        anchor_time: pd.Timestamp | Literal['entity'] | None = None,
+        context_anchor_time: pd.Timestamp | None = None,
+        run_mode: RunMode | str = RunMode.FAST,
+        num_neighbors: list[int] | None = None,
+        num_hops: int = 2,
+        max_pq_iterations: int = 10,
+        random_seed: int | None = _RANDOM_SEED,
+        verbose: bool | ProgressLogger = True,
+        use_prediction_time: bool = False,
+    ) -> pd.DataFrame | Explanation:
+        pass
     def predict(
         self,
         query: str,
@@ -288,8 +338,7 @@ class KumoRFM:
             indices: The entity primary keys to predict for. Will override the
                 indices given as part of the predictive query. Predictions will
                 be generated for all indices, independent of whether they
-                fulfill entity filter constraints. To pre-filter entities, use
-                :meth:`~KumoRFM.is_valid_entity`.
+                fulfill entity filter constraints.
             explain: Configuration for explainability.
                 If set to ``True``, will additionally explain the prediction.
                 Passing in an :class:`ExplainConfig` instance provides control
@@ -329,8 +378,11 @@ class KumoRFM:
                 raise ValueError("Cannot find entities to predict for. Please "
                                  "pass them via `predict(query, indices=...)`")
             indices = query_def.get_rfm_entity_id_list()
-        else:
-            query_def = replace(query_def, rfm_entity_ids=None)
+        query_def = replace(
+            query_def,
+            for_each='FOR EACH',
+            rfm_entity_ids=None,
+        )
         if not isinstance(verbose, ProgressLogger):
             query_repr = query_def.to_string(rich=True, exclude_predict=True)
@@ -351,11 +403,11 @@ class KumoRFM:
                 random_seed=random_seed,
                 logger=logger,
             )
-            task_table._query = query_def.to_string()  # type: ignore
+            task_table._query = query_def.to_string()
             return self.predict_task(
                 task_table,
-                explain=explain,  # type: ignore
+                explain=explain,
                 run_mode=run_mode,
                 num_neighbors=num_neighbors,
                 num_hops=num_hops,
@@ -397,6 +449,22 @@ class KumoRFM:
     ) -> Explanation:
         pass
+    @overload
+    def predict_task(
+        self,
+        task: TaskTable,
+        *,
+        explain: bool | ExplainConfig | dict[str, Any] = False,
+        run_mode: RunMode | str = RunMode.FAST,
+        num_neighbors: list[int] | None = None,
+        num_hops: int = 2,
+        verbose: bool | ProgressLogger = True,
+        exclude_cols_dict: dict[str, list[str]] | None = None,
+        use_prediction_time: bool = False,
+        top_k: int | None = None,
+    ) -> pd.DataFrame | Explanation:
+        pass
     def predict_task(
         self,
         task: TaskTable,
@@ -477,9 +545,9 @@ class KumoRFM:
                 task_type_repr = str(task.task_type)
             if explain_config is not None:
-                msg = f'Explain {task_type_repr} task'
+                msg = f"Explaining {task_type_repr} task"
             else:
-                msg = f'Predict {task_type_repr} task'
+                msg = f"Predicting {task_type_repr} task"
             verbose = ProgressLogger.default(msg=msg, verbose=verbose)
         with verbose as logger:
@@ -525,7 +593,7 @@ class KumoRFM:
                 request = RFMPredictRequest(
                     context=context,
                     run_mode=RunMode(run_mode),
-                    query=getattr(task, '_query', ''),
+                    query=task._query,
                     use_prediction_time=use_prediction_time,
                 )
                 with warnings.catch_warnings():
@@ -544,7 +612,7 @@ class KumoRFM:
                     num = math.ceil(task.num_prediction_examples / batch_size)
                     verbose.init_progress(total=num, description='Predicting')
-                for attempt in range(self.num_retries + 1):
+                for attempt in range(self._num_retries + 1):
                     try:
                         if explain_config is not None:
                             resp = self._api_client.explain(
@@ -582,7 +650,7 @@ class KumoRFM:
                         break
                     except HTTPException as e:
-                        if attempt == self.num_retries:
+                        if attempt == self._num_retries:
                             try:
                                 msg = json.loads(e.detail)['detail']
                             except Exception:
@@ -612,51 +680,6 @@ class KumoRFM:
         return prediction
-    def is_valid_entity(
-        self,
-        query: str,
-        indices: list[str] | list[float] | list[int] | None = None,
-        *,
-        anchor_time: pd.Timestamp | Literal['entity'] | None = None,
-    ) -> np.ndarray:
-        r"""Returns a mask that denotes which entities are valid for the
-        given predictive query, *i.e.*, which entities fulfill (temporal)
-        entity filter constraints.
-        Args:
-            query: The predictive query.
-            indices: The entity primary keys to predict for. Will override the
-                indices given as part of the predictive query.
-            anchor_time: The anchor timestamp for the prediction. If set to
-                ``None``, will use the maximum timestamp in the data.
-                If set to ``"entity"``, will use the timestamp of the entity.
-        """
-        query_def = self._parse_query(query)
-        if indices is None:
-            if query_def.rfm_entity_ids is None:
-                raise ValueError("Cannot find entities to predict for. Please "
-                                 "pass them via "
-                                 "`is_valid_entity(query, indices=...)`")
-            indices = query_def.get_rfm_entity_id_list()
-        if len(indices) == 0:
-            raise ValueError("At least one entity is required")
-        if anchor_time is None:
-            anchor_time = self._get_default_anchor_time(query_def)
-        if isinstance(anchor_time, pd.Timestamp):
-            self._validate_time(query_def, anchor_time, None, False)
-        else:
-            assert anchor_time == 'entity'
-            if query_def.entity_table not in self._sampler.time_column_dict:
-                raise ValueError(f"Anchor time 'entity' requires the entity "
-                                 f"table '{query_def.entity_table}' "
-                                 f"to have a time column.")
-        raise NotImplementedError
     def evaluate(
         self,
         query: str,
@@ -701,29 +724,12 @@ class KumoRFM:
         Returns:
             The metrics as a :class:`pandas.DataFrame`
         """
-        query_def = self._parse_query(query)
-        if query_def.rfm_entity_ids is not None:
-            query_def = replace(
-                query_def,
-                rfm_entity_ids=None,
-            )
-        task_type = self._get_task_type(
-            query=query_def,
-            edge_types=self._sampler.edge_types,
+        query_def = replace(
+            self._parse_query(query),
+            for_each='FOR EACH',
+            rfm_entity_ids=None,
         )
-        if num_hops != 2 and num_neighbors is not None:
-            warnings.warn(f"Received custom 'num_neighbors' option; ignoring "
-                          f"custom 'num_hops={num_hops}' option")
-        if num_neighbors is None:
-            key = RunMode.FAST if task_type.is_link_pred else run_mode
-            num_neighbors = _DEFAULT_NUM_NEIGHBORS[key][:num_hops]
-        if metrics is not None and len(metrics) > 0:
-            self._validate_metrics(metrics, task_type)
-            metrics = list(dict.fromkeys(metrics))
         if not isinstance(verbose, ProgressLogger):
             query_repr = query_def.to_string(rich=True, exclude_predict=True)
             msg = f'[bold]EVALUATE[/bold] {query_repr}'
@@ -740,11 +746,96 @@ class KumoRFM:
                 random_seed=random_seed,
                 logger=logger,
             )
-            context = self._get_context(
-                task=task_table,
+            return self.evaluate_task(
+                task_table,
+                metrics=metrics,
                 run_mode=run_mode,
                 num_neighbors=num_neighbors,
+                num_hops=num_hops,
+                verbose=verbose,
                 exclude_cols_dict=query_def.get_exclude_cols_dict(),
+                use_prediction_time=use_prediction_time,
+            )
+    def evaluate_task(
+        self,
+        task: TaskTable,
+        *,
+        metrics: list[str] | None = None,
+        run_mode: RunMode | str = RunMode.FAST,
+        num_neighbors: list[int] | None = None,
+        num_hops: int = 2,
+        verbose: bool | ProgressLogger = True,
+        exclude_cols_dict: dict[str, list[str]] | None = None,
+        use_prediction_time: bool = False,
+    ) -> pd.DataFrame:
+        """Evaluates a custom task specification.
+        Args:
+            task: The custom :class:`TaskTable`.
+            metrics: The metrics to use.
+            run_mode: The :class:`RunMode` for the query.
+            num_neighbors: The number of neighbors to sample for each hop.
+                If specified, the ``num_hops`` option will be ignored.
+            num_hops: The number of hops to sample when generating the context.
+            verbose: Whether to print verbose output.
+            exclude_cols_dict: Any column in any table to exclude from the
+                model input.
+            use_prediction_time: Whether to use the anchor timestamp as an
+                additional feature during prediction. This is typically
+                beneficial for time series forecasting tasks.
+        Returns:
+            The metrics as a :class:`pandas.DataFrame`
+        """
+        if num_hops != 2 and num_neighbors is not None:
+            warnings.warn(f"Received custom 'num_neighbors' option; ignoring "
+                          f"custom 'num_hops={num_hops}' option")
+        if num_neighbors is None:
+            key = RunMode.FAST if task.task_type.is_link_pred else run_mode
+            num_neighbors = _DEFAULT_NUM_NEIGHBORS[key][:num_hops]
+        if metrics is not None and len(metrics) > 0:
+            self._validate_metrics(metrics, task.task_type)
+            metrics = list(dict.fromkeys(metrics))
+        if not isinstance(verbose, ProgressLogger):
+            if task.task_type == TaskType.BINARY_CLASSIFICATION:
+                task_type_repr = 'binary classification'
+            elif task.task_type == TaskType.MULTICLASS_CLASSIFICATION:
+                task_type_repr = 'multi-class classification'
+            elif task.task_type == TaskType.REGRESSION:
+                task_type_repr = 'regression'
+            elif task.task_type == TaskType.TEMPORAL_LINK_PREDICTION:
+                task_type_repr = 'link prediction'
+            else:
+                task_type_repr = str(task.task_type)
+            msg = f"Evaluating {task_type_repr} task"
+            verbose = ProgressLogger.default(msg=msg, verbose=verbose)
+        with verbose as logger:
+            if task.num_context_examples > _MAX_CONTEXT_SIZE[run_mode]:
+                logger.log(f"Sub-sampled {_MAX_CONTEXT_SIZE[run_mode]:,} "
+                           f"out of {task.num_context_examples:,} in-context "
+                           f"examples")
+                task = task.narrow_context(0, _MAX_CONTEXT_SIZE[run_mode])
+            if task.num_prediction_examples > _MAX_TEST_SIZE[task.task_type]:
+                logger.log(f"Sub-sampled {_MAX_TEST_SIZE[task.task_type]:,} "
+                           f"out of {task.num_prediction_examples:,} test "
+                           f"examples")
+                task = task.narrow_prediction(
+                    start=0,
+                    length=_MAX_TEST_SIZE[task.task_type],
+                )
+            context = self._get_context(
+                task=task,
+                run_mode=run_mode,
+                num_neighbors=num_neighbors,
+                exclude_cols_dict=exclude_cols_dict,
             )
             request = RFMEvaluateRequest(
@@ -764,12 +855,12 @@ class KumoRFM:
                 stats_msg = Context.get_memory_stats(request_msg.context)
                 raise ValueError(_SIZE_LIMIT_MSG.format(stats=stats_msg))
-            for attempt in range(self.num_retries + 1):
+            for attempt in range(self._num_retries + 1):
                 try:
                     resp = self._api_client.evaluate(request_bytes)
                     break
                 except HTTPException as e:
-                    if attempt == self.num_retries:
+                    if attempt == self._num_retries:
                         try:
                             msg = json.loads(e.detail)['detail']
                         except Exception:
@@ -865,12 +956,12 @@ class KumoRFM:
             graph_definition=self._graph_def,
         )
-        for attempt in range(self.num_retries + 1):
+        for attempt in range(self._num_retries + 1):
             try:
                 resp = self._api_client.parse_query(request)
                 break
             except HTTPException as e:
-                if attempt == self.num_retries:
+                if attempt == self._num_retries:
                     try:
                         msg = json.loads(e.detail)['detail']
                     except Exception:
@@ -953,8 +1044,16 @@ class KumoRFM:
         if len(self._sampler.time_column_dict) == 0:
             return  # Graph without timestamps
-        min_time = self._sampler.get_min_time()
-        max_time = self._sampler.get_max_time()
+        if query.query_type == QueryType.TEMPORAL:
+            aggr_table_names = [
+                aggr._get_target_column_name().split('.')[0]
+                for aggr in query.get_all_target_aggregations()
+            ]
+            min_time = self._sampler.get_min_time(aggr_table_names)
+            max_time = self._sampler.get_max_time(aggr_table_names)
+        else:
+            min_time = self._sampler.get_min_time()
+            max_time = self._sampler.get_max_time()
         if anchor_time < min_time:
             raise ValueError(f"Anchor timestamp '{anchor_time}' is before "
@@ -1193,24 +1292,6 @@ class KumoRFM:
         top_k: int | None = None,
     ) -> Context:
-        # TODO Remove all
-        if task.num_context_examples > max(_MAX_CONTEXT_SIZE.values()):
-            raise ValueError(f"Cannot process a context with more than "
-                             f"{max(_MAX_CONTEXT_SIZE.values()):,} samples "
-                             f"(got {task.num_context_examples:,})")
-        if task.evaluate:
-            if task.num_prediction_examples > _MAX_TEST_SIZE[task.task_type]:
-                raise ValueError(f"Cannot process a test set with more than "
-                                 f"{_MAX_TEST_SIZE[task.task_type]:,} samples "
-                                 f"for evaluation "
-                                 f"(got {task.num_prediction_examples:,})")
-        else:
-            if task.num_prediction_examples > _MAX_PRED_SIZE[task.task_type]:
-                raise ValueError(f"Cannot predict for more than "
-                                 f"{_MAX_PRED_SIZE[task.task_type]:,} "
-                                 f"entities at once "
-                                 f"(got {task.num_prediction_examples:,})")
         if num_neighbors is None:
             key = RunMode.FAST if task.task_type.is_link_pred else run_mode
             num_neighbors = _DEFAULT_NUM_NEIGHBORS[key][:2]

kumoai/experimental/rfm/task_table.py CHANGED Viewed

@@ -87,6 +87,8 @@ class TaskTable:
         if time_column is not None:
             self.time_column = time_column
+        self._query: str = ''  # A description of the task, e.g., for XAI.
     @property
     def num_context_examples(self) -> int:
         return len(self._context_df)

kumoai/pquery/training_table.py CHANGED Viewed

@@ -199,6 +199,7 @@ class TrainingTable:
         self,
         source_table_type: SourceTableType,
         train_table_mod: TrainingTableSpec,
+        extensive_validation: bool = False,
     ) -> None:
         r"""Validates the modified training table.
@@ -206,6 +207,8 @@ class TrainingTable:
             source_table_type: The source table to be used as the modified
                 training table.
             train_table_mod: The modification specification.
+            extensive_validation: Enable extensive validation for custom
+                table.
         Raises:
             ValueError: If the modified training table is invalid.
@@ -215,7 +218,8 @@ class TrainingTable:
             global_state.client.generate_train_table_job_api)
         response = api.validate_custom_train_table(self.job_id,
                                                    source_table_type,
-                                                   train_table_mod)
+                                                   train_table_mod,
+                                                   extensive_validation)
         if not response.ok:
             raise ValueError("Invalid weighted train table",
                              response.error_message)
@@ -225,6 +229,7 @@ class TrainingTable:
         source_table: SourceTable,
         train_table_mod: TrainingTableSpec,
         validate: bool = True,
+        extensive_validation: bool = False,
     ) -> Self:
         r"""Sets the `source_table` as the modified training table.
@@ -243,6 +248,9 @@ class TrainingTable:
             train_table_mod: The modification specification.
             validate: Whether to validate the modified training table. This can
                 be slow for large tables.
+            extensive_validation: Whether to validate number of rows in
+                existing and modified training table.
+                It can be slow for large tables.
         """
         if isinstance(source_table.connector, S3Connector):
             # Special handling for s3 as `source_table._to_api_source_table`
@@ -252,7 +260,13 @@ class TrainingTable:
         else:
             source_table_type = source_table._to_api_source_table()
         if validate:
-            self.validate_custom_table(source_table_type, train_table_mod)
+            if extensive_validation:
+                logger.warning(
+                    "You have opted in to perform extensive validation on"
+                    " your custom training table."
+                    " This operation can be slow for large tables.")
+            self.validate_custom_table(source_table_type, train_table_mod,
+                                       extensive_validation)
         self._custom_train_table = CustomTrainingTable(
             source_table=source_table_type, table_mod_spec=train_table_mod,
             validated=validate)

kumoai/testing/snow.py CHANGED Viewed

@@ -10,7 +10,7 @@ def connect(
     id: str,
     account: str,
     user: str,
-    warehouse: str,
+    warehouse: str | None = None,
     database: str | None = None,
     schema: str | None = None,
 ) -> Connection:
@@ -42,8 +42,8 @@ def connect(
     return _connect(
         account=account,
         user=user,
-        warehouse='WH_XS',
-        database='KUMO',
+        warehouse=warehouse or 'WH_XS',
+        database=database or 'KUMO',
         schema=schema,
         session_parameters=dict(CLIENT_TELEMETRY_ENABLED=False),
         **kwargs,

kumoai/utils/display.py CHANGED Viewed

@@ -1,13 +1,15 @@
 from collections.abc import Sequence
 import pandas as pd
+from rich import box
+from rich.console import Console
+from rich.table import Table
+from rich.text import Text
 from kumoai import in_notebook, in_snowflake_notebook
 def message(msg: str) -> None:
-    msg = msg.replace("`", "'") if not in_notebook() else msg
     if in_snowflake_notebook():
         import streamlit as st
         st.markdown(msg)
@@ -15,23 +17,40 @@ def message(msg: str) -> None:
         from IPython.display import Markdown, display
         display(Markdown(msg))
     else:
-        print(msg)
+        print(msg.replace("`", "'"))
 def title(msg: str) -> None:
-    message(f"### {msg}" if in_notebook() else f"{msg}:")
+    if in_notebook():
+        message(f"### {msg}")
+    else:
+        msg = msg.replace("`", "'")
+        Console().print(f"[bold]{msg}[/bold]", highlight=False)
 def italic(msg: str) -> None:
-    message(f"*{msg}*" if in_notebook() else msg)
+    if in_notebook():
+        message(f"*{msg}*")
+    else:
+        msg = msg.replace("`", "'")
+        Console().print(
+            f"[italic]{msg}[/italic]",
+            highlight=False,
+            style='dim',
+        )
 def unordered_list(items: Sequence[str]) -> None:
     if in_notebook():
         msg = '\n'.join([f"- {item}" for item in items])
+        message(msg)
     else:
-        msg = '\n'.join([f"• {item.replace('`', '')}" for item in items])
-    message(msg)
+        text = Text('\n').join(
+            Text.assemble(
+                Text(' • ', style='yellow'),
+                Text(item.replace('`', '')),
+            ) for item in items)
+        Console().print(text, highlight=False)
 def dataframe(df: pd.DataFrame) -> None:
@@ -48,4 +67,21 @@ def dataframe(df: pd.DataFrame) -> None:
         except ImportError:
             print(df.to_string(index=False))  # missing jinja2
     else:
-        print(df.to_string(index=False))
+        Console().print(to_rich_table(df))
+def to_rich_table(df: pd.DataFrame) -> Table:
+    table = Table(box=box.ROUNDED)
+    for column in df.columns:
+        table.add_column(str(column))
+    for _, row in df.iterrows():
+        values: list[str | Text] = []
+        for value in row:
+            if str(value) == 'True':
+                values.append('✅')
+            elif str(value) in {'False', '-'}:
+                values.append(Text('-', style='dim'))
+            else:
+                values.append(str(value))
+        table.add_row(*values)
+    return table

kumoai/utils/progress_logger.py CHANGED Viewed

@@ -57,7 +57,8 @@ class ProgressLogger:
     def __enter__(self) -> Self:
         self.depth += 1
-        self.start_time = time.perf_counter()
+        if self.depth == 1:
+            self.start_time = time.perf_counter()
         return self
     def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:

kumoai/utils/sql.py CHANGED Viewed

@@ -1,3 +1,3 @@
-def quote_ident(name: str) -> str:
+def quote_ident(ident: str, char: str = '"') -> str:
     r"""Quotes a SQL identifier."""
-    return '"' + name.replace('"', '""') + '"'
+    return char + ident.replace(char, char + char) + char

{kumoai-2.14.0.dev202601051732.dist-info → kumoai-2.15.0.dev202601141731.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kumoai
-Version: 2.14.0.dev202601051732
+Version: 2.15.0.dev202601141731
 Summary: AI on the Modern Data Stack
 Author-email: "Kumo.AI" <hello@kumo.ai>
 License-Expression: MIT
@@ -23,7 +23,7 @@ Requires-Dist: requests>=2.28.2
 Requires-Dist: urllib3
 Requires-Dist: plotly
 Requires-Dist: typing_extensions>=4.5.0
-Requires-Dist: kumo-api==0.49.0
+Requires-Dist: kumo-api<1.0.0,>=0.53.0
 Requires-Dist: tqdm>=4.66.0
 Requires-Dist: aiohttp>=3.10.0
 Requires-Dist: pydantic>=1.10.21