kumoai 2.14.0.dev202601081732__cp313-cp313-win_amd64.whl → 2.15.0.dev202601151732__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kumoai/_version.py +1 -1
- kumoai/experimental/rfm/backend/snow/sampler.py +61 -20
- kumoai/experimental/rfm/backend/snow/table.py +16 -13
- kumoai/experimental/rfm/backend/sqlite/sampler.py +5 -3
- kumoai/experimental/rfm/base/mapper.py +69 -0
- kumoai/experimental/rfm/base/sampler.py +2 -1
- kumoai/experimental/rfm/base/sql_sampler.py +182 -44
- kumoai/experimental/rfm/base/table.py +3 -22
- kumoai/experimental/rfm/base/utils.py +36 -0
- kumoai/experimental/rfm/graph.py +15 -3
- kumoai/experimental/rfm/infer/dtype.py +3 -1
- kumoai/experimental/rfm/infer/time_col.py +4 -2
- kumoai/experimental/rfm/rfm.py +10 -2
- kumoai/kumolib.cp313-win_amd64.pyd +0 -0
- kumoai/testing/snow.py +3 -3
- kumoai/utils/progress_logger.py +2 -1
- kumoai/utils/sql.py +2 -2
- {kumoai-2.14.0.dev202601081732.dist-info → kumoai-2.15.0.dev202601151732.dist-info}/METADATA +2 -2
- {kumoai-2.14.0.dev202601081732.dist-info → kumoai-2.15.0.dev202601151732.dist-info}/RECORD +22 -20
- {kumoai-2.14.0.dev202601081732.dist-info → kumoai-2.15.0.dev202601151732.dist-info}/WHEEL +0 -0
- {kumoai-2.14.0.dev202601081732.dist-info → kumoai-2.15.0.dev202601151732.dist-info}/licenses/LICENSE +0 -0
- {kumoai-2.14.0.dev202601081732.dist-info → kumoai-2.15.0.dev202601151732.dist-info}/top_level.txt +0 -0
kumoai/_version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = '2.
|
|
1
|
+
__version__ = '2.15.0.dev202601151732'
|
|
kumoai/experimental/rfm/backend/snow/sampler.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import math
|
|
2
3
|
from collections.abc import Iterator
|
|
3
4
|
from contextlib import contextmanager
|
|
4
|
-
from typing import TYPE_CHECKING
|
|
5
|
+
from typing import TYPE_CHECKING, cast
|
|
5
6
|
|
|
6
7
|
import numpy as np
|
|
7
8
|
import pandas as pd
|
|
@@ -11,7 +12,7 @@ from kumoapi.pquery import ValidatedPredictiveQuery
|
|
|
11
12
|
from kumoai.experimental.rfm.backend.snow import Connection, SnowTable
|
|
12
13
|
from kumoai.experimental.rfm.base import SQLSampler, Table
|
|
13
14
|
from kumoai.experimental.rfm.pquery import PQueryPandasExecutor
|
|
14
|
-
from kumoai.utils import ProgressLogger
|
|
15
|
+
from kumoai.utils import ProgressLogger, quote_ident
|
|
15
16
|
|
|
16
17
|
if TYPE_CHECKING:
|
|
17
18
|
from kumoai.experimental.rfm import Graph
|
|
@@ -37,6 +38,15 @@ class SnowSampler(SQLSampler):
|
|
|
37
38
|
assert isinstance(table, SnowTable)
|
|
38
39
|
self._connection = table._connection
|
|
39
40
|
|
|
41
|
+
self._num_rows_dict: dict[str, int] = {
|
|
42
|
+
table.name: cast(int, table._num_rows)
|
|
43
|
+
for table in graph.tables.values()
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
@property
|
|
47
|
+
def num_rows_dict(self) -> dict[str, int]:
|
|
48
|
+
return self._num_rows_dict
|
|
49
|
+
|
|
40
50
|
def _get_min_max_time_dict(
|
|
41
51
|
self,
|
|
42
52
|
table_names: list[str],
|
|
@@ -45,8 +55,9 @@ class SnowSampler(SQLSampler):
|
|
|
45
55
|
for table_name in table_names:
|
|
46
56
|
column = self.time_column_dict[table_name]
|
|
47
57
|
column_ref = self.table_column_ref_dict[table_name][column]
|
|
58
|
+
ident = quote_ident(table_name, char="'")
|
|
48
59
|
select = (f"SELECT\n"
|
|
49
|
-
f"
|
|
60
|
+
f" {ident} as table_name,\n"
|
|
50
61
|
f" MIN({column_ref}) as min_date,\n"
|
|
51
62
|
f" MAX({column_ref}) as max_date\n"
|
|
52
63
|
f"FROM {self.source_name_dict[table_name]}")
|
|
@@ -54,14 +65,13 @@ class SnowSampler(SQLSampler):
|
|
|
54
65
|
sql = "\nUNION ALL\n".join(selects)
|
|
55
66
|
|
|
56
67
|
out_dict: dict[str, tuple[pd.Timestamp, pd.Timestamp]] = {}
|
|
57
|
-
with
|
|
58
|
-
cursor.execute(sql
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
)
|
|
68
|
+
with self._connection.cursor() as cursor:
|
|
69
|
+
cursor.execute(sql)
|
|
70
|
+
for table_name, _min, _max in cursor.fetchall():
|
|
71
|
+
out_dict[table_name] = (
|
|
72
|
+
pd.Timestamp.max if _min is None else pd.Timestamp(_min),
|
|
73
|
+
pd.Timestamp.min if _max is None else pd.Timestamp(_max),
|
|
74
|
+
)
|
|
65
75
|
|
|
66
76
|
return out_dict
|
|
67
77
|
|
|
@@ -239,9 +249,30 @@ class SnowSampler(SQLSampler):
|
|
|
239
249
|
) -> tuple[pd.DataFrame, np.ndarray]:
|
|
240
250
|
time_column = self.time_column_dict.get(table_name)
|
|
241
251
|
|
|
252
|
+
end_time: pd.Series | None = None
|
|
253
|
+
start_time: pd.Series | None = None
|
|
242
254
|
if time_column is not None and anchor_time is not None:
|
|
243
|
-
|
|
244
|
-
|
|
255
|
+
# In order to avoid a full table scan, we limit foreign key
|
|
256
|
+
# sampling to a certain time range, approximated by the number of
|
|
257
|
+
# rows, timestamp ranges and `num_neighbors` value.
|
|
258
|
+
# Downstream, this helps Snowflake to apply partition pruning:
|
|
259
|
+
dst_table_name = [
|
|
260
|
+
dst_table
|
|
261
|
+
for key, dst_table in self.foreign_key_dict[table_name]
|
|
262
|
+
if key == foreign_key
|
|
263
|
+
][0]
|
|
264
|
+
num_facts = self.num_rows_dict[table_name]
|
|
265
|
+
num_entities = self.num_rows_dict[dst_table_name]
|
|
266
|
+
min_time = self.get_min_time([table_name])
|
|
267
|
+
max_time = self.get_max_time([table_name])
|
|
268
|
+
freq = num_facts / num_entities
|
|
269
|
+
freq = freq / max((max_time - min_time).total_seconds(), 1)
|
|
270
|
+
offset = pd.Timedelta(seconds=math.ceil(5 * num_neighbors / freq))
|
|
271
|
+
|
|
272
|
+
end_time = anchor_time.dt.strftime("%Y-%m-%d %H:%M:%S")
|
|
273
|
+
start_time = anchor_time - offset
|
|
274
|
+
start_time = start_time.dt.strftime("%Y-%m-%d %H:%M:%S")
|
|
275
|
+
payload = json.dumps(list(zip(index, end_time, start_time)))
|
|
245
276
|
else:
|
|
246
277
|
payload = json.dumps(list(zip(index)))
|
|
247
278
|
|
|
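The comments in the hunk above describe how foreign-key sampling is bounded to a time window so that Snowflake can prune partitions. The following is a minimal, self-contained sketch of that offset calculation; the row counts and timestamps are made up, and only math and pandas are assumed:

import math
import pandas as pd

# Illustrative inputs, not taken from the package:
num_facts = 1_000_000      # rows in the fact table being sampled
num_entities = 50_000      # rows in the referenced entity table
num_neighbors = 32
min_time = pd.Timestamp('2020-01-01')
max_time = pd.Timestamp('2021-01-01')

# Average facts per entity, per second of observed history:
freq = num_facts / num_entities
freq = freq / max((max_time - min_time).total_seconds(), 1)

# Window sized to (over-)cover roughly 5x the requested neighbors:
offset = pd.Timedelta(seconds=math.ceil(5 * num_neighbors / freq))

anchor_time = pd.Series(pd.to_datetime(['2020-07-01', '2020-09-15']))
end_time = anchor_time.dt.strftime('%Y-%m-%d %H:%M:%S')
start_time = (anchor_time - offset).dt.strftime('%Y-%m-%d %H:%M:%S')
print(offset, list(zip(end_time, start_time)))

The per-row bounds feed the JSON payload, while the global end_time.max() / start_time.min() bounds added to the WHERE clause in the later hunks are what enable partition pruning.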
@@ -260,9 +291,10 @@ class SnowSampler(SQLSampler):
|
|
|
260
291
|
sql += " f.value[0]::FLOAT as __KUMO_ID__"
|
|
261
292
|
else:
|
|
262
293
|
sql += " f.value[0]::VARCHAR as __KUMO_ID__"
|
|
263
|
-
if
|
|
294
|
+
if end_time is not None and start_time is not None:
|
|
264
295
|
sql += (",\n"
|
|
265
|
-
" f.value[1]::TIMESTAMP_NTZ as
|
|
296
|
+
" f.value[1]::TIMESTAMP_NTZ as __KUMO_END_TIME__,\n"
|
|
297
|
+
" f.value[2]::TIMESTAMP_NTZ as __KUMO_START_TIME__")
|
|
266
298
|
sql += (f"\n"
|
|
267
299
|
f" FROM TABLE(FLATTEN(INPUT => PARSE_JSON(?))) f\n"
|
|
268
300
|
f")\n"
|
|
@@ -272,9 +304,13 @@ class SnowSampler(SQLSampler):
|
|
|
272
304
|
f"FROM TMP\n"
|
|
273
305
|
f"JOIN {self.source_name_dict[table_name]}\n"
|
|
274
306
|
f" ON {key_ref} = TMP.__KUMO_ID__\n")
|
|
275
|
-
if
|
|
307
|
+
if end_time is not None and start_time is not None:
|
|
308
|
+
assert time_column is not None
|
|
276
309
|
time_ref = self.table_column_ref_dict[table_name][time_column]
|
|
277
|
-
sql += f" AND {time_ref} <= TMP.
|
|
310
|
+
sql += (f" AND {time_ref} <= TMP.__KUMO_END_TIME__\n"
|
|
311
|
+
f" AND {time_ref} > TMP.__KUMO_START_TIME__\n"
|
|
312
|
+
f"WHERE {time_ref} <= '{end_time.max()}'\n"
|
|
313
|
+
f" AND {time_ref} > '{start_time.min()}'\n")
|
|
278
314
|
sql += ("QUALIFY ROW_NUMBER() OVER (\n"
|
|
279
315
|
" PARTITION BY TMP.__KUMO_BATCH__\n")
|
|
280
316
|
if time_column is not None:
|
|
@@ -313,6 +349,7 @@ class SnowSampler(SQLSampler):
|
|
|
313
349
|
|
|
314
350
|
end_time = anchor_time + max_offset
|
|
315
351
|
end_time = end_time.dt.strftime("%Y-%m-%d %H:%M:%S")
|
|
352
|
+
start_time: pd.Series | None = None
|
|
316
353
|
if min_offset is not None:
|
|
317
354
|
start_time = anchor_time + min_offset
|
|
318
355
|
start_time = start_time.dt.strftime("%Y-%m-%d %H:%M:%S")
|
|
@@ -347,9 +384,13 @@ class SnowSampler(SQLSampler):
|
|
|
347
384
|
f"FROM TMP\n"
|
|
348
385
|
f"JOIN {self.source_name_dict[table_name]}\n"
|
|
349
386
|
f" ON {key_ref} = TMP.__KUMO_ID__\n"
|
|
350
|
-
f" AND {time_ref} <= TMP.__KUMO_END_TIME__")
|
|
351
|
-
if
|
|
352
|
-
sql += f"
|
|
387
|
+
f" AND {time_ref} <= TMP.__KUMO_END_TIME__\n")
|
|
388
|
+
if start_time is not None:
|
|
389
|
+
sql += f"AND {time_ref} > TMP.__KUMO_START_TIME__\n"
|
|
390
|
+
# Add global time bounds to enable partition pruning:
|
|
391
|
+
sql += f"WHERE {time_ref} <= '{end_time.max()}'"
|
|
392
|
+
if start_time is not None:
|
|
393
|
+
sql += f"\nAND {time_ref} > '{start_time.min()}'"
|
|
353
394
|
|
|
354
395
|
with paramstyle(self._connection), self._connection.cursor() as cursor:
|
|
355
396
|
cursor.execute(sql, (payload, ))
|
|
kumoai/experimental/rfm/backend/snow/table.py
CHANGED
|
@@ -76,21 +76,13 @@ class SnowTable(Table):
|
|
|
76
76
|
|
|
77
77
|
@property
|
|
78
78
|
def source_name(self) -> str:
|
|
79
|
-
names
|
|
80
|
-
|
|
81
|
-
names.append(self._database)
|
|
82
|
-
if self._schema is not None:
|
|
83
|
-
names.append(self._schema)
|
|
84
|
-
return '.'.join(names + [self._source_name])
|
|
79
|
+
names = [self._database, self._schema, self._source_name]
|
|
80
|
+
return '.'.join(names)
|
|
85
81
|
|
|
86
82
|
@property
|
|
87
83
|
def _quoted_source_name(self) -> str:
|
|
88
|
-
names
|
|
89
|
-
|
|
90
|
-
names.append(quote_ident(self._database))
|
|
91
|
-
if self._schema is not None:
|
|
92
|
-
names.append(quote_ident(self._schema))
|
|
93
|
-
return '.'.join(names + [quote_ident(self._source_name)])
|
|
84
|
+
names = [self._database, self._schema, self._source_name]
|
|
85
|
+
return '.'.join([quote_ident(name) for name in names])
|
|
94
86
|
|
|
95
87
|
@property
|
|
96
88
|
def backend(self) -> DataBackend:
|
|
@@ -159,7 +151,18 @@ class SnowTable(Table):
|
|
|
159
151
|
)
|
|
160
152
|
|
|
161
153
|
def _get_num_rows(self) -> int | None:
|
|
162
|
-
|
|
154
|
+
with self._connection.cursor() as cursor:
|
|
155
|
+
quoted_source_name = quote_ident(self._source_name, char="'")
|
|
156
|
+
sql = (f"SHOW TABLES LIKE {quoted_source_name} "
|
|
157
|
+
f"IN SCHEMA {quote_ident(self._database)}."
|
|
158
|
+
f"{quote_ident(self._schema)}")
|
|
159
|
+
cursor.execute(sql)
|
|
160
|
+
num_rows = cursor.fetchone()[7]
|
|
161
|
+
|
|
162
|
+
if num_rows == 0:
|
|
163
|
+
raise RuntimeError("Table '{self.source_name}' is empty")
|
|
164
|
+
|
|
165
|
+
return num_rows
|
|
163
166
|
|
|
164
167
|
def _get_expr_sample_df(
|
|
165
168
|
self,
|
|
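For orientation, here is a sketch of the statement that the new _get_num_rows assembles; the database, schema and table names are invented, and quote_ident is redefined inline so the snippet runs standalone:

def quote_ident(ident: str, char: str = '"') -> str:
    return char + ident.replace(char, char + char) + char

database, schema, source_name = 'ANALYTICS', 'PUBLIC', 'ORDERS'
table_ident = quote_ident(source_name, char="'")
sql = (f"SHOW TABLES LIKE {table_ident} "
       f"IN SCHEMA {quote_ident(database)}.{quote_ident(schema)}")
print(sql)  # SHOW TABLES LIKE 'ORDERS' IN SCHEMA "ANALYTICS"."PUBLIC"

The diff then reads the row count from column 7 of the SHOW TABLES result, avoiding a full scan of the table.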
kumoai/experimental/rfm/backend/sqlite/sampler.py
CHANGED
|
@@ -121,8 +121,9 @@ class SQLiteSampler(SQLSampler):
|
|
|
121
121
|
for table_name in table_names:
|
|
122
122
|
column = self.time_column_dict[table_name]
|
|
123
123
|
column_ref = self.table_column_ref_dict[table_name][column]
|
|
124
|
+
ident = quote_ident(table_name, char="'")
|
|
124
125
|
select = (f"SELECT\n"
|
|
125
|
-
f"
|
|
126
|
+
f" {ident} as table_name,\n"
|
|
126
127
|
f" MIN({column_ref}) as min_date,\n"
|
|
127
128
|
f" MAX({column_ref}) as max_date\n"
|
|
128
129
|
f"FROM {self.source_name_dict[table_name]}")
|
|
@@ -131,12 +132,13 @@ class SQLiteSampler(SQLSampler):
|
|
|
131
132
|
|
|
132
133
|
out_dict: dict[str, tuple[pd.Timestamp, pd.Timestamp]] = {}
|
|
133
134
|
with self._connection.cursor() as cursor:
|
|
134
|
-
cursor.execute(sql
|
|
135
|
+
cursor.execute(sql)
|
|
135
136
|
for table_name, _min, _max in cursor.fetchall():
|
|
136
137
|
out_dict[table_name] = (
|
|
137
138
|
pd.Timestamp.max if _min is None else pd.Timestamp(_min),
|
|
138
139
|
pd.Timestamp.min if _max is None else pd.Timestamp(_max),
|
|
139
140
|
)
|
|
141
|
+
|
|
140
142
|
return out_dict
|
|
141
143
|
|
|
142
144
|
def _sample_entity_table(
|
|
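To make the assembled statement above concrete, this sketch mirrors the string building for two hypothetical tables and prints the combined UNION ALL query; the table and column names are invented:

def quote_ident(ident: str, char: str = '"') -> str:
    return char + ident.replace(char, char + char) + char

table_refs = {'users': 'users.created_at', 'orders': 'orders.order_date'}

selects = []
for table_name, column_ref in table_refs.items():
    ident = quote_ident(table_name, char="'")
    selects.append(f"SELECT\n"
                   f"  {ident} as table_name,\n"
                   f"  MIN({column_ref}) as min_date,\n"
                   f"  MAX({column_ref}) as max_date\n"
                   f"FROM {table_name}")
print("\nUNION ALL\n".join(selects))

Each backend then reads one (table_name, min_date, max_date) row per table from a single round trip, falling back to pd.Timestamp.max / pd.Timestamp.min when a table has no timestamps.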
@@ -434,7 +436,7 @@ class SQLiteSampler(SQLSampler):
|
|
|
434
436
|
feat_dict=feat_dict,
|
|
435
437
|
time_dict=time_dict,
|
|
436
438
|
batch_dict=batch_dict,
|
|
437
|
-
anchor_time=
|
|
439
|
+
anchor_time=time,
|
|
438
440
|
num_forecasts=query.num_forecasts,
|
|
439
441
|
)
|
|
440
442
|
ys.append(y)
|
|
kumoai/experimental/rfm/base/mapper.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class Mapper:
|
|
6
|
+
r"""A mapper to map ``(pkey, batch)`` pairs to contiguous node IDs.
|
|
7
|
+
|
|
8
|
+
Args:
|
|
9
|
+
num_examples: The maximum number of examples to add/retrieve.
|
|
10
|
+
"""
|
|
11
|
+
def __init__(self, num_examples: int):
|
|
12
|
+
self._pkey_dtype: pd.CategoricalDtype | None = None
|
|
13
|
+
self._indices: list[np.ndarray] = []
|
|
14
|
+
self._index_dtype: pd.CategoricalDtype | None = None
|
|
15
|
+
self._num_examples = num_examples
|
|
16
|
+
|
|
17
|
+
def add(self, pkey: pd.Series, batch: np.ndarray) -> None:
|
|
18
|
+
r"""Adds a set of ``(pkey, batch)`` pairs to the mapper.
|
|
19
|
+
|
|
20
|
+
Args:
|
|
21
|
+
pkey: The primary keys.
|
|
22
|
+
batch: The batch vector.
|
|
23
|
+
"""
|
|
24
|
+
if self._pkey_dtype is not None:
|
|
25
|
+
category = np.concatenate([
|
|
26
|
+
self._pkey_dtype.categories.values,
|
|
27
|
+
pkey,
|
|
28
|
+
], axis=0)
|
|
29
|
+
category = pd.unique(category)
|
|
30
|
+
self._pkey_dtype = pd.CategoricalDtype(category)
|
|
31
|
+
elif pd.api.types.is_string_dtype(pkey):
|
|
32
|
+
category = pd.unique(pkey)
|
|
33
|
+
self._pkey_dtype = pd.CategoricalDtype(category)
|
|
34
|
+
|
|
35
|
+
if self._pkey_dtype is not None:
|
|
36
|
+
index = pd.Categorical(pkey, dtype=self._pkey_dtype).codes
|
|
37
|
+
index = index.astype('int64')
|
|
38
|
+
else:
|
|
39
|
+
index = pkey.to_numpy()
|
|
40
|
+
index = self._num_examples * index + batch
|
|
41
|
+
self._indices.append(index)
|
|
42
|
+
self._index_dtype = None
|
|
43
|
+
|
|
44
|
+
def get(self, pkey: pd.Series, batch: np.ndarray) -> np.ndarray:
|
|
45
|
+
r"""Retrieves the node IDs for a set of ``(pkey, batch)`` pairs.
|
|
46
|
+
|
|
47
|
+
Returns ``-1`` for any pair not registered in the mapping.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
pkey: The primary keys.
|
|
51
|
+
batch: The batch vector.
|
|
52
|
+
"""
|
|
53
|
+
if len(self._indices) == 0:
|
|
54
|
+
return np.full(len(pkey), -1, dtype=np.int64)
|
|
55
|
+
|
|
56
|
+
if self._index_dtype is None: # Lazy build index:
|
|
57
|
+
category = pd.unique(np.concatenate(self._indices))
|
|
58
|
+
self._index_dtype = pd.CategoricalDtype(category)
|
|
59
|
+
|
|
60
|
+
if self._pkey_dtype is not None:
|
|
61
|
+
index = pd.Categorical(pkey, dtype=self._pkey_dtype).codes
|
|
62
|
+
index = index.astype('int64')
|
|
63
|
+
else:
|
|
64
|
+
index = pkey.to_numpy()
|
|
65
|
+
index = self._num_examples * index + batch
|
|
66
|
+
|
|
67
|
+
out = pd.Categorical(index, dtype=self._index_dtype).codes
|
|
68
|
+
out = out.astype('int64')
|
|
69
|
+
return out
|
|
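A short usage sketch of the new Mapper; the import path is taken from the sql_sampler diff below, while the keys and batch assignments are made up:

import numpy as np
import pandas as pd

from kumoai.experimental.rfm.base.mapper import Mapper

mapper = Mapper(num_examples=4)  # at most 4 examples per mini-batch
mapper.add(
    pkey=pd.Series(['a', 'b', 'c']),
    batch=np.array([0, 0, 1]),
)
# Registered (pkey, batch) pairs map to contiguous node IDs,
# while unseen pairs such as ('z', 2) map to -1:
print(mapper.get(pd.Series(['a', 'c', 'z']), np.array([0, 1, 2])))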
kumoai/experimental/rfm/base/sampler.py
CHANGED
|
@@ -295,7 +295,8 @@ class Sampler(ABC):
|
|
|
295
295
|
|
|
296
296
|
# Store in compressed representation if more efficient:
|
|
297
297
|
num_cols = subgraph.table_dict[edge_type[2]].num_rows
|
|
298
|
-
if col is not None and len(col) > num_cols + 1
|
|
298
|
+
if (col is not None and len(col) > num_cols + 1
|
|
299
|
+
and ((col[1:] - col[:-1]) >= 0).all()):
|
|
299
300
|
layout = EdgeLayout.CSC
|
|
300
301
|
colcount = np.bincount(col, minlength=num_cols)
|
|
301
302
|
col = np.empty(num_cols + 1, dtype=col.dtype)
|
|
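For context on the stricter check added above: the compressed layout only works when the column vector is sorted, which is what the new monotonicity test guards. Below is a standalone sketch of the bincount-based pointer construction; only the lines visible in this hunk are mirrored, and the final cumsum step is an assumption about how the allocated array is filled:

import numpy as np

col = np.array([0, 0, 1, 1, 1, 3])  # destination indices, non-decreasing
num_cols = 4

assert len(col) > num_cols + 1 and ((col[1:] - col[:-1]) >= 0).all()

colcount = np.bincount(col, minlength=num_cols)
colptr = np.empty(num_cols + 1, dtype=col.dtype)
colptr[0] = 0
np.cumsum(colcount, out=colptr[1:])
print(colptr)  # [0 2 5 5 6]; edges of column j live in colptr[j]:colptr[j+1]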
kumoai/experimental/rfm/base/sql_sampler.py
CHANGED
|
@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Literal
|
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
import pandas as pd
|
|
7
|
+
from kumoapi.rfm.context import Subgraph
|
|
7
8
|
from kumoapi.typing import Dtype
|
|
8
9
|
|
|
9
10
|
from kumoai.experimental.rfm.base import (
|
|
@@ -12,11 +13,14 @@ from kumoai.experimental.rfm.base import (
|
|
|
12
13
|
SamplerOutput,
|
|
13
14
|
SourceColumn,
|
|
14
15
|
)
|
|
16
|
+
from kumoai.experimental.rfm.base.mapper import Mapper
|
|
15
17
|
from kumoai.utils import ProgressLogger, quote_ident
|
|
16
18
|
|
|
17
19
|
if TYPE_CHECKING:
|
|
18
20
|
from kumoai.experimental.rfm import Graph
|
|
19
21
|
|
|
22
|
+
EdgeType = tuple[str, str, str]
|
|
23
|
+
|
|
20
24
|
|
|
21
25
|
class SQLSampler(Sampler):
|
|
22
26
|
def __init__(
|
|
@@ -101,7 +105,8 @@ class SQLSampler(Sampler):
|
|
|
101
105
|
num_neighbors: list[int],
|
|
102
106
|
) -> SamplerOutput:
|
|
103
107
|
|
|
104
|
-
# Make sure to include primary key, foreign key and time columns
|
|
108
|
+
# Make sure to always include primary key, foreign key and time columns
|
|
109
|
+
# during data fetching since these are needed for graph traversal:
|
|
105
110
|
sample_columns_dict: dict[str, set[str]] = {}
|
|
106
111
|
for table, columns in columns_dict.items():
|
|
107
112
|
sample_columns = columns | {
|
|
@@ -110,9 +115,11 @@ class SQLSampler(Sampler):
|
|
|
110
115
|
}
|
|
111
116
|
if primary_key := self.primary_key_dict.get(table):
|
|
112
117
|
sample_columns |= {primary_key}
|
|
113
|
-
if time_column := self.time_column_dict.get(table):
|
|
114
|
-
sample_columns |= {time_column}
|
|
115
118
|
sample_columns_dict[table] = sample_columns
|
|
119
|
+
if not isinstance(anchor_time, pd.Series):
|
|
120
|
+
sample_columns_dict[entity_table_name] |= {
|
|
121
|
+
self.time_column_dict[entity_table_name]
|
|
122
|
+
}
|
|
116
123
|
|
|
117
124
|
# Sample Entity Table #################################################
|
|
118
125
|
|
|
@@ -139,88 +146,219 @@ class SQLSampler(Sampler):
|
|
|
139
146
|
anchor_time = df[time_column]
|
|
140
147
|
assert isinstance(anchor_time, pd.Series)
|
|
141
148
|
|
|
142
|
-
df_hop_dict: dict[tuple[str, int], pd.DataFrame] = {
|
|
143
|
-
(entity_table_name, 0): df,
|
|
144
|
-
}
|
|
145
|
-
batch_hop_dict: dict[tuple[str, int], np.ndarray] = {
|
|
146
|
-
(entity_table_name, 0): batch,
|
|
147
|
-
}
|
|
148
|
-
|
|
149
149
|
# Recursive Neighbor Sampling #########################################
|
|
150
150
|
|
|
151
|
+
mapper_dict: dict[str, Mapper] = defaultdict(
|
|
152
|
+
lambda: Mapper(num_examples=len(entity_pkey)))
|
|
153
|
+
mapper_dict[entity_table_name].add(
|
|
154
|
+
pkey=df[self.primary_key_dict[entity_table_name]],
|
|
155
|
+
batch=batch,
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
dfs_dict: dict[str, list[pd.DataFrame]] = defaultdict(list)
|
|
159
|
+
dfs_dict[entity_table_name].append(df)
|
|
160
|
+
batches_dict: dict[str, list[np.ndarray]] = defaultdict(list)
|
|
161
|
+
batches_dict[entity_table_name].append(batch)
|
|
162
|
+
num_sampled_nodes_dict: dict[str, list[int]] = defaultdict(
|
|
163
|
+
lambda: [0] * (len(num_neighbors) + 1))
|
|
164
|
+
num_sampled_nodes_dict[entity_table_name][0] = len(entity_pkey)
|
|
165
|
+
|
|
166
|
+
rows_dict: dict[EdgeType, list[np.ndarray]] = defaultdict(list)
|
|
167
|
+
cols_dict: dict[EdgeType, list[np.ndarray]] = defaultdict(list)
|
|
168
|
+
num_sampled_edges_dict: dict[EdgeType, list[int]] = defaultdict(
|
|
169
|
+
lambda: [0] * len(num_neighbors))
|
|
170
|
+
|
|
171
|
+
# The start index of data frame slices of the previous hop:
|
|
172
|
+
offset_dict: dict[str, int] = defaultdict(int)
|
|
173
|
+
|
|
151
174
|
for hop, neighbors in enumerate(num_neighbors):
|
|
152
175
|
if neighbors == 0:
|
|
153
176
|
break # Abort early.
|
|
154
177
|
|
|
155
|
-
|
|
156
|
-
|
|
178
|
+
for table in list(num_sampled_nodes_dict.keys()):
|
|
179
|
+
# Only sample from tables that have been visited in the
|
|
180
|
+
# previous hop:
|
|
181
|
+
if num_sampled_nodes_dict[table][hop] == 0:
|
|
182
|
+
continue
|
|
157
183
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
184
|
+
# Collect the slices of data sampled in the previous hop
|
|
185
|
+
# (but maintain only required key information):
|
|
186
|
+
cols = [fkey for fkey, _ in self.foreign_key_dict[table]]
|
|
187
|
+
if table in self.primary_key_dict:
|
|
188
|
+
cols.append(self.primary_key_dict[table])
|
|
189
|
+
dfs = [df[cols] for df in dfs_dict[table][offset_dict[table]:]]
|
|
190
|
+
df = pd.concat(
|
|
191
|
+
dfs,
|
|
192
|
+
axis=0,
|
|
193
|
+
ignore_index=True,
|
|
194
|
+
) if len(dfs) > 1 else dfs[0]
|
|
195
|
+
batches = batches_dict[table][offset_dict[table]:]
|
|
196
|
+
batch = (np.concatenate(batches)
|
|
197
|
+
if len(batches) > 1 else batches[0])
|
|
198
|
+
offset_dict[table] = len(batches_dict[table]) # Increase.
|
|
199
|
+
|
|
200
|
+
pkey: pd.Series | None = None
|
|
201
|
+
index: np.ndarray | None = None
|
|
202
|
+
if table in self.primary_key_dict:
|
|
203
|
+
pkey = df[self.primary_key_dict[table]]
|
|
204
|
+
index = mapper_dict[table].get(pkey, batch)
|
|
162
205
|
|
|
163
206
|
# Iterate over foreign keys in the current table:
|
|
164
207
|
for fkey, dst_table in self.foreign_key_dict[table]:
|
|
165
|
-
|
|
208
|
+
row = mapper_dict[dst_table].get(df[fkey], batch)
|
|
209
|
+
mask = row == -1
|
|
210
|
+
if mask.any():
|
|
211
|
+
key_df = pd.DataFrame({
|
|
212
|
+
'fkey': df[fkey],
|
|
213
|
+
'batch': batch,
|
|
214
|
+
}).iloc[mask]
|
|
215
|
+
# Only maintain unique keys per example:
|
|
216
|
+
unique_key_df = key_df.drop_duplicates()
|
|
217
|
+
# Fully de-duplicate keys across examples:
|
|
218
|
+
code, fkey_index = pd.factorize(unique_key_df['fkey'])
|
|
219
|
+
|
|
220
|
+
_df, _batch = self._by_pkey(
|
|
221
|
+
table_name=dst_table,
|
|
222
|
+
index=fkey_index,
|
|
223
|
+
columns=sample_columns_dict[dst_table],
|
|
224
|
+
) # Ensure result is sorted according to input order:
|
|
225
|
+
_df = _df.iloc[_batch.argsort()]
|
|
226
|
+
|
|
227
|
+
# Compute valid entries (without dangling foreign keys)
|
|
228
|
+
# in `unique_fkey_df`:
|
|
229
|
+
_mask = np.full(len(fkey_index), fill_value=False)
|
|
230
|
+
_mask[_batch] = True
|
|
231
|
+
_mask = _mask[code]
|
|
232
|
+
|
|
233
|
+
# Reconstruct unique (key, batch) pairs:
|
|
234
|
+
code, _ = pd.factorize(unique_key_df['fkey'][_mask])
|
|
235
|
+
_df = _df.iloc[code].reset_index(drop=True)
|
|
236
|
+
_batch = unique_key_df['batch'].to_numpy()[_mask]
|
|
237
|
+
|
|
238
|
+
# Register node IDs:
|
|
239
|
+
mapper_dict[dst_table].add(
|
|
240
|
+
pkey=_df[self.primary_key_dict[dst_table]],
|
|
241
|
+
batch=_batch,
|
|
242
|
+
)
|
|
243
|
+
row[mask] = mapper_dict[dst_table].get(
|
|
244
|
+
pkey=key_df['fkey'],
|
|
245
|
+
batch=key_df['batch'].to_numpy(),
|
|
246
|
+
) # NOTE `row` may still hold `-1` for dangling fkeys.
|
|
247
|
+
|
|
248
|
+
dfs_dict[dst_table].append(_df)
|
|
249
|
+
batches_dict[dst_table].append(_batch)
|
|
250
|
+
num_sampled_nodes_dict[dst_table][hop + 1] += ( #
|
|
251
|
+
len(_batch))
|
|
252
|
+
|
|
253
|
+
mask = row != -1
|
|
254
|
+
|
|
255
|
+
col = index
|
|
256
|
+
if col is None:
|
|
257
|
+
start = sum(num_sampled_nodes_dict[table][:hop])
|
|
258
|
+
end = sum(num_sampled_nodes_dict[table][:hop + 1])
|
|
259
|
+
col = np.arange(start, end)
|
|
260
|
+
|
|
261
|
+
row = row[mask]
|
|
262
|
+
col = col[mask]
|
|
263
|
+
|
|
264
|
+
edge_type = (table, fkey, dst_table)
|
|
265
|
+
edge_type = Subgraph.rev_edge_type(edge_type)
|
|
266
|
+
rows_dict[edge_type].append(row)
|
|
267
|
+
cols_dict[edge_type].append(col)
|
|
268
|
+
num_sampled_edges_dict[edge_type][hop] = len(col)
|
|
166
269
|
|
|
167
270
|
# Iterate over foreign keys that reference the current table:
|
|
168
271
|
for src_table, fkey in self.rev_foreign_key_dict[table]:
|
|
272
|
+
assert pkey is not None and index is not None
|
|
169
273
|
_df, _batch = self._by_fkey(
|
|
170
274
|
table_name=src_table,
|
|
171
275
|
foreign_key=fkey,
|
|
172
|
-
index=
|
|
276
|
+
index=pkey,
|
|
173
277
|
num_neighbors=neighbors,
|
|
174
278
|
anchor_time=anchor_time.iloc[batch],
|
|
175
279
|
columns=sample_columns_dict[src_table],
|
|
176
280
|
)
|
|
177
|
-
_batch = batch[_batch]
|
|
178
281
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
282
|
+
edge_type = (src_table, fkey, table)
|
|
283
|
+
cols_dict[edge_type].append(index[_batch])
|
|
284
|
+
num_sampled_edges_dict[edge_type][hop] = len(_batch)
|
|
182
285
|
|
|
183
|
-
|
|
286
|
+
_batch = batch[_batch]
|
|
287
|
+
num_nodes = sum(num_sampled_nodes_dict[src_table])
|
|
288
|
+
if src_table in self.primary_key_dict:
|
|
289
|
+
_pkey = _df[self.primary_key_dict[src_table]]
|
|
290
|
+
mapper_dict[src_table].add(_pkey, _batch)
|
|
291
|
+
row = mapper_dict[src_table].get(_pkey, _batch)
|
|
292
|
+
|
|
293
|
+
# Only preserve unknown rows:
|
|
294
|
+
mask = row >= num_nodes # type: ignore
|
|
295
|
+
mask[pd.duplicated(row)] = False
|
|
296
|
+
_df = _df.iloc[mask]
|
|
297
|
+
_batch = _batch[mask]
|
|
298
|
+
else:
|
|
299
|
+
row = np.arange(num_nodes, num_nodes + len(_batch))
|
|
300
|
+
|
|
301
|
+
rows_dict[edge_type].append(row)
|
|
302
|
+
num_sampled_nodes_dict[src_table][hop + 1] += len(_batch)
|
|
303
|
+
|
|
304
|
+
dfs_dict[src_table].append(_df)
|
|
305
|
+
batches_dict[src_table].append(_batch)
|
|
184
306
|
|
|
185
307
|
# Post-Processing #####################################################
|
|
186
308
|
|
|
187
|
-
|
|
188
|
-
batches_dict: dict[str, list[np.ndarray]] = defaultdict(list)
|
|
189
|
-
num_hops = max(hop for _, hop in df_hop_dict.keys()) # TODO
|
|
190
|
-
num_sampled_nodes_dict: dict[str, list[int]] = {
|
|
191
|
-
table: [0] * (num_hops + 1)
|
|
192
|
-
for table in [table for table, _ in df_hop_dict.keys()]
|
|
193
|
-
}
|
|
194
|
-
for (table, hop), df in df_hop_dict.items():
|
|
195
|
-
dfs_dict[table].append(df)
|
|
196
|
-
batches_dict[table].append(batch_hop_dict[(table, hop)])
|
|
197
|
-
num_sampled_nodes_dict[table][hop] = len(df)
|
|
198
|
-
|
|
199
|
-
df_dict = { # Concatenate data frames across hops:
|
|
309
|
+
df_dict = {
|
|
200
310
|
table:
|
|
201
311
|
pd.concat(dfs, axis=0, ignore_index=True)
|
|
202
312
|
if len(dfs) > 1 else dfs[0]
|
|
203
313
|
for table, dfs in dfs_dict.items()
|
|
204
314
|
}
|
|
315
|
+
|
|
316
|
+
# Only store unique rows in `df` above a certain threshold:
|
|
317
|
+
inverse_dict: dict[str, np.ndarray] = {}
|
|
318
|
+
for table, df in df_dict.items():
|
|
319
|
+
if table not in self.primary_key_dict:
|
|
320
|
+
continue
|
|
321
|
+
unique, index, inverse = np.unique(
|
|
322
|
+
df_dict[table][self.primary_key_dict[table]],
|
|
323
|
+
return_index=True,
|
|
324
|
+
return_inverse=True,
|
|
325
|
+
)
|
|
326
|
+
if len(df) > 1.05 * len(unique):
|
|
327
|
+
df_dict[table] = df.iloc[index].reset_index(drop=True)
|
|
328
|
+
inverse_dict[table] = inverse
|
|
329
|
+
|
|
205
330
|
df_dict = { # Post-filter column set:
|
|
206
331
|
table: df[list(columns_dict[table])]
|
|
207
|
-
for
|
|
332
|
+
for table, df in df_dict.items()
|
|
208
333
|
}
|
|
209
|
-
batch_dict = {
|
|
210
|
-
table:
|
|
211
|
-
np.concatenate(batches, axis=0) if len(batches) > 1 else batches[0]
|
|
334
|
+
batch_dict = {
|
|
335
|
+
table: np.concatenate(batches) if len(batches) > 1 else batches[0]
|
|
212
336
|
for table, batches in batches_dict.items()
|
|
213
337
|
}
|
|
338
|
+
row_dict = {
|
|
339
|
+
edge_type: np.concatenate(rows)
|
|
340
|
+
for edge_type, rows in rows_dict.items()
|
|
341
|
+
}
|
|
342
|
+
col_dict = {
|
|
343
|
+
edge_type: np.concatenate(cols)
|
|
344
|
+
for edge_type, cols in cols_dict.items()
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
if len(num_sampled_edges_dict) == 0: # Single table:
|
|
348
|
+
num_sampled_nodes_dict = {
|
|
349
|
+
key: value[:1]
|
|
350
|
+
for key, value in num_sampled_nodes_dict.items()
|
|
351
|
+
}
|
|
214
352
|
|
|
215
353
|
return SamplerOutput(
|
|
216
354
|
anchor_time=anchor_time.astype(int).to_numpy(),
|
|
217
355
|
df_dict=df_dict,
|
|
218
|
-
inverse_dict=
|
|
356
|
+
inverse_dict=inverse_dict,
|
|
219
357
|
batch_dict=batch_dict,
|
|
220
358
|
num_sampled_nodes_dict=num_sampled_nodes_dict,
|
|
221
|
-
row_dict=
|
|
222
|
-
col_dict=
|
|
223
|
-
num_sampled_edges_dict=
|
|
359
|
+
row_dict=row_dict,
|
|
360
|
+
col_dict=col_dict,
|
|
361
|
+
num_sampled_edges_dict=num_sampled_edges_dict,
|
|
224
362
|
)
|
|
225
363
|
|
|
226
364
|
# Abstract Methods ########################################################
|
|
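A compact sketch of the de-duplication step introduced at the end of the new sampling loop: rows of a table are only compacted when doing so saves more than roughly 5%, and the inverse vector keeps the original row order recoverable. The column values here are invented:

import numpy as np
import pandas as pd

df = pd.DataFrame({
    'user_id': [7, 3, 7, 7, 3, 9],
    'age':     [31, 25, 31, 31, 25, 40],
})

unique, index, inverse = np.unique(
    df['user_id'], return_index=True, return_inverse=True)

if len(df) > 1.05 * len(unique):
    df = df.iloc[index].reset_index(drop=True)  # one row per primary key
    # `inverse` maps every original row back to its compacted row:
    assert list(df['user_id'].to_numpy()[inverse]) == [7, 3, 7, 7, 3, 9]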
kumoai/experimental/rfm/base/table.py
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
import warnings
|
|
2
1
|
from abc import ABC, abstractmethod
|
|
3
2
|
from collections.abc import Sequence
|
|
4
3
|
from functools import cached_property
|
|
5
4
|
|
|
6
5
|
import numpy as np
|
|
7
6
|
import pandas as pd
|
|
8
|
-
import pyarrow as pa
|
|
9
7
|
from kumoapi.model_plan import MissingType
|
|
10
8
|
from kumoapi.source_table import UnavailableSourceTable
|
|
11
9
|
from kumoapi.table import Column as ColumnDefinition
|
|
@@ -21,6 +19,7 @@ from kumoai.experimental.rfm.base import (
|
|
|
21
19
|
SourceColumn,
|
|
22
20
|
SourceForeignKey,
|
|
23
21
|
)
|
|
22
|
+
from kumoai.experimental.rfm.base.utils import to_datetime
|
|
24
23
|
from kumoai.experimental.rfm.infer import (
|
|
25
24
|
infer_dtype,
|
|
26
25
|
infer_primary_key,
|
|
@@ -624,24 +623,6 @@ class Table(ABC):
|
|
|
624
623
|
r"""Sanitzes a :class:`pandas.DataFrame` in-place such that its data
|
|
625
624
|
types match table data and semantic type specification.
|
|
626
625
|
"""
|
|
627
|
-
def _to_datetime(ser: pd.Series) -> pd.Series:
|
|
628
|
-
if (not pd.api.types.is_datetime64_any_dtype(ser)
|
|
629
|
-
and not (isinstance(ser.dtype, pd.ArrowDtype) and
|
|
630
|
-
pa.types.is_timestamp(ser.dtype.pyarrow_dtype))):
|
|
631
|
-
with warnings.catch_warnings():
|
|
632
|
-
warnings.filterwarnings(
|
|
633
|
-
'ignore',
|
|
634
|
-
message='Could not infer format',
|
|
635
|
-
)
|
|
636
|
-
ser = pd.to_datetime(ser, errors='coerce')
|
|
637
|
-
if (isinstance(ser.dtype, pd.DatetimeTZDtype)
|
|
638
|
-
or (isinstance(ser.dtype, pd.ArrowDtype)
|
|
639
|
-
and ser.dtype.pyarrow_dtype.tz is not None)):
|
|
640
|
-
ser = ser.dt.tz_localize(None)
|
|
641
|
-
if ser.dtype != 'datetime64[ns]':
|
|
642
|
-
ser = ser.astype('datetime64[ns]')
|
|
643
|
-
return ser
|
|
644
|
-
|
|
645
626
|
def _to_list(ser: pd.Series, dtype: Dtype | None) -> pd.Series:
|
|
646
627
|
if (pd.api.types.is_string_dtype(ser)
|
|
647
628
|
and dtype in {Dtype.intlist, Dtype.floatlist}):
|
|
@@ -672,9 +653,9 @@ class Table(ABC):
|
|
|
672
653
|
stype = (stype_dict or {}).get(column_name)
|
|
673
654
|
|
|
674
655
|
if dtype == Dtype.time:
|
|
675
|
-
df[column_name] =
|
|
656
|
+
df[column_name] = to_datetime(df[column_name])
|
|
676
657
|
elif stype == Stype.timestamp:
|
|
677
|
-
df[column_name] =
|
|
658
|
+
df[column_name] = to_datetime(df[column_name])
|
|
678
659
|
elif dtype is not None and dtype.is_list():
|
|
679
660
|
df[column_name] = _to_list(df[column_name], dtype)
|
|
680
661
|
elif stype == Stype.sequence:
|
|
kumoai/experimental/rfm/base/utils.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import pyarrow as pa
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def is_datetime(ser: pd.Series) -> bool:
|
|
8
|
+
r"""Check whether a :class:`pandas.Series` holds datetime values."""
|
|
9
|
+
if isinstance(ser.dtype, pd.ArrowDtype):
|
|
10
|
+
dtype = ser.dtype.pyarrow_dtype
|
|
11
|
+
return (pa.types.is_timestamp(dtype) or pa.types.is_date(dtype)
|
|
12
|
+
or pa.types.is_time(dtype))
|
|
13
|
+
|
|
14
|
+
return pd.api.types.is_datetime64_any_dtype(ser)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def to_datetime(ser: pd.Series) -> pd.Series:
|
|
18
|
+
"""Converts a :class:`pandas.Series` to ``datetime64[ns]`` format."""
|
|
19
|
+
if isinstance(ser.dtype, pd.ArrowDtype):
|
|
20
|
+
ser = pd.Series(ser.to_numpy(), index=ser.index, name=ser.name)
|
|
21
|
+
|
|
22
|
+
if not pd.api.types.is_datetime64_any_dtype(ser):
|
|
23
|
+
with warnings.catch_warnings():
|
|
24
|
+
warnings.filterwarnings(
|
|
25
|
+
'ignore',
|
|
26
|
+
message='Could not infer format',
|
|
27
|
+
)
|
|
28
|
+
ser = pd.to_datetime(ser, errors='coerce')
|
|
29
|
+
|
|
30
|
+
if isinstance(ser.dtype, pd.DatetimeTZDtype):
|
|
31
|
+
ser = ser.dt.tz_localize(None)
|
|
32
|
+
|
|
33
|
+
if ser.dtype != 'datetime64[ns]':
|
|
34
|
+
ser = ser.astype('datetime64[ns]')
|
|
35
|
+
|
|
36
|
+
return ser
|
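A quick usage sketch of the two new helpers; the sample values are arbitrary:

import pandas as pd

from kumoai.experimental.rfm.base.utils import is_datetime, to_datetime

ser = pd.Series(['2024-01-01', '2024-02-15', 'not a date'])
print(is_datetime(ser))  # False: still plain strings
out = to_datetime(ser)   # invalid entries are coerced to NaT
print(out.dtype)         # datetime64[ns]
print(is_datetime(out))  # True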
kumoai/experimental/rfm/graph.py
CHANGED
|
@@ -19,6 +19,7 @@ from typing_extensions import Self
|
|
|
19
19
|
|
|
20
20
|
from kumoai import in_notebook, in_snowflake_notebook
|
|
21
21
|
from kumoai.experimental.rfm.base import ColumnSpec, DataBackend, Table
|
|
22
|
+
from kumoai.experimental.rfm.infer import infer_time_column
|
|
22
23
|
from kumoai.graph import Edge
|
|
23
24
|
from kumoai.mixin import CastMixin
|
|
24
25
|
from kumoai.utils import display
|
|
@@ -415,8 +416,9 @@ class Graph:
|
|
|
415
416
|
assert isinstance(connection, Connection)
|
|
416
417
|
|
|
417
418
|
with connection.cursor() as cursor:
|
|
418
|
-
|
|
419
|
-
|
|
419
|
+
sql = (f"SELECT SYSTEM$READ_YAML_FROM_SEMANTIC_VIEW("
|
|
420
|
+
f"'{semantic_view_name}')")
|
|
421
|
+
cursor.execute(sql)
|
|
420
422
|
cfg = yaml.safe_load(cursor.fetchone()[0])
|
|
421
423
|
|
|
422
424
|
graph = cls(tables=[])
|
|
@@ -492,7 +494,17 @@ class Graph:
|
|
|
492
494
|
)
|
|
493
495
|
|
|
494
496
|
# TODO Add a way to register time columns without heuristic usage.
|
|
495
|
-
|
|
497
|
+
time_candidates = [
|
|
498
|
+
column_cfg['name']
|
|
499
|
+
for column_cfg in table_cfg.get('time_dimensions', [])
|
|
500
|
+
if table.has_column(column_cfg['name'])
|
|
501
|
+
and table[column_cfg['name']].stype == Stype.timestamp
|
|
502
|
+
]
|
|
503
|
+
if time_column := infer_time_column(
|
|
504
|
+
df=table._get_sample_df(),
|
|
505
|
+
candidates=time_candidates,
|
|
506
|
+
):
|
|
507
|
+
table.time_column = time_column
|
|
496
508
|
|
|
497
509
|
graph.add_table(table)
|
|
498
510
|
|
|
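A small sketch of how the heuristic above is invoked for the filtered candidates; the frame and column names are made up, and only the df and candidates arguments visible in this diff are used:

import pandas as pd

from kumoai.experimental.rfm.infer import infer_time_column

df = pd.DataFrame({
    'created_at': pd.to_datetime(['2024-01-05', '2024-03-01']),
    'updated_at': pd.to_datetime(['2024-05-01', '2024-06-10']),
})
# Returns one of the candidate column names (or a falsy value if none fits):
print(infer_time_column(df=df, candidates=['created_at', 'updated_at']))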
kumoai/experimental/rfm/infer/dtype.py
CHANGED
|
@@ -3,6 +3,8 @@ import pandas as pd
|
|
|
3
3
|
import pyarrow as pa
|
|
4
4
|
from kumoapi.typing import Dtype
|
|
5
5
|
|
|
6
|
+
from kumoai.experimental.rfm.base.utils import is_datetime
|
|
7
|
+
|
|
6
8
|
PANDAS_TO_DTYPE: dict[str, Dtype] = {
|
|
7
9
|
'bool': Dtype.bool,
|
|
8
10
|
'boolean': Dtype.bool,
|
|
@@ -34,7 +36,7 @@ def infer_dtype(ser: pd.Series) -> Dtype:
|
|
|
34
36
|
Returns:
|
|
35
37
|
The data type.
|
|
36
38
|
"""
|
|
37
|
-
if
|
|
39
|
+
if is_datetime(ser):
|
|
38
40
|
return Dtype.date
|
|
39
41
|
if pd.api.types.is_timedelta64_dtype(ser.dtype):
|
|
40
42
|
return Dtype.timedelta
|
|
kumoai/experimental/rfm/infer/time_col.py
CHANGED
|
@@ -3,6 +3,8 @@ import warnings
|
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
|
|
6
|
+
from kumoai.experimental.rfm.base.utils import to_datetime
|
|
7
|
+
|
|
6
8
|
|
|
7
9
|
def infer_time_column(
|
|
8
10
|
df: pd.DataFrame,
|
|
@@ -43,11 +45,11 @@ def infer_time_column(
|
|
|
43
45
|
with warnings.catch_warnings():
|
|
44
46
|
warnings.filterwarnings('ignore', message='Could not infer format')
|
|
45
47
|
min_timestamp_dict = {
|
|
46
|
-
key:
|
|
48
|
+
key: to_datetime(df[key].iloc[:10_000])
|
|
47
49
|
for key in candidates
|
|
48
50
|
}
|
|
49
51
|
min_timestamp_dict = {
|
|
50
|
-
key: value.min()
|
|
52
|
+
key: value.min()
|
|
51
53
|
for key, value in min_timestamp_dict.items()
|
|
52
54
|
}
|
|
53
55
|
min_timestamp_dict = {
|
kumoai/experimental/rfm/rfm.py
CHANGED
|
@@ -1044,8 +1044,16 @@ class KumoRFM:
|
|
|
1044
1044
|
if len(self._sampler.time_column_dict) == 0:
|
|
1045
1045
|
return # Graph without timestamps
|
|
1046
1046
|
|
|
1047
|
-
|
|
1048
|
-
|
|
1047
|
+
if query.query_type == QueryType.TEMPORAL:
|
|
1048
|
+
aggr_table_names = [
|
|
1049
|
+
aggr._get_target_column_name().split('.')[0]
|
|
1050
|
+
for aggr in query.get_all_target_aggregations()
|
|
1051
|
+
]
|
|
1052
|
+
min_time = self._sampler.get_min_time(aggr_table_names)
|
|
1053
|
+
max_time = self._sampler.get_max_time(aggr_table_names)
|
|
1054
|
+
else:
|
|
1055
|
+
min_time = self._sampler.get_min_time()
|
|
1056
|
+
max_time = self._sampler.get_max_time()
|
|
1049
1057
|
|
|
1050
1058
|
if anchor_time < min_time:
|
|
1051
1059
|
raise ValueError(f"Anchor timestamp '{anchor_time}' is before "
|
|
kumoai/kumolib.cp313-win_amd64.pyd
CHANGED
|
Binary file
|
kumoai/testing/snow.py
CHANGED
|
@@ -10,7 +10,7 @@ def connect(
|
|
|
10
10
|
id: str,
|
|
11
11
|
account: str,
|
|
12
12
|
user: str,
|
|
13
|
-
warehouse: str,
|
|
13
|
+
warehouse: str | None = None,
|
|
14
14
|
database: str | None = None,
|
|
15
15
|
schema: str | None = None,
|
|
16
16
|
) -> Connection:
|
|
@@ -42,8 +42,8 @@ def connect(
|
|
|
42
42
|
return _connect(
|
|
43
43
|
account=account,
|
|
44
44
|
user=user,
|
|
45
|
-
warehouse='WH_XS',
|
|
46
|
-
database='KUMO',
|
|
45
|
+
warehouse=warehouse or 'WH_XS',
|
|
46
|
+
database=database or 'KUMO',
|
|
47
47
|
schema=schema,
|
|
48
48
|
session_parameters=dict(CLIENT_TELEMETRY_ENABLED=False),
|
|
49
49
|
**kwargs,
|
kumoai/utils/progress_logger.py
CHANGED
|
@@ -57,7 +57,8 @@ class ProgressLogger:
|
|
|
57
57
|
|
|
58
58
|
def __enter__(self) -> Self:
|
|
59
59
|
self.depth += 1
|
|
60
|
-
self.
|
|
60
|
+
if self.depth == 1:
|
|
61
|
+
self.start_time = time.perf_counter()
|
|
61
62
|
return self
|
|
62
63
|
|
|
63
64
|
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
|
kumoai/utils/sql.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
def quote_ident(
|
|
1
|
+
def quote_ident(ident: str, char: str = '"') -> str:
|
|
2
2
|
r"""Quotes a SQL identifier."""
|
|
3
|
-
return
|
|
3
|
+
return char + ident.replace(char, char + char) + char
|
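The rewritten helper doubles any embedded quote character, which is the standard way to escape quoted identifiers in SQL. A few illustrative calls:

from kumoai.utils import quote_ident

print(quote_ident('users'))             # "users"
print(quote_ident('my "table"'))        # "my ""table"""
print(quote_ident('events', char="'"))  # 'events'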
{kumoai-2.14.0.dev202601081732.dist-info → kumoai-2.15.0.dev202601151732.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kumoai
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.15.0.dev202601151732
|
|
4
4
|
Summary: AI on the Modern Data Stack
|
|
5
5
|
Author-email: "Kumo.AI" <hello@kumo.ai>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -23,7 +23,7 @@ Requires-Dist: requests>=2.28.2
|
|
|
23
23
|
Requires-Dist: urllib3
|
|
24
24
|
Requires-Dist: plotly
|
|
25
25
|
Requires-Dist: typing_extensions>=4.5.0
|
|
26
|
-
Requires-Dist: kumo-api
|
|
26
|
+
Requires-Dist: kumo-api<1.0.0,>=0.53.0
|
|
27
27
|
Requires-Dist: tqdm>=4.66.0
|
|
28
28
|
Requires-Dist: aiohttp>=3.10.0
|
|
29
29
|
Requires-Dist: pydantic>=1.10.21
|
|
{kumoai-2.14.0.dev202601081732.dist-info → kumoai-2.15.0.dev202601151732.dist-info}/RECORD
RENAMED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
kumoai/__init__.py,sha256=cKL7QeT-b5OHi75jtvFzbIKGjeJV5Tago7jKLX0nuYE,11207
|
|
2
2
|
kumoai/_logging.py,sha256=qL4JbMQwKXri2f-SEJoFB8TY5ALG12S-nobGTNWxW-A,915
|
|
3
3
|
kumoai/_singleton.py,sha256=i2BHWKpccNh5SJGDyU0IXsnYzJAYr8Xb0wz4c6LRbpo,861
|
|
4
|
-
kumoai/_version.py,sha256=
|
|
4
|
+
kumoai/_version.py,sha256=IPXsgCkME-eTqhUBmuva76Ngg8kOKbQ3VUDkC6t6dI4,39
|
|
5
5
|
kumoai/databricks.py,sha256=ahwJz6DWLXMkndT0XwEDBxF-hoqhidFR8wBUQ4TLZ68,490
|
|
6
6
|
kumoai/exceptions.py,sha256=7TMs0SC8xrU009_Pgd4QXtSF9lxJq8MtRbeX9pcQUy4,859
|
|
7
7
|
kumoai/formatting.py,sha256=o3uCnLwXPhe1KI5WV9sBgRrcU7ed4rgu_pf89GL9Nc0,983
|
|
8
8
|
kumoai/futures.py,sha256=J8rtZMEYFzdn5xF_x-LAiKJz3KGL6PT02f6rq_2bOJk,3836
|
|
9
9
|
kumoai/jobs.py,sha256=dCi7BAdfm2tCnonYlGU4WJokJWbh3RzFfaOX2EYCIHU,2576
|
|
10
|
-
kumoai/kumolib.cp313-win_amd64.pyd,sha256=
|
|
10
|
+
kumoai/kumolib.cp313-win_amd64.pyd,sha256=KuxCQKoXH9eksQws8WB2LImapu-jOY0d42huAGFInoQ,198144
|
|
11
11
|
kumoai/mixin.py,sha256=IaiB8SAI0VqOoMVzzIaUlqMt53-QPUK6OB0HikG-V9E,840
|
|
12
12
|
kumoai/spcs.py,sha256=KWfENrwSLruprlD-QPh63uU0N6npiNrwkeKfBk3EUyQ,4260
|
|
13
13
|
kumoai/artifact_export/__init__.py,sha256=UXAQI5q92ChBzWAk8o3J6pElzYHudAzFZssQXd4o7i8,247
|
|
@@ -55,9 +55,9 @@ kumoai/encoder/__init__.py,sha256=8FeP6mUyCeXxr1b8kUIi5dxe5vEXQRft9tPoaV1CBqg,18
|
|
|
55
55
|
kumoai/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
56
56
|
kumoai/experimental/rfm/__init__.py,sha256=dibc0t7g-PYanT90TncRlceD0ZqxtKStVdzzG1_cXC8,7226
|
|
57
57
|
kumoai/experimental/rfm/authenticate.py,sha256=odKaqOAEkdC_wB340cs_ozjSvQLTce45WLiJSEzQaL8,19283
|
|
58
|
-
kumoai/experimental/rfm/graph.py,sha256=
|
|
58
|
+
kumoai/experimental/rfm/graph.py,sha256=DiJZaEXiwNB7DzujRc9Fo__8u19VAsz7VagjmSKScVQ,48106
|
|
59
59
|
kumoai/experimental/rfm/relbench.py,sha256=30O7QAKYcMgr6C9Qpgev7gxSMAtWXop25p7DtmzrBlE,2352
|
|
60
|
-
kumoai/experimental/rfm/rfm.py,sha256=
|
|
60
|
+
kumoai/experimental/rfm/rfm.py,sha256=l31iaWoDujjmPilzTbh8BL_Ajlvpg4TSTYdnkbelsIg,61436
|
|
61
61
|
kumoai/experimental/rfm/sagemaker.py,sha256=7Yk4um0gBBn7u-Bz8JRv53z0__FcD0uESoiImJhxsBw,5101
|
|
62
62
|
kumoai/experimental/rfm/task_table.py,sha256=4sx9z6JhHQVQaPAlbyfDwbyOBApOUs6SEXHHcfsdxl0,10139
|
|
63
63
|
kumoai/experimental/rfm/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -66,26 +66,28 @@ kumoai/experimental/rfm/backend/local/graph_store.py,sha256=fmBOdXK6a7hHqfB5Nqpc
|
|
|
66
66
|
kumoai/experimental/rfm/backend/local/sampler.py,sha256=tD3l5xfcxjsWDaC45V-xOAI_-Jyyk_au-E7wyrMqCx4,11038
|
|
67
67
|
kumoai/experimental/rfm/backend/local/table.py,sha256=86lztrVxdpya25X4r8mR2c_t-tI8gAEyahz-mNmk9tA,3602
|
|
68
68
|
kumoai/experimental/rfm/backend/snow/__init__.py,sha256=lsF0sJXZ0Pc3NvBTBXJHudp-iZJXdidrhyqFQKEU5_Q,1030
|
|
69
|
-
kumoai/experimental/rfm/backend/snow/sampler.py,sha256=
|
|
70
|
-
kumoai/experimental/rfm/backend/snow/table.py,sha256=
|
|
69
|
+
kumoai/experimental/rfm/backend/snow/sampler.py,sha256=qmjhO_Nz9cCiqmMCesw6PCGwFfY6705EkOdireHI0KM,16729
|
|
70
|
+
kumoai/experimental/rfm/backend/snow/table.py,sha256=5F_E3E4pGelFwbGe0zhXH31BZa5qZnDec0Uxtn38d2M,9323
|
|
71
71
|
kumoai/experimental/rfm/backend/sqlite/__init__.py,sha256=wkSr2D_E5VCH4RGW8FCN2iJp-6wb_RTCMO8R3p5lkiw,934
|
|
72
|
-
kumoai/experimental/rfm/backend/sqlite/sampler.py,sha256=
|
|
72
|
+
kumoai/experimental/rfm/backend/sqlite/sampler.py,sha256=3vt4i2sv1cOjHq3_4JHya_NJEjMcy5TijopDwTY8F0Q,19155
|
|
73
73
|
kumoai/experimental/rfm/backend/sqlite/table.py,sha256=nH3S3lBVfG6aWp0DtCUVJRBZhlQV4ieskbz-5D0AlG0,6867
|
|
74
74
|
kumoai/experimental/rfm/base/__init__.py,sha256=is8HTLng28h5AtpledQ-hdIheGM052JdBhjv8HtKhDw,754
|
|
75
75
|
kumoai/experimental/rfm/base/column.py,sha256=JeDKSZnTChFHMaIC3TcEgdPG9Rr2PATTAMIMhjnvXrs,5117
|
|
76
76
|
kumoai/experimental/rfm/base/expression.py,sha256=04NgmrrvjM1yFXnOMDZtb5V1-oFufqCamv2KTETOHik,1296
|
|
77
|
-
kumoai/experimental/rfm/base/
|
|
77
|
+
kumoai/experimental/rfm/base/mapper.py,sha256=WWPwpIOYa4Ppw8UOh4zf2D9fY2t3FtYKdEM3VCHJNiE,2489
|
|
78
|
+
kumoai/experimental/rfm/base/sampler.py,sha256=2hvFfvgwjqbQljZKAKtZCIcQCsmWGj_CMpSEWpSx3uk,32734
|
|
78
79
|
kumoai/experimental/rfm/base/source.py,sha256=67rpePejkZli4B_eDWzDrn_8Q5Msyo2XZ9F8IGB0ImI,320
|
|
79
|
-
kumoai/experimental/rfm/base/sql_sampler.py,sha256=
|
|
80
|
-
kumoai/experimental/rfm/base/table.py,sha256=
|
|
80
|
+
kumoai/experimental/rfm/base/sql_sampler.py,sha256=bsHQ1UTIjodkkU4c3oY-_6DRZHS8m6RIqLqSwPUfLZ4,16067
|
|
81
|
+
kumoai/experimental/rfm/base/table.py,sha256=N47ym2p7jJ5RO8jjt_KQWfATTW__MlJnGi4zV6EVUIk,26737
|
|
82
|
+
kumoai/experimental/rfm/base/utils.py,sha256=VEMeOehsQCKVatqPzMTzXnVFsLz-NkyQQ67pgivtuCE,1169
|
|
81
83
|
kumoai/experimental/rfm/infer/__init__.py,sha256=Uf4Od7B2G80U61mkkxsnxHPGu1Hh2RqOazTkOYtNLvA,538
|
|
82
84
|
kumoai/experimental/rfm/infer/categorical.py,sha256=bqmfrE5ZCBTcb35lA4SyAkCu3MgttAn29VBJYMBNhVg,893
|
|
83
|
-
kumoai/experimental/rfm/infer/dtype.py,sha256=
|
|
85
|
+
kumoai/experimental/rfm/infer/dtype.py,sha256=SDZR9ULx6Z35Ij29v6t79y-VuTvikEfrHDQLOIL_xI4,2895
|
|
84
86
|
kumoai/experimental/rfm/infer/id.py,sha256=xaJBETLZa8ttzZCsDwFSwfyCi3VYsLc_kDWT_t_6Ih4,954
|
|
85
87
|
kumoai/experimental/rfm/infer/multicategorical.py,sha256=mMuRCbfs0zsfOoPB_eCs6nlt4WgNPvklmYPRq7w85L4,1167
|
|
86
88
|
kumoai/experimental/rfm/infer/pkey.py,sha256=GCAUN8Hz5-leVv2-H8soP3k-DsXJ1O_uQU25-CsSWN0,4540
|
|
87
89
|
kumoai/experimental/rfm/infer/stype.py,sha256=lOgiGJ_rsaeiFWyVUw0IMwn_7hGOqL8mvy2rGzXfi3Q,929
|
|
88
|
-
kumoai/experimental/rfm/infer/time_col.py,sha256
|
|
90
|
+
kumoai/experimental/rfm/infer/time_col.py,sha256=G2zMtcy7gEPgz7O4ljXBws5LgZ1qpQpoFUk3t5q5eqA,1881
|
|
89
91
|
kumoai/experimental/rfm/infer/timestamp.py,sha256=L2VxjtYTSyUBYAo4M-L08xSQlPpqnHMAVF5_vxjh3Y0,1135
|
|
90
92
|
kumoai/experimental/rfm/pquery/__init__.py,sha256=RkTn0I74uXOUuOiBpa6S-_QEYctMutkUnBEfF9ztQzI,159
|
|
91
93
|
kumoai/experimental/rfm/pquery/executor.py,sha256=mz5mqhHbgZM0f5oNFLyThWGM4UePx_kd1O4zyJ_8ToQ,2830
|
|
@@ -100,7 +102,7 @@ kumoai/pquery/predictive_query.py,sha256=I5Ntc7YO1qEGxKrLuhAzZO3SySr8Wnjhde8eDbb
|
|
|
100
102
|
kumoai/pquery/training_table.py,sha256=ex5FpA4_rY5OSIl2koisQENFoPbTz2PmG-DR3rvnysg,17004
|
|
101
103
|
kumoai/testing/__init__.py,sha256=XBQ_Sa3WnOYlpXZ3gUn8w6nVfZt-nfPhytfIBeiPt4w,178
|
|
102
104
|
kumoai/testing/decorators.py,sha256=p79ZCQqPY_MHWy0_l7-xQ6wUIqFTn4AbrGWTHLvpbQY,1664
|
|
103
|
-
kumoai/testing/snow.py,sha256=
|
|
105
|
+
kumoai/testing/snow.py,sha256=QItmVyelgPRW7dRcG1IQGAUdXFuWNULtz5Jo7GrxDtM,1576
|
|
104
106
|
kumoai/trainer/__init__.py,sha256=uCFXy9bw_byn_wYd3M-BTZCHTVvv4XXr8qRlh-QOvag,981
|
|
105
107
|
kumoai/trainer/baseline_trainer.py,sha256=oXweh8j1sar6KhQfr3A7gmQxcDq7SG0Bx3jIenbtyC4,4117
|
|
106
108
|
kumoai/trainer/config.py,sha256=7_Jv1w1mqaokCQwQdJkqCSgVpmh8GqE3fL1Ky_vvttI,100
|
|
@@ -113,10 +115,10 @@ kumoai/utils/__init__.py,sha256=lazi9gAl5YBg1Nk121zSDg-BIKTVETjFTZwTFUlGngo,267
|
|
|
113
115
|
kumoai/utils/datasets.py,sha256=UyAII-oAn7x3ombuvpbSQ41aVF9SYKBjQthTD-vcT2A,3011
|
|
114
116
|
kumoai/utils/display.py,sha256=oPNcXLUUnSKo0m2Hxc330QFPPtnV-wjJMjKoBseB1HY,2519
|
|
115
117
|
kumoai/utils/forecasting.py,sha256=ZgKeUCbWLOot0giAkoigwU5du8LkrwAicFOi5hVn6wg,7624
|
|
116
|
-
kumoai/utils/progress_logger.py,sha256=
|
|
117
|
-
kumoai/utils/sql.py,sha256=
|
|
118
|
-
kumoai-2.
|
|
119
|
-
kumoai-2.
|
|
120
|
-
kumoai-2.
|
|
121
|
-
kumoai-2.
|
|
122
|
-
kumoai-2.
|
|
118
|
+
kumoai/utils/progress_logger.py,sha256=z1eZwxMLcSymhS3r9_GQ35AgoRl1Hz5BfxAyUJkmifg,9893
|
|
119
|
+
kumoai/utils/sql.py,sha256=e4dMLBxIdxqOLgwdgsFshX1JQq4gpA5UlStI-XiuUBw,150
|
|
120
|
+
kumoai-2.15.0.dev202601151732.dist-info/licenses/LICENSE,sha256=ZUilBDp--4vbhsEr6f_Upw9rnIx09zQ3K9fXQ0rfd6w,1111
|
|
121
|
+
kumoai-2.15.0.dev202601151732.dist-info/METADATA,sha256=YzS0_Lc5sPPg0ArySpiRvrYGdMmeO7W6n8jAUD0Y8jA,2635
|
|
122
|
+
kumoai-2.15.0.dev202601151732.dist-info/WHEEL,sha256=qV0EIPljj1XC_vuSatRWjn02nZIz3N1t8jsZz7HBr2U,101
|
|
123
|
+
kumoai-2.15.0.dev202601151732.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
|
|
124
|
+
kumoai-2.15.0.dev202601151732.dist-info/RECORD,,
|
|
{kumoai-2.14.0.dev202601081732.dist-info → kumoai-2.15.0.dev202601151732.dist-info}/WHEEL
RENAMED
|
File without changes
|
{kumoai-2.14.0.dev202601081732.dist-info → kumoai-2.15.0.dev202601151732.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{kumoai-2.14.0.dev202601081732.dist-info → kumoai-2.15.0.dev202601151732.dist-info}/top_level.txt
RENAMED
|
File without changes
|