PyPI - kumoai - Versions diffs - 2.13.0.dev202511261731__cp310-cp310-win_amd64.whl → 2.13.0.dev202512040252__cp310-cp310-win_amd64.whl - Mend

kumoai 2.13.0.dev202511261731__cp310-cp310-win_amd64.whl → 2.13.0.dev202512040252__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

kumoai/_version.py +1 -1
kumoai/connector/utils.py +23 -2
kumoai/experimental/rfm/__init__.py +20 -45
kumoai/experimental/rfm/backend/__init__.py +0 -0
kumoai/experimental/rfm/backend/local/__init__.py +38 -0
kumoai/experimental/rfm/backend/local/table.py +109 -0
kumoai/experimental/rfm/backend/snow/__init__.py +35 -0
kumoai/experimental/rfm/backend/snow/table.py +115 -0
kumoai/experimental/rfm/backend/sqlite/__init__.py +30 -0
kumoai/experimental/rfm/backend/sqlite/table.py +101 -0
kumoai/experimental/rfm/base/__init__.py +10 -0
kumoai/experimental/rfm/base/column.py +66 -0
kumoai/experimental/rfm/base/source.py +18 -0
kumoai/experimental/rfm/{local_table.py → base/table.py} +134 -139
kumoai/experimental/rfm/{local_graph.py → graph.py} +287 -62
kumoai/experimental/rfm/infer/__init__.py +6 -0
kumoai/experimental/rfm/infer/dtype.py +79 -0
kumoai/experimental/rfm/infer/pkey.py +126 -0
kumoai/experimental/rfm/infer/time_col.py +62 -0
kumoai/experimental/rfm/local_graph_sampler.py +42 -1
kumoai/experimental/rfm/local_graph_store.py +13 -27
kumoai/experimental/rfm/rfm.py +6 -16
kumoai/experimental/rfm/sagemaker.py +11 -3
kumoai/kumolib.cp310-win_amd64.pyd +0 -0
kumoai/testing/decorators.py +1 -1
{kumoai-2.13.0.dev202511261731.dist-info → kumoai-2.13.0.dev202512040252.dist-info}/METADATA +9 -8
{kumoai-2.13.0.dev202511261731.dist-info → kumoai-2.13.0.dev202512040252.dist-info}/RECORD +30 -18
kumoai/experimental/rfm/utils.py +0 -344
{kumoai-2.13.0.dev202511261731.dist-info → kumoai-2.13.0.dev202512040252.dist-info}/WHEEL +0 -0
{kumoai-2.13.0.dev202511261731.dist-info → kumoai-2.13.0.dev202512040252.dist-info}/licenses/LICENSE +0 -0
{kumoai-2.13.0.dev202511261731.dist-info → kumoai-2.13.0.dev202512040252.dist-info}/top_level.txt +0 -0

kumoai/experimental/rfm/infer/pkey.py ADDED Viewed

@@ -0,0 +1,126 @@
+import re
+import warnings
+from typing import Optional
+import pandas as pd
+def infer_primary_key(
+    table_name: str,
+    df: pd.DataFrame,
+    candidates: list[str],
+) -> Optional[str]:
+    r"""Auto-detect potential primary key column.
+    Every candidate is scored by how well its name matches ``id`` or a
+    variant of the table name (optionally followed by an ID-like suffix such
+    as ``id``, ``hash``, ``key``, ``code`` or ``uuid``), plus up to three
+    points for the fraction of unique values within its first 1,000,000
+    rows. Candidates with a score below 4 are discarded.
+    Args:
+        table_name: The table name.
+        df: The pandas DataFrame to analyze.
+        candidates: A list of potential candidates.
+    Returns:
+        The name of the detected primary key, or ``None`` if not found. If
+        several candidates share the highest score, a warning listing them
+        is emitted and ``None`` is returned.
+    """
+    # A list of (potentially modified) table names that are eligible to match
+    # with a primary key, i.e.:
+    # - UserInfo -> User
+    # - snakecase <-> camelcase
+    # - camelcase <-> snakecase
+    # - plural <-> singular (users -> user, eligibilities -> eligibility)
+    # - verb -> noun (qualifying -> qualify)
+    _table_names = {table_name}
+    if table_name.lower().endswith('_info'):
+        _table_names.add(table_name[:-5])
+    elif table_name.lower().endswith('info'):
+        _table_names.add(table_name[:-4])
+    # A table called just "info"/"_info" leaves an empty name behind, which
+    # would match every column that merely ends in one of the suffixes:
+    _table_names.discard('')
+    table_names = set()
+    for _table_name in _table_names:
+        table_names.add(_table_name.lower())
+        snakecase = re.sub(r'(.)([A-Z][a-z]+)', r'\1_\2', _table_name)
+        snakecase = re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', snakecase)
+        table_names.add(snakecase.lower())
+        camelcase = _table_name.replace('_', '')
+        table_names.add(camelcase.lower())
+        if _table_name.lower().endswith('s'):
+            table_names.add(_table_name.lower()[:-1])
+            table_names.add(snakecase.lower()[:-1])
+            table_names.add(camelcase.lower()[:-1])
+        else:
+            table_names.add(_table_name.lower() + 's')
+            table_names.add(snakecase.lower() + 's')
+            table_names.add(camelcase.lower() + 's')
+        if _table_name.lower().endswith('ies'):
+            table_names.add(_table_name.lower()[:-3] + 'y')
+            table_names.add(snakecase.lower()[:-3] + 'y')
+            table_names.add(camelcase.lower()[:-3] + 'y')
+        elif _table_name.lower().endswith('y'):
+            table_names.add(_table_name.lower()[:-1] + 'ies')
+            table_names.add(snakecase.lower()[:-1] + 'ies')
+            table_names.add(camelcase.lower()[:-1] + 'ies')
+        if _table_name.lower().endswith('ing'):
+            table_names.add(_table_name.lower()[:-3])
+            table_names.add(snakecase.lower()[:-3])
+            table_names.add(camelcase.lower()[:-3])
+    # Scores are floats since the uniqueness ratio below is fractional:
+    scores: list[tuple[str, float]] = []
+    for col_name in candidates:
+        col_name_lower = col_name.lower()
+        score: float = 0
+        if col_name_lower == 'id':
+            score += 4
+        for table_name_lower in table_names:
+            if col_name_lower == table_name_lower:
+                score += 4  # USER -> USER
+                break
+            for suffix in ['id', 'hash', 'key', 'code', 'uuid']:
+                if not col_name_lower.endswith(suffix):
+                    continue
+                if col_name_lower == f'{table_name_lower}_{suffix}':
+                    score += 5  # USER -> USER_ID
+                    break
+                if col_name_lower == f'{table_name_lower}{suffix}':
+                    score += 5  # User -> UserId
+                    break
+                if col_name_lower.endswith(f'{table_name_lower}_{suffix}'):
+                    score += 2
+                if col_name_lower.endswith(f'{table_name_lower}{suffix}'):
+                    score += 2
+            # `rel-bench` hard-coding :(
+            if table_name == 'studies' and col_name == 'nct_id':
+                score += 1
+        # Reward uniqueness, measured on a bounded prefix of the column:
+        ser = df[col_name].iloc[:1_000_000]
+        if len(ser) > 0:  # The ratio is undefined for empty tables.
+            score += 3 * (ser.nunique() / len(ser))
+        scores.append((col_name, score))
+    scores = [x for x in scores if x[-1] >= 4]
+    scores.sort(key=lambda x: x[-1], reverse=True)
+    if len(scores) == 0:
+        return None
+    if len(scores) == 1:
+        return scores[0][0]
+    # In case of multiple candidates, only return one if its score is unique:
+    max_score = scores[0][1]  # `scores` is sorted in descending order.
+    if max_score != scores[1][1]:
+        return scores[0][0]
+    tied = [col_name for col_name, score in scores if score == max_score]
+    warnings.warn(f"Found multiple potential primary keys in table "
+                  f"'{table_name}': {tied}. Please specify the primary "
+                  f"key for this table manually.")
+    return None

kumoai/experimental/rfm/infer/time_col.py ADDED Viewed

@@ -0,0 +1,62 @@
+import re
+import warnings
+from typing import Optional
+import pandas as pd
+def infer_time_column(
+    df: pd.DataFrame,
+    candidates: list[str],
+) -> Optional[str]:
+    r"""Auto-detect potential time column.
+    Columns carrying a standalone ``last`` token in their name are ignored.
+    A single ``create*`` column wins outright; otherwise, the column whose
+    earliest parseable timestamp (within the first 10,000 rows) is the
+    oldest is selected.
+    Args:
+        df: The pandas DataFrame to analyze.
+        candidates: A list of potential candidates.
+    Returns:
+        The name of the detected time column, or ``None`` if not found.
+    """
+    # Drop every candidate with a `*last*` token in its column name:
+    pool = []
+    for col_name in candidates:
+        if re.search(r'(^|_)last(_|$)', col_name, re.IGNORECASE) is None:
+            pool.append(col_name)
+    if not pool:
+        return None
+    if len(pool) == 1:
+        return pool[0]
+    # A dedicated `create*` column takes precedence over everything else:
+    created = [name for name in pool if name.lower().startswith('create')]
+    if len(created) == 1:
+        return created[0]
+    if created:  # Several `create*` columns: only compete among those.
+        pool = created
+    # Parse a prefix of each remaining column; unparseable entries are
+    # coerced to `NaT` rather than raising:
+    with warnings.catch_warnings():
+        warnings.filterwarnings('ignore', message='Could not infer format')
+        parsed = [
+            (name, pd.to_datetime(df[name].iloc[:10_000], errors='coerce'))
+            for name in pool
+        ]
+    # Timezone information is dropped so that all minima are comparable:
+    minima = [(name, ser.min().tz_localize(None)) for name, ser in parsed]
+    earliest = {}
+    for name, first in minima:
+        if pd.isna(first):  # No valid timestamp found in this column.
+            continue
+        earliest[name] = first
+    if not earliest:
+        return None
+    # Usually, the best time column points to the oldest timestamps:
+    return min(earliest, key=earliest.get)  # type: ignore

kumoai/experimental/rfm/local_graph_sampler.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import re
 from typing import Dict, List, Optional, Tuple
 import numpy as np
@@ -7,7 +8,47 @@ from kumoapi.typing import Stype
 import kumoai.kumolib as kumolib
 from kumoai.experimental.rfm.local_graph_store import LocalGraphStore
-from kumoai.experimental.rfm.utils import normalize_text
+# Punctuation characters that get replaced by a space before word splitting:
+PUNCTUATION = re.compile(r"[\'\"\.,\(\)\!\?\;\:]")
+# Any run of whitespace characters:
+MULTISPACE = re.compile(r"\s+")
+def normalize_text(
+    ser: pd.Series,
+    max_words: Optional[int] = 50,
+) -> pd.Series:
+    r"""Normalizes text into a list of lower-case words.
+    Missing values become empty lists. An empty series, or a series whose
+    first entry is already list-like, is returned unchanged.
+    Args:
+        ser: The :class:`pandas.Series` to normalize.
+        max_words: The maximum number of words to return.
+            This will auto-shrink any large text column to avoid blowing up
+            context size.
+    """
+    if len(ser) == 0:
+        return ser
+    if pd.api.types.is_list_like(ser.iloc[0]):
+        return ser
+    def tokenize(text: str) -> list[str]:
+        text = PUNCTUATION.sub(" ", text)
+        text = re.sub(r"<br\s*/?>", " ", text)  # Handle <br /> or <br>
+        tokens = MULTISPACE.sub(" ", text).split()
+        return tokens if max_words is None else tokens[:max_words]
+    cleaned = ser.fillna('').astype(str)
+    if max_words is not None:
+        # An English word is estimated at 5 characters + 1 space on average.
+        # Cutting the raw text up front keeps word splitting cheap on giant
+        # inputs:
+        char_budget = 6 * max_words
+        cleaned = cleaned.str[:char_budget]
+    return cleaned.str.lower().map(tokenize)
 class LocalGraphSampler:

kumoai/experimental/rfm/local_graph_store.py CHANGED Viewed

@@ -6,8 +6,7 @@ import pandas as pd
 from kumoapi.rfm.context import Subgraph
 from kumoapi.typing import Stype
-from kumoai.experimental.rfm import LocalGraph
-from kumoai.experimental.rfm.utils import normalize_text
+from kumoai.experimental.rfm import Graph, LocalTable
 from kumoai.utils import InteractiveProgressLogger, ProgressLogger
 try:
@@ -20,8 +19,7 @@ except ImportError:
 class LocalGraphStore:
     def __init__(
         self,
-        graph: LocalGraph,
-        preprocess: bool = False,
+        graph: Graph,
         verbose: Union[bool, ProgressLogger] = True,
     ) -> None:
@@ -32,7 +30,7 @@ class LocalGraphStore:
             )
         with verbose as logger:
-            self.df_dict, self.mask_dict = self.sanitize(graph, preprocess)
+            self.df_dict, self.mask_dict = self.sanitize(graph)
             self.stype_dict = self.get_stype_dict(graph)
             logger.log("Sanitized input data")
@@ -105,8 +103,7 @@ class LocalGraphStore:
     def sanitize(
         self,
-        graph: LocalGraph,
-        preprocess: bool = False,
+        graph: Graph,
     ) -> Tuple[Dict[str, pd.DataFrame], Dict[str, np.ndarray]]:
         r"""Sanitizes raw data according to table schema definition:
@@ -115,17 +112,12 @@ class LocalGraphStore:
         * drops timezone information from timestamps
         * drops duplicate primary keys
         * removes rows with missing primary keys or time values
-        If ``preprocess`` is set to ``True``, it will additionally pre-process
-        data for faster model processing. In particular, it:
-        * tokenizes any text column that is not a foreign key
         """
-        df_dict: Dict[str, pd.DataFrame] = {
-            table_name: table._data.copy(deep=False).reset_index(drop=True)
-            for table_name, table in graph.tables.items()
-        }
-        foreign_keys = {(edge.src_table, edge.fkey) for edge in graph.edges}
+        df_dict: Dict[str, pd.DataFrame] = {}
+        for table_name, table in graph.tables.items():
+            assert isinstance(table, LocalTable)
+            df = table._data
+            df_dict[table_name] = df.copy(deep=False).reset_index(drop=True)
         mask_dict: Dict[str, np.ndarray] = {}
         for table in graph.tables.values():
@@ -144,12 +136,6 @@ class LocalGraphStore:
                         ser = ser.dt.tz_localize(None)
                         df_dict[table.name][col.name] = ser
-                # Normalize text in advance (but exclude foreign keys):
-                if (preprocess and col.stype == Stype.text
-                        and (table.name, col.name) not in foreign_keys):
-                    ser = df_dict[table.name][col.name]
-                    df_dict[table.name][col.name] = normalize_text(ser)
             mask: Optional[np.ndarray] = None
             if table._time_column is not None:
                 ser = df_dict[table.name][table._time_column]
@@ -165,7 +151,7 @@ class LocalGraphStore:
         return df_dict, mask_dict
-    def get_stype_dict(self, graph: LocalGraph) -> Dict[str, Dict[str, Stype]]:
+    def get_stype_dict(self, graph: Graph) -> Dict[str, Dict[str, Stype]]:
         stype_dict: Dict[str, Dict[str, Stype]] = {}
         foreign_keys = {(edge.src_table, edge.fkey) for edge in graph.edges}
         for table in graph.tables.values():
@@ -180,7 +166,7 @@ class LocalGraphStore:
     def get_pkey_data(
         self,
-        graph: LocalGraph,
+        graph: Graph,
     ) -> Tuple[
             Dict[str, str],
             Dict[str, pd.DataFrame],
@@ -218,7 +204,7 @@ class LocalGraphStore:
     def get_time_data(
         self,
-        graph: LocalGraph,
+        graph: Graph,
     ) -> Tuple[
             Dict[str, str],
             Dict[str, str],
@@ -259,7 +245,7 @@ class LocalGraphStore:
     def get_csc(
         self,
-        graph: LocalGraph,
+        graph: Graph,
     ) -> Tuple[
             Dict[Tuple[str, str, str], np.ndarray],
             Dict[Tuple[str, str, str], np.ndarray],

kumoai/experimental/rfm/rfm.py CHANGED Viewed

@@ -32,7 +32,7 @@ from kumoapi.task import TaskType
 from kumoai.client.rfm import RFMAPI
 from kumoai.exceptions import HTTPException
-from kumoai.experimental.rfm import LocalGraph
+from kumoai.experimental.rfm import Graph
 from kumoai.experimental.rfm.local_graph_sampler import LocalGraphSampler
 from kumoai.experimental.rfm.local_graph_store import LocalGraphStore
 from kumoai.experimental.rfm.local_pquery_driver import (
@@ -123,17 +123,17 @@ class KumoRFM:
     :class:`KumoRFM` is a foundation model to generate predictions for any
     relational dataset without training.
     The model is pre-trained and the class provides an interface to query the
-    model from a :class:`LocalGraph` object.
+    model from a :class:`Graph` object.
     .. code-block:: python
-        from kumoai.experimental.rfm import LocalGraph, KumoRFM
+        from kumoai.experimental.rfm import Graph, KumoRFM
         df_users = pd.DataFrame(...)
         df_items = pd.DataFrame(...)
         df_orders = pd.DataFrame(...)
-        graph = LocalGraph.from_data({
+        graph = Graph.from_data({
             'users': df_users,
             'items': df_items,
             'orders': df_orders,
@@ -150,26 +150,16 @@ class KumoRFM:
     Args:
         graph: The graph.
-        preprocess: Whether to pre-process the data in advance during graph
-            materialization.
-            This is a runtime trade-off between graph materialization and model
-            processing speed.
-            It can be benefical to preprocess your data once and then run many
-            queries on top to achieve maximum model speed.
-            However, if activiated, graph materialization can take potentially
-            much longer, especially on graphs with many large text columns.
-            Best to tune this option manually.
         verbose: Whether to print verbose output.
     """
     def __init__(
         self,
-        graph: LocalGraph,
-        preprocess: bool = False,
+        graph: Graph,
         verbose: Union[bool, ProgressLogger] = True,
     ) -> None:
         graph = graph.validate()
         self._graph_def = graph._to_api_graph_definition()
-        self._graph_store = LocalGraphStore(graph, preprocess, verbose)
+        self._graph_store = LocalGraphStore(graph, verbose)
         self._graph_sampler = LocalGraphSampler(self._graph_store)
         self._client: Optional[RFMAPI] = None

kumoai/experimental/rfm/sagemaker.py CHANGED Viewed

@@ -2,15 +2,22 @@ import base64
 import json
 from typing import Any, Dict, List, Tuple
-import boto3
 import requests
-from mypy_boto3_sagemaker_runtime.client import SageMakerRuntimeClient
-from mypy_boto3_sagemaker_runtime.type_defs import InvokeEndpointOutputTypeDef
 from kumoai.client import KumoClient
 from kumoai.client.endpoints import Endpoint, HTTPMethod
 from kumoai.exceptions import HTTPException
+try:
+    # isort: off
+    from mypy_boto3_sagemaker_runtime.client import SageMakerRuntimeClient
+    from mypy_boto3_sagemaker_runtime.type_defs import (
+        InvokeEndpointOutputTypeDef, )
+    # isort: on
+except ImportError:
+    SageMakerRuntimeClient = Any
+    InvokeEndpointOutputTypeDef = Any
 class SageMakerResponseAdapter(requests.Response):
     def __init__(self, sm_response: InvokeEndpointOutputTypeDef):
@@ -34,6 +41,7 @@ class SageMakerResponseAdapter(requests.Response):
 class KumoClient_SageMakerAdapter(KumoClient):
     def __init__(self, region: str, endpoint_name: str):
+        import boto3
         self._client: SageMakerRuntimeClient = boto3.client(
             service_name="sagemaker-runtime", region_name=region)
         self._endpoint_name = endpoint_name

kumoai/kumolib.cp310-win_amd64.pyd CHANGED Viewed

Binary file

kumoai/testing/decorators.py CHANGED Viewed

@@ -25,7 +25,7 @@ def onlyFullTest(func: Callable) -> Callable:
 def has_package(package: str) -> bool:
     r"""Returns ``True`` in case ``package`` is installed."""
     req = Requirement(package)
-    if importlib.util.find_spec(req.name) is None:
+    if importlib.util.find_spec(req.name) is None:  # type: ignore
         return False
     try:

{kumoai-2.13.0.dev202511261731.dist-info → kumoai-2.13.0.dev202512040252.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kumoai
-Version: 2.13.0.dev202511261731
+Version: 2.13.0.dev202512040252
 Summary: AI on the Modern Data Stack
 Author-email: "Kumo.AI" <hello@kumo.ai>
 License-Expression: MIT
@@ -23,13 +23,11 @@ Requires-Dist: requests>=2.28.2
 Requires-Dist: urllib3
 Requires-Dist: plotly
 Requires-Dist: typing_extensions>=4.5.0
-Requires-Dist: kumo-api==0.46.0
+Requires-Dist: kumo-api==0.48.0
 Requires-Dist: tqdm>=4.66.0
 Requires-Dist: aiohttp>=3.10.0
 Requires-Dist: pydantic>=1.10.21
 Requires-Dist: rich>=9.0.0
-Requires-Dist: mypy-boto3-sagemaker-runtime
-Requires-Dist: boto3
 Provides-Extra: doc
 Requires-Dist: sphinx; extra == "doc"
 Requires-Dist: sphinx-book-theme; extra == "doc"
@@ -40,13 +38,16 @@ Provides-Extra: test
 Requires-Dist: pytest; extra == "test"
 Requires-Dist: pytest-mock; extra == "test"
 Requires-Dist: requests-mock; extra == "test"
-Provides-Extra: test-sagemaker
-Requires-Dist: sagemaker; extra == "test-sagemaker"
-Requires-Dist: pandas==2.1.4; extra == "test-sagemaker"
-Requires-Dist: pyarrow==12.0.1; extra == "test-sagemaker"
+Provides-Extra: sqlite
+Requires-Dist: adbc_driver_sqlite; extra == "sqlite"
+Provides-Extra: snowflake
+Requires-Dist: snowflake-connector-python; extra == "snowflake"
+Requires-Dist: pyyaml; extra == "snowflake"
 Provides-Extra: sagemaker
 Requires-Dist: boto3<2.0,>=1.30.0; extra == "sagemaker"
 Requires-Dist: mypy-boto3-sagemaker-runtime<2.0,>=1.34.0; extra == "sagemaker"
+Provides-Extra: test-sagemaker
+Requires-Dist: sagemaker<3.0; extra == "test-sagemaker"
 Dynamic: license-file
 Dynamic: requires-dist

{kumoai-2.13.0.dev202511261731.dist-info → kumoai-2.13.0.dev202512040252.dist-info}/RECORD RENAMED Viewed

@@ -1,13 +1,13 @@
 kumoai/__init__.py,sha256=qu-qohU2cQlManX1aZIlzA3ivKl52m-cSQBPSW8urUU,10837
 kumoai/_logging.py,sha256=qL4JbMQwKXri2f-SEJoFB8TY5ALG12S-nobGTNWxW-A,915
 kumoai/_singleton.py,sha256=i2BHWKpccNh5SJGDyU0IXsnYzJAYr8Xb0wz4c6LRbpo,861
-kumoai/_version.py,sha256=P7PbPaqmt6kLq-80AyouMRr_ZBx8A7_nPBBPEXW44ag,39
+kumoai/_version.py,sha256=16u1rVm-N2IEE7QbyS9U5nn_hjp7P_wxBIQzzAKSnDA,39
 kumoai/databricks.py,sha256=ahwJz6DWLXMkndT0XwEDBxF-hoqhidFR8wBUQ4TLZ68,490
 kumoai/exceptions.py,sha256=7TMs0SC8xrU009_Pgd4QXtSF9lxJq8MtRbeX9pcQUy4,859
 kumoai/formatting.py,sha256=o3uCnLwXPhe1KI5WV9sBgRrcU7ed4rgu_pf89GL9Nc0,983
 kumoai/futures.py,sha256=J8rtZMEYFzdn5xF_x-LAiKJz3KGL6PT02f6rq_2bOJk,3836
 kumoai/jobs.py,sha256=dCi7BAdfm2tCnonYlGU4WJokJWbh3RzFfaOX2EYCIHU,2576
-kumoai/kumolib.cp310-win_amd64.pyd,sha256=lfpQDN2Fu1tGqTxas5A9Jv3fLm-WdA_oSkhQfo6-pvg,194048
+kumoai/kumolib.cp310-win_amd64.pyd,sha256=3iE0thfrVDx0Yhh0I0li-BwZcIpQfRpaYxYMsSpYofc,194048
 kumoai/mixin.py,sha256=IaiB8SAI0VqOoMVzzIaUlqMt53-QPUK6OB0HikG-V9E,840
 kumoai/spcs.py,sha256=KWfENrwSLruprlD-QPh63uU0N6npiNrwkeKfBk3EUyQ,4260
 kumoai/artifact_export/__init__.py,sha256=UXAQI5q92ChBzWAk8o3J6pElzYHudAzFZssQXd4o7i8,247
@@ -50,23 +50,35 @@ kumoai/connector/glue_connector.py,sha256=kqT2q53Da7PeeaZrvLVzFXC186E7glh5eGitKL
 kumoai/connector/s3_connector.py,sha256=AUzENbQ20bYXh3XOXEOsWRKlaGGkm3YrW9JfBLm-LqY,10433
 kumoai/connector/snowflake_connector.py,sha256=tQzIWxC4oDGqxFt0212w5eoIPT4QBP2nuF9SdKRNwNI,9274
 kumoai/connector/source_table.py,sha256=fnqwIKY6qYo4G0EsRzchb6FgZ-dQyU6aRaD9UAxsml0,18010
-kumoai/connector/utils.py,sha256=SlkjPJS_wqfwFzIaQOHZtENQnbOz5sgLbvvvPDXE1ww,65786
+kumoai/connector/utils.py,sha256=5K9BMdWiIP3hhdkUc6Xt1e0xv5YyziXtZ4PnBqq0Ehw,66490
 kumoai/encoder/__init__.py,sha256=8FeP6mUyCeXxr1b8kUIi5dxe5vEXQRft9tPoaV1CBqg,186
 kumoai/experimental/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-kumoai/experimental/rfm/__init__.py,sha256=gpjpeN8PT3ZESi6kUaeyZqYnoJnysRVXDaY9hrycJA4,7020
+kumoai/experimental/rfm/__init__.py,sha256=EFZz6IvvskmeO85Vig6p1m_6jdimS_BkeREOndHuRsc,6247
 kumoai/experimental/rfm/authenticate.py,sha256=G89_4TMeUpr5fG_0VTzMF5sdNhaciitA1oc2loTlTmo,19321
-kumoai/experimental/rfm/local_graph.py,sha256=nZ9hDfyWg1dHFLoTEKoLt0ZJPvf9MUA1MNyfTRzJThg,30886
-kumoai/experimental/rfm/local_graph_sampler.py,sha256=3JNpktW__nwxVKZxP4cQBgsIin7J_LNXYS7YlV36xbU,6854
-kumoai/experimental/rfm/local_graph_store.py,sha256=eUuIMFcdIRqN1kRxnqOdJpKEt-S_oyupAyHr7YuQoSU,14206
+kumoai/experimental/rfm/graph.py,sha256=kSWve-Fn_9qERFjEpCDO5zDnngtd9T4MOhR_o46PI7s,39602
+kumoai/experimental/rfm/local_graph_sampler.py,sha256=dQ3JnuozTNeZyUFRu2h8OTMNmV1RAoaCA0gvkpgOstg,8110
+kumoai/experimental/rfm/local_graph_store.py,sha256=6jY1ciVIlnBBhZCxWwBTl7SKX1fxRIDLszwrftD0Cdk,13485
 kumoai/experimental/rfm/local_pquery_driver.py,sha256=Yd_yHIrvuDj16IC1pvsqiQvZS41vvOOCRMiuDGtN6Fk,26851
-kumoai/experimental/rfm/local_table.py,sha256=5H08657TIyH7n_QnpFKr2g4BtVqdXTymmrfhSGaDmkU,20150
-kumoai/experimental/rfm/rfm.py,sha256=MarISSPKuv6nIaGG69zFAwIagF6EA37xcSRClZrQMFc,49470
-kumoai/experimental/rfm/sagemaker.py,sha256=eebpZtASqiIGF2FpY53bbWLj6p-u5hkK4RLgBNAvEzg,4953
-kumoai/experimental/rfm/utils.py,sha256=dLx2wdyTWg7vZI_7R-I0z_lA-2aV5M8h9n3bnnLyylI,11467
-kumoai/experimental/rfm/infer/__init__.py,sha256=fPsdDr4D3hgC8snW0j3pAVpCyR-xrauuogMnTOMrfok,304
+kumoai/experimental/rfm/rfm.py,sha256=vOnL8ecHTo1TX2B8_T8xaWGou8qYYz8DyVENu1H93mM,48834
+kumoai/experimental/rfm/sagemaker.py,sha256=sEJSyfEFBA3-7wKinBEzSooKHEn0BgPjrgRnPhYo79g,5120
+kumoai/experimental/rfm/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+kumoai/experimental/rfm/backend/local/__init__.py,sha256=usMh0fuDxKK-aOVT1sU30BQWFS0eSkfUrhUVILisQQI,934
+kumoai/experimental/rfm/backend/local/table.py,sha256=1PqNOROzlnK3SaZHNcU2hyzeifs0N4wssQAS3-Z0Myc,3674
+kumoai/experimental/rfm/backend/snow/__init__.py,sha256=viMeR9VWpB1kjRdSWCTNFMdM7a8Mj_Dtck1twJW8dV8,962
+kumoai/experimental/rfm/backend/snow/table.py,sha256=HVrPtCVvfsisFmq9jMovowsE5Wl5oti3O-kru7ruXlc,4312
+kumoai/experimental/rfm/backend/sqlite/__init__.py,sha256=xw5NNLrWSvUvRkD49X_9hZYjas5EuP1XDANPy0EEjOg,874
+kumoai/experimental/rfm/backend/sqlite/table.py,sha256=mBiZC21gQwfR4demFrP37GmawMHfIm-G82mLQeBqIZo,3901
+kumoai/experimental/rfm/base/__init__.py,sha256=oXPkeBemtuDxRUK61-0sOT84GZB_oQ6HvaZNU1KFNaw,199
+kumoai/experimental/rfm/base/column.py,sha256=OE-PRQ8HO4uTq0e3_3eHJFfhp5nzw79zd-43g3iMh4g,2385
+kumoai/experimental/rfm/base/source.py,sha256=H5yN9xAwK3i_69EdqOV_x58muPGKQiI8ev5BhHQDZEo,290
+kumoai/experimental/rfm/base/table.py,sha256=glyAg4LCQdddM3lIRClJSA7qMyfoHUVAGBf1rEs6B8Y,20113
+kumoai/experimental/rfm/infer/__init__.py,sha256=qKg8or-SpgTApD6ePw1PJ4aUZPrOLTHLRCmBIJ92hrk,486
 kumoai/experimental/rfm/infer/categorical.py,sha256=bqmfrE5ZCBTcb35lA4SyAkCu3MgttAn29VBJYMBNhVg,893
+kumoai/experimental/rfm/infer/dtype.py,sha256=Hf_drluYNuN59lTSe-8GuXalg20Pv93kCktB6Hb9f74,2686
 kumoai/experimental/rfm/infer/id.py,sha256=xaJBETLZa8ttzZCsDwFSwfyCi3VYsLc_kDWT_t_6Ih4,954
 kumoai/experimental/rfm/infer/multicategorical.py,sha256=D-1KwYRkOSkBrOJr4Xa3eTCoAF9O9hPGa7Vg67V5_HU,1150
+kumoai/experimental/rfm/infer/pkey.py,sha256=Hvztcircd4iGdsnFU9Xi1kq_A5ONMnkAdnrpQT5svSs,4519
+kumoai/experimental/rfm/infer/time_col.py,sha256=G98Cgz1m9G9VA-ApnCmGYnJxEFwp1jfaPf3nCMOz_N0,1882
 kumoai/experimental/rfm/infer/timestamp.py,sha256=L2VxjtYTSyUBYAo4M-L08xSQlPpqnHMAVF5_vxjh3Y0,1135
 kumoai/experimental/rfm/pquery/__init__.py,sha256=RkTn0I74uXOUuOiBpa6S-_QEYctMutkUnBEfF9ztQzI,159
 kumoai/experimental/rfm/pquery/executor.py,sha256=S8wwXbAkH-YSnmEVYB8d6wyJF4JJ003mH_0zFTvOp_I,2843
@@ -80,7 +92,7 @@ kumoai/pquery/prediction_table.py,sha256=hWG4L_ze4PLgUoxCXNKk8_nkYxVXELQs8_X8KGO
 kumoai/pquery/predictive_query.py,sha256=GWhQpQxf6apyyu-bvE3z63mX6NLd8lKbyu_jzj7rNms,25608
 kumoai/pquery/training_table.py,sha256=L1QjaVlY4SAPD8OUmTaH6YjZzBbPOnS9mnAT69znWv0,16233
 kumoai/testing/__init__.py,sha256=XBQ_Sa3WnOYlpXZ3gUn8w6nVfZt-nfPhytfIBeiPt4w,178
-kumoai/testing/decorators.py,sha256=yznguzsdkL0UaZtBbnO6oaUrXisJvziaiO3dmN41UXE,1648
+kumoai/testing/decorators.py,sha256=p79ZCQqPY_MHWy0_l7-xQ6wUIqFTn4AbrGWTHLvpbQY,1664
 kumoai/trainer/__init__.py,sha256=uCFXy9bw_byn_wYd3M-BTZCHTVvv4XXr8qRlh-QOvag,981
 kumoai/trainer/baseline_trainer.py,sha256=oXweh8j1sar6KhQfr3A7gmQxcDq7SG0Bx3jIenbtyC4,4117
 kumoai/trainer/config.py,sha256=7_Jv1w1mqaokCQwQdJkqCSgVpmh8GqE3fL1Ky_vvttI,100
@@ -92,8 +104,8 @@ kumoai/utils/__init__.py,sha256=wAKgmwtMIGuiauW9D_GGKH95K-24Kgwmld27mm4nsro,278
 kumoai/utils/datasets.py,sha256=UyAII-oAn7x3ombuvpbSQ41aVF9SYKBjQthTD-vcT2A,3011
 kumoai/utils/forecasting.py,sha256=ZgKeUCbWLOot0giAkoigwU5du8LkrwAicFOi5hVn6wg,7624
 kumoai/utils/progress_logger.py,sha256=MZsWgHd4UZQKCXiJZgQeW-Emi_BmzlCKPLPXOL_HqBo,5239
-kumoai-2.13.0.dev202511261731.dist-info/licenses/LICENSE,sha256=ZUilBDp--4vbhsEr6f_Upw9rnIx09zQ3K9fXQ0rfd6w,1111
-kumoai-2.13.0.dev202511261731.dist-info/METADATA,sha256=KEPt_QdWVLyZhYMj3PNjwvb1gm6fXI9_FvWyMOvMrtw,2544
-kumoai-2.13.0.dev202511261731.dist-info/WHEEL,sha256=KUuBC6lxAbHCKilKua8R9W_TM71_-9Sg5uEP3uDWcoU,101
-kumoai-2.13.0.dev202511261731.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
-kumoai-2.13.0.dev202511261731.dist-info/RECORD,,
+kumoai-2.13.0.dev202512040252.dist-info/licenses/LICENSE,sha256=ZUilBDp--4vbhsEr6f_Upw9rnIx09zQ3K9fXQ0rfd6w,1111
+kumoai-2.13.0.dev202512040252.dist-info/METADATA,sha256=T-O--qEm_2QPzB-dDkwR6Ei7r79H7v6qQBbR1e1J8gg,2580
+kumoai-2.13.0.dev202512040252.dist-info/WHEEL,sha256=KUuBC6lxAbHCKilKua8R9W_TM71_-9Sg5uEP3uDWcoU,101
+kumoai-2.13.0.dev202512040252.dist-info/top_level.txt,sha256=YjU6UcmomoDx30vEXLsOU784ED7VztQOsFApk1SFwvs,7
+kumoai-2.13.0.dev202512040252.dist-info/RECORD,,