kumoai 2.12.0.dev202510231830__cp311-cp311-win_amd64.whl → 2.14.0.dev202512311733__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. kumoai/__init__.py +41 -35
  2. kumoai/_version.py +1 -1
  3. kumoai/client/client.py +15 -13
  4. kumoai/client/endpoints.py +1 -0
  5. kumoai/client/jobs.py +24 -0
  6. kumoai/client/pquery.py +6 -2
  7. kumoai/client/rfm.py +35 -7
  8. kumoai/connector/utils.py +23 -2
  9. kumoai/experimental/rfm/__init__.py +191 -48
  10. kumoai/experimental/rfm/authenticate.py +3 -4
  11. kumoai/experimental/rfm/backend/__init__.py +0 -0
  12. kumoai/experimental/rfm/backend/local/__init__.py +42 -0
  13. kumoai/experimental/rfm/{local_graph_store.py → backend/local/graph_store.py} +65 -127
  14. kumoai/experimental/rfm/backend/local/sampler.py +312 -0
  15. kumoai/experimental/rfm/backend/local/table.py +113 -0
  16. kumoai/experimental/rfm/backend/snow/__init__.py +37 -0
  17. kumoai/experimental/rfm/backend/snow/sampler.py +297 -0
  18. kumoai/experimental/rfm/backend/snow/table.py +242 -0
  19. kumoai/experimental/rfm/backend/sqlite/__init__.py +32 -0
  20. kumoai/experimental/rfm/backend/sqlite/sampler.py +398 -0
  21. kumoai/experimental/rfm/backend/sqlite/table.py +184 -0
  22. kumoai/experimental/rfm/base/__init__.py +30 -0
  23. kumoai/experimental/rfm/base/column.py +152 -0
  24. kumoai/experimental/rfm/base/expression.py +44 -0
  25. kumoai/experimental/rfm/base/sampler.py +761 -0
  26. kumoai/experimental/rfm/base/source.py +19 -0
  27. kumoai/experimental/rfm/base/sql_sampler.py +143 -0
  28. kumoai/experimental/rfm/base/table.py +735 -0
  29. kumoai/experimental/rfm/graph.py +1237 -0
  30. kumoai/experimental/rfm/infer/__init__.py +8 -0
  31. kumoai/experimental/rfm/infer/dtype.py +82 -0
  32. kumoai/experimental/rfm/infer/multicategorical.py +1 -1
  33. kumoai/experimental/rfm/infer/pkey.py +128 -0
  34. kumoai/experimental/rfm/infer/stype.py +35 -0
  35. kumoai/experimental/rfm/infer/time_col.py +61 -0
  36. kumoai/experimental/rfm/pquery/__init__.py +0 -4
  37. kumoai/experimental/rfm/pquery/executor.py +27 -27
  38. kumoai/experimental/rfm/pquery/pandas_executor.py +64 -40
  39. kumoai/experimental/rfm/relbench.py +76 -0
  40. kumoai/experimental/rfm/rfm.py +386 -276
  41. kumoai/experimental/rfm/sagemaker.py +138 -0
  42. kumoai/kumolib.cp311-win_amd64.pyd +0 -0
  43. kumoai/pquery/predictive_query.py +10 -6
  44. kumoai/spcs.py +1 -3
  45. kumoai/testing/decorators.py +1 -1
  46. kumoai/testing/snow.py +50 -0
  47. kumoai/trainer/distilled_trainer.py +175 -0
  48. kumoai/trainer/trainer.py +9 -10
  49. kumoai/utils/__init__.py +3 -2
  50. kumoai/utils/display.py +51 -0
  51. kumoai/utils/progress_logger.py +188 -16
  52. kumoai/utils/sql.py +3 -0
  53. {kumoai-2.12.0.dev202510231830.dist-info → kumoai-2.14.0.dev202512311733.dist-info}/METADATA +13 -2
  54. {kumoai-2.12.0.dev202510231830.dist-info → kumoai-2.14.0.dev202512311733.dist-info}/RECORD +57 -36
  55. kumoai/experimental/rfm/local_graph.py +0 -810
  56. kumoai/experimental/rfm/local_graph_sampler.py +0 -184
  57. kumoai/experimental/rfm/local_pquery_driver.py +0 -494
  58. kumoai/experimental/rfm/local_table.py +0 -545
  59. kumoai/experimental/rfm/pquery/backend.py +0 -136
  60. kumoai/experimental/rfm/pquery/pandas_backend.py +0 -478
  61. kumoai/experimental/rfm/utils.py +0 -344
  62. {kumoai-2.12.0.dev202510231830.dist-info → kumoai-2.14.0.dev202512311733.dist-info}/WHEEL +0 -0
  63. {kumoai-2.12.0.dev202510231830.dist-info → kumoai-2.14.0.dev202512311733.dist-info}/licenses/LICENSE +0 -0
  64. {kumoai-2.12.0.dev202510231830.dist-info → kumoai-2.14.0.dev202512311733.dist-info}/top_level.txt +0 -0
kumoai/experimental/rfm/infer/__init__.py
@@ -1,11 +1,19 @@
+from .dtype import infer_dtype
 from .id import contains_id
 from .timestamp import contains_timestamp
 from .categorical import contains_categorical
 from .multicategorical import contains_multicategorical
+from .stype import infer_stype
+from .pkey import infer_primary_key
+from .time_col import infer_time_column
 
 __all__ = [
+    'infer_dtype',
     'contains_id',
     'contains_timestamp',
     'contains_categorical',
     'contains_multicategorical',
+    'infer_stype',
+    'infer_primary_key',
+    'infer_time_column',
 ]

kumoai/experimental/rfm/infer/dtype.py
@@ -0,0 +1,82 @@
+import numpy as np
+import pandas as pd
+import pyarrow as pa
+from kumoapi.typing import Dtype
+
+PANDAS_TO_DTYPE: dict[str, Dtype] = {
+    'bool': Dtype.bool,
+    'boolean': Dtype.bool,
+    'int8': Dtype.int,
+    'int16': Dtype.int,
+    'int32': Dtype.int,
+    'int64': Dtype.int,
+    'float': Dtype.float,
+    'double': Dtype.float,
+    'float16': Dtype.float,
+    'float32': Dtype.float,
+    'float64': Dtype.float,
+    'object': Dtype.string,
+    'string': Dtype.string,
+    'string[python]': Dtype.string,
+    'string[pyarrow]': Dtype.string,
+    'binary': Dtype.binary,
+    'binary[python]': Dtype.binary,
+    'binary[pyarrow]': Dtype.binary,
+}
+
+
+def infer_dtype(ser: pd.Series) -> Dtype:
+    """Extracts the :class:`Dtype` from a :class:`pandas.Series`.
+
+    Args:
+        ser: A :class:`pandas.Series` to analyze.
+
+    Returns:
+        The data type.
+    """
+    if pd.api.types.is_datetime64_any_dtype(ser.dtype):
+        return Dtype.date
+    if pd.api.types.is_timedelta64_dtype(ser.dtype):
+        return Dtype.timedelta
+    if isinstance(ser.dtype, pd.CategoricalDtype):
+        return Dtype.string
+
+    if (pd.api.types.is_object_dtype(ser.dtype)
+            and not isinstance(ser.dtype, pd.ArrowDtype)):
+        index = ser.iloc[:1000].first_valid_index()
+        if index is not None and pd.api.types.is_list_like(ser[index]):
+            pos = ser.index.get_loc(index)
+            assert isinstance(pos, int)
+            ser = ser.iloc[pos:pos + 1000].dropna()
+            arr = pa.array(ser.tolist())
+            ser = pd.Series(arr, dtype=pd.ArrowDtype(arr.type))
+
+    if isinstance(ser.dtype, pd.ArrowDtype):
+        if (pa.types.is_list(ser.dtype.pyarrow_dtype)
+                or pa.types.is_fixed_size_list(ser.dtype.pyarrow_dtype)):
+            elem_dtype = ser.dtype.pyarrow_dtype.value_type
+            if pa.types.is_integer(elem_dtype):
+                return Dtype.intlist
+            if pa.types.is_floating(elem_dtype):
+                return Dtype.floatlist
+            if pa.types.is_decimal(elem_dtype):
+                return Dtype.floatlist
+            if pa.types.is_string(elem_dtype):
+                return Dtype.stringlist
+            if pa.types.is_null(elem_dtype):
+                return Dtype.floatlist
+
+    if isinstance(ser.dtype, np.dtype):
+        dtype_str = str(ser.dtype).lower()
+    elif isinstance(ser.dtype, pd.api.extensions.ExtensionDtype):
+        dtype_str = ser.dtype.name.lower()
+        dtype_str = dtype_str.split('[')[0]  # Remove backend metadata
+    elif isinstance(ser.dtype, pa.DataType):
+        dtype_str = str(ser.dtype).lower()
+    else:
+        dtype_str = 'object'
+
+    if dtype_str not in PANDAS_TO_DTYPE:
+        raise ValueError(f"Unsupported data type '{ser.dtype}'")
+
+    return PANDAS_TO_DTYPE[dtype_str]
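
For orientation, a minimal usage sketch of the new infer_dtype helper; the sample data is illustrative, not taken from the package:

import pandas as pd

from kumoai.experimental.rfm.infer import infer_dtype

# Scalar columns resolve through PANDAS_TO_DTYPE:
infer_dtype(pd.Series([1, 2, 3]))    # Dtype.int
infer_dtype(pd.Series(['a', 'b']))   # Dtype.string

# Object columns holding lists are re-read through pyarrow first and
# resolve to list dtypes:
infer_dtype(pd.Series([[1.0, 2.0], [3.0]]))  # Dtype.floatlist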

kumoai/experimental/rfm/infer/multicategorical.py
@@ -40,7 +40,7 @@ def contains_multicategorical(
     sep = max(candidates, key=candidates.get)  # type: ignore
     ser = ser.str.split(sep)
 
-    num_unique_multi = ser.explode().nunique()
+    num_unique_multi = ser.astype('object').explode().nunique()
 
     if dtype.is_list():
        return num_unique_multi <= MAX_CAT
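
A plausible motivation for the astype('object') cast (the diff itself does not state one): for Arrow-backed string columns, Series.str.split yields a list-typed extension array, and routing the result through object keeps explode().nunique() on the plain-Python path for both backends:

import pandas as pd

ser = pd.Series(['a,b', 'b,c'], dtype='string[pyarrow]')
tokens = ser.str.split(',')

# Mirrors the changed line above; works for object- and Arrow-backed input:
tokens.astype('object').explode().nunique()  # 3 -> {'a', 'b', 'c'}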

kumoai/experimental/rfm/infer/pkey.py
@@ -0,0 +1,128 @@
+import re
+import warnings
+
+import pandas as pd
+
+
+def infer_primary_key(
+    table_name: str,
+    df: pd.DataFrame,
+    candidates: list[str],
+) -> str | None:
+    r"""Auto-detect potential primary key column.
+
+    Args:
+        table_name: The table name.
+        df: The pandas DataFrame to analyze.
+        candidates: A list of potential candidates.
+
+    Returns:
+        The name of the detected primary key, or ``None`` if not found.
+    """
+    if len(candidates) == 0:
+        return None
+
+    # A list of (potentially modified) table names that are eligible to match
+    # with a primary key, i.e.:
+    # - UserInfo -> User
+    # - snakecase <-> camelcase
+    # - camelcase <-> snakecase
+    # - plural <-> singular (users -> user, eligibilities -> eligibility)
+    # - verb -> noun (qualifying -> qualify)
+    _table_names = {table_name}
+    if table_name.lower().endswith('_info'):
+        _table_names.add(table_name[:-5])
+    elif table_name.lower().endswith('info'):
+        _table_names.add(table_name[:-4])
+
+    table_names = set()
+    for _table_name in _table_names:
+        table_names.add(_table_name.lower())
+        snakecase = re.sub(r'(.)([A-Z][a-z]+)', r'\1_\2', _table_name)
+        snakecase = re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', snakecase)
+        table_names.add(snakecase.lower())
+        camelcase = _table_name.replace('_', '')
+        table_names.add(camelcase.lower())
+        if _table_name.lower().endswith('s'):
+            table_names.add(_table_name.lower()[:-1])
+            table_names.add(snakecase.lower()[:-1])
+            table_names.add(camelcase.lower()[:-1])
+        else:
+            table_names.add(_table_name.lower() + 's')
+            table_names.add(snakecase.lower() + 's')
+            table_names.add(camelcase.lower() + 's')
+        if _table_name.lower().endswith('ies'):
+            table_names.add(_table_name.lower()[:-3] + 'y')
+            table_names.add(snakecase.lower()[:-3] + 'y')
+            table_names.add(camelcase.lower()[:-3] + 'y')
+        elif _table_name.lower().endswith('y'):
+            table_names.add(_table_name.lower()[:-1] + 'ies')
+            table_names.add(snakecase.lower()[:-1] + 'ies')
+            table_names.add(camelcase.lower()[:-1] + 'ies')
+        if _table_name.lower().endswith('ing'):
+            table_names.add(_table_name.lower()[:-3])
+            table_names.add(snakecase.lower()[:-3])
+            table_names.add(camelcase.lower()[:-3])
+
+    scores: list[tuple[str, int]] = []
+    for col_name in candidates:
+        col_name_lower = col_name.lower()
+
+        score = 0
+
+        if col_name_lower == 'id':
+            score += 4
+
+        for table_name_lower in table_names:
+
+            if col_name_lower == table_name_lower:
+                score += 4  # USER -> USER
+                break
+
+            for suffix in ['id', 'hash', 'key', 'code', 'uuid']:
+                if not col_name_lower.endswith(suffix):
+                    continue
+
+                if col_name_lower == f'{table_name_lower}_{suffix}':
+                    score += 5  # USER -> USER_ID
+                    break
+
+                if col_name_lower == f'{table_name_lower}{suffix}':
+                    score += 5  # User -> UserId
+                    break
+
+                if col_name_lower.endswith(f'{table_name_lower}_{suffix}'):
+                    score += 2
+
+                if col_name_lower.endswith(f'{table_name_lower}{suffix}'):
+                    score += 2
+
+        # `rel-bench` hard-coding :(
+        if table_name == 'studies' and col_name == 'nct_id':
+            score += 1
+
+        ser = df[col_name].iloc[:1_000_000]
+        score += 3 * (ser.nunique() / len(ser))
+
+        scores.append((col_name, score))
+
+    scores = [x for x in scores if x[-1] >= 4]
+    scores.sort(key=lambda x: x[-1], reverse=True)
+
+    if len(scores) == 0:
+        return None
+
+    if len(scores) == 1:
+        return scores[0][0]
+
+    # In case of multiple candidates, only return one if its score is unique:
+    if scores[0][1] != scores[1][1]:
+        return scores[0][0]
+
+    max_score = max(scores, key=lambda x: x[1])
+    candidates = [col_name for col_name, score in scores if score == max_score]
+    warnings.warn(f"Found multiple potential primary keys in table "
+                  f"'{table_name}': {candidates}. Please specify the primary "
+                  f"key for this table manually.")
+
+    return None
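
A small, hypothetical example of how the scoring plays out: 'user_id' matches the singularized table name plus an 'id' suffix (+5) and is fully unique (up to +3 from the uniqueness term), clearing the >= 4 cutoff, while 'age' does not:

import pandas as pd

from kumoai.experimental.rfm.infer import infer_primary_key

# Made-up data for illustration only:
df = pd.DataFrame({
    'user_id': [1, 2, 3],
    'age': [25, 30, 25],
})
infer_primary_key('users', df, candidates=['user_id', 'age'])  # 'user_id'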

kumoai/experimental/rfm/infer/stype.py
@@ -0,0 +1,35 @@
+import pandas as pd
+from kumoapi.typing import Dtype, Stype
+
+from kumoai.experimental.rfm.infer import (
+    contains_categorical,
+    contains_id,
+    contains_multicategorical,
+    contains_timestamp,
+)
+
+
+def infer_stype(ser: pd.Series, column_name: str, dtype: Dtype) -> Stype:
+    """Infers the :class:`Stype` from a :class:`pandas.Series`.
+
+    Args:
+        ser: A :class:`pandas.Series` to analyze.
+        column_name: The column name.
+        dtype: The data type.
+
+    Returns:
+        The semantic type.
+    """
+    if contains_id(ser, column_name, dtype):
+        return Stype.ID
+
+    if contains_timestamp(ser, column_name, dtype):
+        return Stype.timestamp
+
+    if contains_multicategorical(ser, column_name, dtype):
+        return Stype.multicategorical
+
+    if contains_categorical(ser, column_name, dtype):
+        return Stype.categorical
+
+    return dtype.default_stype
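
The checks run in a fixed priority order (ID, then timestamp, then multicategorical, then categorical) before falling back to the dtype's default. A hypothetical call chaining it with infer_dtype; the exact outcome depends on the contains_* heuristics, which are not shown here:

import pandas as pd

from kumoai.experimental.rfm.infer import infer_dtype, infer_stype

ser = pd.Series(['red', 'blue', 'red', 'green'])
dtype = infer_dtype(ser)          # Dtype.string
infer_stype(ser, 'color', dtype)  # presumably Stype.categorical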

kumoai/experimental/rfm/infer/time_col.py
@@ -0,0 +1,61 @@
+import re
+import warnings
+
+import pandas as pd
+
+
+def infer_time_column(
+    df: pd.DataFrame,
+    candidates: list[str],
+) -> str | None:
+    r"""Auto-detect potential time column.
+
+    Args:
+        df: The pandas DataFrame to analyze.
+        candidates: A list of potential candidates.
+
+    Returns:
+        The name of the detected time column, or ``None`` if not found.
+    """
+    candidates = [  # Exclude all candidates with `*last*` in column names:
+        col_name for col_name in candidates
+        if not re.search(r'(^|_)last(_|$)', col_name, re.IGNORECASE)
+    ]
+
+    if len(candidates) == 0:
+        return None
+
+    if len(candidates) == 1:
+        return candidates[0]
+
+    # If there exists a dedicated `create*` column, use it as time column:
+    create_candidates = [
+        candidate for candidate in candidates
+        if candidate.lower().startswith('create')
+    ]
+    if len(create_candidates) == 1:
+        return create_candidates[0]
+    if len(create_candidates) > 1:
+        candidates = create_candidates
+
+    # Find the most optimal time column. Usually, it is the one pointing to
+    # the oldest timestamps:
+    with warnings.catch_warnings():
+        warnings.filterwarnings('ignore', message='Could not infer format')
+        min_timestamp_dict = {
+            key: pd.to_datetime(df[key].iloc[:10_000], 'coerce')
+            for key in candidates
+        }
+    min_timestamp_dict = {
+        key: value.min().tz_localize(None)
+        for key, value in min_timestamp_dict.items()
+    }
+    min_timestamp_dict = {
+        key: value
+        for key, value in min_timestamp_dict.items() if not pd.isna(value)
+    }
+
+    if len(min_timestamp_dict) == 0:
+        return None
+
+    return min(min_timestamp_dict, key=min_timestamp_dict.get)  # type: ignore
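
A hypothetical example of the selection rules: 'last_login' is dropped by the `*last*` exclusion, and 'created_at' then wins as the single dedicated `create*` column, without any timestamp parsing needed:

import pandas as pd

from kumoai.experimental.rfm.infer import infer_time_column

# Made-up data for illustration only:
df = pd.DataFrame({
    'created_at': ['2020-01-01', '2020-02-01'],
    'updated_at': ['2021-01-01', '2021-02-01'],
    'last_login': ['2024-01-01', '2024-01-02'],
})
infer_time_column(df, ['created_at', 'updated_at', 'last_login'])
# 'created_at'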

kumoai/experimental/rfm/pquery/__init__.py
@@ -1,11 +1,7 @@
-from .backend import PQueryBackend
-from .pandas_backend import PQueryPandasBackend
 from .executor import PQueryExecutor
 from .pandas_executor import PQueryPandasExecutor
 
 __all__ = [
-    'PQueryBackend',
-    'PQueryPandasBackend',
     'PQueryExecutor',
     'PQueryPandasExecutor',
 ]

kumoai/experimental/rfm/pquery/executor.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Dict, Generic, Tuple, TypeVar
+from typing import Generic, TypeVar
 
 from kumoapi.pquery import ValidatedPredictiveQuery
 from kumoapi.pquery.AST import (
@@ -21,82 +21,82 @@ class PQueryExecutor(Generic[TableData, ColumnData, IndexData], ABC):
     def execute_column(
         self,
         column: Column,
-        feat_dict: Dict[str, TableData],
+        feat_dict: dict[str, TableData],
         filter_na: bool = True,
-    ) -> Tuple[ColumnData, IndexData]:
+    ) -> tuple[ColumnData, IndexData]:
         pass
 
     @abstractmethod
     def execute_aggregation(
         self,
         aggr: Aggregation,
-        feat_dict: Dict[str, TableData],
-        time_dict: Dict[str, ColumnData],
-        batch_dict: Dict[str, IndexData],
+        feat_dict: dict[str, TableData],
+        time_dict: dict[str, ColumnData],
+        batch_dict: dict[str, IndexData],
         anchor_time: ColumnData,
         filter_na: bool = True,
         num_forecasts: int = 1,
-    ) -> Tuple[ColumnData, IndexData]:
+    ) -> tuple[ColumnData, IndexData]:
         pass
 
     @abstractmethod
     def execute_condition(
         self,
         condition: Condition,
-        feat_dict: Dict[str, TableData],
-        time_dict: Dict[str, ColumnData],
-        batch_dict: Dict[str, IndexData],
+        feat_dict: dict[str, TableData],
+        time_dict: dict[str, ColumnData],
+        batch_dict: dict[str, IndexData],
         anchor_time: ColumnData,
         filter_na: bool = True,
         num_forecasts: int = 1,
-    ) -> Tuple[ColumnData, IndexData]:
+    ) -> tuple[ColumnData, IndexData]:
         pass
 
     @abstractmethod
     def execute_logical_operation(
         self,
         logical_operation: LogicalOperation,
-        feat_dict: Dict[str, TableData],
-        time_dict: Dict[str, ColumnData],
-        batch_dict: Dict[str, IndexData],
+        feat_dict: dict[str, TableData],
+        time_dict: dict[str, ColumnData],
+        batch_dict: dict[str, IndexData],
         anchor_time: ColumnData,
         filter_na: bool = True,
        num_forecasts: int = 1,
-    ) -> Tuple[ColumnData, IndexData]:
+    ) -> tuple[ColumnData, IndexData]:
         pass
 
     @abstractmethod
     def execute_join(
         self,
         join: Join,
-        feat_dict: Dict[str, TableData],
-        time_dict: Dict[str, ColumnData],
-        batch_dict: Dict[str, IndexData],
+        feat_dict: dict[str, TableData],
+        time_dict: dict[str, ColumnData],
+        batch_dict: dict[str, IndexData],
         anchor_time: ColumnData,
         filter_na: bool = True,
         num_forecasts: int = 1,
-    ) -> Tuple[ColumnData, IndexData]:
+    ) -> tuple[ColumnData, IndexData]:
         pass
 
     @abstractmethod
     def execute_filter(
         self,
         filter: Filter,
-        feat_dict: Dict[str, TableData],
-        time_dict: Dict[str, ColumnData],
-        batch_dict: Dict[str, IndexData],
+        feat_dict: dict[str, TableData],
+        time_dict: dict[str, ColumnData],
+        batch_dict: dict[str, IndexData],
         anchor_time: ColumnData,
-    ) -> Tuple[ColumnData, IndexData]:
+    ) -> tuple[ColumnData, IndexData]:
         pass
 
     @abstractmethod
     def execute(
         self,
         query: ValidatedPredictiveQuery,
-        feat_dict: Dict[str, TableData],
-        time_dict: Dict[str, ColumnData],
-        batch_dict: Dict[str, IndexData],
+        feat_dict: dict[str, TableData],
+        time_dict: dict[str, ColumnData],
+        batch_dict: dict[str, IndexData],
         anchor_time: ColumnData,
         num_forecasts: int = 1,
-    ) -> Tuple[ColumnData, IndexData]:
+    ) -> tuple[ColumnData, IndexData]:
         pass
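
The interface stays generic over the backend's table, column, and index representations. The truncated class line in the hunk headers of the next file, together with the tuple[pd.Series, np.ndarray] return types, implies the pandas binding below; a different backend could bind its own representations:

class PQueryPandasExecutor(PQueryExecutor[pd.DataFrame, pd.Series,
                                          np.ndarray]):
    ...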

kumoai/experimental/rfm/pquery/pandas_executor.py
@@ -1,5 +1,3 @@
-from typing import Dict, List, Tuple
-
 import numpy as np
 import pandas as pd
 from kumoapi.pquery import ValidatedPredictiveQuery
@@ -22,9 +20,9 @@ class PQueryPandasExecutor(PQueryExecutor[pd.DataFrame, pd.Series,
     def execute_column(
         self,
         column: Column,
-        feat_dict: Dict[str, pd.DataFrame],
+        feat_dict: dict[str, pd.DataFrame],
         filter_na: bool = True,
-    ) -> Tuple[pd.Series, np.ndarray]:
+    ) -> tuple[pd.Series, np.ndarray]:
         table_name, column_name = column.fqn.split(".")
         if column_name == '*':
             out = pd.Series(np.ones(len(feat_dict[table_name]), dtype='int64'))
@@ -60,7 +58,7 @@ class PQueryPandasExecutor(PQueryExecutor[pd.DataFrame, pd.Series,
         batch: np.ndarray,
         batch_size: int,
         filter_na: bool = True,
-    ) -> Tuple[pd.Series, np.ndarray]:
+    ) -> tuple[pd.Series, np.ndarray]:
 
         mask = feat.notna()
         feat, batch = feat[mask], batch[mask]
@@ -104,13 +102,13 @@ class PQueryPandasExecutor(PQueryExecutor[pd.DataFrame, pd.Series,
     def execute_aggregation(
         self,
         aggr: Aggregation,
-        feat_dict: Dict[str, pd.DataFrame],
-        time_dict: Dict[str, pd.Series],
-        batch_dict: Dict[str, np.ndarray],
+        feat_dict: dict[str, pd.DataFrame],
+        time_dict: dict[str, pd.Series],
+        batch_dict: dict[str, np.ndarray],
         anchor_time: pd.Series,
         filter_na: bool = True,
         num_forecasts: int = 1,
-    ) -> Tuple[pd.Series, np.ndarray]:
+    ) -> tuple[pd.Series, np.ndarray]:
         target_table = aggr._get_target_column_name().split('.')[0]
         target_batch = batch_dict[target_table]
         target_time = time_dict[target_table]
@@ -118,7 +116,7 @@ class PQueryPandasExecutor(PQueryExecutor[pd.DataFrame, pd.Series,
             target_feat, target_mask = self.execute_column(
                 column=aggr.target,
                 feat_dict=feat_dict,
-                filter_na=False,
+                filter_na=True,
             )
         else:
             assert isinstance(aggr.target, Filter)
@@ -128,28 +126,29 @@ class PQueryPandasExecutor(PQueryExecutor[pd.DataFrame, pd.Series,
                 time_dict=time_dict,
                 batch_dict=batch_dict,
                 anchor_time=anchor_time,
-                filter_na=False,
+                filter_na=True,
             )
 
-        outs: List[pd.Series] = []
-        masks: List[np.ndarray] = []
+        outs: list[pd.Series] = []
+        masks: list[np.ndarray] = []
         for _ in range(num_forecasts):
-            anchor_target_time = anchor_time[target_batch]
+            anchor_target_time = anchor_time.iloc[target_batch]
             anchor_target_time = anchor_target_time.reset_index(drop=True)
 
-            curr_target_mask = target_mask & (
-                target_time
-                <= anchor_target_time + aggr.aggr_time_range.end_date_offset)
+            time_filter_mask = (target_time <= anchor_target_time +
+                                aggr.aggr_time_range.end_date_offset)
             if aggr.aggr_time_range.start is not None:
                 start_offset = aggr.aggr_time_range.start_date_offset
-                curr_target_mask &= (target_time
+                time_filter_mask &= (target_time
                                      > anchor_target_time + start_offset)
             else:
                 assert num_forecasts == 1
+            curr_target_mask = target_mask & time_filter_mask
 
             out, mask = self.execute_aggregation_type(
                 aggr.aggr,
-                feat=target_feat[curr_target_mask],
+                feat=target_feat[time_filter_mask[target_mask].reset_index(
+                    drop=True)],
                 batch=target_batch[curr_target_mask],
                 batch_size=len(anchor_time),
                 filter_na=False if num_forecasts > 1 else filter_na,
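
The re-indexed feat= argument is the subtle part of this hunk: with filter_na=True, target_feat now arrives already NA-filtered, so the full-length time mask must first be restricted to the surviving rows. A toy illustration of that alignment (made-up data, not the executor's actual code path):

import numpy as np
import pandas as pd

target_feat_full = pd.Series([1.0, np.nan, 3.0, 4.0])
target_mask = target_feat_full.notna().to_numpy()         # NA filter
time_filter_mask = pd.Series([True, True, False, True])   # time-range filter

# With filter_na=True, only the non-NA rows come back:
target_feat = target_feat_full[target_mask].reset_index(drop=True)

# Restrict the full-length time mask to those same rows before indexing:
target_feat[time_filter_mask[target_mask].reset_index(drop=True)]
# -> rows with values 1.0 and 4.0 survive both filters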
@@ -225,13 +224,13 @@ class PQueryPandasExecutor(PQueryExecutor[pd.DataFrame, pd.Series,
     def execute_condition(
         self,
         condition: Condition,
-        feat_dict: Dict[str, pd.DataFrame],
-        time_dict: Dict[str, pd.Series],
-        batch_dict: Dict[str, np.ndarray],
+        feat_dict: dict[str, pd.DataFrame],
+        time_dict: dict[str, pd.Series],
+        batch_dict: dict[str, np.ndarray],
         anchor_time: pd.Series,
         filter_na: bool = True,
         num_forecasts: int = 1,
-    ) -> Tuple[pd.Series, np.ndarray]:
+    ) -> tuple[pd.Series, np.ndarray]:
         if num_forecasts > 1:
             raise NotImplementedError("Forecasting not yet implemented for "
                                       "non-regression tasks")
@@ -305,13 +304,13 @@ class PQueryPandasExecutor(PQueryExecutor[pd.DataFrame, pd.Series,
     def execute_logical_operation(
         self,
         logical_operation: LogicalOperation,
-        feat_dict: Dict[str, pd.DataFrame],
-        time_dict: Dict[str, pd.Series],
-        batch_dict: Dict[str, np.ndarray],
+        feat_dict: dict[str, pd.DataFrame],
+        time_dict: dict[str, pd.Series],
+        batch_dict: dict[str, np.ndarray],
         anchor_time: pd.Series,
         filter_na: bool = True,
         num_forecasts: int = 1,
-    ) -> Tuple[pd.Series, np.ndarray]:
+    ) -> tuple[pd.Series, np.ndarray]:
         if num_forecasts > 1:
             raise NotImplementedError("Forecasting not yet implemented for "
                                       "non-regression tasks")
@@ -369,13 +368,13 @@ class PQueryPandasExecutor(PQueryExecutor[pd.DataFrame, pd.Series,
     def execute_join(
         self,
         join: Join,
-        feat_dict: Dict[str, pd.DataFrame],
-        time_dict: Dict[str, pd.Series],
-        batch_dict: Dict[str, np.ndarray],
+        feat_dict: dict[str, pd.DataFrame],
+        time_dict: dict[str, pd.Series],
+        batch_dict: dict[str, np.ndarray],
         anchor_time: pd.Series,
         filter_na: bool = True,
         num_forecasts: int = 1,
-    ) -> Tuple[pd.Series, np.ndarray]:
+    ) -> tuple[pd.Series, np.ndarray]:
         if isinstance(join.rhs_target, Aggregation):
             return self.execute_aggregation(
                 aggr=join.rhs_target,
@@ -392,12 +391,12 @@ class PQueryPandasExecutor(PQueryExecutor[pd.DataFrame, pd.Series,
     def execute_filter(
         self,
         filter: Filter,
-        feat_dict: Dict[str, pd.DataFrame],
-        time_dict: Dict[str, pd.Series],
-        batch_dict: Dict[str, np.ndarray],
+        feat_dict: dict[str, pd.DataFrame],
+        time_dict: dict[str, pd.Series],
+        batch_dict: dict[str, np.ndarray],
         anchor_time: pd.Series,
         filter_na: bool = True,
-    ) -> Tuple[pd.Series, np.ndarray]:
+    ) -> tuple[pd.Series, np.ndarray]:
         out, mask = self.execute_column(
             column=filter.target,
             feat_dict=feat_dict,
@@ -430,12 +429,12 @@ class PQueryPandasExecutor(PQueryExecutor[pd.DataFrame, pd.Series,
     def execute(
         self,
         query: ValidatedPredictiveQuery,
-        feat_dict: Dict[str, pd.DataFrame],
-        time_dict: Dict[str, pd.Series],
-        batch_dict: Dict[str, np.ndarray],
+        feat_dict: dict[str, pd.DataFrame],
+        time_dict: dict[str, pd.Series],
+        batch_dict: dict[str, np.ndarray],
         anchor_time: pd.Series,
         num_forecasts: int = 1,
-    ) -> Tuple[pd.Series, np.ndarray]:
+    ) -> tuple[pd.Series, np.ndarray]:
         if isinstance(query.entity_ast, Column):
             out, mask = self.execute_column(
                 column=query.entity_ast,
@@ -499,7 +498,32 @@ class PQueryPandasExecutor(PQueryExecutor[pd.DataFrame, pd.Series,
             )
         else:
             raise NotImplementedError(
-                f'{type(query.target)} compilation missing.')
+                f'{type(query.target_ast)} compilation missing.')
+        if query.whatif_ast is not None:
+            if isinstance(query.whatif_ast, Condition):
+                mask &= self.execute_condition(
+                    condition=query.whatif_ast,
+                    feat_dict=feat_dict,
+                    time_dict=time_dict,
+                    batch_dict=batch_dict,
+                    anchor_time=anchor_time,
+                    filter_na=True,
+                    num_forecasts=num_forecasts,
+                )[0]
+            elif isinstance(query.whatif_ast, LogicalOperation):
+                mask &= self.execute_logical_operation(
+                    logical_operation=query.whatif_ast,
+                    feat_dict=feat_dict,
+                    time_dict=time_dict,
+                    batch_dict=batch_dict,
+                    anchor_time=anchor_time,
+                    filter_na=True,
+                    num_forecasts=num_forecasts,
+                )[0]
+            else:
+                raise ValueError(
+                    f'Unsupported ASSUMING condition {type(query.whatif_ast)}')
+
         out = out[mask[_mask]]
         mask &= _mask
         out = out.reset_index(drop=True)
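
The added block wires the query's ASSUMING (what-if) clause into label generation: the running mask is AND-ed with the boolean output of the extra condition, so only rows satisfying the hypothetical contribute labels. In spirit, an illustrative reduction (not the real call chain):

import numpy as np

mask = np.array([True, True, False, True])     # entity/target mask so far
whatif = np.array([True, False, True, True])   # hypothetical condition output
mask &= whatif                                 # -> [True, False, False, True]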