kumoai 2.9.0.dev202509061830__cp311-cp311-macosx_11_0_arm64.whl → 2.12.0.dev202511031731__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kumoai/__init__.py +4 -2
- kumoai/_version.py +1 -1
- kumoai/client/client.py +10 -5
- kumoai/client/rfm.py +3 -2
- kumoai/connector/file_upload_connector.py +71 -102
- kumoai/connector/utils.py +1367 -236
- kumoai/experimental/rfm/__init__.py +2 -2
- kumoai/experimental/rfm/authenticate.py +8 -5
- kumoai/experimental/rfm/infer/timestamp.py +7 -4
- kumoai/experimental/rfm/local_graph.py +90 -80
- kumoai/experimental/rfm/local_graph_sampler.py +16 -8
- kumoai/experimental/rfm/local_graph_store.py +22 -6
- kumoai/experimental/rfm/local_pquery_driver.py +129 -28
- kumoai/experimental/rfm/local_table.py +100 -22
- kumoai/experimental/rfm/pquery/__init__.py +4 -0
- kumoai/experimental/rfm/pquery/backend.py +4 -0
- kumoai/experimental/rfm/pquery/executor.py +102 -0
- kumoai/experimental/rfm/pquery/pandas_backend.py +71 -30
- kumoai/experimental/rfm/pquery/pandas_executor.py +506 -0
- kumoai/experimental/rfm/rfm.py +442 -94
- kumoai/jobs.py +1 -0
- kumoai/trainer/trainer.py +19 -10
- kumoai/utils/progress_logger.py +62 -0
- {kumoai-2.9.0.dev202509061830.dist-info → kumoai-2.12.0.dev202511031731.dist-info}/METADATA +4 -5
- {kumoai-2.9.0.dev202509061830.dist-info → kumoai-2.12.0.dev202511031731.dist-info}/RECORD +28 -26
- {kumoai-2.9.0.dev202509061830.dist-info → kumoai-2.12.0.dev202511031731.dist-info}/WHEEL +0 -0
- {kumoai-2.9.0.dev202509061830.dist-info → kumoai-2.12.0.dev202511031731.dist-info}/licenses/LICENSE +0 -0
- {kumoai-2.9.0.dev202509061830.dist-info → kumoai-2.12.0.dev202511031731.dist-info}/top_level.txt +0 -0
kumoai/experimental/rfm/local_pquery_driver.py

@@ -18,7 +18,7 @@ class LocalPQueryDriver:
         self,
         graph_store: LocalGraphStore,
         query: PQueryDefinition,
-        random_seed: Optional[int],
+        random_seed: Optional[int] = None,
     ) -> None:
         self._graph_store = graph_store
         self._query = query
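Note: the seed is now optional. A minimal construction sketch, assuming `graph_store` (a LocalGraphStore) and `query` (a PQueryDefinition) already exist:

    driver = LocalPQueryDriver(graph_store, query)  # random_seed defaults to None
    seeded_driver = LocalPQueryDriver(graph_store, query, random_seed=42)  # explicit seed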
@@ -27,7 +27,6 @@ class LocalPQueryDriver:
 
     def _get_candidates(
         self,
-        anchor_time: Union[pd.Timestamp, Literal['entity']],
         exclude_node: Optional[np.ndarray] = None,
     ) -> np.ndarray:
 
@@ -61,12 +60,37 @@ class LocalPQueryDriver:
 
         return candidate
 
+    def _filter_candidates_by_time(
+        self,
+        candidate: np.ndarray,
+        anchor_time: pd.Timestamp,
+    ) -> np.ndarray:
+
+        entity = self._query.entity.pkey.table_name
+
+        # Filter out entities that do not exist yet in time:
+        time_sec = self._graph_store.time_dict.get(entity)
+        if time_sec is not None:
+            mask = time_sec[candidate] <= (anchor_time.value // (1000**3))
+            candidate = candidate[mask]
+
+        # Filter out entities that no longer exist in time:
+        end_time_col = self._graph_store.end_time_column_dict.get(entity)
+        if end_time_col is not None:
+            ser = self._graph_store.df_dict[entity][end_time_col]
+            ser = ser.iloc[candidate]
+            mask = (anchor_time < ser) | ser.isna().to_numpy()
+            candidate = candidate[mask]
+
+        return candidate
+
     def collect_test(
         self,
         size: int,
         anchor_time: Union[pd.Timestamp, Literal['entity']],
         batch_size: Optional[int] = None,
         max_iterations: int = 20,
+        guarantee_train_examples: bool = True,
     ) -> Tuple[np.ndarray, pd.Series, pd.Series]:
         r"""Collects test nodes and their labels used for evaluation.
 
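Note: entity candidates are now filtered by a dedicated helper that drops entities that do not exist yet at the anchor time and, when an end time column is present, entities that no longer exist. A standalone sketch of the same masking logic on made-up data (not the shipped helper):

    import numpy as np
    import pandas as pd

    anchor_time = pd.Timestamp('2024-06-01')
    anchor_sec = anchor_time.value // (1000**3)          # nanoseconds -> seconds
    time_sec = np.array([1_600_000_000, 1_800_000_000])  # per-entity creation time (s)
    candidate = np.array([0, 1])

    # Drop entities created after the anchor time:
    candidate = candidate[time_sec[candidate] <= anchor_sec]

    # Drop entities whose end time lies before the anchor time (NaT = still alive):
    end_time = pd.Series(pd.to_datetime(['2025-01-01', None]))
    ser = end_time.iloc[candidate]
    candidate = candidate[(anchor_time < ser).to_numpy() | ser.isna().to_numpy()]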
@@ -75,13 +99,15 @@ class LocalPQueryDriver:
             anchor_time: The anchor time.
             batch_size: How many nodes to process in a single batch.
             max_iterations: The number of steps to run before aborting.
+            guarantee_train_examples: Ensures that test examples do not occupy
+                the entire set of entity candidates.
 
         Returns:
             A triplet holding the nodes, timestamps and labels.
         """
         batch_size = size if batch_size is None else batch_size
 
-        candidate = self._get_candidates(anchor_time)
+        candidate = self._get_candidates()
 
         nodes: List[np.ndarray] = []
         times: List[pd.Series] = []
@@ -93,13 +119,7 @@ class LocalPQueryDriver:
             node = candidate[candidate_offset:candidate_offset + batch_size]
 
             if isinstance(anchor_time, pd.Timestamp):
-
-                time = self._graph_store.time_dict.get(
-                    self._query.entity.pkey.table_name)
-                if time is not None:
-                    node = node[time[node] <= (anchor_time.value // (1000**3))]
-
-            if isinstance(anchor_time, pd.Timestamp):
+                node = self._filter_candidates_by_time(node, anchor_time)
                 time = pd.Series(anchor_time).repeat(len(node))
                 time = time.astype('datetime64[ns]').reset_index(drop=True)
             else:
@@ -148,6 +168,16 @@ class LocalPQueryDriver:
                     f"using the 'max_pq_iterations' option. This "
                     f"warning will not be shown again in this run.")
 
+        if (guarantee_train_examples
+                and self._query.query_type == QueryType.STATIC
+                and candidate_offset >= len(candidate)):
+            # In case all valid entities are used as test examples, we can no
+            # longer find any training example. Fallback to a 50/50 split:
+            size = len(node) // 2
+            node = node[:size]
+            time = time.iloc[:size]
+            y = y.iloc[:size]
+
         return node, time, y
 
     def collect_train(
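Note: for static queries the new flag keeps the test set from consuming every valid entity. A tiny illustration of the 50/50 fallback on made-up data:

    import numpy as np

    node = np.arange(10)     # every remaining valid entity ended up as a test example
    size = len(node) // 2    # keep only half of them for testing ...
    test_node = node[:size]  # ... so the other half stays available as training context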
@@ -172,7 +202,7 @@ class LocalPQueryDriver:
         """
         batch_size = size if batch_size is None else batch_size
 
-        candidate = self._get_candidates(anchor_time, exclude_node)
+        candidate = self._get_candidates(exclude_node)
 
         if len(candidate) == 0:
             raise RuntimeError("Failed to generate any context examples "
@@ -182,22 +212,13 @@ class LocalPQueryDriver:
         times: List[pd.Series] = []
         ys: List[pd.Series] = []
 
-        if isinstance(anchor_time, pd.Timestamp):
-            anchor_time = anchor_time - self._query.target.end_offset
-
         reached_end = False
         num_labels = candidate_offset = 0
         for _ in range(max_iterations):
             node = candidate[candidate_offset:candidate_offset + batch_size]
 
             if isinstance(anchor_time, pd.Timestamp):
-
-                time = self._graph_store.time_dict.get(
-                    self._query.entity.pkey.table_name)
-                if time is not None:
-                    node = node[time[node] <= (anchor_time.value // (1000**3))]
-
-            if isinstance(anchor_time, pd.Timestamp):
+                node = self._filter_candidates_by_time(node, anchor_time)
                 time = pd.Series(anchor_time).repeat(len(node))
                 time = time.astype('datetime64[ns]').reset_index(drop=True)
             else:
@@ -228,7 +249,8 @@ class LocalPQueryDriver:
                     reached_end = True
                     break
                 candidate_offset = 0
-                anchor_time = anchor_time - self._query.target.end_offset
+                anchor_time = anchor_time - (self._query.target.end_offset *
+                                             self._query.num_forecasts)
                 if anchor_time < self._graph_store.min_time:
                     reached_end = True
                     break  # No earlier anchor time left. Abort.
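Note: when one anchor time is exhausted, the driver now steps back by the full forecast horizon instead of a single target window. A worked example, assuming a hypothetical 7-day end offset and `num_forecasts = 4`:

    import pandas as pd

    end_offset = pd.DateOffset(days=7)
    num_forecasts = 4
    anchor_time = pd.Timestamp('2024-06-29')

    # Previously the step was one window (7 days); it is now the whole
    # horizon (4 * 7 = 28 days):
    anchor_time = anchor_time - (end_offset * num_forecasts)
    print(anchor_time)  # 2024-06-01 00:00:00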
@@ -257,12 +279,81 @@ class LocalPQueryDriver:
 
         return node, time, y
 
-    def __call__(
+    def is_valid(
+        self,
+        node: np.ndarray,
+        anchor_time: Union[pd.Timestamp, Literal['entity']],
+        batch_size: int = 10_000,
+    ) -> np.ndarray:
+        r"""Denotes which nodes are valid for a given anchor time, *e.g.*,
+        which nodes fulfill entity filter constraints.
+
+        Args:
+            node: The nodes to check for.
+            anchor_time: The anchor time.
+            batch_size: How many nodes to process in a single batch.
+
+        Returns:
+            The mask.
+        """
+        mask: Optional[np.ndarray] = None
+
+        if isinstance(anchor_time, pd.Timestamp):
+            node = self._filter_candidates_by_time(node, anchor_time)
+            time = pd.Series(anchor_time).repeat(len(node))
+            time = time.astype('datetime64[ns]').reset_index(drop=True)
+        else:
+            assert anchor_time == 'entity'
+            time = self._graph_store.time_dict[
+                self._query.entity.pkey.table_name]
+            time = pd.Series(time[node] * 1000**3, dtype='datetime64[ns]')
+
+        if self._query.entity.filter is not None:
+            # Mask out via (temporal) entity filter:
+            backend = PQueryPandasBackend()
+            masks: List[np.ndarray] = []
+            for start in range(0, len(node), batch_size):
+                feat_dict, time_dict, batch_dict = self._sample(
+                    node[start:start + batch_size],
+                    time.iloc[start:start + batch_size],
+                )
+                _mask = backend.eval_filter(
+                    filter=self._query.entity.filter,
+                    feat_dict=feat_dict,
+                    time_dict=time_dict,
+                    batch_dict=batch_dict,
+                    anchor_time=time.iloc[start:start + batch_size],
+                )
+                masks.append(_mask)
+
+            _mask = np.concatenate(masks)
+            mask = (mask & _mask) if mask is not None else _mask
+
+        if mask is None:
+            mask = np.ones(len(node), dtype=bool)
+
+        return mask
+
+    def _sample(
         self,
         node: np.ndarray,
         anchor_time: pd.Series,
-    ) -> Tuple[pd.Series, np.ndarray]:
+    ) -> Tuple[
+        Dict[str, pd.DataFrame],
+        Dict[str, pd.Series],
+        Dict[str, np.ndarray],
+    ]:
+        r"""Samples a subgraph that contains all relevant information to
+        evaluate the predictive query.
+
+        Args:
+            node: The nodes to check for.
+            anchor_time: The anchor time.
 
+        Returns:
+            The feature dictionary, the time column dictionary and the batch
+            dictionary.
+        """
         specs = self._query.get_sampling_specs(self._graph_store.edge_types)
         num_hops = max([spec.hop for spec in specs] + [0])
         num_neighbors: Dict[Tuple[str, str, str], list[int]] = {}
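Note: `is_valid` combines the temporal existence check with the query's (temporal) entity filter and returns a boolean validity mask. A hypothetical call, reusing the `driver` from the sketch above:

    import numpy as np
    import pandas as pd

    node = np.array([0, 1, 2, 3])
    mask = driver.is_valid(node, anchor_time=pd.Timestamp('2024-06-01'))
    # mask[i] is True for nodes that fulfill the entity filter constraints
    # at the given anchor time.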
@@ -275,11 +366,10 @@ class LocalPQueryDriver:
             if spec.edge_type not in time_offsets:
                 time_offsets[spec.edge_type] = [[0, 0]
                                                 for _ in range(num_hops)]
-            offset: Optional[int] =
-                spec.end_offset)
+            offset: Optional[int] = date_offset_to_seconds(spec.end_offset)
             time_offsets[spec.edge_type][spec.hop - 1][1] = offset
             if spec.start_offset is not None:
-                offset =
+                offset = date_offset_to_seconds(spec.start_offset)
             else:
                 offset = None
             time_offsets[spec.edge_type][spec.hop - 1][0] = offset
@@ -341,18 +431,29 @@ class LocalPQueryDriver:
             time_col = self._graph_store.time_column_dict[table_name]
             time_dict[table_name] = df[time_col]
 
+        return feat_dict, time_dict, batch_dict
+
+    def __call__(
+        self,
+        node: np.ndarray,
+        anchor_time: pd.Series,
+    ) -> Tuple[pd.Series, np.ndarray]:
+
+        feat_dict, time_dict, batch_dict = self._sample(node, anchor_time)
+
         y, mask = PQueryPandasBackend().eval_pquery(
             query=self._query,
             feat_dict=feat_dict,
             time_dict=time_dict,
             batch_dict=batch_dict,
             anchor_time=anchor_time,
+            num_forecasts=self._query.num_forecasts,
         )
 
         return y, mask
 
 
-def
+def date_offset_to_seconds(offset: pd.DateOffset) -> int:
     r"""Convert a :class:`pandas.DateOffset` into a maximum number of
     nanoseconds.
 
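Note: label computation (`__call__`) now forwards `num_forecasts` to the pandas backend. A hypothetical invocation, reusing `driver` and `node` from the sketches above:

    import pandas as pd

    anchor = pd.Series([pd.Timestamp('2024-06-01')] * len(node),
                       dtype='datetime64[ns]')
    y, mask = driver(node, anchor)  # per-node labels plus a validity mask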
kumoai/experimental/rfm/local_table.py

@@ -23,11 +23,13 @@ class Column:
         stype: Stype,
         is_primary_key: bool = False,
         is_time_column: bool = False,
+        is_end_time_column: bool = False,
     ) -> None:
         self._name = name
         self._dtype = Dtype(dtype)
         self._is_primary_key = is_primary_key
         self._is_time_column = is_time_column
+        self._is_end_time_column = is_end_time_column
         self.stype = Stype(stype)
 
     @property
@@ -50,9 +52,12 @@ class Column:
         if self._is_primary_key and val != Stype.ID:
             raise ValueError(f"Primary key '{self.name}' must have 'ID' "
                              f"semantic type (got '{val}')")
-        if self.
+        if self._is_time_column and val != Stype.timestamp:
             raise ValueError(f"Time column '{self.name}' must have "
                              f"'timestamp' semantic type (got '{val}')")
+        if self._is_end_time_column and val != Stype.timestamp:
+            raise ValueError(f"End time column '{self.name}' must have "
+                             f"'timestamp' semantic type (got '{val}')")
 
         super().__setattr__(key, val)
 
@@ -93,6 +98,7 @@ class LocalTable:
             name="my_table",
             primary_key="id",
             time_column="time",
+            end_time_column=None,
         )
 
         # Verify metadata:
@@ -106,6 +112,8 @@ class LocalTable:
         name: The name of the table.
         primary_key: The name of the primary key of this table, if it exists.
         time_column: The name of the time column of this table, if it exists.
+        end_time_column: The name of the end time column of this table, if it
+            exists.
     """
     def __init__(
         self,
@@ -113,6 +121,7 @@ class LocalTable:
         name: str,
         primary_key: Optional[str] = None,
         time_column: Optional[str] = None,
+        end_time_column: Optional[str] = None,
     ) -> None:
 
         if df.empty:
@@ -130,6 +139,7 @@ class LocalTable:
         self._name = name
         self._primary_key: Optional[str] = None
         self._time_column: Optional[str] = None
+        self._end_time_column: Optional[str] = None
 
         self._columns: Dict[str, Column] = {}
         for column_name in df.columns:
@@ -141,6 +151,9 @@ class LocalTable:
         if time_column is not None:
             self.time_column = time_column
 
+        if end_time_column is not None:
+            self.end_time_column = end_time_column
+
     @property
     def name(self) -> str:
         r"""The name of the table."""
@@ -230,6 +243,8 @@ class LocalTable:
             self.primary_key = None
         if self._time_column == name:
             self.time_column = None
+        if self._end_time_column == name:
+            self.end_time_column = None
         del self._columns[name]
 
         return self
@@ -253,9 +268,8 @@ class LocalTable:
         :class:`ValueError` if the primary key has a non-ID semantic type or
         if the column name does not match a column in the data frame.
         """
-        if
+        if self._primary_key is None:
             return None
-        assert self._primary_key is not None
         return self[self._primary_key]
 
     @primary_key.setter
@@ -264,6 +278,10 @@ class LocalTable:
             raise ValueError(f"Cannot specify column '{name}' as a primary "
                              f"key since it is already defined to be a time "
                              f"column")
+        if name is not None and name == self._end_time_column:
+            raise ValueError(f"Cannot specify column '{name}' as a primary "
+                             f"key since it is already defined to be an end "
+                             f"time column")
 
         if self.primary_key is not None:
             self.primary_key._is_primary_key = False
@@ -295,9 +313,8 @@ class LocalTable:
         :class:`ValueError` if the time column has a non-timestamp semantic
         type or if the column name does not match a column in the data frame.
         """
-        if
+        if self._time_column is None:
             return None
-        assert self._time_column is not None
         return self[self._time_column]
 
     @time_column.setter
@@ -306,6 +323,10 @@ class LocalTable:
             raise ValueError(f"Cannot specify column '{name}' as a time "
                              f"column since it is already defined to be a "
                              f"primary key")
+        if name is not None and name == self._end_time_column:
+            raise ValueError(f"Cannot specify column '{name}' as a time "
+                             f"column since it is already defined to be an "
+                             f"end time column")
 
         if self.time_column is not None:
             self.time_column._is_time_column = False
@@ -318,6 +339,52 @@ class LocalTable:
         self[name]._is_time_column = True
         self._time_column = name
 
+    # End Time column #########################################################
+
+    def has_end_time_column(self) -> bool:
+        r"""Returns ``True`` if this table has an end time column; ``False``
+        otherwise.
+        """
+        return self._end_time_column is not None
+
+    @property
+    def end_time_column(self) -> Optional[Column]:
+        r"""The end time column of this table.
+
+        The getter returns the end time column of this table, or ``None`` if no
+        such end time column is present.
+
+        The setter sets a column as an end time column on this table, and
+        raises a :class:`ValueError` if the end time column has a non-timestamp
+        semantic type or if the column name does not match a column in the data
+        frame.
+        """
+        if self._end_time_column is None:
+            return None
+        return self[self._end_time_column]
+
+    @end_time_column.setter
+    def end_time_column(self, name: Optional[str]) -> None:
+        if name is not None and name == self._primary_key:
+            raise ValueError(f"Cannot specify column '{name}' as an end time "
+                             f"column since it is already defined to be a "
+                             f"primary key")
+        if name is not None and name == self._time_column:
+            raise ValueError(f"Cannot specify column '{name}' as an end time "
+                             f"column since it is already defined to be a "
+                             f"time column")
+
+        if self.end_time_column is not None:
+            self.end_time_column._is_end_time_column = False
+
+        if name is None:
+            self._end_time_column = None
+            return
+
+        self[name].stype = Stype.timestamp
+        self[name]._is_end_time_column = True
+        self._end_time_column = name
+
     # Metadata ################################################################
 
     @property
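Note: an end time column marks when an entity stops existing (e.g., an account close date); it must carry the `timestamp` semantic type and must differ from both the primary key and the time column. A hypothetical setup (table and column names are made up):

    import pandas as pd
    import kumoai.experimental.rfm as rfm

    df = pd.DataFrame({
        'AccountID': [1, 2],
        'opened_at': pd.to_datetime(['2023-01-01', '2023-02-01']),
        'closed_at': pd.to_datetime(['2024-01-01', pd.NaT]),
    })
    table = rfm.LocalTable(df, name='accounts', primary_key='AccountID',
                           time_column='opened_at')
    table.end_time_column = 'closed_at'    # via the new setter
    table.has_end_time_column()            # True
    # table.end_time_column = 'opened_at'  # would raise ValueError (already the time column)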
@@ -326,16 +393,18 @@ class LocalTable:
         information about the columns in this table.
 
         The returned dataframe has columns ``name``, ``dtype``, ``stype``,
-        ``is_primary_key``, and ``
-        view of the properties of the columns of
+        ``is_primary_key``, ``is_time_column`` and ``is_end_time_column``,
+        which provide an aggregate view of the properties of the columns of
+        this table.
 
         Example:
+            >>> # doctest: +SKIP
             >>> import kumoai.experimental.rfm as rfm
             >>> table = rfm.LocalTable(df=..., name=...).infer_metadata()
             >>> table.metadata
-            name dtype
-            0 CustomerID float64
-        """
+                     name    dtype stype  is_primary_key  is_time_column  is_end_time_column
+            0  CustomerID  float64    ID            True           False               False
+        """  # noqa: E501
         cols = self.columns
 
         return pd.DataFrame({
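Note: the extended metadata frame can be inspected directly. Continuing the hypothetical `table` from the sketch above (output shown approximately):

    print(table.metadata[['name', 'is_time_column', 'is_end_time_column']])
    #         name  is_time_column  is_end_time_column
    # 0  AccountID           False               False
    # 1  opened_at            True               False
    # 2  closed_at           False                True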
@@ -355,6 +424,11 @@ class LocalTable:
                 dtype=bool,
                 data=[self._time_column == c.name for c in cols],
             ),
+            'is_end_time_column':
+            pd.Series(
+                dtype=bool,
+                data=[self._end_time_column == c.name for c in cols],
+            ),
         })
 
     def print_metadata(self) -> None:
@@ -417,6 +491,7 @@ class LocalTable:
         candidates = [
             column.name for column in self.columns
             if column.stype == Stype.timestamp
+            and column.name != self._end_time_column
         ]
         if time_column := utils.detect_time_column(self._data, candidates):
             self.time_column = time_column
@@ -430,24 +505,26 @@ class LocalTable:
     # Helpers #################################################################
 
     def _to_api_table_definition(self) -> TableDefinition:
-        cols: List[ColumnDefinition] = []
-        for col in self.columns:
-            cols.append(ColumnDefinition(col.name, col.stype, col.dtype))
-        pkey = self._primary_key
-        time_col = self._time_column
-        source_table = UnavailableSourceTable(table=self.name)
-
         return TableDefinition(
-            cols=cols,
-            source_table=source_table,
-            pkey=pkey,
-            time_col=time_col,
+            cols=[
+                ColumnDefinition(col.name, col.stype, col.dtype)
+                for col in self.columns
+            ],
+            source_table=UnavailableSourceTable(table=self.name),
+            pkey=self._primary_key,
+            time_col=self._time_column,
+            end_time_col=self._end_time_column,
         )
 
     # Python builtins #########################################################
 
     def __hash__(self) -> int:
-
+        special_columns = [
+            self.primary_key,
+            self.time_column,
+            self.end_time_column,
+        ]
+        return hash(tuple(self.columns + special_columns))
 
     def __contains__(self, name: str) -> bool:
         return self.has_column(name)
@@ -464,4 +541,5 @@ class LocalTable:
                 f' num_columns={len(self.columns)},\n'
                 f' primary_key={self._primary_key},\n'
                 f' time_column={self._time_column},\n'
+                f' end_time_column={self._end_time_column},\n'
                 f')')
kumoai/experimental/rfm/pquery/__init__.py

@@ -1,7 +1,11 @@
 from .backend import PQueryBackend
 from .pandas_backend import PQueryPandasBackend
+from .executor import PQueryExecutor
+from .pandas_executor import PQueryPandasExecutor
 
 __all__ = [
     'PQueryBackend',
     'PQueryPandasBackend',
+    'PQueryExecutor',
+    'PQueryPandasExecutor',
 ]
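Note: the new executor abstractions are re-exported next to the existing backend classes. A minimal import sketch (assuming the dev build above is installed):

    from kumoai.experimental.rfm.pquery import (
        PQueryBackend,
        PQueryExecutor,
        PQueryPandasBackend,
        PQueryPandasExecutor,
    )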
kumoai/experimental/rfm/pquery/backend.py

@@ -82,6 +82,7 @@ class PQueryBackend(Generic[TableData, ColumnData, IndexData], ABC):
         batch_dict: Dict[str, IndexData],
         anchor_time: ColumnData,
         filter_na: bool = True,
+        num_forecasts: int = 1,
     ) -> Tuple[ColumnData, IndexData]:
         pass
 
@@ -94,6 +95,7 @@ class PQueryBackend(Generic[TableData, ColumnData, IndexData], ABC):
         batch_dict: Dict[str, IndexData],
         anchor_time: ColumnData,
         filter_na: bool = True,
+        num_forecasts: int = 1,
     ) -> Tuple[ColumnData, IndexData]:
         pass
 
@@ -106,6 +108,7 @@ class PQueryBackend(Generic[TableData, ColumnData, IndexData], ABC):
         batch_dict: Dict[str, IndexData],
         anchor_time: ColumnData,
         filter_na: bool = True,
+        num_forecasts: int = 1,
     ) -> Tuple[ColumnData, IndexData]:
         pass
 
@@ -128,5 +131,6 @@ class PQueryBackend(Generic[TableData, ColumnData, IndexData], ABC):
         time_dict: Dict[str, ColumnData],
         batch_dict: Dict[str, IndexData],
         anchor_time: ColumnData,
+        num_forecasts: int = 1,
     ) -> Tuple[ColumnData, IndexData]:
         pass
kumoai/experimental/rfm/pquery/executor.py (new file)

@@ -0,0 +1,102 @@
+from abc import ABC, abstractmethod
+from typing import Dict, Generic, Tuple, TypeVar
+
+from kumoapi.pquery import ValidatedPredictiveQuery
+from kumoapi.pquery.AST import (
+    Aggregation,
+    Column,
+    Condition,
+    Filter,
+    Join,
+    LogicalOperation,
+)
+
+TableData = TypeVar('TableData')
+ColumnData = TypeVar('ColumnData')
+IndexData = TypeVar('IndexData')
+
+
+class PQueryExecutor(Generic[TableData, ColumnData, IndexData], ABC):
+    @abstractmethod
+    def execute_column(
+        self,
+        column: Column,
+        feat_dict: Dict[str, TableData],
+        filter_na: bool = True,
+    ) -> Tuple[ColumnData, IndexData]:
+        pass
+
+    @abstractmethod
+    def execute_aggregation(
+        self,
+        aggr: Aggregation,
+        feat_dict: Dict[str, TableData],
+        time_dict: Dict[str, ColumnData],
+        batch_dict: Dict[str, IndexData],
+        anchor_time: ColumnData,
+        filter_na: bool = True,
+        num_forecasts: int = 1,
+    ) -> Tuple[ColumnData, IndexData]:
+        pass
+
+    @abstractmethod
+    def execute_condition(
+        self,
+        condition: Condition,
+        feat_dict: Dict[str, TableData],
+        time_dict: Dict[str, ColumnData],
+        batch_dict: Dict[str, IndexData],
+        anchor_time: ColumnData,
+        filter_na: bool = True,
+        num_forecasts: int = 1,
+    ) -> Tuple[ColumnData, IndexData]:
+        pass
+
+    @abstractmethod
+    def execute_logical_operation(
+        self,
+        logical_operation: LogicalOperation,
+        feat_dict: Dict[str, TableData],
+        time_dict: Dict[str, ColumnData],
+        batch_dict: Dict[str, IndexData],
+        anchor_time: ColumnData,
+        filter_na: bool = True,
+        num_forecasts: int = 1,
+    ) -> Tuple[ColumnData, IndexData]:
+        pass
+
+    @abstractmethod
+    def execute_join(
+        self,
+        join: Join,
+        feat_dict: Dict[str, TableData],
+        time_dict: Dict[str, ColumnData],
+        batch_dict: Dict[str, IndexData],
+        anchor_time: ColumnData,
+        filter_na: bool = True,
+        num_forecasts: int = 1,
+    ) -> Tuple[ColumnData, IndexData]:
+        pass
+
+    @abstractmethod
+    def execute_filter(
+        self,
+        filter: Filter,
+        feat_dict: Dict[str, TableData],
+        time_dict: Dict[str, ColumnData],
+        batch_dict: Dict[str, IndexData],
+        anchor_time: ColumnData,
+    ) -> Tuple[ColumnData, IndexData]:
+        pass
+
+    @abstractmethod
+    def execute(
+        self,
+        query: ValidatedPredictiveQuery,
+        feat_dict: Dict[str, TableData],
+        time_dict: Dict[str, ColumnData],
+        batch_dict: Dict[str, IndexData],
+        anchor_time: ColumnData,
+        num_forecasts: int = 1,
+    ) -> Tuple[ColumnData, IndexData]:
+        pass