mlrun 1.3.2rc1__py3-none-any.whl → 1.3.2rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of mlrun has been flagged as potentially problematic.

Files changed (93)
  1. mlrun/api/api/deps.py +14 -1
  2. mlrun/api/api/endpoints/frontend_spec.py +0 -2
  3. mlrun/api/api/endpoints/functions.py +15 -27
  4. mlrun/api/api/endpoints/grafana_proxy.py +435 -74
  5. mlrun/api/api/endpoints/healthz.py +5 -18
  6. mlrun/api/api/endpoints/model_endpoints.py +33 -37
  7. mlrun/api/api/utils.py +6 -13
  8. mlrun/api/crud/__init__.py +14 -16
  9. mlrun/api/crud/logs.py +5 -7
  10. mlrun/api/crud/model_monitoring/__init__.py +2 -2
  11. mlrun/api/crud/model_monitoring/model_endpoint_store.py +847 -0
  12. mlrun/api/crud/model_monitoring/model_endpoints.py +105 -328
  13. mlrun/api/crud/pipelines.py +2 -3
  14. mlrun/api/db/sqldb/models/models_mysql.py +52 -19
  15. mlrun/api/db/sqldb/models/models_sqlite.py +52 -19
  16. mlrun/api/db/sqldb/session.py +19 -26
  17. mlrun/api/schemas/__init__.py +2 -0
  18. mlrun/api/schemas/constants.py +0 -13
  19. mlrun/api/schemas/frontend_spec.py +0 -1
  20. mlrun/api/schemas/model_endpoints.py +38 -195
  21. mlrun/api/schemas/schedule.py +2 -2
  22. mlrun/api/utils/clients/log_collector.py +5 -0
  23. mlrun/builder.py +9 -41
  24. mlrun/config.py +1 -76
  25. mlrun/data_types/__init__.py +1 -6
  26. mlrun/data_types/data_types.py +1 -3
  27. mlrun/datastore/__init__.py +2 -9
  28. mlrun/datastore/sources.py +20 -25
  29. mlrun/datastore/store_resources.py +1 -1
  30. mlrun/datastore/targets.py +34 -67
  31. mlrun/datastore/utils.py +4 -26
  32. mlrun/db/base.py +2 -4
  33. mlrun/db/filedb.py +5 -13
  34. mlrun/db/httpdb.py +32 -64
  35. mlrun/db/sqldb.py +2 -4
  36. mlrun/errors.py +0 -5
  37. mlrun/execution.py +0 -2
  38. mlrun/feature_store/api.py +8 -24
  39. mlrun/feature_store/feature_set.py +6 -28
  40. mlrun/feature_store/feature_vector.py +0 -2
  41. mlrun/feature_store/ingestion.py +11 -8
  42. mlrun/feature_store/retrieval/base.py +43 -271
  43. mlrun/feature_store/retrieval/dask_merger.py +153 -55
  44. mlrun/feature_store/retrieval/job.py +3 -12
  45. mlrun/feature_store/retrieval/local_merger.py +130 -48
  46. mlrun/feature_store/retrieval/spark_merger.py +125 -126
  47. mlrun/features.py +2 -7
  48. mlrun/model_monitoring/constants.py +6 -48
  49. mlrun/model_monitoring/helpers.py +35 -118
  50. mlrun/model_monitoring/model_monitoring_batch.py +260 -293
  51. mlrun/model_monitoring/stream_processing_fs.py +253 -220
  52. mlrun/platforms/iguazio.py +0 -33
  53. mlrun/projects/project.py +72 -34
  54. mlrun/runtimes/base.py +0 -5
  55. mlrun/runtimes/daskjob.py +0 -2
  56. mlrun/runtimes/function.py +3 -29
  57. mlrun/runtimes/kubejob.py +15 -39
  58. mlrun/runtimes/local.py +45 -7
  59. mlrun/runtimes/mpijob/abstract.py +0 -2
  60. mlrun/runtimes/mpijob/v1.py +0 -2
  61. mlrun/runtimes/pod.py +0 -2
  62. mlrun/runtimes/remotesparkjob.py +0 -2
  63. mlrun/runtimes/serving.py +0 -6
  64. mlrun/runtimes/sparkjob/abstract.py +2 -39
  65. mlrun/runtimes/sparkjob/spark3job.py +0 -2
  66. mlrun/serving/__init__.py +1 -2
  67. mlrun/serving/routers.py +35 -35
  68. mlrun/serving/server.py +12 -22
  69. mlrun/serving/states.py +30 -162
  70. mlrun/serving/v2_serving.py +10 -13
  71. mlrun/utils/clones.py +1 -1
  72. mlrun/utils/model_monitoring.py +96 -122
  73. mlrun/utils/version/version.json +2 -2
  74. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/METADATA +27 -23
  75. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/RECORD +79 -92
  76. mlrun/api/crud/model_monitoring/grafana.py +0 -427
  77. mlrun/datastore/spark_udf.py +0 -40
  78. mlrun/model_monitoring/__init__.py +0 -44
  79. mlrun/model_monitoring/common.py +0 -112
  80. mlrun/model_monitoring/model_endpoint.py +0 -141
  81. mlrun/model_monitoring/stores/__init__.py +0 -106
  82. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -448
  83. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
  84. mlrun/model_monitoring/stores/models/__init__.py +0 -23
  85. mlrun/model_monitoring/stores/models/base.py +0 -18
  86. mlrun/model_monitoring/stores/models/mysql.py +0 -100
  87. mlrun/model_monitoring/stores/models/sqlite.py +0 -98
  88. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -375
  89. mlrun/utils/db.py +0 -52
  90. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/LICENSE +0 -0
  91. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/WHEEL +0 -0
  92. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/entry_points.txt +0 -0
  93. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/top_level.txt +0 -0
mlrun/feature_store/retrieval/base.py
@@ -13,16 +13,11 @@
  # limitations under the License.
  #
  import abc
- import typing
- from datetime import datetime

  import mlrun
  from mlrun.datastore.targets import CSVTarget, ParquetTarget
- from mlrun.feature_store.feature_set import FeatureSet
- from mlrun.feature_store.feature_vector import Feature

  from ...utils import logger
- from ..feature_vector import OfflineVectorResponse


  class BaseMerger(abc.ABC):
@@ -41,7 +36,6 @@ class BaseMerger(abc.ABC):
  self._drop_indexes = True
  self._target = None
  self._alias = dict()
- self._origin_alias = dict()

  def _append_drop_column(self, key):
  if key and key not in self._drop_columns:
@@ -77,7 +71,6 @@ class BaseMerger(abc.ABC):
  update_stats=None,
  query=None,
  join_type="inner",
- order_by=None,
  ):
  self._target = target
  self._join_type = join_type
@@ -117,11 +110,9 @@ class BaseMerger(abc.ABC):
  start_time=start_time,
  end_time=end_time,
  query=query,
- order_by=order_by,
  )

  def _write_to_target(self):
- self.vector.spec.with_indexes = not self._drop_indexes
  if self._target:
  is_persistent_vector = self.vector.metadata.name is not None
  if not self._target.path and not is_persistent_vector:
@@ -134,14 +125,6 @@ class BaseMerger(abc.ABC):
  target_status = self._target.update_resource_status("ready", size=size)
  logger.info(f"wrote target: {target_status}")
  self.vector.save()
- if self.vector.spec.with_indexes:
- self.vector.spec.entity_fields = [
- Feature(name=feature, value_type=self._result_df[feature].dtype)
- if self._result_df[feature].dtype.name != "object"
- else Feature(name=feature, value_type="str")
- for feature in self._index_columns
- ]
- self.vector.save()

  def _set_indexes(self, df):
  if self._index_columns and not self._drop_indexes:
@@ -151,15 +134,28 @@ class BaseMerger(abc.ABC):
  if index not in df.columns:
  index_columns_missing.append(index)
  if not index_columns_missing:
- df.set_index(self._index_columns, inplace=True)
+ if self.engine == "local" or self.engine == "spark":
+ df.set_index(self._index_columns, inplace=True)
+ elif self.engine == "dask":
+ if len(self._index_columns) == 1:
+ return df.set_index(self._index_columns[0])
+ elif len(self._index_columns) != 1:
+ return self._reset_index(self._result_df)
+ else:
+ logger.info(
+ "The entities will stay as columns because "
+ "Dask dataframe does not yet support multi-indexes"
+ )
+ return self._result_df
  else:
  logger.warn(
  f"Can't set index, not all index columns found: {index_columns_missing}. "
  f"It is possible that column was already indexed."
  )
- else:
- df.reset_index(drop=True, inplace=True)
+ else:
+ return df

+ @abc.abstractmethod
  def _generate_vector(
  self,
  entity_rows,
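The new dask branch in `_set_indexes` only calls `set_index()` with a single column and otherwise leaves the entities as ordinary columns, because dask dataframes have no multi-index support. A minimal sketch of that limitation; the column names are illustrative and not taken from this diff:

    import pandas as pd
    import dask.dataframe as dd

    # illustrative entity and feature columns, not from mlrun
    pdf = pd.DataFrame({"patient_id": [1, 2], "device_id": [10, 20], "hr": [61, 74]})
    ddf = dd.from_pandas(pdf, npartitions=1)

    single = ddf.set_index("patient_id")  # a single index column is supported
    try:
        ddf.set_index(["patient_id", "device_id"])  # a multi-column index is not
    except NotImplementedError as err:
        print(f"dask rejected the multi-column index: {err}")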
@@ -169,148 +165,8 @@ class BaseMerger(abc.ABC):
  start_time=None,
  end_time=None,
  query=None,
- order_by=None,
  ):
- self._create_engine_env()
-
- feature_sets = []
- dfs = []
- keys = (
- []
- ) # the struct of key is [[[],[]], ..] So that each record indicates which way the corresponding
- # featureset is connected to the previous one, and within each record the left keys are indicated in index 0
- # and the right keys in index 1, this keys will be the keys that will be used in this join
-
- fs_link_list = self._create_linked_relation_list(
- feature_set_objects, feature_set_fields
- )
-
- for node in fs_link_list:
- name = node.name
- feature_set = feature_set_objects[name]
- feature_sets.append(feature_set)
- columns = feature_set_fields[name]
- self._origin_alias.update({name: alias for name, alias in columns})
- column_names = [name for name, _ in columns]
-
- for column in node.data["save_cols"]:
- if column not in column_names:
- self._append_drop_column(column)
- column_names.append(column)
-
- df = self._get_engine_df(
- feature_set,
- name,
- column_names,
- start_time,
- end_time,
- entity_timestamp_column,
- )
-
- column_names += node.data["save_index"]
- node.data["save_cols"] += node.data["save_index"]
- if feature_set.spec.timestamp_key:
- entity_timestamp_column_list = [feature_set.spec.timestamp_key]
- column_names += entity_timestamp_column_list
- node.data["save_cols"] += entity_timestamp_column_list
- if not entity_timestamp_column:
- # if not entity_timestamp_column the firs `FeatureSet` will define it
- entity_timestamp_column = feature_set.spec.timestamp_key
-
- # rename columns to be unique for each feature set and select if needed
- rename_col_dict = {
- column: f"{column}_{name}"
- for column in column_names
- if column not in node.data["save_cols"]
- }
- fs_entities = list(feature_set.spec.entities.keys())
- df_temp = self._rename_columns_and_select(
- df, rename_col_dict, columns=list(set(column_names + fs_entities))
- )
-
- if df_temp is not None:
- df = df_temp
- del df_temp
-
- dfs.append(df)
- del df
-
- keys.append([node.data["left_keys"], node.data["right_keys"]])
-
- # update alias according to the unique column name
- new_columns = []
- if not self._drop_indexes:
- new_columns.extend([(ind, ind) for ind in fs_entities])
- for column, alias in columns:
- if column in rename_col_dict:
- new_columns.append((rename_col_dict[column], alias or column))
- else:
- new_columns.append((column, alias))
- self._update_alias(dictionary={name: alias for name, alias in new_columns})
-
- # convert pandas entity_rows to spark DF if needed
- if (
- entity_rows is not None
- and not hasattr(entity_rows, "rdd")
- and self.engine == "spark"
- ):
- entity_rows = self.spark.createDataFrame(entity_rows)
-
- # join the feature data frames
- self.merge(
- entity_df=entity_rows,
- entity_timestamp_column=entity_timestamp_column,
- featuresets=feature_sets,
- featureset_dfs=dfs,
- keys=keys,
- )
-
- all_columns = None
- if not self._drop_indexes and entity_timestamp_column:
- if entity_timestamp_column not in self._alias.values():
- self._update_alias(
- key=entity_timestamp_column, val=entity_timestamp_column
- )
- all_columns = list(self._alias.keys())
-
- df_temp = self._rename_columns_and_select(
- self._result_df, self._alias, columns=all_columns
- )
- if df_temp is not None:
- self._result_df = df_temp
- del df_temp
-
- df_temp = self._drop_columns_from_result()
- if df_temp is not None:
- self._result_df = df_temp
- del df_temp
-
- if self.vector.status.label_column:
- self._result_df = self._result_df.dropna(
- subset=[self.vector.status.label_column]
- )
- # filter joined data frame by the query param
- if query:
- self._filter(query)
-
- if order_by:
- if isinstance(order_by, str):
- order_by = [order_by]
- order_by_active = [
- order_col
- if order_col in self._result_df.columns
- else self._origin_alias.get(order_col, None)
- for order_col in order_by
- ]
- if None in order_by_active:
- raise mlrun.errors.MLRunInvalidArgumentError(
- f"Result dataframe contains {self._result_df.columns} "
- f"columns and can't order by {order_by}"
- )
- self._order_by(order_by_active)
-
- self._write_to_target()
- return OfflineVectorResponse(self)
+ raise NotImplementedError("_generate_vector() operation not supported in class")

  def _unpersist_df(self, df):
  pass
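The removed `_generate_vector` body above (now left to the engine-specific mergers) renamed every non-key column with a per-feature-set suffix so that columns from different feature sets cannot collide during the join, then mapped the requested aliases back afterwards. A small pandas sketch of that renaming idea, using a hypothetical feature-set name:

    import pandas as pd

    name = "heart_rate_set"  # hypothetical feature-set name
    df = pd.DataFrame({"patient_id": [1, 2], "hr": [61, 74]})

    save_cols = {"patient_id"}  # join keys keep their original names
    rename_col_dict = {c: f"{c}_{name}" for c in df.columns if c not in save_cols}
    df = df.rename(columns=rename_col_dict)
    print(list(df.columns))  # ['patient_id', 'hr_heart_rate_set']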
@@ -322,6 +178,7 @@ class BaseMerger(abc.ABC):
  featuresets: list,
  featureset_dfs: list,
  keys: list = None,
+ all_columns: list = None,
  ):
  """join the entities and feature set features into a result dataframe"""
  merged_df = entity_df
@@ -333,6 +190,10 @@ class BaseMerger(abc.ABC):
  else:
  # keys can be multiple keys on each side of the join
  keys = [[[], []]] * len(featureset_dfs)
+ if all_columns is not None:
+ all_columns.pop(0)
+ else:
+ all_columns = [[]] * len(featureset_dfs)
  entity_timestamp_column = (
  entity_timestamp_column or featureset.spec.timestamp_key
  )
@@ -342,7 +203,9 @@ class BaseMerger(abc.ABC):
  # and it can join only by the entities of the first `featureset`
  keys[0][0] = keys[0][1] = list(featuresets[0].spec.entities.keys())

- for featureset, featureset_df, lr_key in zip(featuresets, featureset_dfs, keys):
+ for featureset, featureset_df, lr_key, columns in zip(
+ featuresets, featureset_dfs, keys, all_columns
+ ):
  if featureset.spec.timestamp_key:
  merge_func = self._asof_join
  if self._join_type != "inner":
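With the new `all_columns` argument, merge() walks the key pairs and the per-feature-set column lists in lockstep with the dataframes. A hedged illustration of the shapes it expects; the key and column names are made up:

    featureset_dfs = ["df_heart_rate", "df_device"]  # placeholders for real dataframes
    keys = [
        [["patient_id"], ["patient_id"]],  # [left_keys, right_keys] for the first join
        [["device_id"], ["device_id"]],    # [left_keys, right_keys] for the second join
    ]
    all_columns = [
        ["hr", "hr_avg"],                  # columns kept from the first feature set
        ["battery", "temperature"],        # columns kept from the second feature set
    ]
    # the three lists must line up, one entry per feature-set dataframe
    assert len(keys) == len(all_columns) == len(featureset_dfs)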
@@ -360,6 +223,7 @@ class BaseMerger(abc.ABC):
  featureset_df,
  lr_key[0],
  lr_key[1],
+ columns,
  )

  # unpersist as required by the implementation (e.g. spark) and delete references
@@ -378,6 +242,7 @@ class BaseMerger(abc.ABC):
  featureset_df,
  left_keys: list,
  right_keys: list,
+ columns: list,
  ):
  raise NotImplementedError("_asof_join() operation not implemented in class")

@@ -390,6 +255,7 @@ class BaseMerger(abc.ABC):
  featureset_df,
  left_keys: list,
  right_keys: list,
+ columns: list,
  ):
  raise NotImplementedError("_join() operation not implemented in class")

@@ -401,7 +267,6 @@ class BaseMerger(abc.ABC):

  def get_df(self, to_pandas=True):
  """return the result as a dataframe (pandas by default)"""
- self._set_indexes(self._result_df)
  return self._result_df

  def to_parquet(self, target_path, **kw):
@@ -428,9 +293,6 @@ class BaseMerger(abc.ABC):
  def __eq__(self, other):
  return self.name == other.name

- def __copy__(self):
- return BaseMerger._Node(self.name, self.order, self.data.copy())
-
  class _LinkedList:
  def __init__(self, head=None):
  self.head = head
@@ -451,19 +313,6 @@ class BaseMerger(abc.ABC):
  yield node
  node = node.next

- def __copy__(self):
- ll = BaseMerger._LinkedList()
- prev_node = None
- for node in self:
- new_node = node.__copy__()
- if ll.head is None:
- ll.head = new_node
- else:
- prev_node.next = new_node
- prev_node = new_node
- ll.len = self.len
- return ll
-
  def add_first(self, node):
  node.next = self.head
  self.head = node
@@ -476,9 +325,7 @@ class BaseMerger(abc.ABC):
  for current_node in self:
  pass
  current_node.next = node
- while node:
- self.len += 1
- node = node.next
+ self.len += 1

  def add_after(self, target_node, new_node):
  new_node.next = target_node.next
@@ -499,9 +346,7 @@ class BaseMerger(abc.ABC):
  node = self.find_node(other_head.name)
  if node is None:
  return
- for col in other_head.data["save_cols"]:
- if col not in node.data["save_cols"]:
- node.data["save_cols"].append(col)
+ node.data["save_cols"] += other_head.data["save_cols"]
  for other_node in other_iter:
  if self.find_node(other_node.name) is None:
  while node is not None and other_node.order > node.order:
@@ -575,9 +420,10 @@ class BaseMerger(abc.ABC):
  )
  )

- if all(
- curr_col_relation_list
- ): # checking if feature_set have relation with feature_set_in
+ # checking if feature_set have relation with feature_set_in
+ relation_wise = all(curr_col_relation_list)
+
+ if relation_wise:
  # add to the link list feature set according to the defined relation
  linked_list_relation.add_last(
  BaseMerger._Node(
@@ -591,8 +437,8 @@ class BaseMerger(abc.ABC):
  order=name_in_order,
  )
  )
- linked_list_relation.head.data["save_cols"].extend(
- curr_col_relation_list
+ linked_list_relation.head.data["save_cols"].append(
+ *curr_col_relation_list
  )
  elif name_in_order > head_order and sorted(
  feature_set_in_entity_list_names
@@ -622,14 +468,14 @@ class BaseMerger(abc.ABC):
  relation_linked_lists.append(linked_relation)

  # concat all the link lists to one, for the merging process
- for i in range(len(relation_linked_lists)):
- return_relation = relation_linked_lists[i].__copy__()
- for relation_list in relation_linked_lists:
- return_relation.concat(relation_list)
- if return_relation.len == len(feature_set_objects):
- return return_relation
+ link_list_iter = iter(relation_linked_lists)
+ return_relation = next(link_list_iter)
+ for relation_list in link_list_iter:
+ return_relation.concat(relation_list)
+ if return_relation.len != len(feature_set_objects):
+ raise mlrun.errors.MLRunRuntimeError("Failed to merge")

- raise mlrun.errors.MLRunRuntimeError("Failed to merge")
+ return return_relation

  @classmethod
  def get_default_image(cls, kind):
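The rewritten concatenation above no longer copies each linked relation; it folds every relation into the first one and fails fast when the combined length does not cover all feature sets. A generic Python sketch of that pattern, with plain lists standing in for the _LinkedList objects and list.extend() standing in for concat():

    relation_linked_lists = [["fs_a"], ["fs_b"], ["fs_c"]]  # stand-ins for _LinkedList objects
    feature_set_objects = {"fs_a": None, "fs_b": None, "fs_c": None}  # illustrative only

    link_list_iter = iter(relation_linked_lists)
    return_relation = next(link_list_iter)  # the first relation becomes the accumulator
    for relation_list in link_list_iter:
        return_relation.extend(relation_list)
    if len(return_relation) != len(feature_set_objects):
        raise RuntimeError("Failed to merge")
    print(return_relation)  # ['fs_a', 'fs_b', 'fs_c']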
@@ -637,77 +483,3 @@ class BaseMerger(abc.ABC):

  def _reset_index(self, _result_df):
  raise NotImplementedError
-
- @abc.abstractmethod
- def _create_engine_env(self):
- """
- initialize engine env if needed
- """
- raise NotImplementedError
-
- @abc.abstractmethod
- def _get_engine_df(
- self,
- feature_set: FeatureSet,
- feature_set_name: typing.List[str],
- column_names: typing.List[str] = None,
- start_time: typing.Union[str, datetime] = None,
- end_time: typing.Union[str, datetime] = None,
- entity_timestamp_column: str = None,
- ):
- """
- Return the feature_set data frame according to the args
-
- :param feature_set: current feature_set to extract from the data frame
- :param feature_set_name: the name of the current feature_set
- :param column_names: list of columns to select (if not all)
- :param start_time: filter by start time
- :param end_time: filter by end time
- :param entity_timestamp_column: specify the time column name in the file
-
- :return: Data frame of the current engine
- """
- raise NotImplementedError
-
- @abc.abstractmethod
- def _rename_columns_and_select(
- self,
- df,
- rename_col_dict: typing.Dict[str, str],
- columns: typing.List[str] = None,
- ):
- """
- rename the columns of the df according to rename_col_dict, and select only `columns` if it is not none
-
- :param df: the data frame to change
- :param rename_col_dict: the renaming dictionary - {<current_column_name>: <new_column_name>, ...}
- :param columns: list of columns to select (if not all)
-
- :return: the data frame after the transformation or None if the transformation were preformed inplace
- """
- raise NotImplementedError
-
- @abc.abstractmethod
- def _drop_columns_from_result(self):
- """
- drop `self._drop_columns` from `self._result_df`
- """
- raise NotImplementedError
-
- @abc.abstractmethod
- def _filter(self, query: str):
- """
- filter `self._result_df` by `query`
-
- :param query: The query string used to filter rows
- """
- raise NotImplementedError
-
- @abc.abstractmethod
- def _order_by(self, order_by_active: typing.List[str]):
- """
- Order by `order_by_active` along all axis.
-
- :param order_by_active: list of names to sort by.
- """
- raise NotImplementedError