mlrun 1.4.0rc25__py3-none-any.whl → 1.5.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/__init__.py +2 -35
- mlrun/__main__.py +3 -41
- mlrun/api/api/api.py +6 -0
- mlrun/api/api/endpoints/feature_store.py +0 -4
- mlrun/api/api/endpoints/files.py +14 -2
- mlrun/api/api/endpoints/frontend_spec.py +2 -1
- mlrun/api/api/endpoints/functions.py +95 -59
- mlrun/api/api/endpoints/grafana_proxy.py +9 -9
- mlrun/api/api/endpoints/logs.py +17 -3
- mlrun/api/api/endpoints/model_endpoints.py +3 -2
- mlrun/api/api/endpoints/pipelines.py +1 -5
- mlrun/api/api/endpoints/projects.py +88 -0
- mlrun/api/api/endpoints/runs.py +48 -6
- mlrun/api/api/endpoints/submit.py +2 -1
- mlrun/api/api/endpoints/workflows.py +355 -0
- mlrun/api/api/utils.py +3 -4
- mlrun/api/crud/__init__.py +1 -0
- mlrun/api/crud/client_spec.py +6 -2
- mlrun/api/crud/feature_store.py +5 -0
- mlrun/api/crud/model_monitoring/__init__.py +1 -0
- mlrun/api/crud/model_monitoring/deployment.py +497 -0
- mlrun/api/crud/model_monitoring/grafana.py +96 -42
- mlrun/api/crud/model_monitoring/helpers.py +159 -0
- mlrun/api/crud/model_monitoring/model_endpoints.py +202 -476
- mlrun/api/crud/notifications.py +9 -4
- mlrun/api/crud/pipelines.py +6 -11
- mlrun/api/crud/projects.py +2 -2
- mlrun/api/crud/runtime_resources.py +4 -3
- mlrun/api/crud/runtimes/nuclio/helpers.py +5 -1
- mlrun/api/crud/secrets.py +21 -0
- mlrun/api/crud/workflows.py +352 -0
- mlrun/api/db/base.py +16 -1
- mlrun/api/db/init_db.py +2 -4
- mlrun/api/db/session.py +1 -1
- mlrun/api/db/sqldb/db.py +129 -31
- mlrun/api/db/sqldb/models/models_mysql.py +15 -1
- mlrun/api/db/sqldb/models/models_sqlite.py +16 -2
- mlrun/api/launcher.py +38 -6
- mlrun/api/main.py +3 -2
- mlrun/api/rundb/__init__.py +13 -0
- mlrun/{db → api/rundb}/sqldb.py +36 -84
- mlrun/api/runtime_handlers/__init__.py +56 -0
- mlrun/api/runtime_handlers/base.py +1247 -0
- mlrun/api/runtime_handlers/daskjob.py +209 -0
- mlrun/api/runtime_handlers/kubejob.py +37 -0
- mlrun/api/runtime_handlers/mpijob.py +147 -0
- mlrun/api/runtime_handlers/remotesparkjob.py +29 -0
- mlrun/api/runtime_handlers/sparkjob.py +148 -0
- mlrun/api/schemas/__init__.py +17 -6
- mlrun/api/utils/builder.py +1 -4
- mlrun/api/utils/clients/chief.py +14 -0
- mlrun/api/utils/clients/iguazio.py +33 -33
- mlrun/api/utils/clients/nuclio.py +2 -2
- mlrun/api/utils/periodic.py +9 -2
- mlrun/api/utils/projects/follower.py +14 -7
- mlrun/api/utils/projects/leader.py +2 -1
- mlrun/api/utils/projects/remotes/nop_follower.py +2 -2
- mlrun/api/utils/projects/remotes/nop_leader.py +2 -2
- mlrun/api/utils/runtimes/__init__.py +14 -0
- mlrun/api/utils/runtimes/nuclio.py +43 -0
- mlrun/api/utils/scheduler.py +98 -15
- mlrun/api/utils/singletons/db.py +5 -1
- mlrun/api/utils/singletons/project_member.py +4 -1
- mlrun/api/utils/singletons/scheduler.py +1 -1
- mlrun/artifacts/base.py +6 -6
- mlrun/artifacts/dataset.py +4 -4
- mlrun/artifacts/manager.py +2 -3
- mlrun/artifacts/model.py +2 -2
- mlrun/artifacts/plots.py +8 -8
- mlrun/common/db/__init__.py +14 -0
- mlrun/common/helpers.py +37 -0
- mlrun/{mlutils → common/model_monitoring}/__init__.py +3 -2
- mlrun/common/model_monitoring/helpers.py +69 -0
- mlrun/common/schemas/__init__.py +13 -1
- mlrun/common/schemas/auth.py +4 -1
- mlrun/common/schemas/client_spec.py +1 -1
- mlrun/common/schemas/function.py +17 -0
- mlrun/common/schemas/model_monitoring/__init__.py +48 -0
- mlrun/common/{model_monitoring.py → schemas/model_monitoring/constants.py} +11 -23
- mlrun/common/schemas/model_monitoring/grafana.py +55 -0
- mlrun/common/schemas/{model_endpoints.py → model_monitoring/model_endpoints.py} +32 -65
- mlrun/common/schemas/notification.py +1 -0
- mlrun/common/schemas/object.py +4 -0
- mlrun/common/schemas/project.py +1 -0
- mlrun/common/schemas/regex.py +1 -1
- mlrun/common/schemas/runs.py +1 -8
- mlrun/common/schemas/schedule.py +1 -8
- mlrun/common/schemas/workflow.py +54 -0
- mlrun/config.py +45 -42
- mlrun/datastore/__init__.py +21 -0
- mlrun/datastore/base.py +1 -1
- mlrun/datastore/datastore.py +9 -0
- mlrun/datastore/dbfs_store.py +168 -0
- mlrun/datastore/helpers.py +18 -0
- mlrun/datastore/sources.py +1 -0
- mlrun/datastore/store_resources.py +2 -5
- mlrun/datastore/v3io.py +1 -2
- mlrun/db/__init__.py +4 -68
- mlrun/db/base.py +12 -0
- mlrun/db/factory.py +65 -0
- mlrun/db/httpdb.py +175 -20
- mlrun/db/nopdb.py +4 -2
- mlrun/execution.py +4 -2
- mlrun/feature_store/__init__.py +1 -0
- mlrun/feature_store/api.py +1 -2
- mlrun/feature_store/common.py +2 -1
- mlrun/feature_store/feature_set.py +1 -11
- mlrun/feature_store/feature_vector.py +340 -2
- mlrun/feature_store/ingestion.py +5 -10
- mlrun/feature_store/retrieval/base.py +118 -104
- mlrun/feature_store/retrieval/dask_merger.py +17 -10
- mlrun/feature_store/retrieval/job.py +4 -1
- mlrun/feature_store/retrieval/local_merger.py +18 -18
- mlrun/feature_store/retrieval/spark_merger.py +21 -14
- mlrun/feature_store/retrieval/storey_merger.py +22 -16
- mlrun/kfpops.py +3 -9
- mlrun/launcher/base.py +57 -53
- mlrun/launcher/client.py +5 -4
- mlrun/launcher/factory.py +24 -13
- mlrun/launcher/local.py +6 -6
- mlrun/launcher/remote.py +4 -4
- mlrun/lists.py +0 -11
- mlrun/model.py +11 -17
- mlrun/model_monitoring/__init__.py +2 -22
- mlrun/model_monitoring/features_drift_table.py +1 -1
- mlrun/model_monitoring/helpers.py +22 -210
- mlrun/model_monitoring/model_endpoint.py +1 -1
- mlrun/model_monitoring/model_monitoring_batch.py +127 -50
- mlrun/model_monitoring/prometheus.py +219 -0
- mlrun/model_monitoring/stores/__init__.py +16 -11
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +95 -23
- mlrun/model_monitoring/stores/models/mysql.py +47 -29
- mlrun/model_monitoring/stores/models/sqlite.py +47 -29
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +31 -19
- mlrun/model_monitoring/{stream_processing_fs.py → stream_processing.py} +206 -64
- mlrun/model_monitoring/tracking_policy.py +104 -0
- mlrun/package/packager.py +6 -8
- mlrun/package/packagers/default_packager.py +121 -10
- mlrun/package/packagers/numpy_packagers.py +1 -1
- mlrun/platforms/__init__.py +0 -2
- mlrun/platforms/iguazio.py +0 -56
- mlrun/projects/pipelines.py +53 -159
- mlrun/projects/project.py +10 -37
- mlrun/render.py +1 -1
- mlrun/run.py +8 -124
- mlrun/runtimes/__init__.py +6 -42
- mlrun/runtimes/base.py +29 -1249
- mlrun/runtimes/daskjob.py +2 -198
- mlrun/runtimes/funcdoc.py +0 -9
- mlrun/runtimes/function.py +25 -29
- mlrun/runtimes/kubejob.py +5 -29
- mlrun/runtimes/local.py +1 -1
- mlrun/runtimes/mpijob/__init__.py +2 -2
- mlrun/runtimes/mpijob/abstract.py +10 -1
- mlrun/runtimes/mpijob/v1.py +0 -76
- mlrun/runtimes/mpijob/v1alpha1.py +1 -74
- mlrun/runtimes/nuclio.py +3 -2
- mlrun/runtimes/pod.py +28 -18
- mlrun/runtimes/remotesparkjob.py +1 -15
- mlrun/runtimes/serving.py +14 -6
- mlrun/runtimes/sparkjob/__init__.py +0 -1
- mlrun/runtimes/sparkjob/abstract.py +4 -131
- mlrun/runtimes/utils.py +0 -26
- mlrun/serving/routers.py +7 -7
- mlrun/serving/server.py +11 -8
- mlrun/serving/states.py +7 -1
- mlrun/serving/v2_serving.py +6 -6
- mlrun/utils/helpers.py +23 -42
- mlrun/utils/notifications/notification/__init__.py +4 -0
- mlrun/utils/notifications/notification/webhook.py +61 -0
- mlrun/utils/notifications/notification_pusher.py +5 -25
- mlrun/utils/regex.py +7 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/METADATA +26 -25
- {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/RECORD +180 -158
- {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/WHEEL +1 -1
- mlrun/mlutils/data.py +0 -160
- mlrun/mlutils/models.py +0 -78
- mlrun/mlutils/plots.py +0 -902
- mlrun/utils/model_monitoring.py +0 -249
- /mlrun/{api/db/sqldb/session.py → common/db/sql_session.py} +0 -0
- {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/LICENSE +0 -0
- {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.4.0rc25.dist-info → mlrun-1.5.0rc2.dist-info}/top_level.txt +0 -0
--- a/mlrun/feature_store/retrieval/base.py
+++ b/mlrun/feature_store/retrieval/base.py
@@ -16,13 +16,12 @@ import abc
 import typing
 from datetime import datetime
 
-import dask.dataframe as dd
 import pandas as pd
 
 import mlrun
 from mlrun.datastore.targets import CSVTarget, ParquetTarget
 from mlrun.feature_store.feature_set import FeatureSet
-from mlrun.feature_store.feature_vector import Feature
+from mlrun.feature_store.feature_vector import Feature, JoinGraph
 
 from ...utils import logger, str_to_timestamp
 from ..feature_vector import OfflineVectorResponse
@@ -42,6 +41,7 @@ class BaseMerger(abc.ABC):
     def __init__(self, vector, **engine_args):
         self._relation = dict()
         self._join_type = "inner"
+        self._default_join_type = "default_join"
         self.vector = vector
 
         self._result_df = None
@@ -196,21 +196,34 @@ class BaseMerger(abc.ABC):
         )  # the struct of key is [[[],[]], ..] So that each record indicates which way the corresponding
         # featureset is connected to the previous one, and within each record the left keys are indicated in index 0
         # and the right keys in index 1, this keys will be the keys that will be used in this join
+        join_types = []
 
-        …
+        entity_rows_keys = (
+            list(entity_rows.columns) if entity_rows is not None else None
         )
+        join_graph = self._get_graph(
+            feature_set_objects, feature_set_fields, entity_rows_keys
+        )
+        if entity_rows_keys:
+            entity_rows = self._convert_entity_rows_to_engine_df(entity_rows)
+            dfs.append(entity_rows)
+            keys.append([[], []])
+            feature_sets.append(None)
+            join_types.append(None)
 
         filtered = False
-        for …
-            name = …
+        for step in join_graph.steps:
+            name = step.right_feature_set_name
             feature_set = feature_set_objects[name]
+            saved_columns_for_relation = list(
+                self.vector.get_feature_set_relations(feature_set).keys()
+            )
             feature_sets.append(feature_set)
             columns = feature_set_fields[name]
             self._origin_alias.update({name: alias for name, alias in columns})
             column_names = [name for name, _ in columns]
 
-            for column in …
+            for column in saved_columns_for_relation:
                 if column not in column_names:
                     column_names.append(column)
                     if column not in self._index_columns:
@@ -247,19 +260,19 @@ class BaseMerger(abc.ABC):
                 time_column,
             )
 
-            column_names += node.data["save_index"]
-            node.data["save_cols"] += node.data["save_index"]
             fs_entities_and_timestamp = list(feature_set.spec.entities.keys())
+            column_names += fs_entities_and_timestamp
+            saved_columns_for_relation += fs_entities_and_timestamp
             if feature_set.spec.timestamp_key:
                 column_names.append(feature_set.spec.timestamp_key)
-                …
+                saved_columns_for_relation.append(feature_set.spec.timestamp_key)
                 fs_entities_and_timestamp.append(feature_set.spec.timestamp_key)
 
             # rename columns to be unique for each feature set and select if needed
             rename_col_dict = {
                 column: f"{column}_{name}"
                 for column in column_names
-                if column not in …
+                if column not in saved_columns_for_relation
             }
             df_temp = self._rename_columns_and_select(
                 df,
@@ -274,7 +287,8 @@ class BaseMerger(abc.ABC):
             dfs.append(df)
             del df
 
-            keys.append([…
+            keys.append([step.left_keys, step.right_keys])
+            join_types.append([step.join_type, step.asof_join])
 
         # update alias according to the unique column name
         new_columns = []
@@ -293,31 +307,13 @@ class BaseMerger(abc.ABC):
                 "start_time and end_time can only be provided in conjunction with "
                 "a timestamp column, or when the at least one feature_set has a timestamp key"
             )
-        # convert pandas entity_rows to spark\dask DF if needed
-        if (
-            entity_rows is not None
-            and not hasattr(entity_rows, "rdd")
-            and self.engine == "spark"
-        ):
-            entity_rows = self.spark.createDataFrame(entity_rows)
-        elif (
-            entity_rows is not None
-            and not hasattr(entity_rows, "dask")
-            and self.engine == "dask"
-        ):
-            entity_rows = dd.from_pandas(
-                entity_rows, npartitions=len(entity_rows.columns)
-            )
-
         # join the feature data frames
         result_timestamp = self.merge(
-            …
-            entity_timestamp_column=entity_timestamp_column
-            if entity_rows is not None
-            else None,
+            entity_timestamp_column=entity_timestamp_column,
             featuresets=feature_sets,
             featureset_dfs=dfs,
             keys=keys,
+            join_types=join_types,
         )
 
         all_columns = None
@@ -386,41 +382,46 @@ class BaseMerger(abc.ABC):
 
     def merge(
         self,
-        entity_df,
         entity_timestamp_column: str,
         featuresets: list,
         featureset_dfs: list,
         keys: list = None,
+        join_types: list = None,
     ):
         """join the entities and feature set features into a result dataframe"""
-        …
-        for featureset, featureset_df, lr_key in zip(featuresets, featureset_dfs, keys):
-            if featureset.spec.timestamp_key and entity_timestamp_column:
+
+        merged_df = featureset_dfs.pop(0)
+        featureset = featuresets.pop(0)
+        keys.pop(0)
+        join_types.pop(0)
+
+        if not entity_timestamp_column and featureset:
+            entity_timestamp_column = featureset.spec.timestamp_key
+
+        for featureset, featureset_df, lr_key, join_type in zip(
+            featuresets, featureset_dfs, keys, join_types
+        ):
+            join_type, as_of = join_type
+            if (
+                featureset.spec.timestamp_key
+                and entity_timestamp_column
+                and join_type == self._default_join_type
+            ):
                 merge_func = self._asof_join
-            …
+            elif join_type == self._default_join_type:
+                merge_func = self._join
+            elif join_type != self._default_join_type and not as_of:
+                self._join_type = join_type
                 merge_func = self._join
+            else:
+                self._join_type = join_type
+                merge_func = self._asof_join
 
             merged_df = merge_func(
                 merged_df,
                 entity_timestamp_column,
-                featureset,
+                featureset.metadata.name,
+                featureset.spec.timestamp_key,
                 featureset_df,
                 lr_key[0],
                 lr_key[1],
@@ -441,7 +442,8 @@ class BaseMerger(abc.ABC):
         self,
         entity_df,
         entity_timestamp_column: str,
-        …
+        featureset_name: str,
+        featureset_timstamp: str,
         featureset_df,
         left_keys: list,
         right_keys: list,
@@ -452,7 +454,8 @@ class BaseMerger(abc.ABC):
         self,
         entity_df,
         entity_timestamp_column: str,
-        …
+        featureset_name: str,
+        featureset_timestamp: str,
         featureset_df,
         left_keys: list,
         right_keys: list,
@@ -480,10 +483,42 @@ class BaseMerger(abc.ABC):
         size = CSVTarget(path=target_path).write_dataframe(self._result_df, **kw)
         return size
 
+    def _get_graph(
+        self, feature_set_objects, feature_set_fields, entity_rows_keys=None
+    ):
+        join_graph = self.vector.spec.join_graph
+        if not join_graph:
+            fs_link_list = self._create_linked_relation_list(
+                feature_set_objects, feature_set_fields, entity_rows_keys
+            )
+            join_graph = None
+            for i, node in enumerate(fs_link_list):
+                if node.name != self._entity_rows_node_name and join_graph is None:
+                    join_graph = JoinGraph(first_feature_set=node.name)
+                elif node.name == self._entity_rows_node_name:
+                    continue
+                else:
+                    join_graph.inner(other_operand=node.name)
+
+                last_step = join_graph.steps[-1]
+                last_step.join_type = self._default_join_type
+                last_step.left_keys = node.left_keys
+                last_step.right_keys = node.right_keys
+        else:
+            join_graph._init_all_join_keys(feature_set_objects, self.vector)
+        return join_graph
+
     class _Node:
-        def __init__(…
+        def __init__(
+            self,
+            name: str,
+            order: int,
+            left_keys: typing.List[str] = None,
+            right_keys: typing.List[str] = None,
+        ):
             self.name = name
-            self.…
+            self.left_keys = left_keys if left_keys is not None else []
+            self.right_keys = right_keys if right_keys is not None else []
             # order of this feature_set in the original list
             self.order = order
             self.next = None
@@ -495,7 +530,9 @@ class BaseMerger(abc.ABC):
             return self.name == other.name
 
         def __copy__(self):
-            return BaseMerger._Node(…
+            return BaseMerger._Node(
+                self.name, self.order, self.left_keys, self.right_keys
+            )
 
     class _LinkedList:
         def __init__(self, head=None):
@@ -565,9 +602,6 @@ class BaseMerger(abc.ABC):
             node = self.find_node(other_head.name)
             if node is None:
                 return
-            for col in other_head.data["save_cols"]:
-                if col not in node.data["save_cols"]:
-                    node.data["save_cols"].append(col)
             for other_node in other_iter:
                 if self.find_node(other_node.name) is None:
                     while node is not None and other_node.order > node.order:
@@ -587,24 +621,24 @@ class BaseMerger(abc.ABC):
             head=BaseMerger._Node(
                 name=feature_set_names[0],
                 order=0,
-                data={
-                    "left_keys": [],
-                    "right_keys": [],
-                    "save_cols": [],
-                    "save_index": [],
-                },
             )
         )
         relation_linked_lists = []
         feature_set_entity_list_dict = {
             name: feature_set_objects[name].spec.entities for name in feature_set_names
         }
-        …
-            name: list(…
+        relation_val_list = {
+            name: list(
+                self.vector.get_feature_set_relations(
+                    feature_set_objects[name]
+                ).values()
+            )
             for name in feature_set_names
         }
-        …
-            name: list(…
+        relation_key_list = {
+            name: list(
+                self.vector.get_feature_set_relations(feature_set_objects[name]).keys()
+            )
             for name in feature_set_names
         }
@@ -612,12 +646,6 @@ class BaseMerger(abc.ABC):
             relations = BaseMerger._LinkedList()
             main_node = BaseMerger._Node(
                 name,
-                data={
-                    "left_keys": [],
-                    "right_keys": [],
-                    "save_cols": [],
-                    "save_index": [],
-                },
                 order=order,
             )
             relations.add_first(main_node)
@@ -629,8 +657,8 @@ class BaseMerger(abc.ABC):
             name_head = linked_list_relation.head.name
             feature_set_in_entity_list = feature_set_entity_list_dict[fs_name_in]
             feature_set_in_entity_list_names = list(feature_set_in_entity_list.keys())
-            entity_relation_list = …
-            col_relation_list = …
+            entity_relation_list = relation_val_list[name_head]
+            col_relation_list = relation_key_list[name_head]
             curr_col_relation_list = list(
                 map(
                     lambda ent: (
@@ -649,18 +677,11 @@ class BaseMerger(abc.ABC):
                 linked_list_relation.add_last(
                     BaseMerger._Node(
                         fs_name_in,
-                        …
-                            "right_keys": feature_set_in_entity_list_names,
-                            "save_cols": [],
-                            "save_index": [],
-                        },
+                        left_keys=curr_col_relation_list,
+                        right_keys=feature_set_in_entity_list_names,
                         order=name_in_order,
                     )
                 )
-                linked_list_relation.head.data["save_cols"].extend(
-                    curr_col_relation_list
-                )
             elif name_in_order > head_order and sorted(
                 feature_set_in_entity_list_names
             ) == sorted(feature_set_entity_list_dict[name_head].keys()):
@@ -669,16 +690,11 @@ class BaseMerger(abc.ABC):
                 linked_list_relation.add_last(
                     BaseMerger._Node(
                         fs_name_in,
-                        …
-                            "right_keys": keys,
-                            "save_cols": [],
-                            "save_index": keys,
-                        },
+                        left_keys=keys,
+                        right_keys=keys,
                         order=name_in_order,
                     )
                 )
-                linked_list_relation.head.data["save_index"] = keys
             return linked_list_relation
 
         def _build_entity_rows_relation(entity_rows_relation, fs_name, fs_order):
@@ -692,16 +708,11 @@ class BaseMerger(abc.ABC):
             entity_rows_relation.add_last(
                 BaseMerger._Node(
                     fs_name,
-                    …
-                        "right_keys": keys,
-                        "save_cols": [],
-                        "save_index": keys,
-                    },
+                    left_keys=keys,
+                    right_keys=keys,
                     order=fs_order,
                 )
             )
-            entity_rows_relation.head.data["save_index"] = keys
 
         if entity_rows_keys is not None:
             entity_rows_linked_relation = _create_relation(
@@ -805,3 +816,6 @@ class BaseMerger(abc.ABC):
         :param order_by_active: list of names to sort by.
         """
         raise NotImplementedError
+
+    def _convert_entity_rows_to_engine_df(self, entity_rows):
+        raise NotImplementedError
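The net effect of the base.py changes above: the join plan is no longer carried in per-node `data` dicts on a linked list but in an explicit `JoinGraph`, and `merge()` now dispatches per step between `_join` and `_asof_join` using the `"default_join"` sentinel. A minimal sketch of the new objects, using only names that appear in this diff (the feature set names are invented for illustration):

from mlrun.feature_store.feature_vector import JoinGraph

# What _get_graph() synthesizes when vector.spec.join_graph is empty:
# start from the first feature set, then inner-join each remaining one.
graph = JoinGraph(first_feature_set="transactions")
graph.inner(other_operand="user_events")

# merge() consumes exactly these per-step fields to pick a merge function:
for step in graph.steps:
    print(
        step.right_feature_set_name,  # feature set joined in at this step
        step.left_keys,  # join keys on the accumulated frame
        step.right_keys,  # join keys on the incoming frame
        step.join_type,  # explicit type, or the "default_join" sentinel
        step.asof_join,  # whether an explicit type should still merge as-of
    )

When the sentinel is left in place, the old heuristic survives: timestamped feature sets get an as-of join and everything else a plain join; an explicit `join_type` from a user-supplied graph instead overrides `self._join_type`.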
--- a/mlrun/feature_store/retrieval/dask_merger.py
+++ b/mlrun/feature_store/retrieval/dask_merger.py
@@ -41,8 +41,9 @@ class DaskFeatureMerger(BaseMerger):
         self,
         entity_df,
         entity_timestamp_column: str,
-        …
+        featureset_name: str,
+        featureset_timestamp: str,
+        featureset_df: list,
         left_keys: list,
         right_keys: list,
     ):
@@ -53,20 +54,20 @@ class DaskFeatureMerger(BaseMerger):
             sort_partition, timestamp=entity_timestamp_column
         )
         featureset_df = featureset_df.map_partitions(
-            sort_partition, timestamp=…
+            sort_partition, timestamp=featureset_timestamp
         )
 
         merged_df = merge_asof(
             entity_df,
             featureset_df,
             left_on=entity_timestamp_column,
-            right_on=…
+            right_on=featureset_timestamp,
             left_by=left_keys or None,
             right_by=right_keys or None,
-            suffixes=("", f"_{…
+            suffixes=("", f"_{featureset_name}_"),
         )
         for col in merged_df.columns:
-            if re.findall(f"_{…
+            if re.findall(f"_{featureset_name}_$", col):
                 self._append_drop_column(col)
 
         return merged_df
@@ -75,23 +76,23 @@ class DaskFeatureMerger(BaseMerger):
         self,
         entity_df,
         entity_timestamp_column: str,
-        …
+        featureset_name,
+        featureset_timestamp,
         featureset_df,
         left_keys: list,
         right_keys: list,
     ):
 
-        fs_name = featureset.metadata.name
         merged_df = merge(
             entity_df,
             featureset_df,
             how=self._join_type,
             left_on=left_keys,
             right_on=right_keys,
-            suffixes=("", f"_{…
+            suffixes=("", f"_{featureset_name}_"),
         )
         for col in merged_df.columns:
-            if re.findall(f"_{…
+            if re.findall(f"_{featureset_name}_$", col):
                 self._append_drop_column(col)
         return merged_df
 
@@ -155,3 +156,9 @@ class DaskFeatureMerger(BaseMerger):
 
     def _order_by(self, order_by_active):
         self._result_df.sort_values(by=order_by_active)
+
+    def _convert_entity_rows_to_engine_df(self, entity_rows):
+        if entity_rows is not None and not hasattr(entity_rows, "dask"):
+            return dd.from_pandas(entity_rows, npartitions=len(entity_rows.columns))
+
+        return entity_rows
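The spark/dask special-casing deleted from base.py (hunk `@@ -293,31 +307,13 @@`) resurfaces here as a per-engine hook, `_convert_entity_rows_to_engine_df`. A standalone sketch of what the Dask override does, runnable without mlrun (the function name is a stand-in for the method):

import dask.dataframe as dd
import pandas as pd

def convert_entity_rows_to_dask(entity_rows):
    # Mirrors DaskFeatureMerger._convert_entity_rows_to_engine_df: plain pandas
    # frames get partitioned; anything already exposing a dask task graph
    # (the `.dask` attribute) passes through untouched.
    if entity_rows is not None and not hasattr(entity_rows, "dask"):
        return dd.from_pandas(entity_rows, npartitions=len(entity_rows.columns))
    return entity_rows

pdf = pd.DataFrame({"user": [1, 2], "ts": pd.to_datetime(["2023-01-01", "2023-01-02"])})
ddf = convert_entity_rows_to_dask(pdf)  # becomes a dask DataFrame
assert convert_entity_rows_to_dask(ddf) is ddf  # dask input is returned as-is

Note that base.py now calls the hook only when `entity_rows` was actually supplied, so engines that need no conversion (the local merger below) can simply return the frame.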
--- a/mlrun/feature_store/retrieval/job.py
+++ b/mlrun/feature_store/retrieval/job.py
@@ -62,9 +62,12 @@ def run_merge_job(
     function = run_config.to_function(kind, merger.get_default_image(kind))
 
     # Avoid overriding a handler that was provided by the user
-    # The user shouldn't have to provide a handler, but we leave this option open just in case
     if not run_config.handler:
         function.with_code(body=default_code)
+    else:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            "get_offline_features does not support run_config with a handler"
+        )
 
     function.metadata.project = vector.metadata.project
     function.metadata.name = function.metadata.name or name
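This one is a behavioral tightening rather than a refactor: a `run_config` that names a handler used to silently skip the injection of the default merge code, and now it fails fast. Roughly, a call like the following would raise after this change (hypothetical project and vector URI; `RunConfig` here is assumed to be the feature store run configuration class mlrun exposes):

import mlrun
from mlrun.feature_store import RunConfig, get_offline_features

run_config = RunConfig(handler="my_handler")  # any user handler is now rejected
try:
    get_offline_features("store://feature-vectors/proj/my-vector", run_config=run_config)
except mlrun.errors.MLRunInvalidArgumentError as err:
    print(err)  # get_offline_features does not support run_config with a handler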
--- a/mlrun/feature_store/retrieval/local_merger.py
+++ b/mlrun/feature_store/retrieval/local_merger.py
@@ -30,45 +30,42 @@ class LocalFeatureMerger(BaseMerger):
         self,
         entity_df,
         entity_timestamp_column: str,
-        …
+        featureset_name,
+        featureset_timstamp,
+        featureset_df: list,
         left_keys: list,
         right_keys: list,
     ):
 
-        indexes = None
-        if not right_keys:
-            indexes = list(featureset.spec.entities.keys())
         index_col_not_in_entity = "index" not in entity_df.columns
         index_col_not_in_featureset = "index" not in featureset_df.columns
         entity_df[entity_timestamp_column] = pd.to_datetime(
             entity_df[entity_timestamp_column]
         )
-        featureset_df[…
-            featureset_df[…
+        featureset_df[featureset_timstamp] = pd.to_datetime(
+            featureset_df[featureset_timstamp]
         )
         entity_df.sort_values(by=entity_timestamp_column, inplace=True)
-        featureset_df.sort_values(by=…
+        featureset_df.sort_values(by=featureset_timstamp, inplace=True)
 
         merged_df = pd.merge_asof(
             entity_df,
             featureset_df,
             left_on=entity_timestamp_column,
-            right_on=…
-            by=indexes,
+            right_on=featureset_timstamp,
             left_by=left_keys or None,
             right_by=right_keys or None,
-            suffixes=("", f"_{…
+            suffixes=("", f"_{featureset_name}_"),
         )
         for col in merged_df.columns:
-            if re.findall(f"_{…
+            if re.findall(f"_{featureset_name}_$", col):
                 self._append_drop_column(col)
         # Undo indexing tricks for asof merge
         # to return the correct indexes and not
         # overload `index` columns
         if (
-            …
-            and "index" not in …
+            "index" not in left_keys
+            and "index" not in right_keys
             and index_col_not_in_entity
             and index_col_not_in_featureset
             and "index" in merged_df.columns
@@ -80,22 +77,22 @@ class LocalFeatureMerger(BaseMerger):
         self,
         entity_df,
         entity_timestamp_column: str,
-        …
+        featureset_name,
+        featureset_timestamp,
         featureset_df,
         left_keys: list,
         right_keys: list,
     ):
-        fs_name = featureset.metadata.name
         merged_df = pd.merge(
             entity_df,
             featureset_df,
             how=self._join_type,
             left_on=left_keys,
             right_on=right_keys,
-            suffixes=("", f"_{…
+            suffixes=("", f"_{featureset_name}_"),
         )
         for col in merged_df.columns:
-            if re.findall(f"_{…
+            if re.findall(f"_{featureset_name}_$", col):
                 self._append_drop_column(col)
         return merged_df
 
@@ -135,3 +132,6 @@ class LocalFeatureMerger(BaseMerger):
 
     def _order_by(self, order_by_active):
         self._result_df.sort_values(by=order_by_active, ignore_index=True, inplace=True)
+
+    def _convert_entity_rows_to_engine_df(self, entity_rows):
+        return entity_rows
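One pattern is shared by all three mergers above: duplicated right-hand columns are suffixed with `_<featureset_name>_`, and every column matching that suffix is queued for removal via `_append_drop_column`, so the accumulated frame's copy of a duplicated column always wins. A self-contained pandas illustration of the trick (frame contents invented):

import re

import pandas as pd

left = pd.DataFrame({"user": [1, 2], "amount": [10.0, 20.0]})
right = pd.DataFrame({"user": [1, 2], "amount": [99.0, 98.0], "score": [0.1, 0.9]})

name = "stats"  # stand-in for featureset_name
merged = pd.merge(left, right, how="inner", on="user", suffixes=("", f"_{name}_"))

# Same regex the mergers use: only the right frame's duplicate ("amount_stats_")
# matches; the left frame's bare "amount" survives.
to_drop = [col for col in merged.columns if re.findall(f"_{name}_$", col)]
print(merged.drop(columns=to_drop))  # columns: user, amount, score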