mlrun-1.3.2rc1-py3-none-any.whl → mlrun-1.3.2rc2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic.

Files changed (93)
  1. mlrun/api/api/deps.py +14 -1
  2. mlrun/api/api/endpoints/frontend_spec.py +0 -2
  3. mlrun/api/api/endpoints/functions.py +15 -27
  4. mlrun/api/api/endpoints/grafana_proxy.py +435 -74
  5. mlrun/api/api/endpoints/healthz.py +5 -18
  6. mlrun/api/api/endpoints/model_endpoints.py +33 -37
  7. mlrun/api/api/utils.py +6 -13
  8. mlrun/api/crud/__init__.py +14 -16
  9. mlrun/api/crud/logs.py +5 -7
  10. mlrun/api/crud/model_monitoring/__init__.py +2 -2
  11. mlrun/api/crud/model_monitoring/model_endpoint_store.py +847 -0
  12. mlrun/api/crud/model_monitoring/model_endpoints.py +105 -328
  13. mlrun/api/crud/pipelines.py +2 -3
  14. mlrun/api/db/sqldb/models/models_mysql.py +52 -19
  15. mlrun/api/db/sqldb/models/models_sqlite.py +52 -19
  16. mlrun/api/db/sqldb/session.py +19 -26
  17. mlrun/api/schemas/__init__.py +2 -0
  18. mlrun/api/schemas/constants.py +0 -13
  19. mlrun/api/schemas/frontend_spec.py +0 -1
  20. mlrun/api/schemas/model_endpoints.py +38 -195
  21. mlrun/api/schemas/schedule.py +2 -2
  22. mlrun/api/utils/clients/log_collector.py +5 -0
  23. mlrun/builder.py +9 -41
  24. mlrun/config.py +1 -76
  25. mlrun/data_types/__init__.py +1 -6
  26. mlrun/data_types/data_types.py +1 -3
  27. mlrun/datastore/__init__.py +2 -9
  28. mlrun/datastore/sources.py +20 -25
  29. mlrun/datastore/store_resources.py +1 -1
  30. mlrun/datastore/targets.py +34 -67
  31. mlrun/datastore/utils.py +4 -26
  32. mlrun/db/base.py +2 -4
  33. mlrun/db/filedb.py +5 -13
  34. mlrun/db/httpdb.py +32 -64
  35. mlrun/db/sqldb.py +2 -4
  36. mlrun/errors.py +0 -5
  37. mlrun/execution.py +0 -2
  38. mlrun/feature_store/api.py +8 -24
  39. mlrun/feature_store/feature_set.py +6 -28
  40. mlrun/feature_store/feature_vector.py +0 -2
  41. mlrun/feature_store/ingestion.py +11 -8
  42. mlrun/feature_store/retrieval/base.py +43 -271
  43. mlrun/feature_store/retrieval/dask_merger.py +153 -55
  44. mlrun/feature_store/retrieval/job.py +3 -12
  45. mlrun/feature_store/retrieval/local_merger.py +130 -48
  46. mlrun/feature_store/retrieval/spark_merger.py +125 -126
  47. mlrun/features.py +2 -7
  48. mlrun/model_monitoring/constants.py +6 -48
  49. mlrun/model_monitoring/helpers.py +35 -118
  50. mlrun/model_monitoring/model_monitoring_batch.py +260 -293
  51. mlrun/model_monitoring/stream_processing_fs.py +253 -220
  52. mlrun/platforms/iguazio.py +0 -33
  53. mlrun/projects/project.py +72 -34
  54. mlrun/runtimes/base.py +0 -5
  55. mlrun/runtimes/daskjob.py +0 -2
  56. mlrun/runtimes/function.py +3 -29
  57. mlrun/runtimes/kubejob.py +15 -39
  58. mlrun/runtimes/local.py +45 -7
  59. mlrun/runtimes/mpijob/abstract.py +0 -2
  60. mlrun/runtimes/mpijob/v1.py +0 -2
  61. mlrun/runtimes/pod.py +0 -2
  62. mlrun/runtimes/remotesparkjob.py +0 -2
  63. mlrun/runtimes/serving.py +0 -6
  64. mlrun/runtimes/sparkjob/abstract.py +2 -39
  65. mlrun/runtimes/sparkjob/spark3job.py +0 -2
  66. mlrun/serving/__init__.py +1 -2
  67. mlrun/serving/routers.py +35 -35
  68. mlrun/serving/server.py +12 -22
  69. mlrun/serving/states.py +30 -162
  70. mlrun/serving/v2_serving.py +10 -13
  71. mlrun/utils/clones.py +1 -1
  72. mlrun/utils/model_monitoring.py +96 -122
  73. mlrun/utils/version/version.json +2 -2
  74. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/METADATA +27 -23
  75. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/RECORD +79 -92
  76. mlrun/api/crud/model_monitoring/grafana.py +0 -427
  77. mlrun/datastore/spark_udf.py +0 -40
  78. mlrun/model_monitoring/__init__.py +0 -44
  79. mlrun/model_monitoring/common.py +0 -112
  80. mlrun/model_monitoring/model_endpoint.py +0 -141
  81. mlrun/model_monitoring/stores/__init__.py +0 -106
  82. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -448
  83. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
  84. mlrun/model_monitoring/stores/models/__init__.py +0 -23
  85. mlrun/model_monitoring/stores/models/base.py +0 -18
  86. mlrun/model_monitoring/stores/models/mysql.py +0 -100
  87. mlrun/model_monitoring/stores/models/sqlite.py +0 -98
  88. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -375
  89. mlrun/utils/db.py +0 -52
  90. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/LICENSE +0 -0
  91. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/WHEEL +0 -0
  92. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/entry_points.txt +0 -0
  93. {mlrun-1.3.2rc1.dist-info → mlrun-1.3.2rc2.dist-info}/top_level.txt +0 -0
mlrun/feature_store/api.py

@@ -28,6 +28,7 @@ from ..datastore.store_resources import parse_store_uri
 from ..datastore.targets import (
     BaseStoreTarget,
     get_default_prefix_for_source,
+    get_default_targets,
     get_target_driver,
     kind_to_driver,
     validate_target_list,
@@ -102,7 +103,6 @@ def get_offline_features(
     engine_args: dict = None,
     query: str = None,
     join_type: str = "inner",
-    order_by: Union[str, List[str]] = None,
     spark_service: str = None,
 ) -> OfflineVectorResponse:
     """retrieve offline feature vector results
@@ -161,8 +161,6 @@ def get_offline_features(
              * right: use only keys from right frame (SQL: right outer join)
              * outer: use union of keys from both frames (SQL: full outer join)
              * inner: use intersection of keys from both frames (SQL: inner join).
-    :param order_by: Name or list of names to order by. The name or the names in the list can be the feature name
-                     or the alias of the feature you pass in the feature list.
     """
     if isinstance(feature_vector, FeatureVector):
         update_stats = True
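
Note: rc2 drops the order_by argument from get_offline_features(), so ordering is left to the caller. A minimal sketch of one way to adapt, assuming an existing feature vector (the URI and the event_time column below are illustrative, not taken from this diff):

import mlrun.feature_store as fstore

resp = fstore.get_offline_features(
    "store://feature-vectors/my-project/my-vector",  # hypothetical vector URI
    with_indexes=True,
)
df = resp.to_dataframe()
# with order_by gone, sort the result explicitly (placeholder column name)
df = df.sort_values(by="event_time")
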
@@ -192,7 +190,6 @@ def get_offline_features(
             with_indexes=with_indexes,
             query=query,
             join_type=join_type,
-            order_by=order_by,
         )

     start_time = str_to_timestamp(start_time)
@@ -216,7 +213,6 @@ def get_offline_features(
         update_stats=update_stats,
         query=query,
         join_type=join_type,
-        order_by=order_by,
     )

@@ -409,15 +405,6 @@ def ingest(
         raise mlrun.errors.MLRunInvalidArgumentError(
             "feature set and source must be specified"
         )
-    if (
-        not mlrun_context
-        and not targets
-        and not (featureset.spec.targets or featureset.spec.with_default_targets)
-        and (run_config is not None and not run_config.local)
-    ):
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            f"Feature set {featureset.metadata.name} is remote ingested with no targets defined, aborting"
-        )

     if featureset is not None:
         featureset.validate_steps(namespace=namespace)
@@ -490,11 +477,10 @@ def ingest(
                 f"Source.end_time is {str(source.end_time)}"
             )

-        if mlrun_context:
-            mlrun_context.logger.info(
-                f"starting ingestion task to {featureset.uri}.{filter_time_string}"
-            )
-
+    if mlrun_context:
+        mlrun_context.logger.info(
+            f"starting ingestion task to {featureset.uri}.{filter_time_string}"
+        )
     return_df = False

     if featureset.spec.passthrough:
@@ -503,7 +489,7 @@ def ingest(

     namespace = namespace or get_caller_globals()

-    targets_to_ingest = targets or featureset.spec.targets
+    targets_to_ingest = targets or featureset.spec.targets or get_default_targets()
     targets_to_ingest = copy.deepcopy(targets_to_ingest)

     validate_target_paths_for_engine(targets_to_ingest, featureset.spec.engine, source)
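
With this change ingest() falls back to the default targets when neither the call nor the feature set defines any, rather than ingesting to no target. A minimal sketch under that assumption; the feature set, entity, and dataframe below are illustrative:

import pandas as pd
import mlrun.feature_store as fstore

# hypothetical feature set with no explicit targets configured
stocks_set = fstore.FeatureSet("stocks", entities=[fstore.Entity("ticker")])
df = pd.DataFrame({"ticker": ["GOOG", "MSFT"], "price": [720.5, 57.2]})

# in rc2 the targets resolve via get_default_targets()
# (typically parquet + nosql) instead of writing nowhere
fstore.ingest(stocks_set, df)
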
@@ -700,9 +686,7 @@ def preview(
         )
     # reduce the size of the ingestion if we do not infer stats
     rows_limit = (
-        None
-        if InferOptions.get_common_options(options, InferOptions.Stats)
-        else 1000
+        0 if InferOptions.get_common_options(options, InferOptions.Stats) else 1000
     )
     source = init_featureset_graph(
         source,
@@ -786,7 +770,7 @@ def deploy_ingestion_service(
         name=featureset.metadata.name,
     )

-    targets_to_ingest = targets or featureset.spec.targets
+    targets_to_ingest = targets or featureset.spec.targets or get_default_targets()
     targets_to_ingest = copy.deepcopy(targets_to_ingest)
     featureset.update_targets_for_ingest(targets_to_ingest)

mlrun/feature_store/feature_set.py

@@ -118,7 +118,7 @@ class FeatureSetSpec(ModelObj):
         self.owner = owner
         self.description = description
         self.entities: List[Union[Entity, str]] = entities or []
-        self.relations: Dict[str, Union[Entity, str]] = relations or {}
+        self.relations: Dict[str, Entity] = relations or {}
         self.features: List[Feature] = features or []
         self.partition_keys = partition_keys or []
         self.timestamp_key = timestamp_key
@@ -131,7 +131,6 @@ class FeatureSetSpec(ModelObj):
         self.engine = engine
         self.output_path = output_path or mlconf.artifact_path
         self.passthrough = passthrough
-        self.with_default_targets = True

     @property
     def entities(self) -> List[Entity]:
@@ -233,9 +232,6 @@ class FeatureSetSpec(ModelObj):

     @relations.setter
     def relations(self, relations: Dict[str, Entity]):
-        for col, ent in relations.items():
-            if isinstance(ent, str):
-                relations[col] = Entity(ent)
         self._relations = ObjectDict.from_dict({"entity": Entity}, relations, "entity")

     def require_processing(self):
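
Since the setter no longer wraps plain strings in Entity, relations are passed as Entity objects in rc2. A short sketch; the feature set and column names are illustrative:

import mlrun.feature_store as fstore
from mlrun.feature_store import Entity

# rc1 also accepted {"department_id": "department_id"}; rc2 expects Entity values
employees_set = fstore.FeatureSet(
    "employees",
    entities=[Entity("id")],
    relations={"department_id": Entity("department_id")},
)
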
@@ -328,7 +324,7 @@ class FeatureSet(ModelObj):
         timestamp_key: str = None,
         engine: str = None,
         label_column: str = None,
-        relations: Dict[str, Union[Entity, str]] = None,
+        relations: Dict[str, Entity] = None,
         passthrough: bool = None,
     ):
         """Feature set object, defines a set of features and their data pipeline
@@ -376,7 +372,6 @@ class FeatureSet(ModelObj):
         self.status = None
         self._last_state = ""
         self._aggregations = {}
-        self.set_targets()

     @property
     def spec(self) -> FeatureSetSpec:
@@ -475,25 +470,10 @@ class FeatureSet(ModelObj):
             )
         targets = targets or []
         if with_defaults:
-            self.spec.with_default_targets = True
             targets.extend(get_default_targets())
-        else:
-            self.spec.with_default_targets = False
-
-        self.spec.targets = []
-        self.__set_targets_add_targets_helper(targets)
-
-        if default_final_step:
-            self.spec.graph.final_step = default_final_step
-
-    def __set_targets_add_targets_helper(self, targets):
-        """
-        Add the desired target list

-        :param targets: list of target type names ('csv', 'nosql', ..) or target objects
-                        CSVTarget(), ParquetTarget(), NoSqlTarget(), StreamTarget(), ..
-        """
         validate_target_list(targets=targets)
+
         for target in targets:
             kind = target.kind if hasattr(target, "kind") else target
             if kind not in TargetTypes.all():
@@ -505,6 +485,8 @@ class FeatureSet(ModelObj):
                     target, name=str(target), partitioned=(target == "parquet")
                 )
             self.spec.targets.update(target)
+        if default_final_step:
+            self.spec.graph.final_step = default_final_step

     def validate_steps(self, namespace):
         if not self.spec:
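
For context, set_targets() keeps its public shape after this refactor: it validates the requested targets, optionally appends the defaults, and sets the final step; only the with_default_targets bookkeeping and the private helper are gone. A minimal usage sketch (the target choice and names are illustrative):

import mlrun.feature_store as fstore
from mlrun.datastore.targets import ParquetTarget

fset = fstore.FeatureSet("measurements", entities=[fstore.Entity("patient_id")])
# explicit target plus the defaults; pass with_defaults=False to skip the defaults
fset.set_targets(targets=[ParquetTarget(name="fast_parquet")], with_defaults=True)
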
@@ -941,11 +923,7 @@ class FeatureSet(ModelObj):
                 raise mlrun.errors.MLRunNotFoundError(
                     "passthrough feature set {self.metadata.name} with no source"
                 )
-            df = self.spec.source.to_dataframe()
-            # to_dataframe() can sometimes return an iterator of dataframes instead of one dataframe
-            if not isinstance(df, pd.DataFrame):
-                df = pd.concat(df)
-            return df
+            return self.spec.source.to_dataframe()

         target = get_offline_target(self, name=target_name)
         if not target:
mlrun/feature_store/feature_vector.py

@@ -520,8 +520,6 @@ class OnlineVectorService:
                     v = data[name]
                     if v is None or (type(v) == float and (np.isinf(v) or np.isnan(v))):
                         data[name] = self._impute_values.get(name, v)
-            for name in list(self.vector.spec.entity_fields.keys()):
-                data.pop(name, None)

             if as_list and data:
                 data = [
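
This hunk changes what OnlineVectorService.get() leaves in each returned record (the rc1 code popped the vector's entity fields from the response). A minimal sketch of the call site, assuming an existing feature vector; the URI and entity key are illustrative:

import mlrun.feature_store as fstore

svc = fstore.get_online_feature_service(
    "store://feature-vectors/my-project/my-vector"  # hypothetical vector URI
)
try:
    resp = svc.get([{"customer_id": "42"}], as_list=False)
    print(resp)
finally:
    svc.close()
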
mlrun/feature_store/ingestion.py

@@ -89,7 +89,7 @@ def init_featureset_graph(
     key_fields = entity_columns if entity_columns else None

     sizes = [0] * len(targets)
-    result_dfs = []
+    data_result = None
     total_rows = 0
     targets = [get_target_driver(target, featureset) for target in targets]
     if featureset.spec.passthrough:
@@ -100,11 +100,11 @@ def init_featureset_graph(
         # set the entities to be the indexes of the df
         event.body = entities_to_index(featureset, event.body)

-        df = server.run(event, get_body=True)
-        if df is not None:
+        data = server.run(event, get_body=True)
+        if data is not None:
             for i, target in enumerate(targets):
                 size = target.write_dataframe(
-                    df,
+                    data,
                     key_column=key_fields,
                     timestamp_key=featureset.spec.timestamp_key,
                     chunk_id=chunk_id,
@@ -112,18 +112,21 @@ def init_featureset_graph(
                 if size:
                     sizes[i] += size
             chunk_id += 1
-            result_dfs.append(df)
-            total_rows += df.shape[0]
+            if data_result is None:
+                # in case of multiple chunks only return the first chunk (last may be too small)
+                data_result = data
+            total_rows += data.shape[0]
             if rows_limit and total_rows >= rows_limit:
                 break

+    # todo: fire termination event if iterator
+
     for i, target in enumerate(targets):
         target_status = target.update_resource_status("ready", size=sizes[i])
         if verbose:
             logger.info(f"wrote target: {target_status}")

-    result_df = pd.concat(result_dfs)
-    return result_df.head(rows_limit)
+    return data_result


 def featureset_initializer(server):
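
Net effect of the ingestion.py hunks: instead of concatenating every chunk and trimming with head(rows_limit), init_featureset_graph() now keeps only the first chunk and returns it, which is what a caller such as preview() ends up with. A minimal sketch of the consumer side (the feature set and dataframe are illustrative):

import pandas as pd
import mlrun.feature_store as fstore

quotes_set = fstore.FeatureSet("quotes", entities=[fstore.Entity("ticker")])
quotes_df = pd.DataFrame({"ticker": ["AAPL", "AMZN"], "bid": [180.1, 135.4]})

# preview() runs the graph locally; with this change a chunked source would
# yield only its first chunk as the returned sample dataframe
sample_df = fstore.preview(quotes_set, quotes_df)
print(sample_df.head())
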