mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of mlrun has been flagged as possibly problematic.
Files changed (275)
  1. mlrun/__init__.py +26 -22
  2. mlrun/__main__.py +15 -16
  3. mlrun/alerts/alert.py +150 -15
  4. mlrun/api/schemas/__init__.py +1 -9
  5. mlrun/artifacts/__init__.py +2 -3
  6. mlrun/artifacts/base.py +62 -19
  7. mlrun/artifacts/dataset.py +17 -17
  8. mlrun/artifacts/document.py +454 -0
  9. mlrun/artifacts/manager.py +28 -18
  10. mlrun/artifacts/model.py +91 -59
  11. mlrun/artifacts/plots.py +2 -2
  12. mlrun/common/constants.py +8 -0
  13. mlrun/common/formatters/__init__.py +1 -0
  14. mlrun/common/formatters/artifact.py +1 -1
  15. mlrun/common/formatters/feature_set.py +2 -0
  16. mlrun/common/formatters/function.py +1 -0
  17. mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
  18. mlrun/common/formatters/pipeline.py +1 -2
  19. mlrun/common/formatters/project.py +9 -0
  20. mlrun/common/model_monitoring/__init__.py +0 -5
  21. mlrun/common/model_monitoring/helpers.py +12 -62
  22. mlrun/common/runtimes/constants.py +25 -4
  23. mlrun/common/schemas/__init__.py +9 -5
  24. mlrun/common/schemas/alert.py +114 -19
  25. mlrun/common/schemas/api_gateway.py +3 -3
  26. mlrun/common/schemas/artifact.py +22 -9
  27. mlrun/common/schemas/auth.py +8 -4
  28. mlrun/common/schemas/background_task.py +7 -7
  29. mlrun/common/schemas/client_spec.py +4 -4
  30. mlrun/common/schemas/clusterization_spec.py +2 -2
  31. mlrun/common/schemas/common.py +53 -3
  32. mlrun/common/schemas/constants.py +15 -0
  33. mlrun/common/schemas/datastore_profile.py +1 -1
  34. mlrun/common/schemas/feature_store.py +9 -9
  35. mlrun/common/schemas/frontend_spec.py +4 -4
  36. mlrun/common/schemas/function.py +10 -10
  37. mlrun/common/schemas/hub.py +1 -1
  38. mlrun/common/schemas/k8s.py +3 -3
  39. mlrun/common/schemas/memory_reports.py +3 -3
  40. mlrun/common/schemas/model_monitoring/__init__.py +4 -8
  41. mlrun/common/schemas/model_monitoring/constants.py +127 -46
  42. mlrun/common/schemas/model_monitoring/grafana.py +18 -12
  43. mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
  44. mlrun/common/schemas/notification.py +24 -3
  45. mlrun/common/schemas/object.py +1 -1
  46. mlrun/common/schemas/pagination.py +4 -4
  47. mlrun/common/schemas/partition.py +142 -0
  48. mlrun/common/schemas/pipeline.py +3 -3
  49. mlrun/common/schemas/project.py +26 -18
  50. mlrun/common/schemas/runs.py +3 -3
  51. mlrun/common/schemas/runtime_resource.py +5 -5
  52. mlrun/common/schemas/schedule.py +1 -1
  53. mlrun/common/schemas/secret.py +1 -1
  54. mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
  55. mlrun/common/schemas/tag.py +3 -3
  56. mlrun/common/schemas/workflow.py +6 -5
  57. mlrun/common/types.py +1 -0
  58. mlrun/config.py +157 -89
  59. mlrun/data_types/__init__.py +5 -3
  60. mlrun/data_types/infer.py +13 -3
  61. mlrun/data_types/spark.py +2 -1
  62. mlrun/datastore/__init__.py +59 -18
  63. mlrun/datastore/alibaba_oss.py +4 -1
  64. mlrun/datastore/azure_blob.py +4 -1
  65. mlrun/datastore/base.py +19 -24
  66. mlrun/datastore/datastore.py +10 -4
  67. mlrun/datastore/datastore_profile.py +178 -45
  68. mlrun/datastore/dbfs_store.py +4 -1
  69. mlrun/datastore/filestore.py +4 -1
  70. mlrun/datastore/google_cloud_storage.py +4 -1
  71. mlrun/datastore/hdfs.py +4 -1
  72. mlrun/datastore/inmem.py +4 -1
  73. mlrun/datastore/redis.py +4 -1
  74. mlrun/datastore/s3.py +14 -3
  75. mlrun/datastore/sources.py +89 -92
  76. mlrun/datastore/store_resources.py +7 -4
  77. mlrun/datastore/storeytargets.py +51 -16
  78. mlrun/datastore/targets.py +38 -31
  79. mlrun/datastore/utils.py +87 -4
  80. mlrun/datastore/v3io.py +4 -1
  81. mlrun/datastore/vectorstore.py +291 -0
  82. mlrun/datastore/wasbfs/fs.py +13 -12
  83. mlrun/db/base.py +286 -100
  84. mlrun/db/httpdb.py +1562 -490
  85. mlrun/db/nopdb.py +250 -83
  86. mlrun/errors.py +6 -2
  87. mlrun/execution.py +194 -50
  88. mlrun/feature_store/__init__.py +2 -10
  89. mlrun/feature_store/api.py +20 -458
  90. mlrun/feature_store/common.py +9 -9
  91. mlrun/feature_store/feature_set.py +20 -18
  92. mlrun/feature_store/feature_vector.py +105 -479
  93. mlrun/feature_store/feature_vector_utils.py +466 -0
  94. mlrun/feature_store/retrieval/base.py +15 -11
  95. mlrun/feature_store/retrieval/job.py +2 -1
  96. mlrun/feature_store/retrieval/storey_merger.py +1 -1
  97. mlrun/feature_store/steps.py +3 -3
  98. mlrun/features.py +30 -13
  99. mlrun/frameworks/__init__.py +1 -2
  100. mlrun/frameworks/_common/__init__.py +1 -2
  101. mlrun/frameworks/_common/artifacts_library.py +2 -2
  102. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  103. mlrun/frameworks/_common/model_handler.py +31 -31
  104. mlrun/frameworks/_common/producer.py +3 -1
  105. mlrun/frameworks/_dl_common/__init__.py +1 -2
  106. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  107. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  108. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  109. mlrun/frameworks/_ml_common/__init__.py +1 -2
  110. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  111. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  112. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  113. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  114. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  115. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  116. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  117. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  118. mlrun/frameworks/huggingface/__init__.py +1 -2
  119. mlrun/frameworks/huggingface/model_server.py +9 -9
  120. mlrun/frameworks/lgbm/__init__.py +47 -44
  121. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  122. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  123. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  124. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  125. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  126. mlrun/frameworks/lgbm/model_handler.py +15 -11
  127. mlrun/frameworks/lgbm/model_server.py +11 -7
  128. mlrun/frameworks/lgbm/utils.py +2 -2
  129. mlrun/frameworks/onnx/__init__.py +1 -2
  130. mlrun/frameworks/onnx/dataset.py +3 -3
  131. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  132. mlrun/frameworks/onnx/model_handler.py +7 -5
  133. mlrun/frameworks/onnx/model_server.py +8 -6
  134. mlrun/frameworks/parallel_coordinates.py +11 -11
  135. mlrun/frameworks/pytorch/__init__.py +22 -23
  136. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  137. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  138. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  139. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  140. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  141. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  142. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  143. mlrun/frameworks/pytorch/model_handler.py +21 -17
  144. mlrun/frameworks/pytorch/model_server.py +13 -9
  145. mlrun/frameworks/sklearn/__init__.py +19 -18
  146. mlrun/frameworks/sklearn/estimator.py +2 -2
  147. mlrun/frameworks/sklearn/metric.py +3 -3
  148. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  149. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  150. mlrun/frameworks/sklearn/model_handler.py +4 -3
  151. mlrun/frameworks/tf_keras/__init__.py +11 -12
  152. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  153. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  154. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  155. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  156. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  157. mlrun/frameworks/tf_keras/model_server.py +12 -8
  158. mlrun/frameworks/xgboost/__init__.py +19 -18
  159. mlrun/frameworks/xgboost/model_handler.py +13 -9
  160. mlrun/k8s_utils.py +2 -5
  161. mlrun/launcher/base.py +3 -4
  162. mlrun/launcher/client.py +2 -2
  163. mlrun/launcher/local.py +6 -2
  164. mlrun/launcher/remote.py +1 -1
  165. mlrun/lists.py +8 -4
  166. mlrun/model.py +132 -46
  167. mlrun/model_monitoring/__init__.py +3 -5
  168. mlrun/model_monitoring/api.py +113 -98
  169. mlrun/model_monitoring/applications/__init__.py +0 -5
  170. mlrun/model_monitoring/applications/_application_steps.py +81 -50
  171. mlrun/model_monitoring/applications/base.py +467 -14
  172. mlrun/model_monitoring/applications/context.py +212 -134
  173. mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
  174. mlrun/model_monitoring/applications/evidently/base.py +146 -0
  175. mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
  176. mlrun/model_monitoring/applications/results.py +67 -15
  177. mlrun/model_monitoring/controller.py +701 -315
  178. mlrun/model_monitoring/db/__init__.py +0 -2
  179. mlrun/model_monitoring/db/_schedules.py +242 -0
  180. mlrun/model_monitoring/db/_stats.py +189 -0
  181. mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
  182. mlrun/model_monitoring/db/tsdb/base.py +243 -49
  183. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
  184. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  185. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
  186. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
  187. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  188. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
  189. mlrun/model_monitoring/helpers.py +356 -114
  190. mlrun/model_monitoring/stream_processing.py +190 -345
  191. mlrun/model_monitoring/tracking_policy.py +11 -4
  192. mlrun/model_monitoring/writer.py +49 -90
  193. mlrun/package/__init__.py +3 -6
  194. mlrun/package/context_handler.py +2 -2
  195. mlrun/package/packager.py +12 -9
  196. mlrun/package/packagers/__init__.py +0 -2
  197. mlrun/package/packagers/default_packager.py +14 -11
  198. mlrun/package/packagers/numpy_packagers.py +16 -7
  199. mlrun/package/packagers/pandas_packagers.py +18 -18
  200. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  201. mlrun/package/packagers_manager.py +35 -32
  202. mlrun/package/utils/__init__.py +0 -3
  203. mlrun/package/utils/_pickler.py +6 -6
  204. mlrun/platforms/__init__.py +47 -16
  205. mlrun/platforms/iguazio.py +4 -1
  206. mlrun/projects/operations.py +30 -30
  207. mlrun/projects/pipelines.py +116 -47
  208. mlrun/projects/project.py +1292 -329
  209. mlrun/render.py +5 -9
  210. mlrun/run.py +57 -14
  211. mlrun/runtimes/__init__.py +1 -3
  212. mlrun/runtimes/base.py +30 -22
  213. mlrun/runtimes/daskjob.py +9 -9
  214. mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
  215. mlrun/runtimes/function_reference.py +5 -2
  216. mlrun/runtimes/generators.py +3 -2
  217. mlrun/runtimes/kubejob.py +6 -7
  218. mlrun/runtimes/mounts.py +574 -0
  219. mlrun/runtimes/mpijob/__init__.py +0 -2
  220. mlrun/runtimes/mpijob/abstract.py +7 -6
  221. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  222. mlrun/runtimes/nuclio/application/application.py +11 -13
  223. mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
  224. mlrun/runtimes/nuclio/function.py +127 -70
  225. mlrun/runtimes/nuclio/serving.py +105 -37
  226. mlrun/runtimes/pod.py +159 -54
  227. mlrun/runtimes/remotesparkjob.py +3 -2
  228. mlrun/runtimes/sparkjob/__init__.py +0 -2
  229. mlrun/runtimes/sparkjob/spark3job.py +22 -12
  230. mlrun/runtimes/utils.py +7 -6
  231. mlrun/secrets.py +2 -2
  232. mlrun/serving/__init__.py +8 -0
  233. mlrun/serving/merger.py +7 -5
  234. mlrun/serving/remote.py +35 -22
  235. mlrun/serving/routers.py +186 -240
  236. mlrun/serving/server.py +41 -10
  237. mlrun/serving/states.py +432 -118
  238. mlrun/serving/utils.py +13 -2
  239. mlrun/serving/v1_serving.py +3 -2
  240. mlrun/serving/v2_serving.py +161 -203
  241. mlrun/track/__init__.py +1 -1
  242. mlrun/track/tracker.py +2 -2
  243. mlrun/track/trackers/mlflow_tracker.py +6 -5
  244. mlrun/utils/async_http.py +35 -22
  245. mlrun/utils/clones.py +7 -4
  246. mlrun/utils/helpers.py +511 -58
  247. mlrun/utils/logger.py +119 -13
  248. mlrun/utils/notifications/notification/__init__.py +22 -19
  249. mlrun/utils/notifications/notification/base.py +39 -15
  250. mlrun/utils/notifications/notification/console.py +6 -6
  251. mlrun/utils/notifications/notification/git.py +11 -11
  252. mlrun/utils/notifications/notification/ipython.py +10 -9
  253. mlrun/utils/notifications/notification/mail.py +176 -0
  254. mlrun/utils/notifications/notification/slack.py +16 -8
  255. mlrun/utils/notifications/notification/webhook.py +24 -8
  256. mlrun/utils/notifications/notification_pusher.py +191 -200
  257. mlrun/utils/regex.py +12 -2
  258. mlrun/utils/version/version.json +2 -2
  259. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/METADATA +81 -54
  260. mlrun-1.8.0.dist-info/RECORD +351 -0
  261. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
  262. mlrun/model_monitoring/applications/evidently_base.py +0 -137
  263. mlrun/model_monitoring/db/stores/__init__.py +0 -136
  264. mlrun/model_monitoring/db/stores/base/store.py +0 -213
  265. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
  266. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
  267. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
  268. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
  269. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
  270. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
  271. mlrun/model_monitoring/model_endpoint.py +0 -118
  272. mlrun-1.7.2rc3.dist-info/RECORD +0 -351
  273. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
  274. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
  275. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import copy
 import importlib.util
 import pathlib
@@ -19,7 +20,6 @@ from datetime import datetime
 from typing import Any, Optional, Union

 import pandas as pd
-from deprecated import deprecated

 import mlrun
 import mlrun.errors
@@ -47,362 +47,20 @@ from .common import (
     get_feature_vector_by_uri,
     verify_feature_set_exists,
     verify_feature_set_permissions,
-    verify_feature_vector_permissions,
 )
 from .feature_set import FeatureSet
-from .feature_vector import (
-    FeatureVector,
-    FixedWindowType,
-    OfflineVectorResponse,
-    OnlineVectorService,
-)
 from .ingestion import (
     context_to_ingestion_params,
     init_featureset_graph,
     run_ingestion_job,
     run_spark_graph,
 )
-from .retrieval import RemoteVectorResponse, get_merger, run_merge_job

 _v3iofs = None
 spark_transform_handler = "transform"
 _TRANS_TABLE = str.maketrans({" ": "_", "(": "", ")": ""})


-def _features_to_vector_and_check_permissions(features, update_stats):
-    if isinstance(features, str):
-        vector = get_feature_vector_by_uri(features, update=update_stats)
-    elif isinstance(features, FeatureVector):
-        vector = features
-        if not vector.metadata.name:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "feature vector name must be specified"
-            )
-        verify_feature_vector_permissions(
-            vector, mlrun.common.schemas.AuthorizationAction.update
-        )
-
-        vector.save()
-    else:
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            f"illegal features value/type ({type(features)})"
-        )
-    return vector
-
-
-@deprecated(
-    version="1.6.0",
-    reason="get_offline_features() will be removed in 1.8.0, please instead use "
-    "get_feature_vector('store://feature_vector_name').get_offline_features()",
-    category=FutureWarning,
-)
-def get_offline_features(
-    feature_vector: Union[str, FeatureVector],
-    entity_rows=None,
-    entity_timestamp_column: str = None,
-    target: DataTargetBase = None,
-    run_config: RunConfig = None,
-    drop_columns: list[str] = None,
-    start_time: Union[str, datetime] = None,
-    end_time: Union[str, datetime] = None,
-    with_indexes: bool = False,
-    update_stats: bool = False,
-    engine: str = None,
-    engine_args: dict = None,
-    query: str = None,
-    order_by: Union[str, list[str]] = None,
-    spark_service: str = None,
-    timestamp_for_filtering: Union[str, dict[str, str]] = None,
-    additional_filters: list = None,
-):
-    """retrieve offline feature vector results
-
-    specify a feature vector object/uri and retrieve the desired features, their metadata
-    and statistics. returns :py:class:`~mlrun.feature_store.OfflineVectorResponse`,
-    results can be returned as a dataframe or written to a target
-
-    The start_time and end_time attributes allow filtering the data to a given time range, they accept
-    string values or pandas `Timestamp` objects, string values can also be relative, for example:
-    "now", "now - 1d2h", "now+5m", where a valid pandas Timedelta string follows the verb "now",
-    for time alignment you can use the verb "floor" e.g. "now -1d floor 1H" will align the time to the last hour
-    (the floor string is passed to pandas.Timestamp.floor(), can use D, H, T, S for day, hour, min, sec alignment).
-    Another option to filter the data is by the `query` argument - can be seen in the example.
-    example::
-
-        features = [
-            "stock-quotes.bid",
-            "stock-quotes.asks_sum_5h",
-            "stock-quotes.ask as mycol",
-            "stocks.*",
-        ]
-        vector = FeatureVector(features=features)
-        resp = get_offline_features(
-            vector,
-            entity_rows=trades,
-            entity_timestamp_column="time",
-            query="ticker in ['GOOG'] and bid>100",
-        )
-        print(resp.to_dataframe())
-        print(vector.get_stats_table())
-        resp.to_parquet("./out.parquet")
-
-    :param feature_vector: feature vector uri or FeatureVector object. passing feature vector obj requires
-                           update permissions
-    :param entity_rows: dataframe with entity rows to join with
-    :param target: where to write the results to
-    :param drop_columns: list of columns to drop from the final result
-    :param entity_timestamp_column: timestamp column name in the entity rows dataframe. can be specified
-                                    only if param entity_rows was specified.
-    :param run_config: function and/or run configuration
-                       see :py:class:`~mlrun.feature_store.RunConfig`
-    :param start_time: datetime, low limit of time needed to be filtered. Optional.
-    :param end_time: datetime, high limit of time needed to be filtered. Optional.
-    :param with_indexes: Return vector with/without the entities and the timestamp_key of the feature sets
-                         and with/without entity_timestamp_column and timestamp_for_filtering columns.
-                         This property can be specified also in the feature vector spec
-                         (feature_vector.spec.with_indexes)
-                         (default False)
-    :param update_stats: update features statistics from the requested feature sets on the vector.
-                         (default False).
-    :param engine: processing engine kind ("local", "dask", or "spark")
-    :param engine_args: kwargs for the processing engine
-    :param query: The query string used to filter rows on the output
-    :param spark_service: Name of the spark service to be used (when using a remote-spark runtime)
-    :param order_by: Name or list of names to order by. The name or the names in the list can be the
-                     feature name or the alias of the feature you pass in the feature list.
-    :param timestamp_for_filtering: name of the column to filter by, can be str for all the feature sets or a
-                                    dictionary ({<feature set name>: <timestamp column name>, ...})
-                                    that indicates the timestamp column name for each feature set. Optional.
-                                    By default, the filter executes on the timestamp_key of each feature set.
-                                    Note: the time filtering is performed on each feature set before the
-                                    merge process using start_time and end_time params.
-    :param additional_filters: List of additional_filter conditions as tuples.
-                               Each tuple should be in the format (column_name, operator, value).
-                               Supported operators: "=", ">=", "<=", ">", "<".
-                               Example: [("Product", "=", "Computer")]
-                               For all supported filters, please see:
-                               https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
-
-
-    """
-    return _get_offline_features(
-        feature_vector,
-        entity_rows,
-        entity_timestamp_column,
-        target,
-        run_config,
-        drop_columns,
-        start_time,
-        end_time,
-        with_indexes,
-        update_stats,
-        engine,
-        engine_args,
-        query,
-        order_by,
-        spark_service,
-        timestamp_for_filtering,
-        additional_filters,
-    )
-
-
-def _get_offline_features(
-    feature_vector: Union[str, FeatureVector],
-    entity_rows=None,
-    entity_timestamp_column: str = None,
-    target: DataTargetBase = None,
-    run_config: RunConfig = None,
-    drop_columns: list[str] = None,
-    start_time: Union[str, datetime] = None,
-    end_time: Union[str, datetime] = None,
-    with_indexes: bool = False,
-    update_stats: bool = False,
-    engine: str = None,
-    engine_args: dict = None,
-    query: str = None,
-    order_by: Union[str, list[str]] = None,
-    spark_service: str = None,
-    timestamp_for_filtering: Union[str, dict[str, str]] = None,
-    additional_filters=None,
-) -> Union[OfflineVectorResponse, RemoteVectorResponse]:
-    if entity_rows is None and entity_timestamp_column is not None:
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            "entity_timestamp_column param "
-            "can not be specified without entity_rows param"
-        )
-    if isinstance(target, BaseStoreTarget) and not target.support_pandas:
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            f"get_offline_features does not support targets that do not support pandas engine."
-            f" Target kind: {target.kind}"
-        )
-
-    if isinstance(feature_vector, FeatureVector):
-        update_stats = True
-
-    feature_vector = _features_to_vector_and_check_permissions(
-        feature_vector, update_stats
-    )
-
-    entity_timestamp_column = (
-        entity_timestamp_column or feature_vector.spec.timestamp_field
-    )
-
-    merger_engine = get_merger(engine)
-
-    if run_config and not run_config.local:
-        return run_merge_job(
-            feature_vector,
-            target,
-            merger_engine,
-            engine,
-            engine_args,
-            spark_service,
-            entity_rows,
-            entity_timestamp_column=entity_timestamp_column,
-            run_config=run_config,
-            drop_columns=drop_columns,
-            with_indexes=with_indexes,
-            query=query,
-            order_by=order_by,
-            start_time=start_time,
-            end_time=end_time,
-            timestamp_for_filtering=timestamp_for_filtering,
-            additional_filters=additional_filters,
-        )
-
-    merger = merger_engine(feature_vector, **(engine_args or {}))
-    return merger.start(
-        entity_rows,
-        entity_timestamp_column,
-        target=target,
-        drop_columns=drop_columns,
-        start_time=start_time,
-        end_time=end_time,
-        timestamp_for_filtering=timestamp_for_filtering,
-        with_indexes=with_indexes,
-        update_stats=update_stats,
-        query=query,
-        order_by=order_by,
-        additional_filters=additional_filters,
-    )
-
-
-@deprecated(
-    version="1.6.0",
-    reason="get_online_feature_service() will be removed in 1.8.0, please instead use "
-    "get_feature_vector('store://feature_vector_name').get_online_feature_service()",
-    category=FutureWarning,
-)
-def get_online_feature_service(
-    feature_vector: Union[str, FeatureVector],
-    run_config: RunConfig = None,
-    fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
-    impute_policy: dict = None,
-    update_stats: bool = False,
-    entity_keys: list[str] = None,
-):
-    """initialize and return online feature vector service api,
-    returns :py:class:`~mlrun.feature_store.OnlineVectorService`
-
-    :**usage**:
-        There are two ways to use the function:
-
-        1. As context manager
-
-           Example::
-
-               with get_online_feature_service(vector_uri) as svc:
-                   resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
-                   print(resp)
-                   resp = svc.get([{"ticker": "AAPL"}], as_list=True)
-                   print(resp)
-
-           Example with imputing::
-
-               with get_online_feature_service(vector_uri, entity_keys=['id'],
-                                               impute_policy={"*": "$mean", "amount": 0)) as svc:
-                   resp = svc.get([{"id": "C123487"}])
-
-        2. as simple function, note that in that option you need to close the session.
-
-           Example::
-
-               svc = get_online_feature_service(vector_uri, entity_keys=["ticker"])
-               try:
-                   resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
-                   print(resp)
-                   resp = svc.get([{"ticker": "AAPL"}], as_list=True)
-                   print(resp)
-
-               finally:
-                   svc.close()
-
-           Example with imputing::
-
-               svc = get_online_feature_service(vector_uri, entity_keys=['id'],
-                                                impute_policy={"*": "$mean", "amount": 0))
-               try:
-                   resp = svc.get([{"id": "C123487"}])
-               except Exception as e:
-                   handling exception...
-               finally:
-                   svc.close()
-
-    :param feature_vector: feature vector uri or FeatureVector object. passing feature vector obj requires update
-                           permissions.
-    :param run_config: function and/or run configuration for remote jobs/services
-    :param impute_policy: a dict with `impute_policy` per feature, the dict key is the feature name and the dict
-                          value indicate which value will be used in case the feature is NaN/empty, the replaced
-                          value can be fixed number for constants or $mean, $max, $min, $std, $count
-                          for statistical
-                          values. "*" is used to specify the default for all features, example: `{"*": "$mean"}`
-    :param fixed_window_type: determines how to query the fixed window values which were previously inserted by ingest
-    :param update_stats: update features statistics from the requested feature sets on the vector.
-                         Default: False.
-    :param entity_keys: Entity list of the first feature_set in the vector.
-                        The indexes that are used to query the online service.
-    :return: Initialize the `OnlineVectorService`.
-             Will be used in subclasses where `support_online=True`.
-    """
-    return _get_online_feature_service(
-        feature_vector,
-        run_config,
-        fixed_window_type,
-        impute_policy,
-        update_stats,
-        entity_keys,
-    )
-
-
-def _get_online_feature_service(
-    feature_vector: Union[str, FeatureVector],
-    run_config: RunConfig = None,
-    fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
-    impute_policy: dict = None,
-    update_stats: bool = False,
-    entity_keys: list[str] = None,
-) -> OnlineVectorService:
-    if isinstance(feature_vector, FeatureVector):
-        update_stats = True
-    feature_vector = _features_to_vector_and_check_permissions(
-        feature_vector, update_stats
-    )
-
-    # Impute policies rely on statistics in many cases, so verifying that the fvec has stats in it
-    if impute_policy and not feature_vector.status.stats:
-        update_stats = True
-
-    engine_args = {"impute_policy": impute_policy}
-    merger_engine = get_merger("storey")
-    # todo: support remote service (using remote nuclio/mlrun function if run_config)
-
-    merger = merger_engine(feature_vector, **engine_args)
-
-    return merger.init_online_vector_service(
-        entity_keys, fixed_window_type, update_stats=update_stats
-    )
-
-
 def norm_column_name(name: str) -> str:
     """
     Remove parentheses () and replace whitespaces with an underscore _.
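
Migration note: the deprecation messages on the removed `get_offline_features()` and `get_online_feature_service()` helpers name their replacements as methods on the `FeatureVector` object itself. A minimal sketch of the method-based calls, assuming a vector already stored in the project (the URI and entity values here are illustrative)::

    import mlrun.feature_store as fstore

    # Load the stored feature vector by URI (URI shown is illustrative).
    vector = fstore.get_feature_vector("store://feature-vectors/my-project/my-vector")

    # Offline retrieval: replaces the removed get_offline_features(vector, ...).
    resp = vector.get_offline_features(query="ticker in ['GOOG'] and bid>100")
    print(resp.to_dataframe())

    # Online retrieval: replaces the removed get_online_feature_service(vector, ...).
    with vector.get_online_feature_service(entity_keys=["ticker"]) as svc:
        print(svc.get([{"ticker": "GOOG"}]))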
@@ -448,14 +106,14 @@ def _get_namespace(run_config: RunConfig) -> dict[str, Any]:


 def ingest(
+    mlrun_context: Union["mlrun.MLrunProject", "mlrun.MLClientCtx"],
     featureset: Union[FeatureSet, str] = None,
     source=None,
-    targets: list[DataTargetBase] = None,
+    targets: Optional[list[DataTargetBase]] = None,
     namespace=None,
     return_df: bool = True,
     infer_options: InferOptions = InferOptions.default(),
     run_config: RunConfig = None,
-    mlrun_context=None,
     spark_context=None,
     overwrite=None,
 ) -> Optional[pd.DataFrame]:
@@ -484,6 +142,7 @@ def ingest(
         targets = [CSVTarget("mycsv", path="./mycsv.csv")]
         ingest(measurements, source, targets)

+    :param mlrun_context: mlrun context
     :param featureset: feature set object or featureset.uri. (uri must be of a feature set that is in the DB,
                        call `.save()` if it's not)
     :param source: source dataframe or other sources (e.g. parquet source see:
@@ -496,7 +155,6 @@ def ingest(
                           histogram and preview infer options (:py:class:`~mlrun.feature_store.InferOptions`)
     :param run_config: function and/or run configuration for remote jobs,
                        see :py:class:`~mlrun.feature_store.RunConfig`
-    :param mlrun_context: mlrun context (when running as a job), for internal use !
     :param spark_context: local spark session for spark ingestion, example for creating the spark context:
                           `spark = SparkSession.builder.appName("Spark function").getOrCreate()`
                           For remote spark ingestion, this should contain the remote spark service name
@@ -505,12 +163,9 @@ def ingest(
                       False for scheduled ingest - does not delete the target)
    :return: if return_df is True, a dataframe will be returned based on the graph
    """
-    if mlrun_context is None:
-        deprecated(
-            version="1.6.0",
-            reason="Calling 'ingest' with mlrun_context=None is deprecated and will be removed in 1.8.0,\
-                use 'FeatureSet.ingest()' instead",
-            category=FutureWarning,
+    if not mlrun_context:
+        raise mlrun.errors.MLRunValueError(
+            "mlrun_context must be defined when calling ingest()"
         )

     return _ingest(
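
With this hunk, calling the module-level `ingest()` without an `mlrun_context` is a hard `MLRunValueError` rather than the old `FutureWarning`; the removed warning text points to the method form on the feature set. A hedged sketch of that form (feature set name, entity, and dataframe are illustrative)::

    import pandas as pd
    import mlrun.feature_store as fstore

    df = pd.DataFrame({"ticker": ["GOOG"], "bid": [720.5]})
    measurements = fstore.FeatureSet("measurements", entities=[fstore.Entity("ticker")])

    # Replaces ingest(measurements, source=df, ...) for ad-hoc (non-job) ingestion.
    result_df = measurements.ingest(source=df)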
@@ -530,7 +185,7 @@
 def _ingest(
     featureset: Union[FeatureSet, str] = None,
     source=None,
-    targets: list[DataTargetBase] = None,
+    targets: Optional[list[DataTargetBase]] = None,
     namespace=None,
     return_df: bool = True,
     infer_options: InferOptions = InferOptions.default(),
@@ -775,61 +430,14 @@ def _ingest(
     return df


-@deprecated(
-    version="1.6.0",
-    reason="'preview' will be removed in 1.8.0, use 'FeatureSet.preview()' instead",
-    category=FutureWarning,
-)
-def preview(
-    featureset: FeatureSet,
-    source,
-    entity_columns: list = None,
-    namespace=None,
-    options: InferOptions = None,
-    verbose: bool = False,
-    sample_size: int = None,
-) -> pd.DataFrame:
-    """run the ingestion pipeline with local DataFrame/file data and infer features schema and stats
-
-    example::
-
-        quotes_set = FeatureSet("stock-quotes", entities=[Entity("ticker")])
-        quotes_set.add_aggregation("ask", ["sum", "max"], ["1h", "5h"], "10m")
-        quotes_set.add_aggregation("bid", ["min", "max"], ["1h"], "10m")
-        df = preview(
-            quotes_set,
-            quotes_df,
-            entity_columns=["ticker"],
-        )
-
-    :param featureset: feature set object or uri
-    :param source: source dataframe or csv/parquet file path
-    :param entity_columns: list of entity (index) column names
-    :param namespace: namespace or module containing graph classes
-    :param options: schema (for discovery of entities, features in featureset), index, stats,
-                    histogram and preview infer options (:py:class:`~mlrun.feature_store.InferOptions`)
-    :param verbose: verbose log
-    :param sample_size: num of rows to sample from the dataset (for large datasets)
-    """
-    return _preview(
-        featureset,
-        source,
-        entity_columns,
-        namespace,
-        options,
-        verbose,
-        sample_size,
-    )
-
-
 def _preview(
     featureset: FeatureSet,
     source,
-    entity_columns: list = None,
+    entity_columns: Optional[list] = None,
     namespace=None,
     options: InferOptions = None,
     verbose: bool = False,
-    sample_size: int = None,
+    sample_size: Optional[int] = None,
 ) -> pd.DataFrame:
     if isinstance(source, pd.DataFrame):
         source = _rename_source_dataframe_columns(source)
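
The removed `preview()` wrapper pointed users at `FeatureSet.preview()`. Adapting the deleted docstring example to the method form (as in that example, `quotes_df` is an assumed source dataframe)::

    import mlrun.feature_store as fstore

    quotes_set = fstore.FeatureSet("stock-quotes", entities=[fstore.Entity("ticker")])
    quotes_set.add_aggregation("ask", ["sum", "max"], ["1h", "5h"], "10m")

    # Replaces the removed preview(quotes_set, quotes_df, entity_columns=["ticker"]).
    df = quotes_set.preview(quotes_df, entity_columns=["ticker"])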
@@ -895,8 +503,8 @@ def _preview(
 def _run_ingestion_job(
     featureset: Union[FeatureSet, str],
     source: DataSource = None,
-    targets: list[DataTargetBase] = None,
-    name: str = None,
+    targets: Optional[list[DataTargetBase]] = None,
+    name: Optional[str] = None,
     infer_options: InferOptions = InferOptions.default(),
     run_config: RunConfig = None,
 ):
@@ -911,60 +519,11 @@ def _run_ingestion_job(
     return run_ingestion_job(name, featureset, run_config, source.schedule)


-@deprecated(
-    version="1.6.0",
-    reason="'deploy_ingestion_service_v2' will be removed in 1.8.0, "
-    "use 'FeatureSet.deploy_ingestion_service()' instead",
-    category=FutureWarning,
-)
-def deploy_ingestion_service_v2(
-    featureset: Union[FeatureSet, str],
-    source: DataSource = None,
-    targets: list[DataTargetBase] = None,
-    name: str = None,
-    run_config: RunConfig = None,
-    verbose=False,
-) -> tuple[str, BaseRuntime]:
-    """Start real-time ingestion service using nuclio function
-
-    Deploy a real-time function implementing feature ingestion pipeline
-    the source maps to Nuclio event triggers (http, kafka, v3io stream, etc.)
-
-    the `run_config` parameter allow specifying the function and job configuration,
-    see: :py:class:`~mlrun.feature_store.RunConfig`
-
-    example::
-
-        source = HTTPSource()
-        func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
-        config = RunConfig(function=func)
-        deploy_ingestion_service_v2(my_set, source, run_config=config)
-
-    :param featureset: feature set object or uri
-    :param source: data source object describing the online or offline source
-    :param targets: list of data target objects
-    :param name: name for the job/function
-    :param run_config: service runtime configuration (function object/uri, resources, etc..)
-    :param verbose: verbose log
-
-    :return: URL to access the deployed ingestion service, and the function that was deployed (which will
-             differ from the function passed in via the run_config parameter).
-    """
-    return _deploy_ingestion_service_v2(
-        featureset,
-        source,
-        targets,
-        name,
-        run_config,
-        verbose,
-    )
-
-
 def _deploy_ingestion_service_v2(
     featureset: Union[FeatureSet, str],
     source: DataSource = None,
-    targets: list[DataTargetBase] = None,
-    name: str = None,
+    targets: Optional[list[DataTargetBase]] = None,
+    name: Optional[str] = None,
     run_config: RunConfig = None,
     verbose=False,
 ) -> tuple[str, BaseRuntime]:
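
The removed `deploy_ingestion_service_v2()` wrapper named `FeatureSet.deploy_ingestion_service()` as its successor. Recasting the deleted docstring example as a method call (the function name and source come from that example; the return shape follows the removed wrapper's documented `tuple[str, BaseRuntime]`)::

    import mlrun
    import mlrun.feature_store as fstore
    from mlrun.datastore.sources import HTTPSource

    my_set = fstore.FeatureSet("stocks", entities=[fstore.Entity("ticker")])
    func = mlrun.code_to_function("ingest", kind="serving")

    # Replaces deploy_ingestion_service_v2(my_set, source, run_config=config).
    url, deployed = my_set.deploy_ingestion_service(
        source=HTTPSource(), run_config=fstore.RunConfig(function=func)
    )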
@@ -1010,6 +569,9 @@ def _deploy_ingestion_service_v2(
     function.metadata.name = function.metadata.name or name

     function.spec.graph = featureset.spec.graph
+    function.spec.graph.engine = (
+        "async" if featureset.spec.engine == "storey" else "sync"
+    )
     function.spec.parameters = run_config.parameters
     function.spec.graph_initializer = (
         "mlrun.feature_store.ingestion.featureset_initializer"
@@ -1026,7 +588,7 @@ def _ingest_with_spark(
     spark=None,
     featureset: Union[FeatureSet, str] = None,
     source: BaseSourceDriver = None,
-    targets: list[BaseStoreTarget] = None,
+    targets: Optional[list[BaseStoreTarget]] = None,
     infer_options: InferOptions = InferOptions.default(),
     mlrun_context=None,
     namespace=None,
@@ -1199,8 +761,8 @@ def _infer_from_static_df(
 def set_task_params(
     featureset: FeatureSet,
     source: DataSource = None,
-    targets: list[DataTargetBase] = None,
-    parameters: dict = None,
+    targets: Optional[list[DataTargetBase]] = None,
+    parameters: Optional[dict] = None,
     infer_options: InferOptions = InferOptions.Null,
     overwrite=None,
 ):
@@ -178,17 +178,17 @@ class RunConfig:
     def __init__(
         self,
         function: typing.Union[str, FunctionReference, BaseRuntime] = None,
-        local: bool = None,
-        image: str = None,
-        kind: str = None,
-        handler: str = None,
-        parameters: dict = None,
-        watch: bool = None,
+        local: typing.Optional[bool] = None,
+        image: typing.Optional[str] = None,
+        kind: typing.Optional[str] = None,
+        handler: typing.Optional[str] = None,
+        parameters: typing.Optional[dict] = None,
+        watch: typing.Optional[bool] = None,
         owner=None,
         credentials: typing.Optional[mlrun.model.Credentials] = None,
-        code: str = None,
-        requirements: typing.Union[str, list[str]] = None,
-        extra_spec: dict = None,
+        code: typing.Optional[str] = None,
+        requirements: typing.Optional[typing.Union[str, list[str]]] = None,
+        extra_spec: typing.Optional[dict] = None,
         auth_info=None,
     ):
         """class for holding function and run specs for jobs and serving functions