mlrun 1.8.0rc37__py3-none-any.whl → 1.8.0rc39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/__main__.py +1 -8
- mlrun/artifacts/base.py +3 -3
- mlrun/artifacts/manager.py +1 -1
- mlrun/common/model_monitoring/helpers.py +0 -13
- mlrun/common/schemas/client_spec.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +2 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +5 -11
- mlrun/datastore/__init__.py +57 -16
- mlrun/datastore/base.py +0 -11
- mlrun/datastore/datastore_profile.py +10 -7
- mlrun/datastore/sources.py +6 -17
- mlrun/datastore/storeytargets.py +29 -15
- mlrun/datastore/utils.py +73 -0
- mlrun/db/base.py +1 -0
- mlrun/db/httpdb.py +16 -0
- mlrun/db/nopdb.py +1 -0
- mlrun/feature_store/__init__.py +2 -0
- mlrun/feature_store/api.py +77 -0
- mlrun/model_monitoring/api.py +2 -20
- mlrun/model_monitoring/controller.py +18 -2
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +21 -6
- mlrun/model_monitoring/helpers.py +15 -27
- mlrun/model_monitoring/stream_processing.py +7 -34
- mlrun/projects/operations.py +3 -3
- mlrun/projects/pipelines.py +5 -0
- mlrun/projects/project.py +4 -4
- mlrun/run.py +4 -4
- mlrun/runtimes/kubejob.py +2 -2
- mlrun/runtimes/nuclio/application/application.py +0 -2
- mlrun/runtimes/nuclio/function.py +1 -46
- mlrun/runtimes/pod.py +37 -145
- mlrun/serving/routers.py +80 -64
- mlrun/serving/states.py +30 -1
- mlrun/serving/v2_serving.py +24 -62
- mlrun/utils/async_http.py +1 -2
- mlrun/utils/helpers.py +1 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc37.dist-info → mlrun-1.8.0rc39.dist-info}/METADATA +1 -1
- {mlrun-1.8.0rc37.dist-info → mlrun-1.8.0rc39.dist-info}/RECORD +43 -43
- {mlrun-1.8.0rc37.dist-info → mlrun-1.8.0rc39.dist-info}/WHEEL +1 -1
- {mlrun-1.8.0rc37.dist-info → mlrun-1.8.0rc39.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc37.dist-info → mlrun-1.8.0rc39.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc37.dist-info → mlrun-1.8.0rc39.dist-info}/top_level.txt +0 -0
mlrun/__main__.py
CHANGED
@@ -17,7 +17,6 @@ import json
 import pathlib
 import socket
 import traceback
-import warnings
 from ast import literal_eval
 from base64 import b64decode
 from os import environ, path, remove
@@ -864,14 +863,8 @@ def version():
 )
 @click.option("--offset", type=int, default=0, help="byte offset")
 @click.option("--db", help="api and db service path/url")
-
-def logs(uid, project, offset, db, watch):
+def logs(uid, project, offset, db):
     """Get or watch task logs"""
-    if watch:
-        warnings.warn(
-            "'--watch' is deprecated in 1.6.0, and will be removed in 1.8.0, "
-            # TODO: Remove in 1.8.0
-        )
     mldb = get_run_db(db or mlconf.dbpath)
     if mldb.kind == "http":
         state, _ = mldb.watch_log(uid, project, watch=False, offset=offset)

mlrun/artifacts/base.py
CHANGED
@@ -893,7 +893,7 @@ def generate_target_path(item: Artifact, artifact_path, producer):
     return f"{artifact_path}{item.key}{suffix}"
 
 
-# TODO:
+# TODO: Remove once data migration v5 is obsolete
 def convert_legacy_artifact_to_new_format(
     legacy_artifact: dict,
 ) -> Artifact:
@@ -905,9 +905,9 @@ def convert_legacy_artifact_to_new_format(
     artifact_tag = legacy_artifact.get("tag", "")
     if artifact_tag:
         artifact_key = f"{artifact_key}:{artifact_tag}"
-    # TODO:
+    # TODO: Remove once data migration v5 is obsolete
     warnings.warn(
-        f"Converting legacy artifact '{artifact_key}' to new format. This will not be supported in MLRun 1.
+        f"Converting legacy artifact '{artifact_key}' to new format. This will not be supported in MLRun 1.9.0. "
         f"Make sure to save the artifact/project in the new format.",
         FutureWarning,
     )

mlrun/artifacts/manager.py
CHANGED
@@ -108,7 +108,7 @@ class ArtifactProducer:
 def dict_to_artifact(struct: dict) -> Artifact:
     kind = struct.get("kind", "")
 
-    # TODO:
+    # TODO: Remove once data migration v5 is obsolete
     if mlrun.utils.is_legacy_artifact(struct):
         return mlrun.artifacts.base.convert_legacy_artifact_to_new_format(struct)
 

mlrun/common/model_monitoring/helpers.py
CHANGED
@@ -50,19 +50,6 @@ def get_kafka_topic(project: str, function_name: typing.Optional[str] = None) ->
     )
 
 
-def parse_monitoring_stream_path(
-    stream_uri: str, project: str, function_name: typing.Optional[str] = None
-) -> str:
-    if stream_uri.startswith("kafka://"):
-        if "?topic" in stream_uri:
-            raise mlrun.errors.MLRunValueError("Custom kafka topic is not allowed")
-        # Add topic to stream kafka uri
-        topic = get_kafka_topic(project=project, function_name=function_name)
-        stream_uri += f"?topic={topic}"
-
-    return stream_uri
-
-
 def _get_counts(hist: Histogram) -> BinCounts:
     """Return the histogram counts"""
     return BinCounts(hist[0])

mlrun/common/schemas/model_monitoring/constants.py
CHANGED
@@ -42,12 +42,10 @@ class ModelEndpointSchema(MonitoringStrEnum):
     # spec
     FUNCTION_NAME = "function_name"
     FUNCTION_TAG = "function_tag"
-    FUNCTION_UID = "function_uid"
     MODEL_NAME = "model_name"
-
-
+    MODEL_TAGS = "model_tags"
+    MODEL_PATH = "model_path"
     MODEL_CLASS = "model_class"
-    MODEL_UID = "model_uid"
     FEATURE_NAMES = "feature_names"
     LABEL_NAMES = "label_names"
     FEATURE_STATS = "feature_stats"

mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED
@@ -117,14 +117,13 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
 
 
 class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
-    model_uid: Optional[str] = ""
-    model_name: Optional[str] = ""
-    model_db_key: Optional[str] = ""
-    model_tag: Optional[str] = ""
     model_class: Optional[str] = ""
     function_name: Optional[str] = ""
     function_tag: Optional[str] = ""
-
+    model_path: Optional[str] = ""
+    model_name: Optional[str] = ""
+    model_tags: Optional[list[str]] = []
+    _model_id: Optional[int] = ""
     feature_names: Optional[list[str]] = []
     label_names: Optional[list[str]] = []
     feature_stats: Optional[dict] = {}
@@ -137,12 +136,8 @@ class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
     @classmethod
     def mutable_fields(cls):
         return [
-            "
-            "model_name",
-            "model_db_key",
-            "model_tag",
+            "model_path",
             "model_class",
-            "function_uid",
             "feature_names",
             "label_names",
             "children",
@@ -206,7 +201,6 @@ class ModelEndpoint(BaseModel):
             ModelEndpointSchema.CURRENT_STATS,
             ModelEndpointSchema.DRIFT_MEASURES,
             ModelEndpointSchema.FUNCTION_URI,
-            ModelEndpointSchema.MODEL_URI,
         }
         # Initialize a flattened dictionary that will be filled with the model endpoint dictionary attributes
         flatten_dict = {}

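The spec rework above replaces the model_uid / model_db_key / model_tag fields with model_path, model_name and model_tags. A minimal sketch of how the new fields might be populated; the import path follows the file shown above, and the store path, names and tag values are made up:

    from mlrun.common.schemas.model_monitoring.model_endpoints import ModelEndpointSpec

    spec = ModelEndpointSpec(
        model_path="store://models/my-project/my-model:latest",  # assumed store-path format
        model_name="my-model",
        model_tags=["latest"],
        model_class="ClassifierModel",
        function_name="serving",
        function_tag="latest",
    )
    # mutable_fields() now lists "model_path" instead of the removed model_* fields
    print(ModelEndpointSpec.mutable_fields())
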
mlrun/datastore/__init__.py
CHANGED
@@ -34,9 +34,17 @@ __all__ = [
     "VectorStoreCollection",
 ]
 
+from urllib.parse import urlparse
+
 import fsspec
+from mergedeep import merge
 
 import mlrun.datastore.wasbfs
+from mlrun.datastore.datastore_profile import (
+    DatastoreProfileKafkaSource,
+    DatastoreProfileKafkaTarget,
+    DatastoreProfileV3io,
+)
 from mlrun.platforms.iguazio import (
     HTTPOutputStream,
     KafkaOutputStream,
@@ -106,23 +114,56 @@ def get_stream_pusher(stream_path: str, **kwargs):
 
     :param stream_path: path/url of stream
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if stream_path.startswith("ds://"):
+        datastore_profile = mlrun.datastore.datastore_profile.datastore_profile_read(
+            stream_path
+        )
+        if isinstance(
+            datastore_profile,
+            (DatastoreProfileKafkaSource, DatastoreProfileKafkaTarget),
+        ):
+            attributes = datastore_profile.attributes()
+            brokers = attributes.pop("brokers", None)
+            # Override the topic with the one in the url (if any)
+            parsed_url = urlparse(stream_path)
+            topic = (
+                parsed_url.path.strip("/")
+                if parsed_url.path
+                else datastore_profile.get_topic()
+            )
+            producer_options = mlrun.datastore.utils.KafkaParameters(
+                attributes
+            ).producer()
+            return KafkaOutputStream(topic, brokers, producer_options=producer_options)
+
+        elif isinstance(datastore_profile, DatastoreProfileV3io):
+            parsed_url = urlparse(stream_path)
+            stream_path = datastore_profile.url(parsed_url.path)
+            endpoint, stream_path = parse_path(stream_path)
+            return OutputStream(stream_path, endpoint=endpoint, **kwargs)
+        else:
+            raise ValueError(
+                f"Unsupported datastore profile type: {type(datastore_profile)}"
+            )
     else:
-
+        kafka_brokers = get_kafka_brokers_from_dict(kwargs)
+        if stream_path.startswith("kafka://") or kafka_brokers:
+            topic, brokers = parse_kafka_url(stream_path, kafka_brokers)
+            return KafkaOutputStream(
+                topic, brokers, kwargs.get("kafka_producer_options")
+            )
+        elif stream_path.startswith("http://") or stream_path.startswith("https://"):
+            return HTTPOutputStream(stream_path=stream_path)
+        elif "://" not in stream_path:
+            return OutputStream(stream_path, **kwargs)
+        elif stream_path.startswith("v3io"):
+            endpoint, stream_path = parse_path(stream_path)
+            endpoint = kwargs.pop("endpoint", None) or endpoint
+            return OutputStream(stream_path, endpoint=endpoint, **kwargs)
+        elif stream_path.startswith("dummy://"):
+            return _DummyStream(**kwargs)
+        else:
+            raise ValueError(f"unsupported stream path {stream_path}")
 
 
 class _DummyStream:

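get_stream_pusher() now resolves ds:// paths through datastore profiles: Kafka profiles produce a KafkaOutputStream with producer options filtered through the new KafkaParameters helper, V3IO profiles fall back to a regular OutputStream, and the kafka:// / http(s):// / v3io / dummy:// handling is otherwise unchanged. A rough sketch, assuming a Kafka datastore profile named "my-kafka" has been registered for the project (profile name, topic and payload are placeholders):

    import mlrun.datastore

    # the path suffix overrides the profile's topic; a bare "ds://my-kafka" would use get_topic()
    pusher = mlrun.datastore.get_stream_pusher("ds://my-kafka/events")
    pusher.push({"id": 1, "value": 0.5})
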
mlrun/datastore/base.py
CHANGED
@@ -24,7 +24,6 @@ import pandas as pd
 import pyarrow
 import pytz
 import requests
-from deprecated import deprecated
 
 import mlrun.config
 import mlrun.errors
@@ -95,16 +94,6 @@ class DataStore:
     def uri_to_ipython(endpoint, subpath):
         return ""
 
-    # TODO: remove in 1.8.0
-    @deprecated(
-        version="1.8.0",
-        reason="'get_filesystem()' will be removed in 1.8.0, use "
-        "'filesystem' property instead",
-        category=FutureWarning,
-    )
-    def get_filesystem(self):
-        return self.filesystem
-
     @property
     def filesystem(self) -> Optional[fsspec.AbstractFileSystem]:
         """return fsspec file system object, if supported"""

mlrun/datastore/datastore_profile.py
CHANGED
@@ -171,6 +171,9 @@ class DatastoreProfileKafkaTarget(DatastoreProfile):
                 FutureWarning,
             )
 
+    def get_topic(self) -> typing.Optional[str]:
+        return self.topic
+
     def attributes(self):
         attributes = {"brokers": self.brokers or self.bootstrap_servers}
         if self.kwargs_public:
@@ -193,6 +196,10 @@ class DatastoreProfileKafkaSource(DatastoreProfile):
     kwargs_public: typing.Optional[dict]
     kwargs_private: typing.Optional[dict]
 
+    def get_topic(self) -> typing.Optional[str]:
+        topics = [self.topics] if isinstance(self.topics, str) else self.topics
+        return topics[0] if topics else None
+
     def attributes(self) -> dict[str, typing.Any]:
         attributes = {}
         if self.kwargs_public:
@@ -209,13 +216,9 @@ class DatastoreProfileKafkaSource(DatastoreProfile):
             attributes["initial_offset"] = self.initial_offset
         if self.partitions is not None:
            attributes["partitions"] = self.partitions
-        sasl
-
-
-            sasl["user"] = self.sasl_user
-            sasl["password"] = self.sasl_pass
-            sasl["mechanism"] = "PLAIN"
-        if sasl:
+        if sasl := mlrun.datastore.utils.KafkaParameters(attributes).sasl(
+            usr=self.sasl_user, pwd=self.sasl_pass
+        ):
             attributes["sasl"] = sasl
         return attributes
 
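Both Kafka profiles gain a get_topic() helper: the target returns its single topic, while the source returns the first of its configured topics (or None). A hedged illustration with made-up profile names, broker and topic values:

    from mlrun.datastore.datastore_profile import (
        DatastoreProfileKafkaSource,
        DatastoreProfileKafkaTarget,
    )

    source = DatastoreProfileKafkaSource(
        name="my-kafka-source", brokers=["broker:9092"], topics=["events"]
    )
    target = DatastoreProfileKafkaTarget(
        name="my-kafka-target", brokers="broker:9092", topic="predictions"
    )
    print(source.get_topic())  # "events" - first configured topic, or None when empty
    print(target.get_topic())  # "predictions"
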
mlrun/datastore/sources.py
CHANGED
@@ -1100,13 +1100,9 @@ class KafkaSource(OnlineSource):
             attributes["initial_offset"] = initial_offset
         if partitions is not None:
             attributes["partitions"] = partitions
-        sasl
-
-
-            sasl["user"] = sasl_user
-            sasl["password"] = sasl_pass
-            sasl["mechanism"] = "PLAIN"
-        if sasl:
+        if sasl := mlrun.datastore.utils.KafkaParameters(attributes).sasl(
+            usr=sasl_user, pwd=sasl_pass
+        ):
             attributes["sasl"] = sasl
         super().__init__(attributes=attributes, **kwargs)
 
@@ -1207,16 +1203,9 @@ class KafkaSource(OnlineSource):
         ]
 
         kafka_admin_kwargs = {}
-
-
-
-            {
-                "security_protocol": "SASL_PLAINTEXT",
-                "sasl_mechanism": sasl["mechanism"],
-                "sasl_plain_username": sasl["user"],
-                "sasl_plain_password": sasl["password"],
-            }
-        )
+        kafka_admin_kwargs = mlrun.datastore.utils.KafkaParameters(
+            self.attributes
+        ).admin()
 
         kafka_admin = KafkaAdminClient(bootstrap_servers=brokers, **kafka_admin_kwargs)
         try:

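KafkaSource now delegates both the SASL block and the admin-client options to the shared KafkaParameters helper instead of building those dictionaries inline. A sketch of constructing the source with SASL credentials; brokers, topic, group and credentials are placeholders:

    from mlrun.datastore.sources import KafkaSource

    source = KafkaSource(
        brokers=["broker:9092"],
        topics=["events"],
        group="my-consumer-group",
        sasl_user="user",    # with sasl_pass set, attributes["sasl"] is filled in via
        sasl_pass="secret",  # KafkaParameters.sasl(), defaulting to the PLAIN mechanism
    )
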
mlrun/datastore/storeytargets.py
CHANGED
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from urllib.parse import urlparse
+
 import storey
 from mergedeep import merge
 from storey import V3ioDriver
@@ -18,6 +20,12 @@ from storey import V3ioDriver
 import mlrun
 import mlrun.model_monitoring.helpers
 from mlrun.datastore.base import DataStore
+from mlrun.datastore.datastore_profile import (
+    DatastoreProfileKafkaSource,
+    DatastoreProfileKafkaTarget,
+    TDEngineDatastoreProfile,
+    datastore_profile_read,
+)
 
 from ..platforms.iguazio import parse_path
 from .utils import (
@@ -44,13 +52,8 @@ def get_url_and_storage_options(path, external_storage_options=None):
 class TDEngineStoreyTarget(storey.TDEngineTarget):
     def __init__(self, *args, url: str, **kwargs):
         if url.startswith("ds://"):
-            datastore_profile = (
-
-            )
-            if not isinstance(
-                datastore_profile,
-                mlrun.datastore.datastore_profile.TDEngineDatastoreProfile,
-            ):
+            datastore_profile = datastore_profile_read(url)
+            if not isinstance(datastore_profile, TDEngineDatastoreProfile):
                 raise ValueError(
                     f"Unexpected datastore profile type:{datastore_profile.type}."
                     "Only TDEngineDatastoreProfile is supported"
@@ -126,16 +129,24 @@ class StreamStoreyTarget(storey.StreamTarget):
 class KafkaStoreyTarget(storey.KafkaTarget):
     def __init__(self, *args, **kwargs):
         path = kwargs.pop("path")
-        attributes = kwargs.pop("attributes",
+        attributes = kwargs.pop("attributes", {})
         if path and path.startswith("ds://"):
-            datastore_profile = (
-
-
+            datastore_profile = datastore_profile_read(path)
+            if not isinstance(
+                datastore_profile,
+                (DatastoreProfileKafkaSource, DatastoreProfileKafkaTarget),
+            ):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"Unsupported datastore profile type: {type(datastore_profile)}"
+                )
+
             attributes = merge(attributes, datastore_profile.attributes())
-            brokers = attributes.pop(
-
+            brokers = attributes.pop("brokers", None)
+            # Override the topic with the one in the url (if any)
+            parsed = urlparse(path)
+            topic = (
+                parsed.path.strip("/") if parsed.path else datastore_profile.get_topic()
             )
-            topic = datastore_profile.topic
         else:
             brokers = attributes.pop(
                 "brokers", attributes.pop("bootstrap_servers", None)
@@ -146,7 +157,10 @@ class KafkaStoreyTarget(storey.KafkaTarget):
             raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a topic")
         kwargs["brokers"] = brokers
         kwargs["topic"] = topic
-
+
+        attributes = mlrun.datastore.utils.KafkaParameters(attributes).producer()
+
+        super().__init__(*args, **kwargs, producer_options=attributes)
 
 
 class NoSqlStoreyTarget(storey.NoSqlTarget):

mlrun/datastore/utils.py
CHANGED
@@ -222,3 +222,76 @@ def validate_additional_filters(additional_filters):
             for sub_value in value:
                 if isinstance(sub_value, float) and math.isnan(sub_value):
                     raise mlrun.errors.MLRunInvalidArgumentError(nan_error_message)
+
+
+class KafkaParameters:
+    def __init__(self, kwargs: dict):
+        import kafka
+
+        self._kafka = kafka
+        self._kwargs = kwargs
+        self._client_configs = {
+            "consumer": self._kafka.KafkaConsumer.DEFAULT_CONFIG,
+            "producer": self._kafka.KafkaProducer.DEFAULT_CONFIG,
+            "admin": self._kafka.KafkaAdminClient.DEFAULT_CONFIG,
+        }
+        self._custom_attributes = {
+            "max_workers": "",
+            "brokers": "",
+            "topics": "",
+            "group": "",
+            "initial_offset": "",
+            "partitions": "",
+            "sasl": "",
+            "worker_allocation_mode": "",
+        }
+        self._validate_keys()
+
+    def _validate_keys(self) -> None:
+        reference_dicts = (
+            self._custom_attributes,
+            self._kafka.KafkaAdminClient.DEFAULT_CONFIG,
+            self._kafka.KafkaProducer.DEFAULT_CONFIG,
+            self._kafka.KafkaConsumer.DEFAULT_CONFIG,
+        )
+        for key in self._kwargs:
+            if all(key not in d for d in reference_dicts):
+                raise ValueError(
+                    f"Key '{key}' not found in any of the Kafka reference dictionaries"
+                )
+
+    def _get_config(self, client_type: str) -> dict:
+        res = {
+            k: self._kwargs[k]
+            for k in self._kwargs.keys() & self._client_configs[client_type].keys()
+        }
+        if sasl := self._kwargs.get("sasl"):
+            res |= {
+                "security_protocol": "SASL_PLAINTEXT",
+                "sasl_mechanism": sasl["mechanism"],
+                "sasl_plain_username": sasl["user"],
+                "sasl_plain_password": sasl["password"],
+            }
+        return res
+
+    def consumer(self) -> dict:
+        return self._get_config("consumer")
+
+    def producer(self) -> dict:
+        return self._get_config("producer")
+
+    def admin(self) -> dict:
+        return self._get_config("admin")
+
+    def sasl(
+        self, *, usr: typing.Optional[str] = None, pwd: typing.Optional[str] = None
+    ) -> dict:
+        usr = usr or self._kwargs.get("sasl_plain_username", None)
+        pwd = pwd or self._kwargs.get("sasl_plain_password", None)
+        res = self._kwargs.get("sasl", {})
+        if usr and pwd:
+            res["enable"] = True
+            res["user"] = usr
+            res["password"] = pwd
+            res["mechanism"] = self._kwargs.get("sasl_mechanism", "PLAIN")
+        return res

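The new KafkaParameters helper validates a flat attributes dictionary against kafka-python's consumer/producer/admin DEFAULT_CONFIG keys (plus a small set of MLRun-specific keys such as brokers, topics and sasl) and slices out the subset relevant to each client. A small usage sketch with made-up values:

    from mlrun.datastore.utils import KafkaParameters

    params = KafkaParameters(
        {
            "brokers": "broker:9092",             # MLRun-specific key, not passed to the clients
            "max_request_size": 5 * 1024 * 1024,  # KafkaProducer config key
            "request_timeout_ms": 40000,          # known to all three client configs
            "sasl": {"user": "user", "password": "secret", "mechanism": "PLAIN"},
        }
    )
    print(params.producer())  # producer keys plus the SASL_PLAINTEXT settings derived from "sasl"
    print(params.admin())     # admin keys for KafkaAdminClient, with the same SASL handling
    print(params.sasl())      # returns the "sasl" dict as provided (no credentials to add here)

    # an unknown key raises ValueError at construction time:
    # KafkaParameters({"no_such_option": 1})
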
mlrun/db/base.py
CHANGED
@@ -257,6 +257,7 @@ class RunDBInterface(ABC):
         tag: Optional[str] = None,
         kind: Optional[str] = None,
         labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
+        states: Optional[list[mlrun.common.schemas.FunctionState]] = None,
         format_: mlrun.common.formatters.FunctionFormat = mlrun.common.formatters.FunctionFormat.full,
         since: Optional[datetime.datetime] = None,
         until: Optional[datetime.datetime] = None,

mlrun/db/httpdb.py
CHANGED
@@ -566,6 +566,17 @@ class HTTPRunDB(RunDBInterface):
             )
             config.alerts.mode = server_cfg.get("alerts_mode") or config.alerts.mode
             config.system_id = server_cfg.get("system_id") or config.system_id
+            model_monitoring_store_prefixes = (
+                server_cfg.get("model_endpoint_monitoring_store_prefixes") or {}
+            )
+            for prefix in ["default", "user_space", "monitoring_application"]:
+                store_prefix_value = model_monitoring_store_prefixes.get(prefix)
+                if store_prefix_value is not None:
+                    setattr(
+                        config.model_endpoint_monitoring.store_prefixes,
+                        prefix,
+                        store_prefix_value,
+                    )
 
         except Exception as exc:
             logger.warning(
@@ -1497,6 +1508,7 @@ class HTTPRunDB(RunDBInterface):
         until: Optional[datetime] = None,
         kind: Optional[str] = None,
         format_: mlrun.common.formatters.FunctionFormat = mlrun.common.formatters.FunctionFormat.full,
+        states: typing.Optional[list[mlrun.common.schemas.FunctionState]] = None,
     ):
         """Retrieve a list of functions, filtered by specific criteria.
 
@@ -1514,6 +1526,7 @@ class HTTPRunDB(RunDBInterface):
         :param until: Return functions updated before this date (as datetime object).
         :param kind: Return only functions of a specific kind.
         :param format_: The format in which to return the functions. Default is 'full'.
+        :param states: Return only functions whose state is one of the provided states.
         :returns: List of function objects (as dictionary).
         """
         functions, _ = self._list_functions(
@@ -1525,6 +1538,7 @@ class HTTPRunDB(RunDBInterface):
             format_=format_,
             since=since,
             until=until,
+            states=states,
             return_all=True,
         )
         return functions
@@ -5135,6 +5149,7 @@ class HTTPRunDB(RunDBInterface):
         format_: Optional[str] = None,
         since: Optional[datetime] = None,
         until: Optional[datetime] = None,
+        states: typing.Optional[list[mlrun.common.schemas.FunctionState]] = None,
         page: Optional[int] = None,
         page_size: Optional[int] = None,
         page_token: Optional[str] = None,
@@ -5152,6 +5167,7 @@ class HTTPRunDB(RunDBInterface):
             "since": datetime_to_iso(since),
             "until": datetime_to_iso(until),
             "format": format_,
+            "state": states or None,
             "page": page,
             "page-size": page_size,
             "page-token": page_token,

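list_functions() (and the run DB interfaces above) now accept a states filter that is forwarded as the "state" query parameter. A hedged example; the project name is a placeholder and FunctionState.ready is assumed to be one of the available states:

    import mlrun
    import mlrun.common.schemas as schemas

    db = mlrun.get_run_db()
    ready_functions = db.list_functions(
        project="my-project",
        states=[schemas.FunctionState.ready],
    )
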
mlrun/db/nopdb.py
CHANGED
@@ -274,6 +274,7 @@ class NopDB(RunDBInterface):
         tag: Optional[str] = None,
         kind: Optional[str] = None,
         labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
+        states: Optional[list[mlrun.common.schemas.FunctionState]] = None,
         format_: mlrun.common.formatters.FunctionFormat = mlrun.common.formatters.FunctionFormat.full,
         since: Optional[datetime.datetime] = None,
         until: Optional[datetime.datetime] = None,

mlrun/feature_store/__init__.py
CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 __all__ = [
+    "ingest",
     "delete_feature_set",
     "delete_feature_vector",
     "get_feature_set",
@@ -35,6 +36,7 @@ from .api import (
     delete_feature_vector,
     get_feature_set,
     get_feature_vector,
+    ingest,
 )
 from .common import RunConfig
 from .feature_set import FeatureSet

mlrun/feature_store/api.py
CHANGED
@@ -244,6 +244,83 @@ def _get_namespace(run_config: RunConfig) -> dict[str, Any]:
     return get_caller_globals()
 
 
+def ingest(
+    mlrun_context: Union["mlrun.MLrunProject", "mlrun.MLClientCtx"],
+    featureset: Union[FeatureSet, str] = None,
+    source=None,
+    targets: Optional[list[DataTargetBase]] = None,
+    namespace=None,
+    return_df: bool = True,
+    infer_options: InferOptions = InferOptions.default(),
+    run_config: RunConfig = None,
+    spark_context=None,
+    overwrite=None,
+) -> Optional[pd.DataFrame]:
+    """Read local DataFrame, file, URL, or source into the feature store
+    Ingest reads from the source, run the graph transformations, infers metadata and stats
+    and writes the results to the default of specified targets
+
+    when targets are not specified data is stored in the configured default targets
+    (will usually be NoSQL for real-time and Parquet for offline).
+
+    the `run_config` parameter allow specifying the function and job configuration,
+    see: :py:class:`~mlrun.feature_store.RunConfig`
+
+    example::
+
+        stocks_set = FeatureSet("stocks", entities=[Entity("ticker")])
+        stocks = pd.read_csv("stocks.csv")
+        df = ingest(stocks_set, stocks, infer_options=fstore.InferOptions.default())
+
+        # for running as remote job
+        config = RunConfig(image="mlrun/mlrun")
+        df = ingest(stocks_set, stocks, run_config=config)
+
+        # specify source and targets
+        source = CSVSource("mycsv", path="measurements.csv")
+        targets = [CSVTarget("mycsv", path="./mycsv.csv")]
+        ingest(measurements, source, targets)
+
+    :param mlrun_context: mlrun context
+    :param featureset: feature set object or featureset.uri. (uri must be of a feature set that is in the DB,
+                       call `.save()` if it's not)
+    :param source: source dataframe or other sources (e.g. parquet source see:
+                   :py:class:`~mlrun.datastore.ParquetSource` and other classes in mlrun.datastore with suffix
+                   Source)
+    :param targets: optional list of data target objects
+    :param namespace: namespace or module containing graph classes
+    :param return_df: indicate if to return a dataframe with the graph results
+    :param infer_options: schema (for discovery of entities, features in featureset), index, stats,
+                          histogram and preview infer options (:py:class:`~mlrun.feature_store.InferOptions`)
+    :param run_config: function and/or run configuration for remote jobs,
+                       see :py:class:`~mlrun.feature_store.RunConfig`
+    :param spark_context: local spark session for spark ingestion, example for creating the spark context:
+                          `spark = SparkSession.builder.appName("Spark function").getOrCreate()`
+                          For remote spark ingestion, this should contain the remote spark service name
+    :param overwrite: delete the targets' data prior to ingestion
+                      (default: True for non scheduled ingest - deletes the targets that are about to be ingested.
+                      False for scheduled ingest - does not delete the target)
+    :return: if return_df is True, a dataframe will be returned based on the graph
+    """
+    if not mlrun_context:
+        raise mlrun.errors.MLRunValueError(
+            "mlrun_context must be defined when calling ingest()"
+        )
+
+    return _ingest(
+        featureset,
+        source,
+        targets,
+        namespace,
+        return_df,
+        infer_options,
+        run_config,
+        mlrun_context,
+        spark_context,
+        overwrite,
+    )
+
+
 def _ingest(
     featureset: Union[FeatureSet, str] = None,
     source=None,
