PyPI - mlrun - Versions diffs - 1.7.0rc5__py3-none-any.whl → 1.7.2__py3-none-any.whl - Mend

mlrun 1.7.0rc5__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (234) hide show

mlrun/__init__.py +11 -1
mlrun/__main__.py +39 -121
mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
mlrun/alerts/alert.py +248 -0
mlrun/api/schemas/__init__.py +4 -3
mlrun/artifacts/__init__.py +8 -3
mlrun/artifacts/base.py +39 -254
mlrun/artifacts/dataset.py +9 -190
mlrun/artifacts/manager.py +73 -46
mlrun/artifacts/model.py +30 -158
mlrun/artifacts/plots.py +23 -380
mlrun/common/constants.py +73 -2
mlrun/common/db/sql_session.py +3 -2
mlrun/common/formatters/__init__.py +21 -0
mlrun/common/formatters/artifact.py +46 -0
mlrun/common/formatters/base.py +113 -0
mlrun/common/formatters/feature_set.py +44 -0
mlrun/common/formatters/function.py +46 -0
mlrun/common/formatters/pipeline.py +53 -0
mlrun/common/formatters/project.py +51 -0
mlrun/common/formatters/run.py +29 -0
mlrun/common/helpers.py +11 -1
mlrun/{runtimes → common/runtimes}/constants.py +32 -4
mlrun/common/schemas/__init__.py +21 -4
mlrun/common/schemas/alert.py +202 -0
mlrun/common/schemas/api_gateway.py +113 -2
mlrun/common/schemas/artifact.py +28 -1
mlrun/common/schemas/auth.py +11 -0
mlrun/common/schemas/client_spec.py +2 -1
mlrun/common/schemas/common.py +7 -4
mlrun/common/schemas/constants.py +3 -0
mlrun/common/schemas/feature_store.py +58 -28
mlrun/common/schemas/frontend_spec.py +8 -0
mlrun/common/schemas/function.py +11 -0
mlrun/common/schemas/hub.py +7 -9
mlrun/common/schemas/model_monitoring/__init__.py +21 -4
mlrun/common/schemas/model_monitoring/constants.py +136 -42
mlrun/common/schemas/model_monitoring/grafana.py +9 -5
mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
mlrun/common/schemas/notification.py +69 -12
mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
mlrun/common/schemas/pipeline.py +7 -0
mlrun/common/schemas/project.py +67 -16
mlrun/common/schemas/runs.py +17 -0
mlrun/common/schemas/schedule.py +1 -1
mlrun/common/schemas/workflow.py +10 -2
mlrun/common/types.py +14 -1
mlrun/config.py +224 -58
mlrun/data_types/data_types.py +11 -1
mlrun/data_types/spark.py +5 -4
mlrun/data_types/to_pandas.py +75 -34
mlrun/datastore/__init__.py +8 -10
mlrun/datastore/alibaba_oss.py +131 -0
mlrun/datastore/azure_blob.py +131 -43
mlrun/datastore/base.py +107 -47
mlrun/datastore/datastore.py +17 -7
mlrun/datastore/datastore_profile.py +91 -7
mlrun/datastore/dbfs_store.py +3 -7
mlrun/datastore/filestore.py +1 -3
mlrun/datastore/google_cloud_storage.py +92 -32
mlrun/datastore/hdfs.py +5 -0
mlrun/datastore/inmem.py +6 -3
mlrun/datastore/redis.py +3 -2
mlrun/datastore/s3.py +30 -12
mlrun/datastore/snowflake_utils.py +45 -0
mlrun/datastore/sources.py +274 -59
mlrun/datastore/spark_utils.py +30 -0
mlrun/datastore/store_resources.py +9 -7
mlrun/datastore/storeytargets.py +151 -0
mlrun/datastore/targets.py +374 -102
mlrun/datastore/utils.py +68 -5
mlrun/datastore/v3io.py +28 -50
mlrun/db/auth_utils.py +152 -0
mlrun/db/base.py +231 -22
mlrun/db/factory.py +1 -4
mlrun/db/httpdb.py +864 -228
mlrun/db/nopdb.py +268 -16
mlrun/errors.py +35 -5
mlrun/execution.py +111 -38
mlrun/feature_store/__init__.py +0 -2
mlrun/feature_store/api.py +46 -53
mlrun/feature_store/common.py +6 -11
mlrun/feature_store/feature_set.py +48 -23
mlrun/feature_store/feature_vector.py +13 -2
mlrun/feature_store/ingestion.py +7 -6
mlrun/feature_store/retrieval/base.py +9 -4
mlrun/feature_store/retrieval/dask_merger.py +2 -0
mlrun/feature_store/retrieval/job.py +13 -4
mlrun/feature_store/retrieval/local_merger.py +2 -0
mlrun/feature_store/retrieval/spark_merger.py +24 -32
mlrun/feature_store/steps.py +38 -19
mlrun/features.py +6 -14
mlrun/frameworks/_common/plan.py +3 -3
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
mlrun/frameworks/_ml_common/plan.py +1 -1
mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
mlrun/frameworks/lgbm/__init__.py +1 -1
mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
mlrun/frameworks/lgbm/model_handler.py +1 -1
mlrun/frameworks/parallel_coordinates.py +4 -4
mlrun/frameworks/pytorch/__init__.py +2 -2
mlrun/frameworks/sklearn/__init__.py +1 -1
mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
mlrun/frameworks/tf_keras/__init__.py +5 -2
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
mlrun/frameworks/xgboost/__init__.py +1 -1
mlrun/k8s_utils.py +57 -12
mlrun/launcher/__init__.py +1 -1
mlrun/launcher/base.py +6 -5
mlrun/launcher/client.py +13 -11
mlrun/launcher/factory.py +1 -1
mlrun/launcher/local.py +15 -5
mlrun/launcher/remote.py +10 -3
mlrun/lists.py +6 -2
mlrun/model.py +297 -48
mlrun/model_monitoring/__init__.py +1 -1
mlrun/model_monitoring/api.py +152 -357
mlrun/model_monitoring/applications/__init__.py +10 -0
mlrun/model_monitoring/applications/_application_steps.py +190 -0
mlrun/model_monitoring/applications/base.py +108 -0
mlrun/model_monitoring/applications/context.py +341 -0
mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
mlrun/model_monitoring/applications/results.py +99 -0
mlrun/model_monitoring/controller.py +130 -303
mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
mlrun/model_monitoring/db/stores/__init__.py +136 -0
mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
mlrun/model_monitoring/db/stores/base/store.py +213 -0
mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
mlrun/model_monitoring/db/tsdb/base.py +448 -0
mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
mlrun/model_monitoring/features_drift_table.py +34 -22
mlrun/model_monitoring/helpers.py +177 -39
mlrun/model_monitoring/model_endpoint.py +3 -2
mlrun/model_monitoring/stream_processing.py +165 -398
mlrun/model_monitoring/tracking_policy.py +7 -1
mlrun/model_monitoring/writer.py +161 -125
mlrun/package/packagers/default_packager.py +2 -2
mlrun/package/packagers_manager.py +1 -0
mlrun/package/utils/_formatter.py +2 -2
mlrun/platforms/__init__.py +11 -10
mlrun/platforms/iguazio.py +67 -228
mlrun/projects/__init__.py +6 -1
mlrun/projects/operations.py +47 -20
mlrun/projects/pipelines.py +396 -249
mlrun/projects/project.py +1125 -414
mlrun/render.py +28 -22
mlrun/run.py +207 -180
mlrun/runtimes/__init__.py +76 -11
mlrun/runtimes/base.py +40 -14
mlrun/runtimes/daskjob.py +9 -2
mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
mlrun/runtimes/funcdoc.py +1 -29
mlrun/runtimes/kubejob.py +34 -128
mlrun/runtimes/local.py +39 -10
mlrun/runtimes/mpijob/__init__.py +0 -20
mlrun/runtimes/mpijob/abstract.py +8 -8
mlrun/runtimes/mpijob/v1.py +1 -1
mlrun/runtimes/nuclio/api_gateway.py +646 -177
mlrun/runtimes/nuclio/application/__init__.py +15 -0
mlrun/runtimes/nuclio/application/application.py +758 -0
mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
mlrun/runtimes/nuclio/function.py +188 -68
mlrun/runtimes/nuclio/serving.py +57 -60
mlrun/runtimes/pod.py +191 -58
mlrun/runtimes/remotesparkjob.py +11 -8
mlrun/runtimes/sparkjob/spark3job.py +17 -18
mlrun/runtimes/utils.py +40 -73
mlrun/secrets.py +6 -2
mlrun/serving/__init__.py +8 -1
mlrun/serving/remote.py +2 -3
mlrun/serving/routers.py +89 -64
mlrun/serving/server.py +54 -26
mlrun/serving/states.py +187 -56
mlrun/serving/utils.py +19 -11
mlrun/serving/v2_serving.py +136 -63
mlrun/track/tracker.py +2 -1
mlrun/track/trackers/mlflow_tracker.py +5 -0
mlrun/utils/async_http.py +26 -6
mlrun/utils/db.py +18 -0
mlrun/utils/helpers.py +375 -105
mlrun/utils/http.py +2 -2
mlrun/utils/logger.py +75 -9
mlrun/utils/notifications/notification/__init__.py +14 -10
mlrun/utils/notifications/notification/base.py +48 -0
mlrun/utils/notifications/notification/console.py +2 -0
mlrun/utils/notifications/notification/git.py +24 -1
mlrun/utils/notifications/notification/ipython.py +2 -0
mlrun/utils/notifications/notification/slack.py +96 -21
mlrun/utils/notifications/notification/webhook.py +63 -2
mlrun/utils/notifications/notification_pusher.py +146 -16
mlrun/utils/regex.py +9 -0
mlrun/utils/retryer.py +3 -2
mlrun/utils/v3io_clients.py +2 -3
mlrun/utils/version/version.json +2 -2
mlrun-1.7.2.dist-info/METADATA +390 -0
mlrun-1.7.2.dist-info/RECORD +351 -0
{mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
mlrun/feature_store/retrieval/conversion.py +0 -271
mlrun/kfpops.py +0 -868
mlrun/model_monitoring/application.py +0 -310
mlrun/model_monitoring/batch.py +0 -974
mlrun/model_monitoring/controller_handler.py +0 -37
mlrun/model_monitoring/prometheus.py +0 -216
mlrun/model_monitoring/stores/__init__.py +0 -111
mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
mlrun/model_monitoring/stores/models/__init__.py +0 -27
mlrun/model_monitoring/stores/models/base.py +0 -84
mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
mlrun/platforms/other.py +0 -305
mlrun-1.7.0rc5.dist-info/METADATA +0 -269
mlrun-1.7.0rc5.dist-info/RECORD +0 -323
{mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
{mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
{mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0

mlrun/artifacts/base.py CHANGED Viewed

@@ -20,7 +20,6 @@ import warnings
 import zipfile
 import yaml
-from deprecated import deprecated
 import mlrun
 import mlrun.artifacts
@@ -88,9 +87,10 @@ class ArtifactSpec(ModelObj):
         "db_key",
         "extra_data",
         "unpackaging_instructions",
+        "producer",
     ]
-    _extra_fields = ["annotations", "producer", "sources", "license", "encoding"]
+    _extra_fields = ["annotations", "sources", "license", "encoding"]
     _exclude_fields_from_uid_hash = [
         # if the artifact is first created, it will not have a db_key,
         # exclude it so further updates of the artifacts will have the same hash
@@ -191,12 +191,30 @@ class Artifact(ModelObj):
         format=None,
         size=None,
         target_path=None,
-        # All params up until here are legacy params for compatibility with legacy artifacts.
         project=None,
+        src_path: str = None,
+        # All params up until here are legacy params for compatibility with legacy artifacts.
+        # TODO: remove them in 1.9.0.
         metadata: ArtifactMetadata = None,
         spec: ArtifactSpec = None,
-        src_path: str = None,
     ):
+        if (
+            key
+            or body
+            or viewer
+            or is_inline
+            or format
+            or size
+            or target_path
+            or project
+            or src_path
+        ):
+            warnings.warn(
+                "Artifact constructor parameters are deprecated and will be removed in 1.9.0. "
+                "Use the metadata and spec parameters instead.",
+                DeprecationWarning,
+            )
         self._metadata = None
         self.metadata = metadata
         self._spec = None
@@ -614,6 +632,7 @@ class DirArtifactSpec(ArtifactSpec):
         "src_path",
         "target_path",
         "db_key",
+        "producer",
     ]
@@ -698,11 +717,18 @@ class LinkArtifact(Artifact):
         link_iteration=None,
         link_key=None,
         link_tree=None,
-        # All params up until here are legacy params for compatibility with legacy artifacts.
         project=None,
+        # All params up until here are legacy params for compatibility with legacy artifacts.
+        # TODO: remove them in 1.9.0.
         metadata: ArtifactMetadata = None,
         spec: LinkArtifactSpec = None,
     ):
+        if key or target_path or link_iteration or link_key or link_tree or project:
+            warnings.warn(
+                "Artifact constructor parameters are deprecated and will be removed in 1.9.0. "
+                "Use the metadata and spec parameters instead.",
+                DeprecationWarning,
+            )
         super().__init__(
             key, target_path=target_path, project=project, metadata=metadata, spec=spec
         )
@@ -719,238 +745,6 @@ class LinkArtifact(Artifact):
         self._spec = self._verify_dict(spec, "spec", LinkArtifactSpec)
-# TODO: remove in 1.7.0
-@deprecated(
-    version="1.3.0",
-    reason="'LegacyArtifact' will be removed in 1.7.0, use 'Artifact' instead",
-    category=FutureWarning,
-)
-class LegacyArtifact(ModelObj):
-    _dict_fields = [
-        "key",
-        "kind",
-        "iter",
-        "tree",
-        "src_path",
-        "target_path",
-        "hash",
-        "description",
-        "viewer",
-        "inline",
-        "format",
-        "size",
-        "db_key",
-        "extra_data",
-        "tag",
-    ]
-    kind = ""
-    _store_prefix = StorePrefix.Artifact
-    def __init__(
-        self,
-        key=None,
-        body=None,
-        viewer=None,
-        is_inline=False,
-        format=None,
-        size=None,
-        target_path=None,
-    ):
-        self.key = key
-        self.project = ""
-        self.db_key = None
-        self.size = size
-        self.iter = None
-        self.tree = None
-        self.updated = None
-        self.target_path = target_path
-        self.src_path = None
-        self._body = body
-        self.format = format
-        self.description = None
-        self.viewer = viewer
-        self.encoding = None
-        self.labels = {}
-        self.annotations = None
-        self.sources = []
-        self.producer = None
-        self.hash = None
-        self._inline = is_inline
-        self.license = ""
-        self.extra_data = {}
-        self.tag = None  # temp store of the tag
-    def before_log(self):
-        for key, item in self.extra_data.items():
-            if hasattr(item, "target_path"):
-                self.extra_data[key] = item.target_path
-    def is_inline(self):
-        return self._inline
-    @property
-    def is_dir(self):
-        """this is a directory"""
-        return False
-    @property
-    def inline(self):
-        """inline data (body)"""
-        if self._inline:
-            return self.get_body()
-        return None
-    @inline.setter
-    def inline(self, body):
-        self._body = body
-        if body:
-            self._inline = True
-    @property
-    def uri(self):
-        """return artifact uri (store://..)"""
-        return self.get_store_url()
-    def to_dataitem(self):
-        """return a DataItem object (if available) representing the artifact content"""
-        uri = self.get_store_url()
-        if uri:
-            return mlrun.get_dataitem(uri)
-    def get_body(self):
-        """get the artifact body when inline"""
-        return self._body
-    def get_target_path(self):
-        """get the absolute target path for the artifact"""
-        return self.target_path
-    def get_store_url(self, with_tag=True, project=None):
-        """get the artifact uri (store://..) with optional parameters"""
-        tag = self.tree if with_tag else None
-        uri = generate_artifact_uri(
-            project or self.project, self.db_key, tag, self.iter
-        )
-        return mlrun.datastore.get_store_uri(self._store_prefix, uri)
-    def base_dict(self):
-        """return short dict form of the artifact"""
-        return super().to_dict()
-    def to_dict(self, fields: list = None, exclude: list = None, strip: bool = False):
-        """return long dict form of the artifact"""
-        return super().to_dict(
-            self._dict_fields
-            + ["updated", "labels", "annotations", "producer", "sources", "project"],
-            strip=strip,
-        )
-    @classmethod
-    def from_dict(cls, struct=None, fields=None):
-        fields = fields or cls._dict_fields + [
-            "updated",
-            "labels",
-            "annotations",
-            "producer",
-            "sources",
-            "project",
-        ]
-        return super().from_dict(struct, fields=fields)
-    def upload(self):
-        """internal, upload to target store"""
-        src_path = self.src_path
-        body = self.get_body()
-        if body:
-            self._upload_body(body)
-        else:
-            if src_path and os.path.isfile(src_path):
-                self._upload_file(src_path)
-    def _upload_body(self, body, target=None):
-        if mlrun.mlconf.artifacts.calculate_hash:
-            self.hash = calculate_blob_hash(body)
-        self.size = len(body)
-        mlrun.datastore.store_manager.object(url=target or self.target_path).put(body)
-    def _upload_file(self, src, target=None):
-        if mlrun.mlconf.artifacts.calculate_hash:
-            self.hash = calculate_local_file_hash(src)
-        self.size = os.stat(src).st_size
-        mlrun.datastore.store_manager.object(url=target or self.target_path).upload(src)
-    def artifact_kind(self):
-        return self.kind
-    def generate_target_path(self, artifact_path, producer):
-        return generate_target_path(self, artifact_path, producer)
-# TODO: remove in 1.7.0
-@deprecated(
-    version="1.3.0",
-    reason="'LegacyDirArtifact' will be removed in 1.7.0, use 'DirArtifact' instead",
-    category=FutureWarning,
-)
-class LegacyDirArtifact(LegacyArtifact):
-    _dict_fields = [
-        "key",
-        "kind",
-        "iter",
-        "tree",
-        "src_path",
-        "target_path",
-        "description",
-        "db_key",
-    ]
-    kind = "dir"
-    @property
-    def is_dir(self):
-        return True
-    def upload(self):
-        if not self.src_path:
-            raise ValueError("local/source path not specified")
-        files = os.listdir(self.src_path)
-        for f in files:
-            file_path = os.path.join(self.src_path, f)
-            if not os.path.isfile(file_path):
-                raise ValueError(f"file {file_path} not found, cant upload")
-            target = os.path.join(self.target_path, f)
-            mlrun.datastore.store_manager.object(url=target).upload(file_path)
-# TODO: remove in 1.7.0
-@deprecated(
-    version="1.3.0",
-    reason="'LegacyLinkArtifact' will be removed in 1.7.0, use 'LinkArtifact' instead",
-    category=FutureWarning,
-)
-class LegacyLinkArtifact(LegacyArtifact):
-    _dict_fields = LegacyArtifact._dict_fields + [
-        "link_iteration",
-        "link_key",
-        "link_tree",
-    ]
-    kind = "link"
-    def __init__(
-        self,
-        key=None,
-        target_path="",
-        link_iteration=None,
-        link_key=None,
-        link_tree=None,
-    ):
-        super().__init__(key)
-        self.target_path = target_path
-        self.link_iteration = link_iteration
-        self.link_key = link_key
-        self.link_tree = link_tree
 def calculate_blob_hash(data):
     if isinstance(data, str):
         data = data.encode()
@@ -1056,25 +850,16 @@ def generate_target_path(item: Artifact, artifact_path, producer):
     return f"{artifact_path}{item.key}{suffix}"
+# TODO: left to support data migration from legacy artifacts to new artifacts. Remove in 1.8.0.
 def convert_legacy_artifact_to_new_format(
-    legacy_artifact: typing.Union[LegacyArtifact, dict],
+    legacy_artifact: dict,
 ) -> Artifact:
     """Converts a legacy artifact to a new format.
     :param legacy_artifact: The legacy artifact to convert.
     :return: The converted artifact.
     """
-    if isinstance(legacy_artifact, LegacyArtifact):
-        legacy_artifact_dict = legacy_artifact.to_dict()
-    elif isinstance(legacy_artifact, dict):
-        legacy_artifact_dict = legacy_artifact
-    else:
-        raise TypeError(
-            f"Unsupported type '{type(legacy_artifact)}' for legacy artifact"
-        )
-    artifact_key = legacy_artifact_dict.get("key", "")
-    artifact_tag = legacy_artifact_dict.get("tag", "")
+    artifact_key = legacy_artifact.get("key", "")
+    artifact_tag = legacy_artifact.get("tag", "")
     if artifact_tag:
         artifact_key = f"{artifact_key}:{artifact_tag}"
     # TODO: remove in 1.8.0
@@ -1085,12 +870,12 @@ def convert_legacy_artifact_to_new_format(
     )
     artifact = mlrun.artifacts.artifact_types.get(
-        legacy_artifact_dict.get("kind", "artifact"), mlrun.artifacts.Artifact
+        legacy_artifact.get("kind", "artifact"), mlrun.artifacts.Artifact
     )()
-    artifact.metadata = artifact.metadata.from_dict(legacy_artifact_dict)
-    artifact.spec = artifact.spec.from_dict(legacy_artifact_dict)
-    artifact.status = artifact.status.from_dict(legacy_artifact_dict)
+    artifact.metadata = artifact.metadata.from_dict(legacy_artifact)
+    artifact.spec = artifact.spec.from_dict(legacy_artifact)
+    artifact.status = artifact.status.from_dict(legacy_artifact)
     return artifact

mlrun/artifacts/dataset.py CHANGED Viewed

@@ -13,12 +13,12 @@
 # limitations under the License.
 import os
 import pathlib
+import warnings
 from io import StringIO
 from typing import Optional
 import numpy as np
 import pandas as pd
-from deprecated import deprecated
 from pandas.io.json import build_table_schema
 import mlrun
@@ -27,7 +27,7 @@ import mlrun.datastore
 import mlrun.utils.helpers
 from mlrun.config import config as mlconf
-from .base import Artifact, ArtifactSpec, LegacyArtifact, StorePrefix
+from .base import Artifact, ArtifactSpec, StorePrefix
 default_preview_rows_length = 20
 max_preview_columns = mlconf.artifacts.datasets.max_preview_columns
@@ -161,6 +161,13 @@ class DatasetArtifact(Artifact):
         label_column: str = None,
         **kwargs,
     ):
+        if key or format or target_path:
+            warnings.warn(
+                "Artifact constructor parameters are deprecated and will be removed in 1.9.0. "
+                "Use the metadata and spec parameters instead.",
+                DeprecationWarning,
+            )
         format = (format or "").lower()
         super().__init__(key, None, format=format, target_path=target_path)
         if format and format not in self.SUPPORTED_FORMATS:
@@ -360,194 +367,6 @@ class DatasetArtifact(Artifact):
         self.status.stats = stats
-# TODO: remove in 1.7.0
-@deprecated(
-    version="1.3.0",
-    reason="'LegacyTableArtifact' will be removed in 1.7.0, use 'TableArtifact' instead",
-    category=FutureWarning,
-)
-class LegacyTableArtifact(LegacyArtifact):
-    _dict_fields = LegacyArtifact._dict_fields + ["schema", "header"]
-    kind = "table"
-    def __init__(
-        self,
-        key=None,
-        body=None,
-        df=None,
-        viewer=None,
-        visible=False,
-        inline=False,
-        format=None,
-        header=None,
-        schema=None,
-    ):
-        if key:
-            key_suffix = pathlib.Path(key).suffix
-            if not format and key_suffix:
-                format = key_suffix[1:]
-        super().__init__(key, body, viewer=viewer, is_inline=inline, format=format)
-        if df is not None:
-            self._is_df = True
-            self.header = df.reset_index(drop=True).columns.values.tolist()
-            self.format = "csv"  # todo other formats
-            # if visible and not key_suffix:
-            #     key += '.csv'
-            self._body = df
-        else:
-            self._is_df = False
-            self.header = header
-        self.schema = schema
-        if not viewer:
-            viewer = "table" if visible else None
-        self.viewer = viewer
-    def get_body(self):
-        if not self._is_df:
-            return self._body
-        csv_buffer = StringIO()
-        self._body.to_csv(
-            csv_buffer,
-            encoding="utf-8",
-            **mlrun.utils.line_terminator_kwargs(),
-        )
-        return csv_buffer.getvalue()
-# TODO: remove in 1.7.0
-@deprecated(
-    version="1.3.0",
-    reason="'LegacyDatasetArtifact' will be removed in 1.7.0, use 'DatasetArtifact' instead",
-    category=FutureWarning,
-)
-class LegacyDatasetArtifact(LegacyArtifact):
-    # List of all the supported saving formats of a DataFrame:
-    SUPPORTED_FORMATS = ["csv", "parquet", "pq", "tsdb", "kv"]
-    _dict_fields = LegacyArtifact._dict_fields + [
-        "schema",
-        "header",
-        "length",
-        "preview",
-        "stats",
-        "extra_data",
-        "column_metadata",
-    ]
-    kind = "dataset"
-    def __init__(
-        self,
-        key: str = None,
-        df=None,
-        preview: int = None,
-        format: str = "",  # TODO: should be changed to 'fmt'.
-        stats: bool = None,
-        target_path: str = None,
-        extra_data: dict = None,
-        column_metadata: dict = None,
-        ignore_preview_limits: bool = False,
-        **kwargs,
-    ):
-        format = (format or "").lower()
-        super().__init__(key, None, format=format, target_path=target_path)
-        if format and format not in self.SUPPORTED_FORMATS:
-            raise ValueError(
-                f"unsupported format {format} use one of {'|'.join(self.SUPPORTED_FORMATS)}"
-            )
-        if format == "pq":
-            format = "parquet"
-        self.format = format
-        self.stats = None
-        self.extra_data = extra_data or {}
-        self.column_metadata = column_metadata or {}
-        if df is not None:
-            if hasattr(df, "dask"):
-                # If df is a Dask DataFrame, and it's small in-memory, convert to Pandas
-                if (df.memory_usage(deep=True).sum().compute() / 1e9) < max_ddf_size:
-                    df = df.compute()
-            self.update_preview_fields_from_df(
-                self, df, stats, preview, ignore_preview_limits
-            )
-        self._df = df
-        self._kw = kwargs
-    def upload(self):
-        suffix = pathlib.Path(self.target_path).suffix
-        format = self.format
-        if not format:
-            if suffix and suffix in [".csv", ".parquet", ".pq"]:
-                format = "csv" if suffix == ".csv" else "parquet"
-            else:
-                format = "parquet"
-        if not suffix and not self.target_path.startswith("memory://"):
-            self.target_path = self.target_path + "." + format
-        self.size, self.hash = upload_dataframe(
-            self._df,
-            self.target_path,
-            format=format,
-            src_path=self.src_path,
-            **self._kw,
-        )
-    @property
-    def df(self) -> pd.DataFrame:
-        """
-        Get the dataset in this artifact.
-        :return: The dataset as a DataFrame.
-        """
-        return self._df
-    @staticmethod
-    def is_format_supported(fmt: str) -> bool:
-        """
-        Check whether the given dataset format is supported by the DatasetArtifact.
-        :param fmt: The format string to check.
-        :return: True if the format is supported and False if not.
-        """
-        return fmt in DatasetArtifact.SUPPORTED_FORMATS
-    @staticmethod
-    def update_preview_fields_from_df(
-        artifact, df, stats=None, preview_rows_length=None, ignore_preview_limits=False
-    ):
-        preview_rows_length = preview_rows_length or default_preview_rows_length
-        if hasattr(df, "dask"):
-            artifact.length = df.shape[0].compute()
-            preview_df = df.sample(frac=ddf_sample_pct).compute()
-        else:
-            artifact.length = df.shape[0]
-            preview_df = df
-        if artifact.length > preview_rows_length and not ignore_preview_limits:
-            preview_df = df.head(preview_rows_length)
-        preview_df = preview_df.reset_index()
-        if len(preview_df.columns) > max_preview_columns and not ignore_preview_limits:
-            preview_df = preview_df.iloc[:, :max_preview_columns]
-        artifact.header = preview_df.columns.values.tolist()
-        artifact.preview = preview_df.values.tolist()
-        # Table schema parsing doesn't require a column named "index"
-        # to align its output with previously generated header and preview data
-        if "index" in preview_df.columns:
-            preview_df.drop("index", axis=1, inplace=True)
-        artifact.schema = build_table_schema(preview_df)
-        if (
-            stats
-            or (artifact.length < max_csv and len(df.columns) < max_preview_columns)
-            or ignore_preview_limits
-        ):
-            artifact.stats = get_df_stats(df)
 def get_df_stats(df):
     if hasattr(df, "dask"):
         df = df.sample(frac=ddf_sample_pct).compute()

mlrun 1.7.0rc5__py3-none-any.whl → 1.7.2__py3-none-any.whl

Potentially problematic release.

mlrun 1.7.0rc5__py3-none-any.whl → 1.7.2__py3-none-any.whl