mlrun 1.6.0rc13__py3-none-any.whl → 1.6.0rc15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/__main__.py +7 -2
- mlrun/artifacts/__init__.py +7 -1
- mlrun/artifacts/base.py +38 -3
- mlrun/artifacts/dataset.py +1 -1
- mlrun/artifacts/manager.py +5 -5
- mlrun/artifacts/model.py +1 -1
- mlrun/common/schemas/__init__.py +8 -1
- mlrun/common/schemas/artifact.py +36 -1
- mlrun/config.py +11 -0
- mlrun/datastore/azure_blob.py +37 -79
- mlrun/datastore/datastore_profile.py +2 -1
- mlrun/datastore/store_resources.py +2 -3
- mlrun/datastore/targets.py +3 -3
- mlrun/db/base.py +8 -5
- mlrun/db/httpdb.py +151 -71
- mlrun/db/nopdb.py +6 -3
- mlrun/feature_store/feature_vector.py +1 -1
- mlrun/feature_store/steps.py +2 -2
- mlrun/frameworks/_common/model_handler.py +1 -1
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +0 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +1 -1
- mlrun/frameworks/sklearn/metric.py +0 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +1 -2
- mlrun/model_monitoring/application.py +20 -27
- mlrun/projects/pipelines.py +5 -5
- mlrun/projects/project.py +3 -3
- mlrun/runtimes/constants.py +10 -0
- mlrun/runtimes/local.py +2 -3
- mlrun/utils/db.py +6 -5
- mlrun/utils/helpers.py +53 -9
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.0rc13.dist-info → mlrun-1.6.0rc15.dist-info}/METADATA +26 -30
- {mlrun-1.6.0rc13.dist-info → mlrun-1.6.0rc15.dist-info}/RECORD +37 -37
- {mlrun-1.6.0rc13.dist-info → mlrun-1.6.0rc15.dist-info}/LICENSE +0 -0
- {mlrun-1.6.0rc13.dist-info → mlrun-1.6.0rc15.dist-info}/WHEEL +0 -0
- {mlrun-1.6.0rc13.dist-info → mlrun-1.6.0rc15.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.0rc13.dist-info → mlrun-1.6.0rc15.dist-info}/top_level.txt +0 -0
mlrun/__main__.py
CHANGED
@@ -940,12 +940,17 @@ def version():
 )
 @click.option("--offset", type=int, default=0, help="byte offset")
 @click.option("--db", help="api and db service path/url")
-@click.option("--watch", "-w", is_flag=True, help="
+@click.option("--watch", "-w", is_flag=True, help="Deprecated. not in use")
 def logs(uid, project, offset, db, watch):
     """Get or watch task logs"""
+    if watch:
+        warnings.warn(
+            "'--watch' is deprecated in 1.6.0, and will be removed in 1.8.0, "
+            # TODO: Remove in 1.8.0
+        )
     mldb = get_run_db(db or mlconf.dbpath)
     if mldb.kind == "http":
-        state, _ = mldb.watch_log(uid, project, watch=
+        state, _ = mldb.watch_log(uid, project, watch=False, offset=offset)
     else:
         state, text = mldb.get_log(uid, project, offset=offset)
     if text:
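The CLI change turns `--watch` into a no-op that only emits a deprecation warning. A minimal, self-contained sketch of the same pattern for a click flag; the command body and the warning text below are illustrative, not mlrun's exact code:

import warnings

import click


@click.command()
@click.option("--watch", "-w", is_flag=True, help="Deprecated. not in use")
def logs(watch):
    """Fetch logs once; --watch is still accepted for backwards compatibility but ignored."""
    if watch:
        # warn callers that still pass the flag, without changing behavior
        warnings.warn(
            "'--watch' is deprecated and will be removed in a future release",
            FutureWarning,
        )
    click.echo("fetching logs once")


if __name__ == "__main__":
    logs()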
mlrun/artifacts/__init__.py
CHANGED
@@ -19,6 +19,12 @@ __all__ = ["get_model", "update_model"]
 
 from .base import Artifact, ArtifactMetadata, ArtifactSpec, get_artifact_meta
 from .dataset import DatasetArtifact, TableArtifact, update_dataset_meta
-from .manager import
+from .manager import (
+    ArtifactManager,
+    ArtifactProducer,
+    artifact_types,
+    dict_to_artifact,
+    legacy_artifact_types,
+)
 from .model import ModelArtifact, get_model, update_model
 from .plots import BokehArtifact, ChartArtifact, PlotArtifact, PlotlyArtifact
mlrun/artifacts/base.py
CHANGED
@@ -22,6 +22,7 @@ import yaml
 from deprecated import deprecated
 
 import mlrun
+import mlrun.artifacts
 import mlrun.errors
 
 from ..datastore import get_store_uri, is_store_uri, store_manager
@@ -312,11 +313,17 @@ class Artifact(ModelObj):
         """get the absolute target path for the artifact"""
         return self.spec.target_path
 
-    def get_store_url(self, with_tag=True, project=None):
+    def get_store_url(self, with_tag=True, project=None, with_tree=True):
         """get the artifact uri (store://..) with optional parameters"""
-        tag = self.metadata.
+        tag = self.metadata.tag if with_tag else None
+        tree = self.metadata.tree if with_tree else None
+
         uri = generate_artifact_uri(
-            project or self.metadata.project,
+            project or self.metadata.project,
+            self.spec.db_key,
+            iter=self.metadata.iter,
+            tree=tree,
+            tag=tag,
         )
         return get_store_uri(self._store_prefix, uri)
 
@@ -1034,3 +1041,31 @@ def generate_target_path(item: Artifact, artifact_path, producer):
     suffix = f".{item.format}"
 
     return f"{artifact_path}{item.key}{suffix}"
+
+
+def convert_legacy_artifact_to_new_format(
+    legacy_artifact: typing.Union[LegacyArtifact, dict]
+) -> Artifact:
+    """Converts a legacy artifact to a new format.
+
+    :param legacy_artifact: The legacy artifact to convert.
+    :return: The converted artifact.
+    """
+    if isinstance(legacy_artifact, LegacyArtifact):
+        legacy_artifact_dict = legacy_artifact.to_dict()
+    elif isinstance(legacy_artifact, dict):
+        legacy_artifact_dict = legacy_artifact
+    else:
+        raise TypeError(
+            f"Unsupported type '{type(legacy_artifact)}' for legacy artifact"
+        )
+
+    artifact = mlrun.artifacts.artifact_types.get(
+        legacy_artifact_dict.get("kind", "artifact"), mlrun.artifacts.Artifact
+    )()
+
+    artifact.metadata = artifact.metadata.from_dict(legacy_artifact_dict)
+    artifact.spec = artifact.spec.from_dict(legacy_artifact_dict)
+    artifact.status = artifact.status.from_dict(legacy_artifact_dict)
+
+    return artifact
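The new converter dispatches on the legacy dict's "kind" through the artifact_types registry and rebuilds metadata/spec/status from the flat dict. A rough, self-contained sketch of that dispatch pattern, using stand-in dataclasses rather than mlrun's real artifact types:

from dataclasses import dataclass, field


@dataclass
class BaseArtifact:
    kind: str = "artifact"
    metadata: dict = field(default_factory=dict)
    spec: dict = field(default_factory=dict)


@dataclass
class ModelArtifact(BaseArtifact):
    kind: str = "model"


# stand-in for mlrun.artifacts.artifact_types, keyed by the legacy "kind" field
ARTIFACT_TYPES = {"artifact": BaseArtifact, "model": ModelArtifact}


def convert_legacy(legacy: dict) -> BaseArtifact:
    # pick the class by kind, falling back to the base artifact class
    cls = ARTIFACT_TYPES.get(legacy.get("kind", "artifact"), BaseArtifact)
    artifact = cls()
    # the real helper rebuilds metadata/spec/status via their from_dict methods
    artifact.metadata = {k: legacy[k] for k in ("key", "project", "tree") if k in legacy}
    artifact.spec = {k: legacy[k] for k in ("target_path", "db_key") if k in legacy}
    return artifact


print(convert_legacy({"kind": "model", "key": "my-model", "target_path": "s3://bucket/model.pkl"}))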
mlrun/artifacts/dataset.py
CHANGED
@@ -611,7 +611,7 @@ def update_dataset_meta(
     mlrun.get_run_db().store_artifact(
         artifact_spec.spec.db_key,
         artifact_spec.to_dict(),
-        artifact_spec.metadata.tree,
+        tree=artifact_spec.metadata.tree,
         iter=artifact_spec.metadata.iter,
         project=artifact_spec.metadata.project,
     )
mlrun/artifacts/manager.py
CHANGED
@@ -102,9 +102,9 @@ def dict_to_artifact(struct: dict) -> Artifact:
     kind = struct.get("kind", "")
 
     if is_legacy_artifact(struct):
+        return mlrun.artifacts.base.convert_legacy_artifact_to_new_format(struct)
+
+    artifact_class = artifact_types[kind]
 
     return artifact_class.from_dict(struct)
 
@@ -295,10 +295,10 @@ class ArtifactManager:
         self.artifact_db.store_artifact(
             key,
             item.to_dict(),
-            item.tree,
             iter=item.iter,
             tag=tag or item.tag,
             project=project,
+            tree=item.tree,
         )
 
     def link_artifact(
@@ -329,7 +329,7 @@ class ArtifactManager:
         self.artifact_db.store_artifact(
             item.db_key,
             item.to_dict(),
-            item.tree,
+            tree=item.tree,
             iter=iter,
             tag=tag,
             project=project,
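dict_to_artifact now routes flat (legacy) dicts through the converter and structured dicts through the per-kind class. A hedged sketch of that branching with plain dicts, assuming the legacy check is simply the absence of a nested "metadata" section (the real is_legacy_artifact may test more):

def is_legacy_artifact(struct: dict) -> bool:
    # assumption: legacy artifacts are flat dicts, new-format ones nest a "metadata" section
    return "metadata" not in struct


def dict_to_artifact(struct: dict) -> dict:
    if is_legacy_artifact(struct):
        # lift flat legacy fields into the nested metadata/spec layout
        return {
            "kind": struct.get("kind", "artifact"),
            "metadata": {k: struct[k] for k in ("key", "project", "tree", "iter") if k in struct},
            "spec": {k: struct[k] for k in ("target_path", "db_key") if k in struct},
        }
    return struct  # already in the new nested format


print(dict_to_artifact({"kind": "model", "key": "m", "tree": "run-1", "target_path": "s3://b/m.pkl"}))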
mlrun/artifacts/model.py
CHANGED
mlrun/common/schemas/__init__.py
CHANGED
@@ -14,7 +14,14 @@
 #
 # flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
 
-from .artifact import
+from .artifact import (
+    Artifact,
+    ArtifactCategories,
+    ArtifactIdentifier,
+    ArtifactMetadata,
+    ArtifactsFormat,
+    ArtifactSpec,
+)
 from .auth import (
     AuthInfo,
     AuthorizationAction,
mlrun/common/schemas/artifact.py
CHANGED
@@ -18,6 +18,8 @@ import pydantic
 
 import mlrun.common.types
 
+from .object import ObjectStatus
+
 
 class ArtifactCategories(mlrun.common.types.StrEnum):
     model = "model"
@@ -51,10 +53,43 @@ class ArtifactIdentifier(pydantic.BaseModel):
     key: typing.Optional[str]
     iter: typing.Optional[int]
     uid: typing.Optional[str]
+    producer_id: typing.Optional[str]
     # TODO support hash once saved as a column in the artifacts table
     # hash: typing.Optional[str]
 
 
 class ArtifactsFormat(mlrun.common.types.StrEnum):
+    # TODO: add a format that returns a minimal response
     full = "full"
+
+
+class ArtifactMetadata(pydantic.BaseModel):
+    key: str
+    project: str
+    iter: typing.Optional[int]
+    tree: typing.Optional[str]
+    tag: typing.Optional[str]
+
+    class Config:
+        extra = pydantic.Extra.allow
+
+
+class ArtifactSpec(pydantic.BaseModel):
+    src_path: typing.Optional[str]
+    target_path: typing.Optional[str]
+    viewer: typing.Optional[str]
+    inline: typing.Optional[str]
+    size: typing.Optional[int]
+    db_key: typing.Optional[str]
+    extra_data: typing.Optional[typing.Dict[str, typing.Any]]
+    unpackaging_instructions: typing.Optional[typing.Dict[str, typing.Any]]
+
+    class Config:
+        extra = pydantic.Extra.allow
+
+
+class Artifact(pydantic.BaseModel):
+    kind: str
+    metadata: ArtifactMetadata
+    spec: ArtifactSpec
+    status: ObjectStatus
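The new pydantic models mirror the nested metadata/spec/status artifact structure. A short sketch of building one, trimmed to a few fields and with a stand-in ObjectStatus since .object is not part of this diff; written against pydantic v1, which is what the Extra.allow config implies:

import typing

import pydantic


class ObjectStatus(pydantic.BaseModel):
    # stand-in for mlrun.common.schemas.object.ObjectStatus (not shown in this diff)
    state: typing.Optional[str]

    class Config:
        extra = pydantic.Extra.allow


class ArtifactMetadata(pydantic.BaseModel):
    key: str
    project: str
    iter: typing.Optional[int]
    tree: typing.Optional[str]
    tag: typing.Optional[str]

    class Config:
        extra = pydantic.Extra.allow


class Artifact(pydantic.BaseModel):
    kind: str
    metadata: ArtifactMetadata
    status: ObjectStatus = ObjectStatus()


artifact = Artifact(
    kind="model",
    metadata=ArtifactMetadata(key="my-model", project="demo", tag="latest", tree="run-123"),
)
print(artifact.dict())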
mlrun/config.py
CHANGED
@@ -105,6 +105,12 @@ default_config = {
             "list_runs_time_period_in_days": 7,  # days
         }
     },
+    "crud": {
+        "runs": {
+            # deleting runs is a heavy operation that includes deleting runtime resources, therefore we do it in chunks
+            "batch_delete_runs_chunk_size": 10,
+        }
+    },
     # the grace period (in seconds) that will be given to runtime resources (after they're in terminal state)
     # before deleting them (4 hours)
     "runtime_resources_deletion_grace_period": "14400",
@@ -121,6 +127,10 @@ default_config = {
     # But if both the server and the client set some value, we want the client to take precedence over the server.
     # By setting the default to None we are able to differentiate between the two cases.
     "generate_target_path_from_artifact_hash": None,
+    # migration from artifacts to artifacts_v2 is done in batches, and requires a state file to keep track of the
+    # migration progress.
+    "artifact_migration_batch_size": 200,
+    "artifact_migration_state_file_path": "./db/_artifact_migration_state.json",
 },
 # FIXME: Adding these defaults here so we won't need to patch the "installing component" (provazio-controller) to
 # configure this values on field systems, for newer system this will be configured correctly
@@ -358,6 +368,7 @@ default_config = {
     # this is the default interval period for pulling logs, if not specified different timeout interval
     "pull_logs_default_interval": 3,  # seconds
     "pull_logs_backoff_no_logs_default_interval": 10,  # seconds
+    "pull_logs_default_size_limit": 1024 * 1024,  # 1 MB
 },
 "authorization": {
     "mode": "none",  # one of none, opa
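The new keys ship as defaults in default_config. A sketch of overriding one at runtime, assuming mlrun's usual convention of MLRUN_-prefixed environment variables with "__" as the nesting separator; the config paths (crud.runs..., artifacts...) are inferred from the diff context and may differ in the released package:

import os

# must be set before mlrun is imported so the config picks it up
os.environ["MLRUN_CRUD__RUNS__BATCH_DELETE_RUNS_CHUNK_SIZE"] = "25"
os.environ["MLRUN_ARTIFACTS__ARTIFACT_MIGRATION_BATCH_SIZE"] = "500"

import mlrun

print(mlrun.mlconf.crud.runs.batch_delete_runs_chunk_size)
print(mlrun.mlconf.artifacts.artifact_migration_batch_size)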
mlrun/datastore/azure_blob.py
CHANGED
@@ -15,7 +15,6 @@
 import time
 from pathlib import Path
 
-from azure.storage.blob import BlobServiceClient
 from fsspec.registry import get_filesystem_class
 
 import mlrun.errors
@@ -32,13 +31,7 @@ class AzureBlobStore(DataStore):
 
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
-        self.
-        con_string = self._get_secret_or_env("AZURE_STORAGE_CONNECTION_STRING")
-        if con_string:
-            self.bsc = BlobServiceClient.from_connection_string(con_string)
-        else:
-            self.get_filesystem()
+        self.get_filesystem()
 
     def get_filesystem(self, silent=True):
         """return fsspec file system object, if supported"""
@@ -86,89 +79,54 @@
         return path
 
     def upload(self, key, src_path):
-            with self.bsc.get_blob_client(
-                container=self.endpoint, blob=key[1:]
-            ) as blob_client:
-                with open(src_path, "rb") as data:
-                    blob_client.upload_blob(data, overwrite=True)
-        else:
-            remote_path = self._convert_key_to_remote_path(key)
-            self._filesystem.put_file(src_path, remote_path, overwrite=True)
+        remote_path = self._convert_key_to_remote_path(key)
+        self._filesystem.put_file(src_path, remote_path, overwrite=True)
 
     def get(self, key, size=None, offset=0):
-            size = size if size else None
-            blob = blob_client.download_blob(offset, size).readall()
-            return blob
-        else:
-            remote_path = self._convert_key_to_remote_path(key)
-            end = offset + size if size else None
-            blob = self._filesystem.cat_file(remote_path, start=offset, end=end)
-            return blob
+        remote_path = self._convert_key_to_remote_path(key)
+        end = offset + size if size else None
+        blob = self._filesystem.cat_file(remote_path, start=offset, end=end)
+        return blob
 
     def put(self, key, data, append=False):
         if append:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Append mode not supported for Azure blob datastore"
             )
-            blob_client.upload_blob(data, overwrite=True)
+        remote_path = self._convert_key_to_remote_path(key)
+        if isinstance(data, bytes):
+            mode = "wb"
+        elif isinstance(data, str):
+            mode = "w"
         else:
-            elif isinstance(data, str):
-                mode = "w"
-            else:
-                raise TypeError("Data type unknown. Unable to put in Azure!")
-            with self._filesystem.open(remote_path, mode) as f:
-                f.write(data)
+            raise TypeError("Data type unknown. Unable to put in Azure!")
+        with self._filesystem.open(remote_path, mode) as f:
+            f.write(data)
 
     def stat(self, key):
+        remote_path = self._convert_key_to_remote_path(key)
+        files = self._filesystem.ls(remote_path, detail=True)
+        if len(files) == 1 and files[0]["type"] == "file":
+            size = files[0]["size"]
+            modified = files[0]["last_modified"]
+        elif len(files) == 1 and files[0]["type"] == "directory":
+            raise FileNotFoundError("Operation expects a file not a directory!")
         else:
-            files = self._filesystem.ls(remote_path, detail=True)
-            if len(files) == 1 and files[0]["type"] == "file":
-                size = files[0]["size"]
-                modified = files[0]["last_modified"]
-            elif len(files) == 1 and files[0]["type"] == "directory":
-                raise FileNotFoundError("Operation expects a file not a directory!")
-            else:
-                raise ValueError("Operation expects to receive a single file!")
+            raise ValueError("Operation expects to receive a single file!")
         return FileStats(size, time.mktime(modified.timetuple()))
 
     def listdir(self, key):
-        files = [
-            f.split("/", 1)[1][key_length:] for f in files if len(f.split("/")) > 1
-        ]
-        return files
+        remote_path = self._convert_key_to_remote_path(key)
+        if self._filesystem.isfile(remote_path):
+            return key
+        remote_path = f"{remote_path}/**"
+        files = self._filesystem.glob(remote_path)
+        key_length = len(key)
+        files = [
+            f.split("/", 1)[1][key_length:] for f in files if len(f.split("/")) > 1
+        ]
+        return files
+
+    def rm(self, path, recursive=False, maxdepth=None):
+        path = self._convert_key_to_remote_path(key=path)
+        super().rm(path=path, recursive=recursive, maxdepth=maxdepth)
mlrun/datastore/datastore_profile.py
CHANGED
@@ -302,7 +302,7 @@ class DatastoreProfile2Json(pydantic.BaseModel):
             {
                 k: v
                 for k, v in profile.dict().items()
-                if
+                if str(k) not in profile._private_attributes
             }
         )
 
@@ -344,6 +344,7 @@ class DatastoreProfile2Json(pydantic.BaseModel):
         "kafka_source": DatastoreProfileKafkaSource,
         "dbfs": DatastoreProfileDBFS,
         "gcs": DatastoreProfileGCS,
+        "az": DatastoreProfileAzureBlob,
     }
     if datastore_type in ds_profile_factory:
         return ds_profile_factory[datastore_type].parse_obj(decoded_dict)
mlrun/datastore/store_resources.py
CHANGED
@@ -158,12 +158,11 @@ def get_store_resource(
         return db.get_feature_vector(name, project, tag, uid)
 
     elif StorePrefix.is_artifact(kind):
-        project, key, iteration, tag,
+        project, key, iteration, tag, tree = parse_artifact_uri(
            uri, project or config.default_project
        )
-
        resource = db.read_artifact(
-            key, project=project, tag=tag
+            key, project=project, tag=tag, iter=iteration, tree=tree
        )
        if resource.get("kind", "") == "link":
            # todo: support other link types (not just iter, move this to the db/api layer
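The Azure store now goes exclusively through the fsspec filesystem (adlfs) rather than an azure-storage-blob BlobServiceClient. A rough sketch of that access pattern, assuming adlfs is installed; the account and container names are placeholders, and credentials would normally come from the same AZURE_STORAGE_* secrets the datastore reads:

import fsspec

# "az" is the protocol adlfs registers; credentials can also be passed here
# (account_key, connection_string, ...) instead of environment variables
fs = fsspec.filesystem("az", account_name="myaccount")

remote_path = "mycontainer/path/to/blob.csv"
fs.put_file("local.csv", remote_path, overwrite=True)  # upload, as in AzureBlobStore.upload
chunk = fs.cat_file(remote_path, start=0, end=1024)    # ranged read, as in AzureBlobStore.get
info = fs.ls(remote_path, detail=True)[0]              # size / last_modified, as in stat
print(info["size"], info["type"])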
mlrun/datastore/targets.py
CHANGED
@@ -96,7 +96,7 @@ def get_default_targets(offline_only=False):
 def update_targets_run_id_for_ingest(overwrite, targets, targets_in_status):
     run_id = generate_target_run_id()
     for target in targets:
-        if overwrite or
+        if overwrite or target.name not in targets_in_status.keys():
             target.run_id = run_id
         else:
             target.run_id = targets_in_status[target.name].run_id
@@ -192,7 +192,7 @@ def validate_target_list(targets):
 
     if not targets:
         return
-    targets_by_kind_name = [kind for kind in targets if
+    targets_by_kind_name = [kind for kind in targets if isinstance(kind, str)]
     no_name_target_types_count = Counter(
         [
             target.kind
@@ -898,7 +898,7 @@ class ParquetTarget(BaseStoreTarget):
 
     def delete_update_last_written(*arg, **kargs):
         result = original_to_dict(*arg, **kargs)
+        result["class_args"].pop("update_last_written", None)
         return result
 
     # update_last_written is not serializable (ML-5108)
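The ParquetTarget fix pops the non-serializable update_last_written callback out of the serialized dict by wrapping the original to_dict. A generic, self-contained sketch of that wrap-and-pop pattern; the Target class here is a stand-in, not mlrun's:

class Target:
    def __init__(self):
        self.class_args = {
            "path": "memory://out.parquet",
            "update_last_written": lambda ts: ts,  # callback, not serializable
        }

    def to_dict(self):
        return {"kind": "parquet", "class_args": dict(self.class_args)}


target = Target()
original_to_dict = target.to_dict


def delete_update_last_written(*args, **kwargs):
    result = original_to_dict(*args, **kwargs)
    # drop the callback from the serialized form so the dict stays serializable
    result["class_args"].pop("update_last_written", None)
    return result


target.to_dict = delete_update_last_written
print(target.to_dict())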
mlrun/db/base.py
CHANGED
@@ -91,11 +91,13 @@ class RunDBInterface(ABC):
         pass
 
     @abstractmethod
-    def store_artifact(
+    def store_artifact(
+        self, key, artifact, uid=None, iter=None, tag="", project="", tree=None
+    ):
         pass
 
     @abstractmethod
-    def read_artifact(self, key, tag="", iter=None, project=""):
+    def read_artifact(self, key, tag="", iter=None, project="", tree=None, uid=None):
         pass
 
     @abstractmethod
@@ -111,11 +113,12 @@
         best_iteration: bool = False,
         kind: str = None,
         category: Union[str, mlrun.common.schemas.ArtifactCategories] = None,
+        tree: str = None,
     ):
         pass
 
     @abstractmethod
-    def del_artifact(self, key, tag="", project=""):
+    def del_artifact(self, key, tag="", project="", tree=None, uid=None):
         pass
 
     @abstractmethod
@@ -203,8 +206,8 @@
             key=mlrun.utils.get_in_artifact(artifact_obj, "key"),
             # we are passing tree as uid when storing an artifact, so if uid is not defined,
             # pass the tree as uid
-            uid=mlrun.utils.get_in_artifact(artifact_obj, "uid")
+            uid=mlrun.utils.get_in_artifact(artifact_obj, "uid"),
+            producer_id=mlrun.utils.get_in_artifact(artifact_obj, "tree"),
             kind=mlrun.utils.get_in_artifact(artifact_obj, "kind"),
             iter=mlrun.utils.get_in_artifact(artifact_obj, "iter"),
         )
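store_artifact, read_artifact and del_artifact now accept tree (the producer id) and uid in addition to key/tag/iter. A toy, in-memory illustration of addressing artifacts by key plus tree, under the assumption that tree falls back to uid when only one is given; this stub is not mlrun code:

class InMemoryArtifactDB:
    """Minimal stand-in that mimics the widened RunDBInterface signatures."""

    def __init__(self):
        self._artifacts = {}

    def store_artifact(self, key, artifact, uid=None, iter=None, tag="", project="", tree=None):
        self._artifacts[(project, key, iter or 0, tree or uid)] = {"tag": tag, "body": artifact}

    def read_artifact(self, key, tag="", iter=None, project="", tree=None, uid=None):
        return self._artifacts[(project, key, iter or 0, tree or uid)]["body"]


db = InMemoryArtifactDB()
db.store_artifact("model", {"kind": "model"}, project="demo", tree="run-123", tag="latest")
print(db.read_artifact("model", project="demo", tree="run-123"))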