mlrun 1.6.0rc6__py3-none-any.whl → 1.6.0rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of mlrun might be problematic.
- mlrun/__main__.py +32 -31
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/workflow.py +2 -0
- mlrun/config.py +3 -3
- mlrun/datastore/base.py +9 -3
- mlrun/datastore/datastore.py +10 -7
- mlrun/datastore/datastore_profile.py +19 -2
- mlrun/datastore/dbfs_store.py +6 -6
- mlrun/datastore/s3.py +6 -2
- mlrun/datastore/sources.py +12 -2
- mlrun/datastore/targets.py +43 -20
- mlrun/db/httpdb.py +22 -0
- mlrun/feature_store/feature_set.py +5 -2
- mlrun/feature_store/retrieval/spark_merger.py +7 -1
- mlrun/kfpops.py +1 -1
- mlrun/launcher/client.py +1 -6
- mlrun/launcher/remote.py +5 -3
- mlrun/model.py +2 -2
- mlrun/model_monitoring/batch_application.py +61 -94
- mlrun/package/packager.py +115 -89
- mlrun/package/packagers/default_packager.py +66 -65
- mlrun/package/packagers/numpy_packagers.py +109 -62
- mlrun/package/packagers/pandas_packagers.py +12 -23
- mlrun/package/packagers/python_standard_library_packagers.py +35 -57
- mlrun/package/packagers_manager.py +16 -13
- mlrun/package/utils/_pickler.py +8 -18
- mlrun/package/utils/_supported_format.py +1 -1
- mlrun/projects/pipelines.py +63 -4
- mlrun/projects/project.py +34 -11
- mlrun/runtimes/__init__.py +6 -0
- mlrun/runtimes/base.py +12 -1
- mlrun/runtimes/daskjob.py +73 -5
- mlrun/runtimes/databricks_job/databricks_runtime.py +2 -0
- mlrun/runtimes/function.py +53 -4
- mlrun/runtimes/kubejob.py +1 -1
- mlrun/runtimes/local.py +9 -9
- mlrun/runtimes/pod.py +1 -1
- mlrun/runtimes/remotesparkjob.py +1 -0
- mlrun/runtimes/serving.py +11 -1
- mlrun/runtimes/sparkjob/spark3job.py +4 -1
- mlrun/runtimes/utils.py +1 -46
- mlrun/utils/helpers.py +1 -17
- mlrun/utils/notifications/notification_pusher.py +27 -6
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.0rc6.dist-info → mlrun-1.6.0rc8.dist-info}/METADATA +7 -6
- {mlrun-1.6.0rc6.dist-info → mlrun-1.6.0rc8.dist-info}/RECORD +50 -50
- {mlrun-1.6.0rc6.dist-info → mlrun-1.6.0rc8.dist-info}/WHEEL +1 -1
- {mlrun-1.6.0rc6.dist-info → mlrun-1.6.0rc8.dist-info}/LICENSE +0 -0
- {mlrun-1.6.0rc6.dist-info → mlrun-1.6.0rc8.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.0rc6.dist-info → mlrun-1.6.0rc8.dist-info}/top_level.txt +0 -0
mlrun/__main__.py
CHANGED

@@ -152,7 +152,7 @@ def main():
 @click.option("--schedule", help="cron schedule")
 @click.option("--from-env", is_flag=True, help="read the spec from the env var")
 @click.option("--dump", is_flag=True, help="dump run results as YAML")
-@click.option("--image", default="
+@click.option("--image", default="", help="container image (defaults to mlrun/mlrun)")
 @click.option("--kind", default="", help="serverless runtime kind")
 @click.option("--source", default="", help="source code archive/git")
 @click.option("--local", is_flag=True, help="run the task locally (ignore runtime)")
@@ -289,7 +289,7 @@ def run(
         exit(1)
     else:
         kind = kind or "job"
-        runtime = {"kind": kind, "spec": {"image": image}}
+        runtime = {"kind": kind, "spec": {"image": image or "mlrun/mlrun"}}

     if kind not in ["", "local", "dask"] and url:
         if url_file and path.isfile(url_file):
@@ -303,7 +303,7 @@ def run(
     elif runtime:
         runtime = py_eval(runtime)
         if not isinstance(runtime, dict):
-            print(f"
+            print(f"Runtime parameter must be a dict, not {type(runtime)}")
             exit(1)
     else:
         runtime = {}
@@ -317,7 +317,7 @@ def run(
             get_in(runtime, "spec.build.origin_filename", origin_file)
         )
         if kfp:
-            print(f"
+            print(f"Code:\n{code}\n")
         suffix = pathlib.Path(url_file).suffix if url else ".py"

     # * is a placeholder for the url file when we want to use url args and let mlrun resolve the url file
@@ -340,7 +340,7 @@ def run(
             url = f"bash {url_file} {url_args}".strip()
         else:
             print(
-                "
+                "Error: command must be specified with '{codefile}' in it "
                 "(to determine the position of the code file)"
             )
             exit(1)
@@ -365,8 +365,9 @@ def run(

     if run_args:
         update_in(runtime, "spec.args", list(run_args))
-
-
+
+    update_in(runtime, "spec.image", image or "mlrun/mlrun", replace=bool(image))
+
     set_item(runobj.spec, handler, "handler")
     set_item(runobj.spec, param, "parameters", fill_params(param))

@@ -427,7 +428,7 @@ def run(
         if resp and dump:
             print(resp.to_yaml())
     except RunError as err:
-        print(f"
+        print(f"Runtime error: {err_to_str(err)}")
         exit(1)


@@ -499,7 +500,7 @@ def build(
     if runtime:
         runtime = py_eval(runtime)
         if not isinstance(runtime, dict):
-            print(f"
+            print(f"Runtime parameter must be a dict, not {type(runtime)}")
             exit(1)
         if kfp:
             print("Runtime:")
@@ -514,7 +515,7 @@ def build(
         func = import_function(func_url)

     else:
-        print("
+        print("Error: Function path or url are required")
         exit(1)

     meta = func.metadata
@@ -531,12 +532,12 @@ def build(

     if source.endswith(".py"):
         if not path.isfile(source):
-            print(f"
+            print(f"Source file doesnt exist ({source})")
             exit(1)
         with open(source) as fp:
             body = fp.read()
         based = b64encode(body.encode("utf-8")).decode("utf-8")
-        logger.info(f"
+        logger.info(f"Packing code at {source}")
         b.functionSourceCode = based
         func.spec.command = ""
     else:
@@ -562,13 +563,13 @@ def build(
     )

     if hasattr(func, "deploy"):
-        logger.info("
+        logger.info("Remote deployment started")
         try:
             func.deploy(
                 with_mlrun=with_mlrun, watch=not silent, is_kfp=kfp, skip_deployed=skip
             )
         except Exception as err:
-            print(f"
+            print(f"Deploy error, {err_to_str(err)}")
             exit(1)

         state = func.status.state
@@ -583,9 +584,9 @@ def build(
             fp.write(full_image)
         print("full image path = ", full_image)

-        print(f"
+        print(f"Function built, state={state} image={image}")
     else:
-        print("
+        print("Function does not have a deploy() method")
         exit(1)


@@ -644,7 +645,7 @@ def deploy(
     else:
         runtime = {}
     if not isinstance(runtime, dict):
-        print(f"
+        print(f"Runtime parameter must be a dict, not {type(runtime)}")
         exit(1)

     if verbose:
@@ -682,7 +683,7 @@ def deploy(
         print(f"deploy error: {err_to_str(err)}")
         exit(1)

-    print(f"
+    print(f"Function deployed, address={addr}")
     with open("/tmp/output", "w") as fp:
         fp.write(addr)
     with open("/tmp/name", "w") as fp:
@@ -715,7 +716,7 @@ def get(kind, name, selector, namespace, uid, project, tag, db, extra_args):
     if db:
         mlconf.dbpath = db
     if not project:
-        print("
+        print("Warning, project parameter was not specified using default !")
     if kind.startswith("po"):
         print("Unsupported, use 'get runtimes' instead")
         return
@@ -793,7 +794,7 @@ def get(kind, name, selector, namespace, uid, project, tag, db, extra_args):
     elif kind.startswith("workflow"):
         run_db = get_run_db()
         if project == "*":
-            print("
+            print("Warning, reading workflows for all projects may take a long time !")
         pipelines = run_db.list_pipelines(project=project, page_size=200)
         pipe_runs = pipelines.runs
         while pipelines.next_page_token is not None:
@@ -820,7 +821,7 @@ def get(kind, name, selector, namespace, uid, project, tag, db, extra_args):

     else:
         print(
-            "
+            "Currently only get runs | runtimes | workflows | artifacts | func [name] | runtime are supported"
         )


@@ -905,7 +906,7 @@ def db(
         )
         pid = child.pid
         print(
-            f"
+            f"Background pid: {pid}, logs written to mlrun-stdout.log and mlrun-stderr.log, use:\n"
            f"`kill {pid}` (linux/mac) or `taskkill /pid {pid} /t /f` (windows), to kill the mlrun service process"
         )
     else:
@@ -923,7 +924,7 @@ def db(
     dotenv.set_key(filename, "MLRUN_MOCK_NUCLIO_DEPLOYMENT", "auto", quote_mode="")
     if pid:
         dotenv.set_key(filename, "MLRUN_SERVICE_PID", str(pid), quote_mode="")
-    print(f"
+    print(f"Updated configuration in {update_env} .env file")


 @main.command()
@@ -951,7 +952,7 @@ def logs(uid, project, offset, db, watch):
         print(text.decode())

     if state:
-        print(f"
+        print(f"Final state: {state}")


 @main.command()
@@ -1119,7 +1120,7 @@ def project(
     if arguments:
         args = fill_params(arguments)

-    print(f"
+    print(f"Running workflow {run} file: {workflow_path}")
     gitops = (
         git_issue
         or environ.get("GITHUB_EVENT_PATH")
@@ -1158,7 +1159,7 @@ def project(
             exit(1)

     elif sync:
-        print("
+        print("Saving project functions to db ..")
         proj.sync_functions(save=True)


@@ -1295,7 +1296,7 @@ def show_or_set_config(
     if not op or op == "get":
         # print out the configuration (default or based on the specified env/api)
         if env_file and not path.isfile(path.expanduser(env_file)):
-            print(f"
+            print(f"Error: Env file {env_file} does not exist")
             exit(1)
         if env_file or api:
             mlrun.set_environment(
@@ -1315,7 +1316,7 @@ def show_or_set_config(
                 f".env file {filename} not found, creating new and setting configuration"
             )
         else:
-            print(f"
+            print(f"Updating configuration in .env file {filename}")
         env_dict = {
             "MLRUN_DBPATH": api,
             "MLRUN_ARTIFACT_PATH": artifact_path,
@@ -1331,7 +1332,7 @@ def show_or_set_config(
         if env_file:
             # if its not the default file print the usage details
             print(
-                f"
+                f"To use the {env_file} .env file add the following to your development environment:\n"
                 f"MLRUN_ENV_FILE={env_file}"
             )

@@ -1340,11 +1341,11 @@ def show_or_set_config(
         if not path.isfile(filename):
             print(f".env file {filename} not found")
         else:
-            print(f"
+            print(f"Deleting .env file {filename}")
             remove(filename)

     else:
-        print(f"Error
+        print(f"Error: Unsupported config option {op}")


 def fill_params(params, params_dict=None):
mlrun/common/schemas/auth.py
CHANGED

@@ -59,6 +59,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
     hub_source = "hub-source"
     workflow = "workflow"
     datastore_profile = "datastore-profile"
+    api_gateways = "api-gateways"

     def to_resource_string(
         self,
@@ -94,6 +95,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
             AuthorizationResourceTypes.hub_source: "/marketplace/sources",
             # workflow define how to run a pipeline and can be considered as the specification of a pipeline.
             AuthorizationResourceTypes.workflow: "/projects/{project_name}/workflows/{resource_name}",
+            AuthorizationResourceTypes.api_gateways: "/projects/{project_name}/api-gateways",
         }[self].format(project_name=project_name, resource_name=resource_name)
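For orientation, a hedged sketch of how the new enum member resolves to an authorization path; only the format string and the `project_name`/`resource_name` keywords are visible in this diff, so the exact call signature is an assumption:

# Illustrative only: resolving the new api_gateways resource type.
from mlrun.common.schemas.auth import AuthorizationResourceTypes

path = AuthorizationResourceTypes.api_gateways.to_resource_string(
    project_name="my-project", resource_name=""  # resource_name is unused by this format string
)
# expected: "/projects/my-project/api-gateways"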
mlrun/common/schemas/workflow.py
CHANGED

@@ -16,6 +16,7 @@ import typing

 import pydantic

+from .notification import Notification
 from .schedule import ScheduleCronTrigger


@@ -40,6 +41,7 @@ class WorkflowRequest(pydantic.BaseModel):
     source: typing.Optional[str] = None
     run_name: typing.Optional[str] = None
     namespace: typing.Optional[str] = None
+    notifications: typing.Optional[typing.List[Notification]] = None


 class WorkflowResponse(pydantic.BaseModel):
mlrun/config.py
CHANGED

@@ -462,7 +462,7 @@ default_config = {
     "default_http_sink_app": "http://nuclio-{project}-{application_name}.mlrun.svc.cluster.local:8080",
     "batch_processing_function_branch": "master",
     "parquet_batching_max_events": 10_000,
-    "parquet_batching_timeout_secs": timedelta(minutes=
+    "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
     # See mlrun.model_monitoring.stores.ModelEndpointStoreType for available options
     "store_type": "v3io-nosql",
     "endpoint_store_connection": "",
@@ -1016,9 +1016,9 @@ class Config:
         mock_nuclio = not mlrun.mlconf.is_nuclio_detected()
         return True if mock_nuclio and force_mock is None else force_mock

-    def get_v3io_access_key(self):
+    def get_v3io_access_key(self) -> typing.Optional[str]:
         # Get v3io access key from the environment
-        return os.
+        return os.getenv("V3IO_ACCESS_KEY")

     def get_model_monitoring_file_target_path(
         self,
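The `get_v3io_access_key` change makes the environment lookup explicit and adds a return-type annotation. A short usage sketch:

# Sketch: the method now returns the V3IO_ACCESS_KEY env var value, or None when unset.
import os

import mlrun

os.environ["V3IO_ACCESS_KEY"] = "example-token"  # illustrative value
assert mlrun.mlconf.get_v3io_access_key() == "example-token"

del os.environ["V3IO_ACCESS_KEY"]
assert mlrun.mlconf.get_v3io_access_key() is None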
mlrun/datastore/base.py
CHANGED

@@ -49,6 +49,8 @@ class FileStats:


 class DataStore:
+    using_bucket = False
+
     def __init__(self, parent, name, kind, endpoint="", secrets: dict = None):
         self._parent = parent
         self.kind = kind
@@ -303,7 +305,9 @@ class DataStore:
         storage_options = self.get_storage_options()
         if url.startswith("ds://"):
             parsed_url = urllib.parse.urlparse(url)
-            url = parsed_url.path
+            url = parsed_url.path
+            if self.using_bucket:
+                url = url[1:]
             # Pass the underlying file system
             kwargs["filesystem"] = file_system
         elif storage_options:
@@ -707,7 +711,7 @@ class HttpStore(DataStore):
 # As an example, it converts an S3 URL 's3://s3bucket/path' to just 's3bucket/path'.
 # Since 'ds' schemas are not inherently processed by fsspec, we have adapted the _strip_protocol()
 # method specifically to strip away the 'ds' schema as required.
-def makeDatastoreSchemaSanitizer(cls, *args, **kwargs):
+def makeDatastoreSchemaSanitizer(cls, using_bucket=False, *args, **kwargs):
     if not issubclass(cls, fsspec.AbstractFileSystem):
         raise ValueError("Class must be a subclass of fsspec.AbstractFileSystem")

@@ -716,7 +720,9 @@ def makeDatastoreSchemaSanitizer(cls, using_bucket=False, *args, **kwargs):
     def _strip_protocol(cls, url):
         if url.startswith("ds://"):
             parsed_url = urlparse(url)
-            url = parsed_url.path
+            url = parsed_url.path
+            if using_bucket:
+                url = url[1:]
         return super()._strip_protocol(url)

     return DatastoreSchemaSanitizer(*args, **kwargs)
mlrun/datastore/datastore.py
CHANGED

@@ -194,18 +194,18 @@ class StoreManager:

         if schema == "ds":
             profile_name = endpoint
-
-            if not
+            datastore_profile = TemporaryClientDatastoreProfiles().get(profile_name)
+            if not datastore_profile:
                 project_name = urlparse(url).username or mlrun.mlconf.default_project
-
+                datastore_profile = mlrun.db.get_run_db(
                     secrets=self._secrets
                 ).get_datastore_profile(profile_name, project_name)

-            if secrets and
-                secrets = merge(secrets,
+            if secrets and datastore_profile.secrets():
+                secrets = merge(secrets, datastore_profile.secrets())
             else:
-                secrets = secrets or
-                url =
+                secrets = secrets or datastore_profile.secrets()
+            url = datastore_profile.url(subpath)
             schema, endpoint, parsed_url = parse_url(url)
             subpath = parsed_url.path
@@ -233,3 +233,6 @@ class StoreManager:
         self._stores[store_key] = store
         # in file stores in windows path like c:\a\b the drive letter is dropped from the path, so we return the url
         return store, url if store.kind == "file" else subpath
+
+    def reset_secrets(self):
+        self._secrets = {}
mlrun/datastore/datastore_profile.py
CHANGED

@@ -48,11 +48,9 @@ class DatastoreProfile(pydantic.BaseModel):
         )
         return full_key

-    @classmethod
     def secrets(self) -> dict:
         return None

-    @classmethod
     def url(self, subpath) -> str:
         return None

@@ -204,6 +202,24 @@ class DatastoreProfileRedis(DatastoreProfile):
         return self.endpoint_url + subpath


+class DatastoreProfileDBFS(DatastoreProfile):
+    type: str = pydantic.Field("dbfs")
+    _private_attributes = ("token",)
+    endpoint_url: typing.Optional[str] = None  # host
+    token: typing.Optional[str] = None
+
+    def url(self, subpath) -> str:
+        return f"dbfs://{subpath}"
+
+    def secrets(self) -> dict:
+        res = {}
+        if self.token:
+            res["DATABRICKS_TOKEN"] = self.token
+        if self.endpoint_url:
+            res["DATABRICKS_HOST"] = self.endpoint_url
+        return res if res else None
+
+
 class DatastoreProfile2Json(pydantic.BaseModel):
     @staticmethod
     def _to_json(attributes):
@@ -260,6 +276,7 @@ class DatastoreProfile2Json(pydantic.BaseModel):
         "basic": DatastoreProfileBasic,
         "kafka_target": DatastoreProfileKafkaTarget,
         "kafka_source": DatastoreProfileKafkaSource,
+        "dbfs": DatastoreProfileDBFS,
     }
     if datastore_type in ds_profile_factory:
         return ds_profile_factory[datastore_type].parse_obj(decoded_dict)
mlrun/datastore/dbfs_store.py
CHANGED

@@ -14,12 +14,11 @@

 import pathlib

-import fsspec
 from fsspec.implementations.dbfs import DatabricksFile, DatabricksFileSystem

 import mlrun.errors

-from .base import DataStore, FileStats
+from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer


 class DatabricksFileBugFixed(DatabricksFile):
@@ -83,15 +82,16 @@ class DatabricksFileSystemDisableCache(DatabricksFileSystem):
 class DBFSStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
-        if not endpoint:
-            endpoint = self._get_secret_or_env("DATABRICKS_HOST")
-        self.endpoint = endpoint
         self.get_filesystem(silent=False)

     def get_filesystem(self, silent=True):
         """return fsspec file system object, if supported"""
         if not self._filesystem:
-            self._filesystem =
+            self._filesystem = makeDatastoreSchemaSanitizer(
+                cls=DatabricksFileSystemDisableCache,
+                using_bucket=False,
+                **self.get_storage_options(),
+            )
         return self._filesystem

     def get_storage_options(self):
mlrun/datastore/s3.py
CHANGED

@@ -22,6 +22,8 @@ from .base import DataStore, FileStats, get_range, makeDatastoreSchemaSanitizer


 class S3Store(DataStore):
+    using_bucket = True
+
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets)
         # will be used in case user asks to assume a role and work through fsspec
@@ -108,7 +110,9 @@ class S3Store(DataStore):
             return None

         self._filesystem = makeDatastoreSchemaSanitizer(
-            s3fs.S3FileSystem,
+            s3fs.S3FileSystem,
+            using_bucket=self.using_bucket,
+            **self.get_storage_options(),
         )
         return self._filesystem

@@ -173,7 +177,7 @@ class S3Store(DataStore):
         if not key.endswith("/"):
             key += "/"
         # Object names is S3 are not fully following filesystem semantics - they do not start with /, even for
-        # "absolute paths". Therefore, we are
+        # "absolute paths". Therefore, we are removing leading / from path filter.
         if key.startswith("/"):
             key = key[1:]
         key_length = len(key)
mlrun/datastore/sources.py
CHANGED

@@ -177,9 +177,14 @@ class CSVSource(BaseSourceDriver):
         parse_dates.append(time_field)

         data_item = mlrun.store_manager.object(self.path)
+        if self.path.startswith("ds://"):
+            store, path = mlrun.store_manager.get_or_create_store(self.path)
+            path = store.url + path
+        else:
+            path = data_item.url

         return storey.CSVSource(
-            paths=
+            paths=path,  # unlike self.path, it already has store:// replaced
             build_dict=True,
             key_field=self.key_field or key_field,
             storage_options=data_item.store.get_storage_options(),
@@ -323,9 +328,14 @@ class ParquetSource(BaseSourceDriver):
         attributes["context"] = context

         data_item = mlrun.store_manager.object(self.path)
+        if self.path.startswith("ds://"):
+            store, path = mlrun.store_manager.get_or_create_store(self.path)
+            path = store.url + path
+        else:
+            path = data_item.url

         return storey.ParquetSource(
-            paths=
+            paths=path,  # unlike self.path, it already has store:// replaced
             key_field=self.key_field or key_field,
             storage_options=data_item.store.get_storage_options(),
             end_filter=self.end_time,
|