mlrun 1.6.0rc7__py3-none-any.whl → 1.6.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (38)
  1. mlrun/__main__.py +27 -27
  2. mlrun/common/schemas/auth.py +2 -0
  3. mlrun/config.py +2 -2
  4. mlrun/datastore/dbfs_store.py +0 -3
  5. mlrun/datastore/sources.py +12 -2
  6. mlrun/datastore/targets.py +3 -0
  7. mlrun/db/httpdb.py +15 -0
  8. mlrun/feature_store/feature_set.py +5 -2
  9. mlrun/feature_store/retrieval/spark_merger.py +7 -1
  10. mlrun/kfpops.py +1 -1
  11. mlrun/launcher/client.py +1 -6
  12. mlrun/launcher/remote.py +5 -3
  13. mlrun/model.py +1 -1
  14. mlrun/model_monitoring/batch_application.py +48 -85
  15. mlrun/package/packager.py +115 -89
  16. mlrun/package/packagers/default_packager.py +66 -65
  17. mlrun/package/packagers/numpy_packagers.py +109 -62
  18. mlrun/package/packagers/pandas_packagers.py +12 -23
  19. mlrun/package/packagers/python_standard_library_packagers.py +35 -57
  20. mlrun/package/packagers_manager.py +16 -13
  21. mlrun/package/utils/_pickler.py +8 -18
  22. mlrun/package/utils/_supported_format.py +1 -1
  23. mlrun/projects/pipelines.py +11 -6
  24. mlrun/projects/project.py +11 -4
  25. mlrun/runtimes/__init__.py +6 -0
  26. mlrun/runtimes/base.py +8 -0
  27. mlrun/runtimes/daskjob.py +73 -5
  28. mlrun/runtimes/local.py +9 -9
  29. mlrun/runtimes/remotesparkjob.py +1 -0
  30. mlrun/runtimes/utils.py +1 -1
  31. mlrun/utils/notifications/notification_pusher.py +1 -1
  32. mlrun/utils/version/version.json +2 -2
  33. {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/METADATA +2 -2
  34. {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/RECORD +38 -38
  35. {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/WHEEL +1 -1
  36. {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/LICENSE +0 -0
  37. {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/entry_points.txt +0 -0
  38. {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/top_level.txt +0 -0
mlrun/__main__.py CHANGED
@@ -303,7 +303,7 @@ def run(
     elif runtime:
         runtime = py_eval(runtime)
         if not isinstance(runtime, dict):
-            print(f"runtime parameter must be a dict, not {type(runtime)}")
+            print(f"Runtime parameter must be a dict, not {type(runtime)}")
             exit(1)
     else:
         runtime = {}
@@ -317,7 +317,7 @@ def run(
         get_in(runtime, "spec.build.origin_filename", origin_file)
     )
     if kfp:
-        print(f"code:\n{code}\n")
+        print(f"Code:\n{code}\n")
     suffix = pathlib.Path(url_file).suffix if url else ".py"

     # * is a placeholder for the url file when we want to use url args and let mlrun resolve the url file
@@ -340,7 +340,7 @@ def run(
             url = f"bash {url_file} {url_args}".strip()
         else:
             print(
-                "error, command must be specified with '{codefile}' in it "
+                "Error: command must be specified with '{codefile}' in it "
                 "(to determine the position of the code file)"
             )
             exit(1)
@@ -428,7 +428,7 @@ def run(
         if resp and dump:
             print(resp.to_yaml())
     except RunError as err:
-        print(f"runtime error: {err_to_str(err)}")
+        print(f"Runtime error: {err_to_str(err)}")
         exit(1)


@@ -500,7 +500,7 @@ def build(
     if runtime:
         runtime = py_eval(runtime)
         if not isinstance(runtime, dict):
-            print(f"runtime parameter must be a dict, not {type(runtime)}")
+            print(f"Runtime parameter must be a dict, not {type(runtime)}")
             exit(1)
         if kfp:
             print("Runtime:")
@@ -515,7 +515,7 @@ def build(
         func = import_function(func_url)

     else:
-        print("please specify the function path or url")
+        print("Error: Function path or url are required")
         exit(1)

     meta = func.metadata
@@ -532,12 +532,12 @@ def build(
     if source.endswith(".py"):
         if not path.isfile(source):
-            print(f"source file doesnt exist ({source})")
+            print(f"Source file doesnt exist ({source})")
             exit(1)
         with open(source) as fp:
             body = fp.read()
         based = b64encode(body.encode("utf-8")).decode("utf-8")
-        logger.info(f"packing code at {source}")
+        logger.info(f"Packing code at {source}")
         b.functionSourceCode = based
         func.spec.command = ""
     else:
@@ -563,13 +563,13 @@ def build(
     )

     if hasattr(func, "deploy"):
-        logger.info("remote deployment started")
+        logger.info("Remote deployment started")
         try:
             func.deploy(
                 with_mlrun=with_mlrun, watch=not silent, is_kfp=kfp, skip_deployed=skip
             )
         except Exception as err:
-            print(f"deploy error, {err_to_str(err)}")
+            print(f"Deploy error, {err_to_str(err)}")
             exit(1)

         state = func.status.state
@@ -584,9 +584,9 @@ def build(
                 fp.write(full_image)
             print("full image path = ", full_image)

-        print(f"function built, state={state} image={image}")
+        print(f"Function built, state={state} image={image}")
     else:
-        print("function does not have a deploy() method")
+        print("Function does not have a deploy() method")
         exit(1)


@@ -645,7 +645,7 @@ def deploy(
     else:
         runtime = {}
     if not isinstance(runtime, dict):
-        print(f"runtime parameter must be a dict, not {type(runtime)}")
+        print(f"Runtime parameter must be a dict, not {type(runtime)}")
        exit(1)

     if verbose:
@@ -683,7 +683,7 @@ def deploy(
         print(f"deploy error: {err_to_str(err)}")
         exit(1)

-    print(f"function deployed, address={addr}")
+    print(f"Function deployed, address={addr}")
     with open("/tmp/output", "w") as fp:
         fp.write(addr)
     with open("/tmp/name", "w") as fp:
@@ -716,7 +716,7 @@ def get(kind, name, selector, namespace, uid, project, tag, db, extra_args):
     if db:
         mlconf.dbpath = db
     if not project:
-        print("warning, project parameter was not specified using default !")
+        print("Warning, project parameter was not specified using default !")
     if kind.startswith("po"):
         print("Unsupported, use 'get runtimes' instead")
         return
@@ -794,7 +794,7 @@ def get(kind, name, selector, namespace, uid, project, tag, db, extra_args):
     elif kind.startswith("workflow"):
         run_db = get_run_db()
         if project == "*":
-            print("warning, reading workflows for all projects may take a long time !")
+            print("Warning, reading workflows for all projects may take a long time !")
         pipelines = run_db.list_pipelines(project=project, page_size=200)
         pipe_runs = pipelines.runs
         while pipelines.next_page_token is not None:
@@ -821,7 +821,7 @@ def get(kind, name, selector, namespace, uid, project, tag, db, extra_args):

     else:
         print(
-            "currently only get runs | runtimes | workflows | artifacts | func [name] | runtime are supported"
+            "Currently only get runs | runtimes | workflows | artifacts | func [name] | runtime are supported"
         )


@@ -906,7 +906,7 @@ def db(
         )
         pid = child.pid
         print(
-            f"background pid: {pid}, logs written to mlrun-stdout.log and mlrun-stderr.log, use:\n"
+            f"Background pid: {pid}, logs written to mlrun-stdout.log and mlrun-stderr.log, use:\n"
            f"`kill {pid}` (linux/mac) or `taskkill /pid {pid} /t /f` (windows), to kill the mlrun service process"
         )
     else:
@@ -924,7 +924,7 @@ def db(
     dotenv.set_key(filename, "MLRUN_MOCK_NUCLIO_DEPLOYMENT", "auto", quote_mode="")
     if pid:
         dotenv.set_key(filename, "MLRUN_SERVICE_PID", str(pid), quote_mode="")
-    print(f"updated configuration in {update_env} .env file")
+    print(f"Updated configuration in {update_env} .env file")


 @main.command()
@@ -952,7 +952,7 @@ def logs(uid, project, offset, db, watch):
         print(text.decode())

     if state:
-        print(f"final state: {state}")
+        print(f"Final state: {state}")


 @main.command()
@@ -1120,7 +1120,7 @@ def project(
     if arguments:
         args = fill_params(arguments)

-    print(f"running workflow {run} file: {workflow_path}")
+    print(f"Running workflow {run} file: {workflow_path}")
     gitops = (
         git_issue
         or environ.get("GITHUB_EVENT_PATH")
@@ -1159,7 +1159,7 @@ def project(
         exit(1)

     elif sync:
-        print("saving project functions to db ..")
+        print("Saving project functions to db ..")
         proj.sync_functions(save=True)


@@ -1296,7 +1296,7 @@ def show_or_set_config(
     if not op or op == "get":
         # print out the configuration (default or based on the specified env/api)
         if env_file and not path.isfile(path.expanduser(env_file)):
-            print(f"error, env file {env_file} does not exist")
+            print(f"Error: Env file {env_file} does not exist")
             exit(1)
         if env_file or api:
             mlrun.set_environment(
@@ -1316,7 +1316,7 @@ def show_or_set_config(
                 f".env file {filename} not found, creating new and setting configuration"
             )
         else:
-            print(f"updating configuration in .env file {filename}")
+            print(f"Updating configuration in .env file {filename}")
         env_dict = {
             "MLRUN_DBPATH": api,
             "MLRUN_ARTIFACT_PATH": artifact_path,
@@ -1332,7 +1332,7 @@ def show_or_set_config(
         if env_file:
             # if its not the default file print the usage details
             print(
-                f"to use the {env_file} .env file add the following to your development environment:\n"
+                f"To use the {env_file} .env file add the following to your development environment:\n"
                 f"MLRUN_ENV_FILE={env_file}"
             )

@@ -1341,11 +1341,11 @@ def show_or_set_config(
         if not path.isfile(filename):
             print(f".env file {filename} not found")
         else:
-            print(f"deleting .env file {filename}")
+            print(f"Deleting .env file {filename}")
             remove(filename)

     else:
-        print(f"Error, unsupported config option {op}")
+        print(f"Error: Unsupported config option {op}")


 def fill_params(params, params_dict=None):
mlrun/common/schemas/auth.py CHANGED
@@ -59,6 +59,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
     hub_source = "hub-source"
     workflow = "workflow"
     datastore_profile = "datastore-profile"
+    api_gateways = "api-gateways"

     def to_resource_string(
         self,
@@ -94,6 +95,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
             AuthorizationResourceTypes.hub_source: "/marketplace/sources",
             # workflow define how to run a pipeline and can be considered as the specification of a pipeline.
             AuthorizationResourceTypes.workflow: "/projects/{project_name}/workflows/{resource_name}",
+            AuthorizationResourceTypes.api_gateways: "/projects/{project_name}/api-gateways",
         }[self].format(project_name=project_name, resource_name=resource_name)

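Note: with the `api_gateways` entries above, authorization resource strings for API gateways resolve per project. A minimal sketch (the project name is hypothetical; it assumes `to_resource_string` accepts `project_name` and `resource_name` keywords, as the format string in this file suggests):

    from mlrun.common.schemas.auth import AuthorizationResourceTypes

    # "my-project" is an illustrative project name
    print(
        AuthorizationResourceTypes.api_gateways.to_resource_string(
            project_name="my-project", resource_name=""
        )
    )  # -> /projects/my-project/api-gateways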
mlrun/config.py CHANGED
@@ -1016,9 +1016,9 @@ class Config:
         mock_nuclio = not mlrun.mlconf.is_nuclio_detected()
         return True if mock_nuclio and force_mock is None else force_mock

-    def get_v3io_access_key(self):
+    def get_v3io_access_key(self) -> typing.Optional[str]:
         # Get v3io access key from the environment
-        return os.environ.get("V3IO_ACCESS_KEY")
+        return os.getenv("V3IO_ACCESS_KEY")

     def get_model_monitoring_file_target_path(
         self,
mlrun/datastore/dbfs_store.py CHANGED
@@ -82,9 +82,6 @@ class DatabricksFileSystemDisableCache(DatabricksFileSystem):
 class DBFSStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
-        if not endpoint:
-            endpoint = self._get_secret_or_env("DATABRICKS_HOST")
-        self.endpoint = endpoint
         self.get_filesystem(silent=False)

     def get_filesystem(self, silent=True):
mlrun/datastore/sources.py CHANGED
@@ -177,9 +177,14 @@ class CSVSource(BaseSourceDriver):
             parse_dates.append(time_field)

         data_item = mlrun.store_manager.object(self.path)
+        if self.path.startswith("ds://"):
+            store, path = mlrun.store_manager.get_or_create_store(self.path)
+            path = store.url + path
+        else:
+            path = data_item.url

         return storey.CSVSource(
-            paths=data_item.url,  # unlike self.path, it already has store:// replaced
+            paths=path,  # unlike self.path, it already has store:// replaced
             build_dict=True,
             key_field=self.key_field or key_field,
             storage_options=data_item.store.get_storage_options(),
@@ -323,9 +328,14 @@ class ParquetSource(BaseSourceDriver):
         attributes["context"] = context

         data_item = mlrun.store_manager.object(self.path)
+        if self.path.startswith("ds://"):
+            store, path = mlrun.store_manager.get_or_create_store(self.path)
+            path = store.url + path
+        else:
+            path = data_item.url

         return storey.ParquetSource(
-            paths=data_item.url,  # unlike self.path, it already has store:// replaced
+            paths=path,  # unlike self.path, it already has store:// replaced
             key_field=self.key_field or key_field,
             storage_options=data_item.store.get_storage_options(),
             end_filter=self.end_time,
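Note: the new `ds://` branch above resolves datastore-profile URLs into concrete storage URLs before handing them to storey. A minimal sketch of the resolution, using only the calls shown in the hunk (the `ds://` URL is hypothetical and presumes a registered profile):

    import mlrun

    # get_or_create_store returns the backing store and the in-store subpath
    store, subpath = mlrun.store_manager.get_or_create_store("ds://my-profile/data/file.csv")
    print(store.url + subpath)  # e.g. s3://my-bucket/data/file.csv, depending on the profile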
mlrun/datastore/targets.py CHANGED
@@ -294,6 +294,8 @@ def add_target_steps(graph, resource, targets, to_df=False, final_step=None):
         driver = get_target_driver(target, resource)
         table = driver.get_table_object() or table
         driver.update_resource_status()
+        if target.after_step:
+            target.attributes["infer_columns_from_data"] = True
         driver.add_writer_step(
             graph,
             target.after_step or final_step,
@@ -615,6 +617,7 @@ class BaseStoreTarget(DataTargetBase):

         driver._resource = resource
         driver.run_id = spec.run_id
+        driver.after_step = spec.after_step
         return driver

     def get_table_object(self):
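Note: the effect of the change above is that a target wired after a specific graph step infers its columns from the transformed data rather than from the source schema. A hedged sketch of where `after_step` comes from (assuming `ParquetTarget` accepts `after_step`, as `DataTargetBase` subclasses do):

    from mlrun.datastore.targets import ParquetTarget

    # "my_step" is a hypothetical graph step name; with after_step set,
    # add_target_steps() now enables infer_columns_from_data for this target
    target = ParquetTarget(name="transformed", after_step="my_step")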
mlrun/db/httpdb.py CHANGED
@@ -3143,6 +3143,21 @@ class HTTPRunDB(RunDBInterface):
             body=dict_to_json(authorization_verification_input.dict()),
         )

+    def list_api_gateways(self, project=None):
+        """
+        Returns a list of Nuclio api gateways
+        :param project: optional str parameter to filter by project, if not passed, default Nuclio's value is taken
+
+        :return: json with the list of Nuclio Api Gateways
+            (json example is here
+            https://github.com/nuclio/nuclio/blob/development/docs/reference/api/README.md#listing-all-api-gateways)
+        """
+        project = project or config.default_project
+        error = "list api gateways"
+        endpoint_path = f"projects/{project}/nuclio/api-gateways"
+        resp = self.api_call("GET", endpoint_path, error)
+        return resp.json()
+
     def trigger_migrations(self) -> Optional[mlrun.common.schemas.BackgroundTask]:
         """Trigger migrations (will do nothing if no migrations are needed) and wait for them to finish if actually
         triggered
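Note: a minimal usage sketch for the new `list_api_gateways` method above. It assumes a reachable MLRun API server; the project name is illustrative:

    import mlrun

    db = mlrun.get_run_db()
    # When project is omitted, the configured default project is used
    gateways = db.list_api_gateways(project="default")
    print(gateways)  # parsed JSON, see the Nuclio API reference linked above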
mlrun/feature_store/feature_set.py CHANGED
@@ -16,6 +16,7 @@ from datetime import datetime
 from typing import Dict, List, Optional, Union

 import pandas as pd
+import pytz
 from storey import EmitEveryEvent, EmitPolicy
 import mlrun
@@ -929,9 +930,11 @@ class FeatureSet(ModelObj):
         )
         df = self.spec.source.to_dataframe(
             columns=columns,
+            # overwrite `source.start_time` when the source is schedule.
             start_time=start_time
-            or pd.Timestamp.min,  # overwrite `source.start_time` when the source is schedule.
-            end_time=end_time or pd.Timestamp.max,
+            or pd.to_datetime(pd.Timestamp.min, unit="ns").replace(tzinfo=pytz.UTC),
+            end_time=end_time
+            or pd.to_datetime(pd.Timestamp.max, unit="ns").replace(tzinfo=pytz.UTC),
             time_field=time_column,
             **kwargs,
         )
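Note: the change above makes the default time bounds timezone-aware, since comparing naive bounds such as `pd.Timestamp.min` against tz-aware source timestamps raises a `TypeError` in pandas. A small sketch of the new bounds (the event timestamp is illustrative):

    import pandas as pd
    import pytz

    lower = pd.to_datetime(pd.Timestamp.min, unit="ns").replace(tzinfo=pytz.UTC)
    upper = pd.to_datetime(pd.Timestamp.max, unit="ns").replace(tzinfo=pytz.UTC)
    event_time = pd.Timestamp("2024-01-01T00:00:00", tz="UTC")
    # tz-aware bounds compare cleanly against tz-aware timestamps
    assert lower <= event_time <= upper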
mlrun/feature_store/retrieval/spark_merger.py CHANGED
@@ -172,11 +172,17 @@ class SparkFeatureMerger(BaseMerger):
         # when we upgrade pyspark, we should check whether this workaround is still necessary
         # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
         if semver.parse(pd.__version__)["major"] >= 2:
+            import pyspark.sql.functions as pyspark_functions
+
             type_conversion_dict = {}
             for field in df.schema.fields:
                 if str(field.dataType) == "TimestampType":
                     df = df.withColumn(
-                        field.name, df[field.name].cast("string")
+                        field.name,
+                        pyspark_functions.date_format(
+                            pyspark_functions.to_timestamp(field.name),
+                            "yyyy-MM-dd'T'HH:mm:ss.SSS",
+                        ),
                     )
                     type_conversion_dict[field.name] = "datetime64[ns]"
             df = df.toPandas()
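Note: a standalone sketch of the conversion performed above, which formats Spark `TimestampType` columns as ISO-like strings with millisecond precision before `toPandas()`, then restores `datetime64[ns]` on the pandas side (assumes a local SparkSession; the column name `ts` is illustrative):

    import pyspark.sql.functions as F
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.master("local[1]").getOrCreate()
    df = spark.createDataFrame([("2024-01-01 12:00:00.123",)], ["ts"])
    df = df.withColumn("ts", F.to_timestamp("ts"))  # a TimestampType column
    df = df.withColumn("ts", F.date_format("ts", "yyyy-MM-dd'T'HH:mm:ss.SSS"))
    pdf = df.toPandas().astype({"ts": "datetime64[ns]"})  # milliseconds preserved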
mlrun/kfpops.py CHANGED
@@ -93,7 +93,7 @@ def write_kfpmeta(struct):
         val = results[key]
         try:
             path = "/".join([KFP_ARTIFACTS_DIR, key])
-            logger.info("writing artifact output", path=path, val=val)
+            logger.info("Writing artifact output", path=path, val=val)
             with open(path, "w") as fp:
                 fp.write(str(val))
         except Exception as exc:
mlrun/launcher/client.py CHANGED
@@ -52,12 +52,7 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
         if runtime.kind in mlrun.runtimes.RuntimeKinds.nuclio_runtimes():
             return

-        build = runtime.spec.build
-        require_build = (
-            build.commands
-            or build.requirements
-            or (build.source and not build.load_source_on_run)
-        )
+        require_build = runtime.requires_build()
         image = runtime.spec.image
         # we allow users to not set an image, in that case we'll use the default
         if (
mlrun/launcher/remote.py CHANGED
@@ -90,9 +90,11 @@ class ClientRemoteLauncher(launcher.ClientBaseLauncher):
             runtime.deploy(skip_deployed=True, show_on_failure=True)

         else:
-            raise mlrun.errors.MLRunRuntimeError(
-                "Function image is not built/ready, set auto_build=True or use .deploy() method first"
-            )
+            if runtime.requires_build():
+                logger.warning(
+                    "Function image is not built/ready and function requires build - execution will fail. "
+                    "Need to set auto_build=True or use .deploy() method first"
+                )

         if runtime.verbose:
             logger.info(f"runspec:\n{run.to_yaml()}")
mlrun/model.py CHANGED
@@ -1433,7 +1433,7 @@ class RunObject(RunTemplate):
         self.logs(watch=False)
         if raise_on_failure and state != mlrun.runtimes.constants.RunStates.completed:
             raise mlrun.errors.MLRunRuntimeError(
-                f"task {self.metadata.name} did not complete (state={state})"
+                f"Task {self.metadata.name} did not complete (state={state})"
             )

         return state