mlrun 1.6.0rc7__py3-none-any.whl → 1.6.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (38)
  1. mlrun/__main__.py +27 -27
  2. mlrun/common/schemas/auth.py +2 -0
  3. mlrun/config.py +2 -2
  4. mlrun/datastore/dbfs_store.py +0 -3
  5. mlrun/datastore/sources.py +12 -2
  6. mlrun/datastore/targets.py +3 -0
  7. mlrun/db/httpdb.py +15 -0
  8. mlrun/feature_store/feature_set.py +5 -2
  9. mlrun/feature_store/retrieval/spark_merger.py +7 -1
  10. mlrun/kfpops.py +1 -1
  11. mlrun/launcher/client.py +1 -6
  12. mlrun/launcher/remote.py +5 -3
  13. mlrun/model.py +1 -1
  14. mlrun/model_monitoring/batch_application.py +48 -85
  15. mlrun/package/packager.py +115 -89
  16. mlrun/package/packagers/default_packager.py +66 -65
  17. mlrun/package/packagers/numpy_packagers.py +109 -62
  18. mlrun/package/packagers/pandas_packagers.py +12 -23
  19. mlrun/package/packagers/python_standard_library_packagers.py +35 -57
  20. mlrun/package/packagers_manager.py +16 -13
  21. mlrun/package/utils/_pickler.py +8 -18
  22. mlrun/package/utils/_supported_format.py +1 -1
  23. mlrun/projects/pipelines.py +11 -6
  24. mlrun/projects/project.py +11 -4
  25. mlrun/runtimes/__init__.py +6 -0
  26. mlrun/runtimes/base.py +8 -0
  27. mlrun/runtimes/daskjob.py +73 -5
  28. mlrun/runtimes/local.py +9 -9
  29. mlrun/runtimes/remotesparkjob.py +1 -0
  30. mlrun/runtimes/utils.py +1 -1
  31. mlrun/utils/notifications/notification_pusher.py +1 -1
  32. mlrun/utils/version/version.json +2 -2
  33. {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/METADATA +2 -2
  34. {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/RECORD +38 -38
  35. {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/WHEEL +1 -1
  36. {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/LICENSE +0 -0
  37. {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/entry_points.txt +0 -0
  38. {mlrun-1.6.0rc7.dist-info → mlrun-1.6.0rc8.dist-info}/top_level.txt +0 -0
mlrun/__main__.py CHANGED
@@ -303,7 +303,7 @@ def run(
     elif runtime:
         runtime = py_eval(runtime)
         if not isinstance(runtime, dict):
-            print(f"runtime parameter must be a dict, not {type(runtime)}")
+            print(f"Runtime parameter must be a dict, not {type(runtime)}")
             exit(1)
     else:
         runtime = {}
@@ -317,7 +317,7 @@ def run(
         get_in(runtime, "spec.build.origin_filename", origin_file)
     )
     if kfp:
-        print(f"code:\n{code}\n")
+        print(f"Code:\n{code}\n")
     suffix = pathlib.Path(url_file).suffix if url else ".py"

     # * is a placeholder for the url file when we want to use url args and let mlrun resolve the url file
@@ -340,7 +340,7 @@ def run(
             url = f"bash {url_file} {url_args}".strip()
         else:
             print(
-                "error, command must be specified with '{codefile}' in it "
+                "Error: command must be specified with '{codefile}' in it "
                 "(to determine the position of the code file)"
             )
             exit(1)
@@ -428,7 +428,7 @@ def run(
         if resp and dump:
             print(resp.to_yaml())
     except RunError as err:
-        print(f"runtime error: {err_to_str(err)}")
+        print(f"Runtime error: {err_to_str(err)}")
         exit(1)


@@ -500,7 +500,7 @@ def build(
     if runtime:
         runtime = py_eval(runtime)
         if not isinstance(runtime, dict):
-            print(f"runtime parameter must be a dict, not {type(runtime)}")
+            print(f"Runtime parameter must be a dict, not {type(runtime)}")
             exit(1)
         if kfp:
             print("Runtime:")
@@ -515,7 +515,7 @@ def build(
         func = import_function(func_url)

     else:
-        print("please specify the function path or url")
+        print("Error: Function path or url are required")
         exit(1)

     meta = func.metadata
@@ -532,12 +532,12 @@ def build(
     if source.endswith(".py"):
         if not path.isfile(source):
-            print(f"source file doesnt exist ({source})")
+            print(f"Source file doesnt exist ({source})")
             exit(1)
         with open(source) as fp:
             body = fp.read()
         based = b64encode(body.encode("utf-8")).decode("utf-8")
-        logger.info(f"packing code at {source}")
+        logger.info(f"Packing code at {source}")
         b.functionSourceCode = based
         func.spec.command = ""
     else:
@@ -563,13 +563,13 @@ def build(
     )

     if hasattr(func, "deploy"):
-        logger.info("remote deployment started")
+        logger.info("Remote deployment started")
         try:
             func.deploy(
                 with_mlrun=with_mlrun, watch=not silent, is_kfp=kfp, skip_deployed=skip
             )
         except Exception as err:
-            print(f"deploy error, {err_to_str(err)}")
+            print(f"Deploy error, {err_to_str(err)}")
             exit(1)

         state = func.status.state
@@ -584,9 +584,9 @@ def build(
                 fp.write(full_image)
             print("full image path = ", full_image)

-        print(f"function built, state={state} image={image}")
+        print(f"Function built, state={state} image={image}")
     else:
-        print("function does not have a deploy() method")
+        print("Function does not have a deploy() method")
         exit(1)


@@ -645,7 +645,7 @@ def deploy(
     else:
         runtime = {}
     if not isinstance(runtime, dict):
-        print(f"runtime parameter must be a dict, not {type(runtime)}")
+        print(f"Runtime parameter must be a dict, not {type(runtime)}")
        exit(1)

     if verbose:
@@ -683,7 +683,7 @@ def deploy(
         print(f"deploy error: {err_to_str(err)}")
         exit(1)

-    print(f"function deployed, address={addr}")
+    print(f"Function deployed, address={addr}")
     with open("/tmp/output", "w") as fp:
         fp.write(addr)
     with open("/tmp/name", "w") as fp:
@@ -716,7 +716,7 @@ def get(kind, name, selector, namespace, uid, project, tag, db, extra_args):
     if db:
         mlconf.dbpath = db
     if not project:
-        print("warning, project parameter was not specified using default !")
+        print("Warning, project parameter was not specified using default !")
     if kind.startswith("po"):
         print("Unsupported, use 'get runtimes' instead")
         return
@@ -794,7 +794,7 @@ def get(kind, name, selector, namespace, uid, project, tag, db, extra_args):
     elif kind.startswith("workflow"):
         run_db = get_run_db()
         if project == "*":
-            print("warning, reading workflows for all projects may take a long time !")
+            print("Warning, reading workflows for all projects may take a long time !")
         pipelines = run_db.list_pipelines(project=project, page_size=200)
         pipe_runs = pipelines.runs
         while pipelines.next_page_token is not None:
@@ -821,7 +821,7 @@ def get(kind, name, selector, namespace, uid, project, tag, db, extra_args):

     else:
         print(
-            "currently only get runs | runtimes | workflows | artifacts | func [name] | runtime are supported"
+            "Currently only get runs | runtimes | workflows | artifacts | func [name] | runtime are supported"
         )


@@ -906,7 +906,7 @@ def db(
         )
         pid = child.pid
         print(
-            f"background pid: {pid}, logs written to mlrun-stdout.log and mlrun-stderr.log, use:\n"
+            f"Background pid: {pid}, logs written to mlrun-stdout.log and mlrun-stderr.log, use:\n"
            f"`kill {pid}` (linux/mac) or `taskkill /pid {pid} /t /f` (windows), to kill the mlrun service process"
         )
     else:
@@ -924,7 +924,7 @@ def db(
     dotenv.set_key(filename, "MLRUN_MOCK_NUCLIO_DEPLOYMENT", "auto", quote_mode="")
     if pid:
         dotenv.set_key(filename, "MLRUN_SERVICE_PID", str(pid), quote_mode="")
-    print(f"updated configuration in {update_env} .env file")
+    print(f"Updated configuration in {update_env} .env file")


 @main.command()
@@ -952,7 +952,7 @@ def logs(uid, project, offset, db, watch):
         print(text.decode())

     if state:
-        print(f"final state: {state}")
+        print(f"Final state: {state}")


 @main.command()
@@ -1120,7 +1120,7 @@ def project(
     if arguments:
         args = fill_params(arguments)

-    print(f"running workflow {run} file: {workflow_path}")
+    print(f"Running workflow {run} file: {workflow_path}")
     gitops = (
         git_issue
         or environ.get("GITHUB_EVENT_PATH")
@@ -1159,7 +1159,7 @@ def project(
         exit(1)

     elif sync:
-        print("saving project functions to db ..")
+        print("Saving project functions to db ..")
         proj.sync_functions(save=True)


@@ -1296,7 +1296,7 @@ def show_or_set_config(
     if not op or op == "get":
         # print out the configuration (default or based on the specified env/api)
         if env_file and not path.isfile(path.expanduser(env_file)):
-            print(f"error, env file {env_file} does not exist")
+            print(f"Error: Env file {env_file} does not exist")
             exit(1)
         if env_file or api:
             mlrun.set_environment(
@@ -1316,7 +1316,7 @@ def show_or_set_config(
                 f".env file {filename} not found, creating new and setting configuration"
             )
         else:
-            print(f"updating configuration in .env file {filename}")
+            print(f"Updating configuration in .env file {filename}")
         env_dict = {
             "MLRUN_DBPATH": api,
             "MLRUN_ARTIFACT_PATH": artifact_path,
@@ -1332,7 +1332,7 @@ def show_or_set_config(
         if env_file:
             # if its not the default file print the usage details
             print(
-                f"to use the {env_file} .env file add the following to your development environment:\n"
+                f"To use the {env_file} .env file add the following to your development environment:\n"
                 f"MLRUN_ENV_FILE={env_file}"
             )

@@ -1341,11 +1341,11 @@ def show_or_set_config(
         if not path.isfile(filename):
             print(f".env file {filename} not found")
         else:
-            print(f"deleting .env file {filename}")
+            print(f"Deleting .env file {filename}")
             remove(filename)

     else:
-        print(f"Error, unsupported config option {op}")
+        print(f"Error: Unsupported config option {op}")


 def fill_params(params, params_dict=None):
mlrun/common/schemas/auth.py CHANGED
@@ -59,6 +59,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
     hub_source = "hub-source"
     workflow = "workflow"
     datastore_profile = "datastore-profile"
+    api_gateways = "api-gateways"

     def to_resource_string(
         self,
@@ -94,6 +95,7 @@ class AuthorizationResourceTypes(mlrun.common.types.StrEnum):
             AuthorizationResourceTypes.hub_source: "/marketplace/sources",
             # workflow define how to run a pipeline and can be considered as the specification of a pipeline.
             AuthorizationResourceTypes.workflow: "/projects/{project_name}/workflows/{resource_name}",
+            AuthorizationResourceTypes.api_gateways: "/projects/{project_name}/api-gateways",
         }[self].format(project_name=project_name, resource_name=resource_name)

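Note: with the `api_gateways` entries above, authorization resource strings for API gateways resolve per project. A minimal sketch (the project name is hypothetical; it assumes `to_resource_string` accepts `project_name` and `resource_name` keywords, as the format string in this file suggests):

    from mlrun.common.schemas.auth import AuthorizationResourceTypes

    # "my-project" is an illustrative project name
    print(
        AuthorizationResourceTypes.api_gateways.to_resource_string(
            project_name="my-project", resource_name=""
        )
    )  # -> /projects/my-project/api-gateways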
mlrun/config.py CHANGED
@@ -1016,9 +1016,9 @@ class Config:
         mock_nuclio = not mlrun.mlconf.is_nuclio_detected()
         return True if mock_nuclio and force_mock is None else force_mock

-    def get_v3io_access_key(self):
+    def get_v3io_access_key(self) -> typing.Optional[str]:
         # Get v3io access key from the environment
-        return os.environ.get("V3IO_ACCESS_KEY")
+        return os.getenv("V3IO_ACCESS_KEY")

     def get_model_monitoring_file_target_path(
         self,
mlrun/datastore/dbfs_store.py CHANGED
@@ -82,9 +82,6 @@ class DatabricksFileSystemDisableCache(DatabricksFileSystem):
 class DBFSStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
-        if not endpoint:
-            endpoint = self._get_secret_or_env("DATABRICKS_HOST")
-        self.endpoint = endpoint
         self.get_filesystem(silent=False)

     def get_filesystem(self, silent=True):
mlrun/datastore/sources.py CHANGED
@@ -177,9 +177,14 @@ class CSVSource(BaseSourceDriver):
             parse_dates.append(time_field)

         data_item = mlrun.store_manager.object(self.path)
+        if self.path.startswith("ds://"):
+            store, path = mlrun.store_manager.get_or_create_store(self.path)
+            path = store.url + path
+        else:
+            path = data_item.url

         return storey.CSVSource(
-            paths=data_item.url,  # unlike self.path, it already has store:// replaced
+            paths=path,  # unlike self.path, it already has store:// replaced
             build_dict=True,
             key_field=self.key_field or key_field,
             storage_options=data_item.store.get_storage_options(),
@@ -323,9 +328,14 @@ class ParquetSource(BaseSourceDriver):
         attributes["context"] = context

         data_item = mlrun.store_manager.object(self.path)
+        if self.path.startswith("ds://"):
+            store, path = mlrun.store_manager.get_or_create_store(self.path)
+            path = store.url + path
+        else:
+            path = data_item.url

         return storey.ParquetSource(
-            paths=data_item.url,  # unlike self.path, it already has store:// replaced
+            paths=path,  # unlike self.path, it already has store:// replaced
             key_field=self.key_field or key_field,
             storage_options=data_item.store.get_storage_options(),
             end_filter=self.end_time,
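Note: the new `ds://` branch above resolves datastore-profile URLs into concrete storage URLs before handing them to storey. A minimal sketch of the resolution, using only the calls shown in the hunk (the `ds://` URL is hypothetical and presumes a registered profile):

    import mlrun

    # get_or_create_store returns the backing store and the in-store subpath
    store, subpath = mlrun.store_manager.get_or_create_store("ds://my-profile/data/file.csv")
    print(store.url + subpath)  # e.g. s3://my-bucket/data/file.csv, depending on the profile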
mlrun/datastore/targets.py CHANGED
@@ -294,6 +294,8 @@ def add_target_steps(graph, resource, targets, to_df=False, final_step=None):
         driver = get_target_driver(target, resource)
         table = driver.get_table_object() or table
         driver.update_resource_status()
+        if target.after_step:
+            target.attributes["infer_columns_from_data"] = True
         driver.add_writer_step(
             graph,
             target.after_step or final_step,
@@ -615,6 +617,7 @@ class BaseStoreTarget(DataTargetBase):

         driver._resource = resource
         driver.run_id = spec.run_id
+        driver.after_step = spec.after_step
         return driver

     def get_table_object(self):
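Note: the effect of the change above is that a target wired after a specific graph step infers its columns from the transformed data rather than from the source schema. A hedged sketch of where `after_step` comes from (assuming `ParquetTarget` accepts `after_step`, as `DataTargetBase` subclasses do):

    from mlrun.datastore.targets import ParquetTarget

    # "my_step" is a hypothetical graph step name; with after_step set,
    # add_target_steps() now enables infer_columns_from_data for this target
    target = ParquetTarget(name="transformed", after_step="my_step")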
mlrun/db/httpdb.py CHANGED
@@ -3143,6 +3143,21 @@ class HTTPRunDB(RunDBInterface):
             body=dict_to_json(authorization_verification_input.dict()),
         )

+    def list_api_gateways(self, project=None):
+        """
+        Returns a list of Nuclio api gateways
+        :param project: optional str parameter to filter by project, if not passed, default Nuclio's value is taken
+
+        :return: json with the list of Nuclio Api Gateways
+            (json example is here
+            https://github.com/nuclio/nuclio/blob/development/docs/reference/api/README.md#listing-all-api-gateways)
+        """
+        project = project or config.default_project
+        error = "list api gateways"
+        endpoint_path = f"projects/{project}/nuclio/api-gateways"
+        resp = self.api_call("GET", endpoint_path, error)
+        return resp.json()
+
     def trigger_migrations(self) -> Optional[mlrun.common.schemas.BackgroundTask]:
         """Trigger migrations (will do nothing if no migrations are needed) and wait for them to finish if actually
         triggered
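Note: a minimal usage sketch for the new `list_api_gateways` method above. It assumes a reachable MLRun API server; the project name is illustrative:

    import mlrun

    db = mlrun.get_run_db()
    # When project is omitted, the configured default project is used
    gateways = db.list_api_gateways(project="default")
    print(gateways)  # parsed JSON, see the Nuclio API reference linked above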
mlrun/feature_store/feature_set.py CHANGED
@@ -16,6 +16,7 @@ from datetime import datetime
 from typing import Dict, List, Optional, Union

 import pandas as pd
+import pytz
 from storey import EmitEveryEvent, EmitPolicy
 import mlrun
@@ -929,9 +930,11 @@ class FeatureSet(ModelObj):
         )
         df = self.spec.source.to_dataframe(
             columns=columns,
+            # overwrite `source.start_time` when the source is schedule.
             start_time=start_time
-            or pd.Timestamp.min,  # overwrite `source.start_time` when the source is schedule.
-            end_time=end_time or pd.Timestamp.max,
+            or pd.to_datetime(pd.Timestamp.min, unit="ns").replace(tzinfo=pytz.UTC),
+            end_time=end_time
+            or pd.to_datetime(pd.Timestamp.max, unit="ns").replace(tzinfo=pytz.UTC),
             time_field=time_column,
             **kwargs,
         )
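Note: the change above makes the default time bounds timezone-aware, since comparing naive bounds such as `pd.Timestamp.min` against tz-aware source timestamps raises a `TypeError` in pandas. A small sketch of the new bounds (the event timestamp is illustrative):

    import pandas as pd
    import pytz

    lower = pd.to_datetime(pd.Timestamp.min, unit="ns").replace(tzinfo=pytz.UTC)
    upper = pd.to_datetime(pd.Timestamp.max, unit="ns").replace(tzinfo=pytz.UTC)
    event_time = pd.Timestamp("2024-01-01T00:00:00", tz="UTC")
    # tz-aware bounds compare cleanly against tz-aware timestamps
    assert lower <= event_time <= upper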
mlrun/feature_store/retrieval/spark_merger.py CHANGED
@@ -172,11 +172,17 @@ class SparkFeatureMerger(BaseMerger):
         # when we upgrade pyspark, we should check whether this workaround is still necessary
         # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
         if semver.parse(pd.__version__)["major"] >= 2:
+            import pyspark.sql.functions as pyspark_functions
+
             type_conversion_dict = {}
             for field in df.schema.fields:
                 if str(field.dataType) == "TimestampType":
                     df = df.withColumn(
-                        field.name, df[field.name].cast("string")
+                        field.name,
+                        pyspark_functions.date_format(
+                            pyspark_functions.to_timestamp(field.name),
+                            "yyyy-MM-dd'T'HH:mm:ss.SSS",
+                        ),
                     )
                     type_conversion_dict[field.name] = "datetime64[ns]"
             df = df.toPandas()
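Note: a standalone sketch of the conversion performed above, which formats Spark `TimestampType` columns as ISO-like strings with millisecond precision before `toPandas()`, then restores `datetime64[ns]` on the pandas side (assumes a local SparkSession; the column name `ts` is illustrative):

    import pyspark.sql.functions as F
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.master("local[1]").getOrCreate()
    df = spark.createDataFrame([("2024-01-01 12:00:00.123",)], ["ts"])
    df = df.withColumn("ts", F.to_timestamp("ts"))  # a TimestampType column
    df = df.withColumn("ts", F.date_format("ts", "yyyy-MM-dd'T'HH:mm:ss.SSS"))
    pdf = df.toPandas().astype({"ts": "datetime64[ns]"})  # milliseconds preserved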
mlrun/kfpops.py CHANGED
@@ -93,7 +93,7 @@ def write_kfpmeta(struct):
         val = results[key]
         try:
             path = "/".join([KFP_ARTIFACTS_DIR, key])
-            logger.info("writing artifact output", path=path, val=val)
+            logger.info("Writing artifact output", path=path, val=val)
             with open(path, "w") as fp:
                 fp.write(str(val))
         except Exception as exc:
mlrun/launcher/client.py CHANGED
@@ -52,12 +52,7 @@ class ClientBaseLauncher(launcher.BaseLauncher, abc.ABC):
         if runtime.kind in mlrun.runtimes.RuntimeKinds.nuclio_runtimes():
             return

-        build = runtime.spec.build
-        require_build = (
-            build.commands
-            or build.requirements
-            or (build.source and not build.load_source_on_run)
-        )
+        require_build = runtime.requires_build()
         image = runtime.spec.image
         # we allow users to not set an image, in that case we'll use the default
         if (
mlrun/launcher/remote.py CHANGED
@@ -90,9 +90,11 @@ class ClientRemoteLauncher(launcher.ClientBaseLauncher):
             runtime.deploy(skip_deployed=True, show_on_failure=True)

         else:
-            raise mlrun.errors.MLRunRuntimeError(
-                "Function image is not built/ready, set auto_build=True or use .deploy() method first"
-            )
+            if runtime.requires_build():
+                logger.warning(
+                    "Function image is not built/ready and function requires build - execution will fail. "
+                    "Need to set auto_build=True or use .deploy() method first"
+                )

         if runtime.verbose:
             logger.info(f"runspec:\n{run.to_yaml()}")
mlrun/model.py CHANGED
@@ -1433,7 +1433,7 @@ class RunObject(RunTemplate):
         self.logs(watch=False)
         if raise_on_failure and state != mlrun.runtimes.constants.RunStates.completed:
             raise mlrun.errors.MLRunRuntimeError(
-                f"task {self.metadata.name} did not complete (state={state})"
+                f"Task {self.metadata.name} did not complete (state={state})"
             )

         return state