mlrun-1.7.0rc38-py3-none-any.whl → mlrun-1.7.0rc40-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (42)
  1. mlrun/alerts/alert.py +30 -27
  2. mlrun/common/schemas/alert.py +3 -0
  3. mlrun/common/schemas/notification.py +1 -0
  4. mlrun/datastore/alibaba_oss.py +2 -2
  5. mlrun/datastore/azure_blob.py +6 -3
  6. mlrun/datastore/base.py +1 -1
  7. mlrun/datastore/dbfs_store.py +2 -2
  8. mlrun/datastore/google_cloud_storage.py +83 -20
  9. mlrun/datastore/s3.py +2 -2
  10. mlrun/datastore/sources.py +54 -0
  11. mlrun/datastore/targets.py +9 -53
  12. mlrun/db/httpdb.py +6 -1
  13. mlrun/errors.py +8 -0
  14. mlrun/execution.py +7 -0
  15. mlrun/feature_store/api.py +5 -0
  16. mlrun/feature_store/retrieval/job.py +1 -0
  17. mlrun/model.py +24 -3
  18. mlrun/model_monitoring/api.py +9 -0
  19. mlrun/model_monitoring/applications/_application_steps.py +36 -0
  20. mlrun/model_monitoring/applications/histogram_data_drift.py +15 -13
  21. mlrun/model_monitoring/controller.py +15 -11
  22. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +5 -5
  23. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +85 -47
  24. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +35 -7
  25. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +3 -1
  26. mlrun/model_monitoring/helpers.py +16 -17
  27. mlrun/model_monitoring/stream_processing.py +2 -3
  28. mlrun/projects/pipelines.py +19 -30
  29. mlrun/projects/project.py +69 -51
  30. mlrun/run.py +8 -6
  31. mlrun/runtimes/__init__.py +4 -0
  32. mlrun/runtimes/nuclio/api_gateway.py +9 -0
  33. mlrun/runtimes/nuclio/application/application.py +112 -54
  34. mlrun/runtimes/nuclio/function.py +1 -1
  35. mlrun/utils/helpers.py +33 -2
  36. mlrun/utils/version/version.json +2 -2
  37. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/METADATA +8 -11
  38. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/RECORD +42 -42
  39. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/WHEEL +1 -1
  40. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/LICENSE +0 -0
  41. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/entry_points.txt +0 -0
  42. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc40.dist-info}/top_level.txt +0 -0
mlrun/projects/pipelines.py CHANGED
@@ -447,7 +447,6 @@ class _PipelineRunner(abc.ABC):
447
447
  namespace=None,
448
448
  source=None,
449
449
  notifications: list[mlrun.model.Notification] = None,
450
- send_start_notification: bool = True,
451
450
  ) -> _PipelineRunStatus:
452
451
  pass
453
452
 
@@ -567,7 +566,6 @@ class _KFPRunner(_PipelineRunner):
567
566
  namespace=None,
568
567
  source=None,
569
568
  notifications: list[mlrun.model.Notification] = None,
570
- send_start_notification: bool = True,
571
569
  ) -> _PipelineRunStatus:
572
570
  pipeline_context.set(project, workflow_spec)
573
571
  workflow_handler = _PipelineRunner._get_handler(
@@ -585,7 +583,8 @@ class _KFPRunner(_PipelineRunner):
585
583
  "Notifications will only be sent if you wait for pipeline completion. "
586
584
  "To use the new notification behavior, use the remote pipeline runner."
587
585
  )
588
- for notification in notifications:
586
+ # for start message, fallback to old notification behavior
587
+ for notification in notifications or []:
589
588
  project.notifiers.add_notification(
590
589
  notification.kind, notification.params
591
590
  )
@@ -616,13 +615,12 @@ class _KFPRunner(_PipelineRunner):
616
615
  func_name=func.metadata.name,
617
616
  exc_info=err_to_str(exc),
618
617
  )
619
- if send_start_notification:
620
- project.notifiers.push_pipeline_start_message(
621
- project.metadata.name,
622
- project.get_param("commit_id", None),
623
- run_id,
624
- True,
625
- )
618
+ project.notifiers.push_pipeline_start_message(
619
+ project.metadata.name,
620
+ project.get_param("commit_id", None),
621
+ run_id,
622
+ True,
623
+ )
626
624
  pipeline_context.clear()
627
625
  return _PipelineRunStatus(run_id, cls, project=project, workflow=workflow_spec)
628
626
 
@@ -670,7 +668,6 @@ class _LocalRunner(_PipelineRunner):
670
668
  namespace=None,
671
669
  source=None,
672
670
  notifications: list[mlrun.model.Notification] = None,
673
- send_start_notification: bool = True,
674
671
  ) -> _PipelineRunStatus:
675
672
  pipeline_context.set(project, workflow_spec)
676
673
  workflow_handler = _PipelineRunner._get_handler(
@@ -692,10 +689,9 @@ class _LocalRunner(_PipelineRunner):
692
689
  project.set_source(source=source)
693
690
  pipeline_context.workflow_artifact_path = artifact_path
694
691
 
695
- if send_start_notification:
696
- project.notifiers.push_pipeline_start_message(
697
- project.metadata.name, pipeline_id=workflow_id
698
- )
692
+ project.notifiers.push_pipeline_start_message(
693
+ project.metadata.name, pipeline_id=workflow_id
694
+ )
699
695
  err = None
700
696
  try:
701
697
  workflow_handler(**workflow_spec.args)
@@ -755,22 +751,10 @@ class _RemoteRunner(_PipelineRunner):
755
751
  namespace: str = None,
756
752
  source: str = None,
757
753
  notifications: list[mlrun.model.Notification] = None,
758
- send_start_notification: bool = True,
759
754
  ) -> typing.Optional[_PipelineRunStatus]:
760
755
  workflow_name = normalize_workflow_name(name=name, project_name=project.name)
761
756
  workflow_id = None
762
757
 
763
- # for start message, fallback to old notification behavior
764
- if send_start_notification:
765
- for notification in notifications or []:
766
- project.notifiers.add_notification(
767
- notification.kind, notification.params
768
- )
769
- # if a notification with `when=running` is provided, it will be used explicitly and others
770
- # will be ignored
771
- if "running" in notification.when:
772
- break
773
-
774
758
  # The returned engine for this runner is the engine of the workflow.
775
759
  # In this way wait_for_completion/get_run_status would be executed by the correct pipeline runner.
776
760
  inner_engine = get_workflow_engine(workflow_spec.engine)
@@ -870,9 +854,6 @@ class _RemoteRunner(_PipelineRunner):
870
854
  state = mlrun_pipelines.common.models.RunStatuses.failed
871
855
  else:
872
856
  state = mlrun_pipelines.common.models.RunStatuses.running
873
- project.notifiers.push_pipeline_start_message(
874
- project.metadata.name,
875
- )
876
857
  pipeline_context.clear()
877
858
  return _PipelineRunStatus(
878
859
  run_id=workflow_id,
@@ -1078,6 +1059,13 @@ def load_and_run(
1078
1059
  if load_only:
1079
1060
  return
1080
1061
 
1062
+ # extract "start" notification if exists
1063
+ start_notifications = [
1064
+ notification
1065
+ for notification in context.get_notifications()
1066
+ if "running" in notification.when
1067
+ ]
1068
+
1081
1069
  workflow_log_message = workflow_name or workflow_path
1082
1070
  context.logger.info(f"Running workflow {workflow_log_message} from remote")
1083
1071
  run = project.run(
@@ -1093,6 +1081,7 @@ def load_and_run(
1093
1081
  cleanup_ttl=cleanup_ttl,
1094
1082
  engine=engine,
1095
1083
  local=local,
1084
+ notifications=start_notifications,
1096
1085
  )
1097
1086
  context.log_result(key="workflow_id", value=run.run_id)
1098
1087
  context.log_result(key="engine", value=run._engine.engine, commit=True)
mlrun/projects/project.py CHANGED
@@ -517,17 +517,24 @@ def get_or_create_project(
517
517
  parameters=parameters,
518
518
  allow_cross_project=allow_cross_project,
519
519
  )
520
- logger.info("Project loaded successfully", project_name=name)
520
+ logger.info("Project loaded successfully", project_name=project.name)
521
521
  return project
522
522
  except mlrun.errors.MLRunNotFoundError:
523
- logger.debug("Project not found in db", project_name=name)
523
+ logger.debug(
524
+ "Project not found in db", project_name=name, user_project=user_project
525
+ )
524
526
 
525
527
  spec_path = path.join(context, subpath or "", "project.yaml")
526
528
  load_from_path = url or path.isfile(spec_path)
527
529
  # do not nest under "try" or else the exceptions raised below will be logged along with the "not found" message
528
530
  if load_from_path:
529
531
  # loads a project from archive or local project.yaml
530
- logger.info("Loading project from path", project_name=name, path=url or context)
532
+ logger.info(
533
+ "Loading project from path",
534
+ project_name=name,
535
+ user_project=user_project,
536
+ path=url or context,
537
+ )
531
538
  project = load_project(
532
539
  context,
533
540
  url,
@@ -544,7 +551,7 @@ def get_or_create_project(
544
551
 
545
552
  logger.info(
546
553
  "Project loaded successfully",
547
- project_name=name,
554
+ project_name=project.name,
548
555
  path=url or context,
549
556
  stored_in_db=save,
550
557
  )
@@ -562,7 +569,9 @@ def get_or_create_project(
562
569
  save=save,
563
570
  parameters=parameters,
564
571
  )
565
- logger.info("Project created successfully", project_name=name, stored_in_db=save)
572
+ logger.info(
573
+ "Project created successfully", project_name=project.name, stored_in_db=save
574
+ )
566
575
  return project
567
576
 
568
577
 
@@ -2394,7 +2403,11 @@ class MlrunProject(ModelObj):
2394
2403
  requirements: typing.Union[str, list[str]] = None,
2395
2404
  requirements_file: str = "",
2396
2405
  ) -> tuple[str, str, mlrun.runtimes.BaseRuntime, dict]:
2397
- if func is None and not _has_module(handler, kind):
2406
+ if (
2407
+ func is None
2408
+ and not _has_module(handler, kind)
2409
+ and mlrun.runtimes.RuntimeKinds.supports_from_notebook(kind)
2410
+ ):
2398
2411
  # if function path is not provided and it is not a module (no ".")
2399
2412
  # use the current notebook as default
2400
2413
  if is_ipython:
@@ -2973,7 +2986,6 @@ class MlrunProject(ModelObj):
2973
2986
  source: str = None,
2974
2987
  cleanup_ttl: int = None,
2975
2988
  notifications: list[mlrun.model.Notification] = None,
2976
- send_start_notification: bool = True,
2977
2989
  ) -> _PipelineRunStatus:
2978
2990
  """Run a workflow using kubeflow pipelines
2979
2991
 
@@ -3010,8 +3022,6 @@ class MlrunProject(ModelObj):
3010
3022
  workflow and all its resources are deleted)
3011
3023
  :param notifications:
3012
3024
  List of notifications to send for workflow completion
3013
- :param send_start_notification:
3014
- Send a notification when the workflow starts
3015
3025
 
3016
3026
  :returns: ~py:class:`~mlrun.projects.pipelines._PipelineRunStatus` instance
3017
3027
  """
@@ -3089,7 +3099,6 @@ class MlrunProject(ModelObj):
3089
3099
  namespace=namespace,
3090
3100
  source=source,
3091
3101
  notifications=notifications,
3092
- send_start_notification=send_start_notification,
3093
3102
  )
3094
3103
  # run is None when scheduling
3095
3104
  if run and run.state == mlrun_pipelines.common.models.RunStatuses.failed:
@@ -3222,30 +3231,30 @@ class MlrunProject(ModelObj):
3222
3231
  infrastructure functions. Important to note that you have to set the credentials before deploying any
3223
3232
  model monitoring or serving function.
3224
3233
 
3225
- :param access_key: Model Monitoring access key for managing user permissions.
3226
- :param endpoint_store_connection: Endpoint store connection string. By default, None.
3227
- Options:
3228
- 1. None, will be set from the system configuration.
3229
- 2. v3io - for v3io endpoint store,
3230
- pass `v3io` and the system will generate the exact path.
3231
- 3. MySQL/SQLite - for SQL endpoint store, please provide full
3232
- connection string, for example
3233
- mysql+pymysql://<username>:<password>@<host>:<port>/<db_name>
3234
- :param stream_path: Path to the model monitoring stream. By default, None.
3235
- Options:
3236
- 1. None, will be set from the system configuration.
3237
- 2. v3io - for v3io stream,
3238
- pass `v3io` and the system will generate the exact path.
3239
- 3. Kafka - for Kafka stream, please provide full connection string without
3240
- custom topic, for example kafka://<some_kafka_broker>:<port>.
3234
+ :param access_key: Model monitoring access key for managing user permissions.
3235
+ :param endpoint_store_connection: Endpoint store connection string. By default, None. Options:
3236
+
3237
+ * None - will be set from the system configuration.
3238
+ * v3io - for v3io endpoint store, pass `v3io` and the system will generate the
3239
+ exact path.
3240
+ * MySQL/SQLite - for SQL endpoint store, provide the full connection string,
3241
+ for example: mysql+pymysql://<username>:<password>@<host>:<port>/<db_name>
3242
+ :param stream_path: Path to the model monitoring stream. By default, None. Options:
3243
+
3244
+ * None - will be set from the system configuration.
3245
+ * v3io - for v3io stream, pass `v3io` and the system will generate the exact
3246
+ path.
3247
+ * Kafka - for Kafka stream, provide the full connection string without custom
3248
+ topic, for example kafka://<some_kafka_broker>:<port>.
3241
3249
  :param tsdb_connection: Connection string to the time series database. By default, None.
3242
3250
  Options:
3243
- 1. None, will be set from the system configuration.
3244
- 2. v3io - for v3io stream,
3245
- pass `v3io` and the system will generate the exact path.
3246
- 3. TDEngine - for TDEngine tsdb, please provide full websocket connection URL,
3247
- for example taosws://<username>:<password>@<host>:<port>.
3248
- :param replace_creds: If True, will override the existing credentials.
3251
+
3252
+ * None - will be set from the system configuration.
3253
+ * v3io - for v3io stream, pass `v3io` and the system will generate the exact
3254
+ path.
3255
+ * TDEngine - for TDEngine tsdb, provide the full websocket connection URL,
3256
+ for example taosws://<username>:<password>@<host>:<port>.
3257
+ :param replace_creds: If True, will override the existing credentials.
3249
3258
  Please keep in mind that if you already enabled model monitoring on
3250
3259
  your project this action can cause data loose and will require redeploying
3251
3260
  all model monitoring functions & model monitoring infra
@@ -3351,7 +3360,8 @@ class MlrunProject(ModelObj):
3351
3360
  * A dictionary of configurations to use when logging. Further info per object type and
3352
3361
  artifact type can be given there. The artifact key must appear in the dictionary as
3353
3362
  "key": "the_key".
3354
- :param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
3363
+ :param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN":
3364
+ token}
3355
3365
  :param reset_on_run: When True, function python modules would reload prior to code execution.
3356
3366
  This ensures latest code changes are executed. This argument must be used in
3357
3367
  conjunction with the local=True argument.
@@ -4061,7 +4071,7 @@ class MlrunProject(ModelObj):
4061
4071
  mlrun.db.get_run_db().delete_api_gateway(name=name, project=self.name)
4062
4072
 
4063
4073
  def store_alert_config(
4064
- self, alert_data: AlertConfig, alert_name=None
4074
+ self, alert_data: AlertConfig, alert_name: typing.Optional[str] = None
4065
4075
  ) -> AlertConfig:
4066
4076
  """
4067
4077
  Create/modify an alert.
@@ -4070,9 +4080,11 @@ class MlrunProject(ModelObj):
4070
4080
  :param alert_name: The name of the alert.
4071
4081
  :return: the created/modified alert.
4072
4082
  """
4083
+ if not alert_data:
4084
+ raise mlrun.errors.MLRunInvalidArgumentError("Alert data must be provided")
4085
+
4073
4086
  db = mlrun.db.get_run_db(secrets=self._secrets)
4074
- if alert_name is None:
4075
- alert_name = alert_data.name
4087
+ alert_name = alert_name or alert_data.name
4076
4088
  if alert_data.project is not None and alert_data.project != self.metadata.name:
4077
4089
  logger.warn(
4078
4090
  "Project in alert does not match project in operation",
@@ -4375,18 +4387,17 @@ def _init_function_from_dict(
4375
4387
  )
4376
4388
 
4377
4389
  elif url.endswith(".py"):
4378
- # when load_source_on_run is used we allow not providing image as code will be loaded pre-run. ML-4994
4379
- if (
4380
- not image
4381
- and not project.default_image
4382
- and kind != "local"
4383
- and not project.spec.load_source_on_run
4384
- ):
4385
- raise ValueError(
4386
- "image must be provided with py code files which do not "
4387
- "run on 'local' engine kind"
4388
- )
4389
4390
  if in_context and with_repo:
4391
+ # when load_source_on_run is used we allow not providing image as code will be loaded pre-run. ML-4994
4392
+ if (
4393
+ not image
4394
+ and not project.default_image
4395
+ and kind != "local"
4396
+ and not project.spec.load_source_on_run
4397
+ ):
4398
+ raise ValueError(
4399
+ "image must be provided with py code files which do not run on 'local' engine kind"
4400
+ )
4390
4401
  func = new_function(
4391
4402
  name,
4392
4403
  command=relative_url,
@@ -4408,7 +4419,6 @@ def _init_function_from_dict(
4408
4419
  elif kind in mlrun.runtimes.RuntimeKinds.nuclio_runtimes():
4409
4420
  func = new_function(
4410
4421
  name,
4411
- command=relative_url,
4412
4422
  image=image,
4413
4423
  kind=kind,
4414
4424
  handler=handler,
@@ -4462,9 +4472,17 @@ def _init_function_from_obj(
4462
4472
  def _has_module(handler, kind):
4463
4473
  if not handler:
4464
4474
  return False
4465
- return (
4466
- kind in mlrun.runtimes.RuntimeKinds.nuclio_runtimes() and ":" in handler
4467
- ) or "." in handler
4475
+
4476
+ if (
4477
+ kind in mlrun.runtimes.RuntimeKinds.pure_nuclio_deployed_runtimes()
4478
+ and ":" in handler
4479
+ ):
4480
+ return True
4481
+
4482
+ if "." in handler:
4483
+ return True
4484
+
4485
+ return False
4468
4486
 
4469
4487
 
4470
4488
  def _is_imported_artifact(artifact):
mlrun/run.py CHANGED
@@ -65,6 +65,7 @@ from .runtimes.nuclio.application import ApplicationRuntime
65
65
  from .runtimes.utils import add_code_metadata, global_context
66
66
  from .utils import (
67
67
  RunKeys,
68
+ create_ipython_display,
68
69
  extend_hub_uri_if_needed,
69
70
  get_in,
70
71
  logger,
@@ -744,11 +745,10 @@ def code_to_function(
744
745
  raise ValueError("Databricks tasks only support embed_code=True")
745
746
 
746
747
  if kind == RuntimeKinds.application:
747
- if handler:
748
- raise MLRunInvalidArgumentError(
749
- "Handler is not supported for application runtime"
750
- )
751
- filename, handler = ApplicationRuntime.get_filename_and_handler()
748
+ raise MLRunInvalidArgumentError(
749
+ "Embedding a code file is not supported for application runtime. "
750
+ "Code files should be specified via project/function source."
751
+ )
752
752
 
753
753
  is_nuclio, sub_kind = RuntimeKinds.resolve_nuclio_sub_kind(kind)
754
754
  code_origin = add_name(add_code_metadata(filename), name)
@@ -942,10 +942,12 @@ def wait_for_pipeline_completion(
942
942
  if remote:
943
943
  mldb = mlrun.db.get_run_db()
944
944
 
945
+ dag_display_id = create_ipython_display()
946
+
945
947
  def _wait_for_pipeline_completion():
946
948
  pipeline = mldb.get_pipeline(run_id, namespace=namespace, project=project)
947
949
  pipeline_status = pipeline["run"]["status"]
948
- show_kfp_run(pipeline, clear_output=True)
950
+ show_kfp_run(pipeline, dag_display_id=dag_display_id, with_html=False)
949
951
  if pipeline_status not in RunStatuses.stable_statuses():
950
952
  logger.debug(
951
953
  "Waiting for pipeline completion",
mlrun/runtimes/__init__.py CHANGED
@@ -242,6 +242,10 @@ class RuntimeKinds:
242
242
  # both spark and remote spark uses different mechanism for assigning images
243
243
  return kind not in [RuntimeKinds.spark, RuntimeKinds.remotespark]
244
244
 
245
+ @staticmethod
246
+ def supports_from_notebook(kind):
247
+ return kind not in [RuntimeKinds.application]
248
+
245
249
  @staticmethod
246
250
  def resolve_nuclio_runtime(kind: str, sub_kind: str):
247
251
  kind = kind.split(":")[0]
mlrun/runtimes/nuclio/api_gateway.py CHANGED
@@ -386,6 +386,7 @@ class APIGateway(ModelObj):
386
386
  headers: dict = None,
387
387
  credentials: Optional[tuple[str, str]] = None,
388
388
  path: Optional[str] = None,
389
+ body: Optional[Union[str, bytes, dict]] = None,
389
390
  **kwargs,
390
391
  ):
391
392
  """
@@ -396,6 +397,7 @@ class APIGateway(ModelObj):
396
397
  :param credentials: (Optional[tuple[str, str]], optional) The (username,password) for the invocation if required
397
398
  can also be set by the environment variable (_, V3IO_ACCESS_KEY) for access key authentication.
398
399
  :param path: (str, optional) The sub-path for the invocation.
400
+ :param body: (Optional[Union[str, bytes, dict]]) The body of the invocation.
399
401
  :param kwargs: (dict) Additional keyword arguments.
400
402
 
401
403
  :return: The response from the API gateway invocation.
@@ -444,6 +446,13 @@ class APIGateway(ModelObj):
444
446
  "API Gateway invocation requires authentication. Please set V3IO_ACCESS_KEY env var"
445
447
  )
446
448
  url = urljoin(self.invoke_url, path or "")
449
+
450
+ # Determine the correct keyword argument for the body
451
+ if isinstance(body, dict):
452
+ kwargs["json"] = body
453
+ elif isinstance(body, (str, bytes)):
454
+ kwargs["data"] = body
455
+
447
456
  return requests.request(
448
457
  method=method,
449
458
  url=url,