mlrun 1.7.0rc38__py3-none-any.whl → 1.7.0rc41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (59) hide show
  1. mlrun/alerts/alert.py +30 -27
  2. mlrun/common/constants.py +3 -0
  3. mlrun/common/helpers.py +0 -1
  4. mlrun/common/schemas/alert.py +3 -0
  5. mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -1
  6. mlrun/common/schemas/notification.py +1 -0
  7. mlrun/config.py +1 -1
  8. mlrun/data_types/to_pandas.py +9 -9
  9. mlrun/datastore/alibaba_oss.py +3 -2
  10. mlrun/datastore/azure_blob.py +7 -9
  11. mlrun/datastore/base.py +13 -1
  12. mlrun/datastore/dbfs_store.py +3 -7
  13. mlrun/datastore/filestore.py +1 -3
  14. mlrun/datastore/google_cloud_storage.py +84 -29
  15. mlrun/datastore/redis.py +1 -0
  16. mlrun/datastore/s3.py +3 -2
  17. mlrun/datastore/sources.py +54 -0
  18. mlrun/datastore/storeytargets.py +147 -0
  19. mlrun/datastore/targets.py +76 -122
  20. mlrun/datastore/v3io.py +1 -0
  21. mlrun/db/httpdb.py +6 -1
  22. mlrun/errors.py +8 -0
  23. mlrun/execution.py +7 -0
  24. mlrun/feature_store/api.py +5 -0
  25. mlrun/feature_store/retrieval/job.py +1 -0
  26. mlrun/model.py +24 -3
  27. mlrun/model_monitoring/api.py +10 -2
  28. mlrun/model_monitoring/applications/_application_steps.py +52 -34
  29. mlrun/model_monitoring/applications/context.py +206 -70
  30. mlrun/model_monitoring/applications/histogram_data_drift.py +15 -13
  31. mlrun/model_monitoring/controller.py +15 -12
  32. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +17 -8
  33. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +19 -9
  34. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +85 -47
  35. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +46 -10
  36. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +38 -24
  37. mlrun/model_monitoring/helpers.py +54 -18
  38. mlrun/model_monitoring/stream_processing.py +10 -29
  39. mlrun/projects/pipelines.py +19 -30
  40. mlrun/projects/project.py +86 -67
  41. mlrun/run.py +8 -6
  42. mlrun/runtimes/__init__.py +4 -0
  43. mlrun/runtimes/nuclio/api_gateway.py +18 -0
  44. mlrun/runtimes/nuclio/application/application.py +150 -59
  45. mlrun/runtimes/nuclio/function.py +5 -11
  46. mlrun/runtimes/nuclio/serving.py +2 -2
  47. mlrun/runtimes/utils.py +16 -0
  48. mlrun/serving/routers.py +1 -1
  49. mlrun/serving/server.py +19 -5
  50. mlrun/serving/states.py +8 -0
  51. mlrun/serving/v2_serving.py +34 -26
  52. mlrun/utils/helpers.py +33 -2
  53. mlrun/utils/version/version.json +2 -2
  54. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/METADATA +9 -12
  55. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/RECORD +59 -58
  56. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/WHEEL +1 -1
  57. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/LICENSE +0 -0
  58. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/entry_points.txt +0 -0
  59. {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/top_level.txt +0 -0
mlrun/projects/project.py CHANGED
@@ -517,17 +517,24 @@ def get_or_create_project(
517
517
  parameters=parameters,
518
518
  allow_cross_project=allow_cross_project,
519
519
  )
520
- logger.info("Project loaded successfully", project_name=name)
520
+ logger.info("Project loaded successfully", project_name=project.name)
521
521
  return project
522
522
  except mlrun.errors.MLRunNotFoundError:
523
- logger.debug("Project not found in db", project_name=name)
523
+ logger.debug(
524
+ "Project not found in db", project_name=name, user_project=user_project
525
+ )
524
526
 
525
527
  spec_path = path.join(context, subpath or "", "project.yaml")
526
528
  load_from_path = url or path.isfile(spec_path)
527
529
  # do not nest under "try" or else the exceptions raised below will be logged along with the "not found" message
528
530
  if load_from_path:
529
531
  # loads a project from archive or local project.yaml
530
- logger.info("Loading project from path", project_name=name, path=url or context)
532
+ logger.info(
533
+ "Loading project from path",
534
+ project_name=name,
535
+ user_project=user_project,
536
+ path=url or context,
537
+ )
531
538
  project = load_project(
532
539
  context,
533
540
  url,
@@ -544,7 +551,7 @@ def get_or_create_project(
544
551
 
545
552
  logger.info(
546
553
  "Project loaded successfully",
547
- project_name=name,
554
+ project_name=project.name,
548
555
  path=url or context,
549
556
  stored_in_db=save,
550
557
  )
@@ -562,7 +569,9 @@ def get_or_create_project(
562
569
  save=save,
563
570
  parameters=parameters,
564
571
  )
565
- logger.info("Project created successfully", project_name=name, stored_in_db=save)
572
+ logger.info(
573
+ "Project created successfully", project_name=project.name, stored_in_db=save
574
+ )
566
575
  return project
567
576
 
568
577
 
@@ -1548,15 +1557,15 @@ class MlrunProject(ModelObj):
1548
1557
  self,
1549
1558
  item,
1550
1559
  body=None,
1551
- tag="",
1552
- local_path="",
1553
- artifact_path=None,
1554
- format=None,
1555
- upload=None,
1556
- labels=None,
1557
- target_path=None,
1560
+ tag: str = "",
1561
+ local_path: str = "",
1562
+ artifact_path: Optional[str] = None,
1563
+ format: Optional[str] = None,
1564
+ upload: Optional[bool] = None,
1565
+ labels: Optional[dict[str, str]] = None,
1566
+ target_path: Optional[str] = None,
1558
1567
  **kwargs,
1559
- ):
1568
+ ) -> Artifact:
1560
1569
  """Log an output artifact and optionally upload it to datastore
1561
1570
 
1562
1571
  If the artifact already exists with the same key and tag, it will be overwritten.
@@ -1655,7 +1664,7 @@ class MlrunProject(ModelObj):
1655
1664
  stats=None,
1656
1665
  target_path="",
1657
1666
  extra_data=None,
1658
- label_column: str = None,
1667
+ label_column: Optional[str] = None,
1659
1668
  **kwargs,
1660
1669
  ) -> DatasetArtifact:
1661
1670
  """
@@ -1732,15 +1741,15 @@ class MlrunProject(ModelObj):
1732
1741
  artifact_path=None,
1733
1742
  upload=None,
1734
1743
  labels=None,
1735
- inputs: list[Feature] = None,
1736
- outputs: list[Feature] = None,
1737
- feature_vector: str = None,
1738
- feature_weights: list = None,
1744
+ inputs: Optional[list[Feature]] = None,
1745
+ outputs: Optional[list[Feature]] = None,
1746
+ feature_vector: Optional[str] = None,
1747
+ feature_weights: Optional[list] = None,
1739
1748
  training_set=None,
1740
1749
  label_column=None,
1741
1750
  extra_data=None,
1742
1751
  **kwargs,
1743
- ):
1752
+ ) -> ModelArtifact:
1744
1753
  """Log a model artifact and optionally upload it to datastore
1745
1754
 
1746
1755
  If the model already exists with the same key and tag, it will be overwritten.
@@ -2394,7 +2403,11 @@ class MlrunProject(ModelObj):
2394
2403
  requirements: typing.Union[str, list[str]] = None,
2395
2404
  requirements_file: str = "",
2396
2405
  ) -> tuple[str, str, mlrun.runtimes.BaseRuntime, dict]:
2397
- if func is None and not _has_module(handler, kind):
2406
+ if (
2407
+ func is None
2408
+ and not _has_module(handler, kind)
2409
+ and mlrun.runtimes.RuntimeKinds.supports_from_notebook(kind)
2410
+ ):
2398
2411
  # if function path is not provided and it is not a module (no ".")
2399
2412
  # use the current notebook as default
2400
2413
  if is_ipython:
@@ -2973,7 +2986,6 @@ class MlrunProject(ModelObj):
2973
2986
  source: str = None,
2974
2987
  cleanup_ttl: int = None,
2975
2988
  notifications: list[mlrun.model.Notification] = None,
2976
- send_start_notification: bool = True,
2977
2989
  ) -> _PipelineRunStatus:
2978
2990
  """Run a workflow using kubeflow pipelines
2979
2991
 
@@ -3010,8 +3022,6 @@ class MlrunProject(ModelObj):
3010
3022
  workflow and all its resources are deleted)
3011
3023
  :param notifications:
3012
3024
  List of notifications to send for workflow completion
3013
- :param send_start_notification:
3014
- Send a notification when the workflow starts
3015
3025
 
3016
3026
  :returns: ~py:class:`~mlrun.projects.pipelines._PipelineRunStatus` instance
3017
3027
  """
@@ -3030,8 +3040,9 @@ class MlrunProject(ModelObj):
3030
3040
  "Remote repo is not defined, use .create_remote() + push()"
3031
3041
  )
3032
3042
 
3033
- if engine not in ["remote"]:
3034
- # for remote runs we don't require the functions to be synced as they can be loaded dynamically during run
3043
+ if engine not in ["remote"] and not schedule:
3044
+ # For remote/scheduled runs we don't require the functions to be synced as they can be loaded dynamically
3045
+ # during run
3035
3046
  self.sync_functions(always=sync)
3036
3047
  if not self.spec._function_objects:
3037
3048
  raise ValueError(
@@ -3089,7 +3100,6 @@ class MlrunProject(ModelObj):
3089
3100
  namespace=namespace,
3090
3101
  source=source,
3091
3102
  notifications=notifications,
3092
- send_start_notification=send_start_notification,
3093
3103
  )
3094
3104
  # run is None when scheduling
3095
3105
  if run and run.state == mlrun_pipelines.common.models.RunStatuses.failed:
@@ -3222,30 +3232,30 @@ class MlrunProject(ModelObj):
3222
3232
  infrastructure functions. Important to note that you have to set the credentials before deploying any
3223
3233
  model monitoring or serving function.
3224
3234
 
3225
- :param access_key: Model Monitoring access key for managing user permissions.
3226
- :param endpoint_store_connection: Endpoint store connection string. By default, None.
3227
- Options:
3228
- 1. None, will be set from the system configuration.
3229
- 2. v3io - for v3io endpoint store,
3230
- pass `v3io` and the system will generate the exact path.
3231
- 3. MySQL/SQLite - for SQL endpoint store, please provide full
3232
- connection string, for example
3233
- mysql+pymysql://<username>:<password>@<host>:<port>/<db_name>
3234
- :param stream_path: Path to the model monitoring stream. By default, None.
3235
- Options:
3236
- 1. None, will be set from the system configuration.
3237
- 2. v3io - for v3io stream,
3238
- pass `v3io` and the system will generate the exact path.
3239
- 3. Kafka - for Kafka stream, please provide full connection string without
3240
- custom topic, for example kafka://<some_kafka_broker>:<port>.
3235
+ :param access_key: Model monitoring access key for managing user permissions.
3236
+ :param endpoint_store_connection: Endpoint store connection string. By default, None. Options:
3237
+
3238
+ * None - will be set from the system configuration.
3239
+ * v3io - for v3io endpoint store, pass `v3io` and the system will generate the
3240
+ exact path.
3241
+ * MySQL/SQLite - for SQL endpoint store, provide the full connection string,
3242
+ for example: mysql+pymysql://<username>:<password>@<host>:<port>/<db_name>
3243
+ :param stream_path: Path to the model monitoring stream. By default, None. Options:
3244
+
3245
+ * None - will be set from the system configuration.
3246
+ * v3io - for v3io stream, pass `v3io` and the system will generate the exact
3247
+ path.
3248
+ * Kafka - for Kafka stream, provide the full connection string without custom
3249
+ topic, for example kafka://<some_kafka_broker>:<port>.
3241
3250
  :param tsdb_connection: Connection string to the time series database. By default, None.
3242
3251
  Options:
3243
- 1. None, will be set from the system configuration.
3244
- 2. v3io - for v3io stream,
3245
- pass `v3io` and the system will generate the exact path.
3246
- 3. TDEngine - for TDEngine tsdb, please provide full websocket connection URL,
3247
- for example taosws://<username>:<password>@<host>:<port>.
3248
- :param replace_creds: If True, will override the existing credentials.
3252
+
3253
+ * None - will be set from the system configuration.
3254
+ * v3io - for v3io stream, pass `v3io` and the system will generate the exact
3255
+ path.
3256
+ * TDEngine - for TDEngine tsdb, provide the full websocket connection URL,
3257
+ for example taosws://<username>:<password>@<host>:<port>.
3258
+ :param replace_creds: If True, will override the existing credentials.
3249
3259
  Please keep in mind that if you already enabled model monitoring on
3250
3260
  your project, this action can cause data loss and will require redeploying
3251
3261
  all model monitoring functions & model monitoring infra
@@ -3351,7 +3361,8 @@ class MlrunProject(ModelObj):
3351
3361
  * A dictionary of configurations to use when logging. Further info per object type and
3352
3362
  artifact type can be given there. The artifact key must appear in the dictionary as
3353
3363
  "key": "the_key".
3354
- :param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
3364
+ :param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN":
3365
+ token}
3355
3366
  :param reset_on_run: When True, function python modules would reload prior to code execution.
3356
3367
  This ensures latest code changes are executed. This argument must be used in
3357
3368
  conjunction with the local=True argument.
@@ -4061,7 +4072,7 @@ class MlrunProject(ModelObj):
4061
4072
  mlrun.db.get_run_db().delete_api_gateway(name=name, project=self.name)
4062
4073
 
4063
4074
  def store_alert_config(
4064
- self, alert_data: AlertConfig, alert_name=None
4075
+ self, alert_data: AlertConfig, alert_name: typing.Optional[str] = None
4065
4076
  ) -> AlertConfig:
4066
4077
  """
4067
4078
  Create/modify an alert.
@@ -4070,9 +4081,11 @@ class MlrunProject(ModelObj):
4070
4081
  :param alert_name: The name of the alert.
4071
4082
  :return: the created/modified alert.
4072
4083
  """
4084
+ if not alert_data:
4085
+ raise mlrun.errors.MLRunInvalidArgumentError("Alert data must be provided")
4086
+
4073
4087
  db = mlrun.db.get_run_db(secrets=self._secrets)
4074
- if alert_name is None:
4075
- alert_name = alert_data.name
4088
+ alert_name = alert_name or alert_data.name
4076
4089
  if alert_data.project is not None and alert_data.project != self.metadata.name:
4077
4090
  logger.warn(
4078
4091
  "Project in alert does not match project in operation",
@@ -4375,18 +4388,17 @@ def _init_function_from_dict(
4375
4388
  )
4376
4389
 
4377
4390
  elif url.endswith(".py"):
4378
- # when load_source_on_run is used we allow not providing image as code will be loaded pre-run. ML-4994
4379
- if (
4380
- not image
4381
- and not project.default_image
4382
- and kind != "local"
4383
- and not project.spec.load_source_on_run
4384
- ):
4385
- raise ValueError(
4386
- "image must be provided with py code files which do not "
4387
- "run on 'local' engine kind"
4388
- )
4389
4391
  if in_context and with_repo:
4392
+ # when load_source_on_run is used we allow not providing image as code will be loaded pre-run. ML-4994
4393
+ if (
4394
+ not image
4395
+ and not project.default_image
4396
+ and kind != "local"
4397
+ and not project.spec.load_source_on_run
4398
+ ):
4399
+ raise ValueError(
4400
+ "image must be provided with py code files which do not run on 'local' engine kind"
4401
+ )
4390
4402
  func = new_function(
4391
4403
  name,
4392
4404
  command=relative_url,
@@ -4408,7 +4420,6 @@ def _init_function_from_dict(
4408
4420
  elif kind in mlrun.runtimes.RuntimeKinds.nuclio_runtimes():
4409
4421
  func = new_function(
4410
4422
  name,
4411
- command=relative_url,
4412
4423
  image=image,
4413
4424
  kind=kind,
4414
4425
  handler=handler,
@@ -4462,9 +4473,17 @@ def _init_function_from_obj(
4462
4473
  def _has_module(handler, kind):
4463
4474
  if not handler:
4464
4475
  return False
4465
- return (
4466
- kind in mlrun.runtimes.RuntimeKinds.nuclio_runtimes() and ":" in handler
4467
- ) or "." in handler
4476
+
4477
+ if (
4478
+ kind in mlrun.runtimes.RuntimeKinds.pure_nuclio_deployed_runtimes()
4479
+ and ":" in handler
4480
+ ):
4481
+ return True
4482
+
4483
+ if "." in handler:
4484
+ return True
4485
+
4486
+ return False
4468
4487
 
4469
4488
 
4470
4489
  def _is_imported_artifact(artifact):
mlrun/run.py CHANGED
@@ -65,6 +65,7 @@ from .runtimes.nuclio.application import ApplicationRuntime
65
65
  from .runtimes.utils import add_code_metadata, global_context
66
66
  from .utils import (
67
67
  RunKeys,
68
+ create_ipython_display,
68
69
  extend_hub_uri_if_needed,
69
70
  get_in,
70
71
  logger,
@@ -744,11 +745,10 @@ def code_to_function(
744
745
  raise ValueError("Databricks tasks only support embed_code=True")
745
746
 
746
747
  if kind == RuntimeKinds.application:
747
- if handler:
748
- raise MLRunInvalidArgumentError(
749
- "Handler is not supported for application runtime"
750
- )
751
- filename, handler = ApplicationRuntime.get_filename_and_handler()
748
+ raise MLRunInvalidArgumentError(
749
+ "Embedding a code file is not supported for application runtime. "
750
+ "Code files should be specified via project/function source."
751
+ )
752
752
 
753
753
  is_nuclio, sub_kind = RuntimeKinds.resolve_nuclio_sub_kind(kind)
754
754
  code_origin = add_name(add_code_metadata(filename), name)
@@ -942,10 +942,12 @@ def wait_for_pipeline_completion(
942
942
  if remote:
943
943
  mldb = mlrun.db.get_run_db()
944
944
 
945
+ dag_display_id = create_ipython_display()
946
+
945
947
  def _wait_for_pipeline_completion():
946
948
  pipeline = mldb.get_pipeline(run_id, namespace=namespace, project=project)
947
949
  pipeline_status = pipeline["run"]["status"]
948
- show_kfp_run(pipeline, clear_output=True)
950
+ show_kfp_run(pipeline, dag_display_id=dag_display_id, with_html=False)
949
951
  if pipeline_status not in RunStatuses.stable_statuses():
950
952
  logger.debug(
951
953
  "Waiting for pipeline completion",
@@ -242,6 +242,10 @@ class RuntimeKinds:
242
242
  # both spark and remote spark uses different mechanism for assigning images
243
243
  return kind not in [RuntimeKinds.spark, RuntimeKinds.remotespark]
244
244
 
245
+ @staticmethod
246
+ def supports_from_notebook(kind):
247
+ return kind not in [RuntimeKinds.application]
248
+
245
249
  @staticmethod
246
250
  def resolve_nuclio_runtime(kind: str, sub_kind: str):
247
251
  kind = kind.split(":")[0]
@@ -386,6 +386,7 @@ class APIGateway(ModelObj):
386
386
  headers: dict = None,
387
387
  credentials: Optional[tuple[str, str]] = None,
388
388
  path: Optional[str] = None,
389
+ body: Optional[Union[str, bytes, dict]] = None,
389
390
  **kwargs,
390
391
  ):
391
392
  """
@@ -396,6 +397,7 @@ class APIGateway(ModelObj):
396
397
  :param credentials: (Optional[tuple[str, str]], optional) The (username,password) for the invocation if required
397
398
  can also be set by the environment variable (_, V3IO_ACCESS_KEY) for access key authentication.
398
399
  :param path: (str, optional) The sub-path for the invocation.
400
+ :param body: (Optional[Union[str, bytes, dict]]) The body of the invocation.
399
401
  :param kwargs: (dict) Additional keyword arguments.
400
402
 
401
403
  :return: The response from the API gateway invocation.
@@ -444,6 +446,13 @@ class APIGateway(ModelObj):
444
446
  "API Gateway invocation requires authentication. Please set V3IO_ACCESS_KEY env var"
445
447
  )
446
448
  url = urljoin(self.invoke_url, path or "")
449
+
450
+ # Determine the correct keyword argument for the body
451
+ if isinstance(body, dict):
452
+ kwargs["json"] = body
453
+ elif isinstance(body, (str, bytes)):
454
+ kwargs["data"] = body
455
+
447
456
  return requests.request(
448
457
  method=method,
449
458
  url=url,
@@ -569,6 +578,15 @@ class APIGateway(ModelObj):
569
578
  "true"
570
579
  )
571
580
 
581
+ def with_gateway_timeout(self, gateway_timeout: int):
582
+ """
583
+ Set gateway proxy connect/read/send timeout annotations
584
+ :param gateway_timeout: The timeout in seconds
585
+ """
586
+ mlrun.runtimes.utils.enrich_gateway_timeout_annotations(
587
+ self.metadata.annotations, gateway_timeout
588
+ )
589
+
572
590
  @classmethod
573
591
  def from_scheme(cls, api_gateway: schemas.APIGateway):
574
592
  project = api_gateway.metadata.labels.get(