mlrun 1.7.0rc6__py3-none-any.whl → 1.7.0rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (84)
  1. mlrun/__main__.py +2 -0
  2. mlrun/common/constants.py +6 -0
  3. mlrun/common/schemas/__init__.py +5 -0
  4. mlrun/common/schemas/api_gateway.py +8 -1
  5. mlrun/common/schemas/hub.py +7 -9
  6. mlrun/common/schemas/model_monitoring/__init__.py +4 -0
  7. mlrun/common/schemas/model_monitoring/constants.py +36 -19
  8. mlrun/{model_monitoring/stores/models/__init__.py → common/schemas/pagination.py} +9 -10
  9. mlrun/common/schemas/project.py +16 -10
  10. mlrun/common/types.py +7 -1
  11. mlrun/config.py +35 -10
  12. mlrun/data_types/data_types.py +4 -0
  13. mlrun/datastore/__init__.py +3 -7
  14. mlrun/datastore/alibaba_oss.py +130 -0
  15. mlrun/datastore/azure_blob.py +4 -5
  16. mlrun/datastore/base.py +22 -16
  17. mlrun/datastore/datastore.py +4 -0
  18. mlrun/datastore/datastore_profile.py +19 -1
  19. mlrun/datastore/google_cloud_storage.py +1 -1
  20. mlrun/datastore/snowflake_utils.py +43 -0
  21. mlrun/datastore/sources.py +11 -29
  22. mlrun/datastore/targets.py +131 -11
  23. mlrun/datastore/utils.py +10 -5
  24. mlrun/db/base.py +58 -6
  25. mlrun/db/httpdb.py +183 -77
  26. mlrun/db/nopdb.py +110 -0
  27. mlrun/feature_store/api.py +3 -2
  28. mlrun/feature_store/retrieval/spark_merger.py +27 -23
  29. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  30. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  31. mlrun/kfpops.py +2 -5
  32. mlrun/launcher/base.py +1 -1
  33. mlrun/launcher/client.py +2 -2
  34. mlrun/model.py +1 -0
  35. mlrun/model_monitoring/__init__.py +1 -1
  36. mlrun/model_monitoring/api.py +104 -295
  37. mlrun/model_monitoring/controller.py +25 -25
  38. mlrun/model_monitoring/db/__init__.py +16 -0
  39. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -34
  40. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  41. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +47 -6
  42. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  43. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +49 -0
  44. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +76 -3
  45. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +68 -0
  46. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/sqlite.py +13 -1
  47. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +662 -0
  48. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  49. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +134 -3
  50. mlrun/model_monitoring/helpers.py +3 -3
  51. mlrun/model_monitoring/stream_processing.py +41 -9
  52. mlrun/model_monitoring/tracking_policy.py +7 -1
  53. mlrun/model_monitoring/writer.py +4 -36
  54. mlrun/projects/pipelines.py +14 -2
  55. mlrun/projects/project.py +141 -122
  56. mlrun/run.py +8 -2
  57. mlrun/runtimes/__init__.py +16 -0
  58. mlrun/runtimes/base.py +10 -1
  59. mlrun/runtimes/kubejob.py +26 -121
  60. mlrun/runtimes/nuclio/api_gateway.py +243 -66
  61. mlrun/runtimes/nuclio/application/application.py +79 -1
  62. mlrun/runtimes/nuclio/application/reverse_proxy.go +9 -1
  63. mlrun/runtimes/nuclio/function.py +14 -8
  64. mlrun/runtimes/nuclio/serving.py +30 -34
  65. mlrun/runtimes/pod.py +171 -0
  66. mlrun/runtimes/utils.py +0 -28
  67. mlrun/serving/remote.py +2 -3
  68. mlrun/serving/routers.py +4 -3
  69. mlrun/serving/server.py +5 -7
  70. mlrun/serving/states.py +40 -23
  71. mlrun/serving/v2_serving.py +4 -3
  72. mlrun/utils/helpers.py +34 -0
  73. mlrun/utils/http.py +1 -1
  74. mlrun/utils/retryer.py +1 -0
  75. mlrun/utils/version/version.json +2 -2
  76. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/METADATA +25 -16
  77. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/RECORD +81 -75
  78. mlrun/model_monitoring/batch.py +0 -933
  79. mlrun/model_monitoring/stores/models/mysql.py +0 -34
  80. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  81. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/LICENSE +0 -0
  82. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/WHEEL +0 -0
  83. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/entry_points.txt +0 -0
  84. {mlrun-1.7.0rc6.dist-info → mlrun-1.7.0rc9.dist-info}/top_level.txt +0 -0
mlrun/db/base.py CHANGED
@@ -17,7 +17,7 @@ from abc import ABC, abstractmethod
17
17
  from typing import Optional, Union
18
18
 
19
19
  import mlrun.common.schemas
20
- import mlrun.model_monitoring.model_endpoint
20
+ import mlrun.model_monitoring
21
21
 
22
22
 
23
23
  class RunDBError(Exception):
@@ -509,9 +509,7 @@ class RunDBInterface(ABC):
509
509
  self,
510
510
  project: str,
511
511
  endpoint_id: str,
512
- model_endpoint: Union[
513
- mlrun.model_monitoring.model_endpoint.ModelEndpoint, dict
514
- ],
512
+ model_endpoint: Union[mlrun.model_monitoring.ModelEndpoint, dict],
515
513
  ):
516
514
  pass
517
515
 
@@ -632,6 +630,31 @@ class RunDBInterface(ABC):
632
630
  def get_api_gateway(self, name, project=None) -> mlrun.common.schemas.APIGateway:
633
631
  pass
634
632
 
633
+ @abstractmethod
634
+ def delete_api_gateway(self, name, project=None):
635
+ pass
636
+
637
+ @abstractmethod
638
+ def remote_builder(
639
+ self,
640
+ func: "mlrun.runtimes.BaseRuntime",
641
+ with_mlrun: bool,
642
+ mlrun_version_specifier: Optional[str] = None,
643
+ skip_deployed: bool = False,
644
+ builder_env: Optional[dict] = None,
645
+ force_build: bool = False,
646
+ ):
647
+ pass
648
+
649
+ @abstractmethod
650
+ def deploy_nuclio_function(
651
+ self,
652
+ func: "mlrun.runtimes.RemoteRuntime",
653
+ builder_env: Optional[dict] = None,
654
+ ):
655
+ pass
656
+
657
+ @abstractmethod
635
658
  def get_builder_status(
636
659
  self,
637
660
  func: "mlrun.runtimes.BaseRuntime",
@@ -642,6 +665,16 @@ class RunDBInterface(ABC):
642
665
  ):
643
666
  pass
644
667
 
668
+ @abstractmethod
669
+ def get_nuclio_deploy_status(
670
+ self,
671
+ func: "mlrun.runtimes.RemoteRuntime",
672
+ last_log_timestamp: float = 0.0,
673
+ verbose: bool = False,
674
+ ):
675
+ pass
676
+
677
+ @abstractmethod
645
678
  def set_run_notifications(
646
679
  self,
647
680
  project: str,
@@ -650,6 +683,7 @@ class RunDBInterface(ABC):
650
683
  ):
651
684
  pass
652
685
 
686
+ @abstractmethod
653
687
  def store_run_notifications(
654
688
  self,
655
689
  notification_objects: list[mlrun.model.Notification],
@@ -659,40 +693,49 @@ class RunDBInterface(ABC):
659
693
  ):
660
694
  pass
661
695
 
696
+ @abstractmethod
662
697
  def get_log_size(self, uid, project=""):
663
698
  pass
664
699
 
700
+ @abstractmethod
665
701
  def watch_log(self, uid, project="", watch=True, offset=0):
666
702
  pass
667
703
 
704
+ @abstractmethod
668
705
  def get_datastore_profile(
669
706
  self, name: str, project: str
670
707
  ) -> Optional[mlrun.common.schemas.DatastoreProfile]:
671
708
  pass
672
709
 
710
+ @abstractmethod
673
711
  def delete_datastore_profile(
674
712
  self, name: str, project: str
675
713
  ) -> mlrun.common.schemas.DatastoreProfile:
676
714
  pass
677
715
 
716
+ @abstractmethod
678
717
  def list_datastore_profiles(
679
718
  self, project: str
680
719
  ) -> list[mlrun.common.schemas.DatastoreProfile]:
681
720
  pass
682
721
 
722
+ @abstractmethod
683
723
  def store_datastore_profile(
684
724
  self, profile: mlrun.common.schemas.DatastoreProfile, project: str
685
725
  ):
686
726
  pass
687
727
 
728
+ @abstractmethod
688
729
  def function_status(self, project, name, kind, selector):
689
730
  pass
690
731
 
732
+ @abstractmethod
691
733
  def start_function(
692
734
  self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
693
735
  ):
694
736
  pass
695
737
 
738
+ @abstractmethod
696
739
  def submit_workflow(
697
740
  self,
698
741
  project: str,
@@ -711,6 +754,7 @@ class RunDBInterface(ABC):
711
754
  ) -> "mlrun.common.schemas.WorkflowResponse":
712
755
  pass
713
756
 
757
+ @abstractmethod
714
758
  def update_model_monitoring_controller(
715
759
  self,
716
760
  project: str,
@@ -719,10 +763,18 @@ class RunDBInterface(ABC):
719
763
  ):
720
764
  pass
721
765
 
766
+ @abstractmethod
722
767
  def enable_model_monitoring(
723
768
  self,
724
769
  project: str,
725
770
  base_period: int = 10,
726
771
  image: str = "mlrun/mlrun",
727
- ):
728
- pass
772
+ deploy_histogram_data_drift_app: bool = True,
773
+ ) -> None:
774
+ raise NotImplementedError
775
+
776
+ @abstractmethod
777
+ def deploy_histogram_data_drift_app(
778
+ self, project: str, image: str = "mlrun/mlrun"
779
+ ) -> None:
780
+ raise NotImplementedError
mlrun/db/httpdb.py CHANGED
@@ -11,6 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+
14
15
  import enum
15
16
  import http
16
17
  import re
@@ -30,10 +31,12 @@ import semver
30
31
 
31
32
  import mlrun
32
33
  import mlrun.common.schemas
34
+ import mlrun.common.types
33
35
  import mlrun.model_monitoring.model_endpoint
34
36
  import mlrun.platforms
35
37
  import mlrun.projects
36
38
  import mlrun.runtimes.nuclio.api_gateway
39
+ import mlrun.utils
37
40
  from mlrun.errors import MLRunInvalidArgumentError, err_to_str
38
41
 
39
42
  from ..artifacts import Artifact
@@ -180,7 +183,7 @@ class HTTPRunDB(RunDBInterface):
180
183
  headers=None,
181
184
  timeout=45,
182
185
  version=None,
183
- ):
186
+ ) -> requests.Response:
184
187
  """Perform a direct REST API call on the :py:mod:`mlrun` API server.
185
188
 
186
189
  Caution:
@@ -198,7 +201,7 @@ class HTTPRunDB(RunDBInterface):
198
201
  :param version: API version to use, None (the default) will mean to use the default value from config,
199
202
  for un-versioned api set an empty string.
200
203
 
201
- :return: Python HTTP response object
204
+ :return: `requests.Response` HTTP response object
202
205
  """
203
206
  url = self.get_base_api_url(path, version)
204
207
  kw = {
@@ -1181,7 +1184,7 @@ class HTTPRunDB(RunDBInterface):
1181
1184
  period didn't pass.
1182
1185
  :param grace_period: Grace period given to the runtime resource before they are actually removed, counted from
1183
1186
  the moment they moved to terminal state
1184
- (defaults to mlrun.config.config.runtime_resources_deletion_grace_period).
1187
+ (defaults to mlrun.mlconf.runtime_resources_deletion_grace_period).
1185
1188
 
1186
1189
  :returns: :py:class:`~mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput` listing the runtime resources
1187
1190
  that were removed.
@@ -1340,21 +1343,7 @@ class HTTPRunDB(RunDBInterface):
1340
1343
  :param builder_env: Kaniko builder pod env vars dict (for config/credentials)
1341
1344
  :param force_build: Force building the image, even when no changes were made
1342
1345
  """
1343
- is_s3_source = func.spec.build.source and func.spec.build.source.startswith(
1344
- "s3://"
1345
- )
1346
- is_ecr_image = mlrun.utils.is_ecr_url(config.httpdb.builder.docker_registry)
1347
- if not func.spec.build.load_source_on_run and is_s3_source and is_ecr_image:
1348
- logger.warning(
1349
- "Building a function image to ECR and loading an S3 source to the image may require conflicting access "
1350
- "keys. Only the permissions granted to the platform's configured secret will take affect "
1351
- "(see mlrun.config.config.httpdb.builder.docker_registry_secret). "
1352
- "In case the permissions are limited to ECR scope, you may use pull_at_runtime=True instead",
1353
- source=func.spec.build.source,
1354
- load_source_on_run=func.spec.build.load_source_on_run,
1355
- default_docker_registry=config.httpdb.builder.docker_registry,
1356
- )
1357
-
1346
+ self.warn_on_s3_and_ecr_permissions_conflict(func)
1358
1347
  try:
1359
1348
  req = {
1360
1349
  "function": func.to_dict(),
@@ -1373,10 +1362,103 @@ class HTTPRunDB(RunDBInterface):
1373
1362
 
1374
1363
  if not resp.ok:
1375
1364
  logger.error(f"bad resp!!\n{resp.text}")
1376
- raise ValueError("bad function run response")
1365
+ raise ValueError("bad submit build response")
1366
+
1367
+ return resp.json()
1368
+
1369
+ def deploy_nuclio_function(
1370
+ self,
1371
+ func: mlrun.runtimes.RemoteRuntime,
1372
+ builder_env: Optional[dict] = None,
1373
+ ):
1374
+ """
1375
+ Deploy a Nuclio function.
1376
+ :param func: Function to build.
1377
+ :param builder_env: Kaniko builder pod env vars dict (for config/credentials)
1378
+ """
1379
+ func.metadata.project = func.metadata.project or config.default_project
1380
+ self.warn_on_s3_and_ecr_permissions_conflict(func)
1381
+ try:
1382
+ req = {
1383
+ "function": func.to_dict(),
1384
+ }
1385
+ if builder_env:
1386
+ req["builder_env"] = builder_env
1387
+ _path = (
1388
+ f"projects/{func.metadata.project}/nuclio/{func.metadata.name}/deploy"
1389
+ )
1390
+ resp = self.api_call("POST", _path, json=req)
1391
+ except OSError as err:
1392
+ logger.error(f"error submitting nuclio deploy task: {err_to_str(err)}")
1393
+ raise OSError(f"error: cannot submit deploy, {err_to_str(err)}")
1394
+
1395
+ if not resp.ok:
1396
+ logger.error(f"deploy nuclio - bad response:\n{resp.text}")
1397
+ raise ValueError("bad nuclio deploy response")
1377
1398
 
1378
1399
  return resp.json()
1379
1400
 
1401
+ def get_nuclio_deploy_status(
1402
+ self,
1403
+ func: mlrun.runtimes.RemoteRuntime,
1404
+ last_log_timestamp: float = 0.0,
1405
+ verbose: bool = False,
1406
+ ):
1407
+ """Retrieve the status of a deploy operation currently in progress.
1408
+
1409
+ :param func: Function object that is being built.
1410
+ :param last_log_timestamp: Last timestamp of logs that were already retrieved. Function will return only logs
1411
+ later than this parameter.
1412
+ :param verbose: Add verbose logs into the output.
1413
+
1414
+ :returns: The following parameters:
1415
+
1416
+ - Text of builder logs.
1417
+ - Timestamp of last log retrieved, to be used in subsequent calls to this function.
1418
+ """
1419
+
1420
+ try:
1421
+ params = {
1422
+ "name": normalize_name(func.metadata.name),
1423
+ "project": func.metadata.project,
1424
+ "tag": func.metadata.tag,
1425
+ "last_log_timestamp": str(last_log_timestamp),
1426
+ "verbose": bool2str(verbose),
1427
+ }
1428
+ _path = (
1429
+ f"projects/{func.metadata.project}/nuclio/{func.metadata.name}/deploy"
1430
+ )
1431
+ resp = self.api_call("GET", _path, params=params)
1432
+ except OSError as err:
1433
+ logger.error(f"error getting deploy status: {err_to_str(err)}")
1434
+ raise OSError(f"error: cannot get deploy status, {err_to_str(err)}")
1435
+
1436
+ if not resp.ok:
1437
+ logger.warning(f"failed resp, {resp.text}")
1438
+ raise RunDBError("bad function build response")
1439
+
1440
+ if resp.headers:
1441
+ func.status.state = resp.headers.get("x-mlrun-function-status", "")
1442
+ last_log_timestamp = float(
1443
+ resp.headers.get("x-mlrun-last-timestamp", "0.0")
1444
+ )
1445
+ func.status.address = resp.headers.get("x-mlrun-address", "")
1446
+ func.status.nuclio_name = resp.headers.get("x-mlrun-name", "")
1447
+ func.status.internal_invocation_urls = resp.headers.get(
1448
+ "x-mlrun-internal-invocation-urls", ""
1449
+ ).split(",")
1450
+ func.status.external_invocation_urls = resp.headers.get(
1451
+ "x-mlrun-external-invocation-urls", ""
1452
+ ).split(",")
1453
+ func.status.container_image = resp.headers.get(
1454
+ "x-mlrun-container-image", ""
1455
+ )
1456
+
1457
+ text = ""
1458
+ if resp.content:
1459
+ text = resp.content.decode()
1460
+ return text, last_log_timestamp
1461
+
1380
1462
  def get_builder_status(
1381
1463
  self,
1382
1464
  func: BaseRuntime,
@@ -1438,9 +1520,14 @@ class HTTPRunDB(RunDBInterface):
1438
1520
  func.status.container_image = resp.headers.get(
1439
1521
  "x-mlrun-container-image", ""
1440
1522
  )
1441
- else:
1442
- func.status.build_pod = resp.headers.get("builder_pod", "")
1443
- func.spec.image = resp.headers.get("function_image", "")
1523
+
1524
+ builder_pod = resp.headers.get("builder_pod", "")
1525
+ if builder_pod:
1526
+ func.status.build_pod = builder_pod
1527
+
1528
+ function_image = resp.headers.get("function_image", "")
1529
+ if function_image:
1530
+ func.spec.image = function_image
1444
1531
 
1445
1532
  text = ""
1446
1533
  if resp.content:
@@ -1503,7 +1590,7 @@ class HTTPRunDB(RunDBInterface):
1503
1590
  Retrieve updated information on project background tasks being executed.
1504
1591
  If no filter is provided, will return background tasks from the last week.
1505
1592
 
1506
- :param project: Project name (defaults to mlrun.config.config.default_project).
1593
+ :param project: Project name (defaults to mlrun.mlconf.default_project).
1507
1594
  :param state: List only background tasks whose state is specified.
1508
1595
  :param created_from: Filter by background task created time in ``[created_from, created_to]``.
1509
1596
  :param created_to: Filter by background task created time in ``[created_from, created_to]``.
@@ -1616,19 +1703,21 @@ class HTTPRunDB(RunDBInterface):
1616
1703
  artifact_path=None,
1617
1704
  ops=None,
1618
1705
  cleanup_ttl=None,
1706
+ timeout=60,
1619
1707
  ):
1620
1708
  """Submit a KFP pipeline for execution.
1621
1709
 
1622
- :param project: The project of the pipeline
1623
- :param pipeline: Pipeline function or path to .yaml/.zip pipeline file.
1624
- :param arguments: A dictionary of arguments to pass to the pipeline.
1625
- :param experiment: A name to assign for the specific experiment.
1626
- :param run: A name for this specific run.
1627
- :param namespace: Kubernetes namespace to execute the pipeline in.
1628
- :param artifact_path: A path to artifacts used by this pipeline.
1629
- :param ops: Transformers to apply on all ops in the pipeline.
1630
- :param cleanup_ttl: pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
1631
- workflow and all its resources are deleted)
1710
+ :param project: The project of the pipeline
1711
+ :param pipeline: Pipeline function or path to .yaml/.zip pipeline file.
1712
+ :param arguments: A dictionary of arguments to pass to the pipeline.
1713
+ :param experiment: A name to assign for the specific experiment.
1714
+ :param run: A name for this specific run.
1715
+ :param namespace: Kubernetes namespace to execute the pipeline in.
1716
+ :param artifact_path: A path to artifacts used by this pipeline.
1717
+ :param ops: Transformers to apply on all ops in the pipeline.
1718
+ :param cleanup_ttl: Pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
1719
+ workflow and all its resources are deleted)
1720
+ :param timeout: Timeout for the API call.
1632
1721
  """
1633
1722
 
1634
1723
  if isinstance(pipeline, str):
@@ -1670,7 +1759,7 @@ class HTTPRunDB(RunDBInterface):
1670
1759
  "POST",
1671
1760
  f"projects/{project}/pipelines",
1672
1761
  params=params,
1673
- timeout=20,
1762
+ timeout=timeout,
1674
1763
  body=data,
1675
1764
  headers=headers,
1676
1765
  )
@@ -3050,35 +3139,6 @@ class HTTPRunDB(RunDBInterface):
3050
3139
  params=attributes,
3051
3140
  )
3052
3141
 
3053
- def deploy_monitoring_batch_job(
3054
- self,
3055
- project: str = "",
3056
- default_batch_image: str = "mlrun/mlrun",
3057
- with_schedule: bool = False,
3058
- ):
3059
- """
3060
- Submit model monitoring batch job. By default, submit only the batch job as ML function without scheduling.
3061
- To submit a scheduled job as well, please set with_schedule = True.
3062
-
3063
- :param project: Project name.
3064
- :param default_batch_image: The default image of the model monitoring batch job. By default, the image
3065
- is mlrun/mlrun.
3066
- :param with_schedule: If true, submit the model monitoring scheduled job as well.
3067
-
3068
-
3069
- :returns: model monitoring batch job as a dictionary. You can easily convert the returned function into a
3070
- runtime object by calling ~mlrun.new_function.
3071
- """
3072
-
3073
- params = {
3074
- "default_batch_image": default_batch_image,
3075
- "with_schedule": with_schedule,
3076
- }
3077
- path = f"projects/{project}/jobs/batch-monitoring"
3078
-
3079
- resp = self.api_call(method="POST", path=path, params=params)
3080
- return resp.json()["func"]
3081
-
3082
3142
  def update_model_monitoring_controller(
3083
3143
  self,
3084
3144
  project: str,
@@ -3107,7 +3167,8 @@ class HTTPRunDB(RunDBInterface):
3107
3167
  project: str,
3108
3168
  base_period: int = 10,
3109
3169
  image: str = "mlrun/mlrun",
3110
- ):
3170
+ deploy_histogram_data_drift_app: bool = True,
3171
+ ) -> None:
3111
3172
  """
3112
3173
  Deploy model monitoring application controller, writer and stream functions.
3113
3174
  While the main goal of the controller function is to handle the monitoring processing and triggering
@@ -3116,21 +3177,38 @@ class HTTPRunDB(RunDBInterface):
3116
3177
  The stream function goal is to monitor the log of the data stream. It is triggered when a new log entry
3117
3178
  is detected. It processes the new events into statistics that are then written to statistics databases.
3118
3179
 
3180
+ :param project: Project name.
3181
+ :param base_period: The time period in minutes in which the model monitoring controller function
3182
+ triggers. By default, the base period is 10 minutes.
3183
+ :param image: The image of the model monitoring controller, writer & monitoring
3184
+ stream functions, which are real time nuclio functions.
3185
+ By default, the image is mlrun/mlrun.
3186
+ :param deploy_histogram_data_drift_app: If true, deploy the default histogram-based data drift application.
3187
+ """
3188
+ self.api_call(
3189
+ method=mlrun.common.types.HTTPMethod.POST,
3190
+ path=f"projects/{project}/model-monitoring/enable-model-monitoring",
3191
+ params={
3192
+ "base_period": base_period,
3193
+ "image": image,
3194
+ "deploy_histogram_data_drift_app": deploy_histogram_data_drift_app,
3195
+ },
3196
+ )
3119
3197
 
3120
- :param project: Project name.
3121
- :param base_period: The time period in minutes in which the model monitoring controller function
3122
- triggers. By default, the base period is 10 minutes.
3123
- :param image: The image of the model monitoring controller, writer & monitoring
3124
- stream functions, which are real time nuclio functions.
3125
- By default, the image is mlrun/mlrun.
3198
+ def deploy_histogram_data_drift_app(
3199
+ self, project: str, image: str = "mlrun/mlrun"
3200
+ ) -> None:
3126
3201
  """
3202
+ Deploy the histogram data drift application.
3127
3203
 
3128
- params = {
3129
- "base_period": base_period,
3130
- "image": image,
3131
- }
3132
- path = f"projects/{project}/model-monitoring/enable-model-monitoring"
3133
- self.api_call(method="POST", path=path, params=params)
3204
+ :param project: Project name.
3205
+ :param image: The image on which the application will run.
3206
+ """
3207
+ self.api_call(
3208
+ method=mlrun.common.types.HTTPMethod.POST,
3209
+ path=f"projects/{project}/model-monitoring/deploy-histogram-data-drift-app",
3210
+ params={"image": image},
3211
+ )
3134
3212
 
3135
3213
  def create_hub_source(
3136
3214
  self, source: Union[dict, mlrun.common.schemas.IndexedHubSource]
@@ -3397,6 +3475,17 @@ class HTTPRunDB(RunDBInterface):
3397
3475
  response = self.api_call("GET", endpoint_path, error)
3398
3476
  return mlrun.common.schemas.APIGateway(**response.json())
3399
3477
 
3478
+ def delete_api_gateway(self, name, project=None):
3479
+ """
3480
+ Deletes an API gateway
3481
+ :param name: API gateway name
3482
+ :param project: Project name
3483
+ """
3484
+ project = project or config.default_project
3485
+ error = "delete api gateway"
3486
+ endpoint_path = f"projects/{project}/api-gateways/{name}"
3487
+ self.api_call("DELETE", endpoint_path, error)
3488
+
3400
3489
  def store_api_gateway(
3401
3490
  self,
3402
3491
  api_gateway: Union[
@@ -3422,7 +3511,7 @@ class HTTPRunDB(RunDBInterface):
3422
3511
  "PUT",
3423
3512
  endpoint_path,
3424
3513
  error,
3425
- json=api_gateway.dict(exclude_unset=True, exclude_none=True),
3514
+ json=api_gateway.dict(exclude_none=True),
3426
3515
  )
3427
3516
  return mlrun.common.schemas.APIGateway(**response.json())
3428
3517
 
@@ -3698,6 +3787,23 @@ class HTTPRunDB(RunDBInterface):
3698
3787
 
3699
3788
  self.api_call(method="PUT", path=_path, json=profile.dict())
3700
3789
 
3790
+ @staticmethod
3791
+ def warn_on_s3_and_ecr_permissions_conflict(func):
3792
+ is_s3_source = func.spec.build.source and func.spec.build.source.startswith(
3793
+ "s3://"
3794
+ )
3795
+ is_ecr_image = mlrun.utils.is_ecr_url(config.httpdb.builder.docker_registry)
3796
+ if not func.spec.build.load_source_on_run and is_s3_source and is_ecr_image:
3797
+ logger.warning(
3798
+ "Building a function image to ECR and loading an S3 source to the image may require conflicting access "
3799
+ "keys. Only the permissions granted to the platform's configured secret will take affect "
3800
+ "(see mlrun.config.config.httpdb.builder.docker_registry_secret). "
3801
+ "In case the permissions are limited to ECR scope, you may use pull_at_runtime=True instead",
3802
+ source=func.spec.build.source,
3803
+ load_source_on_run=func.spec.build.load_source_on_run,
3804
+ default_docker_registry=config.httpdb.builder.docker_registry,
3805
+ )
3806
+
3701
3807
 
3702
3808
  def _as_json(obj):
3703
3809
  fn = getattr(obj, "to_json", None)
mlrun/db/nopdb.py CHANGED
@@ -519,12 +519,74 @@ class NopDB(RunDBInterface):
519
519
  def get_api_gateway(self, name, project=None):
520
520
  pass
521
521
 
522
+ def delete_api_gateway(self, name, project=None):
523
+ pass
524
+
522
525
  def verify_authorization(
523
526
  self,
524
527
  authorization_verification_input: mlrun.common.schemas.AuthorizationVerificationInput,
525
528
  ):
526
529
  pass
527
530
 
531
+ def remote_builder(
532
+ self,
533
+ func: "mlrun.runtimes.BaseRuntime",
534
+ with_mlrun: bool,
535
+ mlrun_version_specifier: Optional[str] = None,
536
+ skip_deployed: bool = False,
537
+ builder_env: Optional[dict] = None,
538
+ force_build: bool = False,
539
+ ):
540
+ pass
541
+
542
+ def deploy_nuclio_function(
543
+ self,
544
+ func: "mlrun.runtimes.RemoteRuntime",
545
+ builder_env: Optional[dict] = None,
546
+ ):
547
+ pass
548
+
549
+ def get_builder_status(
550
+ self,
551
+ func: "mlrun.runtimes.BaseRuntime",
552
+ offset: int = 0,
553
+ logs: bool = True,
554
+ last_log_timestamp: float = 0.0,
555
+ verbose: bool = False,
556
+ ):
557
+ pass
558
+
559
+ def get_nuclio_deploy_status(
560
+ self,
561
+ func: "mlrun.runtimes.RemoteRuntime",
562
+ last_log_timestamp: float = 0.0,
563
+ verbose: bool = False,
564
+ ):
565
+ pass
566
+
567
+ def set_run_notifications(
568
+ self,
569
+ project: str,
570
+ runs: list[mlrun.model.RunObject],
571
+ notifications: list[mlrun.model.Notification],
572
+ ):
573
+ pass
574
+
575
+ def store_run_notifications(
576
+ self,
577
+ notification_objects: list[mlrun.model.Notification],
578
+ run_uid: str,
579
+ project: str = None,
580
+ mask_params: bool = True,
581
+ ):
582
+ pass
583
+
584
+ def get_log_size(self, uid, project=""):
585
+ pass
586
+
587
+ def watch_log(self, uid, project="", watch=True, offset=0):
588
+ pass
589
+
528
590
  def get_datastore_profile(
529
591
  self, name: str, project: str
530
592
  ) -> Optional[mlrun.common.schemas.DatastoreProfile]:
@@ -542,3 +604,51 @@ class NopDB(RunDBInterface):
542
604
  self, profile: mlrun.common.schemas.DatastoreProfile, project: str
543
605
  ):
544
606
  pass
607
+
608
+ def function_status(self, project, name, kind, selector):
609
+ pass
610
+
611
+ def start_function(
612
+ self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
613
+ ):
614
+ pass
615
+
616
+ def submit_workflow(
617
+ self,
618
+ project: str,
619
+ name: str,
620
+ workflow_spec: Union[
621
+ "mlrun.projects.pipelines.WorkflowSpec",
622
+ "mlrun.common.schemas.WorkflowSpec",
623
+ dict,
624
+ ],
625
+ arguments: Optional[dict] = None,
626
+ artifact_path: Optional[str] = None,
627
+ source: Optional[str] = None,
628
+ run_name: Optional[str] = None,
629
+ namespace: Optional[str] = None,
630
+ notifications: list["mlrun.model.Notification"] = None,
631
+ ) -> "mlrun.common.schemas.WorkflowResponse":
632
+ pass
633
+
634
+ def update_model_monitoring_controller(
635
+ self,
636
+ project: str,
637
+ base_period: int = 10,
638
+ image: str = "mlrun/mlrun",
639
+ ):
640
+ pass
641
+
642
+ def enable_model_monitoring(
643
+ self,
644
+ project: str,
645
+ base_period: int = 10,
646
+ image: str = "mlrun/mlrun",
647
+ deploy_histogram_data_drift_app: bool = True,
648
+ ) -> None:
649
+ raise NotImplementedError
650
+
651
+ def deploy_histogram_data_drift_app(
652
+ self, project: str, image: str = "mlrun/mlrun"
653
+ ) -> None:
654
+ raise NotImplementedError
mlrun/feature_store/api.py CHANGED
@@ -1121,9 +1121,10 @@ def _ingest_with_spark(
1121
1121
  df_to_write = target.prepare_spark_df(
1122
1122
  df_to_write, key_columns, timestamp_key, spark_options
1123
1123
  )
1124
+ write_format = spark_options.pop("format", None)
1124
1125
  if overwrite:
1125
1126
  write_spark_dataframe_with_options(
1126
- spark_options, df_to_write, "overwrite"
1127
+ spark_options, df_to_write, "overwrite", write_format=write_format
1127
1128
  )
1128
1129
  else:
1129
1130
  # appending an empty dataframe may cause an empty file to be created (e.g. when writing to parquet)
@@ -1131,7 +1132,7 @@ def _ingest_with_spark(
1131
1132
  df_to_write.persist()
1132
1133
  if df_to_write.count() > 0:
1133
1134
  write_spark_dataframe_with_options(
1134
- spark_options, df_to_write, "append"
1135
+ spark_options, df_to_write, "append", write_format=write_format
1135
1136
  )
1136
1137
  target.update_resource_status("ready")
1137
1138